author    David Woodhouse <David.Woodhouse@intel.com>  2008-10-21 14:42:20 -0400
committer David Woodhouse <David.Woodhouse@intel.com>  2008-10-21 14:42:20 -0400
commit    b876d08f816527af257e13d89fb0d3b4b849223c (patch)
tree      40569f568230f918ca55f04b355e251747f913ed
parent    b364776ad1208a71f0c53578c84619a395412a8d (diff)
parent    2515ddc6db8eb49a79f0fe5e67ff09ac7c81eab4 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: drivers/pci/dmar.c
-rw-r--r--  CREDITS | 12
-rw-r--r--  Documentation/DocBook/kernel-hacking.tmpl | 2
-rw-r--r--  Documentation/MSI-HOWTO.txt | 6
-rw-r--r--  Documentation/PCI/pci.txt | 4
-rw-r--r--  Documentation/PCI/pcieaer-howto.txt | 11
-rw-r--r--  Documentation/kernel-parameters.txt | 31
-rw-r--r--  Documentation/markers.txt | 10
-rw-r--r--  Documentation/sysrq.txt | 5
-rw-r--r--  Documentation/tracepoints.txt | 101
-rw-r--r--  Documentation/tracers/mmiotrace.txt | 5
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  arch/alpha/kernel/sys_sable.c | 6
-rw-r--r--  arch/arm/mach-iop13xx/include/mach/time.h | 4
-rw-r--r--  arch/arm/mach-ixp2000/ixdp2x00.c | 4
-rw-r--r--  arch/arm/mach-omap2/irq.c | 8
-rw-r--r--  arch/arm/mach-pxa/include/mach/zylonite.h | 4
-rw-r--r--  arch/arm/mach-sa1100/include/mach/ide.h | 75
-rw-r--r--  arch/avr32/mach-at32ap/extint.c | 8
-rw-r--r--  arch/ia64/include/asm/pci.h | 12
-rw-r--r--  arch/ia64/pci/pci.c | 7
-rw-r--r--  arch/m32r/kernel/smpboot.c | 1
-rw-r--r--  arch/parisc/Kconfig | 2
-rw-r--r--  arch/parisc/include/asm/Kbuild (renamed from include/asm-parisc/Kbuild) | 0
-rw-r--r--  arch/parisc/include/asm/agp.h (renamed from include/asm-parisc/agp.h) | 0
-rw-r--r--  arch/parisc/include/asm/asmregs.h (renamed from include/asm-parisc/asmregs.h) | 0
-rw-r--r--  arch/parisc/include/asm/assembly.h (renamed from include/asm-parisc/assembly.h) | 0
-rw-r--r--  arch/parisc/include/asm/atomic.h (renamed from include/asm-parisc/atomic.h) | 0
-rw-r--r--  arch/parisc/include/asm/auxvec.h (renamed from include/asm-parisc/auxvec.h) | 0
-rw-r--r--  arch/parisc/include/asm/bitops.h (renamed from include/asm-parisc/bitops.h) | 0
-rw-r--r--  arch/parisc/include/asm/bug.h (renamed from include/asm-parisc/bug.h) | 0
-rw-r--r--  arch/parisc/include/asm/bugs.h (renamed from include/asm-parisc/bugs.h) | 0
-rw-r--r--  arch/parisc/include/asm/byteorder.h (renamed from include/asm-parisc/byteorder.h) | 0
-rw-r--r--  arch/parisc/include/asm/cache.h (renamed from include/asm-parisc/cache.h) | 0
-rw-r--r--  arch/parisc/include/asm/cacheflush.h (renamed from include/asm-parisc/cacheflush.h) | 0
-rw-r--r--  arch/parisc/include/asm/checksum.h (renamed from include/asm-parisc/checksum.h) | 0
-rw-r--r--  arch/parisc/include/asm/compat.h (renamed from include/asm-parisc/compat.h) | 0
-rw-r--r--  arch/parisc/include/asm/compat_rt_sigframe.h (renamed from include/asm-parisc/compat_rt_sigframe.h) | 0
-rw-r--r--  arch/parisc/include/asm/compat_signal.h (renamed from include/asm-parisc/compat_signal.h) | 0
-rw-r--r--  arch/parisc/include/asm/compat_ucontext.h (renamed from include/asm-parisc/compat_ucontext.h) | 0
-rw-r--r--  arch/parisc/include/asm/cputime.h (renamed from include/asm-parisc/cputime.h) | 0
-rw-r--r--  arch/parisc/include/asm/current.h (renamed from include/asm-parisc/current.h) | 0
-rw-r--r--  arch/parisc/include/asm/delay.h (renamed from include/asm-parisc/delay.h) | 0
-rw-r--r--  arch/parisc/include/asm/device.h (renamed from include/asm-parisc/device.h) | 0
-rw-r--r--  arch/parisc/include/asm/div64.h (renamed from include/asm-parisc/div64.h) | 0
-rw-r--r--  arch/parisc/include/asm/dma-mapping.h (renamed from include/asm-parisc/dma-mapping.h) | 0
-rw-r--r--  arch/parisc/include/asm/dma.h (renamed from include/asm-parisc/dma.h) | 0
-rw-r--r--  arch/parisc/include/asm/eisa_bus.h (renamed from include/asm-parisc/eisa_bus.h) | 0
-rw-r--r--  arch/parisc/include/asm/eisa_eeprom.h (renamed from include/asm-parisc/eisa_eeprom.h) | 0
-rw-r--r--  arch/parisc/include/asm/elf.h (renamed from include/asm-parisc/elf.h) | 0
-rw-r--r--  arch/parisc/include/asm/emergency-restart.h (renamed from include/asm-parisc/emergency-restart.h) | 0
-rw-r--r--  arch/parisc/include/asm/errno.h (renamed from include/asm-parisc/errno.h) | 0
-rw-r--r--  arch/parisc/include/asm/fb.h (renamed from include/asm-parisc/fb.h) | 0
-rw-r--r--  arch/parisc/include/asm/fcntl.h (renamed from include/asm-parisc/fcntl.h) | 0
-rw-r--r--  arch/parisc/include/asm/fixmap.h (renamed from include/asm-parisc/fixmap.h) | 0
-rw-r--r--  arch/parisc/include/asm/floppy.h (renamed from include/asm-parisc/floppy.h) | 0
-rw-r--r--  arch/parisc/include/asm/futex.h (renamed from include/asm-parisc/futex.h) | 0
-rw-r--r--  arch/parisc/include/asm/grfioctl.h (renamed from include/asm-parisc/grfioctl.h) | 0
-rw-r--r--  arch/parisc/include/asm/hardirq.h (renamed from include/asm-parisc/hardirq.h) | 0
-rw-r--r--  arch/parisc/include/asm/hardware.h (renamed from include/asm-parisc/hardware.h) | 0
-rw-r--r--  arch/parisc/include/asm/hw_irq.h (renamed from include/asm-parisc/hw_irq.h) | 0
-rw-r--r--  arch/parisc/include/asm/ide.h (renamed from include/asm-parisc/ide.h) | 4
-rw-r--r--  arch/parisc/include/asm/io.h (renamed from include/asm-parisc/io.h) | 0
-rw-r--r--  arch/parisc/include/asm/ioctl.h (renamed from include/asm-parisc/ioctl.h) | 0
-rw-r--r--  arch/parisc/include/asm/ioctls.h (renamed from include/asm-parisc/ioctls.h) | 0
-rw-r--r--  arch/parisc/include/asm/ipcbuf.h (renamed from include/asm-parisc/ipcbuf.h) | 0
-rw-r--r--  arch/parisc/include/asm/irq.h (renamed from include/asm-parisc/irq.h) | 0
-rw-r--r--  arch/parisc/include/asm/irq_regs.h (renamed from include/asm-parisc/irq_regs.h) | 0
-rw-r--r--  arch/parisc/include/asm/kdebug.h (renamed from include/asm-parisc/kdebug.h) | 0
-rw-r--r--  arch/parisc/include/asm/kmap_types.h (renamed from include/asm-parisc/kmap_types.h) | 0
-rw-r--r--  arch/parisc/include/asm/led.h (renamed from include/asm-parisc/led.h) | 0
-rw-r--r--  arch/parisc/include/asm/linkage.h (renamed from include/asm-parisc/linkage.h) | 0
-rw-r--r--  arch/parisc/include/asm/local.h (renamed from include/asm-parisc/local.h) | 0
-rw-r--r--  arch/parisc/include/asm/machdep.h (renamed from include/asm-parisc/machdep.h) | 0
-rw-r--r--  arch/parisc/include/asm/mc146818rtc.h (renamed from include/asm-parisc/mc146818rtc.h) | 0
-rw-r--r--  arch/parisc/include/asm/mckinley.h (renamed from include/asm-parisc/mckinley.h) | 0
-rw-r--r--  arch/parisc/include/asm/mman.h (renamed from include/asm-parisc/mman.h) | 0
-rw-r--r--  arch/parisc/include/asm/mmu.h (renamed from include/asm-parisc/mmu.h) | 0
-rw-r--r--  arch/parisc/include/asm/mmu_context.h (renamed from include/asm-parisc/mmu_context.h) | 0
-rw-r--r--  arch/parisc/include/asm/mmzone.h (renamed from include/asm-parisc/mmzone.h) | 0
-rw-r--r--  arch/parisc/include/asm/module.h (renamed from include/asm-parisc/module.h) | 0
-rw-r--r--  arch/parisc/include/asm/msgbuf.h (renamed from include/asm-parisc/msgbuf.h) | 0
-rw-r--r--  arch/parisc/include/asm/mutex.h (renamed from include/asm-parisc/mutex.h) | 0
-rw-r--r--  arch/parisc/include/asm/page.h (renamed from include/asm-parisc/page.h) | 0
-rw-r--r--  arch/parisc/include/asm/param.h (renamed from include/asm-parisc/param.h) | 0
-rw-r--r--  arch/parisc/include/asm/parisc-device.h (renamed from include/asm-parisc/parisc-device.h) | 0
-rw-r--r--  arch/parisc/include/asm/parport.h (renamed from include/asm-parisc/parport.h) | 0
-rw-r--r--  arch/parisc/include/asm/pci.h (renamed from include/asm-parisc/pci.h) | 0
-rw-r--r--  arch/parisc/include/asm/pdc.h (renamed from include/asm-parisc/pdc.h) | 5
-rw-r--r--  arch/parisc/include/asm/pdc_chassis.h (renamed from include/asm-parisc/pdc_chassis.h) | 0
-rw-r--r--  arch/parisc/include/asm/pdcpat.h (renamed from include/asm-parisc/pdcpat.h) | 0
-rw-r--r--  arch/parisc/include/asm/percpu.h (renamed from include/asm-parisc/percpu.h) | 0
-rw-r--r--  arch/parisc/include/asm/perf.h (renamed from include/asm-parisc/perf.h) | 0
-rw-r--r--  arch/parisc/include/asm/pgalloc.h (renamed from include/asm-parisc/pgalloc.h) | 0
-rw-r--r--  arch/parisc/include/asm/pgtable.h (renamed from include/asm-parisc/pgtable.h) | 0
-rw-r--r--  arch/parisc/include/asm/poll.h (renamed from include/asm-parisc/poll.h) | 0
-rw-r--r--  arch/parisc/include/asm/posix_types.h (renamed from include/asm-parisc/posix_types.h) | 0
-rw-r--r--  arch/parisc/include/asm/prefetch.h (renamed from include/asm-parisc/prefetch.h) | 0
-rw-r--r--  arch/parisc/include/asm/processor.h (renamed from include/asm-parisc/processor.h) | 0
-rw-r--r--  arch/parisc/include/asm/psw.h (renamed from include/asm-parisc/psw.h) | 0
-rw-r--r--  arch/parisc/include/asm/ptrace.h (renamed from include/asm-parisc/ptrace.h) | 10
-rw-r--r--  arch/parisc/include/asm/real.h (renamed from include/asm-parisc/real.h) | 0
-rw-r--r--  arch/parisc/include/asm/resource.h (renamed from include/asm-parisc/resource.h) | 0
-rw-r--r--  arch/parisc/include/asm/ropes.h (renamed from include/asm-parisc/ropes.h) | 2
-rw-r--r--  arch/parisc/include/asm/rt_sigframe.h (renamed from include/asm-parisc/rt_sigframe.h) | 0
-rw-r--r--  arch/parisc/include/asm/rtc.h (renamed from include/asm-parisc/rtc.h) | 0
-rw-r--r--  arch/parisc/include/asm/runway.h (renamed from include/asm-parisc/runway.h) | 0
-rw-r--r--  arch/parisc/include/asm/scatterlist.h (renamed from include/asm-parisc/scatterlist.h) | 0
-rw-r--r--  arch/parisc/include/asm/sections.h (renamed from include/asm-parisc/sections.h) | 0
-rw-r--r--  arch/parisc/include/asm/segment.h (renamed from include/asm-parisc/segment.h) | 0
-rw-r--r--  arch/parisc/include/asm/sembuf.h (renamed from include/asm-parisc/sembuf.h) | 0
-rw-r--r--  arch/parisc/include/asm/serial.h (renamed from include/asm-parisc/serial.h) | 0
-rw-r--r--  arch/parisc/include/asm/setup.h (renamed from include/asm-parisc/setup.h) | 0
-rw-r--r--  arch/parisc/include/asm/shmbuf.h (renamed from include/asm-parisc/shmbuf.h) | 0
-rw-r--r--  arch/parisc/include/asm/shmparam.h (renamed from include/asm-parisc/shmparam.h) | 0
-rw-r--r--  arch/parisc/include/asm/sigcontext.h (renamed from include/asm-parisc/sigcontext.h) | 0
-rw-r--r--  arch/parisc/include/asm/siginfo.h (renamed from include/asm-parisc/siginfo.h) | 0
-rw-r--r--  arch/parisc/include/asm/signal.h (renamed from include/asm-parisc/signal.h) | 0
-rw-r--r--  arch/parisc/include/asm/smp.h (renamed from include/asm-parisc/smp.h) | 0
-rw-r--r--  arch/parisc/include/asm/socket.h (renamed from include/asm-parisc/socket.h) | 0
-rw-r--r--  arch/parisc/include/asm/sockios.h (renamed from include/asm-parisc/sockios.h) | 0
-rw-r--r--  arch/parisc/include/asm/spinlock.h (renamed from include/asm-parisc/spinlock.h) | 0
-rw-r--r--  arch/parisc/include/asm/spinlock_types.h (renamed from include/asm-parisc/spinlock_types.h) | 0
-rw-r--r--  arch/parisc/include/asm/stat.h (renamed from include/asm-parisc/stat.h) | 0
-rw-r--r--  arch/parisc/include/asm/statfs.h (renamed from include/asm-parisc/statfs.h) | 0
-rw-r--r--  arch/parisc/include/asm/string.h (renamed from include/asm-parisc/string.h) | 0
-rw-r--r--  arch/parisc/include/asm/superio.h (renamed from include/asm-parisc/superio.h) | 0
-rw-r--r--  arch/parisc/include/asm/system.h (renamed from include/asm-parisc/system.h) | 0
-rw-r--r--  arch/parisc/include/asm/termbits.h (renamed from include/asm-parisc/termbits.h) | 0
-rw-r--r--  arch/parisc/include/asm/termios.h (renamed from include/asm-parisc/termios.h) | 0
-rw-r--r--  arch/parisc/include/asm/thread_info.h (renamed from include/asm-parisc/thread_info.h) | 0
-rw-r--r--  arch/parisc/include/asm/timex.h (renamed from include/asm-parisc/timex.h) | 0
-rw-r--r--  arch/parisc/include/asm/tlb.h (renamed from include/asm-parisc/tlb.h) | 0
-rw-r--r--  arch/parisc/include/asm/tlbflush.h (renamed from include/asm-parisc/tlbflush.h) | 0
-rw-r--r--  arch/parisc/include/asm/topology.h (renamed from include/asm-parisc/topology.h) | 0
-rw-r--r--  arch/parisc/include/asm/traps.h (renamed from include/asm-parisc/traps.h) | 0
-rw-r--r--  arch/parisc/include/asm/types.h (renamed from include/asm-parisc/types.h) | 0
-rw-r--r--  arch/parisc/include/asm/uaccess.h (renamed from include/asm-parisc/uaccess.h) | 0
-rw-r--r--  arch/parisc/include/asm/ucontext.h (renamed from include/asm-parisc/ucontext.h) | 0
-rw-r--r--  arch/parisc/include/asm/unaligned.h (renamed from include/asm-parisc/unaligned.h) | 0
-rw-r--r--  arch/parisc/include/asm/unistd.h (renamed from include/asm-parisc/unistd.h) | 10
-rw-r--r--  arch/parisc/include/asm/unwind.h (renamed from include/asm-parisc/unwind.h) | 2
-rw-r--r--  arch/parisc/include/asm/user.h (renamed from include/asm-parisc/user.h) | 0
-rw-r--r--  arch/parisc/include/asm/vga.h (renamed from include/asm-parisc/vga.h) | 0
-rw-r--r--  arch/parisc/include/asm/xor.h (renamed from include/asm-parisc/xor.h) | 0
-rw-r--r--  arch/parisc/kernel/.gitignore | 1
-rw-r--r--  arch/parisc/kernel/asm-offsets.c | 3
-rw-r--r--  arch/parisc/kernel/firmware.c | 69
-rw-r--r--  arch/parisc/kernel/head.S | 2
-rw-r--r--  arch/parisc/kernel/ptrace.c | 429
-rw-r--r--  arch/parisc/kernel/real2.S | 12
-rw-r--r--  arch/parisc/kernel/setup.c | 29
-rw-r--r--  arch/parisc/kernel/syscall_table.S | 8
-rw-r--r--  arch/parisc/kernel/time.c | 20
-rw-r--r--  arch/parisc/kernel/unwind.c | 4
-rw-r--r--  arch/powerpc/include/asm/page.h | 6
-rw-r--r--  arch/powerpc/include/asm/pci-bridge.h | 7
-rw-r--r--  arch/powerpc/include/asm/pci.h | 11
-rw-r--r--  arch/powerpc/include/asm/ptrace.h | 2
-rw-r--r--  arch/powerpc/kernel/pci-common.c | 136
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sputrace.c | 1
-rw-r--r--  arch/x86/Kconfig | 9
-rw-r--r--  arch/x86/configs/i386_defconfig | 1
-rw-r--r--  arch/x86/kernel/Makefile | 8
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 4
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 3
-rw-r--r--  arch/x86/kernel/apic.c (renamed from arch/x86/kernel/apic_32.c) | 627
-rw-r--r--  arch/x86/kernel/apic_64.c | 1848
-rw-r--r--  arch/x86/kernel/bios_uv.c | 137
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/k7.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_32.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/non-fatal.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 11
-rw-r--r--  arch/x86/kernel/efi.c | 4
-rw-r--r--  arch/x86/kernel/entry_32.S | 16
-rw-r--r--  arch/x86/kernel/entry_64.S | 26
-rw-r--r--  arch/x86/kernel/ftrace.c | 124
-rw-r--r--  arch/x86/kernel/genapic_flat_64.c | 4
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 49
-rw-r--r--  arch/x86/kernel/hpet.c | 453
-rw-r--r--  arch/x86/kernel/io_apic.c (renamed from arch/x86/kernel/io_apic_64.c) | 1538
-rw-r--r--  arch/x86/kernel/io_apic_32.c | 2908
-rw-r--r--  arch/x86/kernel/irq.c | 189
-rw-r--r--  arch/x86/kernel/irq_32.c | 194
-rw-r--r--  arch/x86/kernel/irq_64.c | 169
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 47
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 28
-rw-r--r--  arch/x86/kernel/quirks.c | 3
-rw-r--r--  arch/x86/kernel/setup.c | 4
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 17
-rw-r--r--  arch/x86/kernel/smpboot.c | 6
-rw-r--r--  arch/x86/kernel/uv_irq.c | 79
-rw-r--r--  arch/x86/kernel/uv_sysfs.c | 72
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 32
-rw-r--r--  arch/x86/kernel/vmiclock_32.c | 3
-rw-r--r--  arch/x86/lguest/boot.c | 2
-rw-r--r--  arch/x86/mach-generic/bigsmp.c | 4
-rw-r--r--  arch/x86/mach-generic/es7000.c | 14
-rw-r--r--  arch/x86/mach-generic/numaq.c | 14
-rw-r--r--  arch/x86/mach-generic/summit.c | 14
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c | 4
-rw-r--r--  arch/x86/mm/mmio-mod.c | 87
-rw-r--r--  arch/x86/mm/pf_in.c | 121
-rw-r--r--  arch/x86/mm/testmmiotrace.c | 4
-rw-r--r--  arch/x86/pci/irq.c | 19
-rw-r--r--  arch/x86/xen/irq.c | 2
-rw-r--r--  arch/x86/xen/spinlock.c | 2
-rw-r--r--  crypto/async_tx/async_tx.c | 34
-rw-r--r--  drivers/char/agp/ali-agp.c | 2
-rw-r--r--  drivers/char/agp/amd64-agp.c | 2
-rw-r--r--  drivers/char/agp/ati-agp.c | 2
-rw-r--r--  drivers/char/agp/backend.c | 2
-rw-r--r--  drivers/char/agp/intel-agp.c | 2
-rw-r--r--  drivers/char/agp/nvidia-agp.c | 2
-rw-r--r--  drivers/char/agp/parisc-agp.c | 4
-rw-r--r--  drivers/char/agp/via-agp.c | 2
-rw-r--r--  drivers/char/hpet.c | 2
-rw-r--r--  drivers/char/random.c | 36
-rw-r--r--  drivers/char/sysrq.c | 2
-rw-r--r--  drivers/char/vr41xx_giu.c | 2
-rw-r--r--  drivers/clocksource/acpi_pm.c | 7
-rw-r--r--  drivers/dma/Kconfig | 10
-rw-r--r--  drivers/dma/dmatest.c | 7
-rw-r--r--  drivers/dma/fsldma.c | 270
-rw-r--r--  drivers/dma/fsldma.h | 1
-rw-r--r--  drivers/dma/ioat_dma.c | 2
-rw-r--r--  drivers/gpio/gpiolib.c | 2
-rw-r--r--  drivers/gpu/drm/Kconfig | 2
-rw-r--r--  drivers/gpu/drm/drm_proc.c | 4
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 60
-rw-r--r--  drivers/i2c/busses/i2c-amd756.c | 5
-rw-r--r--  drivers/i2c/busses/i2c-viapro.c | 5
-rw-r--r--  drivers/ide/Kconfig | 88
-rw-r--r--  drivers/ide/Makefile | 19
-rw-r--r--  drivers/ide/ide-atapi.c | 2
-rw-r--r--  drivers/ide/ide-cd.c | 61
-rw-r--r--  drivers/ide/ide-cd_ioctl.c | 8
-rw-r--r--  drivers/ide/ide-disk.c | 382
-rw-r--r--  drivers/ide/ide-disk.h | 21
-rw-r--r--  drivers/ide/ide-disk_ioctl.c | 4
-rw-r--r--  drivers/ide/ide-disk_proc.c | 2
-rw-r--r--  drivers/ide/ide-dma-sff.c | 2
-rw-r--r--  drivers/ide/ide-floppy.c | 357
-rw-r--r--  drivers/ide/ide-floppy.h | 41
-rw-r--r--  drivers/ide/ide-floppy_ioctl.c | 23
-rw-r--r--  drivers/ide/ide-floppy_proc.c | 2
-rw-r--r--  drivers/ide/ide-gd.c | 398
-rw-r--r--  drivers/ide/ide-gd.h | 44
-rw-r--r--  drivers/ide/ide-iops.c | 2
-rw-r--r--  drivers/ide/ide-probe.c | 1
-rw-r--r--  drivers/ide/ide-proc.c | 6
-rw-r--r--  drivers/ide/ide-tape.c | 16
-rw-r--r--  drivers/ide/pci/Makefile | 1
-rw-r--r--  drivers/ide/pci/delkin_cb.c | 63
-rw-r--r--  drivers/ide/pci/hpt34x.c | 193
-rw-r--r--  drivers/ide/pci/hpt366.c | 35
-rw-r--r--  drivers/ide/pci/scc_pata.c | 4
-rw-r--r--  drivers/ide/pci/sgiioc4.c | 49
-rw-r--r--  drivers/leds/Kconfig | 2
-rw-r--r--  drivers/mfd/asic3.c | 4
-rw-r--r--  drivers/mfd/htc-egpio.c | 2
-rw-r--r--  drivers/net/3c59x.c | 4
-rw-r--r--  drivers/net/hamradio/baycom_ser_fdx.c | 4
-rw-r--r--  drivers/net/hamradio/scc.c | 6
-rw-r--r--  drivers/net/usb/pegasus.c | 4
-rw-r--r--  drivers/net/wan/sbni.c | 2
-rw-r--r--  drivers/parisc/ccio-dma.c | 43
-rw-r--r--  drivers/parisc/dino.c | 6
-rw-r--r--  drivers/parisc/eisa.c | 4
-rw-r--r--  drivers/parisc/gsc.c | 12
-rw-r--r--  drivers/parisc/iosapic.c | 4
-rw-r--r--  drivers/parisc/superio.c | 4
-rw-r--r--  drivers/pci/bus.c | 7
-rw-r--r--  drivers/pci/dmar.c | 57
-rw-r--r--  drivers/pci/hotplug/ibmphp_ebda.c | 92
-rw-r--r--  drivers/pci/hotplug/pci_hotplug_core.c | 14
-rw-r--r--  drivers/pci/hotplug/pciehp.h | 16
-rw-r--r--  drivers/pci/hotplug/pciehp_core.c | 78
-rw-r--r--  drivers/pci/hotplug/pciehp_ctrl.c | 136
-rw-r--r--  drivers/pci/hotplug/pciehp_hpc.c | 202
-rw-r--r--  drivers/pci/hotplug/pciehp_pci.c | 26
-rw-r--r--  drivers/pci/hotplug/rpaphp.h | 4
-rw-r--r--  drivers/pci/hotplug/rpaphp_core.c | 4
-rw-r--r--  drivers/pci/hotplug/rpaphp_pci.c | 2
-rw-r--r--  drivers/pci/htirq.c | 3
-rw-r--r--  drivers/pci/intr_remapping.c | 139
-rw-r--r--  drivers/pci/msi.c | 10
-rw-r--r--  drivers/pci/pci-driver.c | 21
-rw-r--r--  drivers/pci/pci-sysfs.c | 241
-rw-r--r--  drivers/pci/pci.c | 91
-rw-r--r--  drivers/pci/pci.h | 26
-rw-r--r--  drivers/pci/pcie/aer/aerdrv.c | 6
-rw-r--r--  drivers/pci/pcie/aer/aerdrv_core.c | 47
-rw-r--r--  drivers/pci/pcie/aspm.c | 6
-rw-r--r--  drivers/pci/pcie/portdrv.h | 1
-rw-r--r--  drivers/pci/pcie/portdrv_core.c | 23
-rw-r--r--  drivers/pci/pcie/portdrv_pci.c | 2
-rw-r--r--  drivers/pci/probe.c | 133
-rw-r--r--  drivers/pci/quirks.c | 173
-rw-r--r--  drivers/pci/remove.c | 11
-rw-r--r--  drivers/pci/setup-bus.c | 4
-rw-r--r--  drivers/pci/setup-res.c | 2
-rw-r--r--  drivers/pci/slot.c | 10
-rw-r--r--  drivers/pcmcia/at91_cf.c | 2
-rw-r--r--  drivers/pcmcia/hd64465_ss.c | 12
-rw-r--r--  drivers/pcmcia/vrc4171_card.c | 2
-rw-r--r--  drivers/rtc/Kconfig | 8
-rw-r--r--  drivers/rtc/Makefile | 1
-rw-r--r--  drivers/rtc/rtc-parisc.c | 111
-rw-r--r--  drivers/rtc/rtc-vr41xx.c | 4
-rw-r--r--  drivers/scsi/aha152x.c | 2
-rw-r--r--  drivers/scsi/ide-scsi.c | 26
-rw-r--r--  drivers/scsi/ipr.c | 1
-rw-r--r--  drivers/scsi/qla2xxx/qla_def.h | 2
-rw-r--r--  drivers/scsi/qla2xxx/qla_os.c | 5
-rw-r--r--  drivers/serial/68328serial.c | 11
-rw-r--r--  drivers/serial/8250.c | 69
-rw-r--r--  drivers/serial/amba-pl010.c | 2
-rw-r--r--  drivers/serial/amba-pl011.c | 2
-rw-r--r--  drivers/serial/cpm_uart/cpm_uart_core.c | 2
-rw-r--r--  drivers/serial/m32r_sio.c | 4
-rw-r--r--  drivers/serial/serial_core.c | 2
-rw-r--r--  drivers/serial/serial_lh7a40x.c | 2
-rw-r--r--  drivers/serial/sh-sci.c | 2
-rw-r--r--  drivers/serial/ucc_uart.c | 2
-rw-r--r--  drivers/uio/uio.c | 14
-rw-r--r--  drivers/usb/host/ehci-hcd.c | 2
-rw-r--r--  drivers/watchdog/ib700wdt.c | 2
-rw-r--r--  drivers/xen/events.c | 18
-rw-r--r--  fs/Kconfig | 2
-rw-r--r--  fs/binfmt_elf.c | 19
-rw-r--r--  fs/binfmt_elf_fdpic.c | 19
-rw-r--r--  fs/fuse/file.c | 5
-rw-r--r--  fs/fuse/fuse_i.h | 5
-rw-r--r--  fs/fuse/inode.c | 3
-rw-r--r--  fs/proc/array.c | 8
-rw-r--r--  fs/proc/proc_misc.c | 40
-rw-r--r--  include/asm-frv/ide.h | 10
-rw-r--r--  include/asm-generic/bug.h | 2
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 14
-rw-r--r--  include/asm-m68k/ide.h | 9
-rw-r--r--  include/asm-x86/apic.h | 16
-rw-r--r--  include/asm-x86/bigsmp/apic.h | 15
-rw-r--r--  include/asm-x86/efi.h | 13
-rw-r--r--  include/asm-x86/es7000/apic.h | 3
-rw-r--r--  include/asm-x86/ftrace.h | 10
-rw-r--r--  include/asm-x86/genapic_32.h | 2
-rw-r--r--  include/asm-x86/hpet.h | 21
-rw-r--r--  include/asm-x86/hw_irq.h | 13
-rw-r--r--  include/asm-x86/io_apic.h | 24
-rw-r--r--  include/asm-x86/irq_vectors.h | 24
-rw-r--r--  include/asm-x86/mach-default/entry_arch.h | 1
-rw-r--r--  include/asm-x86/mach-default/mach_apic.h | 15
-rw-r--r--  include/asm-x86/mach-generic/irq_vectors_limits.h | 14
-rw-r--r--  include/asm-x86/mach-generic/mach_apic.h | 1
-rw-r--r--  include/asm-x86/numaq/apic.h | 2
-rw-r--r--  include/asm-x86/summit/apic.h | 1
-rw-r--r--  include/asm-x86/summit/irq_vectors_limits.h | 14
-rw-r--r--  include/asm-x86/uv/bios.h | 94
-rw-r--r--  include/asm-x86/uv/uv_irq.h | 36
-rw-r--r--  include/linux/aer.h | 5
-rw-r--r--  include/linux/clocksource.h | 14
-rw-r--r--  include/linux/compiler.h | 2
-rw-r--r--  include/linux/dmar.h | 1
-rw-r--r--  include/linux/efi.h | 4
-rw-r--r--  include/linux/ftrace.h | 84
-rw-r--r--  include/linux/fuse.h | 12
-rw-r--r--  include/linux/hrtimer.h | 10
-rw-r--r--  include/linux/ide.h | 36
-rw-r--r--  include/linux/init.h | 2
-rw-r--r--  include/linux/interrupt.h | 1
-rw-r--r--  include/linux/irq.h | 56
-rw-r--r--  include/linux/irqnr.h | 24
-rw-r--r--  include/linux/kernel.h | 5
-rw-r--r--  include/linux/kernel_stat.h | 21
-rw-r--r--  include/linux/kprobes.h | 5
-rw-r--r--  include/linux/linkage.h | 2
-rw-r--r--  include/linux/marker.h | 7
-rw-r--r--  include/linux/mmiotrace.h | 20
-rw-r--r--  include/linux/module.h | 17
-rw-r--r--  include/linux/pci.h | 24
-rw-r--r--  include/linux/pci_ids.h | 6
-rw-r--r--  include/linux/pci_regs.h | 14
-rw-r--r--  include/linux/posix-timers.h | 4
-rw-r--r--  include/linux/ring_buffer.h | 127
-rw-r--r--  include/linux/sched.h | 84
-rw-r--r--  include/linux/tick.h | 7
-rw-r--r--  include/linux/time.h | 5
-rw-r--r--  include/linux/timex.h | 11
-rw-r--r--  include/linux/tracepoint.h | 137
-rw-r--r--  include/trace/sched.h | 56
-rw-r--r--  init/Kconfig | 15
-rw-r--r--  init/main.c | 34
-rw-r--r--  kernel/Makefile | 1
-rw-r--r--  kernel/compat.c | 53
-rw-r--r--  kernel/exit.c | 29
-rw-r--r--  kernel/fork.c | 95
-rw-r--r--  kernel/hrtimer.c | 15
-rw-r--r--  kernel/irq/autoprobe.c | 43
-rw-r--r--  kernel/irq/chip.c | 101
-rw-r--r--  kernel/irq/handle.c | 27
-rw-r--r--  kernel/irq/internals.h | 7
-rw-r--r--  kernel/irq/manage.c | 123
-rw-r--r--  kernel/irq/migration.c | 14
-rw-r--r--  kernel/irq/proc.c | 45
-rw-r--r--  kernel/irq/resend.c | 6
-rw-r--r--  kernel/irq/spurious.c | 162
-rw-r--r--  kernel/itimer.c | 33
-rw-r--r--  kernel/kexec.c | 1
-rw-r--r--  kernel/kthread.c | 5
-rw-r--r--  kernel/marker.c | 36
-rw-r--r--  kernel/module.c | 81
-rw-r--r--  kernel/notifier.c | 2
-rw-r--r--  kernel/posix-cpu-timers.c | 512
-rw-r--r--  kernel/posix-timers.c | 153
-rw-r--r--  kernel/rcutorture.c | 2
-rw-r--r--  kernel/sched.c | 36
-rw-r--r--  kernel/sched_fair.c | 1
-rw-r--r--  kernel/sched_rt.c | 4
-rw-r--r--  kernel/sched_stats.h | 86
-rw-r--r--  kernel/signal.c | 11
-rw-r--r--  kernel/softirq.c | 10
-rw-r--r--  kernel/sys.c | 75
-rw-r--r--  kernel/time/clocksource.c | 3
-rw-r--r--  kernel/time/jiffies.c | 1
-rw-r--r--  kernel/time/ntp.c | 93
-rw-r--r--  kernel/time/tick-broadcast.c | 13
-rw-r--r--  kernel/time/tick-internal.h | 2
-rw-r--r--  kernel/time/tick-sched.c | 93
-rw-r--r--  kernel/time/timekeeping.c | 122
-rw-r--r--  kernel/time/timer_list.c | 20
-rw-r--r--  kernel/timer.c | 11
-rw-r--r--  kernel/trace/Kconfig | 64
-rw-r--r--  kernel/trace/Makefile | 4
-rw-r--r--  kernel/trace/ftrace.c | 275
-rw-r--r--  kernel/trace/ring_buffer.c | 2014
-rw-r--r--  kernel/trace/trace.c | 1845
-rw-r--r--  kernel/trace/trace.h | 211
-rw-r--r--  kernel/trace/trace_boot.c | 126
-rw-r--r--  kernel/trace/trace_functions.c | 2
-rw-r--r--  kernel/trace/trace_irqsoff.c | 19
-rw-r--r--  kernel/trace/trace_mmiotrace.c | 116
-rw-r--r--  kernel/trace/trace_nop.c | 64
-rw-r--r--  kernel/trace/trace_sched_switch.c | 137
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 148
-rw-r--r--  kernel/trace/trace_selftest.c | 83
-rw-r--r--  kernel/trace/trace_stack.c | 310
-rw-r--r--  kernel/trace/trace_sysprof.c | 2
-rw-r--r--  kernel/tracepoint.c | 477
-rw-r--r--  mm/memory.c | 2
-rw-r--r--  mm/tiny-shmem.c | 1
-rw-r--r--  mm/vmalloc.c | 19
-rw-r--r--  samples/Kconfig | 6
-rw-r--r--  samples/Makefile | 2
-rw-r--r--  samples/markers/probe-example.c | 1
-rw-r--r--  samples/tracepoints/Makefile | 6
-rw-r--r--  samples/tracepoints/tp-samples-trace.h | 13
-rw-r--r--  samples/tracepoints/tracepoint-probe-sample.c | 55
-rw-r--r--  samples/tracepoints/tracepoint-probe-sample2.c | 42
-rw-r--r--  samples/tracepoints/tracepoint-sample.c | 53
-rw-r--r--  scripts/Makefile.build | 7
-rw-r--r--  scripts/bootgraph.pl | 24
-rwxr-xr-x  scripts/checkpatch.pl | 2
-rwxr-xr-x  scripts/recordmcount.pl | 395
-rw-r--r--  security/selinux/hooks.c | 9
471 files changed, 13280 insertions, 10998 deletions
diff --git a/CREDITS b/CREDITS
index c62dcb3b7e26..2358846f06be 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1653,14 +1653,14 @@ S: Chapel Hill, North Carolina 27514-4818
 S: USA
 
 N: Dave Jones
-E: davej@codemonkey.org.uk
+E: davej@redhat.com
 W: http://www.codemonkey.org.uk
-D: x86 errata/setup maintenance.
-D: AGPGART driver.
+D: Assorted VIA x86 support.
+D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Backport/Forwardport merge monkey.
-D: Various Janitor work.
-S: United Kingdom
+D: Fedora kernel maintainence.
+D: Misc/Other.
+S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josfsson
 E: gandalf@wlug.westbo.se
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 4c63e5864160..ae15d55350ec 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = {
   </listitem>
   <listitem>
    <para>
-    Function names as strings (__FUNCTION__).
+    Function names as strings (__func__).
    </para>
   </listitem>
   <listitem>
diff --git a/Documentation/MSI-HOWTO.txt b/Documentation/MSI-HOWTO.txt
index a51f693c1541..256defd7e174 100644
--- a/Documentation/MSI-HOWTO.txt
+++ b/Documentation/MSI-HOWTO.txt
@@ -236,10 +236,8 @@ software system can set different pages for controlling accesses to the
 MSI-X structure. The implementation of MSI support requires the PCI
 subsystem, not a device driver, to maintain full control of the MSI-X
 table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA. A device driver is prohibited from requesting the MMIO
-address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
-will fail enabling MSI-X on its hardware device when it calls the function
-pci_enable_msix().
+table/MSI-X PBA. A device driver should not access the MMIO address
+space of the MSI-X table/MSI-X PBA.
 
 5.3.2 API pci_enable_msix
 
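For context, a minimal sketch of the pci_enable_msix() call this section of
the howto documents, as a driver of this era might use it (the mydev_* names
and the vector count are illustrative, not taken from the patch):

	#include <linux/pci.h>

	#define MYDEV_NR_VECTORS 4	/* illustrative vector count */

	static int mydev_setup_msix(struct pci_dev *pdev)
	{
		struct msix_entry entries[MYDEV_NR_VECTORS];
		int i, err;

		/* The driver picks the MSI-X table indices it wants... */
		for (i = 0; i < MYDEV_NR_VECTORS; i++)
			entries[i].entry = i;

		/* ...and the PCI core fills in entries[i].vector on success.
		 * A positive return value reports how many vectors are
		 * actually available. */
		err = pci_enable_msix(pdev, entries, MYDEV_NR_VECTORS);
		if (err)
			return err;

		/* entries[i].vector can now be handed to request_irq(). */
		return 0;
	}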
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 8d4dc6250c58..fd4907a2968c 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -163,6 +163,10 @@ need pass only as many optional fields as necessary:
 o class and classmask fields default to 0
 o driver_data defaults to 0UL.
 
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
 Once added, the driver probe routine will be invoked for any unclaimed
 PCI devices listed in its (newly updated) pci_ids list.
 
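A sketch of the driver_data convention the added note describes; the vendor
and device IDs and the mydev_* names are placeholders:

	#include <linux/module.h>
	#include <linux/pci.h>

	/* Non-zero values on purpose, per the note above. */
	enum mydev_board { MYDEV_BOARD_A = 1, MYDEV_BOARD_B = 2 };

	static const struct pci_device_id mydev_ids[] = {
		{ PCI_DEVICE(0x1234, 0x0001), .driver_data = MYDEV_BOARD_A },
		{ PCI_DEVICE(0x1234, 0x0002), .driver_data = MYDEV_BOARD_B },
		{ }	/* terminating entry */
	};
	MODULE_DEVICE_TABLE(pci, mydev_ids);

	static int mydev_probe(struct pci_dev *pdev,
			       const struct pci_device_id *id)
	{
		/* id->driver_data carries the per-entry value matched above. */
		dev_info(&pdev->dev, "board type %lu\n", id->driver_data);
		return pci_enable_device(pdev);
	}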
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
index 16c251230c82..ddeb14beacc8 100644
--- a/Documentation/PCI/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -203,22 +203,17 @@ to mmio_enabled.
 
 3.3 helper functions
 
-3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
-pci_find_aer_capability locates the PCI Express AER capability
-in the device configuration space. If the device doesn't support
-PCI-Express AER, the function returns 0.
-
-3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
 pci_enable_pcie_error_reporting enables the device to send error
 messages to root port when an error is detected. Note that devices
 don't enable the error reporting by default, so device drivers need
 call this function to enable it.
 
-3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 pci_disable_pcie_error_reporting disables the device to send error
 messages to root port when an error is detected.
 
-3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
 error status register.
 
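A rough sketch of how an endpoint driver might use the remaining helpers
during probe and recovery (the mydev_* names are invented and error handling
is abbreviated):

	#include <linux/pci.h>
	#include <linux/aer.h>

	static int mydev_probe(struct pci_dev *pdev,
			       const struct pci_device_id *id)
	{
		int err = pci_enable_device(pdev);

		if (err)
			return err;

		/* Reporting is off by default, so drivers must opt in. */
		if (pci_enable_pcie_error_reporting(pdev))
			dev_warn(&pdev->dev, "AER reporting unavailable\n");
		return 0;
	}

	static pci_ers_result_t mydev_slot_reset(struct pci_dev *pdev)
	{
		/* After a reset, clear stale uncorrectable error status. */
		pci_cleanup_aer_uncorrect_error_status(pdev);
		return PCI_ERS_RESULT_RECOVERED;
	}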
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0f1544f67400..53ba7c7d82b3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -101,6 +101,7 @@ parameter is applicable:
 	X86-64	X86-64 architecture is enabled.
 			More X86-64 boot options can be found in
 			Documentation/x86_64/boot-options.txt .
+	X86	Either 32bit or 64bit x86 (same as X86-32+X86-64)
 
 In addition, the following text indicates that the option:
 
@@ -1588,7 +1589,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			See also Documentation/paride.txt.
 
 	pci=option[,option...]	[PCI] various PCI subsystem options:
-		off		[X86-32] don't probe for the PCI bus
+		off		[X86] don't probe for the PCI bus
 		bios		[X86-32] force use of PCI BIOS, don't access
 				the hardware directly. Use this if your machine
 				has a non-standard PCI host bridge.
@@ -1596,9 +1597,9 @@ and is between 256 and 4096 characters. It is defined in the file
 				hardware access methods are allowed. Use this
 				if you experience crashes upon bootup and you
 				suspect they are caused by the BIOS.
-		conf1		[X86-32] Force use of PCI Configuration
+		conf1		[X86] Force use of PCI Configuration
 				Mechanism 1.
-		conf2		[X86-32] Force use of PCI Configuration
+		conf2		[X86] Force use of PCI Configuration
 				Mechanism 2.
 		noaer		[PCIE] If the PCIEAER kernel config parameter is
 				enabled, this kernel boot option can be used to
@@ -1618,37 +1619,37 @@ and is between 256 and 4096 characters. It is defined in the file
 				this option if the kernel is unable to allocate
 				IRQs or discover secondary PCI buses on your
 				motherboard.
-		rom		[X86-32] Assign address space to expansion ROMs.
+		rom		[X86] Assign address space to expansion ROMs.
 				Use with caution as certain devices share
 				address decoders between ROMs and other
 				resources.
-		norom		[X86-32,X86_64] Do not assign address space to
+		norom		[X86] Do not assign address space to
 				expansion ROMs that do not already have
 				BIOS assigned address ranges.
-		irqmask=0xMMMM	[X86-32] Set a bit mask of IRQs allowed to be
+		irqmask=0xMMMM	[X86] Set a bit mask of IRQs allowed to be
 				assigned automatically to PCI devices. You can
 				make the kernel exclude IRQs of your ISA cards
 				this way.
-		pirqaddr=0xAAAAA [X86-32] Specify the physical address
+		pirqaddr=0xAAAAA [X86] Specify the physical address
 				of the PIRQ table (normally generated
 				by the BIOS) if it is outside the
 				F0000h-100000h range.
-		lastbus=N	[X86-32] Scan all buses thru bus #N. Can be
+		lastbus=N	[X86] Scan all buses thru bus #N. Can be
 				useful if the kernel is unable to find your
 				secondary buses and you want to tell it
 				explicitly which ones they are.
-		assign-busses	[X86-32] Always assign all PCI bus
+		assign-busses	[X86] Always assign all PCI bus
 				numbers ourselves, overriding
 				whatever the firmware may have done.
-		usepirqmask	[X86-32] Honor the possible IRQ mask stored
+		usepirqmask	[X86] Honor the possible IRQ mask stored
 				in the BIOS $PIR table. This is needed on
 				some systems with broken BIOSes, notably
 				some HP Pavilion N5400 and Omnibook XE3
 				notebooks. This will have no effect if ACPI
 				IRQ routing is enabled.
-		noacpi		[X86-32] Do not use ACPI for IRQ routing
+		noacpi		[X86] Do not use ACPI for IRQ routing
 				or for PCI scanning.
-		use_crs		[X86-32] Use _CRS for PCI resource
+		use_crs		[X86] Use _CRS for PCI resource
 				allocation.
 		routeirq	Do IRQ routing for all PCI devices.
 				This is normally done in pci_enable_device(),
@@ -1677,6 +1678,12 @@ and is between 256 and 4096 characters. It is defined in the file
 				reserved for the CardBus bridge's memory
 				window. The default value is 64 megabytes.
 
+	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
+			Management.
+		off	Disable ASPM.
+		force	Enable ASPM even on devices that claim not to support it.
+			WARNING: Forcing ASPM on may cause system lockups.
+
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
 	pd.		[PARIDE]
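As a usage illustration (not part of the patch), a boot command line combining
several of the PCI options documented above might read:

	linux ... pci=noaer,assign-busses,lastbus=3 pcie_aspm=off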
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index d9f50a19fa0c..089f6138fcd9 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -50,10 +50,12 @@ Connecting a function (probe) to a marker is done by providing a probe (function
 to call) for the specific marker through marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
-is done through marker_probe_unregister(); it will disarm the probe and make
-sure there is no caller left using the probe when it returns. Probe removal is
-preempt-safe because preemption is disabled around the probe call. See the
-"Probe example" section below for a sample probe module.
+is done through marker_probe_unregister(); it will disarm the probe.
+marker_synchronize_unregister() must be called before the end of the module exit
+function to make sure there is no caller left using the probe. This, and the
+fact that preemption is disabled around the probe call, make sure that probe
+removal and module unload are safe. See the "Probe example" section below for a
+sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
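A minimal sketch of the module-exit pattern the rewritten paragraph requires,
loosely following samples/markers/probe-example.c (the marker name and probe
are illustrative):

	#include <linux/module.h>
	#include <linux/marker.h>

	static void probe_subsystem_event(void *probe_data, void *call_data,
					  const char *format, va_list *args)
	{
		/* consume the marker's arguments here */
	}

	static void __exit probe_fini(void)
	{
		marker_probe_unregister("subsystem_event",
					probe_subsystem_event, NULL);
		/* Wait for in-flight probe callers before the module text
		 * can go away -- the call the updated documentation mandates. */
		marker_synchronize_unregister();
	}
	module_exit(probe_fini);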
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 49378a9f2b5f..10a0263ebb3f 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -95,8 +95,9 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running hrtimers.
-          WARNING: Does not cover any other timers
+'q'     - Will dump per CPU lists of all armed hrtimers (but NOT regular
+          timer_list timers) and detailed information about all
+          clockevent devices.
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
new file mode 100644
index 000000000000..5d354e167494
--- /dev/null
+++ b/Documentation/tracepoints.txt
@@ -0,0 +1,101 @@
+		Using the Linux Kernel Tracepoints
+
+			Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It provides
+examples of how to insert tracepoints in the kernel and connect probe functions
+to them, and provides some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adding a data structure in a separate section). When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+whose prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h :
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_eventname,
+	TPPROTO(int firstarg, struct task_struct *p),
+	TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include <trace/subsys.h>
+
+void somefct(void)
+{
+	...
+	trace_subsys_eventname(arg, task);
+	...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+  - subsys is the name of your subsystem.
+  - eventname is the name of the event to trace.
+- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the function
+  called by this tracepoint.
+- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname(). Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe and make sure
+there is no caller left using the probe when it returns. Probe removal is
+preempt-safe because preemption is disabled around the probe call. See the
+"Probe example" section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a single definition must be made of a given tracepoint name over
+all the kernel to make sure no type conflict will occur. Name mangling of the
+tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by the
+compiler. Tracepoints can be put in inline functions, inlined static functions,
+and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered as being the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the example provided in samples/tracepoints
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe tracepoint-example (insmod order is not important)
+modprobe tracepoint-probe-example
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-example tracepoint-probe-example
+dmesg
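A rough sketch of a probe module against the document's placeholder
trace_subsys_eventname tracepoint (the subsys.h header and the event are
hypothetical, mirroring the example in the file above, not a real kernel
tracepoint):

	#include <linux/module.h>
	#include <linux/sched.h>
	#include <trace/subsys.h>	/* hypothetical tracepoint header */

	/* Same prototype as declared in DEFINE_TRACE() above. */
	static void probe_subsys_eventname(int firstarg, struct task_struct *p)
	{
		printk(KERN_INFO "Event: %d from pid %d\n", firstarg, p->pid);
	}

	static int __init tp_probe_init(void)
	{
		/* Type-checked against the tracepoint prototype at build time. */
		return register_trace_subsys_eventname(probe_subsys_eventname);
	}

	static void __exit tp_probe_fini(void)
	{
		unregister_trace_subsys_eventname(probe_subsys_eventname);
	}

	module_init(tp_probe_init);
	module_exit(tp_probe_fini);
	MODULE_LICENSE("GPL");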
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index a4afb560a45b..5bbbe2096223 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug
 $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 $ echo none > /debug/tracing/current_tracer
 Check for lost events.
 
@@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background.
 Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
 accesses to areas that are ioremapped while mmiotrace is active.
 
-[Unimplemented feature:]
 During tracing you can place comments (markers) into the trace by
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 This makes it easier to see which part of the (huge) trace corresponds to
 which action. It is recommended to place descriptive markers about what you
 do.
diff --git a/MAINTAINERS b/MAINTAINERS
index 22303e5fe4ce..6d51f00dcdc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1198,7 +1198,7 @@ S: Maintained
 
 CPU FREQUENCY DRIVERS
 P:	Dave Jones
-M:	davej@codemonkey.org.uk
+M:	davej@redhat.com
 L:	cpufreq@vger.kernel.org
 W:	http://www.codemonkey.org.uk/projects/cpufreq/
 T:	git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 99a7f19da13a..a4555f497639 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct
 
 static irq_swizzle_t *sable_lynx_irq_swizzle;
 
-static void sable_lynx_init_irq(int nr_irqs);
+static void sable_lynx_init_irq(int nr_of_irqs);
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
 
@@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
 }
 
 static void __init
-sable_lynx_init_irq(int nr_irqs)
+sable_lynx_init_irq(int nr_of_irqs)
 {
 	long i;
 
-	for (i = 0; i < nr_irqs; ++i) {
+	for (i = 0; i < nr_of_irqs; ++i) {
 		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
 		irq_desc[i].chip = &sable_lynx_irq_type;
 	}
diff --git a/arch/arm/mach-iop13xx/include/mach/time.h b/arch/arm/mach-iop13xx/include/mach/time.h
index 49213d9d7cad..d6d52527589d 100644
--- a/arch/arm/mach-iop13xx/include/mach/time.h
+++ b/arch/arm/mach-iop13xx/include/mach/time.h
@@ -41,7 +41,7 @@ static inline unsigned long iop13xx_core_freq(void)
 		return 1200000000;
 	default:
 		printk("%s: warning unknown frequency, defaulting to 800Mhz\n",
-			__FUNCTION__);
+			__func__);
 	}
 
 	return 800000000;
@@ -60,7 +60,7 @@ static inline unsigned long iop13xx_xsi_bus_ratio(void)
 		return 4;
 	default:
 		printk("%s: warning unknown ratio, defaulting to 2\n",
-			__FUNCTION__);
+			__func__);
 	}
 
 	return 2;
diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c
index b0653a87159a..30451300751b 100644
--- a/arch/arm/mach-ixp2000/ixdp2x00.c
+++ b/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = {
 	.unmask = ixdp2x00_irq_unmask
 };
 
-void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
+void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs)
 {
 	unsigned int irq;
 
@@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne
 
 	board_irq_stat = stat_reg;
 	board_irq_mask = mask_reg;
-	board_irq_count = nr_irqs;
+	board_irq_count = nr_of_irqs;
 
 	*board_irq_mask = 0xffffffff;
 
diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index d354e0fe4477..c40fc378a251 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -119,7 +119,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
 
 void __init omap_init_irq(void)
 {
-	unsigned long nr_irqs = 0;
+	unsigned long nr_of_irqs = 0;
 	unsigned int nr_banks = 0;
 	int i;
 
@@ -133,14 +133,14 @@ void __init omap_init_irq(void)
 
 		omap_irq_bank_init_one(bank);
 
-		nr_irqs += bank->nr_irqs;
+		nr_of_irqs += bank->nr_irqs;
 		nr_banks++;
 	}
 
 	printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
-	       nr_irqs, nr_banks, nr_banks > 1 ? "s" : "");
+	       nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : "");
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < nr_of_irqs; i++) {
 		set_irq_chip(i, &omap_irq_chip);
 		set_irq_handler(i, handle_level_irq);
 		set_irq_flags(i, IRQF_VALID);
diff --git a/arch/arm/mach-pxa/include/mach/zylonite.h b/arch/arm/mach-pxa/include/mach/zylonite.h
index 0d35ca04731e..bf6785adccf4 100644
--- a/arch/arm/mach-pxa/include/mach/zylonite.h
+++ b/arch/arm/mach-pxa/include/mach/zylonite.h
@@ -30,7 +30,7 @@ extern void zylonite_pxa300_init(void);
 static inline void zylonite_pxa300_init(void)
 {
 	if (cpu_is_pxa300() || cpu_is_pxa310())
-		panic("%s: PXA300/PXA310 not supported\n", __FUNCTION__);
+		panic("%s: PXA300/PXA310 not supported\n", __func__);
 }
 #endif
 
@@ -40,7 +40,7 @@ extern void zylonite_pxa320_init(void);
 static inline void zylonite_pxa320_init(void)
 {
 	if (cpu_is_pxa320())
-		panic("%s: PXA320 not supported\n", __FUNCTION__);
+		panic("%s: PXA320 not supported\n", __func__);
 }
 #endif
 
diff --git a/arch/arm/mach-sa1100/include/mach/ide.h b/arch/arm/mach-sa1100/include/mach/ide.h
deleted file mode 100644
index 4c99c8f5e617..000000000000
--- a/arch/arm/mach-sa1100/include/mach/ide.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/ide.h
- *
- * Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre
- *
- * 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl)
- *              Get rid of the special ide_init_hwif_ports() functions
- *              and make a generalised function that can be used by all
- *              architectures.
- */
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-
-#error "This code is broken and needs update to match with current ide support"
-
-
-/*
- * Set up a hw structure for a specified data port, control port and IRQ.
- * This should follow whatever the default interface uses.
- */
-static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
-				       unsigned long ctrl_port, int *irq)
-{
-	unsigned long reg = data_port;
-	int i;
-	int regincr = 1;
-
-	/* The Empeg board has the first two address lines unused */
-	if (machine_is_empeg())
-		regincr = 1 << 2;
-
-	/* The LART doesn't use A0 for IDE */
-	if (machine_is_lart())
-		regincr = 1 << 1;
-
-	memset(hw, 0, sizeof(*hw));
-
-	for (i = 0; i <= 7; i++) {
-		hw->io_ports_array[i] = reg;
-		reg += regincr;
-	}
-
-	hw->io_ports.ctl_addr = ctrl_port;
-
-	if (irq)
-		*irq = 0;
-}
-
-/*
- * This registers the standard ports for this architecture with the IDE
- * driver.
- */
-static __inline__ void
-ide_init_default_hwifs(void)
-{
-	if (machine_is_lart()) {
-#ifdef CONFIG_SA1100_LART
-		hw_regs_t hw;
-
-		/* Enable GPIO as interrupt line */
-		GPDR &= ~LART_GPIO_IDE;
-		set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING);
-
-		/* set PCMCIA interface timing */
-		MECR = 0x00060006;
-
-		/* init the interface */
-		ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL);
-		hw.irq = LART_IRQ_IDE;
-		ide_register_hw(&hw);
-#endif
-	}
-}
diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index c36a6d59d6f0..310477ba1bbf 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c
@@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
 	struct eic *eic;
 	struct resource *regs;
 	unsigned int i;
-	unsigned int nr_irqs;
+	unsigned int nr_of_irqs;
 	unsigned int int_irq;
 	int ret;
 	u32 pattern;
@@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev)
 	eic_writel(eic, IDR, ~0UL);
 	eic_writel(eic, MODE, ~0UL);
 	pattern = eic_readl(eic, MODE);
-	nr_irqs = fls(pattern);
+	nr_of_irqs = fls(pattern);
 
 	/* Trigger on low level unless overridden by driver */
 	eic_writel(eic, EDGE, 0UL);
@@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev)
 
 	eic->chip = &eic_chip;
 
-	for (i = 0; i < nr_irqs; i++) {
+	for (i = 0; i < nr_of_irqs; i++) {
 		set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
 					 handle_level_irq);
 		set_irq_chip_data(eic->first_irq + i, eic);
@@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev)
 		 eic->regs, int_irq);
 	dev_info(&pdev->dev,
 		 "Handling %u external IRQs, starting with IRQ %u\n",
-		 nr_irqs, eic->first_irq);
+		 nr_of_irqs, eic->first_irq);
 
 	return 0;
 
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 0149097b736d..ce342fb74246 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,16 +95,8 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
 			enum pci_mmap_state mmap_state, int write_combine);
 #define HAVE_PCI_LEGACY
 extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
-				      struct vm_area_struct *vma);
-extern ssize_t pci_read_legacy_io(struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t off, size_t count);
-extern ssize_t pci_write_legacy_io(struct kobject *kobj,
-				   struct bin_attribute *bin_attr,
-				   char *buf, loff_t off, size_t count);
-extern int pci_mmap_legacy_mem(struct kobject *kobj,
-			       struct bin_attribute *attr,
-			       struct vm_area_struct *vma);
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
 
 #define pci_get_legacy_mem platform_pci_get_legacy_mem
 #define pci_legacy_read platform_pci_legacy_read
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7545037a8625..211fcfd115f9 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -614,12 +614,17 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
  * vector to get the base address.
  */
 int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
+pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+			   enum pci_mmap_state mmap_state)
 {
 	unsigned long size = vma->vm_end - vma->vm_start;
 	pgprot_t prot;
 	char *addr;
 
+	/* We only support mmap'ing of legacy memory space */
+	if (mmap_state != pci_mmap_mem)
+		return -ENOSYS;
+
 	/*
 	 * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt
 	 * for more details.
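
The extra enum pci_mmap_state argument lets the ia64 code refuse legacy I/O-port mappings explicitly instead of silently assuming memory space. A minimal sketch of a caller (hypothetical helper name; the real callers live in the PCI core's legacy sysfs code):

	/* Sketch only: forward an mmap request for legacy memory space.
	 * legacy_mem_mmap_sketch() is a hypothetical name. */
	static int legacy_mem_mmap_sketch(struct pci_bus *bus,
					  struct vm_area_struct *vma)
	{
		/* anything other than pci_mmap_mem now returns -ENOSYS */
		return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
	}
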
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index fc2994811f15..39cb6da72dcb 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -40,6 +40,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 2bd1f6ef5db0..644a70b1b04e 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,6 +9,8 @@ config PARISC
 	def_bool y
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select RTC_CLASS
+	select RTC_DRV_PARISC
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
 	  in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/include/asm-parisc/Kbuild b/arch/parisc/include/asm/Kbuild
index f88b252e419c..f88b252e419c 100644
--- a/include/asm-parisc/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
diff --git a/include/asm-parisc/agp.h b/arch/parisc/include/asm/agp.h
index 9651660da639..9651660da639 100644
--- a/include/asm-parisc/agp.h
+++ b/arch/parisc/include/asm/agp.h
diff --git a/include/asm-parisc/asmregs.h b/arch/parisc/include/asm/asmregs.h
index d93c646e1887..d93c646e1887 100644
--- a/include/asm-parisc/asmregs.h
+++ b/arch/parisc/include/asm/asmregs.h
diff --git a/include/asm-parisc/assembly.h b/arch/parisc/include/asm/assembly.h
index ffb208840ecc..ffb208840ecc 100644
--- a/include/asm-parisc/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
diff --git a/include/asm-parisc/atomic.h b/arch/parisc/include/asm/atomic.h
index 57fcc4a5ebb4..57fcc4a5ebb4 100644
--- a/include/asm-parisc/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
diff --git a/include/asm-parisc/auxvec.h b/arch/parisc/include/asm/auxvec.h
index 9c3ac4b89dc9..9c3ac4b89dc9 100644
--- a/include/asm-parisc/auxvec.h
+++ b/arch/parisc/include/asm/auxvec.h
diff --git a/include/asm-parisc/bitops.h b/arch/parisc/include/asm/bitops.h
index 7a6ea10bd231..7a6ea10bd231 100644
--- a/include/asm-parisc/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
diff --git a/include/asm-parisc/bug.h b/arch/parisc/include/asm/bug.h
index 8cfc553fc837..8cfc553fc837 100644
--- a/include/asm-parisc/bug.h
+++ b/arch/parisc/include/asm/bug.h
diff --git a/include/asm-parisc/bugs.h b/arch/parisc/include/asm/bugs.h
index 9e6284342a5f..9e6284342a5f 100644
--- a/include/asm-parisc/bugs.h
+++ b/arch/parisc/include/asm/bugs.h
diff --git a/include/asm-parisc/byteorder.h b/arch/parisc/include/asm/byteorder.h
index db148313de5d..db148313de5d 100644
--- a/include/asm-parisc/byteorder.h
+++ b/arch/parisc/include/asm/byteorder.h
diff --git a/include/asm-parisc/cache.h b/arch/parisc/include/asm/cache.h
index 32c2cca74345..32c2cca74345 100644
--- a/include/asm-parisc/cache.h
+++ b/arch/parisc/include/asm/cache.h
diff --git a/include/asm-parisc/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index b7ca6dc7fddc..b7ca6dc7fddc 100644
--- a/include/asm-parisc/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
diff --git a/include/asm-parisc/checksum.h b/arch/parisc/include/asm/checksum.h
index e9639ccc3fce..e9639ccc3fce 100644
--- a/include/asm-parisc/checksum.h
+++ b/arch/parisc/include/asm/checksum.h
diff --git a/include/asm-parisc/compat.h b/arch/parisc/include/asm/compat.h
index 7f32611a7a5e..7f32611a7a5e 100644
--- a/include/asm-parisc/compat.h
+++ b/arch/parisc/include/asm/compat.h
diff --git a/include/asm-parisc/compat_rt_sigframe.h b/arch/parisc/include/asm/compat_rt_sigframe.h
index 81bec28bdc48..81bec28bdc48 100644
--- a/include/asm-parisc/compat_rt_sigframe.h
+++ b/arch/parisc/include/asm/compat_rt_sigframe.h
diff --git a/include/asm-parisc/compat_signal.h b/arch/parisc/include/asm/compat_signal.h
index 6ad02c360b21..6ad02c360b21 100644
--- a/include/asm-parisc/compat_signal.h
+++ b/arch/parisc/include/asm/compat_signal.h
diff --git a/include/asm-parisc/compat_ucontext.h b/arch/parisc/include/asm/compat_ucontext.h
index 2f7292afde3c..2f7292afde3c 100644
--- a/include/asm-parisc/compat_ucontext.h
+++ b/arch/parisc/include/asm/compat_ucontext.h
diff --git a/include/asm-parisc/cputime.h b/arch/parisc/include/asm/cputime.h
index dcdf2fbd7e72..dcdf2fbd7e72 100644
--- a/include/asm-parisc/cputime.h
+++ b/arch/parisc/include/asm/cputime.h
diff --git a/include/asm-parisc/current.h b/arch/parisc/include/asm/current.h
index 0fb9338e3bf2..0fb9338e3bf2 100644
--- a/include/asm-parisc/current.h
+++ b/arch/parisc/include/asm/current.h
diff --git a/include/asm-parisc/delay.h b/arch/parisc/include/asm/delay.h
index 7a75e984674b..7a75e984674b 100644
--- a/include/asm-parisc/delay.h
+++ b/arch/parisc/include/asm/delay.h
diff --git a/include/asm-parisc/device.h b/arch/parisc/include/asm/device.h
index d8f9872b0e2d..d8f9872b0e2d 100644
--- a/include/asm-parisc/device.h
+++ b/arch/parisc/include/asm/device.h
diff --git a/include/asm-parisc/div64.h b/arch/parisc/include/asm/div64.h
index 6cd978cefb28..6cd978cefb28 100644
--- a/include/asm-parisc/div64.h
+++ b/arch/parisc/include/asm/div64.h
diff --git a/include/asm-parisc/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
index 53af696f23d2..53af696f23d2 100644
--- a/include/asm-parisc/dma-mapping.h
+++ b/arch/parisc/include/asm/dma-mapping.h
diff --git a/include/asm-parisc/dma.h b/arch/parisc/include/asm/dma.h
index 31ad0f05af3d..31ad0f05af3d 100644
--- a/include/asm-parisc/dma.h
+++ b/arch/parisc/include/asm/dma.h
diff --git a/include/asm-parisc/eisa_bus.h b/arch/parisc/include/asm/eisa_bus.h
index 201085f83dd5..201085f83dd5 100644
--- a/include/asm-parisc/eisa_bus.h
+++ b/arch/parisc/include/asm/eisa_bus.h
diff --git a/include/asm-parisc/eisa_eeprom.h b/arch/parisc/include/asm/eisa_eeprom.h
index 9c9da980402a..9c9da980402a 100644
--- a/include/asm-parisc/eisa_eeprom.h
+++ b/arch/parisc/include/asm/eisa_eeprom.h
diff --git a/include/asm-parisc/elf.h b/arch/parisc/include/asm/elf.h
index 7fa675799e6d..7fa675799e6d 100644
--- a/include/asm-parisc/elf.h
+++ b/arch/parisc/include/asm/elf.h
diff --git a/include/asm-parisc/emergency-restart.h b/arch/parisc/include/asm/emergency-restart.h
index 108d8c48e42e..108d8c48e42e 100644
--- a/include/asm-parisc/emergency-restart.h
+++ b/arch/parisc/include/asm/emergency-restart.h
diff --git a/include/asm-parisc/errno.h b/arch/parisc/include/asm/errno.h
index e2f3ddc796be..e2f3ddc796be 100644
--- a/include/asm-parisc/errno.h
+++ b/arch/parisc/include/asm/errno.h
diff --git a/include/asm-parisc/fb.h b/arch/parisc/include/asm/fb.h
index 4d503a023ab2..4d503a023ab2 100644
--- a/include/asm-parisc/fb.h
+++ b/arch/parisc/include/asm/fb.h
diff --git a/include/asm-parisc/fcntl.h b/arch/parisc/include/asm/fcntl.h
index 1e1c824764ee..1e1c824764ee 100644
--- a/include/asm-parisc/fcntl.h
+++ b/arch/parisc/include/asm/fcntl.h
diff --git a/include/asm-parisc/fixmap.h b/arch/parisc/include/asm/fixmap.h
index de3fe3a18229..de3fe3a18229 100644
--- a/include/asm-parisc/fixmap.h
+++ b/arch/parisc/include/asm/fixmap.h
diff --git a/include/asm-parisc/floppy.h b/arch/parisc/include/asm/floppy.h
index 4ca69f558fae..4ca69f558fae 100644
--- a/include/asm-parisc/floppy.h
+++ b/arch/parisc/include/asm/floppy.h
diff --git a/include/asm-parisc/futex.h b/arch/parisc/include/asm/futex.h
index 0c705c3a55ef..0c705c3a55ef 100644
--- a/include/asm-parisc/futex.h
+++ b/arch/parisc/include/asm/futex.h
diff --git a/include/asm-parisc/grfioctl.h b/arch/parisc/include/asm/grfioctl.h
index 671e06042b40..671e06042b40 100644
--- a/include/asm-parisc/grfioctl.h
+++ b/arch/parisc/include/asm/grfioctl.h
diff --git a/include/asm-parisc/hardirq.h b/arch/parisc/include/asm/hardirq.h
index ce93133d5112..ce93133d5112 100644
--- a/include/asm-parisc/hardirq.h
+++ b/arch/parisc/include/asm/hardirq.h
diff --git a/include/asm-parisc/hardware.h b/arch/parisc/include/asm/hardware.h
index 4e9626836bab..4e9626836bab 100644
--- a/include/asm-parisc/hardware.h
+++ b/arch/parisc/include/asm/hardware.h
diff --git a/include/asm-parisc/hw_irq.h b/arch/parisc/include/asm/hw_irq.h
index 6707f7df3921..6707f7df3921 100644
--- a/include/asm-parisc/hw_irq.h
+++ b/arch/parisc/include/asm/hw_irq.h
diff --git a/include/asm-parisc/ide.h b/arch/parisc/include/asm/ide.h
index c246ef75017d..81700a2321cf 100644
--- a/include/asm-parisc/ide.h
+++ b/arch/parisc/include/asm/ide.h
@@ -13,10 +13,6 @@
 
 #ifdef __KERNEL__
 
-#define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
-#define ide_free_irq(irq,dev_id)		free_irq((irq), (dev_id))
-#define ide_request_region(from,extent,name)	request_region((from), (extent), (name))
-#define ide_release_region(from,extent)		release_region((from), (extent))
 /* Generic I/O and MEMIO string operations. */
 
 #define __ide_insw	insw
diff --git a/include/asm-parisc/io.h b/arch/parisc/include/asm/io.h
index 55ddb1842107..55ddb1842107 100644
--- a/include/asm-parisc/io.h
+++ b/arch/parisc/include/asm/io.h
diff --git a/include/asm-parisc/ioctl.h b/arch/parisc/include/asm/ioctl.h
index ec8efa02beda..ec8efa02beda 100644
--- a/include/asm-parisc/ioctl.h
+++ b/arch/parisc/include/asm/ioctl.h
diff --git a/include/asm-parisc/ioctls.h b/arch/parisc/include/asm/ioctls.h
index 6747fad07a3e..6747fad07a3e 100644
--- a/include/asm-parisc/ioctls.h
+++ b/arch/parisc/include/asm/ioctls.h
diff --git a/include/asm-parisc/ipcbuf.h b/arch/parisc/include/asm/ipcbuf.h
index bd956c425785..bd956c425785 100644
--- a/include/asm-parisc/ipcbuf.h
+++ b/arch/parisc/include/asm/ipcbuf.h
diff --git a/include/asm-parisc/irq.h b/arch/parisc/include/asm/irq.h
index 399c81981ed5..399c81981ed5 100644
--- a/include/asm-parisc/irq.h
+++ b/arch/parisc/include/asm/irq.h
diff --git a/include/asm-parisc/irq_regs.h b/arch/parisc/include/asm/irq_regs.h
index 3dd9c0b70270..3dd9c0b70270 100644
--- a/include/asm-parisc/irq_regs.h
+++ b/arch/parisc/include/asm/irq_regs.h
diff --git a/include/asm-parisc/kdebug.h b/arch/parisc/include/asm/kdebug.h
index 6ece1b037665..6ece1b037665 100644
--- a/include/asm-parisc/kdebug.h
+++ b/arch/parisc/include/asm/kdebug.h
diff --git a/include/asm-parisc/kmap_types.h b/arch/parisc/include/asm/kmap_types.h
index 806aae3c5338..806aae3c5338 100644
--- a/include/asm-parisc/kmap_types.h
+++ b/arch/parisc/include/asm/kmap_types.h
diff --git a/include/asm-parisc/led.h b/arch/parisc/include/asm/led.h
index c3405ab9d60a..c3405ab9d60a 100644
--- a/include/asm-parisc/led.h
+++ b/arch/parisc/include/asm/led.h
diff --git a/include/asm-parisc/linkage.h b/arch/parisc/include/asm/linkage.h
index 0b19a7242d0c..0b19a7242d0c 100644
--- a/include/asm-parisc/linkage.h
+++ b/arch/parisc/include/asm/linkage.h
diff --git a/include/asm-parisc/local.h b/arch/parisc/include/asm/local.h
index c11c530f74d0..c11c530f74d0 100644
--- a/include/asm-parisc/local.h
+++ b/arch/parisc/include/asm/local.h
diff --git a/include/asm-parisc/machdep.h b/arch/parisc/include/asm/machdep.h
index a231c97d703e..a231c97d703e 100644
--- a/include/asm-parisc/machdep.h
+++ b/arch/parisc/include/asm/machdep.h
diff --git a/include/asm-parisc/mc146818rtc.h b/arch/parisc/include/asm/mc146818rtc.h
index adf41631449f..adf41631449f 100644
--- a/include/asm-parisc/mc146818rtc.h
+++ b/arch/parisc/include/asm/mc146818rtc.h
diff --git a/include/asm-parisc/mckinley.h b/arch/parisc/include/asm/mckinley.h
index d1ea6f12915e..d1ea6f12915e 100644
--- a/include/asm-parisc/mckinley.h
+++ b/arch/parisc/include/asm/mckinley.h
diff --git a/include/asm-parisc/mman.h b/arch/parisc/include/asm/mman.h
index defe752cc996..defe752cc996 100644
--- a/include/asm-parisc/mman.h
+++ b/arch/parisc/include/asm/mman.h
diff --git a/include/asm-parisc/mmu.h b/arch/parisc/include/asm/mmu.h
index 6a310cf8b734..6a310cf8b734 100644
--- a/include/asm-parisc/mmu.h
+++ b/arch/parisc/include/asm/mmu.h
diff --git a/include/asm-parisc/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 85856c74ad1d..85856c74ad1d 100644
--- a/include/asm-parisc/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
diff --git a/include/asm-parisc/mmzone.h b/arch/parisc/include/asm/mmzone.h
index 9608d2cf214a..9608d2cf214a 100644
--- a/include/asm-parisc/mmzone.h
+++ b/arch/parisc/include/asm/mmzone.h
diff --git a/include/asm-parisc/module.h b/arch/parisc/include/asm/module.h
index c2cb49e934c1..c2cb49e934c1 100644
--- a/include/asm-parisc/module.h
+++ b/arch/parisc/include/asm/module.h
diff --git a/include/asm-parisc/msgbuf.h b/arch/parisc/include/asm/msgbuf.h
index fe88f2649418..fe88f2649418 100644
--- a/include/asm-parisc/msgbuf.h
+++ b/arch/parisc/include/asm/msgbuf.h
diff --git a/include/asm-parisc/mutex.h b/arch/parisc/include/asm/mutex.h
index 458c1f7fbc18..458c1f7fbc18 100644
--- a/include/asm-parisc/mutex.h
+++ b/arch/parisc/include/asm/mutex.h
diff --git a/include/asm-parisc/page.h b/arch/parisc/include/asm/page.h
index c3941f09a878..c3941f09a878 100644
--- a/include/asm-parisc/page.h
+++ b/arch/parisc/include/asm/page.h
diff --git a/include/asm-parisc/param.h b/arch/parisc/include/asm/param.h
index 32e03d877858..32e03d877858 100644
--- a/include/asm-parisc/param.h
+++ b/arch/parisc/include/asm/param.h
diff --git a/include/asm-parisc/parisc-device.h b/arch/parisc/include/asm/parisc-device.h
index 7aa13f2add7a..7aa13f2add7a 100644
--- a/include/asm-parisc/parisc-device.h
+++ b/arch/parisc/include/asm/parisc-device.h
diff --git a/include/asm-parisc/parport.h b/arch/parisc/include/asm/parport.h
index 00d9cc3e7b97..00d9cc3e7b97 100644
--- a/include/asm-parisc/parport.h
+++ b/arch/parisc/include/asm/parport.h
diff --git a/include/asm-parisc/pci.h b/arch/parisc/include/asm/pci.h
index 4ba868f44a5e..4ba868f44a5e 100644
--- a/include/asm-parisc/pci.h
+++ b/arch/parisc/include/asm/pci.h
diff --git a/include/asm-parisc/pdc.h b/arch/parisc/include/asm/pdc.h
index 9eaa794c3e4a..c584b00c6074 100644
--- a/include/asm-parisc/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -332,6 +332,9 @@
 #define BOOT_CONSOLE_SPA_OFFSET  0x3c4
 #define BOOT_CONSOLE_PATH_OFFSET 0x3a8
 
+/* size of the pdc_result buffer for firmware.c */
+#define NUM_PDC_RESULT	32
+
 #if !defined(__ASSEMBLY__)
 #ifdef __KERNEL__
 
@@ -600,6 +603,7 @@ int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsi
 int pdc_chassis_disp(unsigned long disp);
 int pdc_chassis_warn(unsigned long *warn);
 int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info);
+int pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info);
 int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index,
 		  void *iodc_data, unsigned int iodc_data_size);
 int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
@@ -638,6 +642,7 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
 #endif
 
 void set_firmware_width(void);
+void set_firmware_width_unlocked(void);
 int pdc_do_firm_test_reset(unsigned long ftc_bitmap);
 int pdc_do_reset(void);
 int pdc_soft_power_info(unsigned long *power_reg);
diff --git a/include/asm-parisc/pdc_chassis.h b/arch/parisc/include/asm/pdc_chassis.h
index a609273dc6bf..a609273dc6bf 100644
--- a/include/asm-parisc/pdc_chassis.h
+++ b/arch/parisc/include/asm/pdc_chassis.h
diff --git a/include/asm-parisc/pdcpat.h b/arch/parisc/include/asm/pdcpat.h
index 47539f117958..47539f117958 100644
--- a/include/asm-parisc/pdcpat.h
+++ b/arch/parisc/include/asm/pdcpat.h
diff --git a/include/asm-parisc/percpu.h b/arch/parisc/include/asm/percpu.h
index a0dcd1970128..a0dcd1970128 100644
--- a/include/asm-parisc/percpu.h
+++ b/arch/parisc/include/asm/percpu.h
diff --git a/include/asm-parisc/perf.h b/arch/parisc/include/asm/perf.h
index a18e11972c09..a18e11972c09 100644
--- a/include/asm-parisc/perf.h
+++ b/arch/parisc/include/asm/perf.h
diff --git a/include/asm-parisc/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index fc987a1c12a8..fc987a1c12a8 100644
--- a/include/asm-parisc/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
diff --git a/include/asm-parisc/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 470a4b88124d..470a4b88124d 100644
--- a/include/asm-parisc/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
diff --git a/include/asm-parisc/poll.h b/arch/parisc/include/asm/poll.h
index c98509d3149e..c98509d3149e 100644
--- a/include/asm-parisc/poll.h
+++ b/arch/parisc/include/asm/poll.h
diff --git a/include/asm-parisc/posix_types.h b/arch/parisc/include/asm/posix_types.h
index bb725a6630bb..bb725a6630bb 100644
--- a/include/asm-parisc/posix_types.h
+++ b/arch/parisc/include/asm/posix_types.h
diff --git a/include/asm-parisc/prefetch.h b/arch/parisc/include/asm/prefetch.h
index c5edc60c059f..c5edc60c059f 100644
--- a/include/asm-parisc/prefetch.h
+++ b/arch/parisc/include/asm/prefetch.h
diff --git a/include/asm-parisc/processor.h b/arch/parisc/include/asm/processor.h
index 3c9d34844c83..3c9d34844c83 100644
--- a/include/asm-parisc/processor.h
+++ b/arch/parisc/include/asm/processor.h
diff --git a/include/asm-parisc/psw.h b/arch/parisc/include/asm/psw.h
index 5a3e23c9ce63..5a3e23c9ce63 100644
--- a/include/asm-parisc/psw.h
+++ b/arch/parisc/include/asm/psw.h
diff --git a/include/asm-parisc/ptrace.h b/arch/parisc/include/asm/ptrace.h
index 3e94c5d85ff5..afa5333187b4 100644
--- a/include/asm-parisc/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -47,6 +47,16 @@ struct pt_regs {
 
 #define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
 
+#define __ARCH_WANT_COMPAT_SYS_PTRACE
+
+struct task_struct;
+#define arch_has_single_step()	1
+void user_disable_single_step(struct task_struct *task);
+void user_enable_single_step(struct task_struct *task);
+
+#define arch_has_block_step()	1
+void user_enable_block_step(struct task_struct *task);
+
 /* XXX should we use iaoq[1] or iaoq[0] ? */
 #define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0)
 #define user_space(regs) (((regs)->iasq[1] != 0) ? 1 : 0)
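
Declaring arch_has_single_step()/arch_has_block_step() opts parisc into the generic single-step machinery, so the common ptrace_resume() path, rather than arch code, decides when to call the enable/disable hooks. A much simplified sketch of the generic caller's shape (the real logic is in kernel/ptrace.c):

	/* Simplified sketch, not the actual kernel/ptrace.c code. */
	static void resume_sketch(struct task_struct *child, long request)
	{
		if (request == PTRACE_SINGLESTEP && arch_has_single_step())
			user_enable_single_step(child);
		else if (request == PTRACE_SINGLEBLOCK && arch_has_block_step())
			user_enable_block_step(child);
		else
			user_disable_single_step(child);
		wake_up_process(child);
	}
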
diff --git a/include/asm-parisc/real.h b/arch/parisc/include/asm/real.h
index 82acb25db395..82acb25db395 100644
--- a/include/asm-parisc/real.h
+++ b/arch/parisc/include/asm/real.h
diff --git a/include/asm-parisc/resource.h b/arch/parisc/include/asm/resource.h
index 8b06343b62ed..8b06343b62ed 100644
--- a/include/asm-parisc/resource.h
+++ b/arch/parisc/include/asm/resource.h
diff --git a/include/asm-parisc/ropes.h b/arch/parisc/include/asm/ropes.h
index 007a880615eb..09f51d5ab57c 100644
--- a/include/asm-parisc/ropes.h
+++ b/arch/parisc/include/asm/ropes.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_PARISC_ROPES_H_
 #define _ASM_PARISC_ROPES_H_
 
-#include <asm-parisc/parisc-device.h>
+#include <asm/parisc-device.h>
 
 #ifdef CONFIG_64BIT
 /* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */
diff --git a/include/asm-parisc/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index f0dd3b30f6c4..f0dd3b30f6c4 100644
--- a/include/asm-parisc/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
diff --git a/include/asm-parisc/rtc.h b/arch/parisc/include/asm/rtc.h
index 099d641a42c2..099d641a42c2 100644
--- a/include/asm-parisc/rtc.h
+++ b/arch/parisc/include/asm/rtc.h
diff --git a/include/asm-parisc/runway.h b/arch/parisc/include/asm/runway.h
index 5bea02da7e22..5bea02da7e22 100644
--- a/include/asm-parisc/runway.h
+++ b/arch/parisc/include/asm/runway.h
diff --git a/include/asm-parisc/scatterlist.h b/arch/parisc/include/asm/scatterlist.h
index 62269b31ebf4..62269b31ebf4 100644
--- a/include/asm-parisc/scatterlist.h
+++ b/arch/parisc/include/asm/scatterlist.h
diff --git a/include/asm-parisc/sections.h b/arch/parisc/include/asm/sections.h
index 9d13c3507ad6..9d13c3507ad6 100644
--- a/include/asm-parisc/sections.h
+++ b/arch/parisc/include/asm/sections.h
diff --git a/include/asm-parisc/segment.h b/arch/parisc/include/asm/segment.h
index 26794ddb6524..26794ddb6524 100644
--- a/include/asm-parisc/segment.h
+++ b/arch/parisc/include/asm/segment.h
diff --git a/include/asm-parisc/sembuf.h b/arch/parisc/include/asm/sembuf.h
index 1e59ffd3bd1e..1e59ffd3bd1e 100644
--- a/include/asm-parisc/sembuf.h
+++ b/arch/parisc/include/asm/sembuf.h
diff --git a/include/asm-parisc/serial.h b/arch/parisc/include/asm/serial.h
index d7e3cc60dbc3..d7e3cc60dbc3 100644
--- a/include/asm-parisc/serial.h
+++ b/arch/parisc/include/asm/serial.h
diff --git a/include/asm-parisc/setup.h b/arch/parisc/include/asm/setup.h
index 7da2e5b8747e..7da2e5b8747e 100644
--- a/include/asm-parisc/setup.h
+++ b/arch/parisc/include/asm/setup.h
diff --git a/include/asm-parisc/shmbuf.h b/arch/parisc/include/asm/shmbuf.h
index 0a3eada1863b..0a3eada1863b 100644
--- a/include/asm-parisc/shmbuf.h
+++ b/arch/parisc/include/asm/shmbuf.h
diff --git a/include/asm-parisc/shmparam.h b/arch/parisc/include/asm/shmparam.h
index 628ddc22faa8..628ddc22faa8 100644
--- a/include/asm-parisc/shmparam.h
+++ b/arch/parisc/include/asm/shmparam.h
diff --git a/include/asm-parisc/sigcontext.h b/arch/parisc/include/asm/sigcontext.h
index 27ef31bb3b6e..27ef31bb3b6e 100644
--- a/include/asm-parisc/sigcontext.h
+++ b/arch/parisc/include/asm/sigcontext.h
diff --git a/include/asm-parisc/siginfo.h b/arch/parisc/include/asm/siginfo.h
index d7034728f377..d7034728f377 100644
--- a/include/asm-parisc/siginfo.h
+++ b/arch/parisc/include/asm/siginfo.h
diff --git a/include/asm-parisc/signal.h b/arch/parisc/include/asm/signal.h
index c20356375d1d..c20356375d1d 100644
--- a/include/asm-parisc/signal.h
+++ b/arch/parisc/include/asm/signal.h
diff --git a/include/asm-parisc/smp.h b/arch/parisc/include/asm/smp.h
index 398cdbaf4e54..398cdbaf4e54 100644
--- a/include/asm-parisc/smp.h
+++ b/arch/parisc/include/asm/smp.h
diff --git a/include/asm-parisc/socket.h b/arch/parisc/include/asm/socket.h
index fba402c95ac2..fba402c95ac2 100644
--- a/include/asm-parisc/socket.h
+++ b/arch/parisc/include/asm/socket.h
diff --git a/include/asm-parisc/sockios.h b/arch/parisc/include/asm/sockios.h
index dabfbc7483f6..dabfbc7483f6 100644
--- a/include/asm-parisc/sockios.h
+++ b/arch/parisc/include/asm/sockios.h
diff --git a/include/asm-parisc/spinlock.h b/arch/parisc/include/asm/spinlock.h
index f3d2090a18dc..f3d2090a18dc 100644
--- a/include/asm-parisc/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
diff --git a/include/asm-parisc/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index 3f72f47cf4b2..3f72f47cf4b2 100644
--- a/include/asm-parisc/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
diff --git a/include/asm-parisc/stat.h b/arch/parisc/include/asm/stat.h
index 9d5fbbc5c31f..9d5fbbc5c31f 100644
--- a/include/asm-parisc/stat.h
+++ b/arch/parisc/include/asm/stat.h
diff --git a/include/asm-parisc/statfs.h b/arch/parisc/include/asm/statfs.h
index 324bea905dc6..324bea905dc6 100644
--- a/include/asm-parisc/statfs.h
+++ b/arch/parisc/include/asm/statfs.h
diff --git a/include/asm-parisc/string.h b/arch/parisc/include/asm/string.h
index eda01be65e35..eda01be65e35 100644
--- a/include/asm-parisc/string.h
+++ b/arch/parisc/include/asm/string.h
diff --git a/include/asm-parisc/superio.h b/arch/parisc/include/asm/superio.h
index 6598acb4d46d..6598acb4d46d 100644
--- a/include/asm-parisc/superio.h
+++ b/arch/parisc/include/asm/superio.h
diff --git a/include/asm-parisc/system.h b/arch/parisc/include/asm/system.h
index ee80c920b464..ee80c920b464 100644
--- a/include/asm-parisc/system.h
+++ b/arch/parisc/include/asm/system.h
diff --git a/include/asm-parisc/termbits.h b/arch/parisc/include/asm/termbits.h
index d8bbc73b16b7..d8bbc73b16b7 100644
--- a/include/asm-parisc/termbits.h
+++ b/arch/parisc/include/asm/termbits.h
diff --git a/include/asm-parisc/termios.h b/arch/parisc/include/asm/termios.h
index a2a57a4548af..a2a57a4548af 100644
--- a/include/asm-parisc/termios.h
+++ b/arch/parisc/include/asm/termios.h
diff --git a/include/asm-parisc/thread_info.h b/arch/parisc/include/asm/thread_info.h
index 0407959da489..0407959da489 100644
--- a/include/asm-parisc/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
diff --git a/include/asm-parisc/timex.h b/arch/parisc/include/asm/timex.h
index 3b68d77273d9..3b68d77273d9 100644
--- a/include/asm-parisc/timex.h
+++ b/arch/parisc/include/asm/timex.h
diff --git a/include/asm-parisc/tlb.h b/arch/parisc/include/asm/tlb.h
index 383b1db310ee..383b1db310ee 100644
--- a/include/asm-parisc/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
diff --git a/include/asm-parisc/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index b72ec66db699..b72ec66db699 100644
--- a/include/asm-parisc/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
diff --git a/include/asm-parisc/topology.h b/arch/parisc/include/asm/topology.h
index d8133eb0b1e7..d8133eb0b1e7 100644
--- a/include/asm-parisc/topology.h
+++ b/arch/parisc/include/asm/topology.h
diff --git a/include/asm-parisc/traps.h b/arch/parisc/include/asm/traps.h
index 1945f995f2df..1945f995f2df 100644
--- a/include/asm-parisc/traps.h
+++ b/arch/parisc/include/asm/traps.h
diff --git a/include/asm-parisc/types.h b/arch/parisc/include/asm/types.h
index 7f5a39bfb4ce..7f5a39bfb4ce 100644
--- a/include/asm-parisc/types.h
+++ b/arch/parisc/include/asm/types.h
diff --git a/include/asm-parisc/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 4878b9501f24..4878b9501f24 100644
--- a/include/asm-parisc/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
diff --git a/include/asm-parisc/ucontext.h b/arch/parisc/include/asm/ucontext.h
index 6c8883e4b0bd..6c8883e4b0bd 100644
--- a/include/asm-parisc/ucontext.h
+++ b/arch/parisc/include/asm/ucontext.h
diff --git a/include/asm-parisc/unaligned.h b/arch/parisc/include/asm/unaligned.h
index dfc5d3321a54..dfc5d3321a54 100644
--- a/include/asm-parisc/unaligned.h
+++ b/arch/parisc/include/asm/unaligned.h
diff --git a/include/asm-parisc/unistd.h b/arch/parisc/include/asm/unistd.h
index a7d857f0e4f4..ef26b009dc5d 100644
--- a/include/asm-parisc/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -801,8 +801,14 @@
 #define __NR_timerfd_create	(__NR_Linux + 306)
 #define __NR_timerfd_settime	(__NR_Linux + 307)
 #define __NR_timerfd_gettime	(__NR_Linux + 308)
-
-#define __NR_Linux_syscalls	(__NR_timerfd_gettime + 1)
+#define __NR_signalfd4		(__NR_Linux + 309)
+#define __NR_eventfd2		(__NR_Linux + 310)
+#define __NR_epoll_create1	(__NR_Linux + 311)
+#define __NR_dup3		(__NR_Linux + 312)
+#define __NR_pipe2		(__NR_Linux + 313)
+#define __NR_inotify_init1	(__NR_Linux + 314)
+
+#define __NR_Linux_syscalls	(__NR_inotify_init1 + 1)
 
 
 #define __IGNORE_select		/* newselect */
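
Each __NR_* constant is an offset from __NR_Linux, and __NR_Linux_syscalls must always name the highest entry plus one so the table size stays in sync with the syscall_table.S additions below. Illustration only, with a hypothetical wrapper name, of invoking one of the new calls by number from userspace before libc grows a wrapper:

	#include <unistd.h>
	#include <sys/syscall.h>

	/* Illustration: call pipe2() directly by syscall number. */
	static int pipe2_by_number(int fds[2], int flags)
	{
		return syscall(__NR_pipe2, fds, flags);
	}
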
diff --git a/include/asm-parisc/unwind.h b/arch/parisc/include/asm/unwind.h
index 2f7e6e50a158..52482e4fc20d 100644
--- a/include/asm-parisc/unwind.h
+++ b/arch/parisc/include/asm/unwind.h
@@ -74,4 +74,6 @@ void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *r
 int unwind_once(struct unwind_frame_info *info);
 int unwind_to_user(struct unwind_frame_info *info);
 
+int unwind_init(void);
+
 #endif
diff --git a/include/asm-parisc/user.h b/arch/parisc/include/asm/user.h
index 80224753e508..80224753e508 100644
--- a/include/asm-parisc/user.h
+++ b/arch/parisc/include/asm/user.h
diff --git a/include/asm-parisc/vga.h b/arch/parisc/include/asm/vga.h
index 171399a88ca6..171399a88ca6 100644
--- a/include/asm-parisc/vga.h
+++ b/arch/parisc/include/asm/vga.h
diff --git a/include/asm-parisc/xor.h b/arch/parisc/include/asm/xor.h
index c82eb12a5b18..c82eb12a5b18 100644
--- a/include/asm-parisc/xor.h
+++ b/arch/parisc/include/asm/xor.h
diff --git a/arch/parisc/kernel/.gitignore b/arch/parisc/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/parisc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 3efc0b73e4ff..699cf8ef2118 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -290,5 +290,8 @@ int main(void)
 	DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
 	DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
 	DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
+	BLANK();
+	DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long));
+	BLANK();
 	return 0;
 }
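
asm-offsets.c is compiled only to generate constants for assembly: each DEFINE() emits its value into the generated asm-offsets.h, which is how real2.S further down can size the pdc_result buffers with ASM_PDC_RESULT_SIZE. A rough sketch of the mechanism (the exact macro lives in the kbuild machinery and may differ):

	/* Sketch: DEFINE() emits a marker line that the build scripts
	 * turn into "#define ASM_PDC_RESULT_SIZE <value>". */
	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))
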
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 7177a6cd1b7f..03f26bd75bd8 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -71,8 +71,8 @@
 #include <asm/processor.h>	/* for boot_cpu_data */
 
 static DEFINE_SPINLOCK(pdc_lock);
-static unsigned long pdc_result[32] __attribute__ ((aligned (8)));
-static unsigned long pdc_result2[32] __attribute__ ((aligned (8)));
+extern unsigned long pdc_result[NUM_PDC_RESULT];
+extern unsigned long pdc_result2[NUM_PDC_RESULT];
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
@@ -150,26 +150,40 @@ static void convert_to_wide(unsigned long *addr)
 #endif
 }
 
+#ifdef CONFIG_64BIT
+void __init set_firmware_width_unlocked(void)
+{
+	int ret;
+
+	ret = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES,
+		__pa(pdc_result), 0);
+	convert_to_wide(pdc_result);
+	if (pdc_result[0] != NARROW_FIRMWARE)
+		parisc_narrow_firmware = 0;
+}
+
 /**
  * set_firmware_width - Determine if the firmware is wide or narrow.
  *
- * This function must be called before any pdc_* function that uses the convert_to_wide
- * function.
+ * This function must be called before any pdc_* function that uses the
+ * convert_to_wide function.
  */
 void __init set_firmware_width(void)
 {
-#ifdef CONFIG_64BIT
-	int retval;
 	unsigned long flags;
+	spin_lock_irqsave(&pdc_lock, flags);
+	set_firmware_width_unlocked();
+	spin_unlock_irqrestore(&pdc_lock, flags);
+}
+#else
+void __init set_firmware_width_unlocked(void) {
+	return;
+}
 
-	spin_lock_irqsave(&pdc_lock, flags);
-	retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0);
-	convert_to_wide(pdc_result);
-	if(pdc_result[0] != NARROW_FIRMWARE)
-		parisc_narrow_firmware = 0;
-	spin_unlock_irqrestore(&pdc_lock, flags);
-#endif
+void __init set_firmware_width(void) {
+	return;
 }
+#endif /*CONFIG_64BIT*/
 
 /**
  * pdc_emergency_unlock - Unlock the linux pdc lock
@@ -288,6 +302,20 @@ int pdc_chassis_warn(unsigned long *warn)
 	return retval;
 }
 
+int __init pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info)
+{
+	int ret;
+
+	ret = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
+	convert_to_wide(pdc_result);
+	pdc_coproc_info->ccr_functional = pdc_result[0];
+	pdc_coproc_info->ccr_present = pdc_result[1];
+	pdc_coproc_info->revision = pdc_result[17];
+	pdc_coproc_info->model = pdc_result[18];
+
+	return ret;
+}
+
 /**
  * pdc_coproc_cfg - To identify coprocessors attached to the processor.
  * @pdc_coproc_info: Return buffer address.
@@ -297,19 +325,14 @@ int pdc_chassis_warn(unsigned long *warn)
  */
 int __init pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info)
 {
-	int retval;
+	int ret;
 	unsigned long flags;
 
 	spin_lock_irqsave(&pdc_lock, flags);
-	retval = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result));
-	convert_to_wide(pdc_result);
-	pdc_coproc_info->ccr_functional = pdc_result[0];
-	pdc_coproc_info->ccr_present = pdc_result[1];
-	pdc_coproc_info->revision = pdc_result[17];
-	pdc_coproc_info->model = pdc_result[18];
-	spin_unlock_irqrestore(&pdc_lock, flags);
+	ret = pdc_coproc_cfg_unlocked(pdc_coproc_info);
+	spin_unlock_irqrestore(&pdc_lock, flags);
 
-	return retval;
+	return ret;
 }
 
 /**
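
The shape here is a conventional lock split: the public entry point takes pdc_lock and delegates to an _unlocked worker, so code that runs before the kernel is up (see start_parisc() in setup.c below) can use the worker directly without touching the spinlock. Generic sketch of the split, with hypothetical pdc_bar*() names:

	static DEFINE_SPINLOCK(pdc_lock);

	/* Worker: caller either holds pdc_lock or is still
	 * single-threaded during early boot. */
	static int pdc_bar_unlocked(void)
	{
		/* the actual mem_pdc_call() would go here */
		return 0;
	}

	int pdc_bar(void)
	{
		unsigned long flags;
		int ret;

		spin_lock_irqsave(&pdc_lock, flags);
		ret = pdc_bar_unlocked();
		spin_unlock_irqrestore(&pdc_lock, flags);
		return ret;
	}
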
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index a84e31e82876..0e3d9f9b9e33 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -121,7 +121,7 @@ $pgt_fill_loop:
 	copy		%r0,%r2
 
 	/* And the RFI Target address too */
-	load32		start_kernel,%r11
+	load32		start_parisc,%r11
 
 	/* And the initial task pointer */
 	load32		init_thread_union,%r6
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 49c637970789..90904f9dfc50 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc.
  * Copyright (C) 2000 Matthew Wilcox <matthew@wil.cx>
  * Copyright (C) 2000 David Huggins-Daines <dhd@debian.org>
+ * Copyright (C) 2008 Helge Deller <deller@gmx.de>
  */
 
 #include <linux/kernel.h>
@@ -27,15 +28,149 @@
 /* PSW bits we allow the debugger to modify */
 #define USER_PSW_BITS	(PSW_N | PSW_V | PSW_CB)
 
-#undef DEBUG_PTRACE
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+	task->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
 
-#ifdef DEBUG_PTRACE
-#define DBG(x...)	printk(x)
-#else
-#define DBG(x...)
-#endif
+	/* make sure the trap bits are not set */
+	pa_psw(task)->r = 0;
+	pa_psw(task)->t = 0;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+/*
+ * The following functions are called by ptrace_resume() when
+ * enabling or disabling single/block tracing.
+ */
+void user_disable_single_step(struct task_struct *task)
+{
+	ptrace_disable(task);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+	task->ptrace &= ~PT_BLOCKSTEP;
+	task->ptrace |= PT_SINGLESTEP;
+
+	if (pa_psw(task)->n) {
+		struct siginfo si;
+
+		/* Nullified, just crank over the queue. */
+		task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
+		task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
+		task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4;
+		pa_psw(task)->n = 0;
+		pa_psw(task)->x = 0;
+		pa_psw(task)->y = 0;
+		pa_psw(task)->z = 0;
+		pa_psw(task)->b = 0;
+		ptrace_disable(task);
+		/* Don't wake up the task, but let the
+		   parent know something happened. */
+		si.si_code = TRAP_TRACE;
+		si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
+		si.si_signo = SIGTRAP;
+		si.si_errno = 0;
+		force_sig_info(SIGTRAP, &si, task);
+		/* notify_parent(task, SIGCHLD); */
+		return;
+	}
+
+	/* Enable recovery counter traps.  The recovery counter
+	 * itself will be set to zero on a task switch.  If the
+	 * task is suspended on a syscall then the syscall return
+	 * path will overwrite the recovery counter with a suitable
+	 * value such that it traps once back in user space.  We
+	 * disable interrupts in the tasks PSW here also, to avoid
+	 * interrupts while the recovery counter is decrementing.
+	 */
+	pa_psw(task)->r = 1;
+	pa_psw(task)->t = 0;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+	task->ptrace &= ~PT_SINGLESTEP;
+	task->ptrace |= PT_BLOCKSTEP;
+
+	/* Enable taken branch trap. */
+	pa_psw(task)->r = 0;
+	pa_psw(task)->t = 1;
+	pa_psw(task)->h = 0;
+	pa_psw(task)->l = 0;
+}
+
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+{
+	unsigned long tmp;
+	long ret = -EIO;
 
-#ifdef CONFIG_64BIT
+	switch (request) {
+
+	/* Read the word at location addr in the USER area.  For ptraced
+	   processes, the kernel saves all regs on a syscall. */
+	case PTRACE_PEEKUSR:
+		if ((addr & (sizeof(long)-1)) ||
+		    (unsigned long) addr >= sizeof(struct pt_regs))
+			break;
+		tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
+		ret = put_user(tmp, (unsigned long *) data);
+		break;
+
+	/* Write the word at location addr in the USER area.  This will need
+	   to change when the kernel no longer saves all regs on a syscall.
+	   FIXME.  There is a problem at the moment in that r3-r18 are only
+	   saved if the process is ptraced on syscall entry, and even then
+	   those values are overwritten by actual register values on syscall
+	   exit. */
+	case PTRACE_POKEUSR:
+		/* Some register values written here may be ignored in
+		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
+		 * r31/r31+4, and not with the values in pt_regs.
+		 */
+		if (addr == PT_PSW) {
+			/* Allow writing to Nullify, Divide-step-correction,
+			 * and carry/borrow bits.
+			 * BEWARE, if you set N, and then single step, it won't
+			 * stop on the nullified instruction.
+			 */
+			data &= USER_PSW_BITS;
+			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
+			task_regs(child)->gr[0] |= data;
+			ret = 0;
+			break;
+		}
+
+		if ((addr & (sizeof(long)-1)) ||
+		    (unsigned long) addr >= sizeof(struct pt_regs))
+			break;
+		if ((addr >= PT_GR1 && addr <= PT_GR31) ||
+		    addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
+		    (addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
+		    addr == PT_SAR) {
+			*(unsigned long *) ((char *) task_regs(child) + addr) = data;
+			ret = 0;
+		}
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+
+#ifdef CONFIG_COMPAT
 
 /* This function is needed to translate 32 bit pt_regs offsets in to
  * 64 bit pt_regs offsets.  For example, a 32 bit gdb under a 64 bit kernel
@@ -61,106 +196,25 @@ static long translate_usr_offset(long offset)
 	else
 		return -1;
 }
-#endif
 
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t addr, compat_ulong_t data)
 {
-	/* make sure the trap bits are not set */
-	pa_psw(child)->r = 0;
-	pa_psw(child)->t = 0;
-	pa_psw(child)->h = 0;
-	pa_psw(child)->l = 0;
-}
-
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
-{
-	long ret;
-#ifdef DEBUG_PTRACE
-	long oaddr=addr, odata=data;
-#endif
+	compat_uint_t tmp;
+	long ret = -EIO;
 
 	switch (request) {
-	case PTRACE_PEEKTEXT: /* read word at location addr. */
-	case PTRACE_PEEKDATA: {
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			int copied;
-			unsigned int tmp;
-
-			addr &= 0xffffffffL;
-			copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-			ret = -EIO;
-			if (copied != sizeof(tmp))
-				goto out_tsk;
-			ret = put_user(tmp,(unsigned int *) data);
-			DBG("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n",
-				request == PTRACE_PEEKTEXT ? "TEXT" : "DATA",
-				pid, oaddr, odata, ret, tmp);
-		}
-		else
-#endif
-			ret = generic_ptrace_peekdata(child, addr, data);
-		goto out_tsk;
-	}
 
-	/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
-	case PTRACE_POKEDATA:
-		ret = 0;
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			unsigned int tmp = (unsigned int)data;
-			DBG("sys_ptrace(POKE%s, %d, %lx, %lx)\n",
-				request == PTRACE_POKETEXT ? "TEXT" : "DATA",
-				pid, oaddr, odata);
-			addr &= 0xffffffffL;
-			if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp))
-				goto out_tsk;
-		}
-		else
-#endif
-		{
-			if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
-				goto out_tsk;
-		}
-		ret = -EIO;
-		goto out_tsk;
-
-	/* Read the word at location addr in the USER area.  For ptraced
-	   processes, the kernel saves all regs on a syscall. */
-	case PTRACE_PEEKUSR: {
-		ret = -EIO;
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			unsigned int tmp;
-
-			if (addr & (sizeof(int)-1))
-				goto out_tsk;
-			if ((addr = translate_usr_offset(addr)) < 0)
-				goto out_tsk;
-
-			tmp = *(unsigned int *) ((char *) task_regs(child) + addr);
-			ret = put_user(tmp, (unsigned int *) data);
-			DBG("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n",
-				pid, oaddr, odata, ret, addr, tmp);
-		}
-		else
-#endif
-		{
-			unsigned long tmp;
+	case PTRACE_PEEKUSR:
+		if (addr & (sizeof(compat_uint_t)-1))
+			break;
+		addr = translate_usr_offset(addr);
+		if (addr < 0)
+			break;
 
-			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-				goto out_tsk;
-			tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
-			ret = put_user(tmp, (unsigned long *) data);
-		}
-		goto out_tsk;
-	}
+		tmp = *(compat_uint_t *) ((char *) task_regs(child) + addr);
+		ret = put_user(tmp, (compat_uint_t *) (unsigned long) data);
+		break;
 
 	/* Write the word at location addr in the USER area.  This will need
 	   to change when the kernel no longer saves all regs on a syscall.
@@ -169,185 +223,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	   those values are overwritten by actual register values on syscall
 	   exit. */
 	case PTRACE_POKEUSR:
-		ret = -EIO;
 		/* Some register values written here may be ignored in
 		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
 		 * r31/r31+4, and not with the values in pt_regs.
 		 */
-		/* PT_PSW=0, so this is valid for 32 bit processes under 64
-		 * bit kernels.
-		 */
 		if (addr == PT_PSW) {
-			/* PT_PSW=0, so this is valid for 32 bit processes
-			 * under 64 bit kernels.
-			 *
-			 * Allow writing to Nullify, Divide-step-correction,
-			 * and carry/borrow bits.
-			 * BEWARE, if you set N, and then single step, it won't
-			 * stop on the nullified instruction.
+			/* Since PT_PSW==0, it is valid for 32 bit processes
+			 * under 64 bit kernels as well.
 			 */
-			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx)\n",
-				pid, oaddr, odata);
-			data &= USER_PSW_BITS;
-			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
-			task_regs(child)->gr[0] |= data;
-			ret = 0;
-			goto out_tsk;
-		}
-#ifdef CONFIG_64BIT
-		if (__is_compat_task(child)) {
-			if (addr & (sizeof(int)-1))
-				goto out_tsk;
-			if ((addr = translate_usr_offset(addr)) < 0)
-				goto out_tsk;
-			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n",
-				pid, oaddr, odata, addr);
+			ret = arch_ptrace(child, request, addr, data);
+		} else {
+			if (addr & (sizeof(compat_uint_t)-1))
+				break;
+			addr = translate_usr_offset(addr);
+			if (addr < 0)
+				break;
 			if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
 				/* Special case, fp regs are 64 bits anyway */
-				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
+				*(__u64 *) ((char *) task_regs(child) + addr) = data;
 				ret = 0;
 			}
 			else if ((addr >= PT_GR1+4 && addr <= PT_GR31+4) ||
 					addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4 ||
 					addr == PT_SAR+4) {
 				/* Zero the top 32 bits */
-				*(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0;
-				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
+				*(__u32 *) ((char *) task_regs(child) + addr - 4) = 0;
+				*(__u32 *) ((char *) task_regs(child) + addr) = data;
 				ret = 0;
 			}
-			goto out_tsk;
 		}
-		else
-#endif
-		{
-			if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs))
-				goto out_tsk;
-			if ((addr >= PT_GR1 && addr <= PT_GR31) ||
-			    addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
-			    (addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
-			    addr == PT_SAR) {
-				*(unsigned long *) ((char *) task_regs(child) + addr) = data;
-				ret = 0;
-			}
-			goto out_tsk;
-		}
-
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT:
-		ret = -EIO;
-		DBG("sys_ptrace(%s)\n",
-			request == PTRACE_SYSCALL ? "SYSCALL" : "CONT");
-		if (!valid_signal(data))
-			goto out_tsk;
-		child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
-		if (request == PTRACE_SYSCALL) {
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		} else {
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		}
-		child->exit_code = data;
-		goto out_wake_notrap;
-
-	case PTRACE_KILL:
-		/*
-		 * make the child exit.  Best I can do is send it a
-		 * sigkill.  perhaps it should be put in the status
-		 * that it wants to exit.
-		 */
-		ret = 0;
-		DBG("sys_ptrace(KILL)\n");
-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
-			goto out_tsk;
-		child->exit_code = SIGKILL;
-		goto out_wake_notrap;
-
-	case PTRACE_SINGLEBLOCK:
-		DBG("sys_ptrace(SINGLEBLOCK)\n");
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->ptrace &= ~PT_SINGLESTEP;
-		child->ptrace |= PT_BLOCKSTEP;
-		child->exit_code = data;
-
-		/* Enable taken branch trap. */
-		pa_psw(child)->r = 0;
-		pa_psw(child)->t = 1;
-		pa_psw(child)->h = 0;
-		pa_psw(child)->l = 0;
-		goto out_wake;
-
-	case PTRACE_SINGLESTEP:
-		DBG("sys_ptrace(SINGLESTEP)\n");
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->ptrace &= ~PT_BLOCKSTEP;
-		child->ptrace |= PT_SINGLESTEP;
-		child->exit_code = data;
-
-		if (pa_psw(child)->n) {
-			struct siginfo si;
-
-			/* Nullified, just crank over the queue. */
-			task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
-			task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
-			task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
-			pa_psw(child)->n = 0;
-			pa_psw(child)->x = 0;
-			pa_psw(child)->y = 0;
-			pa_psw(child)->z = 0;
-			pa_psw(child)->b = 0;
-			ptrace_disable(child);
-			/* Don't wake up the child, but let the
-			   parent know something happened. */
-			si.si_code = TRAP_TRACE;
-			si.si_addr = (void __user *) (task_regs(child)->iaoq[0] & ~3);
-			si.si_signo = SIGTRAP;
-			si.si_errno = 0;
-			force_sig_info(SIGTRAP, &si, child);
-			//notify_parent(child, SIGCHLD);
-			//ret = 0;
-			goto out_wake;
-		}
-
-		/* Enable recovery counter traps.  The recovery counter
-		 * itself will be set to zero on a task switch.  If the
-		 * task is suspended on a syscall then the syscall return
-		 * path will overwrite the recovery counter with a suitable
-		 * value such that it traps once back in user space.  We
-		 * disable interrupts in the childs PSW here also, to avoid
-		 * interrupts while the recovery counter is decrementing.
-		 */
-		pa_psw(child)->r = 1;
-		pa_psw(child)->t = 0;
-		pa_psw(child)->h = 0;
-		pa_psw(child)->l = 0;
-		/* give it a chance to run. */
-		goto out_wake;
-
-	case PTRACE_GETEVENTMSG:
-		ret = put_user(child->ptrace_message, (unsigned int __user *) data);
-		goto out_tsk;
+		break;
 
 	default:
-		ret = ptrace_request(child, request, addr, data);
-		goto out_tsk;
+		ret = compat_ptrace_request(child, request, addr, data);
+		break;
 	}
 
-out_wake_notrap:
-	ptrace_disable(child);
-out_wake:
-	wake_up_process(child);
-	ret = 0;
-out_tsk:
-	DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
-		request, pid, oaddr, odata, ret);
 	return ret;
 }
+#endif
+
 
 void syscall_trace(void)
 {
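
With arch_ptrace()/compat_arch_ptrace() reduced to the register cases and everything else routed through ptrace_request(), single-stepping is now driven entirely from generic code. Nothing changes for userspace; a debugger still does roughly the following (illustration only):

	/* Illustration: a PTRACE_SINGLESTEP request now reaches
	 * user_enable_single_step() via the generic ptrace_resume(). */
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	static int step_once(pid_t child)
	{
		int status;

		if (ptrace(PTRACE_SINGLESTEP, child, 0, 0) < 0)
			return -1;
		if (waitpid(child, &status, 0) < 0)	/* expect a SIGTRAP stop */
			return -1;
		return status;
	}
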
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 7a92695d95a6..5f3d3a1f9037 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -8,12 +8,24 @@
  *
  */
 
+#include <asm/pdc.h>
 #include <asm/psw.h>
 #include <asm/assembly.h>
+#include <asm/asm-offsets.h>
 
 #include <linux/linkage.h>
 
+
 	.section	.bss
+
+	.export pdc_result
+	.export pdc_result2
+	.align 8
+pdc_result:
+	.block	ASM_PDC_RESULT_SIZE
+pdc_result2:
+	.block	ASM_PDC_RESULT_SIZE
+
 	.export real_stack
 	.export real32_stack
 	.export real64_stack
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 39e7c5a5946a..7d27853ff8c8 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -44,6 +44,7 @@
 #include <asm/pdc_chassis.h>
 #include <asm/io.h>
 #include <asm/setup.h>
+#include <asm/unwind.h>
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
@@ -123,6 +124,7 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_64BIT
 	extern int parisc_narrow_firmware;
 #endif
+	unwind_init();
 
 	init_per_cpu(smp_processor_id());	/* Set Modes & Enable FP */
 
@@ -368,6 +370,31 @@ static int __init parisc_init(void)
 
 	return 0;
 }
-
 arch_initcall(parisc_init);
 
+void start_parisc(void)
+{
+	extern void start_kernel(void);
+
+	int ret, cpunum;
+	struct pdc_coproc_cfg coproc_cfg;
+
+	cpunum = smp_processor_id();
+
+	set_firmware_width_unlocked();
+
+	ret = pdc_coproc_cfg_unlocked(&coproc_cfg);
+	if (ret >= 0 && coproc_cfg.ccr_functional) {
+		mtctl(coproc_cfg.ccr_functional, 10);
+
+		cpu_data[cpunum].fp_rev = coproc_cfg.revision;
+		cpu_data[cpunum].fp_model = coproc_cfg.model;
+
+		asm volatile ("fstd	%fr0,8(%sp)");
+	} else {
+		panic("must have an fpu to boot linux");
+	}
+
+	start_kernel();
+	// not reached
+}
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index c7e59f548817..303d2b647e41 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -87,7 +87,7 @@
 	ENTRY_SAME(setuid)
 	ENTRY_SAME(getuid)
 	ENTRY_COMP(stime)		/* 25 */
-	ENTRY_SAME(ptrace)
+	ENTRY_COMP(ptrace)
 	ENTRY_SAME(alarm)
 	/* see stat comment */
 	ENTRY_COMP(newfstat)
@@ -407,6 +407,12 @@
 	ENTRY_SAME(timerfd_create)
 	ENTRY_COMP(timerfd_settime)
 	ENTRY_COMP(timerfd_gettime)
+	ENTRY_COMP(signalfd4)
+	ENTRY_SAME(eventfd2)		/* 310 */
+	ENTRY_SAME(epoll_create1)
+	ENTRY_SAME(dup3)
+	ENTRY_SAME(pipe2)
+	ENTRY_SAME(inotify_init1)
 
 	/* Nothing yet */
 
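
ENTRY_SAME points both the wide and narrow syscall tables at the native handler, while ENTRY_COMP gives 32-bit tasks on a 64-bit kernel the compat_sys_* entry point, which is why ptrace flips to ENTRY_COMP now that compat_arch_ptrace() exists. A hypothetical simplification of what the macros expand to (not the real definitions, which live alongside this table and assemble it twice on 64-bit kernels):

	/* SYSCALL_TABLE_32BIT is a made-up flag for illustration: only the
	 * 32-bit pass of the table uses the compat entry point. */
	#ifdef SYSCALL_TABLE_32BIT
	#define ENTRY_SAME(name)	.dword sys_##name
	#define ENTRY_COMP(name)	.dword compat_sys_##name
	#else
	#define ENTRY_SAME(name)	.dword sys_##name
	#define ENTRY_COMP(name)	.dword sys_##name
	#endif
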
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 24be86bba94d..4d09203bc693 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -23,6 +23,7 @@
 #include <linux/smp.h>
 #include <linux/profile.h>
 #include <linux/clocksource.h>
+#include <linux/platform_device.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -215,6 +216,24 @@ void __init start_cpu_itimer(void)
 	cpu_data[cpu].it_value = next_tick;
 }
 
+struct platform_device rtc_parisc_dev = {
+	.name = "rtc-parisc",
+	.id = -1,
+};
+
+static int __init rtc_init(void)
+{
+	int ret;
+
+	ret = platform_device_register(&rtc_parisc_dev);
+	if (ret < 0)
+		printk(KERN_ERR "unable to register rtc device...\n");
+
+	/* not necessarily an error */
+	return 0;
+}
+module_init(rtc_init);
+
 void __init time_init(void)
 {
 	static struct pdc_tod tod_data;
@@ -245,4 +264,3 @@ void __init time_init(void)
 		xtime.tv_nsec = 0;
 	}
 }
-
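
Registering the "rtc-parisc" platform device here is only half of the handshake: the RTC_DRV_PARISC driver selected in the Kconfig hunk above binds to it purely by the matching name string. The driver side looks roughly like this (sketch with a hypothetical probe body; the real driver lives under drivers/rtc):

	#include <linux/platform_device.h>

	static int rtc_parisc_probe(struct platform_device *pdev)
	{
		/* a real driver would call rtc_device_register() here */
		return 0;
	}

	static struct platform_driver rtc_parisc_driver = {
		.driver	= { .name = "rtc-parisc" },	/* must match the device */
		.probe	= rtc_parisc_probe,
	};
	/* registered elsewhere with platform_driver_register() */
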
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 701b2d2d8882..6773c582e457 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -170,7 +170,7 @@ void unwind_table_remove(struct unwind_table *table)
 }
 
 /* Called from setup_arch to import the kernel unwind info */
-static int unwind_init(void)
+int unwind_init(void)
 {
 	long start, stop;
 	register unsigned long gp __asm__ ("r27");
@@ -417,5 +417,3 @@ int unwind_to_user(struct unwind_frame_info *info)
 
 	return ret;
 }
-
-module_init(unwind_init);
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 64e144505f65..5ac51e6efc1d 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -10,9 +10,13 @@
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 */ 11 */
12 12
13#ifndef __ASSEMBLY__
14#include <linux/types.h>
15#else
16#include <asm/types.h>
17#endif
13#include <asm/asm-compat.h> 18#include <asm/asm-compat.h>
14#include <asm/kdump.h> 19#include <asm/kdump.h>
15#include <asm/types.h>
16 20
17/* 21/*
18 * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software 22 * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
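The new guard lets page.h be included from assembler sources: linux/types.h contains C declarations that do not preprocess cleanly in .S files, so assembly falls back to the lighter asm/types.h. The pattern in isolation:

#ifndef __ASSEMBLY__
#include <linux/types.h>	/* full C type definitions */
#else
#include <asm/types.h>		/* assembler-safe subset only */
#endif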
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index ae2ea803a0f2..9047af7baa69 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -74,6 +74,13 @@ struct pci_controller {
74 unsigned long pci_io_size; 74 unsigned long pci_io_size;
75#endif 75#endif
76 76
77 /* Some machines have a special region to forward the ISA
78 * "memory" cycles such as VGA memory regions. Left to 0
79 * if unsupported
80 */
81 resource_size_t isa_mem_phys;
82 resource_size_t isa_mem_size;
83
77 struct pci_ops *ops; 84 struct pci_ops *ops;
78 unsigned int __iomem *cfg_addr; 85 unsigned int __iomem *cfg_addr;
79 void __iomem *cfg_data; 86 void __iomem *cfg_data;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 0e52c7828ea4..39d547fde956 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -123,6 +123,16 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
123/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */ 123/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
124#define HAVE_PCI_MMAP 1 124#define HAVE_PCI_MMAP 1
125 125
126extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
127 size_t count);
128extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
129 size_t count);
130extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
131 struct vm_area_struct *vma,
132 enum pci_mmap_state mmap_state);
133
134#define HAVE_PCI_LEGACY 1
135
126#if defined(CONFIG_PPC64) || defined(CONFIG_NOT_COHERENT_CACHE) 136#if defined(CONFIG_PPC64) || defined(CONFIG_NOT_COHERENT_CACHE)
127/* 137/*
128 * For 64-bit kernels, pci_unmap_{single,page} is not a nop. 138 * For 64-bit kernels, pci_unmap_{single,page} is not a nop.
@@ -226,5 +236,6 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
226extern void pcibios_do_bus_setup(struct pci_bus *bus); 236extern void pcibios_do_bus_setup(struct pci_bus *bus);
227extern void pcibios_fixup_of_probed_bus(struct pci_bus *bus); 237extern void pcibios_fixup_of_probed_bus(struct pci_bus *bus);
228 238
239
229#endif /* __KERNEL__ */ 240#endif /* __KERNEL__ */
230#endif /* __ASM_POWERPC_PCI_H */ 241#endif /* __ASM_POWERPC_PCI_H */
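Defining HAVE_PCI_LEGACY advertises the three accessors above to the generic PCI layer, which can then expose legacy I/O and memory to userspace per bus. A hedged sketch of how a consumer would forward a read, with the sysfs plumbing elided and the function name illustrative:

#include <linux/pci.h>

#ifdef HAVE_PCI_LEGACY
static ssize_t legacy_io_read_sketch(struct pci_bus *bus, char *buf,
				     loff_t off, size_t count)
{
	/* the arch accessor validates the port range and access width */
	return pci_legacy_read(bus, off, (u32 *)buf, count);
}
#endif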
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 734e0754fb9b..280a90cc9894 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -129,7 +129,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno,
129#define CHECK_FULL_REGS(regs) \ 129#define CHECK_FULL_REGS(regs) \
130do { \ 130do { \
131 if ((regs)->trap & 1) \ 131 if ((regs)->trap & 1) \
132 printk(KERN_CRIT "%s: partial register set\n", __FUNCTION__); \ 132 printk(KERN_CRIT "%s: partial register set\n", __func__); \
133} while (0) 133} while (0)
134#endif /* __powerpc64__ */ 134#endif /* __powerpc64__ */
135 135
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 01ce8c38bae6..3815d84a1ef4 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -451,7 +451,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
451 pci_dev_put(pdev); 451 pci_dev_put(pdev);
452 } 452 }
453 453
454 DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); 454 DBG("non-PCI map for %llx, prot: %lx\n",
455 (unsigned long long)offset, prot);
455 456
456 return __pgprot(prot); 457 return __pgprot(prot);
457} 458}
@@ -490,6 +491,131 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
490 return ret; 491 return ret;
491} 492}
492 493
494/* This provides legacy IO read access on a bus */
495int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
496{
497 unsigned long offset;
498 struct pci_controller *hose = pci_bus_to_host(bus);
499 struct resource *rp = &hose->io_resource;
500 void __iomem *addr;
501
502 /* Check if port can be supported by that bus. We only check
503 * the ranges of the PHB though, not the bus itself as the rules
504 * for forwarding legacy cycles down bridges are not our problem
505 * here. So if the host bridge supports it, we do it.
506 */
507 offset = (unsigned long)hose->io_base_virt - _IO_BASE;
508 offset += port;
509
510 if (!(rp->flags & IORESOURCE_IO))
511 return -ENXIO;
512 if (offset < rp->start || (offset + size) > rp->end)
513 return -ENXIO;
514 addr = hose->io_base_virt + port;
515
516 switch(size) {
517 case 1:
518 *((u8 *)val) = in_8(addr);
519 return 1;
520 case 2:
521 if (port & 1)
522 return -EINVAL;
523 *((u16 *)val) = in_le16(addr);
524 return 2;
525 case 4:
526 if (port & 3)
527 return -EINVAL;
528 *((u32 *)val) = in_le32(addr);
529 return 4;
530 }
531 return -EINVAL;
532}
533
534/* This provides legacy IO write access on a bus */
535int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
536{
537 unsigned long offset;
538 struct pci_controller *hose = pci_bus_to_host(bus);
539 struct resource *rp = &hose->io_resource;
540 void __iomem *addr;
541
542 /* Check if port can be supported by that bus. We only check
543 * the ranges of the PHB though, not the bus itself as the rules
544 * for forwarding legacy cycles down bridges are not our problem
545 * here. So if the host bridge supports it, we do it.
546 */
547 offset = (unsigned long)hose->io_base_virt - _IO_BASE;
548 offset += port;
549
550 if (!(rp->flags & IORESOURCE_IO))
551 return -ENXIO;
552 if (offset < rp->start || (offset + size) > rp->end)
553 return -ENXIO;
554 addr = hose->io_base_virt + port;
555
556 /* WARNING: The generic code is idiotic. It gets passed a pointer
557 * to what can be a 1, 2 or 4 byte quantity and always reads that
558 * as a u32, which means that we have to correct the location of
559 * the data read within those 32 bits for size 1 and 2
560 */
561 switch(size) {
562 case 1:
563 out_8(addr, val >> 24);
564 return 1;
565 case 2:
566 if (port & 1)
567 return -EINVAL;
568 out_le16(addr, val >> 16);
569 return 2;
570 case 4:
571 if (port & 3)
572 return -EINVAL;
573 out_le32(addr, val);
574 return 4;
575 }
576 return -EINVAL;
577}
578
579/* This provides legacy IO or memory mmap access on a bus */
580int pci_mmap_legacy_page_range(struct pci_bus *bus,
581 struct vm_area_struct *vma,
582 enum pci_mmap_state mmap_state)
583{
584 struct pci_controller *hose = pci_bus_to_host(bus);
585 resource_size_t offset =
586 ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
587 resource_size_t size = vma->vm_end - vma->vm_start;
588 struct resource *rp;
589
590 pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
591 pci_domain_nr(bus), bus->number,
592 mmap_state == pci_mmap_mem ? "MEM" : "IO",
593 (unsigned long long)offset,
594 (unsigned long long)(offset + size - 1));
595
596 if (mmap_state == pci_mmap_mem) {
597 if ((offset + size) > hose->isa_mem_size)
598 return -ENXIO;
599 offset += hose->isa_mem_phys;
600 } else {
601 unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
602 unsigned long roffset = offset + io_offset;
603 rp = &hose->io_resource;
604 if (!(rp->flags & IORESOURCE_IO))
605 return -ENXIO;
606 if (roffset < rp->start || (roffset + size) > rp->end)
607 return -ENXIO;
608 offset += hose->io_base_phys;
609 }
610 pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
611
612 vma->vm_pgoff = offset >> PAGE_SHIFT;
613 vma->vm_page_prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
614 return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
615 vma->vm_end - vma->vm_start,
616 vma->vm_page_prot);
617}
618
493void pci_resource_to_user(const struct pci_dev *dev, int bar, 619void pci_resource_to_user(const struct pci_dev *dev, int bar,
494 const struct resource *rsrc, 620 const struct resource *rsrc,
495 resource_size_t *start, resource_size_t *end) 621 resource_size_t *start, resource_size_t *end)
@@ -592,6 +718,12 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
592 cpu_addr = of_translate_address(dev, ranges + 3); 718 cpu_addr = of_translate_address(dev, ranges + 3);
593 size = of_read_number(ranges + pna + 3, 2); 719 size = of_read_number(ranges + pna + 3, 2);
594 ranges += np; 720 ranges += np;
721
 722 /* Skip ranges where translation failed or that are zero-sized
 723 * (some firmware, e.g. on power3, tries to feed us nonsensical
 724 * zero-sized regions that look like an attempt at exposing
 725 * the VGA memory hole)
726 */
595 if (cpu_addr == OF_BAD_ADDR || size == 0) 727 if (cpu_addr == OF_BAD_ADDR || size == 0)
596 continue; 728 continue;
597 729
@@ -665,6 +797,8 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
665 isa_hole = memno; 797 isa_hole = memno;
666 if (primary || isa_mem_base == 0) 798 if (primary || isa_mem_base == 0)
667 isa_mem_base = cpu_addr; 799 isa_mem_base = cpu_addr;
800 hose->isa_mem_phys = cpu_addr;
801 hose->isa_mem_size = size;
668 } 802 }
669 803
670 /* We get the PCI/Mem offset from the first range or 804 /* We get the PCI/Mem offset from the first range or
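The shifts in pci_legacy_write() above (val >> 24 for one byte, val >> 16 for two) follow from the calling convention its WARNING comment complains about: the generic code copies a 1/2/4-byte user value into a u32 and passes the whole word down, so on big-endian powerpc a single byte lands in bits 31..24. A hedged sketch of that caller-side behaviour, with illustrative names:

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/string.h>

static int legacy_write_caller_sketch(struct pci_bus *bus, loff_t port,
				      const char *ubuf, size_t size)
{
	u32 val = 0;

	memcpy(&val, ubuf, size);	/* byte 0 -> MSB on big-endian */
	return pci_legacy_write(bus, port, val, size);
}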
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 92d20e993ede..2ece399f2862 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -232,6 +232,7 @@ static void __exit sputrace_exit(void)
232 232
233 remove_proc_entry("sputrace", NULL); 233 remove_proc_entry("sputrace", NULL);
234 kfree(sputrace_log); 234 kfree(sputrace_log);
235 marker_synchronize_unregister();
235} 236}
236 237
237module_init(sputrace_init); 238module_init(sputrace_init);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 49349ba77d80..5b9b12321ad1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,7 @@ config X86
26 select HAVE_KPROBES 26 select HAVE_KPROBES
27 select ARCH_WANT_OPTIONAL_GPIOLIB 27 select ARCH_WANT_OPTIONAL_GPIOLIB
28 select HAVE_KRETPROBES 28 select HAVE_KRETPROBES
29 select HAVE_FTRACE_MCOUNT_RECORD
29 select HAVE_DYNAMIC_FTRACE 30 select HAVE_DYNAMIC_FTRACE
30 select HAVE_FTRACE 31 select HAVE_FTRACE
31 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 32 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
@@ -1242,14 +1243,6 @@ config EFI
1242 resultant kernel should continue to boot on existing non-EFI 1243 resultant kernel should continue to boot on existing non-EFI
1243 platforms. 1244 platforms.
1244 1245
1245config IRQBALANCE
1246 def_bool y
1247 prompt "Enable kernel irq balancing"
1248 depends on X86_32 && SMP && X86_IO_APIC
1249 help
1250 The default yes will allow the kernel to do irq load balancing.
1251 Saying no will keep the kernel from doing irq load balancing.
1252
1253config SECCOMP 1246config SECCOMP
1254 def_bool y 1247 def_bool y
1255 prompt "Enable seccomp to safely compute untrusted bytecode" 1248 prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 52d0359719d7..13b8c86ae985 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
287# CONFIG_MTRR_SANITIZER is not set 287# CONFIG_MTRR_SANITIZER is not set
288CONFIG_X86_PAT=y 288CONFIG_X86_PAT=y
289CONFIG_EFI=y 289CONFIG_EFI=y
290# CONFIG_IRQBALANCE is not set
291CONFIG_SECCOMP=y 290CONFIG_SECCOMP=y
292# CONFIG_HZ_100 is not set 291# CONFIG_HZ_100 is not set
293# CONFIG_HZ_250 is not set 292# CONFIG_HZ_250 is not set
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0d41f0343dc0..d7e5a58ee22f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp)
23CFLAGS_tsc.o := $(nostackp) 23CFLAGS_tsc.o := $(nostackp)
24 24
25obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 25obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
26obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o 26obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
27obj-y += time_$(BITS).o ioport.o ldt.o 27obj-y += time_$(BITS).o ioport.o ldt.o
28obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 28obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
29obj-$(CONFIG_X86_VISWS) += visws_quirks.o 29obj-$(CONFIG_X86_VISWS) += visws_quirks.o
@@ -60,8 +60,8 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o
60obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 60obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
61obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 61obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
62obj-$(CONFIG_X86_MPPARSE) += mpparse.o 62obj-$(CONFIG_X86_MPPARSE) += mpparse.o
63obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o 63obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
64obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o 64obj-$(CONFIG_X86_IO_APIC) += io_apic.o
65obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 65obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
66obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 66obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
67obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 67obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
108# 64 bit specific files 108# 64 bit specific files
109ifeq ($(CONFIG_X86_64),y) 109ifeq ($(CONFIG_X86_64),y)
110 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 110 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
111 obj-y += bios_uv.o 111 obj-y += bios_uv.o uv_irq.o uv_sysfs.o
112 obj-y += genx2apic_cluster.o 112 obj-y += genx2apic_cluster.o
113 obj-y += genx2apic_phys.o 113 obj-y += genx2apic_phys.o
114 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 114 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index eb875cdc7367..0d1c26a583c5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1256,7 +1256,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
1256 1256
1257 count = 1257 count =
1258 acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, 1258 acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
1259 NR_IRQ_VECTORS); 1259 nr_irqs);
1260 if (count < 0) { 1260 if (count < 0) {
1261 printk(KERN_ERR PREFIX 1261 printk(KERN_ERR PREFIX
1262 "Error parsing interrupt source overrides entry\n"); 1262 "Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1276,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
1276 1276
1277 count = 1277 count =
1278 acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, 1278 acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
1279 NR_IRQ_VECTORS); 1279 nr_irqs);
1280 if (count < 0) { 1280 if (count < 0) {
1281 printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); 1281 printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
1282 /* TBD: Cleanup to allow fallback to MPS */ 1282 /* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 426e5d91b63a..c44cd6dbfa14 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -10,6 +10,7 @@
10#include <linux/dmi.h> 10#include <linux/dmi.h>
11#include <linux/cpumask.h> 11#include <linux/cpumask.h>
12#include <asm/segment.h> 12#include <asm/segment.h>
13#include <asm/desc.h>
13 14
14#include "realmode/wakeup.h" 15#include "realmode/wakeup.h"
15#include "sleep.h" 16#include "sleep.h"
@@ -98,6 +99,8 @@ int acpi_save_state_mem(void)
98 header->trampoline_segment = setup_trampoline() >> 4; 99 header->trampoline_segment = setup_trampoline() >> 4;
99#ifdef CONFIG_SMP 100#ifdef CONFIG_SMP
100 stack_start.sp = temp_stack + 4096; 101 stack_start.sp = temp_stack + 4096;
102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
101#endif 104#endif
102 initial_code = (unsigned long)wakeup_long64; 105 initial_code = (unsigned long)wakeup_long64;
103 saved_magic = 0x123456789abcdef0; 106 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic.c
index 21c831d96af3..04a7f960bbc0 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic.c
@@ -23,11 +23,13 @@
23#include <linux/mc146818rtc.h> 23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h> 24#include <linux/kernel_stat.h>
25#include <linux/sysdev.h> 25#include <linux/sysdev.h>
26#include <linux/ioport.h>
26#include <linux/cpu.h> 27#include <linux/cpu.h>
27#include <linux/clockchips.h> 28#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 29#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 30#include <linux/module.h>
30#include <linux/dmi.h> 31#include <linux/dmi.h>
32#include <linux/dmar.h>
31 33
32#include <asm/atomic.h> 34#include <asm/atomic.h>
33#include <asm/smp.h> 35#include <asm/smp.h>
@@ -36,8 +38,14 @@
36#include <asm/desc.h> 38#include <asm/desc.h>
37#include <asm/arch_hooks.h> 39#include <asm/arch_hooks.h>
38#include <asm/hpet.h> 40#include <asm/hpet.h>
41#include <asm/pgalloc.h>
39#include <asm/i8253.h> 42#include <asm/i8253.h>
40#include <asm/nmi.h> 43#include <asm/nmi.h>
44#include <asm/idle.h>
45#include <asm/proto.h>
46#include <asm/timex.h>
47#include <asm/apic.h>
48#include <asm/i8259.h>
41 49
42#include <mach_apic.h> 50#include <mach_apic.h>
43#include <mach_apicdef.h> 51#include <mach_apicdef.h>
@@ -50,16 +58,58 @@
50# error SPURIOUS_APIC_VECTOR definition error 58# error SPURIOUS_APIC_VECTOR definition error
51#endif 59#endif
52 60
53unsigned long mp_lapic_addr; 61#ifdef CONFIG_X86_32
54
55/* 62/*
56 * Knob to control our willingness to enable the local APIC. 63 * Knob to control our willingness to enable the local APIC.
57 * 64 *
58 * +1=force-enable 65 * +1=force-enable
59 */ 66 */
60static int force_enable_local_apic; 67static int force_enable_local_apic;
61int disable_apic; 68/*
69 * APIC command line parameters
70 */
71static int __init parse_lapic(char *arg)
72{
73 force_enable_local_apic = 1;
74 return 0;
75}
76early_param("lapic", parse_lapic);
77/* Local APIC was disabled by the BIOS and enabled by the kernel */
78static int enabled_via_apicbase;
79
80#endif
81
82#ifdef CONFIG_X86_64
83static int apic_calibrate_pmtmr __initdata;
84static __init int setup_apicpmtimer(char *s)
85{
86 apic_calibrate_pmtmr = 1;
87 notsc_setup(NULL);
88 return 0;
89}
90__setup("apicpmtimer", setup_apicpmtimer);
91#endif
92
93#ifdef CONFIG_X86_64
94#define HAVE_X2APIC
95#endif
96
97#ifdef HAVE_X2APIC
98int x2apic;
99/* x2apic enabled before OS handover */
100int x2apic_preenabled;
101int disable_x2apic;
102static __init int setup_nox2apic(char *str)
103{
104 disable_x2apic = 1;
105 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
106 return 0;
107}
108early_param("nox2apic", setup_nox2apic);
109#endif
62 110
111unsigned long mp_lapic_addr;
112int disable_apic;
63/* Disable local APIC timer from the kernel commandline or via dmi quirk */ 113/* Disable local APIC timer from the kernel commandline or via dmi quirk */
64static int disable_apic_timer __cpuinitdata; 114static int disable_apic_timer __cpuinitdata;
65/* Local APIC timer works in C2 */ 115/* Local APIC timer works in C2 */
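The merged file uses both command-line hooks shown above: early_param() handlers run while the boot line is first parsed, before most subsystems exist, while __setup() handlers run later during start_kernel(). A self-contained sketch with made-up option names:

#include <linux/init.h>

static int sketch_flag __initdata;

static int __init parse_sketch_early(char *arg)
{
	sketch_flag = 1;	/* "sketchopt" seen on the boot line */
	return 0;		/* 0 means handled for early_param */
}
early_param("sketchopt", parse_sketch_early);

static int __init parse_sketch_late(char *s)
{
	sketch_flag = 2;
	return 1;		/* 1 means handled for __setup */
}
__setup("sketchopt2", parse_sketch_late);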
@@ -110,9 +160,6 @@ static struct clock_event_device lapic_clockevent = {
110}; 160};
111static DEFINE_PER_CPU(struct clock_event_device, lapic_events); 161static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
112 162
113/* Local APIC was disabled by the BIOS and enabled by the kernel */
114static int enabled_via_apicbase;
115
116static unsigned long apic_phys; 163static unsigned long apic_phys;
117 164
118/* 165/*
@@ -202,6 +249,42 @@ static struct apic_ops xapic_ops = {
202struct apic_ops __read_mostly *apic_ops = &xapic_ops; 249struct apic_ops __read_mostly *apic_ops = &xapic_ops;
203EXPORT_SYMBOL_GPL(apic_ops); 250EXPORT_SYMBOL_GPL(apic_ops);
204 251
252#ifdef HAVE_X2APIC
253static void x2apic_wait_icr_idle(void)
254{
255 /* no need to wait for icr idle in x2apic */
256 return;
257}
258
259static u32 safe_x2apic_wait_icr_idle(void)
260{
261 /* no need to wait for icr idle in x2apic */
262 return 0;
263}
264
265void x2apic_icr_write(u32 low, u32 id)
266{
267 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
268}
269
270u64 x2apic_icr_read(void)
271{
272 unsigned long val;
273
274 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
275 return val;
276}
277
278static struct apic_ops x2apic_ops = {
279 .read = native_apic_msr_read,
280 .write = native_apic_msr_write,
281 .icr_read = x2apic_icr_read,
282 .icr_write = x2apic_icr_write,
283 .wait_icr_idle = x2apic_wait_icr_idle,
284 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
285};
286#endif
287
205/** 288/**
206 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 289 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
207 */ 290 */
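The x2apic ops make the ICR busy-wait a no-op because the register model changes: in xAPIC mode the ICR is two 32-bit MMIO words and the sender must poll APIC_ICR_BUSY, while in x2APIC mode it is a single 64-bit MSR write that cannot be observed busy. The destination encoding, restated as a hedged one-line helper:

#include <asm/apic.h>
#include <asm/msr.h>

static inline void x2apic_ipi_sketch(u32 apicid, u32 cmd)
{
	/* destination id in bits 63..32, command word in bits 31..0 */
	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((u64)apicid << 32) | cmd);
}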
@@ -219,6 +302,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
219 apic_write(APIC_LVT0, v); 302 apic_write(APIC_LVT0, v);
220} 303}
221 304
305#ifdef CONFIG_X86_32
222/** 306/**
223 * get_physical_broadcast - Get number of physical broadcast IDs 307 * get_physical_broadcast - Get number of physical broadcast IDs
224 */ 308 */
@@ -226,6 +310,7 @@ int get_physical_broadcast(void)
226{ 310{
227 return modern_apic() ? 0xff : 0xf; 311 return modern_apic() ? 0xff : 0xf;
228} 312}
313#endif
229 314
230/** 315/**
231 * lapic_get_maxlvt - get the maximum number of local vector table entries 316 * lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -247,11 +332,7 @@ int lapic_get_maxlvt(void)
247 */ 332 */
248 333
249/* Clock divisor */ 334/* Clock divisor */
250#ifdef CONFG_X86_64
251#define APIC_DIVISOR 1
252#else
253#define APIC_DIVISOR 16 335#define APIC_DIVISOR 16
254#endif
255 336
256/* 337/*
257 * This function sets up the local APIC timer, with a timeout of 338 * This function sets up the local APIC timer, with a timeout of
@@ -383,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
 383 * Setup the local APIC timer for this CPU. Copy the initialized values 464
384 * of the boot CPU and register the clock event in the framework. 465 * of the boot CPU and register the clock event in the framework.
385 */ 466 */
386static void __devinit setup_APIC_timer(void) 467static void __cpuinit setup_APIC_timer(void)
387{ 468{
388 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 469 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
389 470
@@ -453,14 +534,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
453 } 534 }
454} 535}
455 536
537static int __init calibrate_by_pmtimer(long deltapm, long *delta)
538{
539 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
540 const long pm_thresh = pm_100ms / 100;
541 unsigned long mult;
542 u64 res;
543
544#ifndef CONFIG_X86_PM_TIMER
545 return -1;
546#endif
547
548 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
549
 550 /* Check if the PM timer is available */
551 if (!deltapm)
552 return -1;
553
554 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
555
556 if (deltapm > (pm_100ms - pm_thresh) &&
557 deltapm < (pm_100ms + pm_thresh)) {
558 apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
559 } else {
560 res = (((u64)deltapm) * mult) >> 22;
561 do_div(res, 1000000);
562 printk(KERN_WARNING "APIC calibration not consistent "
563 "with PM Timer: %ldms instead of 100ms\n",
564 (long)res);
565 /* Correct the lapic counter value */
566 res = (((u64)(*delta)) * pm_100ms);
567 do_div(res, deltapm);
568 printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
569 "%lu (%ld)\n", (unsigned long)res, *delta);
570 *delta = (long)res;
571 }
572
573 return 0;
574}
575
456static int __init calibrate_APIC_clock(void) 576static int __init calibrate_APIC_clock(void)
457{ 577{
458 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 578 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
459 const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
460 const long pm_thresh = pm_100ms/100;
461 void (*real_handler)(struct clock_event_device *dev); 579 void (*real_handler)(struct clock_event_device *dev);
462 unsigned long deltaj; 580 unsigned long deltaj;
463 long delta, deltapm; 581 long delta;
464 int pm_referenced = 0; 582 int pm_referenced = 0;
465 583
466 local_irq_disable(); 584 local_irq_disable();
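calibrate_by_pmtimer() cross-checks the lapic count against the ACPI PM timer, which ticks at PMTMR_TICKS_PER_SEC = 3579545 Hz, so a clean 100ms window reads about 357954 ticks with a 1% tolerance. The scaled arithmetic, replayed as a standalone userspace program with a sample that is 5% slow:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const long rate = 3579545;		/* PMTMR_TICKS_PER_SEC */
	const long pm_100ms = rate / 10;	/* expected ticks in 100ms */
	long deltapm = 376000;			/* an out-of-spec sample */
	uint64_t mult = (((uint64_t)1000000000) << 22) / rate;
	uint64_t ns = (deltapm * mult) >> 22;	/* ticks -> nanoseconds */

	printf("expected ~%ld ticks, saw %ld: %llu ms instead of 100\n",
	       pm_100ms, deltapm, (unsigned long long)(ns / 1000000));
	/* the lapic delta is then rescaled by pm_100ms / deltapm */
	return 0;
}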
@@ -470,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
470 global_clock_event->event_handler = lapic_cal_handler; 588 global_clock_event->event_handler = lapic_cal_handler;
471 589
472 /* 590 /*
473 * Setup the APIC counter to 1e9. There is no way the lapic 591 * Setup the APIC counter to maximum. There is no way the lapic
474 * can underflow in the 100ms detection time frame 592 * can underflow in the 100ms detection time frame
475 */ 593 */
476 __setup_APIC_LVTT(1000000000, 0, 0); 594 __setup_APIC_LVTT(0xffffffff, 0, 0);
477 595
478 /* Let the interrupts run */ 596 /* Let the interrupts run */
479 local_irq_enable(); 597 local_irq_enable();
@@ -490,34 +608,9 @@ static int __init calibrate_APIC_clock(void)
490 delta = lapic_cal_t1 - lapic_cal_t2; 608 delta = lapic_cal_t1 - lapic_cal_t2;
491 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); 609 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
492 610
493 /* Check, if the PM timer is available */ 611 /* we trust the PM based calibration if possible */
494 deltapm = lapic_cal_pm2 - lapic_cal_pm1; 612 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
495 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); 613 &delta);
496
497 if (deltapm) {
498 unsigned long mult;
499 u64 res;
500
501 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
502
503 if (deltapm > (pm_100ms - pm_thresh) &&
504 deltapm < (pm_100ms + pm_thresh)) {
505 apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
506 } else {
507 res = (((u64) deltapm) * mult) >> 22;
508 do_div(res, 1000000);
509 printk(KERN_WARNING "APIC calibration not consistent "
510 "with PM Timer: %ldms instead of 100ms\n",
511 (long)res);
512 /* Correct the lapic counter value */
513 res = (((u64) delta) * pm_100ms);
514 do_div(res, deltapm);
515 printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
516 "%lu (%ld)\n", (unsigned long) res, delta);
517 delta = (long) res;
518 }
519 pm_referenced = 1;
520 }
521 614
522 /* Calculate the scaled math multiplication factor */ 615 /* Calculate the scaled math multiplication factor */
523 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 616 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -559,7 +652,10 @@ static int __init calibrate_APIC_clock(void)
559 652
560 levt->features &= ~CLOCK_EVT_FEAT_DUMMY; 653 levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
561 654
562 /* We trust the pm timer based calibration */ 655 /*
 656 * PM timer calibration failed or was not turned on,
 657 * so let's try APIC-timer-based calibration
658 */
563 if (!pm_referenced) { 659 if (!pm_referenced) {
564 apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); 660 apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
565 661
@@ -652,7 +748,7 @@ void __init setup_boot_APIC_clock(void)
652 setup_APIC_timer(); 748 setup_APIC_timer();
653} 749}
654 750
655void __devinit setup_secondary_APIC_clock(void) 751void __cpuinit setup_secondary_APIC_clock(void)
656{ 752{
657 setup_APIC_timer(); 753 setup_APIC_timer();
658} 754}
@@ -718,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 718 * Besides, if we don't, timer interrupts ignore the global 814
719 * interrupt lock, which is the WrongThing (tm) to do. 815 * interrupt lock, which is the WrongThing (tm) to do.
720 */ 816 */
817#ifdef CONFIG_X86_64
818 exit_idle();
819#endif
721 irq_enter(); 820 irq_enter();
722 local_apic_timer_interrupt(); 821 local_apic_timer_interrupt();
723 irq_exit(); 822 irq_exit();
@@ -991,40 +1090,43 @@ void __init init_bsp_APIC(void)
991 1090
992static void __cpuinit lapic_setup_esr(void) 1091static void __cpuinit lapic_setup_esr(void)
993{ 1092{
994 unsigned long oldvalue, value, maxlvt; 1093 unsigned int oldvalue, value, maxlvt;
995 if (lapic_is_integrated() && !esr_disable) { 1094
996 if (esr_disable) { 1095 if (!lapic_is_integrated()) {
997 /* 1096 printk(KERN_INFO "No ESR for 82489DX.\n");
998 * Something untraceable is creating bad interrupts on 1097 return;
999 * secondary quads ... for the moment, just leave the 1098 }
1000 * ESR disabled - we can't do anything useful with the
1001 * errors anyway - mbligh
1002 */
1003 printk(KERN_INFO "Leaving ESR disabled.\n");
1004 return;
1005 }
1006 /* !82489DX */
1007 maxlvt = lapic_get_maxlvt();
1008 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1009 apic_write(APIC_ESR, 0);
1010 oldvalue = apic_read(APIC_ESR);
1011 1099
1012 /* enables sending errors */ 1100 if (esr_disable) {
1013 value = ERROR_APIC_VECTOR;
1014 apic_write(APIC_LVTERR, value);
1015 /* 1101 /*
1016 * spec says clear errors after enabling vector. 1102 * Something untraceable is creating bad interrupts on
1103 * secondary quads ... for the moment, just leave the
1104 * ESR disabled - we can't do anything useful with the
1105 * errors anyway - mbligh
1017 */ 1106 */
1018 if (maxlvt > 3) 1107 printk(KERN_INFO "Leaving ESR disabled.\n");
1019 apic_write(APIC_ESR, 0); 1108 return;
1020 value = apic_read(APIC_ESR);
1021 if (value != oldvalue)
1022 apic_printk(APIC_VERBOSE, "ESR value before enabling "
1023 "vector: 0x%08lx after: 0x%08lx\n",
1024 oldvalue, value);
1025 } else {
1026 printk(KERN_INFO "No ESR for 82489DX.\n");
1027 } 1109 }
1110
1111 maxlvt = lapic_get_maxlvt();
1112 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1113 apic_write(APIC_ESR, 0);
1114 oldvalue = apic_read(APIC_ESR);
1115
1116 /* enables sending errors */
1117 value = ERROR_APIC_VECTOR;
1118 apic_write(APIC_LVTERR, value);
1119
1120 /*
1121 * spec says clear errors after enabling vector.
1122 */
1123 if (maxlvt > 3)
1124 apic_write(APIC_ESR, 0);
1125 value = apic_read(APIC_ESR);
1126 if (value != oldvalue)
1127 apic_printk(APIC_VERBOSE, "ESR value before enabling "
1128 "vector: 0x%08x after: 0x%08x\n",
1129 oldvalue, value);
1028} 1130}
1029 1131
1030 1132
@@ -1033,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void)
1033 */ 1135 */
1034void __cpuinit setup_local_APIC(void) 1136void __cpuinit setup_local_APIC(void)
1035{ 1137{
1036 unsigned long value, integrated; 1138 unsigned int value;
1037 int i, j; 1139 int i, j;
1038 1140
1141#ifdef CONFIG_X86_32
1039 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1142 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1040 if (esr_disable) { 1143 if (lapic_is_integrated() && esr_disable) {
1041 apic_write(APIC_ESR, 0); 1144 apic_write(APIC_ESR, 0);
1042 apic_write(APIC_ESR, 0); 1145 apic_write(APIC_ESR, 0);
1043 apic_write(APIC_ESR, 0); 1146 apic_write(APIC_ESR, 0);
1044 apic_write(APIC_ESR, 0); 1147 apic_write(APIC_ESR, 0);
1045 } 1148 }
1149#endif
1046 1150
1047 integrated = lapic_is_integrated(); 1151 preempt_disable();
1048 1152
1049 /* 1153 /*
1050 * Double-check whether this APIC is really registered. 1154 * Double-check whether this APIC is really registered.
1155 * This is meaningless in clustered apic mode, so we skip it.
1051 */ 1156 */
1052 if (!apic_id_registered()) 1157 if (!apic_id_registered())
1053 WARN_ON_ONCE(1); 1158 BUG();
1054 1159
1055 /* 1160 /*
1056 * Intel recommends to set DFR, LDR and TPR before enabling 1161 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1201,7 @@ void __cpuinit setup_local_APIC(void)
1096 */ 1201 */
1097 value |= APIC_SPIV_APIC_ENABLED; 1202 value |= APIC_SPIV_APIC_ENABLED;
1098 1203
1204#ifdef CONFIG_X86_32
1099 /* 1205 /*
1100 * Some unknown Intel IO/APIC (or APIC) errata is biting us with 1206 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1101 * certain networking cards. If high frequency interrupts are 1207 * certain networking cards. If high frequency interrupts are
@@ -1116,8 +1222,13 @@ void __cpuinit setup_local_APIC(void)
1116 * See also the comment in end_level_ioapic_irq(). --macro 1222 * See also the comment in end_level_ioapic_irq(). --macro
1117 */ 1223 */
1118 1224
1119 /* Enable focus processor (bit==0) */ 1225 /*
1226 * - enable focus processor (bit==0)
1227 * - 64bit mode always use processor focus
1228 * so no need to set it
1229 */
1120 value &= ~APIC_SPIV_FOCUS_DISABLED; 1230 value &= ~APIC_SPIV_FOCUS_DISABLED;
1231#endif
1121 1232
1122 /* 1233 /*
1123 * Set spurious IRQ vector 1234 * Set spurious IRQ vector
@@ -1154,9 +1265,11 @@ void __cpuinit setup_local_APIC(void)
1154 value = APIC_DM_NMI; 1265 value = APIC_DM_NMI;
1155 else 1266 else
1156 value = APIC_DM_NMI | APIC_LVT_MASKED; 1267 value = APIC_DM_NMI | APIC_LVT_MASKED;
1157 if (!integrated) /* 82489DX */ 1268 if (!lapic_is_integrated()) /* 82489DX */
1158 value |= APIC_LVT_LEVEL_TRIGGER; 1269 value |= APIC_LVT_LEVEL_TRIGGER;
1159 apic_write(APIC_LVT1, value); 1270 apic_write(APIC_LVT1, value);
1271
1272 preempt_enable();
1160} 1273}
1161 1274
1162void __cpuinit end_local_APIC_setup(void) 1275void __cpuinit end_local_APIC_setup(void)
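The preempt_disable()/preempt_enable() pair added above pins setup_local_APIC() to one CPU: the function programs the current CPU's APIC, and a preemption-induced migration halfway through would split the register writes across two APICs. The minimal shape of that pattern:

#include <linux/preempt.h>

static void per_cpu_apic_work_sketch(void)
{
	preempt_disable();
	/* smp_processor_id() and this CPU's APIC state are stable here */
	preempt_enable();
}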
@@ -1177,6 +1290,153 @@ void __cpuinit end_local_APIC_setup(void)
1177 apic_pm_activate(); 1290 apic_pm_activate();
1178} 1291}
1179 1292
1293#ifdef HAVE_X2APIC
1294void check_x2apic(void)
1295{
1296 int msr, msr2;
1297
1298 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1299
1300 if (msr & X2APIC_ENABLE) {
1301 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
1302 x2apic_preenabled = x2apic = 1;
1303 apic_ops = &x2apic_ops;
1304 }
1305}
1306
1307void enable_x2apic(void)
1308{
1309 int msr, msr2;
1310
1311 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1312 if (!(msr & X2APIC_ENABLE)) {
1313 printk("Enabling x2apic\n");
1314 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1315 }
1316}
1317
1318void enable_IR_x2apic(void)
1319{
1320#ifdef CONFIG_INTR_REMAP
1321 int ret;
1322 unsigned long flags;
1323
1324 if (!cpu_has_x2apic)
1325 return;
1326
1327 if (!x2apic_preenabled && disable_x2apic) {
1328 printk(KERN_INFO
1329 "Skipped enabling x2apic and Interrupt-remapping "
1330 "because of nox2apic\n");
1331 return;
1332 }
1333
1334 if (x2apic_preenabled && disable_x2apic)
1335 panic("Bios already enabled x2apic, can't enforce nox2apic");
1336
1337 if (!x2apic_preenabled && skip_ioapic_setup) {
1338 printk(KERN_INFO
1339 "Skipped enabling x2apic and Interrupt-remapping "
1340 "because of skipping io-apic setup\n");
1341 return;
1342 }
1343
1344 ret = dmar_table_init();
1345 if (ret) {
1346 printk(KERN_INFO
1347 "dmar_table_init() failed with %d:\n", ret);
1348
1349 if (x2apic_preenabled)
1350 panic("x2apic enabled by bios. But IR enabling failed");
1351 else
1352 printk(KERN_INFO
1353 "Not enabling x2apic,Intr-remapping\n");
1354 return;
1355 }
1356
1357 local_irq_save(flags);
1358 mask_8259A();
1359
1360 ret = save_mask_IO_APIC_setup();
1361 if (ret) {
1362 printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
1363 goto end;
1364 }
1365
1366 ret = enable_intr_remapping(1);
1367
1368 if (ret && x2apic_preenabled) {
1369 local_irq_restore(flags);
1370 panic("x2apic enabled by bios. But IR enabling failed");
1371 }
1372
1373 if (ret)
1374 goto end_restore;
1375
1376 if (!x2apic) {
1377 x2apic = 1;
1378 apic_ops = &x2apic_ops;
1379 enable_x2apic();
1380 }
1381
1382end_restore:
1383 if (ret)
1384 /*
1385 * IR enabling failed
1386 */
1387 restore_IO_APIC_setup();
1388 else
1389 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1390
1391end:
1392 unmask_8259A();
1393 local_irq_restore(flags);
1394
1395 if (!ret) {
1396 if (!x2apic_preenabled)
1397 printk(KERN_INFO
1398 "Enabled x2apic and interrupt-remapping\n");
1399 else
1400 printk(KERN_INFO
1401 "Enabled Interrupt-remapping\n");
1402 } else
1403 printk(KERN_ERR
1404 "Failed to enable Interrupt-remapping and x2apic\n");
1405#else
1406 if (!cpu_has_x2apic)
1407 return;
1408
1409 if (x2apic_preenabled)
1410 panic("x2apic enabled prior OS handover,"
1411 " enable CONFIG_INTR_REMAP");
1412
1413 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1414 " and x2apic\n");
1415#endif
1416
1417 return;
1418}
1419#endif /* HAVE_X2APIC */
1420
1421#ifdef CONFIG_X86_64
1422/*
1423 * Detect and enable local APICs on non-SMP boards.
1424 * Original code written by Keir Fraser.
1425 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1426 * not correctly set up (usually the APIC timer won't work etc.)
1427 */
1428static int __init detect_init_APIC(void)
1429{
1430 if (!cpu_has_apic) {
1431 printk(KERN_INFO "No local APIC present\n");
1432 return -1;
1433 }
1434
1435 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1436 boot_cpu_physical_apicid = 0;
1437 return 0;
1438}
1439#else
1180/* 1440/*
1181 * Detect and initialize APIC 1441 * Detect and initialize APIC
1182 */ 1442 */
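enable_IR_x2apic() above is a textbook use of the kernel's goto-unwind idiom: each failing step jumps to a label that undoes only what was already done (8259 masked, IO-APIC state saved, remapping enabled). Reduced to a self-contained skeleton with purely illustrative stand-in functions:

static void irqs_mask_sketch(void)	{ }
static void irqs_unmask_sketch(void)	{ }
static int  step_save_sketch(void)	{ return 0; }
static int  step_flip_sketch(void)	{ return -1; }
static void step_restore_sketch(void)	{ }

static int enable_feature_sketch(void)
{
	int ret;

	irqs_mask_sketch();
	ret = step_save_sketch();
	if (ret)
		goto out;		/* nothing to roll back yet */

	ret = step_flip_sketch();
	if (ret)
		step_restore_sketch();	/* undo only the saved state */
out:
	irqs_unmask_sketch();		/* always re-enable sources */
	return ret;
}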
@@ -1255,12 +1515,46 @@ no_apic:
1255 printk(KERN_INFO "No local APIC present or hardware disabled\n"); 1515 printk(KERN_INFO "No local APIC present or hardware disabled\n");
1256 return -1; 1516 return -1;
1257} 1517}
1518#endif
1519
1520#ifdef CONFIG_X86_64
1521void __init early_init_lapic_mapping(void)
1522{
1523 unsigned long phys_addr;
1524
1525 /*
1526 * If no local APIC can be found then go out
1527 * : it means there is no mpatable and MADT
1528 */
1529 if (!smp_found_config)
1530 return;
1531
1532 phys_addr = mp_lapic_addr;
1533
1534 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
1535 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1536 APIC_BASE, phys_addr);
1537
1538 /*
1539 * Fetch the APIC ID of the BSP in case we have a
1540 * default configuration (or the MP table is broken).
1541 */
1542 boot_cpu_physical_apicid = read_apic_id();
1543}
1544#endif
1258 1545
1259/** 1546/**
1260 * init_apic_mappings - initialize APIC mappings 1547 * init_apic_mappings - initialize APIC mappings
1261 */ 1548 */
1262void __init init_apic_mappings(void) 1549void __init init_apic_mappings(void)
1263{ 1550{
1551#ifdef HAVE_X2APIC
1552 if (x2apic) {
1553 boot_cpu_physical_apicid = read_apic_id();
1554 return;
1555 }
1556#endif
1557
1264 /* 1558 /*
1265 * If no local APIC can be found then set up a fake all 1559 * If no local APIC can be found then set up a fake all
1266 * zeroes page to simulate the local APIC and another 1560 * zeroes page to simulate the local APIC and another
@@ -1273,8 +1567,8 @@ void __init init_apic_mappings(void)
1273 apic_phys = mp_lapic_addr; 1567 apic_phys = mp_lapic_addr;
1274 1568
1275 set_fixmap_nocache(FIX_APIC_BASE, apic_phys); 1569 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
1276 printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, 1570 apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
1277 apic_phys); 1571 APIC_BASE, apic_phys);
1278 1572
1279 /* 1573 /*
1280 * Fetch the APIC ID of the BSP in case we have a 1574 * Fetch the APIC ID of the BSP in case we have a
@@ -1282,18 +1576,27 @@ void __init init_apic_mappings(void)
1282 */ 1576 */
1283 if (boot_cpu_physical_apicid == -1U) 1577 if (boot_cpu_physical_apicid == -1U)
1284 boot_cpu_physical_apicid = read_apic_id(); 1578 boot_cpu_physical_apicid = read_apic_id();
1285
1286} 1579}
1287 1580
1288/* 1581/*
1289 * This initializes the IO-APIC and APIC hardware if this is 1582 * This initializes the IO-APIC and APIC hardware if this is
1290 * a UP kernel. 1583 * a UP kernel.
1291 */ 1584 */
1292
1293int apic_version[MAX_APICS]; 1585int apic_version[MAX_APICS];
1294 1586
1295int __init APIC_init_uniprocessor(void) 1587int __init APIC_init_uniprocessor(void)
1296{ 1588{
1589#ifdef CONFIG_X86_64
1590 if (disable_apic) {
1591 printk(KERN_INFO "Apic disabled\n");
1592 return -1;
1593 }
1594 if (!cpu_has_apic) {
1595 disable_apic = 1;
1596 printk(KERN_INFO "Apic disabled by BIOS\n");
1597 return -1;
1598 }
1599#else
1297 if (!smp_found_config && !cpu_has_apic) 1600 if (!smp_found_config && !cpu_has_apic)
1298 return -1; 1601 return -1;
1299 1602
@@ -1302,39 +1605,68 @@ int __init APIC_init_uniprocessor(void)
1302 */ 1605 */
1303 if (!cpu_has_apic && 1606 if (!cpu_has_apic &&
1304 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 1607 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1305 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", 1608 printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
1306 boot_cpu_physical_apicid); 1609 boot_cpu_physical_apicid);
1307 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1610 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1308 return -1; 1611 return -1;
1309 } 1612 }
1613#endif
1310 1614
1311 verify_local_APIC(); 1615#ifdef HAVE_X2APIC
1616 enable_IR_x2apic();
1617#endif
1618#ifdef CONFIG_X86_64
1619 setup_apic_routing();
1620#endif
1312 1621
1622 verify_local_APIC();
1313 connect_bsp_APIC(); 1623 connect_bsp_APIC();
1314 1624
1625#ifdef CONFIG_X86_64
1626 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
1627#else
1315 /* 1628 /*
1316 * Hack: In case of kdump, after a crash, kernel might be booting 1629 * Hack: In case of kdump, after a crash, kernel might be booting
1317 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid 1630 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
1318 * might be zero if read from MP tables. Get it from LAPIC. 1631 * might be zero if read from MP tables. Get it from LAPIC.
1319 */ 1632 */
1320#ifdef CONFIG_CRASH_DUMP 1633# ifdef CONFIG_CRASH_DUMP
1321 boot_cpu_physical_apicid = read_apic_id(); 1634 boot_cpu_physical_apicid = read_apic_id();
1635# endif
1322#endif 1636#endif
1323 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1637 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1324
1325 setup_local_APIC(); 1638 setup_local_APIC();
1326 1639
1640#ifdef CONFIG_X86_64
1641 /*
1642 * Now enable IO-APICs, actually call clear_IO_APIC
1643 * We need clear_IO_APIC before enabling vector on BP
1644 */
1645 if (!skip_ioapic_setup && nr_ioapics)
1646 enable_IO_APIC();
1647#endif
1648
1327#ifdef CONFIG_X86_IO_APIC 1649#ifdef CONFIG_X86_IO_APIC
1328 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) 1650 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
1329#endif 1651#endif
1330 localise_nmi_watchdog(); 1652 localise_nmi_watchdog();
1331 end_local_APIC_setup(); 1653 end_local_APIC_setup();
1654
1332#ifdef CONFIG_X86_IO_APIC 1655#ifdef CONFIG_X86_IO_APIC
1333 if (smp_found_config) 1656 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
1334 if (!skip_ioapic_setup && nr_ioapics) 1657 setup_IO_APIC();
1335 setup_IO_APIC(); 1658# ifdef CONFIG_X86_64
1659 else
1660 nr_ioapics = 0;
1661# endif
1336#endif 1662#endif
1663
1664#ifdef CONFIG_X86_64
1665 setup_boot_APIC_clock();
1666 check_nmi_watchdog();
1667#else
1337 setup_boot_clock(); 1668 setup_boot_clock();
1669#endif
1338 1670
1339 return 0; 1671 return 0;
1340} 1672}
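A small convention visible in the hunk above: nested preprocessor conditionals indent the keyword after the hash ("# ifdef", "# endif") to show nesting depth, for example:

#ifdef CONFIG_X86_IO_APIC
# ifdef CONFIG_X86_64
	/* 64-bit-only branch */
# endif
#endif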
@@ -1348,8 +1680,11 @@ int __init APIC_init_uniprocessor(void)
1348 */ 1680 */
1349void smp_spurious_interrupt(struct pt_regs *regs) 1681void smp_spurious_interrupt(struct pt_regs *regs)
1350{ 1682{
1351 unsigned long v; 1683 u32 v;
1352 1684
1685#ifdef CONFIG_X86_64
1686 exit_idle();
1687#endif
1353 irq_enter(); 1688 irq_enter();
1354 /* 1689 /*
1355 * Check if this really is a spurious interrupt and ACK it 1690 * Check if this really is a spurious interrupt and ACK it
@@ -1360,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1360 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) 1695 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1361 ack_APIC_irq(); 1696 ack_APIC_irq();
1362 1697
1698#ifdef CONFIG_X86_64
1699 add_pda(irq_spurious_count, 1);
1700#else
1363 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 1701 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1364 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " 1702 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
1365 "should never happen.\n", smp_processor_id()); 1703 "should never happen.\n", smp_processor_id());
1366 __get_cpu_var(irq_stat).irq_spurious_count++; 1704 __get_cpu_var(irq_stat).irq_spurious_count++;
1705#endif
1367 irq_exit(); 1706 irq_exit();
1368} 1707}
1369 1708
@@ -1372,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1372 */ 1711 */
1373void smp_error_interrupt(struct pt_regs *regs) 1712void smp_error_interrupt(struct pt_regs *regs)
1374{ 1713{
1375 unsigned long v, v1; 1714 u32 v, v1;
1376 1715
1716#ifdef CONFIG_X86_64
1717 exit_idle();
1718#endif
1377 irq_enter(); 1719 irq_enter();
1378 /* First tickle the hardware, only then report what went on. -- REW */ 1720 /* First tickle the hardware, only then report what went on. -- REW */
1379 v = apic_read(APIC_ESR); 1721 v = apic_read(APIC_ESR);
@@ -1392,7 +1734,7 @@ void smp_error_interrupt(struct pt_regs *regs)
1392 6: Received illegal vector 1734 6: Received illegal vector
1393 7: Illegal register address 1735 7: Illegal register address
1394 */ 1736 */
1395 printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", 1737 printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
1396 smp_processor_id(), v , v1); 1738 smp_processor_id(), v , v1);
1397 irq_exit(); 1739 irq_exit();
1398} 1740}
@@ -1565,6 +1907,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
1565 cpu_set(cpu, cpu_present_map); 1907 cpu_set(cpu, cpu_present_map);
1566} 1908}
1567 1909
1910#ifdef CONFIG_X86_64
1911int hard_smp_processor_id(void)
1912{
1913 return read_apic_id();
1914}
1915#endif
1916
1568/* 1917/*
1569 * Power management 1918 * Power management
1570 */ 1919 */
@@ -1640,7 +1989,7 @@ static int lapic_resume(struct sys_device *dev)
1640 1989
1641 local_irq_save(flags); 1990 local_irq_save(flags);
1642 1991
1643#ifdef CONFIG_X86_64 1992#ifdef HAVE_X2APIC
1644 if (x2apic) 1993 if (x2apic)
1645 enable_x2apic(); 1994 enable_x2apic();
1646 else 1995 else
@@ -1702,7 +2051,7 @@ static struct sys_device device_lapic = {
1702 .cls = &lapic_sysclass, 2051 .cls = &lapic_sysclass,
1703}; 2052};
1704 2053
1705static void __devinit apic_pm_activate(void) 2054static void __cpuinit apic_pm_activate(void)
1706{ 2055{
1707 apic_pm_state.active = 1; 2056 apic_pm_state.active = 1;
1708} 2057}
@@ -1728,16 +2077,87 @@ static void apic_pm_activate(void) { }
1728 2077
1729#endif /* CONFIG_PM */ 2078#endif /* CONFIG_PM */
1730 2079
2080#ifdef CONFIG_X86_64
1731/* 2081/*
1732 * APIC command line parameters 2082 * apic_is_clustered_box() -- Check if we can expect good TSC
2083 *
2084 * Thus far, the major user of this is IBM's Summit2 series:
2085 *
2086 * Clustered boxes may have unsynced TSC problems if they are
2087 * multi-chassis. Use available data to take a good guess.
2088 * If in doubt, go HPET.
1733 */ 2089 */
1734static int __init parse_lapic(char *arg) 2090__cpuinit int apic_is_clustered_box(void)
1735{ 2091{
1736 force_enable_local_apic = 1; 2092 int i, clusters, zeros;
1737 return 0; 2093 unsigned id;
2094 u16 *bios_cpu_apicid;
2095 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
2096
2097 /*
 2098 * There are no boxes of this kind with AMD CPUs yet.
 2099 * An AMD box with quad-core CPUs and 8 sockets would have
 2100 * APIC IDs of [4, 0x23] or [8, 0x27] and could be mistaken
 2101 * for a vSMP box; this still needs checking...
2102 */
2103 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
2104 return 0;
2105
2106 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
2107 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
2108
2109 for (i = 0; i < NR_CPUS; i++) {
2110 /* are we being called early in kernel startup? */
2111 if (bios_cpu_apicid) {
2112 id = bios_cpu_apicid[i];
2113 }
2114 else if (i < nr_cpu_ids) {
2115 if (cpu_present(i))
2116 id = per_cpu(x86_bios_cpu_apicid, i);
2117 else
2118 continue;
2119 }
2120 else
2121 break;
2122
2123 if (id != BAD_APICID)
2124 __set_bit(APIC_CLUSTERID(id), clustermap);
2125 }
2126
2127 /* Problem: Partially populated chassis may not have CPUs in some of
2128 * the APIC clusters they have been allocated. Only present CPUs have
2129 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
2130 * Since clusters are allocated sequentially, count zeros only if
2131 * they are bounded by ones.
2132 */
2133 clusters = 0;
2134 zeros = 0;
2135 for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
2136 if (test_bit(i, clustermap)) {
2137 clusters += 1 + zeros;
2138 zeros = 0;
2139 } else
2140 ++zeros;
2141 }
2142
2143 /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
2144 * not guaranteed to be synced between boards
2145 */
2146 if (is_vsmp_box() && clusters > 1)
2147 return 1;
2148
2149 /*
2150 * If clusters > 2, then should be multi-chassis.
2151 * May have to revisit this when multi-core + hyperthreaded CPUs come
2152 * out, but AFAIK this will work even for them.
2153 */
2154 return (clusters > 2);
1738} 2155}
1739early_param("lapic", parse_lapic); 2156#endif
1740 2157
2158/*
2159 * APIC command line parameters
2160 */
1741static int __init setup_disableapic(char *arg) 2161static int __init setup_disableapic(char *arg)
1742{ 2162{
1743 disable_apic = 1; 2163 disable_apic = 1;
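The cluster count in apic_is_clustered_box() above counts holes only when they sit between set bits: a partially populated chassis still owns the cluster IDs inside the populated range, while trailing unused clusters do not exist at all. The loop replayed as a standalone program, where bitmap {1,0,1,0,0} yields 3 clusters:

#include <stdio.h>

int main(void)
{
	int map[] = { 1, 0, 1, 0, 0 };
	int clusters = 0, zeros = 0, i;

	for (i = 0; i < 5; i++) {
		if (map[i]) {
			clusters += 1 + zeros;	/* count bounded holes */
			zeros = 0;
		} else {
			++zeros;		/* pending, maybe unbounded */
		}
	}
	printf("clusters = %d\n", clusters);	/* prints 3 */
	return 0;
}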
@@ -1779,7 +2199,6 @@ static int __init apic_set_verbosity(char *arg)
1779 if (!arg) { 2199 if (!arg) {
1780#ifdef CONFIG_X86_64 2200#ifdef CONFIG_X86_64
1781 skip_ioapic_setup = 0; 2201 skip_ioapic_setup = 0;
1782 ioapic_force = 1;
1783 return 0; 2202 return 0;
1784#endif 2203#endif
1785 return -EINVAL; 2204 return -EINVAL;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
deleted file mode 100644
index 94ddb69ae15e..000000000000
--- a/arch/x86/kernel/apic_64.c
+++ /dev/null
@@ -1,1848 +0,0 @@
1/*
2 * Local APIC handling, local APIC timers
3 *
4 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes
7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
8 * thanks to Eric Gilmore
9 * and Rolf G. Tews
10 * for testing these extensively.
11 * Maciej W. Rozycki : Various updates and fixes.
12 * Mikael Pettersson : Power Management for UP-APIC.
13 * Pavel Machek and
14 * Mikael Pettersson : PM converted to driver model.
15 */
16
17#include <linux/init.h>
18
19#include <linux/mm.h>
20#include <linux/delay.h>
21#include <linux/bootmem.h>
22#include <linux/interrupt.h>
23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h>
25#include <linux/sysdev.h>
26#include <linux/ioport.h>
27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h>
30#include <linux/dmar.h>
31
32#include <asm/atomic.h>
33#include <asm/smp.h>
34#include <asm/mtrr.h>
35#include <asm/mpspec.h>
36#include <asm/hpet.h>
37#include <asm/pgalloc.h>
38#include <asm/nmi.h>
39#include <asm/idle.h>
40#include <asm/proto.h>
41#include <asm/timex.h>
42#include <asm/apic.h>
43#include <asm/i8259.h>
44
45#include <mach_ipi.h>
46#include <mach_apic.h>
47
48/* Disable local APIC timer from the kernel commandline or via dmi quirk */
49static int disable_apic_timer __cpuinitdata;
50static int apic_calibrate_pmtmr __initdata;
51int disable_apic;
52int disable_x2apic;
53int x2apic;
54
55/* x2apic enabled before OS handover */
56int x2apic_preenabled;
57
58/* Local APIC timer works in C2 */
59int local_apic_timer_c2_ok;
60EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
61
62/*
63 * Debug level, exported for io_apic.c
64 */
65unsigned int apic_verbosity;
66
67/* Have we found an MP table */
68int smp_found_config;
69
70static struct resource lapic_resource = {
71 .name = "Local APIC",
72 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
73};
74
75static unsigned int calibration_result;
76
77static int lapic_next_event(unsigned long delta,
78 struct clock_event_device *evt);
79static void lapic_timer_setup(enum clock_event_mode mode,
80 struct clock_event_device *evt);
81static void lapic_timer_broadcast(cpumask_t mask);
82static void apic_pm_activate(void);
83
84/*
85 * The local apic timer can be used for any function which is CPU local.
86 */
87static struct clock_event_device lapic_clockevent = {
88 .name = "lapic",
89 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
90 | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
91 .shift = 32,
92 .set_mode = lapic_timer_setup,
93 .set_next_event = lapic_next_event,
94 .broadcast = lapic_timer_broadcast,
95 .rating = 100,
96 .irq = -1,
97};
98static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
99
100static unsigned long apic_phys;
101
102unsigned long mp_lapic_addr;
103
104/*
105 * Get the LAPIC version
106 */
107static inline int lapic_get_version(void)
108{
109 return GET_APIC_VERSION(apic_read(APIC_LVR));
110}
111
112/*
113 * Check, if the APIC is integrated or a separate chip
114 */
115static inline int lapic_is_integrated(void)
116{
117#ifdef CONFIG_X86_64
118 return 1;
119#else
120 return APIC_INTEGRATED(lapic_get_version());
121#endif
122}
123
124/*
125 * Check, whether this is a modern or a first generation APIC
126 */
127static int modern_apic(void)
128{
129 /* AMD systems use old APIC versions, so check the CPU */
130 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
131 boot_cpu_data.x86 >= 0xf)
132 return 1;
133 return lapic_get_version() >= 0x14;
134}
135
136/*
137 * Paravirt kernels also might be using these below ops. So we still
138 * use generic apic_read()/apic_write(), which might be pointing to different
139 * ops in PARAVIRT case.
140 */
141void xapic_wait_icr_idle(void)
142{
143 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
144 cpu_relax();
145}
146
147u32 safe_xapic_wait_icr_idle(void)
148{
149 u32 send_status;
150 int timeout;
151
152 timeout = 0;
153 do {
154 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
155 if (!send_status)
156 break;
157 udelay(100);
158 } while (timeout++ < 1000);
159
160 return send_status;
161}
162
163void xapic_icr_write(u32 low, u32 id)
164{
165 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
166 apic_write(APIC_ICR, low);
167}
168
169u64 xapic_icr_read(void)
170{
171 u32 icr1, icr2;
172
173 icr2 = apic_read(APIC_ICR2);
174 icr1 = apic_read(APIC_ICR);
175
176 return icr1 | ((u64)icr2 << 32);
177}
178
179static struct apic_ops xapic_ops = {
180 .read = native_apic_mem_read,
181 .write = native_apic_mem_write,
182 .icr_read = xapic_icr_read,
183 .icr_write = xapic_icr_write,
184 .wait_icr_idle = xapic_wait_icr_idle,
185 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
186};
187
188struct apic_ops __read_mostly *apic_ops = &xapic_ops;
189EXPORT_SYMBOL_GPL(apic_ops);
190
191static void x2apic_wait_icr_idle(void)
192{
193 /* no need to wait for icr idle in x2apic */
194 return;
195}
196
197static u32 safe_x2apic_wait_icr_idle(void)
198{
199 /* no need to wait for icr idle in x2apic */
200 return 0;
201}
202
203void x2apic_icr_write(u32 low, u32 id)
204{
205 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
206}
207
208u64 x2apic_icr_read(void)
209{
210 unsigned long val;
211
212 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
213 return val;
214}
215
216static struct apic_ops x2apic_ops = {
217 .read = native_apic_msr_read,
218 .write = native_apic_msr_write,
219 .icr_read = x2apic_icr_read,
220 .icr_write = x2apic_icr_write,
221 .wait_icr_idle = x2apic_wait_icr_idle,
222 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
223};
224
225/**
226 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
227 */
228void __cpuinit enable_NMI_through_LVT0(void)
229{
230 unsigned int v;
231
232 /* unmask and set to NMI */
233 v = APIC_DM_NMI;
234
235 /* Level triggered for 82489DX (32bit mode) */
236 if (!lapic_is_integrated())
237 v |= APIC_LVT_LEVEL_TRIGGER;
238
239 apic_write(APIC_LVT0, v);
240}
241
242/**
243 * lapic_get_maxlvt - get the maximum number of local vector table entries
244 */
245int lapic_get_maxlvt(void)
246{
247 unsigned int v;
248
249 v = apic_read(APIC_LVR);
250 /*
251 * - we always have APIC integrated on 64bit mode
252 * - 82489DXs do not report # of LVT entries
253 */
254 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
255}
256
257/*
258 * Local APIC timer
259 */
260
261/* Clock divisor */
262#ifdef CONFG_X86_64
263#define APIC_DIVISOR 1
264#else
265#define APIC_DIVISOR 16
266#endif
267
268/*
269 * This function sets up the local APIC timer, with a timeout of
270 * 'clocks' APIC bus clock ticks. During calibration we actually call
271 * this function twice on the boot CPU, once with a bogus timeout
272 * value and a second time for real. The other (non-calibrating) CPUs
273 * call this function only once, with the real, calibrated value.
274 *
275 * We do reads before writes even if unnecessary, to get around the
276 * P5 APIC double write bug.
277 */
278static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
279{
280 unsigned int lvtt_value, tmp_value;
281
282 lvtt_value = LOCAL_TIMER_VECTOR;
283 if (!oneshot)
284 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
285 if (!lapic_is_integrated())
286 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
287
288 if (!irqen)
289 lvtt_value |= APIC_LVT_MASKED;
290
291 apic_write(APIC_LVTT, lvtt_value);
292
293 /*
294 * Divide PICLK by 16
295 */
296 tmp_value = apic_read(APIC_TDCR);
297 apic_write(APIC_TDCR,
298 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
299 APIC_TDR_DIV_16);
300
301 if (!oneshot)
302 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
303}
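/*
 * The TDCR arithmetic above, spelled out: APIC_TDR_DIV_1 is 0xB and
 * APIC_TDR_DIV_TMBASE is 0x4, so ~(0xB | 0x4) = ~0xF clears all four
 * divide-configuration bits, and ORing in APIC_TDR_DIV_16 (0x3)
 * selects divide-by-16. With a 200 MHz bus clock, for example, the
 * timer then ticks at 12.5 MHz.
 */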
304
305/*
306 * Setup extended LVT, AMD specific (K8, family 10h)
307 *
308 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
309 * MCE interrupts are supported. Thus MCE offset must be set to 0.
310 *
311 * If mask=1, the LVT entry does not generate interrupts while mask=0
312 * enables the vector. See also the BKDGs.
313 */
314
315#define APIC_EILVT_LVTOFF_MCE 0
316#define APIC_EILVT_LVTOFF_IBS 1
317
318static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
319{
320 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
321 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
322
323 apic_write(reg, v);
324}
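/*
 * Layout sketch: each extended LVT entry is 16 bytes above APIC500,
 * so lvt_off = 1 (IBS) targets APIC510. As an arbitrary example,
 * setup_APIC_eilvt(1, 0xf0, APIC_EILVT_MSG_FIX, 0) would write
 * (0 << 16) | (0 << 8) | 0xf0 = 0x000000f0 to offset 0x510.
 */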
325
326u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
327{
328 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
329 return APIC_EILVT_LVTOFF_MCE;
330}
331
332u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
333{
334 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
335 return APIC_EILVT_LVTOFF_IBS;
336}
337EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
338
339/*
340 * Program the next event, relative to now
341 */
342static int lapic_next_event(unsigned long delta,
343 struct clock_event_device *evt)
344{
345 apic_write(APIC_TMICT, delta);
346 return 0;
347}
348
349/*
350 * Setup the lapic timer in periodic or oneshot mode
351 */
352static void lapic_timer_setup(enum clock_event_mode mode,
353 struct clock_event_device *evt)
354{
355 unsigned long flags;
356 unsigned int v;
357
358	/* Lapic used as a dummy for broadcast? */
359 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
360 return;
361
362 local_irq_save(flags);
363
364 switch (mode) {
365 case CLOCK_EVT_MODE_PERIODIC:
366 case CLOCK_EVT_MODE_ONESHOT:
367 __setup_APIC_LVTT(calibration_result,
368 mode != CLOCK_EVT_MODE_PERIODIC, 1);
369 break;
370 case CLOCK_EVT_MODE_UNUSED:
371 case CLOCK_EVT_MODE_SHUTDOWN:
372 v = apic_read(APIC_LVTT);
373 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
374 apic_write(APIC_LVTT, v);
375 break;
376 case CLOCK_EVT_MODE_RESUME:
377 /* Nothing to do here */
378 break;
379 }
380
381 local_irq_restore(flags);
382}
383
384/*
385 * Local APIC timer broadcast function
386 */
387static void lapic_timer_broadcast(cpumask_t mask)
388{
389#ifdef CONFIG_SMP
390 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
391#endif
392}
393
394/*
395 * Setup the local APIC timer for this CPU. Copy the initialized values
396 * of the boot CPU and register the clock event in the framework.
397 */
398static void setup_APIC_timer(void)
399{
400 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
401
402 memcpy(levt, &lapic_clockevent, sizeof(*levt));
403 levt->cpumask = cpumask_of_cpu(smp_processor_id());
404
405 clockevents_register_device(levt);
406}
407
408/*
409 * In this function we calibrate APIC bus clocks to the external
410 * timer. Unfortunately we cannot use jiffies and the timer irq
411 * to calibrate, since some later bootup code depends on getting
412 * the first irq. Ugh.
413 *
414 * We want to do the calibration only once, since we want
415 * the local timer irqs to stay synchronized. CPUs connected
416 * to the same APIC bus have the very same bus frequency.
417 * And we want irqs off anyway - no accidental
418 * APIC irq that way.
419 */
420
421#define TICK_COUNT 100000000
422
423static int __init calibrate_APIC_clock(void)
424{
425 unsigned apic, apic_start;
426 unsigned long tsc, tsc_start;
427 int result;
428
429 local_irq_disable();
430
431 /*
432 * Put whatever arbitrary (but long enough) timeout
433 * value into the APIC clock; we just want to get the
434 * counter running for calibration.
435 *
436 * No interrupt enable!
437 */
438 __setup_APIC_LVTT(250000000, 0, 0);
439
440 apic_start = apic_read(APIC_TMCCT);
441#ifdef CONFIG_X86_PM_TIMER
442 if (apic_calibrate_pmtmr && pmtmr_ioport) {
443 pmtimer_wait(5000); /* 5ms wait */
444 apic = apic_read(APIC_TMCCT);
445 result = (apic_start - apic) * 1000L / 5;
446 } else
447#endif
448 {
449 rdtscll(tsc_start);
450
451 do {
452 apic = apic_read(APIC_TMCCT);
453 rdtscll(tsc);
454 } while ((tsc - tsc_start) < TICK_COUNT &&
455 (apic_start - apic) < TICK_COUNT);
456
457 result = (apic_start - apic) * 1000L * tsc_khz /
458 (tsc - tsc_start);
459 }
460
461 local_irq_enable();
462
463 printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
464
465 printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
466 result / 1000 / 1000, result / 1000 % 1000);
467
468 /* Calculate the scaled math multiplication factor */
469 lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
470 lapic_clockevent.shift);
471 lapic_clockevent.max_delta_ns =
472 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
473 lapic_clockevent.min_delta_ns =
474 clockevent_delta2ns(0xF, &lapic_clockevent);
475
476 calibration_result = (result * APIC_DIVISOR) / HZ;
477
478 /*
479 * Do a sanity check on the APIC calibration result
480 */
481 if (calibration_result < (1000000 / HZ)) {
482 printk(KERN_WARNING
483 "APIC frequency too slow, disabling apic timer\n");
484 return -1;
485 }
486
487 return 0;
488}
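/*
 * Worked example of the TSC path above, with made-up but plausible
 * numbers: on a 1.6 GHz CPU (tsc_khz = 1600000), suppose the APIC
 * counter drops by 10,000,000 while the TSC advances by 160,000,000.
 * Then result = 10000000 * 1000 * 1600000 / 160000000 = 100000000,
 * i.e. a 100 MHz APIC timer. Assuming HZ=1000 and APIC_DIVISOR=1,
 * calibration_result = 100000 - comfortably above the
 * 1000000/HZ = 1000 sanity floor.
 */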
489
490/*
491 * Setup the boot APIC
492 *
493 * Calibrate and verify the result.
494 */
495void __init setup_boot_APIC_clock(void)
496{
497 /*
498 * The local apic timer can be disabled via the kernel
499 * commandline or from the CPU detection code. Register the lapic
500 * timer as a dummy clock event source on SMP systems, so the
501 * broadcast mechanism is used. On UP systems simply ignore it.
502 */
503 if (disable_apic_timer) {
504 printk(KERN_INFO "Disabling APIC timer\n");
505 /* No broadcast on UP ! */
506 if (num_possible_cpus() > 1) {
507 lapic_clockevent.mult = 1;
508 setup_APIC_timer();
509 }
510 return;
511 }
512
513 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
514 "calibrating APIC timer ...\n");
515
516 if (calibrate_APIC_clock()) {
517 /* No broadcast on UP ! */
518 if (num_possible_cpus() > 1)
519 setup_APIC_timer();
520 return;
521 }
522
523 /*
524 * If nmi_watchdog is set to IO_APIC, we need the
525 * PIT/HPET going. Otherwise register lapic as a dummy
526 * device.
527 */
528 if (nmi_watchdog != NMI_IO_APIC)
529 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
530 else
531 printk(KERN_WARNING "APIC timer registered as dummy,"
532 " due to nmi_watchdog=%d!\n", nmi_watchdog);
533
534 /* Setup the lapic or request the broadcast */
535 setup_APIC_timer();
536}
537
538void __cpuinit setup_secondary_APIC_clock(void)
539{
540 setup_APIC_timer();
541}
542
543/*
544 * The guts of the apic timer interrupt
545 */
546static void local_apic_timer_interrupt(void)
547{
548 int cpu = smp_processor_id();
549 struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
550
551 /*
552 * Normally we should not be here till the LAPIC has been initialized,
553 * but in some cases, like kdump, it's possible that a pending LAPIC
554 * timer interrupt from the previous kernel's context is delivered in
555 * the new kernel the moment interrupts are enabled.
556 *
557 * Interrupts are enabled early and the LAPIC is set up much later,
558 * hence it's possible that when we get here evt->event_handler is
559 * NULL. Check for event_handler being NULL and discard the interrupt
560 * as spurious.
561 */
562 if (!evt->event_handler) {
563 printk(KERN_WARNING
564 "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
565 /* Switch it off */
566 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
567 return;
568 }
569
570 /*
571 * the NMI deadlock-detector uses this.
572 */
573#ifdef CONFIG_X86_64
574 add_pda(apic_timer_irqs, 1);
575#else
576 per_cpu(irq_stat, cpu).apic_timer_irqs++;
577#endif
578
579 evt->event_handler(evt);
580}
581
582/*
583 * Local APIC timer interrupt. This is the most natural way for doing
584 * local interrupts, but local timer interrupts can be emulated by
585 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
586 *
587 * [ if a single-CPU system runs an SMP kernel then we call the local
588 * interrupt as well. Thus we cannot inline the local irq ... ]
589 */
590void smp_apic_timer_interrupt(struct pt_regs *regs)
591{
592 struct pt_regs *old_regs = set_irq_regs(regs);
593
594 /*
595 * NOTE! We'd better ACK the irq immediately,
596 * because timer handling can be slow.
597 */
598 ack_APIC_irq();
599 /*
600 * update_process_times() expects us to have done irq_enter().
601 * Besides, if we don't, timer interrupts ignore the global
602 * interrupt lock, which is the WrongThing (tm) to do.
603 */
604 exit_idle();
605 irq_enter();
606 local_apic_timer_interrupt();
607 irq_exit();
608
609 set_irq_regs(old_regs);
610}
611
612int setup_profiling_timer(unsigned int multiplier)
613{
614 return -EINVAL;
615}
616
617
618/*
619 * Local APIC start and shutdown
620 */
621
622/**
623 * clear_local_APIC - shutdown the local APIC
624 *
625 * This is called when a CPU is disabled and before rebooting, so the state of
626 * the local APIC has no dangling leftovers. Also used to clean out any BIOS
627 * leftovers during boot.
628 */
629void clear_local_APIC(void)
630{
631 int maxlvt;
632 u32 v;
633
634 /* APIC hasn't been mapped yet */
635 if (!apic_phys)
636 return;
637
638 maxlvt = lapic_get_maxlvt();
639 /*
640 * Masking an LVT entry can trigger a local APIC error
641 * if the vector is zero. Mask LVTERR first to prevent this.
642 */
643 if (maxlvt >= 3) {
644 v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
645 apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
646 }
647 /*
648 * Careful: we have to set masks only first to deassert
649 * any level-triggered sources.
650 */
651 v = apic_read(APIC_LVTT);
652 apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
653 v = apic_read(APIC_LVT0);
654 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
655 v = apic_read(APIC_LVT1);
656 apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
657 if (maxlvt >= 4) {
658 v = apic_read(APIC_LVTPC);
659 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
660 }
661
662	/* let's not touch this if we didn't frob it */
663#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
664 if (maxlvt >= 5) {
665 v = apic_read(APIC_LVTTHMR);
666 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
667 }
668#endif
669 /*
670 * Clean APIC state for other OSs:
671 */
672 apic_write(APIC_LVTT, APIC_LVT_MASKED);
673 apic_write(APIC_LVT0, APIC_LVT_MASKED);
674 apic_write(APIC_LVT1, APIC_LVT_MASKED);
675 if (maxlvt >= 3)
676 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
677 if (maxlvt >= 4)
678 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
679
680 /* Integrated APIC (!82489DX) ? */
681 if (lapic_is_integrated()) {
682 if (maxlvt > 3)
683 /* Clear ESR due to Pentium errata 3AP and 11AP */
684 apic_write(APIC_ESR, 0);
685 apic_read(APIC_ESR);
686 }
687}
688
689/**
690 * disable_local_APIC - clear and disable the local APIC
691 */
692void disable_local_APIC(void)
693{
694 unsigned int value;
695
696 clear_local_APIC();
697
698 /*
699 * Disable APIC (implies clearing of registers
700 * for 82489DX!).
701 */
702 value = apic_read(APIC_SPIV);
703 value &= ~APIC_SPIV_APIC_ENABLED;
704 apic_write(APIC_SPIV, value);
705
706#ifdef CONFIG_X86_32
707 /*
708 * When LAPIC was disabled by the BIOS and enabled by the kernel,
709 * restore the disabled state.
710 */
711 if (enabled_via_apicbase) {
712 unsigned int l, h;
713
714 rdmsr(MSR_IA32_APICBASE, l, h);
715 l &= ~MSR_IA32_APICBASE_ENABLE;
716 wrmsr(MSR_IA32_APICBASE, l, h);
717 }
718#endif
719}
720
721/*
722 * If Linux enabled the LAPIC against the BIOS default, disable it again
723 * before re-entering the BIOS on shutdown. Otherwise the BIOS may get
724 * confused and not power off. Additionally, clear all LVT entries before
725 * disable_local_APIC for the case where Linux didn't enable the LAPIC.
726 */
727void lapic_shutdown(void)
728{
729 unsigned long flags;
730
731 if (!cpu_has_apic)
732 return;
733
734 local_irq_save(flags);
735
736#ifdef CONFIG_X86_32
737 if (!enabled_via_apicbase)
738 clear_local_APIC();
739 else
740#endif
741 disable_local_APIC();
742
743
744 local_irq_restore(flags);
745}
746
747/*
748 * This is to verify that we're looking at a real local APIC.
749 * Check these against your board if the CPUs aren't getting
750 * started for no apparent reason.
751 */
752int __init verify_local_APIC(void)
753{
754 unsigned int reg0, reg1;
755
756 /*
757 * The version register is read-only in a real APIC.
758 */
759 reg0 = apic_read(APIC_LVR);
760 apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
761 apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
762 reg1 = apic_read(APIC_LVR);
763 apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
764
765 /*
766 * The two version reads above should print the same
767 * numbers. If the second one is different, then we
768 * poke at a non-APIC.
769 */
770 if (reg1 != reg0)
771 return 0;
772
773 /*
774 * Check if the version looks reasonable.
775 */
776 reg1 = GET_APIC_VERSION(reg0);
777 if (reg1 == 0x00 || reg1 == 0xff)
778 return 0;
779 reg1 = lapic_get_maxlvt();
780 if (reg1 < 0x02 || reg1 == 0xff)
781 return 0;
782
783 /*
784 * The ID register is read/write in a real APIC.
785 */
786 reg0 = apic_read(APIC_ID);
787 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
788 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
789 reg1 = apic_read(APIC_ID);
790 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
791 apic_write(APIC_ID, reg0);
792 if (reg1 != (reg0 ^ APIC_ID_MASK))
793 return 0;
794
795 /*
796 * The next two are just to see if we have sane values.
797 * They're only really relevant if we're in Virtual Wire
798 * compatibility mode, but most boxes aren't anymore.
799 */
800 reg0 = apic_read(APIC_LVT0);
801 apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
802 reg1 = apic_read(APIC_LVT1);
803 apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
804
805 return 1;
806}
807
808/**
809 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
810 */
811void __init sync_Arb_IDs(void)
812{
813 /*
814 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1. Not
815 * needed on AMD either.
816 */
817 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
818 return;
819
820 /*
821 * Wait for idle.
822 */
823 apic_wait_icr_idle();
824
825 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
826 apic_write(APIC_ICR, APIC_DEST_ALLINC |
827 APIC_INT_LEVELTRIG | APIC_DM_INIT);
828}
829
830/*
831 * An initial setup of the virtual wire mode.
832 */
833void __init init_bsp_APIC(void)
834{
835 unsigned int value;
836
837 /*
838 * Don't do the setup now if we have an SMP BIOS as the
839 * through-I/O-APIC virtual wire mode might be active.
840 */
841 if (smp_found_config || !cpu_has_apic)
842 return;
843
844 /*
845 * Do not trust the local APIC being empty at bootup.
846 */
847 clear_local_APIC();
848
849 /*
850 * Enable APIC.
851 */
852 value = apic_read(APIC_SPIV);
853 value &= ~APIC_VECTOR_MASK;
854 value |= APIC_SPIV_APIC_ENABLED;
855
856#ifdef CONFIG_X86_32
857 /* This bit is reserved on P4/Xeon and should be cleared */
858 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
859 (boot_cpu_data.x86 == 15))
860 value &= ~APIC_SPIV_FOCUS_DISABLED;
861 else
862#endif
863 value |= APIC_SPIV_FOCUS_DISABLED;
864 value |= SPURIOUS_APIC_VECTOR;
865 apic_write(APIC_SPIV, value);
866
867 /*
868 * Set up the virtual wire mode.
869 */
870 apic_write(APIC_LVT0, APIC_DM_EXTINT);
871 value = APIC_DM_NMI;
872 if (!lapic_is_integrated()) /* 82489DX */
873 value |= APIC_LVT_LEVEL_TRIGGER;
874 apic_write(APIC_LVT1, value);
875}
876
877static void __cpuinit lapic_setup_esr(void)
878{
879 unsigned long oldvalue, value, maxlvt;
880	if (lapic_is_integrated()) {
881 if (esr_disable) {
882 /*
883 * Something untraceable is creating bad interrupts on
884 * secondary quads ... for the moment, just leave the
885 * ESR disabled - we can't do anything useful with the
886 * errors anyway - mbligh
887 */
888 printk(KERN_INFO "Leaving ESR disabled.\n");
889 return;
890 }
891 /* !82489DX */
892 maxlvt = lapic_get_maxlvt();
893 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
894 apic_write(APIC_ESR, 0);
895 oldvalue = apic_read(APIC_ESR);
896
897 /* enables sending errors */
898 value = ERROR_APIC_VECTOR;
899 apic_write(APIC_LVTERR, value);
900 /*
901 * spec says clear errors after enabling vector.
902 */
903 if (maxlvt > 3)
904 apic_write(APIC_ESR, 0);
905 value = apic_read(APIC_ESR);
906 if (value != oldvalue)
907 apic_printk(APIC_VERBOSE, "ESR value before enabling "
908 "vector: 0x%08lx after: 0x%08lx\n",
909 oldvalue, value);
910 } else {
911 printk(KERN_INFO "No ESR for 82489DX.\n");
912 }
913}
914
915
916/**
917 * setup_local_APIC - setup the local APIC
918 */
919void __cpuinit setup_local_APIC(void)
920{
921 unsigned int value;
922 int i, j;
923
924 preempt_disable();
925 value = apic_read(APIC_LVR);
926
927 BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
928
929 /*
930 * Double-check whether this APIC is really registered.
931 * This is meaningless in clustered apic mode, so we skip it.
932 */
933 if (!apic_id_registered())
934 BUG();
935
936 /*
937 * Intel recommends to set DFR, LDR and TPR before enabling
938 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
939 * document number 292116). So here it goes...
940 */
941 init_apic_ldr();
942
943 /*
944 * Set Task Priority to 'accept all'. We never change this
945 * later on.
946 */
947 value = apic_read(APIC_TASKPRI);
948 value &= ~APIC_TPRI_MASK;
949 apic_write(APIC_TASKPRI, value);
950
951 /*
952 * After a crash, we no longer service the interrupts and a pending
953 * interrupt from previous kernel might still have ISR bit set.
954 *
955 * Most probably by now the CPU has serviced that pending interrupt and
956 * it might not have done the ack_APIC_irq() because it thought the
957 * interrupt came from i8259 as ExtInt. The LAPIC did not get an EOI so
958 * it does not clear the ISR bit and the cpu thinks it has already
959 * serviced the interrupt. Hence a vector might get locked. It was
960 * noticed for the timer irq (vector 0x31). Issue an extra EOI to clear ISR.
961 */
962 for (i = APIC_ISR_NR - 1; i >= 0; i--) {
963 value = apic_read(APIC_ISR + i*0x10);
964 for (j = 31; j >= 0; j--) {
965 if (value & (1<<j))
966 ack_APIC_irq();
967 }
968 }
969
970 /*
971 * Now that we are all set up, enable the APIC
972 */
973 value = apic_read(APIC_SPIV);
974 value &= ~APIC_VECTOR_MASK;
975 /*
976 * Enable APIC
977 */
978 value |= APIC_SPIV_APIC_ENABLED;
979
980 /* We always use processor focus */
981
982 /*
983 * Set spurious IRQ vector
984 */
985 value |= SPURIOUS_APIC_VECTOR;
986 apic_write(APIC_SPIV, value);
987
988 /*
989 * Set up LVT0, LVT1:
990 *
991 * set up through-local-APIC on the BP's LINT0. This is not
992 * strictly necessary in pure symmetric-IO mode, but sometimes
993 * we delegate interrupts to the 8259A.
994 */
995 /*
996 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
997 */
998 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
999 if (!smp_processor_id() && !value) {
1000 value = APIC_DM_EXTINT;
1001 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
1002 smp_processor_id());
1003 } else {
1004 value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1005 apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
1006 smp_processor_id());
1007 }
1008 apic_write(APIC_LVT0, value);
1009
1010 /*
1011 * only the BP should see the LINT1 NMI signal, obviously.
1012 */
1013 if (!smp_processor_id())
1014 value = APIC_DM_NMI;
1015 else
1016 value = APIC_DM_NMI | APIC_LVT_MASKED;
1017 apic_write(APIC_LVT1, value);
1018 preempt_enable();
1019}
1020
1021void __cpuinit end_local_APIC_setup(void)
1022{
1023 lapic_setup_esr();
1024
1025#ifdef CONFIG_X86_32
1026 {
1027 unsigned int value;
1028 /* Disable the local apic timer */
1029 value = apic_read(APIC_LVTT);
1030 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1031 apic_write(APIC_LVTT, value);
1032 }
1033#endif
1034
1035 setup_apic_nmi_watchdog(NULL);
1036 apic_pm_activate();
1037}
1038
1039void check_x2apic(void)
1040{
1041 int msr, msr2;
1042
1043 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1044
1045 if (msr & X2APIC_ENABLE) {
1046		printk(KERN_INFO "x2apic enabled by BIOS, switching to x2apic ops\n");
1047 x2apic_preenabled = x2apic = 1;
1048 apic_ops = &x2apic_ops;
1049 }
1050}
1051
1052void enable_x2apic(void)
1053{
1054 int msr, msr2;
1055
1056 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1057 if (!(msr & X2APIC_ENABLE)) {
1058		printk(KERN_INFO "Enabling x2apic\n");
1059 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1060 }
1061}
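/*
 * MSR_IA32_APICBASE layout, for reference: bit 11 is the global
 * APIC enable and X2APIC_ENABLE is bit 10, so a value of, say,
 * 0xfee00c00 means the APIC sits at its default 0xfee00000 base
 * with both xapic and x2apic mode enabled. Writing the value back
 * with bit 10 set is all the mode switch takes.
 */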
1062
1063void enable_IR_x2apic(void)
1064{
1065#ifdef CONFIG_INTR_REMAP
1066 int ret;
1067 unsigned long flags;
1068
1069 if (!cpu_has_x2apic)
1070 return;
1071
1072 if (!x2apic_preenabled && disable_x2apic) {
1073 printk(KERN_INFO
1074 "Skipped enabling x2apic and Interrupt-remapping "
1075 "because of nox2apic\n");
1076 return;
1077 }
1078
1079 if (x2apic_preenabled && disable_x2apic)
1080		panic("BIOS already enabled x2apic, can't enforce nox2apic");
1081
1082 if (!x2apic_preenabled && skip_ioapic_setup) {
1083 printk(KERN_INFO
1084 "Skipped enabling x2apic and Interrupt-remapping "
1085 "because of skipping io-apic setup\n");
1086 return;
1087 }
1088
1089 ret = dmar_table_init();
1090 if (ret) {
1091 printk(KERN_INFO
1092 "dmar_table_init() failed with %d:\n", ret);
1093
1094 if (x2apic_preenabled)
1095			panic("x2apic enabled by BIOS, but IR enabling failed");
1096 else
1097 printk(KERN_INFO
1098			       "Not enabling x2apic, Intr-remapping\n");
1099 return;
1100 }
1101
1102 local_irq_save(flags);
1103 mask_8259A();
1104 save_mask_IO_APIC_setup();
1105
1106 ret = enable_intr_remapping(1);
1107
1108 if (ret && x2apic_preenabled) {
1109 local_irq_restore(flags);
1110		panic("x2apic enabled by BIOS, but IR enabling failed");
1111 }
1112
1113 if (ret)
1114 goto end;
1115
1116 if (!x2apic) {
1117 x2apic = 1;
1118 apic_ops = &x2apic_ops;
1119 enable_x2apic();
1120 }
1121end:
1122 if (ret)
1123 /*
1124 * IR enabling failed
1125 */
1126 restore_IO_APIC_setup();
1127 else
1128 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1129
1130 unmask_8259A();
1131 local_irq_restore(flags);
1132
1133 if (!ret) {
1134 if (!x2apic_preenabled)
1135 printk(KERN_INFO
1136 "Enabled x2apic and interrupt-remapping\n");
1137 else
1138 printk(KERN_INFO
1139 "Enabled Interrupt-remapping\n");
1140 } else
1141 printk(KERN_ERR
1142 "Failed to enable Interrupt-remapping and x2apic\n");
1143#else
1144 if (!cpu_has_x2apic)
1145 return;
1146
1147 if (x2apic_preenabled)
1148		panic("x2apic enabled prior to OS handover,"
1149 " enable CONFIG_INTR_REMAP");
1150
1151 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1152	       "and x2apic\n");
1153#endif
1154
1155 return;
1156}
1157
1158/*
1159 * Detect and enable local APICs on non-SMP boards.
1160 * Original code written by Keir Fraser.
1161 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1162 * not correctly set up (usually the APIC timer won't work etc.)
1163 */
1164static int __init detect_init_APIC(void)
1165{
1166 if (!cpu_has_apic) {
1167 printk(KERN_INFO "No local APIC present\n");
1168 return -1;
1169 }
1170
1171 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1172 boot_cpu_physical_apicid = 0;
1173 return 0;
1174}
1175
1176void __init early_init_lapic_mapping(void)
1177{
1178 unsigned long phys_addr;
1179
1180 /*
1181 * If no local APIC can be found then bail out:
1182 * it means there is neither an mptable nor a MADT.
1183 */
1184 if (!smp_found_config)
1185 return;
1186
1187 phys_addr = mp_lapic_addr;
1188
1189 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
1190 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1191 APIC_BASE, phys_addr);
1192
1193 /*
1194 * Fetch the APIC ID of the BSP in case we have a
1195 * default configuration (or the MP table is broken).
1196 */
1197 boot_cpu_physical_apicid = read_apic_id();
1198}
1199
1200/**
1201 * init_apic_mappings - initialize APIC mappings
1202 */
1203void __init init_apic_mappings(void)
1204{
1205 if (x2apic) {
1206 boot_cpu_physical_apicid = read_apic_id();
1207 return;
1208 }
1209
1210 /*
1211 * If no local APIC can be found then set up a fake all
1212 * zeroes page to simulate the local APIC and another
1213 * one for the IO-APIC.
1214 */
1215 if (!smp_found_config && detect_init_APIC()) {
1216 apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
1217 apic_phys = __pa(apic_phys);
1218 } else
1219 apic_phys = mp_lapic_addr;
1220
1221 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
1222 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1223 APIC_BASE, apic_phys);
1224
1225 /*
1226 * Fetch the APIC ID of the BSP in case we have a
1227 * default configuration (or the MP table is broken).
1228 */
1229 boot_cpu_physical_apicid = read_apic_id();
1230}
1231
1232/*
1233 * This initializes the IO-APIC and APIC hardware if this is
1234 * a UP kernel.
1235 */
1236int apic_version[MAX_APICS];
1237
1238int __init APIC_init_uniprocessor(void)
1239{
1240 if (disable_apic) {
1241 printk(KERN_INFO "Apic disabled\n");
1242 return -1;
1243 }
1244 if (!cpu_has_apic) {
1245 disable_apic = 1;
1246 printk(KERN_INFO "Apic disabled by BIOS\n");
1247 return -1;
1248 }
1249
1250 enable_IR_x2apic();
1251 setup_apic_routing();
1252
1253 verify_local_APIC();
1254
1255 connect_bsp_APIC();
1256
1257 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1258 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
1259
1260 setup_local_APIC();
1261
1262 /*
1263 * Now enable IO-APICs; actually this calls clear_IO_APIC.
1264 * We need clear_IO_APIC before enabling any vector on the BP.
1265 */
1266 if (!skip_ioapic_setup && nr_ioapics)
1267 enable_IO_APIC();
1268
1269 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
1270 localise_nmi_watchdog();
1271 end_local_APIC_setup();
1272
1273 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
1274 setup_IO_APIC();
1275 else
1276 nr_ioapics = 0;
1277 setup_boot_APIC_clock();
1278 check_nmi_watchdog();
1279 return 0;
1280}
1281
1282/*
1283 * Local APIC interrupts
1284 */
1285
1286/*
1287 * This interrupt should _never_ happen with our APIC/SMP architecture
1288 */
1289asmlinkage void smp_spurious_interrupt(void)
1290{
1291 unsigned int v;
1292 exit_idle();
1293 irq_enter();
1294 /*
1295 * Check if this really is a spurious interrupt; ACK it only
1296 * if it turns out to be a vectored one. Just in case...
1297 * A genuinely spurious interrupt must not be ACKed.
1298 */
1299 v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
1300 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1301 ack_APIC_irq();
1302
1303 add_pda(irq_spurious_count, 1);
1304 irq_exit();
1305}
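/*
 * The ISR index arithmetic above, unpacked: the 256 in-service bits
 * are spread over eight 32-bit registers at offsets 0x100, 0x110,
 * ..., 0x170, so vector v lives in register v / 32 at bit v % 32,
 * and (v & ~0x1f) >> 1 is exactly (v / 32) * 0x10. For spurious
 * vector 0xff that is APIC_ISR + 0x70 = offset 0x170, bit 31.
 */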
1306
1307/*
1308 * This interrupt should never happen with our APIC/SMP architecture
1309 */
1310asmlinkage void smp_error_interrupt(void)
1311{
1312 unsigned int v, v1;
1313
1314 exit_idle();
1315 irq_enter();
1316 /* First tickle the hardware, only then report what went on. -- REW */
1317 v = apic_read(APIC_ESR);
1318 apic_write(APIC_ESR, 0);
1319 v1 = apic_read(APIC_ESR);
1320 ack_APIC_irq();
1321 atomic_inc(&irq_err_count);
1322
1323 /* Here is what the APIC error bits mean:
1324 0: Send CS error
1325 1: Receive CS error
1326 2: Send accept error
1327 3: Receive accept error
1328 4: Reserved
1329 5: Send illegal vector
1330 6: Received illegal vector
1331 7: Illegal register address
1332 */
1333 printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
1334	       smp_processor_id(), v, v1);
1335 irq_exit();
1336}
1337
1338/**
1339 * connect_bsp_APIC - attach the APIC to the interrupt system
1340 */
1341void __init connect_bsp_APIC(void)
1342{
1343#ifdef CONFIG_X86_32
1344 if (pic_mode) {
1345 /*
1346 * Do not trust the local APIC being empty at bootup.
1347 */
1348 clear_local_APIC();
1349 /*
1350 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1351 * local APIC to INT and NMI lines.
1352 */
1353 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1354 "enabling APIC mode.\n");
1355 outb(0x70, 0x22);
1356 outb(0x01, 0x23);
1357 }
1358#endif
1359 enable_apic_mode();
1360}
1361
1362/**
1363 * disconnect_bsp_APIC - detach the APIC from the interrupt system
1364 * @virt_wire_setup: indicates whether virtual wire mode is selected
1365 *
1366 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1367 * APIC is disabled.
1368 */
1369void disconnect_bsp_APIC(int virt_wire_setup)
1370{
1371 unsigned int value;
1372
1373#ifdef CONFIG_X86_32
1374 if (pic_mode) {
1375 /*
1376 * Put the board back into PIC mode (has an effect only on
1377 * certain older boards). Note that APIC interrupts, including
1378 * IPIs, won't work beyond this point! The only exception are
1379 * INIT IPIs.
1380 */
1381 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1382 "entering PIC mode.\n");
1383 outb(0x70, 0x22);
1384 outb(0x00, 0x23);
1385 return;
1386 }
1387#endif
1388
1389 /* Go back to Virtual Wire compatibility mode */
1390
1391 /* For the spurious interrupt use vector F, and enable it */
1392 value = apic_read(APIC_SPIV);
1393 value &= ~APIC_VECTOR_MASK;
1394 value |= APIC_SPIV_APIC_ENABLED;
1395 value |= 0xf;
1396 apic_write(APIC_SPIV, value);
1397
1398 if (!virt_wire_setup) {
1399 /*
1400 * For LVT0 make it edge triggered, active high,
1401 * external and enabled
1402 */
1403 value = apic_read(APIC_LVT0);
1404 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1405 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1406 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1407 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1408 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1409 apic_write(APIC_LVT0, value);
1410 } else {
1411 /* Disable LVT0 */
1412 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1413 }
1414
1415 /*
1416 * For LVT1 make it edge triggered, active high,
1417 * nmi and enabled
1418 */
1419 value = apic_read(APIC_LVT1);
1420 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1421 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1422 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1423 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1424 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1425 apic_write(APIC_LVT1, value);
1426}
1427
1428void __cpuinit generic_processor_info(int apicid, int version)
1429{
1430 int cpu;
1431 cpumask_t tmp_map;
1432
1433 /*
1434 * Validate version
1435 */
1436 if (version == 0x0) {
1437 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
1438 "fixing up to 0x10. (tell your hw vendor)\n",
1439		       apicid);
1440 version = 0x10;
1441 }
1442 apic_version[apicid] = version;
1443
1444 if (num_processors >= NR_CPUS) {
1445 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1446 " Processor ignored.\n", NR_CPUS);
1447 return;
1448 }
1449
1450 num_processors++;
1451 cpus_complement(tmp_map, cpu_present_map);
1452 cpu = first_cpu(tmp_map);
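	/*
	 * E.g. with CPUs 0 and 1 already present, cpu_present_map is
	 * 0b11, its complement has bit 2 as the first set bit, so the
	 * new processor gets logical cpu number 2 (unless it turns out
	 * to be the BSP, which is forced to 0 below).
	 */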
1453
1454 physid_set(apicid, phys_cpu_present_map);
1455 if (apicid == boot_cpu_physical_apicid) {
1456 /*
1457 * x86_bios_cpu_apicid is required to have processors listed
1458 * in the same order as logical cpu numbers. Hence the first
1459 * entry is the BSP, and so on.
1460 */
1461 cpu = 0;
1462 }
1463 if (apicid > max_physical_apicid)
1464 max_physical_apicid = apicid;
1465
1466#ifdef CONFIG_X86_32
1467 /*
1468 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1469 * but we need to work out other dependencies like SMP_SUSPEND etc
1470 * before this can be done without some confusion.
1471 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1472 * - Ashok Raj <ashok.raj@intel.com>
1473 */
1474 if (max_physical_apicid >= 8) {
1475 switch (boot_cpu_data.x86_vendor) {
1476 case X86_VENDOR_INTEL:
1477 if (!APIC_XAPIC(version)) {
1478 def_to_bigsmp = 0;
1479 break;
1480 }
1481 /* If P4 and above fall through */
1482 case X86_VENDOR_AMD:
1483 def_to_bigsmp = 1;
1484 }
1485 }
1486#endif
1487
1488#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1489 /* are we being called early in kernel startup? */
1490 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1491 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1492 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1493
1494 cpu_to_apicid[cpu] = apicid;
1495 bios_cpu_apicid[cpu] = apicid;
1496 } else {
1497 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1498 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1499 }
1500#endif
1501
1502 cpu_set(cpu, cpu_possible_map);
1503 cpu_set(cpu, cpu_present_map);
1504}
1505
1506int hard_smp_processor_id(void)
1507{
1508 return read_apic_id();
1509}
1510
1511/*
1512 * Power management
1513 */
1514#ifdef CONFIG_PM
1515
1516static struct {
1517 /*
1518 * 'active' is true if the local APIC was enabled by us and
1519 * not the BIOS; this signifies that we are also responsible
1520 * for disabling it before entering apm/acpi suspend
1521 */
1522 int active;
1523 /* r/w apic fields */
1524 unsigned int apic_id;
1525 unsigned int apic_taskpri;
1526 unsigned int apic_ldr;
1527 unsigned int apic_dfr;
1528 unsigned int apic_spiv;
1529 unsigned int apic_lvtt;
1530 unsigned int apic_lvtpc;
1531 unsigned int apic_lvt0;
1532 unsigned int apic_lvt1;
1533 unsigned int apic_lvterr;
1534 unsigned int apic_tmict;
1535 unsigned int apic_tdcr;
1536 unsigned int apic_thmr;
1537} apic_pm_state;
1538
1539static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1540{
1541 unsigned long flags;
1542 int maxlvt;
1543
1544 if (!apic_pm_state.active)
1545 return 0;
1546
1547 maxlvt = lapic_get_maxlvt();
1548
1549 apic_pm_state.apic_id = apic_read(APIC_ID);
1550 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1551 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1552 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
1553 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
1554 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
1555 if (maxlvt >= 4)
1556 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
1557 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
1558 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
1559 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1560 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1561 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1562#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1563 if (maxlvt >= 5)
1564 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1565#endif
1566
1567 local_irq_save(flags);
1568 disable_local_APIC();
1569 local_irq_restore(flags);
1570 return 0;
1571}
1572
1573static int lapic_resume(struct sys_device *dev)
1574{
1575 unsigned int l, h;
1576 unsigned long flags;
1577 int maxlvt;
1578
1579 if (!apic_pm_state.active)
1580 return 0;
1581
1582 maxlvt = lapic_get_maxlvt();
1583
1584 local_irq_save(flags);
1585
1586#ifdef CONFIG_X86_64
1587 if (x2apic)
1588 enable_x2apic();
1589 else
1590#endif
1591 {
1592 /*
1593 * Make sure the APICBASE points to the right address
1594 *
1595 * FIXME! This will be wrong if we ever support suspend on
1596 * SMP! We'll need to do this as part of the CPU restore!
1597 */
1598 rdmsr(MSR_IA32_APICBASE, l, h);
1599 l &= ~MSR_IA32_APICBASE_BASE;
1600 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1601 wrmsr(MSR_IA32_APICBASE, l, h);
1602 }
1603
1604 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1605 apic_write(APIC_ID, apic_pm_state.apic_id);
1606 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
1607 apic_write(APIC_LDR, apic_pm_state.apic_ldr);
1608 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
1609 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1610 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1611 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1612#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1613 if (maxlvt >= 5)
1614 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1615#endif
1616 if (maxlvt >= 4)
1617 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
1618 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
1619 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
1620 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
1621 apic_write(APIC_ESR, 0);
1622 apic_read(APIC_ESR);
1623 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1624 apic_write(APIC_ESR, 0);
1625 apic_read(APIC_ESR);
1626
1627 local_irq_restore(flags);
1628
1629 return 0;
1630}
1631
1632/*
1633 * This device has no shutdown method - fully functioning local APICs
1634 * are needed on every CPU up until machine_halt/restart/poweroff.
1635 */
1636
1637static struct sysdev_class lapic_sysclass = {
1638 .name = "lapic",
1639 .resume = lapic_resume,
1640 .suspend = lapic_suspend,
1641};
1642
1643static struct sys_device device_lapic = {
1644 .id = 0,
1645 .cls = &lapic_sysclass,
1646};
1647
1648static void __cpuinit apic_pm_activate(void)
1649{
1650 apic_pm_state.active = 1;
1651}
1652
1653static int __init init_lapic_sysfs(void)
1654{
1655 int error;
1656
1657 if (!cpu_has_apic)
1658 return 0;
1659 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
1660
1661 error = sysdev_class_register(&lapic_sysclass);
1662 if (!error)
1663 error = sysdev_register(&device_lapic);
1664 return error;
1665}
1666device_initcall(init_lapic_sysfs);
1667
1668#else /* CONFIG_PM */
1669
1670static void apic_pm_activate(void) { }
1671
1672#endif /* CONFIG_PM */
1673
1674/*
1675 * apic_is_clustered_box() -- Check if we can expect good TSC
1676 *
1677 * Thus far, the major user of this is IBM's Summit2 series:
1678 *
1679 * Clustered boxes may have unsynced TSC problems if they are
1680 * multi-chassis. Use available data to take a good guess.
1681 * If in doubt, go HPET.
1682 */
1683__cpuinit int apic_is_clustered_box(void)
1684{
1685 int i, clusters, zeros;
1686 unsigned id;
1687 u16 *bios_cpu_apicid;
1688 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
1689
1690 /*
1691 * There is no such kind of box with AMD CPUs yet.
1692 * Some AMD boxes with quad-core CPUs and 8 sockets have APIC IDs
1693 * in [4, 0x23] or [8, 0x27] and could be mistaken for a vsmp box,
1694 * so this still needs checking...
1695 */
1696 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
1697 return 0;
1698
1699 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1700 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
1701
1702 for (i = 0; i < NR_CPUS; i++) {
1703 /* are we being called early in kernel startup? */
1704 if (bios_cpu_apicid) {
1705 id = bios_cpu_apicid[i];
1706 }
1707 else if (i < nr_cpu_ids) {
1708 if (cpu_present(i))
1709 id = per_cpu(x86_bios_cpu_apicid, i);
1710 else
1711 continue;
1712 }
1713 else
1714 break;
1715
1716 if (id != BAD_APICID)
1717 __set_bit(APIC_CLUSTERID(id), clustermap);
1718 }
1719
1720 /* Problem: Partially populated chassis may not have CPUs in some of
1721 * the APIC clusters they have been allocated. Only present CPUs have
1722 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
1723 * Since clusters are allocated sequentially, count zeros only if
1724 * they are bounded by ones.
1725 */
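	/*
	 * Example: a clustermap of 1,0,1 (bits 0 and 2 set) counts as
	 * three clusters, the hole between the ones being assumed to
	 * be a populated-but-unlisted cluster, while 1,1,0,0,...
	 * counts as two, since trailing zeros are never bounded by a
	 * one.
	 */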
1726 clusters = 0;
1727 zeros = 0;
1728 for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
1729 if (test_bit(i, clustermap)) {
1730 clusters += 1 + zeros;
1731 zeros = 0;
1732 } else
1733 ++zeros;
1734 }
1735
1736 /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
1737 * not guaranteed to be synced between boards
1738 */
1739 if (is_vsmp_box() && clusters > 1)
1740 return 1;
1741
1742 /*
1743 * If clusters > 2, then it should be a multi-chassis box.
1744 * May have to revisit this when multi-core + hyperthreaded CPUs come
1745 * out, but AFAIK this will work even for them.
1746 */
1747 return (clusters > 2);
1748}
1749
1750static __init int setup_nox2apic(char *str)
1751{
1752 disable_x2apic = 1;
1753 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1754 return 0;
1755}
1756early_param("nox2apic", setup_nox2apic);
1757
1758
1759/*
1760 * APIC command line parameters
1761 */
1762static int __init setup_disableapic(char *arg)
1763{
1764 disable_apic = 1;
1765 setup_clear_cpu_cap(X86_FEATURE_APIC);
1766 return 0;
1767}
1768early_param("disableapic", setup_disableapic);
1769
1770/* same as disableapic, for compatibility */
1771static int __init setup_nolapic(char *arg)
1772{
1773 return setup_disableapic(arg);
1774}
1775early_param("nolapic", setup_nolapic);
1776
1777static int __init parse_lapic_timer_c2_ok(char *arg)
1778{
1779 local_apic_timer_c2_ok = 1;
1780 return 0;
1781}
1782early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1783
1784static int __init parse_disable_apic_timer(char *arg)
1785{
1786 disable_apic_timer = 1;
1787 return 0;
1788}
1789early_param("noapictimer", parse_disable_apic_timer);
1790
1791static int __init parse_nolapic_timer(char *arg)
1792{
1793 disable_apic_timer = 1;
1794 return 0;
1795}
1796early_param("nolapic_timer", parse_nolapic_timer);
1797
1798static __init int setup_apicpmtimer(char *s)
1799{
1800 apic_calibrate_pmtmr = 1;
1801 notsc_setup(NULL);
1802 return 0;
1803}
1804__setup("apicpmtimer", setup_apicpmtimer);
1805
1806static int __init apic_set_verbosity(char *arg)
1807{
1808 if (!arg) {
1809#ifdef CONFIG_X86_64
1810 skip_ioapic_setup = 0;
1811 ioapic_force = 1;
1812 return 0;
1813#endif
1814 return -EINVAL;
1815 }
1816
1817 if (strcmp("debug", arg) == 0)
1818 apic_verbosity = APIC_DEBUG;
1819 else if (strcmp("verbose", arg) == 0)
1820 apic_verbosity = APIC_VERBOSE;
1821 else {
1822		printk(KERN_WARNING "APIC Verbosity level %s not recognised;"
1823 " use apic=verbose or apic=debug\n", arg);
1824 return -EINVAL;
1825 }
1826
1827 return 0;
1828}
1829early_param("apic", apic_set_verbosity);
1830
1831static int __init lapic_insert_resource(void)
1832{
1833 if (!apic_phys)
1834 return -1;
1835
1836 /* Put local APIC into the resource map. */
1837 lapic_resource.start = apic_phys;
1838 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
1839 insert_resource(&iomem_resource, &lapic_resource);
1840
1841 return 0;
1842}
1843
1844/*
1845 * We need to call insert_resource() after e820_reserve_resources(),
1846 * which uses request_resource().
1847 */
1848late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index fdd585f9c53d..f0dfe6f17e7e 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * BIOS run time interface routines. 2 * BIOS run time interface routines.
3 * 3 *
4 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
@@ -16,33 +14,128 @@
16 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
19 */ 20 */
20 21
22#include <linux/efi.h>
23#include <asm/efi.h>
24#include <linux/io.h>
21#include <asm/uv/bios.h> 25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h>
27
28struct uv_systab uv_systab;
22 29
23const char * 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
24x86_bios_strerror(long status)
25{ 31{
26 const char *str; 32 struct uv_systab *tab = &uv_systab;
27 switch (status) { 33
28 case 0: str = "Call completed without error"; break; 34 if (!tab->function)
29 case -1: str = "Not implemented"; break; 35 /*
30 case -2: str = "Invalid argument"; break; 36 * BIOS does not support UV systab
31 case -3: str = "Call completed with error"; break; 37 */
32 default: str = "Unknown BIOS status code"; break; 38 return BIOS_STATUS_UNIMPLEMENTED;
33 } 39
34 return str; 40 return efi_call6((void *)__va(tab->function),
41 (u64)which, a1, a2, a3, a4, a5);
35} 42}
36 43
37long 44s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
38x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, 45 u64 a4, u64 a5)
39 unsigned long *drift_info)
40{ 46{
41 struct uv_bios_retval isrv; 47 unsigned long bios_flags;
48 s64 ret;
42 49
43 BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); 50 local_irq_save(bios_flags);
44 *ticks_per_second = isrv.v0; 51 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
45 *drift_info = isrv.v1; 52 local_irq_restore(bios_flags);
46 return isrv.status; 53
54 return ret;
47} 55}
48EXPORT_SYMBOL_GPL(x86_bios_freq_base); 56
57s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
58 u64 a4, u64 a5)
59{
60 s64 ret;
61
62 preempt_disable();
63 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
64 preempt_enable();
65
66 return ret;
67}
68
69
70long sn_partition_id;
71EXPORT_SYMBOL_GPL(sn_partition_id);
72long uv_coherency_id;
73EXPORT_SYMBOL_GPL(uv_coherency_id);
74long uv_region_size;
75EXPORT_SYMBOL_GPL(uv_region_size);
76int uv_type;
77
78
79s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
80 long *region)
81{
82 s64 ret;
83 u64 v0, v1;
84 union partition_info_u part;
85
86 ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
87 (u64)(&v0), (u64)(&v1), 0, 0);
88 if (ret != BIOS_STATUS_SUCCESS)
89 return ret;
90
91 part.val = v0;
92 if (uvtype)
93 *uvtype = part.hub_version;
94 if (partid)
95 *partid = part.partition_id;
96 if (coher)
97 *coher = part.coherence_id;
98 if (region)
99 *region = part.region_size;
100 return ret;
101}
102
103
104s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
105{
106 return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
107 (u64)ticks_per_second, 0, 0, 0);
108}
109EXPORT_SYMBOL_GPL(uv_bios_freq_base);
110
111
112#ifdef CONFIG_EFI
113void uv_bios_init(void)
114{
115 struct uv_systab *tab;
116
117 if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
118 (efi.uv_systab == (unsigned long)NULL)) {
119 printk(KERN_CRIT "No EFI UV System Table.\n");
120 uv_systab.function = (unsigned long)NULL;
121 return;
122 }
123
124 tab = (struct uv_systab *)ioremap(efi.uv_systab,
125 sizeof(struct uv_systab));
126 if (strncmp(tab->signature, "UVST", 4) != 0)
127 printk(KERN_ERR "bad signature in UV system table!");
128
129 /*
130 * Copy table to permanent spot for later use.
131 */
132 memcpy(&uv_systab, tab, sizeof(struct uv_systab));
133 iounmap(tab);
134
135 printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
136}
137#else /* !CONFIG_EFI */
138
139void uv_bios_init(void) { }
140#endif
141
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 32e73520adf7..8f1e31db2ad5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
249 } 249 }
250 numa_set_node(cpu, node); 250 numa_set_node(cpu, node);
251 251
252 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); 252 printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
253#endif 253#endif
254} 254}
255 255
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 06fcce516d51..b0461856acfb 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * (C) 2001-2004 Dave Jones. <davej@codemonkey.org.uk> 2 * (C) 2001-2004 Dave Jones. <davej@redhat.com>
3 * (C) 2002 Padraig Brady. <padraig@antefacto.com> 3 * (C) 2002 Padraig Brady. <padraig@antefacto.com>
4 * 4 *
5 * Licensed under the terms of the GNU GPL License version 2. 5 * Licensed under the terms of the GNU GPL License version 2.
@@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
1019module_param(revid_errata, int, 0644); 1019module_param(revid_errata, int, 0644);
1020MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); 1020MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
1021 1021
1022MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); 1022MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
1023MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); 1023MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
1024MODULE_LICENSE ("GPL"); 1024MODULE_LICENSE ("GPL");
1025 1025
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index b5ced806a316..c1ac5790c63e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void)
246} 246}
247 247
248 248
249MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); 249MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
250MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); 250MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
251MODULE_LICENSE("GPL"); 251MODULE_LICENSE("GPL");
252 252
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 0a61159d7b71..7c7d56b43136 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * AMD K7 Powernow driver. 2 * AMD K7 Powernow driver.
3 * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs. 3 * (C) 2003 Dave Jones on behalf of SuSE Labs.
4 * (C) 2003-2004 Dave Jones <davej@redhat.com> 4 * (C) 2003-2004 Dave Jones <davej@redhat.com>
5 * 5 *
6 * Licensed under the terms of the GNU GPL License version 2. 6 * Licensed under the terms of the GNU GPL License version 2.
@@ -692,7 +692,7 @@ static void __exit powernow_exit (void)
692module_param(acpi_force, int, 0444); 692module_param(acpi_force, int, 0444);
693MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); 693MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
694 694
695MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); 695MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
696MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); 696MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
697MODULE_LICENSE ("GPL"); 697MODULE_LICENSE ("GPL");
698 698
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 84bb395038d8..008d23ba491b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -7,7 +7,7 @@
7 * Support : mark.langsdorf@amd.com 7 * Support : mark.langsdorf@amd.com
8 * 8 *
9 * Based on the powernow-k7.c module written by Dave Jones. 9 * Based on the powernow-k7.c module written by Dave Jones.
10 * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs 10 * (C) 2003 Dave Jones on behalf of SuSE Labs
11 * (C) 2004 Dominik Brodowski <linux@brodo.de> 11 * (C) 2004 Dominik Brodowski <linux@brodo.de>
12 * (C) 2004 Pavel Machek <pavel@suse.cz> 12 * (C) 2004 Pavel Machek <pavel@suse.cz>
13 * Licensed under the terms of the GNU GPL License version 2. 13 * Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 191f7263c61d..04d0376b64b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -431,7 +431,7 @@ static void __exit speedstep_exit(void)
431} 431}
432 432
433 433
434MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); 434MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
435MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); 435MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
436MODULE_LICENSE ("GPL"); 436MODULE_LICENSE ("GPL");
437 437
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 99468dbd08da..cce0b6118d55 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
174 node = first_node(node_online_map); 174 node = first_node(node_online_map);
175 numa_set_node(cpu, node); 175 numa_set_node(cpu, node);
176 176
177 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); 177 printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
178#endif 178#endif
179} 179}
180 180
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index f390c9f66351..dd3af6e7b39a 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Athlon/Hammer specific Machine Check Exception Reporting 2 * Athlon specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk> 3 * (C) Copyright 2002 Dave Jones <davej@redhat.com>
4 */ 4 */
5 5
6#include <linux/init.h> 6#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 774d87cfd8cd..0ebf3fc6a610 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * mce.c - x86 Machine Check Exception Reporting 2 * mce.c - x86 Machine Check Exception Reporting
3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk> 3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com>
4 */ 4 */
5 5
6#include <linux/init.h> 6#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index cc1fccdd31e0..a74af128efc9 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Non Fatal Machine Check Exception Reporting 2 * Non Fatal Machine Check Exception Reporting
3 * 3 *
4 * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk> 4 * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
5 * 5 *
6 * This file contains routines to check for non-fatal MCEs every 15s 6 * This file contains routines to check for non-fatal MCEs every 15s
7 * 7 *
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6bff382094f5..9abd48b22674 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -17,6 +17,8 @@
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/nmi.h> 19#include <linux/nmi.h>
20#include <linux/kprobes.h>
21
20#include <asm/apic.h> 22#include <asm/apic.h>
21#include <asm/intel_arch_perfmon.h> 23#include <asm/intel_arch_perfmon.h>
22 24
@@ -336,7 +338,8 @@ static void single_msr_unreserve(void)
336 release_perfctr_nmi(wd_ops->perfctr); 338 release_perfctr_nmi(wd_ops->perfctr);
337} 339}
338 340
339static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 341static void __kprobes
342single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
340{ 343{
341 /* start the cycle over again */ 344 /* start the cycle over again */
342 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); 345 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
401 return 1; 404 return 1;
402} 405}
403 406
404static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 407static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
405{ 408{
406 /* 409 /*
407 * P6 based Pentium M need to re-unmask 410 * P6 based Pentium M need to re-unmask
@@ -605,7 +608,7 @@ static void p4_unreserve(void)
605 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); 608 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
606} 609}
607 610
608static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 611static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
609{ 612{
610 unsigned dummy; 613 unsigned dummy;
611 /* 614 /*
@@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
784 return hz; 787 return hz;
785} 788}
786 789
787int lapic_wd_event(unsigned nmi_hz) 790int __kprobes lapic_wd_event(unsigned nmi_hz)
788{ 791{
789 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 792 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
790 u64 ctr; 793 u64 ctr;
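
The hunk above marks the watchdog rearm path __kprobes. For reference, __kprobes places a function in the .kprobes.text section, which the kprobes core refuses to instrument; anything reachable from the NMI watchdog needs this treatment, since a probe breakpoint taken inside an NMI handler would re-enter the trap machinery. A minimal sketch of the pattern (the helper below is illustrative, not part of the patch):

#include <linux/kprobes.h>

/* illustrative only: a helper that may run in NMI context and must
 * therefore never carry a kprobe breakpoint */
static int __kprobes nmi_safe_helper(unsigned nmi_hz)
{
	return nmi_hz > 0;
}
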
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 945a31cdd81f..1119d247fe11 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -367,6 +367,10 @@ void __init efi_init(void)
367 efi.smbios = config_tables[i].table; 367 efi.smbios = config_tables[i].table;
368 printk(" SMBIOS=0x%lx ", config_tables[i].table); 368 printk(" SMBIOS=0x%lx ", config_tables[i].table);
369 } else if (!efi_guidcmp(config_tables[i].guid, 369 } else if (!efi_guidcmp(config_tables[i].guid,
370 UV_SYSTEM_TABLE_GUID)) {
371 efi.uv_systab = config_tables[i].table;
372 printk(" UVsystab=0x%lx ", config_tables[i].table);
373 } else if (!efi_guidcmp(config_tables[i].guid,
370 HCDP_TABLE_GUID)) { 374 HCDP_TABLE_GUID)) {
371 efi.hcdp = config_tables[i].table; 375 efi.hcdp = config_tables[i].table;
372 printk(" HCDP=0x%lx ", config_tables[i].table); 376 printk(" HCDP=0x%lx ", config_tables[i].table);
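
The new branch extends efi_init()'s walk of the EFI configuration tables, where each entry pairs a vendor GUID with a physical table address. Condensed, the scan looks like this (a sketch; "tables" and "nr_tables" stand in for efi_init()'s locals):

/* sketch of the config-table scan; names are illustrative */
for (i = 0; i < nr_tables; i++) {
	if (!efi_guidcmp(tables[i].guid, UV_SYSTEM_TABLE_GUID)) {
		efi.uv_systab = tables[i].table;
		printk(" UVsystab=0x%lx ", tables[i].table);
	}
}
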
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe39..c356423a6026 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -629,7 +629,7 @@ ENTRY(interrupt)
629ENTRY(irq_entries_start) 629ENTRY(irq_entries_start)
630 RING0_INT_FRAME 630 RING0_INT_FRAME
631vector=0 631vector=0
632.rept NR_IRQS 632.rept NR_VECTORS
633 ALIGN 633 ALIGN
634 .if vector 634 .if vector
635 CFI_ADJUST_CFA_OFFSET -4 635 CFI_ADJUST_CFA_OFFSET -4
@@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback)
1153#ifdef CONFIG_DYNAMIC_FTRACE 1153#ifdef CONFIG_DYNAMIC_FTRACE
1154 1154
1155ENTRY(mcount) 1155ENTRY(mcount)
1156 pushl %eax
1157 pushl %ecx
1158 pushl %edx
1159 movl 0xc(%esp), %eax
1160 subl $MCOUNT_INSN_SIZE, %eax
1161
1162.globl mcount_call
1163mcount_call:
1164 call ftrace_stub
1165
1166 popl %edx
1167 popl %ecx
1168 popl %eax
1169
1170 ret 1156 ret
1171END(mcount) 1157END(mcount)
1172 1158
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1db6ce4314e1..09e7145484c5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -64,32 +64,6 @@
64#ifdef CONFIG_FTRACE 64#ifdef CONFIG_FTRACE
65#ifdef CONFIG_DYNAMIC_FTRACE 65#ifdef CONFIG_DYNAMIC_FTRACE
66ENTRY(mcount) 66ENTRY(mcount)
67
68 subq $0x38, %rsp
69 movq %rax, (%rsp)
70 movq %rcx, 8(%rsp)
71 movq %rdx, 16(%rsp)
72 movq %rsi, 24(%rsp)
73 movq %rdi, 32(%rsp)
74 movq %r8, 40(%rsp)
75 movq %r9, 48(%rsp)
76
77 movq 0x38(%rsp), %rdi
78 subq $MCOUNT_INSN_SIZE, %rdi
79
80.globl mcount_call
81mcount_call:
82 call ftrace_stub
83
84 movq 48(%rsp), %r9
85 movq 40(%rsp), %r8
86 movq 32(%rsp), %rdi
87 movq 24(%rsp), %rsi
88 movq 16(%rsp), %rdx
89 movq 8(%rsp), %rcx
90 movq (%rsp), %rax
91 addq $0x38, %rsp
92
93 retq 67 retq
94END(mcount) 68END(mcount)
95 69
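
Both entry_32.S and entry_64.S shrink the DYNAMIC_FTRACE mcount stub to a bare return. The register save/restore and the patchable mcount_call site are no longer needed: as the ftrace.c hunk below reports ("converting mcount calls to ..."), dynamic ftrace rewrites every compiler-generated call site to the selected 5-byte nop, so mcount itself only runs briefly at boot and can do nothing. A pseudo-C sketch of that conversion (names are illustrative, not from the patch):

/* illustrative: replace each recorded "call mcount" site with the
 * nop sequence chosen by ftrace_dyn_arch_init() */
static void convert_mcount_sites(unsigned long *site, int nr_sites,
				 unsigned char *call_insn,
				 unsigned char *nop_insn)
{
	int i;

	for (i = 0; i < nr_sites; i++)
		ftrace_modify_code(site[i], call_insn, nop_insn);
}
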
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ab115cd15fdf..d073d981a730 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -11,17 +11,18 @@
11 11
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/hardirq.h> 13#include <linux/hardirq.h>
14#include <linux/uaccess.h>
14#include <linux/ftrace.h> 15#include <linux/ftrace.h>
15#include <linux/percpu.h> 16#include <linux/percpu.h>
16#include <linux/init.h> 17#include <linux/init.h>
17#include <linux/list.h> 18#include <linux/list.h>
18 19
19#include <asm/alternative.h>
20#include <asm/ftrace.h> 20#include <asm/ftrace.h>
21#include <asm/nops.h>
21 22
22 23
23/* Long is fine, even if it is only 4 bytes ;-) */ 24/* Long is fine, even if it is only 4 bytes ;-) */
24static long *ftrace_nop; 25static unsigned long *ftrace_nop;
25 26
26union ftrace_code_union { 27union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE]; 28 char code[MCOUNT_INSN_SIZE];
@@ -60,11 +61,7 @@ notrace int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 61ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 62 unsigned char *new_code)
62{ 63{
63 unsigned replaced; 64 unsigned char replaced[MCOUNT_INSN_SIZE];
64 unsigned old = *(unsigned *)old_code; /* 4 bytes */
65 unsigned new = *(unsigned *)new_code; /* 4 bytes */
66 unsigned char newch = new_code[4];
67 int faulted = 0;
68 65
69 /* 66 /*
70 * Note: Due to modules and __init, code can 67 * Note: Due to modules and __init, code can
@@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
72 * as well as code changing. 69 * as well as code changing.
73 * 70 *
74 * No real locking needed, this code is run through 71 * No real locking needed, this code is run through
75 * kstop_machine. 72 * kstop_machine, or before SMP starts.
76 */ 73 */
77 asm volatile ( 74 if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
78 "1: lock\n" 75 return 1;
79 " cmpxchg %3, (%2)\n" 76
80 " jnz 2f\n" 77 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
81 " movb %b4, 4(%2)\n" 78 return 2;
82 "2:\n"
83 ".section .fixup, \"ax\"\n"
84 "3: movl $1, %0\n"
85 " jmp 2b\n"
86 ".previous\n"
87 _ASM_EXTABLE(1b, 3b)
88 : "=r"(faulted), "=a"(replaced)
89 : "r"(ip), "r"(new), "c"(newch),
90 "0"(faulted), "a"(old)
91 : "memory");
92 sync_core();
93 79
94 if (replaced != old && replaced != new) 80 WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
95 faulted = 2; 81 MCOUNT_INSN_SIZE));
96 82
97 return faulted; 83 sync_core();
84
85 return 0;
98} 86}
99 87
100notrace int ftrace_update_ftrace_func(ftrace_func_t func) 88notrace int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -112,30 +100,76 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
112 100
113notrace int ftrace_mcount_set(unsigned long *data) 101notrace int ftrace_mcount_set(unsigned long *data)
114{ 102{
115 unsigned long ip = (long)(&mcount_call); 103 /* mcount is initialized as a nop */
116 unsigned long *addr = data; 104 *data = 0;
117 unsigned char old[MCOUNT_INSN_SIZE], *new;
118
119 /*
120 * Replace the mcount stub with a pointer to the
121 * ip recorder function.
122 */
123 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
124 new = ftrace_call_replace(ip, *addr);
125 *addr = ftrace_modify_code(ip, old, new);
126
127 return 0; 105 return 0;
128} 106}
129 107
130int __init ftrace_dyn_arch_init(void *data) 108int __init ftrace_dyn_arch_init(void *data)
131{ 109{
132 const unsigned char *const *noptable = find_nop_table(); 110 extern const unsigned char ftrace_test_p6nop[];
133 111 extern const unsigned char ftrace_test_nop5[];
134 /* This is running in kstop_machine */ 112 extern const unsigned char ftrace_test_jmp[];
135 113 int faulted = 0;
136 ftrace_mcount_set(data);
137 114
138 ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE]; 115 /*
116 * There is no good nop for all x86 archs.
117 * We will default to using the P6_NOP5, but first we
118 * will test to make sure that the nop will actually
119 * work on this CPU. If it faults, we will then
 120 * work on this CPU. If it faults, we will then
 120 * go to a less efficient 5 byte nop. If that fails
 121 * we then just use a jmp as our nop. This isn't the most
 122 * efficient nop, but we cannot use a multi-part nop,
 123 * since we would then risk being preempted in the middle
 124 * of that nop, and if tracing were enabled at that point,
 125 * it might cause a system crash.
126 *
127 * TODO: check the cpuid to determine the best nop.
128 */
129 asm volatile (
130 "jmp ftrace_test_jmp\n"
131 /* This code needs to stay around */
132 ".section .text, \"ax\"\n"
133 "ftrace_test_jmp:"
134 "jmp ftrace_test_p6nop\n"
135 "nop\n"
136 "nop\n"
137 "nop\n" /* 2 byte jmp + 3 bytes */
138 "ftrace_test_p6nop:"
139 P6_NOP5
140 "jmp 1f\n"
141 "ftrace_test_nop5:"
142 ".byte 0x66,0x66,0x66,0x66,0x90\n"
143 "jmp 1f\n"
144 ".previous\n"
145 "1:"
146 ".section .fixup, \"ax\"\n"
147 "2: movl $1, %0\n"
148 " jmp ftrace_test_nop5\n"
149 "3: movl $2, %0\n"
150 " jmp 1b\n"
151 ".previous\n"
152 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
153 _ASM_EXTABLE(ftrace_test_nop5, 3b)
154 : "=r"(faulted) : "0" (faulted));
155
156 switch (faulted) {
157 case 0:
158 pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
159 ftrace_nop = (unsigned long *)ftrace_test_p6nop;
160 break;
161 case 1:
162 pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
163 ftrace_nop = (unsigned long *)ftrace_test_nop5;
164 break;
165 case 2:
166 pr_info("ftrace: converting mcount calls to jmp . + 5\n");
167 ftrace_nop = (unsigned long *)ftrace_test_jmp;
168 break;
169 }
170
 171 /* The return code is returned via data */
172 *(unsigned long *)data = 0;
139 173
140 return 0; 174 return 0;
141} 175}
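
The rewritten ftrace_modify_code() trades the cmpxchg-based patcher for fault-tolerant copies, so a call site that has disappeared (unloaded module, discarded __init text) fails gracefully instead of oopsing. Condensed from the hunk above, for reference:

/* restatement of the patching protocol shown in the hunk */
static int patch_insn(unsigned long ip, unsigned char *old,
		      unsigned char *new)
{
	unsigned char cur[MCOUNT_INSN_SIZE];

	/* read the live bytes; nonzero means the page is gone */
	if (__copy_from_user_inatomic(cur, (char __user *)ip,
				      MCOUNT_INSN_SIZE))
		return 1;
	/* refuse to patch code that no longer matches */
	if (memcmp(cur, old, MCOUNT_INSN_SIZE) != 0)
		return 2;
	/* write the replacement, then resync the insn stream */
	__copy_to_user_inatomic((char __user *)ip, new, MCOUNT_INSN_SIZE);
	sync_core();
	return 0;
}
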
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 9eca5ba7a6b1..2ec2de8d8c46 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
179 * is an example). 179 * is an example).
180 */ 180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && 181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) 182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
183 printk(KERN_DEBUG "system APIC only can use physical flat");
183 return 1; 184 return 1;
185 }
184#endif 186#endif
185 187
186 return 0; 188 return 0;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 33581d94a90e..bfd532843df6 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
341 341
342static __init void uv_rtc_init(void) 342static __init void uv_rtc_init(void)
343{ 343{
344 long status, ticks_per_sec, drift; 344 long status;
345 u64 ticks_per_sec;
345 346
346 status = 347 status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
347 x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, 348 &ticks_per_sec);
348 &drift); 349 if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
349 if (status != 0 || ticks_per_sec < 100000) {
350 printk(KERN_WARNING 350 printk(KERN_WARNING
351 "unable to determine platform RTC clock frequency, " 351 "unable to determine platform RTC clock frequency, "
352 "guessing.\n"); 352 "guessing.\n");
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
356 sn_rtc_cycles_per_second = ticks_per_sec; 356 sn_rtc_cycles_per_second = ticks_per_sec;
357} 357}
358 358
359static bool uv_system_inited; 359/*
360 * Called on each cpu to initialize the per_cpu UV data area.
361 * ZZZ hotplug not supported yet
362 */
363void __cpuinit uv_cpu_init(void)
364{
 365 /* CPU 0 initialization will be done via uv_system_init. */
366 if (!uv_blade_info)
367 return;
368
369 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
370
371 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
372 set_x2apic_extra_bits(uv_hub_info->pnode);
373}
374
360 375
361void __init uv_system_init(void) 376void __init uv_system_init(void)
362{ 377{
@@ -412,6 +427,9 @@ void __init uv_system_init(void)
412 gnode_upper = (((unsigned long)node_id.s.node_id) & 427 gnode_upper = (((unsigned long)node_id.s.node_id) &
413 ~((1 << n_val) - 1)) << m_val; 428 ~((1 << n_val) - 1)) << m_val;
414 429
430 uv_bios_init();
431 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
432 &uv_coherency_id, &uv_region_size);
415 uv_rtc_init(); 433 uv_rtc_init();
416 434
417 for_each_present_cpu(cpu) { 435 for_each_present_cpu(cpu) {
@@ -433,7 +451,7 @@ void __init uv_system_init(void)
433 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 451 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
434 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 452 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
435 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 453 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
436 uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ 454 uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
437 uv_node_to_blade[nid] = blade; 455 uv_node_to_blade[nid] = blade;
438 uv_cpu_to_blade[cpu] = blade; 456 uv_cpu_to_blade[cpu] = blade;
439 max_pnode = max(pnode, max_pnode); 457 max_pnode = max(pnode, max_pnode);
@@ -448,21 +466,6 @@ void __init uv_system_init(void)
448 map_mmr_high(max_pnode); 466 map_mmr_high(max_pnode);
449 map_config_high(max_pnode); 467 map_config_high(max_pnode);
450 map_mmioh_high(max_pnode); 468 map_mmioh_high(max_pnode);
451 uv_system_inited = true;
452}
453 469
454/* 470 uv_cpu_init();
455 * Called on each cpu to initialize the per_cpu UV data area.
456 * ZZZ hotplug not supported yet
457 */
458void __cpuinit uv_cpu_init(void)
459{
460 BUG_ON(!uv_system_inited);
461
462 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
463
464 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
465 set_x2apic_extra_bits(uv_hub_info->pnode);
466} 471}
467
468
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acf62fc233da..77017e834cf7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,29 +1,49 @@
1#include <linux/clocksource.h> 1#include <linux/clocksource.h>
2#include <linux/clockchips.h> 2#include <linux/clockchips.h>
3#include <linux/interrupt.h>
4#include <linux/sysdev.h>
3#include <linux/delay.h> 5#include <linux/delay.h>
4#include <linux/errno.h> 6#include <linux/errno.h>
5#include <linux/hpet.h> 7#include <linux/hpet.h>
6#include <linux/init.h> 8#include <linux/init.h>
7#include <linux/sysdev.h> 9#include <linux/cpu.h>
8#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/io.h>
9 12
10#include <asm/fixmap.h> 13#include <asm/fixmap.h>
11#include <asm/hpet.h>
12#include <asm/i8253.h> 14#include <asm/i8253.h>
13#include <asm/io.h> 15#include <asm/hpet.h>
14 16
15#define HPET_MASK CLOCKSOURCE_MASK(32) 17#define HPET_MASK CLOCKSOURCE_MASK(32)
16#define HPET_SHIFT 22 18#define HPET_SHIFT 22
17 19
18/* FSEC = 10^-15 20/* FSEC = 10^-15
19 NSEC = 10^-9 */ 21 NSEC = 10^-9 */
20#define FSEC_PER_NSEC 1000000L 22#define FSEC_PER_NSEC 1000000L
23
24#define HPET_DEV_USED_BIT 2
25#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
26#define HPET_DEV_VALID 0x8
27#define HPET_DEV_FSB_CAP 0x1000
28#define HPET_DEV_PERI_CAP 0x2000
29
30#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
21 31
22/* 32/*
23 * HPET address is set in acpi/boot.c, when an ACPI entry exists 33 * HPET address is set in acpi/boot.c, when an ACPI entry exists
24 */ 34 */
25unsigned long hpet_address; 35unsigned long hpet_address;
26static void __iomem *hpet_virt_address; 36unsigned long hpet_num_timers;
37static void __iomem *hpet_virt_address;
38
39struct hpet_dev {
40 struct clock_event_device evt;
41 unsigned int num;
42 int cpu;
43 unsigned int irq;
44 unsigned int flags;
45 char name[10];
46};
27 47
28unsigned long hpet_readl(unsigned long a) 48unsigned long hpet_readl(unsigned long a)
29{ 49{
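
EVT_TO_HPET_DEV() above is the container_of() idiom: the clockevents core hands callbacks only the embedded struct clock_event_device, and per-timer state is recovered by walking back to the enclosing struct hpet_dev. For example (the handler below is illustrative):

/* illustrative callback showing how per-timer state is recovered */
static void example_handler(struct clock_event_device *evt)
{
	struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);

	printk(KERN_DEBUG "hpet timer %u bound to cpu %d\n",
	       hdev->num, hdev->cpu);
}
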
@@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void)
59static int boot_hpet_disable; 79static int boot_hpet_disable;
60int hpet_force_user; 80int hpet_force_user;
61 81
62static int __init hpet_setup(char* str) 82static int __init hpet_setup(char *str)
63{ 83{
64 if (str) { 84 if (str) {
65 if (!strncmp("disable", str, 7)) 85 if (!strncmp("disable", str, 7))
@@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet);
80 100
81static inline int is_hpet_capable(void) 101static inline int is_hpet_capable(void)
82{ 102{
83 return (!boot_hpet_disable && hpet_address); 103 return !boot_hpet_disable && hpet_address;
84} 104}
85 105
86/* 106/*
@@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
102 * timer 0 and timer 1 in case of RTC emulation. 122 * timer 0 and timer 1 in case of RTC emulation.
103 */ 123 */
104#ifdef CONFIG_HPET 124#ifdef CONFIG_HPET
125
126static void hpet_reserve_msi_timers(struct hpet_data *hd);
127
105static void hpet_reserve_platform_timers(unsigned long id) 128static void hpet_reserve_platform_timers(unsigned long id)
106{ 129{
107 struct hpet __iomem *hpet = hpet_virt_address; 130 struct hpet __iomem *hpet = hpet_virt_address;
@@ -111,10 +134,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
111 134
112 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; 135 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
113 136
114 memset(&hd, 0, sizeof (hd)); 137 memset(&hd, 0, sizeof(hd));
115 hd.hd_phys_address = hpet_address; 138 hd.hd_phys_address = hpet_address;
116 hd.hd_address = hpet; 139 hd.hd_address = hpet;
117 hd.hd_nirqs = nrtimers; 140 hd.hd_nirqs = nrtimers;
118 hpet_reserve_timer(&hd, 0); 141 hpet_reserve_timer(&hd, 0);
119 142
120#ifdef CONFIG_HPET_EMULATE_RTC 143#ifdef CONFIG_HPET_EMULATE_RTC
@@ -130,10 +153,12 @@ static void hpet_reserve_platform_timers(unsigned long id)
130 hd.hd_irq[1] = HPET_LEGACY_RTC; 153 hd.hd_irq[1] = HPET_LEGACY_RTC;
131 154
132 for (i = 2; i < nrtimers; timer++, i++) { 155 for (i = 2; i < nrtimers; timer++, i++) {
133 hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >> 156 hd.hd_irq[i] = (readl(&timer->hpet_config) &
134 Tn_INT_ROUTE_CNF_SHIFT; 157 Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
135 } 158 }
136 159
160 hpet_reserve_msi_timers(&hd);
161
137 hpet_alloc(&hd); 162 hpet_alloc(&hd);
138 163
139} 164}
@@ -227,60 +252,70 @@ static void hpet_legacy_clockevent_register(void)
227 printk(KERN_DEBUG "hpet clockevent registered\n"); 252 printk(KERN_DEBUG "hpet clockevent registered\n");
228} 253}
229 254
230static void hpet_legacy_set_mode(enum clock_event_mode mode, 255static int hpet_setup_msi_irq(unsigned int irq);
231 struct clock_event_device *evt) 256
257static void hpet_set_mode(enum clock_event_mode mode,
258 struct clock_event_device *evt, int timer)
232{ 259{
233 unsigned long cfg, cmp, now; 260 unsigned long cfg, cmp, now;
234 uint64_t delta; 261 uint64_t delta;
235 262
236 switch(mode) { 263 switch (mode) {
237 case CLOCK_EVT_MODE_PERIODIC: 264 case CLOCK_EVT_MODE_PERIODIC:
238 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; 265 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
239 delta >>= hpet_clockevent.shift; 266 delta >>= evt->shift;
240 now = hpet_readl(HPET_COUNTER); 267 now = hpet_readl(HPET_COUNTER);
241 cmp = now + (unsigned long) delta; 268 cmp = now + (unsigned long) delta;
242 cfg = hpet_readl(HPET_T0_CFG); 269 cfg = hpet_readl(HPET_Tn_CFG(timer));
243 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 270 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
244 HPET_TN_SETVAL | HPET_TN_32BIT; 271 HPET_TN_SETVAL | HPET_TN_32BIT;
245 hpet_writel(cfg, HPET_T0_CFG); 272 hpet_writel(cfg, HPET_Tn_CFG(timer));
246 /* 273 /*
247 * The first write after writing TN_SETVAL to the 274 * The first write after writing TN_SETVAL to the
248 * config register sets the counter value, the second 275 * config register sets the counter value, the second
249 * write sets the period. 276 * write sets the period.
250 */ 277 */
251 hpet_writel(cmp, HPET_T0_CMP); 278 hpet_writel(cmp, HPET_Tn_CMP(timer));
252 udelay(1); 279 udelay(1);
253 hpet_writel((unsigned long) delta, HPET_T0_CMP); 280 hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
254 break; 281 break;
255 282
256 case CLOCK_EVT_MODE_ONESHOT: 283 case CLOCK_EVT_MODE_ONESHOT:
257 cfg = hpet_readl(HPET_T0_CFG); 284 cfg = hpet_readl(HPET_Tn_CFG(timer));
258 cfg &= ~HPET_TN_PERIODIC; 285 cfg &= ~HPET_TN_PERIODIC;
259 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; 286 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
260 hpet_writel(cfg, HPET_T0_CFG); 287 hpet_writel(cfg, HPET_Tn_CFG(timer));
261 break; 288 break;
262 289
263 case CLOCK_EVT_MODE_UNUSED: 290 case CLOCK_EVT_MODE_UNUSED:
264 case CLOCK_EVT_MODE_SHUTDOWN: 291 case CLOCK_EVT_MODE_SHUTDOWN:
265 cfg = hpet_readl(HPET_T0_CFG); 292 cfg = hpet_readl(HPET_Tn_CFG(timer));
266 cfg &= ~HPET_TN_ENABLE; 293 cfg &= ~HPET_TN_ENABLE;
267 hpet_writel(cfg, HPET_T0_CFG); 294 hpet_writel(cfg, HPET_Tn_CFG(timer));
268 break; 295 break;
269 296
270 case CLOCK_EVT_MODE_RESUME: 297 case CLOCK_EVT_MODE_RESUME:
271 hpet_enable_legacy_int(); 298 if (timer == 0) {
299 hpet_enable_legacy_int();
300 } else {
301 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
302 hpet_setup_msi_irq(hdev->irq);
303 disable_irq(hdev->irq);
304 irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
305 enable_irq(hdev->irq);
306 }
272 break; 307 break;
273 } 308 }
274} 309}
275 310
276static int hpet_legacy_next_event(unsigned long delta, 311static int hpet_next_event(unsigned long delta,
277 struct clock_event_device *evt) 312 struct clock_event_device *evt, int timer)
278{ 313{
279 u32 cnt; 314 u32 cnt;
280 315
281 cnt = hpet_readl(HPET_COUNTER); 316 cnt = hpet_readl(HPET_COUNTER);
282 cnt += (u32) delta; 317 cnt += (u32) delta;
283 hpet_writel(cnt, HPET_T0_CMP); 318 hpet_writel(cnt, HPET_Tn_CMP(timer));
284 319
285 /* 320 /*
286 * We need to read back the CMP register to make sure that 321 * We need to read back the CMP register to make sure that
@@ -292,6 +327,347 @@ static int hpet_legacy_next_event(unsigned long delta,
292 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; 327 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
293} 328}
294 329
330static void hpet_legacy_set_mode(enum clock_event_mode mode,
331 struct clock_event_device *evt)
332{
333 hpet_set_mode(mode, evt, 0);
334}
335
336static int hpet_legacy_next_event(unsigned long delta,
337 struct clock_event_device *evt)
338{
339 return hpet_next_event(delta, evt, 0);
340}
341
342/*
343 * HPET MSI Support
344 */
345#ifdef CONFIG_PCI_MSI
346
347static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
348static struct hpet_dev *hpet_devs;
349
350void hpet_msi_unmask(unsigned int irq)
351{
352 struct hpet_dev *hdev = get_irq_data(irq);
353 unsigned long cfg;
354
355 /* unmask it */
356 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
357 cfg |= HPET_TN_FSB;
358 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
359}
360
361void hpet_msi_mask(unsigned int irq)
362{
363 unsigned long cfg;
364 struct hpet_dev *hdev = get_irq_data(irq);
365
366 /* mask it */
367 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
368 cfg &= ~HPET_TN_FSB;
369 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
370}
371
372void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
373{
374 struct hpet_dev *hdev = get_irq_data(irq);
375
376 hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
377 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
378}
379
380void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
381{
382 struct hpet_dev *hdev = get_irq_data(irq);
383
384 msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
385 msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
386 msg->address_hi = 0;
387}
388
389static void hpet_msi_set_mode(enum clock_event_mode mode,
390 struct clock_event_device *evt)
391{
392 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
393 hpet_set_mode(mode, evt, hdev->num);
394}
395
396static int hpet_msi_next_event(unsigned long delta,
397 struct clock_event_device *evt)
398{
399 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
400 return hpet_next_event(delta, evt, hdev->num);
401}
402
403static int hpet_setup_msi_irq(unsigned int irq)
404{
405 if (arch_setup_hpet_msi(irq)) {
406 destroy_irq(irq);
407 return -EINVAL;
408 }
409 return 0;
410}
411
412static int hpet_assign_irq(struct hpet_dev *dev)
413{
414 unsigned int irq;
415
416 irq = create_irq();
417 if (!irq)
418 return -EINVAL;
419
420 set_irq_data(irq, dev);
421
422 if (hpet_setup_msi_irq(irq))
423 return -EINVAL;
424
425 dev->irq = irq;
426 return 0;
427}
428
429static irqreturn_t hpet_interrupt_handler(int irq, void *data)
430{
431 struct hpet_dev *dev = (struct hpet_dev *)data;
432 struct clock_event_device *hevt = &dev->evt;
433
434 if (!hevt->event_handler) {
435 printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
436 dev->num);
437 return IRQ_HANDLED;
438 }
439
440 hevt->event_handler(hevt);
441 return IRQ_HANDLED;
442}
443
444static int hpet_setup_irq(struct hpet_dev *dev)
445{
446
447 if (request_irq(dev->irq, hpet_interrupt_handler,
448 IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
449 return -1;
450
451 disable_irq(dev->irq);
452 irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
453 enable_irq(dev->irq);
454
455 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
456 dev->name, dev->irq);
457
458 return 0;
459}
460
 461/* This should be called on the specified @cpu */
462static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
463{
464 struct clock_event_device *evt = &hdev->evt;
465 uint64_t hpet_freq;
466
467 WARN_ON(cpu != smp_processor_id());
468 if (!(hdev->flags & HPET_DEV_VALID))
469 return;
470
471 if (hpet_setup_msi_irq(hdev->irq))
472 return;
473
474 hdev->cpu = cpu;
475 per_cpu(cpu_hpet_dev, cpu) = hdev;
476 evt->name = hdev->name;
477 hpet_setup_irq(hdev);
478 evt->irq = hdev->irq;
479
480 evt->rating = 110;
481 evt->features = CLOCK_EVT_FEAT_ONESHOT;
482 if (hdev->flags & HPET_DEV_PERI_CAP)
483 evt->features |= CLOCK_EVT_FEAT_PERIODIC;
484
485 evt->set_mode = hpet_msi_set_mode;
486 evt->set_next_event = hpet_msi_next_event;
487 evt->shift = 32;
488
489 /*
 490 * The period is a femtoseconds value. We need to calculate the
491 * scaled math multiplication factor for nanosecond to hpet tick
492 * conversion.
493 */
494 hpet_freq = 1000000000000000ULL;
495 do_div(hpet_freq, hpet_period);
496 evt->mult = div_sc((unsigned long) hpet_freq,
497 NSEC_PER_SEC, evt->shift);
498 /* Calculate the max delta */
499 evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
500 /* 5 usec minimum reprogramming delta. */
501 evt->min_delta_ns = 5000;
502
503 evt->cpumask = cpumask_of_cpu(hdev->cpu);
504 clockevents_register_device(evt);
505}
506
507#ifdef CONFIG_HPET
508/* Reserve at least one timer for userspace (/dev/hpet) */
509#define RESERVE_TIMERS 1
510#else
511#define RESERVE_TIMERS 0
512#endif
513
514static void hpet_msi_capability_lookup(unsigned int start_timer)
515{
516 unsigned int id;
517 unsigned int num_timers;
518 unsigned int num_timers_used = 0;
519 int i;
520
521 id = hpet_readl(HPET_ID);
522
523 num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
524 num_timers++; /* Value read out starts from 0 */
525
526 hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
527 if (!hpet_devs)
528 return;
529
530 hpet_num_timers = num_timers;
531
532 for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
533 struct hpet_dev *hdev = &hpet_devs[num_timers_used];
534 unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
535
536 /* Only consider HPET timer with MSI support */
537 if (!(cfg & HPET_TN_FSB_CAP))
538 continue;
539
540 hdev->flags = 0;
541 if (cfg & HPET_TN_PERIODIC_CAP)
542 hdev->flags |= HPET_DEV_PERI_CAP;
543 hdev->num = i;
544
545 sprintf(hdev->name, "hpet%d", i);
546 if (hpet_assign_irq(hdev))
547 continue;
548
549 hdev->flags |= HPET_DEV_FSB_CAP;
550 hdev->flags |= HPET_DEV_VALID;
551 num_timers_used++;
552 if (num_timers_used == num_possible_cpus())
553 break;
554 }
555
556 printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
557 num_timers, num_timers_used);
558}
559
560#ifdef CONFIG_HPET
561static void hpet_reserve_msi_timers(struct hpet_data *hd)
562{
563 int i;
564
565 if (!hpet_devs)
566 return;
567
568 for (i = 0; i < hpet_num_timers; i++) {
569 struct hpet_dev *hdev = &hpet_devs[i];
570
571 if (!(hdev->flags & HPET_DEV_VALID))
572 continue;
573
574 hd->hd_irq[hdev->num] = hdev->irq;
575 hpet_reserve_timer(hd, hdev->num);
576 }
577}
578#endif
579
580static struct hpet_dev *hpet_get_unused_timer(void)
581{
582 int i;
583
584 if (!hpet_devs)
585 return NULL;
586
587 for (i = 0; i < hpet_num_timers; i++) {
588 struct hpet_dev *hdev = &hpet_devs[i];
589
590 if (!(hdev->flags & HPET_DEV_VALID))
591 continue;
592 if (test_and_set_bit(HPET_DEV_USED_BIT,
593 (unsigned long *)&hdev->flags))
594 continue;
595 return hdev;
596 }
597 return NULL;
598}
599
600struct hpet_work_struct {
601 struct delayed_work work;
602 struct completion complete;
603};
604
605static void hpet_work(struct work_struct *w)
606{
607 struct hpet_dev *hdev;
608 int cpu = smp_processor_id();
609 struct hpet_work_struct *hpet_work;
610
611 hpet_work = container_of(w, struct hpet_work_struct, work.work);
612
613 hdev = hpet_get_unused_timer();
614 if (hdev)
615 init_one_hpet_msi_clockevent(hdev, cpu);
616
617 complete(&hpet_work->complete);
618}
619
620static int hpet_cpuhp_notify(struct notifier_block *n,
621 unsigned long action, void *hcpu)
622{
623 unsigned long cpu = (unsigned long)hcpu;
624 struct hpet_work_struct work;
625 struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
626
627 switch (action & 0xf) {
628 case CPU_ONLINE:
629 INIT_DELAYED_WORK(&work.work, hpet_work);
630 init_completion(&work.complete);
631 /* FIXME: add schedule_work_on() */
632 schedule_delayed_work_on(cpu, &work.work, 0);
633 wait_for_completion(&work.complete);
634 break;
635 case CPU_DEAD:
636 if (hdev) {
637 free_irq(hdev->irq, hdev);
638 hdev->flags &= ~HPET_DEV_USED;
639 per_cpu(cpu_hpet_dev, cpu) = NULL;
640 }
641 break;
642 }
643 return NOTIFY_OK;
644}
645#else
646
647static int hpet_setup_msi_irq(unsigned int irq)
648{
649 return 0;
650}
651static void hpet_msi_capability_lookup(unsigned int start_timer)
652{
653 return;
654}
655
656#ifdef CONFIG_HPET
657static void hpet_reserve_msi_timers(struct hpet_data *hd)
658{
659 return;
660}
661#endif
662
663static int hpet_cpuhp_notify(struct notifier_block *n,
664 unsigned long action, void *hcpu)
665{
666 return NOTIFY_OK;
667}
668
669#endif
670
295/* 671/*
296 * Clock source related code 672 * Clock source related code
297 */ 673 */
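
The mult/shift setup in init_one_hpet_msi_clockevent() above is worth a worked example. hpet_period is the tick length in femtoseconds; with a typical ~14.318 MHz HPET (values rounded, for illustration only):

hpet_freq = 10^15 / hpet_period            /* fs per second / fs per tick */
          = 10^15 / 69841279  ~= 14318180 Hz

mult = hpet_freq * 2^shift / NSEC_PER_SEC
     = 14318180 * 2^32 / 10^9  ~= 61496115

ticks = (ns * mult) >> shift               /* ns -> HPET ticks, integer only */
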
@@ -427,8 +803,10 @@ int __init hpet_enable(void)
427 803
428 if (id & HPET_ID_LEGSUP) { 804 if (id & HPET_ID_LEGSUP) {
429 hpet_legacy_clockevent_register(); 805 hpet_legacy_clockevent_register();
806 hpet_msi_capability_lookup(2);
430 return 1; 807 return 1;
431 } 808 }
809 hpet_msi_capability_lookup(0);
432 return 0; 810 return 0;
433 811
434out_nohpet: 812out_nohpet:
@@ -445,6 +823,8 @@ out_nohpet:
445 */ 823 */
446static __init int hpet_late_init(void) 824static __init int hpet_late_init(void)
447{ 825{
826 int cpu;
827
448 if (boot_hpet_disable) 828 if (boot_hpet_disable)
449 return -ENODEV; 829 return -ENODEV;
450 830
@@ -460,6 +840,13 @@ static __init int hpet_late_init(void)
460 840
461 hpet_reserve_platform_timers(hpet_readl(HPET_ID)); 841 hpet_reserve_platform_timers(hpet_readl(HPET_ID));
462 842
843 for_each_online_cpu(cpu) {
844 hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
845 }
846
847 /* This notifier should be called after workqueue is ready */
848 hotcpu_notifier(hpet_cpuhp_notify, -20);
849
463 return 0; 850 return 0;
464} 851}
465fs_initcall(hpet_late_init); 852fs_initcall(hpet_late_init);
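
Note the shape of the bring-up in hpet_late_init(): a clockevent must be registered on the CPU that will own it, so both the boot-time loop and the hotplug notifier funnel through the same run-on-cpu pattern. Condensed from hpet_cpuhp_notify()'s CPU_ONLINE leg above (the wrapper name is illustrative):

/* restated for reference: run hpet_work() on the target cpu and
 * wait for it to finish registering the clockevent */
static void hpet_init_on(unsigned long cpu)
{
	struct hpet_work_struct work;

	INIT_DELAYED_WORK(&work.work, hpet_work);
	init_completion(&work.complete);
	schedule_delayed_work_on(cpu, &work.work, 0);
	wait_for_completion(&work.complete);
}
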
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic.c
index 02063ae042f7..b764d7429c61 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic.c
@@ -27,17 +27,21 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/pci.h> 28#include <linux/pci.h>
29#include <linux/mc146818rtc.h> 29#include <linux/mc146818rtc.h>
30#include <linux/compiler.h>
30#include <linux/acpi.h> 31#include <linux/acpi.h>
32#include <linux/module.h>
31#include <linux/sysdev.h> 33#include <linux/sysdev.h>
32#include <linux/msi.h> 34#include <linux/msi.h>
33#include <linux/htirq.h> 35#include <linux/htirq.h>
34#include <linux/dmar.h> 36#include <linux/freezer.h>
35#include <linux/jiffies.h> 37#include <linux/kthread.h>
38#include <linux/jiffies.h> /* time_after() */
36#ifdef CONFIG_ACPI 39#ifdef CONFIG_ACPI
37#include <acpi/acpi_bus.h> 40#include <acpi/acpi_bus.h>
38#endif 41#endif
39#include <linux/bootmem.h> 42#include <linux/bootmem.h>
40#include <linux/dmar.h> 43#include <linux/dmar.h>
44#include <linux/hpet.h>
41 45
42#include <asm/idle.h> 46#include <asm/idle.h>
43#include <asm/io.h> 47#include <asm/io.h>
@@ -46,61 +50,28 @@
46#include <asm/proto.h> 50#include <asm/proto.h>
47#include <asm/acpi.h> 51#include <asm/acpi.h>
48#include <asm/dma.h> 52#include <asm/dma.h>
53#include <asm/timer.h>
49#include <asm/i8259.h> 54#include <asm/i8259.h>
50#include <asm/nmi.h> 55#include <asm/nmi.h>
51#include <asm/msidef.h> 56#include <asm/msidef.h>
52#include <asm/hypertransport.h> 57#include <asm/hypertransport.h>
58#include <asm/setup.h>
53#include <asm/irq_remapping.h> 59#include <asm/irq_remapping.h>
60#include <asm/hpet.h>
61#include <asm/uv/uv_hub.h>
62#include <asm/uv/uv_irq.h>
54 63
55#include <mach_ipi.h> 64#include <mach_ipi.h>
56#include <mach_apic.h> 65#include <mach_apic.h>
66#include <mach_apicdef.h>
57 67
58#define __apicdebuginit(type) static type __init 68#define __apicdebuginit(type) static type __init
59 69
60struct irq_cfg { 70/*
 61 cpumask_t domain; 71 * Is the SiS APIC rmw bug present?
62 cpumask_t old_domain; 72 * -1 = don't know, 0 = no, 1 = yes
63 unsigned move_cleanup_count; 73 */
64 u8 vector; 74int sis_apic_bug = -1;
65 u8 move_in_progress : 1;
66};
67
68/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
69static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
70 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
71 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
72 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
73 [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
74 [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
75 [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
76 [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
77 [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
78 [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
79 [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
80 [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
81 [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
82 [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
83 [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
84 [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
85 [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
86};
87
88static int assign_irq_vector(int irq, cpumask_t mask);
89
90int first_system_vector = 0xfe;
91
92char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
93
94int sis_apic_bug; /* not actually supported, dummy for compile */
95
96static int no_timer_check;
97
98static int disable_timer_pin_1 __initdata;
99
100int timer_through_8259 __initdata;
101
102/* Where if anywhere is the i8259 connect in external int mode */
103static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
104 75
105static DEFINE_SPINLOCK(ioapic_lock); 76static DEFINE_SPINLOCK(ioapic_lock);
106static DEFINE_SPINLOCK(vector_lock); 77static DEFINE_SPINLOCK(vector_lock);
@@ -110,9 +81,6 @@ static DEFINE_SPINLOCK(vector_lock);
110 */ 81 */
111int nr_ioapic_registers[MAX_IO_APICS]; 82int nr_ioapic_registers[MAX_IO_APICS];
112 83
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
116/* I/O APIC entries */ 84/* I/O APIC entries */
117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
118int nr_ioapics; 86int nr_ioapics;
@@ -123,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
123/* # of MP IRQ source entries */ 91/* # of MP IRQ source entries */
124int mp_irq_entries; 92int mp_irq_entries;
125 93
94#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
95int mp_bus_id_to_type[MAX_MP_BUSSES];
96#endif
97
126DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); 98DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
127 99
100int skip_ioapic_setup;
101
102static int __init parse_noapic(char *str)
103{
104 /* disable IO-APIC */
105 disable_ioapic_setup();
106 return 0;
107}
108early_param("noapic", parse_noapic);
109
110struct irq_pin_list;
111struct irq_cfg {
112 unsigned int irq;
113 struct irq_pin_list *irq_2_pin;
114 cpumask_t domain;
115 cpumask_t old_domain;
116 unsigned move_cleanup_count;
117 u8 vector;
118 u8 move_in_progress : 1;
119};
120
121/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
122static struct irq_cfg irq_cfgx[NR_IRQS] = {
123 [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
124 [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
125 [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
126 [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
127 [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
128 [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
129 [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
130 [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
131 [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
132 [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
133 [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
134 [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
135 [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
136 [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
137 [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
138 [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
139};
140
141#define for_each_irq_cfg(irq, cfg) \
142 for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
143
144static struct irq_cfg *irq_cfg(unsigned int irq)
145{
146 return irq < nr_irqs ? irq_cfgx + irq : NULL;
147}
148
149static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
150{
151 return irq_cfg(irq);
152}
153
128/* 154/*
129 * Rough estimation of how many shared IRQs there are, can 155 * Rough estimation of how many shared IRQs there are, can be changed
 130 * be changed anytime. 156 * anytime.
131 */ 157 */
132#define MAX_PLUS_SHARED_IRQS NR_IRQS 158#define MAX_PLUS_SHARED_IRQS NR_IRQS
133#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) 159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
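
The vector table is now reached only through the accessors above: irq_cfg() bounds-checks against the runtime nr_irqs (returning NULL past the end) and for_each_irq_cfg() replaces open-coded index loops. A usage sketch (the function name is illustrative):

/* illustrative: walk every (irq, cfg) pair via the new iterator */
static void show_vectors(void)
{
	struct irq_cfg *cfg;
	unsigned int irq;

	for_each_irq_cfg(irq, cfg)
		printk(KERN_DEBUG "irq %u -> vector 0x%x\n",
		       irq, cfg->vector);
}
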
@@ -139,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
139 * between pins and IRQs. 165 * between pins and IRQs.
140 */ 166 */
141 167
142static struct irq_pin_list { 168struct irq_pin_list {
143 short apic, pin, next; 169 int apic, pin;
144} irq_2_pin[PIN_MAP_SIZE]; 170 struct irq_pin_list *next;
171};
172
173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
174static struct irq_pin_list *irq_2_pin_ptr;
175
176static void __init irq_2_pin_init(void)
177{
178 struct irq_pin_list *pin = irq_2_pin_head;
179 int i;
180
181 for (i = 1; i < PIN_MAP_SIZE; i++)
182 pin[i-1].next = &pin[i];
183
184 irq_2_pin_ptr = &pin[0];
185}
186
187static struct irq_pin_list *get_one_free_irq_2_pin(void)
188{
189 struct irq_pin_list *pin = irq_2_pin_ptr;
190
191 if (!pin)
192 panic("can not get more irq_2_pin\n");
193
194 irq_2_pin_ptr = pin->next;
195 pin->next = NULL;
196 return pin;
197}
145 198
146struct io_apic { 199struct io_apic {
147 unsigned int index; 200 unsigned int index;
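
irq_2_pin entries are now chained by pointer and handed out from a boot-time pool instead of being index-linked inside one static array, so traversal becomes an ordinary NULL-terminated list walk rather than a scan for the old pin == -1 sentinel. A sketch (the function name is illustrative):

/* illustrative: dump every IO-APIC pin feeding one irq */
static void dump_pins(struct irq_cfg *cfg)
{
	struct irq_pin_list *entry;

	for (entry = cfg->irq_2_pin; entry; entry = entry->next)
		printk(KERN_DEBUG "apic %d pin %d\n",
		       entry->apic, entry->pin);
}
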
@@ -172,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
172/* 225/*
173 * Re-write a value: to be used for read-modify-write 226 * Re-write a value: to be used for read-modify-write
174 * cycles where the read already set up the index register. 227 * cycles where the read already set up the index register.
228 *
229 * Older SiS APIC requires we rewrite the index register
175 */ 230 */
176static inline void io_apic_modify(unsigned int apic, unsigned int value) 231static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
177{ 232{
178 struct io_apic __iomem *io_apic = io_apic_base(apic); 233 struct io_apic __iomem *io_apic = io_apic_base(apic);
234
235 if (sis_apic_bug)
236 writel(reg, &io_apic->index);
179 writel(value, &io_apic->data); 237 writel(value, &io_apic->data);
180} 238}
181 239
@@ -183,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
183{ 241{
184 struct irq_pin_list *entry; 242 struct irq_pin_list *entry;
185 unsigned long flags; 243 unsigned long flags;
244 struct irq_cfg *cfg = irq_cfg(irq);
186 245
187 spin_lock_irqsave(&ioapic_lock, flags); 246 spin_lock_irqsave(&ioapic_lock, flags);
188 entry = irq_2_pin + irq; 247 entry = cfg->irq_2_pin;
189 for (;;) { 248 for (;;) {
190 unsigned int reg; 249 unsigned int reg;
191 int pin; 250 int pin;
192 251
193 pin = entry->pin; 252 if (!entry)
194 if (pin == -1)
195 break; 253 break;
254 pin = entry->pin;
196 reg = io_apic_read(entry->apic, 0x10 + pin*2); 255 reg = io_apic_read(entry->apic, 0x10 + pin*2);
197 /* Is the remote IRR bit set? */ 256 /* Is the remote IRR bit set? */
198 if (reg & IO_APIC_REDIR_REMOTE_IRR) { 257 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -201,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq)
201 } 260 }
202 if (!entry->next) 261 if (!entry->next)
203 break; 262 break;
204 entry = irq_2_pin + entry->next; 263 entry = entry->next;
205 } 264 }
206 spin_unlock_irqrestore(&ioapic_lock, flags); 265 spin_unlock_irqrestore(&ioapic_lock, flags);
207 266
208 return false; 267 return false;
209} 268}
210 269
211/*
212 * Synchronize the IO-APIC and the CPU by doing
213 * a dummy read from the IO-APIC
214 */
215static inline void io_apic_sync(unsigned int apic)
216{
217 struct io_apic __iomem *io_apic = io_apic_base(apic);
218 readl(&io_apic->data);
219}
220
221#define __DO_ACTION(R, ACTION, FINAL) \
222 \
223{ \
224 int pin; \
225 struct irq_pin_list *entry = irq_2_pin + irq; \
226 \
227 BUG_ON(irq >= NR_IRQS); \
228 for (;;) { \
229 unsigned int reg; \
230 pin = entry->pin; \
231 if (pin == -1) \
232 break; \
233 reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
234 reg ACTION; \
235 io_apic_modify(entry->apic, reg); \
236 FINAL; \
237 if (!entry->next) \
238 break; \
239 entry = irq_2_pin + entry->next; \
240 } \
241}
242
243union entry_union { 270union entry_union {
244 struct { u32 w1, w2; }; 271 struct { u32 w1, w2; };
245 struct IO_APIC_route_entry entry; 272 struct IO_APIC_route_entry entry;
@@ -299,59 +326,71 @@ static void ioapic_mask_entry(int apic, int pin)
299static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) 326static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
300{ 327{
301 int apic, pin; 328 int apic, pin;
302 struct irq_pin_list *entry = irq_2_pin + irq; 329 struct irq_cfg *cfg;
330 struct irq_pin_list *entry;
303 331
304 BUG_ON(irq >= NR_IRQS); 332 cfg = irq_cfg(irq);
333 entry = cfg->irq_2_pin;
305 for (;;) { 334 for (;;) {
306 unsigned int reg; 335 unsigned int reg;
336
337 if (!entry)
338 break;
339
307 apic = entry->apic; 340 apic = entry->apic;
308 pin = entry->pin; 341 pin = entry->pin;
309 if (pin == -1) 342#ifdef CONFIG_INTR_REMAP
310 break;
311 /* 343 /*
312 * With interrupt-remapping, destination information comes 344 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry. 345 * from interrupt-remapping table entry.
314 */ 346 */
315 if (!irq_remapped(irq)) 347 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest); 348 io_apic_write(apic, 0x11 + pin*2, dest);
349#else
350 io_apic_write(apic, 0x11 + pin*2, dest);
351#endif
317 reg = io_apic_read(apic, 0x10 + pin*2); 352 reg = io_apic_read(apic, 0x10 + pin*2);
318 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 353 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
319 reg |= vector; 354 reg |= vector;
320 io_apic_modify(apic, reg); 355 io_apic_modify(apic, 0x10 + pin*2, reg);
321 if (!entry->next) 356 if (!entry->next)
322 break; 357 break;
323 entry = irq_2_pin + entry->next; 358 entry = entry->next;
324 } 359 }
325} 360}
326 361
362static int assign_irq_vector(int irq, cpumask_t mask);
363
327static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 364static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
328{ 365{
329 struct irq_cfg *cfg = irq_cfg + irq; 366 struct irq_cfg *cfg;
330 unsigned long flags; 367 unsigned long flags;
331 unsigned int dest; 368 unsigned int dest;
332 cpumask_t tmp; 369 cpumask_t tmp;
370 struct irq_desc *desc;
333 371
334 cpus_and(tmp, mask, cpu_online_map); 372 cpus_and(tmp, mask, cpu_online_map);
335 if (cpus_empty(tmp)) 373 if (cpus_empty(tmp))
336 return; 374 return;
337 375
376 cfg = irq_cfg(irq);
338 if (assign_irq_vector(irq, mask)) 377 if (assign_irq_vector(irq, mask))
339 return; 378 return;
340 379
341 cpus_and(tmp, cfg->domain, mask); 380 cpus_and(tmp, cfg->domain, mask);
342 dest = cpu_mask_to_apicid(tmp); 381 dest = cpu_mask_to_apicid(tmp);
343
344 /* 382 /*
345 * Only the high 8 bits are valid. 383 * Only the high 8 bits are valid.
346 */ 384 */
347 dest = SET_APIC_LOGICAL_ID(dest); 385 dest = SET_APIC_LOGICAL_ID(dest);
348 386
387 desc = irq_to_desc(irq);
349 spin_lock_irqsave(&ioapic_lock, flags); 388 spin_lock_irqsave(&ioapic_lock, flags);
350 __target_IO_APIC_irq(irq, dest, cfg->vector); 389 __target_IO_APIC_irq(irq, dest, cfg->vector);
351 irq_desc[irq].affinity = mask; 390 desc->affinity = mask;
352 spin_unlock_irqrestore(&ioapic_lock, flags); 391 spin_unlock_irqrestore(&ioapic_lock, flags);
353} 392}
354#endif 393#endif /* CONFIG_SMP */
355 394
356/* 395/*
357 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 396 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -360,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
360 */ 399 */
361static void add_pin_to_irq(unsigned int irq, int apic, int pin) 400static void add_pin_to_irq(unsigned int irq, int apic, int pin)
362{ 401{
363 static int first_free_entry = NR_IRQS; 402 struct irq_cfg *cfg;
364 struct irq_pin_list *entry = irq_2_pin + irq; 403 struct irq_pin_list *entry;
404
405 /* first time to refer irq_cfg, so with new */
406 cfg = irq_cfg_alloc(irq);
407 entry = cfg->irq_2_pin;
408 if (!entry) {
409 entry = get_one_free_irq_2_pin();
410 cfg->irq_2_pin = entry;
411 entry->apic = apic;
412 entry->pin = pin;
413 return;
414 }
365 415
366 BUG_ON(irq >= NR_IRQS); 416 while (entry->next) {
367 while (entry->next) 417 /* not again, please */
368 entry = irq_2_pin + entry->next; 418 if (entry->apic == apic && entry->pin == pin)
419 return;
369 420
370 if (entry->pin != -1) { 421 entry = entry->next;
371 entry->next = first_free_entry;
372 entry = irq_2_pin + entry->next;
373 if (++first_free_entry >= PIN_MAP_SIZE)
374 panic("io_apic.c: ran out of irq_2_pin entries!");
375 } 422 }
423
424 entry->next = get_one_free_irq_2_pin();
425 entry = entry->next;
376 entry->apic = apic; 426 entry->apic = apic;
377 entry->pin = pin; 427 entry->pin = pin;
378} 428}
@@ -384,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq,
384 int oldapic, int oldpin, 434 int oldapic, int oldpin,
385 int newapic, int newpin) 435 int newapic, int newpin)
386{ 436{
387 struct irq_pin_list *entry = irq_2_pin + irq; 437 struct irq_cfg *cfg = irq_cfg(irq);
438 struct irq_pin_list *entry = cfg->irq_2_pin;
439 int replaced = 0;
388 440
389 while (1) { 441 while (entry) {
390 if (entry->apic == oldapic && entry->pin == oldpin) { 442 if (entry->apic == oldapic && entry->pin == oldpin) {
391 entry->apic = newapic; 443 entry->apic = newapic;
392 entry->pin = newpin; 444 entry->pin = newpin;
393 } 445 replaced = 1;
394 if (!entry->next) 446 /* every one is different, right? */
395 break; 447 break;
396 entry = irq_2_pin + entry->next; 448 }
449 entry = entry->next;
450 }
451
452 /* why? call replace before add? */
453 if (!replaced)
454 add_pin_to_irq(irq, newapic, newpin);
455}
456
457static inline void io_apic_modify_irq(unsigned int irq,
458 int mask_and, int mask_or,
459 void (*final)(struct irq_pin_list *entry))
460{
461 int pin;
462 struct irq_cfg *cfg;
463 struct irq_pin_list *entry;
464
465 cfg = irq_cfg(irq);
466 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
467 unsigned int reg;
468 pin = entry->pin;
469 reg = io_apic_read(entry->apic, 0x10 + pin * 2);
470 reg &= mask_and;
471 reg |= mask_or;
472 io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
473 if (final)
474 final(entry);
397 } 475 }
398} 476}
399 477
478static void __unmask_IO_APIC_irq(unsigned int irq)
479{
480 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
481}
400 482
401#define DO_ACTION(name,R,ACTION, FINAL) \ 483#ifdef CONFIG_X86_64
402 \ 484void io_apic_sync(struct irq_pin_list *entry)
403 static void name##_IO_APIC_irq (unsigned int irq) \ 485{
404 __DO_ACTION(R, ACTION, FINAL) 486 /*
487 * Synchronize the IO-APIC and the CPU by doing
488 * a dummy read from the IO-APIC
489 */
490 struct io_apic __iomem *io_apic;
491 io_apic = io_apic_base(entry->apic);
492 readl(&io_apic->data);
493}
405 494
406/* mask = 1 */ 495static void __mask_IO_APIC_irq(unsigned int irq)
407DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) 496{
497 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
498}
499#else /* CONFIG_X86_32 */
500static void __mask_IO_APIC_irq(unsigned int irq)
501{
502 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
503}
408 504
409/* mask = 0 */ 505static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
410DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) 506{
507 io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
508 IO_APIC_REDIR_MASKED, NULL);
509}
510
511static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
512{
513 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
514 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
515}
516#endif /* CONFIG_X86_32 */
411 517
412static void mask_IO_APIC_irq (unsigned int irq) 518static void mask_IO_APIC_irq (unsigned int irq)
413{ 519{
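
io_apic_modify_irq() above folds the old __DO_ACTION() macro into plain C: each mask/unmask variant is just an AND mask, an OR mask, and an optional post-write hook. The variants in this hunk reduce to:

/* the variants side by side (calls taken from the hunk above) */
io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); /* mask (64-bit) */
io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);          /* mask (32-bit) */
io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);          /* unmask */
io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
		   IO_APIC_REDIR_MASKED, NULL);                   /* mask + edge */
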
@@ -450,6 +556,68 @@ static void clear_IO_APIC (void)
450 clear_IO_APIC_pin(apic, pin); 556 clear_IO_APIC_pin(apic, pin);
451} 557}
452 558
559#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
560void send_IPI_self(int vector)
561{
562 unsigned int cfg;
563
564 /*
565 * Wait for idle.
566 */
567 apic_wait_icr_idle();
568 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
569 /*
570 * Send the IPI. The write to APIC_ICR fires this off.
571 */
572 apic_write(APIC_ICR, cfg);
573}
574#endif /* !CONFIG_SMP && CONFIG_X86_32*/
575
576#ifdef CONFIG_X86_32
577/*
578 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
579 * specific CPU-side IRQs.
580 */
581
582#define MAX_PIRQS 8
583static int pirq_entries [MAX_PIRQS];
584static int pirqs_enabled;
585
586static int __init ioapic_pirq_setup(char *str)
587{
588 int i, max;
589 int ints[MAX_PIRQS+1];
590
591 get_options(str, ARRAY_SIZE(ints), ints);
592
593 for (i = 0; i < MAX_PIRQS; i++)
594 pirq_entries[i] = -1;
595
596 pirqs_enabled = 1;
597 apic_printk(APIC_VERBOSE, KERN_INFO
598 "PIRQ redirection, working around broken MP-BIOS.\n");
599 max = MAX_PIRQS;
600 if (ints[0] < MAX_PIRQS)
601 max = ints[0];
602
603 for (i = 0; i < max; i++) {
604 apic_printk(APIC_VERBOSE, KERN_DEBUG
605 "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
606 /*
607 * PIRQs are mapped upside down, usually.
608 */
609 pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
610 }
611 return 1;
612}
613
614__setup("pirq=", ioapic_pirq_setup);
615#endif /* CONFIG_X86_32 */
616
617#ifdef CONFIG_INTR_REMAP
618/* I/O APIC RTE contents at the OS boot up */
619static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
620
453/* 621/*
454 * Saves and masks all the unmasked IO-APIC RTE's 622 * Saves and masks all the unmasked IO-APIC RTE's
455 */ 623 */
@@ -474,7 +642,7 @@ int save_mask_IO_APIC_setup(void)
474 kzalloc(sizeof(struct IO_APIC_route_entry) * 642 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL); 643 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic]) 644 if (!early_ioapic_entries[apic])
477 return -ENOMEM; 645 goto nomem;
478 } 646 }
479 647
480 for (apic = 0; apic < nr_ioapics; apic++) 648 for (apic = 0; apic < nr_ioapics; apic++)
@@ -488,17 +656,31 @@ int save_mask_IO_APIC_setup(void)
488 ioapic_write_entry(apic, pin, entry); 656 ioapic_write_entry(apic, pin, entry);
489 } 657 }
490 } 658 }
659
491 return 0; 660 return 0;
661
662nomem:
663 while (apic >= 0)
664 kfree(early_ioapic_entries[apic--]);
665 memset(early_ioapic_entries, 0,
666 ARRAY_SIZE(early_ioapic_entries));
667
668 return -ENOMEM;
492} 669}
493 670
494void restore_IO_APIC_setup(void) 671void restore_IO_APIC_setup(void)
495{ 672{
496 int apic, pin; 673 int apic, pin;
497 674
498 for (apic = 0; apic < nr_ioapics; apic++) 675 for (apic = 0; apic < nr_ioapics; apic++) {
676 if (!early_ioapic_entries[apic])
677 break;
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 678 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin, 679 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]); 680 early_ioapic_entries[apic][pin]);
681 kfree(early_ioapic_entries[apic]);
682 early_ioapic_entries[apic] = NULL;
683 }
502} 684}
503 685
504void reinit_intr_remapped_IO_APIC(int intr_remapping) 686void reinit_intr_remapped_IO_APIC(int intr_remapping)
@@ -512,25 +694,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
512 */ 694 */
513 restore_IO_APIC_setup(); 695 restore_IO_APIC_setup();
514} 696}
515 697#endif
516int skip_ioapic_setup;
517int ioapic_force;
518
519static int __init parse_noapic(char *str)
520{
521 disable_ioapic_setup();
522 return 0;
523}
524early_param("noapic", parse_noapic);
525
526/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
527static int __init disable_timer_pin_setup(char *arg)
528{
529 disable_timer_pin_1 = 1;
530 return 1;
531}
532__setup("disable_timer_pin_1", disable_timer_pin_setup);
533
534 698
535/* 699/*
536 * Find the IRQ entry number of a certain pin. 700 * Find the IRQ entry number of a certain pin.
@@ -634,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
634 best_guess = irq; 798 best_guess = irq;
635 } 799 }
636 } 800 }
637 BUG_ON(best_guess >= NR_IRQS);
638 return best_guess; 801 return best_guess;
639} 802}
640 803
804EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
805
806#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
807/*
808 * EISA Edge/Level control register, ELCR
809 */
810static int EISA_ELCR(unsigned int irq)
811{
812 if (irq < 16) {
813 unsigned int port = 0x4d0 + (irq >> 3);
814 return (inb(port) >> (irq & 7)) & 1;
815 }
816 apic_printk(APIC_VERBOSE, KERN_INFO
817 "Broken MPtable reports ISA irq %d\n", irq);
818 return 0;
819}
820
821#endif
822
641/* ISA interrupts are always polarity zero edge triggered, 823/* ISA interrupts are always polarity zero edge triggered,
642 * when listed as conforming in the MP table. */ 824 * when listed as conforming in the MP table. */
643 825
644#define default_ISA_trigger(idx) (0) 826#define default_ISA_trigger(idx) (0)
645#define default_ISA_polarity(idx) (0) 827#define default_ISA_polarity(idx) (0)
646 828
829/* EISA interrupts are always polarity zero and can be edge or level
830 * trigger depending on the ELCR value. If an interrupt is listed as
831 * EISA conforming in the MP table, that means its trigger type must
832 * be read in from the ELCR */
833
834#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
835#define default_EISA_polarity(idx) default_ISA_polarity(idx)
836
647/* PCI interrupts are always polarity one level triggered, 837/* PCI interrupts are always polarity one level triggered,
648 * when listed as conforming in the MP table. */ 838 * when listed as conforming in the MP table. */
649 839
650#define default_PCI_trigger(idx) (1) 840#define default_PCI_trigger(idx) (1)
651#define default_PCI_polarity(idx) (1) 841#define default_PCI_polarity(idx) (1)
652 842
843/* MCA interrupts are always polarity zero level triggered,
844 * when listed as conforming in the MP table. */
845
846#define default_MCA_trigger(idx) (1)
847#define default_MCA_polarity(idx) default_ISA_polarity(idx)
848
653static int MPBIOS_polarity(int idx) 849static int MPBIOS_polarity(int idx)
654{ 850{
655 int bus = mp_irqs[idx].mp_srcbus; 851 int bus = mp_irqs[idx].mp_srcbus;
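
EISA_ELCR() above encodes the standard edge/level control register layout: two I/O ports, 0x4d0 for IRQs 0-7 and 0x4d1 for IRQs 8-15, one bit per IRQ, with 1 meaning level-triggered. A worked example:

/*
 * EISA_ELCR(10):
 *   port = 0x4d0 + (10 >> 3) = 0x4d1
 *   bit  = 10 & 7            = 2
 * so (inb(0x4d1) >> 2) & 1 yields the trigger mode (1 = level).
 */
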
@@ -707,6 +903,36 @@ static int MPBIOS_trigger(int idx)
707 trigger = default_ISA_trigger(idx); 903 trigger = default_ISA_trigger(idx);
708 else 904 else
709 trigger = default_PCI_trigger(idx); 905 trigger = default_PCI_trigger(idx);
906#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
907 switch (mp_bus_id_to_type[bus]) {
908 case MP_BUS_ISA: /* ISA pin */
909 {
910 /* set before the switch */
911 break;
912 }
913 case MP_BUS_EISA: /* EISA pin */
914 {
915 trigger = default_EISA_trigger(idx);
916 break;
917 }
918 case MP_BUS_PCI: /* PCI pin */
919 {
920 /* set before the switch */
921 break;
922 }
923 case MP_BUS_MCA: /* MCA pin */
924 {
925 trigger = default_MCA_trigger(idx);
926 break;
927 }
928 default:
929 {
930 printk(KERN_WARNING "broken BIOS!!\n");
931 trigger = 1;
932 break;
933 }
934 }
935#endif
710 break; 936 break;
711 case 1: /* edge */ 937 case 1: /* edge */
712 { 938 {
@@ -744,6 +970,7 @@ static inline int irq_trigger(int idx)
744 return MPBIOS_trigger(idx); 970 return MPBIOS_trigger(idx);
745} 971}
746 972
973int (*ioapic_renumber_irq)(int ioapic, int irq);
747static int pin_2_irq(int idx, int apic, int pin) 974static int pin_2_irq(int idx, int apic, int pin)
748{ 975{
749 int irq, i; 976 int irq, i;
@@ -765,8 +992,32 @@ static int pin_2_irq(int idx, int apic, int pin)
765 while (i < apic) 992 while (i < apic)
766 irq += nr_ioapic_registers[i++]; 993 irq += nr_ioapic_registers[i++];
767 irq += pin; 994 irq += pin;
995 /*
996 * For MPS mode, so far only needed by ES7000 platform
997 */
998 if (ioapic_renumber_irq)
999 irq = ioapic_renumber_irq(apic, irq);
768 } 1000 }
769 BUG_ON(irq >= NR_IRQS); 1001
1002#ifdef CONFIG_X86_32
1003 /*
1004 * PCI IRQ command line redirection. Yes, limits are hardcoded.
1005 */
1006 if ((pin >= 16) && (pin <= 23)) {
1007 if (pirq_entries[pin-16] != -1) {
1008 if (!pirq_entries[pin-16]) {
1009 apic_printk(APIC_VERBOSE, KERN_DEBUG
1010 "disabling PIRQ%d\n", pin-16);
1011 } else {
1012 irq = pirq_entries[pin-16];
1013 apic_printk(APIC_VERBOSE, KERN_DEBUG
1014 "using PIRQ%d -> IRQ %d\n",
1015 pin-16, irq);
1016 }
1017 }
1018 }
1019#endif
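Read concretely, the redirection block above works like this (a sketch; the pirq_entries[] table is assumed to be filled elsewhere from the 32-bit "pirq=" boot option, which is not part of this hunk): pin 18 indexes pirq_entries[2]; an entry of 0 prints "disabling PIRQ2" and leaves the computed irq alone, any other value n replaces the computed irq with n, and a -1 entry means "no override".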
1020
770 return irq; 1021 return irq;
771} 1022}
772 1023
@@ -801,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
801 int cpu; 1052 int cpu;
802 struct irq_cfg *cfg; 1053 struct irq_cfg *cfg;
803 1054
804 BUG_ON((unsigned)irq >= NR_IRQS); 1055 cfg = irq_cfg(irq);
805 cfg = &irq_cfg[irq];
806 1056
807 /* Only try and allocate irqs on cpus that are present */ 1057 /* Only try and allocate irqs on cpus that are present */
808 cpus_and(mask, mask, cpu_online_map); 1058 cpus_and(mask, mask, cpu_online_map);
@@ -837,8 +1087,13 @@ next:
837 } 1087 }
838 if (unlikely(current_vector == vector)) 1088 if (unlikely(current_vector == vector))
839 continue; 1089 continue;
1090#ifdef CONFIG_X86_64
840 if (vector == IA32_SYSCALL_VECTOR) 1091 if (vector == IA32_SYSCALL_VECTOR)
841 goto next; 1092 goto next;
1093#else
1094 if (vector == SYSCALL_VECTOR)
1095 goto next;
1096#endif
842 for_each_cpu_mask_nr(new_cpu, new_mask) 1097 for_each_cpu_mask_nr(new_cpu, new_mask)
843 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1098 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
844 goto next; 1099 goto next;
@@ -875,8 +1130,7 @@ static void __clear_irq_vector(int irq)
875 cpumask_t mask; 1130 cpumask_t mask;
876 int cpu, vector; 1131 int cpu, vector;
877 1132
878 BUG_ON((unsigned)irq >= NR_IRQS); 1133 cfg = irq_cfg(irq);
879 cfg = &irq_cfg[irq];
880 BUG_ON(!cfg->vector); 1134 BUG_ON(!cfg->vector);
881 1135
882 vector = cfg->vector; 1136 vector = cfg->vector;
@@ -893,12 +1147,13 @@ void __setup_vector_irq(int cpu)
893 /* Initialize vector_irq on a new cpu */ 1147 /* Initialize vector_irq on a new cpu */
894 /* This function must be called with vector_lock held */ 1148 /* This function must be called with vector_lock held */
895 int irq, vector; 1149 int irq, vector;
1150 struct irq_cfg *cfg;
896 1151
897 /* Mark the inuse vectors */ 1152 /* Mark the inuse vectors */
898 for (irq = 0; irq < NR_IRQS; ++irq) { 1153 for_each_irq_cfg(irq, cfg) {
899 if (!cpu_isset(cpu, irq_cfg[irq].domain)) 1154 if (!cpu_isset(cpu, cfg->domain))
900 continue; 1155 continue;
901 vector = irq_cfg[irq].vector; 1156 vector = cfg->vector;
902 per_cpu(vector_irq, cpu)[vector] = irq; 1157 per_cpu(vector_irq, cpu)[vector] = irq;
903 } 1158 }
904 /* Mark the free vectors */ 1159 /* Mark the free vectors */
@@ -906,7 +1161,9 @@ void __setup_vector_irq(int cpu)
906 irq = per_cpu(vector_irq, cpu)[vector]; 1161 irq = per_cpu(vector_irq, cpu)[vector];
907 if (irq < 0) 1162 if (irq < 0)
908 continue; 1163 continue;
909 if (!cpu_isset(cpu, irq_cfg[irq].domain)) 1164
1165 cfg = irq_cfg(irq);
1166 if (!cpu_isset(cpu, cfg->domain))
910 per_cpu(vector_irq, cpu)[vector] = -1; 1167 per_cpu(vector_irq, cpu)[vector] = -1;
911 } 1168 }
912} 1169}
@@ -916,16 +1173,49 @@ static struct irq_chip ioapic_chip;
916static struct irq_chip ir_ioapic_chip; 1173static struct irq_chip ir_ioapic_chip;
917#endif 1174#endif
918 1175
1176#define IOAPIC_AUTO -1
1177#define IOAPIC_EDGE 0
1178#define IOAPIC_LEVEL 1
1179
1180#ifdef CONFIG_X86_32
1181static inline int IO_APIC_irq_trigger(int irq)
1182{
1183 int apic, idx, pin;
1184
1185 for (apic = 0; apic < nr_ioapics; apic++) {
1186 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1187 idx = find_irq_entry(apic, pin, mp_INT);
1188 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1189 return irq_trigger(idx);
1190 }
1191 }
1192 /*
1193 * nonexistent IRQs are edge default
1194 */
1195 return 0;
1196}
1197#else
1198static inline int IO_APIC_irq_trigger(int irq)
1199{
1200 return 1;
1201}
1202#endif
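Tying the two variants together: ioapic_register_intr() below resolves IOAPIC_AUTO by consulting IO_APIC_irq_trigger(), so on 32-bit the trigger is looked up from the MP table (with nonexistent IRQs defaulting to edge), while on 64-bit auto always resolves to level.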
1203
919static void ioapic_register_intr(int irq, unsigned long trigger) 1204static void ioapic_register_intr(int irq, unsigned long trigger)
920{ 1205{
921 if (trigger) 1206 struct irq_desc *desc;
922 irq_desc[irq].status |= IRQ_LEVEL; 1207
1208 desc = irq_to_desc(irq);
1209
1210 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1211 trigger == IOAPIC_LEVEL)
1212 desc->status |= IRQ_LEVEL;
923 else 1213 else
924 irq_desc[irq].status &= ~IRQ_LEVEL; 1214 desc->status &= ~IRQ_LEVEL;
925 1215
926#ifdef CONFIG_INTR_REMAP 1216#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) { 1217 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT; 1218 desc->status |= IRQ_MOVE_PCNTXT;
929 if (trigger) 1219 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1220 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq, 1221 handle_fasteoi_irq,
@@ -936,7 +1226,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
936 return; 1226 return;
937 } 1227 }
938#endif 1228#endif
939 if (trigger) 1229 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1230 trigger == IOAPIC_LEVEL)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1231 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq, 1232 handle_fasteoi_irq,
942 "fasteoi"); 1233 "fasteoi");
@@ -1009,13 +1300,15 @@ static int setup_ioapic_entry(int apic, int irq,
1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1300static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1010 int trigger, int polarity) 1301 int trigger, int polarity)
1011{ 1302{
1012 struct irq_cfg *cfg = irq_cfg + irq; 1303 struct irq_cfg *cfg;
1013 struct IO_APIC_route_entry entry; 1304 struct IO_APIC_route_entry entry;
1014 cpumask_t mask; 1305 cpumask_t mask;
1015 1306
1016 if (!IO_APIC_IRQ(irq)) 1307 if (!IO_APIC_IRQ(irq))
1017 return; 1308 return;
1018 1309
1310 cfg = irq_cfg(irq);
1311
1019 mask = TARGET_CPUS; 1312 mask = TARGET_CPUS;
1020 if (assign_irq_vector(irq, mask)) 1313 if (assign_irq_vector(irq, mask))
1021 return; 1314 return;
@@ -1047,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1047 1340
1048static void __init setup_IO_APIC_irqs(void) 1341static void __init setup_IO_APIC_irqs(void)
1049{ 1342{
1050 int apic, pin, idx, irq, first_notcon = 1; 1343 int apic, pin, idx, irq;
1344 int notcon = 0;
1051 1345
1052 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1346 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1053 1347
1054 for (apic = 0; apic < nr_ioapics; apic++) { 1348 for (apic = 0; apic < nr_ioapics; apic++) {
1055 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1349 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1056
1057 idx = find_irq_entry(apic,pin,mp_INT);
1058 if (idx == -1) {
1059 if (first_notcon) {
1060 apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
1061 first_notcon = 0;
1062 } else
1063 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
1064 continue;
1065 }
1066 if (!first_notcon) {
1067 apic_printk(APIC_VERBOSE, " not connected.\n");
1068 first_notcon = 1;
1069 }
1070 1350
1071 irq = pin_2_irq(idx, apic, pin); 1351 idx = find_irq_entry(apic, pin, mp_INT);
1072 add_pin_to_irq(irq, apic, pin); 1352 if (idx == -1) {
1353 if (!notcon) {
1354 notcon = 1;
1355 apic_printk(APIC_VERBOSE,
1356 KERN_DEBUG " %d-%d",
1357 mp_ioapics[apic].mp_apicid,
1358 pin);
1359 } else
1360 apic_printk(APIC_VERBOSE, " %d-%d",
1361 mp_ioapics[apic].mp_apicid,
1362 pin);
1363 continue;
1364 }
1365 if (notcon) {
1366 apic_printk(APIC_VERBOSE,
1367 " (apicid-pin) not connected\n");
1368 notcon = 0;
1369 }
1073 1370
1074 setup_IO_APIC_irq(apic, pin, irq, 1371 irq = pin_2_irq(idx, apic, pin);
1075 irq_trigger(idx), irq_polarity(idx)); 1372#ifdef CONFIG_X86_32
1076 } 1373 if (multi_timer_check(apic, irq))
1374 continue;
1375#endif
1376 add_pin_to_irq(irq, apic, pin);
1377
1378 setup_IO_APIC_irq(apic, pin, irq,
1379 irq_trigger(idx), irq_polarity(idx));
1380 }
1077 } 1381 }
1078 1382
1079 if (!first_notcon) 1383 if (notcon)
1080 apic_printk(APIC_VERBOSE, " not connected.\n"); 1384 apic_printk(APIC_VERBOSE,
1385 " (apicid-pin) not connected\n");
1081} 1386}
1082 1387
1083/* 1388/*
@@ -1088,8 +1393,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1088{ 1393{
1089 struct IO_APIC_route_entry entry; 1394 struct IO_APIC_route_entry entry;
1090 1395
1396#ifdef CONFIG_INTR_REMAP
1091 if (intr_remapping_enabled) 1397 if (intr_remapping_enabled)
1092 return; 1398 return;
1399#endif
1093 1400
1094 memset(&entry, 0, sizeof(entry)); 1401 memset(&entry, 0, sizeof(entry));
1095 1402
@@ -1124,7 +1431,10 @@ __apicdebuginit(void) print_IO_APIC(void)
1124 union IO_APIC_reg_00 reg_00; 1431 union IO_APIC_reg_00 reg_00;
1125 union IO_APIC_reg_01 reg_01; 1432 union IO_APIC_reg_01 reg_01;
1126 union IO_APIC_reg_02 reg_02; 1433 union IO_APIC_reg_02 reg_02;
1434 union IO_APIC_reg_03 reg_03;
1127 unsigned long flags; 1435 unsigned long flags;
1436 struct irq_cfg *cfg;
1437 unsigned int irq;
1128 1438
1129 if (apic_verbosity == APIC_QUIET) 1439 if (apic_verbosity == APIC_QUIET)
1130 return; 1440 return;
@@ -1147,12 +1457,16 @@ __apicdebuginit(void) print_IO_APIC(void)
1147 reg_01.raw = io_apic_read(apic, 1); 1457 reg_01.raw = io_apic_read(apic, 1);
1148 if (reg_01.bits.version >= 0x10) 1458 if (reg_01.bits.version >= 0x10)
1149 reg_02.raw = io_apic_read(apic, 2); 1459 reg_02.raw = io_apic_read(apic, 2);
1460 if (reg_01.bits.version >= 0x20)
1461 reg_03.raw = io_apic_read(apic, 3);
1150 spin_unlock_irqrestore(&ioapic_lock, flags); 1462 spin_unlock_irqrestore(&ioapic_lock, flags);
1151 1463
1152 printk("\n"); 1464 printk("\n");
1153 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1465 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1154 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1466 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1155 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1467 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1468 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1469 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1156 1470
1157 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); 1471 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
1158 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 1472 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1160,11 +1474,27 @@ __apicdebuginit(void) print_IO_APIC(void)
1160 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 1474 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1161 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 1475 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1162 1476
1163 if (reg_01.bits.version >= 0x10) { 1477 /*
1478 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1479 * but a read of reg_02 just returns the previously read register's
1480 * value, so ignore reg_02 if it equals reg_01.
1481 */
1482 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1164 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 1483 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1165 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 1484 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1166 } 1485 }
1167 1486
1487 /*
1488 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1489 * or reg_03, but a read of reg_0[23] just returns the previously read
1490 * register's value, so ignore reg_03 if it equals reg_01 or reg_02.
1491 */
1492 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1493 reg_03.raw != reg_01.raw) {
1494 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1495 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1496 }
1497
1168 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1498 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1169 1499
1170 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" 1500 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1193,16 +1523,16 @@ __apicdebuginit(void) print_IO_APIC(void)
1193 } 1523 }
1194 } 1524 }
1195 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1525 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1196 for (i = 0; i < NR_IRQS; i++) { 1526 for_each_irq_cfg(irq, cfg) {
1197 struct irq_pin_list *entry = irq_2_pin + i; 1527 struct irq_pin_list *entry = cfg->irq_2_pin;
1198 if (entry->pin < 0) 1528 if (!entry)
1199 continue; 1529 continue;
1200 printk(KERN_DEBUG "IRQ%d ", i); 1530 printk(KERN_DEBUG "IRQ%d ", irq);
1201 for (;;) { 1531 for (;;) {
1202 printk("-> %d:%d", entry->apic, entry->pin); 1532 printk("-> %d:%d", entry->apic, entry->pin);
1203 if (!entry->next) 1533 if (!entry->next)
1204 break; 1534 break;
1205 entry = irq_2_pin + entry->next; 1535 entry = entry->next;
1206 } 1536 }
1207 printk("\n"); 1537 printk("\n");
1208 } 1538 }
@@ -1236,7 +1566,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
1236__apicdebuginit(void) print_local_APIC(void *dummy) 1566__apicdebuginit(void) print_local_APIC(void *dummy)
1237{ 1567{
1238 unsigned int v, ver, maxlvt; 1568 unsigned int v, ver, maxlvt;
1239 unsigned long icr; 1569 u64 icr;
1240 1570
1241 if (apic_verbosity == APIC_QUIET) 1571 if (apic_verbosity == APIC_QUIET)
1242 return; 1572 return;
@@ -1253,20 +1583,31 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1253 v = apic_read(APIC_TASKPRI); 1583 v = apic_read(APIC_TASKPRI);
1254 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); 1584 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1255 1585
1256 v = apic_read(APIC_ARBPRI); 1586 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1257 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, 1587 if (!APIC_XAPIC(ver)) {
1258 v & APIC_ARBPRI_MASK); 1588 v = apic_read(APIC_ARBPRI);
1259 v = apic_read(APIC_PROCPRI); 1589 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1260 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); 1590 v & APIC_ARBPRI_MASK);
1591 }
1592 v = apic_read(APIC_PROCPRI);
1593 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1594 }
1595
1596 /*
1597 * Remote read supported only in the 82489DX and local APIC for
1598 * Pentium processors.
1599 */
1600 if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
1601 v = apic_read(APIC_RRR);
1602 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1603 }
1261 1604
1262 v = apic_read(APIC_EOI);
1263 printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
1264 v = apic_read(APIC_RRR);
1265 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1266 v = apic_read(APIC_LDR); 1605 v = apic_read(APIC_LDR);
1267 printk(KERN_DEBUG "... APIC LDR: %08x\n", v); 1606 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1268 v = apic_read(APIC_DFR); 1607 if (!x2apic_enabled()) {
1269 printk(KERN_DEBUG "... APIC DFR: %08x\n", v); 1608 v = apic_read(APIC_DFR);
1609 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1610 }
1270 v = apic_read(APIC_SPIV); 1611 v = apic_read(APIC_SPIV);
1271 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); 1612 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1272 1613
@@ -1277,8 +1618,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1277 printk(KERN_DEBUG "... APIC IRR field:\n"); 1618 printk(KERN_DEBUG "... APIC IRR field:\n");
1278 print_APIC_bitfield(APIC_IRR); 1619 print_APIC_bitfield(APIC_IRR);
1279 1620
1280 v = apic_read(APIC_ESR); 1621 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1281 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1622 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1623 apic_write(APIC_ESR, 0);
1624
1625 v = apic_read(APIC_ESR);
1626 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1627 }
1282 1628
1283 icr = apic_icr_read(); 1629 icr = apic_icr_read();
1284 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); 1630 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1312,7 +1658,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1312 1658
1313__apicdebuginit(void) print_all_local_APICs(void) 1659__apicdebuginit(void) print_all_local_APICs(void)
1314{ 1660{
1315 on_each_cpu(print_local_APIC, NULL, 1); 1661 int cpu;
1662
1663 preempt_disable();
1664 for_each_online_cpu(cpu)
1665 smp_call_function_single(cpu, print_local_APIC, NULL, 1);
1666 preempt_enable();
1316} 1667}
1317 1668
1318__apicdebuginit(void) print_PIC(void) 1669__apicdebuginit(void) print_PIC(void)
@@ -1359,17 +1710,22 @@ __apicdebuginit(int) print_all_ICs(void)
1359fs_initcall(print_all_ICs); 1710fs_initcall(print_all_ICs);
1360 1711
1361 1712
1713 /* Where, if anywhere, is the i8259 connected in external int mode */
1714static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
1715
1362void __init enable_IO_APIC(void) 1716void __init enable_IO_APIC(void)
1363{ 1717{
1364 union IO_APIC_reg_01 reg_01; 1718 union IO_APIC_reg_01 reg_01;
1365 int i8259_apic, i8259_pin; 1719 int i8259_apic, i8259_pin;
1366 int i, apic; 1720 int apic;
1367 unsigned long flags; 1721 unsigned long flags;
1368 1722
1369 for (i = 0; i < PIN_MAP_SIZE; i++) { 1723#ifdef CONFIG_X86_32
1370 irq_2_pin[i].pin = -1; 1724 int i;
1371 irq_2_pin[i].next = 0; 1725 if (!pirqs_enabled)
1372 } 1726 for (i = 0; i < MAX_PIRQS; i++)
1727 pirq_entries[i] = -1;
1728#endif
1373 1729
1374 /* 1730 /*
1375 * The number of IO-APIC IRQ registers (== #pins): 1731 * The number of IO-APIC IRQ registers (== #pins):
@@ -1399,6 +1755,10 @@ void __init enable_IO_APIC(void)
1399 } 1755 }
1400 found_i8259: 1756 found_i8259:
1401 /* Look to see what, if anything, the MP table has reported for the ExtINT */ 1757
1758 /* If we could not find the appropriate pin by looking at the ioapic,
1759  * the i8259 is probably not connected to the ioapic, but give the
1760  * mptable a chance anyway.
1761 */
1402 i8259_pin = find_isa_irq_pin(0, mp_ExtINT); 1762 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1403 i8259_apic = find_isa_irq_apic(0, mp_ExtINT); 1763 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1404 /* Trust the MP table if nothing is setup in the hardware */ 1764 /* Trust the MP table if nothing is setup in the hardware */
@@ -1458,6 +1818,133 @@ void disable_IO_APIC(void)
1458 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 1818 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1459} 1819}
1460 1820
1821#ifdef CONFIG_X86_32
1822/*
1823 * function to set the IO-APIC physical IDs based on the
1824 * values stored in the MPC table.
1825 *
1826 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1827 */
1828
1829static void __init setup_ioapic_ids_from_mpc(void)
1830{
1831 union IO_APIC_reg_00 reg_00;
1832 physid_mask_t phys_id_present_map;
1833 int apic;
1834 int i;
1835 unsigned char old_id;
1836 unsigned long flags;
1837
1838 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1839 return;
1840
1841 /*
1842 * Don't check I/O APIC IDs for xAPIC systems. They have
1843 * no meaning without the serial APIC bus.
1844 */
1845 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1846 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1847 return;
1848 /*
1849 * This is broken; anything with a real cpu count has to
1850 * circumvent this idiocy regardless.
1851 */
1852 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1853
1854 /*
1855 * Set the IOAPIC ID to the value stored in the MPC table.
1856 */
1857 for (apic = 0; apic < nr_ioapics; apic++) {
1858
1859 /* Read the register 0 value */
1860 spin_lock_irqsave(&ioapic_lock, flags);
1861 reg_00.raw = io_apic_read(apic, 0);
1862 spin_unlock_irqrestore(&ioapic_lock, flags);
1863
1864 old_id = mp_ioapics[apic].mp_apicid;
1865
1866 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1867 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1868 apic, mp_ioapics[apic].mp_apicid);
1869 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1870 reg_00.bits.ID);
1871 mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1872 }
1873
1874 /*
1875 * Sanity check, is the ID really free? Every APIC in a
1876 * system must have a unique ID or we get lots of nice
1877 * 'stuck on smp_invalidate_needed IPI wait' messages.
1878 */
1879 if (check_apicid_used(phys_id_present_map,
1880 mp_ioapics[apic].mp_apicid)) {
1881 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1882 apic, mp_ioapics[apic].mp_apicid);
1883 for (i = 0; i < get_physical_broadcast(); i++)
1884 if (!physid_isset(i, phys_id_present_map))
1885 break;
1886 if (i >= get_physical_broadcast())
1887 panic("Max APIC ID exceeded!\n");
1888 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1889 i);
1890 physid_set(i, phys_id_present_map);
1891 mp_ioapics[apic].mp_apicid = i;
1892 } else {
1893 physid_mask_t tmp;
1894 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1895 apic_printk(APIC_VERBOSE, "Setting %d in the "
1896 "phys_id_present_map\n",
1897 mp_ioapics[apic].mp_apicid);
1898 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1899 }
1900
1901
1902 /*
1903 * We need to adjust the IRQ routing table
1904 * if the ID changed.
1905 */
1906 if (old_id != mp_ioapics[apic].mp_apicid)
1907 for (i = 0; i < mp_irq_entries; i++)
1908 if (mp_irqs[i].mp_dstapic == old_id)
1909 mp_irqs[i].mp_dstapic
1910 = mp_ioapics[apic].mp_apicid;
1911
1912 /*
1913 * Read the right value from the MPC table and
1914 * write it into the ID register.
1915 */
1916 apic_printk(APIC_VERBOSE, KERN_INFO
1917 "...changing IO-APIC physical APIC ID to %d ...",
1918 mp_ioapics[apic].mp_apicid);
1919
1920 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1921 spin_lock_irqsave(&ioapic_lock, flags);
1922 io_apic_write(apic, 0, reg_00.raw);
1923 spin_unlock_irqrestore(&ioapic_lock, flags);
1924
1925 /*
1926 * Sanity check
1927 */
1928 spin_lock_irqsave(&ioapic_lock, flags);
1929 reg_00.raw = io_apic_read(apic, 0);
1930 spin_unlock_irqrestore(&ioapic_lock, flags);
1931 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1932 printk("could not set ID!\n");
1933 else
1934 apic_printk(APIC_VERBOSE, " ok.\n");
1935 }
1936}
1937#endif
1938
1939int no_timer_check __initdata;
1940
1941static int __init notimercheck(char *s)
1942{
1943 no_timer_check = 1;
1944 return 1;
1945}
1946__setup("no_timer_check", notimercheck);
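In practice, booting with "no_timer_check" on the kernel command line sets no_timer_check and makes timer_irq_works() below report success without sampling jiffies, for platforms where the ten-tick probe is unreliable.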
1947
1461/* 1948/*
1462 * There is a nasty bug in some older SMP boards, their mptable lies 1949 * There is a nasty bug in some older SMP boards, their mptable lies
1463 * about the timer IRQ. We do the following to work around the situation: 1950 * about the timer IRQ. We do the following to work around the situation:
@@ -1471,6 +1958,9 @@ static int __init timer_irq_works(void)
1471 unsigned long t1 = jiffies; 1958 unsigned long t1 = jiffies;
1472 unsigned long flags; 1959 unsigned long flags;
1473 1960
1961 if (no_timer_check)
1962 return 1;
1963
1474 local_save_flags(flags); 1964 local_save_flags(flags);
1475 local_irq_enable(); 1965 local_irq_enable();
1476 /* Let ten ticks pass... */ 1966 /* Let ten ticks pass... */
@@ -1531,9 +2021,11 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
1531 return was_pending; 2021 return was_pending;
1532} 2022}
1533 2023
2024#ifdef CONFIG_X86_64
1534static int ioapic_retrigger_irq(unsigned int irq) 2025static int ioapic_retrigger_irq(unsigned int irq)
1535{ 2026{
1536 struct irq_cfg *cfg = &irq_cfg[irq]; 2027
2028 struct irq_cfg *cfg = irq_cfg(irq);
1537 unsigned long flags; 2029 unsigned long flags;
1538 2030
1539 spin_lock_irqsave(&vector_lock, flags); 2031 spin_lock_irqsave(&vector_lock, flags);
@@ -1542,6 +2034,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
1542 2034
1543 return 1; 2035 return 1;
1544} 2036}
2037#else
2038static int ioapic_retrigger_irq(unsigned int irq)
2039{
2040 send_IPI_self(irq_cfg(irq)->vector);
2041
2042 return 1;
2043}
2044#endif
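A note on the split above: the 32-bit variant retriggers by sending a self-IPI on the irq's vector, so the local CPU re-enters the handler exactly as if the line had fired again, while the 64-bit variant (its body partly elided in this hunk) achieves the same under vector_lock.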
1545 2045
1546/* 2046/*
1547 * Level and edge triggered IO-APIC interrupts need different handling, 2047 * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1580,11 +2080,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1580 */ 2080 */
1581static void migrate_ioapic_irq(int irq, cpumask_t mask) 2081static void migrate_ioapic_irq(int irq, cpumask_t mask)
1582{ 2082{
1583 struct irq_cfg *cfg = irq_cfg + irq; 2083 struct irq_cfg *cfg;
1584 struct irq_desc *desc = irq_desc + irq; 2084 struct irq_desc *desc;
1585 cpumask_t tmp, cleanup_mask; 2085 cpumask_t tmp, cleanup_mask;
1586 struct irte irte; 2086 struct irte irte;
1587 int modify_ioapic_rte = desc->status & IRQ_LEVEL; 2087 int modify_ioapic_rte;
1588 unsigned int dest; 2088 unsigned int dest;
1589 unsigned long flags; 2089 unsigned long flags;
1590 2090
@@ -1598,9 +2098,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
1598 if (assign_irq_vector(irq, mask)) 2098 if (assign_irq_vector(irq, mask))
1599 return; 2099 return;
1600 2100
2101 cfg = irq_cfg(irq);
1601 cpus_and(tmp, cfg->domain, mask); 2102 cpus_and(tmp, cfg->domain, mask);
1602 dest = cpu_mask_to_apicid(tmp); 2103 dest = cpu_mask_to_apicid(tmp);
1603 2104
2105 desc = irq_to_desc(irq);
2106 modify_ioapic_rte = desc->status & IRQ_LEVEL;
1604 if (modify_ioapic_rte) { 2107 if (modify_ioapic_rte) {
1605 spin_lock_irqsave(&ioapic_lock, flags); 2108 spin_lock_irqsave(&ioapic_lock, flags);
1606 __target_IO_APIC_irq(irq, dest, cfg->vector); 2109 __target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1622,18 +2125,19 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
1622 cfg->move_in_progress = 0; 2125 cfg->move_in_progress = 0;
1623 } 2126 }
1624 2127
1625 irq_desc[irq].affinity = mask; 2128 desc->affinity = mask;
1626} 2129}
1627 2130
1628static int migrate_irq_remapped_level(int irq) 2131static int migrate_irq_remapped_level(int irq)
1629{ 2132{
1630 int ret = -1; 2133 int ret = -1;
2134 struct irq_desc *desc = irq_to_desc(irq);
1631 2135
1632 mask_IO_APIC_irq(irq); 2136 mask_IO_APIC_irq(irq);
1633 2137
1634 if (io_apic_level_ack_pending(irq)) { 2138 if (io_apic_level_ack_pending(irq)) {
1635 /* 2139 /*
1636 * Interrupt in progress. Migrating irq now will change the 2140 * Interrupt in progress. Migrating irq now will change the
1637 * vector information in the IO-APIC RTE and that will confuse 2141 * vector information in the IO-APIC RTE and that will confuse
1638 * the EOI broadcast performed by the cpu. 2142
1639 * So, delay the irq migration to the next instance. 2143 * So, delay the irq migration to the next instance.
@@ -1643,11 +2147,11 @@ static int migrate_irq_remapped_level(int irq)
1643 } 2147 }
1644 2148
1645 /* everything is clear. we have the right of way */ 2149
1646 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); 2150 migrate_ioapic_irq(irq, desc->pending_mask);
1647 2151
1648 ret = 0; 2152 ret = 0;
1649 irq_desc[irq].status &= ~IRQ_MOVE_PENDING; 2153 desc->status &= ~IRQ_MOVE_PENDING;
1650 cpus_clear(irq_desc[irq].pending_mask); 2154 cpus_clear(desc->pending_mask);
1651 2155
1652unmask: 2156unmask:
1653 unmask_IO_APIC_irq(irq); 2157 unmask_IO_APIC_irq(irq);
@@ -1656,10 +2160,10 @@ unmask:
1656 2160
1657static void ir_irq_migration(struct work_struct *work) 2161static void ir_irq_migration(struct work_struct *work)
1658{ 2162{
1659 int irq; 2163 unsigned int irq;
2164 struct irq_desc *desc;
1660 2165
1661 for (irq = 0; irq < NR_IRQS; irq++) { 2166 for_each_irq_desc(irq, desc) {
1662 struct irq_desc *desc = irq_desc + irq;
1663 if (desc->status & IRQ_MOVE_PENDING) { 2167 if (desc->status & IRQ_MOVE_PENDING) {
1664 unsigned long flags; 2168 unsigned long flags;
1665 2169
@@ -1671,8 +2175,7 @@ static void ir_irq_migration(struct work_struct *work)
1671 continue; 2175 continue;
1672 } 2176 }
1673 2177
1674 desc->chip->set_affinity(irq, 2178 desc->chip->set_affinity(irq, desc->pending_mask);
1675 irq_desc[irq].pending_mask);
1676 spin_unlock_irqrestore(&desc->lock, flags); 2179 spin_unlock_irqrestore(&desc->lock, flags);
1677 } 2180 }
1678 } 2181 }
@@ -1683,9 +2186,11 @@ static void ir_irq_migration(struct work_struct *work)
1683 */ 2186 */
1684static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2187static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1685{ 2188{
1686 if (irq_desc[irq].status & IRQ_LEVEL) { 2189 struct irq_desc *desc = irq_to_desc(irq);
1687 irq_desc[irq].status |= IRQ_MOVE_PENDING; 2190
1688 irq_desc[irq].pending_mask = mask; 2191 if (desc->status & IRQ_LEVEL) {
2192 desc->status |= IRQ_MOVE_PENDING;
2193 desc->pending_mask = mask;
1689 migrate_irq_remapped_level(irq); 2194 migrate_irq_remapped_level(irq);
1690 return; 2195 return;
1691 } 2196 }
@@ -1698,7 +2203,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
1698{ 2203{
1699 unsigned vector, me; 2204 unsigned vector, me;
1700 ack_APIC_irq(); 2205 ack_APIC_irq();
2206#ifdef CONFIG_X86_64
1701 exit_idle(); 2207 exit_idle();
2208#endif
1702 irq_enter(); 2209 irq_enter();
1703 2210
1704 me = smp_processor_id(); 2211 me = smp_processor_id();
@@ -1707,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
1707 struct irq_desc *desc; 2214 struct irq_desc *desc;
1708 struct irq_cfg *cfg; 2215 struct irq_cfg *cfg;
1709 irq = __get_cpu_var(vector_irq)[vector]; 2216 irq = __get_cpu_var(vector_irq)[vector];
1710 if (irq >= NR_IRQS) 2217
2218 desc = irq_to_desc(irq);
2219 if (!desc)
1711 continue; 2220 continue;
1712 2221
1713 desc = irq_desc + irq; 2222 cfg = irq_cfg(irq);
1714 cfg = irq_cfg + irq;
1715 spin_lock(&desc->lock); 2223 spin_lock(&desc->lock);
1716 if (!cfg->move_cleanup_count) 2224 if (!cfg->move_cleanup_count)
1717 goto unlock; 2225 goto unlock;
@@ -1730,7 +2238,7 @@ unlock:
1730 2238
1731static void irq_complete_move(unsigned int irq) 2239static void irq_complete_move(unsigned int irq)
1732{ 2240{
1733 struct irq_cfg *cfg = irq_cfg + irq; 2241 struct irq_cfg *cfg = irq_cfg(irq);
1734 unsigned vector, me; 2242 unsigned vector, me;
1735 2243
1736 if (likely(!cfg->move_in_progress)) 2244 if (likely(!cfg->move_in_progress))
@@ -1769,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq)
1769 ack_APIC_irq(); 2277 ack_APIC_irq();
1770} 2278}
1771 2279
2280atomic_t irq_mis_count;
2281
1772static void ack_apic_level(unsigned int irq) 2282static void ack_apic_level(unsigned int irq)
1773{ 2283{
2284#ifdef CONFIG_X86_32
2285 unsigned long v;
2286 int i;
2287#endif
1774 int do_unmask_irq = 0; 2288 int do_unmask_irq = 0;
1775 2289
1776 irq_complete_move(irq); 2290 irq_complete_move(irq);
1777#ifdef CONFIG_GENERIC_PENDING_IRQ 2291#ifdef CONFIG_GENERIC_PENDING_IRQ
1778 /* If we are moving the irq we need to mask it */ 2292 /* If we are moving the irq we need to mask it */
1779 if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { 2293 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
1780 do_unmask_irq = 1; 2294 do_unmask_irq = 1;
1781 mask_IO_APIC_irq(irq); 2295 mask_IO_APIC_irq(irq);
1782 } 2296 }
1783#endif 2297#endif
1784 2298
2299#ifdef CONFIG_X86_32
2300 /*
2301 * It appears there is an erratum which affects at least version 0x11
2302 * of I/O APIC (that's the 82093AA and cores integrated into various
2303 * chipsets). Under certain conditions a level-triggered interrupt is
2304 * erroneously delivered as edge-triggered one but the respective IRR
2305 * bit gets set nevertheless. As a result the I/O unit expects an EOI
2306 * message but it will never arrive and further interrupts are blocked
2307 * from the source. The exact reason is so far unknown, but the
2308 * phenomenon was observed when two consecutive interrupt requests
2309 * from a given source get delivered to the same CPU and the source is
2310 * temporarily disabled in between.
2311 *
2312 * A workaround is to simulate an EOI message manually. We achieve it
2313 * by setting the trigger mode to edge and then to level when the edge
2314 * trigger mode gets detected in the TMR of a local APIC for a
2315 * level-triggered interrupt. We mask the source for the time of the
2316 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2317 * The idea is from Manfred Spraul. --macro
2318 */
2319 i = irq_cfg(irq)->vector;
2320
2321 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2322#endif
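To make the TMR indexing concrete (arithmetic only, following the expression above): the local APIC exposes TMR as 32-bit words at a 0x10-byte stride, one word per 32 vectors, so for vector i = 0x61, (i & ~0x1f) >> 1 = 0x60 >> 1 = 0x30, i.e. the word at APIC_TMR + 0x30, and the vector's bit within that word is i & 0x1f = 1.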
2323
1785 /* 2324 /*
1786 * We must acknowledge the irq before we move it or the acknowledge will 2325 * We must acknowledge the irq before we move it or the acknowledge will
1787 * not propagate properly. 2326 * not propagate properly.
@@ -1820,31 +2359,41 @@ static void ack_apic_level(unsigned int irq)
1820 move_masked_irq(irq); 2359 move_masked_irq(irq);
1821 unmask_IO_APIC_irq(irq); 2360 unmask_IO_APIC_irq(irq);
1822 } 2361 }
2362
2363#ifdef CONFIG_X86_32
2364 if (!(v & (1 << (i & 0x1f)))) {
2365 atomic_inc(&irq_mis_count);
2366 spin_lock(&ioapic_lock);
2367 __mask_and_edge_IO_APIC_irq(irq);
2368 __unmask_and_level_IO_APIC_irq(irq);
2369 spin_unlock(&ioapic_lock);
2370 }
2371#endif
1823} 2372}
1824 2373
1825static struct irq_chip ioapic_chip __read_mostly = { 2374static struct irq_chip ioapic_chip __read_mostly = {
1826 .name = "IO-APIC", 2375 .name = "IO-APIC",
1827 .startup = startup_ioapic_irq, 2376 .startup = startup_ioapic_irq,
1828 .mask = mask_IO_APIC_irq, 2377 .mask = mask_IO_APIC_irq,
1829 .unmask = unmask_IO_APIC_irq, 2378 .unmask = unmask_IO_APIC_irq,
1830 .ack = ack_apic_edge, 2379 .ack = ack_apic_edge,
1831 .eoi = ack_apic_level, 2380 .eoi = ack_apic_level,
1832#ifdef CONFIG_SMP 2381#ifdef CONFIG_SMP
1833 .set_affinity = set_ioapic_affinity_irq, 2382 .set_affinity = set_ioapic_affinity_irq,
1834#endif 2383#endif
1835 .retrigger = ioapic_retrigger_irq, 2384 .retrigger = ioapic_retrigger_irq,
1836}; 2385};
1837 2386
1838#ifdef CONFIG_INTR_REMAP 2387#ifdef CONFIG_INTR_REMAP
1839static struct irq_chip ir_ioapic_chip __read_mostly = { 2388static struct irq_chip ir_ioapic_chip __read_mostly = {
1840 .name = "IR-IO-APIC", 2389 .name = "IR-IO-APIC",
1841 .startup = startup_ioapic_irq, 2390 .startup = startup_ioapic_irq,
1842 .mask = mask_IO_APIC_irq, 2391 .mask = mask_IO_APIC_irq,
1843 .unmask = unmask_IO_APIC_irq, 2392 .unmask = unmask_IO_APIC_irq,
1844 .ack = ack_x2apic_edge, 2393 .ack = ack_x2apic_edge,
1845 .eoi = ack_x2apic_level, 2394 .eoi = ack_x2apic_level,
1846#ifdef CONFIG_SMP 2395#ifdef CONFIG_SMP
1847 .set_affinity = set_ir_ioapic_affinity_irq, 2396 .set_affinity = set_ir_ioapic_affinity_irq,
1848#endif 2397#endif
1849 .retrigger = ioapic_retrigger_irq, 2398 .retrigger = ioapic_retrigger_irq,
1850}; 2399};
@@ -1853,6 +2402,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
1853static inline void init_IO_APIC_traps(void) 2402static inline void init_IO_APIC_traps(void)
1854{ 2403{
1855 int irq; 2404 int irq;
2405 struct irq_desc *desc;
2406 struct irq_cfg *cfg;
1856 2407
1857 /* 2408 /*
1858 * NOTE! The local APIC isn't very good at handling 2409 * NOTE! The local APIC isn't very good at handling
@@ -1865,8 +2416,8 @@ static inline void init_IO_APIC_traps(void)
1865 * Also, we've got to be careful not to trash gate 2416 * Also, we've got to be careful not to trash gate
1866 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2417 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1867 */ 2418 */
1868 for (irq = 0; irq < NR_IRQS ; irq++) { 2419 for_each_irq_cfg(irq, cfg) {
1869 if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { 2420 if (IO_APIC_IRQ(irq) && !cfg->vector) {
1870 /* 2421 /*
1871 * Hmm.. We don't have an entry for this, 2422 * Hmm.. We don't have an entry for this,
1872 * so default to an old-fashioned 8259 2423 * so default to an old-fashioned 8259
@@ -1874,27 +2425,33 @@ static inline void init_IO_APIC_traps(void)
1874 */ 2425 */
1875 if (irq < 16) 2426 if (irq < 16)
1876 make_8259A_irq(irq); 2427 make_8259A_irq(irq);
1877 else 2428 else {
2429 desc = irq_to_desc(irq);
1878 /* Strange. Oh, well.. */ 2430 /* Strange. Oh, well.. */
1879 irq_desc[irq].chip = &no_irq_chip; 2431 desc->chip = &no_irq_chip;
2432 }
1880 } 2433 }
1881 } 2434 }
1882} 2435}
1883 2436
1884static void unmask_lapic_irq(unsigned int irq) 2437/*
2438 * The local APIC irq-chip implementation:
2439 */
2440
2441static void mask_lapic_irq(unsigned int irq)
1885{ 2442{
1886 unsigned long v; 2443 unsigned long v;
1887 2444
1888 v = apic_read(APIC_LVT0); 2445 v = apic_read(APIC_LVT0);
1889 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2446 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1890} 2447}
1891 2448
1892static void mask_lapic_irq(unsigned int irq) 2449static void unmask_lapic_irq(unsigned int irq)
1893{ 2450{
1894 unsigned long v; 2451 unsigned long v;
1895 2452
1896 v = apic_read(APIC_LVT0); 2453 v = apic_read(APIC_LVT0);
1897 apic_write(APIC_LVT0, v | APIC_LVT_MASKED); 2454 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
1898} 2455}
1899 2456
1900static void ack_lapic_irq (unsigned int irq) 2457static void ack_lapic_irq (unsigned int irq)
@@ -1911,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = {
1911 2468
1912static void lapic_register_intr(int irq) 2469static void lapic_register_intr(int irq)
1913{ 2470{
1914 irq_desc[irq].status &= ~IRQ_LEVEL; 2471 struct irq_desc *desc;
2472
2473 desc = irq_to_desc(irq);
2474 desc->status &= ~IRQ_LEVEL;
1915 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2475 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
1916 "edge"); 2476 "edge");
1917} 2477}
@@ -1919,19 +2479,19 @@ static void lapic_register_intr(int irq)
1919static void __init setup_nmi(void) 2479static void __init setup_nmi(void)
1920{ 2480{
1921 /* 2481 /*
1922 * Dirty trick to enable the NMI watchdog ... 2482 * Dirty trick to enable the NMI watchdog ...
1923 * We put the 8259A master into AEOI mode and 2483 * We put the 8259A master into AEOI mode and
1924 * unmask on all local APICs LVT0 as NMI. 2484 * unmask on all local APICs LVT0 as NMI.
1925 * 2485 *
1926 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') 2486 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
1927 * is from Maciej W. Rozycki - so we do not have to EOI from 2487 * is from Maciej W. Rozycki - so we do not have to EOI from
1928 * the NMI handler or the timer interrupt. 2488 * the NMI handler or the timer interrupt.
1929 */ 2489 */
1930 printk(KERN_INFO "activating NMI Watchdog ..."); 2490 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
1931 2491
1932 enable_NMI_through_LVT0(); 2492 enable_NMI_through_LVT0();
1933 2493
1934 printk(" done.\n"); 2494 apic_printk(APIC_VERBOSE, " done.\n");
1935} 2495}
1936 2496
1937/* 2497/*
@@ -1948,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void)
1948 unsigned char save_control, save_freq_select; 2508 unsigned char save_control, save_freq_select;
1949 2509
1950 pin = find_isa_irq_pin(8, mp_INT); 2510 pin = find_isa_irq_pin(8, mp_INT);
2511 if (pin == -1) {
2512 WARN_ON_ONCE(1);
2513 return;
2514 }
1951 apic = find_isa_irq_apic(8, mp_INT); 2515 apic = find_isa_irq_apic(8, mp_INT);
1952 if (pin == -1) 2516 if (apic == -1) {
2517 WARN_ON_ONCE(1);
1953 return; 2518 return;
2519 }
1954 2520
1955 entry0 = ioapic_read_entry(apic, pin); 2521 entry0 = ioapic_read_entry(apic, pin);
1956
1957 clear_IO_APIC_pin(apic, pin); 2522 clear_IO_APIC_pin(apic, pin);
1958 2523
1959 memset(&entry1, 0, sizeof(entry1)); 2524 memset(&entry1, 0, sizeof(entry1));
@@ -1988,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void)
1988 ioapic_write_entry(apic, pin, entry0); 2553 ioapic_write_entry(apic, pin, entry0);
1989} 2554}
1990 2555
2556static int disable_timer_pin_1 __initdata;
2557/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2558static int __init disable_timer_pin_setup(char *arg)
2559{
2560 disable_timer_pin_1 = 1;
2561 return 0;
2562}
2563early_param("disable_timer_pin_1", disable_timer_pin_setup);
2564
2565int timer_through_8259 __initdata;
2566
1991/* 2567/*
1992 * This code may look a bit paranoid, but it's supposed to cooperate with 2568 * This code may look a bit paranoid, but it's supposed to cooperate with
1993 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 2569 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
1994 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 2570 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
1995 * fanatically on his truly buggy board. 2571 * fanatically on his truly buggy board.
1996 * 2572 *
1997 * FIXME: really need to revamp this for modern platforms only. 2573 * FIXME: really need to revamp this for all platforms.
1998 */ 2574 */
1999static inline void __init check_timer(void) 2575static inline void __init check_timer(void)
2000{ 2576{
2001 struct irq_cfg *cfg = irq_cfg + 0; 2577 struct irq_cfg *cfg = irq_cfg(0);
2002 int apic1, pin1, apic2, pin2; 2578 int apic1, pin1, apic2, pin2;
2003 unsigned long flags; 2579 unsigned long flags;
2580 unsigned int ver;
2004 int no_pin1 = 0; 2581 int no_pin1 = 0;
2005 2582
2006 local_irq_save(flags); 2583 local_irq_save(flags);
2007 2584
2585 ver = apic_read(APIC_LVR);
2586 ver = GET_APIC_VERSION(ver);
2587
2008 /* 2588 /*
2009 * get/set the timer IRQ vector: 2589 * get/set the timer IRQ vector:
2010 */ 2590 */
@@ -2013,10 +2593,18 @@ static inline void __init check_timer(void)
2013 2593
2014 /* 2594 /*
2015 * As IRQ0 is to be enabled in the 8259A, the virtual 2595 * As IRQ0 is to be enabled in the 8259A, the virtual
2016 * wire has to be disabled in the local APIC. 2596 * wire has to be disabled in the local APIC. Also
2597 * timer interrupts need to be acknowledged manually in
2598 * the 8259A for the i82489DX when using the NMI
2599 * watchdog as that APIC treats NMIs as level-triggered.
2600 * The AEOI mode will finish them in the 8259A
2601 * automatically.
2017 */ 2602 */
2018 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2603 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2019 init_8259A(1); 2604 init_8259A(1);
2605#ifdef CONFIG_X86_32
2606 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2607#endif
2020 2608
2021 pin1 = find_isa_irq_pin(0, mp_INT); 2609 pin1 = find_isa_irq_pin(0, mp_INT);
2022 apic1 = find_isa_irq_apic(0, mp_INT); 2610 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2035,8 +2623,10 @@ static inline void __init check_timer(void)
2035 * 8259A. 2623 * 8259A.
2036 */ 2624 */
2037 if (pin1 == -1) { 2625 if (pin1 == -1) {
2626#ifdef CONFIG_INTR_REMAP
2038 if (intr_remapping_enabled) 2627 if (intr_remapping_enabled)
2039 panic("BIOS bug: timer not connected to IO-APIC"); 2628 panic("BIOS bug: timer not connected to IO-APIC");
2629#endif
2040 pin1 = pin2; 2630 pin1 = pin2;
2041 apic1 = apic2; 2631 apic1 = apic2;
2042 no_pin1 = 1; 2632 no_pin1 = 1;
@@ -2054,7 +2644,7 @@ static inline void __init check_timer(void)
2054 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2644 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2055 } 2645 }
2056 unmask_IO_APIC_irq(0); 2646 unmask_IO_APIC_irq(0);
2057 if (!no_timer_check && timer_irq_works()) { 2647 if (timer_irq_works()) {
2058 if (nmi_watchdog == NMI_IO_APIC) { 2648 if (nmi_watchdog == NMI_IO_APIC) {
2059 setup_nmi(); 2649 setup_nmi();
2060 enable_8259A_irq(0); 2650 enable_8259A_irq(0);
@@ -2063,8 +2653,10 @@ static inline void __init check_timer(void)
2063 clear_IO_APIC_pin(0, pin1); 2653 clear_IO_APIC_pin(0, pin1);
2064 goto out; 2654 goto out;
2065 } 2655 }
2656#ifdef CONFIG_INTR_REMAP
2066 if (intr_remapping_enabled) 2657 if (intr_remapping_enabled)
2067 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2658 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2659#endif
2068 clear_IO_APIC_pin(apic1, pin1); 2660 clear_IO_APIC_pin(apic1, pin1);
2069 if (!no_pin1) 2661 if (!no_pin1)
2070 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2662 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2104,6 +2696,9 @@ static inline void __init check_timer(void)
2104 "through the IO-APIC - disabling NMI Watchdog!\n"); 2696 "through the IO-APIC - disabling NMI Watchdog!\n");
2105 nmi_watchdog = NMI_NONE; 2697 nmi_watchdog = NMI_NONE;
2106 } 2698 }
2699#ifdef CONFIG_X86_32
2700 timer_ack = 0;
2701#endif
2107 2702
2108 apic_printk(APIC_QUIET, KERN_INFO 2703 apic_printk(APIC_QUIET, KERN_INFO
2109 "...trying to set up timer as Virtual Wire IRQ...\n"); 2704 "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2140,13 +2735,6 @@ out:
2140 local_irq_restore(flags); 2735 local_irq_restore(flags);
2141} 2736}
2142 2737
2143static int __init notimercheck(char *s)
2144{
2145 no_timer_check = 1;
2146 return 1;
2147}
2148__setup("no_timer_check", notimercheck);
2149
2150/* 2738/*
2151 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available 2739 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2152 * to devices. However there may be an I/O APIC pin available for 2740 * to devices. However there may be an I/O APIC pin available for
@@ -2164,25 +2752,49 @@ __setup("no_timer_check", notimercheck);
2164 * the I/O APIC in all cases now. No actual device should request 2752 * the I/O APIC in all cases now. No actual device should request
2165 * it anyway. --macro 2753 * it anyway. --macro
2166 */ 2754 */
2167#define PIC_IRQS (1<<2) 2755#define PIC_IRQS (1 << PIC_CASCADE_IR)
2168 2756
2169void __init setup_IO_APIC(void) 2757void __init setup_IO_APIC(void)
2170{ 2758{
2171 2759
2760#ifdef CONFIG_X86_32
2761 enable_IO_APIC();
2762#else
2172 /* 2763 /*
2173 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 2764 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
2174 */ 2765 */
2766#endif
2175 2767
2176 io_apic_irqs = ~PIC_IRQS; 2768 io_apic_irqs = ~PIC_IRQS;
2177 2769
2178 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 2770 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2179 2771 /*
2772 * Set up IO-APIC IRQ routing.
2773 */
2774#ifdef CONFIG_X86_32
2775 if (!acpi_ioapic)
2776 setup_ioapic_ids_from_mpc();
2777#endif
2180 sync_Arb_IDs(); 2778 sync_Arb_IDs();
2181 setup_IO_APIC_irqs(); 2779 setup_IO_APIC_irqs();
2182 init_IO_APIC_traps(); 2780 init_IO_APIC_traps();
2183 check_timer(); 2781 check_timer();
2184} 2782}
2185 2783
2784/*
2785 * Called after all the initialization is done. If we didn't find any
2786 * APIC bugs then we can allow the modify fast path
2787 */
2788
2789static int __init io_apic_bug_finalize(void)
2790{
2791 if (sis_apic_bug == -1)
2792 sis_apic_bug = 0;
2793 return 0;
2794}
2795
2796late_initcall(io_apic_bug_finalize);
2797
2186struct sysfs_ioapic_data { 2798struct sysfs_ioapic_data {
2187 struct sys_device dev; 2799 struct sys_device dev;
2188 struct IO_APIC_route_entry entry[0]; 2800 struct IO_APIC_route_entry entry[0];
@@ -2270,32 +2882,51 @@ device_initcall(ioapic_init_sysfs);
2270/* 2882/*
2271 * Dynamic irq allocate and deallocation 2883 * Dynamic irq allocate and deallocation
2272 */ 2884 */
2273int create_irq(void) 2885unsigned int create_irq_nr(unsigned int irq_want)
2274{ 2886{
2275 /* Allocate an unused irq */ 2887 /* Allocate an unused irq */
2276 int irq; 2888 unsigned int irq;
2277 int new; 2889 unsigned int new;
2278 unsigned long flags; 2890 unsigned long flags;
2891 struct irq_cfg *cfg_new;
2892
2893 irq_want = nr_irqs - 1;
2279 2894
2280 irq = -ENOSPC; 2895 irq = 0;
2281 spin_lock_irqsave(&vector_lock, flags); 2896 spin_lock_irqsave(&vector_lock, flags);
2282 for (new = (NR_IRQS - 1); new >= 0; new--) { 2897 for (new = irq_want; new > 0; new--) {
2283 if (platform_legacy_irq(new)) 2898 if (platform_legacy_irq(new))
2284 continue; 2899 continue;
2285 if (irq_cfg[new].vector != 0) 2900 cfg_new = irq_cfg(new);
2901 if (cfg_new && cfg_new->vector != 0)
2286 continue; 2902 continue;
2903 /* check if need to create one */
2904 if (!cfg_new)
2905 cfg_new = irq_cfg_alloc(new);
2287 if (__assign_irq_vector(new, TARGET_CPUS) == 0) 2906 if (__assign_irq_vector(new, TARGET_CPUS) == 0)
2288 irq = new; 2907 irq = new;
2289 break; 2908 break;
2290 } 2909 }
2291 spin_unlock_irqrestore(&vector_lock, flags); 2910 spin_unlock_irqrestore(&vector_lock, flags);
2292 2911
2293 if (irq >= 0) { 2912 if (irq > 0) {
2294 dynamic_irq_init(irq); 2913 dynamic_irq_init(irq);
2295 } 2914 }
2296 return irq; 2915 return irq;
2297} 2916}
2298 2917
2918int create_irq(void)
2919{
2920 int irq;
2921
2922 irq = create_irq_nr(nr_irqs - 1);
2923
2924 if (irq == 0)
2925 irq = -1;
2926
2927 return irq;
2928}
2929
2299void destroy_irq(unsigned int irq) 2930void destroy_irq(unsigned int irq)
2300{ 2931{
2301 unsigned long flags; 2932 unsigned long flags;
@@ -2316,7 +2947,7 @@ void destroy_irq(unsigned int irq)
2316#ifdef CONFIG_PCI_MSI 2947#ifdef CONFIG_PCI_MSI
2317static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) 2948static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2318{ 2949{
2319 struct irq_cfg *cfg = irq_cfg + irq; 2950 struct irq_cfg *cfg;
2320 int err; 2951 int err;
2321 unsigned dest; 2952 unsigned dest;
2322 cpumask_t tmp; 2953 cpumask_t tmp;
@@ -2326,6 +2957,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2326 if (err) 2957 if (err)
2327 return err; 2958 return err;
2328 2959
2960 cfg = irq_cfg(irq);
2329 cpus_and(tmp, cfg->domain, tmp); 2961 cpus_and(tmp, cfg->domain, tmp);
2330 dest = cpu_mask_to_apicid(tmp); 2962 dest = cpu_mask_to_apicid(tmp);
2331 2963
@@ -2383,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2383#ifdef CONFIG_SMP 3015#ifdef CONFIG_SMP
2384static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3016static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2385{ 3017{
2386 struct irq_cfg *cfg = irq_cfg + irq; 3018 struct irq_cfg *cfg;
2387 struct msi_msg msg; 3019 struct msi_msg msg;
2388 unsigned int dest; 3020 unsigned int dest;
2389 cpumask_t tmp; 3021 cpumask_t tmp;
3022 struct irq_desc *desc;
2390 3023
2391 cpus_and(tmp, mask, cpu_online_map); 3024 cpus_and(tmp, mask, cpu_online_map);
2392 if (cpus_empty(tmp)) 3025 if (cpus_empty(tmp))
@@ -2395,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2395 if (assign_irq_vector(irq, mask)) 3028 if (assign_irq_vector(irq, mask))
2396 return; 3029 return;
2397 3030
3031 cfg = irq_cfg(irq);
2398 cpus_and(tmp, cfg->domain, mask); 3032 cpus_and(tmp, cfg->domain, mask);
2399 dest = cpu_mask_to_apicid(tmp); 3033 dest = cpu_mask_to_apicid(tmp);
2400 3034
@@ -2406,7 +3040,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2406 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3040 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2407 3041
2408 write_msi_msg(irq, &msg); 3042 write_msi_msg(irq, &msg);
2409 irq_desc[irq].affinity = mask; 3043 desc = irq_to_desc(irq);
3044 desc->affinity = mask;
2410} 3045}
2411 3046
2412#ifdef CONFIG_INTR_REMAP 3047#ifdef CONFIG_INTR_REMAP
@@ -2416,10 +3051,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2416 */ 3051 */
2417static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3052static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2418{ 3053{
2419 struct irq_cfg *cfg = irq_cfg + irq; 3054 struct irq_cfg *cfg;
2420 unsigned int dest; 3055 unsigned int dest;
2421 cpumask_t tmp, cleanup_mask; 3056 cpumask_t tmp, cleanup_mask;
2422 struct irte irte; 3057 struct irte irte;
3058 struct irq_desc *desc;
2423 3059
2424 cpus_and(tmp, mask, cpu_online_map); 3060 cpus_and(tmp, mask, cpu_online_map);
2425 if (cpus_empty(tmp)) 3061 if (cpus_empty(tmp))
@@ -2431,6 +3067,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2431 if (assign_irq_vector(irq, mask)) 3067 if (assign_irq_vector(irq, mask))
2432 return; 3068 return;
2433 3069
3070 cfg = irq_cfg(irq);
2434 cpus_and(tmp, cfg->domain, mask); 3071 cpus_and(tmp, cfg->domain, mask);
2435 dest = cpu_mask_to_apicid(tmp); 3072 dest = cpu_mask_to_apicid(tmp);
2436 3073
@@ -2454,7 +3091,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2454 cfg->move_in_progress = 0; 3091 cfg->move_in_progress = 0;
2455 } 3092 }
2456 3093
2457 irq_desc[irq].affinity = mask; 3094 desc = irq_to_desc(irq);
3095 desc->affinity = mask;
2458} 3096}
2459#endif 3097#endif
2460#endif /* CONFIG_SMP */ 3098#endif /* CONFIG_SMP */
@@ -2507,7 +3145,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2507 if (index < 0) { 3145 if (index < 0) {
2508 printk(KERN_ERR 3146 printk(KERN_ERR
2509 "Unable to allocate %d IRTE for PCI %s\n", nvec, 3147 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2510 pci_name(dev)); 3148 pci_name(dev));
2511 return -ENOSPC; 3149 return -ENOSPC;
2512 } 3150 }
2513 return index; 3151 return index;
@@ -2528,7 +3166,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2528 3166
2529#ifdef CONFIG_INTR_REMAP 3167#ifdef CONFIG_INTR_REMAP
2530 if (irq_remapped(irq)) { 3168 if (irq_remapped(irq)) {
2531 struct irq_desc *desc = irq_desc + irq; 3169 struct irq_desc *desc = irq_to_desc(irq);
2532 /* 3170 /*
2533 * irq migration in process context 3171 * irq migration in process context
2534 */ 3172 */
@@ -2538,16 +3176,34 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2538#endif 3176#endif
2539 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3177 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2540 3178
3179 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3180
2541 return 0; 3181 return 0;
2542} 3182}
2543 3183
3184static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
3185{
3186 unsigned int irq;
3187
3188 irq = dev->bus->number;
3189 irq <<= 8;
3190 irq |= dev->devfn;
3191 irq <<= 12;
3192
3193 return irq;
3194}
3195
2544int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 3196int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2545{ 3197{
2546 int irq, ret; 3198 unsigned int irq;
3199 int ret;
3200 unsigned int irq_want;
2547 3201
2548 irq = create_irq(); 3202 irq_want = build_irq_for_pci_dev(dev) + 0x100;
2549 if (irq < 0) 3203
2550 return irq; 3204 irq = create_irq_nr(irq_want);
3205 if (irq == 0)
3206 return -1;
2551 3207
2552#ifdef CONFIG_INTR_REMAP 3208#ifdef CONFIG_INTR_REMAP
2553 if (!intr_remapping_enabled) 3209 if (!intr_remapping_enabled)
@@ -2574,18 +3230,22 @@ error:
2574 3230
2575int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3231int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2576{ 3232{
2577 int irq, ret, sub_handle; 3233 unsigned int irq;
3234 int ret, sub_handle;
2578 struct msi_desc *desc; 3235 struct msi_desc *desc;
3236 unsigned int irq_want;
3237
2579#ifdef CONFIG_INTR_REMAP 3238#ifdef CONFIG_INTR_REMAP
2580 struct intel_iommu *iommu = 0; 3239 struct intel_iommu *iommu = 0;
2581 int index = 0; 3240 int index = 0;
2582#endif 3241#endif
2583 3242
3243 irq_want = build_irq_for_pci_dev(dev) + 0x100;
2584 sub_handle = 0; 3244 sub_handle = 0;
2585 list_for_each_entry(desc, &dev->msi_list, list) { 3245 list_for_each_entry(desc, &dev->msi_list, list) {
2586 irq = create_irq(); 3246 irq = create_irq_nr(irq_want--);
2587 if (irq < 0) 3247 if (irq == 0)
2588 return irq; 3248 return -1;
2589#ifdef CONFIG_INTR_REMAP 3249#ifdef CONFIG_INTR_REMAP
2590 if (!intr_remapping_enabled) 3250 if (!intr_remapping_enabled)
2591 goto no_ir; 3251 goto no_ir;
@@ -2636,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq)
2636#ifdef CONFIG_SMP 3296#ifdef CONFIG_SMP
2637static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3297static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2638{ 3298{
2639 struct irq_cfg *cfg = irq_cfg + irq; 3299 struct irq_cfg *cfg;
2640 struct msi_msg msg; 3300 struct msi_msg msg;
2641 unsigned int dest; 3301 unsigned int dest;
2642 cpumask_t tmp; 3302 cpumask_t tmp;
3303 struct irq_desc *desc;
2643 3304
2644 cpus_and(tmp, mask, cpu_online_map); 3305 cpus_and(tmp, mask, cpu_online_map);
2645 if (cpus_empty(tmp)) 3306 if (cpus_empty(tmp))
@@ -2648,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2648 if (assign_irq_vector(irq, mask)) 3309 if (assign_irq_vector(irq, mask))
2649 return; 3310 return;
2650 3311
3312 cfg = irq_cfg(irq);
2651 cpus_and(tmp, cfg->domain, mask); 3313 cpus_and(tmp, cfg->domain, mask);
2652 dest = cpu_mask_to_apicid(tmp); 3314 dest = cpu_mask_to_apicid(tmp);
2653 3315
@@ -2659,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2659 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3321 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2660 3322
2661 dmar_msi_write(irq, &msg); 3323 dmar_msi_write(irq, &msg);
2662 irq_desc[irq].affinity = mask; 3324 desc = irq_to_desc(irq);
3325 desc->affinity = mask;
2663} 3326}
2664#endif /* CONFIG_SMP */ 3327#endif /* CONFIG_SMP */
2665 3328
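Each set_affinity helper in this hunk follows the same read-modify-write shape: fetch the message, splice the new vector into data[7:0] and the destination APIC id into address[19:12], write it back. A stand-alone sketch with illustrative mask values (stand-ins, not the kernel's msidef.h constants):

#include <stdio.h>
#include <stdint.h>

/* Sketch of the read-modify-write the affinity helpers above apply to an
 * MSI message.  The masks and register values below are made up. */
#define DATA_VECTOR_MASK	0x000000ffu	/* vector in data[7:0] */
#define ADDR_DEST_ID_MASK	0x000ff000u	/* dest APIC id in addr[19:12] */

int main(void)
{
	uint32_t data = 0x0000c032, address_lo = 0xfee01000;	/* made up */
	uint32_t new_vector = 0x41, new_dest = 0x03;

	data = (data & ~DATA_VECTOR_MASK) | new_vector;
	address_lo = (address_lo & ~ADDR_DEST_ID_MASK) | (new_dest << 12);

	printf("data=0x%08x address_lo=0x%08x\n",
	       (unsigned)data, (unsigned)address_lo);
	return 0;
}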
@@ -2689,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq)
2689} 3352}
2690#endif 3353#endif
2691 3354
3355#ifdef CONFIG_HPET_TIMER
3356
3357#ifdef CONFIG_SMP
3358static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3359{
3360 struct irq_cfg *cfg;
3361 struct irq_desc *desc;
3362 struct msi_msg msg;
3363 unsigned int dest;
3364 cpumask_t tmp;
3365
3366 cpus_and(tmp, mask, cpu_online_map);
3367 if (cpus_empty(tmp))
3368 return;
3369
3370 if (assign_irq_vector(irq, mask))
3371 return;
3372
3373 cfg = irq_cfg(irq);
3374 cpus_and(tmp, cfg->domain, mask);
3375 dest = cpu_mask_to_apicid(tmp);
3376
3377 hpet_msi_read(irq, &msg);
3378
3379 msg.data &= ~MSI_DATA_VECTOR_MASK;
3380 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3381 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3382 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3383
3384 hpet_msi_write(irq, &msg);
3385 desc = irq_to_desc(irq);
3386 desc->affinity = mask;
3387}
3388#endif /* CONFIG_SMP */
3389
3390struct irq_chip hpet_msi_type = {
3391 .name = "HPET_MSI",
3392 .unmask = hpet_msi_unmask,
3393 .mask = hpet_msi_mask,
3394 .ack = ack_apic_edge,
3395#ifdef CONFIG_SMP
3396 .set_affinity = hpet_msi_set_affinity,
3397#endif
3398 .retrigger = ioapic_retrigger_irq,
3399};
3400
3401int arch_setup_hpet_msi(unsigned int irq)
3402{
3403 int ret;
3404 struct msi_msg msg;
3405
3406 ret = msi_compose_msg(NULL, irq, &msg);
3407 if (ret < 0)
3408 return ret;
3409
3410 hpet_msi_write(irq, &msg);
3411 set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
3412 "edge");
3413
3414 return 0;
3415}
3416#endif
3417
2692#endif /* CONFIG_PCI_MSI */ 3418#endif /* CONFIG_PCI_MSI */
2693/* 3419/*
2694 * Hypertransport interrupt support 3420 * Hypertransport interrupt support
@@ -2713,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
2713 3439
2714static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3440static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2715{ 3441{
2716 struct irq_cfg *cfg = irq_cfg + irq; 3442 struct irq_cfg *cfg;
2717 unsigned int dest; 3443 unsigned int dest;
2718 cpumask_t tmp; 3444 cpumask_t tmp;
3445 struct irq_desc *desc;
2719 3446
2720 cpus_and(tmp, mask, cpu_online_map); 3447 cpus_and(tmp, mask, cpu_online_map);
2721 if (cpus_empty(tmp)) 3448 if (cpus_empty(tmp))
@@ -2724,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2724 if (assign_irq_vector(irq, mask)) 3451 if (assign_irq_vector(irq, mask))
2725 return; 3452 return;
2726 3453
3454 cfg = irq_cfg(irq);
2727 cpus_and(tmp, cfg->domain, mask); 3455 cpus_and(tmp, cfg->domain, mask);
2728 dest = cpu_mask_to_apicid(tmp); 3456 dest = cpu_mask_to_apicid(tmp);
2729 3457
2730 target_ht_irq(irq, dest, cfg->vector); 3458 target_ht_irq(irq, dest, cfg->vector);
2731 irq_desc[irq].affinity = mask; 3459 desc = irq_to_desc(irq);
3460 desc->affinity = mask;
2732} 3461}
2733#endif 3462#endif
2734 3463
@@ -2745,7 +3474,7 @@ static struct irq_chip ht_irq_chip = {
2745 3474
2746int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) 3475int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2747{ 3476{
2748 struct irq_cfg *cfg = irq_cfg + irq; 3477 struct irq_cfg *cfg;
2749 int err; 3478 int err;
2750 cpumask_t tmp; 3479 cpumask_t tmp;
2751 3480
@@ -2755,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2755 struct ht_irq_msg msg; 3484 struct ht_irq_msg msg;
2756 unsigned dest; 3485 unsigned dest;
2757 3486
3487 cfg = irq_cfg(irq);
2758 cpus_and(tmp, cfg->domain, tmp); 3488 cpus_and(tmp, cfg->domain, tmp);
2759 dest = cpu_mask_to_apicid(tmp); 3489 dest = cpu_mask_to_apicid(tmp);
2760 3490
@@ -2777,20 +3507,196 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2777 3507
2778 set_irq_chip_and_handler_name(irq, &ht_irq_chip, 3508 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2779 handle_edge_irq, "edge"); 3509 handle_edge_irq, "edge");
3510
3511 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
2780 } 3512 }
2781 return err; 3513 return err;
2782} 3514}
2783#endif /* CONFIG_HT_IRQ */ 3515#endif /* CONFIG_HT_IRQ */
2784 3516
3517#ifdef CONFIG_X86_64
3518/*
3519 * Re-target the irq to the specified CPU and enable the specified MMR located
3520 * on the specified blade to allow the sending of MSIs to the specified CPU.
3521 */
3522int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3523 unsigned long mmr_offset)
3524{
3525 const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
3526 struct irq_cfg *cfg;
3527 int mmr_pnode;
3528 unsigned long mmr_value;
3529 struct uv_IO_APIC_route_entry *entry;
3530 unsigned long flags;
3531 int err;
3532
3533 err = assign_irq_vector(irq, *eligible_cpu);
3534 if (err != 0)
3535 return err;
3536
3537 spin_lock_irqsave(&vector_lock, flags);
3538 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
3539 irq_name);
3540 spin_unlock_irqrestore(&vector_lock, flags);
3541
3542 cfg = irq_cfg(irq);
3543
3544 mmr_value = 0;
3545 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3546 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3547
3548 entry->vector = cfg->vector;
3549 entry->delivery_mode = INT_DELIVERY_MODE;
3550 entry->dest_mode = INT_DEST_MODE;
3551 entry->polarity = 0;
3552 entry->trigger = 0;
3553 entry->mask = 0;
3554 entry->dest = cpu_mask_to_apicid(*eligible_cpu);
3555
3556 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3557 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3558
3559 return irq;
3560}
3561
3562/*
3563 * Disable the specified MMR located on the specified blade so that MSIs are
3564 * no longer allowed to be sent.
3565 */
3566void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
3567{
3568 unsigned long mmr_value;
3569 struct uv_IO_APIC_route_entry *entry;
3570 int mmr_pnode;
3571
3572 mmr_value = 0;
3573 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3574 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3575
3576 entry->mask = 1;
3577
3578 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3579 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3580}
3581#endif /* CONFIG_X86_64 */
3582
3583int __init io_apic_get_redir_entries (int ioapic)
3584{
3585 union IO_APIC_reg_01 reg_01;
3586 unsigned long flags;
3587
3588 spin_lock_irqsave(&ioapic_lock, flags);
3589 reg_01.raw = io_apic_read(ioapic, 1);
3590 spin_unlock_irqrestore(&ioapic_lock, flags);
3591
3592 return reg_01.bits.entries;
3593}
3594
3595int __init probe_nr_irqs(void)
3596{
3597 int idx;
3598 int nr = 0;
3599#ifndef CONFIG_XEN
3600 int nr_min = 32;
3601#else
3602 int nr_min = NR_IRQS;
3603#endif
3604
3605 for (idx = 0; idx < nr_ioapics; idx++)
3606 nr += io_apic_get_redir_entries(idx) + 1;
3607
3608	/* double it for hotplug, MSI and NMI */
3609 nr <<= 1;
3610
3611	/* something wrong? */
3612 if (nr < nr_min)
3613 nr = nr_min;
3614
3615 return nr;
3616}
3617
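Worked example of the sizing above, assuming two hypothetical 24-pin IO-APICs: the entries field of register 01 holds the highest index (hence the +1 per APIC), and the total is doubled for head room. A stand-alone sketch:

#include <stdio.h>

/* Sketch of the probe_nr_irqs() sizing above.  The register values and
 * IO-APIC count below are hypothetical. */
int main(void)
{
	int max_redir_entry[] = { 23, 23 };	/* register value = pins - 1 */
	int nr = 0, nr_min = 32, i;

	for (i = 0; i < 2; i++)
		nr += max_redir_entry[i] + 1;	/* 24 pins per IO-APIC */
	nr <<= 1;				/* double for hotplug/MSI/NMI */
	if (nr < nr_min)			/* clamp to the floor */
		nr = nr_min;
	printf("nr_irqs = %d\n", nr);		/* 96 */
	return 0;
}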
2785/* -------------------------------------------------------------------------- 3618/* --------------------------------------------------------------------------
2786 ACPI-based IOAPIC Configuration 3619 ACPI-based IOAPIC Configuration
2787 -------------------------------------------------------------------------- */ 3620 -------------------------------------------------------------------------- */
2788 3621
2789#ifdef CONFIG_ACPI 3622#ifdef CONFIG_ACPI
2790 3623
2791#define IO_APIC_MAX_ID 0xFE 3624#ifdef CONFIG_X86_32
3625int __init io_apic_get_unique_id(int ioapic, int apic_id)
3626{
3627 union IO_APIC_reg_00 reg_00;
3628 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
3629 physid_mask_t tmp;
3630 unsigned long flags;
3631 int i = 0;
2792 3632
2793int __init io_apic_get_redir_entries (int ioapic) 3633 /*
3634 * The P4 platform supports up to 256 APIC IDs on two separate APIC
3635 * buses (one for LAPICs, one for IOAPICs), where predecessors only
3636 * supports up to 16 on one shared APIC bus.
3637 *
3638 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
3639 * advantage of new APIC bus architecture.
3640 */
3641
3642 if (physids_empty(apic_id_map))
3643 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
3644
3645 spin_lock_irqsave(&ioapic_lock, flags);
3646 reg_00.raw = io_apic_read(ioapic, 0);
3647 spin_unlock_irqrestore(&ioapic_lock, flags);
3648
3649 if (apic_id >= get_physical_broadcast()) {
3650 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
3651 "%d\n", ioapic, apic_id, reg_00.bits.ID);
3652 apic_id = reg_00.bits.ID;
3653 }
3654
3655 /*
3656 * Every APIC in a system must have a unique ID or we get lots of nice
3657 * 'stuck on smp_invalidate_needed IPI wait' messages.
3658 */
3659 if (check_apicid_used(apic_id_map, apic_id)) {
3660
3661 for (i = 0; i < get_physical_broadcast(); i++) {
3662 if (!check_apicid_used(apic_id_map, i))
3663 break;
3664 }
3665
3666 if (i == get_physical_broadcast())
3667 panic("Max apic_id exceeded!\n");
3668
3669 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
3670 "trying %d\n", ioapic, apic_id, i);
3671
3672 apic_id = i;
3673 }
3674
3675 tmp = apicid_to_cpu_present(apic_id);
3676 physids_or(apic_id_map, apic_id_map, tmp);
3677
3678 if (reg_00.bits.ID != apic_id) {
3679 reg_00.bits.ID = apic_id;
3680
3681 spin_lock_irqsave(&ioapic_lock, flags);
3682 io_apic_write(ioapic, 0, reg_00.raw);
3683 reg_00.raw = io_apic_read(ioapic, 0);
3684 spin_unlock_irqrestore(&ioapic_lock, flags);
3685
3686 /* Sanity check */
3687 if (reg_00.bits.ID != apic_id) {
3688 printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
3689 return -1;
3690 }
3691 }
3692
3693 apic_printk(APIC_VERBOSE, KERN_INFO
3694 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
3695
3696 return apic_id;
3697}
3698
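The collision handling above reduces to a linear scan for the first free id below the broadcast id. A stand-alone sketch with a small bitmask standing in for the physid map (values invented):

#include <stdio.h>

/* Sketch of the id-collision scan in io_apic_get_unique_id() above:
 * if the requested APIC id is taken, walk upward to the first free one.
 * The "used" mask and broadcast limit below are hypothetical. */
int main(void)
{
	unsigned int used = 0x0f;	/* pretend ids 0-3 are taken */
	unsigned int apic_id = 2, broadcast = 15, i;

	if (used & (1u << apic_id)) {
		for (i = 0; i < broadcast; i++)
			if (!(used & (1u << i)))
				break;
		apic_id = i;	/* the kernel panics if none is free */
	}
	printf("assigned apic_id = %u\n", apic_id);	/* 4 */
	return 0;
}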
3699int __init io_apic_get_version(int ioapic)
2794{ 3700{
2795 union IO_APIC_reg_01 reg_01; 3701 union IO_APIC_reg_01 reg_01;
2796 unsigned long flags; 3702 unsigned long flags;
@@ -2799,9 +3705,9 @@ int __init io_apic_get_redir_entries (int ioapic)
2799 reg_01.raw = io_apic_read(ioapic, 1); 3705 reg_01.raw = io_apic_read(ioapic, 1);
2800 spin_unlock_irqrestore(&ioapic_lock, flags); 3706 spin_unlock_irqrestore(&ioapic_lock, flags);
2801 3707
2802 return reg_01.bits.entries; 3708 return reg_01.bits.version;
2803} 3709}
2804 3710#endif
2805 3711
2806int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3712int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
2807{ 3713{
@@ -2853,6 +3759,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2853void __init setup_ioapic_dest(void) 3759void __init setup_ioapic_dest(void)
2854{ 3760{
2855 int pin, ioapic, irq, irq_entry; 3761 int pin, ioapic, irq, irq_entry;
3762 struct irq_cfg *cfg;
2856 3763
2857 if (skip_ioapic_setup == 1) 3764 if (skip_ioapic_setup == 1)
2858 return; 3765 return;
@@ -2868,7 +3775,8 @@ void __init setup_ioapic_dest(void)
2868 * when you have too many devices, because at that time only boot 3775 * when you have too many devices, because at that time only boot
2869 * cpu is online. 3776 * cpu is online.
2870 */ 3777 */
2871 if (!irq_cfg[irq].vector) 3778 cfg = irq_cfg(irq);
3779 if (!cfg->vector)
2872 setup_IO_APIC_irq(ioapic, pin, irq, 3780 setup_IO_APIC_irq(ioapic, pin, irq,
2873 irq_trigger(irq_entry), 3781 irq_trigger(irq_entry),
2874 irq_polarity(irq_entry)); 3782 irq_polarity(irq_entry));
@@ -2926,18 +3834,33 @@ void __init ioapic_init_mappings(void)
2926 struct resource *ioapic_res; 3834 struct resource *ioapic_res;
2927 int i; 3835 int i;
2928 3836
3837 irq_2_pin_init();
2929 ioapic_res = ioapic_setup_resources(); 3838 ioapic_res = ioapic_setup_resources();
2930 for (i = 0; i < nr_ioapics; i++) { 3839 for (i = 0; i < nr_ioapics; i++) {
2931 if (smp_found_config) { 3840 if (smp_found_config) {
2932 ioapic_phys = mp_ioapics[i].mp_apicaddr; 3841 ioapic_phys = mp_ioapics[i].mp_apicaddr;
3842#ifdef CONFIG_X86_32
3843 if (!ioapic_phys) {
3844 printk(KERN_ERR
3845 "WARNING: bogus zero IO-APIC "
3846 "address found in MPTABLE, "
3847 "disabling IO/APIC support!\n");
3848 smp_found_config = 0;
3849 skip_ioapic_setup = 1;
3850 goto fake_ioapic_page;
3851 }
3852#endif
2933 } else { 3853 } else {
3854#ifdef CONFIG_X86_32
3855fake_ioapic_page:
3856#endif
2934 ioapic_phys = (unsigned long) 3857 ioapic_phys = (unsigned long)
2935 alloc_bootmem_pages(PAGE_SIZE); 3858 alloc_bootmem_pages(PAGE_SIZE);
2936 ioapic_phys = __pa(ioapic_phys); 3859 ioapic_phys = __pa(ioapic_phys);
2937 } 3860 }
2938 set_fixmap_nocache(idx, ioapic_phys); 3861 set_fixmap_nocache(idx, ioapic_phys);
2939 apic_printk(APIC_VERBOSE, 3862 apic_printk(APIC_VERBOSE,
2940 "mapped IOAPIC to %016lx (%016lx)\n", 3863 "mapped IOAPIC to %08lx (%08lx)\n",
2941 __fix_to_virt(idx), ioapic_phys); 3864 __fix_to_virt(idx), ioapic_phys);
2942 idx++; 3865 idx++;
2943 3866
@@ -2971,4 +3894,3 @@ static int __init ioapic_insert_resources(void)
2971/* Insert the IO APIC resources after PCI initialization has occurred to handle 3894
2972 * IO APICS that are mapped in on a BAR in PCI space. */ 3895 * IO APICS that are mapped in on a BAR in PCI space. */
2973late_initcall(ioapic_insert_resources); 3896late_initcall(ioapic_insert_resources);
2974
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
deleted file mode 100644
index e710289f673e..000000000000
--- a/arch/x86/kernel/io_apic_32.c
+++ /dev/null
@@ -1,2908 +0,0 @@
1/*
2 * Intel IO-APIC support for multi-Pentium hosts.
3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
5 *
6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently!
8 *
9 * (c) 1999, Multiple IO-APIC support, developed by
10 * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
11 * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
12 * further tested and cleaned up by Zach Brown <zab@redhat.com>
13 * and Ingo Molnar <mingo@redhat.com>
14 *
15 * Fixes
16 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
17 * thanks to Eric Gilmore
18 * and Rolf G. Tews
19 * for testing these extensively
20 * Paul Diefenbaugh : Added full ACPI support
21 */
22
23#include <linux/mm.h>
24#include <linux/interrupt.h>
25#include <linux/init.h>
26#include <linux/delay.h>
27#include <linux/sched.h>
28#include <linux/bootmem.h>
29#include <linux/mc146818rtc.h>
30#include <linux/compiler.h>
31#include <linux/acpi.h>
32#include <linux/module.h>
33#include <linux/sysdev.h>
34#include <linux/pci.h>
35#include <linux/msi.h>
36#include <linux/htirq.h>
37#include <linux/freezer.h>
38#include <linux/kthread.h>
39#include <linux/jiffies.h> /* time_after() */
40
41#include <asm/io.h>
42#include <asm/smp.h>
43#include <asm/desc.h>
44#include <asm/timer.h>
45#include <asm/i8259.h>
46#include <asm/nmi.h>
47#include <asm/msidef.h>
48#include <asm/hypertransport.h>
49#include <asm/setup.h>
50
51#include <mach_apic.h>
52#include <mach_apicdef.h>
53
54#define __apicdebuginit(type) static type __init
55
56int (*ioapic_renumber_irq)(int ioapic, int irq);
57atomic_t irq_mis_count;
58
59/* Where if anywhere is the i8259 connect in external int mode */
60static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
61
62static DEFINE_SPINLOCK(ioapic_lock);
63DEFINE_SPINLOCK(vector_lock);
64
65int timer_through_8259 __initdata;
66
67/*
68 * Is the SiS APIC rmw bug present ?
69 * -1 = don't know, 0 = no, 1 = yes
70 */
71int sis_apic_bug = -1;
72
73/*
74 * # of IRQ routing registers
75 */
76int nr_ioapic_registers[MAX_IO_APICS];
77
78/* I/O APIC entries */
79struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
80int nr_ioapics;
81
82/* MP IRQ source entries */
83struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
84
85/* # of MP IRQ source entries */
86int mp_irq_entries;
87
88#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
89int mp_bus_id_to_type[MAX_MP_BUSSES];
90#endif
91
92DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
93
94static int disable_timer_pin_1 __initdata;
95
96/*
97 * Rough estimate of how many shared IRQs there are; can
98 * be changed at any time.
99 */
100#define MAX_PLUS_SHARED_IRQS NR_IRQS
101#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
102
103/*
104 * This is performance-critical, we want to do it O(1)
105 *
106 * the indexing order of this array favors 1:1 mappings
107 * between pins and IRQs.
108 */
109
110static struct irq_pin_list {
111 int apic, pin, next;
112} irq_2_pin[PIN_MAP_SIZE];
113
114struct io_apic {
115 unsigned int index;
116 unsigned int unused[3];
117 unsigned int data;
118};
119
120static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
121{
122 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
123 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
124}
125
126static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
127{
128 struct io_apic __iomem *io_apic = io_apic_base(apic);
129 writel(reg, &io_apic->index);
130 return readl(&io_apic->data);
131}
132
133static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
134{
135 struct io_apic __iomem *io_apic = io_apic_base(apic);
136 writel(reg, &io_apic->index);
137 writel(value, &io_apic->data);
138}
139
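io_apic_read()/io_apic_write() above use the classic index/data window scheme: a write to the index register selects which internal register the data window exposes. A stand-alone sketch with a plain array standing in for the memory-mapped hardware:

#include <stdio.h>
#include <stdint.h>

/* Sketch of the index/data window pattern above.  fake_regs stands in
 * for the IO-APIC; the register value written is invented. */
static uint32_t fake_regs[16];
static unsigned int index_reg;

static void win_select(unsigned int reg)	{ index_reg = reg; }
static uint32_t win_read(void)			{ return fake_regs[index_reg]; }
static void win_write(uint32_t v)		{ fake_regs[index_reg] = v; }

int main(void)
{
	win_select(1);
	win_write(0x00170011);	/* pretend reg 01: 24 entries, version 0x11 */
	win_select(1);
	printf("reg 01 = 0x%08x\n", (unsigned)win_read());
	return 0;
}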
140/*
141 * Re-write a value: to be used for read-modify-write
142 * cycles where the read already set up the index register.
143 *
144 * Older SiS APICs require that we rewrite the index register
145 */
146static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
147{
148 volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
149 if (sis_apic_bug)
150 writel(reg, &io_apic->index);
151 writel(value, &io_apic->data);
152}
153
154union entry_union {
155 struct { u32 w1, w2; };
156 struct IO_APIC_route_entry entry;
157};
158
159static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
160{
161 union entry_union eu;
162 unsigned long flags;
163 spin_lock_irqsave(&ioapic_lock, flags);
164 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
165 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
166 spin_unlock_irqrestore(&ioapic_lock, flags);
167 return eu.entry;
168}
169
170/*
171 * When we write a new IO APIC routing entry, we need to write the high
172 * word first! If the mask bit in the low word is clear, we will enable
173 * the interrupt, and we need to make sure the entry is fully populated
174 * before that happens.
175 */
176static void
177__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
178{
179 union entry_union eu;
180 eu.entry = e;
181 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
182 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
183}
184
185static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
186{
187 unsigned long flags;
188 spin_lock_irqsave(&ioapic_lock, flags);
189 __ioapic_write_entry(apic, pin, e);
190 spin_unlock_irqrestore(&ioapic_lock, flags);
191}
192
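The entry_union above lets a 64-bit routing entry move through the two 32-bit register windows, and the comment dictates the ordering: high word first on writes, so a cleared mask bit in the low word cannot enable a half-written entry. A stand-alone sketch (toy entry value; a little-endian host is assumed so w1 is the low word):

#include <stdio.h>
#include <stdint.h>

/* Sketch of the entry_union aliasing above; the struct here is a toy,
 * not the real IO_APIC_route_entry layout. */
union entry_union {
	struct { uint32_t w1, w2; };
	uint64_t entry;
};

int main(void)
{
	union entry_union eu = { .entry = 0x00000003000080a9ull };

	/* high word first, so the entry is complete before any unmask in
	 * the low word can take effect */
	printf("write w2=0x%08x, then w1=0x%08x\n",
	       (unsigned)eu.w2, (unsigned)eu.w1);
	return 0;
}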
193/*
194 * When we mask an IO APIC routing entry, we need to write the low
195 * word first, in order to set the mask bit before we change the
196 * high bits!
197 */
198static void ioapic_mask_entry(int apic, int pin)
199{
200 unsigned long flags;
201 union entry_union eu = { .entry.mask = 1 };
202
203 spin_lock_irqsave(&ioapic_lock, flags);
204 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
205 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
206 spin_unlock_irqrestore(&ioapic_lock, flags);
207}
208
209/*
210 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
211 * shared ISA-space IRQs, so we have to support them. We are super
212 * fast in the common case, and fast for shared ISA-space IRQs.
213 */
214static void add_pin_to_irq(unsigned int irq, int apic, int pin)
215{
216 static int first_free_entry = NR_IRQS;
217 struct irq_pin_list *entry = irq_2_pin + irq;
218
219 while (entry->next)
220 entry = irq_2_pin + entry->next;
221
222 if (entry->pin != -1) {
223 entry->next = first_free_entry;
224 entry = irq_2_pin + entry->next;
225 if (++first_free_entry >= PIN_MAP_SIZE)
226 panic("io_apic.c: whoops");
227 }
228 entry->apic = apic;
229 entry->pin = pin;
230}
231
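irq_2_pin above is a linked list embedded in a fixed array: the first NR_IRQS slots are per-IRQ heads, and overflow slots are chained through the next index, so the common 1:1 case touches a single entry. A stand-alone sketch with toy sizes:

#include <stdio.h>

/* Sketch of the irq_2_pin scheme above; NR_IRQS/PIN_MAP_SIZE and the
 * pins added in main() are toy values. */
#define NR_IRQS		4
#define PIN_MAP_SIZE	8

static struct irq_pin_list {
	int apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];

static int first_free_entry = NR_IRQS;

static void add_pin_to_irq(int irq, int apic, int pin)
{
	struct irq_pin_list *entry = &irq_2_pin[irq];

	while (entry->next)
		entry = &irq_2_pin[entry->next];
	if (entry->pin != -1) {			/* head in use: chain a slot */
		entry->next = first_free_entry++;
		entry = &irq_2_pin[entry->next];
	}
	entry->apic = apic;
	entry->pin = pin;
}

int main(void)
{
	struct irq_pin_list *entry;
	int i;

	for (i = 0; i < PIN_MAP_SIZE; i++)
		irq_2_pin[i].pin = -1;

	add_pin_to_irq(2, 0, 2);		/* the common 1:1 case */
	add_pin_to_irq(2, 1, 7);		/* a shared pin gets chained */

	for (entry = &irq_2_pin[2]; ; entry = &irq_2_pin[entry->next]) {
		printf("irq 2 -> apic %d pin %d\n", entry->apic, entry->pin);
		if (!entry->next)
			break;
	}
	return 0;
}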
232/*
233 * Reroute an IRQ to a different pin.
234 */
235static void __init replace_pin_at_irq(unsigned int irq,
236 int oldapic, int oldpin,
237 int newapic, int newpin)
238{
239 struct irq_pin_list *entry = irq_2_pin + irq;
240
241 while (1) {
242 if (entry->apic == oldapic && entry->pin == oldpin) {
243 entry->apic = newapic;
244 entry->pin = newpin;
245 }
246 if (!entry->next)
247 break;
248 entry = irq_2_pin + entry->next;
249 }
250}
251
252static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
253{
254 struct irq_pin_list *entry = irq_2_pin + irq;
255 unsigned int pin, reg;
256
257 for (;;) {
258 pin = entry->pin;
259 if (pin == -1)
260 break;
261 reg = io_apic_read(entry->apic, 0x10 + pin*2);
262 reg &= ~disable;
263 reg |= enable;
264 io_apic_modify(entry->apic, 0x10 + pin*2, reg);
265 if (!entry->next)
266 break;
267 entry = irq_2_pin + entry->next;
268 }
269}
270
271/* mask = 1 */
272static void __mask_IO_APIC_irq(unsigned int irq)
273{
274 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
275}
276
277/* mask = 0 */
278static void __unmask_IO_APIC_irq(unsigned int irq)
279{
280 __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
281}
282
283/* mask = 1, trigger = 0 */
284static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
285{
286 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
287 IO_APIC_REDIR_LEVEL_TRIGGER);
288}
289
290/* mask = 0, trigger = 1 */
291static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
292{
293 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
294 IO_APIC_REDIR_MASKED);
295}
296
297static void mask_IO_APIC_irq(unsigned int irq)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&ioapic_lock, flags);
302 __mask_IO_APIC_irq(irq);
303 spin_unlock_irqrestore(&ioapic_lock, flags);
304}
305
306static void unmask_IO_APIC_irq(unsigned int irq)
307{
308 unsigned long flags;
309
310 spin_lock_irqsave(&ioapic_lock, flags);
311 __unmask_IO_APIC_irq(irq);
312 spin_unlock_irqrestore(&ioapic_lock, flags);
313}
314
315static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
316{
317 struct IO_APIC_route_entry entry;
318
319 /* Check delivery_mode to be sure we're not clearing an SMI pin */
320 entry = ioapic_read_entry(apic, pin);
321 if (entry.delivery_mode == dest_SMI)
322 return;
323
324 /*
325 * Disable it in the IO-APIC irq-routing table:
326 */
327 ioapic_mask_entry(apic, pin);
328}
329
330static void clear_IO_APIC(void)
331{
332 int apic, pin;
333
334 for (apic = 0; apic < nr_ioapics; apic++)
335 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
336 clear_IO_APIC_pin(apic, pin);
337}
338
339#ifdef CONFIG_SMP
340static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
341{
342 unsigned long flags;
343 int pin;
344 struct irq_pin_list *entry = irq_2_pin + irq;
345 unsigned int apicid_value;
346 cpumask_t tmp;
347
348 cpus_and(tmp, cpumask, cpu_online_map);
349 if (cpus_empty(tmp))
350 tmp = TARGET_CPUS;
351
352 cpus_and(cpumask, tmp, CPU_MASK_ALL);
353
354 apicid_value = cpu_mask_to_apicid(cpumask);
355 /* Prepare to do the io_apic_write */
356 apicid_value = apicid_value << 24;
357 spin_lock_irqsave(&ioapic_lock, flags);
358 for (;;) {
359 pin = entry->pin;
360 if (pin == -1)
361 break;
362 io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
363 if (!entry->next)
364 break;
365 entry = irq_2_pin + entry->next;
366 }
367 irq_desc[irq].affinity = cpumask;
368 spin_unlock_irqrestore(&ioapic_lock, flags);
369}
370
371#if defined(CONFIG_IRQBALANCE)
372# include <asm/processor.h> /* kernel_thread() */
373# include <linux/kernel_stat.h> /* kstat */
374# include <linux/slab.h> /* kmalloc() */
375# include <linux/timer.h>
376
377#define IRQBALANCE_CHECK_ARCH -999
378#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
379#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
380#define BALANCED_IRQ_MORE_DELTA (HZ/10)
381#define BALANCED_IRQ_LESS_DELTA (HZ)
382
383static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
384static int physical_balance __read_mostly;
385static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
386
387static struct irq_cpu_info {
388 unsigned long *last_irq;
389 unsigned long *irq_delta;
390 unsigned long irq;
391} irq_cpu_data[NR_CPUS];
392
393#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
394#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
395#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
396
397#define IDLE_ENOUGH(cpu,now) \
398 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
399
400#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
401
402#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
403
404static cpumask_t balance_irq_affinity[NR_IRQS] = {
405 [0 ... NR_IRQS-1] = CPU_MASK_ALL
406};
407
408void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
409{
410 balance_irq_affinity[irq] = mask;
411}
412
413static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
414 unsigned long now, int direction)
415{
416 int search_idle = 1;
417 int cpu = curr_cpu;
418
419 goto inside;
420
421 do {
422 if (unlikely(cpu == curr_cpu))
423 search_idle = 0;
424inside:
425 if (direction == 1) {
426 cpu++;
427 if (cpu >= NR_CPUS)
428 cpu = 0;
429 } else {
430 cpu--;
431 if (cpu == -1)
432 cpu = NR_CPUS-1;
433 }
434 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
435 (search_idle && !IDLE_ENOUGH(cpu, now)));
436
437 return cpu;
438}
439
440static inline void balance_irq(int cpu, int irq)
441{
442 unsigned long now = jiffies;
443 cpumask_t allowed_mask;
444 unsigned int new_cpu;
445
446 if (irqbalance_disabled)
447 return;
448
449 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
450 new_cpu = move(cpu, allowed_mask, now, 1);
451 if (cpu != new_cpu)
452 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
453}
454
455static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
456{
457 int i, j;
458
459 for_each_online_cpu(i) {
460 for (j = 0; j < NR_IRQS; j++) {
461 if (!irq_desc[j].action)
462 continue;
463 /* Is it a significant load ? */
464 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
465 useful_load_threshold)
466 continue;
467 balance_irq(i, j);
468 }
469 }
470 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
471 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
472 return;
473}
474
475static void do_irq_balance(void)
476{
477 int i, j;
478 unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
479 unsigned long move_this_load = 0;
480 int max_loaded = 0, min_loaded = 0;
481 int load;
482 unsigned long useful_load_threshold = balanced_irq_interval + 10;
483 int selected_irq;
484 int tmp_loaded, first_attempt = 1;
485 unsigned long tmp_cpu_irq;
486 unsigned long imbalance = 0;
487 cpumask_t allowed_mask, target_cpu_mask, tmp;
488
489 for_each_possible_cpu(i) {
490 int package_index;
491 CPU_IRQ(i) = 0;
492 if (!cpu_online(i))
493 continue;
494 package_index = CPU_TO_PACKAGEINDEX(i);
495 for (j = 0; j < NR_IRQS; j++) {
496 unsigned long value_now, delta;
497 /* Is this an active IRQ or balancing disabled ? */
498 if (!irq_desc[j].action || irq_balancing_disabled(j))
499 continue;
500 if (package_index == i)
501 IRQ_DELTA(package_index, j) = 0;
502 /* Determine the total count per processor per IRQ */
503 value_now = (unsigned long) kstat_cpu(i).irqs[j];
504
505 /* Determine the activity per processor per IRQ */
506 delta = value_now - LAST_CPU_IRQ(i, j);
507
508 /* Update last_cpu_irq[][] for the next time */
509 LAST_CPU_IRQ(i, j) = value_now;
510
511 /* Ignore IRQs whose rate is less than the clock */
512 if (delta < useful_load_threshold)
513 continue;
514 /* update the load for the processor or package total */
515 IRQ_DELTA(package_index, j) += delta;
516
517 /* Keep track of the higher numbered sibling as well */
518 if (i != package_index)
519 CPU_IRQ(i) += delta;
520 /*
521 * We have sibling A and sibling B in the package
522 *
523 * cpu_irq[A] = load for cpu A + load for cpu B
524 * cpu_irq[B] = load for cpu B
525 */
526 CPU_IRQ(package_index) += delta;
527 }
528 }
529 /* Find the least loaded processor package */
530 for_each_online_cpu(i) {
531 if (i != CPU_TO_PACKAGEINDEX(i))
532 continue;
533 if (min_cpu_irq > CPU_IRQ(i)) {
534 min_cpu_irq = CPU_IRQ(i);
535 min_loaded = i;
536 }
537 }
538 max_cpu_irq = ULONG_MAX;
539
540tryanothercpu:
541 /*
542 * Look for heaviest loaded processor.
543 * We may come back to get the next heaviest loaded processor.
544 * Skip processors with trivial loads.
545 */
546 tmp_cpu_irq = 0;
547 tmp_loaded = -1;
548 for_each_online_cpu(i) {
549 if (i != CPU_TO_PACKAGEINDEX(i))
550 continue;
551 if (max_cpu_irq <= CPU_IRQ(i))
552 continue;
553 if (tmp_cpu_irq < CPU_IRQ(i)) {
554 tmp_cpu_irq = CPU_IRQ(i);
555 tmp_loaded = i;
556 }
557 }
558
559 if (tmp_loaded == -1) {
560 /*
561		 * When a small number of heavy interrupt sources is loading
562		 * some of the cpus too much, we use Ingo's original approach
563		 * and rotate them around.
564 */
565 if (!first_attempt && imbalance >= useful_load_threshold) {
566 rotate_irqs_among_cpus(useful_load_threshold);
567 return;
568 }
569 goto not_worth_the_effort;
570 }
571
572 first_attempt = 0; /* heaviest search */
573 max_cpu_irq = tmp_cpu_irq; /* load */
574 max_loaded = tmp_loaded; /* processor */
575 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
576
577 /*
578 * if imbalance is less than approx 10% of max load, then
579 * observe diminishing returns action. - quit
580 */
581 if (imbalance < (max_cpu_irq >> 3))
582 goto not_worth_the_effort;
583
584tryanotherirq:
585 /* if we select an IRQ to move that can't go where we want, then
586 * see if there is another one to try.
587 */
588 move_this_load = 0;
589 selected_irq = -1;
590 for (j = 0; j < NR_IRQS; j++) {
591 /* Is this an active IRQ? */
592 if (!irq_desc[j].action)
593 continue;
594 if (imbalance <= IRQ_DELTA(max_loaded, j))
595 continue;
596 /* Try to find the IRQ that is closest to the imbalance
597 * without going over.
598 */
599 if (move_this_load < IRQ_DELTA(max_loaded, j)) {
600 move_this_load = IRQ_DELTA(max_loaded, j);
601 selected_irq = j;
602 }
603 }
604 if (selected_irq == -1)
605 goto tryanothercpu;
606
607 imbalance = move_this_load;
608
609 /* For physical_balance case, we accumulated both load
610 * values in the one of the siblings cpu_irq[],
611 * to use the same code for physical and logical processors
612 * as much as possible.
613 *
614 * NOTE: the cpu_irq[] array holds the sum of the load for
615 * sibling A and sibling B in the slot for the lowest numbered
616 * sibling (A), _AND_ the load for sibling B in the slot for
617 * the higher numbered sibling.
618 *
619 * We seek the least loaded sibling by making the comparison
620 * (A+B)/2 vs B
621 */
622 load = CPU_IRQ(min_loaded) >> 1;
623 for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
624 if (load > CPU_IRQ(j)) {
625 /* This won't change cpu_sibling_map[min_loaded] */
626 load = CPU_IRQ(j);
627 min_loaded = j;
628 }
629 }
630
631 cpus_and(allowed_mask,
632 cpu_online_map,
633 balance_irq_affinity[selected_irq]);
634 target_cpu_mask = cpumask_of_cpu(min_loaded);
635 cpus_and(tmp, target_cpu_mask, allowed_mask);
636
637 if (!cpus_empty(tmp)) {
638 /* mark for change destination */
639 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
640
641 /* Since we made a change, come back sooner to
642 * check for more variation.
643 */
644 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
645 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
646 return;
647 }
648 goto tryanotherirq;
649
650not_worth_the_effort:
651 /*
652 * if we did not find an IRQ to move, then adjust the time interval
653 * upward
654 */
655 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
656 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
657 return;
658}
659
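The sibling bookkeeping described in the comment block above boils down to: the lower-numbered sibling's slot accumulates the package total (A+B) while the higher-numbered sibling keeps only its own load, so the least-loaded sibling falls out of comparing (A+B)/2 against B. A stand-alone sketch with invented load numbers:

#include <stdio.h>

/* Sketch of the CPU_IRQ sibling accounting above; the deltas are
 * hypothetical. */
int main(void)
{
	unsigned long load_a = 300, load_b = 100;
	unsigned long cpu_irq[2];

	cpu_irq[0] = load_a + load_b;	/* slot A: package total */
	cpu_irq[1] = load_b;		/* slot B: own load only */

	printf("least loaded sibling: %c\n",
	       cpu_irq[0] / 2 > cpu_irq[1] ? 'B' : 'A');
	return 0;
}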
660static int balanced_irq(void *unused)
661{
662 int i;
663 unsigned long prev_balance_time = jiffies;
664 long time_remaining = balanced_irq_interval;
665
666 /* push everything to CPU 0 to give us a starting point. */
667 for (i = 0 ; i < NR_IRQS ; i++) {
668 irq_desc[i].pending_mask = cpumask_of_cpu(0);
669 set_pending_irq(i, cpumask_of_cpu(0));
670 }
671
672 set_freezable();
673 for ( ; ; ) {
674 time_remaining = schedule_timeout_interruptible(time_remaining);
675 try_to_freeze();
676 if (time_after(jiffies,
677 prev_balance_time+balanced_irq_interval)) {
678 preempt_disable();
679 do_irq_balance();
680 prev_balance_time = jiffies;
681 time_remaining = balanced_irq_interval;
682 preempt_enable();
683 }
684 }
685 return 0;
686}
687
688static int __init balanced_irq_init(void)
689{
690 int i;
691 struct cpuinfo_x86 *c;
692 cpumask_t tmp;
693
694 cpus_shift_right(tmp, cpu_online_map, 2);
695 c = &boot_cpu_data;
696	/* When not overridden on the command line, ask the subarchitecture. */
697 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
698 irqbalance_disabled = NO_BALANCE_IRQ;
699 if (irqbalance_disabled)
700 return 0;
701
702 /* disable irqbalance completely if there is only one processor online */
703 if (num_online_cpus() < 2) {
704 irqbalance_disabled = 1;
705 return 0;
706 }
707 /*
708 * Enable physical balance only if more than 1 physical processor
709 * is present
710 */
711 if (smp_num_siblings > 1 && !cpus_empty(tmp))
712 physical_balance = 1;
713
714 for_each_online_cpu(i) {
715 irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
716 irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
717 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
718			printk(KERN_ERR "balanced_irq_init: out of memory\n");
719 goto failed;
720 }
721 }
722
723 printk(KERN_INFO "Starting balanced_irq\n");
724 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
725 return 0;
726	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq\n");
727failed:
728 for_each_possible_cpu(i) {
729 kfree(irq_cpu_data[i].irq_delta);
730 irq_cpu_data[i].irq_delta = NULL;
731 kfree(irq_cpu_data[i].last_irq);
732 irq_cpu_data[i].last_irq = NULL;
733 }
734 return 0;
735}
736
737int __devinit irqbalance_disable(char *str)
738{
739 irqbalance_disabled = 1;
740 return 1;
741}
742
743__setup("noirqbalance", irqbalance_disable);
744
745late_initcall(balanced_irq_init);
746#endif /* CONFIG_IRQBALANCE */
747#endif /* CONFIG_SMP */
748
749#ifndef CONFIG_SMP
750void send_IPI_self(int vector)
751{
752 unsigned int cfg;
753
754 /*
755 * Wait for idle.
756 */
757 apic_wait_icr_idle();
758 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
759 /*
760 * Send the IPI. The write to APIC_ICR fires this off.
761 */
762 apic_write(APIC_ICR, cfg);
763}
764#endif /* !CONFIG_SMP */
765
766
767/*
768 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
769 * specific CPU-side IRQs.
770 */
771
772#define MAX_PIRQS 8
773static int pirq_entries [MAX_PIRQS];
774static int pirqs_enabled;
775int skip_ioapic_setup;
776
777static int __init ioapic_pirq_setup(char *str)
778{
779 int i, max;
780 int ints[MAX_PIRQS+1];
781
782 get_options(str, ARRAY_SIZE(ints), ints);
783
784 for (i = 0; i < MAX_PIRQS; i++)
785 pirq_entries[i] = -1;
786
787 pirqs_enabled = 1;
788 apic_printk(APIC_VERBOSE, KERN_INFO
789 "PIRQ redirection, working around broken MP-BIOS.\n");
790 max = MAX_PIRQS;
791 if (ints[0] < MAX_PIRQS)
792 max = ints[0];
793
794 for (i = 0; i < max; i++) {
795 apic_printk(APIC_VERBOSE, KERN_DEBUG
796 "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
797 /*
798 * PIRQs are mapped upside down, usually.
799 */
800 pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
801 }
802 return 1;
803}
804
805__setup("pirq=", ioapic_pirq_setup);
806
807/*
808 * Find the IRQ entry number of a certain pin.
809 */
810static int find_irq_entry(int apic, int pin, int type)
811{
812 int i;
813
814 for (i = 0; i < mp_irq_entries; i++)
815 if (mp_irqs[i].mp_irqtype == type &&
816 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
817 mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
818 mp_irqs[i].mp_dstirq == pin)
819 return i;
820
821 return -1;
822}
823
824/*
825 * Find the pin to which IRQ[irq] (ISA) is connected
826 */
827static int __init find_isa_irq_pin(int irq, int type)
828{
829 int i;
830
831 for (i = 0; i < mp_irq_entries; i++) {
832 int lbus = mp_irqs[i].mp_srcbus;
833
834 if (test_bit(lbus, mp_bus_not_pci) &&
835 (mp_irqs[i].mp_irqtype == type) &&
836 (mp_irqs[i].mp_srcbusirq == irq))
837
838 return mp_irqs[i].mp_dstirq;
839 }
840 return -1;
841}
842
843static int __init find_isa_irq_apic(int irq, int type)
844{
845 int i;
846
847 for (i = 0; i < mp_irq_entries; i++) {
848 int lbus = mp_irqs[i].mp_srcbus;
849
850 if (test_bit(lbus, mp_bus_not_pci) &&
851 (mp_irqs[i].mp_irqtype == type) &&
852 (mp_irqs[i].mp_srcbusirq == irq))
853 break;
854 }
855 if (i < mp_irq_entries) {
856 int apic;
857 for (apic = 0; apic < nr_ioapics; apic++) {
858 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
859 return apic;
860 }
861 }
862
863 return -1;
864}
865
866/*
867 * Find a specific PCI IRQ entry.
868 * Not an __init, possibly needed by modules
869 */
870static int pin_2_irq(int idx, int apic, int pin);
871
872int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
873{
874 int apic, i, best_guess = -1;
875
876 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
877 "slot:%d, pin:%d.\n", bus, slot, pin);
878 if (test_bit(bus, mp_bus_not_pci)) {
879 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
880 return -1;
881 }
882 for (i = 0; i < mp_irq_entries; i++) {
883 int lbus = mp_irqs[i].mp_srcbus;
884
885 for (apic = 0; apic < nr_ioapics; apic++)
886 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
887 mp_irqs[i].mp_dstapic == MP_APIC_ALL)
888 break;
889
890 if (!test_bit(lbus, mp_bus_not_pci) &&
891 !mp_irqs[i].mp_irqtype &&
892 (bus == lbus) &&
893 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
894 int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
895
896 if (!(apic || IO_APIC_IRQ(irq)))
897 continue;
898
899 if (pin == (mp_irqs[i].mp_srcbusirq & 3))
900 return irq;
901 /*
902 * Use the first all-but-pin matching entry as a
903 * best-guess fuzzy result for broken mptables.
904 */
905 if (best_guess < 0)
906 best_guess = irq;
907 }
908 }
909 return best_guess;
910}
911EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
912
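The slot/pin tests above imply the mp_srcbusirq encoding for PCI sources: INT pin (A-D) in bits 1:0 and the device number in bits 6:2. A stand-alone sketch with a hypothetical value:

#include <stdio.h>

/* Sketch of the mp_srcbusirq decoding used above; the encoded value is
 * made up. */
int main(void)
{
	unsigned int srcbusirq = (3 << 2) | 1;	/* hypothetical: slot 3, INTB */

	printf("slot=%u pin=INT%c\n",
	       (srcbusirq >> 2) & 0x1f, 'A' + (srcbusirq & 3));
	return 0;
}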
913/*
914 * This function is currently only a helper for the i386 smp boot process,
915 * where we need to reprogram the ioredtbls to cater for the cpus which
916 * have come online, so the mask in all cases should simply be TARGET_CPUS
917 */
918#ifdef CONFIG_SMP
919void __init setup_ioapic_dest(void)
920{
921 int pin, ioapic, irq, irq_entry;
922
923 if (skip_ioapic_setup == 1)
924 return;
925
926 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
927 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
928 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
929 if (irq_entry == -1)
930 continue;
931 irq = pin_2_irq(irq_entry, ioapic, pin);
932 set_ioapic_affinity_irq(irq, TARGET_CPUS);
933 }
934
935 }
936}
937#endif
938
939#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
940/*
941 * EISA Edge/Level control register, ELCR
942 */
943static int EISA_ELCR(unsigned int irq)
944{
945 if (irq < 16) {
946 unsigned int port = 0x4d0 + (irq >> 3);
947 return (inb(port) >> (irq & 7)) & 1;
948 }
949 apic_printk(APIC_VERBOSE, KERN_INFO
950 "Broken MPtable reports ISA irq %d\n", irq);
951 return 0;
952}
953#endif
954
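EISA_ELCR() above indexes the two edge/level control registers at ports 0x4d0 (IRQ 0-7) and 0x4d1 (IRQ 8-15), one bit per IRQ. A stand-alone sketch of just the index math (no port I/O):

#include <stdio.h>

/* Sketch of the ELCR addressing in EISA_ELCR() above: port selected by
 * irq >> 3, bit selected by irq & 7. */
int main(void)
{
	unsigned int irq;

	for (irq = 0; irq < 16; irq += 5)
		printf("irq %2u -> port 0x%x, bit %u\n",
		       irq, 0x4d0 + (irq >> 3), irq & 7);
	return 0;
}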
955/* ISA interrupts are always polarity zero edge triggered,
956 * when listed as conforming in the MP table. */
957
958#define default_ISA_trigger(idx) (0)
959#define default_ISA_polarity(idx) (0)
960
961/* EISA interrupts are always polarity zero and can be edge or level
962 * trigger depending on the ELCR value. If an interrupt is listed as
963 * EISA conforming in the MP table, that means its trigger type must
964 * be read in from the ELCR */
965
966#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
967#define default_EISA_polarity(idx) default_ISA_polarity(idx)
968
969/* PCI interrupts are always polarity one level triggered,
970 * when listed as conforming in the MP table. */
971
972#define default_PCI_trigger(idx) (1)
973#define default_PCI_polarity(idx) (1)
974
975/* MCA interrupts are always polarity zero level triggered,
976 * when listed as conforming in the MP table. */
977
978#define default_MCA_trigger(idx) (1)
979#define default_MCA_polarity(idx) default_ISA_polarity(idx)
980
981static int MPBIOS_polarity(int idx)
982{
983 int bus = mp_irqs[idx].mp_srcbus;
984 int polarity;
985
986 /*
987 * Determine IRQ line polarity (high active or low active):
988 */
989 switch (mp_irqs[idx].mp_irqflag & 3) {
990 case 0: /* conforms, ie. bus-type dependent polarity */
991 {
992 polarity = test_bit(bus, mp_bus_not_pci)?
993 default_ISA_polarity(idx):
994 default_PCI_polarity(idx);
995 break;
996 }
997 case 1: /* high active */
998 {
999 polarity = 0;
1000 break;
1001 }
1002 case 2: /* reserved */
1003 {
1004 printk(KERN_WARNING "broken BIOS!!\n");
1005 polarity = 1;
1006 break;
1007 }
1008 case 3: /* low active */
1009 {
1010 polarity = 1;
1011 break;
1012 }
1013 default: /* invalid */
1014 {
1015 printk(KERN_WARNING "broken BIOS!!\n");
1016 polarity = 1;
1017 break;
1018 }
1019 }
1020 return polarity;
1021}
1022
1023static int MPBIOS_trigger(int idx)
1024{
1025 int bus = mp_irqs[idx].mp_srcbus;
1026 int trigger;
1027
1028 /*
1029 * Determine IRQ trigger mode (edge or level sensitive):
1030 */
1031 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
1032 case 0: /* conforms, ie. bus-type dependent */
1033 {
1034 trigger = test_bit(bus, mp_bus_not_pci)?
1035 default_ISA_trigger(idx):
1036 default_PCI_trigger(idx);
1037#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1038 switch (mp_bus_id_to_type[bus]) {
1039 case MP_BUS_ISA: /* ISA pin */
1040 {
1041 /* set before the switch */
1042 break;
1043 }
1044 case MP_BUS_EISA: /* EISA pin */
1045 {
1046 trigger = default_EISA_trigger(idx);
1047 break;
1048 }
1049 case MP_BUS_PCI: /* PCI pin */
1050 {
1051 /* set before the switch */
1052 break;
1053 }
1054 case MP_BUS_MCA: /* MCA pin */
1055 {
1056 trigger = default_MCA_trigger(idx);
1057 break;
1058 }
1059 default:
1060 {
1061 printk(KERN_WARNING "broken BIOS!!\n");
1062 trigger = 1;
1063 break;
1064 }
1065 }
1066#endif
1067 break;
1068 }
1069 case 1: /* edge */
1070 {
1071 trigger = 0;
1072 break;
1073 }
1074 case 2: /* reserved */
1075 {
1076 printk(KERN_WARNING "broken BIOS!!\n");
1077 trigger = 1;
1078 break;
1079 }
1080 case 3: /* level */
1081 {
1082 trigger = 1;
1083 break;
1084 }
1085 default: /* invalid */
1086 {
1087 printk(KERN_WARNING "broken BIOS!!\n");
1088 trigger = 0;
1089 break;
1090 }
1091 }
1092 return trigger;
1093}
1094
1095static inline int irq_polarity(int idx)
1096{
1097 return MPBIOS_polarity(idx);
1098}
1099
1100static inline int irq_trigger(int idx)
1101{
1102 return MPBIOS_trigger(idx);
1103}
1104
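MPBIOS_polarity() and MPBIOS_trigger() above decode the same mp_irqflag word: bits 1:0 carry the polarity, bits 3:2 the trigger mode, with 0 meaning "conforms to the bus type". A stand-alone sketch with a hypothetical flag value:

#include <stdio.h>

/* Sketch of the mp_irqflag field extraction above; the flag value is
 * invented (3 = low active, 3 = level triggered). */
int main(void)
{
	unsigned short irqflag = 0x000f;

	printf("polarity field = %d, trigger field = %d\n",
	       irqflag & 3, (irqflag >> 2) & 3);
	return 0;
}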
1105static int pin_2_irq(int idx, int apic, int pin)
1106{
1107 int irq, i;
1108 int bus = mp_irqs[idx].mp_srcbus;
1109
1110 /*
1111 * Debugging check, we are in big trouble if this message pops up!
1112 */
1113 if (mp_irqs[idx].mp_dstirq != pin)
1114 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1115
1116 if (test_bit(bus, mp_bus_not_pci))
1117 irq = mp_irqs[idx].mp_srcbusirq;
1118 else {
1119 /*
1120 * PCI IRQs are mapped in order
1121 */
1122 i = irq = 0;
1123 while (i < apic)
1124 irq += nr_ioapic_registers[i++];
1125 irq += pin;
1126
1127 /*
1128 * For MPS mode, so far only needed by ES7000 platform
1129 */
1130 if (ioapic_renumber_irq)
1131 irq = ioapic_renumber_irq(apic, irq);
1132 }
1133
1134 /*
1135 * PCI IRQ command line redirection. Yes, limits are hardcoded.
1136 */
1137 if ((pin >= 16) && (pin <= 23)) {
1138 if (pirq_entries[pin-16] != -1) {
1139 if (!pirq_entries[pin-16]) {
1140 apic_printk(APIC_VERBOSE, KERN_DEBUG
1141 "disabling PIRQ%d\n", pin-16);
1142 } else {
1143 irq = pirq_entries[pin-16];
1144 apic_printk(APIC_VERBOSE, KERN_DEBUG
1145 "using PIRQ%d -> IRQ %d\n",
1146 pin-16, irq);
1147 }
1148 }
1149 }
1150 return irq;
1151}
1152
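The PCI branch of pin_2_irq() above numbers GSIs by walking the IO-APICs in order and adding the pin offset. A stand-alone sketch with hypothetical register counts:

#include <stdio.h>

/* Sketch of the PCI irq accumulation in pin_2_irq() above; the per-APIC
 * register counts and the chosen pin are made up. */
int main(void)
{
	int nr_registers[] = { 24, 24, 24 };	/* pins per IO-APIC */
	int apic = 1, pin = 5;			/* pin 5 on the second IO-APIC */
	int i = 0, irq = 0;

	while (i < apic)
		irq += nr_registers[i++];
	irq += pin;
	printf("irq = %d\n", irq);		/* 29 */
	return 0;
}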
1153static inline int IO_APIC_irq_trigger(int irq)
1154{
1155 int apic, idx, pin;
1156
1157 for (apic = 0; apic < nr_ioapics; apic++) {
1158 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1159 idx = find_irq_entry(apic, pin, mp_INT);
1160 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1161 return irq_trigger(idx);
1162 }
1163 }
1164 /*
1165 * nonexistent IRQs are edge default
1166 */
1167 return 0;
1168}
1169
1170/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
1171static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
1172
1173static int __assign_irq_vector(int irq)
1174{
1175 static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
1176 int vector, offset;
1177
1178 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1179
1180 if (irq_vector[irq] > 0)
1181 return irq_vector[irq];
1182
1183 vector = current_vector;
1184 offset = current_offset;
1185next:
1186 vector += 8;
1187 if (vector >= first_system_vector) {
1188 offset = (offset + 1) % 8;
1189 vector = FIRST_DEVICE_VECTOR + offset;
1190 }
1191 if (vector == current_vector)
1192 return -ENOSPC;
1193 if (test_and_set_bit(vector, used_vectors))
1194 goto next;
1195
1196 current_vector = vector;
1197 current_offset = offset;
1198 irq_vector[irq] = vector;
1199
1200 return vector;
1201}
1202
1203static int assign_irq_vector(int irq)
1204{
1205 unsigned long flags;
1206 int vector;
1207
1208 spin_lock_irqsave(&vector_lock, flags);
1209 vector = __assign_irq_vector(irq);
1210 spin_unlock_irqrestore(&vector_lock, flags);
1211
1212 return vector;
1213}
1214
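__assign_irq_vector() above advances candidates in strides of 8 so successive devices land in different interrupt priority classes, rotating the offset when it runs into the system vectors. A stand-alone sketch of just that walk (the used_vectors bitmap is omitted, and the bounds are illustrative rather than the kernel's real vector constants):

#include <stdio.h>

/* Sketch of the stride-8 vector search above; FIRST_VECTOR and
 * SYSTEM_VECTOR are assumed values, not the kernel's definitions. */
#define FIRST_VECTOR	0x41
#define SYSTEM_VECTOR	0xef

int main(void)
{
	int vector = FIRST_VECTOR, offset = 0, i;

	for (i = 0; i < 4; i++) {
		vector += 8;
		if (vector >= SYSTEM_VECTOR) {	/* wrap: rotate the offset */
			offset = (offset + 1) % 8;
			vector = FIRST_VECTOR + offset;
		}
		printf("allocation %d -> vector 0x%x\n", i, vector);
	}
	return 0;
}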
1215static struct irq_chip ioapic_chip;
1216
1217#define IOAPIC_AUTO -1
1218#define IOAPIC_EDGE 0
1219#define IOAPIC_LEVEL 1
1220
1221static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1222{
1223 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1224 trigger == IOAPIC_LEVEL) {
1225 irq_desc[irq].status |= IRQ_LEVEL;
1226 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1227 handle_fasteoi_irq, "fasteoi");
1228 } else {
1229 irq_desc[irq].status &= ~IRQ_LEVEL;
1230 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1231 handle_edge_irq, "edge");
1232 }
1233 set_intr_gate(vector, interrupt[irq]);
1234}
1235
1236static void __init setup_IO_APIC_irqs(void)
1237{
1238 struct IO_APIC_route_entry entry;
1239 int apic, pin, idx, irq, first_notcon = 1, vector;
1240
1241 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1242
1243 for (apic = 0; apic < nr_ioapics; apic++) {
1244 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1245
1246 /*
1247 * add it to the IO-APIC irq-routing table:
1248 */
1249 memset(&entry, 0, sizeof(entry));
1250
1251 entry.delivery_mode = INT_DELIVERY_MODE;
1252 entry.dest_mode = INT_DEST_MODE;
1253 entry.mask = 0; /* enable IRQ */
1254 entry.dest.logical.logical_dest =
1255 cpu_mask_to_apicid(TARGET_CPUS);
1256
1257 idx = find_irq_entry(apic, pin, mp_INT);
1258 if (idx == -1) {
1259 if (first_notcon) {
1260 apic_printk(APIC_VERBOSE, KERN_DEBUG
1261 " IO-APIC (apicid-pin) %d-%d",
1262 mp_ioapics[apic].mp_apicid,
1263 pin);
1264 first_notcon = 0;
1265 } else
1266 apic_printk(APIC_VERBOSE, ", %d-%d",
1267 mp_ioapics[apic].mp_apicid, pin);
1268 continue;
1269 }
1270
1271 if (!first_notcon) {
1272 apic_printk(APIC_VERBOSE, " not connected.\n");
1273 first_notcon = 1;
1274 }
1275
1276 entry.trigger = irq_trigger(idx);
1277 entry.polarity = irq_polarity(idx);
1278
1279 if (irq_trigger(idx)) {
1280 entry.trigger = 1;
1281 entry.mask = 1;
1282 }
1283
1284 irq = pin_2_irq(idx, apic, pin);
1285 /*
1286 * skip adding the timer int on secondary nodes, which causes
1287 * a small but painful rift in the time-space continuum
1288 */
1289 if (multi_timer_check(apic, irq))
1290 continue;
1291 else
1292 add_pin_to_irq(irq, apic, pin);
1293
1294 if (!apic && !IO_APIC_IRQ(irq))
1295 continue;
1296
1297 if (IO_APIC_IRQ(irq)) {
1298 vector = assign_irq_vector(irq);
1299 entry.vector = vector;
1300 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1301
1302 if (!apic && (irq < 16))
1303 disable_8259A_irq(irq);
1304 }
1305 ioapic_write_entry(apic, pin, entry);
1306 }
1307 }
1308
1309 if (!first_notcon)
1310 apic_printk(APIC_VERBOSE, " not connected.\n");
1311}
1312
1313/*
1314 * Set up the timer pin, possibly with the 8259A-master behind.
1315 */
1316static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1317 int vector)
1318{
1319 struct IO_APIC_route_entry entry;
1320
1321 memset(&entry, 0, sizeof(entry));
1322
1323 /*
1324 * We use logical delivery to get the timer IRQ
1325 * to the first CPU.
1326 */
1327 entry.dest_mode = INT_DEST_MODE;
1328 entry.mask = 1; /* mask IRQ now */
1329 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1330 entry.delivery_mode = INT_DELIVERY_MODE;
1331 entry.polarity = 0;
1332 entry.trigger = 0;
1333 entry.vector = vector;
1334
1335 /*
1336	 * The timer IRQ doesn't have to know that behind the
1337	 * scenes we may have an 8259A-master in AEOI mode ...
1338 */
1339 ioapic_register_intr(0, vector, IOAPIC_EDGE);
1340
1341 /*
1342 * Add it to the IO-APIC irq-routing table:
1343 */
1344 ioapic_write_entry(apic, pin, entry);
1345}
1346
1347
1348__apicdebuginit(void) print_IO_APIC(void)
1349{
1350 int apic, i;
1351 union IO_APIC_reg_00 reg_00;
1352 union IO_APIC_reg_01 reg_01;
1353 union IO_APIC_reg_02 reg_02;
1354 union IO_APIC_reg_03 reg_03;
1355 unsigned long flags;
1356
1357 if (apic_verbosity == APIC_QUIET)
1358 return;
1359
1360 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1361 for (i = 0; i < nr_ioapics; i++)
1362 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1363 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
1364
1365 /*
1366 * We are a bit conservative about what we expect. We have to
1367 * know about every hardware change ASAP.
1368 */
1369 printk(KERN_INFO "testing the IO APIC.......................\n");
1370
1371 for (apic = 0; apic < nr_ioapics; apic++) {
1372
1373 spin_lock_irqsave(&ioapic_lock, flags);
1374 reg_00.raw = io_apic_read(apic, 0);
1375 reg_01.raw = io_apic_read(apic, 1);
1376 if (reg_01.bits.version >= 0x10)
1377 reg_02.raw = io_apic_read(apic, 2);
1378 if (reg_01.bits.version >= 0x20)
1379 reg_03.raw = io_apic_read(apic, 3);
1380 spin_unlock_irqrestore(&ioapic_lock, flags);
1381
1382 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1383 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1384 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1385 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1386 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1387
1388 printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
1389 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
1390
1391 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1392 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1393
1394 /*
1395 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1396 * but the value of reg_02 is read as the previous read register
1397 * value, so ignore it if reg_02 == reg_01.
1398 */
1399 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1400 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1401 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1402 }
1403
1404 /*
1405 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1406 * or reg_03, but the value of reg_0[23] is read as the previous read
1407 * register value, so ignore it if reg_03 == reg_0[12].
1408 */
1409 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1410 reg_03.raw != reg_01.raw) {
1411 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1412 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1413 }
1414
1415 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1416
1417 printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
1418 " Stat Dest Deli Vect: \n");
1419
1420 for (i = 0; i <= reg_01.bits.entries; i++) {
1421 struct IO_APIC_route_entry entry;
1422
1423 entry = ioapic_read_entry(apic, i);
1424
1425 printk(KERN_DEBUG " %02x %03X %02X ",
1426 i,
1427 entry.dest.logical.logical_dest,
1428 entry.dest.physical.physical_dest
1429 );
1430
1431 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1432 entry.mask,
1433 entry.trigger,
1434 entry.irr,
1435 entry.polarity,
1436 entry.delivery_status,
1437 entry.dest_mode,
1438 entry.delivery_mode,
1439 entry.vector
1440 );
1441 }
1442 }
1443 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1444 for (i = 0; i < NR_IRQS; i++) {
1445 struct irq_pin_list *entry = irq_2_pin + i;
1446 if (entry->pin < 0)
1447 continue;
1448 printk(KERN_DEBUG "IRQ%d ", i);
1449 for (;;) {
1450 printk("-> %d:%d", entry->apic, entry->pin);
1451 if (!entry->next)
1452 break;
1453 entry = irq_2_pin + entry->next;
1454 }
1455 printk("\n");
1456 }
1457
1458 printk(KERN_INFO ".................................... done.\n");
1459
1460 return;
1461}
1462
1463__apicdebuginit(void) print_APIC_bitfield(int base)
1464{
1465 unsigned int v;
1466 int i, j;
1467
1468 if (apic_verbosity == APIC_QUIET)
1469 return;
1470
1471 printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
1472 for (i = 0; i < 8; i++) {
1473 v = apic_read(base + i*0x10);
1474 for (j = 0; j < 32; j++) {
1475 if (v & (1<<j))
1476 printk("1");
1477 else
1478 printk("0");
1479 }
1480 printk("\n");
1481 }
1482}
1483
1484__apicdebuginit(void) print_local_APIC(void *dummy)
1485{
1486 unsigned int v, ver, maxlvt;
1487 u64 icr;
1488
1489 if (apic_verbosity == APIC_QUIET)
1490 return;
1491
1492 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1493 smp_processor_id(), hard_smp_processor_id());
1494 v = apic_read(APIC_ID);
1495 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1496 GET_APIC_ID(v));
1497 v = apic_read(APIC_LVR);
1498 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1499 ver = GET_APIC_VERSION(v);
1500 maxlvt = lapic_get_maxlvt();
1501
1502 v = apic_read(APIC_TASKPRI);
1503 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1504
1505 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1506 v = apic_read(APIC_ARBPRI);
1507 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1508 v & APIC_ARBPRI_MASK);
1509 v = apic_read(APIC_PROCPRI);
1510 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1511 }
1512
1513 v = apic_read(APIC_EOI);
1514 printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
1515 v = apic_read(APIC_RRR);
1516 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1517 v = apic_read(APIC_LDR);
1518 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1519 v = apic_read(APIC_DFR);
1520 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1521 v = apic_read(APIC_SPIV);
1522 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1523
1524 printk(KERN_DEBUG "... APIC ISR field:\n");
1525 print_APIC_bitfield(APIC_ISR);
1526 printk(KERN_DEBUG "... APIC TMR field:\n");
1527 print_APIC_bitfield(APIC_TMR);
1528 printk(KERN_DEBUG "... APIC IRR field:\n");
1529 print_APIC_bitfield(APIC_IRR);
1530
1531 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1532 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1533 apic_write(APIC_ESR, 0);
1534 v = apic_read(APIC_ESR);
1535 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1536 }
1537
1538 icr = apic_icr_read();
1539 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
1540 printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
1541
1542 v = apic_read(APIC_LVTT);
1543 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
1544
1545 if (maxlvt > 3) { /* PC is LVT#4. */
1546 v = apic_read(APIC_LVTPC);
1547 printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
1548 }
1549 v = apic_read(APIC_LVT0);
1550 printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
1551 v = apic_read(APIC_LVT1);
1552 printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
1553
1554 if (maxlvt > 2) { /* ERR is LVT#3. */
1555 v = apic_read(APIC_LVTERR);
1556 printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
1557 }
1558
1559 v = apic_read(APIC_TMICT);
1560 printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
1561 v = apic_read(APIC_TMCCT);
1562 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
1563 v = apic_read(APIC_TDCR);
1564 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
1565 printk("\n");
1566}
1567
1568__apicdebuginit(void) print_all_local_APICs(void)
1569{
1570 on_each_cpu(print_local_APIC, NULL, 1);
1571}
1572
1573__apicdebuginit(void) print_PIC(void)
1574{
1575 unsigned int v;
1576 unsigned long flags;
1577
1578 if (apic_verbosity == APIC_QUIET)
1579 return;
1580
1581 printk(KERN_DEBUG "\nprinting PIC contents\n");
1582
1583 spin_lock_irqsave(&i8259A_lock, flags);
1584
1585 v = inb(0xa1) << 8 | inb(0x21);
1586 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
1587
1588 v = inb(0xa0) << 8 | inb(0x20);
1589 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
1590
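	/*
	 * OCW3 value 0x0b selects the In-Service Register for the next read;
	 * 0x0a switches the 8259A back to reading the IRR (its default).
	 */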
1591 outb(0x0b, 0xa0);
1592 outb(0x0b, 0x20);
1593 v = inb(0xa0) << 8 | inb(0x20);
1594 outb(0x0a, 0xa0);
1595 outb(0x0a, 0x20);
1596
1597 spin_unlock_irqrestore(&i8259A_lock, flags);
1598
1599 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1600
1601 v = inb(0x4d1) << 8 | inb(0x4d0);
1602 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1603}
1604
1605__apicdebuginit(int) print_all_ICs(void)
1606{
1607 print_PIC();
1608 print_all_local_APICs();
1609 print_IO_APIC();
1610
1611 return 0;
1612}
1613
1614fs_initcall(print_all_ICs);
1615
1616
1617static void __init enable_IO_APIC(void)
1618{
1619 union IO_APIC_reg_01 reg_01;
1620 int i8259_apic, i8259_pin;
1621 int i, apic;
1622 unsigned long flags;
1623
1624 for (i = 0; i < PIN_MAP_SIZE; i++) {
1625 irq_2_pin[i].pin = -1;
1626 irq_2_pin[i].next = 0;
1627 }
1628 if (!pirqs_enabled)
1629 for (i = 0; i < MAX_PIRQS; i++)
1630 pirq_entries[i] = -1;
1631
1632 /*
1633 * The number of IO-APIC IRQ registers (== #pins):
1634 */
1635 for (apic = 0; apic < nr_ioapics; apic++) {
1636 spin_lock_irqsave(&ioapic_lock, flags);
1637 reg_01.raw = io_apic_read(apic, 1);
1638 spin_unlock_irqrestore(&ioapic_lock, flags);
1639 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1640 }
1641 for (apic = 0; apic < nr_ioapics; apic++) {
1642 int pin;
1643 /* See if any of the pins is in ExtINT mode */
1644 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1645 struct IO_APIC_route_entry entry;
1646 entry = ioapic_read_entry(apic, pin);
1647
1648
1649 /* If the interrupt line is enabled and in ExtInt mode
1650 * I have found the pin where the i8259 is connected.
1651 */
1652 if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1653 ioapic_i8259.apic = apic;
1654 ioapic_i8259.pin = pin;
1655 goto found_i8259;
1656 }
1657 }
1658 }
1659 found_i8259:
1660 /* Look to see whether the MP table has reported the ExtINT */
1661 /* If we could not find the appropriate pin by looking at the ioapic,
1662 * the i8259 probably is not connected to the ioapic, but give the
1663 * mptable a chance anyway.
1664 */
1665 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1666 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1667 /* Trust the MP table if nothing is setup in the hardware */
1668 if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1669 printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1670 ioapic_i8259.pin = i8259_pin;
1671 ioapic_i8259.apic = i8259_apic;
1672 }
1673 /* Complain if the MP table and the hardware disagree */
1674 if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1675 (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1676 {
1677 printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1678 }
1679
1680 /*
1681 * Do not trust the IO-APIC being empty at bootup
1682 */
1683 clear_IO_APIC();
1684}
1685
1686/*
1687 * Not an __init, needed by the reboot code
1688 */
1689void disable_IO_APIC(void)
1690{
1691 /*
1692 * Clear the IO-APIC before rebooting:
1693 */
1694 clear_IO_APIC();
1695
1696 /*
1697 * If the i8259 is routed through an IOAPIC
1698 * Put that IOAPIC in virtual wire mode
1699 * so legacy interrupts can be delivered.
1700 */
1701 if (ioapic_i8259.pin != -1) {
1702 struct IO_APIC_route_entry entry;
1703
1704 memset(&entry, 0, sizeof(entry));
1705 entry.mask = 0; /* Enabled */
1706 entry.trigger = 0; /* Edge */
1707 entry.irr = 0;
1708 entry.polarity = 0; /* High */
1709 entry.delivery_status = 0;
1710 entry.dest_mode = 0; /* Physical */
1711 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1712 entry.vector = 0;
1713 entry.dest.physical.physical_dest = read_apic_id();
1714
1715 /*
1716 * Add it to the IO-APIC irq-routing table:
1717 */
1718 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1719 }
1720 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1721}
1722
1723/*
1724 * function to set the IO-APIC physical IDs based on the
1725 * values stored in the MPC table.
1726 *
1727 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1728 */
1729
1730static void __init setup_ioapic_ids_from_mpc(void)
1731{
1732 union IO_APIC_reg_00 reg_00;
1733 physid_mask_t phys_id_present_map;
1734 int apic;
1735 int i;
1736 unsigned char old_id;
1737 unsigned long flags;
1738
1739 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1740 return;
1741
1742 /*
1743 * Don't check I/O APIC IDs for xAPIC systems. They have
1744 * no meaning without the serial APIC bus.
1745 */
1746 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1747 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1748 return;
1749 /*
1750 * This is broken; anything with a real cpu count has to
1751 * circumvent this idiocy regardless.
1752 */
1753 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1754
1755 /*
1756 * Set the IOAPIC ID to the value stored in the MPC table.
1757 */
1758 for (apic = 0; apic < nr_ioapics; apic++) {
1759
1760 /* Read the register 0 value */
1761 spin_lock_irqsave(&ioapic_lock, flags);
1762 reg_00.raw = io_apic_read(apic, 0);
1763 spin_unlock_irqrestore(&ioapic_lock, flags);
1764
1765 old_id = mp_ioapics[apic].mp_apicid;
1766
1767 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1768 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1769 apic, mp_ioapics[apic].mp_apicid);
1770 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1771 reg_00.bits.ID);
1772 mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1773 }
1774
1775 /*
1776 * Sanity check, is the ID really free? Every APIC in a
1777 * system must have a unique ID or we get lots of nice
1778 * 'stuck on smp_invalidate_needed IPI wait' messages.
1779 */
1780 if (check_apicid_used(phys_id_present_map,
1781 mp_ioapics[apic].mp_apicid)) {
1782 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1783 apic, mp_ioapics[apic].mp_apicid);
1784 for (i = 0; i < get_physical_broadcast(); i++)
1785 if (!physid_isset(i, phys_id_present_map))
1786 break;
1787 if (i >= get_physical_broadcast())
1788 panic("Max APIC ID exceeded!\n");
1789 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1790 i);
1791 physid_set(i, phys_id_present_map);
1792 mp_ioapics[apic].mp_apicid = i;
1793 } else {
1794 physid_mask_t tmp;
1795 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1796 apic_printk(APIC_VERBOSE, "Setting %d in the "
1797 "phys_id_present_map\n",
1798 mp_ioapics[apic].mp_apicid);
1799 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1800 }
1801
1802
1803 /*
1804 * We need to adjust the IRQ routing table
1805 * if the ID changed.
1806 */
1807 if (old_id != mp_ioapics[apic].mp_apicid)
1808 for (i = 0; i < mp_irq_entries; i++)
1809 if (mp_irqs[i].mp_dstapic == old_id)
1810 mp_irqs[i].mp_dstapic
1811 = mp_ioapics[apic].mp_apicid;
1812
1813 /*
1814 * Read the right value from the MPC table and
1815 * write it into the ID register.
1816 */
1817 apic_printk(APIC_VERBOSE, KERN_INFO
1818 "...changing IO-APIC physical APIC ID to %d ...",
1819 mp_ioapics[apic].mp_apicid);
1820
1821 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1822 spin_lock_irqsave(&ioapic_lock, flags);
1823 io_apic_write(apic, 0, reg_00.raw);
1824 spin_unlock_irqrestore(&ioapic_lock, flags);
1825
1826 /*
1827 * Sanity check
1828 */
1829 spin_lock_irqsave(&ioapic_lock, flags);
1830 reg_00.raw = io_apic_read(apic, 0);
1831 spin_unlock_irqrestore(&ioapic_lock, flags);
1832 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1833 printk("could not set ID!\n");
1834 else
1835 apic_printk(APIC_VERBOSE, " ok.\n");
1836 }
1837}
1838
1839int no_timer_check __initdata;
1840
1841static int __init notimercheck(char *s)
1842{
1843 no_timer_check = 1;
1844 return 1;
1845}
1846__setup("no_timer_check", notimercheck);
1847
1848/*
1849 * There is a nasty bug in some older SMP boards, their mptable lies
1850 * about the timer IRQ. We do the following to work around the situation:
1851 *
1852 * - timer IRQ defaults to IO-APIC IRQ
1853 * - if this function detects that timer IRQs are defunct, then we fall
1854 * back to ISA timer IRQs
1855 */
1856static int __init timer_irq_works(void)
1857{
1858 unsigned long t1 = jiffies;
1859 unsigned long flags;
1860
1861 if (no_timer_check)
1862 return 1;
1863
1864 local_save_flags(flags);
1865 local_irq_enable();
1866 /* Let ten ticks pass... */
1867 mdelay((10 * 1000) / HZ);
1868 local_irq_restore(flags);
1869
1870 /*
1871 * Expect a few ticks at least, to be sure some possible
1872 * glue logic does not lock up after one or two first
1873 * ticks in a non-ExtINT mode. Also the local APIC
1874 * might have cached one ExtINT interrupt. Finally, at
1875 * least one tick may be lost due to delays.
1876 */
1877 if (time_after(jiffies, t1 + 4))
1878 return 1;
1879
1880 return 0;
1881}
1882
1883/*
1884 * In the SMP+IOAPIC case it might happen that there are an unspecified
1885 * number of pending IRQ events unhandled. These cases are very rare,
1886 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
1887 * better to do it this way as thus we do not have to be aware of
1888 * 'pending' interrupts in the IRQ path, except at this point.
1889 */
1890/*
1891 * Edge triggered needs to resend any interrupt
1892 * that was delayed but this is now handled in the device
1893 * independent code.
1894 */
1895
1896/*
1897 * Startup quirk:
1898 *
1899 * Starting up an edge-triggered IO-APIC interrupt is
1900 * nasty - we need to make sure that we get the edge.
1901 * If it is already asserted for some reason, we need to
1902 * return 1 to indicate that it was pending.
1903 *
1904 * This is not complete - we should be able to fake
1905 * an edge even if it isn't on the 8259A...
1906 *
1907 * (We do this for level-triggered IRQs too - it cannot hurt.)
1908 */
1909static unsigned int startup_ioapic_irq(unsigned int irq)
1910{
1911 int was_pending = 0;
1912 unsigned long flags;
1913
1914 spin_lock_irqsave(&ioapic_lock, flags);
1915 if (irq < 16) {
1916 disable_8259A_irq(irq);
1917 if (i8259A_irq_pending(irq))
1918 was_pending = 1;
1919 }
1920 __unmask_IO_APIC_irq(irq);
1921 spin_unlock_irqrestore(&ioapic_lock, flags);
1922
1923 return was_pending;
1924}
1925
1926static void ack_ioapic_irq(unsigned int irq)
1927{
1928 move_native_irq(irq);
1929 ack_APIC_irq();
1930}
1931
1932static void ack_ioapic_quirk_irq(unsigned int irq)
1933{
1934 unsigned long v;
1935 int i;
1936
1937 move_native_irq(irq);
1938/*
1939 * It appears there is an erratum which affects at least version 0x11
1940 * of I/O APIC (that's the 82093AA and cores integrated into various
1941 * chipsets). Under certain conditions a level-triggered interrupt is
1942 * erroneously delivered as edge-triggered one but the respective IRR
1943 * bit gets set nevertheless. As a result the I/O unit expects an EOI
1944 * message but it will never arrive and further interrupts are blocked
1945 * from the source. The exact reason is so far unknown, but the
1946 * phenomenon was observed when two consecutive interrupt requests
1947 * from a given source get delivered to the same CPU and the source is
1948 * temporarily disabled in between.
1949 *
1950 * A workaround is to simulate an EOI message manually. We achieve it
1951 * by setting the trigger mode to edge and then to level when the edge
1952 * trigger mode gets detected in the TMR of a local APIC for a
1953 * level-triggered interrupt. We mask the source for the time of the
1954 * operation to prevent an edge-triggered interrupt escaping meanwhile.
1955 * The idea is from Manfred Spraul. --macro
1956 */
1957 i = irq_vector[irq];
1958
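	/*
	 * TMR registers are 0x10 apart and each covers 32 vectors, so the
	 * byte offset for vector i is (i / 32) * 0x10 == (i & ~0x1f) >> 1.
	 */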
1959 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1960
1961 ack_APIC_irq();
1962
1963 if (!(v & (1 << (i & 0x1f)))) {
1964 atomic_inc(&irq_mis_count);
1965 spin_lock(&ioapic_lock);
1966 __mask_and_edge_IO_APIC_irq(irq);
1967 __unmask_and_level_IO_APIC_irq(irq);
1968 spin_unlock(&ioapic_lock);
1969 }
1970}
1971
1972static int ioapic_retrigger_irq(unsigned int irq)
1973{
1974 send_IPI_self(irq_vector[irq]);
1975
1976 return 1;
1977}
1978
1979static struct irq_chip ioapic_chip __read_mostly = {
1980 .name = "IO-APIC",
1981 .startup = startup_ioapic_irq,
1982 .mask = mask_IO_APIC_irq,
1983 .unmask = unmask_IO_APIC_irq,
1984 .ack = ack_ioapic_irq,
1985 .eoi = ack_ioapic_quirk_irq,
1986#ifdef CONFIG_SMP
1987 .set_affinity = set_ioapic_affinity_irq,
1988#endif
1989 .retrigger = ioapic_retrigger_irq,
1990};
1991
1992
1993static inline void init_IO_APIC_traps(void)
1994{
1995 int irq;
1996
1997 /*
1998 * NOTE! The local APIC isn't very good at handling
1999 * multiple interrupts at the same interrupt level.
2000 * As the interrupt level is determined by taking the
2001 * vector number and shifting that right by 4, we
2002 * want to spread these out a bit so that they don't
2003 * all fall in the same interrupt level.
2004 *
2005 * Also, we've got to be careful not to trash gate
2006 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2007 */
2008 for (irq = 0; irq < NR_IRQS ; irq++) {
2009 if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
2010 /*
2011 * Hmm.. We don't have an entry for this,
2012 * so default to an old-fashioned 8259
2013 * interrupt if we can..
2014 */
2015 if (irq < 16)
2016 make_8259A_irq(irq);
2017 else
2018 /* Strange. Oh, well.. */
2019 irq_desc[irq].chip = &no_irq_chip;
2020 }
2021 }
2022}
2023
2024/*
2025 * The local APIC irq-chip implementation:
2026 */
2027
2028static void ack_lapic_irq(unsigned int irq)
2029{
2030 ack_APIC_irq();
2031}
2032
2033static void mask_lapic_irq(unsigned int irq)
2034{
2035 unsigned long v;
2036
2037 v = apic_read(APIC_LVT0);
2038 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2039}
2040
2041static void unmask_lapic_irq(unsigned int irq)
2042{
2043 unsigned long v;
2044
2045 v = apic_read(APIC_LVT0);
2046 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2047}
2048
2049static struct irq_chip lapic_chip __read_mostly = {
2050 .name = "local-APIC",
2051 .mask = mask_lapic_irq,
2052 .unmask = unmask_lapic_irq,
2053 .ack = ack_lapic_irq,
2054};
2055
2056static void lapic_register_intr(int irq, int vector)
2057{
2058 irq_desc[irq].status &= ~IRQ_LEVEL;
2059 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2060 "edge");
2061 set_intr_gate(vector, interrupt[irq]);
2062}
2063
2064static void __init setup_nmi(void)
2065{
2066 /*
2067 * Dirty trick to enable the NMI watchdog ...
2068 * We put the 8259A master into AEOI mode and
2069 * unmask on all local APICs LVT0 as NMI.
2070 *
2071 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2072 * is from Maciej W. Rozycki - so we do not have to EOI from
2073 * the NMI handler or the timer interrupt.
2074 */
2075 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2076
2077 enable_NMI_through_LVT0();
2078
2079 apic_printk(APIC_VERBOSE, " done.\n");
2080}
2081
2082/*
2083 * This looks a bit hackish but it's about the only way of sending
2084 * a few INTA cycles to 8259As and any associated glue logic. ICR does
2085 * not support the ExtINT mode, unfortunately. We need to send these
2086 * cycles as some i82489DX-based boards have glue logic that keeps the
2087 * 8259A interrupt line asserted until INTA. --macro
2088 */
2089static inline void __init unlock_ExtINT_logic(void)
2090{
2091 int apic, pin, i;
2092 struct IO_APIC_route_entry entry0, entry1;
2093 unsigned char save_control, save_freq_select;
2094
2095 pin = find_isa_irq_pin(8, mp_INT);
2096 if (pin == -1) {
2097 WARN_ON_ONCE(1);
2098 return;
2099 }
2100 apic = find_isa_irq_apic(8, mp_INT);
2101 if (apic == -1) {
2102 WARN_ON_ONCE(1);
2103 return;
2104 }
2105
2106 entry0 = ioapic_read_entry(apic, pin);
2107 clear_IO_APIC_pin(apic, pin);
2108
2109 memset(&entry1, 0, sizeof(entry1));
2110
2111 entry1.dest_mode = 0; /* physical delivery */
2112 entry1.mask = 0; /* unmask IRQ now */
2113 entry1.dest.physical.physical_dest = hard_smp_processor_id();
2114 entry1.delivery_mode = dest_ExtINT;
2115 entry1.polarity = entry0.polarity;
2116 entry1.trigger = 0;
2117 entry1.vector = 0;
2118
2119 ioapic_write_entry(apic, pin, entry1);
2120
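	/*
	 * Program the RTC for a 1024 Hz periodic interrupt (rate select 6)
	 * and enable it; the resulting IRQ 8 assertions provide the INTA
	 * cycles this function needs.
	 */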
2121 save_control = CMOS_READ(RTC_CONTROL);
2122 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2123 CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2124 RTC_FREQ_SELECT);
2125 CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2126
2127 i = 100;
2128 while (i-- > 0) {
2129 mdelay(10);
2130 if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2131 i -= 10;
2132 }
2133
2134 CMOS_WRITE(save_control, RTC_CONTROL);
2135 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2136 clear_IO_APIC_pin(apic, pin);
2137
2138 ioapic_write_entry(apic, pin, entry0);
2139}
2140
2141/*
2142 * This code may look a bit paranoid, but it's supposed to cooperate with
2143 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2144 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2145 * fanatically on his truly buggy board.
2146 */
2147static inline void __init check_timer(void)
2148{
2149 int apic1, pin1, apic2, pin2;
2150 int no_pin1 = 0;
2151 int vector;
2152 unsigned int ver;
2153 unsigned long flags;
2154
2155 local_irq_save(flags);
2156
2157 ver = apic_read(APIC_LVR);
2158 ver = GET_APIC_VERSION(ver);
2159
2160 /*
2161 * get/set the timer IRQ vector:
2162 */
2163 disable_8259A_irq(0);
2164 vector = assign_irq_vector(0);
2165 set_intr_gate(vector, interrupt[0]);
2166
2167 /*
2168 * As IRQ0 is to be enabled in the 8259A, the virtual
2169 * wire has to be disabled in the local APIC. Also
2170 * timer interrupts need to be acknowledged manually in
2171 * the 8259A for the i82489DX when using the NMI
2172 * watchdog as that APIC treats NMIs as level-triggered.
2173 * The AEOI mode will finish them in the 8259A
2174 * automatically.
2175 */
2176 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2177 init_8259A(1);
2178 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2179
2180 pin1 = find_isa_irq_pin(0, mp_INT);
2181 apic1 = find_isa_irq_apic(0, mp_INT);
2182 pin2 = ioapic_i8259.pin;
2183 apic2 = ioapic_i8259.apic;
2184
2185 apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2186 "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2187 vector, apic1, pin1, apic2, pin2);
2188
2189 /*
2190 * Some BIOS writers are clueless and report the ExtINTA
2191 * I/O APIC input from the cascaded 8259A as the timer
2192 * interrupt input. So just in case, if only one pin
2193 * was found above, try it both directly and through the
2194 * 8259A.
2195 */
2196 if (pin1 == -1) {
2197 pin1 = pin2;
2198 apic1 = apic2;
2199 no_pin1 = 1;
2200 } else if (pin2 == -1) {
2201 pin2 = pin1;
2202 apic2 = apic1;
2203 }
2204
2205 if (pin1 != -1) {
2206 /*
2207 * Ok, does IRQ0 through the IOAPIC work?
2208 */
2209 if (no_pin1) {
2210 add_pin_to_irq(0, apic1, pin1);
2211 setup_timer_IRQ0_pin(apic1, pin1, vector);
2212 }
2213 unmask_IO_APIC_irq(0);
2214 if (timer_irq_works()) {
2215 if (nmi_watchdog == NMI_IO_APIC) {
2216 setup_nmi();
2217 enable_8259A_irq(0);
2218 }
2219 if (disable_timer_pin_1 > 0)
2220 clear_IO_APIC_pin(0, pin1);
2221 goto out;
2222 }
2223 clear_IO_APIC_pin(apic1, pin1);
2224 if (!no_pin1)
2225 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
2226 "8254 timer not connected to IO-APIC\n");
2227
2228 apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
2229 "(IRQ0) through the 8259A ...\n");
2230 apic_printk(APIC_QUIET, KERN_INFO
2231 "..... (found apic %d pin %d) ...\n", apic2, pin2);
2232 /*
2233 * legacy devices should be connected to IO APIC #0
2234 */
2235 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2236 setup_timer_IRQ0_pin(apic2, pin2, vector);
2237 unmask_IO_APIC_irq(0);
2238 enable_8259A_irq(0);
2239 if (timer_irq_works()) {
2240 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2241 timer_through_8259 = 1;
2242 if (nmi_watchdog == NMI_IO_APIC) {
2243 disable_8259A_irq(0);
2244 setup_nmi();
2245 enable_8259A_irq(0);
2246 }
2247 goto out;
2248 }
2249 /*
2250 * Cleanup, just in case ...
2251 */
2252 disable_8259A_irq(0);
2253 clear_IO_APIC_pin(apic2, pin2);
2254 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2255 }
2256
2257 if (nmi_watchdog == NMI_IO_APIC) {
2258 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2259 "through the IO-APIC - disabling NMI Watchdog!\n");
2260 nmi_watchdog = NMI_NONE;
2261 }
2262 timer_ack = 0;
2263
2264 apic_printk(APIC_QUIET, KERN_INFO
2265 "...trying to set up timer as Virtual Wire IRQ...\n");
2266
2267 lapic_register_intr(0, vector);
2268 apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2269 enable_8259A_irq(0);
2270
2271 if (timer_irq_works()) {
2272 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2273 goto out;
2274 }
2275 disable_8259A_irq(0);
2276 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
2277 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
2278
2279 apic_printk(APIC_QUIET, KERN_INFO
2280 "...trying to set up timer as ExtINT IRQ...\n");
2281
2282 init_8259A(0);
2283 make_8259A_irq(0);
2284 apic_write(APIC_LVT0, APIC_DM_EXTINT);
2285
2286 unlock_ExtINT_logic();
2287
2288 if (timer_irq_works()) {
2289 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2290 goto out;
2291 }
2292 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2293 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
2294 "report. Then try booting with the 'noapic' option.\n");
2295out:
2296 local_irq_restore(flags);
2297}
2298
2299/*
2300 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2301 * to devices. However there may be an I/O APIC pin available for
2302 * this interrupt regardless. The pin may be left unconnected, but
2303 * typically it will be reused as an ExtINT cascade interrupt for
2304 * the master 8259A. In the MPS case such a pin will normally be
2305 * reported as an ExtINT interrupt in the MP table. With ACPI
2306 * there is no provision for ExtINT interrupts, and in the absence
2307 * of an override it would be treated as an ordinary ISA I/O APIC
2308 * interrupt, that is edge-triggered and unmasked by default. We
2309 * used to do this, but it caused problems on some systems because
2310 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2311 * the same ExtINT cascade interrupt to drive the local APIC of the
2312 * bootstrap processor. Therefore we refrain from routing IRQ2 to
2313 * the I/O APIC in all cases now. No actual device should request
2314 * it anyway. --macro
2315 */
2316#define PIC_IRQS (1 << PIC_CASCADE_IR)
2317
2318void __init setup_IO_APIC(void)
2319{
2320 int i;
2321
2322 /* Reserve all the system vectors. */
2323 for (i = first_system_vector; i < NR_VECTORS; i++)
2324 set_bit(i, used_vectors);
2325
2326 enable_IO_APIC();
2327
2328 io_apic_irqs = ~PIC_IRQS;
2329
2330 printk("ENABLING IO-APIC IRQs\n");
2331
2332 /*
2333 * Set up IO-APIC IRQ routing.
2334 */
2335 if (!acpi_ioapic)
2336 setup_ioapic_ids_from_mpc();
2337 sync_Arb_IDs();
2338 setup_IO_APIC_irqs();
2339 init_IO_APIC_traps();
2340 check_timer();
2341}
2342
2343/*
2344 * Called after all the initialization is done. If we didn't find any
2345 * APIC bugs, then we can allow the modify fast path.
2346 */
2347
2348static int __init io_apic_bug_finalize(void)
2349{
2350 if (sis_apic_bug == -1)
2351 sis_apic_bug = 0;
2352 return 0;
2353}
2354
2355late_initcall(io_apic_bug_finalize);
2356
2357struct sysfs_ioapic_data {
2358 struct sys_device dev;
2359 struct IO_APIC_route_entry entry[0];
2360};
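/*
 * entry[0] is the old zero-length-array idiom: each instance below is
 * allocated with room for every redirection entry of its IO-APIC.
 */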
2361static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
2362
2363static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2364{
2365 struct IO_APIC_route_entry *entry;
2366 struct sysfs_ioapic_data *data;
2367 int i;
2368
2369 data = container_of(dev, struct sysfs_ioapic_data, dev);
2370 entry = data->entry;
2371 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2372 entry[i] = ioapic_read_entry(dev->id, i);
2373
2374 return 0;
2375}
2376
2377static int ioapic_resume(struct sys_device *dev)
2378{
2379 struct IO_APIC_route_entry *entry;
2380 struct sysfs_ioapic_data *data;
2381 unsigned long flags;
2382 union IO_APIC_reg_00 reg_00;
2383 int i;
2384
2385 data = container_of(dev, struct sysfs_ioapic_data, dev);
2386 entry = data->entry;
2387
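	/* Restore the IO-APIC ID first, then replay the saved redirection entries. */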
2388 spin_lock_irqsave(&ioapic_lock, flags);
2389 reg_00.raw = io_apic_read(dev->id, 0);
2390 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
2391 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
2392 io_apic_write(dev->id, 0, reg_00.raw);
2393 }
2394 spin_unlock_irqrestore(&ioapic_lock, flags);
2395 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2396 ioapic_write_entry(dev->id, i, entry[i]);
2397
2398 return 0;
2399}
2400
2401static struct sysdev_class ioapic_sysdev_class = {
2402 .name = "ioapic",
2403 .suspend = ioapic_suspend,
2404 .resume = ioapic_resume,
2405};
2406
2407static int __init ioapic_init_sysfs(void)
2408{
2409 struct sys_device *dev;
2410 int i, size, error = 0;
2411
2412 error = sysdev_class_register(&ioapic_sysdev_class);
2413 if (error)
2414 return error;
2415
2416 for (i = 0; i < nr_ioapics; i++) {
2417 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
2418 * sizeof(struct IO_APIC_route_entry);
2419 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
2420 if (!mp_ioapic_data[i]) {
2421 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2422 continue;
2423 }
2424 dev = &mp_ioapic_data[i]->dev;
2425 dev->id = i;
2426 dev->cls = &ioapic_sysdev_class;
2427 error = sysdev_register(dev);
2428 if (error) {
2429 kfree(mp_ioapic_data[i]);
2430 mp_ioapic_data[i] = NULL;
2431 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2432 continue;
2433 }
2434 }
2435
2436 return 0;
2437}
2438
2439device_initcall(ioapic_init_sysfs);
2440
2441/*
2442 * Dynamic irq allocate and deallocation
2443 */
2444int create_irq(void)
2445{
2446 /* Allocate an unused irq */
2447 int irq, new, vector = 0;
2448 unsigned long flags;
2449
2450 irq = -ENOSPC;
2451 spin_lock_irqsave(&vector_lock, flags);
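	/* Scan downward from NR_IRQS-1, skipping legacy IRQs and any IRQ that already has a vector. */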
2452 for (new = (NR_IRQS - 1); new >= 0; new--) {
2453 if (platform_legacy_irq(new))
2454 continue;
2455 if (irq_vector[new] != 0)
2456 continue;
2457 vector = __assign_irq_vector(new);
2458 if (likely(vector > 0))
2459 irq = new;
2460 break;
2461 }
2462 spin_unlock_irqrestore(&vector_lock, flags);
2463
2464 if (irq >= 0) {
2465 set_intr_gate(vector, interrupt[irq]);
2466 dynamic_irq_init(irq);
2467 }
2468 return irq;
2469}
2470
2471void destroy_irq(unsigned int irq)
2472{
2473 unsigned long flags;
2474
2475 dynamic_irq_cleanup(irq);
2476
2477 spin_lock_irqsave(&vector_lock, flags);
2478 clear_bit(irq_vector[irq], used_vectors);
2479 irq_vector[irq] = 0;
2480 spin_unlock_irqrestore(&vector_lock, flags);
2481}
2482
2483/*
2484 * MSI message composition
2485 */
2486#ifdef CONFIG_PCI_MSI
2487static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2488{
2489 int vector;
2490 unsigned dest;
2491
2492 vector = assign_irq_vector(irq);
2493 if (vector >= 0) {
2494 dest = cpu_mask_to_apicid(TARGET_CPUS);
2495
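		/*
		 * Compose the MSI address (target APIC ID, destination and
		 * redirection mode) and data (trigger, level, vector) words.
		 */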
2496 msg->address_hi = MSI_ADDR_BASE_HI;
2497 msg->address_lo =
2498 MSI_ADDR_BASE_LO |
2499 ((INT_DEST_MODE == 0) ?
2500 				MSI_ADDR_DEST_MODE_PHYSICAL:
2501 MSI_ADDR_DEST_MODE_LOGICAL) |
2502 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2503 MSI_ADDR_REDIRECTION_CPU:
2504 MSI_ADDR_REDIRECTION_LOWPRI) |
2505 MSI_ADDR_DEST_ID(dest);
2506
2507 msg->data =
2508 MSI_DATA_TRIGGER_EDGE |
2509 MSI_DATA_LEVEL_ASSERT |
2510 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2511 				MSI_DATA_DELIVERY_FIXED:
2512 MSI_DATA_DELIVERY_LOWPRI) |
2513 MSI_DATA_VECTOR(vector);
2514 }
2515 return vector;
2516}
2517
2518#ifdef CONFIG_SMP
2519static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2520{
2521 struct msi_msg msg;
2522 unsigned int dest;
2523 cpumask_t tmp;
2524 int vector;
2525
2526 cpus_and(tmp, mask, cpu_online_map);
2527 if (cpus_empty(tmp))
2528 tmp = TARGET_CPUS;
2529
2530 vector = assign_irq_vector(irq);
2531 if (vector < 0)
2532 return;
2533
2534 dest = cpu_mask_to_apicid(mask);
2535
2536 read_msi_msg(irq, &msg);
2537
2538 msg.data &= ~MSI_DATA_VECTOR_MASK;
2539 msg.data |= MSI_DATA_VECTOR(vector);
2540 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2541 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2542
2543 write_msi_msg(irq, &msg);
2544 irq_desc[irq].affinity = mask;
2545}
2546#endif /* CONFIG_SMP */
2547
2548/*
2549 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
2550 * which implement the MSI or MSI-X Capability Structure.
2551 */
2552static struct irq_chip msi_chip = {
2553 .name = "PCI-MSI",
2554 .unmask = unmask_msi_irq,
2555 .mask = mask_msi_irq,
2556 .ack = ack_ioapic_irq,
2557#ifdef CONFIG_SMP
2558 .set_affinity = set_msi_irq_affinity,
2559#endif
2560 .retrigger = ioapic_retrigger_irq,
2561};
2562
2563int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2564{
2565 struct msi_msg msg;
2566 int irq, ret;
2567 irq = create_irq();
2568 if (irq < 0)
2569 return irq;
2570
2571 ret = msi_compose_msg(dev, irq, &msg);
2572 if (ret < 0) {
2573 destroy_irq(irq);
2574 return ret;
2575 }
2576
2577 set_irq_msi(irq, desc);
2578 write_msi_msg(irq, &msg);
2579
2580 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
2581 "edge");
2582
2583 return 0;
2584}
2585
2586void arch_teardown_msi_irq(unsigned int irq)
2587{
2588 destroy_irq(irq);
2589}
2590
2591#endif /* CONFIG_PCI_MSI */
2592
2593/*
2594 * Hypertransport interrupt support
2595 */
2596#ifdef CONFIG_HT_IRQ
2597
2598#ifdef CONFIG_SMP
2599
2600static void target_ht_irq(unsigned int irq, unsigned int dest)
2601{
2602 struct ht_irq_msg msg;
2603 fetch_ht_irq_msg(irq, &msg);
2604
2605 msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
2606 msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
2607
2608 msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
2609 msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
2610
2611 write_ht_irq_msg(irq, &msg);
2612}
2613
2614static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2615{
2616 unsigned int dest;
2617 cpumask_t tmp;
2618
2619 cpus_and(tmp, mask, cpu_online_map);
2620 if (cpus_empty(tmp))
2621 tmp = TARGET_CPUS;
2622
2623 cpus_and(mask, tmp, CPU_MASK_ALL);
2624
2625 dest = cpu_mask_to_apicid(mask);
2626
2627 target_ht_irq(irq, dest);
2628 irq_desc[irq].affinity = mask;
2629}
2630#endif
2631
2632static struct irq_chip ht_irq_chip = {
2633 .name = "PCI-HT",
2634 .mask = mask_ht_irq,
2635 .unmask = unmask_ht_irq,
2636 .ack = ack_ioapic_irq,
2637#ifdef CONFIG_SMP
2638 .set_affinity = set_ht_irq_affinity,
2639#endif
2640 .retrigger = ioapic_retrigger_irq,
2641};
2642
2643int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2644{
2645 int vector;
2646
2647 vector = assign_irq_vector(irq);
2648 if (vector >= 0) {
2649 struct ht_irq_msg msg;
2650 unsigned dest;
2651 cpumask_t tmp;
2652
2653 cpus_clear(tmp);
2654 cpu_set(vector >> 8, tmp);
2655 dest = cpu_mask_to_apicid(tmp);
2656
2657 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
2658
2659 msg.address_lo =
2660 HT_IRQ_LOW_BASE |
2661 HT_IRQ_LOW_DEST_ID(dest) |
2662 HT_IRQ_LOW_VECTOR(vector) |
2663 ((INT_DEST_MODE == 0) ?
2664 HT_IRQ_LOW_DM_PHYSICAL :
2665 HT_IRQ_LOW_DM_LOGICAL) |
2666 HT_IRQ_LOW_RQEOI_EDGE |
2667 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2668 HT_IRQ_LOW_MT_FIXED :
2669 HT_IRQ_LOW_MT_ARBITRATED) |
2670 HT_IRQ_LOW_IRQ_MASKED;
2671
2672 write_ht_irq_msg(irq, &msg);
2673
2674 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2675 handle_edge_irq, "edge");
2676 }
2677 return vector;
2678}
2679#endif /* CONFIG_HT_IRQ */
2680
2681/* --------------------------------------------------------------------------
2682 ACPI-based IOAPIC Configuration
2683 -------------------------------------------------------------------------- */
2684
2685#ifdef CONFIG_ACPI
2686
2687int __init io_apic_get_unique_id(int ioapic, int apic_id)
2688{
2689 union IO_APIC_reg_00 reg_00;
2690 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
2691 physid_mask_t tmp;
2692 unsigned long flags;
2693 int i = 0;
2694
2695 /*
2696 * The P4 platform supports up to 256 APIC IDs on two separate APIC
2697 * buses (one for LAPICs, one for IOAPICs), where predecessors only
2699 * support up to 16 on one shared APIC bus.
2699 *
2700 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2701 * advantage of new APIC bus architecture.
2702 */
2703
2704 if (physids_empty(apic_id_map))
2705 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
2706
2707 spin_lock_irqsave(&ioapic_lock, flags);
2708 reg_00.raw = io_apic_read(ioapic, 0);
2709 spin_unlock_irqrestore(&ioapic_lock, flags);
2710
2711 if (apic_id >= get_physical_broadcast()) {
2712 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
2713 "%d\n", ioapic, apic_id, reg_00.bits.ID);
2714 apic_id = reg_00.bits.ID;
2715 }
2716
2717 /*
2718 * Every APIC in a system must have a unique ID or we get lots of nice
2719 * 'stuck on smp_invalidate_needed IPI wait' messages.
2720 */
2721 if (check_apicid_used(apic_id_map, apic_id)) {
2722
2723 for (i = 0; i < get_physical_broadcast(); i++) {
2724 if (!check_apicid_used(apic_id_map, i))
2725 break;
2726 }
2727
2728 if (i == get_physical_broadcast())
2729 panic("Max apic_id exceeded!\n");
2730
2731 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
2732 "trying %d\n", ioapic, apic_id, i);
2733
2734 apic_id = i;
2735 }
2736
2737 tmp = apicid_to_cpu_present(apic_id);
2738 physids_or(apic_id_map, apic_id_map, tmp);
2739
2740 if (reg_00.bits.ID != apic_id) {
2741 reg_00.bits.ID = apic_id;
2742
2743 spin_lock_irqsave(&ioapic_lock, flags);
2744 io_apic_write(ioapic, 0, reg_00.raw);
2745 reg_00.raw = io_apic_read(ioapic, 0);
2746 spin_unlock_irqrestore(&ioapic_lock, flags);
2747
2748 /* Sanity check */
2749 if (reg_00.bits.ID != apic_id) {
2750 printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
2751 return -1;
2752 }
2753 }
2754
2755 apic_printk(APIC_VERBOSE, KERN_INFO
2756 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
2757
2758 return apic_id;
2759}
2760
2761
2762int __init io_apic_get_version(int ioapic)
2763{
2764 union IO_APIC_reg_01 reg_01;
2765 unsigned long flags;
2766
2767 spin_lock_irqsave(&ioapic_lock, flags);
2768 reg_01.raw = io_apic_read(ioapic, 1);
2769 spin_unlock_irqrestore(&ioapic_lock, flags);
2770
2771 return reg_01.bits.version;
2772}
2773
2774
2775int __init io_apic_get_redir_entries(int ioapic)
2776{
2777 union IO_APIC_reg_01 reg_01;
2778 unsigned long flags;
2779
2780 spin_lock_irqsave(&ioapic_lock, flags);
2781 reg_01.raw = io_apic_read(ioapic, 1);
2782 spin_unlock_irqrestore(&ioapic_lock, flags);
2783
2784 return reg_01.bits.entries;
2785}
2786
2787
2788int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
2789{
2790 struct IO_APIC_route_entry entry;
2791
2792 if (!IO_APIC_IRQ(irq)) {
2793 printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
2794 ioapic);
2795 return -EINVAL;
2796 }
2797
2798 /*
2799 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
2800 * Note that we mask (disable) IRQs now -- these get enabled when the
2801 * corresponding device driver registers for this IRQ.
2802 */
2803
2804 memset(&entry, 0, sizeof(entry));
2805
2806 entry.delivery_mode = INT_DELIVERY_MODE;
2807 entry.dest_mode = INT_DEST_MODE;
2808 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
2809 entry.trigger = edge_level;
2810 entry.polarity = active_high_low;
2811 entry.mask = 1;
2812
2813 /*
2814 * IRQs < 16 are already in the irq_2_pin[] map
2815 */
2816 if (irq >= 16)
2817 add_pin_to_irq(irq, ioapic, pin);
2818
2819 entry.vector = assign_irq_vector(irq);
2820
2821 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
2822 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
2823 mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
2824 edge_level, active_high_low);
2825
2826 ioapic_register_intr(irq, entry.vector, edge_level);
2827
2828 if (!ioapic && (irq < 16))
2829 disable_8259A_irq(irq);
2830
2831 ioapic_write_entry(ioapic, pin, entry);
2832
2833 return 0;
2834}
2835
2836int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2837{
2838 int i;
2839
2840 if (skip_ioapic_setup)
2841 return -1;
2842
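	/* Scan the MP table for an mp_INT entry matching this ISA bus IRQ. */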
2843 for (i = 0; i < mp_irq_entries; i++)
2844 if (mp_irqs[i].mp_irqtype == mp_INT &&
2845 mp_irqs[i].mp_srcbusirq == bus_irq)
2846 break;
2847 if (i >= mp_irq_entries)
2848 return -1;
2849
2850 *trigger = irq_trigger(i);
2851 *polarity = irq_polarity(i);
2852 return 0;
2853}
2854
2855#endif /* CONFIG_ACPI */
2856
2857static int __init parse_disable_timer_pin_1(char *arg)
2858{
2859 disable_timer_pin_1 = 1;
2860 return 0;
2861}
2862early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
2863
2864static int __init parse_enable_timer_pin_1(char *arg)
2865{
2866 disable_timer_pin_1 = -1;
2867 return 0;
2868}
2869early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
2870
2871static int __init parse_noapic(char *arg)
2872{
2873 /* disable IO-APIC */
2874 disable_ioapic_setup();
2875 return 0;
2876}
2877early_param("noapic", parse_noapic);
2878
2879void __init ioapic_init_mappings(void)
2880{
2881 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
2882 int i;
2883
2884 for (i = 0; i < nr_ioapics; i++) {
2885 if (smp_found_config) {
2886 ioapic_phys = mp_ioapics[i].mp_apicaddr;
2887 if (!ioapic_phys) {
2888 printk(KERN_ERR
2889 "WARNING: bogus zero IO-APIC "
2890 "address found in MPTABLE, "
2891 "disabling IO/APIC support!\n");
2892 smp_found_config = 0;
2893 skip_ioapic_setup = 1;
2894 goto fake_ioapic_page;
2895 }
2896 } else {
2897fake_ioapic_page:
2898 ioapic_phys = (unsigned long)
2899 alloc_bootmem_pages(PAGE_SIZE);
2900 ioapic_phys = __pa(ioapic_phys);
2901 }
2902 set_fixmap_nocache(idx, ioapic_phys);
2903 printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
2904 __fix_to_virt(idx), ioapic_phys);
2905 idx++;
2906 }
2907}
2908
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
new file mode 100644
index 000000000000..ccf6c503fc3b
--- /dev/null
+++ b/arch/x86/kernel/irq.c
@@ -0,0 +1,189 @@
1/*
2 * Common interrupt code for 32 and 64 bit
3 */
4#include <linux/cpu.h>
5#include <linux/interrupt.h>
6#include <linux/kernel_stat.h>
7#include <linux/seq_file.h>
8
9#include <asm/apic.h>
10#include <asm/io_apic.h>
11#include <asm/smp.h>
12
13atomic_t irq_err_count;
14
15/*
16 * 'what should we do if we get a hw irq event on an illegal vector'.
17 * each architecture has to answer this themselves.
18 */
19void ack_bad_irq(unsigned int irq)
20{
21 printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
22
23#ifdef CONFIG_X86_LOCAL_APIC
24 /*
25 * Currently unexpected vectors happen only on SMP and APIC.
26 * We _must_ ack these because every local APIC has only N
27 * irq slots per priority level, and a 'hanging, unacked' IRQ
28 * holds up an irq slot - in excessive cases (when multiple
29 * unexpected vectors occur) that might lock up the APIC
30 * completely.
31 * But only ack when the APIC is enabled -AK
32 */
33 if (cpu_has_apic)
34 ack_APIC_irq();
35#endif
36}
37
38#ifdef CONFIG_X86_32
39# define irq_stats(x) (&per_cpu(irq_stat,x))
40#else
41# define irq_stats(x) cpu_pda(x)
42#endif
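/* On 32-bit the counters live in the per-CPU irq_stat; on 64-bit they live in the PDA. */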
43/*
44 * /proc/interrupts printing:
45 */
46static int show_other_interrupts(struct seq_file *p)
47{
48 int j;
49
50 seq_printf(p, "NMI: ");
51 for_each_online_cpu(j)
52 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
53 seq_printf(p, " Non-maskable interrupts\n");
54#ifdef CONFIG_X86_LOCAL_APIC
55 seq_printf(p, "LOC: ");
56 for_each_online_cpu(j)
57 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
58 seq_printf(p, " Local timer interrupts\n");
59#endif
60#ifdef CONFIG_SMP
61 seq_printf(p, "RES: ");
62 for_each_online_cpu(j)
63 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
64 seq_printf(p, " Rescheduling interrupts\n");
65 seq_printf(p, "CAL: ");
66 for_each_online_cpu(j)
67 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
68 seq_printf(p, " Function call interrupts\n");
69 seq_printf(p, "TLB: ");
70 for_each_online_cpu(j)
71 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
72 seq_printf(p, " TLB shootdowns\n");
73#endif
74#ifdef CONFIG_X86_MCE
75 seq_printf(p, "TRM: ");
76 for_each_online_cpu(j)
77 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
78 seq_printf(p, " Thermal event interrupts\n");
79# ifdef CONFIG_X86_64
80 seq_printf(p, "THR: ");
81 for_each_online_cpu(j)
82 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
83 seq_printf(p, " Threshold APIC interrupts\n");
84# endif
85#endif
86#ifdef CONFIG_X86_LOCAL_APIC
87 seq_printf(p, "SPU: ");
88 for_each_online_cpu(j)
89 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
90 seq_printf(p, " Spurious interrupts\n");
91#endif
92 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
93#if defined(CONFIG_X86_IO_APIC)
94 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
95#endif
96 return 0;
97}
98
99int show_interrupts(struct seq_file *p, void *v)
100{
101 unsigned long flags, any_count = 0;
102 int i = *(loff_t *) v, j;
103 struct irqaction *action;
104 struct irq_desc *desc;
105
106 if (i > nr_irqs)
107 return 0;
108
109 if (i == nr_irqs)
110 return show_other_interrupts(p);
111
112 /* print header */
113 if (i == 0) {
114 seq_printf(p, " ");
115 for_each_online_cpu(j)
116 seq_printf(p, "CPU%-8d",j);
117 seq_putc(p, '\n');
118 }
119
120 desc = irq_to_desc(i);
121 spin_lock_irqsave(&desc->lock, flags);
122#ifndef CONFIG_SMP
123 any_count = kstat_irqs(i);
124#else
125 for_each_online_cpu(j)
126 any_count |= kstat_irqs_cpu(i, j);
127#endif
128 action = desc->action;
129 if (!action && !any_count)
130 goto out;
131
132 seq_printf(p, "%3d: ", i);
133#ifndef CONFIG_SMP
134 seq_printf(p, "%10u ", kstat_irqs(i));
135#else
136 for_each_online_cpu(j)
137 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
138#endif
139 seq_printf(p, " %8s", desc->chip->name);
140 seq_printf(p, "-%-8s", desc->name);
141
142 if (action) {
143 seq_printf(p, " %s", action->name);
144 while ((action = action->next) != NULL)
145 seq_printf(p, ", %s", action->name);
146 }
147
148 seq_putc(p, '\n');
149out:
150 spin_unlock_irqrestore(&desc->lock, flags);
151 return 0;
152}
153
154/*
155 * /proc/stat helpers
156 */
157u64 arch_irq_stat_cpu(unsigned int cpu)
158{
159 u64 sum = irq_stats(cpu)->__nmi_count;
160
161#ifdef CONFIG_X86_LOCAL_APIC
162 sum += irq_stats(cpu)->apic_timer_irqs;
163#endif
164#ifdef CONFIG_SMP
165 sum += irq_stats(cpu)->irq_resched_count;
166 sum += irq_stats(cpu)->irq_call_count;
167 sum += irq_stats(cpu)->irq_tlb_count;
168#endif
169#ifdef CONFIG_X86_MCE
170 sum += irq_stats(cpu)->irq_thermal_count;
171# ifdef CONFIG_X86_64
172 sum += irq_stats(cpu)->irq_threshold_count;
173#endif
174#endif
175#ifdef CONFIG_X86_LOCAL_APIC
176 sum += irq_stats(cpu)->irq_spurious_count;
177#endif
178 return sum;
179}
180
181u64 arch_irq_stat(void)
182{
183 u64 sum = atomic_read(&irq_err_count);
184
185#ifdef CONFIG_X86_IO_APIC
186 sum += atomic_read(&irq_mis_count);
187#endif
188 return sum;
189}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b71e02d42f4f..a51382672de0 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
25DEFINE_PER_CPU(struct pt_regs *, irq_regs);
26EXPORT_PER_CPU_SYMBOL(irq_regs);
27
28/*
29 * 'what should we do if we get a hw irq event on an illegal vector'.
30 * each architecture has to answer this themselves.
31 */
32void ack_bad_irq(unsigned int irq)
33{
34 printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
35
36#ifdef CONFIG_X86_LOCAL_APIC
37 /*
38 * Currently unexpected vectors happen only on SMP and APIC.
39 * We _must_ ack these because every local APIC has only N
40 * irq slots per priority level, and a 'hanging, unacked' IRQ
41 * holds up an irq slot - in excessive cases (when multiple
42 * unexpected vectors occur) that might lock up the APIC
43 * completely.
44 * But only ack when the APIC is enabled -AK
45 */
46 if (cpu_has_apic)
47 ack_APIC_irq();
48#endif
49}
50
51#ifdef CONFIG_DEBUG_STACKOVERFLOW
52/* Debugging check for stack overflow: is there less than 1KB free? */
53static int check_stack_overflow(void)
@@ -223,20 +200,25 @@ unsigned int do_IRQ(struct pt_regs *regs)
223{
224 struct pt_regs *old_regs;
225 /* high bit used in ret_from_ code */
226 int overflow, irq = ~regs->orig_ax;
203 int overflow;
227 struct irq_desc *desc = irq_desc + irq;
204 unsigned vector = ~regs->orig_ax;
205 struct irq_desc *desc;
206 unsigned irq;
228
229 if (unlikely((unsigned)irq >= NR_IRQS)) {
230 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
231 __func__, irq);
232 BUG();
233 }
234
235 old_regs = set_irq_regs(regs);
236 irq_enter();
211 irq = __get_cpu_var(vector_irq)[vector];
237
238 overflow = check_stack_overflow();
239
215 desc = irq_to_desc(irq);
216 if (unlikely(!desc)) {
217 printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
218 __func__, irq, vector, smp_processor_id());
219 BUG();
220 }
221
240 if (!execute_on_irq_stack(overflow, desc, irq)) {
241 if (unlikely(overflow))
242 print_stack_overflow();
@@ -248,146 +230,6 @@ unsigned int do_IRQ(struct pt_regs *regs)
248 return 1;
249}
250
251/*
252 * Interrupt statistics:
253 */
254
255atomic_t irq_err_count;
256
257/*
258 * /proc/interrupts printing:
259 */
260
261int show_interrupts(struct seq_file *p, void *v)
262{
263 int i = *(loff_t *) v, j;
264 struct irqaction * action;
265 unsigned long flags;
266
267 if (i == 0) {
268 seq_printf(p, " ");
269 for_each_online_cpu(j)
270 seq_printf(p, "CPU%-8d",j);
271 seq_putc(p, '\n');
272 }
273
274 if (i < NR_IRQS) {
275 unsigned any_count = 0;
276
277 spin_lock_irqsave(&irq_desc[i].lock, flags);
278#ifndef CONFIG_SMP
279 any_count = kstat_irqs(i);
280#else
281 for_each_online_cpu(j)
282 any_count |= kstat_cpu(j).irqs[i];
283#endif
284 action = irq_desc[i].action;
285 if (!action && !any_count)
286 goto skip;
287 seq_printf(p, "%3d: ",i);
288#ifndef CONFIG_SMP
289 seq_printf(p, "%10u ", kstat_irqs(i));
290#else
291 for_each_online_cpu(j)
292 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
293#endif
294 seq_printf(p, " %8s", irq_desc[i].chip->name);
295 seq_printf(p, "-%-8s", irq_desc[i].name);
296
297 if (action) {
298 seq_printf(p, " %s", action->name);
299 while ((action = action->next) != NULL)
300 seq_printf(p, ", %s", action->name);
301 }
302
303 seq_putc(p, '\n');
304skip:
305 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
306 } else if (i == NR_IRQS) {
307 seq_printf(p, "NMI: ");
308 for_each_online_cpu(j)
309 seq_printf(p, "%10u ", nmi_count(j));
310 seq_printf(p, " Non-maskable interrupts\n");
311#ifdef CONFIG_X86_LOCAL_APIC
312 seq_printf(p, "LOC: ");
313 for_each_online_cpu(j)
314 seq_printf(p, "%10u ",
315 per_cpu(irq_stat,j).apic_timer_irqs);
316 seq_printf(p, " Local timer interrupts\n");
317#endif
318#ifdef CONFIG_SMP
319 seq_printf(p, "RES: ");
320 for_each_online_cpu(j)
321 seq_printf(p, "%10u ",
322 per_cpu(irq_stat,j).irq_resched_count);
323 seq_printf(p, " Rescheduling interrupts\n");
324 seq_printf(p, "CAL: ");
325 for_each_online_cpu(j)
326 seq_printf(p, "%10u ",
327 per_cpu(irq_stat,j).irq_call_count);
328 seq_printf(p, " Function call interrupts\n");
329 seq_printf(p, "TLB: ");
330 for_each_online_cpu(j)
331 seq_printf(p, "%10u ",
332 per_cpu(irq_stat,j).irq_tlb_count);
333 seq_printf(p, " TLB shootdowns\n");
334#endif
335#ifdef CONFIG_X86_MCE
336 seq_printf(p, "TRM: ");
337 for_each_online_cpu(j)
338 seq_printf(p, "%10u ",
339 per_cpu(irq_stat,j).irq_thermal_count);
340 seq_printf(p, " Thermal event interrupts\n");
341#endif
342#ifdef CONFIG_X86_LOCAL_APIC
343 seq_printf(p, "SPU: ");
344 for_each_online_cpu(j)
345 seq_printf(p, "%10u ",
346 per_cpu(irq_stat,j).irq_spurious_count);
347 seq_printf(p, " Spurious interrupts\n");
348#endif
349 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
350#if defined(CONFIG_X86_IO_APIC)
351 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
352#endif
353 }
354 return 0;
355}
356
357/*
358 * /proc/stat helpers
359 */
360u64 arch_irq_stat_cpu(unsigned int cpu)
361{
362 u64 sum = nmi_count(cpu);
363
364#ifdef CONFIG_X86_LOCAL_APIC
365 sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
366#endif
367#ifdef CONFIG_SMP
368 sum += per_cpu(irq_stat, cpu).irq_resched_count;
369 sum += per_cpu(irq_stat, cpu).irq_call_count;
370 sum += per_cpu(irq_stat, cpu).irq_tlb_count;
371#endif
372#ifdef CONFIG_X86_MCE
373 sum += per_cpu(irq_stat, cpu).irq_thermal_count;
374#endif
375#ifdef CONFIG_X86_LOCAL_APIC
376 sum += per_cpu(irq_stat, cpu).irq_spurious_count;
377#endif
378 return sum;
379}
380
381u64 arch_irq_stat(void)
382{
383 u64 sum = atomic_read(&irq_err_count);
384
385#ifdef CONFIG_X86_IO_APIC
386 sum += atomic_read(&irq_mis_count);
387#endif
388 return sum;
389}
390
391#ifdef CONFIG_HOTPLUG_CPU
392#include <mach_apic.h>
393
@@ -395,20 +237,22 @@ void fixup_irqs(cpumask_t map)
395{
396 unsigned int irq;
397 static int warned;
240 struct irq_desc *desc;
398
399 for (irq = 0; irq < NR_IRQS; irq++) {
242 for_each_irq_desc(irq, desc) {
400 cpumask_t mask;
244
401 if (irq == 2)
402 continue;
403
404 cpus_and(mask, irq_desc[irq].affinity, map);
248 cpus_and(mask, desc->affinity, map);
405 if (any_online_cpu(mask) == NR_CPUS) {
406 printk("Breaking affinity for irq %i\n", irq);
407 mask = map;
408 }
409 if (irq_desc[irq].chip->set_affinity)
253 if (desc->chip->set_affinity)
410 irq_desc[irq].chip->set_affinity(irq, mask);
254 desc->chip->set_affinity(irq, mask);
411 else if (irq_desc[irq].action && !(warned++))
255 else if (desc->action && !(warned++))
412 printk("Cannot set affinity for irq %i\n", irq);
413 }
414
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f065fe9071b9..60eb84eb77a0 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,28 +18,6 @@
18#include <asm/idle.h>
19#include <asm/smp.h>
20
21atomic_t irq_err_count;
22
23/*
24 * 'what should we do if we get a hw irq event on an illegal vector'.
25 * each architecture has to answer this themselves.
26 */
27void ack_bad_irq(unsigned int irq)
28{
29 printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
30 /*
31 * Currently unexpected vectors happen only on SMP and APIC.
32 * We _must_ ack these because every local APIC has only N
33 * irq slots per priority level, and a 'hanging, unacked' IRQ
34 * holds up an irq slot - in excessive cases (when multiple
35 * unexpected vectors occur) that might lock up the APIC
36 * completely.
37 * But don't ack when the APIC is disabled. -AK
38 */
39 if (!disable_apic)
40 ack_APIC_irq();
41}
42
43#ifdef CONFIG_DEBUG_STACKOVERFLOW
44/*
45 * Probabilistic stack overflow check:
@@ -65,122 +43,6 @@ static inline void stack_overflow_check(struct pt_regs *regs)
65#endif
66
67/* 45/*
68 * Generic, controller-independent functions:
69 */
70
71int show_interrupts(struct seq_file *p, void *v)
72{
73 int i = *(loff_t *) v, j;
74 struct irqaction * action;
75 unsigned long flags;
76
77 if (i == 0) {
78 seq_printf(p, " ");
79 for_each_online_cpu(j)
80 seq_printf(p, "CPU%-8d",j);
81 seq_putc(p, '\n');
82 }
83
84 if (i < NR_IRQS) {
85 unsigned any_count = 0;
86
87 spin_lock_irqsave(&irq_desc[i].lock, flags);
88#ifndef CONFIG_SMP
89 any_count = kstat_irqs(i);
90#else
91 for_each_online_cpu(j)
92 any_count |= kstat_cpu(j).irqs[i];
93#endif
94 action = irq_desc[i].action;
95 if (!action && !any_count)
96 goto skip;
97 seq_printf(p, "%3d: ",i);
98#ifndef CONFIG_SMP
99 seq_printf(p, "%10u ", kstat_irqs(i));
100#else
101 for_each_online_cpu(j)
102 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
103#endif
104 seq_printf(p, " %8s", irq_desc[i].chip->name);
105 seq_printf(p, "-%-8s", irq_desc[i].name);
106
107 if (action) {
108 seq_printf(p, " %s", action->name);
109 while ((action = action->next) != NULL)
110 seq_printf(p, ", %s", action->name);
111 }
112 seq_putc(p, '\n');
113skip:
114 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
115 } else if (i == NR_IRQS) {
116 seq_printf(p, "NMI: ");
117 for_each_online_cpu(j)
118 seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
119 seq_printf(p, " Non-maskable interrupts\n");
120 seq_printf(p, "LOC: ");
121 for_each_online_cpu(j)
122 seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
123 seq_printf(p, " Local timer interrupts\n");
124#ifdef CONFIG_SMP
125 seq_printf(p, "RES: ");
126 for_each_online_cpu(j)
127 seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
128 seq_printf(p, " Rescheduling interrupts\n");
129 seq_printf(p, "CAL: ");
130 for_each_online_cpu(j)
131 seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
132 seq_printf(p, " Function call interrupts\n");
133 seq_printf(p, "TLB: ");
134 for_each_online_cpu(j)
135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
136 seq_printf(p, " TLB shootdowns\n");
137#endif
138#ifdef CONFIG_X86_MCE
139 seq_printf(p, "TRM: ");
140 for_each_online_cpu(j)
141 seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
142 seq_printf(p, " Thermal event interrupts\n");
143 seq_printf(p, "THR: ");
144 for_each_online_cpu(j)
145 seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
146 seq_printf(p, " Threshold APIC interrupts\n");
147#endif
148 seq_printf(p, "SPU: ");
149 for_each_online_cpu(j)
150 seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
151 seq_printf(p, " Spurious interrupts\n");
152 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
153 }
154 return 0;
155}
156
157/*
158 * /proc/stat helpers
159 */
160u64 arch_irq_stat_cpu(unsigned int cpu)
161{
162 u64 sum = cpu_pda(cpu)->__nmi_count;
163
164 sum += cpu_pda(cpu)->apic_timer_irqs;
165#ifdef CONFIG_SMP
166 sum += cpu_pda(cpu)->irq_resched_count;
167 sum += cpu_pda(cpu)->irq_call_count;
168 sum += cpu_pda(cpu)->irq_tlb_count;
169#endif
170#ifdef CONFIG_X86_MCE
171 sum += cpu_pda(cpu)->irq_thermal_count;
172 sum += cpu_pda(cpu)->irq_threshold_count;
173#endif
174 sum += cpu_pda(cpu)->irq_spurious_count;
175 return sum;
176}
177
178u64 arch_irq_stat(void)
179{
180 return atomic_read(&irq_err_count);
181}
182
183/*
184 * do_IRQ handles all normal device IRQ's (the special 46 * do_IRQ handles all normal device IRQ's (the special
185 * SMP cross-CPU interrupts have their own specific 47 * SMP cross-CPU interrupts have their own specific
186 * handlers). 48 * handlers).
@@ -188,6 +50,7 @@ u64 arch_irq_stat(void)
188asmlinkage unsigned int do_IRQ(struct pt_regs *regs) 50asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
189{ 51{
190 struct pt_regs *old_regs = set_irq_regs(regs); 52 struct pt_regs *old_regs = set_irq_regs(regs);
53 struct irq_desc *desc;
191 54
192 /* high bit used in ret_from_ code */ 55 /* high bit used in ret_from_ code */
193 unsigned vector = ~regs->orig_ax; 56 unsigned vector = ~regs->orig_ax;
@@ -201,8 +64,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
201 stack_overflow_check(regs); 64 stack_overflow_check(regs);
202#endif 65#endif
203 66
204 if (likely(irq < NR_IRQS)) 67 desc = irq_to_desc(irq);
205 generic_handle_irq(irq); 68 if (likely(desc))
69 generic_handle_irq_desc(irq, desc);
206 else { 70 else {
207 if (!disable_apic) 71 if (!disable_apic)
208 ack_APIC_irq(); 72 ack_APIC_irq();
@@ -223,8 +87,9 @@ void fixup_irqs(cpumask_t map)
223{ 87{
224 unsigned int irq; 88 unsigned int irq;
225 static int warned; 89 static int warned;
90 struct irq_desc *desc;
226 91
227 for (irq = 0; irq < NR_IRQS; irq++) { 92 for_each_irq_desc(irq, desc) {
228 cpumask_t mask; 93 cpumask_t mask;
229 int break_affinity = 0; 94 int break_affinity = 0;
230 int set_affinity = 1; 95 int set_affinity = 1;
@@ -233,32 +98,32 @@ void fixup_irqs(cpumask_t map)
233 continue; 98 continue;
234 99
 235 /* interrupts are disabled at this point */ 100 /* interrupts are disabled at this point */
236 spin_lock(&irq_desc[irq].lock); 101 spin_lock(&desc->lock);
237 102
238 if (!irq_has_action(irq) || 103 if (!irq_has_action(irq) ||
239 cpus_equal(irq_desc[irq].affinity, map)) { 104 cpus_equal(desc->affinity, map)) {
240 spin_unlock(&irq_desc[irq].lock); 105 spin_unlock(&desc->lock);
241 continue; 106 continue;
242 } 107 }
243 108
244 cpus_and(mask, irq_desc[irq].affinity, map); 109 cpus_and(mask, desc->affinity, map);
245 if (cpus_empty(mask)) { 110 if (cpus_empty(mask)) {
246 break_affinity = 1; 111 break_affinity = 1;
247 mask = map; 112 mask = map;
248 } 113 }
249 114
250 if (irq_desc[irq].chip->mask) 115 if (desc->chip->mask)
251 irq_desc[irq].chip->mask(irq); 116 desc->chip->mask(irq);
252 117
253 if (irq_desc[irq].chip->set_affinity) 118 if (desc->chip->set_affinity)
254 irq_desc[irq].chip->set_affinity(irq, mask); 119 desc->chip->set_affinity(irq, mask);
255 else if (!(warned++)) 120 else if (!(warned++))
256 set_affinity = 0; 121 set_affinity = 0;
257 122
258 if (irq_desc[irq].chip->unmask) 123 if (desc->chip->unmask)
259 irq_desc[irq].chip->unmask(irq); 124 desc->chip->unmask(irq);
260 125
261 spin_unlock(&irq_desc[irq].lock); 126 spin_unlock(&desc->lock);
262 127
263 if (break_affinity && set_affinity) 128 if (break_affinity && set_affinity)
264 printk("Broke affinity for irq %i\n", irq); 129 printk("Broke affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9200a1e2752d..845aa9803e80 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,6 +69,13 @@ void __init init_ISA_irqs (void)
69 * 16 old-style INTA-cycle interrupts: 69 * 16 old-style INTA-cycle interrupts:
70 */ 70 */
71 for (i = 0; i < 16; i++) { 71 for (i = 0; i < 16; i++) {
 72 /* first use of this irq_desc: initialize it */
73 struct irq_desc *desc = irq_to_desc(i);
74
75 desc->status = IRQ_DISABLED;
76 desc->action = NULL;
77 desc->depth = 1;
78
72 set_irq_chip_and_handler_name(i, &i8259A_chip, 79 set_irq_chip_and_handler_name(i, &i8259A_chip,
73 handle_level_irq, "XT"); 80 handle_level_irq, "XT");
74 } 81 }
@@ -83,6 +90,27 @@ static struct irqaction irq2 = {
83 .name = "cascade", 90 .name = "cascade",
84}; 91};
85 92
93DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
94 [0 ... IRQ0_VECTOR - 1] = -1,
95 [IRQ0_VECTOR] = 0,
96 [IRQ1_VECTOR] = 1,
97 [IRQ2_VECTOR] = 2,
98 [IRQ3_VECTOR] = 3,
99 [IRQ4_VECTOR] = 4,
100 [IRQ5_VECTOR] = 5,
101 [IRQ6_VECTOR] = 6,
102 [IRQ7_VECTOR] = 7,
103 [IRQ8_VECTOR] = 8,
104 [IRQ9_VECTOR] = 9,
105 [IRQ10_VECTOR] = 10,
106 [IRQ11_VECTOR] = 11,
107 [IRQ12_VECTOR] = 12,
108 [IRQ13_VECTOR] = 13,
109 [IRQ14_VECTOR] = 14,
110 [IRQ15_VECTOR] = 15,
111 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
112};
113
86/* Overridden in paravirt.c */ 114/* Overridden in paravirt.c */
87void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 115void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
88 116
@@ -98,22 +126,14 @@ void __init native_init_IRQ(void)
98 * us. (some of these will be overridden and become 126 * us. (some of these will be overridden and become
99 * 'special' SMP interrupts) 127 * 'special' SMP interrupts)
100 */ 128 */
101 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { 129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
102 int vector = FIRST_EXTERNAL_VECTOR + i;
103 if (i >= NR_IRQS)
104 break;
105 /* SYSCALL_VECTOR was reserved in trap_init. */ 130 /* SYSCALL_VECTOR was reserved in trap_init. */
106 if (!test_bit(vector, used_vectors)) 131 if (i != SYSCALL_VECTOR)
107 set_intr_gate(vector, interrupt[i]); 132 set_intr_gate(i, interrupt[i]);
108 } 133 }
109 134
110#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
111 /*
112 * IRQ0 must be given a fixed assignment and initialized,
113 * because it's used before the IO-APIC is set up.
114 */
115 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
116 135
136#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
117 /* 137 /*
118 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 138 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
119 * IPI, driven by wakeup. 139 * IPI, driven by wakeup.
@@ -128,6 +148,9 @@ void __init native_init_IRQ(void)
128 148
129 /* IPI for single call function */ 149 /* IPI for single call function */
130 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); 150 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
151
152 /* Low priority IPI to cleanup after moving an irq */
153 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
131#endif 154#endif
132 155
133#ifdef CONFIG_X86_LOCAL_APIC 156#ifdef CONFIG_X86_LOCAL_APIC
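The per-CPU vector_irq table initialized above supplies the vector-to-irq translation consulted on every external interrupt; entries left at -1 mark vectors with no IRQ bound. A minimal sketch of the lookup, mirroring the do_IRQ() path and assuming the 2.6.27-era __get_cpu_var() accessor (illustrative, not part of the patch):

	/* Hypothetical illustration: translate the CPU vector taken
	 * from the interrupt frame back to an IRQ number via the
	 * per-cpu vector_irq table; -1 means the vector is unassigned. */
	static int example_vector_to_irq(unsigned int vector)
	{
		return __get_cpu_var(vector_irq)[vector];
	}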
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 5b5be9d43c2a..ff0235391285 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,23 +142,19 @@ void __init init_ISA_irqs(void)
142 init_bsp_APIC(); 142 init_bsp_APIC();
143 init_8259A(0); 143 init_8259A(0);
144 144
145 for (i = 0; i < NR_IRQS; i++) { 145 for (i = 0; i < 16; i++) {
 146 irq_desc[i].status = IRQ_DISABLED; 146 /* first use of this irq_desc: initialize it */
147 irq_desc[i].action = NULL; 147 struct irq_desc *desc = irq_to_desc(i);
148 irq_desc[i].depth = 1; 148
149 149 desc->status = IRQ_DISABLED;
150 if (i < 16) { 150 desc->action = NULL;
151 /* 151 desc->depth = 1;
152 * 16 old-style INTA-cycle interrupts: 152
153 */ 153 /*
154 set_irq_chip_and_handler_name(i, &i8259A_chip, 154 * 16 old-style INTA-cycle interrupts:
155 */
156 set_irq_chip_and_handler_name(i, &i8259A_chip,
155 handle_level_irq, "XT"); 157 handle_level_irq, "XT");
156 } else {
157 /*
158 * 'high' PCI IRQs filled in on demand
159 */
160 irq_desc[i].chip = &no_irq_chip;
161 }
162 } 158 }
163} 159}
164 160
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
35 if (!(word & (1 << 13))) { 35 if (!(word & (1 << 13))) {
36 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " 36 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
37 "disabling irq balancing and affinity\n"); 37 "disabling irq balancing and affinity\n");
38#ifdef CONFIG_IRQBALANCE
39 irqbalance_disable("");
40#endif
41 noirqdebug_setup(""); 38 noirqdebug_setup("");
42#ifdef CONFIG_PROC_FS 39#ifdef CONFIG_PROC_FS
43 no_irq_affinity = 1; 40 no_irq_affinity = 1;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b2c97874ec0f..0fa6790c1dd3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1073,6 +1073,7 @@ void __init setup_arch(char **cmdline_p)
1073#endif 1073#endif
1074 1074
1075 prefill_possible_map(); 1075 prefill_possible_map();
1076
1076#ifdef CONFIG_X86_64 1077#ifdef CONFIG_X86_64
1077 init_cpu_to_node(); 1078 init_cpu_to_node();
1078#endif 1079#endif
@@ -1080,6 +1081,9 @@ void __init setup_arch(char **cmdline_p)
1080 init_apic_mappings(); 1081 init_apic_mappings();
1081 ioapic_init_mappings(); 1082 ioapic_init_mappings();
1082 1083
 1084 /* must wait until the io_apic is mapped */
1085 nr_irqs = probe_nr_irqs();
1086
1083 kvm_guest_init(); 1087 kvm_guest_init();
1084 1088
1085 e820_reserve_resources(); 1089 e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0e67f72d9316..410c88f0bfeb 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,25 +140,30 @@ static void __init setup_cpu_pda_map(void)
140 */ 140 */
141void __init setup_per_cpu_areas(void) 141void __init setup_per_cpu_areas(void)
142{ 142{
143 ssize_t size = PERCPU_ENOUGH_ROOM; 143 ssize_t size, old_size;
144 char *ptr; 144 char *ptr;
145 int cpu; 145 int cpu;
146 unsigned long align = 1;
146 147
147 /* Setup cpu_pda map */ 148 /* Setup cpu_pda map */
148 setup_cpu_pda_map(); 149 setup_cpu_pda_map();
149 150
150 /* Copy section for each CPU (we discard the original) */ 151 /* Copy section for each CPU (we discard the original) */
151 size = PERCPU_ENOUGH_ROOM; 152 old_size = PERCPU_ENOUGH_ROOM;
153 align = max_t(unsigned long, PAGE_SIZE, align);
154 size = roundup(old_size, align);
152 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
153 size); 156 size);
154 157
155 for_each_possible_cpu(cpu) { 158 for_each_possible_cpu(cpu) {
156#ifndef CONFIG_NEED_MULTIPLE_NODES 159#ifndef CONFIG_NEED_MULTIPLE_NODES
157 ptr = alloc_bootmem_pages(size); 160 ptr = __alloc_bootmem(size, align,
161 __pa(MAX_DMA_ADDRESS));
158#else 162#else
159 int node = early_cpu_to_node(cpu); 163 int node = early_cpu_to_node(cpu);
160 if (!node_online(node) || !NODE_DATA(node)) { 164 if (!node_online(node) || !NODE_DATA(node)) {
161 ptr = alloc_bootmem_pages(size); 165 ptr = __alloc_bootmem(size, align,
166 __pa(MAX_DMA_ADDRESS));
162 printk(KERN_INFO 167 printk(KERN_INFO
163 "cpu %d has no node %d or node-local memory\n", 168 "cpu %d has no node %d or node-local memory\n",
164 cpu, node); 169 cpu, node);
@@ -167,7 +172,8 @@ void __init setup_per_cpu_areas(void)
167 cpu, __pa(ptr)); 172 cpu, __pa(ptr));
168 } 173 }
169 else { 174 else {
170 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); 175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
176 __pa(MAX_DMA_ADDRESS));
171 if (ptr) 177 if (ptr)
172 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
173 cpu, node, __pa(ptr)); 179 cpu, node, __pa(ptr));
@@ -175,7 +181,6 @@ void __init setup_per_cpu_areas(void)
175#endif 181#endif
176 per_cpu_offset(cpu) = ptr - __per_cpu_start; 182 per_cpu_offset(cpu) = ptr - __per_cpu_start;
177 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
178
179 } 184 }
180 185
181 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", 186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7ed9e070a6e9..7ece815ea637 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid)
543 int timeout; 543 int timeout;
544 u32 status; 544 u32 status;
545 545
546 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); 546 printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
547 547
548 for (i = 0; i < ARRAY_SIZE(regs); i++) { 548 for (i = 0; i < ARRAY_SIZE(regs); i++) {
549 printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); 549 printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
550 550
551 /* 551 /*
552 * Wait for idle. 552 * Wait for idle.
@@ -874,7 +874,7 @@ do_rest:
874 start_ip = setup_trampoline(); 874 start_ip = setup_trampoline();
875 875
876 /* So we see what's up */ 876 /* So we see what's up */
877 printk(KERN_INFO "Booting processor %d/%d ip %lx\n", 877 printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
878 cpu, apicid, start_ip); 878 cpu, apicid, start_ip);
879 879
880 /* 880 /*
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
new file mode 100644
index 000000000000..aeef529917e4
--- /dev/null
+++ b/arch/x86/kernel/uv_irq.c
@@ -0,0 +1,79 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * SGI UV IRQ functions
7 *
8 * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#include <linux/module.h>
12#include <linux/irq.h>
13
14#include <asm/apic.h>
15#include <asm/uv/uv_irq.h>
16
17static void uv_noop(unsigned int irq)
18{
19}
20
21static unsigned int uv_noop_ret(unsigned int irq)
22{
23 return 0;
24}
25
26static void uv_ack_apic(unsigned int irq)
27{
28 ack_APIC_irq();
29}
30
31struct irq_chip uv_irq_chip = {
32 .name = "UV-CORE",
33 .startup = uv_noop_ret,
34 .shutdown = uv_noop,
35 .enable = uv_noop,
36 .disable = uv_noop,
37 .ack = uv_noop,
38 .mask = uv_noop,
39 .unmask = uv_noop,
40 .eoi = uv_ack_apic,
41 .end = uv_noop,
42};
43
44/*
45 * Set up a mapping of an available irq and vector, and enable the specified
46 * MMR that defines the MSI that is to be sent to the specified CPU when an
47 * interrupt is raised.
48 */
49int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
50 unsigned long mmr_offset)
51{
52 int irq;
53 int ret;
54
55 irq = create_irq();
56 if (irq <= 0)
57 return -EBUSY;
58
59 ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
60 if (ret != irq)
61 destroy_irq(irq);
62
63 return ret;
64}
65EXPORT_SYMBOL_GPL(uv_setup_irq);
66
67/*
68 * Tear down a mapping of an irq and vector, and disable the specified MMR that
69 * defined the MSI that was to be sent to the specified CPU when an interrupt
70 * was raised.
71 *
 72 * Set mmr_blade and mmr_offset to the values that were passed to uv_setup_irq().
73 */
74void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
75{
76 arch_disable_uv_irq(mmr_blade, mmr_offset);
77 destroy_irq(irq);
78}
79EXPORT_SYMBOL_GPL(uv_teardown_irq);
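A caller is expected to pair uv_setup_irq() with uv_teardown_irq(), reusing the blade and MMR offset from setup at teardown. A hedged usage sketch with placeholder cpu and blade values (illustrative, not part of the patch):

	/* Hypothetical illustration: allocate a UV irq targeted at CPU 0
	 * on blade 0, then tear it down with the same blade and offset
	 * used at setup time. uv_setup_irq() returns the irq number on
	 * success or a negative errno on failure. */
	static int example_uv_irq_usage(unsigned long mmr_offset)
	{
		int irq = uv_setup_irq("uv-example", 0, 0, mmr_offset);

		if (irq < 0)
			return irq;
		/* ... point the device at this irq ... */
		uv_teardown_irq(irq, 0, mmr_offset);
		return 0;
	}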
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
new file mode 100644
index 000000000000..67f9b9dbf800
--- /dev/null
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -0,0 +1,72 @@
1/*
2 * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
20 */
21
22#include <linux/sysdev.h>
23#include <asm/uv/bios.h>
24
25struct kobject *sgi_uv_kobj;
26
27static ssize_t partition_id_show(struct kobject *kobj,
28 struct kobj_attribute *attr, char *buf)
29{
30 return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
31}
32
33static ssize_t coherence_id_show(struct kobject *kobj,
34 struct kobj_attribute *attr, char *buf)
35{
36 return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
37}
38
39static struct kobj_attribute partition_id_attr =
40 __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
41
42static struct kobj_attribute coherence_id_attr =
43 __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
44
45
46static int __init sgi_uv_sysfs_init(void)
47{
48 unsigned long ret;
49
50 if (!sgi_uv_kobj)
51 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
52 if (!sgi_uv_kobj) {
 53 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
54 return -EINVAL;
55 }
56
57 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
58 if (ret) {
 59 printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
60 return ret;
61 }
62
63 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
64 if (ret) {
 65 printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
66 return ret;
67 }
68
69 return 0;
70}
71
72device_initcall(sgi_uv_sysfs_init);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 61a97e616f70..0c9667f0752a 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq)
484static unsigned int startup_cobalt_irq(unsigned int irq) 484static unsigned int startup_cobalt_irq(unsigned int irq)
485{ 485{
486 unsigned long flags; 486 unsigned long flags;
487 struct irq_desc *desc = irq_to_desc(irq);
487 488
488 spin_lock_irqsave(&cobalt_lock, flags); 489 spin_lock_irqsave(&cobalt_lock, flags);
489 if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) 490 if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
490 irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); 491 desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
491 enable_cobalt_irq(irq); 492 enable_cobalt_irq(irq);
492 spin_unlock_irqrestore(&cobalt_lock, flags); 493 spin_unlock_irqrestore(&cobalt_lock, flags);
493 return 0; 494 return 0;
@@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq)
506static void end_cobalt_irq(unsigned int irq) 507static void end_cobalt_irq(unsigned int irq)
507{ 508{
508 unsigned long flags; 509 unsigned long flags;
510 struct irq_desc *desc = irq_to_desc(irq);
509 511
510 spin_lock_irqsave(&cobalt_lock, flags); 512 spin_lock_irqsave(&cobalt_lock, flags);
511 if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) 513 if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
512 enable_cobalt_irq(irq); 514 enable_cobalt_irq(irq);
513 spin_unlock_irqrestore(&cobalt_lock, flags); 515 spin_unlock_irqrestore(&cobalt_lock, flags);
514} 516}
@@ -626,12 +628,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
626 628
627 spin_unlock_irqrestore(&i8259A_lock, flags); 629 spin_unlock_irqrestore(&i8259A_lock, flags);
628 630
629 desc = irq_desc + realirq; 631 desc = irq_to_desc(realirq);
630 632
631 /* 633 /*
632 * handle this 'virtual interrupt' as a Cobalt one now. 634 * handle this 'virtual interrupt' as a Cobalt one now.
633 */ 635 */
634 kstat_cpu(smp_processor_id()).irqs[realirq]++; 636 kstat_incr_irqs_this_cpu(realirq, desc);
635 637
636 if (likely(desc->action != NULL)) 638 if (likely(desc->action != NULL))
637 handle_IRQ_event(realirq, desc->action); 639 handle_IRQ_event(realirq, desc->action);
@@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void)
662 int i; 664 int i;
663 665
664 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { 666 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
665 irq_desc[i].status = IRQ_DISABLED; 667 struct irq_desc *desc = irq_to_desc(i);
666 irq_desc[i].action = 0; 668
667 irq_desc[i].depth = 1; 669 desc->status = IRQ_DISABLED;
670 desc->action = 0;
671 desc->depth = 1;
668 672
669 if (i == 0) { 673 if (i == 0) {
670 irq_desc[i].chip = &cobalt_irq_type; 674 desc->chip = &cobalt_irq_type;
671 } 675 }
672 else if (i == CO_IRQ_IDE0) { 676 else if (i == CO_IRQ_IDE0) {
673 irq_desc[i].chip = &cobalt_irq_type; 677 desc->chip = &cobalt_irq_type;
674 } 678 }
675 else if (i == CO_IRQ_IDE1) { 679 else if (i == CO_IRQ_IDE1) {
676 irq_desc[i].chip = &cobalt_irq_type; 680 desc->chip = &cobalt_irq_type;
677 } 681 }
678 else if (i == CO_IRQ_8259) { 682 else if (i == CO_IRQ_8259) {
679 irq_desc[i].chip = &piix4_master_irq_type; 683 desc->chip = &piix4_master_irq_type;
680 } 684 }
681 else if (i < CO_IRQ_APIC0) { 685 else if (i < CO_IRQ_APIC0) {
682 irq_desc[i].chip = &piix4_virtual_irq_type; 686 desc->chip = &piix4_virtual_irq_type;
683 } 687 }
684 else if (IS_CO_APIC(i)) { 688 else if (IS_CO_APIC(i)) {
685 irq_desc[i].chip = &cobalt_irq_type; 689 desc->chip = &cobalt_irq_type;
686 } 690 }
687 } 691 }
688 692
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 6953859fe289..254ee07f8635 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void)
235 235
236void __init vmi_time_init(void) 236void __init vmi_time_init(void)
237{ 237{
238 unsigned int cpu;
 238 /* Disable PIT: BIOSes start PIT CH0 with 18.2Hz periodic mode. */ 239 /* Disable PIT: BIOSes start PIT CH0 with 18.2Hz periodic mode. */
239 outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ 240 outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
240 241
241 vmi_time_init_clockevent(); 242 vmi_time_init_clockevent();
242 setup_irq(0, &vmi_clock_action); 243 setup_irq(0, &vmi_clock_action);
244 for_each_possible_cpu(cpu)
245 per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
243} 246}
244 247
245#ifdef CONFIG_X86_LOCAL_APIC 248#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 65f0b8a47bed..48ee4f9435f4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void)
582 for (i = 0; i < LGUEST_IRQS; i++) { 582 for (i = 0; i < LGUEST_IRQS; i++) {
583 int vector = FIRST_EXTERNAL_VECTOR + i; 583 int vector = FIRST_EXTERNAL_VECTOR + i;
584 if (vector != SYSCALL_VECTOR) { 584 if (vector != SYSCALL_VECTOR) {
585 set_intr_gate(vector, interrupt[i]); 585 set_intr_gate(vector, interrupt[vector]);
586 set_irq_chip_and_handler_name(i, &lguest_irq_controller, 586 set_irq_chip_and_handler_name(i, &lguest_irq_controller,
587 handle_level_irq, 587 handle_level_irq,
588 "level"); 588 "level");
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index df37fc9d6a26..3c3b471ea496 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
41 { } 41 { }
42}; 42};
43 43
44static cpumask_t vector_allocation_domain(int cpu)
45{
46 return cpumask_of_cpu(cpu);
47}
44 48
45static int probe_bigsmp(void) 49static int probe_bigsmp(void)
46{ 50{
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 6513d41ea21e..28459cab3ddb 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
75} 75}
76#endif 76#endif
77 77
78static cpumask_t vector_allocation_domain(int cpu)
79{
80 /* Careful. Some cpus do not strictly honor the set of cpus
81 * specified in the interrupt destination when using lowest
82 * priority interrupt delivery mode.
83 *
84 * In particular there was a hyperthreading cpu observed to
85 * deliver interrupts to the wrong hyperthread when only one
 86 * hyperthread was specified in the interrupt destination.
87 */
88 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
89 return domain;
90}
91
78struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); 92struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8cf58394975e..71a309b122e6 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
38 return 0; 38 return 0;
39} 39}
40 40
41static cpumask_t vector_allocation_domain(int cpu)
42{
43 /* Careful. Some cpus do not strictly honor the set of cpus
44 * specified in the interrupt destination when using lowest
45 * priority interrupt delivery mode.
46 *
47 * In particular there was a hyperthreading cpu observed to
48 * deliver interrupts to the wrong hyperthread when only one
 49 * hyperthread was specified in the interrupt destination.
50 */
51 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
52 return domain;
53}
54
41struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); 55struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6ad6b67a723d..6272b5e69da6 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -23,4 +23,18 @@ static int probe_summit(void)
23 return 0; 23 return 0;
24} 24}
25 25
26static cpumask_t vector_allocation_domain(int cpu)
27{
28 /* Careful. Some cpus do not strictly honor the set of cpus
29 * specified in the interrupt destination when using lowest
30 * priority interrupt delivery mode.
31 *
32 * In particular there was a hyperthreading cpu observed to
33 * deliver interrupts to the wrong hyperthread when only one
 34 * hyperthread was specified in the interrupt destination.
35 */
36 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
37 return domain;
38}
39
26struct genapic apic_summit = APIC_INIT("summit", probe_summit); 40struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 199a5f4a873c..0f6e8a6523ae 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq)
1483 * the interrupt off to another CPU */ 1483 * the interrupt off to another CPU */
1484static void before_handle_vic_irq(unsigned int irq) 1484static void before_handle_vic_irq(unsigned int irq)
1485{ 1485{
1486 irq_desc_t *desc = irq_desc + irq; 1486 irq_desc_t *desc = irq_to_desc(irq);
1487 __u8 cpu = smp_processor_id(); 1487 __u8 cpu = smp_processor_id();
1488 1488
1489 _raw_spin_lock(&vic_irq_lock); 1489 _raw_spin_lock(&vic_irq_lock);
@@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq)
1518/* Finish the VIC interrupt: basically mask */ 1518/* Finish the VIC interrupt: basically mask */
1519static void after_handle_vic_irq(unsigned int irq) 1519static void after_handle_vic_irq(unsigned int irq)
1520{ 1520{
1521 irq_desc_t *desc = irq_desc + irq; 1521 irq_desc_t *desc = irq_to_desc(irq);
1522 1522
1523 _raw_spin_lock(&vic_irq_lock); 1523 _raw_spin_lock(&vic_irq_lock);
1524 { 1524 {
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 635b50e85581..2c4baa88f2cb 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -56,13 +56,6 @@ struct remap_trace {
56static DEFINE_PER_CPU(struct trap_reason, pf_reason); 56static DEFINE_PER_CPU(struct trap_reason, pf_reason);
57static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); 57static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
58 58
59#if 0 /* XXX: no way gather this info anymore */
60/* Access to this is not per-cpu. */
61static DEFINE_PER_CPU(atomic_t, dropped);
62#endif
63
64static struct dentry *marker_file;
65
66static DEFINE_MUTEX(mmiotrace_mutex); 59static DEFINE_MUTEX(mmiotrace_mutex);
67static DEFINE_SPINLOCK(trace_lock); 60static DEFINE_SPINLOCK(trace_lock);
68static atomic_t mmiotrace_enabled; 61static atomic_t mmiotrace_enabled;
@@ -75,7 +68,7 @@ static LIST_HEAD(trace_list); /* struct remap_trace */
75 * and trace_lock. 68 * and trace_lock.
76 * - Routines depending on is_enabled() must take trace_lock. 69 * - Routines depending on is_enabled() must take trace_lock.
77 * - trace_list users must hold trace_lock. 70 * - trace_list users must hold trace_lock.
78 * - is_enabled() guarantees that mmio_trace_record is allowed. 71 * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed.
79 * - pre/post callbacks assume the effect of is_enabled() being true. 72 * - pre/post callbacks assume the effect of is_enabled() being true.
80 */ 73 */
81 74
@@ -97,44 +90,6 @@ static bool is_enabled(void)
97 return atomic_read(&mmiotrace_enabled); 90 return atomic_read(&mmiotrace_enabled);
98} 91}
99 92
100#if 0 /* XXX: needs rewrite */
101/*
102 * Write callback for the debugfs entry:
103 * Read a marker and write it to the mmio trace log
104 */
105static ssize_t write_marker(struct file *file, const char __user *buffer,
106 size_t count, loff_t *ppos)
107{
108 char *event = NULL;
109 struct mm_io_header *headp;
110 ssize_t len = (count > 65535) ? 65535 : count;
111
112 event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
113 if (!event)
114 return -ENOMEM;
115
116 headp = (struct mm_io_header *)event;
117 headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
118 headp->data_len = len;
119
120 if (copy_from_user(event + sizeof(*headp), buffer, len)) {
121 kfree(event);
122 return -EFAULT;
123 }
124
125 spin_lock_irq(&trace_lock);
126#if 0 /* XXX: convert this to use tracing */
127 if (is_enabled())
128 relay_write(chan, event, sizeof(*headp) + len);
129 else
130#endif
131 len = -EINVAL;
132 spin_unlock_irq(&trace_lock);
133 kfree(event);
134 return len;
135}
136#endif
137
138static void print_pte(unsigned long address) 93static void print_pte(unsigned long address)
139{ 94{
140 unsigned int level; 95 unsigned int level;
@@ -307,8 +262,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
307 map.map_id = trace->id; 262 map.map_id = trace->id;
308 263
309 spin_lock_irq(&trace_lock); 264 spin_lock_irq(&trace_lock);
310 if (!is_enabled()) 265 if (!is_enabled()) {
266 kfree(trace);
311 goto not_enabled; 267 goto not_enabled;
268 }
312 269
313 mmio_trace_mapping(&map); 270 mmio_trace_mapping(&map);
314 list_add_tail(&trace->list, &trace_list); 271 list_add_tail(&trace->list, &trace_list);
@@ -377,6 +334,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr)
377 iounmap_trace_core(addr); 334 iounmap_trace_core(addr);
378} 335}
379 336
337int mmiotrace_printk(const char *fmt, ...)
338{
339 int ret = 0;
340 va_list args;
341 unsigned long flags;
342 va_start(args, fmt);
343
344 spin_lock_irqsave(&trace_lock, flags);
345 if (is_enabled())
346 ret = mmio_trace_printk(fmt, args);
347 spin_unlock_irqrestore(&trace_lock, flags);
348
349 va_end(args);
350 return ret;
351}
352EXPORT_SYMBOL(mmiotrace_printk);
353
380static void clear_trace_list(void) 354static void clear_trace_list(void)
381{ 355{
382 struct remap_trace *trace; 356 struct remap_trace *trace;
@@ -462,26 +436,12 @@ static void leave_uniprocessor(void)
462} 436}
463#endif 437#endif
464 438
465#if 0 /* XXX: out of order */
466static struct file_operations fops_marker = {
467 .owner = THIS_MODULE,
468 .write = write_marker
469};
470#endif
471
472void enable_mmiotrace(void) 439void enable_mmiotrace(void)
473{ 440{
474 mutex_lock(&mmiotrace_mutex); 441 mutex_lock(&mmiotrace_mutex);
475 if (is_enabled()) 442 if (is_enabled())
476 goto out; 443 goto out;
477 444
478#if 0 /* XXX: tracing does not support text entries */
479 marker_file = debugfs_create_file("marker", 0660, dir, NULL,
480 &fops_marker);
481 if (!marker_file)
482 pr_err(NAME "marker file creation failed.\n");
483#endif
484
485 if (nommiotrace) 445 if (nommiotrace)
486 pr_info(NAME "MMIO tracing disabled.\n"); 446 pr_info(NAME "MMIO tracing disabled.\n");
487 enter_uniprocessor(); 447 enter_uniprocessor();
@@ -506,11 +466,6 @@ void disable_mmiotrace(void)
506 466
507 clear_trace_list(); /* guarantees: no more kmmio callbacks */ 467 clear_trace_list(); /* guarantees: no more kmmio callbacks */
508 leave_uniprocessor(); 468 leave_uniprocessor();
509 if (marker_file) {
510 debugfs_remove(marker_file);
511 marker_file = NULL;
512 }
513
514 pr_info(NAME "disabled.\n"); 469 pr_info(NAME "disabled.\n");
515out: 470out:
516 mutex_unlock(&mmiotrace_mutex); 471 mutex_unlock(&mmiotrace_mutex);
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index efa1911e20ca..df3d5c861cda 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 };
79static unsigned int mw64[] = { 0x89, 0x8B }; 79static unsigned int mw64[] = { 0x89, 0x8B };
80#endif /* not __i386__ */ 80#endif /* not __i386__ */
81 81
82static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, 82struct prefix_bits {
83 int *rexr) 83 unsigned shorted:1;
84 unsigned enlarged:1;
85 unsigned rexr:1;
86 unsigned rex:1;
87};
88
89static int skip_prefix(unsigned char *addr, struct prefix_bits *prf)
84{ 90{
85 int i; 91 int i;
86 unsigned char *p = addr; 92 unsigned char *p = addr;
87 *shorted = 0; 93 prf->shorted = 0;
88 *enlarged = 0; 94 prf->enlarged = 0;
89 *rexr = 0; 95 prf->rexr = 0;
96 prf->rex = 0;
90 97
91restart: 98restart:
92 for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { 99 for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
93 if (*p == prefix_codes[i]) { 100 if (*p == prefix_codes[i]) {
94 if (*p == 0x66) 101 if (*p == 0x66)
95 *shorted = 1; 102 prf->shorted = 1;
96#ifdef __amd64__ 103#ifdef __amd64__
97 if ((*p & 0xf8) == 0x48) 104 if ((*p & 0xf8) == 0x48)
98 *enlarged = 1; 105 prf->enlarged = 1;
99 if ((*p & 0xf4) == 0x44) 106 if ((*p & 0xf4) == 0x44)
100 *rexr = 1; 107 prf->rexr = 1;
108 if ((*p & 0xf0) == 0x40)
109 prf->rex = 1;
101#endif 110#endif
102 p++; 111 p++;
103 goto restart; 112 goto restart;
@@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr)
135{ 144{
136 unsigned int opcode; 145 unsigned int opcode;
137 unsigned char *p; 146 unsigned char *p;
138 int shorted, enlarged, rexr; 147 struct prefix_bits prf;
139 int i; 148 int i;
140 enum reason_type rv = OTHERS; 149 enum reason_type rv = OTHERS;
141 150
142 p = (unsigned char *)ins_addr; 151 p = (unsigned char *)ins_addr;
143 p += skip_prefix(p, &shorted, &enlarged, &rexr); 152 p += skip_prefix(p, &prf);
144 p += get_opcode(p, &opcode); 153 p += get_opcode(p, &opcode);
145 154
146 CHECK_OP_TYPE(opcode, reg_rop, REG_READ); 155 CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
@@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
156{ 165{
157 unsigned int opcode; 166 unsigned int opcode;
158 unsigned char *p; 167 unsigned char *p;
159 int i, shorted, enlarged, rexr; 168 struct prefix_bits prf;
169 int i;
160 170
161 p = (unsigned char *)ins_addr; 171 p = (unsigned char *)ins_addr;
162 p += skip_prefix(p, &shorted, &enlarged, &rexr); 172 p += skip_prefix(p, &prf);
163 p += get_opcode(p, &opcode); 173 p += get_opcode(p, &opcode);
164 174
165 for (i = 0; i < ARRAY_SIZE(rw8); i++) 175 for (i = 0; i < ARRAY_SIZE(rw8); i++)
@@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr)
168 178
169 for (i = 0; i < ARRAY_SIZE(rw32); i++) 179 for (i = 0; i < ARRAY_SIZE(rw32); i++)
170 if (rw32[i] == opcode) 180 if (rw32[i] == opcode)
171 return (shorted ? 2 : (enlarged ? 8 : 4)); 181 return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
172 182
173 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); 183 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
174 return 0; 184 return 0;
@@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
178{ 188{
179 unsigned int opcode; 189 unsigned int opcode;
180 unsigned char *p; 190 unsigned char *p;
181 int i, shorted, enlarged, rexr; 191 struct prefix_bits prf;
192 int i;
182 193
183 p = (unsigned char *)ins_addr; 194 p = (unsigned char *)ins_addr;
184 p += skip_prefix(p, &shorted, &enlarged, &rexr); 195 p += skip_prefix(p, &prf);
185 p += get_opcode(p, &opcode); 196 p += get_opcode(p, &opcode);
186 197
187 for (i = 0; i < ARRAY_SIZE(mw8); i++) 198 for (i = 0; i < ARRAY_SIZE(mw8); i++)
@@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr)
194 205
195 for (i = 0; i < ARRAY_SIZE(mw32); i++) 206 for (i = 0; i < ARRAY_SIZE(mw32); i++)
196 if (mw32[i] == opcode) 207 if (mw32[i] == opcode)
197 return shorted ? 2 : 4; 208 return prf.shorted ? 2 : 4;
198 209
199 for (i = 0; i < ARRAY_SIZE(mw64); i++) 210 for (i = 0; i < ARRAY_SIZE(mw64); i++)
200 if (mw64[i] == opcode) 211 if (mw64[i] == opcode)
201 return shorted ? 2 : (enlarged ? 8 : 4); 212 return prf.shorted ? 2 : (prf.enlarged ? 8 : 4);
202 213
203 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); 214 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
204 return 0; 215 return 0;
@@ -238,7 +249,7 @@ enum {
238#endif 249#endif
239}; 250};
240 251
241static unsigned char *get_reg_w8(int no, struct pt_regs *regs) 252static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs)
242{ 253{
243 unsigned char *rv = NULL; 254 unsigned char *rv = NULL;
244 255
@@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
255 case arg_DL: 266 case arg_DL:
256 rv = (unsigned char *)&regs->dx; 267 rv = (unsigned char *)&regs->dx;
257 break; 268 break;
258 case arg_AH:
259 rv = 1 + (unsigned char *)&regs->ax;
260 break;
261 case arg_BH:
262 rv = 1 + (unsigned char *)&regs->bx;
263 break;
264 case arg_CH:
265 rv = 1 + (unsigned char *)&regs->cx;
266 break;
267 case arg_DH:
268 rv = 1 + (unsigned char *)&regs->dx;
269 break;
270#ifdef __amd64__ 269#ifdef __amd64__
271 case arg_R8: 270 case arg_R8:
272 rv = (unsigned char *)&regs->r8; 271 rv = (unsigned char *)&regs->r8;
@@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
294 break; 293 break;
295#endif 294#endif
296 default: 295 default:
297 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
298 break; 296 break;
299 } 297 }
298
299 if (rv)
300 return rv;
301
302 if (rex) {
303 /*
304 * If REX prefix exists, access low bytes of SI etc.
305 * instead of AH etc.
306 */
307 switch (no) {
308 case arg_SI:
309 rv = (unsigned char *)&regs->si;
310 break;
311 case arg_DI:
312 rv = (unsigned char *)&regs->di;
313 break;
314 case arg_BP:
315 rv = (unsigned char *)&regs->bp;
316 break;
317 case arg_SP:
318 rv = (unsigned char *)&regs->sp;
319 break;
320 default:
321 break;
322 }
323 } else {
324 switch (no) {
325 case arg_AH:
326 rv = 1 + (unsigned char *)&regs->ax;
327 break;
328 case arg_BH:
329 rv = 1 + (unsigned char *)&regs->bx;
330 break;
331 case arg_CH:
332 rv = 1 + (unsigned char *)&regs->cx;
333 break;
334 case arg_DH:
335 rv = 1 + (unsigned char *)&regs->dx;
336 break;
337 default:
338 break;
339 }
340 }
341
342 if (!rv)
343 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
344
300 return rv; 345 return rv;
301} 346}
302 347
@@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
368 unsigned char mod_rm; 413 unsigned char mod_rm;
369 int reg; 414 int reg;
370 unsigned char *p; 415 unsigned char *p;
371 int i, shorted, enlarged, rexr; 416 struct prefix_bits prf;
417 int i;
372 unsigned long rv; 418 unsigned long rv;
373 419
374 p = (unsigned char *)ins_addr; 420 p = (unsigned char *)ins_addr;
375 p += skip_prefix(p, &shorted, &enlarged, &rexr); 421 p += skip_prefix(p, &prf);
376 p += get_opcode(p, &opcode); 422 p += get_opcode(p, &opcode);
377 for (i = 0; i < ARRAY_SIZE(reg_rop); i++) 423 for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
378 if (reg_rop[i] == opcode) { 424 if (reg_rop[i] == opcode) {
@@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
392 438
393do_work: 439do_work:
394 mod_rm = *p; 440 mod_rm = *p;
395 reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); 441 reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
396 switch (get_ins_reg_width(ins_addr)) { 442 switch (get_ins_reg_width(ins_addr)) {
397 case 1: 443 case 1:
398 return *get_reg_w8(reg, regs); 444 return *get_reg_w8(reg, prf.rex, regs);
399 445
400 case 2: 446 case 2:
401 return *(unsigned short *)get_reg_w32(reg, regs); 447 return *(unsigned short *)get_reg_w32(reg, regs);
@@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr)
422 unsigned char mod_rm; 468 unsigned char mod_rm;
423 unsigned char mod; 469 unsigned char mod;
424 unsigned char *p; 470 unsigned char *p;
425 int i, shorted, enlarged, rexr; 471 struct prefix_bits prf;
472 int i;
426 unsigned long rv; 473 unsigned long rv;
427 474
428 p = (unsigned char *)ins_addr; 475 p = (unsigned char *)ins_addr;
429 p += skip_prefix(p, &shorted, &enlarged, &rexr); 476 p += skip_prefix(p, &prf);
430 p += get_opcode(p, &opcode); 477 p += get_opcode(p, &opcode);
431 for (i = 0; i < ARRAY_SIZE(imm_wop); i++) 478 for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
432 if (imm_wop[i] == opcode) { 479 if (imm_wop[i] == opcode) {
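The new prefix_bits structure bundles everything skip_prefix() learns from an instruction's prefix bytes: 0x66 sets shorted (operand-size override), REX.W sets enlarged (64-bit operand), REX.R sets rexr (extends the ModRM reg field), and any REX byte sets rex, which switches 8-bit register decoding from AH/BH/CH/DH to the low bytes of SP/BP/SI/DI. A worked example under those assumptions (illustrative, not part of the patch):

	/* Hypothetical walk-through for the encoding 40 88 37,
	 * i.e. mov %sil,(%rdi):
	 *
	 *   p[0] = 0x40               REX prefix, no W/R/X/B bits set
	 *   skip_prefix(p, &prf)      returns 1, leaving p at the opcode
	 *   prf.shorted  = 0          no 0x66 seen
	 *   prf.enlarged = 0          (0x40 & 0xf8) != 0x48
	 *   prf.rexr     = 0          (0x40 & 0xf4) != 0x44
	 *   prf.rex      = 1          (0x40 & 0xf0) == 0x40
	 *
	 * ModRM 0x37 yields reg = 6; with prf.rex set, get_reg_w8(6,
	 * prf.rex, regs) resolves to the low byte of regs->si (SIL)
	 * rather than DH, which is exactly the distinction the new
	 * rex bit exists to carry. */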
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index d877c5b423ef..ab50a8d7402c 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -3,6 +3,7 @@
3 */ 3 */
4#include <linux/module.h> 4#include <linux/module.h>
5#include <linux/io.h> 5#include <linux/io.h>
6#include <linux/mmiotrace.h>
6 7
7#define MODULE_NAME "testmmiotrace" 8#define MODULE_NAME "testmmiotrace"
8 9
@@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
13static void do_write_test(void __iomem *p) 14static void do_write_test(void __iomem *p)
14{ 15{
15 unsigned int i; 16 unsigned int i;
17 mmiotrace_printk("Write test.\n");
16 for (i = 0; i < 256; i++) 18 for (i = 0; i < 256; i++)
17 iowrite8(i, p + i); 19 iowrite8(i, p + i);
18 for (i = 1024; i < (5 * 1024); i += 2) 20 for (i = 1024; i < (5 * 1024); i += 2)
@@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p)
24static void do_read_test(void __iomem *p) 26static void do_read_test(void __iomem *p)
25{ 27{
26 unsigned int i; 28 unsigned int i;
29 mmiotrace_printk("Read test.\n");
27 for (i = 0; i < 256; i++) 30 for (i = 0; i < 256; i++)
28 ioread8(p + i); 31 ioread8(p + i);
29 for (i = 1024; i < (5 * 1024); i += 2) 32 for (i = 1024; i < (5 * 1024); i += 2)
@@ -39,6 +42,7 @@ static void do_test(void)
39 pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); 42 pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
40 return; 43 return;
41 } 44 }
45 mmiotrace_printk("ioremap returned %p.\n", p);
42 do_write_test(p); 46 do_write_test(p);
43 do_read_test(p); 47 do_read_test(p);
44 iounmap(p); 48 iounmap(p);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 006599db0dc7..bf69dbe08bff 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -493,7 +493,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
493 if (pirq <= 4) 493 if (pirq <= 4)
494 irq = read_config_nybble(router, 0x56, pirq - 1); 494 irq = read_config_nybble(router, 0x56, pirq - 1);
495 dev_info(&dev->dev, 495 dev_info(&dev->dev,
496 "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", 496 "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n",
497 dev->vendor, dev->device, pirq, irq); 497 dev->vendor, dev->device, pirq, irq);
498 return irq; 498 return irq;
499} 499}
@@ -501,7 +501,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
501static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) 501static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
502{ 502{
503 dev_info(&dev->dev, 503 dev_info(&dev->dev,
504 "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", 504 "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n",
505 dev->vendor, dev->device, pirq, irq); 505 dev->vendor, dev->device, pirq, irq);
506 if (pirq <= 4) 506 if (pirq <= 4)
507 write_config_nybble(router, 0x56, pirq - 1, irq); 507 write_config_nybble(router, 0x56, pirq - 1, irq);
@@ -590,13 +590,20 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
590 case PCI_DEVICE_ID_INTEL_ICH10_1: 590 case PCI_DEVICE_ID_INTEL_ICH10_1:
591 case PCI_DEVICE_ID_INTEL_ICH10_2: 591 case PCI_DEVICE_ID_INTEL_ICH10_2:
592 case PCI_DEVICE_ID_INTEL_ICH10_3: 592 case PCI_DEVICE_ID_INTEL_ICH10_3:
593 case PCI_DEVICE_ID_INTEL_PCH_0:
594 case PCI_DEVICE_ID_INTEL_PCH_1:
595 r->name = "PIIX/ICH"; 593 r->name = "PIIX/ICH";
596 r->get = pirq_piix_get; 594 r->get = pirq_piix_get;
597 r->set = pirq_piix_set; 595 r->set = pirq_piix_set;
598 return 1; 596 return 1;
599 } 597 }
598
599 if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) &&
600 (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) {
601 r->name = "PIIX/ICH";
602 r->get = pirq_piix_get;
603 r->set = pirq_piix_set;
604 return 1;
605 }
606
600 return 0; 607 return 0;
601} 608}
602 609
@@ -823,7 +830,7 @@ static void __init pirq_find_router(struct irq_router *r)
823 r->get = NULL; 830 r->get = NULL;
824 r->set = NULL; 831 r->set = NULL;
825 832
826 DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", 833 DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
827 rt->rtr_vendor, rt->rtr_device); 834 rt->rtr_vendor, rt->rtr_device);
828 835
829 pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); 836 pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
@@ -843,7 +850,7 @@ static void __init pirq_find_router(struct irq_router *r)
843 h->probe(r, pirq_router_dev, pirq_router_dev->device)) 850 h->probe(r, pirq_router_dev, pirq_router_dev->device))
844 break; 851 break;
845 } 852 }
846 dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", 853 dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
847 pirq_router.name, 854 pirq_router.name,
848 pirq_router_dev->vendor, pirq_router_dev->device); 855 pirq_router_dev->vendor, pirq_router_dev->device);
849 856
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 28b85ab8422e..bb042608c602 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void)
21 21
22static void __init __xen_init_IRQ(void) 22static void __init __xen_init_IRQ(void)
23{ 23{
24#ifdef CONFIG_X86_64
25 int i; 24 int i;
26 25
27 /* Create identity vector->irq map */ 26 /* Create identity vector->irq map */
@@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void)
31 for_each_possible_cpu(cpu) 30 for_each_possible_cpu(cpu)
32 per_cpu(vector_irq, cpu)[i] = i; 31 per_cpu(vector_irq, cpu)[i] = i;
33 } 32 }
34#endif /* CONFIG_X86_64 */
35 33
36 xen_init_IRQ(); 34 xen_init_IRQ();
37} 35}
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index dd71e3a021cd..5601506f2dd9 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
241 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); 241 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
242 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ 242 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
243 243
244 kstat_this_cpu.irqs[irq]++; 244 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
245 245
246out: 246out:
247 raw_local_irq_restore(flags); 247 raw_local_irq_restore(flags);
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index e8362c1efa30..dcbf1be149f3 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -115,34 +115,32 @@ EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
115 * (start) dependent operations on their target channel 115 * (start) dependent operations on their target channel
116 * @tx: transaction with dependencies 116 * @tx: transaction with dependencies
117 */ 117 */
118void 118void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
119async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
120{ 119{
121 struct dma_async_tx_descriptor *next = tx->next; 120 struct dma_async_tx_descriptor *dep = tx->next;
121 struct dma_async_tx_descriptor *dep_next;
122 struct dma_chan *chan; 122 struct dma_chan *chan;
123 123
124 if (!next) 124 if (!dep)
125 return; 125 return;
126 126
127 tx->next = NULL; 127 chan = dep->chan;
128 chan = next->chan;
129 128
 130 /* keep submitting up until a channel switch is detected; 129 /* keep submitting up until a channel switch is detected;
131 * in that case we will be called again as a result of 130 * in that case we will be called again as a result of
132 * processing the interrupt from async_tx_channel_switch 131 * processing the interrupt from async_tx_channel_switch
133 */ 132 */
134 while (next && next->chan == chan) { 133 for (; dep; dep = dep_next) {
135 struct dma_async_tx_descriptor *_next; 134 spin_lock_bh(&dep->lock);
136 135 dep->parent = NULL;
137 spin_lock_bh(&next->lock); 136 dep_next = dep->next;
138 next->parent = NULL; 137 if (dep_next && dep_next->chan == chan)
139 _next = next->next; 138 dep->next = NULL; /* ->next will be submitted */
140 if (_next && _next->chan == chan) 139 else
141 next->next = NULL; 140 dep_next = NULL; /* submit current dep and terminate */
142 spin_unlock_bh(&next->lock); 141 spin_unlock_bh(&dep->lock);
143 142
144 next->tx_submit(next); 143 dep->tx_submit(dep);
145 next = _next;
146 } 144 }
147 145
148 chan->device->device_issue_pending(chan); 146 chan->device->device_issue_pending(chan);
diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c
index 31dcd9142d54..dc8d1a90971f 100644
--- a/drivers/char/agp/ali-agp.c
+++ b/drivers/char/agp/ali-agp.c
@@ -417,6 +417,6 @@ static void __exit agp_ali_cleanup(void)
417module_init(agp_ali_init); 417module_init(agp_ali_init);
418module_exit(agp_ali_cleanup); 418module_exit(agp_ali_cleanup);
419 419
420MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>"); 420MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
421MODULE_LICENSE("GPL and additional rights"); 421MODULE_LICENSE("GPL and additional rights");
422 422
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 2812ee2b165a..52f4361eb6e4 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -772,6 +772,6 @@ module_init(agp_amd64_init);
772module_exit(agp_amd64_cleanup); 772module_exit(agp_amd64_cleanup);
773#endif 773#endif
774 774
775MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>, Andi Kleen"); 775MODULE_AUTHOR("Dave Jones <davej@redhat.com>, Andi Kleen");
776module_param(agp_try_unsupported, bool, 0); 776module_param(agp_try_unsupported, bool, 0);
777MODULE_LICENSE("GPL"); 777MODULE_LICENSE("GPL");
diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index ae2791b926b9..f1537eece07f 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c
@@ -561,6 +561,6 @@ static void __exit agp_ati_cleanup(void)
561module_init(agp_ati_init); 561module_init(agp_ati_init);
562module_exit(agp_ati_cleanup); 562module_exit(agp_ati_cleanup);
563 563
564MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>"); 564MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
565MODULE_LICENSE("GPL and additional rights"); 565MODULE_LICENSE("GPL and additional rights");
566 566
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 3a3cc03d401c..8c617ad7497f 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -349,7 +349,7 @@ static __init int agp_setup(char *s)
349__setup("agp=", agp_setup); 349__setup("agp=", agp_setup);
350#endif 350#endif
351 351
352MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>"); 352MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
353MODULE_DESCRIPTION("AGP GART driver"); 353MODULE_DESCRIPTION("AGP GART driver");
354MODULE_LICENSE("GPL and additional rights"); 354MODULE_LICENSE("GPL and additional rights");
355MODULE_ALIAS_MISCDEV(AGPGART_MINOR); 355MODULE_ALIAS_MISCDEV(AGPGART_MINOR);
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 1108665913e2..9cf6e9bb017e 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -2390,5 +2390,5 @@ static void __exit agp_intel_cleanup(void)
2390module_init(agp_intel_init); 2390module_init(agp_intel_init);
2391module_exit(agp_intel_cleanup); 2391module_exit(agp_intel_cleanup);
2392 2392
2393MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>"); 2393MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
2394MODULE_LICENSE("GPL and additional rights"); 2394MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index 5bbed3d79db9..16acee2de117 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Nvidia AGPGART routines. 2 * Nvidia AGPGART routines.
3 * Based upon a 2.4 agpgart diff by the folks from NVIDIA, and hacked up 3 * Based upon a 2.4 agpgart diff by the folks from NVIDIA, and hacked up
4 * to work in 2.5 by Dave Jones <davej@codemonkey.org.uk> 4 * to work in 2.5 by Dave Jones <davej@redhat.com>
5 */ 5 */
6 6
7#include <linux/module.h> 7#include <linux/module.h>
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index f2492ecf0824..db60539bf67a 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -20,8 +20,8 @@
20#include <linux/agp_backend.h> 20#include <linux/agp_backend.h>
21#include <linux/log2.h> 21#include <linux/log2.h>
22 22
23#include <asm-parisc/parisc-device.h> 23#include <asm/parisc-device.h>
24#include <asm-parisc/ropes.h> 24#include <asm/ropes.h>
25 25
26#include "agp.h" 26#include "agp.h"
27 27
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index 9f4d49e1b59a..d3bd243867fc 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -595,4 +595,4 @@ module_init(agp_via_init);
595module_exit(agp_via_cleanup); 595module_exit(agp_via_cleanup);
596 596
597MODULE_LICENSE("GPL"); 597MODULE_LICENSE("GPL");
598MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>"); 598MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index f3cfb4c76125..408f5f92cb4e 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -219,7 +219,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
219 for (irq = find_first_bit(&v, HPET_MAX_IRQ); irq < HPET_MAX_IRQ; 219 for (irq = find_first_bit(&v, HPET_MAX_IRQ); irq < HPET_MAX_IRQ;
220 irq = find_next_bit(&v, HPET_MAX_IRQ, 1 + irq)) { 220 irq = find_next_bit(&v, HPET_MAX_IRQ, 1 + irq)) {
221 221
222 if (irq >= NR_IRQS) { 222 if (irq >= nr_irqs) {
223 irq = HPET_MAX_IRQ; 223 irq = HPET_MAX_IRQ;
224 break; 224 break;
225 } 225 }
diff --git a/drivers/char/random.c b/drivers/char/random.c
index c8752eaad483..705a839f1796 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,9 +558,26 @@ struct timer_rand_state {
558 unsigned dont_count_entropy:1; 558 unsigned dont_count_entropy:1;
559}; 559};
560 560
561static struct timer_rand_state input_timer_state;
562static struct timer_rand_state *irq_timer_state[NR_IRQS]; 561static struct timer_rand_state *irq_timer_state[NR_IRQS];
563 562
563static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
564{
565 if (irq >= nr_irqs)
566 return NULL;
567
568 return irq_timer_state[irq];
569}
570
571static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
572{
573 if (irq >= nr_irqs)
574 return;
575
576 irq_timer_state[irq] = state;
577}
578
579static struct timer_rand_state input_timer_state;
580
564/* 581/*
565 * This function adds entropy to the entropy "pool" by using timing 582 * This function adds entropy to the entropy "pool" by using timing
566 * delays. It uses the timer_rand_state structure to make an estimate 583 * delays. It uses the timer_rand_state structure to make an estimate
@@ -648,11 +665,15 @@ EXPORT_SYMBOL_GPL(add_input_randomness);
648 665
649void add_interrupt_randomness(int irq) 666void add_interrupt_randomness(int irq)
650{ 667{
651 if (irq >= NR_IRQS || irq_timer_state[irq] == NULL) 668 struct timer_rand_state *state;
669
670 state = get_timer_rand_state(irq);
671
672 if (state == NULL)
652 return; 673 return;
653 674
654 DEBUG_ENT("irq event %d\n", irq); 675 DEBUG_ENT("irq event %d\n", irq);
655 add_timer_randomness(irq_timer_state[irq], 0x100 + irq); 676 add_timer_randomness(state, 0x100 + irq);
656} 677}
657 678
658#ifdef CONFIG_BLOCK 679#ifdef CONFIG_BLOCK
@@ -912,7 +933,12 @@ void rand_initialize_irq(int irq)
912{ 933{
913 struct timer_rand_state *state; 934 struct timer_rand_state *state;
914 935
915 if (irq >= NR_IRQS || irq_timer_state[irq]) 936 if (irq >= nr_irqs)
937 return;
938
939 state = get_timer_rand_state(irq);
940
941 if (state)
916 return; 942 return;
917 943
918 /* 944 /*
@@ -921,7 +947,7 @@ void rand_initialize_irq(int irq)
921 */ 947 */
922 state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); 948 state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
923 if (state) 949 if (state)
924 irq_timer_state[irq] = state; 950 set_timer_rand_state(irq, state);
925} 951}
926 952
927#ifdef CONFIG_BLOCK 953#ifdef CONFIG_BLOCK
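
The random.c change above stops callers from indexing irq_timer_state[] directly and funnels every read and write through bounds-checked helpers, so the nr_irqs comparison lives in exactly one place. A minimal sketch of that accessor pattern, assuming placeholder sizes (TABLE_SIZE for NR_IRQS, runtime_limit for nr_irqs):

/* Sketch: bounds-checked accessors around a fixed-size per-IRQ table,
 * mirroring get/set_timer_rand_state() in the patch. */
#include <stddef.h>
#include <stdlib.h>

#define TABLE_SIZE 64                    /* placeholder for NR_IRQS */
static unsigned int runtime_limit = 48;  /* placeholder for nr_irqs */

struct timer_rand_state { int dummy; };

static struct timer_rand_state *table[TABLE_SIZE];

static struct timer_rand_state *get_state(unsigned int irq)
{
    if (irq >= runtime_limit)
        return NULL;            /* out-of-range reads yield NULL */
    return table[irq];
}

static void set_state(unsigned int irq, struct timer_rand_state *s)
{
    if (irq >= runtime_limit)
        return;                 /* out-of-range writes are dropped */
    table[irq] = s;
}

int main(void)
{
    struct timer_rand_state *s = calloc(1, sizeof(*s));

    set_state(5, s);                 /* accepted */
    set_state(60, s);                /* silently rejected: 60 >= 48 */
    return get_state(5) == s ? 0 : 1;
}
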
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index d0c0d64ed366..ce0d9da52a8a 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
168static struct sysrq_key_op sysrq_show_timers_op = { 168static struct sysrq_key_op sysrq_show_timers_op = {
169 .handler = sysrq_handle_show_timers, 169 .handler = sysrq_handle_show_timers,
170 .help_msg = "show-all-timers(Q)", 170 .help_msg = "show-all-timers(Q)",
171 .action_msg = "Show pending hrtimers (no others)", 171 .action_msg = "Show clockevent devices & pending hrtimers (no others)",
172}; 172};
173 173
174static void sysrq_handle_mountro(int key, struct tty_struct *tty) 174static void sysrq_handle_mountro(int key, struct tty_struct *tty)
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index ffe9b4e3072e..54c837288d19 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -641,7 +641,7 @@ static int __devinit giu_probe(struct platform_device *dev)
641 } 641 }
642 642
643 irq = platform_get_irq(dev, 0); 643 irq = platform_get_irq(dev, 0);
644 if (irq < 0 || irq >= NR_IRQS) 644 if (irq < 0 || irq >= nr_irqs)
645 return -EBUSY; 645 return -EBUSY;
646 646
647 return cascade_irq(irq, giu_get_irq); 647 return cascade_irq(irq, giu_get_irq);
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 71d2ac4e3f46..c20171078d1d 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -237,9 +237,12 @@ static int __init parse_pmtmr(char *arg)
237 237
238 if (strict_strtoul(arg, 16, &base)) 238 if (strict_strtoul(arg, 16, &base))
239 return -EINVAL; 239 return -EINVAL;
240 240#ifdef CONFIG_X86_64
241 if (base > UINT_MAX)
242 return -ERANGE;
243#endif
241 printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n", 244 printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n",
242 (unsigned int)pmtmr_ioport, base); 245 pmtmr_ioport, base);
243 pmtmr_ioport = base; 246 pmtmr_ioport = base;
244 247
245 return 1; 248 return 1;
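
On 64-bit builds the parsed unsigned long can exceed what the 32-bit pmtmr_ioport can hold, so the hunk above rejects anything above UINT_MAX before assigning. A user-space sketch of the same guard, using strtoul() in place of the kernel's strict_strtoul():

/* Sketch: parse a hex port override and reject values that would be
 * truncated when stored in a 32-bit variable. */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned int pmtmr_port; /* 32-bit destination, like pmtmr_ioport */

static int parse_pmtmr(const char *arg)
{
    char *end;
    unsigned long base;

    errno = 0;
    base = strtoul(arg, &end, 16);
    if (errno || end == arg || *end)
        return -EINVAL;
#if ULONG_MAX > UINT_MAX        /* only possible on LP64 targets */
    if (base > UINT_MAX)
        return -ERANGE;         /* would be silently truncated below */
#endif
    pmtmr_port = (unsigned int)base;
    return 0;
}

int main(void)
{
    printf("%d\n", parse_pmtmr("408"));          /* 0: accepted */
    printf("%d\n", parse_pmtmr("1ffffffff"));    /* -ERANGE on 64-bit */
    return 0;
}
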
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index cd303901eb5b..904e57558bb5 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -48,13 +48,13 @@ config DW_DMAC
48 can be integrated in chips such as the Atmel AT32ap7000. 48 can be integrated in chips such as the Atmel AT32ap7000.
49 49
50config FSL_DMA 50config FSL_DMA
51 bool "Freescale MPC85xx/MPC83xx DMA support" 51 tristate "Freescale Elo and Elo Plus DMA support"
52 depends on PPC 52 depends on FSL_SOC
53 select DMA_ENGINE 53 select DMA_ENGINE
54 ---help--- 54 ---help---
55 Enable support for the Freescale DMA engine. Now, it support 55 Enable support for the Freescale Elo and Elo Plus DMA controllers.
56 MPC8560/40, MPC8555, MPC8548 and MPC8641 processors. 56 The Elo is the DMA controller on some 82xx and 83xx parts, and the
57 The MPC8349, MPC8360 is also supported. 57 Elo Plus is the DMA controller on 85xx and 86xx parts.
58 58
59config MV_XOR 59config MV_XOR
60 bool "Marvell XOR engine support" 60 bool "Marvell XOR engine support"
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index a08d19704743..d1e381e35a9e 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -325,7 +325,12 @@ static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
325 struct dmatest_thread *thread; 325 struct dmatest_thread *thread;
326 unsigned int i; 326 unsigned int i;
327 327
328 dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC); 328 /* Have we already been told about this channel? */
329 list_for_each_entry(dtc, &dmatest_channels, node)
330 if (dtc->chan == chan)
331 return DMA_DUP;
332
333 dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
329 if (!dtc) { 334 if (!dtc) {
330 pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id); 335 pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
331 return DMA_NAK; 336 return DMA_NAK;
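
dmatest_add_channel() now scans its list for an already-tracked channel and returns DMA_DUP before allocating anything, and the allocation itself moves from GFP_ATOMIC to GFP_KERNEL since this path may sleep. A sketch of the scan-before-insert guard, with a hand-rolled singly linked list standing in for the kernel list helpers:

/* Sketch: refuse to track the same channel twice before allocating a
 * new tracking node, mirroring the DMA_DUP check added above. */
#include <stdio.h>
#include <stdlib.h>

struct chan { int id; };

struct tracked {
    struct chan *chan;
    struct tracked *next;
};

static struct tracked *head;

static int add_channel(struct chan *c)
{
    struct tracked *t;

    for (t = head; t; t = t->next)      /* "have we been told already?" */
        if (t->chan == c)
            return 1;                   /* duplicate, like DMA_DUP */

    t = malloc(sizeof(*t));             /* safe to sleep here -> GFP_KERNEL */
    if (!t)
        return -1;                      /* like DMA_NAK */
    t->chan = c;
    t->next = head;
    head = t;
    return 0;
}

int main(void)
{
    struct chan c = { .id = 0 };
    int first = add_channel(&c);
    int again = add_channel(&c);

    printf("%d %d\n", first, again);    /* 0 1 */
    return 0;
}
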
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index c0059ca58340..0b95dcce447e 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -370,7 +370,10 @@ static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
370 struct dma_client *client) 370 struct dma_client *client)
371{ 371{
372 struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan); 372 struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
373 LIST_HEAD(tmp_list); 373
374 /* Has this channel already been allocated? */
375 if (fsl_chan->desc_pool)
376 return 1;
374 377
375 /* We need the descriptor to be aligned to 32bytes 378 /* We need the descriptor to be aligned to 32bytes
376 * for meeting FSL DMA specification requirement. 379 * for meeting FSL DMA specification requirement.
@@ -410,6 +413,8 @@ static void fsl_dma_free_chan_resources(struct dma_chan *chan)
410 } 413 }
411 spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); 414 spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
412 dma_pool_destroy(fsl_chan->desc_pool); 415 dma_pool_destroy(fsl_chan->desc_pool);
416
417 fsl_chan->desc_pool = NULL;
413} 418}
414 419
415static struct dma_async_tx_descriptor * 420static struct dma_async_tx_descriptor *
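
The two hunks above make the channel resource lifecycle idempotent: allocation bails out early when desc_pool already exists, and the free path resets the pointer to NULL so a later allocation starts clean. A sketch of that pointer-guard pattern (the pool itself is a placeholder):

/* Sketch: idempotent resource alloc/free guarded by the pointer itself,
 * as in the desc_pool handling above. */
#include <stdlib.h>

struct channel {
    void *pool;     /* stand-in for fsl_chan->desc_pool */
};

static int alloc_resources(struct channel *ch)
{
    if (ch->pool)           /* already allocated: report success again */
        return 1;
    ch->pool = malloc(256);
    return ch->pool ? 1 : -1;
}

static void free_resources(struct channel *ch)
{
    free(ch->pool);
    ch->pool = NULL;        /* crucial: allow a later re-alloc */
}

int main(void)
{
    struct channel ch = { 0 };

    alloc_resources(&ch);
    alloc_resources(&ch);   /* no double allocation */
    free_resources(&ch);
    free_resources(&ch);    /* free(NULL) is a no-op, still safe */
    return 0;
}
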
@@ -786,159 +791,29 @@ static void dma_do_tasklet(unsigned long data)
786 fsl_chan_ld_cleanup(fsl_chan); 791 fsl_chan_ld_cleanup(fsl_chan);
787} 792}
788 793
789static void fsl_dma_callback_test(void *param) 794static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
790{ 795 struct device_node *node, u32 feature, const char *compatible)
791 struct fsl_dma_chan *fsl_chan = param;
792 if (fsl_chan)
793 dev_dbg(fsl_chan->dev, "selftest: callback is ok!\n");
794}
795
796static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
797{
798 struct dma_chan *chan;
799 int err = 0;
800 dma_addr_t dma_dest, dma_src;
801 dma_cookie_t cookie;
802 u8 *src, *dest;
803 int i;
804 size_t test_size;
805 struct dma_async_tx_descriptor *tx1, *tx2, *tx3;
806
807 test_size = 4096;
808
809 src = kmalloc(test_size * 2, GFP_KERNEL);
810 if (!src) {
811 dev_err(fsl_chan->dev,
812 "selftest: Cannot alloc memory for test!\n");
813 return -ENOMEM;
814 }
815
816 dest = src + test_size;
817
818 for (i = 0; i < test_size; i++)
819 src[i] = (u8) i;
820
821 chan = &fsl_chan->common;
822
823 if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
824 dev_err(fsl_chan->dev,
825 "selftest: Cannot alloc resources for DMA\n");
826 err = -ENODEV;
827 goto out;
828 }
829
830 /* TX 1 */
831 dma_src = dma_map_single(fsl_chan->dev, src, test_size / 2,
832 DMA_TO_DEVICE);
833 dma_dest = dma_map_single(fsl_chan->dev, dest, test_size / 2,
834 DMA_FROM_DEVICE);
835 tx1 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 2, 0);
836 async_tx_ack(tx1);
837
838 cookie = fsl_dma_tx_submit(tx1);
839 fsl_dma_memcpy_issue_pending(chan);
840 msleep(2);
841
842 if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
843 dev_err(fsl_chan->dev, "selftest: Time out!\n");
844 err = -ENODEV;
845 goto free_resources;
846 }
847
848 /* Test free and re-alloc channel resources */
849 fsl_dma_free_chan_resources(chan);
850
851 if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
852 dev_err(fsl_chan->dev,
853 "selftest: Cannot alloc resources for DMA\n");
854 err = -ENODEV;
855 goto free_resources;
856 }
857
858 /* Continue to test
859 * TX 2
860 */
861 dma_src = dma_map_single(fsl_chan->dev, src + test_size / 2,
862 test_size / 4, DMA_TO_DEVICE);
863 dma_dest = dma_map_single(fsl_chan->dev, dest + test_size / 2,
864 test_size / 4, DMA_FROM_DEVICE);
865 tx2 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
866 async_tx_ack(tx2);
867
868 /* TX 3 */
869 dma_src = dma_map_single(fsl_chan->dev, src + test_size * 3 / 4,
870 test_size / 4, DMA_TO_DEVICE);
871 dma_dest = dma_map_single(fsl_chan->dev, dest + test_size * 3 / 4,
872 test_size / 4, DMA_FROM_DEVICE);
873 tx3 = fsl_dma_prep_memcpy(chan, dma_dest, dma_src, test_size / 4, 0);
874 async_tx_ack(tx3);
875
876 /* Interrupt tx test */
877 tx1 = fsl_dma_prep_interrupt(chan, 0);
878 async_tx_ack(tx1);
879 cookie = fsl_dma_tx_submit(tx1);
880
881 /* Test exchanging the prepared tx sort */
882 cookie = fsl_dma_tx_submit(tx3);
883 cookie = fsl_dma_tx_submit(tx2);
884
885 if (dma_has_cap(DMA_INTERRUPT, ((struct fsl_dma_device *)
886 dev_get_drvdata(fsl_chan->dev->parent))->common.cap_mask)) {
887 tx3->callback = fsl_dma_callback_test;
888 tx3->callback_param = fsl_chan;
889 }
890 fsl_dma_memcpy_issue_pending(chan);
891 msleep(2);
892
893 if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
894 dev_err(fsl_chan->dev, "selftest: Time out!\n");
895 err = -ENODEV;
896 goto free_resources;
897 }
898
899 err = memcmp(src, dest, test_size);
900 if (err) {
901 for (i = 0; (*(src + i) == *(dest + i)) && (i < test_size);
902 i++);
903 dev_err(fsl_chan->dev, "selftest: Test failed, data %d/%ld is "
904 "error! src 0x%x, dest 0x%x\n",
905 i, (long)test_size, *(src + i), *(dest + i));
906 }
907
908free_resources:
909 fsl_dma_free_chan_resources(chan);
910out:
911 kfree(src);
912 return err;
913}
914
915static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
916 const struct of_device_id *match)
917{ 796{
918 struct fsl_dma_device *fdev;
919 struct fsl_dma_chan *new_fsl_chan; 797 struct fsl_dma_chan *new_fsl_chan;
920 int err; 798 int err;
921 799
922 fdev = dev_get_drvdata(dev->dev.parent);
923 BUG_ON(!fdev);
924
925 /* alloc channel */ 800 /* alloc channel */
926 new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL); 801 new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
927 if (!new_fsl_chan) { 802 if (!new_fsl_chan) {
928 dev_err(&dev->dev, "No free memory for allocating " 803 dev_err(fdev->dev, "No free memory for allocating "
929 "dma channels!\n"); 804 "dma channels!\n");
930 return -ENOMEM; 805 return -ENOMEM;
931 } 806 }
932 807
933 /* get dma channel register base */ 808 /* get dma channel register base */
934 err = of_address_to_resource(dev->node, 0, &new_fsl_chan->reg); 809 err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
935 if (err) { 810 if (err) {
936 dev_err(&dev->dev, "Can't get %s property 'reg'\n", 811 dev_err(fdev->dev, "Can't get %s property 'reg'\n",
937 dev->node->full_name); 812 node->full_name);
938 goto err_no_reg; 813 goto err_no_reg;
939 } 814 }
940 815
941 new_fsl_chan->feature = *(u32 *)match->data; 816 new_fsl_chan->feature = feature;
942 817
943 if (!fdev->feature) 818 if (!fdev->feature)
944 fdev->feature = new_fsl_chan->feature; 819 fdev->feature = new_fsl_chan->feature;
@@ -948,13 +823,13 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
948 */ 823 */
949 WARN_ON(fdev->feature != new_fsl_chan->feature); 824 WARN_ON(fdev->feature != new_fsl_chan->feature);
950 825
951 new_fsl_chan->dev = &dev->dev; 826 new_fsl_chan->dev = &new_fsl_chan->common.dev;
952 new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start, 827 new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
953 new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1); 828 new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
954 829
955 new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7; 830 new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
956 if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) { 831 if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
957 dev_err(&dev->dev, "There is no %d channel!\n", 832 dev_err(fdev->dev, "There is no %d channel!\n",
958 new_fsl_chan->id); 833 new_fsl_chan->id);
959 err = -EINVAL; 834 err = -EINVAL;
960 goto err_no_chan; 835 goto err_no_chan;
@@ -988,29 +863,23 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
988 &fdev->common.channels); 863 &fdev->common.channels);
989 fdev->common.chancnt++; 864 fdev->common.chancnt++;
990 865
991 new_fsl_chan->irq = irq_of_parse_and_map(dev->node, 0); 866 new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
992 if (new_fsl_chan->irq != NO_IRQ) { 867 if (new_fsl_chan->irq != NO_IRQ) {
993 err = request_irq(new_fsl_chan->irq, 868 err = request_irq(new_fsl_chan->irq,
994 &fsl_dma_chan_do_interrupt, IRQF_SHARED, 869 &fsl_dma_chan_do_interrupt, IRQF_SHARED,
995 "fsldma-channel", new_fsl_chan); 870 "fsldma-channel", new_fsl_chan);
996 if (err) { 871 if (err) {
997 dev_err(&dev->dev, "DMA channel %s request_irq error " 872 dev_err(fdev->dev, "DMA channel %s request_irq error "
998 "with return %d\n", dev->node->full_name, err); 873 "with return %d\n", node->full_name, err);
999 goto err_no_irq; 874 goto err_no_irq;
1000 } 875 }
1001 } 876 }
1002 877
1003 err = fsl_dma_self_test(new_fsl_chan); 878 dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
1004 if (err) 879 compatible, new_fsl_chan->irq);
1005 goto err_self_test;
1006
1007 dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
1008 match->compatible, new_fsl_chan->irq);
1009 880
1010 return 0; 881 return 0;
1011 882
1012err_self_test:
1013 free_irq(new_fsl_chan->irq, new_fsl_chan);
1014err_no_irq: 883err_no_irq:
1015 list_del(&new_fsl_chan->common.device_node); 884 list_del(&new_fsl_chan->common.device_node);
1016err_no_chan: 885err_no_chan:
@@ -1020,38 +889,20 @@ err_no_reg:
1020 return err; 889 return err;
1021} 890}
1022 891
1023const u32 mpc8540_dma_ip_feature = FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN; 892static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
1024const u32 mpc8349_dma_ip_feature = FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN;
1025
1026static struct of_device_id of_fsl_dma_chan_ids[] = {
1027 {
1028 .compatible = "fsl,eloplus-dma-channel",
1029 .data = (void *)&mpc8540_dma_ip_feature,
1030 },
1031 {
1032 .compatible = "fsl,elo-dma-channel",
1033 .data = (void *)&mpc8349_dma_ip_feature,
1034 },
1035 {}
1036};
1037
1038static struct of_platform_driver of_fsl_dma_chan_driver = {
1039 .name = "of-fsl-dma-channel",
1040 .match_table = of_fsl_dma_chan_ids,
1041 .probe = of_fsl_dma_chan_probe,
1042};
1043
1044static __init int of_fsl_dma_chan_init(void)
1045{ 893{
1046 return of_register_platform_driver(&of_fsl_dma_chan_driver); 894 free_irq(fchan->irq, fchan);
895 list_del(&fchan->common.device_node);
896 iounmap(fchan->reg_base);
897 kfree(fchan);
1047} 898}
1048 899
1049static int __devinit of_fsl_dma_probe(struct of_device *dev, 900static int __devinit of_fsl_dma_probe(struct of_device *dev,
1050 const struct of_device_id *match) 901 const struct of_device_id *match)
1051{ 902{
1052 int err; 903 int err;
1053 unsigned int irq;
1054 struct fsl_dma_device *fdev; 904 struct fsl_dma_device *fdev;
905 struct device_node *child;
1055 906
1056 fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL); 907 fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
1057 if (!fdev) { 908 if (!fdev) {
@@ -1085,9 +936,9 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
1085 fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; 936 fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
1086 fdev->common.dev = &dev->dev; 937 fdev->common.dev = &dev->dev;
1087 938
1088 irq = irq_of_parse_and_map(dev->node, 0); 939 fdev->irq = irq_of_parse_and_map(dev->node, 0);
1089 if (irq != NO_IRQ) { 940 if (fdev->irq != NO_IRQ) {
1090 err = request_irq(irq, &fsl_dma_do_interrupt, IRQF_SHARED, 941 err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
1091 "fsldma-device", fdev); 942 "fsldma-device", fdev);
1092 if (err) { 943 if (err) {
1093 dev_err(&dev->dev, "DMA device request_irq error " 944 dev_err(&dev->dev, "DMA device request_irq error "
@@ -1097,7 +948,21 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
1097 } 948 }
1098 949
1099 dev_set_drvdata(&(dev->dev), fdev); 950 dev_set_drvdata(&(dev->dev), fdev);
1100 of_platform_bus_probe(dev->node, of_fsl_dma_chan_ids, &dev->dev); 951
952 /* We cannot use of_platform_bus_probe() because there is no
953 * of_platform_bus_remove. Instead, we manually instantiate every DMA
954 * channel object.
955 */
956 for_each_child_of_node(dev->node, child) {
957 if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
958 fsl_dma_chan_probe(fdev, child,
959 FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
960 "fsl,eloplus-dma-channel");
961 if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
962 fsl_dma_chan_probe(fdev, child,
963 FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
964 "fsl,elo-dma-channel");
965 }
1101 966
1102 dma_async_device_register(&fdev->common); 967 dma_async_device_register(&fdev->common);
1103 return 0; 968 return 0;
@@ -1109,6 +974,30 @@ err_no_reg:
1109 return err; 974 return err;
1110} 975}
1111 976
977static int of_fsl_dma_remove(struct of_device *of_dev)
978{
979 struct fsl_dma_device *fdev;
980 unsigned int i;
981
982 fdev = dev_get_drvdata(&of_dev->dev);
983
984 dma_async_device_unregister(&fdev->common);
985
986 for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++)
987 if (fdev->chan[i])
988 fsl_dma_chan_remove(fdev->chan[i]);
989
990 if (fdev->irq != NO_IRQ)
991 free_irq(fdev->irq, fdev);
992
993 iounmap(fdev->reg_base);
994
995 kfree(fdev);
996 dev_set_drvdata(&of_dev->dev, NULL);
997
998 return 0;
999}
1000
1112static struct of_device_id of_fsl_dma_ids[] = { 1001static struct of_device_id of_fsl_dma_ids[] = {
1113 { .compatible = "fsl,eloplus-dma", }, 1002 { .compatible = "fsl,eloplus-dma", },
1114 { .compatible = "fsl,elo-dma", }, 1003 { .compatible = "fsl,elo-dma", },
@@ -1116,15 +1005,32 @@ static struct of_device_id of_fsl_dma_ids[] = {
1116}; 1005};
1117 1006
1118static struct of_platform_driver of_fsl_dma_driver = { 1007static struct of_platform_driver of_fsl_dma_driver = {
1119 .name = "of-fsl-dma", 1008 .name = "fsl-elo-dma",
1120 .match_table = of_fsl_dma_ids, 1009 .match_table = of_fsl_dma_ids,
1121 .probe = of_fsl_dma_probe, 1010 .probe = of_fsl_dma_probe,
1011 .remove = of_fsl_dma_remove,
1122}; 1012};
1123 1013
1124static __init int of_fsl_dma_init(void) 1014static __init int of_fsl_dma_init(void)
1125{ 1015{
1126 return of_register_platform_driver(&of_fsl_dma_driver); 1016 int ret;
1017
1018 pr_info("Freescale Elo / Elo Plus DMA driver\n");
1019
1020 ret = of_register_platform_driver(&of_fsl_dma_driver);
1021 if (ret)
1022 pr_err("fsldma: failed to register platform driver\n");
1023
1024 return ret;
1025}
1026
1027static void __exit of_fsl_dma_exit(void)
1028{
1029 of_unregister_platform_driver(&of_fsl_dma_driver);
1127} 1030}
1128 1031
1129subsys_initcall(of_fsl_dma_chan_init);
1130subsys_initcall(of_fsl_dma_init); 1032subsys_initcall(of_fsl_dma_init);
1033module_exit(of_fsl_dma_exit);
1034
1035MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
1036MODULE_LICENSE("GPL");
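
Taken together, the fsldma hunks replace of_platform_bus_probe() (which has no remove counterpart) with a manual walk over the child nodes, stash the device IRQ in struct fsl_dma_device, and add of_fsl_dma_remove() plus module_exit() so the now-tristate driver can actually unload. A loose sketch of that symmetric probe/remove shape; the node type, matching, and channel bookkeeping below are invented stand-ins for the OF API, not kernel calls:

/* Sketch: probe every matching child at device probe time and tear the
 * same set down in remove(), mirroring the fsldma rework. */
#include <string.h>

#define MAX_CHANS 4

struct node { const char *compatible; };

struct dev {
    struct node *chan_node[MAX_CHANS];  /* stand-in for fdev->chan[] */
    int nchans;
};

static void chan_probe(struct dev *d, struct node *n)
{
    if (d->nchans < MAX_CHANS)
        d->chan_node[d->nchans++] = n;
}

static void probe(struct dev *d, struct node *children, int n)
{
    for (int i = 0; i < n; i++)         /* like for_each_child_of_node() */
        if (!strcmp(children[i].compatible, "chan"))
            chan_probe(d, &children[i]);
}

static void remove_dev(struct dev *d)
{
    for (int i = 0; i < d->nchans; i++) /* undo exactly what probe did */
        d->chan_node[i] = NULL;
    d->nchans = 0;
}

int main(void)
{
    struct node kids[] = { { "chan" }, { "other" }, { "chan" } };
    struct dev d = { 0 };

    probe(&d, kids, 3);     /* picks up the two "chan" children */
    remove_dev(&d);         /* symmetric teardown enables module unload */
    return 0;
}
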
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 6faf07ba0d0e..4f21a512d848 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -114,6 +114,7 @@ struct fsl_dma_device {
114 struct dma_device common; 114 struct dma_device common;
115 struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE]; 115 struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
116 u32 feature; /* The same as DMA channels */ 116 u32 feature; /* The same as DMA channels */
117 int irq; /* Channel IRQ */
117}; 118};
118 119
119/* Define macros for fsl_dma_chan->feature property */ 120/* Define macros for fsl_dma_chan->feature property */
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index bc8c6e3470ca..1ef68b315657 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -971,11 +971,9 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
971 switch (ioat_chan->device->version) { 971 switch (ioat_chan->device->version) {
972 case IOAT_VER_1_2: 972 case IOAT_VER_1_2:
973 return ioat1_dma_get_next_descriptor(ioat_chan); 973 return ioat1_dma_get_next_descriptor(ioat_chan);
974 break;
975 case IOAT_VER_2_0: 974 case IOAT_VER_2_0:
976 case IOAT_VER_3_0: 975 case IOAT_VER_3_0:
977 return ioat2_dma_get_next_descriptor(ioat_chan); 976 return ioat2_dma_get_next_descriptor(ioat_chan);
978 break;
979 } 977 }
980 return NULL; 978 return NULL;
981} 979}
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 22edc4273ef6..faa1cc66e9cf 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1143,7 +1143,7 @@ static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
1143 1143
1144 if (!is_out) { 1144 if (!is_out) {
1145 int irq = gpio_to_irq(gpio); 1145 int irq = gpio_to_irq(gpio);
1146 struct irq_desc *desc = irq_desc + irq; 1146 struct irq_desc *desc = irq_to_desc(irq);
1147 1147
1148 /* This races with request_irq(), set_irq_type(), 1148 /* This races with request_irq(), set_irq_type(),
1149 * and set_irq_wake() ... but those are "rare". 1149 * and set_irq_wake() ... but those are "rare".
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 9097500de5f4..a8b33c2ec8d2 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -6,7 +6,7 @@
6# 6#
7menuconfig DRM 7menuconfig DRM
8 tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)" 8 tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
9 depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && SHMEM 9 depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG && MMU
10 help 10 help
11 Kernel-level support for the Direct Rendering Infrastructure (DRI) 11 Kernel-level support for the Direct Rendering Infrastructure (DRI)
12 introduced in XFree86 4.0. If you say Y here, you need to select 12 introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index d490db4c0de0..ae73b7f7249a 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -522,12 +522,12 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data)
522 struct drm_gem_object *obj = ptr; 522 struct drm_gem_object *obj = ptr;
523 struct drm_gem_name_info_data *nid = data; 523 struct drm_gem_name_info_data *nid = data;
524 524
525 DRM_INFO("name %d size %d\n", obj->name, obj->size); 525 DRM_INFO("name %d size %zd\n", obj->name, obj->size);
526 if (nid->eof) 526 if (nid->eof)
527 return 0; 527 return 0;
528 528
529 nid->len += sprintf(&nid->buf[nid->len], 529 nid->len += sprintf(&nid->buf[nid->len],
530 "%6d%9d%8d%9d\n", 530 "%6d %8zd %7d %8d\n",
531 obj->name, obj->size, 531 obj->name, obj->size,
532 atomic_read(&obj->handlecount.refcount), 532 atomic_read(&obj->handlecount.refcount),
533 atomic_read(&obj->refcount.refcount)); 533 atomic_read(&obj->refcount.refcount));
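
obj->size is a size_t, which plain %d cannot print portably; the hunk above switches to %zd (the 'z' length modifier — %zu is the unsigned form). A two-line reminder of why, with a hypothetical value:

/* Sketch: size_t needs the 'z' length modifier. With plain %d, a
 * 64-bit size_t argument is read as 32 bits, large values print as
 * garbage, and the mismatch is undefined behavior besides. */
#include <stdio.h>
#include <stddef.h>

int main(void)
{
    size_t size = 3000000000UL;     /* hypothetical object size > INT_MAX */

    printf("size %zu\n", size);     /* correct: 3000000000 */
    /* printf("size %d\n", size);      wrong on LP64 -- do not do this */
    return 0;
}
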
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ac73dd1b422..dc2e6fdb6ca3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -171,6 +171,37 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
171 return 0; 171 return 0;
172} 172}
173 173
174/*
175 * Try to write quickly with an atomic kmap. Return true on success.
176 *
177 * If this fails (which includes a partial write), we'll redo the whole
178 * thing with the slow version.
179 *
180 * This is a workaround for the low performance of iounmap (approximate
181 * 10% cpu cost on normal 3D workloads). kmap_atomic on HIGHMEM kernels
182 * happens to let us map card memory without taking IPIs. When the vmap
183 * rework lands we should be able to dump this hack.
184 */
185static inline int fast_user_write(unsigned long pfn, char __user *user_data,
186 int l, int o)
187{
188#ifdef CONFIG_HIGHMEM
189 unsigned long unwritten;
190 char *vaddr_atomic;
191
192 vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
193#if WATCH_PWRITE
194 DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
195 i, o, l, pfn, vaddr_atomic);
196#endif
197 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
198 kunmap_atomic(vaddr_atomic, KM_USER0);
199 return !unwritten;
200#else
201 return 0;
202#endif
203}
204
174static int 205static int
175i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 206i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
176 struct drm_i915_gem_pwrite *args, 207 struct drm_i915_gem_pwrite *args,
@@ -180,12 +211,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
180 ssize_t remain; 211 ssize_t remain;
181 loff_t offset; 212 loff_t offset;
182 char __user *user_data; 213 char __user *user_data;
183 char __iomem *vaddr;
184 char *vaddr_atomic;
185 int i, o, l;
186 int ret = 0; 214 int ret = 0;
187 unsigned long pfn;
188 unsigned long unwritten;
189 215
190 user_data = (char __user *) (uintptr_t) args->data_ptr; 216 user_data = (char __user *) (uintptr_t) args->data_ptr;
191 remain = args->size; 217 remain = args->size;
@@ -209,6 +235,9 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
209 obj_priv->dirty = 1; 235 obj_priv->dirty = 1;
210 236
211 while (remain > 0) { 237 while (remain > 0) {
238 unsigned long pfn;
239 int i, o, l;
240
212 /* Operation in this page 241 /* Operation in this page
213 * 242 *
214 * i = page number 243 * i = page number
@@ -223,25 +252,10 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
223 252
224 pfn = (dev->agp->base >> PAGE_SHIFT) + i; 253 pfn = (dev->agp->base >> PAGE_SHIFT) + i;
225 254
226#ifdef CONFIG_HIGHMEM 255 if (!fast_user_write(pfn, user_data, l, o)) {
227 /* This is a workaround for the low performance of iounmap 256 unsigned long unwritten;
228 * (approximate 10% cpu cost on normal 3D workloads). 257 char __iomem *vaddr;
229 * kmap_atomic on HIGHMEM kernels happens to let us map card
230 * memory without taking IPIs. When the vmap rework lands
231 * we should be able to dump this hack.
232 */
233 vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
234#if WATCH_PWRITE
235 DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
236 i, o, l, pfn, vaddr_atomic);
237#endif
238 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
239 user_data, l);
240 kunmap_atomic(vaddr_atomic, KM_USER0);
241 258
242 if (unwritten)
243#endif /* CONFIG_HIGHMEM */
244 {
245 vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); 259 vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
246#if WATCH_PWRITE 260#if WATCH_PWRITE
247 DRM_INFO("pwrite slow i %d o %d l %d " 261 DRM_INFO("pwrite slow i %d o %d l %d "
diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c
index 1ea39254dac6..424dad6f18d8 100644
--- a/drivers/i2c/busses/i2c-amd756.c
+++ b/drivers/i2c/busses/i2c-amd756.c
@@ -332,10 +332,6 @@ static int __devinit amd756_probe(struct pci_dev *pdev,
332 int error; 332 int error;
333 u8 temp; 333 u8 temp;
334 334
335 /* driver_data might come from user-space, so check it */
336 if (id->driver_data >= ARRAY_SIZE(chipname))
337 return -EINVAL;
338
339 if (amd756_ioport) { 335 if (amd756_ioport) {
340 dev_err(&pdev->dev, "Only one device supported " 336 dev_err(&pdev->dev, "Only one device supported "
341 "(you have a strange motherboard, btw)\n"); 337 "(you have a strange motherboard, btw)\n");
@@ -412,7 +408,6 @@ static struct pci_driver amd756_driver = {
412 .id_table = amd756_ids, 408 .id_table = amd756_ids,
413 .probe = amd756_probe, 409 .probe = amd756_probe,
414 .remove = __devexit_p(amd756_remove), 410 .remove = __devexit_p(amd756_remove),
415 .dynids.use_driver_data = 1,
416}; 411};
417 412
418static int __init amd756_init(void) 413static int __init amd756_init(void)
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 73dc52e114eb..9f194d9efd91 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -332,10 +332,6 @@ static int __devinit vt596_probe(struct pci_dev *pdev,
332 unsigned char temp; 332 unsigned char temp;
333 int error = -ENODEV; 333 int error = -ENODEV;
334 334
335 /* driver_data might come from user-space, so check it */
336 if (id->driver_data & 1 || id->driver_data > 0xff)
337 return -EINVAL;
338
339 /* Determine the address of the SMBus areas */ 335 /* Determine the address of the SMBus areas */
340 if (force_addr) { 336 if (force_addr) {
341 vt596_smba = force_addr & 0xfff0; 337 vt596_smba = force_addr & 0xfff0;
@@ -483,7 +479,6 @@ static struct pci_driver vt596_driver = {
483 .name = "vt596_smbus", 479 .name = "vt596_smbus",
484 .id_table = vt596_ids, 480 .id_table = vt596_ids,
485 .probe = vt596_probe, 481 .probe = vt596_probe,
486 .dynids.use_driver_data = 1,
487}; 482};
488 483
489static int __init i2c_vt596_init(void) 484static int __init i2c_vt596_init(void)
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 74a369a6116f..a820ca6fc327 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -84,21 +84,40 @@ config BLK_DEV_IDE_SATA
84 84
85 If unsure, say N. 85 If unsure, say N.
86 86
87config BLK_DEV_IDEDISK 87config IDE_GD
88 tristate "Include IDE/ATA-2 DISK support" 88 tristate "generic ATA/ATAPI disk support"
89 ---help--- 89 default y
90 This will include enhanced support for MFM/RLL/IDE hard disks. If 90 help
91 you have a MFM/RLL/IDE disk, and there is no special reason to use 91 Support for ATA/ATAPI disks (including ATAPI floppy drives).
92 the old hard disk driver instead, say Y. If you have an SCSI-only
93 system, you can say N here.
94 92
95 To compile this driver as a module, choose M here: the 93 To compile this driver as a module, choose M here.
96 module will be called ide-disk. 94 The module will be called ide-gd_mod.
97 Do not compile this driver as a module if your root file system 95
98 (the one containing the directory /) is located on the IDE disk. 96 If unsure, say Y.
97
98config IDE_GD_ATA
99 bool "ATA disk support"
100 depends on IDE_GD
101 default y
102 help
103 This will include support for ATA hard disks.
99 104
100 If unsure, say Y. 105 If unsure, say Y.
101 106
107config IDE_GD_ATAPI
108 bool "ATAPI floppy support"
109 depends on IDE_GD
110 select IDE_ATAPI
111 help
112 This will include support for ATAPI floppy drives
113 (i.e. Iomega ZIP or MKE LS-120).
114
115 For information about jumper settings and the question
116 of when a ZIP drive uses a partition table, see
117 <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
118
119 If unsure, say N.
120
102config BLK_DEV_IDECS 121config BLK_DEV_IDECS
103 tristate "PCMCIA IDE support" 122 tristate "PCMCIA IDE support"
104 depends on PCMCIA 123 depends on PCMCIA
@@ -163,29 +182,6 @@ config BLK_DEV_IDETAPE
163 To compile this driver as a module, choose M here: the 182 To compile this driver as a module, choose M here: the
164 module will be called ide-tape. 183 module will be called ide-tape.
165 184
166config BLK_DEV_IDEFLOPPY
167 tristate "Include IDE/ATAPI FLOPPY support"
168 select IDE_ATAPI
169 ---help---
170 If you have an IDE floppy drive which uses the ATAPI protocol,
171 answer Y. ATAPI is a newer protocol used by IDE CD-ROM/tape/floppy
172 drives, similar to the SCSI protocol.
173
174 The LS-120 and the IDE/ATAPI Iomega ZIP drive are also supported by
175 this driver. For information about jumper settings and the question
176 of when a ZIP drive uses a partition table, see
177 <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
178 (ATAPI PD-CD/CDR drives are not supported by this driver; support
179 for PD-CD/CDR drives is available if you answer Y to
180 "SCSI emulation support", below).
181
182 If you say Y here, the FLOPPY drive will be identified along with
183 other IDE devices, as "hdb" or "hdc", or something similar (check
184 the boot messages with dmesg).
185
186 To compile this driver as a module, choose M here: the
187 module will be called ide-floppy.
188
189config BLK_DEV_IDESCSI 185config BLK_DEV_IDESCSI
190 tristate "SCSI emulation support (DEPRECATED)" 186 tristate "SCSI emulation support (DEPRECATED)"
191 depends on SCSI 187 depends on SCSI
@@ -332,7 +328,7 @@ config IDEPCI_PCIBUS_ORDER
332# TODO: split it on per host driver config options (or module parameters) 328# TODO: split it on per host driver config options (or module parameters)
333config BLK_DEV_OFFBOARD 329config BLK_DEV_OFFBOARD
334 bool "Boot off-board chipsets first support (DEPRECATED)" 330 bool "Boot off-board chipsets first support (DEPRECATED)"
335 depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT34X || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001) 331 depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
336 help 332 help
337 Normally, IDE controllers built into the motherboard (on-board 333 Normally, IDE controllers built into the motherboard (on-board
338 controllers) are assigned to ide0 and ide1 while those on add-in PCI 334 controllers) are assigned to ide0 and ide1 while those on add-in PCI
@@ -482,28 +478,6 @@ config BLK_DEV_CS5535
482 478
483 It is safe to say Y to this question. 479 It is safe to say Y to this question.
484 480
485config BLK_DEV_HPT34X
486 tristate "HPT34X chipset support"
487 depends on BROKEN
488 select BLK_DEV_IDEDMA_PCI
489 help
490 This driver adds up to 4 more EIDE devices sharing a single
491 interrupt. The HPT343 chipset in its current form is a non-bootable
492 controller; the HPT345/HPT363 chipset is a bootable (needs BIOS FIX)
493 PCI UDMA controllers. This driver requires dynamic tuning of the
494 chipset during the ide-probe at boot time. It is reported to support
495 DVD II drives, by the manufacturer.
496
497config HPT34X_AUTODMA
498 bool "HPT34X AUTODMA support (EXPERIMENTAL)"
499 depends on BLK_DEV_HPT34X && EXPERIMENTAL
500 help
501 This is a dangerous thing to attempt currently! Please read the
502 comments at the top of <file:drivers/ide/pci/hpt34x.c>. If you say Y
503 here, then say Y to "Use DMA by default when available" as well.
504
505 If unsure, say N.
506
507config BLK_DEV_HPT366 481config BLK_DEV_HPT366
508 tristate "HPT36X/37X chipset support" 482 tristate "HPT36X/37X chipset support"
509 select BLK_DEV_IDEDMA_PCI 483 select BLK_DEV_IDEDMA_PCI
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index ceaf779054ea..093d3248ca89 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -37,18 +37,25 @@ obj-$(CONFIG_IDE_H8300) += h8300/
37obj-$(CONFIG_IDE_GENERIC) += ide-generic.o 37obj-$(CONFIG_IDE_GENERIC) += ide-generic.o
38obj-$(CONFIG_BLK_DEV_IDEPNP) += ide-pnp.o 38obj-$(CONFIG_BLK_DEV_IDEPNP) += ide-pnp.o
39 39
40ide-disk_mod-y += ide-disk.o ide-disk_ioctl.o 40ide-gd_mod-y += ide-gd.o
41ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o 41ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
42ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o
43 42
43ifeq ($(CONFIG_IDE_GD_ATA), y)
44 ide-gd_mod-y += ide-disk.o ide-disk_ioctl.o
44ifeq ($(CONFIG_IDE_PROC_FS), y) 45ifeq ($(CONFIG_IDE_PROC_FS), y)
45 ide-disk_mod-y += ide-disk_proc.o 46 ide-gd_mod-y += ide-disk_proc.o
46 ide-floppy_mod-y += ide-floppy_proc.o 47endif
48endif
49
50ifeq ($(CONFIG_IDE_GD_ATAPI), y)
51 ide-gd_mod-y += ide-floppy.o ide-floppy_ioctl.o
52ifeq ($(CONFIG_IDE_PROC_FS), y)
53 ide-gd_mod-y += ide-floppy_proc.o
54endif
47endif 55endif
48 56
49obj-$(CONFIG_BLK_DEV_IDEDISK) += ide-disk_mod.o 57obj-$(CONFIG_IDE_GD) += ide-gd_mod.o
50obj-$(CONFIG_BLK_DEV_IDECD) += ide-cd_mod.o 58obj-$(CONFIG_BLK_DEV_IDECD) += ide-cd_mod.o
51obj-$(CONFIG_BLK_DEV_IDEFLOPPY) += ide-floppy_mod.o
52obj-$(CONFIG_BLK_DEV_IDETAPE) += ide-tape.o 59obj-$(CONFIG_BLK_DEV_IDETAPE) += ide-tape.o
53 60
54ifeq ($(CONFIG_BLK_DEV_IDECS), y) 61ifeq ($(CONFIG_BLK_DEV_IDECS), y)
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2e305714c209..4e58b9e7a58a 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -191,7 +191,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
191{ 191{
192 struct ide_atapi_pc pc; 192 struct ide_atapi_pc pc;
193 193
194 if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK) 194 if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
195 return 0; 195 return 0;
196 196
197 ide_init_pc(&pc); 197 ide_init_pc(&pc);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 3308b1cd3a33..13265a8827da 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -99,7 +99,7 @@ static void ide_cd_put(struct cdrom_info *cd)
99/* Mark that we've seen a media change and invalidate our internal buffers. */ 99/* Mark that we've seen a media change and invalidate our internal buffers. */
100static void cdrom_saw_media_change(ide_drive_t *drive) 100static void cdrom_saw_media_change(ide_drive_t *drive)
101{ 101{
102 drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED; 102 drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
103 drive->atapi_flags &= ~IDE_AFLAG_TOC_VALID; 103 drive->atapi_flags &= ~IDE_AFLAG_TOC_VALID;
104} 104}
105 105
@@ -340,8 +340,8 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
340 } 340 }
341 341
342 ide_debug_log(IDE_DBG_RQ, "%s: stat: 0x%x, good_stat: 0x%x, " 342 ide_debug_log(IDE_DBG_RQ, "%s: stat: 0x%x, good_stat: 0x%x, "
343 "rq->cmd_type: 0x%x, err: 0x%x\n", __func__, stat, 343 "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x, err: 0x%x\n",
344 good_stat, rq->cmd_type, err); 344 __func__, stat, good_stat, rq->cmd[0], rq->cmd_type, err);
345 345
346 if (blk_sense_request(rq)) { 346 if (blk_sense_request(rq)) {
347 /* 347 /*
@@ -843,13 +843,10 @@ static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq)
843 rq->q->prep_rq_fn(rq->q, rq); 843 rq->q->prep_rq_fn(rq->q, rq);
844} 844}
845 845
846/*
847 * All other packet commands.
848 */
849static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq) 846static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq)
850{ 847{
851 848 ide_debug_log(IDE_DBG_FUNC, "Call %s, rq->cmd[0]: 0x%x\n",
852 ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__); 849 __func__, rq->cmd[0]);
853 850
854 /* 851 /*
855 * Some of the trailing request sense fields are optional, 852 * Some of the trailing request sense fields are optional,
@@ -876,7 +873,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
876 if (!sense) 873 if (!sense)
877 sense = &local_sense; 874 sense = &local_sense;
878 875
879 ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, " 876 ide_debug_log(IDE_DBG_PC, "Call %s, cmd[0]: 0x%x, write: 0x%x, "
880 "timeout: %d, cmd_flags: 0x%x\n", __func__, cmd[0], write, 877 "timeout: %d, cmd_flags: 0x%x\n", __func__, cmd[0], write,
881 timeout, cmd_flags); 878 timeout, cmd_flags);
882 879
@@ -1177,8 +1174,9 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
1177 unsigned short sectors_per_frame = 1174 unsigned short sectors_per_frame =
1178 queue_hardsect_size(drive->queue) >> SECTOR_BITS; 1175 queue_hardsect_size(drive->queue) >> SECTOR_BITS;
1179 1176
1180 ide_debug_log(IDE_DBG_RQ, "Call %s, write: 0x%x, secs_per_frame: %u\n", 1177 ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, write: 0x%x, "
1181 __func__, write, sectors_per_frame); 1178 "secs_per_frame: %u\n",
1179 __func__, rq->cmd[0], write, sectors_per_frame);
1182 1180
1183 if (write) { 1181 if (write) {
1184 /* disk has become write protected */ 1182 /* disk has become write protected */
@@ -1221,7 +1219,8 @@ static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive)
1221static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) 1219static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
1222{ 1220{
1223 1221
1224 ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd_type: 0x%x\n", __func__, 1222 ide_debug_log(IDE_DBG_PC, "Call %s, rq->cmd[0]: 0x%x, "
1223 "rq->cmd_type: 0x%x\n", __func__, rq->cmd[0],
1225 rq->cmd_type); 1224 rq->cmd_type);
1226 1225
1227 if (blk_pc_request(rq)) 1226 if (blk_pc_request(rq))
@@ -1257,9 +1256,6 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
1257 } 1256 }
1258} 1257}
1259 1258
1260/*
1261 * cdrom driver request routine.
1262 */
1263static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, 1259static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
1264 sector_t block) 1260 sector_t block)
1265{ 1261{
@@ -1267,8 +1263,10 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
1267 ide_handler_t *fn; 1263 ide_handler_t *fn;
1268 int xferlen; 1264 int xferlen;
1269 1265
1270 ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd_type: 0x%x, block: %llu\n", 1266 ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, "
1271 __func__, rq->cmd_type, (unsigned long long)block); 1267 "rq->cmd_type: 0x%x, block: %llu\n",
1268 __func__, rq->cmd[0], rq->cmd_type,
1269 (unsigned long long)block);
1272 1270
1273 if (blk_fs_request(rq)) { 1271 if (blk_fs_request(rq)) {
1274 if (drive->atapi_flags & IDE_AFLAG_SEEKING) { 1272 if (drive->atapi_flags & IDE_AFLAG_SEEKING) {
@@ -1412,6 +1410,10 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
1412 1410
1413 *capacity = 1 + be32_to_cpu(capbuf.lba); 1411 *capacity = 1 + be32_to_cpu(capbuf.lba);
1414 *sectors_per_frame = blocklen >> SECTOR_BITS; 1412 *sectors_per_frame = blocklen >> SECTOR_BITS;
1413
1414 ide_debug_log(IDE_DBG_PROBE, "%s: cap: %lu, sectors_per_frame: %lu\n",
1415 __func__, *capacity, *sectors_per_frame);
1416
1415 return 0; 1417 return 0;
1416} 1418}
1417 1419
@@ -1643,6 +1645,9 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf)
1643 maxspeed = be16_to_cpup((__be16 *)&buf[8 + 8]); 1645 maxspeed = be16_to_cpup((__be16 *)&buf[8 + 8]);
1644 } 1646 }
1645 1647
1648 ide_debug_log(IDE_DBG_PROBE, "%s: curspeed: %u, maxspeed: %u\n",
1649 __func__, curspeed, maxspeed);
1650
1646 cd->current_speed = (curspeed + (176/2)) / 176; 1651 cd->current_speed = (curspeed + (176/2)) / 176;
1647 cd->max_speed = (maxspeed + (176/2)) / 176; 1652 cd->max_speed = (maxspeed + (176/2)) / 176;
1648} 1653}
@@ -1732,7 +1737,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
1732 return 0; 1737 return 0;
1733 1738
1734 if ((buf[8 + 6] & 0x01) == 0) 1739 if ((buf[8 + 6] & 0x01) == 0)
1735 drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK; 1740 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
1736 if (buf[8 + 6] & 0x08) 1741 if (buf[8 + 6] & 0x08)
1737 drive->atapi_flags &= ~IDE_AFLAG_NO_EJECT; 1742 drive->atapi_flags &= ~IDE_AFLAG_NO_EJECT;
1738 if (buf[8 + 3] & 0x01) 1743 if (buf[8 + 3] & 0x01)
@@ -1777,7 +1782,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
1777 if ((cdi->mask & CDC_DVD_R) == 0 || (cdi->mask & CDC_DVD_RAM) == 0) 1782 if ((cdi->mask & CDC_DVD_R) == 0 || (cdi->mask & CDC_DVD_RAM) == 0)
1778 printk(KERN_CONT " DVD%s%s", 1783 printk(KERN_CONT " DVD%s%s",
1779 (cdi->mask & CDC_DVD_R) ? "" : "-R", 1784 (cdi->mask & CDC_DVD_R) ? "" : "-R",
1780 (cdi->mask & CDC_DVD_RAM) ? "" : "-RAM"); 1785 (cdi->mask & CDC_DVD_RAM) ? "" : "/RAM");
1781 1786
1782 if ((cdi->mask & CDC_CD_R) == 0 || (cdi->mask & CDC_CD_RW) == 0) 1787 if ((cdi->mask & CDC_CD_R) == 0 || (cdi->mask & CDC_CD_RW) == 0)
1783 printk(KERN_CONT " CD%s%s", 1788 printk(KERN_CONT " CD%s%s",
@@ -1908,6 +1913,16 @@ static const struct ide_proc_devset idecd_settings[] = {
1908 IDE_PROC_DEVSET(dsc_overlap, 0, 1), 1913 IDE_PROC_DEVSET(dsc_overlap, 0, 1),
1909 { 0 }, 1914 { 0 },
1910}; 1915};
1916
1917static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive)
1918{
1919 return idecd_proc;
1920}
1921
1922static const struct ide_proc_devset *ide_cd_proc_devsets(ide_drive_t *drive)
1923{
1924 return idecd_settings;
1925}
1911#endif 1926#endif
1912 1927
1913static const struct cd_list_entry ide_cd_quirks_list[] = { 1928static const struct cd_list_entry ide_cd_quirks_list[] = {
@@ -1986,8 +2001,8 @@ static int ide_cdrom_setup(ide_drive_t *drive)
1986 if (!drive->queue->unplug_delay) 2001 if (!drive->queue->unplug_delay)
1987 drive->queue->unplug_delay = 1; 2002 drive->queue->unplug_delay = 1;
1988 2003
1989 drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT | 2004 drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
1990 ide_cd_flags(id); 2005 drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
1991 2006
1992 if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) && 2007 if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) &&
1993 fw_rev[4] == '1' && fw_rev[6] <= '2') 2008 fw_rev[4] == '1' && fw_rev[6] <= '2')
@@ -2069,8 +2084,8 @@ static ide_driver_t ide_cdrom_driver = {
2069 .end_request = ide_end_request, 2084 .end_request = ide_end_request,
2070 .error = __ide_error, 2085 .error = __ide_error,
2071#ifdef CONFIG_IDE_PROC_FS 2086#ifdef CONFIG_IDE_PROC_FS
2072 .proc = idecd_proc, 2087 .proc_entries = ide_cd_proc_entries,
2073 .settings = idecd_settings, 2088 .proc_devsets = ide_cd_proc_devsets,
2074#endif 2089#endif
2075}; 2090};
2076 2091
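
Rather than embedding the proc tables in ide_driver_t as static members, the driver now hands them out through per-drive callbacks (ide_cd_proc_entries()/ide_cd_proc_devsets()), which lets a driver pick a table per device. A sketch of trading a static member for an accessor callback, with placeholder types throughout:

/* Sketch: replace a fixed ops-table member with an accessor callback
 * so the table can vary per device, as the proc_entries/proc_devsets
 * change above does. */
#include <stdio.h>

struct entry { const char *name; };

struct drive { int flavor; };

static struct entry ata_entries[]   = { { "ata" },   { 0 } };
static struct entry atapi_entries[] = { { "atapi" }, { 0 } };

/* old style: a fixed pointer; new style: a function of the drive */
static struct entry *proc_entries(struct drive *d)
{
    return d->flavor ? atapi_entries : ata_entries;
}

struct driver {
    struct entry *(*proc_entries)(struct drive *);
};

int main(void)
{
    struct driver drv = { .proc_entries = proc_entries };
    struct drive d = { .flavor = 1 };

    printf("%s\n", drv.proc_entries(&d)->name);  /* "atapi" */
    return 0;
}
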
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 74231b41f611..df3df0041eb6 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -86,8 +86,8 @@ int ide_cdrom_check_media_change_real(struct cdrom_device_info *cdi,
86 86
87 if (slot_nr == CDSL_CURRENT) { 87 if (slot_nr == CDSL_CURRENT) {
88 (void) cdrom_check_status(drive, NULL); 88 (void) cdrom_check_status(drive, NULL);
89 retval = (drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED) ? 1 : 0; 89 retval = (drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED) ? 1 : 0;
90 drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED; 90 drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
91 return retval; 91 return retval;
92 } else { 92 } else {
93 return -EINVAL; 93 return -EINVAL;
@@ -136,7 +136,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
136 sense = &my_sense; 136 sense = &my_sense;
137 137
138 /* If the drive cannot lock the door, just pretend. */ 138 /* If the drive cannot lock the door, just pretend. */
139 if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK) { 139 if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) {
140 stat = 0; 140 stat = 0;
141 } else { 141 } else {
142 unsigned char cmd[BLK_MAX_CDB]; 142 unsigned char cmd[BLK_MAX_CDB];
@@ -157,7 +157,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
157 (sense->asc == 0x24 || sense->asc == 0x20)) { 157 (sense->asc == 0x24 || sense->asc == 0x20)) {
158 printk(KERN_ERR "%s: door locking not supported\n", 158 printk(KERN_ERR "%s: door locking not supported\n",
159 drive->name); 159 drive->name);
160 drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK; 160 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
161 stat = 0; 161 stat = 0;
162 } 162 }
163 163
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 3853bde8eedc..223750c1b5a6 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -14,9 +14,6 @@
14 * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c. 14 * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
15 */ 15 */
16 16
17#define IDEDISK_VERSION "1.18"
18
19#include <linux/module.h>
20#include <linux/types.h> 17#include <linux/types.h>
21#include <linux/string.h> 18#include <linux/string.h>
22#include <linux/kernel.h> 19#include <linux/kernel.h>
@@ -39,46 +36,8 @@
39#include <asm/io.h> 36#include <asm/io.h>
40#include <asm/div64.h> 37#include <asm/div64.h>
41 38
42#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
43#define IDE_DISK_MINORS (1 << PARTN_BITS)
44#else
45#define IDE_DISK_MINORS 0
46#endif
47
48#include "ide-disk.h" 39#include "ide-disk.h"
49 40
50static DEFINE_MUTEX(idedisk_ref_mutex);
51
52#define to_ide_disk(obj) container_of(obj, struct ide_disk_obj, kref)
53
54static void ide_disk_release(struct kref *);
55
56static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
57{
58 struct ide_disk_obj *idkp = NULL;
59
60 mutex_lock(&idedisk_ref_mutex);
61 idkp = ide_disk_g(disk);
62 if (idkp) {
63 if (ide_device_get(idkp->drive))
64 idkp = NULL;
65 else
66 kref_get(&idkp->kref);
67 }
68 mutex_unlock(&idedisk_ref_mutex);
69 return idkp;
70}
71
72static void ide_disk_put(struct ide_disk_obj *idkp)
73{
74 ide_drive_t *drive = idkp->drive;
75
76 mutex_lock(&idedisk_ref_mutex);
77 kref_put(&idkp->kref, ide_disk_release);
78 ide_device_put(drive);
79 mutex_unlock(&idedisk_ref_mutex);
80}
81
82static const u8 ide_rw_cmds[] = { 41static const u8 ide_rw_cmds[] = {
83 ATA_CMD_READ_MULTI, 42 ATA_CMD_READ_MULTI,
84 ATA_CMD_WRITE_MULTI, 43 ATA_CMD_WRITE_MULTI,
@@ -374,7 +333,7 @@ static void idedisk_check_hpa(ide_drive_t *drive)
374 } 333 }
375} 334}
376 335
377static void init_idedisk_capacity(ide_drive_t *drive) 336static int ide_disk_get_capacity(ide_drive_t *drive)
378{ 337{
379 u16 *id = drive->id; 338 u16 *id = drive->id;
380 int lba; 339 int lba;
@@ -403,11 +362,28 @@ static void init_idedisk_capacity(ide_drive_t *drive)
403 if (ata_id_hpa_enabled(id)) 362 if (ata_id_hpa_enabled(id))
404 idedisk_check_hpa(drive); 363 idedisk_check_hpa(drive);
405 } 364 }
406}
407 365
408sector_t ide_disk_capacity(ide_drive_t *drive) 366 /* limit drive capacity to 137GB if LBA48 cannot be used */
409{ 367 if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
410 return drive->capacity64; 368 drive->capacity64 > 1ULL << 28) {
369 printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
370 "%llu sectors (%llu MB)\n",
371 drive->name, (unsigned long long)drive->capacity64,
372 sectors_to_MB(drive->capacity64));
373 drive->capacity64 = 1ULL << 28;
374 }
375
376 if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
377 (drive->dev_flags & IDE_DFLAG_LBA48)) {
378 if (drive->capacity64 > 1ULL << 28) {
379 printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
380 " will be used for accessing sectors "
381 "> %u\n", drive->name, 1 << 28);
382 } else
383 drive->dev_flags &= ~IDE_DFLAG_LBA48;
384 }
385
386 return 0;
411} 387}
412 388
413static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) 389static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
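
ide_disk_get_capacity() above absorbs the old setup-time clamping: without LBA48 the usable capacity is cut to 1 << 28 sectors — the classic 137 GB (128 GiB) ATA limit — and hosts flagged IDE_HFLAG_NO_LBA48_DMA keep LBA48 only when the disk is actually that large. The clamp arithmetic, as a tiny stand-alone program with a hypothetical disk size:

/* Sketch: the 28-bit LBA clamp applied above. 2^28 sectors of 512
 * bytes is 2^37 bytes, i.e. roughly 137 GB (128 GiB). */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t capacity = 400000000ULL;   /* hypothetical ~200 GB disk */
    int lba48 = 0;                      /* drive/host cannot do LBA48 */

    if (!lba48 && capacity > (1ULL << 28))
        capacity = 1ULL << 28;          /* 268435456 sectors */

    printf("usable sectors: %llu (%llu MiB)\n",
           (unsigned long long)capacity,
           (unsigned long long)(capacity >> 11)); /* sectors >> 11 = MiB */
    return 0;
}
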
@@ -508,7 +484,7 @@ static void update_ordered(ide_drive_t *drive)
508 * time we have trimmed the drive capacity if LBA48 is 484 * time we have trimmed the drive capacity if LBA48 is
509 * not available so we don't need to recheck that. 485 * not available so we don't need to recheck that.
510 */ 486 */
511 capacity = ide_disk_capacity(drive); 487 capacity = ide_gd_capacity(drive);
512 barrier = ata_id_flush_enabled(id) && 488 barrier = ata_id_flush_enabled(id) &&
513 (drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 && 489 (drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 &&
514 ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 || 490 ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 ||
@@ -616,7 +592,12 @@ ide_ext_devset_rw(wcache, wcache);
616 592
617ide_ext_devset_rw_sync(nowerr, nowerr); 593ide_ext_devset_rw_sync(nowerr, nowerr);
618 594
619static void idedisk_setup(ide_drive_t *drive) 595static int ide_disk_check(ide_drive_t *drive, const char *s)
596{
597 return 1;
598}
599
600static void ide_disk_setup(ide_drive_t *drive)
620{ 601{
621 struct ide_disk_obj *idkp = drive->driver_data; 602 struct ide_disk_obj *idkp = drive->driver_data;
622 ide_hwif_t *hwif = drive->hwif; 603 ide_hwif_t *hwif = drive->hwif;
@@ -652,33 +633,13 @@ static void idedisk_setup(ide_drive_t *drive)
652 drive->queue->max_sectors / 2); 633 drive->queue->max_sectors / 2);
653 634
654 /* calculate drive capacity, and select LBA if possible */ 635 /* calculate drive capacity, and select LBA if possible */
655 init_idedisk_capacity(drive); 636 ide_disk_get_capacity(drive);
656
657 /* limit drive capacity to 137GB if LBA48 cannot be used */
658 if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
659 drive->capacity64 > 1ULL << 28) {
660 printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
661 "%llu sectors (%llu MB)\n",
662 drive->name, (unsigned long long)drive->capacity64,
663 sectors_to_MB(drive->capacity64));
664 drive->capacity64 = 1ULL << 28;
665 }
666
667 if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
668 (drive->dev_flags & IDE_DFLAG_LBA48)) {
669 if (drive->capacity64 > 1ULL << 28) {
670 printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
671 " will be used for accessing sectors "
672 "> %u\n", drive->name, 1 << 28);
673 } else
674 drive->dev_flags &= ~IDE_DFLAG_LBA48;
675 }
676 637
677 /* 638 /*
678 * if possible, give fdisk access to more of the drive, 639 * if possible, give fdisk access to more of the drive,
679 * by correcting bios_cyls: 640 * by correcting bios_cyls:
680 */ 641 */
681 capacity = ide_disk_capacity(drive); 642 capacity = ide_gd_capacity(drive);
682 643
683 if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) { 644 if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) {
684 if (ata_id_lba48_enabled(drive->id)) { 645 if (ata_id_lba48_enabled(drive->id)) {
@@ -718,9 +679,17 @@ static void idedisk_setup(ide_drive_t *drive)
718 drive->dev_flags |= IDE_DFLAG_WCACHE; 679 drive->dev_flags |= IDE_DFLAG_WCACHE;
719 680
720 set_wcache(drive, 1); 681 set_wcache(drive, 1);
682
683 if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
684 (drive->head == 0 || drive->head > 16)) {
685 printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n",
686 drive->name, drive->head);
687 drive->dev_flags &= ~IDE_DFLAG_ATTACH;
688 } else
689 drive->dev_flags |= IDE_DFLAG_ATTACH;
721} 690}
722 691
723static void ide_cacheflush_p(ide_drive_t *drive) 692static void ide_disk_flush(ide_drive_t *drive)
724{ 693{
725 if (ata_id_flush_enabled(drive->id) == 0 || 694 if (ata_id_flush_enabled(drive->id) == 0 ||
726 (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) 695 (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
@@ -730,267 +699,40 @@ static void ide_cacheflush_p(ide_drive_t *drive)
730 printk(KERN_INFO "%s: wcache flush failed!\n", drive->name); 699 printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
731} 700}
732 701
733static void ide_disk_remove(ide_drive_t *drive) 702static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk)
734{
735 struct ide_disk_obj *idkp = drive->driver_data;
736 struct gendisk *g = idkp->disk;
737
738 ide_proc_unregister_driver(drive, idkp->driver);
739
740 del_gendisk(g);
741
742 ide_cacheflush_p(drive);
743
744 ide_disk_put(idkp);
745}
746
747static void ide_disk_release(struct kref *kref)
748{
749 struct ide_disk_obj *idkp = to_ide_disk(kref);
750 ide_drive_t *drive = idkp->drive;
751 struct gendisk *g = idkp->disk;
752
753 drive->driver_data = NULL;
754 g->private_data = NULL;
755 put_disk(g);
756 kfree(idkp);
757}
758
759static int ide_disk_probe(ide_drive_t *drive);
760
761/*
762 * On HPA drives the capacity needs to be
763 * reinitilized on resume otherwise the disk
764 * can not be used and a hard reset is required
765 */
766static void ide_disk_resume(ide_drive_t *drive)
767{ 703{
768 if (ata_id_hpa_enabled(drive->id)) 704 return 0;
769 init_idedisk_capacity(drive);
770}
771
772static void ide_device_shutdown(ide_drive_t *drive)
773{
774#ifdef CONFIG_ALPHA
775 /* On Alpha, halt(8) doesn't actually turn the machine off,
776 it puts you into the sort of firmware monitor. Typically,
777 it's used to boot another kernel image, so it's not much
778 different from reboot(8). Therefore, we don't need to
779 spin down the disk in this case, especially since Alpha
780 firmware doesn't handle disks in standby mode properly.
781 On the other hand, it's reasonably safe to turn the power
782 off when the shutdown process reaches the firmware prompt,
783 as the firmware initialization takes rather long time -
784 at least 10 seconds, which should be sufficient for
785 the disk to expire its write cache. */
786 if (system_state != SYSTEM_POWER_OFF) {
787#else
788 if (system_state == SYSTEM_RESTART) {
789#endif
790 ide_cacheflush_p(drive);
791 return;
792 }
793
794 printk(KERN_INFO "Shutdown: %s\n", drive->name);
795
796 drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
797} 705}
798 706
799static ide_driver_t idedisk_driver = { 707static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
800 .gen_driver = { 708 int on)
801 .owner = THIS_MODULE,
802 .name = "ide-disk",
803 .bus = &ide_bus_type,
804 },
805 .probe = ide_disk_probe,
806 .remove = ide_disk_remove,
807 .resume = ide_disk_resume,
808 .shutdown = ide_device_shutdown,
809 .version = IDEDISK_VERSION,
810 .do_request = ide_do_rw_disk,
811 .end_request = ide_end_request,
812 .error = __ide_error,
813#ifdef CONFIG_IDE_PROC_FS
814 .proc = ide_disk_proc,
815 .settings = ide_disk_settings,
816#endif
817};
818
819static int idedisk_set_doorlock(ide_drive_t *drive, int on)
820{ 709{
821 ide_task_t task; 710 ide_task_t task;
711 int ret;
712
713 if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
714 return 0;
822 715
823 memset(&task, 0, sizeof(task)); 716 memset(&task, 0, sizeof(task));
824 task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK; 717 task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
825 task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 718 task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
826 719
827 return ide_no_data_taskfile(drive, &task); 720 ret = ide_no_data_taskfile(drive, &task);
828}
829
830static int idedisk_open(struct inode *inode, struct file *filp)
831{
832 struct gendisk *disk = inode->i_bdev->bd_disk;
833 struct ide_disk_obj *idkp;
834 ide_drive_t *drive;
835
836 idkp = ide_disk_get(disk);
837 if (idkp == NULL)
838 return -ENXIO;
839
840 drive = idkp->drive;
841
842 idkp->openers++;
843
844 if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
845 check_disk_change(inode->i_bdev);
846 /*
847 * Ignore the return code from door_lock,
848 * since the open() has already succeeded,
849 * and the door_lock is irrelevant at this point.
850 */
851 if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
852 idedisk_set_doorlock(drive, 1))
853 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
854 }
855 return 0;
856}
857
858static int idedisk_release(struct inode *inode, struct file *filp)
859{
860 struct gendisk *disk = inode->i_bdev->bd_disk;
861 struct ide_disk_obj *idkp = ide_disk_g(disk);
862 ide_drive_t *drive = idkp->drive;
863
864 if (idkp->openers == 1)
865 ide_cacheflush_p(drive);
866
867 if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
868 if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) &&
869 idedisk_set_doorlock(drive, 0))
870 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
871 }
872 721
873 idkp->openers--; 722 if (ret)
723 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
874 724
875 ide_disk_put(idkp); 725 return ret;
876
877 return 0;
878}
879
880static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
881{
882 struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
883 ide_drive_t *drive = idkp->drive;
884
885 geo->heads = drive->bios_head;
886 geo->sectors = drive->bios_sect;
887 geo->cylinders = (u16)drive->bios_cyl; /* truncate */
888 return 0;
889} 726}
890 727
891static int idedisk_media_changed(struct gendisk *disk) 728const struct ide_disk_ops ide_ata_disk_ops = {
892{ 729 .check = ide_disk_check,
893 struct ide_disk_obj *idkp = ide_disk_g(disk); 730 .get_capacity = ide_disk_get_capacity,
894 ide_drive_t *drive = idkp->drive; 731 .setup = ide_disk_setup,
895 732 .flush = ide_disk_flush,
896 /* do not scan partitions twice if this is a removable device */ 733 .init_media = ide_disk_init_media,
897 if (drive->dev_flags & IDE_DFLAG_ATTACH) { 734 .set_doorlock = ide_disk_set_doorlock,
898 drive->dev_flags &= ~IDE_DFLAG_ATTACH; 735 .do_request = ide_do_rw_disk,
899 return 0; 736 .end_request = ide_end_request,
900 } 737 .ioctl = ide_disk_ioctl,
901
902 /* if removable, always assume it was changed */
903 return !!(drive->dev_flags & IDE_DFLAG_REMOVABLE);
904}
905
906static int idedisk_revalidate_disk(struct gendisk *disk)
907{
908 struct ide_disk_obj *idkp = ide_disk_g(disk);
909 set_capacity(disk, ide_disk_capacity(idkp->drive));
910 return 0;
911}
912
913static struct block_device_operations idedisk_ops = {
914 .owner = THIS_MODULE,
915 .open = idedisk_open,
916 .release = idedisk_release,
917 .ioctl = ide_disk_ioctl,
918 .getgeo = idedisk_getgeo,
919 .media_changed = idedisk_media_changed,
920 .revalidate_disk = idedisk_revalidate_disk
921}; 738};
922
923MODULE_DESCRIPTION("ATA DISK Driver");
924
925static int ide_disk_probe(ide_drive_t *drive)
926{
927 struct ide_disk_obj *idkp;
928 struct gendisk *g;
929
930 /* strstr("foo", "") is non-NULL */
931 if (!strstr("ide-disk", drive->driver_req))
932 goto failed;
933
934 if (drive->media != ide_disk)
935 goto failed;
936
937 idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
938 if (!idkp)
939 goto failed;
940
941 g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
942 if (!g)
943 goto out_free_idkp;
944
945 ide_init_disk(g, drive);
946
947 kref_init(&idkp->kref);
948
949 idkp->drive = drive;
950 idkp->driver = &idedisk_driver;
951 idkp->disk = g;
952
953 g->private_data = &idkp->driver;
954
955 drive->driver_data = idkp;
956
957 idedisk_setup(drive);
958 if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
959 (drive->head == 0 || drive->head > 16)) {
960 printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n",
961 drive->name, drive->head);
962 drive->dev_flags &= ~IDE_DFLAG_ATTACH;
963 } else
964 drive->dev_flags |= IDE_DFLAG_ATTACH;
965
966 g->minors = IDE_DISK_MINORS;
967 g->driverfs_dev = &drive->gendev;
968 g->flags |= GENHD_FL_EXT_DEVT;
969 if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
970 g->flags = GENHD_FL_REMOVABLE;
971 set_capacity(g, ide_disk_capacity(drive));
972 g->fops = &idedisk_ops;
973 add_disk(g);
974 return 0;
975
976out_free_idkp:
977 kfree(idkp);
978failed:
979 return -ENODEV;
980}
981
982static void __exit idedisk_exit(void)
983{
984 driver_unregister(&idedisk_driver.gen_driver);
985}
986
987static int __init idedisk_init(void)
988{
989 return driver_register(&idedisk_driver.gen_driver);
990}
991
992MODULE_ALIAS("ide:*m-disk*");
993MODULE_ALIAS("ide-disk");
994module_init(idedisk_init);
995module_exit(idedisk_exit);
996MODULE_LICENSE("GPL");
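
The hunks above strip ide-disk.c down to its media-specific hooks and export them as the ide_ata_disk_ops table; probe, open/release, and module plumbing move to the new generic ide-gd driver, which dispatches through the table. Below is a minimal userspace sketch of that ops-table dispatch pattern, assuming nothing beyond standard C; demo_drive, demo_disk_ops, ata_setup and ata_flush are illustrative stand-ins, not kernel symbols.

    #include <stdio.h>

    struct demo_drive;

    struct demo_disk_ops {
        void (*setup)(struct demo_drive *);
        void (*flush)(struct demo_drive *);
    };

    struct demo_drive {
        const char *name;
        const struct demo_disk_ops *ops;    /* set at probe time */
    };

    static void ata_setup(struct demo_drive *d) { printf("%s: ATA setup\n", d->name); }
    static void ata_flush(struct demo_drive *d) { printf("%s: write-cache flush\n", d->name); }

    static const struct demo_disk_ops demo_ata_ops = {
        .setup = ata_setup,
        .flush = ata_flush,
    };

    int main(void)
    {
        struct demo_drive hda = { .name = "hda", .ops = &demo_ata_ops };

        hda.ops->setup(&hda);   /* generic code calls the media-specific hook */
        hda.ops->flush(&hda);
        return 0;
    }

The generic caller never names an ATA- or ATAPI-specific function directly, which is what lets either backend be configured out, as the headers below arrange.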
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
index a82fa4355665..b234b0feaf7b 100644
--- a/drivers/ide/ide-disk.h
+++ b/drivers/ide/ide-disk.h
@@ -1,19 +1,11 @@
1#ifndef __IDE_DISK_H 1#ifndef __IDE_DISK_H
2#define __IDE_DISK_H 2#define __IDE_DISK_H
3 3
4struct ide_disk_obj { 4#include "ide-gd.h"
5 ide_drive_t *drive;
6 ide_driver_t *driver;
7 struct gendisk *disk;
8 struct kref kref;
9 unsigned int openers; /* protected by BKL for now */
10};
11
12#define ide_disk_g(disk) \
13 container_of((disk)->private_data, struct ide_disk_obj, driver)
14 5
6#ifdef CONFIG_IDE_GD_ATA
15/* ide-disk.c */ 7/* ide-disk.c */
16sector_t ide_disk_capacity(ide_drive_t *); 8extern const struct ide_disk_ops ide_ata_disk_ops;
17ide_decl_devset(address); 9ide_decl_devset(address);
18ide_decl_devset(multcount); 10ide_decl_devset(multcount);
19ide_decl_devset(nowerr); 11ide_decl_devset(nowerr);
@@ -21,12 +13,17 @@ ide_decl_devset(wcache);
21ide_decl_devset(acoustic); 13ide_decl_devset(acoustic);
22 14
23/* ide-disk_ioctl.c */ 15/* ide-disk_ioctl.c */
24int ide_disk_ioctl(struct inode *, struct file *, unsigned int, unsigned long); 16int ide_disk_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
17 unsigned long);
25 18
26#ifdef CONFIG_IDE_PROC_FS 19#ifdef CONFIG_IDE_PROC_FS
27/* ide-disk_proc.c */ 20/* ide-disk_proc.c */
28extern ide_proc_entry_t ide_disk_proc[]; 21extern ide_proc_entry_t ide_disk_proc[];
29extern const struct ide_proc_devset ide_disk_settings[]; 22extern const struct ide_proc_devset ide_disk_settings[];
30#endif 23#endif
24#else
25#define ide_disk_proc NULL
26#define ide_disk_settings NULL
27#endif
31 28
32#endif /* __IDE_DISK_H */ 29#endif /* __IDE_DISK_H */
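
When CONFIG_IDE_GD_ATA is not set, the header now defines ide_disk_proc and ide_disk_settings to NULL so the generic driver compiles without #ifdefs at its call sites. A small self-contained sketch of that compile-time stub pattern, with ENABLE_ATA standing in for the Kconfig symbol:

    #include <stdio.h>

    #define ENABLE_ATA 0    /* stand-in for CONFIG_IDE_GD_ATA */

    #if ENABLE_ATA
    static const char *ata_proc_name = "ide_disk_proc table";
    #define disk_proc ata_proc_name
    #else
    #define disk_proc NULL  /* feature configured out: callers still compile */
    #endif

    int main(void)
    {
        const char *p = disk_proc;

        if (p)
            printf("registering %s\n", p);
        else
            printf("ATA support is off; nothing to register\n");
        return 0;
    }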
diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c
index a6cf1a03a806..a49698bcf966 100644
--- a/drivers/ide/ide-disk_ioctl.c
+++ b/drivers/ide/ide-disk_ioctl.c
@@ -13,12 +13,10 @@ static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = {
13{ 0 } 13{ 0 }
14}; 14};
15 15
16int ide_disk_ioctl(struct inode *inode, struct file *file, 16int ide_disk_ioctl(ide_drive_t *drive, struct inode *inode, struct file *file,
17 unsigned int cmd, unsigned long arg) 17 unsigned int cmd, unsigned long arg)
18{ 18{
19 struct block_device *bdev = inode->i_bdev; 19 struct block_device *bdev = inode->i_bdev;
20 struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
21 ide_drive_t *drive = idkp->drive;
22 int err; 20 int err;
23 21
24 err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings); 22 err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c
index 4724976afe71..1146f4204c6e 100644
--- a/drivers/ide/ide-disk_proc.c
+++ b/drivers/ide/ide-disk_proc.c
@@ -56,7 +56,7 @@ static int proc_idedisk_read_capacity
56	ide_drive_t *drive = (ide_drive_t *)data; 56	ide_drive_t *drive = (ide_drive_t *)data;
57 int len; 57 int len;
58 58
59 len = sprintf(page, "%llu\n", (long long)ide_disk_capacity(drive)); 59 len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
60 60
61 PROC_IDE_READ_RETURN(page, start, off, count, eof, len); 61 PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
62} 62}
diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index 0903782689e9..cac431f0df17 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -130,7 +130,7 @@ int ide_build_dmatable(ide_drive_t *drive, struct request *rq)
130 xcount = bcount & 0xffff; 130 xcount = bcount & 0xffff;
131 if (is_trm290) 131 if (is_trm290)
132 xcount = ((xcount >> 2) - 1) << 16; 132 xcount = ((xcount >> 2) - 1) << 16;
133 if (xcount == 0x0000) { 133 else if (xcount == 0x0000) {
134 if (count++ >= PRD_ENTRIES) 134 if (count++ >= PRD_ENTRIES)
135 goto use_pio_instead; 135 goto use_pio_instead;
136 *table++ = cpu_to_le32(0x8000); 136 *table++ = cpu_to_le32(0x8000);
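
For context on the one-word fix above: in a bus-master IDE PRD table the byte count is a 16-bit field, so a full 64 KiB segment truncates to 0x0000, and the driver splits it into two 0x8000-byte entries rather than relying on the zero-means-64 KiB encoding; the TRM290 packs its count differently, so that special case must not run on the TRM290-adjusted value, hence the `else if`. A standalone sketch of the split, in plain C with no kernel API:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t bcount = 0x10000;          /* one 64 KiB scatter segment */
        uint16_t xcount = bcount & 0xffff;  /* 16-bit field: truncates to 0x0000 */

        if (xcount == 0x0000) {
            /* split into two 32 KiB PRD entries instead */
            printf("PRD entry: 0x8000 bytes\n");
            printf("PRD entry: 0x8000 bytes\n");
        } else {
            printf("PRD entry: 0x%04x bytes\n", xcount);
        }
        return 0;
    }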
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index cf0aa25470ee..aeb1ad782f54 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -15,12 +15,6 @@
15 * Documentation/ide/ChangeLog.ide-floppy.1996-2002 15 * Documentation/ide/ChangeLog.ide-floppy.1996-2002
16 */ 16 */
17 17
18#define DRV_NAME "ide-floppy"
19#define PFX DRV_NAME ": "
20
21#define IDEFLOPPY_VERSION "1.00"
22
23#include <linux/module.h>
24#include <linux/types.h> 18#include <linux/types.h>
25#include <linux/string.h> 19#include <linux/string.h>
26#include <linux/kernel.h> 20#include <linux/kernel.h>
@@ -49,19 +43,6 @@
49 43
50#include "ide-floppy.h" 44#include "ide-floppy.h"
51 45
52/* module parameters */
53static unsigned long debug_mask;
54module_param(debug_mask, ulong, 0644);
55
56/* define to see debug info */
57#define IDEFLOPPY_DEBUG_LOG 0
58
59#if IDEFLOPPY_DEBUG_LOG
60#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
61#else
62#define ide_debug_log(lvl, fmt, args...) do {} while (0)
63#endif
64
65/* 46/*
66 * After each failed packet command we issue a request sense command and retry 47 * After each failed packet command we issue a request sense command and retry
67 * the packet command IDEFLOPPY_MAX_PC_RETRIES times. 48 * the packet command IDEFLOPPY_MAX_PC_RETRIES times.
@@ -83,43 +64,13 @@ module_param(debug_mask, ulong, 0644);
83/* Error code returned in rq->errors to the higher part of the driver. */ 64/* Error code returned in rq->errors to the higher part of the driver. */
84#define IDEFLOPPY_ERROR_GENERAL 101 65#define IDEFLOPPY_ERROR_GENERAL 101
85 66
86static DEFINE_MUTEX(idefloppy_ref_mutex);
87
88static void idefloppy_cleanup_obj(struct kref *);
89
90static struct ide_floppy_obj *ide_floppy_get(struct gendisk *disk)
91{
92 struct ide_floppy_obj *floppy = NULL;
93
94 mutex_lock(&idefloppy_ref_mutex);
95 floppy = ide_drv_g(disk, ide_floppy_obj);
96 if (floppy) {
97 if (ide_device_get(floppy->drive))
98 floppy = NULL;
99 else
100 kref_get(&floppy->kref);
101 }
102 mutex_unlock(&idefloppy_ref_mutex);
103 return floppy;
104}
105
106static void ide_floppy_put(struct ide_floppy_obj *floppy)
107{
108 ide_drive_t *drive = floppy->drive;
109
110 mutex_lock(&idefloppy_ref_mutex);
111 kref_put(&floppy->kref, idefloppy_cleanup_obj);
112 ide_device_put(drive);
113 mutex_unlock(&idefloppy_ref_mutex);
114}
115
116/* 67/*
117 * Used to finish servicing a request. For read/write requests, we will call 68 * Used to finish servicing a request. For read/write requests, we will call
118 * ide_end_request to pass to the next buffer. 69 * ide_end_request to pass to the next buffer.
119 */ 70 */
120static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs) 71static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
121{ 72{
122 idefloppy_floppy_t *floppy = drive->driver_data; 73 struct ide_disk_obj *floppy = drive->driver_data;
123 struct request *rq = HWGROUP(drive)->rq; 74 struct request *rq = HWGROUP(drive)->rq;
124 int error; 75 int error;
125 76
@@ -161,12 +112,12 @@ static void idefloppy_update_buffers(ide_drive_t *drive,
161 struct bio *bio = rq->bio; 112 struct bio *bio = rq->bio;
162 113
163 while ((bio = rq->bio) != NULL) 114 while ((bio = rq->bio) != NULL)
164 idefloppy_end_request(drive, 1, 0); 115 ide_floppy_end_request(drive, 1, 0);
165} 116}
166 117
167static void ide_floppy_callback(ide_drive_t *drive, int dsc) 118static void ide_floppy_callback(ide_drive_t *drive, int dsc)
168{ 119{
169 idefloppy_floppy_t *floppy = drive->driver_data; 120 struct ide_disk_obj *floppy = drive->driver_data;
170 struct ide_atapi_pc *pc = drive->pc; 121 struct ide_atapi_pc *pc = drive->pc;
171 int uptodate = pc->error ? 0 : 1; 122 int uptodate = pc->error ? 0 : 1;
172 123
@@ -200,10 +151,10 @@ static void ide_floppy_callback(ide_drive_t *drive, int dsc)
200 "Aborting request!\n"); 151 "Aborting request!\n");
201 } 152 }
202 153
203 idefloppy_end_request(drive, uptodate, 0); 154 ide_floppy_end_request(drive, uptodate, 0);
204} 155}
205 156
206static void ide_floppy_report_error(idefloppy_floppy_t *floppy, 157static void ide_floppy_report_error(struct ide_disk_obj *floppy,
207 struct ide_atapi_pc *pc) 158 struct ide_atapi_pc *pc)
208{ 159{
209	/* suppress error messages resulting from Medium not present */ 160	/* suppress error messages resulting from Medium not present */
@@ -222,7 +173,7 @@ static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
222static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive, 173static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
223 struct ide_atapi_pc *pc) 174 struct ide_atapi_pc *pc)
224{ 175{
225 idefloppy_floppy_t *floppy = drive->driver_data; 176 struct ide_disk_obj *floppy = drive->driver_data;
226 177
227 if (floppy->failed_pc == NULL && 178 if (floppy->failed_pc == NULL &&
228 pc->c[0] != GPCMD_REQUEST_SENSE) 179 pc->c[0] != GPCMD_REQUEST_SENSE)
@@ -286,7 +237,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
286 struct ide_atapi_pc *pc, struct request *rq, 237 struct ide_atapi_pc *pc, struct request *rq,
287 unsigned long sector) 238 unsigned long sector)
288{ 239{
289 idefloppy_floppy_t *floppy = drive->driver_data; 240 struct ide_disk_obj *floppy = drive->driver_data;
290 int block = sector / floppy->bs_factor; 241 int block = sector / floppy->bs_factor;
291 int blocks = rq->nr_sectors / floppy->bs_factor; 242 int blocks = rq->nr_sectors / floppy->bs_factor;
292 int cmd = rq_data_dir(rq); 243 int cmd = rq_data_dir(rq);
@@ -310,7 +261,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
310 pc->flags |= PC_FLAG_DMA_OK; 261 pc->flags |= PC_FLAG_DMA_OK;
311} 262}
312 263
313static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy, 264static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
314 struct ide_atapi_pc *pc, struct request *rq) 265 struct ide_atapi_pc *pc, struct request *rq)
315{ 266{
316 ide_init_pc(pc); 267 ide_init_pc(pc);
@@ -329,13 +280,12 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
329 pc->req_xfer = pc->buf_size = rq->data_len; 280 pc->req_xfer = pc->buf_size = rq->data_len;
330} 281}
331 282
332static ide_startstop_t idefloppy_do_request(ide_drive_t *drive, 283static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
333 struct request *rq, sector_t block_s) 284 struct request *rq, sector_t block)
334{ 285{
335 idefloppy_floppy_t *floppy = drive->driver_data; 286 struct ide_disk_obj *floppy = drive->driver_data;
336 ide_hwif_t *hwif = drive->hwif; 287 ide_hwif_t *hwif = drive->hwif;
337 struct ide_atapi_pc *pc; 288 struct ide_atapi_pc *pc;
338 unsigned long block = (unsigned long)block_s;
339 289
340 ide_debug_log(IDE_DBG_FUNC, "%s: dev: %s, cmd: 0x%x, cmd_type: %x, " 290 ide_debug_log(IDE_DBG_FUNC, "%s: dev: %s, cmd: 0x%x, cmd_type: %x, "
341 "errors: %d\n", 291 "errors: %d\n",
@@ -353,7 +303,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
353 else 303 else
354 printk(KERN_ERR PFX "%s: I/O error\n", drive->name); 304 printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
355 305
356 idefloppy_end_request(drive, 0, 0); 306 ide_floppy_end_request(drive, 0, 0);
357 return ide_stopped; 307 return ide_stopped;
358 } 308 }
359 if (blk_fs_request(rq)) { 309 if (blk_fs_request(rq)) {
@@ -361,11 +311,11 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
361 (rq->nr_sectors % floppy->bs_factor)) { 311 (rq->nr_sectors % floppy->bs_factor)) {
362 printk(KERN_ERR PFX "%s: unsupported r/w rq size\n", 312 printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
363 drive->name); 313 drive->name);
364 idefloppy_end_request(drive, 0, 0); 314 ide_floppy_end_request(drive, 0, 0);
365 return ide_stopped; 315 return ide_stopped;
366 } 316 }
367 pc = &floppy->queued_pc; 317 pc = &floppy->queued_pc;
368 idefloppy_create_rw_cmd(drive, pc, rq, block); 318 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
369 } else if (blk_special_request(rq)) { 319 } else if (blk_special_request(rq)) {
370 pc = (struct ide_atapi_pc *) rq->buffer; 320 pc = (struct ide_atapi_pc *) rq->buffer;
371 } else if (blk_pc_request(rq)) { 321 } else if (blk_pc_request(rq)) {
@@ -373,7 +323,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
373 idefloppy_blockpc_cmd(floppy, pc, rq); 323 idefloppy_blockpc_cmd(floppy, pc, rq);
374 } else { 324 } else {
375 blk_dump_rq_flags(rq, PFX "unsupported command in queue"); 325 blk_dump_rq_flags(rq, PFX "unsupported command in queue");
376 idefloppy_end_request(drive, 0, 0); 326 ide_floppy_end_request(drive, 0, 0);
377 return ide_stopped; 327 return ide_stopped;
378 } 328 }
379 329
@@ -394,7 +344,7 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
394 */ 344 */
395static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) 345static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
396{ 346{
397 idefloppy_floppy_t *floppy = drive->driver_data; 347 struct ide_disk_obj *floppy = drive->driver_data;
398 struct gendisk *disk = floppy->disk; 348 struct gendisk *disk = floppy->disk;
399 struct ide_atapi_pc pc; 349 struct ide_atapi_pc pc;
400 u8 *page; 350 u8 *page;
@@ -410,11 +360,11 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
410 } 360 }
411 361
412 if (pc.buf[3] & 0x80) 362 if (pc.buf[3] & 0x80)
413 drive->atapi_flags |= IDE_AFLAG_WP; 363 drive->dev_flags |= IDE_DFLAG_WP;
414 else 364 else
415 drive->atapi_flags &= ~IDE_AFLAG_WP; 365 drive->dev_flags &= ~IDE_DFLAG_WP;
416 366
417 set_disk_ro(disk, !!(drive->atapi_flags & IDE_AFLAG_WP)); 367 set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP));
418 368
419 page = &pc.buf[8]; 369 page = &pc.buf[8];
420 370
@@ -445,7 +395,9 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
445 drive->name, lba_capacity, capacity); 395 drive->name, lba_capacity, capacity);
446 floppy->blocks = floppy->block_size ? 396 floppy->blocks = floppy->block_size ?
447 capacity / floppy->block_size : 0; 397 capacity / floppy->block_size : 0;
398 drive->capacity64 = floppy->blocks * floppy->bs_factor;
448 } 399 }
400
449 return 0; 401 return 0;
450} 402}
451 403
@@ -455,7 +407,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
455 */ 407 */
456static int ide_floppy_get_capacity(ide_drive_t *drive) 408static int ide_floppy_get_capacity(ide_drive_t *drive)
457{ 409{
458 idefloppy_floppy_t *floppy = drive->driver_data; 410 struct ide_disk_obj *floppy = drive->driver_data;
459 struct gendisk *disk = floppy->disk; 411 struct gendisk *disk = floppy->disk;
460 struct ide_atapi_pc pc; 412 struct ide_atapi_pc pc;
461 u8 *cap_desc; 413 u8 *cap_desc;
@@ -466,7 +418,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
466 drive->bios_head = drive->bios_sect = 0; 418 drive->bios_head = drive->bios_sect = 0;
467 floppy->blocks = 0; 419 floppy->blocks = 0;
468 floppy->bs_factor = 1; 420 floppy->bs_factor = 1;
469 set_capacity(floppy->disk, 0); 421 drive->capacity64 = 0;
470 422
471 ide_floppy_create_read_capacity_cmd(&pc); 423 ide_floppy_create_read_capacity_cmd(&pc);
472 if (ide_queue_pc_tail(drive, disk, &pc)) { 424 if (ide_queue_pc_tail(drive, disk, &pc)) {
@@ -523,6 +475,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
523 "non 512 bytes block size not " 475 "non 512 bytes block size not "
524 "fully supported\n", 476 "fully supported\n",
525 drive->name); 477 drive->name);
478 drive->capacity64 =
479 floppy->blocks * floppy->bs_factor;
526 rc = 0; 480 rc = 0;
527 } 481 }
528 break; 482 break;
@@ -547,21 +501,12 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
547 if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) 501 if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
548 (void) ide_floppy_get_flexible_disk_page(drive); 502 (void) ide_floppy_get_flexible_disk_page(drive);
549 503
550 set_capacity(disk, floppy->blocks * floppy->bs_factor);
551
552 return rc; 504 return rc;
553} 505}
554 506
555sector_t ide_floppy_capacity(ide_drive_t *drive) 507static void ide_floppy_setup(ide_drive_t *drive)
556{
557 idefloppy_floppy_t *floppy = drive->driver_data;
558 unsigned long capacity = floppy->blocks * floppy->bs_factor;
559
560 return capacity;
561}
562
563static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
564{ 508{
509 struct ide_disk_obj *floppy = drive->driver_data;
565 u16 *id = drive->id; 510 u16 *id = drive->id;
566 511
567 drive->pc_callback = ide_floppy_callback; 512 drive->pc_callback = ide_floppy_callback;
@@ -592,252 +537,42 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
592 blk_queue_max_sectors(drive->queue, 64); 537 blk_queue_max_sectors(drive->queue, 64);
593 drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE; 538 drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
594 /* IOMEGA Clik! drives do not support lock/unlock commands */ 539 /* IOMEGA Clik! drives do not support lock/unlock commands */
595 drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK; 540 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
596 } 541 }
597 542
598 (void) ide_floppy_get_capacity(drive); 543 (void) ide_floppy_get_capacity(drive);
599 544
600 ide_proc_register_driver(drive, floppy->driver); 545 ide_proc_register_driver(drive, floppy->driver);
601}
602 546
603static void ide_floppy_remove(ide_drive_t *drive) 547 drive->dev_flags |= IDE_DFLAG_ATTACH;
604{
605 idefloppy_floppy_t *floppy = drive->driver_data;
606 struct gendisk *g = floppy->disk;
607
608 ide_proc_unregister_driver(drive, floppy->driver);
609
610 del_gendisk(g);
611
612 ide_floppy_put(floppy);
613} 548}
614 549
615static void idefloppy_cleanup_obj(struct kref *kref) 550static void ide_floppy_flush(ide_drive_t *drive)
616{ 551{
617 struct ide_floppy_obj *floppy = to_ide_drv(kref, ide_floppy_obj);
618 ide_drive_t *drive = floppy->drive;
619 struct gendisk *g = floppy->disk;
620
621 drive->driver_data = NULL;
622 g->private_data = NULL;
623 put_disk(g);
624 kfree(floppy);
625} 552}
626 553
627static int ide_floppy_probe(ide_drive_t *); 554static int ide_floppy_init_media(ide_drive_t *drive, struct gendisk *disk)
628
629static ide_driver_t idefloppy_driver = {
630 .gen_driver = {
631 .owner = THIS_MODULE,
632 .name = "ide-floppy",
633 .bus = &ide_bus_type,
634 },
635 .probe = ide_floppy_probe,
636 .remove = ide_floppy_remove,
637 .version = IDEFLOPPY_VERSION,
638 .do_request = idefloppy_do_request,
639 .end_request = idefloppy_end_request,
640 .error = __ide_error,
641#ifdef CONFIG_IDE_PROC_FS
642 .proc = ide_floppy_proc,
643 .settings = ide_floppy_settings,
644#endif
645};
646
647static int idefloppy_open(struct inode *inode, struct file *filp)
648{ 555{
649 struct gendisk *disk = inode->i_bdev->bd_disk;
650 struct ide_floppy_obj *floppy;
651 ide_drive_t *drive;
652 int ret = 0; 556 int ret = 0;
653 557
654 floppy = ide_floppy_get(disk); 558 if (ide_do_test_unit_ready(drive, disk))
655 if (!floppy) 559 ide_do_start_stop(drive, disk, 1);
656 return -ENXIO;
657
658 drive = floppy->drive;
659
660 ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
661
662 floppy->openers++;
663
664 if (floppy->openers == 1) {
665 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
666 /* Just in case */
667
668 if (ide_do_test_unit_ready(drive, disk))
669 ide_do_start_stop(drive, disk, 1);
670
671 if (ide_floppy_get_capacity(drive)
672 && (filp->f_flags & O_NDELAY) == 0
673 /*
674 * Allow O_NDELAY to open a drive without a disk, or with an
675 * unreadable disk, so that we can get the format capacity
676 * of the drive or begin the format - Sam
677 */
678 ) {
679 ret = -EIO;
680 goto out_put_floppy;
681 }
682
683 if ((drive->atapi_flags & IDE_AFLAG_WP) && (filp->f_mode & 2)) {
684 ret = -EROFS;
685 goto out_put_floppy;
686 }
687
688 drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
689 ide_set_media_lock(drive, disk, 1);
690 check_disk_change(inode->i_bdev);
691 } else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) {
692 ret = -EBUSY;
693 goto out_put_floppy;
694 }
695 return 0;
696
697out_put_floppy:
698 floppy->openers--;
699 ide_floppy_put(floppy);
700 return ret;
701}
702
703static int idefloppy_release(struct inode *inode, struct file *filp)
704{
705 struct gendisk *disk = inode->i_bdev->bd_disk;
706 struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
707 ide_drive_t *drive = floppy->drive;
708
709 ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
710
711 if (floppy->openers == 1) {
712 ide_set_media_lock(drive, disk, 0);
713 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
714 }
715
716 floppy->openers--;
717
718 ide_floppy_put(floppy);
719
720 return 0;
721}
722
723static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
724{
725 struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
726 ide_floppy_obj);
727 ide_drive_t *drive = floppy->drive;
728 560
729 geo->heads = drive->bios_head; 561 ret = ide_floppy_get_capacity(drive);
730 geo->sectors = drive->bios_sect;
731 geo->cylinders = (u16)drive->bios_cyl; /* truncate */
732 return 0;
733}
734 562
735static int idefloppy_media_changed(struct gendisk *disk) 563 set_capacity(disk, ide_gd_capacity(drive));
736{
737 struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj);
738 ide_drive_t *drive = floppy->drive;
739 int ret;
740 564
741 /* do not scan partitions twice if this is a removable device */
742 if (drive->dev_flags & IDE_DFLAG_ATTACH) {
743 drive->dev_flags &= ~IDE_DFLAG_ATTACH;
744 return 0;
745 }
746 ret = !!(drive->atapi_flags & IDE_AFLAG_MEDIA_CHANGED);
747 drive->atapi_flags &= ~IDE_AFLAG_MEDIA_CHANGED;
748 return ret; 565 return ret;
749} 566}
750 567
751static int idefloppy_revalidate_disk(struct gendisk *disk) 568const struct ide_disk_ops ide_atapi_disk_ops = {
752{ 569 .check = ide_check_atapi_device,
753 struct ide_floppy_obj *floppy = ide_drv_g(disk, ide_floppy_obj); 570 .get_capacity = ide_floppy_get_capacity,
754 set_capacity(disk, ide_floppy_capacity(floppy->drive)); 571 .setup = ide_floppy_setup,
755 return 0; 572 .flush = ide_floppy_flush,
756} 573 .init_media = ide_floppy_init_media,
757 574 .set_doorlock = ide_set_media_lock,
758static struct block_device_operations idefloppy_ops = { 575 .do_request = ide_floppy_do_request,
759 .owner = THIS_MODULE, 576 .end_request = ide_floppy_end_request,
760 .open = idefloppy_open, 577 .ioctl = ide_floppy_ioctl,
761 .release = idefloppy_release,
762 .ioctl = ide_floppy_ioctl,
763 .getgeo = idefloppy_getgeo,
764 .media_changed = idefloppy_media_changed,
765 .revalidate_disk = idefloppy_revalidate_disk
766}; 578};
767
768static int ide_floppy_probe(ide_drive_t *drive)
769{
770 idefloppy_floppy_t *floppy;
771 struct gendisk *g;
772
773 if (!strstr("ide-floppy", drive->driver_req))
774 goto failed;
775
776 if (drive->media != ide_floppy)
777 goto failed;
778
779 if (!ide_check_atapi_device(drive, DRV_NAME)) {
780 printk(KERN_ERR PFX "%s: not supported by this version of "
781 DRV_NAME "\n", drive->name);
782 goto failed;
783 }
784 floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
785 if (!floppy) {
786 printk(KERN_ERR PFX "%s: Can't allocate a floppy structure\n",
787 drive->name);
788 goto failed;
789 }
790
791 g = alloc_disk(1 << PARTN_BITS);
792 if (!g)
793 goto out_free_floppy;
794
795 ide_init_disk(g, drive);
796
797 kref_init(&floppy->kref);
798
799 floppy->drive = drive;
800 floppy->driver = &idefloppy_driver;
801 floppy->disk = g;
802
803 g->private_data = &floppy->driver;
804
805 drive->driver_data = floppy;
806
807 drive->debug_mask = debug_mask;
808
809 idefloppy_setup(drive, floppy);
810 drive->dev_flags |= IDE_DFLAG_ATTACH;
811
812 g->minors = 1 << PARTN_BITS;
813 g->driverfs_dev = &drive->gendev;
814 if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
815 g->flags = GENHD_FL_REMOVABLE;
816 g->fops = &idefloppy_ops;
817 add_disk(g);
818 return 0;
819
820out_free_floppy:
821 kfree(floppy);
822failed:
823 return -ENODEV;
824}
825
826static void __exit idefloppy_exit(void)
827{
828 driver_unregister(&idefloppy_driver.gen_driver);
829}
830
831static int __init idefloppy_init(void)
832{
833 printk(KERN_INFO DRV_NAME " driver " IDEFLOPPY_VERSION "\n");
834 return driver_register(&idefloppy_driver.gen_driver);
835}
836
837MODULE_ALIAS("ide:*m-floppy*");
838MODULE_ALIAS("ide-floppy");
839module_init(idefloppy_init);
840module_exit(idefloppy_exit);
841MODULE_LICENSE("GPL");
842MODULE_DESCRIPTION("ATAPI FLOPPY Driver");
843
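
With the set_capacity() calls removed from the floppy paths above, both media types now record their size in drive->capacity64 and the generic layer reads it back through ide_gd_capacity(). A hedged sketch of that single shared accessor; demo_drive and demo_capacity are illustrative names:

    #include <stdio.h>
    #include <stdint.h>

    struct demo_drive {
        uint64_t capacity64;    /* sectors, shared by disk and floppy paths */
    };

    static uint64_t demo_capacity(const struct demo_drive *d)
    {
        return d->capacity64;
    }

    int main(void)
    {
        struct demo_drive floppy = { 0 };
        unsigned long blocks = 1440, bs_factor = 2;  /* e.g. 1024-byte blocks */

        floppy.capacity64 = (uint64_t)blocks * bs_factor;
        printf("capacity: %llu sectors\n",
               (unsigned long long)demo_capacity(&floppy));
        return 0;
    }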
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
index 17cf865e583d..c17124dd6079 100644
--- a/drivers/ide/ide-floppy.h
+++ b/drivers/ide/ide-floppy.h
@@ -1,37 +1,9 @@
1#ifndef __IDE_FLOPPY_H 1#ifndef __IDE_FLOPPY_H
2#define __IDE_FLOPPY_H 2#define __IDE_FLOPPY_H
3 3
4/* 4#include "ide-gd.h"
5 * Most of our global data which we need to save even as we leave the driver
6 * due to an interrupt or a timer event is stored in a variable of type
7 * idefloppy_floppy_t, defined below.
8 */
9typedef struct ide_floppy_obj {
10 ide_drive_t *drive;
11 ide_driver_t *driver;
12 struct gendisk *disk;
13 struct kref kref;
14 unsigned int openers; /* protected by BKL for now */
15
16 /* Last failed packet command */
17 struct ide_atapi_pc *failed_pc;
18 /* used for blk_{fs,pc}_request() requests */
19 struct ide_atapi_pc queued_pc;
20
21 /* Last error information */
22 u8 sense_key, asc, ascq;
23
24 int progress_indication;
25
26 /* Device information */
27 /* Current format */
28 int blocks, block_size, bs_factor;
29 /* Last format capacity descriptor */
30 u8 cap_desc[8];
31 /* Copy of the flexible disk page */
32 u8 flexible_disk_page[32];
33} idefloppy_floppy_t;
34 5
6#ifdef CONFIG_IDE_GD_ATAPI
35/* 7/*
36 * Pages of the SELECT SENSE / MODE SENSE packet commands. 8 * Pages of the SELECT SENSE / MODE SENSE packet commands.
37 * See SFF-8070i spec. 9 * See SFF-8070i spec.
@@ -46,17 +18,22 @@ typedef struct ide_floppy_obj {
46#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603 18#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603
47 19
48/* ide-floppy.c */ 20/* ide-floppy.c */
21extern const struct ide_disk_ops ide_atapi_disk_ops;
49void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8); 22void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
50void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *); 23void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
51sector_t ide_floppy_capacity(ide_drive_t *);
52 24
53/* ide-floppy_ioctl.c */ 25/* ide-floppy_ioctl.c */
54int ide_floppy_ioctl(struct inode *, struct file *, unsigned, unsigned long); 26int ide_floppy_ioctl(ide_drive_t *, struct inode *, struct file *, unsigned int,
27 unsigned long);
55 28
56#ifdef CONFIG_IDE_PROC_FS 29#ifdef CONFIG_IDE_PROC_FS
57/* ide-floppy_proc.c */ 30/* ide-floppy_proc.c */
58extern ide_proc_entry_t ide_floppy_proc[]; 31extern ide_proc_entry_t ide_floppy_proc[];
59extern const struct ide_proc_devset ide_floppy_settings[]; 32extern const struct ide_proc_devset ide_floppy_settings[];
60#endif 33#endif
34#else
35#define ide_floppy_proc NULL
36#define ide_floppy_settings NULL
37#endif
61 38
62#endif /*__IDE_FLOPPY_H */ 39#endif /*__IDE_FLOPPY_H */
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index a3a7a0809e2b..409e4c15f9b7 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -33,7 +33,7 @@
33 33
34static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) 34static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
35{ 35{
36 struct ide_floppy_obj *floppy = drive->driver_data; 36 struct ide_disk_obj *floppy = drive->driver_data;
37 struct ide_atapi_pc pc; 37 struct ide_atapi_pc pc;
38 u8 header_len, desc_cnt; 38 u8 header_len, desc_cnt;
39 int i, blocks, length, u_array_size, u_index; 39 int i, blocks, length, u_array_size, u_index;
@@ -113,7 +113,7 @@ static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b,
113 113
114static int ide_floppy_get_sfrp_bit(ide_drive_t *drive) 114static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
115{ 115{
116 idefloppy_floppy_t *floppy = drive->driver_data; 116 struct ide_disk_obj *floppy = drive->driver_data;
117 struct ide_atapi_pc pc; 117 struct ide_atapi_pc pc;
118 118
119 drive->atapi_flags &= ~IDE_AFLAG_SRFP; 119 drive->atapi_flags &= ~IDE_AFLAG_SRFP;
@@ -132,17 +132,17 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
132 132
133static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg) 133static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
134{ 134{
135 idefloppy_floppy_t *floppy = drive->driver_data; 135 struct ide_disk_obj *floppy = drive->driver_data;
136 struct ide_atapi_pc pc; 136 struct ide_atapi_pc pc;
137 int blocks, length, flags, err = 0; 137 int blocks, length, flags, err = 0;
138 138
139 if (floppy->openers > 1) { 139 if (floppy->openers > 1) {
140 /* Don't format if someone is using the disk */ 140 /* Don't format if someone is using the disk */
141 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; 141 drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
142 return -EBUSY; 142 return -EBUSY;
143 } 143 }
144 144
145 drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS; 145 drive->dev_flags |= IDE_DFLAG_FORMAT_IN_PROGRESS;
146 146
147 /* 147 /*
148 * Send ATAPI_FORMAT_UNIT to the drive. 148 * Send ATAPI_FORMAT_UNIT to the drive.
@@ -174,7 +174,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
174 174
175out: 175out:
176 if (err) 176 if (err)
177 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; 177 drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
178 return err; 178 return err;
179} 179}
180 180
@@ -190,7 +190,7 @@ out:
190 190
191static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg) 191static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
192{ 192{
193 idefloppy_floppy_t *floppy = drive->driver_data; 193 struct ide_disk_obj *floppy = drive->driver_data;
194 struct ide_atapi_pc pc; 194 struct ide_atapi_pc pc;
195 int progress_indication = 0x10000; 195 int progress_indication = 0x10000;
196 196
@@ -226,7 +226,7 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
226static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc, 226static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc,
227 unsigned long arg, unsigned int cmd) 227 unsigned long arg, unsigned int cmd)
228{ 228{
229 idefloppy_floppy_t *floppy = drive->driver_data; 229 struct ide_disk_obj *floppy = drive->driver_data;
230 struct gendisk *disk = floppy->disk; 230 struct gendisk *disk = floppy->disk;
231 int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0; 231 int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0;
232 232
@@ -260,13 +260,10 @@ static int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file,
260 } 260 }
261} 261}
262 262
263int ide_floppy_ioctl(struct inode *inode, struct file *file, 263int ide_floppy_ioctl(ide_drive_t *drive, struct inode *inode,
264 unsigned int cmd, unsigned long arg) 264 struct file *file, unsigned int cmd, unsigned long arg)
265{ 265{
266 struct block_device *bdev = inode->i_bdev; 266 struct block_device *bdev = inode->i_bdev;
267 struct ide_floppy_obj *floppy = ide_drv_g(bdev->bd_disk,
268 ide_floppy_obj);
269 ide_drive_t *drive = floppy->drive;
270 struct ide_atapi_pc pc; 267 struct ide_atapi_pc pc;
271 void __user *argp = (void __user *)arg; 268 void __user *argp = (void __user *)arg;
272 int err; 269 int err;
diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c
index 76f0c6c4eca3..3ec762cb60ab 100644
--- a/drivers/ide/ide-floppy_proc.c
+++ b/drivers/ide/ide-floppy_proc.c
@@ -9,7 +9,7 @@ static int proc_idefloppy_read_capacity(char *page, char **start, off_t off,
9	ide_drive_t *drive = (ide_drive_t *)data; 9	ide_drive_t *drive = (ide_drive_t *)data;
10 int len; 10 int len;
11 11
12 len = sprintf(page, "%llu\n", (long long)ide_floppy_capacity(drive)); 12 len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive));
13 PROC_IDE_READ_RETURN(page, start, off, count, eof, len); 13 PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
14} 14}
15 15
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
new file mode 100644
index 000000000000..d44898f46c33
--- /dev/null
+++ b/drivers/ide/ide-gd.c
@@ -0,0 +1,398 @@
1#include <linux/module.h>
2#include <linux/types.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/errno.h>
6#include <linux/genhd.h>
7#include <linux/mutex.h>
8#include <linux/ide.h>
9#include <linux/hdreg.h>
10
11#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
12#define IDE_DISK_MINORS (1 << PARTN_BITS)
13#else
14#define IDE_DISK_MINORS 0
15#endif
16
17#include "ide-disk.h"
18#include "ide-floppy.h"
19
20#define IDE_GD_VERSION "1.18"
21
22/* module parameters */
23static unsigned long debug_mask;
24module_param(debug_mask, ulong, 0644);
25
26static DEFINE_MUTEX(ide_disk_ref_mutex);
27
28static void ide_disk_release(struct kref *);
29
30static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
31{
32 struct ide_disk_obj *idkp = NULL;
33
34 mutex_lock(&ide_disk_ref_mutex);
35 idkp = ide_drv_g(disk, ide_disk_obj);
36 if (idkp) {
37 if (ide_device_get(idkp->drive))
38 idkp = NULL;
39 else
40 kref_get(&idkp->kref);
41 }
42 mutex_unlock(&ide_disk_ref_mutex);
43 return idkp;
44}
45
46static void ide_disk_put(struct ide_disk_obj *idkp)
47{
48 ide_drive_t *drive = idkp->drive;
49
50 mutex_lock(&ide_disk_ref_mutex);
51 kref_put(&idkp->kref, ide_disk_release);
52 ide_device_put(drive);
53 mutex_unlock(&ide_disk_ref_mutex);
54}
55
56sector_t ide_gd_capacity(ide_drive_t *drive)
57{
58 return drive->capacity64;
59}
60
61static int ide_gd_probe(ide_drive_t *);
62
63static void ide_gd_remove(ide_drive_t *drive)
64{
65 struct ide_disk_obj *idkp = drive->driver_data;
66 struct gendisk *g = idkp->disk;
67
68 ide_proc_unregister_driver(drive, idkp->driver);
69
70 del_gendisk(g);
71
72 drive->disk_ops->flush(drive);
73
74 ide_disk_put(idkp);
75}
76
77static void ide_disk_release(struct kref *kref)
78{
79 struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj);
80 ide_drive_t *drive = idkp->drive;
81 struct gendisk *g = idkp->disk;
82
83 drive->disk_ops = NULL;
84 drive->driver_data = NULL;
85 g->private_data = NULL;
86 put_disk(g);
87 kfree(idkp);
88}
89
90/*
91 * On HPA drives the capacity needs to be
92 * reinitialized on resume; otherwise the disk
93 * cannot be used and a hard reset is required
94 */
95static void ide_gd_resume(ide_drive_t *drive)
96{
97 if (ata_id_hpa_enabled(drive->id))
98 (void)drive->disk_ops->get_capacity(drive);
99}
100
101static void ide_gd_shutdown(ide_drive_t *drive)
102{
103#ifdef CONFIG_ALPHA
104 /* On Alpha, halt(8) doesn't actually turn the machine off,
105	   it puts you into a sort of firmware monitor. Typically,
106 it's used to boot another kernel image, so it's not much
107 different from reboot(8). Therefore, we don't need to
108 spin down the disk in this case, especially since Alpha
109 firmware doesn't handle disks in standby mode properly.
110 On the other hand, it's reasonably safe to turn the power
111 off when the shutdown process reaches the firmware prompt,
112	   as the firmware initialization takes a rather long time -
113 at least 10 seconds, which should be sufficient for
114 the disk to expire its write cache. */
115 if (system_state != SYSTEM_POWER_OFF) {
116#else
117 if (system_state == SYSTEM_RESTART) {
118#endif
119 drive->disk_ops->flush(drive);
120 return;
121 }
122
123 printk(KERN_INFO "Shutdown: %s\n", drive->name);
124
125 drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
126}
127
128#ifdef CONFIG_IDE_PROC_FS
129static ide_proc_entry_t *ide_disk_proc_entries(ide_drive_t *drive)
130{
131 return (drive->media == ide_disk) ? ide_disk_proc : ide_floppy_proc;
132}
133
134static const struct ide_proc_devset *ide_disk_proc_devsets(ide_drive_t *drive)
135{
136 return (drive->media == ide_disk) ? ide_disk_settings
137 : ide_floppy_settings;
138}
139#endif
140
141static ide_startstop_t ide_gd_do_request(ide_drive_t *drive,
142 struct request *rq, sector_t sector)
143{
144 return drive->disk_ops->do_request(drive, rq, sector);
145}
146
147static int ide_gd_end_request(ide_drive_t *drive, int uptodate, int nrsecs)
148{
149 return drive->disk_ops->end_request(drive, uptodate, nrsecs);
150}
151
152static ide_driver_t ide_gd_driver = {
153 .gen_driver = {
154 .owner = THIS_MODULE,
155 .name = "ide-gd",
156 .bus = &ide_bus_type,
157 },
158 .probe = ide_gd_probe,
159 .remove = ide_gd_remove,
160 .resume = ide_gd_resume,
161 .shutdown = ide_gd_shutdown,
162 .version = IDE_GD_VERSION,
163 .do_request = ide_gd_do_request,
164 .end_request = ide_gd_end_request,
165 .error = __ide_error,
166#ifdef CONFIG_IDE_PROC_FS
167 .proc_entries = ide_disk_proc_entries,
168 .proc_devsets = ide_disk_proc_devsets,
169#endif
170};
171
172static int ide_gd_open(struct inode *inode, struct file *filp)
173{
174 struct gendisk *disk = inode->i_bdev->bd_disk;
175 struct ide_disk_obj *idkp;
176 ide_drive_t *drive;
177 int ret = 0;
178
179 idkp = ide_disk_get(disk);
180 if (idkp == NULL)
181 return -ENXIO;
182
183 drive = idkp->drive;
184
185 ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
186
187 idkp->openers++;
188
189 if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
190 drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
191 /* Just in case */
192
193 ret = drive->disk_ops->init_media(drive, disk);
194
195 /*
196 * Allow O_NDELAY to open a drive without a disk, or with an
197 * unreadable disk, so that we can get the format capacity
198 * of the drive or begin the format - Sam
199 */
200 if (ret && (filp->f_flags & O_NDELAY) == 0) {
201 ret = -EIO;
202 goto out_put_idkp;
203 }
204
205 if ((drive->dev_flags & IDE_DFLAG_WP) && (filp->f_mode & 2)) {
206 ret = -EROFS;
207 goto out_put_idkp;
208 }
209
210 /*
211 * Ignore the return code from door_lock,
212 * since the open() has already succeeded,
213 * and the door_lock is irrelevant at this point.
214 */
215 drive->disk_ops->set_doorlock(drive, disk, 1);
216 drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
217 check_disk_change(inode->i_bdev);
218 } else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
219 ret = -EBUSY;
220 goto out_put_idkp;
221 }
222 return 0;
223
224out_put_idkp:
225 idkp->openers--;
226 ide_disk_put(idkp);
227 return ret;
228}
229
230static int ide_gd_release(struct inode *inode, struct file *filp)
231{
232 struct gendisk *disk = inode->i_bdev->bd_disk;
233 struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
234 ide_drive_t *drive = idkp->drive;
235
236 ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
237
238 if (idkp->openers == 1)
239 drive->disk_ops->flush(drive);
240
241 if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
242 drive->disk_ops->set_doorlock(drive, disk, 0);
243 drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
244 }
245
246 idkp->openers--;
247
248 ide_disk_put(idkp);
249
250 return 0;
251}
252
253static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
254{
255 struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
256 ide_drive_t *drive = idkp->drive;
257
258 geo->heads = drive->bios_head;
259 geo->sectors = drive->bios_sect;
260 geo->cylinders = (u16)drive->bios_cyl; /* truncate */
261 return 0;
262}
263
264static int ide_gd_media_changed(struct gendisk *disk)
265{
266 struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
267 ide_drive_t *drive = idkp->drive;
268 int ret;
269
270 /* do not scan partitions twice if this is a removable device */
271 if (drive->dev_flags & IDE_DFLAG_ATTACH) {
272 drive->dev_flags &= ~IDE_DFLAG_ATTACH;
273 return 0;
274 }
275
276 ret = !!(drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED);
277 drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
278
279 return ret;
280}
281
282static int ide_gd_revalidate_disk(struct gendisk *disk)
283{
284 struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
285 set_capacity(disk, ide_gd_capacity(idkp->drive));
286 return 0;
287}
288
289static int ide_gd_ioctl(struct inode *inode, struct file *file,
290 unsigned int cmd, unsigned long arg)
291{
292 struct block_device *bdev = inode->i_bdev;
293 struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
294 ide_drive_t *drive = idkp->drive;
295
296 return drive->disk_ops->ioctl(drive, inode, file, cmd, arg);
297}
298
299static struct block_device_operations ide_gd_ops = {
300 .owner = THIS_MODULE,
301 .open = ide_gd_open,
302 .release = ide_gd_release,
303 .ioctl = ide_gd_ioctl,
304 .getgeo = ide_gd_getgeo,
305 .media_changed = ide_gd_media_changed,
306 .revalidate_disk = ide_gd_revalidate_disk
307};
308
309static int ide_gd_probe(ide_drive_t *drive)
310{
311 const struct ide_disk_ops *disk_ops = NULL;
312 struct ide_disk_obj *idkp;
313 struct gendisk *g;
314
315 /* strstr("foo", "") is non-NULL */
316 if (!strstr("ide-gd", drive->driver_req))
317 goto failed;
318
319#ifdef CONFIG_IDE_GD_ATA
320 if (drive->media == ide_disk)
321 disk_ops = &ide_ata_disk_ops;
322#endif
323#ifdef CONFIG_IDE_GD_ATAPI
324 if (drive->media == ide_floppy)
325 disk_ops = &ide_atapi_disk_ops;
326#endif
327 if (disk_ops == NULL)
328 goto failed;
329
330 if (disk_ops->check(drive, DRV_NAME) == 0) {
331 printk(KERN_ERR PFX "%s: not supported by this driver\n",
332 drive->name);
333 goto failed;
334 }
335
336 idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
337 if (!idkp) {
338 printk(KERN_ERR PFX "%s: can't allocate a disk structure\n",
339 drive->name);
340 goto failed;
341 }
342
343 g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
344 if (!g)
345 goto out_free_idkp;
346
347 ide_init_disk(g, drive);
348
349 kref_init(&idkp->kref);
350
351 idkp->drive = drive;
352 idkp->driver = &ide_gd_driver;
353 idkp->disk = g;
354
355 g->private_data = &idkp->driver;
356
357 drive->driver_data = idkp;
358 drive->debug_mask = debug_mask;
359 drive->disk_ops = disk_ops;
360
361 disk_ops->setup(drive);
362
363 set_capacity(g, ide_gd_capacity(drive));
364
365 g->minors = IDE_DISK_MINORS;
366 g->driverfs_dev = &drive->gendev;
367 g->flags |= GENHD_FL_EXT_DEVT;
368 if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
369 g->flags = GENHD_FL_REMOVABLE;
370 g->fops = &ide_gd_ops;
371 add_disk(g);
372 return 0;
373
374out_free_idkp:
375 kfree(idkp);
376failed:
377 return -ENODEV;
378}
379
380static int __init ide_gd_init(void)
381{
382 printk(KERN_INFO DRV_NAME " driver " IDE_GD_VERSION "\n");
383 return driver_register(&ide_gd_driver.gen_driver);
384}
385
386static void __exit ide_gd_exit(void)
387{
388 driver_unregister(&ide_gd_driver.gen_driver);
389}
390
391MODULE_ALIAS("ide:*m-disk*");
392MODULE_ALIAS("ide-disk");
393MODULE_ALIAS("ide:*m-floppy*");
394MODULE_ALIAS("ide-floppy");
395module_init(ide_gd_init);
396module_exit(ide_gd_exit);
397MODULE_LICENSE("GPL");
398MODULE_DESCRIPTION("generic ATA/ATAPI disk driver");
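
The ide_disk_get()/ide_disk_put() pair at the top of ide-gd.c pins the object across open() and release(). The following userspace model shows the same discipline with a plain counter and a pthread mutex standing in for kref and ide_disk_ref_mutex; all names are illustrative:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t ref_mutex = PTHREAD_MUTEX_INITIALIZER;

    struct demo_obj {
        int refcount;
    };

    static struct demo_obj *demo_get(struct demo_obj *obj)
    {
        pthread_mutex_lock(&ref_mutex);
        obj->refcount++;    /* open() takes a reference under the lock */
        pthread_mutex_unlock(&ref_mutex);
        return obj;
    }

    static void demo_put(struct demo_obj *obj)
    {
        int release;

        pthread_mutex_lock(&ref_mutex);
        release = (--obj->refcount == 0);
        pthread_mutex_unlock(&ref_mutex);

        if (release) {  /* mirrors kref_put() invoking ide_disk_release() */
            printf("last reference dropped, freeing object\n");
            free(obj);
        }
    }

    int main(void)
    {
        struct demo_obj *obj = calloc(1, sizeof(*obj));

        if (!obj)
            return 1;
        demo_get(obj);  /* open() path */
        demo_put(obj);  /* release() path */
        return 0;
    }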
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
new file mode 100644
index 000000000000..7d3d101713e0
--- /dev/null
+++ b/drivers/ide/ide-gd.h
@@ -0,0 +1,44 @@
1#ifndef __IDE_GD_H
2#define __IDE_GD_H
3
4#define DRV_NAME "ide-gd"
5#define PFX DRV_NAME ": "
6
7/* define to see debug info */
8#define IDE_GD_DEBUG_LOG 0
9
10#if IDE_GD_DEBUG_LOG
11#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args)
12#else
13#define ide_debug_log(lvl, fmt, args...) do {} while (0)
14#endif
15
16struct ide_disk_obj {
17 ide_drive_t *drive;
18 ide_driver_t *driver;
19 struct gendisk *disk;
20 struct kref kref;
21 unsigned int openers; /* protected by BKL for now */
22
23 /* Last failed packet command */
24 struct ide_atapi_pc *failed_pc;
25 /* used for blk_{fs,pc}_request() requests */
26 struct ide_atapi_pc queued_pc;
27
28 /* Last error information */
29 u8 sense_key, asc, ascq;
30
31 int progress_indication;
32
33 /* Device information */
34 /* Current format */
35 int blocks, block_size, bs_factor;
36 /* Last format capacity descriptor */
37 u8 cap_desc[8];
38 /* Copy of the flexible disk page */
39 u8 flexible_disk_page[32];
40};
41
42sector_t ide_gd_capacity(ide_drive_t *);
43
44#endif /* __IDE_GD_H */
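
ide-gd.h carries over the old ide-floppy debug macro: with the flag at 0 the logging calls compile to nothing. A self-contained sketch of the pattern, assuming GNU-style variadic macros (##__VA_ARGS__) much as the kernel's args... form does:

    #include <stdio.h>

    #define DEMO_DEBUG_LOG 1    /* set to 0 and the calls vanish at compile time */

    #if DEMO_DEBUG_LOG
    #define demo_debug(fmt, ...) fprintf(stderr, "debug: " fmt, ##__VA_ARGS__)
    #else
    #define demo_debug(fmt, ...) do {} while (0)
    #endif

    int main(void)
    {
        demo_debug("opened device %s, openers=%d\n", "hda", 1);
        return 0;
    }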
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index b762deb2dacb..bb7a1ed8094e 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -755,7 +755,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
755 755
756 udelay(1); 756 udelay(1);
757 SELECT_DRIVE(drive); 757 SELECT_DRIVE(drive);
758 SELECT_MASK(drive, 0); 758 SELECT_MASK(drive, 1);
759 udelay(1); 759 udelay(1);
760 tp_ops->set_irq(hwif, 0); 760 tp_ops->set_irq(hwif, 0);
761 761
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 19f8c7770a25..1649ea54f76c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -208,6 +208,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
208 drive->ready_stat = 0; 208 drive->ready_stat = 0;
209 if (ata_id_cdb_intr(id)) 209 if (ata_id_cdb_intr(id))
210 drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT; 210 drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
211 drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
211 /* we don't do head unloading on ATAPI devices */ 212 /* we don't do head unloading on ATAPI devices */
212 drive->dev_flags |= IDE_DFLAG_NO_UNLOAD; 213 drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
213 return; 214 return;
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index b26926487cc0..c31d0dd7a532 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -567,10 +567,10 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t
567void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) 567void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver)
568{ 568{
569 mutex_lock(&ide_setting_mtx); 569 mutex_lock(&ide_setting_mtx);
570 drive->settings = driver->settings; 570 drive->settings = driver->proc_devsets(drive);
571 mutex_unlock(&ide_setting_mtx); 571 mutex_unlock(&ide_setting_mtx);
572 572
573 ide_add_proc_entries(drive->proc, driver->proc, drive); 573 ide_add_proc_entries(drive->proc, driver->proc_entries(drive), drive);
574} 574}
575 575
576EXPORT_SYMBOL(ide_proc_register_driver); 576EXPORT_SYMBOL(ide_proc_register_driver);
@@ -591,7 +591,7 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
591{ 591{
592 unsigned long flags; 592 unsigned long flags;
593 593
594 ide_remove_proc_entries(drive->proc, driver->proc); 594 ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
595 595
596 mutex_lock(&ide_setting_mtx); 596 mutex_lock(&ide_setting_mtx);
597 spin_lock_irqsave(&ide_lock, flags); 597 spin_lock_irqsave(&ide_lock, flags);
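
The proc hooks change here from static driver fields to per-drive accessor functions, so a single ide-gd driver can expose disk or floppy entries depending on the attached media. A minimal sketch of swapping a data field for an accessor; every name below is an illustrative stand-in:

    #include <stdio.h>

    struct demo_drive { int is_floppy; };

    static const char *disk_entries   = "disk proc entries";
    static const char *floppy_entries = "floppy proc entries";

    /* Accessor chooses the table at call time, per drive. */
    static const char *demo_proc_entries(const struct demo_drive *d)
    {
        return d->is_floppy ? floppy_entries : disk_entries;
    }

    int main(void)
    {
        struct demo_drive hda = { .is_floppy = 0 };
        struct demo_drive hdb = { .is_floppy = 1 };

        printf("%s\n", demo_proc_entries(&hda));
        printf("%s\n", demo_proc_entries(&hdb));
        return 0;
    }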
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d879c7797cde..b2b2e5e8d38e 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2108,7 +2108,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
2108 2108
2109 /* device lacks locking support according to capabilities page */ 2109 /* device lacks locking support according to capabilities page */
2110 if ((caps[6] & 1) == 0) 2110 if ((caps[6] & 1) == 0)
2111 drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK; 2111 drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
2112 2112
2113 if (caps[7] & 0x02) 2113 if (caps[7] & 0x02)
2114 tape->blk_size = 512; 2114 tape->blk_size = 512;
@@ -2298,6 +2298,16 @@ static ide_proc_entry_t idetape_proc[] = {
2298 { "name", S_IFREG|S_IRUGO, proc_idetape_read_name, NULL }, 2298 { "name", S_IFREG|S_IRUGO, proc_idetape_read_name, NULL },
2299 { NULL, 0, NULL, NULL } 2299 { NULL, 0, NULL, NULL }
2300}; 2300};
2301
2302static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive)
2303{
2304 return idetape_proc;
2305}
2306
2307static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive)
2308{
2309 return idetape_settings;
2310}
2301#endif 2311#endif
2302 2312
2303static int ide_tape_probe(ide_drive_t *); 2313static int ide_tape_probe(ide_drive_t *);
@@ -2315,8 +2325,8 @@ static ide_driver_t idetape_driver = {
2315 .end_request = idetape_end_request, 2325 .end_request = idetape_end_request,
2316 .error = __ide_error, 2326 .error = __ide_error,
2317#ifdef CONFIG_IDE_PROC_FS 2327#ifdef CONFIG_IDE_PROC_FS
2318 .proc = idetape_proc, 2328 .proc_entries = ide_tape_proc_entries,
2319 .settings = idetape_settings, 2329 .proc_devsets = ide_tape_proc_devsets,
2320#endif 2330#endif
2321}; 2331};
2322 2332
diff --git a/drivers/ide/pci/Makefile b/drivers/ide/pci/Makefile
index 02e6ee7d751d..ab44a1f5f5a9 100644
--- a/drivers/ide/pci/Makefile
+++ b/drivers/ide/pci/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_BLK_DEV_CS5535) += cs5535.o
11obj-$(CONFIG_BLK_DEV_SC1200) += sc1200.o 11obj-$(CONFIG_BLK_DEV_SC1200) += sc1200.o
12obj-$(CONFIG_BLK_DEV_CY82C693) += cy82c693.o 12obj-$(CONFIG_BLK_DEV_CY82C693) += cy82c693.o
13obj-$(CONFIG_BLK_DEV_DELKIN) += delkin_cb.o 13obj-$(CONFIG_BLK_DEV_DELKIN) += delkin_cb.o
14obj-$(CONFIG_BLK_DEV_HPT34X) += hpt34x.o
15obj-$(CONFIG_BLK_DEV_HPT366) += hpt366.o 14obj-$(CONFIG_BLK_DEV_HPT366) += hpt366.o
16obj-$(CONFIG_BLK_DEV_IT8213) += it8213.o 15obj-$(CONFIG_BLK_DEV_IT8213) += it8213.o
17obj-$(CONFIG_BLK_DEV_IT821X) += it821x.o 16obj-$(CONFIG_BLK_DEV_IT821X) += it821x.o
diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c
index 8689a706f537..8f1b2d9f0513 100644
--- a/drivers/ide/pci/delkin_cb.c
+++ b/drivers/ide/pci/delkin_cb.c
@@ -46,10 +46,27 @@ static const struct ide_port_ops delkin_cb_port_ops = {
46 .quirkproc = ide_undecoded_slave, 46 .quirkproc = ide_undecoded_slave,
47}; 47};
48 48
49static unsigned int delkin_cb_init_chipset(struct pci_dev *dev)
50{
51 unsigned long base = pci_resource_start(dev, 0);
52 int i;
53
54 outb(0x02, base + 0x1e); /* set nIEN to block interrupts */
55 inb(base + 0x17); /* read status to clear interrupts */
56
57 for (i = 0; i < sizeof(setup); ++i) {
58 if (setup[i])
59 outb(setup[i], base + i);
60 }
61
62 return 0;
63}
64
49static const struct ide_port_info delkin_cb_port_info = { 65static const struct ide_port_info delkin_cb_port_info = {
50 .port_ops = &delkin_cb_port_ops, 66 .port_ops = &delkin_cb_port_ops,
51 .host_flags = IDE_HFLAG_IO_32BIT | IDE_HFLAG_UNMASK_IRQS | 67 .host_flags = IDE_HFLAG_IO_32BIT | IDE_HFLAG_UNMASK_IRQS |
52 IDE_HFLAG_NO_DMA, 68 IDE_HFLAG_NO_DMA,
69 .init_chipset = delkin_cb_init_chipset,
53}; 70};
54 71
55static int __devinit 72static int __devinit
@@ -57,7 +74,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
57{ 74{
58 struct ide_host *host; 75 struct ide_host *host;
59 unsigned long base; 76 unsigned long base;
60 int i, rc; 77 int rc;
61 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 78 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
62 79
63 rc = pci_enable_device(dev); 80 rc = pci_enable_device(dev);
@@ -72,12 +89,8 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
72 return rc; 89 return rc;
73 } 90 }
74 base = pci_resource_start(dev, 0); 91 base = pci_resource_start(dev, 0);
75 outb(0x02, base + 0x1e); /* set nIEN to block interrupts */ 92
76 inb(base + 0x17); /* read status to clear interrupts */ 93 delkin_cb_init_chipset(dev);
77 for (i = 0; i < sizeof(setup); ++i) {
78 if (setup[i])
79 outb(setup[i], base + i);
80 }
81 94
82 memset(&hw, 0, sizeof(hw)); 95 memset(&hw, 0, sizeof(hw));
83 ide_std_init_ports(&hw, base + 0x10, base + 0x1e); 96 ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
@@ -110,6 +123,40 @@ delkin_cb_remove (struct pci_dev *dev)
110 pci_disable_device(dev); 123 pci_disable_device(dev);
111} 124}
112 125
126#ifdef CONFIG_PM
127static int delkin_cb_suspend(struct pci_dev *dev, pm_message_t state)
128{
129 pci_save_state(dev);
130 pci_disable_device(dev);
131 pci_set_power_state(dev, pci_choose_state(dev, state));
132
133 return 0;
134}
135
136static int delkin_cb_resume(struct pci_dev *dev)
137{
138 struct ide_host *host = pci_get_drvdata(dev);
139 int rc;
140
141 pci_set_power_state(dev, PCI_D0);
142
143 rc = pci_enable_device(dev);
144 if (rc)
145 return rc;
146
147 pci_restore_state(dev);
148 pci_set_master(dev);
149
150 if (host->init_chipset)
151 host->init_chipset(dev);
152
153 return 0;
154}
155#else
156#define delkin_cb_suspend NULL
157#define delkin_cb_resume NULL
158#endif
159
113static struct pci_device_id delkin_cb_pci_tbl[] __devinitdata = { 160static struct pci_device_id delkin_cb_pci_tbl[] __devinitdata = {
114 { 0x1145, 0xf021, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, 161 { 0x1145, 0xf021, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
115 { 0x1145, 0xf024, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, 162 { 0x1145, 0xf024, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
@@ -122,6 +169,8 @@ static struct pci_driver delkin_cb_pci_driver = {
122 .id_table = delkin_cb_pci_tbl, 169 .id_table = delkin_cb_pci_tbl,
123 .probe = delkin_cb_probe, 170 .probe = delkin_cb_probe,
124 .remove = delkin_cb_remove, 171 .remove = delkin_cb_remove,
172 .suspend = delkin_cb_suspend,
173 .resume = delkin_cb_resume,
125}; 174};
126 175
127static int __init delkin_cb_init(void) 176static int __init delkin_cb_init(void)
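
For reference, the delkin_cb hunks above follow the legacy PCI power-management pattern: save state and power down on suspend, then power up, restore state, and re-run the one-time chipset setup on resume. A minimal sketch of that pattern (the foo_* names are hypothetical, not part of this patch):

/* Sketch of a legacy pci_driver suspend/resume pair; foo_reinit_hw()
 * stands in for a driver's init_chipset-style hook. */
static int foo_suspend(struct pci_dev *pdev, pm_message_t state)
{
	pci_save_state(pdev);			/* snapshot config space */
	pci_disable_device(pdev);
	pci_set_power_state(pdev, pci_choose_state(pdev, state));
	return 0;
}

static int foo_resume(struct pci_dev *pdev)
{
	int rc;

	pci_set_power_state(pdev, PCI_D0);	/* back to full power */
	rc = pci_enable_device(pdev);
	if (rc)
		return rc;
	pci_restore_state(pdev);		/* undo the snapshot */
	pci_set_master(pdev);
	foo_reinit_hw(pdev);			/* redo one-time chip setup */
	return 0;
}
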
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
deleted file mode 100644
index fb1a3aa57f07..000000000000
--- a/drivers/ide/pci/hpt34x.c
+++ /dev/null
@@ -1,193 +0,0 @@
1/*
2 * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
3 *
4 * May be copied or modified under the terms of the GNU General Public License
5 *
6 *
7 * 00:12.0 Unknown mass storage controller:
8 * Triones Technologies, Inc.
9 * Unknown device 0003 (rev 01)
10 *
11 * hde: UDMA 2 (0x0000 0x0002) (0x0000 0x0010)
12 * hdf: UDMA 2 (0x0002 0x0012) (0x0010 0x0030)
13 * hde: DMA 2 (0x0000 0x0002) (0x0000 0x0010)
14 * hdf: DMA 2 (0x0002 0x0012) (0x0010 0x0030)
15 * hdg: DMA 1 (0x0012 0x0052) (0x0030 0x0070)
16 * hdh: DMA 1 (0x0052 0x0252) (0x0070 0x00f0)
17 *
18 * ide-pci.c reference
19 *
20 * Since there are two cards that report almost identically,
21 * the only discernible difference is the values reported in pcicmd.
22 * Booting-BIOS card or HPT363 :: pcicmd == 0x07
23 * Non-bootable card or HPT343 :: pcicmd == 0x05
24 */
25
26#include <linux/module.h>
27#include <linux/types.h>
28#include <linux/kernel.h>
29#include <linux/ioport.h>
30#include <linux/interrupt.h>
31#include <linux/pci.h>
32#include <linux/init.h>
33#include <linux/ide.h>
34
35#define DRV_NAME "hpt34x"
36
37#define HPT343_DEBUG_DRIVE_INFO 0
38
39static void hpt34x_set_mode(ide_drive_t *drive, const u8 speed)
40{
41 struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
42 u32 reg1= 0, tmp1 = 0, reg2 = 0, tmp2 = 0;
43 u8 hi_speed, lo_speed;
44
45 hi_speed = speed >> 4;
46 lo_speed = speed & 0x0f;
47
48 if (hi_speed & 7) {
49 hi_speed = (hi_speed & 4) ? 0x01 : 0x10;
50 } else {
51 lo_speed <<= 5;
52 lo_speed >>= 5;
53 }
54
55 pci_read_config_dword(dev, 0x44, &reg1);
56 pci_read_config_dword(dev, 0x48, &reg2);
57 tmp1 = ((lo_speed << (3*drive->dn)) | (reg1 & ~(7 << (3*drive->dn))));
58 tmp2 = ((hi_speed << drive->dn) | (reg2 & ~(0x11 << drive->dn)));
59 pci_write_config_dword(dev, 0x44, tmp1);
60 pci_write_config_dword(dev, 0x48, tmp2);
61
62#if HPT343_DEBUG_DRIVE_INFO
63 printk("%s: %s drive%d (0x%04x 0x%04x) (0x%04x 0x%04x)" \
64 " (0x%02x 0x%02x)\n",
65 drive->name, ide_xfer_verbose(speed),
66 drive->dn, reg1, tmp1, reg2, tmp2,
67 hi_speed, lo_speed);
68#endif /* HPT343_DEBUG_DRIVE_INFO */
69}
70
71static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio)
72{
73 hpt34x_set_mode(drive, XFER_PIO_0 + pio);
74}
75
76/*
77 * If the BIOS does not set the IO base address to XX00, 343 will fail.
78 */
79#define HPT34X_PCI_INIT_REG 0x80
80
81static unsigned int init_chipset_hpt34x(struct pci_dev *dev)
82{
83 int i = 0;
84 unsigned long hpt34xIoBase = pci_resource_start(dev, 4);
85 unsigned long hpt_addr[4] = { 0x20, 0x34, 0x28, 0x3c };
86 unsigned long hpt_addr_len[4] = { 7, 3, 7, 3 };
87 u16 cmd;
88 unsigned long flags;
89
90 local_irq_save(flags);
91
92 pci_write_config_byte(dev, HPT34X_PCI_INIT_REG, 0x00);
93 pci_read_config_word(dev, PCI_COMMAND, &cmd);
94
95 if (cmd & PCI_COMMAND_MEMORY)
96 pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF0);
97 else
98 pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
99
100 /*
101 * Since 20-23 can be assigned and are R/W, we correct them.
102 */
103 pci_write_config_word(dev, PCI_COMMAND, cmd & ~PCI_COMMAND_IO);
104 for(i=0; i<4; i++) {
105 dev->resource[i].start = (hpt34xIoBase + hpt_addr[i]);
106 dev->resource[i].end = dev->resource[i].start + hpt_addr_len[i];
107 dev->resource[i].flags = IORESOURCE_IO;
108 pci_write_config_dword(dev,
109 (PCI_BASE_ADDRESS_0 + (i * 4)),
110 dev->resource[i].start);
111 }
112 pci_write_config_word(dev, PCI_COMMAND, cmd);
113
114 local_irq_restore(flags);
115
116 return dev->irq;
117}
118
119static const struct ide_port_ops hpt34x_port_ops = {
120 .set_pio_mode = hpt34x_set_pio_mode,
121 .set_dma_mode = hpt34x_set_mode,
122};
123
124#define IDE_HFLAGS_HPT34X \
125 (IDE_HFLAG_NO_ATAPI_DMA | \
126 IDE_HFLAG_NO_DSC | \
127 IDE_HFLAG_NO_AUTODMA)
128
129static const struct ide_port_info hpt34x_chipsets[] __devinitdata = {
130 { /* 0: HPT343 */
131 .name = DRV_NAME,
132 .init_chipset = init_chipset_hpt34x,
133 .port_ops = &hpt34x_port_ops,
134 .host_flags = IDE_HFLAGS_HPT34X | IDE_HFLAG_NON_BOOTABLE,
135 .pio_mask = ATA_PIO5,
136 },
137 { /* 1: HPT345 */
138 .name = DRV_NAME,
139 .init_chipset = init_chipset_hpt34x,
140 .port_ops = &hpt34x_port_ops,
141 .host_flags = IDE_HFLAGS_HPT34X | IDE_HFLAG_OFF_BOARD,
142 .pio_mask = ATA_PIO5,
143#ifdef CONFIG_HPT34X_AUTODMA
144 .swdma_mask = ATA_SWDMA2,
145 .mwdma_mask = ATA_MWDMA2,
146 .udma_mask = ATA_UDMA2,
147#endif
148 }
149};
150
151static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
152{
153 const struct ide_port_info *d;
154 u16 pcicmd = 0;
155
156 pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
157
158 d = &hpt34x_chipsets[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0];
159
160 return ide_pci_init_one(dev, d, NULL);
161}
162
163static const struct pci_device_id hpt34x_pci_tbl[] = {
164 { PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT343), 0 },
165 { 0, },
166};
167MODULE_DEVICE_TABLE(pci, hpt34x_pci_tbl);
168
169static struct pci_driver hpt34x_pci_driver = {
170 .name = "HPT34x_IDE",
171 .id_table = hpt34x_pci_tbl,
172 .probe = hpt34x_init_one,
173 .remove = ide_pci_remove,
174 .suspend = ide_pci_suspend,
175 .resume = ide_pci_resume,
176};
177
178static int __init hpt34x_ide_init(void)
179{
180 return ide_pci_register_driver(&hpt34x_pci_driver);
181}
182
183static void __exit hpt34x_ide_exit(void)
184{
185 pci_unregister_driver(&hpt34x_pci_driver);
186}
187
188module_init(hpt34x_ide_init);
189module_exit(hpt34x_ide_exit);
190
191MODULE_AUTHOR("Andre Hedrick");
192MODULE_DESCRIPTION("PCI driver module for Highpoint 34x IDE");
193MODULE_LICENSE("GPL");
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 9cf171cb9376..a7909e9c720e 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -3,7 +3,7 @@
3 * Portions Copyright (C) 2001 Sun Microsystems, Inc. 3 * Portions Copyright (C) 2001 Sun Microsystems, Inc.
4 * Portions Copyright (C) 2003 Red Hat Inc 4 * Portions Copyright (C) 2003 Red Hat Inc
5 * Portions Copyright (C) 2007 Bartlomiej Zolnierkiewicz 5 * Portions Copyright (C) 2007 Bartlomiej Zolnierkiewicz
6 * Portions Copyright (C) 2005-2007 MontaVista Software, Inc. 6 * Portions Copyright (C) 2005-2008 MontaVista Software, Inc.
7 * 7 *
8 * Thanks to HighPoint Technologies for their assistance, and hardware. 8 * Thanks to HighPoint Technologies for their assistance, and hardware.
9 * Special Thanks to Jon Burchmore in SanDiego for the deep pockets, his 9 * Special Thanks to Jon Burchmore in SanDiego for the deep pockets, his
@@ -748,26 +748,24 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
748 struct pci_dev *dev = to_pci_dev(hwif->dev); 748 struct pci_dev *dev = to_pci_dev(hwif->dev);
749 struct hpt_info *info = hpt3xx_get_info(hwif->dev); 749 struct hpt_info *info = hpt3xx_get_info(hwif->dev);
750 750
751 if (drive->quirk_list) { 751 if (drive->quirk_list == 0)
752 if (info->chip_type >= HPT370) { 752 return;
753 u8 scr1 = 0; 753
754 754 if (info->chip_type >= HPT370) {
755 pci_read_config_byte(dev, 0x5a, &scr1); 755 u8 scr1 = 0;
756 if (((scr1 & 0x10) >> 4) != mask) { 756
757 if (mask) 757 pci_read_config_byte(dev, 0x5a, &scr1);
758 scr1 |= 0x10; 758 if (((scr1 & 0x10) >> 4) != mask) {
759 else
760 scr1 &= ~0x10;
761 pci_write_config_byte(dev, 0x5a, scr1);
762 }
763 } else {
764 if (mask) 759 if (mask)
765 disable_irq(hwif->irq); 760 scr1 |= 0x10;
766 else 761 else
767 enable_irq (hwif->irq); 762 scr1 &= ~0x10;
763 pci_write_config_byte(dev, 0x5a, scr1);
768 } 764 }
769 } else 765 } else if (mask)
770 outb(ATA_DEVCTL_OBS | (mask ? 2 : 0), hwif->io_ports.ctl_addr); 766 disable_irq(hwif->irq);
767 else
768 enable_irq(hwif->irq);
771} 769}
772 770
773/* 771/*
@@ -1289,7 +1287,6 @@ static u8 hpt3xx_cable_detect(ide_hwif_t *hwif)
1289 1287
1290static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) 1288static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
1291{ 1289{
1292 struct pci_dev *dev = to_pci_dev(hwif->dev);
1293 struct hpt_info *info = hpt3xx_get_info(hwif->dev); 1290 struct hpt_info *info = hpt3xx_get_info(hwif->dev);
1294 int serialize = HPT_SERIALIZE_IO; 1291 int serialize = HPT_SERIALIZE_IO;
1295 u8 chip_type = info->chip_type; 1292 u8 chip_type = info->chip_type;
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index 9ce1d8059921..49f163aa51e3 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -617,7 +617,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
617 unsigned long intmask_port; 617 unsigned long intmask_port;
618 unsigned long mode_port; 618 unsigned long mode_port;
619 unsigned long ecmode_port; 619 unsigned long ecmode_port;
620 unsigned long dma_status_port;
621 u32 reg = 0; 620 u32 reg = 0;
622 struct scc_ports *ports; 621 struct scc_ports *ports;
623 int rc; 622 int rc;
@@ -637,7 +636,6 @@ static int __devinit init_setup_scc(struct pci_dev *dev,
637 intmask_port = dma_base + 0x010; 636 intmask_port = dma_base + 0x010;
638 mode_port = ctl_base + 0x024; 637 mode_port = ctl_base + 0x024;
639 ecmode_port = ctl_base + 0xf00; 638 ecmode_port = ctl_base + 0xf00;
640 dma_status_port = dma_base + 0x004;
641 639
642 /* controller initialization */ 640 /* controller initialization */
643 reg = 0; 641 reg = 0;
@@ -843,8 +841,6 @@ static u8 scc_cable_detect(ide_hwif_t *hwif)
843 841
844static void __devinit init_hwif_scc(ide_hwif_t *hwif) 842static void __devinit init_hwif_scc(ide_hwif_t *hwif)
845{ 843{
846 struct scc_ports *ports = ide_get_hwifdata(hwif);
847
848 /* PTERADD */ 844 /* PTERADD */
849 out_be32((void __iomem *)(hwif->dma_base + 0x018), hwif->dmatable_dma); 845 out_be32((void __iomem *)(hwif->dma_base + 0x018), hwif->dmatable_dma);
850 846
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index dd634541ce36..8af9b23499fd 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -101,18 +101,8 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
101 for (i = 0; i <= 7; i++) 101 for (i = 0; i <= 7; i++)
102 hw->io_ports_array[i] = reg + i * 4; 102 hw->io_ports_array[i] = reg + i * 4;
103 103
104 if (ctrl_port) 104 hw->io_ports.ctl_addr = ctrl_port;
105 hw->io_ports.ctl_addr = ctrl_port; 105 hw->io_ports.irq_addr = irq_port;
106
107 if (irq_port)
108 hw->io_ports.irq_addr = irq_port;
109}
110
111static void
112sgiioc4_maskproc(ide_drive_t * drive, int mask)
113{
114 writeb(ATA_DEVCTL_OBS | (mask ? 2 : 0),
115 (void __iomem *)drive->hwif->io_ports.ctl_addr);
116} 106}
117 107
118static int 108static int
@@ -310,16 +300,14 @@ static u8 sgiioc4_read_status(ide_hwif_t *hwif)
310 unsigned long port = hwif->io_ports.status_addr; 300 unsigned long port = hwif->io_ports.status_addr;
311 u8 reg = (u8) readb((void __iomem *) port); 301 u8 reg = (u8) readb((void __iomem *) port);
312 302
313 if ((port & 0xFFF) == 0x11C) { /* Status register of IOC4 */ 303 if (!(reg & ATA_BUSY)) { /* Not busy... check for interrupt */
314 if (!(reg & ATA_BUSY)) { /* Not busy... check for interrupt */ 304 unsigned long other_ir = port - 0x110;
315 unsigned long other_ir = port - 0x110; 305 unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
316 unsigned int intr_reg = (u32) readl((void __iomem *) other_ir);
317 306
318 /* Clear the Interrupt, Error bits on the IOC4 */ 307 /* Clear the Interrupt, Error bits on the IOC4 */
319 if (intr_reg & 0x03) { 308 if (intr_reg & 0x03) {
320 writel(0x03, (void __iomem *) other_ir); 309 writel(0x03, (void __iomem *) other_ir);
321 intr_reg = (u32) readl((void __iomem *) other_ir); 310 intr_reg = (u32) readl((void __iomem *) other_ir);
322 }
323 } 311 }
324 } 312 }
325 313
@@ -332,13 +320,9 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
332{ 320{
333 struct pci_dev *dev = to_pci_dev(hwif->dev); 321 struct pci_dev *dev = to_pci_dev(hwif->dev);
334 unsigned long dma_base = pci_resource_start(dev, 0) + IOC4_DMA_OFFSET; 322 unsigned long dma_base = pci_resource_start(dev, 0) + IOC4_DMA_OFFSET;
335 void __iomem *virt_dma_base;
336 int num_ports = sizeof (ioc4_dma_regs_t); 323 int num_ports = sizeof (ioc4_dma_regs_t);
337 void *pad; 324 void *pad;
338 325
339 if (dma_base == 0)
340 return -1;
341
342 printk(KERN_INFO " %s: MMIO-DMA\n", hwif->name); 326 printk(KERN_INFO " %s: MMIO-DMA\n", hwif->name);
343 327
344 if (request_mem_region(dma_base, num_ports, hwif->name) == NULL) { 328 if (request_mem_region(dma_base, num_ports, hwif->name) == NULL) {
@@ -348,14 +332,8 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
348 return -1; 332 return -1;
349 } 333 }
350 334
351 virt_dma_base = ioremap(dma_base, num_ports); 335 hwif->dma_base = (unsigned long)hwif->io_ports.irq_addr +
352 if (virt_dma_base == NULL) { 336 IOC4_DMA_OFFSET;
353 printk(KERN_ERR "%s(%s) -- ERROR: unable to map addresses "
354 "0x%lx to 0x%lx\n", __func__, hwif->name,
355 dma_base, dma_base + num_ports - 1);
356 goto dma_remap_failure;
357 }
358 hwif->dma_base = (unsigned long) virt_dma_base;
359 337
360 hwif->sg_max_nents = IOC4_PRD_ENTRIES; 338 hwif->sg_max_nents = IOC4_PRD_ENTRIES;
361 339
@@ -379,9 +357,6 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
379 printk(KERN_INFO "%s: changing from DMA to PIO mode", hwif->name); 357 printk(KERN_INFO "%s: changing from DMA to PIO mode", hwif->name);
380 358
381dma_pci_alloc_failure: 359dma_pci_alloc_failure:
382 iounmap(virt_dma_base);
383
384dma_remap_failure:
385 release_mem_region(dma_base, num_ports); 360 release_mem_region(dma_base, num_ports);
386 361
387 return -1; 362 return -1;
@@ -563,8 +538,6 @@ static const struct ide_port_ops sgiioc4_port_ops = {
563 .set_dma_mode = sgiioc4_set_dma_mode, 538 .set_dma_mode = sgiioc4_set_dma_mode,
564 /* reset DMA engine, clear IRQs */ 539 /* reset DMA engine, clear IRQs */
565 .resetproc = sgiioc4_resetproc, 540 .resetproc = sgiioc4_resetproc,
566 /* mask on/off NIEN register */
567 .maskproc = sgiioc4_maskproc,
568}; 541};
569 542
570static const struct ide_dma_ops sgiioc4_dma_ops = { 543static const struct ide_dma_ops sgiioc4_dma_ops = {
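
The sgiioc4 read_status hunk above keeps the IOC4 interrupt-acknowledge idiom while dropping the port-address check: latched interrupt/error bits are cleared by writing them back, then the register is re-read. Sketched in isolation (demo_ack_irq() is hypothetical):

/* Sketch: write-1-to-clear acknowledge of latched MMIO status bits. */
static void demo_ack_irq(void __iomem *ir)
{
	u32 v = readl(ir);

	if (v & 0x03) {			/* interrupt or error latched */
		writel(0x03, ir);	/* clear both bits */
		(void)readl(ir);	/* re-read to confirm the clear */
	}
}
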
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index e3e40427e00e..c7ff1e11ea85 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -179,7 +179,7 @@ config LEDS_TRIGGER_TIMER
179 179
180config LEDS_TRIGGER_IDE_DISK 180config LEDS_TRIGGER_IDE_DISK
181 bool "LED IDE Disk Trigger" 181 bool "LED IDE Disk Trigger"
182 depends on LEDS_TRIGGERS && BLK_DEV_IDEDISK 182 depends on LEDS_TRIGGERS && IDE_GD_ATA
183 help 183 help
184 This allows LEDs to be controlled by IDE disk activity. 184 This allows LEDs to be controlled by IDE disk activity.
185 If unsure, say Y. 185 If unsure, say Y.
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index ba5aa2008273..e4c0db4dc7b1 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -123,7 +123,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
123 irqnr = asic->irq_base + 123 irqnr = asic->irq_base +
124 (ASIC3_GPIOS_PER_BANK * bank) 124 (ASIC3_GPIOS_PER_BANK * bank)
125 + i; 125 + i;
126 desc = irq_desc + irqnr; 126 desc = irq_to_desc(irqnr);
127 desc->handle_irq(irqnr, desc); 127 desc->handle_irq(irqnr, desc);
128 if (asic->irq_bothedge[bank] & bit) 128 if (asic->irq_bothedge[bank] & bit)
129 asic3_irq_flip_edge(asic, base, 129 asic3_irq_flip_edge(asic, base,
@@ -136,7 +136,7 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
136 for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) { 136 for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) {
137 /* They start at bit 4 and go up */ 137 /* They start at bit 4 and go up */
138 if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) { 138 if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) {
139 desc = irq_desc + asic->irq_base + i; 139 desc = irq_to_desc(asic->irq_base + i);
140 desc->handle_irq(asic->irq_base + i, 140 desc->handle_irq(asic->irq_base + i,
141 desc); 141 desc);
142 } 142 }
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 50dff6e0088d..1a4d04664d6d 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -112,7 +112,7 @@ static void egpio_handler(unsigned int irq, struct irq_desc *desc)
112 /* Run irq handler */ 112 /* Run irq handler */
113 pr_debug("got IRQ %d\n", irqpin); 113 pr_debug("got IRQ %d\n", irqpin);
114 irq = ei->irq_start + irqpin; 114 irq = ei->irq_start + irqpin;
115 desc = &irq_desc[irq]; 115 desc = irq_to_desc(irq);
116 desc->handle_irq(irq, desc); 116 desc->handle_irq(irq, desc);
117 } 117 }
118} 118}
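
The asic3 and htc-egpio hunks above, like the parisc hunks further down, are the same mechanical sparse-IRQ conversion: descriptors are obtained through irq_to_desc() instead of indexing a static irq_desc[] array. A minimal sketch of dispatching a demuxed child interrupt with the new accessor (demo_dispatch() is hypothetical):

/* Sketch: look up the descriptor, then invoke its flow handler.
 * Era signature: handle_irq(unsigned int irq, struct irq_desc *desc). */
static void demo_dispatch(unsigned int child_irq)
{
	struct irq_desc *desc = irq_to_desc(child_irq);	/* was: irq_desc + child_irq */

	desc->handle_irq(child_irq, desc);
}
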
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 491ee16da5c1..9ba295d9dd97 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -90,7 +90,7 @@ static int vortex_debug = 1;
90#include <linux/eisa.h> 90#include <linux/eisa.h>
91#include <linux/bitops.h> 91#include <linux/bitops.h>
92#include <linux/jiffies.h> 92#include <linux/jiffies.h>
93#include <asm/irq.h> /* For NR_IRQS only. */ 93#include <asm/irq.h> /* For nr_irqs only. */
94#include <asm/io.h> 94#include <asm/io.h>
95#include <asm/uaccess.h> 95#include <asm/uaccess.h>
96 96
@@ -1221,7 +1221,7 @@ static int __devinit vortex_probe1(struct device *gendev,
1221 if (print_info) 1221 if (print_info)
1222 printk(", IRQ %d\n", dev->irq); 1222 printk(", IRQ %d\n", dev->irq);
1223 /* Tell them about an invalid IRQ. */ 1223 /* Tell them about an invalid IRQ. */
1224 if (dev->irq <= 0 || dev->irq >= NR_IRQS) 1224 if (dev->irq <= 0 || dev->irq >= nr_irqs)
1225 printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n", 1225 printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n",
1226 dev->irq); 1226 dev->irq);
1227 1227
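
Here and in the hamradio and sbni hunks below, the compile-time NR_IRQS bound gives way to nr_irqs, the IRQ count the kernel determined at runtime. The validation idiom, sketched (demo_check_irq() is hypothetical; the error code is illustrative):

/* Sketch: range-check a BIOS- or user-supplied IRQ number. */
static int demo_check_irq(int irq)
{
	if (irq <= 0 || irq >= nr_irqs)
		return -EINVAL;	/* outside what this kernel can manage */
	return 0;
}
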
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index 17ac6975d70d..b6a816e60c0f 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -416,10 +416,10 @@ static int ser12_open(struct net_device *dev)
416 if (!dev || !bc) 416 if (!dev || !bc)
417 return -ENXIO; 417 return -ENXIO;
418 if (!dev->base_addr || dev->base_addr > 0xffff-SER12_EXTENT || 418 if (!dev->base_addr || dev->base_addr > 0xffff-SER12_EXTENT ||
419 dev->irq < 2 || dev->irq > NR_IRQS) { 419 dev->irq < 2 || dev->irq > nr_irqs) {
420 printk(KERN_INFO "baycom_ser_fdx: invalid port number (max %u) " 420 printk(KERN_INFO "baycom_ser_fdx: invalid port number (max %u) "
421 "or irq (2 <= irq <= %d)\n", 421 "or irq (2 <= irq <= %d)\n",
422 0xffff-SER12_EXTENT, NR_IRQS); 422 0xffff-SER12_EXTENT, nr_irqs);
423 return -ENXIO; 423 return -ENXIO;
424 } 424 }
425 if (bc->baud < 300 || bc->baud > 4800) { 425 if (bc->baud < 300 || bc->baud > 4800) {
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 45ae9d1191d7..c17e39bc5460 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -1465,7 +1465,7 @@ static void z8530_init(void)
1465 printk(KERN_INFO "Init Z8530 driver: %u channels, IRQ", Nchips*2); 1465 printk(KERN_INFO "Init Z8530 driver: %u channels, IRQ", Nchips*2);
1466 1466
1467 flag=" "; 1467 flag=" ";
1468 for (k = 0; k < NR_IRQS; k++) 1468 for (k = 0; k < nr_irqs; k++)
1469 if (Ivec[k].used) 1469 if (Ivec[k].used)
1470 { 1470 {
1471 printk("%s%d", flag, k); 1471 printk("%s%d", flag, k);
@@ -1728,7 +1728,7 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1728 1728
1729 if (hwcfg.irq == 2) hwcfg.irq = 9; 1729 if (hwcfg.irq == 2) hwcfg.irq = 9;
1730 1730
1731 if (hwcfg.irq < 0 || hwcfg.irq >= NR_IRQS) 1731 if (hwcfg.irq < 0 || hwcfg.irq >= nr_irqs)
1732 return -EINVAL; 1732 return -EINVAL;
1733 1733
1734 if (!Ivec[hwcfg.irq].used && hwcfg.irq) 1734 if (!Ivec[hwcfg.irq].used && hwcfg.irq)
@@ -2148,7 +2148,7 @@ static void __exit scc_cleanup_driver(void)
2148 } 2148 }
2149 2149
2150 /* To unload the port must be closed so no real IRQ pending */ 2150 /* To unload the port must be closed so no real IRQ pending */
2151 for (k=0; k < NR_IRQS ; k++) 2151 for (k = 0; k < nr_irqs ; k++)
2152 if (Ivec[k].used) free_irq(k, NULL); 2152 if (Ivec[k].used) free_irq(k, NULL);
2153 2153
2154 local_irq_enable(); 2154 local_irq_enable();
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 38b90e7a7ed3..7914867110ed 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -168,7 +168,7 @@ static int get_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
168 netif_device_detach(pegasus->net); 168 netif_device_detach(pegasus->net);
169 if (netif_msg_drv(pegasus) && printk_ratelimit()) 169 if (netif_msg_drv(pegasus) && printk_ratelimit())
170 dev_err(&pegasus->intf->dev, "%s, status %d\n", 170 dev_err(&pegasus->intf->dev, "%s, status %d\n",
171 __FUNCTION__, ret); 171 __func__, ret);
172 goto out; 172 goto out;
173 } 173 }
174 174
@@ -192,7 +192,7 @@ static int set_registers(pegasus_t * pegasus, __u16 indx, __u16 size,
192 if (!buffer) { 192 if (!buffer) {
193 if (netif_msg_drv(pegasus)) 193 if (netif_msg_drv(pegasus))
194 dev_warn(&pegasus->intf->dev, "out of memory in %s\n", 194 dev_warn(&pegasus->intf->dev, "out of memory in %s\n",
195 __FUNCTION__); 195 __func__);
196 return -ENOMEM; 196 return -ENOMEM;
197 } 197 }
198 memcpy(buffer, data, size); 198 memcpy(buffer, data, size);
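
The pegasus hunks are a pure identifier cleanup: the GCC-specific __FUNCTION__ extension becomes C99's predefined __func__. Usage is unchanged, e.g. (illustrative only):

	printk(KERN_ERR "%s: status %d\n", __func__, ret);
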
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index f972fef87c98..ee51b6a5e605 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -318,7 +318,7 @@ sbni_pci_probe( struct net_device *dev )
318 continue; 318 continue;
319 } 319 }
320 320
321 if( pci_irq_line <= 0 || pci_irq_line >= NR_IRQS ) 321 if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
322 printk( KERN_WARNING " WARNING: The PCI BIOS assigned " 322 printk( KERN_WARNING " WARNING: The PCI BIOS assigned "
323 "this PCI card to IRQ %d, which is unlikely " 323 "this PCI card to IRQ %d, which is unlikely "
324 "to work!.\n" 324 "to work!.\n"
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b30e38f3a50d..dcc1e9958d2f 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -66,15 +66,8 @@
66#undef DEBUG_CCIO_RUN_SG 66#undef DEBUG_CCIO_RUN_SG
67 67
68#ifdef CONFIG_PROC_FS 68#ifdef CONFIG_PROC_FS
69/* 69/* depends on proc fs support. But costs CPU performance. */
70 * CCIO_SEARCH_TIME can help measure how fast the bitmap search is. 70#undef CCIO_COLLECT_STATS
71 * impacts performance though - ditch it if you don't use it.
72 */
73#define CCIO_SEARCH_TIME
74#undef CCIO_MAP_STATS
75#else
76#undef CCIO_SEARCH_TIME
77#undef CCIO_MAP_STATS
78#endif 71#endif
79 72
80#include <linux/proc_fs.h> 73#include <linux/proc_fs.h>
@@ -239,12 +232,10 @@ struct ioc {
239 u32 res_size; /* size of resource map in bytes */ 232 u32 res_size; /* size of resource map in bytes */
240 spinlock_t res_lock; 233 spinlock_t res_lock;
241 234
242#ifdef CCIO_SEARCH_TIME 235#ifdef CCIO_COLLECT_STATS
243#define CCIO_SEARCH_SAMPLE 0x100 236#define CCIO_SEARCH_SAMPLE 0x100
244 unsigned long avg_search[CCIO_SEARCH_SAMPLE]; 237 unsigned long avg_search[CCIO_SEARCH_SAMPLE];
245 unsigned long avg_idx; /* current index into avg_search */ 238 unsigned long avg_idx; /* current index into avg_search */
246#endif
247#ifdef CCIO_MAP_STATS
248 unsigned long used_pages; 239 unsigned long used_pages;
249 unsigned long msingle_calls; 240 unsigned long msingle_calls;
250 unsigned long msingle_pages; 241 unsigned long msingle_pages;
@@ -351,7 +342,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
351 unsigned int pages_needed = size >> IOVP_SHIFT; 342 unsigned int pages_needed = size >> IOVP_SHIFT;
352 unsigned int res_idx; 343 unsigned int res_idx;
353 unsigned long boundary_size; 344 unsigned long boundary_size;
354#ifdef CCIO_SEARCH_TIME 345#ifdef CCIO_COLLECT_STATS
355 unsigned long cr_start = mfctl(16); 346 unsigned long cr_start = mfctl(16);
356#endif 347#endif
357 348
@@ -406,7 +397,7 @@ resource_found:
406 DBG_RES("%s() res_idx %d res_hint: %d\n", 397 DBG_RES("%s() res_idx %d res_hint: %d\n",
407 __func__, res_idx, ioc->res_hint); 398 __func__, res_idx, ioc->res_hint);
408 399
409#ifdef CCIO_SEARCH_TIME 400#ifdef CCIO_COLLECT_STATS
410 { 401 {
411 unsigned long cr_end = mfctl(16); 402 unsigned long cr_end = mfctl(16);
412 unsigned long tmp = cr_end - cr_start; 403 unsigned long tmp = cr_end - cr_start;
@@ -416,7 +407,7 @@ resource_found:
416 ioc->avg_search[ioc->avg_idx++] = cr_start; 407 ioc->avg_search[ioc->avg_idx++] = cr_start;
417 ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1; 408 ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
418#endif 409#endif
419#ifdef CCIO_MAP_STATS 410#ifdef CCIO_COLLECT_STATS
420 ioc->used_pages += pages_needed; 411 ioc->used_pages += pages_needed;
421#endif 412#endif
422 /* 413 /*
@@ -452,7 +443,7 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
452 DBG_RES("%s(): res_idx: %d pages_mapped %d\n", 443 DBG_RES("%s(): res_idx: %d pages_mapped %d\n",
453 __func__, res_idx, pages_mapped); 444 __func__, res_idx, pages_mapped);
454 445
455#ifdef CCIO_MAP_STATS 446#ifdef CCIO_COLLECT_STATS
456 ioc->used_pages -= pages_mapped; 447 ioc->used_pages -= pages_mapped;
457#endif 448#endif
458 449
@@ -764,7 +755,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
764 size = ALIGN(size + offset, IOVP_SIZE); 755 size = ALIGN(size + offset, IOVP_SIZE);
765 spin_lock_irqsave(&ioc->res_lock, flags); 756 spin_lock_irqsave(&ioc->res_lock, flags);
766 757
767#ifdef CCIO_MAP_STATS 758#ifdef CCIO_COLLECT_STATS
768 ioc->msingle_calls++; 759 ioc->msingle_calls++;
769 ioc->msingle_pages += size >> IOVP_SHIFT; 760 ioc->msingle_pages += size >> IOVP_SHIFT;
770#endif 761#endif
@@ -828,7 +819,7 @@ ccio_unmap_single(struct device *dev, dma_addr_t iova, size_t size,
828 819
829 spin_lock_irqsave(&ioc->res_lock, flags); 820 spin_lock_irqsave(&ioc->res_lock, flags);
830 821
831#ifdef CCIO_MAP_STATS 822#ifdef CCIO_COLLECT_STATS
832 ioc->usingle_calls++; 823 ioc->usingle_calls++;
833 ioc->usingle_pages += size >> IOVP_SHIFT; 824 ioc->usingle_pages += size >> IOVP_SHIFT;
834#endif 825#endif
@@ -894,7 +885,7 @@ ccio_free_consistent(struct device *dev, size_t size, void *cpu_addr,
894*/ 885*/
895#define PIDE_FLAG 0x80000000UL 886#define PIDE_FLAG 0x80000000UL
896 887
897#ifdef CCIO_MAP_STATS 888#ifdef CCIO_COLLECT_STATS
898#define IOMMU_MAP_STATS 889#define IOMMU_MAP_STATS
899#endif 890#endif
900#include "iommu-helpers.h" 891#include "iommu-helpers.h"
@@ -938,7 +929,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
938 929
939 spin_lock_irqsave(&ioc->res_lock, flags); 930 spin_lock_irqsave(&ioc->res_lock, flags);
940 931
941#ifdef CCIO_MAP_STATS 932#ifdef CCIO_COLLECT_STATS
942 ioc->msg_calls++; 933 ioc->msg_calls++;
943#endif 934#endif
944 935
@@ -997,13 +988,13 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
997 DBG_RUN_SG("%s() START %d entries, %08lx,%x\n", 988 DBG_RUN_SG("%s() START %d entries, %08lx,%x\n",
998 __func__, nents, sg_virt_addr(sglist), sglist->length); 989 __func__, nents, sg_virt_addr(sglist), sglist->length);
999 990
1000#ifdef CCIO_MAP_STATS 991#ifdef CCIO_COLLECT_STATS
1001 ioc->usg_calls++; 992 ioc->usg_calls++;
1002#endif 993#endif
1003 994
1004 while(sg_dma_len(sglist) && nents--) { 995 while(sg_dma_len(sglist) && nents--) {
1005 996
1006#ifdef CCIO_MAP_STATS 997#ifdef CCIO_COLLECT_STATS
1007 ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT; 998 ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
1008#endif 999#endif
1009 ccio_unmap_single(dev, sg_dma_address(sglist), 1000 ccio_unmap_single(dev, sg_dma_address(sglist),
@@ -1048,7 +1039,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
1048 len += seq_printf(m, "IO PDIR size : %d bytes (%d entries)\n", 1039 len += seq_printf(m, "IO PDIR size : %d bytes (%d entries)\n",
1049 total_pages * 8, total_pages); 1040 total_pages * 8, total_pages);
1050 1041
1051#ifdef CCIO_MAP_STATS 1042#ifdef CCIO_COLLECT_STATS
1052 len += seq_printf(m, "IO PDIR entries : %ld free %ld used (%d%%)\n", 1043 len += seq_printf(m, "IO PDIR entries : %ld free %ld used (%d%%)\n",
1053 total_pages - ioc->used_pages, ioc->used_pages, 1044 total_pages - ioc->used_pages, ioc->used_pages,
1054 (int)(ioc->used_pages * 100 / total_pages)); 1045 (int)(ioc->used_pages * 100 / total_pages));
@@ -1057,7 +1048,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
1057 len += seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n", 1048 len += seq_printf(m, "Resource bitmap : %d bytes (%d pages)\n",
1058 ioc->res_size, total_pages); 1049 ioc->res_size, total_pages);
1059 1050
1060#ifdef CCIO_SEARCH_TIME 1051#ifdef CCIO_COLLECT_STATS
1061 min = max = ioc->avg_search[0]; 1052 min = max = ioc->avg_search[0];
1062 for(j = 0; j < CCIO_SEARCH_SAMPLE; ++j) { 1053 for(j = 0; j < CCIO_SEARCH_SAMPLE; ++j) {
1063 avg += ioc->avg_search[j]; 1054 avg += ioc->avg_search[j];
@@ -1070,7 +1061,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
1070 len += seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n", 1061 len += seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
1071 min, avg, max); 1062 min, avg, max);
1072#endif 1063#endif
1073#ifdef CCIO_MAP_STATS 1064#ifdef CCIO_COLLECT_STATS
1074 len += seq_printf(m, "pci_map_single(): %8ld calls %8ld pages (avg %d/1000)\n", 1065 len += seq_printf(m, "pci_map_single(): %8ld calls %8ld pages (avg %d/1000)\n",
1075 ioc->msingle_calls, ioc->msingle_pages, 1066 ioc->msingle_calls, ioc->msingle_pages,
1076 (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls)); 1067 (int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
@@ -1088,7 +1079,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
1088 len += seq_printf(m, "pci_unmap_sg() : %8ld calls %8ld pages (avg %d/1000)\n\n\n", 1079 len += seq_printf(m, "pci_unmap_sg() : %8ld calls %8ld pages (avg %d/1000)\n\n\n",
1089 ioc->usg_calls, ioc->usg_pages, 1080 ioc->usg_calls, ioc->usg_pages,
1090 (int)((ioc->usg_pages * 1000)/ioc->usg_calls)); 1081 (int)((ioc->usg_pages * 1000)/ioc->usg_calls));
1091#endif /* CCIO_MAP_STATS */ 1082#endif /* CCIO_COLLECT_STATS */
1092 1083
1093 ioc = ioc->next; 1084 ioc = ioc->next;
1094 } 1085 }
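
The ccio-dma hunks fold the two former debug switches (bitmap-search timing and mapping counters) into a single CCIO_COLLECT_STATS knob that is compiled out by default. The resulting guard pattern, sketched with one of the file's counters (demo_count_single() is hypothetical):

/* Sketch: stats code exists only when the knob is defined. */
#ifdef CONFIG_PROC_FS
/* #define CCIO_COLLECT_STATS */	/* opt in: costs CPU per mapping */
#endif

static void demo_count_single(struct ioc *ioc, unsigned int pages)
{
#ifdef CCIO_COLLECT_STATS
	ioc->msingle_calls++;
	ioc->msingle_pages += pages;
#endif
}
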
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index fd56128525d1..3bc54b30c3a1 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -298,7 +298,8 @@ struct pci_port_ops dino_port_ops = {
298 298
299static void dino_disable_irq(unsigned int irq) 299static void dino_disable_irq(unsigned int irq)
300{ 300{
301 struct dino_device *dino_dev = irq_desc[irq].chip_data; 301 struct irq_desc *desc = irq_to_desc(irq);
302 struct dino_device *dino_dev = desc->chip_data;
302 int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS); 303 int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
303 304
304 DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, irq); 305 DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, irq);
@@ -310,7 +311,8 @@ static void dino_disable_irq(unsigned int irq)
310 311
311static void dino_enable_irq(unsigned int irq) 312static void dino_enable_irq(unsigned int irq)
312{ 313{
313 struct dino_device *dino_dev = irq_desc[irq].chip_data; 314 struct irq_desc *desc = irq_to_desc(irq);
315 struct dino_device *dino_dev = desc->chip_data;
314 int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS); 316 int local_irq = gsc_find_local_irq(irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
315 u32 tmp; 317 u32 tmp;
316 318
diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c
index 771cef592542..7891db50c483 100644
--- a/drivers/parisc/eisa.c
+++ b/drivers/parisc/eisa.c
@@ -346,10 +346,10 @@ static int __init eisa_probe(struct parisc_device *dev)
346 } 346 }
347 347
348 /* Reserve IRQ2 */ 348 /* Reserve IRQ2 */
349 irq_desc[2].action = &irq2_action; 349 irq_to_desc(2)->action = &irq2_action;
350 350
351 for (i = 0; i < 16; i++) { 351 for (i = 0; i < 16; i++) {
352 irq_desc[i].chip = &eisa_interrupt_type; 352 irq_to_desc(i)->chip = &eisa_interrupt_type;
353 } 353 }
354 354
355 EISA_bus = 1; 355 EISA_bus = 1;
diff --git a/drivers/parisc/gsc.c b/drivers/parisc/gsc.c
index f7d088b897ee..e76db9e4d504 100644
--- a/drivers/parisc/gsc.c
+++ b/drivers/parisc/gsc.c
@@ -108,7 +108,8 @@ int gsc_find_local_irq(unsigned int irq, int *global_irqs, int limit)
108 108
109static void gsc_asic_disable_irq(unsigned int irq) 109static void gsc_asic_disable_irq(unsigned int irq)
110{ 110{
111 struct gsc_asic *irq_dev = irq_desc[irq].chip_data; 111 struct irq_desc *desc = irq_to_desc(irq);
112 struct gsc_asic *irq_dev = desc->chip_data;
112 int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32); 113 int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
113 u32 imr; 114 u32 imr;
114 115
@@ -123,7 +124,8 @@ static void gsc_asic_disable_irq(unsigned int irq)
123 124
124static void gsc_asic_enable_irq(unsigned int irq) 125static void gsc_asic_enable_irq(unsigned int irq)
125{ 126{
126 struct gsc_asic *irq_dev = irq_desc[irq].chip_data; 127 struct irq_desc *desc = irq_to_desc(irq);
128 struct gsc_asic *irq_dev = desc->chip_data;
127 int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32); 129 int local_irq = gsc_find_local_irq(irq, irq_dev->global_irq, 32);
128 u32 imr; 130 u32 imr;
129 131
@@ -159,12 +161,14 @@ static struct hw_interrupt_type gsc_asic_interrupt_type = {
159int gsc_assign_irq(struct hw_interrupt_type *type, void *data) 161int gsc_assign_irq(struct hw_interrupt_type *type, void *data)
160{ 162{
161 static int irq = GSC_IRQ_BASE; 163 static int irq = GSC_IRQ_BASE;
164 struct irq_desc *desc;
162 165
163 if (irq > GSC_IRQ_MAX) 166 if (irq > GSC_IRQ_MAX)
164 return NO_IRQ; 167 return NO_IRQ;
165 168
166 irq_desc[irq].chip = type; 169 desc = irq_to_desc(irq);
167 irq_desc[irq].chip_data = data; 170 desc->chip = type;
171 desc->chip_data = data;
168 return irq++; 172 return irq++;
169} 173}
170 174
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 6fb3f7979f21..7beffcab2745 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -619,7 +619,9 @@ iosapic_set_irt_data( struct vector_info *vi, u32 *dp0, u32 *dp1)
619 619
620static struct vector_info *iosapic_get_vector(unsigned int irq) 620static struct vector_info *iosapic_get_vector(unsigned int irq)
621{ 621{
622 return irq_desc[irq].chip_data; 622 struct irq_desc *desc = irq_to_desc(irq);
623
624 return desc->chip_data;
623} 625}
624 626
625static void iosapic_disable_irq(unsigned int irq) 627static void iosapic_disable_irq(unsigned int irq)
diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c
index 1e8d2d17f04c..1e93c837514f 100644
--- a/drivers/parisc/superio.c
+++ b/drivers/parisc/superio.c
@@ -363,7 +363,9 @@ int superio_fixup_irq(struct pci_dev *pcidev)
363#endif 363#endif
364 364
365 for (i = 0; i < 16; i++) { 365 for (i = 0; i < 16; i++) {
366 irq_desc[i].chip = &superio_interrupt_type; 366 struct irq_desc *desc = irq_to_desc(i);
367
368 desc->chip = &superio_interrupt_type;
367 } 369 }
368 370
369 /* 371 /*
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 529d9d7727b0..999cc4088b59 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -151,6 +151,13 @@ void pci_bus_add_devices(struct pci_bus *bus)
151 if (retval) 151 if (retval)
152 dev_err(&dev->dev, "Error creating cpuaffinity" 152 dev_err(&dev->dev, "Error creating cpuaffinity"
153 " file, continuing...\n"); 153 " file, continuing...\n");
154
155 retval = device_create_file(&child_bus->dev,
156 &dev_attr_cpulistaffinity);
157 if (retval)
158 dev_err(&dev->dev,
159 "Error creating cpulistaffinity"
160 " file, continuing...\n");
154 } 161 }
155 } 162 }
156} 163}
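
The bus.c hunk registers a cpulistaffinity attribute next to the existing cpuaffinity one. For context, the general sysfs device-attribute idiom looks roughly like this (foo is a hypothetical attribute, not one added by the patch):

/* Sketch: DEVICE_ATTR(foo, ...) generates dev_attr_foo for registration. */
static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	return sprintf(buf, "example\n");
}
static DEVICE_ATTR(foo, S_IRUGO, foo_show, NULL);

static int demo_add_attr(struct device *dev)
{
	return device_create_file(dev, &dev_attr_foo);	/* 0 on success */
}
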
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 7b3751136e63..691b3adeb870 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -211,7 +211,7 @@ static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
211 include_all = 1; 211 include_all = 1;
212 } 212 }
213 213
214 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) { 214 if (ret) {
215 list_del(&dmaru->list); 215 list_del(&dmaru->list);
216 kfree(dmaru); 216 kfree(dmaru);
217 } 217 }
@@ -289,6 +289,24 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
289 } 289 }
290} 290}
291 291
292/**
293 * dmar_table_detect - checks to see if the platform supports DMAR devices
294 */
295static int __init dmar_table_detect(void)
296{
297 acpi_status status = AE_OK;
298
299 /* if we could find DMAR table, then there are DMAR devices */
300 status = acpi_get_table(ACPI_SIG_DMAR, 0,
301 (struct acpi_table_header **)&dmar_tbl);
302
303 if (ACPI_SUCCESS(status) && !dmar_tbl) {
304 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
305 status = AE_NOT_FOUND;
306 }
307
308 return (ACPI_SUCCESS(status) ? 1 : 0);
309}
292 310
293/** 311/**
294 * parse_dmar_table - parses the DMA reporting table 312 * parse_dmar_table - parses the DMA reporting table
@@ -300,6 +318,12 @@ parse_dmar_table(void)
300 struct acpi_dmar_header *entry_header; 318 struct acpi_dmar_header *entry_header;
301 int ret = 0; 319 int ret = 0;
302 320
321 /*
322 * Do it again: the earlier dmar_tbl mapping may have come from the
323 * temporary fixed map, so re-detect to get a stable mapping.
324 */
325 dmar_table_detect();
326
303 dmar = (struct acpi_table_dmar *)dmar_tbl; 327 dmar = (struct acpi_table_dmar *)dmar_tbl;
304 if (!dmar) 328 if (!dmar)
305 return -ENODEV; 329 return -ENODEV;
@@ -373,10 +397,10 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev)
373 397
374int __init dmar_dev_scope_init(void) 398int __init dmar_dev_scope_init(void)
375{ 399{
376 struct dmar_drhd_unit *drhd; 400 struct dmar_drhd_unit *drhd, *drhd_n;
377 int ret = -ENODEV; 401 int ret = -ENODEV;
378 402
379 for_each_drhd_unit(drhd) { 403 list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
380 ret = dmar_parse_dev(drhd); 404 ret = dmar_parse_dev(drhd);
381 if (ret) 405 if (ret)
382 return ret; 406 return ret;
@@ -384,8 +408,8 @@ int __init dmar_dev_scope_init(void)
384 408
385#ifdef CONFIG_DMAR 409#ifdef CONFIG_DMAR
386 { 410 {
387 struct dmar_rmrr_unit *rmrr; 411 struct dmar_rmrr_unit *rmrr, *rmrr_n;
388 for_each_rmrr_units(rmrr) { 412 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
389 ret = rmrr_parse_dev(rmrr); 413 ret = rmrr_parse_dev(rmrr);
390 if (ret) 414 if (ret)
391 return ret; 415 return ret;
@@ -430,30 +454,11 @@ int __init dmar_table_init(void)
430 return 0; 454 return 0;
431} 455}
432 456
433/**
434 * early_dmar_detect - checks to see if the platform supports DMAR devices
435 */
436int __init early_dmar_detect(void)
437{
438 acpi_status status = AE_OK;
439
440 /* if we could find DMAR table, then there are DMAR devices */
441 status = acpi_get_table(ACPI_SIG_DMAR, 0,
442 (struct acpi_table_header **)&dmar_tbl);
443
444 if (ACPI_SUCCESS(status) && !dmar_tbl) {
445 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
446 status = AE_NOT_FOUND;
447 }
448
449 return (ACPI_SUCCESS(status) ? 1 : 0);
450}
451
452void __init detect_intel_iommu(void) 457void __init detect_intel_iommu(void)
453{ 458{
454 int ret; 459 int ret;
455 460
456 ret = early_dmar_detect(); 461 ret = dmar_table_detect();
457 462
458 { 463 {
459#ifdef CONFIG_INTR_REMAP 464#ifdef CONFIG_INTR_REMAP
@@ -470,13 +475,13 @@ void __init detect_intel_iommu(void)
470 "Queued invalidation will be enabled to support " 475 "Queued invalidation will be enabled to support "
471 "x2apic and Intr-remapping.\n"); 476 "x2apic and Intr-remapping.\n");
472#endif 477#endif
473
474#ifdef CONFIG_DMAR 478#ifdef CONFIG_DMAR
475 if (ret && !no_iommu && !iommu_detected && !swiotlb && 479 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
476 !dmar_disabled) 480 !dmar_disabled)
477 iommu_detected = 1; 481 iommu_detected = 1;
478#endif 482#endif
479 } 483 }
484 dmar_tbl = NULL;
480} 485}
481 486
482 487
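
The dmar.c hunk renames early_dmar_detect() to dmar_table_detect() and calls it a second time from parse_dmar_table(): the pointer obtained during early boot may sit in a temporary fixed mapping, so detection is repeated (and dmar_tbl cleared at the end of detect_intel_iommu()) to avoid using a stale mapping. The detection step itself reduces to an ACPI table lookup, roughly (demo_has_dmar() is hypothetical):

/* Sketch: the platform has DMAR units iff the ACPI DMAR table exists. */
static int demo_has_dmar(void)
{
	struct acpi_table_header *tbl = NULL;
	acpi_status status;

	status = acpi_get_table(ACPI_SIG_DMAR, 0, &tbl);
	return ACPI_SUCCESS(status) && tbl != NULL;
}
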
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 7d27631e6e62..8cfd1c4926c8 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -123,10 +123,8 @@ static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc (void)
123static void __init print_bus_info (void) 123static void __init print_bus_info (void)
124{ 124{
125 struct bus_info *ptr; 125 struct bus_info *ptr;
126 struct list_head *ptr1;
127 126
128 list_for_each (ptr1, &bus_info_head) { 127 list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
129 ptr = list_entry (ptr1, struct bus_info, bus_info_list);
130 debug ("%s - slot_min = %x\n", __func__, ptr->slot_min); 128 debug ("%s - slot_min = %x\n", __func__, ptr->slot_min);
131 debug ("%s - slot_max = %x\n", __func__, ptr->slot_max); 129 debug ("%s - slot_max = %x\n", __func__, ptr->slot_max);
132 debug ("%s - slot_count = %x\n", __func__, ptr->slot_count); 130 debug ("%s - slot_count = %x\n", __func__, ptr->slot_count);
@@ -146,10 +144,8 @@ static void __init print_bus_info (void)
146static void print_lo_info (void) 144static void print_lo_info (void)
147{ 145{
148 struct rio_detail *ptr; 146 struct rio_detail *ptr;
149 struct list_head *ptr1;
150 debug ("print_lo_info ----\n"); 147 debug ("print_lo_info ----\n");
151 list_for_each (ptr1, &rio_lo_head) { 148 list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) {
152 ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
153 debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id); 149 debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
154 debug ("%s - rio_type = %x\n", __func__, ptr->rio_type); 150 debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
155 debug ("%s - owner_id = %x\n", __func__, ptr->owner_id); 151 debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -163,10 +159,8 @@ static void print_lo_info (void)
163static void print_vg_info (void) 159static void print_vg_info (void)
164{ 160{
165 struct rio_detail *ptr; 161 struct rio_detail *ptr;
166 struct list_head *ptr1;
167 debug ("%s ---\n", __func__); 162 debug ("%s ---\n", __func__);
168 list_for_each (ptr1, &rio_vg_head) { 163 list_for_each_entry(ptr, &rio_vg_head, rio_detail_list) {
169 ptr = list_entry (ptr1, struct rio_detail, rio_detail_list);
170 debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id); 164 debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id);
171 debug ("%s - rio_type = %x\n", __func__, ptr->rio_type); 165 debug ("%s - rio_type = %x\n", __func__, ptr->rio_type);
172 debug ("%s - owner_id = %x\n", __func__, ptr->owner_id); 166 debug ("%s - owner_id = %x\n", __func__, ptr->owner_id);
@@ -180,10 +174,8 @@ static void print_vg_info (void)
180static void __init print_ebda_pci_rsrc (void) 174static void __init print_ebda_pci_rsrc (void)
181{ 175{
182 struct ebda_pci_rsrc *ptr; 176 struct ebda_pci_rsrc *ptr;
183 struct list_head *ptr1;
184 177
185 list_for_each (ptr1, &ibmphp_ebda_pci_rsrc_head) { 178 list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) {
186 ptr = list_entry (ptr1, struct ebda_pci_rsrc, ebda_pci_rsrc_list);
187 debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", 179 debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n",
188 __func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr); 180 __func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr);
189 } 181 }
@@ -192,10 +184,8 @@ static void __init print_ebda_pci_rsrc (void)
192static void __init print_ibm_slot (void) 184static void __init print_ibm_slot (void)
193{ 185{
194 struct slot *ptr; 186 struct slot *ptr;
195 struct list_head *ptr1;
196 187
197 list_for_each (ptr1, &ibmphp_slot_head) { 188 list_for_each_entry(ptr, &ibmphp_slot_head, ibm_slot_list) {
198 ptr = list_entry (ptr1, struct slot, ibm_slot_list);
199 debug ("%s - slot_number: %x\n", __func__, ptr->number); 189 debug ("%s - slot_number: %x\n", __func__, ptr->number);
200 } 190 }
201} 191}
@@ -203,10 +193,8 @@ static void __init print_ibm_slot (void)
203static void __init print_opt_vg (void) 193static void __init print_opt_vg (void)
204{ 194{
205 struct opt_rio *ptr; 195 struct opt_rio *ptr;
206 struct list_head *ptr1;
207 debug ("%s ---\n", __func__); 196 debug ("%s ---\n", __func__);
208 list_for_each (ptr1, &opt_vg_head) { 197 list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
209 ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
210 debug ("%s - rio_type %x\n", __func__, ptr->rio_type); 198 debug ("%s - rio_type %x\n", __func__, ptr->rio_type);
211 debug ("%s - chassis_num: %x\n", __func__, ptr->chassis_num); 199 debug ("%s - chassis_num: %x\n", __func__, ptr->chassis_num);
212 debug ("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num); 200 debug ("%s - first_slot_num: %x\n", __func__, ptr->first_slot_num);
@@ -217,13 +205,9 @@ static void __init print_opt_vg (void)
217static void __init print_ebda_hpc (void) 205static void __init print_ebda_hpc (void)
218{ 206{
219 struct controller *hpc_ptr; 207 struct controller *hpc_ptr;
220 struct list_head *ptr1;
221 u16 index; 208 u16 index;
222 209
223 list_for_each (ptr1, &ebda_hpc_head) { 210 list_for_each_entry(hpc_ptr, &ebda_hpc_head, ebda_hpc_list) {
224
225 hpc_ptr = list_entry (ptr1, struct controller, ebda_hpc_list);
226
227 for (index = 0; index < hpc_ptr->slot_count; index++) { 211 for (index = 0; index < hpc_ptr->slot_count; index++) {
228 debug ("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num); 212 debug ("%s - physical slot#: %x\n", __func__, hpc_ptr->slots[index].slot_num);
229 debug ("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num); 213 debug ("%s - pci bus# of the slot: %x\n", __func__, hpc_ptr->slots[index].slot_bus_num);
@@ -460,9 +444,7 @@ static int __init ebda_rio_table (void)
460static struct opt_rio *search_opt_vg (u8 chassis_num) 444static struct opt_rio *search_opt_vg (u8 chassis_num)
461{ 445{
462 struct opt_rio *ptr; 446 struct opt_rio *ptr;
463 struct list_head *ptr1; 447 list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) {
464 list_for_each (ptr1, &opt_vg_head) {
465 ptr = list_entry (ptr1, struct opt_rio, opt_rio_list);
466 if (ptr->chassis_num == chassis_num) 448 if (ptr->chassis_num == chassis_num)
467 return ptr; 449 return ptr;
468 } 450 }
@@ -473,10 +455,8 @@ static int __init combine_wpg_for_chassis (void)
473{ 455{
474 struct opt_rio *opt_rio_ptr = NULL; 456 struct opt_rio *opt_rio_ptr = NULL;
475 struct rio_detail *rio_detail_ptr = NULL; 457 struct rio_detail *rio_detail_ptr = NULL;
476 struct list_head *list_head_ptr = NULL;
477 458
478 list_for_each (list_head_ptr, &rio_vg_head) { 459 list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) {
479 rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
480 opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num); 460 opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num);
481 if (!opt_rio_ptr) { 461 if (!opt_rio_ptr) {
482 opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL); 462 opt_rio_ptr = kzalloc(sizeof(struct opt_rio), GFP_KERNEL);
@@ -497,14 +477,12 @@ static int __init combine_wpg_for_chassis (void)
497} 477}
498 478
499/* 479/*
500 * reorgnizing linked list of expansion box 480 * reorganizing linked list of expansion box
501 */ 481 */
502static struct opt_rio_lo *search_opt_lo (u8 chassis_num) 482static struct opt_rio_lo *search_opt_lo (u8 chassis_num)
503{ 483{
504 struct opt_rio_lo *ptr; 484 struct opt_rio_lo *ptr;
505 struct list_head *ptr1; 485 list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) {
506 list_for_each (ptr1, &opt_lo_head) {
507 ptr = list_entry (ptr1, struct opt_rio_lo, opt_rio_lo_list);
508 if (ptr->chassis_num == chassis_num) 486 if (ptr->chassis_num == chassis_num)
509 return ptr; 487 return ptr;
510 } 488 }
@@ -515,10 +493,8 @@ static int combine_wpg_for_expansion (void)
515{ 493{
516 struct opt_rio_lo *opt_rio_lo_ptr = NULL; 494 struct opt_rio_lo *opt_rio_lo_ptr = NULL;
517 struct rio_detail *rio_detail_ptr = NULL; 495 struct rio_detail *rio_detail_ptr = NULL;
518 struct list_head *list_head_ptr = NULL;
519 496
520 list_for_each (list_head_ptr, &rio_lo_head) { 497 list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) {
521 rio_detail_ptr = list_entry (list_head_ptr, struct rio_detail, rio_detail_list);
522 opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num); 498 opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num);
523 if (!opt_rio_lo_ptr) { 499 if (!opt_rio_lo_ptr) {
524 opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL); 500 opt_rio_lo_ptr = kzalloc(sizeof(struct opt_rio_lo), GFP_KERNEL);
@@ -550,20 +526,17 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
550{ 526{
551 struct opt_rio *opt_vg_ptr = NULL; 527 struct opt_rio *opt_vg_ptr = NULL;
552 struct opt_rio_lo *opt_lo_ptr = NULL; 528 struct opt_rio_lo *opt_lo_ptr = NULL;
553 struct list_head *ptr = NULL;
554 int rc = 0; 529 int rc = 0;
555 530
556 if (!var) { 531 if (!var) {
557 list_for_each (ptr, &opt_vg_head) { 532 list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
558 opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
559 if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { 533 if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) {
560 rc = -ENODEV; 534 rc = -ENODEV;
561 break; 535 break;
562 } 536 }
563 } 537 }
564 } else { 538 } else {
565 list_for_each (ptr, &opt_lo_head) { 539 list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
566 opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
567 if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) { 540 if ((first_slot < opt_lo_ptr->first_slot_num) && (slot_num >= opt_lo_ptr->first_slot_num)) {
568 rc = -ENODEV; 541 rc = -ENODEV;
569 break; 542 break;
@@ -576,10 +549,8 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var)
576static struct opt_rio_lo * find_rxe_num (u8 slot_num) 549static struct opt_rio_lo * find_rxe_num (u8 slot_num)
577{ 550{
578 struct opt_rio_lo *opt_lo_ptr; 551 struct opt_rio_lo *opt_lo_ptr;
579 struct list_head *ptr;
580 552
581 list_for_each (ptr, &opt_lo_head) { 553 list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) {
582 opt_lo_ptr = list_entry (ptr, struct opt_rio_lo, opt_rio_lo_list);
583 //check to see if this slot_num belongs to expansion box 554 //check to see if this slot_num belongs to expansion box
584 if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) 555 if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1)))
585 return opt_lo_ptr; 556 return opt_lo_ptr;
@@ -590,10 +561,8 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num)
590static struct opt_rio * find_chassis_num (u8 slot_num) 561static struct opt_rio * find_chassis_num (u8 slot_num)
591{ 562{
592 struct opt_rio *opt_vg_ptr; 563 struct opt_rio *opt_vg_ptr;
593 struct list_head *ptr;
594 564
595 list_for_each (ptr, &opt_vg_head) { 565 list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) {
596 opt_vg_ptr = list_entry (ptr, struct opt_rio, opt_rio_list);
597 //check to see if this slot_num belongs to chassis 566 //check to see if this slot_num belongs to chassis
598 if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) 567 if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0)))
599 return opt_vg_ptr; 568 return opt_vg_ptr;
@@ -607,11 +576,9 @@ static struct opt_rio * find_chassis_num (u8 slot_num)
607static u8 calculate_first_slot (u8 slot_num) 576static u8 calculate_first_slot (u8 slot_num)
608{ 577{
609 u8 first_slot = 1; 578 u8 first_slot = 1;
610 struct list_head * list;
611 struct slot * slot_cur; 579 struct slot * slot_cur;
612 580
613 list_for_each (list, &ibmphp_slot_head) { 581 list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) {
614 slot_cur = list_entry (list, struct slot, ibm_slot_list);
615 if (slot_cur->ctrl) { 582 if (slot_cur->ctrl) {
616 if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) 583 if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num))
617 first_slot = slot_cur->ctrl->ending_slot_num; 584 first_slot = slot_cur->ctrl->ending_slot_num;
@@ -767,7 +734,6 @@ static int __init ebda_rsrc_controller (void)
767 struct bus_info *bus_info_ptr1, *bus_info_ptr2; 734 struct bus_info *bus_info_ptr1, *bus_info_ptr2;
768 int rc; 735 int rc;
769 struct slot *tmp_slot; 736 struct slot *tmp_slot;
770 struct list_head *list;
771 737
772 addr = hpc_list_ptr->phys_addr; 738 addr = hpc_list_ptr->phys_addr;
773 for (ctlr = 0; ctlr < hpc_list_ptr->num_ctlrs; ctlr++) { 739 for (ctlr = 0; ctlr < hpc_list_ptr->num_ctlrs; ctlr++) {
@@ -997,9 +963,7 @@ static int __init ebda_rsrc_controller (void)
997 963
998 } /* each hpc */ 964 } /* each hpc */
999 965
1000 list_for_each (list, &ibmphp_slot_head) { 966 list_for_each_entry(tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
1001 tmp_slot = list_entry (list, struct slot, ibm_slot_list);
1002
1003 snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot)); 967 snprintf (tmp_slot->hotplug_slot->name, 30, "%s", create_file_name (tmp_slot));
1004 pci_hp_register(tmp_slot->hotplug_slot, 968 pci_hp_register(tmp_slot->hotplug_slot,
1005 pci_find_bus(0, tmp_slot->bus), tmp_slot->device); 969 pci_find_bus(0, tmp_slot->bus), tmp_slot->device);
@@ -1101,10 +1065,8 @@ u16 ibmphp_get_total_controllers (void)
1101struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num) 1065struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
1102{ 1066{
1103 struct slot *slot; 1067 struct slot *slot;
1104 struct list_head *list;
1105 1068
1106 list_for_each (list, &ibmphp_slot_head) { 1069 list_for_each_entry(slot, &ibmphp_slot_head, ibm_slot_list) {
1107 slot = list_entry (list, struct slot, ibm_slot_list);
1108 if (slot->number == physical_num) 1070 if (slot->number == physical_num)
1109 return slot; 1071 return slot;
1110 } 1072 }
@@ -1120,10 +1082,8 @@ struct slot *ibmphp_get_slot_from_physical_num (u8 physical_num)
1120struct bus_info *ibmphp_find_same_bus_num (u32 num) 1082struct bus_info *ibmphp_find_same_bus_num (u32 num)
1121{ 1083{
1122 struct bus_info *ptr; 1084 struct bus_info *ptr;
1123 struct list_head *ptr1;
1124 1085
1125 list_for_each (ptr1, &bus_info_head) { 1086 list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
1126 ptr = list_entry (ptr1, struct bus_info, bus_info_list);
1127 if (ptr->busno == num) 1087 if (ptr->busno == num)
1128 return ptr; 1088 return ptr;
1129 } 1089 }
@@ -1136,10 +1096,8 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num)
1136int ibmphp_get_bus_index (u8 num) 1096int ibmphp_get_bus_index (u8 num)
1137{ 1097{
1138 struct bus_info *ptr; 1098 struct bus_info *ptr;
1139 struct list_head *ptr1;
1140 1099
1141 list_for_each (ptr1, &bus_info_head) { 1100 list_for_each_entry(ptr, &bus_info_head, bus_info_list) {
1142 ptr = list_entry (ptr1, struct bus_info, bus_info_list);
1143 if (ptr->busno == num) 1101 if (ptr->busno == num)
1144 return ptr->index; 1102 return ptr->index;
1145 } 1103 }
@@ -1212,11 +1170,9 @@ static struct pci_driver ibmphp_driver = {
1212int ibmphp_register_pci (void) 1170int ibmphp_register_pci (void)
1213{ 1171{
1214 struct controller *ctrl; 1172 struct controller *ctrl;
1215 struct list_head *tmp;
1216 int rc = 0; 1173 int rc = 0;
1217 1174
1218 list_for_each (tmp, &ebda_hpc_head) { 1175 list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
1219 ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
1220 if (ctrl->ctlr_type == 1) { 1176 if (ctrl->ctlr_type == 1) {
1221 rc = pci_register_driver(&ibmphp_driver); 1177 rc = pci_register_driver(&ibmphp_driver);
1222 break; 1178 break;
@@ -1227,12 +1183,10 @@ int ibmphp_register_pci (void)
1227static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids) 1183static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids)
1228{ 1184{
1229 struct controller *ctrl; 1185 struct controller *ctrl;
1230 struct list_head *tmp;
1231 1186
1232 debug ("inside ibmphp_probe\n"); 1187 debug ("inside ibmphp_probe\n");
1233 1188
1234 list_for_each (tmp, &ebda_hpc_head) { 1189 list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) {
1235 ctrl = list_entry (tmp, struct controller, ebda_hpc_list);
1236 if (ctrl->ctlr_type == 1) { 1190 if (ctrl->ctlr_type == 1) {
1237 if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) { 1191 if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) {
1238 ctrl->ctrl_dev = dev; 1192 ctrl->ctrl_dev = dev;
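
Every ibmphp_ebda.c hunk above applies the same mechanical conversion: an open-coded list_for_each() plus list_entry() pair collapses into a single list_for_each_entry(), which lets each function drop its temporary struct list_head cursor. A minimal before/after sketch of the idiom, using a hypothetical item type rather than the driver's structures:

#include <linux/list.h>

struct item {
	int id;
	struct list_head node;	/* linkage member named in the iterator */
};

static LIST_HEAD(item_head);

/* Before: walk raw list heads and convert each node by hand. */
static struct item *find_item_old(int id)
{
	struct list_head *pos;
	struct item *it;

	list_for_each(pos, &item_head) {
		it = list_entry(pos, struct item, node);
		if (it->id == id)
			return it;
	}
	return NULL;
}

/* After: list_for_each_entry() does the container_of() step itself. */
static struct item *find_item_new(int id)
{
	struct item *it;

	list_for_each_entry(it, &item_head, node) {
		if (it->id == id)
			return it;
	}
	return NULL;
}
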
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 5f85b1b120e3..2e6c4474644e 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -102,13 +102,13 @@ static int get_##name (struct hotplug_slot *slot, type *value) \
102{ \ 102{ \
103 struct hotplug_slot_ops *ops = slot->ops; \ 103 struct hotplug_slot_ops *ops = slot->ops; \
104 int retval = 0; \ 104 int retval = 0; \
105 if (try_module_get(ops->owner)) { \ 105 if (!try_module_get(ops->owner)) \
106 if (ops->get_##name) \ 106 return -ENODEV; \
107 retval = ops->get_##name(slot, value); \ 107 if (ops->get_##name) \
108 else \ 108 retval = ops->get_##name(slot, value); \
109 *value = slot->info->name; \ 109 else \
110 module_put(ops->owner); \ 110 *value = slot->info->name; \
111 } \ 111 module_put(ops->owner); \
112 return retval; \ 112 return retval; \
113} 113}
114 114
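
The pci_hotplug_core.c change restructures the generated get_##name accessors from a nested if (try_module_get(...)) body to an early return, so the success path loses one indentation level; it also changes behavior, since a failed try_module_get() now reports -ENODEV instead of silently returning 0. Expanded by hand for one accessor (get_power_status is chosen here purely to illustrate what the macro emits), the new function reads roughly:

static int get_power_status(struct hotplug_slot *slot, u8 *value)
{
	struct hotplug_slot_ops *ops = slot->ops;
	int retval = 0;

	if (!try_module_get(ops->owner))	/* pin the backing driver */
		return -ENODEV;
	if (ops->get_power_status)
		retval = ops->get_power_status(slot, value);
	else
		*value = slot->info->power_status;	/* fall back to cache */
	module_put(ops->owner);
	return retval;
}
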
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 9e6cec67e1cc..c367978bd7fe 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -57,6 +57,19 @@ extern struct workqueue_struct *pciehp_wq;
57#define warn(format, arg...) \ 57#define warn(format, arg...) \
58 printk(KERN_WARNING "%s: " format, MY_NAME , ## arg) 58 printk(KERN_WARNING "%s: " format, MY_NAME , ## arg)
59 59
60#define ctrl_dbg(ctrl, format, arg...) \
61 do { \
62 if (pciehp_debug) \
63 dev_printk(KERN_DEBUG, &ctrl->pcie->device, \
64 format, ## arg); \
65 } while (0)
66#define ctrl_err(ctrl, format, arg...) \
67 dev_err(&ctrl->pcie->device, format, ## arg)
68#define ctrl_info(ctrl, format, arg...) \
69 dev_info(&ctrl->pcie->device, format, ## arg)
70#define ctrl_warn(ctrl, format, arg...) \
71 dev_warn(&ctrl->pcie->device, format, ## arg)
72
60#define SLOT_NAME_SIZE 10 73#define SLOT_NAME_SIZE 10
61struct slot { 74struct slot {
62 u8 bus; 75 u8 bus;
@@ -87,6 +100,7 @@ struct controller {
87 int num_slots; /* Number of slots on ctlr */ 100 int num_slots; /* Number of slots on ctlr */
88 int slot_num_inc; /* 1 or -1 */ 101 int slot_num_inc; /* 1 or -1 */
89 struct pci_dev *pci_dev; 102 struct pci_dev *pci_dev;
103 struct pcie_device *pcie; /* PCI Express port service */
90 struct list_head slot_list; 104 struct list_head slot_list;
91 struct hpc_ops *hpc_ops; 105 struct hpc_ops *hpc_ops;
92 wait_queue_head_t queue; /* sleep & wake process */ 106 wait_queue_head_t queue; /* sleep & wake process */
@@ -170,7 +184,7 @@ static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
170 return slot; 184 return slot;
171 } 185 }
172 186
173 err("%s: slot (device=0x%x) not found\n", __func__, device); 187 ctrl_err(ctrl, "%s: slot (device=0x%x) not found\n", __func__, device);
174 return NULL; 188 return NULL;
175} 189}
176 190
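
The new ctrl_dbg/ctrl_err/ctrl_info/ctrl_warn wrappers route every pciehp message through the dev_printk() family keyed on the port-service device just added to struct controller, so each line is automatically prefixed with the driver and device name rather than the bare MY_NAME string. The same pattern, sketched generically (the my_* names and my_ctx type are illustrative, not part of the patch):

#include <linux/device.h>

struct my_ctx {
	struct device *dev;	/* the device messages are attributed to */
};

static int my_debug;		/* a module parameter in the real driver */

#define my_dbg(ctx, format, arg...)					\
	do {								\
		if (my_debug)						\
			dev_printk(KERN_DEBUG, (ctx)->dev,		\
				   format, ## arg);			\
	} while (0)
#define my_err(ctx, format, arg...)	dev_err((ctx)->dev, format, ## arg)
#define my_info(ctx, format, arg...)	dev_info((ctx)->dev, format, ## arg)
#define my_warn(ctx, format, arg...)	dev_warn((ctx)->dev, format, ## arg)
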
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 4fd5355bc3b5..c748a19db89d 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -144,9 +144,10 @@ set_lock_exit:
144 * sysfs interface which allows the user to toggle the Electro Mechanical 144 * sysfs interface which allows the user to toggle the Electro Mechanical
145 * Interlock. Valid values are either 0 or 1. 0 == unlock, 1 == lock 145 * Interlock. Valid values are either 0 or 1. 0 == unlock, 1 == lock
146 */ 146 */
147static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf, 147static ssize_t lock_write_file(struct hotplug_slot *hotplug_slot,
148 size_t count) 148 const char *buf, size_t count)
149{ 149{
150 struct slot *slot = hotplug_slot->private;
150 unsigned long llock; 151 unsigned long llock;
151 u8 lock; 152 u8 lock;
152 int retval = 0; 153 int retval = 0;
@@ -157,10 +158,11 @@ static ssize_t lock_write_file(struct hotplug_slot *slot, const char *buf,
157 switch (lock) { 158 switch (lock) {
158 case 0: 159 case 0:
159 case 1: 160 case 1:
160 retval = set_lock_status(slot, lock); 161 retval = set_lock_status(hotplug_slot, lock);
161 break; 162 break;
162 default: 163 default:
163 err ("%d is an invalid lock value\n", lock); 164 ctrl_err(slot->ctrl, "%d is an invalid lock value\n",
165 lock);
164 retval = -EINVAL; 166 retval = -EINVAL;
165 } 167 }
166 if (retval) 168 if (retval)
@@ -180,7 +182,10 @@ static struct hotplug_slot_attribute hotplug_slot_attr_lock = {
180 */ 182 */
181static void release_slot(struct hotplug_slot *hotplug_slot) 183static void release_slot(struct hotplug_slot *hotplug_slot)
182{ 184{
183 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 185 struct slot *slot = hotplug_slot->private;
186
187 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
188 __func__, hotplug_slot->name);
184 189
185 kfree(hotplug_slot->info); 190 kfree(hotplug_slot->info);
186 kfree(hotplug_slot); 191 kfree(hotplug_slot);
@@ -215,9 +220,9 @@ static int init_slots(struct controller *ctrl)
215 get_adapter_status(hotplug_slot, &info->adapter_status); 220 get_adapter_status(hotplug_slot, &info->adapter_status);
216 slot->hotplug_slot = hotplug_slot; 221 slot->hotplug_slot = hotplug_slot;
217 222
218 dbg("Registering bus=%x dev=%x hp_slot=%x sun=%x " 223 ctrl_dbg(ctrl, "Registering bus=%x dev=%x hp_slot=%x sun=%x "
219 "slot_device_offset=%x\n", slot->bus, slot->device, 224 "slot_device_offset=%x\n", slot->bus, slot->device,
220 slot->hp_slot, slot->number, ctrl->slot_device_offset); 225 slot->hp_slot, slot->number, ctrl->slot_device_offset);
221duplicate_name: 226duplicate_name:
222 retval = pci_hp_register(hotplug_slot, 227 retval = pci_hp_register(hotplug_slot,
223 ctrl->pci_dev->subordinate, 228 ctrl->pci_dev->subordinate,
@@ -233,9 +238,11 @@ duplicate_name:
233 if (len < SLOT_NAME_SIZE) 238 if (len < SLOT_NAME_SIZE)
234 goto duplicate_name; 239 goto duplicate_name;
235 else 240 else
236 err("duplicate slot name overflow\n"); 241 ctrl_err(ctrl, "duplicate slot name "
242 "overflow\n");
237 } 243 }
238 err("pci_hp_register failed with error %d\n", retval); 244 ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
245 retval);
239 goto error_info; 246 goto error_info;
240 } 247 }
241 /* create additional sysfs entries */ 248 /* create additional sysfs entries */
@@ -244,7 +251,8 @@ duplicate_name:
244 &hotplug_slot_attr_lock.attr); 251 &hotplug_slot_attr_lock.attr);
245 if (retval) { 252 if (retval) {
246 pci_hp_deregister(hotplug_slot); 253 pci_hp_deregister(hotplug_slot);
247 err("cannot create additional sysfs entries\n"); 254 ctrl_err(ctrl, "cannot create additional sysfs "
255 "entries\n");
248 goto error_info; 256 goto error_info;
249 } 257 }
250 } 258 }
@@ -278,7 +286,8 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
278{ 286{
279 struct slot *slot = hotplug_slot->private; 287 struct slot *slot = hotplug_slot->private;
280 288
281 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 289 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
290 __func__, hotplug_slot->name);
282 291
283 hotplug_slot->info->attention_status = status; 292 hotplug_slot->info->attention_status = status;
284 293
@@ -293,7 +302,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
293{ 302{
294 struct slot *slot = hotplug_slot->private; 303 struct slot *slot = hotplug_slot->private;
295 304
296 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 305 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
306 __func__, hotplug_slot->name);
297 307
298 return pciehp_sysfs_enable_slot(slot); 308 return pciehp_sysfs_enable_slot(slot);
299} 309}
@@ -303,7 +313,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
303{ 313{
304 struct slot *slot = hotplug_slot->private; 314 struct slot *slot = hotplug_slot->private;
305 315
306 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 316 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
317 __func__, hotplug_slot->name);
307 318
308 return pciehp_sysfs_disable_slot(slot); 319 return pciehp_sysfs_disable_slot(slot);
309} 320}
@@ -313,7 +324,8 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
313 struct slot *slot = hotplug_slot->private; 324 struct slot *slot = hotplug_slot->private;
314 int retval; 325 int retval;
315 326
316 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 327 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
328 __func__, hotplug_slot->name);
317 329
318 retval = slot->hpc_ops->get_power_status(slot, value); 330 retval = slot->hpc_ops->get_power_status(slot, value);
319 if (retval < 0) 331 if (retval < 0)
@@ -327,7 +339,8 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
327 struct slot *slot = hotplug_slot->private; 339 struct slot *slot = hotplug_slot->private;
328 int retval; 340 int retval;
329 341
330 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 342 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
343 __func__, hotplug_slot->name);
331 344
332 retval = slot->hpc_ops->get_attention_status(slot, value); 345 retval = slot->hpc_ops->get_attention_status(slot, value);
333 if (retval < 0) 346 if (retval < 0)
@@ -341,7 +354,8 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
341 struct slot *slot = hotplug_slot->private; 354 struct slot *slot = hotplug_slot->private;
342 int retval; 355 int retval;
343 356
344 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 357 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
358 __func__, hotplug_slot->name);
345 359
346 retval = slot->hpc_ops->get_latch_status(slot, value); 360 retval = slot->hpc_ops->get_latch_status(slot, value);
347 if (retval < 0) 361 if (retval < 0)
@@ -355,7 +369,8 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
355 struct slot *slot = hotplug_slot->private; 369 struct slot *slot = hotplug_slot->private;
356 int retval; 370 int retval;
357 371
358 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 372 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
373 __func__, hotplug_slot->name);
359 374
360 retval = slot->hpc_ops->get_adapter_status(slot, value); 375 retval = slot->hpc_ops->get_adapter_status(slot, value);
361 if (retval < 0) 376 if (retval < 0)
@@ -370,7 +385,8 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
370 struct slot *slot = hotplug_slot->private; 385 struct slot *slot = hotplug_slot->private;
371 int retval; 386 int retval;
372 387
373 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 388 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
389 __func__, hotplug_slot->name);
374 390
375 retval = slot->hpc_ops->get_max_bus_speed(slot, value); 391 retval = slot->hpc_ops->get_max_bus_speed(slot, value);
376 if (retval < 0) 392 if (retval < 0)
@@ -384,7 +400,8 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
384 struct slot *slot = hotplug_slot->private; 400 struct slot *slot = hotplug_slot->private;
385 int retval; 401 int retval;
386 402
387 dbg("%s - physical_slot = %s\n", __func__, hotplug_slot->name); 403 ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
404 __func__, hotplug_slot->name);
388 405
389 retval = slot->hpc_ops->get_cur_bus_speed(slot, value); 406 retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
390 if (retval < 0) 407 if (retval < 0)
@@ -402,14 +419,15 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
402 struct pci_dev *pdev = dev->port; 419 struct pci_dev *pdev = dev->port;
403 420
404 if (pciehp_force) 421 if (pciehp_force)
405 dbg("Bypassing BIOS check for pciehp use on %s\n", 422 dev_info(&dev->device,
406 pci_name(pdev)); 423 "Bypassing BIOS check for pciehp use on %s\n",
424 pci_name(pdev));
407 else if (pciehp_get_hp_hw_control_from_firmware(pdev)) 425 else if (pciehp_get_hp_hw_control_from_firmware(pdev))
408 goto err_out_none; 426 goto err_out_none;
409 427
410 ctrl = pcie_init(dev); 428 ctrl = pcie_init(dev);
411 if (!ctrl) { 429 if (!ctrl) {
412 dbg("%s: controller initialization failed\n", PCIE_MODULE_NAME); 430 dev_err(&dev->device, "controller initialization failed\n");
413 goto err_out_none; 431 goto err_out_none;
414 } 432 }
415 set_service_data(dev, ctrl); 433 set_service_data(dev, ctrl);
@@ -418,11 +436,10 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
418 rc = init_slots(ctrl); 436 rc = init_slots(ctrl);
419 if (rc) { 437 if (rc) {
420 if (rc == -EBUSY) 438 if (rc == -EBUSY)
421 warn("%s: slot already registered by another " 439 ctrl_warn(ctrl, "slot already registered by another "
422 "hotplug driver\n", PCIE_MODULE_NAME); 440 "hotplug driver\n");
423 else 441 else
424 err("%s: slot initialization failed\n", 442 ctrl_err(ctrl, "slot initialization failed\n");
425 PCIE_MODULE_NAME);
426 goto err_out_release_ctlr; 443 goto err_out_release_ctlr;
427 } 444 }
428 445
@@ -461,13 +478,13 @@ static void pciehp_remove (struct pcie_device *dev)
461#ifdef CONFIG_PM 478#ifdef CONFIG_PM
462static int pciehp_suspend (struct pcie_device *dev, pm_message_t state) 479static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
463{ 480{
464 printk("%s ENTRY\n", __func__); 481 dev_info(&dev->device, "%s ENTRY\n", __func__);
465 return 0; 482 return 0;
466} 483}
467 484
468static int pciehp_resume (struct pcie_device *dev) 485static int pciehp_resume (struct pcie_device *dev)
469{ 486{
470 printk("%s ENTRY\n", __func__); 487 dev_info(&dev->device, "%s ENTRY\n", __func__);
471 if (pciehp_force) { 488 if (pciehp_force) {
472 struct controller *ctrl = get_service_data(dev); 489 struct controller *ctrl = get_service_data(dev);
473 struct slot *t_slot; 490 struct slot *t_slot;
@@ -497,10 +514,9 @@ static struct pcie_port_service_id port_pci_ids[] = { {
497 .driver_data = 0, 514 .driver_data = 0,
498 }, { /* end: all zeroes */ } 515 }, { /* end: all zeroes */ }
499}; 516};
500static const char device_name[] = "hpdriver";
501 517
502static struct pcie_port_service_driver hpdriver_portdrv = { 518static struct pcie_port_service_driver hpdriver_portdrv = {
503 .name = (char *)device_name, 519 .name = PCIE_MODULE_NAME,
504 .id_table = &port_pci_ids[0], 520 .id_table = &port_pci_ids[0],
505 521
506 .probe = pciehp_probe, 522 .probe = pciehp_probe,
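
A recurring detail in the pciehp_core.c hunks: each hotplug callback now begins by recovering the driver-private struct slot from the generic hotplug_slot through its ->private pointer, which is what gives the function a ctrl to hand to the new logging macros. Reduced to its essentials, the pattern is:

static int enable_slot(struct hotplug_slot *hotplug_slot)
{
	/* ->private was pointed at the pciehp slot at registration time */
	struct slot *slot = hotplug_slot->private;

	ctrl_dbg(slot->ctrl, "%s - physical_slot = %s\n",
		 __func__, hotplug_slot->name);

	return pciehp_sysfs_enable_slot(slot);
}
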
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 96a5d55a4983..acb7f9efd182 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -58,14 +58,15 @@ static int queue_interrupt_event(struct slot *p_slot, u32 event_type)
58u8 pciehp_handle_attention_button(struct slot *p_slot) 58u8 pciehp_handle_attention_button(struct slot *p_slot)
59{ 59{
60 u32 event_type; 60 u32 event_type;
61 struct controller *ctrl = p_slot->ctrl;
61 62
62 /* Attention Button Change */ 63 /* Attention Button Change */
63 dbg("pciehp: Attention button interrupt received.\n"); 64 ctrl_dbg(ctrl, "Attention button interrupt received.\n");
64 65
65 /* 66 /*
66 * Button pressed - See if need to TAKE ACTION!!! 67 * Button pressed - See if need to TAKE ACTION!!!
67 */ 68 */
68 info("Button pressed on Slot(%s)\n", p_slot->name); 69 ctrl_info(ctrl, "Button pressed on Slot(%s)\n", p_slot->name);
69 event_type = INT_BUTTON_PRESS; 70 event_type = INT_BUTTON_PRESS;
70 71
71 queue_interrupt_event(p_slot, event_type); 72 queue_interrupt_event(p_slot, event_type);
@@ -77,22 +78,23 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
77{ 78{
78 u8 getstatus; 79 u8 getstatus;
79 u32 event_type; 80 u32 event_type;
81 struct controller *ctrl = p_slot->ctrl;
80 82
81 /* Switch Change */ 83 /* Switch Change */
82 dbg("pciehp: Switch interrupt received.\n"); 84 ctrl_dbg(ctrl, "Switch interrupt received.\n");
83 85
84 p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); 86 p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
85 if (getstatus) { 87 if (getstatus) {
86 /* 88 /*
87 * Switch opened 89 * Switch opened
88 */ 90 */
89 info("Latch open on Slot(%s)\n", p_slot->name); 91 ctrl_info(ctrl, "Latch open on Slot(%s)\n", p_slot->name);
90 event_type = INT_SWITCH_OPEN; 92 event_type = INT_SWITCH_OPEN;
91 } else { 93 } else {
92 /* 94 /*
93 * Switch closed 95 * Switch closed
94 */ 96 */
95 info("Latch close on Slot(%s)\n", p_slot->name); 97 ctrl_info(ctrl, "Latch close on Slot(%s)\n", p_slot->name);
96 event_type = INT_SWITCH_CLOSE; 98 event_type = INT_SWITCH_CLOSE;
97 } 99 }
98 100
@@ -105,9 +107,10 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
105{ 107{
106 u32 event_type; 108 u32 event_type;
107 u8 presence_save; 109 u8 presence_save;
110 struct controller *ctrl = p_slot->ctrl;
108 111
109 /* Presence Change */ 112 /* Presence Change */
110 dbg("pciehp: Presence/Notify input change.\n"); 113 ctrl_dbg(ctrl, "Presence/Notify input change.\n");
111 114
112 /* Switch is open, assume a presence change 115 /* Switch is open, assume a presence change
113 * Save the presence state 116 * Save the presence state
@@ -117,13 +120,13 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
117 /* 120 /*
118 * Card Present 121 * Card Present
119 */ 122 */
120 info("Card present on Slot(%s)\n", p_slot->name); 123 ctrl_info(ctrl, "Card present on Slot(%s)\n", p_slot->name);
121 event_type = INT_PRESENCE_ON; 124 event_type = INT_PRESENCE_ON;
122 } else { 125 } else {
123 /* 126 /*
124 * Not Present 127 * Not Present
125 */ 128 */
126 info("Card not present on Slot(%s)\n", p_slot->name); 129 ctrl_info(ctrl, "Card not present on Slot(%s)\n", p_slot->name);
127 event_type = INT_PRESENCE_OFF; 130 event_type = INT_PRESENCE_OFF;
128 } 131 }
129 132
@@ -135,23 +138,25 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
135u8 pciehp_handle_power_fault(struct slot *p_slot) 138u8 pciehp_handle_power_fault(struct slot *p_slot)
136{ 139{
137 u32 event_type; 140 u32 event_type;
141 struct controller *ctrl = p_slot->ctrl;
138 142
139 /* power fault */ 143 /* power fault */
140 dbg("pciehp: Power fault interrupt received.\n"); 144 ctrl_dbg(ctrl, "Power fault interrupt received.\n");
141 145
142 if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) { 146 if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
143 /* 147 /*
144 * power fault Cleared 148 * power fault Cleared
145 */ 149 */
146 info("Power fault cleared on Slot(%s)\n", p_slot->name); 150 ctrl_info(ctrl, "Power fault cleared on Slot(%s)\n",
151 p_slot->name);
147 event_type = INT_POWER_FAULT_CLEAR; 152 event_type = INT_POWER_FAULT_CLEAR;
148 } else { 153 } else {
149 /* 154 /*
150 * power fault 155 * power fault
151 */ 156 */
152 info("Power fault on Slot(%s)\n", p_slot->name); 157 ctrl_info(ctrl, "Power fault on Slot(%s)\n", p_slot->name);
153 event_type = INT_POWER_FAULT; 158 event_type = INT_POWER_FAULT;
154 info("power fault bit %x set\n", 0); 159 ctrl_info(ctrl, "power fault bit %x set\n", 0);
155 } 160 }
156 161
157 queue_interrupt_event(p_slot, event_type); 162 queue_interrupt_event(p_slot, event_type);
@@ -168,8 +173,9 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
168 /* turn off slot, turn on Amber LED, turn off Green LED if supported*/ 173 /* turn off slot, turn on Amber LED, turn off Green LED if supported*/
169 if (POWER_CTRL(ctrl)) { 174 if (POWER_CTRL(ctrl)) {
170 if (pslot->hpc_ops->power_off_slot(pslot)) { 175 if (pslot->hpc_ops->power_off_slot(pslot)) {
171 err("%s: Issue of Slot Power Off command failed\n", 176 ctrl_err(ctrl,
172 __func__); 177 "%s: Issue of Slot Power Off command failed\n",
178 __func__);
173 return; 179 return;
174 } 180 }
175 } 181 }
@@ -186,8 +192,8 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
186 192
187 if (ATTN_LED(ctrl)) { 193 if (ATTN_LED(ctrl)) {
188 if (pslot->hpc_ops->set_attention_status(pslot, 1)) { 194 if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
189 err("%s: Issue of Set Attention Led command failed\n", 195 ctrl_err(ctrl, "%s: Issue of Set Attention "
190 __func__); 196 "Led command failed\n", __func__);
191 return; 197 return;
192 } 198 }
193 } 199 }
@@ -205,9 +211,9 @@ static int board_added(struct slot *p_slot)
205 int retval = 0; 211 int retval = 0;
206 struct controller *ctrl = p_slot->ctrl; 212 struct controller *ctrl = p_slot->ctrl;
207 213
208 dbg("%s: slot device, slot offset, hp slot = %d, %d ,%d\n", 214 ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d ,%d\n",
209 __func__, p_slot->device, 215 __func__, p_slot->device, ctrl->slot_device_offset,
210 ctrl->slot_device_offset, p_slot->hp_slot); 216 p_slot->hp_slot);
211 217
212 if (POWER_CTRL(ctrl)) { 218 if (POWER_CTRL(ctrl)) {
213 /* Power on slot */ 219 /* Power on slot */
@@ -225,22 +231,22 @@ static int board_added(struct slot *p_slot)
225 /* Check link training status */ 231 /* Check link training status */
226 retval = p_slot->hpc_ops->check_lnk_status(ctrl); 232 retval = p_slot->hpc_ops->check_lnk_status(ctrl);
227 if (retval) { 233 if (retval) {
228 err("%s: Failed to check link status\n", __func__); 234 ctrl_err(ctrl, "%s: Failed to check link status\n", __func__);
229 set_slot_off(ctrl, p_slot); 235 set_slot_off(ctrl, p_slot);
230 return retval; 236 return retval;
231 } 237 }
232 238
233 /* Check for a power fault */ 239 /* Check for a power fault */
234 if (p_slot->hpc_ops->query_power_fault(p_slot)) { 240 if (p_slot->hpc_ops->query_power_fault(p_slot)) {
235 dbg("%s: power fault detected\n", __func__); 241 ctrl_dbg(ctrl, "%s: power fault detected\n", __func__);
236 retval = POWER_FAILURE; 242 retval = POWER_FAILURE;
237 goto err_exit; 243 goto err_exit;
238 } 244 }
239 245
240 retval = pciehp_configure_device(p_slot); 246 retval = pciehp_configure_device(p_slot);
241 if (retval) { 247 if (retval) {
242 err("Cannot add device 0x%x:%x\n", p_slot->bus, 248 ctrl_err(ctrl, "Cannot add device 0x%x:%x\n",
243 p_slot->device); 249 p_slot->bus, p_slot->device);
244 goto err_exit; 250 goto err_exit;
245 } 251 }
246 252
@@ -272,14 +278,14 @@ static int remove_board(struct slot *p_slot)
272 if (retval) 278 if (retval)
273 return retval; 279 return retval;
274 280
275 dbg("In %s, hp_slot = %d\n", __func__, p_slot->hp_slot); 281 ctrl_dbg(ctrl, "In %s, hp_slot = %d\n", __func__, p_slot->hp_slot);
276 282
277 if (POWER_CTRL(ctrl)) { 283 if (POWER_CTRL(ctrl)) {
278 /* power off slot */ 284 /* power off slot */
279 retval = p_slot->hpc_ops->power_off_slot(p_slot); 285 retval = p_slot->hpc_ops->power_off_slot(p_slot);
280 if (retval) { 286 if (retval) {
281 err("%s: Issue of Slot Disable command failed\n", 287 ctrl_err(ctrl, "%s: Issue of Slot Disable command "
282 __func__); 288 "failed\n", __func__);
283 return retval; 289 return retval;
284 } 290 }
285 } 291 }
@@ -320,8 +326,8 @@ static void pciehp_power_thread(struct work_struct *work)
320 switch (p_slot->state) { 326 switch (p_slot->state) {
321 case POWEROFF_STATE: 327 case POWEROFF_STATE:
322 mutex_unlock(&p_slot->lock); 328 mutex_unlock(&p_slot->lock);
323 dbg("%s: disabling bus:device(%x:%x)\n", 329 ctrl_dbg(p_slot->ctrl, "%s: disabling bus:device(%x:%x)\n",
324 __func__, p_slot->bus, p_slot->device); 330 __func__, p_slot->bus, p_slot->device);
325 pciehp_disable_slot(p_slot); 331 pciehp_disable_slot(p_slot);
326 mutex_lock(&p_slot->lock); 332 mutex_lock(&p_slot->lock);
327 p_slot->state = STATIC_STATE; 333 p_slot->state = STATIC_STATE;
@@ -349,7 +355,8 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
349 355
350 info = kmalloc(sizeof(*info), GFP_KERNEL); 356 info = kmalloc(sizeof(*info), GFP_KERNEL);
351 if (!info) { 357 if (!info) {
352 err("%s: Cannot allocate memory\n", __func__); 358 ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
359 __func__);
353 return; 360 return;
354 } 361 }
355 info->p_slot = p_slot; 362 info->p_slot = p_slot;
@@ -403,12 +410,14 @@ static void handle_button_press_event(struct slot *p_slot)
403 p_slot->hpc_ops->get_power_status(p_slot, &getstatus); 410 p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
404 if (getstatus) { 411 if (getstatus) {
405 p_slot->state = BLINKINGOFF_STATE; 412 p_slot->state = BLINKINGOFF_STATE;
406 info("PCI slot #%s - powering off due to button " 413 ctrl_info(ctrl,
407 "press.\n", p_slot->name); 414 "PCI slot #%s - powering off due to button "
415 "press.\n", p_slot->name);
408 } else { 416 } else {
409 p_slot->state = BLINKINGON_STATE; 417 p_slot->state = BLINKINGON_STATE;
410 info("PCI slot #%s - powering on due to button " 418 ctrl_info(ctrl,
411 "press.\n", p_slot->name); 419 "PCI slot #%s - powering on due to button "
420 "press.\n", p_slot->name);
412 } 421 }
413 /* blink green LED and turn off amber */ 422 /* blink green LED and turn off amber */
414 if (PWR_LED(ctrl)) 423 if (PWR_LED(ctrl))
@@ -425,8 +434,8 @@ static void handle_button_press_event(struct slot *p_slot)
425 * press the attention again before the 5 sec. limit 434 * press the attention again before the 5 sec. limit
426 * expires to cancel hot-add or hot-remove 435 * expires to cancel hot-add or hot-remove
427 */ 436 */
428 info("Button cancel on Slot(%s)\n", p_slot->name); 437 ctrl_info(ctrl, "Button cancel on Slot(%s)\n", p_slot->name);
429 dbg("%s: button cancel\n", __func__); 438 ctrl_dbg(ctrl, "%s: button cancel\n", __func__);
430 cancel_delayed_work(&p_slot->work); 439 cancel_delayed_work(&p_slot->work);
431 if (p_slot->state == BLINKINGOFF_STATE) { 440 if (p_slot->state == BLINKINGOFF_STATE) {
432 if (PWR_LED(ctrl)) 441 if (PWR_LED(ctrl))
@@ -437,8 +446,8 @@ static void handle_button_press_event(struct slot *p_slot)
437 } 446 }
438 if (ATTN_LED(ctrl)) 447 if (ATTN_LED(ctrl))
439 p_slot->hpc_ops->set_attention_status(p_slot, 0); 448 p_slot->hpc_ops->set_attention_status(p_slot, 0);
440 info("PCI slot #%s - action canceled due to button press\n", 449 ctrl_info(ctrl, "PCI slot #%s - action canceled "
441 p_slot->name); 450 "due to button press\n", p_slot->name);
442 p_slot->state = STATIC_STATE; 451 p_slot->state = STATIC_STATE;
443 break; 452 break;
444 case POWEROFF_STATE: 453 case POWEROFF_STATE:
@@ -448,11 +457,11 @@ static void handle_button_press_event(struct slot *p_slot)
448 * this means that the previous attention button action 457 * this means that the previous attention button action
449 * to hot-add or hot-remove is undergoing 458 * to hot-add or hot-remove is undergoing
450 */ 459 */
451 info("Button ignore on Slot(%s)\n", p_slot->name); 460 ctrl_info(ctrl, "Button ignore on Slot(%s)\n", p_slot->name);
452 update_slot_info(p_slot); 461 update_slot_info(p_slot);
453 break; 462 break;
454 default: 463 default:
455 warn("Not a valid state\n"); 464 ctrl_warn(ctrl, "Not a valid state\n");
456 break; 465 break;
457 } 466 }
458} 467}
@@ -467,7 +476,8 @@ static void handle_surprise_event(struct slot *p_slot)
467 476
468 info = kmalloc(sizeof(*info), GFP_KERNEL); 477 info = kmalloc(sizeof(*info), GFP_KERNEL);
469 if (!info) { 478 if (!info) {
470 err("%s: Cannot allocate memory\n", __func__); 479 ctrl_err(p_slot->ctrl, "%s: Cannot allocate memory\n",
480 __func__);
471 return; 481 return;
472 } 482 }
473 info->p_slot = p_slot; 483 info->p_slot = p_slot;
@@ -505,7 +515,7 @@ static void interrupt_event_handler(struct work_struct *work)
505 case INT_PRESENCE_OFF: 515 case INT_PRESENCE_OFF:
506 if (!HP_SUPR_RM(ctrl)) 516 if (!HP_SUPR_RM(ctrl))
507 break; 517 break;
508 dbg("Surprise Removal\n"); 518 ctrl_dbg(ctrl, "Surprise Removal\n");
509 update_slot_info(p_slot); 519 update_slot_info(p_slot);
510 handle_surprise_event(p_slot); 520 handle_surprise_event(p_slot);
511 break; 521 break;
@@ -522,22 +532,23 @@ int pciehp_enable_slot(struct slot *p_slot)
522{ 532{
523 u8 getstatus = 0; 533 u8 getstatus = 0;
524 int rc; 534 int rc;
535 struct controller *ctrl = p_slot->ctrl;
525 536
526 /* Check to see if (latch closed, card present, power off) */ 537 /* Check to see if (latch closed, card present, power off) */
527 mutex_lock(&p_slot->ctrl->crit_sect); 538 mutex_lock(&p_slot->ctrl->crit_sect);
528 539
529 rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); 540 rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
530 if (rc || !getstatus) { 541 if (rc || !getstatus) {
531 info("%s: no adapter on slot(%s)\n", __func__, 542 ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
532 p_slot->name); 543 __func__, p_slot->name);
533 mutex_unlock(&p_slot->ctrl->crit_sect); 544 mutex_unlock(&p_slot->ctrl->crit_sect);
534 return -ENODEV; 545 return -ENODEV;
535 } 546 }
536 if (MRL_SENS(p_slot->ctrl)) { 547 if (MRL_SENS(p_slot->ctrl)) {
537 rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); 548 rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
538 if (rc || getstatus) { 549 if (rc || getstatus) {
539 info("%s: latch open on slot(%s)\n", __func__, 550 ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
540 p_slot->name); 551 __func__, p_slot->name);
541 mutex_unlock(&p_slot->ctrl->crit_sect); 552 mutex_unlock(&p_slot->ctrl->crit_sect);
542 return -ENODEV; 553 return -ENODEV;
543 } 554 }
@@ -546,8 +557,8 @@ int pciehp_enable_slot(struct slot *p_slot)
546 if (POWER_CTRL(p_slot->ctrl)) { 557 if (POWER_CTRL(p_slot->ctrl)) {
547 rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus); 558 rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
548 if (rc || getstatus) { 559 if (rc || getstatus) {
549 info("%s: already enabled on slot(%s)\n", __func__, 560 ctrl_info(ctrl, "%s: already enabled on slot(%s)\n",
550 p_slot->name); 561 __func__, p_slot->name);
551 mutex_unlock(&p_slot->ctrl->crit_sect); 562 mutex_unlock(&p_slot->ctrl->crit_sect);
552 return -EINVAL; 563 return -EINVAL;
553 } 564 }
@@ -571,6 +582,7 @@ int pciehp_disable_slot(struct slot *p_slot)
571{ 582{
572 u8 getstatus = 0; 583 u8 getstatus = 0;
573 int ret = 0; 584 int ret = 0;
585 struct controller *ctrl = p_slot->ctrl;
574 586
575 if (!p_slot->ctrl) 587 if (!p_slot->ctrl)
576 return 1; 588 return 1;
@@ -581,8 +593,8 @@ int pciehp_disable_slot(struct slot *p_slot)
581 if (!HP_SUPR_RM(p_slot->ctrl)) { 593 if (!HP_SUPR_RM(p_slot->ctrl)) {
582 ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); 594 ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
583 if (ret || !getstatus) { 595 if (ret || !getstatus) {
584 info("%s: no adapter on slot(%s)\n", __func__, 596 ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
585 p_slot->name); 597 __func__, p_slot->name);
586 mutex_unlock(&p_slot->ctrl->crit_sect); 598 mutex_unlock(&p_slot->ctrl->crit_sect);
587 return -ENODEV; 599 return -ENODEV;
588 } 600 }
@@ -591,8 +603,8 @@ int pciehp_disable_slot(struct slot *p_slot)
591 if (MRL_SENS(p_slot->ctrl)) { 603 if (MRL_SENS(p_slot->ctrl)) {
592 ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); 604 ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
593 if (ret || getstatus) { 605 if (ret || getstatus) {
594 info("%s: latch open on slot(%s)\n", __func__, 606 ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
595 p_slot->name); 607 __func__, p_slot->name);
596 mutex_unlock(&p_slot->ctrl->crit_sect); 608 mutex_unlock(&p_slot->ctrl->crit_sect);
597 return -ENODEV; 609 return -ENODEV;
598 } 610 }
@@ -601,8 +613,8 @@ int pciehp_disable_slot(struct slot *p_slot)
601 if (POWER_CTRL(p_slot->ctrl)) { 613 if (POWER_CTRL(p_slot->ctrl)) {
602 ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus); 614 ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
603 if (ret || !getstatus) { 615 if (ret || !getstatus) {
604 info("%s: already disabled slot(%s)\n", __func__, 616 ctrl_info(ctrl, "%s: already disabled slot(%s)\n",
605 p_slot->name); 617 __func__, p_slot->name);
606 mutex_unlock(&p_slot->ctrl->crit_sect); 618 mutex_unlock(&p_slot->ctrl->crit_sect);
607 return -EINVAL; 619 return -EINVAL;
608 } 620 }
@@ -618,6 +630,7 @@ int pciehp_disable_slot(struct slot *p_slot)
618int pciehp_sysfs_enable_slot(struct slot *p_slot) 630int pciehp_sysfs_enable_slot(struct slot *p_slot)
619{ 631{
620 int retval = -ENODEV; 632 int retval = -ENODEV;
633 struct controller *ctrl = p_slot->ctrl;
621 634
622 mutex_lock(&p_slot->lock); 635 mutex_lock(&p_slot->lock);
623 switch (p_slot->state) { 636 switch (p_slot->state) {
@@ -631,15 +644,15 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
631 p_slot->state = STATIC_STATE; 644 p_slot->state = STATIC_STATE;
632 break; 645 break;
633 case POWERON_STATE: 646 case POWERON_STATE:
634 info("Slot %s is already in powering on state\n", 647 ctrl_info(ctrl, "Slot %s is already in powering on state\n",
635 p_slot->name); 648 p_slot->name);
636 break; 649 break;
637 case BLINKINGOFF_STATE: 650 case BLINKINGOFF_STATE:
638 case POWEROFF_STATE: 651 case POWEROFF_STATE:
639 info("Already enabled on slot %s\n", p_slot->name); 652 ctrl_info(ctrl, "Already enabled on slot %s\n", p_slot->name);
640 break; 653 break;
641 default: 654 default:
642 err("Not a valid state on slot %s\n", p_slot->name); 655 ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
643 break; 656 break;
644 } 657 }
645 mutex_unlock(&p_slot->lock); 658 mutex_unlock(&p_slot->lock);
@@ -650,6 +663,7 @@ int pciehp_sysfs_enable_slot(struct slot *p_slot)
650int pciehp_sysfs_disable_slot(struct slot *p_slot) 663int pciehp_sysfs_disable_slot(struct slot *p_slot)
651{ 664{
652 int retval = -ENODEV; 665 int retval = -ENODEV;
666 struct controller *ctrl = p_slot->ctrl;
653 667
654 mutex_lock(&p_slot->lock); 668 mutex_lock(&p_slot->lock);
655 switch (p_slot->state) { 669 switch (p_slot->state) {
@@ -663,15 +677,15 @@ int pciehp_sysfs_disable_slot(struct slot *p_slot)
663 p_slot->state = STATIC_STATE; 677 p_slot->state = STATIC_STATE;
664 break; 678 break;
665 case POWEROFF_STATE: 679 case POWEROFF_STATE:
666 info("Slot %s is already in powering off state\n", 680 ctrl_info(ctrl, "Slot %s is already in powering off state\n",
667 p_slot->name); 681 p_slot->name);
668 break; 682 break;
669 case BLINKINGON_STATE: 683 case BLINKINGON_STATE:
670 case POWERON_STATE: 684 case POWERON_STATE:
671 info("Already disabled on slot %s\n", p_slot->name); 685 ctrl_info(ctrl, "Already disabled on slot %s\n", p_slot->name);
672 break; 686 break;
673 default: 687 default:
674 err("Not a valid state on slot %s\n", p_slot->name); 688 ctrl_err(ctrl, "Not a valid state on slot %s\n", p_slot->name);
675 break; 689 break;
676 } 690 }
677 mutex_unlock(&p_slot->lock); 691 mutex_unlock(&p_slot->lock);
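
Throughout pciehp_ctrl.c the conversion hoists struct controller *ctrl = p_slot->ctrl; to the top of each function so the ctrl_* macros have a controller to log against; the attention-button state machine around them (a press arms a blinking five-second cancel window, a second press cancels, expiry hands off to the power thread) is unchanged. The enable path itself is a chain of preflight checks under the controller's crit_sect mutex; a condensed sketch with the tail simplified (the driver also refreshes cached slot info before powering on):

int pciehp_enable_slot(struct slot *p_slot)
{
	u8 getstatus = 0;
	struct controller *ctrl = p_slot->ctrl;	/* hoisted for ctrl_*() */

	mutex_lock(&ctrl->crit_sect);

	/* A card must be present ... */
	if (p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus) ||
	    !getstatus) {
		ctrl_info(ctrl, "%s: no adapter on slot(%s)\n",
			  __func__, p_slot->name);
		mutex_unlock(&ctrl->crit_sect);
		return -ENODEV;
	}
	/* ... the latch closed, if the slot has an MRL sensor ... */
	if (MRL_SENS(ctrl) &&
	    (p_slot->hpc_ops->get_latch_status(p_slot, &getstatus) ||
	     getstatus)) {
		ctrl_info(ctrl, "%s: latch open on slot(%s)\n",
			  __func__, p_slot->name);
		mutex_unlock(&ctrl->crit_sect);
		return -ENODEV;
	}
	/* ... and power off, if the slot has a power controller. */
	if (POWER_CTRL(ctrl) &&
	    (p_slot->hpc_ops->get_power_status(p_slot, &getstatus) ||
	     getstatus)) {
		ctrl_info(ctrl, "%s: already enabled on slot(%s)\n",
			  __func__, p_slot->name);
		mutex_unlock(&ctrl->crit_sect);
		return -EINVAL;
	}

	mutex_unlock(&ctrl->crit_sect);
	return board_added(p_slot);	/* simplified tail */
}
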
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 9d934ddee956..8e9530c4c36d 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -223,7 +223,7 @@ static void start_int_poll_timer(struct controller *ctrl, int sec)
223 223
224static inline int pciehp_request_irq(struct controller *ctrl) 224static inline int pciehp_request_irq(struct controller *ctrl)
225{ 225{
226 int retval, irq = ctrl->pci_dev->irq; 226 int retval, irq = ctrl->pcie->irq;
227 227
228 /* Install interrupt polling timer. Start with 10 sec delay */ 228 /* Install interrupt polling timer. Start with 10 sec delay */
229 if (pciehp_poll_mode) { 229 if (pciehp_poll_mode) {
@@ -235,7 +235,8 @@ static inline int pciehp_request_irq(struct controller *ctrl)
235 /* Installs the interrupt handler */ 235 /* Installs the interrupt handler */
236 retval = request_irq(irq, pcie_isr, IRQF_SHARED, MY_NAME, ctrl); 236 retval = request_irq(irq, pcie_isr, IRQF_SHARED, MY_NAME, ctrl);
237 if (retval) 237 if (retval)
238 err("Cannot get irq %d for the hotplug controller\n", irq); 238 ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
239 irq);
239 return retval; 240 return retval;
240} 241}
241 242
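
With the struct pcie_device pointer now stored in struct controller, IRQ setup takes its vector from ctrl->pcie->irq — the interrupt assigned to the PCI Express port service — instead of the parent pci_dev, and pciehp_free_irq() is changed symmetrically. Reconstructed around the hunk's context lines (the polling-timer body is not shown in the diff and is assumed), the whole helper is the usual either/or between polled and interrupt-driven operation:

static inline int pciehp_request_irq(struct controller *ctrl)
{
	int retval, irq = ctrl->pcie->irq;	/* port-service IRQ */

	/* Install interrupt polling timer. Start with 10 sec delay */
	if (pciehp_poll_mode) {
		init_timer(&ctrl->poll_timer);
		start_int_poll_timer(ctrl, 10);
		return 0;
	}

	/* Installs the interrupt handler */
	retval = request_irq(irq, pcie_isr, IRQF_SHARED, MY_NAME, ctrl);
	if (retval)
		ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
			 irq);
	return retval;
}
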
@@ -244,7 +245,7 @@ static inline void pciehp_free_irq(struct controller *ctrl)
244 if (pciehp_poll_mode) 245 if (pciehp_poll_mode)
245 del_timer_sync(&ctrl->poll_timer); 246 del_timer_sync(&ctrl->poll_timer);
246 else 247 else
247 free_irq(ctrl->pci_dev->irq, ctrl); 248 free_irq(ctrl->pcie->irq, ctrl);
248} 249}
249 250
250static int pcie_poll_cmd(struct controller *ctrl) 251static int pcie_poll_cmd(struct controller *ctrl)
@@ -282,7 +283,7 @@ static void pcie_wait_cmd(struct controller *ctrl, int poll)
282 else 283 else
283 rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout); 284 rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
284 if (!rc) 285 if (!rc)
285 dbg("Command not completed in 1000 msec\n"); 286 ctrl_dbg(ctrl, "Command not completed in 1000 msec\n");
286} 287}
287 288
288/** 289/**
@@ -301,7 +302,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
301 302
302 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); 303 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
303 if (retval) { 304 if (retval) {
304 err("%s: Cannot read SLOTSTATUS register\n", __func__); 305 ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
306 __func__);
305 goto out; 307 goto out;
306 } 308 }
307 309
@@ -312,26 +314,28 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
312 * proceed forward to issue the next command according 314 * proceed forward to issue the next command according
313 * to spec. Just print out the error message. 315 * to spec. Just print out the error message.
314 */ 316 */
315 dbg("%s: CMD_COMPLETED not clear after 1 sec.\n", 317 ctrl_dbg(ctrl,
316 __func__); 318 "%s: CMD_COMPLETED not clear after 1 sec.\n",
319 __func__);
317 } else if (!NO_CMD_CMPL(ctrl)) { 320 } else if (!NO_CMD_CMPL(ctrl)) {
318 /* 321 /*
319 * This controller seems to notify of command completed 322 * This controller seems to notify of command completed
320 * event even though it supports none of power 323 * event even though it supports none of power
321 * controller, attention led, power led and EMI. 324 * controller, attention led, power led and EMI.
322 */ 325 */
323 dbg("%s: Unexpected CMD_COMPLETED. Need to wait for " 326 ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Need to "
324 "command completed event.\n", __func__); 327 "wait for command completed event.\n",
328 __func__);
325 ctrl->no_cmd_complete = 0; 329 ctrl->no_cmd_complete = 0;
326 } else { 330 } else {
327 dbg("%s: Unexpected CMD_COMPLETED. Maybe the " 331 ctrl_dbg(ctrl, "%s: Unexpected CMD_COMPLETED. Maybe "
328 "controller is broken.\n", __func__); 332 "the controller is broken.\n", __func__);
329 } 333 }
330 } 334 }
331 335
332 retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl); 336 retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
333 if (retval) { 337 if (retval) {
334 err("%s: Cannot read SLOTCTRL register\n", __func__); 338 ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
335 goto out; 339 goto out;
336 } 340 }
337 341
@@ -341,7 +345,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
341 smp_mb(); 345 smp_mb();
342 retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl); 346 retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl);
343 if (retval) 347 if (retval)
344 err("%s: Cannot write to SLOTCTRL register\n", __func__); 348 ctrl_err(ctrl, "%s: Cannot write to SLOTCTRL register\n",
349 __func__);
345 350
346 /* 351 /*
347 * Wait for command completion. 352 * Wait for command completion.
@@ -370,14 +375,15 @@ static int hpc_check_lnk_status(struct controller *ctrl)
370 375
371 retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status); 376 retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
372 if (retval) { 377 if (retval) {
373 err("%s: Cannot read LNKSTATUS register\n", __func__); 378 ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
379 __func__);
374 return retval; 380 return retval;
375 } 381 }
376 382
377 dbg("%s: lnk_status = %x\n", __func__, lnk_status); 383 ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
378 if ( (lnk_status & LNK_TRN) || (lnk_status & LNK_TRN_ERR) || 384 if ( (lnk_status & LNK_TRN) || (lnk_status & LNK_TRN_ERR) ||
379 !(lnk_status & NEG_LINK_WD)) { 385 !(lnk_status & NEG_LINK_WD)) {
380 err("%s : Link Training Error occurs \n", __func__); 386 ctrl_err(ctrl, "%s : Link Training Error occurs \n", __func__);
381 retval = -1; 387 retval = -1;
382 return retval; 388 return retval;
383 } 389 }
@@ -394,12 +400,12 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
394 400
395 retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl); 401 retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
396 if (retval) { 402 if (retval) {
397 err("%s: Cannot read SLOTCTRL register\n", __func__); 403 ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
398 return retval; 404 return retval;
399 } 405 }
400 406
401 dbg("%s: SLOTCTRL %x, value read %x\n", 407 ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n",
402 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl); 408 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
403 409
404 atten_led_state = (slot_ctrl & ATTN_LED_CTRL) >> 6; 410 atten_led_state = (slot_ctrl & ATTN_LED_CTRL) >> 6;
405 411
@@ -433,11 +439,11 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
433 439
434 retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl); 440 retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
435 if (retval) { 441 if (retval) {
436 err("%s: Cannot read SLOTCTRL register\n", __func__); 442 ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
437 return retval; 443 return retval;
438 } 444 }
439 dbg("%s: SLOTCTRL %x value read %x\n", 445 ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n",
440 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl); 446 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
441 447
442 pwr_state = (slot_ctrl & PWR_CTRL) >> 10; 448 pwr_state = (slot_ctrl & PWR_CTRL) >> 10;
443 449
@@ -464,7 +470,8 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
464 470
465 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); 471 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
466 if (retval) { 472 if (retval) {
467 err("%s: Cannot read SLOTSTATUS register\n", __func__); 473 ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
474 __func__);
468 return retval; 475 return retval;
469 } 476 }
470 477
@@ -482,7 +489,8 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
482 489
483 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); 490 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
484 if (retval) { 491 if (retval) {
485 err("%s: Cannot read SLOTSTATUS register\n", __func__); 492 ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
493 __func__);
486 return retval; 494 return retval;
487 } 495 }
488 card_state = (u8)((slot_status & PRSN_STATE) >> 6); 496 card_state = (u8)((slot_status & PRSN_STATE) >> 6);
@@ -500,7 +508,7 @@ static int hpc_query_power_fault(struct slot *slot)
500 508
501 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); 509 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
502 if (retval) { 510 if (retval) {
503 err("%s: Cannot check for power fault\n", __func__); 511 ctrl_err(ctrl, "%s: Cannot check for power fault\n", __func__);
504 return retval; 512 return retval;
505 } 513 }
506 pwr_fault = (u8)((slot_status & PWR_FAULT_DETECTED) >> 1); 514 pwr_fault = (u8)((slot_status & PWR_FAULT_DETECTED) >> 1);
@@ -516,7 +524,7 @@ static int hpc_get_emi_status(struct slot *slot, u8 *status)
516 524
517 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); 525 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
518 if (retval) { 526 if (retval) {
519 err("%s : Cannot check EMI status\n", __func__); 527 ctrl_err(ctrl, "%s : Cannot check EMI status\n", __func__);
520 return retval; 528 return retval;
521 } 529 }
522 *status = (slot_status & EMI_STATE) >> EMI_STATUS_BIT; 530 *status = (slot_status & EMI_STATE) >> EMI_STATUS_BIT;
@@ -560,8 +568,8 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
560 return -1; 568 return -1;
561 } 569 }
562 rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); 570 rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
563 dbg("%s: SLOTCTRL %x write cmd %x\n", 571 ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
564 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); 572 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
565 573
566 return rc; 574 return rc;
567} 575}
@@ -575,8 +583,8 @@ static void hpc_set_green_led_on(struct slot *slot)
575 slot_cmd = 0x0100; 583 slot_cmd = 0x0100;
576 cmd_mask = PWR_LED_CTRL; 584 cmd_mask = PWR_LED_CTRL;
577 pcie_write_cmd(ctrl, slot_cmd, cmd_mask); 585 pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
578 dbg("%s: SLOTCTRL %x write cmd %x\n", 586 ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
579 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); 587 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
580} 588}
581 589
582static void hpc_set_green_led_off(struct slot *slot) 590static void hpc_set_green_led_off(struct slot *slot)
@@ -588,8 +596,8 @@ static void hpc_set_green_led_off(struct slot *slot)
588 slot_cmd = 0x0300; 596 slot_cmd = 0x0300;
589 cmd_mask = PWR_LED_CTRL; 597 cmd_mask = PWR_LED_CTRL;
590 pcie_write_cmd(ctrl, slot_cmd, cmd_mask); 598 pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
591 dbg("%s: SLOTCTRL %x write cmd %x\n", 599 ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
592 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); 600 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
593} 601}
594 602
595static void hpc_set_green_led_blink(struct slot *slot) 603static void hpc_set_green_led_blink(struct slot *slot)
@@ -601,8 +609,8 @@ static void hpc_set_green_led_blink(struct slot *slot)
601 slot_cmd = 0x0200; 609 slot_cmd = 0x0200;
602 cmd_mask = PWR_LED_CTRL; 610 cmd_mask = PWR_LED_CTRL;
603 pcie_write_cmd(ctrl, slot_cmd, cmd_mask); 611 pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
604 dbg("%s: SLOTCTRL %x write cmd %x\n", 612 ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
605 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); 613 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
606} 614}
607 615
608static int hpc_power_on_slot(struct slot * slot) 616static int hpc_power_on_slot(struct slot * slot)
@@ -613,20 +621,22 @@ static int hpc_power_on_slot(struct slot * slot)
613 u16 slot_status; 621 u16 slot_status;
614 int retval = 0; 622 int retval = 0;
615 623
616 dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot); 624 ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
617 625
618 /* Clear sticky power-fault bit from previous power failures */ 626 /* Clear sticky power-fault bit from previous power failures */
619 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); 627 retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
620 if (retval) { 628 if (retval) {
621 err("%s: Cannot read SLOTSTATUS register\n", __func__); 629 ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
630 __func__);
622 return retval; 631 return retval;
623 } 632 }
624 slot_status &= PWR_FAULT_DETECTED; 633 slot_status &= PWR_FAULT_DETECTED;
625 if (slot_status) { 634 if (slot_status) {
626 retval = pciehp_writew(ctrl, SLOTSTATUS, slot_status); 635 retval = pciehp_writew(ctrl, SLOTSTATUS, slot_status);
627 if (retval) { 636 if (retval) {
628 err("%s: Cannot write to SLOTSTATUS register\n", 637 ctrl_err(ctrl,
629 __func__); 638 "%s: Cannot write to SLOTSTATUS register\n",
639 __func__);
630 return retval; 640 return retval;
631 } 641 }
632 } 642 }
@@ -644,11 +654,12 @@ static int hpc_power_on_slot(struct slot * slot)
644 retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); 654 retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
645 655
646 if (retval) { 656 if (retval) {
647 err("%s: Write %x command failed!\n", __func__, slot_cmd); 657 ctrl_err(ctrl, "%s: Write %x command failed!\n",
658 __func__, slot_cmd);
648 return -1; 659 return -1;
649 } 660 }
650 dbg("%s: SLOTCTRL %x write cmd %x\n", 661 ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
651 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); 662 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
652 663
653 return retval; 664 return retval;
654} 665}
@@ -694,7 +705,7 @@ static int hpc_power_off_slot(struct slot * slot)
694 int retval = 0; 705 int retval = 0;
695 int changed; 706 int changed;
696 707
697 dbg("%s: slot->hp_slot %x\n", __func__, slot->hp_slot); 708 ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
698 709
699 /* 710 /*
700 * Set Bad DLLP Mask bit in Correctable Error Mask 711 * Set Bad DLLP Mask bit in Correctable Error Mask
@@ -722,12 +733,12 @@ static int hpc_power_off_slot(struct slot * slot)
722 733
723 retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); 734 retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
724 if (retval) { 735 if (retval) {
725 err("%s: Write command failed!\n", __func__); 736 ctrl_err(ctrl, "%s: Write command failed!\n", __func__);
726 retval = -1; 737 retval = -1;
727 goto out; 738 goto out;
728 } 739 }
729 dbg("%s: SLOTCTRL %x write cmd %x\n", 740 ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
730 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); 741 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
731 out: 742 out:
732 if (changed) 743 if (changed)
733 pcie_unmask_bad_dllp(ctrl); 744 pcie_unmask_bad_dllp(ctrl);
@@ -749,7 +760,8 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
749 intr_loc = 0; 760 intr_loc = 0;
750 do { 761 do {
751 if (pciehp_readw(ctrl, SLOTSTATUS, &detected)) { 762 if (pciehp_readw(ctrl, SLOTSTATUS, &detected)) {
752 err("%s: Cannot read SLOTSTATUS\n", __func__); 763 ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS\n",
764 __func__);
753 return IRQ_NONE; 765 return IRQ_NONE;
754 } 766 }
755 767
@@ -760,12 +772,13 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
760 if (!intr_loc) 772 if (!intr_loc)
761 return IRQ_NONE; 773 return IRQ_NONE;
762 if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) { 774 if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) {
763 err("%s: Cannot write to SLOTSTATUS\n", __func__); 775 ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
776 __func__);
764 return IRQ_NONE; 777 return IRQ_NONE;
765 } 778 }
766 } while (detected); 779 } while (detected);
767 780
768 dbg("%s: intr_loc %x\n", __FUNCTION__, intr_loc); 781 ctrl_dbg(ctrl, "%s: intr_loc %x\n", __func__, intr_loc);
769 782
770 /* Check Command Complete Interrupt Pending */ 783 /* Check Command Complete Interrupt Pending */
771 if (intr_loc & CMD_COMPLETED) { 784 if (intr_loc & CMD_COMPLETED) {
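
The pcie_isr() hunks preserve the driver's read-and-acknowledge loop: slot status events are write-1-to-clear, so the handler re-reads SLOTSTATUS after each acknowledgement until no new events appear, ensuring nothing is lost between read and clear. In outline (the event mask is taken from this driver's register definitions, and the dispatch tail is summarized as a comment):

static irqreturn_t pcie_isr(int irq, void *dev_id)
{
	struct controller *ctrl = (struct controller *)dev_id;
	u16 detected, intr_loc = 0;

	do {
		if (pciehp_readw(ctrl, SLOTSTATUS, &detected))
			return IRQ_NONE;	/* register unreadable */
		detected &= (ATTN_BUTTN_PRESSED | PWR_FAULT_DETECTED |
			     MRL_SENS_CHANGED | PRSN_DETECT_CHANGED |
			     CMD_COMPLETED);
		intr_loc |= detected;		/* accumulate seen events */
		if (!intr_loc)
			return IRQ_NONE;	/* not our interrupt */
		/* Write the bits back to clear them (RW1C). */
		if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected))
			return IRQ_NONE;
	} while (detected);

	/* ... wake pcie_wait_cmd() on CMD_COMPLETED, then queue slot
	 * events (button, MRL, presence, power fault) to the handlers. */
	return IRQ_HANDLED;
}
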
@@ -807,7 +820,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
807 820
808 retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap); 821 retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
809 if (retval) { 822 if (retval) {
810 err("%s: Cannot read LNKCAP register\n", __func__); 823 ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
811 return retval; 824 return retval;
812 } 825 }
813 826
@@ -821,7 +834,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
821 } 834 }
822 835
823 *value = lnk_speed; 836 *value = lnk_speed;
824 dbg("Max link speed = %d\n", lnk_speed); 837 ctrl_dbg(ctrl, "Max link speed = %d\n", lnk_speed);
825 838
826 return retval; 839 return retval;
827} 840}
@@ -836,7 +849,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
836 849
837 retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap); 850 retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
838 if (retval) { 851 if (retval) {
839 err("%s: Cannot read LNKCAP register\n", __func__); 852 ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
840 return retval; 853 return retval;
841 } 854 }
842 855
@@ -871,7 +884,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
871 } 884 }
872 885
873 *value = lnk_wdth; 886 *value = lnk_wdth;
874 dbg("Max link width = %d\n", lnk_wdth); 887 ctrl_dbg(ctrl, "Max link width = %d\n", lnk_wdth);
875 888
876 return retval; 889 return retval;
877} 890}
@@ -885,7 +898,8 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
885 898
886 retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status); 899 retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
887 if (retval) { 900 if (retval) {
888 err("%s: Cannot read LNKSTATUS register\n", __func__); 901 ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
902 __func__);
889 return retval; 903 return retval;
890 } 904 }
891 905
@@ -899,7 +913,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
899 } 913 }
900 914
901 *value = lnk_speed; 915 *value = lnk_speed;
902 dbg("Current link speed = %d\n", lnk_speed); 916 ctrl_dbg(ctrl, "Current link speed = %d\n", lnk_speed);
903 917
904 return retval; 918 return retval;
905} 919}
@@ -914,7 +928,8 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
914 928
915 retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status); 929 retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
916 if (retval) { 930 if (retval) {
917 err("%s: Cannot read LNKSTATUS register\n", __func__); 931 ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
932 __func__);
918 return retval; 933 return retval;
919 } 934 }
920 935
@@ -949,7 +964,7 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
949 } 964 }
950 965
951 *value = lnk_wdth; 966 *value = lnk_wdth;
952 dbg("Current link width = %d\n", lnk_wdth); 967 ctrl_dbg(ctrl, "Current link width = %d\n", lnk_wdth);
953 968
954 return retval; 969 return retval;
955} 970}
@@ -998,7 +1013,8 @@ int pcie_enable_notification(struct controller *ctrl)
998 PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; 1013 PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
999 1014
1000 if (pcie_write_cmd(ctrl, cmd, mask)) { 1015 if (pcie_write_cmd(ctrl, cmd, mask)) {
1001 err("%s: Cannot enable software notification\n", __func__); 1016 ctrl_err(ctrl, "%s: Cannot enable software notification\n",
1017 __func__);
1002 return -1; 1018 return -1;
1003 } 1019 }
1004 return 0; 1020 return 0;
@@ -1010,7 +1026,8 @@ static void pcie_disable_notification(struct controller *ctrl)
1010 mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE | 1026 mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
1011 PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; 1027 PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
1012 if (pcie_write_cmd(ctrl, 0, mask)) 1028 if (pcie_write_cmd(ctrl, 0, mask))
1013 warn("%s: Cannot disable software notification\n", __func__); 1029 ctrl_warn(ctrl, "%s: Cannot disable software notification\n",
1030 __func__);
1014} 1031}
1015 1032
1016static int pcie_init_notification(struct controller *ctrl) 1033static int pcie_init_notification(struct controller *ctrl)
@@ -1071,34 +1088,45 @@ static inline void dbg_ctrl(struct controller *ctrl)
1071 if (!pciehp_debug) 1088 if (!pciehp_debug)
1072 return; 1089 return;
1073 1090
1074 dbg("Hotplug Controller:\n"); 1091 ctrl_info(ctrl, "Hotplug Controller:\n");
1075 dbg(" Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n", pci_name(pdev), pdev->irq); 1092 ctrl_info(ctrl, " Seg/Bus/Dev/Func/IRQ : %s IRQ %d\n",
1076 dbg(" Vendor ID : 0x%04x\n", pdev->vendor); 1093 pci_name(pdev), pdev->irq);
1077 dbg(" Device ID : 0x%04x\n", pdev->device); 1094 ctrl_info(ctrl, " Vendor ID : 0x%04x\n", pdev->vendor);
1078 dbg(" Subsystem ID : 0x%04x\n", pdev->subsystem_device); 1095 ctrl_info(ctrl, " Device ID : 0x%04x\n", pdev->device);
1079 dbg(" Subsystem Vendor ID : 0x%04x\n", pdev->subsystem_vendor); 1096 ctrl_info(ctrl, " Subsystem ID : 0x%04x\n",
1080 dbg(" PCIe Cap offset : 0x%02x\n", ctrl->cap_base); 1097 pdev->subsystem_device);
1098 ctrl_info(ctrl, " Subsystem Vendor ID : 0x%04x\n",
1099 pdev->subsystem_vendor);
1100 ctrl_info(ctrl, " PCIe Cap offset : 0x%02x\n", ctrl->cap_base);
1081 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 1101 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
1082 if (!pci_resource_len(pdev, i)) 1102 if (!pci_resource_len(pdev, i))
1083 continue; 1103 continue;
1084 dbg(" PCI resource [%d] : 0x%llx@0x%llx\n", i, 1104 ctrl_info(ctrl, " PCI resource [%d] : 0x%llx@0x%llx\n",
1085 (unsigned long long)pci_resource_len(pdev, i), 1105 i, (unsigned long long)pci_resource_len(pdev, i),
1086 (unsigned long long)pci_resource_start(pdev, i)); 1106 (unsigned long long)pci_resource_start(pdev, i));
1087 } 1107 }
1088 dbg("Slot Capabilities : 0x%08x\n", ctrl->slot_cap); 1108 ctrl_info(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap);
1089 dbg(" Physical Slot Number : %d\n", ctrl->first_slot); 1109 ctrl_info(ctrl, " Physical Slot Number : %d\n", ctrl->first_slot);
1090 dbg(" Attention Button : %3s\n", ATTN_BUTTN(ctrl) ? "yes" : "no"); 1110 ctrl_info(ctrl, " Attention Button : %3s\n",
1091 dbg(" Power Controller : %3s\n", POWER_CTRL(ctrl) ? "yes" : "no"); 1111 ATTN_BUTTN(ctrl) ? "yes" : "no");
1092 dbg(" MRL Sensor : %3s\n", MRL_SENS(ctrl) ? "yes" : "no"); 1112 ctrl_info(ctrl, " Power Controller : %3s\n",
1093 dbg(" Attention Indicator : %3s\n", ATTN_LED(ctrl) ? "yes" : "no"); 1113 POWER_CTRL(ctrl) ? "yes" : "no");
1094 dbg(" Power Indicator : %3s\n", PWR_LED(ctrl) ? "yes" : "no"); 1114 ctrl_info(ctrl, " MRL Sensor : %3s\n",
1095 dbg(" Hot-Plug Surprise : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no"); 1115 MRL_SENS(ctrl) ? "yes" : "no");
1096 dbg(" EMI Present : %3s\n", EMI(ctrl) ? "yes" : "no"); 1116 ctrl_info(ctrl, " Attention Indicator : %3s\n",
1097 dbg(" Command Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes"); 1117 ATTN_LED(ctrl) ? "yes" : "no");
1118 ctrl_info(ctrl, " Power Indicator : %3s\n",
1119 PWR_LED(ctrl) ? "yes" : "no");
1120 ctrl_info(ctrl, " Hot-Plug Surprise : %3s\n",
1121 HP_SUPR_RM(ctrl) ? "yes" : "no");
1122 ctrl_info(ctrl, " EMI Present : %3s\n",
1123 EMI(ctrl) ? "yes" : "no");
1124 ctrl_info(ctrl, " Command Completed : %3s\n",
1125 NO_CMD_CMPL(ctrl) ? "no" : "yes");
1098 pciehp_readw(ctrl, SLOTSTATUS, &reg16); 1126 pciehp_readw(ctrl, SLOTSTATUS, &reg16);
1099 dbg("Slot Status : 0x%04x\n", reg16); 1127 ctrl_info(ctrl, "Slot Status : 0x%04x\n", reg16);
1100 pciehp_readw(ctrl, SLOTCTRL, &reg16); 1128 pciehp_readw(ctrl, SLOTCTRL, &reg16);
1101 dbg("Slot Control : 0x%04x\n", reg16); 1129 ctrl_info(ctrl, "Slot Control : 0x%04x\n", reg16);
1102} 1130}
1103 1131
1104struct controller *pcie_init(struct pcie_device *dev) 1132struct controller *pcie_init(struct pcie_device *dev)
@@ -1109,19 +1137,21 @@ struct controller *pcie_init(struct pcie_device *dev)
1109 1137
1110 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 1138 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
1111 if (!ctrl) { 1139 if (!ctrl) {
1112 err("%s : out of memory\n", __func__); 1140 dev_err(&dev->device, "%s : out of memory\n", __func__);
1113 goto abort; 1141 goto abort;
1114 } 1142 }
1115 INIT_LIST_HEAD(&ctrl->slot_list); 1143 INIT_LIST_HEAD(&ctrl->slot_list);
1116 1144
1145 ctrl->pcie = dev;
1117 ctrl->pci_dev = pdev; 1146 ctrl->pci_dev = pdev;
1118 ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP); 1147 ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
1119 if (!ctrl->cap_base) { 1148 if (!ctrl->cap_base) {
1120 err("%s: Cannot find PCI Express capability\n", __func__); 1149 ctrl_err(ctrl, "%s: Cannot find PCI Express capability\n",
1150 __func__);
1121 goto abort; 1151 goto abort;
1122 } 1152 }
1123 if (pciehp_readl(ctrl, SLOTCAP, &slot_cap)) { 1153 if (pciehp_readl(ctrl, SLOTCAP, &slot_cap)) {
1124 err("%s: Cannot read SLOTCAP register\n", __func__); 1154 ctrl_err(ctrl, "%s: Cannot read SLOTCAP register\n", __func__);
1125 goto abort; 1155 goto abort;
1126 } 1156 }
1127 1157
@@ -1161,9 +1191,9 @@ struct controller *pcie_init(struct pcie_device *dev)
1161 goto abort_ctrl; 1191 goto abort_ctrl;
1162 } 1192 }
1163 1193
1164 info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n", 1194 ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n",
1165 pdev->vendor, pdev->device, 1195 pdev->vendor, pdev->device, pdev->subsystem_vendor,
1166 pdev->subsystem_vendor, pdev->subsystem_device); 1196 pdev->subsystem_device);
1167 1197
1168 if (pcie_init_slot(ctrl)) 1198 if (pcie_init_slot(ctrl))
1169 goto abort_ctrl; 1199 goto abort_ctrl;
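The pciehp hunks above convert the driver's bare err()/warn()/dbg() wrappers into ctrl_err()/ctrl_warn()/ctrl_info()/ctrl_dbg() variants that take the controller, so every message identifies the hot-plug port it refers to. The macro definitions live in drivers/pci/hotplug/pciehp.h and are not part of this excerpt; given the new ctrl->pcie member set up in pcie_init(), they plausibly wrap the generic dev_* helpers along these lines:

/* Plausible reconstruction only -- the real definitions are in
 * drivers/pci/hotplug/pciehp.h, which this diff does not show. */
#define ctrl_err(ctrl, format, arg...)	\
	dev_err(&(ctrl)->pcie->device, format, ## arg)
#define ctrl_warn(ctrl, format, arg...)	\
	dev_warn(&(ctrl)->pcie->device, format, ## arg)
#define ctrl_info(ctrl, format, arg...)	\
	dev_info(&(ctrl)->pcie->device, format, ## arg)
#define ctrl_dbg(ctrl, format, arg...)	\
	dev_dbg(&(ctrl)->pcie->device, format, ## arg)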
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index 6040dcceb256..ffd11148fbe2 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -198,18 +198,20 @@ int pciehp_configure_device(struct slot *p_slot)
198 struct pci_dev *dev; 198 struct pci_dev *dev;
199 struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate; 199 struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
200 int num, fn; 200 int num, fn;
201 struct controller *ctrl = p_slot->ctrl;
201 202
202 dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0)); 203 dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
203 if (dev) { 204 if (dev) {
204 err("Device %s already exists at %x:%x, cannot hot-add\n", 205 ctrl_err(ctrl,
205 pci_name(dev), p_slot->bus, p_slot->device); 206 "Device %s already exists at %x:%x, cannot hot-add\n",
207 pci_name(dev), p_slot->bus, p_slot->device);
206 pci_dev_put(dev); 208 pci_dev_put(dev);
207 return -EINVAL; 209 return -EINVAL;
208 } 210 }
209 211
210 num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0)); 212 num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
211 if (num == 0) { 213 if (num == 0) {
212 err("No new device found\n"); 214 ctrl_err(ctrl, "No new device found\n");
213 return -ENODEV; 215 return -ENODEV;
214 } 216 }
215 217
@@ -218,8 +220,8 @@ int pciehp_configure_device(struct slot *p_slot)
218 if (!dev) 220 if (!dev)
219 continue; 221 continue;
220 if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { 222 if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
221 err("Cannot hot-add display device %s\n", 223 ctrl_err(ctrl, "Cannot hot-add display device %s\n",
222 pci_name(dev)); 224 pci_name(dev));
223 pci_dev_put(dev); 225 pci_dev_put(dev);
224 continue; 226 continue;
225 } 227 }
@@ -244,9 +246,10 @@ int pciehp_unconfigure_device(struct slot *p_slot)
244 u8 presence = 0; 246 u8 presence = 0;
245 struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate; 247 struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
246 u16 command; 248 u16 command;
249 struct controller *ctrl = p_slot->ctrl;
247 250
248 dbg("%s: bus/dev = %x/%x\n", __func__, p_slot->bus, 251 ctrl_dbg(ctrl, "%s: bus/dev = %x/%x\n", __func__,
249 p_slot->device); 252 p_slot->bus, p_slot->device);
250 ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence); 253 ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
251 if (ret) 254 if (ret)
252 presence = 0; 255 presence = 0;
@@ -257,16 +260,17 @@ int pciehp_unconfigure_device(struct slot *p_slot)
257 if (!temp) 260 if (!temp)
258 continue; 261 continue;
259 if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) { 262 if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
260 err("Cannot remove display device %s\n", 263 ctrl_err(ctrl, "Cannot remove display device %s\n",
261 pci_name(temp)); 264 pci_name(temp));
262 pci_dev_put(temp); 265 pci_dev_put(temp);
263 continue; 266 continue;
264 } 267 }
265 if (temp->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) { 268 if (temp->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) {
266 pci_read_config_byte(temp, PCI_BRIDGE_CONTROL, &bctl); 269 pci_read_config_byte(temp, PCI_BRIDGE_CONTROL, &bctl);
267 if (bctl & PCI_BRIDGE_CTL_VGA) { 270 if (bctl & PCI_BRIDGE_CTL_VGA) {
268 err("Cannot remove display device %s\n", 271 ctrl_err(ctrl,
269 pci_name(temp)); 272 "Cannot remove display device %s\n",
273 pci_name(temp));
270 pci_dev_put(temp); 274 pci_dev_put(temp);
271 continue; 275 continue;
272 } 276 }
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 7d5921b1ee78..419919a87b0f 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -46,10 +46,10 @@
46#define PRESENT 1 /* Card in slot */ 46#define PRESENT 1 /* Card in slot */
47 47
48#define MY_NAME "rpaphp" 48#define MY_NAME "rpaphp"
49extern int debug; 49extern int rpaphp_debug;
50#define dbg(format, arg...) \ 50#define dbg(format, arg...) \
51 do { \ 51 do { \
52 if (debug) \ 52 if (rpaphp_debug) \
53 printk(KERN_DEBUG "%s: " format, \ 53 printk(KERN_DEBUG "%s: " format, \
54 MY_NAME , ## arg); \ 54 MY_NAME , ## arg); \
55 } while (0) 55 } while (0)
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 1f84f402acdb..95d02a08fdc7 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -37,7 +37,7 @@
37 /* and pci_do_scan_bus */ 37 /* and pci_do_scan_bus */
38#include "rpaphp.h" 38#include "rpaphp.h"
39 39
40int debug; 40int rpaphp_debug;
41LIST_HEAD(rpaphp_slot_head); 41LIST_HEAD(rpaphp_slot_head);
42 42
43#define DRIVER_VERSION "0.1" 43#define DRIVER_VERSION "0.1"
@@ -50,7 +50,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
50MODULE_DESCRIPTION(DRIVER_DESC); 50MODULE_DESCRIPTION(DRIVER_DESC);
51MODULE_LICENSE("GPL"); 51MODULE_LICENSE("GPL");
52 52
53module_param(debug, bool, 0644); 53module_param_named(debug, rpaphp_debug, bool, 0644);
54 54
55/** 55/**
56 * set_attention_status - set attention LED 56 * set_attention_status - set attention LED
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 5acfd4f3d4cb..513e1e282391 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -123,7 +123,7 @@ int rpaphp_enable_slot(struct slot *slot)
123 slot->state = CONFIGURED; 123 slot->state = CONFIGURED;
124 } 124 }
125 125
126 if (debug) { 126 if (rpaphp_debug) {
127 struct pci_dev *dev; 127 struct pci_dev *dev;
128 dbg("%s: pci_devs of slot[%s]\n", __func__, slot->dn->full_name); 128 dbg("%s: pci_devs of slot[%s]\n", __func__, slot->dn->full_name);
129 list_for_each_entry (dev, &bus->devices, bus_list) 129 list_for_each_entry (dev, &bus->devices, bus_list)
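Renaming the bare global "debug" to "rpaphp_debug" avoids a symbol clash when the driver is built into the kernel, while module_param_named() keeps the user-visible parameter name stable. As a minimal sketch of the pattern (the foo names are illustrative, not from this patch):

#include <linux/module.h>

/* module_param_named(name_seen_by_user, C_symbol, type, perm) */
static int foo_debug;
module_param_named(debug, foo_debug, bool, 0644);

Loading with "modprobe foo debug=1" (or "foo.debug=1" on the kernel command line when built in) behaves exactly as before the rename.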
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
index 279c940a0039..bf7d6ce9bbb3 100644
--- a/drivers/pci/htirq.c
+++ b/drivers/pci/htirq.c
@@ -126,7 +126,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
126 cfg->msg.address_hi = 0xffffffff; 126 cfg->msg.address_hi = 0xffffffff;
127 127
128 irq = create_irq(); 128 irq = create_irq();
129 if (irq < 0) { 129
130 if (irq <= 0) {
130 kfree(cfg); 131 kfree(cfg);
131 return -EBUSY; 132 return -EBUSY;
132 } 133 }
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 738d4c89581c..2de5a3238c94 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -1,3 +1,4 @@
1#include <linux/interrupt.h>
1#include <linux/dmar.h> 2#include <linux/dmar.h>
2#include <linux/spinlock.h> 3#include <linux/spinlock.h>
3#include <linux/jiffies.h> 4#include <linux/jiffies.h>
@@ -11,41 +12,64 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
11static int ir_ioapic_num; 12static int ir_ioapic_num;
12int intr_remapping_enabled; 13int intr_remapping_enabled;
13 14
14static struct { 15struct irq_2_iommu {
15 struct intel_iommu *iommu; 16 struct intel_iommu *iommu;
16 u16 irte_index; 17 u16 irte_index;
17 u16 sub_handle; 18 u16 sub_handle;
18 u8 irte_mask; 19 u8 irte_mask;
19} irq_2_iommu[NR_IRQS]; 20};
21
22static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
23
24static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
25{
26 return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL;
27}
28
29static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
30{
31 return irq_2_iommu(irq);
32}
20 33
21static DEFINE_SPINLOCK(irq_2_ir_lock); 34static DEFINE_SPINLOCK(irq_2_ir_lock);
22 35
23int irq_remapped(int irq) 36static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq)
24{ 37{
25 if (irq > NR_IRQS) 38 struct irq_2_iommu *irq_iommu;
26 return 0; 39
40 irq_iommu = irq_2_iommu(irq);
41
42 if (!irq_iommu)
43 return NULL;
44
45 if (!irq_iommu->iommu)
46 return NULL;
27 47
28 if (!irq_2_iommu[irq].iommu) 48 return irq_iommu;
29 return 0; 49}
30 50
31 return 1; 51int irq_remapped(int irq)
52{
53 return valid_irq_2_iommu(irq) != NULL;
32} 54}
33 55
34int get_irte(int irq, struct irte *entry) 56int get_irte(int irq, struct irte *entry)
35{ 57{
36 int index; 58 int index;
59 struct irq_2_iommu *irq_iommu;
37 60
38 if (!entry || irq > NR_IRQS) 61 if (!entry)
39 return -1; 62 return -1;
40 63
41 spin_lock(&irq_2_ir_lock); 64 spin_lock(&irq_2_ir_lock);
42 if (!irq_2_iommu[irq].iommu) { 65 irq_iommu = valid_irq_2_iommu(irq);
66 if (!irq_iommu) {
43 spin_unlock(&irq_2_ir_lock); 67 spin_unlock(&irq_2_ir_lock);
44 return -1; 68 return -1;
45 } 69 }
46 70
47 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; 71 index = irq_iommu->irte_index + irq_iommu->sub_handle;
48 *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index); 72 *entry = *(irq_iommu->iommu->ir_table->base + index);
49 73
50 spin_unlock(&irq_2_ir_lock); 74 spin_unlock(&irq_2_ir_lock);
51 return 0; 75 return 0;
@@ -54,6 +78,7 @@ int get_irte(int irq, struct irte *entry)
54int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) 78int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
55{ 79{
56 struct ir_table *table = iommu->ir_table; 80 struct ir_table *table = iommu->ir_table;
81 struct irq_2_iommu *irq_iommu;
57 u16 index, start_index; 82 u16 index, start_index;
58 unsigned int mask = 0; 83 unsigned int mask = 0;
59 int i; 84 int i;
@@ -61,6 +86,10 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
61 if (!count) 86 if (!count)
62 return -1; 87 return -1;
63 88
89 /* protect irq_2_iommu_alloc later */
90 if (irq >= nr_irqs)
91 return -1;
92
64 /* 93 /*
65 * start the IRTE search from index 0. 94 * start the IRTE search from index 0.
66 */ 95 */
@@ -100,10 +129,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
100 for (i = index; i < index + count; i++) 129 for (i = index; i < index + count; i++)
101 table->base[i].present = 1; 130 table->base[i].present = 1;
102 131
103 irq_2_iommu[irq].iommu = iommu; 132 irq_iommu = irq_2_iommu_alloc(irq);
104 irq_2_iommu[irq].irte_index = index; 133 irq_iommu->iommu = iommu;
105 irq_2_iommu[irq].sub_handle = 0; 134 irq_iommu->irte_index = index;
106 irq_2_iommu[irq].irte_mask = mask; 135 irq_iommu->sub_handle = 0;
136 irq_iommu->irte_mask = mask;
107 137
108 spin_unlock(&irq_2_ir_lock); 138 spin_unlock(&irq_2_ir_lock);
109 139
@@ -124,31 +154,33 @@ static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
124int map_irq_to_irte_handle(int irq, u16 *sub_handle) 154int map_irq_to_irte_handle(int irq, u16 *sub_handle)
125{ 155{
126 int index; 156 int index;
157 struct irq_2_iommu *irq_iommu;
127 158
128 spin_lock(&irq_2_ir_lock); 159 spin_lock(&irq_2_ir_lock);
129 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { 160 irq_iommu = valid_irq_2_iommu(irq);
161 if (!irq_iommu) {
130 spin_unlock(&irq_2_ir_lock); 162 spin_unlock(&irq_2_ir_lock);
131 return -1; 163 return -1;
132 } 164 }
133 165
134 *sub_handle = irq_2_iommu[irq].sub_handle; 166 *sub_handle = irq_iommu->sub_handle;
135 index = irq_2_iommu[irq].irte_index; 167 index = irq_iommu->irte_index;
136 spin_unlock(&irq_2_ir_lock); 168 spin_unlock(&irq_2_ir_lock);
137 return index; 169 return index;
138} 170}
139 171
140int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) 172int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
141{ 173{
174 struct irq_2_iommu *irq_iommu;
175
142 spin_lock(&irq_2_ir_lock); 176 spin_lock(&irq_2_ir_lock);
143 if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
144 spin_unlock(&irq_2_ir_lock);
145 return -1;
146 }
147 177
148 irq_2_iommu[irq].iommu = iommu; 178 irq_iommu = irq_2_iommu_alloc(irq);
149 irq_2_iommu[irq].irte_index = index; 179
150 irq_2_iommu[irq].sub_handle = subhandle; 180 irq_iommu->iommu = iommu;
151 irq_2_iommu[irq].irte_mask = 0; 181 irq_iommu->irte_index = index;
182 irq_iommu->sub_handle = subhandle;
183 irq_iommu->irte_mask = 0;
152 184
153 spin_unlock(&irq_2_ir_lock); 185 spin_unlock(&irq_2_ir_lock);
154 186
@@ -157,16 +189,19 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
157 189
158int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) 190int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
159{ 191{
192 struct irq_2_iommu *irq_iommu;
193
160 spin_lock(&irq_2_ir_lock); 194 spin_lock(&irq_2_ir_lock);
161 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { 195 irq_iommu = valid_irq_2_iommu(irq);
196 if (!irq_iommu) {
162 spin_unlock(&irq_2_ir_lock); 197 spin_unlock(&irq_2_ir_lock);
163 return -1; 198 return -1;
164 } 199 }
165 200
166 irq_2_iommu[irq].iommu = NULL; 201 irq_iommu->iommu = NULL;
167 irq_2_iommu[irq].irte_index = 0; 202 irq_iommu->irte_index = 0;
168 irq_2_iommu[irq].sub_handle = 0; 203 irq_iommu->sub_handle = 0;
 169 irq_2_iommu[irq].irte_mask = 0; 204 irq_iommu->irte_mask = 0;
170 205
171 spin_unlock(&irq_2_ir_lock); 206 spin_unlock(&irq_2_ir_lock);
172 207
@@ -178,16 +213,18 @@ int modify_irte(int irq, struct irte *irte_modified)
178 int index; 213 int index;
179 struct irte *irte; 214 struct irte *irte;
180 struct intel_iommu *iommu; 215 struct intel_iommu *iommu;
216 struct irq_2_iommu *irq_iommu;
181 217
182 spin_lock(&irq_2_ir_lock); 218 spin_lock(&irq_2_ir_lock);
183 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { 219 irq_iommu = valid_irq_2_iommu(irq);
220 if (!irq_iommu) {
184 spin_unlock(&irq_2_ir_lock); 221 spin_unlock(&irq_2_ir_lock);
185 return -1; 222 return -1;
186 } 223 }
187 224
188 iommu = irq_2_iommu[irq].iommu; 225 iommu = irq_iommu->iommu;
189 226
190 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; 227 index = irq_iommu->irte_index + irq_iommu->sub_handle;
191 irte = &iommu->ir_table->base[index]; 228 irte = &iommu->ir_table->base[index];
192 229
193 set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); 230 set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
@@ -203,18 +240,20 @@ int flush_irte(int irq)
203{ 240{
204 int index; 241 int index;
205 struct intel_iommu *iommu; 242 struct intel_iommu *iommu;
243 struct irq_2_iommu *irq_iommu;
206 244
207 spin_lock(&irq_2_ir_lock); 245 spin_lock(&irq_2_ir_lock);
208 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { 246 irq_iommu = valid_irq_2_iommu(irq);
247 if (!irq_iommu) {
209 spin_unlock(&irq_2_ir_lock); 248 spin_unlock(&irq_2_ir_lock);
210 return -1; 249 return -1;
211 } 250 }
212 251
213 iommu = irq_2_iommu[irq].iommu; 252 iommu = irq_iommu->iommu;
214 253
215 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; 254 index = irq_iommu->irte_index + irq_iommu->sub_handle;
216 255
217 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); 256 qi_flush_iec(iommu, index, irq_iommu->irte_mask);
218 spin_unlock(&irq_2_ir_lock); 257 spin_unlock(&irq_2_ir_lock);
219 258
220 return 0; 259 return 0;
@@ -246,28 +285,30 @@ int free_irte(int irq)
246 int index, i; 285 int index, i;
247 struct irte *irte; 286 struct irte *irte;
248 struct intel_iommu *iommu; 287 struct intel_iommu *iommu;
288 struct irq_2_iommu *irq_iommu;
249 289
250 spin_lock(&irq_2_ir_lock); 290 spin_lock(&irq_2_ir_lock);
251 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { 291 irq_iommu = valid_irq_2_iommu(irq);
292 if (!irq_iommu) {
252 spin_unlock(&irq_2_ir_lock); 293 spin_unlock(&irq_2_ir_lock);
253 return -1; 294 return -1;
254 } 295 }
255 296
256 iommu = irq_2_iommu[irq].iommu; 297 iommu = irq_iommu->iommu;
257 298
258 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; 299 index = irq_iommu->irte_index + irq_iommu->sub_handle;
259 irte = &iommu->ir_table->base[index]; 300 irte = &iommu->ir_table->base[index];
260 301
261 if (!irq_2_iommu[irq].sub_handle) { 302 if (!irq_iommu->sub_handle) {
262 for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++) 303 for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
263 set_64bit((unsigned long *)irte, 0); 304 set_64bit((unsigned long *)irte, 0);
264 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); 305 qi_flush_iec(iommu, index, irq_iommu->irte_mask);
265 } 306 }
266 307
267 irq_2_iommu[irq].iommu = NULL; 308 irq_iommu->iommu = NULL;
268 irq_2_iommu[irq].irte_index = 0; 309 irq_iommu->irte_index = 0;
269 irq_2_iommu[irq].sub_handle = 0; 310 irq_iommu->sub_handle = 0;
270 irq_2_iommu[irq].irte_mask = 0; 311 irq_iommu->irte_mask = 0;
271 312
272 spin_unlock(&irq_2_ir_lock); 313 spin_unlock(&irq_2_ir_lock);
273 314
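The intr_remapping.c rework replaces direct irq_2_iommu[irq] indexing with irq_2_iommu()/irq_2_iommu_alloc() accessors that bounds-check against the runtime nr_irqs, and folds the repeated "in range and has an iommu" test into valid_irq_2_iommu(). The indirection also lets a later patch swap the static NR_IRQS table for per-IRQ allocation without touching any caller; a hypothetical follow-up could look like:

/* Hypothetical sparse-IRQ follow-up, sketch only: allocate the mapping
 * on demand instead of a static NR_IRQS table (storing the resulting
 * pointer per IRQ is omitted here). */
static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
{
	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);

	if (!irq_iommu)
		irq_iommu = kzalloc(sizeof(*irq_iommu), GFP_ATOMIC);
	return irq_iommu;
}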
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 4a10b5624f72..d2812013fd22 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -378,23 +378,21 @@ static int msi_capability_init(struct pci_dev *dev)
378 entry->msi_attrib.masked = 1; 378 entry->msi_attrib.masked = 1;
379 entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ 379 entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
380 entry->msi_attrib.pos = pos; 380 entry->msi_attrib.pos = pos;
381 if (is_mask_bit_support(control)) { 381 if (entry->msi_attrib.maskbit) {
382 entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos, 382 entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
383 is_64bit_address(control)); 383 entry->msi_attrib.is_64);
384 } 384 }
385 entry->dev = dev; 385 entry->dev = dev;
386 if (entry->msi_attrib.maskbit) { 386 if (entry->msi_attrib.maskbit) {
387 unsigned int maskbits, temp; 387 unsigned int maskbits, temp;
388 /* All MSIs are unmasked by default, Mask them all */ 388 /* All MSIs are unmasked by default, Mask them all */
389 pci_read_config_dword(dev, 389 pci_read_config_dword(dev,
390 msi_mask_bits_reg(pos, is_64bit_address(control)), 390 msi_mask_bits_reg(pos, entry->msi_attrib.is_64),
391 &maskbits); 391 &maskbits);
392 temp = (1 << multi_msi_capable(control)); 392 temp = (1 << multi_msi_capable(control));
393 temp = ((temp - 1) & ~temp); 393 temp = ((temp - 1) & ~temp);
394 maskbits |= temp; 394 maskbits |= temp;
 395 pci_write_config_dword(dev, 395 pci_write_config_dword(dev, msi_mask_bits_reg(pos, entry->msi_attrib.is_64), maskbits);
396 msi_mask_bits_reg(pos, is_64bit_address(control)),
397 maskbits);
398 entry->msi_attrib.maskbits_mask = temp; 396 entry->msi_attrib.maskbits_mask = temp;
399 } 397 }
400 list_add_tail(&entry->list, &dev->msi_list); 398 list_add_tail(&entry->list, &dev->msi_list);
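In the mask-bits setup above, multi_msi_capable(control) -- a helper in drivers/pci/msi.h, not shown in this diff -- already decodes the log2-encoded Multiple Message Capable field into a vector count, so temp = 1 << count followed by (temp - 1) yields one mask bit per possible vector (the trailing "& ~temp" is effectively a no-op). A standalone worked example for a function advertising 4 vectors:

#include <stdio.h>

int main(void)
{
	unsigned int count = 4;			/* multi_msi_capable() result */
	unsigned int temp = 1u << count;	/* 0x10 */

	temp = (temp - 1) & ~temp;		/* 0x0f: mask vectors 0-3 */
	printf("maskbits |= 0x%x\n", temp);
	return 0;
}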
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index a13f53486114..b4cdd690ae71 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -43,18 +43,32 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
43{ 43{
44 struct pci_dynid *dynid; 44 struct pci_dynid *dynid;
45 struct pci_driver *pdrv = to_pci_driver(driver); 45 struct pci_driver *pdrv = to_pci_driver(driver);
46 const struct pci_device_id *ids = pdrv->id_table;
46 __u32 vendor, device, subvendor=PCI_ANY_ID, 47 __u32 vendor, device, subvendor=PCI_ANY_ID,
47 subdevice=PCI_ANY_ID, class=0, class_mask=0; 48 subdevice=PCI_ANY_ID, class=0, class_mask=0;
48 unsigned long driver_data=0; 49 unsigned long driver_data=0;
49 int fields=0; 50 int fields=0;
50 int retval = 0; 51 int retval;
51 52
52 fields = sscanf(buf, "%x %x %x %x %x %x %lux", 53 fields = sscanf(buf, "%x %x %x %x %x %x %lx",
53 &vendor, &device, &subvendor, &subdevice, 54 &vendor, &device, &subvendor, &subdevice,
54 &class, &class_mask, &driver_data); 55 &class, &class_mask, &driver_data);
55 if (fields < 2) 56 if (fields < 2)
56 return -EINVAL; 57 return -EINVAL;
57 58
59 /* Only accept driver_data values that match an existing id_table
60 entry */
61 retval = -EINVAL;
62 while (ids->vendor || ids->subvendor || ids->class_mask) {
63 if (driver_data == ids->driver_data) {
64 retval = 0;
65 break;
66 }
67 ids++;
68 }
69 if (retval) /* No match */
70 return retval;
71
58 dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); 72 dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
59 if (!dynid) 73 if (!dynid)
60 return -ENOMEM; 74 return -ENOMEM;
@@ -65,8 +79,7 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
65 dynid->id.subdevice = subdevice; 79 dynid->id.subdevice = subdevice;
66 dynid->id.class = class; 80 dynid->id.class = class;
67 dynid->id.class_mask = class_mask; 81 dynid->id.class_mask = class_mask;
68 dynid->id.driver_data = pdrv->dynids.use_driver_data ? 82 dynid->id.driver_data = driver_data;
69 driver_data : 0UL;
70 83
71 spin_lock(&pdrv->dynids.lock); 84 spin_lock(&pdrv->dynids.lock);
72 list_add_tail(&dynid->node, &pdrv->dynids.list); 85 list_add_tail(&dynid->node, &pdrv->dynids.list);
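The new_id validation added above walks pdrv->id_table until the terminating entry, which by convention has vendor, subvendor and class_mask all zero -- exactly the three fields the loop condition tests. For reference, a minimal table of the shape being walked (the IDs and quirk value are made up):

#define FOO_QUIRK_A	1	/* illustrative driver_data value */

static const struct pci_device_id foo_ids[] = {
	{ PCI_DEVICE(0x8086, 0x1234), .driver_data = FOO_QUIRK_A },
	{ 0, }	/* terminator: vendor, subvendor, class_mask all zero */
};

With that table, something like 'echo "8086 4321 ffffffff ffffffff 0 0 1" > /sys/bus/pci/drivers/foo/new_id' is accepted only because an existing entry already carries driver_data == 1; the old dynids.use_driver_data opt-in flag is gone.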
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 77baff022f71..110022d78689 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -423,7 +423,7 @@ pci_write_vpd(struct kobject *kobj, struct bin_attribute *bin_attr,
423 * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific 423 * Reads 1, 2, or 4 bytes from legacy I/O port space using an arch specific
424 * callback routine (pci_legacy_read). 424 * callback routine (pci_legacy_read).
425 */ 425 */
426ssize_t 426static ssize_t
427pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, 427pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
428 char *buf, loff_t off, size_t count) 428 char *buf, loff_t off, size_t count)
429{ 429{
@@ -448,7 +448,7 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
448 * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific 448 * Writes 1, 2, or 4 bytes from legacy I/O port space using an arch specific
449 * callback routine (pci_legacy_write). 449 * callback routine (pci_legacy_write).
450 */ 450 */
451ssize_t 451static ssize_t
452pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, 452pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
453 char *buf, loff_t off, size_t count) 453 char *buf, loff_t off, size_t count)
454{ 454{
@@ -468,11 +468,11 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
468 * @attr: struct bin_attribute for this file 468 * @attr: struct bin_attribute for this file
469 * @vma: struct vm_area_struct passed to mmap 469 * @vma: struct vm_area_struct passed to mmap
470 * 470 *
471 * Uses an arch specific callback, pci_mmap_legacy_page_range, to mmap 471 * Uses an arch specific callback, pci_mmap_legacy_mem_page_range, to mmap
472 * legacy memory space (first meg of bus space) into application virtual 472 * legacy memory space (first meg of bus space) into application virtual
473 * memory space. 473 * memory space.
474 */ 474 */
475int 475static int
476pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, 476pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
477 struct vm_area_struct *vma) 477 struct vm_area_struct *vma)
478{ 478{
@@ -480,7 +480,90 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
480 struct device, 480 struct device,
481 kobj)); 481 kobj));
482 482
483 return pci_mmap_legacy_page_range(bus, vma); 483 return pci_mmap_legacy_page_range(bus, vma, pci_mmap_mem);
484}
485
486/**
487 * pci_mmap_legacy_io - map legacy PCI IO into user memory space
488 * @kobj: kobject corresponding to device to be mapped
489 * @attr: struct bin_attribute for this file
490 * @vma: struct vm_area_struct passed to mmap
491 *
492 * Uses an arch specific callback, pci_mmap_legacy_io_page_range, to mmap
493 * legacy IO space (first meg of bus space) into application virtual
494 * memory space. Returns -ENOSYS if the operation isn't supported
495 */
496static int
497pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
498 struct vm_area_struct *vma)
499{
500 struct pci_bus *bus = to_pci_bus(container_of(kobj,
501 struct device,
502 kobj));
503
504 return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
505}
506
507/**
508 * pci_create_legacy_files - create legacy I/O port and memory files
509 * @b: bus to create files under
510 *
511 * Some platforms allow access to legacy I/O port and ISA memory space on
512 * a per-bus basis. This routine creates the files and ties them into
513 * their associated read, write and mmap files from pci-sysfs.c
514 *
 515 * On error unwind, but don't propagate the error to the caller
516 * as it is ok to set up the PCI bus without these files.
517 */
518void pci_create_legacy_files(struct pci_bus *b)
519{
520 int error;
521
522 b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
523 GFP_ATOMIC);
524 if (!b->legacy_io)
525 goto kzalloc_err;
526
527 b->legacy_io->attr.name = "legacy_io";
528 b->legacy_io->size = 0xffff;
529 b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
530 b->legacy_io->read = pci_read_legacy_io;
531 b->legacy_io->write = pci_write_legacy_io;
532 b->legacy_io->mmap = pci_mmap_legacy_io;
533 error = device_create_bin_file(&b->dev, b->legacy_io);
534 if (error)
535 goto legacy_io_err;
536
537 /* Allocated above after the legacy_io struct */
538 b->legacy_mem = b->legacy_io + 1;
539 b->legacy_mem->attr.name = "legacy_mem";
540 b->legacy_mem->size = 1024*1024;
541 b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
542 b->legacy_mem->mmap = pci_mmap_legacy_mem;
543 error = device_create_bin_file(&b->dev, b->legacy_mem);
544 if (error)
545 goto legacy_mem_err;
546
547 return;
548
549legacy_mem_err:
550 device_remove_bin_file(&b->dev, b->legacy_io);
551legacy_io_err:
552 kfree(b->legacy_io);
553 b->legacy_io = NULL;
554kzalloc_err:
555 printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
556 "and ISA memory resources to sysfs\n");
557 return;
558}
559
560void pci_remove_legacy_files(struct pci_bus *b)
561{
562 if (b->legacy_io) {
563 device_remove_bin_file(&b->dev, b->legacy_io);
564 device_remove_bin_file(&b->dev, b->legacy_mem);
565 kfree(b->legacy_io); /* both are allocated here */
566 }
484} 567}
485#endif /* HAVE_PCI_LEGACY */ 568#endif /* HAVE_PCI_LEGACY */
486 569
@@ -715,7 +798,7 @@ static struct bin_attribute pci_config_attr = {
715 .name = "config", 798 .name = "config",
716 .mode = S_IRUGO | S_IWUSR, 799 .mode = S_IRUGO | S_IWUSR,
717 }, 800 },
718 .size = 256, 801 .size = PCI_CFG_SPACE_SIZE,
719 .read = pci_read_config, 802 .read = pci_read_config,
720 .write = pci_write_config, 803 .write = pci_write_config,
721}; 804};
@@ -725,7 +808,7 @@ static struct bin_attribute pcie_config_attr = {
725 .name = "config", 808 .name = "config",
726 .mode = S_IRUGO | S_IWUSR, 809 .mode = S_IRUGO | S_IWUSR,
727 }, 810 },
728 .size = 4096, 811 .size = PCI_CFG_SPACE_EXP_SIZE,
729 .read = pci_read_config, 812 .read = pci_read_config,
730 .write = pci_write_config, 813 .write = pci_write_config,
731}; 814};
@@ -735,86 +818,103 @@ int __attribute__ ((weak)) pcibios_add_platform_entries(struct pci_dev *dev)
735 return 0; 818 return 0;
736} 819}
737 820
821static int pci_create_capabilities_sysfs(struct pci_dev *dev)
822{
823 int retval;
824 struct bin_attribute *attr;
825
826 /* If the device has VPD, try to expose it in sysfs. */
827 if (dev->vpd) {
828 attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
829 if (!attr)
830 return -ENOMEM;
831
832 attr->size = dev->vpd->len;
833 attr->attr.name = "vpd";
834 attr->attr.mode = S_IRUSR | S_IWUSR;
835 attr->read = pci_read_vpd;
836 attr->write = pci_write_vpd;
837 retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
838 if (retval) {
839 kfree(dev->vpd->attr);
840 return retval;
841 }
842 dev->vpd->attr = attr;
843 }
844
845 /* Active State Power Management */
846 pcie_aspm_create_sysfs_dev_files(dev);
847
848 return 0;
849}
850
738int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) 851int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
739{ 852{
740 struct bin_attribute *attr = NULL;
741 int retval; 853 int retval;
854 int rom_size = 0;
855 struct bin_attribute *attr;
742 856
743 if (!sysfs_initialized) 857 if (!sysfs_initialized)
744 return -EACCES; 858 return -EACCES;
745 859
746 if (pdev->cfg_size < 4096) 860 if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
747 retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr); 861 retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
748 else 862 else
749 retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr); 863 retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
750 if (retval) 864 if (retval)
751 goto err; 865 goto err;
752 866
753 /* If the device has VPD, try to expose it in sysfs. */
754 if (pdev->vpd) {
755 attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
756 if (attr) {
757 pdev->vpd->attr = attr;
758 attr->size = pdev->vpd->len;
759 attr->attr.name = "vpd";
760 attr->attr.mode = S_IRUSR | S_IWUSR;
761 attr->read = pci_read_vpd;
762 attr->write = pci_write_vpd;
763 retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
764 if (retval)
765 goto err_vpd;
766 } else {
767 retval = -ENOMEM;
768 goto err_config_file;
769 }
770 }
771
772 retval = pci_create_resource_files(pdev); 867 retval = pci_create_resource_files(pdev);
773 if (retval) 868 if (retval)
774 goto err_vpd_file; 869 goto err_config_file;
870
871 if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
872 rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
873 else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
874 rom_size = 0x20000;
775 875
776 /* If the device has a ROM, try to expose it in sysfs. */ 876 /* If the device has a ROM, try to expose it in sysfs. */
777 if (pci_resource_len(pdev, PCI_ROM_RESOURCE) || 877 if (rom_size) {
778 (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)) {
779 attr = kzalloc(sizeof(*attr), GFP_ATOMIC); 878 attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
780 if (attr) { 879 if (!attr) {
781 pdev->rom_attr = attr;
782 attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
783 attr->attr.name = "rom";
784 attr->attr.mode = S_IRUSR;
785 attr->read = pci_read_rom;
786 attr->write = pci_write_rom;
787 retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
788 if (retval)
789 goto err_rom;
790 } else {
791 retval = -ENOMEM; 880 retval = -ENOMEM;
792 goto err_resource_files; 881 goto err_resource_files;
793 } 882 }
883 attr->size = rom_size;
884 attr->attr.name = "rom";
885 attr->attr.mode = S_IRUSR;
886 attr->read = pci_read_rom;
887 attr->write = pci_write_rom;
888 retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
889 if (retval) {
890 kfree(attr);
891 goto err_resource_files;
892 }
893 pdev->rom_attr = attr;
794 } 894 }
895
795 /* add platform-specific attributes */ 896 /* add platform-specific attributes */
796 if (pcibios_add_platform_entries(pdev)) 897 retval = pcibios_add_platform_entries(pdev);
898 if (retval)
797 goto err_rom_file; 899 goto err_rom_file;
798 900
799 pcie_aspm_create_sysfs_dev_files(pdev); 901 /* add sysfs entries for various capabilities */
902 retval = pci_create_capabilities_sysfs(pdev);
903 if (retval)
904 goto err_rom_file;
800 905
801 return 0; 906 return 0;
802 907
803err_rom_file: 908err_rom_file:
804 if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) 909 if (rom_size) {
805 sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr); 910 sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
806err_rom: 911 kfree(pdev->rom_attr);
807 kfree(pdev->rom_attr); 912 pdev->rom_attr = NULL;
913 }
808err_resource_files: 914err_resource_files:
809 pci_remove_resource_files(pdev); 915 pci_remove_resource_files(pdev);
810err_vpd_file:
811 if (pdev->vpd) {
812 sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
813err_vpd:
814 kfree(pdev->vpd->attr);
815 }
816err_config_file: 916err_config_file:
817 if (pdev->cfg_size < 4096) 917 if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
818 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr); 918 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
819 else 919 else
820 sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr); 920 sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
@@ -822,6 +922,16 @@ err:
822 return retval; 922 return retval;
823} 923}
824 924
925static void pci_remove_capabilities_sysfs(struct pci_dev *dev)
926{
927 if (dev->vpd && dev->vpd->attr) {
928 sysfs_remove_bin_file(&dev->dev.kobj, dev->vpd->attr);
929 kfree(dev->vpd->attr);
930 }
931
932 pcie_aspm_remove_sysfs_dev_files(dev);
933}
934
825/** 935/**
826 * pci_remove_sysfs_dev_files - cleanup PCI specific sysfs files 936 * pci_remove_sysfs_dev_files - cleanup PCI specific sysfs files
827 * @pdev: device whose entries we should free 937 * @pdev: device whose entries we should free
@@ -830,27 +940,28 @@ err:
830 */ 940 */
831void pci_remove_sysfs_dev_files(struct pci_dev *pdev) 941void pci_remove_sysfs_dev_files(struct pci_dev *pdev)
832{ 942{
943 int rom_size = 0;
944
833 if (!sysfs_initialized) 945 if (!sysfs_initialized)
834 return; 946 return;
835 947
836 pcie_aspm_remove_sysfs_dev_files(pdev); 948 pci_remove_capabilities_sysfs(pdev);
837 949
838 if (pdev->vpd) { 950 if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE)
839 sysfs_remove_bin_file(&pdev->dev.kobj, pdev->vpd->attr);
840 kfree(pdev->vpd->attr);
841 }
842 if (pdev->cfg_size < 4096)
843 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr); 951 sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
844 else 952 else
845 sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr); 953 sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
846 954
847 pci_remove_resource_files(pdev); 955 pci_remove_resource_files(pdev);
848 956
849 if (pci_resource_len(pdev, PCI_ROM_RESOURCE)) { 957 if (pci_resource_len(pdev, PCI_ROM_RESOURCE))
850 if (pdev->rom_attr) { 958 rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
851 sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr); 959 else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
852 kfree(pdev->rom_attr); 960 rom_size = 0x20000;
853 } 961
962 if (rom_size && pdev->rom_attr) {
963 sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
964 kfree(pdev->rom_attr);
854 } 965 }
855} 966}
856 967
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index dbe9f39f4436..4db261e13e69 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -213,10 +213,13 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap)
213int pci_find_ext_capability(struct pci_dev *dev, int cap) 213int pci_find_ext_capability(struct pci_dev *dev, int cap)
214{ 214{
215 u32 header; 215 u32 header;
216 int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */ 216 int ttl;
217 int pos = 0x100; 217 int pos = PCI_CFG_SPACE_SIZE;
218 218
219 if (dev->cfg_size <= 256) 219 /* minimum 8 bytes per capability */
220 ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
221
222 if (dev->cfg_size <= PCI_CFG_SPACE_SIZE)
220 return 0; 223 return 0;
221 224
222 if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL) 225 if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
@@ -234,7 +237,7 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap)
234 return pos; 237 return pos;
235 238
236 pos = PCI_EXT_CAP_NEXT(header); 239 pos = PCI_EXT_CAP_NEXT(header);
237 if (pos < 0x100) 240 if (pos < PCI_CFG_SPACE_SIZE)
238 break; 241 break;
239 242
240 if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL) 243 if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
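The rewritten walk derives its bound from the two new constants instead of a magic 480: (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8 = (4096 - 256) / 8 = 480, the maximum number of 8-byte-aligned extended capabilities that fit above legacy config space. Each 32-bit extended capability header packs the ID, a version, and the offset of the next capability; for reference, the decode macros from include/linux/pci_regs.h:

#define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
#define PCI_EXT_CAP_VER(header)		((header >> 16) & 0xf)
#define PCI_EXT_CAP_NEXT(header)	((header >> 20) & 0xffc)

A next pointer that lands back below PCI_CFG_SPACE_SIZE terminates the list, hence the "pos < PCI_CFG_SPACE_SIZE" check.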
@@ -1127,6 +1130,27 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
1127} 1130}
1128 1131
1129/** 1132/**
1133 * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
1134 * @dev: PCI device to prepare
1135 * @enable: True to enable wake-up event generation; false to disable
1136 *
1137 * Many drivers want the device to wake up the system from D3_hot or D3_cold
1138 * and this function allows them to set that up cleanly - pci_enable_wake()
1139 * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI
1140 * ordering constraints.
1141 *
 1142 * This function only returns an error code if the device is not capable of
1143 * generating PME# from both D3_hot and D3_cold, and the platform is unable to
1144 * enable wake-up power for it.
1145 */
1146int pci_wake_from_d3(struct pci_dev *dev, bool enable)
1147{
1148 return pci_pme_capable(dev, PCI_D3cold) ?
1149 pci_enable_wake(dev, PCI_D3cold, enable) :
1150 pci_enable_wake(dev, PCI_D3hot, enable);
1151}
1152
1153/**
1130 * pci_target_state - find an appropriate low power state for a given PCI dev 1154 * pci_target_state - find an appropriate low power state for a given PCI dev
1131 * @dev: PCI device 1155 * @dev: PCI device
1132 * 1156 *
@@ -1242,25 +1266,25 @@ void pci_pm_init(struct pci_dev *dev)
1242 dev->d1_support = false; 1266 dev->d1_support = false;
1243 dev->d2_support = false; 1267 dev->d2_support = false;
1244 if (!pci_no_d1d2(dev)) { 1268 if (!pci_no_d1d2(dev)) {
1245 if (pmc & PCI_PM_CAP_D1) { 1269 if (pmc & PCI_PM_CAP_D1)
1246 dev_printk(KERN_DEBUG, &dev->dev, "supports D1\n");
1247 dev->d1_support = true; 1270 dev->d1_support = true;
1248 } 1271 if (pmc & PCI_PM_CAP_D2)
1249 if (pmc & PCI_PM_CAP_D2) {
1250 dev_printk(KERN_DEBUG, &dev->dev, "supports D2\n");
1251 dev->d2_support = true; 1272 dev->d2_support = true;
1252 } 1273
1274 if (dev->d1_support || dev->d2_support)
1275 dev_printk(KERN_DEBUG, &dev->dev, "supports%s%s\n",
1276 dev->d1_support ? " D1" : "",
1277 dev->d2_support ? " D2" : "");
1253 } 1278 }
1254 1279
1255 pmc &= PCI_PM_CAP_PME_MASK; 1280 pmc &= PCI_PM_CAP_PME_MASK;
1256 if (pmc) { 1281 if (pmc) {
1257 dev_printk(KERN_INFO, &dev->dev, 1282 dev_info(&dev->dev, "PME# supported from%s%s%s%s%s\n",
1258 "PME# supported from%s%s%s%s%s\n", 1283 (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "",
1259 (pmc & PCI_PM_CAP_PME_D0) ? " D0" : "", 1284 (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "",
1260 (pmc & PCI_PM_CAP_PME_D1) ? " D1" : "", 1285 (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "",
1261 (pmc & PCI_PM_CAP_PME_D2) ? " D2" : "", 1286 (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "",
1262 (pmc & PCI_PM_CAP_PME_D3) ? " D3hot" : "", 1287 (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
1263 (pmc & PCI_PM_CAP_PME_D3cold) ? " D3cold" : "");
1264 dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT; 1288 dev->pme_support = pmc >> PCI_PM_CAP_PME_SHIFT;
1265 /* 1289 /*
1266 * Make device's PM flags reflect the wake-up capability, but 1290 * Make device's PM flags reflect the wake-up capability, but
@@ -1275,6 +1299,38 @@ void pci_pm_init(struct pci_dev *dev)
1275 } 1299 }
1276} 1300}
1277 1301
1302/**
 1303 * pci_enable_ari - enable ARI forwarding if hardware supports it
1304 * @dev: the PCI device
1305 */
1306void pci_enable_ari(struct pci_dev *dev)
1307{
1308 int pos;
1309 u32 cap;
1310 u16 ctrl;
1311
1312 if (!dev->is_pcie)
1313 return;
1314
1315 if (dev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
1316 dev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
1317 return;
1318
1319 pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
1320 if (!pos)
1321 return;
1322
1323 pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap);
1324 if (!(cap & PCI_EXP_DEVCAP2_ARI))
1325 return;
1326
1327 pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl);
1328 ctrl |= PCI_EXP_DEVCTL2_ARI;
1329 pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
1330
1331 dev->ari_enabled = 1;
1332}
1333
1278int 1334int
1279pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) 1335pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
1280{ 1336{
@@ -1942,6 +1998,7 @@ EXPORT_SYMBOL(pci_restore_state);
1942EXPORT_SYMBOL(pci_pme_capable); 1998EXPORT_SYMBOL(pci_pme_capable);
1943EXPORT_SYMBOL(pci_pme_active); 1999EXPORT_SYMBOL(pci_pme_active);
1944EXPORT_SYMBOL(pci_enable_wake); 2000EXPORT_SYMBOL(pci_enable_wake);
2001EXPORT_SYMBOL(pci_wake_from_d3);
1945EXPORT_SYMBOL(pci_target_state); 2002EXPORT_SYMBOL(pci_target_state);
1946EXPORT_SYMBOL(pci_prepare_to_sleep); 2003EXPORT_SYMBOL(pci_prepare_to_sleep);
1947EXPORT_SYMBOL(pci_back_from_sleep); 2004EXPORT_SYMBOL(pci_back_from_sleep);
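pci_wake_from_d3() gives drivers a single call that arms wake-up for whichever D3 state the device can actually signal PME# from. A hedged usage sketch from a driver suspend path (the foo names and wol_enabled flag are made up, not from this patch):

struct foo_priv {
	int wol_enabled;	/* illustrative wake-on-LAN toggle */
};

static int foo_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct foo_priv *priv = pci_get_drvdata(pdev);

	pci_save_state(pdev);
	pci_wake_from_d3(pdev, priv->wol_enabled);
	pci_set_power_state(pdev, pci_choose_state(pdev, state));
	return 0;
}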
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d807cd786f20..b205ab866a1d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1,3 +1,9 @@
1#ifndef DRIVERS_PCI_H
2#define DRIVERS_PCI_H
3
4#define PCI_CFG_SPACE_SIZE 256
5#define PCI_CFG_SPACE_EXP_SIZE 4096
6
1/* Functions internal to the PCI core code */ 7/* Functions internal to the PCI core code */
2 8
3extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env); 9extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env);
@@ -76,7 +82,13 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; }
76/* Functions for PCI Hotplug drivers to use */ 82/* Functions for PCI Hotplug drivers to use */
77extern unsigned int pci_do_scan_bus(struct pci_bus *bus); 83extern unsigned int pci_do_scan_bus(struct pci_bus *bus);
78 84
85#ifdef HAVE_PCI_LEGACY
86extern void pci_create_legacy_files(struct pci_bus *bus);
79extern void pci_remove_legacy_files(struct pci_bus *bus); 87extern void pci_remove_legacy_files(struct pci_bus *bus);
88#else
89static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
90static inline void pci_remove_legacy_files(struct pci_bus *bus) { return; }
91#endif
80 92
81/* Lock for read/write access to pci device and bus lists */ 93/* Lock for read/write access to pci device and bus lists */
82extern struct rw_semaphore pci_bus_sem; 94extern struct rw_semaphore pci_bus_sem;
@@ -109,6 +121,7 @@ static inline int pci_no_d1d2(struct pci_dev *dev)
109extern int pcie_mch_quirk; 121extern int pcie_mch_quirk;
110extern struct device_attribute pci_dev_attrs[]; 122extern struct device_attribute pci_dev_attrs[];
111extern struct device_attribute dev_attr_cpuaffinity; 123extern struct device_attribute dev_attr_cpuaffinity;
124extern struct device_attribute dev_attr_cpulistaffinity;
112 125
113/** 126/**
114 * pci_match_one_device - Tell if a PCI device structure has a matching 127 * pci_match_one_device - Tell if a PCI device structure has a matching
@@ -144,3 +157,16 @@ struct pci_slot_attribute {
144}; 157};
145#define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr) 158#define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr)
146 159
160extern void pci_enable_ari(struct pci_dev *dev);
161/**
162 * pci_ari_enabled - query ARI forwarding status
163 * @dev: the PCI device
164 *
 165 * Returns 1 if ARI forwarding is enabled, or 0 if not enabled.
166 */
167static inline int pci_ari_enabled(struct pci_dev *dev)
168{
169 return dev->ari_enabled;
170}
171
172#endif /* DRIVERS_PCI_H */
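pci_enable_ari() turns on ARI forwarding in root and downstream ports whose Device Capabilities 2 register advertises it, which lets the device below use the full 8-bit function number space (up to 256 functions) instead of 3 bits. pci_ari_enabled() exists so other core code can branch on that; a hypothetical consumer in the scan path might look like:

/* Hypothetical sketch only: widen the function scan under a bridge
 * with ARI forwarding enabled. */
static int foo_scan_fn_limit(struct pci_bus *bus)
{
	return pci_ari_enabled(bus->self) ? 256 : 8;
}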
diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 77036f46acfe..e390707661dd 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -105,7 +105,7 @@ static irqreturn_t aer_irq(int irq, void *context)
105 unsigned long flags; 105 unsigned long flags;
106 int pos; 106 int pos;
107 107
108 pos = pci_find_aer_capability(pdev->port); 108 pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR);
109 /* 109 /*
110 * Must lock access to Root Error Status Reg, Root Error ID Reg, 110 * Must lock access to Root Error Status Reg, Root Error ID Reg,
111 * and Root error producer/consumer index 111 * and Root error producer/consumer index
@@ -252,7 +252,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
252 u32 status; 252 u32 status;
253 int pos; 253 int pos;
254 254
255 pos = pci_find_aer_capability(dev); 255 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
256 256
257 /* Disable Root's interrupt in response to error messages */ 257 /* Disable Root's interrupt in response to error messages */
258 pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0); 258 pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0);
@@ -316,7 +316,7 @@ static void aer_error_resume(struct pci_dev *dev)
316 pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16); 316 pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16);
317 317
318 /* Clean AER Root Error Status */ 318 /* Clean AER Root Error Status */
319 pos = pci_find_aer_capability(dev); 319 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
320 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); 320 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
321 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); 321 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
322 if (dev->error_state == pci_channel_io_normal) 322 if (dev->error_state == pci_channel_io_normal)
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index ee5e7b5176d0..dfc63d01f20a 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -28,41 +28,15 @@
28static int forceload; 28static int forceload;
29module_param(forceload, bool, 0); 29module_param(forceload, bool, 0);
30 30
31#define PCI_CFG_SPACE_SIZE (0x100)
32int pci_find_aer_capability(struct pci_dev *dev)
33{
34 int pos;
35 u32 reg32 = 0;
36
37 /* Check if it's a pci-express device */
38 pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
39 if (!pos)
40 return 0;
41
42 /* Check if it supports pci-express AER */
43 pos = PCI_CFG_SPACE_SIZE;
44 while (pos) {
45 if (pci_read_config_dword(dev, pos, &reg32))
46 return 0;
47
48 /* some broken boards return ~0 */
49 if (reg32 == 0xffffffff)
50 return 0;
51
52 if (PCI_EXT_CAP_ID(reg32) == PCI_EXT_CAP_ID_ERR)
53 break;
54
55 pos = reg32 >> 20;
56 }
57
58 return pos;
59}
60
61int pci_enable_pcie_error_reporting(struct pci_dev *dev) 31int pci_enable_pcie_error_reporting(struct pci_dev *dev)
62{ 32{
63 u16 reg16 = 0; 33 u16 reg16 = 0;
64 int pos; 34 int pos;
65 35
36 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
37 if (!pos)
38 return -EIO;
39
66 pos = pci_find_capability(dev, PCI_CAP_ID_EXP); 40 pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
67 if (!pos) 41 if (!pos)
68 return -EIO; 42 return -EIO;
@@ -102,7 +76,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
102 int pos; 76 int pos;
103 u32 status, mask; 77 u32 status, mask;
104 78
105 pos = pci_find_aer_capability(dev); 79 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
106 if (!pos) 80 if (!pos)
107 return -EIO; 81 return -EIO;
108 82
@@ -123,7 +97,7 @@ int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
123 int pos; 97 int pos;
124 u32 status; 98 u32 status;
125 99
126 pos = pci_find_aer_capability(dev); 100 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
127 if (!pos) 101 if (!pos)
128 return -EIO; 102 return -EIO;
129 103
@@ -502,7 +476,7 @@ static void handle_error_source(struct pcie_device * aerdev,
 502 * Correctable error does not need software intervention. 476 * Correctable error does not need software intervention.
503 * No need to go through error recovery process. 477 * No need to go through error recovery process.
504 */ 478 */
505 pos = pci_find_aer_capability(dev); 479 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
506 if (pos) 480 if (pos)
507 pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, 481 pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
508 info.status); 482 info.status);
@@ -542,7 +516,7 @@ void aer_enable_rootport(struct aer_rpc *rpc)
542 reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK); 516 reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
543 pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16); 517 pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);
544 518
545 aer_pos = pci_find_aer_capability(pdev); 519 aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
546 /* Clear error status */ 520 /* Clear error status */
547 pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32); 521 pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
548 pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); 522 pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
@@ -579,7 +553,7 @@ static void disable_root_aer(struct aer_rpc *rpc)
579 u32 reg32; 553 u32 reg32;
580 int pos; 554 int pos;
581 555
582 pos = pci_find_aer_capability(pdev); 556 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
583 /* Disable Root's interrupt in response to error messages */ 557 /* Disable Root's interrupt in response to error messages */
584 pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0); 558 pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);
585 559
@@ -618,7 +592,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
618{ 592{
619 int pos; 593 int pos;
620 594
621 pos = pci_find_aer_capability(dev); 595 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
622 596
623 /* The device might not support AER */ 597 /* The device might not support AER */
624 if (!pos) 598 if (!pos)
@@ -755,7 +729,6 @@ int aer_init(struct pcie_device *dev)
755 return AER_SUCCESS; 729 return AER_SUCCESS;
756} 730}
757 731
758EXPORT_SYMBOL_GPL(pci_find_aer_capability);
759EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); 732EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
760EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); 733EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
761EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); 734EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 851f5b83cdbc..8f63f4c6b85f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -528,9 +528,9 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
528 pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP, 528 pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
529 &reg32); 529 &reg32);
530 if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) { 530 if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) {
531 printk("Pre-1.1 PCIe device detected, " 531 dev_printk(KERN_INFO, &child_dev->dev, "disabling ASPM"
532 "disable ASPM for %s. It can be enabled forcedly" 532 " on pre-1.1 PCIe device. You can enable it"
533 " with 'pcie_aspm=force'\n", pci_name(pdev)); 533 " with 'pcie_aspm=force'\n");
534 return -EINVAL; 534 return -EINVAL;
535 } 535 }
536 } 536 }
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 3656e0349dd1..2529f3f2ea5a 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -25,7 +25,6 @@
25#define PCIE_CAPABILITIES_REG 0x2 25#define PCIE_CAPABILITIES_REG 0x2
26#define PCIE_SLOT_CAPABILITIES_REG 0x14 26#define PCIE_SLOT_CAPABILITIES_REG 0x14
27#define PCIE_PORT_DEVICE_MAXSERVICES 4 27#define PCIE_PORT_DEVICE_MAXSERVICES 4
28#define PCI_CFG_SPACE_SIZE 256
29 28
30#define get_descriptor_id(type, service) (((type - 4) << 4) | service) 29#define get_descriptor_id(type, service) (((type - 4) << 4) | service)
31 30
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 890f0d2b370a..2e091e014829 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -195,24 +195,11 @@ static int get_port_device_capability(struct pci_dev *dev)
195 /* PME Capable - root port capability */ 195 /* PME Capable - root port capability */
196 if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT) 196 if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT)
197 services |= PCIE_PORT_SERVICE_PME; 197 services |= PCIE_PORT_SERVICE_PME;
198 198
199 pos = PCI_CFG_SPACE_SIZE; 199 if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
200 while (pos) { 200 services |= PCIE_PORT_SERVICE_AER;
201 pci_read_config_dword(dev, pos, &reg32); 201 if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
202 switch (reg32 & 0xffff) { 202 services |= PCIE_PORT_SERVICE_VC;
203 case PCI_EXT_CAP_ID_ERR:
204 services |= PCIE_PORT_SERVICE_AER;
205 pos = reg32 >> 20;
206 break;
207 case PCI_EXT_CAP_ID_VC:
208 services |= PCIE_PORT_SERVICE_VC;
209 pos = reg32 >> 20;
210 break;
211 default:
212 pos = 0;
213 break;
214 }
215 }
216 203
217 return services; 204 return services;
218} 205}
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 367c9c20000d..584422da8d8b 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -91,7 +91,7 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
91 91
92 pci_set_master(dev); 92 pci_set_master(dev);
93 if (!dev->irq && dev->pin) { 93 if (!dev->irq && dev->pin) {
94 dev_warn(&dev->dev, "device [%04x/%04x] has invalid IRQ; " 94 dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
95 "check vendor BIOS\n", dev->vendor, dev->device); 95 "check vendor BIOS\n", dev->vendor, dev->device);
96 } 96 }
97 if (pcie_port_device_register(dev)) { 97 if (pcie_port_device_register(dev)) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index d3db8b249729..aaaf0a1fed22 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -14,8 +14,6 @@
14 14
15#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ 15#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
16#define CARDBUS_RESERVE_BUSNR 3 16#define CARDBUS_RESERVE_BUSNR 3
17#define PCI_CFG_SPACE_SIZE 256
18#define PCI_CFG_SPACE_EXP_SIZE 4096
19 17
20/* Ugh. Need to stop exporting this to modules. */ 18/* Ugh. Need to stop exporting this to modules. */
21LIST_HEAD(pci_root_buses); 19LIST_HEAD(pci_root_buses);
@@ -44,72 +42,6 @@ int no_pci_devices(void)
44} 42}
45EXPORT_SYMBOL(no_pci_devices); 43EXPORT_SYMBOL(no_pci_devices);
46 44
47#ifdef HAVE_PCI_LEGACY
48/**
49 * pci_create_legacy_files - create legacy I/O port and memory files
50 * @b: bus to create files under
51 *
52 * Some platforms allow access to legacy I/O port and ISA memory space on
53 * a per-bus basis. This routine creates the files and ties them into
54 * their associated read, write and mmap files from pci-sysfs.c
55 *
56 * On error unwind, but don't propogate the error to the caller
57 * as it is ok to set up the PCI bus without these files.
58 */
59static void pci_create_legacy_files(struct pci_bus *b)
60{
61 int error;
62
63 b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
64 GFP_ATOMIC);
65 if (!b->legacy_io)
66 goto kzalloc_err;
67
68 b->legacy_io->attr.name = "legacy_io";
69 b->legacy_io->size = 0xffff;
70 b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
71 b->legacy_io->read = pci_read_legacy_io;
72 b->legacy_io->write = pci_write_legacy_io;
73 error = device_create_bin_file(&b->dev, b->legacy_io);
74 if (error)
75 goto legacy_io_err;
76
77 /* Allocated above after the legacy_io struct */
78 b->legacy_mem = b->legacy_io + 1;
79 b->legacy_mem->attr.name = "legacy_mem";
80 b->legacy_mem->size = 1024*1024;
81 b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
82 b->legacy_mem->mmap = pci_mmap_legacy_mem;
83 error = device_create_bin_file(&b->dev, b->legacy_mem);
84 if (error)
85 goto legacy_mem_err;
86
87 return;
88
89legacy_mem_err:
90 device_remove_bin_file(&b->dev, b->legacy_io);
91legacy_io_err:
92 kfree(b->legacy_io);
93 b->legacy_io = NULL;
94kzalloc_err:
95 printk(KERN_WARNING "pci: warning: could not create legacy I/O port "
96 "and ISA memory resources to sysfs\n");
97 return;
98}
99
100void pci_remove_legacy_files(struct pci_bus *b)
101{
102 if (b->legacy_io) {
103 device_remove_bin_file(&b->dev, b->legacy_io);
104 device_remove_bin_file(&b->dev, b->legacy_mem);
105 kfree(b->legacy_io); /* both are allocated here */
106 }
107}
108#else /* !HAVE_PCI_LEGACY */
109static inline void pci_create_legacy_files(struct pci_bus *bus) { return; }
110void pci_remove_legacy_files(struct pci_bus *bus) { return; }
111#endif /* HAVE_PCI_LEGACY */
112
113/* 45/*
114 * PCI Bus Class Devices 46 * PCI Bus Class Devices
115 */ 47 */
@@ -219,7 +151,7 @@ static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
219 151
220 res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK; 152 res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
221 153
222 if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64) 154 if (res->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
223 return pci_bar_mem64; 155 return pci_bar_mem64;
224 return pci_bar_mem32; 156 return pci_bar_mem32;
225} 157}
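The decode_bar() change is a genuine bug fix: res->flags keeps all of the BAR's low bits, so an equality test fails as soon as any other flag, say the prefetchable bit, is set alongside the 64-bit type. A worked example:

    /* A prefetchable 64-bit memory BAR: */
    u32 flags = PCI_BASE_ADDRESS_MEM_TYPE_64        /* 0x04 */
              | PCI_BASE_ADDRESS_MEM_PREFETCH;      /* 0x08 */

    flags == PCI_BASE_ADDRESS_MEM_TYPE_64;  /* 0x0c == 0x04 -> false */
    flags &  PCI_BASE_ADDRESS_MEM_TYPE_64;  /* 0x0c &  0x04 -> nonzero */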
@@ -304,8 +236,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
304 } else { 236 } else {
305 res->start = l64; 237 res->start = l64;
306 res->end = l64 + sz64; 238 res->end = l64 + sz64;
307 printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: %pR\n", 239 dev_printk(KERN_DEBUG, &dev->dev,
308 pci_name(dev), pos, res); 240 "reg %x 64bit mmio: %pR\n", pos, res);
309 } 241 }
310 } else { 242 } else {
311 sz = pci_size(l, sz, mask); 243 sz = pci_size(l, sz, mask);
@@ -315,10 +247,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
315 247
316 res->start = l; 248 res->start = l;
317 res->end = l + sz; 249 res->end = l + sz;
318 printk(KERN_DEBUG "PCI: %s reg %x %s: %pR\n", 250
319 pci_name(dev), pos, 251 dev_printk(KERN_DEBUG, &dev->dev, "reg %x %s: %pR\n", pos,
320 (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio", 252 (res->flags & IORESOURCE_IO) ? "io port" : "32bit mmio",
321 res); 253 res);
322 } 254 }
323 255
324 out: 256 out:
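These probe.c hunks also lean on the %pR printk extension, which formats a struct resource pointer as its range and type in one conversion. A minimal sketch (the field values are made up for illustration):

    struct resource r = {
            .start = 0xf0000000,
            .end   = 0xf7ffffff,
            .flags = IORESOURCE_MEM,
    };
    printk(KERN_DEBUG "reg: %pR\n", &r);    /* prints the range and type */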
@@ -389,8 +321,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
389 res->start = base; 321 res->start = base;
390 if (!res->end) 322 if (!res->end)
391 res->end = limit + 0xfff; 323 res->end = limit + 0xfff;
392 printk(KERN_DEBUG "PCI: bridge %s io port: %pR\n", 324 dev_printk(KERN_DEBUG, &dev->dev, "bridge io port: %pR\n", res);
393 pci_name(dev), res);
394 } 325 }
395 326
396 res = child->resource[1]; 327 res = child->resource[1];
@@ -402,8 +333,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
402 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; 333 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
403 res->start = base; 334 res->start = base;
404 res->end = limit + 0xfffff; 335 res->end = limit + 0xfffff;
405 printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: %pR\n", 336 dev_printk(KERN_DEBUG, &dev->dev, "bridge 32bit mmio: %pR\n",
406 pci_name(dev), res); 337 res);
407 } 338 }
408 339
409 res = child->resource[2]; 340 res = child->resource[2];
@@ -439,9 +370,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
439 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; 370 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
440 res->start = base; 371 res->start = base;
441 res->end = limit + 0xfffff; 372 res->end = limit + 0xfffff;
442 printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: %pR\n", 373 dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n",
443 pci_name(dev), 374 (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
444 (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64":"32", res); 375 res);
445 } 376 }
446} 377}
447 378
@@ -762,7 +693,7 @@ static int pci_setup_device(struct pci_dev * dev)
762 dev->class = class; 693 dev->class = class;
763 class >>= 8; 694 class >>= 8;
764 695
765 dev_dbg(&dev->dev, "found [%04x/%04x] class %06x header type %02x\n", 696 dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
766 dev->vendor, dev->device, class, dev->hdr_type); 697 dev->vendor, dev->device, class, dev->hdr_type);
767 698
768 /* "Unknown power state" */ 699 /* "Unknown power state" */
@@ -844,6 +775,11 @@ static int pci_setup_device(struct pci_dev * dev)
844 return 0; 775 return 0;
845} 776}
846 777
778static void pci_release_capabilities(struct pci_dev *dev)
779{
780 pci_vpd_release(dev);
781}
782
847/** 783/**
848 * pci_release_dev - free a pci device structure when all users of it are finished. 784 * pci_release_dev - free a pci device structure when all users of it are finished.
849 * @dev: device that's been disconnected 785 * @dev: device that's been disconnected
@@ -856,7 +792,7 @@ static void pci_release_dev(struct device *dev)
856 struct pci_dev *pci_dev; 792 struct pci_dev *pci_dev;
857 793
858 pci_dev = to_pci_dev(dev); 794 pci_dev = to_pci_dev(dev);
859 pci_vpd_release(pci_dev); 795 pci_release_capabilities(pci_dev);
860 kfree(pci_dev); 796 kfree(pci_dev);
861} 797}
862 798
@@ -887,8 +823,9 @@ static void set_pcie_port_type(struct pci_dev *pdev)
887int pci_cfg_space_size_ext(struct pci_dev *dev) 823int pci_cfg_space_size_ext(struct pci_dev *dev)
888{ 824{
889 u32 status; 825 u32 status;
826 int pos = PCI_CFG_SPACE_SIZE;
890 827
891 if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) 828 if (pci_read_config_dword(dev, pos, &status) != PCIBIOS_SUCCESSFUL)
892 goto fail; 829 goto fail;
893 if (status == 0xffffffff) 830 if (status == 0xffffffff)
894 goto fail; 831 goto fail;
@@ -936,8 +873,6 @@ struct pci_dev *alloc_pci_dev(void)
936 873
937 INIT_LIST_HEAD(&dev->bus_list); 874 INIT_LIST_HEAD(&dev->bus_list);
938 875
939 pci_msi_init_pci_dev(dev);
940
941 return dev; 876 return dev;
942} 877}
943EXPORT_SYMBOL(alloc_pci_dev); 878EXPORT_SYMBOL(alloc_pci_dev);
@@ -949,6 +884,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
949static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) 884static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
950{ 885{
951 struct pci_dev *dev; 886 struct pci_dev *dev;
887 struct pci_slot *slot;
952 u32 l; 888 u32 l;
953 u8 hdr_type; 889 u8 hdr_type;
954 int delay = 1; 890 int delay = 1;
@@ -997,6 +933,10 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
997 dev->error_state = pci_channel_io_normal; 933 dev->error_state = pci_channel_io_normal;
998 set_pcie_port_type(dev); 934 set_pcie_port_type(dev);
999 935
936 list_for_each_entry(slot, &bus->slots, list)
937 if (PCI_SLOT(devfn) == slot->number)
938 dev->slot = slot;
939
1000 /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) 940 /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
1001 set this higher, assuming the system even supports it. */ 941 set this higher, assuming the system even supports it. */
1002 dev->dma_mask = 0xffffffff; 942 dev->dma_mask = 0xffffffff;
@@ -1005,9 +945,22 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
1005 return NULL; 945 return NULL;
1006 } 946 }
1007 947
948 return dev;
949}
950
951static void pci_init_capabilities(struct pci_dev *dev)
952{
953 /* MSI/MSI-X list */
954 pci_msi_init_pci_dev(dev);
955
956 /* Power Management */
957 pci_pm_init(dev);
958
959 /* Vital Product Data */
1008 pci_vpd_pci22_init(dev); 960 pci_vpd_pci22_init(dev);
1009 961
1010 return dev; 962 /* Alternative Routing-ID Forwarding */
963 pci_enable_ari(dev);
1011} 964}
1012 965
1013void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) 966void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
@@ -1026,8 +979,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
1026 /* Fix up broken headers */ 979 /* Fix up broken headers */
1027 pci_fixup_device(pci_fixup_header, dev); 980 pci_fixup_device(pci_fixup_header, dev);
1028 981
1029 /* Initialize power management of the device */ 982 /* Initialize various capabilities */
1030 pci_pm_init(dev); 983 pci_init_capabilities(dev);
1031 984
1032 /* 985 /*
1033 * Add the device to our list of discovered devices 986 * Add the device to our list of discovered devices
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 832175d9ca25..96cf8ecd04ce 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -24,6 +24,14 @@
24#include <linux/kallsyms.h> 24#include <linux/kallsyms.h>
25#include "pci.h" 25#include "pci.h"
26 26
27int isa_dma_bridge_buggy;
28EXPORT_SYMBOL(isa_dma_bridge_buggy);
29int pci_pci_problems;
30EXPORT_SYMBOL(pci_pci_problems);
31int pcie_mch_quirk;
32EXPORT_SYMBOL(pcie_mch_quirk);
33
34#ifdef CONFIG_PCI_QUIRKS
27/* The Mellanox Tavor device gives false positive parity errors 35/* The Mellanox Tavor device gives false positive parity errors
28 * Mark this device with a broken_parity_status, to allow 36 * Mark this device with a broken_parity_status, to allow
29 * PCI scanning code to "skip" this now blacklisted device. 37 * PCI scanning code to "skip" this now blacklisted device.
@@ -76,8 +84,6 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_p
76 84
77 This appears to be BIOS not version dependent. So presumably there is a 85 This appears to be BIOS not version dependent. So presumably there is a
78 chipset level fix */ 86 chipset level fix */
79int isa_dma_bridge_buggy;
80EXPORT_SYMBOL(isa_dma_bridge_buggy);
81 87
82static void __devinit quirk_isa_dma_hangs(struct pci_dev *dev) 88static void __devinit quirk_isa_dma_hangs(struct pci_dev *dev)
83{ 89{
@@ -98,9 +104,6 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_1, quirk_isa_d
98DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_2, quirk_isa_dma_hangs); 104DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_2, quirk_isa_dma_hangs);
99DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_3, quirk_isa_dma_hangs); 105DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_3, quirk_isa_dma_hangs);
100 106
101int pci_pci_problems;
102EXPORT_SYMBOL(pci_pci_problems);
103
104/* 107/*
105 * Chipsets where PCI->PCI transfers vanish or hang 108 * Chipsets where PCI->PCI transfers vanish or hang
106 */ 109 */
@@ -1376,9 +1379,6 @@ static void __init quirk_alder_ioapic(struct pci_dev *pdev)
1376DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EESSC, quirk_alder_ioapic); 1379DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EESSC, quirk_alder_ioapic);
1377#endif 1380#endif
1378 1381
1379int pcie_mch_quirk;
1380EXPORT_SYMBOL(pcie_mch_quirk);
1381
1382static void __devinit quirk_pcie_mch(struct pci_dev *pdev) 1382static void __devinit quirk_pcie_mch(struct pci_dev *pdev)
1383{ 1383{
1384 pcie_mch_quirk = 1; 1384 pcie_mch_quirk = 1;
@@ -1569,84 +1569,6 @@ static void __devinit fixup_rev1_53c810(struct pci_dev* dev)
1569} 1569}
1570DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810); 1570DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810);
1571 1571
1572static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
1573{
1574 while (f < end) {
1575 if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
1576 (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
1577#ifdef DEBUG
1578 dev_dbg(&dev->dev, "calling %pF\n", f->hook);
1579#endif
1580 f->hook(dev);
1581 }
1582 f++;
1583 }
1584}
1585
1586extern struct pci_fixup __start_pci_fixups_early[];
1587extern struct pci_fixup __end_pci_fixups_early[];
1588extern struct pci_fixup __start_pci_fixups_header[];
1589extern struct pci_fixup __end_pci_fixups_header[];
1590extern struct pci_fixup __start_pci_fixups_final[];
1591extern struct pci_fixup __end_pci_fixups_final[];
1592extern struct pci_fixup __start_pci_fixups_enable[];
1593extern struct pci_fixup __end_pci_fixups_enable[];
1594extern struct pci_fixup __start_pci_fixups_resume[];
1595extern struct pci_fixup __end_pci_fixups_resume[];
1596extern struct pci_fixup __start_pci_fixups_resume_early[];
1597extern struct pci_fixup __end_pci_fixups_resume_early[];
1598extern struct pci_fixup __start_pci_fixups_suspend[];
1599extern struct pci_fixup __end_pci_fixups_suspend[];
1600
1601
1602void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
1603{
1604 struct pci_fixup *start, *end;
1605
1606 switch(pass) {
1607 case pci_fixup_early:
1608 start = __start_pci_fixups_early;
1609 end = __end_pci_fixups_early;
1610 break;
1611
1612 case pci_fixup_header:
1613 start = __start_pci_fixups_header;
1614 end = __end_pci_fixups_header;
1615 break;
1616
1617 case pci_fixup_final:
1618 start = __start_pci_fixups_final;
1619 end = __end_pci_fixups_final;
1620 break;
1621
1622 case pci_fixup_enable:
1623 start = __start_pci_fixups_enable;
1624 end = __end_pci_fixups_enable;
1625 break;
1626
1627 case pci_fixup_resume:
1628 start = __start_pci_fixups_resume;
1629 end = __end_pci_fixups_resume;
1630 break;
1631
1632 case pci_fixup_resume_early:
1633 start = __start_pci_fixups_resume_early;
1634 end = __end_pci_fixups_resume_early;
1635 break;
1636
1637 case pci_fixup_suspend:
1638 start = __start_pci_fixups_suspend;
1639 end = __end_pci_fixups_suspend;
1640 break;
1641
1642 default:
1643 /* stupid compiler warning, you would think with an enum... */
1644 return;
1645 }
1646 pci_do_fixups(dev, start, end);
1647}
1648EXPORT_SYMBOL(pci_fixup_device);
1649
1650/* Enable 1k I/O space granularity on the Intel P64H2 */ 1572/* Enable 1k I/O space granularity on the Intel P64H2 */
1651static void __devinit quirk_p64h2_1k_io(struct pci_dev *dev) 1573static void __devinit quirk_p64h2_1k_io(struct pci_dev *dev)
1652{ 1574{
@@ -2020,3 +1942,82 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375,
2020 quirk_msi_intx_disable_bug); 1942 quirk_msi_intx_disable_bug);
2021 1943
2022#endif /* CONFIG_PCI_MSI */ 1944#endif /* CONFIG_PCI_MSI */
1945
1946static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end)
1947{
1948 while (f < end) {
1949 if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
1950 (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
1951 dev_dbg(&dev->dev, "calling %pF\n", f->hook);
1952 f->hook(dev);
1953 }
1954 f++;
1955 }
1956}
1957
1958extern struct pci_fixup __start_pci_fixups_early[];
1959extern struct pci_fixup __end_pci_fixups_early[];
1960extern struct pci_fixup __start_pci_fixups_header[];
1961extern struct pci_fixup __end_pci_fixups_header[];
1962extern struct pci_fixup __start_pci_fixups_final[];
1963extern struct pci_fixup __end_pci_fixups_final[];
1964extern struct pci_fixup __start_pci_fixups_enable[];
1965extern struct pci_fixup __end_pci_fixups_enable[];
1966extern struct pci_fixup __start_pci_fixups_resume[];
1967extern struct pci_fixup __end_pci_fixups_resume[];
1968extern struct pci_fixup __start_pci_fixups_resume_early[];
1969extern struct pci_fixup __end_pci_fixups_resume_early[];
1970extern struct pci_fixup __start_pci_fixups_suspend[];
1971extern struct pci_fixup __end_pci_fixups_suspend[];
1972
1973
1974void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
1975{
1976 struct pci_fixup *start, *end;
1977
1978 switch(pass) {
1979 case pci_fixup_early:
1980 start = __start_pci_fixups_early;
1981 end = __end_pci_fixups_early;
1982 break;
1983
1984 case pci_fixup_header:
1985 start = __start_pci_fixups_header;
1986 end = __end_pci_fixups_header;
1987 break;
1988
1989 case pci_fixup_final:
1990 start = __start_pci_fixups_final;
1991 end = __end_pci_fixups_final;
1992 break;
1993
1994 case pci_fixup_enable:
1995 start = __start_pci_fixups_enable;
1996 end = __end_pci_fixups_enable;
1997 break;
1998
1999 case pci_fixup_resume:
2000 start = __start_pci_fixups_resume;
2001 end = __end_pci_fixups_resume;
2002 break;
2003
2004 case pci_fixup_resume_early:
2005 start = __start_pci_fixups_resume_early;
2006 end = __end_pci_fixups_resume_early;
2007 break;
2008
2009 case pci_fixup_suspend:
2010 start = __start_pci_fixups_suspend;
2011 end = __end_pci_fixups_suspend;
2012 break;
2013
2014 default:
2015 /* stupid compiler warning, you would think with an enum... */
2016 return;
2017 }
2018 pci_do_fixups(dev, start, end);
2019}
2020#else
2021void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) {}
2022#endif
2023EXPORT_SYMBOL(pci_fixup_device);
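pci_fixup_device() and its dispatch tables move inside CONFIG_PCI_QUIRKS, with an empty stub otherwise. The tables themselves are built by the DECLARE_PCI_FIXUP_* macros, which drop each struct pci_fixup into a dedicated linker section bounded by the __start/__end symbols declared above. A stripped-down, hypothetical version of the mechanism (the real macros live in include/linux/pci.h):

    #define DECLARE_FIXUP(sec, vend, dev, fn)                        \
            static const struct pci_fixup __fixup_##fn               \
            __attribute__((section(sec), used)) = { vend, dev, fn }

    /* e.g. DECLARE_FIXUP(".pci_fixup_header", 0x8086, 0x1234, my_hook);
     * vendor/device values here are placeholders, not a real quirk. */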
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index bdc2a44d68e1..042e08924421 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -73,6 +73,7 @@ void pci_remove_bus(struct pci_bus *pci_bus)
73 up_write(&pci_bus_sem); 73 up_write(&pci_bus_sem);
74 pci_remove_legacy_files(pci_bus); 74 pci_remove_legacy_files(pci_bus);
75 device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity); 75 device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
76 device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
76 device_unregister(&pci_bus->dev); 77 device_unregister(&pci_bus->dev);
77} 78}
78EXPORT_SYMBOL(pci_remove_bus); 79EXPORT_SYMBOL(pci_remove_bus);
@@ -114,13 +115,9 @@ void pci_remove_behind_bridge(struct pci_dev *dev)
114{ 115{
115 struct list_head *l, *n; 116 struct list_head *l, *n;
116 117
117 if (dev->subordinate) { 118 if (dev->subordinate)
118 list_for_each_safe(l, n, &dev->subordinate->devices) { 119 list_for_each_safe(l, n, &dev->subordinate->devices)
119 struct pci_dev *dev = pci_dev_b(l); 120 pci_remove_bus_device(pci_dev_b(l));
120
121 pci_remove_bus_device(dev);
122 }
123 }
124} 121}
125 122
126static void pci_stop_bus_devices(struct pci_bus *bus) 123static void pci_stop_bus_devices(struct pci_bus *bus)
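The simplification in pci_remove_behind_bridge() keeps the _safe iterator, and that matters here: pci_remove_bus_device() unlinks the entry being visited, so a plain list_for_each() would step through freed memory. The pattern in isolation:

    struct list_head *l, *n;    /* n holds the lookahead */

    list_for_each_safe(l, n, &dev->subordinate->devices)
            pci_remove_bus_device(pci_dev_b(l));    /* may free 'l' */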
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 471a429d7a20..ea979f2bc6db 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -299,7 +299,7 @@ static void pbus_size_io(struct pci_bus *bus)
299 299
300 if (r->parent || !(r->flags & IORESOURCE_IO)) 300 if (r->parent || !(r->flags & IORESOURCE_IO))
301 continue; 301 continue;
302 r_size = r->end - r->start + 1; 302 r_size = resource_size(r);
303 303
304 if (r_size < 0x400) 304 if (r_size < 0x400)
305 /* Might be re-aligned for ISA */ 305 /* Might be re-aligned for ISA */
@@ -350,7 +350,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
350 350
351 if (r->parent || (r->flags & mask) != type) 351 if (r->parent || (r->flags & mask) != type)
352 continue; 352 continue;
353 r_size = r->end - r->start + 1; 353 r_size = resource_size(r);
354 /* For bridges size != alignment */ 354 /* For bridges size != alignment */
355 align = resource_alignment(r); 355 align = resource_alignment(r);
356 order = __ffs(align) - 20; 356 order = __ffs(align) - 20;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index d4b5c690eaa7..2dbd96cce2d8 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -129,7 +129,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
129 resource_size_t size, min, align; 129 resource_size_t size, min, align;
130 int ret; 130 int ret;
131 131
132 size = res->end - res->start + 1; 132 size = resource_size(res);
133 min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; 133 min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
134 134
135 align = resource_alignment(res); 135 align = resource_alignment(res);
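The r->end - r->start + 1 expressions in setup-bus.c and setup-res.c give way to resource_size(), whose definition is exactly that inclusive-range arithmetic:

    static inline resource_size_t resource_size(const struct resource *res)
    {
            return res->end - res->start + 1;
    }

    /* e.g. a BAR spanning 0x1000..0x1fff has resource_size() == 0x1000 */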
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 7e5b85cbd948..0c6db03698ea 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -49,11 +49,16 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf)
49 49
50static void pci_slot_release(struct kobject *kobj) 50static void pci_slot_release(struct kobject *kobj)
51{ 51{
52 struct pci_dev *dev;
52 struct pci_slot *slot = to_pci_slot(kobj); 53 struct pci_slot *slot = to_pci_slot(kobj);
53 54
54 pr_debug("%s: releasing pci_slot on %x:%d\n", __func__, 55 pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
55 slot->bus->number, slot->number); 56 slot->bus->number, slot->number);
56 57
58 list_for_each_entry(dev, &slot->bus->devices, bus_list)
59 if (PCI_SLOT(dev->devfn) == slot->number)
60 dev->slot = NULL;
61
57 list_del(&slot->list); 62 list_del(&slot->list);
58 63
59 kfree(slot); 64 kfree(slot);
@@ -108,6 +113,7 @@ static struct kobj_type pci_slot_ktype = {
108struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, 113struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
109 const char *name) 114 const char *name)
110{ 115{
116 struct pci_dev *dev;
111 struct pci_slot *slot; 117 struct pci_slot *slot;
112 int err; 118 int err;
113 119
@@ -150,6 +156,10 @@ placeholder:
150 INIT_LIST_HEAD(&slot->list); 156 INIT_LIST_HEAD(&slot->list);
151 list_add(&slot->list, &parent->slots); 157 list_add(&slot->list, &parent->slots);
152 158
159 list_for_each_entry(dev, &parent->devices, bus_list)
160 if (PCI_SLOT(dev->devfn) == slot_nr)
161 dev->slot = slot;
162
153 /* Don't care if debug printk has a -1 for slot_nr */ 163 /* Don't care if debug printk has a -1 for slot_nr */
154 pr_debug("%s: created pci_slot on %04x:%02x:%02x\n", 164 pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
155 __func__, pci_domain_nr(parent), parent->number, slot_nr); 165 __func__, pci_domain_nr(parent), parent->number, slot_nr);
diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c
index a0ffb8ebfe00..9e1140f085fd 100644
--- a/drivers/pcmcia/at91_cf.c
+++ b/drivers/pcmcia/at91_cf.c
@@ -273,7 +273,7 @@ static int __init at91_cf_probe(struct platform_device *pdev)
273 goto fail0d; 273 goto fail0d;
274 cf->socket.pci_irq = board->irq_pin; 274 cf->socket.pci_irq = board->irq_pin;
275 } else 275 } else
276 cf->socket.pci_irq = NR_IRQS + 1; 276 cf->socket.pci_irq = nr_irqs + 1;
277 277
278 /* pcmcia layer only remaps "real" memory not iospace */ 278 /* pcmcia layer only remaps "real" memory not iospace */
279 cf->socket.io_offset = (unsigned long) 279 cf->socket.io_offset = (unsigned long)
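This is the first of many hunks in this merge replacing the compile-time NR_IRQS constant with the runtime nr_irqs count from the sparse-irq work, so the valid IRQ range becomes whatever the platform registered at boot. The recurring shape of the change, as a sketch:

    if (irq < 0 || irq >= nr_irqs)      /* was: irq >= NR_IRQS */
            return -EINVAL;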
diff --git a/drivers/pcmcia/hd64465_ss.c b/drivers/pcmcia/hd64465_ss.c
index 117dc12ab438..9ef69cdb3183 100644
--- a/drivers/pcmcia/hd64465_ss.c
+++ b/drivers/pcmcia/hd64465_ss.c
@@ -233,15 +233,18 @@ static struct hw_interrupt_type hd64465_ss_irq_type = {
233 */ 233 */
234static void hs_map_irq(hs_socket_t *sp, unsigned int irq) 234static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
235{ 235{
236 struct irq_desc *desc;
237
236 DPRINTK("hs_map_irq(sock=%d irq=%d)\n", sp->number, irq); 238 DPRINTK("hs_map_irq(sock=%d irq=%d)\n", sp->number, irq);
237 239
238 if (irq >= HS_NUM_MAPPED_IRQS) 240 if (irq >= HS_NUM_MAPPED_IRQS)
239 return; 241 return;
240 242
243 desc = irq_to_desc(irq);
241 hs_mapped_irq[irq].sock = sp; 244 hs_mapped_irq[irq].sock = sp;
242 /* insert ourselves as the irq controller */ 245 /* insert ourselves as the irq controller */
243 hs_mapped_irq[irq].old_handler = irq_desc[irq].chip; 246 hs_mapped_irq[irq].old_handler = desc->chip;
244 irq_desc[irq].chip = &hd64465_ss_irq_type; 247 desc->chip = &hd64465_ss_irq_type;
245} 248}
246 249
247 250
@@ -250,13 +253,16 @@ static void hs_map_irq(hs_socket_t *sp, unsigned int irq)
250 */ 253 */
251static void hs_unmap_irq(hs_socket_t *sp, unsigned int irq) 254static void hs_unmap_irq(hs_socket_t *sp, unsigned int irq)
252{ 255{
256 struct irq_desc *desc;
257
253 DPRINTK("hs_unmap_irq(sock=%d irq=%d)\n", sp->number, irq); 258 DPRINTK("hs_unmap_irq(sock=%d irq=%d)\n", sp->number, irq);
254 259
255 if (irq >= HS_NUM_MAPPED_IRQS) 260 if (irq >= HS_NUM_MAPPED_IRQS)
256 return; 261 return;
257 262
263 desc = irq_to_desc(irq);
258 /* restore the original irq controller */ 264 /* restore the original irq controller */
259 irq_desc[irq].chip = hs_mapped_irq[irq].old_handler; 265 desc->chip = hs_mapped_irq[irq].old_handler;
260} 266}
261 267
262/*============================================================*/ 268/*============================================================*/
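Likewise, direct irq_desc[irq] indexing becomes the irq_to_desc() accessor, which keeps working once descriptors are allocated sparsely rather than as one static array. The pattern in isolation:

    struct irq_desc *desc = irq_to_desc(irq);

    desc->chip = &hd64465_ss_irq_type;  /* was: irq_desc[irq].chip = ... */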
diff --git a/drivers/pcmcia/vrc4171_card.c b/drivers/pcmcia/vrc4171_card.c
index eee2f1cb213c..b2c412419059 100644
--- a/drivers/pcmcia/vrc4171_card.c
+++ b/drivers/pcmcia/vrc4171_card.c
@@ -639,7 +639,7 @@ static int __devinit vrc4171_card_setup(char *options)
639 int irq; 639 int irq;
640 options += 4; 640 options += 4;
641 irq = simple_strtoul(options, &options, 0); 641 irq = simple_strtoul(options, &options, 0);
642 if (irq >= 0 && irq < NR_IRQS) 642 if (irq >= 0 && irq < nr_irqs)
643 vrc4171_irq = irq; 643 vrc4171_irq = irq;
644 644
645 if (*options != ',') 645 if (*options != ',')
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index f660ef3e5b29..814f49fde530 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -610,6 +610,14 @@ config RTC_DRV_RS5C313
610 help 610 help
611 If you say yes here you get support for the Ricoh RS5C313 RTC chips. 611 If you say yes here you get support for the Ricoh RS5C313 RTC chips.
612 612
613config RTC_DRV_PARISC
614 tristate "PA-RISC firmware RTC support"
615 depends on PARISC
616 help
617 Say Y or M here to enable RTC support on PA-RISC systems using
618 firmware calls. If you do not know what you are doing, you should
619 just say Y.
620
613config RTC_DRV_PPC 621config RTC_DRV_PPC
614 tristate "PowerPC machine dependent RTC support" 622 tristate "PowerPC machine dependent RTC support"
615 depends on PPC 623 depends on PPC
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index d05928b3ca94..d6a9ac7176ea 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
51obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o 51obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o
52obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o 52obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o
53obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o 53obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
54obj-$(CONFIG_RTC_DRV_PARISC) += rtc-parisc.o
54obj-$(CONFIG_RTC_DRV_PPC) += rtc-ppc.o 55obj-$(CONFIG_RTC_DRV_PPC) += rtc-ppc.o
55obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o 56obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o
56obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o 57obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
new file mode 100644
index 000000000000..346d633655e7
--- /dev/null
+++ b/drivers/rtc/rtc-parisc.c
@@ -0,0 +1,111 @@
1/* rtc-parisc: RTC for HP PA-RISC firmware
2 *
3 * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/time.h>
9#include <linux/platform_device.h>
10
11#include <asm/rtc.h>
12
13/* as simple as can be, and no simpler. */
14struct parisc_rtc {
15 struct rtc_device *rtc;
16 spinlock_t lock;
17};
18
19static int parisc_get_time(struct device *dev, struct rtc_time *tm)
20{
21 struct parisc_rtc *p = dev_get_drvdata(dev);
22 unsigned long flags, ret;
23
24 spin_lock_irqsave(&p->lock, flags);
25 ret = get_rtc_time(tm);
26 spin_unlock_irqrestore(&p->lock, flags);
27
28 if (ret & RTC_BATT_BAD)
29 return -EOPNOTSUPP;
30
31 return 0;
32}
33
34static int parisc_set_time(struct device *dev, struct rtc_time *tm)
35{
36 struct parisc_rtc *p = dev_get_drvdata(dev);
37 unsigned long flags, ret;
38
39 spin_lock_irqsave(&p->lock, flags);
40 ret = set_rtc_time(tm);
41 spin_unlock_irqrestore(&p->lock, flags);
42
43 if (ret < 0)
44 return -EOPNOTSUPP;
45
46 return 0;
47}
48
49static const struct rtc_class_ops parisc_rtc_ops = {
50 .read_time = parisc_get_time,
51 .set_time = parisc_set_time,
52};
53
54static int __devinit parisc_rtc_probe(struct platform_device *dev)
55{
56 struct parisc_rtc *p;
57
58 p = kzalloc(sizeof (*p), GFP_KERNEL);
59 if (!p)
60 return -ENOMEM;
61
62 spin_lock_init(&p->lock);
63
64 p->rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops,
65 THIS_MODULE);
66 if (IS_ERR(p->rtc)) {
67 int err = PTR_ERR(p->rtc);
68 kfree(p);
69 return err;
70 }
71
72 platform_set_drvdata(dev, p);
73
74 return 0;
75}
76
77static int __devexit parisc_rtc_remove(struct platform_device *dev)
78{
79 struct parisc_rtc *p = platform_get_drvdata(dev);
80
81 rtc_device_unregister(p->rtc);
82 kfree(p);
83
84 return 0;
85}
86
87static struct platform_driver parisc_rtc_driver = {
88 .driver = {
89 .name = "rtc-parisc",
90 .owner = THIS_MODULE,
91 },
92 .probe = parisc_rtc_probe,
93 .remove = __devexit_p(parisc_rtc_remove),
94};
95
96static int __init parisc_rtc_init(void)
97{
98 return platform_driver_register(&parisc_rtc_driver);
99}
100
101static void __exit parisc_rtc_fini(void)
102{
103 platform_driver_unregister(&parisc_rtc_driver);
104}
105
106module_init(parisc_rtc_init);
107module_exit(parisc_rtc_fini);
108
109MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
110MODULE_LICENSE("GPL");
111MODULE_DESCRIPTION("HP PA-RISC RTC driver");
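The new driver binds by name to a platform device called "rtc-parisc"; registering that device is the architecture's job and is not part of this diff. A hypothetical arch-side registration, for illustration only:

    /* Hypothetical call site and name -- the real registration lives
     * in the parisc arch code, not in this patch. */
    static int __init parisc_rtc_device_init(void)
    {
            struct platform_device *pdev;

            pdev = platform_device_register_simple("rtc-parisc", -1, NULL, 0);
            return IS_ERR(pdev) ? PTR_ERR(pdev) : 0;
    }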
diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 884b635f028b..834dcc6d785f 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -360,7 +360,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
360 spin_unlock_irq(&rtc_lock); 360 spin_unlock_irq(&rtc_lock);
361 361
362 aie_irq = platform_get_irq(pdev, 0); 362 aie_irq = platform_get_irq(pdev, 0);
363 if (aie_irq < 0 || aie_irq >= NR_IRQS) { 363 if (aie_irq < 0 || aie_irq >= nr_irqs) {
364 retval = -EBUSY; 364 retval = -EBUSY;
365 goto err_device_unregister; 365 goto err_device_unregister;
366 } 366 }
@@ -371,7 +371,7 @@ static int __devinit rtc_probe(struct platform_device *pdev)
371 goto err_device_unregister; 371 goto err_device_unregister;
372 372
373 pie_irq = platform_get_irq(pdev, 1); 373 pie_irq = platform_get_irq(pdev, 1);
374 if (pie_irq < 0 || pie_irq >= NR_IRQS) 374 if (pie_irq < 0 || pie_irq >= nr_irqs)
375 goto err_free_irq; 375 goto err_free_irq;
376 376
377 retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED, 377 retval = request_irq(pie_irq, rtclong1_interrupt, IRQF_DISABLED,
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index b5a868d85eb4..1e5478abd90e 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -337,7 +337,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc)
337#else 337#else
338#define IRQ_MIN 9 338#define IRQ_MIN 9
339#if defined(__PPC) 339#if defined(__PPC)
340#define IRQ_MAX (NR_IRQS-1) 340#define IRQ_MAX (nr_irqs-1)
341#else 341#else
342#define IRQ_MAX 12 342#define IRQ_MAX 12
343#endif 343#endif
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 740bad435995..afc96e844a25 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -343,6 +343,11 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
343} 343}
344 344
345#ifdef CONFIG_IDE_PROC_FS 345#ifdef CONFIG_IDE_PROC_FS
346static ide_proc_entry_t idescsi_proc[] = {
347 { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
348 { NULL, 0, NULL, NULL }
349};
350
346#define ide_scsi_devset_get(name, field) \ 351#define ide_scsi_devset_get(name, field) \
347static int get_##name(ide_drive_t *drive) \ 352static int get_##name(ide_drive_t *drive) \
348{ \ 353{ \
@@ -378,6 +383,16 @@ static const struct ide_proc_devset idescsi_settings[] = {
378 IDE_PROC_DEVSET(transform, 0, 3), 383 IDE_PROC_DEVSET(transform, 0, 3),
379 { 0 }, 384 { 0 },
380}; 385};
386
387static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive)
388{
389 return idescsi_proc;
390}
391
392static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive)
393{
394 return idescsi_settings;
395}
381#endif 396#endif
382 397
383/* 398/*
@@ -419,13 +434,6 @@ static void ide_scsi_remove(ide_drive_t *drive)
419 434
420static int ide_scsi_probe(ide_drive_t *); 435static int ide_scsi_probe(ide_drive_t *);
421 436
422#ifdef CONFIG_IDE_PROC_FS
423static ide_proc_entry_t idescsi_proc[] = {
424 { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL },
425 { NULL, 0, NULL, NULL }
426};
427#endif
428
429static ide_driver_t idescsi_driver = { 437static ide_driver_t idescsi_driver = {
430 .gen_driver = { 438 .gen_driver = {
431 .owner = THIS_MODULE, 439 .owner = THIS_MODULE,
@@ -439,8 +447,8 @@ static ide_driver_t idescsi_driver = {
439 .end_request = idescsi_end_request, 447 .end_request = idescsi_end_request,
440 .error = idescsi_atapi_error, 448 .error = idescsi_atapi_error,
441#ifdef CONFIG_IDE_PROC_FS 449#ifdef CONFIG_IDE_PROC_FS
442 .proc = idescsi_proc, 450 .proc_entries = ide_scsi_proc_entries,
443 .settings = idescsi_settings, 451 .proc_devsets = ide_scsi_proc_devsets,
444#endif 452#endif
445}; 453};
446 454
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index d30eb7ba018e..098739deb02e 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7859,7 +7859,6 @@ static struct pci_driver ipr_driver = {
7859 .remove = ipr_remove, 7859 .remove = ipr_remove,
7860 .shutdown = ipr_shutdown, 7860 .shutdown = ipr_shutdown,
7861 .err_handler = &ipr_err_handler, 7861 .err_handler = &ipr_err_handler,
7862 .dynids.use_driver_data = 1
7863}; 7862};
7864 7863
7865/** 7864/**
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 83c819216771..f25f41a499e5 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2108,7 +2108,7 @@ struct scsi_qla_host;
2108 2108
2109struct qla_msix_entry { 2109struct qla_msix_entry {
2110 int have_irq; 2110 int have_irq;
2111 uint16_t msix_vector; 2111 uint32_t msix_vector;
2112 uint16_t msix_entry; 2112 uint16_t msix_entry;
2113}; 2113};
2114 2114
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 2aed4721c0d0..21dd182ad512 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1566,9 +1566,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
1566 goto probe_out; 1566 goto probe_out;
1567 } 1567 }
1568 1568
1569 if (pci_find_aer_capability(pdev)) 1569 /* This may fail but that's ok */
1570 if (pci_enable_pcie_error_reporting(pdev)) 1570 pci_enable_pcie_error_reporting(pdev);
1571 goto probe_out;
1572 1571
1573 host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t)); 1572 host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t));
1574 if (host == NULL) { 1573 if (host == NULL) {
diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c
index 381b12ac20e0..d935b2d04f93 100644
--- a/drivers/serial/68328serial.c
+++ b/drivers/serial/68328serial.c
@@ -66,7 +66,6 @@
66#endif 66#endif
67 67
68static struct m68k_serial m68k_soft[NR_PORTS]; 68static struct m68k_serial m68k_soft[NR_PORTS];
69struct m68k_serial *IRQ_ports[NR_IRQS];
70 69
71static unsigned int uart_irqs[NR_PORTS] = UART_IRQ_DEFNS; 70static unsigned int uart_irqs[NR_PORTS] = UART_IRQ_DEFNS;
72 71
@@ -375,15 +374,11 @@ clear_and_return:
375 */ 374 */
376irqreturn_t rs_interrupt(int irq, void *dev_id) 375irqreturn_t rs_interrupt(int irq, void *dev_id)
377{ 376{
378 struct m68k_serial * info; 377 struct m68k_serial *info = dev_id;
379 m68328_uart *uart; 378 m68328_uart *uart;
380 unsigned short rx; 379 unsigned short rx;
381 unsigned short tx; 380 unsigned short tx;
382 381
383 info = IRQ_ports[irq];
384 if(!info)
385 return IRQ_NONE;
386
387 uart = &uart_addr[info->line]; 382 uart = &uart_addr[info->line];
388 rx = uart->urx.w; 383 rx = uart->urx.w;
389 384
@@ -1383,8 +1378,6 @@ rs68328_init(void)
1383 info->port, info->irq); 1378 info->port, info->irq);
1384 printk(" is a builtin MC68328 UART\n"); 1379 printk(" is a builtin MC68328 UART\n");
1385 1380
1386 IRQ_ports[info->irq] = info; /* waste of space */
1387
1388#ifdef CONFIG_M68VZ328 1381#ifdef CONFIG_M68VZ328
1389 if (i > 0 ) 1382 if (i > 0 )
1390 PJSEL &= 0xCF; /* PSW enable second port output */ 1383 PJSEL &= 0xCF; /* PSW enable second port output */
@@ -1393,7 +1386,7 @@ rs68328_init(void)
1393 if (request_irq(uart_irqs[i], 1386 if (request_irq(uart_irqs[i],
1394 rs_interrupt, 1387 rs_interrupt,
1395 IRQF_DISABLED, 1388 IRQF_DISABLED,
1396 "M68328_UART", NULL)) 1389 "M68328_UART", info))
1397 panic("Unable to attach 68328 serial interrupt\n"); 1390 panic("Unable to attach 68328 serial interrupt\n");
1398 } 1391 }
1399 local_irq_restore(flags); 1392 local_irq_restore(flags);
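The 68328 driver drops its NR_IRQS-sized IRQ_ports[] lookup table in favor of the dev_id cookie: the per-port structure is handed to request_irq() and comes straight back as the handler's argument. The pattern in isolation, as a sketch:

    /* registration: pass the port as the cookie */
    request_irq(uart_irqs[i], rs_interrupt, IRQF_DISABLED,
                "M68328_UART", info);

    /* handler: the cookie comes back as dev_id */
    irqreturn_t rs_interrupt(int irq, void *dev_id)
    {
            struct m68k_serial *info = dev_id;
            /* ... service the port ... */
            return IRQ_HANDLED;
    }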
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 1528de23a650..303272af386e 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -156,11 +156,15 @@ struct uart_8250_port {
156}; 156};
157 157
158struct irq_info { 158struct irq_info {
159 spinlock_t lock; 159 struct hlist_node node;
160 int irq;
161 spinlock_t lock; /* Protects list not the hash */
160 struct list_head *head; 162 struct list_head *head;
161}; 163};
162 164
163static struct irq_info irq_lists[NR_IRQS]; 165#define NR_IRQ_HASH 32 /* Can be adjusted later */
166static struct hlist_head irq_lists[NR_IRQ_HASH];
167static DEFINE_MUTEX(hash_mutex); /* Used to walk the hash */
164 168
165/* 169/*
166 * Here we define the default xmit fifo size used for each type of UART. 170 * Here we define the default xmit fifo size used for each type of UART.
@@ -1545,15 +1549,43 @@ static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
1545 BUG_ON(i->head != &up->list); 1549 BUG_ON(i->head != &up->list);
1546 i->head = NULL; 1550 i->head = NULL;
1547 } 1551 }
1548
1549 spin_unlock_irq(&i->lock); 1552 spin_unlock_irq(&i->lock);
1553 /* List empty so throw away the hash node */
1554 if (i->head == NULL) {
1555 hlist_del(&i->node);
1556 kfree(i);
1557 }
1550} 1558}
1551 1559
1552static int serial_link_irq_chain(struct uart_8250_port *up) 1560static int serial_link_irq_chain(struct uart_8250_port *up)
1553{ 1561{
1554 struct irq_info *i = irq_lists + up->port.irq; 1562 struct hlist_head *h;
1563 struct hlist_node *n;
1564 struct irq_info *i;
1555 int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0; 1565 int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
1556 1566
1567 mutex_lock(&hash_mutex);
1568
1569 h = &irq_lists[up->port.irq % NR_IRQ_HASH];
1570
1571 hlist_for_each(n, h) {
1572 i = hlist_entry(n, struct irq_info, node);
1573 if (i->irq == up->port.irq)
1574 break;
1575 }
1576
1577 if (n == NULL) {
1578 i = kzalloc(sizeof(struct irq_info), GFP_KERNEL);
1579 if (i == NULL) {
1580 mutex_unlock(&hash_mutex);
1581 return -ENOMEM;
1582 }
1583 spin_lock_init(&i->lock);
1584 i->irq = up->port.irq;
1585 hlist_add_head(&i->node, h);
1586 }
1587 mutex_unlock(&hash_mutex);
1588
1557 spin_lock_irq(&i->lock); 1589 spin_lock_irq(&i->lock);
1558 1590
1559 if (i->head) { 1591 if (i->head) {
@@ -1577,14 +1609,28 @@ static int serial_link_irq_chain(struct uart_8250_port *up)
1577 1609
1578static void serial_unlink_irq_chain(struct uart_8250_port *up) 1610static void serial_unlink_irq_chain(struct uart_8250_port *up)
1579{ 1611{
1580 struct irq_info *i = irq_lists + up->port.irq; 1612 struct irq_info *i;
1613 struct hlist_node *n;
1614 struct hlist_head *h;
1581 1615
1616 mutex_lock(&hash_mutex);
1617
1618 h = &irq_lists[up->port.irq % NR_IRQ_HASH];
1619
1620 hlist_for_each(n, h) {
1621 i = hlist_entry(n, struct irq_info, node);
1622 if (i->irq == up->port.irq)
1623 break;
1624 }
1625
1626 BUG_ON(n == NULL);
1582 BUG_ON(i->head == NULL); 1627 BUG_ON(i->head == NULL);
1583 1628
1584 if (list_empty(i->head)) 1629 if (list_empty(i->head))
1585 free_irq(up->port.irq, i); 1630 free_irq(up->port.irq, i);
1586 1631
1587 serial_do_unlink(i, up); 1632 serial_do_unlink(i, up);
1633 mutex_unlock(&hash_mutex);
1588} 1634}
1589 1635
1590/* Base timer interval for polling */ 1636/* Base timer interval for polling */
@@ -2447,7 +2493,7 @@ static void serial8250_config_port(struct uart_port *port, int flags)
2447static int 2493static int
2448serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) 2494serial8250_verify_port(struct uart_port *port, struct serial_struct *ser)
2449{ 2495{
2450 if (ser->irq >= NR_IRQS || ser->irq < 0 || 2496 if (ser->irq >= nr_irqs || ser->irq < 0 ||
2451 ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || 2497 ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
2452 ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS || 2498 ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS ||
2453 ser->type == PORT_STARTECH) 2499 ser->type == PORT_STARTECH)
@@ -2967,7 +3013,7 @@ EXPORT_SYMBOL(serial8250_unregister_port);
2967 3013
2968static int __init serial8250_init(void) 3014static int __init serial8250_init(void)
2969{ 3015{
2970 int ret, i; 3016 int ret;
2971 3017
2972 if (nr_uarts > UART_NR) 3018 if (nr_uarts > UART_NR)
2973 nr_uarts = UART_NR; 3019 nr_uarts = UART_NR;
@@ -2976,9 +3022,6 @@ static int __init serial8250_init(void)
2976 "%d ports, IRQ sharing %sabled\n", nr_uarts, 3022 "%d ports, IRQ sharing %sabled\n", nr_uarts,
2977 share_irqs ? "en" : "dis"); 3023 share_irqs ? "en" : "dis");
2978 3024
2979 for (i = 0; i < NR_IRQS; i++)
2980 spin_lock_init(&irq_lists[i].lock);
2981
2982#ifdef CONFIG_SPARC 3025#ifdef CONFIG_SPARC
2983 ret = sunserial_register_minors(&serial8250_reg, UART_NR); 3026 ret = sunserial_register_minors(&serial8250_reg, UART_NR);
2984#else 3027#else
@@ -3006,15 +3049,15 @@ static int __init serial8250_init(void)
3006 goto out; 3049 goto out;
3007 3050
3008 platform_device_del(serial8250_isa_devs); 3051 platform_device_del(serial8250_isa_devs);
3009 put_dev: 3052put_dev:
3010 platform_device_put(serial8250_isa_devs); 3053 platform_device_put(serial8250_isa_devs);
3011 unreg_uart_drv: 3054unreg_uart_drv:
3012#ifdef CONFIG_SPARC 3055#ifdef CONFIG_SPARC
3013 sunserial_unregister_minors(&serial8250_reg, UART_NR); 3056 sunserial_unregister_minors(&serial8250_reg, UART_NR);
3014#else 3057#else
3015 uart_unregister_driver(&serial8250_reg); 3058 uart_unregister_driver(&serial8250_reg);
3016#endif 3059#endif
3017 out: 3060out:
3018 return ret; 3061 return ret;
3019} 3062}
3020 3063
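The 8250 driver's NR_IRQS-sized irq_lists[] array becomes a small hash of dynamically allocated irq_info nodes, created on first use and freed when the last port sharing the IRQ goes away. The lookup-or-create half of that pattern, pulled out as a sketch (the helper name is hypothetical and error handling is trimmed; NR_IRQ_HASH, irq_lists and hash_mutex are the driver's own, shown above):

    static struct irq_info *get_irq_info(int irq)   /* hypothetical name */
    {
            struct hlist_head *h = &irq_lists[irq % NR_IRQ_HASH];
            struct hlist_node *n;
            struct irq_info *i;

            mutex_lock(&hash_mutex);
            hlist_for_each(n, h) {
                    i = hlist_entry(n, struct irq_info, node);
                    if (i->irq == irq)
                            goto found;
            }
            i = kzalloc(sizeof(*i), GFP_KERNEL);    /* first user of this IRQ */
            if (i) {
                    spin_lock_init(&i->lock);
                    i->irq = irq;
                    hlist_add_head(&i->node, h);
            }
    found:
            mutex_unlock(&hash_mutex);
            return i;
    }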
diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c
index 90b56c2c31e2..71562689116f 100644
--- a/drivers/serial/amba-pl010.c
+++ b/drivers/serial/amba-pl010.c
@@ -512,7 +512,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
512 int ret = 0; 512 int ret = 0;
513 if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA) 513 if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
514 ret = -EINVAL; 514 ret = -EINVAL;
515 if (ser->irq < 0 || ser->irq >= NR_IRQS) 515 if (ser->irq < 0 || ser->irq >= nr_irqs)
516 ret = -EINVAL; 516 ret = -EINVAL;
517 if (ser->baud_base < 9600) 517 if (ser->baud_base < 9600)
518 ret = -EINVAL; 518 ret = -EINVAL;
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 9d08f27208a1..b7180046f8db 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -572,7 +572,7 @@ static int pl010_verify_port(struct uart_port *port, struct serial_struct *ser)
572 int ret = 0; 572 int ret = 0;
573 if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA) 573 if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA)
574 ret = -EINVAL; 574 ret = -EINVAL;
575 if (ser->irq < 0 || ser->irq >= NR_IRQS) 575 if (ser->irq < 0 || ser->irq >= nr_irqs)
576 ret = -EINVAL; 576 ret = -EINVAL;
577 if (ser->baud_base < 9600) 577 if (ser->baud_base < 9600)
578 ret = -EINVAL; 578 ret = -EINVAL;
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index a6c4d744495e..bde4b4b0b80f 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -623,7 +623,7 @@ static int cpm_uart_verify_port(struct uart_port *port,
623 623
624 if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM) 624 if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
625 ret = -EINVAL; 625 ret = -EINVAL;
626 if (ser->irq < 0 || ser->irq >= NR_IRQS) 626 if (ser->irq < 0 || ser->irq >= nr_irqs)
627 ret = -EINVAL; 627 ret = -EINVAL;
628 if (ser->baud_base < 9600) 628 if (ser->baud_base < 9600)
629 ret = -EINVAL; 629 ret = -EINVAL;
diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c
index 23d030511019..611c97a15654 100644
--- a/drivers/serial/m32r_sio.c
+++ b/drivers/serial/m32r_sio.c
@@ -922,7 +922,7 @@ static void m32r_sio_config_port(struct uart_port *port, int flags)
922static int 922static int
923m32r_sio_verify_port(struct uart_port *port, struct serial_struct *ser) 923m32r_sio_verify_port(struct uart_port *port, struct serial_struct *ser)
924{ 924{
925 if (ser->irq >= NR_IRQS || ser->irq < 0 || 925 if (ser->irq >= nr_irqs || ser->irq < 0 ||
926 ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || 926 ser->baud_base < 9600 || ser->type < PORT_UNKNOWN ||
927 ser->type >= ARRAY_SIZE(uart_config)) 927 ser->type >= ARRAY_SIZE(uart_config))
928 return -EINVAL; 928 return -EINVAL;
@@ -1162,7 +1162,7 @@ static int __init m32r_sio_init(void)
1162 1162
1163 printk(KERN_INFO "Serial: M32R SIO driver\n"); 1163 printk(KERN_INFO "Serial: M32R SIO driver\n");
1164 1164
1165 for (i = 0; i < NR_IRQS; i++) 1165 for (i = 0; i < nr_irqs; i++)
1166 spin_lock_init(&irq_lists[i].lock); 1166 spin_lock_init(&irq_lists[i].lock);
1167 1167
1168 ret = uart_register_driver(&m32r_sio_reg); 1168 ret = uart_register_driver(&m32r_sio_reg);
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 6bdf3362e3b1..874786a11fe9 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -741,7 +741,7 @@ static int uart_set_info(struct uart_state *state,
741 if (port->ops->verify_port) 741 if (port->ops->verify_port)
742 retval = port->ops->verify_port(port, &new_serial); 742 retval = port->ops->verify_port(port, &new_serial);
743 743
744 if ((new_serial.irq >= NR_IRQS) || (new_serial.irq < 0) || 744 if ((new_serial.irq >= nr_irqs) || (new_serial.irq < 0) ||
745 (new_serial.baud_base < 9600)) 745 (new_serial.baud_base < 9600))
746 retval = -EINVAL; 746 retval = -EINVAL;
747 747
diff --git a/drivers/serial/serial_lh7a40x.c b/drivers/serial/serial_lh7a40x.c
index cb49a5ac022f..61dc8b3daa26 100644
--- a/drivers/serial/serial_lh7a40x.c
+++ b/drivers/serial/serial_lh7a40x.c
@@ -460,7 +460,7 @@ static int lh7a40xuart_verify_port (struct uart_port* port,
460 460
461 if (ser->type != PORT_UNKNOWN && ser->type != PORT_LH7A40X) 461 if (ser->type != PORT_UNKNOWN && ser->type != PORT_LH7A40X)
462 ret = -EINVAL; 462 ret = -EINVAL;
463 if (ser->irq < 0 || ser->irq >= NR_IRQS) 463 if (ser->irq < 0 || ser->irq >= nr_irqs)
464 ret = -EINVAL; 464 ret = -EINVAL;
465 if (ser->baud_base < 9600) /* *** FIXME: is this true? */ 465 if (ser->baud_base < 9600) /* *** FIXME: is this true? */
466 ret = -EINVAL; 466 ret = -EINVAL;
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 3b9d2d83b590..f0658d2c45b2 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1149,7 +1149,7 @@ static int sci_verify_port(struct uart_port *port, struct serial_struct *ser)
1149{ 1149{
1150 struct sci_port *s = &sci_ports[port->line]; 1150 struct sci_port *s = &sci_ports[port->line];
1151 1151
1152 if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > NR_IRQS) 1152 if (ser->irq != s->irqs[SCIx_TXI_IRQ] || ser->irq > nr_irqs)
1153 return -EINVAL; 1153 return -EINVAL;
1154 if (ser->baud_base < 2400) 1154 if (ser->baud_base < 2400)
1155 /* No paper tape reader for Mitch.. */ 1155 /* No paper tape reader for Mitch.. */
diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c
index 539c933b335f..315a9333ca3c 100644
--- a/drivers/serial/ucc_uart.c
+++ b/drivers/serial/ucc_uart.c
@@ -1066,7 +1066,7 @@ static int qe_uart_verify_port(struct uart_port *port,
1066 if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM) 1066 if (ser->type != PORT_UNKNOWN && ser->type != PORT_CPM)
1067 return -EINVAL; 1067 return -EINVAL;
1068 1068
1069 if (ser->irq < 0 || ser->irq >= NR_IRQS) 1069 if (ser->irq < 0 || ser->irq >= nr_irqs)
1070 return -EINVAL; 1070 return -EINVAL;
1071 1071
1072 if (ser->baud_base < 9600) 1072 if (ser->baud_base < 9600)
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 5dccf057a7dd..f9b4647255aa 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -47,6 +47,9 @@ static struct uio_class {
47 struct class *class; 47 struct class *class;
48} *uio_class; 48} *uio_class;
49 49
50/* Protect idr accesses */
51static DEFINE_MUTEX(minor_lock);
52
50/* 53/*
51 * attributes 54 * attributes
52 */ 55 */
@@ -239,7 +242,6 @@ static void uio_dev_del_attributes(struct uio_device *idev)
239 242
240static int uio_get_minor(struct uio_device *idev) 243static int uio_get_minor(struct uio_device *idev)
241{ 244{
242 static DEFINE_MUTEX(minor_lock);
243 int retval = -ENOMEM; 245 int retval = -ENOMEM;
244 int id; 246 int id;
245 247
@@ -261,7 +263,9 @@ exit:
261 263
262static void uio_free_minor(struct uio_device *idev) 264static void uio_free_minor(struct uio_device *idev)
263{ 265{
266 mutex_lock(&minor_lock);
264 idr_remove(&uio_idr, idev->minor); 267 idr_remove(&uio_idr, idev->minor);
268 mutex_unlock(&minor_lock);
265} 269}
266 270
267/** 271/**
@@ -305,8 +309,9 @@ static int uio_open(struct inode *inode, struct file *filep)
305 struct uio_listener *listener; 309 struct uio_listener *listener;
306 int ret = 0; 310 int ret = 0;
307 311
308 lock_kernel(); 312 mutex_lock(&minor_lock);
309 idev = idr_find(&uio_idr, iminor(inode)); 313 idev = idr_find(&uio_idr, iminor(inode));
314 mutex_unlock(&minor_lock);
310 if (!idev) { 315 if (!idev) {
311 ret = -ENODEV; 316 ret = -ENODEV;
312 goto out; 317 goto out;
@@ -332,18 +337,15 @@ static int uio_open(struct inode *inode, struct file *filep)
332 if (ret) 337 if (ret)
333 goto err_infoopen; 338 goto err_infoopen;
334 } 339 }
335 unlock_kernel();
336 return 0; 340 return 0;
337 341
338err_infoopen: 342err_infoopen:
339
340 kfree(listener); 343 kfree(listener);
341err_alloc_listener:
342 344
345err_alloc_listener:
343 module_put(idev->owner); 346 module_put(idev->owner);
344 347
345out: 348out:
346 unlock_kernel();
347 return ret; 349 return ret;
348} 350}
349 351
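The uio change is a lock_kernel() removal: instead of the big kernel lock around open(), a file-scope mutex now covers every idr access, including the remove path that was previously unserialized against lookups. The resulting rule, sketched:

    static DEFINE_MUTEX(minor_lock);    /* file scope: every path shares it */

    /* every access to uio_idr now takes the mutex, e.g. in open(): */
    mutex_lock(&minor_lock);
    idev = idr_find(&uio_idr, iminor(inode));
    mutex_unlock(&minor_lock);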
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index d343afacb0b0..15a803b206b8 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1111,8 +1111,8 @@ clean0:
1111#ifdef DEBUG 1111#ifdef DEBUG
1112 debugfs_remove(ehci_debug_root); 1112 debugfs_remove(ehci_debug_root);
1113 ehci_debug_root = NULL; 1113 ehci_debug_root = NULL;
1114#endif
1115err_debug: 1114err_debug:
1115#endif
1116 clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded); 1116 clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
1117 return retval; 1117 return retval;
1118} 1118}
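The ehci-hcd tweak moves the err_debug label inside the #ifdef DEBUG block, presumably so that builds without DEBUG, where the only goto to it also disappears, don't define an unreachable label and trip a compiler warning. The shape of the fix, as a compilable sketch:

    static int init_sketch(void)
    {
            int retval = 0;
    #ifdef DEBUG
            if (!debugfs_create_dir("ehci", NULL)) {
                    retval = -ENOENT;
                    goto err_debug;     /* the label's only user */
            }
    err_debug:                          /* vanishes together with the goto */
    #endif
            return retval;
    }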
diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c
index 05a28106e8eb..8782ec1f5aa0 100644
--- a/drivers/watchdog/ib700wdt.c
+++ b/drivers/watchdog/ib700wdt.c
@@ -154,7 +154,7 @@ static int ibwdt_set_heartbeat(int t)
154 return -EINVAL; 154 return -EINVAL;
155 155
156 for (i = 0x0F; i > -1; i--) 156 for (i = 0x0F; i > -1; i--)
157 if (wd_times[i] > t) 157 if (wd_times[i] >= t)
158 break; 158 break;
159 wd_margin = i; 159 wd_margin = i;
160 return 0; 160 return 0;
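The watchdog one-liner changes the match from '>' to '>='. Assuming the driver's wd_times[] table steps down in two-second increments from 30 at index 0 to 0 at index 0x0F (an assumption from memory, not shown in this hunk), a request for a value exactly in the table used to overshoot to the next step:

    for (i = 0x0F; i > -1; i--)
            if (wd_times[i] >= t)   /* with '>', t == wd_times[i] was skipped */
                    break;
    wd_margin = i;

    /* e.g. t = 10: the old code settled on 12, the new code picks 10 */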
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index c3290bc186a0..9ce1ab6c268d 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -125,7 +125,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
125 125
126 BUG_ON(irq == -1); 126 BUG_ON(irq == -1);
127#ifdef CONFIG_SMP 127#ifdef CONFIG_SMP
128 irq_desc[irq].affinity = cpumask_of_cpu(cpu); 128 irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);
129#endif 129#endif
130 130
131 __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); 131 __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
@@ -137,10 +137,12 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
137static void init_evtchn_cpu_bindings(void) 137static void init_evtchn_cpu_bindings(void)
138{ 138{
139#ifdef CONFIG_SMP 139#ifdef CONFIG_SMP
140 struct irq_desc *desc;
140 int i; 141 int i;
142
141 /* By default all event channels notify CPU#0. */ 143 /* By default all event channels notify CPU#0. */
142 for (i = 0; i < NR_IRQS; i++) 144 for_each_irq_desc(i, desc)
143 irq_desc[i].affinity = cpumask_of_cpu(0); 145 desc->affinity = cpumask_of_cpu(0);
144#endif 146#endif
145 147
146 memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); 148 memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -229,12 +231,12 @@ static int find_unbound_irq(void)
229 int irq; 231 int irq;
230 232
231 /* Only allocate from dynirq range */ 233 /* Only allocate from dynirq range */
232 for (irq = 0; irq < NR_IRQS; irq++) 234 for_each_irq_nr(irq)
233 if (irq_bindcount[irq] == 0) 235 if (irq_bindcount[irq] == 0)
234 break; 236 break;
235 237
236 if (irq == NR_IRQS) 238 if (irq == nr_irqs)
237 panic("No available IRQ to bind to: increase NR_IRQS!\n"); 239 panic("No available IRQ to bind to: increase nr_irqs!\n");
238 240
239 return irq; 241 return irq;
240} 242}
@@ -790,7 +792,7 @@ void xen_irq_resume(void)
790 mask_evtchn(evtchn); 792 mask_evtchn(evtchn);
791 793
792 /* No IRQ <-> event-channel mappings. */ 794 /* No IRQ <-> event-channel mappings. */
793 for (irq = 0; irq < NR_IRQS; irq++) 795 for_each_irq_nr(irq)
794 irq_info[irq].evtchn = 0; /* zap event-channel binding */ 796 irq_info[irq].evtchn = 0; /* zap event-channel binding */
795 797
796 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) 798 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -822,7 +824,7 @@ void __init xen_init_IRQ(void)
822 mask_evtchn(i); 824 mask_evtchn(i);
823 825
824 /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ 826 /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
825 for (i = 0; i < NR_IRQS; i++) 827 for_each_irq_nr(i)
826 irq_bindcount[i] = 0; 828 irq_bindcount[i] = 0;
827 829
828 irq_ctx_init(smp_processor_id()); 830 irq_ctx_init(smp_processor_id());
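The Xen event-channel code shows both halves of the sparse-irq conversion in one place: bounded loops become nr_irqs/for_each_irq_nr(), and descriptor walks become for_each_irq_desc(). Side by side, as a sketch:

    struct irq_desc *desc;
    int irq;

    /* old: every slot of a fixed-size array */
    for (irq = 0; irq < NR_IRQS; irq++)
            irq_desc[irq].affinity = cpumask_of_cpu(0);

    /* new: only the descriptors that actually exist */
    for_each_irq_desc(irq, desc)
            desc->affinity = cpumask_of_cpu(0);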
diff --git a/fs/Kconfig b/fs/Kconfig
index e282002b94d2..e46297f020c1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -403,7 +403,7 @@ config AUTOFS4_FS
403 N here. 403 N here.
404 404
405config FUSE_FS 405config FUSE_FS
406 tristate "Filesystem in Userspace support" 406 tristate "FUSE (Filesystem in Userspace) support"
407 help 407 help
408 With FUSE it is possible to implement a fully functional filesystem 408 With FUSE it is possible to implement a fully functional filesystem
409 in a userspace program. 409 in a userspace program.
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e2159063198a..8fcfa398d350 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1341,20 +1341,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
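For context: thread_group_cputime() fills a struct task_cputime with totals summed over the whole thread group, which is why the hand-rolled cputime_add() arithmetic above can be deleted. A sketch of the call pattern, using only the API visible in this diff (the wrapper function is hypothetical):

	static void example_group_times(struct task_struct *p,
					struct timeval *ut, struct timeval *st)
	{
		struct task_cputime cputime;

		thread_group_cputime(p, &cputime);	/* sum all threads  */
		cputime_to_timeval(cputime.utime, ut);	/* user time        */
		cputime_to_timeval(cputime.stime, st);	/* system time      */
	}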
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 0e8367c54624..5b5424cb3391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1390,20 +1390,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bada6bbc317..34930a964b82 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file,
 		file->f_op = &fuse_direct_io_file_operations;
 	if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
 		invalidate_inode_pages2(inode->i_mapping);
+	if (outarg->open_flags & FOPEN_NONSEEKABLE)
+		nonseekable_open(inode, file);
 	ff->fh = outarg->fh;
 	file->private_data = fuse_file_get(ff);
 }
@@ -1448,6 +1450,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
 	mutex_lock(&inode->i_mutex);
 	switch (origin) {
 	case SEEK_END:
+		retval = fuse_update_attributes(inode, NULL, file, NULL);
+		if (retval)
+			return retval;
 		offset += i_size_read(inode);
 		break;
 	case SEEK_CUR:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 3a876076bdd1..35accfdd747f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -6,6 +6,9 @@
   See the file COPYING.
 */
 
+#ifndef _FS_FUSE_I_H
+#define _FS_FUSE_I_H
+
 #include <linux/fuse.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
@@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode);
 void fuse_release_nowrite(struct inode *inode);
 
 u64 fuse_get_attr_version(struct fuse_conn *fc);
+
+#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6a84388cacff..54b1f0e1ef58 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -865,7 +865,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (is_bdev) {
 		fc->destroy_req = fuse_request_alloc();
 		if (!fc->destroy_req)
-			goto err_put_root;
+			goto err_free_init_req;
 	}
 
 	mutex_lock(&fuse_mutex);
@@ -895,6 +895,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
  err_unlock:
 	mutex_unlock(&fuse_mutex);
+ err_free_init_req:
 	fuse_request_free(init_req);
  err_put_root:
 	dput(root_dentry);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f4bc0e789539..bb9f4b05703d 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -388,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	/* add up live thread stats at the group level */
 	if (whole) {
+		struct task_cputime cputime;
 		struct task_struct *t = task;
 		do {
 			min_flt += t->min_flt;
 			maj_flt += t->maj_flt;
-			utime = cputime_add(utime, task_utime(t));
-			stime = cputime_add(stime, task_stime(t));
 			gtime = cputime_add(gtime, task_gtime(t));
 			t = next_thread(t);
 		} while (t != task);
 
 		min_flt += sig->min_flt;
 		maj_flt += sig->maj_flt;
-		utime = cputime_add(utime, sig->utime);
-		stime = cputime_add(stime, sig->stime);
+		thread_group_cputime(task, &cputime);
+		utime = cputime.utime;
+		stime = cputime.stime;
 		gtime = cputime_add(gtime, sig->gtime);
 	}
 
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 61b25f4eabe6..7ea52c79b2da 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/pagemap.h>
+#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
@@ -521,17 +522,13 @@ static const struct file_operations proc_vmalloc_operations = {
 
 static int show_stat(struct seq_file *p, void *v)
 {
-	int i;
+	int i, j;
 	unsigned long jif;
 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 	cputime64_t guest;
 	u64 sum = 0;
 	struct timespec boottime;
-	unsigned int *per_irq_sum;
-
-	per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL);
-	if (!per_irq_sum)
-		return -ENOMEM;
+	unsigned int per_irq_sum;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -540,8 +537,6 @@ static int show_stat(struct seq_file *p, void *v)
 	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
-		int j;
-
 		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
 		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
 		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
@@ -551,11 +546,10 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		for (j = 0; j < NR_IRQS; j++) {
-			unsigned int temp = kstat_cpu(i).irqs[j];
-			sum += temp;
-			per_irq_sum[j] += temp;
-		}
+
+		for_each_irq_nr(j)
+			sum += kstat_irqs_cpu(j, i);
+
 		sum += arch_irq_stat_cpu(i);
 	}
 	sum += arch_irq_stat();
@@ -597,8 +591,15 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
-	for (i = 0; i < NR_IRQS; i++)
-		seq_printf(p, " %u", per_irq_sum[i]);
+	/* sum again ?  it could be updated? */
+	for_each_irq_nr(j) {
+		per_irq_sum = 0;
+
+		for_each_possible_cpu(i)
+			per_irq_sum += kstat_irqs_cpu(j, i);
+
+		seq_printf(p, " %u", per_irq_sum);
+	}
 
 	seq_printf(p,
 		"\nctxt %llu\n"
@@ -612,7 +613,6 @@ static int show_stat(struct seq_file *p, void *v)
 		nr_running(),
 		nr_iowait());
 
-	kfree(per_irq_sum);
 	return 0;
 }
 
@@ -651,15 +651,14 @@ static const struct file_operations proc_stat_operations = {
  */
 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 {
-	return (*pos <= NR_IRQS) ? pos : NULL;
+	return (*pos <= nr_irqs) ? pos : NULL;
 }
 
+
 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 {
 	(*pos)++;
-	if (*pos > NR_IRQS)
-		return NULL;
-	return pos;
+	return (*pos <= nr_irqs) ? pos : NULL;
 }
 
 static void int_seq_stop(struct seq_file *f, void *v)
@@ -667,7 +666,6 @@ static void int_seq_stop(struct seq_file *f, void *v)
 	/* Nothing to do */
 }
 
-
 static const struct seq_operations int_seq_ops = {
 	.start = int_seq_start,
 	.next  = int_seq_next,
diff --git a/include/asm-frv/ide.h b/include/asm-frv/ide.h
index 7ebcc56a2229..361076611855 100644
--- a/include/asm-frv/ide.h
+++ b/include/asm-frv/ide.h
@@ -18,15 +18,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 
-/****************************************************************************/
-/*
- * some bits needed for parts of the IDE subsystem to compile
- */
-#define __ide_mm_insw(port, addr, n)	insw((unsigned long) (port), addr, n)
-#define __ide_mm_insl(port, addr, n)	insl((unsigned long) (port), addr, n)
-#define __ide_mm_outsw(port, addr, n)	outsw((unsigned long) (port), addr, n)
-#define __ide_mm_outsl(port, addr, n)	outsl((unsigned long) (port), addr, n)
-
+#include <asm-generic/ide_iops.h>
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_IDE_H */
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 0f6dabd4b517..12c07c1866b2 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -41,7 +41,7 @@ extern void warn_slowpath(const char *file, const int line,
 #define __WARN()	warn_on_slowpath(__FILE__, __LINE__)
 #define __WARN_printf(arg...)	warn_slowpath(__FILE__, __LINE__, arg)
 #else
-#define __WARN_printf(arg...)	__WARN()
+#define __WARN_printf(arg...)	do { printk(arg); __WARN(); } while (0)
 #endif
 
 #ifndef WARN_ON
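This one-liner matters because WARN(condition, format...) funnels its message through __WARN_printf() on architectures that lack warn_slowpath(); before the fix the format arguments were silently discarded there. A hypothetical caller, to show what the fallback now preserves:

	/* Prints the message *and* emits the warning on all configs: */
	if (WARN(!dev, "example: device vanished, state=%d\n", state))
		return -ENODEV;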
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 74c5faf26c05..80744606bad1 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -37,6 +37,13 @@
 #define MEM_DISCARD(sec) *(.mem##sec)
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+#define MCOUNT_REC()	VMLINUX_SYMBOL(__start_mcount_loc) = .; \
+			*(__mcount_loc)				\
+			VMLINUX_SYMBOL(__stop_mcount_loc) = .;
+#else
+#define MCOUNT_REC()
+#endif
 
 /* .data section */
 #define DATA_DATA \
@@ -52,7 +59,10 @@
 	. = ALIGN(8);							\
 	VMLINUX_SYMBOL(__start___markers) = .;				\
 	*(__markers)							\
-	VMLINUX_SYMBOL(__stop___markers) = .;
+	VMLINUX_SYMBOL(__stop___markers) = .;				\
+	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
+	*(__tracepoints)						\
+	VMLINUX_SYMBOL(__stop___tracepoints) = .;
 
 #define RO_DATA(align) \
 	. = ALIGN((align)); \
@@ -61,6 +71,7 @@
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
 		*(__markers_strings)	/* Markers: strings */		\
+		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
 									\
 	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
@@ -188,6 +199,7 @@
 	/* __*init sections */						\
 	__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) {		\
 		*(.ref.rodata)						\
+		MCOUNT_REC()						\
 		DEV_KEEP(init.rodata)					\
 		DEV_KEEP(exit.rodata)					\
 		CPU_KEEP(init.rodata)					\
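MCOUNT_REC() gathers the __mcount_loc entries that the -pg/recordmcount build step emits, bracketing them with start/stop symbols. Roughly how ftrace consumes the section at boot, as a hedged sketch (the walker function here is hypothetical; the real consumer lives in kernel/trace/ftrace.c):

	extern unsigned long __start_mcount_loc[];
	extern unsigned long __stop_mcount_loc[];

	static void example_walk_mcount(void)
	{
		unsigned long *p;

		/* Each entry is the address of a recorded "call mcount" site
		 * that ftrace may later patch to a nop. */
		for (p = __start_mcount_loc; p < __stop_mcount_loc; p++)
			pr_debug("mcount site at %p\n", (void *)*p);
	}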
diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h
index 1daf6cbdd9f0..b996a3c8cff5 100644
--- a/include/asm-m68k/ide.h
+++ b/include/asm-m68k/ide.h
@@ -92,15 +92,6 @@
 #define outsw_swapw(port, addr, n) raw_outsw_swapw((u16 *)port, addr, n)
 #endif
 
-
-/* Q40 and Atari have byteswapped IDE busses and since many interesting
- * values in the identification string are text, chars and words they
- * happened to be almost correct without swapping.. However *_capacity
- * is needed for drives over 8 GB. RZ */
-#if defined(CONFIG_Q40) || defined(CONFIG_ATARI)
-#define M68K_IDE_SWAPW  (MACH_IS_Q40 || MACH_IS_ATARI)
-#endif
-
 #ifdef CONFIG_BLK_DEV_FALCON_IDE
 #define IDE_ARCH_LOCK
 
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index d76a0839abe9..ef1d72dbdfe0 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -40,8 +40,6 @@ extern void generic_apic_probe(void);
 extern unsigned int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
-extern int ioapic_force;
-
 extern int disable_apic;
 /*
  * Basic functions accessing APICs.
@@ -100,6 +98,20 @@ extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
+static inline int x2apic_enabled(void)
+{
+	int msr, msr2;
+
+	if (!cpu_has_x2apic)
+		return 0;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (msr & X2APIC_ENABLE)
+		return 1;
+	return 0;
+}
+#else
+#define x2apic_enabled()	0
 #endif
 
 struct apic_ops {
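With the new inline, callers can branch on live MSR state instead of a cached flag, and builds without x2APIC support still compile thanks to the 0 stub. A hypothetical call site:

	if (x2apic_enabled())
		pr_info("x2APIC already enabled by firmware\n");
	else
		enable_IR_x2apic();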
diff --git a/include/asm-x86/bigsmp/apic.h b/include/asm-x86/bigsmp/apic.h
index 0a9cd7c5ca0c..1d9543b9d358 100644
--- a/include/asm-x86/bigsmp/apic.h
+++ b/include/asm-x86/bigsmp/apic.h
@@ -9,22 +9,17 @@ static inline int apic_id_registered(void)
 	return (1);
 }
 
-/* Round robin the irqs amoung the online cpus */
 static inline cpumask_t target_cpus(void)
 {
-	static unsigned long cpu = NR_CPUS;
-	do {
-		if (cpu >= NR_CPUS)
-			cpu = first_cpu(cpu_online_map);
-		else
-			cpu = next_cpu(cpu, cpu_online_map);
-	} while (cpu >= NR_CPUS);
-	return cpumask_of_cpu(cpu);
+#ifdef CONFIG_SMP
+	return cpu_online_map;
+#else
+	return cpumask_of_cpu(0);
+#endif
 }
 
 #undef APIC_DEST_LOGICAL
 #define APIC_DEST_LOGICAL	0
-#define TARGET_CPUS		(target_cpus())
 #define APIC_DFR_VALUE		(APIC_DFR_FLAT)
 #define INT_DELIVERY_MODE	(dest_Fixed)
 #define INT_DEST_MODE		(0)    /* phys delivery to target proc */
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index ed2de22e8705..313438e63348 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -94,4 +94,17 @@ extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
+#ifndef CONFIG_EFI
+/*
+ * IF EFI is not configured, have the EFI calls return -ENOSYS.
+ */
+#define efi_call0(_f)					(-ENOSYS)
+#define efi_call1(_f, _a1)				(-ENOSYS)
+#define efi_call2(_f, _a1, _a2)				(-ENOSYS)
+#define efi_call3(_f, _a1, _a2, _a3)			(-ENOSYS)
+#define efi_call4(_f, _a1, _a2, _a3, _a4)		(-ENOSYS)
+#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5)		(-ENOSYS)
+#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6)	(-ENOSYS)
+#endif /* CONFIG_EFI */
+
 #endif /* ASM_X86__EFI_H */
diff --git a/include/asm-x86/es7000/apic.h b/include/asm-x86/es7000/apic.h
index aae50c2fb303..380f0b4f17ed 100644
--- a/include/asm-x86/es7000/apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -17,7 +17,6 @@ static inline cpumask_t target_cpus(void)
 	return cpumask_of_cpu(smp_processor_id());
 #endif
 }
-#define TARGET_CPUS	(target_cpus())
 
 #if defined CONFIG_ES7000_CLUSTERED_APIC
 #define APIC_DFR_VALUE		(APIC_DFR_CLUSTER)
@@ -81,7 +80,7 @@ static inline void setup_apic_routing(void)
 	int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
 	printk("Enabling APIC mode:  %s. Using %d I/O APICs, target cpus %lx\n",
 		(apic_version[apic] == 0x14) ?
-		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
+		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
 }
 
 static inline int multi_timer_check(int apic, int irq)
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index be0e004ad148..1bb6f9bbe1ab 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h
@@ -7,6 +7,16 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/*
+	 * call mcount is "e8 <4 byte offset>"
+	 * The addr points to the 4 byte offset and the caller of this
+	 * function wants the pointer to e8. Simply subtract one.
+	 */
+	return addr - 1;
+}
 #endif
 
 #endif /* CONFIG_FTRACE */
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 34280f027664..6fe4f81bfcf9 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -57,6 +57,7 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	cpumask_t (*vector_allocation_domain)(int cpu);
 
 #ifdef CONFIG_SMP
 	/* ipi */
@@ -104,6 +105,7 @@ struct genapic {
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
+	APICFUNC(vector_allocation_domain)		\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
 	IPIFUNC(send_IPI_allbutself)			\
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index cbbbb6d4dd32..58b273f6ef07 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -1,6 +1,8 @@
 #ifndef ASM_X86__HPET_H
 #define ASM_X86__HPET_H
 
+#include <linux/msi.h>
+
 #ifdef CONFIG_HPET_TIMER
 
 #define HPET_MMAP_SIZE		1024
@@ -10,6 +12,11 @@
 #define HPET_CFG	0x010
 #define HPET_STATUS	0x020
 #define HPET_COUNTER	0x0f0
+
+#define HPET_Tn_CFG(n)		(0x100 + 0x20 * n)
+#define HPET_Tn_CMP(n)		(0x108 + 0x20 * n)
+#define HPET_Tn_ROUTE(n)	(0x110 + 0x20 * n)
+
 #define HPET_T0_CFG	0x100
 #define HPET_T0_CMP	0x108
 #define HPET_T0_ROUTE	0x110
@@ -65,6 +72,20 @@ extern void hpet_disable(void);
 extern unsigned long hpet_readl(unsigned long a);
 extern void force_hpet_resume(void);
 
+extern void hpet_msi_unmask(unsigned int irq);
+extern void hpet_msi_mask(unsigned int irq);
+extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg);
+extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg);
+
+#ifdef CONFIG_PCI_MSI
+extern int arch_setup_hpet_msi(unsigned int irq);
+#else
+static inline int arch_setup_hpet_msi(unsigned int irq)
+{
+	return -EINVAL;
+}
+#endif
+
 #ifdef CONFIG_HPET_EMULATE_RTC
 
 #include <linux/interrupt.h>
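The parameterized HPET_Tn_*() macros encode the hardware layout: each comparator's register block is 0x20 bytes wide. Spot-checking the arithmetic against the fixed constants kept below them:

	HPET_Tn_CFG(0)   == 0x100 + 0x20*0 == 0x100   /* HPET_T0_CFG   */
	HPET_Tn_CMP(1)   == 0x108 + 0x20*1 == 0x128   /* HPET_T1_CMP   */
	HPET_Tn_ROUTE(2) == 0x110 + 0x20*2 == 0x150   /* HPET_T2_ROUTE */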
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 50f6e0316b50..749d042f0556 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -96,13 +96,8 @@ extern asmlinkage void qic_call_function_interrupt(void);
 
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_32
 extern void smp_spurious_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
-#else
-extern asmlinkage void smp_spurious_interrupt(void);
-extern asmlinkage void smp_error_interrupt(void);
-#endif
 #ifdef CONFIG_X86_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
@@ -115,13 +110,13 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
 #ifdef CONFIG_X86_32
-extern void (*const interrupt[NR_IRQS])(void);
+extern void (*const interrupt[NR_VECTORS])(void);
-#else
+#endif
+
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-#endif
 
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_IO_APIC
 extern void lock_vector_lock(void);
 extern void unlock_vector_lock(void);
 extern void __setup_vector_irq(int cpu);
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 8ec68a50cf10..d35cbd7aa587 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
+#include <asm/irq_vectors.h>
 
 /*
  * Intel IO-APIC support for SMP and UP systems.
@@ -87,24 +88,8 @@ struct IO_APIC_route_entry {
 		mask		:  1,	/* 0: enabled, 1: disabled */
 		__reserved_2	: 15;
 
-#ifdef CONFIG_X86_32
-	union {
-		struct {
-			__u32	__reserved_1	: 24,
-				physical_dest	:  4,
-				__reserved_2	:  4;
-		} physical;
-
-		struct {
-			__u32	__reserved_1	: 24,
-				logical_dest	:  8;
-		} logical;
-	} dest;
-#else
 	__u32	__reserved_3	: 24,
 		dest		:  8;
-#endif
-
 } __attribute__ ((packed));
 
 struct IR_IO_APIC_route_entry {
@@ -203,10 +188,17 @@ extern void restore_IO_APIC_setup(void);
 extern void reinit_intr_remapped_IO_APIC(int);
 #endif
 
+extern int probe_nr_irqs(void);
+
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void)	{ }
+
+static inline int probe_nr_irqs(void)
+{
+	return NR_IRQS;
+}
 #endif
 
 #endif /* ASM_X86__IO_APIC_H */
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index c5d2d767a1f3..a8d065d85f57 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -19,19 +19,14 @@
 
 /*
  * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
- * cleanup after irq migration on 64 bit.
+ * cleanup after irq migration.
  */
 #define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
 
 /*
- * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
- * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit.
+ * Vectors 0x30-0x3f are used for ISA interrupts.
  */
-#ifdef CONFIG_X86_32
-#define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR)
-#else
 #define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR + 0x10)
-#endif
 #define IRQ1_VECTOR		(IRQ0_VECTOR + 1)
 #define IRQ2_VECTOR		(IRQ0_VECTOR + 2)
 #define IRQ3_VECTOR		(IRQ0_VECTOR + 3)
@@ -96,11 +91,7 @@
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
  */
-#ifdef CONFIG_X86_32
-# define FIRST_DEVICE_VECTOR	0x31
-#else
-# define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
-#endif
+#define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)
 
 #define NR_VECTORS		256
 
@@ -116,7 +107,6 @@
 # else
 #  define NR_IRQS		(NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
-# define NR_IRQ_VECTORS	NR_IRQS
 
 #elif !defined(CONFIG_X86_VOYAGER)
 
@@ -124,23 +114,15 @@
 
 # define NR_IRQS		224
 
-# if (224 >= 32 * NR_CPUS)
-#  define NR_IRQ_VECTORS	NR_IRQS
-# else
-#  define NR_IRQ_VECTORS	(32 * NR_CPUS)
-# endif
-
 # else /* IO_APIC || PARAVIRT */
 
 # define NR_IRQS		16
-# define NR_IRQ_VECTORS	NR_IRQS
 
 # endif
 
 #else /* !VISWS && !VOYAGER */
 
 # define NR_IRQS		224
-# define NR_IRQ_VECTORS	NR_IRQS
 
 #endif /* VISWS */
 
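With NR_IRQ_VECTORS gone, NR_IRQS is the remaining compile-time ceiling here, and on I/O-APIC kernels it scales with the APIC count. A worked instance of the surviving formula, assuming illustrative values NR_VECTORS = 256 and MAX_IO_APICS = 64 (the actual MAX_IO_APICS depends on the configuration):

	NR_IRQS = NR_VECTORS + (32 * MAX_IO_APICS)
	        = 256 + 32 * 64
	        = 2304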
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
index 9283b60a1dd2..6b1add8e31dd 100644
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
 BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 #endif
 
 /*
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 2a330a41b3dd..3c66f2cdaec1 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -85,6 +85,20 @@ static inline int apicid_to_node(int logical_apicid)
85 return 0; 85 return 0;
86#endif 86#endif
87} 87}
88
89static inline cpumask_t vector_allocation_domain(int cpu)
90{
91 /* Careful. Some cpus do not strictly honor the set of cpus
92 * specified in the interrupt destination when using lowest
93 * priority interrupt delivery mode.
94 *
95 * In particular there was a hyperthreading cpu observed to
96 * deliver interrupts to the wrong hyperthread when only one
97 * hyperthread was specified in the interrupt desitination.
98 */
99 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
100 return domain;
101}
88#endif 102#endif
89 103
90static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 104static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
@@ -138,6 +152,5 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
138static inline void enable_apic_mode(void) 152static inline void enable_apic_mode(void)
139{ 153{
140} 154}
141
142#endif /* CONFIG_X86_LOCAL_APIC */ 155#endif /* CONFIG_X86_LOCAL_APIC */
143#endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */ 156#endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */
diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h
deleted file mode 100644
index f7870e1a220d..000000000000
--- a/include/asm-x86/mach-generic/irq_vectors_limits.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS	224
-#define NR_IRQ_VECTORS	1024
-
-#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h
index 5d010c6881dd..5085b52da301 100644
--- a/include/asm-x86/mach-generic/mach_apic.h
+++ b/include/asm-x86/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
 
diff --git a/include/asm-x86/numaq/apic.h b/include/asm-x86/numaq/apic.h
index a8344ba6ea15..0bf2a06b7a4e 100644
--- a/include/asm-x86/numaq/apic.h
+++ b/include/asm-x86/numaq/apic.h
@@ -12,8 +12,6 @@ static inline cpumask_t target_cpus(void)
 	return CPU_MASK_ALL;
 }
 
-#define TARGET_CPUS (target_cpus())
-
 #define NO_BALANCE_IRQ (1)
 #define esr_disable (1)
 
diff --git a/include/asm-x86/summit/apic.h b/include/asm-x86/summit/apic.h
index 394b00bb5e72..9b3070f1c2ac 100644
--- a/include/asm-x86/summit/apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -22,7 +22,6 @@ static inline cpumask_t target_cpus(void)
 	 */
 	return cpumask_of_cpu(0);
 }
-#define TARGET_CPUS	(target_cpus())
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
 #define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
diff --git a/include/asm-x86/summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
deleted file mode 100644
index 890ce3f5e09a..000000000000
--- a/include/asm-x86/summit/irq_vectors_limits.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
-
-/*
- * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
- * even with uni-proc kernels, so use a big array.
- *
- * This value should be the same in both the generic and summit subarches.
- * Change one, change 'em both.
- */
-#define NR_IRQS	224
-#define NR_IRQ_VECTORS	1024
-
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index 7cd6d7ec1308..215f1969c266 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -2,9 +2,7 @@
 #define ASM_X86__UV__BIOS_H
 
 /*
- * BIOS layer definitions.
- *
- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ * UV BIOS layer definitions.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -19,50 +17,78 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) Russ Anderson
  */
 
 #include <linux/rtc.h>
 
-#define BIOS_FREQ_BASE			0x01000001
+/*
+ * Values for the BIOS calls.  It is passed as the first argument in the
+ * BIOS call.  Passing any other value in the first argument will result
+ * in a BIOS_STATUS_UNIMPLEMENTED return status.
+ */
+enum uv_bios_cmd {
+	UV_BIOS_COMMON,
+	UV_BIOS_GET_SN_INFO,
+	UV_BIOS_FREQ_BASE
+};
 
+/*
+ * Status values returned from a BIOS call.
+ */
 enum {
-	BIOS_FREQ_BASE_PLATFORM = 0,
-	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
-	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
+	BIOS_STATUS_SUCCESS		=  0,
+	BIOS_STATUS_UNIMPLEMENTED	= -ENOSYS,
+	BIOS_STATUS_EINVAL		= -EINVAL,
+	BIOS_STATUS_UNAVAIL		= -EBUSY
 };
 
-# define BIOS_CALL(result, a0, a1, a2, a3, a4, a5, a6, a7)	\
-	do {							\
-		/* XXX - the real call goes here */		\
-		result.status = BIOS_STATUS_UNIMPLEMENTED;	\
-		isrv.v0 = 0;					\
-		isrv.v1 = 0;					\
-	} while (0)
+/*
+ * The UV system table describes specific firmware
+ * capabilities available to the Linux kernel at runtime.
+ */
+struct uv_systab {
+	char signature[4];	/* must be "UVST" */
+	u32 revision;		/* distinguish different firmware revs */
+	u64 function;		/* BIOS runtime callback function ptr */
+};
 
 enum {
-	BIOS_STATUS_SUCCESS = 0,
-	BIOS_STATUS_UNIMPLEMENTED = -1,
-	BIOS_STATUS_EINVAL = -2,
-	BIOS_STATUS_ERROR = -3
+	BIOS_FREQ_BASE_PLATFORM = 0,
+	BIOS_FREQ_BASE_INTERVAL_TIMER = 1,
+	BIOS_FREQ_BASE_REALTIME_CLOCK = 2
 };
 
-struct uv_bios_retval {
-	/*
-	 * A zero status value indicates call completed without error.
-	 * A negative status value indicates reason of call failure.
-	 * A positive status value indicates success but an
-	 * informational value should be printed (e.g., "reboot for
-	 * change to take effect").
-	 */
-	s64 status;
-	u64 v0;
-	u64 v1;
-	u64 v2;
+union partition_info_u {
+	u64	val;
+	struct {
+		u64	hub_version	:  8,
+			partition_id	: 16,
+			coherence_id	: 16,
+			region_size	: 24;
+	};
 };
 
-extern long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-		   unsigned long *drift_info);
-extern const char *x86_bios_strerror(long status);
+/*
+ * BIOS calls have 6 parameters.
+ */
+extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
+
+extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
+extern s64 uv_bios_freq_base(u64, u64 *);
+
+extern void uv_bios_init(void);
+
+extern int uv_type;
+extern long sn_partition_id;
+extern long uv_coherency_id;
+extern long uv_region_size;
+#define partition_coherence_id()	(uv_coherency_id)
+
+extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
 
 #endif /* ASM_X86__UV__BIOS_H */
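A short usage sketch for the reworked interface, assuming only the declarations above (the caller is hypothetical): uv_bios_freq_base() takes the clock selector as its u64 payload and reports the tick rate through the out-parameter.

	static u64 example_rtc_ticks_per_sec(void)
	{
		u64 ticks = 0;
		s64 status;

		status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks);
		if (status != BIOS_STATUS_SUCCESS)
			return 0;	/* e.g. BIOS_STATUS_UNIMPLEMENTED on old firmware */
		return ticks;
	}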
diff --git a/include/asm-x86/uv/uv_irq.h b/include/asm-x86/uv/uv_irq.h
new file mode 100644
index 000000000000..8bf5f32da9c6
--- /dev/null
+++ b/include/asm-x86/uv/uv_irq.h
@@ -0,0 +1,36 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV IRQ definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef ASM_X86__UV__UV_IRQ_H
+#define ASM_X86__UV__UV_IRQ_H
+
+/* If a generic version of this structure gets defined, eliminate this one. */
+struct uv_IO_APIC_route_entry {
+	__u64	vector		:  8,
+		delivery_mode	:  3,
+		dest_mode	:  1,
+		delivery_status	:  1,
+		polarity	:  1,
+		__reserved_1	:  1,
+		trigger		:  1,
+		mask		:  1,
+		__reserved_2	: 15,
+		dest		: 32;
+};
+
+extern struct irq_chip uv_irq_chip;
+
+extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern void arch_disable_uv_irq(int, unsigned long);
+
+extern int uv_setup_irq(char *, int, int, unsigned long);
+extern void uv_teardown_irq(unsigned int, int, unsigned long);
+
+#endif /* ASM_X86__UV__UV_IRQ_H */
diff --git a/include/linux/aer.h b/include/linux/aer.h
index f2518141de88..f7df1eefc107 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -10,7 +10,6 @@
 #if defined(CONFIG_PCIEAER)
 /* pci-e port driver needs this function to enable aer */
 extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
-extern int pci_find_aer_capability(struct pci_dev *dev);
 extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
 extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
 #else
@@ -18,10 +17,6 @@ static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
 {
 	return -EINVAL;
 }
-static inline int pci_find_aer_capability(struct pci_dev *dev)
-{
-	return 0;
-}
 static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
 {
 	return -EINVAL;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 55e434feec99..f88d32f8ff7c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -45,7 +45,8 @@ struct clocksource;
  * @read:		returns a cycle value
  * @mask:		bitmask for two's complement
  *			subtraction of non 64 bit counters
- * @mult:		cycle to nanosecond multiplier
+ * @mult:		cycle to nanosecond multiplier (adjusted by NTP)
+ * @mult_orig:		cycle to nanosecond multiplier (unadjusted by NTP)
  * @shift:		cycle to nanosecond divisor (power of two)
  * @flags:		flags describing special properties
  * @vread:		vsyscall based read
@@ -63,6 +64,7 @@ struct clocksource {
 	cycle_t (*read)(void);
 	cycle_t mask;
 	u32 mult;
+	u32 mult_orig;
 	u32 shift;
 	unsigned long flags;
 	cycle_t (*vread)(void);
@@ -77,6 +79,7 @@ struct clocksource {
 	/* timekeeping specific data, ignore */
 	cycle_t cycle_interval;
 	u64	xtime_interval;
+	u32	raw_interval;
 	/*
 	 * Second part is written at each timer interrupt
 	 * Keep it in a different cache line to dirty no
@@ -85,6 +88,7 @@ struct clocksource {
 	cycle_t cycle_last ____cacheline_aligned_in_smp;
 	u64 xtime_nsec;
 	s64 error;
+	struct timespec raw_time;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 	/* Watchdog related data, used by the framework */
@@ -201,17 +205,19 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
 {
 	u64 tmp;
 
-	/* XXX - All of this could use a whole lot of optimization */
+	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = length_nsec;
 	tmp <<= c->shift;
-	tmp += c->mult/2;
-	do_div(tmp, c->mult);
+	tmp += c->mult_orig/2;
+	do_div(tmp, c->mult_orig);
 
 	c->cycle_interval = (cycle_t)tmp;
 	if (c->cycle_interval == 0)
 		c->cycle_interval = 1;
 
+	/* Go back from cycles -> shifted ns, this time use ntp adjusted mult */
 	c->xtime_interval = (u64)c->cycle_interval * c->mult;
+	c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift;
 }
 
 
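To see why mult_orig matters, here is one pass through clocksource_calculate_interval() with illustrative numbers (shift = 10, mult_orig = 2048, i.e. 2 ns per cycle, and a 10 ms tick), not values from any real clocksource:

	tmp  = 10000000ULL << 10;	/* 10,240,000,000               */
	tmp += 2048 / 2;		/* rounding                     */
	do_div(tmp, 2048);		/* cycle_interval ~= 5,000,000  */

	/* raw_interval = (5,000,000 * 2048) >> 10 = 10,000,000 ns exactly,
	 * while xtime_interval uses the NTP-adjusted mult, so the reported
	 * tick can be steered slightly without polluting the raw clock. */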
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8322141ee480..98115d9d04da 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -44,6 +44,8 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # error Sorry, your compiler is too old/not recognized.
 #endif
 
+#define notrace __attribute__((no_instrument_function))
+
 /* Intel compiler defines __GNUC__. So we will overwrite implementations
  * coming from above header files here
  */
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index c360c558e59e..f1984fc3e06d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -45,7 +45,6 @@ extern struct list_head dmar_drhd_units;
 	list_for_each_entry(drhd, &dmar_drhd_units, list)
 
 extern int dmar_table_init(void);
-extern int early_dmar_detect(void);
 extern int dmar_dev_scope_init(void);
 
 /* Intel IOMMU detection */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 807373d467f7..bb66feb164bd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -208,6 +208,9 @@ typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_siz
 #define EFI_GLOBAL_VARIABLE_GUID \
     EFI_GUID(  0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c )
 
+#define UV_SYSTEM_TABLE_GUID \
+    EFI_GUID(  0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -255,6 +258,7 @@ extern struct efi {
 	unsigned long boot_info;	/* boot info table */
 	unsigned long hcdp;		/* HCDP table */
 	unsigned long uga;		/* UGA table */
+	unsigned long uv_systab;	/* UV system table */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index bb384068272e..a3d46151be19 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1,10 +1,14 @@
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
-#ifdef CONFIG_FTRACE
-
 #include <linux/linkage.h>
 #include <linux/fs.h>
+#include <linux/ktime.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+#ifdef CONFIG_FTRACE
 
 extern int ftrace_enabled;
 extern int
@@ -36,6 +40,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
 # define register_ftrace_function(ops) do { } while (0)
 # define unregister_ftrace_function(ops) do { } while (0)
 # define clear_ftrace_function(ops) do { } while (0)
+static inline void ftrace_kill_atomic(void) { }
 #endif /* CONFIG_FTRACE */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -76,8 +81,10 @@ extern void mcount_call(void);
 
 extern int skip_trace(unsigned long ip);
 
-void ftrace_disable_daemon(void);
-void ftrace_enable_daemon(void);
+extern void ftrace_release(void *start, unsigned long size);
+
+extern void ftrace_disable_daemon(void);
+extern void ftrace_enable_daemon(void);
 
 #else
 # define skip_trace(ip)		       ({ 0; })
@@ -85,6 +92,7 @@ void ftrace_enable_daemon(void);
 # define ftrace_set_filter(buf, len, reset)	do { } while (0)
 # define ftrace_disable_daemon()		do { } while (0)
 # define ftrace_enable_daemon()			do { } while (0)
+static inline void ftrace_release(void *start, unsigned long size) { }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
@@ -98,9 +106,11 @@ static inline void tracer_disable(void)
 #endif
 }
 
-/* Ftrace disable/restore without lock. Some synchronization mechanism
+/*
+ * Ftrace disable/restore without lock. Some synchronization mechanism
  * must be used to prevent ftrace_enabled to be changed between
- * disable/restore. */
+ * disable/restore.
+ */
 static inline int __ftrace_enabled_save(void)
 {
 #ifdef CONFIG_FTRACE
@@ -157,9 +167,71 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_TRACING
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
+
+/**
+ * ftrace_printk - printf formatting in the ftrace buffer
+ * @fmt: the printf format for printing
+ *
+ * Note: __ftrace_printk is an internal function for ftrace_printk and
+ *       the @ip is passed in via the ftrace_printk macro.
+ *
+ * This function allows a kernel developer to debug fast path sections
+ * that printk is not appropriate for. By scattering in various
+ * printk like tracing in the code, a developer can quickly see
+ * where problems are occurring.
+ *
+ * This is intended as a debugging tool for the developer only.
+ * Please refrain from leaving ftrace_printks scattered around in
+ * your code.
+ */
+# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
+extern int
+__ftrace_printk(unsigned long ip, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern void ftrace_dump(void);
 #else
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
+static inline int
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
+
+static inline int
+ftrace_printk(const char *fmt, ...)
+{
+	return 0;
+}
+static inline void ftrace_dump(void) { }
 #endif
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+extern void ftrace_init(void);
+extern void ftrace_init_module(unsigned long *start, unsigned long *end);
+#else
+static inline void ftrace_init(void) { }
+static inline void
+ftrace_init_module(unsigned long *start, unsigned long *end) { }
+#endif
+
+
+struct boot_trace {
+	pid_t			caller;
+	char			func[KSYM_NAME_LEN];
+	int			result;
+	unsigned long long	duration;	/* usecs */
+	ktime_t			calltime;
+	ktime_t			rettime;
+};
+
+#ifdef CONFIG_BOOT_TRACER
+extern void trace_boot(struct boot_trace *it, initcall_t fn);
+extern void start_boot_trace(void);
+extern void stop_boot_trace(void);
+#else
+static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
+static inline void start_boot_trace(void) { }
+static inline void stop_boot_trace(void) { }
+#endif
+
+
+
 #endif /* _LINUX_FTRACE_H */
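A brief usage sketch for the new ftrace_printk() (the surrounding function is hypothetical); per the kernel-doc above, output lands in the ftrace ring buffer rather than the console, so it is cheap enough for hot paths and can be flushed later with ftrace_dump():

	static void example_fast_path(unsigned long hits)
	{
		ftrace_printk("fast path hit %lu times\n", hits);
	}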
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 265635dc9908..350fe9767bbc 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -17,8 +17,14 @@
  *  - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
  *  - add blksize field to fuse_attr
  *  - add file flags field to fuse_read_in and fuse_write_in
+ *
+ * 7.10
+ *  - add nonseekable open flag
  */
 
+#ifndef _LINUX_FUSE_H
+#define _LINUX_FUSE_H
+
 #include <asm/types.h>
 #include <linux/major.h>
 
@@ -26,7 +32,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 9
+#define FUSE_KERNEL_MINOR_VERSION 10
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -98,9 +104,11 @@ struct fuse_file_lock {
  *
  * FOPEN_DIRECT_IO: bypass page cache for this open file
  * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
+ * FOPEN_NONSEEKABLE: the file is not seekable
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
+#define FOPEN_NONSEEKABLE	(1 << 2)
 
 /**
  * INIT request/reply flags
@@ -409,3 +417,5 @@ struct fuse_dirent {
 #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
 #define FUSE_DIRENT_SIZE(d) \
 	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+#endif /* _LINUX_FUSE_H */
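On the userspace side, a filesystem opts a file handle out of seeking by setting the new bit in its OPEN reply; fuse_finish_open() above then calls nonseekable_open() on the kernel side. A minimal sketch against the raw protocol struct (the helper shown is hypothetical):

	static void example_fill_open_reply(struct fuse_open_out *out, __u64 fh)
	{
		out->fh = fh;
		/* This handle behaves like a pipe: no llseek, no pread/pwrite. */
		out->open_flags = FOPEN_NONSEEKABLE;
	}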
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2f245fe63bda..9a4e35cd5f79 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -125,12 +125,12 @@ struct hrtimer {
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
 	unsigned long			state;
-	enum hrtimer_cb_mode		cb_mode;
 	struct list_head		cb_entry;
+	enum hrtimer_cb_mode		cb_mode;
 #ifdef CONFIG_TIMER_STATS
+	int				start_pid;
 	void				*start_site;
 	char				start_comm[16];
-	int				start_pid;
 #endif
 };
 
@@ -155,10 +155,8 @@ struct hrtimer_sleeper {
 * @first:		pointer to the timer node which expires first
 * @resolution:		the resolution of the clock, in nanoseconds
 * @get_time:		function to retrieve the current time of the clock
- * @get_softirq_time:	function to retrieve the current time from the softirq
 * @softirq_time:	the time when running the hrtimer queue in the softirq
 * @offset:		offset of this clock to the monotonic base
- * @reprogram:		function to reprogram the timer event
 */
 struct hrtimer_clock_base {
 	struct hrtimer_cpu_base	*cpu_base;
@@ -167,13 +165,9 @@ struct hrtimer_clock_base {
 	struct rb_node		*first;
 	ktime_t			resolution;
 	ktime_t			(*get_time)(void);
-	ktime_t			(*get_softirq_time)(void);
 	ktime_t			softirq_time;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t			offset;
-	int			(*reprogram)(struct hrtimer *t,
-					     struct hrtimer_clock_base *b,
-					     ktime_t n);
 #endif
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c47e371554c1..89e53cfbc787 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -461,12 +461,26 @@ struct ide_acpi_drive_link;
461struct ide_acpi_hwif_link; 461struct ide_acpi_hwif_link;
462#endif 462#endif
463 463
464struct ide_drive_s;
465
466struct ide_disk_ops {
467 int (*check)(struct ide_drive_s *, const char *);
468 int (*get_capacity)(struct ide_drive_s *);
469 void (*setup)(struct ide_drive_s *);
470 void (*flush)(struct ide_drive_s *);
471 int (*init_media)(struct ide_drive_s *, struct gendisk *);
472 int (*set_doorlock)(struct ide_drive_s *, struct gendisk *,
473 int);
474 ide_startstop_t (*do_request)(struct ide_drive_s *, struct request *,
475 sector_t);
476 int (*end_request)(struct ide_drive_s *, int, int);
477 int (*ioctl)(struct ide_drive_s *, struct inode *,
478 struct file *, unsigned int, unsigned long);
479};
480
464/* ATAPI device flags */ 481/* ATAPI device flags */
465enum { 482enum {
466 IDE_AFLAG_DRQ_INTERRUPT = (1 << 0), 483 IDE_AFLAG_DRQ_INTERRUPT = (1 << 0),
467 IDE_AFLAG_MEDIA_CHANGED = (1 << 1),
468 /* Drive cannot lock the door. */
469 IDE_AFLAG_NO_DOORLOCK = (1 << 2),
470 484
471 /* ide-cd */ 485 /* ide-cd */
472 /* Drive cannot eject the disc. */ 486 /* Drive cannot eject the disc. */
@@ -498,14 +512,10 @@ enum {
498 IDE_AFLAG_LE_SPEED_FIELDS = (1 << 17), 512 IDE_AFLAG_LE_SPEED_FIELDS = (1 << 17),
499 513
500 /* ide-floppy */ 514 /* ide-floppy */
501 /* Format in progress */
502 IDE_AFLAG_FORMAT_IN_PROGRESS = (1 << 18),
503 /* Avoid commands not supported in Clik drive */ 515 /* Avoid commands not supported in Clik drive */
504 IDE_AFLAG_CLIK_DRIVE = (1 << 19), 516 IDE_AFLAG_CLIK_DRIVE = (1 << 19),
505 /* Requires BH algorithm for packets */ 517 /* Requires BH algorithm for packets */
506 IDE_AFLAG_ZIP_DRIVE = (1 << 20), 518 IDE_AFLAG_ZIP_DRIVE = (1 << 20),
507 /* Write protect */
508 IDE_AFLAG_WP = (1 << 21),
509 /* Supports format progress report */ 519 /* Supports format progress report */
510 IDE_AFLAG_SRFP = (1 << 22), 520 IDE_AFLAG_SRFP = (1 << 22),
511 521
@@ -578,7 +588,11 @@ enum {
578 /* don't unload heads */ 588 /* don't unload heads */
579 IDE_DFLAG_NO_UNLOAD = (1 << 27), 589 IDE_DFLAG_NO_UNLOAD = (1 << 27),
580 /* heads unloaded, please don't reset port */ 590 /* heads unloaded, please don't reset port */
581 IDE_DFLAG_PARKED = (1 << 28) 591 IDE_DFLAG_PARKED = (1 << 28),
592 IDE_DFLAG_MEDIA_CHANGED = (1 << 29),
593 /* write protect */
594 IDE_DFLAG_WP = (1 << 30),
595 IDE_DFLAG_FORMAT_IN_PROGRESS = (1 << 31),
582}; 596};
583 597
584struct ide_drive_s { 598struct ide_drive_s {
@@ -597,6 +611,8 @@ struct ide_drive_s {
597#endif 611#endif
598 struct hwif_s *hwif; /* actually (ide_hwif_t *) */ 612 struct hwif_s *hwif; /* actually (ide_hwif_t *) */
599 613
614 const struct ide_disk_ops *disk_ops;
615
600 unsigned long dev_flags; 616 unsigned long dev_flags;
601 617
602 unsigned long sleep; /* sleep until this time */ 618 unsigned long sleep; /* sleep until this time */
@@ -1123,8 +1139,8 @@ struct ide_driver_s {
1123 void (*resume)(ide_drive_t *); 1139 void (*resume)(ide_drive_t *);
1124 void (*shutdown)(ide_drive_t *); 1140 void (*shutdown)(ide_drive_t *);
1125#ifdef CONFIG_IDE_PROC_FS 1141#ifdef CONFIG_IDE_PROC_FS
1126 ide_proc_entry_t *proc; 1142 ide_proc_entry_t * (*proc_entries)(ide_drive_t *);
1127 const struct ide_proc_devset *settings; 1143 const struct ide_proc_devset * (*proc_devsets)(ide_drive_t *);
1128#endif 1144#endif
1129}; 1145};
1130 1146
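struct ide_disk_ops above replaces hard-wired per-media handling with an ops table hung off the new drive->disk_ops field. A hedged sketch of how a media driver might install it; the idefoo_* names are placeholders for what ide-disk/ide-floppy actually provide:

#include <linux/ide.h>

/* Hypothetical media-driver callbacks. */
static int idefoo_get_capacity(ide_drive_t *drive)
{
	return 0;
}

static void idefoo_setup(ide_drive_t *drive)
{
}

static const struct ide_disk_ops idefoo_disk_ops = {
	.get_capacity	= idefoo_get_capacity,
	.setup		= idefoo_setup,
};

static void idefoo_attach(ide_drive_t *drive)
{
	drive->disk_ops = &idefoo_disk_ops;	/* new field, see above */
	drive->disk_ops->setup(drive);
}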
diff --git a/include/linux/init.h b/include/linux/init.h
index ad63824460e3..0c1264668be0 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -40,7 +40,7 @@
40 40
41/* These are for everybody (although not all archs will actually 41/* These are for everybody (although not all archs will actually
42 discard it in modules) */ 42 discard it in modules) */
43#define __init __section(.init.text) __cold 43#define __init __section(.init.text) __cold notrace
44#define __initdata __section(.init.data) 44#define __initdata __section(.init.data)
45#define __initconst __section(.init.rodata) 45#define __initconst __section(.init.rodata)
46#define __exitdata __section(.exit.data) 46#define __exitdata __section(.exit.data)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 35a61dc60d51..f58a0cf8929a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -8,6 +8,7 @@
8#include <linux/preempt.h> 8#include <linux/preempt.h>
9#include <linux/cpumask.h> 9#include <linux/cpumask.h>
10#include <linux/irqreturn.h> 10#include <linux/irqreturn.h>
11#include <linux/irqnr.h>
11#include <linux/hardirq.h> 12#include <linux/hardirq.h>
12#include <linux/sched.h> 13#include <linux/sched.h>
13#include <linux/irqflags.h> 14#include <linux/irqflags.h>
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8d9411bc60f6..d058c57be02d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -18,6 +18,7 @@
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/cpumask.h> 19#include <linux/cpumask.h>
20#include <linux/irqreturn.h> 20#include <linux/irqreturn.h>
21#include <linux/irqnr.h>
21#include <linux/errno.h> 22#include <linux/errno.h>
22 23
23#include <asm/irq.h> 24#include <asm/irq.h>
@@ -152,6 +153,7 @@ struct irq_chip {
152 * @name: flow handler name for /proc/interrupts output 153 * @name: flow handler name for /proc/interrupts output
153 */ 154 */
154struct irq_desc { 155struct irq_desc {
156 unsigned int irq;
155 irq_flow_handler_t handle_irq; 157 irq_flow_handler_t handle_irq;
156 struct irq_chip *chip; 158 struct irq_chip *chip;
157 struct msi_desc *msi_desc; 159 struct msi_desc *msi_desc;
@@ -170,7 +172,7 @@ struct irq_desc {
170 cpumask_t affinity; 172 cpumask_t affinity;
171 unsigned int cpu; 173 unsigned int cpu;
172#endif 174#endif
173#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) 175#ifdef CONFIG_GENERIC_PENDING_IRQ
174 cpumask_t pending_mask; 176 cpumask_t pending_mask;
175#endif 177#endif
176#ifdef CONFIG_PROC_FS 178#ifdef CONFIG_PROC_FS
@@ -179,8 +181,14 @@ struct irq_desc {
179 const char *name; 181 const char *name;
180} ____cacheline_internodealigned_in_smp; 182} ____cacheline_internodealigned_in_smp;
181 183
184
182extern struct irq_desc irq_desc[NR_IRQS]; 185extern struct irq_desc irq_desc[NR_IRQS];
183 186
187static inline struct irq_desc *irq_to_desc(unsigned int irq)
188{
189 return (irq < nr_irqs) ? irq_desc + irq : NULL;
190}
191
184/* 192/*
185 * Migration helpers for obsolete names, they will go away: 193 * Migration helpers for obsolete names, they will go away:
186 */ 194 */
@@ -198,19 +206,15 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
198 206
199#ifdef CONFIG_GENERIC_HARDIRQS 207#ifdef CONFIG_GENERIC_HARDIRQS
200 208
201#ifndef handle_dynamic_tick
202# define handle_dynamic_tick(a) do { } while (0)
203#endif
204
205#ifdef CONFIG_SMP 209#ifdef CONFIG_SMP
206 210
207#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) 211#ifdef CONFIG_GENERIC_PENDING_IRQ
208 212
209void set_pending_irq(unsigned int irq, cpumask_t mask); 213void set_pending_irq(unsigned int irq, cpumask_t mask);
210void move_native_irq(int irq); 214void move_native_irq(int irq);
211void move_masked_irq(int irq); 215void move_masked_irq(int irq);
212 216
213#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */ 217#else /* CONFIG_GENERIC_PENDING_IRQ */
214 218
215static inline void move_irq(int irq) 219static inline void move_irq(int irq)
216{ 220{
@@ -237,19 +241,14 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
237 241
238#endif /* CONFIG_SMP */ 242#endif /* CONFIG_SMP */
239 243
240#ifdef CONFIG_IRQBALANCE
241extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
242#else
243static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
244{
245}
246#endif
247
248extern int no_irq_affinity; 244extern int no_irq_affinity;
249 245
250static inline int irq_balancing_disabled(unsigned int irq) 246static inline int irq_balancing_disabled(unsigned int irq)
251{ 247{
252 return irq_desc[irq].status & IRQ_NO_BALANCING_MASK; 248 struct irq_desc *desc;
249
250 desc = irq_to_desc(irq);
251 return desc->status & IRQ_NO_BALANCING_MASK;
253} 252}
254 253
255/* Handle irq action chains: */ 254/* Handle irq action chains: */
@@ -279,10 +278,8 @@ extern unsigned int __do_IRQ(unsigned int irq);
279 * irqchip-style controller then we call the ->handle_irq() handler, 278 * irqchip-style controller then we call the ->handle_irq() handler,
280 * and it calls __do_IRQ() if it's attached to an irqtype-style controller. 279 * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
281 */ 280 */
282static inline void generic_handle_irq(unsigned int irq) 281static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
283{ 282{
284 struct irq_desc *desc = irq_desc + irq;
285
286#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ 283#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
287 desc->handle_irq(irq, desc); 284 desc->handle_irq(irq, desc);
288#else 285#else
@@ -293,6 +290,11 @@ static inline void generic_handle_irq(unsigned int irq)
293#endif 290#endif
294} 291}
295 292
293static inline void generic_handle_irq(unsigned int irq)
294{
295 generic_handle_irq_desc(irq, irq_to_desc(irq));
296}
297
296/* Handling of unhandled and spurious interrupts: */ 298/* Handling of unhandled and spurious interrupts: */
297extern void note_interrupt(unsigned int irq, struct irq_desc *desc, 299extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
298 int action_ret); 300 int action_ret);
@@ -325,7 +327,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
325static inline void __set_irq_handler_unlocked(int irq, 327static inline void __set_irq_handler_unlocked(int irq,
326 irq_flow_handler_t handler) 328 irq_flow_handler_t handler)
327{ 329{
328 irq_desc[irq].handle_irq = handler; 330 struct irq_desc *desc;
331
332 desc = irq_to_desc(irq);
333 desc->handle_irq = handler;
329} 334}
330 335
331/* 336/*
@@ -353,13 +358,14 @@ extern void set_irq_noprobe(unsigned int irq);
353extern void set_irq_probe(unsigned int irq); 358extern void set_irq_probe(unsigned int irq);
354 359
355/* Handle dynamic irq creation and destruction */ 360/* Handle dynamic irq creation and destruction */
361extern unsigned int create_irq_nr(unsigned int irq_want);
356extern int create_irq(void); 362extern int create_irq(void);
357extern void destroy_irq(unsigned int irq); 363extern void destroy_irq(unsigned int irq);
358 364
359/* Test to see if a driver has successfully requested an irq */ 365/* Test to see if a driver has successfully requested an irq */
360static inline int irq_has_action(unsigned int irq) 366static inline int irq_has_action(unsigned int irq)
361{ 367{
362 struct irq_desc *desc = irq_desc + irq; 368 struct irq_desc *desc = irq_to_desc(irq);
363 return desc->action != NULL; 369 return desc->action != NULL;
364} 370}
365 371
@@ -374,10 +380,10 @@ extern int set_irq_chip_data(unsigned int irq, void *data);
374extern int set_irq_type(unsigned int irq, unsigned int type); 380extern int set_irq_type(unsigned int irq, unsigned int type);
375extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); 381extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
376 382
377#define get_irq_chip(irq) (irq_desc[irq].chip) 383#define get_irq_chip(irq) (irq_to_desc(irq)->chip)
378#define get_irq_chip_data(irq) (irq_desc[irq].chip_data) 384#define get_irq_chip_data(irq) (irq_to_desc(irq)->chip_data)
379#define get_irq_data(irq) (irq_desc[irq].handler_data) 385#define get_irq_data(irq) (irq_to_desc(irq)->handler_data)
380#define get_irq_msi(irq) (irq_desc[irq].msi_desc) 386#define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc)
381 387
382#endif /* CONFIG_GENERIC_HARDIRQS */ 388#endif /* CONFIG_GENERIC_HARDIRQS */
383 389
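The conversions in this hunk follow one mechanical pattern: direct irq_desc[irq] indexing becomes irq_to_desc(irq), which can return NULL once nr_irqs stops being a compile-time bound. A hedged before/after sketch for out-of-tree users (function and variable names illustrative):

#include <linux/irq.h>

static void show_irq_chip_name(unsigned int my_irq)
{
	/* old style: struct irq_chip *chip = irq_desc[my_irq].chip; */
	struct irq_desc *desc = irq_to_desc(my_irq);

	if (!desc)	/* irq_to_desc() may return NULL now */
		return;
	printk(KERN_INFO "irq %u: chip %s\n", my_irq,
	       desc->chip ? desc->chip->name : "none");
}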
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
new file mode 100644
index 000000000000..3171ddc3b39d
--- /dev/null
+++ b/include/linux/irqnr.h
@@ -0,0 +1,24 @@
1#ifndef _LINUX_IRQNR_H
2#define _LINUX_IRQNR_H
3
4#ifndef CONFIG_GENERIC_HARDIRQS
5#include <asm/irq.h>
6# define nr_irqs NR_IRQS
7
8# define for_each_irq_desc(irq, desc) \
9 for (irq = 0; irq < nr_irqs; irq++)
10#else
11extern int nr_irqs;
12
13# define for_each_irq_desc(irq, desc) \
14 for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
15
16# define for_each_irq_desc_reverse(irq, desc) \
17 for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \
18 irq >= 0; irq--, desc--)
19#endif
20
21#define for_each_irq_nr(irq) \
22 for (irq = 0; irq < nr_irqs; irq++)
23
24#endif
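A short usage sketch for the new iterators, assuming CONFIG_GENERIC_HARDIRQS (where desc tracks irq through the loop); the function name is hypothetical:

#include <linux/irq.h>
#include <linux/irqnr.h>

static unsigned int count_requested_irqs(void)
{
	struct irq_desc *desc;
	unsigned int count = 0;
	int irq;

	for_each_irq_desc(irq, desc) {
		if (desc->action)	/* someone called request_irq() */
			count++;
	}
	return count;
}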
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5a566b705ca9..94d17ff64c5a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -496,4 +496,9 @@ struct sysinfo {
496#define NUMA_BUILD 0 496#define NUMA_BUILD 0
497#endif 497#endif
498 498
499/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
500#ifdef CONFIG_FTRACE_MCOUNT_RECORD
501# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
502#endif
503
499#endif 504#endif
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cf9f40a91c9c..4a145caeee07 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -39,19 +39,34 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
39 39
40extern unsigned long long nr_context_switches(void); 40extern unsigned long long nr_context_switches(void);
41 41
42struct irq_desc;
43
44static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
45 struct irq_desc *desc)
46{
47 kstat_this_cpu.irqs[irq]++;
48}
49
50static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
51{
52 return kstat_cpu(cpu).irqs[irq];
53}
54
42/* 55/*
43 * Number of interrupts per specific IRQ source, since bootup 56 * Number of interrupts per specific IRQ source, since bootup
44 */ 57 */
45static inline int kstat_irqs(int irq) 58static inline unsigned int kstat_irqs(unsigned int irq)
46{ 59{
47 int cpu, sum = 0; 60 unsigned int sum = 0;
61 int cpu;
48 62
49 for_each_possible_cpu(cpu) 63 for_each_possible_cpu(cpu)
50 sum += kstat_cpu(cpu).irqs[irq]; 64 sum += kstat_irqs_cpu(irq, cpu);
51 65
52 return sum; 66 return sum;
53} 67}
54 68
69extern unsigned long long task_delta_exec(struct task_struct *);
55extern void account_user_time(struct task_struct *, cputime_t); 70extern void account_user_time(struct task_struct *, cputime_t);
56extern void account_user_time_scaled(struct task_struct *, cputime_t); 71extern void account_user_time_scaled(struct task_struct *, cputime_t);
57extern void account_system_time(struct task_struct *, int, cputime_t); 72extern void account_system_time(struct task_struct *, int, cputime_t);
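kstat_irqs_cpu() wraps what arch show_interrupts() implementations used to spell as kstat_cpu(cpu).irqs[irq]. A minimal sketch of the new form in a /proc/interrupts-style walker (function name hypothetical):

#include <linux/cpumask.h>
#include <linux/kernel_stat.h>
#include <linux/seq_file.h>

static void show_one_irq(struct seq_file *p, unsigned int irq)
{
	int cpu;

	seq_printf(p, "%3u:", irq);
	for_each_online_cpu(cpu)
		seq_printf(p, " %10u", kstat_irqs_cpu(irq, cpu));
	seq_putc(p, '\n');
}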
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0be7795655fa..497b1d1f7a05 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -29,6 +29,7 @@
29 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi 29 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
30 * <prasanna@in.ibm.com> added function-return probes. 30 * <prasanna@in.ibm.com> added function-return probes.
31 */ 31 */
32#include <linux/linkage.h>
32#include <linux/list.h> 33#include <linux/list.h>
33#include <linux/notifier.h> 34#include <linux/notifier.h>
34#include <linux/smp.h> 35#include <linux/smp.h>
@@ -47,7 +48,7 @@
47#define KPROBE_HIT_SSDONE 0x00000008 48#define KPROBE_HIT_SSDONE 0x00000008
48 49
49/* Attach to insert probes on any functions which should be ignored*/ 50/* Attach to insert probes on any functions which should be ignored*/
50#define __kprobes __attribute__((__section__(".kprobes.text"))) 51#define __kprobes __attribute__((__section__(".kprobes.text"))) notrace
51 52
52struct kprobe; 53struct kprobe;
53struct pt_regs; 54struct pt_regs;
@@ -256,7 +257,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
256 257
257#else /* CONFIG_KPROBES */ 258#else /* CONFIG_KPROBES */
258 259
259#define __kprobes /**/ 260#define __kprobes notrace
260struct jprobe; 261struct jprobe;
261struct kretprobe; 262struct kretprobe;
262 263
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 56ba37394656..9fd1f859021b 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -4,8 +4,6 @@
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <asm/linkage.h> 5#include <asm/linkage.h>
6 6
7#define notrace __attribute__((no_instrument_function))
8
9#ifdef __cplusplus 7#ifdef __cplusplus
10#define CPP_ASMLINKAGE extern "C" 8#define CPP_ASMLINKAGE extern "C"
11#else 9#else
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 1290653f9241..889196c7fbb1 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -160,4 +160,11 @@ extern int marker_probe_unregister_private_data(marker_probe_func *probe,
160extern void *marker_get_private_data(const char *name, marker_probe_func *probe, 160extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
161 int num); 161 int num);
162 162
163/*
164 * marker_synchronize_unregister must be called between the last marker probe
165 * unregistration and the end of module exit to make sure there is no caller
166 * executing a probe when it is freed.
167 */
168#define marker_synchronize_unregister() synchronize_sched()
169
163#endif 170#endif
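The new comment spells out an ordering contract for module teardown. A minimal module-exit sketch under that contract; the marker name and probe are hypothetical:

#include <linux/marker.h>
#include <linux/module.h>

static void probe_fn(void *probe_private, void *call_private,
		     const char *fmt, va_list *args)
{
	/* consume the marker's arguments here */
}

static void __exit my_exit(void)
{
	marker_probe_unregister("my_subsys_event", probe_fn, NULL);
	/*
	 * Wait until no CPU can still be executing probe_fn() before
	 * the module text that contains it is freed.
	 */
	marker_synchronize_unregister();
}
module_exit(my_exit);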
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 61d19e1b7a0b..139d7c88d9c9 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -34,11 +34,15 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p);
34/* Called from page fault handler. */ 34/* Called from page fault handler. */
35extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); 35extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
36 36
37/* Called from ioremap.c */
38#ifdef CONFIG_MMIOTRACE 37#ifdef CONFIG_MMIOTRACE
38/* Called from ioremap.c */
39extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, 39extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
40 void __iomem *addr); 40 void __iomem *addr);
41extern void mmiotrace_iounmap(volatile void __iomem *addr); 41extern void mmiotrace_iounmap(volatile void __iomem *addr);
42
43/* For anyone to insert markers. Remember trailing newline. */
44extern int mmiotrace_printk(const char *fmt, ...)
45 __attribute__ ((format (printf, 1, 2)));
42#else 46#else
43static inline void mmiotrace_ioremap(resource_size_t offset, 47static inline void mmiotrace_ioremap(resource_size_t offset,
44 unsigned long size, void __iomem *addr) 48 unsigned long size, void __iomem *addr)
@@ -48,15 +52,22 @@ static inline void mmiotrace_ioremap(resource_size_t offset,
48static inline void mmiotrace_iounmap(volatile void __iomem *addr) 52static inline void mmiotrace_iounmap(volatile void __iomem *addr)
49{ 53{
50} 54}
51#endif /* CONFIG_MMIOTRACE_HOOKS */ 55
56static inline int mmiotrace_printk(const char *fmt, ...)
57 __attribute__ ((format (printf, 1, 0)));
58
59static inline int mmiotrace_printk(const char *fmt, ...)
60{
61 return 0;
62}
63#endif /* CONFIG_MMIOTRACE */
52 64
53enum mm_io_opcode { 65enum mm_io_opcode {
54 MMIO_READ = 0x1, /* struct mmiotrace_rw */ 66 MMIO_READ = 0x1, /* struct mmiotrace_rw */
55 MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ 67 MMIO_WRITE = 0x2, /* struct mmiotrace_rw */
56 MMIO_PROBE = 0x3, /* struct mmiotrace_map */ 68 MMIO_PROBE = 0x3, /* struct mmiotrace_map */
57 MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ 69 MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */
58 MMIO_MARKER = 0x5, /* raw char data */ 70 MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
59 MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */
60}; 71};
61 72
62struct mmiotrace_rw { 73struct mmiotrace_rw {
@@ -81,5 +92,6 @@ extern void enable_mmiotrace(void);
81extern void disable_mmiotrace(void); 92extern void disable_mmiotrace(void);
82extern void mmio_trace_rw(struct mmiotrace_rw *rw); 93extern void mmio_trace_rw(struct mmiotrace_rw *rw);
83extern void mmio_trace_mapping(struct mmiotrace_map *map); 94extern void mmio_trace_mapping(struct mmiotrace_map *map);
95extern int mmio_trace_printk(const char *fmt, va_list args);
84 96
85#endif /* MMIOTRACE_H */ 97#endif /* MMIOTRACE_H */
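mmiotrace_printk() replaces the raw MMIO_MARKER record type dropped above. A one-line usage sketch (message text illustrative; note the trailing newline the comment asks for):

#include <linux/mmiotrace.h>

static void mark_reset(int stage)
{
	/* Compiles to a stub returning 0 when CONFIG_MMIOTRACE is off. */
	mmiotrace_printk("device reset, stage %d\n", stage);
}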
diff --git a/include/linux/module.h b/include/linux/module.h
index a41555cbe00a..5d2970cdce93 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -16,6 +16,7 @@
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <linux/moduleparam.h> 17#include <linux/moduleparam.h>
18#include <linux/marker.h> 18#include <linux/marker.h>
19#include <linux/tracepoint.h>
19#include <asm/local.h> 20#include <asm/local.h>
20 21
21#include <asm/module.h> 22#include <asm/module.h>
@@ -331,6 +332,10 @@ struct module
331 struct marker *markers; 332 struct marker *markers;
332 unsigned int num_markers; 333 unsigned int num_markers;
333#endif 334#endif
335#ifdef CONFIG_TRACEPOINTS
336 struct tracepoint *tracepoints;
337 unsigned int num_tracepoints;
338#endif
334 339
335#ifdef CONFIG_MODULE_UNLOAD 340#ifdef CONFIG_MODULE_UNLOAD
336 /* What modules depend on me? */ 341 /* What modules depend on me? */
@@ -453,6 +458,9 @@ extern void print_modules(void);
453 458
454extern void module_update_markers(void); 459extern void module_update_markers(void);
455 460
461extern void module_update_tracepoints(void);
462extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
463
456#else /* !CONFIG_MODULES... */ 464#else /* !CONFIG_MODULES... */
457#define EXPORT_SYMBOL(sym) 465#define EXPORT_SYMBOL(sym)
458#define EXPORT_SYMBOL_GPL(sym) 466#define EXPORT_SYMBOL_GPL(sym)
@@ -557,6 +565,15 @@ static inline void module_update_markers(void)
557{ 565{
558} 566}
559 567
568static inline void module_update_tracepoints(void)
569{
570}
571
572static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
573{
574 return 0;
575}
576
560#endif /* CONFIG_MODULES */ 577#endif /* CONFIG_MODULES */
561 578
562struct device_driver; 579struct device_driver;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index acf8f24037cd..085187be29c7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -214,6 +214,7 @@ struct pci_dev {
214 unsigned int broken_parity_status:1; /* Device generates false positive parity */ 214 unsigned int broken_parity_status:1; /* Device generates false positive parity */
215 unsigned int msi_enabled:1; 215 unsigned int msi_enabled:1;
216 unsigned int msix_enabled:1; 216 unsigned int msix_enabled:1;
217 unsigned int ari_enabled:1; /* ARI forwarding */
217 unsigned int is_managed:1; 218 unsigned int is_managed:1;
218 unsigned int is_pcie:1; 219 unsigned int is_pcie:1;
219 pci_dev_flags_t dev_flags; 220 pci_dev_flags_t dev_flags;
@@ -347,7 +348,6 @@ struct pci_bus_region {
347struct pci_dynids { 348struct pci_dynids {
348 spinlock_t lock; /* protects list, index */ 349 spinlock_t lock; /* protects list, index */
349 struct list_head list; /* for IDs added at runtime */ 350 struct list_head list; /* for IDs added at runtime */
350 unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */
351}; 351};
352 352
353/* ---------------------------------------------------------------- */ 353/* ---------------------------------------------------------------- */
@@ -456,8 +456,8 @@ struct pci_driver {
456 456
457/** 457/**
458 * PCI_VDEVICE - macro used to describe a specific pci device in short form 458 * PCI_VDEVICE - macro used to describe a specific pci device in short form
459 * @vend: the vendor name 459 * @vendor: the vendor name
460 * @dev: the 16 bit PCI Device ID 460 * @device: the 16 bit PCI Device ID
461 * 461 *
462 * This macro is used to create a struct pci_device_id that matches a 462 * This macro is used to create a struct pci_device_id that matches a
463 * specific PCI device. The subvendor, and subdevice fields will be set 463 * specific PCI device. The subvendor, and subdevice fields will be set
@@ -645,6 +645,7 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
645bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); 645bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
646void pci_pme_active(struct pci_dev *dev, bool enable); 646void pci_pme_active(struct pci_dev *dev, bool enable);
647int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable); 647int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
648int pci_wake_from_d3(struct pci_dev *dev, bool enable);
648pci_power_t pci_target_state(struct pci_dev *dev); 649pci_power_t pci_target_state(struct pci_dev *dev);
649int pci_prepare_to_sleep(struct pci_dev *dev); 650int pci_prepare_to_sleep(struct pci_dev *dev);
650int pci_back_from_sleep(struct pci_dev *dev); 651int pci_back_from_sleep(struct pci_dev *dev);
@@ -725,7 +726,7 @@ enum pci_dma_burst_strategy {
725}; 726};
726 727
727struct msix_entry { 728struct msix_entry {
728 u16 vector; /* kernel uses to write allocated vector */ 729 u32 vector; /* kernel uses to write allocated vector */
729 u16 entry; /* driver uses to specify entry, OS writes */ 730 u16 entry; /* driver uses to specify entry, OS writes */
730}; 731};
731 732
@@ -1118,5 +1119,20 @@ static inline void pci_mmcfg_early_init(void) { }
1118static inline void pci_mmcfg_late_init(void) { } 1119static inline void pci_mmcfg_late_init(void) { }
1119#endif 1120#endif
1120 1121
1122#ifdef CONFIG_HAS_IOMEM
1123static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar)
1124{
1125 /*
1126 * Make sure the BAR is actually a memory resource, not an IO resource
1127 */
1128 if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
1129 WARN_ON(1);
1130 return NULL;
1131 }
1132 return ioremap_nocache(pci_resource_start(pdev, bar),
1133 pci_resource_len(pdev, bar));
1134}
1135#endif
1136
1121#endif /* __KERNEL__ */ 1137#endif /* __KERNEL__ */
1122#endif /* LINUX_PCI_H */ 1138#endif /* LINUX_PCI_H */
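pci_ioremap_bar() folds the usual resource-flags check and ioremap_nocache() call into one helper. A hedged probe-path sketch (BAR number and cleanup illustrative):

#include <linux/pci.h>

static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	void *regs;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return err;

	regs = pci_ioremap_bar(pdev, 0);	/* BAR 0 must be a MEM BAR */
	if (!regs) {
		pci_disable_device(pdev);
		return -ENOMEM;
	}
	pci_set_drvdata(pdev, regs);
	return 0;
}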
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 8edddc240e4f..e5d344bfcb7e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2454,9 +2454,9 @@
2454#define PCI_DEVICE_ID_INTEL_ICH10_3 0x3a1a 2454#define PCI_DEVICE_ID_INTEL_ICH10_3 0x3a1a
2455#define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30 2455#define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30
2456#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 2456#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60
2457#define PCI_DEVICE_ID_INTEL_PCH_0 0x3b10 2457#define PCI_DEVICE_ID_INTEL_PCH_LPC_MIN 0x3b00
2458#define PCI_DEVICE_ID_INTEL_PCH_1 0x3b11 2458#define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX 0x3b1f
2459#define PCI_DEVICE_ID_INTEL_PCH_2 0x3b30 2459#define PCI_DEVICE_ID_INTEL_PCH_SMBUS 0x3b30
2460#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f 2460#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
2461#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 2461#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0
2462#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 2462#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 450684f7eaac..eb6686b88f9a 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -419,6 +419,10 @@
419#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ 419#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */
420#define PCI_EXP_RTCAP 30 /* Root Capabilities */ 420#define PCI_EXP_RTCAP 30 /* Root Capabilities */
421#define PCI_EXP_RTSTA 32 /* Root Status */ 421#define PCI_EXP_RTSTA 32 /* Root Status */
422#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */
423#define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */
424#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
425#define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */
422 426
423/* Extended Capabilities (PCI-X 2.0 and Express) */ 427/* Extended Capabilities (PCI-X 2.0 and Express) */
424#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) 428#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff)
@@ -429,6 +433,7 @@
429#define PCI_EXT_CAP_ID_VC 2 433#define PCI_EXT_CAP_ID_VC 2
430#define PCI_EXT_CAP_ID_DSN 3 434#define PCI_EXT_CAP_ID_DSN 3
431#define PCI_EXT_CAP_ID_PWR 4 435#define PCI_EXT_CAP_ID_PWR 4
436#define PCI_EXT_CAP_ID_ARI 14
432 437
433/* Advanced Error Reporting */ 438/* Advanced Error Reporting */
434#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ 439#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
@@ -536,5 +541,14 @@
536#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 hypertransport configuration */ 541#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 hypertransport configuration */
537#define HT_CAPTYPE_PM 0xE0 /* Hypertransport powermanagement configuration */ 542#define HT_CAPTYPE_PM 0xE0 /* Hypertransport powermanagement configuration */
538 543
544/* Alternative Routing-ID Interpretation */
545#define PCI_ARI_CAP 0x04 /* ARI Capability Register */
546#define PCI_ARI_CAP_MFVC 0x0001 /* MFVC Function Groups Capability */
547#define PCI_ARI_CAP_ACS 0x0002 /* ACS Function Groups Capability */
548#define PCI_ARI_CAP_NFN(x) (((x) >> 8) & 0xff) /* Next Function Number */
549#define PCI_ARI_CTRL 0x06 /* ARI Control Register */
550#define PCI_ARI_CTRL_MFVC 0x0001 /* MFVC Function Groups Enable */
551#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */
552#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */
539 553
540#endif /* LINUX_PCI_REGS_H */ 554#endif /* LINUX_PCI_REGS_H */
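The PCI_ARI_* macros decode the extended capability registered under PCI_EXT_CAP_ID_ARI above. A hedged sketch of reading the next-function pointer from the ARI capability (error handling trimmed):

#include <linux/pci.h>
#include <linux/pci_regs.h>

static u8 ari_next_fn(struct pci_dev *dev)
{
	u16 cap;
	int pos;

	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI);
	if (!pos)
		return 0;
	pci_read_config_word(dev, pos + PCI_ARI_CAP, &cap);
	return PCI_ARI_CAP_NFN(cap);	/* 0 means last function */
}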
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f30ade..a7c721355549 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -45,8 +45,6 @@ struct k_itimer {
45 int it_requeue_pending; /* waiting to requeue this timer */ 45 int it_requeue_pending; /* waiting to requeue this timer */
46#define REQUEUE_PENDING 1 46#define REQUEUE_PENDING 1
47 int it_sigev_notify; /* notify word of sigevent struct */ 47 int it_sigev_notify; /* notify word of sigevent struct */
48 int it_sigev_signo; /* signo word of sigevent struct */
49 sigval_t it_sigev_value; /* value word of sigevent struct */
50 struct task_struct *it_process; /* process to send signal to */ 48 struct task_struct *it_process; /* process to send signal to */
51 struct sigqueue *sigq; /* signal queue entry. */ 49 struct sigqueue *sigq; /* signal queue entry. */
52 union { 50 union {
@@ -115,4 +113,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
115 113
116long clock_nanosleep_restart(struct restart_block *restart_block); 114long clock_nanosleep_restart(struct restart_block *restart_block);
117 115
116void update_rlimit_cpu(unsigned long rlim_new);
117
118#endif 118#endif
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
new file mode 100644
index 000000000000..536b0ca46a03
--- /dev/null
+++ b/include/linux/ring_buffer.h
@@ -0,0 +1,127 @@
1#ifndef _LINUX_RING_BUFFER_H
2#define _LINUX_RING_BUFFER_H
3
4#include <linux/mm.h>
5#include <linux/seq_file.h>
6
7struct ring_buffer;
8struct ring_buffer_iter;
9
10/*
11 * Don't reference this struct directly, use functions below.
12 */
13struct ring_buffer_event {
14 u32 type:2, len:3, time_delta:27;
15 u32 array[];
16};
17
18/**
19 * enum ring_buffer_type - internal ring buffer types
20 *
21 * @RINGBUF_TYPE_PADDING: Left over page padding
22 * array is ignored
23 * size is variable depending on how much
24 * padding is needed
25 *
26 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta
27 * array[0] = time delta (28 .. 59)
28 * size = 8 bytes
29 *
30 * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock
31 * array[0] = tv_nsec
32 * array[1] = tv_sec
33 * size = 16 bytes
34 *
35 * @RINGBUF_TYPE_DATA: Data record
36 * If len is zero:
37 * array[0] holds the actual length
38 * array[1..(length+3)/4-1] holds data
39 * else
40 * length = len << 2
41 * array[0..(length+3)/4] holds data
42 */
43enum ring_buffer_type {
44 RINGBUF_TYPE_PADDING,
45 RINGBUF_TYPE_TIME_EXTEND,
46 /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
47 RINGBUF_TYPE_TIME_STAMP,
48 RINGBUF_TYPE_DATA,
49};
50
51unsigned ring_buffer_event_length(struct ring_buffer_event *event);
52void *ring_buffer_event_data(struct ring_buffer_event *event);
53
54/**
55 * ring_buffer_event_time_delta - return the delta timestamp of the event
56 * @event: the event to get the delta timestamp of
57 *
58 * The delta timestamp is the 27 bit timestamp since the last event.
59 */
60static inline unsigned
61ring_buffer_event_time_delta(struct ring_buffer_event *event)
62{
63 return event->time_delta;
64}
65
66/*
67 * size is in bytes for each per CPU buffer.
68 */
69struct ring_buffer *
70ring_buffer_alloc(unsigned long size, unsigned flags);
71void ring_buffer_free(struct ring_buffer *buffer);
72
73int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
74
75struct ring_buffer_event *
76ring_buffer_lock_reserve(struct ring_buffer *buffer,
77 unsigned long length,
78 unsigned long *flags);
79int ring_buffer_unlock_commit(struct ring_buffer *buffer,
80 struct ring_buffer_event *event,
81 unsigned long flags);
82int ring_buffer_write(struct ring_buffer *buffer,
83 unsigned long length, void *data);
84
85struct ring_buffer_event *
86ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts);
87struct ring_buffer_event *
88ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts);
89
90struct ring_buffer_iter *
91ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
92void ring_buffer_read_finish(struct ring_buffer_iter *iter);
93
94struct ring_buffer_event *
95ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts);
96struct ring_buffer_event *
97ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts);
98void ring_buffer_iter_reset(struct ring_buffer_iter *iter);
99int ring_buffer_iter_empty(struct ring_buffer_iter *iter);
100
101unsigned long ring_buffer_size(struct ring_buffer *buffer);
102
103void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu);
104void ring_buffer_reset(struct ring_buffer *buffer);
105
106int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
107 struct ring_buffer *buffer_b, int cpu);
108
109int ring_buffer_empty(struct ring_buffer *buffer);
110int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu);
111
112void ring_buffer_record_disable(struct ring_buffer *buffer);
113void ring_buffer_record_enable(struct ring_buffer *buffer);
114void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
115void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
116
117unsigned long ring_buffer_entries(struct ring_buffer *buffer);
118unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
119
120u64 ring_buffer_time_stamp(int cpu);
121void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
122
123enum ring_buffer_flags {
124 RB_FL_OVERWRITE = 1 << 0,
125};
126
127#endif /* _LINUX_RING_BUFFER_H */
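The reserve/commit pair above is the core write path of the new ring buffer. A hedged producer/consumer sketch against this API (payload layout and buffer size illustrative):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/ring_buffer.h>

static struct ring_buffer *buf;

static int produce(u32 value)
{
	struct ring_buffer_event *event;
	unsigned long flags;

	event = ring_buffer_lock_reserve(buf, sizeof(value), &flags);
	if (!event)
		return -EBUSY;
	*(u32 *)ring_buffer_event_data(event) = value;
	return ring_buffer_unlock_commit(buf, event, flags);
}

static void drain_cpu(int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	while ((event = ring_buffer_consume(buf, cpu, &ts)))
		pr_info("ts=%llu val=%u\n", (unsigned long long)ts,
			*(u32 *)ring_buffer_event_data(event));
}

static int ring_setup(void)
{
	buf = ring_buffer_alloc(1 << 16, RB_FL_OVERWRITE); /* 64K per CPU */
	return buf ? 0 : -ENOMEM;
}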
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f52dbd3587a7..5c38db536e07 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -434,6 +434,39 @@ struct pacct_struct {
434 unsigned long ac_minflt, ac_majflt; 434 unsigned long ac_minflt, ac_majflt;
435}; 435};
436 436
437/**
438 * struct task_cputime - collected CPU time counts
439 * @utime: time spent in user mode, in &cputime_t units
440 * @stime: time spent in kernel mode, in &cputime_t units
441 * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
442 *
443 * This structure groups together three kinds of CPU time that are
444 * tracked for threads and thread groups. Most things considering
445 * CPU time want to group these counts together and treat all three
446 * of them in parallel.
447 */
448struct task_cputime {
449 cputime_t utime;
450 cputime_t stime;
451 unsigned long long sum_exec_runtime;
452};
453/* Alternate field names when used to cache expirations. */
454#define prof_exp stime
455#define virt_exp utime
456#define sched_exp sum_exec_runtime
457
458/**
459 * struct thread_group_cputime - thread group interval timer counts
460 * @totals: thread group interval timers; substructure for
461 * uniprocessor kernel, per-cpu for SMP kernel.
462 *
463 * This structure contains the version of task_cputime, above, that is
464 * used for thread group CPU clock calculations.
465 */
466struct thread_group_cputime {
467 struct task_cputime *totals;
468};
469
437/* 470/*
438 * NOTE! "signal_struct" does not have it's own 471 * NOTE! "signal_struct" does not have it's own
439 * locking, because a shared signal_struct always 472 * locking, because a shared signal_struct always
@@ -479,6 +512,17 @@ struct signal_struct {
479 cputime_t it_prof_expires, it_virt_expires; 512 cputime_t it_prof_expires, it_virt_expires;
480 cputime_t it_prof_incr, it_virt_incr; 513 cputime_t it_prof_incr, it_virt_incr;
481 514
515 /*
516 * Thread group totals for process CPU clocks.
517 * See thread_group_cputime(), et al, for details.
518 */
519 struct thread_group_cputime cputime;
520
521 /* Earliest-expiration cache. */
522 struct task_cputime cputime_expires;
523
524 struct list_head cpu_timers[3];
525
482 /* job control IDs */ 526 /* job control IDs */
483 527
484 /* 528 /*
@@ -509,7 +553,7 @@ struct signal_struct {
509 * Live threads maintain their own counters and add to these 553 * Live threads maintain their own counters and add to these
510 * in __exit_signal, except for the group leader. 554 * in __exit_signal, except for the group leader.
511 */ 555 */
512 cputime_t utime, stime, cutime, cstime; 556 cputime_t cutime, cstime;
513 cputime_t gtime; 557 cputime_t gtime;
514 cputime_t cgtime; 558 cputime_t cgtime;
515 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; 559 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -518,14 +562,6 @@ struct signal_struct {
518 struct task_io_accounting ioac; 562 struct task_io_accounting ioac;
519 563
520 /* 564 /*
521 * Cumulative ns of scheduled CPU time for dead threads in the
522 * group, not including a zombie group leader. (This only differs
523 * from jiffies_to_ns(utime + stime) if sched_clock uses something
524 * other than jiffies.)
525 */
526 unsigned long long sum_sched_runtime;
527
528 /*
529 * We don't bother to synchronize most readers of this at all, 565 * We don't bother to synchronize most readers of this at all,
530 * because there is no reader checking a limit that actually needs 566 * because there is no reader checking a limit that actually needs
531 * to get both rlim_cur and rlim_max atomically, and either one 567 * to get both rlim_cur and rlim_max atomically, and either one
@@ -536,8 +572,6 @@ struct signal_struct {
536 */ 572 */
537 struct rlimit rlim[RLIM_NLIMITS]; 573 struct rlimit rlim[RLIM_NLIMITS];
538 574
539 struct list_head cpu_timers[3];
540
541 /* keep the process-shared keyrings here so that they do the right 575 /* keep the process-shared keyrings here so that they do the right
542 * thing in threads created with CLONE_THREAD */ 576 * thing in threads created with CLONE_THREAD */
543#ifdef CONFIG_KEYS 577#ifdef CONFIG_KEYS
@@ -1146,8 +1180,7 @@ struct task_struct {
1146/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ 1180/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
1147 unsigned long min_flt, maj_flt; 1181 unsigned long min_flt, maj_flt;
1148 1182
1149 cputime_t it_prof_expires, it_virt_expires; 1183 struct task_cputime cputime_expires;
1150 unsigned long long it_sched_expires;
1151 struct list_head cpu_timers[3]; 1184 struct list_head cpu_timers[3];
1152 1185
1153/* process credentials */ 1186/* process credentials */
@@ -1597,6 +1630,7 @@ extern unsigned long long cpu_clock(int cpu);
1597 1630
1598extern unsigned long long 1631extern unsigned long long
1599task_sched_runtime(struct task_struct *task); 1632task_sched_runtime(struct task_struct *task);
1633extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
1600 1634
1601/* sched_exec is called by processes performing an exec */ 1635/* sched_exec is called by processes performing an exec */
1602#ifdef CONFIG_SMP 1636#ifdef CONFIG_SMP
@@ -2094,6 +2128,30 @@ static inline int spin_needbreak(spinlock_t *lock)
2094} 2128}
2095 2129
2096/* 2130/*
2131 * Thread group CPU time accounting.
2132 */
2133
2134extern int thread_group_cputime_alloc(struct task_struct *);
2135extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
2136
2137static inline void thread_group_cputime_init(struct signal_struct *sig)
2138{
2139 sig->cputime.totals = NULL;
2140}
2141
2142static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
2143{
2144 if (curr->signal->cputime.totals)
2145 return 0;
2146 return thread_group_cputime_alloc(curr);
2147}
2148
2149static inline void thread_group_cputime_free(struct signal_struct *sig)
2150{
2151 free_percpu(sig->cputime.totals);
2152}
2153
2154/*
2097 * Reevaluate whether the task has signals pending delivery. 2155 * Reevaluate whether the task has signals pending delivery.
2098 * Wake the task if so. 2156 * Wake the task if so.
2099 * This is required every time the blocked sigset_t changes. 2157 * This is required every time the blocked sigset_t changes.
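struct task_cputime collapses the separate utime/stime/sum_sched_runtime bookkeeping into one structure. A minimal sketch of reading whole-group totals through the accessor declared above (caller name illustrative):

#include <linux/sched.h>

static void report_group_time(struct task_struct *tsk)
{
	struct task_cputime times;

	thread_group_cputime(tsk, &times);
	printk(KERN_DEBUG "utime=%lu stime=%lu runtime=%llu ns\n",
	       (unsigned long)times.utime, (unsigned long)times.stime,
	       times.sum_exec_runtime);
}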
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 98921a3e1aa8..b6ec8189ac0c 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -96,9 +96,11 @@ extern cpumask_t *tick_get_broadcast_oneshot_mask(void);
96extern void tick_clock_notify(void); 96extern void tick_clock_notify(void);
97extern int tick_check_oneshot_change(int allow_nohz); 97extern int tick_check_oneshot_change(int allow_nohz);
98extern struct tick_sched *tick_get_tick_sched(int cpu); 98extern struct tick_sched *tick_get_tick_sched(int cpu);
99extern void tick_check_idle(int cpu);
99# else 100# else
100static inline void tick_clock_notify(void) { } 101static inline void tick_clock_notify(void) { }
101static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } 102static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
103static inline void tick_check_idle(int cpu) { }
102# endif 104# endif
103 105
104#else /* CONFIG_GENERIC_CLOCKEVENTS */ 106#else /* CONFIG_GENERIC_CLOCKEVENTS */
@@ -106,26 +108,23 @@ static inline void tick_init(void) { }
106static inline void tick_cancel_sched_timer(int cpu) { } 108static inline void tick_cancel_sched_timer(int cpu) { }
107static inline void tick_clock_notify(void) { } 109static inline void tick_clock_notify(void) { }
108static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } 110static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
111static inline void tick_check_idle(int cpu) { }
109#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ 112#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
110 113
111# ifdef CONFIG_NO_HZ 114# ifdef CONFIG_NO_HZ
112extern void tick_nohz_stop_sched_tick(int inidle); 115extern void tick_nohz_stop_sched_tick(int inidle);
113extern void tick_nohz_restart_sched_tick(void); 116extern void tick_nohz_restart_sched_tick(void);
114extern void tick_nohz_update_jiffies(void);
115extern ktime_t tick_nohz_get_sleep_length(void); 117extern ktime_t tick_nohz_get_sleep_length(void);
116extern void tick_nohz_stop_idle(int cpu);
117extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); 118extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
118# else 119# else
119static inline void tick_nohz_stop_sched_tick(int inidle) { } 120static inline void tick_nohz_stop_sched_tick(int inidle) { }
120static inline void tick_nohz_restart_sched_tick(void) { } 121static inline void tick_nohz_restart_sched_tick(void) { }
121static inline void tick_nohz_update_jiffies(void) { }
122static inline ktime_t tick_nohz_get_sleep_length(void) 122static inline ktime_t tick_nohz_get_sleep_length(void)
123{ 123{
124 ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; 124 ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
125 125
126 return len; 126 return len;
127} 127}
128static inline void tick_nohz_stop_idle(int cpu) { }
129static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } 128static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
130# endif /* !NO_HZ */ 129# endif /* !NO_HZ */
131 130
diff --git a/include/linux/time.h b/include/linux/time.h
index 51e883df0fa5..4f1c9db57707 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -119,6 +119,7 @@ extern int do_setitimer(int which, struct itimerval *value,
119extern unsigned int alarm_setitimer(unsigned int seconds); 119extern unsigned int alarm_setitimer(unsigned int seconds);
120extern int do_getitimer(int which, struct itimerval *value); 120extern int do_getitimer(int which, struct itimerval *value);
121extern void getnstimeofday(struct timespec *tv); 121extern void getnstimeofday(struct timespec *tv);
122extern void getrawmonotonic(struct timespec *ts);
122extern void getboottime(struct timespec *ts); 123extern void getboottime(struct timespec *ts);
123extern void monotonic_to_bootbased(struct timespec *ts); 124extern void monotonic_to_bootbased(struct timespec *ts);
124 125
@@ -127,6 +128,9 @@ extern int timekeeping_valid_for_hres(void);
127extern void update_wall_time(void); 128extern void update_wall_time(void);
128extern void update_xtime_cache(u64 nsec); 129extern void update_xtime_cache(u64 nsec);
129 130
131struct tms;
132extern void do_sys_times(struct tms *);
133
130/** 134/**
131 * timespec_to_ns - Convert timespec to nanoseconds 135 * timespec_to_ns - Convert timespec to nanoseconds
132 * @ts: pointer to the timespec variable to be converted 136 * @ts: pointer to the timespec variable to be converted
@@ -216,6 +220,7 @@ struct itimerval {
216#define CLOCK_MONOTONIC 1 220#define CLOCK_MONOTONIC 1
217#define CLOCK_PROCESS_CPUTIME_ID 2 221#define CLOCK_PROCESS_CPUTIME_ID 2
218#define CLOCK_THREAD_CPUTIME_ID 3 222#define CLOCK_THREAD_CPUTIME_ID 3
223#define CLOCK_MONOTONIC_RAW 4
219 224
220/* 225/*
221 * The IDs of various hardware clocks: 226 * The IDs of various hardware clocks:
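CLOCK_MONOTONIC_RAW exposes getrawmonotonic() to userspace: monotonic time without NTP frequency adjustment. A hedged userspace sketch, assuming the clock id is wired into clock_gettime() elsewhere in this merge (link with -lrt on older glibc):

#include <stdio.h>
#include <time.h>

#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4	/* matches the kernel header above */
#endif

int main(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == 0)
		printf("raw monotonic: %ld.%09ld\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}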
diff --git a/include/linux/timex.h b/include/linux/timex.h
index fc6035d29d56..9007313b5b71 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -82,7 +82,7 @@
82 */ 82 */
83#define SHIFT_USEC 16 /* frequency offset scale (shift) */ 83#define SHIFT_USEC 16 /* frequency offset scale (shift) */
84#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) 84#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
85#define PPM_SCALE_INV_SHIFT 20 85#define PPM_SCALE_INV_SHIFT 19
86#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ 86#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
87 PPM_SCALE + 1) 87 PPM_SCALE + 1)
88 88
@@ -141,8 +141,15 @@ struct timex {
141#define ADJ_MICRO 0x1000 /* select microsecond resolution */ 141#define ADJ_MICRO 0x1000 /* select microsecond resolution */
142#define ADJ_NANO 0x2000 /* select nanosecond resolution */ 142#define ADJ_NANO 0x2000 /* select nanosecond resolution */
143#define ADJ_TICK 0x4000 /* tick value */ 143#define ADJ_TICK 0x4000 /* tick value */
144
145#ifdef __KERNEL__
146#define ADJ_ADJTIME 0x8000 /* switch between adjtime/adjtimex modes */
147#define ADJ_OFFSET_SINGLESHOT 0x0001 /* old-fashioned adjtime */
148#define ADJ_OFFSET_READONLY 0x2000 /* read-only adjtime */
149#else
144#define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */ 150#define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */
145#define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */ 151#define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */
152#endif
146 153
147/* xntp 3.4 compatibility names */ 154/* xntp 3.4 compatibility names */
148#define MOD_OFFSET ADJ_OFFSET 155#define MOD_OFFSET ADJ_OFFSET
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
new file mode 100644
index 000000000000..c5bb39c7a770
--- /dev/null
+++ b/include/linux/tracepoint.h
@@ -0,0 +1,137 @@
1#ifndef _LINUX_TRACEPOINT_H
2#define _LINUX_TRACEPOINT_H
3
4/*
5 * Kernel Tracepoint API.
6 *
7 * See Documentation/tracepoint.txt.
8 *
9 * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
10 *
11 * Heavily inspired from the Linux Kernel Markers.
12 *
13 * This file is released under the GPLv2.
14 * See the file COPYING for more details.
15 */
16
17#include <linux/types.h>
18#include <linux/rcupdate.h>
19
20struct module;
21struct tracepoint;
22
23struct tracepoint {
24 const char *name; /* Tracepoint name */
25 int state; /* State. */
26 void **funcs;
27} __attribute__((aligned(8)));
28
29
30#define TPPROTO(args...) args
31#define TPARGS(args...) args
32
33#ifdef CONFIG_TRACEPOINTS
34
35/*
36 * it_func[0] is never NULL because there is at least one element in the array
37 * when the array itself is non NULL.
38 */
39#define __DO_TRACE(tp, proto, args) \
40 do { \
41 void **it_func; \
42 \
43 rcu_read_lock_sched(); \
44 it_func = rcu_dereference((tp)->funcs); \
45 if (it_func) { \
46 do { \
47 ((void(*)(proto))(*it_func))(args); \
48 } while (*(++it_func)); \
49 } \
50 rcu_read_unlock_sched(); \
51 } while (0)
52
53/*
54 * Make sure the alignment of the structure in the __tracepoints section will
55 * not add unwanted padding between the beginning of the section and the
56 * structure. Force alignment to the same alignment as the section start.
57 */
58#define DEFINE_TRACE(name, proto, args) \
59 static inline void trace_##name(proto) \
60 { \
61 static const char __tpstrtab_##name[] \
62 __attribute__((section("__tracepoints_strings"))) \
63 = #name ":" #proto; \
64 static struct tracepoint __tracepoint_##name \
65 __attribute__((section("__tracepoints"), aligned(8))) = \
66 { __tpstrtab_##name, 0, NULL }; \
67 if (unlikely(__tracepoint_##name.state)) \
68 __DO_TRACE(&__tracepoint_##name, \
69 TPPROTO(proto), TPARGS(args)); \
70 } \
71 static inline int register_trace_##name(void (*probe)(proto)) \
72 { \
73 return tracepoint_probe_register(#name ":" #proto, \
74 (void *)probe); \
75 } \
76 static inline void unregister_trace_##name(void (*probe)(proto))\
77 { \
78 tracepoint_probe_unregister(#name ":" #proto, \
79 (void *)probe); \
80 }
81
82extern void tracepoint_update_probe_range(struct tracepoint *begin,
83 struct tracepoint *end);
84
85#else /* !CONFIG_TRACEPOINTS */
86#define DEFINE_TRACE(name, proto, args) \
87 static inline void _do_trace_##name(struct tracepoint *tp, proto) \
88 { } \
89 static inline void trace_##name(proto) \
90 { } \
91 static inline int register_trace_##name(void (*probe)(proto)) \
92 { \
93 return -ENOSYS; \
94 } \
95 static inline void unregister_trace_##name(void (*probe)(proto))\
96 { }
97
98static inline void tracepoint_update_probe_range(struct tracepoint *begin,
99 struct tracepoint *end)
100{ }
101#endif /* CONFIG_TRACEPOINTS */
102
103/*
104 * Connect a probe to a tracepoint.
105 * Internal API, should not be used directly.
106 */
107extern int tracepoint_probe_register(const char *name, void *probe);
108
109/*
110 * Disconnect a probe from a tracepoint.
111 * Internal API, should not be used directly.
112 */
113extern int tracepoint_probe_unregister(const char *name, void *probe);
114
115struct tracepoint_iter {
116 struct module *module;
117 struct tracepoint *tracepoint;
118};
119
120extern void tracepoint_iter_start(struct tracepoint_iter *iter);
121extern void tracepoint_iter_next(struct tracepoint_iter *iter);
122extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
123extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
124extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
125 struct tracepoint *begin, struct tracepoint *end);
126
127/*
128 * tracepoint_synchronize_unregister must be called between the last tracepoint
129 * probe unregistration and the end of module exit to make sure there is no
130 * caller executing a probe when it is freed.
131 */
132static inline void tracepoint_synchronize_unregister(void)
133{
134 synchronize_sched();
135}
136
137#endif
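Putting the generated helpers together: a hedged sketch of declaring a tracepoint, firing it, and attaching a probe at runtime. The subsystem and names are hypothetical; include/trace/sched.h below is the first in-tree user:

#include <linux/module.h>
#include <linux/tracepoint.h>

/* Declaration, normally in a header under include/trace/. */
DEFINE_TRACE(mysubsys_event,
	TPPROTO(int value),
	TPARGS(value));

/* Instrumentation site: a predicted-false branch while disarmed. */
static void do_work(int value)
{
	trace_mysubsys_event(value);
}

/* Probe attached and detached at runtime. */
static void probe_event(int value)
{
	/* runs under rcu_read_lock_sched(); keep it short */
}

static int __init my_init(void)
{
	return register_trace_mysubsys_event(probe_event);
}

static void __exit my_exit(void)
{
	unregister_trace_mysubsys_event(probe_event);
	tracepoint_synchronize_unregister();
}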
diff --git a/include/trace/sched.h b/include/trace/sched.h
new file mode 100644
index 000000000000..ad47369d01b5
--- /dev/null
+++ b/include/trace/sched.h
@@ -0,0 +1,56 @@
1#ifndef _TRACE_SCHED_H
2#define _TRACE_SCHED_H
3
4#include <linux/sched.h>
5#include <linux/tracepoint.h>
6
7DEFINE_TRACE(sched_kthread_stop,
8 TPPROTO(struct task_struct *t),
9 TPARGS(t));
10
11DEFINE_TRACE(sched_kthread_stop_ret,
12 TPPROTO(int ret),
13 TPARGS(ret));
14
15DEFINE_TRACE(sched_wait_task,
16 TPPROTO(struct rq *rq, struct task_struct *p),
17 TPARGS(rq, p));
18
19DEFINE_TRACE(sched_wakeup,
20 TPPROTO(struct rq *rq, struct task_struct *p),
21 TPARGS(rq, p));
22
23DEFINE_TRACE(sched_wakeup_new,
24 TPPROTO(struct rq *rq, struct task_struct *p),
25 TPARGS(rq, p));
26
27DEFINE_TRACE(sched_switch,
28 TPPROTO(struct rq *rq, struct task_struct *prev,
29 struct task_struct *next),
30 TPARGS(rq, prev, next));
31
32DEFINE_TRACE(sched_migrate_task,
33 TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
34 TPARGS(rq, p, dest_cpu));
35
36DEFINE_TRACE(sched_process_free,
37 TPPROTO(struct task_struct *p),
38 TPARGS(p));
39
40DEFINE_TRACE(sched_process_exit,
41 TPPROTO(struct task_struct *p),
42 TPARGS(p));
43
44DEFINE_TRACE(sched_process_wait,
45 TPPROTO(struct pid *pid),
46 TPARGS(pid));
47
48DEFINE_TRACE(sched_process_fork,
49 TPPROTO(struct task_struct *parent, struct task_struct *child),
50 TPARGS(parent, child));
51
52DEFINE_TRACE(sched_signal_send,
53 TPPROTO(int sig, struct task_struct *p),
54 TPARGS(sig, p));
55
56#endif
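A hedged sketch of consuming one of these definitions: hooking sched_switch from in-kernel code. The probe body is illustrative; the signature must match the TPPROTO above, and struct rq is only visible to scheduler-side code:

#include <trace/sched.h>

static void probe_sched_switch(struct rq *rq, struct task_struct *prev,
			       struct task_struct *next)
{
	/* called on every context switch while registered */
}

static int hook_switch(void)
{
	return register_trace_sched_switch(probe_sched_switch);
}

static void unhook_switch(void)
{
	unregister_trace_sched_switch(probe_sched_switch);
	tracepoint_synchronize_unregister();
}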
diff --git a/init/Kconfig b/init/Kconfig
index 8828ed0b2051..113c74c07da4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -737,6 +737,14 @@ config VM_EVENT_COUNTERS
 	  on EMBEDDED systems. /proc/vmstat will only show page counts
 	  if VM event counters are disabled.
 
+config PCI_QUIRKS
+	default y
+	bool "Enable PCI quirk workarounds" if EMBEDDED && PCI
+	help
+	  This enables workarounds for various PCI chipset
+	  bugs/quirks. Disable this only if your target machine is
+	  unaffected by PCI quirks.
+
 config SLUB_DEBUG
 	default y
 	bool "Enable SLUB debugging support" if EMBEDDED
@@ -786,6 +794,13 @@ config PROFILING
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.
 
+#
+# Place an empty function call at each tracepoint site. Can be
+# dynamically changed for a probe function.
+#
+config TRACEPOINTS
+	bool
+
 config MARKERS
 	bool "Activate markers"
 	help
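
Note: TRACEPOINTS is deliberately a hidden bool with no prompt; tracers are expected to select it. The "empty function call that can be dynamically changed for a probe" from the Kconfig comment boils down to the pattern below, shown as a self-contained userspace model (all names illustrative, not kernel API):

#include <stdio.h>

typedef void (*probe_fn)(int cpu);

static probe_fn sched_probe;		/* NULL: tracepoint inactive */

static inline void trace_model_sched_tick(int cpu)
{
	if (sched_probe)		/* the cheap test at every call site */
		sched_probe(cpu);	/* dynamically attached probe */
}

static void my_probe(int cpu)
{
	printf("tick on cpu %d\n", cpu);
}

int main(void)
{
	trace_model_sched_tick(0);	/* inactive: falls straight through */
	sched_probe = my_probe;		/* the moral of "register_trace_..." */
	trace_model_sched_tick(1);	/* probe fires */
	return 0;
}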
diff --git a/init/main.c b/init/main.c
index 4371d11721f6..3e17a3bafe60 100644
--- a/init/main.c
+++ b/init/main.c
@@ -61,6 +61,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
+#include <linux/ftrace.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -689,6 +690,8 @@ asmlinkage void __init start_kernel(void)
 
 	acpi_early_init(); /* before LAPIC and SMP init */
 
+	ftrace_init();
+
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }
@@ -705,30 +708,31 @@ __setup("initcall_debug", initcall_debug_setup);
 int do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
-	ktime_t t0, t1, delta;
+	ktime_t delta;
 	char msgbuf[64];
-	int result;
+	struct boot_trace it;
 
 	if (initcall_debug) {
-		printk("calling %pF @ %i\n", fn, task_pid_nr(current));
-		t0 = ktime_get();
+		it.caller = task_pid_nr(current);
+		printk("calling %pF @ %i\n", fn, it.caller);
+		it.calltime = ktime_get();
 	}
 
-	result = fn();
+	it.result = fn();
 
 	if (initcall_debug) {
-		t1 = ktime_get();
-		delta = ktime_sub(t1, t0);
-
-		printk("initcall %pF returned %d after %Ld msecs\n",
-			fn, result,
-			(unsigned long long) delta.tv64 >> 20);
+		it.rettime = ktime_get();
+		delta = ktime_sub(it.rettime, it.calltime);
+		it.duration = (unsigned long long) delta.tv64 >> 10;
+		printk("initcall %pF returned %d after %Ld usecs\n", fn,
+			it.result, it.duration);
+		trace_boot(&it, fn);
 	}
 
 	msgbuf[0] = 0;
 
-	if (result && result != -ENODEV && initcall_debug)
-		sprintf(msgbuf, "error code %d ", result);
+	if (it.result && it.result != -ENODEV && initcall_debug)
+		sprintf(msgbuf, "error code %d ", it.result);
 
 	if (preempt_count() != count) {
 		strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -742,7 +746,7 @@ int do_one_initcall(initcall_t fn)
 		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 
-	return result;
+	return it.result;
 }
 
 
@@ -857,6 +861,7 @@ static int __init kernel_init(void * unused)
 	smp_prepare_cpus(setup_max_cpus);
 
 	do_pre_smp_initcalls();
+	start_boot_trace();
 
 	smp_init();
 	sched_init_smp();
@@ -883,6 +888,7 @@ static int __init kernel_init(void * unused)
 	 * we're essentially up and running. Get rid of the
 	 * initmem segments and start the user-mode stuff..
 	 */
+	stop_boot_trace();
 	init_post();
 	return 0;
 }
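
Note: do_one_initcall() now reports in usecs (delta.tv64 >> 10 approximates nanoseconds/1024) instead of the old >> 20 msecs, and hands the measurement to the boot tracer. The shape of struct boot_trace can be inferred from the usage above; the sketch below is that inference only, not the real declaration (which lives in the boot-tracer headers outside this diff):

struct boot_trace {
	pid_t			caller;		/* it.caller = task_pid_nr(current) */
	ktime_t			calltime;	/* taken just before fn() */
	ktime_t			rettime;	/* taken just after fn() */
	int			result;		/* initcall return value */
	unsigned long long	duration;	/* (rettime - calltime) >> 10 */
};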
diff --git a/kernel/Makefile b/kernel/Makefile
index 066550aa61c5..305f11dbef21 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FTRACE) += trace/
diff --git a/kernel/compat.c b/kernel/compat.c
index 143990e48cb9..8eafe3eb50d9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,6 +23,7 @@
 #include <linux/timex.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
+#include <linux/times.h>
 
 #include <asm/uaccess.h>
 
@@ -208,49 +209,23 @@ asmlinkage long compat_sys_setitimer(int which,
 	return 0;
 }
 
+static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
+{
+	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
+}
+
 asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 {
-	/*
-	 * In the SMP world we might just be unlucky and have one of
-	 * the times increment as we use it. Since the value is an
-	 * atomically safe type this is just fine. Conceptually its
-	 * as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
+		struct tms tms;
 		struct compat_tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		read_lock(&tasklist_lock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		/*
-		 * While we have tasklist_lock read-locked, no dying thread
-		 * can be updating current->signal->[us]time. Instead,
-		 * we got their counts included in the live thread loop.
-		 * However, another thread can come in right now and
-		 * do a wait call that updates current->signal->c[us]time.
-		 * To make sure we always see that pair updated atomically,
-		 * we take the siglock around fetching them.
-		 */
-		spin_lock_irq(&tsk->sighand->siglock);
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
-
-		tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
-		tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
-		tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
-		tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
+
+		do_sys_times(&tms);
+		/* Convert our struct tms to the compat version. */
+		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
+		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
+		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
+		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
 		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
 			return -EFAULT;
 	}
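
Note: the compat path now defers all the locking and thread-group summing to a native helper and only converts units. A sketch of what that helper plausibly does, consistent with the conversion above; the real do_sys_times() lives in kernel/sys.c and is not part of this excerpt, so treat the details as an assumption:

void do_sys_times(struct tms *tms)
{
	struct task_cputime cputime;
	cputime_t cutime, cstime;

	spin_lock_irq(&current->sighand->siglock);
	thread_group_cputime(current, &cputime);	/* whole-group u/s time */
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);
	tms->tms_utime = cputime_to_clock_t(cputime.utime);
	tms->tms_stime = cputime_to_clock_t(cputime.stime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}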
diff --git a/kernel/exit.c b/kernel/exit.c
index 0ef4673e351b..80137a5d9467 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,6 +47,7 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <trace/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -112,8 +113,6 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -122,7 +121,6 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
-		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -149,7 +147,10 @@ static void __exit_signal(struct task_struct *tsk)
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
-	put_task_struct(container_of(rhp, struct task_struct, rcu));
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	trace_sched_process_free(tsk);
+	put_task_struct(tsk);
 }
 
 
@@ -1073,6 +1074,8 @@ NORET_TYPE void do_exit(long code)
 
 	if (group_dead)
 		acct_process();
+	trace_sched_process_exit(tsk);
+
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
@@ -1301,6 +1304,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (likely(!traced)) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
+		struct task_cputime cputime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1316,20 +1320,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		 * need to protect the access to p->parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_cputime() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
 		spin_lock_irq(&p->parent->sighand->siglock);
 		psig = p->parent->signal;
 		sig = p->signal;
+		thread_group_cputime(p, &cputime);
 		psig->cutime =
 			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
+			cputime_add(cputime.utime,
+				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
+			cputime_add(cputime.stime,
+				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
@@ -1674,6 +1681,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
 	struct task_struct *tsk;
 	int retval;
 
+	trace_sched_process_wait(pid);
+
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
 	/*
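
Note: __exit_signal() stops hand-accumulating per-thread utime/stime into the signal_struct because wait_task_zombie() now asks thread_group_cputime() for the consolidated totals. The accounting type it fills is roughly the following (an assumption based on this series; the real definition is in linux/sched.h of this tree):

struct task_cputime {
	cputime_t		utime;			/* user time, whole group */
	cputime_t		stime;			/* system time, whole group */
	unsigned long long	sum_exec_runtime;	/* scheduler runtime, ns */
};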
diff --git a/kernel/fork.c b/kernel/fork.c
index 30de644a40c4..4d093552dd6e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <trace/sched.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -759,15 +760,44 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 		kmem_cache_free(sighand_cachep, sighand);
 }
 
+
+/*
+ * Initialize POSIX timer handling for a thread group.
+ */
+static void posix_cpu_timers_init_group(struct signal_struct *sig)
+{
+	/* Thread group counters. */
+	thread_group_cputime_init(sig);
+
+	/* Expiration times and increments. */
+	sig->it_virt_expires = cputime_zero;
+	sig->it_virt_incr = cputime_zero;
+	sig->it_prof_expires = cputime_zero;
+	sig->it_prof_incr = cputime_zero;
+
+	/* Cached expiration times. */
+	sig->cputime_expires.prof_exp = cputime_zero;
+	sig->cputime_expires.virt_exp = cputime_zero;
+	sig->cputime_expires.sched_exp = 0;
+
+	/* The timer lists. */
+	INIT_LIST_HEAD(&sig->cpu_timers[0]);
+	INIT_LIST_HEAD(&sig->cpu_timers[1]);
+	INIT_LIST_HEAD(&sig->cpu_timers[2]);
+}
+
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&current->signal->count);
-		atomic_inc(&current->signal->live);
-		return 0;
+		ret = thread_group_cputime_clone_thread(current);
+		if (likely(!ret)) {
+			atomic_inc(&current->signal->count);
+			atomic_inc(&current->signal->live);
+		}
+		return ret;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
@@ -795,40 +825,25 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
-
 	sig->leader = 0;	/* session leadership doesn't inherit */
 	sig->tty_old_pgrp = NULL;
 	sig->tty = NULL;
 
-	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+	sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
-	sig->sum_sched_runtime = 0;
-	INIT_LIST_HEAD(&sig->cpu_timers[0]);
-	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-	INIT_LIST_HEAD(&sig->cpu_timers[2]);
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		/*
-		 * New sole thread in the process gets an expiry time
-		 * of the whole CPU time limit.
-		 */
-		tsk->it_prof_expires =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
-	}
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -838,6 +853,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_signal(struct signal_struct *sig)
 {
+	thread_group_cputime_free(sig);
 	exit_thread_group_keys(sig);
 	tty_kref_put(sig->tty);
 	kmem_cache_free(signal_cachep, sig);
@@ -888,6 +904,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif /* CONFIG_MM_OWNER */
 
 /*
+ * Initialize POSIX timer handling for a single task.
+ */
+static void posix_cpu_timers_init(struct task_struct *tsk)
+{
+	tsk->cputime_expires.prof_exp = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.sched_exp = 0;
+	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[2]);
+}
+
+/*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
  *
@@ -997,12 +1026,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
-	p->it_virt_expires = cputime_zero;
-	p->it_prof_expires = cputime_zero;
-	p->it_sched_expires = 0;
-	INIT_LIST_HEAD(&p->cpu_timers[0]);
-	INIT_LIST_HEAD(&p->cpu_timers[1]);
-	INIT_LIST_HEAD(&p->cpu_timers[2]);
+	posix_cpu_timers_init(p);
 
 	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1203,21 +1227,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD) {
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-
-		if (!cputime_eq(current->signal->it_virt_expires,
-				cputime_zero) ||
-		    !cputime_eq(current->signal->it_prof_expires,
-				cputime_zero) ||
-		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
-		    !list_empty(&current->signal->cpu_timers[0]) ||
-		    !list_empty(&current->signal->cpu_timers[1]) ||
-		    !list_empty(&current->signal->cpu_timers[2])) {
-			/*
-			 * Have child wake up on its first tick to check
-			 * for process CPU timers.
-			 */
-			p->it_prof_expires = jiffies_to_cputime(1);
-		}
 	}
 
 	if (likely(p->pid)) {
@@ -1364,6 +1373,8 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		trace_sched_process_fork(current, p);
+
 		nr = task_pid_vnr(p);
 
 		if (clone_flags & CLONE_PARENT_SETTID)
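
Note: CLONE_THREAD can now fail inside copy_signal(), because the shared group-cputime state may need allocating before the thread count is bumped. A sketch of the hook, under the assumption that the per-CPU totals are allocated lazily (names follow this series; the details are not verifiable from this excerpt):

static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
{
	if (curr->signal->cputime.totals)	/* group totals already set up */
		return 0;
	return thread_group_cputime_alloc(curr);	/* may return -ENOMEM */
}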
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cdec83e722fa..95978f48e039 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1403,9 +1403,7 @@ void hrtimer_run_queues(void)
 		if (!base->first)
 			continue;
 
-		if (base->get_softirq_time)
-			base->softirq_time = base->get_softirq_time();
-		else if (gettime) {
+		if (gettime) {
 			hrtimer_get_softirq_time(cpu_base);
 			gettime = 0;
 		}
@@ -1688,9 +1686,11 @@ static void migrate_hrtimers(int cpu)
 	new_base = &get_cpu_var(hrtimer_bases);
 
 	tick_cancel_sched_timer(cpu);
-
-	local_irq_disable();
-	spin_lock(&new_base->lock);
+	/*
+	 * The caller is globally serialized and nobody else
+	 * takes two locks at once, deadlock is not possible.
+	 */
+	spin_lock_irq(&new_base->lock);
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
@@ -1703,8 +1703,7 @@ static void migrate_hrtimers(int cpu)
 			raise = 1;
 
 	spin_unlock(&old_base->lock);
-	spin_unlock(&new_base->lock);
-	local_irq_enable();
+	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(hrtimer_bases);
 
 	if (raise)
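
Note: the migrate_hrtimers() part is a pure simplification; on this tree the open-coded pair and the canonical helper below are equivalent, so the change has no behavioral effect (fragment for illustration only):

	local_irq_disable();
	spin_lock(&new_base->lock);
	/* ... */
	spin_unlock(&new_base->lock);
	local_irq_enable();

	/* is the same as: */

	spin_lock_irq(&new_base->lock);
	/* ... */
	spin_unlock_irq(&new_base->lock);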
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 533068cfb607..cc0f7321b8ce 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -30,17 +30,16 @@ static DEFINE_MUTEX(probing_active);
 unsigned long probe_irq_on(void)
 {
 	struct irq_desc *desc;
-	unsigned long mask;
-	unsigned int i;
+	unsigned long mask = 0;
+	unsigned int status;
+	int i;
 
 	mutex_lock(&probing_active);
 	/*
 	 * something may have generated an irq long ago and we want to
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
-		desc = irq_desc + i;
-
+	for_each_irq_desc_reverse(i, desc) {
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -68,9 +67,7 @@ unsigned long probe_irq_on(void)
 	 * (we must startup again here because if a longstanding irq
 	 * happened in the previous stage, it may have masked itself)
 	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
-		desc = irq_desc + i;
-
+	for_each_irq_desc_reverse(i, desc) {
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -88,11 +85,7 @@ unsigned long probe_irq_on(void)
 	/*
 	 * Now filter out any obviously spurious interrupts
 	 */
-	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		unsigned int status;
-
-		desc = irq_desc + i;
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -126,14 +119,11 @@ EXPORT_SYMBOL(probe_irq_on);
  */
 unsigned int probe_irq_mask(unsigned long val)
 {
-	unsigned int mask;
+	unsigned int status, mask = 0;
+	struct irq_desc *desc;
 	int i;
 
-	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		struct irq_desc *desc = irq_desc + i;
-		unsigned int status;
-
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -171,20 +161,19 @@ EXPORT_SYMBOL(probe_irq_mask);
  */
 int probe_irq_off(unsigned long val)
 {
-	int i, irq_found = 0, nr_irqs = 0;
-
-	for (i = 0; i < NR_IRQS; i++) {
-		struct irq_desc *desc = irq_desc + i;
-		unsigned int status;
+	int i, irq_found = 0, nr_of_irqs = 0;
+	struct irq_desc *desc;
+	unsigned int status;
 
+	for_each_irq_desc(i, desc) {
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
 		if (status & IRQ_AUTODETECT) {
 			if (!(status & IRQ_WAITING)) {
-				if (!nr_irqs)
+				if (!nr_of_irqs)
 					irq_found = i;
-				nr_irqs++;
+				nr_of_irqs++;
 			}
 			desc->status = status & ~IRQ_AUTODETECT;
 			desc->chip->shutdown(i);
@@ -193,7 +182,7 @@ int probe_irq_off(unsigned long val)
 	}
 	mutex_unlock(&probing_active);
 
-	if (nr_irqs > 1)
+	if (nr_of_irqs > 1)
 		irq_found = -irq_found;
 
 	return irq_found;
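
Note: the local counter is renamed to nr_of_irqs because nr_irqs becomes a global symbol in kernel/irq/handle.c below. The iterators presumably have roughly this shape in this tree's linux/irq.h (a sketch, not the verified definition):

#define for_each_irq_desc(irq, desc)					\
	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)

#define for_each_irq_desc_reverse(irq, desc)				\
	for (irq = nr_irqs - 1, desc = irq_desc + irq; irq >= 0;	\
	     irq--, desc--)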
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3cd441ebf5d2..4895fde4eb93 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -24,16 +24,15 @@
  */
 void dynamic_irq_init(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
 	}
 
 	/* Ensure we don't have left over values from a previous use of this irq */
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status = IRQ_DISABLED;
 	desc->chip = &no_irq_chip;
@@ -57,15 +56,14 @@ void dynamic_irq_init(unsigned int irq)
  */
 void dynamic_irq_cleanup(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
 		return;
 	}
 
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	if (desc->action) {
 		spin_unlock_irqrestore(&desc->lock, flags);
@@ -89,10 +87,10 @@ void dynamic_irq_cleanup(unsigned int irq)
  */
 int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
 		return -EINVAL;
 	}
@@ -100,7 +98,6 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 	if (!chip)
 		chip = &no_irq_chip;
 
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	irq_chip_set_defaults(chip);
 	desc->chip = chip;
@@ -111,27 +108,27 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip)
 EXPORT_SYMBOL(set_irq_chip);
 
 /**
- *	set_irq_type - set the irq type for an irq
+ *	set_irq_type - set the irq trigger type for an irq
  *	@irq:	irq number
- *	@type:	interrupt type - see include/linux/interrupt.h
+ *	@type:	IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
  */
 int set_irq_type(unsigned int irq, unsigned int type)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 	int ret = -ENXIO;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
 		return -ENODEV;
 	}
 
-	desc = irq_desc + irq;
-	if (desc->chip->set_type) {
-		spin_lock_irqsave(&desc->lock, flags);
-		ret = desc->chip->set_type(irq, type);
-		spin_unlock_irqrestore(&desc->lock, flags);
-	}
+	if (type == IRQ_TYPE_NONE)
+		return 0;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	ret = __irq_set_trigger(desc, irq, flags);
+	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(set_irq_type);
@@ -145,16 +142,15 @@ EXPORT_SYMBOL(set_irq_type);
  */
 int set_irq_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install controller data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
 
-	desc = irq_desc + irq;
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->handler_data = data;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -171,15 +167,15 @@ EXPORT_SYMBOL(set_irq_data);
  */
 int set_irq_msi(unsigned int irq, struct msi_desc *entry)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install msi data for IRQ%d\n", irq);
 		return -EINVAL;
 	}
-	desc = irq_desc + irq;
+
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->msi_desc = entry;
 	if (entry)
@@ -197,10 +193,16 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry)
  */
 int set_irq_chip_data(unsigned int irq, void *data)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS || !desc->chip) {
+	if (!desc) {
+		printk(KERN_ERR
+		       "Trying to install chip data for IRQ%d\n", irq);
+		return -EINVAL;
+	}
+
+	if (!desc->chip) {
 		printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
 		return -EINVAL;
 	}
@@ -218,7 +220,7 @@ EXPORT_SYMBOL(set_irq_chip_data);
  */
 static void default_enable(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	desc->chip->unmask(irq);
 	desc->status &= ~IRQ_MASKED;
@@ -236,8 +238,9 @@ static void default_disable(unsigned int irq)
  */
 static unsigned int default_startup(unsigned int irq)
 {
-	irq_desc[irq].chip->enable(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
 
+	desc->chip->enable(irq);
 	return 0;
 }
 
@@ -246,7 +249,7 @@ static unsigned int default_startup(unsigned int irq)
  */
 static void default_shutdown(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 
 	desc->chip->mask(irq);
 	desc->status |= IRQ_MASKED;
@@ -305,14 +308,13 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 {
 	struct irqaction *action;
 	irqreturn_t action_ret;
-	const unsigned int cpu = smp_processor_id();
 
 	spin_lock(&desc->lock);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
@@ -344,7 +346,6 @@ out_unlock:
 void
 handle_level_irq(unsigned int irq, struct irq_desc *desc)
 {
-	unsigned int cpu = smp_processor_id();
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
@@ -354,7 +355,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/*
 	 * If its disabled or no action available
@@ -392,7 +393,6 @@ out_unlock:
 void
 handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 {
-	unsigned int cpu = smp_processor_id();
 	struct irqaction *action;
 	irqreturn_t action_ret;
 
@@ -402,7 +402,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		goto out;
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/*
 	 * If its disabled or no action available
@@ -451,8 +451,6 @@ out:
 void
 handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 {
-	const unsigned int cpu = smp_processor_id();
-
 	spin_lock(&desc->lock);
 
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
@@ -468,8 +466,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		mask_ack_irq(desc, irq);
 		goto out_unlock;
 	}
-
-	kstat_cpu(cpu).irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
@@ -524,7 +521,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
 	irqreturn_t action_ret;
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
@@ -541,17 +538,15 @@ void
 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		  const char *name)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR
 		       "Trying to install type control for IRQ%d\n", irq);
 		return;
 	}
 
-	desc = irq_desc + irq;
-
 	if (!handle)
 		handle = handle_bad_irq;
 	else if (desc->chip == &no_irq_chip) {
@@ -583,7 +578,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		desc->status &= ~IRQ_DISABLED;
 		desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
 		desc->depth = 0;
-		desc->chip->unmask(irq);
+		desc->chip->startup(irq);
 	}
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
@@ -606,17 +601,14 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
 
 void __init set_irq_noprobe(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
-
 		return;
 	}
 
-	desc = irq_desc + irq;
-
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status |= IRQ_NOPROBE;
 	spin_unlock_irqrestore(&desc->lock, flags);
@@ -624,17 +616,14 @@ void __init set_irq_noprobe(unsigned int irq)
 
 void __init set_irq_probe(unsigned int irq)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 
-	if (irq >= NR_IRQS) {
+	if (!desc) {
 		printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
-
 		return;
 	}
 
-	desc = irq_desc + irq;
-
 	spin_lock_irqsave(&desc->lock, flags);
 	desc->status &= ~IRQ_NOPROBE;
 	spin_unlock_irqrestore(&desc->lock, flags);
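
Note: every open-coded `irq >= NR_IRQS` range check above becomes a NULL check on the descriptor lookup. Before sparse-IRQ support lands, irq_to_desc() is presumably just a bounds-checked array access, consistent with irq_desc[] still being a flat array in handle.c below (sketch, not the verified definition):

static inline struct irq_desc *irq_to_desc(unsigned int irq)
{
	return (irq < nr_irqs) ? irq_desc + irq : NULL;
}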
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 5fa6198e9139..c815b42d0f5b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -25,11 +25,10 @@
  *
  * Handles spurious and unhandled IRQ's. It also prints a debugmessage.
  */
-void
-handle_bad_irq(unsigned int irq, struct irq_desc *desc)
+void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
 	print_irq_desc(irq, desc);
-	kstat_this_cpu.irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
 	ack_bad_irq(irq);
 }
 
@@ -47,6 +46,9 @@ handle_bad_irq(unsigned int irq, struct irq_desc *desc)
  *
  * Controller mappings for all interrupt sources:
  */
+int nr_irqs = NR_IRQS;
+EXPORT_SYMBOL_GPL(nr_irqs);
+
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -66,7 +68,9 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
  */
 static void ack_bad(unsigned int irq)
 {
-	print_irq_desc(irq, irq_desc + irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	print_irq_desc(irq, desc);
 	ack_bad_irq(irq);
 }
 
@@ -131,8 +135,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
 
-	handle_dynamic_tick(action);
-
 	if (!(action->flags & IRQF_DISABLED))
 		local_irq_enable_in_hardirq();
 
@@ -165,11 +167,12 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
  */
 unsigned int __do_IRQ(unsigned int irq)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action;
 	unsigned int status;
 
-	kstat_this_cpu.irqs[irq]++;
+	kstat_incr_irqs_this_cpu(irq, desc);
+
 	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
@@ -256,8 +259,8 @@ out:
 }
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS
 
+#ifdef CONFIG_TRACE_IRQFLAGS
 /*
  * lockdep: we want to handle all irq_desc locks as a single lock-class:
  */
@@ -265,10 +268,10 @@ static struct lock_class_key irq_desc_lock_class;
 
 void early_init_irq_lock_class(void)
 {
+	struct irq_desc *desc;
 	int i;
 
-	for (i = 0; i < NR_IRQS; i++)
-		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+	for_each_irq_desc(i, desc)
+		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 }
-
 #endif
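
Note: kstat_incr_irqs_this_cpu() centralizes the per-CPU IRQ count bump so flow handlers no longer need their own smp_processor_id() locals. With the flat irq_desc[] array it is presumably equivalent to the line it replaces; the helper itself lives in linux/kernel_stat.h, outside this diff, so this is an assumption:

#define kstat_incr_irqs_this_cpu(irq, desc)	\
	(kstat_this_cpu.irqs[irq]++)	/* desc unused for now; it exists so
					 * sparse-irq can redirect the count
					 * into the descriptor later */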
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 08a849a22447..c9767e641980 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -10,12 +10,15 @@ extern void irq_chip_set_defaults(struct irq_chip *chip);
 /* Set default handler: */
 extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
 
+extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
+		unsigned long flags);
+
 #ifdef CONFIG_PROC_FS
-extern void register_irq_proc(unsigned int irq);
+extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
 extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
 #else
-static inline void register_irq_proc(unsigned int irq) { }
+static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
 static inline void register_handler_proc(unsigned int irq,
 					 struct irqaction *action) { }
 static inline void unregister_handler_proc(unsigned int irq,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 60c49e324390..c498a1b8c621 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -31,10 +31,10 @@ cpumask_t irq_default_affinity = CPU_MASK_ALL;
31 */ 31 */
32void synchronize_irq(unsigned int irq) 32void synchronize_irq(unsigned int irq)
33{ 33{
34 struct irq_desc *desc = irq_desc + irq; 34 struct irq_desc *desc = irq_to_desc(irq);
35 unsigned int status; 35 unsigned int status;
36 36
37 if (irq >= NR_IRQS) 37 if (!desc)
38 return; 38 return;
39 39
40 do { 40 do {
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(synchronize_irq);
64 */ 64 */
65int irq_can_set_affinity(unsigned int irq) 65int irq_can_set_affinity(unsigned int irq)
66{ 66{
67 struct irq_desc *desc = irq_desc + irq; 67 struct irq_desc *desc = irq_to_desc(irq);
68 68
69 if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip || 69 if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip ||
70 !desc->chip->set_affinity) 70 !desc->chip->set_affinity)
@@ -81,18 +81,17 @@ int irq_can_set_affinity(unsigned int irq)
81 */ 81 */
82int irq_set_affinity(unsigned int irq, cpumask_t cpumask) 82int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
83{ 83{
84 struct irq_desc *desc = irq_desc + irq; 84 struct irq_desc *desc = irq_to_desc(irq);
85 85
86 if (!desc->chip->set_affinity) 86 if (!desc->chip->set_affinity)
87 return -EINVAL; 87 return -EINVAL;
88 88
89 set_balance_irq_affinity(irq, cpumask);
90
91#ifdef CONFIG_GENERIC_PENDING_IRQ 89#ifdef CONFIG_GENERIC_PENDING_IRQ
92 if (desc->status & IRQ_MOVE_PCNTXT) { 90 if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
93 unsigned long flags; 91 unsigned long flags;
94 92
95 spin_lock_irqsave(&desc->lock, flags); 93 spin_lock_irqsave(&desc->lock, flags);
94 desc->affinity = cpumask;
96 desc->chip->set_affinity(irq, cpumask); 95 desc->chip->set_affinity(irq, cpumask);
97 spin_unlock_irqrestore(&desc->lock, flags); 96 spin_unlock_irqrestore(&desc->lock, flags);
98 } else 97 } else
@@ -111,16 +110,17 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
111int irq_select_affinity(unsigned int irq) 110int irq_select_affinity(unsigned int irq)
112{ 111{
113 cpumask_t mask; 112 cpumask_t mask;
113 struct irq_desc *desc;
114 114
115 if (!irq_can_set_affinity(irq)) 115 if (!irq_can_set_affinity(irq))
116 return 0; 116 return 0;
117 117
118 cpus_and(mask, cpu_online_map, irq_default_affinity); 118 cpus_and(mask, cpu_online_map, irq_default_affinity);
119 119
120 irq_desc[irq].affinity = mask; 120 desc = irq_to_desc(irq);
121 irq_desc[irq].chip->set_affinity(irq, mask); 121 desc->affinity = mask;
122 desc->chip->set_affinity(irq, mask);
122 123
123 set_balance_irq_affinity(irq, mask);
124 return 0; 124 return 0;
125} 125}
126#endif 126#endif
@@ -140,10 +140,10 @@ int irq_select_affinity(unsigned int irq)
140 */ 140 */
141void disable_irq_nosync(unsigned int irq) 141void disable_irq_nosync(unsigned int irq)
142{ 142{
143 struct irq_desc *desc = irq_desc + irq; 143 struct irq_desc *desc = irq_to_desc(irq);
144 unsigned long flags; 144 unsigned long flags;
145 145
146 if (irq >= NR_IRQS) 146 if (!desc)
147 return; 147 return;
148 148
149 spin_lock_irqsave(&desc->lock, flags); 149 spin_lock_irqsave(&desc->lock, flags);
@@ -169,9 +169,9 @@ EXPORT_SYMBOL(disable_irq_nosync);
169 */ 169 */
170void disable_irq(unsigned int irq) 170void disable_irq(unsigned int irq)
171{ 171{
172 struct irq_desc *desc = irq_desc + irq; 172 struct irq_desc *desc = irq_to_desc(irq);
173 173
174 if (irq >= NR_IRQS) 174 if (!desc)
175 return; 175 return;
176 176
177 disable_irq_nosync(irq); 177 disable_irq_nosync(irq);
@@ -211,10 +211,10 @@ static void __enable_irq(struct irq_desc *desc, unsigned int irq)
211 */ 211 */
212void enable_irq(unsigned int irq) 212void enable_irq(unsigned int irq)
213{ 213{
214 struct irq_desc *desc = irq_desc + irq; 214 struct irq_desc *desc = irq_to_desc(irq);
215 unsigned long flags; 215 unsigned long flags;
216 216
217 if (irq >= NR_IRQS) 217 if (!desc)
218 return; 218 return;
219 219
220 spin_lock_irqsave(&desc->lock, flags); 220 spin_lock_irqsave(&desc->lock, flags);
@@ -223,9 +223,9 @@ void enable_irq(unsigned int irq)
223} 223}
224EXPORT_SYMBOL(enable_irq); 224EXPORT_SYMBOL(enable_irq);
225 225
226int set_irq_wake_real(unsigned int irq, unsigned int on) 226static int set_irq_wake_real(unsigned int irq, unsigned int on)
227{ 227{
228 struct irq_desc *desc = irq_desc + irq; 228 struct irq_desc *desc = irq_to_desc(irq);
229 int ret = -ENXIO; 229 int ret = -ENXIO;
230 230
231 if (desc->chip->set_wake) 231 if (desc->chip->set_wake)
@@ -248,7 +248,7 @@ int set_irq_wake_real(unsigned int irq, unsigned int on)
248 */ 248 */
249int set_irq_wake(unsigned int irq, unsigned int on) 249int set_irq_wake(unsigned int irq, unsigned int on)
250{ 250{
251 struct irq_desc *desc = irq_desc + irq; 251 struct irq_desc *desc = irq_to_desc(irq);
252 unsigned long flags; 252 unsigned long flags;
253 int ret = 0; 253 int ret = 0;
254 254
@@ -288,12 +288,16 @@ EXPORT_SYMBOL(set_irq_wake);
288 */ 288 */
289int can_request_irq(unsigned int irq, unsigned long irqflags) 289int can_request_irq(unsigned int irq, unsigned long irqflags)
290{ 290{
291 struct irq_desc *desc = irq_to_desc(irq);
291 struct irqaction *action; 292 struct irqaction *action;
292 293
293 if (irq >= NR_IRQS || irq_desc[irq].status & IRQ_NOREQUEST) 294 if (!desc)
295 return 0;
296
297 if (desc->status & IRQ_NOREQUEST)
294 return 0; 298 return 0;
295 299
296 action = irq_desc[irq].action; 300 action = desc->action;
297 if (action) 301 if (action)
298 if (irqflags & action->flags & IRQF_SHARED) 302 if (irqflags & action->flags & IRQF_SHARED)
299 action = NULL; 303 action = NULL;
@@ -312,10 +316,11 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
312 desc->handle_irq = NULL; 316 desc->handle_irq = NULL;
313} 317}
314 318
315static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq, 319int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
316 unsigned long flags) 320 unsigned long flags)
317{ 321{
318 int ret; 322 int ret;
323 struct irq_chip *chip = desc->chip;
319 324
320 if (!chip || !chip->set_type) { 325 if (!chip || !chip->set_type) {
321 /* 326 /*
@@ -333,6 +338,11 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
333 pr_err("setting trigger mode %d for irq %u failed (%pF)\n", 338 pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
334 (int)(flags & IRQF_TRIGGER_MASK), 339 (int)(flags & IRQF_TRIGGER_MASK),
335 irq, chip->set_type); 340 irq, chip->set_type);
341 else {
342 /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
343 desc->status &= ~IRQ_TYPE_SENSE_MASK;
344 desc->status |= flags & IRQ_TYPE_SENSE_MASK;
345 }
336 346
337 return ret; 347 return ret;
338} 348}
@@ -341,16 +351,16 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
341 * Internal function to register an irqaction - typically used to 351 * Internal function to register an irqaction - typically used to
342 * allocate special interrupts that are part of the architecture. 352 * allocate special interrupts that are part of the architecture.
343 */ 353 */
344int setup_irq(unsigned int irq, struct irqaction *new) 354static int
355__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
345{ 356{
346 struct irq_desc *desc = irq_desc + irq;
347 struct irqaction *old, **p; 357 struct irqaction *old, **p;
348 const char *old_name = NULL; 358 const char *old_name = NULL;
349 unsigned long flags; 359 unsigned long flags;
350 int shared = 0; 360 int shared = 0;
351 int ret; 361 int ret;
352 362
353 if (irq >= NR_IRQS) 363 if (!desc)
354 return -EINVAL; 364 return -EINVAL;
355 365
356 if (desc->chip == &no_irq_chip) 366 if (desc->chip == &no_irq_chip)
@@ -411,7 +421,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
411 421
412 /* Setup the type (level, edge polarity) if configured: */ 422 /* Setup the type (level, edge polarity) if configured: */
413 if (new->flags & IRQF_TRIGGER_MASK) { 423 if (new->flags & IRQF_TRIGGER_MASK) {
414 ret = __irq_set_trigger(desc->chip, irq, new->flags); 424 ret = __irq_set_trigger(desc, irq, new->flags);
415 425
416 if (ret) { 426 if (ret) {
417 spin_unlock_irqrestore(&desc->lock, flags); 427 spin_unlock_irqrestore(&desc->lock, flags);
@@ -430,16 +440,21 @@ int setup_irq(unsigned int irq, struct irqaction *new)
430 if (!(desc->status & IRQ_NOAUTOEN)) { 440 if (!(desc->status & IRQ_NOAUTOEN)) {
431 desc->depth = 0; 441 desc->depth = 0;
432 desc->status &= ~IRQ_DISABLED; 442 desc->status &= ~IRQ_DISABLED;
433 if (desc->chip->startup) 443 desc->chip->startup(irq);
434 desc->chip->startup(irq);
435 else
436 desc->chip->enable(irq);
437 } else 444 } else
438 /* Undo nested disables: */ 445 /* Undo nested disables: */
439 desc->depth = 1; 446 desc->depth = 1;
440 447
441 /* Set default affinity mask once everything is setup */ 448 /* Set default affinity mask once everything is setup */
442 irq_select_affinity(irq); 449 irq_select_affinity(irq);
450
451 } else if ((new->flags & IRQF_TRIGGER_MASK)
452 && (new->flags & IRQF_TRIGGER_MASK)
453 != (desc->status & IRQ_TYPE_SENSE_MASK)) {
454 /* hope the handler works with the actual trigger mode... */
455 pr_warning("IRQ %d uses trigger mode %d; requested %d\n",
456 irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK),
457 (int)(new->flags & IRQF_TRIGGER_MASK));
443 } 458 }
444 459
445 *p = new; 460 *p = new;
@@ -464,7 +479,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
464 spin_unlock_irqrestore(&desc->lock, flags); 479 spin_unlock_irqrestore(&desc->lock, flags);
465 480
466 new->irq = irq; 481 new->irq = irq;
467 register_irq_proc(irq); 482 register_irq_proc(irq, desc);
468 new->dir = NULL; 483 new->dir = NULL;
469 register_handler_proc(irq, new); 484 register_handler_proc(irq, new);
470 485
@@ -484,6 +499,20 @@ mismatch:
484} 499}
485 500
486/** 501/**
502 * setup_irq - setup an interrupt
503 * @irq: Interrupt line to setup
504 * @act: irqaction for the interrupt
505 *
506 * Used to statically setup interrupts in the early boot process.
507 */
508int setup_irq(unsigned int irq, struct irqaction *act)
509{
510 struct irq_desc *desc = irq_to_desc(irq);
511
512 return __setup_irq(irq, desc, act);
513}
514
515/**
487 * free_irq - free an interrupt 516 * free_irq - free an interrupt
488 * @irq: Interrupt line to free 517 * @irq: Interrupt line to free
489 * @dev_id: Device identity to free 518 * @dev_id: Device identity to free
@@ -499,15 +528,15 @@ mismatch:
499 */ 528 */
500void free_irq(unsigned int irq, void *dev_id) 529void free_irq(unsigned int irq, void *dev_id)
501{ 530{
502 struct irq_desc *desc; 531 struct irq_desc *desc = irq_to_desc(irq);
503 struct irqaction **p; 532 struct irqaction **p;
504 unsigned long flags; 533 unsigned long flags;
505 534
506 WARN_ON(in_interrupt()); 535 WARN_ON(in_interrupt());
507 if (irq >= NR_IRQS) 536
537 if (!desc)
508 return; 538 return;
509 539
510 desc = irq_desc + irq;
511 spin_lock_irqsave(&desc->lock, flags); 540 spin_lock_irqsave(&desc->lock, flags);
512 p = &desc->action; 541 p = &desc->action;
513 for (;;) { 542 for (;;) {
@@ -596,12 +625,14 @@ EXPORT_SYMBOL(free_irq);
596 * IRQF_SHARED Interrupt is shared 625 * IRQF_SHARED Interrupt is shared
597 * IRQF_DISABLED Disable local interrupts while processing 626 * IRQF_DISABLED Disable local interrupts while processing
598 * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy 627 * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
628 * IRQF_TRIGGER_* Specify active edge(s) or level
599 * 629 *
600 */ 630 */
601int request_irq(unsigned int irq, irq_handler_t handler, 631int request_irq(unsigned int irq, irq_handler_t handler,
602 unsigned long irqflags, const char *devname, void *dev_id) 632 unsigned long irqflags, const char *devname, void *dev_id)
603{ 633{
604 struct irqaction *action; 634 struct irqaction *action;
635 struct irq_desc *desc;
605 int retval; 636 int retval;
606 637
607#ifdef CONFIG_LOCKDEP 638#ifdef CONFIG_LOCKDEP
@@ -618,9 +649,12 @@ int request_irq(unsigned int irq, irq_handler_t handler,
618 */ 649 */
619 if ((irqflags & IRQF_SHARED) && !dev_id) 650 if ((irqflags & IRQF_SHARED) && !dev_id)
620 return -EINVAL; 651 return -EINVAL;
621 if (irq >= NR_IRQS) 652
653 desc = irq_to_desc(irq);
654 if (!desc)
622 return -EINVAL; 655 return -EINVAL;
623 if (irq_desc[irq].status & IRQ_NOREQUEST) 656
657 if (desc->status & IRQ_NOREQUEST)
624 return -EINVAL; 658 return -EINVAL;
625 if (!handler) 659 if (!handler)
626 return -EINVAL; 660 return -EINVAL;
@@ -636,26 +670,29 @@ int request_irq(unsigned int irq, irq_handler_t handler,
636 action->next = NULL; 670 action->next = NULL;
637 action->dev_id = dev_id; 671 action->dev_id = dev_id;
638 672
673 retval = __setup_irq(irq, desc, action);
674 if (retval)
675 kfree(action);
676
639#ifdef CONFIG_DEBUG_SHIRQ 677#ifdef CONFIG_DEBUG_SHIRQ
640 if (irqflags & IRQF_SHARED) { 678 if (irqflags & IRQF_SHARED) {
641 /* 679 /*
642 * It's a shared IRQ -- the driver ought to be prepared for it 680 * It's a shared IRQ -- the driver ought to be prepared for it
643 * to happen immediately, so let's make sure.... 681 * to happen immediately, so let's make sure....
644 * We do this before actually registering it, to make sure that 682 * We disable the irq to make sure that a 'real' IRQ doesn't
645 * a 'real' IRQ doesn't run in parallel with our fake 683 * run in parallel with our fake.
646 */ 684 */
647 unsigned long flags; 685 unsigned long flags;
648 686
687 disable_irq(irq);
649 local_irq_save(flags); 688 local_irq_save(flags);
689
650 handler(irq, dev_id); 690 handler(irq, dev_id);
691
651 local_irq_restore(flags); 692 local_irq_restore(flags);
693 enable_irq(irq);
652 } 694 }
653#endif 695#endif
654
655 retval = setup_irq(irq, action);
656 if (retval)
657 kfree(action);
658
659 return retval; 696 return retval;
660} 697}
661EXPORT_SYMBOL(request_irq); 698EXPORT_SYMBOL(request_irq);
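
The manage.c hunks above all perform one conversion: open-coded "irq >= NR_IRQS"
bounds checks and irq_desc[irq] indexing give way to an irq_to_desc() lookup
whose NULL result means "no such line". A minimal user-space sketch of that
lookup pattern, using hypothetical stand-in types rather than the kernel's own:

#include <stddef.h>

struct irq_desc { int status; };        /* stand-in, not the kernel struct */

#define NR_IRQS 16
static struct irq_desc descs[NR_IRQS];

static struct irq_desc *irq_to_desc(unsigned int irq)
{
        /* NULL for invalid lines, so callers test the pointer instead of
         * comparing irq against a compile-time bound. */
        return irq < NR_IRQS ? &descs[irq] : NULL;
}

int request_irq_sketch(unsigned int irq)
{
        struct irq_desc *desc = irq_to_desc(irq);

        if (!desc)                      /* replaces: if (irq >= NR_IRQS) */
                return -22;             /* -EINVAL */
        /* ... operate on desc instead of re-indexing irq_desc[irq] ... */
        return 0;
}
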
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 77b7acc875c5..90b920d3f52b 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -3,18 +3,18 @@
3 3
4void set_pending_irq(unsigned int irq, cpumask_t mask) 4void set_pending_irq(unsigned int irq, cpumask_t mask)
5{ 5{
6 struct irq_desc *desc = irq_desc + irq; 6 struct irq_desc *desc = irq_to_desc(irq);
7 unsigned long flags; 7 unsigned long flags;
8 8
9 spin_lock_irqsave(&desc->lock, flags); 9 spin_lock_irqsave(&desc->lock, flags);
10 desc->status |= IRQ_MOVE_PENDING; 10 desc->status |= IRQ_MOVE_PENDING;
11 irq_desc[irq].pending_mask = mask; 11 desc->pending_mask = mask;
12 spin_unlock_irqrestore(&desc->lock, flags); 12 spin_unlock_irqrestore(&desc->lock, flags);
13} 13}
14 14
15void move_masked_irq(int irq) 15void move_masked_irq(int irq)
16{ 16{
17 struct irq_desc *desc = irq_desc + irq; 17 struct irq_desc *desc = irq_to_desc(irq);
18 cpumask_t tmp; 18 cpumask_t tmp;
19 19
20 if (likely(!(desc->status & IRQ_MOVE_PENDING))) 20 if (likely(!(desc->status & IRQ_MOVE_PENDING)))
@@ -30,7 +30,7 @@ void move_masked_irq(int irq)
30 30
31 desc->status &= ~IRQ_MOVE_PENDING; 31 desc->status &= ~IRQ_MOVE_PENDING;
32 32
33 if (unlikely(cpus_empty(irq_desc[irq].pending_mask))) 33 if (unlikely(cpus_empty(desc->pending_mask)))
34 return; 34 return;
35 35
36 if (!desc->chip->set_affinity) 36 if (!desc->chip->set_affinity)
@@ -38,7 +38,7 @@ void move_masked_irq(int irq)
38 38
39 assert_spin_locked(&desc->lock); 39 assert_spin_locked(&desc->lock);
40 40
41 cpus_and(tmp, irq_desc[irq].pending_mask, cpu_online_map); 41 cpus_and(tmp, desc->pending_mask, cpu_online_map);
42 42
43 /* 43 /*
44 * If there was a valid mask to work with, please 44 * If there was a valid mask to work with, please
@@ -55,12 +55,12 @@ void move_masked_irq(int irq)
55 if (likely(!cpus_empty(tmp))) { 55 if (likely(!cpus_empty(tmp))) {
56 desc->chip->set_affinity(irq,tmp); 56 desc->chip->set_affinity(irq,tmp);
57 } 57 }
58 cpus_clear(irq_desc[irq].pending_mask); 58 cpus_clear(desc->pending_mask);
59} 59}
60 60
61void move_native_irq(int irq) 61void move_native_irq(int irq)
62{ 62{
63 struct irq_desc *desc = irq_desc + irq; 63 struct irq_desc *desc = irq_to_desc(irq);
64 64
65 if (likely(!(desc->status & IRQ_MOVE_PENDING))) 65 if (likely(!(desc->status & IRQ_MOVE_PENDING)))
66 return; 66 return;
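
The migration.c hunks are the same mechanical conversion, but the logic they
touch deserves a gloss: a pending affinity change is applied only if its target
mask still intersects the online CPUs, and the pending state is cleared either
way. A rough sketch with a plain bitmask standing in for cpumask_t:

typedef unsigned long cpumask_sketch;   /* one bit per CPU; hypothetical */

static void move_masked_irq_sketch(cpumask_sketch *pending,
                                   cpumask_sketch online,
                                   void (*set_affinity)(cpumask_sketch))
{
        cpumask_sketch tmp = *pending & online;         /* cpus_and() */

        if (tmp)                                        /* !cpus_empty(tmp) */
                set_affinity(tmp);
        *pending = 0;                                   /* cpus_clear() */
}
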
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index a09dd29c2fd7..fac014a81b24 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
19 19
20static int irq_affinity_proc_show(struct seq_file *m, void *v) 20static int irq_affinity_proc_show(struct seq_file *m, void *v)
21{ 21{
22 struct irq_desc *desc = irq_desc + (long)m->private; 22 struct irq_desc *desc = irq_to_desc((long)m->private);
23 cpumask_t *mask = &desc->affinity; 23 cpumask_t *mask = &desc->affinity;
24 24
25#ifdef CONFIG_GENERIC_PENDING_IRQ 25#ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -43,7 +43,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
43 cpumask_t new_value; 43 cpumask_t new_value;
44 int err; 44 int err;
45 45
46 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || 46 if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
47 irq_balancing_disabled(irq)) 47 irq_balancing_disabled(irq))
48 return -EIO; 48 return -EIO;
49 49
@@ -132,20 +132,20 @@ static const struct file_operations default_affinity_proc_fops = {
132static int irq_spurious_read(char *page, char **start, off_t off, 132static int irq_spurious_read(char *page, char **start, off_t off,
133 int count, int *eof, void *data) 133 int count, int *eof, void *data)
134{ 134{
135 struct irq_desc *d = &irq_desc[(long) data]; 135 struct irq_desc *desc = irq_to_desc((long) data);
136 return sprintf(page, "count %u\n" 136 return sprintf(page, "count %u\n"
137 "unhandled %u\n" 137 "unhandled %u\n"
138 "last_unhandled %u ms\n", 138 "last_unhandled %u ms\n",
139 d->irq_count, 139 desc->irq_count,
140 d->irqs_unhandled, 140 desc->irqs_unhandled,
141 jiffies_to_msecs(d->last_unhandled)); 141 jiffies_to_msecs(desc->last_unhandled));
142} 142}
143 143
144#define MAX_NAMELEN 128 144#define MAX_NAMELEN 128
145 145
146static int name_unique(unsigned int irq, struct irqaction *new_action) 146static int name_unique(unsigned int irq, struct irqaction *new_action)
147{ 147{
148 struct irq_desc *desc = irq_desc + irq; 148 struct irq_desc *desc = irq_to_desc(irq);
149 struct irqaction *action; 149 struct irqaction *action;
150 unsigned long flags; 150 unsigned long flags;
151 int ret = 1; 151 int ret = 1;
@@ -165,8 +165,9 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
165void register_handler_proc(unsigned int irq, struct irqaction *action) 165void register_handler_proc(unsigned int irq, struct irqaction *action)
166{ 166{
167 char name [MAX_NAMELEN]; 167 char name [MAX_NAMELEN];
168 struct irq_desc *desc = irq_to_desc(irq);
168 169
169 if (!irq_desc[irq].dir || action->dir || !action->name || 170 if (!desc->dir || action->dir || !action->name ||
170 !name_unique(irq, action)) 171 !name_unique(irq, action))
171 return; 172 return;
172 173
@@ -174,36 +175,34 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
174 snprintf(name, MAX_NAMELEN, "%s", action->name); 175 snprintf(name, MAX_NAMELEN, "%s", action->name);
175 176
176 /* create /proc/irq/1234/handler/ */ 177 /* create /proc/irq/1234/handler/ */
177 action->dir = proc_mkdir(name, irq_desc[irq].dir); 178 action->dir = proc_mkdir(name, desc->dir);
178} 179}
179 180
180#undef MAX_NAMELEN 181#undef MAX_NAMELEN
181 182
182#define MAX_NAMELEN 10 183#define MAX_NAMELEN 10
183 184
184void register_irq_proc(unsigned int irq) 185void register_irq_proc(unsigned int irq, struct irq_desc *desc)
185{ 186{
186 char name [MAX_NAMELEN]; 187 char name [MAX_NAMELEN];
187 struct proc_dir_entry *entry; 188 struct proc_dir_entry *entry;
188 189
189 if (!root_irq_dir || 190 if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
190 (irq_desc[irq].chip == &no_irq_chip) ||
191 irq_desc[irq].dir)
192 return; 191 return;
193 192
194 memset(name, 0, MAX_NAMELEN); 193 memset(name, 0, MAX_NAMELEN);
195 sprintf(name, "%d", irq); 194 sprintf(name, "%d", irq);
196 195
197 /* create /proc/irq/1234 */ 196 /* create /proc/irq/1234 */
198 irq_desc[irq].dir = proc_mkdir(name, root_irq_dir); 197 desc->dir = proc_mkdir(name, root_irq_dir);
199 198
200#ifdef CONFIG_SMP 199#ifdef CONFIG_SMP
201 /* create /proc/irq/<irq>/smp_affinity */ 200 /* create /proc/irq/<irq>/smp_affinity */
202 proc_create_data("smp_affinity", 0600, irq_desc[irq].dir, 201 proc_create_data("smp_affinity", 0600, desc->dir,
203 &irq_affinity_proc_fops, (void *)(long)irq); 202 &irq_affinity_proc_fops, (void *)(long)irq);
204#endif 203#endif
205 204
206 entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir); 205 entry = create_proc_entry("spurious", 0444, desc->dir);
207 if (entry) { 206 if (entry) {
208 entry->data = (void *)(long)irq; 207 entry->data = (void *)(long)irq;
209 entry->read_proc = irq_spurious_read; 208 entry->read_proc = irq_spurious_read;
@@ -214,8 +213,11 @@ void register_irq_proc(unsigned int irq)
214 213
215void unregister_handler_proc(unsigned int irq, struct irqaction *action) 214void unregister_handler_proc(unsigned int irq, struct irqaction *action)
216{ 215{
217 if (action->dir) 216 if (action->dir) {
218 remove_proc_entry(action->dir->name, irq_desc[irq].dir); 217 struct irq_desc *desc = irq_to_desc(irq);
218
219 remove_proc_entry(action->dir->name, desc->dir);
220 }
219} 221}
220 222
221void register_default_affinity_proc(void) 223void register_default_affinity_proc(void)
@@ -228,7 +230,8 @@ void register_default_affinity_proc(void)
228 230
229void init_irq_proc(void) 231void init_irq_proc(void)
230{ 232{
231 int i; 233 unsigned int irq;
234 struct irq_desc *desc;
232 235
233 /* create /proc/irq */ 236 /* create /proc/irq */
234 root_irq_dir = proc_mkdir("irq", NULL); 237 root_irq_dir = proc_mkdir("irq", NULL);
@@ -240,7 +243,7 @@ void init_irq_proc(void)
240 /* 243 /*
241 * Create entries for all existing IRQs. 244 * Create entries for all existing IRQs.
242 */ 245 */
243 for (i = 0; i < NR_IRQS; i++) 246 for_each_irq_desc(irq, desc)
244 register_irq_proc(i); 247 register_irq_proc(irq, desc);
245} 248}
246 249
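
Two related changes run through the proc.c hunks: register_irq_proc() now takes
the descriptor it needs instead of re-deriving it from the irq number, and
init_irq_proc() walks whatever descriptors exist rather than counting up to
NR_IRQS. A sketch of that shape, with a hypothetical fixed table standing in
for the kernel's descriptor storage and iterator macro:

#include <stdio.h>

struct irq_desc { int used; };
static struct irq_desc table[16];

/* Hypothetical analogue of for_each_irq_desc(irq, desc). */
#define for_each_desc(i, d) \
        for ((i) = 0, (d) = &table[0]; (i) < 16; (i)++, (d) = &table[i])

static void register_irq_proc_sketch(unsigned int irq, struct irq_desc *desc)
{
        /* The caller supplies desc, so no second lookup is needed here. */
        printf("/proc/irq/%u (desc=%p)\n", irq, (void *)desc);
}

void init_irq_proc_sketch(void)
{
        struct irq_desc *desc;
        unsigned int irq;

        for_each_desc(irq, desc)
                register_irq_proc_sketch(irq, desc);
}
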
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index a8046791ba2d..89c7117acf2b 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -33,10 +33,10 @@ static void resend_irqs(unsigned long arg)
33 struct irq_desc *desc; 33 struct irq_desc *desc;
34 int irq; 34 int irq;
35 35
36 while (!bitmap_empty(irqs_resend, NR_IRQS)) { 36 while (!bitmap_empty(irqs_resend, nr_irqs)) {
37 irq = find_first_bit(irqs_resend, NR_IRQS); 37 irq = find_first_bit(irqs_resend, nr_irqs);
38 clear_bit(irq, irqs_resend); 38 clear_bit(irq, irqs_resend);
39 desc = irq_desc + irq; 39 desc = irq_to_desc(irq);
40 local_irq_disable(); 40 local_irq_disable();
41 desc->handle_irq(irq, desc); 41 desc->handle_irq(irq, desc);
42 local_irq_enable(); 42 local_irq_enable();
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index c66d3f10e853..dd364c11e56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -12,83 +12,122 @@
12#include <linux/kallsyms.h> 12#include <linux/kallsyms.h>
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/moduleparam.h> 14#include <linux/moduleparam.h>
15#include <linux/timer.h>
15 16
16static int irqfixup __read_mostly; 17static int irqfixup __read_mostly;
17 18
19#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
20static void poll_spurious_irqs(unsigned long dummy);
21static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
22
18/* 23/*
19 * Recovery handler for misrouted interrupts. 24 * Recovery handler for misrouted interrupts.
20 */ 25 */
21static int misrouted_irq(int irq) 26static int try_one_irq(int irq, struct irq_desc *desc)
22{ 27{
23 int i; 28 struct irqaction *action;
24 int ok = 0; 29 int ok = 0, work = 0;
25 int work = 0; /* Did we do work for a real IRQ */
26
27 for (i = 1; i < NR_IRQS; i++) {
28 struct irq_desc *desc = irq_desc + i;
29 struct irqaction *action;
30
31 if (i == irq) /* Already tried */
32 continue;
33 30
34 spin_lock(&desc->lock); 31 spin_lock(&desc->lock);
35 /* Already running on another processor */ 32 /* Already running on another processor */
36 if (desc->status & IRQ_INPROGRESS) { 33 if (desc->status & IRQ_INPROGRESS) {
37 /* 34 /*
38 * Already running: If it is shared get the other 35 * Already running: If it is shared get the other
39 * CPU to go looking for our mystery interrupt too 36 * CPU to go looking for our mystery interrupt too
40 */ 37 */
41 if (desc->action && (desc->action->flags & IRQF_SHARED)) 38 if (desc->action && (desc->action->flags & IRQF_SHARED))
42 desc->status |= IRQ_PENDING; 39 desc->status |= IRQ_PENDING;
43 spin_unlock(&desc->lock);
44 continue;
45 }
46 /* Honour the normal IRQ locking */
47 desc->status |= IRQ_INPROGRESS;
48 action = desc->action;
49 spin_unlock(&desc->lock); 40 spin_unlock(&desc->lock);
41 return ok;
42 }
43 /* Honour the normal IRQ locking */
44 desc->status |= IRQ_INPROGRESS;
45 action = desc->action;
46 spin_unlock(&desc->lock);
50 47
51 while (action) { 48 while (action) {
52 /* Only shared IRQ handlers are safe to call */ 49 /* Only shared IRQ handlers are safe to call */
53 if (action->flags & IRQF_SHARED) { 50 if (action->flags & IRQF_SHARED) {
54 if (action->handler(i, action->dev_id) == 51 if (action->handler(irq, action->dev_id) ==
55 IRQ_HANDLED) 52 IRQ_HANDLED)
56 ok = 1; 53 ok = 1;
57 }
58 action = action->next;
59 } 54 }
60 local_irq_disable(); 55 action = action->next;
61 /* Now clean up the flags */ 56 }
62 spin_lock(&desc->lock); 57 local_irq_disable();
63 action = desc->action; 58 /* Now clean up the flags */
59 spin_lock(&desc->lock);
60 action = desc->action;
64 61
62 /*
63 * While we were looking for a fixup someone queued a real
64 * IRQ clashing with our walk:
65 */
66 while ((desc->status & IRQ_PENDING) && action) {
65 /* 67 /*
66 * While we were looking for a fixup someone queued a real 68 * Perform real IRQ processing for the IRQ we deferred
67 * IRQ clashing with our walk:
68 */
69 while ((desc->status & IRQ_PENDING) && action) {
70 /*
71 * Perform real IRQ processing for the IRQ we deferred
72 */
73 work = 1;
74 spin_unlock(&desc->lock);
75 handle_IRQ_event(i, action);
76 spin_lock(&desc->lock);
77 desc->status &= ~IRQ_PENDING;
78 }
79 desc->status &= ~IRQ_INPROGRESS;
80 /*
81 * If we did actual work for the real IRQ line we must let the
82 * IRQ controller clean up too
83 */ 69 */
84 if (work && desc->chip && desc->chip->end) 70 work = 1;
85 desc->chip->end(i);
86 spin_unlock(&desc->lock); 71 spin_unlock(&desc->lock);
72 handle_IRQ_event(irq, action);
73 spin_lock(&desc->lock);
74 desc->status &= ~IRQ_PENDING;
75 }
76 desc->status &= ~IRQ_INPROGRESS;
77 /*
78 * If we did actual work for the real IRQ line we must let the
79 * IRQ controller clean up too
80 */
81 if (work && desc->chip && desc->chip->end)
82 desc->chip->end(irq);
83 spin_unlock(&desc->lock);
84
85 return ok;
86}
87
88static int misrouted_irq(int irq)
89{
90 struct irq_desc *desc;
91 int i, ok = 0;
92
93 for_each_irq_desc(i, desc) {
94 if (!i)
95 continue;
96
97 if (i == irq) /* Already tried */
98 continue;
99
100 if (try_one_irq(i, desc))
101 ok = 1;
87 } 102 }
88 /* So the caller can adjust the irq error counts */ 103 /* So the caller can adjust the irq error counts */
89 return ok; 104 return ok;
90} 105}
91 106
107static void poll_spurious_irqs(unsigned long dummy)
108{
109 struct irq_desc *desc;
110 int i;
111
112 for_each_irq_desc(i, desc) {
113 unsigned int status;
114
115 if (!i)
116 continue;
117
118 /* Racy but it doesn't matter */
119 status = desc->status;
120 barrier();
121 if (!(status & IRQ_SPURIOUS_DISABLED))
122 continue;
123
124 try_one_irq(i, desc);
125 }
126
127 mod_timer(&poll_spurious_irq_timer,
128 jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
129}
130
92/* 131/*
93 * If 99,900 of the previous 100,000 interrupts have not been handled 132 * If 99,900 of the previous 100,000 interrupts have not been handled
94 * then assume that the IRQ is stuck in some manner. Drop a diagnostic 133 * then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -137,7 +176,9 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
137 } 176 }
138} 177}
139 178
140static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret) 179static inline int
180try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
181 irqreturn_t action_ret)
141{ 182{
142 struct irqaction *action; 183 struct irqaction *action;
143 184
@@ -212,6 +253,9 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
212 desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; 253 desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
213 desc->depth++; 254 desc->depth++;
214 desc->chip->disable(irq); 255 desc->chip->disable(irq);
256
257 mod_timer(&poll_spurious_irq_timer,
258 jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
215 } 259 }
216 desc->irqs_unhandled = 0; 260 desc->irqs_unhandled = 0;
217} 261}
@@ -241,7 +285,7 @@ static int __init irqfixup_setup(char *str)
241 285
242__setup("irqfixup", irqfixup_setup); 286__setup("irqfixup", irqfixup_setup);
243module_param(irqfixup, int, 0644); 287module_param(irqfixup, int, 0644);
244MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode"); 288MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode, 2: irqpoll mode");
245 289
246static int __init irqpoll_setup(char *str) 290static int __init irqpoll_setup(char *str)
247{ 291{
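
The new poll_spurious_irqs() turns one-shot recovery into periodic retry: once
note_interrupt() disables a line as spurious, the timer re-polls eligible lines
and re-arms itself on every run. A compact sketch of that self-rearming shape,
with hypothetical stand-ins for the timer API (the real loop also filters on
the IRQ_SPURIOUS_DISABLED status bit):

#define POLL_INTERVAL 10                /* stands in for HZ/10 */

struct timer_sketch { unsigned long expires; };

static struct timer_sketch poll_timer;
static unsigned long ticks;             /* stands in for jiffies */

static void mod_timer_sketch(struct timer_sketch *t, unsigned long expires)
{
        t->expires = expires;           /* (re)arm the timer */
}

static int try_one_irq_sketch(int irq)
{
        (void)irq;                      /* poll one line; details elided */
        return 0;
}

static void poll_spurious_irqs_sketch(void)
{
        for (int irq = 1; irq < 16; irq++)      /* skip irq 0, as above */
                try_one_irq_sketch(irq);

        /* The handler always reschedules itself, so polling continues
         * until the timer is deleted. */
        mod_timer_sketch(&poll_timer, ticks + POLL_INTERVAL);
}
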
diff --git a/kernel/itimer.c b/kernel/itimer.c
index ab982747d9bd..db7c358b9a02 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value)
55 spin_unlock_irq(&tsk->sighand->siglock); 55 spin_unlock_irq(&tsk->sighand->siglock);
56 break; 56 break;
57 case ITIMER_VIRTUAL: 57 case ITIMER_VIRTUAL:
58 read_lock(&tasklist_lock);
59 spin_lock_irq(&tsk->sighand->siglock); 58 spin_lock_irq(&tsk->sighand->siglock);
60 cval = tsk->signal->it_virt_expires; 59 cval = tsk->signal->it_virt_expires;
61 cinterval = tsk->signal->it_virt_incr; 60 cinterval = tsk->signal->it_virt_incr;
62 if (!cputime_eq(cval, cputime_zero)) { 61 if (!cputime_eq(cval, cputime_zero)) {
63 struct task_struct *t = tsk; 62 struct task_cputime cputime;
64 cputime_t utime = tsk->signal->utime; 63 cputime_t utime;
65 do { 64
66 utime = cputime_add(utime, t->utime); 65 thread_group_cputime(tsk, &cputime);
67 t = next_thread(t); 66 utime = cputime.utime;
68 } while (t != tsk);
69 if (cputime_le(cval, utime)) { /* about to fire */ 67 if (cputime_le(cval, utime)) { /* about to fire */
70 cval = jiffies_to_cputime(1); 68 cval = jiffies_to_cputime(1);
71 } else { 69 } else {
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value)
73 } 71 }
74 } 72 }
75 spin_unlock_irq(&tsk->sighand->siglock); 73 spin_unlock_irq(&tsk->sighand->siglock);
76 read_unlock(&tasklist_lock);
77 cputime_to_timeval(cval, &value->it_value); 74 cputime_to_timeval(cval, &value->it_value);
78 cputime_to_timeval(cinterval, &value->it_interval); 75 cputime_to_timeval(cinterval, &value->it_interval);
79 break; 76 break;
80 case ITIMER_PROF: 77 case ITIMER_PROF:
81 read_lock(&tasklist_lock);
82 spin_lock_irq(&tsk->sighand->siglock); 78 spin_lock_irq(&tsk->sighand->siglock);
83 cval = tsk->signal->it_prof_expires; 79 cval = tsk->signal->it_prof_expires;
84 cinterval = tsk->signal->it_prof_incr; 80 cinterval = tsk->signal->it_prof_incr;
85 if (!cputime_eq(cval, cputime_zero)) { 81 if (!cputime_eq(cval, cputime_zero)) {
86 struct task_struct *t = tsk; 82 struct task_cputime times;
87 cputime_t ptime = cputime_add(tsk->signal->utime, 83 cputime_t ptime;
88 tsk->signal->stime); 84
89 do { 85 thread_group_cputime(tsk, &times);
90 ptime = cputime_add(ptime, 86 ptime = cputime_add(times.utime, times.stime);
91 cputime_add(t->utime,
92 t->stime));
93 t = next_thread(t);
94 } while (t != tsk);
95 if (cputime_le(cval, ptime)) { /* about to fire */ 87 if (cputime_le(cval, ptime)) { /* about to fire */
96 cval = jiffies_to_cputime(1); 88 cval = jiffies_to_cputime(1);
97 } else { 89 } else {
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value)
99 } 91 }
100 } 92 }
101 spin_unlock_irq(&tsk->sighand->siglock); 93 spin_unlock_irq(&tsk->sighand->siglock);
102 read_unlock(&tasklist_lock);
103 cputime_to_timeval(cval, &value->it_value); 94 cputime_to_timeval(cval, &value->it_value);
104 cputime_to_timeval(cinterval, &value->it_interval); 95 cputime_to_timeval(cinterval, &value->it_interval);
105 break; 96 break;
@@ -185,7 +176,6 @@ again:
185 case ITIMER_VIRTUAL: 176 case ITIMER_VIRTUAL:
186 nval = timeval_to_cputime(&value->it_value); 177 nval = timeval_to_cputime(&value->it_value);
187 ninterval = timeval_to_cputime(&value->it_interval); 178 ninterval = timeval_to_cputime(&value->it_interval);
188 read_lock(&tasklist_lock);
189 spin_lock_irq(&tsk->sighand->siglock); 179 spin_lock_irq(&tsk->sighand->siglock);
190 cval = tsk->signal->it_virt_expires; 180 cval = tsk->signal->it_virt_expires;
191 cinterval = tsk->signal->it_virt_incr; 181 cinterval = tsk->signal->it_virt_incr;
@@ -200,7 +190,6 @@ again:
200 tsk->signal->it_virt_expires = nval; 190 tsk->signal->it_virt_expires = nval;
201 tsk->signal->it_virt_incr = ninterval; 191 tsk->signal->it_virt_incr = ninterval;
202 spin_unlock_irq(&tsk->sighand->siglock); 192 spin_unlock_irq(&tsk->sighand->siglock);
203 read_unlock(&tasklist_lock);
204 if (ovalue) { 193 if (ovalue) {
205 cputime_to_timeval(cval, &ovalue->it_value); 194 cputime_to_timeval(cval, &ovalue->it_value);
206 cputime_to_timeval(cinterval, &ovalue->it_interval); 195 cputime_to_timeval(cinterval, &ovalue->it_interval);
@@ -209,7 +198,6 @@ again:
209 case ITIMER_PROF: 198 case ITIMER_PROF:
210 nval = timeval_to_cputime(&value->it_value); 199 nval = timeval_to_cputime(&value->it_value);
211 ninterval = timeval_to_cputime(&value->it_interval); 200 ninterval = timeval_to_cputime(&value->it_interval);
212 read_lock(&tasklist_lock);
213 spin_lock_irq(&tsk->sighand->siglock); 201 spin_lock_irq(&tsk->sighand->siglock);
214 cval = tsk->signal->it_prof_expires; 202 cval = tsk->signal->it_prof_expires;
215 cinterval = tsk->signal->it_prof_incr; 203 cinterval = tsk->signal->it_prof_incr;
@@ -224,7 +212,6 @@ again:
224 tsk->signal->it_prof_expires = nval; 212 tsk->signal->it_prof_expires = nval;
225 tsk->signal->it_prof_incr = ninterval; 213 tsk->signal->it_prof_incr = ninterval;
226 spin_unlock_irq(&tsk->sighand->siglock); 214 spin_unlock_irq(&tsk->sighand->siglock);
227 read_unlock(&tasklist_lock);
228 if (ovalue) { 215 if (ovalue) {
229 cputime_to_timeval(cval, &ovalue->it_value); 216 cputime_to_timeval(cval, &ovalue->it_value);
230 cputime_to_timeval(cinterval, &ovalue->it_interval); 217 cputime_to_timeval(cinterval, &ovalue->it_interval);
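
The itimer.c hunks replace a do/while walk over every live thread with one
thread_group_cputime() call, which is also why the tasklist_lock read-locks can
go away. The underlying idea: accumulate group time per CPU on the charging
path, then sum the buckets on the reading path. A simplified sketch with
hypothetical types:

#define NCPU 4

struct group_time_sketch { unsigned long long utime, stime; };

static struct group_time_sketch totals[NCPU];

static void account_tick(int cpu, unsigned long long u, unsigned long long s)
{
        totals[cpu].utime += u;         /* hot path touches one bucket */
        totals[cpu].stime += s;
}

static struct group_time_sketch thread_group_cputime_sketch(void)
{
        struct group_time_sketch sum = { 0, 0 };

        for (int cpu = 0; cpu < NCPU; cpu++) {
                sum.utime += totals[cpu].utime;
                sum.stime += totals[cpu].stime;
        }
        return sum;                     /* readers sum buckets, not threads */
}
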
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 777ac458ac99..ac0fde7b54d0 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -30,6 +30,7 @@
30#include <linux/pm.h> 30#include <linux/pm.h>
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/console.h> 32#include <linux/console.h>
33#include <linux/vmalloc.h>
33 34
34#include <asm/page.h> 35#include <asm/page.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 14ec64fe175a..8e7a7ce3ed0a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,6 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/mutex.h> 15#include <linux/mutex.h>
16#include <trace/sched.h>
16 17
17#define KTHREAD_NICE_LEVEL (-5) 18#define KTHREAD_NICE_LEVEL (-5)
18 19
@@ -205,6 +206,8 @@ int kthread_stop(struct task_struct *k)
205 /* It could exit after stop_info.k set, but before wake_up_process. */ 206 /* It could exit after stop_info.k set, but before wake_up_process. */
206 get_task_struct(k); 207 get_task_struct(k);
207 208
209 trace_sched_kthread_stop(k);
210
208 /* Must init completion *before* thread sees kthread_stop_info.k */ 211 /* Must init completion *before* thread sees kthread_stop_info.k */
209 init_completion(&kthread_stop_info.done); 212 init_completion(&kthread_stop_info.done);
210 smp_wmb(); 213 smp_wmb();
@@ -220,6 +223,8 @@ int kthread_stop(struct task_struct *k)
220 ret = kthread_stop_info.err; 223 ret = kthread_stop_info.err;
221 mutex_unlock(&kthread_stop_lock); 224 mutex_unlock(&kthread_stop_lock);
222 225
226 trace_sched_kthread_stop_ret(ret);
227
223 return ret; 228 return ret;
224} 229}
225EXPORT_SYMBOL(kthread_stop); 230EXPORT_SYMBOL(kthread_stop);
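
trace_sched_kthread_stop() and its _ret counterpart bracket the stop operation
with static instrumentation. The general shape is a call site that costs one
test when nothing is attached; a hypothetical single-probe sketch (the real
tracepoints manage a probe list published under RCU):

static void (*kthread_stop_probe)(void *task);  /* NULL when disabled */

static inline void trace_kthread_stop_sketch(void *task)
{
        void (*probe)(void *) = kthread_stop_probe;

        if (probe)                      /* disabled hook: one test, no call */
                probe(task);
}

static void register_probe_sketch(void (*probe)(void *))
{
        kthread_stop_probe = probe;     /* real code publishes via RCU */
}
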
diff --git a/kernel/marker.c b/kernel/marker.c
index 7d1faecd7a51..e9c6b2bc9400 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -62,7 +62,7 @@ struct marker_entry {
62 int refcount; /* Number of times armed. 0 if disarmed. */ 62 int refcount; /* Number of times armed. 0 if disarmed. */
63 struct rcu_head rcu; 63 struct rcu_head rcu;
64 void *oldptr; 64 void *oldptr;
65 unsigned char rcu_pending:1; 65 int rcu_pending;
66 unsigned char ptype:1; 66 unsigned char ptype:1;
67 char name[0]; /* Contains name'\0'format'\0' */ 67 char name[0]; /* Contains name'\0'format'\0' */
68}; 68};
@@ -103,11 +103,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
103 char ptype; 103 char ptype;
104 104
105 /* 105 /*
106 * preempt_disable does two things : disabling preemption to make sure 106 * rcu_read_lock_sched does two things: it disables preemption to make
107 * the teardown of the callbacks can be done correctly when they are in 107 * sure the teardown of the callbacks can be done correctly when they
108 * modules and they insure RCU read coherency. 108 * are in modules, and it ensures RCU read coherency.
109 */ 109 */
110 preempt_disable(); 110 rcu_read_lock_sched();
111 ptype = mdata->ptype; 111 ptype = mdata->ptype;
112 if (likely(!ptype)) { 112 if (likely(!ptype)) {
113 marker_probe_func *func; 113 marker_probe_func *func;
@@ -145,7 +145,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
145 va_end(args); 145 va_end(args);
146 } 146 }
147 } 147 }
148 preempt_enable(); 148 rcu_read_unlock_sched();
149} 149}
150EXPORT_SYMBOL_GPL(marker_probe_cb); 150EXPORT_SYMBOL_GPL(marker_probe_cb);
151 151
@@ -162,7 +162,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
162 va_list args; /* not initialized */ 162 va_list args; /* not initialized */
163 char ptype; 163 char ptype;
164 164
165 preempt_disable(); 165 rcu_read_lock_sched();
166 ptype = mdata->ptype; 166 ptype = mdata->ptype;
167 if (likely(!ptype)) { 167 if (likely(!ptype)) {
168 marker_probe_func *func; 168 marker_probe_func *func;
@@ -195,7 +195,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
195 multi[i].func(multi[i].probe_private, call_private, 195 multi[i].func(multi[i].probe_private, call_private,
196 mdata->format, &args); 196 mdata->format, &args);
197 } 197 }
198 preempt_enable(); 198 rcu_read_unlock_sched();
199} 199}
200EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); 200EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
201 201
@@ -560,7 +560,7 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
560 * Disable a marker and its probe callback. 560 * Disable a marker and its probe callback.
561 * Note: only waiting an RCU period after setting elem->call to the empty 561 * Note: only waiting an RCU period after setting elem->call to the empty
562 * function ensures that the original callback is not used anymore. This is 562 * function ensures that the original callback is not used anymore. This is
563 * ensured by preempt_disable around the call site. 563 * ensured by rcu_read_lock_sched around the call site.
564 */ 564 */
565static void disable_marker(struct marker *elem) 565static void disable_marker(struct marker *elem)
566{ 566{
@@ -653,11 +653,17 @@ int marker_probe_register(const char *name, const char *format,
653 entry = get_marker(name); 653 entry = get_marker(name);
654 if (!entry) { 654 if (!entry) {
655 entry = add_marker(name, format); 655 entry = add_marker(name, format);
656 if (IS_ERR(entry)) { 656 if (IS_ERR(entry))
657 ret = PTR_ERR(entry); 657 ret = PTR_ERR(entry);
658 goto end; 658 } else if (format) {
659 } 659 if (!entry->format)
660 ret = marker_set_format(&entry, format);
661 else if (strcmp(entry->format, format))
662 ret = -EPERM;
660 } 663 }
664 if (ret)
665 goto end;
666
661 /* 667 /*
662 * If we detect that a call_rcu is pending for this marker, 668 * If we detect that a call_rcu is pending for this marker,
663 * make sure it's executed now. 669 * make sure it's executed now.
@@ -674,6 +680,8 @@ int marker_probe_register(const char *name, const char *format,
674 mutex_lock(&markers_mutex); 680 mutex_lock(&markers_mutex);
675 entry = get_marker(name); 681 entry = get_marker(name);
676 WARN_ON(!entry); 682 WARN_ON(!entry);
683 if (entry->rcu_pending)
684 rcu_barrier_sched();
677 entry->oldptr = old; 685 entry->oldptr = old;
678 entry->rcu_pending = 1; 686 entry->rcu_pending = 1;
679 /* write rcu_pending before calling the RCU callback */ 687 /* write rcu_pending before calling the RCU callback */
@@ -717,6 +725,8 @@ int marker_probe_unregister(const char *name,
717 entry = get_marker(name); 725 entry = get_marker(name);
718 if (!entry) 726 if (!entry)
719 goto end; 727 goto end;
728 if (entry->rcu_pending)
729 rcu_barrier_sched();
720 entry->oldptr = old; 730 entry->oldptr = old;
721 entry->rcu_pending = 1; 731 entry->rcu_pending = 1;
722 /* write rcu_pending before calling the RCU callback */ 732 /* write rcu_pending before calling the RCU callback */
@@ -795,6 +805,8 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
795 mutex_lock(&markers_mutex); 805 mutex_lock(&markers_mutex);
796 entry = get_marker_from_private_data(probe, probe_private); 806 entry = get_marker_from_private_data(probe, probe_private);
797 WARN_ON(!entry); 807 WARN_ON(!entry);
808 if (entry->rcu_pending)
809 rcu_barrier_sched();
798 entry->oldptr = old; 810 entry->oldptr = old;
799 entry->rcu_pending = 1; 811 entry->rcu_pending = 1;
800 /* write rcu_pending before calling the RCU callback */ 812 /* write rcu_pending before calling the RCU callback */
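
Several marker.c hunks add the same guard: before reusing entry->oldptr for a
new deferred free, drain any call_rcu() still in flight, which is exactly the
window rcu_pending flags. A sketch of that ordering, with trivial stubs where
the kernel would use call_rcu_sched() and rcu_barrier_sched():

struct entry_sketch {
        void *oldptr;                   /* pointer queued for deferred free */
        int rcu_pending;                /* nonzero while a callback is queued */
};

static void queue_deferred_free(struct entry_sketch *e) { (void)e; }
static void wait_deferred_frees(void) { }

static void retire_old_copy(struct entry_sketch *e, void *old)
{
        if (e->rcu_pending)             /* a previous free is still queued */
                wait_deferred_frees();  /* drain it before reusing oldptr */

        e->oldptr = old;
        e->rcu_pending = 1;
        /* write rcu_pending before queueing, as the comments above note */
        queue_deferred_free(e);
}
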
diff --git a/kernel/module.c b/kernel/module.c
index 25bc9ac9e226..0d8d21ee792c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -46,6 +46,8 @@
46#include <asm/cacheflush.h> 46#include <asm/cacheflush.h>
47#include <linux/license.h> 47#include <linux/license.h>
48#include <asm/sections.h> 48#include <asm/sections.h>
49#include <linux/tracepoint.h>
50#include <linux/ftrace.h>
49 51
50#if 0 52#if 0
51#define DEBUGP printk 53#define DEBUGP printk
@@ -1430,6 +1432,9 @@ static void free_module(struct module *mod)
1430 /* Module unload stuff */ 1432 /* Module unload stuff */
1431 module_unload_free(mod); 1433 module_unload_free(mod);
1432 1434
1435 /* release any pointers to mcount in this module */
1436 ftrace_release(mod->module_core, mod->core_size);
1437
1433 /* This may be NULL, but that's OK */ 1438 /* This may be NULL, but that's OK */
1434 module_free(mod, mod->module_init); 1439 module_free(mod, mod->module_init);
1435 kfree(mod->args); 1440 kfree(mod->args);
@@ -1861,9 +1866,13 @@ static noinline struct module *load_module(void __user *umod,
1861 unsigned int markersindex; 1866 unsigned int markersindex;
1862 unsigned int markersstringsindex; 1867 unsigned int markersstringsindex;
1863 unsigned int verboseindex; 1868 unsigned int verboseindex;
1869 unsigned int tracepointsindex;
1870 unsigned int tracepointsstringsindex;
1871 unsigned int mcountindex;
1864 struct module *mod; 1872 struct module *mod;
1865 long err = 0; 1873 long err = 0;
1866 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1874 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
1875 void *mseg;
1867 struct exception_table_entry *extable; 1876 struct exception_table_entry *extable;
1868 mm_segment_t old_fs; 1877 mm_segment_t old_fs;
1869 1878
@@ -2156,6 +2165,12 @@ static noinline struct module *load_module(void __user *umod,
2156 markersstringsindex = find_sec(hdr, sechdrs, secstrings, 2165 markersstringsindex = find_sec(hdr, sechdrs, secstrings,
2157 "__markers_strings"); 2166 "__markers_strings");
2158 verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose"); 2167 verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
2168 tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
2169 tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
2170 "__tracepoints_strings");
2171
2172 mcountindex = find_sec(hdr, sechdrs, secstrings,
2173 "__mcount_loc");
2159 2174
2160 /* Now do relocations. */ 2175 /* Now do relocations. */
2161 for (i = 1; i < hdr->e_shnum; i++) { 2176 for (i = 1; i < hdr->e_shnum; i++) {
@@ -2183,6 +2198,12 @@ static noinline struct module *load_module(void __user *umod,
2183 mod->num_markers = 2198 mod->num_markers =
2184 sechdrs[markersindex].sh_size / sizeof(*mod->markers); 2199 sechdrs[markersindex].sh_size / sizeof(*mod->markers);
2185#endif 2200#endif
2201#ifdef CONFIG_TRACEPOINTS
2202 mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
2203 mod->num_tracepoints =
2204 sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
2205#endif
2206
2186 2207
2187 /* Find duplicate symbols */ 2208 /* Find duplicate symbols */
2188 err = verify_export_symbols(mod); 2209 err = verify_export_symbols(mod);
@@ -2201,12 +2222,22 @@ static noinline struct module *load_module(void __user *umod,
2201 2222
2202 add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); 2223 add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
2203 2224
2225 if (!mod->taints) {
2204#ifdef CONFIG_MARKERS 2226#ifdef CONFIG_MARKERS
2205 if (!mod->taints)
2206 marker_update_probe_range(mod->markers, 2227 marker_update_probe_range(mod->markers,
2207 mod->markers + mod->num_markers); 2228 mod->markers + mod->num_markers);
2208#endif 2229#endif
2209 dynamic_printk_setup(sechdrs, verboseindex); 2230 dynamic_printk_setup(sechdrs, verboseindex);
2231#ifdef CONFIG_TRACEPOINTS
2232 tracepoint_update_probe_range(mod->tracepoints,
2233 mod->tracepoints + mod->num_tracepoints);
2234#endif
2235 }
2236
2237 /* sechdrs[0].sh_size is always zero */
2238 mseg = (void *)sechdrs[mcountindex].sh_addr;
2239 ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
2240
2210 err = module_finalize(hdr, sechdrs, mod); 2241 err = module_finalize(hdr, sechdrs, mod);
2211 if (err < 0) 2242 if (err < 0)
2212 goto cleanup; 2243 goto cleanup;
@@ -2276,6 +2307,7 @@ static noinline struct module *load_module(void __user *umod,
2276 cleanup: 2307 cleanup:
2277 kobject_del(&mod->mkobj.kobj); 2308 kobject_del(&mod->mkobj.kobj);
2278 kobject_put(&mod->mkobj.kobj); 2309 kobject_put(&mod->mkobj.kobj);
2310 ftrace_release(mod->module_core, mod->core_size);
2279 free_unload: 2311 free_unload:
2280 module_unload_free(mod); 2312 module_unload_free(mod);
2281 module_free(mod, mod->module_init); 2313 module_free(mod, mod->module_init);
@@ -2759,3 +2791,50 @@ void module_update_markers(void)
2759 mutex_unlock(&module_mutex); 2791 mutex_unlock(&module_mutex);
2760} 2792}
2761#endif 2793#endif
2794
2795#ifdef CONFIG_TRACEPOINTS
2796void module_update_tracepoints(void)
2797{
2798 struct module *mod;
2799
2800 mutex_lock(&module_mutex);
2801 list_for_each_entry(mod, &modules, list)
2802 if (!mod->taints)
2803 tracepoint_update_probe_range(mod->tracepoints,
2804 mod->tracepoints + mod->num_tracepoints);
2805 mutex_unlock(&module_mutex);
2806}
2807
2808/*
2809 * Returns 0 if the iterator's current position was not found.
2810 * Returns 1 if it was found.
2811 */
2812int module_get_iter_tracepoints(struct tracepoint_iter *iter)
2813{
2814 struct module *iter_mod;
2815 int found = 0;
2816
2817 mutex_lock(&module_mutex);
2818 list_for_each_entry(iter_mod, &modules, list) {
2819 if (!iter_mod->taints) {
2820 /*
2821 * Sorted module list
2822 */
2823 if (iter_mod < iter->module)
2824 continue;
2825 else if (iter_mod > iter->module)
2826 iter->tracepoint = NULL;
2827 found = tracepoint_get_iter_range(&iter->tracepoint,
2828 iter_mod->tracepoints,
2829 iter_mod->tracepoints
2830 + iter_mod->num_tracepoints);
2831 if (found) {
2832 iter->module = iter_mod;
2833 break;
2834 }
2835 }
2836 }
2837 mutex_unlock(&module_mutex);
2838 return found;
2839}
2840#endif
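
module_get_iter_tracepoints() leans on the module list being kept sorted: the
iterator skips entries below its saved position, resets its inner cursor when
it advances to a later module, and stops at the first hit. A sketch of that
resumable scan over a sorted array, with hypothetical index-based types:

struct mod_sketch { int ntp; };         /* number of tracepoints in module */

struct iter_sketch { int mod, tp; };    /* saved position as indices */

static int iter_next(struct iter_sketch *it,
                     const struct mod_sketch *mods, int nmods)
{
        for (int i = 0; i < nmods; i++) {
                if (i < it->mod)        /* before the saved position */
                        continue;
                if (i > it->mod)        /* advanced: restart inner cursor */
                        it->tp = 0;
                if (it->tp < mods[i].ntp) {     /* next tracepoint exists */
                        it->mod = i;
                        return 1;       /* current position found */
                }
        }
        return 0;                       /* current position not found */
}
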
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 823be11584ef..4282c0a40a57 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
550 550
551static ATOMIC_NOTIFIER_HEAD(die_chain); 551static ATOMIC_NOTIFIER_HEAD(die_chain);
552 552
553int notify_die(enum die_val val, const char *str, 553int notrace notify_die(enum die_val val, const char *str,
554 struct pt_regs *regs, long err, int trap, int sig) 554 struct pt_regs *regs, long err, int trap, int sig)
555{ 555{
556 struct die_args args = { 556 struct die_args args = {
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c42a03aef36f..153dcb2639c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -7,6 +7,93 @@
7#include <linux/errno.h> 7#include <linux/errno.h>
8#include <linux/math64.h> 8#include <linux/math64.h>
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <linux/kernel_stat.h>
11
12/*
13 * Allocate the thread_group_cputime structure appropriately and fill in the
14 * current values of the fields. Called from copy_signal() via
15 * thread_group_cputime_clone_thread() when adding a second or subsequent
16 * thread to a thread group. Assumes interrupts are enabled when called.
17 */
18int thread_group_cputime_alloc(struct task_struct *tsk)
19{
20 struct signal_struct *sig = tsk->signal;
21 struct task_cputime *cputime;
22
23 /*
24 * If we have multiple threads and we don't already have a
25 * per-CPU task_cputime struct (checked in the caller), allocate
26 * one and fill it in with the times accumulated so far. We may
27 * race with another thread so recheck after we pick up the sighand
28 * lock.
29 */
30 cputime = alloc_percpu(struct task_cputime);
31 if (cputime == NULL)
32 return -ENOMEM;
33 spin_lock_irq(&tsk->sighand->siglock);
34 if (sig->cputime.totals) {
35 spin_unlock_irq(&tsk->sighand->siglock);
36 free_percpu(cputime);
37 return 0;
38 }
39 sig->cputime.totals = cputime;
40 cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
41 cputime->utime = tsk->utime;
42 cputime->stime = tsk->stime;
43 cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
44 spin_unlock_irq(&tsk->sighand->siglock);
45 return 0;
46}
47
48/**
49 * thread_group_cputime - Sum the thread group time fields across all CPUs.
50 *
51 * @tsk: The task we use to identify the thread group.
52 * @times: task_cputime structure in which we return the summed fields.
53 *
54 * Walk the list of CPUs to sum the per-CPU time fields in the thread group
55 * time structure.
56 */
57void thread_group_cputime(
58 struct task_struct *tsk,
59 struct task_cputime *times)
60{
61 struct signal_struct *sig;
62 int i;
63 struct task_cputime *tot;
64
65 sig = tsk->signal;
66 if (unlikely(!sig) || !sig->cputime.totals) {
67 times->utime = tsk->utime;
68 times->stime = tsk->stime;
69 times->sum_exec_runtime = tsk->se.sum_exec_runtime;
70 return;
71 }
72 times->stime = times->utime = cputime_zero;
73 times->sum_exec_runtime = 0;
74 for_each_possible_cpu(i) {
75 tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
76 times->utime = cputime_add(times->utime, tot->utime);
77 times->stime = cputime_add(times->stime, tot->stime);
78 times->sum_exec_runtime += tot->sum_exec_runtime;
79 }
80}
81
82/*
83 * Called after updating RLIMIT_CPU to set timer expiration if necessary.
84 */
85void update_rlimit_cpu(unsigned long rlim_new)
86{
87 cputime_t cputime;
88
89 cputime = secs_to_cputime(rlim_new);
90 if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
91 cputime_lt(current->signal->it_prof_expires, cputime)) {
92 spin_lock_irq(&current->sighand->siglock);
93 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
94 spin_unlock_irq(&current->sighand->siglock);
95 }
96}
10 97
11static int check_clock(const clockid_t which_clock) 98static int check_clock(const clockid_t which_clock)
12{ 99{
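
thread_group_cputime_alloc() above follows a standard race-tolerant shape:
allocate outside the lock (allocation may sleep), then recheck under the lock
and discard the allocation if another thread installed one first. A user-space
sketch of the same pattern using pthreads:

#include <stdlib.h>
#include <pthread.h>

static pthread_mutex_t siglock = PTHREAD_MUTEX_INITIALIZER;
static void *totals;                    /* installed at most once */

int totals_alloc_sketch(size_t size)
{
        void *p = malloc(size);         /* may block: done unlocked */

        if (!p)
                return -1;              /* -ENOMEM in the kernel version */

        pthread_mutex_lock(&siglock);
        if (totals) {                   /* lost the race: keep the winner's */
                pthread_mutex_unlock(&siglock);
                free(p);
                return 0;
        }
        totals = p;
        pthread_mutex_unlock(&siglock);
        return 0;
}
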
@@ -158,10 +245,6 @@ static inline cputime_t virt_ticks(struct task_struct *p)
158{ 245{
159 return p->utime; 246 return p->utime;
160} 247}
161static inline unsigned long long sched_ns(struct task_struct *p)
162{
163 return task_sched_runtime(p);
164}
165 248
166int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) 249int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
167{ 250{
@@ -211,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
211 cpu->cpu = virt_ticks(p); 294 cpu->cpu = virt_ticks(p);
212 break; 295 break;
213 case CPUCLOCK_SCHED: 296 case CPUCLOCK_SCHED:
214 cpu->sched = sched_ns(p); 297 cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
215 break; 298 break;
216 } 299 }
217 return 0; 300 return 0;
@@ -220,59 +303,30 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
220/* 303/*
221 * Sample a process (thread group) clock for the given group_leader task. 304 * Sample a process (thread group) clock for the given group_leader task.
222 * Must be called with tasklist_lock held for reading. 305 * Must be called with tasklist_lock held for reading.
223 * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
224 */ 306 */
225static int cpu_clock_sample_group_locked(unsigned int clock_idx, 307static int cpu_clock_sample_group(const clockid_t which_clock,
226 struct task_struct *p, 308 struct task_struct *p,
227 union cpu_time_count *cpu) 309 union cpu_time_count *cpu)
228{ 310{
229 struct task_struct *t = p; 311 struct task_cputime cputime;
230 switch (clock_idx) { 312
313 thread_group_cputime(p, &cputime);
314 switch (which_clock) {
231 default: 315 default:
232 return -EINVAL; 316 return -EINVAL;
233 case CPUCLOCK_PROF: 317 case CPUCLOCK_PROF:
234 cpu->cpu = cputime_add(p->signal->utime, p->signal->stime); 318 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
235 do {
236 cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
237 t = next_thread(t);
238 } while (t != p);
239 break; 319 break;
240 case CPUCLOCK_VIRT: 320 case CPUCLOCK_VIRT:
241 cpu->cpu = p->signal->utime; 321 cpu->cpu = cputime.utime;
242 do {
243 cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
244 t = next_thread(t);
245 } while (t != p);
246 break; 322 break;
247 case CPUCLOCK_SCHED: 323 case CPUCLOCK_SCHED:
248 cpu->sched = p->signal->sum_sched_runtime; 324 cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
249 /* Add in each other live thread. */
250 while ((t = next_thread(t)) != p) {
251 cpu->sched += t->se.sum_exec_runtime;
252 }
253 cpu->sched += sched_ns(p);
254 break; 325 break;
255 } 326 }
256 return 0; 327 return 0;
257} 328}
258 329
259/*
260 * Sample a process (thread group) clock for the given group_leader task.
261 * Must be called with tasklist_lock held for reading.
262 */
263static int cpu_clock_sample_group(const clockid_t which_clock,
264 struct task_struct *p,
265 union cpu_time_count *cpu)
266{
267 int ret;
268 unsigned long flags;
269 spin_lock_irqsave(&p->sighand->siglock, flags);
270 ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
271 cpu);
272 spin_unlock_irqrestore(&p->sighand->siglock, flags);
273 return ret;
274}
275
276 330
277int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) 331int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
278{ 332{
@@ -471,80 +525,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
471} 525}
472void posix_cpu_timers_exit_group(struct task_struct *tsk) 526void posix_cpu_timers_exit_group(struct task_struct *tsk)
473{ 527{
474 cleanup_timers(tsk->signal->cpu_timers, 528 struct task_cputime cputime;
475 cputime_add(tsk->utime, tsk->signal->utime),
476 cputime_add(tsk->stime, tsk->signal->stime),
477 tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
478}
479 529
480 530 thread_group_cputime(tsk, &cputime);
481/* 531 cleanup_timers(tsk->signal->cpu_timers,
482 * Set the expiry times of all the threads in the process so one of them 532 cputime.utime, cputime.stime, cputime.sum_exec_runtime);
483 * will go off before the process cumulative expiry total is reached.
484 */
485static void process_timer_rebalance(struct task_struct *p,
486 unsigned int clock_idx,
487 union cpu_time_count expires,
488 union cpu_time_count val)
489{
490 cputime_t ticks, left;
491 unsigned long long ns, nsleft;
492 struct task_struct *t = p;
493 unsigned int nthreads = atomic_read(&p->signal->live);
494
495 if (!nthreads)
496 return;
497
498 switch (clock_idx) {
499 default:
500 BUG();
501 break;
502 case CPUCLOCK_PROF:
503 left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
504 nthreads);
505 do {
506 if (likely(!(t->flags & PF_EXITING))) {
507 ticks = cputime_add(prof_ticks(t), left);
508 if (cputime_eq(t->it_prof_expires,
509 cputime_zero) ||
510 cputime_gt(t->it_prof_expires, ticks)) {
511 t->it_prof_expires = ticks;
512 }
513 }
514 t = next_thread(t);
515 } while (t != p);
516 break;
517 case CPUCLOCK_VIRT:
518 left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
519 nthreads);
520 do {
521 if (likely(!(t->flags & PF_EXITING))) {
522 ticks = cputime_add(virt_ticks(t), left);
523 if (cputime_eq(t->it_virt_expires,
524 cputime_zero) ||
525 cputime_gt(t->it_virt_expires, ticks)) {
526 t->it_virt_expires = ticks;
527 }
528 }
529 t = next_thread(t);
530 } while (t != p);
531 break;
532 case CPUCLOCK_SCHED:
533 nsleft = expires.sched - val.sched;
534 do_div(nsleft, nthreads);
535 nsleft = max_t(unsigned long long, nsleft, 1);
536 do {
537 if (likely(!(t->flags & PF_EXITING))) {
538 ns = t->se.sum_exec_runtime + nsleft;
539 if (t->it_sched_expires == 0 ||
540 t->it_sched_expires > ns) {
541 t->it_sched_expires = ns;
542 }
543 }
544 t = next_thread(t);
545 } while (t != p);
546 break;
547 }
548} 533}
549 534
550static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) 535static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
@@ -608,29 +593,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
608 default: 593 default:
609 BUG(); 594 BUG();
610 case CPUCLOCK_PROF: 595 case CPUCLOCK_PROF:
611 if (cputime_eq(p->it_prof_expires, 596 if (cputime_eq(p->cputime_expires.prof_exp,
612 cputime_zero) || 597 cputime_zero) ||
613 cputime_gt(p->it_prof_expires, 598 cputime_gt(p->cputime_expires.prof_exp,
614 nt->expires.cpu)) 599 nt->expires.cpu))
615 p->it_prof_expires = nt->expires.cpu; 600 p->cputime_expires.prof_exp =
601 nt->expires.cpu;
616 break; 602 break;
617 case CPUCLOCK_VIRT: 603 case CPUCLOCK_VIRT:
618 if (cputime_eq(p->it_virt_expires, 604 if (cputime_eq(p->cputime_expires.virt_exp,
619 cputime_zero) || 605 cputime_zero) ||
620 cputime_gt(p->it_virt_expires, 606 cputime_gt(p->cputime_expires.virt_exp,
621 nt->expires.cpu)) 607 nt->expires.cpu))
622 p->it_virt_expires = nt->expires.cpu; 608 p->cputime_expires.virt_exp =
609 nt->expires.cpu;
623 break; 610 break;
624 case CPUCLOCK_SCHED: 611 case CPUCLOCK_SCHED:
625 if (p->it_sched_expires == 0 || 612 if (p->cputime_expires.sched_exp == 0 ||
626 p->it_sched_expires > nt->expires.sched) 613 p->cputime_expires.sched_exp >
627 p->it_sched_expires = nt->expires.sched; 614 nt->expires.sched)
615 p->cputime_expires.sched_exp =
616 nt->expires.sched;
628 break; 617 break;
629 } 618 }
630 } else { 619 } else {
631 /* 620 /*
632 * For a process timer, we must balance 621 * For a process timer, set the cached expiration time.
633 * all the live threads' expirations.
634 */ 622 */
635 switch (CPUCLOCK_WHICH(timer->it_clock)) { 623 switch (CPUCLOCK_WHICH(timer->it_clock)) {
636 default: 624 default:
@@ -641,7 +629,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
641 cputime_lt(p->signal->it_virt_expires, 629 cputime_lt(p->signal->it_virt_expires,
642 timer->it.cpu.expires.cpu)) 630 timer->it.cpu.expires.cpu))
643 break; 631 break;
644 goto rebalance; 632 p->signal->cputime_expires.virt_exp =
633 timer->it.cpu.expires.cpu;
634 break;
645 case CPUCLOCK_PROF: 635 case CPUCLOCK_PROF:
646 if (!cputime_eq(p->signal->it_prof_expires, 636 if (!cputime_eq(p->signal->it_prof_expires,
647 cputime_zero) && 637 cputime_zero) &&
@@ -652,13 +642,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
652 if (i != RLIM_INFINITY && 642 if (i != RLIM_INFINITY &&
653 i <= cputime_to_secs(timer->it.cpu.expires.cpu)) 643 i <= cputime_to_secs(timer->it.cpu.expires.cpu))
654 break; 644 break;
655 goto rebalance; 645 p->signal->cputime_expires.prof_exp =
646 timer->it.cpu.expires.cpu;
647 break;
656 case CPUCLOCK_SCHED: 648 case CPUCLOCK_SCHED:
657 rebalance: 649 p->signal->cputime_expires.sched_exp =
658 process_timer_rebalance( 650 timer->it.cpu.expires.sched;
659 timer->it.cpu.task,
660 CPUCLOCK_WHICH(timer->it_clock),
661 timer->it.cpu.expires, now);
662 break; 651 break;
663 } 652 }
664 } 653 }
@@ -969,13 +958,13 @@ static void check_thread_timers(struct task_struct *tsk,
969 struct signal_struct *const sig = tsk->signal; 958 struct signal_struct *const sig = tsk->signal;
970 959
971 maxfire = 20; 960 maxfire = 20;
972 tsk->it_prof_expires = cputime_zero; 961 tsk->cputime_expires.prof_exp = cputime_zero;
973 while (!list_empty(timers)) { 962 while (!list_empty(timers)) {
974 struct cpu_timer_list *t = list_first_entry(timers, 963 struct cpu_timer_list *t = list_first_entry(timers,
975 struct cpu_timer_list, 964 struct cpu_timer_list,
976 entry); 965 entry);
977 if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { 966 if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
978 tsk->it_prof_expires = t->expires.cpu; 967 tsk->cputime_expires.prof_exp = t->expires.cpu;
979 break; 968 break;
980 } 969 }
981 t->firing = 1; 970 t->firing = 1;
@@ -984,13 +973,13 @@ static void check_thread_timers(struct task_struct *tsk,
984 973
985 ++timers; 974 ++timers;
986 maxfire = 20; 975 maxfire = 20;
987 tsk->it_virt_expires = cputime_zero; 976 tsk->cputime_expires.virt_exp = cputime_zero;
988 while (!list_empty(timers)) { 977 while (!list_empty(timers)) {
989 struct cpu_timer_list *t = list_first_entry(timers, 978 struct cpu_timer_list *t = list_first_entry(timers,
990 struct cpu_timer_list, 979 struct cpu_timer_list,
991 entry); 980 entry);
992 if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { 981 if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
993 tsk->it_virt_expires = t->expires.cpu; 982 tsk->cputime_expires.virt_exp = t->expires.cpu;
994 break; 983 break;
995 } 984 }
996 t->firing = 1; 985 t->firing = 1;
@@ -999,13 +988,13 @@ static void check_thread_timers(struct task_struct *tsk,
999 988
1000 ++timers; 989 ++timers;
1001 maxfire = 20; 990 maxfire = 20;
1002 tsk->it_sched_expires = 0; 991 tsk->cputime_expires.sched_exp = 0;
1003 while (!list_empty(timers)) { 992 while (!list_empty(timers)) {
1004 struct cpu_timer_list *t = list_first_entry(timers, 993 struct cpu_timer_list *t = list_first_entry(timers,
1005 struct cpu_timer_list, 994 struct cpu_timer_list,
1006 entry); 995 entry);
1007 if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { 996 if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
1008 tsk->it_sched_expires = t->expires.sched; 997 tsk->cputime_expires.sched_exp = t->expires.sched;
1009 break; 998 break;
1010 } 999 }
1011 t->firing = 1; 1000 t->firing = 1;
@@ -1055,10 +1044,10 @@ static void check_process_timers(struct task_struct *tsk,
1055{ 1044{
1056 int maxfire; 1045 int maxfire;
1057 struct signal_struct *const sig = tsk->signal; 1046 struct signal_struct *const sig = tsk->signal;
1058 cputime_t utime, stime, ptime, virt_expires, prof_expires; 1047 cputime_t utime, ptime, virt_expires, prof_expires;
1059 unsigned long long sum_sched_runtime, sched_expires; 1048 unsigned long long sum_sched_runtime, sched_expires;
1060 struct task_struct *t;
1061 struct list_head *timers = sig->cpu_timers; 1049 struct list_head *timers = sig->cpu_timers;
1050 struct task_cputime cputime;
1062 1051
1063 /* 1052 /*
1064 * Don't sample the current process CPU clocks if there are no timers. 1053 * Don't sample the current process CPU clocks if there are no timers.
@@ -1074,18 +1063,10 @@ static void check_process_timers(struct task_struct *tsk,
1074 /* 1063 /*
1075 * Collect the current process totals. 1064 * Collect the current process totals.
1076 */ 1065 */
1077 utime = sig->utime; 1066 thread_group_cputime(tsk, &cputime);
1078 stime = sig->stime; 1067 utime = cputime.utime;
1079 sum_sched_runtime = sig->sum_sched_runtime; 1068 ptime = cputime_add(utime, cputime.stime);
1080 t = tsk; 1069 sum_sched_runtime = cputime.sum_exec_runtime;
1081 do {
1082 utime = cputime_add(utime, t->utime);
1083 stime = cputime_add(stime, t->stime);
1084 sum_sched_runtime += t->se.sum_exec_runtime;
1085 t = next_thread(t);
1086 } while (t != tsk);
1087 ptime = cputime_add(utime, stime);
1088
1089 maxfire = 20; 1070 maxfire = 20;
1090 prof_expires = cputime_zero; 1071 prof_expires = cputime_zero;
1091 while (!list_empty(timers)) { 1072 while (!list_empty(timers)) {
@@ -1193,60 +1174,18 @@ static void check_process_timers(struct task_struct *tsk,
1193 } 1174 }
1194 } 1175 }
1195 1176
1196 if (!cputime_eq(prof_expires, cputime_zero) || 1177 if (!cputime_eq(prof_expires, cputime_zero) &&
1197 !cputime_eq(virt_expires, cputime_zero) || 1178 (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
1198 sched_expires != 0) { 1179 cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
1199 /* 1180 sig->cputime_expires.prof_exp = prof_expires;
1200 * Rebalance the threads' expiry times for the remaining 1181 if (!cputime_eq(virt_expires, cputime_zero) &&
1201 * process CPU timers. 1182 (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
1202 */ 1183 cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
1203 1184 sig->cputime_expires.virt_exp = virt_expires;
1204 cputime_t prof_left, virt_left, ticks; 1185 if (sched_expires != 0 &&
1205 unsigned long long sched_left, sched; 1186 (sig->cputime_expires.sched_exp == 0 ||
1206 const unsigned int nthreads = atomic_read(&sig->live); 1187 sig->cputime_expires.sched_exp > sched_expires))
1207 1188 sig->cputime_expires.sched_exp = sched_expires;
1208 if (!nthreads)
1209 return;
1210
1211 prof_left = cputime_sub(prof_expires, utime);
1212 prof_left = cputime_sub(prof_left, stime);
1213 prof_left = cputime_div_non_zero(prof_left, nthreads);
1214 virt_left = cputime_sub(virt_expires, utime);
1215 virt_left = cputime_div_non_zero(virt_left, nthreads);
1216 if (sched_expires) {
1217 sched_left = sched_expires - sum_sched_runtime;
1218 do_div(sched_left, nthreads);
1219 sched_left = max_t(unsigned long long, sched_left, 1);
1220 } else {
1221 sched_left = 0;
1222 }
1223 t = tsk;
1224 do {
1225 if (unlikely(t->flags & PF_EXITING))
1226 continue;
1227
1228 ticks = cputime_add(cputime_add(t->utime, t->stime),
1229 prof_left);
1230 if (!cputime_eq(prof_expires, cputime_zero) &&
1231 (cputime_eq(t->it_prof_expires, cputime_zero) ||
1232 cputime_gt(t->it_prof_expires, ticks))) {
1233 t->it_prof_expires = ticks;
1234 }
1235
1236 ticks = cputime_add(t->utime, virt_left);
1237 if (!cputime_eq(virt_expires, cputime_zero) &&
1238 (cputime_eq(t->it_virt_expires, cputime_zero) ||
1239 cputime_gt(t->it_virt_expires, ticks))) {
1240 t->it_virt_expires = ticks;
1241 }
1242
1243 sched = t->se.sum_exec_runtime + sched_left;
1244 if (sched_expires && (t->it_sched_expires == 0 ||
1245 t->it_sched_expires > sched)) {
1246 t->it_sched_expires = sched;
1247 }
1248 } while ((t = next_thread(t)) != tsk);
1249 }
1250} 1189}
1251 1190
1252/* 1191/*
@@ -1314,6 +1253,86 @@ out:
1314 ++timer->it_requeue_pending; 1253 ++timer->it_requeue_pending;
1315} 1254}
1316 1255
1256/**
1257 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1258 *
1259 * @cputime: The struct to compare.
1260 *
1261 * Checks @cputime to see if all fields are zero. Returns true if all fields
1262 * are zero, false if any field is nonzero.
1263 */
1264static inline int task_cputime_zero(const struct task_cputime *cputime)
1265{
1266 if (cputime_eq(cputime->utime, cputime_zero) &&
1267 cputime_eq(cputime->stime, cputime_zero) &&
1268 cputime->sum_exec_runtime == 0)
1269 return 1;
1270 return 0;
1271}
1272
1273/**
1274 * task_cputime_expired - Compare two task_cputime entities.
1275 *
1276 * @sample: The task_cputime structure to be checked for expiration.
1277 * @expires: Expiration times, against which @sample will be checked.
1278 *
1279 * Checks @sample against @expires to see if any field of @sample has expired.
 1280 * Returns true if any field of @sample is greater than or equal to the
 1281 * corresponding nonzero field of @expires; otherwise returns false.
1282 */
1283static inline int task_cputime_expired(const struct task_cputime *sample,
1284 const struct task_cputime *expires)
1285{
1286 if (!cputime_eq(expires->utime, cputime_zero) &&
1287 cputime_ge(sample->utime, expires->utime))
1288 return 1;
1289 if (!cputime_eq(expires->stime, cputime_zero) &&
1290 cputime_ge(cputime_add(sample->utime, sample->stime),
1291 expires->stime))
1292 return 1;
1293 if (expires->sum_exec_runtime != 0 &&
1294 sample->sum_exec_runtime >= expires->sum_exec_runtime)
1295 return 1;
1296 return 0;
1297}
1298
1299/**
1300 * fastpath_timer_check - POSIX CPU timers fast path.
1301 *
1302 * @tsk: The task (thread) being checked.
1303 *
1304 * Check the task and thread group timers. If both are zero (there are no
1305 * timers set) return false. Otherwise snapshot the task and thread group
1306 * timers and compare them with the corresponding expiration times. Return
1307 * true if a timer has expired, else return false.
1308 */
1309static inline int fastpath_timer_check(struct task_struct *tsk)
1310{
1311 struct signal_struct *sig = tsk->signal;
1312
1313 if (unlikely(!sig))
1314 return 0;
1315
1316 if (!task_cputime_zero(&tsk->cputime_expires)) {
1317 struct task_cputime task_sample = {
1318 .utime = tsk->utime,
1319 .stime = tsk->stime,
1320 .sum_exec_runtime = tsk->se.sum_exec_runtime
1321 };
1322
1323 if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1324 return 1;
1325 }
1326 if (!task_cputime_zero(&sig->cputime_expires)) {
1327 struct task_cputime group_sample;
1328
1329 thread_group_cputime(tsk, &group_sample);
1330 if (task_cputime_expired(&group_sample, &sig->cputime_expires))
1331 return 1;
1332 }
1333 return 0;
1334}
1335
1317/* 1336/*
1318 * This is called from the timer interrupt handler. The irq handler has 1337 * This is called from the timer interrupt handler. The irq handler has
1319 * already updated our counts. We need to check if any timers fire now. 1338 * already updated our counts. We need to check if any timers fire now.
@@ -1326,42 +1345,31 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1326 1345
1327 BUG_ON(!irqs_disabled()); 1346 BUG_ON(!irqs_disabled());
1328 1347
1329#define UNEXPIRED(clock) \ 1348 /*
1330 (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ 1349 * The fast path checks that there are no expired thread or thread
1331 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) 1350 * group timers. If that's so, just return.
1332 1351 */
1333 if (UNEXPIRED(prof) && UNEXPIRED(virt) && 1352 if (!fastpath_timer_check(tsk))
1334 (tsk->it_sched_expires == 0 ||
1335 tsk->se.sum_exec_runtime < tsk->it_sched_expires))
1336 return; 1353 return;
1337 1354
1338#undef UNEXPIRED 1355 spin_lock(&tsk->sighand->siglock);
1339
1340 /* 1356 /*
1341 * Double-check with locks held. 1357 * Here we take off tsk->signal->cpu_timers[N] and
1358 * tsk->cpu_timers[N] all the timers that are firing, and
1359 * put them on the firing list.
1342 */ 1360 */
1343 read_lock(&tasklist_lock); 1361 check_thread_timers(tsk, &firing);
1344 if (likely(tsk->signal != NULL)) { 1362 check_process_timers(tsk, &firing);
1345 spin_lock(&tsk->sighand->siglock);
1346 1363
1347 /* 1364 /*
1348 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] 1365 * We must release these locks before taking any timer's lock.
1349 * all the timers that are firing, and put them on the firing list. 1366 * There is a potential race with timer deletion here, as the
1350 */ 1367 * siglock now protects our private firing list. We have set
1351 check_thread_timers(tsk, &firing); 1368 * the firing flag in each timer, so that a deletion attempt
1352 check_process_timers(tsk, &firing); 1369 * that gets the timer lock before we do will give it up and
1353 1370 * spin until we've taken care of that timer below.
1354 /* 1371 */
1355 * We must release these locks before taking any timer's lock. 1372 spin_unlock(&tsk->sighand->siglock);
1356 * There is a potential race with timer deletion here, as the
1357 * siglock now protects our private firing list. We have set
1358 * the firing flag in each timer, so that a deletion attempt
1359 * that gets the timer lock before we do will give it up and
1360 * spin until we've taken care of that timer below.
1361 */
1362 spin_unlock(&tsk->sighand->siglock);
1363 }
1364 read_unlock(&tasklist_lock);
1365 1373
1366 /* 1374 /*
1367 * Now that all the timers on our list have the firing flag, 1375 * Now that all the timers on our list have the firing flag,
@@ -1389,10 +1397,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1389 1397
1390/* 1398/*
1391 * Set one of the process-wide special case CPU timers. 1399 * Set one of the process-wide special case CPU timers.
1392 * The tasklist_lock and tsk->sighand->siglock must be held by the caller. 1400 * The tsk->sighand->siglock must be held by the caller.
1393 * The oldval argument is null for the RLIMIT_CPU timer, where *newval is 1401 * The *newval argument is relative and we update it to be absolute, *oldval
1394 * absolute; non-null for ITIMER_*, where *newval is relative and we update 1402 * is absolute and we update it to be relative.
1395 * it to be absolute, *oldval is absolute and we update it to be relative.
1396 */ 1403 */
1397void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, 1404void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1398 cputime_t *newval, cputime_t *oldval) 1405 cputime_t *newval, cputime_t *oldval)
@@ -1401,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1401 struct list_head *head; 1408 struct list_head *head;
1402 1409
1403 BUG_ON(clock_idx == CPUCLOCK_SCHED); 1410 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1404 cpu_clock_sample_group_locked(clock_idx, tsk, &now); 1411 cpu_clock_sample_group(clock_idx, tsk, &now);
1405 1412
1406 if (oldval) { 1413 if (oldval) {
1407 if (!cputime_eq(*oldval, cputime_zero)) { 1414 if (!cputime_eq(*oldval, cputime_zero)) {
@@ -1435,13 +1442,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1435 cputime_ge(list_first_entry(head, 1442 cputime_ge(list_first_entry(head,
1436 struct cpu_timer_list, entry)->expires.cpu, 1443 struct cpu_timer_list, entry)->expires.cpu,
1437 *newval)) { 1444 *newval)) {
1438 /* 1445 switch (clock_idx) {
1439 * Rejigger each thread's expiry time so that one will 1446 case CPUCLOCK_PROF:
1440 * notice before we hit the process-cumulative expiry time. 1447 tsk->signal->cputime_expires.prof_exp = *newval;
1441 */ 1448 break;
1442 union cpu_time_count expires = { .sched = 0 }; 1449 case CPUCLOCK_VIRT:
1443 expires.cpu = *newval; 1450 tsk->signal->cputime_expires.virt_exp = *newval;
1444 process_timer_rebalance(tsk, clock_idx, expires, now); 1451 break;
1452 }
1445 } 1453 }
1446} 1454}
1447 1455
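
The posix-cpu-timers rework above drops the per-thread it_*_expires rebalancing in favor of cached process-wide expiry fields that fastpath_timer_check() compares against a snapshot on every tick. Below is a minimal user-space sketch that exercises exactly this path: a process-wide CPU-time timer armed via timer_create(). The SIGALRM choice and the one-second budget are arbitrary, and older glibc needs -lrt.

/* build: gcc cpu_timer.c -lrt */
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static void on_alarm(int sig)
{
	(void)sig;
	write(STDOUT_FILENO, "process CPU timer fired\n", 24);
	_exit(0);
}

int main(void)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGALRM,
	};
	struct itimerspec its = { .it_value = { .tv_sec = 1 } };
	timer_t tid;

	signal(SIGALRM, on_alarm);
	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid) == -1) {
		perror("timer_create");
		return 1;
	}
	if (timer_settime(tid, 0, &its, NULL) == -1) {
		perror("timer_settime");
		return 1;
	}
	for (;;)
		;	/* burn CPU so the process clock advances */
}

Expiry is driven by CPU consumption, not wall time; until roughly one second of CPU has been burned, fastpath_timer_check() keeps run_posix_cpu_timers() off the slow path entirely.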
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5131e5471169..b931d7cedbfa 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -223,6 +223,15 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
223} 223}
224 224
225/* 225/*
226 * Get monotonic time for posix timers
227 */
228static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
229{
230 getrawmonotonic(tp);
231 return 0;
232}
233
234/*
226 * Initialize everything, well, just everything in Posix clocks/timers ;) 235 * Initialize everything, well, just everything in Posix clocks/timers ;)
227 */ 236 */
228static __init int init_posix_timers(void) 237static __init int init_posix_timers(void)
@@ -235,9 +244,15 @@ static __init int init_posix_timers(void)
235 .clock_get = posix_ktime_get_ts, 244 .clock_get = posix_ktime_get_ts,
236 .clock_set = do_posix_clock_nosettime, 245 .clock_set = do_posix_clock_nosettime,
237 }; 246 };
247 struct k_clock clock_monotonic_raw = {
248 .clock_getres = hrtimer_get_res,
249 .clock_get = posix_get_monotonic_raw,
250 .clock_set = do_posix_clock_nosettime,
251 };
238 252
239 register_posix_clock(CLOCK_REALTIME, &clock_realtime); 253 register_posix_clock(CLOCK_REALTIME, &clock_realtime);
240 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); 254 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
255 register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
241 256
242 posix_timers_cache = kmem_cache_create("posix_timers_cache", 257 posix_timers_cache = kmem_cache_create("posix_timers_cache",
243 sizeof (struct k_itimer), 0, SLAB_PANIC, 258 sizeof (struct k_itimer), 0, SLAB_PANIC,
@@ -298,6 +313,7 @@ void do_schedule_next_timer(struct siginfo *info)
298 313
299int posix_timer_event(struct k_itimer *timr, int si_private) 314int posix_timer_event(struct k_itimer *timr, int si_private)
300{ 315{
316 int shared, ret;
301 /* 317 /*
302 * FIXME: if ->sigq is queued we can race with 318 * FIXME: if ->sigq is queued we can race with
303 * dequeue_signal()->do_schedule_next_timer(). 319 * dequeue_signal()->do_schedule_next_timer().
@@ -311,25 +327,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
311 */ 327 */
312 timr->sigq->info.si_sys_private = si_private; 328 timr->sigq->info.si_sys_private = si_private;
313 329
314 timr->sigq->info.si_signo = timr->it_sigev_signo; 330 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
315 timr->sigq->info.si_code = SI_TIMER; 331 ret = send_sigqueue(timr->sigq, timr->it_process, shared);
316 timr->sigq->info.si_tid = timr->it_id; 332 /* If we failed to send the signal the timer stops. */
317 timr->sigq->info.si_value = timr->it_sigev_value; 333 return ret > 0;
318
319 if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
320 struct task_struct *leader;
321 int ret = send_sigqueue(timr->sigq, timr->it_process, 0);
322
323 if (likely(ret >= 0))
324 return ret;
325
326 timr->it_sigev_notify = SIGEV_SIGNAL;
327 leader = timr->it_process->group_leader;
328 put_task_struct(timr->it_process);
329 timr->it_process = leader;
330 }
331
332 return send_sigqueue(timr->sigq, timr->it_process, 1);
333} 334}
334EXPORT_SYMBOL_GPL(posix_timer_event); 335EXPORT_SYMBOL_GPL(posix_timer_event);
335 336
@@ -468,11 +469,9 @@ sys_timer_create(const clockid_t which_clock,
468 struct sigevent __user *timer_event_spec, 469 struct sigevent __user *timer_event_spec,
469 timer_t __user * created_timer_id) 470 timer_t __user * created_timer_id)
470{ 471{
471 int error = 0; 472 struct k_itimer *new_timer;
472 struct k_itimer *new_timer = NULL; 473 int error, new_timer_id;
473 int new_timer_id; 474 struct task_struct *process;
474 struct task_struct *process = NULL;
475 unsigned long flags;
476 sigevent_t event; 475 sigevent_t event;
477 int it_id_set = IT_ID_NOT_SET; 476 int it_id_set = IT_ID_NOT_SET;
478 477
@@ -490,12 +489,11 @@ sys_timer_create(const clockid_t which_clock,
490 goto out; 489 goto out;
491 } 490 }
492 spin_lock_irq(&idr_lock); 491 spin_lock_irq(&idr_lock);
493 error = idr_get_new(&posix_timers_id, (void *) new_timer, 492 error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id);
494 &new_timer_id);
495 spin_unlock_irq(&idr_lock); 493 spin_unlock_irq(&idr_lock);
496 if (error == -EAGAIN) 494 if (error) {
497 goto retry; 495 if (error == -EAGAIN)
498 else if (error) { 496 goto retry;
499 /* 497 /*
500 * Weird looking, but we return EAGAIN if the IDR is 498 * Weird looking, but we return EAGAIN if the IDR is
501 * full (proper POSIX return value for this) 499 * full (proper POSIX return value for this)
@@ -526,67 +524,43 @@ sys_timer_create(const clockid_t which_clock,
526 error = -EFAULT; 524 error = -EFAULT;
527 goto out; 525 goto out;
528 } 526 }
529 new_timer->it_sigev_notify = event.sigev_notify; 527 rcu_read_lock();
530 new_timer->it_sigev_signo = event.sigev_signo; 528 process = good_sigevent(&event);
531 new_timer->it_sigev_value = event.sigev_value; 529 if (process)
532 530 get_task_struct(process);
533 read_lock(&tasklist_lock); 531 rcu_read_unlock();
534 if ((process = good_sigevent(&event))) {
535 /*
536 * We may be setting up this process for another
537 * thread. It may be exiting. To catch this
 538 * case we check the PF_EXITING flag. If
539 * the flag is not set, the siglock will catch
540 * him before it is too late (in exit_itimers).
541 *
 542 * The exec case is a bit more involved but easy
543 * to code. If the process is in our thread
544 * group (and it must be or we would not allow
545 * it here) and is doing an exec, it will cause
546 * us to be killed. In this case it will wait
547 * for us to die which means we can finish this
548 * linkage with our last gasp. I.e. no code :)
549 */
550 spin_lock_irqsave(&process->sighand->siglock, flags);
551 if (!(process->flags & PF_EXITING)) {
552 new_timer->it_process = process;
553 list_add(&new_timer->list,
554 &process->signal->posix_timers);
555 if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
556 get_task_struct(process);
557 spin_unlock_irqrestore(&process->sighand->siglock, flags);
558 } else {
559 spin_unlock_irqrestore(&process->sighand->siglock, flags);
560 process = NULL;
561 }
562 }
563 read_unlock(&tasklist_lock);
564 if (!process) { 532 if (!process) {
565 error = -EINVAL; 533 error = -EINVAL;
566 goto out; 534 goto out;
567 } 535 }
568 } else { 536 } else {
569 new_timer->it_sigev_notify = SIGEV_SIGNAL; 537 event.sigev_notify = SIGEV_SIGNAL;
570 new_timer->it_sigev_signo = SIGALRM; 538 event.sigev_signo = SIGALRM;
571 new_timer->it_sigev_value.sival_int = new_timer->it_id; 539 event.sigev_value.sival_int = new_timer->it_id;
572 process = current->group_leader; 540 process = current->group_leader;
573 spin_lock_irqsave(&process->sighand->siglock, flags); 541 get_task_struct(process);
574 new_timer->it_process = process;
575 list_add(&new_timer->list, &process->signal->posix_timers);
576 spin_unlock_irqrestore(&process->sighand->siglock, flags);
577 } 542 }
578 543
544 new_timer->it_sigev_notify = event.sigev_notify;
545 new_timer->sigq->info.si_signo = event.sigev_signo;
546 new_timer->sigq->info.si_value = event.sigev_value;
547 new_timer->sigq->info.si_tid = new_timer->it_id;
548 new_timer->sigq->info.si_code = SI_TIMER;
549
550 spin_lock_irq(&current->sighand->siglock);
551 new_timer->it_process = process;
552 list_add(&new_timer->list, &current->signal->posix_timers);
553 spin_unlock_irq(&current->sighand->siglock);
554
555 return 0;
579 /* 556 /*
580 * In the case of the timer belonging to another task, after 557 * In the case of the timer belonging to another task, after
581 * the task is unlocked, the timer is owned by the other task 558 * the task is unlocked, the timer is owned by the other task
582 * and may cease to exist at any time. Don't use or modify 559 * and may cease to exist at any time. Don't use or modify
583 * new_timer after the unlock call. 560 * new_timer after the unlock call.
584 */ 561 */
585
586out: 562out:
587 if (error) 563 release_posix_timer(new_timer, it_id_set);
588 release_posix_timer(new_timer, it_id_set);
589
590 return error; 564 return error;
591} 565}
592 566
@@ -597,7 +571,7 @@ out:
 597 * the find to the timer lock. To avoid a deadlock, the timer id MUST 571
 598 * be released without holding the timer lock. 572
599 */ 573 */
600static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) 574static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
601{ 575{
602 struct k_itimer *timr; 576 struct k_itimer *timr;
603 /* 577 /*
@@ -605,23 +579,20 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
605 * flags part over to the timer lock. Must not let interrupts in 579 * flags part over to the timer lock. Must not let interrupts in
606 * while we are moving the lock. 580 * while we are moving the lock.
607 */ 581 */
608
609 spin_lock_irqsave(&idr_lock, *flags); 582 spin_lock_irqsave(&idr_lock, *flags);
610 timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id); 583 timr = idr_find(&posix_timers_id, (int)timer_id);
611 if (timr) { 584 if (timr) {
612 spin_lock(&timr->it_lock); 585 spin_lock(&timr->it_lock);
613 586 if (timr->it_process &&
614 if ((timr->it_id != timer_id) || !(timr->it_process) || 587 same_thread_group(timr->it_process, current)) {
615 !same_thread_group(timr->it_process, current)) {
616 spin_unlock(&timr->it_lock);
617 spin_unlock_irqrestore(&idr_lock, *flags);
618 timr = NULL;
619 } else
620 spin_unlock(&idr_lock); 588 spin_unlock(&idr_lock);
621 } else 589 return timr;
622 spin_unlock_irqrestore(&idr_lock, *flags); 590 }
591 spin_unlock(&timr->it_lock);
592 }
593 spin_unlock_irqrestore(&idr_lock, *flags);
623 594
624 return timr; 595 return NULL;
625} 596}
626 597
627/* 598/*
@@ -862,8 +833,7 @@ retry_delete:
862 * This keeps any tasks waiting on the spin lock from thinking 833 * This keeps any tasks waiting on the spin lock from thinking
863 * they got something (see the lock code above). 834 * they got something (see the lock code above).
864 */ 835 */
865 if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) 836 put_task_struct(timer->it_process);
866 put_task_struct(timer->it_process);
867 timer->it_process = NULL; 837 timer->it_process = NULL;
868 838
869 unlock_timer(timer, flags); 839 unlock_timer(timer, flags);
@@ -890,8 +860,7 @@ retry_delete:
890 * This keeps any tasks waiting on the spin lock from thinking 860 * This keeps any tasks waiting on the spin lock from thinking
891 * they got something (see the lock code above). 861 * they got something (see the lock code above).
892 */ 862 */
893 if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) 863 put_task_struct(timer->it_process);
894 put_task_struct(timer->it_process);
895 timer->it_process = NULL; 864 timer->it_process = NULL;
896 865
897 unlock_timer(timer, flags); 866 unlock_timer(timer, flags);
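
The posix-timers.c hunks above also wire up CLOCK_MONOTONIC_RAW, a monotonic clock that ticks at the raw hardware rate with no NTP slewing, backed by getrawmonotonic(). A hedged user-space sketch follows; the fallback #define assumes the Linux clockid value 4, since libc headers of this era may not carry the constant yet.

/* build: gcc raw_clock.c -lrt */
#include <stdio.h>
#include <time.h>

#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4	/* assumed Linux clockid; headers may lag */
#endif

int main(void)
{
	struct timespec raw, mono;

	if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw) ||
	    clock_gettime(CLOCK_MONOTONIC, &mono)) {
		perror("clock_gettime");
		return 1;
	}
	printf("raw  %ld.%09ld\n", (long)raw.tv_sec, raw.tv_nsec);
	printf("mono %ld.%09ld\n", (long)mono.tv_sec, mono.tv_nsec);
	return 0;
}

Comparing the two clocks over a long run shows the cumulative NTP adjustment; CLOCK_MONOTONIC_RAW is the one to use when measuring the oscillator itself.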
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 90b5b123f7a1..85cb90588a55 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -42,10 +42,10 @@
42#include <linux/freezer.h> 42#include <linux/freezer.h>
43#include <linux/cpu.h> 43#include <linux/cpu.h>
44#include <linux/delay.h> 44#include <linux/delay.h>
45#include <linux/byteorder/swabb.h>
46#include <linux/stat.h> 45#include <linux/stat.h>
47#include <linux/srcu.h> 46#include <linux/srcu.h>
48#include <linux/slab.h> 47#include <linux/slab.h>
48#include <asm/byteorder.h>
49 49
50MODULE_LICENSE("GPL"); 50MODULE_LICENSE("GPL");
51MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " 51MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f230596bd0c..d906f72b42d2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
71#include <linux/debugfs.h> 71#include <linux/debugfs.h>
72#include <linux/ctype.h> 72#include <linux/ctype.h>
73#include <linux/ftrace.h> 73#include <linux/ftrace.h>
74#include <trace/sched.h>
74 75
75#include <asm/tlb.h> 76#include <asm/tlb.h>
76#include <asm/irq_regs.h> 77#include <asm/irq_regs.h>
@@ -1936,6 +1937,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
1936 * just go back and repeat. 1937 * just go back and repeat.
1937 */ 1938 */
1938 rq = task_rq_lock(p, &flags); 1939 rq = task_rq_lock(p, &flags);
1940 trace_sched_wait_task(rq, p);
1939 running = task_running(rq, p); 1941 running = task_running(rq, p);
1940 on_rq = p->se.on_rq; 1942 on_rq = p->se.on_rq;
1941 ncsw = 0; 1943 ncsw = 0;
@@ -2297,9 +2299,7 @@ out_activate:
2297 success = 1; 2299 success = 1;
2298 2300
2299out_running: 2301out_running:
2300 trace_mark(kernel_sched_wakeup, 2302 trace_sched_wakeup(rq, p);
2301 "pid %d state %ld ## rq %p task %p rq->curr %p",
2302 p->pid, p->state, rq, p, rq->curr);
2303 check_preempt_curr(rq, p, sync); 2303 check_preempt_curr(rq, p, sync);
2304 2304
2305 p->state = TASK_RUNNING; 2305 p->state = TASK_RUNNING;
@@ -2432,9 +2432,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2432 p->sched_class->task_new(rq, p); 2432 p->sched_class->task_new(rq, p);
2433 inc_nr_running(rq); 2433 inc_nr_running(rq);
2434 } 2434 }
2435 trace_mark(kernel_sched_wakeup_new, 2435 trace_sched_wakeup_new(rq, p);
2436 "pid %d state %ld ## rq %p task %p rq->curr %p",
2437 p->pid, p->state, rq, p, rq->curr);
2438 check_preempt_curr(rq, p, 0); 2436 check_preempt_curr(rq, p, 0);
2439#ifdef CONFIG_SMP 2437#ifdef CONFIG_SMP
2440 if (p->sched_class->task_wake_up) 2438 if (p->sched_class->task_wake_up)
@@ -2607,11 +2605,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2607 struct mm_struct *mm, *oldmm; 2605 struct mm_struct *mm, *oldmm;
2608 2606
2609 prepare_task_switch(rq, prev, next); 2607 prepare_task_switch(rq, prev, next);
2610 trace_mark(kernel_sched_schedule, 2608 trace_sched_switch(rq, prev, next);
2611 "prev_pid %d next_pid %d prev_state %ld "
2612 "## rq %p prev %p next %p",
2613 prev->pid, next->pid, prev->state,
2614 rq, prev, next);
2615 mm = next->mm; 2609 mm = next->mm;
2616 oldmm = prev->active_mm; 2610 oldmm = prev->active_mm;
2617 /* 2611 /*
@@ -2851,6 +2845,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
2851 || unlikely(!cpu_active(dest_cpu))) 2845 || unlikely(!cpu_active(dest_cpu)))
2852 goto out; 2846 goto out;
2853 2847
2848 trace_sched_migrate_task(rq, p, dest_cpu);
2854 /* force the process onto the specified CPU */ 2849 /* force the process onto the specified CPU */
2855 if (migrate_task(p, dest_cpu, &req)) { 2850 if (migrate_task(p, dest_cpu, &req)) {
2856 /* Need to wait for migration thread (might exit: take ref). */ 2851 /* Need to wait for migration thread (might exit: take ref). */
@@ -4052,23 +4047,26 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
4052EXPORT_PER_CPU_SYMBOL(kstat); 4047EXPORT_PER_CPU_SYMBOL(kstat);
4053 4048
4054/* 4049/*
4055 * Return p->sum_exec_runtime plus any more ns on the sched_clock 4050 * Return any ns on the sched_clock that have not yet been banked in
4056 * that have not yet been banked in case the task is currently running. 4051 * @p in case that task is currently running.
4057 */ 4052 */
4058unsigned long long task_sched_runtime(struct task_struct *p) 4053unsigned long long task_delta_exec(struct task_struct *p)
4059{ 4054{
4060 unsigned long flags; 4055 unsigned long flags;
4061 u64 ns, delta_exec;
4062 struct rq *rq; 4056 struct rq *rq;
4057 u64 ns = 0;
4063 4058
4064 rq = task_rq_lock(p, &flags); 4059 rq = task_rq_lock(p, &flags);
4065 ns = p->se.sum_exec_runtime; 4060
4066 if (task_current(rq, p)) { 4061 if (task_current(rq, p)) {
4062 u64 delta_exec;
4063
4067 update_rq_clock(rq); 4064 update_rq_clock(rq);
4068 delta_exec = rq->clock - p->se.exec_start; 4065 delta_exec = rq->clock - p->se.exec_start;
4069 if ((s64)delta_exec > 0) 4066 if ((s64)delta_exec > 0)
4070 ns += delta_exec; 4067 ns = delta_exec;
4071 } 4068 }
4069
4072 task_rq_unlock(rq, &flags); 4070 task_rq_unlock(rq, &flags);
4073 4071
4074 return ns; 4072 return ns;
@@ -4085,6 +4083,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
4085 cputime64_t tmp; 4083 cputime64_t tmp;
4086 4084
4087 p->utime = cputime_add(p->utime, cputime); 4085 p->utime = cputime_add(p->utime, cputime);
4086 account_group_user_time(p, cputime);
4088 4087
4089 /* Add user time to cpustat. */ 4088 /* Add user time to cpustat. */
4090 tmp = cputime_to_cputime64(cputime); 4089 tmp = cputime_to_cputime64(cputime);
@@ -4109,6 +4108,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
4109 tmp = cputime_to_cputime64(cputime); 4108 tmp = cputime_to_cputime64(cputime);
4110 4109
4111 p->utime = cputime_add(p->utime, cputime); 4110 p->utime = cputime_add(p->utime, cputime);
4111 account_group_user_time(p, cputime);
4112 p->gtime = cputime_add(p->gtime, cputime); 4112 p->gtime = cputime_add(p->gtime, cputime);
4113 4113
4114 cpustat->user = cputime64_add(cpustat->user, tmp); 4114 cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -4144,6 +4144,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4144 } 4144 }
4145 4145
4146 p->stime = cputime_add(p->stime, cputime); 4146 p->stime = cputime_add(p->stime, cputime);
4147 account_group_system_time(p, cputime);
4147 4148
4148 /* Add system time to cpustat. */ 4149 /* Add system time to cpustat. */
4149 tmp = cputime_to_cputime64(cputime); 4150 tmp = cputime_to_cputime64(cputime);
@@ -4185,6 +4186,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
4185 4186
4186 if (p == rq->idle) { 4187 if (p == rq->idle) {
4187 p->stime = cputime_add(p->stime, steal); 4188 p->stime = cputime_add(p->stime, steal);
4189 account_group_system_time(p, steal);
4188 if (atomic_read(&rq->nr_iowait) > 0) 4190 if (atomic_read(&rq->nr_iowait) > 0)
4189 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 4191 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
4190 else 4192 else
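
The sched.c hunks convert the old free-form trace_mark() call sites into static tracepoints (trace_sched_wakeup, trace_sched_switch, trace_sched_migrate_task, and so on). A sketch of a probe module in the Documentation/tracepoints.txt style follows; register_trace_sched_switch() and its unregister twin are assumed to be generated by DECLARE_TRACE() in <trace/sched.h>, and the printk body is illustrative only, since probes run in atomic context.

#include <linux/module.h>
#include <linux/kernel.h>
#include <trace/sched.h>

struct rq;	/* private to sched.c; the probe only passes the pointer through */

static void probe_sched_switch(struct rq *rq, struct task_struct *prev,
			       struct task_struct *next)
{
	printk(KERN_INFO "switch %d -> %d\n", prev->pid, next->pid);
}

static int __init probe_init(void)
{
	return register_trace_sched_switch(probe_sched_switch);
}

static void __exit probe_exit(void)
{
	unregister_trace_sched_switch(probe_sched_switch);
	tracepoint_synchronize_unregister();	/* wait for in-flight probes */
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");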
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 18fd17172eb6..f604dae71316 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -449,6 +449,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
449 struct task_struct *curtask = task_of(curr); 449 struct task_struct *curtask = task_of(curr);
450 450
451 cpuacct_charge(curtask, delta_exec); 451 cpuacct_charge(curtask, delta_exec);
452 account_group_exec_runtime(curtask, delta_exec);
452 } 453 }
453} 454}
454 455
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index cdf5740ab03e..b446dc87494f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -526,6 +526,8 @@ static void update_curr_rt(struct rq *rq)
526 schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); 526 schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
527 527
528 curr->se.sum_exec_runtime += delta_exec; 528 curr->se.sum_exec_runtime += delta_exec;
529 account_group_exec_runtime(curr, delta_exec);
530
529 curr->se.exec_start = rq->clock; 531 curr->se.exec_start = rq->clock;
530 cpuacct_charge(curr, delta_exec); 532 cpuacct_charge(curr, delta_exec);
531 533
@@ -1458,7 +1460,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
1458 p->rt.timeout++; 1460 p->rt.timeout++;
1459 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); 1461 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
1460 if (p->rt.timeout > next) 1462 if (p->rt.timeout > next)
1461 p->it_sched_expires = p->se.sum_exec_runtime; 1463 p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
1462 } 1464 }
1463} 1465}
1464 1466
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8385d43987e2..b8c156979cf2 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -270,3 +270,89 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
270#define sched_info_switch(t, next) do { } while (0) 270#define sched_info_switch(t, next) do { } while (0)
271#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ 271#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
272 272
273/*
274 * The following are functions that support scheduler-internal time accounting.
275 * These functions are generally called at the timer tick. None of this depends
276 * on CONFIG_SCHEDSTATS.
277 */
278
279/**
280 * account_group_user_time - Maintain utime for a thread group.
281 *
282 * @tsk: Pointer to task structure.
283 * @cputime: Time value by which to increment the utime field of the
284 * thread_group_cputime structure.
285 *
286 * If thread group time is being maintained, get the structure for the
287 * running CPU and update the utime field there.
288 */
289static inline void account_group_user_time(struct task_struct *tsk,
290 cputime_t cputime)
291{
292 struct signal_struct *sig;
293
294 sig = tsk->signal;
295 if (unlikely(!sig))
296 return;
297 if (sig->cputime.totals) {
298 struct task_cputime *times;
299
300 times = per_cpu_ptr(sig->cputime.totals, get_cpu());
301 times->utime = cputime_add(times->utime, cputime);
302 put_cpu_no_resched();
303 }
304}
305
306/**
307 * account_group_system_time - Maintain stime for a thread group.
308 *
309 * @tsk: Pointer to task structure.
310 * @cputime: Time value by which to increment the stime field of the
311 * thread_group_cputime structure.
312 *
313 * If thread group time is being maintained, get the structure for the
314 * running CPU and update the stime field there.
315 */
316static inline void account_group_system_time(struct task_struct *tsk,
317 cputime_t cputime)
318{
319 struct signal_struct *sig;
320
321 sig = tsk->signal;
322 if (unlikely(!sig))
323 return;
324 if (sig->cputime.totals) {
325 struct task_cputime *times;
326
327 times = per_cpu_ptr(sig->cputime.totals, get_cpu());
328 times->stime = cputime_add(times->stime, cputime);
329 put_cpu_no_resched();
330 }
331}
332
333/**
334 * account_group_exec_runtime - Maintain exec runtime for a thread group.
335 *
336 * @tsk: Pointer to task structure.
337 * @ns: Time value by which to increment the sum_exec_runtime field
338 * of the thread_group_cputime structure.
339 *
340 * If thread group time is being maintained, get the structure for the
341 * running CPU and update the sum_exec_runtime field there.
342 */
343static inline void account_group_exec_runtime(struct task_struct *tsk,
344 unsigned long long ns)
345{
346 struct signal_struct *sig;
347
348 sig = tsk->signal;
349 if (unlikely(!sig))
350 return;
351 if (sig->cputime.totals) {
352 struct task_cputime *times;
353
354 times = per_cpu_ptr(sig->cputime.totals, get_cpu());
355 times->sum_exec_runtime += ns;
356 put_cpu_no_resched();
357 }
358}
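
The account_group_*() helpers above only bump a per-CPU task_cputime hanging off sig->cputime.totals; a reader must fold those per-CPU buckets into a single total. The kernel's reader is thread_group_cputime(); the sketch below is an assumption about its shape inferred from the writers above, not a copy of it, written as if it lived next to them with the percpu headers already in scope.

static void sum_group_cputime(struct signal_struct *sig,
			      struct task_cputime *times)
{
	int cpu;

	times->utime = cputime_zero;
	times->stime = cputime_zero;
	times->sum_exec_runtime = 0;

	if (!sig->cputime.totals)
		return;
	for_each_possible_cpu(cpu) {
		struct task_cputime *tot = per_cpu_ptr(sig->cputime.totals, cpu);

		times->utime = cputime_add(times->utime, tot->utime);
		times->stime = cputime_add(times->stime, tot->stime);
		times->sum_exec_runtime += tot->sum_exec_runtime;
	}
}

The design trades a cheap, lock-free tick path for an O(num_possible_cpus) read, which suits accounting that is written every tick but read rarely.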
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340f..105217da5c82 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,6 +27,7 @@
27#include <linux/freezer.h> 27#include <linux/freezer.h>
28#include <linux/pid_namespace.h> 28#include <linux/pid_namespace.h>
29#include <linux/nsproxy.h> 29#include <linux/nsproxy.h>
30#include <trace/sched.h>
30 31
31#include <asm/param.h> 32#include <asm/param.h>
32#include <asm/uaccess.h> 33#include <asm/uaccess.h>
@@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
803 struct sigpending *pending; 804 struct sigpending *pending;
804 struct sigqueue *q; 805 struct sigqueue *q;
805 806
807 trace_sched_signal_send(sig, t);
808
806 assert_spin_locked(&t->sighand->siglock); 809 assert_spin_locked(&t->sighand->siglock);
807 if (!prepare_signal(sig, t)) 810 if (!prepare_signal(sig, t))
808 return 0; 811 return 0;
@@ -1338,6 +1341,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1338 struct siginfo info; 1341 struct siginfo info;
1339 unsigned long flags; 1342 unsigned long flags;
1340 struct sighand_struct *psig; 1343 struct sighand_struct *psig;
1344 struct task_cputime cputime;
1341 int ret = sig; 1345 int ret = sig;
1342 1346
1343 BUG_ON(sig == -1); 1347 BUG_ON(sig == -1);
@@ -1368,10 +1372,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1368 1372
1369 info.si_uid = tsk->uid; 1373 info.si_uid = tsk->uid;
1370 1374
1371 info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, 1375 thread_group_cputime(tsk, &cputime);
1372 tsk->signal->utime)); 1376 info.si_utime = cputime_to_jiffies(cputime.utime);
1373 info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, 1377 info.si_stime = cputime_to_jiffies(cputime.stime);
1374 tsk->signal->stime));
1375 1378
1376 info.si_status = tsk->exit_code & 0x7f; 1379 info.si_status = tsk->exit_code & 0x7f;
1377 if (tsk->exit_code & 0x80) 1380 if (tsk->exit_code & 0x80)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 83ba21a13bd4..7110daeb9a90 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -267,16 +267,12 @@ asmlinkage void do_softirq(void)
267 */ 267 */
268void irq_enter(void) 268void irq_enter(void)
269{ 269{
270#ifdef CONFIG_NO_HZ
271 int cpu = smp_processor_id(); 270 int cpu = smp_processor_id();
271
272 if (idle_cpu(cpu) && !in_interrupt()) 272 if (idle_cpu(cpu) && !in_interrupt())
273 tick_nohz_stop_idle(cpu); 273 tick_check_idle(cpu);
274#endif 274
275 __irq_enter(); 275 __irq_enter();
276#ifdef CONFIG_NO_HZ
277 if (idle_cpu(cpu))
278 tick_nohz_update_jiffies();
279#endif
280} 276}
281 277
282#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 278#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/sys.c b/kernel/sys.c
index 0bc8fa3c2288..53879cdae483 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid)
853 return old_fsgid; 853 return old_fsgid;
854} 854}
855 855
856void do_sys_times(struct tms *tms)
857{
858 struct task_cputime cputime;
859 cputime_t cutime, cstime;
860
861 spin_lock_irq(&current->sighand->siglock);
862 thread_group_cputime(current, &cputime);
863 cutime = current->signal->cutime;
864 cstime = current->signal->cstime;
865 spin_unlock_irq(&current->sighand->siglock);
866 tms->tms_utime = cputime_to_clock_t(cputime.utime);
867 tms->tms_stime = cputime_to_clock_t(cputime.stime);
868 tms->tms_cutime = cputime_to_clock_t(cutime);
869 tms->tms_cstime = cputime_to_clock_t(cstime);
870}
871
856asmlinkage long sys_times(struct tms __user * tbuf) 872asmlinkage long sys_times(struct tms __user * tbuf)
857{ 873{
858 /*
859 * In the SMP world we might just be unlucky and have one of
860 * the times increment as we use it. Since the value is an
861 * atomically safe type this is just fine. Conceptually its
862 * as if the syscall took an instant longer to occur.
863 */
864 if (tbuf) { 874 if (tbuf) {
865 struct tms tmp; 875 struct tms tmp;
866 struct task_struct *tsk = current; 876
867 struct task_struct *t; 877 do_sys_times(&tmp);
868 cputime_t utime, stime, cutime, cstime;
869
870 spin_lock_irq(&tsk->sighand->siglock);
871 utime = tsk->signal->utime;
872 stime = tsk->signal->stime;
873 t = tsk;
874 do {
875 utime = cputime_add(utime, t->utime);
876 stime = cputime_add(stime, t->stime);
877 t = next_thread(t);
878 } while (t != tsk);
879
880 cutime = tsk->signal->cutime;
881 cstime = tsk->signal->cstime;
882 spin_unlock_irq(&tsk->sighand->siglock);
883
884 tmp.tms_utime = cputime_to_clock_t(utime);
885 tmp.tms_stime = cputime_to_clock_t(stime);
886 tmp.tms_cutime = cputime_to_clock_t(cutime);
887 tmp.tms_cstime = cputime_to_clock_t(cstime);
888 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 878 if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
889 return -EFAULT; 879 return -EFAULT;
890 } 880 }
@@ -1449,7 +1439,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
1449asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) 1439asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1450{ 1440{
1451 struct rlimit new_rlim, *old_rlim; 1441 struct rlimit new_rlim, *old_rlim;
1452 unsigned long it_prof_secs;
1453 int retval; 1442 int retval;
1454 1443
1455 if (resource >= RLIM_NLIMITS) 1444 if (resource >= RLIM_NLIMITS)
@@ -1503,18 +1492,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1503 if (new_rlim.rlim_cur == RLIM_INFINITY) 1492 if (new_rlim.rlim_cur == RLIM_INFINITY)
1504 goto out; 1493 goto out;
1505 1494
1506 it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); 1495 update_rlimit_cpu(new_rlim.rlim_cur);
1507 if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
1508 unsigned long rlim_cur = new_rlim.rlim_cur;
1509 cputime_t cputime;
1510
1511 cputime = secs_to_cputime(rlim_cur);
1512 read_lock(&tasklist_lock);
1513 spin_lock_irq(&current->sighand->siglock);
1514 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
1515 spin_unlock_irq(&current->sighand->siglock);
1516 read_unlock(&tasklist_lock);
1517 }
1518out: 1496out:
1519 return 0; 1497 return 0;
1520} 1498}
@@ -1552,11 +1530,8 @@ out:
1552 * 1530 *
1553 */ 1531 */
1554 1532
1555static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, 1533static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
1556 cputime_t *utimep, cputime_t *stimep)
1557{ 1534{
1558 *utimep = cputime_add(*utimep, t->utime);
1559 *stimep = cputime_add(*stimep, t->stime);
1560 r->ru_nvcsw += t->nvcsw; 1535 r->ru_nvcsw += t->nvcsw;
1561 r->ru_nivcsw += t->nivcsw; 1536 r->ru_nivcsw += t->nivcsw;
1562 r->ru_minflt += t->min_flt; 1537 r->ru_minflt += t->min_flt;
@@ -1570,12 +1545,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1570 struct task_struct *t; 1545 struct task_struct *t;
1571 unsigned long flags; 1546 unsigned long flags;
1572 cputime_t utime, stime; 1547 cputime_t utime, stime;
1548 struct task_cputime cputime;
1573 1549
1574 memset((char *) r, 0, sizeof *r); 1550 memset((char *) r, 0, sizeof *r);
1575 utime = stime = cputime_zero; 1551 utime = stime = cputime_zero;
1576 1552
1577 if (who == RUSAGE_THREAD) { 1553 if (who == RUSAGE_THREAD) {
1578 accumulate_thread_rusage(p, r, &utime, &stime); 1554 accumulate_thread_rusage(p, r);
1579 goto out; 1555 goto out;
1580 } 1556 }
1581 1557
@@ -1598,8 +1574,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1598 break; 1574 break;
1599 1575
1600 case RUSAGE_SELF: 1576 case RUSAGE_SELF:
1601 utime = cputime_add(utime, p->signal->utime); 1577 thread_group_cputime(p, &cputime);
1602 stime = cputime_add(stime, p->signal->stime); 1578 utime = cputime_add(utime, cputime.utime);
1579 stime = cputime_add(stime, cputime.stime);
1603 r->ru_nvcsw += p->signal->nvcsw; 1580 r->ru_nvcsw += p->signal->nvcsw;
1604 r->ru_nivcsw += p->signal->nivcsw; 1581 r->ru_nivcsw += p->signal->nivcsw;
1605 r->ru_minflt += p->signal->min_flt; 1582 r->ru_minflt += p->signal->min_flt;
@@ -1608,7 +1585,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1608 r->ru_oublock += p->signal->oublock; 1585 r->ru_oublock += p->signal->oublock;
1609 t = p; 1586 t = p;
1610 do { 1587 do {
1611 accumulate_thread_rusage(t, r, &utime, &stime); 1588 accumulate_thread_rusage(t, r);
1612 t = next_thread(t); 1589 t = next_thread(t);
1613 } while (t != p); 1590 } while (t != p);
1614 break; 1591 break;
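
With do_sys_times() and k_getrusage() now built on thread_group_cputime(), the user-visible interfaces are unchanged; only the summation moved out of line. For reference, a minimal times() consumer:

/* build: gcc times_demo.c */
#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
	struct tms t;
	long hz = sysconf(_SC_CLK_TCK);

	if (times(&t) == (clock_t)-1) {
		perror("times");
		return 1;
	}
	printf("user %.2fs  sys %.2fs\n",
	       (double)t.tms_utime / hz, (double)t.tms_stime / hz);
	return 0;
}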
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 093d4acf993b..9ed2eec97526 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -325,6 +325,9 @@ int clocksource_register(struct clocksource *c)
325 unsigned long flags; 325 unsigned long flags;
326 int ret; 326 int ret;
327 327
328 /* save mult_orig on registration */
329 c->mult_orig = c->mult;
330
328 spin_lock_irqsave(&clocksource_lock, flags); 331 spin_lock_irqsave(&clocksource_lock, flags);
329 ret = clocksource_enqueue(c); 332 ret = clocksource_enqueue(c);
330 if (!ret) 333 if (!ret)
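
Saving mult_orig at registration gives each clocksource two conversion rates: ->mult, which NTP steers over time, and ->mult_orig, the frozen hardware rate that getrawmonotonic() uses. Two illustrative helpers (the names are invented here), assuming the standard convention ns = (cycles * mult) >> shift from <linux/clocksource.h>:

static inline s64 cyc2ns_ntp(struct clocksource *cs, cycle_t delta)
{
	return ((s64)delta * cs->mult) >> cs->shift;	/* NTP-steered */
}

static inline s64 cyc2ns_raw(struct clocksource *cs, cycle_t delta)
{
	return ((s64)delta * cs->mult_orig) >> cs->shift;	/* hardware rate */
}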
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 4c256fdb8875..1ca99557e929 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -61,6 +61,7 @@ struct clocksource clocksource_jiffies = {
61 .read = jiffies_read, 61 .read = jiffies_read,
62 .mask = 0xffffffff, /*32bits*/ 62 .mask = 0xffffffff, /*32bits*/
63 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ 63 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
64 .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT,
64 .shift = JIFFIES_SHIFT, 65 .shift = JIFFIES_SHIFT,
65}; 66};
66 67
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 1ad46f3df6e7..1a20715bfd6e 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -10,13 +10,13 @@
10 10
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/timer.h>
14#include <linux/timex.h> 13#include <linux/timex.h>
15#include <linux/jiffies.h> 14#include <linux/jiffies.h>
16#include <linux/hrtimer.h> 15#include <linux/hrtimer.h>
17#include <linux/capability.h> 16#include <linux/capability.h>
18#include <linux/math64.h> 17#include <linux/math64.h>
19#include <linux/clocksource.h> 18#include <linux/clocksource.h>
19#include <linux/workqueue.h>
20#include <asm/timex.h> 20#include <asm/timex.h>
21 21
22/* 22/*
@@ -218,11 +218,11 @@ void second_overflow(void)
218/* Disable the cmos update - used by virtualization and embedded */ 218/* Disable the cmos update - used by virtualization and embedded */
219int no_sync_cmos_clock __read_mostly; 219int no_sync_cmos_clock __read_mostly;
220 220
221static void sync_cmos_clock(unsigned long dummy); 221static void sync_cmos_clock(struct work_struct *work);
222 222
223static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); 223static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
224 224
225static void sync_cmos_clock(unsigned long dummy) 225static void sync_cmos_clock(struct work_struct *work)
226{ 226{
227 struct timespec now, next; 227 struct timespec now, next;
228 int fail = 1; 228 int fail = 1;
@@ -258,13 +258,13 @@ static void sync_cmos_clock(unsigned long dummy)
258 next.tv_sec++; 258 next.tv_sec++;
259 next.tv_nsec -= NSEC_PER_SEC; 259 next.tv_nsec -= NSEC_PER_SEC;
260 } 260 }
261 mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next)); 261 schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
262} 262}
263 263
264static void notify_cmos_timer(void) 264static void notify_cmos_timer(void)
265{ 265{
266 if (!no_sync_cmos_clock) 266 if (!no_sync_cmos_clock)
267 mod_timer(&sync_cmos_timer, jiffies + 1); 267 schedule_delayed_work(&sync_cmos_work, 0);
268} 268}
269 269
270#else 270#else
@@ -277,38 +277,50 @@ static inline void notify_cmos_timer(void) { }
277int do_adjtimex(struct timex *txc) 277int do_adjtimex(struct timex *txc)
278{ 278{
279 struct timespec ts; 279 struct timespec ts;
280 long save_adjust, sec;
281 int result; 280 int result;
282 281
283 /* In order to modify anything, you gotta be super-user! */ 282 /* Validate the data before disabling interrupts */
284 if (txc->modes && !capable(CAP_SYS_TIME)) 283 if (txc->modes & ADJ_ADJTIME) {
285 return -EPERM;
286
287 /* Now we validate the data before disabling interrupts */
288
289 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) {
290 /* singleshot must not be used with any other mode bits */ 284 /* singleshot must not be used with any other mode bits */
291 if (txc->modes & ~ADJ_OFFSET_SS_READ) 285 if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
292 return -EINVAL; 286 return -EINVAL;
287 if (!(txc->modes & ADJ_OFFSET_READONLY) &&
288 !capable(CAP_SYS_TIME))
289 return -EPERM;
290 } else {
291 /* In order to modify anything, you gotta be super-user! */
292 if (txc->modes && !capable(CAP_SYS_TIME))
293 return -EPERM;
294
295 /* if the quartz is off by more than 10% something is VERY wrong! */
296 if (txc->modes & ADJ_TICK &&
297 (txc->tick < 900000/USER_HZ ||
298 txc->tick > 1100000/USER_HZ))
299 return -EINVAL;
300
301 if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
302 hrtimer_cancel(&leap_timer);
293 } 303 }
294 304
295 /* if the quartz is off by more than 10% something is VERY wrong ! */
296 if (txc->modes & ADJ_TICK)
297 if (txc->tick < 900000/USER_HZ ||
298 txc->tick > 1100000/USER_HZ)
299 return -EINVAL;
300
301 if (time_state != TIME_OK && txc->modes & ADJ_STATUS)
302 hrtimer_cancel(&leap_timer);
303 getnstimeofday(&ts); 305 getnstimeofday(&ts);
304 306
305 write_seqlock_irq(&xtime_lock); 307 write_seqlock_irq(&xtime_lock);
306 308
307 /* Save for later - semantics of adjtime is to return old value */
308 save_adjust = time_adjust;
309
310 /* If there are input parameters, then process them */ 309 /* If there are input parameters, then process them */
310 if (txc->modes & ADJ_ADJTIME) {
311 long save_adjust = time_adjust;
312
313 if (!(txc->modes & ADJ_OFFSET_READONLY)) {
314 /* adjtime() is independent from ntp_adjtime() */
315 time_adjust = txc->offset;
316 ntp_update_frequency();
317 }
318 txc->offset = save_adjust;
319 goto adj_done;
320 }
311 if (txc->modes) { 321 if (txc->modes) {
322 long sec;
323
312 if (txc->modes & ADJ_STATUS) { 324 if (txc->modes & ADJ_STATUS) {
313 if ((time_status & STA_PLL) && 325 if ((time_status & STA_PLL) &&
314 !(txc->status & STA_PLL)) { 326 !(txc->status & STA_PLL)) {
@@ -375,13 +387,8 @@ int do_adjtimex(struct timex *txc)
375 if (txc->modes & ADJ_TAI && txc->constant > 0) 387 if (txc->modes & ADJ_TAI && txc->constant > 0)
376 time_tai = txc->constant; 388 time_tai = txc->constant;
377 389
378 if (txc->modes & ADJ_OFFSET) { 390 if (txc->modes & ADJ_OFFSET)
379 if (txc->modes == ADJ_OFFSET_SINGLESHOT) 391 ntp_update_offset(txc->offset);
380 /* adjtime() is independent from ntp_adjtime() */
381 time_adjust = txc->offset;
382 else
383 ntp_update_offset(txc->offset);
384 }
385 if (txc->modes & ADJ_TICK) 392 if (txc->modes & ADJ_TICK)
386 tick_usec = txc->tick; 393 tick_usec = txc->tick;
387 394
@@ -389,22 +396,18 @@ int do_adjtimex(struct timex *txc)
389 ntp_update_frequency(); 396 ntp_update_frequency();
390 } 397 }
391 398
399 txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
400 NTP_SCALE_SHIFT);
401 if (!(time_status & STA_NANO))
402 txc->offset /= NSEC_PER_USEC;
403
404adj_done:
392 result = time_state; /* mostly `TIME_OK' */ 405 result = time_state; /* mostly `TIME_OK' */
393 if (time_status & (STA_UNSYNC|STA_CLOCKERR)) 406 if (time_status & (STA_UNSYNC|STA_CLOCKERR))
394 result = TIME_ERROR; 407 result = TIME_ERROR;
395 408
396 if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || 409 txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
397 (txc->modes == ADJ_OFFSET_SS_READ)) 410 (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT);
398 txc->offset = save_adjust;
399 else {
400 txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
401 NTP_SCALE_SHIFT);
402 if (!(time_status & STA_NANO))
403 txc->offset /= NSEC_PER_USEC;
404 }
405 txc->freq = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) *
406 (s64)PPM_SCALE_INV,
407 NTP_SCALE_SHIFT);
408 txc->maxerror = time_maxerror; 411 txc->maxerror = time_maxerror;
409 txc->esterror = time_esterror; 412 txc->esterror = time_esterror;
410 txc->status = time_status; 413 txc->status = time_status;
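
The do_adjtimex() restructuring above fences the legacy adjtime() interface behind ADJ_ADJTIME and allows the remaining single-shot offset to be read without CAP_SYS_TIME (ADJ_OFFSET_READONLY in the kernel, ADJ_OFFSET_SS_READ from user space). A small sketch; the fallback #define assumes the Linux value 0xa001, and the offset is reported in microseconds.

/* build: gcc slew_read.c */
#include <stdio.h>
#include <sys/timex.h>

#ifndef ADJ_OFFSET_SS_READ
#define ADJ_OFFSET_SS_READ 0xa001	/* assumed Linux value; older headers lack it */
#endif

int main(void)
{
	struct timex tx = { .modes = ADJ_OFFSET_SS_READ };

	if (adjtimex(&tx) == -1) {
		perror("adjtimex");
		return 1;
	}
	printf("remaining adjtime() slew: %ld us\n", tx.offset);
	return 0;
}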
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index cb01cd8f919b..f98a1b7b16e9 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -384,6 +384,19 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
384} 384}
385 385
386/* 386/*
387 * Called from irq_enter() when idle was interrupted to reenable the
388 * per cpu device.
389 */
390void tick_check_oneshot_broadcast(int cpu)
391{
392 if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
393 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
394
395 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
396 }
397}
398
399/*
387 * Handle oneshot mode broadcasting 400 * Handle oneshot mode broadcasting
388 */ 401 */
389static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) 402static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 469248782c23..b1c05bf75ee0 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -36,6 +36,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
36extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); 36extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
37extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); 37extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
38extern int tick_broadcast_oneshot_active(void); 38extern int tick_broadcast_oneshot_active(void);
39extern void tick_check_oneshot_broadcast(int cpu);
39# else /* BROADCAST */ 40# else /* BROADCAST */
40static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) 41static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
41{ 42{
@@ -45,6 +46,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
45static inline void tick_broadcast_switch_to_oneshot(void) { } 46static inline void tick_broadcast_switch_to_oneshot(void) { }
46static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } 47static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
47static inline int tick_broadcast_oneshot_active(void) { return 0; } 48static inline int tick_broadcast_oneshot_active(void) { return 0; }
49static inline void tick_check_oneshot_broadcast(int cpu) { }
48# endif /* !BROADCAST */ 50# endif /* !BROADCAST */
49 51
50#else /* !ONESHOT */ 52#else /* !ONESHOT */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b711ffcb106c..0581c11fe6c6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -155,7 +155,7 @@ void tick_nohz_update_jiffies(void)
155 touch_softlockup_watchdog(); 155 touch_softlockup_watchdog();
156} 156}
157 157
158void tick_nohz_stop_idle(int cpu) 158static void tick_nohz_stop_idle(int cpu)
159{ 159{
160 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 160 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
161 161
@@ -377,6 +377,32 @@ ktime_t tick_nohz_get_sleep_length(void)
377 return ts->sleep_length; 377 return ts->sleep_length;
378} 378}
379 379
380static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
381{
382 hrtimer_cancel(&ts->sched_timer);
383 ts->sched_timer.expires = ts->idle_tick;
384
385 while (1) {
386 /* Forward the time to expire in the future */
387 hrtimer_forward(&ts->sched_timer, now, tick_period);
388
389 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
390 hrtimer_start(&ts->sched_timer,
391 ts->sched_timer.expires,
392 HRTIMER_MODE_ABS);
393 /* Check, if the timer was already in the past */
394 if (hrtimer_active(&ts->sched_timer))
395 break;
396 } else {
397 if (!tick_program_event(ts->sched_timer.expires, 0))
398 break;
399 }
400 /* Update jiffies and reread time */
401 tick_do_update_jiffies64(now);
402 now = ktime_get();
403 }
404}
405
380/** 406/**
381 * tick_nohz_restart_sched_tick - restart the idle tick from the idle task 407 * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
382 * 408 *
@@ -430,28 +456,7 @@ void tick_nohz_restart_sched_tick(void)
430 */ 456 */
431 ts->tick_stopped = 0; 457 ts->tick_stopped = 0;
432 ts->idle_exittime = now; 458 ts->idle_exittime = now;
433 hrtimer_cancel(&ts->sched_timer); 459 tick_nohz_restart(ts, now);
434 ts->sched_timer.expires = ts->idle_tick;
435
436 while (1) {
437 /* Forward the time to expire in the future */
438 hrtimer_forward(&ts->sched_timer, now, tick_period);
439
440 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
441 hrtimer_start(&ts->sched_timer,
442 ts->sched_timer.expires,
443 HRTIMER_MODE_ABS);
444 /* Check, if the timer was already in the past */
445 if (hrtimer_active(&ts->sched_timer))
446 break;
447 } else {
448 if (!tick_program_event(ts->sched_timer.expires, 0))
449 break;
450 }
451 /* Update jiffies and reread time */
452 tick_do_update_jiffies64(now);
453 now = ktime_get();
454 }
455 local_irq_enable(); 460 local_irq_enable();
456} 461}
457 462
@@ -503,10 +508,6 @@ static void tick_nohz_handler(struct clock_event_device *dev)
503 update_process_times(user_mode(regs)); 508 update_process_times(user_mode(regs));
504 profile_tick(CPU_PROFILING); 509 profile_tick(CPU_PROFILING);
505 510
506 /* Do not restart, when we are in the idle loop */
507 if (ts->tick_stopped)
508 return;
509
510 while (tick_nohz_reprogram(ts, now)) { 511 while (tick_nohz_reprogram(ts, now)) {
511 now = ktime_get(); 512 now = ktime_get();
512 tick_do_update_jiffies64(now); 513 tick_do_update_jiffies64(now);
@@ -552,6 +553,27 @@ static void tick_nohz_switch_to_nohz(void)
552 smp_processor_id()); 553 smp_processor_id());
553} 554}
554 555
556/*
557 * When NOHZ is enabled and the tick is stopped, we need to kick the
558 * tick timer from irq_enter() so that the jiffies update is kept
559 * alive during long running softirqs. That's ugly as hell, but
560 * correctness is key even if we need to fix the offending softirq in
561 * the first place.
562 *
563 * Note, this is different to tick_nohz_restart. We just kick the
564 * timer and do not touch the other magic bits which need to be done
565 * when idle is left.
566 */
567static void tick_nohz_kick_tick(int cpu)
568{
569 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
570
571 if (!ts->tick_stopped)
572 return;
573
574 tick_nohz_restart(ts, ktime_get());
575}
576
555#else 577#else
556 578
557static inline void tick_nohz_switch_to_nohz(void) { } 579static inline void tick_nohz_switch_to_nohz(void) { }
@@ -559,6 +581,19 @@ static inline void tick_nohz_switch_to_nohz(void) { }
559#endif /* NO_HZ */ 581#endif /* NO_HZ */
560 582
561/* 583/*
584 * Called from irq_enter to notify about the possible interruption of idle()
585 */
586void tick_check_idle(int cpu)
587{
588 tick_check_oneshot_broadcast(cpu);
589#ifdef CONFIG_NO_HZ
590 tick_nohz_stop_idle(cpu);
591 tick_nohz_update_jiffies();
592 tick_nohz_kick_tick(cpu);
593#endif
594}
595
596/*
562 * High resolution timer specific code 597 * High resolution timer specific code
563 */ 598 */
564#ifdef CONFIG_HIGH_RES_TIMERS 599#ifdef CONFIG_HIGH_RES_TIMERS
@@ -611,10 +646,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
611 profile_tick(CPU_PROFILING); 646 profile_tick(CPU_PROFILING);
612 } 647 }
613 648
614 /* Do not restart, when we are in the idle loop */
615 if (ts->tick_stopped)
616 return HRTIMER_NORESTART;
617
618 hrtimer_forward(timer, now, tick_period); 649 hrtimer_forward(timer, now, tick_period);
619 650
620 return HRTIMER_RESTART; 651 return HRTIMER_RESTART;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e91c29f961c9..e7acfb482a68 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -58,27 +58,26 @@ struct clocksource *clock;
58 58
59#ifdef CONFIG_GENERIC_TIME 59#ifdef CONFIG_GENERIC_TIME
60/** 60/**
61 * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook 61 * clocksource_forward_now - update clock to the current time
62 * 62 *
63 * private function, must hold xtime_lock lock when being 63 * Forward the current clock to update its state since the last call to
64 * called. Returns the number of nanoseconds since the 64 * update_wall_time(). This is useful before significant clock changes,
65 * last call to update_wall_time() (adjusted by NTP scaling) 65 * as it avoids having to deal with this time offset explicitly.
66 */ 66 */
67static inline s64 __get_nsec_offset(void) 67static void clocksource_forward_now(void)
68{ 68{
69 cycle_t cycle_now, cycle_delta; 69 cycle_t cycle_now, cycle_delta;
70 s64 ns_offset; 70 s64 nsec;
71 71
72 /* read clocksource: */
73 cycle_now = clocksource_read(clock); 72 cycle_now = clocksource_read(clock);
74
75 /* calculate the delta since the last update_wall_time: */
76 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 73 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
74 clock->cycle_last = cycle_now;
77 75
78 /* convert to nanoseconds: */ 76 nsec = cyc2ns(clock, cycle_delta);
79 ns_offset = cyc2ns(clock, cycle_delta); 77 timespec_add_ns(&xtime, nsec);
80 78
81 return ns_offset; 79 nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
80 clock->raw_time.tv_nsec += nsec;
82} 81}
83 82
84/** 83/**
@@ -89,6 +88,7 @@ static inline s64 __get_nsec_offset(void)
89 */ 88 */
90void getnstimeofday(struct timespec *ts) 89void getnstimeofday(struct timespec *ts)
91{ 90{
91 cycle_t cycle_now, cycle_delta;
92 unsigned long seq; 92 unsigned long seq;
93 s64 nsecs; 93 s64 nsecs;
94 94
@@ -96,7 +96,15 @@ void getnstimeofday(struct timespec *ts)
96 seq = read_seqbegin(&xtime_lock); 96 seq = read_seqbegin(&xtime_lock);
97 97
98 *ts = xtime; 98 *ts = xtime;
99 nsecs = __get_nsec_offset(); 99
100 /* read clocksource: */
101 cycle_now = clocksource_read(clock);
102
103 /* calculate the delta since the last update_wall_time: */
104 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
105
106 /* convert to nanoseconds: */
107 nsecs = cyc2ns(clock, cycle_delta);
100 108
101 } while (read_seqretry(&xtime_lock, seq)); 109 } while (read_seqretry(&xtime_lock, seq));
102 110
@@ -129,22 +137,22 @@ EXPORT_SYMBOL(do_gettimeofday);
129 */ 137 */
130int do_settimeofday(struct timespec *tv) 138int do_settimeofday(struct timespec *tv)
131{ 139{
140 struct timespec ts_delta;
132 unsigned long flags; 141 unsigned long flags;
133 time_t wtm_sec, sec = tv->tv_sec;
134 long wtm_nsec, nsec = tv->tv_nsec;
135 142
136 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) 143 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
137 return -EINVAL; 144 return -EINVAL;
138 145
139 write_seqlock_irqsave(&xtime_lock, flags); 146 write_seqlock_irqsave(&xtime_lock, flags);
140 147
141 nsec -= __get_nsec_offset(); 148 clocksource_forward_now();
149
150 ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
151 ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
152 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta);
142 153
143 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); 154 xtime = *tv;
144 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
145 155
146 set_normalized_timespec(&xtime, sec, nsec);
147 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
148 update_xtime_cache(0); 156 update_xtime_cache(0);
149 157
150 clock->error = 0; 158 clock->error = 0;
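
The wall_to_monotonic update above preserves the invariant that wall time plus wall_to_monotonic equals monotonic time, so setting the wall clock never disturbs the monotonic clock. A toy userspace check of that bookkeeping (plain structs, normalization and locking omitted):

	#include <stdio.h>

	struct ts { long sec; long nsec; };

	int main(void)
	{
		struct ts xtime = { 1000, 500 };              /* current wall time */
		struct ts wall_to_monotonic = { -900, -500 }; /* mono = wall + w2m */
		struct ts tv = { 2000, 0 };                   /* new wall time to set */

		/* ts_delta = tv - xtime, then w2m -= ts_delta, as in
		 * do_settimeofday() above. */
		struct ts d = { tv.sec - xtime.sec, tv.nsec - xtime.nsec };
		wall_to_monotonic.sec  -= d.sec;
		wall_to_monotonic.nsec -= d.nsec;
		xtime = tv;

		/* Monotonic time is unchanged: still 100 sec, 0 nsec. */
		printf("mono = %ld sec %ld nsec\n",
		       xtime.sec + wall_to_monotonic.sec,
		       xtime.nsec + wall_to_monotonic.nsec);
		return 0;
	}
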
@@ -170,22 +178,19 @@ EXPORT_SYMBOL(do_settimeofday);
170static void change_clocksource(void) 178static void change_clocksource(void)
171{ 179{
172 struct clocksource *new; 180 struct clocksource *new;
173 cycle_t now;
174 u64 nsec;
175 181
176 new = clocksource_get_next(); 182 new = clocksource_get_next();
177 183
178 if (clock == new) 184 if (clock == new)
179 return; 185 return;
180 186
181 new->cycle_last = 0; 187 clocksource_forward_now();
182 now = clocksource_read(new);
183 nsec = __get_nsec_offset();
184 timespec_add_ns(&xtime, nsec);
185 188
186 clock = new; 189 new->raw_time = clock->raw_time;
187 clock->cycle_last = now;
188 190
191 clock = new;
192 clock->cycle_last = 0;
193 clock->cycle_last = clocksource_read(new);
189 clock->error = 0; 194 clock->error = 0;
190 clock->xtime_nsec = 0; 195 clock->xtime_nsec = 0;
191 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 196 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -200,11 +205,44 @@ static void change_clocksource(void)
200 */ 205 */
201} 206}
202#else 207#else
208static inline void clocksource_forward_now(void) { }
203static inline void change_clocksource(void) { } 209static inline void change_clocksource(void) { }
204static inline s64 __get_nsec_offset(void) { return 0; }
205#endif 210#endif
206 211
207/** 212/**
213 * getrawmonotonic - Returns the raw monotonic time in a timespec
214 * @ts: pointer to the timespec to be set
215 *
216 * Returns the raw monotonic time (completely un-modified by ntp)
217 */
218void getrawmonotonic(struct timespec *ts)
219{
220 unsigned long seq;
221 s64 nsecs;
222 cycle_t cycle_now, cycle_delta;
223
224 do {
225 seq = read_seqbegin(&xtime_lock);
226
227 /* read clocksource: */
228 cycle_now = clocksource_read(clock);
229
230 /* calculate the delta since the last update_wall_time: */
231 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
232
233 /* convert to nanoseconds: */
234 nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
235
236 *ts = clock->raw_time;
237
238 } while (read_seqretry(&xtime_lock, seq));
239
240 timespec_add_ns(ts, nsecs);
241}
242EXPORT_SYMBOL(getrawmonotonic);
243
244
245/**
208 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres 246 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
209 */ 247 */
210int timekeeping_valid_for_hres(void) 248int timekeeping_valid_for_hres(void)
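
getrawmonotonic() above converts the cycle delta with mult_orig, the multiplier before any NTP adjustment, so the raw clock is never steered. A toy comparison of adjusted vs. raw conversion, with invented multiplier values:

	#include <stdint.h>
	#include <stdio.h>

	#define SHIFT 10

	int main(void)
	{
		uint64_t mult_orig = 1 << SHIFT;   /* nominal, un-steered rate */
		uint64_t mult = mult_orig + 3;     /* NTP-adjusted rate */
		uint64_t cycle_delta = 1000000;

		/* Wall/monotonic time uses the steered multiplier... */
		printf("wall ns: %llu\n",
		       (unsigned long long)((cycle_delta * mult) >> SHIFT));
		/* ...while the raw clock always uses mult_orig. */
		printf("raw  ns: %llu\n",
		       (unsigned long long)((cycle_delta * mult_orig) >> SHIFT));
		return 0;
	}
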
@@ -265,8 +303,6 @@ void __init timekeeping_init(void)
265static int timekeeping_suspended; 303static int timekeeping_suspended;
266/* time in seconds when suspend began */ 304/* time in seconds when suspend began */
267static unsigned long timekeeping_suspend_time; 305static unsigned long timekeeping_suspend_time;
268/* xtime offset when we went into suspend */
269static s64 timekeeping_suspend_nsecs;
270 306
271/** 307/**
272 * timekeeping_resume - Resumes the generic timekeeping subsystem. 308 * timekeeping_resume - Resumes the generic timekeeping subsystem.
@@ -292,8 +328,6 @@ static int timekeeping_resume(struct sys_device *dev)
292 wall_to_monotonic.tv_sec -= sleep_length; 328 wall_to_monotonic.tv_sec -= sleep_length;
293 total_sleep_time += sleep_length; 329 total_sleep_time += sleep_length;
294 } 330 }
295 /* Make sure that we have the correct xtime reference */
296 timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
297 update_xtime_cache(0); 331 update_xtime_cache(0);
298 /* re-base the last cycle value */ 332 /* re-base the last cycle value */
299 clock->cycle_last = 0; 333 clock->cycle_last = 0;
@@ -319,8 +353,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
319 timekeeping_suspend_time = read_persistent_clock(); 353 timekeeping_suspend_time = read_persistent_clock();
320 354
321 write_seqlock_irqsave(&xtime_lock, flags); 355 write_seqlock_irqsave(&xtime_lock, flags);
322 /* Get the current xtime offset */ 356 clocksource_forward_now();
323 timekeeping_suspend_nsecs = __get_nsec_offset();
324 timekeeping_suspended = 1; 357 timekeeping_suspended = 1;
325 write_sequnlock_irqrestore(&xtime_lock, flags); 358 write_sequnlock_irqrestore(&xtime_lock, flags);
326 359
@@ -454,23 +487,29 @@ void update_wall_time(void)
454#else 487#else
455 offset = clock->cycle_interval; 488 offset = clock->cycle_interval;
456#endif 489#endif
457 clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift; 490 clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift;
458 491
459 /* normally this loop will run just once, however in the 492 /* normally this loop will run just once, however in the
460 * case of lost or late ticks, it will accumulate correctly. 493 * case of lost or late ticks, it will accumulate correctly.
461 */ 494 */
462 while (offset >= clock->cycle_interval) { 495 while (offset >= clock->cycle_interval) {
463 /* accumulate one interval */ 496 /* accumulate one interval */
464 clock->xtime_nsec += clock->xtime_interval;
465 clock->cycle_last += clock->cycle_interval;
466 offset -= clock->cycle_interval; 497 offset -= clock->cycle_interval;
498 clock->cycle_last += clock->cycle_interval;
467 499
500 clock->xtime_nsec += clock->xtime_interval;
468 if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { 501 if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
469 clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; 502 clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
470 xtime.tv_sec++; 503 xtime.tv_sec++;
471 second_overflow(); 504 second_overflow();
472 } 505 }
473 506
507 clock->raw_time.tv_nsec += clock->raw_interval;
508 if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) {
509 clock->raw_time.tv_nsec -= NSEC_PER_SEC;
510 clock->raw_time.tv_sec++;
511 }
512
474 /* accumulate error between NTP and clock interval */ 513 /* accumulate error between NTP and clock interval */
475 clock->error += tick_length; 514 clock->error += tick_length;
476 clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); 515 clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift);
@@ -479,9 +518,12 @@ void update_wall_time(void)
479 /* correct the clock when NTP error is too big */ 518 /* correct the clock when NTP error is too big */
480 clocksource_adjust(offset); 519 clocksource_adjust(offset);
481 520
482 /* store full nanoseconds into xtime */ 521 /* store full nanoseconds into xtime after rounding it up and
483 xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; 522 * add the remainder to the error difference.
523 */
524 xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1;
484 clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; 525 clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
526 clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift);
485 527
486 update_xtime_cache(cyc2ns(clock, offset)); 528 update_xtime_cache(cyc2ns(clock, offset));
487 529
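
The xtime_nsec bookkeeping above keeps nanoseconds in shifted fixed-point form; the new "+ 1" rounds the stored nanoseconds up and the leftover is folded back into clock->error so NTP correction accounts for it. A small sketch of that round-up step, with an assumed shift value:

	#include <stdint.h>
	#include <stdio.h>

	#define SHIFT 8  /* assumed clocksource shift */

	int main(void)
	{
		/* 1234 ns plus a fractional remainder, in shifted units */
		int64_t xtime_nsec = (1234LL << SHIFT) + 37;

		/* Round the visible nanoseconds up, as update_wall_time() now
		 * does, then subtract them back out of the fixed-point value. */
		int64_t tv_nsec = (xtime_nsec >> SHIFT) + 1;
		xtime_nsec -= tv_nsec << SHIFT;

		/* The (negative) leftover is what gets added to clock->error. */
		printf("tv_nsec=%lld leftover=%lld (shifted units)\n",
		       (long long)tv_nsec, (long long)xtime_nsec);
		return 0;
	}
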
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index a40e20fd0001..f6426911e35a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -47,13 +47,14 @@ static void print_name_offset(struct seq_file *m, void *sym)
47} 47}
48 48
49static void 49static void
50print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) 50print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
51 int idx, u64 now)
51{ 52{
52#ifdef CONFIG_TIMER_STATS 53#ifdef CONFIG_TIMER_STATS
53 char tmp[TASK_COMM_LEN + 1]; 54 char tmp[TASK_COMM_LEN + 1];
54#endif 55#endif
55 SEQ_printf(m, " #%d: ", idx); 56 SEQ_printf(m, " #%d: ", idx);
56 print_name_offset(m, timer); 57 print_name_offset(m, taddr);
57 SEQ_printf(m, ", "); 58 SEQ_printf(m, ", ");
58 print_name_offset(m, timer->function); 59 print_name_offset(m, timer->function);
59 SEQ_printf(m, ", S:%02lx", timer->state); 60 SEQ_printf(m, ", S:%02lx", timer->state);
@@ -99,7 +100,7 @@ next_one:
99 tmp = *timer; 100 tmp = *timer;
100 spin_unlock_irqrestore(&base->cpu_base->lock, flags); 101 spin_unlock_irqrestore(&base->cpu_base->lock, flags);
101 102
102 print_timer(m, &tmp, i, now); 103 print_timer(m, timer, &tmp, i, now);
103 next++; 104 next++;
104 goto next_one; 105 goto next_one;
105 } 106 }
@@ -109,6 +110,7 @@ next_one:
109static void 110static void
110print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) 111print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
111{ 112{
113 SEQ_printf(m, " .base: %p\n", base);
112 SEQ_printf(m, " .index: %d\n", 114 SEQ_printf(m, " .index: %d\n",
113 base->index); 115 base->index);
114 SEQ_printf(m, " .resolution: %Lu nsecs\n", 116 SEQ_printf(m, " .resolution: %Lu nsecs\n",
@@ -183,12 +185,16 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
183 185
184#ifdef CONFIG_GENERIC_CLOCKEVENTS 186#ifdef CONFIG_GENERIC_CLOCKEVENTS
185static void 187static void
186print_tickdevice(struct seq_file *m, struct tick_device *td) 188print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
187{ 189{
188 struct clock_event_device *dev = td->evtdev; 190 struct clock_event_device *dev = td->evtdev;
189 191
190 SEQ_printf(m, "\n"); 192 SEQ_printf(m, "\n");
191 SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); 193 SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
194 if (cpu < 0)
195 SEQ_printf(m, "Broadcast device\n");
196 else
197 SEQ_printf(m, "Per CPU device: %d\n", cpu);
192 198
193 SEQ_printf(m, "Clock Event Device: "); 199 SEQ_printf(m, "Clock Event Device: ");
194 if (!dev) { 200 if (!dev) {
@@ -222,7 +228,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
222 int cpu; 228 int cpu;
223 229
224#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 230#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
225 print_tickdevice(m, tick_get_broadcast_device()); 231 print_tickdevice(m, tick_get_broadcast_device(), -1);
226 SEQ_printf(m, "tick_broadcast_mask: %08lx\n", 232 SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
227 tick_get_broadcast_mask()->bits[0]); 233 tick_get_broadcast_mask()->bits[0]);
228#ifdef CONFIG_TICK_ONESHOT 234#ifdef CONFIG_TICK_ONESHOT
@@ -232,7 +238,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
232 SEQ_printf(m, "\n"); 238 SEQ_printf(m, "\n");
233#endif 239#endif
234 for_each_online_cpu(cpu) 240 for_each_online_cpu(cpu)
235 print_tickdevice(m, tick_get_device(cpu)); 241 print_tickdevice(m, tick_get_device(cpu), cpu);
236 SEQ_printf(m, "\n"); 242 SEQ_printf(m, "\n");
237} 243}
238#else 244#else
@@ -244,7 +250,7 @@ static int timer_list_show(struct seq_file *m, void *v)
244 u64 now = ktime_to_ns(ktime_get()); 250 u64 now = ktime_to_ns(ktime_get());
245 int cpu; 251 int cpu;
246 252
247 SEQ_printf(m, "Timer List Version: v0.3\n"); 253 SEQ_printf(m, "Timer List Version: v0.4\n");
248 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 254 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
249 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 255 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
250 256
diff --git a/kernel/timer.c b/kernel/timer.c
index 510fe69351ca..56becf373c58 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1436,9 +1436,11 @@ static void __cpuinit migrate_timers(int cpu)
1436 BUG_ON(cpu_online(cpu)); 1436 BUG_ON(cpu_online(cpu));
1437 old_base = per_cpu(tvec_bases, cpu); 1437 old_base = per_cpu(tvec_bases, cpu);
1438 new_base = get_cpu_var(tvec_bases); 1438 new_base = get_cpu_var(tvec_bases);
1439 1439 /*
1440 local_irq_disable(); 1440 * The caller is globally serialized and nobody else
1441 spin_lock(&new_base->lock); 1441 * takes two locks at once, so deadlock is not possible.
1442 */
1443 spin_lock_irq(&new_base->lock);
1442 spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); 1444 spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1443 1445
1444 BUG_ON(old_base->running_timer); 1446 BUG_ON(old_base->running_timer);
@@ -1453,8 +1455,7 @@ static void __cpuinit migrate_timers(int cpu)
1453 } 1455 }
1454 1456
1455 spin_unlock(&old_base->lock); 1457 spin_unlock(&old_base->lock);
1456 spin_unlock(&new_base->lock); 1458 spin_unlock_irq(&new_base->lock);
1457 local_irq_enable();
1458 put_cpu_var(tvec_bases); 1459 put_cpu_var(tvec_bases);
1459} 1460}
1460#endif /* CONFIG_HOTPLUG_CPU */ 1461#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd60..1cb3e1f616af 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,23 +1,37 @@
1# 1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: 2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
3# 3#
4
5config NOP_TRACER
6 bool
7
4config HAVE_FTRACE 8config HAVE_FTRACE
5 bool 9 bool
10 select NOP_TRACER
6 11
7config HAVE_DYNAMIC_FTRACE 12config HAVE_DYNAMIC_FTRACE
8 bool 13 bool
9 14
15config HAVE_FTRACE_MCOUNT_RECORD
16 bool
17
10config TRACER_MAX_TRACE 18config TRACER_MAX_TRACE
11 bool 19 bool
12 20
21config RING_BUFFER
22 bool
23
13config TRACING 24config TRACING
14 bool 25 bool
15 select DEBUG_FS 26 select DEBUG_FS
27 select RING_BUFFER
16 select STACKTRACE 28 select STACKTRACE
29 select TRACEPOINTS
17 30
18config FTRACE 31config FTRACE
19 bool "Kernel Function Tracer" 32 bool "Kernel Function Tracer"
20 depends on HAVE_FTRACE 33 depends on HAVE_FTRACE
34 depends on DEBUG_KERNEL
21 select FRAME_POINTER 35 select FRAME_POINTER
22 select TRACING 36 select TRACING
23 select CONTEXT_SWITCH_TRACER 37 select CONTEXT_SWITCH_TRACER
@@ -36,6 +50,7 @@ config IRQSOFF_TRACER
36 depends on TRACE_IRQFLAGS_SUPPORT 50 depends on TRACE_IRQFLAGS_SUPPORT
37 depends on GENERIC_TIME 51 depends on GENERIC_TIME
38 depends on HAVE_FTRACE 52 depends on HAVE_FTRACE
53 depends on DEBUG_KERNEL
39 select TRACE_IRQFLAGS 54 select TRACE_IRQFLAGS
40 select TRACING 55 select TRACING
41 select TRACER_MAX_TRACE 56 select TRACER_MAX_TRACE
@@ -59,6 +74,7 @@ config PREEMPT_TRACER
59 depends on GENERIC_TIME 74 depends on GENERIC_TIME
60 depends on PREEMPT 75 depends on PREEMPT
61 depends on HAVE_FTRACE 76 depends on HAVE_FTRACE
77 depends on DEBUG_KERNEL
62 select TRACING 78 select TRACING
63 select TRACER_MAX_TRACE 79 select TRACER_MAX_TRACE
64 help 80 help
@@ -86,6 +102,7 @@ config SYSPROF_TRACER
86config SCHED_TRACER 102config SCHED_TRACER
87 bool "Scheduling Latency Tracer" 103 bool "Scheduling Latency Tracer"
88 depends on HAVE_FTRACE 104 depends on HAVE_FTRACE
105 depends on DEBUG_KERNEL
89 select TRACING 106 select TRACING
90 select CONTEXT_SWITCH_TRACER 107 select CONTEXT_SWITCH_TRACER
91 select TRACER_MAX_TRACE 108 select TRACER_MAX_TRACE
@@ -96,16 +113,56 @@ config SCHED_TRACER
96config CONTEXT_SWITCH_TRACER 113config CONTEXT_SWITCH_TRACER
97 bool "Trace process context switches" 114 bool "Trace process context switches"
98 depends on HAVE_FTRACE 115 depends on HAVE_FTRACE
116 depends on DEBUG_KERNEL
99 select TRACING 117 select TRACING
100 select MARKERS 118 select MARKERS
101 help 119 help
102 This tracer gets called from the context switch and records 120 This tracer gets called from the context switch and records
103 all switching of tasks. 121 all switching of tasks.
104 122
123config BOOT_TRACER
124 bool "Trace boot initcalls"
125 depends on HAVE_FTRACE
126 depends on DEBUG_KERNEL
127 select TRACING
128 help
129 This tracer helps developers to optimize boot times: it records
130 the timings of the initcalls and traces key events and the identity
131 of tasks that can cause boot delays, such as context-switches.
132
133 Its aim is to be parsed by the /scripts/bootgraph.pl tool to
134 produce pretty graphics about boot inefficiencies, giving a visual
135 representation of the delays during initcalls - but the raw
136 /debug/tracing/trace text output is readable too.
137
 138 (Note that tracing self-tests can't be enabled if this tracer is
 139 selected, because the self-tests are initcalls as well and would
 140 invalidate the boot trace.)
141
142config STACK_TRACER
143 bool "Trace max stack"
144 depends on HAVE_FTRACE
145 depends on DEBUG_KERNEL
146 select FTRACE
147 select STACKTRACE
148 help
149 This special tracer records the maximum stack footprint of the
150 kernel and displays it in debugfs/tracing/stack_trace.
151
152 This tracer works by hooking into every function call that the
 153 kernel executes, and keeping the maximum stack depth and its
 154 stack trace saved. Because this logic has to execute in every
155 kernel function, all the time, this option can slow down the
156 kernel measurably and is generally intended for kernel
157 developers only.
158
159 Say N if unsure.
160
105config DYNAMIC_FTRACE 161config DYNAMIC_FTRACE
106 bool "enable/disable ftrace tracepoints dynamically" 162 bool "enable/disable ftrace tracepoints dynamically"
107 depends on FTRACE 163 depends on FTRACE
108 depends on HAVE_DYNAMIC_FTRACE 164 depends on HAVE_DYNAMIC_FTRACE
165 depends on DEBUG_KERNEL
109 default y 166 default y
110 help 167 help
111 This option will modify all the calls to ftrace dynamically 168 This option will modify all the calls to ftrace dynamically
@@ -121,12 +178,17 @@ config DYNAMIC_FTRACE
121 were made. If so, it runs stop_machine (stops all CPUS) 178 were made. If so, it runs stop_machine (stops all CPUS)
122 and modifies the code to jump over the call to ftrace. 179 and modifies the code to jump over the call to ftrace.
123 180
181config FTRACE_MCOUNT_RECORD
182 def_bool y
183 depends on DYNAMIC_FTRACE
184 depends on HAVE_FTRACE_MCOUNT_RECORD
185
124config FTRACE_SELFTEST 186config FTRACE_SELFTEST
125 bool 187 bool
126 188
127config FTRACE_STARTUP_TEST 189config FTRACE_STARTUP_TEST
128 bool "Perform a startup test on ftrace" 190 bool "Perform a startup test on ftrace"
129 depends on TRACING 191 depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
130 select FTRACE_SELFTEST 192 select FTRACE_SELFTEST
131 help 193 help
132 This option performs a series of startup tests on ftrace. On bootup 194 This option performs a series of startup tests on ftrace. On bootup
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de17288..a85dfba88ba0 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -11,6 +11,7 @@ obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13obj-$(CONFIG_FTRACE) += libftrace.o 13obj-$(CONFIG_FTRACE) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
14 15
15obj-$(CONFIG_TRACING) += trace.o 16obj-$(CONFIG_TRACING) += trace.o
16obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 17obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
@@ -19,6 +20,9 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
19obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 20obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
20obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 21obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
21obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o 22obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
23obj-$(CONFIG_NOP_TRACER) += trace_nop.o
24obj-$(CONFIG_STACK_TRACER) += trace_stack.o
22obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 25obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
26obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
23 27
24libftrace-y := ftrace.o 28libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b403..4dda4f60a2a9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -81,7 +81,7 @@ void clear_ftrace_function(void)
81 81
82static int __register_ftrace_function(struct ftrace_ops *ops) 82static int __register_ftrace_function(struct ftrace_ops *ops)
83{ 83{
84 /* Should never be called by interrupts */ 84 /* should not be called from interrupt context */
85 spin_lock(&ftrace_lock); 85 spin_lock(&ftrace_lock);
86 86
87 ops->next = ftrace_list; 87 ops->next = ftrace_list;
@@ -115,6 +115,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
115 struct ftrace_ops **p; 115 struct ftrace_ops **p;
116 int ret = 0; 116 int ret = 0;
117 117
118 /* should not be called from interrupt context */
118 spin_lock(&ftrace_lock); 119 spin_lock(&ftrace_lock);
119 120
120 /* 121 /*
@@ -153,6 +154,30 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
153 154
154#ifdef CONFIG_DYNAMIC_FTRACE 155#ifdef CONFIG_DYNAMIC_FTRACE
155 156
157#ifndef CONFIG_FTRACE_MCOUNT_RECORD
158/*
 159 * The hash lock is only needed when the recording of the mcount
 160 * callers is dynamic. That is, when the callers record themselves
 161 * at run time and are not recorded at compile time.
162 */
163static DEFINE_SPINLOCK(ftrace_hash_lock);
164#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
165#define ftrace_hash_unlock(flags) \
166 spin_unlock_irqrestore(&ftrace_hash_lock, flags)
167#else
168/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
169#define ftrace_hash_lock(flags) do { (void)(flags); } while (0)
 170#define ftrace_hash_unlock(flags) do { } while (0)
171#endif
172
173/*
174 * Since MCOUNT_ADDR may point to mcount itself, we do not want
175 * to get it confused by reading a reference in the code as we
 176 * are parsing the objcopy output of the text section. Use a
 177 * variable for it instead.
178 */
179static unsigned long mcount_addr = MCOUNT_ADDR;
180
156static struct task_struct *ftraced_task; 181static struct task_struct *ftraced_task;
157 182
158enum { 183enum {
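
The ftrace_hash_lock()/ftrace_hash_unlock() pair above compiles to either a real spinlock or a no-op depending on the config option. The same pattern carries over to userspace with pthreads; a minimal sketch, with USE_LOCK standing in for the config symbol:

	#include <pthread.h>
	#include <stdio.h>

	#define USE_LOCK 1  /* flip to 0 to compile the lock away */

	#if USE_LOCK
	static pthread_mutex_t hash_mutex = PTHREAD_MUTEX_INITIALIZER;
	#define hash_lock()   pthread_mutex_lock(&hash_mutex)
	#define hash_unlock() pthread_mutex_unlock(&hash_mutex)
	#else
	/* No-op versions: callers keep the same shape, zero runtime cost. */
	#define hash_lock()   do { } while (0)
	#define hash_unlock() do { } while (0)
	#endif

	int main(void)
	{
		hash_lock();
		printf("critical section\n");
		hash_unlock();
		return 0;
	}
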
@@ -171,7 +196,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
171 196
172static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); 197static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
173 198
174static DEFINE_SPINLOCK(ftrace_shutdown_lock);
175static DEFINE_MUTEX(ftraced_lock); 199static DEFINE_MUTEX(ftraced_lock);
176static DEFINE_MUTEX(ftrace_regex_lock); 200static DEFINE_MUTEX(ftrace_regex_lock);
177 201
@@ -294,13 +318,37 @@ static inline void ftrace_del_hash(struct dyn_ftrace *node)
294 318
295static void ftrace_free_rec(struct dyn_ftrace *rec) 319static void ftrace_free_rec(struct dyn_ftrace *rec)
296{ 320{
297 /* no locking, only called from kstop_machine */
298
299 rec->ip = (unsigned long)ftrace_free_records; 321 rec->ip = (unsigned long)ftrace_free_records;
300 ftrace_free_records = rec; 322 ftrace_free_records = rec;
301 rec->flags |= FTRACE_FL_FREE; 323 rec->flags |= FTRACE_FL_FREE;
302} 324}
303 325
326void ftrace_release(void *start, unsigned long size)
327{
328 struct dyn_ftrace *rec;
329 struct ftrace_page *pg;
330 unsigned long s = (unsigned long)start;
331 unsigned long e = s + size;
332 int i;
333
334 if (ftrace_disabled || !start)
335 return;
336
337 /* should not be called from interrupt context */
338 spin_lock(&ftrace_lock);
339
340 for (pg = ftrace_pages_start; pg; pg = pg->next) {
341 for (i = 0; i < pg->index; i++) {
342 rec = &pg->records[i];
343
344 if ((rec->ip >= s) && (rec->ip < e))
345 ftrace_free_rec(rec);
346 }
347 }
348 spin_unlock(&ftrace_lock);
349
350}
351
304static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 352static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
305{ 353{
306 struct dyn_ftrace *rec; 354 struct dyn_ftrace *rec;
@@ -338,7 +386,6 @@ ftrace_record_ip(unsigned long ip)
338 unsigned long flags; 386 unsigned long flags;
339 unsigned long key; 387 unsigned long key;
340 int resched; 388 int resched;
341 int atomic;
342 int cpu; 389 int cpu;
343 390
344 if (!ftrace_enabled || ftrace_disabled) 391 if (!ftrace_enabled || ftrace_disabled)
@@ -368,9 +415,7 @@ ftrace_record_ip(unsigned long ip)
368 if (ftrace_ip_in_hash(ip, key)) 415 if (ftrace_ip_in_hash(ip, key))
369 goto out; 416 goto out;
370 417
371 atomic = irqs_disabled(); 418 ftrace_hash_lock(flags);
372
373 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
374 419
375 /* This ip may have hit the hash before the lock */ 420 /* This ip may have hit the hash before the lock */
376 if (ftrace_ip_in_hash(ip, key)) 421 if (ftrace_ip_in_hash(ip, key))
@@ -387,7 +432,7 @@ ftrace_record_ip(unsigned long ip)
387 ftraced_trigger = 1; 432 ftraced_trigger = 1;
388 433
389 out_unlock: 434 out_unlock:
390 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags); 435 ftrace_hash_unlock(flags);
391 out: 436 out:
392 per_cpu(ftrace_shutdown_disable_cpu, cpu)--; 437 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
393 438
@@ -531,6 +576,16 @@ static void ftrace_shutdown_replenish(void)
531 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); 576 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
532} 577}
533 578
579static void print_ip_ins(const char *fmt, unsigned char *p)
580{
581 int i;
582
583 printk(KERN_CONT "%s", fmt);
584
585 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
586 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
587}
588
534static int 589static int
535ftrace_code_disable(struct dyn_ftrace *rec) 590ftrace_code_disable(struct dyn_ftrace *rec)
536{ 591{
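
print_ip_ins() above dumps the instruction bytes at a failed patch site as colon-separated hex. A standalone userspace equivalent, with MCOUNT_INSN_SIZE assumed to be 5 (the x86 near-call size) and invented byte patterns:

	#include <stdio.h>

	#define MCOUNT_INSN_SIZE 5  /* assumed: size of a call instruction */

	static void print_ip_ins(const char *fmt, const unsigned char *p)
	{
		int i;

		printf("%s", fmt);
		for (i = 0; i < MCOUNT_INSN_SIZE; i++)
			printf("%s%02x", i ? ":" : "", p[i]);
	}

	int main(void)
	{
		/* e8 xx xx xx xx would be a near call; 0x90 is a NOP */
		unsigned char call[] = { 0xe8, 0x12, 0x34, 0x56, 0x78 };
		unsigned char nop[]  = { 0x90, 0x90, 0x90, 0x90, 0x90 };

		print_ip_ins(" expected: ", call);
		print_ip_ins(" actual: ", nop);
		printf("\n");
		return 0;
	}
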
@@ -541,10 +596,27 @@ ftrace_code_disable(struct dyn_ftrace *rec)
541 ip = rec->ip; 596 ip = rec->ip;
542 597
543 nop = ftrace_nop_replace(); 598 nop = ftrace_nop_replace();
544 call = ftrace_call_replace(ip, MCOUNT_ADDR); 599 call = ftrace_call_replace(ip, mcount_addr);
545 600
546 failed = ftrace_modify_code(ip, call, nop); 601 failed = ftrace_modify_code(ip, call, nop);
547 if (failed) { 602 if (failed) {
603 switch (failed) {
604 case 1:
605 WARN_ON_ONCE(1);
606 pr_info("ftrace faulted on modifying ");
607 print_ip_sym(ip);
608 break;
609 case 2:
610 WARN_ON_ONCE(1);
611 pr_info("ftrace failed to modify ");
612 print_ip_sym(ip);
613 print_ip_ins(" expected: ", call);
614 print_ip_ins(" actual: ", (unsigned char *)ip);
615 print_ip_ins(" replace: ", nop);
616 printk(KERN_CONT "\n");
617 break;
618 }
619
548 rec->flags |= FTRACE_FL_FAILED; 620 rec->flags |= FTRACE_FL_FAILED;
549 return 0; 621 return 0;
550 } 622 }
@@ -792,47 +864,7 @@ static int ftrace_update_code(void)
792 return 1; 864 return 1;
793} 865}
794 866
795static int ftraced(void *ignore) 867static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
796{
797 unsigned long usecs;
798
799 while (!kthread_should_stop()) {
800
801 set_current_state(TASK_INTERRUPTIBLE);
802
803 /* check once a second */
804 schedule_timeout(HZ);
805
806 if (unlikely(ftrace_disabled))
807 continue;
808
809 mutex_lock(&ftrace_sysctl_lock);
810 mutex_lock(&ftraced_lock);
811 if (!ftraced_suspend && !ftraced_stop &&
812 ftrace_update_code()) {
813 usecs = nsecs_to_usecs(ftrace_update_time);
814 if (ftrace_update_tot_cnt > 100000) {
815 ftrace_update_tot_cnt = 0;
816 pr_info("hm, dftrace overflow: %lu change%s"
817 " (%lu total) in %lu usec%s\n",
818 ftrace_update_cnt,
819 ftrace_update_cnt != 1 ? "s" : "",
820 ftrace_update_tot_cnt,
821 usecs, usecs != 1 ? "s" : "");
822 ftrace_disabled = 1;
823 WARN_ON_ONCE(1);
824 }
825 }
826 mutex_unlock(&ftraced_lock);
827 mutex_unlock(&ftrace_sysctl_lock);
828
829 ftrace_shutdown_replenish();
830 }
831 __set_current_state(TASK_RUNNING);
832 return 0;
833}
834
835static int __init ftrace_dyn_table_alloc(void)
836{ 868{
837 struct ftrace_page *pg; 869 struct ftrace_page *pg;
838 int cnt; 870 int cnt;
@@ -859,7 +891,9 @@ static int __init ftrace_dyn_table_alloc(void)
859 891
860 pg = ftrace_pages = ftrace_pages_start; 892 pg = ftrace_pages = ftrace_pages_start;
861 893
862 cnt = NR_TO_INIT / ENTRIES_PER_PAGE; 894 cnt = num_to_init / ENTRIES_PER_PAGE;
895 pr_info("ftrace: allocating %ld hash entries in %d pages\n",
896 num_to_init, cnt);
863 897
864 for (i = 0; i < cnt; i++) { 898 for (i = 0; i < cnt; i++) {
865 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 899 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -901,6 +935,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
901 935
902 (*pos)++; 936 (*pos)++;
903 937
938 /* should not be called from interrupt context */
939 spin_lock(&ftrace_lock);
904 retry: 940 retry:
905 if (iter->idx >= iter->pg->index) { 941 if (iter->idx >= iter->pg->index) {
906 if (iter->pg->next) { 942 if (iter->pg->next) {
@@ -910,15 +946,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
910 } 946 }
911 } else { 947 } else {
912 rec = &iter->pg->records[iter->idx++]; 948 rec = &iter->pg->records[iter->idx++];
913 if ((!(iter->flags & FTRACE_ITER_FAILURES) && 949 if ((rec->flags & FTRACE_FL_FREE) ||
950
951 (!(iter->flags & FTRACE_ITER_FAILURES) &&
914 (rec->flags & FTRACE_FL_FAILED)) || 952 (rec->flags & FTRACE_FL_FAILED)) ||
915 953
916 ((iter->flags & FTRACE_ITER_FAILURES) && 954 ((iter->flags & FTRACE_ITER_FAILURES) &&
917 (!(rec->flags & FTRACE_FL_FAILED) || 955 !(rec->flags & FTRACE_FL_FAILED)) ||
918 (rec->flags & FTRACE_FL_FREE))) ||
919
920 ((iter->flags & FTRACE_ITER_FILTER) &&
921 !(rec->flags & FTRACE_FL_FILTER)) ||
922 956
923 ((iter->flags & FTRACE_ITER_NOTRACE) && 957 ((iter->flags & FTRACE_ITER_NOTRACE) &&
924 !(rec->flags & FTRACE_FL_NOTRACE))) { 958 !(rec->flags & FTRACE_FL_NOTRACE))) {
@@ -926,6 +960,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
926 goto retry; 960 goto retry;
927 } 961 }
928 } 962 }
963 spin_unlock(&ftrace_lock);
929 964
930 iter->pos = *pos; 965 iter->pos = *pos;
931 966
@@ -1039,8 +1074,8 @@ static void ftrace_filter_reset(int enable)
1039 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1074 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1040 unsigned i; 1075 unsigned i;
1041 1076
1042 /* keep kstop machine from running */ 1077 /* should not be called from interrupt context */
1043 preempt_disable(); 1078 spin_lock(&ftrace_lock);
1044 if (enable) 1079 if (enable)
1045 ftrace_filtered = 0; 1080 ftrace_filtered = 0;
1046 pg = ftrace_pages_start; 1081 pg = ftrace_pages_start;
@@ -1053,7 +1088,7 @@ static void ftrace_filter_reset(int enable)
1053 } 1088 }
1054 pg = pg->next; 1089 pg = pg->next;
1055 } 1090 }
1056 preempt_enable(); 1091 spin_unlock(&ftrace_lock);
1057} 1092}
1058 1093
1059static int 1094static int
@@ -1165,8 +1200,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
1165 } 1200 }
1166 } 1201 }
1167 1202
1168 /* keep kstop machine from running */ 1203 /* should not be called from interrupt context */
1169 preempt_disable(); 1204 spin_lock(&ftrace_lock);
1170 if (enable) 1205 if (enable)
1171 ftrace_filtered = 1; 1206 ftrace_filtered = 1;
1172 pg = ftrace_pages_start; 1207 pg = ftrace_pages_start;
@@ -1203,7 +1238,7 @@ ftrace_match(unsigned char *buff, int len, int enable)
1203 } 1238 }
1204 pg = pg->next; 1239 pg = pg->next;
1205 } 1240 }
1206 preempt_enable(); 1241 spin_unlock(&ftrace_lock);
1207} 1242}
1208 1243
1209static ssize_t 1244static ssize_t
@@ -1556,6 +1591,114 @@ static __init int ftrace_init_debugfs(void)
1556 1591
1557fs_initcall(ftrace_init_debugfs); 1592fs_initcall(ftrace_init_debugfs);
1558 1593
1594#ifdef CONFIG_FTRACE_MCOUNT_RECORD
1595static int ftrace_convert_nops(unsigned long *start,
1596 unsigned long *end)
1597{
1598 unsigned long *p;
1599 unsigned long addr;
1600 unsigned long flags;
1601
1602 p = start;
1603 while (p < end) {
1604 addr = ftrace_call_adjust(*p++);
1605 /* should not be called from interrupt context */
1606 spin_lock(&ftrace_lock);
1607 ftrace_record_ip(addr);
1608 spin_unlock(&ftrace_lock);
1609 ftrace_shutdown_replenish();
1610 }
1611
1612 /* p is ignored */
1613 local_irq_save(flags);
1614 __ftrace_update_code(p);
1615 local_irq_restore(flags);
1616
1617 return 0;
1618}
1619
1620void ftrace_init_module(unsigned long *start, unsigned long *end)
1621{
1622 if (ftrace_disabled || start == end)
1623 return;
1624 ftrace_convert_nops(start, end);
1625}
1626
1627extern unsigned long __start_mcount_loc[];
1628extern unsigned long __stop_mcount_loc[];
1629
1630void __init ftrace_init(void)
1631{
1632 unsigned long count, addr, flags;
1633 int ret;
1634
1635 /* Keep the ftrace pointer to the stub */
1636 addr = (unsigned long)ftrace_stub;
1637
1638 local_irq_save(flags);
1639 ftrace_dyn_arch_init(&addr);
1640 local_irq_restore(flags);
1641
1642 /* ftrace_dyn_arch_init places the return code in addr */
1643 if (addr)
1644 goto failed;
1645
1646 count = __stop_mcount_loc - __start_mcount_loc;
1647
1648 ret = ftrace_dyn_table_alloc(count);
1649 if (ret)
1650 goto failed;
1651
1652 last_ftrace_enabled = ftrace_enabled = 1;
1653
1654 ret = ftrace_convert_nops(__start_mcount_loc,
1655 __stop_mcount_loc);
1656
1657 return;
1658 failed:
1659 ftrace_disabled = 1;
1660}
1661#else /* CONFIG_FTRACE_MCOUNT_RECORD */
1662static int ftraced(void *ignore)
1663{
1664 unsigned long usecs;
1665
1666 while (!kthread_should_stop()) {
1667
1668 set_current_state(TASK_INTERRUPTIBLE);
1669
1670 /* check once a second */
1671 schedule_timeout(HZ);
1672
1673 if (unlikely(ftrace_disabled))
1674 continue;
1675
1676 mutex_lock(&ftrace_sysctl_lock);
1677 mutex_lock(&ftraced_lock);
1678 if (!ftraced_suspend && !ftraced_stop &&
1679 ftrace_update_code()) {
1680 usecs = nsecs_to_usecs(ftrace_update_time);
1681 if (ftrace_update_tot_cnt > 100000) {
1682 ftrace_update_tot_cnt = 0;
1683 pr_info("hm, dftrace overflow: %lu change%s"
1684 " (%lu total) in %lu usec%s\n",
1685 ftrace_update_cnt,
1686 ftrace_update_cnt != 1 ? "s" : "",
1687 ftrace_update_tot_cnt,
1688 usecs, usecs != 1 ? "s" : "");
1689 ftrace_disabled = 1;
1690 WARN_ON_ONCE(1);
1691 }
1692 }
1693 mutex_unlock(&ftraced_lock);
1694 mutex_unlock(&ftrace_sysctl_lock);
1695
1696 ftrace_shutdown_replenish();
1697 }
1698 __set_current_state(TASK_RUNNING);
1699 return 0;
1700}
1701
1559static int __init ftrace_dynamic_init(void) 1702static int __init ftrace_dynamic_init(void)
1560{ 1703{
1561 struct task_struct *p; 1704 struct task_struct *p;
@@ -1572,7 +1715,7 @@ static int __init ftrace_dynamic_init(void)
1572 goto failed; 1715 goto failed;
1573 } 1716 }
1574 1717
1575 ret = ftrace_dyn_table_alloc(); 1718 ret = ftrace_dyn_table_alloc(NR_TO_INIT);
1576 if (ret) 1719 if (ret)
1577 goto failed; 1720 goto failed;
1578 1721
@@ -1593,6 +1736,8 @@ static int __init ftrace_dynamic_init(void)
1593} 1736}
1594 1737
1595core_initcall(ftrace_dynamic_init); 1738core_initcall(ftrace_dynamic_init);
1739#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
1740
1596#else 1741#else
1597# define ftrace_startup() do { } while (0) 1742# define ftrace_startup() do { } while (0)
1598# define ftrace_shutdown() do { } while (0) 1743# define ftrace_shutdown() do { } while (0)
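
ftrace_init() above sizes its tables by pointer arithmetic over the __start_mcount_loc/__stop_mcount_loc section bounds that the build records. The same counting trick, sketched in userspace with an ordinary array standing in for the linker-provided section:

	#include <stdio.h>

	/* Stand-in for the mcount_loc section: a table of call-site
	 * addresses. In the kernel the linker provides the start/stop
	 * symbols around the real section. */
	static unsigned long mcount_loc[] = { 0x1000, 0x1020, 0x1044, 0x1080 };

	static unsigned long *start_mcount_loc = &mcount_loc[0];
	static unsigned long *stop_mcount_loc =
		&mcount_loc[sizeof(mcount_loc) / sizeof(mcount_loc[0])];

	int main(void)
	{
		/* Pointer subtraction yields the element count directly, as in
		 * count = __stop_mcount_loc - __start_mcount_loc above. */
		unsigned long count = stop_mcount_loc - start_mcount_loc;

		printf("%lu mcount call sites recorded\n", count);
		return 0;
	}
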
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 000000000000..94af1fe56bb4
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,2014 @@
1/*
2 * Generic ring buffer
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/ring_buffer.h>
7#include <linux/spinlock.h>
8#include <linux/debugfs.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/percpu.h>
12#include <linux/mutex.h>
13#include <linux/sched.h> /* used for sched_clock() (for now) */
14#include <linux/init.h>
15#include <linux/hash.h>
16#include <linux/list.h>
17#include <linux/fs.h>
18
19/* Up this if you want to test the TIME_EXTENTS and normalization */
20#define DEBUG_SHIFT 0
21
22/* FIXME!!! */
23u64 ring_buffer_time_stamp(int cpu)
24{
25 /* shift to debug/test normalization and TIME_EXTENTS */
26 return sched_clock() << DEBUG_SHIFT;
27}
28
29void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
30{
 31 /* Just for naive testing of the normalize function and deltas */
32 *ts >>= DEBUG_SHIFT;
33}
34
35#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
36#define RB_ALIGNMENT_SHIFT 2
37#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
38#define RB_MAX_SMALL_DATA 28
39
40enum {
41 RB_LEN_TIME_EXTEND = 8,
42 RB_LEN_TIME_STAMP = 16,
43};
44
45/* inline for ring buffer fast paths */
46static inline unsigned
47rb_event_length(struct ring_buffer_event *event)
48{
49 unsigned length;
50
51 switch (event->type) {
52 case RINGBUF_TYPE_PADDING:
53 /* undefined */
54 return -1;
55
56 case RINGBUF_TYPE_TIME_EXTEND:
57 return RB_LEN_TIME_EXTEND;
58
59 case RINGBUF_TYPE_TIME_STAMP:
60 return RB_LEN_TIME_STAMP;
61
62 case RINGBUF_TYPE_DATA:
63 if (event->len)
64 length = event->len << RB_ALIGNMENT_SHIFT;
65 else
66 length = event->array[0];
67 return length + RB_EVNT_HDR_SIZE;
68 default:
69 BUG();
70 }
71 /* not hit */
72 return 0;
73}
74
75/**
76 * ring_buffer_event_length - return the length of the event
77 * @event: the event to get the length of
78 */
79unsigned ring_buffer_event_length(struct ring_buffer_event *event)
80{
81 return rb_event_length(event);
82}
83
84/* inline for ring buffer fast paths */
85static inline void *
86rb_event_data(struct ring_buffer_event *event)
87{
88 BUG_ON(event->type != RINGBUF_TYPE_DATA);
89 /* If length is in len field, then array[0] has the data */
90 if (event->len)
91 return (void *)&event->array[0];
92 /* Otherwise length is in array[0] and array[1] has the data */
93 return (void *)&event->array[1];
94}
95
96/**
97 * ring_buffer_event_data - return the data of the event
98 * @event: the event to get the data from
99 */
100void *ring_buffer_event_data(struct ring_buffer_event *event)
101{
102 return rb_event_data(event);
103}
104
105#define for_each_buffer_cpu(buffer, cpu) \
106 for_each_cpu_mask(cpu, buffer->cpumask)
107
108#define TS_SHIFT 27
109#define TS_MASK ((1ULL << TS_SHIFT) - 1)
110#define TS_DELTA_TEST (~TS_MASK)
111
112/*
 113 * This hack is stolen from mm/slob.c.
114 * We can store per page timing information in the page frame of the page.
115 * Thanks to Peter Zijlstra for suggesting this idea.
116 */
117struct buffer_page {
118 u64 time_stamp; /* page time stamp */
119 local_t write; /* index for next write */
 120 local_t commit; /* write committed index */
121 unsigned read; /* index for next read */
122 struct list_head list; /* list of free pages */
123 void *page; /* Actual data page */
124};
125
126/*
127 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
128 * this issue out.
129 */
130static inline void free_buffer_page(struct buffer_page *bpage)
131{
132 if (bpage->page)
133 __free_page(bpage->page);
134 kfree(bpage);
135}
136
137/*
138 * We need to fit the time_stamp delta into 27 bits.
139 */
140static inline int test_time_stamp(u64 delta)
141{
142 if (delta & TS_DELTA_TEST)
143 return 1;
144 return 0;
145}
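
test_time_stamp() above is just a range check: any delta needing more than TS_SHIFT (27) bits forces a time-extend event. A quick standalone illustration of the mask arithmetic:

	#include <stdint.h>
	#include <stdio.h>

	#define TS_SHIFT      27
	#define TS_MASK       ((1ULL << TS_SHIFT) - 1)
	#define TS_DELTA_TEST (~TS_MASK)

	static int test_time_stamp(uint64_t delta)
	{
		return (delta & TS_DELTA_TEST) ? 1 : 0;
	}

	int main(void)
	{
		/* 2^27 ns is ~134 ms: deltas below that fit in the header */
		printf("100 ms: %s\n", test_time_stamp(100000000ULL)
		       ? "needs time extend" : "fits");
		printf("200 ms: %s\n", test_time_stamp(200000000ULL)
		       ? "needs time extend" : "fits");
		return 0;
	}
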
146
147#define BUF_PAGE_SIZE PAGE_SIZE
148
149/*
150 * head_page == tail_page && head == tail then buffer is empty.
151 */
152struct ring_buffer_per_cpu {
153 int cpu;
154 struct ring_buffer *buffer;
155 spinlock_t lock;
156 struct lock_class_key lock_key;
157 struct list_head pages;
158 struct buffer_page *head_page; /* read from head */
159 struct buffer_page *tail_page; /* write to tail */
 160 struct buffer_page *commit_page; /* committed pages */
161 struct buffer_page *reader_page;
162 unsigned long overrun;
163 unsigned long entries;
164 u64 write_stamp;
165 u64 read_stamp;
166 atomic_t record_disabled;
167};
168
169struct ring_buffer {
170 unsigned long size;
171 unsigned pages;
172 unsigned flags;
173 int cpus;
174 cpumask_t cpumask;
175 atomic_t record_disabled;
176
177 struct mutex mutex;
178
179 struct ring_buffer_per_cpu **buffers;
180};
181
182struct ring_buffer_iter {
183 struct ring_buffer_per_cpu *cpu_buffer;
184 unsigned long head;
185 struct buffer_page *head_page;
186 u64 read_stamp;
187};
188
189#define RB_WARN_ON(buffer, cond) \
190 do { \
191 if (unlikely(cond)) { \
192 atomic_inc(&buffer->record_disabled); \
193 WARN_ON(1); \
194 } \
195 } while (0)
196
197#define RB_WARN_ON_RET(buffer, cond) \
198 do { \
199 if (unlikely(cond)) { \
200 atomic_inc(&buffer->record_disabled); \
201 WARN_ON(1); \
202 return -1; \
203 } \
204 } while (0)
205
206#define RB_WARN_ON_ONCE(buffer, cond) \
207 do { \
208 static int once; \
209 if (unlikely(cond) && !once) { \
210 once++; \
211 atomic_inc(&buffer->record_disabled); \
212 WARN_ON(1); \
213 } \
214 } while (0)
215
216/**
 217 * rb_check_pages - integrity check of buffer pages
218 * @cpu_buffer: CPU buffer with pages to test
219 *
 220 * As a safety measure we check to make sure the data pages have not
221 * been corrupted.
222 */
223static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
224{
225 struct list_head *head = &cpu_buffer->pages;
226 struct buffer_page *page, *tmp;
227
228 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
229 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
230
231 list_for_each_entry_safe(page, tmp, head, list) {
232 RB_WARN_ON_RET(cpu_buffer,
233 page->list.next->prev != &page->list);
234 RB_WARN_ON_RET(cpu_buffer,
235 page->list.prev->next != &page->list);
236 }
237
238 return 0;
239}
240
241static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
242 unsigned nr_pages)
243{
244 struct list_head *head = &cpu_buffer->pages;
245 struct buffer_page *page, *tmp;
246 unsigned long addr;
247 LIST_HEAD(pages);
248 unsigned i;
249
250 for (i = 0; i < nr_pages; i++) {
251 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
252 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
253 if (!page)
254 goto free_pages;
255 list_add(&page->list, &pages);
256
257 addr = __get_free_page(GFP_KERNEL);
258 if (!addr)
259 goto free_pages;
260 page->page = (void *)addr;
261 }
262
263 list_splice(&pages, head);
264
265 rb_check_pages(cpu_buffer);
266
267 return 0;
268
269 free_pages:
270 list_for_each_entry_safe(page, tmp, &pages, list) {
271 list_del_init(&page->list);
272 free_buffer_page(page);
273 }
274 return -ENOMEM;
275}
276
277static struct ring_buffer_per_cpu *
278rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
279{
280 struct ring_buffer_per_cpu *cpu_buffer;
281 struct buffer_page *page;
282 unsigned long addr;
283 int ret;
284
285 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
286 GFP_KERNEL, cpu_to_node(cpu));
287 if (!cpu_buffer)
288 return NULL;
289
290 cpu_buffer->cpu = cpu;
291 cpu_buffer->buffer = buffer;
292 spin_lock_init(&cpu_buffer->lock);
293 INIT_LIST_HEAD(&cpu_buffer->pages);
294
295 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
296 GFP_KERNEL, cpu_to_node(cpu));
297 if (!page)
298 goto fail_free_buffer;
299
300 cpu_buffer->reader_page = page;
301 addr = __get_free_page(GFP_KERNEL);
302 if (!addr)
303 goto fail_free_reader;
304 page->page = (void *)addr;
305
306 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
307
308 ret = rb_allocate_pages(cpu_buffer, buffer->pages);
309 if (ret < 0)
310 goto fail_free_reader;
311
312 cpu_buffer->head_page
313 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
314 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
315
316 return cpu_buffer;
317
318 fail_free_reader:
319 free_buffer_page(cpu_buffer->reader_page);
320
321 fail_free_buffer:
322 kfree(cpu_buffer);
323 return NULL;
324}
325
326static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
327{
328 struct list_head *head = &cpu_buffer->pages;
329 struct buffer_page *page, *tmp;
330
331 list_del_init(&cpu_buffer->reader_page->list);
332 free_buffer_page(cpu_buffer->reader_page);
333
334 list_for_each_entry_safe(page, tmp, head, list) {
335 list_del_init(&page->list);
336 free_buffer_page(page);
337 }
338 kfree(cpu_buffer);
339}
340
341/*
342 * Causes compile errors if the struct buffer_page gets bigger
343 * than the struct page.
344 */
345extern int ring_buffer_page_too_big(void);
346
347/**
348 * ring_buffer_alloc - allocate a new ring_buffer
349 * @size: the size in bytes that is needed.
350 * @flags: attributes to set for the ring buffer.
351 *
352 * Currently the only flag that is available is the RB_FL_OVERWRITE
353 * flag. This flag means that the buffer will overwrite old data
354 * when the buffer wraps. If this flag is not set, the buffer will
355 * drop data when the tail hits the head.
356 */
357struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
358{
359 struct ring_buffer *buffer;
360 int bsize;
361 int cpu;
362
363 /* Paranoid! Optimizes out when all is well */
364 if (sizeof(struct buffer_page) > sizeof(struct page))
365 ring_buffer_page_too_big();
366
367
368 /* keep it in its own cache line */
369 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
370 GFP_KERNEL);
371 if (!buffer)
372 return NULL;
373
374 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
375 buffer->flags = flags;
376
377 /* need at least two pages */
378 if (buffer->pages == 1)
379 buffer->pages++;
380
381 buffer->cpumask = cpu_possible_map;
382 buffer->cpus = nr_cpu_ids;
383
384 bsize = sizeof(void *) * nr_cpu_ids;
385 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
386 GFP_KERNEL);
387 if (!buffer->buffers)
388 goto fail_free_buffer;
389
390 for_each_buffer_cpu(buffer, cpu) {
391 buffer->buffers[cpu] =
392 rb_allocate_cpu_buffer(buffer, cpu);
393 if (!buffer->buffers[cpu])
394 goto fail_free_buffers;
395 }
396
397 mutex_init(&buffer->mutex);
398
399 return buffer;
400
401 fail_free_buffers:
402 for_each_buffer_cpu(buffer, cpu) {
403 if (buffer->buffers[cpu])
404 rb_free_cpu_buffer(buffer->buffers[cpu]);
405 }
406 kfree(buffer->buffers);
407
408 fail_free_buffer:
409 kfree(buffer);
410 return NULL;
411}
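
ring_buffer_alloc() derives its page count by rounding the byte size up to whole pages and enforcing a two-page floor (head_page == tail_page means empty). That sizing arithmetic, extracted into a runnable sketch with PAGE_SIZE assumed to be 4096:

	#include <stdio.h>

	#define BUF_PAGE_SIZE 4096UL  /* assumed PAGE_SIZE */
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	static unsigned long rb_pages(unsigned long size)
	{
		unsigned long pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);

		/* need at least two pages, as in ring_buffer_alloc() above */
		if (pages == 1)
			pages++;
		return pages;
	}

	int main(void)
	{
		printf("%lu bytes -> %lu pages\n", 1000UL, rb_pages(1000UL));
		printf("%lu bytes -> %lu pages\n", 10000UL, rb_pages(10000UL));
		return 0;
	}
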
412
413/**
414 * ring_buffer_free - free a ring buffer.
415 * @buffer: the buffer to free.
416 */
417void
418ring_buffer_free(struct ring_buffer *buffer)
419{
420 int cpu;
421
422 for_each_buffer_cpu(buffer, cpu)
423 rb_free_cpu_buffer(buffer->buffers[cpu]);
424
425 kfree(buffer);
426}
427
428static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
429
430static void
431rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
432{
433 struct buffer_page *page;
434 struct list_head *p;
435 unsigned i;
436
437 atomic_inc(&cpu_buffer->record_disabled);
438 synchronize_sched();
439
440 for (i = 0; i < nr_pages; i++) {
441 BUG_ON(list_empty(&cpu_buffer->pages));
442 p = cpu_buffer->pages.next;
443 page = list_entry(p, struct buffer_page, list);
444 list_del_init(&page->list);
445 free_buffer_page(page);
446 }
447 BUG_ON(list_empty(&cpu_buffer->pages));
448
449 rb_reset_cpu(cpu_buffer);
450
451 rb_check_pages(cpu_buffer);
452
453 atomic_dec(&cpu_buffer->record_disabled);
454
455}
456
457static void
458rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
459 struct list_head *pages, unsigned nr_pages)
460{
461 struct buffer_page *page;
462 struct list_head *p;
463 unsigned i;
464
465 atomic_inc(&cpu_buffer->record_disabled);
466 synchronize_sched();
467
468 for (i = 0; i < nr_pages; i++) {
469 BUG_ON(list_empty(pages));
470 p = pages->next;
471 page = list_entry(p, struct buffer_page, list);
472 list_del_init(&page->list);
473 list_add_tail(&page->list, &cpu_buffer->pages);
474 }
475 rb_reset_cpu(cpu_buffer);
476
477 rb_check_pages(cpu_buffer);
478
479 atomic_dec(&cpu_buffer->record_disabled);
480}
481
482/**
483 * ring_buffer_resize - resize the ring buffer
484 * @buffer: the buffer to resize.
485 * @size: the new size.
486 *
487 * The tracer is responsible for making sure that the buffer is
488 * not being used while changing the size.
489 * Note: We may be able to change the above requirement by using
490 * RCU synchronizations.
491 *
492 * Minimum size is 2 * BUF_PAGE_SIZE.
493 *
494 * Returns -1 on failure.
495 */
496int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
497{
498 struct ring_buffer_per_cpu *cpu_buffer;
499 unsigned nr_pages, rm_pages, new_pages;
500 struct buffer_page *page, *tmp;
501 unsigned long buffer_size;
502 unsigned long addr;
503 LIST_HEAD(pages);
504 int i, cpu;
505
506 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
507 size *= BUF_PAGE_SIZE;
508 buffer_size = buffer->pages * BUF_PAGE_SIZE;
509
510 /* we need a minimum of two pages */
511 if (size < BUF_PAGE_SIZE * 2)
512 size = BUF_PAGE_SIZE * 2;
513
514 if (size == buffer_size)
515 return size;
516
517 mutex_lock(&buffer->mutex);
518
519 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
520
521 if (size < buffer_size) {
522
523 /* easy case, just free pages */
524 BUG_ON(nr_pages >= buffer->pages);
525
526 rm_pages = buffer->pages - nr_pages;
527
528 for_each_buffer_cpu(buffer, cpu) {
529 cpu_buffer = buffer->buffers[cpu];
530 rb_remove_pages(cpu_buffer, rm_pages);
531 }
532 goto out;
533 }
534
535 /*
536 * This is a bit more difficult. We only want to add pages
537 * when we can allocate enough for all CPUs. We do this
538 * by allocating all the pages and storing them on a local
 539 * linked list. If we succeed in our allocation, then we
 540 * add these pages to the cpu_buffers. Otherwise we just free
 541 * them all and return -ENOMEM.
542 */
543 BUG_ON(nr_pages <= buffer->pages);
544 new_pages = nr_pages - buffer->pages;
545
546 for_each_buffer_cpu(buffer, cpu) {
547 for (i = 0; i < new_pages; i++) {
548 page = kzalloc_node(ALIGN(sizeof(*page),
549 cache_line_size()),
550 GFP_KERNEL, cpu_to_node(cpu));
551 if (!page)
552 goto free_pages;
553 list_add(&page->list, &pages);
554 addr = __get_free_page(GFP_KERNEL);
555 if (!addr)
556 goto free_pages;
557 page->page = (void *)addr;
558 }
559 }
560
561 for_each_buffer_cpu(buffer, cpu) {
562 cpu_buffer = buffer->buffers[cpu];
563 rb_insert_pages(cpu_buffer, &pages, new_pages);
564 }
565
566 BUG_ON(!list_empty(&pages));
567
568 out:
569 buffer->pages = nr_pages;
570 mutex_unlock(&buffer->mutex);
571
572 return size;
573
574 free_pages:
575 list_for_each_entry_safe(page, tmp, &pages, list) {
576 list_del_init(&page->list);
577 free_buffer_page(page);
578 }
579 return -ENOMEM;
580}
581
582static inline int rb_null_event(struct ring_buffer_event *event)
583{
584 return event->type == RINGBUF_TYPE_PADDING;
585}
586
587static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
588{
589 return page->page + index;
590}
591
592static inline struct ring_buffer_event *
593rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
594{
595 return __rb_page_index(cpu_buffer->reader_page,
596 cpu_buffer->reader_page->read);
597}
598
599static inline struct ring_buffer_event *
600rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
601{
602 return __rb_page_index(cpu_buffer->head_page,
603 cpu_buffer->head_page->read);
604}
605
606static inline struct ring_buffer_event *
607rb_iter_head_event(struct ring_buffer_iter *iter)
608{
609 return __rb_page_index(iter->head_page, iter->head);
610}
611
612static inline unsigned rb_page_write(struct buffer_page *bpage)
613{
614 return local_read(&bpage->write);
615}
616
617static inline unsigned rb_page_commit(struct buffer_page *bpage)
618{
619 return local_read(&bpage->commit);
620}
621
 622/* Size is determined by what has been committed */
623static inline unsigned rb_page_size(struct buffer_page *bpage)
624{
625 return rb_page_commit(bpage);
626}
627
628static inline unsigned
629rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
630{
631 return rb_page_commit(cpu_buffer->commit_page);
632}
633
634static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
635{
636 return rb_page_commit(cpu_buffer->head_page);
637}
638
639/*
640 * When the tail hits the head and the buffer is in overwrite mode,
641 * the head jumps to the next page and all content on the previous
642 * page is discarded. But before doing so, we update the overrun
643 * variable of the buffer.
644 */
645static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
646{
647 struct ring_buffer_event *event;
648 unsigned long head;
649
650 for (head = 0; head < rb_head_size(cpu_buffer);
651 head += rb_event_length(event)) {
652
653 event = __rb_page_index(cpu_buffer->head_page, head);
654 BUG_ON(rb_null_event(event));
655 /* Only count data entries */
656 if (event->type != RINGBUF_TYPE_DATA)
657 continue;
658 cpu_buffer->overrun++;
659 cpu_buffer->entries--;
660 }
661}
662
663static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
664 struct buffer_page **page)
665{
666 struct list_head *p = (*page)->list.next;
667
668 if (p == &cpu_buffer->pages)
669 p = p->next;
670
671 *page = list_entry(p, struct buffer_page, list);
672}
673
674static inline unsigned
675rb_event_index(struct ring_buffer_event *event)
676{
677 unsigned long addr = (unsigned long)event;
678
679 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
680}
681
682static inline int
683rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
684 struct ring_buffer_event *event)
685{
686 unsigned long addr = (unsigned long)event;
687 unsigned long index;
688
689 index = rb_event_index(event);
690 addr &= PAGE_MASK;
691
692 return cpu_buffer->commit_page->page == (void *)addr &&
693 rb_commit_index(cpu_buffer) == index;
694}
695
696static inline void
697rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
698 struct ring_buffer_event *event)
699{
700 unsigned long addr = (unsigned long)event;
701 unsigned long index;
702
703 index = rb_event_index(event);
704 addr &= PAGE_MASK;
705
706 while (cpu_buffer->commit_page->page != (void *)addr) {
707 RB_WARN_ON(cpu_buffer,
708 cpu_buffer->commit_page == cpu_buffer->tail_page);
709 cpu_buffer->commit_page->commit =
710 cpu_buffer->commit_page->write;
711 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
712 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
713 }
714
715 /* Now set the commit to the event's index */
716 local_set(&cpu_buffer->commit_page->commit, index);
717}
718
719static inline void
720rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
721{
722 /*
723 * We only race with interrupts and NMIs on this CPU.
724 * If we own the commit event, then we can commit
725 * all others that interrupted us, since the interruptions
726 * are in stack format (they finish before they come
727 * back to us). This allows us to do a simple loop to
728 * assign the commit to the tail.
729 */
730 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
731 cpu_buffer->commit_page->commit =
732 cpu_buffer->commit_page->write;
733 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
734 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
735 /* add barrier to keep gcc from optimizing too much */
736 barrier();
737 }
738 while (rb_commit_index(cpu_buffer) !=
739 rb_page_write(cpu_buffer->commit_page)) {
740 cpu_buffer->commit_page->commit =
741 cpu_buffer->commit_page->write;
742 barrier();
743 }
744}
745
746static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
747{
748 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
749 cpu_buffer->reader_page->read = 0;
750}
751
752static inline void rb_inc_iter(struct ring_buffer_iter *iter)
753{
754 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
755
756 /*
757 * The iterator could be on the reader page (it starts there).
758 * But the head could have moved, since the reader was
759 * found. Check for this case and assign the iterator
760 * to the head page instead of next.
761 */
762 if (iter->head_page == cpu_buffer->reader_page)
763 iter->head_page = cpu_buffer->head_page;
764 else
765 rb_inc_page(cpu_buffer, &iter->head_page);
766
767 iter->read_stamp = iter->head_page->time_stamp;
768 iter->head = 0;
769}
770
771/**
 772 * rb_update_event - update event type and data
 773 * @event: the event to update
774 * @type: the type of event
775 * @length: the size of the event field in the ring buffer
776 *
777 * Update the type and data fields of the event. The length
778 * is the actual size that is written to the ring buffer,
779 * and with this, we can determine what to place into the
780 * data field.
781 */
782static inline void
783rb_update_event(struct ring_buffer_event *event,
784 unsigned type, unsigned length)
785{
786 event->type = type;
787
788 switch (type) {
789
790 case RINGBUF_TYPE_PADDING:
791 break;
792
793 case RINGBUF_TYPE_TIME_EXTEND:
794 event->len =
795 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
796 >> RB_ALIGNMENT_SHIFT;
797 break;
798
799 case RINGBUF_TYPE_TIME_STAMP:
800 event->len =
801 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
802 >> RB_ALIGNMENT_SHIFT;
803 break;
804
805 case RINGBUF_TYPE_DATA:
806 length -= RB_EVNT_HDR_SIZE;
807 if (length > RB_MAX_SMALL_DATA) {
808 event->len = 0;
809 event->array[0] = length;
810 } else
811 event->len =
812 (length + (RB_ALIGNMENT-1))
813 >> RB_ALIGNMENT_SHIFT;
814 break;
815 default:
816 BUG();
817 }
818}
819
820static inline unsigned rb_calculate_event_length(unsigned length)
821{
822 struct ring_buffer_event event; /* Used only for sizeof array */
823
824 /* zero length can cause confusions */
825 if (!length)
826 length = 1;
827
828 if (length > RB_MAX_SMALL_DATA)
829 length += sizeof(event.array[0]);
830
831 length += RB_EVNT_HDR_SIZE;
832 length = ALIGN(length, RB_ALIGNMENT);
833
834 return length;
835}
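
/*
 * Editor's worked example for the sizing above, assuming (for
 * illustration only) RB_ALIGNMENT == 4 and RB_EVNT_HDR_SIZE == 4:
 * a 5 byte payload is small, so the reserved size is
 * ALIGN(5 + 4, 4) == 12 bytes; a payload larger than
 * RB_MAX_SMALL_DATA additionally pays sizeof(event.array[0]) for the
 * explicit length word kept in array[0].
 */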
836
837static struct ring_buffer_event *
838__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
839 unsigned type, unsigned long length, u64 *ts)
840{
841 struct buffer_page *tail_page, *head_page, *reader_page;
842 unsigned long tail, write;
843 struct ring_buffer *buffer = cpu_buffer->buffer;
844 struct ring_buffer_event *event;
845 unsigned long flags;
846
847 tail_page = cpu_buffer->tail_page;
848 write = local_add_return(length, &tail_page->write);
849 tail = write - length;
850
 851 /* See if we shot past the end of this buffer page */
852 if (write > BUF_PAGE_SIZE) {
853 struct buffer_page *next_page = tail_page;
854
855 spin_lock_irqsave(&cpu_buffer->lock, flags);
856
857 rb_inc_page(cpu_buffer, &next_page);
858
859 head_page = cpu_buffer->head_page;
860 reader_page = cpu_buffer->reader_page;
861
862 /* we grabbed the lock before incrementing */
863 RB_WARN_ON(cpu_buffer, next_page == reader_page);
864
865 /*
866 * If for some reason, we had an interrupt storm that made
867 * it all the way around the buffer, bail, and warn
868 * about it.
869 */
870 if (unlikely(next_page == cpu_buffer->commit_page)) {
871 WARN_ON_ONCE(1);
872 goto out_unlock;
873 }
874
875 if (next_page == head_page) {
876 if (!(buffer->flags & RB_FL_OVERWRITE)) {
877 /* reset write */
878 if (tail <= BUF_PAGE_SIZE)
879 local_set(&tail_page->write, tail);
880 goto out_unlock;
881 }
882
883 /* tail_page has not moved yet? */
884 if (tail_page == cpu_buffer->tail_page) {
885 /* count overflows */
886 rb_update_overflow(cpu_buffer);
887
888 rb_inc_page(cpu_buffer, &head_page);
889 cpu_buffer->head_page = head_page;
890 cpu_buffer->head_page->read = 0;
891 }
892 }
893
894 /*
895 * If the tail page is still the same as what we think
896 * it is, then it is up to us to update the tail
897 * pointer.
898 */
899 if (tail_page == cpu_buffer->tail_page) {
900 local_set(&next_page->write, 0);
901 local_set(&next_page->commit, 0);
902 cpu_buffer->tail_page = next_page;
903
904 /* reread the time stamp */
905 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
906 cpu_buffer->tail_page->time_stamp = *ts;
907 }
908
909 /*
910 * The actual tail page has moved forward.
911 */
912 if (tail < BUF_PAGE_SIZE) {
913 /* Mark the rest of the page with padding */
914 event = __rb_page_index(tail_page, tail);
915 event->type = RINGBUF_TYPE_PADDING;
916 }
917
918 if (tail <= BUF_PAGE_SIZE)
919 /* Set the write back to the previous setting */
920 local_set(&tail_page->write, tail);
921
922 /*
923 * If this was a commit entry that failed,
924 * increment that too
925 */
926 if (tail_page == cpu_buffer->commit_page &&
927 tail == rb_commit_index(cpu_buffer)) {
928 rb_set_commit_to_write(cpu_buffer);
929 }
930
931 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
932
933 /* fail and let the caller try again */
934 return ERR_PTR(-EAGAIN);
935 }
936
937 /* We reserved something on the buffer */
938
939 BUG_ON(write > BUF_PAGE_SIZE);
940
941 event = __rb_page_index(tail_page, tail);
942 rb_update_event(event, type, length);
943
944 /*
945 * If this is a commit and the tail is zero, then update
946 * this page's time stamp.
947 */
948 if (!tail && rb_is_commit(cpu_buffer, event))
949 cpu_buffer->commit_page->time_stamp = *ts;
950
951 return event;
952
953 out_unlock:
954 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
955 return NULL;
956}
957
958static int
959rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
960 u64 *ts, u64 *delta)
961{
962 struct ring_buffer_event *event;
963 static int once;
964 int ret;
965
966 if (unlikely(*delta > (1ULL << 59) && !once++)) {
967 printk(KERN_WARNING "Delta way too big! %llu"
968 " ts=%llu write stamp = %llu\n",
969 *delta, *ts, cpu_buffer->write_stamp);
970 WARN_ON(1);
971 }
972
973 /*
 974 * The delta is too big; we need to add a
 975 * new timestamp.
976 */
977 event = __rb_reserve_next(cpu_buffer,
978 RINGBUF_TYPE_TIME_EXTEND,
979 RB_LEN_TIME_EXTEND,
980 ts);
981 if (!event)
982 return -EBUSY;
983
984 if (PTR_ERR(event) == -EAGAIN)
985 return -EAGAIN;
986
 987 /* Only a committed time event can update the write stamp */
988 if (rb_is_commit(cpu_buffer, event)) {
989 /*
990 * If this is the first on the page, then we need to
991 * update the page itself, and just put in a zero.
992 */
993 if (rb_event_index(event)) {
994 event->time_delta = *delta & TS_MASK;
995 event->array[0] = *delta >> TS_SHIFT;
996 } else {
997 cpu_buffer->commit_page->time_stamp = *ts;
998 event->time_delta = 0;
999 event->array[0] = 0;
1000 }
1001 cpu_buffer->write_stamp = *ts;
1002 /* let the caller know this was the commit */
1003 ret = 1;
1004 } else {
1005 /* Darn, this is just wasted space */
1006 event->time_delta = 0;
1007 event->array[0] = 0;
1008 ret = 0;
1009 }
1010
1011 *delta = 0;
1012
1013 return ret;
1014}
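
/*
 * Editor's note (illustration, not original code): a TIME_EXTEND event
 * carries the oversized delta split across two fields, the low bits in
 * time_delta and the high bits in array[0].  A reader reassembles it
 * exactly as rb_update_read_stamp() does further down:
 *
 *	delta = event->array[0];
 *	delta <<= TS_SHIFT;
 *	delta += event->time_delta;
 */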
1015
1016static struct ring_buffer_event *
1017rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1018 unsigned type, unsigned long length)
1019{
1020 struct ring_buffer_event *event;
1021 u64 ts, delta;
1022 int commit = 0;
1023
1024 again:
1025 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1026
1027 /*
1028 * Only the first commit can update the timestamp.
1029 * Yes there is a race here. If an interrupt comes in
1030 * just after the conditional and it traces too, then it
1031 * will also check the deltas. More than one timestamp may
1032 * also be made. But only the entry that did the actual
1033 * commit will be something other than zero.
1034 */
1035 if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
1036 rb_page_write(cpu_buffer->tail_page) ==
1037 rb_commit_index(cpu_buffer)) {
1038
1039 delta = ts - cpu_buffer->write_stamp;
1040
1041 /* make sure this delta is calculated here */
1042 barrier();
1043
1044 /* Did the write stamp get updated already? */
1045 if (unlikely(ts < cpu_buffer->write_stamp))
1046 goto again;
1047
1048 if (test_time_stamp(delta)) {
1049
1050 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1051
1052 if (commit == -EBUSY)
1053 return NULL;
1054
1055 if (commit == -EAGAIN)
1056 goto again;
1057
1058 RB_WARN_ON(cpu_buffer, commit < 0);
1059 }
1060 } else
1061 /* Non commits have zero deltas */
1062 delta = 0;
1063
1064 event = __rb_reserve_next(cpu_buffer, type, length, &ts);
1065 if (PTR_ERR(event) == -EAGAIN)
1066 goto again;
1067
1068 if (!event) {
1069 if (unlikely(commit))
1070 /*
 1071 * Ouch! We needed a timestamp and it was committed. But
1072 * we didn't get our event reserved.
1073 */
1074 rb_set_commit_to_write(cpu_buffer);
1075 return NULL;
1076 }
1077
1078 /*
 1079 * If the timestamp was committed, make the commit our entry
1080 * now so that we will update it when needed.
1081 */
1082 if (commit)
1083 rb_set_commit_event(cpu_buffer, event);
1084 else if (!rb_is_commit(cpu_buffer, event))
1085 delta = 0;
1086
1087 event->time_delta = delta;
1088
1089 return event;
1090}
1091
1092static DEFINE_PER_CPU(int, rb_need_resched);
1093
1094/**
1095 * ring_buffer_lock_reserve - reserve a part of the buffer
1096 * @buffer: the ring buffer to reserve from
1097 * @length: the length of the data to reserve (excluding event header)
1098 * @flags: a pointer to save the interrupt flags
1099 *
 1100 * Returns a reserved event on the ring buffer to copy directly to.
1101 * The user of this interface will need to get the body to write into
1102 * and can use the ring_buffer_event_data() interface.
1103 *
1104 * The length is the length of the data needed, not the event length
1105 * which also includes the event header.
1106 *
1107 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
1108 * If NULL is returned, then nothing has been allocated or locked.
1109 */
1110struct ring_buffer_event *
1111ring_buffer_lock_reserve(struct ring_buffer *buffer,
1112 unsigned long length,
1113 unsigned long *flags)
1114{
1115 struct ring_buffer_per_cpu *cpu_buffer;
1116 struct ring_buffer_event *event;
1117 int cpu, resched;
1118
1119 if (atomic_read(&buffer->record_disabled))
1120 return NULL;
1121
1122 /* If we are tracing schedule, we don't want to recurse */
1123 resched = need_resched();
1124 preempt_disable_notrace();
1125
1126 cpu = raw_smp_processor_id();
1127
1128 if (!cpu_isset(cpu, buffer->cpumask))
1129 goto out;
1130
1131 cpu_buffer = buffer->buffers[cpu];
1132
1133 if (atomic_read(&cpu_buffer->record_disabled))
1134 goto out;
1135
1136 length = rb_calculate_event_length(length);
1137 if (length > BUF_PAGE_SIZE)
1138 goto out;
1139
1140 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
1141 if (!event)
1142 goto out;
1143
1144 /*
1145 * Need to store resched state on this cpu.
1146 * Only the first needs to.
1147 */
1148
1149 if (preempt_count() == 1)
1150 per_cpu(rb_need_resched, cpu) = resched;
1151
1152 return event;
1153
1154 out:
 1155 if (resched)
 1156 preempt_enable_no_resched_notrace();
 1157 else
 1158 preempt_enable_notrace();
1159 return NULL;
1160}
1161
1162static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1163 struct ring_buffer_event *event)
1164{
1165 cpu_buffer->entries++;
1166
1167 /* Only process further if we own the commit */
1168 if (!rb_is_commit(cpu_buffer, event))
1169 return;
1170
1171 cpu_buffer->write_stamp += event->time_delta;
1172
1173 rb_set_commit_to_write(cpu_buffer);
1174}
1175
1176/**
 1177 * ring_buffer_unlock_commit - commit a reserved event
1178 * @buffer: The buffer to commit to
1179 * @event: The event pointer to commit.
1180 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1181 *
1182 * This commits the data to the ring buffer, and releases any locks held.
1183 *
1184 * Must be paired with ring_buffer_lock_reserve.
1185 */
1186int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1187 struct ring_buffer_event *event,
1188 unsigned long flags)
1189{
1190 struct ring_buffer_per_cpu *cpu_buffer;
1191 int cpu = raw_smp_processor_id();
1192
1193 cpu_buffer = buffer->buffers[cpu];
1194
1195 rb_commit(cpu_buffer, event);
1196
1197 /*
 1198 * Only the outermost preemption level needs to restore preemption.
1199 */
1200 if (preempt_count() == 1) {
1201 if (per_cpu(rb_need_resched, cpu))
1202 preempt_enable_no_resched_notrace();
1203 else
1204 preempt_enable_notrace();
1205 } else
1206 preempt_enable_no_resched_notrace();
1207
1208 return 0;
1209}
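
/*
 * Editor's usage sketch for the reserve/commit pair above; "buffer"
 * and "payload" are assumed to be supplied by the caller:
 *
 *	struct ring_buffer_event *event;
 *	unsigned long flags;
 *
 *	event = ring_buffer_lock_reserve(buffer, sizeof(payload), &flags);
 *	if (event) {
 *		memcpy(ring_buffer_event_data(event), &payload,
 *		       sizeof(payload));
 *		ring_buffer_unlock_commit(buffer, event, flags);
 *	}
 */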
1210
1211/**
1212 * ring_buffer_write - write data to the buffer without reserving
1213 * @buffer: The ring buffer to write to.
1214 * @length: The length of the data being written (excluding the event header)
1215 * @data: The data to write to the buffer.
1216 *
1217 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
1218 * one function. If you already have the data to write to the buffer, it
1219 * may be easier to simply call this function.
1220 *
1221 * Note, like ring_buffer_lock_reserve, the length is the length of the data
1222 * and not the length of the event which would hold the header.
1223 */
1224int ring_buffer_write(struct ring_buffer *buffer,
1225 unsigned long length,
1226 void *data)
1227{
1228 struct ring_buffer_per_cpu *cpu_buffer;
1229 struct ring_buffer_event *event;
1230 unsigned long event_length;
1231 void *body;
1232 int ret = -EBUSY;
1233 int cpu, resched;
1234
1235 if (atomic_read(&buffer->record_disabled))
1236 return -EBUSY;
1237
1238 resched = need_resched();
1239 preempt_disable_notrace();
1240
1241 cpu = raw_smp_processor_id();
1242
1243 if (!cpu_isset(cpu, buffer->cpumask))
1244 goto out;
1245
1246 cpu_buffer = buffer->buffers[cpu];
1247
1248 if (atomic_read(&cpu_buffer->record_disabled))
1249 goto out;
1250
1251 event_length = rb_calculate_event_length(length);
1252 event = rb_reserve_next_event(cpu_buffer,
1253 RINGBUF_TYPE_DATA, event_length);
1254 if (!event)
1255 goto out;
1256
1257 body = rb_event_data(event);
1258
1259 memcpy(body, data, length);
1260
1261 rb_commit(cpu_buffer, event);
1262
1263 ret = 0;
1264 out:
1265 if (resched)
1266 preempt_enable_no_resched_notrace();
1267 else
1268 preempt_enable_notrace();
1269
1270 return ret;
1271}
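
/*
 * Editor's usage sketch for the one-shot variant above, with the same
 * assumed caller-provided "buffer" and "payload" as in the previous
 * sketch; handle_dropped_record() is hypothetical.  A non-zero return
 * (-EBUSY) means the record was dropped:
 *
 *	if (ring_buffer_write(buffer, sizeof(payload), &payload))
 *		handle_dropped_record();
 */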
1272
1273static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1274{
1275 struct buffer_page *reader = cpu_buffer->reader_page;
1276 struct buffer_page *head = cpu_buffer->head_page;
1277 struct buffer_page *commit = cpu_buffer->commit_page;
1278
1279 return reader->read == rb_page_commit(reader) &&
1280 (commit == reader ||
1281 (commit == head &&
1282 head->read == rb_page_commit(commit)));
1283}
1284
1285/**
1286 * ring_buffer_record_disable - stop all writes into the buffer
1287 * @buffer: The ring buffer to stop writes to.
1288 *
1289 * This prevents all writes to the buffer. Any attempt to write
1290 * to the buffer after this will fail and return NULL.
1291 *
1292 * The caller should call synchronize_sched() after this.
1293 */
1294void ring_buffer_record_disable(struct ring_buffer *buffer)
1295{
1296 atomic_inc(&buffer->record_disabled);
1297}
1298
1299/**
1300 * ring_buffer_record_enable - enable writes to the buffer
1301 * @buffer: The ring buffer to enable writes
1302 *
1303 * Note, multiple disables will need the same number of enables
 1304 * to truly enable the writing (much like preempt_disable).
1305 */
1306void ring_buffer_record_enable(struct ring_buffer *buffer)
1307{
1308 atomic_dec(&buffer->record_disabled);
1309}
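
/*
 * Editor's sketch of the quiesce pattern implied by the comments
 * above ("buffer" assumed valid):
 *
 *	ring_buffer_record_disable(buffer);
 *	synchronize_sched();
 *	... inspect or reset the buffer with no writers active ...
 *	ring_buffer_record_enable(buffer);
 */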
1310
1311/**
1312 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
1313 * @buffer: The ring buffer to stop writes to.
1314 * @cpu: The CPU buffer to stop
1315 *
1316 * This prevents all writes to the buffer. Any attempt to write
1317 * to the buffer after this will fail and return NULL.
1318 *
1319 * The caller should call synchronize_sched() after this.
1320 */
1321void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
1322{
1323 struct ring_buffer_per_cpu *cpu_buffer;
1324
1325 if (!cpu_isset(cpu, buffer->cpumask))
1326 return;
1327
1328 cpu_buffer = buffer->buffers[cpu];
1329 atomic_inc(&cpu_buffer->record_disabled);
1330}
1331
1332/**
1333 * ring_buffer_record_enable_cpu - enable writes to the buffer
1334 * @buffer: The ring buffer to enable writes
1335 * @cpu: The CPU to enable.
1336 *
1337 * Note, multiple disables will need the same number of enables
 1338 * to truly enable the writing (much like preempt_disable).
1339 */
1340void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1341{
1342 struct ring_buffer_per_cpu *cpu_buffer;
1343
1344 if (!cpu_isset(cpu, buffer->cpumask))
1345 return;
1346
1347 cpu_buffer = buffer->buffers[cpu];
1348 atomic_dec(&cpu_buffer->record_disabled);
1349}
1350
1351/**
1352 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
1353 * @buffer: The ring buffer
1354 * @cpu: The per CPU buffer to get the entries from.
1355 */
1356unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1357{
1358 struct ring_buffer_per_cpu *cpu_buffer;
1359
1360 if (!cpu_isset(cpu, buffer->cpumask))
1361 return 0;
1362
1363 cpu_buffer = buffer->buffers[cpu];
1364 return cpu_buffer->entries;
1365}
1366
1367/**
1368 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
1369 * @buffer: The ring buffer
1370 * @cpu: The per CPU buffer to get the number of overruns from
1371 */
1372unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1373{
1374 struct ring_buffer_per_cpu *cpu_buffer;
1375
1376 if (!cpu_isset(cpu, buffer->cpumask))
1377 return 0;
1378
1379 cpu_buffer = buffer->buffers[cpu];
1380 return cpu_buffer->overrun;
1381}
1382
1383/**
1384 * ring_buffer_entries - get the number of entries in a buffer
1385 * @buffer: The ring buffer
1386 *
1387 * Returns the total number of entries in the ring buffer
1388 * (all CPU entries)
1389 */
1390unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1391{
1392 struct ring_buffer_per_cpu *cpu_buffer;
1393 unsigned long entries = 0;
1394 int cpu;
1395
1396 /* if you care about this being correct, lock the buffer */
1397 for_each_buffer_cpu(buffer, cpu) {
1398 cpu_buffer = buffer->buffers[cpu];
1399 entries += cpu_buffer->entries;
1400 }
1401
1402 return entries;
1403}
1404
1405/**
 1406 * ring_buffer_overruns - get the number of overruns in the buffer
1407 * @buffer: The ring buffer
1408 *
1409 * Returns the total number of overruns in the ring buffer
1410 * (all CPU entries)
1411 */
1412unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1413{
1414 struct ring_buffer_per_cpu *cpu_buffer;
1415 unsigned long overruns = 0;
1416 int cpu;
1417
1418 /* if you care about this being correct, lock the buffer */
1419 for_each_buffer_cpu(buffer, cpu) {
1420 cpu_buffer = buffer->buffers[cpu];
1421 overruns += cpu_buffer->overrun;
1422 }
1423
1424 return overruns;
1425}
1426
1427/**
1428 * ring_buffer_iter_reset - reset an iterator
1429 * @iter: The iterator to reset
1430 *
1431 * Resets the iterator, so that it will start from the beginning
1432 * again.
1433 */
1434void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1435{
1436 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1437
1438 /* Iterator usage is expected to have record disabled */
1439 if (list_empty(&cpu_buffer->reader_page->list)) {
1440 iter->head_page = cpu_buffer->head_page;
1441 iter->head = cpu_buffer->head_page->read;
1442 } else {
1443 iter->head_page = cpu_buffer->reader_page;
1444 iter->head = cpu_buffer->reader_page->read;
1445 }
1446 if (iter->head)
1447 iter->read_stamp = cpu_buffer->read_stamp;
1448 else
1449 iter->read_stamp = iter->head_page->time_stamp;
1450}
1451
1452/**
1453 * ring_buffer_iter_empty - check if an iterator has no more to read
1454 * @iter: The iterator to check
1455 */
1456int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
1457{
1458 struct ring_buffer_per_cpu *cpu_buffer;
1459
1460 cpu_buffer = iter->cpu_buffer;
1461
1462 return iter->head_page == cpu_buffer->commit_page &&
1463 iter->head == rb_commit_index(cpu_buffer);
1464}
1465
1466static void
1467rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1468 struct ring_buffer_event *event)
1469{
1470 u64 delta;
1471
1472 switch (event->type) {
1473 case RINGBUF_TYPE_PADDING:
1474 return;
1475
1476 case RINGBUF_TYPE_TIME_EXTEND:
1477 delta = event->array[0];
1478 delta <<= TS_SHIFT;
1479 delta += event->time_delta;
1480 cpu_buffer->read_stamp += delta;
1481 return;
1482
1483 case RINGBUF_TYPE_TIME_STAMP:
1484 /* FIXME: not implemented */
1485 return;
1486
1487 case RINGBUF_TYPE_DATA:
1488 cpu_buffer->read_stamp += event->time_delta;
1489 return;
1490
1491 default:
1492 BUG();
1493 }
1494 return;
1495}
1496
1497static void
1498rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1499 struct ring_buffer_event *event)
1500{
1501 u64 delta;
1502
1503 switch (event->type) {
1504 case RINGBUF_TYPE_PADDING:
1505 return;
1506
1507 case RINGBUF_TYPE_TIME_EXTEND:
1508 delta = event->array[0];
1509 delta <<= TS_SHIFT;
1510 delta += event->time_delta;
1511 iter->read_stamp += delta;
1512 return;
1513
1514 case RINGBUF_TYPE_TIME_STAMP:
1515 /* FIXME: not implemented */
1516 return;
1517
1518 case RINGBUF_TYPE_DATA:
1519 iter->read_stamp += event->time_delta;
1520 return;
1521
1522 default:
1523 BUG();
1524 }
1525 return;
1526}
1527
1528static struct buffer_page *
1529rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1530{
1531 struct buffer_page *reader = NULL;
1532 unsigned long flags;
1533
1534 spin_lock_irqsave(&cpu_buffer->lock, flags);
1535
1536 again:
1537 reader = cpu_buffer->reader_page;
1538
1539 /* If there's more to read, return this page */
1540 if (cpu_buffer->reader_page->read < rb_page_size(reader))
1541 goto out;
1542
1543 /* Never should we have an index greater than the size */
1544 RB_WARN_ON(cpu_buffer,
1545 cpu_buffer->reader_page->read > rb_page_size(reader));
1546
1547 /* check if we caught up to the tail */
1548 reader = NULL;
1549 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
1550 goto out;
1551
1552 /*
1553 * Splice the empty reader page into the list around the head.
1554 * Reset the reader page to size zero.
1555 */
1556
1557 reader = cpu_buffer->head_page;
1558 cpu_buffer->reader_page->list.next = reader->list.next;
1559 cpu_buffer->reader_page->list.prev = reader->list.prev;
1560
1561 local_set(&cpu_buffer->reader_page->write, 0);
1562 local_set(&cpu_buffer->reader_page->commit, 0);
1563
1564 /* Make the reader page now replace the head */
1565 reader->list.prev->next = &cpu_buffer->reader_page->list;
1566 reader->list.next->prev = &cpu_buffer->reader_page->list;
1567
1568 /*
1569 * If the tail is on the reader, then we must set the head
 1570 * to the inserted page, otherwise to the page after it.
1571 */
1572 cpu_buffer->head_page = cpu_buffer->reader_page;
1573
1574 if (cpu_buffer->commit_page != reader)
1575 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
1576
1577 /* Finally update the reader page to the new head */
1578 cpu_buffer->reader_page = reader;
1579 rb_reset_reader_page(cpu_buffer);
1580
1581 goto again;
1582
1583 out:
1584 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1585
1586 return reader;
1587}
1588
1589static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1590{
1591 struct ring_buffer_event *event;
1592 struct buffer_page *reader;
1593 unsigned length;
1594
1595 reader = rb_get_reader_page(cpu_buffer);
1596
1597 /* This function should not be called when buffer is empty */
1598 BUG_ON(!reader);
1599
1600 event = rb_reader_event(cpu_buffer);
1601
1602 if (event->type == RINGBUF_TYPE_DATA)
1603 cpu_buffer->entries--;
1604
1605 rb_update_read_stamp(cpu_buffer, event);
1606
1607 length = rb_event_length(event);
1608 cpu_buffer->reader_page->read += length;
1609}
1610
1611static void rb_advance_iter(struct ring_buffer_iter *iter)
1612{
1613 struct ring_buffer *buffer;
1614 struct ring_buffer_per_cpu *cpu_buffer;
1615 struct ring_buffer_event *event;
1616 unsigned length;
1617
1618 cpu_buffer = iter->cpu_buffer;
1619 buffer = cpu_buffer->buffer;
1620
1621 /*
1622 * Check if we are at the end of the buffer.
1623 */
1624 if (iter->head >= rb_page_size(iter->head_page)) {
1625 BUG_ON(iter->head_page == cpu_buffer->commit_page);
1626 rb_inc_iter(iter);
1627 return;
1628 }
1629
1630 event = rb_iter_head_event(iter);
1631
1632 length = rb_event_length(event);
1633
1634 /*
1635 * This should not be called to advance the header if we are
1636 * at the tail of the buffer.
1637 */
1638 BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
1639 (iter->head + length > rb_commit_index(cpu_buffer)));
1640
1641 rb_update_iter_read_stamp(iter, event);
1642
1643 iter->head += length;
1644
1645 /* check for end of page padding */
1646 if ((iter->head >= rb_page_size(iter->head_page)) &&
1647 (iter->head_page != cpu_buffer->commit_page))
1648 rb_advance_iter(iter);
1649}
1650
1651/**
1652 * ring_buffer_peek - peek at the next event to be read
1653 * @buffer: The ring buffer to read
 1654 * @cpu: The cpu to peek at
1655 * @ts: The timestamp counter of this event.
1656 *
1657 * This will return the event that will be read next, but does
1658 * not consume the data.
1659 */
1660struct ring_buffer_event *
1661ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1662{
1663 struct ring_buffer_per_cpu *cpu_buffer;
1664 struct ring_buffer_event *event;
1665 struct buffer_page *reader;
1666
1667 if (!cpu_isset(cpu, buffer->cpumask))
1668 return NULL;
1669
1670 cpu_buffer = buffer->buffers[cpu];
1671
1672 again:
1673 reader = rb_get_reader_page(cpu_buffer);
1674 if (!reader)
1675 return NULL;
1676
1677 event = rb_reader_event(cpu_buffer);
1678
1679 switch (event->type) {
1680 case RINGBUF_TYPE_PADDING:
1681 RB_WARN_ON(cpu_buffer, 1);
1682 rb_advance_reader(cpu_buffer);
1683 return NULL;
1684
1685 case RINGBUF_TYPE_TIME_EXTEND:
1686 /* Internal data, OK to advance */
1687 rb_advance_reader(cpu_buffer);
1688 goto again;
1689
1690 case RINGBUF_TYPE_TIME_STAMP:
1691 /* FIXME: not implemented */
1692 rb_advance_reader(cpu_buffer);
1693 goto again;
1694
1695 case RINGBUF_TYPE_DATA:
1696 if (ts) {
1697 *ts = cpu_buffer->read_stamp + event->time_delta;
1698 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1699 }
1700 return event;
1701
1702 default:
1703 BUG();
1704 }
1705
1706 return NULL;
1707}
1708
1709/**
1710 * ring_buffer_iter_peek - peek at the next event to be read
1711 * @iter: The ring buffer iterator
1712 * @ts: The timestamp counter of this event.
1713 *
1714 * This will return the event that will be read next, but does
1715 * not increment the iterator.
1716 */
1717struct ring_buffer_event *
1718ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1719{
1720 struct ring_buffer *buffer;
1721 struct ring_buffer_per_cpu *cpu_buffer;
1722 struct ring_buffer_event *event;
1723
1724 if (ring_buffer_iter_empty(iter))
1725 return NULL;
1726
1727 cpu_buffer = iter->cpu_buffer;
1728 buffer = cpu_buffer->buffer;
1729
1730 again:
1731 if (rb_per_cpu_empty(cpu_buffer))
1732 return NULL;
1733
1734 event = rb_iter_head_event(iter);
1735
1736 switch (event->type) {
1737 case RINGBUF_TYPE_PADDING:
1738 rb_inc_iter(iter);
1739 goto again;
1740
1741 case RINGBUF_TYPE_TIME_EXTEND:
1742 /* Internal data, OK to advance */
1743 rb_advance_iter(iter);
1744 goto again;
1745
1746 case RINGBUF_TYPE_TIME_STAMP:
1747 /* FIXME: not implemented */
1748 rb_advance_iter(iter);
1749 goto again;
1750
1751 case RINGBUF_TYPE_DATA:
1752 if (ts) {
1753 *ts = iter->read_stamp + event->time_delta;
1754 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1755 }
1756 return event;
1757
1758 default:
1759 BUG();
1760 }
1761
1762 return NULL;
1763}
1764
1765/**
1766 * ring_buffer_consume - return an event and consume it
 1767 * @buffer: The ring buffer to get the next event from
 * @cpu: The per CPU buffer to consume from
 * @ts: Where to store the event's timestamp, if non-NULL
 1768 *
 1769 * Returns the next event in the ring buffer, and that event is consumed.
 1770 * Meaning that sequential reads will keep returning a different event,
 1771 * and eventually empty the ring buffer if the producer is slower.
1772 */
1773struct ring_buffer_event *
1774ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1775{
1776 struct ring_buffer_per_cpu *cpu_buffer;
1777 struct ring_buffer_event *event;
1778
1779 if (!cpu_isset(cpu, buffer->cpumask))
1780 return NULL;
1781
1782 event = ring_buffer_peek(buffer, cpu, ts);
1783 if (!event)
1784 return NULL;
1785
1786 cpu_buffer = buffer->buffers[cpu];
1787 rb_advance_reader(cpu_buffer);
1788
1789 return event;
1790}
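
/*
 * Editor's usage sketch: draining one CPU's events with consuming
 * reads; "buffer", "cpu" and process_event() are assumed to come from
 * the caller:
 *
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts)))
 *		process_event(ring_buffer_event_data(event), ts);
 */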
1791
1792/**
1793 * ring_buffer_read_start - start a non consuming read of the buffer
1794 * @buffer: The ring buffer to read from
1795 * @cpu: The cpu buffer to iterate over
1796 *
1797 * This starts up an iteration through the buffer. It also disables
1798 * the recording to the buffer until the reading is finished.
1799 * This prevents the reading from being corrupted. This is not
1800 * a consuming read, so a producer is not expected.
1801 *
 1802 * Must be paired with ring_buffer_read_finish.
1803 */
1804struct ring_buffer_iter *
1805ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1806{
1807 struct ring_buffer_per_cpu *cpu_buffer;
1808 struct ring_buffer_iter *iter;
1809 unsigned long flags;
1810
1811 if (!cpu_isset(cpu, buffer->cpumask))
1812 return NULL;
1813
1814 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1815 if (!iter)
1816 return NULL;
1817
1818 cpu_buffer = buffer->buffers[cpu];
1819
1820 iter->cpu_buffer = cpu_buffer;
1821
1822 atomic_inc(&cpu_buffer->record_disabled);
1823 synchronize_sched();
1824
1825 spin_lock_irqsave(&cpu_buffer->lock, flags);
1826 ring_buffer_iter_reset(iter);
1827 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1828
1829 return iter;
1830}
1831
1832/**
 1833 * ring_buffer_read_finish - finish reading the iterator of the buffer
 1834 * @iter: The iterator retrieved by ring_buffer_read_start
1835 *
1836 * This re-enables the recording to the buffer, and frees the
1837 * iterator.
1838 */
1839void
1840ring_buffer_read_finish(struct ring_buffer_iter *iter)
1841{
1842 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1843
1844 atomic_dec(&cpu_buffer->record_disabled);
1845 kfree(iter);
1846}
1847
1848/**
1849 * ring_buffer_read - read the next item in the ring buffer by the iterator
1850 * @iter: The ring buffer iterator
1851 * @ts: The time stamp of the event read.
1852 *
1853 * This reads the next event in the ring buffer and increments the iterator.
1854 */
1855struct ring_buffer_event *
1856ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1857{
1858 struct ring_buffer_event *event;
1859
1860 event = ring_buffer_iter_peek(iter, ts);
1861 if (!event)
1862 return NULL;
1863
1864 rb_advance_iter(iter);
1865
1866 return event;
1867}
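
/*
 * Editor's usage sketch: a non-consuming walk of one CPU's buffer with
 * the iterator API above (same assumed caller-provided names as in the
 * previous sketch):
 *
 *	struct ring_buffer_iter *iter;
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	iter = ring_buffer_read_start(buffer, cpu);
 *	if (iter) {
 *		while ((event = ring_buffer_read(iter, &ts)))
 *			process_event(ring_buffer_event_data(event), ts);
 *		ring_buffer_read_finish(iter);
 *	}
 */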
1868
1869/**
1870 * ring_buffer_size - return the size of the ring buffer (in bytes)
1871 * @buffer: The ring buffer.
1872 */
1873unsigned long ring_buffer_size(struct ring_buffer *buffer)
1874{
1875 return BUF_PAGE_SIZE * buffer->pages;
1876}
1877
1878static void
1879rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1880{
1881 cpu_buffer->head_page
1882 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1883 local_set(&cpu_buffer->head_page->write, 0);
1884 local_set(&cpu_buffer->head_page->commit, 0);
1885
1886 cpu_buffer->head_page->read = 0;
1887
1888 cpu_buffer->tail_page = cpu_buffer->head_page;
1889 cpu_buffer->commit_page = cpu_buffer->head_page;
1890
1891 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1892 local_set(&cpu_buffer->reader_page->write, 0);
1893 local_set(&cpu_buffer->reader_page->commit, 0);
1894 cpu_buffer->reader_page->read = 0;
1895
1896 cpu_buffer->overrun = 0;
1897 cpu_buffer->entries = 0;
1898}
1899
1900/**
1901 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
1902 * @buffer: The ring buffer to reset a per cpu buffer of
1903 * @cpu: The CPU buffer to be reset
1904 */
1905void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
1906{
1907 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1908 unsigned long flags;
1909
1910 if (!cpu_isset(cpu, buffer->cpumask))
1911 return;
1912
1913 spin_lock_irqsave(&cpu_buffer->lock, flags);
1914
1915 rb_reset_cpu(cpu_buffer);
1916
1917 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1918}
1919
1920/**
1921 * ring_buffer_reset - reset a ring buffer
1922 * @buffer: The ring buffer to reset all cpu buffers
1923 */
1924void ring_buffer_reset(struct ring_buffer *buffer)
1925{
1926 int cpu;
1927
1928 for_each_buffer_cpu(buffer, cpu)
1929 ring_buffer_reset_cpu(buffer, cpu);
1930}
1931
1932/**
 1933 * ring_buffer_empty - is the ring buffer empty?
1934 * @buffer: The ring buffer to test
1935 */
1936int ring_buffer_empty(struct ring_buffer *buffer)
1937{
1938 struct ring_buffer_per_cpu *cpu_buffer;
1939 int cpu;
1940
1941 /* yes this is racy, but if you don't like the race, lock the buffer */
1942 for_each_buffer_cpu(buffer, cpu) {
1943 cpu_buffer = buffer->buffers[cpu];
1944 if (!rb_per_cpu_empty(cpu_buffer))
1945 return 0;
1946 }
1947 return 1;
1948}
1949
1950/**
1951 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
1952 * @buffer: The ring buffer
1953 * @cpu: The CPU buffer to test
1954 */
1955int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
1956{
1957 struct ring_buffer_per_cpu *cpu_buffer;
1958
1959 if (!cpu_isset(cpu, buffer->cpumask))
1960 return 1;
1961
1962 cpu_buffer = buffer->buffers[cpu];
1963 return rb_per_cpu_empty(cpu_buffer);
1964}
1965
1966/**
1967 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
1968 * @buffer_a: One buffer to swap with
 1969 * @buffer_b: The other buffer to swap with
 * @cpu: The CPU of the per-cpu buffers to swap
 1970 *
 1971 * This function is useful for tracers that want to take a "snapshot"
 1972 * of a CPU buffer and have another backup buffer lying around.
 1973 * It is expected that the tracer handles the cpu buffer not being
 1974 * used at the moment.
1975 */
1976int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
1977 struct ring_buffer *buffer_b, int cpu)
1978{
1979 struct ring_buffer_per_cpu *cpu_buffer_a;
1980 struct ring_buffer_per_cpu *cpu_buffer_b;
1981
1982 if (!cpu_isset(cpu, buffer_a->cpumask) ||
1983 !cpu_isset(cpu, buffer_b->cpumask))
1984 return -EINVAL;
1985
1986 /* At least make sure the two buffers are somewhat the same */
1987 if (buffer_a->size != buffer_b->size ||
1988 buffer_a->pages != buffer_b->pages)
1989 return -EINVAL;
1990
1991 cpu_buffer_a = buffer_a->buffers[cpu];
1992 cpu_buffer_b = buffer_b->buffers[cpu];
1993
1994 /*
1995 * We can't do a synchronize_sched here because this
1996 * function can be called in atomic context.
1997 * Normally this will be called from the same CPU as cpu.
1998 * If not it's up to the caller to protect this.
1999 */
2000 atomic_inc(&cpu_buffer_a->record_disabled);
2001 atomic_inc(&cpu_buffer_b->record_disabled);
2002
2003 buffer_a->buffers[cpu] = cpu_buffer_b;
2004 buffer_b->buffers[cpu] = cpu_buffer_a;
2005
2006 cpu_buffer_b->buffer = buffer_a;
2007 cpu_buffer_a->buffer = buffer_b;
2008
2009 atomic_dec(&cpu_buffer_a->record_disabled);
2010 atomic_dec(&cpu_buffer_b->record_disabled);
2011
2012 return 0;
2013}
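
/*
 * Editor's sketch of the snapshot pattern this enables, mirroring how
 * update_max_tr_single() in the trace.c diff below uses it; "max_buffer",
 * "live_buffer" and "cpu" are assumed caller state:
 *
 *	ring_buffer_reset(max_buffer);
 *	ring_buffer_swap_cpu(max_buffer, live_buffer, cpu);
 */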
2014
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8f3fb3db61c3..d345d649d073 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
14#include <linux/utsrelease.h> 14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 16#include <linux/seq_file.h>
17#include <linux/notifier.h>
17#include <linux/debugfs.h> 18#include <linux/debugfs.h>
18#include <linux/pagemap.h> 19#include <linux/pagemap.h>
19#include <linux/hardirq.h> 20#include <linux/hardirq.h>
@@ -22,6 +23,7 @@
22#include <linux/ftrace.h> 23#include <linux/ftrace.h>
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/kdebug.h>
25#include <linux/ctype.h> 27#include <linux/ctype.h>
26#include <linux/init.h> 28#include <linux/init.h>
27#include <linux/poll.h> 29#include <linux/poll.h>
@@ -31,25 +33,36 @@
31#include <linux/writeback.h> 33#include <linux/writeback.h>
32 34
33#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h>
34 37
35#include "trace.h" 38#include "trace.h"
36 39
40#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
41
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 42unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh; 43unsigned long __read_mostly tracing_thresh;
39 44
40static unsigned long __read_mostly tracing_nr_buffers; 45static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
46
47static inline void ftrace_disable_cpu(void)
48{
49 preempt_disable();
50 local_inc(&__get_cpu_var(ftrace_cpu_disabled));
51}
52
53static inline void ftrace_enable_cpu(void)
54{
55 local_dec(&__get_cpu_var(ftrace_cpu_disabled));
56 preempt_enable();
57}
58
41static cpumask_t __read_mostly tracing_buffer_mask; 59static cpumask_t __read_mostly tracing_buffer_mask;
42 60
43#define for_each_tracing_cpu(cpu) \ 61#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask) 62 for_each_cpu_mask(cpu, tracing_buffer_mask)
45 63
46static int trace_alloc_page(void);
47static int trace_free_page(void);
48
49static int tracing_disabled = 1; 64static int tracing_disabled = 1;
50 65
51static unsigned long tracing_pages_allocated;
52
53long 66long
54ns2usecs(cycle_t nsec) 67ns2usecs(cycle_t nsec)
55{ 68{
@@ -60,7 +73,9 @@ ns2usecs(cycle_t nsec)
60 73
61cycle_t ftrace_now(int cpu) 74cycle_t ftrace_now(int cpu)
62{ 75{
63 return cpu_clock(cpu); 76 u64 ts = ring_buffer_time_stamp(cpu);
77 ring_buffer_normalize_time_stamp(cpu, &ts);
78 return ts;
64} 79}
65 80
66/* 81/*
@@ -100,11 +115,18 @@ static int tracer_enabled = 1;
100int ftrace_function_enabled; 115int ftrace_function_enabled;
101 116
102/* 117/*
103 * trace_nr_entries is the number of entries that is allocated 118 * trace_buf_size is the size in bytes that is allocated
104 * for a buffer. Note, the number of entries is always rounded 119 * for a buffer. Note, the number of bytes is always rounded
105 * to ENTRIES_PER_PAGE. 120 * to page size.
121 *
122 * This number is purposely set to a low number of 16384.
123 * If the dump on oops happens, it will be much appreciated
124 * to not have to wait for all that output. Anyway this can be
125 * boot time and run time configurable.
106 */ 126 */
107static unsigned long trace_nr_entries = 65536UL; 127#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
128
129static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
108 130
109/* trace_types holds a link list of available tracers. */ 131/* trace_types holds a link list of available tracers. */
110static struct tracer *trace_types __read_mostly; 132static struct tracer *trace_types __read_mostly;
@@ -133,24 +155,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
133/* trace_flags holds iter_ctrl options */ 155/* trace_flags holds iter_ctrl options */
134unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 156unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
135 157
136static notrace void no_trace_init(struct trace_array *tr)
137{
138 int cpu;
139
140 ftrace_function_enabled = 0;
141 if(tr->ctrl)
142 for_each_online_cpu(cpu)
143 tracing_reset(tr->data[cpu]);
144 tracer_enabled = 0;
145}
146
147/* dummy trace to disable tracing */
148static struct tracer no_tracer __read_mostly = {
149 .name = "none",
150 .init = no_trace_init
151};
152
153
154/** 158/**
155 * trace_wake_up - wake up tasks waiting for trace input 159 * trace_wake_up - wake up tasks waiting for trace input
156 * 160 *
@@ -167,23 +171,21 @@ void trace_wake_up(void)
167 wake_up(&trace_wait); 171 wake_up(&trace_wait);
168} 172}
169 173
170#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) 174static int __init set_buf_size(char *str)
171
172static int __init set_nr_entries(char *str)
173{ 175{
174 unsigned long nr_entries; 176 unsigned long buf_size;
175 int ret; 177 int ret;
176 178
177 if (!str) 179 if (!str)
178 return 0; 180 return 0;
179 ret = strict_strtoul(str, 0, &nr_entries); 181 ret = strict_strtoul(str, 0, &buf_size);
180 /* nr_entries can not be zero */ 182 /* nr_entries can not be zero */
181 if (ret < 0 || nr_entries == 0) 183 if (ret < 0 || buf_size == 0)
182 return 0; 184 return 0;
183 trace_nr_entries = nr_entries; 185 trace_buf_size = buf_size;
184 return 1; 186 return 1;
185} 187}
186__setup("trace_entries=", set_nr_entries); 188__setup("trace_buf_size=", set_buf_size);
187 189
188unsigned long nsecs_to_usecs(unsigned long nsecs) 190unsigned long nsecs_to_usecs(unsigned long nsecs)
189{ 191{
@@ -191,21 +193,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
191} 193}
192 194
193/* 195/*
194 * trace_flag_type is an enumeration that holds different
195 * states when a trace occurs. These are:
196 * IRQS_OFF - interrupts were disabled
197 * NEED_RESCED - reschedule is requested
198 * HARDIRQ - inside an interrupt handler
199 * SOFTIRQ - inside a softirq handler
200 */
201enum trace_flag_type {
202 TRACE_FLAG_IRQS_OFF = 0x01,
203 TRACE_FLAG_NEED_RESCHED = 0x02,
204 TRACE_FLAG_HARDIRQ = 0x04,
205 TRACE_FLAG_SOFTIRQ = 0x08,
206};
207
208/*
209 * TRACE_ITER_SYM_MASK masks the options in trace_flags that 196 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
210 * control the output of kernel symbols. 197 * control the output of kernel symbols.
211 */ 198 */
@@ -224,6 +211,7 @@ static const char *trace_options[] = {
224 "block", 211 "block",
225 "stacktrace", 212 "stacktrace",
226 "sched-tree", 213 "sched-tree",
214 "ftrace_printk",
227 NULL 215 NULL
228}; 216};
229 217
@@ -266,54 +254,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
266 tracing_record_cmdline(current); 254 tracing_record_cmdline(current);
267} 255}
268 256
269#define CHECK_COND(cond) \
270 if (unlikely(cond)) { \
271 tracing_disabled = 1; \
272 WARN_ON(1); \
273 return -1; \
274 }
275
276/**
277 * check_pages - integrity check of trace buffers
278 *
279 * As a safty measure we check to make sure the data pages have not
280 * been corrupted.
281 */
282int check_pages(struct trace_array_cpu *data)
283{
284 struct page *page, *tmp;
285
286 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
287 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
288
289 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
290 CHECK_COND(page->lru.next->prev != &page->lru);
291 CHECK_COND(page->lru.prev->next != &page->lru);
292 }
293
294 return 0;
295}
296
297/**
298 * head_page - page address of the first page in per_cpu buffer.
299 *
300 * head_page returns the page address of the first page in
301 * a per_cpu buffer. This also preforms various consistency
302 * checks to make sure the buffer has not been corrupted.
303 */
304void *head_page(struct trace_array_cpu *data)
305{
306 struct page *page;
307
308 if (list_empty(&data->trace_pages))
309 return NULL;
310
311 page = list_entry(data->trace_pages.next, struct page, lru);
312 BUG_ON(&page->lru == &data->trace_pages);
313
314 return page_address(page);
315}
316
317/** 257/**
318 * trace_seq_printf - sequence printing of trace information 258 * trace_seq_printf - sequence printing of trace information
319 * @s: trace sequence descriptor 259 * @s: trace sequence descriptor
@@ -395,28 +335,23 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
395 return len; 335 return len;
396} 336}
397 337
398#define HEX_CHARS 17 338#define MAX_MEMHEX_BYTES 8
399static const char hex2asc[] = "0123456789abcdef"; 339#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
400 340
401static int 341static int
402trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) 342trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
403{ 343{
404 unsigned char hex[HEX_CHARS]; 344 unsigned char hex[HEX_CHARS];
405 unsigned char *data = mem; 345 unsigned char *data = mem;
406 unsigned char byte;
407 int i, j; 346 int i, j;
408 347
409 BUG_ON(len >= HEX_CHARS);
410
411#ifdef __BIG_ENDIAN 348#ifdef __BIG_ENDIAN
412 for (i = 0, j = 0; i < len; i++) { 349 for (i = 0, j = 0; i < len; i++) {
413#else 350#else
414 for (i = len-1, j = 0; i >= 0; i--) { 351 for (i = len-1, j = 0; i >= 0; i--) {
415#endif 352#endif
416 byte = data[i]; 353 hex[j++] = hex_asc_hi(data[i]);
417 354 hex[j++] = hex_asc_lo(data[i]);
418 hex[j++] = hex2asc[byte & 0x0f];
419 hex[j++] = hex2asc[byte >> 4];
420 } 355 }
421 hex[j++] = ' '; 356 hex[j++] = ' ';
422 357
@@ -460,34 +395,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
460 trace_seq_reset(s); 395 trace_seq_reset(s);
461} 396}
462 397
463/*
464 * flip the trace buffers between two trace descriptors.
465 * This usually is the buffers between the global_trace and
466 * the max_tr to record a snapshot of a current trace.
467 *
468 * The ftrace_max_lock must be held.
469 */
470static void
471flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
472{
473 struct list_head flip_pages;
474
475 INIT_LIST_HEAD(&flip_pages);
476
477 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
478 sizeof(struct trace_array_cpu) -
479 offsetof(struct trace_array_cpu, trace_head_idx));
480
481 check_pages(tr1);
482 check_pages(tr2);
483 list_splice_init(&tr1->trace_pages, &flip_pages);
484 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
485 list_splice_init(&flip_pages, &tr2->trace_pages);
486 BUG_ON(!list_empty(&flip_pages));
487 check_pages(tr1);
488 check_pages(tr2);
489}
490
491/** 398/**
492 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 399 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
493 * @tr: tracer 400 * @tr: tracer
@@ -500,17 +407,17 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
500void 407void
501update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 408update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
502{ 409{
503 struct trace_array_cpu *data; 410 struct ring_buffer *buf = tr->buffer;
504 int i;
505 411
506 WARN_ON_ONCE(!irqs_disabled()); 412 WARN_ON_ONCE(!irqs_disabled());
507 __raw_spin_lock(&ftrace_max_lock); 413 __raw_spin_lock(&ftrace_max_lock);
508 /* clear out all the previous traces */ 414
509 for_each_tracing_cpu(i) { 415 tr->buffer = max_tr.buffer;
510 data = tr->data[i]; 416 max_tr.buffer = buf;
511 flip_trace(max_tr.data[i], data); 417
512 tracing_reset(data); 418 ftrace_disable_cpu();
513 } 419 ring_buffer_reset(tr->buffer);
420 ftrace_enable_cpu();
514 421
515 __update_max_tr(tr, tsk, cpu); 422 __update_max_tr(tr, tsk, cpu);
516 __raw_spin_unlock(&ftrace_max_lock); 423 __raw_spin_unlock(&ftrace_max_lock);
@@ -527,16 +434,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
527void 434void
528update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 435update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
529{ 436{
530 struct trace_array_cpu *data = tr->data[cpu]; 437 int ret;
531 int i;
532 438
533 WARN_ON_ONCE(!irqs_disabled()); 439 WARN_ON_ONCE(!irqs_disabled());
534 __raw_spin_lock(&ftrace_max_lock); 440 __raw_spin_lock(&ftrace_max_lock);
535 for_each_tracing_cpu(i)
536 tracing_reset(max_tr.data[i]);
537 441
538 flip_trace(max_tr.data[cpu], data); 442 ftrace_disable_cpu();
539 tracing_reset(data); 443
444 ring_buffer_reset(max_tr.buffer);
445 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
446
447 ftrace_enable_cpu();
448
449 WARN_ON_ONCE(ret);
540 450
541 __update_max_tr(tr, tsk, cpu); 451 __update_max_tr(tr, tsk, cpu);
542 __raw_spin_unlock(&ftrace_max_lock); 452 __raw_spin_unlock(&ftrace_max_lock);
@@ -573,7 +483,6 @@ int register_tracer(struct tracer *type)
573#ifdef CONFIG_FTRACE_STARTUP_TEST 483#ifdef CONFIG_FTRACE_STARTUP_TEST
574 if (type->selftest) { 484 if (type->selftest) {
575 struct tracer *saved_tracer = current_trace; 485 struct tracer *saved_tracer = current_trace;
576 struct trace_array_cpu *data;
577 struct trace_array *tr = &global_trace; 486 struct trace_array *tr = &global_trace;
578 int saved_ctrl = tr->ctrl; 487 int saved_ctrl = tr->ctrl;
579 int i; 488 int i;
@@ -585,10 +494,7 @@ int register_tracer(struct tracer *type)
585 * If we fail, we do not register this tracer. 494 * If we fail, we do not register this tracer.
586 */ 495 */
587 for_each_tracing_cpu(i) { 496 for_each_tracing_cpu(i) {
588 data = tr->data[i]; 497 tracing_reset(tr, i);
589 if (!head_page(data))
590 continue;
591 tracing_reset(data);
592 } 498 }
593 current_trace = type; 499 current_trace = type;
594 tr->ctrl = 0; 500 tr->ctrl = 0;
@@ -604,10 +510,7 @@ int register_tracer(struct tracer *type)
604 } 510 }
605 /* Only reset on passing, to avoid touching corrupted buffers */ 511 /* Only reset on passing, to avoid touching corrupted buffers */
606 for_each_tracing_cpu(i) { 512 for_each_tracing_cpu(i) {
607 data = tr->data[i]; 513 tracing_reset(tr, i);
608 if (!head_page(data))
609 continue;
610 tracing_reset(data);
611 } 514 }
612 printk(KERN_CONT "PASSED\n"); 515 printk(KERN_CONT "PASSED\n");
613 } 516 }
@@ -653,13 +556,11 @@ void unregister_tracer(struct tracer *type)
653 mutex_unlock(&trace_types_lock); 556 mutex_unlock(&trace_types_lock);
654} 557}
655 558
656void tracing_reset(struct trace_array_cpu *data) 559void tracing_reset(struct trace_array *tr, int cpu)
657{ 560{
658 data->trace_idx = 0; 561 ftrace_disable_cpu();
659 data->overrun = 0; 562 ring_buffer_reset_cpu(tr->buffer, cpu);
660 data->trace_head = data->trace_tail = head_page(data); 563 ftrace_enable_cpu();
661 data->trace_head_idx = 0;
662 data->trace_tail_idx = 0;
663} 564}
664 565
665#define SAVED_CMDLINES 128 566#define SAVED_CMDLINES 128
@@ -745,82 +646,16 @@ void tracing_record_cmdline(struct task_struct *tsk)
745 trace_save_cmdline(tsk); 646 trace_save_cmdline(tsk);
746} 647}
747 648
748static inline struct list_head * 649void
749trace_next_list(struct trace_array_cpu *data, struct list_head *next) 650tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
750{ 651 int pc)
751 /*
752 * Roundrobin - but skip the head (which is not a real page):
753 */
754 next = next->next;
755 if (unlikely(next == &data->trace_pages))
756 next = next->next;
757 BUG_ON(next == &data->trace_pages);
758
759 return next;
760}
761
762static inline void *
763trace_next_page(struct trace_array_cpu *data, void *addr)
764{
765 struct list_head *next;
766 struct page *page;
767
768 page = virt_to_page(addr);
769
770 next = trace_next_list(data, &page->lru);
771 page = list_entry(next, struct page, lru);
772
773 return page_address(page);
774}
775
776static inline struct trace_entry *
777tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
778{
779 unsigned long idx, idx_next;
780 struct trace_entry *entry;
781
782 data->trace_idx++;
783 idx = data->trace_head_idx;
784 idx_next = idx + 1;
785
786 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
787
788 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
789
790 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
791 data->trace_head = trace_next_page(data, data->trace_head);
792 idx_next = 0;
793 }
794
795 if (data->trace_head == data->trace_tail &&
796 idx_next == data->trace_tail_idx) {
797 /* overrun */
798 data->overrun++;
799 data->trace_tail_idx++;
800 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
801 data->trace_tail =
802 trace_next_page(data, data->trace_tail);
803 data->trace_tail_idx = 0;
804 }
805 }
806
807 data->trace_head_idx = idx_next;
808
809 return entry;
810}
811
812static inline void
813tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
814{ 652{
815 struct task_struct *tsk = current; 653 struct task_struct *tsk = current;
816 unsigned long pc;
817
818 pc = preempt_count();
819 654
820 entry->preempt_count = pc & 0xff; 655 entry->preempt_count = pc & 0xff;
821 entry->pid = (tsk) ? tsk->pid : 0; 656 entry->pid = (tsk) ? tsk->pid : 0;
822 entry->t = ftrace_now(raw_smp_processor_id()); 657 entry->flags =
823 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 658 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
824 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 659 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
825 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 660 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
826 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 661 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -828,145 +663,139 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
828 663
829void 664void
830trace_function(struct trace_array *tr, struct trace_array_cpu *data, 665trace_function(struct trace_array *tr, struct trace_array_cpu *data,
831 unsigned long ip, unsigned long parent_ip, unsigned long flags) 666 unsigned long ip, unsigned long parent_ip, unsigned long flags,
667 int pc)
832{ 668{
833 struct trace_entry *entry; 669 struct ring_buffer_event *event;
670 struct ftrace_entry *entry;
834 unsigned long irq_flags; 671 unsigned long irq_flags;
835 672
836 raw_local_irq_save(irq_flags); 673 /* If we are reading the ring buffer, don't trace */
837 __raw_spin_lock(&data->lock); 674 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
838 entry = tracing_get_trace_entry(tr, data); 675 return;
839 tracing_generic_entry_update(entry, flags); 676
840 entry->type = TRACE_FN; 677 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
841 entry->fn.ip = ip; 678 &irq_flags);
842 entry->fn.parent_ip = parent_ip; 679 if (!event)
843 __raw_spin_unlock(&data->lock); 680 return;
844 raw_local_irq_restore(irq_flags); 681 entry = ring_buffer_event_data(event);
682 tracing_generic_entry_update(&entry->ent, flags, pc);
683 entry->ent.type = TRACE_FN;
684 entry->ip = ip;
685 entry->parent_ip = parent_ip;
686 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
845} 687}
846 688
847void 689void
848ftrace(struct trace_array *tr, struct trace_array_cpu *data, 690ftrace(struct trace_array *tr, struct trace_array_cpu *data,
849 unsigned long ip, unsigned long parent_ip, unsigned long flags) 691 unsigned long ip, unsigned long parent_ip, unsigned long flags,
692 int pc)
850{ 693{
851 if (likely(!atomic_read(&data->disabled))) 694 if (likely(!atomic_read(&data->disabled)))
852 trace_function(tr, data, ip, parent_ip, flags); 695 trace_function(tr, data, ip, parent_ip, flags, pc);
853} 696}
854 697
855#ifdef CONFIG_MMIOTRACE 698static void ftrace_trace_stack(struct trace_array *tr,
856void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, 699 struct trace_array_cpu *data,
857 struct mmiotrace_rw *rw) 700 unsigned long flags,
701 int skip, int pc)
858{ 702{
859 struct trace_entry *entry; 703 struct ring_buffer_event *event;
704 struct stack_entry *entry;
705 struct stack_trace trace;
860 unsigned long irq_flags; 706 unsigned long irq_flags;
861 707
862 raw_local_irq_save(irq_flags); 708 if (!(trace_flags & TRACE_ITER_STACKTRACE))
863 __raw_spin_lock(&data->lock); 709 return;
864
865 entry = tracing_get_trace_entry(tr, data);
866 tracing_generic_entry_update(entry, 0);
867 entry->type = TRACE_MMIO_RW;
868 entry->mmiorw = *rw;
869
870 __raw_spin_unlock(&data->lock);
871 raw_local_irq_restore(irq_flags);
872
873 trace_wake_up();
874}
875
876void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
877 struct mmiotrace_map *map)
878{
879 struct trace_entry *entry;
880 unsigned long irq_flags;
881 710
882 raw_local_irq_save(irq_flags); 711 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
883 __raw_spin_lock(&data->lock); 712 &irq_flags);
713 if (!event)
714 return;
715 entry = ring_buffer_event_data(event);
716 tracing_generic_entry_update(&entry->ent, flags, pc);
717 entry->ent.type = TRACE_STACK;
884 718
885 entry = tracing_get_trace_entry(tr, data); 719 memset(&entry->caller, 0, sizeof(entry->caller));
886 tracing_generic_entry_update(entry, 0);
887 entry->type = TRACE_MMIO_MAP;
888 entry->mmiomap = *map;
889 720
890 __raw_spin_unlock(&data->lock); 721 trace.nr_entries = 0;
891 raw_local_irq_restore(irq_flags); 722 trace.max_entries = FTRACE_STACK_ENTRIES;
723 trace.skip = skip;
724 trace.entries = entry->caller;
892 725
893 trace_wake_up(); 726 save_stack_trace(&trace);
727 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
894} 728}
895#endif
896 729
897void __trace_stack(struct trace_array *tr, 730void __trace_stack(struct trace_array *tr,
898 struct trace_array_cpu *data, 731 struct trace_array_cpu *data,
899 unsigned long flags, 732 unsigned long flags,
900 int skip) 733 int skip)
901{ 734{
902 struct trace_entry *entry; 735 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
903 struct stack_trace trace;
904
905 if (!(trace_flags & TRACE_ITER_STACKTRACE))
906 return;
907
908 entry = tracing_get_trace_entry(tr, data);
909 tracing_generic_entry_update(entry, flags);
910 entry->type = TRACE_STACK;
911
912 memset(&entry->stack, 0, sizeof(entry->stack));
913
914 trace.nr_entries = 0;
915 trace.max_entries = FTRACE_STACK_ENTRIES;
916 trace.skip = skip;
917 trace.entries = entry->stack.caller;
918
919 save_stack_trace(&trace);
920} 736}
921 737
922void 738static void
923__trace_special(void *__tr, void *__data, 739ftrace_trace_special(void *__tr, void *__data,
924 unsigned long arg1, unsigned long arg2, unsigned long arg3) 740 unsigned long arg1, unsigned long arg2, unsigned long arg3,
741 int pc)
925{ 742{
743 struct ring_buffer_event *event;
926 struct trace_array_cpu *data = __data; 744 struct trace_array_cpu *data = __data;
927 struct trace_array *tr = __tr; 745 struct trace_array *tr = __tr;
928 struct trace_entry *entry; 746 struct special_entry *entry;
929 unsigned long irq_flags; 747 unsigned long irq_flags;
930 748
931 raw_local_irq_save(irq_flags); 749 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
932 __raw_spin_lock(&data->lock); 750 &irq_flags);
933 entry = tracing_get_trace_entry(tr, data); 751 if (!event)
934 tracing_generic_entry_update(entry, 0); 752 return;
935 entry->type = TRACE_SPECIAL; 753 entry = ring_buffer_event_data(event);
936 entry->special.arg1 = arg1; 754 tracing_generic_entry_update(&entry->ent, 0, pc);
937 entry->special.arg2 = arg2; 755 entry->ent.type = TRACE_SPECIAL;
938 entry->special.arg3 = arg3; 756 entry->arg1 = arg1;
939 __trace_stack(tr, data, irq_flags, 4); 757 entry->arg2 = arg2;
940 __raw_spin_unlock(&data->lock); 758 entry->arg3 = arg3;
941 raw_local_irq_restore(irq_flags); 759 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
760 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
942 761
943 trace_wake_up(); 762 trace_wake_up();
944} 763}
945 764
946void 765void
766__trace_special(void *__tr, void *__data,
767 unsigned long arg1, unsigned long arg2, unsigned long arg3)
768{
769 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
770}
771
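Both ftrace_trace_special() and the __trace_special() wrapper now take an explicit pc argument sampled before preemption is touched. A hedged, kernel-only sketch of why the ordering matters (2.6.27-era API, not standalone-compilable):

/*
 * Sample preempt_count() *before* preempt_disable_notrace(), so the
 * recorded event carries the caller's preempt depth, not the tracer's +1.
 */
void trace_event_with_caller_pc(void)
{
	int pc = preempt_count();	/* caller's depth, sampled first */

	preempt_disable_notrace();	/* notrace: don't trace the tracer */
	/* ... ring_buffer_lock_reserve(), then
	 *     tracing_generic_entry_update(&entry->ent, 0, pc) ... */
	preempt_enable_notrace();
}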
772void
947tracing_sched_switch_trace(struct trace_array *tr, 773tracing_sched_switch_trace(struct trace_array *tr,
948 struct trace_array_cpu *data, 774 struct trace_array_cpu *data,
949 struct task_struct *prev, 775 struct task_struct *prev,
950 struct task_struct *next, 776 struct task_struct *next,
951 unsigned long flags) 777 unsigned long flags, int pc)
952{ 778{
953 struct trace_entry *entry; 779 struct ring_buffer_event *event;
780 struct ctx_switch_entry *entry;
954 unsigned long irq_flags; 781 unsigned long irq_flags;
955 782
956 raw_local_irq_save(irq_flags); 783 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
957 __raw_spin_lock(&data->lock); 784 &irq_flags);
958 entry = tracing_get_trace_entry(tr, data); 785 if (!event)
959 tracing_generic_entry_update(entry, flags); 786 return;
960 entry->type = TRACE_CTX; 787 entry = ring_buffer_event_data(event);
961 entry->ctx.prev_pid = prev->pid; 788 tracing_generic_entry_update(&entry->ent, flags, pc);
962 entry->ctx.prev_prio = prev->prio; 789 entry->ent.type = TRACE_CTX;
963 entry->ctx.prev_state = prev->state; 790 entry->prev_pid = prev->pid;
964 entry->ctx.next_pid = next->pid; 791 entry->prev_prio = prev->prio;
965 entry->ctx.next_prio = next->prio; 792 entry->prev_state = prev->state;
966 entry->ctx.next_state = next->state; 793 entry->next_pid = next->pid;
967 __trace_stack(tr, data, flags, 5); 794 entry->next_prio = next->prio;
968 __raw_spin_unlock(&data->lock); 795 entry->next_state = next->state;
969 raw_local_irq_restore(irq_flags); 796 entry->next_cpu = task_cpu(next);
797 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
798 ftrace_trace_stack(tr, data, flags, 5, pc);
970} 799}
971 800
972void 801void
@@ -974,25 +803,28 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
974 struct trace_array_cpu *data, 803 struct trace_array_cpu *data,
975 struct task_struct *wakee, 804 struct task_struct *wakee,
976 struct task_struct *curr, 805 struct task_struct *curr,
977 unsigned long flags) 806 unsigned long flags, int pc)
978{ 807{
979 struct trace_entry *entry; 808 struct ring_buffer_event *event;
809 struct ctx_switch_entry *entry;
980 unsigned long irq_flags; 810 unsigned long irq_flags;
981 811
982 raw_local_irq_save(irq_flags); 812 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
983 __raw_spin_lock(&data->lock); 813 &irq_flags);
984 entry = tracing_get_trace_entry(tr, data); 814 if (!event)
985 tracing_generic_entry_update(entry, flags); 815 return;
986 entry->type = TRACE_WAKE; 816 entry = ring_buffer_event_data(event);
987 entry->ctx.prev_pid = curr->pid; 817 tracing_generic_entry_update(&entry->ent, flags, pc);
988 entry->ctx.prev_prio = curr->prio; 818 entry->ent.type = TRACE_WAKE;
989 entry->ctx.prev_state = curr->state; 819 entry->prev_pid = curr->pid;
990 entry->ctx.next_pid = wakee->pid; 820 entry->prev_prio = curr->prio;
991 entry->ctx.next_prio = wakee->prio; 821 entry->prev_state = curr->state;
992 entry->ctx.next_state = wakee->state; 822 entry->next_pid = wakee->pid;
993 __trace_stack(tr, data, flags, 6); 823 entry->next_prio = wakee->prio;
994 __raw_spin_unlock(&data->lock); 824 entry->next_state = wakee->state;
995 raw_local_irq_restore(irq_flags); 825 entry->next_cpu = task_cpu(wakee);
826 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
827 ftrace_trace_stack(tr, data, flags, 6, pc);
996 828
997 trace_wake_up(); 829 trace_wake_up();
998} 830}
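The sched-switch and wakeup paths now share a typed ctx_switch_entry whose first member is the generic header, and the patch adds a next_cpu field (task_cpu()) that the printers render as the "[%03d]" column. A runnable standalone model of that layout; the header fields shown are an illustrative subset, not the full kernel struct:

/* Typed-event layout: a specialised record embeds the generic header
 * first, so a header pointer can be downcast once ent.type is known. */
#include <stdio.h>

struct trace_entry {             /* generic header (illustrative subset) */
	unsigned char type;
	int pid;
};

struct ctx_switch_entry {
	struct trace_entry ent;  /* must stay the first member */
	unsigned int prev_pid, prev_prio, prev_state;
	unsigned int next_pid, next_prio, next_state;
	unsigned int next_cpu;   /* new in this patch: task_cpu(next) */
};

int main(void)
{
	struct ctx_switch_entry e = {
		.ent = { .type = 2 /* TRACE_CTX */, .pid = 42 },
		.prev_pid = 42, .next_pid = 99, .next_cpu = 1,
	};
	struct trace_entry *hdr = &e.ent;

	/* decode: downcast from the generic header, as the printers do */
	struct ctx_switch_entry *field = (struct ctx_switch_entry *)hdr;
	printf("%u ==> %u [%03u]\n", field->prev_pid, field->next_pid,
	       field->next_cpu);
	return 0;
}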
@@ -1002,23 +834,21 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1002{ 834{
1003 struct trace_array *tr = &global_trace; 835 struct trace_array *tr = &global_trace;
1004 struct trace_array_cpu *data; 836 struct trace_array_cpu *data;
1005 unsigned long flags;
1006 long disabled;
1007 int cpu; 837 int cpu;
838 int pc;
1008 839
1009 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl) 840 if (tracing_disabled || !tr->ctrl)
1010 return; 841 return;
1011 842
1012 local_irq_save(flags); 843 pc = preempt_count();
844 preempt_disable_notrace();
1013 cpu = raw_smp_processor_id(); 845 cpu = raw_smp_processor_id();
1014 data = tr->data[cpu]; 846 data = tr->data[cpu];
1015 disabled = atomic_inc_return(&data->disabled);
1016 847
1017 if (likely(disabled == 1)) 848 if (likely(!atomic_read(&data->disabled)))
1018 __trace_special(tr, data, arg1, arg2, arg3); 849 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
1019 850
1020 atomic_dec(&data->disabled); 851 preempt_enable_notrace();
1021 local_irq_restore(flags);
1022} 852}
1023 853
1024#ifdef CONFIG_FTRACE 854#ifdef CONFIG_FTRACE
@@ -1029,7 +859,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
1029 struct trace_array_cpu *data; 859 struct trace_array_cpu *data;
1030 unsigned long flags; 860 unsigned long flags;
1031 long disabled; 861 long disabled;
1032 int cpu; 862 int cpu, resched;
863 int pc;
1033 864
1034 if (unlikely(!ftrace_function_enabled)) 865 if (unlikely(!ftrace_function_enabled))
1035 return; 866 return;
@@ -1037,16 +868,22 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
1037 if (skip_trace(ip)) 868 if (skip_trace(ip))
1038 return; 869 return;
1039 870
1040 local_irq_save(flags); 871 pc = preempt_count();
872 resched = need_resched();
873 preempt_disable_notrace();
874 local_save_flags(flags);
1041 cpu = raw_smp_processor_id(); 875 cpu = raw_smp_processor_id();
1042 data = tr->data[cpu]; 876 data = tr->data[cpu];
1043 disabled = atomic_inc_return(&data->disabled); 877 disabled = atomic_inc_return(&data->disabled);
1044 878
1045 if (likely(disabled == 1)) 879 if (likely(disabled == 1))
1046 trace_function(tr, data, ip, parent_ip, flags); 880 trace_function(tr, data, ip, parent_ip, flags, pc);
1047 881
1048 atomic_dec(&data->disabled); 882 atomic_dec(&data->disabled);
1049 local_irq_restore(flags); 883 if (resched)
884 preempt_enable_no_resched_notrace();
885 else
886 preempt_enable_notrace();
1050} 887}
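The subtle part of function_trace_call() is its exit path: need_resched() is sampled on entry, and if a reschedule was already pending the hook re-enables preemption with the _no_resched variant, since letting preempt_enable() call schedule() from inside a tracer that also hooks the scheduler could recurse. A kernel-only sketch of the pattern (not standalone-compilable):

/* If NEED_RESCHED was already set when the hook entered, re-enable
 * preemption without rescheduling; the pending reschedule belongs to
 * the interrupted context, not to the tracer. */
static void hook(void)
{
	int resched = need_resched();

	preempt_disable_notrace();
	/* ... record the function-call event ... */
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}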
1051 888
1052static struct ftrace_ops trace_ops __read_mostly = 889static struct ftrace_ops trace_ops __read_mostly =
@@ -1073,111 +910,96 @@ enum trace_file_type {
1073 TRACE_FILE_LAT_FMT = 1, 910 TRACE_FILE_LAT_FMT = 1,
1074}; 911};
1075 912
1076static struct trace_entry * 913static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
1077trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1078 struct trace_iterator *iter, int cpu)
1079{ 914{
1080 struct page *page; 915 /* Don't allow ftrace to trace into the ring buffers */
1081 struct trace_entry *array; 916 ftrace_disable_cpu();
1082 917
1083 if (iter->next_idx[cpu] >= tr->entries || 918 iter->idx++;
1084 iter->next_idx[cpu] >= data->trace_idx || 919 if (iter->buffer_iter[iter->cpu])
1085 (data->trace_head == data->trace_tail && 920 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1086 data->trace_head_idx == data->trace_tail_idx))
1087 return NULL;
1088 921
1089 if (!iter->next_page[cpu]) { 922 ftrace_enable_cpu();
1090 /* Initialize the iterator for this cpu trace buffer */ 923}
1091 WARN_ON(!data->trace_tail); 924
1092 page = virt_to_page(data->trace_tail); 925static struct trace_entry *
1093 iter->next_page[cpu] = &page->lru; 926peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1094 iter->next_page_idx[cpu] = data->trace_tail_idx; 927{
1095 } 928 struct ring_buffer_event *event;
929 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1096 930
1097 page = list_entry(iter->next_page[cpu], struct page, lru); 931 /* Don't allow ftrace to trace into the ring buffers */
1098 BUG_ON(&data->trace_pages == &page->lru); 932 ftrace_disable_cpu();
933
934 if (buf_iter)
935 event = ring_buffer_iter_peek(buf_iter, ts);
936 else
937 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1099 938
1100 array = page_address(page); 939 ftrace_enable_cpu();
1101 940
1102 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); 941 return event ? ring_buffer_event_data(event) : NULL;
1103 return &array[iter->next_page_idx[cpu]];
1104} 942}
1105 943
1106static struct trace_entry * 944static struct trace_entry *
1107find_next_entry(struct trace_iterator *iter, int *ent_cpu) 945__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1108{ 946{
1109 struct trace_array *tr = iter->tr; 947 struct ring_buffer *buffer = iter->tr->buffer;
1110 struct trace_entry *ent, *next = NULL; 948 struct trace_entry *ent, *next = NULL;
949 u64 next_ts = 0, ts;
1111 int next_cpu = -1; 950 int next_cpu = -1;
1112 int cpu; 951 int cpu;
1113 952
1114 for_each_tracing_cpu(cpu) { 953 for_each_tracing_cpu(cpu) {
1115 if (!head_page(tr->data[cpu])) 954
955 if (ring_buffer_empty_cpu(buffer, cpu))
1116 continue; 956 continue;
1117 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); 957
958 ent = peek_next_entry(iter, cpu, &ts);
959
1118 /* 960 /*
1119 * Pick the entry with the smallest timestamp: 961 * Pick the entry with the smallest timestamp:
1120 */ 962 */
1121 if (ent && (!next || ent->t < next->t)) { 963 if (ent && (!next || ts < next_ts)) {
1122 next = ent; 964 next = ent;
1123 next_cpu = cpu; 965 next_cpu = cpu;
966 next_ts = ts;
1124 } 967 }
1125 } 968 }
1126 969
1127 if (ent_cpu) 970 if (ent_cpu)
1128 *ent_cpu = next_cpu; 971 *ent_cpu = next_cpu;
1129 972
973 if (ent_ts)
974 *ent_ts = next_ts;
975
1130 return next; 976 return next;
1131} 977}
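__find_next_entry() merges the independent per-CPU buffers by peeking at each CPU's head entry and taking the one with the smallest timestamp, which yields a globally time-ordered stream without any cross-CPU locking on the write side. A runnable userspace model of that scan:

/* Model of the per-CPU timestamp merge: peek at the head of each CPU's
 * queue and pick the smallest timestamp, then advance that queue. */
#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 3

struct event { uint64_t ts; const char *msg; };

/* one tiny queue per CPU; a NULL msg marks an empty slot */
static struct event queues[NR_CPUS][2] = {
	{ { 30, "cpu0: foo" }, { 0, NULL } },
	{ { 10, "cpu1: bar" }, { 40, "cpu1: baz" } },
	{ { 20, "cpu2: qux" }, { 0, NULL } },
};
static int heads[NR_CPUS];

static struct event *peek(int cpu)
{
	struct event *e = &queues[cpu][heads[cpu]];
	return (heads[cpu] < 2 && e->msg) ? e : NULL;
}

int main(void)
{
	for (;;) {
		struct event *next = NULL;
		int next_cpu = -1;

		for (int cpu = 0; cpu < NR_CPUS; cpu++) {
			struct event *e = peek(cpu);
			/* pick the entry with the smallest timestamp */
			if (e && (!next || e->ts < next->ts)) {
				next = e;
				next_cpu = cpu;
			}
		}
		if (!next)
			break;
		printf("[%llu] %s\n", (unsigned long long)next->ts, next->msg);
		heads[next_cpu]++;	/* the "increment" step */
	}
	return 0;
}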
1132 978
1133static void trace_iterator_increment(struct trace_iterator *iter) 979/* Find the next real entry, without updating the iterator itself */
980static struct trace_entry *
981find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1134{ 982{
1135 iter->idx++; 983 return __find_next_entry(iter, ent_cpu, ent_ts);
1136 iter->next_idx[iter->cpu]++;
1137 iter->next_page_idx[iter->cpu]++;
1138
1139 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1140 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1141
1142 iter->next_page_idx[iter->cpu] = 0;
1143 iter->next_page[iter->cpu] =
1144 trace_next_list(data, iter->next_page[iter->cpu]);
1145 }
1146} 984}
1147 985
1148static void trace_consume(struct trace_iterator *iter) 986/* Find the next real entry, and increment the iterator to the next entry */
987static void *find_next_entry_inc(struct trace_iterator *iter)
1149{ 988{
1150 struct trace_array_cpu *data = iter->tr->data[iter->cpu]; 989 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1151 990
1152 data->trace_tail_idx++; 991 if (iter->ent)
1153 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { 992 trace_iterator_increment(iter, iter->cpu);
1154 data->trace_tail = trace_next_page(data, data->trace_tail);
1155 data->trace_tail_idx = 0;
1156 }
1157 993
1158 /* Check if we empty it, then reset the index */ 994 return iter->ent ? iter : NULL;
1159 if (data->trace_head == data->trace_tail &&
1160 data->trace_head_idx == data->trace_tail_idx)
1161 data->trace_idx = 0;
1162} 995}
1163 996
1164static void *find_next_entry_inc(struct trace_iterator *iter) 997static void trace_consume(struct trace_iterator *iter)
1165{ 998{
1166 struct trace_entry *next; 999 /* Don't allow ftrace to trace into the ring buffers */
1167 int next_cpu = -1; 1000 ftrace_disable_cpu();
1168 1001 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1169 next = find_next_entry(iter, &next_cpu); 1002 ftrace_enable_cpu();
1170
1171 iter->prev_ent = iter->ent;
1172 iter->prev_cpu = iter->cpu;
1173
1174 iter->ent = next;
1175 iter->cpu = next_cpu;
1176
1177 if (next)
1178 trace_iterator_increment(iter);
1179
1180 return next ? iter : NULL;
1181} 1003}
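trace_consume() is the destructive counterpart to the iterator path: the seq_file readers walk buffer_iter[] without removing entries, while trace_pipe consumes them as it goes. A short userspace model of the two read modes:

/* Iterate (non-destructive, like the buffer_iter path) vs consume
 * (destructive, like trace_pipe). */
#include <stdio.h>

static int buf[] = { 1, 2, 3 };
static int n = 3;
static int tail;		/* consume position */

static int *iter_read(int *pos)	/* leaves the buffer intact */
{
	return (*pos < n) ? &buf[(*pos)++] : NULL;
}

static int *consume(void)	/* removes the entry for good */
{
	return (tail < n) ? &buf[tail++] : NULL;
}

int main(void)
{
	int pos = 0, *e;

	while ((e = iter_read(&pos)))
		printf("iter: %d\n", *e);	/* 1 2 3, still there */
	while ((e = consume()))
		printf("pipe: %d\n", *e);	/* 1 2 3, now gone */
	return 0;
}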
1182 1004
1183static void *s_next(struct seq_file *m, void *v, loff_t *pos) 1005static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1210,7 +1032,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1210 struct trace_iterator *iter = m->private; 1032 struct trace_iterator *iter = m->private;
1211 void *p = NULL; 1033 void *p = NULL;
1212 loff_t l = 0; 1034 loff_t l = 0;
1213 int i; 1035 int cpu;
1214 1036
1215 mutex_lock(&trace_types_lock); 1037 mutex_lock(&trace_types_lock);
1216 1038
@@ -1229,14 +1051,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1229 iter->ent = NULL; 1051 iter->ent = NULL;
1230 iter->cpu = 0; 1052 iter->cpu = 0;
1231 iter->idx = -1; 1053 iter->idx = -1;
1232 iter->prev_ent = NULL;
1233 iter->prev_cpu = -1;
1234 1054
1235 for_each_tracing_cpu(i) { 1055 ftrace_disable_cpu();
1236 iter->next_idx[i] = 0; 1056
1237 iter->next_page[i] = NULL; 1057 for_each_tracing_cpu(cpu) {
1058 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1238 } 1059 }
1239 1060
1061 ftrace_enable_cpu();
1062
1240 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1063 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1241 ; 1064 ;
1242 1065
@@ -1330,21 +1153,21 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1330 1153
1331static void print_lat_help_header(struct seq_file *m) 1154static void print_lat_help_header(struct seq_file *m)
1332{ 1155{
1333 seq_puts(m, "# _------=> CPU# \n"); 1156 seq_puts(m, "# _------=> CPU# \n");
1334 seq_puts(m, "# / _-----=> irqs-off \n"); 1157 seq_puts(m, "# / _-----=> irqs-off \n");
1335 seq_puts(m, "# | / _----=> need-resched \n"); 1158 seq_puts(m, "# | / _----=> need-resched \n");
1336 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1159 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1337 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1160 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1338 seq_puts(m, "# |||| / \n"); 1161 seq_puts(m, "# |||| / \n");
1339 seq_puts(m, "# ||||| delay \n"); 1162 seq_puts(m, "# ||||| delay \n");
1340 seq_puts(m, "# cmd pid ||||| time | caller \n"); 1163 seq_puts(m, "# cmd pid ||||| time | caller \n");
1341 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1164 seq_puts(m, "# \\ / ||||| \\ | / \n");
1342} 1165}
1343 1166
1344static void print_func_help_header(struct seq_file *m) 1167static void print_func_help_header(struct seq_file *m)
1345{ 1168{
1346 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); 1169 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1347 seq_puts(m, "# | | | | |\n"); 1170 seq_puts(m, "# | | | | |\n");
1348} 1171}
1349 1172
1350 1173
@@ -1355,23 +1178,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1355 struct trace_array *tr = iter->tr; 1178 struct trace_array *tr = iter->tr;
1356 struct trace_array_cpu *data = tr->data[tr->cpu]; 1179 struct trace_array_cpu *data = tr->data[tr->cpu];
1357 struct tracer *type = current_trace; 1180 struct tracer *type = current_trace;
1358 unsigned long total = 0; 1181 unsigned long total;
1359 unsigned long entries = 0; 1182 unsigned long entries;
1360 int cpu;
1361 const char *name = "preemption"; 1183 const char *name = "preemption";
1362 1184
1363 if (type) 1185 if (type)
1364 name = type->name; 1186 name = type->name;
1365 1187
1366 for_each_tracing_cpu(cpu) { 1188 entries = ring_buffer_entries(iter->tr->buffer);
1367 if (head_page(tr->data[cpu])) { 1189 total = entries +
1368 total += tr->data[cpu]->trace_idx; 1190 ring_buffer_overruns(iter->tr->buffer);
1369 if (tr->data[cpu]->trace_idx > tr->entries)
1370 entries += tr->entries;
1371 else
1372 entries += tr->data[cpu]->trace_idx;
1373 }
1374 }
1375 1191
1376 seq_printf(m, "%s latency trace v1.1.5 on %s\n", 1192 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1377 name, UTS_RELEASE); 1193 name, UTS_RELEASE);
@@ -1428,7 +1244,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1428 comm = trace_find_cmdline(entry->pid); 1244 comm = trace_find_cmdline(entry->pid);
1429 1245
1430 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); 1246 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1431 trace_seq_printf(s, "%d", cpu); 1247 trace_seq_printf(s, "%3d", cpu);
1432 trace_seq_printf(s, "%c%c", 1248 trace_seq_printf(s, "%c%c",
1433 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', 1249 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1434 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); 1250 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
@@ -1457,7 +1273,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1457unsigned long preempt_mark_thresh = 100; 1273unsigned long preempt_mark_thresh = 100;
1458 1274
1459static void 1275static void
1460lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, 1276lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1461 unsigned long rel_usecs) 1277 unsigned long rel_usecs)
1462{ 1278{
1463 trace_seq_printf(s, " %4lldus", abs_usecs); 1279 trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1471,34 +1287,76 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1471 1287
1472static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 1288static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1473 1289
1474static int 1290/*
 1291 * The message is supposed to end with a trailing newline.
 1292 * If printing stops prematurely, append a newline of our own.
1293 */
1294void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1295{
1296 struct trace_entry *ent;
1297 struct trace_field_cont *cont;
1298 bool ok = true;
1299
1300 ent = peek_next_entry(iter, iter->cpu, NULL);
1301 if (!ent || ent->type != TRACE_CONT) {
1302 trace_seq_putc(s, '\n');
1303 return;
1304 }
1305
1306 do {
1307 cont = (struct trace_field_cont *)ent;
1308 if (ok)
1309 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1310
1311 ftrace_disable_cpu();
1312
1313 if (iter->buffer_iter[iter->cpu])
1314 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1315 else
1316 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1317
1318 ftrace_enable_cpu();
1319
1320 ent = peek_next_entry(iter, iter->cpu, NULL);
1321 } while (ent && ent->type == TRACE_CONT);
1322
1323 if (!ok)
1324 trace_seq_putc(s, '\n');
1325}
1326
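trace_seq_print_cont() stitches TRACE_CONT records back onto the preceding TRACE_PRINT line, since one trace_vprintk() call can span several fixed-size ring-buffer entries. A runnable model of the stitching loop:

/* Keep appending continuation chunks until the next record is not a
 * continuation, then guarantee the line ends in a newline. */
#include <stdio.h>
#include <string.h>

enum { TRACE_PRINT = 1, TRACE_CONT = 2 };

struct rec { int type; const char *buf; };

static struct rec stream[] = {
	{ TRACE_PRINT, "a long mess" },
	{ TRACE_CONT,  "age split ac" },
	{ TRACE_CONT,  "ross entries\n" },
	{ TRACE_PRINT, "next line\n" },
};
static int idx;

static struct rec *peek_next(void)
{
	int nrec = (int)(sizeof(stream) / sizeof(stream[0]));
	return idx < nrec ? &stream[idx] : NULL;
}

int main(void)
{
	char line[128] = "";
	struct rec *r = peek_next();

	strcat(line, r->buf);	/* the TRACE_PRINT head */
	idx++;
	/* consume every following TRACE_CONT, as the patch does */
	while ((r = peek_next()) && r->type == TRACE_CONT) {
		strcat(line, r->buf);
		idx++;
	}
	if (line[strlen(line) - 1] != '\n')
		strcat(line, "\n");	/* add our own newline if cut short */
	fputs(line, stdout);
	return 0;
}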
1327static enum print_line_t
1475print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1328print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1476{ 1329{
1477 struct trace_seq *s = &iter->seq; 1330 struct trace_seq *s = &iter->seq;
1478 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1331 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1479 struct trace_entry *next_entry = find_next_entry(iter, NULL); 1332 struct trace_entry *next_entry;
1480 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); 1333 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1481 struct trace_entry *entry = iter->ent; 1334 struct trace_entry *entry = iter->ent;
1482 unsigned long abs_usecs; 1335 unsigned long abs_usecs;
1483 unsigned long rel_usecs; 1336 unsigned long rel_usecs;
1337 u64 next_ts;
1484 char *comm; 1338 char *comm;
1485 int S, T; 1339 int S, T;
1486 int i; 1340 int i;
1487 unsigned state; 1341 unsigned state;
1488 1342
1343 if (entry->type == TRACE_CONT)
1344 return TRACE_TYPE_HANDLED;
1345
1346 next_entry = find_next_entry(iter, NULL, &next_ts);
1489 if (!next_entry) 1347 if (!next_entry)
1490 next_entry = entry; 1348 next_ts = iter->ts;
1491 rel_usecs = ns2usecs(next_entry->t - entry->t); 1349 rel_usecs = ns2usecs(next_ts - iter->ts);
1492 abs_usecs = ns2usecs(entry->t - iter->tr->time_start); 1350 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1493 1351
1494 if (verbose) { 1352 if (verbose) {
1495 comm = trace_find_cmdline(entry->pid); 1353 comm = trace_find_cmdline(entry->pid);
1496 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]" 1354 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1497 " %ld.%03ldms (+%ld.%03ldms): ", 1355 " %ld.%03ldms (+%ld.%03ldms): ",
1498 comm, 1356 comm,
1499 entry->pid, cpu, entry->flags, 1357 entry->pid, cpu, entry->flags,
1500 entry->preempt_count, trace_idx, 1358 entry->preempt_count, trace_idx,
1501 ns2usecs(entry->t), 1359 ns2usecs(iter->ts),
1502 abs_usecs/1000, 1360 abs_usecs/1000,
1503 abs_usecs % 1000, rel_usecs/1000, 1361 abs_usecs % 1000, rel_usecs/1000,
1504 rel_usecs % 1000); 1362 rel_usecs % 1000);
@@ -1507,52 +1365,85 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1507 lat_print_timestamp(s, abs_usecs, rel_usecs); 1365 lat_print_timestamp(s, abs_usecs, rel_usecs);
1508 } 1366 }
1509 switch (entry->type) { 1367 switch (entry->type) {
1510 case TRACE_FN: 1368 case TRACE_FN: {
1511 seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1369 struct ftrace_entry *field;
1370
1371 trace_assign_type(field, entry);
1372
1373 seq_print_ip_sym(s, field->ip, sym_flags);
1512 trace_seq_puts(s, " ("); 1374 trace_seq_puts(s, " (");
1513 if (kretprobed(entry->fn.parent_ip)) 1375 if (kretprobed(field->parent_ip))
1514 trace_seq_puts(s, KRETPROBE_MSG); 1376 trace_seq_puts(s, KRETPROBE_MSG);
1515 else 1377 else
1516 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); 1378 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1517 trace_seq_puts(s, ")\n"); 1379 trace_seq_puts(s, ")\n");
1518 break; 1380 break;
1381 }
1519 case TRACE_CTX: 1382 case TRACE_CTX:
1520 case TRACE_WAKE: 1383 case TRACE_WAKE: {
1521 T = entry->ctx.next_state < sizeof(state_to_char) ? 1384 struct ctx_switch_entry *field;
1522 state_to_char[entry->ctx.next_state] : 'X'; 1385
1386 trace_assign_type(field, entry);
1387
1388 T = field->next_state < sizeof(state_to_char) ?
1389 state_to_char[field->next_state] : 'X';
1523 1390
1524 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0; 1391 state = field->prev_state ?
1392 __ffs(field->prev_state) + 1 : 0;
1525 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; 1393 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1526 comm = trace_find_cmdline(entry->ctx.next_pid); 1394 comm = trace_find_cmdline(field->next_pid);
1527 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", 1395 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1528 entry->ctx.prev_pid, 1396 field->prev_pid,
1529 entry->ctx.prev_prio, 1397 field->prev_prio,
1530 S, entry->type == TRACE_CTX ? "==>" : " +", 1398 S, entry->type == TRACE_CTX ? "==>" : " +",
1531 entry->ctx.next_pid, 1399 field->next_cpu,
1532 entry->ctx.next_prio, 1400 field->next_pid,
1401 field->next_prio,
1533 T, comm); 1402 T, comm);
1534 break; 1403 break;
1535 case TRACE_SPECIAL: 1404 }
1405 case TRACE_SPECIAL: {
1406 struct special_entry *field;
1407
1408 trace_assign_type(field, entry);
1409
1536 trace_seq_printf(s, "# %ld %ld %ld\n", 1410 trace_seq_printf(s, "# %ld %ld %ld\n",
1537 entry->special.arg1, 1411 field->arg1,
1538 entry->special.arg2, 1412 field->arg2,
1539 entry->special.arg3); 1413 field->arg3);
1540 break; 1414 break;
1541 case TRACE_STACK: 1415 }
1416 case TRACE_STACK: {
1417 struct stack_entry *field;
1418
1419 trace_assign_type(field, entry);
1420
1542 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1421 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1543 if (i) 1422 if (i)
1544 trace_seq_puts(s, " <= "); 1423 trace_seq_puts(s, " <= ");
1545 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags); 1424 seq_print_ip_sym(s, field->caller[i], sym_flags);
1546 } 1425 }
1547 trace_seq_puts(s, "\n"); 1426 trace_seq_puts(s, "\n");
1548 break; 1427 break;
1428 }
1429 case TRACE_PRINT: {
1430 struct print_entry *field;
1431
1432 trace_assign_type(field, entry);
1433
1434 seq_print_ip_sym(s, field->ip, sym_flags);
1435 trace_seq_printf(s, ": %s", field->buf);
1436 if (entry->flags & TRACE_FLAG_CONT)
1437 trace_seq_print_cont(s, iter);
1438 break;
1439 }
1549 default: 1440 default:
1550 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1441 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1551 } 1442 }
1552 return 1; 1443 return TRACE_TYPE_HANDLED;
1553} 1444}
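The prev_state decode above maps a task-state bitmask to one letter: __ffs() finds the lowest set bit, the +1 skips index 0, which is reserved for TASK_RUNNING, and the result indexes TASK_STATE_TO_CHAR_STR. A runnable userspace model (the state string here is illustrative of the 2.6.27-era set, and POSIX ffs() equals __ffs()+1 for nonzero input):

/* Model of the prev_state -> letter decode used by the printers. */
#include <stdio.h>
#include <strings.h>	/* ffs() */

static const char state_to_char[] = "RSDTtZX";

static char decode(unsigned long prev_state)
{
	/* 0 means running; otherwise index by lowest set bit + 1 */
	unsigned state = prev_state ? (unsigned)ffs((int)prev_state) : 0;

	return state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
}

int main(void)
{
	printf("%c %c %c\n", decode(0), decode(1), decode(2)); /* R S D */
	return 0;
}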
1554 1445
1555static int print_trace_fmt(struct trace_iterator *iter) 1446static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1556{ 1447{
1557 struct trace_seq *s = &iter->seq; 1448 struct trace_seq *s = &iter->seq;
1558 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1449 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1567,90 +1458,126 @@ static int print_trace_fmt(struct trace_iterator *iter)
1567 1458
1568 entry = iter->ent; 1459 entry = iter->ent;
1569 1460
1461 if (entry->type == TRACE_CONT)
1462 return TRACE_TYPE_HANDLED;
1463
1570 comm = trace_find_cmdline(iter->ent->pid); 1464 comm = trace_find_cmdline(iter->ent->pid);
1571 1465
1572 t = ns2usecs(entry->t); 1466 t = ns2usecs(iter->ts);
1573 usec_rem = do_div(t, 1000000ULL); 1467 usec_rem = do_div(t, 1000000ULL);
1574 secs = (unsigned long)t; 1468 secs = (unsigned long)t;
1575 1469
1576 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); 1470 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1577 if (!ret) 1471 if (!ret)
1578 return 0; 1472 return TRACE_TYPE_PARTIAL_LINE;
1579 ret = trace_seq_printf(s, "[%02d] ", iter->cpu); 1473 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1580 if (!ret) 1474 if (!ret)
1581 return 0; 1475 return TRACE_TYPE_PARTIAL_LINE;
1582 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); 1476 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1583 if (!ret) 1477 if (!ret)
1584 return 0; 1478 return TRACE_TYPE_PARTIAL_LINE;
1585 1479
1586 switch (entry->type) { 1480 switch (entry->type) {
1587 case TRACE_FN: 1481 case TRACE_FN: {
1588 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1482 struct ftrace_entry *field;
1483
1484 trace_assign_type(field, entry);
1485
1486 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1589 if (!ret) 1487 if (!ret)
1590 return 0; 1488 return TRACE_TYPE_PARTIAL_LINE;
1591 if ((sym_flags & TRACE_ITER_PRINT_PARENT) && 1489 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1592 entry->fn.parent_ip) { 1490 field->parent_ip) {
1593 ret = trace_seq_printf(s, " <-"); 1491 ret = trace_seq_printf(s, " <-");
1594 if (!ret) 1492 if (!ret)
1595 return 0; 1493 return TRACE_TYPE_PARTIAL_LINE;
1596 if (kretprobed(entry->fn.parent_ip)) 1494 if (kretprobed(field->parent_ip))
1597 ret = trace_seq_puts(s, KRETPROBE_MSG); 1495 ret = trace_seq_puts(s, KRETPROBE_MSG);
1598 else 1496 else
1599 ret = seq_print_ip_sym(s, entry->fn.parent_ip, 1497 ret = seq_print_ip_sym(s,
1498 field->parent_ip,
1600 sym_flags); 1499 sym_flags);
1601 if (!ret) 1500 if (!ret)
1602 return 0; 1501 return TRACE_TYPE_PARTIAL_LINE;
1603 } 1502 }
1604 ret = trace_seq_printf(s, "\n"); 1503 ret = trace_seq_printf(s, "\n");
1605 if (!ret) 1504 if (!ret)
1606 return 0; 1505 return TRACE_TYPE_PARTIAL_LINE;
1607 break; 1506 break;
1507 }
1608 case TRACE_CTX: 1508 case TRACE_CTX:
1609 case TRACE_WAKE: 1509 case TRACE_WAKE: {
1610 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1510 struct ctx_switch_entry *field;
1611 state_to_char[entry->ctx.prev_state] : 'X'; 1511
1612 T = entry->ctx.next_state < sizeof(state_to_char) ? 1512 trace_assign_type(field, entry);
1613 state_to_char[entry->ctx.next_state] : 'X'; 1513
1614 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n", 1514 S = field->prev_state < sizeof(state_to_char) ?
1615 entry->ctx.prev_pid, 1515 state_to_char[field->prev_state] : 'X';
1616 entry->ctx.prev_prio, 1516 T = field->next_state < sizeof(state_to_char) ?
1517 state_to_char[field->next_state] : 'X';
1518 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
1519 field->prev_pid,
1520 field->prev_prio,
1617 S, 1521 S,
1618 entry->type == TRACE_CTX ? "==>" : " +", 1522 entry->type == TRACE_CTX ? "==>" : " +",
1619 entry->ctx.next_pid, 1523 field->next_cpu,
1620 entry->ctx.next_prio, 1524 field->next_pid,
1525 field->next_prio,
1621 T); 1526 T);
1622 if (!ret) 1527 if (!ret)
1623 return 0; 1528 return TRACE_TYPE_PARTIAL_LINE;
1624 break; 1529 break;
1625 case TRACE_SPECIAL: 1530 }
1531 case TRACE_SPECIAL: {
1532 struct special_entry *field;
1533
1534 trace_assign_type(field, entry);
1535
1626 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1536 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1627 entry->special.arg1, 1537 field->arg1,
1628 entry->special.arg2, 1538 field->arg2,
1629 entry->special.arg3); 1539 field->arg3);
1630 if (!ret) 1540 if (!ret)
1631 return 0; 1541 return TRACE_TYPE_PARTIAL_LINE;
1632 break; 1542 break;
1633 case TRACE_STACK: 1543 }
1544 case TRACE_STACK: {
1545 struct stack_entry *field;
1546
1547 trace_assign_type(field, entry);
1548
1634 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1549 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1635 if (i) { 1550 if (i) {
1636 ret = trace_seq_puts(s, " <= "); 1551 ret = trace_seq_puts(s, " <= ");
1637 if (!ret) 1552 if (!ret)
1638 return 0; 1553 return TRACE_TYPE_PARTIAL_LINE;
1639 } 1554 }
1640 ret = seq_print_ip_sym(s, entry->stack.caller[i], 1555 ret = seq_print_ip_sym(s, field->caller[i],
1641 sym_flags); 1556 sym_flags);
1642 if (!ret) 1557 if (!ret)
1643 return 0; 1558 return TRACE_TYPE_PARTIAL_LINE;
1644 } 1559 }
1645 ret = trace_seq_puts(s, "\n"); 1560 ret = trace_seq_puts(s, "\n");
1646 if (!ret) 1561 if (!ret)
1647 return 0; 1562 return TRACE_TYPE_PARTIAL_LINE;
1648 break; 1563 break;
1649 } 1564 }
1650 return 1; 1565 case TRACE_PRINT: {
1566 struct print_entry *field;
1567
1568 trace_assign_type(field, entry);
1569
1570 seq_print_ip_sym(s, field->ip, sym_flags);
1571 trace_seq_printf(s, ": %s", field->buf);
1572 if (entry->flags & TRACE_FLAG_CONT)
1573 trace_seq_print_cont(s, iter);
1574 break;
1575 }
1576 }
1577 return TRACE_TYPE_HANDLED;
1651} 1578}
1652 1579
1653static int print_raw_fmt(struct trace_iterator *iter) 1580static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1654{ 1581{
1655 struct trace_seq *s = &iter->seq; 1582 struct trace_seq *s = &iter->seq;
1656 struct trace_entry *entry; 1583 struct trace_entry *entry;
@@ -1659,47 +1586,77 @@ static int print_raw_fmt(struct trace_iterator *iter)
1659 1586
1660 entry = iter->ent; 1587 entry = iter->ent;
1661 1588
1589 if (entry->type == TRACE_CONT)
1590 return TRACE_TYPE_HANDLED;
1591
1662 ret = trace_seq_printf(s, "%d %d %llu ", 1592 ret = trace_seq_printf(s, "%d %d %llu ",
1663 entry->pid, iter->cpu, entry->t); 1593 entry->pid, iter->cpu, iter->ts);
1664 if (!ret) 1594 if (!ret)
1665 return 0; 1595 return TRACE_TYPE_PARTIAL_LINE;
1666 1596
1667 switch (entry->type) { 1597 switch (entry->type) {
1668 case TRACE_FN: 1598 case TRACE_FN: {
1599 struct ftrace_entry *field;
1600
1601 trace_assign_type(field, entry);
1602
1669 ret = trace_seq_printf(s, "%x %x\n", 1603 ret = trace_seq_printf(s, "%x %x\n",
1670 entry->fn.ip, entry->fn.parent_ip); 1604 field->ip,
1605 field->parent_ip);
1671 if (!ret) 1606 if (!ret)
1672 return 0; 1607 return TRACE_TYPE_PARTIAL_LINE;
1673 break; 1608 break;
1609 }
1674 case TRACE_CTX: 1610 case TRACE_CTX:
1675 case TRACE_WAKE: 1611 case TRACE_WAKE: {
1676 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1612 struct ctx_switch_entry *field;
1677 state_to_char[entry->ctx.prev_state] : 'X'; 1613
1678 T = entry->ctx.next_state < sizeof(state_to_char) ? 1614 trace_assign_type(field, entry);
1679 state_to_char[entry->ctx.next_state] : 'X'; 1615
1616 S = field->prev_state < sizeof(state_to_char) ?
1617 state_to_char[field->prev_state] : 'X';
1618 T = field->next_state < sizeof(state_to_char) ?
1619 state_to_char[field->next_state] : 'X';
1680 if (entry->type == TRACE_WAKE) 1620 if (entry->type == TRACE_WAKE)
1681 S = '+'; 1621 S = '+';
1682 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n", 1622 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1683 entry->ctx.prev_pid, 1623 field->prev_pid,
1684 entry->ctx.prev_prio, 1624 field->prev_prio,
1685 S, 1625 S,
1686 entry->ctx.next_pid, 1626 field->next_cpu,
1687 entry->ctx.next_prio, 1627 field->next_pid,
1628 field->next_prio,
1688 T); 1629 T);
1689 if (!ret) 1630 if (!ret)
1690 return 0; 1631 return TRACE_TYPE_PARTIAL_LINE;
1691 break; 1632 break;
1633 }
1692 case TRACE_SPECIAL: 1634 case TRACE_SPECIAL:
1693 case TRACE_STACK: 1635 case TRACE_STACK: {
1636 struct special_entry *field;
1637
1638 trace_assign_type(field, entry);
1639
1694 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1640 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1695 entry->special.arg1, 1641 field->arg1,
1696 entry->special.arg2, 1642 field->arg2,
1697 entry->special.arg3); 1643 field->arg3);
1698 if (!ret) 1644 if (!ret)
1699 return 0; 1645 return TRACE_TYPE_PARTIAL_LINE;
1700 break; 1646 break;
1701 } 1647 }
1702 return 1; 1648 case TRACE_PRINT: {
1649 struct print_entry *field;
1650
1651 trace_assign_type(field, entry);
1652
1653 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
1654 if (entry->flags & TRACE_FLAG_CONT)
1655 trace_seq_print_cont(s, iter);
1656 break;
1657 }
1658 }
1659 return TRACE_TYPE_HANDLED;
1703} 1660}
1704 1661
1705#define SEQ_PUT_FIELD_RET(s, x) \ 1662#define SEQ_PUT_FIELD_RET(s, x) \
@@ -1710,11 +1667,12 @@ do { \
1710 1667
1711#define SEQ_PUT_HEX_FIELD_RET(s, x) \ 1668#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1712do { \ 1669do { \
1670 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
1713 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ 1671 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1714 return 0; \ 1672 return 0; \
1715} while (0) 1673} while (0)
1716 1674
1717static int print_hex_fmt(struct trace_iterator *iter) 1675static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1718{ 1676{
1719 struct trace_seq *s = &iter->seq; 1677 struct trace_seq *s = &iter->seq;
1720 unsigned char newline = '\n'; 1678 unsigned char newline = '\n';
@@ -1723,97 +1681,139 @@ static int print_hex_fmt(struct trace_iterator *iter)
1723 1681
1724 entry = iter->ent; 1682 entry = iter->ent;
1725 1683
1684 if (entry->type == TRACE_CONT)
1685 return TRACE_TYPE_HANDLED;
1686
1726 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1687 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1727 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); 1688 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1728 SEQ_PUT_HEX_FIELD_RET(s, entry->t); 1689 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1729 1690
1730 switch (entry->type) { 1691 switch (entry->type) {
1731 case TRACE_FN: 1692 case TRACE_FN: {
1732 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip); 1693 struct ftrace_entry *field;
1733 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 1694
1695 trace_assign_type(field, entry);
1696
1697 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
1698 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
1734 break; 1699 break;
1700 }
1735 case TRACE_CTX: 1701 case TRACE_CTX:
1736 case TRACE_WAKE: 1702 case TRACE_WAKE: {
1737 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1703 struct ctx_switch_entry *field;
1738 state_to_char[entry->ctx.prev_state] : 'X'; 1704
1739 T = entry->ctx.next_state < sizeof(state_to_char) ? 1705 trace_assign_type(field, entry);
1740 state_to_char[entry->ctx.next_state] : 'X'; 1706
1707 S = field->prev_state < sizeof(state_to_char) ?
1708 state_to_char[field->prev_state] : 'X';
1709 T = field->next_state < sizeof(state_to_char) ?
1710 state_to_char[field->next_state] : 'X';
1741 if (entry->type == TRACE_WAKE) 1711 if (entry->type == TRACE_WAKE)
1742 S = '+'; 1712 S = '+';
1743 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); 1713 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
1744 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); 1714 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
1745 SEQ_PUT_HEX_FIELD_RET(s, S); 1715 SEQ_PUT_HEX_FIELD_RET(s, S);
1746 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); 1716 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
1747 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); 1717 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
1748 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 1718 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
1749 SEQ_PUT_HEX_FIELD_RET(s, T); 1719 SEQ_PUT_HEX_FIELD_RET(s, T);
1750 break; 1720 break;
1721 }
1751 case TRACE_SPECIAL: 1722 case TRACE_SPECIAL:
1752 case TRACE_STACK: 1723 case TRACE_STACK: {
1753 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); 1724 struct special_entry *field;
1754 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); 1725
1755 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); 1726 trace_assign_type(field, entry);
1727
1728 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
1729 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
1730 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
1756 break; 1731 break;
1757 } 1732 }
1733 }
1758 SEQ_PUT_FIELD_RET(s, newline); 1734 SEQ_PUT_FIELD_RET(s, newline);
1759 1735
1760 return 1; 1736 return TRACE_TYPE_HANDLED;
1761} 1737}
1762 1738
1763static int print_bin_fmt(struct trace_iterator *iter) 1739static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1764{ 1740{
1765 struct trace_seq *s = &iter->seq; 1741 struct trace_seq *s = &iter->seq;
1766 struct trace_entry *entry; 1742 struct trace_entry *entry;
1767 1743
1768 entry = iter->ent; 1744 entry = iter->ent;
1769 1745
1746 if (entry->type == TRACE_CONT)
1747 return TRACE_TYPE_HANDLED;
1748
1770 SEQ_PUT_FIELD_RET(s, entry->pid); 1749 SEQ_PUT_FIELD_RET(s, entry->pid);
1771 SEQ_PUT_FIELD_RET(s, entry->cpu); 1750 SEQ_PUT_FIELD_RET(s, iter->cpu);
1772 SEQ_PUT_FIELD_RET(s, entry->t); 1751 SEQ_PUT_FIELD_RET(s, iter->ts);
1773 1752
1774 switch (entry->type) { 1753 switch (entry->type) {
1775 case TRACE_FN: 1754 case TRACE_FN: {
1776 SEQ_PUT_FIELD_RET(s, entry->fn.ip); 1755 struct ftrace_entry *field;
1777 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip); 1756
1757 trace_assign_type(field, entry);
1758
1759 SEQ_PUT_FIELD_RET(s, field->ip);
1760 SEQ_PUT_FIELD_RET(s, field->parent_ip);
1778 break; 1761 break;
1779 case TRACE_CTX: 1762 }
1780 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid); 1763 case TRACE_CTX: {
1781 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio); 1764 struct ctx_switch_entry *field;
1782 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); 1765
1783 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); 1766 trace_assign_type(field, entry);
1784 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); 1767
1785 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state); 1768 SEQ_PUT_FIELD_RET(s, field->prev_pid);
1769 SEQ_PUT_FIELD_RET(s, field->prev_prio);
1770 SEQ_PUT_FIELD_RET(s, field->prev_state);
1771 SEQ_PUT_FIELD_RET(s, field->next_pid);
1772 SEQ_PUT_FIELD_RET(s, field->next_prio);
1773 SEQ_PUT_FIELD_RET(s, field->next_state);
1786 break; 1774 break;
1775 }
1787 case TRACE_SPECIAL: 1776 case TRACE_SPECIAL:
1788 case TRACE_STACK: 1777 case TRACE_STACK: {
1789 SEQ_PUT_FIELD_RET(s, entry->special.arg1); 1778 struct special_entry *field;
1790 SEQ_PUT_FIELD_RET(s, entry->special.arg2); 1779
1791 SEQ_PUT_FIELD_RET(s, entry->special.arg3); 1780 trace_assign_type(field, entry);
1781
1782 SEQ_PUT_FIELD_RET(s, field->arg1);
1783 SEQ_PUT_FIELD_RET(s, field->arg2);
1784 SEQ_PUT_FIELD_RET(s, field->arg3);
1792 break; 1785 break;
1793 } 1786 }
1787 }
1794 return 1; 1788 return 1;
1795} 1789}
1796 1790
1797static int trace_empty(struct trace_iterator *iter) 1791static int trace_empty(struct trace_iterator *iter)
1798{ 1792{
1799 struct trace_array_cpu *data;
1800 int cpu; 1793 int cpu;
1801 1794
1802 for_each_tracing_cpu(cpu) { 1795 for_each_tracing_cpu(cpu) {
1803 data = iter->tr->data[cpu]; 1796 if (iter->buffer_iter[cpu]) {
1804 1797 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1805 if (head_page(data) && data->trace_idx && 1798 return 0;
1806 (data->trace_tail != data->trace_head || 1799 } else {
1807 data->trace_tail_idx != data->trace_head_idx)) 1800 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1808 return 0; 1801 return 0;
1802 }
1809 } 1803 }
1804
1810 return 1; 1805 return 1;
1811} 1806}
1812 1807
1813static int print_trace_line(struct trace_iterator *iter) 1808static enum print_line_t print_trace_line(struct trace_iterator *iter)
1814{ 1809{
1815 if (iter->trace && iter->trace->print_line) 1810 enum print_line_t ret;
1816 return iter->trace->print_line(iter); 1811
1812 if (iter->trace && iter->trace->print_line) {
1813 ret = iter->trace->print_line(iter);
1814 if (ret != TRACE_TYPE_UNHANDLED)
1815 return ret;
1816 }
1817 1817
1818 if (trace_flags & TRACE_ITER_BIN) 1818 if (trace_flags & TRACE_ITER_BIN)
1819 return print_bin_fmt(iter); 1819 return print_bin_fmt(iter);
@@ -1869,6 +1869,8 @@ static struct trace_iterator *
1869__tracing_open(struct inode *inode, struct file *file, int *ret) 1869__tracing_open(struct inode *inode, struct file *file, int *ret)
1870{ 1870{
1871 struct trace_iterator *iter; 1871 struct trace_iterator *iter;
1872 struct seq_file *m;
1873 int cpu;
1872 1874
1873 if (tracing_disabled) { 1875 if (tracing_disabled) {
1874 *ret = -ENODEV; 1876 *ret = -ENODEV;
@@ -1889,28 +1891,45 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1889 iter->trace = current_trace; 1891 iter->trace = current_trace;
1890 iter->pos = -1; 1892 iter->pos = -1;
1891 1893
1894 for_each_tracing_cpu(cpu) {
1895
1896 iter->buffer_iter[cpu] =
1897 ring_buffer_read_start(iter->tr->buffer, cpu);
1898
1899 if (!iter->buffer_iter[cpu])
1900 goto fail_buffer;
1901 }
1902
1892 /* TODO stop tracer */ 1903 /* TODO stop tracer */
1893 *ret = seq_open(file, &tracer_seq_ops); 1904 *ret = seq_open(file, &tracer_seq_ops);
1894 if (!*ret) { 1905 if (*ret)
1895 struct seq_file *m = file->private_data; 1906 goto fail_buffer;
1896 m->private = iter;
1897 1907
1898 /* stop the trace while dumping */ 1908 m = file->private_data;
1899 if (iter->tr->ctrl) { 1909 m->private = iter;
1900 tracer_enabled = 0;
1901 ftrace_function_enabled = 0;
1902 }
1903 1910
1904 if (iter->trace && iter->trace->open) 1911 /* stop the trace while dumping */
1905 iter->trace->open(iter); 1912 if (iter->tr->ctrl) {
1906 } else { 1913 tracer_enabled = 0;
1907 kfree(iter); 1914 ftrace_function_enabled = 0;
1908 iter = NULL;
1909 } 1915 }
1916
1917 if (iter->trace && iter->trace->open)
1918 iter->trace->open(iter);
1919
1910 mutex_unlock(&trace_types_lock); 1920 mutex_unlock(&trace_types_lock);
1911 1921
1912 out: 1922 out:
1913 return iter; 1923 return iter;
1924
1925 fail_buffer:
1926 for_each_tracing_cpu(cpu) {
1927 if (iter->buffer_iter[cpu])
1928 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1929 }
1930 mutex_unlock(&trace_types_lock);
1931
1932 return ERR_PTR(-ENOMEM);
1914} 1933}
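__tracing_open() now allocates one ring-buffer iterator per CPU, so its error path must unwind whatever subset was already allocated; that is what the fail_buffer label does, relying on the NULL slots for CPUs that were never reached. The same unwind idiom as a runnable sketch:

/* Partial-allocation unwind behind fail_buffer: free exactly the
 * iterators that were successfully allocated. */
#include <stdio.h>
#include <stdlib.h>

#define NR 4

int main(void)
{
	void *iters[NR] = { NULL };	/* zero-filled: unreached slots stay NULL */

	for (int i = 0; i < NR; i++) {
		/* pretend allocation fails on CPU 2 */
		iters[i] = (i == 2) ? NULL : malloc(16);
		if (!iters[i])
			goto fail;
	}
	puts("all allocated");
	return 0;

fail:
	for (int i = 0; i < NR; i++)	/* free(NULL) is a no-op */
		free(iters[i]);
	puts("unwound partial allocation");
	return 1;
}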
1915 1934
1916int tracing_open_generic(struct inode *inode, struct file *filp) 1935int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -1926,8 +1945,14 @@ int tracing_release(struct inode *inode, struct file *file)
1926{ 1945{
1927 struct seq_file *m = (struct seq_file *)file->private_data; 1946 struct seq_file *m = (struct seq_file *)file->private_data;
1928 struct trace_iterator *iter = m->private; 1947 struct trace_iterator *iter = m->private;
1948 int cpu;
1929 1949
1930 mutex_lock(&trace_types_lock); 1950 mutex_lock(&trace_types_lock);
1951 for_each_tracing_cpu(cpu) {
1952 if (iter->buffer_iter[cpu])
1953 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1954 }
1955
1931 if (iter->trace && iter->trace->close) 1956 if (iter->trace && iter->trace->close)
1932 iter->trace->close(iter); 1957 iter->trace->close(iter);
1933 1958
@@ -2352,9 +2377,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2352 struct tracer *t; 2377 struct tracer *t;
2353 char buf[max_tracer_type_len+1]; 2378 char buf[max_tracer_type_len+1];
2354 int i; 2379 int i;
2380 size_t ret;
2355 2381
2356 if (cnt > max_tracer_type_len) 2382 if (cnt > max_tracer_type_len)
2357 cnt = max_tracer_type_len; 2383 cnt = max_tracer_type_len;
2384 ret = cnt;
2358 2385
2359 if (copy_from_user(&buf, ubuf, cnt)) 2386 if (copy_from_user(&buf, ubuf, cnt))
2360 return -EFAULT; 2387 return -EFAULT;
@@ -2370,7 +2397,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2370 if (strcmp(t->name, buf) == 0) 2397 if (strcmp(t->name, buf) == 0)
2371 break; 2398 break;
2372 } 2399 }
2373 if (!t || t == current_trace) 2400 if (!t) {
2401 ret = -EINVAL;
2402 goto out;
2403 }
2404 if (t == current_trace)
2374 goto out; 2405 goto out;
2375 2406
2376 if (current_trace && current_trace->reset) 2407 if (current_trace && current_trace->reset)
@@ -2383,9 +2414,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2383 out: 2414 out:
2384 mutex_unlock(&trace_types_lock); 2415 mutex_unlock(&trace_types_lock);
2385 2416
2386 filp->f_pos += cnt; 2417 if (ret == cnt)
2418 filp->f_pos += cnt;
2387 2419
2388 return cnt; 2420 return ret;
2389} 2421}
2390 2422
2391static ssize_t 2423static ssize_t
@@ -2500,20 +2532,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2500 size_t cnt, loff_t *ppos) 2532 size_t cnt, loff_t *ppos)
2501{ 2533{
2502 struct trace_iterator *iter = filp->private_data; 2534 struct trace_iterator *iter = filp->private_data;
2503 struct trace_array_cpu *data;
2504 static cpumask_t mask;
2505 unsigned long flags;
2506#ifdef CONFIG_FTRACE
2507 int ftrace_save;
2508#endif
2509 int cpu;
2510 ssize_t sret; 2535 ssize_t sret;
2511 2536
2512 /* return any leftover data */ 2537 /* return any leftover data */
2513 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2538 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2514 if (sret != -EBUSY) 2539 if (sret != -EBUSY)
2515 return sret; 2540 return sret;
2516 sret = 0;
2517 2541
2518 trace_seq_reset(&iter->seq); 2542 trace_seq_reset(&iter->seq);
2519 2543
@@ -2524,6 +2548,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2524 goto out; 2548 goto out;
2525 } 2549 }
2526 2550
2551waitagain:
2552 sret = 0;
2527 while (trace_empty(iter)) { 2553 while (trace_empty(iter)) {
2528 2554
2529 if ((filp->f_flags & O_NONBLOCK)) { 2555 if ((filp->f_flags & O_NONBLOCK)) {
@@ -2588,46 +2614,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2588 offsetof(struct trace_iterator, seq)); 2614 offsetof(struct trace_iterator, seq));
2589 iter->pos = -1; 2615 iter->pos = -1;
2590 2616
2591 /*
2592 * We need to stop all tracing on all CPUS to read the
2593 * the next buffer. This is a bit expensive, but is
2594 * not done often. We fill all what we can read,
2595 * and then release the locks again.
2596 */
2597
2598 cpus_clear(mask);
2599 local_irq_save(flags);
2600#ifdef CONFIG_FTRACE
2601 ftrace_save = ftrace_enabled;
2602 ftrace_enabled = 0;
2603#endif
2604 smp_wmb();
2605 for_each_tracing_cpu(cpu) {
2606 data = iter->tr->data[cpu];
2607
2608 if (!head_page(data) || !data->trace_idx)
2609 continue;
2610
2611 atomic_inc(&data->disabled);
2612 cpu_set(cpu, mask);
2613 }
2614
2615 for_each_cpu_mask(cpu, mask) {
2616 data = iter->tr->data[cpu];
2617 __raw_spin_lock(&data->lock);
2618
2619 if (data->overrun > iter->last_overrun[cpu])
2620 iter->overrun[cpu] +=
2621 data->overrun - iter->last_overrun[cpu];
2622 iter->last_overrun[cpu] = data->overrun;
2623 }
2624
2625 while (find_next_entry_inc(iter) != NULL) { 2617 while (find_next_entry_inc(iter) != NULL) {
2626 int ret; 2618 enum print_line_t ret;
2627 int len = iter->seq.len; 2619 int len = iter->seq.len;
2628 2620
2629 ret = print_trace_line(iter); 2621 ret = print_trace_line(iter);
2630 if (!ret) { 2622 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2631 /* don't print partial lines */ 2623 /* don't print partial lines */
2632 iter->seq.len = len; 2624 iter->seq.len = len;
2633 break; 2625 break;
@@ -2639,26 +2631,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2639 break; 2631 break;
2640 } 2632 }
2641 2633
2642 for_each_cpu_mask(cpu, mask) {
2643 data = iter->tr->data[cpu];
2644 __raw_spin_unlock(&data->lock);
2645 }
2646
2647 for_each_cpu_mask(cpu, mask) {
2648 data = iter->tr->data[cpu];
2649 atomic_dec(&data->disabled);
2650 }
2651#ifdef CONFIG_FTRACE
2652 ftrace_enabled = ftrace_save;
2653#endif
2654 local_irq_restore(flags);
2655
2656 /* Now copy what we have to the user */ 2634 /* Now copy what we have to the user */
2657 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2635 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2658 if (iter->seq.readpos >= iter->seq.len) 2636 if (iter->seq.readpos >= iter->seq.len)
2659 trace_seq_reset(&iter->seq); 2637 trace_seq_reset(&iter->seq);
2638
2639 /*
 2640 * If there was nothing to send to the user, in spite of consuming trace
 2641 * entries, go back and wait for more entries.
2642 */
2660 if (sret == -EBUSY) 2643 if (sret == -EBUSY)
2661 sret = 0; 2644 goto waitagain;
2662 2645
2663out: 2646out:
2664 mutex_unlock(&trace_types_lock); 2647 mutex_unlock(&trace_types_lock);
@@ -2684,7 +2667,8 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2684{ 2667{
2685 unsigned long val; 2668 unsigned long val;
2686 char buf[64]; 2669 char buf[64];
2687 int i, ret; 2670 int ret;
2671 struct trace_array *tr = filp->private_data;
2688 2672
2689 if (cnt >= sizeof(buf)) 2673 if (cnt >= sizeof(buf))
2690 return -EINVAL; 2674 return -EINVAL;
@@ -2704,59 +2688,38 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2704 2688
2705 mutex_lock(&trace_types_lock); 2689 mutex_lock(&trace_types_lock);
2706 2690
2707 if (current_trace != &no_tracer) { 2691 if (tr->ctrl) {
2708 cnt = -EBUSY; 2692 cnt = -EBUSY;
2709 pr_info("ftrace: set current_tracer to none" 2693 pr_info("ftrace: please disable tracing"
2710 " before modifying buffer size\n"); 2694 " before modifying buffer size\n");
2711 goto out; 2695 goto out;
2712 } 2696 }
2713 2697
2714 if (val > global_trace.entries) { 2698 if (val != global_trace.entries) {
2715 long pages_requested; 2699 ret = ring_buffer_resize(global_trace.buffer, val);
2716 unsigned long freeable_pages; 2700 if (ret < 0) {
2717 2701 cnt = ret;
2718 /* make sure we have enough memory before mapping */
2719 pages_requested =
2720 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2721
2722 /* account for each buffer (and max_tr) */
2723 pages_requested *= tracing_nr_buffers * 2;
2724
2725 /* Check for overflow */
2726 if (pages_requested < 0) {
2727 cnt = -ENOMEM;
2728 goto out;
2729 }
2730
2731 freeable_pages = determine_dirtyable_memory();
2732
2733 /* we only allow to request 1/4 of useable memory */
2734 if (pages_requested >
2735 ((freeable_pages + tracing_pages_allocated) / 4)) {
2736 cnt = -ENOMEM;
2737 goto out; 2702 goto out;
2738 } 2703 }
2739 2704
2740 while (global_trace.entries < val) { 2705 ret = ring_buffer_resize(max_tr.buffer, val);
2741 if (trace_alloc_page()) { 2706 if (ret < 0) {
2742 cnt = -ENOMEM; 2707 int r;
2743 goto out; 2708 cnt = ret;
2709 r = ring_buffer_resize(global_trace.buffer,
2710 global_trace.entries);
2711 if (r < 0) {
2712 /* AARGH! We are left with different
2713 * size max buffer!!!! */
2714 WARN_ON(1);
2715 tracing_disabled = 1;
2744 } 2716 }
2745 /* double check that we don't go over the known pages */ 2717 goto out;
2746 if (tracing_pages_allocated > pages_requested)
2747 break;
2748 } 2718 }
2749 2719
2750 } else { 2720 global_trace.entries = val;
2751 /* include the number of entries in val (inc of page entries) */
2752 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2753 trace_free_page();
2754 } 2721 }
2755 2722
2756 /* check integrity */
2757 for_each_tracing_cpu(i)
2758 check_pages(global_trace.data[i]);
2759
2760 filp->f_pos += cnt; 2723 filp->f_pos += cnt;
2761 2724
2762 /* If check pages failed, return ENOMEM */ 2725 /* If check pages failed, return ENOMEM */
@@ -2769,6 +2732,52 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2769 return cnt; 2732 return cnt;
2770} 2733}
2771 2734
2735static int mark_printk(const char *fmt, ...)
2736{
2737 int ret;
2738 va_list args;
2739 va_start(args, fmt);
2740 ret = trace_vprintk(0, fmt, args);
2741 va_end(args);
2742 return ret;
2743}
2744
2745static ssize_t
2746tracing_mark_write(struct file *filp, const char __user *ubuf,
2747 size_t cnt, loff_t *fpos)
2748{
2749 char *buf;
2750 char *end;
2751 struct trace_array *tr = &global_trace;
2752
2753 if (!tr->ctrl || tracing_disabled)
2754 return -EINVAL;
2755
2756 if (cnt > TRACE_BUF_SIZE)
2757 cnt = TRACE_BUF_SIZE;
2758
2759 buf = kmalloc(cnt + 1, GFP_KERNEL);
2760 if (buf == NULL)
2761 return -ENOMEM;
2762
2763 if (copy_from_user(buf, ubuf, cnt)) {
2764 kfree(buf);
2765 return -EFAULT;
2766 }
2767
 2768 /* Cut from the first NUL or newline. */
2769 buf[cnt] = '\0';
2770 end = strchr(buf, '\n');
2771 if (end)
2772 *end = '\0';
2773
2774 cnt = mark_printk("%s\n", buf);
2775 kfree(buf);
2776 *fpos += cnt;
2777
2778 return cnt;
2779}
2780
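tracing_mark_write() backs the trace_marker debugfs file registered below; userspace can inject annotations into the trace with a plain write(). A minimal user-side example, assuming debugfs is mounted at /sys/kernel/debug:

/* Write a marker into the trace buffer from userspace. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *msg = "hello from userspace\n";
	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);

	if (fd < 0) {
		perror("open trace_marker");
		return 1;
	}
	/* the kernel cuts at the first newline and timestamps the entry */
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write");
	close(fd);
	return 0;
}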
2772static struct file_operations tracing_max_lat_fops = { 2781static struct file_operations tracing_max_lat_fops = {
2773 .open = tracing_open_generic, 2782 .open = tracing_open_generic,
2774 .read = tracing_max_lat_read, 2783 .read = tracing_max_lat_read,
@@ -2800,6 +2809,11 @@ static struct file_operations tracing_entries_fops = {
2800 .write = tracing_entries_write, 2809 .write = tracing_entries_write,
2801}; 2810};
2802 2811
2812static struct file_operations tracing_mark_fops = {
2813 .open = tracing_open_generic,
2814 .write = tracing_mark_write,
2815};
2816
2803#ifdef CONFIG_DYNAMIC_FTRACE 2817#ifdef CONFIG_DYNAMIC_FTRACE
2804 2818
2805static ssize_t 2819static ssize_t
@@ -2846,7 +2860,7 @@ struct dentry *tracing_init_dentry(void)
2846#include "trace_selftest.c" 2860#include "trace_selftest.c"
2847#endif 2861#endif
2848 2862
2849static __init void tracer_init_debugfs(void) 2863static __init int tracer_init_debugfs(void)
2850{ 2864{
2851 struct dentry *d_tracer; 2865 struct dentry *d_tracer;
2852 struct dentry *entry; 2866 struct dentry *entry;
@@ -2881,12 +2895,12 @@ static __init void tracer_init_debugfs(void)
2881 entry = debugfs_create_file("available_tracers", 0444, d_tracer, 2895 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2882 &global_trace, &show_traces_fops); 2896 &global_trace, &show_traces_fops);
2883 if (!entry) 2897 if (!entry)
2884 pr_warning("Could not create debugfs 'trace' entry\n"); 2898 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2885 2899
2886 entry = debugfs_create_file("current_tracer", 0444, d_tracer, 2900 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2887 &global_trace, &set_tracer_fops); 2901 &global_trace, &set_tracer_fops);
2888 if (!entry) 2902 if (!entry)
2889 pr_warning("Could not create debugfs 'trace' entry\n"); 2903 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2890 2904
2891 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, 2905 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2892 &tracing_max_latency, 2906 &tracing_max_latency,
@@ -2899,7 +2913,7 @@ static __init void tracer_init_debugfs(void)
2899 &tracing_thresh, &tracing_max_lat_fops); 2913 &tracing_thresh, &tracing_max_lat_fops);
2900 if (!entry) 2914 if (!entry)
2901 pr_warning("Could not create debugfs " 2915 pr_warning("Could not create debugfs "
2902 "'tracing_threash' entry\n"); 2916 "'tracing_thresh' entry\n");
2903 entry = debugfs_create_file("README", 0644, d_tracer, 2917 entry = debugfs_create_file("README", 0644, d_tracer,
2904 NULL, &tracing_readme_fops); 2918 NULL, &tracing_readme_fops);
2905 if (!entry) 2919 if (!entry)
@@ -2909,13 +2923,19 @@ static __init void tracer_init_debugfs(void)
2909 NULL, &tracing_pipe_fops); 2923 NULL, &tracing_pipe_fops);
2910 if (!entry) 2924 if (!entry)
2911 pr_warning("Could not create debugfs " 2925 pr_warning("Could not create debugfs "
2912 "'tracing_threash' entry\n"); 2926 "'trace_pipe' entry\n");
2913 2927
2914 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 2928 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2915 &global_trace, &tracing_entries_fops); 2929 &global_trace, &tracing_entries_fops);
2916 if (!entry) 2930 if (!entry)
2917 pr_warning("Could not create debugfs " 2931 pr_warning("Could not create debugfs "
2918 "'tracing_threash' entry\n"); 2932 "'trace_entries' entry\n");
2933
2934 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2935 NULL, &tracing_mark_fops);
2936 if (!entry)
2937 pr_warning("Could not create debugfs "
2938 "'trace_marker' entry\n");
2919 2939
2920#ifdef CONFIG_DYNAMIC_FTRACE 2940#ifdef CONFIG_DYNAMIC_FTRACE
2921 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 2941 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
@@ -2928,230 +2948,263 @@ static __init void tracer_init_debugfs(void)
2928#ifdef CONFIG_SYSPROF_TRACER 2948#ifdef CONFIG_SYSPROF_TRACER
2929 init_tracer_sysprof_debugfs(d_tracer); 2949 init_tracer_sysprof_debugfs(d_tracer);
2930#endif 2950#endif
2951 return 0;
2931} 2952}
2932 2953
2933static int trace_alloc_page(void) 2954int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2934{ 2955{
2956 static DEFINE_SPINLOCK(trace_buf_lock);
2957 static char trace_buf[TRACE_BUF_SIZE];
2958
2959 struct ring_buffer_event *event;
2960 struct trace_array *tr = &global_trace;
2935 struct trace_array_cpu *data; 2961 struct trace_array_cpu *data;
2936 struct page *page, *tmp; 2962 struct print_entry *entry;
2937 LIST_HEAD(pages); 2963 unsigned long flags, irq_flags;
2938 void *array; 2964 int cpu, len = 0, size, pc;
2939 unsigned pages_allocated = 0;
2940 int i;
2941 2965
2942 /* first allocate a page for each CPU */ 2966 if (!tr->ctrl || tracing_disabled)
2943 for_each_tracing_cpu(i) { 2967 return 0;
2944 array = (void *)__get_free_page(GFP_KERNEL);
2945 if (array == NULL) {
2946 printk(KERN_ERR "tracer: failed to allocate page"
2947 "for trace buffer!\n");
2948 goto free_pages;
2949 }
2950 2968
2951 pages_allocated++; 2969 pc = preempt_count();
2952 page = virt_to_page(array); 2970 preempt_disable_notrace();
2953 list_add(&page->lru, &pages); 2971 cpu = raw_smp_processor_id();
2972 data = tr->data[cpu];
2954 2973
2955/* Only allocate if we are actually using the max trace */ 2974 if (unlikely(atomic_read(&data->disabled)))
2956#ifdef CONFIG_TRACER_MAX_TRACE 2975 goto out;
2957 array = (void *)__get_free_page(GFP_KERNEL);
2958 if (array == NULL) {
2959 printk(KERN_ERR "tracer: failed to allocate page"
2960 "for trace buffer!\n");
2961 goto free_pages;
2962 }
2963 pages_allocated++;
2964 page = virt_to_page(array);
2965 list_add(&page->lru, &pages);
2966#endif
2967 }
2968 2976
2969 /* Now that we successfully allocate a page per CPU, add them */ 2977 spin_lock_irqsave(&trace_buf_lock, flags);
2970 for_each_tracing_cpu(i) { 2978 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
2971 data = global_trace.data[i];
2972 page = list_entry(pages.next, struct page, lru);
2973 list_del_init(&page->lru);
2974 list_add_tail(&page->lru, &data->trace_pages);
2975 ClearPageLRU(page);
2976 2979
2977#ifdef CONFIG_TRACER_MAX_TRACE 2980 len = min(len, TRACE_BUF_SIZE-1);
2978 data = max_tr.data[i]; 2981 trace_buf[len] = 0;
2979 page = list_entry(pages.next, struct page, lru);
2980 list_del_init(&page->lru);
2981 list_add_tail(&page->lru, &data->trace_pages);
2982 SetPageLRU(page);
2983#endif
2984 }
2985 tracing_pages_allocated += pages_allocated;
2986 global_trace.entries += ENTRIES_PER_PAGE;
2987 2982
2988 return 0; 2983 size = sizeof(*entry) + len + 1;
2984 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
2985 if (!event)
2986 goto out_unlock;
2987 entry = ring_buffer_event_data(event);
2988 tracing_generic_entry_update(&entry->ent, flags, pc);
2989 entry->ent.type = TRACE_PRINT;
2990 entry->ip = ip;
2989 2991
2990 free_pages: 2992 memcpy(&entry->buf, trace_buf, len);
2991 list_for_each_entry_safe(page, tmp, &pages, lru) { 2993 entry->buf[len] = 0;
2992 list_del_init(&page->lru); 2994 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
2993 __free_page(page); 2995
2994 } 2996 out_unlock:
2995 return -ENOMEM; 2997 spin_unlock_irqrestore(&trace_buf_lock, flags);
2998
2999 out:
3000 preempt_enable_notrace();
3001
3002 return len;
2996} 3003}
3004EXPORT_SYMBOL_GPL(trace_vprintk);
2997 3005
2998static int trace_free_page(void) 3006int __ftrace_printk(unsigned long ip, const char *fmt, ...)
2999{ 3007{
3000 struct trace_array_cpu *data; 3008 int ret;
3001 struct page *page; 3009 va_list ap;
3002 struct list_head *p;
3003 int i;
3004 int ret = 0;
3005 3010
3006 /* free one page from each buffer */ 3011 if (!(trace_flags & TRACE_ITER_PRINTK))
3007 for_each_tracing_cpu(i) { 3012 return 0;
3008 data = global_trace.data[i];
3009 p = data->trace_pages.next;
3010 if (p == &data->trace_pages) {
3011 /* should never happen */
3012 WARN_ON(1);
3013 tracing_disabled = 1;
3014 ret = -1;
3015 break;
3016 }
3017 page = list_entry(p, struct page, lru);
3018 ClearPageLRU(page);
3019 list_del(&page->lru);
3020 tracing_pages_allocated--;
3021 tracing_pages_allocated--;
3022 __free_page(page);
3023 3013
3024 tracing_reset(data); 3014 va_start(ap, fmt);
3015 ret = trace_vprintk(ip, fmt, ap);
3016 va_end(ap);
3017 return ret;
3018}
3019EXPORT_SYMBOL_GPL(__ftrace_printk);
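
A hedged sketch, not from this commit: __ftrace_printk() is normally reached through an ftrace_printk() wrapper macro that supplies the caller's instruction pointer; that macro name and the irq-handler context below are assumptions for illustration.

	/* Hypothetical driver snippet using the printk-style tracing above. */
	#include <linux/ftrace.h>
	#include <linux/interrupt.h>

	static irqreturn_t my_irq_handler(int irq, void *dev_id)
	{
		/* Recorded as a TRACE_PRINT entry in the ring buffer. */
		ftrace_printk("irq %d fired, dev=%p\n", irq, dev_id);
		return IRQ_HANDLED;
	}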
3025 3020
3026#ifdef CONFIG_TRACER_MAX_TRACE 3021static int trace_panic_handler(struct notifier_block *this,
3027 data = max_tr.data[i]; 3022 unsigned long event, void *unused)
3028 p = data->trace_pages.next; 3023{
3029 if (p == &data->trace_pages) { 3024 ftrace_dump();
3030 /* should never happen */ 3025 return NOTIFY_OK;
3031 WARN_ON(1); 3026}
3032 tracing_disabled = 1;
3033 ret = -1;
3034 break;
3035 }
3036 page = list_entry(p, struct page, lru);
3037 ClearPageLRU(page);
3038 list_del(&page->lru);
3039 __free_page(page);
3040 3027
3041 tracing_reset(data); 3028static struct notifier_block trace_panic_notifier = {
3042#endif 3029 .notifier_call = trace_panic_handler,
3043 } 3030 .next = NULL,
3044 global_trace.entries -= ENTRIES_PER_PAGE; 3031 .priority = 150 /* priority: INT_MAX >= x >= 0 */
3032};
3045 3033
3046 return ret; 3034static int trace_die_handler(struct notifier_block *self,
3035 unsigned long val,
3036 void *data)
3037{
3038 switch (val) {
3039 case DIE_OOPS:
3040 ftrace_dump();
3041 break;
3042 default:
3043 break;
3044 }
3045 return NOTIFY_OK;
3047} 3046}
3048 3047
3049__init static int tracer_alloc_buffers(void) 3048static struct notifier_block trace_die_notifier = {
3049 .notifier_call = trace_die_handler,
3050 .priority = 200
3051};
3052
3053/*
 3054 * printk is set to a max of 1024; we really don't need it that big.
3055 * Nothing should be printing 1000 characters anyway.
3056 */
3057#define TRACE_MAX_PRINT 1000
3058
3059/*
3060 * Define here KERN_TRACE so that we have one place to modify
3061 * it if we decide to change what log level the ftrace dump
3062 * should be at.
3063 */
3064#define KERN_TRACE KERN_INFO
3065
3066static void
3067trace_printk_seq(struct trace_seq *s)
3050{ 3068{
3051 struct trace_array_cpu *data; 3069 /* Probably should print a warning here. */
3052 void *array; 3070 if (s->len >= 1000)
3053 struct page *page; 3071 s->len = 1000;
3054 int pages = 0;
3055 int ret = -ENOMEM;
3056 int i;
3057 3072
 3058 /* TODO: make the number of buffers hot pluggable with CPUS */ 3073 /* should be zero terminated, but we are paranoid. */
3059 tracing_nr_buffers = num_possible_cpus(); 3074 s->buffer[s->len] = 0;
3060 tracing_buffer_mask = cpu_possible_map;
3061 3075
3062 /* Allocate the first page for all buffers */ 3076 printk(KERN_TRACE "%s", s->buffer);
3063 for_each_tracing_cpu(i) {
3064 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3065 max_tr.data[i] = &per_cpu(max_data, i);
3066 3077
3067 array = (void *)__get_free_page(GFP_KERNEL); 3078 trace_seq_reset(s);
3068 if (array == NULL) { 3079}
3069 printk(KERN_ERR "tracer: failed to allocate page" 3080
3070 "for trace buffer!\n"); 3081
3071 goto free_buffers; 3082void ftrace_dump(void)
3072 } 3083{
3084 static DEFINE_SPINLOCK(ftrace_dump_lock);
3085 /* use static because iter can be a bit big for the stack */
3086 static struct trace_iterator iter;
3087 static cpumask_t mask;
3088 static int dump_ran;
3089 unsigned long flags;
3090 int cnt = 0, cpu;
3073 3091
3074 /* set the array to the list */ 3092 /* only one dump */
3075 INIT_LIST_HEAD(&data->trace_pages); 3093 spin_lock_irqsave(&ftrace_dump_lock, flags);
3076 page = virt_to_page(array); 3094 if (dump_ran)
3077 list_add(&page->lru, &data->trace_pages); 3095 goto out;
3078 /* use the LRU flag to differentiate the two buffers */
3079 ClearPageLRU(page);
3080 3096
3081 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 3097 dump_ran = 1;
3082 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3083 3098
3084/* Only allocate if we are actually using the max trace */ 3099 /* No turning back! */
3085#ifdef CONFIG_TRACER_MAX_TRACE 3100 ftrace_kill_atomic();
3086 array = (void *)__get_free_page(GFP_KERNEL);
3087 if (array == NULL) {
3088 printk(KERN_ERR "tracer: failed to allocate page"
3089 "for trace buffer!\n");
3090 goto free_buffers;
3091 }
3092 3101
3093 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); 3102 for_each_tracing_cpu(cpu) {
3094 page = virt_to_page(array); 3103 atomic_inc(&global_trace.data[cpu]->disabled);
3095 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3096 SetPageLRU(page);
3097#endif
3098 } 3104 }
3099 3105
3106 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3107
3108 iter.tr = &global_trace;
3109 iter.trace = current_trace;
3110
3100 /* 3111 /*
 3101 * Since we allocate by orders of pages, we may be able to 3112 * We need to stop all tracing on all CPUs to read the
 3102 * round up a bit. 3113 * next buffer. This is a bit expensive, but is
 3114 * not done often. We read everything we can,
 3115 * and then release the locks again.
3103 */ 3116 */
3104 global_trace.entries = ENTRIES_PER_PAGE;
3105 pages++;
3106 3117
3107 while (global_trace.entries < trace_nr_entries) { 3118 cpus_clear(mask);
3108 if (trace_alloc_page()) 3119
3109 break; 3120 while (!trace_empty(&iter)) {
3110 pages++; 3121
3122 if (!cnt)
3123 printk(KERN_TRACE "---------------------------------\n");
3124
3125 cnt++;
3126
3127 /* reset all but tr, trace, and overruns */
3128 memset(&iter.seq, 0,
3129 sizeof(struct trace_iterator) -
3130 offsetof(struct trace_iterator, seq));
3131 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3132 iter.pos = -1;
3133
3134 if (find_next_entry_inc(&iter) != NULL) {
3135 print_trace_line(&iter);
3136 trace_consume(&iter);
3137 }
3138
3139 trace_printk_seq(&iter.seq);
3111 } 3140 }
3112 max_tr.entries = global_trace.entries;
3113 3141
3114 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n", 3142 if (!cnt)
3115 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE); 3143 printk(KERN_TRACE " (ftrace buffer empty)\n");
3116 pr_info(" actual entries %ld\n", global_trace.entries); 3144 else
3145 printk(KERN_TRACE "---------------------------------\n");
3146
3147 out:
3148 spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3149}
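
A usage sketch under stated assumptions: besides the panic and die notifiers registered further down, ftrace_dump() can be called directly from debugging code; it is one-shot and kills tracing first, so it is safe from atomic context. struct my_state and MY_STATE_MAGIC below are hypothetical.

	/* Hypothetical debugging aid: dump the trace once on corruption. */
	static int check_state(struct my_state *s)
	{
		if (WARN_ON(s->magic != MY_STATE_MAGIC)) {
			ftrace_dump();	/* one-shot; kills tracing first */
			return -EIO;
		}
		return 0;
	}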
3150
3151__init static int tracer_alloc_buffers(void)
3152{
3153 struct trace_array_cpu *data;
3154 int i;
3155
 3156 /* TODO: make the number of buffers hot-pluggable with CPUs */
3157 tracing_buffer_mask = cpu_possible_map;
3158
3159 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3160 TRACE_BUFFER_FLAGS);
3161 if (!global_trace.buffer) {
3162 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3163 WARN_ON(1);
3164 return 0;
3165 }
3166 global_trace.entries = ring_buffer_size(global_trace.buffer);
3117 3167
3118 tracer_init_debugfs(); 3168#ifdef CONFIG_TRACER_MAX_TRACE
3169 max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3170 TRACE_BUFFER_FLAGS);
3171 if (!max_tr.buffer) {
3172 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3173 WARN_ON(1);
3174 ring_buffer_free(global_trace.buffer);
3175 return 0;
3176 }
3177 max_tr.entries = ring_buffer_size(max_tr.buffer);
3178 WARN_ON(max_tr.entries != global_trace.entries);
3179#endif
3180
3181 /* Allocate the first page for all buffers */
3182 for_each_tracing_cpu(i) {
3183 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3184 max_tr.data[i] = &per_cpu(max_data, i);
3185 }
3119 3186
3120 trace_init_cmdlines(); 3187 trace_init_cmdlines();
3121 3188
3122 register_tracer(&no_tracer); 3189 register_tracer(&nop_trace);
3123 current_trace = &no_tracer; 3190#ifdef CONFIG_BOOT_TRACER
3191 register_tracer(&boot_tracer);
3192 current_trace = &boot_tracer;
3193 current_trace->init(&global_trace);
3194#else
3195 current_trace = &nop_trace;
3196#endif
3124 3197
3125 /* All seems OK, enable tracing */ 3198 /* All seems OK, enable tracing */
3126 global_trace.ctrl = tracer_enabled; 3199 global_trace.ctrl = tracer_enabled;
3127 tracing_disabled = 0; 3200 tracing_disabled = 0;
3128 3201
3129 return 0; 3202 atomic_notifier_chain_register(&panic_notifier_list,
3203 &trace_panic_notifier);
3130 3204
3131 free_buffers: 3205 register_die_notifier(&trace_die_notifier);
3132 for (i-- ; i >= 0; i--) {
3133 struct page *page, *tmp;
3134 struct trace_array_cpu *data = global_trace.data[i];
3135 3206
3136 if (data) { 3207 return 0;
3137 list_for_each_entry_safe(page, tmp,
3138 &data->trace_pages, lru) {
3139 list_del_init(&page->lru);
3140 __free_page(page);
3141 }
3142 }
3143
3144#ifdef CONFIG_TRACER_MAX_TRACE
3145 data = max_tr.data[i];
3146 if (data) {
3147 list_for_each_entry_safe(page, tmp,
3148 &data->trace_pages, lru) {
3149 list_del_init(&page->lru);
3150 __free_page(page);
3151 }
3152 }
3153#endif
3154 }
3155 return ret;
3156} 3208}
3157fs_initcall(tracer_alloc_buffers); 3209early_initcall(tracer_alloc_buffers);
3210fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f86788c2b..f1f99572cde7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,7 +5,9 @@
5#include <asm/atomic.h> 5#include <asm/atomic.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h>
8#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h>
9 11
10enum trace_type { 12enum trace_type {
11 __TRACE_FIRST_TYPE = 0, 13 __TRACE_FIRST_TYPE = 0,
@@ -13,38 +15,60 @@ enum trace_type {
13 TRACE_FN, 15 TRACE_FN,
14 TRACE_CTX, 16 TRACE_CTX,
15 TRACE_WAKE, 17 TRACE_WAKE,
18 TRACE_CONT,
16 TRACE_STACK, 19 TRACE_STACK,
20 TRACE_PRINT,
17 TRACE_SPECIAL, 21 TRACE_SPECIAL,
18 TRACE_MMIO_RW, 22 TRACE_MMIO_RW,
19 TRACE_MMIO_MAP, 23 TRACE_MMIO_MAP,
24 TRACE_BOOT,
20 25
21 __TRACE_LAST_TYPE 26 __TRACE_LAST_TYPE
22}; 27};
23 28
24/* 29/*
30 * The trace entry - the most basic unit of tracing. This is what
31 * is printed in the end as a single line in the trace output, such as:
32 *
33 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
34 */
35struct trace_entry {
36 unsigned char type;
37 unsigned char cpu;
38 unsigned char flags;
39 unsigned char preempt_count;
40 int pid;
41};
42
43/*
 25 * Function trace entry - function address and parent function address: 44 * Function trace entry - function address and parent function address:
26 */ 45 */
27struct ftrace_entry { 46struct ftrace_entry {
47 struct trace_entry ent;
28 unsigned long ip; 48 unsigned long ip;
29 unsigned long parent_ip; 49 unsigned long parent_ip;
30}; 50};
51extern struct tracer boot_tracer;
31 52
32/* 53/*
33 * Context switch trace entry - which task (and prio) we switched from/to: 54 * Context switch trace entry - which task (and prio) we switched from/to:
34 */ 55 */
35struct ctx_switch_entry { 56struct ctx_switch_entry {
57 struct trace_entry ent;
36 unsigned int prev_pid; 58 unsigned int prev_pid;
37 unsigned char prev_prio; 59 unsigned char prev_prio;
38 unsigned char prev_state; 60 unsigned char prev_state;
39 unsigned int next_pid; 61 unsigned int next_pid;
40 unsigned char next_prio; 62 unsigned char next_prio;
41 unsigned char next_state; 63 unsigned char next_state;
64 unsigned int next_cpu;
42}; 65};
43 66
44/* 67/*
45 * Special (free-form) trace entry: 68 * Special (free-form) trace entry:
46 */ 69 */
47struct special_entry { 70struct special_entry {
71 struct trace_entry ent;
48 unsigned long arg1; 72 unsigned long arg1;
49 unsigned long arg2; 73 unsigned long arg2;
50 unsigned long arg3; 74 unsigned long arg3;
@@ -57,33 +81,60 @@ struct special_entry {
57#define FTRACE_STACK_ENTRIES 8 81#define FTRACE_STACK_ENTRIES 8
58 82
59struct stack_entry { 83struct stack_entry {
84 struct trace_entry ent;
60 unsigned long caller[FTRACE_STACK_ENTRIES]; 85 unsigned long caller[FTRACE_STACK_ENTRIES];
61}; 86};
62 87
63/* 88/*
64 * The trace entry - the most basic unit of tracing. This is what 89 * ftrace_printk entry:
65 * is printed in the end as a single line in the trace output, such as:
66 *
67 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
68 */ 90 */
69struct trace_entry { 91struct print_entry {
70 char type; 92 struct trace_entry ent;
71 char cpu; 93 unsigned long ip;
72 char flags; 94 char buf[];
73 char preempt_count; 95};
74 int pid; 96
75 cycle_t t; 97#define TRACE_OLD_SIZE 88
76 union { 98
77 struct ftrace_entry fn; 99struct trace_field_cont {
78 struct ctx_switch_entry ctx; 100 unsigned char type;
79 struct special_entry special; 101 /* Temporary till we get rid of this completely */
80 struct stack_entry stack; 102 char buf[TRACE_OLD_SIZE - 1];
81 struct mmiotrace_rw mmiorw; 103};
82 struct mmiotrace_map mmiomap; 104
83 }; 105struct trace_mmiotrace_rw {
106 struct trace_entry ent;
107 struct mmiotrace_rw rw;
84}; 108};
85 109
86#define TRACE_ENTRY_SIZE sizeof(struct trace_entry) 110struct trace_mmiotrace_map {
111 struct trace_entry ent;
112 struct mmiotrace_map map;
113};
114
115struct trace_boot {
116 struct trace_entry ent;
117 struct boot_trace initcall;
118};
119
120/*
121 * trace_flag_type is an enumeration that holds different
122 * states when a trace occurs. These are:
123 * IRQS_OFF - interrupts were disabled
 124 * NEED_RESCHED - reschedule is requested
125 * HARDIRQ - inside an interrupt handler
126 * SOFTIRQ - inside a softirq handler
127 * CONT - multiple entries hold the trace item
128 */
129enum trace_flag_type {
130 TRACE_FLAG_IRQS_OFF = 0x01,
131 TRACE_FLAG_NEED_RESCHED = 0x02,
132 TRACE_FLAG_HARDIRQ = 0x04,
133 TRACE_FLAG_SOFTIRQ = 0x08,
134 TRACE_FLAG_CONT = 0x10,
135};
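
A sketch, not part of the header: output code can decode these flags per entry. The single-letter convention below mirrors the latency format; the helper name is hypothetical.

	/* Hypothetical decoder for trace_entry.flags. */
	static char irq_flag_char(struct trace_entry *ent)
	{
		if (ent->flags & TRACE_FLAG_HARDIRQ)
			return 'h';
		if (ent->flags & TRACE_FLAG_SOFTIRQ)
			return 's';
		if (ent->flags & TRACE_FLAG_IRQS_OFF)
			return 'd';
		return '.';
	}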
136
137#define TRACE_BUF_SIZE 1024
87 138
88/* 139/*
89 * The CPU trace array - it consists of thousands of trace entries 140 * The CPU trace array - it consists of thousands of trace entries
@@ -91,16 +142,9 @@ struct trace_entry {
91 * the trace, etc.) 142 * the trace, etc.)
92 */ 143 */
93struct trace_array_cpu { 144struct trace_array_cpu {
94 struct list_head trace_pages;
95 atomic_t disabled; 145 atomic_t disabled;
96 raw_spinlock_t lock;
97 struct lock_class_key lock_key;
98 146
99 /* these fields get copied into max-trace: */ 147 /* these fields get copied into max-trace: */
100 unsigned trace_head_idx;
101 unsigned trace_tail_idx;
102 void *trace_head; /* producer */
103 void *trace_tail; /* consumer */
104 unsigned long trace_idx; 148 unsigned long trace_idx;
105 unsigned long overrun; 149 unsigned long overrun;
106 unsigned long saved_latency; 150 unsigned long saved_latency;
@@ -124,6 +168,7 @@ struct trace_iterator;
124 * They have on/off state as well: 168 * They have on/off state as well:
125 */ 169 */
126struct trace_array { 170struct trace_array {
171 struct ring_buffer *buffer;
127 unsigned long entries; 172 unsigned long entries;
128 long ctrl; 173 long ctrl;
129 int cpu; 174 int cpu;
@@ -132,6 +177,56 @@ struct trace_array {
132 struct trace_array_cpu *data[NR_CPUS]; 177 struct trace_array_cpu *data[NR_CPUS];
133}; 178};
134 179
180#define FTRACE_CMP_TYPE(var, type) \
181 __builtin_types_compatible_p(typeof(var), type *)
182
183#undef IF_ASSIGN
184#define IF_ASSIGN(var, entry, etype, id) \
185 if (FTRACE_CMP_TYPE(var, etype)) { \
186 var = (typeof(var))(entry); \
187 WARN_ON(id && (entry)->type != id); \
188 break; \
189 }
190
191/* Will cause compile errors if type is not found. */
192extern void __ftrace_bad_type(void);
193
194/*
 195 * The trace_assign_type() macro is a verifier that the entry type is
 196 * the same as the type being assigned. To add new types, simply
 197 * add a line with the following format:
 198 *
 199 * IF_ASSIGN(var, ent, type, id);
 200 *
 201 * where "type" is the trace type that includes the trace_entry
 202 * as the "ent" item, and "id" is the trace identifier that is
203 * used in the trace_type enum.
204 *
205 * If the type can have more than one id, then use zero.
206 */
207#define trace_assign_type(var, ent) \
208 do { \
209 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
210 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
211 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
212 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
213 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
214 IF_ASSIGN(var, ent, struct special_entry, 0); \
215 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
216 TRACE_MMIO_RW); \
217 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
218 TRACE_MMIO_MAP); \
219 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \
220 __ftrace_bad_type(); \
221 } while (0)
222
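An illustrative use of trace_assign_type(), matching the pattern mmio_print_rw() adopts later in this patch; the surrounding handler fragment is a sketch, not code from the commit.

	/* Hypothetical print_line handler fragment. */
	struct trace_entry *ent = iter->ent;
	struct ftrace_entry *field;

	if (ent->type == TRACE_FN) {
		trace_assign_type(field, ent);	/* checked; WARNs on mismatch */
		trace_seq_printf(&iter->seq, "%lx <- %lx\n",
				 field->ip, field->parent_ip);
	}
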
223/* Return values for print_line callback */
224enum print_line_t {
225 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
226 TRACE_TYPE_HANDLED = 1,
227 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
228};
229
135/* 230/*
136 * A specific tracer, represented by methods that operate on a trace array: 231 * A specific tracer, represented by methods that operate on a trace array:
137 */ 232 */
@@ -152,7 +247,7 @@ struct tracer {
152 int (*selftest)(struct tracer *trace, 247 int (*selftest)(struct tracer *trace,
153 struct trace_array *tr); 248 struct trace_array *tr);
154#endif 249#endif
155 int (*print_line)(struct trace_iterator *iter); 250 enum print_line_t (*print_line)(struct trace_iterator *iter);
156 struct tracer *next; 251 struct tracer *next;
157 int print_max; 252 int print_max;
158}; 253};
@@ -171,57 +266,58 @@ struct trace_iterator {
171 struct trace_array *tr; 266 struct trace_array *tr;
172 struct tracer *trace; 267 struct tracer *trace;
173 void *private; 268 void *private;
174 long last_overrun[NR_CPUS]; 269 struct ring_buffer_iter *buffer_iter[NR_CPUS];
175 long overrun[NR_CPUS];
176 270
177 /* The below is zeroed out in pipe_read */ 271 /* The below is zeroed out in pipe_read */
178 struct trace_seq seq; 272 struct trace_seq seq;
179 struct trace_entry *ent; 273 struct trace_entry *ent;
180 int cpu; 274 int cpu;
181 275 u64 ts;
182 struct trace_entry *prev_ent;
183 int prev_cpu;
184 276
185 unsigned long iter_flags; 277 unsigned long iter_flags;
186 loff_t pos; 278 loff_t pos;
187 unsigned long next_idx[NR_CPUS];
188 struct list_head *next_page[NR_CPUS];
189 unsigned next_page_idx[NR_CPUS];
190 long idx; 279 long idx;
191}; 280};
192 281
193void tracing_reset(struct trace_array_cpu *data); 282void trace_wake_up(void);
283void tracing_reset(struct trace_array *tr, int cpu);
194int tracing_open_generic(struct inode *inode, struct file *filp); 284int tracing_open_generic(struct inode *inode, struct file *filp);
195struct dentry *tracing_init_dentry(void); 285struct dentry *tracing_init_dentry(void);
196void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 286void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
197 287
288struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
289 struct trace_array_cpu *data);
290void tracing_generic_entry_update(struct trace_entry *entry,
291 unsigned long flags,
292 int pc);
293
198void ftrace(struct trace_array *tr, 294void ftrace(struct trace_array *tr,
199 struct trace_array_cpu *data, 295 struct trace_array_cpu *data,
200 unsigned long ip, 296 unsigned long ip,
201 unsigned long parent_ip, 297 unsigned long parent_ip,
202 unsigned long flags); 298 unsigned long flags, int pc);
203void tracing_sched_switch_trace(struct trace_array *tr, 299void tracing_sched_switch_trace(struct trace_array *tr,
204 struct trace_array_cpu *data, 300 struct trace_array_cpu *data,
205 struct task_struct *prev, 301 struct task_struct *prev,
206 struct task_struct *next, 302 struct task_struct *next,
207 unsigned long flags); 303 unsigned long flags, int pc);
208void tracing_record_cmdline(struct task_struct *tsk); 304void tracing_record_cmdline(struct task_struct *tsk);
209 305
210void tracing_sched_wakeup_trace(struct trace_array *tr, 306void tracing_sched_wakeup_trace(struct trace_array *tr,
211 struct trace_array_cpu *data, 307 struct trace_array_cpu *data,
212 struct task_struct *wakee, 308 struct task_struct *wakee,
213 struct task_struct *cur, 309 struct task_struct *cur,
214 unsigned long flags); 310 unsigned long flags, int pc);
215void trace_special(struct trace_array *tr, 311void trace_special(struct trace_array *tr,
216 struct trace_array_cpu *data, 312 struct trace_array_cpu *data,
217 unsigned long arg1, 313 unsigned long arg1,
218 unsigned long arg2, 314 unsigned long arg2,
219 unsigned long arg3); 315 unsigned long arg3, int pc);
220void trace_function(struct trace_array *tr, 316void trace_function(struct trace_array *tr,
221 struct trace_array_cpu *data, 317 struct trace_array_cpu *data,
222 unsigned long ip, 318 unsigned long ip,
223 unsigned long parent_ip, 319 unsigned long parent_ip,
224 unsigned long flags); 320 unsigned long flags, int pc);
225 321
226void tracing_start_cmdline_record(void); 322void tracing_start_cmdline_record(void);
227void tracing_stop_cmdline_record(void); 323void tracing_stop_cmdline_record(void);
@@ -268,51 +364,33 @@ extern unsigned long ftrace_update_tot_cnt;
268extern int DYN_FTRACE_TEST_NAME(void); 364extern int DYN_FTRACE_TEST_NAME(void);
269#endif 365#endif
270 366
271#ifdef CONFIG_MMIOTRACE
272extern void __trace_mmiotrace_rw(struct trace_array *tr,
273 struct trace_array_cpu *data,
274 struct mmiotrace_rw *rw);
275extern void __trace_mmiotrace_map(struct trace_array *tr,
276 struct trace_array_cpu *data,
277 struct mmiotrace_map *map);
278#endif
279
280#ifdef CONFIG_FTRACE_STARTUP_TEST 367#ifdef CONFIG_FTRACE_STARTUP_TEST
281#ifdef CONFIG_FTRACE
282extern int trace_selftest_startup_function(struct tracer *trace, 368extern int trace_selftest_startup_function(struct tracer *trace,
283 struct trace_array *tr); 369 struct trace_array *tr);
284#endif
285#ifdef CONFIG_IRQSOFF_TRACER
286extern int trace_selftest_startup_irqsoff(struct tracer *trace, 370extern int trace_selftest_startup_irqsoff(struct tracer *trace,
287 struct trace_array *tr); 371 struct trace_array *tr);
288#endif
289#ifdef CONFIG_PREEMPT_TRACER
290extern int trace_selftest_startup_preemptoff(struct tracer *trace, 372extern int trace_selftest_startup_preemptoff(struct tracer *trace,
291 struct trace_array *tr); 373 struct trace_array *tr);
292#endif
293#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
294extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, 374extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
295 struct trace_array *tr); 375 struct trace_array *tr);
296#endif
297#ifdef CONFIG_SCHED_TRACER
298extern int trace_selftest_startup_wakeup(struct tracer *trace, 376extern int trace_selftest_startup_wakeup(struct tracer *trace,
299 struct trace_array *tr); 377 struct trace_array *tr);
300#endif 378extern int trace_selftest_startup_nop(struct tracer *trace,
301#ifdef CONFIG_CONTEXT_SWITCH_TRACER 379 struct trace_array *tr);
302extern int trace_selftest_startup_sched_switch(struct tracer *trace, 380extern int trace_selftest_startup_sched_switch(struct tracer *trace,
303 struct trace_array *tr); 381 struct trace_array *tr);
304#endif
305#ifdef CONFIG_SYSPROF_TRACER
306extern int trace_selftest_startup_sysprof(struct tracer *trace, 382extern int trace_selftest_startup_sysprof(struct tracer *trace,
307 struct trace_array *tr); 383 struct trace_array *tr);
308#endif
309#endif /* CONFIG_FTRACE_STARTUP_TEST */ 384#endif /* CONFIG_FTRACE_STARTUP_TEST */
310 385
311extern void *head_page(struct trace_array_cpu *data); 386extern void *head_page(struct trace_array_cpu *data);
312extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 387extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
388extern void trace_seq_print_cont(struct trace_seq *s,
389 struct trace_iterator *iter);
313extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 390extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
314 size_t cnt); 391 size_t cnt);
315extern long ns2usecs(cycle_t nsec); 392extern long ns2usecs(cycle_t nsec);
393extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
316 394
317extern unsigned long trace_flags; 395extern unsigned long trace_flags;
318 396
@@ -334,6 +412,9 @@ enum trace_iterator_flags {
334 TRACE_ITER_BLOCK = 0x80, 412 TRACE_ITER_BLOCK = 0x80,
335 TRACE_ITER_STACKTRACE = 0x100, 413 TRACE_ITER_STACKTRACE = 0x100,
336 TRACE_ITER_SCHED_TREE = 0x200, 414 TRACE_ITER_SCHED_TREE = 0x200,
415 TRACE_ITER_PRINTK = 0x400,
337}; 416};
338 417
418extern struct tracer nop_trace;
419
339#endif /* _LINUX_KERNEL_TRACE_H */ 420#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 000000000000..d0a5e50eeff2
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,126 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12
13#include "trace.h"
14
15static struct trace_array *boot_trace;
16static int trace_boot_enabled;
17
18
19/* Should be started after do_pre_smp_initcalls() in init/main.c */
20void start_boot_trace(void)
21{
22 trace_boot_enabled = 1;
23}
24
25void stop_boot_trace(void)
26{
27 trace_boot_enabled = 0;
28}
29
30void reset_boot_trace(struct trace_array *tr)
31{
32 stop_boot_trace();
33}
34
35static void boot_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 boot_trace = tr;
39
40 trace_boot_enabled = 0;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44}
45
46static void boot_trace_ctrl_update(struct trace_array *tr)
47{
48 if (tr->ctrl)
49 start_boot_trace();
50 else
51 stop_boot_trace();
52}
53
54static enum print_line_t initcall_print_line(struct trace_iterator *iter)
55{
56 int ret;
57 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime);
62 struct timespec rettime = ktime_to_timespec(it->rettime);
63
64 if (entry->type == TRACE_BOOT) {
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
66 calltime.tv_sec,
67 calltime.tv_nsec,
68 it->func, it->caller);
69 if (!ret)
70 return TRACE_TYPE_PARTIAL_LINE;
71
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n",
74 rettime.tv_sec,
75 rettime.tv_nsec,
76 it->func, it->result, it->duration);
77
78 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE;
80 return TRACE_TYPE_HANDLED;
81 }
82 return TRACE_TYPE_UNHANDLED;
83}
84
85struct tracer boot_tracer __read_mostly =
86{
87 .name = "initcall",
88 .init = boot_trace_init,
89 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line,
92};
93
94void trace_boot(struct boot_trace *it, initcall_t fn)
95{
96 struct ring_buffer_event *event;
97 struct trace_boot *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace;
101
102 if (!trace_boot_enabled)
103 return;
104
105 /* Get its name now since this function could
106 * disappear because it is in the .init section.
107 */
108 sprint_symbol(it->func, (unsigned long)fn);
109 preempt_disable();
110 data = tr->data[smp_processor_id()];
111
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags);
114 if (!event)
115 goto out;
116 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT;
119 entry->initcall = *it;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121
122 trace_wake_up();
123
124 out:
125 preempt_enable();
126}
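
A hedged sketch of a plausible call site: trace_boot() expects the caller to time the initcall and fill struct boot_trace. The wrapper below is an assumption built from the fields initcall_print_line() reads above; note trace_boot() fills it.func itself via sprint_symbol().

	/* Hypothetical caller, e.g. from the initcall dispatch path. */
	static void __init run_one_initcall_traced(initcall_t fn)
	{
		struct boot_trace it;
		ktime_t delta;

		it.caller = task_pid_nr(current);
		it.calltime = ktime_get();
		it.result = fn();
		it.rettime = ktime_get();
		delta = ktime_sub(it.rettime, it.calltime);
		it.duration = ktime_to_ns(delta) >> 20;	/* ~msecs, no 64-bit div */
		trace_boot(&it, fn);
	}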
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 312144897970..e90eb0c2c56c 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr)
23 tr->time_start = ftrace_now(tr->cpu); 23 tr->time_start = ftrace_now(tr->cpu);
24 24
25 for_each_online_cpu(cpu) 25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]); 26 tracing_reset(tr, cpu);
27} 27}
28 28
29static void start_function_trace(struct trace_array *tr) 29static void start_function_trace(struct trace_array *tr)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ece6cfb649fa..a7db7f040ae0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 95 disabled = atomic_inc_return(&data->disabled);
96 96
97 if (likely(disabled == 1)) 97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags); 98 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
99 99
100 atomic_dec(&data->disabled); 100 atomic_dec(&data->disabled);
101} 101}
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
130 unsigned long latency, t0, t1; 130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta; 131 cycle_t T0, T1, delta;
132 unsigned long flags; 132 unsigned long flags;
133 int pc;
133 134
134 /* 135 /*
135 * usecs conversion is slow so we try to delay the conversion 136 * usecs conversion is slow so we try to delay the conversion
@@ -141,6 +142,8 @@ check_critical_timing(struct trace_array *tr,
141 142
142 local_save_flags(flags); 143 local_save_flags(flags);
143 144
145 pc = preempt_count();
146
144 if (!report_latency(delta)) 147 if (!report_latency(delta))
145 goto out; 148 goto out;
146 149
@@ -150,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
150 if (!report_latency(delta)) 153 if (!report_latency(delta))
151 goto out_unlock; 154 goto out_unlock;
152 155
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
154 157
155 latency = nsecs_to_usecs(delta); 158 latency = nsecs_to_usecs(delta);
156 159
@@ -173,8 +176,8 @@ out_unlock:
173out: 176out:
174 data->critical_sequence = max_sequence; 177 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu); 178 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data); 179 tracing_reset(tr, cpu);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
178} 181}
179 182
180static inline void 183static inline void
@@ -203,11 +206,11 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
203 data->critical_sequence = max_sequence; 206 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu); 207 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip; 208 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data); 209 tracing_reset(tr, cpu);
207 210
208 local_save_flags(flags); 211 local_save_flags(flags);
209 212
210 trace_function(tr, data, ip, parent_ip, flags); 213 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
211 214
212 per_cpu(tracing_cpu, cpu) = 1; 215 per_cpu(tracing_cpu, cpu) = 1;
213 216
@@ -234,14 +237,14 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
234 237
235 data = tr->data[cpu]; 238 data = tr->data[cpu];
236 239
237 if (unlikely(!data) || unlikely(!head_page(data)) || 240 if (unlikely(!data) ||
238 !data->critical_start || atomic_read(&data->disabled)) 241 !data->critical_start || atomic_read(&data->disabled))
239 return; 242 return;
240 243
241 atomic_inc(&data->disabled); 244 atomic_inc(&data->disabled);
242 245
243 local_save_flags(flags); 246 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags); 247 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 248 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0; 249 data->critical_start = 0;
247 atomic_dec(&data->disabled); 250 atomic_dec(&data->disabled);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19dcbb4..f28484618ff0 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr)
27 tr->time_start = ftrace_now(tr->cpu); 27 tr->time_start = ftrace_now(tr->cpu);
28 28
29 for_each_online_cpu(cpu) 29 for_each_online_cpu(cpu)
30 tracing_reset(tr->data[cpu]); 30 tracing_reset(tr, cpu);
31} 31}
32 32
33static void mmio_trace_init(struct trace_array *tr) 33static void mmio_trace_init(struct trace_array *tr)
@@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter)
130{ 130{
131 int cpu; 131 int cpu;
132 unsigned long cnt = 0; 132 unsigned long cnt = 0;
133/* FIXME: */
134#if 0
133 for_each_online_cpu(cpu) { 135 for_each_online_cpu(cpu) {
134 cnt += iter->overrun[cpu]; 136 cnt += iter->overrun[cpu];
135 iter->overrun[cpu] = 0; 137 iter->overrun[cpu] = 0;
136 } 138 }
139#endif
140 (void)cpu;
137 return cnt; 141 return cnt;
138} 142}
139 143
@@ -171,17 +175,21 @@ print_out:
171 return (ret == -EBUSY) ? 0 : ret; 175 return (ret == -EBUSY) ? 0 : ret;
172} 176}
173 177
174static int mmio_print_rw(struct trace_iterator *iter) 178static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
175{ 179{
176 struct trace_entry *entry = iter->ent; 180 struct trace_entry *entry = iter->ent;
177 struct mmiotrace_rw *rw = &entry->mmiorw; 181 struct trace_mmiotrace_rw *field;
182 struct mmiotrace_rw *rw;
178 struct trace_seq *s = &iter->seq; 183 struct trace_seq *s = &iter->seq;
179 unsigned long long t = ns2usecs(entry->t); 184 unsigned long long t = ns2usecs(iter->ts);
180 unsigned long usec_rem = do_div(t, 1000000ULL); 185 unsigned long usec_rem = do_div(t, 1000000ULL);
181 unsigned secs = (unsigned long)t; 186 unsigned secs = (unsigned long)t;
182 int ret = 1; 187 int ret = 1;
183 188
184 switch (entry->mmiorw.opcode) { 189 trace_assign_type(field, entry);
190 rw = &field->rw;
191
192 switch (rw->opcode) {
185 case MMIO_READ: 193 case MMIO_READ:
186 ret = trace_seq_printf(s, 194 ret = trace_seq_printf(s,
187 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 195 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -209,21 +217,25 @@ static int mmio_print_rw(struct trace_iterator *iter)
209 break; 217 break;
210 } 218 }
211 if (ret) 219 if (ret)
212 return 1; 220 return TRACE_TYPE_HANDLED;
213 return 0; 221 return TRACE_TYPE_PARTIAL_LINE;
214} 222}
215 223
216static int mmio_print_map(struct trace_iterator *iter) 224static enum print_line_t mmio_print_map(struct trace_iterator *iter)
217{ 225{
218 struct trace_entry *entry = iter->ent; 226 struct trace_entry *entry = iter->ent;
219 struct mmiotrace_map *m = &entry->mmiomap; 227 struct trace_mmiotrace_map *field;
228 struct mmiotrace_map *m;
220 struct trace_seq *s = &iter->seq; 229 struct trace_seq *s = &iter->seq;
221 unsigned long long t = ns2usecs(entry->t); 230 unsigned long long t = ns2usecs(iter->ts);
222 unsigned long usec_rem = do_div(t, 1000000ULL); 231 unsigned long usec_rem = do_div(t, 1000000ULL);
223 unsigned secs = (unsigned long)t; 232 unsigned secs = (unsigned long)t;
224 int ret = 1; 233 int ret;
225 234
226 switch (entry->mmiorw.opcode) { 235 trace_assign_type(field, entry);
236 m = &field->map;
237
238 switch (m->opcode) {
227 case MMIO_PROBE: 239 case MMIO_PROBE:
228 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
229 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 241 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -241,20 +253,43 @@ static int mmio_print_map(struct trace_iterator *iter)
241 break; 253 break;
242 } 254 }
243 if (ret) 255 if (ret)
244 return 1; 256 return TRACE_TYPE_HANDLED;
245 return 0; 257 return TRACE_TYPE_PARTIAL_LINE;
258}
259
260static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
261{
262 struct trace_entry *entry = iter->ent;
263 struct print_entry *print = (struct print_entry *)entry;
264 const char *msg = print->buf;
265 struct trace_seq *s = &iter->seq;
266 unsigned long long t = ns2usecs(iter->ts);
267 unsigned long usec_rem = do_div(t, 1000000ULL);
268 unsigned secs = (unsigned long)t;
269 int ret;
270
271 /* The trailing newline must be in the message. */
272 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
273 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE;
275
276 if (entry->flags & TRACE_FLAG_CONT)
277 trace_seq_print_cont(s, iter);
278
279 return TRACE_TYPE_HANDLED;
246} 280}
247 281
248/* return 0 to abort printing without consuming current entry in pipe mode */ 282static enum print_line_t mmio_print_line(struct trace_iterator *iter)
249static int mmio_print_line(struct trace_iterator *iter)
250{ 283{
251 switch (iter->ent->type) { 284 switch (iter->ent->type) {
252 case TRACE_MMIO_RW: 285 case TRACE_MMIO_RW:
253 return mmio_print_rw(iter); 286 return mmio_print_rw(iter);
254 case TRACE_MMIO_MAP: 287 case TRACE_MMIO_MAP:
255 return mmio_print_map(iter); 288 return mmio_print_map(iter);
289 case TRACE_PRINT:
290 return mmio_print_mark(iter);
256 default: 291 default:
257 return 1; /* ignore unknown entries */ 292 return TRACE_TYPE_HANDLED; /* ignore unknown entries */
258 } 293 }
259} 294}
260 295
@@ -276,6 +311,27 @@ __init static int init_mmio_trace(void)
276} 311}
277device_initcall(init_mmio_trace); 312device_initcall(init_mmio_trace);
278 313
314static void __trace_mmiotrace_rw(struct trace_array *tr,
315 struct trace_array_cpu *data,
316 struct mmiotrace_rw *rw)
317{
318 struct ring_buffer_event *event;
319 struct trace_mmiotrace_rw *entry;
320 unsigned long irq_flags;
321
322 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
323 &irq_flags);
324 if (!event)
325 return;
326 entry = ring_buffer_event_data(event);
327 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
328 entry->ent.type = TRACE_MMIO_RW;
329 entry->rw = *rw;
330 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
331
332 trace_wake_up();
333}
334
279void mmio_trace_rw(struct mmiotrace_rw *rw) 335void mmio_trace_rw(struct mmiotrace_rw *rw)
280{ 336{
281 struct trace_array *tr = mmio_trace_array; 337 struct trace_array *tr = mmio_trace_array;
@@ -283,6 +339,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
283 __trace_mmiotrace_rw(tr, data, rw); 339 __trace_mmiotrace_rw(tr, data, rw);
284} 340}
285 341
342static void __trace_mmiotrace_map(struct trace_array *tr,
343 struct trace_array_cpu *data,
344 struct mmiotrace_map *map)
345{
346 struct ring_buffer_event *event;
347 struct trace_mmiotrace_map *entry;
348 unsigned long irq_flags;
349
350 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
351 &irq_flags);
352 if (!event)
353 return;
354 entry = ring_buffer_event_data(event);
355 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
356 entry->ent.type = TRACE_MMIO_MAP;
357 entry->map = *map;
358 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
359
360 trace_wake_up();
361}
362
286void mmio_trace_mapping(struct mmiotrace_map *map) 363void mmio_trace_mapping(struct mmiotrace_map *map)
287{ 364{
288 struct trace_array *tr = mmio_trace_array; 365 struct trace_array *tr = mmio_trace_array;
@@ -293,3 +370,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
293 __trace_mmiotrace_map(tr, data, map); 370 __trace_mmiotrace_map(tr, data, map);
294 preempt_enable(); 371 preempt_enable();
295} 372}
373
374int mmio_trace_printk(const char *fmt, va_list args)
375{
376 return trace_vprintk(0, fmt, args);
377}
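
A sketch, not from the patch: mmio_trace_printk() gives mmiotrace clients a way to inject free-form markers, which appear as the MARK lines handled by mmio_print_mark() above. The varargs wrapper below is hypothetical.

	/* Hypothetical helper; the trailing newline must be in the message. */
	static int mmio_mark(const char *fmt, ...)
	{
		va_list args;
		int ret;

		va_start(args, fmt);
		ret = mmio_trace_printk(fmt, args);
		va_end(args);
		return ret;
	}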
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 000000000000..4592b4862515
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,64 @@
1/*
2 * nop tracer
3 *
4 * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12
13#include "trace.h"
14
15static struct trace_array *ctx_trace;
16
17static void start_nop_trace(struct trace_array *tr)
18{
19 /* Nothing to do! */
20}
21
22static void stop_nop_trace(struct trace_array *tr)
23{
24 /* Nothing to do! */
25}
26
27static void nop_trace_init(struct trace_array *tr)
28{
29 int cpu;
30 ctx_trace = tr;
31
32 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu);
34
35 if (tr->ctrl)
36 start_nop_trace(tr);
37}
38
39static void nop_trace_reset(struct trace_array *tr)
40{
41 if (tr->ctrl)
42 stop_nop_trace(tr);
43}
44
45static void nop_trace_ctrl_update(struct trace_array *tr)
46{
47 /* When starting a new trace, reset the buffers */
48 if (tr->ctrl)
49 start_nop_trace(tr);
50 else
51 stop_nop_trace(tr);
52}
53
54struct tracer nop_trace __read_mostly =
55{
56 .name = "nop",
57 .init = nop_trace_init,
58 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop,
62#endif
63};
64
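The nop tracer above doubles as the minimal tracer skeleton; a hedged sketch of a custom tracer modeled on it ("mytrace" and the my_* names are hypothetical):

	static void my_trace_init(struct trace_array *tr)
	{
		int cpu;

		for_each_online_cpu(cpu)
			tracing_reset(tr, cpu);
	}

	struct tracer my_tracer __read_mostly = {
		.name	= "mytrace",
		.init	= my_trace_init,
	};

	/* Hooked up at boot with register_tracer(&my_tracer); */
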
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa0..b8f56beb1a62 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,8 +9,8 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
11#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h> 12#include <linux/ftrace.h>
13#include <trace/sched.h>
14 14
15#include "trace.h" 15#include "trace.h"
16 16
@@ -19,15 +19,16 @@ static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static atomic_t sched_ref;
20 20
21static void 21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev, 22probe_sched_switch(struct rq *__rq, struct task_struct *prev,
23 struct task_struct *next) 23 struct task_struct *next)
24{ 24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data; 25 struct trace_array_cpu *data;
28 unsigned long flags; 26 unsigned long flags;
29 long disabled;
30 int cpu; 27 int cpu;
28 int pc;
29
30 if (!atomic_read(&sched_ref))
31 return;
31 32
32 tracing_record_cmdline(prev); 33 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next); 34 tracing_record_cmdline(next);
@@ -35,97 +36,41 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
35 if (!tracer_enabled) 36 if (!tracer_enabled)
36 return; 37 return;
37 38
39 pc = preempt_count();
38 local_irq_save(flags); 40 local_irq_save(flags);
39 cpu = raw_smp_processor_id(); 41 cpu = raw_smp_processor_id();
40 data = tr->data[cpu]; 42 data = ctx_trace->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42 43
43 if (likely(disabled == 1)) 44 if (likely(!atomic_read(&data->disabled)))
44 tracing_sched_switch_trace(tr, data, prev, next, flags); 45 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
45 46
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags); 47 local_irq_restore(flags);
48} 48}
49 49
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void 50static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct 51probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
78 task_struct *curr)
79{ 52{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data; 53 struct trace_array_cpu *data;
83 unsigned long flags; 54 unsigned long flags;
84 long disabled; 55 int cpu, pc;
85 int cpu;
86 56
 87 if (!tracer_enabled) 57 if (unlikely(!tracer_enabled))
88 return; 58 return;
89 59
90 tracing_record_cmdline(curr); 60 pc = preempt_count();
61 tracing_record_cmdline(current);
91 62
92 local_irq_save(flags); 63 local_irq_save(flags);
93 cpu = raw_smp_processor_id(); 64 cpu = raw_smp_processor_id();
94 data = tr->data[cpu]; 65 data = ctx_trace->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96 66
97 if (likely(disabled == 1)) 67 if (likely(!atomic_read(&data->disabled)))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); 68 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
69 flags, pc);
99 70
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags); 71 local_irq_restore(flags);
102} 72}
103 73
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr) 74static void sched_switch_reset(struct trace_array *tr)
130{ 75{
131 int cpu; 76 int cpu;
@@ -133,67 +78,47 @@ static void sched_switch_reset(struct trace_array *tr)
133 tr->time_start = ftrace_now(tr->cpu); 78 tr->time_start = ftrace_now(tr->cpu);
134 79
135 for_each_online_cpu(cpu) 80 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]); 81 tracing_reset(tr, cpu);
137} 82}
138 83
139static int tracing_sched_register(void) 84static int tracing_sched_register(void)
140{ 85{
141 int ret; 86 int ret;
142 87
143 ret = marker_probe_register("kernel_sched_wakeup", 88 ret = register_trace_sched_wakeup(probe_sched_wakeup);
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) { 89 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker" 90 pr_info("wakeup trace: Couldn't activate tracepoint"
149 " probe to kernel_sched_wakeup\n"); 91 " probe to kernel_sched_wakeup\n");
150 return ret; 92 return ret;
151 } 93 }
152 94
153 ret = marker_probe_register("kernel_sched_wakeup_new", 95 ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) { 96 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker" 97 pr_info("wakeup trace: Couldn't activate tracepoint"
159 " probe to kernel_sched_wakeup_new\n"); 98 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe; 99 goto fail_deprobe;
161 } 100 }
162 101
163 ret = marker_probe_register("kernel_sched_schedule", 102 ret = register_trace_sched_switch(probe_sched_switch);
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) { 103 if (ret) {
169 pr_info("sched trace: Couldn't add marker" 104 pr_info("sched trace: Couldn't activate tracepoint"
170 " probe to kernel_sched_schedule\n"); 105 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new; 106 goto fail_deprobe_wake_new;
172 } 107 }
173 108
174 return ret; 109 return ret;
175fail_deprobe_wake_new: 110fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new", 111 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe: 112fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup", 113 unregister_trace_sched_wakeup(probe_sched_wakeup);
181 wake_up_callback,
182 &ctx_trace);
183 return ret; 114 return ret;
184} 115}
185 116
186static void tracing_sched_unregister(void) 117static void tracing_sched_unregister(void)
187{ 118{
188 marker_probe_unregister("kernel_sched_schedule", 119 unregister_trace_sched_switch(probe_sched_switch);
189 sched_switch_callback, 120 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
190 &ctx_trace); 121 unregister_trace_sched_wakeup(probe_sched_wakeup);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197} 122}
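
The conversion above replaces format-string marker callbacks with typed tracepoint probes. For orientation, the tracepoint itself is declared roughly as below in <trace/sched.h>; the exact DEFINE_TRACE/TPPROTO/TPARGS spelling is an assumption about this merge window, shown for illustration only.

	DEFINE_TRACE(sched_switch,
		TPPROTO(struct rq *rq, struct task_struct *prev,
			struct task_struct *next),
		TPARGS(rq, prev, next));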
198 123
199static void tracing_start_sched_switch(void) 124static void tracing_start_sched_switch(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb62cdf..fe4a252c2363 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/marker.h> 18#include <trace/sched.h>
19 19
20#include "trace.h" 20#include "trace.h"
21 21
@@ -44,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
44 long disabled; 44 long disabled;
45 int resched; 45 int resched;
46 int cpu; 46 int cpu;
47 int pc;
47 48
48 if (likely(!wakeup_task)) 49 if (likely(!wakeup_task))
49 return; 50 return;
50 51
52 pc = preempt_count();
51 resched = need_resched(); 53 resched = need_resched();
52 preempt_disable_notrace(); 54 preempt_disable_notrace();
53 55
@@ -70,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
70 if (task_cpu(wakeup_task) != cpu) 72 if (task_cpu(wakeup_task) != cpu)
71 goto unlock; 73 goto unlock;
72 74
73 trace_function(tr, data, ip, parent_ip, flags); 75 trace_function(tr, data, ip, parent_ip, flags, pc);
74 76
75 unlock: 77 unlock:
76 __raw_spin_unlock(&wakeup_lock); 78 __raw_spin_unlock(&wakeup_lock);
@@ -112,17 +114,18 @@ static int report_latency(cycle_t delta)
112} 114}
113 115
114static void notrace 116static void notrace
115wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, 117probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
116 struct task_struct *next) 118 struct task_struct *next)
117{ 119{
118 unsigned long latency = 0, t0 = 0, t1 = 0; 120 unsigned long latency = 0, t0 = 0, t1 = 0;
119 struct trace_array **ptr = private;
120 struct trace_array *tr = *ptr;
121 struct trace_array_cpu *data; 121 struct trace_array_cpu *data;
122 cycle_t T0, T1, delta; 122 cycle_t T0, T1, delta;
123 unsigned long flags; 123 unsigned long flags;
124 long disabled; 124 long disabled;
125 int cpu; 125 int cpu;
126 int pc;
127
128 tracing_record_cmdline(prev);
126 129
127 if (unlikely(!tracer_enabled)) 130 if (unlikely(!tracer_enabled))
128 return; 131 return;
@@ -139,12 +142,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
139 if (next != wakeup_task) 142 if (next != wakeup_task)
140 return; 143 return;
141 144
145 pc = preempt_count();
146
142 /* The task we are waiting for is waking up */ 147 /* The task we are waiting for is waking up */
143 data = tr->data[wakeup_cpu]; 148 data = wakeup_trace->data[wakeup_cpu];
144 149
145 /* disable local data, not wakeup_cpu data */ 150 /* disable local data, not wakeup_cpu data */
146 cpu = raw_smp_processor_id(); 151 cpu = raw_smp_processor_id();
147 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 152 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
148 if (likely(disabled != 1)) 153 if (likely(disabled != 1))
149 goto out; 154 goto out;
150 155
@@ -155,7 +160,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
155 if (unlikely(!tracer_enabled || next != wakeup_task)) 160 if (unlikely(!tracer_enabled || next != wakeup_task))
156 goto out_unlock; 161 goto out_unlock;
157 162
158 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); 163 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
159 164
160 /* 165 /*
161 * usecs conversion is slow so we try to delay the conversion 166 * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +179,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
174 t0 = nsecs_to_usecs(T0); 179 t0 = nsecs_to_usecs(T0);
175 t1 = nsecs_to_usecs(T1); 180 t1 = nsecs_to_usecs(T1);
176 181
177 update_max_tr(tr, wakeup_task, wakeup_cpu); 182 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
178 183
179out_unlock: 184out_unlock:
180 __wakeup_reset(tr); 185 __wakeup_reset(wakeup_trace);
181 __raw_spin_unlock(&wakeup_lock); 186 __raw_spin_unlock(&wakeup_lock);
182 local_irq_restore(flags); 187 local_irq_restore(flags);
183out: 188out:
184 atomic_dec(&tr->data[cpu]->disabled); 189 atomic_dec(&wakeup_trace->data[cpu]->disabled);
185}
186
187static notrace void
188sched_switch_callback(void *probe_data, void *call_data,
189 const char *format, va_list *args)
190{
191 struct task_struct *prev;
192 struct task_struct *next;
193 struct rq *__rq;
194
195 /* skip prev_pid %d next_pid %d prev_state %ld */
196 (void)va_arg(*args, int);
197 (void)va_arg(*args, int);
198 (void)va_arg(*args, long);
199 __rq = va_arg(*args, typeof(__rq));
200 prev = va_arg(*args, typeof(prev));
201 next = va_arg(*args, typeof(next));
202
203 tracing_record_cmdline(prev);
204
205 /*
206 * If tracer_switch_func only points to the local
207 * switch func, it still needs the ptr passed to it.
208 */
209 wakeup_sched_switch(probe_data, __rq, prev, next);
210} 190}
211 191
212static void __wakeup_reset(struct trace_array *tr) 192static void __wakeup_reset(struct trace_array *tr)
@@ -216,7 +196,7 @@ static void __wakeup_reset(struct trace_array *tr)
216 196
217 for_each_possible_cpu(cpu) { 197 for_each_possible_cpu(cpu) {
218 data = tr->data[cpu]; 198 data = tr->data[cpu];
219 tracing_reset(data); 199 tracing_reset(tr, cpu);
220 } 200 }
221 201
222 wakeup_cpu = -1; 202 wakeup_cpu = -1;
@@ -240,19 +220,26 @@ static void wakeup_reset(struct trace_array *tr)
240} 220}
241 221
242static void 222static void
243wakeup_check_start(struct trace_array *tr, struct task_struct *p, 223probe_wakeup(struct rq *rq, struct task_struct *p)
244 struct task_struct *curr)
245{ 224{
246 int cpu = smp_processor_id(); 225 int cpu = smp_processor_id();
247 unsigned long flags; 226 unsigned long flags;
248 long disabled; 227 long disabled;
228 int pc;
229
230 if (likely(!tracer_enabled))
231 return;
232
233 tracing_record_cmdline(p);
234 tracing_record_cmdline(current);
249 235
250 if (likely(!rt_task(p)) || 236 if (likely(!rt_task(p)) ||
251 p->prio >= wakeup_prio || 237 p->prio >= wakeup_prio ||
252 p->prio >= curr->prio) 238 p->prio >= current->prio)
253 return; 239 return;
254 240
255 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 241 pc = preempt_count();
242 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
256 if (unlikely(disabled != 1)) 243 if (unlikely(disabled != 1))
257 goto out; 244 goto out;
258 245
@@ -264,7 +251,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
264 goto out_locked; 251 goto out_locked;
265 252
266 /* reset the trace */ 253 /* reset the trace */
267 __wakeup_reset(tr); 254 __wakeup_reset(wakeup_trace);
268 255
269 wakeup_cpu = task_cpu(p); 256 wakeup_cpu = task_cpu(p);
270 wakeup_prio = p->prio; 257 wakeup_prio = p->prio;
@@ -274,74 +261,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
274 261
275 local_save_flags(flags); 262 local_save_flags(flags);
276 263
277 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 264 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
278 trace_function(tr, tr->data[wakeup_cpu], 265 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
279 CALLER_ADDR1, CALLER_ADDR2, flags); 266 CALLER_ADDR1, CALLER_ADDR2, flags, pc);
280 267
281out_locked: 268out_locked:
282 __raw_spin_unlock(&wakeup_lock); 269 __raw_spin_unlock(&wakeup_lock);
283out: 270out:
284 atomic_dec(&tr->data[cpu]->disabled); 271 atomic_dec(&wakeup_trace->data[cpu]->disabled);
285}
286
287static notrace void
288wake_up_callback(void *probe_data, void *call_data,
289 const char *format, va_list *args)
290{
291 struct trace_array **ptr = probe_data;
292 struct trace_array *tr = *ptr;
293 struct task_struct *curr;
294 struct task_struct *task;
295 struct rq *__rq;
296
297 if (likely(!tracer_enabled))
298 return;
299
300 /* Skip pid %d state %ld */
301 (void)va_arg(*args, int);
302 (void)va_arg(*args, long);
303 /* now get the meat: "rq %p task %p rq->curr %p" */
304 __rq = va_arg(*args, typeof(__rq));
305 task = va_arg(*args, typeof(task));
306 curr = va_arg(*args, typeof(curr));
307
308 tracing_record_cmdline(task);
309 tracing_record_cmdline(curr);
310
311 wakeup_check_start(tr, task, curr);
312} 272}
313 273
314static void start_wakeup_tracer(struct trace_array *tr) 274static void start_wakeup_tracer(struct trace_array *tr)
315{ 275{
316 int ret; 276 int ret;
317 277
318 ret = marker_probe_register("kernel_sched_wakeup", 278 ret = register_trace_sched_wakeup(probe_wakeup);
319 "pid %d state %ld ## rq %p task %p rq->curr %p",
320 wake_up_callback,
321 &wakeup_trace);
322 if (ret) { 279 if (ret) {
323 pr_info("wakeup trace: Couldn't add marker" 280 pr_info("wakeup trace: Couldn't activate tracepoint"
324 " probe to kernel_sched_wakeup\n"); 281 " probe to kernel_sched_wakeup\n");
325 return; 282 return;
326 } 283 }
327 284
328 ret = marker_probe_register("kernel_sched_wakeup_new", 285 ret = register_trace_sched_wakeup_new(probe_wakeup);
329 "pid %d state %ld ## rq %p task %p rq->curr %p",
330 wake_up_callback,
331 &wakeup_trace);
332 if (ret) { 286 if (ret) {
333 pr_info("wakeup trace: Couldn't add marker" 287 pr_info("wakeup trace: Couldn't activate tracepoint"
334 " probe to kernel_sched_wakeup_new\n"); 288 " probe to kernel_sched_wakeup_new\n");
335 goto fail_deprobe; 289 goto fail_deprobe;
336 } 290 }
337 291
338 ret = marker_probe_register("kernel_sched_schedule", 292 ret = register_trace_sched_switch(probe_wakeup_sched_switch);
339 "prev_pid %d next_pid %d prev_state %ld "
340 "## rq %p prev %p next %p",
341 sched_switch_callback,
342 &wakeup_trace);
343 if (ret) { 293 if (ret) {
344 pr_info("sched trace: Couldn't add marker" 294 pr_info("sched trace: Couldn't activate tracepoint"
345 " probe to kernel_sched_schedule\n"); 295 " probe to kernel_sched_schedule\n");
346 goto fail_deprobe_wake_new; 296 goto fail_deprobe_wake_new;
347 } 297 }
@@ -363,28 +313,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
363 313
364 return; 314 return;
365fail_deprobe_wake_new: 315fail_deprobe_wake_new:
366 marker_probe_unregister("kernel_sched_wakeup_new", 316 unregister_trace_sched_wakeup_new(probe_wakeup);
367 wake_up_callback,
368 &wakeup_trace);
369fail_deprobe: 317fail_deprobe:
370 marker_probe_unregister("kernel_sched_wakeup", 318 unregister_trace_sched_wakeup(probe_wakeup);
371 wake_up_callback,
372 &wakeup_trace);
373} 319}
374 320
375static void stop_wakeup_tracer(struct trace_array *tr) 321static void stop_wakeup_tracer(struct trace_array *tr)
376{ 322{
377 tracer_enabled = 0; 323 tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 324 unregister_ftrace_function(&trace_ops);
379 marker_probe_unregister("kernel_sched_schedule", 325 unregister_trace_sched_switch(probe_wakeup_sched_switch);
380 sched_switch_callback, 326 unregister_trace_sched_wakeup_new(probe_wakeup);
381 &wakeup_trace); 327 unregister_trace_sched_wakeup(probe_wakeup);
382 marker_probe_unregister("kernel_sched_wakeup_new",
383 wake_up_callback,
384 &wakeup_trace);
385 marker_probe_unregister("kernel_sched_wakeup",
386 wake_up_callback,
387 &wakeup_trace);
388} 328}
389 329
390static void wakeup_tracer_init(struct trace_array *tr) 330static void wakeup_tracer_init(struct trace_array *tr)
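The hunks above replace the marker-based hooks, which had to parse a va_list against a format string, with direct tracepoint registration: the probe is now a plain C function whose prototype matches the event. A minimal sketch of a module using the same sched tracepoints, assuming the trace/sched.h declarations introduced by this series (error handling trimmed; this is illustrative, not the tracer's own code):

    #include <linux/module.h>
    #include <linux/sched.h>
    #include <trace/sched.h>	/* sched tracepoint declarations (assumed path) */

    /* Probe signature mirrors the sched_wakeup event's C prototype. */
    static void probe_sched_wakeup(struct rq *rq, struct task_struct *p)
    {
    	printk(KERN_INFO "wakeup: pid=%d prio=%d\n", p->pid, p->prio);
    }

    static int __init wakeup_probe_init(void)
    {
    	/* Returns 0 on success, -EEXIST if already registered. */
    	return register_trace_sched_wakeup(probe_sched_wakeup);
    }

    static void __exit wakeup_probe_exit(void)
    {
    	unregister_trace_sched_wakeup(probe_sched_wakeup);
    }

    module_init(wakeup_probe_init);
    module_exit(wakeup_probe_exit);
    MODULE_LICENSE("GPL");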
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e073bf..09cf230d7eca 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,65 +9,29 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 9 case TRACE_FN:
10 case TRACE_CTX: 10 case TRACE_CTX:
11 case TRACE_WAKE: 11 case TRACE_WAKE:
12 case TRACE_CONT:
12 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT:
13 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
14 return 1; 16 return 1;
15 } 17 }
16 return 0; 18 return 0;
17} 19}
18 20
19static int 21static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{ 22{
22 struct trace_entry *entries; 23 struct ring_buffer_event *event;
23 struct page *page; 24 struct trace_entry *entry;
24 int idx = 0;
25 int i;
26 25
27 BUG_ON(list_empty(&data->trace_pages)); 26 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
28 page = list_entry(data->trace_pages.next, struct page, lru); 27 entry = ring_buffer_event_data(event);
29 entries = page_address(page);
30 28
31 check_pages(data); 29 if (!trace_valid_entry(entry)) {
32 if (head_page(data) != entries)
33 goto failed;
34
35 /*
36 * The starting trace buffer always has valid elements,
37 * if any element exists.
38 */
39 entries = head_page(data);
40
41 for (i = 0; i < tr->entries; i++) {
42
43 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
44 printk(KERN_CONT ".. invalid entry %d ", 30 printk(KERN_CONT ".. invalid entry %d ",
45 entries[idx].type); 31 entry->type);
46 goto failed; 32 goto failed;
47 } 33 }
48
49 idx++;
50 if (idx >= ENTRIES_PER_PAGE) {
51 page = virt_to_page(entries);
52 if (page->lru.next == &data->trace_pages) {
53 if (i != tr->entries - 1) {
54 printk(KERN_CONT ".. entries buffer mismatch");
55 goto failed;
56 }
57 } else {
58 page = list_entry(page->lru.next, struct page, lru);
59 entries = page_address(page);
60 }
61 idx = 0;
62 }
63 } 34 }
64
65 page = virt_to_page(entries);
66 if (page->lru.next != &data->trace_pages) {
67 printk(KERN_CONT ".. too many entries");
68 goto failed;
69 }
70
71 return 0; 35 return 0;
72 36
73 failed: 37 failed:
@@ -89,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
89 /* Don't allow flipping of max traces now */ 53 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags); 54 raw_local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock); 55 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95 56
96 cnt += tr->data[cpu]->trace_idx; 57 cnt = ring_buffer_entries(tr->buffer);
97 58
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]); 59 for_each_possible_cpu(cpu) {
60 ret = trace_test_buffer_cpu(tr, cpu);
99 if (ret) 61 if (ret)
100 break; 62 break;
101 } 63 }
@@ -120,11 +82,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr, 82 struct trace_array *tr,
121 int (*func)(void)) 83 int (*func)(void))
122{ 84{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled; 85 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled; 86 int save_tracer_enabled = tracer_enabled;
87 unsigned long count;
127 char *func_name; 88 char *func_name;
89 int ret;
128 90
129 /* The ftrace test PASSED */ 91 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n"); 92 printk(KERN_CONT "PASSED\n");
@@ -157,6 +119,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
157 /* enable tracing */ 119 /* enable tracing */
158 tr->ctrl = 1; 120 tr->ctrl = 1;
159 trace->init(tr); 121 trace->init(tr);
122
160 /* Sleep for a 1/10 of a second */ 123 /* Sleep for a 1/10 of a second */
161 msleep(100); 124 msleep(100);
162 125
@@ -212,10 +175,10 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
212int 175int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) 176trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{ 177{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled; 178 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled; 179 int save_tracer_enabled = tracer_enabled;
180 unsigned long count;
181 int ret;
219 182
220 /* make sure msleep has been recorded */ 183 /* make sure msleep has been recorded */
221 msleep(1); 184 msleep(1);
@@ -415,6 +378,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
415} 378}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */ 379#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417 380
381#ifdef CONFIG_NOP_TRACER
382int
383trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
384{
385 /* What could possibly go wrong? */
386 return 0;
387}
388#endif
389
418#ifdef CONFIG_SCHED_TRACER 390#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data) 391static int trace_wakeup_test_thread(void *data)
420{ 392{
@@ -486,6 +458,9 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
486 458
487 wake_up_process(p); 459 wake_up_process(p);
488 460
461 /* give a little time to let the thread wake up */
462 msleep(100);
463
489 /* stop the tracing. */ 464 /* stop the tracing. */
490 tr->ctrl = 0; 465 tr->ctrl = 0;
491 trace->ctrl_update(tr); 466 trace->ctrl_update(tr);
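With the ring buffer in place, the selftest no longer walks trace pages and indexes by hand; it drains each CPU's buffer through the consume API. A condensed sketch of that pattern, following trace_test_buffer_cpu() above (ring_buffer_consume() returns the next unread event for a CPU and advances the reader; ring_buffer_event_data() yields the payload):

    /* Count entries for one CPU, validating each as we consume it. */
    static int count_valid_entries(struct trace_array *tr, int cpu)
    {
    	struct ring_buffer_event *event;
    	struct trace_entry *entry;
    	int count = 0;

    	while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
    		entry = ring_buffer_event_data(event);
    		if (!trace_valid_entry(entry))
    			return -1;	/* corrupt entry */
    		count++;
    	}
    	return count;
    }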
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 000000000000..74c5d9a3afae
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,310 @@
1/*
2 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
3 *
4 */
5#include <linux/stacktrace.h>
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/uaccess.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/fs.h>
15#include "trace.h"
16
17#define STACK_TRACE_ENTRIES 500
18
19static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
20 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
21static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
22
23static struct stack_trace max_stack_trace = {
24 .max_entries = STACK_TRACE_ENTRIES,
25 .entries = stack_dump_trace,
26};
27
28static unsigned long max_stack_size;
29static raw_spinlock_t max_stack_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
31
32static int stack_trace_disabled __read_mostly;
33static DEFINE_PER_CPU(int, trace_active);
34
35static inline void check_stack(void)
36{
37 unsigned long this_size, flags;
38 unsigned long *p, *top, *start;
39 int i;
40
41 this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
42 this_size = THREAD_SIZE - this_size;
43
44 if (this_size <= max_stack_size)
45 return;
46
47 raw_local_irq_save(flags);
48 __raw_spin_lock(&max_stack_lock);
49
50 /* a race could have already updated it */
51 if (this_size <= max_stack_size)
52 goto out;
53
54 max_stack_size = this_size;
55
56 max_stack_trace.nr_entries = 0;
57 max_stack_trace.skip = 3;
58
59 save_stack_trace(&max_stack_trace);
60
61 /*
62 * Now find where in the stack these are.
63 */
64 i = 0;
65 start = &this_size;
66 top = (unsigned long *)
67 (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
68
69 /*
 70 * Loop through all the entries. Some of the entries may for
 71 * some reason be missing from the stack, so we may have to
 72 * account for that. If they are all there, this loop will
 73 * only run once. This code only runs on a new max, so it is
 74 * far from a fast path.
75 */
76 while (i < max_stack_trace.nr_entries) {
77
78 stack_dump_index[i] = this_size;
79 p = start;
80
81 for (; p < top && i < max_stack_trace.nr_entries; p++) {
82 if (*p == stack_dump_trace[i]) {
83 this_size = stack_dump_index[i++] =
84 (top - p) * sizeof(unsigned long);
85 /* Start the search from here */
86 start = p + 1;
87 }
88 }
89
90 i++;
91 }
92
93 out:
94 __raw_spin_unlock(&max_stack_lock);
95 raw_local_irq_restore(flags);
96}
97
98static void
99stack_trace_call(unsigned long ip, unsigned long parent_ip)
100{
101 int cpu, resched;
102
103 if (unlikely(!ftrace_enabled || stack_trace_disabled))
104 return;
105
106 resched = need_resched();
107 preempt_disable_notrace();
108
109 cpu = raw_smp_processor_id();
 110 /* no atomic needed; this variable is only modified by this cpu */
111 if (per_cpu(trace_active, cpu)++ != 0)
112 goto out;
113
114 check_stack();
115
116 out:
117 per_cpu(trace_active, cpu)--;
118 /* prevent recursion in schedule */
119 if (resched)
120 preempt_enable_no_resched_notrace();
121 else
122 preempt_enable_notrace();
123}
124
125static struct ftrace_ops trace_ops __read_mostly =
126{
127 .func = stack_trace_call,
128};
129
130static ssize_t
131stack_max_size_read(struct file *filp, char __user *ubuf,
132 size_t count, loff_t *ppos)
133{
134 unsigned long *ptr = filp->private_data;
135 char buf[64];
136 int r;
137
138 r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
139 if (r > sizeof(buf))
140 r = sizeof(buf);
141 return simple_read_from_buffer(ubuf, count, ppos, buf, r);
142}
143
144static ssize_t
145stack_max_size_write(struct file *filp, const char __user *ubuf,
146 size_t count, loff_t *ppos)
147{
148 long *ptr = filp->private_data;
149 unsigned long val, flags;
150 char buf[64];
151 int ret;
152
153 if (count >= sizeof(buf))
154 return -EINVAL;
155
156 if (copy_from_user(&buf, ubuf, count))
157 return -EFAULT;
158
159 buf[count] = 0;
160
161 ret = strict_strtoul(buf, 10, &val);
162 if (ret < 0)
163 return ret;
164
165 raw_local_irq_save(flags);
166 __raw_spin_lock(&max_stack_lock);
167 *ptr = val;
168 __raw_spin_unlock(&max_stack_lock);
169 raw_local_irq_restore(flags);
170
171 return count;
172}
173
174static struct file_operations stack_max_size_fops = {
175 .open = tracing_open_generic,
176 .read = stack_max_size_read,
177 .write = stack_max_size_write,
178};
179
180static void *
181t_next(struct seq_file *m, void *v, loff_t *pos)
182{
183 long i = (long)m->private;
184
185 (*pos)++;
186
187 i++;
188
189 if (i >= max_stack_trace.nr_entries ||
190 stack_dump_trace[i] == ULONG_MAX)
191 return NULL;
192
193 m->private = (void *)i;
194
195 return &m->private;
196}
197
198static void *t_start(struct seq_file *m, loff_t *pos)
199{
200 void *t = &m->private;
201 loff_t l = 0;
202
203 local_irq_disable();
204 __raw_spin_lock(&max_stack_lock);
205
206 for (; t && l < *pos; t = t_next(m, t, &l))
207 ;
208
209 return t;
210}
211
212static void t_stop(struct seq_file *m, void *p)
213{
214 __raw_spin_unlock(&max_stack_lock);
215 local_irq_enable();
216}
217
218static int trace_lookup_stack(struct seq_file *m, long i)
219{
220 unsigned long addr = stack_dump_trace[i];
221#ifdef CONFIG_KALLSYMS
222 char str[KSYM_SYMBOL_LEN];
223
224 sprint_symbol(str, addr);
225
226 return seq_printf(m, "%s\n", str);
227#else
228 return seq_printf(m, "%p\n", (void*)addr);
229#endif
230}
231
232static int t_show(struct seq_file *m, void *v)
233{
234 long i = *(long *)v;
235 int size;
236
237 if (i < 0) {
238 seq_printf(m, " Depth Size Location"
239 " (%d entries)\n"
240 " ----- ---- --------\n",
241 max_stack_trace.nr_entries);
242 return 0;
243 }
244
245 if (i >= max_stack_trace.nr_entries ||
246 stack_dump_trace[i] == ULONG_MAX)
247 return 0;
248
249 if (i+1 == max_stack_trace.nr_entries ||
250 stack_dump_trace[i+1] == ULONG_MAX)
251 size = stack_dump_index[i];
252 else
253 size = stack_dump_index[i] - stack_dump_index[i+1];
254
255 seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size);
256
257 trace_lookup_stack(m, i);
258
259 return 0;
260}
261
262static struct seq_operations stack_trace_seq_ops = {
263 .start = t_start,
264 .next = t_next,
265 .stop = t_stop,
266 .show = t_show,
267};
268
269static int stack_trace_open(struct inode *inode, struct file *file)
270{
271 int ret;
272
273 ret = seq_open(file, &stack_trace_seq_ops);
274 if (!ret) {
275 struct seq_file *m = file->private_data;
276 m->private = (void *)-1;
277 }
278
279 return ret;
280}
281
282static struct file_operations stack_trace_fops = {
283 .open = stack_trace_open,
284 .read = seq_read,
285 .llseek = seq_lseek,
286};
287
288static __init int stack_trace_init(void)
289{
290 struct dentry *d_tracer;
291 struct dentry *entry;
292
293 d_tracer = tracing_init_dentry();
294
295 entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
296 &max_stack_size, &stack_max_size_fops);
297 if (!entry)
298 pr_warning("Could not create debugfs 'stack_max_size' entry\n");
299
300 entry = debugfs_create_file("stack_trace", 0444, d_tracer,
301 NULL, &stack_trace_fops);
302 if (!entry)
303 pr_warning("Could not create debugfs 'stack_trace' entry\n");
304
305 register_ftrace_function(&trace_ops);
306
307 return 0;
308}
309
310device_initcall(stack_trace_init);
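The core of the new stack tracer is the measurement in check_stack(): kernel stacks are THREAD_SIZE-aligned, so masking the address of any local variable with THREAD_SIZE-1 gives its offset within the stack region, and subtracting that from THREAD_SIZE gives the depth in use, since stacks grow down. A standalone sketch of just that computation:

    /* Sketch of the stack-depth measurement used by check_stack(). */
    static unsigned long current_stack_usage(void)
    {
    	unsigned long here;

    	/* offset of this variable within the THREAD_SIZE-aligned stack */
    	unsigned long offset = (unsigned long)&here & (THREAD_SIZE - 1);

    	/* stacks grow down, so depth in use is the distance from the top */
    	return THREAD_SIZE - offset;
    }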
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index db58fb66a135..9587d3bcba55 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr)
241 tr->time_start = ftrace_now(tr->cpu); 241 tr->time_start = ftrace_now(tr->cpu);
242 242
243 for_each_online_cpu(cpu) 243 for_each_online_cpu(cpu)
244 tracing_reset(tr->data[cpu]); 244 tracing_reset(tr, cpu);
245} 245}
246 246
247static void start_stack_trace(struct trace_array *tr) 247static void start_stack_trace(struct trace_array *tr)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
new file mode 100644
index 000000000000..f2b7c28a4708
--- /dev/null
+++ b/kernel/tracepoint.c
@@ -0,0 +1,477 @@
1/*
2 * Copyright (C) 2008 Mathieu Desnoyers
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18#include <linux/module.h>
19#include <linux/mutex.h>
20#include <linux/types.h>
21#include <linux/jhash.h>
22#include <linux/list.h>
23#include <linux/rcupdate.h>
24#include <linux/tracepoint.h>
25#include <linux/err.h>
26#include <linux/slab.h>
27
28extern struct tracepoint __start___tracepoints[];
29extern struct tracepoint __stop___tracepoints[];
30
31/* Set to 1 to enable tracepoint debug output */
32static const int tracepoint_debug;
33
34/*
 35 * tracepoints_mutex nests inside module_mutex. tracepoints_mutex protects the
36 * builtin and module tracepoints and the hash table.
37 */
38static DEFINE_MUTEX(tracepoints_mutex);
39
40/*
41 * Tracepoint hash table, containing the active tracepoints.
42 * Protected by tracepoints_mutex.
43 */
44#define TRACEPOINT_HASH_BITS 6
45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46
47/*
 48 * Note about RCU:
 49 * It is used to delay the freeing of old probe arrays until a quiescent
 50 * state is reached.
51 * Tracepoint entries modifications are protected by the tracepoints_mutex.
52 */
53struct tracepoint_entry {
54 struct hlist_node hlist;
55 void **funcs;
56 int refcount; /* Number of times armed. 0 if disarmed. */
57 struct rcu_head rcu;
58 void *oldptr;
59 unsigned char rcu_pending:1;
60 char name[0];
61};
62
63static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
64
65static void free_old_closure(struct rcu_head *head)
66{
67 struct tracepoint_entry *entry = container_of(head,
68 struct tracepoint_entry, rcu);
69 kfree(entry->oldptr);
70 /* Make sure we free the data before setting the pending flag to 0 */
71 smp_wmb();
72 entry->rcu_pending = 0;
73}
74
75static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
76{
77 if (!old)
78 return;
79 entry->oldptr = old;
80 entry->rcu_pending = 1;
81 /* write rcu_pending before calling the RCU callback */
82 smp_wmb();
83 call_rcu_sched(&entry->rcu, free_old_closure);
84}
85
86static void debug_print_probes(struct tracepoint_entry *entry)
87{
88 int i;
89
90 if (!tracepoint_debug)
91 return;
92
93 for (i = 0; entry->funcs[i]; i++)
94 printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
95}
96
97static void *
98tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
99{
100 int nr_probes = 0;
101 void **old, **new;
102
103 WARN_ON(!probe);
104
105 debug_print_probes(entry);
106 old = entry->funcs;
107 if (old) {
108 /* (N -> N+1), (N != 0, 1) probes */
109 for (nr_probes = 0; old[nr_probes]; nr_probes++)
110 if (old[nr_probes] == probe)
111 return ERR_PTR(-EEXIST);
112 }
113 /* + 2 : one for new probe, one for NULL func */
114 new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
115 if (new == NULL)
116 return ERR_PTR(-ENOMEM);
117 if (old)
118 memcpy(new, old, nr_probes * sizeof(void *));
119 new[nr_probes] = probe;
120 entry->refcount = nr_probes + 1;
121 entry->funcs = new;
122 debug_print_probes(entry);
123 return old;
124}
125
126static void *
127tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
128{
129 int nr_probes = 0, nr_del = 0, i;
130 void **old, **new;
131
132 old = entry->funcs;
133
134 debug_print_probes(entry);
135 /* (N -> M), (N > 1, M >= 0) probes */
136 for (nr_probes = 0; old[nr_probes]; nr_probes++) {
137 if ((!probe || old[nr_probes] == probe))
138 nr_del++;
139 }
140
141 if (nr_probes - nr_del == 0) {
142 /* N -> 0, (N > 1) */
143 entry->funcs = NULL;
144 entry->refcount = 0;
145 debug_print_probes(entry);
146 return old;
147 } else {
148 int j = 0;
149 /* N -> M, (N > 1, M > 0) */
150 /* + 1 for NULL */
151 new = kzalloc((nr_probes - nr_del + 1)
152 * sizeof(void *), GFP_KERNEL);
153 if (new == NULL)
154 return ERR_PTR(-ENOMEM);
155 for (i = 0; old[i]; i++)
156 if ((probe && old[i] != probe))
157 new[j++] = old[i];
158 entry->refcount = nr_probes - nr_del;
159 entry->funcs = new;
160 }
161 debug_print_probes(entry);
162 return old;
163}
164
165/*
166 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
167 * Must be called with tracepoints_mutex held.
168 * Returns NULL if not present.
169 */
170static struct tracepoint_entry *get_tracepoint(const char *name)
171{
172 struct hlist_head *head;
173 struct hlist_node *node;
174 struct tracepoint_entry *e;
175 u32 hash = jhash(name, strlen(name), 0);
176
177 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
178 hlist_for_each_entry(e, node, head, hlist) {
179 if (!strcmp(name, e->name))
180 return e;
181 }
182 return NULL;
183}
184
185/*
186 * Add the tracepoint to the tracepoint hash table. Must be called with
187 * tracepoints_mutex held.
188 */
189static struct tracepoint_entry *add_tracepoint(const char *name)
190{
191 struct hlist_head *head;
192 struct hlist_node *node;
193 struct tracepoint_entry *e;
194 size_t name_len = strlen(name) + 1;
195 u32 hash = jhash(name, name_len-1, 0);
196
197 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
198 hlist_for_each_entry(e, node, head, hlist) {
199 if (!strcmp(name, e->name)) {
200 printk(KERN_NOTICE
201 "tracepoint %s busy\n", name);
202 return ERR_PTR(-EEXIST); /* Already there */
203 }
204 }
205 /*
206 * Using kmalloc here to allocate a variable length element. Could
207 * cause some memory fragmentation if overused.
208 */
209 e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
210 if (!e)
211 return ERR_PTR(-ENOMEM);
212 memcpy(&e->name[0], name, name_len);
213 e->funcs = NULL;
214 e->refcount = 0;
215 e->rcu_pending = 0;
216 hlist_add_head(&e->hlist, head);
217 return e;
218}
219
220/*
 221 * Remove the tracepoint from the tracepoint hash table. Must be called with
 222 * tracepoints_mutex held.
223 */
224static int remove_tracepoint(const char *name)
225{
226 struct hlist_head *head;
227 struct hlist_node *node;
228 struct tracepoint_entry *e;
229 int found = 0;
230 size_t len = strlen(name) + 1;
231 u32 hash = jhash(name, len-1, 0);
232
233 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
234 hlist_for_each_entry(e, node, head, hlist) {
235 if (!strcmp(name, e->name)) {
236 found = 1;
237 break;
238 }
239 }
240 if (!found)
241 return -ENOENT;
242 if (e->refcount)
243 return -EBUSY;
244 hlist_del(&e->hlist);
245 /* Make sure the call_rcu_sched has been executed */
246 if (e->rcu_pending)
247 rcu_barrier_sched();
248 kfree(e);
249 return 0;
250}
251
252/*
253 * Sets the probe callback corresponding to one tracepoint.
254 */
255static void set_tracepoint(struct tracepoint_entry **entry,
256 struct tracepoint *elem, int active)
257{
258 WARN_ON(strcmp((*entry)->name, elem->name) != 0);
259
260 /*
261 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
262 * probe callbacks array is consistent before setting a pointer to it.
263 * This array is referenced by __DO_TRACE from
 264 * include/linux/tracepoint.h. A matching smp_read_barrier_depends()
265 * is used.
266 */
267 rcu_assign_pointer(elem->funcs, (*entry)->funcs);
268 elem->state = active;
269}
270
271/*
272 * Disable a tracepoint and its probe callback.
 273 * Note: only waiting an RCU period after setting elem->call to the empty
 274 * function ensures that the original callback is no longer used. This is
 275 * ensured by the preempt_disable around the call site.
276 */
277static void disable_tracepoint(struct tracepoint *elem)
278{
279 elem->state = 0;
280}
281
282/**
283 * tracepoint_update_probe_range - Update a probe range
284 * @begin: beginning of the range
285 * @end: end of the range
286 *
287 * Updates the probe callback corresponding to a range of tracepoints.
288 */
289void tracepoint_update_probe_range(struct tracepoint *begin,
290 struct tracepoint *end)
291{
292 struct tracepoint *iter;
293 struct tracepoint_entry *mark_entry;
294
295 mutex_lock(&tracepoints_mutex);
296 for (iter = begin; iter < end; iter++) {
297 mark_entry = get_tracepoint(iter->name);
298 if (mark_entry) {
299 set_tracepoint(&mark_entry, iter,
300 !!mark_entry->refcount);
301 } else {
302 disable_tracepoint(iter);
303 }
304 }
305 mutex_unlock(&tracepoints_mutex);
306}
307
308/*
309 * Update probes, removing the faulty probes.
310 */
311static void tracepoint_update_probes(void)
312{
313 /* Core kernel tracepoints */
314 tracepoint_update_probe_range(__start___tracepoints,
315 __stop___tracepoints);
316 /* tracepoints in modules. */
317 module_update_tracepoints();
318}
319
320/**
321 * tracepoint_probe_register - Connect a probe to a tracepoint
322 * @name: tracepoint name
323 * @probe: probe handler
324 *
325 * Returns 0 if ok, error value on error.
326 * The probe address must at least be aligned on the architecture pointer size.
327 */
328int tracepoint_probe_register(const char *name, void *probe)
329{
330 struct tracepoint_entry *entry;
331 int ret = 0;
332 void *old;
333
334 mutex_lock(&tracepoints_mutex);
335 entry = get_tracepoint(name);
336 if (!entry) {
337 entry = add_tracepoint(name);
338 if (IS_ERR(entry)) {
339 ret = PTR_ERR(entry);
340 goto end;
341 }
342 }
343 /*
344 * If we detect that a call_rcu_sched is pending for this tracepoint,
345 * make sure it's executed now.
346 */
347 if (entry->rcu_pending)
348 rcu_barrier_sched();
349 old = tracepoint_entry_add_probe(entry, probe);
350 if (IS_ERR(old)) {
351 ret = PTR_ERR(old);
352 goto end;
353 }
354 mutex_unlock(&tracepoints_mutex);
355 tracepoint_update_probes(); /* may update entry */
356 mutex_lock(&tracepoints_mutex);
357 entry = get_tracepoint(name);
358 WARN_ON(!entry);
359 if (entry->rcu_pending)
360 rcu_barrier_sched();
361 tracepoint_entry_free_old(entry, old);
362end:
363 mutex_unlock(&tracepoints_mutex);
364 return ret;
365}
366EXPORT_SYMBOL_GPL(tracepoint_probe_register);
367
368/**
369 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
370 * @name: tracepoint name
371 * @probe: probe function pointer
372 *
373 * We do not need to call a synchronize_sched to make sure the probes have
374 * finished running before doing a module unload, because the module unload
 375 * itself uses stop_machine(), which ensures that every preempt-disabled
 376 * section has finished.
377 */
378int tracepoint_probe_unregister(const char *name, void *probe)
379{
380 struct tracepoint_entry *entry;
381 void *old;
382 int ret = -ENOENT;
383
384 mutex_lock(&tracepoints_mutex);
385 entry = get_tracepoint(name);
386 if (!entry)
387 goto end;
388 if (entry->rcu_pending)
389 rcu_barrier_sched();
390 old = tracepoint_entry_remove_probe(entry, probe);
391 mutex_unlock(&tracepoints_mutex);
392 tracepoint_update_probes(); /* may update entry */
393 mutex_lock(&tracepoints_mutex);
394 entry = get_tracepoint(name);
395 if (!entry)
396 goto end;
397 if (entry->rcu_pending)
398 rcu_barrier_sched();
399 tracepoint_entry_free_old(entry, old);
400 remove_tracepoint(name); /* Ignore busy error message */
401 ret = 0;
402end:
403 mutex_unlock(&tracepoints_mutex);
404 return ret;
405}
406EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
407
408/**
 409 * tracepoint_get_iter_range - Get the next tracepoint in a given range.
 410 * @tracepoint: current tracepoint (in), next tracepoint (out)
411 * @begin: beginning of the range
412 * @end: end of the range
413 *
414 * Returns whether a next tracepoint has been found (1) or not (0).
415 * Will return the first tracepoint in the range if the input tracepoint is
416 * NULL.
417 */
418int tracepoint_get_iter_range(struct tracepoint **tracepoint,
419 struct tracepoint *begin, struct tracepoint *end)
420{
421 if (!*tracepoint && begin != end) {
422 *tracepoint = begin;
423 return 1;
424 }
425 if (*tracepoint >= begin && *tracepoint < end)
426 return 1;
427 return 0;
428}
429EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
430
431static void tracepoint_get_iter(struct tracepoint_iter *iter)
432{
433 int found = 0;
434
435 /* Core kernel tracepoints */
436 if (!iter->module) {
437 found = tracepoint_get_iter_range(&iter->tracepoint,
438 __start___tracepoints, __stop___tracepoints);
439 if (found)
440 goto end;
441 }
442 /* tracepoints in modules. */
443 found = module_get_iter_tracepoints(iter);
444end:
445 if (!found)
446 tracepoint_iter_reset(iter);
447}
448
449void tracepoint_iter_start(struct tracepoint_iter *iter)
450{
451 tracepoint_get_iter(iter);
452}
453EXPORT_SYMBOL_GPL(tracepoint_iter_start);
454
455void tracepoint_iter_next(struct tracepoint_iter *iter)
456{
457 iter->tracepoint++;
458 /*
459 * iter->tracepoint may be invalid because we blindly incremented it.
 460 * Make sure it is valid by walking the tracepoint tables, moving on to
 461 * the tracepoints of following modules if necessary.
462 */
463 tracepoint_get_iter(iter);
464}
465EXPORT_SYMBOL_GPL(tracepoint_iter_next);
466
467void tracepoint_iter_stop(struct tracepoint_iter *iter)
468{
469}
470EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
471
472void tracepoint_iter_reset(struct tracepoint_iter *iter)
473{
474 iter->module = NULL;
475 iter->tracepoint = NULL;
476}
477EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
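The probe lists above are updated copy-on-write: tracepoint_entry_add_probe() and tracepoint_entry_remove_probe() build a new NULL-terminated array, set_tracepoint() publishes it with rcu_assign_pointer(), and the old array is freed only after an RCU-sched grace period, because __DO_TRACE walks it under preempt_disable(). A condensed sketch of the pattern; it uses synchronize_sched() for simplicity where the code above defers the free with call_rcu_sched():

    /* Copy-on-write append to a NULL-terminated probe array. */
    static int add_probe(void ***slot, void *probe)
    {
    	void **old = *slot, **new;
    	int n = 0;

    	if (old)
    		while (old[n])
    			n++;

    	/* + 2: one for the new probe, one for the NULL terminator */
    	new = kzalloc((n + 2) * sizeof(void *), GFP_KERNEL);
    	if (!new)
    		return -ENOMEM;
    	if (old)
    		memcpy(new, old, n * sizeof(void *));
    	new[n] = probe;

    	rcu_assign_pointer(*slot, new);	/* readers see old or new, never a mix */
    	synchronize_sched();		/* wait out preempt-disabled readers */
    	kfree(old);
    	return 0;
    }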
diff --git a/mm/memory.c b/mm/memory.c
index 3a6c4a658325..164951c47305 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -64,8 +64,6 @@
64 64
65#include "internal.h" 65#include "internal.h"
66 66
67#include "internal.h"
68
69#ifndef CONFIG_NEED_MULTIPLE_NODES 67#ifndef CONFIG_NEED_MULTIPLE_NODES
70/* use the per-pgdat data instead for discontigmem - mbligh */ 68/* use the per-pgdat data instead for discontigmem - mbligh */
71unsigned long max_mapnr; 69unsigned long max_mapnr;
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index 8d7a27a6335c..3e67d575ee6e 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -95,6 +95,7 @@ put_dentry:
95put_memory: 95put_memory:
96 return ERR_PTR(error); 96 return ERR_PTR(error);
97} 97}
98EXPORT_SYMBOL_GPL(shmem_file_setup);
98 99
99/** 100/**
100 * shmem_zero_setup - setup a shared anonymous mapping 101 * shmem_zero_setup - setup a shared anonymous mapping
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 712ae47af0bf..65ae576030da 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -17,7 +17,6 @@
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/debugobjects.h> 19#include <linux/debugobjects.h>
20#include <linux/vmalloc.h>
21#include <linux/kallsyms.h> 20#include <linux/kallsyms.h>
22#include <linux/list.h> 21#include <linux/list.h>
23#include <linux/rbtree.h> 22#include <linux/rbtree.h>
@@ -175,6 +174,21 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
175 return nr; 174 return nr;
176} 175}
177 176
177static inline int is_vmalloc_or_module_addr(const void *x)
178{
179 /*
180 * x86-64 and sparc64 put modules in a special place,
181 * and fall back on vmalloc() if that fails. Others
182 * just put it in the vmalloc space.
183 */
184#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
185 unsigned long addr = (unsigned long)x;
186 if (addr >= MODULES_VADDR && addr < MODULES_END)
187 return 1;
188#endif
189 return is_vmalloc_addr(x);
190}
191
178/* 192/*
179 * Walk a vmap address to the struct page it maps. 193 * Walk a vmap address to the struct page it maps.
180 */ 194 */
@@ -188,8 +202,7 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
188 * XXX we might need to change this if we add VIRTUAL_BUG_ON for 202 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
189 * architectures that do not vmalloc module space 203 * architectures that do not vmalloc module space
190 */ 204 */
191 VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) && 205 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
192 !is_module_address(addr));
193 206
194 if (!pgd_none(*pgd)) { 207 if (!pgd_none(*pgd)) {
195 pud_t *pud = pud_offset(pgd, addr); 208 pud_t *pud = pud_offset(pgd, addr);
diff --git a/samples/Kconfig b/samples/Kconfig
index e1fb471cc501..4b02f5a0e656 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -13,6 +13,12 @@ config SAMPLE_MARKERS
13 help 13 help
 14 This builds the markers example modules. 14 This builds the markers example modules.
15 15
16config SAMPLE_TRACEPOINTS
17 tristate "Build tracepoints examples -- loadable modules only"
18 depends on TRACEPOINTS && m
19 help
 20 This builds the tracepoints example modules.
21
16config SAMPLE_KOBJECT 22config SAMPLE_KOBJECT
17 tristate "Build kobject examples" 23 tristate "Build kobject examples"
18 help 24 help
diff --git a/samples/Makefile b/samples/Makefile
index 2e02575f7794..10eaca89fe17 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
1# Makefile for Linux samples code 1# Makefile for Linux samples code
2 2
3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ 3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
index c8e099d4d1fd..2dfb3b32937e 100644
--- a/samples/markers/probe-example.c
+++ b/samples/markers/probe-example.c
@@ -81,6 +81,7 @@ static void __exit probe_fini(void)
81 probe_array[i].probe_func, &probe_array[i]); 81 probe_array[i].probe_func, &probe_array[i]);
82 printk(KERN_INFO "Number of event b : %u\n", 82 printk(KERN_INFO "Number of event b : %u\n",
83 atomic_read(&eventb_count)); 83 atomic_read(&eventb_count));
84 marker_synchronize_unregister();
84} 85}
85 86
86module_init(probe_init); 87module_init(probe_init);
diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile
new file mode 100644
index 000000000000..36479ad9ae14
--- /dev/null
+++ b/samples/tracepoints/Makefile
@@ -0,0 +1,6 @@
1# builds the tracepoint example kernel modules;
2# then to use one (as root): insmod <module_name.ko>
3
4obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
5obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
6obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
new file mode 100644
index 000000000000..0216b55bd640
--- /dev/null
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -0,0 +1,13 @@
1#ifndef _TP_SAMPLES_TRACE_H
2#define _TP_SAMPLES_TRACE_H
3
4#include <linux/proc_fs.h> /* for struct inode and struct file */
5#include <linux/tracepoint.h>
6
7DEFINE_TRACE(subsys_event,
8 TPPROTO(struct inode *inode, struct file *file),
9 TPARGS(inode, file));
10DEFINE_TRACE(subsys_eventb,
11 TPPROTO(void),
12 TPARGS());
13#endif
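DEFINE_TRACE() generates, per event, a tracepoint descriptor and an inline trace_subsys_event() wrapper that checks the descriptor's state and, when armed, walks the NULL-terminated funcs array under preempt_disable(). A rough sketch of the generated shape -- abbreviated and illustrative, not the exact macro expansion in include/linux/tracepoint.h:

    /* Roughly what DEFINE_TRACE(subsys_event, ...) expands to. */
    static inline void trace_subsys_event(struct inode *inode, struct file *file)
    {
    	void **it_func;

    	if (!__tracepoint_subsys_event.state)	/* disarmed: near-zero cost */
    		return;

    	preempt_disable();
    	it_func = rcu_dereference(__tracepoint_subsys_event.funcs);
    	if (it_func) {
    		do {
    			((void (*)(struct inode *, struct file *))(*it_func))
    				(inode, file);
    		} while (*++it_func);
    	}
    	preempt_enable();
    }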
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
new file mode 100644
index 000000000000..55abfdda4bd4
--- /dev/null
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -0,0 +1,55 @@
1/*
2 * tracepoint-probe-sample.c
3 *
4 * sample tracepoint probes.
5 */
6
7#include <linux/module.h>
8#include <linux/file.h>
9#include <linux/dcache.h>
10#include "tp-samples-trace.h"
11
12/*
13 * Here the caller only guarantees locking for struct file and struct inode.
14 * Locking must therefore be done in the probe to use the dentry.
15 */
16static void probe_subsys_event(struct inode *inode, struct file *file)
17{
18 path_get(&file->f_path);
19 dget(file->f_path.dentry);
20 printk(KERN_INFO "Event is encountered with filename %s\n",
21 file->f_path.dentry->d_name.name);
22 dput(file->f_path.dentry);
23 path_put(&file->f_path);
24}
25
26static void probe_subsys_eventb(void)
27{
28 printk(KERN_INFO "Event B is encountered\n");
29}
30
31int __init tp_sample_trace_init(void)
32{
33 int ret;
34
35 ret = register_trace_subsys_event(probe_subsys_event);
36 WARN_ON(ret);
37 ret = register_trace_subsys_eventb(probe_subsys_eventb);
38 WARN_ON(ret);
39
40 return 0;
41}
42
43module_init(tp_sample_trace_init);
44
45void __exit tp_sample_trace_exit(void)
46{
47 unregister_trace_subsys_eventb(probe_subsys_eventb);
48 unregister_trace_subsys_event(probe_subsys_event);
49}
50
51module_exit(tp_sample_trace_exit);
52
53MODULE_LICENSE("GPL");
54MODULE_AUTHOR("Mathieu Desnoyers");
55MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
new file mode 100644
index 000000000000..5e9fcf4afffe
--- /dev/null
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -0,0 +1,42 @@
1/*
2 * tracepoint-probe-sample2.c
3 *
4 * 2nd sample tracepoint probes.
5 */
6
7#include <linux/module.h>
8#include <linux/fs.h>
9#include "tp-samples-trace.h"
10
11/*
12 * Here the caller only guarantees locking for struct file and struct inode.
13 * Locking must therefore be done in the probe to use the dentry.
14 */
15static void probe_subsys_event(struct inode *inode, struct file *file)
16{
17 printk(KERN_INFO "Event is encountered with inode number %lu\n",
18 inode->i_ino);
19}
20
21int __init tp_sample_trace_init(void)
22{
23 int ret;
24
25 ret = register_trace_subsys_event(probe_subsys_event);
26 WARN_ON(ret);
27
28 return 0;
29}
30
31module_init(tp_sample_trace_init);
32
33void __exit tp_sample_trace_exit(void)
34{
35 unregister_trace_subsys_event(probe_subsys_event);
36}
37
38module_exit(tp_sample_trace_exit);
39
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Mathieu Desnoyers");
42MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
new file mode 100644
index 000000000000..4ae4b7fcc043
--- /dev/null
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -0,0 +1,53 @@
1/* tracepoint-sample.c
2 *
3 * Executes a tracepoint when /proc/tracepoint-example is opened.
4 *
5 * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
6 *
7 * This file is released under the GPLv2.
8 * See the file COPYING for more details.
9 */
10
11#include <linux/module.h>
12#include <linux/sched.h>
13#include <linux/proc_fs.h>
14#include "tp-samples-trace.h"
15
16struct proc_dir_entry *pentry_example;
17
18static int my_open(struct inode *inode, struct file *file)
19{
20 int i;
21
22 trace_subsys_event(inode, file);
23 for (i = 0; i < 10; i++)
24 trace_subsys_eventb();
25 return -EPERM;
26}
27
28static struct file_operations mark_ops = {
29 .open = my_open,
30};
31
32static int example_init(void)
33{
34 printk(KERN_ALERT "example init\n");
35 pentry_example = proc_create("tracepoint-example", 0444, NULL,
36 &mark_ops);
37 if (!pentry_example)
38 return -EPERM;
39 return 0;
40}
41
42static void example_exit(void)
43{
44 printk(KERN_ALERT "example exit\n");
45 remove_proc_entry("tracepoint-example", NULL);
46}
47
48module_init(example_init)
49module_exit(example_exit)
50
51MODULE_LICENSE("GPL");
52MODULE_AUTHOR("Mathieu Desnoyers");
53MODULE_DESCRIPTION("Tracepoint example");
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 277cfe0b7100..5ed4cbf1e0e1 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,10 +198,17 @@ cmd_modversions = \
198 fi; 198 fi;
199endif 199endif
200 200
201ifdef CONFIG_FTRACE_MCOUNT_RECORD
202cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \
203 "$(ARCH)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" \
204 "$(MV)" "$(@)";
205endif
206
201define rule_cc_o_c 207define rule_cc_o_c
202 $(call echo-cmd,checksrc) $(cmd_checksrc) \ 208 $(call echo-cmd,checksrc) $(cmd_checksrc) \
203 $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \ 209 $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \
204 $(cmd_modversions) \ 210 $(cmd_modversions) \
211 $(cmd_record_mcount) \
205 scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \ 212 scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \
206 $(dot-target).tmp; \ 213 $(dot-target).tmp; \
207 rm -f $(depfile); \ 214 rm -f $(depfile); \
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index 2243353fe55d..5e7316e5aa39 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -37,13 +37,13 @@
37# dmesg | perl scripts/bootgraph.pl > output.svg 37# dmesg | perl scripts/bootgraph.pl > output.svg
38# 38#
39 39
 40my @rows; 40 my (%start, %end);
41my %start, %end, %row;
42my $done = 0; 41my $done = 0;
43my $rowcount = 0;
44my $maxtime = 0; 42my $maxtime = 0;
45my $firsttime = 100; 43my $firsttime = 100;
46my $count = 0; 44my $count = 0;
45my %pids;
46
47while (<>) { 47while (<>) {
48 my $line = $_; 48 my $line = $_;
49 if ($line =~ /([0-9\.]+)\] calling ([a-zA-Z0-9\_]+)\+/) { 49 if ($line =~ /([0-9\.]+)\] calling ([a-zA-Z0-9\_]+)\+/) {
@@ -54,14 +54,8 @@ while (<>) {
54 $firsttime = $1; 54 $firsttime = $1;
55 } 55 }
56 } 56 }
57 $row{$func} = 1;
58 if ($line =~ /\@ ([0-9]+)/) { 57 if ($line =~ /\@ ([0-9]+)/) {
59 my $pid = $1; 58 $pids{$func} = $1;
60 if (!defined($rows[$pid])) {
61 $rowcount = $rowcount + 1;
62 $rows[$pid] = $rowcount;
63 }
64 $row{$func} = $rows[$pid];
65 } 59 }
66 $count = $count + 1; 60 $count = $count + 1;
67 } 61 }
@@ -109,17 +103,25 @@ $styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(
109my $mult = 950.0 / ($maxtime - $firsttime); 103my $mult = 950.0 / ($maxtime - $firsttime);
110my $threshold = ($maxtime - $firsttime) / 60.0; 104my $threshold = ($maxtime - $firsttime) / 60.0;
111my $stylecounter = 0; 105my $stylecounter = 0;
106my %rows;
107my $rowscount = 1;
112while (($key,$value) = each %start) { 108while (($key,$value) = each %start) {
113 my $duration = $end{$key} - $start{$key}; 109 my $duration = $end{$key} - $start{$key};
114 110
115 if ($duration >= $threshold) { 111 if ($duration >= $threshold) {
116 my $s, $s2, $e, $y; 112 my $s, $s2, $e, $y;
113 $pid = $pids{$key};
114
115 if (!defined($rows{$pid})) {
116 $rows{$pid} = $rowscount;
117 $rowscount = $rowscount + 1;
118 }
117 $s = ($value - $firsttime) * $mult; 119 $s = ($value - $firsttime) * $mult;
118 $s2 = $s + 6; 120 $s2 = $s + 6;
119 $e = ($end{$key} - $firsttime) * $mult; 121 $e = ($end{$key} - $firsttime) * $mult;
120 $w = $e - $s; 122 $w = $e - $s;
121 123
122 $y = $row{$key} * 150; 124 $y = $rows{$pid} * 150;
123 $y2 = $y + 4; 125 $y2 = $y + 4;
124 126
125 $style = $styles[$stylecounter]; 127 $style = $styles[$stylecounter];
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e30bac141b21..f88bb3e21cda 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1,5 +1,5 @@
1#!/usr/bin/perl -w 1#!/usr/bin/perl -w
2# (c) 2001, Dave Jones. <davej@codemonkey.org.uk> (the file handling bit) 2# (c) 2001, Dave Jones. <davej@redhat.com> (the file handling bit)
3# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) 3# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
4# (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc) 4# (c) 2007, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite, etc)
5# Licensed under the terms of the GNU GPL License version 2 5# Licensed under the terms of the GNU GPL License version 2
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
new file mode 100755
index 000000000000..f56d760bd589
--- /dev/null
+++ b/scripts/recordmcount.pl
@@ -0,0 +1,395 @@
1#!/usr/bin/perl -w
2# (c) 2008, Steven Rostedt <srostedt@redhat.com>
3# Licensed under the terms of the GNU GPL License version 2
4#
5# recordmcount.pl - makes a section called __mcount_loc that holds
6# all the offsets to the calls to mcount.
7#
8#
9# What we want to end up with is a section in vmlinux called
10# __mcount_loc that contains a list of pointers to all the
 11# call sites in the kernel that call mcount. Later, at boot, the kernel
 12# will read this list, save the locations and turn the call sites into nops.
13# When tracing or profiling is later enabled, these locations will then
14# be converted back to pointers to some function.
15#
16# This is no easy feat. This script is called just after the original
17# object is compiled and before it is linked.
18#
19# The references to the call sites are offsets from the section of text
20# that the call site is in. Hence, all functions in a section that
 21# has a call site to mcount will have offsets from the beginning of
 22# the section, not from the beginning of the function.
23#
24# The trick is to find a way to record the beginning of the section.
25# The way we do this is to look at the first function in the section
26# which will also be the location of that section after final link.
27# e.g.
28#
29# .section ".text.sched"
30# .globl my_func
31# my_func:
32# [...]
33# call mcount (offset: 0x5)
34# [...]
35# ret
36# other_func:
37# [...]
38# call mcount (offset: 0x1b)
39# [...]
40#
41# Both relocation offsets for the mcounts in the above example will be
42# offset from .text.sched. If we make another file called tmp.s with:
43#
44# .section __mcount_loc
45# .quad my_func + 0x5
46# .quad my_func + 0x1b
47#
48# We can then compile this tmp.s into tmp.o, and link it to the original
49# object.
50#
 51# But this gets hard if my_func is not marked .globl (a static function).
52# In such a case we have:
53#
54# .section ".text.sched"
55# my_func:
56# [...]
57# call mcount (offset: 0x5)
58# [...]
59# ret
60# .globl my_func
61# other_func:
62# [...]
63# call mcount (offset: 0x1b)
64# [...]
65#
66# If we make the tmp.s the same as above, when we link together with
67# the original object, we will end up with two symbols for my_func:
 68# one local, one global. After the final link, we will end up with
69# an undefined reference to my_func.
70#
71# Since local objects can reference local variables, we need to find
72# a way to make tmp.o reference the local objects of the original object
 73# file after it is linked together. To do this, we convert my_func
74# into a global symbol before linking tmp.o. Then after we link tmp.o
75# we will only have a single symbol for my_func that is global.
76# We can convert my_func back into a local symbol and we are done.
77#
78# Here are the steps we take:
79#
80# 1) Record all the local symbols by using 'nm'
81# 2) Use objdump to find all the call site offsets and sections for
82# mcount.
83# 3) Compile the list into its own object.
84# 4) Do we have to deal with local functions? If not, go to step 8.
85# 5) Make an object that converts these local functions to global symbols
86# with objcopy.
87# 6) Link together this new object with the list object.
88# 7) Convert the local functions back to local symbols and rename
89# the result as the original object.
90# End.
91# 8) Link the object with the list object.
92# 9) Move the result back to the original object.
93# End.
94#
95
96use strict;
97
98my $P = $0;
99$P =~ s@.*/@@g;
100
101my $V = '0.1';
102
103if ($#ARGV < 6) {
104 print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n";
105 print "version: $V\n";
106 exit(1);
107}
108
109my ($arch, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
110
111$objdump = "objdump" if ((length $objdump) == 0);
112$objcopy = "objcopy" if ((length $objcopy) == 0);
113$cc = "gcc" if ((length $cc) == 0);
114$ld = "ld" if ((length $ld) == 0);
115$nm = "nm" if ((length $nm) == 0);
116$rm = "rm" if ((length $rm) == 0);
117$mv = "mv" if ((length $mv) == 0);
118
119#print STDERR "running: $P '$arch' '$objdump' '$objcopy' '$cc' '$ld' " .
120# "'$nm' '$rm' '$mv' '$inputfile'\n";
121
122my %locals; # List of local (static) functions
123my %weak; # List of weak functions
124my %convert; # List of local functions used that needs conversion
125
126my $type;
127my $section_regex; # Find the start of a section
128my $function_regex; # Find the name of a function
129 # (return offset and func name)
130my $mcount_regex; # Find the call site to mcount (return offset)
131
132if ($arch eq "x86_64") {
133 $section_regex = "Disassembly of section";
134 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
135 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
136 $type = ".quad";
137
138 # force flags for this arch
139 $ld .= " -m elf_x86_64";
140 $objdump .= " -M x86-64";
141 $objcopy .= " -O elf64-x86-64";
142 $cc .= " -m64";
143
144} elsif ($arch eq "i386") {
145 $section_regex = "Disassembly of section";
146 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
147 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
148 $type = ".long";
149
150 # force flags for this arch
151 $ld .= " -m elf_i386";
152 $objdump .= " -M i386";
153 $objcopy .= " -O elf32-i386";
154 $cc .= " -m32";
155
156} else {
157 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
158}
159
160my $text_found = 0;
161my $read_function = 0;
162my $opened = 0;
163my $mcount_section = "__mcount_loc";
164
165my $dirname;
166my $filename;
167my $prefix;
168my $ext;
169
170if ($inputfile =~ m,^(.*)/([^/]*)$,) {
171 $dirname = $1;
172 $filename = $2;
173} else {
174 $dirname = ".";
175 $filename = $inputfile;
176}
177
178if ($filename =~ m,^(.*)(\.\S),) {
179 $prefix = $1;
180 $ext = $2;
181} else {
182 $prefix = $filename;
183 $ext = "";
184}
185
186my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
187my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
188
189#
 190# --globalize-symbol came out in binutils 2.17; we must test the version
 191# of objcopy, and if it is older than 2.17, then we cannot
192# record local functions.
 193my $use_locals = 1;
194my $local_warn_once = 0;
195my $found_version = 0;
196
197open (IN, "$objcopy --version |") || die "error running $objcopy";
198while (<IN>) {
199 if (/objcopy.*\s(\d+)\.(\d+)/) {
200 my $major = $1;
201 my $minor = $2;
202
203 $found_version = 1;
204 if ($major < 2 ||
205 ($major == 2 && $minor < 17)) {
206 $use_locals = 0;
207 }
208 last;
209 }
210}
211close (IN);
212
213if (!$found_version) {
214 print STDERR "WARNING: could not find objcopy version.\n" .
215 "\tDisabling local function references.\n";
216}
217
218
219#
220# Step 1: find all the local (static functions) and weak symbols.
221# 't' is local, 'w/W' is weak (we never use a weak function)
222#
223open (IN, "$nm $inputfile|") || die "error running $nm";
224while (<IN>) {
225 if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) {
226 $locals{$1} = 1;
227 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
228 $weak{$2} = $1;
229 }
230}
231close(IN);
232
233my @offsets; # Array of offsets of mcount callers
234my $ref_func; # reference function to use for offsets
235my $offset = 0; # offset of ref_func to section beginning
236
237##
238# update_funcs - print out the current mcount callers
239#
240# Go through the list of offsets to callers and write them to
241# the output file in a format that can be read by an assembler.
242#
243sub update_funcs
244{
245 return if ($#offsets < 0);
246
247 defined($ref_func) || die "No function to reference";
248
 249	# A section may have had only a weak function to represent it.
250 # Unfortunately, a weak function may be overwritten by another
251 # function of the same name, making all these offsets incorrect.
252 # To be safe, we simply print a warning and bail.
253 if (defined $weak{$ref_func}) {
254 print STDERR
255 "$inputfile: WARNING: referencing weak function" .
256 " $ref_func for mcount\n";
257 return;
258 }
259
260 # is this function static? If so, note this fact.
261 if (defined $locals{$ref_func}) {
262
263 # only use locals if objcopy supports globalize-symbols
264 if (!$use_locals) {
265 return;
266 }
267 $convert{$ref_func} = 1;
268 }
269
270 # Loop through all the mcount caller offsets and print a reference
271 # to the caller based from the ref_func.
272 for (my $i=0; $i <= $#offsets; $i++) {
273 if (!$opened) {
274 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
275 $opened = 1;
276 print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
277 }
278 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
279 }
280}
281
282#
283# Step 2: find the sections and mcount call sites
284#
285open(IN, "$objdump -dr $inputfile|") || die "error running $objdump";
286
287my $text;
288
289while (<IN>) {
290 # is it a section?
291 if (/$section_regex/) {
292 $read_function = 1;
293 # print out any recorded offsets
294 update_funcs() if ($text_found);
295
296 # reset all markers and arrays
297 $text_found = 0;
298 undef($ref_func);
299 undef(@offsets);
300
301 # section found, now is this a start of a function?
302 } elsif ($read_function && /$function_regex/) {
303 $text_found = 1;
304 $offset = hex $1;
305 $text = $2;
306
 307		# if this is either a local function or a weak function,
 308		# keep looking for a global function that we can
 309		# use safely.
310 if (!defined($locals{$text}) && !defined($weak{$text})) {
311 $ref_func = $text;
312 $read_function = 0;
313 } else {
314 # if we already have a function, and this is weak, skip it
315 if (!defined($ref_func) || !defined($weak{$text})) {
316 $ref_func = $text;
317 }
318 }
319 }
320
321 # is this a call site to mcount? If so, record it to print later
322 if ($text_found && /$mcount_regex/) {
323 $offsets[$#offsets + 1] = hex $1;
324 }
325}
326
 327# dump out any more offsets that may have been found
328update_funcs() if ($text_found);
329
330# If we did not find any mcount callers, we are done (do nothing).
331if (!$opened) {
332 exit(0);
333}
334
335close(FILE);
336
337#
338# Step 3: Compile the file that holds the list of call sites to mcount.
339#
340`$cc -o $mcount_o -c $mcount_s`;
341
342my @converts = keys %convert;
343
344#
345# Step 4: Do we have sections that started with local functions?
346#
347if ($#converts >= 0) {
348 my $globallist = "";
349 my $locallist = "";
350
351 foreach my $con (@converts) {
352 $globallist .= " --globalize-symbol $con";
353 $locallist .= " --localize-symbol $con";
354 }
355
356 my $globalobj = $dirname . "/.tmp_gl_" . $filename;
357 my $globalmix = $dirname . "/.tmp_mx_" . $filename;
358
359 #
360 # Step 5: set up each local function as a global
361 #
362 `$objcopy $globallist $inputfile $globalobj`;
363
364 #
365 # Step 6: Link the global version to our list.
366 #
367 `$ld -r $globalobj $mcount_o -o $globalmix`;
368
369 #
370 # Step 7: Convert the local functions back into local symbols
371 #
372 `$objcopy $locallist $globalmix $inputfile`;
373
374 # Remove the temp files
375 `$rm $globalobj $globalmix`;
376
377} else {
378
379 my $mix = $dirname . "/.tmp_mx_" . $filename;
380
381 #
382 # Step 8: Link the object with our list of call sites object.
383 #
384 `$ld -r $inputfile $mcount_o -o $mix`;
385
386 #
387 # Step 9: Move the result back to the original object.
388 #
389 `$mv $mix $inputfile`;
390}
391
392# Clean up the temp files
393`$rm $mcount_o $mcount_s`;
394
395exit(0);
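The __mcount_loc section the script emits is, after final link, just an array of addresses bracketed by linker-provided start/stop symbols; at boot the kernel walks it and patches each recorded mcount call site. A schematic sketch of that consumer side, with symbol and helper names invented for illustration rather than taken from the actual ftrace code:

    /* Schematic consumer of the __mcount_loc section built above. */
    extern unsigned long __start_mcount_loc[];	/* provided by the linker */
    extern unsigned long __stop_mcount_loc[];

    static void __init patch_mcount_call_sites(void)
    {
    	unsigned long *site;

    	for (site = __start_mcount_loc; site < __stop_mcount_loc; site++)
    		make_call_site_nop((void *)*site);	/* assumed arch helper */
    }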
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 576e51199079..3e3fde7c1d2b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -75,6 +75,7 @@
75#include <linux/string.h> 75#include <linux/string.h>
76#include <linux/selinux.h> 76#include <linux/selinux.h>
77#include <linux/mutex.h> 77#include <linux/mutex.h>
78#include <linux/posix-timers.h>
78 79
79#include "avc.h" 80#include "avc.h"
80#include "objsec.h" 81#include "objsec.h"
@@ -2322,13 +2323,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
2322 initrlim = init_task.signal->rlim+i; 2323 initrlim = init_task.signal->rlim+i;
2323 rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); 2324 rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
2324 } 2325 }
2325 if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 2326 update_rlimit_cpu(rlim->rlim_cur);
2326 /*
2327 * This will cause RLIMIT_CPU calculations
2328 * to be refigured.
2329 */
2330 current->it_prof_expires = jiffies_to_cputime(1);
2331 }
2332 } 2327 }
2333 2328
2334 /* Wake up the parent if it is waiting so that it can 2329 /* Wake up the parent if it is waiting so that it can