-rw-r--r--Documentation/irqflags-tracing.txt57
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--Documentation/lockdep-design.txt197
-rw-r--r--Documentation/powerpc/booting-without-of.txt4
-rw-r--r--Documentation/sysctl/vm.txt14
-rw-r--r--MAINTAINERS10
-rw-r--r--arch/alpha/kernel/process.c2
-rw-r--r--arch/i386/Kconfig8
-rw-r--r--arch/i386/Kconfig.debug13
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/alternative.c10
-rw-r--r--arch/i386/kernel/cpuid.c6
-rw-r--r--arch/i386/kernel/entry.S36
-rw-r--r--arch/i386/kernel/irq.c6
-rw-r--r--arch/i386/kernel/nmi.c2
-rw-r--r--arch/i386/kernel/stacktrace.c98
-rw-r--r--arch/i386/kernel/traps.c39
-rw-r--r--arch/ia64/kernel/mca.c10
-rw-r--r--arch/ia64/kernel/smpboot.c2
-rw-r--r--arch/mips/kernel/entry.S2
-rw-r--r--arch/mips/kernel/mips-mt.c6
-rw-r--r--arch/powerpc/configs/chrp32_defconfig1378
-rw-r--r--arch/powerpc/configs/mpc834x_itx_defconfig1336
-rw-r--r--arch/powerpc/kernel/btext.c20
-rw-r--r--arch/powerpc/kernel/ibmebus.c9
-rw-r--r--arch/powerpc/kernel/irq.c656
-rw-r--r--arch/powerpc/kernel/legacy_serial.c57
-rw-r--r--arch/powerpc/kernel/misc_64.S10
-rw-r--r--arch/powerpc/kernel/pci_32.c37
-rw-r--r--arch/powerpc/kernel/pci_64.c33
-rw-r--r--arch/powerpc/kernel/prom.c454
-rw-r--r--arch/powerpc/kernel/prom_init.c20
-rw-r--r--arch/powerpc/kernel/prom_parse.c443
-rw-r--r--arch/powerpc/kernel/rtas_pci.c17
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c17
-rw-r--r--arch/powerpc/kernel/vio.c12
-rw-r--r--arch/powerpc/platforms/83xx/Kconfig11
-rw-r--r--arch/powerpc/platforms/83xx/Makefile1
-rw-r--r--arch/powerpc/platforms/83xx/mpc834x_itx.c156
-rw-r--r--arch/powerpc/platforms/83xx/mpc834x_itx.h23
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c419
-rw-r--r--arch/powerpc/platforms/cell/interrupt.h19
-rw-r--r--arch/powerpc/platforms/cell/setup.c22
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c394
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c119
-rw-r--r--arch/powerpc/platforms/chrp/pci.c11
-rw-r--r--arch/powerpc/platforms/chrp/setup.c101
-rw-r--r--arch/powerpc/platforms/chrp/smp.c1
-rw-r--r--arch/powerpc/platforms/iseries/irq.c105
-rw-r--r--arch/powerpc/platforms/iseries/irq.h2
-rw-r--r--arch/powerpc/platforms/iseries/setup.c8
-rw-r--r--arch/powerpc/platforms/maple/pci.c17
-rw-r--r--arch/powerpc/platforms/maple/setup.c94
-rw-r--r--arch/powerpc/platforms/powermac/bootx_init.c33
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c9
-rw-r--r--arch/powerpc/platforms/powermac/nvram.c5
-rw-r--r--arch/powerpc/platforms/powermac/pci.c68
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_base.c13
-rw-r--r--arch/powerpc/platforms/powermac/pic.c422
-rw-r--r--arch/powerpc/platforms/powermac/pmac.h2
-rw-r--r--arch/powerpc/platforms/powermac/setup.c3
-rw-r--r--arch/powerpc/platforms/pseries/ras.c82
-rw-r--r--arch/powerpc/platforms/pseries/setup.c244
-rw-r--r--arch/powerpc/platforms/pseries/smp.c32
-rw-r--r--arch/powerpc/platforms/pseries/xics.c718
-rw-r--r--arch/powerpc/platforms/pseries/xics.h17
-rw-r--r--arch/powerpc/sysdev/Makefile5
-rw-r--r--arch/powerpc/sysdev/i8259.c163
-rw-r--r--arch/powerpc/sysdev/mpic.c482
-rw-r--r--arch/ppc/syslib/Makefile2
-rw-r--r--arch/ppc/syslib/i8259.c212
-rw-r--r--arch/s390/Kconfig8
-rw-r--r--arch/s390/Kconfig.debug4
-rw-r--r--arch/s390/Makefile5
-rw-r--r--arch/s390/kernel/Makefile1
-rw-r--r--arch/s390/kernel/entry.S29
-rw-r--r--arch/s390/kernel/entry64.S21
-rw-r--r--arch/s390/kernel/irq.c8
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/stacktrace.c90
-rw-r--r--arch/um/kernel/tt/process_kern.c2
-rw-r--r--arch/um/kernel/um_arch.c2
-rw-r--r--arch/x86_64/Kconfig8
-rw-r--r--arch/x86_64/Kconfig.debug4
-rw-r--r--arch/x86_64/ia32/ia32entry.S19
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/entry.S188
-rw-r--r--arch/x86_64/kernel/head64.c5
-rw-r--r--arch/x86_64/kernel/irq.c4
-rw-r--r--arch/x86_64/kernel/nmi.c2
-rw-r--r--arch/x86_64/kernel/process.c2
-rw-r--r--arch/x86_64/kernel/smpboot.c2
-rw-r--r--arch/x86_64/kernel/stacktrace.c221
-rw-r--r--arch/x86_64/kernel/traps.c129
-rw-r--r--arch/x86_64/lib/thunk.S5
-rw-r--r--arch/x86_64/mm/fault.c1
-rw-r--r--block/ll_rw_blk.c2
-rw-r--r--drivers/block/floppy.c42
-rw-r--r--drivers/block/swim3.c230
-rw-r--r--drivers/char/agp/frontend.c2
-rw-r--r--drivers/char/applicom.c2
-rw-r--r--drivers/char/cs5535_gpio.c2
-rw-r--r--drivers/char/ds1286.c2
-rw-r--r--drivers/char/ds1302.c2
-rw-r--r--drivers/char/ds1620.c2
-rw-r--r--drivers/char/dsp56k.c2
-rw-r--r--drivers/char/dtlk.c2
-rw-r--r--drivers/char/efirtc.c2
-rw-r--r--drivers/char/ftape/zftape/zftape-init.c2
-rw-r--r--drivers/char/genrtc.c2
-rw-r--r--drivers/char/hpet.c2
-rw-r--r--drivers/char/hvsi.c7
-rw-r--r--drivers/char/hw_random/core.c2
-rw-r--r--drivers/char/i8k.c2
-rw-r--r--drivers/char/ip2/ip2main.c2
-rw-r--r--drivers/char/ip27-rtc.c2
-rw-r--r--drivers/char/ipmi/ipmi_devintf.c2
-rw-r--r--drivers/char/ipmi/ipmi_watchdog.c2
-rw-r--r--drivers/char/istallion.c2
-rw-r--r--drivers/char/ite_gpio.c2
-rw-r--r--drivers/char/lcd.c2
-rw-r--r--drivers/char/lp.c2
-rw-r--r--drivers/char/mem.c18
-rw-r--r--drivers/char/misc.c4
-rw-r--r--drivers/char/mmtimer.c2
-rw-r--r--drivers/char/mwave/mwavedd.c2
-rw-r--r--drivers/char/nvram.c2
-rw-r--r--drivers/char/nwbutton.c2
-rw-r--r--drivers/char/nwflash.c2
-rw-r--r--drivers/char/pc8736x_gpio.c2
-rw-r--r--drivers/char/pcmcia/cm4000_cs.c2
-rw-r--r--drivers/char/pcmcia/cm4040_cs.c2
-rw-r--r--drivers/char/ppdev.c2
-rw-r--r--drivers/char/random.c6
-rw-r--r--drivers/char/raw.c6
-rw-r--r--drivers/char/rio/rio_linux.c2
-rw-r--r--drivers/char/rtc.c4
-rw-r--r--drivers/char/scx200_gpio.c2
-rw-r--r--drivers/char/snsc.c2
-rw-r--r--drivers/char/sonypi.c2
-rw-r--r--drivers/char/stallion.c2
-rw-r--r--drivers/char/sx.c2
-rw-r--r--drivers/char/sysrq.c5
-rw-r--r--drivers/char/tb0219.c2
-rw-r--r--drivers/char/tipar.c2
-rw-r--r--drivers/char/tlclk.c2
-rw-r--r--drivers/char/toshiba.c2
-rw-r--r--drivers/char/tpm/tpm_atmel.c2
-rw-r--r--drivers/char/tpm/tpm_infineon.c2
-rw-r--r--drivers/char/tpm/tpm_nsc.c2
-rw-r--r--drivers/char/tpm/tpm_tis.c2
-rw-r--r--drivers/char/tty_io.c10
-rw-r--r--drivers/char/vc_screen.c2
-rw-r--r--drivers/char/viotape.c2
-rw-r--r--drivers/char/vr41xx_giu.c2
-rw-r--r--drivers/char/vt.c1
-rw-r--r--drivers/char/watchdog/acquirewdt.c2
-rw-r--r--drivers/char/watchdog/advantechwdt.c2
-rw-r--r--drivers/char/watchdog/alim1535_wdt.c2
-rw-r--r--drivers/char/watchdog/alim7101_wdt.c2
-rw-r--r--drivers/char/watchdog/at91_wdt.c2
-rw-r--r--drivers/char/watchdog/booke_wdt.c2
-rw-r--r--drivers/char/watchdog/cpu5wdt.c2
-rw-r--r--drivers/char/watchdog/ep93xx_wdt.c2
-rw-r--r--drivers/char/watchdog/eurotechwdt.c2
-rw-r--r--drivers/char/watchdog/i6300esb.c2
-rw-r--r--drivers/char/watchdog/i8xx_tco.c2
-rw-r--r--drivers/char/watchdog/ib700wdt.c2
-rw-r--r--drivers/char/watchdog/ibmasr.c2
-rw-r--r--drivers/char/watchdog/indydog.c2
-rw-r--r--drivers/char/watchdog/ixp2000_wdt.c2
-rw-r--r--drivers/char/watchdog/ixp4xx_wdt.c2
-rw-r--r--drivers/char/watchdog/machzwd.c2
-rw-r--r--drivers/char/watchdog/mixcomwd.c2
-rw-r--r--drivers/char/watchdog/mpc83xx_wdt.c2
-rw-r--r--drivers/char/watchdog/mpc8xx_wdt.c2
-rw-r--r--drivers/char/watchdog/mpcore_wdt.c2
-rw-r--r--drivers/char/watchdog/mv64x60_wdt.c2
-rw-r--r--drivers/char/watchdog/pcwd.c4
-rw-r--r--drivers/char/watchdog/pcwd_pci.c4
-rw-r--r--drivers/char/watchdog/pcwd_usb.c4
-rw-r--r--drivers/char/watchdog/s3c2410_wdt.c2
-rw-r--r--drivers/char/watchdog/sa1100_wdt.c2
-rw-r--r--drivers/char/watchdog/sbc60xxwdt.c2
-rw-r--r--drivers/char/watchdog/sbc8360.c2
-rw-r--r--drivers/char/watchdog/sbc_epx_c3.c2
-rw-r--r--drivers/char/watchdog/sc1200wdt.c2
-rw-r--r--drivers/char/watchdog/sc520_wdt.c2
-rw-r--r--drivers/char/watchdog/scx200_wdt.c2
-rw-r--r--drivers/char/watchdog/shwdt.c2
-rw-r--r--drivers/char/watchdog/softdog.c2
-rw-r--r--drivers/char/watchdog/w83627hf_wdt.c2
-rw-r--r--drivers/char/watchdog/w83877f_wdt.c2
-rw-r--r--drivers/char/watchdog/w83977f_wdt.c2
-rw-r--r--drivers/char/watchdog/wafer5823wdt.c2
-rw-r--r--drivers/char/watchdog/wdrtas.c4
-rw-r--r--drivers/char/watchdog/wdt.c4
-rw-r--r--drivers/char/watchdog/wdt285.c2
-rw-r--r--drivers/char/watchdog/wdt977.c2
-rw-r--r--drivers/char/watchdog/wdt_pci.c4
-rw-r--r--drivers/ide/ide-floppy.c2
-rw-r--r--drivers/ide/ide-io.c8
-rw-r--r--drivers/ide/ide-taskfile.c2
-rw-r--r--drivers/ieee1394/hosts.c10
-rw-r--r--drivers/input/serio/i8042-sparcio.h2
-rw-r--r--drivers/input/serio/libps2.c2
-rw-r--r--drivers/macintosh/macio-adb.c19
-rw-r--r--drivers/macintosh/macio_asic.c152
-rw-r--r--drivers/macintosh/smu.c6
-rw-r--r--drivers/macintosh/via-cuda.c24
-rw-r--r--drivers/macintosh/via-pmu.c33
-rw-r--r--drivers/md/md.c6
-rw-r--r--drivers/mmc/mmc.c2
-rw-r--r--drivers/net/3c59x.c4
-rw-r--r--drivers/net/8390.c4
-rw-r--r--drivers/net/forcedeth.c28
-rw-r--r--drivers/net/mace.c4
-rw-r--r--drivers/net/wireless/hostap/hostap_hw.c10
-rw-r--r--drivers/pnp/pnpacpi/rsparser.c11
-rw-r--r--drivers/s390/char/sclp.c10
-rw-r--r--drivers/s390/cio/cio.c2
-rw-r--r--drivers/s390/net/qeth_main.c6
-rw-r--r--drivers/s390/s390mach.c3
-rw-r--r--drivers/s390/scsi/zfcp_erp.c8
-rw-r--r--drivers/s390/scsi/zfcp_qdio.c9
-rw-r--r--drivers/scsi/libata-core.c2
-rw-r--r--drivers/serial/8250_pnp.c2
-rw-r--r--drivers/serial/pmac_zilog.c6
-rw-r--r--drivers/serial/serial_core.c11
-rw-r--r--drivers/spi/spi.c2
-rw-r--r--drivers/usb/core/inode.c4
-rw-r--r--drivers/video/Kconfig15
-rw-r--r--drivers/video/Makefile2
-rw-r--r--drivers/video/offb.c284
-rw-r--r--drivers/video/pnx4008/Makefile7
-rw-r--r--drivers/video/pnx4008/dum.h211
-rw-r--r--drivers/video/pnx4008/fbcommon.h43
-rw-r--r--drivers/video/pnx4008/pnxrgbfb.c213
-rw-r--r--drivers/video/pnx4008/sdum.c872
-rw-r--r--drivers/video/pnx4008/sdum.h139
-rw-r--r--fs/binfmt_elf.c15
-rw-r--r--fs/block_dev.c102
-rw-r--r--fs/dcache.c6
-rw-r--r--fs/direct-io.c6
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/namei.c20
-rw-r--r--fs/ntfs/inode.c33
-rw-r--r--fs/ntfs/super.c31
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/super.c11
-rw-r--r--fs/ufs/super.c2
-rw-r--r--include/asm-alpha/rwsem.h14
-rw-r--r--include/asm-generic/mutex-null.h15
-rw-r--r--include/asm-generic/percpu.h2
-rw-r--r--include/asm-i386/irqflags.h127
-rw-r--r--include/asm-i386/rwsem.h42
-rw-r--r--include/asm-i386/spinlock.h12
-rw-r--r--include/asm-i386/system.h20
-rw-r--r--include/asm-ia64/irq.h2
-rw-r--r--include/asm-ia64/percpu.h1
-rw-r--r--include/asm-ia64/rwsem.h18
-rw-r--r--include/asm-ia64/thread_info.h2
-rw-r--r--include/asm-m32r/system.h2
-rw-r--r--include/asm-powerpc/i8259.h8
-rw-r--r--include/asm-powerpc/irq.h358
-rw-r--r--include/asm-powerpc/irqflags.h31
-rw-r--r--include/asm-powerpc/machdep.h2
-rw-r--r--include/asm-powerpc/mpic.h67
-rw-r--r--include/asm-powerpc/percpu.h1
-rw-r--r--include/asm-powerpc/prom.h98
-rw-r--r--include/asm-powerpc/rwsem.h18
-rw-r--r--include/asm-powerpc/spu.h1
-rw-r--r--include/asm-s390/irqflags.h50
-rw-r--r--include/asm-s390/percpu.h1
-rw-r--r--include/asm-s390/rwsem.h31
-rw-r--r--include/asm-s390/semaphore.h3
-rw-r--r--include/asm-s390/system.h32
-rw-r--r--include/asm-sh/rwsem.h18
-rw-r--r--include/asm-sh/system.h2
-rw-r--r--include/asm-sparc64/percpu.h1
-rw-r--r--include/asm-x86_64/irqflags.h141
-rw-r--r--include/asm-x86_64/kdebug.h2
-rw-r--r--include/asm-x86_64/percpu.h2
-rw-r--r--include/asm-x86_64/system.h38
-rw-r--r--include/asm-xtensa/rwsem.h18
-rw-r--r--include/linux/completion.h12
-rw-r--r--include/linux/dcache.h12
-rw-r--r--include/linux/debug_locks.h69
-rw-r--r--include/linux/fs.h38
-rw-r--r--include/linux/hardirq.h27
-rw-r--r--include/linux/hrtimer.h1
-rw-r--r--include/linux/ide.h2
-rw-r--r--include/linux/idr.h2
-rw-r--r--include/linux/init_task.h15
-rw-r--r--include/linux/interrupt.h77
-rw-r--r--include/linux/ioport.h1
-rw-r--r--include/linux/irqflags.h96
-rw-r--r--include/linux/kallsyms.h23
-rw-r--r--include/linux/lockdep.h353
-rw-r--r--include/linux/mm.h8
-rw-r--r--include/linux/mmzone.h6
-rw-r--r--include/linux/module.h6
-rw-r--r--include/linux/mutex-debug.h18
-rw-r--r--include/linux/mutex.h37
-rw-r--r--include/linux/notifier.h2
-rw-r--r--include/linux/rtmutex.h10
-rw-r--r--include/linux/rwsem-spinlock.h27
-rw-r--r--include/linux/rwsem.h83
-rw-r--r--include/linux/sched.h86
-rw-r--r--include/linux/seqlock.h12
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/spinlock.h63
-rw-r--r--include/linux/spinlock_api_smp.h2
-rw-r--r--include/linux/spinlock_api_up.h1
-rw-r--r--include/linux/spinlock_types.h47
-rw-r--r--include/linux/spinlock_types_up.h9
-rw-r--r--include/linux/spinlock_up.h1
-rw-r--r--include/linux/stacktrace.h20
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/wait.h8
-rw-r--r--include/net/af_unix.h3
-rw-r--r--include/net/sock.h19
-rw-r--r--init/main.c31
-rw-r--r--kernel/Makefile8
-rw-r--r--kernel/capability.c8
-rw-r--r--kernel/exit.c40
-rw-r--r--kernel/fork.c51
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/hrtimer.c6
-rw-r--r--kernel/irq/chip.c5
-rw-r--r--kernel/irq/handle.c18
-rw-r--r--kernel/irq/manage.c6
-rw-r--r--kernel/kmod.c2
-rw-r--r--kernel/lockdep.c2702
-rw-r--r--kernel/lockdep_internals.h78
-rw-r--r--kernel/lockdep_proc.c345
-rw-r--r--kernel/module.c26
-rw-r--r--kernel/mutex-debug.c399
-rw-r--r--kernel/mutex-debug.h94
-rw-r--r--kernel/mutex.c74
-rw-r--r--kernel/mutex.h19
-rw-r--r--kernel/pid.c6
-rw-r--r--kernel/printk.c23
-rw-r--r--kernel/ptrace.c6
-rw-r--r--kernel/rcupdate.c4
-rw-r--r--kernel/rtmutex-debug.c307
-rw-r--r--kernel/rtmutex-debug.h8
-rw-r--r--kernel/rtmutex-tester.c4
-rw-r--r--kernel/rtmutex.c57
-rw-r--r--kernel/rtmutex.h3
-rw-r--r--kernel/rwsem.c147
-rw-r--r--kernel/sched.c748
-rw-r--r--kernel/softirq.c141
-rw-r--r--kernel/spinlock.c79
-rw-r--r--kernel/stacktrace.c24
-rw-r--r--kernel/stop_machine.c17
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/timer.c13
-rw-r--r--kernel/wait.c4
-rw-r--r--kernel/workqueue.c2
-rw-r--r--lib/Kconfig.debug127
-rw-r--r--lib/Makefile3
-rw-r--r--lib/debug_locks.c45
-rw-r--r--lib/kernel_lock.c7
-rw-r--r--lib/locking-selftest-hardirq.h9
-rw-r--r--lib/locking-selftest-mutex.h11
-rw-r--r--lib/locking-selftest-rlock-hardirq.h2
-rw-r--r--lib/locking-selftest-rlock-softirq.h2
-rw-r--r--lib/locking-selftest-rlock.h14
-rw-r--r--lib/locking-selftest-rsem.h14
-rw-r--r--lib/locking-selftest-softirq.h9
-rw-r--r--lib/locking-selftest-spin-hardirq.h2
-rw-r--r--lib/locking-selftest-spin-softirq.h2
-rw-r--r--lib/locking-selftest-spin.h11
-rw-r--r--lib/locking-selftest-wlock-hardirq.h2
-rw-r--r--lib/locking-selftest-wlock-softirq.h2
-rw-r--r--lib/locking-selftest-wlock.h14
-rw-r--r--lib/locking-selftest-wsem.h14
-rw-r--r--lib/locking-selftest.c1216
-rw-r--r--lib/rwsem-spinlock.c66
-rw-r--r--lib/rwsem.c51
-rw-r--r--lib/spinlock_debug.c98
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mremap.c2
-rw-r--r--mm/oom_kill.c8
-rw-r--r--mm/page_alloc.c22
-rw-r--r--mm/slab.c59
-rw-r--r--mm/swap_state.c2
-rw-r--r--mm/vmalloc.c2
-rw-r--r--mm/vmscan.c27
-rw-r--r--net/8021q/vlan.c11
-rw-r--r--net/core/skbuff.c7
-rw-r--r--net/core/sock.c111
-rw-r--r--net/ipv4/route.c26
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/sunrpc/rpc_pipe.c8
-rw-r--r--net/unix/af_unix.c12
-rw-r--r--sound/aoa/core/snd-aoa-gpio-feature.c7
-rw-r--r--sound/aoa/soundbus/i2sbus/i2sbus-core.c7
-rw-r--r--sound/core/seq/seq_device.c6
-rw-r--r--sound/core/seq/seq_ports.c4
-rw-r--r--sound/oss/dmasound/dmasound_awacs.c16
-rw-r--r--sound/ppc/pmac.c33
-rw-r--r--sound/ppc/tumbler.c8
-rw-r--r--sound/sparc/amd7930.c4
-rw-r--r--sound/sparc/cs4231.c2
-rw-r--r--sound/sparc/dbri.c2
413 files changed, 17758 insertions, 5048 deletions
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
new file mode 100644
index 000000000000..6a444877ee0b
--- /dev/null
+++ b/Documentation/irqflags-tracing.txt
@@ -0,0 +1,57 @@
1IRQ-flags state tracing
2
3started by Ingo Molnar <mingo@redhat.com>
4
5the "irq-flags tracing" feature "traces" hardirq and softirq state, in
6that it gives interested subsystems an opportunity to be notified of
7every hardirqs-off/hardirqs-on, softirqs-off/softirqs-on event that
8happens in the kernel.
9
10CONFIG_TRACE_IRQFLAGS_SUPPORT is needed for CONFIG_PROVE_SPIN_LOCKING
11and CONFIG_PROVE_RW_LOCKING to be offered by the generic lock debugging
12code. Otherwise only CONFIG_PROVE_MUTEX_LOCKING and
13CONFIG_PROVE_RWSEM_LOCKING will be offered on an architecture - these
14are locking APIs that are not used in IRQ context. (the one exception
15for rwsems is worked around)
16
17architecture support for this is certainly not in the "trivial"
18category, because lots of lowlevel assembly code deals with irq-flags
19state changes. But an architecture can be irq-flags-tracing enabled in a
20rather straightforward and risk-free manner.
21
22Architectures that want to support this need to do a couple of
23code-organizational changes first:
24
25- move their irq-flags manipulation code from their asm/system.h header
26 to asm/irqflags.h
27
28- rename local_irq_disable()/etc to raw_local_irq_disable()/etc. so that
29 the linux/irqflags.h code can inject callbacks and can construct the
30 real local_irq_disable()/etc APIs (see the sketch after this list).
31
32- add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file
33
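a rough sketch of what these constructed wrappers can look like (an
illustration only - not the actual <linux/irqflags.h> contents):

	/*
	 * Sketch only: compose the arch's raw irq-flags primitives with
	 * the tracing hooks to form the real local_irq_*() APIs.
	 */
	#ifdef CONFIG_TRACE_IRQFLAGS
	# define local_irq_enable() \
		do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
	# define local_irq_disable() \
		do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
	#else
	# define local_irq_enable()	raw_local_irq_enable()
	# define local_irq_disable()	raw_local_irq_disable()
	#endif
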
34and then a couple of functional changes are needed as well to implement
35irq-flags-tracing support:
36
37- in lowlevel entry code add (build-conditional) calls to the
38 trace_hardirqs_off()/trace_hardirqs_on() functions. The lock validator
39 closely guards whether the 'real' irq-flags matches the 'virtual'
40 irq-flags state, and complains loudly (and turns itself off) if the
41 two do not match. Usually, most of the time spent on arch support for
42 irq-flags-tracing goes into this stage: look at the lockdep
43 complaint, try to figure out the assembly code we did not cover yet,
44 fix and repeat. Once the system has booted up and works without a
45 lockdep complaint from the irq-flags-tracing functions, arch support is
46 complete.
47- if the architecture has non-maskable interrupts then those need to be
48 excluded from the irq-tracing [and lock validation] mechanism via
49 lockdep_off()/lockdep_on().
50
51in general there is no risk from having an incomplete irq-flags-tracing
52implementation in an architecture: lockdep will detect that and will
53turn itself off. I.e. the lock validator will still be reliable. There
54should be no crashes due to irq-tracing bugs. (except if the assembly
55changes break other code by modifying conditions or registers that
56shouldn't be.)
57
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 86e9282d1c20..149f62ba14a5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -435,6 +435,15 @@ running once the system is up.
435 435
436 debug [KNL] Enable kernel debugging (events log level). 436 debug [KNL] Enable kernel debugging (events log level).
437 437
438 debug_locks_verbose=
439 [KNL] verbose self-tests
440 Format=<0|1>
441 Print debugging info while doing the locking API
442 self-tests.
443 We default to 0 (no extra messages), setting it to
444 1 will print _a lot_ more information - normally
445 only useful to kernel developers.
446
438 decnet= [HW,NET] 447 decnet= [HW,NET]
439 Format: <area>[,<node>] 448 Format: <area>[,<node>]
440 See also Documentation/networking/decnet.txt. 449 See also Documentation/networking/decnet.txt.
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
new file mode 100644
index 000000000000..00d93605bfd3
--- /dev/null
+++ b/Documentation/lockdep-design.txt
@@ -0,0 +1,197 @@
1Runtime locking correctness validator
2=====================================
3
4started by Ingo Molnar <mingo@redhat.com>
5additions by Arjan van de Ven <arjan@linux.intel.com>
6
7Lock-class
8----------
9
10The basic object the validator operates upon is a 'class' of locks.
11
12A class of locks is a group of locks that are logically the same with
13respect to locking rules, even if the locks may have multiple (possibly
14tens of thousands of) instantiations. For example a lock in the inode
15struct is one class, while each inode has its own instantiation of that
16lock class.
17
18The validator tracks the 'state' of lock-classes, and it tracks
19dependencies between different lock-classes. The validator maintains a
20rolling proof that the state and the dependencies are correct.
21
22Unlike a lock instantiation, the lock-class itself never goes away: when
23a lock-class is used for the first time after bootup it gets registered,
24and all subsequent uses of that lock-class will be attached to this
25lock-class.
26
27State
28-----
29
30The validator tracks lock-class usage history using 5 separate state bits:
31
32- 'ever held in hardirq context' [ == hardirq-safe ]
33- 'ever held in softirq context' [ == softirq-safe ]
34- 'ever held with hardirqs enabled' [ == hardirq-unsafe ]
35- 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ]
36
37- 'ever used' [ == !unused ]
38
39Single-lock state rules:
40------------------------
41
42A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
43following state pairs are mutually exclusive; at most one state of each
44pair may be set for any lock-class:
45
46 <hardirq-safe> and <hardirq-unsafe>
47 <softirq-safe> and <softirq-unsafe>
48
49The validator detects and reports lock usage that violates these
50single-lock state rules.
51
52Multi-lock dependency rules:
53----------------------------
54
55The same lock-class must not be acquired twice, because this could lead
56to lock recursion deadlocks.
57
58Furthermore, two locks may not be taken in opposite orders:
59
60 <L1> -> <L2>
61 <L2> -> <L1>
62
63because this could lead to lock inversion deadlocks. (The validator
64finds such dependencies in arbitrary complexity, i.e. there can be any
65other locking sequence between the acquire-lock operations; the
66validator will still track all dependencies between locks.)
67
68Furthermore, the following usage based lock dependencies are not allowed
69between any two lock-classes:
70
71 <hardirq-safe> -> <hardirq-unsafe>
72 <softirq-safe> -> <softirq-unsafe>
73
74The first rule comes from the fact that a hardirq-safe lock could be
75taken by a hardirq context, interrupting a hardirq-unsafe lock - and
76thus could result in a lock inversion deadlock. Likewise, a softirq-safe
77lock could be taken by a softirq context, interrupting a softirq-unsafe
78lock.
79
80The above rules are enforced for any locking sequence that occurs in the
81kernel: when acquiring a new lock, the validator checks whether there is
82any rule violation between the new lock and any of the held locks.
83
84When a lock-class changes its state, the following aspects of the above
85dependency rules are enforced:
86
87- if a new hardirq-safe lock is discovered, we check whether it
88 took any hardirq-unsafe lock in the past.
89
90- if a new softirq-safe lock is discovered, we check whether it took
91 any softirq-unsafe lock in the past.
92
93- if a new hardirq-unsafe lock is discovered, we check whether any
94 hardirq-safe lock took it in the past.
95
96- if a new softirq-unsafe lock is discovered, we check whether any
97 softirq-safe lock took it in the past.
98
99(Again, we do these checks on the basis that an interrupt context
100could interrupt _any_ of the softirq-unsafe or hardirq-unsafe locks, which
101could lead to a lock inversion deadlock - even if that lock scenario has
102not triggered in practice yet.)
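
As an illustration, here is a minimal userspace analogue of the forbidden
ordering (a sketch using POSIX threads rather than kernel locks; the thread
functions and lock names are made up for the example):

	#include <pthread.h>

	static pthread_mutex_t L1 = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t L2 = PTHREAD_MUTEX_INITIALIZER;

	static void *thread_a(void *arg)
	{
		pthread_mutex_lock(&L1);	/* establishes L1 -> L2 */
		pthread_mutex_lock(&L2);
		pthread_mutex_unlock(&L2);
		pthread_mutex_unlock(&L1);
		return NULL;
	}

	static void *thread_b(void *arg)
	{
		pthread_mutex_lock(&L2);	/* establishes L2 -> L1: inversion */
		pthread_mutex_lock(&L1);
		pthread_mutex_unlock(&L1);
		pthread_mutex_unlock(&L2);
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, thread_a, NULL);
		pthread_create(&b, NULL, thread_b, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		return 0;
	}

Whether this program actually hangs depends on timing; the point of the
dependency rules is that the inversion is flagged as soon as both orderings
have been observed, even on a run where no deadlock happens to trigger.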
103
104Exception: Nested data dependencies leading to nested locking
105-------------------------------------------------------------
106
107There are a few cases where the Linux kernel acquires more than one
108instance of the same lock-class. Such cases typically happen when there
109is some sort of hierarchy within objects of the same type. In these
110cases there is an inherent "natural" ordering between the two objects
111(defined by the properties of the hierarchy), and the kernel grabs the
112locks in this fixed order on each of the objects.
113
114An example of such an object hierarchy that results in "nested locking"
115is that of a "whole disk" block-dev object and a "partition" block-dev
116object; the partition is "part of" the whole device and as long as one
117always takes the whole disk lock as a higher lock than the partition
118lock, the lock ordering is fully correct. The validator does not
119automatically detect this natural ordering, as the locking rule behind
120the ordering is not static.
121
122In order to teach the validator about this correct usage model, new
123versions of the various locking primitives were added that allow you to
124specify a "nesting level". An example call, for the block device mutex,
125looks like this:
126
127enum bdev_bd_mutex_lock_class
128{
129 BD_MUTEX_NORMAL,
130 BD_MUTEX_WHOLE,
131 BD_MUTEX_PARTITION
132};
133
134 mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
135
136In this case the locking is done on a bdev object that is known to be a
137partition.
138
139The validator treats a lock that is taken in such a nested fashion as a
140separate (sub)class for the purposes of validation.
141
142Note: When changing code to use the _nested() primitives, be careful and
143check really thoroughly that the hierarchy is correctly mapped; otherwise
144you can get false positives or false negatives.
145
146Proof of 100% correctness:
147--------------------------
148
149The validator achieves perfect, mathematical 'closure' (proof of locking
150correctness) in the sense that for every simple, standalone single-task
151locking sequence that occurred at least once during the lifetime of the
152kernel, the validator proves with 100% certainty that no
153combination and timing of these locking sequences can cause any class of
154lock-related deadlock. [*]
155
156I.e. complex multi-CPU and multi-task locking scenarios do not have to
157occur in practice to prove a deadlock: only the simple 'component'
158locking chains have to occur at least once (anytime, in any
159task/context) for the validator to be able to prove correctness. (For
160example, complex deadlocks that would normally need more than 3 CPUs and
161a very unlikely constellation of tasks, irq-contexts and timings to
162occur, can be detected on a plain, lightly loaded single-CPU system as
163well!)
164
165This radically decreases the complexity of locking related QA of the
166kernel: what has to be done during QA is to trigger as many "simple"
167single-task locking dependencies in the kernel as possible, at least
168once, to prove locking correctness - instead of having to trigger every
169possible combination of locking interaction between CPUs, combined with
170every possible hardirq and softirq nesting scenario (which is impossible
171to do in practice).
172
173[*] assuming that the validator itself is 100% correct, and no other
174 part of the system corrupts the state of the validator in any way.
175 We also assume that all NMI/SMM paths [which could interrupt
176 even hardirq-disabled codepaths] are correct and do not interfere
177 with the validator. We also assume that the 64-bit 'chain hash'
178 value is unique for every lock-chain in the system. Also, lock
179 recursion must not be higher than 20.
180
181Performance:
182------------
183
184The above rules require _massive_ amounts of runtime checking. If we did
185that for every lock taken and for every irqs-enable event, it would
186render the system practically unusably slow. The complexity of checking
187is O(N^2), so even with just a few hundred lock-classes we'd have to do
188tens of thousands of checks for every event.
189
190This problem is solved by checking any given 'locking scenario' (unique
191sequence of locks taken after each other) only once. A simple stack of
192held locks is maintained, and a lightweight 64-bit hash value is
193calculated; this hash is unique for every lock chain. When the chain
194is validated for the first time, its hash value is put into a hash
195table, which can be checked in a lockfree manner. If the
196locking chain occurs again later on, the hash table tells us that we
197don't have to validate the chain again.
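
A toy sketch of this caching idea (illustration only - the hash function,
table size and helper names below are hypothetical, not the kernel's actual
data structures):

	#include <stdint.h>

	#define MAX_HELD	48
	#define CHAIN_SLOTS	(1 << 14)

	static unsigned int held[MAX_HELD];	/* class IDs of held locks */
	static unsigned int nr_held;
	static uint64_t chain_seen[CHAIN_SLOTS];	/* hashes validated so far */

	/* 64-bit hash over the current chain of held lock classes (FNV-1a). */
	static uint64_t chain_hash(void)
	{
		uint64_t h = 1469598103934665603ULL;
		unsigned int i;

		for (i = 0; i < nr_held; i++) {
			h ^= held[i];
			h *= 1099511628211ULL;
		}
		return h;
	}

	static void expensive_validate_chain(void)
	{
		/* the O(N^2) dependency checks would go here */
	}

	/* Called on every lock acquire, with the acquired lock's class ID. */
	static void note_lock_acquire(unsigned int class_id)
	{
		uint64_t h;

		if (nr_held < MAX_HELD)
			held[nr_held++] = class_id;

		h = chain_hash();
		if (chain_seen[h % CHAIN_SLOTS] == h)
			return;		/* this chain was validated before */

		expensive_validate_chain();
		chain_seen[h % CHAIN_SLOTS] = h;
	}
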
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 217e51768b87..3c62e66e1fcc 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -1436,9 +1436,9 @@ platforms are moved over to use the flattened-device-tree model.
1436 interrupts = <1d 3>; 1436 interrupts = <1d 3>;
1437 interrupt-parent = <40000>; 1437 interrupt-parent = <40000>;
1438 num-channels = <4>; 1438 num-channels = <4>;
1439 channel-fifo-len = <24>; 1439 channel-fifo-len = <18>;
1440 exec-units-mask = <000000fe>; 1440 exec-units-mask = <000000fe>;
1441 descriptor-types-mask = <073f1127>; 1441 descriptor-types-mask = <012b0ebf>;
1442 }; 1442 };
1443 1443
1444 1444
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 86754eb390da..7cee90223d3a 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm:
28- block_dump 28- block_dump
29- drop-caches 29- drop-caches
30- zone_reclaim_mode 30- zone_reclaim_mode
31- min_unmapped_ratio
31- panic_on_oom 32- panic_on_oom
32 33
33============================================================== 34==============================================================
@@ -168,6 +169,19 @@ in all nodes of the system.
168 169
169============================================================= 170=============================================================
170 171
172min_unmapped_ratio:
173
174This is available only on NUMA kernels.
175
176A percentage of the file backed pages in each zone. Zone reclaim will only
177occur if more than this percentage of pages are file backed and unmapped.
178This is to ensure that a minimal amount of local pages is still available for
179file I/O even if the node is overallocated.
180
181The default is 1 percent.
182
183=============================================================
184
171panic_on_oom 185panic_on_oom
172 186
173This enables or disables panic on out-of-memory feature. If this is set to 1, 187This enables or disables panic on out-of-memory feature. If this is set to 1,
diff --git a/MAINTAINERS b/MAINTAINERS
index 42be131139c8..5f76a4f5cd4b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -861,6 +861,8 @@ S: Maintained
861DOCBOOK FOR DOCUMENTATION 861DOCBOOK FOR DOCUMENTATION
862P: Martin Waitz 862P: Martin Waitz
863M: tali@admingilde.org 863M: tali@admingilde.org
864P: Randy Dunlap
865M: rdunlap@xenotime.net
864T: git http://tali.admingilde.org/git/linux-docbook.git 866T: git http://tali.admingilde.org/git/linux-docbook.git
865S: Maintained 867S: Maintained
866 868
@@ -2298,6 +2300,14 @@ M: promise@pnd-pc.demon.co.uk
2298W: http://www.pnd-pc.demon.co.uk/promise/ 2300W: http://www.pnd-pc.demon.co.uk/promise/
2299S: Maintained 2301S: Maintained
2300 2302
2303PVRUSB2 VIDEO4LINUX DRIVER
2304P: Mike Isely
2305M: isely@pobox.com
2306L: pvrusb2@isely.net
2307L: video4linux-list@redhat.com
2308W: http://www.isely.net/pvrusb2/
2309S: Maintained
2310
2301PXA2xx SUPPORT 2311PXA2xx SUPPORT
2302P: Nicolas Pitre 2312P: Nicolas Pitre
2303M: nico@cam.org 2313M: nico@cam.org
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 01c8c8b23337..41ebf51a107a 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -474,7 +474,7 @@ out:
474 */ 474 */
475 475
476unsigned long 476unsigned long
477thread_saved_pc(task_t *t) 477thread_saved_pc(struct task_struct *t)
478{ 478{
479 unsigned long base = (unsigned long)task_stack_page(t); 479 unsigned long base = (unsigned long)task_stack_page(t);
480 unsigned long fp, sp = task_thread_info(t)->pcb.ksp; 480 unsigned long fp, sp = task_thread_info(t)->pcb.ksp;
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 27d8dddbaa47..daa75ce4b777 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -18,6 +18,14 @@ config GENERIC_TIME
18 bool 18 bool
19 default y 19 default y
20 20
21config LOCKDEP_SUPPORT
22 bool
23 default y
24
25config STACKTRACE_SUPPORT
26 bool
27 default y
28
21config SEMAPHORE_SLEEPERS 29config SEMAPHORE_SLEEPERS
22 bool 30 bool
23 default y 31 default y
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index c92191b1fb67..b31c0802e1cc 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -1,5 +1,9 @@
1menu "Kernel hacking" 1menu "Kernel hacking"
2 2
3config TRACE_IRQFLAGS_SUPPORT
4 bool
5 default y
6
3source "lib/Kconfig.debug" 7source "lib/Kconfig.debug"
4 8
5config EARLY_PRINTK 9config EARLY_PRINTK
@@ -31,15 +35,6 @@ config DEBUG_STACK_USAGE
31 35
32 This option will slow down process creation somewhat. 36 This option will slow down process creation somewhat.
33 37
34config STACK_BACKTRACE_COLS
35 int "Stack backtraces per line" if DEBUG_KERNEL
36 range 1 3
37 default 2
38 help
39 Selects how many stack backtrace entries per line to display.
40
41 This can save screen space when displaying traces.
42
43comment "Page alloc debug is incompatible with Software Suspend on i386" 38comment "Page alloc debug is incompatible with Software Suspend on i386"
44 depends on DEBUG_KERNEL && SOFTWARE_SUSPEND 39 depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
45 40
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index cbc1184e9473..1b452a1665c4 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -9,6 +9,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
9 pci-dma.o i386_ksyms.o i387.o bootflag.o \ 9 pci-dma.o i386_ksyms.o i387.o bootflag.o \
10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o 10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
11 11
12obj-$(CONFIG_STACKTRACE) += stacktrace.o
12obj-y += cpu/ 13obj-y += cpu/
13obj-y += acpi/ 14obj-y += acpi/
14obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 15obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 7b421b3a053e..28ab80649764 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -303,6 +303,16 @@ void alternatives_smp_switch(int smp)
303 struct smp_alt_module *mod; 303 struct smp_alt_module *mod;
304 unsigned long flags; 304 unsigned long flags;
305 305
306#ifdef CONFIG_LOCKDEP
307 /*
308 * A not yet fixed binutils section handling bug prevents
309 * alternatives-replacement from working reliably, so turn
310 * it off:
311 */
312 printk("lockdep: not fixing up alternatives.\n");
313 return;
314#endif
315
306 if (no_replacement || smp_alt_once) 316 if (no_replacement || smp_alt_once)
307 return; 317 return;
308 BUG_ON(!smp && (num_online_cpus() > 1)); 318 BUG_ON(!smp && (num_online_cpus() > 1));
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index a8d3ecdc3897..fde8bea85cee 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -167,6 +167,7 @@ static int cpuid_class_device_create(int i)
167 return err; 167 return err;
168} 168}
169 169
170#ifdef CONFIG_HOTPLUG_CPU
170static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 171static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
171{ 172{
172 unsigned int cpu = (unsigned long)hcpu; 173 unsigned int cpu = (unsigned long)hcpu;
@@ -186,6 +187,7 @@ static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier =
186{ 187{
187 .notifier_call = cpuid_class_cpu_callback, 188 .notifier_call = cpuid_class_cpu_callback,
188}; 189};
190#endif /* !CONFIG_HOTPLUG_CPU */
189 191
190static int __init cpuid_init(void) 192static int __init cpuid_init(void)
191{ 193{
@@ -208,7 +210,7 @@ static int __init cpuid_init(void)
208 if (err != 0) 210 if (err != 0)
209 goto out_class; 211 goto out_class;
210 } 212 }
211 register_cpu_notifier(&cpuid_class_cpu_notifier); 213 register_hotcpu_notifier(&cpuid_class_cpu_notifier);
212 214
213 err = 0; 215 err = 0;
214 goto out; 216 goto out;
@@ -233,7 +235,7 @@ static void __exit cpuid_exit(void)
233 class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); 235 class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
234 class_destroy(cpuid_class); 236 class_destroy(cpuid_class);
235 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); 237 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
236 unregister_cpu_notifier(&cpuid_class_cpu_notifier); 238 unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
237} 239}
238 240
239module_init(cpuid_init); 241module_init(cpuid_init);
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 787190c45fdb..d9a260f2efb4 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -42,6 +42,7 @@
42 42
43#include <linux/linkage.h> 43#include <linux/linkage.h>
44#include <asm/thread_info.h> 44#include <asm/thread_info.h>
45#include <asm/irqflags.h>
45#include <asm/errno.h> 46#include <asm/errno.h>
46#include <asm/segment.h> 47#include <asm/segment.h>
47#include <asm/smp.h> 48#include <asm/smp.h>
@@ -76,12 +77,21 @@ NT_MASK = 0x00004000
76VM_MASK = 0x00020000 77VM_MASK = 0x00020000
77 78
78#ifdef CONFIG_PREEMPT 79#ifdef CONFIG_PREEMPT
79#define preempt_stop cli 80#define preempt_stop cli; TRACE_IRQS_OFF
80#else 81#else
81#define preempt_stop 82#define preempt_stop
82#define resume_kernel restore_nocheck 83#define resume_kernel restore_nocheck
83#endif 84#endif
84 85
86.macro TRACE_IRQS_IRET
87#ifdef CONFIG_TRACE_IRQFLAGS
88 testl $IF_MASK,EFLAGS(%esp) # interrupts off?
89 jz 1f
90 TRACE_IRQS_ON
911:
92#endif
93.endm
94
85#ifdef CONFIG_VM86 95#ifdef CONFIG_VM86
86#define resume_userspace_sig check_userspace 96#define resume_userspace_sig check_userspace
87#else 97#else
@@ -257,6 +267,10 @@ ENTRY(sysenter_entry)
257 CFI_REGISTER esp, ebp 267 CFI_REGISTER esp, ebp
258 movl TSS_sysenter_esp0(%esp),%esp 268 movl TSS_sysenter_esp0(%esp),%esp
259sysenter_past_esp: 269sysenter_past_esp:
270 /*
271 * No need to follow this irqs on/off section: the syscall
272 * disabled irqs and here we enable it straight after entry:
273 */
260 sti 274 sti
261 pushl $(__USER_DS) 275 pushl $(__USER_DS)
262 CFI_ADJUST_CFA_OFFSET 4 276 CFI_ADJUST_CFA_OFFSET 4
@@ -303,6 +317,7 @@ sysenter_past_esp:
303 call *sys_call_table(,%eax,4) 317 call *sys_call_table(,%eax,4)
304 movl %eax,EAX(%esp) 318 movl %eax,EAX(%esp)
305 cli 319 cli
320 TRACE_IRQS_OFF
306 movl TI_flags(%ebp), %ecx 321 movl TI_flags(%ebp), %ecx
307 testw $_TIF_ALLWORK_MASK, %cx 322 testw $_TIF_ALLWORK_MASK, %cx
308 jne syscall_exit_work 323 jne syscall_exit_work
@@ -310,6 +325,7 @@ sysenter_past_esp:
310 movl EIP(%esp), %edx 325 movl EIP(%esp), %edx
311 movl OLDESP(%esp), %ecx 326 movl OLDESP(%esp), %ecx
312 xorl %ebp,%ebp 327 xorl %ebp,%ebp
328 TRACE_IRQS_ON
313 sti 329 sti
314 sysexit 330 sysexit
315 CFI_ENDPROC 331 CFI_ENDPROC
@@ -339,6 +355,7 @@ syscall_exit:
339 cli # make sure we don't miss an interrupt 355 cli # make sure we don't miss an interrupt
340 # setting need_resched or sigpending 356 # setting need_resched or sigpending
341 # between sampling and the iret 357 # between sampling and the iret
358 TRACE_IRQS_OFF
342 movl TI_flags(%ebp), %ecx 359 movl TI_flags(%ebp), %ecx
343 testw $_TIF_ALLWORK_MASK, %cx # current->work 360 testw $_TIF_ALLWORK_MASK, %cx # current->work
344 jne syscall_exit_work 361 jne syscall_exit_work
@@ -355,12 +372,15 @@ restore_all:
355 CFI_REMEMBER_STATE 372 CFI_REMEMBER_STATE
356 je ldt_ss # returning to user-space with LDT SS 373 je ldt_ss # returning to user-space with LDT SS
357restore_nocheck: 374restore_nocheck:
375 TRACE_IRQS_IRET
376restore_nocheck_notrace:
358 RESTORE_REGS 377 RESTORE_REGS
359 addl $4, %esp 378 addl $4, %esp
360 CFI_ADJUST_CFA_OFFSET -4 379 CFI_ADJUST_CFA_OFFSET -4
3611: iret 3801: iret
362.section .fixup,"ax" 381.section .fixup,"ax"
363iret_exc: 382iret_exc:
383 TRACE_IRQS_ON
364 sti 384 sti
365 pushl $0 # no error code 385 pushl $0 # no error code
366 pushl $do_iret_error 386 pushl $do_iret_error
@@ -386,11 +406,13 @@ ldt_ss:
386 subl $8, %esp # reserve space for switch16 pointer 406 subl $8, %esp # reserve space for switch16 pointer
387 CFI_ADJUST_CFA_OFFSET 8 407 CFI_ADJUST_CFA_OFFSET 8
388 cli 408 cli
409 TRACE_IRQS_OFF
389 movl %esp, %eax 410 movl %esp, %eax
390 /* Set up the 16bit stack frame with switch32 pointer on top, 411 /* Set up the 16bit stack frame with switch32 pointer on top,
391 * and a switch16 pointer on top of the current frame. */ 412 * and a switch16 pointer on top of the current frame. */
392 call setup_x86_bogus_stack 413 call setup_x86_bogus_stack
393 CFI_ADJUST_CFA_OFFSET -8 # frame has moved 414 CFI_ADJUST_CFA_OFFSET -8 # frame has moved
415 TRACE_IRQS_IRET
394 RESTORE_REGS 416 RESTORE_REGS
395 lss 20+4(%esp), %esp # switch to 16bit stack 417 lss 20+4(%esp), %esp # switch to 16bit stack
3961: iret 4181: iret
@@ -411,6 +433,7 @@ work_resched:
411 cli # make sure we don't miss an interrupt 433 cli # make sure we don't miss an interrupt
412 # setting need_resched or sigpending 434 # setting need_resched or sigpending
413 # between sampling and the iret 435 # between sampling and the iret
436 TRACE_IRQS_OFF
414 movl TI_flags(%ebp), %ecx 437 movl TI_flags(%ebp), %ecx
415 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other 438 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
416 # than syscall tracing? 439 # than syscall tracing?
@@ -462,6 +485,7 @@ syscall_trace_entry:
462syscall_exit_work: 485syscall_exit_work:
463 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl 486 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
464 jz work_pending 487 jz work_pending
488 TRACE_IRQS_ON
465 sti # could let do_syscall_trace() call 489 sti # could let do_syscall_trace() call
466 # schedule() instead 490 # schedule() instead
467 movl %esp, %eax 491 movl %esp, %eax
@@ -535,9 +559,14 @@ ENTRY(irq_entries_start)
535vector=vector+1 559vector=vector+1
536.endr 560.endr
537 561
562/*
563 * the CPU automatically disables interrupts when executing an IRQ vector,
564 * so IRQ-flags tracing has to follow that:
565 */
538 ALIGN 566 ALIGN
539common_interrupt: 567common_interrupt:
540 SAVE_ALL 568 SAVE_ALL
569 TRACE_IRQS_OFF
541 movl %esp,%eax 570 movl %esp,%eax
542 call do_IRQ 571 call do_IRQ
543 jmp ret_from_intr 572 jmp ret_from_intr
@@ -549,9 +578,10 @@ ENTRY(name) \
549 pushl $~(nr); \ 578 pushl $~(nr); \
550 CFI_ADJUST_CFA_OFFSET 4; \ 579 CFI_ADJUST_CFA_OFFSET 4; \
551 SAVE_ALL; \ 580 SAVE_ALL; \
581 TRACE_IRQS_OFF \
552 movl %esp,%eax; \ 582 movl %esp,%eax; \
553 call smp_/**/name; \ 583 call smp_/**/name; \
554 jmp ret_from_intr; \ 584 jmp ret_from_intr; \
555 CFI_ENDPROC 585 CFI_ENDPROC
556 586
557/* The include is where all of the SMP etc. interrupts come from */ 587/* The include is where all of the SMP etc. interrupts come from */
@@ -726,7 +756,7 @@ nmi_stack_correct:
726 xorl %edx,%edx # zero error code 756 xorl %edx,%edx # zero error code
727 movl %esp,%eax # pt_regs pointer 757 movl %esp,%eax # pt_regs pointer
728 call do_nmi 758 call do_nmi
729 jmp restore_all 759 jmp restore_nocheck_notrace
730 CFI_ENDPROC 760 CFI_ENDPROC
731 761
732nmi_stack_fixup: 762nmi_stack_fixup:
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 16b491703967..6cb529f60dcc 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -166,7 +166,7 @@ void irq_ctx_init(int cpu)
166 irqctx->tinfo.task = NULL; 166 irqctx->tinfo.task = NULL;
167 irqctx->tinfo.exec_domain = NULL; 167 irqctx->tinfo.exec_domain = NULL;
168 irqctx->tinfo.cpu = cpu; 168 irqctx->tinfo.cpu = cpu;
169 irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET; 169 irqctx->tinfo.preempt_count = 0;
170 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 170 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
171 171
172 softirq_ctx[cpu] = irqctx; 172 softirq_ctx[cpu] = irqctx;
@@ -211,6 +211,10 @@ asmlinkage void do_softirq(void)
211 : "0"(isp) 211 : "0"(isp)
212 : "memory", "cc", "edx", "ecx", "eax" 212 : "memory", "cc", "edx", "ecx", "eax"
213 ); 213 );
214 /*
215 * Shouldn't happen, we returned above if in_interrupt():
216 */
217 WARN_ON_ONCE(softirq_count());
214 } 218 }
215 219
216 local_irq_restore(flags); 220 local_irq_restore(flags);
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index a76e93146585..2dd928a84645 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -107,7 +107,7 @@ int nmi_active;
107static __init void nmi_cpu_busy(void *data) 107static __init void nmi_cpu_busy(void *data)
108{ 108{
109 volatile int *endflag = data; 109 volatile int *endflag = data;
110 local_irq_enable(); 110 local_irq_enable_in_hardirq();
111 /* Intentionally don't use cpu_relax here. This is 111 /* Intentionally don't use cpu_relax here. This is
112 to make sure that the performance counter really ticks, 112 to make sure that the performance counter really ticks,
113 even if there is a simulator or similar that catches the 113 even if there is a simulator or similar that catches the
diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c
new file mode 100644
index 000000000000..e62a037ab399
--- /dev/null
+++ b/arch/i386/kernel/stacktrace.c
@@ -0,0 +1,98 @@
1/*
2 * arch/i386/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/stacktrace.h>
10
11static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
12{
13 return p > (void *)tinfo &&
14 p < (void *)tinfo + THREAD_SIZE - 3;
15}
16
17/*
18 * Save stack-backtrace addresses into a stack_trace buffer:
19 */
20static inline unsigned long
21save_context_stack(struct stack_trace *trace, unsigned int skip,
22 struct thread_info *tinfo, unsigned long *stack,
23 unsigned long ebp)
24{
25 unsigned long addr;
26
27#ifdef CONFIG_FRAME_POINTER
28 while (valid_stack_ptr(tinfo, (void *)ebp)) {
29 addr = *(unsigned long *)(ebp + 4);
30 if (!skip)
31 trace->entries[trace->nr_entries++] = addr;
32 else
33 skip--;
34 if (trace->nr_entries >= trace->max_entries)
35 break;
36 /*
37 * break out of recursive entries (such as
38 * end_of_stack_stop_unwind_function):
39 */
40 if (ebp == *(unsigned long *)ebp)
41 break;
42
43 ebp = *(unsigned long *)ebp;
44 }
45#else
46 while (valid_stack_ptr(tinfo, stack)) {
47 addr = *stack++;
48 if (__kernel_text_address(addr)) {
49 if (!skip)
50 trace->entries[trace->nr_entries++] = addr;
51 else
52 skip--;
53 if (trace->nr_entries >= trace->max_entries)
54 break;
55 }
56 }
57#endif
58
59 return ebp;
60}
61
62/*
63 * Save stack-backtrace addresses into a stack_trace buffer.
64 * If all_contexts is set, all contexts (hardirq, softirq and process)
65 * are saved. If not set then only the current context is saved.
66 */
67void save_stack_trace(struct stack_trace *trace,
68 struct task_struct *task, int all_contexts,
69 unsigned int skip)
70{
71 unsigned long ebp;
72 unsigned long *stack = &ebp;
73
74 WARN_ON(trace->nr_entries || !trace->max_entries);
75
76 if (!task || task == current) {
77 /* Grab ebp right from our regs: */
78 asm ("movl %%ebp, %0" : "=r" (ebp));
79 } else {
80 /* ebp is the last reg pushed by switch_to(): */
81 ebp = *(unsigned long *) task->thread.esp;
82 }
83
84 while (1) {
85 struct thread_info *context = (struct thread_info *)
86 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
87
88 ebp = save_context_stack(trace, skip, context, stack, ebp);
89 stack = (unsigned long *)context->previous_esp;
90 if (!all_contexts || !stack ||
91 trace->nr_entries >= trace->max_entries)
92 break;
93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94 if (trace->nr_entries >= trace->max_entries)
95 break;
96 }
97}
98
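A hedged sketch of how a caller can use this new interface
(dump_current_stack() and the 32-entry buffer are made up for the example;
only the save_stack_trace() signature and the stack_trace fields
dereferenced above are taken from this patch):

	#include <linux/kallsyms.h>
	#include <linux/sched.h>
	#include <linux/stacktrace.h>

	static void dump_current_stack(void)
	{
		unsigned long entries[32];
		struct stack_trace trace = {
			.nr_entries	= 0,
			.max_entries	= 32,
			.entries	= entries,
		};
		unsigned int i;

		/* current task, current context only, skip no frames */
		save_stack_trace(&trace, current, 0, 0);

		for (i = 0; i < trace.nr_entries; i++)
			print_symbol("  %s\n", trace.entries[i]);
	}
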
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index e8c6086b2aa1..2bf8b55b91f8 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -115,28 +115,13 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
115} 115}
116 116
117/* 117/*
118 * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. 118 * Print one address/symbol entries per line.
119 */ 119 */
120static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, 120static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
121 int printed)
122{ 121{
123 if (!printed)
124 printk(log_lvl);
125
126#if CONFIG_STACK_BACKTRACE_COLS == 1
127 printk(" [<%08lx>] ", addr); 122 printk(" [<%08lx>] ", addr);
128#else
129 printk(" <%08lx> ", addr);
130#endif
131 print_symbol("%s", addr);
132 123
133 printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; 124 print_symbol("%s\n", addr);
134 if (printed)
135 printk(" ");
136 else
137 printk("\n");
138
139 return printed;
140} 125}
141 126
142static inline unsigned long print_context_stack(struct thread_info *tinfo, 127static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -144,12 +129,11 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
144 char *log_lvl) 129 char *log_lvl)
145{ 130{
146 unsigned long addr; 131 unsigned long addr;
147 int printed = 0; /* nr of entries already printed on current line */
148 132
149#ifdef CONFIG_FRAME_POINTER 133#ifdef CONFIG_FRAME_POINTER
150 while (valid_stack_ptr(tinfo, (void *)ebp)) { 134 while (valid_stack_ptr(tinfo, (void *)ebp)) {
151 addr = *(unsigned long *)(ebp + 4); 135 addr = *(unsigned long *)(ebp + 4);
152 printed = print_addr_and_symbol(addr, log_lvl, printed); 136 print_addr_and_symbol(addr, log_lvl);
153 /* 137 /*
154 * break out of recursive entries (such as 138 * break out of recursive entries (such as
155 * end_of_stack_stop_unwind_function): 139 * end_of_stack_stop_unwind_function):
@@ -162,28 +146,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
162 while (valid_stack_ptr(tinfo, stack)) { 146 while (valid_stack_ptr(tinfo, stack)) {
163 addr = *stack++; 147 addr = *stack++;
164 if (__kernel_text_address(addr)) 148 if (__kernel_text_address(addr))
165 printed = print_addr_and_symbol(addr, log_lvl, printed); 149 print_addr_and_symbol(addr, log_lvl);
166 } 150 }
167#endif 151#endif
168 if (printed)
169 printk("\n");
170
171 return ebp; 152 return ebp;
172} 153}
173 154
174static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) 155static asmlinkage int
156show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
175{ 157{
176 int n = 0; 158 int n = 0;
177 int printed = 0; /* nr of entries already printed on current line */
178 159
179 while (unwind(info) == 0 && UNW_PC(info)) { 160 while (unwind(info) == 0 && UNW_PC(info)) {
180 ++n; 161 n++;
181 printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); 162 print_addr_and_symbol(UNW_PC(info), log_lvl);
182 if (arch_unw_user_mode(info)) 163 if (arch_unw_user_mode(info))
183 break; 164 break;
184 } 165 }
185 if (printed)
186 printk("\n");
187 return n; 166 return n;
188} 167}
189 168
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index eb8e8dc5ac8e..2fbe4536fe18 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -678,7 +678,7 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
678 */ 678 */
679 679
680static void 680static void
681ia64_mca_modify_comm(const task_t *previous_current) 681ia64_mca_modify_comm(const struct task_struct *previous_current)
682{ 682{
683 char *p, comm[sizeof(current->comm)]; 683 char *p, comm[sizeof(current->comm)];
684 if (previous_current->pid) 684 if (previous_current->pid)
@@ -709,7 +709,7 @@ ia64_mca_modify_comm(const task_t *previous_current)
709 * that we can do backtrace on the MCA/INIT handler code itself. 709 * that we can do backtrace on the MCA/INIT handler code itself.
710 */ 710 */
711 711
712static task_t * 712static struct task_struct *
713ia64_mca_modify_original_stack(struct pt_regs *regs, 713ia64_mca_modify_original_stack(struct pt_regs *regs,
714 const struct switch_stack *sw, 714 const struct switch_stack *sw,
715 struct ia64_sal_os_state *sos, 715 struct ia64_sal_os_state *sos,
@@ -719,7 +719,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
719 ia64_va va; 719 ia64_va va;
720 extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ 720 extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */
721 const pal_min_state_area_t *ms = sos->pal_min_state; 721 const pal_min_state_area_t *ms = sos->pal_min_state;
722 task_t *previous_current; 722 struct task_struct *previous_current;
723 struct pt_regs *old_regs; 723 struct pt_regs *old_regs;
724 struct switch_stack *old_sw; 724 struct switch_stack *old_sw;
725 unsigned size = sizeof(struct pt_regs) + 725 unsigned size = sizeof(struct pt_regs) +
@@ -1023,7 +1023,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1023 pal_processor_state_info_t *psp = (pal_processor_state_info_t *) 1023 pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
1024 &sos->proc_state_param; 1024 &sos->proc_state_param;
1025 int recover, cpu = smp_processor_id(); 1025 int recover, cpu = smp_processor_id();
1026 task_t *previous_current; 1026 struct task_struct *previous_current;
1027 struct ia64_mca_notify_die nd = 1027 struct ia64_mca_notify_die nd =
1028 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1028 { .sos = sos, .monarch_cpu = &monarch_cpu };
1029 1029
@@ -1352,7 +1352,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1352{ 1352{
1353 static atomic_t slaves; 1353 static atomic_t slaves;
1354 static atomic_t monarchs; 1354 static atomic_t monarchs;
1355 task_t *previous_current; 1355 struct task_struct *previous_current;
1356 int cpu = smp_processor_id(); 1356 int cpu = smp_processor_id();
1357 struct ia64_mca_notify_die nd = 1357 struct ia64_mca_notify_die nd =
1358 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1358 { .sos = sos, .monarch_cpu = &monarch_cpu };
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index e1960979be29..6203ed4ec8cf 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -124,7 +124,7 @@ extern void __devinit calibrate_delay (void);
124extern void start_ap (void); 124extern void start_ap (void);
125extern unsigned long ia64_iobase; 125extern unsigned long ia64_iobase;
126 126
127task_t *task_for_booting_cpu; 127struct task_struct *task_for_booting_cpu;
128 128
129/* 129/*
130 * State for each CPU 130 * State for each CPU
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index ecfd637d702a..01e7fa86aa43 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -65,7 +65,7 @@ need_resched:
65#endif 65#endif
66 66
67FEXPORT(ret_from_fork) 67FEXPORT(ret_from_fork)
68 jal schedule_tail # a0 = task_t *prev 68 jal schedule_tail # a0 = struct task_struct *prev
69 69
70FEXPORT(syscall_exit) 70FEXPORT(syscall_exit)
71 local_irq_disable # make sure need_resched and 71 local_irq_disable # make sure need_resched and
diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c
index 02237a685ec7..4dcc39f42951 100644
--- a/arch/mips/kernel/mips-mt.c
+++ b/arch/mips/kernel/mips-mt.c
@@ -47,7 +47,7 @@ unsigned long mt_fpemul_threshold = 0;
47 * used in sys_sched_set/getaffinity() in kernel/sched.c, so 47 * used in sys_sched_set/getaffinity() in kernel/sched.c, so
48 * cloned here. 48 * cloned here.
49 */ 49 */
50static inline task_t *find_process_by_pid(pid_t pid) 50static inline struct task_struct *find_process_by_pid(pid_t pid)
51{ 51{
52 return pid ? find_task_by_pid(pid) : current; 52 return pid ? find_task_by_pid(pid) : current;
53} 53}
@@ -62,7 +62,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
62 cpumask_t new_mask; 62 cpumask_t new_mask;
63 cpumask_t effective_mask; 63 cpumask_t effective_mask;
64 int retval; 64 int retval;
65 task_t *p; 65 struct task_struct *p;
66 66
67 if (len < sizeof(new_mask)) 67 if (len < sizeof(new_mask))
68 return -EINVAL; 68 return -EINVAL;
@@ -127,7 +127,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
127 unsigned int real_len; 127 unsigned int real_len;
128 cpumask_t mask; 128 cpumask_t mask;
129 int retval; 129 int retval;
130 task_t *p; 130 struct task_struct *p;
131 131
132 real_len = sizeof(mask); 132 real_len = sizeof(mask);
133 if (len < real_len) 133 if (len < real_len)
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
new file mode 100644
index 000000000000..0fa010a63a8e
--- /dev/null
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -0,0 +1,1378 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.17
4# Mon Jul 3 12:08:41 2006
5#
6# CONFIG_PPC64 is not set
7CONFIG_PPC32=y
8CONFIG_PPC_MERGE=y
9CONFIG_MMU=y
10CONFIG_GENERIC_HARDIRQS=y
11CONFIG_RWSEM_XCHGADD_ALGORITHM=y
12CONFIG_GENERIC_HWEIGHT=y
13CONFIG_GENERIC_CALIBRATE_DELAY=y
14CONFIG_GENERIC_FIND_NEXT_BIT=y
15CONFIG_PPC=y
16CONFIG_EARLY_PRINTK=y
17CONFIG_GENERIC_NVRAM=y
18CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
19CONFIG_ARCH_MAY_HAVE_PC_FDC=y
20CONFIG_PPC_OF=y
21CONFIG_PPC_UDBG_16550=y
22CONFIG_GENERIC_TBSYNC=y
23# CONFIG_DEFAULT_UIMAGE is not set
24
25#
26# Processor support
27#
28CONFIG_CLASSIC32=y
29# CONFIG_PPC_52xx is not set
30# CONFIG_PPC_82xx is not set
31# CONFIG_PPC_83xx is not set
32# CONFIG_PPC_85xx is not set
33# CONFIG_PPC_86xx is not set
34# CONFIG_40x is not set
35# CONFIG_44x is not set
36# CONFIG_8xx is not set
37# CONFIG_E200 is not set
38CONFIG_6xx=y
39CONFIG_PPC_FPU=y
40# CONFIG_ALTIVEC is not set
41CONFIG_PPC_STD_MMU=y
42CONFIG_PPC_STD_MMU_32=y
43CONFIG_SMP=y
44CONFIG_NR_CPUS=4
45
46#
47# Code maturity level options
48#
49CONFIG_EXPERIMENTAL=y
50CONFIG_LOCK_KERNEL=y
51CONFIG_INIT_ENV_ARG_LIMIT=32
52
53#
54# General setup
55#
56CONFIG_LOCALVERSION=""
57# CONFIG_LOCALVERSION_AUTO is not set
58CONFIG_SWAP=y
59CONFIG_SYSVIPC=y
60CONFIG_POSIX_MQUEUE=y
61# CONFIG_BSD_PROCESS_ACCT is not set
62CONFIG_SYSCTL=y
63# CONFIG_AUDIT is not set
64CONFIG_IKCONFIG=y
65CONFIG_IKCONFIG_PROC=y
66# CONFIG_CPUSETS is not set
67# CONFIG_RELAY is not set
68CONFIG_INITRAMFS_SOURCE=""
69# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
70# CONFIG_EMBEDDED is not set
71CONFIG_KALLSYMS=y
72# CONFIG_KALLSYMS_ALL is not set
73# CONFIG_KALLSYMS_EXTRA_PASS is not set
74CONFIG_HOTPLUG=y
75CONFIG_PRINTK=y
76CONFIG_BUG=y
77CONFIG_ELF_CORE=y
78CONFIG_BASE_FULL=y
79CONFIG_FUTEX=y
80CONFIG_EPOLL=y
81CONFIG_SHMEM=y
82CONFIG_SLAB=y
83# CONFIG_TINY_SHMEM is not set
84CONFIG_BASE_SMALL=0
85# CONFIG_SLOB is not set
86
87#
88# Loadable module support
89#
90CONFIG_MODULES=y
91CONFIG_MODULE_UNLOAD=y
92CONFIG_MODULE_FORCE_UNLOAD=y
93# CONFIG_MODVERSIONS is not set
94# CONFIG_MODULE_SRCVERSION_ALL is not set
95CONFIG_KMOD=y
96CONFIG_STOP_MACHINE=y
97
98#
99# Block layer
100#
101CONFIG_LBD=y
102# CONFIG_BLK_DEV_IO_TRACE is not set
103# CONFIG_LSF is not set
104
105#
106# IO Schedulers
107#
108CONFIG_IOSCHED_NOOP=y
109CONFIG_IOSCHED_AS=y
110CONFIG_IOSCHED_DEADLINE=y
111CONFIG_IOSCHED_CFQ=y
112CONFIG_DEFAULT_AS=y
113# CONFIG_DEFAULT_DEADLINE is not set
114# CONFIG_DEFAULT_CFQ is not set
115# CONFIG_DEFAULT_NOOP is not set
116CONFIG_DEFAULT_IOSCHED="anticipatory"
117
118#
119# Platform support
120#
121CONFIG_PPC_MULTIPLATFORM=y
122# CONFIG_PPC_ISERIES is not set
123# CONFIG_EMBEDDED6xx is not set
124# CONFIG_APUS is not set
125CONFIG_PPC_CHRP=y
126# CONFIG_PPC_PMAC is not set
127# CONFIG_PPC_CELL is not set
128# CONFIG_PPC_CELL_NATIVE is not set
129CONFIG_MPIC=y
130CONFIG_PPC_RTAS=y
131# CONFIG_RTAS_ERROR_LOGGING is not set
132CONFIG_RTAS_PROC=y
133# CONFIG_MMIO_NVRAM is not set
134CONFIG_PPC_MPC106=y
135# CONFIG_PPC_970_NAP is not set
136# CONFIG_CPU_FREQ is not set
137# CONFIG_TAU is not set
138# CONFIG_WANT_EARLY_SERIAL is not set
139
140#
141# Kernel options
142#
143CONFIG_HIGHMEM=y
144# CONFIG_HZ_100 is not set
145CONFIG_HZ_250=y
146# CONFIG_HZ_1000 is not set
147CONFIG_HZ=250
148CONFIG_PREEMPT_NONE=y
149# CONFIG_PREEMPT_VOLUNTARY is not set
150# CONFIG_PREEMPT is not set
151CONFIG_PREEMPT_BKL=y
152CONFIG_BINFMT_ELF=y
153CONFIG_BINFMT_MISC=y
154# CONFIG_KEXEC is not set
155CONFIG_IRQ_ALL_CPUS=y
156CONFIG_ARCH_FLATMEM_ENABLE=y
157CONFIG_SELECT_MEMORY_MODEL=y
158CONFIG_FLATMEM_MANUAL=y
159# CONFIG_DISCONTIGMEM_MANUAL is not set
160# CONFIG_SPARSEMEM_MANUAL is not set
161CONFIG_FLATMEM=y
162CONFIG_FLAT_NODE_MEM_MAP=y
163# CONFIG_SPARSEMEM_STATIC is not set
164CONFIG_SPLIT_PTLOCK_CPUS=4
165CONFIG_PROC_DEVICETREE=y
166# CONFIG_CMDLINE_BOOL is not set
167# CONFIG_PM is not set
168CONFIG_SECCOMP=y
169CONFIG_ISA_DMA_API=y
170
171#
172# Bus options
173#
174CONFIG_ISA=y
175CONFIG_GENERIC_ISA_DMA=y
176CONFIG_PPC_I8259=y
177CONFIG_PPC_INDIRECT_PCI=y
178CONFIG_PCI=y
179CONFIG_PCI_DOMAINS=y
180# CONFIG_PCIEPORTBUS is not set
181# CONFIG_PCI_DEBUG is not set
182
183#
184# PCCARD (PCMCIA/CardBus) support
185#
186# CONFIG_PCCARD is not set
187
188#
189# PCI Hotplug Support
190#
191# CONFIG_HOTPLUG_PCI is not set
192
193#
194# Advanced setup
195#
196# CONFIG_ADVANCED_OPTIONS is not set
197
198#
199# Default settings for advanced configuration options are used
200#
201CONFIG_HIGHMEM_START=0xfe000000
202CONFIG_LOWMEM_SIZE=0x30000000
203CONFIG_KERNEL_START=0xc0000000
204CONFIG_TASK_SIZE=0x80000000
205CONFIG_BOOT_LOAD=0x00800000
206
207#
208# Networking
209#
210CONFIG_NET=y
211
212#
213# Networking options
214#
215# CONFIG_NETDEBUG is not set
216CONFIG_PACKET=y
217# CONFIG_PACKET_MMAP is not set
218CONFIG_UNIX=y
219# CONFIG_NET_KEY is not set
220CONFIG_INET=y
221CONFIG_IP_MULTICAST=y
222# CONFIG_IP_ADVANCED_ROUTER is not set
223CONFIG_IP_FIB_HASH=y
224# CONFIG_IP_PNP is not set
225# CONFIG_NET_IPIP is not set
226# CONFIG_NET_IPGRE is not set
227# CONFIG_IP_MROUTE is not set
228# CONFIG_ARPD is not set
229CONFIG_SYN_COOKIES=y
230# CONFIG_INET_AH is not set
231# CONFIG_INET_ESP is not set
232# CONFIG_INET_IPCOMP is not set
233# CONFIG_INET_XFRM_TUNNEL is not set
234# CONFIG_INET_TUNNEL is not set
235# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
236# CONFIG_INET_XFRM_MODE_TUNNEL is not set
237CONFIG_INET_DIAG=y
238CONFIG_INET_TCP_DIAG=y
239# CONFIG_TCP_CONG_ADVANCED is not set
240CONFIG_TCP_CONG_BIC=y
241
242#
243# IP: Virtual Server Configuration
244#
245# CONFIG_IP_VS is not set
246# CONFIG_IPV6 is not set
247# CONFIG_INET6_XFRM_TUNNEL is not set
248# CONFIG_INET6_TUNNEL is not set
249# CONFIG_NETWORK_SECMARK is not set
250CONFIG_NETFILTER=y
251# CONFIG_NETFILTER_DEBUG is not set
252
253#
254# Core Netfilter Configuration
255#
256# CONFIG_NETFILTER_NETLINK is not set
257# CONFIG_NETFILTER_XTABLES is not set
258
259#
260# IP: Netfilter Configuration
261#
262CONFIG_IP_NF_CONNTRACK=m
263# CONFIG_IP_NF_CT_ACCT is not set
264# CONFIG_IP_NF_CONNTRACK_MARK is not set
265# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
266# CONFIG_IP_NF_CT_PROTO_SCTP is not set
267CONFIG_IP_NF_FTP=m
268CONFIG_IP_NF_IRC=m
269# CONFIG_IP_NF_NETBIOS_NS is not set
270CONFIG_IP_NF_TFTP=m
271CONFIG_IP_NF_AMANDA=m
272# CONFIG_IP_NF_PPTP is not set
273# CONFIG_IP_NF_H323 is not set
274# CONFIG_IP_NF_SIP is not set
275# CONFIG_IP_NF_QUEUE is not set
276
277#
278# DCCP Configuration (EXPERIMENTAL)
279#
280# CONFIG_IP_DCCP is not set
281
282#
283# SCTP Configuration (EXPERIMENTAL)
284#
285# CONFIG_IP_SCTP is not set
286
287#
288# TIPC Configuration (EXPERIMENTAL)
289#
290# CONFIG_TIPC is not set
291# CONFIG_ATM is not set
292# CONFIG_BRIDGE is not set
293# CONFIG_VLAN_8021Q is not set
294# CONFIG_DECNET is not set
295# CONFIG_LLC2 is not set
296# CONFIG_IPX is not set
297# CONFIG_ATALK is not set
298# CONFIG_X25 is not set
299# CONFIG_LAPB is not set
300# CONFIG_NET_DIVERT is not set
301# CONFIG_ECONET is not set
302# CONFIG_WAN_ROUTER is not set
303
304#
305# QoS and/or fair queueing
306#
307# CONFIG_NET_SCHED is not set
308
309#
310# Network testing
311#
312# CONFIG_NET_PKTGEN is not set
313# CONFIG_HAMRADIO is not set
314# CONFIG_IRDA is not set
315# CONFIG_BT is not set
316# CONFIG_IEEE80211 is not set
317
318#
319# Device Drivers
320#
321
322#
323# Generic Driver Options
324#
325# CONFIG_STANDALONE is not set
326CONFIG_PREVENT_FIRMWARE_BUILD=y
327# CONFIG_FW_LOADER is not set
328# CONFIG_DEBUG_DRIVER is not set
329# CONFIG_SYS_HYPERVISOR is not set
330
331#
332# Connector - unified userspace <-> kernelspace linker
333#
334# CONFIG_CONNECTOR is not set
335
336#
337# Memory Technology Devices (MTD)
338#
339# CONFIG_MTD is not set
340
341#
342# Parallel port support
343#
344# CONFIG_PARPORT is not set
345
346#
347# Plug and Play support
348#
349# CONFIG_PNP is not set
350
351#
352# Block devices
353#
354CONFIG_BLK_DEV_FD=y
355# CONFIG_BLK_DEV_XD is not set
356# CONFIG_BLK_CPQ_DA is not set
357# CONFIG_BLK_CPQ_CISS_DA is not set
358# CONFIG_BLK_DEV_DAC960 is not set
359# CONFIG_BLK_DEV_UMEM is not set
360# CONFIG_BLK_DEV_COW_COMMON is not set
361CONFIG_BLK_DEV_LOOP=y
362# CONFIG_BLK_DEV_CRYPTOLOOP is not set
363# CONFIG_BLK_DEV_NBD is not set
364# CONFIG_BLK_DEV_SX8 is not set
365# CONFIG_BLK_DEV_UB is not set
366CONFIG_BLK_DEV_RAM=y
367CONFIG_BLK_DEV_RAM_COUNT=16
368CONFIG_BLK_DEV_RAM_SIZE=4096
369CONFIG_BLK_DEV_INITRD=y
370# CONFIG_CDROM_PKTCDVD is not set
371# CONFIG_ATA_OVER_ETH is not set
372
373#
374# ATA/ATAPI/MFM/RLL support
375#
376CONFIG_IDE=y
377CONFIG_BLK_DEV_IDE=y
378
379#
380# Please see Documentation/ide.txt for help/info on IDE drives
381#
382# CONFIG_BLK_DEV_IDE_SATA is not set
383CONFIG_BLK_DEV_IDEDISK=y
384CONFIG_IDEDISK_MULTI_MODE=y
385CONFIG_BLK_DEV_IDECD=y
386# CONFIG_BLK_DEV_IDETAPE is not set
387# CONFIG_BLK_DEV_IDEFLOPPY is not set
388# CONFIG_BLK_DEV_IDESCSI is not set
389# CONFIG_IDE_TASK_IOCTL is not set
390
391#
392# IDE chipset support/bugfixes
393#
394CONFIG_IDE_GENERIC=y
395CONFIG_BLK_DEV_IDEPCI=y
396CONFIG_IDEPCI_SHARE_IRQ=y
397# CONFIG_BLK_DEV_OFFBOARD is not set
398CONFIG_BLK_DEV_GENERIC=y
399# CONFIG_BLK_DEV_OPTI621 is not set
400CONFIG_BLK_DEV_SL82C105=y
401CONFIG_BLK_DEV_IDEDMA_PCI=y
402# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
403CONFIG_IDEDMA_PCI_AUTO=y
404# CONFIG_IDEDMA_ONLYDISK is not set
405# CONFIG_BLK_DEV_AEC62XX is not set
406# CONFIG_BLK_DEV_ALI15X3 is not set
407# CONFIG_BLK_DEV_AMD74XX is not set
408# CONFIG_BLK_DEV_CMD64X is not set
409# CONFIG_BLK_DEV_TRIFLEX is not set
410# CONFIG_BLK_DEV_CY82C693 is not set
411# CONFIG_BLK_DEV_CS5520 is not set
412# CONFIG_BLK_DEV_CS5530 is not set
413# CONFIG_BLK_DEV_HPT34X is not set
414# CONFIG_BLK_DEV_HPT366 is not set
415# CONFIG_BLK_DEV_SC1200 is not set
416# CONFIG_BLK_DEV_PIIX is not set
417# CONFIG_BLK_DEV_IT821X is not set
418# CONFIG_BLK_DEV_NS87415 is not set
419# CONFIG_BLK_DEV_PDC202XX_OLD is not set
420# CONFIG_BLK_DEV_PDC202XX_NEW is not set
421# CONFIG_BLK_DEV_SVWKS is not set
422# CONFIG_BLK_DEV_SIIMAGE is not set
423# CONFIG_BLK_DEV_SLC90E66 is not set
424# CONFIG_BLK_DEV_TRM290 is not set
425CONFIG_BLK_DEV_VIA82CXXX=y
426# CONFIG_IDE_ARM is not set
427# CONFIG_IDE_CHIPSETS is not set
428CONFIG_BLK_DEV_IDEDMA=y
429# CONFIG_IDEDMA_IVB is not set
430CONFIG_IDEDMA_AUTO=y
431# CONFIG_BLK_DEV_HD is not set
432
433#
434# SCSI device support
435#
436# CONFIG_RAID_ATTRS is not set
437CONFIG_SCSI=y
438CONFIG_SCSI_PROC_FS=y
439
440#
441# SCSI support type (disk, tape, CD-ROM)
442#
443CONFIG_BLK_DEV_SD=y
444CONFIG_CHR_DEV_ST=y
445# CONFIG_CHR_DEV_OSST is not set
446CONFIG_BLK_DEV_SR=y
447CONFIG_BLK_DEV_SR_VENDOR=y
448CONFIG_CHR_DEV_SG=y
449# CONFIG_CHR_DEV_SCH is not set
450
451#
452# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
453#
454# CONFIG_SCSI_MULTI_LUN is not set
455CONFIG_SCSI_CONSTANTS=y
456# CONFIG_SCSI_LOGGING is not set
457
458#
459# SCSI Transport Attributes
460#
461CONFIG_SCSI_SPI_ATTRS=y
462# CONFIG_SCSI_FC_ATTRS is not set
463# CONFIG_SCSI_ISCSI_ATTRS is not set
464# CONFIG_SCSI_SAS_ATTRS is not set
465
466#
467# SCSI low-level drivers
468#
469# CONFIG_ISCSI_TCP is not set
470# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
471# CONFIG_SCSI_3W_9XXX is not set
472# CONFIG_SCSI_7000FASST is not set
473# CONFIG_SCSI_ACARD is not set
474# CONFIG_SCSI_AHA152X is not set
475# CONFIG_SCSI_AHA1542 is not set
476# CONFIG_SCSI_AACRAID is not set
477# CONFIG_SCSI_AIC7XXX is not set
478# CONFIG_SCSI_AIC7XXX_OLD is not set
479# CONFIG_SCSI_AIC79XX is not set
480# CONFIG_SCSI_DPT_I2O is not set
481# CONFIG_SCSI_IN2000 is not set
482# CONFIG_MEGARAID_NEWGEN is not set
483# CONFIG_MEGARAID_LEGACY is not set
484# CONFIG_MEGARAID_SAS is not set
485# CONFIG_SCSI_SATA is not set
486# CONFIG_SCSI_HPTIOP is not set
487# CONFIG_SCSI_BUSLOGIC is not set
488# CONFIG_SCSI_DMX3191D is not set
489# CONFIG_SCSI_DTC3280 is not set
490# CONFIG_SCSI_EATA is not set
491# CONFIG_SCSI_FUTURE_DOMAIN is not set
492# CONFIG_SCSI_GDTH is not set
493# CONFIG_SCSI_GENERIC_NCR5380 is not set
494# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
495# CONFIG_SCSI_IPS is not set
496# CONFIG_SCSI_INITIO is not set
497# CONFIG_SCSI_INIA100 is not set
498# CONFIG_SCSI_NCR53C406A is not set
499CONFIG_SCSI_SYM53C8XX_2=y
500CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
501CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
502CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
503CONFIG_SCSI_SYM53C8XX_MMIO=y
504# CONFIG_SCSI_IPR is not set
505# CONFIG_SCSI_PAS16 is not set
506# CONFIG_SCSI_PSI240I is not set
507# CONFIG_SCSI_QLOGIC_FAS is not set
508# CONFIG_SCSI_QLOGIC_1280 is not set
509# CONFIG_SCSI_QLA_FC is not set
510# CONFIG_SCSI_LPFC is not set
511# CONFIG_SCSI_SYM53C416 is not set
512# CONFIG_SCSI_DC395x is not set
513# CONFIG_SCSI_DC390T is not set
514# CONFIG_SCSI_T128 is not set
515# CONFIG_SCSI_U14_34F is not set
516# CONFIG_SCSI_NSP32 is not set
517# CONFIG_SCSI_DEBUG is not set
518
519#
520# Old CD-ROM drivers (not SCSI, not IDE)
521#
522# CONFIG_CD_NO_IDESCSI is not set
523
524#
525# Multi-device support (RAID and LVM)
526#
527# CONFIG_MD is not set
528
529#
530# Fusion MPT device support
531#
532# CONFIG_FUSION is not set
533# CONFIG_FUSION_SPI is not set
534# CONFIG_FUSION_FC is not set
535# CONFIG_FUSION_SAS is not set
536
537#
538# IEEE 1394 (FireWire) support
539#
540# CONFIG_IEEE1394 is not set
541
542#
543# I2O device support
544#
545# CONFIG_I2O is not set
546
547#
548# Macintosh device drivers
549#
550# CONFIG_WINDFARM is not set
551
552#
553# Network device support
554#
555CONFIG_NETDEVICES=y
556# CONFIG_DUMMY is not set
557# CONFIG_BONDING is not set
558# CONFIG_EQUALIZER is not set
559# CONFIG_TUN is not set
560
561#
562# ARCnet devices
563#
564# CONFIG_ARCNET is not set
565
566#
567# PHY device support
568#
569# CONFIG_PHYLIB is not set
570
571#
572# Ethernet (10 or 100Mbit)
573#
574CONFIG_NET_ETHERNET=y
575CONFIG_MII=y
576# CONFIG_HAPPYMEAL is not set
577# CONFIG_SUNGEM is not set
578# CONFIG_CASSINI is not set
579# CONFIG_NET_VENDOR_3COM is not set
580# CONFIG_LANCE is not set
581# CONFIG_NET_VENDOR_SMC is not set
582# CONFIG_NET_VENDOR_RACAL is not set
583
584#
585# Tulip family network device support
586#
587CONFIG_NET_TULIP=y
588# CONFIG_DE2104X is not set
589# CONFIG_TULIP is not set
590CONFIG_DE4X5=y
591# CONFIG_WINBOND_840 is not set
592# CONFIG_DM9102 is not set
593# CONFIG_ULI526X is not set
594# CONFIG_AT1700 is not set
595# CONFIG_DEPCA is not set
596# CONFIG_HP100 is not set
597# CONFIG_NET_ISA is not set
598CONFIG_NET_PCI=y
599CONFIG_PCNET32=y
600# CONFIG_AMD8111_ETH is not set
601# CONFIG_ADAPTEC_STARFIRE is not set
602# CONFIG_AC3200 is not set
603# CONFIG_APRICOT is not set
604# CONFIG_B44 is not set
605# CONFIG_FORCEDETH is not set
606# CONFIG_CS89x0 is not set
607# CONFIG_DGRS is not set
608# CONFIG_EEPRO100 is not set
609# CONFIG_E100 is not set
610# CONFIG_FEALNX is not set
611# CONFIG_NATSEMI is not set
612# CONFIG_NE2K_PCI is not set
613CONFIG_8139CP=y
614CONFIG_8139TOO=y
615# CONFIG_8139TOO_PIO is not set
616# CONFIG_8139TOO_TUNE_TWISTER is not set
617# CONFIG_8139TOO_8129 is not set
618# CONFIG_8139_OLD_RX_RESET is not set
619# CONFIG_SIS900 is not set
620# CONFIG_EPIC100 is not set
621# CONFIG_SUNDANCE is not set
622# CONFIG_TLAN is not set
623CONFIG_VIA_RHINE=y
624# CONFIG_VIA_RHINE_MMIO is not set
625
626#
627# Ethernet (1000 Mbit)
628#
629# CONFIG_ACENIC is not set
630# CONFIG_DL2K is not set
631# CONFIG_E1000 is not set
632# CONFIG_NS83820 is not set
633# CONFIG_HAMACHI is not set
634# CONFIG_YELLOWFIN is not set
635# CONFIG_R8169 is not set
636# CONFIG_SIS190 is not set
637# CONFIG_SKGE is not set
638# CONFIG_SKY2 is not set
639# CONFIG_SK98LIN is not set
640# CONFIG_VIA_VELOCITY is not set
641# CONFIG_TIGON3 is not set
642# CONFIG_BNX2 is not set
643CONFIG_MV643XX_ETH=y
644# CONFIG_MV643XX_ETH_0 is not set
645# CONFIG_MV643XX_ETH_1 is not set
646# CONFIG_MV643XX_ETH_2 is not set
647
648#
649# Ethernet (10000 Mbit)
650#
651# CONFIG_CHELSIO_T1 is not set
652# CONFIG_IXGB is not set
653# CONFIG_S2IO is not set
654# CONFIG_MYRI10GE is not set
655
656#
657# Token Ring devices
658#
659# CONFIG_TR is not set
660
661#
662# Wireless LAN (non-hamradio)
663#
664# CONFIG_NET_RADIO is not set
665
666#
667# Wan interfaces
668#
669# CONFIG_WAN is not set
670# CONFIG_FDDI is not set
671# CONFIG_HIPPI is not set
672CONFIG_PPP=m
673CONFIG_PPP_MULTILINK=y
674CONFIG_PPP_FILTER=y
675CONFIG_PPP_ASYNC=m
676CONFIG_PPP_SYNC_TTY=m
677CONFIG_PPP_DEFLATE=m
678CONFIG_PPP_BSDCOMP=m
679CONFIG_PPP_MPPE=m
680CONFIG_PPPOE=m
681# CONFIG_SLIP is not set
682# CONFIG_NET_FC is not set
683# CONFIG_SHAPER is not set
684# CONFIG_NETCONSOLE is not set
685# CONFIG_NETPOLL is not set
686# CONFIG_NET_POLL_CONTROLLER is not set
687
688#
689# ISDN subsystem
690#
691# CONFIG_ISDN is not set
692
693#
694# Telephony Support
695#
696# CONFIG_PHONE is not set
697
698#
699# Input device support
700#
701CONFIG_INPUT=y
702
703#
704# Userland interfaces
705#
706CONFIG_INPUT_MOUSEDEV=y
707CONFIG_INPUT_MOUSEDEV_PSAUX=y
708CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
709CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
710# CONFIG_INPUT_JOYDEV is not set
711# CONFIG_INPUT_TSDEV is not set
712CONFIG_INPUT_EVDEV=y
713# CONFIG_INPUT_EVBUG is not set
714
715#
716# Input Device Drivers
717#
718CONFIG_INPUT_KEYBOARD=y
719CONFIG_KEYBOARD_ATKBD=y
720# CONFIG_KEYBOARD_SUNKBD is not set
721# CONFIG_KEYBOARD_LKKBD is not set
722# CONFIG_KEYBOARD_XTKBD is not set
723# CONFIG_KEYBOARD_NEWTON is not set
724CONFIG_INPUT_MOUSE=y
725CONFIG_MOUSE_PS2=y
726# CONFIG_MOUSE_SERIAL is not set
727# CONFIG_MOUSE_INPORT is not set
728# CONFIG_MOUSE_LOGIBM is not set
729# CONFIG_MOUSE_PC110PAD is not set
730# CONFIG_MOUSE_VSXXXAA is not set
731# CONFIG_INPUT_JOYSTICK is not set
732# CONFIG_INPUT_TOUCHSCREEN is not set
733CONFIG_INPUT_MISC=y
734# CONFIG_INPUT_PCSPKR is not set
735CONFIG_INPUT_UINPUT=y
736
737#
738# Hardware I/O ports
739#
740CONFIG_SERIO=y
741CONFIG_SERIO_I8042=y
742CONFIG_SERIO_SERPORT=y
743# CONFIG_SERIO_PCIPS2 is not set
744CONFIG_SERIO_LIBPS2=y
745# CONFIG_SERIO_RAW is not set
746# CONFIG_GAMEPORT is not set
747
748#
749# Character devices
750#
751CONFIG_VT=y
752CONFIG_VT_CONSOLE=y
753CONFIG_HW_CONSOLE=y
754# CONFIG_SERIAL_NONSTANDARD is not set
755
756#
757# Serial drivers
758#
759CONFIG_SERIAL_8250=y
760CONFIG_SERIAL_8250_CONSOLE=y
761CONFIG_SERIAL_8250_PCI=y
762CONFIG_SERIAL_8250_NR_UARTS=4
763CONFIG_SERIAL_8250_RUNTIME_UARTS=4
764# CONFIG_SERIAL_8250_EXTENDED is not set
765
766#
767# Non-8250 serial port support
768#
769CONFIG_SERIAL_CORE=y
770CONFIG_SERIAL_CORE_CONSOLE=y
771# CONFIG_SERIAL_JSM is not set
772CONFIG_UNIX98_PTYS=y
773CONFIG_LEGACY_PTYS=y
774CONFIG_LEGACY_PTY_COUNT=256
775# CONFIG_HVC_RTAS is not set
776
777#
778# IPMI
779#
780# CONFIG_IPMI_HANDLER is not set
781
782#
783# Watchdog Cards
784#
785# CONFIG_WATCHDOG is not set
786CONFIG_NVRAM=y
787CONFIG_GEN_RTC=y
788# CONFIG_GEN_RTC_X is not set
789# CONFIG_DTLK is not set
790# CONFIG_R3964 is not set
791# CONFIG_APPLICOM is not set
792
793#
794# Ftape, the floppy tape device driver
795#
796# CONFIG_AGP is not set
797# CONFIG_DRM is not set
798# CONFIG_RAW_DRIVER is not set
799
800#
801# TPM devices
802#
803# CONFIG_TCG_TPM is not set
804# CONFIG_TELCLOCK is not set
805
806#
807# I2C support
808#
809CONFIG_I2C=y
810# CONFIG_I2C_CHARDEV is not set
811
812#
813# I2C Algorithms
814#
815CONFIG_I2C_ALGOBIT=y
816# CONFIG_I2C_ALGOPCF is not set
817# CONFIG_I2C_ALGOPCA is not set
818
819#
820# I2C Hardware Bus support
821#
822# CONFIG_I2C_ALI1535 is not set
823# CONFIG_I2C_ALI1563 is not set
824# CONFIG_I2C_ALI15X3 is not set
825# CONFIG_I2C_AMD756 is not set
826# CONFIG_I2C_AMD8111 is not set
827# CONFIG_I2C_HYDRA is not set
828# CONFIG_I2C_I801 is not set
829# CONFIG_I2C_I810 is not set
830# CONFIG_I2C_PIIX4 is not set
831# CONFIG_I2C_MPC is not set
832# CONFIG_I2C_NFORCE2 is not set
833# CONFIG_I2C_OCORES is not set
834# CONFIG_I2C_PARPORT_LIGHT is not set
835# CONFIG_I2C_PROSAVAGE is not set
836# CONFIG_I2C_SAVAGE4 is not set
837# CONFIG_I2C_SIS5595 is not set
838# CONFIG_I2C_SIS630 is not set
839# CONFIG_I2C_SIS96X is not set
840# CONFIG_I2C_STUB is not set
841# CONFIG_I2C_VIA is not set
842# CONFIG_I2C_VIAPRO is not set
843# CONFIG_I2C_VOODOO3 is not set
844# CONFIG_I2C_PCA_ISA is not set
845
846#
847# Miscellaneous I2C Chip support
848#
849# CONFIG_SENSORS_DS1337 is not set
850# CONFIG_SENSORS_DS1374 is not set
851# CONFIG_SENSORS_EEPROM is not set
852# CONFIG_SENSORS_PCF8574 is not set
853# CONFIG_SENSORS_PCA9539 is not set
854# CONFIG_SENSORS_PCF8591 is not set
855# CONFIG_SENSORS_M41T00 is not set
856# CONFIG_SENSORS_MAX6875 is not set
857# CONFIG_I2C_DEBUG_CORE is not set
858# CONFIG_I2C_DEBUG_ALGO is not set
859# CONFIG_I2C_DEBUG_BUS is not set
860# CONFIG_I2C_DEBUG_CHIP is not set
861
862#
863# SPI support
864#
865# CONFIG_SPI is not set
866# CONFIG_SPI_MASTER is not set
867
868#
869# Dallas's 1-wire bus
870#
871
872#
873# Hardware Monitoring support
874#
875# CONFIG_HWMON is not set
876# CONFIG_HWMON_VID is not set
877
878#
879# Misc devices
880#
881
882#
883# Multimedia devices
884#
885# CONFIG_VIDEO_DEV is not set
886CONFIG_VIDEO_V4L2=y
887
888#
889# Digital Video Broadcasting Devices
890#
891# CONFIG_DVB is not set
892# CONFIG_USB_DABUSB is not set
893
894#
895# Graphics support
896#
897CONFIG_FB=y
898CONFIG_FB_CFB_FILLRECT=y
899CONFIG_FB_CFB_COPYAREA=y
900CONFIG_FB_CFB_IMAGEBLIT=y
901CONFIG_FB_MACMODES=y
902CONFIG_FB_FIRMWARE_EDID=y
903# CONFIG_FB_BACKLIGHT is not set
904CONFIG_FB_MODE_HELPERS=y
905CONFIG_FB_TILEBLITTING=y
906# CONFIG_FB_CIRRUS is not set
907# CONFIG_FB_PM2 is not set
908# CONFIG_FB_CYBER2000 is not set
909CONFIG_FB_OF=y
910# CONFIG_FB_CT65550 is not set
911# CONFIG_FB_ASILIANT is not set
912# CONFIG_FB_IMSTT is not set
913# CONFIG_FB_VGA16 is not set
914# CONFIG_FB_S1D13XXX is not set
915# CONFIG_FB_NVIDIA is not set
916# CONFIG_FB_RIVA is not set
917CONFIG_FB_MATROX=y
918CONFIG_FB_MATROX_MILLENIUM=y
919CONFIG_FB_MATROX_MYSTIQUE=y
920CONFIG_FB_MATROX_G=y
921# CONFIG_FB_MATROX_I2C is not set
922# CONFIG_FB_MATROX_MULTIHEAD is not set
923CONFIG_FB_RADEON=y
924CONFIG_FB_RADEON_I2C=y
925# CONFIG_FB_RADEON_DEBUG is not set
926# CONFIG_FB_ATY128 is not set
927CONFIG_FB_ATY=y
928CONFIG_FB_ATY_CT=y
929# CONFIG_FB_ATY_GENERIC_LCD is not set
930CONFIG_FB_ATY_GX=y
931# CONFIG_FB_SAVAGE is not set
932# CONFIG_FB_SIS is not set
933# CONFIG_FB_NEOMAGIC is not set
934# CONFIG_FB_KYRO is not set
935CONFIG_FB_3DFX=y
936# CONFIG_FB_3DFX_ACCEL is not set
937# CONFIG_FB_VOODOO1 is not set
938# CONFIG_FB_TRIDENT is not set
939# CONFIG_FB_VIRTUAL is not set
940
941#
942# Console display driver support
943#
944CONFIG_VGA_CONSOLE=y
945# CONFIG_VGACON_SOFT_SCROLLBACK is not set
946# CONFIG_MDA_CONSOLE is not set
947CONFIG_DUMMY_CONSOLE=y
948CONFIG_FRAMEBUFFER_CONSOLE=y
949# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
950# CONFIG_FONTS is not set
951CONFIG_FONT_8x8=y
952CONFIG_FONT_8x16=y
953
954#
955# Logo configuration
956#
957CONFIG_LOGO=y
958CONFIG_LOGO_LINUX_MONO=y
959CONFIG_LOGO_LINUX_VGA16=y
960CONFIG_LOGO_LINUX_CLUT224=y
961# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
962
963#
964# Sound
965#
966# CONFIG_SOUND is not set
967
968#
969# USB support
970#
971CONFIG_USB_ARCH_HAS_HCD=y
972CONFIG_USB_ARCH_HAS_OHCI=y
973CONFIG_USB_ARCH_HAS_EHCI=y
974CONFIG_USB=y
975# CONFIG_USB_DEBUG is not set
976
977#
978# Miscellaneous USB options
979#
980CONFIG_USB_DEVICEFS=y
981# CONFIG_USB_BANDWIDTH is not set
982# CONFIG_USB_DYNAMIC_MINORS is not set
983# CONFIG_USB_OTG is not set
984
985#
986# USB Host Controller Drivers
987#
988CONFIG_USB_EHCI_HCD=m
989# CONFIG_USB_EHCI_SPLIT_ISO is not set
990# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
991# CONFIG_USB_EHCI_TT_NEWSCHED is not set
992# CONFIG_USB_ISP116X_HCD is not set
993CONFIG_USB_OHCI_HCD=y
994# CONFIG_USB_OHCI_BIG_ENDIAN is not set
995CONFIG_USB_OHCI_LITTLE_ENDIAN=y
996CONFIG_USB_UHCI_HCD=y
997# CONFIG_USB_SL811_HCD is not set
998
999#
1000# USB Device Class drivers
1001#
1002# CONFIG_USB_ACM is not set
1003# CONFIG_USB_PRINTER is not set
1004
1005#
1006# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
1007#
1008
1009#
1010# may also be needed; see USB_STORAGE Help for more information
1011#
1012CONFIG_USB_STORAGE=m
1013# CONFIG_USB_STORAGE_DEBUG is not set
1014# CONFIG_USB_STORAGE_DATAFAB is not set
1015# CONFIG_USB_STORAGE_FREECOM is not set
1016# CONFIG_USB_STORAGE_ISD200 is not set
1017# CONFIG_USB_STORAGE_DPCM is not set
1018# CONFIG_USB_STORAGE_USBAT is not set
1019# CONFIG_USB_STORAGE_SDDR09 is not set
1020# CONFIG_USB_STORAGE_SDDR55 is not set
1021# CONFIG_USB_STORAGE_JUMPSHOT is not set
1022# CONFIG_USB_STORAGE_ALAUDA is not set
1023# CONFIG_USB_STORAGE_ONETOUCH is not set
1024# CONFIG_USB_LIBUSUAL is not set
1025
1026#
1027# USB Input Devices
1028#
1029CONFIG_USB_HID=y
1030CONFIG_USB_HIDINPUT=y
1031# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1032# CONFIG_HID_FF is not set
1033# CONFIG_USB_HIDDEV is not set
1034# CONFIG_USB_AIPTEK is not set
1035# CONFIG_USB_WACOM is not set
1036# CONFIG_USB_ACECAD is not set
1037# CONFIG_USB_KBTAB is not set
1038# CONFIG_USB_POWERMATE is not set
1039# CONFIG_USB_TOUCHSCREEN is not set
1040# CONFIG_USB_YEALINK is not set
1041# CONFIG_USB_XPAD is not set
1042# CONFIG_USB_ATI_REMOTE is not set
1043# CONFIG_USB_ATI_REMOTE2 is not set
1044# CONFIG_USB_KEYSPAN_REMOTE is not set
1045# CONFIG_USB_APPLETOUCH is not set
1046
1047#
1048# USB Imaging devices
1049#
1050# CONFIG_USB_MDC800 is not set
1051# CONFIG_USB_MICROTEK is not set
1052
1053#
1054# USB Network Adapters
1055#
1056# CONFIG_USB_CATC is not set
1057# CONFIG_USB_KAWETH is not set
1058# CONFIG_USB_PEGASUS is not set
1059# CONFIG_USB_RTL8150 is not set
1060# CONFIG_USB_USBNET is not set
1061CONFIG_USB_MON=y
1062
1063#
1064# USB port drivers
1065#
1066
1067#
1068# USB Serial Converter support
1069#
1070# CONFIG_USB_SERIAL is not set
1071
1072#
1073# USB Miscellaneous drivers
1074#
1075# CONFIG_USB_EMI62 is not set
1076# CONFIG_USB_EMI26 is not set
1077# CONFIG_USB_AUERSWALD is not set
1078# CONFIG_USB_RIO500 is not set
1079# CONFIG_USB_LEGOTOWER is not set
1080# CONFIG_USB_LCD is not set
1081# CONFIG_USB_LED is not set
1082# CONFIG_USB_CY7C63 is not set
1083# CONFIG_USB_CYTHERM is not set
1084# CONFIG_USB_PHIDGETKIT is not set
1085# CONFIG_USB_PHIDGETSERVO is not set
1086# CONFIG_USB_IDMOUSE is not set
1087# CONFIG_USB_APPLEDISPLAY is not set
1088# CONFIG_USB_SISUSBVGA is not set
1089# CONFIG_USB_LD is not set
1090# CONFIG_USB_TEST is not set
1091
1092#
1093# USB DSL modem support
1094#
1095
1096#
1097# USB Gadget Support
1098#
1099# CONFIG_USB_GADGET is not set
1100
1101#
1102# MMC/SD Card support
1103#
1104# CONFIG_MMC is not set
1105
1106#
1107# LED devices
1108#
1109# CONFIG_NEW_LEDS is not set
1110
1111#
1112# LED drivers
1113#
1114
1115#
1116# LED Triggers
1117#
1118
1119#
1120# InfiniBand support
1121#
1122# CONFIG_INFINIBAND is not set
1123
1124#
1125# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
1126#
1127
1128#
1129# Real Time Clock
1130#
1131# CONFIG_RTC_CLASS is not set
1132
1133#
1134# DMA Engine support
1135#
1136# CONFIG_DMA_ENGINE is not set
1137
1138#
1139# DMA Clients
1140#
1141
1142#
1143# DMA Devices
1144#
1145
1146#
1147# File systems
1148#
1149CONFIG_EXT2_FS=y
1150# CONFIG_EXT2_FS_XATTR is not set
1151# CONFIG_EXT2_FS_XIP is not set
1152CONFIG_EXT3_FS=y
1153CONFIG_EXT3_FS_XATTR=y
1154# CONFIG_EXT3_FS_POSIX_ACL is not set
1155# CONFIG_EXT3_FS_SECURITY is not set
1156CONFIG_JBD=y
1157# CONFIG_JBD_DEBUG is not set
1158CONFIG_FS_MBCACHE=y
1159# CONFIG_REISERFS_FS is not set
1160# CONFIG_JFS_FS is not set
1161# CONFIG_FS_POSIX_ACL is not set
1162# CONFIG_XFS_FS is not set
1163# CONFIG_OCFS2_FS is not set
1164# CONFIG_MINIX_FS is not set
1165# CONFIG_ROMFS_FS is not set
1166CONFIG_INOTIFY=y
1167CONFIG_INOTIFY_USER=y
1168# CONFIG_QUOTA is not set
1169CONFIG_DNOTIFY=y
1170# CONFIG_AUTOFS_FS is not set
1171# CONFIG_AUTOFS4_FS is not set
1172# CONFIG_FUSE_FS is not set
1173
1174#
1175# CD-ROM/DVD Filesystems
1176#
1177CONFIG_ISO9660_FS=y
1178# CONFIG_JOLIET is not set
1179# CONFIG_ZISOFS is not set
1180# CONFIG_UDF_FS is not set
1181
1182#
1183# DOS/FAT/NT Filesystems
1184#
1185CONFIG_FAT_FS=m
1186CONFIG_MSDOS_FS=m
1187CONFIG_VFAT_FS=m
1188CONFIG_FAT_DEFAULT_CODEPAGE=437
1189CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
1190# CONFIG_NTFS_FS is not set
1191
1192#
1193# Pseudo filesystems
1194#
1195CONFIG_PROC_FS=y
1196CONFIG_PROC_KCORE=y
1197CONFIG_SYSFS=y
1198CONFIG_TMPFS=y
1199# CONFIG_HUGETLB_PAGE is not set
1200CONFIG_RAMFS=y
1201# CONFIG_CONFIGFS_FS is not set
1202
1203#
1204# Miscellaneous filesystems
1205#
1206# CONFIG_ADFS_FS is not set
1207# CONFIG_AFFS_FS is not set
1208# CONFIG_HFS_FS is not set
1209# CONFIG_HFSPLUS_FS is not set
1210# CONFIG_BEFS_FS is not set
1211# CONFIG_BFS_FS is not set
1212# CONFIG_EFS_FS is not set
1213# CONFIG_CRAMFS is not set
1214# CONFIG_VXFS_FS is not set
1215# CONFIG_HPFS_FS is not set
1216# CONFIG_QNX4FS_FS is not set
1217# CONFIG_SYSV_FS is not set
1218# CONFIG_UFS_FS is not set
1219
1220#
1221# Network File Systems
1222#
1223# CONFIG_NFS_FS is not set
1224# CONFIG_NFSD is not set
1225# CONFIG_SMB_FS is not set
1226# CONFIG_CIFS is not set
1227# CONFIG_NCP_FS is not set
1228# CONFIG_CODA_FS is not set
1229# CONFIG_AFS_FS is not set
1230# CONFIG_9P_FS is not set
1231
1232#
1233# Partition Types
1234#
1235CONFIG_PARTITION_ADVANCED=y
1236# CONFIG_ACORN_PARTITION is not set
1237# CONFIG_OSF_PARTITION is not set
1238# CONFIG_AMIGA_PARTITION is not set
1239# CONFIG_ATARI_PARTITION is not set
1240CONFIG_MAC_PARTITION=y
1241CONFIG_MSDOS_PARTITION=y
1242# CONFIG_BSD_DISKLABEL is not set
1243# CONFIG_MINIX_SUBPARTITION is not set
1244# CONFIG_SOLARIS_X86_PARTITION is not set
1245# CONFIG_UNIXWARE_DISKLABEL is not set
1246# CONFIG_LDM_PARTITION is not set
1247# CONFIG_SGI_PARTITION is not set
1248# CONFIG_ULTRIX_PARTITION is not set
1249# CONFIG_SUN_PARTITION is not set
1250# CONFIG_KARMA_PARTITION is not set
1251# CONFIG_EFI_PARTITION is not set
1252
1253#
1254# Native Language Support
1255#
1256CONFIG_NLS=y
1257CONFIG_NLS_DEFAULT="iso8859-1"
1258# CONFIG_NLS_CODEPAGE_437 is not set
1259# CONFIG_NLS_CODEPAGE_737 is not set
1260# CONFIG_NLS_CODEPAGE_775 is not set
1261# CONFIG_NLS_CODEPAGE_850 is not set
1262# CONFIG_NLS_CODEPAGE_852 is not set
1263# CONFIG_NLS_CODEPAGE_855 is not set
1264# CONFIG_NLS_CODEPAGE_857 is not set
1265# CONFIG_NLS_CODEPAGE_860 is not set
1266# CONFIG_NLS_CODEPAGE_861 is not set
1267# CONFIG_NLS_CODEPAGE_862 is not set
1268# CONFIG_NLS_CODEPAGE_863 is not set
1269# CONFIG_NLS_CODEPAGE_864 is not set
1270# CONFIG_NLS_CODEPAGE_865 is not set
1271# CONFIG_NLS_CODEPAGE_866 is not set
1272# CONFIG_NLS_CODEPAGE_869 is not set
1273# CONFIG_NLS_CODEPAGE_936 is not set
1274# CONFIG_NLS_CODEPAGE_950 is not set
1275# CONFIG_NLS_CODEPAGE_932 is not set
1276# CONFIG_NLS_CODEPAGE_949 is not set
1277# CONFIG_NLS_CODEPAGE_874 is not set
1278# CONFIG_NLS_ISO8859_8 is not set
1279# CONFIG_NLS_CODEPAGE_1250 is not set
1280# CONFIG_NLS_CODEPAGE_1251 is not set
1281CONFIG_NLS_ASCII=y
1282CONFIG_NLS_ISO8859_1=m
1283# CONFIG_NLS_ISO8859_2 is not set
1284# CONFIG_NLS_ISO8859_3 is not set
1285# CONFIG_NLS_ISO8859_4 is not set
1286# CONFIG_NLS_ISO8859_5 is not set
1287# CONFIG_NLS_ISO8859_6 is not set
1288# CONFIG_NLS_ISO8859_7 is not set
1289# CONFIG_NLS_ISO8859_9 is not set
1290# CONFIG_NLS_ISO8859_13 is not set
1291# CONFIG_NLS_ISO8859_14 is not set
1292# CONFIG_NLS_ISO8859_15 is not set
1293# CONFIG_NLS_KOI8_R is not set
1294# CONFIG_NLS_KOI8_U is not set
1295# CONFIG_NLS_UTF8 is not set
1296
1297#
1298# Library routines
1299#
1300CONFIG_CRC_CCITT=m
1301# CONFIG_CRC16 is not set
1302CONFIG_CRC32=y
1303# CONFIG_LIBCRC32C is not set
1304CONFIG_ZLIB_INFLATE=m
1305CONFIG_ZLIB_DEFLATE=m
1306CONFIG_TEXTSEARCH=y
1307CONFIG_TEXTSEARCH_KMP=m
1308
1309#
1310# Instrumentation Support
1311#
1312# CONFIG_PROFILING is not set
1313
1314#
1315# Kernel hacking
1316#
1317# CONFIG_PRINTK_TIME is not set
1318CONFIG_MAGIC_SYSRQ=y
1319CONFIG_DEBUG_KERNEL=y
1320CONFIG_LOG_BUF_SHIFT=15
1321CONFIG_DETECT_SOFTLOCKUP=y
1322# CONFIG_SCHEDSTATS is not set
1323# CONFIG_DEBUG_SLAB is not set
1324CONFIG_DEBUG_MUTEXES=y
1325# CONFIG_DEBUG_SPINLOCK is not set
1326CONFIG_DEBUG_SPINLOCK_SLEEP=y
1327# CONFIG_DEBUG_KOBJECT is not set
1328# CONFIG_DEBUG_HIGHMEM is not set
1329# CONFIG_DEBUG_INFO is not set
1330# CONFIG_DEBUG_FS is not set
1331# CONFIG_DEBUG_VM is not set
1332CONFIG_FORCED_INLINING=y
1333# CONFIG_RCU_TORTURE_TEST is not set
1334CONFIG_DEBUGGER=y
1335CONFIG_XMON=y
1336CONFIG_XMON_DEFAULT=y
1337# CONFIG_BDI_SWITCH is not set
1338# CONFIG_BOOTX_TEXT is not set
1339# CONFIG_PPC_EARLY_DEBUG is not set
1340
1341#
1342# Security options
1343#
1344# CONFIG_KEYS is not set
1345# CONFIG_SECURITY is not set
1346
1347#
1348# Cryptographic options
1349#
1350CONFIG_CRYPTO=y
1351# CONFIG_CRYPTO_HMAC is not set
1352# CONFIG_CRYPTO_NULL is not set
1353# CONFIG_CRYPTO_MD4 is not set
1354# CONFIG_CRYPTO_MD5 is not set
1355CONFIG_CRYPTO_SHA1=m
1356# CONFIG_CRYPTO_SHA256 is not set
1357# CONFIG_CRYPTO_SHA512 is not set
1358# CONFIG_CRYPTO_WP512 is not set
1359# CONFIG_CRYPTO_TGR192 is not set
1360# CONFIG_CRYPTO_DES is not set
1361# CONFIG_CRYPTO_BLOWFISH is not set
1362# CONFIG_CRYPTO_TWOFISH is not set
1363# CONFIG_CRYPTO_SERPENT is not set
1364# CONFIG_CRYPTO_AES is not set
1365# CONFIG_CRYPTO_CAST5 is not set
1366# CONFIG_CRYPTO_CAST6 is not set
1367# CONFIG_CRYPTO_TEA is not set
1368CONFIG_CRYPTO_ARC4=m
1369# CONFIG_CRYPTO_KHAZAD is not set
1370# CONFIG_CRYPTO_ANUBIS is not set
1371# CONFIG_CRYPTO_DEFLATE is not set
1372# CONFIG_CRYPTO_MICHAEL_MIC is not set
1373# CONFIG_CRYPTO_CRC32C is not set
1374# CONFIG_CRYPTO_TEST is not set
1375
1376#
1377# Hardware crypto devices
1378#
diff --git a/arch/powerpc/configs/mpc834x_itx_defconfig b/arch/powerpc/configs/mpc834x_itx_defconfig
new file mode 100644
index 000000000000..fc2d9789adc8
--- /dev/null
+++ b/arch/powerpc/configs/mpc834x_itx_defconfig
@@ -0,0 +1,1336 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.17
4# Fri Jun 30 17:53:25 2006
5#
6# CONFIG_PPC64 is not set
7CONFIG_PPC32=y
8CONFIG_PPC_MERGE=y
9CONFIG_MMU=y
10CONFIG_GENERIC_HARDIRQS=y
11CONFIG_IRQ_PER_CPU=y
12CONFIG_RWSEM_XCHGADD_ALGORITHM=y
13CONFIG_GENERIC_HWEIGHT=y
14CONFIG_GENERIC_CALIBRATE_DELAY=y
15CONFIG_GENERIC_FIND_NEXT_BIT=y
16CONFIG_PPC=y
17CONFIG_EARLY_PRINTK=y
18CONFIG_GENERIC_NVRAM=y
19CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
20CONFIG_ARCH_MAY_HAVE_PC_FDC=y
21CONFIG_PPC_OF=y
22CONFIG_PPC_UDBG_16550=y
23# CONFIG_GENERIC_TBSYNC is not set
24CONFIG_DEFAULT_UIMAGE=y
25
26#
27# Processor support
28#
29# CONFIG_CLASSIC32 is not set
30# CONFIG_PPC_52xx is not set
31# CONFIG_PPC_82xx is not set
32CONFIG_PPC_83xx=y
33# CONFIG_PPC_85xx is not set
34# CONFIG_PPC_86xx is not set
35# CONFIG_40x is not set
36# CONFIG_44x is not set
37# CONFIG_8xx is not set
38# CONFIG_E200 is not set
39CONFIG_6xx=y
40CONFIG_83xx=y
41CONFIG_PPC_FPU=y
42CONFIG_PPC_STD_MMU=y
43CONFIG_PPC_STD_MMU_32=y
44# CONFIG_SMP is not set
45CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
46
47#
48# Code maturity level options
49#
50CONFIG_EXPERIMENTAL=y
51CONFIG_BROKEN_ON_SMP=y
52CONFIG_INIT_ENV_ARG_LIMIT=32
53
54#
55# General setup
56#
57CONFIG_LOCALVERSION=""
58CONFIG_LOCALVERSION_AUTO=y
59CONFIG_SWAP=y
60CONFIG_SYSVIPC=y
61# CONFIG_POSIX_MQUEUE is not set
62# CONFIG_BSD_PROCESS_ACCT is not set
63CONFIG_SYSCTL=y
64# CONFIG_AUDIT is not set
65# CONFIG_IKCONFIG is not set
66# CONFIG_RELAY is not set
67CONFIG_INITRAMFS_SOURCE=""
68# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
69CONFIG_EMBEDDED=y
70# CONFIG_KALLSYMS is not set
71CONFIG_HOTPLUG=y
72CONFIG_PRINTK=y
73CONFIG_BUG=y
74CONFIG_ELF_CORE=y
75CONFIG_BASE_FULL=y
76CONFIG_RT_MUTEXES=y
77CONFIG_FUTEX=y
78# CONFIG_EPOLL is not set
79CONFIG_SHMEM=y
80CONFIG_SLAB=y
81# CONFIG_TINY_SHMEM is not set
82CONFIG_BASE_SMALL=0
83# CONFIG_SLOB is not set
84
85#
86# Loadable module support
87#
88CONFIG_MODULES=y
89CONFIG_MODULE_UNLOAD=y
90# CONFIG_MODULE_FORCE_UNLOAD is not set
91# CONFIG_MODVERSIONS is not set
92# CONFIG_MODULE_SRCVERSION_ALL is not set
93# CONFIG_KMOD is not set
94
95#
96# Block layer
97#
98# CONFIG_LBD is not set
99# CONFIG_BLK_DEV_IO_TRACE is not set
100# CONFIG_LSF is not set
101
102#
103# IO Schedulers
104#
105CONFIG_IOSCHED_NOOP=y
106CONFIG_IOSCHED_AS=y
107CONFIG_IOSCHED_DEADLINE=y
108CONFIG_IOSCHED_CFQ=y
109CONFIG_DEFAULT_AS=y
110# CONFIG_DEFAULT_DEADLINE is not set
111# CONFIG_DEFAULT_CFQ is not set
112# CONFIG_DEFAULT_NOOP is not set
113CONFIG_DEFAULT_IOSCHED="anticipatory"
114CONFIG_PPC_GEN550=y
115# CONFIG_WANT_EARLY_SERIAL is not set
116
117#
118# Platform support
119#
120# CONFIG_MPC834x_SYS is not set
121CONFIG_MPC834x_ITX=y
122CONFIG_MPC834x=y
123
124#
125# Kernel options
126#
127# CONFIG_HIGHMEM is not set
128# CONFIG_HZ_100 is not set
129CONFIG_HZ_250=y
130# CONFIG_HZ_1000 is not set
131CONFIG_HZ=250
132CONFIG_PREEMPT_NONE=y
133# CONFIG_PREEMPT_VOLUNTARY is not set
134# CONFIG_PREEMPT is not set
135CONFIG_BINFMT_ELF=y
136# CONFIG_BINFMT_MISC is not set
137CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
138CONFIG_ARCH_FLATMEM_ENABLE=y
139CONFIG_SELECT_MEMORY_MODEL=y
140CONFIG_FLATMEM_MANUAL=y
141# CONFIG_DISCONTIGMEM_MANUAL is not set
142# CONFIG_SPARSEMEM_MANUAL is not set
143CONFIG_FLATMEM=y
144CONFIG_FLAT_NODE_MEM_MAP=y
145# CONFIG_SPARSEMEM_STATIC is not set
146CONFIG_SPLIT_PTLOCK_CPUS=4
147# CONFIG_RESOURCES_64BIT is not set
148CONFIG_PROC_DEVICETREE=y
149# CONFIG_CMDLINE_BOOL is not set
150# CONFIG_PM is not set
151# CONFIG_SOFTWARE_SUSPEND is not set
152CONFIG_SECCOMP=y
153CONFIG_ISA_DMA_API=y
154
155#
156# Bus options
157#
158CONFIG_GENERIC_ISA_DMA=y
159# CONFIG_PPC_I8259 is not set
160CONFIG_PPC_INDIRECT_PCI=y
161CONFIG_FSL_SOC=y
162CONFIG_PCI=y
163CONFIG_PCI_DOMAINS=y
164# CONFIG_PCIEPORTBUS is not set
165# CONFIG_PCI_DEBUG is not set
166
167#
168# PCCARD (PCMCIA/CardBus) support
169#
170# CONFIG_PCCARD is not set
171
172#
173# PCI Hotplug Support
174#
175# CONFIG_HOTPLUG_PCI is not set
176
177#
178# Advanced setup
179#
180# CONFIG_ADVANCED_OPTIONS is not set
181
182#
183# Default settings for advanced configuration options are used
184#
185CONFIG_HIGHMEM_START=0xfe000000
186CONFIG_LOWMEM_SIZE=0x30000000
187CONFIG_KERNEL_START=0xc0000000
188CONFIG_TASK_SIZE=0x80000000
189CONFIG_BOOT_LOAD=0x00800000
190
191#
192# Networking
193#
194CONFIG_NET=y
195
196#
197# Networking options
198#
199# CONFIG_NETDEBUG is not set
200CONFIG_PACKET=y
201# CONFIG_PACKET_MMAP is not set
202CONFIG_UNIX=y
203CONFIG_XFRM=y
204# CONFIG_XFRM_USER is not set
205# CONFIG_NET_KEY is not set
206CONFIG_INET=y
207CONFIG_IP_MULTICAST=y
208# CONFIG_IP_ADVANCED_ROUTER is not set
209CONFIG_IP_FIB_HASH=y
210CONFIG_IP_PNP=y
211CONFIG_IP_PNP_DHCP=y
212CONFIG_IP_PNP_BOOTP=y
213# CONFIG_IP_PNP_RARP is not set
214# CONFIG_NET_IPIP is not set
215# CONFIG_NET_IPGRE is not set
216# CONFIG_IP_MROUTE is not set
217# CONFIG_ARPD is not set
218CONFIG_SYN_COOKIES=y
219# CONFIG_INET_AH is not set
220# CONFIG_INET_ESP is not set
221# CONFIG_INET_IPCOMP is not set
222# CONFIG_INET_XFRM_TUNNEL is not set
223# CONFIG_INET_TUNNEL is not set
224CONFIG_INET_XFRM_MODE_TRANSPORT=y
225CONFIG_INET_XFRM_MODE_TUNNEL=y
226CONFIG_INET_DIAG=y
227CONFIG_INET_TCP_DIAG=y
228# CONFIG_TCP_CONG_ADVANCED is not set
229CONFIG_TCP_CONG_BIC=y
230# CONFIG_IPV6 is not set
231# CONFIG_INET6_XFRM_TUNNEL is not set
232# CONFIG_INET6_TUNNEL is not set
233# CONFIG_NETWORK_SECMARK is not set
234# CONFIG_NETFILTER is not set
235
236#
237# DCCP Configuration (EXPERIMENTAL)
238#
239# CONFIG_IP_DCCP is not set
240
241#
242# SCTP Configuration (EXPERIMENTAL)
243#
244# CONFIG_IP_SCTP is not set
245
246#
247# TIPC Configuration (EXPERIMENTAL)
248#
249# CONFIG_TIPC is not set
250# CONFIG_ATM is not set
251# CONFIG_BRIDGE is not set
252# CONFIG_VLAN_8021Q is not set
253# CONFIG_DECNET is not set
254# CONFIG_LLC2 is not set
255# CONFIG_IPX is not set
256# CONFIG_ATALK is not set
257# CONFIG_X25 is not set
258# CONFIG_LAPB is not set
259# CONFIG_NET_DIVERT is not set
260# CONFIG_ECONET is not set
261# CONFIG_WAN_ROUTER is not set
262
263#
264# QoS and/or fair queueing
265#
266# CONFIG_NET_SCHED is not set
267
268#
269# Network testing
270#
271# CONFIG_NET_PKTGEN is not set
272# CONFIG_HAMRADIO is not set
273# CONFIG_IRDA is not set
274# CONFIG_BT is not set
275# CONFIG_IEEE80211 is not set
276
277#
278# Device Drivers
279#
280
281#
282# Generic Driver Options
283#
284CONFIG_STANDALONE=y
285CONFIG_PREVENT_FIRMWARE_BUILD=y
286# CONFIG_FW_LOADER is not set
287# CONFIG_DEBUG_DRIVER is not set
288# CONFIG_SYS_HYPERVISOR is not set
289
290#
291# Connector - unified userspace <-> kernelspace linker
292#
293# CONFIG_CONNECTOR is not set
294
295#
296# Memory Technology Devices (MTD)
297#
298CONFIG_MTD=y
299# CONFIG_MTD_DEBUG is not set
300# CONFIG_MTD_CONCAT is not set
301# CONFIG_MTD_PARTITIONS is not set
302
303#
304# User Modules And Translation Layers
305#
306CONFIG_MTD_CHAR=y
307# CONFIG_MTD_BLOCK is not set
308# CONFIG_MTD_BLOCK_RO is not set
309# CONFIG_FTL is not set
310# CONFIG_NFTL is not set
311# CONFIG_INFTL is not set
312# CONFIG_RFD_FTL is not set
313
314#
315# RAM/ROM/Flash chip drivers
316#
317CONFIG_MTD_CFI=y
318# CONFIG_MTD_JEDECPROBE is not set
319CONFIG_MTD_GEN_PROBE=y
320# CONFIG_MTD_CFI_ADV_OPTIONS is not set
321CONFIG_MTD_MAP_BANK_WIDTH_1=y
322CONFIG_MTD_MAP_BANK_WIDTH_2=y
323CONFIG_MTD_MAP_BANK_WIDTH_4=y
324# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
325# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
326# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
327CONFIG_MTD_CFI_I1=y
328CONFIG_MTD_CFI_I2=y
329# CONFIG_MTD_CFI_I4 is not set
330# CONFIG_MTD_CFI_I8 is not set
331# CONFIG_MTD_CFI_INTELEXT is not set
332CONFIG_MTD_CFI_AMDSTD=y
333# CONFIG_MTD_CFI_STAA is not set
334CONFIG_MTD_CFI_UTIL=y
335# CONFIG_MTD_RAM is not set
336# CONFIG_MTD_ROM is not set
337# CONFIG_MTD_ABSENT is not set
338# CONFIG_MTD_OBSOLETE_CHIPS is not set
339
340#
341# Mapping drivers for chip access
342#
343# CONFIG_MTD_COMPLEX_MAPPINGS is not set
344CONFIG_MTD_PHYSMAP=y
345CONFIG_MTD_PHYSMAP_START=0xfe000000
346CONFIG_MTD_PHYSMAP_LEN=0x1000000
347CONFIG_MTD_PHYSMAP_BANKWIDTH=2
348# CONFIG_MTD_PLATRAM is not set
349
350#
351# Self-contained MTD device drivers
352#
353# CONFIG_MTD_PMC551 is not set
354# CONFIG_MTD_DATAFLASH is not set
355# CONFIG_MTD_M25P80 is not set
356# CONFIG_MTD_SLRAM is not set
357# CONFIG_MTD_PHRAM is not set
358# CONFIG_MTD_MTDRAM is not set
359# CONFIG_MTD_BLOCK2MTD is not set
360
361#
362# Disk-On-Chip Device Drivers
363#
364# CONFIG_MTD_DOC2000 is not set
365# CONFIG_MTD_DOC2001 is not set
366# CONFIG_MTD_DOC2001PLUS is not set
367
368#
369# NAND Flash Device Drivers
370#
371# CONFIG_MTD_NAND is not set
372
373#
374# OneNAND Flash Device Drivers
375#
376# CONFIG_MTD_ONENAND is not set
377
378#
379# Parallel port support
380#
381# CONFIG_PARPORT is not set
382
383#
384# Plug and Play support
385#
386
387#
388# Block devices
389#
390# CONFIG_BLK_DEV_FD is not set
391# CONFIG_BLK_CPQ_DA is not set
392# CONFIG_BLK_CPQ_CISS_DA is not set
393# CONFIG_BLK_DEV_DAC960 is not set
394# CONFIG_BLK_DEV_UMEM is not set
395# CONFIG_BLK_DEV_COW_COMMON is not set
396CONFIG_BLK_DEV_LOOP=y
397# CONFIG_BLK_DEV_CRYPTOLOOP is not set
398# CONFIG_BLK_DEV_NBD is not set
399# CONFIG_BLK_DEV_SX8 is not set
400# CONFIG_BLK_DEV_UB is not set
401CONFIG_BLK_DEV_RAM=y
402CONFIG_BLK_DEV_RAM_COUNT=16
403CONFIG_BLK_DEV_RAM_SIZE=32768
404CONFIG_BLK_DEV_INITRD=y
405# CONFIG_CDROM_PKTCDVD is not set
406# CONFIG_ATA_OVER_ETH is not set
407
408#
409# ATA/ATAPI/MFM/RLL support
410#
411CONFIG_IDE=y
412# CONFIG_BLK_DEV_IDE is not set
413# CONFIG_BLK_DEV_HD_ONLY is not set
414# CONFIG_BLK_DEV_HD is not set
415
416#
417# SCSI device support
418#
419# CONFIG_RAID_ATTRS is not set
420CONFIG_SCSI=y
421CONFIG_SCSI_PROC_FS=y
422
423#
424# SCSI support type (disk, tape, CD-ROM)
425#
426CONFIG_BLK_DEV_SD=y
427# CONFIG_CHR_DEV_ST is not set
428# CONFIG_CHR_DEV_OSST is not set
429# CONFIG_BLK_DEV_SR is not set
430CONFIG_CHR_DEV_SG=y
431# CONFIG_CHR_DEV_SCH is not set
432
433#
434# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
435#
436# CONFIG_SCSI_MULTI_LUN is not set
437# CONFIG_SCSI_CONSTANTS is not set
438# CONFIG_SCSI_LOGGING is not set
439
440#
441# SCSI Transport Attributes
442#
443CONFIG_SCSI_SPI_ATTRS=y
444# CONFIG_SCSI_FC_ATTRS is not set
445# CONFIG_SCSI_ISCSI_ATTRS is not set
446# CONFIG_SCSI_SAS_ATTRS is not set
447
448#
449# SCSI low-level drivers
450#
451# CONFIG_ISCSI_TCP is not set
452# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
453# CONFIG_SCSI_3W_9XXX is not set
454# CONFIG_SCSI_ACARD is not set
455# CONFIG_SCSI_AACRAID is not set
456# CONFIG_SCSI_AIC7XXX is not set
457# CONFIG_SCSI_AIC7XXX_OLD is not set
458# CONFIG_SCSI_AIC79XX is not set
459# CONFIG_SCSI_DPT_I2O is not set
460# CONFIG_MEGARAID_NEWGEN is not set
461# CONFIG_MEGARAID_LEGACY is not set
462# CONFIG_MEGARAID_SAS is not set
463CONFIG_SCSI_SATA=y
464# CONFIG_SCSI_SATA_AHCI is not set
465# CONFIG_SCSI_SATA_SVW is not set
466# CONFIG_SCSI_ATA_PIIX is not set
467# CONFIG_SCSI_SATA_MV is not set
468# CONFIG_SCSI_SATA_NV is not set
469# CONFIG_SCSI_PDC_ADMA is not set
470# CONFIG_SCSI_HPTIOP is not set
471# CONFIG_SCSI_SATA_QSTOR is not set
472# CONFIG_SCSI_SATA_PROMISE is not set
473# CONFIG_SCSI_SATA_SX4 is not set
474CONFIG_SCSI_SATA_SIL=y
475# CONFIG_SCSI_SATA_SIL24 is not set
476# CONFIG_SCSI_SATA_SIS is not set
477# CONFIG_SCSI_SATA_ULI is not set
478# CONFIG_SCSI_SATA_VIA is not set
479# CONFIG_SCSI_SATA_VITESSE is not set
480# CONFIG_SCSI_BUSLOGIC is not set
481# CONFIG_SCSI_DMX3191D is not set
482# CONFIG_SCSI_EATA is not set
483# CONFIG_SCSI_FUTURE_DOMAIN is not set
484# CONFIG_SCSI_GDTH is not set
485# CONFIG_SCSI_IPS is not set
486# CONFIG_SCSI_INITIO is not set
487# CONFIG_SCSI_INIA100 is not set
488# CONFIG_SCSI_SYM53C8XX_2 is not set
489# CONFIG_SCSI_IPR is not set
490# CONFIG_SCSI_QLOGIC_1280 is not set
491# CONFIG_SCSI_QLA_FC is not set
492# CONFIG_SCSI_LPFC is not set
493# CONFIG_SCSI_DC395x is not set
494# CONFIG_SCSI_DC390T is not set
495# CONFIG_SCSI_NSP32 is not set
496# CONFIG_SCSI_DEBUG is not set
497
498#
499# Multi-device support (RAID and LVM)
500#
501CONFIG_MD=y
502CONFIG_BLK_DEV_MD=y
503CONFIG_MD_LINEAR=y
504CONFIG_MD_RAID0=y
505CONFIG_MD_RAID1=y
506# CONFIG_MD_RAID10 is not set
507# CONFIG_MD_RAID456 is not set
508# CONFIG_MD_MULTIPATH is not set
509# CONFIG_MD_FAULTY is not set
510# CONFIG_BLK_DEV_DM is not set
511
512#
513# Fusion MPT device support
514#
515# CONFIG_FUSION is not set
516# CONFIG_FUSION_SPI is not set
517# CONFIG_FUSION_FC is not set
518# CONFIG_FUSION_SAS is not set
519
520#
521# IEEE 1394 (FireWire) support
522#
523# CONFIG_IEEE1394 is not set
524
525#
526# I2O device support
527#
528# CONFIG_I2O is not set
529
530#
531# Macintosh device drivers
532#
533# CONFIG_WINDFARM is not set
534
535#
536# Network device support
537#
538CONFIG_NETDEVICES=y
539# CONFIG_DUMMY is not set
540# CONFIG_BONDING is not set
541# CONFIG_EQUALIZER is not set
542# CONFIG_TUN is not set
543
544#
545# ARCnet devices
546#
547# CONFIG_ARCNET is not set
548
549#
550# PHY device support
551#
552CONFIG_PHYLIB=y
553
554#
555# MII PHY device drivers
556#
557# CONFIG_MARVELL_PHY is not set
558# CONFIG_DAVICOM_PHY is not set
559# CONFIG_QSEMI_PHY is not set
560# CONFIG_LXT_PHY is not set
561CONFIG_CICADA_PHY=y
562# CONFIG_VITESSE_PHY is not set
563# CONFIG_SMSC_PHY is not set
564
565#
566# Ethernet (10 or 100Mbit)
567#
568CONFIG_NET_ETHERNET=y
569CONFIG_MII=y
570# CONFIG_HAPPYMEAL is not set
571# CONFIG_SUNGEM is not set
572# CONFIG_CASSINI is not set
573# CONFIG_NET_VENDOR_3COM is not set
574
575#
576# Tulip family network device support
577#
578# CONFIG_NET_TULIP is not set
579# CONFIG_HP100 is not set
580CONFIG_NET_PCI=y
581# CONFIG_PCNET32 is not set
582# CONFIG_AMD8111_ETH is not set
583# CONFIG_ADAPTEC_STARFIRE is not set
584# CONFIG_B44 is not set
585# CONFIG_FORCEDETH is not set
586# CONFIG_DGRS is not set
587# CONFIG_EEPRO100 is not set
588CONFIG_E100=y
589# CONFIG_FEALNX is not set
590# CONFIG_NATSEMI is not set
591# CONFIG_NE2K_PCI is not set
592# CONFIG_8139CP is not set
593# CONFIG_8139TOO is not set
594# CONFIG_SIS900 is not set
595# CONFIG_EPIC100 is not set
596# CONFIG_SUNDANCE is not set
597# CONFIG_TLAN is not set
598# CONFIG_VIA_RHINE is not set
599
600#
601# Ethernet (1000 Mbit)
602#
603# CONFIG_ACENIC is not set
604# CONFIG_DL2K is not set
605# CONFIG_E1000 is not set
606# CONFIG_NS83820 is not set
607# CONFIG_HAMACHI is not set
608# CONFIG_YELLOWFIN is not set
609# CONFIG_R8169 is not set
610# CONFIG_SIS190 is not set
611# CONFIG_SKGE is not set
612# CONFIG_SKY2 is not set
613# CONFIG_SK98LIN is not set
614# CONFIG_VIA_VELOCITY is not set
615# CONFIG_TIGON3 is not set
616# CONFIG_BNX2 is not set
617CONFIG_GIANFAR=y
618CONFIG_GFAR_NAPI=y
619
620#
621# Ethernet (10000 Mbit)
622#
623# CONFIG_CHELSIO_T1 is not set
624# CONFIG_IXGB is not set
625# CONFIG_S2IO is not set
626# CONFIG_MYRI10GE is not set
627
628#
629# Token Ring devices
630#
631# CONFIG_TR is not set
632
633#
634# Wireless LAN (non-hamradio)
635#
636# CONFIG_NET_RADIO is not set
637
638#
639# Wan interfaces
640#
641# CONFIG_WAN is not set
642# CONFIG_FDDI is not set
643# CONFIG_HIPPI is not set
644# CONFIG_PPP is not set
645# CONFIG_SLIP is not set
646# CONFIG_NET_FC is not set
647# CONFIG_SHAPER is not set
648# CONFIG_NETCONSOLE is not set
649# CONFIG_NETPOLL is not set
650# CONFIG_NET_POLL_CONTROLLER is not set
651
652#
653# ISDN subsystem
654#
655# CONFIG_ISDN is not set
656
657#
658# Telephony Support
659#
660# CONFIG_PHONE is not set
661
662#
663# Input device support
664#
665CONFIG_INPUT=y
666
667#
668# Userland interfaces
669#
670# CONFIG_INPUT_MOUSEDEV is not set
671# CONFIG_INPUT_JOYDEV is not set
672# CONFIG_INPUT_TSDEV is not set
673# CONFIG_INPUT_EVDEV is not set
674# CONFIG_INPUT_EVBUG is not set
675
676#
677# Input Device Drivers
678#
679# CONFIG_INPUT_KEYBOARD is not set
680# CONFIG_INPUT_MOUSE is not set
681# CONFIG_INPUT_JOYSTICK is not set
682# CONFIG_INPUT_TOUCHSCREEN is not set
683# CONFIG_INPUT_MISC is not set
684
685#
686# Hardware I/O ports
687#
688# CONFIG_SERIO is not set
689# CONFIG_GAMEPORT is not set
690
691#
692# Character devices
693#
694# CONFIG_VT is not set
695# CONFIG_SERIAL_NONSTANDARD is not set
696
697#
698# Serial drivers
699#
700CONFIG_SERIAL_8250=y
701CONFIG_SERIAL_8250_CONSOLE=y
702CONFIG_SERIAL_8250_PCI=y
703CONFIG_SERIAL_8250_NR_UARTS=4
704CONFIG_SERIAL_8250_RUNTIME_UARTS=4
705# CONFIG_SERIAL_8250_EXTENDED is not set
706
707#
708# Non-8250 serial port support
709#
710CONFIG_SERIAL_CORE=y
711CONFIG_SERIAL_CORE_CONSOLE=y
712# CONFIG_SERIAL_JSM is not set
713CONFIG_UNIX98_PTYS=y
714CONFIG_LEGACY_PTYS=y
715CONFIG_LEGACY_PTY_COUNT=256
716
717#
718# IPMI
719#
720# CONFIG_IPMI_HANDLER is not set
721
722#
723# Watchdog Cards
724#
725CONFIG_WATCHDOG=y
726# CONFIG_WATCHDOG_NOWAYOUT is not set
727
728#
729# Watchdog Device Drivers
730#
731# CONFIG_SOFT_WATCHDOG is not set
732CONFIG_83xx_WDT=y
733
734#
735# PCI-based Watchdog Cards
736#
737# CONFIG_PCIPCWATCHDOG is not set
738# CONFIG_WDTPCI is not set
739
740#
741# USB-based Watchdog Cards
742#
743# CONFIG_USBPCWATCHDOG is not set
744CONFIG_HW_RANDOM=y
745# CONFIG_NVRAM is not set
746# CONFIG_GEN_RTC is not set
747# CONFIG_DTLK is not set
748# CONFIG_R3964 is not set
749# CONFIG_APPLICOM is not set
750
751#
752# Ftape, the floppy tape device driver
753#
754# CONFIG_AGP is not set
755# CONFIG_DRM is not set
756# CONFIG_RAW_DRIVER is not set
757
758#
759# TPM devices
760#
761# CONFIG_TCG_TPM is not set
762# CONFIG_TELCLOCK is not set
763
764#
765# I2C support
766#
767CONFIG_I2C=y
768CONFIG_I2C_CHARDEV=y
769
770#
771# I2C Algorithms
772#
773# CONFIG_I2C_ALGOBIT is not set
774# CONFIG_I2C_ALGOPCF is not set
775# CONFIG_I2C_ALGOPCA is not set
776
777#
778# I2C Hardware Bus support
779#
780# CONFIG_I2C_ALI1535 is not set
781# CONFIG_I2C_ALI1563 is not set
782# CONFIG_I2C_ALI15X3 is not set
783# CONFIG_I2C_AMD756 is not set
784# CONFIG_I2C_AMD8111 is not set
785# CONFIG_I2C_I801 is not set
786# CONFIG_I2C_I810 is not set
787# CONFIG_I2C_PIIX4 is not set
788CONFIG_I2C_MPC=y
789# CONFIG_I2C_NFORCE2 is not set
790# CONFIG_I2C_OCORES is not set
791# CONFIG_I2C_PARPORT_LIGHT is not set
792# CONFIG_I2C_PROSAVAGE is not set
793# CONFIG_I2C_SAVAGE4 is not set
794# CONFIG_I2C_SIS5595 is not set
795# CONFIG_I2C_SIS630 is not set
796# CONFIG_I2C_SIS96X is not set
797# CONFIG_I2C_STUB is not set
798# CONFIG_I2C_VIA is not set
799# CONFIG_I2C_VIAPRO is not set
800# CONFIG_I2C_VOODOO3 is not set
801# CONFIG_I2C_PCA_ISA is not set
802
803#
804# Miscellaneous I2C Chip support
805#
806# CONFIG_SENSORS_DS1337 is not set
807# CONFIG_SENSORS_DS1374 is not set
808# CONFIG_SENSORS_EEPROM is not set
809# CONFIG_SENSORS_PCF8574 is not set
810# CONFIG_SENSORS_PCA9539 is not set
811# CONFIG_SENSORS_PCF8591 is not set
812# CONFIG_SENSORS_M41T00 is not set
813# CONFIG_SENSORS_MAX6875 is not set
814# CONFIG_I2C_DEBUG_CORE is not set
815# CONFIG_I2C_DEBUG_ALGO is not set
816# CONFIG_I2C_DEBUG_BUS is not set
817# CONFIG_I2C_DEBUG_CHIP is not set
818
819#
820# SPI support
821#
822CONFIG_SPI=y
823# CONFIG_SPI_DEBUG is not set
824CONFIG_SPI_MASTER=y
825
826#
827# SPI Master Controller Drivers
828#
829CONFIG_SPI_BITBANG=y
830CONFIG_SPI_MPC83xx=y
831
832#
833# SPI Protocol Masters
834#
835
836#
837# Dallas's 1-wire bus
838#
839
840#
841# Hardware Monitoring support
842#
843CONFIG_HWMON=y
844# CONFIG_HWMON_VID is not set
845# CONFIG_SENSORS_ABITUGURU is not set
846# CONFIG_SENSORS_ADM1021 is not set
847# CONFIG_SENSORS_ADM1025 is not set
848# CONFIG_SENSORS_ADM1026 is not set
849# CONFIG_SENSORS_ADM1031 is not set
850# CONFIG_SENSORS_ADM9240 is not set
851# CONFIG_SENSORS_ASB100 is not set
852# CONFIG_SENSORS_ATXP1 is not set
853# CONFIG_SENSORS_DS1621 is not set
854# CONFIG_SENSORS_F71805F is not set
855# CONFIG_SENSORS_FSCHER is not set
856# CONFIG_SENSORS_FSCPOS is not set
857# CONFIG_SENSORS_GL518SM is not set
858# CONFIG_SENSORS_GL520SM is not set
859# CONFIG_SENSORS_IT87 is not set
860# CONFIG_SENSORS_LM63 is not set
861# CONFIG_SENSORS_LM70 is not set
862# CONFIG_SENSORS_LM75 is not set
863# CONFIG_SENSORS_LM77 is not set
864# CONFIG_SENSORS_LM78 is not set
865# CONFIG_SENSORS_LM80 is not set
866# CONFIG_SENSORS_LM83 is not set
867# CONFIG_SENSORS_LM85 is not set
868# CONFIG_SENSORS_LM87 is not set
869# CONFIG_SENSORS_LM90 is not set
870# CONFIG_SENSORS_LM92 is not set
871# CONFIG_SENSORS_MAX1619 is not set
872# CONFIG_SENSORS_PC87360 is not set
873# CONFIG_SENSORS_SIS5595 is not set
874# CONFIG_SENSORS_SMSC47M1 is not set
875# CONFIG_SENSORS_SMSC47M192 is not set
876# CONFIG_SENSORS_SMSC47B397 is not set
877# CONFIG_SENSORS_VIA686A is not set
878# CONFIG_SENSORS_VT8231 is not set
879# CONFIG_SENSORS_W83781D is not set
880# CONFIG_SENSORS_W83791D is not set
881# CONFIG_SENSORS_W83792D is not set
882# CONFIG_SENSORS_W83L785TS is not set
883# CONFIG_SENSORS_W83627HF is not set
884# CONFIG_SENSORS_W83627EHF is not set
885# CONFIG_HWMON_DEBUG_CHIP is not set
886
887#
888# Misc devices
889#
890
891#
892# Multimedia devices
893#
894# CONFIG_VIDEO_DEV is not set
895CONFIG_VIDEO_V4L2=y
896
897#
898# Digital Video Broadcasting Devices
899#
900# CONFIG_DVB is not set
901# CONFIG_USB_DABUSB is not set
902
903#
904# Graphics support
905#
906CONFIG_FIRMWARE_EDID=y
907# CONFIG_FB is not set
908
909#
910# Sound
911#
912# CONFIG_SOUND is not set
913
914#
915# USB support
916#
917CONFIG_USB_ARCH_HAS_HCD=y
918CONFIG_USB_ARCH_HAS_OHCI=y
919CONFIG_USB_ARCH_HAS_EHCI=y
920CONFIG_USB=y
921# CONFIG_USB_DEBUG is not set
922
923#
924# Miscellaneous USB options
925#
926CONFIG_USB_DEVICEFS=y
927# CONFIG_USB_BANDWIDTH is not set
928# CONFIG_USB_DYNAMIC_MINORS is not set
929# CONFIG_USB_OTG is not set
930
931#
932# USB Host Controller Drivers
933#
934CONFIG_USB_EHCI_HCD=y
935# CONFIG_USB_EHCI_SPLIT_ISO is not set
936# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
937# CONFIG_USB_EHCI_TT_NEWSCHED is not set
938# CONFIG_USB_ISP116X_HCD is not set
939CONFIG_USB_OHCI_HCD=y
940# CONFIG_USB_OHCI_BIG_ENDIAN is not set
941CONFIG_USB_OHCI_LITTLE_ENDIAN=y
942CONFIG_USB_UHCI_HCD=y
943# CONFIG_USB_SL811_HCD is not set
944
945#
946# USB Device Class drivers
947#
948# CONFIG_USB_ACM is not set
949# CONFIG_USB_PRINTER is not set
950
951#
952# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
953#
954
955#
956# may also be needed; see USB_STORAGE Help for more information
957#
958CONFIG_USB_STORAGE=y
959# CONFIG_USB_STORAGE_DEBUG is not set
960# CONFIG_USB_STORAGE_DATAFAB is not set
961# CONFIG_USB_STORAGE_FREECOM is not set
962# CONFIG_USB_STORAGE_DPCM is not set
963# CONFIG_USB_STORAGE_USBAT is not set
964# CONFIG_USB_STORAGE_SDDR09 is not set
965# CONFIG_USB_STORAGE_SDDR55 is not set
966# CONFIG_USB_STORAGE_JUMPSHOT is not set
967# CONFIG_USB_STORAGE_ALAUDA is not set
968# CONFIG_USB_LIBUSUAL is not set
969
970#
971# USB Input Devices
972#
973# CONFIG_USB_HID is not set
974
975#
976# USB HID Boot Protocol drivers
977#
978# CONFIG_USB_KBD is not set
979# CONFIG_USB_MOUSE is not set
980# CONFIG_USB_AIPTEK is not set
981# CONFIG_USB_WACOM is not set
982# CONFIG_USB_ACECAD is not set
983# CONFIG_USB_KBTAB is not set
984# CONFIG_USB_POWERMATE is not set
985# CONFIG_USB_TOUCHSCREEN is not set
986# CONFIG_USB_YEALINK is not set
987# CONFIG_USB_XPAD is not set
988# CONFIG_USB_ATI_REMOTE is not set
989# CONFIG_USB_ATI_REMOTE2 is not set
990# CONFIG_USB_KEYSPAN_REMOTE is not set
991# CONFIG_USB_APPLETOUCH is not set
992
993#
994# USB Imaging devices
995#
996# CONFIG_USB_MDC800 is not set
997# CONFIG_USB_MICROTEK is not set
998
999#
1000# USB Network Adapters
1001#
1002# CONFIG_USB_CATC is not set
1003# CONFIG_USB_KAWETH is not set
1004# CONFIG_USB_PEGASUS is not set
1005# CONFIG_USB_RTL8150 is not set
1006# CONFIG_USB_USBNET is not set
1007CONFIG_USB_MON=y
1008
1009#
1010# USB port drivers
1011#
1012
1013#
1014# USB Serial Converter support
1015#
1016# CONFIG_USB_SERIAL is not set
1017
1018#
1019# USB Miscellaneous drivers
1020#
1021# CONFIG_USB_EMI62 is not set
1022# CONFIG_USB_EMI26 is not set
1023# CONFIG_USB_AUERSWALD is not set
1024# CONFIG_USB_RIO500 is not set
1025# CONFIG_USB_LEGOTOWER is not set
1026# CONFIG_USB_LCD is not set
1027# CONFIG_USB_LED is not set
1028# CONFIG_USB_CY7C63 is not set
1029# CONFIG_USB_CYTHERM is not set
1030# CONFIG_USB_PHIDGETKIT is not set
1031# CONFIG_USB_PHIDGETSERVO is not set
1032# CONFIG_USB_IDMOUSE is not set
1033# CONFIG_USB_APPLEDISPLAY is not set
1034# CONFIG_USB_SISUSBVGA is not set
1035# CONFIG_USB_LD is not set
1036# CONFIG_USB_TEST is not set
1037
1038#
1039# USB DSL modem support
1040#
1041
1042#
1043# USB Gadget Support
1044#
1045CONFIG_USB_GADGET=y
1046# CONFIG_USB_GADGET_DEBUG_FILES is not set
1047CONFIG_USB_GADGET_SELECTED=y
1048CONFIG_USB_GADGET_NET2280=y
1049CONFIG_USB_NET2280=y
1050# CONFIG_USB_GADGET_PXA2XX is not set
1051# CONFIG_USB_GADGET_GOKU is not set
1052# CONFIG_USB_GADGET_LH7A40X is not set
1053# CONFIG_USB_GADGET_OMAP is not set
1054# CONFIG_USB_GADGET_AT91 is not set
1055# CONFIG_USB_GADGET_DUMMY_HCD is not set
1056CONFIG_USB_GADGET_DUALSPEED=y
1057# CONFIG_USB_ZERO is not set
1058CONFIG_USB_ETH=y
1059CONFIG_USB_ETH_RNDIS=y
1060# CONFIG_USB_GADGETFS is not set
1061# CONFIG_USB_FILE_STORAGE is not set
1062# CONFIG_USB_G_SERIAL is not set
1063
1064#
1065# MMC/SD Card support
1066#
1067# CONFIG_MMC is not set
1068
1069#
1070# LED devices
1071#
1072# CONFIG_NEW_LEDS is not set
1073
1074#
1075# LED drivers
1076#
1077
1078#
1079# LED Triggers
1080#
1081
1082#
1083# InfiniBand support
1084#
1085# CONFIG_INFINIBAND is not set
1086
1087#
1088# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
1089#
1090
1091#
1092# Real Time Clock
1093#
1094CONFIG_RTC_LIB=y
1095CONFIG_RTC_CLASS=y
1096CONFIG_RTC_HCTOSYS=y
1097CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
1098
1099#
1100# RTC interfaces
1101#
1102CONFIG_RTC_INTF_SYSFS=y
1103CONFIG_RTC_INTF_PROC=y
1104CONFIG_RTC_INTF_DEV=y
1105CONFIG_RTC_INTF_DEV_UIE_EMUL=y
1106
1107#
1108# RTC drivers
1109#
1110# CONFIG_RTC_DRV_X1205 is not set
1111CONFIG_RTC_DRV_DS1307=y
1112# CONFIG_RTC_DRV_DS1553 is not set
1113# CONFIG_RTC_DRV_DS1672 is not set
1114# CONFIG_RTC_DRV_DS1742 is not set
1115# CONFIG_RTC_DRV_PCF8563 is not set
1116# CONFIG_RTC_DRV_PCF8583 is not set
1117# CONFIG_RTC_DRV_RS5C348 is not set
1118# CONFIG_RTC_DRV_RS5C372 is not set
1119# CONFIG_RTC_DRV_M48T86 is not set
1120# CONFIG_RTC_DRV_TEST is not set
1121# CONFIG_RTC_DRV_MAX6902 is not set
1122# CONFIG_RTC_DRV_V3020 is not set
1123
1124#
1125# DMA Engine support
1126#
1127CONFIG_DMA_ENGINE=y
1128
1129#
1130# DMA Clients
1131#
1132CONFIG_NET_DMA=y
1133
1134#
1135# DMA Devices
1136#
1137CONFIG_INTEL_IOATDMA=y
1138
1139#
1140# File systems
1141#
1142CONFIG_EXT2_FS=y
1143# CONFIG_EXT2_FS_XATTR is not set
1144# CONFIG_EXT2_FS_XIP is not set
1145CONFIG_EXT3_FS=y
1146CONFIG_EXT3_FS_XATTR=y
1147# CONFIG_EXT3_FS_POSIX_ACL is not set
1148# CONFIG_EXT3_FS_SECURITY is not set
1149CONFIG_JBD=y
1150# CONFIG_JBD_DEBUG is not set
1151CONFIG_FS_MBCACHE=y
1152# CONFIG_REISERFS_FS is not set
1153# CONFIG_JFS_FS is not set
1154# CONFIG_FS_POSIX_ACL is not set
1155# CONFIG_XFS_FS is not set
1156# CONFIG_OCFS2_FS is not set
1157# CONFIG_MINIX_FS is not set
1158# CONFIG_ROMFS_FS is not set
1159CONFIG_INOTIFY=y
1160CONFIG_INOTIFY_USER=y
1161# CONFIG_QUOTA is not set
1162CONFIG_DNOTIFY=y
1163# CONFIG_AUTOFS_FS is not set
1164# CONFIG_AUTOFS4_FS is not set
1165# CONFIG_FUSE_FS is not set
1166
1167#
1168# CD-ROM/DVD Filesystems
1169#
1170# CONFIG_ISO9660_FS is not set
1171# CONFIG_UDF_FS is not set
1172
1173#
1174# DOS/FAT/NT Filesystems
1175#
1176# CONFIG_MSDOS_FS is not set
1177# CONFIG_VFAT_FS is not set
1178# CONFIG_NTFS_FS is not set
1179
1180#
1181# Pseudo filesystems
1182#
1183CONFIG_PROC_FS=y
1184CONFIG_PROC_KCORE=y
1185CONFIG_SYSFS=y
1186CONFIG_TMPFS=y
1187# CONFIG_HUGETLB_PAGE is not set
1188CONFIG_RAMFS=y
1189# CONFIG_CONFIGFS_FS is not set
1190
1191#
1192# Miscellaneous filesystems
1193#
1194# CONFIG_ADFS_FS is not set
1195# CONFIG_AFFS_FS is not set
1196# CONFIG_HFS_FS is not set
1197# CONFIG_HFSPLUS_FS is not set
1198# CONFIG_BEFS_FS is not set
1199# CONFIG_BFS_FS is not set
1200# CONFIG_EFS_FS is not set
1201# CONFIG_JFFS_FS is not set
1202# CONFIG_JFFS2_FS is not set
1203# CONFIG_CRAMFS is not set
1204# CONFIG_VXFS_FS is not set
1205# CONFIG_HPFS_FS is not set
1206# CONFIG_QNX4FS_FS is not set
1207# CONFIG_SYSV_FS is not set
1208# CONFIG_UFS_FS is not set
1209
1210#
1211# Network File Systems
1212#
1213CONFIG_NFS_FS=y
1214CONFIG_NFS_V3=y
1215# CONFIG_NFS_V3_ACL is not set
1216CONFIG_NFS_V4=y
1217# CONFIG_NFS_DIRECTIO is not set
1218# CONFIG_NFSD is not set
1219CONFIG_ROOT_NFS=y
1220CONFIG_LOCKD=y
1221CONFIG_LOCKD_V4=y
1222CONFIG_NFS_COMMON=y
1223CONFIG_SUNRPC=y
1224CONFIG_SUNRPC_GSS=y
1225CONFIG_RPCSEC_GSS_KRB5=y
1226# CONFIG_RPCSEC_GSS_SPKM3 is not set
1227# CONFIG_SMB_FS is not set
1228# CONFIG_CIFS is not set
1229# CONFIG_CIFS_DEBUG2 is not set
1230# CONFIG_NCP_FS is not set
1231# CONFIG_CODA_FS is not set
1232# CONFIG_AFS_FS is not set
1233# CONFIG_9P_FS is not set
1234
1235#
1236# Partition Types
1237#
1238CONFIG_PARTITION_ADVANCED=y
1239# CONFIG_ACORN_PARTITION is not set
1240# CONFIG_OSF_PARTITION is not set
1241# CONFIG_AMIGA_PARTITION is not set
1242# CONFIG_ATARI_PARTITION is not set
1243# CONFIG_MAC_PARTITION is not set
1244# CONFIG_MSDOS_PARTITION is not set
1245# CONFIG_LDM_PARTITION is not set
1246# CONFIG_SGI_PARTITION is not set
1247# CONFIG_ULTRIX_PARTITION is not set
1248# CONFIG_SUN_PARTITION is not set
1249# CONFIG_KARMA_PARTITION is not set
1250# CONFIG_EFI_PARTITION is not set
1251
1252#
1253# Native Language Support
1254#
1255# CONFIG_NLS is not set
1256
1257#
1258# Library routines
1259#
1260# CONFIG_CRC_CCITT is not set
1261# CONFIG_CRC16 is not set
1262CONFIG_CRC32=y
1263# CONFIG_LIBCRC32C is not set
1264CONFIG_PLIST=y
1265
1266#
1267# Instrumentation Support
1268#
1269# CONFIG_PROFILING is not set
1270
1271#
1272# Kernel hacking
1273#
1274CONFIG_PRINTK_TIME=y
1275# CONFIG_MAGIC_SYSRQ is not set
1276# CONFIG_UNUSED_SYMBOLS is not set
1277CONFIG_DEBUG_KERNEL=y
1278CONFIG_LOG_BUF_SHIFT=17
1279CONFIG_DETECT_SOFTLOCKUP=y
1280# CONFIG_SCHEDSTATS is not set
1281# CONFIG_DEBUG_SLAB is not set
1282# CONFIG_DEBUG_MUTEXES is not set
1283# CONFIG_DEBUG_RT_MUTEXES is not set
1284# CONFIG_RT_MUTEX_TESTER is not set
1285# CONFIG_DEBUG_SPINLOCK is not set
1286# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
1287# CONFIG_DEBUG_KOBJECT is not set
1288CONFIG_DEBUG_INFO=y
1289# CONFIG_DEBUG_FS is not set
1290# CONFIG_DEBUG_VM is not set
1291CONFIG_FORCED_INLINING=y
1292# CONFIG_RCU_TORTURE_TEST is not set
1293# CONFIG_DEBUGGER is not set
1294# CONFIG_BDI_SWITCH is not set
1295CONFIG_BOOTX_TEXT=y
1296CONFIG_SERIAL_TEXT_DEBUG=y
1297# CONFIG_PPC_EARLY_DEBUG is not set
1298
1299#
1300# Security options
1301#
1302# CONFIG_KEYS is not set
1303# CONFIG_SECURITY is not set
1304
1305#
1306# Cryptographic options
1307#
1308CONFIG_CRYPTO=y
1309# CONFIG_CRYPTO_HMAC is not set
1310# CONFIG_CRYPTO_NULL is not set
1311# CONFIG_CRYPTO_MD4 is not set
1312CONFIG_CRYPTO_MD5=y
1313# CONFIG_CRYPTO_SHA1 is not set
1314# CONFIG_CRYPTO_SHA256 is not set
1315# CONFIG_CRYPTO_SHA512 is not set
1316# CONFIG_CRYPTO_WP512 is not set
1317# CONFIG_CRYPTO_TGR192 is not set
1318CONFIG_CRYPTO_DES=y
1319# CONFIG_CRYPTO_BLOWFISH is not set
1320# CONFIG_CRYPTO_TWOFISH is not set
1321# CONFIG_CRYPTO_SERPENT is not set
1322# CONFIG_CRYPTO_AES is not set
1323# CONFIG_CRYPTO_CAST5 is not set
1324# CONFIG_CRYPTO_CAST6 is not set
1325# CONFIG_CRYPTO_TEA is not set
1326# CONFIG_CRYPTO_ARC4 is not set
1327# CONFIG_CRYPTO_KHAZAD is not set
1328# CONFIG_CRYPTO_ANUBIS is not set
1329# CONFIG_CRYPTO_DEFLATE is not set
1330# CONFIG_CRYPTO_MICHAEL_MIC is not set
1331# CONFIG_CRYPTO_CRC32C is not set
1332# CONFIG_CRYPTO_TEST is not set
1333
1334#
1335# Hardware crypto devices
1336#
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index a6920919d68e..f4e5e14ee2b6 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -111,7 +111,7 @@ void __init btext_setup_display(int width, int height, int depth, int pitch,
111 logicalDisplayBase = (unsigned char *)address; 111 logicalDisplayBase = (unsigned char *)address;
112 dispDeviceBase = (unsigned char *)address; 112 dispDeviceBase = (unsigned char *)address;
113 dispDeviceRowBytes = pitch; 113 dispDeviceRowBytes = pitch;
114 dispDeviceDepth = depth; 114 dispDeviceDepth = depth == 15 ? 16 : depth;
115 dispDeviceRect[0] = dispDeviceRect[1] = 0; 115 dispDeviceRect[0] = dispDeviceRect[1] = 0;
116 dispDeviceRect[2] = width; 116 dispDeviceRect[2] = width;
117 dispDeviceRect[3] = height; 117 dispDeviceRect[3] = height;
@@ -160,20 +160,28 @@ int btext_initialize(struct device_node *np)
160 unsigned long address = 0; 160 unsigned long address = 0;
161 u32 *prop; 161 u32 *prop;
162 162
163 prop = (u32 *)get_property(np, "width", NULL); 163 prop = (u32 *)get_property(np, "linux,bootx-width", NULL);
164 if (prop == NULL)
165 prop = (u32 *)get_property(np, "width", NULL);
164 if (prop == NULL) 166 if (prop == NULL)
165 return -EINVAL; 167 return -EINVAL;
166 width = *prop; 168 width = *prop;
167 prop = (u32 *)get_property(np, "height", NULL); 169 prop = (u32 *)get_property(np, "linux,bootx-height", NULL);
170 if (prop == NULL)
171 prop = (u32 *)get_property(np, "height", NULL);
168 if (prop == NULL) 172 if (prop == NULL)
169 return -EINVAL; 173 return -EINVAL;
170 height = *prop; 174 height = *prop;
171 prop = (u32 *)get_property(np, "depth", NULL); 175 prop = (u32 *)get_property(np, "linux,bootx-depth", NULL);
176 if (prop == NULL)
177 prop = (u32 *)get_property(np, "depth", NULL);
172 if (prop == NULL) 178 if (prop == NULL)
173 return -EINVAL; 179 return -EINVAL;
174 depth = *prop; 180 depth = *prop;
175 pitch = width * ((depth + 7) / 8); 181 pitch = width * ((depth + 7) / 8);
176 prop = (u32 *)get_property(np, "linebytes", NULL); 182 prop = (u32 *)get_property(np, "linux,bootx-linebytes", NULL);
183 if (prop == NULL)
184 prop = (u32 *)get_property(np, "linebytes", NULL);
177 if (prop) 185 if (prop)
178 pitch = *prop; 186 pitch = *prop;
179 if (pitch == 1) 187 if (pitch == 1)
@@ -194,7 +202,7 @@ int btext_initialize(struct device_node *np)
194 g_max_loc_Y = height / 16; 202 g_max_loc_Y = height / 16;
195 dispDeviceBase = (unsigned char *)address; 203 dispDeviceBase = (unsigned char *)address;
196 dispDeviceRowBytes = pitch; 204 dispDeviceRowBytes = pitch;
197 dispDeviceDepth = depth; 205 dispDeviceDepth = depth == 15 ? 16 : depth;
198 dispDeviceRect[0] = dispDeviceRect[1] = 0; 206 dispDeviceRect[0] = dispDeviceRect[1] = 0;
199 dispDeviceRect[2] = width; 207 dispDeviceRect[2] = width;
200 dispDeviceRect[3] = height; 208 dispDeviceRect[3] = height;
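The btext_initialize() change above makes each display property lookup try the new "linux,bootx-*" name first and fall back to the legacy Open Firmware name. A minimal sketch of that fallback pattern, assuming a hypothetical helper that is not part of this patch and using the get_property() interface visible in the hunks above:

/* Sketch only -- hypothetical helper, needs <asm/prom.h> for
 * struct device_node and get_property() */
static u32 *bootx_get_prop(struct device_node *np, const char *bootx_name,
			   const char *of_name)
{
	/* Prefer the property written by the BootX trampoline */
	u32 *prop = (u32 *)get_property(np, bootx_name, NULL);

	/* Fall back to the traditional device-tree name */
	if (prop == NULL)
		prop = (u32 *)get_property(np, of_name, NULL);
	return prop;
}

With such a helper, the width lookup above would read as bootx_get_prop(np, "linux,bootx-width", "width"), and likewise for height, depth and linebytes.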
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index e47d40ac6f39..97ddc02a3d42 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -323,13 +323,11 @@ int ibmebus_request_irq(struct ibmebus_dev *dev,
323 unsigned long irq_flags, const char * devname, 323 unsigned long irq_flags, const char * devname,
324 void *dev_id) 324 void *dev_id)
325{ 325{
326 unsigned int irq = virt_irq_create_mapping(ist); 326 unsigned int irq = irq_create_mapping(NULL, ist, 0);
327 327
328 if (irq == NO_IRQ) 328 if (irq == NO_IRQ)
329 return -EINVAL; 329 return -EINVAL;
330 330
331 irq = irq_offset_up(irq);
332
333 return request_irq(irq, handler, 331 return request_irq(irq, handler,
334 irq_flags, devname, dev_id); 332 irq_flags, devname, dev_id);
335} 333}
@@ -337,12 +335,9 @@ EXPORT_SYMBOL(ibmebus_request_irq);
337 335
338void ibmebus_free_irq(struct ibmebus_dev *dev, u32 ist, void *dev_id) 336void ibmebus_free_irq(struct ibmebus_dev *dev, u32 ist, void *dev_id)
339{ 337{
340 unsigned int irq = virt_irq_create_mapping(ist); 338 unsigned int irq = irq_find_mapping(NULL, ist);
341 339
342 irq = irq_offset_up(irq);
343 free_irq(irq, dev_id); 340 free_irq(irq, dev_id);
344
345 return;
346} 341}
347EXPORT_SYMBOL(ibmebus_free_irq); 342EXPORT_SYMBOL(ibmebus_free_irq);
348 343
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 525baab45d2d..8cf987809c66 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -29,6 +29,8 @@
29 * to reduce code space and undefined function references. 29 * to reduce code space and undefined function references.
30 */ 30 */
31 31
32#undef DEBUG
33
32#include <linux/module.h> 34#include <linux/module.h>
33#include <linux/threads.h> 35#include <linux/threads.h>
34#include <linux/kernel_stat.h> 36#include <linux/kernel_stat.h>
@@ -46,7 +48,10 @@
46#include <linux/cpumask.h> 48#include <linux/cpumask.h>
47#include <linux/profile.h> 49#include <linux/profile.h>
48#include <linux/bitops.h> 50#include <linux/bitops.h>
49#include <linux/pci.h> 51#include <linux/list.h>
52#include <linux/radix-tree.h>
53#include <linux/mutex.h>
54#include <linux/bootmem.h>
50 55
51#include <asm/uaccess.h> 56#include <asm/uaccess.h>
52#include <asm/system.h> 57#include <asm/system.h>
@@ -57,39 +62,38 @@
57#include <asm/prom.h> 62#include <asm/prom.h>
58#include <asm/ptrace.h> 63#include <asm/ptrace.h>
59#include <asm/machdep.h> 64#include <asm/machdep.h>
65#include <asm/udbg.h>
60#ifdef CONFIG_PPC_ISERIES 66#ifdef CONFIG_PPC_ISERIES
61#include <asm/paca.h> 67#include <asm/paca.h>
62#endif 68#endif
63 69
64int __irq_offset_value; 70int __irq_offset_value;
65#ifdef CONFIG_PPC32
66EXPORT_SYMBOL(__irq_offset_value);
67#endif
68
69static int ppc_spurious_interrupts; 71static int ppc_spurious_interrupts;
70 72
71#ifdef CONFIG_PPC32 73#ifdef CONFIG_PPC32
72#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) 74EXPORT_SYMBOL(__irq_offset_value);
75atomic_t ppc_n_lost_interrupts;
73 76
77#ifndef CONFIG_PPC_MERGE
78#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
74unsigned long ppc_cached_irq_mask[NR_MASK_WORDS]; 79unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
75atomic_t ppc_n_lost_interrupts; 80#endif
76 81
77#ifdef CONFIG_TAU_INT 82#ifdef CONFIG_TAU_INT
78extern int tau_initialized; 83extern int tau_initialized;
79extern int tau_interrupts(int); 84extern int tau_interrupts(int);
80#endif 85#endif
86#endif /* CONFIG_PPC32 */
81 87
82#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE) 88#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
83extern atomic_t ipi_recv; 89extern atomic_t ipi_recv;
84extern atomic_t ipi_sent; 90extern atomic_t ipi_sent;
85#endif 91#endif
86#endif /* CONFIG_PPC32 */
87 92
88#ifdef CONFIG_PPC64 93#ifdef CONFIG_PPC64
89EXPORT_SYMBOL(irq_desc); 94EXPORT_SYMBOL(irq_desc);
90 95
91int distribute_irqs = 1; 96int distribute_irqs = 1;
92u64 ppc64_interrupt_controller;
93#endif /* CONFIG_PPC64 */ 97#endif /* CONFIG_PPC64 */
94 98
95int show_interrupts(struct seq_file *p, void *v) 99int show_interrupts(struct seq_file *p, void *v)
@@ -182,7 +186,7 @@ void fixup_irqs(cpumask_t map)
182 186
183void do_IRQ(struct pt_regs *regs) 187void do_IRQ(struct pt_regs *regs)
184{ 188{
185 int irq; 189 unsigned int irq;
186#ifdef CONFIG_IRQSTACKS 190#ifdef CONFIG_IRQSTACKS
187 struct thread_info *curtp, *irqtp; 191 struct thread_info *curtp, *irqtp;
188#endif 192#endif
@@ -213,22 +217,26 @@ void do_IRQ(struct pt_regs *regs)
213 */ 217 */
214 irq = ppc_md.get_irq(regs); 218 irq = ppc_md.get_irq(regs);
215 219
216 if (irq >= 0) { 220 if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
217#ifdef CONFIG_IRQSTACKS 221#ifdef CONFIG_IRQSTACKS
218 /* Switch to the irq stack to handle this */ 222 /* Switch to the irq stack to handle this */
219 curtp = current_thread_info(); 223 curtp = current_thread_info();
220 irqtp = hardirq_ctx[smp_processor_id()]; 224 irqtp = hardirq_ctx[smp_processor_id()];
221 if (curtp != irqtp) { 225 if (curtp != irqtp) {
226 struct irq_desc *desc = irq_desc + irq;
227 void *handler = desc->handle_irq;
228 if (handler == NULL)
229 handler = &__do_IRQ;
222 irqtp->task = curtp->task; 230 irqtp->task = curtp->task;
223 irqtp->flags = 0; 231 irqtp->flags = 0;
224 call___do_IRQ(irq, regs, irqtp); 232 call_handle_irq(irq, desc, regs, irqtp, handler);
225 irqtp->task = NULL; 233 irqtp->task = NULL;
226 if (irqtp->flags) 234 if (irqtp->flags)
227 set_bits(irqtp->flags, &curtp->flags); 235 set_bits(irqtp->flags, &curtp->flags);
228 } else 236 } else
229#endif 237#endif
230 __do_IRQ(irq, regs); 238 generic_handle_irq(irq, regs);
231 } else if (irq != -2) 239 } else if (irq != NO_IRQ_IGNORE)
232 /* That's not SMP safe ... but who cares ? */ 240 /* That's not SMP safe ... but who cares ? */
233 ppc_spurious_interrupts++; 241 ppc_spurious_interrupts++;
234 242
@@ -245,196 +253,562 @@ void do_IRQ(struct pt_regs *regs)
245 253
246void __init init_IRQ(void) 254void __init init_IRQ(void)
247{ 255{
256 ppc_md.init_IRQ();
248#ifdef CONFIG_PPC64 257#ifdef CONFIG_PPC64
249 static int once = 0; 258 irq_ctx_init();
259#endif
260}
261
262
263#ifdef CONFIG_IRQSTACKS
264struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
265struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly;
266
267void irq_ctx_init(void)
268{
269 struct thread_info *tp;
270 int i;
271
272 for_each_possible_cpu(i) {
273 memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
274 tp = softirq_ctx[i];
275 tp->cpu = i;
276 tp->preempt_count = SOFTIRQ_OFFSET;
277
278 memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
279 tp = hardirq_ctx[i];
280 tp->cpu = i;
281 tp->preempt_count = HARDIRQ_OFFSET;
282 }
283}
284
285static inline void do_softirq_onstack(void)
286{
287 struct thread_info *curtp, *irqtp;
288
289 curtp = current_thread_info();
290 irqtp = softirq_ctx[smp_processor_id()];
291 irqtp->task = curtp->task;
292 call_do_softirq(irqtp);
293 irqtp->task = NULL;
294}
250 295
251 if (once) 296#else
297#define do_softirq_onstack() __do_softirq()
298#endif /* CONFIG_IRQSTACKS */
299
300void do_softirq(void)
301{
302 unsigned long flags;
303
304 if (in_interrupt())
252 return; 305 return;
253 306
254 once++; 307 local_irq_save(flags);
255 308
256#endif 309 if (local_softirq_pending())
257 ppc_md.init_IRQ(); 310 do_softirq_onstack();
258#ifdef CONFIG_PPC64 311
259 irq_ctx_init(); 312 local_irq_restore(flags);
260#endif
261} 313}
314EXPORT_SYMBOL(do_softirq);
315
262 316
263#ifdef CONFIG_PPC64
264/* 317/*
265 * Virtual IRQ mapping code, used on systems with XICS interrupt controllers. 318 * IRQ controller and virtual interrupts
266 */ 319 */
267 320
268#define UNDEFINED_IRQ 0xffffffff 321#ifdef CONFIG_PPC_MERGE
269unsigned int virt_irq_to_real_map[NR_IRQS];
270 322
271/* 323static LIST_HEAD(irq_hosts);
272 * Don't use virtual irqs 0, 1, 2 for devices. 324static spinlock_t irq_big_lock = SPIN_LOCK_UNLOCKED;
273 * The pcnet32 driver considers interrupt numbers < 2 to be invalid,
274 * and 2 is the XICS IPI interrupt.
275 * We limit virtual irqs to __irq_offet_value less than virt_irq_max so
276 * that when we offset them we don't end up with an interrupt
277 * number >= virt_irq_max.
278 */
279#define MIN_VIRT_IRQ 3
280 325
281unsigned int virt_irq_max; 326struct irq_map_entry irq_map[NR_IRQS];
282static unsigned int max_virt_irq; 327static unsigned int irq_virq_count = NR_IRQS;
283static unsigned int nr_virt_irqs; 328static struct irq_host *irq_default_host;
284 329
285void 330struct irq_host *irq_alloc_host(unsigned int revmap_type,
286virt_irq_init(void) 331 unsigned int revmap_arg,
332 struct irq_host_ops *ops,
333 irq_hw_number_t inval_irq)
287{ 334{
288 int i; 335 struct irq_host *host;
336 unsigned int size = sizeof(struct irq_host);
337 unsigned int i;
338 unsigned int *rmap;
339 unsigned long flags;
289 340
290 if ((virt_irq_max == 0) || (virt_irq_max > (NR_IRQS - 1))) 341 /* Allocate structure and revmap table if using linear mapping */
291 virt_irq_max = NR_IRQS - 1; 342 if (revmap_type == IRQ_HOST_MAP_LINEAR)
292 max_virt_irq = virt_irq_max - __irq_offset_value; 343 size += revmap_arg * sizeof(unsigned int);
293 nr_virt_irqs = max_virt_irq - MIN_VIRT_IRQ + 1; 344 if (mem_init_done)
345 host = kzalloc(size, GFP_KERNEL);
346 else {
347 host = alloc_bootmem(size);
348 if (host)
349 memset(host, 0, size);
350 }
351 if (host == NULL)
352 return NULL;
294 353
295 for (i = 0; i < NR_IRQS; i++) 354 /* Fill structure */
296 virt_irq_to_real_map[i] = UNDEFINED_IRQ; 355 host->revmap_type = revmap_type;
356 host->inval_irq = inval_irq;
357 host->ops = ops;
358
359 spin_lock_irqsave(&irq_big_lock, flags);
360
361 /* If it's a legacy controller, check for duplicates and
 362 * mark it as allocated (we use irq 0 host pointer for that)
363 */
364 if (revmap_type == IRQ_HOST_MAP_LEGACY) {
365 if (irq_map[0].host != NULL) {
366 spin_unlock_irqrestore(&irq_big_lock, flags);
367 /* If we are early boot, we can't free the structure,
368 * too bad...
369 * this will be fixed once slab is made available early
370 * instead of the current cruft
371 */
372 if (mem_init_done)
373 kfree(host);
374 return NULL;
375 }
376 irq_map[0].host = host;
377 }
378
379 list_add(&host->link, &irq_hosts);
380 spin_unlock_irqrestore(&irq_big_lock, flags);
381
382 /* Additional setups per revmap type */
383 switch(revmap_type) {
384 case IRQ_HOST_MAP_LEGACY:
385 /* 0 is always the invalid number for legacy */
386 host->inval_irq = 0;
387 /* setup us as the host for all legacy interrupts */
388 for (i = 1; i < NUM_ISA_INTERRUPTS; i++) {
389 irq_map[i].hwirq = 0;
390 smp_wmb();
391 irq_map[i].host = host;
392 smp_wmb();
393
394 /* Clear some flags */
395 get_irq_desc(i)->status
396 &= ~(IRQ_NOREQUEST | IRQ_LEVEL);
397
398 /* Legacy flags are left to default at this point,
399 * one can then use irq_create_mapping() to
 400 * explicitly change them
401 */
402 ops->map(host, i, i, 0);
403 }
404 break;
405 case IRQ_HOST_MAP_LINEAR:
406 rmap = (unsigned int *)(host + 1);
407 for (i = 0; i < revmap_arg; i++)
408 rmap[i] = IRQ_NONE;
409 host->revmap_data.linear.size = revmap_arg;
410 smp_wmb();
411 host->revmap_data.linear.revmap = rmap;
412 break;
413 default:
414 break;
415 }
416
417 pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host);
418
419 return host;
297} 420}
298 421
299/* Create a mapping for a real_irq if it doesn't already exist. 422struct irq_host *irq_find_host(struct device_node *node)
300 * Return the virtual irq as a convenience.
301 */
302int virt_irq_create_mapping(unsigned int real_irq)
303{ 423{
304 unsigned int virq, first_virq; 424 struct irq_host *h, *found = NULL;
305 static int warned; 425 unsigned long flags;
426
427 /* We might want to match the legacy controller last since
428 * it might potentially be set to match all interrupts in
429 * the absence of a device node. This isn't a problem so far
430 * yet though...
431 */
432 spin_lock_irqsave(&irq_big_lock, flags);
433 list_for_each_entry(h, &irq_hosts, link)
434 if (h->ops->match == NULL || h->ops->match(h, node)) {
435 found = h;
436 break;
437 }
438 spin_unlock_irqrestore(&irq_big_lock, flags);
439 return found;
440}
441EXPORT_SYMBOL_GPL(irq_find_host);
442
443void irq_set_default_host(struct irq_host *host)
444{
445 pr_debug("irq: Default host set to @0x%p\n", host);
446
447 irq_default_host = host;
448}
306 449
307 if (ppc64_interrupt_controller == IC_OPEN_PIC) 450void irq_set_virq_count(unsigned int count)
308 return real_irq; /* no mapping for openpic (for now) */ 451{
452 pr_debug("irq: Trying to set virq count to %d\n", count);
309 453
310 if (ppc64_interrupt_controller == IC_CELL_PIC) 454 BUG_ON(count < NUM_ISA_INTERRUPTS);
311 return real_irq; /* no mapping for iic either */ 455 if (count < NR_IRQS)
456 irq_virq_count = count;
457}
312 458
313 /* don't map interrupts < MIN_VIRT_IRQ */ 459unsigned int irq_create_mapping(struct irq_host *host,
314 if (real_irq < MIN_VIRT_IRQ) { 460 irq_hw_number_t hwirq,
315 virt_irq_to_real_map[real_irq] = real_irq; 461 unsigned int flags)
316 return real_irq; 462{
463 unsigned int virq, hint;
464
465 pr_debug("irq: irq_create_mapping(0x%p, 0x%lx, 0x%x)\n",
466 host, hwirq, flags);
467
 468 /* Look for default host if necessary */
469 if (host == NULL)
470 host = irq_default_host;
471 if (host == NULL) {
472 printk(KERN_WARNING "irq_create_mapping called for"
473 " NULL host, hwirq=%lx\n", hwirq);
474 WARN_ON(1);
475 return NO_IRQ;
317 } 476 }
477 pr_debug("irq: -> using host @%p\n", host);
318 478
 319 /* map to a number between MIN_VIRT_IRQ and max_virt_irq */ 479 /* Check if mapping already exists, if it does, call
320 virq = real_irq; 480 * host->ops->map() to update the flags
321 if (virq > max_virt_irq) 481 */
322 virq = (virq % nr_virt_irqs) + MIN_VIRT_IRQ; 482 virq = irq_find_mapping(host, hwirq);
323 483 if (virq != IRQ_NONE) {
324 /* search for this number or a free slot */ 484 pr_debug("irq: -> existing mapping on virq %d\n", virq);
325 first_virq = virq; 485 host->ops->map(host, virq, hwirq, flags);
326 while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) { 486 return virq;
327 if (virt_irq_to_real_map[virq] == real_irq) 487 }
328 return virq; 488
329 if (++virq > max_virt_irq) 489 /* Get a virtual interrupt number */
330 virq = MIN_VIRT_IRQ; 490 if (host->revmap_type == IRQ_HOST_MAP_LEGACY) {
331 if (virq == first_virq) 491 /* Handle legacy */
332 goto nospace; /* oops, no free slots */ 492 virq = (unsigned int)hwirq;
493 if (virq == 0 || virq >= NUM_ISA_INTERRUPTS)
494 return NO_IRQ;
495 return virq;
496 } else {
497 /* Allocate a virtual interrupt number */
498 hint = hwirq % irq_virq_count;
499 virq = irq_alloc_virt(host, 1, hint);
500 if (virq == NO_IRQ) {
501 pr_debug("irq: -> virq allocation failed\n");
502 return NO_IRQ;
503 }
333 } 504 }
505 pr_debug("irq: -> obtained virq %d\n", virq);
334 506
335 virt_irq_to_real_map[virq] = real_irq; 507 /* Clear some flags */
508 get_irq_desc(virq)->status &= ~(IRQ_NOREQUEST | IRQ_LEVEL);
509
510 /* map it */
511 if (host->ops->map(host, virq, hwirq, flags)) {
512 pr_debug("irq: -> mapping failed, freeing\n");
513 irq_free_virt(virq, 1);
514 return NO_IRQ;
515 }
516 smp_wmb();
517 irq_map[virq].hwirq = hwirq;
518 smp_mb();
336 return virq; 519 return virq;
520}
521EXPORT_SYMBOL_GPL(irq_create_mapping);
337 522
338 nospace: 523extern unsigned int irq_create_of_mapping(struct device_node *controller,
339 if (!warned) { 524 u32 *intspec, unsigned int intsize)
340 printk(KERN_CRIT "Interrupt table is full\n"); 525{
341 printk(KERN_CRIT "Increase virt_irq_max (currently %d) " 526 struct irq_host *host;
342 "in your kernel sources and rebuild.\n", virt_irq_max); 527 irq_hw_number_t hwirq;
343 warned = 1; 528 unsigned int flags = IRQ_TYPE_NONE;
529
530 if (controller == NULL)
531 host = irq_default_host;
532 else
533 host = irq_find_host(controller);
534 if (host == NULL)
535 return NO_IRQ;
536
537 /* If host has no translation, then we assume interrupt line */
538 if (host->ops->xlate == NULL)
539 hwirq = intspec[0];
540 else {
541 if (host->ops->xlate(host, controller, intspec, intsize,
542 &hwirq, &flags))
543 return NO_IRQ;
344 } 544 }
345 return NO_IRQ; 545
546 return irq_create_mapping(host, hwirq, flags);
346} 547}
548EXPORT_SYMBOL_GPL(irq_create_of_mapping);
347 549
348/* 550unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
349 * In most cases will get a hit on the very first slot checked in the
350 * virt_irq_to_real_map. Only when there are a large number of
351 * IRQs will this be expensive.
352 */
353unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
354{ 551{
355 unsigned int virq; 552 struct of_irq oirq;
356 unsigned int first_virq;
357 553
358 virq = real_irq; 554 if (of_irq_map_one(dev, index, &oirq))
555 return NO_IRQ;
359 556
360 if (virq > max_virt_irq) 557 return irq_create_of_mapping(oirq.controller, oirq.specifier,
361 virq = (virq % nr_virt_irqs) + MIN_VIRT_IRQ; 558 oirq.size);
559}
560EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
362 561
363 first_virq = virq; 562void irq_dispose_mapping(unsigned int virq)
563{
564 struct irq_host *host = irq_map[virq].host;
565 irq_hw_number_t hwirq;
566 unsigned long flags;
364 567
365 do { 568 WARN_ON (host == NULL);
366 if (virt_irq_to_real_map[virq] == real_irq) 569 if (host == NULL)
367 return virq; 570 return;
368 571
369 virq++; 572 /* Never unmap legacy interrupts */
573 if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
574 return;
370 575
371 if (virq >= max_virt_irq) 576 /* remove chip and handler */
372 virq = 0; 577 set_irq_chip_and_handler(virq, NULL, NULL);
578
579 /* Make sure it's completed */
580 synchronize_irq(virq);
581
582 /* Tell the PIC about it */
583 if (host->ops->unmap)
584 host->ops->unmap(host, virq);
585 smp_mb();
586
587 /* Clear reverse map */
588 hwirq = irq_map[virq].hwirq;
589 switch(host->revmap_type) {
590 case IRQ_HOST_MAP_LINEAR:
591 if (hwirq < host->revmap_data.linear.size)
592 host->revmap_data.linear.revmap[hwirq] = IRQ_NONE;
593 break;
594 case IRQ_HOST_MAP_TREE:
595 /* Check if radix tree allocated yet */
596 if (host->revmap_data.tree.gfp_mask == 0)
597 break;
 598 /* XXX radix tree not safe ! remove lock when it becomes safe
599 * and use some RCU sync to make sure everything is ok before we
600 * can re-use that map entry
601 */
602 spin_lock_irqsave(&irq_big_lock, flags);
603 radix_tree_delete(&host->revmap_data.tree, hwirq);
604 spin_unlock_irqrestore(&irq_big_lock, flags);
605 break;
606 }
373 607
374 } while (first_virq != virq); 608 /* Destroy map */
609 smp_mb();
610 irq_map[virq].hwirq = host->inval_irq;
375 611
376 return NO_IRQ; 612 /* Set some flags */
613 get_irq_desc(virq)->status |= IRQ_NOREQUEST;
377 614
615 /* Free it */
616 irq_free_virt(virq, 1);
378} 617}
379#endif /* CONFIG_PPC64 */ 618EXPORT_SYMBOL_GPL(irq_dispose_mapping);
380 619
381#ifdef CONFIG_IRQSTACKS 620unsigned int irq_find_mapping(struct irq_host *host,
382struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; 621 irq_hw_number_t hwirq)
383struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly; 622{
623 unsigned int i;
624 unsigned int hint = hwirq % irq_virq_count;
625
 626 /* Look for default host if necessary */
627 if (host == NULL)
628 host = irq_default_host;
629 if (host == NULL)
630 return NO_IRQ;
631
632 /* legacy -> bail early */
633 if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
634 return hwirq;
635
636 /* Slow path does a linear search of the map */
637 if (hint < NUM_ISA_INTERRUPTS)
638 hint = NUM_ISA_INTERRUPTS;
639 i = hint;
640 do {
641 if (irq_map[i].host == host &&
642 irq_map[i].hwirq == hwirq)
643 return i;
644 i++;
645 if (i >= irq_virq_count)
646 i = NUM_ISA_INTERRUPTS;
647 } while(i != hint);
648 return NO_IRQ;
649}
650EXPORT_SYMBOL_GPL(irq_find_mapping);
384 651
385void irq_ctx_init(void) 652
653unsigned int irq_radix_revmap(struct irq_host *host,
654 irq_hw_number_t hwirq)
386{ 655{
387 struct thread_info *tp; 656 struct radix_tree_root *tree;
388 int i; 657 struct irq_map_entry *ptr;
658 unsigned int virq;
659 unsigned long flags;
389 660
390 for_each_possible_cpu(i) { 661 WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
391 memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
392 tp = softirq_ctx[i];
393 tp->cpu = i;
394 tp->preempt_count = SOFTIRQ_OFFSET;
395 662
 396 memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); 663 /* Check if the radix tree exists yet. We test the value of
397 tp = hardirq_ctx[i]; 664 * the gfp_mask for that. Sneaky but saves another int in the
 398 tp->cpu = i; 665 * structure. If not, we fall back to slow mode
399 tp->preempt_count = HARDIRQ_OFFSET; 666 */
667 tree = &host->revmap_data.tree;
668 if (tree->gfp_mask == 0)
669 return irq_find_mapping(host, hwirq);
670
671 /* XXX Current radix trees are NOT SMP safe !!! Remove that lock
 672 * when that is fixed (when Nick's patch gets in)
673 */
674 spin_lock_irqsave(&irq_big_lock, flags);
675
676 /* Now try to resolve */
677 ptr = radix_tree_lookup(tree, hwirq);
678 /* Found it, return */
679 if (ptr) {
680 virq = ptr - irq_map;
681 goto bail;
400 } 682 }
683
684 /* If not there, try to insert it */
685 virq = irq_find_mapping(host, hwirq);
686 if (virq != NO_IRQ)
687 radix_tree_insert(tree, virq, &irq_map[virq]);
688 bail:
689 spin_unlock_irqrestore(&irq_big_lock, flags);
690 return virq;
401} 691}
402 692
403static inline void do_softirq_onstack(void) 693unsigned int irq_linear_revmap(struct irq_host *host,
694 irq_hw_number_t hwirq)
404{ 695{
405 struct thread_info *curtp, *irqtp; 696 unsigned int *revmap;
406 697
407 curtp = current_thread_info(); 698 WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
408 irqtp = softirq_ctx[smp_processor_id()]; 699
409 irqtp->task = curtp->task; 700 /* Check revmap bounds */
410 call_do_softirq(irqtp); 701 if (unlikely(hwirq >= host->revmap_data.linear.size))
411 irqtp->task = NULL; 702 return irq_find_mapping(host, hwirq);
703
704 /* Check if revmap was allocated */
705 revmap = host->revmap_data.linear.revmap;
706 if (unlikely(revmap == NULL))
707 return irq_find_mapping(host, hwirq);
708
709 /* Fill up revmap with slow path if no mapping found */
710 if (unlikely(revmap[hwirq] == NO_IRQ))
711 revmap[hwirq] = irq_find_mapping(host, hwirq);
712
713 return revmap[hwirq];
412} 714}
413 715
414#else 716unsigned int irq_alloc_virt(struct irq_host *host,
415#define do_softirq_onstack() __do_softirq() 717 unsigned int count,
416#endif /* CONFIG_IRQSTACKS */ 718 unsigned int hint)
719{
720 unsigned long flags;
721 unsigned int i, j, found = NO_IRQ;
722 unsigned int limit = irq_virq_count - count;
417 723
418void do_softirq(void) 724 if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS))
725 return NO_IRQ;
726
727 spin_lock_irqsave(&irq_big_lock, flags);
728
729 /* Use hint for 1 interrupt if any */
730 if (count == 1 && hint >= NUM_ISA_INTERRUPTS &&
731 hint < irq_virq_count && irq_map[hint].host == NULL) {
732 found = hint;
733 goto hint_found;
734 }
735
736 /* Look for count consecutive numbers in the allocatable
737 * (non-legacy) space
738 */
739 for (i = NUM_ISA_INTERRUPTS; i <= limit; ) {
740 for (j = i; j < (i + count); j++)
741 if (irq_map[j].host != NULL) {
742 i = j + 1;
743 continue;
744 }
745 found = i;
746 break;
747 }
748 if (found == NO_IRQ) {
749 spin_unlock_irqrestore(&irq_big_lock, flags);
750 return NO_IRQ;
751 }
752 hint_found:
753 for (i = found; i < (found + count); i++) {
754 irq_map[i].hwirq = host->inval_irq;
755 smp_wmb();
756 irq_map[i].host = host;
757 }
758 spin_unlock_irqrestore(&irq_big_lock, flags);
759 return found;
760}
761
762void irq_free_virt(unsigned int virq, unsigned int count)
419{ 763{
420 unsigned long flags; 764 unsigned long flags;
765 unsigned int i;
421 766
422 if (in_interrupt()) 767 WARN_ON (virq < NUM_ISA_INTERRUPTS);
423 return; 768 WARN_ON (count == 0 || (virq + count) > irq_virq_count);
424 769
425 local_irq_save(flags); 770 spin_lock_irqsave(&irq_big_lock, flags);
771 for (i = virq; i < (virq + count); i++) {
772 struct irq_host *host;
426 773
427 if (local_softirq_pending()) { 774 if (i < NUM_ISA_INTERRUPTS ||
428 account_system_vtime(current); 775 (virq + count) > irq_virq_count)
429 local_bh_disable(); 776 continue;
430 do_softirq_onstack(); 777
431 account_system_vtime(current); 778 host = irq_map[i].host;
432 __local_bh_enable(); 779 irq_map[i].hwirq = host->inval_irq;
780 smp_wmb();
781 irq_map[i].host = NULL;
433 } 782 }
783 spin_unlock_irqrestore(&irq_big_lock, flags);
784}
434 785
435 local_irq_restore(flags); 786void irq_early_init(void)
787{
788 unsigned int i;
789
790 for (i = 0; i < NR_IRQS; i++)
791 get_irq_desc(i)->status |= IRQ_NOREQUEST;
436} 792}
437EXPORT_SYMBOL(do_softirq); 793
794/* We need to create the radix trees late */
795static int irq_late_init(void)
796{
797 struct irq_host *h;
798 unsigned long flags;
799
800 spin_lock_irqsave(&irq_big_lock, flags);
801 list_for_each_entry(h, &irq_hosts, link) {
802 if (h->revmap_type == IRQ_HOST_MAP_TREE)
803 INIT_RADIX_TREE(&h->revmap_data.tree, GFP_ATOMIC);
804 }
805 spin_unlock_irqrestore(&irq_big_lock, flags);
806
807 return 0;
808}
809arch_initcall(irq_late_init);
810
811#endif /* CONFIG_PPC_MERGE */
438 812
439#ifdef CONFIG_PCI_MSI 813#ifdef CONFIG_PCI_MSI
440int pci_enable_msi(struct pci_dev * pdev) 814int pci_enable_msi(struct pci_dev * pdev)
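To put the new irq_host infrastructure above in context, the following is a minimal, illustrative sketch (not part of this patch) of a platform PIC registering a linear-revmap host. The "mypic" names are hypothetical; the ops signatures mirror the calls made by irq_create_mapping() and irq_create_of_mapping() in this file:

/* Sketch only -- hypothetical PIC glue for the new irq_host API */
#include <linux/init.h>
#include <linux/irq.h>
#include <asm/irq.h>
#include <asm/prom.h>

static struct irq_chip mypic_chip;	/* mask/unmask/eoi ops omitted in this sketch */

static int mypic_host_match(struct irq_host *h, struct device_node *node)
{
	return 1;	/* single PIC: claim any controller node */
}

static int mypic_host_map(struct irq_host *h, unsigned int virq,
			  irq_hw_number_t hwirq, unsigned int flags)
{
	/* Attach chip and flow handler to the freshly allocated virq */
	set_irq_chip_and_handler(virq, &mypic_chip, handle_level_irq);
	return 0;
}

static struct irq_host_ops mypic_host_ops = {
	.match	= mypic_host_match,
	.map	= mypic_host_map,
};

static void __init mypic_init(void)
{
	struct irq_host *host;

	/* 64 hardware sources, linear reverse map, 64 used as the invalid hwirq */
	host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, 64, &mypic_host_ops, 64);
	if (host == NULL)
		return;
	irq_set_default_host(host);
}

A device's hardware interrupt would then be turned into a Linux number with irq_create_mapping(host, hwirq, 0), or via irq_create_of_mapping()/irq_of_parse_and_map() when a device-tree specifier is available, before request_irq() is called on the returned virq.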
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 4cf0b971976b..7e98e778b52f 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -28,6 +28,7 @@ static struct legacy_serial_info {
28 struct device_node *np; 28 struct device_node *np;
29 unsigned int speed; 29 unsigned int speed;
30 unsigned int clock; 30 unsigned int clock;
31 int irq_check_parent;
31 phys_addr_t taddr; 32 phys_addr_t taddr;
32} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; 33} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
33static unsigned int legacy_serial_count; 34static unsigned int legacy_serial_count;
@@ -36,7 +37,7 @@ static int legacy_serial_console = -1;
36static int __init add_legacy_port(struct device_node *np, int want_index, 37static int __init add_legacy_port(struct device_node *np, int want_index,
37 int iotype, phys_addr_t base, 38 int iotype, phys_addr_t base,
38 phys_addr_t taddr, unsigned long irq, 39 phys_addr_t taddr, unsigned long irq,
39 upf_t flags) 40 upf_t flags, int irq_check_parent)
40{ 41{
41 u32 *clk, *spd, clock = BASE_BAUD * 16; 42 u32 *clk, *spd, clock = BASE_BAUD * 16;
42 int index; 43 int index;
@@ -68,7 +69,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
68 if (legacy_serial_infos[index].np != 0) { 69 if (legacy_serial_infos[index].np != 0) {
69 /* if we still have some room, move it, else override */ 70 /* if we still have some room, move it, else override */
70 if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) { 71 if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) {
71 printk(KERN_INFO "Moved legacy port %d -> %d\n", 72 printk(KERN_DEBUG "Moved legacy port %d -> %d\n",
72 index, legacy_serial_count); 73 index, legacy_serial_count);
73 legacy_serial_ports[legacy_serial_count] = 74 legacy_serial_ports[legacy_serial_count] =
74 legacy_serial_ports[index]; 75 legacy_serial_ports[index];
@@ -76,7 +77,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
76 legacy_serial_infos[index]; 77 legacy_serial_infos[index];
77 legacy_serial_count++; 78 legacy_serial_count++;
78 } else { 79 } else {
79 printk(KERN_INFO "Replacing legacy port %d\n", index); 80 printk(KERN_DEBUG "Replacing legacy port %d\n", index);
80 } 81 }
81 } 82 }
82 83
@@ -95,10 +96,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
95 legacy_serial_infos[index].np = of_node_get(np); 96 legacy_serial_infos[index].np = of_node_get(np);
96 legacy_serial_infos[index].clock = clock; 97 legacy_serial_infos[index].clock = clock;
97 legacy_serial_infos[index].speed = spd ? *spd : 0; 98 legacy_serial_infos[index].speed = spd ? *spd : 0;
99 legacy_serial_infos[index].irq_check_parent = irq_check_parent;
98 100
99 printk(KERN_INFO "Found legacy serial port %d for %s\n", 101 printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
100 index, np->full_name); 102 index, np->full_name);
101 printk(KERN_INFO " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n", 103 printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
102 (iotype == UPIO_PORT) ? "port" : "mem", 104 (iotype == UPIO_PORT) ? "port" : "mem",
103 (unsigned long long)base, (unsigned long long)taddr, irq, 105 (unsigned long long)base, (unsigned long long)taddr, irq,
104 legacy_serial_ports[index].uartclk, 106 legacy_serial_ports[index].uartclk,
@@ -126,11 +128,13 @@ static int __init add_legacy_soc_port(struct device_node *np,
126 return -1; 128 return -1;
127 129
128 addr = of_translate_address(soc_dev, addrp); 130 addr = of_translate_address(soc_dev, addrp);
131 if (addr == OF_BAD_ADDR)
132 return -1;
129 133
130 /* Add port, irq will be dealt with later. We passed a translated 134 /* Add port, irq will be dealt with later. We passed a translated
131 * IO port value. It will be fixed up later along with the irq 135 * IO port value. It will be fixed up later along with the irq
132 */ 136 */
133 return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags); 137 return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0);
134} 138}
135 139
136static int __init add_legacy_isa_port(struct device_node *np, 140static int __init add_legacy_isa_port(struct device_node *np,
@@ -141,6 +145,8 @@ static int __init add_legacy_isa_port(struct device_node *np,
141 int index = -1; 145 int index = -1;
142 phys_addr_t taddr; 146 phys_addr_t taddr;
143 147
148 DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
149
144 /* Get the ISA port number */ 150 /* Get the ISA port number */
145 reg = (u32 *)get_property(np, "reg", NULL); 151 reg = (u32 *)get_property(np, "reg", NULL);
146 if (reg == NULL) 152 if (reg == NULL)
@@ -161,9 +167,12 @@ static int __init add_legacy_isa_port(struct device_node *np,
161 167
162 /* Translate ISA address */ 168 /* Translate ISA address */
163 taddr = of_translate_address(np, reg); 169 taddr = of_translate_address(np, reg);
170 if (taddr == OF_BAD_ADDR)
171 return -1;
164 172
165 /* Add port, irq will be dealt with later */ 173 /* Add port, irq will be dealt with later */
166 return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, NO_IRQ, UPF_BOOT_AUTOCONF); 174 return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr,
175 NO_IRQ, UPF_BOOT_AUTOCONF, 0);
167 176
168} 177}
169 178
@@ -176,6 +185,8 @@ static int __init add_legacy_pci_port(struct device_node *np,
176 unsigned int flags; 185 unsigned int flags;
177 int iotype, index = -1, lindex = 0; 186 int iotype, index = -1, lindex = 0;
178 187
188 DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
189
179 /* We only support ports that have a clock frequency properly 190 /* We only support ports that have a clock frequency properly
180 * encoded in the device-tree (that is have an fcode). Anything 191 * encoded in the device-tree (that is have an fcode). Anything
181 * else can't be used that early and will be normally probed by 192 * else can't be used that early and will be normally probed by
@@ -194,6 +205,8 @@ static int __init add_legacy_pci_port(struct device_node *np,
194 /* We only support BAR 0 for now */ 205 /* We only support BAR 0 for now */
195 iotype = (flags & IORESOURCE_MEM) ? UPIO_MEM : UPIO_PORT; 206 iotype = (flags & IORESOURCE_MEM) ? UPIO_MEM : UPIO_PORT;
196 addr = of_translate_address(pci_dev, addrp); 207 addr = of_translate_address(pci_dev, addrp);
208 if (addr == OF_BAD_ADDR)
209 return -1;
197 210
198 /* Set the IO base to the same as the translated address for MMIO, 211 /* Set the IO base to the same as the translated address for MMIO,
199 * or to the domain local IO base for PIO (it will be fixed up later) 212 * or to the domain local IO base for PIO (it will be fixed up later)
@@ -231,7 +244,8 @@ static int __init add_legacy_pci_port(struct device_node *np,
231 /* Add port, irq will be dealt with later. We passed a translated 244 /* Add port, irq will be dealt with later. We passed a translated
232 * IO port value. It will be fixed up later along with the irq 245 * IO port value. It will be fixed up later along with the irq
233 */ 246 */
234 return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, UPF_BOOT_AUTOCONF); 247 return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
248 UPF_BOOT_AUTOCONF, np != pci_dev);
235} 249}
236#endif 250#endif
237 251
@@ -362,27 +376,22 @@ static void __init fixup_port_irq(int index,
362 struct device_node *np, 376 struct device_node *np,
363 struct plat_serial8250_port *port) 377 struct plat_serial8250_port *port)
364{ 378{
379 unsigned int virq;
380
365 DBG("fixup_port_irq(%d)\n", index); 381 DBG("fixup_port_irq(%d)\n", index);
366 382
367 /* Check for interrupts in that node */ 383 virq = irq_of_parse_and_map(np, 0);
368 if (np->n_intrs > 0) { 384 if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) {
369 port->irq = np->intrs[0].line; 385 np = of_get_parent(np);
370 DBG(" port %d (%s), irq=%d\n", 386 if (np == NULL)
371 index, np->full_name, port->irq); 387 return;
372 return; 388 virq = irq_of_parse_and_map(np, 0);
389 of_node_put(np);
373 } 390 }
374 391 if (virq == NO_IRQ)
375 /* Check for interrupts in the parent */
376 np = of_get_parent(np);
377 if (np == NULL)
378 return; 392 return;
379 393
380 if (np->n_intrs > 0) { 394 port->irq = virq;
381 port->irq = np->intrs[0].line;
382 DBG(" port %d (%s), irq=%d\n",
383 index, np->full_name, port->irq);
384 }
385 of_node_put(np);
386} 395}
387 396
388static void __init fixup_port_pio(int index, 397static void __init fixup_port_pio(int index,
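The fixup_port_irq() rework above drops the old np->intrs[] array in favour of irq_of_parse_and_map(). Any driver that resolves its interrupt from the device tree follows the same pattern; a minimal sketch (handler and device cookie are hypothetical, not part of this patch):

/* Sketch only -- needs <linux/interrupt.h> and <asm/irq.h>;
 * my_isr is a hypothetical interrupt handler */
static int __devinit mydev_setup_irq(struct device_node *np, void *cookie)
{
	unsigned int virq = irq_of_parse_and_map(np, 0);

	if (virq == NO_IRQ)
		return -ENODEV;
	return request_irq(virq, my_isr, 0, "mydev", cookie);
}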
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 0c3c70d115c6..bfb407fc1aa1 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -51,12 +51,14 @@ _GLOBAL(call_do_softirq)
51 mtlr r0 51 mtlr r0
52 blr 52 blr
53 53
54_GLOBAL(call___do_IRQ) 54_GLOBAL(call_handle_irq)
55 ld r8,0(r7)
55 mflr r0 56 mflr r0
56 std r0,16(r1) 57 std r0,16(r1)
57 stdu r1,THREAD_SIZE-112(r5) 58 mtctr r8
58 mr r1,r5 59 stdu r1,THREAD_SIZE-112(r6)
59 bl .__do_IRQ 60 mr r1,r6
61 bctrl
60 ld r1,0(r1) 62 ld r1,0(r1)
61 ld r0,16(r1) 63 ld r0,16(r1)
62 mtlr r0 64 mtlr r0
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 1333335c474e..898dae8ab6d9 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -1404,6 +1404,43 @@ pcibios_update_irq(struct pci_dev *dev, int irq)
1404 /* XXX FIXME - update OF device tree node interrupt property */ 1404 /* XXX FIXME - update OF device tree node interrupt property */
1405} 1405}
1406 1406
1407#ifdef CONFIG_PPC_MERGE
1408/* XXX This is a copy of the ppc64 version. This is temporary until we start
1409 * merging the 2 PCI layers
1410 */
1411/*
1412 * Reads the interrupt pin to determine if interrupt is use by card.
1413 * If the interrupt is used, then gets the interrupt line from the
1414 * openfirmware and sets it in the pci_dev and pci_config line.
1415 */
1416int pci_read_irq_line(struct pci_dev *pci_dev)
1417{
1418 struct of_irq oirq;
1419 unsigned int virq;
1420
1421 DBG("Try to map irq for %s...\n", pci_name(pci_dev));
1422
1423 if (of_irq_map_pci(pci_dev, &oirq)) {
1424 DBG(" -> failed !\n");
1425 return -1;
1426 }
1427
1428 DBG(" -> got one, spec %d cells (0x%08x...) on %s\n",
1429 oirq.size, oirq.specifier[0], oirq.controller->full_name);
1430
1431 virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size);
1432 if(virq == NO_IRQ) {
1433 DBG(" -> failed to map !\n");
1434 return -1;
1435 }
1436 pci_dev->irq = virq;
1437 pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, virq);
1438
1439 return 0;
1440}
1441EXPORT_SYMBOL(pci_read_irq_line);
1442#endif /* CONFIG_PPC_MERGE */
1443
1407int pcibios_enable_device(struct pci_dev *dev, int mask) 1444int pcibios_enable_device(struct pci_dev *dev, int mask)
1408{ 1445{
1409 u16 cmd, old_cmd; 1446 u16 cmd, old_cmd;
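With the 32-bit copy of pci_read_irq_line() above, the interrupt is resolved through the device tree and the resulting virtual irq is left in pci_dev->irq, so drivers keep using pdev->irq unchanged. A sketch of how the arch fixup side might walk a bus and apply it (loop and function name are illustrative, not part of this patch):

/* Sketch only -- hypothetical per-bus fixup using the new helper;
 * needs <linux/pci.h> and <linux/list.h> */
static void __devinit my_fixup_bus_irqs(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list)
		if (pci_read_irq_line(dev) != 0)
			dev->irq = NO_IRQ;	/* no usable mapping found */
}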
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index bea8451fb57b..efc0b5559ee0 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -398,12 +398,8 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
398 } else { 398 } else {
399 dev->hdr_type = PCI_HEADER_TYPE_NORMAL; 399 dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
400 dev->rom_base_reg = PCI_ROM_ADDRESS; 400 dev->rom_base_reg = PCI_ROM_ADDRESS;
401 /* Maybe do a default OF mapping here */
401 dev->irq = NO_IRQ; 402 dev->irq = NO_IRQ;
402 if (node->n_intrs > 0) {
403 dev->irq = node->intrs[0].line;
404 pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
405 dev->irq);
406 }
407 } 403 }
408 404
409 pci_parse_of_addrs(node, dev); 405 pci_parse_of_addrs(node, dev);
@@ -1288,23 +1284,26 @@ EXPORT_SYMBOL(pcibios_fixup_bus);
1288 */ 1284 */
1289int pci_read_irq_line(struct pci_dev *pci_dev) 1285int pci_read_irq_line(struct pci_dev *pci_dev)
1290{ 1286{
1291 u8 intpin; 1287 struct of_irq oirq;
1292 struct device_node *node; 1288 unsigned int virq;
1293
1294 pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
1295 if (intpin == 0)
1296 return 0;
1297 1289
1298 node = pci_device_to_OF_node(pci_dev); 1290 DBG("Try to map irq for %s...\n", pci_name(pci_dev));
1299 if (node == NULL)
1300 return -1;
1301 1291
1302 if (node->n_intrs == 0) 1292 if (of_irq_map_pci(pci_dev, &oirq)) {
1293 DBG(" -> failed !\n");
1303 return -1; 1294 return -1;
1295 }
1304 1296
1305 pci_dev->irq = node->intrs[0].line; 1297 DBG(" -> got one, spec %d cells (0x%08x...) on %s\n",
1298 oirq.size, oirq.specifier[0], oirq.controller->full_name);
1306 1299
1307 pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq); 1300 virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size);
1301 if(virq == NO_IRQ) {
1302 DBG(" -> failed to map !\n");
1303 return -1;
1304 }
1305 pci_dev->irq = virq;
1306 pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, virq);
1308 1307
1309 return 0; 1308 return 0;
1310} 1309}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4c524cb52184..a1787ffb6319 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/kexec.h> 31#include <linux/kexec.h>
32#include <linux/debugfs.h> 32#include <linux/debugfs.h>
33#include <linux/irq.h>
33 34
34#include <asm/prom.h> 35#include <asm/prom.h>
35#include <asm/rtas.h> 36#include <asm/rtas.h>
@@ -86,424 +87,6 @@ static DEFINE_RWLOCK(devtree_lock);
86/* export that to outside world */ 87/* export that to outside world */
87struct device_node *of_chosen; 88struct device_node *of_chosen;
88 89
89struct device_node *dflt_interrupt_controller;
90int num_interrupt_controllers;
91
92/*
93 * Wrapper for allocating memory for various data that needs to be
94 * attached to device nodes as they are processed at boot or when
95 * added to the device tree later (e.g. DLPAR). At boot there is
96 * already a region reserved so we just increment *mem_start by size;
97 * otherwise we call kmalloc.
98 */
99static void * prom_alloc(unsigned long size, unsigned long *mem_start)
100{
101 unsigned long tmp;
102
103 if (!mem_start)
104 return kmalloc(size, GFP_KERNEL);
105
106 tmp = *mem_start;
107 *mem_start += size;
108 return (void *)tmp;
109}
110
111/*
112 * Find the device_node with a given phandle.
113 */
114static struct device_node * find_phandle(phandle ph)
115{
116 struct device_node *np;
117
118 for (np = allnodes; np != 0; np = np->allnext)
119 if (np->linux_phandle == ph)
120 return np;
121 return NULL;
122}
123
124/*
125 * Find the interrupt parent of a node.
126 */
127static struct device_node * __devinit intr_parent(struct device_node *p)
128{
129 phandle *parp;
130
131 parp = (phandle *) get_property(p, "interrupt-parent", NULL);
132 if (parp == NULL)
133 return p->parent;
134 p = find_phandle(*parp);
135 if (p != NULL)
136 return p;
137 /*
138 * On a powermac booted with BootX, we don't get to know the
139 * phandles for any nodes, so find_phandle will return NULL.
140 * Fortunately these machines only have one interrupt controller
141 * so there isn't in fact any ambiguity. -- paulus
142 */
143 if (num_interrupt_controllers == 1)
144 p = dflt_interrupt_controller;
145 return p;
146}
147
148/*
149 * Find out the size of each entry of the interrupts property
150 * for a node.
151 */
152int __devinit prom_n_intr_cells(struct device_node *np)
153{
154 struct device_node *p;
155 unsigned int *icp;
156
157 for (p = np; (p = intr_parent(p)) != NULL; ) {
158 icp = (unsigned int *)
159 get_property(p, "#interrupt-cells", NULL);
160 if (icp != NULL)
161 return *icp;
162 if (get_property(p, "interrupt-controller", NULL) != NULL
163 || get_property(p, "interrupt-map", NULL) != NULL) {
164 printk("oops, node %s doesn't have #interrupt-cells\n",
165 p->full_name);
166 return 1;
167 }
168 }
169#ifdef DEBUG_IRQ
170 printk("prom_n_intr_cells failed for %s\n", np->full_name);
171#endif
172 return 1;
173}
174
175/*
176 * Map an interrupt from a device up to the platform interrupt
177 * descriptor.
178 */
179static int __devinit map_interrupt(unsigned int **irq, struct device_node **ictrler,
180 struct device_node *np, unsigned int *ints,
181 int nintrc)
182{
183 struct device_node *p, *ipar;
184 unsigned int *imap, *imask, *ip;
185 int i, imaplen, match;
186 int newintrc = 0, newaddrc = 0;
187 unsigned int *reg;
188 int naddrc;
189
190 reg = (unsigned int *) get_property(np, "reg", NULL);
191 naddrc = prom_n_addr_cells(np);
192 p = intr_parent(np);
193 while (p != NULL) {
194 if (get_property(p, "interrupt-controller", NULL) != NULL)
195 /* this node is an interrupt controller, stop here */
196 break;
197 imap = (unsigned int *)
198 get_property(p, "interrupt-map", &imaplen);
199 if (imap == NULL) {
200 p = intr_parent(p);
201 continue;
202 }
203 imask = (unsigned int *)
204 get_property(p, "interrupt-map-mask", NULL);
205 if (imask == NULL) {
206 printk("oops, %s has interrupt-map but no mask\n",
207 p->full_name);
208 return 0;
209 }
210 imaplen /= sizeof(unsigned int);
211 match = 0;
212 ipar = NULL;
213 while (imaplen > 0 && !match) {
214 /* check the child-interrupt field */
215 match = 1;
216 for (i = 0; i < naddrc && match; ++i)
217 match = ((reg[i] ^ imap[i]) & imask[i]) == 0;
218 for (; i < naddrc + nintrc && match; ++i)
219 match = ((ints[i-naddrc] ^ imap[i]) & imask[i]) == 0;
220 imap += naddrc + nintrc;
221 imaplen -= naddrc + nintrc;
222 /* grab the interrupt parent */
223 ipar = find_phandle((phandle) *imap++);
224 --imaplen;
225 if (ipar == NULL && num_interrupt_controllers == 1)
226 /* cope with BootX not giving us phandles */
227 ipar = dflt_interrupt_controller;
228 if (ipar == NULL) {
229 printk("oops, no int parent %x in map of %s\n",
230 imap[-1], p->full_name);
231 return 0;
232 }
233 /* find the parent's # addr and intr cells */
234 ip = (unsigned int *)
235 get_property(ipar, "#interrupt-cells", NULL);
236 if (ip == NULL) {
237 printk("oops, no #interrupt-cells on %s\n",
238 ipar->full_name);
239 return 0;
240 }
241 newintrc = *ip;
242 ip = (unsigned int *)
243 get_property(ipar, "#address-cells", NULL);
244 newaddrc = (ip == NULL)? 0: *ip;
245 imap += newaddrc + newintrc;
246 imaplen -= newaddrc + newintrc;
247 }
248 if (imaplen < 0) {
249 printk("oops, error decoding int-map on %s, len=%d\n",
250 p->full_name, imaplen);
251 return 0;
252 }
253 if (!match) {
254#ifdef DEBUG_IRQ
255 printk("oops, no match in %s int-map for %s\n",
256 p->full_name, np->full_name);
257#endif
258 return 0;
259 }
260 p = ipar;
261 naddrc = newaddrc;
262 nintrc = newintrc;
263 ints = imap - nintrc;
264 reg = ints - naddrc;
265 }
266 if (p == NULL) {
267#ifdef DEBUG_IRQ
268 printk("hmmm, int tree for %s doesn't have ctrler\n",
269 np->full_name);
270#endif
271 return 0;
272 }
273 *irq = ints;
274 *ictrler = p;
275 return nintrc;
276}
277
278static unsigned char map_isa_senses[4] = {
279 IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE,
280 IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE,
281 IRQ_SENSE_EDGE | IRQ_POLARITY_NEGATIVE,
282 IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE
283};
284
285static unsigned char map_mpic_senses[4] = {
286 IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE,
287 IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE,
288 /* 2 seems to be used for the 8259 cascade... */
289 IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE,
290 IRQ_SENSE_EDGE | IRQ_POLARITY_NEGATIVE,
291};
292
293static int __devinit finish_node_interrupts(struct device_node *np,
294 unsigned long *mem_start,
295 int measure_only)
296{
297 unsigned int *ints;
298 int intlen, intrcells, intrcount;
299 int i, j, n, sense;
300 unsigned int *irq, virq;
301 struct device_node *ic;
302 int trace = 0;
303
304 //#define TRACE(fmt...) do { if (trace) { printk(fmt); mdelay(1000); } } while(0)
305#define TRACE(fmt...)
306
307 if (!strcmp(np->name, "smu-doorbell"))
308 trace = 1;
309
310 TRACE("Finishing SMU doorbell ! num_interrupt_controllers = %d\n",
311 num_interrupt_controllers);
312
313 if (num_interrupt_controllers == 0) {
314 /*
315 * Old machines just have a list of interrupt numbers
316 * and no interrupt-controller nodes.
317 */
318 ints = (unsigned int *) get_property(np, "AAPL,interrupts",
319 &intlen);
320 /* XXX old interpret_pci_props looked in parent too */
321 /* XXX old interpret_macio_props looked for interrupts
322 before AAPL,interrupts */
323 if (ints == NULL)
324 ints = (unsigned int *) get_property(np, "interrupts",
325 &intlen);
326 if (ints == NULL)
327 return 0;
328
329 np->n_intrs = intlen / sizeof(unsigned int);
330 np->intrs = prom_alloc(np->n_intrs * sizeof(np->intrs[0]),
331 mem_start);
332 if (!np->intrs)
333 return -ENOMEM;
334 if (measure_only)
335 return 0;
336
337 for (i = 0; i < np->n_intrs; ++i) {
338 np->intrs[i].line = *ints++;
339 np->intrs[i].sense = IRQ_SENSE_LEVEL
340 | IRQ_POLARITY_NEGATIVE;
341 }
342 return 0;
343 }
344
345 ints = (unsigned int *) get_property(np, "interrupts", &intlen);
346 TRACE("ints=%p, intlen=%d\n", ints, intlen);
347 if (ints == NULL)
348 return 0;
349 intrcells = prom_n_intr_cells(np);
350 intlen /= intrcells * sizeof(unsigned int);
351 TRACE("intrcells=%d, new intlen=%d\n", intrcells, intlen);
352 np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start);
353 if (!np->intrs)
354 return -ENOMEM;
355
356 if (measure_only)
357 return 0;
358
359 intrcount = 0;
360 for (i = 0; i < intlen; ++i, ints += intrcells) {
361 n = map_interrupt(&irq, &ic, np, ints, intrcells);
362 TRACE("map, irq=%d, ic=%p, n=%d\n", irq, ic, n);
363 if (n <= 0)
364 continue;
365
366 /* don't map IRQ numbers under a cascaded 8259 controller */
367 if (ic && device_is_compatible(ic, "chrp,iic")) {
368 np->intrs[intrcount].line = irq[0];
369 sense = (n > 1)? (irq[1] & 3): 3;
370 np->intrs[intrcount].sense = map_isa_senses[sense];
371 } else {
372 virq = virt_irq_create_mapping(irq[0]);
373 TRACE("virq=%d\n", virq);
374#ifdef CONFIG_PPC64
375 if (virq == NO_IRQ) {
376 printk(KERN_CRIT "Could not allocate interrupt"
377 " number for %s\n", np->full_name);
378 continue;
379 }
380#endif
381 np->intrs[intrcount].line = irq_offset_up(virq);
382 sense = (n > 1)? (irq[1] & 3): 1;
383
384 /* Apple uses bits in there in a different way, let's
385 * only keep the real sense bit on macs
386 */
387 if (machine_is(powermac))
388 sense &= 0x1;
389 np->intrs[intrcount].sense = map_mpic_senses[sense];
390 }
391
392#ifdef CONFIG_PPC64
393 /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
394 if (machine_is(powermac) && ic && ic->parent) {
395 char *name = get_property(ic->parent, "name", NULL);
396 if (name && !strcmp(name, "u3"))
397 np->intrs[intrcount].line += 128;
398 else if (!(name && (!strcmp(name, "mac-io") ||
399 !strcmp(name, "u4"))))
400 /* ignore other cascaded controllers, such as
401 the k2-sata-root */
402 break;
403 }
404#endif /* CONFIG_PPC64 */
405 if (n > 2) {
406 printk("hmmm, got %d intr cells for %s:", n,
407 np->full_name);
408 for (j = 0; j < n; ++j)
409 printk(" %d", irq[j]);
410 printk("\n");
411 }
412 ++intrcount;
413 }
414 np->n_intrs = intrcount;
415
416 return 0;
417}
418
419static int __devinit finish_node(struct device_node *np,
420 unsigned long *mem_start,
421 int measure_only)
422{
423 struct device_node *child;
424 int rc = 0;
425
426 rc = finish_node_interrupts(np, mem_start, measure_only);
427 if (rc)
428 goto out;
429
430 for (child = np->child; child != NULL; child = child->sibling) {
431 rc = finish_node(child, mem_start, measure_only);
432 if (rc)
433 goto out;
434 }
435out:
436 return rc;
437}
438
439static void __init scan_interrupt_controllers(void)
440{
441 struct device_node *np;
442 int n = 0;
443 char *name, *ic;
444 int iclen;
445
446 for (np = allnodes; np != NULL; np = np->allnext) {
447 ic = get_property(np, "interrupt-controller", &iclen);
448 name = get_property(np, "name", NULL);
449 /* checking iclen makes sure we don't get a false
450 match on /chosen.interrupt_controller */
451 if ((name != NULL
452 && strcmp(name, "interrupt-controller") == 0)
453 || (ic != NULL && iclen == 0
454 && strcmp(name, "AppleKiwi"))) {
455 if (n == 0)
456 dflt_interrupt_controller = np;
457 ++n;
458 }
459 }
460 num_interrupt_controllers = n;
461}
462
463/**
464 * finish_device_tree is called once things are running normally
465 * (i.e. with text and data mapped to the address they were linked at).
466 * It traverses the device tree and fills in some of the additional
467 * fields in each node, such as {n_}addrs and {n_}intrs; the virtual
468 * interrupt mapping is also initialized at this point.
469 */
470void __init finish_device_tree(void)
471{
472 unsigned long start, end, size = 0;
473
474 DBG(" -> finish_device_tree\n");
475
476#ifdef CONFIG_PPC64
477 /* Initialize virtual IRQ map */
478 virt_irq_init();
479#endif
480 scan_interrupt_controllers();
481
482 /*
483 * Finish device-tree (pre-parsing some properties etc...)
484 * We do this in two passes: one with "measure_only" set, which
485 * only measures the amount of memory needed, then we allocate
486 * that memory and call finish_node again. However, we must be
487 * careful as most routines will fail nowadays when prom_alloc()
488 * returns 0, so we must make sure our first pass doesn't start
489 * at 0. We pre-initialize size to 16 for that reason and then
490 * remove those additional 16 bytes.
491 */
492 size = 16;
493 finish_node(allnodes, &size, 1);
494 size -= 16;
495
496 if (0 == size)
497 end = start = 0;
498 else
499 end = start = (unsigned long)__va(lmb_alloc(size, 128));
500
501 finish_node(allnodes, &end, 0);
502 BUG_ON(end != start + size);
503
504 DBG(" <- finish_device_tree\n");
505}
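The measure-then-allocate scheme used by finish_device_tree() can be sketched outside the kernel. The following is illustrative only (the real code uses prom_alloc() and lmb_alloc(); node names and the bump allocator here are made up), but it shows the same two-pass structure, including the 16-byte bias that keeps the first pass away from 0:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Pass 1 (measure_only): only advance the cursor. Pass 2: really hand out memory. */
static void *bump_alloc(size_t sz, unsigned long *mem, int measure_only)
{
        unsigned long p = *mem;

        *mem += sz;
        return measure_only ? NULL : (void *)p;
}

static void fill_node(const char *name, unsigned long *mem, int measure_only)
{
        size_t sz = strlen(name) + 1;
        char *dst = bump_alloc(sz, mem, measure_only);

        if (!measure_only)
                memcpy(dst, name, sz);
}

int main(void)
{
        const char *nodes[] = { "pci@f2000000", "mac-io", "interrupt-controller" };
        unsigned long size = 16, start, end;    /* bias so pass 1 never hands out 0 */
        unsigned int i;

        for (i = 0; i < 3; i++)
                fill_node(nodes[i], &size, 1);
        size -= 16;

        start = end = (unsigned long)malloc(size);
        for (i = 0; i < 3; i++)
                fill_node(nodes[i], &end, 0);

        printf("allocated %lu bytes, used %lu\n", size, end - start);
        free((void *)start);
        return 0;
}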
506
507static inline char *find_flat_dt_string(u32 offset) 90static inline char *find_flat_dt_string(u32 offset)
508{ 91{
509 return ((char *)initial_boot_params) + 92 return ((char *)initial_boot_params) +
@@ -1389,27 +972,6 @@ prom_n_size_cells(struct device_node* np)
1389EXPORT_SYMBOL(prom_n_size_cells); 972EXPORT_SYMBOL(prom_n_size_cells);
1390 973
1391/** 974/**
1392 * Work out the sense (active-low level / active-high edge)
1393 * of each interrupt from the device tree.
1394 */
1395void __init prom_get_irq_senses(unsigned char *senses, int off, int max)
1396{
1397 struct device_node *np;
1398 int i, j;
1399
1400 /* default to level-triggered */
1401 memset(senses, IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE, max - off);
1402
1403 for (np = allnodes; np != 0; np = np->allnext) {
1404 for (j = 0; j < np->n_intrs; j++) {
1405 i = np->intrs[j].line;
1406 if (i >= off && i < max)
1407 senses[i-off] = np->intrs[j].sense;
1408 }
1409 }
1410}
1411
1412/**
1413 * Construct and return a list of the device_nodes with a given name. 975 * Construct and return a list of the device_nodes with a given name.
1414 */ 976 */
1415struct device_node *find_devices(const char *name) 977struct device_node *find_devices(const char *name)
@@ -1808,7 +1370,6 @@ static void of_node_release(struct kref *kref)
1808 node->deadprops = NULL; 1370 node->deadprops = NULL;
1809 } 1371 }
1810 } 1372 }
1811 kfree(node->intrs);
1812 kfree(node->full_name); 1373 kfree(node->full_name);
1813 kfree(node->data); 1374 kfree(node->data);
1814 kfree(node); 1375 kfree(node);
@@ -1881,13 +1442,7 @@ void of_detach_node(const struct device_node *np)
1881#ifdef CONFIG_PPC_PSERIES 1442#ifdef CONFIG_PPC_PSERIES
1882/* 1443/*
1883 * Fix up the uninitialized fields in a new device node: 1444 * Fix up the uninitialized fields in a new device node:
1884 * name, type, n_addrs, addrs, n_intrs, intrs, and pci-specific fields 1445 * name, type and pci-specific fields
1885 *
1886 * A lot of boot-time code is duplicated here, because functions such
1887 * as finish_node_interrupts, interpret_pci_props, etc. cannot use the
1888 * slab allocator.
1889 *
1890 * This should probably be split up into smaller chunks.
1891 */ 1446 */
1892 1447
1893static int of_finish_dynamic_node(struct device_node *node) 1448static int of_finish_dynamic_node(struct device_node *node)
@@ -1928,8 +1483,6 @@ static int prom_reconfig_notifier(struct notifier_block *nb,
1928 switch (action) { 1483 switch (action) {
1929 case PSERIES_RECONFIG_ADD: 1484 case PSERIES_RECONFIG_ADD:
1930 err = of_finish_dynamic_node(node); 1485 err = of_finish_dynamic_node(node);
1931 if (!err)
1932 finish_node(node, NULL, 0);
1933 if (err < 0) { 1486 if (err < 0) {
1934 printk(KERN_ERR "finish_node returned %d\n", err); 1487 printk(KERN_ERR "finish_node returned %d\n", err);
1935 err = NOTIFY_BAD; 1488 err = NOTIFY_BAD;
@@ -1975,8 +1528,7 @@ struct property *of_find_property(struct device_node *np, const char *name,
1975 * Find a property with a given name for a given node 1528 * Find a property with a given name for a given node
1976 * and return the value. 1529 * and return the value.
1977 */ 1530 */
1978unsigned char *get_property(struct device_node *np, const char *name, 1531void *get_property(struct device_node *np, const char *name, int *lenp)
1979 int *lenp)
1980{ 1532{
1981 struct property *pp = of_find_property(np,name,lenp); 1533 struct property *pp = of_find_property(np,name,lenp);
1982 return pp ? pp->value : NULL; 1534 return pp ? pp->value : NULL;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1e95a9f8cda1..ebd501a59abd 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1990,12 +1990,22 @@ static void __init flatten_device_tree(void)
1990static void __init fixup_device_tree_maple(void) 1990static void __init fixup_device_tree_maple(void)
1991{ 1991{
1992 phandle isa; 1992 phandle isa;
1993 u32 rloc = 0x01002000; /* IO space; PCI device = 4 */
1993 u32 isa_ranges[6]; 1994 u32 isa_ranges[6];
1994 1995 char *name;
1995 isa = call_prom("finddevice", 1, 1, ADDR("/ht@0/isa@4")); 1996
1997 name = "/ht@0/isa@4";
1998 isa = call_prom("finddevice", 1, 1, ADDR(name));
1999 if (!PHANDLE_VALID(isa)) {
2000 name = "/ht@0/isa@6";
2001 isa = call_prom("finddevice", 1, 1, ADDR(name));
2002 rloc = 0x01003000; /* IO space; PCI device = 6 */
2003 }
1996 if (!PHANDLE_VALID(isa)) 2004 if (!PHANDLE_VALID(isa))
1997 return; 2005 return;
1998 2006
2007 if (prom_getproplen(isa, "ranges") != 12)
2008 return;
1999 if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) 2009 if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges))
2000 == PROM_ERROR) 2010 == PROM_ERROR)
2001 return; 2011 return;
@@ -2005,15 +2015,15 @@ static void __init fixup_device_tree_maple(void)
2005 isa_ranges[2] != 0x00010000) 2015 isa_ranges[2] != 0x00010000)
2006 return; 2016 return;
2007 2017
2008 prom_printf("fixing up bogus ISA range on Maple...\n"); 2018 prom_printf("Fixing up bogus ISA range on Maple/Apache...\n");
2009 2019
2010 isa_ranges[0] = 0x1; 2020 isa_ranges[0] = 0x1;
2011 isa_ranges[1] = 0x0; 2021 isa_ranges[1] = 0x0;
2012 isa_ranges[2] = 0x01002000; /* IO space; PCI device = 4 */ 2022 isa_ranges[2] = rloc;
2013 isa_ranges[3] = 0x0; 2023 isa_ranges[3] = 0x0;
2014 isa_ranges[4] = 0x0; 2024 isa_ranges[4] = 0x0;
2015 isa_ranges[5] = 0x00010000; 2025 isa_ranges[5] = 0x00010000;
2016 prom_setprop(isa, "/ht@0/isa@4", "ranges", 2026 prom_setprop(isa, name, "ranges",
2017 isa_ranges, sizeof(isa_ranges)); 2027 isa_ranges, sizeof(isa_ranges));
2018} 2028}
2019#else 2029#else
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 45df420383cc..21009b1f7869 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -38,14 +38,6 @@ static void of_dump_addr(const char *s, u32 *addr, int na)
38static void of_dump_addr(const char *s, u32 *addr, int na) { } 38static void of_dump_addr(const char *s, u32 *addr, int na) { }
39#endif 39#endif
40 40
41/* Read a big address */
42static inline u64 of_read_addr(u32 *cell, int size)
43{
44 u64 r = 0;
45 while (size--)
46 r = (r << 32) | *(cell++);
47 return r;
48}
49 41
50/* Callbacks for bus specific translators */ 42/* Callbacks for bus specific translators */
51struct of_bus { 43struct of_bus {
@@ -77,9 +69,9 @@ static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna)
77{ 69{
78 u64 cp, s, da; 70 u64 cp, s, da;
79 71
80 cp = of_read_addr(range, na); 72 cp = of_read_number(range, na);
81 s = of_read_addr(range + na + pna, ns); 73 s = of_read_number(range + na + pna, ns);
82 da = of_read_addr(addr, na); 74 da = of_read_number(addr, na);
83 75
84 DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n", 76 DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n",
85 cp, s, da); 77 cp, s, da);
@@ -91,7 +83,7 @@ static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna)
91 83
92static int of_bus_default_translate(u32 *addr, u64 offset, int na) 84static int of_bus_default_translate(u32 *addr, u64 offset, int na)
93{ 85{
94 u64 a = of_read_addr(addr, na); 86 u64 a = of_read_number(addr, na);
95 memset(addr, 0, na * 4); 87 memset(addr, 0, na * 4);
96 a += offset; 88 a += offset;
97 if (na > 1) 89 if (na > 1)
@@ -135,9 +127,9 @@ static u64 of_bus_pci_map(u32 *addr, u32 *range, int na, int ns, int pna)
135 return OF_BAD_ADDR; 127 return OF_BAD_ADDR;
136 128
137 /* Read address values, skipping high cell */ 129 /* Read address values, skipping high cell */
138 cp = of_read_addr(range + 1, na - 1); 130 cp = of_read_number(range + 1, na - 1);
139 s = of_read_addr(range + na + pna, ns); 131 s = of_read_number(range + na + pna, ns);
140 da = of_read_addr(addr + 1, na - 1); 132 da = of_read_number(addr + 1, na - 1);
141 133
142 DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); 134 DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
143 135
@@ -195,9 +187,9 @@ static u64 of_bus_isa_map(u32 *addr, u32 *range, int na, int ns, int pna)
195 return OF_BAD_ADDR; 187 return OF_BAD_ADDR;
196 188
197 /* Read address values, skipping high cell */ 189 /* Read address values, skipping high cell */
198 cp = of_read_addr(range + 1, na - 1); 190 cp = of_read_number(range + 1, na - 1);
199 s = of_read_addr(range + na + pna, ns); 191 s = of_read_number(range + na + pna, ns);
200 da = of_read_addr(addr + 1, na - 1); 192 da = of_read_number(addr + 1, na - 1);
201 193
202 DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); 194 DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
203 195
@@ -295,7 +287,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
295 */ 287 */
296 ranges = (u32 *)get_property(parent, "ranges", &rlen); 288 ranges = (u32 *)get_property(parent, "ranges", &rlen);
297 if (ranges == NULL || rlen == 0) { 289 if (ranges == NULL || rlen == 0) {
298 offset = of_read_addr(addr, na); 290 offset = of_read_number(addr, na);
299 memset(addr, 0, pna * 4); 291 memset(addr, 0, pna * 4);
300 DBG("OF: no ranges, 1:1 translation\n"); 292 DBG("OF: no ranges, 1:1 translation\n");
301 goto finish; 293 goto finish;
@@ -378,7 +370,7 @@ u64 of_translate_address(struct device_node *dev, u32 *in_addr)
378 /* If root, we have finished */ 370 /* If root, we have finished */
379 if (parent == NULL) { 371 if (parent == NULL) {
380 DBG("OF: reached root node\n"); 372 DBG("OF: reached root node\n");
381 result = of_read_addr(addr, na); 373 result = of_read_number(addr, na);
382 break; 374 break;
383 } 375 }
384 376
@@ -442,7 +434,7 @@ u32 *of_get_address(struct device_node *dev, int index, u64 *size,
442 for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) 434 for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
443 if (i == index) { 435 if (i == index) {
444 if (size) 436 if (size)
445 *size = of_read_addr(prop + na, ns); 437 *size = of_read_number(prop + na, ns);
446 if (flags) 438 if (flags)
447 *flags = bus->get_flags(prop); 439 *flags = bus->get_flags(prop);
448 return prop; 440 return prop;
@@ -484,7 +476,7 @@ u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
484 for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) 476 for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
485 if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) { 477 if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) {
486 if (size) 478 if (size)
487 *size = of_read_addr(prop + na, ns); 479 *size = of_read_number(prop + na, ns);
488 if (flags) 480 if (flags)
489 *flags = bus->get_flags(prop); 481 *flags = bus->get_flags(prop);
490 return prop; 482 return prop;
@@ -565,11 +557,414 @@ void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop,
565 prop = get_property(dn, "#address-cells", NULL); 557 prop = get_property(dn, "#address-cells", NULL);
566 558
567 cells = prop ? *(u32 *)prop : prom_n_addr_cells(dn); 559 cells = prop ? *(u32 *)prop : prom_n_addr_cells(dn);
568 *phys = of_read_addr(dma_window, cells); 560 *phys = of_read_number(dma_window, cells);
569 561
570 dma_window += cells; 562 dma_window += cells;
571 563
572 prop = get_property(dn, "ibm,#dma-size-cells", NULL); 564 prop = get_property(dn, "ibm,#dma-size-cells", NULL);
573 cells = prop ? *(u32 *)prop : prom_n_size_cells(dn); 565 cells = prop ? *(u32 *)prop : prom_n_size_cells(dn);
574 *size = of_read_addr(dma_window, cells); 566 *size = of_read_number(dma_window, cells);
567}
568
569/*
570 * Interrupt remapper
571 */
572
573static unsigned int of_irq_workarounds;
574static struct device_node *of_irq_dflt_pic;
575
576static struct device_node *of_irq_find_parent(struct device_node *child)
577{
578 struct device_node *p;
579 phandle *parp;
580
581 if (!of_node_get(child))
582 return NULL;
583
584 do {
585 parp = (phandle *)get_property(child, "interrupt-parent", NULL);
586 if (parp == NULL)
587 p = of_get_parent(child);
588 else {
589 if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
590 p = of_node_get(of_irq_dflt_pic);
591 else
592 p = of_find_node_by_phandle(*parp);
593 }
594 of_node_put(child);
595 child = p;
596 } while (p && get_property(p, "#interrupt-cells", NULL) == NULL);
597
598 return p;
599}
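of_irq_find_parent() prefers an explicit "interrupt-parent" link and otherwise falls back to the structural parent, skipping nodes that carry no #interrupt-cells. A toy model of that walk (plain C; the node structure and the sample tree are invented, and OF reference counting is omitted):

#include <stdio.h>
#include <stddef.h>

struct node {
        const char  *name;
        struct node *parent;        /* structural parent */
        struct node *irq_parent;    /* explicit "interrupt-parent", may be NULL */
        int          has_int_cells; /* node carries #interrupt-cells */
};

static struct node *find_irq_parent(struct node *child)
{
        struct node *p = child;

        do {
                p = p->irq_parent ? p->irq_parent : p->parent;
        } while (p && !p->has_int_cells);
        return p;
}

int main(void)
{
        /* Hypothetical tree: dev -> bridge -> pic, where the bridge has no cells */
        struct node pic    = { "pic",    NULL,    NULL, 1 };
        struct node bridge = { "bridge", &pic,    NULL, 0 };
        struct node dev    = { "dev",    &bridge, NULL, 0 };

        printf("interrupt parent of %s is %s\n",
               dev.name, find_irq_parent(&dev)->name);
        return 0;
}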
600
601static u8 of_irq_pci_swizzle(u8 slot, u8 pin)
602{
603 return (((pin - 1) + slot) % 4) + 1;
575} 604}
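of_irq_pci_swizzle() is the standard PCI bridge swizzle: the interrupt pin is rotated by the device's slot number at each transparent bridge. A quick standalone check of the formula (the slot/pin values are just examples):

#include <stdio.h>

static unsigned char pci_swizzle(unsigned char slot, unsigned char pin)
{
        return (((pin - 1) + slot) % 4) + 1;    /* pins are 1=INTA .. 4=INTD */
}

int main(void)
{
        printf("slot 2, INTA -> INT%c\n", 'A' + pci_swizzle(2, 1) - 1);
        printf("slot 3, INTD -> INT%c\n", 'A' + pci_swizzle(3, 4) - 1);
        return 0;
}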
605
606/* This doesn't need to be called if you don't have any special workaround
607 * flags to pass
608 */
609void of_irq_map_init(unsigned int flags)
610{
611 of_irq_workarounds = flags;
612
613 /* OldWorld, don't bother looking at other things */
614 if (flags & OF_IMAP_OLDWORLD_MAC)
615 return;
616
617 /* If we don't have phandles, let's try to locate a default interrupt
618 * controller (happens when booting with BootX). We take the first
619 * match here; hopefully that only ever happens on machines with one
620 * controller.
621 */
622 if (flags & OF_IMAP_NO_PHANDLE) {
623 struct device_node *np;
624
625 for(np = NULL; (np = of_find_all_nodes(np)) != NULL;) {
626 if (get_property(np, "interrupt-controller", NULL)
627 == NULL)
628 continue;
629 /* Skip /chosen/interrupt-controller */
630 if (strcmp(np->name, "chosen") == 0)
631 continue;
632 /* It seems like at least one person on this planet wants
633 * to use BootX on a machine with an AppleKiwi controller
634 * which happens to pretend to be an interrupt
635 * controller too.
636 */
637 if (strcmp(np->name, "AppleKiwi") == 0)
638 continue;
639 /* I think we found one ! */
640 of_irq_dflt_pic = np;
641 break;
642 }
643 }
644
645}
646
647int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
648 struct of_irq *out_irq)
649{
650 struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
651 u32 *tmp, *imap, *imask;
652 u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
653 int imaplen, match, i;
654
655 ipar = of_node_get(parent);
656
657 /* First get the #interrupt-cells property of the current cursor
658 * that tells us how to interpret the passed-in intspec. If there
659 * is none, we are nice and just walk up the tree
660 */
661 do {
662 tmp = (u32 *)get_property(ipar, "#interrupt-cells", NULL);
663 if (tmp != NULL) {
664 intsize = *tmp;
665 break;
666 }
667 tnode = ipar;
668 ipar = of_irq_find_parent(ipar);
669 of_node_put(tnode);
670 } while (ipar);
671 if (ipar == NULL) {
672 DBG(" -> no parent found !\n");
673 goto fail;
674 }
675
676 DBG("of_irq_map_raw: ipar=%s, size=%d\n", ipar->full_name, intsize);
677
678 /* Look for this #address-cells. We have to implement the old linux
679 * trick of looking for the parent here as some device-trees rely on it
680 */
681 old = of_node_get(ipar);
682 do {
683 tmp = (u32 *)get_property(old, "#address-cells", NULL);
684 tnode = of_get_parent(old);
685 of_node_put(old);
686 old = tnode;
687 } while(old && tmp == NULL);
688 of_node_put(old);
689 old = NULL;
690 addrsize = (tmp == NULL) ? 2 : *tmp;
691
692 DBG(" -> addrsize=%d\n", addrsize);
693
694 /* Now start the actual "proper" walk of the interrupt tree */
695 while (ipar != NULL) {
696 /* Now check if cursor is an interrupt-controller and if it is
697 * then we are done
698 */
699 if (get_property(ipar, "interrupt-controller", NULL) != NULL) {
700 DBG(" -> got it !\n");
701 memcpy(out_irq->specifier, intspec,
702 intsize * sizeof(u32));
703 out_irq->size = intsize;
704 out_irq->controller = ipar;
705 of_node_put(old);
706 return 0;
707 }
708
709 /* Now look for an interrupt-map */
710 imap = (u32 *)get_property(ipar, "interrupt-map", &imaplen);
711 /* No interrupt map, check for an interrupt parent */
712 if (imap == NULL) {
713 DBG(" -> no map, getting parent\n");
714 newpar = of_irq_find_parent(ipar);
715 goto skiplevel;
716 }
717 imaplen /= sizeof(u32);
718
719 /* Look for a mask */
720 imask = (u32 *)get_property(ipar, "interrupt-map-mask", NULL);
721
722 /* If we were passed no "reg" property and we attempt to parse
723 * an interrupt-map, then #address-cells must be 0.
724 * Fail if it's not.
725 */
726 if (addr == NULL && addrsize != 0) {
727 DBG(" -> no reg passed in when needed !\n");
728 goto fail;
729 }
730
731 /* Parse interrupt-map */
732 match = 0;
733 while (imaplen > (addrsize + intsize + 1) && !match) {
734 /* Compare specifiers */
735 match = 1;
736 for (i = 0; i < addrsize && match; ++i) {
737 u32 mask = imask ? imask[i] : 0xffffffffu;
738 match = ((addr[i] ^ imap[i]) & mask) == 0;
739 }
740 for (; i < (addrsize + intsize) && match; ++i) {
741 u32 mask = imask ? imask[i] : 0xffffffffu;
742 match =
743 ((intspec[i-addrsize] ^ imap[i]) & mask) == 0;
744 }
745 imap += addrsize + intsize;
746 imaplen -= addrsize + intsize;
747
748 DBG(" -> match=%d (imaplen=%d)\n", match, imaplen);
749
750 /* Get the interrupt parent */
751 if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
752 newpar = of_node_get(of_irq_dflt_pic);
753 else
754 newpar = of_find_node_by_phandle((phandle)*imap);
755 imap++;
756 --imaplen;
757
758 /* Check if not found */
759 if (newpar == NULL) {
760 DBG(" -> imap parent not found !\n");
761 goto fail;
762 }
763
764 /* Get #interrupt-cells and #address-cells of new
765 * parent
766 */
767 tmp = (u32 *)get_property(newpar, "#interrupt-cells",
768 NULL);
769 if (tmp == NULL) {
770 DBG(" -> parent lacks #interrupt-cells !\n");
771 goto fail;
772 }
773 newintsize = *tmp;
774 tmp = (u32 *)get_property(newpar, "#address-cells",
775 NULL);
776 newaddrsize = (tmp == NULL) ? 0 : *tmp;
777
778 DBG(" -> newintsize=%d, newaddrsize=%d\n",
779 newintsize, newaddrsize);
780
781 /* Check for malformed properties */
782 if (imaplen < (newaddrsize + newintsize))
783 goto fail;
784
785 imap += newaddrsize + newintsize;
786 imaplen -= newaddrsize + newintsize;
787
788 DBG(" -> imaplen=%d\n", imaplen);
789 }
790 if (!match)
791 goto fail;
792
793 of_node_put(old);
794 old = of_node_get(newpar);
795 addrsize = newaddrsize;
796 intsize = newintsize;
797 intspec = imap - intsize;
798 addr = intspec - addrsize;
799
800 skiplevel:
801 /* Iterate again with new parent */
802 DBG(" -> new parent: %s\n", newpar ? newpar->full_name : "<>");
803 of_node_put(ipar);
804 ipar = newpar;
805 newpar = NULL;
806 }
807 fail:
808 of_node_put(ipar);
809 of_node_put(old);
810 of_node_put(newpar);
811
812 return -EINVAL;
813}
814EXPORT_SYMBOL_GPL(of_irq_map_raw);
815
816#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)
817static int of_irq_map_oldworld(struct device_node *device, int index,
818 struct of_irq *out_irq)
819{
820 u32 *ints;
821 int intlen;
822
823 /*
824 * Old machines just have a list of interrupt numbers
825 * and no interrupt-controller nodes.
826 */
827 ints = (u32 *) get_property(device, "AAPL,interrupts", &intlen);
828 if (ints == NULL)
829 return -EINVAL;
830 intlen /= sizeof(u32);
831
832 if (index >= intlen)
833 return -EINVAL;
834
835 out_irq->controller = NULL;
836 out_irq->specifier[0] = ints[index];
837 out_irq->size = 1;
838
839 return 0;
840}
841#else /* defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) */
842static int of_irq_map_oldworld(struct device_node *device, int index,
843 struct of_irq *out_irq)
844{
845 return -EINVAL;
846}
847#endif /* !(defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)) */
848
849int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq)
850{
851 struct device_node *p;
852 u32 *intspec, *tmp, intsize, intlen, *addr;
853 int res;
854
855 DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index);
856
857 /* OldWorld mac stuff is "special", handle out of line */
858 if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)
859 return of_irq_map_oldworld(device, index, out_irq);
860
861 /* Get the interrupts property */
862 intspec = (u32 *)get_property(device, "interrupts", &intlen);
863 if (intspec == NULL)
864 return -EINVAL;
865 intlen /= sizeof(u32);
866
867 /* Get the reg property (if any) */
868 addr = (u32 *)get_property(device, "reg", NULL);
869
870 /* Look for the interrupt parent. */
871 p = of_irq_find_parent(device);
872 if (p == NULL)
873 return -EINVAL;
874
875 /* Get size of interrupt specifier */
876 tmp = (u32 *)get_property(p, "#interrupt-cells", NULL);
877 if (tmp == NULL) {
878 of_node_put(p);
879 return -EINVAL;
880 }
881 intsize = *tmp;
882
883 /* Check index */
884 if (index * intsize >= intlen)
885 return -EINVAL;
886
887 /* Get new specifier and map it */
888 res = of_irq_map_raw(p, intspec + index * intsize, addr, out_irq);
889 of_node_put(p);
890 return res;
891}
892EXPORT_SYMBOL_GPL(of_irq_map_one);
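of_irq_map_one() slices the flat "interrupts" property into specifiers of #interrupt-cells cells each and bounds-checks the requested index before handing the specifier to of_irq_map_raw(). A standalone model of that slicing (the property contents and cell count below are made up):

#include <stdio.h>
#include <stdint.h>

/* Return a pointer to the index-th specifier, or NULL if out of range. */
static const uint32_t *nth_specifier(const uint32_t *prop, int proplen_cells,
                                     int intsize, int index)
{
        if (index * intsize >= proplen_cells)
                return NULL;
        return prop + index * intsize;
}

int main(void)
{
        /* Hypothetical 2-cell specifiers: <number sense>, three entries */
        const uint32_t interrupts[] = { 25, 1,  26, 1,  27, 0 };
        const uint32_t *spec = nth_specifier(interrupts, 6, 2, 2);

        if (spec)
                printf("irq line %u, sense %u\n", spec[0], spec[1]);
        return 0;
}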
893
894int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
895{
896 struct device_node *dn, *ppnode;
897 struct pci_dev *ppdev;
898 u32 lspec;
899 u32 laddr[3];
900 u8 pin;
901 int rc;
902
903 /* Check if we have a device node; if yes, fall back to standard OF
904 * parsing
905 */
906 dn = pci_device_to_OF_node(pdev);
907 if (dn)
908 return of_irq_map_one(dn, 0, out_irq);
909
910 /* Ok, we don't, time to have fun. Let's start by building up an
911 * interrupt spec. We assume #interrupt-cells is 1, which is standard
912 * for PCI. If yours differs, don't use this routine.
913 */
914 rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
915 if (rc != 0)
916 return rc;
917 /* No pin, exit */
918 if (pin == 0)
919 return -ENODEV;
920
921 /* Now we walk up the PCI tree */
922 lspec = pin;
923 for (;;) {
924 /* Get the pci_dev of our parent */
925 ppdev = pdev->bus->self;
926
927 /* Ouch, it's a host bridge... */
928 if (ppdev == NULL) {
929#ifdef CONFIG_PPC64
930 ppnode = pci_bus_to_OF_node(pdev->bus);
931#else
932 struct pci_controller *host;
933 host = pci_bus_to_host(pdev->bus);
934 ppnode = host ? host->arch_data : NULL;
935#endif
936 /* No node for host bridge? Give up */
937 if (ppnode == NULL)
938 return -EINVAL;
939 } else
940 /* We found a P2P bridge, check if it has a node */
941 ppnode = pci_device_to_OF_node(ppdev);
942
943 /* Ok, we have found a parent with a device node; hand over to
944 * the OF parsing code.
945 * We build a unit address from the linux device to be used for
946 * resolution. Note that we use the linux bus number which may
947 * not match your firmware bus numbering.
948 * Fortunately, in most cases, interrupt-map-mask doesn't include
949 * the bus number as part of the matching.
950 * You should still be careful about that, though, if you intend
951 * to rely on this function (i.e. if you ship firmware that doesn't
952 * create device nodes for all PCI devices).
953 */
954 if (ppnode)
955 break;
956
957 /* We can only get here if we hit a P2P bridge with no node,
958 * let's do standard swizzling and try again
959 */
960 lspec = of_irq_pci_swizzle(PCI_SLOT(pdev->devfn), lspec);
961 pdev = ppdev;
962 }
963
964 laddr[0] = (pdev->bus->number << 16)
965 | (pdev->devfn << 8);
966 laddr[1] = laddr[2] = 0;
967 return of_irq_map_raw(ppnode, &lspec, laddr, out_irq);
968}
969EXPORT_SYMBOL_GPL(of_irq_map_pci);
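When there is no device node, of_irq_map_pci() fabricates the unit address to match against the bridge's interrupt-map: the high cell of a PCI "reg" entry, with the bus number in bits 16-23 and devfn in bits 8-15. A standalone illustration with made-up bus/slot/function numbers (PCI_DEVFN is defined locally here):

#include <stdio.h>
#include <stdint.h>

#define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))

int main(void)
{
        /* Hypothetical device 0x0a, function 1, on bus 2 */
        uint32_t bus = 2, devfn = PCI_DEVFN(0x0a, 1);
        uint32_t laddr[3];

        laddr[0] = (bus << 16) | (devfn << 8);  /* phys.hi, as built in the patch */
        laddr[1] = laddr[2] = 0;                /* phys.mid / phys.lo unused here */

        printf("phys.hi = 0x%08x\n", laddr[0]);
        return 0;
}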
970
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 6eb7e49b394a..cda022657324 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -297,19 +297,9 @@ unsigned long __init find_and_init_phbs(void)
297 struct device_node *node; 297 struct device_node *node;
298 struct pci_controller *phb; 298 struct pci_controller *phb;
299 unsigned int index; 299 unsigned int index;
300 unsigned int root_size_cells = 0;
301 unsigned int *opprop = NULL;
302 struct device_node *root = of_find_node_by_path("/"); 300 struct device_node *root = of_find_node_by_path("/");
303 301
304 if (ppc64_interrupt_controller == IC_OPEN_PIC) {
305 opprop = (unsigned int *)get_property(root,
306 "platform-open-pic", NULL);
307 }
308
309 root_size_cells = prom_n_size_cells(root);
310
311 index = 0; 302 index = 0;
312
313 for (node = of_get_next_child(root, NULL); 303 for (node = of_get_next_child(root, NULL);
314 node != NULL; 304 node != NULL;
315 node = of_get_next_child(root, node)) { 305 node = of_get_next_child(root, node)) {
@@ -324,13 +314,6 @@ unsigned long __init find_and_init_phbs(void)
324 setup_phb(node, phb); 314 setup_phb(node, phb);
325 pci_process_bridge_OF_ranges(phb, node, 0); 315 pci_process_bridge_OF_ranges(phb, node, 0);
326 pci_setup_phb_io(phb, index == 0); 316 pci_setup_phb_io(phb, index == 0);
327#ifdef CONFIG_PPC_PSERIES
328 /* XXX This code need serious fixing ... --BenH */
329 if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
330 int addr = root_size_cells * (index + 2) - 1;
331 mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
332 }
333#endif
334 index++; 317 index++;
335 } 318 }
336 319
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ba7cd50d820d..e0df2ba1ab9f 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -51,7 +51,6 @@
51 51
52extern void bootx_init(unsigned long r4, unsigned long phys); 52extern void bootx_init(unsigned long r4, unsigned long phys);
53 53
54boot_infos_t *boot_infos;
55struct ide_machdep_calls ppc_ide_md; 54struct ide_machdep_calls ppc_ide_md;
56 55
57int boot_cpuid; 56int boot_cpuid;
@@ -240,7 +239,6 @@ void __init setup_arch(char **cmdline_p)
240 ppc_md.init_early(); 239 ppc_md.init_early();
241 240
242 find_legacy_serial_ports(); 241 find_legacy_serial_ports();
243 finish_device_tree();
244 242
245 smp_setup_cpu_maps(); 243 smp_setup_cpu_maps();
246 244
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ac7276c40685..fd1785e4c9bb 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -361,12 +361,15 @@ void __init setup_system(void)
361 361
362 /* 362 /*
363 * Fill the ppc64_caches & systemcfg structures with informations 363 * Fill the ppc64_caches & systemcfg structures with informations
364 * retrieved from the device-tree. Need to be called before 364 * retrieved from the device-tree.
365 * finish_device_tree() since the later requires some of the
366 * informations filled up here to properly parse the interrupt tree.
367 */ 365 */
368 initialize_cache_info(); 366 initialize_cache_info();
369 367
368 /*
369 * Initialize irq remapping subsystem
370 */
371 irq_early_init();
372
370#ifdef CONFIG_PPC_RTAS 373#ifdef CONFIG_PPC_RTAS
371 /* 374 /*
372 * Initialize RTAS if available 375 * Initialize RTAS if available
@@ -394,12 +397,6 @@ void __init setup_system(void)
394 find_legacy_serial_ports(); 397 find_legacy_serial_ports();
395 398
396 /* 399 /*
397 * "Finish" the device-tree, that is do the actual parsing of
398 * some of the properties like the interrupt map
399 */
400 finish_device_tree();
401
402 /*
403 * Initialize xmon 400 * Initialize xmon
404 */ 401 */
405#ifdef CONFIG_XMON_DEFAULT 402#ifdef CONFIG_XMON_DEFAULT
@@ -427,8 +424,6 @@ void __init setup_system(void)
427 424
428 printk("-----------------------------------------------------\n"); 425 printk("-----------------------------------------------------\n");
429 printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); 426 printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
430 printk("ppc64_interrupt_controller = 0x%ld\n",
431 ppc64_interrupt_controller);
432 printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size()); 427 printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size());
433 printk("ppc64_caches.dcache_line_size = 0x%x\n", 428 printk("ppc64_caches.dcache_line_size = 0x%x\n",
434 ppc64_caches.dline_size); 429 ppc64_caches.dline_size);
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index cdf5867838a6..fad8580f9081 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -218,7 +218,6 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
218{ 218{
219 struct vio_dev *viodev; 219 struct vio_dev *viodev;
220 unsigned int *unit_address; 220 unsigned int *unit_address;
221 unsigned int *irq_p;
222 221
223 /* we need the 'device_type' property, in order to match with drivers */ 222 /* we need the 'device_type' property, in order to match with drivers */
224 if (of_node->type == NULL) { 223 if (of_node->type == NULL) {
@@ -243,16 +242,7 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
243 242
244 viodev->dev.platform_data = of_node_get(of_node); 243 viodev->dev.platform_data = of_node_get(of_node);
245 244
246 viodev->irq = NO_IRQ; 245 viodev->irq = irq_of_parse_and_map(of_node, 0);
247 irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
248 if (irq_p) {
249 int virq = virt_irq_create_mapping(*irq_p);
250 if (virq == NO_IRQ) {
251 printk(KERN_ERR "Unable to allocate interrupt "
252 "number for %s\n", of_node->full_name);
253 } else
254 viodev->irq = irq_offset_up(virq);
255 }
256 246
257 snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); 247 snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
258 viodev->name = of_node->name; 248 viodev->name = of_node->name;
diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
index 7675e675dce1..5fe7b7faf45f 100644
--- a/arch/powerpc/platforms/83xx/Kconfig
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -16,12 +16,21 @@ config MPC834x_SYS
16 3 PCI slots. The PIBs PCI initialization is the bootloader's 16 3 PCI slots. The PIBs PCI initialization is the bootloader's
17 responsibility. 17 responsibility.
18 18
19config MPC834x_ITX
20 bool "Freescale MPC834x ITX"
21 select DEFAULT_UIMAGE
22 help
23 This option enables support for the MPC 834x ITX evaluation board.
24
25 Be aware that PCI initialization is the bootloader's
26 responsibility.
27
19endchoice 28endchoice
20 29
21config MPC834x 30config MPC834x
22 bool 31 bool
23 select PPC_UDBG_16550 32 select PPC_UDBG_16550
24 select PPC_INDIRECT_PCI 33 select PPC_INDIRECT_PCI
25 default y if MPC834x_SYS 34 default y if MPC834x_SYS || MPC834x_ITX
26 35
27endmenu 36endmenu
diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile
index 5c72367441a8..9387a110d28a 100644
--- a/arch/powerpc/platforms/83xx/Makefile
+++ b/arch/powerpc/platforms/83xx/Makefile
@@ -4,3 +4,4 @@
4obj-y := misc.o 4obj-y := misc.o
5obj-$(CONFIG_PCI) += pci.o 5obj-$(CONFIG_PCI) += pci.o
6obj-$(CONFIG_MPC834x_SYS) += mpc834x_sys.o 6obj-$(CONFIG_MPC834x_SYS) += mpc834x_sys.o
7obj-$(CONFIG_MPC834x_ITX) += mpc834x_itx.o
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c
new file mode 100644
index 000000000000..b46305645d38
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c
@@ -0,0 +1,156 @@
1/*
2 * arch/powerpc/platforms/83xx/mpc834x_itx.c
3 *
4 * MPC834x ITX board specific routines
5 *
6 * Maintainer: Kumar Gala <galak@kernel.crashing.org>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/config.h>
15#include <linux/stddef.h>
16#include <linux/kernel.h>
17#include <linux/init.h>
18#include <linux/errno.h>
19#include <linux/reboot.h>
20#include <linux/pci.h>
21#include <linux/kdev_t.h>
22#include <linux/major.h>
23#include <linux/console.h>
24#include <linux/delay.h>
25#include <linux/seq_file.h>
26#include <linux/root_dev.h>
27
28#include <asm/system.h>
29#include <asm/atomic.h>
30#include <asm/time.h>
31#include <asm/io.h>
32#include <asm/machdep.h>
33#include <asm/ipic.h>
34#include <asm/bootinfo.h>
35#include <asm/irq.h>
36#include <asm/prom.h>
37#include <asm/udbg.h>
38#include <sysdev/fsl_soc.h>
39
40#include "mpc83xx.h"
41
42#include <platforms/83xx/mpc834x_sys.h>
43
44#ifndef CONFIG_PCI
45unsigned long isa_io_base = 0;
46unsigned long isa_mem_base = 0;
47#endif
48
49#ifdef CONFIG_PCI
50static int
51mpc83xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin)
52{
53 static char pci_irq_table[][4] =
54 /*
55 * PCI IDSEL/INTPIN->INTLINE
56 * A B C D
57 */
58 {
59 {PIRQB, PIRQC, PIRQD, PIRQA}, /* idsel 0x0e */
60 {PIRQA, PIRQB, PIRQC, PIRQD}, /* idsel 0x0f */
61 {PIRQC, PIRQD, PIRQA, PIRQB}, /* idsel 0x10 */
62 };
63
64 const long min_idsel = 0x0e, max_idsel = 0x10, irqs_per_slot = 4;
65 return PCI_IRQ_TABLE_LOOKUP;
66}
67#endif /* CONFIG_PCI */
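The pci_irq_table above is indexed by IDSEL relative to min_idsel and by the interrupt pin; PCI_IRQ_TABLE_LOOKUP expands to roughly the bounds-checked lookup modelled below. This is a standalone sketch only: the PIRQA..PIRQD stand-ins are arbitrary numbers here (on the board they are MPC83xx_IRQ_EXT4..EXT7, per mpc834x_itx.h):

#include <stdio.h>

enum { PIRQA = 20, PIRQB = 21, PIRQC = 22, PIRQD = 23 };   /* invented stand-ins */

static int lookup_irq(long idsel, long pin)
{
        static const char table[][4] = {
                { PIRQB, PIRQC, PIRQD, PIRQA },   /* idsel 0x0e */
                { PIRQA, PIRQB, PIRQC, PIRQD },   /* idsel 0x0f */
                { PIRQC, PIRQD, PIRQA, PIRQB },   /* idsel 0x10 */
        };
        const long min_idsel = 0x0e, max_idsel = 0x10, irqs_per_slot = 4;

        if (idsel < min_idsel || idsel > max_idsel || pin < 1 || pin > irqs_per_slot)
                return -1;
        return table[idsel - min_idsel][pin - 1];
}

int main(void)
{
        printf("idsel 0x0f INTB -> irq %d\n", lookup_irq(0x0f, 2));
        printf("idsel 0x11 INTA -> irq %d\n", lookup_irq(0x11, 1));
        return 0;
}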
68
69/* ************************************************************************
70 *
71 * Setup the architecture
72 *
73 */
74static void __init mpc834x_itx_setup_arch(void)
75{
76 struct device_node *np;
77
78 if (ppc_md.progress)
79 ppc_md.progress("mpc834x_itx_setup_arch()", 0);
80
81 np = of_find_node_by_type(NULL, "cpu");
82 if (np != 0) {
83 unsigned int *fp =
84 (int *)get_property(np, "clock-frequency", NULL);
85 if (fp != 0)
86 loops_per_jiffy = *fp / HZ;
87 else
88 loops_per_jiffy = 50000000 / HZ;
89 of_node_put(np);
90 }
91#ifdef CONFIG_PCI
92 for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;)
93 add_bridge(np);
94
95 ppc_md.pci_swizzle = common_swizzle;
96 ppc_md.pci_map_irq = mpc83xx_map_irq;
97 ppc_md.pci_exclude_device = mpc83xx_exclude_device;
98#endif
99
100#ifdef CONFIG_ROOT_NFS
101 ROOT_DEV = Root_NFS;
102#else
103 ROOT_DEV = Root_HDA1;
104#endif
105}
106
107void __init mpc834x_itx_init_IRQ(void)
108{
109 u8 senses[8] = {
110 0, /* EXT 0 */
111 IRQ_SENSE_LEVEL, /* EXT 1 */
112 IRQ_SENSE_LEVEL, /* EXT 2 */
113 0, /* EXT 3 */
114#ifdef CONFIG_PCI
115 IRQ_SENSE_LEVEL, /* EXT 4 */
116 IRQ_SENSE_LEVEL, /* EXT 5 */
117 IRQ_SENSE_LEVEL, /* EXT 6 */
118 IRQ_SENSE_LEVEL, /* EXT 7 */
119#else
120 0, /* EXT 4 */
121 0, /* EXT 5 */
122 0, /* EXT 6 */
123 0, /* EXT 7 */
124#endif
125 };
126
127 ipic_init(get_immrbase() + 0x00700, 0, 0, senses, 8);
128
129 /* Initialize the default interrupt mapping priorities,
130 * in case the boot rom changed something on us.
131 */
132 ipic_set_default_priority();
133}
134
135/*
136 * Called very early, MMU is off, device-tree isn't unflattened
137 */
138static int __init mpc834x_itx_probe(void)
139{
140 /* We always match for now; eventually we should look at the flat
141 dev tree to ensure this is the board we are supposed to run on
142 */
143 return 1;
144}
145
146define_machine(mpc834x_itx) {
147 .name = "MPC834x ITX",
148 .probe = mpc834x_itx_probe,
149 .setup_arch = mpc834x_itx_setup_arch,
150 .init_IRQ = mpc834x_itx_init_IRQ,
151 .get_irq = ipic_get_irq,
152 .restart = mpc83xx_restart,
153 .time_init = mpc83xx_time_init,
154 .calibrate_decr = generic_calibrate_decr,
155 .progress = udbg_progress,
156};
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.h b/arch/powerpc/platforms/83xx/mpc834x_itx.h
new file mode 100644
index 000000000000..174ca4ef55f3
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.h
@@ -0,0 +1,23 @@
1/*
2 * arch/powerpc/platforms/83xx/mpc834x_itx.h
3 *
4 * MPC834X ITX common board definitions
5 *
6 * Maintainer: Kumar Gala <galak@kernel.crashing.org>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 *
13 */
14
15#ifndef __MACH_MPC83XX_ITX_H__
16#define __MACH_MPC83XX_ITX_H__
17
18#define PIRQA MPC83xx_IRQ_EXT4
19#define PIRQB MPC83xx_IRQ_EXT5
20#define PIRQC MPC83xx_IRQ_EXT6
21#define PIRQD MPC83xx_IRQ_EXT7
22
23#endif /* __MACH_MPC83XX_ITX_H__ */
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 22da1335445a..9d5da7896892 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -1,6 +1,9 @@
1/* 1/*
2 * Cell Internal Interrupt Controller 2 * Cell Internal Interrupt Controller
3 * 3 *
4 * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
5 * IBM, Corp.
6 *
4 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 7 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
5 * 8 *
6 * Author: Arnd Bergmann <arndb@de.ibm.com> 9 * Author: Arnd Bergmann <arndb@de.ibm.com>
@@ -25,11 +28,13 @@
25#include <linux/module.h> 28#include <linux/module.h>
26#include <linux/percpu.h> 29#include <linux/percpu.h>
27#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/ioport.h>
28 32
29#include <asm/io.h> 33#include <asm/io.h>
30#include <asm/pgtable.h> 34#include <asm/pgtable.h>
31#include <asm/prom.h> 35#include <asm/prom.h>
32#include <asm/ptrace.h> 36#include <asm/ptrace.h>
37#include <asm/machdep.h>
33 38
34#include "interrupt.h" 39#include "interrupt.h"
35#include "cbe_regs.h" 40#include "cbe_regs.h"
@@ -37,231 +42,65 @@
37struct iic { 42struct iic {
38 struct cbe_iic_thread_regs __iomem *regs; 43 struct cbe_iic_thread_regs __iomem *regs;
39 u8 target_id; 44 u8 target_id;
45 u8 eoi_stack[16];
46 int eoi_ptr;
47 struct irq_host *host;
40}; 48};
41 49
42static DEFINE_PER_CPU(struct iic, iic); 50static DEFINE_PER_CPU(struct iic, iic);
51#define IIC_NODE_COUNT 2
52static struct irq_host *iic_hosts[IIC_NODE_COUNT];
43 53
44void iic_local_enable(void) 54/* Convert between "pending" bits and hw irq number */
55static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits)
45{ 56{
46 struct iic *iic = &__get_cpu_var(iic); 57 unsigned char unit = bits.source & 0xf;
47 u64 tmp;
48
49 /*
50 * There seems to be a bug that is present in DD2.x CPUs
51 * and still only partially fixed in DD3.1.
52 * This bug causes a value written to the priority register
53 * not to make it there, resulting in a system hang unless we
54 * write it again.
55 * Masking with 0xf0 is done because the Cell BE does not
56 * implement the lower four bits of the interrupt priority,
57 * they always read back as zeroes, although future CPUs
58 * might implement different bits.
59 */
60 do {
61 out_be64(&iic->regs->prio, 0xff);
62 tmp = in_be64(&iic->regs->prio);
63 } while ((tmp & 0xf0) != 0xf0);
64}
65
66void iic_local_disable(void)
67{
68 out_be64(&__get_cpu_var(iic).regs->prio, 0x0);
69}
70 58
71static unsigned int iic_startup(unsigned int irq) 59 if (bits.flags & CBE_IIC_IRQ_IPI)
72{ 60 return IIC_IRQ_IPI0 | (bits.prio >> 4);
73 return 0; 61 else if (bits.class <= 3)
62 return (bits.class << 4) | unit;
63 else
64 return IIC_IRQ_INVALID;
74} 65}
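iic_pending_to_hwnum() packs the pending-state fields into a linear hardware number: IPIs land at IIC_IRQ_IPI0 and above, everything else is class in the high nibble and unit in the low nibble. A standalone rendering of that mapping (the struct here is a simplified stand-in for cbe_iic_pending_bits, which is really a hardware register overlay, and the sample field values are invented):

#include <stdio.h>

enum { IIC_IRQ_IPI0 = 0x40, IIC_IRQ_INVALID = 0xff, FLAG_IPI = 1 };

struct pending_bits {
        unsigned char class;
        unsigned char source;
        unsigned char prio;
        unsigned char flags;
};

static unsigned int pending_to_hwnum(struct pending_bits bits)
{
        unsigned char unit = bits.source & 0xf;

        if (bits.flags & FLAG_IPI)
                return IIC_IRQ_IPI0 | (bits.prio >> 4);
        else if (bits.class <= 3)
                return (bits.class << 4) | unit;
        else
                return IIC_IRQ_INVALID;
}

int main(void)
{
        struct pending_bits ext = { 2, 0x0b, 0, 0 };         /* class 2, unit 0xb */
        struct pending_bits ipi = { 0, 0, 0xf0, FLAG_IPI };  /* highest-prio IPI  */

        printf("external -> 0x%02x, ipi -> 0x%02x\n",
               pending_to_hwnum(ext), pending_to_hwnum(ipi));
        return 0;
}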
75 66
76static void iic_enable(unsigned int irq) 67static void iic_mask(unsigned int irq)
77{ 68{
78 iic_local_enable();
79} 69}
80 70
81static void iic_disable(unsigned int irq) 71static void iic_unmask(unsigned int irq)
82{ 72{
83} 73}
84 74
85static void iic_end(unsigned int irq) 75static void iic_eoi(unsigned int irq)
86{ 76{
87 iic_local_enable(); 77 struct iic *iic = &__get_cpu_var(iic);
78 out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
79 BUG_ON(iic->eoi_ptr < 0);
88} 80}
89 81
90static struct hw_interrupt_type iic_pic = { 82static struct irq_chip iic_chip = {
91 .typename = " CELL-IIC ", 83 .typename = " CELL-IIC ",
92 .startup = iic_startup, 84 .mask = iic_mask,
93 .enable = iic_enable, 85 .unmask = iic_unmask,
94 .disable = iic_disable, 86 .eoi = iic_eoi,
95 .end = iic_end,
96}; 87};
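The new eoi_stack pairs each delivered interrupt with the priority value recorded when it arrived: iic_get_irq() (further down in this hunk) pushes pending.prio, and iic_eoi() writes the previous entry back to the priority register, so nested interrupts unwind in order. A minimal model of that push/pop discipline (the register write and stack depth are simulated, and the priority values are arbitrary):

#include <stdio.h>
#include <assert.h>

static unsigned char prio_reg;                 /* stands in for iic->regs->prio */
static unsigned char eoi_stack[16] = { 0xff }; /* slot 0 preloaded, as in init_one_iic */
static int eoi_ptr;

static void irq_enter_sim(unsigned char pending_prio)
{
        eoi_stack[++eoi_ptr] = pending_prio;   /* push on interrupt delivery */
        assert(eoi_ptr <= 15);
}

static void irq_eoi_sim(void)
{
        prio_reg = eoi_stack[--eoi_ptr];       /* restore previous level on EOI */
        assert(eoi_ptr >= 0);
}

int main(void)
{
        irq_enter_sim(0x40);                   /* first interrupt               */
        irq_enter_sim(0x80);                   /* nested, higher priority       */
        irq_eoi_sim();
        printf("after inner EOI, prio = 0x%02x\n", prio_reg);
        irq_eoi_sim();
        printf("after outer EOI, prio = 0x%02x\n", prio_reg);
        return 0;
}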
97 88
98static int iic_external_get_irq(struct cbe_iic_pending_bits pending)
99{
100 int irq;
101 unsigned char node, unit;
102
103 node = pending.source >> 4;
104 unit = pending.source & 0xf;
105 irq = -1;
106
107 /*
108 * This mapping is specific to the Cell Broadband
109 * Engine. We might need to get the numbers
110 * from the device tree to support future CPUs.
111 */
112 switch (unit) {
113 case 0x00:
114 case 0x0b:
115 /*
116 * One of these units can be connected
117 * to an external interrupt controller.
118 */
119 if (pending.class != 2)
120 break;
121 irq = IIC_EXT_OFFSET
122 + spider_get_irq(node)
123 + node * IIC_NODE_STRIDE;
124 break;
125 case 0x01 ... 0x04:
126 case 0x07 ... 0x0a:
127 /*
128 * These units are connected to the SPEs
129 */
130 if (pending.class > 2)
131 break;
132 irq = IIC_SPE_OFFSET
133 + pending.class * IIC_CLASS_STRIDE
134 + node * IIC_NODE_STRIDE
135 + unit;
136 break;
137 }
138 if (irq == -1)
139 printk(KERN_WARNING "Unexpected interrupt class %02x, "
140 "source %02x, prio %02x, cpu %02x\n", pending.class,
141 pending.source, pending.prio, smp_processor_id());
142 return irq;
143}
144
145/* Get an IRQ number from the pending state register of the IIC */ 89/* Get an IRQ number from the pending state register of the IIC */
146int iic_get_irq(struct pt_regs *regs) 90static unsigned int iic_get_irq(struct pt_regs *regs)
147{ 91{
148 struct iic *iic; 92 struct cbe_iic_pending_bits pending;
149 int irq; 93 struct iic *iic;
150 struct cbe_iic_pending_bits pending; 94
151 95 iic = &__get_cpu_var(iic);
152 iic = &__get_cpu_var(iic); 96 *(unsigned long *) &pending =
153 *(unsigned long *) &pending = 97 in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
154 in_be64((unsigned long __iomem *) &iic->regs->pending_destr); 98 iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
155 99 BUG_ON(iic->eoi_ptr > 15);
156 irq = -1; 100 if (pending.flags & CBE_IIC_IRQ_VALID)
157 if (pending.flags & CBE_IIC_IRQ_VALID) { 101 return irq_linear_revmap(iic->host,
158 if (pending.flags & CBE_IIC_IRQ_IPI) { 102 iic_pending_to_hwnum(pending));
159 irq = IIC_IPI_OFFSET + (pending.prio >> 4); 103 return NO_IRQ;
160/*
161 if (irq > 0x80)
162 printk(KERN_WARNING "Unexpected IPI prio %02x"
163 "on CPU %02x\n", pending.prio,
164 smp_processor_id());
165*/
166 } else {
167 irq = iic_external_get_irq(pending);
168 }
169 }
170 return irq;
171}
172
173/* hardcoded part to be compatible with older firmware */
174
175static int setup_iic_hardcoded(void)
176{
177 struct device_node *np;
178 int nodeid, cpu;
179 unsigned long regs;
180 struct iic *iic;
181
182 for_each_possible_cpu(cpu) {
183 iic = &per_cpu(iic, cpu);
184 nodeid = cpu/2;
185
186 for (np = of_find_node_by_type(NULL, "cpu");
187 np;
188 np = of_find_node_by_type(np, "cpu")) {
189 if (nodeid == *(int *)get_property(np, "node-id", NULL))
190 break;
191 }
192
193 if (!np) {
194 printk(KERN_WARNING "IIC: CPU %d not found\n", cpu);
195 iic->regs = NULL;
196 iic->target_id = 0xff;
197 return -ENODEV;
198 }
199
200 regs = *(long *)get_property(np, "iic", NULL);
201
202 /* hack until we have decided on the devtree info */
203 regs += 0x400;
204 if (cpu & 1)
205 regs += 0x20;
206
207 printk(KERN_INFO "IIC for CPU %d at %lx\n", cpu, regs);
208 iic->regs = ioremap(regs, sizeof(struct cbe_iic_thread_regs));
209 iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe);
210 }
211
212 return 0;
213}
214
215static int setup_iic(void)
216{
217 struct device_node *dn;
218 unsigned long *regs;
219 char *compatible;
220 unsigned *np, found = 0;
221 struct iic *iic = NULL;
222
223 for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
224 compatible = (char *)get_property(dn, "compatible", NULL);
225
226 if (!compatible) {
227 printk(KERN_WARNING "no compatible property found !\n");
228 continue;
229 }
230
231 if (strstr(compatible, "IBM,CBEA-Internal-Interrupt-Controller"))
232 regs = (unsigned long *)get_property(dn,"reg", NULL);
233 else
234 continue;
235
236 if (!regs)
237 printk(KERN_WARNING "IIC: no reg property\n");
238
239 np = (unsigned int *)get_property(dn, "ibm,interrupt-server-ranges", NULL);
240
241 if (!np) {
242 printk(KERN_WARNING "IIC: CPU association not found\n");
243 iic->regs = NULL;
244 iic->target_id = 0xff;
245 return -ENODEV;
246 }
247
248 iic = &per_cpu(iic, np[0]);
249 iic->regs = ioremap(regs[0], sizeof(struct cbe_iic_thread_regs));
250 iic->target_id = ((np[0] & 2) << 3) + ((np[0] & 1) ? 0xf : 0xe);
251 printk("IIC for CPU %d at %lx mapped to %p\n", np[0], regs[0], iic->regs);
252
253 iic = &per_cpu(iic, np[1]);
254 iic->regs = ioremap(regs[2], sizeof(struct cbe_iic_thread_regs));
255 iic->target_id = ((np[1] & 2) << 3) + ((np[1] & 1) ? 0xf : 0xe);
256 printk("IIC for CPU %d at %lx mapped to %p\n", np[1], regs[2], iic->regs);
257
258 found++;
259 }
260
261 if (found)
262 return 0;
263 else
264 return -ENODEV;
265} 104}
266 105
267#ifdef CONFIG_SMP 106#ifdef CONFIG_SMP
@@ -269,12 +108,12 @@ static int setup_iic(void)
269/* Use the highest interrupt priorities for IPI */ 108/* Use the highest interrupt priorities for IPI */
270static inline int iic_ipi_to_irq(int ipi) 109static inline int iic_ipi_to_irq(int ipi)
271{ 110{
272 return IIC_IPI_OFFSET + IIC_NUM_IPIS - 1 - ipi; 111 return IIC_IRQ_IPI0 + IIC_NUM_IPIS - 1 - ipi;
273} 112}
274 113
275static inline int iic_irq_to_ipi(int irq) 114static inline int iic_irq_to_ipi(int irq)
276{ 115{
277 return IIC_NUM_IPIS - 1 - (irq - IIC_IPI_OFFSET); 116 return IIC_NUM_IPIS - 1 - (irq - IIC_IRQ_IPI0);
278} 117}
279 118
280void iic_setup_cpu(void) 119void iic_setup_cpu(void)
@@ -293,22 +132,51 @@ u8 iic_get_target_id(int cpu)
293} 132}
294EXPORT_SYMBOL_GPL(iic_get_target_id); 133EXPORT_SYMBOL_GPL(iic_get_target_id);
295 134
135struct irq_host *iic_get_irq_host(int node)
136{
137 if (node < 0 || node >= IIC_NODE_COUNT)
138 return NULL;
139 return iic_hosts[node];
140}
141EXPORT_SYMBOL_GPL(iic_get_irq_host);
142
143
296static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) 144static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
297{ 145{
298 smp_message_recv(iic_irq_to_ipi(irq), regs); 146 int ipi = (int)(long)dev_id;
147
148 smp_message_recv(ipi, regs);
149
299 return IRQ_HANDLED; 150 return IRQ_HANDLED;
300} 151}
301 152
302static void iic_request_ipi(int ipi, const char *name) 153static void iic_request_ipi(int ipi, const char *name)
303{ 154{
304 int irq; 155 int node, virq;
305 156
306 irq = iic_ipi_to_irq(ipi); 157 for (node = 0; node < IIC_NODE_COUNT; node++) {
307 /* IPIs are marked IRQF_DISABLED as they must run with irqs 158 char *rname;
308 * disabled */ 159 if (iic_hosts[node] == NULL)
309 get_irq_desc(irq)->chip = &iic_pic; 160 continue;
310 get_irq_desc(irq)->status |= IRQ_PER_CPU; 161 virq = irq_create_mapping(iic_hosts[node],
311 request_irq(irq, iic_ipi_action, IRQF_DISABLED, name, NULL); 162 iic_ipi_to_irq(ipi), 0);
163 if (virq == NO_IRQ) {
164 printk(KERN_ERR
165 "iic: failed to map IPI %s on node %d\n",
166 name, node);
167 continue;
168 }
169 rname = kzalloc(strlen(name) + 16, GFP_KERNEL);
170 if (rname)
171 sprintf(rname, "%s node %d", name, node);
172 else
173 rname = (char *)name;
174 if (request_irq(virq, iic_ipi_action, IRQF_DISABLED,
175 rname, (void *)(long)ipi))
176 printk(KERN_ERR
177 "iic: failed to request IPI %s on node %d\n",
178 name, node);
179 }
312} 180}
313 181
314void iic_request_IPIs(void) 182void iic_request_IPIs(void)
@@ -319,34 +187,119 @@ void iic_request_IPIs(void)
319 iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug"); 187 iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug");
320#endif /* CONFIG_DEBUGGER */ 188#endif /* CONFIG_DEBUGGER */
321} 189}
190
322#endif /* CONFIG_SMP */ 191#endif /* CONFIG_SMP */
323 192
324static void iic_setup_spe_handlers(void) 193
194static int iic_host_match(struct irq_host *h, struct device_node *node)
195{
196 return h->host_data != NULL && node == h->host_data;
197}
198
199static int iic_host_map(struct irq_host *h, unsigned int virq,
200 irq_hw_number_t hw, unsigned int flags)
201{
202 if (hw < IIC_IRQ_IPI0)
203 set_irq_chip_and_handler(virq, &iic_chip, handle_fasteoi_irq);
204 else
205 set_irq_chip_and_handler(virq, &iic_chip, handle_percpu_irq);
206 return 0;
207}
208
209static int iic_host_xlate(struct irq_host *h, struct device_node *ct,
210 u32 *intspec, unsigned int intsize,
211 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
212
213{
214 /* Currently, we don't translate anything. That needs to be fixed as
215 * we get better defined device-trees. iic interrupts have to be
216 * explicitly mapped by whoever needs them
217 */
218 return -ENODEV;
219}
220
221static struct irq_host_ops iic_host_ops = {
222 .match = iic_host_match,
223 .map = iic_host_map,
224 .xlate = iic_host_xlate,
225};
226
227static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
228 struct irq_host *host)
325{ 229{
326 int be, isrc; 230 /* XXX FIXME: should locate the linux CPU number from the HW cpu
231 * number properly. We are lucky for now
232 */
233 struct iic *iic = &per_cpu(iic, hw_cpu);
234
235 iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
236 BUG_ON(iic->regs == NULL);
327 237
328 /* Assume two threads per BE are present */ 238 iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe);
329 for (be=0; be < num_present_cpus() / 2; be++) { 239 iic->eoi_stack[0] = 0xff;
330 for (isrc = 0; isrc < IIC_CLASS_STRIDE * 3; isrc++) { 240 iic->host = host;
331 int irq = IIC_NODE_STRIDE * be + IIC_SPE_OFFSET + isrc; 241 out_be64(&iic->regs->prio, 0);
332 get_irq_desc(irq)->chip = &iic_pic; 242
243 printk(KERN_INFO "IIC for CPU %d at %lx mapped to %p, target id 0x%x\n",
244 hw_cpu, addr, iic->regs, iic->target_id);
245}
246
247static int __init setup_iic(void)
248{
249 struct device_node *dn;
250 struct resource r0, r1;
251 struct irq_host *host;
252 int found = 0;
253 u32 *np;
254
255 for (dn = NULL;
256 (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
257 if (!device_is_compatible(dn,
258 "IBM,CBEA-Internal-Interrupt-Controller"))
259 continue;
260 np = (u32 *)get_property(dn, "ibm,interrupt-server-ranges",
261 NULL);
262 if (np == NULL) {
263 printk(KERN_WARNING "IIC: CPU association not found\n");
264 of_node_put(dn);
265 return -ENODEV;
266 }
267 if (of_address_to_resource(dn, 0, &r0) ||
268 of_address_to_resource(dn, 1, &r1)) {
269 printk(KERN_WARNING "IIC: Can't resolve addresses\n");
270 of_node_put(dn);
271 return -ENODEV;
333 } 272 }
273 host = NULL;
274 if (found < IIC_NODE_COUNT) {
275 host = irq_alloc_host(IRQ_HOST_MAP_LINEAR,
276 IIC_SOURCE_COUNT,
277 &iic_host_ops,
278 IIC_IRQ_INVALID);
279 iic_hosts[found] = host;
280 BUG_ON(iic_hosts[found] == NULL);
281 iic_hosts[found]->host_data = of_node_get(dn);
282 found++;
283 }
284 init_one_iic(np[0], r0.start, host);
285 init_one_iic(np[1], r1.start, host);
334 } 286 }
287
288 if (found)
289 return 0;
290 else
291 return -ENODEV;
335} 292}
336 293
337void iic_init_IRQ(void) 294void __init iic_init_IRQ(void)
338{ 295{
339 int cpu, irq_offset; 296 /* Discover and initialize iics */
340 struct iic *iic;
341
342 if (setup_iic() < 0) 297 if (setup_iic() < 0)
343 setup_iic_hardcoded(); 298 panic("IIC: Failed to initialize !\n");
344 299
345 irq_offset = 0; 300 /* Set master interrupt handling function */
346 for_each_possible_cpu(cpu) { 301 ppc_md.get_irq = iic_get_irq;
347 iic = &per_cpu(iic, cpu); 302
348 if (iic->regs) 303 /* Enable on current CPU */
349 out_be64(&iic->regs->prio, 0xff); 304 iic_setup_cpu();
350 }
351 iic_setup_spe_handlers();
352} 305}
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
index 799f77d98f96..5560a92ec3ab 100644
--- a/arch/powerpc/platforms/cell/interrupt.h
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -37,27 +37,24 @@
37 */ 37 */
38 38
39enum { 39enum {
40 IIC_EXT_OFFSET = 0x00, /* Start of south bridge IRQs */ 40 IIC_IRQ_INVALID = 0xff,
41 IIC_NUM_EXT = 0x40, /* Number of south bridge IRQs */ 41 IIC_IRQ_MAX = 0x3f,
42 IIC_SPE_OFFSET = 0x40, /* Start of SPE interrupts */ 42 IIC_IRQ_EXT_IOIF0 = 0x20,
43 IIC_CLASS_STRIDE = 0x10, /* SPE IRQs per class */ 43 IIC_IRQ_EXT_IOIF1 = 0x2b,
44 IIC_IPI_OFFSET = 0x70, /* Start of IPI IRQs */ 44 IIC_IRQ_IPI0 = 0x40,
45 IIC_NUM_IPIS = 0x10, /* IRQs reserved for IPI */ 45 IIC_NUM_IPIS = 0x10, /* IRQs reserved for IPI */
46 IIC_NODE_STRIDE = 0x80, /* Total IRQs per node */ 46 IIC_SOURCE_COUNT = 0x50,
47}; 47};
48 48
49extern void iic_init_IRQ(void); 49extern void iic_init_IRQ(void);
50extern int iic_get_irq(struct pt_regs *regs);
51extern void iic_cause_IPI(int cpu, int mesg); 50extern void iic_cause_IPI(int cpu, int mesg);
52extern void iic_request_IPIs(void); 51extern void iic_request_IPIs(void);
53extern void iic_setup_cpu(void); 52extern void iic_setup_cpu(void);
54extern void iic_local_enable(void);
55extern void iic_local_disable(void);
56 53
57extern u8 iic_get_target_id(int cpu); 54extern u8 iic_get_target_id(int cpu);
55extern struct irq_host *iic_get_irq_host(int node);
58 56
59extern void spider_init_IRQ(void); 57extern void spider_init_IRQ(void);
60extern int spider_get_irq(int node);
61 58
62#endif 59#endif
63#endif /* ASM_CELL_PIC_H */ 60#endif /* ASM_CELL_PIC_H */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index d8c2a29b3c15..282987d6d4a2 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -49,6 +49,7 @@
49#include <asm/irq.h> 49#include <asm/irq.h>
50#include <asm/spu.h> 50#include <asm/spu.h>
51#include <asm/spu_priv1.h> 51#include <asm/spu_priv1.h>
52#include <asm/udbg.h>
52 53
53#include "interrupt.h" 54#include "interrupt.h"
54#include "iommu.h" 55#include "iommu.h"
@@ -79,10 +80,22 @@ static void cell_progress(char *s, unsigned short hex)
79 printk("*** %04x : %s\n", hex, s ? s : ""); 80 printk("*** %04x : %s\n", hex, s ? s : "");
80} 81}
81 82
83static void __init cell_pcibios_fixup(void)
84{
85 struct pci_dev *dev = NULL;
86
87 for_each_pci_dev(dev)
88 pci_read_irq_line(dev);
89}
90
91static void __init cell_init_irq(void)
92{
93 iic_init_IRQ();
94 spider_init_IRQ();
95}
96
82static void __init cell_setup_arch(void) 97static void __init cell_setup_arch(void)
83{ 98{
84 ppc_md.init_IRQ = iic_init_IRQ;
85 ppc_md.get_irq = iic_get_irq;
86#ifdef CONFIG_SPU_BASE 99#ifdef CONFIG_SPU_BASE
87 spu_priv1_ops = &spu_priv1_mmio_ops; 100 spu_priv1_ops = &spu_priv1_mmio_ops;
88#endif 101#endif
@@ -108,7 +121,6 @@ static void __init cell_setup_arch(void)
108 /* Find and initialize PCI host bridges */ 121 /* Find and initialize PCI host bridges */
109 init_pci_config_tokens(); 122 init_pci_config_tokens();
110 find_and_init_phbs(); 123 find_and_init_phbs();
111 spider_init_IRQ();
112 cbe_pervasive_init(); 124 cbe_pervasive_init();
113#ifdef CONFIG_DUMMY_CONSOLE 125#ifdef CONFIG_DUMMY_CONSOLE
114 conswitchp = &dummy_con; 126 conswitchp = &dummy_con;
@@ -126,8 +138,6 @@ static void __init cell_init_early(void)
126 138
127 cell_init_iommu(); 139 cell_init_iommu();
128 140
129 ppc64_interrupt_controller = IC_CELL_PIC;
130
131 DBG(" <- cell_init_early()\n"); 141 DBG(" <- cell_init_early()\n");
132} 142}
133 143
@@ -173,6 +183,8 @@ define_machine(cell) {
173 .calibrate_decr = generic_calibrate_decr, 183 .calibrate_decr = generic_calibrate_decr,
174 .check_legacy_ioport = cell_check_legacy_ioport, 184 .check_legacy_ioport = cell_check_legacy_ioport,
175 .progress = cell_progress, 185 .progress = cell_progress,
186 .init_IRQ = cell_init_irq,
187 .pcibios_fixup = cell_pcibios_fixup,
176#ifdef CONFIG_KEXEC 188#ifdef CONFIG_KEXEC
177 .machine_kexec = default_machine_kexec, 189 .machine_kexec = default_machine_kexec,
178 .machine_kexec_prepare = default_machine_kexec_prepare, 190 .machine_kexec_prepare = default_machine_kexec_prepare,
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 7c3a0b6d34fd..ae7ef88f1a37 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -22,6 +22,7 @@
22 22
23#include <linux/interrupt.h> 23#include <linux/interrupt.h>
24#include <linux/irq.h> 24#include <linux/irq.h>
25#include <linux/ioport.h>
25 26
26#include <asm/pgtable.h> 27#include <asm/pgtable.h>
27#include <asm/prom.h> 28#include <asm/prom.h>
@@ -56,184 +57,313 @@ enum {
56 REISWAITEN = 0x508, /* Reissue Wait Control*/ 57 REISWAITEN = 0x508, /* Reissue Wait Control*/
57}; 58};
58 59
59static void __iomem *spider_pics[4]; 60#define SPIDER_CHIP_COUNT 4
61#define SPIDER_SRC_COUNT 64
62#define SPIDER_IRQ_INVALID 63
60 63
61static void __iomem *spider_get_pic(int irq) 64struct spider_pic {
62{ 65 struct irq_host *host;
63 int node = irq / IIC_NODE_STRIDE; 66 struct device_node *of_node;
64 irq %= IIC_NODE_STRIDE; 67 void __iomem *regs;
65 68 unsigned int node_id;
66 if (irq >= IIC_EXT_OFFSET && 69};
67 irq < IIC_EXT_OFFSET + IIC_NUM_EXT && 70static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
68 spider_pics)
69 return spider_pics[node];
70 return NULL;
71}
72 71
73static int spider_get_nr(unsigned int irq) 72static struct spider_pic *spider_virq_to_pic(unsigned int virq)
74{ 73{
75 return (irq % IIC_NODE_STRIDE) - IIC_EXT_OFFSET; 74 return irq_map[virq].host->host_data;
76} 75}
77 76
78static void __iomem *spider_get_irq_config(int irq) 77static void __iomem *spider_get_irq_config(struct spider_pic *pic,
78 unsigned int src)
79{ 79{
80 void __iomem *pic; 80 return pic->regs + TIR_CFGA + 8 * src;
81 pic = spider_get_pic(irq);
82 return pic + TIR_CFGA + 8 * spider_get_nr(irq);
83} 81}
84 82
85static void spider_enable_irq(unsigned int irq) 83static void spider_unmask_irq(unsigned int virq)
86{ 84{
87 int nodeid = (irq / IIC_NODE_STRIDE) * 0x10; 85 struct spider_pic *pic = spider_virq_to_pic(virq);
88 void __iomem *cfg = spider_get_irq_config(irq); 86 void __iomem *cfg = spider_get_irq_config(pic, irq_map[virq].hwirq);
89 irq = spider_get_nr(irq);
90 87
91 out_be32(cfg, (in_be32(cfg) & ~0xf0)| 0x3107000eu | nodeid); 88 /* We use no locking as we should be covered by the descriptor lock
 92 	out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq); 89	 * for access to individual source configuration registers
90 */
91 out_be32(cfg, in_be32(cfg) | 0x30000000u);
93} 92}
94 93
95static void spider_disable_irq(unsigned int irq) 94static void spider_mask_irq(unsigned int virq)
96{ 95{
97 void __iomem *cfg = spider_get_irq_config(irq); 96 struct spider_pic *pic = spider_virq_to_pic(virq);
98 irq = spider_get_nr(irq); 97 void __iomem *cfg = spider_get_irq_config(pic, irq_map[virq].hwirq);
99 98
99 /* We use no locking as we should be covered by the descriptor lock
100	 * for access to individual source configuration registers
101 */
100 out_be32(cfg, in_be32(cfg) & ~0x30000000u); 102 out_be32(cfg, in_be32(cfg) & ~0x30000000u);
101} 103}
102 104
103static unsigned int spider_startup_irq(unsigned int irq) 105static void spider_ack_irq(unsigned int virq)
104{ 106{
105 spider_enable_irq(irq); 107 struct spider_pic *pic = spider_virq_to_pic(virq);
106 return 0; 108 unsigned int src = irq_map[virq].hwirq;
107}
108 109
109static void spider_shutdown_irq(unsigned int irq) 110 /* Reset edge detection logic if necessary
110{ 111 */
111 spider_disable_irq(irq); 112 if (get_irq_desc(virq)->status & IRQ_LEVEL)
112} 113 return;
113 114
114static void spider_end_irq(unsigned int irq) 115 /* Only interrupts 47 to 50 can be set to edge */
115{ 116 if (src < 47 || src > 50)
116 spider_enable_irq(irq); 117 return;
117}
118 118
119static void spider_ack_irq(unsigned int irq) 119 /* Perform the clear of the edge logic */
120{ 120 out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf));
121 spider_disable_irq(irq);
122 iic_local_enable();
123} 121}
124 122
125static struct hw_interrupt_type spider_pic = { 123static struct irq_chip spider_pic = {
126 .typename = " SPIDER ", 124 .typename = " SPIDER ",
127 .startup = spider_startup_irq, 125 .unmask = spider_unmask_irq,
128 .shutdown = spider_shutdown_irq, 126 .mask = spider_mask_irq,
129 .enable = spider_enable_irq,
130 .disable = spider_disable_irq,
131 .ack = spider_ack_irq, 127 .ack = spider_ack_irq,
132 .end = spider_end_irq,
133}; 128};
134 129
135int spider_get_irq(int node) 130static int spider_host_match(struct irq_host *h, struct device_node *node)
136{ 131{
137 unsigned long cs; 132 struct spider_pic *pic = h->host_data;
138 void __iomem *regs = spider_pics[node]; 133 return node == pic->of_node;
139
140 cs = in_be32(regs + TIR_CS) >> 24;
141
142 if (cs == 63)
143 return -1;
144 else
145 return cs;
146} 134}
147 135
148/* hardcoded part to be compatible with older firmware */ 136static int spider_host_map(struct irq_host *h, unsigned int virq,
149 137 irq_hw_number_t hw, unsigned int flags)
150void spider_init_IRQ_hardcoded(void)
151{ 138{
152 int node; 139 unsigned int sense = flags & IRQ_TYPE_SENSE_MASK;
153 long spiderpic; 140 struct spider_pic *pic = h->host_data;
154 long pics[] = { 0x24000008000, 0x34000008000 }; 141 void __iomem *cfg = spider_get_irq_config(pic, hw);
155 int n; 142 int level = 0;
156 143 u32 ic;
157 pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__); 144
158 145 /* Note that only level high is supported for most interrupts */
159 for (node = 0; node < num_present_cpus()/2; node++) { 146 if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH &&
160 spiderpic = pics[node]; 147 (hw < 47 || hw > 50))
161 printk(KERN_DEBUG "SPIDER addr: %lx\n", spiderpic); 148 return -EINVAL;
162 spider_pics[node] = ioremap(spiderpic, 0x800); 149
163 for (n = 0; n < IIC_NUM_EXT; n++) { 150 /* Decode sense type */
164 int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE; 151 switch(sense) {
165 get_irq_desc(irq)->chip = &spider_pic; 152 case IRQ_TYPE_EDGE_RISING:
166 } 153 ic = 0x3;
167 154 break;
168 /* do not mask any interrupts because of level */ 155 case IRQ_TYPE_EDGE_FALLING:
169 out_be32(spider_pics[node] + TIR_MSK, 0x0); 156 ic = 0x2;
170 157 break;
171 /* disable edge detection clear */ 158 case IRQ_TYPE_LEVEL_LOW:
172 /* out_be32(spider_pics[node] + TIR_EDC, 0x0); */ 159 ic = 0x0;
173 160 level = 1;
174 /* enable interrupt packets to be output */ 161 break;
175 out_be32(spider_pics[node] + TIR_PIEN, 162 case IRQ_TYPE_LEVEL_HIGH:
176 in_be32(spider_pics[node] + TIR_PIEN) | 0x1); 163 case IRQ_TYPE_NONE:
177 164 ic = 0x1;
178 /* Enable the interrupt detection enable bit. Do this last! */ 165 level = 1;
179 out_be32(spider_pics[node] + TIR_DEN, 166 break;
180 in_be32(spider_pics[node] + TIR_DEN) | 0x1); 167 default:
168 return -EINVAL;
181 } 169 }
182}
183 170
184void spider_init_IRQ(void) 171 /* Configure the source. One gross hack that was there before and
185{ 172 * that I've kept around is the priority to the BE which I set to
186	long spider_reg; 173	 * be the same as the interrupt source number. I don't know whether
187 struct device_node *dn; 174 * that's supposed to make any kind of sense however, we'll have to
188 char *compatible; 175 * decide that, but for now, I'm not changing the behaviour.
189 int n, node = 0; 176 */
177 out_be32(cfg, (ic << 24) | (0x7 << 16) | (pic->node_id << 4) | 0xe);
178 out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
179
180 if (level)
181 get_irq_desc(virq)->status |= IRQ_LEVEL;
182 set_irq_chip_and_handler(virq, &spider_pic, handle_level_irq);
183 return 0;
184}
190 185
191 for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) { 186static int spider_host_xlate(struct irq_host *h, struct device_node *ct,
192 compatible = (char *)get_property(dn, "compatible", NULL); 187 u32 *intspec, unsigned int intsize,
188 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
193 189
194 if (!compatible) 190{
195 continue; 191 /* Spider interrupts have 2 cells, first is the interrupt source,
192 * second, well, I don't know for sure yet ... We mask the top bits
193 * because old device-trees encode a node number in there
194 */
195 *out_hwirq = intspec[0] & 0x3f;
196 *out_flags = IRQ_TYPE_LEVEL_HIGH;
197 return 0;
198}
196 199
197 if (strstr(compatible, "CBEA,platform-spider-pic")) 200static struct irq_host_ops spider_host_ops = {
198 spider_reg = *(long *)get_property(dn,"reg", NULL); 201 .match = spider_host_match,
199 else if (strstr(compatible, "sti,platform-spider-pic")) { 202 .map = spider_host_map,
200 spider_init_IRQ_hardcoded(); 203 .xlate = spider_host_xlate,
201 return; 204};
202 } else
203 continue;
204 205
205 if (!spider_reg) 206static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc,
206 printk("interrupt controller does not have reg property !\n"); 207 struct pt_regs *regs)
208{
209 struct spider_pic *pic = desc->handler_data;
210 unsigned int cs, virq;
207 211
208 n = prom_n_addr_cells(dn); 212 cs = in_be32(pic->regs + TIR_CS) >> 24;
213 if (cs == SPIDER_IRQ_INVALID)
214 virq = NO_IRQ;
215 else
216 virq = irq_linear_revmap(pic->host, cs);
217 if (virq != NO_IRQ)
218 generic_handle_irq(virq, regs);
219 desc->chip->eoi(irq);
220}
209 221
210	 if ( n != 2) 222/* For hooking up the cascade we have a problem. Our device-tree is
211	 printk("reg property with invalid number of elements \n"); 223 * crap and we don't know which BE iic interrupt we are hooked on, at
 224 * least not the "standard" way. We can reconstitute it based on two
 225 * pieces of information though: which BE node we are connected to and whether
226 * we are connected to IOIF0 or IOIF1. Right now, we really only care
227 * about the IBM cell blade and we know that its firmware gives us an
228 * interrupt-map property which is pretty strange.
229 */
230static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
231{
232 unsigned int virq;
233 u32 *imap, *tmp;
234 int imaplen, intsize, unit;
235 struct device_node *iic;
236 struct irq_host *iic_host;
237
238#if 0 /* Enable that when we have a way to retrieve the node as well */
239	/* First, we check whether we have a real "interrupts" in the device
240 * tree in case the device-tree is ever fixed
241 */
242 struct of_irq oirq;
243 if (of_irq_map_one(pic->of_node, 0, &oirq) == 0) {
244 virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
245 oirq.size);
246 goto bail;
247 }
248#endif
249
250 /* Now do the horrible hacks */
251 tmp = (u32 *)get_property(pic->of_node, "#interrupt-cells", NULL);
252 if (tmp == NULL)
253 return NO_IRQ;
254 intsize = *tmp;
255 imap = (u32 *)get_property(pic->of_node, "interrupt-map", &imaplen);
256 if (imap == NULL || imaplen < (intsize + 1))
257 return NO_IRQ;
258 iic = of_find_node_by_phandle(imap[intsize]);
259 if (iic == NULL)
260 return NO_IRQ;
261 imap += intsize + 1;
262 tmp = (u32 *)get_property(iic, "#interrupt-cells", NULL);
263 if (tmp == NULL)
264 return NO_IRQ;
265 intsize = *tmp;
266 /* Assume unit is last entry of interrupt specifier */
267 unit = imap[intsize - 1];
268 /* Ok, we have a unit, now let's try to get the node */
269 tmp = (u32 *)get_property(iic, "ibm,interrupt-server-ranges", NULL);
270 if (tmp == NULL) {
271 of_node_put(iic);
272 return NO_IRQ;
273 }
274 /* ugly as hell but works for now */
275 pic->node_id = (*tmp) >> 1;
276 of_node_put(iic);
277
278 /* Ok, now let's get cracking. You may ask me why I just didn't match
279 * the iic host from the iic OF node, but that way I'm still compatible
280	 * with really, really old firmwares for which we don't have a node
281 */
282 iic_host = iic_get_irq_host(pic->node_id);
283 if (iic_host == NULL)
284 return NO_IRQ;
285 /* Manufacture an IIC interrupt number of class 2 */
286 virq = irq_create_mapping(iic_host, 0x20 | unit, 0);
287 if (virq == NO_IRQ)
288 printk(KERN_ERR "spider_pic: failed to map cascade !");
289 return virq;
290}
212 291
213 spider_pics[node] = ioremap(spider_reg, 0x800);
214 292
215 printk("SPIDER addr: %lx with %i addr_cells mapped to %p\n", 293static void __init spider_init_one(struct device_node *of_node, int chip,
216 spider_reg, n, spider_pics[node]); 294 unsigned long addr)
295{
296 struct spider_pic *pic = &spider_pics[chip];
297 int i, virq;
298
299 /* Map registers */
300 pic->regs = ioremap(addr, 0x1000);
301 if (pic->regs == NULL)
302 panic("spider_pic: can't map registers !");
303
304 /* Allocate a host */
305 pic->host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, SPIDER_SRC_COUNT,
306 &spider_host_ops, SPIDER_IRQ_INVALID);
307 if (pic->host == NULL)
308 panic("spider_pic: can't allocate irq host !");
309 pic->host->host_data = pic;
310
311 /* Fill out other bits */
312 pic->of_node = of_node_get(of_node);
313
314 /* Go through all sources and disable them */
315 for (i = 0; i < SPIDER_SRC_COUNT; i++) {
316 void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i;
317 out_be32(cfg, in_be32(cfg) & ~0x30000000u);
318 }
217 319
218 for (n = 0; n < IIC_NUM_EXT; n++) { 320 /* do not mask any interrupts because of level */
219 int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE; 321 out_be32(pic->regs + TIR_MSK, 0x0);
220 get_irq_desc(irq)->chip = &spider_pic;
221 }
222 322
223 /* do not mask any interrupts because of level */ 323 /* enable interrupt packets to be output */
224 out_be32(spider_pics[node] + TIR_MSK, 0x0); 324 out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1);
225 325
226 /* disable edge detection clear */ 326 /* Hook up the cascade interrupt to the iic and nodeid */
227 /* out_be32(spider_pics[node] + TIR_EDC, 0x0); */ 327 virq = spider_find_cascade_and_node(pic);
328 if (virq == NO_IRQ)
329 return;
330 set_irq_data(virq, pic);
331 set_irq_chained_handler(virq, spider_irq_cascade);
228 332
229 /* enable interrupt packets to be output */ 333 printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n",
230 out_be32(spider_pics[node] + TIR_PIEN, 334 pic->node_id, addr, of_node->full_name);
231 in_be32(spider_pics[node] + TIR_PIEN) | 0x1);
232 335
233 /* Enable the interrupt detection enable bit. Do this last! */ 336 /* Enable the interrupt detection enable bit. Do this last! */
234 out_be32(spider_pics[node] + TIR_DEN, 337 out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
235 in_be32(spider_pics[node] + TIR_DEN) | 0x1); 338}
236 339
237 node++; 340void __init spider_init_IRQ(void)
341{
342 struct resource r;
343 struct device_node *dn;
344 int chip = 0;
345
346 /* XXX node numbers are totally bogus. We _hope_ we get the device
347 * nodes in the right order here but that's definitely not guaranteed,
348 * we need to get the node from the device tree instead.
349 * There is currently no proper property for it (but our whole
350 * device-tree is bogus anyway) so all we can do is pray or maybe test
351 * the address and deduce the node-id
352 */
353 for (dn = NULL;
354 (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
355 if (device_is_compatible(dn, "CBEA,platform-spider-pic")) {
356 if (of_address_to_resource(dn, 0, &r)) {
357 printk(KERN_WARNING "spider-pic: Failed\n");
358 continue;
359 }
360 } else if (device_is_compatible(dn, "sti,platform-spider-pic")
361 && (chip < 2)) {
362 static long hard_coded_pics[] =
363 { 0x24000008000, 0x34000008000 };
364 r.start = hard_coded_pics[chip];
365 } else
366 continue;
367 spider_init_one(dn, chip++, r.start);
238 } 368 }
239} 369}
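
The spider rework above hangs the whole secondary PIC off a single IIC input and demultiplexes it in a chained handler. A condensed sketch of that cascade pattern under the same assumptions (irq_linear_revmap, generic_handle_irq still taking pt_regs, per-descriptor handler_data set with set_irq_data); struct my_pic, MY_CUR_SRC and the my_* names are illustrative, not part of the patch:

    #include <linux/irq.h>
    #include <asm/io.h>

    #define MY_CUR_SRC	0x100	/* hypothetical "current source" register */

    struct my_pic {
    	struct irq_host	*host;
    	void __iomem	*regs;
    };

    static void my_cascade(unsigned int irq, struct irq_desc *desc,
    		       struct pt_regs *regs)
    {
    	struct my_pic *pic = desc->handler_data;	/* from set_irq_data() */
    	unsigned int src, virq;

    	/* ask the child PIC which of its sources is asserted */
    	src = in_be32(pic->regs + MY_CUR_SRC) >> 24;
    	virq = irq_linear_revmap(pic->host, src);	/* hwirq -> virq */
    	if (virq != NO_IRQ)
    		generic_handle_irq(virq, regs);
    	desc->chip->eoi(irq);				/* eoi the parent PIC */
    }

    /* hookup, done once at init time when the parent virq is known */
    static void __init my_hook_cascade(unsigned int parent, struct my_pic *pic)
    {
    	set_irq_data(parent, pic);
    	set_irq_chained_handler(parent, my_cascade);
    }
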
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 656c1ef5f4ad..5d2313a6c82b 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -264,51 +264,57 @@ spu_irq_class_2(int irq, void *data, struct pt_regs *regs)
264 return stat ? IRQ_HANDLED : IRQ_NONE; 264 return stat ? IRQ_HANDLED : IRQ_NONE;
265} 265}
266 266
267static int 267static int spu_request_irqs(struct spu *spu)
268spu_request_irqs(struct spu *spu)
269{ 268{
270 int ret; 269 int ret = 0;
271 int irq_base;
272
273 irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET;
274
275 snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", spu->number);
276 ret = request_irq(irq_base + spu->isrc,
277 spu_irq_class_0, IRQF_DISABLED, spu->irq_c0, spu);
278 if (ret)
279 goto out;
280
281 snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", spu->number);
282 ret = request_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc,
283 spu_irq_class_1, IRQF_DISABLED, spu->irq_c1, spu);
284 if (ret)
285 goto out1;
286 270
287 snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", spu->number); 271 if (spu->irqs[0] != NO_IRQ) {
288 ret = request_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, 272 snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
289 spu_irq_class_2, IRQF_DISABLED, spu->irq_c2, spu); 273 spu->number);
290 if (ret) 274 ret = request_irq(spu->irqs[0], spu_irq_class_0,
291 goto out2; 275 IRQF_DISABLED,
292 goto out; 276 spu->irq_c0, spu);
277 if (ret)
278 goto bail0;
279 }
280 if (spu->irqs[1] != NO_IRQ) {
281 snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
282 spu->number);
283 ret = request_irq(spu->irqs[1], spu_irq_class_1,
284 IRQF_DISABLED,
285 spu->irq_c1, spu);
286 if (ret)
287 goto bail1;
288 }
289 if (spu->irqs[2] != NO_IRQ) {
290 snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
291 spu->number);
292 ret = request_irq(spu->irqs[2], spu_irq_class_2,
293 IRQF_DISABLED,
294 spu->irq_c2, spu);
295 if (ret)
296 goto bail2;
297 }
298 return 0;
293 299
294out2: 300bail2:
295 free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); 301 if (spu->irqs[1] != NO_IRQ)
296out1: 302 free_irq(spu->irqs[1], spu);
297 free_irq(irq_base + spu->isrc, spu); 303bail1:
298out: 304 if (spu->irqs[0] != NO_IRQ)
305 free_irq(spu->irqs[0], spu);
306bail0:
299 return ret; 307 return ret;
300} 308}
301 309
302static void 310static void spu_free_irqs(struct spu *spu)
303spu_free_irqs(struct spu *spu)
304{ 311{
305 int irq_base; 312 if (spu->irqs[0] != NO_IRQ)
306 313 free_irq(spu->irqs[0], spu);
307 irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; 314 if (spu->irqs[1] != NO_IRQ)
308 315 free_irq(spu->irqs[1], spu);
309 free_irq(irq_base + spu->isrc, spu); 316 if (spu->irqs[2] != NO_IRQ)
310 free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); 317 free_irq(spu->irqs[2], spu);
311 free_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, spu);
312} 318}
313 319
314static LIST_HEAD(spu_list); 320static LIST_HEAD(spu_list);
@@ -559,17 +565,38 @@ static void spu_unmap(struct spu *spu)
559 iounmap((u8 __iomem *)spu->local_store); 565 iounmap((u8 __iomem *)spu->local_store);
560} 566}
561 567
568/* This function shall be abstracted for HV platforms */
569static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
570{
571 struct irq_host *host;
572 unsigned int isrc;
573 u32 *tmp;
574
575 host = iic_get_irq_host(spu->node);
576 if (host == NULL)
577 return -ENODEV;
578
579 /* Get the interrupt source from the device-tree */
580 tmp = (u32 *)get_property(np, "isrc", NULL);
581 if (!tmp)
582 return -ENODEV;
583 spu->isrc = isrc = tmp[0];
584
585 /* Now map interrupts of all 3 classes */
586 spu->irqs[0] = irq_create_mapping(host, 0x00 | isrc, 0);
587 spu->irqs[1] = irq_create_mapping(host, 0x10 | isrc, 0);
588 spu->irqs[2] = irq_create_mapping(host, 0x20 | isrc, 0);
589
590 /* Right now, we only fail if class 2 failed */
591 return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
592}
593
562static int __init spu_map_device(struct spu *spu, struct device_node *node) 594static int __init spu_map_device(struct spu *spu, struct device_node *node)
563{ 595{
564 char *prop; 596 char *prop;
565 int ret; 597 int ret;
566 598
567 ret = -ENODEV; 599 ret = -ENODEV;
568 prop = get_property(node, "isrc", NULL);
569 if (!prop)
570 goto out;
571 spu->isrc = *(unsigned int *)prop;
572
573 spu->name = get_property(node, "name", NULL); 600 spu->name = get_property(node, "name", NULL);
574 if (!spu->name) 601 if (!spu->name)
575 goto out; 602 goto out;
@@ -636,7 +663,8 @@ static int spu_create_sysdev(struct spu *spu)
636 return ret; 663 return ret;
637 } 664 }
638 665
639 sysdev_create_file(&spu->sysdev, &attr_isrc); 666 if (spu->isrc != 0)
667 sysdev_create_file(&spu->sysdev, &attr_isrc);
640 sysfs_add_device_to_node(&spu->sysdev, spu->nid); 668 sysfs_add_device_to_node(&spu->sysdev, spu->nid);
641 669
642 return 0; 670 return 0;
@@ -668,6 +696,9 @@ static int __init create_spu(struct device_node *spe)
668 spu->nid = of_node_to_nid(spe); 696 spu->nid = of_node_to_nid(spe);
669 if (spu->nid == -1) 697 if (spu->nid == -1)
670 spu->nid = 0; 698 spu->nid = 0;
699 ret = spu_map_interrupts(spu, spe);
700 if (ret)
701 goto out_unmap;
671 spin_lock_init(&spu->register_lock); 702 spin_lock_init(&spu->register_lock);
672 spu_mfc_sdr_set(spu, mfspr(SPRN_SDR1)); 703 spu_mfc_sdr_set(spu, mfspr(SPRN_SDR1));
673 spu_mfc_sr1_set(spu, 0x33); 704 spu_mfc_sr1_set(spu, 0x33);
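
The SPU changes above replace arithmetic on a global irq base with explicit mappings created on the node's IIC host. A short sketch of that mapping step, reusing iic_get_irq_host() and irq_create_mapping() as they appear in this patch; struct my_unit and the my_* names are illustrative:

    struct my_unit {
    	unsigned int irqs[3];	/* one virq per interrupt class */
    };

    static int __init my_map_unit_irqs(struct my_unit *unit, int node, u32 isrc)
    {
    	struct irq_host *host = iic_get_irq_host(node);
    	int i;

    	if (host == NULL)
    		return -ENODEV;
    	/* class sits in bits 4-5 of the hwirq, the source in the low bits */
    	for (i = 0; i < 3; i++)
    		unit->irqs[i] = irq_create_mapping(host, (i << 4) | isrc, 0);
    	/* a mapping may fail; callers must test each entry against NO_IRQ
    	 * before request_irq()/free_irq(), as spu_request_irqs() does */
    	return unit->irqs[2] == NO_IRQ ? -EINVAL : 0;
    }
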
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 66c253498803..6802cdc3168a 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -18,7 +18,6 @@
18#include <asm/machdep.h> 18#include <asm/machdep.h>
19#include <asm/sections.h> 19#include <asm/sections.h>
20#include <asm/pci-bridge.h> 20#include <asm/pci-bridge.h>
21#include <asm/open_pic.h>
22#include <asm/grackle.h> 21#include <asm/grackle.h>
23#include <asm/rtas.h> 22#include <asm/rtas.h>
24 23
@@ -161,15 +160,9 @@ void __init
161chrp_pcibios_fixup(void) 160chrp_pcibios_fixup(void)
162{ 161{
163 struct pci_dev *dev = NULL; 162 struct pci_dev *dev = NULL;
164 struct device_node *np;
165 163
166 /* PCI interrupts are controlled by the OpenPIC */ 164 for_each_pci_dev(dev)
167 for_each_pci_dev(dev) { 165 pci_read_irq_line(dev);
168 np = pci_device_to_OF_node(dev);
169 if ((np != 0) && (np->n_intrs > 0) && (np->intrs[0].line != 0))
170 dev->irq = np->intrs[0].line;
171 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
172 }
173} 166}
174 167
175#define PRG_CL_RESET_VALID 0x00010000 168#define PRG_CL_RESET_VALID 0x00010000
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 9df9f2079e9b..538e337d63e2 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -59,7 +59,7 @@ void rtas_indicator_progress(char *, unsigned short);
59int _chrp_type; 59int _chrp_type;
60EXPORT_SYMBOL(_chrp_type); 60EXPORT_SYMBOL(_chrp_type);
61 61
62struct mpic *chrp_mpic; 62static struct mpic *chrp_mpic;
63 63
64/* Used for doing CHRP event-scans */ 64/* Used for doing CHRP event-scans */
65DEFINE_PER_CPU(struct timer_list, heartbeat_timer); 65DEFINE_PER_CPU(struct timer_list, heartbeat_timer);
@@ -315,24 +315,32 @@ chrp_event_scan(unsigned long unused)
315 jiffies + event_scan_interval); 315 jiffies + event_scan_interval);
316} 316}
317 317
318static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc,
319 struct pt_regs *regs)
320{
321 unsigned int cascade_irq = i8259_irq(regs);
322 if (cascade_irq != NO_IRQ)
323 generic_handle_irq(cascade_irq, regs);
324 desc->chip->eoi(irq);
325}
326
318/* 327/*
319 * Finds the open-pic node and sets up the mpic driver. 328 * Finds the open-pic node and sets up the mpic driver.
320 */ 329 */
321static void __init chrp_find_openpic(void) 330static void __init chrp_find_openpic(void)
322{ 331{
323 struct device_node *np, *root; 332 struct device_node *np, *root;
324 int len, i, j, irq_count; 333 int len, i, j;
325 int isu_size, idu_size; 334 int isu_size, idu_size;
326 unsigned int *iranges, *opprop = NULL; 335 unsigned int *iranges, *opprop = NULL;
327 int oplen = 0; 336 int oplen = 0;
328 unsigned long opaddr; 337 unsigned long opaddr;
329 int na = 1; 338 int na = 1;
330 unsigned char init_senses[NR_IRQS - NUM_8259_INTERRUPTS];
331 339
332 np = find_type_devices("open-pic"); 340 np = of_find_node_by_type(NULL, "open-pic");
333 if (np == NULL) 341 if (np == NULL)
334 return; 342 return;
335 root = find_path_device("/"); 343 root = of_find_node_by_path("/");
336 if (root) { 344 if (root) {
337 opprop = (unsigned int *) get_property 345 opprop = (unsigned int *) get_property
338 (root, "platform-open-pic", &oplen); 346 (root, "platform-open-pic", &oplen);
@@ -343,19 +351,15 @@ static void __init chrp_find_openpic(void)
343 oplen /= na * sizeof(unsigned int); 351 oplen /= na * sizeof(unsigned int);
344 } else { 352 } else {
345 struct resource r; 353 struct resource r;
346 if (of_address_to_resource(np, 0, &r)) 354 if (of_address_to_resource(np, 0, &r)) {
347 return; 355 goto bail;
356 }
348 opaddr = r.start; 357 opaddr = r.start;
349 oplen = 0; 358 oplen = 0;
350 } 359 }
351 360
352 printk(KERN_INFO "OpenPIC at %lx\n", opaddr); 361 printk(KERN_INFO "OpenPIC at %lx\n", opaddr);
353 362
354 irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */
355 prom_get_irq_senses(init_senses, NUM_ISA_INTERRUPTS, NR_IRQS - 4);
356 /* i8259 cascade is always positive level */
357 init_senses[0] = IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE;
358
359 iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len); 363 iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len);
360 if (iranges == NULL) 364 if (iranges == NULL)
361 len = 0; /* non-distributed mpic */ 365 len = 0; /* non-distributed mpic */
@@ -382,15 +386,12 @@ static void __init chrp_find_openpic(void)
382 if (len > 1) 386 if (len > 1)
383 isu_size = iranges[3]; 387 isu_size = iranges[3];
384 388
385 chrp_mpic = mpic_alloc(opaddr, MPIC_PRIMARY, 389 chrp_mpic = mpic_alloc(np, opaddr, MPIC_PRIMARY,
386 isu_size, NUM_ISA_INTERRUPTS, irq_count, 390 isu_size, 0, " MPIC ");
387 NR_IRQS - 4, init_senses, irq_count,
388 " MPIC ");
389 if (chrp_mpic == NULL) { 391 if (chrp_mpic == NULL) {
390 printk(KERN_ERR "Failed to allocate MPIC structure\n"); 392 printk(KERN_ERR "Failed to allocate MPIC structure\n");
391 return; 393 goto bail;
392 } 394 }
393
394 j = na - 1; 395 j = na - 1;
395 for (i = 1; i < len; ++i) { 396 for (i = 1; i < len; ++i) {
396 iranges += 2; 397 iranges += 2;
@@ -402,7 +403,10 @@ static void __init chrp_find_openpic(void)
402 } 403 }
403 404
404 mpic_init(chrp_mpic); 405 mpic_init(chrp_mpic);
405 mpic_setup_cascade(NUM_ISA_INTERRUPTS, i8259_irq_cascade, NULL); 406 ppc_md.get_irq = mpic_get_irq;
407 bail:
408 of_node_put(root);
409 of_node_put(np);
406} 410}
407 411
408#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON) 412#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON)
@@ -413,14 +417,34 @@ static struct irqaction xmon_irqaction = {
413}; 417};
414#endif 418#endif
415 419
416void __init chrp_init_IRQ(void) 420static void __init chrp_find_8259(void)
417{ 421{
418 struct device_node *np; 422 struct device_node *np, *pic = NULL;
419 unsigned long chrp_int_ack = 0; 423 unsigned long chrp_int_ack = 0;
420#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON) 424 unsigned int cascade_irq;
421 struct device_node *kbd; 425
422#endif 426 /* Look for cascade */
427 for_each_node_by_type(np, "interrupt-controller")
428 if (device_is_compatible(np, "chrp,iic")) {
429 pic = np;
430 break;
431 }
432 /* Ok, 8259 wasn't found. We need to handle the case where
433 * we have a pegasos that claims to be chrp but doesn't have
434 * a proper interrupt tree
435 */
436 if (pic == NULL && chrp_mpic != NULL) {
437 printk(KERN_ERR "i8259: Not found in device-tree"
438 " assuming no legacy interrupts\n");
439 return;
440 }
423 441
442 /* Look for intack. In a perfect world, we would look for it on
443 * the ISA bus that holds the 8259 but heh... Works that way. If
444 * we ever see a problem, we can try to re-use the pSeries code here.
445 * Also, Pegasos-type platforms don't have a proper node to start
446 * from anyway
447 */
424 for (np = find_devices("pci"); np != NULL; np = np->next) { 448 for (np = find_devices("pci"); np != NULL; np = np->next) {
425 unsigned int *addrp = (unsigned int *) 449 unsigned int *addrp = (unsigned int *)
426 get_property(np, "8259-interrupt-acknowledge", NULL); 450 get_property(np, "8259-interrupt-acknowledge", NULL);
@@ -431,11 +455,29 @@ void __init chrp_init_IRQ(void)
431 break; 455 break;
432 } 456 }
433 if (np == NULL) 457 if (np == NULL)
434 printk(KERN_ERR "Cannot find PCI interrupt acknowledge address\n"); 458 printk(KERN_WARNING "Cannot find PCI interrupt acknowledge"
459 " address, polling\n");
460
461 i8259_init(pic, chrp_int_ack);
462 if (ppc_md.get_irq == NULL)
463 ppc_md.get_irq = i8259_irq;
464 if (chrp_mpic != NULL) {
465 cascade_irq = irq_of_parse_and_map(pic, 0);
466 if (cascade_irq == NO_IRQ)
467 printk(KERN_ERR "i8259: failed to map cascade irq\n");
468 else
469 set_irq_chained_handler(cascade_irq,
470 chrp_8259_cascade);
471 }
472}
435 473
474void __init chrp_init_IRQ(void)
475{
476#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON)
477 struct device_node *kbd;
478#endif
436 chrp_find_openpic(); 479 chrp_find_openpic();
437 480 chrp_find_8259();
438 i8259_init(chrp_int_ack, 0);
439 481
440 if (_chrp_type == _CHRP_Pegasos) 482 if (_chrp_type == _CHRP_Pegasos)
441 ppc_md.get_irq = i8259_irq; 483 ppc_md.get_irq = i8259_irq;
@@ -520,10 +562,6 @@ static int __init chrp_probe(void)
520 DMA_MODE_READ = 0x44; 562 DMA_MODE_READ = 0x44;
521 DMA_MODE_WRITE = 0x48; 563 DMA_MODE_WRITE = 0x48;
522 isa_io_base = CHRP_ISA_IO_BASE; /* default value */ 564 isa_io_base = CHRP_ISA_IO_BASE; /* default value */
523 ppc_do_canonicalize_irqs = 1;
524
525 /* Assume we have an 8259... */
526 __irq_offset_value = NUM_ISA_INTERRUPTS;
527 565
528 return 1; 566 return 1;
529} 567}
@@ -535,7 +573,6 @@ define_machine(chrp) {
535 .init = chrp_init2, 573 .init = chrp_init2,
536 .show_cpuinfo = chrp_show_cpuinfo, 574 .show_cpuinfo = chrp_show_cpuinfo,
537 .init_IRQ = chrp_init_IRQ, 575 .init_IRQ = chrp_init_IRQ,
538 .get_irq = mpic_get_irq,
539 .pcibios_fixup = chrp_pcibios_fixup, 576 .pcibios_fixup = chrp_pcibios_fixup,
540 .restart = rtas_restart, 577 .restart = rtas_restart,
541 .power_off = rtas_power_off, 578 .power_off = rtas_power_off,
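
The CHRP changes above split PIC setup into chrp_find_openpic() and chrp_find_8259(), with the legacy 8259 chained off an MPIC input. A minimal sketch of that hookup under the same assumptions (i8259_init taking the device node and intack address, irq_of_parse_and_map, set_irq_chained_handler); the my_* names are illustrative:

    #include <linux/irq.h>
    #include <asm/i8259.h>
    #include <asm/prom.h>

    static void my_8259_cascade(unsigned int irq, struct irq_desc *desc,
    			    struct pt_regs *regs)
    {
    	unsigned int cascade = i8259_irq(regs);	/* poll/ack the 8259 */

    	if (cascade != NO_IRQ)
    		generic_handle_irq(cascade, regs);
    	desc->chip->eoi(irq);			/* eoi on the parent PIC */
    }

    static void __init my_hookup_8259(struct device_node *pic_node,
    				  unsigned long intack)
    {
    	unsigned int cascade_virq;

    	i8259_init(pic_node, intack);
    	cascade_virq = irq_of_parse_and_map(pic_node, 0);
    	if (cascade_virq != NO_IRQ)
    		set_irq_chained_handler(cascade_virq, my_8259_cascade);
    }
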
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
index c298ca1ea680..1d2307e87c30 100644
--- a/arch/powerpc/platforms/chrp/smp.c
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -29,7 +29,6 @@
29#include <asm/smp.h> 29#include <asm/smp.h>
30#include <asm/residual.h> 30#include <asm/residual.h>
31#include <asm/time.h> 31#include <asm/time.h>
32#include <asm/open_pic.h>
33#include <asm/machdep.h> 32#include <asm/machdep.h>
34#include <asm/smp.h> 33#include <asm/smp.h>
35#include <asm/mpic.h> 34#include <asm/mpic.h>
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index f70e820e7304..2275e64f3152 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -162,27 +162,6 @@ static void pci_event_handler(struct HvLpEvent *event, struct pt_regs *regs)
162 printk(KERN_ERR "pci_event_handler: NULL event received\n"); 162 printk(KERN_ERR "pci_event_handler: NULL event received\n");
163} 163}
164 164
165/*
166 * This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c
167 * It must be called before the bus walk.
168 */
169void __init iSeries_init_IRQ(void)
170{
171 /* Register PCI event handler and open an event path */
172 int ret;
173
174 ret = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo,
175 &pci_event_handler);
176 if (ret == 0) {
177 ret = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0);
178 if (ret != 0)
179 printk(KERN_ERR "iseries_init_IRQ: open event path "
180 "failed with rc 0x%x\n", ret);
181 } else
182 printk(KERN_ERR "iseries_init_IRQ: register handler "
183 "failed with rc 0x%x\n", ret);
184}
185
186#define REAL_IRQ_TO_SUBBUS(irq) (((irq) >> 14) & 0xff) 165#define REAL_IRQ_TO_SUBBUS(irq) (((irq) >> 14) & 0xff)
187#define REAL_IRQ_TO_BUS(irq) ((((irq) >> 6) & 0xff) + 1) 166#define REAL_IRQ_TO_BUS(irq) ((((irq) >> 6) & 0xff) + 1)
188#define REAL_IRQ_TO_IDSEL(irq) ((((irq) >> 3) & 7) + 1) 167#define REAL_IRQ_TO_IDSEL(irq) ((((irq) >> 3) & 7) + 1)
@@ -196,7 +175,7 @@ static void iseries_enable_IRQ(unsigned int irq)
196{ 175{
197 u32 bus, dev_id, function, mask; 176 u32 bus, dev_id, function, mask;
198 const u32 sub_bus = 0; 177 const u32 sub_bus = 0;
199 unsigned int rirq = virt_irq_to_real_map[irq]; 178 unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
200 179
201 /* The IRQ has already been locked by the caller */ 180 /* The IRQ has already been locked by the caller */
202 bus = REAL_IRQ_TO_BUS(rirq); 181 bus = REAL_IRQ_TO_BUS(rirq);
@@ -213,7 +192,7 @@ static unsigned int iseries_startup_IRQ(unsigned int irq)
213{ 192{
214 u32 bus, dev_id, function, mask; 193 u32 bus, dev_id, function, mask;
215 const u32 sub_bus = 0; 194 const u32 sub_bus = 0;
216 unsigned int rirq = virt_irq_to_real_map[irq]; 195 unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
217 196
218 bus = REAL_IRQ_TO_BUS(rirq); 197 bus = REAL_IRQ_TO_BUS(rirq);
219 function = REAL_IRQ_TO_FUNC(rirq); 198 function = REAL_IRQ_TO_FUNC(rirq);
@@ -254,7 +233,7 @@ static void iseries_shutdown_IRQ(unsigned int irq)
254{ 233{
255 u32 bus, dev_id, function, mask; 234 u32 bus, dev_id, function, mask;
256 const u32 sub_bus = 0; 235 const u32 sub_bus = 0;
257 unsigned int rirq = virt_irq_to_real_map[irq]; 236 unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
258 237
259 /* irq should be locked by the caller */ 238 /* irq should be locked by the caller */
260 bus = REAL_IRQ_TO_BUS(rirq); 239 bus = REAL_IRQ_TO_BUS(rirq);
@@ -277,7 +256,7 @@ static void iseries_disable_IRQ(unsigned int irq)
277{ 256{
278 u32 bus, dev_id, function, mask; 257 u32 bus, dev_id, function, mask;
279 const u32 sub_bus = 0; 258 const u32 sub_bus = 0;
280 unsigned int rirq = virt_irq_to_real_map[irq]; 259 unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
281 260
282 /* The IRQ has already been locked by the caller */ 261 /* The IRQ has already been locked by the caller */
283 bus = REAL_IRQ_TO_BUS(rirq); 262 bus = REAL_IRQ_TO_BUS(rirq);
@@ -291,19 +270,19 @@ static void iseries_disable_IRQ(unsigned int irq)
291 270
292static void iseries_end_IRQ(unsigned int irq) 271static void iseries_end_IRQ(unsigned int irq)
293{ 272{
294 unsigned int rirq = virt_irq_to_real_map[irq]; 273 unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
295 274
296 HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq), 275 HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq),
297 (REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq)); 276 (REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq));
298} 277}
299 278
300static hw_irq_controller iSeries_IRQ_handler = { 279static struct irq_chip iseries_pic = {
301 .typename = "iSeries irq controller", 280 .typename = "iSeries irq controller",
302 .startup = iseries_startup_IRQ, 281 .startup = iseries_startup_IRQ,
303 .shutdown = iseries_shutdown_IRQ, 282 .shutdown = iseries_shutdown_IRQ,
304 .enable = iseries_enable_IRQ, 283 .unmask = iseries_enable_IRQ,
305 .disable = iseries_disable_IRQ, 284 .mask = iseries_disable_IRQ,
306 .end = iseries_end_IRQ 285 .eoi = iseries_end_IRQ
307}; 286};
308 287
309/* 288/*
@@ -314,17 +293,14 @@ static hw_irq_controller iSeries_IRQ_handler = {
314int __init iSeries_allocate_IRQ(HvBusNumber bus, 293int __init iSeries_allocate_IRQ(HvBusNumber bus,
315 HvSubBusNumber sub_bus, u32 bsubbus) 294 HvSubBusNumber sub_bus, u32 bsubbus)
316{ 295{
317 int virtirq;
318 unsigned int realirq; 296 unsigned int realirq;
319 u8 idsel = ISERIES_GET_DEVICE_FROM_SUBBUS(bsubbus); 297 u8 idsel = ISERIES_GET_DEVICE_FROM_SUBBUS(bsubbus);
320 u8 function = ISERIES_GET_FUNCTION_FROM_SUBBUS(bsubbus); 298 u8 function = ISERIES_GET_FUNCTION_FROM_SUBBUS(bsubbus);
321 299
322 realirq = (((((sub_bus << 8) + (bus - 1)) << 3) + (idsel - 1)) << 3) 300 realirq = (((((sub_bus << 8) + (bus - 1)) << 3) + (idsel - 1)) << 3)
323 + function; 301 + function;
324 virtirq = virt_irq_create_mapping(realirq);
325 302
326 irq_desc[virtirq].chip = &iSeries_IRQ_handler; 303 return irq_create_mapping(NULL, realirq, IRQ_TYPE_NONE);
327 return virtirq;
328} 304}
329 305
330#endif /* CONFIG_PCI */ 306#endif /* CONFIG_PCI */
@@ -332,10 +308,9 @@ int __init iSeries_allocate_IRQ(HvBusNumber bus,
332/* 308/*
333 * Get the next pending IRQ. 309 * Get the next pending IRQ.
334 */ 310 */
335int iSeries_get_irq(struct pt_regs *regs) 311unsigned int iSeries_get_irq(struct pt_regs *regs)
336{ 312{
337 /* -2 means ignore this interrupt */ 313 int irq = NO_IRQ_IGNORE;
338 int irq = -2;
339 314
340#ifdef CONFIG_SMP 315#ifdef CONFIG_SMP
341 if (get_lppaca()->int_dword.fields.ipi_cnt) { 316 if (get_lppaca()->int_dword.fields.ipi_cnt) {
@@ -358,9 +333,57 @@ int iSeries_get_irq(struct pt_regs *regs)
358 } 333 }
359 spin_unlock(&pending_irqs_lock); 334 spin_unlock(&pending_irqs_lock);
360 if (irq >= NR_IRQS) 335 if (irq >= NR_IRQS)
361 irq = -2; 336 irq = NO_IRQ_IGNORE;
362 } 337 }
363#endif 338#endif
364 339
365 return irq; 340 return irq;
366} 341}
342
343static int iseries_irq_host_map(struct irq_host *h, unsigned int virq,
344 irq_hw_number_t hw, unsigned int flags)
345{
346 set_irq_chip_and_handler(virq, &iseries_pic, handle_fasteoi_irq);
347
348 return 0;
349}
350
351static struct irq_host_ops iseries_irq_host_ops = {
352 .map = iseries_irq_host_map,
353};
354
355/*
356 * This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c
357 * It must be called before the bus walk.
358 */
359void __init iSeries_init_IRQ(void)
360{
361 /* Register PCI event handler and open an event path */
362 struct irq_host *host;
363 int ret;
364
365 /*
366 * The Hypervisor only allows us up to 256 interrupt
367 * sources (the irq number is passed in a u8).
368 */
369 irq_set_virq_count(256);
370
371 /* Create irq host. No need for a revmap since HV will give us
372 * back our virtual irq number
373 */
374 host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, 0, &iseries_irq_host_ops, 0);
375 BUG_ON(host == NULL);
376 irq_set_default_host(host);
377
378 ret = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo,
379 &pci_event_handler);
380 if (ret == 0) {
381 ret = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0);
382 if (ret != 0)
383 printk(KERN_ERR "iseries_init_IRQ: open event path "
384 "failed with rc 0x%x\n", ret);
385 } else
386 printk(KERN_ERR "iseries_init_IRQ: register handler "
387 "failed with rc 0x%x\n", ret);
388}
389
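
The iSeries conversion above uses a NOMAP host: the hypervisor already hands back the virtual irq number, so no reverse map is needed, only a cap on the virq space. A minimal sketch of that setup using the calls shown in this patch (irq_set_virq_count, irq_alloc_host, irq_set_default_host); my_nomap_chip and the other my_* names are illustrative:

    static struct irq_chip my_nomap_chip = {
    	.typename = "my-pic",
    	/* .startup/.mask/.unmask/.eoi omitted in this sketch */
    };

    static int my_nomap_map(struct irq_host *h, unsigned int virq,
    			irq_hw_number_t hw, unsigned int flags)
    {
    	set_irq_chip_and_handler(virq, &my_nomap_chip, handle_fasteoi_irq);
    	return 0;
    }

    static struct irq_host_ops my_nomap_ops = {
    	.map = my_nomap_map,
    };

    static void __init my_nomap_init_IRQ(void)
    {
    	struct irq_host *host;

    	/* the firmware passes irq numbers in a u8, so at most 256 virqs */
    	irq_set_virq_count(256);

    	/* NOMAP: no revmap kept; mappings are created with the
    	 * firmware-provided number, as iSeries_allocate_IRQ() does */
    	host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, 0, &my_nomap_ops, 0);
    	BUG_ON(host == NULL);
    	irq_set_default_host(host);
    }
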
diff --git a/arch/powerpc/platforms/iseries/irq.h b/arch/powerpc/platforms/iseries/irq.h
index 188aa808abd7..1ee8985140e5 100644
--- a/arch/powerpc/platforms/iseries/irq.h
+++ b/arch/powerpc/platforms/iseries/irq.h
@@ -4,6 +4,6 @@
4extern void iSeries_init_IRQ(void); 4extern void iSeries_init_IRQ(void);
5extern int iSeries_allocate_IRQ(HvBusNumber, HvSubBusNumber, u32); 5extern int iSeries_allocate_IRQ(HvBusNumber, HvSubBusNumber, u32);
6extern void iSeries_activate_IRQs(void); 6extern void iSeries_activate_IRQs(void);
7extern int iSeries_get_irq(struct pt_regs *); 7extern unsigned int iSeries_get_irq(struct pt_regs *);
8 8
9#endif /* _ISERIES_IRQ_H */ 9#endif /* _ISERIES_IRQ_H */
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index c877074745b2..c9605d773a77 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -294,8 +294,6 @@ static void __init iSeries_init_early(void)
294{ 294{
295 DBG(" -> iSeries_init_early()\n"); 295 DBG(" -> iSeries_init_early()\n");
296 296
297 ppc64_interrupt_controller = IC_ISERIES;
298
299#if defined(CONFIG_BLK_DEV_INITRD) 297#if defined(CONFIG_BLK_DEV_INITRD)
300 /* 298 /*
301 * If the init RAM disk has been configured and there is 299 * If the init RAM disk has been configured and there is
@@ -659,12 +657,6 @@ static int __init iseries_probe(void)
659 powerpc_firmware_features |= FW_FEATURE_ISERIES; 657 powerpc_firmware_features |= FW_FEATURE_ISERIES;
660 powerpc_firmware_features |= FW_FEATURE_LPAR; 658 powerpc_firmware_features |= FW_FEATURE_LPAR;
661 659
662 /*
663 * The Hypervisor only allows us up to 256 interrupt
664 * sources (the irq number is passed in a u8).
665 */
666 virt_irq_max = 255;
667
668 hpte_init_iSeries(); 660 hpte_init_iSeries();
669 661
670 return 1; 662 return 1;
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index f7170ff86dab..63a1670d3bfd 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -443,18 +443,23 @@ void __init maple_pci_init(void)
443int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) 443int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
444{ 444{
445 struct device_node *np; 445 struct device_node *np;
446 int irq = channel ? 15 : 14; 446 unsigned int defirq = channel ? 15 : 14;
447 unsigned int irq;
447 448
448 if (pdev->vendor != PCI_VENDOR_ID_AMD || 449 if (pdev->vendor != PCI_VENDOR_ID_AMD ||
449 pdev->device != PCI_DEVICE_ID_AMD_8111_IDE) 450 pdev->device != PCI_DEVICE_ID_AMD_8111_IDE)
450 return irq; 451 return defirq;
451 452
452 np = pci_device_to_OF_node(pdev); 453 np = pci_device_to_OF_node(pdev);
453 if (np == NULL) 454 if (np == NULL)
454 return irq; 455 return defirq;
455 if (np->n_intrs < 2) 456 irq = irq_of_parse_and_map(np, channel & 0x1);
456 return irq; 457 if (irq == NO_IRQ) {
457 return np->intrs[channel & 0x1].line; 458 printk("Failed to map onboard IDE interrupt for channel %d\n",
459 channel);
460 return defirq;
461 }
462 return irq;
458} 463}
459 464
460/* XXX: To remove once all firmwares are ok */ 465/* XXX: To remove once all firmwares are ok */
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 5cf90c28b141..cb528c9de4c3 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -11,7 +11,7 @@
11 * 11 *
12 */ 12 */
13 13
14#define DEBUG 14#undef DEBUG
15 15
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/errno.h> 17#include <linux/errno.h>
@@ -198,50 +198,81 @@ static void __init maple_init_early(void)
198{ 198{
199 DBG(" -> maple_init_early\n"); 199 DBG(" -> maple_init_early\n");
200 200
201 /* Setup interrupt mapping options */
202 ppc64_interrupt_controller = IC_OPEN_PIC;
203
204 iommu_init_early_dart(); 201 iommu_init_early_dart();
205 202
206 DBG(" <- maple_init_early\n"); 203 DBG(" <- maple_init_early\n");
207} 204}
208 205
209 206/*
210static __init void maple_init_IRQ(void) 207 * This is almost identical to pSeries and CHRP. We need to make that
208 * code generic at one point, with appropriate bits in the device-tree to
209 * identify the presence of an HT APIC
210 */
211static void __init maple_init_IRQ(void)
211{ 212{
212 struct device_node *root; 213 struct device_node *root, *np, *mpic_node = NULL;
213 unsigned int *opprop; 214 unsigned int *opprop;
214 unsigned long opic_addr; 215 unsigned long openpic_addr = 0;
216 int naddr, n, i, opplen, has_isus = 0;
215 struct mpic *mpic; 217 struct mpic *mpic;
216 unsigned char senses[128]; 218 unsigned int flags = MPIC_PRIMARY;
217 int n;
218 219
219 DBG(" -> maple_init_IRQ\n"); 220 /* Locate MPIC in the device-tree. Note that there is a bug
221 * in Maple device-tree where the type of the controller is
222 * open-pic and not interrupt-controller
223 */
224 for_each_node_by_type(np, "open-pic") {
225 mpic_node = np;
226 break;
227 }
228 if (mpic_node == NULL) {
229 printk(KERN_ERR
230 "Failed to locate the MPIC interrupt controller\n");
231 return;
232 }
220 233
221 /* XXX: Non standard, replace that with a proper openpic/mpic node 234 /* Find address list in /platform-open-pic */
222 * in the device-tree. Find the Open PIC if present */
223 root = of_find_node_by_path("/"); 235 root = of_find_node_by_path("/");
224 opprop = (unsigned int *) get_property(root, 236 naddr = prom_n_addr_cells(root);
225 "platform-open-pic", NULL); 237 opprop = (unsigned int *) get_property(root, "platform-open-pic",
226 if (opprop == 0) 238 &opplen);
227 panic("OpenPIC not found !\n"); 239 if (opprop != 0) {
228 240 openpic_addr = of_read_number(opprop, naddr);
229 n = prom_n_addr_cells(root); 241 has_isus = (opplen > naddr);
230 for (opic_addr = 0; n > 0; --n) 242 printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n",
231 opic_addr = (opic_addr << 32) + *opprop++; 243 openpic_addr, has_isus);
244 }
232 of_node_put(root); 245 of_node_put(root);
233 246
234 /* Obtain sense values from device-tree */ 247 BUG_ON(openpic_addr == 0);
235 prom_get_irq_senses(senses, 0, 128); 248
249 /* Check for a big endian MPIC */
250 if (get_property(np, "big-endian", NULL) != NULL)
251 flags |= MPIC_BIG_ENDIAN;
236 252
237 mpic = mpic_alloc(opic_addr, 253 /* XXX Maple specific bits */
238 MPIC_PRIMARY | MPIC_BIG_ENDIAN | 254 flags |= MPIC_BROKEN_U3 | MPIC_WANTS_RESET;
239 MPIC_BROKEN_U3 | MPIC_WANTS_RESET, 255
240	 0, 0, 128, 128, senses, 128, "U3-MPIC"); 256	/* Setup the openpic driver. More device-tree junk, we hard code no
 257	 * ISUs for now. I'll have to revisit some stuff with the folks doing
258 * the firmware for those
259 */
260 mpic = mpic_alloc(mpic_node, openpic_addr, flags,
261 /*has_isus ? 16 :*/ 0, 0, " MPIC ");
241 BUG_ON(mpic == NULL); 262 BUG_ON(mpic == NULL);
242 mpic_init(mpic);
243 263
244 DBG(" <- maple_init_IRQ\n"); 264 /* Add ISUs */
265 opplen /= sizeof(u32);
266 for (n = 0, i = naddr; i < opplen; i += naddr, n++) {
267 unsigned long isuaddr = of_read_number(opprop + i, naddr);
268 mpic_assign_isu(mpic, n, isuaddr);
269 }
270
 271	/* All ISUs are set up, complete initialization */
272 mpic_init(mpic);
273 ppc_md.get_irq = mpic_get_irq;
274 of_node_put(mpic_node);
275 of_node_put(root);
245} 276}
246 277
247static void __init maple_progress(char *s, unsigned short hex) 278static void __init maple_progress(char *s, unsigned short hex)
@@ -256,7 +287,9 @@ static void __init maple_progress(char *s, unsigned short hex)
256static int __init maple_probe(void) 287static int __init maple_probe(void)
257{ 288{
258 unsigned long root = of_get_flat_dt_root(); 289 unsigned long root = of_get_flat_dt_root();
259 if (!of_flat_dt_is_compatible(root, "Momentum,Maple")) 290
291 if (!of_flat_dt_is_compatible(root, "Momentum,Maple") &&
292 !of_flat_dt_is_compatible(root, "Momentum,Apache"))
260 return 0; 293 return 0;
261 /* 294 /*
262 * On U3, the DART (iommu) must be allocated now since it 295 * On U3, the DART (iommu) must be allocated now since it
@@ -277,7 +310,6 @@ define_machine(maple_md) {
277 .setup_arch = maple_setup_arch, 310 .setup_arch = maple_setup_arch,
278 .init_early = maple_init_early, 311 .init_early = maple_init_early,
279 .init_IRQ = maple_init_IRQ, 312 .init_IRQ = maple_init_IRQ,
280 .get_irq = mpic_get_irq,
281 .pcibios_fixup = maple_pcibios_fixup, 313 .pcibios_fixup = maple_pcibios_fixup,
282 .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, 314 .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq,
283 .restart = maple_restart, 315 .restart = maple_restart,
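
maple_init_IRQ() above also shows the new mpic_alloc() signature, which now takes the controller's device node and defers ISU setup to explicit mpic_assign_isu() calls. A compressed sketch of that sequence using only calls from this patch; np, openpic_addr, opprop, naddr and opplen are assumed to have been obtained from the device-tree exactly as in the hunk above:

    static void __init my_mpic_setup(struct device_node *np,
    				 unsigned long openpic_addr,
    				 u32 *opprop, int naddr, int opplen)
    {
    	struct mpic *mpic;
    	int i, n;

    	/* primary controller, no ISUs declared at alloc time */
    	mpic = mpic_alloc(np, openpic_addr,
    			  MPIC_PRIMARY | MPIC_WANTS_RESET,
    			  0 /* isu_size */, 0, " MPIC ");
    	BUG_ON(mpic == NULL);

    	/* remaining addresses in /platform-open-pic describe the ISUs */
    	for (n = 0, i = naddr; i < opplen; i += naddr, n++)
    		mpic_assign_isu(mpic, n, of_read_number(opprop + i, naddr));

    	mpic_init(mpic);
    	ppc_md.get_irq = mpic_get_irq;	/* install the arch hook */
    }
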
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 24f09e2a5775..871b002c9f90 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -162,6 +162,8 @@ static void __init bootx_add_chosen_props(unsigned long base,
162{ 162{
163 u32 val; 163 u32 val;
164 164
165 bootx_dt_add_prop("linux,bootx", NULL, 0, mem_end);
166
165 if (bootx_info->kernelParamsOffset) { 167 if (bootx_info->kernelParamsOffset) {
166 char *args = (char *)((unsigned long)bootx_info) + 168 char *args = (char *)((unsigned long)bootx_info) +
167 bootx_info->kernelParamsOffset; 169 bootx_info->kernelParamsOffset;
@@ -181,8 +183,25 @@ static void __init bootx_add_chosen_props(unsigned long base,
181static void __init bootx_add_display_props(unsigned long base, 183static void __init bootx_add_display_props(unsigned long base,
182 unsigned long *mem_end) 184 unsigned long *mem_end)
183{ 185{
186 boot_infos_t *bi = bootx_info;
187 u32 tmp;
188
184 bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end); 189 bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end);
185 bootx_dt_add_prop("linux,opened", NULL, 0, mem_end); 190 bootx_dt_add_prop("linux,opened", NULL, 0, mem_end);
191 tmp = bi->dispDeviceDepth;
192 bootx_dt_add_prop("linux,bootx-depth", &tmp, 4, mem_end);
193 tmp = bi->dispDeviceRect[2] - bi->dispDeviceRect[0];
194 bootx_dt_add_prop("linux,bootx-width", &tmp, 4, mem_end);
195 tmp = bi->dispDeviceRect[3] - bi->dispDeviceRect[1];
196 bootx_dt_add_prop("linux,bootx-height", &tmp, 4, mem_end);
197 tmp = bi->dispDeviceRowBytes;
198 bootx_dt_add_prop("linux,bootx-linebytes", &tmp, 4, mem_end);
199 tmp = (u32)bi->dispDeviceBase;
200 if (tmp == 0)
201 tmp = (u32)bi->logicalDisplayBase;
202 tmp += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
203 tmp += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8);
204 bootx_dt_add_prop("linux,bootx-addr", &tmp, 4, mem_end);
186} 205}
187 206
188static void __init bootx_dt_add_string(char *s, unsigned long *mem_end) 207static void __init bootx_dt_add_string(char *s, unsigned long *mem_end)
@@ -211,7 +230,7 @@ static void __init bootx_scan_dt_build_strings(unsigned long base,
211 230
212 if (!strcmp(namep, "/chosen")) { 231 if (!strcmp(namep, "/chosen")) {
213 DBG(" detected /chosen ! adding properties names !\n"); 232 DBG(" detected /chosen ! adding properties names !\n");
214 bootx_dt_add_string("linux,platform", mem_end); 233 bootx_dt_add_string("linux,bootx", mem_end);
215 bootx_dt_add_string("linux,stdout-path", mem_end); 234 bootx_dt_add_string("linux,stdout-path", mem_end);
216 bootx_dt_add_string("linux,initrd-start", mem_end); 235 bootx_dt_add_string("linux,initrd-start", mem_end);
217 bootx_dt_add_string("linux,initrd-end", mem_end); 236 bootx_dt_add_string("linux,initrd-end", mem_end);
@@ -222,6 +241,11 @@ static void __init bootx_scan_dt_build_strings(unsigned long base,
222 DBG(" detected display ! adding properties names !\n"); 241 DBG(" detected display ! adding properties names !\n");
223 bootx_dt_add_string("linux,boot-display", mem_end); 242 bootx_dt_add_string("linux,boot-display", mem_end);
224 bootx_dt_add_string("linux,opened", mem_end); 243 bootx_dt_add_string("linux,opened", mem_end);
244 bootx_dt_add_string("linux,bootx-depth", mem_end);
245 bootx_dt_add_string("linux,bootx-width", mem_end);
246 bootx_dt_add_string("linux,bootx-height", mem_end);
247 bootx_dt_add_string("linux,bootx-linebytes", mem_end);
248 bootx_dt_add_string("linux,bootx-addr", mem_end);
225 strncpy(bootx_disp_path, namep, 255); 249 strncpy(bootx_disp_path, namep, 255);
226 } 250 }
227 251
@@ -443,7 +467,14 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
443 if (!BOOT_INFO_IS_V2_COMPATIBLE(bi)) 467 if (!BOOT_INFO_IS_V2_COMPATIBLE(bi))
444 bi->logicalDisplayBase = bi->dispDeviceBase; 468 bi->logicalDisplayBase = bi->dispDeviceBase;
445 469
470 /* Fixup depth 16 -> 15 as that's what MacOS calls 16bpp */
471 if (bi->dispDeviceDepth == 16)
472 bi->dispDeviceDepth = 15;
473
446#ifdef CONFIG_BOOTX_TEXT 474#ifdef CONFIG_BOOTX_TEXT
475 ptr = (unsigned long)bi->logicalDisplayBase;
476 ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
477 ptr += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8);
447 btext_setup_display(bi->dispDeviceRect[2] - bi->dispDeviceRect[0], 478 btext_setup_display(bi->dispDeviceRect[2] - bi->dispDeviceRect[0],
448 bi->dispDeviceRect[3] - bi->dispDeviceRect[1], 479 bi->dispDeviceRect[3] - bi->dispDeviceRect[1],
449 bi->dispDeviceDepth, bi->dispDeviceRowBytes, 480 bi->dispDeviceDepth, bi->dispDeviceRowBytes,
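
The two address computations above (in bootx_add_display_props() and before btext_setup_display()) derive the address of the top-left visible pixel from the BootX display rectangle. A small sketch of that arithmetic, assuming dispDeviceRect[] is {left, top, right, bottom} and dispDeviceDepth is in bits per pixel, as the width/height computations in this patch imply:

    static u32 bootx_fb_origin(boot_infos_t *bi)
    {
    	u32 addr = (u32)bi->dispDeviceBase;

    	if (addr == 0)				/* older BootX variants */
    		addr = (u32)bi->logicalDisplayBase;
    	/* skip the rows above the visible rectangle ... */
    	addr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
    	/* ... and the pixels to its left, rounding depth up to whole bytes */
    	addr += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8);
    	return addr;
    }
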
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index ceafaf52a668..8677f50c2586 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -522,10 +522,11 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
522 host->speed = KW_I2C_MODE_25KHZ; 522 host->speed = KW_I2C_MODE_25KHZ;
523 break; 523 break;
524 } 524 }
525 if (np->n_intrs > 0) 525 host->irq = irq_of_parse_and_map(np, 0);
526 host->irq = np->intrs[0].line; 526 if (host->irq == NO_IRQ)
527 else 527 printk(KERN_WARNING
528 host->irq = NO_IRQ; 528 "low_i2c: Failed to map interrupt for %s\n",
529 np->full_name);
529 530
530 host->base = ioremap((*addrp), 0x1000); 531 host->base = ioremap((*addrp), 0x1000);
531 if (host->base == NULL) { 532 if (host->base == NULL) {
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index 41fa2409482a..6a36ea9bf673 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -29,6 +29,8 @@
29#include <asm/machdep.h> 29#include <asm/machdep.h>
30#include <asm/nvram.h> 30#include <asm/nvram.h>
31 31
32#include "pmac.h"
33
32#define DEBUG 34#define DEBUG
33 35
34#ifdef DEBUG 36#ifdef DEBUG
@@ -80,9 +82,6 @@ static int nvram_partitions[3];
80// XXX Turn that into a sem 82// XXX Turn that into a sem
81static DEFINE_SPINLOCK(nv_lock); 83static DEFINE_SPINLOCK(nv_lock);
82 84
83extern int pmac_newworld;
84extern int system_running;
85
86static int (*core99_write_bank)(int bank, u8* datas); 85static int (*core99_write_bank)(int bank, u8* datas);
87static int (*core99_erase_bank)(int bank); 86static int (*core99_erase_bank)(int bank);
88 87
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index d524a915aa86..556b349797e8 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -46,6 +46,9 @@ static int has_uninorth;
46static struct pci_controller *u3_agp; 46static struct pci_controller *u3_agp;
47static struct pci_controller *u4_pcie; 47static struct pci_controller *u4_pcie;
48static struct pci_controller *u3_ht; 48static struct pci_controller *u3_ht;
49#define has_second_ohare 0
50#else
51static int has_second_ohare;
49#endif /* CONFIG_PPC64 */ 52#endif /* CONFIG_PPC64 */
50 53
51extern u8 pci_cache_line_size; 54extern u8 pci_cache_line_size;
@@ -647,6 +650,33 @@ static void __init init_p2pbridge(void)
647 early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val); 650 early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val);
648} 651}
649 652
653static void __init init_second_ohare(void)
654{
655 struct device_node *np = of_find_node_by_name(NULL, "pci106b,7");
656 unsigned char bus, devfn;
657 unsigned short cmd;
658
659 if (np == NULL)
660 return;
661
662 /* This must run before we initialize the PICs since the second
663 * ohare hosts a PIC that will be accessed there.
664 */
665 if (pci_device_from_OF_node(np, &bus, &devfn) == 0) {
666 struct pci_controller* hose =
667 pci_find_hose_for_OF_device(np);
668 if (!hose) {
669 printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
670 return;
671 }
672 early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
673 cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
674 cmd &= ~PCI_COMMAND_IO;
675 early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
676 }
677 has_second_ohare = 1;
678}
679
650/* 680/*
651 * Some Apple desktop machines have a NEC PD720100A USB2 controller 681 * Some Apple desktop machines have a NEC PD720100A USB2 controller
652 * on the motherboard. Open Firmware, on these, will disable the 682 * on the motherboard. Open Firmware, on these, will disable the
@@ -688,9 +718,6 @@ static void __init fixup_nec_usb2(void)
688 " EHCI, fixing up...\n"); 718 " EHCI, fixing up...\n");
689 data &= ~1UL; 719 data &= ~1UL;
690 early_write_config_dword(hose, bus, devfn, 0xe4, data); 720 early_write_config_dword(hose, bus, devfn, 0xe4, data);
691 early_write_config_byte(hose, bus,
692 devfn | 2, PCI_INTERRUPT_LINE,
693 nec->intrs[0].line);
694 } 721 }
695 } 722 }
696} 723}
@@ -958,32 +985,28 @@ static int __init add_bridge(struct device_node *dev)
958 return 0; 985 return 0;
959} 986}
960 987
961static void __init pcibios_fixup_OF_interrupts(void) 988void __init pmac_pcibios_fixup(void)
962{ 989{
963 struct pci_dev* dev = NULL; 990 struct pci_dev* dev = NULL;
964 991
965 /*
966 * Open Firmware often doesn't initialize the
967 * PCI_INTERRUPT_LINE config register properly, so we
968 * should find the device node and apply the interrupt
969 * obtained from the OF device-tree
970 */
971 for_each_pci_dev(dev) { 992 for_each_pci_dev(dev) {
972 struct device_node *node; 993 /* Read interrupt from the device-tree */
973 node = pci_device_to_OF_node(dev); 994 pci_read_irq_line(dev);
974 /* this is the node, see if it has interrupts */ 995
975 if (node && node->n_intrs > 0) 996 /* Fixup interrupt for the modem/ethernet combo controller
976 dev->irq = node->intrs[0].line; 997 * on machines with a second ohare chip.
977 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); 998 * The number in the device tree (27) is bogus (correct for
999 * the ethernet-only board but not the combo ethernet/modem
1000 * board). The real interrupt is 28 on the second controller
1001 * -> 28+32 = 60.
1002 */
1003 if (has_second_ohare &&
1004 dev->vendor == PCI_VENDOR_ID_DEC &&
1005 dev->device == PCI_DEVICE_ID_DEC_TULIP_PLUS)
1006 dev->irq = irq_create_mapping(NULL, 60, 0);
978 } 1007 }
979} 1008}
980 1009
981void __init pmac_pcibios_fixup(void)
982{
983 /* Fixup interrupts according to OF tree */
984 pcibios_fixup_OF_interrupts();
985}
986
987#ifdef CONFIG_PPC64 1010#ifdef CONFIG_PPC64
988static void __init pmac_fixup_phb_resources(void) 1011static void __init pmac_fixup_phb_resources(void)
989{ 1012{
@@ -1071,6 +1094,7 @@ void __init pmac_pci_init(void)
1071 1094
1072#else /* CONFIG_PPC64 */ 1095#else /* CONFIG_PPC64 */
1073 init_p2pbridge(); 1096 init_p2pbridge();
1097 init_second_ohare();
1074 fixup_nec_usb2(); 1098 fixup_nec_usb2();
1075 1099
1076 /* We are still having some issues with the Xserve G4, enabling 1100 /* We are still having some issues with the Xserve G4, enabling
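With the new core a hardwired interrupt number can no longer be written into dev->irq directly; it is a hardware number that first has to be turned into a virtual irq. Passing a NULL host to irq_create_mapping() selects the host registered with irq_set_default_host() (the pmac PIC host set up in pic.c later in this patch). A minimal sketch of that idiom under the 3-argument 2.6.18-era prototype used here; the helper is hypothetical and 60 is just the example hwirq from the fixup above:

#include <linux/kernel.h>
#include <asm/irq.h>

/* Turn a board-specific hardware interrupt number into a usable virq */
static unsigned int example_map_fixed_hwirq(irq_hw_number_t hwirq)
{
	/* NULL host = the default host set with irq_set_default_host() */
	unsigned int virq = irq_create_mapping(NULL, hwirq, 0);

	if (virq == NO_IRQ)
		printk(KERN_ERR "failed to map fixed hwirq %lu\n",
		       (unsigned long)hwirq);
	return virq;
}
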
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index d6eab8b3f7de..6d66359ec8c8 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -24,19 +24,18 @@ static irqreturn_t macio_gpio_irq(int irq, void *data, struct pt_regs *regs)
24 24
25static int macio_do_gpio_irq_enable(struct pmf_function *func) 25static int macio_do_gpio_irq_enable(struct pmf_function *func)
26{ 26{
27 if (func->node->n_intrs < 1) 27 unsigned int irq = irq_of_parse_and_map(func->node, 0);
28 if (irq == NO_IRQ)
28 return -EINVAL; 29 return -EINVAL;
29 30 return request_irq(irq, macio_gpio_irq, 0, func->node->name, func);
30 return request_irq(func->node->intrs[0].line, macio_gpio_irq, 0,
31 func->node->name, func);
32} 31}
33 32
34static int macio_do_gpio_irq_disable(struct pmf_function *func) 33static int macio_do_gpio_irq_disable(struct pmf_function *func)
35{ 34{
36 if (func->node->n_intrs < 1) 35 unsigned int irq = irq_of_parse_and_map(func->node, 0);
36 if (irq == NO_IRQ)
37 return -EINVAL; 37 return -EINVAL;
38 38 free_irq(irq, func);
39 free_irq(func->node->intrs[0].line, func);
40 return 0; 39 return 0;
41} 40}
42 41
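The enable and disable callbacks above both call irq_of_parse_and_map() rather than caching the virq; this relies on the new core handing back the existing virq when the hardware interrupt is already mapped (an assumption about the irq_create_mapping() path that parse-and-map ends in, not something this file states). A small sketch of that request/free pairing:

#include <linux/interrupt.h>
#include <asm/irq.h>
#include <asm/prom.h>

/* Sketch: mapping the same node/index twice is expected to yield the same
 * virq, so the disable path can re-resolve the irq instead of storing it. */
static void example_enable_disable(struct device_node *np,
				   irqreturn_t (*h)(int, void *, struct pt_regs *),
				   void *data)
{
	unsigned int virq = irq_of_parse_and_map(np, 0);

	if (virq == NO_IRQ)
		return;
	if (request_irq(virq, h, 0, np->name, data))
		return;
	/* ... later, on disable: the same lookup gives back the same virq */
	free_irq(irq_of_parse_and_map(np, 0), data);
}
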
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index c9b09a9e6050..3d328bc1f7e0 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -65,39 +65,36 @@ static u32 level_mask[4];
65 65
66static DEFINE_SPINLOCK(pmac_pic_lock); 66static DEFINE_SPINLOCK(pmac_pic_lock);
67 67
68#define GATWICK_IRQ_POOL_SIZE 10
69static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE];
70
71#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) 68#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
72static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; 69static unsigned long ppc_lost_interrupts[NR_MASK_WORDS];
70static unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
71static int pmac_irq_cascade = -1;
72static struct irq_host *pmac_pic_host;
73 73
74/* 74static void __pmac_retrigger(unsigned int irq_nr)
75 * Mark an irq as "lost". This is only used on the pmac
76 * since it can lose interrupts (see pmac_set_irq_mask).
77 * -- Cort
78 */
79void __set_lost(unsigned long irq_nr, int nokick)
80{ 75{
81 if (!test_and_set_bit(irq_nr, ppc_lost_interrupts)) { 76 if (irq_nr >= max_real_irqs && pmac_irq_cascade > 0) {
77 __set_bit(irq_nr, ppc_lost_interrupts);
78 irq_nr = pmac_irq_cascade;
79 mb();
80 }
81 if (!__test_and_set_bit(irq_nr, ppc_lost_interrupts)) {
82 atomic_inc(&ppc_n_lost_interrupts); 82 atomic_inc(&ppc_n_lost_interrupts);
83 if (!nokick) 83 set_dec(1);
84 set_dec(1);
85 } 84 }
86} 85}
87 86
88static void pmac_mask_and_ack_irq(unsigned int irq_nr) 87static void pmac_mask_and_ack_irq(unsigned int virq)
89{ 88{
90 unsigned long bit = 1UL << (irq_nr & 0x1f); 89 unsigned int src = irq_map[virq].hwirq;
91 int i = irq_nr >> 5; 90 unsigned long bit = 1UL << (virq & 0x1f);
91 int i = virq >> 5;
92 unsigned long flags; 92 unsigned long flags;
93 93
94 if ((unsigned)irq_nr >= max_irqs)
95 return;
96
97 clear_bit(irq_nr, ppc_cached_irq_mask);
98 if (test_and_clear_bit(irq_nr, ppc_lost_interrupts))
99 atomic_dec(&ppc_n_lost_interrupts);
100 spin_lock_irqsave(&pmac_pic_lock, flags); 94 spin_lock_irqsave(&pmac_pic_lock, flags);
95 __clear_bit(src, ppc_cached_irq_mask);
96 if (__test_and_clear_bit(src, ppc_lost_interrupts))
97 atomic_dec(&ppc_n_lost_interrupts);
101 out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); 98 out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
102 out_le32(&pmac_irq_hw[i]->ack, bit); 99 out_le32(&pmac_irq_hw[i]->ack, bit);
103 do { 100 do {
@@ -109,16 +106,29 @@ static void pmac_mask_and_ack_irq(unsigned int irq_nr)
109 spin_unlock_irqrestore(&pmac_pic_lock, flags); 106 spin_unlock_irqrestore(&pmac_pic_lock, flags);
110} 107}
111 108
112static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) 109static void pmac_ack_irq(unsigned int virq)
110{
111 unsigned int src = irq_map[virq].hwirq;
112 unsigned long bit = 1UL << (src & 0x1f);
113 int i = src >> 5;
114 unsigned long flags;
115
116 spin_lock_irqsave(&pmac_pic_lock, flags);
117 if (__test_and_clear_bit(src, ppc_lost_interrupts))
118 atomic_dec(&ppc_n_lost_interrupts);
119 out_le32(&pmac_irq_hw[i]->ack, bit);
120 (void)in_le32(&pmac_irq_hw[i]->ack);
121 spin_unlock_irqrestore(&pmac_pic_lock, flags);
122}
123
124static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
113{ 125{
114 unsigned long bit = 1UL << (irq_nr & 0x1f); 126 unsigned long bit = 1UL << (irq_nr & 0x1f);
115 int i = irq_nr >> 5; 127 int i = irq_nr >> 5;
116 unsigned long flags;
117 128
118 if ((unsigned)irq_nr >= max_irqs) 129 if ((unsigned)irq_nr >= max_irqs)
119 return; 130 return;
120 131
121 spin_lock_irqsave(&pmac_pic_lock, flags);
122 /* enable unmasked interrupts */ 132 /* enable unmasked interrupts */
123 out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); 133 out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
124 134
@@ -135,71 +145,78 @@ static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
135 * the bit in the flag register or request another interrupt. 145 * the bit in the flag register or request another interrupt.
136 */ 146 */
137 if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level)) 147 if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level))
138 __set_lost((ulong)irq_nr, nokicklost); 148 __pmac_retrigger(irq_nr);
139 spin_unlock_irqrestore(&pmac_pic_lock, flags);
140} 149}
141 150
142/* When an irq gets requested for the first client, if it's an 151/* When an irq gets requested for the first client, if it's an
143 * edge interrupt, we clear any previous one on the controller 152 * edge interrupt, we clear any previous one on the controller
144 */ 153 */
145static unsigned int pmac_startup_irq(unsigned int irq_nr) 154static unsigned int pmac_startup_irq(unsigned int virq)
146{ 155{
147 unsigned long bit = 1UL << (irq_nr & 0x1f); 156 unsigned long flags;
148 int i = irq_nr >> 5; 157 unsigned int src = irq_map[virq].hwirq;
158 unsigned long bit = 1UL << (src & 0x1f);
159 int i = src >> 5;
149 160
150 if ((irq_desc[irq_nr].status & IRQ_LEVEL) == 0) 161 spin_lock_irqsave(&pmac_pic_lock, flags);
162 if ((irq_desc[virq].status & IRQ_LEVEL) == 0)
151 out_le32(&pmac_irq_hw[i]->ack, bit); 163 out_le32(&pmac_irq_hw[i]->ack, bit);
152 set_bit(irq_nr, ppc_cached_irq_mask); 164 __set_bit(src, ppc_cached_irq_mask);
153 pmac_set_irq_mask(irq_nr, 0); 165 __pmac_set_irq_mask(src, 0);
166 spin_unlock_irqrestore(&pmac_pic_lock, flags);
154 167
155 return 0; 168 return 0;
156} 169}
157 170
158static void pmac_mask_irq(unsigned int irq_nr) 171static void pmac_mask_irq(unsigned int virq)
159{ 172{
160 clear_bit(irq_nr, ppc_cached_irq_mask); 173 unsigned long flags;
161 pmac_set_irq_mask(irq_nr, 0); 174 unsigned int src = irq_map[virq].hwirq;
162 mb(); 175
176 spin_lock_irqsave(&pmac_pic_lock, flags);
177 __clear_bit(src, ppc_cached_irq_mask);
178 __pmac_set_irq_mask(src, 0);
179 spin_unlock_irqrestore(&pmac_pic_lock, flags);
163} 180}
164 181
165static void pmac_unmask_irq(unsigned int irq_nr) 182static void pmac_unmask_irq(unsigned int virq)
166{ 183{
167 set_bit(irq_nr, ppc_cached_irq_mask); 184 unsigned long flags;
168 pmac_set_irq_mask(irq_nr, 0); 185 unsigned int src = irq_map[virq].hwirq;
186
187 spin_lock_irqsave(&pmac_pic_lock, flags);
188 __set_bit(src, ppc_cached_irq_mask);
189 __pmac_set_irq_mask(src, 0);
190 spin_unlock_irqrestore(&pmac_pic_lock, flags);
169} 191}
170 192
171static void pmac_end_irq(unsigned int irq_nr) 193static int pmac_retrigger(unsigned int virq)
172{ 194{
173 if (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS)) 195 unsigned long flags;
174 && irq_desc[irq_nr].action) {
175 set_bit(irq_nr, ppc_cached_irq_mask);
176 pmac_set_irq_mask(irq_nr, 1);
177 }
178}
179 196
197 spin_lock_irqsave(&pmac_pic_lock, flags);
198 __pmac_retrigger(irq_map[virq].hwirq);
199 spin_unlock_irqrestore(&pmac_pic_lock, flags);
200 return 1;
201}
180 202
181struct hw_interrupt_type pmac_pic = { 203static struct irq_chip pmac_pic = {
182 .typename = " PMAC-PIC ", 204 .typename = " PMAC-PIC ",
183 .startup = pmac_startup_irq, 205 .startup = pmac_startup_irq,
184 .enable = pmac_unmask_irq, 206 .mask = pmac_mask_irq,
185 .disable = pmac_mask_irq, 207 .ack = pmac_ack_irq,
186 .ack = pmac_mask_and_ack_irq, 208 .mask_ack = pmac_mask_and_ack_irq,
187 .end = pmac_end_irq, 209 .unmask = pmac_unmask_irq,
188}; 210 .retrigger = pmac_retrigger,
189
190struct hw_interrupt_type gatwick_pic = {
191 .typename = " GATWICK ",
192 .startup = pmac_startup_irq,
193 .enable = pmac_unmask_irq,
194 .disable = pmac_mask_irq,
195 .ack = pmac_mask_and_ack_irq,
196 .end = pmac_end_irq,
197}; 211};
198 212
199static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs) 213static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs)
200{ 214{
215 unsigned long flags;
201 int irq, bits; 216 int irq, bits;
217 int rc = IRQ_NONE;
202 218
219 spin_lock_irqsave(&pmac_pic_lock, flags);
203 for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) { 220 for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) {
204 int i = irq >> 5; 221 int i = irq >> 5;
205 bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; 222 bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
@@ -209,17 +226,20 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs)
209 if (bits == 0) 226 if (bits == 0)
210 continue; 227 continue;
211 irq += __ilog2(bits); 228 irq += __ilog2(bits);
229 spin_unlock_irqrestore(&pmac_pic_lock, flags);
212 __do_IRQ(irq, regs); 230 __do_IRQ(irq, regs);
213 return IRQ_HANDLED; 231 spin_lock_irqsave(&pmac_pic_lock, flags);
232 rc = IRQ_HANDLED;
214 } 233 }
215 printk("gatwick irq not from gatwick pic\n"); 234 spin_unlock_irqrestore(&pmac_pic_lock, flags);
216 return IRQ_NONE; 235 return rc;
217} 236}
218 237
219static int pmac_get_irq(struct pt_regs *regs) 238static unsigned int pmac_pic_get_irq(struct pt_regs *regs)
220{ 239{
221 int irq; 240 int irq;
222 unsigned long bits = 0; 241 unsigned long bits = 0;
242 unsigned long flags;
223 243
224#ifdef CONFIG_SMP 244#ifdef CONFIG_SMP
225 void psurge_smp_message_recv(struct pt_regs *); 245 void psurge_smp_message_recv(struct pt_regs *);
@@ -227,9 +247,10 @@ static int pmac_get_irq(struct pt_regs *regs)
227 /* IPI's are a hack on the powersurge -- Cort */ 247 /* IPI's are a hack on the powersurge -- Cort */
228 if ( smp_processor_id() != 0 ) { 248 if ( smp_processor_id() != 0 ) {
229 psurge_smp_message_recv(regs); 249 psurge_smp_message_recv(regs);
230 return -2; /* ignore, already handled */ 250 return NO_IRQ_IGNORE; /* ignore, already handled */
231 } 251 }
232#endif /* CONFIG_SMP */ 252#endif /* CONFIG_SMP */
253 spin_lock_irqsave(&pmac_pic_lock, flags);
233 for (irq = max_real_irqs; (irq -= 32) >= 0; ) { 254 for (irq = max_real_irqs; (irq -= 32) >= 0; ) {
234 int i = irq >> 5; 255 int i = irq >> 5;
235 bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; 256 bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
@@ -241,133 +262,10 @@ static int pmac_get_irq(struct pt_regs *regs)
241 irq += __ilog2(bits); 262 irq += __ilog2(bits);
242 break; 263 break;
243 } 264 }
244 265 spin_unlock_irqrestore(&pmac_pic_lock, flags);
245 return irq; 266 if (unlikely(irq < 0))
246} 267 return NO_IRQ;
247 268 return irq_linear_revmap(pmac_pic_host, irq);
248/* This routine will fix some missing interrupt values in the device tree
249 * on the gatwick mac-io controller used by some PowerBooks
250 *
251 * Walking of OF nodes could use a bit more fixing up here, but it's not
252 * very important as this is all boot time code on static portions of the
253 * device-tree.
254 *
255 * However, the modifications done to "intrs" will have to be removed and
256 * replaced with proper updates of the "interrupts" properties or
257 * AAPL,interrupts, yet to be decided, once the dynamic parsing is there.
258 */
259static void __init pmac_fix_gatwick_interrupts(struct device_node *gw,
260 int irq_base)
261{
262 struct device_node *node;
263 int count;
264
265 memset(gatwick_int_pool, 0, sizeof(gatwick_int_pool));
266 count = 0;
267 for (node = NULL; (node = of_get_next_child(gw, node)) != NULL;) {
268 /* Fix SCC */
269 if ((strcasecmp(node->name, "escc") == 0) && node->child) {
270 if (node->child->n_intrs < 3) {
271 node->child->intrs = &gatwick_int_pool[count];
272 count += 3;
273 }
274 node->child->n_intrs = 3;
275 node->child->intrs[0].line = 15+irq_base;
276 node->child->intrs[1].line = 4+irq_base;
277 node->child->intrs[2].line = 5+irq_base;
278 printk(KERN_INFO "irq: fixed SCC on gatwick"
279 " (%d,%d,%d)\n",
280 node->child->intrs[0].line,
281 node->child->intrs[1].line,
282 node->child->intrs[2].line);
283 }
284 /* Fix media-bay & left SWIM */
285 if (strcasecmp(node->name, "media-bay") == 0) {
286 struct device_node* ya_node;
287
288 if (node->n_intrs == 0)
289 node->intrs = &gatwick_int_pool[count++];
290 node->n_intrs = 1;
291 node->intrs[0].line = 29+irq_base;
292 printk(KERN_INFO "irq: fixed media-bay on gatwick"
293 " (%d)\n", node->intrs[0].line);
294
295 ya_node = node->child;
296 while(ya_node) {
297 if (strcasecmp(ya_node->name, "floppy") == 0) {
298 if (ya_node->n_intrs < 2) {
299 ya_node->intrs = &gatwick_int_pool[count];
300 count += 2;
301 }
302 ya_node->n_intrs = 2;
303 ya_node->intrs[0].line = 19+irq_base;
304 ya_node->intrs[1].line = 1+irq_base;
305 printk(KERN_INFO "irq: fixed floppy on second controller (%d,%d)\n",
306 ya_node->intrs[0].line, ya_node->intrs[1].line);
307 }
308 if (strcasecmp(ya_node->name, "ata4") == 0) {
309 if (ya_node->n_intrs < 2) {
310 ya_node->intrs = &gatwick_int_pool[count];
311 count += 2;
312 }
313 ya_node->n_intrs = 2;
314 ya_node->intrs[0].line = 14+irq_base;
315 ya_node->intrs[1].line = 3+irq_base;
316 printk(KERN_INFO "irq: fixed ide on second controller (%d,%d)\n",
317 ya_node->intrs[0].line, ya_node->intrs[1].line);
318 }
319 ya_node = ya_node->sibling;
320 }
321 }
322 }
323 if (count > 10) {
324 printk("WARNING !! Gatwick interrupt pool overflow\n");
325 printk(" GATWICK_IRQ_POOL_SIZE = %d\n", GATWICK_IRQ_POOL_SIZE);
326 printk(" requested = %d\n", count);
327 }
328}
329
330/*
331 * The PowerBook 3400/2400/3500 can have a combo ethernet/modem
332 * card which includes an ohare chip that acts as a second interrupt
333 * controller. If we find this second ohare, set it up and fix the
334 * interrupt value in the device tree for the ethernet chip.
335 */
336static void __init enable_second_ohare(struct device_node *np)
337{
338 unsigned char bus, devfn;
339 unsigned short cmd;
340 struct device_node *ether;
341
342 /* This code doesn't strictly belong here, it could be part of
343 * either the PCI initialisation or the feature code. It's kept
344 * here for historical reasons.
345 */
346 if (pci_device_from_OF_node(np, &bus, &devfn) == 0) {
347 struct pci_controller* hose =
348 pci_find_hose_for_OF_device(np);
349 if (!hose) {
350 printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
351 return;
352 }
353 early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
354 cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
355 cmd &= ~PCI_COMMAND_IO;
356 early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
357 }
358
359 /* Fix interrupt for the modem/ethernet combo controller. The number
360 * in the device tree (27) is bogus (correct for the ethernet-only
361 * board but not the combo ethernet/modem board).
362 * The real interrupt is 28 on the second controller -> 28+32 = 60.
363 */
364 ether = of_find_node_by_name(NULL, "pci1011,14");
365 if (ether && ether->n_intrs > 0) {
366 ether->intrs[0].line = 60;
367 printk(KERN_INFO "irq: Fixed ethernet IRQ to %d\n",
368 ether->intrs[0].line);
369 }
370 of_node_put(ether);
371} 269}
372 270
373#ifdef CONFIG_XMON 271#ifdef CONFIG_XMON
@@ -386,17 +284,60 @@ static struct irqaction gatwick_cascade_action = {
386 .name = "cascade", 284 .name = "cascade",
387}; 285};
388 286
287static int pmac_pic_host_match(struct irq_host *h, struct device_node *node)
288{
289 /* We match all, we don't always have a node anyway */
290 return 1;
291}
292
293static int pmac_pic_host_map(struct irq_host *h, unsigned int virq,
294 irq_hw_number_t hw, unsigned int flags)
295{
296 struct irq_desc *desc = get_irq_desc(virq);
297 int level;
298
299 if (hw >= max_irqs)
300 return -EINVAL;
301
302 /* Mark level interrupts, set delayed disable for edge ones and set
303 * handlers
304 */
305 level = !!(level_mask[hw >> 5] & (1UL << (hw & 0x1f)));
306 if (level)
307 desc->status |= IRQ_LEVEL;
308 else
309 desc->status |= IRQ_DELAYED_DISABLE;
310 set_irq_chip_and_handler(virq, &pmac_pic, level ?
311 handle_level_irq : handle_edge_irq);
312 return 0;
313}
314
315static int pmac_pic_host_xlate(struct irq_host *h, struct device_node *ct,
316 u32 *intspec, unsigned int intsize,
317 irq_hw_number_t *out_hwirq,
318 unsigned int *out_flags)
319
320{
321 *out_hwirq = *intspec;
322 return 0;
323}
324
325static struct irq_host_ops pmac_pic_host_ops = {
326 .match = pmac_pic_host_match,
327 .map = pmac_pic_host_map,
328 .xlate = pmac_pic_host_xlate,
329};
330
389static void __init pmac_pic_probe_oldstyle(void) 331static void __init pmac_pic_probe_oldstyle(void)
390{ 332{
391 int i; 333 int i;
392 int irq_cascade = -1;
393 struct device_node *master = NULL; 334 struct device_node *master = NULL;
394 struct device_node *slave = NULL; 335 struct device_node *slave = NULL;
395 u8 __iomem *addr; 336 u8 __iomem *addr;
396 struct resource r; 337 struct resource r;
397 338
398 /* Set our get_irq function */ 339 /* Set our get_irq function */
399 ppc_md.get_irq = pmac_get_irq; 340 ppc_md.get_irq = pmac_pic_get_irq;
400 341
401 /* 342 /*
402 * Find the interrupt controller type & node 343 * Find the interrupt controller type & node
@@ -414,7 +355,6 @@ static void __init pmac_pic_probe_oldstyle(void)
414 if (slave) { 355 if (slave) {
415 max_irqs = 64; 356 max_irqs = 64;
416 level_mask[1] = OHARE_LEVEL_MASK; 357 level_mask[1] = OHARE_LEVEL_MASK;
417 enable_second_ohare(slave);
418 } 358 }
419 } else if ((master = of_find_node_by_name(NULL, "mac-io")) != NULL) { 359 } else if ((master = of_find_node_by_name(NULL, "mac-io")) != NULL) {
420 max_irqs = max_real_irqs = 64; 360 max_irqs = max_real_irqs = 64;
@@ -438,14 +378,18 @@ static void __init pmac_pic_probe_oldstyle(void)
438 max_irqs = 128; 378 max_irqs = 128;
439 level_mask[2] = HEATHROW_LEVEL_MASK; 379 level_mask[2] = HEATHROW_LEVEL_MASK;
440 level_mask[3] = 0; 380 level_mask[3] = 0;
441 pmac_fix_gatwick_interrupts(slave, max_real_irqs);
442 } 381 }
443 } 382 }
444 BUG_ON(master == NULL); 383 BUG_ON(master == NULL);
445 384
446 /* Set the handler for the main PIC */ 385 /*
447 for ( i = 0; i < max_real_irqs ; i++ ) 386 * Allocate an irq host
448 irq_desc[i].chip = &pmac_pic; 387 */
388 pmac_pic_host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, max_irqs,
389 &pmac_pic_host_ops,
390 max_irqs);
391 BUG_ON(pmac_pic_host == NULL);
392 irq_set_default_host(pmac_pic_host);
449 393
450 /* Get addresses of first controller if we have a node for it */ 394 /* Get addresses of first controller if we have a node for it */
451 BUG_ON(of_address_to_resource(master, 0, &r)); 395 BUG_ON(of_address_to_resource(master, 0, &r));
@@ -472,39 +416,38 @@ static void __init pmac_pic_probe_oldstyle(void)
472 pmac_irq_hw[i++] = 416 pmac_irq_hw[i++] =
473 (volatile struct pmac_irq_hw __iomem *) 417 (volatile struct pmac_irq_hw __iomem *)
474 (addr + 0x10); 418 (addr + 0x10);
475 irq_cascade = slave->intrs[0].line; 419 pmac_irq_cascade = irq_of_parse_and_map(slave, 0);
476 420
477 printk(KERN_INFO "irq: Found slave Apple PIC %s for %d irqs" 421 printk(KERN_INFO "irq: Found slave Apple PIC %s for %d irqs"
478 " cascade: %d\n", slave->full_name, 422 " cascade: %d\n", slave->full_name,
479 max_irqs - max_real_irqs, irq_cascade); 423 max_irqs - max_real_irqs, pmac_irq_cascade);
480 } 424 }
481 of_node_put(slave); 425 of_node_put(slave);
482 426
483 /* disable all interrupts in all controllers */ 427 /* Disable all interrupts in all controllers */
484 for (i = 0; i * 32 < max_irqs; ++i) 428 for (i = 0; i * 32 < max_irqs; ++i)
485 out_le32(&pmac_irq_hw[i]->enable, 0); 429 out_le32(&pmac_irq_hw[i]->enable, 0);
486 430
487 /* mark level interrupts */ 431 /* Hookup cascade irq */
488 for (i = 0; i < max_irqs; i++) 432 if (slave && pmac_irq_cascade != NO_IRQ)
489 if (level_mask[i >> 5] & (1UL << (i & 0x1f))) 433 setup_irq(pmac_irq_cascade, &gatwick_cascade_action);
490 irq_desc[i].status = IRQ_LEVEL;
491 434
492 /* Setup handlers for secondary controller and hook cascade irq*/
493 if (slave) {
494 for ( i = max_real_irqs ; i < max_irqs ; i++ )
495 irq_desc[i].chip = &gatwick_pic;
496 setup_irq(irq_cascade, &gatwick_cascade_action);
497 }
498 printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs); 435 printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs);
499#ifdef CONFIG_XMON 436#ifdef CONFIG_XMON
500 setup_irq(20, &xmon_action); 437 setup_irq(irq_create_mapping(NULL, 20, 0), &xmon_action);
501#endif 438#endif
502} 439}
503#endif /* CONFIG_PPC32 */ 440#endif /* CONFIG_PPC32 */
504 441
505static int pmac_u3_cascade(struct pt_regs *regs, void *data) 442static void pmac_u3_cascade(unsigned int irq, struct irq_desc *desc,
443 struct pt_regs *regs)
506{ 444{
507 return mpic_get_one_irq((struct mpic *)data, regs); 445 struct mpic *mpic = desc->handler_data;
446
447 unsigned int cascade_irq = mpic_get_one_irq(mpic, regs);
448 if (cascade_irq != NO_IRQ)
449 generic_handle_irq(cascade_irq, regs);
450 desc->chip->eoi(irq);
508} 451}
509 452
510static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) 453static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic)
@@ -514,21 +457,20 @@ static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic)
514 int nmi_irq; 457 int nmi_irq;
515 458
516 pswitch = of_find_node_by_name(NULL, "programmer-switch"); 459 pswitch = of_find_node_by_name(NULL, "programmer-switch");
517 if (pswitch && pswitch->n_intrs) { 460 if (pswitch) {
518 nmi_irq = pswitch->intrs[0].line; 461 nmi_irq = irq_of_parse_and_map(pswitch, 0);
519 mpic_irq_set_priority(nmi_irq, 9); 462 if (nmi_irq != NO_IRQ) {
520 setup_irq(nmi_irq, &xmon_action); 463 mpic_irq_set_priority(nmi_irq, 9);
464 setup_irq(nmi_irq, &xmon_action);
465 }
466 of_node_put(pswitch);
521 } 467 }
522 of_node_put(pswitch);
523#endif /* defined(CONFIG_XMON) && defined(CONFIG_PPC32) */ 468#endif /* defined(CONFIG_XMON) && defined(CONFIG_PPC32) */
524} 469}
525 470
526static struct mpic * __init pmac_setup_one_mpic(struct device_node *np, 471static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
527 int master) 472 int master)
528{ 473{
529 unsigned char senses[128];
530 int offset = master ? 0 : 128;
531 int count = master ? 128 : 124;
532 const char *name = master ? " MPIC 1 " : " MPIC 2 "; 474 const char *name = master ? " MPIC 1 " : " MPIC 2 ";
533 struct resource r; 475 struct resource r;
534 struct mpic *mpic; 476 struct mpic *mpic;
@@ -541,8 +483,6 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
541 483
542 pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0); 484 pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0);
543 485
544 prom_get_irq_senses(senses, offset, offset + count);
545
546 flags |= MPIC_WANTS_RESET; 486 flags |= MPIC_WANTS_RESET;
547 if (get_property(np, "big-endian", NULL)) 487 if (get_property(np, "big-endian", NULL))
548 flags |= MPIC_BIG_ENDIAN; 488 flags |= MPIC_BIG_ENDIAN;
@@ -553,8 +493,7 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
553 if (master && (flags & MPIC_BIG_ENDIAN)) 493 if (master && (flags & MPIC_BIG_ENDIAN))
554 flags |= MPIC_BROKEN_U3; 494 flags |= MPIC_BROKEN_U3;
555 495
556 mpic = mpic_alloc(r.start, flags, 0, offset, count, master ? 252 : 0, 496 mpic = mpic_alloc(np, r.start, flags, 0, 0, name);
557 senses, count, name);
558 if (mpic == NULL) 497 if (mpic == NULL)
559 return NULL; 498 return NULL;
560 499
@@ -567,6 +506,7 @@ static int __init pmac_pic_probe_mpic(void)
567{ 506{
568 struct mpic *mpic1, *mpic2; 507 struct mpic *mpic1, *mpic2;
569 struct device_node *np, *master = NULL, *slave = NULL; 508 struct device_node *np, *master = NULL, *slave = NULL;
509 unsigned int cascade;
570 510
571 /* We can have up to 2 MPICs cascaded */ 511 /* We can have up to 2 MPICs cascaded */
572 for (np = NULL; (np = of_find_node_by_type(np, "open-pic")) 512 for (np = NULL; (np = of_find_node_by_type(np, "open-pic"))
@@ -603,8 +543,15 @@ static int __init pmac_pic_probe_mpic(void)
603 of_node_put(master); 543 of_node_put(master);
604 544
605 /* No slave, let's go out */ 545 /* No slave, let's go out */
606 if (slave == NULL || slave->n_intrs < 1) 546 if (slave == NULL)
547 return 0;
548
549 /* Get/Map slave interrupt */
550 cascade = irq_of_parse_and_map(slave, 0);
551 if (cascade == NO_IRQ) {
552 printk(KERN_ERR "Failed to map cascade IRQ\n");
607 return 0; 553 return 0;
554 }
608 555
609 mpic2 = pmac_setup_one_mpic(slave, 0); 556 mpic2 = pmac_setup_one_mpic(slave, 0);
610 if (mpic2 == NULL) { 557 if (mpic2 == NULL) {
@@ -612,7 +559,8 @@ static int __init pmac_pic_probe_mpic(void)
612 of_node_put(slave); 559 of_node_put(slave);
613 return 0; 560 return 0;
614 } 561 }
615 mpic_setup_cascade(slave->intrs[0].line, pmac_u3_cascade, mpic2); 562 set_irq_data(cascade, mpic2);
563 set_irq_chained_handler(cascade, pmac_u3_cascade);
616 564
617 of_node_put(slave); 565 of_node_put(slave);
618 return 0; 566 return 0;
@@ -621,6 +569,19 @@ static int __init pmac_pic_probe_mpic(void)
621 569
622void __init pmac_pic_init(void) 570void __init pmac_pic_init(void)
623{ 571{
572 unsigned int flags = 0;
573
574 /* We configure the OF parsing based on our oldworld vs. newworld
575 * platform type and whether we were booted by BootX.
576 */
577#ifdef CONFIG_PPC32
578 if (!pmac_newworld)
579 flags |= OF_IMAP_OLDWORLD_MAC;
580 if (get_property(of_chosen, "linux,bootx", NULL) != NULL)
581 flags |= OF_IMAP_NO_PHANDLE;
582 of_irq_map_init(flags);
583#endif /* CONFIG_PPC32 */
584
624 /* We first try to detect Apple's new Core99 chipset, since mac-io 585 /* We first try to detect Apple's new Core99 chipset, since mac-io
625 * is quite different on those machines and contains an IBM MPIC2. 586 * is quite different on those machines and contains an IBM MPIC2.
626 */ 587 */
@@ -643,6 +604,7 @@ unsigned long sleep_save_mask[2];
643 604
644/* This used to be passed by the PMU driver but that link got 605/* This used to be passed by the PMU driver but that link got
645 * broken with the new driver model. We use this tweak for now... 606 * broken with the new driver model. We use this tweak for now...
607 * We really want to do things differently though...
646 */ 608 */
647static int pmacpic_find_viaint(void) 609static int pmacpic_find_viaint(void)
648{ 610{
@@ -656,7 +618,7 @@ static int pmacpic_find_viaint(void)
656 np = of_find_node_by_name(NULL, "via-pmu"); 618 np = of_find_node_by_name(NULL, "via-pmu");
657 if (np == NULL) 619 if (np == NULL)
658 goto not_found; 620 goto not_found;
659 viaint = np->intrs[0].line; 621 viaint = irq_of_parse_and_map(np, 0);
660#endif /* CONFIG_ADB_PMU */ 622#endif /* CONFIG_ADB_PMU */
661 623
662not_found: 624not_found:
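The old per-irq_desc chip assignment is gone from pic.c: the PIC now registers an irq_host, its .map callback runs whenever a hardware interrupt gets mapped and picks the flow handler (level vs. edge) from the level mask, and the get_irq path translates the hardware number read from the controller with irq_linear_revmap(). A minimal sketch of that shape under the irq_host API this series adds; the example_ names are hypothetical and the chip callbacks are left out:

#include <linux/init.h>
#include <linux/irq.h>
#include <asm/irq.h>

#define EXAMPLE_NR_IRQS	64

static struct irq_host *example_host;
static struct irq_chip example_chip;	/* mask/unmask/ack as in pmac_pic above */
static u32 example_level_mask[EXAMPLE_NR_IRQS / 32];

static int example_host_map(struct irq_host *h, unsigned int virq,
			    irq_hw_number_t hw, unsigned int flags)
{
	int level;

	if (hw >= EXAMPLE_NR_IRQS)
		return -EINVAL;
	level = !!(example_level_mask[hw >> 5] & (1u << (hw & 0x1f)));
	set_irq_chip_and_handler(virq, &example_chip,
				 level ? handle_level_irq : handle_edge_irq);
	return 0;
}

static struct irq_host_ops example_host_ops = {
	.map	= example_host_map,
};

static void __init example_pic_init(void)
{
	example_host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, EXAMPLE_NR_IRQS,
				      &example_host_ops, EXAMPLE_NR_IRQS);
	irq_set_default_host(example_host);
}

/* get_irq path: the hardware number read from the PIC becomes a virq */
static unsigned int example_get_irq(unsigned int hw)
{
	return irq_linear_revmap(example_host, hw);
}
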
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 21c7b0f8f329..94e7b24b840b 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -12,6 +12,8 @@
12 12
13struct rtc_time; 13struct rtc_time;
14 14
15extern int pmac_newworld;
16
15extern long pmac_time_init(void); 17extern long pmac_time_init(void);
16extern unsigned long pmac_get_boot_time(void); 18extern unsigned long pmac_get_boot_time(void);
17extern void pmac_get_rtc_time(struct rtc_time *); 19extern void pmac_get_rtc_time(struct rtc_time *);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 8654b5f07836..31a9da769fa2 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -613,9 +613,6 @@ static void __init pmac_init_early(void)
613 udbg_adb_init(!!strstr(cmd_line, "btextdbg")); 613 udbg_adb_init(!!strstr(cmd_line, "btextdbg"));
614 614
615#ifdef CONFIG_PPC64 615#ifdef CONFIG_PPC64
616 /* Setup interrupt mapping options */
617 ppc64_interrupt_controller = IC_OPEN_PIC;
618
619 iommu_init_early_dart(); 616 iommu_init_early_dart();
620#endif 617#endif
621} 618}
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 9639c66b453d..9df783088b61 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -72,32 +72,62 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id,
72 72
73/* #define DEBUG */ 73/* #define DEBUG */
74 74
75static void request_ras_irqs(struct device_node *np, char *propname, 75
76static void request_ras_irqs(struct device_node *np,
76 irqreturn_t (*handler)(int, void *, struct pt_regs *), 77 irqreturn_t (*handler)(int, void *, struct pt_regs *),
77 const char *name) 78 const char *name)
78{ 79{
79 unsigned int *ireg, len, i; 80 int i, index, count = 0;
80 int virq, n_intr; 81 struct of_irq oirq;
81 82 u32 *opicprop;
82 ireg = (unsigned int *)get_property(np, propname, &len); 83 unsigned int opicplen;
83 if (ireg == NULL) 84 unsigned int virqs[16];
84 return; 85
85 n_intr = prom_n_intr_cells(np); 86 /* Check for obsolete "open-pic-interrupt" property. If present, then
86 len /= n_intr * sizeof(*ireg); 87 * map those interrupts using the default interrupt host and default
87 88 * trigger
88 for (i = 0; i < len; i++) { 89 */
89 virq = virt_irq_create_mapping(*ireg); 90 opicprop = (u32 *)get_property(np, "open-pic-interrupt", &opicplen);
90 if (virq == NO_IRQ) { 91 if (opicprop) {
91 printk(KERN_ERR "Unable to allocate interrupt " 92 opicplen /= sizeof(u32);
92 "number for %s\n", np->full_name); 93 for (i = 0; i < opicplen; i++) {
93 return; 94 if (count > 15)
95 break;
96 virqs[count] = irq_create_mapping(NULL, *(opicprop++),
97 IRQ_TYPE_NONE);
98 if (virqs[count] == NO_IRQ)
99 printk(KERN_ERR "Unable to allocate interrupt "
100 "number for %s\n", np->full_name);
101 else
102 count++;
103
94 } 104 }
95 if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) { 105 }
106 /* Else use normal interrupt tree parsing */
107 else {
108 /* First try to do a proper OF tree parsing */
109 for (index = 0; of_irq_map_one(np, index, &oirq) == 0;
110 index++) {
111 if (count > 15)
112 break;
113 virqs[count] = irq_create_of_mapping(oirq.controller,
114 oirq.specifier,
115 oirq.size);
116 if (virqs[count] == NO_IRQ)
117 printk(KERN_ERR "Unable to allocate interrupt "
118 "number for %s\n", np->full_name);
119 else
120 count++;
121 }
122 }
123
124 /* Now request them */
125 for (i = 0; i < count; i++) {
126 if (request_irq(virqs[i], handler, 0, name, NULL)) {
96 printk(KERN_ERR "Unable to request interrupt %d for " 127 printk(KERN_ERR "Unable to request interrupt %d for "
97 "%s\n", irq_offset_up(virq), np->full_name); 128 "%s\n", virqs[i], np->full_name);
98 return; 129 return;
99 } 130 }
100 ireg += n_intr;
101 } 131 }
102} 132}
103 133
@@ -115,20 +145,14 @@ static int __init init_ras_IRQ(void)
115 /* Internal Errors */ 145 /* Internal Errors */
116 np = of_find_node_by_path("/event-sources/internal-errors"); 146 np = of_find_node_by_path("/event-sources/internal-errors");
117 if (np != NULL) { 147 if (np != NULL) {
118 request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt, 148 request_ras_irqs(np, ras_error_interrupt, "RAS_ERROR");
119 "RAS_ERROR");
120 request_ras_irqs(np, "interrupts", ras_error_interrupt,
121 "RAS_ERROR");
122 of_node_put(np); 149 of_node_put(np);
123 } 150 }
124 151
125 /* EPOW Events */ 152 /* EPOW Events */
126 np = of_find_node_by_path("/event-sources/epow-events"); 153 np = of_find_node_by_path("/event-sources/epow-events");
127 if (np != NULL) { 154 if (np != NULL) {
128 request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt, 155 request_ras_irqs(np, ras_epow_interrupt, "RAS_EPOW");
129 "RAS_EPOW");
130 request_ras_irqs(np, "interrupts", ras_epow_interrupt,
131 "RAS_EPOW");
132 of_node_put(np); 156 of_node_put(np);
133 } 157 }
134 158
@@ -162,7 +186,7 @@ ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs)
162 186
163 status = rtas_call(ras_check_exception_token, 6, 1, NULL, 187 status = rtas_call(ras_check_exception_token, 6, 1, NULL,
164 RAS_VECTOR_OFFSET, 188 RAS_VECTOR_OFFSET,
165 virt_irq_to_real(irq_offset_down(irq)), 189 irq_map[irq].hwirq,
166 RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, 190 RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
167 critical, __pa(&ras_log_buf), 191 critical, __pa(&ras_log_buf),
168 rtas_get_error_log_max()); 192 rtas_get_error_log_max());
@@ -198,7 +222,7 @@ ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs)
198 222
199 status = rtas_call(ras_check_exception_token, 6, 1, NULL, 223 status = rtas_call(ras_check_exception_token, 6, 1, NULL,
200 RAS_VECTOR_OFFSET, 224 RAS_VECTOR_OFFSET,
201 virt_irq_to_real(irq_offset_down(irq)), 225 irq_map[irq].hwirq,
202 RTAS_INTERNAL_ERROR, 1 /*Time Critical */, 226 RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
203 __pa(&ras_log_buf), 227 __pa(&ras_log_buf),
204 rtas_get_error_log_max()); 228 rtas_get_error_log_max());
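request_ras_irqs() now collects its interrupts either from the obsolete "open-pic-interrupt" property (raw hardware numbers mapped through the default host) or, in the normal case, by walking the node's interrupt tree one index at a time. A minimal sketch of the tree-walking half, using the of_irq_map_one()/irq_create_of_mapping() interfaces this series introduces; example_map_all_irqs() is hypothetical:

#include <asm/irq.h>
#include <asm/prom.h>

/* Collect up to max virqs for every interrupt listed on a node */
static int example_map_all_irqs(struct device_node *np,
				unsigned int *virqs, int max)
{
	struct of_irq oirq;
	int index, count = 0;

	for (index = 0; count < max &&
	     of_irq_map_one(np, index, &oirq) == 0; index++) {
		unsigned int virq = irq_create_of_mapping(oirq.controller,
							  oirq.specifier,
							  oirq.size);
		if (virq == NO_IRQ)
			continue;
		virqs[count++] = virq;
	}
	return count;
}
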
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 999509d28af8..54a52437265c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -76,6 +76,9 @@
76#define DBG(fmt...) 76#define DBG(fmt...)
77#endif 77#endif
78 78
79/* move those away to a .h */
80extern void smp_init_pseries_mpic(void);
81extern void smp_init_pseries_xics(void);
79extern void find_udbg_vterm(void); 82extern void find_udbg_vterm(void);
80 83
81int fwnmi_active; /* TRUE if an FWNMI handler is present */ 84int fwnmi_active; /* TRUE if an FWNMI handler is present */
@@ -83,7 +86,7 @@ int fwnmi_active; /* TRUE if an FWNMI handler is present */
83static void pseries_shared_idle_sleep(void); 86static void pseries_shared_idle_sleep(void);
84static void pseries_dedicated_idle_sleep(void); 87static void pseries_dedicated_idle_sleep(void);
85 88
86struct mpic *pSeries_mpic; 89static struct device_node *pSeries_mpic_node;
87 90
88static void pSeries_show_cpuinfo(struct seq_file *m) 91static void pSeries_show_cpuinfo(struct seq_file *m)
89{ 92{
@@ -118,63 +121,92 @@ static void __init fwnmi_init(void)
118 fwnmi_active = 1; 121 fwnmi_active = 1;
119} 122}
120 123
121static void __init pSeries_init_mpic(void) 124void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc,
125 struct pt_regs *regs)
122{ 126{
123 unsigned int *addrp; 127 unsigned int cascade_irq = i8259_irq(regs);
124 struct device_node *np; 128 if (cascade_irq != NO_IRQ)
125 unsigned long intack = 0; 129 generic_handle_irq(cascade_irq, regs);
126 130 desc->chip->eoi(irq);
127 /* All ISUs are setup, complete initialization */
128 mpic_init(pSeries_mpic);
129
130 /* Check what kind of cascade ACK we have */
131 if (!(np = of_find_node_by_name(NULL, "pci"))
132 || !(addrp = (unsigned int *)
133 get_property(np, "8259-interrupt-acknowledge", NULL)))
134 printk(KERN_ERR "Cannot find pci to get ack address\n");
135 else
136 intack = addrp[prom_n_addr_cells(np)-1];
137 of_node_put(np);
138
139 /* Setup the legacy interrupts & controller */
140 i8259_init(intack, 0);
141
142 /* Hook cascade to mpic */
143 mpic_setup_cascade(NUM_ISA_INTERRUPTS, i8259_irq_cascade, NULL);
144} 131}
145 132
146static void __init pSeries_setup_mpic(void) 133static void __init pseries_mpic_init_IRQ(void)
147{ 134{
135 struct device_node *np, *old, *cascade = NULL;
136 unsigned int *addrp;
137 unsigned long intack = 0;
148 unsigned int *opprop; 138 unsigned int *opprop;
149 unsigned long openpic_addr = 0; 139 unsigned long openpic_addr = 0;
150 unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS]; 140 unsigned int cascade_irq;
151 struct device_node *root; 141 int naddr, n, i, opplen;
152 int irq_count; 142 struct mpic *mpic;
153 143
154 /* Find the Open PIC if present */ 144 np = of_find_node_by_path("/");
155 root = of_find_node_by_path("/"); 145 naddr = prom_n_addr_cells(np);
156 opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL); 146 opprop = (unsigned int *) get_property(np, "platform-open-pic", &opplen);
157 if (opprop != 0) { 147 if (opprop != 0) {
158 int n = prom_n_addr_cells(root); 148 openpic_addr = of_read_number(opprop, naddr);
159
160 for (openpic_addr = 0; n > 0; --n)
161 openpic_addr = (openpic_addr << 32) + *opprop++;
162 printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); 149 printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
163 } 150 }
164 of_node_put(root); 151 of_node_put(np);
165 152
166 BUG_ON(openpic_addr == 0); 153 BUG_ON(openpic_addr == 0);
167 154
168 /* Get the sense values from OF */
169 prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS);
170
171 /* Setup the openpic driver */ 155 /* Setup the openpic driver */
172 irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */ 156 mpic = mpic_alloc(pSeries_mpic_node, openpic_addr,
173 pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY, 157 MPIC_PRIMARY,
174 16, 16, irq_count, /* isu size, irq offset, irq count */ 158 16, 250, /* isu size, irq count */
175 NR_IRQS - 4, /* ipi offset */ 159 " MPIC ");
176 senses, irq_count, /* sense & sense size */ 160 BUG_ON(mpic == NULL);
177 " MPIC "); 161
162 /* Add ISUs */
163 opplen /= sizeof(u32);
164 for (n = 0, i = naddr; i < opplen; i += naddr, n++) {
165 unsigned long isuaddr = of_read_number(opprop + i, naddr);
166 mpic_assign_isu(mpic, n, isuaddr);
167 }
168
169 /* All ISUs are setup, complete initialization */
170 mpic_init(mpic);
171
172 /* Look for cascade */
173 for_each_node_by_type(np, "interrupt-controller")
174 if (device_is_compatible(np, "chrp,iic")) {
175 cascade = np;
176 break;
177 }
178 if (cascade == NULL)
179 return;
180
181 cascade_irq = irq_of_parse_and_map(cascade, 0);
182 if (cascade_irq == NO_IRQ) {
183 printk(KERN_ERR "mpic: failed to map cascade interrupt\n");
184 return;
185 }
186
187 /* Check ACK type */
188 for (old = of_node_get(cascade); old != NULL ; old = np) {
189 np = of_get_parent(old);
190 of_node_put(old);
191 if (np == NULL)
192 break;
193 if (strcmp(np->name, "pci") != 0)
194 continue;
195 addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge",
196 NULL);
197 if (addrp == NULL)
198 continue;
199 naddr = prom_n_addr_cells(np);
200 intack = addrp[naddr-1];
201 if (naddr > 1)
202 intack |= ((unsigned long)addrp[naddr-2]) << 32;
203 }
204 if (intack)
205 printk(KERN_DEBUG "mpic: PCI 8259 intack at 0x%016lx\n",
206 intack);
207 i8259_init(cascade, intack);
208 of_node_put(cascade);
209 set_irq_chained_handler(cascade_irq, pseries_8259_cascade);
178} 210}
179 211
180static void pseries_lpar_enable_pmcs(void) 212static void pseries_lpar_enable_pmcs(void)
@@ -192,23 +224,67 @@ static void pseries_lpar_enable_pmcs(void)
192 get_lppaca()->pmcregs_in_use = 1; 224 get_lppaca()->pmcregs_in_use = 1;
193} 225}
194 226
195static void __init pSeries_setup_arch(void) 227#ifdef CONFIG_KEXEC
228static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary)
196{ 229{
197 /* Fixup ppc_md depending on the type of interrupt controller */ 230 mpic_teardown_this_cpu(secondary);
198 if (ppc64_interrupt_controller == IC_OPEN_PIC) { 231}
199 ppc_md.init_IRQ = pSeries_init_mpic; 232
200 ppc_md.get_irq = mpic_get_irq; 233static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
201 /* Allocate the mpic now, so that find_and_init_phbs() can 234{
202 * fill the ISUs */ 235 /* Don't risk a hypervisor call if we're crashing */
203 pSeries_setup_mpic(); 236 if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
204 } else { 237 unsigned long vpa = __pa(get_lppaca());
205 ppc_md.init_IRQ = xics_init_IRQ; 238
206 ppc_md.get_irq = xics_get_irq; 239 if (unregister_vpa(hard_smp_processor_id(), vpa)) {
240 printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
241 "failed\n", smp_processor_id(),
242 hard_smp_processor_id());
243 }
207 } 244 }
245 xics_teardown_cpu(secondary);
246}
247#endif /* CONFIG_KEXEC */
208 248
249static void __init pseries_discover_pic(void)
250{
251 struct device_node *np;
252 char *typep;
253
254 for (np = NULL; (np = of_find_node_by_name(np,
255 "interrupt-controller"));) {
256 typep = (char *)get_property(np, "compatible", NULL);
257 if (strstr(typep, "open-pic")) {
258 pSeries_mpic_node = of_node_get(np);
259 ppc_md.init_IRQ = pseries_mpic_init_IRQ;
260 ppc_md.get_irq = mpic_get_irq;
261#ifdef CONFIG_KEXEC
262 ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_mpic;
263#endif
264#ifdef CONFIG_SMP
265 smp_init_pseries_mpic();
266#endif
267 return;
268 } else if (strstr(typep, "ppc-xicp")) {
269 ppc_md.init_IRQ = xics_init_IRQ;
270#ifdef CONFIG_KEXEC
271 ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_xics;
272#endif
209#ifdef CONFIG_SMP 273#ifdef CONFIG_SMP
210 smp_init_pSeries(); 274 smp_init_pseries_xics();
211#endif 275#endif
276 return;
277 }
278 }
279 printk(KERN_ERR "pseries_discover_pic: failed to recognize"
280 " interrupt-controller\n");
281}
282
283static void __init pSeries_setup_arch(void)
284{
285 /* Discover PIC type and setup ppc_md accordingly */
286 pseries_discover_pic();
287
212 /* openpic global configuration register (64-bit format). */ 288 /* openpic global configuration register (64-bit format). */
213 /* openpic Interrupt Source Unit pointer (64-bit format). */ 289 /* openpic Interrupt Source Unit pointer (64-bit format). */
214 /* python0 facility area (mmio) (64-bit format) REAL address. */ 290 /* python0 facility area (mmio) (64-bit format) REAL address. */
@@ -260,41 +336,11 @@ static int __init pSeries_init_panel(void)
260} 336}
261arch_initcall(pSeries_init_panel); 337arch_initcall(pSeries_init_panel);
262 338
263static void __init pSeries_discover_pic(void)
264{
265 struct device_node *np;
266 char *typep;
267
268 /*
269 * Setup interrupt mapping options that are needed for finish_device_tree
270 * to properly parse the OF interrupt tree & do the virtual irq mapping
271 */
272 __irq_offset_value = NUM_ISA_INTERRUPTS;
273 ppc64_interrupt_controller = IC_INVALID;
274 for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) {
275 typep = (char *)get_property(np, "compatible", NULL);
276 if (strstr(typep, "open-pic")) {
277 ppc64_interrupt_controller = IC_OPEN_PIC;
278 break;
279 } else if (strstr(typep, "ppc-xicp")) {
280 ppc64_interrupt_controller = IC_PPC_XIC;
281 break;
282 }
283 }
284 if (ppc64_interrupt_controller == IC_INVALID)
285 printk("pSeries_discover_pic: failed to recognize"
286 " interrupt-controller\n");
287
288}
289
290static void pSeries_mach_cpu_die(void) 339static void pSeries_mach_cpu_die(void)
291{ 340{
292 local_irq_disable(); 341 local_irq_disable();
293 idle_task_exit(); 342 idle_task_exit();
294 /* Some hardware requires clearing the CPPR, while other hardware does not 343 xics_teardown_cpu(0);
295 * it is safe either way
296 */
297 pSeriesLP_cppr_info(0, 0);
298 rtas_stop_self(); 344 rtas_stop_self();
299 /* Should never get here... */ 345 /* Should never get here... */
300 BUG(); 346 BUG();
@@ -332,8 +378,6 @@ static void __init pSeries_init_early(void)
332 378
333 iommu_init_early_pSeries(); 379 iommu_init_early_pSeries();
334 380
335 pSeries_discover_pic();
336
337 DBG(" <- pSeries_init_early()\n"); 381 DBG(" <- pSeries_init_early()\n");
338} 382}
339 383
@@ -505,27 +549,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
505 return PCI_PROBE_NORMAL; 549 return PCI_PROBE_NORMAL;
506} 550}
507 551
508#ifdef CONFIG_KEXEC
509static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
510{
511 /* Don't risk a hypervisor call if we're crashing */
512 if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
513 unsigned long vpa = __pa(get_lppaca());
514
515 if (unregister_vpa(hard_smp_processor_id(), vpa)) {
516 printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
517 "failed\n", smp_processor_id(),
518 hard_smp_processor_id());
519 }
520 }
521
522 if (ppc64_interrupt_controller == IC_OPEN_PIC)
523 mpic_teardown_this_cpu(secondary);
524 else
525 xics_teardown_cpu(secondary);
526}
527#endif
528
529define_machine(pseries) { 552define_machine(pseries) {
530 .name = "pSeries", 553 .name = "pSeries",
531 .probe = pSeries_probe, 554 .probe = pSeries_probe,
@@ -550,7 +573,6 @@ define_machine(pseries) {
550 .system_reset_exception = pSeries_system_reset_exception, 573 .system_reset_exception = pSeries_system_reset_exception,
551 .machine_check_exception = pSeries_machine_check_exception, 574 .machine_check_exception = pSeries_machine_check_exception,
552#ifdef CONFIG_KEXEC 575#ifdef CONFIG_KEXEC
553 .kexec_cpu_down = pseries_kexec_cpu_down,
554 .machine_kexec = default_machine_kexec, 576 .machine_kexec = default_machine_kexec,
555 .machine_kexec_prepare = default_machine_kexec_prepare, 577 .machine_kexec_prepare = default_machine_kexec_prepare,
556 .machine_crash_shutdown = default_machine_crash_shutdown, 578 .machine_crash_shutdown = default_machine_crash_shutdown,
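Both new cascade handlers, the pmac U3 one earlier and pseries_8259_cascade here, follow the same shape: a chained flow handler installed on the parent's cascade virq asks the child controller which interrupt fired, dispatches that virq through generic_handle_irq(), then EOIs the cascade on the parent chip. A minimal sketch of that pattern; example_child_get_irq() is a hypothetical stand-in for i8259_irq() or mpic_get_one_irq():

#include <linux/irq.h>
#include <asm/irq.h>

/* Hypothetical hook: ask the child controller which virq is pending */
extern unsigned int example_child_get_irq(struct pt_regs *regs);

static void example_cascade(unsigned int irq, struct irq_desc *desc,
			    struct pt_regs *regs)
{
	unsigned int child = example_child_get_irq(regs);

	if (child != NO_IRQ)
		generic_handle_irq(child, regs);	/* run the child's handler */
	desc->chip->eoi(irq);				/* EOI the cascade on the parent */
}

/* Wiring, as done above for the 8259 and for the slave MPIC:
 *	set_irq_data(cascade_virq, child_controller_cookie);
 *	set_irq_chained_handler(cascade_virq, example_cascade);
 */
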
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 4ad144df49c2..ac61098ff401 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -416,27 +416,12 @@ static struct smp_ops_t pSeries_xics_smp_ops = {
416#endif 416#endif
417 417
418/* This is called very early */ 418/* This is called very early */
419void __init smp_init_pSeries(void) 419static void __init smp_init_pseries(void)
420{ 420{
421 int i; 421 int i;
422 422
423 DBG(" -> smp_init_pSeries()\n"); 423 DBG(" -> smp_init_pSeries()\n");
424 424
425 switch (ppc64_interrupt_controller) {
426#ifdef CONFIG_MPIC
427 case IC_OPEN_PIC:
428 smp_ops = &pSeries_mpic_smp_ops;
429 break;
430#endif
431#ifdef CONFIG_XICS
432 case IC_PPC_XIC:
433 smp_ops = &pSeries_xics_smp_ops;
434 break;
435#endif
436 default:
437 panic("Invalid interrupt controller");
438 }
439
440#ifdef CONFIG_HOTPLUG_CPU 425#ifdef CONFIG_HOTPLUG_CPU
441 smp_ops->cpu_disable = pSeries_cpu_disable; 426 smp_ops->cpu_disable = pSeries_cpu_disable;
442 smp_ops->cpu_die = pSeries_cpu_die; 427 smp_ops->cpu_die = pSeries_cpu_die;
@@ -471,3 +456,18 @@ void __init smp_init_pSeries(void)
471 DBG(" <- smp_init_pSeries()\n"); 456 DBG(" <- smp_init_pSeries()\n");
472} 457}
473 458
459#ifdef CONFIG_MPIC
460void __init smp_init_pseries_mpic(void)
461{
462 smp_ops = &pSeries_mpic_smp_ops;
463
464 smp_init_pseries();
465}
466#endif
467
468void __init smp_init_pseries_xics(void)
469{
470 smp_ops = &pSeries_xics_smp_ops;
471
472 smp_init_pseries();
473}
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 2ffebe31cb2d..716972aa9777 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -8,6 +8,9 @@
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11
12#undef DEBUG
13
11#include <linux/types.h> 14#include <linux/types.h>
12#include <linux/threads.h> 15#include <linux/threads.h>
13#include <linux/kernel.h> 16#include <linux/kernel.h>
@@ -19,6 +22,7 @@
19#include <linux/gfp.h> 22#include <linux/gfp.h>
20#include <linux/radix-tree.h> 23#include <linux/radix-tree.h>
21#include <linux/cpu.h> 24#include <linux/cpu.h>
25
22#include <asm/firmware.h> 26#include <asm/firmware.h>
23#include <asm/prom.h> 27#include <asm/prom.h>
24#include <asm/io.h> 28#include <asm/io.h>
@@ -31,26 +35,6 @@
31 35
32#include "xics.h" 36#include "xics.h"
33 37
34static unsigned int xics_startup(unsigned int irq);
35static void xics_enable_irq(unsigned int irq);
36static void xics_disable_irq(unsigned int irq);
37static void xics_mask_and_ack_irq(unsigned int irq);
38static void xics_end_irq(unsigned int irq);
39static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask);
40
41static struct hw_interrupt_type xics_pic = {
42 .typename = " XICS ",
43 .startup = xics_startup,
44 .enable = xics_enable_irq,
45 .disable = xics_disable_irq,
46 .ack = xics_mask_and_ack_irq,
47 .end = xics_end_irq,
48 .set_affinity = xics_set_affinity
49};
50
51/* This is used to map real irq numbers to virtual */
52static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC);
53
54#define XICS_IPI 2 38#define XICS_IPI 2
55#define XICS_IRQ_SPURIOUS 0 39#define XICS_IRQ_SPURIOUS 0
56 40
@@ -81,12 +65,12 @@ struct xics_ipl {
81 65
82static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; 66static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
83 67
84static int xics_irq_8259_cascade = 0;
85static int xics_irq_8259_cascade_real = 0;
86static unsigned int default_server = 0xFF; 68static unsigned int default_server = 0xFF;
87static unsigned int default_distrib_server = 0; 69static unsigned int default_distrib_server = 0;
88static unsigned int interrupt_server_size = 8; 70static unsigned int interrupt_server_size = 8;
89 71
72static struct irq_host *xics_host;
73
90/* 74/*
91 * XICS only has a single IPI, so encode the messages per CPU 75 * XICS only has a single IPI, so encode the messages per CPU
92 */ 76 */
@@ -98,48 +82,34 @@ static int ibm_set_xive;
98static int ibm_int_on; 82static int ibm_int_on;
99static int ibm_int_off; 83static int ibm_int_off;
100 84
101typedef struct {
102 int (*xirr_info_get)(int cpu);
103 void (*xirr_info_set)(int cpu, int val);
104 void (*cppr_info)(int cpu, u8 val);
105 void (*qirr_info)(int cpu, u8 val);
106} xics_ops;
107 85
86/* Direct HW low level accessors */
108 87
109/* SMP */
110 88
111static int pSeries_xirr_info_get(int n_cpu) 89static inline unsigned int direct_xirr_info_get(int n_cpu)
112{ 90{
113 return in_be32(&xics_per_cpu[n_cpu]->xirr.word); 91 return in_be32(&xics_per_cpu[n_cpu]->xirr.word);
114} 92}
115 93
116static void pSeries_xirr_info_set(int n_cpu, int value) 94static inline void direct_xirr_info_set(int n_cpu, int value)
117{ 95{
118 out_be32(&xics_per_cpu[n_cpu]->xirr.word, value); 96 out_be32(&xics_per_cpu[n_cpu]->xirr.word, value);
119} 97}
120 98
121static void pSeries_cppr_info(int n_cpu, u8 value) 99static inline void direct_cppr_info(int n_cpu, u8 value)
122{ 100{
123 out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value); 101 out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value);
124} 102}
125 103
126static void pSeries_qirr_info(int n_cpu, u8 value) 104static inline void direct_qirr_info(int n_cpu, u8 value)
127{ 105{
128 out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value); 106 out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
129} 107}
130 108
131static xics_ops pSeries_ops = {
132 pSeries_xirr_info_get,
133 pSeries_xirr_info_set,
134 pSeries_cppr_info,
135 pSeries_qirr_info
136};
137 109
138static xics_ops *ops = &pSeries_ops; 110/* LPAR low level accessors */
139 111
140 112
141/* LPAR */
142
143static inline long plpar_eoi(unsigned long xirr) 113static inline long plpar_eoi(unsigned long xirr)
144{ 114{
145 return plpar_hcall_norets(H_EOI, xirr); 115 return plpar_hcall_norets(H_EOI, xirr);
@@ -161,7 +131,7 @@ static inline long plpar_xirr(unsigned long *xirr_ret)
161 return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy); 131 return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy);
162} 132}
163 133
164static int pSeriesLP_xirr_info_get(int n_cpu) 134static inline unsigned int lpar_xirr_info_get(int n_cpu)
165{ 135{
166 unsigned long lpar_rc; 136 unsigned long lpar_rc;
167 unsigned long return_value; 137 unsigned long return_value;
@@ -169,10 +139,10 @@ static int pSeriesLP_xirr_info_get(int n_cpu)
169 lpar_rc = plpar_xirr(&return_value); 139 lpar_rc = plpar_xirr(&return_value);
170 if (lpar_rc != H_SUCCESS) 140 if (lpar_rc != H_SUCCESS)
171 panic(" bad return code xirr - rc = %lx \n", lpar_rc); 141 panic(" bad return code xirr - rc = %lx \n", lpar_rc);
172 return (int)return_value; 142 return (unsigned int)return_value;
173} 143}
174 144
175static void pSeriesLP_xirr_info_set(int n_cpu, int value) 145static inline void lpar_xirr_info_set(int n_cpu, int value)
176{ 146{
177 unsigned long lpar_rc; 147 unsigned long lpar_rc;
178 unsigned long val64 = value & 0xffffffff; 148 unsigned long val64 = value & 0xffffffff;
@@ -183,7 +153,7 @@ static void pSeriesLP_xirr_info_set(int n_cpu, int value)
183 val64); 153 val64);
184} 154}
185 155
186void pSeriesLP_cppr_info(int n_cpu, u8 value) 156static inline void lpar_cppr_info(int n_cpu, u8 value)
187{ 157{
188 unsigned long lpar_rc; 158 unsigned long lpar_rc;
189 159
@@ -192,7 +162,7 @@ void pSeriesLP_cppr_info(int n_cpu, u8 value)
192 panic("bad return code cppr - rc = %lx\n", lpar_rc); 162 panic("bad return code cppr - rc = %lx\n", lpar_rc);
193} 163}
194 164
195static void pSeriesLP_qirr_info(int n_cpu , u8 value) 165static inline void lpar_qirr_info(int n_cpu , u8 value)
196{ 166{
197 unsigned long lpar_rc; 167 unsigned long lpar_rc;
198 168
@@ -201,43 +171,16 @@ static void pSeriesLP_qirr_info(int n_cpu , u8 value)
201 panic("bad return code qirr - rc = %lx\n", lpar_rc); 171 panic("bad return code qirr - rc = %lx\n", lpar_rc);
202} 172}
203 173
204xics_ops pSeriesLP_ops = {
205 pSeriesLP_xirr_info_get,
206 pSeriesLP_xirr_info_set,
207 pSeriesLP_cppr_info,
208 pSeriesLP_qirr_info
209};
210
211static unsigned int xics_startup(unsigned int virq)
212{
213 unsigned int irq;
214
215 irq = irq_offset_down(virq);
216 if (radix_tree_insert(&irq_map, virt_irq_to_real(irq),
217 &virt_irq_to_real_map[irq]) == -ENOMEM)
218 printk(KERN_CRIT "Out of memory creating real -> virtual"
219 " IRQ mapping for irq %u (real 0x%x)\n",
220 virq, virt_irq_to_real(irq));
221 xics_enable_irq(virq);
222 return 0; /* return value is ignored */
223}
224 174
225static unsigned int real_irq_to_virt(unsigned int real_irq) 175/* High level handlers and init code */
226{
227 unsigned int *ptr;
228 176
229 ptr = radix_tree_lookup(&irq_map, real_irq);
230 if (ptr == NULL)
231 return NO_IRQ;
232 return ptr - virt_irq_to_real_map;
233}
234 177
235#ifdef CONFIG_SMP 178#ifdef CONFIG_SMP
236static int get_irq_server(unsigned int irq) 179static int get_irq_server(unsigned int virq)
237{ 180{
238 unsigned int server; 181 unsigned int server;
239 /* For the moment only implement delivery to all cpus or one cpu */ 182 /* For the moment only implement delivery to all cpus or one cpu */
240 cpumask_t cpumask = irq_desc[irq].affinity; 183 cpumask_t cpumask = irq_desc[virq].affinity;
241 cpumask_t tmp = CPU_MASK_NONE; 184 cpumask_t tmp = CPU_MASK_NONE;
242 185
243 if (!distribute_irqs) 186 if (!distribute_irqs)
@@ -258,23 +201,28 @@ static int get_irq_server(unsigned int irq)
258 201
259} 202}
260#else 203#else
261static int get_irq_server(unsigned int irq) 204static int get_irq_server(unsigned int virq)
262{ 205{
263 return default_server; 206 return default_server;
264} 207}
265#endif 208#endif
266 209
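
The server selection above only distinguishes two cases: an affinity mask covering every CPU routes the source to the global distribution server, anything narrower is collapsed to the hard (physical) id of the first online CPU in the mask. A minimal userspace sketch of that decision, assuming a plain 64-bit mask in place of cpumask_t; the hard_id table and server numbers here are invented:

#include <stdio.h>
#include <stdint.h>

#define DEFAULT_DISTRIB_SERVER 0xff     /* made-up "deliver to all cpus" server */
#define NO_SERVER              -1

/* hypothetical logical-to-hard cpu id table */
static const int hard_id[8] = { 0, 2, 4, 6, 8, 10, 12, 14 };

static int pick_server(uint64_t affinity, uint64_t online, uint64_t all_mask)
{
        uint64_t tmp;
        int cpu;

        if (affinity == all_mask)               /* deliver to all: distribution server */
                return DEFAULT_DISTRIB_SERVER;

        tmp = affinity & online;                /* cpus_and(tmp, cpu_online_map, cpumask) */
        if (tmp == 0)
                return NO_SERVER;

        for (cpu = 0; cpu < 8; cpu++)           /* first_cpu(tmp) */
                if (tmp & (1ull << cpu))
                        return hard_id[cpu];    /* get_hard_smp_processor_id() */
        return NO_SERVER;
}

int main(void)
{
        uint64_t all = 0xff, online = 0x0f;

        printf("all  -> server %d\n", pick_server(all, online, all));
        printf("cpu2 -> server %d\n", pick_server(1 << 2, online, all));
        return 0;
}

The same mask-to-server translation reappears further down in xics_set_affinity() when RTAS is asked to re-point an already mapped source.
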
267static void xics_enable_irq(unsigned int virq) 210
211static void xics_unmask_irq(unsigned int virq)
268{ 212{
269 unsigned int irq; 213 unsigned int irq;
270 int call_status; 214 int call_status;
271 unsigned int server; 215 unsigned int server;
272 216
273 irq = virt_irq_to_real(irq_offset_down(virq)); 217 pr_debug("xics: unmask virq %d\n", virq);
274 if (irq == XICS_IPI) 218
219 irq = (unsigned int)irq_map[virq].hwirq;
220 pr_debug(" -> map to hwirq 0x%x\n", irq);
221 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
275 return; 222 return;
276 223
277 server = get_irq_server(virq); 224 server = get_irq_server(virq);
225
278 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 226 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
279 DEFAULT_PRIORITY); 227 DEFAULT_PRIORITY);
280 if (call_status != 0) { 228 if (call_status != 0) {
@@ -293,7 +241,7 @@ static void xics_enable_irq(unsigned int virq)
293 } 241 }
294} 242}
295 243
296static void xics_disable_real_irq(unsigned int irq) 244static void xics_mask_real_irq(unsigned int irq)
297{ 245{
298 int call_status; 246 int call_status;
299 unsigned int server; 247 unsigned int server;
@@ -318,75 +266,86 @@ static void xics_disable_real_irq(unsigned int irq)
318 } 266 }
319} 267}
320 268
321static void xics_disable_irq(unsigned int virq) 269static void xics_mask_irq(unsigned int virq)
322{ 270{
323 unsigned int irq; 271 unsigned int irq;
324 272
325 irq = virt_irq_to_real(irq_offset_down(virq)); 273 pr_debug("xics: mask virq %d\n", virq);
326 xics_disable_real_irq(irq); 274
275 irq = (unsigned int)irq_map[virq].hwirq;
276 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
277 return;
278 xics_mask_real_irq(irq);
279}
280
281static unsigned int xics_startup(unsigned int virq)
282{
283 unsigned int irq;
284
285 /* force a reverse mapping of the interrupt so it gets in the cache */
286 irq = (unsigned int)irq_map[virq].hwirq;
287 irq_radix_revmap(xics_host, irq);
288
289 /* unmask it */
290 xics_unmask_irq(virq);
291 return 0;
327} 292}
328 293
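
The new xics_startup() above stores nothing itself; it asks irq_radix_revmap() for the hwirq so that the reverse (hardware number to Linux virq) entry lands in the tree before the first interrupt arrives, then unmasks the source. A minimal userspace model of that warm-the-cache pattern, assuming an open-addressing table in place of the kernel's radix tree; names such as revmap_insert() and revmap_lookup() are illustrative only:

#include <stdio.h>

#define NR_VIRQS   64
#define REVMAP_SZ  128
#define NO_IRQ     0

static unsigned int virq_to_hwirq[NR_VIRQS];      /* forward map, like irq_map[] */
static struct { unsigned int hw, virq; } revmap[REVMAP_SZ];

static void revmap_insert(unsigned int hw, unsigned int virq)
{
        unsigned int slot = hw % REVMAP_SZ;

        while (revmap[slot].virq != NO_IRQ)        /* linear probing */
                slot = (slot + 1) % REVMAP_SZ;
        revmap[slot].hw = hw;
        revmap[slot].virq = virq;
}

static unsigned int revmap_lookup(unsigned int hw)
{
        unsigned int slot = hw % REVMAP_SZ;

        while (revmap[slot].virq != NO_IRQ) {
                if (revmap[slot].hw == hw)
                        return revmap[slot].virq;  /* fast path: cached entry */
                slot = (slot + 1) % REVMAP_SZ;
        }
        /* slow path: scan the forward map, then cache the result */
        for (unsigned int v = 1; v < NR_VIRQS; v++)
                if (virq_to_hwirq[v] == hw) {
                        revmap_insert(hw, v);
                        return v;
                }
        return NO_IRQ;
}

int main(void)
{
        virq_to_hwirq[17] = 0x1234;                /* "map" hwirq 0x1234 to virq 17 */
        revmap_lookup(0x1234);                     /* startup(): force the reverse entry */
        printf("hwirq 0x1234 -> virq %u\n", revmap_lookup(0x1234));
        return 0;
}

Warming the entry at startup means the lookup taken in interrupt context by xics_remap_irq() below normally hits the cached entry instead of falling back to a scan.
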
329static void xics_end_irq(unsigned int irq) 294static void xics_eoi_direct(unsigned int virq)
330{ 295{
331 int cpu = smp_processor_id(); 296 int cpu = smp_processor_id();
297 unsigned int irq = (unsigned int)irq_map[virq].hwirq;
332 298
333 iosync(); 299 iosync();
334 ops->xirr_info_set(cpu, ((0xff << 24) | 300 direct_xirr_info_set(cpu, (0xff << 24) | irq);
335 (virt_irq_to_real(irq_offset_down(irq)))));
336
337} 301}
338 302
339static void xics_mask_and_ack_irq(unsigned int irq) 303
304static void xics_eoi_lpar(unsigned int virq)
340{ 305{
341 int cpu = smp_processor_id(); 306 int cpu = smp_processor_id();
307 unsigned int irq = (unsigned int)irq_map[virq].hwirq;
342 308
343 if (irq < irq_offset_value()) { 309 iosync();
344 i8259_pic.ack(irq); 310 lpar_xirr_info_set(cpu, (0xff << 24) | irq);
345 iosync();
346 ops->xirr_info_set(cpu, ((0xff<<24) |
347 xics_irq_8259_cascade_real));
348 iosync();
349 }
350} 311}
351 312
352int xics_get_irq(struct pt_regs *regs)
353{
354 unsigned int cpu = smp_processor_id();
355 unsigned int vec;
356 int irq;
357
358 vec = ops->xirr_info_get(cpu);
359 /* (vec >> 24) == old priority */
360 vec &= 0x00ffffff;
361
362 /* for sanity, this had better be < NR_IRQS - 16 */
363 if (vec == xics_irq_8259_cascade_real) {
364 irq = i8259_irq(regs);
365 xics_end_irq(irq_offset_up(xics_irq_8259_cascade));
366 } else if (vec == XICS_IRQ_SPURIOUS) {
367 irq = -1;
368 } else {
369 irq = real_irq_to_virt(vec);
370 if (irq == NO_IRQ)
371 irq = real_irq_to_virt_slowpath(vec);
372 if (irq == NO_IRQ) {
373 printk(KERN_ERR "Interrupt %u (real) is invalid,"
374 " disabling it.\n", vec);
375 xics_disable_real_irq(vec);
376 } else
377 irq = irq_offset_up(irq);
378 }
379 return irq;
380}
313static inline unsigned int xics_remap_irq(unsigned int vec)
314{
315 unsigned int irq;
316
317 vec &= 0x00ffffff;
318
319 if (vec == XICS_IRQ_SPURIOUS)
320 return NO_IRQ;
321 irq = irq_radix_revmap(xics_host, vec);
322 if (likely(irq != NO_IRQ))
323 return irq;
324
325 printk(KERN_ERR "Interrupt %u (real) is invalid,"
326 " disabling it.\n", vec);
327 xics_mask_real_irq(vec);
328 return NO_IRQ;
329}
381 330
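
The remapping helper above relies on the XIRR register layout: the top byte of the value read on interrupt entry is the previous CPPR (priority) and the low 24 bits are the source number, and end-of-interrupt is signalled by writing the source back with 0xff in the priority byte, which is what xics_eoi_direct() and xics_eoi_lpar() do. A small standalone illustration of that packing, using an invented register value:

#include <stdio.h>
#include <stdint.h>

#define XICS_IRQ_SPURIOUS 0

int main(void)
{
        uint32_t xirr = 0x05001234;              /* pretend value read from the XIRR */
        uint32_t old_cppr = xirr >> 24;          /* previous priority, restored on EOI */
        uint32_t vec = xirr & 0x00ffffff;        /* hardware interrupt source */

        if (vec == XICS_IRQ_SPURIOUS) {
                printf("spurious\n");
                return 0;
        }
        printf("source 0x%x, saved priority 0x%x\n", vec, old_cppr);

        /* EOI: write the source back with priority 0xff (lowest) in the
         * top byte, as the eoi handlers above do. */
        uint32_t eoi = (0xffu << 24) | vec;
        printf("EOI write: 0x%08x\n", eoi);
        return 0;
}
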
382#ifdef CONFIG_SMP
383
384static irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
385{
386 int cpu = smp_processor_id();
387
388 ops->qirr_info(cpu, 0xff);
389
331static unsigned int xics_get_irq_direct(struct pt_regs *regs)
332{
333 unsigned int cpu = smp_processor_id();
334
335 return xics_remap_irq(direct_xirr_info_get(cpu));
336}
337
338static unsigned int xics_get_irq_lpar(struct pt_regs *regs)
339{
340 unsigned int cpu = smp_processor_id();
341
342 return xics_remap_irq(lpar_xirr_info_get(cpu));
343}
344
345#ifdef CONFIG_SMP
346
347static irqreturn_t xics_ipi_dispatch(int cpu, struct pt_regs *regs)
348{
390 WARN_ON(cpu_is_offline(cpu)); 349 WARN_ON(cpu_is_offline(cpu));
391 350
392 while (xics_ipi_message[cpu].value) { 351 while (xics_ipi_message[cpu].value) {
@@ -418,18 +377,88 @@ static irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
418 return IRQ_HANDLED; 377 return IRQ_HANDLED;
419} 378}
420 379
380static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id, struct pt_regs *regs)
381{
382 int cpu = smp_processor_id();
383
384 direct_qirr_info(cpu, 0xff);
385
386 return xics_ipi_dispatch(cpu, regs);
387}
388
389static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id, struct pt_regs *regs)
390{
391 int cpu = smp_processor_id();
392
393 lpar_qirr_info(cpu, 0xff);
394
395 return xics_ipi_dispatch(cpu, regs);
396}
397
421void xics_cause_IPI(int cpu) 398void xics_cause_IPI(int cpu)
422{ 399{
423 ops->qirr_info(cpu, IPI_PRIORITY); 400 if (firmware_has_feature(FW_FEATURE_LPAR))
401 lpar_qirr_info(cpu, IPI_PRIORITY);
402 else
403 direct_qirr_info(cpu, IPI_PRIORITY);
424} 404}
405
425#endif /* CONFIG_SMP */ 406#endif /* CONFIG_SMP */
426 407
408static void xics_set_cpu_priority(int cpu, unsigned char cppr)
409{
410 if (firmware_has_feature(FW_FEATURE_LPAR))
411 lpar_cppr_info(cpu, cppr);
412 else
413 direct_cppr_info(cpu, cppr);
414 iosync();
415}
416
417static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
418{
419 unsigned int irq;
420 int status;
421 int xics_status[2];
422 unsigned long newmask;
423 cpumask_t tmp = CPU_MASK_NONE;
424
425 irq = (unsigned int)irq_map[virq].hwirq;
426 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
427 return;
428
429 status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
430
431 if (status) {
432 printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive "
433 "returns %d\n", irq, status);
434 return;
435 }
436
437 /* For the moment only implement delivery to all cpus or one cpu */
438 if (cpus_equal(cpumask, CPU_MASK_ALL)) {
439 newmask = default_distrib_server;
440 } else {
441 cpus_and(tmp, cpu_online_map, cpumask);
442 if (cpus_empty(tmp))
443 return;
444 newmask = get_hard_smp_processor_id(first_cpu(tmp));
445 }
446
447 status = rtas_call(ibm_set_xive, 3, 1, NULL,
448 irq, newmask, xics_status[1]);
449
450 if (status) {
451 printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive "
452 "returns %d\n", irq, status);
453 return;
454 }
455}
456
427void xics_setup_cpu(void) 457void xics_setup_cpu(void)
428{ 458{
429 int cpu = smp_processor_id(); 459 int cpu = smp_processor_id();
430 460
431 ops->cppr_info(cpu, 0xff); 461 xics_set_cpu_priority(cpu, 0xff);
432 iosync();
433 462
434 /* 463 /*
435 * Put the calling processor into the GIQ. This is really only 464 * Put the calling processor into the GIQ. This is really only
@@ -442,72 +471,266 @@ void xics_setup_cpu(void)
442 (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); 471 (1UL << interrupt_server_size) - 1 - default_distrib_server, 1);
443} 472}
444 473
445void xics_init_IRQ(void) 474
475static struct irq_chip xics_pic_direct = {
476 .typename = " XICS ",
477 .startup = xics_startup,
478 .mask = xics_mask_irq,
479 .unmask = xics_unmask_irq,
480 .eoi = xics_eoi_direct,
481 .set_affinity = xics_set_affinity
482};
483
484
485static struct irq_chip xics_pic_lpar = {
486 .typename = " XICS ",
487 .startup = xics_startup,
488 .mask = xics_mask_irq,
489 .unmask = xics_unmask_irq,
490 .eoi = xics_eoi_lpar,
491 .set_affinity = xics_set_affinity
492};
493
494
495static int xics_host_match(struct irq_host *h, struct device_node *node)
496{
497 /* IBM machines have interrupt parents of various funky types for things
498 * like vdevices, events, etc... The trick we use here is to match
499 * everything except the legacy 8259, which is compatible with "chrp,iic"
500 */
501 return !device_is_compatible(node, "chrp,iic");
502}
503
504static int xics_host_map_direct(struct irq_host *h, unsigned int virq,
505 irq_hw_number_t hw, unsigned int flags)
506{
507 unsigned int sense = flags & IRQ_TYPE_SENSE_MASK;
508
509 pr_debug("xics: map_direct virq %d, hwirq 0x%lx, flags: 0x%x\n",
510 virq, hw, flags);
511
512 if (sense && sense != IRQ_TYPE_LEVEL_LOW)
513 printk(KERN_WARNING "xics: using unsupported sense 0x%x"
514 " for irq %d (h: 0x%lx)\n", flags, virq, hw);
515
516 get_irq_desc(virq)->status |= IRQ_LEVEL;
517 set_irq_chip_and_handler(virq, &xics_pic_direct, handle_fasteoi_irq);
518 return 0;
519}
520
521static int xics_host_map_lpar(struct irq_host *h, unsigned int virq,
522 irq_hw_number_t hw, unsigned int flags)
523{
524 unsigned int sense = flags & IRQ_TYPE_SENSE_MASK;
525
526 pr_debug("xics: map_lpar virq %d, hwirq 0x%lx, flags: 0x%x\n",
527 virq, hw, flags);
528
529 if (sense && sense != IRQ_TYPE_LEVEL_LOW)
530 printk(KERN_WARNING "xics: using unsupported sense 0x%x"
531 " for irq %d (h: 0x%lx)\n", flags, virq, hw);
532
533 get_irq_desc(virq)->status |= IRQ_LEVEL;
534 set_irq_chip_and_handler(virq, &xics_pic_lpar, handle_fasteoi_irq);
535 return 0;
536}
537
538static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
539 u32 *intspec, unsigned int intsize,
540 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
541
542{
543 /* Current xics implementation translates everything
544 * to level. It is not technically right for MSIs but this
545 * is irrelevant at this point. We might get smarter in the future
546 */
547 *out_hwirq = intspec[0];
548 *out_flags = IRQ_TYPE_LEVEL_LOW;
549
550 return 0;
551}
552
553static struct irq_host_ops xics_host_direct_ops = {
554 .match = xics_host_match,
555 .map = xics_host_map_direct,
556 .xlate = xics_host_xlate,
557};
558
559static struct irq_host_ops xics_host_lpar_ops = {
560 .match = xics_host_match,
561 .map = xics_host_map_lpar,
562 .xlate = xics_host_xlate,
563};
564
565static void __init xics_init_host(void)
566{
567 struct irq_host_ops *ops;
568
569 if (firmware_has_feature(FW_FEATURE_LPAR))
570 ops = &xics_host_lpar_ops;
571 else
572 ops = &xics_host_direct_ops;
573 xics_host = irq_alloc_host(IRQ_HOST_MAP_TREE, 0, ops,
574 XICS_IRQ_SPURIOUS);
575 BUG_ON(xics_host == NULL);
576 irq_set_default_host(xics_host);
577}
578
579static void __init xics_map_one_cpu(int hw_id, unsigned long addr,
580 unsigned long size)
446{ 581{
582#ifdef CONFIG_SMP
447 int i; 583 int i;
448 unsigned long intr_size = 0;
449 struct device_node *np;
450 uint *ireg, ilen, indx = 0;
451 unsigned long intr_base = 0;
452 struct xics_interrupt_node {
453 unsigned long addr;
454 unsigned long size;
455 } intnodes[NR_CPUS];
456 584
457 ppc64_boot_msg(0x20, "XICS Init");
585 /* This may look gross, but it's good enough for now: we don't quite
586 * have a hard -> linux processor id mapping.
587 */
588 for_each_possible_cpu(i) {
589 if (!cpu_present(i))
590 continue;
591 if (hw_id == get_hard_smp_processor_id(i)) {
592 xics_per_cpu[i] = ioremap(addr, size);
593 return;
594 }
595 }
596#else
597 if (hw_id != 0)
598 return;
599 xics_per_cpu[0] = ioremap(addr, size);
600#endif /* CONFIG_SMP */
601}
458 602
459 ibm_get_xive = rtas_token("ibm,get-xive"); 603static void __init xics_init_one_node(struct device_node *np,
460 ibm_set_xive = rtas_token("ibm,set-xive"); 604 unsigned int *indx)
461 ibm_int_on = rtas_token("ibm,int-on"); 605{
462 ibm_int_off = rtas_token("ibm,int-off"); 606 unsigned int ilen;
607 u32 *ireg;
463 608
464 np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation");
465 if (!np)
466 panic("xics_init_IRQ: can't find interrupt presentation");
467
468nextnode:
469 ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL);
609 /* This code makes the theoretically broken assumption that the interrupt
610 * server numbers are the same as the hard CPU numbers.
611 * This happens to be the case so far, but we are playing with fire...
612 * it should be fixed one of these days. -BenH.
613 */
614 ireg = (u32 *)get_property(np, "ibm,interrupt-server-ranges", NULL);
615
616 /* Does that ever happen? We'll know soon enough... but even good old
617 * f80 does have that property...
618 */
619 WARN_ON(ireg == NULL);
470 if (ireg) { 620 if (ireg) {
471 /* 621 /*
472 * set node starting index for this node 622 * set node starting index for this node
473 */ 623 */
474 indx = *ireg; 624 *indx = *ireg;
475 } 625 }
476 626 ireg = (u32 *)get_property(np, "reg", &ilen);
477 ireg = (uint *)get_property(np, "reg", &ilen);
478 if (!ireg) 627 if (!ireg)
479 panic("xics_init_IRQ: can't find interrupt reg property"); 628 panic("xics_init_IRQ: can't find interrupt reg property");
480 629
481 while (ilen) { 630 while (ilen >= (4 * sizeof(u32))) {
482 intnodes[indx].addr = (unsigned long)*ireg++ << 32; 631 unsigned long addr, size;
483 ilen -= sizeof(uint); 632
484 intnodes[indx].addr |= *ireg++; 633 /* XXX Use proper OF parsing code here !!! */
485 ilen -= sizeof(uint); 634 addr = (unsigned long)*ireg++ << 32;
486 intnodes[indx].size = (unsigned long)*ireg++ << 32; 635 ilen -= sizeof(u32);
487 ilen -= sizeof(uint); 636 addr |= *ireg++;
488 intnodes[indx].size |= *ireg++; 637 ilen -= sizeof(u32);
489 ilen -= sizeof(uint); 638 size = (unsigned long)*ireg++ << 32;
490 indx++; 639 ilen -= sizeof(u32);
491 if (indx >= NR_CPUS) break; 640 size |= *ireg++;
641 ilen -= sizeof(u32);
642 xics_map_one_cpu(*indx, addr, size);
643 (*indx)++;
644 }
645}
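
The loop above hand-decodes the "reg" property as consecutive (address, size) pairs, each 64-bit value built from two 32-bit cells, which is why it carries the "XXX Use proper OF parsing code" note. A standalone sketch of the same cell packing over a made-up property buffer:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* hypothetical "reg" property: one (addr, size) pair, 2+2 cells */
        uint32_t reg[] = { 0x00000008, 0x02000000, 0x00000000, 0x00001000 };
        unsigned int ilen = sizeof(reg);
        uint32_t *ireg = reg;

        while (ilen >= 4 * sizeof(uint32_t)) {
                uint64_t addr, size;

                addr = (uint64_t)*ireg++ << 32;  /* high cell */
                addr |= *ireg++;                 /* low cell  */
                size = (uint64_t)*ireg++ << 32;
                size |= *ireg++;
                ilen -= 4 * sizeof(uint32_t);

                printf("addr 0x%016llx size 0x%llx\n",
                       (unsigned long long)addr, (unsigned long long)size);
        }
        return 0;
}
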
646
647
648static void __init xics_setup_8259_cascade(void)
649{
650 struct device_node *np, *old, *found = NULL;
651 int cascade, naddr;
652 u32 *addrp;
653 unsigned long intack = 0;
654
655 for_each_node_by_type(np, "interrupt-controller")
656 if (device_is_compatible(np, "chrp,iic")) {
657 found = np;
658 break;
659 }
660 if (found == NULL) {
661 printk(KERN_DEBUG "xics: no ISA interrupt controller\n");
662 return;
663 }
664 cascade = irq_of_parse_and_map(found, 0);
665 if (cascade == NO_IRQ) {
666 printk(KERN_ERR "xics: failed to map cascade interrupt");
667 return;
668 }
669 pr_debug("xics: cascade mapped to irq %d\n", cascade);
670
671 for (old = of_node_get(found); old != NULL ; old = np) {
672 np = of_get_parent(old);
673 of_node_put(old);
674 if (np == NULL)
675 break;
676 if (strcmp(np->name, "pci") != 0)
677 continue;
678 addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", NULL);
679 if (addrp == NULL)
680 continue;
681 naddr = prom_n_addr_cells(np);
682 intack = addrp[naddr-1];
683 if (naddr > 1)
684 intack |= ((unsigned long)addrp[naddr-2]) << 32;
685 }
686 if (intack)
687 printk(KERN_DEBUG "xics: PCI 8259 intack at 0x%016lx\n", intack);
688 i8259_init(found, intack);
689 of_node_put(found);
690 set_irq_chained_handler(cascade, pseries_8259_cascade);
691}
692
693void __init xics_init_IRQ(void)
694{
695 int i;
696 struct device_node *np;
697 u32 *ireg, ilen, indx = 0;
698 int found = 0;
699
700 ppc64_boot_msg(0x20, "XICS Init");
701
702 ibm_get_xive = rtas_token("ibm,get-xive");
703 ibm_set_xive = rtas_token("ibm,set-xive");
704 ibm_int_on = rtas_token("ibm,int-on");
705 ibm_int_off = rtas_token("ibm,int-off");
706
707 for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") {
708 found = 1;
709 if (firmware_has_feature(FW_FEATURE_LPAR))
710 break;
711 xics_init_one_node(np, &indx);
492 } 712 }
713 if (found == 0)
714 return;
493 715
494 np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation"); 716 xics_init_host();
495 if ((indx < NR_CPUS) && np) goto nextnode;
496 717
497 /* Find the server numbers for the boot cpu. */ 718 /* Find the server numbers for the boot cpu. */
498 for (np = of_find_node_by_type(NULL, "cpu"); 719 for (np = of_find_node_by_type(NULL, "cpu");
499 np; 720 np;
500 np = of_find_node_by_type(np, "cpu")) { 721 np = of_find_node_by_type(np, "cpu")) {
501 ireg = (uint *)get_property(np, "reg", &ilen); 722 ireg = (u32 *)get_property(np, "reg", &ilen);
502 if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) { 723 if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) {
503 ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", 724 ireg = (u32 *)get_property(np,
504 &ilen); 725 "ibm,ppc-interrupt-gserver#s",
726 &ilen);
505 i = ilen / sizeof(int); 727 i = ilen / sizeof(int);
506 if (ireg && i > 0) { 728 if (ireg && i > 0) {
507 default_server = ireg[0]; 729 default_server = ireg[0];
508 default_distrib_server = ireg[i-1]; /* take last element */ 730 /* take last element */
731 default_distrib_server = ireg[i-1];
509 } 732 }
510 ireg = (uint *)get_property(np, 733 ireg = (u32 *)get_property(np,
511 "ibm,interrupt-server#-size", NULL); 734 "ibm,interrupt-server#-size", NULL);
512 if (ireg) 735 if (ireg)
513 interrupt_server_size = *ireg; 736 interrupt_server_size = *ireg;
@@ -516,135 +739,48 @@ nextnode:
516 } 739 }
517 of_node_put(np); 740 of_node_put(np);
518 741
519 intr_base = intnodes[0].addr;
520 intr_size = intnodes[0].size;
521
522 np = of_find_node_by_type(NULL, "interrupt-controller");
523 if (!np) {
524 printk(KERN_DEBUG "xics: no ISA interrupt controller\n");
525 xics_irq_8259_cascade_real = -1;
526 xics_irq_8259_cascade = -1;
527 } else {
528 ireg = (uint *) get_property(np, "interrupts", NULL);
529 if (!ireg)
530 panic("xics_init_IRQ: can't find ISA interrupts property");
531
532 xics_irq_8259_cascade_real = *ireg;
533 xics_irq_8259_cascade
534 = virt_irq_create_mapping(xics_irq_8259_cascade_real);
535 i8259_init(0, 0);
536 of_node_put(np);
537 }
538
539 if (firmware_has_feature(FW_FEATURE_LPAR)) 742 if (firmware_has_feature(FW_FEATURE_LPAR))
540 ops = &pSeriesLP_ops; 743 ppc_md.get_irq = xics_get_irq_lpar;
541 else { 744 else
542#ifdef CONFIG_SMP 745 ppc_md.get_irq = xics_get_irq_direct;
543 for_each_possible_cpu(i) {
544 int hard_id;
545
546 /* FIXME: Do this dynamically! --RR */
547 if (!cpu_present(i))
548 continue;
549
550 hard_id = get_hard_smp_processor_id(i);
551 xics_per_cpu[i] = ioremap(intnodes[hard_id].addr,
552 intnodes[hard_id].size);
553 }
554#else
555 xics_per_cpu[0] = ioremap(intr_base, intr_size);
556#endif /* CONFIG_SMP */
557 }
558
559 for (i = irq_offset_value(); i < NR_IRQS; ++i)
560 get_irq_desc(i)->chip = &xics_pic;
561 746
562 xics_setup_cpu(); 747 xics_setup_cpu();
563 748
749 xics_setup_8259_cascade();
750
564 ppc64_boot_msg(0x21, "XICS Done"); 751 ppc64_boot_msg(0x21, "XICS Done");
565} 752}
566 753
567/*
568 * We can't do this in init_IRQ because we need the memory subsystem up for
569 * request_irq()
570 */
571static int __init xics_setup_i8259(void)
572{
573 if (ppc64_interrupt_controller == IC_PPC_XIC &&
574 xics_irq_8259_cascade != -1) {
575 if (request_irq(irq_offset_up(xics_irq_8259_cascade),
576 no_action, 0, "8259 cascade", NULL))
577 printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 "
578 "cascade\n");
579 }
580 return 0;
581}
582arch_initcall(xics_setup_i8259);
583 754
584#ifdef CONFIG_SMP 755#ifdef CONFIG_SMP
585void xics_request_IPIs(void) 756void xics_request_IPIs(void)
586{ 757{
587 virt_irq_to_real_map[XICS_IPI] = XICS_IPI; 758 unsigned int ipi;
759
760 ipi = irq_create_mapping(xics_host, XICS_IPI, 0);
761 BUG_ON(ipi == NO_IRQ);
588 762
589 /* 763 /*
590 * IPIs are marked IRQF_DISABLED as they must run with irqs 764 * IPIs are marked IRQF_DISABLED as they must run with irqs
591 * disabled 765 * disabled
592 */ 766 */
593 request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, 767 set_irq_handler(ipi, handle_percpu_irq);
594 IRQF_DISABLED, "IPI", NULL); 768 if (firmware_has_feature(FW_FEATURE_LPAR))
595 get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU; 769 request_irq(ipi, xics_ipi_action_lpar, IRQF_DISABLED,
596} 770 "IPI", NULL);
597#endif 771 else
598 772 request_irq(ipi, xics_ipi_action_direct, IRQF_DISABLED,
599static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) 773 "IPI", NULL);
600{
601 unsigned int irq;
602 int status;
603 int xics_status[2];
604 unsigned long newmask;
605 cpumask_t tmp = CPU_MASK_NONE;
606
607 irq = virt_irq_to_real(irq_offset_down(virq));
608 if (irq == XICS_IPI || irq == NO_IRQ)
609 return;
610
611 status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
612
613 if (status) {
614 printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive "
615 "returns %d\n", irq, status);
616 return;
617 }
618
619 /* For the moment only implement delivery to all cpus or one cpu */
620 if (cpus_equal(cpumask, CPU_MASK_ALL)) {
621 newmask = default_distrib_server;
622 } else {
623 cpus_and(tmp, cpu_online_map, cpumask);
624 if (cpus_empty(tmp))
625 return;
626 newmask = get_hard_smp_processor_id(first_cpu(tmp));
627 }
628
629 status = rtas_call(ibm_set_xive, 3, 1, NULL,
630 irq, newmask, xics_status[1]);
631
632 if (status) {
633 printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive "
634 "returns %d\n", irq, status);
635 return;
636 }
637} 774}
775#endif /* CONFIG_SMP */
638 776
639void xics_teardown_cpu(int secondary) 777void xics_teardown_cpu(int secondary)
640{ 778{
641 int cpu = smp_processor_id(); 779 int cpu = smp_processor_id();
780 unsigned int ipi;
781 struct irq_desc *desc;
642 782
643 ops->cppr_info(cpu, 0x00); 783 xics_set_cpu_priority(cpu, 0);
644 iosync();
645
646 /* Clear IPI */
647 ops->qirr_info(cpu, 0xff);
648 784
649 /* 785 /*
650 * we need to EOI the IPI if we got here from kexec down IPI 786 * we need to EOI the IPI if we got here from kexec down IPI
@@ -653,7 +789,13 @@ void xics_teardown_cpu(int secondary)
653 * should we be flagging idle loop instead? 789 * should we be flagging idle loop instead?
654 * or creating some task to be scheduled? 790 * or creating some task to be scheduled?
655 */ 791 */
656 ops->xirr_info_set(cpu, XICS_IPI); 792
793 ipi = irq_find_mapping(xics_host, XICS_IPI);
794 if (ipi == XICS_IRQ_SPURIOUS)
795 return;
796 desc = get_irq_desc(ipi);
797 if (desc->chip && desc->chip->eoi)
798 desc->chip->eoi(XICS_IPI);
657 799
658 /* 800 /*
659 * Some machines need to have at least one cpu in the GIQ, 801 * Some machines need to have at least one cpu in the GIQ,
@@ -661,8 +803,8 @@ void xics_teardown_cpu(int secondary)
661 */ 803 */
662 if (secondary) 804 if (secondary)
663 rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, 805 rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
664 (1UL << interrupt_server_size) - 1 - 806 (1UL << interrupt_server_size) - 1 -
665 default_distrib_server, 0); 807 default_distrib_server, 0);
666} 808}
667 809
668#ifdef CONFIG_HOTPLUG_CPU 810#ifdef CONFIG_HOTPLUG_CPU
@@ -674,8 +816,7 @@ void xics_migrate_irqs_away(void)
674 unsigned int irq, virq, cpu = smp_processor_id(); 816 unsigned int irq, virq, cpu = smp_processor_id();
675 817
676 /* Reject any interrupt that was queued to us... */ 818 /* Reject any interrupt that was queued to us... */
677 ops->cppr_info(cpu, 0); 819 xics_set_cpu_priority(cpu, 0);
678 iosync();
679 820
680 /* remove ourselves from the global interrupt queue */ 821 /* remove ourselves from the global interrupt queue */
681 status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, 822 status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
@@ -683,24 +824,23 @@ void xics_migrate_irqs_away(void)
683 WARN_ON(status < 0); 824 WARN_ON(status < 0);
684 825
685 /* Allow IPIs again... */ 826 /* Allow IPIs again... */
686 ops->cppr_info(cpu, DEFAULT_PRIORITY); 827 xics_set_cpu_priority(cpu, DEFAULT_PRIORITY);
687 iosync();
688 828
689 for_each_irq(virq) { 829 for_each_irq(virq) {
690 irq_desc_t *desc; 830 struct irq_desc *desc;
691 int xics_status[2]; 831 int xics_status[2];
692 unsigned long flags; 832 unsigned long flags;
693 833
694 /* We can't set affinity on ISA interrupts */ 834 /* We can't set affinity on ISA interrupts */
695 if (virq < irq_offset_value()) 835 if (virq < NUM_ISA_INTERRUPTS)
696 continue; 836 continue;
697 837 if (irq_map[virq].host != xics_host)
698 desc = get_irq_desc(virq); 838 continue;
699 irq = virt_irq_to_real(irq_offset_down(virq)); 839 irq = (unsigned int)irq_map[virq].hwirq;
700
701 /* We need to get IPIs still. */ 840 /* We need to get IPIs still. */
702 if (irq == XICS_IPI || irq == NO_IRQ) 841 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
703 continue; 842 continue;
843 desc = get_irq_desc(virq);
704 844
705 /* We only need to migrate enabled IRQS */ 845 /* We only need to migrate enabled IRQS */
706 if (desc == NULL || desc->chip == NULL 846 if (desc == NULL || desc->chip == NULL
diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h
index e14c70868f1d..6ee1055b0ffb 100644
--- a/arch/powerpc/platforms/pseries/xics.h
+++ b/arch/powerpc/platforms/pseries/xics.h
@@ -14,13 +14,12 @@
14 14
15#include <linux/cache.h> 15#include <linux/cache.h>
16 16
17void xics_init_IRQ(void); 17extern void xics_init_IRQ(void);
18int xics_get_irq(struct pt_regs *); 18extern void xics_setup_cpu(void);
19void xics_setup_cpu(void); 19extern void xics_teardown_cpu(int secondary);
20void xics_teardown_cpu(int secondary); 20extern void xics_cause_IPI(int cpu);
21void xics_cause_IPI(int cpu); 21extern void xics_request_IPIs(void);
22void xics_request_IPIs(void); 22extern void xics_migrate_irqs_away(void);
23void xics_migrate_irqs_away(void);
24 23
25/* first argument is ignored for now*/ 24/* first argument is ignored for now*/
26void pSeriesLP_cppr_info(int n_cpu, u8 value); 25void pSeriesLP_cppr_info(int n_cpu, u8 value);
@@ -31,4 +30,8 @@ struct xics_ipi_struct {
31 30
32extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; 31extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
33 32
33struct irq_desc;
34extern void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc,
35 struct pt_regs *regs);
36
34#endif /* _POWERPC_KERNEL_XICS_H */ 37#endif /* _POWERPC_KERNEL_XICS_H */
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 054bd8b41ef5..cebfae242602 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -4,7 +4,6 @@ endif
4 4
5obj-$(CONFIG_MPIC) += mpic.o 5obj-$(CONFIG_MPIC) += mpic.o
6obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o 6obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o
7obj-$(CONFIG_PPC_I8259) += i8259.o
8obj-$(CONFIG_PPC_MPC106) += grackle.o 7obj-$(CONFIG_PPC_MPC106) += grackle.o
9obj-$(CONFIG_BOOKE) += dcr.o 8obj-$(CONFIG_BOOKE) += dcr.o
10obj-$(CONFIG_40x) += dcr.o 9obj-$(CONFIG_40x) += dcr.o
@@ -14,3 +13,7 @@ obj-$(CONFIG_PPC_83xx) += ipic.o
14obj-$(CONFIG_FSL_SOC) += fsl_soc.o 13obj-$(CONFIG_FSL_SOC) += fsl_soc.o
15obj-$(CONFIG_PPC_TODC) += todc.o 14obj-$(CONFIG_PPC_TODC) += todc.o
16obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o 15obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o
16
17ifeq ($(CONFIG_PPC_MERGE),y)
18obj-$(CONFIG_PPC_I8259) += i8259.o
19 endif
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 1a3ef1ab9d6e..72c73a6105cd 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -6,11 +6,16 @@
6 * as published by the Free Software Foundation; either version 6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 */ 8 */
9#undef DEBUG
10
9#include <linux/init.h> 11#include <linux/init.h>
10#include <linux/ioport.h> 12#include <linux/ioport.h>
11#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/kernel.h>
15#include <linux/delay.h>
12#include <asm/io.h> 16#include <asm/io.h>
13#include <asm/i8259.h> 17#include <asm/i8259.h>
18#include <asm/prom.h>
14 19
15static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */ 20static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */
16 21
@@ -20,7 +25,8 @@ static unsigned char cached_8259[2] = { 0xff, 0xff };
20 25
21static DEFINE_SPINLOCK(i8259_lock); 26static DEFINE_SPINLOCK(i8259_lock);
22 27
23static int i8259_pic_irq_offset; 28static struct device_node *i8259_node;
29static struct irq_host *i8259_host;
24 30
25/* 31/*
26 * Acknowledge the IRQ using either the PCI host bridge's interrupt 32 * Acknowledge the IRQ using either the PCI host bridge's interrupt
@@ -28,16 +34,18 @@ static int i8259_pic_irq_offset;
28 * which is called. It should be noted that polling is broken on some 34 * which is called. It should be noted that polling is broken on some
29 * IBM and Motorola PReP boxes so we must use the int-ack feature on them. 35 * IBM and Motorola PReP boxes so we must use the int-ack feature on them.
30 */ 36 */
31int i8259_irq(struct pt_regs *regs) 37unsigned int i8259_irq(struct pt_regs *regs)
32{ 38{
33 int irq; 39 int irq;
34 40 int lock = 0;
35 spin_lock(&i8259_lock);
36 41
37 /* Either int-ack or poll for the IRQ */ 42 /* Either int-ack or poll for the IRQ */
38 if (pci_intack) 43 if (pci_intack)
39 irq = readb(pci_intack); 44 irq = readb(pci_intack);
40 else { 45 else {
46 spin_lock(&i8259_lock);
47 lock = 1;
48
41 /* Perform an interrupt acknowledge cycle on controller 1. */ 49 /* Perform an interrupt acknowledge cycle on controller 1. */
42 outb(0x0C, 0x20); /* prepare for poll */ 50 outb(0x0C, 0x20); /* prepare for poll */
43 irq = inb(0x20) & 7; 51 irq = inb(0x20) & 7;
@@ -62,16 +70,13 @@ int i8259_irq(struct pt_regs *regs)
62 if (!pci_intack) 70 if (!pci_intack)
63 outb(0x0B, 0x20); /* ISR register */ 71 outb(0x0B, 0x20); /* ISR register */
64 if(~inb(0x20) & 0x80) 72 if(~inb(0x20) & 0x80)
65 irq = -1; 73 irq = NO_IRQ;
66 } 74 } else if (irq == 0xff)
75 irq = NO_IRQ;
67 76
68 spin_unlock(&i8259_lock); 77 if (lock)
69 return irq + i8259_pic_irq_offset; 78 spin_unlock(&i8259_lock);
70} 79 return irq;
71
72int i8259_irq_cascade(struct pt_regs *regs, void *unused)
73{
74 return i8259_irq(regs);
75} 80}
76 81
77static void i8259_mask_and_ack_irq(unsigned int irq_nr) 82static void i8259_mask_and_ack_irq(unsigned int irq_nr)
@@ -79,7 +84,6 @@ static void i8259_mask_and_ack_irq(unsigned int irq_nr)
79 unsigned long flags; 84 unsigned long flags;
80 85
81 spin_lock_irqsave(&i8259_lock, flags); 86 spin_lock_irqsave(&i8259_lock, flags);
82 irq_nr -= i8259_pic_irq_offset;
83 if (irq_nr > 7) { 87 if (irq_nr > 7) {
84 cached_A1 |= 1 << (irq_nr-8); 88 cached_A1 |= 1 << (irq_nr-8);
85 inb(0xA1); /* DUMMY */ 89 inb(0xA1); /* DUMMY */
@@ -105,8 +109,9 @@ static void i8259_mask_irq(unsigned int irq_nr)
105{ 109{
106 unsigned long flags; 110 unsigned long flags;
107 111
112 pr_debug("i8259_mask_irq(%d)\n", irq_nr);
113
108 spin_lock_irqsave(&i8259_lock, flags); 114 spin_lock_irqsave(&i8259_lock, flags);
109 irq_nr -= i8259_pic_irq_offset;
110 if (irq_nr < 8) 115 if (irq_nr < 8)
111 cached_21 |= 1 << irq_nr; 116 cached_21 |= 1 << irq_nr;
112 else 117 else
@@ -119,8 +124,9 @@ static void i8259_unmask_irq(unsigned int irq_nr)
119{ 124{
120 unsigned long flags; 125 unsigned long flags;
121 126
127 pr_debug("i8259_unmask_irq(%d)\n", irq_nr);
128
122 spin_lock_irqsave(&i8259_lock, flags); 129 spin_lock_irqsave(&i8259_lock, flags);
123 irq_nr -= i8259_pic_irq_offset;
124 if (irq_nr < 8) 130 if (irq_nr < 8)
125 cached_21 &= ~(1 << irq_nr); 131 cached_21 &= ~(1 << irq_nr);
126 else 132 else
@@ -129,19 +135,11 @@ static void i8259_unmask_irq(unsigned int irq_nr)
129 spin_unlock_irqrestore(&i8259_lock, flags); 135 spin_unlock_irqrestore(&i8259_lock, flags);
130} 136}
131 137
132static void i8259_end_irq(unsigned int irq) 138static struct irq_chip i8259_pic = {
133{ 139 .typename = " i8259 ",
134 if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) 140 .mask = i8259_mask_irq,
135 && irq_desc[irq].action) 141 .unmask = i8259_unmask_irq,
136 i8259_unmask_irq(irq); 142 .mask_ack = i8259_mask_and_ack_irq,
137}
138
139struct hw_interrupt_type i8259_pic = {
140 .typename = " i8259 ",
141 .enable = i8259_unmask_irq,
142 .disable = i8259_mask_irq,
143 .ack = i8259_mask_and_ack_irq,
144 .end = i8259_end_irq,
145}; 143};
146 144
147static struct resource pic1_iores = { 145static struct resource pic1_iores = {
@@ -165,25 +163,84 @@ static struct resource pic_edgectrl_iores = {
165 .flags = IORESOURCE_BUSY, 163 .flags = IORESOURCE_BUSY,
166}; 164};
167 165
168static struct irqaction i8259_irqaction = { 166static int i8259_host_match(struct irq_host *h, struct device_node *node)
169 .handler = no_action, 167{
170 .flags = IRQF_DISABLED, 168 return i8259_node == NULL || i8259_node == node;
171 .mask = CPU_MASK_NONE, 169}
172 .name = "82c59 secondary cascade", 170
171static int i8259_host_map(struct irq_host *h, unsigned int virq,
172 irq_hw_number_t hw, unsigned int flags)
173{
174 pr_debug("i8259_host_map(%d, 0x%lx)\n", virq, hw);
175
176 /* We block the internal cascade */
177 if (hw == 2)
178 get_irq_desc(virq)->status |= IRQ_NOREQUEST;
179
180 /* We use level handling only for now; we might want to
181 * be more cautious here, but this works for the time being
182 */
183 get_irq_desc(virq)->status |= IRQ_LEVEL;
184 set_irq_chip_and_handler(virq, &i8259_pic, handle_level_irq);
185 return 0;
186}
187
188static void i8259_host_unmap(struct irq_host *h, unsigned int virq)
189{
190 /* Make sure irq is masked in hardware */
191 i8259_mask_irq(virq);
192
193 /* remove chip and handler */
194 set_irq_chip_and_handler(virq, NULL, NULL);
195
196 /* Make sure it's completed */
197 synchronize_irq(virq);
198}
199
200static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
201 u32 *intspec, unsigned int intsize,
202 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
203{
204 static unsigned char map_isa_senses[4] = {
205 IRQ_TYPE_LEVEL_LOW,
206 IRQ_TYPE_LEVEL_HIGH,
207 IRQ_TYPE_EDGE_FALLING,
208 IRQ_TYPE_EDGE_RISING,
209 };
210
211 *out_hwirq = intspec[0];
212 if (intsize > 1 && intspec[1] < 4)
213 *out_flags = map_isa_senses[intspec[1]];
214 else
215 *out_flags = IRQ_TYPE_NONE;
216
217 return 0;
218}
219
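
In the two-cell ISA interrupt specifier handled by the xlate hook above, the first cell is the hardware IRQ number and the second selects the sense; the hook only indexes a four-entry table to turn that encoding into the generic trigger flags. A simplified, self-contained version of the decode, with placeholder flag values rather than the kernel's IRQ_TYPE_* constants:

#include <stdio.h>
#include <stdint.h>

enum irq_type {                          /* stand-ins for IRQ_TYPE_* */
        TYPE_NONE,
        TYPE_LEVEL_LOW,
        TYPE_LEVEL_HIGH,
        TYPE_EDGE_FALLING,
        TYPE_EDGE_RISING,
};

static const enum irq_type map_isa_senses[4] = {
        TYPE_LEVEL_LOW,
        TYPE_LEVEL_HIGH,
        TYPE_EDGE_FALLING,
        TYPE_EDGE_RISING,
};

static int isa_xlate(const uint32_t *intspec, unsigned int intsize,
                     unsigned long *out_hwirq, enum irq_type *out_flags)
{
        *out_hwirq = intspec[0];
        if (intsize > 1 && intspec[1] < 4)
                *out_flags = map_isa_senses[intspec[1]];
        else
                *out_flags = TYPE_NONE;
        return 0;
}

int main(void)
{
        uint32_t spec[2] = { 14, 0 };    /* IRQ 14, sense code 0 = level low */
        unsigned long hwirq;
        enum irq_type flags;

        isa_xlate(spec, 2, &hwirq, &flags);
        printf("hwirq %lu, flags %d\n", hwirq, flags);
        return 0;
}
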
220static struct irq_host_ops i8259_host_ops = {
221 .match = i8259_host_match,
222 .map = i8259_host_map,
223 .unmap = i8259_host_unmap,
224 .xlate = i8259_host_xlate,
173}; 225};
174 226
175/*
176 * i8259_init()
177 * intack_addr - PCI interrupt acknowledge (real) address which will return
178 * the active irq from the 8259
227/****
228 * i8259_init - Initialize the legacy controller
229 * @node: device node of the legacy PIC (can be NULL, but then, it will match
230 * all interrupts, so beware)
231 * @intack_addr: PCI interrupt acknowledge (real) address which will return
232 * the active irq from the 8259
179 */ 233 */
180void __init i8259_init(unsigned long intack_addr, int offset) 234void i8259_init(struct device_node *node, unsigned long intack_addr)
181{ 235{
182 unsigned long flags; 236 unsigned long flags;
183 int i;
184 237
238 /* initialize the controller */
185 spin_lock_irqsave(&i8259_lock, flags); 239 spin_lock_irqsave(&i8259_lock, flags);
186 i8259_pic_irq_offset = offset; 240
241 /* Mask all first */
242 outb(0xff, 0xA1);
243 outb(0xff, 0x21);
187 244
188 /* init master interrupt controller */ 245 /* init master interrupt controller */
189 outb(0x11, 0x20); /* Start init sequence */ 246 outb(0x11, 0x20); /* Start init sequence */
@@ -197,21 +254,36 @@ void __init i8259_init(unsigned long intack_addr, int offset)
197 outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */ 254 outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */
198 outb(0x01, 0xA1); /* Select 8086 mode */ 255 outb(0x01, 0xA1); /* Select 8086 mode */
199 256
257 /* That thing is slow */
258 udelay(100);
259
200 /* always read ISR */ 260 /* always read ISR */
201 outb(0x0B, 0x20); 261 outb(0x0B, 0x20);
202 outb(0x0B, 0xA0); 262 outb(0x0B, 0xA0);
203 263
204 /* Mask all interrupts */ 264 /* Unmask the internal cascade */
265 cached_21 &= ~(1 << 2);
266
267 /* Set interrupt masks */
205 outb(cached_A1, 0xA1); 268 outb(cached_A1, 0xA1);
206 outb(cached_21, 0x21); 269 outb(cached_21, 0x21);
207 270
208 spin_unlock_irqrestore(&i8259_lock, flags); 271 spin_unlock_irqrestore(&i8259_lock, flags);
209 272
210 for (i = 0; i < NUM_ISA_INTERRUPTS; ++i) 273 /* create a legacy host */
211 irq_desc[offset + i].chip = &i8259_pic; 274 if (node)
275 i8259_node = of_node_get(node);
276 i8259_host = irq_alloc_host(IRQ_HOST_MAP_LEGACY, 0, &i8259_host_ops, 0);
277 if (i8259_host == NULL) {
278 printk(KERN_ERR "i8259: failed to allocate irq host !\n");
279 return;
280 }
212 281
213 /* reserve our resources */ 282 /* reserve our resources */
214 setup_irq(offset + 2, &i8259_irqaction);
283 /* XXX should we continue doing that? It seems to cause problems
284 * with further requesting of PCI IO resources for that range...
285 * need to look into it.
286 */
215 request_resource(&ioport_resource, &pic1_iores); 287 request_resource(&ioport_resource, &pic1_iores);
216 request_resource(&ioport_resource, &pic2_iores); 288 request_resource(&ioport_resource, &pic2_iores);
217 request_resource(&ioport_resource, &pic_edgectrl_iores); 289 request_resource(&ioport_resource, &pic_edgectrl_iores);
@@ -219,4 +291,5 @@ void __init i8259_init(unsigned long intack_addr, int offset)
219 if (intack_addr != 0) 291 if (intack_addr != 0)
220 pci_intack = ioremap(intack_addr, 1); 292 pci_intack = ioremap(intack_addr, 1);
221 293
294 printk(KERN_INFO "i8259 legacy interrupt controller initialized\n");
222} 295}
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 7e469358895f..7d31d7cc392d 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -100,8 +100,8 @@ static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
100 100
101 if (mpic->flags & MPIC_PRIMARY) 101 if (mpic->flags & MPIC_PRIMARY)
102 cpu = hard_smp_processor_id(); 102 cpu = hard_smp_processor_id();
103 103 return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,
104 return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg); 104 mpic->cpuregs[cpu], reg);
105} 105}
106 106
107static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value) 107static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value)
@@ -340,27 +340,19 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic)
340#endif /* CONFIG_MPIC_BROKEN_U3 */ 340#endif /* CONFIG_MPIC_BROKEN_U3 */
341 341
342 342
343#define mpic_irq_to_hw(virq) ((unsigned int)irq_map[virq].hwirq)
344
343/* Find an mpic associated with a given linux interrupt */ 345/* Find an mpic associated with a given linux interrupt */
344static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi) 346static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi)
345{ 347{
346 struct mpic *mpic = mpics; 348 unsigned int src = mpic_irq_to_hw(irq);
347 349
348 while(mpic) { 350 if (irq < NUM_ISA_INTERRUPTS)
349 /* search IPIs first since they may override the main interrupts */ 351 return NULL;
350 if (irq >= mpic->ipi_offset && irq < (mpic->ipi_offset + 4)) { 352 if (is_ipi)
351 if (is_ipi) 353 *is_ipi = (src >= MPIC_VEC_IPI_0 && src <= MPIC_VEC_IPI_3);
352 *is_ipi = 1; 354
353 return mpic; 355 return irq_desc[irq].chip_data;
354 }
355 if (irq >= mpic->irq_offset &&
356 irq < (mpic->irq_offset + mpic->irq_count)) {
357 if (is_ipi)
358 *is_ipi = 0;
359 return mpic;
360 }
361 mpic = mpic -> next;
362 }
363 return NULL;
364} 356}
365 357
366/* Convert a cpu mask from logical to physical cpu numbers. */ 358/* Convert a cpu mask from logical to physical cpu numbers. */
@@ -378,14 +370,14 @@ static inline u32 mpic_physmask(u32 cpumask)
378/* Get the mpic structure from the IPI number */ 370/* Get the mpic structure from the IPI number */
379static inline struct mpic * mpic_from_ipi(unsigned int ipi) 371static inline struct mpic * mpic_from_ipi(unsigned int ipi)
380{ 372{
381 return container_of(irq_desc[ipi].chip, struct mpic, hc_ipi); 373 return irq_desc[ipi].chip_data;
382} 374}
383#endif 375#endif
384 376
385/* Get the mpic structure from the irq number */ 377/* Get the mpic structure from the irq number */
386static inline struct mpic * mpic_from_irq(unsigned int irq) 378static inline struct mpic * mpic_from_irq(unsigned int irq)
387{ 379{
388 return container_of(irq_desc[irq].chip, struct mpic, hc_irq); 380 return irq_desc[irq].chip_data;
389} 381}
390 382
391/* Send an EOI */ 383/* Send an EOI */
@@ -398,9 +390,7 @@ static inline void mpic_eoi(struct mpic *mpic)
398#ifdef CONFIG_SMP 390#ifdef CONFIG_SMP
399static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) 391static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
400{ 392{
401 struct mpic *mpic = dev_id; 393 smp_message_recv(mpic_irq_to_hw(irq) - MPIC_VEC_IPI_0, regs);
402
403 smp_message_recv(irq - mpic->ipi_offset, regs);
404 return IRQ_HANDLED; 394 return IRQ_HANDLED;
405} 395}
406#endif /* CONFIG_SMP */ 396#endif /* CONFIG_SMP */
@@ -410,11 +400,11 @@ static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
410 */ 400 */
411 401
412 402
413static void mpic_enable_irq(unsigned int irq) 403static void mpic_unmask_irq(unsigned int irq)
414{ 404{
415 unsigned int loops = 100000; 405 unsigned int loops = 100000;
416 struct mpic *mpic = mpic_from_irq(irq); 406 struct mpic *mpic = mpic_from_irq(irq);
417 unsigned int src = irq - mpic->irq_offset; 407 unsigned int src = mpic_irq_to_hw(irq);
418 408
419 DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src); 409 DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src);
420 410
@@ -429,39 +419,13 @@ static void mpic_enable_irq(unsigned int irq)
429 break; 419 break;
430 } 420 }
431 } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK); 421 } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK);
432
433#ifdef CONFIG_MPIC_BROKEN_U3
434 if (mpic->flags & MPIC_BROKEN_U3) {
435 unsigned int src = irq - mpic->irq_offset;
436 if (mpic_is_ht_interrupt(mpic, src) &&
437 (irq_desc[irq].status & IRQ_LEVEL))
438 mpic_ht_end_irq(mpic, src);
439 }
440#endif /* CONFIG_MPIC_BROKEN_U3 */
441}
442
443static unsigned int mpic_startup_irq(unsigned int irq)
444{
445#ifdef CONFIG_MPIC_BROKEN_U3
446 struct mpic *mpic = mpic_from_irq(irq);
447 unsigned int src = irq - mpic->irq_offset;
448#endif /* CONFIG_MPIC_BROKEN_U3 */
449
450 mpic_enable_irq(irq);
451
452#ifdef CONFIG_MPIC_BROKEN_U3
453 if (mpic_is_ht_interrupt(mpic, src))
454 mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status);
455#endif /* CONFIG_MPIC_BROKEN_U3 */
456
457 return 0;
458} 422}
459 423
460static void mpic_disable_irq(unsigned int irq) 424static void mpic_mask_irq(unsigned int irq)
461{ 425{
462 unsigned int loops = 100000; 426 unsigned int loops = 100000;
463 struct mpic *mpic = mpic_from_irq(irq); 427 struct mpic *mpic = mpic_from_irq(irq);
464 unsigned int src = irq - mpic->irq_offset; 428 unsigned int src = mpic_irq_to_hw(irq);
465 429
466 DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); 430 DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
467 431
@@ -478,23 +442,58 @@ static void mpic_disable_irq(unsigned int irq)
478 } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK)); 442 } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK));
479} 443}
480 444
481static void mpic_shutdown_irq(unsigned int irq) 445static void mpic_end_irq(unsigned int irq)
482{ 446{
447 struct mpic *mpic = mpic_from_irq(irq);
448
449#ifdef DEBUG_IRQ
450 DBG("%s: end_irq: %d\n", mpic->name, irq);
451#endif
452 /* We always EOI on end_irq() even for edge interrupts since that
453 * should only lower the priority, the MPIC should have properly
454 * latched another edge interrupt coming in anyway
455 */
456
457 mpic_eoi(mpic);
458}
459
483#ifdef CONFIG_MPIC_BROKEN_U3 460#ifdef CONFIG_MPIC_BROKEN_U3
461
462static void mpic_unmask_ht_irq(unsigned int irq)
463{
484 struct mpic *mpic = mpic_from_irq(irq); 464 struct mpic *mpic = mpic_from_irq(irq);
485 unsigned int src = irq - mpic->irq_offset; 465 unsigned int src = mpic_irq_to_hw(irq);
486 466
487 if (mpic_is_ht_interrupt(mpic, src)) 467 mpic_unmask_irq(irq);
488 mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status);
489 468
490#endif /* CONFIG_MPIC_BROKEN_U3 */ 469 if (irq_desc[irq].status & IRQ_LEVEL)
470 mpic_ht_end_irq(mpic, src);
471}
472
473static unsigned int mpic_startup_ht_irq(unsigned int irq)
474{
475 struct mpic *mpic = mpic_from_irq(irq);
476 unsigned int src = mpic_irq_to_hw(irq);
477
478 mpic_unmask_irq(irq);
479 mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status);
491 480
492 mpic_disable_irq(irq); 481 return 0;
493} 482}
494 483
495static void mpic_end_irq(unsigned int irq) 484static void mpic_shutdown_ht_irq(unsigned int irq)
496{ 485{
497 struct mpic *mpic = mpic_from_irq(irq); 486 struct mpic *mpic = mpic_from_irq(irq);
487 unsigned int src = mpic_irq_to_hw(irq);
488
489 mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status);
490 mpic_mask_irq(irq);
491}
492
493static void mpic_end_ht_irq(unsigned int irq)
494{
495 struct mpic *mpic = mpic_from_irq(irq);
496 unsigned int src = mpic_irq_to_hw(irq);
498 497
499#ifdef DEBUG_IRQ 498#ifdef DEBUG_IRQ
500 DBG("%s: end_irq: %d\n", mpic->name, irq); 499 DBG("%s: end_irq: %d\n", mpic->name, irq);
@@ -504,30 +503,25 @@ static void mpic_end_irq(unsigned int irq)
504 * latched another edge interrupt coming in anyway 503 * latched another edge interrupt coming in anyway
505 */ 504 */
506 505
507#ifdef CONFIG_MPIC_BROKEN_U3 506 if (irq_desc[irq].status & IRQ_LEVEL)
508 if (mpic->flags & MPIC_BROKEN_U3) { 507 mpic_ht_end_irq(mpic, src);
509 unsigned int src = irq - mpic->irq_offset;
510 if (mpic_is_ht_interrupt(mpic, src) &&
511 (irq_desc[irq].status & IRQ_LEVEL))
512 mpic_ht_end_irq(mpic, src);
513 }
514#endif /* CONFIG_MPIC_BROKEN_U3 */
515
516 mpic_eoi(mpic); 508 mpic_eoi(mpic);
517} 509}
518 510
511#endif /* CONFIG_MPIC_BROKEN_U3 */
512
519#ifdef CONFIG_SMP 513#ifdef CONFIG_SMP
520 514
521static void mpic_enable_ipi(unsigned int irq) 515static void mpic_unmask_ipi(unsigned int irq)
522{ 516{
523 struct mpic *mpic = mpic_from_ipi(irq); 517 struct mpic *mpic = mpic_from_ipi(irq);
524 unsigned int src = irq - mpic->ipi_offset; 518 unsigned int src = mpic_irq_to_hw(irq) - MPIC_VEC_IPI_0;
525 519
526 DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src); 520 DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src);
527 mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK); 521 mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
528} 522}
529 523
530static void mpic_disable_ipi(unsigned int irq) 524static void mpic_mask_ipi(unsigned int irq)
531{ 525{
532 /* NEVER disable an IPI... that's just plain wrong! */ 526 /* NEVER disable an IPI... that's just plain wrong! */
533} 527}
@@ -551,29 +545,176 @@ static void mpic_end_ipi(unsigned int irq)
551static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) 545static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
552{ 546{
553 struct mpic *mpic = mpic_from_irq(irq); 547 struct mpic *mpic = mpic_from_irq(irq);
548 unsigned int src = mpic_irq_to_hw(irq);
554 549
555 cpumask_t tmp; 550 cpumask_t tmp;
556 551
557 cpus_and(tmp, cpumask, cpu_online_map); 552 cpus_and(tmp, cpumask, cpu_online_map);
558 553
559 mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION, 554 mpic_irq_write(src, MPIC_IRQ_DESTINATION,
560 mpic_physmask(cpus_addr(tmp)[0])); 555 mpic_physmask(cpus_addr(tmp)[0]));
561} 556}
562 557
558static unsigned int mpic_flags_to_vecpri(unsigned int flags, int *level)
559{
560 unsigned int vecpri;
561
562 /* Now convert sense value */
563 switch(flags & IRQ_TYPE_SENSE_MASK) {
564 case IRQ_TYPE_EDGE_RISING:
565 vecpri = MPIC_VECPRI_SENSE_EDGE |
566 MPIC_VECPRI_POLARITY_POSITIVE;
567 *level = 0;
568 break;
569 case IRQ_TYPE_EDGE_FALLING:
570 vecpri = MPIC_VECPRI_SENSE_EDGE |
571 MPIC_VECPRI_POLARITY_NEGATIVE;
572 *level = 0;
573 break;
574 case IRQ_TYPE_LEVEL_HIGH:
575 vecpri = MPIC_VECPRI_SENSE_LEVEL |
576 MPIC_VECPRI_POLARITY_POSITIVE;
577 *level = 1;
578 break;
579 case IRQ_TYPE_LEVEL_LOW:
580 default:
581 vecpri = MPIC_VECPRI_SENSE_LEVEL |
582 MPIC_VECPRI_POLARITY_NEGATIVE;
583 *level = 1;
584 }
585 return vecpri;
586}
587
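
mpic_flags_to_vecpri() above reduces the generic trigger type to two hardware fields, sense (edge or level) and polarity, and tells the caller whether to mark the descriptor as level-triggered. A table-style sketch of the same conversion using placeholder bit values (the real MPIC_VECPRI_* constants live in the MPIC header):

#include <stdio.h>

/* placeholder bit values, not the real MPIC register layout */
#define VECPRI_SENSE_LEVEL      0x2
#define VECPRI_SENSE_EDGE       0x0
#define VECPRI_POLARITY_POS     0x1
#define VECPRI_POLARITY_NEG     0x0

enum trigger { EDGE_RISING, EDGE_FALLING, LEVEL_HIGH, LEVEL_LOW };

static unsigned int flags_to_vecpri(enum trigger t, int *level)
{
        switch (t) {
        case EDGE_RISING:
                *level = 0;
                return VECPRI_SENSE_EDGE | VECPRI_POLARITY_POS;
        case EDGE_FALLING:
                *level = 0;
                return VECPRI_SENSE_EDGE | VECPRI_POLARITY_NEG;
        case LEVEL_HIGH:
                *level = 1;
                return VECPRI_SENSE_LEVEL | VECPRI_POLARITY_POS;
        case LEVEL_LOW:
        default:
                *level = 1;
                return VECPRI_SENSE_LEVEL | VECPRI_POLARITY_NEG;
        }
}

int main(void)
{
        int level;
        unsigned int vecpri = flags_to_vecpri(LEVEL_LOW, &level);

        printf("vecpri bits 0x%x, level %d\n", vecpri, level);
        return 0;
}
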
588static struct irq_chip mpic_irq_chip = {
589 .mask = mpic_mask_irq,
590 .unmask = mpic_unmask_irq,
591 .eoi = mpic_end_irq,
592};
593
594#ifdef CONFIG_SMP
595static struct irq_chip mpic_ipi_chip = {
596 .mask = mpic_mask_ipi,
597 .unmask = mpic_unmask_ipi,
598 .eoi = mpic_end_ipi,
599};
600#endif /* CONFIG_SMP */
601
602#ifdef CONFIG_MPIC_BROKEN_U3
603static struct irq_chip mpic_irq_ht_chip = {
604 .startup = mpic_startup_ht_irq,
605 .shutdown = mpic_shutdown_ht_irq,
606 .mask = mpic_mask_irq,
607 .unmask = mpic_unmask_ht_irq,
608 .eoi = mpic_end_ht_irq,
609};
610#endif /* CONFIG_MPIC_BROKEN_U3 */
611
612
613static int mpic_host_match(struct irq_host *h, struct device_node *node)
614{
615 struct mpic *mpic = h->host_data;
616
617 /* Exact match, unless mpic node is NULL */
618 return mpic->of_node == NULL || mpic->of_node == node;
619}
620
621static int mpic_host_map(struct irq_host *h, unsigned int virq,
622 irq_hw_number_t hw, unsigned int flags)
623{
624 struct irq_desc *desc = get_irq_desc(virq);
625 struct irq_chip *chip;
626 struct mpic *mpic = h->host_data;
627 unsigned int vecpri = MPIC_VECPRI_SENSE_LEVEL |
628 MPIC_VECPRI_POLARITY_NEGATIVE;
629 int level;
630
631 pr_debug("mpic: map virq %d, hwirq 0x%lx, flags: 0x%x\n",
632 virq, hw, flags);
633
634 if (hw == MPIC_VEC_SPURRIOUS)
635 return -EINVAL;
636#ifdef CONFIG_SMP
637 else if (hw >= MPIC_VEC_IPI_0) {
638 WARN_ON(!(mpic->flags & MPIC_PRIMARY));
639
640 pr_debug("mpic: mapping as IPI\n");
641 set_irq_chip_data(virq, mpic);
642 set_irq_chip_and_handler(virq, &mpic->hc_ipi,
643 handle_percpu_irq);
644 return 0;
645 }
646#endif /* CONFIG_SMP */
647
648 if (hw >= mpic->irq_count)
649 return -EINVAL;
650
651 /* If no sense provided, check default sense array */
652 if (((flags & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_NONE) &&
653 mpic->senses && hw < mpic->senses_count)
654 flags |= mpic->senses[hw];
655
656 vecpri = mpic_flags_to_vecpri(flags, &level);
657 if (level)
658 desc->status |= IRQ_LEVEL;
659 chip = &mpic->hc_irq;
660
661#ifdef CONFIG_MPIC_BROKEN_U3
662 /* Check for HT interrupts, override vecpri */
663 if (mpic_is_ht_interrupt(mpic, hw)) {
664 vecpri &= ~(MPIC_VECPRI_SENSE_MASK |
665 MPIC_VECPRI_POLARITY_MASK);
666 vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
667 chip = &mpic->hc_ht_irq;
668 }
669#endif
670
671 /* Reconfigure irq */
672 vecpri |= MPIC_VECPRI_MASK | hw | (8 << MPIC_VECPRI_PRIORITY_SHIFT);
673 mpic_irq_write(hw, MPIC_IRQ_VECTOR_PRI, vecpri);
674
675 pr_debug("mpic: mapping as IRQ\n");
676
677 set_irq_chip_data(virq, mpic);
678 set_irq_chip_and_handler(virq, chip, handle_fasteoi_irq);
679 return 0;
680}
681
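
When mpic_host_map() above accepts a source it rewrites the source's vector/priority register: the sense and polarity bits chosen from the flags, the mask bit set, the vector field equal to the hardware source number, and a default priority of 8. A small sketch of composing that word, with invented bit positions rather than the real register layout:

#include <stdio.h>
#include <stdint.h>

/* placeholder layout: the real offsets are in the MPIC header */
#define VECPRI_MASK             (1u << 31)
#define VECPRI_PRIORITY_SHIFT   16
#define VECPRI_SENSE_LEVEL      (1u << 22)
#define VECPRI_POLARITY_NEG     0u

int main(void)
{
        unsigned int hw = 0x25;          /* hardware source number = vector */
        uint32_t vecpri = VECPRI_SENSE_LEVEL | VECPRI_POLARITY_NEG;

        /* start masked, vector = hw, default priority 8, as the map hook does */
        vecpri |= VECPRI_MASK | hw | (8u << VECPRI_PRIORITY_SHIFT);
        printf("VECPRI for source 0x%x = 0x%08x\n", hw, vecpri);
        return 0;
}
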
682static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
683 u32 *intspec, unsigned int intsize,
684 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
685
686{
687 static unsigned char map_mpic_senses[4] = {
688 IRQ_TYPE_EDGE_RISING,
689 IRQ_TYPE_LEVEL_LOW,
690 IRQ_TYPE_LEVEL_HIGH,
691 IRQ_TYPE_EDGE_FALLING,
692 };
693
694 *out_hwirq = intspec[0];
695 if (intsize > 1 && intspec[1] < 4)
696 *out_flags = map_mpic_senses[intspec[1]];
697 else
698 *out_flags = IRQ_TYPE_NONE;
699
700 return 0;
701}
702
703static struct irq_host_ops mpic_host_ops = {
704 .match = mpic_host_match,
705 .map = mpic_host_map,
706 .xlate = mpic_host_xlate,
707};
563 708
564/* 709/*
565 * Exported functions 710 * Exported functions
566 */ 711 */
567 712
568 713struct mpic * __init mpic_alloc(struct device_node *node,
569struct mpic * __init mpic_alloc(unsigned long phys_addr, 714 unsigned long phys_addr,
570 unsigned int flags, 715 unsigned int flags,
571 unsigned int isu_size, 716 unsigned int isu_size,
572 unsigned int irq_offset,
573 unsigned int irq_count, 717 unsigned int irq_count,
574 unsigned int ipi_offset,
575 unsigned char *senses,
576 unsigned int senses_count,
577 const char *name) 718 const char *name)
578{ 719{
579 struct mpic *mpic; 720 struct mpic *mpic;
@@ -585,33 +726,38 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr,
585 if (mpic == NULL) 726 if (mpic == NULL)
586 return NULL; 727 return NULL;
587 728
588
589 memset(mpic, 0, sizeof(struct mpic)); 729 memset(mpic, 0, sizeof(struct mpic));
590 mpic->name = name; 730 mpic->name = name;
731 mpic->of_node = node ? of_node_get(node) : NULL;
591 732
733 mpic->irqhost = irq_alloc_host(IRQ_HOST_MAP_LINEAR, 256,
734 &mpic_host_ops,
735 MPIC_VEC_SPURRIOUS);
736 if (mpic->irqhost == NULL) {
737 of_node_put(node);
738 return NULL;
739 }
740
741 mpic->irqhost->host_data = mpic;
742 mpic->hc_irq = mpic_irq_chip;
592 mpic->hc_irq.typename = name; 743 mpic->hc_irq.typename = name;
593 mpic->hc_irq.startup = mpic_startup_irq;
594 mpic->hc_irq.shutdown = mpic_shutdown_irq;
595 mpic->hc_irq.enable = mpic_enable_irq;
596 mpic->hc_irq.disable = mpic_disable_irq;
597 mpic->hc_irq.end = mpic_end_irq;
598 if (flags & MPIC_PRIMARY) 744 if (flags & MPIC_PRIMARY)
599 mpic->hc_irq.set_affinity = mpic_set_affinity; 745 mpic->hc_irq.set_affinity = mpic_set_affinity;
746#ifdef CONFIG_MPIC_BROKEN_U3
747 mpic->hc_ht_irq = mpic_irq_ht_chip;
748 mpic->hc_ht_irq.typename = name;
749 if (flags & MPIC_PRIMARY)
750 mpic->hc_ht_irq.set_affinity = mpic_set_affinity;
751#endif /* CONFIG_MPIC_BROKEN_U3 */
600#ifdef CONFIG_SMP 752#ifdef CONFIG_SMP
753 mpic->hc_ipi = mpic_ipi_chip;
601 mpic->hc_ipi.typename = name; 754 mpic->hc_ipi.typename = name;
602 mpic->hc_ipi.enable = mpic_enable_ipi;
603 mpic->hc_ipi.disable = mpic_disable_ipi;
604 mpic->hc_ipi.end = mpic_end_ipi;
605#endif /* CONFIG_SMP */ 755#endif /* CONFIG_SMP */
606 756
607 mpic->flags = flags; 757 mpic->flags = flags;
608 mpic->isu_size = isu_size; 758 mpic->isu_size = isu_size;
609 mpic->irq_offset = irq_offset;
610 mpic->irq_count = irq_count; 759 mpic->irq_count = irq_count;
611 mpic->ipi_offset = ipi_offset;
612 mpic->num_sources = 0; /* so far */ 760 mpic->num_sources = 0; /* so far */
613 mpic->senses = senses;
614 mpic->senses_count = senses_count;
615 761
616 /* Map the global registers */ 762 /* Map the global registers */
617 mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000); 763 mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000);
@@ -679,8 +825,10 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr,
679 mpic->next = mpics; 825 mpic->next = mpics;
680 mpics = mpic; 826 mpics = mpic;
681 827
682 if (flags & MPIC_PRIMARY) 828 if (flags & MPIC_PRIMARY) {
683 mpic_primary = mpic; 829 mpic_primary = mpic;
830 irq_set_default_host(mpic->irqhost);
831 }
684 832
685 return mpic; 833 return mpic;
686} 834}
@@ -697,26 +845,10 @@ void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
697 mpic->num_sources = isu_first + mpic->isu_size; 845 mpic->num_sources = isu_first + mpic->isu_size;
698} 846}
699 847
700void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler, 848void __init mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count)
701 void *data)
702{ 849{
703 struct mpic *mpic = mpic_find(irq, NULL); 850 mpic->senses = senses;
704 unsigned long flags; 851 mpic->senses_count = count;
705
706 /* Synchronization here is a bit dodgy, so don't try to replace cascade
707 * interrupts on the fly too often ... but normally it's set up at boot.
708 */
709 spin_lock_irqsave(&mpic_lock, flags);
710 if (mpic->cascade)
711 mpic_disable_irq(mpic->cascade_vec + mpic->irq_offset);
712 mpic->cascade = NULL;
713 wmb();
714 mpic->cascade_vec = irq - mpic->irq_offset;
715 mpic->cascade_data = data;
716 wmb();
717 mpic->cascade = handler;
718 mpic_enable_irq(irq);
719 spin_unlock_irqrestore(&mpic_lock, flags);
720} 852}
721 853
722void __init mpic_init(struct mpic *mpic) 854void __init mpic_init(struct mpic *mpic)
@@ -724,6 +856,11 @@ void __init mpic_init(struct mpic *mpic)
724 int i; 856 int i;
725 857
726 BUG_ON(mpic->num_sources == 0); 858 BUG_ON(mpic->num_sources == 0);
859 WARN_ON(mpic->num_sources > MPIC_VEC_IPI_0);
860
861 /* Sanitize source count */
862 if (mpic->num_sources > MPIC_VEC_IPI_0)
863 mpic->num_sources = MPIC_VEC_IPI_0;
727 864
728 printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources); 865 printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources);
729 866
@@ -747,12 +884,6 @@ void __init mpic_init(struct mpic *mpic)
747 MPIC_VECPRI_MASK | 884 MPIC_VECPRI_MASK |
748 (10 << MPIC_VECPRI_PRIORITY_SHIFT) | 885 (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
749 (MPIC_VEC_IPI_0 + i)); 886 (MPIC_VEC_IPI_0 + i));
750#ifdef CONFIG_SMP
751 if (!(mpic->flags & MPIC_PRIMARY))
752 continue;
753 irq_desc[mpic->ipi_offset+i].status |= IRQ_PER_CPU;
754 irq_desc[mpic->ipi_offset+i].chip = &mpic->hc_ipi;
755#endif /* CONFIG_SMP */
756 } 887 }
757 888
758 /* Initialize interrupt sources */ 889 /* Initialize interrupt sources */
@@ -763,31 +894,21 @@ void __init mpic_init(struct mpic *mpic)
763 /* Do the HT PIC fixups on U3 broken mpic */ 894 /* Do the HT PIC fixups on U3 broken mpic */
764 DBG("MPIC flags: %x\n", mpic->flags); 895 DBG("MPIC flags: %x\n", mpic->flags);
765 if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY)) 896 if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY))
766 mpic_scan_ht_pics(mpic); 897 mpic_scan_ht_pics(mpic);
767#endif /* CONFIG_MPIC_BROKEN_U3 */ 898#endif /* CONFIG_MPIC_BROKEN_U3 */
768 899
769 for (i = 0; i < mpic->num_sources; i++) { 900 for (i = 0; i < mpic->num_sources; i++) {
770 /* start with vector = source number, and masked */ 901 /* start with vector = source number, and masked */
771 u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT); 902 u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT);
772 int level = 0; 903 int level = 1;
773 904
774 /* if it's an IPI, we skip it */
775 if ((mpic->irq_offset + i) >= (mpic->ipi_offset + i) &&
776 (mpic->irq_offset + i) < (mpic->ipi_offset + i + 4))
777 continue;
778
779 /* do senses munging */ 905 /* do senses munging */
780 if (mpic->senses && i < mpic->senses_count) { 906 if (mpic->senses && i < mpic->senses_count)
781 if (mpic->senses[i] & IRQ_SENSE_LEVEL) 907 vecpri = mpic_flags_to_vecpri(mpic->senses[i],
782 vecpri |= MPIC_VECPRI_SENSE_LEVEL; 908 &level);
783 if (mpic->senses[i] & IRQ_POLARITY_POSITIVE) 909 else
784 vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
785 } else
786 vecpri |= MPIC_VECPRI_SENSE_LEVEL; 910 vecpri |= MPIC_VECPRI_SENSE_LEVEL;
787 911
788 /* remember if it was a level interrupts */
789 level = (vecpri & MPIC_VECPRI_SENSE_LEVEL);
790
791 /* deal with broken U3 */ 912 /* deal with broken U3 */
792 if (mpic->flags & MPIC_BROKEN_U3) { 913 if (mpic->flags & MPIC_BROKEN_U3) {
793#ifdef CONFIG_MPIC_BROKEN_U3 914#ifdef CONFIG_MPIC_BROKEN_U3
@@ -808,12 +929,6 @@ void __init mpic_init(struct mpic *mpic)
808 mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri); 929 mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri);
809 mpic_irq_write(i, MPIC_IRQ_DESTINATION, 930 mpic_irq_write(i, MPIC_IRQ_DESTINATION,
810 1 << hard_smp_processor_id()); 931 1 << hard_smp_processor_id());
811
812 /* init linux descriptors */
813 if (i < mpic->irq_count) {
814 irq_desc[mpic->irq_offset+i].status = level ? IRQ_LEVEL : 0;
815 irq_desc[mpic->irq_offset+i].chip = &mpic->hc_irq;
816 }
817 } 932 }
818 933
819 /* Init spurrious vector */ 934 /* Init spurrious vector */
@@ -854,19 +969,20 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
854{ 969{
855 int is_ipi; 970 int is_ipi;
856 struct mpic *mpic = mpic_find(irq, &is_ipi); 971 struct mpic *mpic = mpic_find(irq, &is_ipi);
972 unsigned int src = mpic_irq_to_hw(irq);
857 unsigned long flags; 973 unsigned long flags;
858 u32 reg; 974 u32 reg;
859 975
860 spin_lock_irqsave(&mpic_lock, flags); 976 spin_lock_irqsave(&mpic_lock, flags);
861 if (is_ipi) { 977 if (is_ipi) {
862 reg = mpic_ipi_read(irq - mpic->ipi_offset) & 978 reg = mpic_ipi_read(src - MPIC_VEC_IPI_0) &
863 ~MPIC_VECPRI_PRIORITY_MASK; 979 ~MPIC_VECPRI_PRIORITY_MASK;
864 mpic_ipi_write(irq - mpic->ipi_offset, 980 mpic_ipi_write(src - MPIC_VEC_IPI_0,
865 reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); 981 reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
866 } else { 982 } else {
867 reg = mpic_irq_read(irq - mpic->irq_offset,MPIC_IRQ_VECTOR_PRI) 983 reg = mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI)
868 & ~MPIC_VECPRI_PRIORITY_MASK; 984 & ~MPIC_VECPRI_PRIORITY_MASK;
869 mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI, 985 mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
870 reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); 986 reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
871 } 987 }
872 spin_unlock_irqrestore(&mpic_lock, flags); 988 spin_unlock_irqrestore(&mpic_lock, flags);
@@ -876,14 +992,15 @@ unsigned int mpic_irq_get_priority(unsigned int irq)
876{ 992{
877 int is_ipi; 993 int is_ipi;
878 struct mpic *mpic = mpic_find(irq, &is_ipi); 994 struct mpic *mpic = mpic_find(irq, &is_ipi);
995 unsigned int src = mpic_irq_to_hw(irq);
879 unsigned long flags; 996 unsigned long flags;
880 u32 reg; 997 u32 reg;
881 998
882 spin_lock_irqsave(&mpic_lock, flags); 999 spin_lock_irqsave(&mpic_lock, flags);
883 if (is_ipi) 1000 if (is_ipi)
 884 reg = mpic_ipi_read(irq - mpic->ipi_offset); 1001 reg = mpic_ipi_read(src - MPIC_VEC_IPI_0);
885 else 1002 else
886 reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI); 1003 reg = mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI);
887 spin_unlock_irqrestore(&mpic_lock, flags); 1004 spin_unlock_irqrestore(&mpic_lock, flags);
888 return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT; 1005 return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT;
889} 1006}
@@ -978,37 +1095,20 @@ void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask)
978 mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); 1095 mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0]));
979} 1096}
980 1097
981int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) 1098unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs)
982{ 1099{
983 u32 irq; 1100 u32 src;
984 1101
985 irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK; 1102 src = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK;
986#ifdef DEBUG_LOW 1103#ifdef DEBUG_LOW
987 DBG("%s: get_one_irq(): %d\n", mpic->name, irq); 1104 DBG("%s: get_one_irq(): %d\n", mpic->name, src);
988#endif 1105#endif
989 if (mpic->cascade && irq == mpic->cascade_vec) { 1106 if (unlikely(src == MPIC_VEC_SPURRIOUS))
990#ifdef DEBUG_LOW 1107 return NO_IRQ;
991 DBG("%s: cascading ...\n", mpic->name); 1108 return irq_linear_revmap(mpic->irqhost, src);
992#endif
993 irq = mpic->cascade(regs, mpic->cascade_data);
994 mpic_eoi(mpic);
995 return irq;
996 }
997 if (unlikely(irq == MPIC_VEC_SPURRIOUS))
998 return -1;
999 if (irq < MPIC_VEC_IPI_0) {
1000#ifdef DEBUG_IRQ
1001 DBG("%s: irq %d\n", mpic->name, irq + mpic->irq_offset);
1002#endif
1003 return irq + mpic->irq_offset;
1004 }
1005#ifdef DEBUG_IPI
1006 DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
1007#endif
1008 return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset;
1009} 1109}
1010 1110
1011int mpic_get_irq(struct pt_regs *regs) 1111unsigned int mpic_get_irq(struct pt_regs *regs)
1012{ 1112{
1013 struct mpic *mpic = mpic_primary; 1113 struct mpic *mpic = mpic_primary;
1014 1114
@@ -1022,25 +1122,27 @@ int mpic_get_irq(struct pt_regs *regs)
1022void mpic_request_ipis(void) 1122void mpic_request_ipis(void)
1023{ 1123{
1024 struct mpic *mpic = mpic_primary; 1124 struct mpic *mpic = mpic_primary;
1025 1125 int i;
1126 static char *ipi_names[] = {
1127 "IPI0 (call function)",
1128 "IPI1 (reschedule)",
1129 "IPI2 (unused)",
1130 "IPI3 (debugger break)",
1131 };
1026 BUG_ON(mpic == NULL); 1132 BUG_ON(mpic == NULL);
1027
1028 printk("requesting IPIs ... \n");
1029 1133
1030 /* 1134 printk(KERN_INFO "mpic: requesting IPIs ... \n");
1031 * IPIs are marked IRQF_DISABLED as they must run with irqs 1135
1032 * disabled 1136 for (i = 0; i < 4; i++) {
1033 */ 1137 unsigned int vipi = irq_create_mapping(mpic->irqhost,
1034 request_irq(mpic->ipi_offset+0, mpic_ipi_action, IRQF_DISABLED, 1138 MPIC_VEC_IPI_0 + i, 0);
1035 "IPI0 (call function)", mpic); 1139 if (vipi == NO_IRQ) {
1036 request_irq(mpic->ipi_offset+1, mpic_ipi_action, IRQF_DISABLED, 1140 printk(KERN_ERR "Failed to map IPI %d\n", i);
1037 "IPI1 (reschedule)", mpic); 1141 break;
1038 request_irq(mpic->ipi_offset+2, mpic_ipi_action, IRQF_DISABLED, 1142 }
1039 "IPI2 (unused)", mpic); 1143 request_irq(vipi, mpic_ipi_action, IRQF_DISABLED,
1040 request_irq(mpic->ipi_offset+3, mpic_ipi_action, IRQF_DISABLED, 1144 ipi_names[i], mpic);
1041 "IPI3 (debugger break)", mpic); 1145 }
1042
1043 printk("IPIs requested... \n");
1044} 1146}
1045 1147
1046void smp_mpic_message_pass(int target, int msg) 1148void smp_mpic_message_pass(int target, int msg)
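For illustration, a minimal sketch of how a board port might drive the reworked mpic API above. This is hypothetical platform code, not part of this diff; the device node, register address, flags and sense table are assumed. The node is now passed to mpic_alloc(), the removed irq_offset/ipi_offset/senses arguments are gone, and default senses are installed with mpic_set_default_senses() before mpic_init():

	/* Hypothetical board code; isu_size/irq_count values are illustrative only. */
	static u8 board_senses[4];	/* assumed per-board default sense table */

	static void __init board_init_IRQ(struct device_node *np, unsigned long paddr)
	{
		struct mpic *mpic;

		mpic = mpic_alloc(np, paddr, MPIC_PRIMARY, 0, 128, " MPIC ");
		if (mpic == NULL)
			return;

		/* Replaces the senses/senses_count arguments of the old mpic_alloc() */
		mpic_set_default_senses(mpic, board_senses, 4);
		mpic_init(mpic);
	}
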
diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile
index 490749ca88f9..2497bbc07e76 100644
--- a/arch/ppc/syslib/Makefile
+++ b/arch/ppc/syslib/Makefile
@@ -104,3 +104,5 @@ obj-$(CONFIG_PPC_MPC52xx) += mpc52xx_setup.o mpc52xx_pic.o \
104ifeq ($(CONFIG_PPC_MPC52xx),y) 104ifeq ($(CONFIG_PPC_MPC52xx),y)
105obj-$(CONFIG_PCI) += mpc52xx_pci.o 105obj-$(CONFIG_PCI) += mpc52xx_pci.o
106endif 106endif
107
108obj-$(CONFIG_PPC_I8259) += i8259.o
diff --git a/arch/ppc/syslib/i8259.c b/arch/ppc/syslib/i8259.c
new file mode 100644
index 000000000000..eb35353af837
--- /dev/null
+++ b/arch/ppc/syslib/i8259.c
@@ -0,0 +1,212 @@
1/*
2 * i8259 interrupt controller driver.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#include <linux/init.h>
10#include <linux/ioport.h>
11#include <linux/interrupt.h>
12#include <asm/io.h>
13#include <asm/i8259.h>
14
15static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */
16
17static unsigned char cached_8259[2] = { 0xff, 0xff };
18#define cached_A1 (cached_8259[0])
19#define cached_21 (cached_8259[1])
20
21static DEFINE_SPINLOCK(i8259_lock);
22
23static int i8259_pic_irq_offset;
24
25/*
26 * Acknowledge the IRQ using either the PCI host bridge's interrupt
27 * acknowledge feature or poll. How i8259_init() is called determines
28 * which is used. It should be noted that polling is broken on some
29 * IBM and Motorola PReP boxes so we must use the int-ack feature on them.
30 */
31int i8259_irq(struct pt_regs *regs)
32{
33 int irq;
34
35 spin_lock(&i8259_lock);
36
37 /* Either int-ack or poll for the IRQ */
38 if (pci_intack)
39 irq = readb(pci_intack);
40 else {
41 /* Perform an interrupt acknowledge cycle on controller 1. */
42 outb(0x0C, 0x20); /* prepare for poll */
43 irq = inb(0x20) & 7;
44 if (irq == 2) {
45 /*
46 * Interrupt is cascaded so perform interrupt
47 * acknowledge on controller 2.
48 */
49 outb(0x0C, 0xA0); /* prepare for poll */
50 irq = (inb(0xA0) & 7) + 8;
51 }
52 }
53
54 if (irq == 7) {
55 /*
56 * This may be a spurious interrupt.
57 *
58 * Read the interrupt status register (ISR). If the most
59 * significant bit is not set then there is no valid
60 * interrupt.
61 */
62 if (!pci_intack)
63 outb(0x0B, 0x20); /* ISR register */
64 if (~inb(0x20) & 0x80)
65 irq = -1;
66 }
67
68 spin_unlock(&i8259_lock);
69 return irq + i8259_pic_irq_offset;
70}
71
72static void i8259_mask_and_ack_irq(unsigned int irq_nr)
73{
74 unsigned long flags;
75
76 spin_lock_irqsave(&i8259_lock, flags);
77 irq_nr -= i8259_pic_irq_offset;
78 if (irq_nr > 7) {
79 cached_A1 |= 1 << (irq_nr-8);
80 inb(0xA1); /* DUMMY */
81 outb(cached_A1, 0xA1);
82 outb(0x20, 0xA0); /* Non-specific EOI */
83 outb(0x20, 0x20); /* Non-specific EOI to cascade */
84 } else {
85 cached_21 |= 1 << irq_nr;
86 inb(0x21); /* DUMMY */
87 outb(cached_21, 0x21);
88 outb(0x20, 0x20); /* Non-specific EOI */
89 }
90 spin_unlock_irqrestore(&i8259_lock, flags);
91}
92
93static void i8259_set_irq_mask(int irq_nr)
94{
95 outb(cached_A1, 0xA1);
96 outb(cached_21, 0x21);
97}
98
99static void i8259_mask_irq(unsigned int irq_nr)
100{
101 unsigned long flags;
102
103 spin_lock_irqsave(&i8259_lock, flags);
104 irq_nr -= i8259_pic_irq_offset;
105 if (irq_nr < 8)
106 cached_21 |= 1 << irq_nr;
107 else
108 cached_A1 |= 1 << (irq_nr-8);
109 i8259_set_irq_mask(irq_nr);
110 spin_unlock_irqrestore(&i8259_lock, flags);
111}
112
113static void i8259_unmask_irq(unsigned int irq_nr)
114{
115 unsigned long flags;
116
117 spin_lock_irqsave(&i8259_lock, flags);
118 irq_nr -= i8259_pic_irq_offset;
119 if (irq_nr < 8)
120 cached_21 &= ~(1 << irq_nr);
121 else
122 cached_A1 &= ~(1 << (irq_nr-8));
123 i8259_set_irq_mask(irq_nr);
124 spin_unlock_irqrestore(&i8259_lock, flags);
125}
126
127static struct irq_chip i8259_pic = {
128 .typename = " i8259 ",
129 .mask = i8259_mask_irq,
130 .unmask = i8259_unmask_irq,
131 .mask_ack = i8259_mask_and_ack_irq,
132};
133
134static struct resource pic1_iores = {
135 .name = "8259 (master)",
136 .start = 0x20,
137 .end = 0x21,
138 .flags = IORESOURCE_BUSY,
139};
140
141static struct resource pic2_iores = {
142 .name = "8259 (slave)",
143 .start = 0xa0,
144 .end = 0xa1,
145 .flags = IORESOURCE_BUSY,
146};
147
148static struct resource pic_edgectrl_iores = {
149 .name = "8259 edge control",
150 .start = 0x4d0,
151 .end = 0x4d1,
152 .flags = IORESOURCE_BUSY,
153};
154
155static struct irqaction i8259_irqaction = {
156 .handler = no_action,
157 .flags = SA_INTERRUPT,
158 .mask = CPU_MASK_NONE,
159 .name = "82c59 secondary cascade",
160};
161
162/*
163 * i8259_init()
164 * intack_addr - PCI interrupt acknowledge (real) address which will return
165 * the active irq from the 8259
166 */
167void __init i8259_init(unsigned long intack_addr, int offset)
168{
169 unsigned long flags;
170 int i;
171
172 spin_lock_irqsave(&i8259_lock, flags);
173 i8259_pic_irq_offset = offset;
174
175 /* init master interrupt controller */
176 outb(0x11, 0x20); /* Start init sequence */
177 outb(0x00, 0x21); /* Vector base */
178 outb(0x04, 0x21); /* edge triggered, Cascade (slave) on IRQ2 */
179 outb(0x01, 0x21); /* Select 8086 mode */
180
181 /* init slave interrupt controller */
182 outb(0x11, 0xA0); /* Start init sequence */
183 outb(0x08, 0xA1); /* Vector base */
184 outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */
185 outb(0x01, 0xA1); /* Select 8086 mode */
186
187 /* always read ISR */
188 outb(0x0B, 0x20);
189 outb(0x0B, 0xA0);
190
191 /* Mask all interrupts */
192 outb(cached_A1, 0xA1);
193 outb(cached_21, 0x21);
194
195 spin_unlock_irqrestore(&i8259_lock, flags);
196
197 for (i = 0; i < NUM_ISA_INTERRUPTS; ++i) {
198 set_irq_chip_and_handler(offset + i, &i8259_pic,
199 handle_level_irq);
200 irq_desc[offset + i].status |= IRQ_LEVEL;
201 }
202
203 /* reserve our resources */
204 setup_irq(offset + 2, &i8259_irqaction);
205 request_resource(&ioport_resource, &pic1_iores);
206 request_resource(&ioport_resource, &pic2_iores);
207 request_resource(&ioport_resource, &pic_edgectrl_iores);
208
209 if (intack_addr != 0)
210 pci_intack = ioremap(intack_addr, 1);
211
212}
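A minimal usage sketch for the driver above; this is hypothetical board code and the interrupt-acknowledge address is assumed. A non-zero intack_addr makes i8259_irq() read the vector from the PCI host bridge, while passing 0 leaves pci_intack unset and selects the poll path:

	/* Hypothetical board setup; 0xfef00000 is an assumed int-ack address. */
	void __init board_init_IRQ(void)
	{
		i8259_init(0xfef00000, 0);	/* offset 0: IRQs 0-15 not remapped */
	}
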
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 821a141889de..224fbff79969 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -7,6 +7,14 @@ config MMU
7 bool 7 bool
8 default y 8 default y
9 9
10config LOCKDEP_SUPPORT
11 bool
12 default y
13
14config STACKTRACE_SUPPORT
15 bool
16 default y
17
10config RWSEM_GENERIC_SPINLOCK 18config RWSEM_GENERIC_SPINLOCK
11 bool 19 bool
12 20
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index f53b6d5300e5..2283933a9a93 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -1,5 +1,9 @@
1menu "Kernel hacking" 1menu "Kernel hacking"
2 2
3config TRACE_IRQFLAGS_SUPPORT
4 bool
5 default y
6
3source "lib/Kconfig.debug" 7source "lib/Kconfig.debug"
4 8
5endmenu 9endmenu
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index b3791fb094a8..74ef57dcfa60 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -34,6 +34,11 @@ cflags-$(CONFIG_MARCH_G5) += $(call cc-option,-march=g5)
34cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) 34cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900)
35cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) 35cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990)
36 36
37#
38# Prevent tail-call optimizations, to get clearer backtraces:
39#
40cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
41
37# old style option for packed stacks 42# old style option for packed stacks
38ifeq ($(call cc-option-yn,-mkernel-backchain),y) 43ifeq ($(call cc-option-yn,-mkernel-backchain),y)
39cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK 44cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9269b5788fac..eabf00a6f770 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \
21obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o 21obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o
22 22
23obj-$(CONFIG_VIRT_TIMER) += vtime.o 23obj-$(CONFIG_VIRT_TIMER) += vtime.o
24obj-$(CONFIG_STACKTRACE) += stacktrace.o
24 25
25# Kexec part 26# Kexec part
26S390_KEXEC_OBJS := machine_kexec.o crash.o 27S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index d8948c342caf..5b5799ac8f83 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -58,6 +58,21 @@ STACK_SIZE = 1 << STACK_SHIFT
58 58
59#define BASED(name) name-system_call(%r13) 59#define BASED(name) name-system_call(%r13)
60 60
61#ifdef CONFIG_TRACE_IRQFLAGS
62 .macro TRACE_IRQS_ON
63 l %r1,BASED(.Ltrace_irq_on)
64 basr %r14,%r1
65 .endm
66
67 .macro TRACE_IRQS_OFF
68 l %r1,BASED(.Ltrace_irq_off)
69 basr %r14,%r1
70 .endm
71#else
72#define TRACE_IRQS_ON
73#define TRACE_IRQS_OFF
74#endif
75
61/* 76/*
62 * Register usage in interrupt handlers: 77 * Register usage in interrupt handlers:
63 * R9 - pointer to current task structure 78 * R9 - pointer to current task structure
@@ -361,6 +376,7 @@ ret_from_fork:
361 st %r15,SP_R15(%r15) # store stack pointer for new kthread 376 st %r15,SP_R15(%r15) # store stack pointer for new kthread
3620: l %r1,BASED(.Lschedtail) 3770: l %r1,BASED(.Lschedtail)
363 basr %r14,%r1 378 basr %r14,%r1
379 TRACE_IRQS_ON
364 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts 380 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
365 b BASED(sysc_return) 381 b BASED(sysc_return)
366 382
@@ -516,6 +532,7 @@ pgm_no_vtime3:
516 mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS 532 mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
517 mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID 533 mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
518 oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP 534 oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
535 TRACE_IRQS_ON
519 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts 536 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
520 b BASED(sysc_do_svc) 537 b BASED(sysc_do_svc)
521 538
@@ -539,9 +556,11 @@ io_int_handler:
539io_no_vtime: 556io_no_vtime:
540#endif 557#endif
541 l %r9,__LC_THREAD_INFO # load pointer to thread_info struct 558 l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
559 TRACE_IRQS_OFF
542 l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ 560 l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ
543 la %r2,SP_PTREGS(%r15) # address of register-save area 561 la %r2,SP_PTREGS(%r15) # address of register-save area
544 basr %r14,%r1 # branch to standard irq handler 562 basr %r14,%r1 # branch to standard irq handler
563 TRACE_IRQS_ON
545 564
546io_return: 565io_return:
547 tm SP_PSW+1(%r15),0x01 # returning to user ? 566 tm SP_PSW+1(%r15),0x01 # returning to user ?
@@ -651,10 +670,12 @@ ext_int_handler:
651ext_no_vtime: 670ext_no_vtime:
652#endif 671#endif
653 l %r9,__LC_THREAD_INFO # load pointer to thread_info struct 672 l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
673 TRACE_IRQS_OFF
654 la %r2,SP_PTREGS(%r15) # address of register-save area 674 la %r2,SP_PTREGS(%r15) # address of register-save area
655 lh %r3,__LC_EXT_INT_CODE # get interruption code 675 lh %r3,__LC_EXT_INT_CODE # get interruption code
656 l %r1,BASED(.Ldo_extint) 676 l %r1,BASED(.Ldo_extint)
657 basr %r14,%r1 677 basr %r14,%r1
678 TRACE_IRQS_ON
658 b BASED(io_return) 679 b BASED(io_return)
659 680
660__critical_end: 681__critical_end:
@@ -731,8 +752,10 @@ mcck_no_vtime:
731 stosm __SF_EMPTY(%r15),0x04 # turn dat on 752 stosm __SF_EMPTY(%r15),0x04 # turn dat on
732 tm __TI_flags+3(%r9),_TIF_MCCK_PENDING 753 tm __TI_flags+3(%r9),_TIF_MCCK_PENDING
733 bno BASED(mcck_return) 754 bno BASED(mcck_return)
755 TRACE_IRQS_OFF
734 l %r1,BASED(.Ls390_handle_mcck) 756 l %r1,BASED(.Ls390_handle_mcck)
735 basr %r14,%r1 # call machine check handler 757 basr %r14,%r1 # call machine check handler
758 TRACE_IRQS_ON
736mcck_return: 759mcck_return:
737 mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW 760 mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW
738 ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit 761 ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
@@ -1012,7 +1035,11 @@ cleanup_io_leave_insn:
1012.Lvfork: .long sys_vfork 1035.Lvfork: .long sys_vfork
1013.Lschedtail: .long schedule_tail 1036.Lschedtail: .long schedule_tail
1014.Lsysc_table: .long sys_call_table 1037.Lsysc_table: .long sys_call_table
1015 1038#ifdef CONFIG_TRACE_IRQFLAGS
1039.Ltrace_irq_on:.long trace_hardirqs_on
1040.Ltrace_irq_off:
1041 .long trace_hardirqs_off
1042#endif
1016.Lcritical_start: 1043.Lcritical_start:
1017 .long __critical_start + 0x80000000 1044 .long __critical_start + 0x80000000
1018.Lcritical_end: 1045.Lcritical_end:
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 1ca499fa54b4..56f5f613b868 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -58,6 +58,19 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
58 58
59#define BASED(name) name-system_call(%r13) 59#define BASED(name) name-system_call(%r13)
60 60
61#ifdef CONFIG_TRACE_IRQFLAGS
62 .macro TRACE_IRQS_ON
63 brasl %r14,trace_hardirqs_on
64 .endm
65
66 .macro TRACE_IRQS_OFF
67 brasl %r14,trace_hardirqs_off
68 .endm
69#else
70#define TRACE_IRQS_ON
71#define TRACE_IRQS_OFF
72#endif
73
61 .macro STORE_TIMER lc_offset 74 .macro STORE_TIMER lc_offset
62#ifdef CONFIG_VIRT_CPU_ACCOUNTING 75#ifdef CONFIG_VIRT_CPU_ACCOUNTING
63 stpt \lc_offset 76 stpt \lc_offset
@@ -354,6 +367,7 @@ ret_from_fork:
354 jo 0f 367 jo 0f
355 stg %r15,SP_R15(%r15) # store stack pointer for new kthread 368 stg %r15,SP_R15(%r15) # store stack pointer for new kthread
3560: brasl %r14,schedule_tail 3690: brasl %r14,schedule_tail
370 TRACE_IRQS_ON
357 stosm 24(%r15),0x03 # reenable interrupts 371 stosm 24(%r15),0x03 # reenable interrupts
358 j sysc_return 372 j sysc_return
359 373
@@ -535,6 +549,7 @@ pgm_no_vtime3:
535 mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS 549 mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
536 mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID 550 mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
537 oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP 551 oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
552 TRACE_IRQS_ON
538 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts 553 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
539 j sysc_do_svc 554 j sysc_do_svc
540 555
@@ -557,8 +572,10 @@ io_int_handler:
557io_no_vtime: 572io_no_vtime:
558#endif 573#endif
559 lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct 574 lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
575 TRACE_IRQS_OFF
560 la %r2,SP_PTREGS(%r15) # address of register-save area 576 la %r2,SP_PTREGS(%r15) # address of register-save area
561 brasl %r14,do_IRQ # call standard irq handler 577 brasl %r14,do_IRQ # call standard irq handler
578 TRACE_IRQS_ON
562 579
563io_return: 580io_return:
564 tm SP_PSW+1(%r15),0x01 # returning to user ? 581 tm SP_PSW+1(%r15),0x01 # returning to user ?
@@ -665,9 +682,11 @@ ext_int_handler:
665ext_no_vtime: 682ext_no_vtime:
666#endif 683#endif
667 lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct 684 lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
685 TRACE_IRQS_OFF
668 la %r2,SP_PTREGS(%r15) # address of register-save area 686 la %r2,SP_PTREGS(%r15) # address of register-save area
669 llgh %r3,__LC_EXT_INT_CODE # get interruption code 687 llgh %r3,__LC_EXT_INT_CODE # get interruption code
670 brasl %r14,do_extint 688 brasl %r14,do_extint
689 TRACE_IRQS_ON
671 j io_return 690 j io_return
672 691
673__critical_end: 692__critical_end:
@@ -743,7 +762,9 @@ mcck_no_vtime:
743 stosm __SF_EMPTY(%r15),0x04 # turn dat on 762 stosm __SF_EMPTY(%r15),0x04 # turn dat on
744 tm __TI_flags+7(%r9),_TIF_MCCK_PENDING 763 tm __TI_flags+7(%r9),_TIF_MCCK_PENDING
745 jno mcck_return 764 jno mcck_return
765 TRACE_IRQS_OFF
746 brasl %r14,s390_handle_mcck 766 brasl %r14,s390_handle_mcck
767 TRACE_IRQS_ON
747mcck_return: 768mcck_return:
748 mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW 769 mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW
749 ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit 770 ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 480b6a5fef3a..1eef50918615 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -69,10 +69,6 @@ asmlinkage void do_softirq(void)
69 69
70 local_irq_save(flags); 70 local_irq_save(flags);
71 71
72 account_system_vtime(current);
73
74 local_bh_disable();
75
76 if (local_softirq_pending()) { 72 if (local_softirq_pending()) {
77 /* Get current stack pointer. */ 73 /* Get current stack pointer. */
78 asm volatile("la %0,0(15)" : "=a" (old)); 74 asm volatile("la %0,0(15)" : "=a" (old));
@@ -95,10 +91,6 @@ asmlinkage void do_softirq(void)
95 __do_softirq(); 91 __do_softirq();
96 } 92 }
97 93
98 account_system_vtime(current);
99
100 __local_bh_enable();
101
102 local_irq_restore(flags); 94 local_irq_restore(flags);
103} 95}
104 96
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 1f9399191794..78c8e5548caf 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -142,6 +142,7 @@ static void default_idle(void)
142 return; 142 return;
143 } 143 }
144 144
145 trace_hardirqs_on();
145 /* Wait for external, I/O or machine check interrupt. */ 146 /* Wait for external, I/O or machine check interrupt. */
146 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT | 147 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT |
147 PSW_MASK_IO | PSW_MASK_EXT); 148 PSW_MASK_IO | PSW_MASK_EXT);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
new file mode 100644
index 000000000000..de83f38288d0
--- /dev/null
+++ b/arch/s390/kernel/stacktrace.c
@@ -0,0 +1,90 @@
1/*
2 * arch/s390/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) IBM Corp. 2006
7 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
8 */
9
10#include <linux/sched.h>
11#include <linux/stacktrace.h>
12#include <linux/kallsyms.h>
13
14static inline unsigned long save_context_stack(struct stack_trace *trace,
15 unsigned int *skip,
16 unsigned long sp,
17 unsigned long low,
18 unsigned long high)
19{
20 struct stack_frame *sf;
21 struct pt_regs *regs;
22 unsigned long addr;
23
24 while (1) {
25 sp &= PSW_ADDR_INSN;
26 if (sp < low || sp > high)
27 return sp;
28 sf = (struct stack_frame *)sp;
29 while (1) {
30 addr = sf->gprs[8] & PSW_ADDR_INSN;
31 if (!(*skip))
32 trace->entries[trace->nr_entries++] = addr;
33 else
34 (*skip)--;
35 if (trace->nr_entries >= trace->max_entries)
36 return sp;
37 low = sp;
38 sp = sf->back_chain & PSW_ADDR_INSN;
39 if (!sp)
40 break;
41 if (sp <= low || sp > high - sizeof(*sf))
42 return sp;
43 sf = (struct stack_frame *)sp;
44 }
45 /* Zero backchain detected, check for interrupt frame. */
46 sp = (unsigned long)(sf + 1);
47 if (sp <= low || sp > high - sizeof(*regs))
48 return sp;
49 regs = (struct pt_regs *)sp;
50 addr = regs->psw.addr & PSW_ADDR_INSN;
51 if (!(*skip))
52 trace->entries[trace->nr_entries++] = addr;
53 else
54 (*skip)--;
55 if (trace->nr_entries >= trace->max_entries)
56 return sp;
57 low = sp;
58 sp = regs->gprs[15];
59 }
60}
61
62void save_stack_trace(struct stack_trace *trace,
63 struct task_struct *task, int all_contexts,
64 unsigned int skip)
65{
66 register unsigned long sp asm ("15");
67 unsigned long orig_sp;
68
69 sp &= PSW_ADDR_INSN;
70 orig_sp = sp;
71
72 sp = save_context_stack(trace, &skip, sp,
73 S390_lowcore.panic_stack - PAGE_SIZE,
74 S390_lowcore.panic_stack);
75 if ((sp != orig_sp) && !all_contexts)
76 return;
77 sp = save_context_stack(trace, &skip, sp,
78 S390_lowcore.async_stack - ASYNC_SIZE,
79 S390_lowcore.async_stack);
80 if ((sp != orig_sp) && !all_contexts)
81 return;
82 if (task)
83 save_context_stack(trace, &skip, sp,
84 (unsigned long) task_stack_page(task),
85 (unsigned long) task_stack_page(task) + THREAD_SIZE);
86 else
87 save_context_stack(trace, &skip, sp, S390_lowcore.thread_info,
88 S390_lowcore.thread_info + THREAD_SIZE);
89 return;
90}
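As a rough caller sketch (not part of this diff), the buffer handed to save_stack_trace() carries the entries array plus nr_entries/max_entries, in the style lockdep-type users pass in; the buffer size and skip count below are illustrative:

	unsigned long entries[16];
	struct stack_trace trace = {
		.nr_entries	= 0,
		.max_entries	= 16,
		.entries	= entries,
	};

	/* Trace the running context (task == NULL), skipping the caller frame. */
	save_stack_trace(&trace, NULL, 0, 1);
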
diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
index a9c1443fc548..8368c2dbe635 100644
--- a/arch/um/kernel/tt/process_kern.c
+++ b/arch/um/kernel/tt/process_kern.c
@@ -119,7 +119,7 @@ void suspend_new_thread(int fd)
119 panic("read failed in suspend_new_thread, err = %d", -err); 119 panic("read failed in suspend_new_thread, err = %d", -err);
120} 120}
121 121
122void schedule_tail(task_t *prev); 122void schedule_tail(struct task_struct *prev);
123 123
124static void new_thread_handler(int sig) 124static void new_thread_handler(int sig)
125{ 125{
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 7d51dd7201c3..37cfe7701f06 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -495,6 +495,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
495{ 495{
496} 496}
497 497
498#ifdef CONFIG_SMP
498void alternatives_smp_module_add(struct module *mod, char *name, 499void alternatives_smp_module_add(struct module *mod, char *name,
499 void *locks, void *locks_end, 500 void *locks, void *locks_end,
500 void *text, void *text_end) 501 void *text, void *text_end)
@@ -504,3 +505,4 @@ void alternatives_smp_module_add(struct module *mod, char *name,
504void alternatives_smp_module_del(struct module *mod) 505void alternatives_smp_module_del(struct module *mod)
505{ 506{
506} 507}
508#endif
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e856804c447f..28df7d88ce2c 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,14 @@ config X86
24 bool 24 bool
25 default y 25 default y
26 26
27config LOCKDEP_SUPPORT
28 bool
29 default y
30
31config STACKTRACE_SUPPORT
32 bool
33 default y
34
27config SEMAPHORE_SLEEPERS 35config SEMAPHORE_SLEEPERS
28 bool 36 bool
29 default y 37 default y
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 1d92ab56c0f9..775d211a5cf9 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -1,5 +1,9 @@
1menu "Kernel hacking" 1menu "Kernel hacking"
2 2
3config TRACE_IRQFLAGS_SUPPORT
4 bool
5 default y
6
3source "lib/Kconfig.debug" 7source "lib/Kconfig.debug"
4 8
5config DEBUG_RODATA 9config DEBUG_RODATA
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c536fa98ea37..9b5bb413a6e9 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -13,6 +13,7 @@
13#include <asm/thread_info.h> 13#include <asm/thread_info.h>
14#include <asm/segment.h> 14#include <asm/segment.h>
15#include <asm/vsyscall32.h> 15#include <asm/vsyscall32.h>
16#include <asm/irqflags.h>
16#include <linux/linkage.h> 17#include <linux/linkage.h>
17 18
18#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) 19#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target)
75 swapgs 76 swapgs
76 movq %gs:pda_kernelstack, %rsp 77 movq %gs:pda_kernelstack, %rsp
77 addq $(PDA_STACKOFFSET),%rsp 78 addq $(PDA_STACKOFFSET),%rsp
79 /*
80 * No need to follow this irqs on/off section: the syscall
81 * disabled irqs; here we enable them straight after entry:
82 */
78 sti 83 sti
79 movl %ebp,%ebp /* zero extension */ 84 movl %ebp,%ebp /* zero extension */
80 pushq $__USER32_DS 85 pushq $__USER32_DS
@@ -118,6 +123,7 @@ sysenter_do_call:
118 movq %rax,RAX-ARGOFFSET(%rsp) 123 movq %rax,RAX-ARGOFFSET(%rsp)
119 GET_THREAD_INFO(%r10) 124 GET_THREAD_INFO(%r10)
120 cli 125 cli
126 TRACE_IRQS_OFF
121 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) 127 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
122 jnz int_ret_from_sys_call 128 jnz int_ret_from_sys_call
123 andl $~TS_COMPAT,threadinfo_status(%r10) 129 andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -132,6 +138,7 @@ sysenter_do_call:
132 CFI_REGISTER rsp,rcx 138 CFI_REGISTER rsp,rcx
133 movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ 139 movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
134 CFI_REGISTER rip,rdx 140 CFI_REGISTER rip,rdx
141 TRACE_IRQS_ON
135 swapgs 142 swapgs
136 sti /* sti only takes effect after the next instruction */ 143 sti /* sti only takes effect after the next instruction */
137 /* sysexit */ 144 /* sysexit */
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target)
186 movl %esp,%r8d 193 movl %esp,%r8d
187 CFI_REGISTER rsp,r8 194 CFI_REGISTER rsp,r8
188 movq %gs:pda_kernelstack,%rsp 195 movq %gs:pda_kernelstack,%rsp
196 /*
197 * No need to follow this irqs on/off section: the syscall
198 * disabled irqs and here we enable them straight after entry:
199 */
189 sti 200 sti
190 SAVE_ARGS 8,1,1 201 SAVE_ARGS 8,1,1
191 movl %eax,%eax /* zero extension */ 202 movl %eax,%eax /* zero extension */
@@ -220,6 +231,7 @@ cstar_do_call:
220 movq %rax,RAX-ARGOFFSET(%rsp) 231 movq %rax,RAX-ARGOFFSET(%rsp)
221 GET_THREAD_INFO(%r10) 232 GET_THREAD_INFO(%r10)
222 cli 233 cli
234 TRACE_IRQS_OFF
223 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) 235 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
224 jnz int_ret_from_sys_call 236 jnz int_ret_from_sys_call
225 andl $~TS_COMPAT,threadinfo_status(%r10) 237 andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -228,6 +240,7 @@ cstar_do_call:
228 CFI_REGISTER rip,rcx 240 CFI_REGISTER rip,rcx
229 movl EFLAGS-ARGOFFSET(%rsp),%r11d 241 movl EFLAGS-ARGOFFSET(%rsp),%r11d
230 /*CFI_REGISTER rflags,r11*/ 242 /*CFI_REGISTER rflags,r11*/
243 TRACE_IRQS_ON
231 movl RSP-ARGOFFSET(%rsp),%esp 244 movl RSP-ARGOFFSET(%rsp),%esp
232 CFI_RESTORE rsp 245 CFI_RESTORE rsp
233 swapgs 246 swapgs
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall)
286 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ 299 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
287 /*CFI_REL_OFFSET cs,CS-RIP*/ 300 /*CFI_REL_OFFSET cs,CS-RIP*/
288 CFI_REL_OFFSET rip,RIP-RIP 301 CFI_REL_OFFSET rip,RIP-RIP
289 swapgs 302 swapgs
303 /*
304 * No need to follow this irqs on/off section: the syscall
305 * disabled irqs and here we enable them straight after entry:
306 */
290 sti 307 sti
291 movl %eax,%eax 308 movl %eax,%eax
292 pushq %rax 309 pushq %rax
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 819e84ec5b64..b5aaeafc1cd3 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o alternative.o 11 pci-dma.o pci-nommu.o alternative.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o
13obj-$(CONFIG_X86_MCE) += mce.o 14obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 15obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
15obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o 16obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c2983606..d464dded68c0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,13 +41,24 @@
41#include <asm/thread_info.h> 41#include <asm/thread_info.h>
42#include <asm/hw_irq.h> 42#include <asm/hw_irq.h>
43#include <asm/page.h> 43#include <asm/page.h>
44#include <asm/irqflags.h>
44 45
45 .code64 46 .code64
46 47
47#ifndef CONFIG_PREEMPT 48#ifndef CONFIG_PREEMPT
48#define retint_kernel retint_restore_args 49#define retint_kernel retint_restore_args
49#endif 50#endif
50 51
52
53.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
54#ifdef CONFIG_TRACE_IRQFLAGS
55 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
56 jnc 1f
57 TRACE_IRQS_ON
581:
59#endif
60.endm
61
51/* 62/*
52 * C code is not supposed to know about undefined top of stack. Every time 63 * C code is not supposed to know about undefined top of stack. Every time
53 * a C function with an pt_regs argument is called from the SYSCALL based 64 * a C function with an pt_regs argument is called from the SYSCALL based
@@ -194,6 +205,10 @@ ENTRY(system_call)
194 swapgs 205 swapgs
195 movq %rsp,%gs:pda_oldrsp 206 movq %rsp,%gs:pda_oldrsp
196 movq %gs:pda_kernelstack,%rsp 207 movq %gs:pda_kernelstack,%rsp
208 /*
209 * No need to follow this irqs off/on section - it's straight
210 * and short:
211 */
197 sti 212 sti
198 SAVE_ARGS 8,1 213 SAVE_ARGS 8,1
199 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 214 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
@@ -219,10 +234,15 @@ ret_from_sys_call:
219sysret_check: 234sysret_check:
220 GET_THREAD_INFO(%rcx) 235 GET_THREAD_INFO(%rcx)
221 cli 236 cli
237 TRACE_IRQS_OFF
222 movl threadinfo_flags(%rcx),%edx 238 movl threadinfo_flags(%rcx),%edx
223 andl %edi,%edx 239 andl %edi,%edx
224 CFI_REMEMBER_STATE 240 CFI_REMEMBER_STATE
225 jnz sysret_careful 241 jnz sysret_careful
242 /*
243 * sysretq will re-enable interrupts:
244 */
245 TRACE_IRQS_ON
226 movq RIP-ARGOFFSET(%rsp),%rcx 246 movq RIP-ARGOFFSET(%rsp),%rcx
227 CFI_REGISTER rip,rcx 247 CFI_REGISTER rip,rcx
228 RESTORE_ARGS 0,-ARG_SKIP,1 248 RESTORE_ARGS 0,-ARG_SKIP,1
@@ -237,6 +257,7 @@ sysret_careful:
237 CFI_RESTORE_STATE 257 CFI_RESTORE_STATE
238 bt $TIF_NEED_RESCHED,%edx 258 bt $TIF_NEED_RESCHED,%edx
239 jnc sysret_signal 259 jnc sysret_signal
260 TRACE_IRQS_ON
240 sti 261 sti
241 pushq %rdi 262 pushq %rdi
242 CFI_ADJUST_CFA_OFFSET 8 263 CFI_ADJUST_CFA_OFFSET 8
@@ -247,6 +268,7 @@ sysret_careful:
247 268
248 /* Handle a signal */ 269 /* Handle a signal */
249sysret_signal: 270sysret_signal:
271 TRACE_IRQS_ON
250 sti 272 sti
251 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx 273 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
252 jz 1f 274 jz 1f
@@ -261,6 +283,7 @@ sysret_signal:
261 /* Use IRET because user could have changed frame. This 283 /* Use IRET because user could have changed frame. This
262 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 284 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
263 cli 285 cli
286 TRACE_IRQS_OFF
264 jmp int_with_check 287 jmp int_with_check
265 288
266badsys: 289badsys:
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call)
309 CFI_REL_OFFSET r10,R10-ARGOFFSET 332 CFI_REL_OFFSET r10,R10-ARGOFFSET
310 CFI_REL_OFFSET r11,R11-ARGOFFSET 333 CFI_REL_OFFSET r11,R11-ARGOFFSET
311 cli 334 cli
335 TRACE_IRQS_OFF
312 testl $3,CS-ARGOFFSET(%rsp) 336 testl $3,CS-ARGOFFSET(%rsp)
313 je retint_restore_args 337 je retint_restore_args
314 movl $_TIF_ALLWORK_MASK,%edi 338 movl $_TIF_ALLWORK_MASK,%edi
@@ -327,6 +351,7 @@ int_with_check:
327int_careful: 351int_careful:
328 bt $TIF_NEED_RESCHED,%edx 352 bt $TIF_NEED_RESCHED,%edx
329 jnc int_very_careful 353 jnc int_very_careful
354 TRACE_IRQS_ON
330 sti 355 sti
331 pushq %rdi 356 pushq %rdi
332 CFI_ADJUST_CFA_OFFSET 8 357 CFI_ADJUST_CFA_OFFSET 8
@@ -334,10 +359,12 @@ int_careful:
334 popq %rdi 359 popq %rdi
335 CFI_ADJUST_CFA_OFFSET -8 360 CFI_ADJUST_CFA_OFFSET -8
336 cli 361 cli
362 TRACE_IRQS_OFF
337 jmp int_with_check 363 jmp int_with_check
338 364
339 /* handle signals and tracing -- both require a full stack frame */ 365 /* handle signals and tracing -- both require a full stack frame */
340int_very_careful: 366int_very_careful:
367 TRACE_IRQS_ON
341 sti 368 sti
342 SAVE_REST 369 SAVE_REST
343 /* Check for syscall exit trace */ 370 /* Check for syscall exit trace */
@@ -351,6 +378,7 @@ int_very_careful:
351 CFI_ADJUST_CFA_OFFSET -8 378 CFI_ADJUST_CFA_OFFSET -8
352 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 379 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
353 cli 380 cli
381 TRACE_IRQS_OFF
354 jmp int_restore_rest 382 jmp int_restore_rest
355 383
356int_signal: 384int_signal:
@@ -363,6 +391,7 @@ int_signal:
363int_restore_rest: 391int_restore_rest:
364 RESTORE_REST 392 RESTORE_REST
365 cli 393 cli
394 TRACE_IRQS_OFF
366 jmp int_with_check 395 jmp int_with_check
367 CFI_ENDPROC 396 CFI_ENDPROC
368END(int_ret_from_sys_call) 397END(int_ret_from_sys_call)
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn)
484 swapgs 513 swapgs
4851: incl %gs:pda_irqcount # RED-PEN should check preempt count 5141: incl %gs:pda_irqcount # RED-PEN should check preempt count
486 cmoveq %gs:pda_irqstackptr,%rsp 515 cmoveq %gs:pda_irqstackptr,%rsp
516 /*
517 * We entered an interrupt context - irqs are off:
518 */
519 TRACE_IRQS_OFF
487 call \func 520 call \func
488 .endm 521 .endm
489 522
@@ -493,6 +526,7 @@ ENTRY(common_interrupt)
493 /* 0(%rsp): oldrsp-ARGOFFSET */ 526 /* 0(%rsp): oldrsp-ARGOFFSET */
494ret_from_intr: 527ret_from_intr:
495 cli 528 cli
529 TRACE_IRQS_OFF
496 decl %gs:pda_irqcount 530 decl %gs:pda_irqcount
497 leaveq 531 leaveq
498 CFI_DEF_CFA_REGISTER rsp 532 CFI_DEF_CFA_REGISTER rsp
@@ -515,9 +549,21 @@ retint_check:
515 CFI_REMEMBER_STATE 549 CFI_REMEMBER_STATE
516 jnz retint_careful 550 jnz retint_careful
517retint_swapgs: 551retint_swapgs:
552 /*
553 * The iretq could re-enable interrupts:
554 */
555 cli
556 TRACE_IRQS_IRETQ
518 swapgs 557 swapgs
558 jmp restore_args
559
519retint_restore_args: 560retint_restore_args:
520 cli 561 cli
562 /*
563 * The iretq could re-enable interrupts:
564 */
565 TRACE_IRQS_IRETQ
566restore_args:
521 RESTORE_ARGS 0,8,0 567 RESTORE_ARGS 0,8,0
522iret_label: 568iret_label:
523 iretq 569 iretq
@@ -530,6 +576,7 @@ iret_label:
530 /* running with kernel gs */ 576 /* running with kernel gs */
531bad_iret: 577bad_iret:
532 movq $11,%rdi /* SIGSEGV */ 578 movq $11,%rdi /* SIGSEGV */
579 TRACE_IRQS_ON
533 sti 580 sti
534 jmp do_exit 581 jmp do_exit
535 .previous 582 .previous
@@ -539,6 +586,7 @@ retint_careful:
539 CFI_RESTORE_STATE 586 CFI_RESTORE_STATE
540 bt $TIF_NEED_RESCHED,%edx 587 bt $TIF_NEED_RESCHED,%edx
541 jnc retint_signal 588 jnc retint_signal
589 TRACE_IRQS_ON
542 sti 590 sti
543 pushq %rdi 591 pushq %rdi
544 CFI_ADJUST_CFA_OFFSET 8 592 CFI_ADJUST_CFA_OFFSET 8
@@ -547,11 +595,13 @@ retint_careful:
547 CFI_ADJUST_CFA_OFFSET -8 595 CFI_ADJUST_CFA_OFFSET -8
548 GET_THREAD_INFO(%rcx) 596 GET_THREAD_INFO(%rcx)
549 cli 597 cli
598 TRACE_IRQS_OFF
550 jmp retint_check 599 jmp retint_check
551 600
552retint_signal: 601retint_signal:
553 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx 602 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
554 jz retint_swapgs 603 jz retint_swapgs
604 TRACE_IRQS_ON
555 sti 605 sti
556 SAVE_REST 606 SAVE_REST
557 movq $-1,ORIG_RAX(%rsp) 607 movq $-1,ORIG_RAX(%rsp)
@@ -560,6 +610,7 @@ retint_signal:
560 call do_notify_resume 610 call do_notify_resume
561 RESTORE_REST 611 RESTORE_REST
562 cli 612 cli
613 TRACE_IRQS_OFF
563 movl $_TIF_NEED_RESCHED,%edi 614 movl $_TIF_NEED_RESCHED,%edi
564 GET_THREAD_INFO(%rcx) 615 GET_THREAD_INFO(%rcx)
565 jmp retint_check 616 jmp retint_check
@@ -666,7 +717,7 @@ END(spurious_interrupt)
666 717
667 /* error code is on the stack already */ 718 /* error code is on the stack already */
668 /* handle NMI like exceptions that can happen everywhere */ 719 /* handle NMI like exceptions that can happen everywhere */
669 .macro paranoidentry sym, ist=0 720 .macro paranoidentry sym, ist=0, irqtrace=1
670 SAVE_ALL 721 SAVE_ALL
671 cld 722 cld
672 movl $1,%ebx 723 movl $1,%ebx
@@ -691,8 +742,73 @@ END(spurious_interrupt)
691 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 742 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
692 .endif 743 .endif
693 cli 744 cli
745 .if \irqtrace
746 TRACE_IRQS_OFF
747 .endif
694 .endm 748 .endm
695 749
750 /*
751 * "Paranoid" exit path from exception stack.
752 * Paranoid because this is used by NMIs and cannot take
753 * any kernel state for granted.
754 * We don't do kernel preemption checks here, because only
755 * NMI should be common and it does not enable IRQs and
756 * cannot get reschedule ticks.
757 *
758 * "trace" is 0 for the NMI handler only, because irq-tracing
759 * is fundamentally NMI-unsafe. (we cannot change the soft and
760 * hard flags at once, atomically)
761 */
762 .macro paranoidexit trace=1
763 /* ebx: no swapgs flag */
764paranoid_exit\trace:
765 testl %ebx,%ebx /* swapgs needed? */
766 jnz paranoid_restore\trace
767 testl $3,CS(%rsp)
768 jnz paranoid_userspace\trace
769paranoid_swapgs\trace:
770 TRACE_IRQS_IRETQ 0
771 swapgs
772paranoid_restore\trace:
773 RESTORE_ALL 8
774 iretq
775paranoid_userspace\trace:
776 GET_THREAD_INFO(%rcx)
777 movl threadinfo_flags(%rcx),%ebx
778 andl $_TIF_WORK_MASK,%ebx
779 jz paranoid_swapgs\trace
780 movq %rsp,%rdi /* &pt_regs */
781 call sync_regs
782 movq %rax,%rsp /* switch stack for scheduling */
783 testl $_TIF_NEED_RESCHED,%ebx
784 jnz paranoid_schedule\trace
785 movl %ebx,%edx /* arg3: thread flags */
786 .if \trace
787 TRACE_IRQS_ON
788 .endif
789 sti
790 xorl %esi,%esi /* arg2: oldset */
791 movq %rsp,%rdi /* arg1: &pt_regs */
792 call do_notify_resume
793 cli
794 .if \trace
795 TRACE_IRQS_OFF
796 .endif
797 jmp paranoid_userspace\trace
798paranoid_schedule\trace:
799 .if \trace
800 TRACE_IRQS_ON
801 .endif
802 sti
803 call schedule
804 cli
805 .if \trace
806 TRACE_IRQS_OFF
807 .endif
808 jmp paranoid_userspace\trace
809 CFI_ENDPROC
810 .endm
811
696/* 812/*
697 * Exception entry point. This expects an error code/orig_rax on the stack 813 * Exception entry point. This expects an error code/orig_rax on the stack
698 * and the exception handler in %rax. 814 * and the exception handler in %rax.
@@ -748,6 +864,7 @@ error_exit:
748 movl %ebx,%eax 864 movl %ebx,%eax
749 RESTORE_REST 865 RESTORE_REST
750 cli 866 cli
867 TRACE_IRQS_OFF
751 GET_THREAD_INFO(%rcx) 868 GET_THREAD_INFO(%rcx)
752 testl %eax,%eax 869 testl %eax,%eax
753 jne retint_kernel 870 jne retint_kernel
@@ -755,6 +872,10 @@ error_exit:
755 movl $_TIF_WORK_MASK,%edi 872 movl $_TIF_WORK_MASK,%edi
756 andl %edi,%edx 873 andl %edi,%edx
757 jnz retint_careful 874 jnz retint_careful
875 /*
876 * The iret might restore flags:
877 */
878 TRACE_IRQS_IRETQ
758 swapgs 879 swapgs
759 RESTORE_ARGS 0,8,0 880 RESTORE_ARGS 0,8,0
760 jmp iret_label 881 jmp iret_label
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug)
916 pushq $0 1037 pushq $0
917 CFI_ADJUST_CFA_OFFSET 8 1038 CFI_ADJUST_CFA_OFFSET 8
918 paranoidentry do_debug, DEBUG_STACK 1039 paranoidentry do_debug, DEBUG_STACK
919 jmp paranoid_exit 1040 paranoidexit
920 CFI_ENDPROC
921END(debug) 1041END(debug)
922 .previous .text 1042 .previous .text
923 1043
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi)
926 INTR_FRAME 1046 INTR_FRAME
927 pushq $-1 1047 pushq $-1
928 CFI_ADJUST_CFA_OFFSET 8 1048 CFI_ADJUST_CFA_OFFSET 8
929 paranoidentry do_nmi 1049 paranoidentry do_nmi, 0, 0
930 /* 1050#ifdef CONFIG_TRACE_IRQFLAGS
931 * "Paranoid" exit path from exception stack. 1051 paranoidexit 0
932 * Paranoid because this is used by NMIs and cannot take 1052#else
933 * any kernel state for granted. 1053 jmp paranoid_exit1
934 * We don't do kernel preemption checks here, because only 1054 CFI_ENDPROC
935 * NMI should be common and it does not enable IRQs and 1055#endif
936 * cannot get reschedule ticks.
937 */
938 /* ebx: no swapgs flag */
939paranoid_exit:
940 testl %ebx,%ebx /* swapgs needed? */
941 jnz paranoid_restore
942 testl $3,CS(%rsp)
943 jnz paranoid_userspace
944paranoid_swapgs:
945 swapgs
946paranoid_restore:
947 RESTORE_ALL 8
948 iretq
949paranoid_userspace:
950 GET_THREAD_INFO(%rcx)
951 movl threadinfo_flags(%rcx),%ebx
952 andl $_TIF_WORK_MASK,%ebx
953 jz paranoid_swapgs
954 movq %rsp,%rdi /* &pt_regs */
955 call sync_regs
956 movq %rax,%rsp /* switch stack for scheduling */
957 testl $_TIF_NEED_RESCHED,%ebx
958 jnz paranoid_schedule
959 movl %ebx,%edx /* arg3: thread flags */
960 sti
961 xorl %esi,%esi /* arg2: oldset */
962 movq %rsp,%rdi /* arg1: &pt_regs */
963 call do_notify_resume
964 cli
965 jmp paranoid_userspace
966paranoid_schedule:
967 sti
968 call schedule
969 cli
970 jmp paranoid_userspace
971 CFI_ENDPROC
972END(nmi) 1056END(nmi)
973 .previous .text 1057 .previous .text
974 1058
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3)
977 pushq $0 1061 pushq $0
978 CFI_ADJUST_CFA_OFFSET 8 1062 CFI_ADJUST_CFA_OFFSET 8
979 paranoidentry do_int3, DEBUG_STACK 1063 paranoidentry do_int3, DEBUG_STACK
980 jmp paranoid_exit 1064 jmp paranoid_exit1
981 CFI_ENDPROC 1065 CFI_ENDPROC
982END(int3) 1066END(int3)
983 .previous .text 1067 .previous .text
@@ -1006,7 +1090,7 @@ END(reserved)
1006ENTRY(double_fault) 1090ENTRY(double_fault)
1007 XCPT_FRAME 1091 XCPT_FRAME
1008 paranoidentry do_double_fault 1092 paranoidentry do_double_fault
1009 jmp paranoid_exit 1093 jmp paranoid_exit1
1010 CFI_ENDPROC 1094 CFI_ENDPROC
1011END(double_fault) 1095END(double_fault)
1012 1096
@@ -1022,7 +1106,7 @@ END(segment_not_present)
1022ENTRY(stack_segment) 1106ENTRY(stack_segment)
1023 XCPT_FRAME 1107 XCPT_FRAME
1024 paranoidentry do_stack_segment 1108 paranoidentry do_stack_segment
1025 jmp paranoid_exit 1109 jmp paranoid_exit1
1026 CFI_ENDPROC 1110 CFI_ENDPROC
1027END(stack_segment) 1111END(stack_segment)
1028 1112
@@ -1050,7 +1134,7 @@ ENTRY(machine_check)
1050 pushq $0 1134 pushq $0
1051 CFI_ADJUST_CFA_OFFSET 8 1135 CFI_ADJUST_CFA_OFFSET 8
1052 paranoidentry do_machine_check 1136 paranoidentry do_machine_check
1053 jmp paranoid_exit 1137 jmp paranoid_exit1
1054 CFI_ENDPROC 1138 CFI_ENDPROC
1055END(machine_check) 1139END(machine_check)
1056#endif 1140#endif
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index e6a71c9556d9..36647ce6aecb 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -85,6 +85,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
85 clear_bss(); 85 clear_bss();
86 86
87 /* 87 /*
88 * This must be called really, really early:
89 */
90 lockdep_init();
91
92 /*
88 * switch to init_level4_pgt from boot_level4_pgt 93 * switch to init_level4_pgt from boot_level4_pgt
89 */ 94 */
90 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); 95 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5f7bfc..5221a53e90c1 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void)
177 local_irq_save(flags); 177 local_irq_save(flags);
178 pending = local_softirq_pending(); 178 pending = local_softirq_pending();
179 /* Switch to interrupt stack */ 179 /* Switch to interrupt stack */
180 if (pending) 180 if (pending) {
181 call_softirq(); 181 call_softirq();
182 WARN_ON_ONCE(softirq_count());
183 }
182 local_irq_restore(flags); 184 local_irq_restore(flags);
183} 185}
184EXPORT_SYMBOL(do_softirq); 186EXPORT_SYMBOL(do_softirq);
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 476c1472fc07..5baa0c726e97 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void)
127static __init void nmi_cpu_busy(void *data) 127static __init void nmi_cpu_busy(void *data)
128{ 128{
129 volatile int *endflag = data; 129 volatile int *endflag = data;
130 local_irq_enable(); 130 local_irq_enable_in_hardirq();
131 /* Intentionally don't use cpu_relax here. This is 131 /* Intentionally don't use cpu_relax here. This is
132 to make sure that the performance counter really ticks, 132 to make sure that the performance counter really ticks,
133 even if there is a simulator or similar that catches the 133 even if there is a simulator or similar that catches the
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index ca56e19b8b6e..bb6745d13b8f 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -296,7 +296,7 @@ void __show_regs(struct pt_regs * regs)
296 system_utsname.version); 296 system_utsname.version);
297 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); 297 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
298 printk_address(regs->rip); 298 printk_address(regs->rip);
299 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, 299 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
300 regs->eflags); 300 regs->eflags);
301 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 301 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
302 regs->rax, regs->rbx, regs->rcx); 302 regs->rax, regs->rbx, regs->rcx);
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 9705a6a384f1..b7c705969791 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -775,6 +775,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
775 }; 775 };
776 DECLARE_WORK(work, do_fork_idle, &c_idle); 776 DECLARE_WORK(work, do_fork_idle, &c_idle);
777 777
778 lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key);
779
778 /* allocate memory for gdts of secondary cpus. Hotplug is considered */ 780 /* allocate memory for gdts of secondary cpus. Hotplug is considered */
779 if (!cpu_gdt_descr[cpu].address && 781 if (!cpu_gdt_descr[cpu].address &&
780 !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { 782 !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
new file mode 100644
index 000000000000..32cf55eb9af8
--- /dev/null
+++ b/arch/x86_64/kernel/stacktrace.c
@@ -0,0 +1,221 @@
1/*
2 * arch/x86_64/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/stacktrace.h>
10
11#include <asm/smp.h>
12
13static inline int
14in_range(unsigned long start, unsigned long addr, unsigned long end)
15{
16 return addr >= start && addr <= end;
17}
18
19static unsigned long
20get_stack_end(struct task_struct *task, unsigned long stack)
21{
22 unsigned long stack_start, stack_end, flags;
23 int i, cpu;
24
25 /*
26 * The most common case is that we are in the task stack:
27 */
28 stack_start = (unsigned long)task->thread_info;
29 stack_end = stack_start + THREAD_SIZE;
30
31 if (in_range(stack_start, stack, stack_end))
32 return stack_end;
33
34 /*
35 * We are in an interrupt if irqstackptr is set:
36 */
37 raw_local_irq_save(flags);
38 cpu = safe_smp_processor_id();
39 stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
40
41 if (stack_end) {
42 stack_start = stack_end & ~(IRQSTACKSIZE-1);
43 if (in_range(stack_start, stack, stack_end))
44 goto out_restore;
45 /*
46 * We get here if we are in an IRQ context but we
47 * are also in an exception stack.
48 */
49 }
50
51 /*
52 * Iterate over all exception stacks, and figure out whether
53 * 'stack' is in one of them:
54 */
55 for (i = 0; i < N_EXCEPTION_STACKS; i++) {
56 /*
57 * set 'end' to the end of the exception stack.
58 */
59 stack_end = per_cpu(init_tss, cpu).ist[i];
60 stack_start = stack_end - EXCEPTION_STKSZ;
61
62 /*
63 * Is 'stack' above this exception frame's end?
64 * If yes then skip to the next frame.
65 */
66 if (stack >= stack_end)
67 continue;
68 /*
69 * Is 'stack' above this exception frame's start address?
70 * If yes then we found the right frame.
71 */
72 if (stack >= stack_start)
73 goto out_restore;
74
75 /*
76 * If this is a debug stack, and if it has a larger size than
77 * the usual exception stacks, then 'stack' might still
78 * be within the lower portion of the debug stack:
79 */
80#if DEBUG_STKSZ > EXCEPTION_STKSZ
81 if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) {
82 /*
83 * Black magic. A large debug stack is composed of
84 * multiple exception stack entries, which we
85 * iterate through now. Don't look:
86 */
87 do {
88 stack_end -= EXCEPTION_STKSZ;
89 stack_start -= EXCEPTION_STKSZ;
90 } while (stack < stack_start);
91
92 goto out_restore;
93 }
94#endif
95 }
96 /*
97 * Ok, 'stack' is not pointing to any of the system stacks.
98 */
99 stack_end = 0;
100
101out_restore:
102 raw_local_irq_restore(flags);
103
104 return stack_end;
105}
106
107
108/*
109 * Save stack-backtrace addresses into a stack_trace buffer:
110 */
111static inline unsigned long
112save_context_stack(struct stack_trace *trace, unsigned int skip,
113 unsigned long stack, unsigned long stack_end)
114{
115 unsigned long addr;
116
117#ifdef CONFIG_FRAME_POINTER
118 unsigned long prev_stack = 0;
119
120 while (in_range(prev_stack, stack, stack_end)) {
121 pr_debug("stack: %p\n", (void *)stack);
122 addr = (unsigned long)(((unsigned long *)stack)[1]);
123 pr_debug("addr: %p\n", (void *)addr);
124 if (!skip)
125 trace->entries[trace->nr_entries++] = addr-1;
126 else
127 skip--;
128 if (trace->nr_entries >= trace->max_entries)
129 break;
130 if (!addr)
131 return 0;
132 /*
133 * Stack frames must go forwards (otherwise a loop could
134 * happen if the stackframe is corrupted), so we move
135 * prev_stack forwards:
136 */
137 prev_stack = stack;
138 stack = (unsigned long)(((unsigned long *)stack)[0]);
139 }
140 pr_debug("invalid: %p\n", (void *)stack);
141#else
142 while (stack < stack_end) {
143 addr = ((unsigned long *)stack)[0];
144 stack += sizeof(long);
145 if (__kernel_text_address(addr)) {
146 if (!skip)
147 trace->entries[trace->nr_entries++] = addr-1;
148 else
149 skip--;
150 if (trace->nr_entries >= trace->max_entries)
151 break;
152 }
153 }
154#endif
155 return stack;
156}
157
158#define MAX_STACKS 10
159
160/*
161 * Save stack-backtrace addresses into a stack_trace buffer.
162 * If all_contexts is set, all contexts (hardirq, softirq and process)
163 * are saved. If not set then only the current context is saved.
164 */
165void save_stack_trace(struct stack_trace *trace,
166 struct task_struct *task, int all_contexts,
167 unsigned int skip)
168{
169 unsigned long stack = (unsigned long)&stack;
170 int i, nr_stacks = 0, stacks_done[MAX_STACKS];
171
172 WARN_ON(trace->nr_entries || !trace->max_entries);
173
174 if (!task)
175 task = current;
176
177 pr_debug("task: %p, ti: %p\n", task, task->thread_info);
178
179 if (!task || task == current) {
180 /* Grab rbp right from our regs: */
181 asm ("mov %%rbp, %0" : "=r" (stack));
182 pr_debug("rbp: %p\n", (void *)stack);
183 } else {
184 /* rbp is the last reg pushed by switch_to(): */
185 stack = task->thread.rsp;
186 pr_debug("other task rsp: %p\n", (void *)stack);
187 stack = (unsigned long)(((unsigned long *)stack)[0]);
188 pr_debug("other task rbp: %p\n", (void *)stack);
189 }
190
191 while (1) {
192 unsigned long stack_end = get_stack_end(task, stack);
193
194 pr_debug("stack: %p\n", (void *)stack);
195 pr_debug("stack end: %p\n", (void *)stack_end);
196
197 /*
198 * Invalid stack address?
199 */
200 if (!stack_end)
201 return;
202 /*
203 * Were we in this stack already? (recursion)
204 */
205 for (i = 0; i < nr_stacks; i++)
206 if (stacks_done[i] == stack_end)
207 return;
208 stacks_done[nr_stacks] = stack_end;
209
210 stack = save_context_stack(trace, skip, stack, stack_end);
211 if (!all_contexts || !stack ||
212 trace->nr_entries >= trace->max_entries)
213 return;
214 trace->entries[trace->nr_entries++] = ULONG_MAX;
215 if (trace->nr_entries >= trace->max_entries)
216 return;
217 if (++nr_stacks >= MAX_STACKS)
218 return;
219 }
220}
221
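
A minimal usage sketch of the interface above (not part of the patch): it assumes the stack_trace layout this series adds to <linux/stacktrace.h> (nr_entries, max_entries, entries) and the save_stack_trace() signature from the new file; the buffer and helper names are hypothetical.

#include <linux/kernel.h>
#include <linux/stacktrace.h>

static unsigned long example_entries[16];	/* hypothetical entry buffer */

static void example_dump_current_stack(void)
{
	struct stack_trace trace = {
		.nr_entries  = 0,
		.max_entries = ARRAY_SIZE(example_entries),
		.entries     = example_entries,
	};
	unsigned int i;

	/* task == NULL means "current"; current context only, skip nothing */
	save_stack_trace(&trace, NULL, 0, 0);

	for (i = 0; i < trace.nr_entries; i++) {
		if (trace.entries[i] == ULONG_MAX)
			break;		/* per-context separator */
		printk(" [<%016lx>]\n", trace.entries[i]);
	}
}
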
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 5a5311d3de0f..79d05c482072 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -110,28 +110,31 @@ static int kstack_depth_to_print = 12;
110static int call_trace = 1; 110static int call_trace = 1;
111 111
112#ifdef CONFIG_KALLSYMS 112#ifdef CONFIG_KALLSYMS
113#include <linux/kallsyms.h> 113# include <linux/kallsyms.h>
114int printk_address(unsigned long address) 114void printk_address(unsigned long address)
115{ 115{
116 unsigned long offset = 0, symsize; 116 unsigned long offset = 0, symsize;
117 const char *symname; 117 const char *symname;
118 char *modname; 118 char *modname;
119 char *delim = ":"; 119 char *delim = ":";
120 char namebuf[128]; 120 char namebuf[128];
121 121
122 symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); 122 symname = kallsyms_lookup(address, &symsize, &offset,
123 if (!symname) 123 &modname, namebuf);
124 return printk("[<%016lx>]", address); 124 if (!symname) {
125 if (!modname) 125 printk(" [<%016lx>]\n", address);
126 return;
127 }
128 if (!modname)
126 modname = delim = ""; 129 modname = delim = "";
127 return printk("<%016lx>{%s%s%s%s%+ld}", 130 printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
128 address, delim, modname, delim, symname, offset); 131 address, delim, modname, delim, symname, offset, symsize);
129} 132}
130#else 133#else
131int printk_address(unsigned long address) 134void printk_address(unsigned long address)
132{ 135{
133 return printk("[<%016lx>]", address); 136 printk(" [<%016lx>]\n", address);
134} 137}
135#endif 138#endif
136 139
137static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 140static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -149,10 +152,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
149 }; 152 };
150 unsigned k; 153 unsigned k;
151 154
155 /*
156 * Iterate over all exception stacks, and figure out whether
157 * 'stack' is in one of them:
158 */
152 for (k = 0; k < N_EXCEPTION_STACKS; k++) { 159 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
153 unsigned long end; 160 unsigned long end;
154 161
162 /*
163 * set 'end' to the end of the exception stack.
164 */
155 switch (k + 1) { 165 switch (k + 1) {
166 /*
167 * TODO: this block is not needed, I think, because
168 * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
169 * properly too.
170 */
156#if DEBUG_STKSZ > EXCEPTION_STKSZ 171#if DEBUG_STKSZ > EXCEPTION_STKSZ
157 case DEBUG_STACK: 172 case DEBUG_STACK:
158 end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; 173 end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
@@ -162,19 +177,43 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
162 end = per_cpu(init_tss, cpu).ist[k]; 177 end = per_cpu(init_tss, cpu).ist[k];
163 break; 178 break;
164 } 179 }
180 /*
181 * Is 'stack' above this exception frame's end?
182 * If yes then skip to the next frame.
183 */
165 if (stack >= end) 184 if (stack >= end)
166 continue; 185 continue;
186 /*
187 * Is 'stack' above this exception frame's start address?
188 * If yes then we found the right frame.
189 */
167 if (stack >= end - EXCEPTION_STKSZ) { 190 if (stack >= end - EXCEPTION_STKSZ) {
191 /*
192 * Make sure we only iterate through an exception
193 * stack once. If it comes up for the second time
194 * then there's something wrong going on - just
195 * break out and return NULL:
196 */
168 if (*usedp & (1U << k)) 197 if (*usedp & (1U << k))
169 break; 198 break;
170 *usedp |= 1U << k; 199 *usedp |= 1U << k;
171 *idp = ids[k]; 200 *idp = ids[k];
172 return (unsigned long *)end; 201 return (unsigned long *)end;
173 } 202 }
203 /*
204 * If this is a debug stack, and if it has a larger size than
205 * the usual exception stacks, then 'stack' might still
206 * be within the lower portion of the debug stack:
207 */
174#if DEBUG_STKSZ > EXCEPTION_STKSZ 208#if DEBUG_STKSZ > EXCEPTION_STKSZ
175 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { 209 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
176 unsigned j = N_EXCEPTION_STACKS - 1; 210 unsigned j = N_EXCEPTION_STACKS - 1;
177 211
212 /*
213 * Black magic. A large debug stack is composed of
214 * multiple exception stack entries, which we
215 * iterate through now. Don't look:
216 */
178 do { 217 do {
179 ++j; 218 ++j;
180 end -= EXCEPTION_STKSZ; 219 end -= EXCEPTION_STKSZ;
@@ -193,20 +232,14 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
193 232
194static int show_trace_unwind(struct unwind_frame_info *info, void *context) 233static int show_trace_unwind(struct unwind_frame_info *info, void *context)
195{ 234{
196 int i = 11, n = 0; 235 int n = 0;
197 236
198 while (unwind(info) == 0 && UNW_PC(info)) { 237 while (unwind(info) == 0 && UNW_PC(info)) {
199 ++n; 238 n++;
200 if (i > 50) { 239 printk_address(UNW_PC(info));
201 printk("\n ");
202 i = 7;
203 } else
204 i += printk(" ");
205 i += printk_address(UNW_PC(info));
206 if (arch_unw_user_mode(info)) 240 if (arch_unw_user_mode(info))
207 break; 241 break;
208 } 242 }
209 printk("\n");
210 return n; 243 return n;
211} 244}
212 245
@@ -224,7 +257,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
224 int i = 11; 257 int i = 11;
225 unsigned used = 0; 258 unsigned used = 0;
226 259
227 printk("\nCall Trace:"); 260 printk("\nCall Trace:\n");
228 261
229 if (!tsk) 262 if (!tsk)
230 tsk = current; 263 tsk = current;
@@ -250,16 +283,15 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
250 } 283 }
251 } 284 }
252 285
286 /*
287 * Print function call entries within a stack. 'cond' is the
288 * "end of stackframe" condition, that the 'stack++'
289 * iteration will eventually trigger.
290 */
253#define HANDLE_STACK(cond) \ 291#define HANDLE_STACK(cond) \
254 do while (cond) { \ 292 do while (cond) { \
255 unsigned long addr = *stack++; \ 293 unsigned long addr = *stack++; \
256 if (kernel_text_address(addr)) { \ 294 if (kernel_text_address(addr)) { \
257 if (i > 50) { \
258 printk("\n "); \
259 i = 0; \
260 } \
261 else \
262 i += printk(" "); \
263 /* \ 295 /* \
264 * If the address is either in the text segment of the \ 296 * If the address is either in the text segment of the \
265 * kernel, or in the region which contains vmalloc'ed \ 297 * kernel, or in the region which contains vmalloc'ed \
@@ -268,20 +300,30 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
268 * down the cause of the crash will be able to figure \ 300 * down the cause of the crash will be able to figure \
269 * out the call path that was taken. \ 301 * out the call path that was taken. \
270 */ \ 302 */ \
271 i += printk_address(addr); \ 303 printk_address(addr); \
272 } \ 304 } \
273 } while (0) 305 } while (0)
274 306
275 for(; ; ) { 307 /*
308 * Print function call entries in all stacks, starting at the
309 * current stack address. If the stacks consist of nested
310 * exceptions, each one is walked in turn.
311 */
312 for ( ; ; ) {
276 const char *id; 313 const char *id;
277 unsigned long *estack_end; 314 unsigned long *estack_end;
278 estack_end = in_exception_stack(cpu, (unsigned long)stack, 315 estack_end = in_exception_stack(cpu, (unsigned long)stack,
279 &used, &id); 316 &used, &id);
280 317
281 if (estack_end) { 318 if (estack_end) {
282 i += printk(" <%s>", id); 319 printk(" <%s>", id);
283 HANDLE_STACK (stack < estack_end); 320 HANDLE_STACK (stack < estack_end);
284 i += printk(" <EOE>"); 321 printk(" <EOE>");
322 /*
323 * We link to the next stack via the
324 * second-to-last pointer (index -2 to end) in the
325 * exception stack:
326 */
285 stack = (unsigned long *) estack_end[-2]; 327 stack = (unsigned long *) estack_end[-2];
286 continue; 328 continue;
287 } 329 }
@@ -291,19 +333,28 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
291 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 333 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
292 334
293 if (stack >= irqstack && stack < irqstack_end) { 335 if (stack >= irqstack && stack < irqstack_end) {
294 i += printk(" <IRQ>"); 336 printk(" <IRQ>");
295 HANDLE_STACK (stack < irqstack_end); 337 HANDLE_STACK (stack < irqstack_end);
338 /*
339 * We link to the next stack (which would be
340 * the process stack normally) the last
341 * pointer (index -1 to end) in the IRQ stack:
342 */
296 stack = (unsigned long *) (irqstack_end[-1]); 343 stack = (unsigned long *) (irqstack_end[-1]);
297 irqstack_end = NULL; 344 irqstack_end = NULL;
298 i += printk(" <EOI>"); 345 printk(" <EOI>");
299 continue; 346 continue;
300 } 347 }
301 } 348 }
302 break; 349 break;
303 } 350 }
304 351
352 /*
353 * This prints the process stack:
354 */
305 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); 355 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
306#undef HANDLE_STACK 356#undef HANDLE_STACK
357
307 printk("\n"); 358 printk("\n");
308} 359}
309 360
@@ -337,8 +388,8 @@ static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned
337 break; 388 break;
338 } 389 }
339 if (i && ((i % 4) == 0)) 390 if (i && ((i % 4) == 0))
340 printk("\n "); 391 printk("\n");
341 printk("%016lx ", *stack++); 392 printk(" %016lx", *stack++);
342 touch_nmi_watchdog(); 393 touch_nmi_watchdog();
343 } 394 }
344 show_trace(tsk, regs, rsp); 395 show_trace(tsk, regs, rsp);
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index e49af0032e94..332ea5dff916 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -47,6 +47,11 @@
47 thunk_retrax __down_failed_interruptible,__down_interruptible 47 thunk_retrax __down_failed_interruptible,__down_interruptible
48 thunk_retrax __down_failed_trylock,__down_trylock 48 thunk_retrax __down_failed_trylock,__down_trylock
49 thunk __up_wakeup,__up 49 thunk __up_wakeup,__up
50
51#ifdef CONFIG_TRACE_IRQFLAGS
52 thunk trace_hardirqs_on_thunk,trace_hardirqs_on
53 thunk trace_hardirqs_off_thunk,trace_hardirqs_off
54#endif
50 55
51 /* SAVE_ARGS below is used only for the .cfi directives it contains. */ 56 /* SAVE_ARGS below is used only for the .cfi directives it contains. */
52 CFI_STARTPROC 57 CFI_STARTPROC
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 5afcf6eb00fa..ac8ea66ccb94 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -570,7 +570,6 @@ no_context:
570 printk(KERN_ALERT "Unable to handle kernel paging request"); 570 printk(KERN_ALERT "Unable to handle kernel paging request");
571 printk(" at %016lx RIP: \n" KERN_ALERT,address); 571 printk(" at %016lx RIP: \n" KERN_ALERT,address);
572 printk_address(regs->rip); 572 printk_address(regs->rip);
573 printk("\n");
574 dump_pagetable(address); 573 dump_pagetable(address);
575 tsk->thread.cr2 = address; 574 tsk->thread.cr2 = address;
576 tsk->thread.trap_no = 14; 575 tsk->thread.trap_no = 14;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 5813d63c20af..ab17c7224bb6 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2516,7 +2516,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2516int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, 2516int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2517 struct request *rq, int at_head) 2517 struct request *rq, int at_head)
2518{ 2518{
2519 DECLARE_COMPLETION(wait); 2519 DECLARE_COMPLETION_ONSTACK(wait);
2520 char sense[SCSI_SENSE_BUFFERSIZE]; 2520 char sense[SCSI_SENSE_BUFFERSIZE];
2521 int err = 0; 2521 int err = 0;
2522 2522
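
A brief sketch (not from the patch) of the pattern this hunk converts blk_execute_rq() to: a completion that lives on the stack should be declared with DECLARE_COMPLETION_ONSTACK so that, with lockdep enabled, each on-stack instance gets its own lock class instead of sharing the single static class a plain DECLARE_COMPLETION would register. The helper below is hypothetical.

#include <linux/completion.h>

static void example_wait_for_async_work(void (*kick)(struct completion *))
{
	DECLARE_COMPLETION_ONSTACK(done);

	kick(&done);		/* callee eventually does complete(&done) */
	wait_for_completion(&done);
}
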
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 0242cbb86a87..5109fa37c662 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -249,18 +249,6 @@ static int irqdma_allocated;
249#include <linux/cdrom.h> /* for the compatibility eject ioctl */ 249#include <linux/cdrom.h> /* for the compatibility eject ioctl */
250#include <linux/completion.h> 250#include <linux/completion.h>
251 251
252/*
253 * Interrupt freeing also means /proc VFS work - dont do it
254 * from interrupt context. We push this work into keventd:
255 */
256static void fd_free_irq_fn(void *data)
257{
258 fd_free_irq();
259}
260
261static DECLARE_WORK(fd_free_irq_work, fd_free_irq_fn, NULL);
262
263
264static struct request *current_req; 252static struct request *current_req;
265static struct request_queue *floppy_queue; 253static struct request_queue *floppy_queue;
266static void do_fd_request(request_queue_t * q); 254static void do_fd_request(request_queue_t * q);
@@ -826,15 +814,6 @@ static int set_dor(int fdc, char mask, char data)
826 UDRS->select_date = jiffies; 814 UDRS->select_date = jiffies;
827 } 815 }
828 } 816 }
829 /*
830 * We should propagate failures to grab the resources back
831 * nicely from here. Actually we ought to rewrite the fd
832 * driver some day too.
833 */
834 if (newdor & FLOPPY_MOTOR_MASK)
835 floppy_grab_irq_and_dma();
836 if (olddor & FLOPPY_MOTOR_MASK)
837 floppy_release_irq_and_dma();
838 return olddor; 817 return olddor;
839} 818}
840 819
@@ -892,8 +871,6 @@ static int _lock_fdc(int drive, int interruptible, int line)
892 line); 871 line);
893 return -1; 872 return -1;
894 } 873 }
895 if (floppy_grab_irq_and_dma() == -1)
896 return -EBUSY;
897 874
898 if (test_and_set_bit(0, &fdc_busy)) { 875 if (test_and_set_bit(0, &fdc_busy)) {
899 DECLARE_WAITQUEUE(wait, current); 876 DECLARE_WAITQUEUE(wait, current);
@@ -915,6 +892,8 @@ static int _lock_fdc(int drive, int interruptible, int line)
915 892
916 set_current_state(TASK_RUNNING); 893 set_current_state(TASK_RUNNING);
917 remove_wait_queue(&fdc_wait, &wait); 894 remove_wait_queue(&fdc_wait, &wait);
895
896 flush_scheduled_work();
918 } 897 }
919 command_status = FD_COMMAND_NONE; 898 command_status = FD_COMMAND_NONE;
920 899
@@ -948,7 +927,6 @@ static inline void unlock_fdc(void)
948 if (elv_next_request(floppy_queue)) 927 if (elv_next_request(floppy_queue))
949 do_fd_request(floppy_queue); 928 do_fd_request(floppy_queue);
950 spin_unlock_irqrestore(&floppy_lock, flags); 929 spin_unlock_irqrestore(&floppy_lock, flags);
951 floppy_release_irq_and_dma();
952 wake_up(&fdc_wait); 930 wake_up(&fdc_wait);
953} 931}
954 932
@@ -3694,8 +3672,8 @@ static int floppy_release(struct inode *inode, struct file *filp)
3694 } 3672 }
3695 if (!UDRS->fd_ref) 3673 if (!UDRS->fd_ref)
3696 opened_bdev[drive] = NULL; 3674 opened_bdev[drive] = NULL;
3697 floppy_release_irq_and_dma();
3698 mutex_unlock(&open_lock); 3675 mutex_unlock(&open_lock);
3676
3699 return 0; 3677 return 0;
3700} 3678}
3701 3679
@@ -3726,9 +3704,6 @@ static int floppy_open(struct inode *inode, struct file *filp)
3726 if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_flags & O_EXCL))) 3704 if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_flags & O_EXCL)))
3727 goto out2; 3705 goto out2;
3728 3706
3729 if (floppy_grab_irq_and_dma())
3730 goto out2;
3731
3732 if (filp->f_flags & O_EXCL) 3707 if (filp->f_flags & O_EXCL)
3733 UDRS->fd_ref = -1; 3708 UDRS->fd_ref = -1;
3734 else 3709 else
@@ -3805,7 +3780,6 @@ out:
3805 UDRS->fd_ref--; 3780 UDRS->fd_ref--;
3806 if (!UDRS->fd_ref) 3781 if (!UDRS->fd_ref)
3807 opened_bdev[drive] = NULL; 3782 opened_bdev[drive] = NULL;
3808 floppy_release_irq_and_dma();
3809out2: 3783out2:
3810 mutex_unlock(&open_lock); 3784 mutex_unlock(&open_lock);
3811 return res; 3785 return res;
@@ -3822,14 +3796,9 @@ static int check_floppy_change(struct gendisk *disk)
3822 return 1; 3796 return 1;
3823 3797
3824 if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { 3798 if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
3825 if (floppy_grab_irq_and_dma()) {
3826 return 1;
3827 }
3828
3829 lock_fdc(drive, 0); 3799 lock_fdc(drive, 0);
3830 poll_drive(0, 0); 3800 poll_drive(0, 0);
3831 process_fd_request(); 3801 process_fd_request();
3832 floppy_release_irq_and_dma();
3833 } 3802 }
3834 3803
3835 if (UTESTF(FD_DISK_CHANGED) || 3804 if (UTESTF(FD_DISK_CHANGED) ||
@@ -4346,7 +4315,6 @@ static int __init floppy_init(void)
4346 fdc = 0; 4315 fdc = 0;
4347 del_timer(&fd_timeout); 4316 del_timer(&fd_timeout);
4348 current_drive = 0; 4317 current_drive = 0;
4349 floppy_release_irq_and_dma();
4350 initialising = 0; 4318 initialising = 0;
4351 if (have_no_fdc) { 4319 if (have_no_fdc) {
4352 DPRINT("no floppy controllers found\n"); 4320 DPRINT("no floppy controllers found\n");
@@ -4504,7 +4472,7 @@ static void floppy_release_irq_and_dma(void)
4504 if (irqdma_allocated) { 4472 if (irqdma_allocated) {
4505 fd_disable_dma(); 4473 fd_disable_dma();
4506 fd_free_dma(); 4474 fd_free_dma();
4507 schedule_work(&fd_free_irq_work); 4475 fd_free_irq();
4508 irqdma_allocated = 0; 4476 irqdma_allocated = 0;
4509 } 4477 }
4510 set_dor(0, ~0, 8); 4478 set_dor(0, ~0, 8);
@@ -4600,8 +4568,6 @@ void cleanup_module(void)
4600 /* eject disk, if any */ 4568 /* eject disk, if any */
4601 fd_eject(0); 4569 fd_eject(0);
4602 4570
4603 flush_scheduled_work(); /* fd_free_irq() might be pending */
4604
4605 wait_for_completion(&device_release); 4571 wait_for_completion(&device_release);
4606} 4572}
4607 4573
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 3721e12135d9..cc42e762396f 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -250,8 +250,6 @@ static int floppy_open(struct inode *inode, struct file *filp);
250static int floppy_release(struct inode *inode, struct file *filp); 250static int floppy_release(struct inode *inode, struct file *filp);
251static int floppy_check_change(struct gendisk *disk); 251static int floppy_check_change(struct gendisk *disk);
252static int floppy_revalidate(struct gendisk *disk); 252static int floppy_revalidate(struct gendisk *disk);
253static int swim3_add_device(struct device_node *swims);
254int swim3_init(void);
255 253
256#ifndef CONFIG_PMAC_MEDIABAY 254#ifndef CONFIG_PMAC_MEDIABAY
257#define check_media_bay(which, what) 1 255#define check_media_bay(which, what) 1
@@ -1011,114 +1009,63 @@ static struct block_device_operations floppy_fops = {
1011 .revalidate_disk= floppy_revalidate, 1009 .revalidate_disk= floppy_revalidate,
1012}; 1010};
1013 1011
1014int swim3_init(void) 1012static int swim3_add_device(struct macio_dev *mdev, int index)
1015{
1016 struct device_node *swim;
1017 int err = -ENOMEM;
1018 int i;
1019
1020 swim = find_devices("floppy");
1021 while (swim && (floppy_count < MAX_FLOPPIES))
1022 {
1023 swim3_add_device(swim);
1024 swim = swim->next;
1025 }
1026
1027 swim = find_devices("swim3");
1028 while (swim && (floppy_count < MAX_FLOPPIES))
1029 {
1030 swim3_add_device(swim);
1031 swim = swim->next;
1032 }
1033
1034 if (!floppy_count)
1035 return -ENODEV;
1036
1037 for (i = 0; i < floppy_count; i++) {
1038 disks[i] = alloc_disk(1);
1039 if (!disks[i])
1040 goto out;
1041 }
1042
1043 if (register_blkdev(FLOPPY_MAJOR, "fd")) {
1044 err = -EBUSY;
1045 goto out;
1046 }
1047
1048 swim3_queue = blk_init_queue(do_fd_request, &swim3_lock);
1049 if (!swim3_queue) {
1050 err = -ENOMEM;
1051 goto out_queue;
1052 }
1053
1054 for (i = 0; i < floppy_count; i++) {
1055 struct gendisk *disk = disks[i];
1056 disk->major = FLOPPY_MAJOR;
1057 disk->first_minor = i;
1058 disk->fops = &floppy_fops;
1059 disk->private_data = &floppy_states[i];
1060 disk->queue = swim3_queue;
1061 disk->flags |= GENHD_FL_REMOVABLE;
1062 sprintf(disk->disk_name, "fd%d", i);
1063 set_capacity(disk, 2880);
1064 add_disk(disk);
1065 }
1066 return 0;
1067
1068out_queue:
1069 unregister_blkdev(FLOPPY_MAJOR, "fd");
1070out:
1071 while (i--)
1072 put_disk(disks[i]);
1073 /* shouldn't we do something with results of swim_add_device()? */
1074 return err;
1075}
1076
1077static int swim3_add_device(struct device_node *swim)
1078{ 1013{
1014 struct device_node *swim = mdev->ofdev.node;
1079 struct device_node *mediabay; 1015 struct device_node *mediabay;
1080 struct floppy_state *fs = &floppy_states[floppy_count]; 1016 struct floppy_state *fs = &floppy_states[index];
1081 struct resource res_reg, res_dma; 1017 int rc = -EBUSY;
1082 1018
1083 if (of_address_to_resource(swim, 0, &res_reg) || 1019 /* Check & Request resources */
1084 of_address_to_resource(swim, 1, &res_dma)) { 1020 if (macio_resource_count(mdev) < 2) {
1085 printk(KERN_ERR "swim3: Can't get addresses\n"); 1021 printk(KERN_WARNING "ifd%d: no address for %s\n",
1086 return -EINVAL; 1022 index, swim->full_name);
1023 return -ENXIO;
1087 } 1024 }
1088 if (request_mem_region(res_reg.start, res_reg.end - res_reg.start + 1, 1025 if (macio_irq_count(mdev) < 2) {
1089 " (reg)") == NULL) { 1026 printk(KERN_WARNING "fd%d: no intrs for device %s\n",
1090 printk(KERN_ERR "swim3: Can't request register space\n"); 1027 index, swim->full_name);
1091 return -EINVAL;
1092 } 1028 }
1093 if (request_mem_region(res_dma.start, res_dma.end - res_dma.start + 1, 1029 if (macio_request_resource(mdev, 0, "swim3 (mmio)")) {
1094 " (dma)") == NULL) { 1030 printk(KERN_ERR "fd%d: can't request mmio resource for %s\n",
1095 release_mem_region(res_reg.start, 1031 index, swim->full_name);
1096 res_reg.end - res_reg.start + 1); 1032 return -EBUSY;
1097 printk(KERN_ERR "swim3: Can't request DMA space\n");
1098 return -EINVAL;
1099 } 1033 }
1100 1034 if (macio_request_resource(mdev, 1, "swim3 (dma)")) {
1101 if (swim->n_intrs < 2) { 1035 printk(KERN_ERR "fd%d: can't request dma resource for %s\n",
1102 printk(KERN_INFO "swim3: expecting 2 intrs (n_intrs:%d)\n", 1036 index, swim->full_name);
1103 swim->n_intrs); 1037 macio_release_resource(mdev, 0);
1104 release_mem_region(res_reg.start, 1038 return -EBUSY;
1105 res_reg.end - res_reg.start + 1);
1106 release_mem_region(res_dma.start,
1107 res_dma.end - res_dma.start + 1);
1108 return -EINVAL;
1109 } 1039 }
1040 dev_set_drvdata(&mdev->ofdev.dev, fs);
1110 1041
1111 mediabay = (strcasecmp(swim->parent->type, "media-bay") == 0) ? swim->parent : NULL; 1042 mediabay = (strcasecmp(swim->parent->type, "media-bay") == 0) ?
1043 swim->parent : NULL;
1112 if (mediabay == NULL) 1044 if (mediabay == NULL)
1113 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 1); 1045 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 1);
1114 1046
1115 memset(fs, 0, sizeof(*fs)); 1047 memset(fs, 0, sizeof(*fs));
1116 spin_lock_init(&fs->lock); 1048 spin_lock_init(&fs->lock);
1117 fs->state = idle; 1049 fs->state = idle;
1118 fs->swim3 = (struct swim3 __iomem *)ioremap(res_reg.start, 0x200); 1050 fs->swim3 = (struct swim3 __iomem *)
1119 fs->dma = (struct dbdma_regs __iomem *)ioremap(res_dma.start, 0x200); 1051 ioremap(macio_resource_start(mdev, 0), 0x200);
1120 fs->swim3_intr = swim->intrs[0].line; 1052 if (fs->swim3 == NULL) {
1121 fs->dma_intr = swim->intrs[1].line; 1053 printk("fd%d: couldn't map registers for %s\n",
1054 index, swim->full_name);
1055 rc = -ENOMEM;
1056 goto out_release;
1057 }
1058 fs->dma = (struct dbdma_regs __iomem *)
1059 ioremap(macio_resource_start(mdev, 1), 0x200);
1060 if (fs->dma == NULL) {
1061 printk("fd%d: couldn't map DMA for %s\n",
1062 index, swim->full_name);
1063 iounmap(fs->swim3);
1064 rc = -ENOMEM;
1065 goto out_release;
1066 }
1067 fs->swim3_intr = macio_irq(mdev, 0);
1068 fs->dma_intr = macio_irq(mdev, 1);;
1122 fs->cur_cyl = -1; 1069 fs->cur_cyl = -1;
1123 fs->cur_sector = -1; 1070 fs->cur_sector = -1;
1124 fs->secpercyl = 36; 1071 fs->secpercyl = 36;
@@ -1132,15 +1079,16 @@ static int swim3_add_device(struct device_node *swim)
1132 st_le16(&fs->dma_cmd[1].command, DBDMA_STOP); 1079 st_le16(&fs->dma_cmd[1].command, DBDMA_STOP);
1133 1080
1134 if (request_irq(fs->swim3_intr, swim3_interrupt, 0, "SWIM3", fs)) { 1081 if (request_irq(fs->swim3_intr, swim3_interrupt, 0, "SWIM3", fs)) {
1135 printk(KERN_ERR "Couldn't get irq %d for SWIM3\n", fs->swim3_intr); 1082 printk(KERN_ERR "fd%d: couldn't request irq %d for %s\n",
1083 index, fs->swim3_intr, swim->full_name);
1136 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0); 1084 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
1085 goto out_unmap;
1137 return -EBUSY; 1086 return -EBUSY;
1138 } 1087 }
1139/* 1088/*
1140 if (request_irq(fs->dma_intr, fd_dma_interrupt, 0, "SWIM3-dma", fs)) { 1089 if (request_irq(fs->dma_intr, fd_dma_interrupt, 0, "SWIM3-dma", fs)) {
1141 printk(KERN_ERR "Couldn't get irq %d for SWIM3 DMA", 1090 printk(KERN_ERR "Couldn't get irq %d for SWIM3 DMA",
1142 fs->dma_intr); 1091 fs->dma_intr);
1143 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
1144 return -EBUSY; 1092 return -EBUSY;
1145 } 1093 }
1146*/ 1094*/
@@ -1150,8 +1098,90 @@ static int swim3_add_device(struct device_node *swim)
1150 printk(KERN_INFO "fd%d: SWIM3 floppy controller %s\n", floppy_count, 1098 printk(KERN_INFO "fd%d: SWIM3 floppy controller %s\n", floppy_count,
1151 mediabay ? "in media bay" : ""); 1099 mediabay ? "in media bay" : "");
1152 1100
1153 floppy_count++; 1101 return 0;
1154 1102
1103 out_unmap:
1104 iounmap(fs->dma);
1105 iounmap(fs->swim3);
1106
1107 out_release:
1108 macio_release_resource(mdev, 0);
1109 macio_release_resource(mdev, 1);
1110
1111 return rc;
1112}
1113
1114static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device_id *match)
1115{
1116 int i, rc;
1117 struct gendisk *disk;
1118
1119 /* Add the drive */
1120 rc = swim3_add_device(mdev, floppy_count);
1121 if (rc)
1122 return rc;
1123
1124 /* Now create the queue if not there yet */
1125 if (swim3_queue == NULL) {
1126 /* If we failed, there isn't much we can do as the driver is still
1127 * too dumb to remove the device, just bail out
1128 */
1129 if (register_blkdev(FLOPPY_MAJOR, "fd"))
1130 return 0;
1131 swim3_queue = blk_init_queue(do_fd_request, &swim3_lock);
1132 if (swim3_queue == NULL) {
1133 unregister_blkdev(FLOPPY_MAJOR, "fd");
1134 return 0;
1135 }
1136 }
1137
1138 /* Now register that disk. Same comment about failure handling */
1139 i = floppy_count++;
1140 disk = disks[i] = alloc_disk(1);
1141 if (disk == NULL)
1142 return 0;
1143
1144 disk->major = FLOPPY_MAJOR;
1145 disk->first_minor = i;
1146 disk->fops = &floppy_fops;
1147 disk->private_data = &floppy_states[i];
1148 disk->queue = swim3_queue;
1149 disk->flags |= GENHD_FL_REMOVABLE;
1150 sprintf(disk->disk_name, "fd%d", i);
1151 set_capacity(disk, 2880);
1152 add_disk(disk);
1153
1154 return 0;
1155}
1156
1157static struct of_device_id swim3_match[] =
1158{
1159 {
1160 .name = "swim3",
1161 },
1162 {
1163 .compatible = "ohare-swim3"
1164 },
1165 {
1166 .compatible = "swim3"
1167 },
1168};
1169
1170static struct macio_driver swim3_driver =
1171{
1172 .name = "swim3",
1173 .match_table = swim3_match,
1174 .probe = swim3_attach,
1175#if 0
1176 .suspend = swim3_suspend,
1177 .resume = swim3_resume,
1178#endif
1179};
1180
1181
1182int swim3_init(void)
1183{
1184 macio_register_driver(&swim3_driver);
1155 return 0; 1185 return 0;
1156} 1186}
1157 1187
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index ffcf15c30e90..d9c5a9142ad1 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -1059,7 +1059,7 @@ ioctl_out:
1059 return ret_val; 1059 return ret_val;
1060} 1060}
1061 1061
1062static struct file_operations agp_fops = 1062static const struct file_operations agp_fops =
1063{ 1063{
1064 .owner = THIS_MODULE, 1064 .owner = THIS_MODULE,
1065 .llseek = no_llseek, 1065 .llseek = no_llseek,
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index bcc4668835b5..10a389dafd60 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -112,7 +112,7 @@ static int ac_ioctl(struct inode *, struct file *, unsigned int,
112 unsigned long); 112 unsigned long);
113static irqreturn_t ac_interrupt(int, void *, struct pt_regs *); 113static irqreturn_t ac_interrupt(int, void *, struct pt_regs *);
114 114
115static struct file_operations ac_fops = { 115static const struct file_operations ac_fops = {
116 .owner = THIS_MODULE, 116 .owner = THIS_MODULE,
117 .llseek = no_llseek, 117 .llseek = no_llseek,
118 .read = ac_read, 118 .read = ac_read,
diff --git a/drivers/char/cs5535_gpio.c b/drivers/char/cs5535_gpio.c
index 46d66037b917..8ce3f34cfc22 100644
--- a/drivers/char/cs5535_gpio.c
+++ b/drivers/char/cs5535_gpio.c
@@ -158,7 +158,7 @@ static int cs5535_gpio_open(struct inode *inode, struct file *file)
158 return nonseekable_open(inode, file); 158 return nonseekable_open(inode, file);
159} 159}
160 160
161static struct file_operations cs5535_gpio_fops = { 161static const struct file_operations cs5535_gpio_fops = {
162 .owner = THIS_MODULE, 162 .owner = THIS_MODULE,
163 .write = cs5535_gpio_write, 163 .write = cs5535_gpio_write,
164 .read = cs5535_gpio_read, 164 .read = cs5535_gpio_read,
diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c
index d755cac14bc1..21c8229f5443 100644
--- a/drivers/char/ds1286.c
+++ b/drivers/char/ds1286.c
@@ -281,7 +281,7 @@ static unsigned int ds1286_poll(struct file *file, poll_table *wait)
281 * The various file operations we support. 281 * The various file operations we support.
282 */ 282 */
283 283
284static struct file_operations ds1286_fops = { 284static const struct file_operations ds1286_fops = {
285 .llseek = no_llseek, 285 .llseek = no_llseek,
286 .read = ds1286_read, 286 .read = ds1286_read,
287 .poll = ds1286_poll, 287 .poll = ds1286_poll,
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index 625e8b517005..bcdb107aa967 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -282,7 +282,7 @@ get_rtc_status(char *buf)
282 282
283/* The various file operations we support. */ 283/* The various file operations we support. */
284 284
285static struct file_operations rtc_fops = { 285static const struct file_operations rtc_fops = {
286 .owner = THIS_MODULE, 286 .owner = THIS_MODULE,
287 .ioctl = rtc_ioctl, 287 .ioctl = rtc_ioctl,
288}; 288};
diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c
index 953e670dcd09..48cb8f0e8ebf 100644
--- a/drivers/char/ds1620.c
+++ b/drivers/char/ds1620.c
@@ -336,7 +336,7 @@ proc_therm_ds1620_read(char *buf, char **start, off_t offset,
336static struct proc_dir_entry *proc_therm_ds1620; 336static struct proc_dir_entry *proc_therm_ds1620;
337#endif 337#endif
338 338
339static struct file_operations ds1620_fops = { 339static const struct file_operations ds1620_fops = {
340 .owner = THIS_MODULE, 340 .owner = THIS_MODULE,
341 .open = nonseekable_open, 341 .open = nonseekable_open,
342 .read = ds1620_read, 342 .read = ds1620_read,
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 09b413618b57..9b1bf60ffbe7 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -483,7 +483,7 @@ static int dsp56k_release(struct inode *inode, struct file *file)
483 return 0; 483 return 0;
484} 484}
485 485
486static struct file_operations dsp56k_fops = { 486static const struct file_operations dsp56k_fops = {
487 .owner = THIS_MODULE, 487 .owner = THIS_MODULE,
488 .read = dsp56k_read, 488 .read = dsp56k_read,
489 .write = dsp56k_write, 489 .write = dsp56k_write,
diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c
index da2c89f1b8bc..5e82c3bad2e3 100644
--- a/drivers/char/dtlk.c
+++ b/drivers/char/dtlk.c
@@ -94,7 +94,7 @@ static int dtlk_release(struct inode *, struct file *);
94static int dtlk_ioctl(struct inode *inode, struct file *file, 94static int dtlk_ioctl(struct inode *inode, struct file *file,
95 unsigned int cmd, unsigned long arg); 95 unsigned int cmd, unsigned long arg);
96 96
97static struct file_operations dtlk_fops = 97static const struct file_operations dtlk_fops =
98{ 98{
99 .owner = THIS_MODULE, 99 .owner = THIS_MODULE,
100 .read = dtlk_read, 100 .read = dtlk_read,
diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c
index 0090e7a4fcd3..004141d535a2 100644
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -285,7 +285,7 @@ efi_rtc_close(struct inode *inode, struct file *file)
285 * The various file operations we support. 285 * The various file operations we support.
286 */ 286 */
287 287
288static struct file_operations efi_rtc_fops = { 288static const struct file_operations efi_rtc_fops = {
289 .owner = THIS_MODULE, 289 .owner = THIS_MODULE,
290 .ioctl = efi_rtc_ioctl, 290 .ioctl = efi_rtc_ioctl,
291 .open = efi_rtc_open, 291 .open = efi_rtc_open,
diff --git a/drivers/char/ftape/zftape/zftape-init.c b/drivers/char/ftape/zftape/zftape-init.c
index 55272566b740..164a1aa77a2f 100644
--- a/drivers/char/ftape/zftape/zftape-init.c
+++ b/drivers/char/ftape/zftape/zftape-init.c
@@ -86,7 +86,7 @@ static ssize_t zft_read (struct file *fp, char __user *buff,
86static ssize_t zft_write(struct file *fp, const char __user *buff, 86static ssize_t zft_write(struct file *fp, const char __user *buff,
87 size_t req_len, loff_t *ppos); 87 size_t req_len, loff_t *ppos);
88 88
89static struct file_operations zft_cdev = 89static const struct file_operations zft_cdev =
90{ 90{
91 .owner = THIS_MODULE, 91 .owner = THIS_MODULE,
92 .read = zft_read, 92 .read = zft_read,
diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c
index bebd7e34f792..817dc409ac20 100644
--- a/drivers/char/genrtc.c
+++ b/drivers/char/genrtc.c
@@ -482,7 +482,7 @@ static inline int gen_rtc_proc_init(void) { return 0; }
482 * The various file operations we support. 482 * The various file operations we support.
483 */ 483 */
484 484
485static struct file_operations gen_rtc_fops = { 485static const struct file_operations gen_rtc_fops = {
486 .owner = THIS_MODULE, 486 .owner = THIS_MODULE,
487#ifdef CONFIG_GEN_RTC_X 487#ifdef CONFIG_GEN_RTC_X
488 .read = gen_rtc_read, 488 .read = gen_rtc_read,
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index e5643f3aa73f..8afba339f05a 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -553,7 +553,7 @@ hpet_ioctl_common(struct hpet_dev *devp, int cmd, unsigned long arg, int kernel)
553 return err; 553 return err;
554} 554}
555 555
556static struct file_operations hpet_fops = { 556static const struct file_operations hpet_fops = {
557 .owner = THIS_MODULE, 557 .owner = THIS_MODULE,
558 .llseek = no_llseek, 558 .llseek = no_llseek,
559 .read = hpet_read, 559 .read = hpet_read,
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index 8dc205b275e3..56612a2dca6b 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -1299,13 +1299,12 @@ static int __init hvsi_console_init(void)
1299 hp->inbuf_end = hp->inbuf; 1299 hp->inbuf_end = hp->inbuf;
1300 hp->state = HVSI_CLOSED; 1300 hp->state = HVSI_CLOSED;
1301 hp->vtermno = *vtermno; 1301 hp->vtermno = *vtermno;
1302 hp->virq = virt_irq_create_mapping(irq[0]); 1302 hp->virq = irq_create_mapping(NULL, irq[0], 0);
1303 if (hp->virq == NO_IRQ) { 1303 if (hp->virq == NO_IRQ) {
1304 printk(KERN_ERR "%s: couldn't create irq mapping for 0x%x\n", 1304 printk(KERN_ERR "%s: couldn't create irq mapping for 0x%x\n",
1305 __FUNCTION__, hp->virq); 1305 __FUNCTION__, irq[0]);
1306 continue; 1306 continue;
1307 } else 1307 }
1308 hp->virq = irq_offset_up(hp->virq);
1309 1308
1310 hvsi_count++; 1309 hvsi_count++;
1311 } 1310 }
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 88b026639f10..154a81d328c1 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -149,7 +149,7 @@ out:
149} 149}
150 150
151 151
152static struct file_operations rng_chrdev_ops = { 152static const struct file_operations rng_chrdev_ops = {
153 .owner = THIS_MODULE, 153 .owner = THIS_MODULE,
154 .open = rng_dev_open, 154 .open = rng_dev_open,
155 .read = rng_dev_read, 155 .read = rng_dev_read,
diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index f3c3aaf4560e..353d9f3cf8d7 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -80,7 +80,7 @@ static int i8k_open_fs(struct inode *inode, struct file *file);
80static int i8k_ioctl(struct inode *, struct file *, unsigned int, 80static int i8k_ioctl(struct inode *, struct file *, unsigned int,
81 unsigned long); 81 unsigned long);
82 82
83static struct file_operations i8k_fops = { 83static const struct file_operations i8k_fops = {
84 .open = i8k_open_fs, 84 .open = i8k_open_fs,
85 .read = seq_read, 85 .read = seq_read,
86 .llseek = seq_lseek, 86 .llseek = seq_lseek,
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index a4200a2b0811..518ece7ac656 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -233,7 +233,7 @@ static void *DevTableMem[IP2_MAX_BOARDS];
233/* This is the driver descriptor for the ip2ipl device, which is used to 233/* This is the driver descriptor for the ip2ipl device, which is used to
234 * download the loadware to the boards. 234 * download the loadware to the boards.
235 */ 235 */
236static struct file_operations ip2_ipl = { 236static const struct file_operations ip2_ipl = {
237 .owner = THIS_MODULE, 237 .owner = THIS_MODULE,
238 .read = ip2_ipl_read, 238 .read = ip2_ipl_read,
239 .write = ip2_ipl_write, 239 .write = ip2_ipl_write,
diff --git a/drivers/char/ip27-rtc.c b/drivers/char/ip27-rtc.c
index 3acdac3c967e..a48da02aad2f 100644
--- a/drivers/char/ip27-rtc.c
+++ b/drivers/char/ip27-rtc.c
@@ -196,7 +196,7 @@ static int rtc_release(struct inode *inode, struct file *file)
196 * The various file operations we support. 196 * The various file operations we support.
197 */ 197 */
198 198
199static struct file_operations rtc_fops = { 199static const struct file_operations rtc_fops = {
200 .owner = THIS_MODULE, 200 .owner = THIS_MODULE,
201 .ioctl = rtc_ioctl, 201 .ioctl = rtc_ioctl,
202 .open = rtc_open, 202 .open = rtc_open,
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
index 2fc894fef1cb..68d7c61a864e 100644
--- a/drivers/char/ipmi/ipmi_devintf.c
+++ b/drivers/char/ipmi/ipmi_devintf.c
@@ -765,7 +765,7 @@ static long compat_ipmi_ioctl(struct file *filep, unsigned int cmd,
765} 765}
766#endif 766#endif
767 767
768static struct file_operations ipmi_fops = { 768static const struct file_operations ipmi_fops = {
769 .owner = THIS_MODULE, 769 .owner = THIS_MODULE,
770 .ioctl = ipmi_ioctl, 770 .ioctl = ipmi_ioctl,
771#ifdef CONFIG_COMPAT 771#ifdef CONFIG_COMPAT
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 74a889c58333..accaaf1a6b69 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -807,7 +807,7 @@ static int ipmi_close(struct inode *ino, struct file *filep)
807 return 0; 807 return 0;
808} 808}
809 809
810static struct file_operations ipmi_wdog_fops = { 810static const struct file_operations ipmi_wdog_fops = {
811 .owner = THIS_MODULE, 811 .owner = THIS_MODULE,
812 .read = ipmi_read, 812 .read = ipmi_read,
813 .poll = ipmi_poll, 813 .poll = ipmi_poll,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index fbce2f0669d6..84dfc4278139 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -748,7 +748,7 @@ static int stli_initpcibrd(int brdtype, struct pci_dev *devp);
748 * will give access to the shared memory on the Stallion intelligent 748 * will give access to the shared memory on the Stallion intelligent
749 * board. This is also a very useful debugging tool. 749 * board. This is also a very useful debugging tool.
750 */ 750 */
751static struct file_operations stli_fsiomem = { 751static const struct file_operations stli_fsiomem = {
752 .owner = THIS_MODULE, 752 .owner = THIS_MODULE,
753 .read = stli_memread, 753 .read = stli_memread,
754 .write = stli_memwrite, 754 .write = stli_memwrite,
diff --git a/drivers/char/ite_gpio.c b/drivers/char/ite_gpio.c
index 747ba45e50e5..cde562d70c4f 100644
--- a/drivers/char/ite_gpio.c
+++ b/drivers/char/ite_gpio.c
@@ -357,7 +357,7 @@ DEB(printk("interrupt 0x%x %d\n",ITE_GPAISR, i));
357 } 357 }
358} 358}
359 359
360static struct file_operations ite_gpio_fops = { 360static const struct file_operations ite_gpio_fops = {
361 .owner = THIS_MODULE, 361 .owner = THIS_MODULE,
362 .ioctl = ite_gpio_ioctl, 362 .ioctl = ite_gpio_ioctl,
363 .open = ite_gpio_open, 363 .open = ite_gpio_open,
diff --git a/drivers/char/lcd.c b/drivers/char/lcd.c
index 7d49b241de56..da601fd6c07a 100644
--- a/drivers/char/lcd.c
+++ b/drivers/char/lcd.c
@@ -598,7 +598,7 @@ static ssize_t lcd_read(struct file *file, char *buf,
598 * The various file operations we support. 598 * The various file operations we support.
599 */ 599 */
600 600
601static struct file_operations lcd_fops = { 601static const struct file_operations lcd_fops = {
602 .read = lcd_read, 602 .read = lcd_read,
603 .ioctl = lcd_ioctl, 603 .ioctl = lcd_ioctl,
604 .open = lcd_open, 604 .open = lcd_open,
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 582cdbdb0c42..f875fda3b089 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -666,7 +666,7 @@ static int lp_ioctl(struct inode *inode, struct file *file,
666 return retval; 666 return retval;
667} 667}
668 668
669static struct file_operations lp_fops = { 669static const struct file_operations lp_fops = {
670 .owner = THIS_MODULE, 670 .owner = THIS_MODULE,
671 .write = lp_write, 671 .write = lp_write,
672 .ioctl = lp_ioctl, 672 .ioctl = lp_ioctl,
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 70f3954d6dfd..e97c32ceb796 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -776,7 +776,7 @@ static int open_port(struct inode * inode, struct file * filp)
776#define open_kmem open_mem 776#define open_kmem open_mem
777#define open_oldmem open_mem 777#define open_oldmem open_mem
778 778
779static struct file_operations mem_fops = { 779static const struct file_operations mem_fops = {
780 .llseek = memory_lseek, 780 .llseek = memory_lseek,
781 .read = read_mem, 781 .read = read_mem,
782 .write = write_mem, 782 .write = write_mem,
@@ -784,7 +784,7 @@ static struct file_operations mem_fops = {
784 .open = open_mem, 784 .open = open_mem,
785}; 785};
786 786
787static struct file_operations kmem_fops = { 787static const struct file_operations kmem_fops = {
788 .llseek = memory_lseek, 788 .llseek = memory_lseek,
789 .read = read_kmem, 789 .read = read_kmem,
790 .write = write_kmem, 790 .write = write_kmem,
@@ -792,7 +792,7 @@ static struct file_operations kmem_fops = {
792 .open = open_kmem, 792 .open = open_kmem,
793}; 793};
794 794
795static struct file_operations null_fops = { 795static const struct file_operations null_fops = {
796 .llseek = null_lseek, 796 .llseek = null_lseek,
797 .read = read_null, 797 .read = read_null,
798 .write = write_null, 798 .write = write_null,
@@ -800,7 +800,7 @@ static struct file_operations null_fops = {
800}; 800};
801 801
802#if defined(CONFIG_ISA) || !defined(__mc68000__) 802#if defined(CONFIG_ISA) || !defined(__mc68000__)
803static struct file_operations port_fops = { 803static const struct file_operations port_fops = {
804 .llseek = memory_lseek, 804 .llseek = memory_lseek,
805 .read = read_port, 805 .read = read_port,
806 .write = write_port, 806 .write = write_port,
@@ -808,7 +808,7 @@ static struct file_operations port_fops = {
808}; 808};
809#endif 809#endif
810 810
811static struct file_operations zero_fops = { 811static const struct file_operations zero_fops = {
812 .llseek = zero_lseek, 812 .llseek = zero_lseek,
813 .read = read_zero, 813 .read = read_zero,
814 .write = write_zero, 814 .write = write_zero,
@@ -819,14 +819,14 @@ static struct backing_dev_info zero_bdi = {
819 .capabilities = BDI_CAP_MAP_COPY, 819 .capabilities = BDI_CAP_MAP_COPY,
820}; 820};
821 821
822static struct file_operations full_fops = { 822static const struct file_operations full_fops = {
823 .llseek = full_lseek, 823 .llseek = full_lseek,
824 .read = read_full, 824 .read = read_full,
825 .write = write_full, 825 .write = write_full,
826}; 826};
827 827
828#ifdef CONFIG_CRASH_DUMP 828#ifdef CONFIG_CRASH_DUMP
829static struct file_operations oldmem_fops = { 829static const struct file_operations oldmem_fops = {
830 .read = read_oldmem, 830 .read = read_oldmem,
831 .open = open_oldmem, 831 .open = open_oldmem,
832}; 832};
@@ -853,7 +853,7 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf,
853 return ret; 853 return ret;
854} 854}
855 855
856static struct file_operations kmsg_fops = { 856static const struct file_operations kmsg_fops = {
857 .write = kmsg_write, 857 .write = kmsg_write,
858}; 858};
859 859
@@ -903,7 +903,7 @@ static int memory_open(struct inode * inode, struct file * filp)
903 return 0; 903 return 0;
904} 904}
905 905
906static struct file_operations memory_fops = { 906static const struct file_operations memory_fops = {
907 .open = memory_open, /* just a selector for the real open */ 907 .open = memory_open, /* just a selector for the real open */
908}; 908};
909 909
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index d5fa19da330b..62ebe09656e3 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -113,7 +113,7 @@ static int misc_seq_open(struct inode *inode, struct file *file)
113 return seq_open(file, &misc_seq_ops); 113 return seq_open(file, &misc_seq_ops);
114} 114}
115 115
116static struct file_operations misc_proc_fops = { 116static const struct file_operations misc_proc_fops = {
117 .owner = THIS_MODULE, 117 .owner = THIS_MODULE,
118 .open = misc_seq_open, 118 .open = misc_seq_open,
119 .read = seq_read, 119 .read = seq_read,
@@ -176,7 +176,7 @@ fail:
176 */ 176 */
177static struct class *misc_class; 177static struct class *misc_class;
178 178
179static struct file_operations misc_fops = { 179static const struct file_operations misc_fops = {
180 .owner = THIS_MODULE, 180 .owner = THIS_MODULE,
181 .open = misc_open, 181 .open = misc_open,
182}; 182};
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index 70b774ff5aa4..1f0f2b6dae26 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -63,7 +63,7 @@ static int mmtimer_mmap(struct file *file, struct vm_area_struct *vma);
63 */ 63 */
64static unsigned long mmtimer_femtoperiod = 0; 64static unsigned long mmtimer_femtoperiod = 0;
65 65
66static struct file_operations mmtimer_fops = { 66static const struct file_operations mmtimer_fops = {
67 .owner = THIS_MODULE, 67 .owner = THIS_MODULE,
68 .mmap = mmtimer_mmap, 68 .mmap = mmtimer_mmap,
69 .ioctl = mmtimer_ioctl, 69 .ioctl = mmtimer_ioctl,
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index d3ba2f860ef0..39a2e661ff55 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -454,7 +454,7 @@ static int register_serial_portandirq(unsigned int port, int irq)
454} 454}
455 455
456 456
457static struct file_operations mwave_fops = { 457static const struct file_operations mwave_fops = {
458 .owner = THIS_MODULE, 458 .owner = THIS_MODULE,
459 .read = mwave_read, 459 .read = mwave_read,
460 .write = mwave_write, 460 .write = mwave_write,
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 8c5f102622b6..a39f19c35a6a 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -437,7 +437,7 @@ nvram_read_proc(char *buffer, char **start, off_t offset,
437 437
438#endif /* CONFIG_PROC_FS */ 438#endif /* CONFIG_PROC_FS */
439 439
440static struct file_operations nvram_fops = { 440static const struct file_operations nvram_fops = {
441 .owner = THIS_MODULE, 441 .owner = THIS_MODULE,
442 .llseek = nvram_llseek, 442 .llseek = nvram_llseek,
443 .read = nvram_read, 443 .read = nvram_read,
diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c
index f240a104d250..7c57ebfa8640 100644
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -183,7 +183,7 @@ static int button_read (struct file *filp, char __user *buffer,
183 * attempts to perform these operations on the device. 183 * attempts to perform these operations on the device.
184 */ 184 */
185 185
186static struct file_operations button_fops = { 186static const struct file_operations button_fops = {
187 .owner = THIS_MODULE, 187 .owner = THIS_MODULE,
188 .read = button_read, 188 .read = button_read,
189}; 189};
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index 8865387d3448..206cf6f50695 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -642,7 +642,7 @@ static void kick_open(void)
642 udelay(25); 642 udelay(25);
643} 643}
644 644
645static struct file_operations flash_fops = 645static const struct file_operations flash_fops =
646{ 646{
647 .owner = THIS_MODULE, 647 .owner = THIS_MODULE,
648 .llseek = flash_llseek, 648 .llseek = flash_llseek,
diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index c860de6a6fde..4005ee0aa11e 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -236,7 +236,7 @@ static int pc8736x_gpio_open(struct inode *inode, struct file *file)
236 return nonseekable_open(inode, file); 236 return nonseekable_open(inode, file);
237} 237}
238 238
239static struct file_operations pc8736x_gpio_fops = { 239static const struct file_operations pc8736x_gpio_fops = {
240 .owner = THIS_MODULE, 240 .owner = THIS_MODULE,
241 .open = pc8736x_gpio_open, 241 .open = pc8736x_gpio_open,
242 .write = nsc_gpio_write, 242 .write = nsc_gpio_write,
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 31c8a21f9d87..50d20aafeb18 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1938,7 +1938,7 @@ static void cm4000_detach(struct pcmcia_device *link)
1938 return; 1938 return;
1939} 1939}
1940 1940
1941static struct file_operations cm4000_fops = { 1941static const struct file_operations cm4000_fops = {
1942 .owner = THIS_MODULE, 1942 .owner = THIS_MODULE,
1943 .read = cmm_read, 1943 .read = cmm_read,
1944 .write = cmm_write, 1944 .write = cmm_write,
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index 47a8465bf95b..55cf4be42976 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -688,7 +688,7 @@ static void reader_detach(struct pcmcia_device *link)
688 return; 688 return;
689} 689}
690 690
691static struct file_operations reader_fops = { 691static const struct file_operations reader_fops = {
692 .owner = THIS_MODULE, 692 .owner = THIS_MODULE,
693 .read = cm4040_read, 693 .read = cm4040_read,
694 .write = cm4040_write, 694 .write = cm4040_write,
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 24231d9743dc..520d2cf82bc0 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -739,7 +739,7 @@ static unsigned int pp_poll (struct file * file, poll_table * wait)
739 739
740static struct class *ppdev_class; 740static struct class *ppdev_class;
741 741
742static struct file_operations pp_fops = { 742static const struct file_operations pp_fops = {
743 .owner = THIS_MODULE, 743 .owner = THIS_MODULE,
744 .llseek = no_llseek, 744 .llseek = no_llseek,
745 .read = pp_read, 745 .read = pp_read,
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 164bddae047f..4c3a5ca9d8f7 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -416,7 +416,7 @@ static struct entropy_store input_pool = {
416 .poolinfo = &poolinfo_table[0], 416 .poolinfo = &poolinfo_table[0],
417 .name = "input", 417 .name = "input",
418 .limit = 1, 418 .limit = 1,
419 .lock = SPIN_LOCK_UNLOCKED, 419 .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
420 .pool = input_pool_data 420 .pool = input_pool_data
421}; 421};
422 422
@@ -425,7 +425,7 @@ static struct entropy_store blocking_pool = {
425 .name = "blocking", 425 .name = "blocking",
426 .limit = 1, 426 .limit = 1,
427 .pull = &input_pool, 427 .pull = &input_pool,
428 .lock = SPIN_LOCK_UNLOCKED, 428 .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
429 .pool = blocking_pool_data 429 .pool = blocking_pool_data
430}; 430};
431 431
@@ -433,7 +433,7 @@ static struct entropy_store nonblocking_pool = {
433 .poolinfo = &poolinfo_table[1], 433 .poolinfo = &poolinfo_table[1],
434 .name = "nonblocking", 434 .name = "nonblocking",
435 .pull = &input_pool, 435 .pull = &input_pool,
436 .lock = SPIN_LOCK_UNLOCKED, 436 .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
437 .pool = nonblocking_pool_data 437 .pool = nonblocking_pool_data
438}; 438};
439 439
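
The random.c hunks swap the shared SPIN_LOCK_UNLOCKED initializer for the per-lock __SPIN_LOCK_UNLOCKED(): with lockdep enabled, every statically initialized lock needs its own identity, and the argument of the new initializer supplies it, so the three pool locks no longer collapse into a single lock class. A minimal sketch with hypothetical names (demo_pool, demo_a, demo_b):

    #include <linux/spinlock.h>

    struct demo_pool {
            spinlock_t lock;
            int        count;
    };

    /*
     * Old style:  .lock = SPIN_LOCK_UNLOCKED   -> all such locks share one class.
     * New style: the initializer names this particular lock, so lockdep can
     * tell demo_a.lock and demo_b.lock apart.
     */
    static struct demo_pool demo_a = {
            .lock = __SPIN_LOCK_UNLOCKED(&demo_a.lock),
    };
    static struct demo_pool demo_b = {
            .lock = __SPIN_LOCK_UNLOCKED(&demo_b.lock),
    };
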
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 9bf97c5e38c0..579868af4a54 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -30,7 +30,7 @@ struct raw_device_data {
30static struct class *raw_class; 30static struct class *raw_class;
31static struct raw_device_data raw_devices[MAX_RAW_MINORS]; 31static struct raw_device_data raw_devices[MAX_RAW_MINORS];
32static DEFINE_MUTEX(raw_mutex); 32static DEFINE_MUTEX(raw_mutex);
33static struct file_operations raw_ctl_fops; /* forward declaration */ 33static const struct file_operations raw_ctl_fops; /* forward declaration */
34 34
35/* 35/*
36 * Open/close code for raw IO. 36 * Open/close code for raw IO.
@@ -261,7 +261,7 @@ static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
261} 261}
262 262
263 263
264static struct file_operations raw_fops = { 264static const struct file_operations raw_fops = {
265 .read = generic_file_read, 265 .read = generic_file_read,
266 .aio_read = generic_file_aio_read, 266 .aio_read = generic_file_aio_read,
267 .write = raw_file_write, 267 .write = raw_file_write,
@@ -274,7 +274,7 @@ static struct file_operations raw_fops = {
274 .owner = THIS_MODULE, 274 .owner = THIS_MODULE,
275}; 275};
276 276
277static struct file_operations raw_ctl_fops = { 277static const struct file_operations raw_ctl_fops = {
278 .ioctl = raw_ctl_ioctl, 278 .ioctl = raw_ctl_ioctl,
279 .open = raw_open, 279 .open = raw_open,
280 .owner = THIS_MODULE, 280 .owner = THIS_MODULE,
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index 3afc6a47ebbc..3fa80aaf4527 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -243,7 +243,7 @@ static struct real_driver rio_real_driver = {
243 * 243 *
244 */ 244 */
245 245
246static struct file_operations rio_fw_fops = { 246static const struct file_operations rio_fw_fops = {
247 .owner = THIS_MODULE, 247 .owner = THIS_MODULE,
248 .ioctl = rio_fw_ioctl, 248 .ioctl = rio_fw_ioctl,
249}; 249};
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index aefac4ac0bf5..cc7bd1a3095b 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -877,7 +877,7 @@ int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
877 * The various file operations we support. 877 * The various file operations we support.
878 */ 878 */
879 879
880static struct file_operations rtc_fops = { 880static const struct file_operations rtc_fops = {
881 .owner = THIS_MODULE, 881 .owner = THIS_MODULE,
882 .llseek = no_llseek, 882 .llseek = no_llseek,
883 .read = rtc_read, 883 .read = rtc_read,
@@ -896,7 +896,7 @@ static struct miscdevice rtc_dev = {
896 .fops = &rtc_fops, 896 .fops = &rtc_fops,
897}; 897};
898 898
899static struct file_operations rtc_proc_fops = { 899static const struct file_operations rtc_proc_fops = {
900 .owner = THIS_MODULE, 900 .owner = THIS_MODULE,
901 .open = rtc_proc_open, 901 .open = rtc_proc_open,
902 .read = seq_read, 902 .read = seq_read,
diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c
index 45083e5dd23b..425c58719db6 100644
--- a/drivers/char/scx200_gpio.c
+++ b/drivers/char/scx200_gpio.c
@@ -63,7 +63,7 @@ static int scx200_gpio_release(struct inode *inode, struct file *file)
63} 63}
64 64
65 65
66static struct file_operations scx200_gpio_fops = { 66static const struct file_operations scx200_gpio_fops = {
67 .owner = THIS_MODULE, 67 .owner = THIS_MODULE,
68 .write = nsc_gpio_write, 68 .write = nsc_gpio_write,
69 .read = nsc_gpio_read, 69 .read = nsc_gpio_read,
diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c
index 203240b6c08f..afc6eda602f7 100644
--- a/drivers/char/snsc.c
+++ b/drivers/char/snsc.c
@@ -347,7 +347,7 @@ scdrv_poll(struct file *file, struct poll_table_struct *wait)
347 return mask; 347 return mask;
348} 348}
349 349
350static struct file_operations scdrv_fops = { 350static const struct file_operations scdrv_fops = {
351 .owner = THIS_MODULE, 351 .owner = THIS_MODULE,
352 .read = scdrv_read, 352 .read = scdrv_read,
353 .write = scdrv_write, 353 .write = scdrv_write,
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 45508a039508..d4e434d694b7 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -1106,7 +1106,7 @@ static int sonypi_misc_ioctl(struct inode *ip, struct file *fp,
1106 return ret; 1106 return ret;
1107} 1107}
1108 1108
1109static struct file_operations sonypi_misc_fops = { 1109static const struct file_operations sonypi_misc_fops = {
1110 .owner = THIS_MODULE, 1110 .owner = THIS_MODULE,
1111 .read = sonypi_misc_read, 1111 .read = sonypi_misc_read,
1112 .poll = sonypi_misc_poll, 1112 .poll = sonypi_misc_poll,
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index ed7b8eaf0367..3beb2203d24b 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -707,7 +707,7 @@ static unsigned int sc26198_baudtable[] = {
707 * Define the driver info for a user level control device. Used mainly 707 * Define the driver info for a user level control device. Used mainly
708 * to get at port stats - only not using the port device itself. 708 * to get at port stats - only not using the port device itself.
709 */ 709 */
710static struct file_operations stl_fsiomem = { 710static const struct file_operations stl_fsiomem = {
711 .owner = THIS_MODULE, 711 .owner = THIS_MODULE,
712 .ioctl = stl_memioctl, 712 .ioctl = stl_memioctl,
713}; 713};
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 45c193aa11db..e1cd2bc4b1e4 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -410,7 +410,7 @@ static struct real_driver sx_real_driver = {
410 * 410 *
411 */ 411 */
412 412
413static struct file_operations sx_fw_fops = { 413static const struct file_operations sx_fw_fops = {
414 .owner = THIS_MODULE, 414 .owner = THIS_MODULE,
415 .ioctl = sx_fw_ioctl, 415 .ioctl = sx_fw_ioctl,
416}; 416};
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index a064ee9181c0..ee3ca8f1768e 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -147,12 +147,13 @@ static struct sysrq_key_op sysrq_mountro_op = {
147 .enable_mask = SYSRQ_ENABLE_REMOUNT, 147 .enable_mask = SYSRQ_ENABLE_REMOUNT,
148}; 148};
149 149
150#ifdef CONFIG_DEBUG_MUTEXES 150#ifdef CONFIG_LOCKDEP
151static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs, 151static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs,
152 struct tty_struct *tty) 152 struct tty_struct *tty)
153{ 153{
154 mutex_debug_show_all_locks(); 154 debug_show_all_locks();
155} 155}
156
156static struct sysrq_key_op sysrq_showlocks_op = { 157static struct sysrq_key_op sysrq_showlocks_op = {
157 .handler = sysrq_handle_showlocks, 158 .handler = sysrq_handle_showlocks,
158 .help_msg = "show-all-locks(D)", 159 .help_msg = "show-all-locks(D)",
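
The sysrq hunk retires the mutex-only mutex_debug_show_all_locks() in favour of lockdep's debug_show_all_locks(), and keys the handler on CONFIG_LOCKDEP rather than CONFIG_DEBUG_MUTEXES. A sketch of how such a handler is wired up with this era's sysrq API (hypothetical names; pt_regs is still part of the handler signature, and the declaration of debug_show_all_locks() is assumed to come in via sched.h):

    #include <linux/sysrq.h>
    #include <linux/sched.h>        /* assumed home of debug_show_all_locks() */

    #ifdef CONFIG_LOCKDEP
    static void sysrq_handle_demo_locks(int key, struct pt_regs *pt_regs,
                                        struct tty_struct *tty)
    {
            /* Dump every lock currently held by every task. */
            debug_show_all_locks();
    }

    static struct sysrq_key_op sysrq_demo_locks_op = {
            .handler    = sysrq_handle_demo_locks,
            .help_msg   = "show-all-locks(D)",
            .action_msg = "Show Locks Held",
    };
    /* registered elsewhere, e.g. register_sysrq_key('d', &sysrq_demo_locks_op); */
    #endif
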
diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c
index a80c83210872..bb1bad4c18f9 100644
--- a/drivers/char/tb0219.c
+++ b/drivers/char/tb0219.c
@@ -255,7 +255,7 @@ static int tanbac_tb0219_release(struct inode *inode, struct file *file)
255 return 0; 255 return 0;
256} 256}
257 257
258static struct file_operations tb0219_fops = { 258static const struct file_operations tb0219_fops = {
259 .owner = THIS_MODULE, 259 .owner = THIS_MODULE,
260 .read = tanbac_tb0219_read, 260 .read = tanbac_tb0219_read,
261 .write = tanbac_tb0219_write, 261 .write = tanbac_tb0219_write,
diff --git a/drivers/char/tipar.c b/drivers/char/tipar.c
index e0633a119d29..d30dc09dbbc9 100644
--- a/drivers/char/tipar.c
+++ b/drivers/char/tipar.c
@@ -381,7 +381,7 @@ tipar_ioctl(struct inode *inode, struct file *file,
381 381
382/* ----- kernel module registering ------------------------------------ */ 382/* ----- kernel module registering ------------------------------------ */
383 383
384static struct file_operations tipar_fops = { 384static const struct file_operations tipar_fops = {
385 .owner = THIS_MODULE, 385 .owner = THIS_MODULE,
386 .llseek = no_llseek, 386 .llseek = no_llseek,
387 .read = tipar_read, 387 .read = tipar_read,
diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c
index 952b829e2cb4..d2c5ba4e83b8 100644
--- a/drivers/char/tlclk.c
+++ b/drivers/char/tlclk.c
@@ -247,7 +247,7 @@ static ssize_t tlclk_write(struct file *filp, const char __user *buf, size_t cou
247 return 0; 247 return 0;
248} 248}
249 249
250static struct file_operations tlclk_fops = { 250static const struct file_operations tlclk_fops = {
251 .read = tlclk_read, 251 .read = tlclk_read,
252 .write = tlclk_write, 252 .write = tlclk_write,
253 .open = tlclk_open, 253 .open = tlclk_open,
diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c
index e2fb234dee40..dd36fd04a842 100644
--- a/drivers/char/toshiba.c
+++ b/drivers/char/toshiba.c
@@ -92,7 +92,7 @@ static int tosh_ioctl(struct inode *, struct file *, unsigned int,
92 unsigned long); 92 unsigned long);
93 93
94 94
95static struct file_operations tosh_fops = { 95static const struct file_operations tosh_fops = {
96 .owner = THIS_MODULE, 96 .owner = THIS_MODULE,
97 .ioctl = tosh_ioctl, 97 .ioctl = tosh_ioctl,
98}; 98};
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
index 58a258cec153..ad8ffe49256f 100644
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -116,7 +116,7 @@ static u8 tpm_atml_status(struct tpm_chip *chip)
116 return ioread8(chip->vendor.iobase + 1); 116 return ioread8(chip->vendor.iobase + 1);
117} 117}
118 118
119static struct file_operations atmel_ops = { 119static const struct file_operations atmel_ops = {
120 .owner = THIS_MODULE, 120 .owner = THIS_MODULE,
121 .llseek = no_llseek, 121 .llseek = no_llseek,
122 .open = tpm_open, 122 .open = tpm_open,
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index adfff21beb21..1353b5a6bae8 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -338,7 +338,7 @@ static struct attribute *inf_attrs[] = {
338 338
339static struct attribute_group inf_attr_grp = {.attrs = inf_attrs }; 339static struct attribute_group inf_attr_grp = {.attrs = inf_attrs };
340 340
341static struct file_operations inf_ops = { 341static const struct file_operations inf_ops = {
342 .owner = THIS_MODULE, 342 .owner = THIS_MODULE,
343 .llseek = no_llseek, 343 .llseek = no_llseek,
344 .open = tpm_open, 344 .open = tpm_open,
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index 4c8bc06c7d95..26287aace87d 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -226,7 +226,7 @@ static u8 tpm_nsc_status(struct tpm_chip *chip)
226 return inb(chip->vendor.base + NSC_STATUS); 226 return inb(chip->vendor.base + NSC_STATUS);
227} 227}
228 228
229static struct file_operations nsc_ops = { 229static const struct file_operations nsc_ops = {
230 .owner = THIS_MODULE, 230 .owner = THIS_MODULE,
231 .llseek = no_llseek, 231 .llseek = no_llseek,
232 .open = tpm_open, 232 .open = tpm_open,
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index abb0f2aeae66..3232b1932597 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -330,7 +330,7 @@ out_err:
330 return rc; 330 return rc;
331} 331}
332 332
333static struct file_operations tis_ops = { 333static const struct file_operations tis_ops = {
334 .owner = THIS_MODULE, 334 .owner = THIS_MODULE,
335 .llseek = no_llseek, 335 .llseek = no_llseek,
336 .open = tpm_open, 336 .open = tpm_open,
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 615e934da05f..bfdb90242a90 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -912,7 +912,7 @@ static int hung_up_tty_ioctl(struct inode * inode, struct file * file,
912 return cmd == TIOCSPGRP ? -ENOTTY : -EIO; 912 return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
913} 913}
914 914
915static struct file_operations tty_fops = { 915static const struct file_operations tty_fops = {
916 .llseek = no_llseek, 916 .llseek = no_llseek,
917 .read = tty_read, 917 .read = tty_read,
918 .write = tty_write, 918 .write = tty_write,
@@ -924,7 +924,7 @@ static struct file_operations tty_fops = {
924}; 924};
925 925
926#ifdef CONFIG_UNIX98_PTYS 926#ifdef CONFIG_UNIX98_PTYS
927static struct file_operations ptmx_fops = { 927static const struct file_operations ptmx_fops = {
928 .llseek = no_llseek, 928 .llseek = no_llseek,
929 .read = tty_read, 929 .read = tty_read,
930 .write = tty_write, 930 .write = tty_write,
@@ -936,7 +936,7 @@ static struct file_operations ptmx_fops = {
936}; 936};
937#endif 937#endif
938 938
939static struct file_operations console_fops = { 939static const struct file_operations console_fops = {
940 .llseek = no_llseek, 940 .llseek = no_llseek,
941 .read = tty_read, 941 .read = tty_read,
942 .write = redirected_tty_write, 942 .write = redirected_tty_write,
@@ -947,7 +947,7 @@ static struct file_operations console_fops = {
947 .fasync = tty_fasync, 947 .fasync = tty_fasync,
948}; 948};
949 949
950static struct file_operations hung_up_tty_fops = { 950static const struct file_operations hung_up_tty_fops = {
951 .llseek = no_llseek, 951 .llseek = no_llseek,
952 .read = hung_up_tty_read, 952 .read = hung_up_tty_read,
953 .write = hung_up_tty_write, 953 .write = hung_up_tty_write,
@@ -2336,7 +2336,7 @@ static int fionbio(struct file *file, int __user *p)
2336 2336
2337static int tiocsctty(struct tty_struct *tty, int arg) 2337static int tiocsctty(struct tty_struct *tty, int arg)
2338{ 2338{
2339 task_t *p; 2339 struct task_struct *p;
2340 2340
2341 if (current->signal->leader && 2341 if (current->signal->leader &&
2342 (current->signal->session == tty->session)) 2342 (current->signal->session == tty->session))
diff --git a/drivers/char/vc_screen.c b/drivers/char/vc_screen.c
index 45e9bd81bc0e..a9247b5213d5 100644
--- a/drivers/char/vc_screen.c
+++ b/drivers/char/vc_screen.c
@@ -465,7 +465,7 @@ vcs_open(struct inode *inode, struct file *filp)
465 return 0; 465 return 0;
466} 466}
467 467
468static struct file_operations vcs_fops = { 468static const struct file_operations vcs_fops = {
469 .llseek = vcs_lseek, 469 .llseek = vcs_lseek,
470 .read = vcs_read, 470 .read = vcs_read,
471 .write = vcs_write, 471 .write = vcs_write,
diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c
index 7d42c8ec8dbc..b72b2049aaae 100644
--- a/drivers/char/viotape.c
+++ b/drivers/char/viotape.c
@@ -292,7 +292,7 @@ static int proc_viotape_open(struct inode *inode, struct file *file)
292 return single_open(file, proc_viotape_show, NULL); 292 return single_open(file, proc_viotape_show, NULL);
293} 293}
294 294
295static struct file_operations proc_viotape_operations = { 295static const struct file_operations proc_viotape_operations = {
296 .open = proc_viotape_open, 296 .open = proc_viotape_open,
297 .read = seq_read, 297 .read = seq_read,
298 .llseek = seq_lseek, 298 .llseek = seq_lseek,
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index 073da48c092e..1b9b1f1d4c49 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -605,7 +605,7 @@ static int gpio_release(struct inode *inode, struct file *file)
605 return 0; 605 return 0;
606} 606}
607 607
608static struct file_operations gpio_fops = { 608static const struct file_operations gpio_fops = {
609 .owner = THIS_MODULE, 609 .owner = THIS_MODULE,
610 .read = gpio_read, 610 .read = gpio_read,
611 .write = gpio_write, 611 .write = gpio_write,
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 3ef823d7d255..da7e66a2a38b 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -886,6 +886,7 @@ void vc_disallocate(unsigned int currcons)
886 if (vc_cons_allocated(currcons)) { 886 if (vc_cons_allocated(currcons)) {
887 struct vc_data *vc = vc_cons[currcons].d; 887 struct vc_data *vc = vc_cons[currcons].d;
888 vc->vc_sw->con_deinit(vc); 888 vc->vc_sw->con_deinit(vc);
889 module_put(vc->vc_sw->owner);
889 if (vc->vc_kmalloced) 890 if (vc->vc_kmalloced)
890 kfree(vc->vc_screenbuf); 891 kfree(vc->vc_screenbuf);
891 if (currcons >= MIN_NR_CONSOLES) 892 if (currcons >= MIN_NR_CONSOLES)
diff --git a/drivers/char/watchdog/acquirewdt.c b/drivers/char/watchdog/acquirewdt.c
index 7289f4af93d0..c77fe3cf2852 100644
--- a/drivers/char/watchdog/acquirewdt.c
+++ b/drivers/char/watchdog/acquirewdt.c
@@ -231,7 +231,7 @@ static int acq_notify_sys(struct notifier_block *this, unsigned long code,
231 * Kernel Interfaces 231 * Kernel Interfaces
232 */ 232 */
233 233
234static struct file_operations acq_fops = { 234static const struct file_operations acq_fops = {
235 .owner = THIS_MODULE, 235 .owner = THIS_MODULE,
236 .llseek = no_llseek, 236 .llseek = no_llseek,
237 .write = acq_write, 237 .write = acq_write,
diff --git a/drivers/char/watchdog/advantechwdt.c b/drivers/char/watchdog/advantechwdt.c
index 194a3fd36b91..8069be445edc 100644
--- a/drivers/char/watchdog/advantechwdt.c
+++ b/drivers/char/watchdog/advantechwdt.c
@@ -227,7 +227,7 @@ advwdt_notify_sys(struct notifier_block *this, unsigned long code,
227 * Kernel Interfaces 227 * Kernel Interfaces
228 */ 228 */
229 229
230static struct file_operations advwdt_fops = { 230static const struct file_operations advwdt_fops = {
231 .owner = THIS_MODULE, 231 .owner = THIS_MODULE,
232 .llseek = no_llseek, 232 .llseek = no_llseek,
233 .write = advwdt_write, 233 .write = advwdt_write,
diff --git a/drivers/char/watchdog/alim1535_wdt.c b/drivers/char/watchdog/alim1535_wdt.c
index 8338ca300e2e..c5c94e4c9495 100644
--- a/drivers/char/watchdog/alim1535_wdt.c
+++ b/drivers/char/watchdog/alim1535_wdt.c
@@ -362,7 +362,7 @@ static int __init ali_find_watchdog(void)
362 * Kernel Interfaces 362 * Kernel Interfaces
363 */ 363 */
364 364
365static struct file_operations ali_fops = { 365static const struct file_operations ali_fops = {
366 .owner = THIS_MODULE, 366 .owner = THIS_MODULE,
367 .llseek = no_llseek, 367 .llseek = no_llseek,
368 .write = ali_write, 368 .write = ali_write,
diff --git a/drivers/char/watchdog/alim7101_wdt.c b/drivers/char/watchdog/alim7101_wdt.c
index c05ac188a4d7..ffd7684f999b 100644
--- a/drivers/char/watchdog/alim7101_wdt.c
+++ b/drivers/char/watchdog/alim7101_wdt.c
@@ -281,7 +281,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u
281 } 281 }
282} 282}
283 283
284static struct file_operations wdt_fops = { 284static const struct file_operations wdt_fops = {
285 .owner= THIS_MODULE, 285 .owner= THIS_MODULE,
286 .llseek= no_llseek, 286 .llseek= no_llseek,
287 .write= fop_write, 287 .write= fop_write,
diff --git a/drivers/char/watchdog/at91_wdt.c b/drivers/char/watchdog/at91_wdt.c
index f61dedc3c96c..cc266715ea32 100644
--- a/drivers/char/watchdog/at91_wdt.c
+++ b/drivers/char/watchdog/at91_wdt.c
@@ -183,7 +183,7 @@ static ssize_t at91_wdt_write(struct file *file, const char *data, size_t len, l
183 183
184/* ......................................................................... */ 184/* ......................................................................... */
185 185
186static struct file_operations at91wdt_fops = { 186static const struct file_operations at91wdt_fops = {
187 .owner = THIS_MODULE, 187 .owner = THIS_MODULE,
188 .llseek = no_llseek, 188 .llseek = no_llseek,
189 .ioctl = at91_wdt_ioctl, 189 .ioctl = at91_wdt_ioctl,
diff --git a/drivers/char/watchdog/booke_wdt.c b/drivers/char/watchdog/booke_wdt.c
index 537f5c6729bf..e3cefc538b40 100644
--- a/drivers/char/watchdog/booke_wdt.c
+++ b/drivers/char/watchdog/booke_wdt.c
@@ -145,7 +145,7 @@ static int booke_wdt_open (struct inode *inode, struct file *file)
145 return 0; 145 return 0;
146} 146}
147 147
148static struct file_operations booke_wdt_fops = { 148static const struct file_operations booke_wdt_fops = {
149 .owner = THIS_MODULE, 149 .owner = THIS_MODULE,
150 .llseek = no_llseek, 150 .llseek = no_llseek,
151 .write = booke_wdt_write, 151 .write = booke_wdt_write,
diff --git a/drivers/char/watchdog/cpu5wdt.c b/drivers/char/watchdog/cpu5wdt.c
index 3e8410b5a65e..04c7e49918db 100644
--- a/drivers/char/watchdog/cpu5wdt.c
+++ b/drivers/char/watchdog/cpu5wdt.c
@@ -198,7 +198,7 @@ static ssize_t cpu5wdt_write(struct file *file, const char __user *buf, size_t c
198 return count; 198 return count;
199} 199}
200 200
201static struct file_operations cpu5wdt_fops = { 201static const struct file_operations cpu5wdt_fops = {
202 .owner = THIS_MODULE, 202 .owner = THIS_MODULE,
203 .llseek = no_llseek, 203 .llseek = no_llseek,
204 .ioctl = cpu5wdt_ioctl, 204 .ioctl = cpu5wdt_ioctl,
diff --git a/drivers/char/watchdog/ep93xx_wdt.c b/drivers/char/watchdog/ep93xx_wdt.c
index 9021dbb78299..77c8a955ae9e 100644
--- a/drivers/char/watchdog/ep93xx_wdt.c
+++ b/drivers/char/watchdog/ep93xx_wdt.c
@@ -187,7 +187,7 @@ static int ep93xx_wdt_release(struct inode *inode, struct file *file)
187 return 0; 187 return 0;
188} 188}
189 189
190static struct file_operations ep93xx_wdt_fops = { 190static const struct file_operations ep93xx_wdt_fops = {
191 .owner = THIS_MODULE, 191 .owner = THIS_MODULE,
192 .write = ep93xx_wdt_write, 192 .write = ep93xx_wdt_write,
193 .ioctl = ep93xx_wdt_ioctl, 193 .ioctl = ep93xx_wdt_ioctl,
diff --git a/drivers/char/watchdog/eurotechwdt.c b/drivers/char/watchdog/eurotechwdt.c
index ea670de4fab7..62dbccb2f6df 100644
--- a/drivers/char/watchdog/eurotechwdt.c
+++ b/drivers/char/watchdog/eurotechwdt.c
@@ -356,7 +356,7 @@ static int eurwdt_notify_sys(struct notifier_block *this, unsigned long code,
356 */ 356 */
357 357
358 358
359static struct file_operations eurwdt_fops = { 359static const struct file_operations eurwdt_fops = {
360 .owner = THIS_MODULE, 360 .owner = THIS_MODULE,
361 .llseek = no_llseek, 361 .llseek = no_llseek,
362 .write = eurwdt_write, 362 .write = eurwdt_write,
diff --git a/drivers/char/watchdog/i6300esb.c b/drivers/char/watchdog/i6300esb.c
index 93785f13242e..870539eabbf3 100644
--- a/drivers/char/watchdog/i6300esb.c
+++ b/drivers/char/watchdog/i6300esb.c
@@ -337,7 +337,7 @@ static int esb_notify_sys (struct notifier_block *this, unsigned long code, void
337 * Kernel Interfaces 337 * Kernel Interfaces
338 */ 338 */
339 339
340static struct file_operations esb_fops = { 340static const struct file_operations esb_fops = {
341 .owner = THIS_MODULE, 341 .owner = THIS_MODULE,
342 .llseek = no_llseek, 342 .llseek = no_llseek,
343 .write = esb_write, 343 .write = esb_write,
diff --git a/drivers/char/watchdog/i8xx_tco.c b/drivers/char/watchdog/i8xx_tco.c
index bfbdbbf3c2f2..8385dd36eefe 100644
--- a/drivers/char/watchdog/i8xx_tco.c
+++ b/drivers/char/watchdog/i8xx_tco.c
@@ -378,7 +378,7 @@ static int i8xx_tco_notify_sys (struct notifier_block *this, unsigned long code,
378 * Kernel Interfaces 378 * Kernel Interfaces
379 */ 379 */
380 380
381static struct file_operations i8xx_tco_fops = { 381static const struct file_operations i8xx_tco_fops = {
382 .owner = THIS_MODULE, 382 .owner = THIS_MODULE,
383 .llseek = no_llseek, 383 .llseek = no_llseek,
384 .write = i8xx_tco_write, 384 .write = i8xx_tco_write,
diff --git a/drivers/char/watchdog/ib700wdt.c b/drivers/char/watchdog/ib700wdt.c
index a2e53c715b36..fd95f7327798 100644
--- a/drivers/char/watchdog/ib700wdt.c
+++ b/drivers/char/watchdog/ib700wdt.c
@@ -255,7 +255,7 @@ ibwdt_notify_sys(struct notifier_block *this, unsigned long code,
255 * Kernel Interfaces 255 * Kernel Interfaces
256 */ 256 */
257 257
258static struct file_operations ibwdt_fops = { 258static const struct file_operations ibwdt_fops = {
259 .owner = THIS_MODULE, 259 .owner = THIS_MODULE,
260 .llseek = no_llseek, 260 .llseek = no_llseek,
261 .write = ibwdt_write, 261 .write = ibwdt_write,
diff --git a/drivers/char/watchdog/ibmasr.c b/drivers/char/watchdog/ibmasr.c
index b0741cbdc139..26ceee7a4df0 100644
--- a/drivers/char/watchdog/ibmasr.c
+++ b/drivers/char/watchdog/ibmasr.c
@@ -322,7 +322,7 @@ static int asr_release(struct inode *inode, struct file *file)
322 return 0; 322 return 0;
323} 323}
324 324
325static struct file_operations asr_fops = { 325static const struct file_operations asr_fops = {
326 .owner = THIS_MODULE, 326 .owner = THIS_MODULE,
327 .llseek = no_llseek, 327 .llseek = no_llseek,
328 .write = asr_write, 328 .write = asr_write,
diff --git a/drivers/char/watchdog/indydog.c b/drivers/char/watchdog/indydog.c
index d387979b2434..dacc1c20a310 100644
--- a/drivers/char/watchdog/indydog.c
+++ b/drivers/char/watchdog/indydog.c
@@ -154,7 +154,7 @@ static int indydog_notify_sys(struct notifier_block *this, unsigned long code, v
154 return NOTIFY_DONE; 154 return NOTIFY_DONE;
155} 155}
156 156
157static struct file_operations indydog_fops = { 157static const struct file_operations indydog_fops = {
158 .owner = THIS_MODULE, 158 .owner = THIS_MODULE,
159 .llseek = no_llseek, 159 .llseek = no_llseek,
160 .write = indydog_write, 160 .write = indydog_write,
diff --git a/drivers/char/watchdog/ixp2000_wdt.c b/drivers/char/watchdog/ixp2000_wdt.c
index aa29a7d68759..692908819e26 100644
--- a/drivers/char/watchdog/ixp2000_wdt.c
+++ b/drivers/char/watchdog/ixp2000_wdt.c
@@ -168,7 +168,7 @@ ixp2000_wdt_release(struct inode *inode, struct file *file)
168} 168}
169 169
170 170
171static struct file_operations ixp2000_wdt_fops = 171static const struct file_operations ixp2000_wdt_fops =
172{ 172{
173 .owner = THIS_MODULE, 173 .owner = THIS_MODULE,
174 .llseek = no_llseek, 174 .llseek = no_llseek,
diff --git a/drivers/char/watchdog/ixp4xx_wdt.c b/drivers/char/watchdog/ixp4xx_wdt.c
index e6a3fe83fa01..9db5cf2c38c3 100644
--- a/drivers/char/watchdog/ixp4xx_wdt.c
+++ b/drivers/char/watchdog/ixp4xx_wdt.c
@@ -162,7 +162,7 @@ ixp4xx_wdt_release(struct inode *inode, struct file *file)
162} 162}
163 163
164 164
165static struct file_operations ixp4xx_wdt_fops = 165static const struct file_operations ixp4xx_wdt_fops =
166{ 166{
167 .owner = THIS_MODULE, 167 .owner = THIS_MODULE,
168 .llseek = no_llseek, 168 .llseek = no_llseek,
diff --git a/drivers/char/watchdog/machzwd.c b/drivers/char/watchdog/machzwd.c
index b67b4878ae0f..23734e07fb22 100644
--- a/drivers/char/watchdog/machzwd.c
+++ b/drivers/char/watchdog/machzwd.c
@@ -388,7 +388,7 @@ static int zf_notify_sys(struct notifier_block *this, unsigned long code,
388 388
389 389
390 390
391static struct file_operations zf_fops = { 391static const struct file_operations zf_fops = {
392 .owner = THIS_MODULE, 392 .owner = THIS_MODULE,
393 .llseek = no_llseek, 393 .llseek = no_llseek,
394 .write = zf_write, 394 .write = zf_write,
diff --git a/drivers/char/watchdog/mixcomwd.c b/drivers/char/watchdog/mixcomwd.c
index 433c27f98159..ae943324d251 100644
--- a/drivers/char/watchdog/mixcomwd.c
+++ b/drivers/char/watchdog/mixcomwd.c
@@ -190,7 +190,7 @@ static int mixcomwd_ioctl(struct inode *inode, struct file *file,
190 return 0; 190 return 0;
191} 191}
192 192
193static struct file_operations mixcomwd_fops= 193static const struct file_operations mixcomwd_fops=
194{ 194{
195 .owner = THIS_MODULE, 195 .owner = THIS_MODULE,
196 .llseek = no_llseek, 196 .llseek = no_llseek,
diff --git a/drivers/char/watchdog/mpc83xx_wdt.c b/drivers/char/watchdog/mpc83xx_wdt.c
index dac1381af364..a480903ee1a5 100644
--- a/drivers/char/watchdog/mpc83xx_wdt.c
+++ b/drivers/char/watchdog/mpc83xx_wdt.c
@@ -129,7 +129,7 @@ static int mpc83xx_wdt_ioctl(struct inode *inode, struct file *file,
129 } 129 }
130} 130}
131 131
132static struct file_operations mpc83xx_wdt_fops = { 132static const struct file_operations mpc83xx_wdt_fops = {
133 .owner = THIS_MODULE, 133 .owner = THIS_MODULE,
134 .llseek = no_llseek, 134 .llseek = no_llseek,
135 .write = mpc83xx_wdt_write, 135 .write = mpc83xx_wdt_write,
diff --git a/drivers/char/watchdog/mpc8xx_wdt.c b/drivers/char/watchdog/mpc8xx_wdt.c
index 11f0ccd4c4d4..35dd9e6e1140 100644
--- a/drivers/char/watchdog/mpc8xx_wdt.c
+++ b/drivers/char/watchdog/mpc8xx_wdt.c
@@ -132,7 +132,7 @@ static int mpc8xx_wdt_ioctl(struct inode *inode, struct file *file,
132 return 0; 132 return 0;
133} 133}
134 134
135static struct file_operations mpc8xx_wdt_fops = { 135static const struct file_operations mpc8xx_wdt_fops = {
136 .owner = THIS_MODULE, 136 .owner = THIS_MODULE,
137 .llseek = no_llseek, 137 .llseek = no_llseek,
138 .write = mpc8xx_wdt_write, 138 .write = mpc8xx_wdt_write,
diff --git a/drivers/char/watchdog/mpcore_wdt.c b/drivers/char/watchdog/mpcore_wdt.c
index c2d492c852fc..54b3c56ead0d 100644
--- a/drivers/char/watchdog/mpcore_wdt.c
+++ b/drivers/char/watchdog/mpcore_wdt.c
@@ -297,7 +297,7 @@ static void mpcore_wdt_shutdown(struct platform_device *dev)
297/* 297/*
298 * Kernel Interfaces 298 * Kernel Interfaces
299 */ 299 */
300static struct file_operations mpcore_wdt_fops = { 300static const struct file_operations mpcore_wdt_fops = {
301 .owner = THIS_MODULE, 301 .owner = THIS_MODULE,
302 .llseek = no_llseek, 302 .llseek = no_llseek,
303 .write = mpcore_wdt_write, 303 .write = mpcore_wdt_write,
diff --git a/drivers/char/watchdog/mv64x60_wdt.c b/drivers/char/watchdog/mv64x60_wdt.c
index 20a6cbb0fbb8..5c8fab345b40 100644
--- a/drivers/char/watchdog/mv64x60_wdt.c
+++ b/drivers/char/watchdog/mv64x60_wdt.c
@@ -166,7 +166,7 @@ static int mv64x60_wdt_ioctl(struct inode *inode, struct file *file,
166 return 0; 166 return 0;
167} 167}
168 168
169static struct file_operations mv64x60_wdt_fops = { 169static const struct file_operations mv64x60_wdt_fops = {
170 .owner = THIS_MODULE, 170 .owner = THIS_MODULE,
171 .llseek = no_llseek, 171 .llseek = no_llseek,
172 .write = mv64x60_wdt_write, 172 .write = mv64x60_wdt_write,
diff --git a/drivers/char/watchdog/pcwd.c b/drivers/char/watchdog/pcwd.c
index 6d44ca68312d..cd7d1b6a5d9f 100644
--- a/drivers/char/watchdog/pcwd.c
+++ b/drivers/char/watchdog/pcwd.c
@@ -740,7 +740,7 @@ static int pcwd_notify_sys(struct notifier_block *this, unsigned long code, void
740 * Kernel Interfaces 740 * Kernel Interfaces
741 */ 741 */
742 742
743static struct file_operations pcwd_fops = { 743static const struct file_operations pcwd_fops = {
744 .owner = THIS_MODULE, 744 .owner = THIS_MODULE,
745 .llseek = no_llseek, 745 .llseek = no_llseek,
746 .write = pcwd_write, 746 .write = pcwd_write,
@@ -755,7 +755,7 @@ static struct miscdevice pcwd_miscdev = {
755 .fops = &pcwd_fops, 755 .fops = &pcwd_fops,
756}; 756};
757 757
758static struct file_operations pcwd_temp_fops = { 758static const struct file_operations pcwd_temp_fops = {
759 .owner = THIS_MODULE, 759 .owner = THIS_MODULE,
760 .llseek = no_llseek, 760 .llseek = no_llseek,
761 .read = pcwd_temp_read, 761 .read = pcwd_temp_read,
diff --git a/drivers/char/watchdog/pcwd_pci.c b/drivers/char/watchdog/pcwd_pci.c
index 1f40ecefbf72..c7cfd6dbfe1b 100644
--- a/drivers/char/watchdog/pcwd_pci.c
+++ b/drivers/char/watchdog/pcwd_pci.c
@@ -625,7 +625,7 @@ static int pcipcwd_notify_sys(struct notifier_block *this, unsigned long code, v
625 * Kernel Interfaces 625 * Kernel Interfaces
626 */ 626 */
627 627
628static struct file_operations pcipcwd_fops = { 628static const struct file_operations pcipcwd_fops = {
629 .owner = THIS_MODULE, 629 .owner = THIS_MODULE,
630 .llseek = no_llseek, 630 .llseek = no_llseek,
631 .write = pcipcwd_write, 631 .write = pcipcwd_write,
@@ -640,7 +640,7 @@ static struct miscdevice pcipcwd_miscdev = {
640 .fops = &pcipcwd_fops, 640 .fops = &pcipcwd_fops,
641}; 641};
642 642
643static struct file_operations pcipcwd_temp_fops = { 643static const struct file_operations pcipcwd_temp_fops = {
644 .owner = THIS_MODULE, 644 .owner = THIS_MODULE,
645 .llseek = no_llseek, 645 .llseek = no_llseek,
646 .read = pcipcwd_temp_read, 646 .read = pcipcwd_temp_read,
diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c
index 92bf8c1a0f0d..b7ae73dcdd08 100644
--- a/drivers/char/watchdog/pcwd_usb.c
+++ b/drivers/char/watchdog/pcwd_usb.c
@@ -523,7 +523,7 @@ static int usb_pcwd_notify_sys(struct notifier_block *this, unsigned long code,
523 * Kernel Interfaces 523 * Kernel Interfaces
524 */ 524 */
525 525
526static struct file_operations usb_pcwd_fops = { 526static const struct file_operations usb_pcwd_fops = {
527 .owner = THIS_MODULE, 527 .owner = THIS_MODULE,
528 .llseek = no_llseek, 528 .llseek = no_llseek,
529 .write = usb_pcwd_write, 529 .write = usb_pcwd_write,
@@ -538,7 +538,7 @@ static struct miscdevice usb_pcwd_miscdev = {
538 .fops = &usb_pcwd_fops, 538 .fops = &usb_pcwd_fops,
539}; 539};
540 540
541static struct file_operations usb_pcwd_temperature_fops = { 541static const struct file_operations usb_pcwd_temperature_fops = {
542 .owner = THIS_MODULE, 542 .owner = THIS_MODULE,
543 .llseek = no_llseek, 543 .llseek = no_llseek,
544 .read = usb_pcwd_temperature_read, 544 .read = usb_pcwd_temperature_read,
diff --git a/drivers/char/watchdog/s3c2410_wdt.c b/drivers/char/watchdog/s3c2410_wdt.c
index f267dad26071..be978e8ed754 100644
--- a/drivers/char/watchdog/s3c2410_wdt.c
+++ b/drivers/char/watchdog/s3c2410_wdt.c
@@ -319,7 +319,7 @@ static int s3c2410wdt_ioctl(struct inode *inode, struct file *file,
319 319
320/* kernel interface */ 320/* kernel interface */
321 321
322static struct file_operations s3c2410wdt_fops = { 322static const struct file_operations s3c2410wdt_fops = {
323 .owner = THIS_MODULE, 323 .owner = THIS_MODULE,
324 .llseek = no_llseek, 324 .llseek = no_llseek,
325 .write = s3c2410wdt_write, 325 .write = s3c2410wdt_write,
diff --git a/drivers/char/watchdog/sa1100_wdt.c b/drivers/char/watchdog/sa1100_wdt.c
index b22e95c5470c..1fc16d995788 100644
--- a/drivers/char/watchdog/sa1100_wdt.c
+++ b/drivers/char/watchdog/sa1100_wdt.c
@@ -135,7 +135,7 @@ static int sa1100dog_ioctl(struct inode *inode, struct file *file,
135 return ret; 135 return ret;
136} 136}
137 137
138static struct file_operations sa1100dog_fops = 138static const struct file_operations sa1100dog_fops =
139{ 139{
140 .owner = THIS_MODULE, 140 .owner = THIS_MODULE,
141 .llseek = no_llseek, 141 .llseek = no_llseek,
diff --git a/drivers/char/watchdog/sbc60xxwdt.c b/drivers/char/watchdog/sbc60xxwdt.c
index ed0bd55fbfc1..4663c2fd53cd 100644
--- a/drivers/char/watchdog/sbc60xxwdt.c
+++ b/drivers/char/watchdog/sbc60xxwdt.c
@@ -282,7 +282,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
282 } 282 }
283} 283}
284 284
285static struct file_operations wdt_fops = { 285static const struct file_operations wdt_fops = {
286 .owner = THIS_MODULE, 286 .owner = THIS_MODULE,
287 .llseek = no_llseek, 287 .llseek = no_llseek,
288 .write = fop_write, 288 .write = fop_write,
diff --git a/drivers/char/watchdog/sbc8360.c b/drivers/char/watchdog/sbc8360.c
index 6562aa910ace..1035be5b5019 100644
--- a/drivers/char/watchdog/sbc8360.c
+++ b/drivers/char/watchdog/sbc8360.c
@@ -305,7 +305,7 @@ static int sbc8360_notify_sys(struct notifier_block *this, unsigned long code,
305 * Kernel Interfaces 305 * Kernel Interfaces
306 */ 306 */
307 307
308static struct file_operations sbc8360_fops = { 308static const struct file_operations sbc8360_fops = {
309 .owner = THIS_MODULE, 309 .owner = THIS_MODULE,
310 .llseek = no_llseek, 310 .llseek = no_llseek,
311 .write = sbc8360_write, 311 .write = sbc8360_write,
diff --git a/drivers/char/watchdog/sbc_epx_c3.c b/drivers/char/watchdog/sbc_epx_c3.c
index 09867fadc720..bfc475dabe6d 100644
--- a/drivers/char/watchdog/sbc_epx_c3.c
+++ b/drivers/char/watchdog/sbc_epx_c3.c
@@ -154,7 +154,7 @@ static int epx_c3_notify_sys(struct notifier_block *this, unsigned long code,
154 return NOTIFY_DONE; 154 return NOTIFY_DONE;
155} 155}
156 156
157static struct file_operations epx_c3_fops = { 157static const struct file_operations epx_c3_fops = {
158 .owner = THIS_MODULE, 158 .owner = THIS_MODULE,
159 .llseek = no_llseek, 159 .llseek = no_llseek,
160 .write = epx_c3_write, 160 .write = epx_c3_write,
diff --git a/drivers/char/watchdog/sc1200wdt.c b/drivers/char/watchdog/sc1200wdt.c
index 78ef6333c181..7c3cf293a5af 100644
--- a/drivers/char/watchdog/sc1200wdt.c
+++ b/drivers/char/watchdog/sc1200wdt.c
@@ -292,7 +292,7 @@ static struct notifier_block sc1200wdt_notifier =
292 .notifier_call = sc1200wdt_notify_sys, 292 .notifier_call = sc1200wdt_notify_sys,
293}; 293};
294 294
295static struct file_operations sc1200wdt_fops = 295static const struct file_operations sc1200wdt_fops =
296{ 296{
297 .owner = THIS_MODULE, 297 .owner = THIS_MODULE,
298 .llseek = no_llseek, 298 .llseek = no_llseek,
diff --git a/drivers/char/watchdog/sc520_wdt.c b/drivers/char/watchdog/sc520_wdt.c
index 4ee9974ad8cb..2c7c9db71be8 100644
--- a/drivers/char/watchdog/sc520_wdt.c
+++ b/drivers/char/watchdog/sc520_wdt.c
@@ -336,7 +336,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
336 } 336 }
337} 337}
338 338
339static struct file_operations wdt_fops = { 339static const struct file_operations wdt_fops = {
340 .owner = THIS_MODULE, 340 .owner = THIS_MODULE,
341 .llseek = no_llseek, 341 .llseek = no_llseek,
342 .write = fop_write, 342 .write = fop_write,
diff --git a/drivers/char/watchdog/scx200_wdt.c b/drivers/char/watchdog/scx200_wdt.c
index c0b4754e8de0..c561299a5537 100644
--- a/drivers/char/watchdog/scx200_wdt.c
+++ b/drivers/char/watchdog/scx200_wdt.c
@@ -194,7 +194,7 @@ static int scx200_wdt_ioctl(struct inode *inode, struct file *file,
194 } 194 }
195} 195}
196 196
197static struct file_operations scx200_wdt_fops = { 197static const struct file_operations scx200_wdt_fops = {
198 .owner = THIS_MODULE, 198 .owner = THIS_MODULE,
199 .llseek = no_llseek, 199 .llseek = no_llseek,
200 .write = scx200_wdt_write, 200 .write = scx200_wdt_write,
diff --git a/drivers/char/watchdog/shwdt.c b/drivers/char/watchdog/shwdt.c
index 803701b675c0..1355038f1044 100644
--- a/drivers/char/watchdog/shwdt.c
+++ b/drivers/char/watchdog/shwdt.c
@@ -344,7 +344,7 @@ static int sh_wdt_notify_sys(struct notifier_block *this,
344 return NOTIFY_DONE; 344 return NOTIFY_DONE;
345} 345}
346 346
347static struct file_operations sh_wdt_fops = { 347static const struct file_operations sh_wdt_fops = {
348 .owner = THIS_MODULE, 348 .owner = THIS_MODULE,
349 .llseek = no_llseek, 349 .llseek = no_llseek,
350 .write = sh_wdt_write, 350 .write = sh_wdt_write,
diff --git a/drivers/char/watchdog/softdog.c b/drivers/char/watchdog/softdog.c
index 79ce5c655428..ef8da517545a 100644
--- a/drivers/char/watchdog/softdog.c
+++ b/drivers/char/watchdog/softdog.c
@@ -243,7 +243,7 @@ static int softdog_notify_sys(struct notifier_block *this, unsigned long code,
243 * Kernel Interfaces 243 * Kernel Interfaces
244 */ 244 */
245 245
246static struct file_operations softdog_fops = { 246static const struct file_operations softdog_fops = {
247 .owner = THIS_MODULE, 247 .owner = THIS_MODULE,
248 .llseek = no_llseek, 248 .llseek = no_llseek,
249 .write = softdog_write, 249 .write = softdog_write,
diff --git a/drivers/char/watchdog/w83627hf_wdt.c b/drivers/char/watchdog/w83627hf_wdt.c
index d15ca9a3986f..13f16d41c2fd 100644
--- a/drivers/char/watchdog/w83627hf_wdt.c
+++ b/drivers/char/watchdog/w83627hf_wdt.c
@@ -274,7 +274,7 @@ wdt_notify_sys(struct notifier_block *this, unsigned long code,
274 * Kernel Interfaces 274 * Kernel Interfaces
275 */ 275 */
276 276
277static struct file_operations wdt_fops = { 277static const struct file_operations wdt_fops = {
278 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
279 .llseek = no_llseek, 279 .llseek = no_llseek,
280 .write = wdt_write, 280 .write = wdt_write,
diff --git a/drivers/char/watchdog/w83877f_wdt.c b/drivers/char/watchdog/w83877f_wdt.c
index 52a8bd0a5988..ccf6c0915945 100644
--- a/drivers/char/watchdog/w83877f_wdt.c
+++ b/drivers/char/watchdog/w83877f_wdt.c
@@ -299,7 +299,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
299 } 299 }
300} 300}
301 301
302static struct file_operations wdt_fops = { 302static const struct file_operations wdt_fops = {
303 .owner = THIS_MODULE, 303 .owner = THIS_MODULE,
304 .llseek = no_llseek, 304 .llseek = no_llseek,
305 .write = fop_write, 305 .write = fop_write,
diff --git a/drivers/char/watchdog/w83977f_wdt.c b/drivers/char/watchdog/w83977f_wdt.c
index c31849e4c5c2..98f4e17db70a 100644
--- a/drivers/char/watchdog/w83977f_wdt.c
+++ b/drivers/char/watchdog/w83977f_wdt.c
@@ -449,7 +449,7 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
449 return NOTIFY_DONE; 449 return NOTIFY_DONE;
450} 450}
451 451
452static struct file_operations wdt_fops= 452static const struct file_operations wdt_fops=
453{ 453{
454 .owner = THIS_MODULE, 454 .owner = THIS_MODULE,
455 .llseek = no_llseek, 455 .llseek = no_llseek,
diff --git a/drivers/char/watchdog/wafer5823wdt.c b/drivers/char/watchdog/wafer5823wdt.c
index 7cf6c9bbf486..2bb6a9d6ad28 100644
--- a/drivers/char/watchdog/wafer5823wdt.c
+++ b/drivers/char/watchdog/wafer5823wdt.c
@@ -222,7 +222,7 @@ static int wafwdt_notify_sys(struct notifier_block *this, unsigned long code, vo
222 * Kernel Interfaces 222 * Kernel Interfaces
223 */ 223 */
224 224
225static struct file_operations wafwdt_fops = { 225static const struct file_operations wafwdt_fops = {
226 .owner = THIS_MODULE, 226 .owner = THIS_MODULE,
227 .llseek = no_llseek, 227 .llseek = no_llseek,
228 .write = wafwdt_write, 228 .write = wafwdt_write,
diff --git a/drivers/char/watchdog/wdrtas.c b/drivers/char/watchdog/wdrtas.c
index 3a462c34b92a..5c38cdf41731 100644
--- a/drivers/char/watchdog/wdrtas.c
+++ b/drivers/char/watchdog/wdrtas.c
@@ -520,7 +520,7 @@ wdrtas_reboot(struct notifier_block *this, unsigned long code, void *ptr)
520 520
521/*** initialization stuff */ 521/*** initialization stuff */
522 522
523static struct file_operations wdrtas_fops = { 523static const struct file_operations wdrtas_fops = {
524 .owner = THIS_MODULE, 524 .owner = THIS_MODULE,
525 .llseek = no_llseek, 525 .llseek = no_llseek,
526 .write = wdrtas_write, 526 .write = wdrtas_write,
@@ -535,7 +535,7 @@ static struct miscdevice wdrtas_miscdev = {
535 .fops = &wdrtas_fops, 535 .fops = &wdrtas_fops,
536}; 536};
537 537
538static struct file_operations wdrtas_temp_fops = { 538static const struct file_operations wdrtas_temp_fops = {
539 .owner = THIS_MODULE, 539 .owner = THIS_MODULE,
540 .llseek = no_llseek, 540 .llseek = no_llseek,
541 .read = wdrtas_temp_read, 541 .read = wdrtas_temp_read,
diff --git a/drivers/char/watchdog/wdt.c b/drivers/char/watchdog/wdt.c
index a1d972c8f44c..70be81e39a61 100644
--- a/drivers/char/watchdog/wdt.c
+++ b/drivers/char/watchdog/wdt.c
@@ -494,7 +494,7 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
494 */ 494 */
495 495
496 496
497static struct file_operations wdt_fops = { 497static const struct file_operations wdt_fops = {
498 .owner = THIS_MODULE, 498 .owner = THIS_MODULE,
499 .llseek = no_llseek, 499 .llseek = no_llseek,
500 .write = wdt_write, 500 .write = wdt_write,
@@ -510,7 +510,7 @@ static struct miscdevice wdt_miscdev = {
510}; 510};
511 511
512#ifdef CONFIG_WDT_501 512#ifdef CONFIG_WDT_501
513static struct file_operations wdt_temp_fops = { 513static const struct file_operations wdt_temp_fops = {
514 .owner = THIS_MODULE, 514 .owner = THIS_MODULE,
515 .llseek = no_llseek, 515 .llseek = no_llseek,
516 .read = wdt_temp_read, 516 .read = wdt_temp_read,
diff --git a/drivers/char/watchdog/wdt285.c b/drivers/char/watchdog/wdt285.c
index 52825a1f1779..6555fb844f23 100644
--- a/drivers/char/watchdog/wdt285.c
+++ b/drivers/char/watchdog/wdt285.c
@@ -178,7 +178,7 @@ watchdog_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
178 return ret; 178 return ret;
179} 179}
180 180
181static struct file_operations watchdog_fops = { 181static const struct file_operations watchdog_fops = {
182 .owner = THIS_MODULE, 182 .owner = THIS_MODULE,
183 .llseek = no_llseek, 183 .llseek = no_llseek,
184 .write = watchdog_write, 184 .write = watchdog_write,
diff --git a/drivers/char/watchdog/wdt977.c b/drivers/char/watchdog/wdt977.c
index 3cde2b9bb763..a0935bc775f8 100644
--- a/drivers/char/watchdog/wdt977.c
+++ b/drivers/char/watchdog/wdt977.c
@@ -418,7 +418,7 @@ static int wdt977_notify_sys(struct notifier_block *this, unsigned long code,
418 return NOTIFY_DONE; 418 return NOTIFY_DONE;
419} 419}
420 420
421static struct file_operations wdt977_fops= 421static const struct file_operations wdt977_fops=
422{ 422{
423 .owner = THIS_MODULE, 423 .owner = THIS_MODULE,
424 .llseek = no_llseek, 424 .llseek = no_llseek,
diff --git a/drivers/char/watchdog/wdt_pci.c b/drivers/char/watchdog/wdt_pci.c
index 7529ecdbabae..5918ca2c9c35 100644
--- a/drivers/char/watchdog/wdt_pci.c
+++ b/drivers/char/watchdog/wdt_pci.c
@@ -543,7 +543,7 @@ static int wdtpci_notify_sys(struct notifier_block *this, unsigned long code,
543 */ 543 */
544 544
545 545
546static struct file_operations wdtpci_fops = { 546static const struct file_operations wdtpci_fops = {
547 .owner = THIS_MODULE, 547 .owner = THIS_MODULE,
548 .llseek = no_llseek, 548 .llseek = no_llseek,
549 .write = wdtpci_write, 549 .write = wdtpci_write,
@@ -559,7 +559,7 @@ static struct miscdevice wdtpci_miscdev = {
559}; 559};
560 560
561#ifdef CONFIG_WDT_501_PCI 561#ifdef CONFIG_WDT_501_PCI
562static struct file_operations wdtpci_temp_fops = { 562static const struct file_operations wdtpci_temp_fops = {
563 .owner = THIS_MODULE, 563 .owner = THIS_MODULE,
564 .llseek = no_llseek, 564 .llseek = no_llseek,
565 .read = wdtpci_temp_read, 565 .read = wdtpci_temp_read,
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 6ca3476d02c7..adbe9f76a505 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -838,7 +838,7 @@ static ide_startstop_t idefloppy_pc_intr (ide_drive_t *drive)
838 "transferred\n", pc->actually_transferred); 838 "transferred\n", pc->actually_transferred);
839 clear_bit(PC_DMA_IN_PROGRESS, &pc->flags); 839 clear_bit(PC_DMA_IN_PROGRESS, &pc->flags);
840 840
841 local_irq_enable(); 841 local_irq_enable_in_hardirq();
842 842
843 if (status.b.check || test_bit(PC_DMA_ERROR, &pc->flags)) { 843 if (status.b.check || test_bit(PC_DMA_ERROR, &pc->flags)) {
844 /* Error detected */ 844 /* Error detected */
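
Here, and again in the ide-io.c and ide-taskfile.c hunks below, local_irq_enable() inside interrupt-handler paths becomes local_irq_enable_in_hardirq(). In this series the helper is a no-op when lockdep is configured, because hard-IRQ handlers are not allowed to re-enable interrupts there; without lockdep it still behaves like plain local_irq_enable(). A minimal sketch of the intent, with a hypothetical handler:

    #include <linux/interrupt.h>

    static irqreturn_t demo_isr(int irq, void *dev_id, struct pt_regs *regs)
    {
            /*
             * The old code did local_irq_enable() here to allow other
             * interrupts during a long transfer.  Under lockdep this turns
             * into a no-op; otherwise it enables interrupts as before.
             */
            local_irq_enable_in_hardirq();

            /* ... service the device ... */
            return IRQ_HANDLED;
    }
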
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 7dba9992ad30..fb6795236e76 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -693,7 +693,7 @@ static ide_startstop_t drive_cmd_intr (ide_drive_t *drive)
693 u8 stat = hwif->INB(IDE_STATUS_REG); 693 u8 stat = hwif->INB(IDE_STATUS_REG);
694 int retries = 10; 694 int retries = 10;
695 695
696 local_irq_enable(); 696 local_irq_enable_in_hardirq();
697 if ((stat & DRQ_STAT) && args && args[3]) { 697 if ((stat & DRQ_STAT) && args && args[3]) {
698 u8 io_32bit = drive->io_32bit; 698 u8 io_32bit = drive->io_32bit;
699 drive->io_32bit = 0; 699 drive->io_32bit = 0;
@@ -1286,7 +1286,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
1286 if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) 1286 if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
1287 disable_irq_nosync(hwif->irq); 1287 disable_irq_nosync(hwif->irq);
1288 spin_unlock(&ide_lock); 1288 spin_unlock(&ide_lock);
1289 local_irq_enable(); 1289 local_irq_enable_in_hardirq();
1290 /* allow other IRQs while we start this request */ 1290 /* allow other IRQs while we start this request */
1291 startstop = start_request(drive, rq); 1291 startstop = start_request(drive, rq);
1292 spin_lock_irq(&ide_lock); 1292 spin_lock_irq(&ide_lock);
@@ -1631,7 +1631,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs)
1631 spin_unlock(&ide_lock); 1631 spin_unlock(&ide_lock);
1632 1632
1633 if (drive->unmask) 1633 if (drive->unmask)
1634 local_irq_enable(); 1634 local_irq_enable_in_hardirq();
1635 /* service this interrupt, may set handler for next interrupt */ 1635 /* service this interrupt, may set handler for next interrupt */
1636 startstop = handler(drive); 1636 startstop = handler(drive);
1637 spin_lock_irq(&ide_lock); 1637 spin_lock_irq(&ide_lock);
@@ -1705,7 +1705,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
1705{ 1705{
1706 unsigned long flags; 1706 unsigned long flags;
1707 ide_hwgroup_t *hwgroup = HWGROUP(drive); 1707 ide_hwgroup_t *hwgroup = HWGROUP(drive);
1708 DECLARE_COMPLETION(wait); 1708 DECLARE_COMPLETION_ONSTACK(wait);
1709 int where = ELEVATOR_INSERT_BACK, err; 1709 int where = ELEVATOR_INSERT_BACK, err;
1710 int must_wait = (action == ide_wait || action == ide_head_wait); 1710 int must_wait = (action == ide_wait || action == ide_head_wait);
1711 1711
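
ide_do_drive_cmd() also switches DECLARE_COMPLETION(wait) to DECLARE_COMPLETION_ONSTACK(wait), the variant meant for completions that live on the stack, so lockdep does not conflate each short-lived on-stack instance with file-scope completions of the same class. A minimal, hypothetical sketch of the pattern (the hand-off to the completer is elided):

    #include <linux/completion.h>

    /* Hypothetical helper: queue work elsewhere and wait for it to finish. */
    static int demo_do_and_wait(void)
    {
            /*
             * On-stack completions use DECLARE_COMPLETION_ONSTACK; completions
             * with static or heap lifetime keep DECLARE_COMPLETION / init_completion.
             */
            DECLARE_COMPLETION_ONSTACK(wait);

            /* ... hand &wait to whatever will eventually call complete(&wait) ... */

            wait_for_completion(&wait);
            return 0;
    }
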
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 04547eb0833f..97a9244312fc 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -222,7 +222,7 @@ ide_startstop_t task_no_data_intr (ide_drive_t *drive)
222 ide_hwif_t *hwif = HWIF(drive); 222 ide_hwif_t *hwif = HWIF(drive);
223 u8 stat; 223 u8 stat;
224 224
225 local_irq_enable(); 225 local_irq_enable_in_hardirq();
226 if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) { 226 if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) {
227 return ide_error(drive, "task_no_data_intr", stat); 227 return ide_error(drive, "task_no_data_intr", stat);
228 /* calls ide_end_drive_cmd */ 228 /* calls ide_end_drive_cmd */
diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c
index 2c669287f5bd..4feead4a35c5 100644
--- a/drivers/ieee1394/hosts.c
+++ b/drivers/ieee1394/hosts.c
@@ -107,6 +107,14 @@ static int alloc_hostnum_cb(struct hpsb_host *host, void *__data)
107 */ 107 */
108static DEFINE_MUTEX(host_num_alloc); 108static DEFINE_MUTEX(host_num_alloc);
109 109
110/*
111 * The pending_packet_queue is special in that it's processed
112 * from hardirq context too (such as hpsb_bus_reset()). Hence
113 * split the lock class from the usual networking skb-head
114 * lock class by using a separate key for it:
115 */
116static struct lock_class_key pending_packet_queue_key;
117
110struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra, 118struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
111 struct device *dev) 119 struct device *dev)
112{ 120{
@@ -128,6 +136,8 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
128 h->driver = drv; 136 h->driver = drv;
129 137
130 skb_queue_head_init(&h->pending_packet_queue); 138 skb_queue_head_init(&h->pending_packet_queue);
139 lockdep_set_class(&h->pending_packet_queue.lock,
140 &pending_packet_queue_key);
131 INIT_LIST_HEAD(&h->addr_space); 141 INIT_LIST_HEAD(&h->addr_space);
132 142
133 for (i = 2; i < 16; i++) 143 for (i = 2; i < 16; i++)
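
The ieee1394 change is the standard recipe for giving one lock its own lockdep class: define a static struct lock_class_key and attach it with lockdep_set_class() right after the lock is initialized, here because this particular skb queue is also taken from hardirq context unlike ordinary networking skb-head locks. A minimal sketch with hypothetical names (demo_queue_key, demo_init_queue):

    #include <linux/lockdep.h>
    #include <linux/skbuff.h>

    /* One key per distinct usage pattern of the lock. */
    static struct lock_class_key demo_queue_key;

    static void demo_init_queue(struct sk_buff_head *q)
    {
            skb_queue_head_init(q);
            /*
             * This queue's lock is also acquired from hardirq context, so
             * split it off from the default skb-head lock class to avoid
             * false-positive lockdep reports.
             */
            lockdep_set_class(&q->lock, &demo_queue_key);
    }
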
diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h
index 7d9fafea9615..54adba2d8ed5 100644
--- a/drivers/input/serio/i8042-sparcio.h
+++ b/drivers/input/serio/i8042-sparcio.h
@@ -88,7 +88,7 @@ static struct of_device_id sparc_i8042_match[] = {
88 }, 88 },
89 {}, 89 {},
90}; 90};
91MODULE_DEVICE_TABLE(of, i8042_match); 91MODULE_DEVICE_TABLE(of, sparc_i8042_match);
92 92
93static struct of_platform_driver sparc_i8042_driver = { 93static struct of_platform_driver sparc_i8042_driver = {
94 .name = "i8042", 94 .name = "i8042",
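
The i8042-sparcio.h fix is a simple name mismatch: MODULE_DEVICE_TABLE(of, ...) must be passed the identifier of the table it exports, here sparc_i8042_match, otherwise the generated alias refers to a symbol that does not exist. Sketch with a hypothetical table name:

    #include <linux/mod_devicetable.h>
    #include <linux/module.h>

    static struct of_device_id demo_of_match[] = {
            { .name = "8042", },
            {},
    };
    /* The second argument must be the table's own identifier. */
    MODULE_DEVICE_TABLE(of, demo_of_match);
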
diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index 79c97f94bcbd..61a6f977846f 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -177,7 +177,7 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
177 return -1; 177 return -1;
178 } 178 }
179 179
180 mutex_lock(&ps2dev->cmd_mutex); 180 mutex_lock_nested(&ps2dev->cmd_mutex, SINGLE_DEPTH_NESTING);
181 181
182 serio_pause_rx(ps2dev->serio); 182 serio_pause_rx(ps2dev->serio);
183 ps2dev->flags = command == PS2_CMD_GETID ? PS2_FLAG_WAITID : 0; 183 ps2dev->flags = command == PS2_CMD_GETID ? PS2_FLAG_WAITID : 0;
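
libps2 now takes cmd_mutex with mutex_lock_nested(..., SINGLE_DEPTH_NESTING). The annotation tells lockdep that acquiring a second mutex of the same class one level deep is intended rather than a self-deadlock; the usual scenario is two instances taken in a fixed parent/child order. The sketch below is a hypothetical illustration of that general pattern, not a claim about libps2's exact call chain:

    #include <linux/mutex.h>
    #include <linux/lockdep.h>

    struct demo_port {
            struct mutex      cmd_mutex;
            struct demo_port *parent;   /* hypothetical pass-through parent */
    };

    static void demo_command(struct demo_port *port)
    {
            if (port->parent) {
                    mutex_lock(&port->parent->cmd_mutex);
                    /*
                     * Same lock class, second instance: mark it as a known
                     * single-depth nesting so lockdep does not report
                     * recursive locking.
                     */
                    mutex_lock_nested(&port->cmd_mutex, SINGLE_DEPTH_NESTING);
            } else {
                    mutex_lock(&port->cmd_mutex);
            }

            /* ... talk to the device ... */

            mutex_unlock(&port->cmd_mutex);
            if (port->parent)
                    mutex_unlock(&port->parent->cmd_mutex);
    }
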
diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
index 314fc0830d90..4b08852c35ee 100644
--- a/drivers/macintosh/macio-adb.c
+++ b/drivers/macintosh/macio-adb.c
@@ -90,22 +90,12 @@ int macio_init(void)
90{ 90{
91 struct device_node *adbs; 91 struct device_node *adbs;
92 struct resource r; 92 struct resource r;
93 unsigned int irq;
93 94
94 adbs = find_compatible_devices("adb", "chrp,adb0"); 95 adbs = find_compatible_devices("adb", "chrp,adb0");
95 if (adbs == 0) 96 if (adbs == 0)
96 return -ENXIO; 97 return -ENXIO;
97 98
98#if 0
99 { int i = 0;
100
101 printk("macio_adb_init: node = %p, addrs =", adbs->node);
102 while(!of_address_to_resource(adbs, i, &r))
103 printk(" %x(%x)", r.start, r.end - r.start);
104 printk(", intrs =");
105 for (i = 0; i < adbs->n_intrs; ++i)
106 printk(" %x", adbs->intrs[i].line);
107 printk("\n"); }
108#endif
109 if (of_address_to_resource(adbs, 0, &r)) 99 if (of_address_to_resource(adbs, 0, &r))
110 return -ENXIO; 100 return -ENXIO;
111 adb = ioremap(r.start, sizeof(struct adb_regs)); 101 adb = ioremap(r.start, sizeof(struct adb_regs));
@@ -117,10 +107,9 @@ int macio_init(void)
117 out_8(&adb->active_lo.r, 0xff); 107 out_8(&adb->active_lo.r, 0xff);
118 out_8(&adb->autopoll.r, APE); 108 out_8(&adb->autopoll.r, APE);
119 109
120 if (request_irq(adbs->intrs[0].line, macio_adb_interrupt, 110 irq = irq_of_parse_and_map(adbs, 0);
121 0, "ADB", (void *)0)) { 111 if (request_irq(irq, macio_adb_interrupt, 0, "ADB", (void *)0)) {
122 printk(KERN_ERR "ADB: can't get irq %d\n", 112 printk(KERN_ERR "ADB: can't get irq %d\n", irq);
123 adbs->intrs[0].line);
124 return -EAGAIN; 113 return -EAGAIN;
125 } 114 }
126 out_8(&adb->intr_enb.r, DFB | TAG); 115 out_8(&adb->intr_enb.r, DFB | TAG);
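
macio-adb stops reading the pre-parsed adbs->intrs[] array and instead calls irq_of_parse_and_map(), which resolves a device-node interrupt specifier into a Linux irq number at the point of use; this is the pattern the powerpc irq rework pushes throughout this series. A hypothetical sketch of the lookup-then-request sequence (header locations assumed):

    #include <linux/interrupt.h>
    #include <asm/prom.h>   /* irq_of_parse_and_map(); location assumed */
    #include <asm/irq.h>    /* NO_IRQ */

    static irqreturn_t demo_interrupt(int irq, void *dev_id, struct pt_regs *regs)
    {
            return IRQ_HANDLED;
    }

    static int demo_setup_irq(struct device_node *np)
    {
            unsigned int irq;

            /* Parse interrupt 0 of this node and map it into the linear irq space. */
            irq = irq_of_parse_and_map(np, 0);
            if (irq == NO_IRQ)
                    return -ENODEV;

            if (request_irq(irq, demo_interrupt, 0, "demo", NULL))
                    return -EAGAIN;
            return 0;
    }
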
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 40ae7b6a939d..80c0c665b5f6 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -280,75 +280,128 @@ static void macio_release_dev(struct device *dev)
280static int macio_resource_quirks(struct device_node *np, struct resource *res, 280static int macio_resource_quirks(struct device_node *np, struct resource *res,
281 int index) 281 int index)
282{ 282{
283 if (res->flags & IORESOURCE_MEM) { 283 /* Only quirks for memory resources for now */
284 /* Grand Central has too large resource 0 on some machines */ 284 if ((res->flags & IORESOURCE_MEM) == 0)
285 if (index == 0 && !strcmp(np->name, "gc")) 285 return 0;
286 res->end = res->start + 0x1ffff; 286
287 /* Grand Central has too large resource 0 on some machines */
288 if (index == 0 && !strcmp(np->name, "gc"))
289 res->end = res->start + 0x1ffff;
287 290
288 /* Airport has bogus resource 2 */ 291 /* Airport has bogus resource 2 */
289 if (index >= 2 && !strcmp(np->name, "radio")) 292 if (index >= 2 && !strcmp(np->name, "radio"))
290 return 1; 293 return 1;
291 294
292#ifndef CONFIG_PPC64 295#ifndef CONFIG_PPC64
293 /* DBDMAs may have bogus sizes */ 296 /* DBDMAs may have bogus sizes */
294 if ((res->start & 0x0001f000) == 0x00008000) 297 if ((res->start & 0x0001f000) == 0x00008000)
295 res->end = res->start + 0xff; 298 res->end = res->start + 0xff;
296#endif /* CONFIG_PPC64 */ 299#endif /* CONFIG_PPC64 */
297 300
298 /* ESCC parent eats child resources. We could have added a 301 /* ESCC parent eats child resources. We could have added a
299 * level of hierarchy, but I don't really feel the need 302 * level of hierarchy, but I don't really feel the need
300 * for it 303 * for it
301 */ 304 */
302 if (!strcmp(np->name, "escc")) 305 if (!strcmp(np->name, "escc"))
303 return 1; 306 return 1;
304 307
305 /* ESCC has bogus resources >= 3 */ 308 /* ESCC has bogus resources >= 3 */
306 if (index >= 3 && !(strcmp(np->name, "ch-a") && 309 if (index >= 3 && !(strcmp(np->name, "ch-a") &&
307 strcmp(np->name, "ch-b"))) 310 strcmp(np->name, "ch-b")))
308 return 1; 311 return 1;
309 312
310 /* Media bay has too many resources, keep only first one */ 313 /* Media bay has too many resources, keep only first one */
311 if (index > 0 && !strcmp(np->name, "media-bay")) 314 if (index > 0 && !strcmp(np->name, "media-bay"))
312 return 1; 315 return 1;
313 316
314 /* Some older IDE resources have bogus sizes */ 317 /* Some older IDE resources have bogus sizes */
315 if (!(strcmp(np->name, "IDE") && strcmp(np->name, "ATA") && 318 if (!(strcmp(np->name, "IDE") && strcmp(np->name, "ATA") &&
316 strcmp(np->type, "ide") && strcmp(np->type, "ata"))) { 319 strcmp(np->type, "ide") && strcmp(np->type, "ata"))) {
317 if (index == 0 && (res->end - res->start) > 0xfff) 320 if (index == 0 && (res->end - res->start) > 0xfff)
318 res->end = res->start + 0xfff; 321 res->end = res->start + 0xfff;
319 if (index == 1 && (res->end - res->start) > 0xff) 322 if (index == 1 && (res->end - res->start) > 0xff)
320 res->end = res->start + 0xff; 323 res->end = res->start + 0xff;
321 }
322 } 324 }
323 return 0; 325 return 0;
324} 326}
325 327
328static void macio_create_fixup_irq(struct macio_dev *dev, int index,
329 unsigned int line)
330{
331 unsigned int irq;
326 332
327static void macio_setup_interrupts(struct macio_dev *dev) 333 irq = irq_create_mapping(NULL, line, 0);
334 if (irq != NO_IRQ) {
335 dev->interrupt[index].start = irq;
336 dev->interrupt[index].flags = IORESOURCE_IRQ;
337 dev->interrupt[index].name = dev->ofdev.dev.bus_id;
338 }
339 if (dev->n_interrupts <= index)
340 dev->n_interrupts = index + 1;
341}
342
343static void macio_add_missing_resources(struct macio_dev *dev)
328{ 344{
329 struct device_node *np = dev->ofdev.node; 345 struct device_node *np = dev->ofdev.node;
330 int i,j; 346 unsigned int irq_base;
347
348 /* Gatwick has some missing interrupts on child nodes */
349 if (dev->bus->chip->type != macio_gatwick)
350 return;
331 351
332 /* For now, we use pre-parsed entries in the device-tree for 352 /* irq_base is always 64 on gatwick. I have no cleaner way to get
333 * interrupt routing and addresses, but we should change that 353 * that value from here at this point
334 * to dynamically parsed entries and so get rid of most of the
335 * clutter in struct device_node
336 */ 354 */
337 for (i = j = 0; i < np->n_intrs; i++) { 355 irq_base = 64;
356
357 /* Fix SCC */
358 if (strcmp(np->name, "ch-a") == 0) {
359 macio_create_fixup_irq(dev, 0, 15 + irq_base);
360 macio_create_fixup_irq(dev, 1, 4 + irq_base);
361 macio_create_fixup_irq(dev, 2, 5 + irq_base);
362 printk(KERN_INFO "macio: fixed SCC irqs on gatwick\n");
363 }
364
365 /* Fix media-bay */
366 if (strcmp(np->name, "media-bay") == 0) {
367 macio_create_fixup_irq(dev, 0, 29 + irq_base);
368 printk(KERN_INFO "macio: fixed media-bay irq on gatwick\n");
369 }
370
 371 /* Fix left media bay children */
372 if (dev->media_bay != NULL && strcmp(np->name, "floppy") == 0) {
373 macio_create_fixup_irq(dev, 0, 19 + irq_base);
374 macio_create_fixup_irq(dev, 1, 1 + irq_base);
375 printk(KERN_INFO "macio: fixed left floppy irqs\n");
376 }
377 if (dev->media_bay != NULL && strcasecmp(np->name, "ata4") == 0) {
378 macio_create_fixup_irq(dev, 0, 14 + irq_base);
379 macio_create_fixup_irq(dev, 0, 3 + irq_base);
380 printk(KERN_INFO "macio: fixed left ide irqs\n");
381 }
382}
383
384static void macio_setup_interrupts(struct macio_dev *dev)
385{
386 struct device_node *np = dev->ofdev.node;
387 unsigned int irq;
388 int i = 0, j = 0;
389
390 for (;;) {
338 struct resource *res = &dev->interrupt[j]; 391 struct resource *res = &dev->interrupt[j];
339 392
340 if (j >= MACIO_DEV_COUNT_IRQS) 393 if (j >= MACIO_DEV_COUNT_IRQS)
341 break; 394 break;
342 res->start = np->intrs[i].line; 395 irq = irq_of_parse_and_map(np, i++);
343 res->flags = IORESOURCE_IO; 396 if (irq == NO_IRQ)
344 if (np->intrs[j].sense) 397 break;
345 res->flags |= IORESOURCE_IRQ_LOWLEVEL; 398 res->start = irq;
346 else 399 res->flags = IORESOURCE_IRQ;
347 res->flags |= IORESOURCE_IRQ_HIGHEDGE;
348 res->name = dev->ofdev.dev.bus_id; 400 res->name = dev->ofdev.dev.bus_id;
349 if (macio_resource_quirks(np, res, i)) 401 if (macio_resource_quirks(np, res, i - 1)) {
350 memset(res, 0, sizeof(struct resource)); 402 memset(res, 0, sizeof(struct resource));
351 else 403 continue;
404 } else
352 j++; 405 j++;
353 } 406 }
354 dev->n_interrupts = j; 407 dev->n_interrupts = j;
@@ -445,6 +498,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
445 /* Setup interrupts & resources */ 498 /* Setup interrupts & resources */
446 macio_setup_interrupts(dev); 499 macio_setup_interrupts(dev);
447 macio_setup_resources(dev, parent_res); 500 macio_setup_resources(dev, parent_res);
501 macio_add_missing_resources(dev);
448 502
449 /* Register with core */ 503 /* Register with core */
450 if (of_device_register(&dev->ofdev) != 0) { 504 if (of_device_register(&dev->ofdev) != 0) {
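macio_create_fixup_irq() above covers the opposite case: interrupts that are missing from the gatwick device tree and so cannot be parsed, which is why a mapping is created directly from the known hardware line with irq_create_mapping(NULL, line, 0). A minimal sketch of that fallback; my_add_fixed_irq and its arguments are placeholder names:

    #include <linux/ioport.h>
    #include <asm/irq.h>    /* irq_create_mapping(), NO_IRQ */

    /* Hypothetical helper mirroring macio_create_fixup_irq(): turn a known
     * hardware interrupt line into a virq and record it as an IRQ resource. */
    static int my_add_fixed_irq(struct resource *res, unsigned long hw_line,
                                const char *name)
    {
        unsigned int virq;

        /* A NULL host selects the default irq host, as the patch does. */
        virq = irq_create_mapping(NULL, hw_line, 0);
        if (virq == NO_IRQ)
            return -ENODEV;

        res->start = virq;
        res->flags = IORESOURCE_IRQ;
        res->name  = name;
        return 0;
    }
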
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index ff6d9bfdc3d2..f139a74696fe 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -497,8 +497,7 @@ int __init smu_init (void)
497 smu->doorbell = *data; 497 smu->doorbell = *data;
498 if (smu->doorbell < 0x50) 498 if (smu->doorbell < 0x50)
499 smu->doorbell += 0x50; 499 smu->doorbell += 0x50;
500 if (np->n_intrs > 0) 500 smu->db_irq = irq_of_parse_and_map(np, 0);
501 smu->db_irq = np->intrs[0].line;
502 501
503 of_node_put(np); 502 of_node_put(np);
504 503
@@ -515,8 +514,7 @@ int __init smu_init (void)
515 smu->msg = *data; 514 smu->msg = *data;
516 if (smu->msg < 0x50) 515 if (smu->msg < 0x50)
517 smu->msg += 0x50; 516 smu->msg += 0x50;
518 if (np->n_intrs > 0) 517 smu->msg_irq = irq_of_parse_and_map(np, 0);
519 smu->msg_irq = np->intrs[0].line;
520 of_node_put(np); 518 of_node_put(np);
521 } while(0); 519 } while(0);
522 520
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c
index 6501db50fb83..69d5452fd22f 100644
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -34,13 +34,6 @@
34static volatile unsigned char __iomem *via; 34static volatile unsigned char __iomem *via;
35static DEFINE_SPINLOCK(cuda_lock); 35static DEFINE_SPINLOCK(cuda_lock);
36 36
37#ifdef CONFIG_MAC
38#define CUDA_IRQ IRQ_MAC_ADB
39#define eieio()
40#else
41#define CUDA_IRQ vias->intrs[0].line
42#endif
43
44/* VIA registers - spaced 0x200 bytes apart */ 37/* VIA registers - spaced 0x200 bytes apart */
45#define RS 0x200 /* skip between registers */ 38#define RS 0x200 /* skip between registers */
46#define B 0 /* B-side data */ 39#define B 0 /* B-side data */
@@ -189,11 +182,24 @@ int __init find_via_cuda(void)
189 182
190static int __init via_cuda_start(void) 183static int __init via_cuda_start(void)
191{ 184{
185 unsigned int irq;
186
192 if (via == NULL) 187 if (via == NULL)
193 return -ENODEV; 188 return -ENODEV;
194 189
195 if (request_irq(CUDA_IRQ, cuda_interrupt, 0, "ADB", cuda_interrupt)) { 190#ifdef CONFIG_MAC
196 printk(KERN_ERR "cuda_init: can't get irq %d\n", CUDA_IRQ); 191 irq = IRQ_MAC_ADB;
192#else /* CONFIG_MAC */
193 irq = irq_of_parse_and_map(vias, 0);
194 if (irq == NO_IRQ) {
195 printk(KERN_ERR "via-cuda: can't map interrupts for %s\n",
196 vias->full_name);
197 return -ENODEV;
198 }
 199#endif /* CONFIG_MAC */
200
201 if (request_irq(irq, cuda_interrupt, 0, "ADB", cuda_interrupt)) {
202 printk(KERN_ERR "via-cuda: can't request irq %d\n", irq);
197 return -EAGAIN; 203 return -EAGAIN;
198 } 204 }
199 205
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index c1193d34ec9e..06ca80bfd6b9 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -64,10 +64,6 @@
64#include <asm/backlight.h> 64#include <asm/backlight.h>
65#endif 65#endif
66 66
67#ifdef CONFIG_PPC32
68#include <asm/open_pic.h>
69#endif
70
71#include "via-pmu-event.h" 67#include "via-pmu-event.h"
72 68
73/* Some compile options */ 69/* Some compile options */
@@ -151,7 +147,7 @@ static int pmu_fully_inited = 0;
151static int pmu_has_adb; 147static int pmu_has_adb;
152static struct device_node *gpio_node; 148static struct device_node *gpio_node;
153static unsigned char __iomem *gpio_reg = NULL; 149static unsigned char __iomem *gpio_reg = NULL;
154static int gpio_irq = -1; 150static int gpio_irq = NO_IRQ;
155static int gpio_irq_enabled = -1; 151static int gpio_irq_enabled = -1;
156static volatile int pmu_suspended = 0; 152static volatile int pmu_suspended = 0;
157static spinlock_t pmu_lock; 153static spinlock_t pmu_lock;
@@ -403,22 +399,21 @@ static int __init pmu_init(void)
403 */ 399 */
404static int __init via_pmu_start(void) 400static int __init via_pmu_start(void)
405{ 401{
402 unsigned int irq;
403
406 if (vias == NULL) 404 if (vias == NULL)
407 return -ENODEV; 405 return -ENODEV;
408 406
409 batt_req.complete = 1; 407 batt_req.complete = 1;
410 408
411#ifndef CONFIG_PPC_MERGE 409 irq = irq_of_parse_and_map(vias, 0);
412 if (pmu_kind == PMU_KEYLARGO_BASED) 410 if (irq == NO_IRQ) {
 413 openpic_set_irq_priority(vias->intrs[0].line, 411 printk(KERN_ERR "via-pmu: can't map interrupt\n");
414 OPENPIC_PRIORITY_DEFAULT + 1); 412 return -ENODEV;
415#endif 413 }
416 414 if (request_irq(irq, via_pmu_interrupt, 0, "VIA-PMU", (void *)0)) {
417 if (request_irq(vias->intrs[0].line, via_pmu_interrupt, 0, "VIA-PMU", 415 printk(KERN_ERR "via-pmu: can't request irq %d\n", irq);
418 (void *)0)) { 416 return -ENODEV;
419 printk(KERN_ERR "VIA-PMU: can't get irq %d\n",
420 vias->intrs[0].line);
421 return -EAGAIN;
422 } 417 }
423 418
424 if (pmu_kind == PMU_KEYLARGO_BASED) { 419 if (pmu_kind == PMU_KEYLARGO_BASED) {
@@ -426,10 +421,10 @@ static int __init via_pmu_start(void)
426 if (gpio_node == NULL) 421 if (gpio_node == NULL)
427 gpio_node = of_find_node_by_name(NULL, 422 gpio_node = of_find_node_by_name(NULL,
428 "pmu-interrupt"); 423 "pmu-interrupt");
429 if (gpio_node && gpio_node->n_intrs > 0) 424 if (gpio_node)
430 gpio_irq = gpio_node->intrs[0].line; 425 gpio_irq = irq_of_parse_and_map(gpio_node, 0);
431 426
432 if (gpio_irq != -1) { 427 if (gpio_irq != NO_IRQ) {
433 if (request_irq(gpio_irq, gpio1_interrupt, 0, 428 if (request_irq(gpio_irq, gpio1_interrupt, 0,
434 "GPIO1 ADB", (void *)0)) 429 "GPIO1 ADB", (void *)0))
435 printk(KERN_ERR "pmu: can't get irq %d" 430 printk(KERN_ERR "pmu: can't get irq %d"
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2fe32c261922..e4e161372a3e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1404,7 +1404,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
1404 struct block_device *bdev; 1404 struct block_device *bdev;
1405 char b[BDEVNAME_SIZE]; 1405 char b[BDEVNAME_SIZE];
1406 1406
1407 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 1407 bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE);
1408 if (IS_ERR(bdev)) { 1408 if (IS_ERR(bdev)) {
1409 printk(KERN_ERR "md: could not open %s.\n", 1409 printk(KERN_ERR "md: could not open %s.\n",
1410 __bdevname(dev, b)); 1410 __bdevname(dev, b));
@@ -1414,7 +1414,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
1414 if (err) { 1414 if (err) {
1415 printk(KERN_ERR "md: could not bd_claim %s.\n", 1415 printk(KERN_ERR "md: could not bd_claim %s.\n",
1416 bdevname(bdev, b)); 1416 bdevname(bdev, b));
1417 blkdev_put(bdev); 1417 blkdev_put_partition(bdev);
1418 return err; 1418 return err;
1419 } 1419 }
1420 rdev->bdev = bdev; 1420 rdev->bdev = bdev;
@@ -1428,7 +1428,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
1428 if (!bdev) 1428 if (!bdev)
1429 MD_BUG(); 1429 MD_BUG();
1430 bd_release(bdev); 1430 bd_release(bdev);
1431 blkdev_put(bdev); 1431 blkdev_put_partition(bdev);
1432} 1432}
1433 1433
1434void md_autodetect_dev(dev_t dev); 1434void md_autodetect_dev(dev_t dev);
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 247ff2f23ac9..33525bdf2ab6 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -128,7 +128,7 @@ static void mmc_wait_done(struct mmc_request *mrq)
128 128
129int mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq) 129int mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
130{ 130{
131 DECLARE_COMPLETION(complete); 131 DECLARE_COMPLETION_ONSTACK(complete);
132 132
133 mrq->done_data = &complete; 133 mrq->done_data = &complete;
134 mrq->done = mmc_wait_done; 134 mrq->done = mmc_wait_done;
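The one-line mmc change (the same substitution appears later in libata-core.c and spi.c) exists because lockdep needs on-stack objects to be initialized at run time; DECLARE_COMPLETION_ONSTACK() does that, while plain DECLARE_COMPLETION() uses a static initializer. A minimal sketch, with my_wait_example as a placeholder:

    #include <linux/completion.h>

    static void my_wait_example(void)
    {
        /* The ONSTACK variant runs init_completion() at declaration time,
         * which is what lockdep requires for objects living on the stack. */
        DECLARE_COMPLETION_ONSTACK(done);

        /* Normally &done is handed to an irq handler or callback; completing
         * it inline here just keeps the sketch self-contained. */
        complete(&done);
        wait_for_completion(&done);
    }
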
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 8ab03b4a885e..2819de79442c 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -1897,7 +1897,7 @@ vortex_timer(unsigned long data)
1897 printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo); 1897 printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo);
1898 } 1898 }
1899 1899
1900 disable_irq(dev->irq); 1900 disable_irq_lockdep(dev->irq);
1901 old_window = ioread16(ioaddr + EL3_CMD) >> 13; 1901 old_window = ioread16(ioaddr + EL3_CMD) >> 13;
1902 EL3WINDOW(4); 1902 EL3WINDOW(4);
1903 media_status = ioread16(ioaddr + Wn4_Media); 1903 media_status = ioread16(ioaddr + Wn4_Media);
@@ -1978,7 +1978,7 @@ leave_media_alone:
1978 dev->name, media_tbl[dev->if_port].name); 1978 dev->name, media_tbl[dev->if_port].name);
1979 1979
1980 EL3WINDOW(old_window); 1980 EL3WINDOW(old_window);
1981 enable_irq(dev->irq); 1981 enable_irq_lockdep(dev->irq);
1982 mod_timer(&vp->timer, RUN_AT(next_tick)); 1982 mod_timer(&vp->timer, RUN_AT(next_tick));
1983 if (vp->deferred) 1983 if (vp->deferred)
1984 iowrite16(FakeIntr, ioaddr + EL3_CMD); 1984 iowrite16(FakeIntr, ioaddr + EL3_CMD);
diff --git a/drivers/net/8390.c b/drivers/net/8390.c
index 86be96af9c8f..d2935ae39814 100644
--- a/drivers/net/8390.c
+++ b/drivers/net/8390.c
@@ -249,7 +249,7 @@ void ei_tx_timeout(struct net_device *dev)
249 249
250 /* Ugly but a reset can be slow, yet must be protected */ 250 /* Ugly but a reset can be slow, yet must be protected */
251 251
252 disable_irq_nosync(dev->irq); 252 disable_irq_nosync_lockdep(dev->irq);
253 spin_lock(&ei_local->page_lock); 253 spin_lock(&ei_local->page_lock);
254 254
255 /* Try to restart the card. Perhaps the user has fixed something. */ 255 /* Try to restart the card. Perhaps the user has fixed something. */
@@ -257,7 +257,7 @@ void ei_tx_timeout(struct net_device *dev)
257 NS8390_init(dev, 1); 257 NS8390_init(dev, 1);
258 258
259 spin_unlock(&ei_local->page_lock); 259 spin_unlock(&ei_local->page_lock);
260 enable_irq(dev->irq); 260 enable_irq_lockdep(dev->irq);
261 netif_wake_queue(dev); 261 netif_wake_queue(dev);
262} 262}
263 263
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 3c90003f4230..037d870712ff 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2735,21 +2735,21 @@ static void nv_do_nic_poll(unsigned long data)
2735 2735
2736 if (!using_multi_irqs(dev)) { 2736 if (!using_multi_irqs(dev)) {
2737 if (np->msi_flags & NV_MSI_X_ENABLED) 2737 if (np->msi_flags & NV_MSI_X_ENABLED)
2738 disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); 2738 disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
2739 else 2739 else
2740 disable_irq(dev->irq); 2740 disable_irq_lockdep(dev->irq);
2741 mask = np->irqmask; 2741 mask = np->irqmask;
2742 } else { 2742 } else {
2743 if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { 2743 if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
2744 disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); 2744 disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
2745 mask |= NVREG_IRQ_RX_ALL; 2745 mask |= NVREG_IRQ_RX_ALL;
2746 } 2746 }
2747 if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { 2747 if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
2748 disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); 2748 disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
2749 mask |= NVREG_IRQ_TX_ALL; 2749 mask |= NVREG_IRQ_TX_ALL;
2750 } 2750 }
2751 if (np->nic_poll_irq & NVREG_IRQ_OTHER) { 2751 if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
2752 disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); 2752 disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
2753 mask |= NVREG_IRQ_OTHER; 2753 mask |= NVREG_IRQ_OTHER;
2754 } 2754 }
2755 } 2755 }
@@ -2761,23 +2761,23 @@ static void nv_do_nic_poll(unsigned long data)
2761 pci_push(base); 2761 pci_push(base);
2762 2762
2763 if (!using_multi_irqs(dev)) { 2763 if (!using_multi_irqs(dev)) {
2764 nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL); 2764 nv_nic_irq(0, dev, NULL);
2765 if (np->msi_flags & NV_MSI_X_ENABLED) 2765 if (np->msi_flags & NV_MSI_X_ENABLED)
2766 enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); 2766 enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
2767 else 2767 else
2768 enable_irq(dev->irq); 2768 enable_irq_lockdep(dev->irq);
2769 } else { 2769 } else {
2770 if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { 2770 if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
2771 nv_nic_irq_rx((int) 0, (void *) data, (struct pt_regs *) NULL); 2771 nv_nic_irq_rx(0, dev, NULL);
2772 enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); 2772 enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
2773 } 2773 }
2774 if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { 2774 if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
2775 nv_nic_irq_tx((int) 0, (void *) data, (struct pt_regs *) NULL); 2775 nv_nic_irq_tx(0, dev, NULL);
2776 enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); 2776 enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
2777 } 2777 }
2778 if (np->nic_poll_irq & NVREG_IRQ_OTHER) { 2778 if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
2779 nv_nic_irq_other((int) 0, (void *) data, (struct pt_regs *) NULL); 2779 nv_nic_irq_other(0, dev, NULL);
2780 enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); 2780 enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
2781 } 2781 }
2782 } 2782 }
2783} 2783}
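The 3c59x, 8390 and forcedeth hunks swap the plain disable_irq()/enable_irq() calls around their timer and poll paths for the _lockdep counterparts, which under CONFIG_LOCKDEP also disable local interrupts so the handler's locks are not seen being taken in an irq-unsafe way. A sketch of the pattern; my_poll, my_lock and the irq argument are placeholders:

    #include <linux/interrupt.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(my_lock);    /* also taken by the irq handler */

    /* Timer/poll path that briefly does the interrupt handler's work. */
    static void my_poll(unsigned int irq)
    {
        /* With CONFIG_LOCKDEP this also does local_irq_disable(), so taking
         * my_lock below is not reported as an irq-unsafe use of a lock that
         * the (now masked) handler takes with interrupts off. */
        disable_irq_lockdep(irq);

        spin_lock(&my_lock);
        /* ... mirror what the interrupt handler would do ... */
        spin_unlock(&my_lock);

        enable_irq_lockdep(irq);
    }
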
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index f2c0bf89f0c7..29e4b5aa6ead 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -242,12 +242,12 @@ static int __devinit mace_probe(struct macio_dev *mdev, const struct of_device_i
242 } 242 }
243 rc = request_irq(mp->tx_dma_intr, mace_txdma_intr, 0, "MACE-txdma", dev); 243 rc = request_irq(mp->tx_dma_intr, mace_txdma_intr, 0, "MACE-txdma", dev);
244 if (rc) { 244 if (rc) {
245 printk(KERN_ERR "MACE: can't get irq %d\n", mace->intrs[1].line); 245 printk(KERN_ERR "MACE: can't get irq %d\n", mp->tx_dma_intr);
246 goto err_free_irq; 246 goto err_free_irq;
247 } 247 }
248 rc = request_irq(mp->rx_dma_intr, mace_rxdma_intr, 0, "MACE-rxdma", dev); 248 rc = request_irq(mp->rx_dma_intr, mace_rxdma_intr, 0, "MACE-rxdma", dev);
249 if (rc) { 249 if (rc) {
250 printk(KERN_ERR "MACE: can't get irq %d\n", mace->intrs[2].line); 250 printk(KERN_ERR "MACE: can't get irq %d\n", mp->rx_dma_intr);
251 goto err_free_tx_irq; 251 goto err_free_tx_irq;
252 } 252 }
253 253
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index b764cfda6e84..dafaa5ff5aa6 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -3095,6 +3095,14 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
3095} 3095}
3096 3096
3097 3097
3098/*
3099 * HostAP uses two layers of net devices, where the inner
3100 * layer gets called all the time from the outer layer.
3101 * This is a natural nesting, which needs a split lock type.
3102 */
3103static struct lock_class_key hostap_netdev_xmit_lock_key;
3104
3105
3098static struct net_device * 3106static struct net_device *
3099prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx, 3107prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx,
3100 struct device *sdev) 3108 struct device *sdev)
@@ -3259,6 +3267,8 @@ while (0)
3259 SET_NETDEV_DEV(dev, sdev); 3267 SET_NETDEV_DEV(dev, sdev);
3260 if (ret >= 0) 3268 if (ret >= 0)
3261 ret = register_netdevice(dev); 3269 ret = register_netdevice(dev);
3270
3271 lockdep_set_class(&dev->_xmit_lock, &hostap_netdev_xmit_lock_key);
3262 rtnl_unlock(); 3272 rtnl_unlock();
3263 if (ret < 0) { 3273 if (ret < 0) {
3264 printk(KERN_WARNING "%s: register netdevice failed!\n", 3274 printk(KERN_WARNING "%s: register netdevice failed!\n",
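The hostap comment above explains the motivation: the driver stacks one net device on another, so the outer device's _xmit_lock is legitimately held while the inner one is taken. Giving the inner lock its own lockdep class keeps that nesting from being flagged as recursive locking. A sketch of the re-keying step, with my_inner_xmit_lock_key and the function name as placeholders:

    #include <linux/netdevice.h>
    #include <linux/lockdep.h>

    /* One class shared by all "inner" devices of this driver. */
    static struct lock_class_key my_inner_xmit_lock_key;

    static void my_key_inner_dev(struct net_device *dev)
    {
        /* register_netdevice() has already set up dev->_xmit_lock with the
         * generic netdev class; move it into a driver-private class so the
         * outer-lock -> inner-lock nesting is accepted. The patch does this
         * right after registering the device. */
        lockdep_set_class(&dev->_xmit_lock, &my_inner_xmit_lock_key);
    }
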
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 3a4a644c2686..212268881857 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -74,7 +74,7 @@ static void decode_irq_flags(int flag, int *triggering, int *polarity)
74 74
75static void 75static void
76pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi, 76pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi,
77 int triggering, int polarity) 77 int triggering, int polarity, int shareable)
78{ 78{
79 int i = 0; 79 int i = 0;
80 int irq; 80 int irq;
@@ -95,6 +95,9 @@ pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi,
95 return; 95 return;
96 } 96 }
97 97
98 if (shareable)
99 res->irq_resource[i].flags |= IORESOURCE_IRQ_SHAREABLE;
100
98 res->irq_resource[i].start = irq; 101 res->irq_resource[i].start = irq;
99 res->irq_resource[i].end = irq; 102 res->irq_resource[i].end = irq;
100 pcibios_penalize_isa_irq(irq, 1); 103 pcibios_penalize_isa_irq(irq, 1);
@@ -194,7 +197,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
194 pnpacpi_parse_allocated_irqresource(res_table, 197 pnpacpi_parse_allocated_irqresource(res_table,
195 res->data.irq.interrupts[i], 198 res->data.irq.interrupts[i],
196 res->data.irq.triggering, 199 res->data.irq.triggering,
197 res->data.irq.polarity); 200 res->data.irq.polarity,
201 res->data.irq.sharable);
198 } 202 }
199 break; 203 break;
200 204
@@ -255,7 +259,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
255 pnpacpi_parse_allocated_irqresource(res_table, 259 pnpacpi_parse_allocated_irqresource(res_table,
256 res->data.extended_irq.interrupts[i], 260 res->data.extended_irq.interrupts[i],
257 res->data.extended_irq.triggering, 261 res->data.extended_irq.triggering,
258 res->data.extended_irq.polarity); 262 res->data.extended_irq.polarity,
263 res->data.extended_irq.sharable);
259 } 264 }
260 break; 265 break;
261 266
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 4138564402b8..985d1613baaa 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -383,6 +383,7 @@ void
383sclp_sync_wait(void) 383sclp_sync_wait(void)
384{ 384{
385 unsigned long psw_mask; 385 unsigned long psw_mask;
386 unsigned long flags;
386 unsigned long cr0, cr0_sync; 387 unsigned long cr0, cr0_sync;
387 u64 timeout; 388 u64 timeout;
388 389
@@ -395,9 +396,11 @@ sclp_sync_wait(void)
395 sclp_tod_from_jiffies(sclp_request_timer.expires - 396 sclp_tod_from_jiffies(sclp_request_timer.expires -
396 jiffies); 397 jiffies);
397 } 398 }
399 local_irq_save(flags);
398 /* Prevent bottom half from executing once we force interrupts open */ 400 /* Prevent bottom half from executing once we force interrupts open */
399 local_bh_disable(); 401 local_bh_disable();
400 /* Enable service-signal interruption, disable timer interrupts */ 402 /* Enable service-signal interruption, disable timer interrupts */
403 trace_hardirqs_on();
401 __ctl_store(cr0, 0, 0); 404 __ctl_store(cr0, 0, 0);
402 cr0_sync = cr0; 405 cr0_sync = cr0;
403 cr0_sync |= 0x00000200; 406 cr0_sync |= 0x00000200;
@@ -415,11 +418,10 @@ sclp_sync_wait(void)
415 barrier(); 418 barrier();
416 cpu_relax(); 419 cpu_relax();
417 } 420 }
418 /* Restore interrupt settings */ 421 local_irq_disable();
419 asm volatile ("SSM 0(%0)"
420 : : "a" (&psw_mask) : "memory");
421 __ctl_load(cr0, 0, 0); 422 __ctl_load(cr0, 0, 0);
422 __local_bh_enable(); 423 _local_bh_enable();
424 local_irq_restore(flags);
423} 425}
424 426
425EXPORT_SYMBOL(sclp_sync_wait); 427EXPORT_SYMBOL(sclp_sync_wait);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index a3423267467f..6fec90eab00e 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -147,7 +147,7 @@ cio_tpi(void)
147 sch->driver->irq(&sch->dev); 147 sch->driver->irq(&sch->dev);
148 spin_unlock(&sch->lock); 148 spin_unlock(&sch->lock);
149 irq_exit (); 149 irq_exit ();
150 __local_bh_enable(); 150 _local_bh_enable();
151 return 1; 151 return 1;
152} 152}
153 153
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 36733b9823c6..8e8963f15731 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -84,6 +84,8 @@ static debug_info_t *qeth_dbf_qerr = NULL;
84 84
85DEFINE_PER_CPU(char[256], qeth_dbf_txt_buf); 85DEFINE_PER_CPU(char[256], qeth_dbf_txt_buf);
86 86
87static struct lock_class_key qdio_out_skb_queue_key;
88
87/** 89/**
88 * some more definitions and declarations 90 * some more definitions and declarations
89 */ 91 */
@@ -3229,6 +3231,9 @@ qeth_alloc_qdio_buffers(struct qeth_card *card)
3229 &card->qdio.out_qs[i]->qdio_bufs[j]; 3231 &card->qdio.out_qs[i]->qdio_bufs[j];
3230 skb_queue_head_init(&card->qdio.out_qs[i]->bufs[j]. 3232 skb_queue_head_init(&card->qdio.out_qs[i]->bufs[j].
3231 skb_list); 3233 skb_list);
3234 lockdep_set_class(
3235 &card->qdio.out_qs[i]->bufs[j].skb_list.lock,
3236 &qdio_out_skb_queue_key);
3232 INIT_LIST_HEAD(&card->qdio.out_qs[i]->bufs[j].ctx_list); 3237 INIT_LIST_HEAD(&card->qdio.out_qs[i]->bufs[j].ctx_list);
3233 } 3238 }
3234 } 3239 }
@@ -5272,6 +5277,7 @@ qeth_free_vlan_buffer(struct qeth_card *card, struct qeth_qdio_out_buffer *buf,
5272 struct sk_buff_head tmp_list; 5277 struct sk_buff_head tmp_list;
5273 5278
5274 skb_queue_head_init(&tmp_list); 5279 skb_queue_head_init(&tmp_list);
5280 lockdep_set_class(&tmp_list.lock, &qdio_out_skb_queue_key);
5275 for(i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i){ 5281 for(i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i){
5276 while ((skb = skb_dequeue(&buf->skb_list))){ 5282 while ((skb = skb_dequeue(&buf->skb_list))){
5277 if (vlan_tx_tag_present(skb) && 5283 if (vlan_tx_tag_present(skb) &&
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 432136f96e64..ffb3677e354f 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -378,6 +378,8 @@ s390_do_machine_check(struct pt_regs *regs)
378 struct mcck_struct *mcck; 378 struct mcck_struct *mcck;
379 int umode; 379 int umode;
380 380
381 lockdep_off();
382
381 mci = (struct mci *) &S390_lowcore.mcck_interruption_code; 383 mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
382 mcck = &__get_cpu_var(cpu_mcck); 384 mcck = &__get_cpu_var(cpu_mcck);
383 umode = user_mode(regs); 385 umode = user_mode(regs);
@@ -482,6 +484,7 @@ s390_do_machine_check(struct pt_regs *regs)
482 mcck->warning = 1; 484 mcck->warning = 1;
483 set_thread_flag(TIF_MCCK_PENDING); 485 set_thread_flag(TIF_MCCK_PENDING);
484 } 486 }
487 lockdep_on();
485} 488}
486 489
487/* 490/*
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 909731b99d26..8ec8da0beaa8 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -2168,9 +2168,9 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action)
2168 atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT, 2168 atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
2169 &adapter->status); 2169 &adapter->status);
2170 ZFCP_LOG_DEBUG("Doing exchange config data\n"); 2170 ZFCP_LOG_DEBUG("Doing exchange config data\n");
2171 write_lock(&adapter->erp_lock); 2171 write_lock_irq(&adapter->erp_lock);
2172 zfcp_erp_action_to_running(erp_action); 2172 zfcp_erp_action_to_running(erp_action);
2173 write_unlock(&adapter->erp_lock); 2173 write_unlock_irq(&adapter->erp_lock);
2174 zfcp_erp_timeout_init(erp_action); 2174 zfcp_erp_timeout_init(erp_action);
2175 if (zfcp_fsf_exchange_config_data(erp_action)) { 2175 if (zfcp_fsf_exchange_config_data(erp_action)) {
2176 retval = ZFCP_ERP_FAILED; 2176 retval = ZFCP_ERP_FAILED;
@@ -2236,9 +2236,9 @@ zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action)
2236 adapter = erp_action->adapter; 2236 adapter = erp_action->adapter;
2237 atomic_clear_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status); 2237 atomic_clear_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
2238 2238
2239 write_lock(&adapter->erp_lock); 2239 write_lock_irq(&adapter->erp_lock);
2240 zfcp_erp_action_to_running(erp_action); 2240 zfcp_erp_action_to_running(erp_action);
2241 write_unlock(&adapter->erp_lock); 2241 write_unlock_irq(&adapter->erp_lock);
2242 2242
2243 zfcp_erp_timeout_init(erp_action); 2243 zfcp_erp_timeout_init(erp_action);
2244 ret = zfcp_fsf_exchange_port_data(erp_action, adapter, NULL); 2244 ret = zfcp_fsf_exchange_port_data(erp_action, adapter, NULL);
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 345a191926a4..49ea5add4abc 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -427,6 +427,7 @@ int
427zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, void *sbale_addr) 427zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, void *sbale_addr)
428{ 428{
429 struct zfcp_fsf_req *fsf_req; 429 struct zfcp_fsf_req *fsf_req;
430 unsigned long flags;
430 431
431 /* invalid (per convention used in this driver) */ 432 /* invalid (per convention used in this driver) */
432 if (unlikely(!sbale_addr)) { 433 if (unlikely(!sbale_addr)) {
@@ -438,15 +439,15 @@ zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, void *sbale_addr)
438 fsf_req = (struct zfcp_fsf_req *) sbale_addr; 439 fsf_req = (struct zfcp_fsf_req *) sbale_addr;
439 440
440 /* serialize with zfcp_fsf_req_dismiss_all */ 441 /* serialize with zfcp_fsf_req_dismiss_all */
441 spin_lock(&adapter->fsf_req_list_lock); 442 spin_lock_irqsave(&adapter->fsf_req_list_lock, flags);
442 if (list_empty(&adapter->fsf_req_list_head)) { 443 if (list_empty(&adapter->fsf_req_list_head)) {
443 spin_unlock(&adapter->fsf_req_list_lock); 444 spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
444 return 0; 445 return 0;
445 } 446 }
446 list_del(&fsf_req->list); 447 list_del(&fsf_req->list);
447 atomic_dec(&adapter->fsf_reqs_active); 448 atomic_dec(&adapter->fsf_reqs_active);
448 spin_unlock(&adapter->fsf_req_list_lock); 449 spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
449 450
450 if (unlikely(adapter != fsf_req->adapter)) { 451 if (unlikely(adapter != fsf_req->adapter)) {
451 ZFCP_LOG_NORMAL("bug: invalid reqid (fsf_req=%p, " 452 ZFCP_LOG_NORMAL("bug: invalid reqid (fsf_req=%p, "
452 "fsf_req->adapter=%p, adapter=%p)\n", 453 "fsf_req->adapter=%p, adapter=%p)\n",
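The zfcp hunks (write_lock_irq in the erp paths, spin_lock_irqsave in zfcp_qdio_reqid_check) convert plain lock calls to the irq-disabling variants because the same locks are also taken from interrupt context, which lockdep now reports. A minimal sketch of the spin_lock_irqsave conversion; the struct and field names are placeholders, not zfcp's:

    #include <linux/spinlock.h>
    #include <linux/list.h>

    struct my_adapter {
        spinlock_t req_list_lock;   /* also taken from the irq path */
        struct list_head req_list;
    };

    static void my_remove_req(struct my_adapter *a, struct list_head *req)
    {
        unsigned long flags;

        /* irqsave: safe regardless of the caller's interrupt state, and it
         * keeps the lock consistently irq-safe in lockdep's eyes. */
        spin_lock_irqsave(&a->req_list_lock, flags);
        if (!list_empty(&a->req_list))
            list_del(req);
        spin_unlock_irqrestore(&a->req_list_lock, flags);
    }
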
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 82caba464291..1c960ac1617f 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1001,7 +1001,7 @@ unsigned ata_exec_internal(struct ata_device *dev,
1001 struct ata_queued_cmd *qc; 1001 struct ata_queued_cmd *qc;
1002 unsigned int tag, preempted_tag; 1002 unsigned int tag, preempted_tag;
1003 u32 preempted_sactive, preempted_qc_active; 1003 u32 preempted_sactive, preempted_qc_active;
1004 DECLARE_COMPLETION(wait); 1004 DECLARE_COMPLETION_ONSTACK(wait);
1005 unsigned long flags; 1005 unsigned long flags;
1006 unsigned int err_mask; 1006 unsigned int err_mask;
1007 int rc; 1007 int rc;
diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c
index 739bc84f91e9..632f62d6ec7e 100644
--- a/drivers/serial/8250_pnp.c
+++ b/drivers/serial/8250_pnp.c
@@ -431,6 +431,8 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
431#endif 431#endif
432 432
433 port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF; 433 port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF;
434 if (pnp_irq_flags(dev, 0) & IORESOURCE_IRQ_SHAREABLE)
435 port.flags |= UPF_SHARE_IRQ;
434 port.uartclk = 1843200; 436 port.uartclk = 1843200;
435 port.dev = &dev->dev; 437 port.dev = &dev->dev;
436 438
diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c
index 459c0231aef3..bfd2a22759eb 100644
--- a/drivers/serial/pmac_zilog.c
+++ b/drivers/serial/pmac_zilog.c
@@ -1443,8 +1443,8 @@ static int __init pmz_init_port(struct uart_pmac_port *uap)
1443 uap->flags &= ~PMACZILOG_FLAG_HAS_DMA; 1443 uap->flags &= ~PMACZILOG_FLAG_HAS_DMA;
1444 goto no_dma; 1444 goto no_dma;
1445 } 1445 }
1446 uap->tx_dma_irq = np->intrs[1].line; 1446 uap->tx_dma_irq = irq_of_parse_and_map(np, 1);
1447 uap->rx_dma_irq = np->intrs[2].line; 1447 uap->rx_dma_irq = irq_of_parse_and_map(np, 2);
1448 } 1448 }
1449no_dma: 1449no_dma:
1450 1450
@@ -1491,7 +1491,7 @@ no_dma:
1491 * Init remaining bits of "port" structure 1491 * Init remaining bits of "port" structure
1492 */ 1492 */
1493 uap->port.iotype = UPIO_MEM; 1493 uap->port.iotype = UPIO_MEM;
1494 uap->port.irq = np->intrs[0].line; 1494 uap->port.irq = irq_of_parse_and_map(np, 0);
1495 uap->port.uartclk = ZS_CLOCK; 1495 uap->port.uartclk = ZS_CLOCK;
1496 uap->port.fifosize = 1; 1496 uap->port.fifosize = 1;
1497 uap->port.ops = &pmz_pops; 1497 uap->port.ops = &pmz_pops;
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index c54af8774393..95831808334c 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -49,6 +49,12 @@
49 */ 49 */
50static DEFINE_MUTEX(port_mutex); 50static DEFINE_MUTEX(port_mutex);
51 51
52/*
53 * lockdep: port->lock is initialized in two places, but we
54 * want only one lock-class:
55 */
56static struct lock_class_key port_lock_key;
57
52#define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) 58#define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8)
53 59
54#define uart_users(state) ((state)->count + ((state)->info ? (state)->info->blocked_open : 0)) 60#define uart_users(state) ((state)->count + ((state)->info ? (state)->info->blocked_open : 0))
@@ -1865,6 +1871,7 @@ uart_set_options(struct uart_port *port, struct console *co,
1865 * early. 1871 * early.
1866 */ 1872 */
1867 spin_lock_init(&port->lock); 1873 spin_lock_init(&port->lock);
1874 lockdep_set_class(&port->lock, &port_lock_key);
1868 1875
1869 memset(&termios, 0, sizeof(struct termios)); 1876 memset(&termios, 0, sizeof(struct termios));
1870 1877
@@ -2247,8 +2254,10 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port)
2247 * If this port is a console, then the spinlock is already 2254 * If this port is a console, then the spinlock is already
2248 * initialised. 2255 * initialised.
2249 */ 2256 */
2250 if (!(uart_console(port) && (port->cons->flags & CON_ENABLED))) 2257 if (!(uart_console(port) && (port->cons->flags & CON_ENABLED))) {
2251 spin_lock_init(&port->lock); 2258 spin_lock_init(&port->lock);
2259 lockdep_set_class(&port->lock, &port_lock_key);
2260 }
2252 2261
2253 uart_configure_port(drv, state, port); 2262 uart_configure_port(drv, state, port);
2254 2263
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ed1cdf6ac8f3..146298ad7371 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -510,7 +510,7 @@ static void spi_complete(void *arg)
510 */ 510 */
511int spi_sync(struct spi_device *spi, struct spi_message *message) 511int spi_sync(struct spi_device *spi, struct spi_message *message)
512{ 512{
513 DECLARE_COMPLETION(done); 513 DECLARE_COMPLETION_ONSTACK(done);
514 int status; 514 int status;
515 515
516 message->complete = spi_complete; 516 message->complete = spi_complete;
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index e47e3a8ed6e4..f48c3dbc367a 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -200,7 +200,7 @@ static void update_sb(struct super_block *sb)
200 if (!root) 200 if (!root)
201 return; 201 return;
202 202
203 mutex_lock(&root->d_inode->i_mutex); 203 mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
204 204
205 list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) { 205 list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) {
206 if (bus->d_inode) { 206 if (bus->d_inode) {
@@ -527,7 +527,7 @@ static void fs_remove_file (struct dentry *dentry)
527 if (!parent || !parent->d_inode) 527 if (!parent || !parent->d_inode)
528 return; 528 return;
529 529
530 mutex_lock(&parent->d_inode->i_mutex); 530 mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT);
531 if (usbfs_positive(dentry)) { 531 if (usbfs_positive(dentry)) {
532 if (dentry->d_inode) { 532 if (dentry->d_inode) {
533 if (S_ISDIR(dentry->d_inode->i_mode)) 533 if (S_ISDIR(dentry->d_inode->i_mode))
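The usbfs changes annotate i_mutex acquisitions with mutex_lock_nested() so that holding a parent directory's mutex while locking entries underneath it is not mistaken for a self-deadlock on the single i_mutex class. A reduced sketch of the annotation; only I_MUTEX_PARENT is used by the hunks above, I_MUTEX_CHILD is the matching subclass from the same enum, and the function name is a placeholder:

    #include <linux/fs.h>
    #include <linux/dcache.h>
    #include <linux/mutex.h>

    static void my_remove_child(struct dentry *parent, struct dentry *child)
    {
        /* Parent directory inode: annotated with the I_MUTEX_PARENT subclass. */
        mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT);

        /* A child inode may be locked while the parent is held; the distinct
         * subclass tells lockdep this ordering is intentional. */
        mutex_lock_nested(&child->d_inode->i_mutex, I_MUTEX_CHILD);

        /* ... unlink / remove the child here ... */

        mutex_unlock(&child->d_inode->i_mutex);
        mutex_unlock(&parent->d_inode->i_mutex);
    }
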
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 17de4c84db69..3badb48d662b 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1557,6 +1557,21 @@ config FB_S3C2410_DEBUG
1557 Turn on debugging messages. Note that you can set/unset at run time 1557 Turn on debugging messages. Note that you can set/unset at run time
1558 through sysfs 1558 through sysfs
1559 1559
1560config FB_PNX4008_DUM
1561 tristate "Display Update Module support on Philips PNX4008 board"
1562 depends on FB && ARCH_PNX4008
1563 ---help---
1564 Say Y here to enable support for PNX4008 Display Update Module (DUM)
1565
1566config FB_PNX4008_DUM_RGB
1567 tristate "RGB Framebuffer support on Philips PNX4008 board"
1568 depends on FB_PNX4008_DUM
1569 select FB_CFB_FILLRECT
1570 select FB_CFB_COPYAREA
1571 select FB_CFB_IMAGEBLIT
1572 ---help---
1573 Say Y here to enable support for PNX4008 RGB Framebuffer
1574
1560config FB_VIRTUAL 1575config FB_VIRTUAL
1561 tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)" 1576 tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
1562 depends on FB 1577 depends on FB
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index c335e9bc3b20..6283d015f8f5 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -94,6 +94,8 @@ obj-$(CONFIG_FB_TX3912) += tx3912fb.o
94obj-$(CONFIG_FB_S1D13XXX) += s1d13xxxfb.o 94obj-$(CONFIG_FB_S1D13XXX) += s1d13xxxfb.o
95obj-$(CONFIG_FB_IMX) += imxfb.o 95obj-$(CONFIG_FB_IMX) += imxfb.o
96obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o 96obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o
97obj-$(CONFIG_FB_PNX4008_DUM) += pnx4008/
98obj-$(CONFIG_FB_PNX4008_DUM_RGB) += pnx4008/
97 99
98# Platform or fallback drivers go here 100# Platform or fallback drivers go here
99obj-$(CONFIG_FB_VESA) += vesafb.o 101obj-$(CONFIG_FB_VESA) += vesafb.o
diff --git a/drivers/video/offb.c b/drivers/video/offb.c
index bfeb11bd4712..71ce1fa45cf4 100644
--- a/drivers/video/offb.c
+++ b/drivers/video/offb.c
@@ -97,14 +97,43 @@ static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
97 u_int transp, struct fb_info *info) 97 u_int transp, struct fb_info *info)
98{ 98{
99 struct offb_par *par = (struct offb_par *) info->par; 99 struct offb_par *par = (struct offb_par *) info->par;
100 int i, depth;
101 u32 *pal = info->pseudo_palette;
100 102
101 if (!par->cmap_adr || regno > 255) 103 depth = info->var.bits_per_pixel;
104 if (depth == 16)
105 depth = (info->var.green.length == 5) ? 15 : 16;
106
107 if (regno > 255 ||
108 (depth == 16 && regno > 63) ||
109 (depth == 15 && regno > 31))
102 return 1; 110 return 1;
103 111
112 if (regno < 16) {
113 switch (depth) {
114 case 15:
115 pal[regno] = (regno << 10) | (regno << 5) | regno;
116 break;
117 case 16:
118 pal[regno] = (regno << 11) | (regno << 5) | regno;
119 break;
120 case 24:
121 pal[regno] = (regno << 16) | (regno << 8) | regno;
122 break;
123 case 32:
124 i = (regno << 8) | regno;
125 pal[regno] = (i << 16) | i;
126 break;
127 }
128 }
129
104 red >>= 8; 130 red >>= 8;
105 green >>= 8; 131 green >>= 8;
106 blue >>= 8; 132 blue >>= 8;
107 133
134 if (!par->cmap_adr)
135 return 0;
136
108 switch (par->cmap_type) { 137 switch (par->cmap_type) {
109 case cmap_m64: 138 case cmap_m64:
110 writeb(regno, par->cmap_adr); 139 writeb(regno, par->cmap_adr);
@@ -141,20 +170,6 @@ static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
141 break; 170 break;
142 } 171 }
143 172
144 if (regno < 16)
145 switch (info->var.bits_per_pixel) {
146 case 16:
147 ((u16 *) (info->pseudo_palette))[regno] =
148 (regno << 10) | (regno << 5) | regno;
149 break;
150 case 32:
151 {
152 int i = (regno << 8) | regno;
153 ((u32 *) (info->pseudo_palette))[regno] =
154 (i << 16) | i;
155 break;
156 }
157 }
158 return 0; 173 return 0;
159} 174}
160 175
@@ -223,81 +238,9 @@ int __init offb_init(void)
223{ 238{
224 struct device_node *dp = NULL, *boot_disp = NULL; 239 struct device_node *dp = NULL, *boot_disp = NULL;
225 240
226#if defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32)
227 struct device_node *macos_display = NULL;
228#endif
229 if (fb_get_options("offb", NULL)) 241 if (fb_get_options("offb", NULL))
230 return -ENODEV; 242 return -ENODEV;
231 243
232#if defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32)
233 /* If we're booted from BootX... */
234 if (boot_infos != 0) {
235 unsigned long addr =
236 (unsigned long) boot_infos->dispDeviceBase;
237 u32 *addrp;
238 u64 daddr, dsize;
239 unsigned int flags;
240
241 /* find the device node corresponding to the macos display */
242 while ((dp = of_find_node_by_type(dp, "display"))) {
243 int i;
244
245 /*
246 * Look for an AAPL,address property first.
247 */
248 unsigned int na;
249 unsigned int *ap =
250 (unsigned int *)get_property(dp, "AAPL,address",
251 &na);
252 if (ap != 0) {
253 for (na /= sizeof(unsigned int); na > 0;
254 --na, ++ap)
255 if (*ap <= addr &&
256 addr < *ap + 0x1000000) {
257 macos_display = dp;
258 goto foundit;
259 }
260 }
261
262 /*
263 * See if the display address is in one of the address
264 * ranges for this display.
265 */
266 i = 0;
267 for (;;) {
268 addrp = of_get_address(dp, i++, &dsize, &flags);
269 if (addrp == NULL)
270 break;
271 if (!(flags & IORESOURCE_MEM))
272 continue;
273 daddr = of_translate_address(dp, addrp);
274 if (daddr == OF_BAD_ADDR)
275 continue;
276 if (daddr <= addr && addr < (daddr + dsize)) {
277 macos_display = dp;
278 goto foundit;
279 }
280 }
281 foundit:
282 if (macos_display) {
283 printk(KERN_INFO "MacOS display is %s\n",
284 dp->full_name);
285 break;
286 }
287 }
288
289 /* initialize it */
290 offb_init_fb(macos_display ? macos_display->
291 name : "MacOS display",
292 macos_display ? macos_display->
293 full_name : "MacOS display",
294 boot_infos->dispDeviceRect[2],
295 boot_infos->dispDeviceRect[3],
296 boot_infos->dispDeviceDepth,
297 boot_infos->dispDeviceRowBytes, addr, NULL);
298 }
299#endif /* defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32) */
300
301 for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) { 244 for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) {
302 if (get_property(dp, "linux,opened", NULL) && 245 if (get_property(dp, "linux,opened", NULL) &&
303 get_property(dp, "linux,boot-display", NULL)) { 246 get_property(dp, "linux,boot-display", NULL)) {
@@ -317,94 +260,93 @@ int __init offb_init(void)
317 260
318static void __init offb_init_nodriver(struct device_node *dp) 261static void __init offb_init_nodriver(struct device_node *dp)
319{ 262{
320 int *pp, i;
321 unsigned int len; 263 unsigned int len;
322 int width = 640, height = 480, depth = 8, pitch; 264 int i, width = 640, height = 480, depth = 8, pitch = 640;
323 unsigned int flags, rsize, *up; 265 unsigned int flags, rsize, addr_prop = 0;
324 u64 address = OF_BAD_ADDR; 266 unsigned long max_size = 0;
325 u32 *addrp; 267 u64 rstart, address = OF_BAD_ADDR;
268 u32 *pp, *addrp, *up;
326 u64 asize; 269 u64 asize;
327 270
328 if ((pp = (int *) get_property(dp, "depth", &len)) != NULL 271 pp = (u32 *)get_property(dp, "linux,bootx-depth", &len);
329 && len == sizeof(int)) 272 if (pp == NULL)
273 pp = (u32 *)get_property(dp, "depth", &len);
274 if (pp && len == sizeof(u32))
330 depth = *pp; 275 depth = *pp;
331 if ((pp = (int *) get_property(dp, "width", &len)) != NULL 276
332 && len == sizeof(int)) 277 pp = (u32 *)get_property(dp, "linux,bootx-width", &len);
278 if (pp == NULL)
279 pp = (u32 *)get_property(dp, "width", &len);
280 if (pp && len == sizeof(u32))
333 width = *pp; 281 width = *pp;
334 if ((pp = (int *) get_property(dp, "height", &len)) != NULL 282
335 && len == sizeof(int)) 283 pp = (u32 *)get_property(dp, "linux,bootx-height", &len);
284 if (pp == NULL)
285 pp = (u32 *)get_property(dp, "height", &len);
286 if (pp && len == sizeof(u32))
336 height = *pp; 287 height = *pp;
337 if ((pp = (int *) get_property(dp, "linebytes", &len)) != NULL 288
338 && len == sizeof(int)) { 289 pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len);
290 if (pp == NULL)
291 pp = (u32 *)get_property(dp, "linebytes", &len);
292 if (pp && len == sizeof(u32))
339 pitch = *pp; 293 pitch = *pp;
340 if (pitch == 1) 294 else
341 pitch = 0x1000; 295 pitch = width * ((depth + 7) / 8);
342 } else 296
343 pitch = width; 297 rsize = (unsigned long)pitch * (unsigned long)height;
344 298
345 rsize = (unsigned long)pitch * (unsigned long)height * 299 /* Ok, now we try to figure out the address of the framebuffer.
346 (unsigned long)(depth / 8); 300 *
347 301 * Unfortunately, Open Firmware doesn't provide a standard way to do
348 /* Try to match device to a PCI device in order to get a properly 302 * so. All we can do is a dodgy heuristic that happens to work in
349 * translated address rather then trying to decode the open firmware 303 * practice. On most machines, the "address" property contains what
350 * stuff in various incorrect ways 304 * we need, though not on Matrox cards found in IBM machines. What I've
351 */ 305 * found that appears to give good results is to go through the PCI
352#ifdef CONFIG_PCI 306 * ranges and pick one that is both big enough and if possible encloses
353 /* First try to locate the PCI device if any */ 307 * the "address" property. If none match, we pick the biggest
354 { 308 */
355 struct pci_dev *pdev = NULL; 309 up = (u32 *)get_property(dp, "linux,bootx-addr", &len);
356 310 if (up == NULL)
357 for_each_pci_dev(pdev) { 311 up = (u32 *)get_property(dp, "address", &len);
358 if (dp == pci_device_to_OF_node(pdev)) 312 if (up && len == sizeof(u32))
359 break; 313 addr_prop = *up;
360 } 314
361 if (pdev) { 315 for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags))
362 for (i = 0; i < 6 && address == OF_BAD_ADDR; i++) { 316 != NULL; i++) {
363 if ((pci_resource_flags(pdev, i) & 317 int match_addrp = 0;
364 IORESOURCE_MEM) && 318
365 (pci_resource_len(pdev, i) >= rsize)) 319 if (!(flags & IORESOURCE_MEM))
366 address = pci_resource_start(pdev, i); 320 continue;
367 } 321 if (asize < rsize)
368 pci_dev_put(pdev); 322 continue;
369 } 323 rstart = of_translate_address(dp, addrp);
370 } 324 if (rstart == OF_BAD_ADDR)
371#endif /* CONFIG_PCI */ 325 continue;
372 326 if (addr_prop && (rstart <= addr_prop) &&
373 /* This one is dodgy, we may drop it ... */ 327 ((rstart + asize) >= (addr_prop + rsize)))
374 if (address == OF_BAD_ADDR && 328 match_addrp = 1;
375 (up = (unsigned *) get_property(dp, "address", &len)) != NULL && 329 if (match_addrp) {
376 len == sizeof(unsigned int)) 330 address = addr_prop;
377 address = (u64) * up; 331 break;
378
379 if (address == OF_BAD_ADDR) {
380 for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags))
381 != NULL; i++) {
382 if (!(flags & IORESOURCE_MEM))
383 continue;
384 if (asize >= pitch * height * depth / 8)
385 break;
386 }
387 if (addrp == NULL) {
388 printk(KERN_ERR
389 "no framebuffer address found for %s\n",
390 dp->full_name);
391 return;
392 }
393 address = of_translate_address(dp, addrp);
394 if (address == OF_BAD_ADDR) {
395 printk(KERN_ERR
396 "can't translate framebuffer address for %s\n",
397 dp->full_name);
398 return;
399 } 332 }
333 if (rsize > max_size) {
334 max_size = rsize;
335 address = OF_BAD_ADDR;
336 }
400 337
338 if (address == OF_BAD_ADDR)
339 address = rstart;
340 }
341 if (address == OF_BAD_ADDR && addr_prop)
342 address = (u64)addr_prop;
343 if (address != OF_BAD_ADDR) {
401 /* kludge for valkyrie */ 344 /* kludge for valkyrie */
402 if (strcmp(dp->name, "valkyrie") == 0) 345 if (strcmp(dp->name, "valkyrie") == 0)
403 address += 0x1000; 346 address += 0x1000;
347 offb_init_fb(dp->name, dp->full_name, width, height, depth,
348 pitch, address, dp);
404 } 349 }
405 offb_init_fb(dp->name, dp->full_name, width, height, depth,
406 pitch, address, dp);
407
408} 350}
409 351
410static void __init offb_init_fb(const char *name, const char *full_name, 352static void __init offb_init_fb(const char *name, const char *full_name,
@@ -412,7 +354,7 @@ static void __init offb_init_fb(const char *name, const char *full_name,
412 int pitch, unsigned long address, 354 int pitch, unsigned long address,
413 struct device_node *dp) 355 struct device_node *dp)
414{ 356{
415 unsigned long res_size = pitch * height * depth / 8; 357 unsigned long res_size = pitch * height * (depth + 7) / 8;
416 struct offb_par *par = &default_par; 358 struct offb_par *par = &default_par;
417 unsigned long res_start = address; 359 unsigned long res_start = address;
418 struct fb_fix_screeninfo *fix; 360 struct fb_fix_screeninfo *fix;
@@ -426,7 +368,7 @@ static void __init offb_init_fb(const char *name, const char *full_name,
426 printk(KERN_INFO 368 printk(KERN_INFO
427 "Using unsupported %dx%d %s at %lx, depth=%d, pitch=%d\n", 369 "Using unsupported %dx%d %s at %lx, depth=%d, pitch=%d\n",
428 width, height, name, address, depth, pitch); 370 width, height, name, address, depth, pitch);
429 if (depth != 8 && depth != 16 && depth != 32) { 371 if (depth != 8 && depth != 15 && depth != 16 && depth != 32) {
430 printk(KERN_ERR "%s: can't use depth = %d\n", full_name, 372 printk(KERN_ERR "%s: can't use depth = %d\n", full_name,
431 depth); 373 depth);
432 release_mem_region(res_start, res_size); 374 release_mem_region(res_start, res_size);
@@ -502,7 +444,6 @@ static void __init offb_init_fb(const char *name, const char *full_name,
502 : */ FB_VISUAL_TRUECOLOR; 444 : */ FB_VISUAL_TRUECOLOR;
503 445
504 var->xoffset = var->yoffset = 0; 446 var->xoffset = var->yoffset = 0;
505 var->bits_per_pixel = depth;
506 switch (depth) { 447 switch (depth) {
507 case 8: 448 case 8:
508 var->bits_per_pixel = 8; 449 var->bits_per_pixel = 8;
@@ -515,7 +456,7 @@ static void __init offb_init_fb(const char *name, const char *full_name,
515 var->transp.offset = 0; 456 var->transp.offset = 0;
516 var->transp.length = 0; 457 var->transp.length = 0;
517 break; 458 break;
518 case 16: /* RGB 555 */ 459 case 15: /* RGB 555 */
519 var->bits_per_pixel = 16; 460 var->bits_per_pixel = 16;
520 var->red.offset = 10; 461 var->red.offset = 10;
521 var->red.length = 5; 462 var->red.length = 5;
@@ -526,6 +467,17 @@ static void __init offb_init_fb(const char *name, const char *full_name,
526 var->transp.offset = 0; 467 var->transp.offset = 0;
527 var->transp.length = 0; 468 var->transp.length = 0;
528 break; 469 break;
470 case 16: /* RGB 565 */
471 var->bits_per_pixel = 16;
472 var->red.offset = 11;
473 var->red.length = 5;
474 var->green.offset = 5;
475 var->green.length = 6;
476 var->blue.offset = 0;
477 var->blue.length = 5;
478 var->transp.offset = 0;
479 var->transp.length = 0;
480 break;
529 case 32: /* RGB 888 */ 481 case 32: /* RGB 888 */
530 var->bits_per_pixel = 32; 482 var->bits_per_pixel = 32;
531 var->red.offset = 16; 483 var->red.offset = 16;
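The reworked offb_setcolreg() distinguishes 15bpp (RGB 555) from 16bpp (RGB 565) by the length of the green field and fills the 16-entry truecolor pseudo-palette per depth. A condensed, standalone sketch of that logic; the helper names are placeholders:

    #include <linux/fb.h>

    /* Effective depth: a 16bpp visual with a 5-bit green field is RGB 555. */
    static int my_effective_depth(const struct fb_var_screeninfo *var)
    {
        int depth = var->bits_per_pixel;

        if (depth == 16 && var->green.length == 5)
            depth = 15;
        return depth;
    }

    /* Pseudo-palette entry for regno < 16, matching the patch. */
    static u32 my_palette_entry(int depth, unsigned int regno)
    {
        switch (depth) {
        case 15:
            return (regno << 10) | (regno << 5) | regno;
        case 16:
            return (regno << 11) | (regno << 5) | regno;
        case 24:
            return (regno << 16) | (regno << 8) | regno;
        case 32: {
            u32 i = (regno << 8) | regno;
            return (i << 16) | i;
        }
        }
        return 0;
    }
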
diff --git a/drivers/video/pnx4008/Makefile b/drivers/video/pnx4008/Makefile
new file mode 100644
index 000000000000..636aaccf01fd
--- /dev/null
+++ b/drivers/video/pnx4008/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the new PNX4008 framebuffer device driver
3#
4
5obj-$(CONFIG_FB_PNX4008_DUM) += sdum.o
6obj-$(CONFIG_FB_PNX4008_DUM_RGB) += pnxrgbfb.o
7
diff --git a/drivers/video/pnx4008/dum.h b/drivers/video/pnx4008/dum.h
new file mode 100644
index 000000000000..d80a614d89ed
--- /dev/null
+++ b/drivers/video/pnx4008/dum.h
@@ -0,0 +1,211 @@
1/*
2 * linux/drivers/video/pnx4008/dum.h
3 *
4 * Internal header for SDUM
5 *
6 * 2005 (c) Koninklijke Philips N.V. This file is licensed under
7 * the terms of the GNU General Public License version 2. This program
8 * is licensed "as is" without any warranty of any kind, whether express
9 * or implied.
10 */
11
12#ifndef __PNX008_DUM_H__
13#define __PNX008_DUM_H__
14
15#include <asm/arch/platform.h>
16
17#define PNX4008_DUMCONF_VA_BASE IO_ADDRESS(PNX4008_DUMCONF_BASE)
18#define PNX4008_DUM_MAIN_VA_BASE IO_ADDRESS(PNX4008_DUM_MAINCFG_BASE)
19
20/* DUM CFG ADDRESSES */
21#define DUM_CH_BASE_ADR (PNX4008_DUMCONF_VA_BASE + 0x00)
22#define DUM_CH_MIN_ADR (PNX4008_DUMCONF_VA_BASE + 0x00)
23#define DUM_CH_MAX_ADR (PNX4008_DUMCONF_VA_BASE + 0x04)
24#define DUM_CH_CONF_ADR (PNX4008_DUMCONF_VA_BASE + 0x08)
25#define DUM_CH_STAT_ADR (PNX4008_DUMCONF_VA_BASE + 0x0C)
26#define DUM_CH_CTRL_ADR (PNX4008_DUMCONF_VA_BASE + 0x10)
27
28#define CH_MARG (0x100 / sizeof(u32))
29#define DUM_CH_MIN(i) (*((volatile u32 *)DUM_CH_MIN_ADR + (i) * CH_MARG))
30#define DUM_CH_MAX(i) (*((volatile u32 *)DUM_CH_MAX_ADR + (i) * CH_MARG))
31#define DUM_CH_CONF(i) (*((volatile u32 *)DUM_CH_CONF_ADR + (i) * CH_MARG))
32#define DUM_CH_STAT(i) (*((volatile u32 *)DUM_CH_STAT_ADR + (i) * CH_MARG))
33#define DUM_CH_CTRL(i) (*((volatile u32 *)DUM_CH_CTRL_ADR + (i) * CH_MARG))
34
35#define DUM_CONF_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x00)
36#define DUM_CTRL_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x04)
37#define DUM_STAT_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x08)
38#define DUM_DECODE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x0C)
39#define DUM_COM_BASE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x10)
40#define DUM_SYNC_C_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x14)
41#define DUM_CLK_DIV_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x18)
42#define DUM_DIRTY_LOW_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x20)
43#define DUM_DIRTY_HIGH_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x24)
44#define DUM_FORMAT_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x28)
45#define DUM_WTCFG1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x30)
46#define DUM_RTCFG1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x34)
47#define DUM_WTCFG2_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x38)
48#define DUM_RTCFG2_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x3C)
49#define DUM_TCFG_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x40)
50#define DUM_OUTP_FORMAT1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x44)
51#define DUM_OUTP_FORMAT2_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x48)
52#define DUM_SYNC_MODE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x4C)
53#define DUM_SYNC_OUT_C_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x50)
54
55#define DUM_CONF (*(volatile u32 *)(DUM_CONF_ADR))
56#define DUM_CTRL (*(volatile u32 *)(DUM_CTRL_ADR))
57#define DUM_STAT (*(volatile u32 *)(DUM_STAT_ADR))
58#define DUM_DECODE (*(volatile u32 *)(DUM_DECODE_ADR))
59#define DUM_COM_BASE (*(volatile u32 *)(DUM_COM_BASE_ADR))
60#define DUM_SYNC_C (*(volatile u32 *)(DUM_SYNC_C_ADR))
61#define DUM_CLK_DIV (*(volatile u32 *)(DUM_CLK_DIV_ADR))
62#define DUM_DIRTY_LOW (*(volatile u32 *)(DUM_DIRTY_LOW_ADR))
63#define DUM_DIRTY_HIGH (*(volatile u32 *)(DUM_DIRTY_HIGH_ADR))
64#define DUM_FORMAT (*(volatile u32 *)(DUM_FORMAT_ADR))
65#define DUM_WTCFG1 (*(volatile u32 *)(DUM_WTCFG1_ADR))
66#define DUM_RTCFG1 (*(volatile u32 *)(DUM_RTCFG1_ADR))
67#define DUM_WTCFG2 (*(volatile u32 *)(DUM_WTCFG2_ADR))
68#define DUM_RTCFG2 (*(volatile u32 *)(DUM_RTCFG2_ADR))
69#define DUM_TCFG (*(volatile u32 *)(DUM_TCFG_ADR))
70#define DUM_OUTP_FORMAT1 (*(volatile u32 *)(DUM_OUTP_FORMAT1_ADR))
71#define DUM_OUTP_FORMAT2 (*(volatile u32 *)(DUM_OUTP_FORMAT2_ADR))
72#define DUM_SYNC_MODE (*(volatile u32 *)(DUM_SYNC_MODE_ADR))
73#define DUM_SYNC_OUT_C (*(volatile u32 *)(DUM_SYNC_OUT_C_ADR))
74
75/* DUM SLAVE ADDRESSES */
76#define DUM_SLAVE_WRITE_ADR (PNX4008_DUM_MAINCFG_BASE + 0x0000000)
77#define DUM_SLAVE_READ1_I_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000000)
78#define DUM_SLAVE_READ1_R_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000004)
79#define DUM_SLAVE_READ2_I_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000008)
80#define DUM_SLAVE_READ2_R_ADR (PNX4008_DUM_MAINCFG_BASE + 0x100000C)
81
82#define DUM_SLAVE_WRITE_W ((volatile u32 *)(DUM_SLAVE_WRITE_ADR))
83#define DUM_SLAVE_WRITE_HW ((volatile u16 *)(DUM_SLAVE_WRITE_ADR))
84#define DUM_SLAVE_READ1_I ((volatile u8 *)(DUM_SLAVE_READ1_I_ADR))
85#define DUM_SLAVE_READ1_R ((volatile u16 *)(DUM_SLAVE_READ1_R_ADR))
86#define DUM_SLAVE_READ2_I ((volatile u8 *)(DUM_SLAVE_READ2_I_ADR))
87#define DUM_SLAVE_READ2_R ((volatile u16 *)(DUM_SLAVE_READ2_R_ADR))
88
89/* Sony display register addresses */
90#define DISP_0_REG (0x00)
91#define DISP_1_REG (0x01)
92#define DISP_CAL_REG (0x20)
93#define DISP_ID_REG (0x2A)
94#define DISP_XMIN_L_REG (0x30)
95#define DISP_XMIN_H_REG (0x31)
96#define DISP_YMIN_REG (0x32)
97#define DISP_XMAX_L_REG (0x34)
98#define DISP_XMAX_H_REG (0x35)
99#define DISP_YMAX_REG (0x36)
100#define DISP_SYNC_EN_REG (0x38)
101#define DISP_SYNC_RISE_L_REG (0x3C)
102#define DISP_SYNC_RISE_H_REG (0x3D)
103#define DISP_SYNC_FALL_L_REG (0x3E)
104#define DISP_SYNC_FALL_H_REG (0x3F)
105#define DISP_PIXEL_REG (0x0B)
106#define DISP_DUMMY1_REG (0x28)
107#define DISP_DUMMY2_REG (0x29)
108#define DISP_TIMING_REG (0x98)
109#define DISP_DUMP_REG (0x99)
110
111/* Sony display constants */
112#define SONY_ID1 (0x22)
113#define SONY_ID2 (0x23)
114
115/* Philips display register addresses */
116#define PH_DISP_ORIENT_REG (0x003)
117#define PH_DISP_YPOINT_REG (0x200)
118#define PH_DISP_XPOINT_REG (0x201)
119#define PH_DISP_PIXEL_REG (0x202)
120#define PH_DISP_YMIN_REG (0x406)
121#define PH_DISP_YMAX_REG (0x407)
122#define PH_DISP_XMIN_REG (0x408)
123#define PH_DISP_XMAX_REG (0x409)
124
125/* Misc constants */
126#define NO_VALID_DISPLAY_FOUND (0)
127#define DISPLAY2_IS_NOT_CONNECTED (0)
128
129/* register values */
130#define V_BAC_ENABLE (BIT(0))
131#define V_BAC_DISABLE_IDLE (BIT(1))
132#define V_BAC_DISABLE_TRIG (BIT(2))
133#define V_DUM_RESET (BIT(3))
134#define V_MUX_RESET (BIT(4))
135#define BAC_ENABLED (BIT(0))
136#define BAC_DISABLED 0
137
138/* Sony LCD commands */
139#define V_LCD_STANDBY_OFF ((BIT(25)) | (0 << 16) | DISP_0_REG)
140#define V_LCD_USE_9BIT_BUS ((BIT(25)) | (2 << 16) | DISP_1_REG)
141#define V_LCD_SYNC_RISE_L ((BIT(25)) | (0 << 16) | DISP_SYNC_RISE_L_REG)
142#define V_LCD_SYNC_RISE_H ((BIT(25)) | (0 << 16) | DISP_SYNC_RISE_H_REG)
143#define V_LCD_SYNC_FALL_L ((BIT(25)) | (160 << 16) | DISP_SYNC_FALL_L_REG)
144#define V_LCD_SYNC_FALL_H ((BIT(25)) | (0 << 16) | DISP_SYNC_FALL_H_REG)
145#define V_LCD_SYNC_ENABLE ((BIT(25)) | (128 << 16) | DISP_SYNC_EN_REG)
146#define V_LCD_DISPLAY_ON ((BIT(25)) | (64 << 16) | DISP_0_REG)
147
148enum {
149 PAD_NONE,
150 PAD_512,
151 PAD_1024
152};
153
154enum {
155 RGB888,
156 RGB666,
157 RGB565,
158 BGR565,
159 ARGB1555,
160 ABGR1555,
161 ARGB4444,
162 ABGR4444
163};
164
165struct dum_setup {
166 int sync_neg_edge;
167 int round_robin;
168 int mux_int;
169 int synced_dirty_flag_int;
170 int dirty_flag_int;
171 int error_int;
172 int pf_empty_int;
173 int sf_empty_int;
174 int bac_dis_int;
175 u32 dirty_base_adr;
176 u32 command_base_adr;
177 u32 sync_clk_div;
178 int sync_output;
179 u32 sync_restart_val;
180 u32 set_sync_high;
181 u32 set_sync_low;
182};
183
184struct dum_ch_setup {
185 int disp_no;
186 u32 xmin;
187 u32 ymin;
188 u32 xmax;
189 u32 ymax;
190 int xmirror;
191 int ymirror;
192 int rotate;
193 u32 minadr;
194 u32 maxadr;
195 u32 dirtybuffer;
196 int pad;
197 int format;
198 int hwdirty;
199 int slave_trans;
200};
201
202struct disp_window {
203 u32 xmin_l;
204 u32 xmin_h;
205 u32 ymin;
206 u32 xmax_l;
207 u32 xmax_h;
208 u32 ymax;
209};
210
211#endif /* #ifndef __PNX008_DUM_H__ */
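
Every register above is exposed twice: once as a *_ADR constant and once as a dereferenced volatile u32, so the driver reads and writes DUM registers by plain assignment rather than through readl()/writel(). A minimal sketch of that access pattern, mirroring the BAC shutdown handshake that sdum.c performs on suspend/resume (kernel context assumed for udelay(); illustration only, not part of the patch):

#include <linux/delay.h>

/* illustration only: direct volatile access to the DUM registers */
static void dum_bac_stop_example(void)
{
	DUM_CTRL = V_BAC_DISABLE_TRIG;		/* ask the BAC to stop */
	while (DUM_CTRL & BAC_ENABLED)		/* poll the control/status bit */
		udelay(10);
}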
diff --git a/drivers/video/pnx4008/fbcommon.h b/drivers/video/pnx4008/fbcommon.h
new file mode 100644
index 000000000000..4ebc87dafafb
--- /dev/null
+++ b/drivers/video/pnx4008/fbcommon.h
@@ -0,0 +1,43 @@
1/*
2 * Copyright (C) 2005 Philips Semiconductors
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA, or http://www.gnu.org/licenses/gpl.html
18*/
19
20#define QCIF_W (176)
21#define QCIF_H (144)
22
23#define CIF_W (352)
24#define CIF_H (288)
25
26#define LCD_X_RES 208
27#define LCD_Y_RES 320
28#define LCD_X_PAD 256
29#define LCD_BBP 4 /* Bytes Per Pixel */
30
31#define DISP_MAX_X_SIZE (320)
32#define DISP_MAX_Y_SIZE (208)
33
34#define RETURNVAL_BASE (0x400)
35
36enum fb_ioctl_returntype {
37 ENORESOURCESLEFT = RETURNVAL_BASE,
38 ERESOURCESNOTFREED,
39 EPROCNOTOWNER,
40 EFBNOTOWNER,
41 ECOPYFAILED,
42 EIOREMAPFAILED,
43};
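
The fb_ioctl_returntype values start at RETURNVAL_BASE (0x400) so they stay clear of the standard errno range; the SDUM code below hands them back negated, just like ordinary kernel error codes (for instance return -EIOREMAPFAILED or -ENORESOURCESLEFT). A hedged illustration of a caller distinguishing them (the wrapper function is made up for the example):

/* illustration only: driver-private codes travel like errno values */
static int grab_channel(int dev_id)
{
	int ch = pnx4008_alloc_dum_channel(dev_id);	/* declared in sdum.h */

	if (ch == -ENORESOURCESLEFT)
		printk(KERN_WARNING "no free DUM channel\n");
	return ch;					/* channel number or error */
}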
diff --git a/drivers/video/pnx4008/pnxrgbfb.c b/drivers/video/pnx4008/pnxrgbfb.c
new file mode 100644
index 000000000000..7d9453c91a42
--- /dev/null
+++ b/drivers/video/pnx4008/pnxrgbfb.c
@@ -0,0 +1,213 @@
1/*
2 * drivers/video/pnx4008/pnxrgbfb.c
3 *
4 * PNX4008's framebuffer support
5 *
6 * Author: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com>
7 * Based on Philips Semiconductors's code
8 *
9 * Copyright (c) 2005 MontaVista Software, Inc.
10 * Copyright (c) 2005 Philips Semiconductors
11 * This file is licensed under the terms of the GNU General Public License
12 * version 2. This program is licensed "as is" without any warranty of any
13 * kind, whether express or implied.
14 */
15
16#include <linux/module.h>
17#include <linux/kernel.h>
18#include <linux/errno.h>
19#include <linux/string.h>
20#include <linux/mm.h>
21#include <linux/slab.h>
22#include <linux/vmalloc.h>
23#include <linux/delay.h>
24#include <linux/interrupt.h>
25#include <linux/fb.h>
26#include <linux/init.h>
27#include <linux/platform_device.h>
28
29#include <asm/uaccess.h>
30#include "sdum.h"
31#include "fbcommon.h"
32
33static u32 colreg[16];
34
35static struct fb_var_screeninfo rgbfb_var __initdata = {
36 .xres = LCD_X_RES,
37 .yres = LCD_Y_RES,
38 .xres_virtual = LCD_X_RES,
39 .yres_virtual = LCD_Y_RES,
40 .bits_per_pixel = 32,
41 .red.offset = 16,
42 .red.length = 8,
43 .green.offset = 8,
44 .green.length = 8,
45 .blue.offset = 0,
46 .blue.length = 8,
47 .left_margin = 0,
48 .right_margin = 0,
49 .upper_margin = 0,
50 .lower_margin = 0,
51 .vmode = FB_VMODE_NONINTERLACED,
52};
53static struct fb_fix_screeninfo rgbfb_fix __initdata = {
54 .id = "RGBFB",
55 .line_length = LCD_X_RES * LCD_BBP,
56 .type = FB_TYPE_PACKED_PIXELS,
57 .visual = FB_VISUAL_TRUECOLOR,
58 .xpanstep = 0,
59 .ypanstep = 0,
60 .ywrapstep = 0,
61 .accel = FB_ACCEL_NONE,
62};
63
64static int channel_owned;
65
66static int no_cursor(struct fb_info *info, struct fb_cursor *cursor)
67{
68 return 0;
69}
70
71static int rgbfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
72 u_int transp, struct fb_info *info)
73{
74 if (regno > 15)
75 return 1;
76
77 colreg[regno] = ((red & 0xff00) << 8) | (green & 0xff00) |
78 ((blue & 0xff00) >> 8);
79 return 0;
80}
81
82static int rgbfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
83{
84 return pnx4008_sdum_mmap(info, vma, NULL);
85}
86
87static struct fb_ops rgbfb_ops = {
88 .fb_mmap = rgbfb_mmap,
89 .fb_setcolreg = rgbfb_setcolreg,
90 .fb_fillrect = cfb_fillrect,
91 .fb_copyarea = cfb_copyarea,
92 .fb_imageblit = cfb_imageblit,
93};
94
95static int rgbfb_remove(struct platform_device *pdev)
96{
97 struct fb_info *info = platform_get_drvdata(pdev);
98
99 if (info) {
100 unregister_framebuffer(info);
101 fb_dealloc_cmap(&info->cmap);
102 framebuffer_release(info);
103 platform_set_drvdata(pdev, NULL);
104 kfree(info);
105 }
106
107 pnx4008_free_dum_channel(channel_owned, pdev->id);
108 pnx4008_set_dum_exit_notification(pdev->id);
109
110 return 0;
111}
112
113static int __devinit rgbfb_probe(struct platform_device *pdev)
114{
115 struct fb_info *info;
116 struct dumchannel_uf chan_uf;
117 int ret;
118 char *option;
119
120 info = framebuffer_alloc(sizeof(u32) * 16, &pdev->dev);
121 if (!info) {
122 ret = -ENOMEM;
123 goto err;
124 }
125
126 pnx4008_get_fb_addresses(FB_TYPE_RGB, (void **)&info->screen_base,
127 (dma_addr_t *) &rgbfb_fix.smem_start,
128 &rgbfb_fix.smem_len);
129
130 if ((ret = pnx4008_alloc_dum_channel(pdev->id)) < 0)
131 goto err0;
132 else {
133 channel_owned = ret;
134 chan_uf.channelnr = channel_owned;
135 chan_uf.dirty = (u32 *) NULL;
136 chan_uf.source = (u32 *) rgbfb_fix.smem_start;
137 chan_uf.x_offset = 0;
138 chan_uf.y_offset = 0;
139 chan_uf.width = LCD_X_RES;
140 chan_uf.height = LCD_Y_RES;
141
142 if ((ret = pnx4008_put_dum_channel_uf(chan_uf, pdev->id))< 0)
143 goto err1;
144
145 if ((ret =
146 pnx4008_set_dum_channel_sync(channel_owned, CONF_SYNC_ON,
147 pdev->id)) < 0)
148 goto err1;
149
150 if ((ret =
151 pnx4008_set_dum_channel_dirty_detect(channel_owned,
152 CONF_DIRTYDETECTION_ON,
153 pdev->id)) < 0)
154 goto err1;
155 }
156
157 if (!fb_get_options("pnxrgbfb", &option) && !strcmp(option, "nocursor"))
158 rgbfb_ops.fb_cursor = no_cursor;
159
160 info->node = -1;
161 info->flags = FBINFO_FLAG_DEFAULT;
162 info->fbops = &rgbfb_ops;
163 info->fix = rgbfb_fix;
164 info->var = rgbfb_var;
165 info->screen_size = rgbfb_fix.smem_len;
166 info->pseudo_palette = info->par;
167 info->par = NULL;
168
169 ret = fb_alloc_cmap(&info->cmap, 256, 0);
170 if (ret < 0)
171 goto err2;
172
173 ret = register_framebuffer(info);
174 if (ret < 0)
175 goto err3;
176 platform_set_drvdata(pdev, info);
177
178 return 0;
179
180err3:
181 fb_dealloc_cmap(&info->cmap);
182err2:
183 framebuffer_release(info);
184err1:
185 pnx4008_free_dum_channel(channel_owned, pdev->id);
186err0:
187 kfree(info);
188err:
189 return ret;
190}
191
192static struct platform_driver rgbfb_driver = {
193 .driver = {
194 .name = "rgbfb",
195 },
196 .probe = rgbfb_probe,
197 .remove = rgbfb_remove,
198};
199
200static int __init rgbfb_init(void)
201{
202 return platform_driver_register(&rgbfb_driver);
203}
204
205static void __exit rgbfb_exit(void)
206{
207 platform_driver_unregister(&rgbfb_driver);
208}
209
210module_init(rgbfb_init);
211module_exit(rgbfb_exit);
212
213MODULE_LICENSE("GPL");
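
Nothing in this file creates the platform device it binds to; the PNX4008 board support code is expected to register an "sdum" device for the display master and an "rgbfb" device for this client so the two probes run in that order. A minimal, assumed sketch of such registration (not part of this patch); note also that passing video=pnxrgbfb:nocursor on the kernel command line makes fb_get_options() in the probe return "nocursor" and installs the no-op cursor handler:

#include <linux/init.h>
#include <linux/platform_device.h>

/* assumed board-code sketch */
static int __init pnx4008_fb_devices_init(void)
{
	platform_device_register_simple("sdum", 0, NULL, 0);	/* display master  */
	platform_device_register_simple("rgbfb", 0, NULL, 0);	/* RGB framebuffer */
	return 0;
}
device_initcall(pnx4008_fb_devices_init);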
diff --git a/drivers/video/pnx4008/sdum.c b/drivers/video/pnx4008/sdum.c
new file mode 100644
index 000000000000..51f0ecc2a511
--- /dev/null
+++ b/drivers/video/pnx4008/sdum.c
@@ -0,0 +1,872 @@
1/*
2 * drivers/video/pnx4008/sdum.c
3 *
4 * Display Update Master support
5 *
6 * Authors: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com>
7 * Vitaly Wool <vitalywool@gmail.com>
8 * Based on Philips Semiconductors's code
9 *
10 * Copyrght (c) 2005-2006 MontaVista Software, Inc.
11 * Copyright (c) 2005 Philips Semiconductors
12 * This file is licensed under the terms of the GNU General Public License
13 * version 2. This program is licensed "as is" without any warranty of any
14 * kind, whether express or implied.
15 */
16
17#include <linux/module.h>
18#include <linux/kernel.h>
19#include <linux/errno.h>
20#include <linux/string.h>
21#include <linux/mm.h>
22#include <linux/tty.h>
23#include <linux/slab.h>
24#include <linux/vmalloc.h>
25#include <linux/delay.h>
26#include <linux/interrupt.h>
27#include <linux/platform_device.h>
28#include <linux/fb.h>
29#include <linux/init.h>
30#include <linux/dma-mapping.h>
31#include <linux/clk.h>
32#include <asm/uaccess.h>
33#include <asm/arch/gpio.h>
34
35#include "sdum.h"
36#include "fbcommon.h"
37#include "dum.h"
38
39/* Framebuffers we have */
40
41static struct pnx4008_fb_addr {
42 int fb_type;
43 long addr_offset;
44 long fb_length;
45} fb_addr[] = {
46 [0] = {
47 FB_TYPE_YUV, 0, 0xB0000
48 },
49 [1] = {
50 FB_TYPE_RGB, 0xB0000, 0x50000
51 },
52};
53
54static struct dum_data {
55 u32 lcd_phys_start;
56 u32 lcd_virt_start;
57 u32 slave_phys_base;
58 u32 *slave_virt_base;
59 int fb_owning_channel[MAX_DUM_CHANNELS];
60 struct dumchannel_uf chan_uf_store[MAX_DUM_CHANNELS];
61} dum_data;
62
63/* Different local helper functions */
64
65static u32 nof_pixels_dx(struct dum_ch_setup *ch_setup)
66{
67 return (ch_setup->xmax - ch_setup->xmin + 1);
68}
69
70static u32 nof_pixels_dy(struct dum_ch_setup *ch_setup)
71{
72 return (ch_setup->ymax - ch_setup->ymin + 1);
73}
74
75static u32 nof_pixels_dxy(struct dum_ch_setup *ch_setup)
76{
77 return (nof_pixels_dx(ch_setup) * nof_pixels_dy(ch_setup));
78}
79
80static u32 nof_bytes(struct dum_ch_setup *ch_setup)
81{
82 u32 r = nof_pixels_dxy(ch_setup);
83 switch (ch_setup->format) {
84 case RGB888:
85 case RGB666:
86 r *= 4;
87 break;
88
89 default:
90 r *= 2;
91 break;
92 }
93 return r;
94}
95
96static u32 build_command(int disp_no, u32 reg, u32 val)
97{
98 return ((disp_no << 26) | BIT(25) | (val << 16) | (disp_no << 10) |
99 (reg << 0));
100}
101
102static u32 build_double_index(int disp_no, u32 val)
103{
104 return ((disp_no << 26) | (val << 16) | (disp_no << 10) | (val << 0));
105}
106
107static void build_disp_window(struct dum_ch_setup * ch_setup, struct disp_window * dw)
108{
109 dw->ymin = ch_setup->ymin;
110 dw->ymax = ch_setup->ymax;
111 dw->xmin_l = ch_setup->xmin & 0xFF;
112 dw->xmin_h = (ch_setup->xmin & BIT(8)) >> 8;
113 dw->xmax_l = ch_setup->xmax & 0xFF;
114 dw->xmax_h = (ch_setup->xmax & BIT(8)) >> 8;
115}
116
117static int put_channel(struct dumchannel chan)
118{
119 int i = chan.channelnr;
120
121 if (i < 0 || i > MAX_DUM_CHANNELS)
122 return -EINVAL;
123 else {
124 DUM_CH_MIN(i) = chan.dum_ch_min;
125 DUM_CH_MAX(i) = chan.dum_ch_max;
126 DUM_CH_CONF(i) = chan.dum_ch_conf;
127 DUM_CH_CTRL(i) = chan.dum_ch_ctrl;
128 }
129
130 return 0;
131}
132
133static void clear_channel(int channr)
134{
135 struct dumchannel chan;
136
137 chan.channelnr = channr;
138 chan.dum_ch_min = 0;
139 chan.dum_ch_max = 0;
140 chan.dum_ch_conf = 0;
141 chan.dum_ch_ctrl = 0;
142
143 put_channel(chan);
144}
145
146static int put_cmd_string(struct cmdstring cmds)
147{
148 u16 *cmd_str_virtaddr;
149 u32 *cmd_ptr0_virtaddr;
150 u32 cmd_str_physaddr;
151
152 int i = cmds.channelnr;
153
154 if (i < 0 || i > MAX_DUM_CHANNELS)
155 return -EINVAL;
156 else if ((cmd_ptr0_virtaddr =
157 (int *)ioremap_nocache(DUM_COM_BASE,
158 sizeof(int) * MAX_DUM_CHANNELS)) ==
159 NULL)
160 return -EIOREMAPFAILED;
161 else {
162 cmd_str_physaddr = ioread32(&cmd_ptr0_virtaddr[cmds.channelnr]);
163 if ((cmd_str_virtaddr =
164 (u16 *) ioremap_nocache(cmd_str_physaddr,
165 sizeof(cmds))) == NULL) {
166 iounmap(cmd_ptr0_virtaddr);
167 return -EIOREMAPFAILED;
168 } else {
169 int t;
170 for (t = 0; t < 8; t++)
171 iowrite16(*((u16 *)&cmds.prestringlen + t),
172 cmd_str_virtaddr + t);
173
174 for (t = 0; t < cmds.prestringlen / 2; t++)
175 iowrite16(*((u16 *)&cmds.precmd + t),
176 cmd_str_virtaddr + t + 8);
177
178 for (t = 0; t < cmds.poststringlen / 2; t++)
179 iowrite16(*((u16 *)&cmds.postcmd + t),
180 cmd_str_virtaddr + t + 8 +
181 cmds.prestringlen / 2);
182
183 iounmap(cmd_ptr0_virtaddr);
184 iounmap(cmd_str_virtaddr);
185 }
186 }
187
188 return 0;
189}
190
191static u32 dum_ch_setup(int ch_no, struct dum_ch_setup * ch_setup)
192{
193 struct cmdstring cmds_c;
194 struct cmdstring *cmds = &cmds_c;
195 struct disp_window dw;
196 int standard;
197 u32 orientation = 0;
198 struct dumchannel chan = { 0 };
199 int ret;
200
201 if ((ch_setup->xmirror) || (ch_setup->ymirror) || (ch_setup->rotate)) {
202 standard = 0;
203
204 orientation = BIT(1); /* always set 9-bit-bus */
205 if (ch_setup->xmirror)
206 orientation |= BIT(4);
207 if (ch_setup->ymirror)
208 orientation |= BIT(3);
209 if (ch_setup->rotate)
210 orientation |= BIT(0);
211 } else
212 standard = 1;
213
214 cmds->channelnr = ch_no;
215
216 /* build command string header */
217 if (standard) {
218 cmds->prestringlen = 32;
219 cmds->poststringlen = 0;
220 } else {
221 cmds->prestringlen = 48;
222 cmds->poststringlen = 16;
223 }
224
225 cmds->format =
226 (u16) ((ch_setup->disp_no << 4) | (BIT(3)) | (ch_setup->format));
227 cmds->reserved = 0x0;
228 cmds->startaddr_low = (ch_setup->minadr & 0xFFFF);
229 cmds->startaddr_high = (ch_setup->minadr >> 16);
230
231 if ((ch_setup->minadr == 0) && (ch_setup->maxadr == 0)
232 && (ch_setup->xmin == 0)
233 && (ch_setup->ymin == 0) && (ch_setup->xmax == 0)
234 && (ch_setup->ymax == 0)) {
235 cmds->pixdatlen_low = 0;
236 cmds->pixdatlen_high = 0;
237 } else {
238 u32 nbytes = nof_bytes(ch_setup);
239 cmds->pixdatlen_low = (nbytes & 0xFFFF);
240 cmds->pixdatlen_high = (nbytes >> 16);
241 }
242
243 if (ch_setup->slave_trans)
244 cmds->pixdatlen_high |= BIT(15);
245
246 /* build pre-string */
247 build_disp_window(ch_setup, &dw);
248
249 if (standard) {
250 cmds->precmd[0] =
251 build_command(ch_setup->disp_no, DISP_XMIN_L_REG, 0x99);
252 cmds->precmd[1] =
253 build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
254 dw.xmin_l);
255 cmds->precmd[2] =
256 build_command(ch_setup->disp_no, DISP_XMIN_H_REG,
257 dw.xmin_h);
258 cmds->precmd[3] =
259 build_command(ch_setup->disp_no, DISP_YMIN_REG, dw.ymin);
260 cmds->precmd[4] =
261 build_command(ch_setup->disp_no, DISP_XMAX_L_REG,
262 dw.xmax_l);
263 cmds->precmd[5] =
264 build_command(ch_setup->disp_no, DISP_XMAX_H_REG,
265 dw.xmax_h);
266 cmds->precmd[6] =
267 build_command(ch_setup->disp_no, DISP_YMAX_REG, dw.ymax);
268 cmds->precmd[7] =
269 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
270 } else {
271 if (dw.xmin_l == ch_no)
272 cmds->precmd[0] =
273 build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
274 0x99);
275 else
276 cmds->precmd[0] =
277 build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
278 ch_no);
279
280 cmds->precmd[1] =
281 build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
282 dw.xmin_l);
283 cmds->precmd[2] =
284 build_command(ch_setup->disp_no, DISP_XMIN_H_REG,
285 dw.xmin_h);
286 cmds->precmd[3] =
287 build_command(ch_setup->disp_no, DISP_YMIN_REG, dw.ymin);
288 cmds->precmd[4] =
289 build_command(ch_setup->disp_no, DISP_XMAX_L_REG,
290 dw.xmax_l);
291 cmds->precmd[5] =
292 build_command(ch_setup->disp_no, DISP_XMAX_H_REG,
293 dw.xmax_h);
294 cmds->precmd[6] =
295 build_command(ch_setup->disp_no, DISP_YMAX_REG, dw.ymax);
296 cmds->precmd[7] =
297 build_command(ch_setup->disp_no, DISP_1_REG, orientation);
298 cmds->precmd[8] =
299 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
300 cmds->precmd[9] =
301 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
302 cmds->precmd[0xA] =
303 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
304 cmds->precmd[0xB] =
305 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
306 cmds->postcmd[0] =
307 build_command(ch_setup->disp_no, DISP_1_REG, BIT(1));
308 cmds->postcmd[1] =
309 build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 1);
310 cmds->postcmd[2] =
311 build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 2);
312 cmds->postcmd[3] =
313 build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 3);
314 }
315
316 if ((ret = put_cmd_string(cmds_c)) != 0) {
317 return ret;
318 }
319
320 chan.channelnr = cmds->channelnr;
321 chan.dum_ch_min = ch_setup->dirtybuffer + ch_setup->minadr;
322 chan.dum_ch_max = ch_setup->dirtybuffer + ch_setup->maxadr;
323 chan.dum_ch_conf = 0x002;
324 chan.dum_ch_ctrl = 0x04;
325
326 put_channel(chan);
327
328 return 0;
329}
330
331static u32 display_open(int ch_no, int auto_update, u32 * dirty_buffer,
332 u32 * frame_buffer, u32 xpos, u32 ypos, u32 w, u32 h)
333{
334
335 struct dum_ch_setup k;
336 int ret;
337
338 /* keep width & height within display area */
339 if ((xpos + w) > DISP_MAX_X_SIZE)
340 w = DISP_MAX_X_SIZE - xpos;
341
342 if ((ypos + h) > DISP_MAX_Y_SIZE)
343 h = DISP_MAX_Y_SIZE - ypos;
344
345 /* assume 1 display only */
346 k.disp_no = 0;
347 k.xmin = xpos;
348 k.ymin = ypos;
349 k.xmax = xpos + (w - 1);
350 k.ymax = ypos + (h - 1);
351
352 /* adjust min and max values if necessary */
353 if (k.xmin > DISP_MAX_X_SIZE - 1)
354 k.xmin = DISP_MAX_X_SIZE - 1;
355 if (k.ymin > DISP_MAX_Y_SIZE - 1)
356 k.ymin = DISP_MAX_Y_SIZE - 1;
357
358 if (k.xmax > DISP_MAX_X_SIZE - 1)
359 k.xmax = DISP_MAX_X_SIZE - 1;
360 if (k.ymax > DISP_MAX_Y_SIZE - 1)
361 k.ymax = DISP_MAX_Y_SIZE - 1;
362
363 k.xmirror = 0;
364 k.ymirror = 0;
365 k.rotate = 0;
366 k.minadr = (u32) frame_buffer;
367 k.maxadr = (u32) frame_buffer + (((w - 1) << 10) | ((h << 2) - 2));
368 k.pad = PAD_1024;
369 k.dirtybuffer = (u32) dirty_buffer;
370 k.format = RGB888;
371 k.hwdirty = 0;
372 k.slave_trans = 0;
373
374 ret = dum_ch_setup(ch_no, &k);
375
376 return ret;
377}
378
379static void lcd_reset(void)
380{
381 u32 *dum_pio_base = (u32 *)IO_ADDRESS(PNX4008_PIO_BASE);
382
383 udelay(1);
384 iowrite32(BIT(19), &dum_pio_base[2]);
385 udelay(1);
386 iowrite32(BIT(19), &dum_pio_base[1]);
387 udelay(1);
388}
389
390static int dum_init(struct platform_device *pdev)
391{
392 struct clk *clk;
393
394 /* enable DUM clock */
395 clk = clk_get(&pdev->dev, "dum_ck");
396 if (IS_ERR(clk)) {
397 printk(KERN_ERR "pnx4008_dum: Unable to access DUM clock\n");
398 return PTR_ERR(clk);
399 }
400
401 clk_set_rate(clk, 1);
402 clk_put(clk);
403
404 DUM_CTRL = V_DUM_RESET;
405
406 /* set priority to "round-robin". All other params to "false" */
407 DUM_CONF = BIT(9);
408
409 /* Display 1 */
410 DUM_WTCFG1 = PNX4008_DUM_WT_CFG;
411 DUM_RTCFG1 = PNX4008_DUM_RT_CFG;
412 DUM_TCFG = PNX4008_DUM_T_CFG;
413
414 return 0;
415}
416
417static void dum_chan_init(void)
418{
419 int i = 0, ch = 0;
420 u32 *cmdptrs;
421 u32 *cmdstrings;
422
423 DUM_COM_BASE =
424 CMDSTRING_BASEADDR + BYTES_PER_CMDSTRING * NR_OF_CMDSTRINGS;
425
426 if ((cmdptrs =
427 (u32 *) ioremap_nocache(DUM_COM_BASE,
428 sizeof(u32) * NR_OF_CMDSTRINGS)) == NULL)
429 return;
430
431 for (ch = 0; ch < NR_OF_CMDSTRINGS; ch++)
432 iowrite32(CMDSTRING_BASEADDR + BYTES_PER_CMDSTRING * ch,
433 cmdptrs + ch);
434
435 for (ch = 0; ch < MAX_DUM_CHANNELS; ch++)
436 clear_channel(ch);
437
438 /* Clear the cmdstrings */
439 cmdstrings =
440 (u32 *)ioremap_nocache(*cmdptrs,
441 BYTES_PER_CMDSTRING * NR_OF_CMDSTRINGS);
442
443 if (!cmdstrings)
444 goto out;
445
446 for (i = 0; i < NR_OF_CMDSTRINGS * BYTES_PER_CMDSTRING / sizeof(u32);
447 i++)
448 iowrite32(0, cmdstrings + i);
449
450 iounmap((u32 *)cmdstrings);
451
452out:
453 iounmap((u32 *)cmdptrs);
454}
455
456static void lcd_init(void)
457{
458 lcd_reset();
459
460 DUM_OUTP_FORMAT1 = 0; /* RGB666 */
461
462 udelay(1);
463 iowrite32(V_LCD_STANDBY_OFF, dum_data.slave_virt_base);
464 udelay(1);
465 iowrite32(V_LCD_USE_9BIT_BUS, dum_data.slave_virt_base);
466 udelay(1);
467 iowrite32(V_LCD_SYNC_RISE_L, dum_data.slave_virt_base);
468 udelay(1);
469 iowrite32(V_LCD_SYNC_RISE_H, dum_data.slave_virt_base);
470 udelay(1);
471 iowrite32(V_LCD_SYNC_FALL_L, dum_data.slave_virt_base);
472 udelay(1);
473 iowrite32(V_LCD_SYNC_FALL_H, dum_data.slave_virt_base);
474 udelay(1);
475 iowrite32(V_LCD_SYNC_ENABLE, dum_data.slave_virt_base);
476 udelay(1);
477 iowrite32(V_LCD_DISPLAY_ON, dum_data.slave_virt_base);
478 udelay(1);
479}
480
481/* Interface exported to framebuffer drivers */
482
483int pnx4008_get_fb_addresses(int fb_type, void **virt_addr,
484 dma_addr_t *phys_addr, int *fb_length)
485{
486 int i;
487 int ret = -1;
488 for (i = 0; i < ARRAY_SIZE(fb_addr); i++)
489 if (fb_addr[i].fb_type == fb_type) {
490 *virt_addr = (void *)(dum_data.lcd_virt_start +
491 fb_addr[i].addr_offset);
492 *phys_addr =
493 dum_data.lcd_phys_start + fb_addr[i].addr_offset;
494 *fb_length = fb_addr[i].fb_length;
495 ret = 0;
496 break;
497 }
498
499 return ret;
500}
501
502EXPORT_SYMBOL(pnx4008_get_fb_addresses);
503
504int pnx4008_alloc_dum_channel(int dev_id)
505{
506 int i = 0;
507
508 while ((i < MAX_DUM_CHANNELS) && (dum_data.fb_owning_channel[i] != -1))
509 i++;
510
511 if (i == MAX_DUM_CHANNELS)
512 return -ENORESOURCESLEFT;
513 else {
514 dum_data.fb_owning_channel[i] = dev_id;
515 return i;
516 }
517}
518
519EXPORT_SYMBOL(pnx4008_alloc_dum_channel);
520
521int pnx4008_free_dum_channel(int channr, int dev_id)
522{
523 if (channr < 0 || channr > MAX_DUM_CHANNELS)
524 return -EINVAL;
525 else if (dum_data.fb_owning_channel[channr] != dev_id)
526 return -EFBNOTOWNER;
527 else {
528 clear_channel(channr);
529 dum_data.fb_owning_channel[channr] = -1;
530 }
531
532 return 0;
533}
534
535EXPORT_SYMBOL(pnx4008_free_dum_channel);
536
537int pnx4008_put_dum_channel_uf(struct dumchannel_uf chan_uf, int dev_id)
538{
539 int i = chan_uf.channelnr;
540 int ret;
541
542 if (i < 0 || i > MAX_DUM_CHANNELS)
543 return -EINVAL;
544 else if (dum_data.fb_owning_channel[i] != dev_id)
545 return -EFBNOTOWNER;
546 else if ((ret =
547 display_open(chan_uf.channelnr, 0, chan_uf.dirty,
548 chan_uf.source, chan_uf.y_offset,
549 chan_uf.x_offset, chan_uf.height,
550 chan_uf.width)) != 0)
551 return ret;
552 else {
553 dum_data.chan_uf_store[i].dirty = chan_uf.dirty;
554 dum_data.chan_uf_store[i].source = chan_uf.source;
555 dum_data.chan_uf_store[i].x_offset = chan_uf.x_offset;
556 dum_data.chan_uf_store[i].y_offset = chan_uf.y_offset;
557 dum_data.chan_uf_store[i].width = chan_uf.width;
558 dum_data.chan_uf_store[i].height = chan_uf.height;
559 }
560
561 return 0;
562}
563
564EXPORT_SYMBOL(pnx4008_put_dum_channel_uf);
565
566int pnx4008_set_dum_channel_sync(int channr, int val, int dev_id)
567{
568 if (channr < 0 || channr > MAX_DUM_CHANNELS)
569 return -EINVAL;
570 else if (dum_data.fb_owning_channel[channr] != dev_id)
571 return -EFBNOTOWNER;
572 else {
573 if (val == CONF_SYNC_ON) {
574 DUM_CH_CONF(channr) |= CONF_SYNCENABLE;
575 DUM_CH_CONF(channr) |= DUM_CHANNEL_CFG_SYNC_MASK |
576 DUM_CHANNEL_CFG_SYNC_MASK_SET;
577 } else if (val == CONF_SYNC_OFF)
578 DUM_CH_CONF(channr) &= ~CONF_SYNCENABLE;
579 else
580 return -EINVAL;
581 }
582
583 return 0;
584}
585
586EXPORT_SYMBOL(pnx4008_set_dum_channel_sync);
587
588int pnx4008_set_dum_channel_dirty_detect(int channr, int val, int dev_id)
589{
590 if (channr < 0 || channr > MAX_DUM_CHANNELS)
591 return -EINVAL;
592 else if (dum_data.fb_owning_channel[channr] != dev_id)
593 return -EFBNOTOWNER;
594 else {
595 if (val == CONF_DIRTYDETECTION_ON)
596 DUM_CH_CONF(channr) |= CONF_DIRTYENABLE;
597 else if (val == CONF_DIRTYDETECTION_OFF)
598 DUM_CH_CONF(channr) &= ~CONF_DIRTYENABLE;
599 else
600 return -EINVAL;
601 }
602
603 return 0;
604}
605
606EXPORT_SYMBOL(pnx4008_set_dum_channel_dirty_detect);
607
608#if 0 /* Functions not used currently, but likely to be used in future */
609
610static int get_channel(struct dumchannel *p_chan)
611{
612 int i = p_chan->channelnr;
613
614 if (i < 0 || i > MAX_DUM_CHANNELS)
615 return -EINVAL;
616 else {
617 p_chan->dum_ch_min = DUM_CH_MIN(i);
618 p_chan->dum_ch_max = DUM_CH_MAX(i);
619 p_chan->dum_ch_conf = DUM_CH_CONF(i);
620 p_chan->dum_ch_stat = DUM_CH_STAT(i);
621 p_chan->dum_ch_ctrl = 0; /* WriteOnly control register */
622 }
623
624 return 0;
625}
626
627int pnx4008_get_dum_channel_uf(struct dumchannel_uf *p_chan_uf, int dev_id)
628{
629 int i = p_chan_uf->channelnr;
630
631 if (i < 0 || i > MAX_DUM_CHANNELS)
632 return -EINVAL;
633 else if (dum_data.fb_owning_channel[i] != dev_id)
634 return -EFBNOTOWNER;
635 else {
636 p_chan_uf->dirty = dum_data.chan_uf_store[i].dirty;
637 p_chan_uf->source = dum_data.chan_uf_store[i].source;
638 p_chan_uf->x_offset = dum_data.chan_uf_store[i].x_offset;
639 p_chan_uf->y_offset = dum_data.chan_uf_store[i].y_offset;
640 p_chan_uf->width = dum_data.chan_uf_store[i].width;
641 p_chan_uf->height = dum_data.chan_uf_store[i].height;
642 }
643
644 return 0;
645}
646
647EXPORT_SYMBOL(pnx4008_get_dum_channel_uf);
648
649int pnx4008_get_dum_channel_config(int channr, int dev_id)
650{
651 int ret;
652 struct dumchannel chan;
653
654 if (channr < 0 || channr > MAX_DUM_CHANNELS)
655 return -EINVAL;
656 else if (dum_data.fb_owning_channel[channr] != dev_id)
657 return -EFBNOTOWNER;
658 else {
659 chan.channelnr = channr;
660 if ((ret = get_channel(&chan)) != 0)
661 return ret;
662 }
663
664 return (chan.dum_ch_conf & DUM_CHANNEL_CFG_MASK);
665}
666
667EXPORT_SYMBOL(pnx4008_get_dum_channel_config);
668
669int pnx4008_force_update_dum_channel(int channr, int dev_id)
670{
671 if (channr < 0 || channr > MAX_DUM_CHANNELS)
672 return -EINVAL;
673
674 else if (dum_data.fb_owning_channel[channr] != dev_id)
675 return -EFBNOTOWNER;
676 else
677 DUM_CH_CTRL(channr) = CTRL_SETDIRTY;
678
679 return 0;
680}
681
682EXPORT_SYMBOL(pnx4008_force_update_dum_channel);
683
684#endif
685
686int pnx4008_sdum_mmap(struct fb_info *info, struct vm_area_struct *vma,
687 struct device *dev)
688{
689 unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
690
691 if (off < info->fix.smem_len) {
692 vma->vm_pgoff += 1;
693 return dma_mmap_writecombine(dev, vma,
694 (void *)dum_data.lcd_virt_start,
695 dum_data.lcd_phys_start,
696 FB_DMA_SIZE);
697 }
698 return -EINVAL;
699}
700
701EXPORT_SYMBOL(pnx4008_sdum_mmap);
702
703int pnx4008_set_dum_exit_notification(int dev_id)
704{
705 int i;
706
707 for (i = 0; i < MAX_DUM_CHANNELS; i++)
708 if (dum_data.fb_owning_channel[i] == dev_id)
709 return -ERESOURCESNOTFREED;
710
711 return 0;
712}
713
714EXPORT_SYMBOL(pnx4008_set_dum_exit_notification);
715
716/* Platform device driver for DUM */
717
718static int sdum_suspend(struct platform_device *pdev, pm_message_t state)
719{
720 int retval = 0;
721 struct clk *clk;
722
723 clk = clk_get(0, "dum_ck");
724 if (!IS_ERR(clk)) {
725 clk_set_rate(clk, 0);
726 clk_put(clk);
727 } else
728 retval = PTR_ERR(clk);
729
730 /* disable BAC */
731 DUM_CTRL = V_BAC_DISABLE_IDLE;
732
733 /* LCD standby & turn off display */
734 lcd_reset();
735
736 return retval;
737}
738
739static int sdum_resume(struct platform_device *pdev)
740{
741 int retval = 0;
742 struct clk *clk;
743
744 clk = clk_get(0, "dum_ck");
745 if (!IS_ERR(clk)) {
746 clk_set_rate(clk, 1);
747 clk_put(clk);
748 } else
749 retval = PTR_ERR(clk);
750
751 /* wait for BAC disable */
752 DUM_CTRL = V_BAC_DISABLE_TRIG;
753
754 while (DUM_CTRL & BAC_ENABLED)
755 udelay(10);
756
757 /* re-init LCD */
758 lcd_init();
759
760 /* enable BAC and reset MUX */
761 DUM_CTRL = V_BAC_ENABLE;
762 udelay(1);
763 DUM_CTRL = V_MUX_RESET;
764 return 0;
765}
766
767static int __devinit sdum_probe(struct platform_device *pdev)
768{
769 int ret = 0, i = 0;
770
771 /* map frame buffer */
772 dum_data.lcd_virt_start = (u32) dma_alloc_writecombine(&pdev->dev,
773 FB_DMA_SIZE,
774 &dum_data.lcd_phys_start,
775 GFP_KERNEL);
776
777 if (!dum_data.lcd_virt_start) {
778 ret = -ENOMEM;
779 goto out_3;
780 }
781
782 /* map slave registers */
783 dum_data.slave_phys_base = PNX4008_DUM_SLAVE_BASE;
784 dum_data.slave_virt_base =
785 (u32 *) ioremap_nocache(dum_data.slave_phys_base, sizeof(u32));
786
787 if (dum_data.slave_virt_base == NULL) {
788 ret = -ENOMEM;
789 goto out_2;
790 }
791
792 /* initialize DUM and LCD display */
793 ret = dum_init(pdev);
794 if (ret)
795 goto out_1;
796
797 dum_chan_init();
798 lcd_init();
799
800 DUM_CTRL = V_BAC_ENABLE;
801 udelay(1);
802 DUM_CTRL = V_MUX_RESET;
803
804 /* set decode address and sync clock divider */
805 DUM_DECODE = dum_data.lcd_phys_start & DUM_DECODE_MASK;
806 DUM_CLK_DIV = PNX4008_DUM_CLK_DIV;
807
808 for (i = 0; i < MAX_DUM_CHANNELS; i++)
809 dum_data.fb_owning_channel[i] = -1;
810
811	/* setup wakeup interrupt */
812 start_int_set_rising_edge(SE_DISP_SYNC_INT);
813 start_int_ack(SE_DISP_SYNC_INT);
814 start_int_umask(SE_DISP_SYNC_INT);
815
816 return 0;
817
818out_1:
819 iounmap((void *)dum_data.slave_virt_base);
820out_2:
821 dma_free_writecombine(&pdev->dev, FB_DMA_SIZE,
822 (void *)dum_data.lcd_virt_start,
823 dum_data.lcd_phys_start);
824out_3:
825 return ret;
826}
827
828static int sdum_remove(struct platform_device *pdev)
829{
830 struct clk *clk;
831
832 start_int_mask(SE_DISP_SYNC_INT);
833
834 clk = clk_get(0, "dum_ck");
835 if (!IS_ERR(clk)) {
836 clk_set_rate(clk, 0);
837 clk_put(clk);
838 }
839
840 iounmap((void *)dum_data.slave_virt_base);
841
842 dma_free_writecombine(&pdev->dev, FB_DMA_SIZE,
843 (void *)dum_data.lcd_virt_start,
844 dum_data.lcd_phys_start);
845
846 return 0;
847}
848
849static struct platform_driver sdum_driver = {
850 .driver = {
851 .name = "sdum",
852 },
853 .probe = sdum_probe,
854 .remove = sdum_remove,
855 .suspend = sdum_suspend,
856 .resume = sdum_resume,
857};
858
859int __init sdum_init(void)
860{
861 return platform_driver_register(&sdum_driver);
862}
863
864static void __exit sdum_exit(void)
865{
866 platform_driver_unregister(&sdum_driver);
867};
868
869module_init(sdum_init);
870module_exit(sdum_exit);
871
872MODULE_LICENSE("GPL");
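
The fb_addr[] table at the top of this file carves the single dma_alloc_writecombine() area from sdum_probe() into two fixed framebuffers. The sizes are consistent with the constants in fbcommon.h and sdum.h (a consistency check, not code from the patch):

/*
 * YUV framebuffer: offset 0x00000, length 0xB0000 (704 KiB)
 * RGB framebuffer: offset 0xB0000, length 0x50000 (320 KiB)
 *                                  --------
 *                        total     0x100000 bytes = 1 MiB
 *
 * 0x50000 equals LCD_X_PAD * LCD_Y_RES * LCD_BBP = 256 * 320 * 4 from
 * fbcommon.h, and the 1 MiB total fits inside
 * FB_DMA_SIZE = PAGE_ALIGN(SZ_1M + PAGE_SIZE) from sdum.h.
 */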
diff --git a/drivers/video/pnx4008/sdum.h b/drivers/video/pnx4008/sdum.h
new file mode 100644
index 000000000000..e8c5dcdd8813
--- /dev/null
+++ b/drivers/video/pnx4008/sdum.h
@@ -0,0 +1,139 @@
1/*
2 * Copyright (C) 2005 Philips Semiconductors
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA, or http://www.gnu.org/licenses/gpl.html
18*/
19
20#define MAX_DUM_CHANNELS 64
21
22#define RGB_MEM_WINDOW(x) (0x10000000 + (x)*0x00100000)
23
24#define QCIF_OFFSET(x) (((x) == 0) ? 0x00000: ((x) == 1) ? 0x30000: -1)
25#define CIF_OFFSET(x) (((x) == 0) ? 0x00000: ((x) == 1) ? 0x60000: -1)
26
27#define CTRL_SETDIRTY (0x00000001)
28#define CONF_DIRTYENABLE (0x00000020)
29#define CONF_SYNCENABLE (0x00000004)
30
31#define DIRTY_ENABLED(conf) ((conf) & 0x0020)
32#define SYNC_ENABLED(conf) ((conf) & 0x0004)
33
34/* Display 1 & 2 Write Timing Configuration */
35#define PNX4008_DUM_WT_CFG 0x00372000
36
37/* Display 1 & 2 Read Timing Configuration */
38#define PNX4008_DUM_RT_CFG 0x00003A47
39
40/* DUM Transit State Timing Configuration */
41#define PNX4008_DUM_T_CFG 0x1D /* 29 HCLK cycles */
42
43/* DUM Sync count clock divider */
44#define PNX4008_DUM_CLK_DIV 0x02DD
45
46/* Memory size for framebuffer, allocated through dma_alloc_writecombine().
47 * Must be PAGE aligned
48 */
49#define FB_DMA_SIZE (PAGE_ALIGN(SZ_1M + PAGE_SIZE))
50
51#define OFFSET_RGBBUFFER (0xB0000)
52#define OFFSET_YUVBUFFER (0x00000)
53
54#define YUVBUFFER (lcd_video_start + OFFSET_YUVBUFFER)
55#define RGBBUFFER (lcd_video_start + OFFSET_RGBBUFFER)
56
57#define CMDSTRING_BASEADDR (0x00C000) /* iram */
58#define BYTES_PER_CMDSTRING (0x80)
59#define NR_OF_CMDSTRINGS (64)
60
61#define MAX_NR_PRESTRINGS (0x40)
62#define MAX_NR_POSTSTRINGS (0x40)
63
64/* various mask definitions */
65#define DUM_CLK_ENABLE 0x01
66#define DUM_CLK_DISABLE 0
67#define DUM_DECODE_MASK 0x1FFFFFFF
68#define DUM_CHANNEL_CFG_MASK 0x01FF
69#define DUM_CHANNEL_CFG_SYNC_MASK 0xFFFE00FF
70#define DUM_CHANNEL_CFG_SYNC_MASK_SET 0x0CA00
71
72#define SDUM_RETURNVAL_BASE (0x500)
73
74#define CONF_SYNC_OFF (0x602)
75#define CONF_SYNC_ON (0x603)
76
77#define CONF_DIRTYDETECTION_OFF (0x600)
78#define CONF_DIRTYDETECTION_ON (0x601)
79
80/* Set the corresponding bit. */
81#define BIT(n) (0x1U << (n))
82
83struct dumchannel_uf {
84 int channelnr;
85 u32 *dirty;
86 u32 *source;
87 u32 x_offset;
88 u32 y_offset;
89 u32 width;
90 u32 height;
91};
92
93enum {
94 FB_TYPE_YUV,
95 FB_TYPE_RGB
96};
97
98struct cmdstring {
99 int channelnr;
100 uint16_t prestringlen;
101 uint16_t poststringlen;
102 uint16_t format;
103 uint16_t reserved;
104 uint16_t startaddr_low;
105 uint16_t startaddr_high;
106 uint16_t pixdatlen_low;
107 uint16_t pixdatlen_high;
108 u32 precmd[MAX_NR_PRESTRINGS];
109 u32 postcmd[MAX_NR_POSTSTRINGS];
110
111};
112
113struct dumchannel {
114 int channelnr;
115 int dum_ch_min;
116 int dum_ch_max;
117 int dum_ch_conf;
118 int dum_ch_stat;
119 int dum_ch_ctrl;
120};
121
122int pnx4008_alloc_dum_channel(int dev_id);
123int pnx4008_free_dum_channel(int channr, int dev_id);
124
125int pnx4008_get_dum_channel_uf(struct dumchannel_uf *pChan_uf, int dev_id);
126int pnx4008_put_dum_channel_uf(struct dumchannel_uf chan_uf, int dev_id);
127
128int pnx4008_set_dum_channel_sync(int channr, int val, int dev_id);
129int pnx4008_set_dum_channel_dirty_detect(int channr, int val, int dev_id);
130
131int pnx4008_force_dum_update_channel(int channr, int dev_id);
132
133int pnx4008_get_dum_channel_config(int channr, int dev_id);
134
135int pnx4008_sdum_mmap(struct fb_info *info, struct vm_area_struct *vma, struct device *dev);
136int pnx4008_set_dum_exit_notification(int dev_id);
137
138int pnx4008_get_fb_addresses(int fb_type, void **virt_addr,
139 dma_addr_t * phys_addr, int *fb_length);
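
Taken together these exports describe the channel lifecycle a client framebuffer driver is expected to follow; pnxrgbfb.c above is the in-tree user. A condensed sketch of that sequence, with error handling trimmed (illustrative only):

/* Sketch of the DUM channel lifecycle used by a client fb driver. */
static int attach_rgb_channel(int dev_id)
{
	struct dumchannel_uf uf;
	void *virt;
	dma_addr_t phys;
	int len, ch;

	pnx4008_get_fb_addresses(FB_TYPE_RGB, &virt, &phys, &len);

	ch = pnx4008_alloc_dum_channel(dev_id);
	if (ch < 0)
		return ch;

	uf.channelnr = ch;
	uf.dirty     = NULL;		/* no software dirty buffer */
	uf.source    = (u32 *)phys;	/* physical start of the RGB buffer */
	uf.x_offset  = 0;
	uf.y_offset  = 0;
	uf.width     = LCD_X_RES;	/* from fbcommon.h */
	uf.height    = LCD_Y_RES;

	pnx4008_put_dum_channel_uf(uf, dev_id);
	pnx4008_set_dum_channel_sync(ch, CONF_SYNC_ON, dev_id);
	pnx4008_set_dum_channel_dirty_detect(ch, CONF_DIRTYDETECTION_ON, dev_id);
	return ch;
}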
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d0434406eaeb..f42e64210ee5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = {
 	.min_coredump	= ELF_EXEC_PAGESIZE
 };
 
-#define BAD_ADDR(x)	((unsigned long)(x) > TASK_SIZE)
+#define BAD_ADDR(x)	((unsigned long)(x) >= TASK_SIZE)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 		 * <= p_memsize so it's only necessary to check p_memsz.
 		 */
 		k = load_addr + eppnt->p_vaddr;
-		if (k > TASK_SIZE ||
+		if (BAD_ADDR(k) ||
 		    eppnt->p_filesz > eppnt->p_memsz ||
 		    eppnt->p_memsz > TASK_SIZE ||
 		    TASK_SIZE - eppnt->p_memsz < k) {
@@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		 * allowed task size. Note that p_filesz must always be
 		 * <= p_memsz so it is only necessary to check p_memsz.
 		 */
-		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
+		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 		    elf_ppnt->p_memsz > TASK_SIZE ||
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
 			/* set_brk can never work. Avoid overflows. */
@@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 						    interpreter,
 						    &interp_load_addr);
 		if (BAD_ADDR(elf_entry)) {
-			printk(KERN_ERR "Unable to load interpreter %.128s\n",
-				elf_interpreter);
 			force_sig(SIGSEGV, current);
-			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+			retval = IS_ERR((void *)elf_entry) ?
+					(int)elf_entry : -EINVAL;
 			goto out_free_dentry;
 		}
 		reloc_func_desc = interp_load_addr;
@@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	} else {
 		elf_entry = loc->elf_ex.e_entry;
 		if (BAD_ADDR(elf_entry)) {
-			send_sig(SIGSEGV, current, 0);
-			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+			force_sig(SIGSEGV, current);
+			retval = -EINVAL;
 			goto out_free_dentry;
 		}
 	}
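
The two functional changes above are the switch from an open-coded k > TASK_SIZE test to BAD_ADDR(), and tightening BAD_ADDR() itself from ">" to ">=": TASK_SIZE is the first address that is no longer valid user space, so it must be rejected as well. A small stand-alone illustration of that boundary (the TASK_SIZE value is only an assumption for the example, typical of a 3G/1G i386 split):

/* boundary check illustration; compile as ordinary user-space C */
#include <stdio.h>

#define TASK_SIZE	0xC0000000UL	/* assumed value for the example */
#define BAD_ADDR(x)	((unsigned long)(x) >= TASK_SIZE)

int main(void)
{
	printf("%d\n", BAD_ADDR(TASK_SIZE - 1));	/* 0: last usable user address */
	printf("%d\n", BAD_ADDR(TASK_SIZE));		/* 1: rejected only with ">=" */
	return 0;
}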
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9633a490dab0..37534573960b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	if (!bo)
 		return -ENOMEM;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	res = bd_claim(bdev, holder);
 	if (res || !add_bd_holder(bdev, bo))
 		free_bd_holder(bo);
@@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev,
 	if (!kobj)
 		return;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	bd_release(bdev);
 	if ((bo = del_bd_holder(bdev, kobj)))
 		free_bd_holder(bo);
@@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
 
 EXPORT_SYMBOL(open_by_devnum);
 
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags);
+
+struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode)
+{
+	struct block_device *bdev = bdget(dev);
+	int err = -ENOMEM;
+	int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
+	if (bdev)
+		err = blkdev_get_partition(bdev, mode, flags);
+	return err ? ERR_PTR(err) : bdev;
+}
+
+EXPORT_SYMBOL(open_partition_by_devnum);
+
+
 /*
  * This routine checks whether a removable media has been changed,
  * and invalidates all buffer-cache-entries in that case. This
@@ -868,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
-static int do_open(struct block_device *bdev, struct file *file)
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
+
+static int
+do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
@@ -885,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file)
 	}
 	owner = disk->fops->owner;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
+
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
 		bdev->bd_contains = bdev;
@@ -912,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file)
 			ret = -ENOMEM;
 			if (!whole)
 				goto out_first;
-			ret = blkdev_get(whole, file->f_mode, file->f_flags);
+			ret = blkdev_get_whole(whole, file->f_mode, file->f_flags);
 			if (ret)
 				goto out_first;
 			bdev->bd_contains = whole;
-			mutex_lock(&whole->bd_mutex);
+			mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE);
 			whole->bd_part_count++;
 			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
@@ -944,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file)
 		if (bdev->bd_invalidated)
 			rescan_partitions(bdev->bd_disk, bdev);
 	} else {
-		mutex_lock(&bdev->bd_contains->bd_mutex);
+		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+				  BD_MUTEX_PARTITION);
 		bdev->bd_contains->bd_part_count++;
 		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
@@ -985,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
 	fake_file.f_dentry = &fake_dentry;
 	fake_dentry.d_inode = bdev->bd_inode;
 
-	return do_open(bdev, &fake_file);
+	return do_open(bdev, &fake_file, BD_MUTEX_NORMAL);
 }
 
 EXPORT_SYMBOL(blkdev_get);
 
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	/*
+	 * This crockload is due to bad choice of ->open() type.
+	 * It will go away.
+	 * For now, block device ->open() routine must _not_
+	 * examine anything in 'inode' argument except ->i_rdev.
+	 */
+	struct file fake_file = {};
+	struct dentry fake_dentry = {};
+	fake_file.f_mode = mode;
+	fake_file.f_flags = flags;
+	fake_file.f_dentry = &fake_dentry;
+	fake_dentry.d_inode = bdev->bd_inode;
+
+	return do_open(bdev, &fake_file, BD_MUTEX_WHOLE);
+}
+
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	/*
+	 * This crockload is due to bad choice of ->open() type.
+	 * It will go away.
+	 * For now, block device ->open() routine must _not_
+	 * examine anything in 'inode' argument except ->i_rdev.
+	 */
+	struct file fake_file = {};
+	struct dentry fake_dentry = {};
+	fake_file.f_mode = mode;
+	fake_file.f_flags = flags;
+	fake_file.f_dentry = &fake_dentry;
+	fake_dentry.d_inode = bdev->bd_inode;
+
+	return do_open(bdev, &fake_file, BD_MUTEX_PARTITION);
+}
+
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev;
@@ -1005,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 
 	bdev = bd_acquire(inode);
 
-	res = do_open(bdev, filp);
+	res = do_open(bdev, filp, BD_MUTEX_NORMAL);
 	if (res)
 		return res;
 
@@ -1019,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return res;
 }
 
-int blkdev_put(struct block_device *bdev)
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
 {
 	int ret = 0;
 	struct inode *bd_inode = bdev->bd_inode;
 	struct gendisk *disk = bdev->bd_disk;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
 	lock_kernel();
 	if (!--bdev->bd_openers) {
 		sync_blockdev(bdev);
@@ -1035,7 +1095,8 @@ int blkdev_put(struct block_device *bdev)
 		if (disk->fops->release)
 			ret = disk->fops->release(bd_inode, NULL);
 	} else {
-		mutex_lock(&bdev->bd_contains->bd_mutex);
+		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+				  subclass + 1);
 		bdev->bd_contains->bd_part_count--;
 		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
@@ -1051,9 +1112,8 @@ int blkdev_put(struct block_device *bdev)
 		}
 		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains) {
-			blkdev_put(bdev->bd_contains);
-		}
+		if (bdev != bdev->bd_contains)
+			__blkdev_put(bdev->bd_contains, subclass + 1);
 		bdev->bd_contains = NULL;
 	}
 	unlock_kernel();
@@ -1062,8 +1122,20 @@ int blkdev_put(struct block_device *bdev)
 	return ret;
 }
 
+int blkdev_put(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+
 EXPORT_SYMBOL(blkdev_put);
 
+int blkdev_put_partition(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+
+EXPORT_SYMBOL(blkdev_put_partition);
+
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
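
open_partition_by_devnum() and blkdev_put_partition() give in-kernel callers a way to open and release a partition while keeping the new bd_mutex subclass annotations consistent with do_open()/__blkdev_put(). A hedged sketch of how such a caller might use the pair (the device number and read-only mode are purely illustrative):

#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/err.h>

/* illustrative caller, not part of the patch */
static int peek_at_partition(void)
{
	struct block_device *bdev;

	bdev = open_partition_by_devnum(MKDEV(8, 1), FMODE_READ); /* e.g. sda1 */
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/* ... read metadata from the partition ... */

	return blkdev_put_partition(bdev);
}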
diff --git a/fs/dcache.c b/fs/dcache.c
index c6e3535be192..1b4a3a34ec57 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
-static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
@@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	 */
 	if (target < dentry) {
 		spin_lock(&target->d_lock);
-		spin_lock(&dentry->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 	} else {
 		spin_lock(&dentry->d_lock);
-		spin_lock(&target->d_lock);
+		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
 	}
 
 	/* Move the dentry to the target hash queue, if on different bucket */
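
d_move() must hold both dentry locks, and both belong to the same lock class; ordering the acquisitions by address avoids an ABBA deadlock, while spin_lock_nested() marks the second acquisition as intentional nesting so lockdep does not report it as a recursive lock. The same pattern in generic form (SINGLE_DEPTH_NESTING is the stock subclass; dcache defines its own DENTRY_D_LOCK_NESTED value for this purpose):

#include <linux/spinlock.h>

/* generic "lock two objects of one class" sketch, assuming a != b */
static void lock_two(spinlock_t *a, spinlock_t *b)
{
	if (a < b) {				/* stable order: lower address first */
		spin_lock(a);
		spin_lock_nested(b, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(b);
		spin_lock_nested(a, SINGLE_DEPTH_NESTING);
	}
}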
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 538fb0418fba..5981e17f46f0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
 	if (dio->lock_type == DIO_LOCKING)
-		up_read(&dio->inode->i_alloc_sem);
+		/* lockdep: non-owner release */
+		up_read_non_owner(&dio->inode->i_alloc_sem);
 }
 
 /*
@@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	}
 
 	if (dio_lock_type == DIO_LOCKING)
-		down_read(&inode->i_alloc_sem);
+		/* lockdep: not the owner will release it */
+		down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
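
i_alloc_sem is taken by the task that submits the direct I/O but may be released from the AIO completion path, i.e. by a different task or from interrupt context. The _non_owner variants behave like down_read()/up_read() but tell lockdep not to enforce strict owner tracking for this semaphore. In outline, assuming a semaphore handed from submitter to completion handler:

#include <linux/rwsem.h>

static void submit_side(struct rw_semaphore *sem)
{
	down_read_non_owner(sem);	/* completion path will drop it */
	/* ... queue the I/O ... */
}

static void completion_side(struct rw_semaphore *sem)
{
	up_read_non_owner(sem);		/* possibly another task, or irq context */
}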
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9c677bbd0b08..19ffb043abbc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -120,7 +120,7 @@ struct epoll_filefd {
  */
 struct wake_task_node {
 	struct list_head llink;
-	task_t *task;
+	struct task_struct *task;
 	wait_queue_head_t *wq;
 };
 
@@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
 {
 	int wake_nests = 0;
 	unsigned long flags;
-	task_t *this_task = current;
+	struct task_struct *this_task = current;
 	struct list_head *lsthead = &psw->wake_task_list, *lnk;
 	struct wake_task_node *tncur;
 	struct wake_task_node tnode;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f43879d6d68..f2702cda9779 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1157,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 	struct buffer_head tmp_bh;
 	struct buffer_head *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f2dd71336612..813d589cc6c0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2614,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/namei.c b/fs/namei.c
index c784e8bb57a3..c9750d755aff 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	struct dentry *p;
 
 	if (p1 == p2) {
-		mutex_lock(&p1->d_inode->i_mutex);
+		mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
 		return NULL;
 	}
 
@@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 
 	for (p = p1; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p2) {
-			mutex_lock(&p2->d_inode->i_mutex);
-			mutex_lock(&p1->d_inode->i_mutex);
+			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
+			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
 			return p;
 		}
 	}
 
 	for (p = p2; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p1) {
-			mutex_lock(&p1->d_inode->i_mutex);
-			mutex_lock(&p2->d_inode->i_mutex);
+			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
 			return p;
 		}
 	}
 
-	mutex_lock(&p1->d_inode->i_mutex);
-	mutex_lock(&p2->d_inode->i_mutex);
+	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
 	return NULL;
 }
 
@@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 
-	mutex_lock(&nd->dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	/*
 	 * Yucky last component or no last component at all?
 	 * (foo/., foo/.., /////)
@@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname)
 		error = -EBUSY;
 		goto exit1;
 	}
-	mutex_lock(&nd.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
@@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 	error = -EISDIR;
 	if (nd.last_type != LAST_NORM)
 		goto exit1;
-	mutex_lock(&nd.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
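
lock_rename() always takes the parent directory's i_mutex before the child's; since both locks live in the same class (inode->i_mutex), the I_MUTEX_PARENT and I_MUTEX_CHILD subclasses are what let lockdep accept the double acquisition. The annotation pattern in isolation, assuming objects with a known parent/child relation (names are illustrative):

#include <linux/mutex.h>

enum { LOCK_PARENT, LOCK_CHILD };	/* stand-ins for I_MUTEX_PARENT/CHILD */

struct node {
	struct mutex lock;
	struct node *parent;
};

static void lock_parent_then_child(struct node *n)
{
	mutex_lock_nested(&n->parent->lock, LOCK_PARENT);
	mutex_lock_nested(&n->lock, LOCK_CHILD);
}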
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4c86b7e1d1eb..d313f356e66a 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
367 kmem_cache_free(ntfs_inode_cache, ni); 367 kmem_cache_free(ntfs_inode_cache, ni);
368} 368}
369 369
370/*
371 * The attribute runlist lock has separate locking rules from the
372 * normal runlist lock, so split the two lock-classes:
373 */
374static struct lock_class_key attr_list_rl_lock_class;
375
370/** 376/**
371 * __ntfs_init_inode - initialize ntfs specific part of an inode 377 * __ntfs_init_inode - initialize ntfs specific part of an inode
372 * @sb: super block of mounted volume 378 * @sb: super block of mounted volume
@@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
394 ni->attr_list_size = 0; 400 ni->attr_list_size = 0;
395 ni->attr_list = NULL; 401 ni->attr_list = NULL;
396 ntfs_init_runlist(&ni->attr_list_rl); 402 ntfs_init_runlist(&ni->attr_list_rl);
403 lockdep_set_class(&ni->attr_list_rl.lock,
404 &attr_list_rl_lock_class);
397 ni->itype.index.bmp_ino = NULL; 405 ni->itype.index.bmp_ino = NULL;
398 ni->itype.index.block_size = 0; 406 ni->itype.index.block_size = 0;
399 ni->itype.index.vcn_size = 0; 407 ni->itype.index.vcn_size = 0;
@@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
405 ni->ext.base_ntfs_ino = NULL; 413 ni->ext.base_ntfs_ino = NULL;
406} 414}
407 415
416/*
417 * Extent inodes get MFT-mapped in a nested way, while the base inode
418 * is still mapped. Teach this nesting to the lock validator by creating
419 * a separate class for nested inode's mrec_lock's:
420 */
421static struct lock_class_key extent_inode_mrec_lock_key;
422
408inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, 423inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
409 unsigned long mft_no) 424 unsigned long mft_no)
410{ 425{
@@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
413 ntfs_debug("Entering."); 428 ntfs_debug("Entering.");
414 if (likely(ni != NULL)) { 429 if (likely(ni != NULL)) {
415 __ntfs_init_inode(sb, ni); 430 __ntfs_init_inode(sb, ni);
431 lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
416 ni->mft_no = mft_no; 432 ni->mft_no = mft_no;
417 ni->type = AT_UNUSED; 433 ni->type = AT_UNUSED;
418 ni->name = NULL; 434 ni->name = NULL;
@@ -1722,6 +1738,15 @@ err_out:
1722 return err; 1738 return err;
1723} 1739}
1724 1740
1741/*
1742 * The MFT inode has special locking, so teach the lock validator
1743 * about this by splitting off the locking rules of the MFT from
1744 * the locking rules of other inodes. The MFT inode can never be
1745 * accessed from the VFS side (or even internally), only by the
1746 * map_mft functions.
1747 */
1748static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
1749
1725/** 1750/**
1726 * ntfs_read_inode_mount - special read_inode for mount time use only 1751 * ntfs_read_inode_mount - special read_inode for mount time use only
1727 * @vi: inode to read 1752 * @vi: inode to read
@@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi)
2148 ntfs_attr_put_search_ctx(ctx); 2173 ntfs_attr_put_search_ctx(ctx);
2149 ntfs_debug("Done."); 2174 ntfs_debug("Done.");
2150 ntfs_free(m); 2175 ntfs_free(m);
2176
2177 /*
2178 * Split the locking rules of the MFT inode from the
2179 * locking rules of other inodes:
2180 */
2181 lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
2182 lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
2183
2151 return 0; 2184 return 0;
2152 2185
2153em_put_err_out: 2186em_put_err_out:
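The NTFS inode hunks all use one idiom: declare a static struct lock_class_key and move a particular lock instance into its own class with lockdep_set_class(), so that a legitimate nested acquisition of two same-typed locks (extent vs. base inode, MFT vs. ordinary inode) is not reported as a self-deadlock. A generic sketch of that idiom, with hypothetical names (nested_obj_key, struct obj):

#include <linux/lockdep.h>
#include <linux/spinlock.h>

/* One static key per distinct locking rule; every instance reclassified
 * with it ends up in the same, separate lockdep class.
 */
static struct lock_class_key nested_obj_key;		/* hypothetical */

struct obj {						/* hypothetical */
	spinlock_t lock;
};

static void init_nested_obj(struct obj *o)
{
	spin_lock_init(&o->lock);
	/* Split this instance off from the default class of its lock type: */
	lockdep_set_class(&o->lock, &nested_obj_key);
}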
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0e14acea3f8b..74e0ee8fce72 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1724,6 +1724,14 @@ upcase_failed:
1724 return FALSE; 1724 return FALSE;
1725} 1725}
1726 1726
1727/*
1728 * The lcn and mft bitmap inodes are NTFS-internal inodes with
1729 * their own special locking rules:
1730 */
1731static struct lock_class_key
1732 lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key,
1733 mftbmp_runlist_lock_key, mftbmp_mrec_lock_key;
1734
1727/** 1735/**
1728 * load_system_files - open the system files using normal functions 1736 * load_system_files - open the system files using normal functions
1729 * @vol: ntfs super block describing device whose system files to load 1737 * @vol: ntfs super block describing device whose system files to load
@@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol)
1780 ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); 1788 ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");
1781 goto iput_mirr_err_out; 1789 goto iput_mirr_err_out;
1782 } 1790 }
1791 lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock,
1792 &mftbmp_runlist_lock_key);
1793 lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock,
1794 &mftbmp_mrec_lock_key);
1783 /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */ 1795 /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
1784 if (!load_and_init_upcase(vol)) 1796 if (!load_and_init_upcase(vol))
1785 goto iput_mftbmp_err_out; 1797 goto iput_mftbmp_err_out;
@@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol)
1802 iput(vol->lcnbmp_ino); 1814 iput(vol->lcnbmp_ino);
1803 goto bitmap_failed; 1815 goto bitmap_failed;
1804 } 1816 }
1817 lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock,
1818 &lcnbmp_runlist_lock_key);
1819 lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock,
1820 &lcnbmp_mrec_lock_key);
1821
1805 NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino)); 1822 NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
1806 if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) { 1823 if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
1807 iput(vol->lcnbmp_ino); 1824 iput(vol->lcnbmp_ino);
@@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2743 struct inode *tmp_ino; 2760 struct inode *tmp_ino;
2744 int blocksize, result; 2761 int blocksize, result;
2745 2762
2763 /*
2764 * We do a pretty difficult piece of bootstrap by reading the
2765 * MFT (and other metadata) from disk into memory. We'll only
2766 * release this metadata during umount, so the locking patterns
2767 * observed during bootstrap do not count. So turn off the
2768 * observation of locking patterns (strictly for this context
2769 * only) while mounting NTFS. [The validator is still active
2770 * otherwise, even for this context: it will for example record
2771 * lock class registrations.]
2772 */
2773 lockdep_off();
2746 ntfs_debug("Entering."); 2774 ntfs_debug("Entering.");
2747#ifndef NTFS_RW 2775#ifndef NTFS_RW
2748 sb->s_flags |= MS_RDONLY; 2776 sb->s_flags |= MS_RDONLY;
@@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2754 if (!silent) 2782 if (!silent)
2755 ntfs_error(sb, "Allocation of NTFS volume structure " 2783 ntfs_error(sb, "Allocation of NTFS volume structure "
2756 "failed. Aborting mount..."); 2784 "failed. Aborting mount...");
2785 lockdep_on();
2757 return -ENOMEM; 2786 return -ENOMEM;
2758 } 2787 }
2759 /* Initialize ntfs_volume structure. */ 2788 /* Initialize ntfs_volume structure. */
@@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2940 mutex_unlock(&ntfs_lock); 2969 mutex_unlock(&ntfs_lock);
2941 sb->s_export_op = &ntfs_export_ops; 2970 sb->s_export_op = &ntfs_export_ops;
2942 lock_kernel(); 2971 lock_kernel();
2972 lockdep_on();
2943 return 0; 2973 return 0;
2944 } 2974 }
2945 ntfs_error(sb, "Failed to allocate root directory."); 2975 ntfs_error(sb, "Failed to allocate root directory.");
@@ -3059,6 +3089,7 @@ err_out_now:
3059 sb->s_fs_info = NULL; 3089 sb->s_fs_info = NULL;
3060 kfree(vol); 3090 kfree(vol);
3061 ntfs_debug("Failed, returning -EINVAL."); 3091 ntfs_debug("Failed, returning -EINVAL.");
3092 lockdep_on();
3062 return -EINVAL; 3093 return -EINVAL;
3063} 3094}
3064 3095
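The ntfs_fill_super() change brackets the mount-time bootstrap with lockdep_off()/lockdep_on(), and every early-return path has to switch the validator back on, which is why the error exits above also gain a lockdep_on() call. A hedged sketch of that bracketing, where do_bootstrap() is a hypothetical stand-in for the one-time metadata setup:

#include <linux/fs.h>
#include <linux/lockdep.h>

static int do_bootstrap(struct super_block *sb);	/* hypothetical helper */

static int fill_super_sketch(struct super_block *sb)
{
	int err;

	lockdep_off();			/* stop recording lock dependencies */
	err = do_bootstrap(sb);		/* locking here follows mount-only rules */
	lockdep_on();			/* re-enable on *every* return path */

	return err;
}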
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 28eb3c886034..5567328f1041 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2203 size_t towrite = len; 2203 size_t towrite = len;
2204 struct buffer_head tmp_bh, *bh; 2204 struct buffer_head tmp_bh, *bh;
2205 2205
2206 mutex_lock(&inode->i_mutex); 2206 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2207 while (towrite > 0) { 2207 while (towrite > 0) {
2208 tocopy = sb->s_blocksize - offset < towrite ? 2208 tocopy = sb->s_blocksize - offset < towrite ?
2209 sb->s_blocksize - offset : towrite; 2209 sb->s_blocksize - offset : towrite;
diff --git a/fs/super.c b/fs/super.c
index 9b780c42d845..6d4e8174b6db 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -53,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock);
53 * Allocates and initializes a new &struct super_block. alloc_super() 53 * Allocates and initializes a new &struct super_block. alloc_super()
54 * returns a pointer new superblock or %NULL if allocation had failed. 54 * returns a pointer new superblock or %NULL if allocation had failed.
55 */ 55 */
56static struct super_block *alloc_super(void) 56static struct super_block *alloc_super(struct file_system_type *type)
57{ 57{
58 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); 58 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
59 static struct super_operations default_op; 59 static struct super_operations default_op;
@@ -72,6 +72,13 @@ static struct super_block *alloc_super(void)
72 INIT_LIST_HEAD(&s->s_inodes); 72 INIT_LIST_HEAD(&s->s_inodes);
73 init_rwsem(&s->s_umount); 73 init_rwsem(&s->s_umount);
74 mutex_init(&s->s_lock); 74 mutex_init(&s->s_lock);
75 lockdep_set_class(&s->s_umount, &type->s_umount_key);
76 /*
77 * The locking rules for s_lock are up to the
78 * filesystem. For example ext3fs has different
79 * lock ordering than usbfs:
80 */
81 lockdep_set_class(&s->s_lock, &type->s_lock_key);
75 down_write(&s->s_umount); 82 down_write(&s->s_umount);
76 s->s_count = S_BIAS; 83 s->s_count = S_BIAS;
77 atomic_set(&s->s_active, 1); 84 atomic_set(&s->s_active, 1);
@@ -295,7 +302,7 @@ retry:
295 } 302 }
296 if (!s) { 303 if (!s) {
297 spin_unlock(&sb_lock); 304 spin_unlock(&sb_lock);
298 s = alloc_super(); 305 s = alloc_super(type);
299 if (!s) 306 if (!s)
300 return ERR_PTR(-ENOMEM); 307 return ERR_PTR(-ENOMEM);
301 goto retry; 308 goto retry;
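alloc_super() now takes the file_system_type so it can key s_umount and s_lock off per-filesystem lock_class_key members (added to struct file_system_type elsewhere in this patch). The effect is one lockdep class per filesystem type rather than one per lock type, so ext3's s_lock ordering can never be conflated with usbfs's. A small sketch of the classification step in isolation, assuming those s_umount_key/s_lock_key fields:

#include <linux/fs.h>
#include <linux/lockdep.h>

static void classify_super(struct super_block *s, struct file_system_type *type)
{
	/* Every superblock of a given fs type shares the same two classes: */
	lockdep_set_class(&s->s_umount, &type->s_umount_key);
	lockdep_set_class(&s->s_lock, &type->s_lock_key);
}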
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 19a99726e58d..992ee0b87cc3 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
1326 size_t towrite = len; 1326 size_t towrite = len;
1327 struct buffer_head *bh; 1327 struct buffer_head *bh;
1328 1328
1329 mutex_lock(&inode->i_mutex); 1329 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
1330 while (towrite > 0) { 1330 while (towrite > 0) {
1331 tocopy = sb->s_blocksize - offset < towrite ? 1331 tocopy = sb->s_blocksize - offset < towrite ?
1332 sb->s_blocksize - offset : towrite; 1332 sb->s_blocksize - offset : towrite;
diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h
index fafdd4f7010a..1570c0b54336 100644
--- a/include/asm-alpha/rwsem.h
+++ b/include/asm-alpha/rwsem.h
@@ -36,20 +36,11 @@ struct rw_semaphore {
36#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 36#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
37 spinlock_t wait_lock; 37 spinlock_t wait_lock;
38 struct list_head wait_list; 38 struct list_head wait_list;
39#if RWSEM_DEBUG
40 int debug;
41#endif
42}; 39};
43 40
44#if RWSEM_DEBUG
45#define __RWSEM_DEBUG_INIT , 0
46#else
47#define __RWSEM_DEBUG_INIT /* */
48#endif
49
50#define __RWSEM_INITIALIZER(name) \ 41#define __RWSEM_INITIALIZER(name) \
51 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 42 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
52 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } 43 LIST_HEAD_INIT((name).wait_list) }
53 44
54#define DECLARE_RWSEM(name) \ 45#define DECLARE_RWSEM(name) \
55 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 46 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -59,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
59 sem->count = RWSEM_UNLOCKED_VALUE; 50 sem->count = RWSEM_UNLOCKED_VALUE;
60 spin_lock_init(&sem->wait_lock); 51 spin_lock_init(&sem->wait_lock);
61 INIT_LIST_HEAD(&sem->wait_list); 52 INIT_LIST_HEAD(&sem->wait_list);
62#if RWSEM_DEBUG
63 sem->debug = 0;
64#endif
65} 53}
66 54
67static inline void __down_read(struct rw_semaphore *sem) 55static inline void __down_read(struct rw_semaphore *sem)
diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h
index 5cf8b7ce0c45..254a126ede5c 100644
--- a/include/asm-generic/mutex-null.h
+++ b/include/asm-generic/mutex-null.h
@@ -10,15 +10,10 @@
10#ifndef _ASM_GENERIC_MUTEX_NULL_H 10#ifndef _ASM_GENERIC_MUTEX_NULL_H
11#define _ASM_GENERIC_MUTEX_NULL_H 11#define _ASM_GENERIC_MUTEX_NULL_H
12 12
13/* extra parameter only needed for mutex debugging: */ 13#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count)
14#ifndef __IP__ 14#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count)
15# define __IP__ 15#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count)
16#endif 16#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count)
17 17#define __mutex_slowpath_needs_to_unlock() 1
18#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count __RET_IP__)
19#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count __RET_IP__)
20#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count __RET_IP__)
21#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count)
22#define __mutex_slowpath_needs_to_unlock() 1
23 18
24#endif 19#endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index c74521157461..e160e04290fb 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -7,6 +7,8 @@
7 7
8extern unsigned long __per_cpu_offset[NR_CPUS]; 8extern unsigned long __per_cpu_offset[NR_CPUS];
9 9
10#define per_cpu_offset(x) (__per_cpu_offset[x])
11
10/* Separate out the type, so (int[3], foo) works. */ 12/* Separate out the type, so (int[3], foo) works. */
11#define DEFINE_PER_CPU(type, name) \ 13#define DEFINE_PER_CPU(type, name) \
12 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name 14 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
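The only change here is a per_cpu_offset(x) accessor; the validator's static-object check uses it to decide whether an address falls inside some CPU's static percpu area. A hedged sketch of that kind of test (the helper name and the use of the __per_cpu_start linker symbol are assumptions, not part of this hunk):

#include <linux/percpu.h>
#include <asm/sections.h>	/* __per_cpu_start */

static int addr_is_static_percpu(unsigned long addr)	/* hypothetical */
{
	unsigned long start, end;
	int cpu;

	for_each_possible_cpu(cpu) {
		start = (unsigned long)__per_cpu_start + per_cpu_offset(cpu);
		end   = start + PERCPU_ENOUGH_ROOM;
		if (addr >= start && addr < end)
			return 1;
	}
	return 0;
}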
diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h
new file mode 100644
index 000000000000..e1bdb97c07fa
--- /dev/null
+++ b/include/asm-i386/irqflags.h
@@ -0,0 +1,127 @@
1/*
2 * include/asm-i386/irqflags.h
3 *
4 * IRQ flags handling
5 *
6 * This file gets included from lowlevel asm headers too, to provide
7 * wrapped versions of the local_irq_*() APIs, based on the
8 * raw_local_irq_*() functions from the lowlevel headers.
9 */
10#ifndef _ASM_IRQFLAGS_H
11#define _ASM_IRQFLAGS_H
12
13#ifndef __ASSEMBLY__
14
15static inline unsigned long __raw_local_save_flags(void)
16{
17 unsigned long flags;
18
19 __asm__ __volatile__(
20 "pushfl ; popl %0"
21 : "=g" (flags)
22 : /* no input */
23 );
24
25 return flags;
26}
27
28#define raw_local_save_flags(flags) \
29 do { (flags) = __raw_local_save_flags(); } while (0)
30
31static inline void raw_local_irq_restore(unsigned long flags)
32{
33 __asm__ __volatile__(
34 "pushl %0 ; popfl"
35 : /* no output */
36 :"g" (flags)
37 :"memory", "cc"
38 );
39}
40
41static inline void raw_local_irq_disable(void)
42{
43 __asm__ __volatile__("cli" : : : "memory");
44}
45
46static inline void raw_local_irq_enable(void)
47{
48 __asm__ __volatile__("sti" : : : "memory");
49}
50
51/*
52 * Used in the idle loop; sti takes one instruction cycle
53 * to complete:
54 */
55static inline void raw_safe_halt(void)
56{
57 __asm__ __volatile__("sti; hlt" : : : "memory");
58}
59
60/*
61 * Used when interrupts are already enabled or to
62 * shutdown the processor:
63 */
64static inline void halt(void)
65{
66 __asm__ __volatile__("hlt": : :"memory");
67}
68
69static inline int raw_irqs_disabled_flags(unsigned long flags)
70{
71 return !(flags & (1 << 9));
72}
73
74static inline int raw_irqs_disabled(void)
75{
76 unsigned long flags = __raw_local_save_flags();
77
78 return raw_irqs_disabled_flags(flags);
79}
80
81/*
82 * For spinlocks, etc:
83 */
84static inline unsigned long __raw_local_irq_save(void)
85{
86 unsigned long flags = __raw_local_save_flags();
87
88 raw_local_irq_disable();
89
90 return flags;
91}
92
93#define raw_local_irq_save(flags) \
94 do { (flags) = __raw_local_irq_save(); } while (0)
95
96#endif /* __ASSEMBLY__ */
97
98/*
99 * Do the CPU's IRQ-state tracing from assembly code. We call a
100 * C function, so save all the C-clobbered registers:
101 */
102#ifdef CONFIG_TRACE_IRQFLAGS
103
104# define TRACE_IRQS_ON \
105 pushl %eax; \
106 pushl %ecx; \
107 pushl %edx; \
108 call trace_hardirqs_on; \
109 popl %edx; \
110 popl %ecx; \
111 popl %eax;
112
113# define TRACE_IRQS_OFF \
114 pushl %eax; \
115 pushl %ecx; \
116 pushl %edx; \
117 call trace_hardirqs_off; \
118 popl %edx; \
119 popl %ecx; \
120 popl %eax;
121
122#else
123# define TRACE_IRQS_ON
124# define TRACE_IRQS_OFF
125#endif
126
127#endif
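This new header only provides the raw_*() primitives; the generic linux/irqflags.h layer (also new in this series) wraps them with hardirq state tracing. Roughly how such a wrapper composes the raw ops with trace_hardirqs_on()/trace_hardirqs_off() -- a simplified sketch with my_ prefixes, not the real macro bodies:

#define my_local_irq_save(flags)				\
	do {							\
		raw_local_irq_save(flags);			\
		trace_hardirqs_off();				\
	} while (0)

#define my_local_irq_restore(flags)				\
	do {							\
		if (raw_irqs_disabled_flags(flags)) {		\
			raw_local_irq_restore(flags);		\
			trace_hardirqs_off();			\
		} else {					\
			trace_hardirqs_on();			\
			raw_local_irq_restore(flags);		\
		}						\
	} while (0)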
diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index be4ab859238e..2f07601562e7 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -40,6 +40,7 @@
40 40
41#include <linux/list.h> 41#include <linux/list.h>
42#include <linux/spinlock.h> 42#include <linux/spinlock.h>
43#include <linux/lockdep.h>
43 44
44struct rwsem_waiter; 45struct rwsem_waiter;
45 46
@@ -61,36 +62,34 @@ struct rw_semaphore {
61#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 62#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
62 spinlock_t wait_lock; 63 spinlock_t wait_lock;
63 struct list_head wait_list; 64 struct list_head wait_list;
64#if RWSEM_DEBUG 65#ifdef CONFIG_DEBUG_LOCK_ALLOC
65 int debug; 66 struct lockdep_map dep_map;
66#endif 67#endif
67}; 68};
68 69
69/* 70#ifdef CONFIG_DEBUG_LOCK_ALLOC
70 * initialisation 71# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
71 */
72#if RWSEM_DEBUG
73#define __RWSEM_DEBUG_INIT , 0
74#else 72#else
75#define __RWSEM_DEBUG_INIT /* */ 73# define __RWSEM_DEP_MAP_INIT(lockname)
76#endif 74#endif
77 75
76
78#define __RWSEM_INITIALIZER(name) \ 77#define __RWSEM_INITIALIZER(name) \
79{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ 78{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
80 __RWSEM_DEBUG_INIT } 79 __RWSEM_DEP_MAP_INIT(name) }
81 80
82#define DECLARE_RWSEM(name) \ 81#define DECLARE_RWSEM(name) \
83 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 82 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
84 83
85static inline void init_rwsem(struct rw_semaphore *sem) 84extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
86{ 85 struct lock_class_key *key);
87 sem->count = RWSEM_UNLOCKED_VALUE; 86
88 spin_lock_init(&sem->wait_lock); 87#define init_rwsem(sem) \
89 INIT_LIST_HEAD(&sem->wait_list); 88do { \
90#if RWSEM_DEBUG 89 static struct lock_class_key __key; \
91 sem->debug = 0; 90 \
92#endif 91 __init_rwsem((sem), #sem, &__key); \
93} 92} while (0)
94 93
95/* 94/*
96 * lock for reading 95 * lock for reading
@@ -143,7 +142,7 @@ LOCK_PREFIX " cmpxchgl %2,%0\n\t"
143/* 142/*
144 * lock for writing 143 * lock for writing
145 */ 144 */
146static inline void __down_write(struct rw_semaphore *sem) 145static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
147{ 146{
148 int tmp; 147 int tmp;
149 148
@@ -167,6 +166,11 @@ LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the
167 : "memory", "cc"); 166 : "memory", "cc");
168} 167}
169 168
169static inline void __down_write(struct rw_semaphore *sem)
170{
171 __down_write_nested(sem, 0);
172}
173
170/* 174/*
171 * trylock for writing -- returns 1 if successful, 0 if contention 175 * trylock for writing -- returns 1 if successful, 0 if contention
172 */ 176 */
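init_rwsem() is now a macro so that every initialization call site declares its own static struct lock_class_key, i.e. rwsems are classified by where they were initialized rather than by address or by C type. A quick usage sketch (the two semaphores are hypothetical):

#include <linux/rwsem.h>

static struct rw_semaphore tree_sem, cache_sem;		/* hypothetical */

static void setup_sems(void)
{
	/*
	 * Each init_rwsem() expansion carries its own static key, so
	 * tree_sem and cache_sem land in two distinct lockdep classes
	 * even though they are the same type.
	 */
	init_rwsem(&tree_sem);
	init_rwsem(&cache_sem);
}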
diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index 04ba30234c48..87c40f830653 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -31,6 +31,11 @@
31 "jmp 1b\n" \ 31 "jmp 1b\n" \
32 "3:\n\t" 32 "3:\n\t"
33 33
34/*
35 * NOTE: there's an irqs-on section here, which normally would have to be
36 * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use
37 * __raw_spin_lock_string_flags().
38 */
34#define __raw_spin_lock_string_flags \ 39#define __raw_spin_lock_string_flags \
35 "\n1:\t" \ 40 "\n1:\t" \
36 "lock ; decb %0\n\t" \ 41 "lock ; decb %0\n\t" \
@@ -63,6 +68,12 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
63 "=m" (lock->slock) : : "memory"); 68 "=m" (lock->slock) : : "memory");
64} 69}
65 70
71/*
72 * It is easier for the lock validator if interrupts are not re-enabled
73 * in the middle of a lock-acquire. This is a performance feature anyway
74 * so we turn it off:
75 */
76#ifndef CONFIG_PROVE_LOCKING
66static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) 77static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
67{ 78{
68 alternative_smp( 79 alternative_smp(
@@ -70,6 +81,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
70 __raw_spin_lock_string_up, 81 __raw_spin_lock_string_up,
71 "=m" (lock->slock) : "r" (flags) : "memory"); 82 "=m" (lock->slock) : "r" (flags) : "memory");
72} 83}
84#endif
73 85
74static inline int __raw_spin_trylock(raw_spinlock_t *lock) 86static inline int __raw_spin_trylock(raw_spinlock_t *lock)
75{ 87{
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index cab0180567f9..db398d88b1d9 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -456,25 +456,7 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l
456 456
457#define set_wmb(var, value) do { var = value; wmb(); } while (0) 457#define set_wmb(var, value) do { var = value; wmb(); } while (0)
458 458
459/* interrupt control.. */ 459#include <linux/irqflags.h>
460#define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
461#define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
462#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
463#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
464/* used in the idle loop; sti takes one instruction cycle to complete */
465#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
466/* used when interrupts are already enabled or to shutdown the processor */
467#define halt() __asm__ __volatile__("hlt": : :"memory")
468
469#define irqs_disabled() \
470({ \
471 unsigned long flags; \
472 local_save_flags(flags); \
473 !(flags & (1<<9)); \
474})
475
476/* For spinlocks etc */
477#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
478 460
479/* 461/*
480 * disable hlt during certain critical i/o operations 462 * disable hlt during certain critical i/o operations
diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h
index 8acb00190d5a..79479e2c6966 100644
--- a/include/asm-ia64/irq.h
+++ b/include/asm-ia64/irq.h
@@ -14,8 +14,6 @@
14#define NR_IRQS 256 14#define NR_IRQS 256
15#define NR_IRQ_VECTORS NR_IRQS 15#define NR_IRQ_VECTORS NR_IRQS
16 16
17#define IRQF_PERCPU 0x02000000
18
19static __inline__ int 17static __inline__ int
20irq_canonicalize (int irq) 18irq_canonicalize (int irq)
21{ 19{
diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h
index 24d898b650c5..fbe5cf3ab8dc 100644
--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -36,6 +36,7 @@
36#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
37 37
38extern unsigned long __per_cpu_offset[NR_CPUS]; 38extern unsigned long __per_cpu_offset[NR_CPUS];
39#define per_cpu_offset(x) (__per_cpu_offset(x))
39 40
40/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ 41/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
41DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); 42DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h
index 1327c91ea39c..2d1640cc240a 100644
--- a/include/asm-ia64/rwsem.h
+++ b/include/asm-ia64/rwsem.h
@@ -33,9 +33,6 @@ struct rw_semaphore {
33 signed long count; 33 signed long count;
34 spinlock_t wait_lock; 34 spinlock_t wait_lock;
35 struct list_head wait_list; 35 struct list_head wait_list;
36#if RWSEM_DEBUG
37 int debug;
38#endif
39}; 36};
40 37
41#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) 38#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
@@ -45,19 +42,9 @@ struct rw_semaphore {
45#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 42#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
46#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 43#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
47 44
48/*
49 * initialization
50 */
51#if RWSEM_DEBUG
52#define __RWSEM_DEBUG_INIT , 0
53#else
54#define __RWSEM_DEBUG_INIT /* */
55#endif
56
57#define __RWSEM_INITIALIZER(name) \ 45#define __RWSEM_INITIALIZER(name) \
58 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 46 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
59 LIST_HEAD_INIT((name).wait_list) \ 47 LIST_HEAD_INIT((name).wait_list) }
60 __RWSEM_DEBUG_INIT }
61 48
62#define DECLARE_RWSEM(name) \ 49#define DECLARE_RWSEM(name) \
63 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 50 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -73,9 +60,6 @@ init_rwsem (struct rw_semaphore *sem)
73 sem->count = RWSEM_UNLOCKED_VALUE; 60 sem->count = RWSEM_UNLOCKED_VALUE;
74 spin_lock_init(&sem->wait_lock); 61 spin_lock_init(&sem->wait_lock);
75 INIT_LIST_HEAD(&sem->wait_list); 62 INIT_LIST_HEAD(&sem->wait_list);
76#if RWSEM_DEBUG
77 sem->debug = 0;
78#endif
79} 63}
80 64
81/* 65/*
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 8bc9869e5765..8adcde0934ca 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -68,7 +68,7 @@ struct thread_info {
68#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) 68#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
69 69
70#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 70#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
71#define alloc_task_struct() ((task_t *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) 71#define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
72#define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) 72#define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
73 73
74#endif /* !__ASSEMBLY */ 74#endif /* !__ASSEMBLY */
diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h
index 66c4742f09e7..311cebf44eff 100644
--- a/include/asm-m32r/system.h
+++ b/include/asm-m32r/system.h
@@ -18,7 +18,7 @@
18 * switch_to(prev, next) should switch from task `prev' to `next' 18 * switch_to(prev, next) should switch from task `prev' to `next'
19 * `prev' will never be the same as `next'. 19 * `prev' will never be the same as `next'.
20 * 20 *
21 * `next' and `prev' should be task_t, but it isn't always defined 21 * `next' and `prev' should be struct task_struct, but it isn't always defined
22 */ 22 */
23 23
24#define switch_to(prev, next, last) do { \ 24#define switch_to(prev, next, last) do { \
diff --git a/include/asm-powerpc/i8259.h b/include/asm-powerpc/i8259.h
index 0392159e16e4..c80e113052cd 100644
--- a/include/asm-powerpc/i8259.h
+++ b/include/asm-powerpc/i8259.h
@@ -4,11 +4,13 @@
4 4
5#include <linux/irq.h> 5#include <linux/irq.h>
6 6
7extern struct hw_interrupt_type i8259_pic; 7#ifdef CONFIG_PPC_MERGE
8 8extern void i8259_init(struct device_node *node, unsigned long intack_addr);
9extern unsigned int i8259_irq(struct pt_regs *regs);
10#else
9extern void i8259_init(unsigned long intack_addr, int offset); 11extern void i8259_init(unsigned long intack_addr, int offset);
10extern int i8259_irq(struct pt_regs *regs); 12extern int i8259_irq(struct pt_regs *regs);
11extern int i8259_irq_cascade(struct pt_regs *regs, void *unused); 13#endif
12 14
13#endif /* __KERNEL__ */ 15#endif /* __KERNEL__ */
14#endif /* _ASM_POWERPC_I8259_H */ 16#endif /* _ASM_POWERPC_I8259_H */
diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h
index eb5f33e1977a..e05754752028 100644
--- a/include/asm-powerpc/irq.h
+++ b/include/asm-powerpc/irq.h
@@ -9,26 +9,14 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/config.h>
12#include <linux/threads.h> 13#include <linux/threads.h>
14#include <linux/list.h>
15#include <linux/radix-tree.h>
13 16
14#include <asm/types.h> 17#include <asm/types.h>
15#include <asm/atomic.h> 18#include <asm/atomic.h>
16 19
17/* this number is used when no interrupt has been assigned */
18#define NO_IRQ (-1)
19
20/*
21 * These constants are used for passing information about interrupt
22 * signal polarity and level/edge sensing to the low-level PIC chip
23 * drivers.
24 */
25#define IRQ_SENSE_MASK 0x1
26#define IRQ_SENSE_LEVEL 0x1 /* interrupt on active level */
27#define IRQ_SENSE_EDGE 0x0 /* interrupt triggered by edge */
28
29#define IRQ_POLARITY_MASK 0x2
30#define IRQ_POLARITY_POSITIVE 0x2 /* high level or low->high edge */
31#define IRQ_POLARITY_NEGATIVE 0x0 /* low level or high->low edge */
32 20
33#define get_irq_desc(irq) (&irq_desc[(irq)]) 21#define get_irq_desc(irq) (&irq_desc[(irq)])
34 22
@@ -36,50 +24,325 @@
36#define for_each_irq(i) \ 24#define for_each_irq(i) \
37 for ((i) = 0; (i) < NR_IRQS; ++(i)) 25 for ((i) = 0; (i) < NR_IRQS; ++(i))
38 26
39#ifdef CONFIG_PPC64 27extern atomic_t ppc_n_lost_interrupts;
40 28
41/* 29#ifdef CONFIG_PPC_MERGE
42 * Maximum number of interrupt sources that we can handle. 30
31/* This number is used when no interrupt has been assigned */
32#define NO_IRQ (0)
33
34/* This is a special irq number to return from get_irq() to tell that
35 * no interrupt happened _and_ ignore it (don't count it as bad). Some
36 * platforms like iSeries rely on that.
43 */ 37 */
38#define NO_IRQ_IGNORE ((unsigned int)-1)
39
 40/* Total number of virqs in the platform (make it a CONFIG_* option?) */
44#define NR_IRQS 512 41#define NR_IRQS 512
45 42
46/* Interrupt numbers are virtual in case they are sparsely 43/* Number of irqs reserved for the legacy controller */
47 * distributed by the hardware. 44#define NUM_ISA_INTERRUPTS 16
45
46/* This type is the placeholder for a hardware interrupt number. It has to
47 * be big enough to enclose whatever representation is used by a given
48 * platform.
49 */
50typedef unsigned long irq_hw_number_t;
51
52/* Interrupt controller "host" data structure. This could be defined as a
53 * irq domain controller. That is, it handles the mapping between hardware
54 * and virtual interrupt numbers for a given interrupt domain. The host
55 * structure is generally created by the PIC code for a given PIC instance
56 * (though a host can cover more than one PIC if they have a flat number
57 * model). It's the host callbacks that are responsible for setting the
58 * irq_chip on a given irq_desc after it's been mapped.
59 *
60 * The host code and data structures are fairly agnostic to the fact that
61 * we use an open firmware device-tree. We do have references to struct
62 * device_node in two places: in irq_find_host() to find the host matching
63 * a given interrupt controller node, and of course as an argument to its
64 * counterpart host->ops->match() callback. However, those are treated as
65 * generic pointers by the core and the fact that it's actually a device-node
66 * pointer is purely a convention between callers and implementation. This
67 * code could thus be used on other architectures by replacing those two
68 * by some sort of arch-specific void * "token" used to identify interrupt
69 * controllers.
48 */ 70 */
49extern unsigned int virt_irq_to_real_map[NR_IRQS]; 71struct irq_host;
72struct radix_tree_root;
50 73
51/* The maximum virtual IRQ number that we support. This 74/* Functions below are provided by the host and called whenever a new mapping
52 * can be set by the platform and will be reduced by the 75 * is created or an old mapping is disposed. The host can then proceed to
53 * value of __irq_offset_value. It defaults to and is 76 * whatever internal data structures management is required. It also needs
54 * capped by (NR_IRQS - 1). 77 * to setup the irq_desc when returning from map().
55 */ 78 */
56extern unsigned int virt_irq_max; 79struct irq_host_ops {
80 /* Match an interrupt controller device node to a host, returns
81 * 1 on a match
82 */
83 int (*match)(struct irq_host *h, struct device_node *node);
84
85 /* Create or update a mapping between a virtual irq number and a hw
86 * irq number. This can be called several times for the same mapping
87 * but with different flags, though unmap shall always be called
88 * before the virq->hw mapping is changed.
89 */
90 int (*map)(struct irq_host *h, unsigned int virq,
91 irq_hw_number_t hw, unsigned int flags);
92
93 /* Dispose of such a mapping */
94 void (*unmap)(struct irq_host *h, unsigned int virq);
95
96 /* Translate device-tree interrupt specifier from raw format coming
97 * from the firmware to a irq_hw_number_t (interrupt line number) and
98 * trigger flags that can be passed to irq_create_mapping().
99 * If no translation is provided, raw format is assumed to be one cell
100 * for interrupt line and default sense.
101 */
102 int (*xlate)(struct irq_host *h, struct device_node *ctrler,
103 u32 *intspec, unsigned int intsize,
104 irq_hw_number_t *out_hwirq, unsigned int *out_flags);
105};
106
107struct irq_host {
108 struct list_head link;
109
110 /* type of reverse mapping technique */
111 unsigned int revmap_type;
112#define IRQ_HOST_MAP_LEGACY 0 /* legacy 8259, gets irqs 1..15 */
113#define IRQ_HOST_MAP_NOMAP 1 /* no fast reverse mapping */
114#define IRQ_HOST_MAP_LINEAR 2 /* linear map of interrupts */
115#define IRQ_HOST_MAP_TREE 3 /* radix tree */
116 union {
117 struct {
118 unsigned int size;
119 unsigned int *revmap;
120 } linear;
121 struct radix_tree_root tree;
122 } revmap_data;
123 struct irq_host_ops *ops;
124 void *host_data;
125 irq_hw_number_t inval_irq;
126};
127
 128/* The main irq map itself is an array of NR_IRQS entries containing the
 129 * associated host and irq number. An entry with a host of NULL is free.
130 * An entry can be allocated if it's free, the allocator always then sets
131 * hwirq first to the host's invalid irq number and then fills ops.
132 */
133struct irq_map_entry {
134 irq_hw_number_t hwirq;
135 struct irq_host *host;
136};
137
138extern struct irq_map_entry irq_map[NR_IRQS];
139
57 140
58/* Create a mapping for a real_irq if it doesn't already exist. 141/***
59 * Return the virtual irq as a convenience. 142 * irq_alloc_host - Allocate a new irq_host data structure
143 * @node: device-tree node of the interrupt controller
144 * @revmap_type: type of reverse mapping to use
145 * @revmap_arg: for IRQ_HOST_MAP_LINEAR linear only: size of the map
146 * @ops: map/unmap host callbacks
147 * @inval_irq: provide a hw number in that host space that is always invalid
148 *
 149 * Allocates and initializes an irq_host structure. Note that in the case of
150 * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this returns
151 * for all legacy interrupts except 0 (which is always the invalid irq for
152 * a legacy controller). For a IRQ_HOST_MAP_LINEAR, the map is allocated by
153 * this call as well. For a IRQ_HOST_MAP_TREE, the radix tree will be allocated
154 * later during boot automatically (the reverse mapping will use the slow path
155 * until that happens).
156 */
157extern struct irq_host *irq_alloc_host(unsigned int revmap_type,
158 unsigned int revmap_arg,
159 struct irq_host_ops *ops,
160 irq_hw_number_t inval_irq);
161
162
163/***
164 * irq_find_host - Locates a host for a given device node
165 * @node: device-tree node of the interrupt controller
166 */
167extern struct irq_host *irq_find_host(struct device_node *node);
168
169
170/***
171 * irq_set_default_host - Set a "default" host
172 * @host: default host pointer
173 *
174 * For convenience, it's possible to set a "default" host that will be used
175 * whenever NULL is passed to irq_create_mapping(). It makes life easier for
176 * platforms that want to manipulate a few hard coded interrupt numbers that
177 * aren't properly represented in the device-tree.
178 */
179extern void irq_set_default_host(struct irq_host *host);
180
181
182/***
183 * irq_set_virq_count - Set the maximum number of virt irqs
184 * @count: number of linux virtual irqs, capped with NR_IRQS
185 *
186 * This is mainly for use by platforms like iSeries who want to program
187 * the virtual irq number in the controller to avoid the reverse mapping
188 */
189extern void irq_set_virq_count(unsigned int count);
190
191
192/***
193 * irq_create_mapping - Map a hardware interrupt into linux virq space
194 * @host: host owning this hardware interrupt or NULL for default host
195 * @hwirq: hardware irq number in that host space
196 * @flags: flags passed to the controller. contains the trigger type among
197 * others. Use IRQ_TYPE_* defined in include/linux/irq.h
198 *
199 * Only one mapping per hardware interrupt is permitted. Returns a linux
200 * virq number. The flags can be used to provide sense information to the
201 * controller (typically extracted from the device-tree). If no information
202 * is passed, the controller defaults will apply (for example, xics can only
203 * do edge so flags are irrelevant for some pseries specific irqs).
204 *
205 * The device-tree generally contains the trigger info in an encoding that is
206 * specific to a given type of controller. In that case, you can directly use
207 * host->ops->trigger_xlate() to translate that.
208 *
 209 * It is recommended that new PICs that don't have existing OF bindings choose
210 * to use a representation of triggers identical to linux.
211 */
212extern unsigned int irq_create_mapping(struct irq_host *host,
213 irq_hw_number_t hwirq,
214 unsigned int flags);
215
216
217/***
218 * irq_dispose_mapping - Unmap an interrupt
219 * @virq: linux virq number of the interrupt to unmap
220 */
221extern void irq_dispose_mapping(unsigned int virq);
222
223/***
224 * irq_find_mapping - Find a linux virq from an hw irq number.
225 * @host: host owning this hardware interrupt
226 * @hwirq: hardware irq number in that host space
227 *
228 * This is a slow path, for use by generic code. It's expected that an
229 * irq controller implementation directly calls the appropriate low level
230 * mapping function.
60 */ 231 */
61int virt_irq_create_mapping(unsigned int real_irq); 232extern unsigned int irq_find_mapping(struct irq_host *host,
62void virt_irq_init(void); 233 irq_hw_number_t hwirq);
63 234
64static inline unsigned int virt_irq_to_real(unsigned int virt_irq) 235
236/***
237 * irq_radix_revmap - Find a linux virq from a hw irq number.
238 * @host: host owning this hardware interrupt
239 * @hwirq: hardware irq number in that host space
240 *
241 * This is a fast path, for use by irq controller code that uses radix tree
242 * revmaps
243 */
244extern unsigned int irq_radix_revmap(struct irq_host *host,
245 irq_hw_number_t hwirq);
246
247/***
248 * irq_linear_revmap - Find a linux virq from a hw irq number.
249 * @host: host owning this hardware interrupt
250 * @hwirq: hardware irq number in that host space
251 *
252 * This is a fast path, for use by irq controller code that uses linear
253 * revmaps. It does fallback to the slow path if the revmap doesn't exist
254 * yet and will create the revmap entry with appropriate locking
255 */
256
257extern unsigned int irq_linear_revmap(struct irq_host *host,
258 irq_hw_number_t hwirq);
259
260
261
262/***
263 * irq_alloc_virt - Allocate virtual irq numbers
264 * @host: host owning these new virtual irqs
265 * @count: number of consecutive numbers to allocate
266 * @hint: pass a hint number, the allocator will try to use a 1:1 mapping
267 *
268 * This is a low level function that is used internally by irq_create_mapping()
269 * and that can be used by some irq controllers implementations for things
270 * like allocating ranges of numbers for MSIs. The revmaps are left untouched.
271 */
272extern unsigned int irq_alloc_virt(struct irq_host *host,
273 unsigned int count,
274 unsigned int hint);
275
276/***
277 * irq_free_virt - Free virtual irq numbers
278 * @virq: virtual irq number of the first interrupt to free
279 * @count: number of interrupts to free
280 *
281 * This function is the opposite of irq_alloc_virt. It will not clear reverse
 282 * maps; that should be done beforehand by unmapping the interrupt. In fact,
283 * all interrupts covered by the range being freed should have been unmapped
284 * prior to calling this.
285 */
286extern void irq_free_virt(unsigned int virq, unsigned int count);
287
288
289/* -- OF helpers -- */
290
291/* irq_create_of_mapping - Map a hardware interrupt into linux virq space
292 * @controller: Device node of the interrupt controller
293 * @inspec: Interrupt specifier from the device-tree
294 * @intsize: Size of the interrupt specifier from the device-tree
295 *
296 * This function is identical to irq_create_mapping except that it takes
 297 * as input information straight from the device-tree (typically the results
 298 * of the of_irq_map_*() functions).
299 */
300extern unsigned int irq_create_of_mapping(struct device_node *controller,
301 u32 *intspec, unsigned int intsize);
302
303
 304/* irq_of_parse_and_map - Parse and map an interrupt into linux virq space
305 * @device: Device node of the device whose interrupt is to be mapped
306 * @index: Index of the interrupt to map
307 *
308 * This function is a wrapper that chains of_irq_map_one() and
 309 * irq_create_of_mapping() to make things easier for callers
310 */
311extern unsigned int irq_of_parse_and_map(struct device_node *dev, int index);
312
313/* -- End OF helpers -- */
314
315/***
316 * irq_early_init - Init irq remapping subsystem
317 */
318extern void irq_early_init(void);
319
320static __inline__ int irq_canonicalize(int irq)
65{ 321{
66 return virt_irq_to_real_map[virt_irq]; 322 return irq;
67} 323}
68 324
69extern unsigned int real_irq_to_virt_slowpath(unsigned int real_irq); 325
326#else /* CONFIG_PPC_MERGE */
327
328/* This number is used when no interrupt has been assigned */
329#define NO_IRQ (-1)
330#define NO_IRQ_IGNORE (-2)
331
70 332
71/* 333/*
72 * List of interrupt controllers. 334 * These constants are used for passing information about interrupt
335 * signal polarity and level/edge sensing to the low-level PIC chip
336 * drivers.
73 */ 337 */
74#define IC_INVALID 0 338#define IRQ_SENSE_MASK 0x1
75#define IC_OPEN_PIC 1 339#define IRQ_SENSE_LEVEL 0x1 /* interrupt on active level */
76#define IC_PPC_XIC 2 340#define IRQ_SENSE_EDGE 0x0 /* interrupt triggered by edge */
77#define IC_CELL_PIC 3
78#define IC_ISERIES 4
79 341
80extern u64 ppc64_interrupt_controller; 342#define IRQ_POLARITY_MASK 0x2
343#define IRQ_POLARITY_POSITIVE 0x2 /* high level or low->high edge */
344#define IRQ_POLARITY_NEGATIVE 0x0 /* low level or high->low edge */
81 345
82#else /* 32-bit */
83 346
84#if defined(CONFIG_40x) 347#if defined(CONFIG_40x)
85#include <asm/ibm4xx.h> 348#include <asm/ibm4xx.h>
@@ -512,16 +775,11 @@ extern u64 ppc64_interrupt_controller;
512 775
513#endif /* CONFIG_8260 */ 776#endif /* CONFIG_8260 */
514 777
515#endif 778#endif /* Whatever way too big #ifdef */
516 779
517#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) 780#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
518/* pedantic: these are long because they are used with set_bit --RR */ 781/* pedantic: these are long because they are used with set_bit --RR */
519extern unsigned long ppc_cached_irq_mask[NR_MASK_WORDS]; 782extern unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
520extern atomic_t ppc_n_lost_interrupts;
521
522#define virt_irq_create_mapping(x) (x)
523
524#endif
525 783
526/* 784/*
527 * Because many systems have two overlapping names spaces for 785 * Because many systems have two overlapping names spaces for
@@ -560,6 +818,7 @@ static __inline__ int irq_canonicalize(int irq)
560 irq = 9; 818 irq = 9;
561 return irq; 819 return irq;
562} 820}
821#endif /* CONFIG_PPC_MERGE */
563 822
564extern int distribute_irqs; 823extern int distribute_irqs;
565 824
@@ -579,9 +838,8 @@ extern struct thread_info *softirq_ctx[NR_CPUS];
579 838
580extern void irq_ctx_init(void); 839extern void irq_ctx_init(void);
581extern void call_do_softirq(struct thread_info *tp); 840extern void call_do_softirq(struct thread_info *tp);
582extern int call___do_IRQ(int irq, struct pt_regs *regs, 841extern int call_handle_irq(int irq, void *p1, void *p2,
583 struct thread_info *tp); 842 struct thread_info *tp, void *func);
584
585#else 843#else
586#define irq_ctx_init() 844#define irq_ctx_init()
587 845
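The comments above document the new irq_host remapping layer. To make the flow concrete, here is a heavily abbreviated sketch of a PIC driver built on it; every my_pic_* name is hypothetical, the irq_chip callbacks and all error handling are elided, and the linear revmap size of 32 is arbitrary:

#include <linux/irq.h>
#include <asm/irq.h>
#include <asm/prom.h>

static struct irq_chip my_pic_chip = {
	.typename = " MYPIC ",		/* mask/unmask/eoi callbacks elided */
};

static int my_pic_host_match(struct irq_host *h, struct device_node *node)
{
	/* Hypothetical convention: the controller node is kept in host_data */
	return h->host_data == node;
}

static int my_pic_host_map(struct irq_host *h, unsigned int virq,
			   irq_hw_number_t hw, unsigned int flags)
{
	/* Wire the freshly allocated linux irq to this controller: */
	set_irq_chip_and_handler(virq, &my_pic_chip, handle_level_irq);
	return 0;
}

static struct irq_host_ops my_pic_host_ops = {
	.match	= my_pic_host_match,
	.map	= my_pic_host_map,
};

static void __init my_pic_init(struct device_node *node)
{
	struct irq_host *host;

	host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, 32, &my_pic_host_ops, 0);
	host->host_data = node;

	/* Interrupts are then handed out as virqs on demand, e.g.:
	 *	virq = irq_create_mapping(host, hwirq, IRQ_TYPE_LEVEL_LOW);
	 */
}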
diff --git a/include/asm-powerpc/irqflags.h b/include/asm-powerpc/irqflags.h
new file mode 100644
index 000000000000..7970cbaeaa54
--- /dev/null
+++ b/include/asm-powerpc/irqflags.h
@@ -0,0 +1,31 @@
1/*
2 * include/asm-powerpc/irqflags.h
3 *
4 * IRQ flags handling
5 *
6 * This file gets included from lowlevel asm headers too, to provide
7 * wrapped versions of the local_irq_*() APIs, based on the
8 * raw_local_irq_*() macros from the lowlevel headers.
9 */
10#ifndef _ASM_IRQFLAGS_H
11#define _ASM_IRQFLAGS_H
12
13/*
14 * Get definitions for raw_local_save_flags(x), etc.
15 */
16#include <asm-powerpc/hw_irq.h>
17
18/*
19 * Do the CPU's IRQ-state tracing from assembly code. We call a
20 * C function, so save all the C-clobbered registers:
21 */
22#ifdef CONFIG_TRACE_IRQFLAGS
23
24#error No support on PowerPC yet for CONFIG_TRACE_IRQFLAGS
25
26#else
27# define TRACE_IRQS_ON
28# define TRACE_IRQS_OFF
29#endif
30
31#endif
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index eba133d149a7..c17c13742401 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -97,7 +97,7 @@ struct machdep_calls {
97 void (*show_percpuinfo)(struct seq_file *m, int i); 97 void (*show_percpuinfo)(struct seq_file *m, int i);
98 98
99 void (*init_IRQ)(void); 99 void (*init_IRQ)(void);
100 int (*get_irq)(struct pt_regs *); 100 unsigned int (*get_irq)(struct pt_regs *);
101#ifdef CONFIG_KEXEC 101#ifdef CONFIG_KEXEC
102 void (*kexec_cpu_down)(int crash_shutdown, int secondary); 102 void (*kexec_cpu_down)(int crash_shutdown, int secondary);
103#endif 103#endif
diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h
index f0d22ac34b96..eb241c99c457 100644
--- a/include/asm-powerpc/mpic.h
+++ b/include/asm-powerpc/mpic.h
@@ -114,9 +114,6 @@
114#define MPIC_VEC_TIMER_1 248 114#define MPIC_VEC_TIMER_1 248
115#define MPIC_VEC_TIMER_0 247 115#define MPIC_VEC_TIMER_0 247
116 116
117/* Type definition of the cascade handler */
118typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data);
119
120#ifdef CONFIG_MPIC_BROKEN_U3 117#ifdef CONFIG_MPIC_BROKEN_U3
121/* Fixup table entry */ 118/* Fixup table entry */
122struct mpic_irq_fixup 119struct mpic_irq_fixup
@@ -132,10 +129,19 @@ struct mpic_irq_fixup
132/* The instance data of a given MPIC */ 129/* The instance data of a given MPIC */
133struct mpic 130struct mpic
134{ 131{
132 /* The device node of the interrupt controller */
133 struct device_node *of_node;
134
135 /* The remapper for this MPIC */
136 struct irq_host *irqhost;
137
135 /* The "linux" controller struct */ 138 /* The "linux" controller struct */
136 hw_irq_controller hc_irq; 139 struct irq_chip hc_irq;
140#ifdef CONFIG_MPIC_BROKEN_U3
141 struct irq_chip hc_ht_irq;
142#endif
137#ifdef CONFIG_SMP 143#ifdef CONFIG_SMP
138 hw_irq_controller hc_ipi; 144 struct irq_chip hc_ipi;
139#endif 145#endif
140 const char *name; 146 const char *name;
141 /* Flags */ 147 /* Flags */
@@ -144,20 +150,12 @@ struct mpic
144 unsigned int isu_size; 150 unsigned int isu_size;
145 unsigned int isu_shift; 151 unsigned int isu_shift;
146 unsigned int isu_mask; 152 unsigned int isu_mask;
147 /* Offset of irq vector numbers */
148 unsigned int irq_offset;
149 unsigned int irq_count; 153 unsigned int irq_count;
150 /* Offset of ipi vector numbers */
151 unsigned int ipi_offset;
152 /* Number of sources */ 154 /* Number of sources */
153 unsigned int num_sources; 155 unsigned int num_sources;
154 /* Number of CPUs */ 156 /* Number of CPUs */
155 unsigned int num_cpus; 157 unsigned int num_cpus;
156 /* cascade handler */ 158 /* default senses array */
157 mpic_cascade_t cascade;
158 void *cascade_data;
159 unsigned int cascade_vec;
160 /* senses array */
161 unsigned char *senses; 159 unsigned char *senses;
162 unsigned int senses_count; 160 unsigned int senses_count;
163 161
@@ -213,14 +211,11 @@ struct mpic
213 * The values in the array start at the first source of the MPIC, 211 * The values in the array start at the first source of the MPIC,
214 * that is senses[0] correspond to linux irq "irq_offset". 212 * that is senses[0] correspond to linux irq "irq_offset".
215 */ 213 */
216extern struct mpic *mpic_alloc(unsigned long phys_addr, 214extern struct mpic *mpic_alloc(struct device_node *node,
215 unsigned long phys_addr,
217 unsigned int flags, 216 unsigned int flags,
218 unsigned int isu_size, 217 unsigned int isu_size,
219 unsigned int irq_offset,
220 unsigned int irq_count, 218 unsigned int irq_count,
221 unsigned int ipi_offset,
222 unsigned char *senses,
223 unsigned int senses_num,
224 const char *name); 219 const char *name);
225 220
226/* Assign ISUs, to call before mpic_init() 221/* Assign ISUs, to call before mpic_init()
@@ -232,22 +227,27 @@ extern struct mpic *mpic_alloc(unsigned long phys_addr,
232extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, 227extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
233 unsigned long phys_addr); 228 unsigned long phys_addr);
234 229
230/* Set default sense codes
231 *
232 * @mpic: controller
233 * @senses: array of sense codes
234 * @count: size of above array
235 *
236 * Optionally provide an array (indexed on hardware interrupt numbers
237 * for this MPIC) of default sense codes for the chip. Those are linux
238 * sense codes IRQ_TYPE_*
239 *
240 * The driver gets ownership of the pointer, don't dispose of it or
241 * anything like that. __init only.
242 */
243extern void mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count);
244
245
235/* Initialize the controller. After this has been called, none of the above 246/* Initialize the controller. After this has been called, none of the above
236 * should be called again for this mpic 247 * should be called again for this mpic
237 */ 248 */
238extern void mpic_init(struct mpic *mpic); 249extern void mpic_init(struct mpic *mpic);
239 250
240/* Setup a cascade. Currently, only one cascade is supported this
241 * way, though you can always do a normal request_irq() and add
242 * other cascades this way. You should call this _after_ having
243 * added all the ISUs
244 *
245 * @irq_no: "linux" irq number of the cascade (that is offset'ed vector)
246 * @handler: cascade handler function
247 */
248extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t hanlder,
249 void *data);
250
251/* 251/*
252 * All of the following functions must only be used after the 252 * All of the following functions must only be used after the
253 * ISUs have been assigned and the controller fully initialized 253 * ISUs have been assigned and the controller fully initialized
@@ -284,9 +284,9 @@ extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask);
284void smp_mpic_message_pass(int target, int msg); 284void smp_mpic_message_pass(int target, int msg);
285 285
286/* Fetch interrupt from a given mpic */ 286/* Fetch interrupt from a given mpic */
287extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); 287extern unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs);
288/* This one gets to the primary mpic */ 288/* This one gets to the primary mpic */
289extern int mpic_get_irq(struct pt_regs *regs); 289extern unsigned int mpic_get_irq(struct pt_regs *regs);
290 290
291/* Set the EPIC clock ratio */ 291/* Set the EPIC clock ratio */
292void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio); 292void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio);
@@ -294,8 +294,5 @@ void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio);
294/* Enable/Disable EPIC serial interrupt mode */ 294/* Enable/Disable EPIC serial interrupt mode */
295void mpic_set_serial_int(struct mpic *mpic, int enable); 295void mpic_set_serial_int(struct mpic *mpic, int enable);
296 296
297/* global mpic for pSeries */
298extern struct mpic *pSeries_mpic;
299
300#endif /* __KERNEL__ */ 297#endif /* __KERNEL__ */
301#endif /* _ASM_POWERPC_MPIC_H */ 298#endif /* _ASM_POWERPC_MPIC_H */
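With the cascade setup and the irq/ipi offsets gone, platform code allocates and initializes an MPIC roughly as below. This is a hedged sketch: the node lookup, the physical address, and the MPIC_PRIMARY flag are placeholders chosen for illustration, not lifted from any particular platform file:

#include <asm/mpic.h>
#include <asm/prom.h>

static void __init example_init_IRQ(void)		/* hypothetical hook */
{
	struct device_node *np;
	struct mpic *mpic;
	unsigned long openpic_addr = 0;			/* placeholder address */

	np = of_find_node_by_type(NULL, "open-pic");	/* placeholder lookup */

	/* New signature: device node first, no offsets, no senses array: */
	mpic = mpic_alloc(np, openpic_addr, MPIC_PRIMARY,
			  0 /* isu_size */, 0 /* irq_count */, " MPIC    ");
	if (mpic)
		mpic_init(mpic);
}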
diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h
index faa1fc703053..2f2e3024fa61 100644
--- a/include/asm-powerpc/percpu.h
+++ b/include/asm-powerpc/percpu.h
@@ -14,6 +14,7 @@
14 14
15#define __per_cpu_offset(cpu) (paca[cpu].data_offset) 15#define __per_cpu_offset(cpu) (paca[cpu].data_offset)
16#define __my_cpu_offset() get_paca()->data_offset 16#define __my_cpu_offset() get_paca()->data_offset
17#define per_cpu_offset(x) (__per_cpu_offset(x))
17 18
18/* Separate out the type, so (int[3], foo) works. */ 19/* Separate out the type, so (int[3], foo) works. */
19#define DEFINE_PER_CPU(type, name) \ 20#define DEFINE_PER_CPU(type, name) \
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index 010d186d095b..b095a285c84b 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -64,11 +64,6 @@ struct boot_param_header
64typedef u32 phandle; 64typedef u32 phandle;
65typedef u32 ihandle; 65typedef u32 ihandle;
66 66
67struct interrupt_info {
68 int line;
69 int sense; /* +ve/-ve logic, edge or level, etc. */
70};
71
72struct property { 67struct property {
73 char *name; 68 char *name;
74 int length; 69 int length;
@@ -81,8 +76,6 @@ struct device_node {
81 char *type; 76 char *type;
82 phandle node; 77 phandle node;
83 phandle linux_phandle; 78 phandle linux_phandle;
84 int n_intrs;
85 struct interrupt_info *intrs;
86 char *full_name; 79 char *full_name;
87 80
88 struct property *properties; 81 struct property *properties;
@@ -167,8 +160,8 @@ extern void unflatten_device_tree(void);
167extern void early_init_devtree(void *); 160extern void early_init_devtree(void *);
168extern int device_is_compatible(struct device_node *device, const char *); 161extern int device_is_compatible(struct device_node *device, const char *);
169extern int machine_is_compatible(const char *compat); 162extern int machine_is_compatible(const char *compat);
170extern unsigned char *get_property(struct device_node *node, const char *name, 163extern void *get_property(struct device_node *node, const char *name,
171 int *lenp); 164 int *lenp);
172extern void print_properties(struct device_node *node); 165extern void print_properties(struct device_node *node);
173extern int prom_n_addr_cells(struct device_node* np); 166extern int prom_n_addr_cells(struct device_node* np);
174extern int prom_n_size_cells(struct device_node* np); 167extern int prom_n_size_cells(struct device_node* np);
@@ -204,6 +197,15 @@ extern int release_OF_resource(struct device_node* node, int index);
204 */ 197 */
205 198
206 199
200/* Helper to read a big number */
201static inline u64 of_read_number(u32 *cell, int size)
202{
203 u64 r = 0;
204 while (size--)
205 r = (r << 32) | *(cell++);
206 return r;
207}
208
207/* Translate an OF address block into a CPU physical address 209/* Translate an OF address block into a CPU physical address
208 */ 210 */
209#define OF_BAD_ADDR ((u64)-1) 211#define OF_BAD_ADDR ((u64)-1)
@@ -240,5 +242,83 @@ extern void kdump_move_device_tree(void);
240/* CPU OF node matching */ 242/* CPU OF node matching */
241struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); 243struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
242 244
245
246/*
247 * OF interrupt mapping
248 */
249
250/* This structure is returned when an interrupt is mapped. The controller
251 * field needs to be put() after use
252 */
253
254#define OF_MAX_IRQ_SPEC 4 /* We handle specifiers of at most 4 cells */
255
256struct of_irq {
257 struct device_node *controller; /* Interrupt controller node */
258 u32 size; /* Specifier size */
259 u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */
260};
261
262/***
263 * of_irq_map_init - Initialize the irq remapper
264 * @flags: flags defining workarounds to enable
265 *
266 * Some machines have bugs in the device-tree which require certain workarounds
267 * to be applied. Call this before any interrupt mapping attempts to enable
268 * those workarounds.
269 */
270#define OF_IMAP_OLDWORLD_MAC 0x00000001
271#define OF_IMAP_NO_PHANDLE 0x00000002
272
273extern void of_irq_map_init(unsigned int flags);
274
275/***
276 * of_irq_map_raw - Low level interrupt tree parsing
277 * @parent: the device interrupt parent
278 * @intspec: interrupt specifier ("interrupts" property of the device)
279 * @addr: address specifier (start of "reg" property of the device)
280 * @out_irq: structure of_irq filled by this function
281 *
282 * Returns 0 on success and a negative number on error
283 *
284 * This function is a low-level interrupt tree walking function. It
285 * can be used to do a partial walk with synthesized reg and interrupts
286 * properties, for example when resolving PCI interrupts when no device
287 * node exists for the parent.
288 *
289 */
290
291extern int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
292 struct of_irq *out_irq);
293
294
295/***
296 * of_irq_map_one - Resolve an interrupt for a device
297 * @device: the device whose interrupt is to be resolved
298 * @index: index of the interrupt to resolve
299 * @out_irq: structure of_irq filled by this function
300 *
301 * This function resolves an interrupt, walking the tree, for a given
302 * device-tree node. It's the high-level counterpart to of_irq_map_raw().
303 * It also implements the workarounds for OldWorld Macs.
304 */
305extern int of_irq_map_one(struct device_node *device, int index,
306 struct of_irq *out_irq);
307
308/***
309 * of_irq_map_pci - Resolve the interrupt for a PCI device
310 * @pdev: the device whose interrupt is to be resolved
311 * @out_irq: structure of_irq filled by this function
312 *
313 * This function resolves the PCI interrupt for a given PCI device. If a
314 * device-node exists for a given pci_dev, it will use normal OF tree
315 * walking. If not, it will implement standard swizzling and walk up the
316 * PCI tree until an device-node is found, at which point it will finish
317 * resolving using the OF tree walking.
318 */
319struct pci_dev;
320extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
321
322
243#endif /* __KERNEL__ */ 323#endif /* __KERNEL__ */
244#endif /* _POWERPC_PROM_H */ 324#endif /* _POWERPC_PROM_H */
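The new of_read_number() helper and the of_irq_map_*() family above replace the old intrs[]/n_intrs fields. Below is a minimal sketch (not part of the patch) of how a platform driver might use them, assuming a two-cell "reg" encoding and relying on the existing get_property() and of_node_put() helpers from this header; the function name and printk text are illustrative only.

#include <asm/prom.h>

/* Illustrative sketch only, not part of the patch. */
static int example_parse_node(struct device_node *np)
{
	struct of_irq oirq;
	u32 *reg;
	int len, rc;

	/* get_property() now returns void *, so no cast is needed */
	reg = get_property(np, "reg", &len);
	if (reg && len >= (int)(2 * sizeof(u32)))
		printk("base: 0x%llx\n",
		       (unsigned long long)of_read_number(reg, 2));

	/* walk the interrupt tree for the node's first interrupt */
	rc = of_irq_map_one(np, 0, &oirq);
	if (rc)
		return rc;	/* negative on failure */

	printk("%d specifier cell(s) from %s\n",
	       oirq.size, oirq.controller->full_name);

	/* the controller reference must be dropped after use */
	of_node_put(oirq.controller);
	return 0;
}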
diff --git a/include/asm-powerpc/rwsem.h b/include/asm-powerpc/rwsem.h
index 2c2fe9647595..e929145e1e46 100644
--- a/include/asm-powerpc/rwsem.h
+++ b/include/asm-powerpc/rwsem.h
@@ -28,24 +28,11 @@ struct rw_semaphore {
28#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 28#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
29 spinlock_t wait_lock; 29 spinlock_t wait_lock;
30 struct list_head wait_list; 30 struct list_head wait_list;
31#if RWSEM_DEBUG
32 int debug;
33#endif
34}; 31};
35 32
36/*
37 * initialisation
38 */
39#if RWSEM_DEBUG
40#define __RWSEM_DEBUG_INIT , 0
41#else
42#define __RWSEM_DEBUG_INIT /* */
43#endif
44
45#define __RWSEM_INITIALIZER(name) \ 33#define __RWSEM_INITIALIZER(name) \
46 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 34 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
47 LIST_HEAD_INIT((name).wait_list) \ 35 LIST_HEAD_INIT((name).wait_list) }
48 __RWSEM_DEBUG_INIT }
49 36
50#define DECLARE_RWSEM(name) \ 37#define DECLARE_RWSEM(name) \
51 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 38 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -60,9 +47,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
60 sem->count = RWSEM_UNLOCKED_VALUE; 47 sem->count = RWSEM_UNLOCKED_VALUE;
61 spin_lock_init(&sem->wait_lock); 48 spin_lock_init(&sem->wait_lock);
62 INIT_LIST_HEAD(&sem->wait_list); 49 INIT_LIST_HEAD(&sem->wait_list);
63#if RWSEM_DEBUG
64 sem->debug = 0;
65#endif
66} 50}
67 51
68/* 52/*
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index 9609d3ee8798..c02d105d8294 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -117,6 +117,7 @@ struct spu {
117 struct list_head sched_list; 117 struct list_head sched_list;
118 int number; 118 int number;
119 int nid; 119 int nid;
120 unsigned int irqs[3];
120 u32 isrc; 121 u32 isrc;
121 u32 node; 122 u32 node;
122 u64 flags; 123 u64 flags;
diff --git a/include/asm-s390/irqflags.h b/include/asm-s390/irqflags.h
new file mode 100644
index 000000000000..65f4db627e7a
--- /dev/null
+++ b/include/asm-s390/irqflags.h
@@ -0,0 +1,50 @@
1/*
2 * include/asm-s390/irqflags.h
3 *
4 * Copyright (C) IBM Corp. 2006
5 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
6 */
7
8#ifndef __ASM_IRQFLAGS_H
9#define __ASM_IRQFLAGS_H
10
11#ifdef __KERNEL__
12
13/* interrupt control.. */
14#define raw_local_irq_enable() ({ \
15 unsigned long __dummy; \
16 __asm__ __volatile__ ( \
17 "stosm 0(%1),0x03" \
18 : "=m" (__dummy) : "a" (&__dummy) : "memory" ); \
19 })
20
21#define raw_local_irq_disable() ({ \
22 unsigned long __flags; \
23 __asm__ __volatile__ ( \
24 "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \
25 __flags; \
26 })
27
28#define raw_local_save_flags(x) \
29 __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) )
30
31#define raw_local_irq_restore(x) \
32 __asm__ __volatile__("ssm 0(%0)" : : "a" (&x), "m" (x) : "memory")
33
34#define raw_irqs_disabled() \
35({ \
36 unsigned long flags; \
37 local_save_flags(flags); \
38 !((flags >> __FLAG_SHIFT) & 3); \
39})
40
41static inline int raw_irqs_disabled_flags(unsigned long flags)
42{
43 return !((flags >> __FLAG_SHIFT) & 3);
44}
45
46/* For spinlocks etc */
47#define raw_local_irq_save(x) ((x) = raw_local_irq_disable())
48
49#endif /* __KERNEL__ */
50#endif /* __ASM_IRQFLAGS_H */
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index d9a8cca9b653..28b3517e787c 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -42,6 +42,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
42#define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) 42#define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
43#define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) 43#define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
44#define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu]) 44#define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu])
45#define per_cpu_offset(x) (__per_cpu_offset[x])
45 46
46/* A macro to avoid #include hell... */ 47/* A macro to avoid #include hell... */
47#define percpu_modcopy(pcpudst, src, size) \ 48#define percpu_modcopy(pcpudst, src, size) \
diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h
index 0422a085dd56..13ec16965150 100644
--- a/include/asm-s390/rwsem.h
+++ b/include/asm-s390/rwsem.h
@@ -61,6 +61,9 @@ struct rw_semaphore {
61 signed long count; 61 signed long count;
62 spinlock_t wait_lock; 62 spinlock_t wait_lock;
63 struct list_head wait_list; 63 struct list_head wait_list;
64#ifdef CONFIG_DEBUG_LOCK_ALLOC
65 struct lockdep_map dep_map;
66#endif
64}; 67};
65 68
66#ifndef __s390x__ 69#ifndef __s390x__
@@ -80,8 +83,16 @@ struct rw_semaphore {
80/* 83/*
81 * initialisation 84 * initialisation
82 */ 85 */
86
87#ifdef CONFIG_DEBUG_LOCK_ALLOC
88# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
89#else
90# define __RWSEM_DEP_MAP_INIT(lockname)
91#endif
92
83#define __RWSEM_INITIALIZER(name) \ 93#define __RWSEM_INITIALIZER(name) \
84{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) } 94{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
95 __RWSEM_DEP_MAP_INIT(name) }
85 96
86#define DECLARE_RWSEM(name) \ 97#define DECLARE_RWSEM(name) \
87 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 98 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -93,6 +104,17 @@ static inline void init_rwsem(struct rw_semaphore *sem)
93 INIT_LIST_HEAD(&sem->wait_list); 104 INIT_LIST_HEAD(&sem->wait_list);
94} 105}
95 106
107extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
108 struct lock_class_key *key);
109
110#define init_rwsem(sem) \
111do { \
112 static struct lock_class_key __key; \
113 \
114 __init_rwsem((sem), #sem, &__key); \
115} while (0)
116
117
96/* 118/*
97 * lock for reading 119 * lock for reading
98 */ 120 */
@@ -155,7 +177,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
155/* 177/*
156 * lock for writing 178 * lock for writing
157 */ 179 */
158static inline void __down_write(struct rw_semaphore *sem) 180static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
159{ 181{
160 signed long old, new, tmp; 182 signed long old, new, tmp;
161 183
@@ -181,6 +203,11 @@ static inline void __down_write(struct rw_semaphore *sem)
181 rwsem_down_write_failed(sem); 203 rwsem_down_write_failed(sem);
182} 204}
183 205
206static inline void __down_write(struct rw_semaphore *sem)
207{
208 __down_write_nested(sem, 0);
209}
210
184/* 211/*
185 * trylock for writing -- returns 1 if successful, 0 if contention 212 * trylock for writing -- returns 1 if successful, 0 if contention
186 */ 213 */
diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h
index 702cf436698c..32cdc69f39f4 100644
--- a/include/asm-s390/semaphore.h
+++ b/include/asm-s390/semaphore.h
@@ -37,7 +37,8 @@ struct semaphore {
37 37
38static inline void sema_init (struct semaphore *sem, int val) 38static inline void sema_init (struct semaphore *sem, int val)
39{ 39{
40 *sem = (struct semaphore) __SEMAPHORE_INITIALIZER((*sem),val); 40 atomic_set(&sem->count, val);
41 init_waitqueue_head(&sem->wait);
41} 42}
42 43
43static inline void init_MUTEX (struct semaphore *sem) 44static inline void init_MUTEX (struct semaphore *sem)
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index 71a0732cd518..9ab186ffde23 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -301,34 +301,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
301#define set_mb(var, value) do { var = value; mb(); } while (0) 301#define set_mb(var, value) do { var = value; mb(); } while (0)
302#define set_wmb(var, value) do { var = value; wmb(); } while (0) 302#define set_wmb(var, value) do { var = value; wmb(); } while (0)
303 303
304/* interrupt control.. */
305#define local_irq_enable() ({ \
306 unsigned long __dummy; \
307 __asm__ __volatile__ ( \
308 "stosm 0(%1),0x03" \
309 : "=m" (__dummy) : "a" (&__dummy) : "memory" ); \
310 })
311
312#define local_irq_disable() ({ \
313 unsigned long __flags; \
314 __asm__ __volatile__ ( \
315 "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \
316 __flags; \
317 })
318
319#define local_save_flags(x) \
320 __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) )
321
322#define local_irq_restore(x) \
323 __asm__ __volatile__("ssm 0(%0)" : : "a" (&x), "m" (x) : "memory")
324
325#define irqs_disabled() \
326({ \
327 unsigned long flags; \
328 local_save_flags(flags); \
329 !((flags >> __FLAG_SHIFT) & 3); \
330})
331
332#ifdef __s390x__ 304#ifdef __s390x__
333 305
334#define __ctl_load(array, low, high) ({ \ 306#define __ctl_load(array, low, high) ({ \
@@ -442,8 +414,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
442 }) 414 })
443#endif /* __s390x__ */ 415#endif /* __s390x__ */
444 416
445/* For spinlocks etc */ 417#include <linux/irqflags.h>
446#define local_irq_save(x) ((x) = local_irq_disable())
447 418
448/* 419/*
449 * Use to set psw mask except for the first byte which 420 * Use to set psw mask except for the first byte which
@@ -482,4 +453,3 @@ extern void (*_machine_power_off)(void);
482#endif /* __KERNEL__ */ 453#endif /* __KERNEL__ */
483 454
484#endif 455#endif
485
diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h
index 0262d3d1e5e0..9d2aea5e8488 100644
--- a/include/asm-sh/rwsem.h
+++ b/include/asm-sh/rwsem.h
@@ -25,24 +25,11 @@ struct rw_semaphore {
25#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 25#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
26 spinlock_t wait_lock; 26 spinlock_t wait_lock;
27 struct list_head wait_list; 27 struct list_head wait_list;
28#if RWSEM_DEBUG
29 int debug;
30#endif
31}; 28};
32 29
33/*
34 * initialisation
35 */
36#if RWSEM_DEBUG
37#define __RWSEM_DEBUG_INIT , 0
38#else
39#define __RWSEM_DEBUG_INIT /* */
40#endif
41
42#define __RWSEM_INITIALIZER(name) \ 30#define __RWSEM_INITIALIZER(name) \
43 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 31 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
44 LIST_HEAD_INIT((name).wait_list) \ 32 LIST_HEAD_INIT((name).wait_list) }
45 __RWSEM_DEBUG_INIT }
46 33
47#define DECLARE_RWSEM(name) \ 34#define DECLARE_RWSEM(name) \
48 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 35 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -57,9 +44,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
57 sem->count = RWSEM_UNLOCKED_VALUE; 44 sem->count = RWSEM_UNLOCKED_VALUE;
58 spin_lock_init(&sem->wait_lock); 45 spin_lock_init(&sem->wait_lock);
59 INIT_LIST_HEAD(&sem->wait_list); 46 INIT_LIST_HEAD(&sem->wait_list);
60#if RWSEM_DEBUG
61 sem->debug = 0;
62#endif
63} 47}
64 48
65/* 49/*
diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h
index b752e5cbb830..ce2e60664a86 100644
--- a/include/asm-sh/system.h
+++ b/include/asm-sh/system.h
@@ -12,7 +12,7 @@
12 */ 12 */
13 13
14#define switch_to(prev, next, last) do { \ 14#define switch_to(prev, next, last) do { \
15 task_t *__last; \ 15 struct task_struct *__last; \
16 register unsigned long *__ts1 __asm__ ("r1") = &prev->thread.sp; \ 16 register unsigned long *__ts1 __asm__ ("r1") = &prev->thread.sp; \
17 register unsigned long *__ts2 __asm__ ("r2") = &prev->thread.pc; \ 17 register unsigned long *__ts2 __asm__ ("r2") = &prev->thread.pc; \
18 register unsigned long *__ts4 __asm__ ("r4") = (unsigned long *)prev; \ 18 register unsigned long *__ts4 __asm__ ("r4") = (unsigned long *)prev; \
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index a6ece06b83db..ced8cbde046d 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -11,6 +11,7 @@ extern unsigned long __per_cpu_base;
11extern unsigned long __per_cpu_shift; 11extern unsigned long __per_cpu_shift;
12#define __per_cpu_offset(__cpu) \ 12#define __per_cpu_offset(__cpu) \
13 (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) 13 (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
14#define per_cpu_offset(x) (__per_cpu_offset(x))
14 15
15/* Separate out the type, so (int[3], foo) works. */ 16/* Separate out the type, so (int[3], foo) works. */
16#define DEFINE_PER_CPU(type, name) \ 17#define DEFINE_PER_CPU(type, name) \
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
new file mode 100644
index 000000000000..cce6937e87c0
--- /dev/null
+++ b/include/asm-x86_64/irqflags.h
@@ -0,0 +1,141 @@
1/*
2 * include/asm-x86_64/irqflags.h
3 *
4 * IRQ flags handling
5 *
6 * This file gets included from lowlevel asm headers too, to provide
7 * wrapped versions of the local_irq_*() APIs, based on the
8 * raw_local_irq_*() functions from the lowlevel headers.
9 */
10#ifndef _ASM_IRQFLAGS_H
11#define _ASM_IRQFLAGS_H
12
13#ifndef __ASSEMBLY__
14/*
15 * Interrupt control:
16 */
17
18static inline unsigned long __raw_local_save_flags(void)
19{
20 unsigned long flags;
21
22 __asm__ __volatile__(
23 "# __raw_save_flags\n\t"
24 "pushfq ; popq %q0"
25 : "=g" (flags)
26 : /* no input */
27 : "memory"
28 );
29
30 return flags;
31}
32
33#define raw_local_save_flags(flags) \
34 do { (flags) = __raw_local_save_flags(); } while (0)
35
36static inline void raw_local_irq_restore(unsigned long flags)
37{
38 __asm__ __volatile__(
39 "pushq %0 ; popfq"
40 : /* no output */
41 :"g" (flags)
42 :"memory", "cc"
43 );
44}
45
46#ifdef CONFIG_X86_VSMP
47
48/*
49 * Interrupt control for the VSMP architecture:
50 */
51
52static inline void raw_local_irq_disable(void)
53{
54 unsigned long flags = __raw_local_save_flags();
55
56 raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
57}
58
59static inline void raw_local_irq_enable(void)
60{
61 unsigned long flags = __raw_local_save_flags();
62
63 raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
64}
65
66static inline int raw_irqs_disabled_flags(unsigned long flags)
67{
68 return !(flags & (1<<9)) || (flags & (1 << 18));
69}
70
71#else /* CONFIG_X86_VSMP */
72
73static inline void raw_local_irq_disable(void)
74{
75 __asm__ __volatile__("cli" : : : "memory");
76}
77
78static inline void raw_local_irq_enable(void)
79{
80 __asm__ __volatile__("sti" : : : "memory");
81}
82
83static inline int raw_irqs_disabled_flags(unsigned long flags)
84{
85 return !(flags & (1 << 9));
86}
87
88#endif
89
90/*
91 * For spinlocks, etc.:
92 */
93
94static inline unsigned long __raw_local_irq_save(void)
95{
96 unsigned long flags = __raw_local_save_flags();
97
98 raw_local_irq_disable();
99
100 return flags;
101}
102
103#define raw_local_irq_save(flags) \
104 do { (flags) = __raw_local_irq_save(); } while (0)
105
106static inline int raw_irqs_disabled(void)
107{
108 unsigned long flags = __raw_local_save_flags();
109
110 return raw_irqs_disabled_flags(flags);
111}
112
113/*
114 * Used in the idle loop; sti takes one instruction cycle
115 * to complete:
116 */
117static inline void raw_safe_halt(void)
118{
119 __asm__ __volatile__("sti; hlt" : : : "memory");
120}
121
122/*
123 * Used when interrupts are already enabled or to
124 * shutdown the processor:
125 */
126static inline void halt(void)
127{
128 __asm__ __volatile__("hlt": : :"memory");
129}
130
131#else /* __ASSEMBLY__: */
132# ifdef CONFIG_TRACE_IRQFLAGS
133# define TRACE_IRQS_ON call trace_hardirqs_on_thunk
134# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk
135# else
136# define TRACE_IRQS_ON
137# define TRACE_IRQS_OFF
138# endif
139#endif
140
141#endif
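A small sketch (not from the patch) of how these raw primitives compose: save-and-disable around a critical section, with the disabled state visible through raw_irqs_disabled(); the function body is hypothetical.

/* Illustrative sketch only. */
static void example_raw_critical_section(void)
{
	unsigned long flags;

	raw_local_irq_save(flags);	/* save flags, then disable */

	/* interrupts are now masked on this CPU */
	WARN_ON(!raw_irqs_disabled());

	/* ... touch state shared with an interrupt handler ... */

	raw_local_irq_restore(flags);	/* put back the saved flags */
}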
diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
index cd52c7f33bca..2b0c088e2957 100644
--- a/include/asm-x86_64/kdebug.h
+++ b/include/asm-x86_64/kdebug.h
@@ -49,7 +49,7 @@ static inline int notify_die(enum die_val val, const char *str,
49 return atomic_notifier_call_chain(&die_chain, val, &args); 49 return atomic_notifier_call_chain(&die_chain, val, &args);
50} 50}
51 51
52extern int printk_address(unsigned long address); 52extern void printk_address(unsigned long address);
53extern void die(const char *,struct pt_regs *,long); 53extern void die(const char *,struct pt_regs *,long);
54extern void __die(const char *,struct pt_regs *,long); 54extern void __die(const char *,struct pt_regs *,long);
55extern void show_registers(struct pt_regs *regs); 55extern void show_registers(struct pt_regs *regs);
diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index 549eb929b2c0..08dd9f9dda81 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -14,6 +14,8 @@
14#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) 14#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
15#define __my_cpu_offset() read_pda(data_offset) 15#define __my_cpu_offset() read_pda(data_offset)
16 16
17#define per_cpu_offset(x) (__per_cpu_offset(x))
18
17/* Separate out the type, so (int[3], foo) works. */ 19/* Separate out the type, so (int[3], foo) works. */
18#define DEFINE_PER_CPU(type, name) \ 20#define DEFINE_PER_CPU(type, name) \
19 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name 21 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 68e559f3631c..f67f2873a922 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
244 244
245#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) 245#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
246 246
247/* interrupt control.. */ 247#include <linux/irqflags.h>
248#define local_save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
249#define local_irq_restore(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
250
251#ifdef CONFIG_X86_VSMP
252/* Interrupt control for VSMP architecture */
253#define local_irq_disable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
254#define local_irq_enable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
255
256#define irqs_disabled() \
257({ \
258 unsigned long flags; \
259 local_save_flags(flags); \
260 (flags & (1<<18)) || !(flags & (1<<9)); \
261})
262
263/* For spinlocks etc */
264#define local_irq_save(x) do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
265#else /* CONFIG_X86_VSMP */
266#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
267#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
268
269#define irqs_disabled() \
270({ \
271 unsigned long flags; \
272 local_save_flags(flags); \
273 !(flags & (1<<9)); \
274})
275
276/* For spinlocks etc */
277#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
278#endif
279
280/* used in the idle loop; sti takes one instruction cycle to complete */
281#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
282/* used when interrupts are already enabled or to shutdown the processor */
283#define halt() __asm__ __volatile__("hlt": : :"memory")
284 248
285void cpu_idle_wait(void); 249void cpu_idle_wait(void);
286 250
diff --git a/include/asm-xtensa/rwsem.h b/include/asm-xtensa/rwsem.h
index abcd86dc5ab9..0aad3a587551 100644
--- a/include/asm-xtensa/rwsem.h
+++ b/include/asm-xtensa/rwsem.h
@@ -31,24 +31,11 @@ struct rw_semaphore {
31#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 31#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
32 spinlock_t wait_lock; 32 spinlock_t wait_lock;
33 struct list_head wait_list; 33 struct list_head wait_list;
34#if RWSEM_DEBUG
35 int debug;
36#endif
37}; 34};
38 35
39/*
40 * initialisation
41 */
42#if RWSEM_DEBUG
43#define __RWSEM_DEBUG_INIT , 0
44#else
45#define __RWSEM_DEBUG_INIT /* */
46#endif
47
48#define __RWSEM_INITIALIZER(name) \ 36#define __RWSEM_INITIALIZER(name) \
49 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 37 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
50 LIST_HEAD_INIT((name).wait_list) \ 38 LIST_HEAD_INIT((name).wait_list) }
51 __RWSEM_DEBUG_INIT }
52 39
53#define DECLARE_RWSEM(name) \ 40#define DECLARE_RWSEM(name) \
54 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 41 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -63,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
63 sem->count = RWSEM_UNLOCKED_VALUE; 50 sem->count = RWSEM_UNLOCKED_VALUE;
64 spin_lock_init(&sem->wait_lock); 51 spin_lock_init(&sem->wait_lock);
65 INIT_LIST_HEAD(&sem->wait_list); 52 INIT_LIST_HEAD(&sem->wait_list);
66#if RWSEM_DEBUG
67 sem->debug = 0;
68#endif
69} 53}
70 54
71/* 55/*
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 90663ad217f9..251c41e3ddd5 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -21,6 +21,18 @@ struct completion {
21#define DECLARE_COMPLETION(work) \ 21#define DECLARE_COMPLETION(work) \
22 struct completion work = COMPLETION_INITIALIZER(work) 22 struct completion work = COMPLETION_INITIALIZER(work)
23 23
24/*
25 * Lockdep needs to run a non-constant initializer for on-stack
26 * completions - so we use the _ONSTACK() variant for those that
27 * are on the kernel stack:
28 */
29#ifdef CONFIG_LOCKDEP
30# define DECLARE_COMPLETION_ONSTACK(work) \
31 struct completion work = ({ init_completion(&work); work; })
32#else
33# define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
34#endif
35
24static inline void init_completion(struct completion *x) 36static inline void init_completion(struct completion *x)
25{ 37{
26 x->done = 0; 38 x->done = 0;
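A brief sketch of the intended use of the new macro: completions that live on the kernel stack should be declared with the _ONSTACK variant so that, under CONFIG_LOCKDEP, each instance is initialized at runtime and gets its own lock class; the function name is made up.

#include <linux/completion.h>

/* Illustrative sketch only. */
static int example_wait_for_event(void)
{
	/* on the stack, so use the _ONSTACK variant */
	DECLARE_COMPLETION_ONSTACK(done);

	/* ... hand &done to the code that will call complete(&done) ... */

	wait_for_completion(&done);
	return 0;
}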
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 0dd1610a94a9..471781ffeab1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -114,6 +114,18 @@ struct dentry {
114 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ 114 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
115}; 115};
116 116
117/*
118 * dentry->d_lock spinlock nesting subclasses:
119 *
120 * 0: normal
121 * 1: nested
122 */
123enum dentry_d_lock_class
124{
125 DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */
126 DENTRY_D_LOCK_NESTED
127};
128
117struct dentry_operations { 129struct dentry_operations {
118 int (*d_revalidate)(struct dentry *, struct nameidata *); 130 int (*d_revalidate)(struct dentry *, struct nameidata *);
119 int (*d_hash) (struct dentry *, struct qstr *); 131 int (*d_hash) (struct dentry *, struct qstr *);
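A sketch of how the two d_lock subclasses are meant to be used when a parent and a child dentry have to be locked at once, assuming the spin_lock_nested() annotation introduced elsewhere in this series; the ordering shown is illustrative.

#include <linux/dcache.h>
#include <linux/spinlock.h>

/* Illustrative sketch only. */
static void example_lock_dentry_pair(struct dentry *parent, struct dentry *child)
{
	spin_lock(&parent->d_lock);			/* DENTRY_D_LOCK_NORMAL */
	spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);

	/* ... operate on both dentries ... */

	spin_unlock(&child->d_lock);
	spin_unlock(&parent->d_lock);
}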
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
new file mode 100644
index 000000000000..6a7047851e48
--- /dev/null
+++ b/include/linux/debug_locks.h
@@ -0,0 +1,69 @@
1#ifndef __LINUX_DEBUG_LOCKING_H
2#define __LINUX_DEBUG_LOCKING_H
3
4extern int debug_locks;
5extern int debug_locks_silent;
6
7/*
8 * Generic 'turn off all lock debugging' function:
9 */
10extern int debug_locks_off(void);
11
12/*
13 * In the debug case we carry the caller's instruction pointer into
14 * other functions, but we don't want the function argument overhead
15 * in the nondebug case - hence these macros:
16 */
17#define _RET_IP_ (unsigned long)__builtin_return_address(0)
18#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
19
20#define DEBUG_LOCKS_WARN_ON(c) \
21({ \
22 int __ret = 0; \
23 \
24 if (unlikely(c)) { \
25 if (debug_locks_off()) \
26 WARN_ON(1); \
27 __ret = 1; \
28 } \
29 __ret; \
30})
31
32#ifdef CONFIG_SMP
33# define SMP_DEBUG_LOCKS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c)
34#else
35# define SMP_DEBUG_LOCKS_WARN_ON(c) do { } while (0)
36#endif
37
38#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS
39 extern void locking_selftest(void);
40#else
41# define locking_selftest() do { } while (0)
42#endif
43
44#ifdef CONFIG_LOCKDEP
45extern void debug_show_all_locks(void);
46extern void debug_show_held_locks(struct task_struct *task);
47extern void debug_check_no_locks_freed(const void *from, unsigned long len);
48extern void debug_check_no_locks_held(struct task_struct *task);
49#else
50static inline void debug_show_all_locks(void)
51{
52}
53
54static inline void debug_show_held_locks(struct task_struct *task)
55{
56}
57
58static inline void
59debug_check_no_locks_freed(const void *from, unsigned long len)
60{
61}
62
63static inline void
64debug_check_no_locks_held(struct task_struct *task)
65{
66}
67#endif
68
69#endif
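A short sketch of the calling convention: DEBUG_LOCKS_WARN_ON() evaluates to 1 when the condition is true (warning once unless lock debugging was already turned off), so callers can bail out on the same test; the invariant checked here is hypothetical.

#include <linux/debug_locks.h>

/* Illustrative sketch only. */
static void example_check_depth(int depth)
{
	/* returns 1 if the condition held; warns unless debugging was off */
	if (DEBUG_LOCKS_WARN_ON(depth < 0))
		return;

	/* ... continue only when the invariant is intact ... */
}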
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e04a5cfe874f..134b32068246 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -436,6 +436,21 @@ struct block_device {
436}; 436};
437 437
438/* 438/*
439 * bdev->bd_mutex nesting subclasses for the lock validator:
440 *
441 * 0: normal
442 * 1: 'whole'
443 * 2: 'partition'
444 */
445enum bdev_bd_mutex_lock_class
446{
447 BD_MUTEX_NORMAL,
448 BD_MUTEX_WHOLE,
449 BD_MUTEX_PARTITION
450};
451
452
453/*
439 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache 454 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
440 * radix trees 455 * radix trees
441 */ 456 */
@@ -543,6 +558,25 @@ struct inode {
543}; 558};
544 559
545/* 560/*
561 * inode->i_mutex nesting subclasses for the lock validator:
562 *
563 * 0: the object of the current VFS operation
564 * 1: parent
565 * 2: child/target
566 * 3: quota file
567 *
568 * The locking order between these classes is
569 * parent -> child -> normal -> quota
570 */
571enum inode_i_mutex_lock_class
572{
573 I_MUTEX_NORMAL,
574 I_MUTEX_PARENT,
575 I_MUTEX_CHILD,
576 I_MUTEX_QUOTA
577};
578
579/*
546 * NOTE: in a 32bit arch with a preemptable kernel and 580 * NOTE: in a 32bit arch with a preemptable kernel and
547 * an UP compile the i_size_read/write must be atomic 581 * an UP compile the i_size_read/write must be atomic
548 * with respect to the local cpu (unlike with preempt disabled), 582 * with respect to the local cpu (unlike with preempt disabled),
@@ -1276,6 +1310,8 @@ struct file_system_type {
1276 struct module *owner; 1310 struct module *owner;
1277 struct file_system_type * next; 1311 struct file_system_type * next;
1278 struct list_head fs_supers; 1312 struct list_head fs_supers;
1313 struct lock_class_key s_lock_key;
1314 struct lock_class_key s_umount_key;
1279}; 1315};
1280 1316
1281extern int get_sb_bdev(struct file_system_type *fs_type, 1317extern int get_sb_bdev(struct file_system_type *fs_type,
@@ -1404,6 +1440,7 @@ extern void bd_set_size(struct block_device *, loff_t size);
1404extern void bd_forget(struct inode *inode); 1440extern void bd_forget(struct inode *inode);
1405extern void bdput(struct block_device *); 1441extern void bdput(struct block_device *);
1406extern struct block_device *open_by_devnum(dev_t, unsigned); 1442extern struct block_device *open_by_devnum(dev_t, unsigned);
1443extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
1407extern const struct file_operations def_blk_fops; 1444extern const struct file_operations def_blk_fops;
1408extern const struct address_space_operations def_blk_aops; 1445extern const struct address_space_operations def_blk_aops;
1409extern const struct file_operations def_chr_fops; 1446extern const struct file_operations def_chr_fops;
@@ -1414,6 +1451,7 @@ extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
1414extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 1451extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
1415extern int blkdev_get(struct block_device *, mode_t, unsigned); 1452extern int blkdev_get(struct block_device *, mode_t, unsigned);
1416extern int blkdev_put(struct block_device *); 1453extern int blkdev_put(struct block_device *);
1454extern int blkdev_put_partition(struct block_device *);
1417extern int bd_claim(struct block_device *, void *); 1455extern int bd_claim(struct block_device *, void *);
1418extern void bd_release(struct block_device *); 1456extern void bd_release(struct block_device *);
1419#ifdef CONFIG_SYSFS 1457#ifdef CONFIG_SYSFS
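A sketch of how VFS code is expected to tag the parent/child i_mutex acquisitions described above, assuming the mutex_lock_nested() annotation from the same series; the operation itself is illustrative.

#include <linux/fs.h>
#include <linux/mutex.h>

/* Illustrative sketch only. */
static void example_lock_dir_and_victim(struct inode *dir, struct inode *victim)
{
	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&victim->i_mutex, I_MUTEX_CHILD);

	/* ... directory operation touching 'victim' inside 'dir' ... */

	mutex_unlock(&victim->i_mutex);
	mutex_unlock(&dir->i_mutex);
}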
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 114ae583cca9..50d8b5744cf6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/preempt.h> 4#include <linux/preempt.h>
5#include <linux/smp_lock.h> 5#include <linux/smp_lock.h>
6#include <linux/lockdep.h>
6#include <asm/hardirq.h> 7#include <asm/hardirq.h>
7#include <asm/system.h> 8#include <asm/system.h>
8 9
@@ -86,9 +87,6 @@ extern void synchronize_irq(unsigned int irq);
86# define synchronize_irq(irq) barrier() 87# define synchronize_irq(irq) barrier()
87#endif 88#endif
88 89
89#define nmi_enter() irq_enter()
90#define nmi_exit() sub_preempt_count(HARDIRQ_OFFSET)
91
92struct task_struct; 90struct task_struct;
93 91
94#ifndef CONFIG_VIRT_CPU_ACCOUNTING 92#ifndef CONFIG_VIRT_CPU_ACCOUNTING
@@ -97,12 +95,35 @@ static inline void account_system_vtime(struct task_struct *tsk)
97} 95}
98#endif 96#endif
99 97
98/*
99 * It is safe to do non-atomic ops on ->hardirq_context,
100 * because NMI handlers may not preempt and the ops are
101 * always balanced, so the interrupted value of ->hardirq_context
102 * will always be restored.
103 */
100#define irq_enter() \ 104#define irq_enter() \
101 do { \ 105 do { \
102 account_system_vtime(current); \ 106 account_system_vtime(current); \
103 add_preempt_count(HARDIRQ_OFFSET); \ 107 add_preempt_count(HARDIRQ_OFFSET); \
108 trace_hardirq_enter(); \
109 } while (0)
110
111/*
112 * Exit irq context without processing softirqs:
113 */
114#define __irq_exit() \
115 do { \
116 trace_hardirq_exit(); \
117 account_system_vtime(current); \
118 sub_preempt_count(HARDIRQ_OFFSET); \
104 } while (0) 119 } while (0)
105 120
121/*
122 * Exit irq context and process softirqs if needed:
123 */
106extern void irq_exit(void); 124extern void irq_exit(void);
107 125
126#define nmi_enter() do { lockdep_off(); irq_enter(); } while (0)
127#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
128
108#endif /* LINUX_HARDIRQ_H */ 129#endif /* LINUX_HARDIRQ_H */
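A sketch of the bracket the new definitions expect around an NMI handler: lockdep is switched off for the duration, and softirqs are deliberately not run on the way out; the handler body is hypothetical.

#include <linux/hardirq.h>
#include <asm/ptrace.h>

/* Illustrative sketch only. */
static void example_nmi_body(struct pt_regs *regs)
{
	nmi_enter();		/* lockdep_off() + irq_enter() */

	/* ... minimal, non-sleeping NMI work ... */

	nmi_exit();		/* __irq_exit() + lockdep_on(), no softirq run */
}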
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 07d7305f131e..e4bccbcc2750 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -91,6 +91,7 @@ struct hrtimer_base {
91 ktime_t (*get_softirq_time)(void); 91 ktime_t (*get_softirq_time)(void);
92 struct hrtimer *curr_timer; 92 struct hrtimer *curr_timer;
93 ktime_t softirq_time; 93 ktime_t softirq_time;
94 struct lock_class_key lock_key;
94}; 95};
95 96
96/* 97/*
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 285316c836b5..dc7abef10965 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1359,7 +1359,7 @@ extern struct semaphore ide_cfg_sem;
1359 * ide_drive_t->hwif: constant, no locking 1359 * ide_drive_t->hwif: constant, no locking
1360 */ 1360 */
1361 1361
1362#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable(); } while (0) 1362#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
1363 1363
1364extern struct bus_type ide_bus_type; 1364extern struct bus_type ide_bus_type;
1365 1365
diff --git a/include/linux/idr.h b/include/linux/idr.h
index f559a719dbe8..826803449db7 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -66,7 +66,7 @@ struct idr {
66 .id_free = NULL, \ 66 .id_free = NULL, \
67 .layers = 0, \ 67 .layers = 0, \
68 .id_free_cnt = 0, \ 68 .id_free_cnt = 0, \
69 .lock = SPIN_LOCK_UNLOCKED, \ 69 .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
70} 70}
71#define DEFINE_IDR(name) struct idr name = IDR_INIT(name) 71#define DEFINE_IDR(name) struct idr name = IDR_INIT(name)
72 72
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3a256957fb56..60aac2cea0cf 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -3,6 +3,8 @@
3 3
4#include <linux/file.h> 4#include <linux/file.h>
5#include <linux/rcupdate.h> 5#include <linux/rcupdate.h>
6#include <linux/irqflags.h>
7#include <linux/lockdep.h>
6 8
7#define INIT_FDTABLE \ 9#define INIT_FDTABLE \
8{ \ 10{ \
@@ -21,7 +23,7 @@
21 .count = ATOMIC_INIT(1), \ 23 .count = ATOMIC_INIT(1), \
22 .fdt = &init_files.fdtab, \ 24 .fdt = &init_files.fdtab, \
23 .fdtab = INIT_FDTABLE, \ 25 .fdtab = INIT_FDTABLE, \
24 .file_lock = SPIN_LOCK_UNLOCKED, \ 26 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), \
25 .next_fd = 0, \ 27 .next_fd = 0, \
26 .close_on_exec_init = { { 0, } }, \ 28 .close_on_exec_init = { { 0, } }, \
27 .open_fds_init = { { 0, } }, \ 29 .open_fds_init = { { 0, } }, \
@@ -36,7 +38,7 @@
36 .user_id = 0, \ 38 .user_id = 0, \
37 .next = NULL, \ 39 .next = NULL, \
38 .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ 40 .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \
39 .ctx_lock = SPIN_LOCK_UNLOCKED, \ 41 .ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \
40 .reqs_active = 0U, \ 42 .reqs_active = 0U, \
41 .max_reqs = ~0U, \ 43 .max_reqs = ~0U, \
42} 44}
@@ -48,7 +50,7 @@
48 .mm_users = ATOMIC_INIT(2), \ 50 .mm_users = ATOMIC_INIT(2), \
49 .mm_count = ATOMIC_INIT(1), \ 51 .mm_count = ATOMIC_INIT(1), \
50 .mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \ 52 .mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \
51 .page_table_lock = SPIN_LOCK_UNLOCKED, \ 53 .page_table_lock = __SPIN_LOCK_UNLOCKED(name.page_table_lock), \
52 .mmlist = LIST_HEAD_INIT(name.mmlist), \ 54 .mmlist = LIST_HEAD_INIT(name.mmlist), \
53 .cpu_vm_mask = CPU_MASK_ALL, \ 55 .cpu_vm_mask = CPU_MASK_ALL, \
54} 56}
@@ -69,7 +71,7 @@
69#define INIT_SIGHAND(sighand) { \ 71#define INIT_SIGHAND(sighand) { \
70 .count = ATOMIC_INIT(1), \ 72 .count = ATOMIC_INIT(1), \
71 .action = { { { .sa_handler = NULL, } }, }, \ 73 .action = { { { .sa_handler = NULL, } }, }, \
72 .siglock = SPIN_LOCK_UNLOCKED, \ 74 .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \
73} 75}
74 76
75extern struct group_info init_groups; 77extern struct group_info init_groups;
@@ -119,12 +121,13 @@ extern struct group_info init_groups;
119 .list = LIST_HEAD_INIT(tsk.pending.list), \ 121 .list = LIST_HEAD_INIT(tsk.pending.list), \
120 .signal = {{0}}}, \ 122 .signal = {{0}}}, \
121 .blocked = {{0}}, \ 123 .blocked = {{0}}, \
122 .alloc_lock = SPIN_LOCK_UNLOCKED, \ 124 .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
123 .journal_info = NULL, \ 125 .journal_info = NULL, \
124 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ 126 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
125 .fs_excl = ATOMIC_INIT(0), \ 127 .fs_excl = ATOMIC_INIT(0), \
126 .pi_lock = SPIN_LOCK_UNLOCKED, \ 128 .pi_lock = SPIN_LOCK_UNLOCKED, \
127 INIT_RT_MUTEXES(tsk) \ 129 INIT_TRACE_IRQFLAGS \
130 INIT_LOCKDEP \
128} 131}
129 132
130 133
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index da3e0dbe61d4..d5afee95fd43 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -10,6 +10,7 @@
10#include <linux/irqreturn.h> 10#include <linux/irqreturn.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/irqflags.h>
13#include <asm/atomic.h> 14#include <asm/atomic.h>
14#include <asm/ptrace.h> 15#include <asm/ptrace.h>
15#include <asm/system.h> 16#include <asm/system.h>
@@ -80,12 +81,64 @@ extern int request_irq(unsigned int,
80 unsigned long, const char *, void *); 81 unsigned long, const char *, void *);
81extern void free_irq(unsigned int, void *); 82extern void free_irq(unsigned int, void *);
82 83
84/*
85 * On lockdep we don't want to enable hardirqs in hardirq
86 * context. Use local_irq_enable_in_hardirq() to annotate
87 * kernel code that has to do this nevertheless (pretty much
88 * the only valid case is for old/broken hardware that is
89 * insanely slow).
90 *
91 * NOTE: in theory this might break fragile code that relies
92 * on hardirq delivery - in practice we don't seem to have such
93 * places left. So the only effect should be slightly increased
94 * irqs-off latencies.
95 */
96#ifdef CONFIG_LOCKDEP
97# define local_irq_enable_in_hardirq() do { } while (0)
98#else
99# define local_irq_enable_in_hardirq() local_irq_enable()
100#endif
83 101
84#ifdef CONFIG_GENERIC_HARDIRQS 102#ifdef CONFIG_GENERIC_HARDIRQS
85extern void disable_irq_nosync(unsigned int irq); 103extern void disable_irq_nosync(unsigned int irq);
86extern void disable_irq(unsigned int irq); 104extern void disable_irq(unsigned int irq);
87extern void enable_irq(unsigned int irq); 105extern void enable_irq(unsigned int irq);
88 106
107/*
108 * Special lockdep variants of irq disabling/enabling.
109 * These should be used for locking constructs that
110 * know that a particular irq context which is disabled,
111 * and which is the only irq-context user of a lock,
112 * that it's safe to take the lock in the irq-disabled
113 * section without disabling hardirqs.
114 *
115 * On !CONFIG_LOCKDEP they are equivalent to the normal
116 * irq disable/enable methods.
117 */
118static inline void disable_irq_nosync_lockdep(unsigned int irq)
119{
120 disable_irq_nosync(irq);
121#ifdef CONFIG_LOCKDEP
122 local_irq_disable();
123#endif
124}
125
126static inline void disable_irq_lockdep(unsigned int irq)
127{
128 disable_irq(irq);
129#ifdef CONFIG_LOCKDEP
130 local_irq_disable();
131#endif
132}
133
134static inline void enable_irq_lockdep(unsigned int irq)
135{
136#ifdef CONFIG_LOCKDEP
137 local_irq_enable();
138#endif
139 enable_irq(irq);
140}
141
89/* IRQ wakeup (PM) control: */ 142/* IRQ wakeup (PM) control: */
90extern int set_irq_wake(unsigned int irq, unsigned int on); 143extern int set_irq_wake(unsigned int irq, unsigned int on);
91 144
@@ -99,7 +152,19 @@ static inline int disable_irq_wake(unsigned int irq)
99 return set_irq_wake(irq, 0); 152 return set_irq_wake(irq, 0);
100} 153}
101 154
102#endif 155#else /* !CONFIG_GENERIC_HARDIRQS */
156/*
157 * NOTE: non-genirq architectures, if they want to support the lock
158 * validator, need to define the methods below in their asm/irq.h
159 * files, under an #ifdef CONFIG_LOCKDEP section.
160 */
161# ifndef CONFIG_LOCKDEP
162# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq)
163# define disable_irq_lockdep(irq) disable_irq(irq)
164# define enable_irq_lockdep(irq) enable_irq(irq)
165# endif
166
167#endif /* CONFIG_GENERIC_HARDIRQS */
103 168
104#ifndef __ARCH_SET_SOFTIRQ_PENDING 169#ifndef __ARCH_SET_SOFTIRQ_PENDING
105#define set_softirq_pending(x) (local_softirq_pending() = (x)) 170#define set_softirq_pending(x) (local_softirq_pending() = (x))
@@ -135,13 +200,11 @@ static inline void __deprecated save_and_cli(unsigned long *x)
135#define save_and_cli(x) save_and_cli(&x) 200#define save_and_cli(x) save_and_cli(&x)
136#endif /* CONFIG_SMP */ 201#endif /* CONFIG_SMP */
137 202
138/* SoftIRQ primitives. */ 203extern void local_bh_disable(void);
139#define local_bh_disable() \ 204extern void __local_bh_enable(void);
140 do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0) 205extern void _local_bh_enable(void);
141#define __local_bh_enable() \
142 do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0)
143
144extern void local_bh_enable(void); 206extern void local_bh_enable(void);
207extern void local_bh_enable_ip(unsigned long ip);
145 208
146/* PLEASE, avoid allocating new softirqs, if you need not _really_ high 209/* PLEASE, avoid allocating new softirqs, if you need not _really_ high
147 frequency threaded job scheduling. For almost all the purposes 210 frequency threaded job scheduling. For almost all the purposes
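A sketch of a driver that relies on "my irq line is masked, so my lock cannot be taken from irq context" and therefore uses the lockdep variants above; the irq number and lock are illustrative.

#include <linux/interrupt.h>
#include <linux/spinlock.h>

/* Illustrative sketch only. */
static void example_sync_with_handler(unsigned int irq, spinlock_t *lock)
{
	/* under CONFIG_LOCKDEP this also does local_irq_disable() */
	disable_irq_lockdep(irq);

	spin_lock(lock);	/* safe: the only irq-context user is masked */
	/* ... update state shared with the interrupt handler ... */
	spin_unlock(lock);

	enable_irq_lockdep(irq);
}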
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 87a9fc039b47..5612dfeeae50 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -55,6 +55,7 @@ struct resource_list {
55#define IORESOURCE_IRQ_LOWEDGE (1<<1) 55#define IORESOURCE_IRQ_LOWEDGE (1<<1)
56#define IORESOURCE_IRQ_HIGHLEVEL (1<<2) 56#define IORESOURCE_IRQ_HIGHLEVEL (1<<2)
57#define IORESOURCE_IRQ_LOWLEVEL (1<<3) 57#define IORESOURCE_IRQ_LOWLEVEL (1<<3)
58#define IORESOURCE_IRQ_SHAREABLE (1<<4)
58 59
59/* ISA PnP DMA specific bits (IORESOURCE_BITS) */ 60/* ISA PnP DMA specific bits (IORESOURCE_BITS) */
60#define IORESOURCE_DMA_TYPE_MASK (3<<0) 61#define IORESOURCE_DMA_TYPE_MASK (3<<0)
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
new file mode 100644
index 000000000000..412e025bc5c7
--- /dev/null
+++ b/include/linux/irqflags.h
@@ -0,0 +1,96 @@
1/*
2 * include/linux/irqflags.h
3 *
4 * IRQ flags tracing: follow the state of the hardirq and softirq flags and
5 * provide callbacks for transitions between ON and OFF states.
6 *
7 * This file gets included from lowlevel asm headers too, to provide
8 * wrapped versions of the local_irq_*() APIs, based on the
9 * raw_local_irq_*() macros from the lowlevel headers.
10 */
11#ifndef _LINUX_TRACE_IRQFLAGS_H
12#define _LINUX_TRACE_IRQFLAGS_H
13
14#ifdef CONFIG_TRACE_IRQFLAGS
15 extern void trace_hardirqs_on(void);
16 extern void trace_hardirqs_off(void);
17 extern void trace_softirqs_on(unsigned long ip);
18 extern void trace_softirqs_off(unsigned long ip);
19# define trace_hardirq_context(p) ((p)->hardirq_context)
20# define trace_softirq_context(p) ((p)->softirq_context)
21# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
22# define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
23# define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
24# define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
25# define trace_softirq_enter() do { current->softirq_context++; } while (0)
26# define trace_softirq_exit() do { current->softirq_context--; } while (0)
27# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
28#else
29# define trace_hardirqs_on() do { } while (0)
30# define trace_hardirqs_off() do { } while (0)
31# define trace_softirqs_on(ip) do { } while (0)
32# define trace_softirqs_off(ip) do { } while (0)
33# define trace_hardirq_context(p) 0
34# define trace_softirq_context(p) 0
35# define trace_hardirqs_enabled(p) 0
36# define trace_softirqs_enabled(p) 0
37# define trace_hardirq_enter() do { } while (0)
38# define trace_hardirq_exit() do { } while (0)
39# define trace_softirq_enter() do { } while (0)
40# define trace_softirq_exit() do { } while (0)
41# define INIT_TRACE_IRQFLAGS
42#endif
43
44#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
45
46#include <asm/irqflags.h>
47
48#define local_irq_enable() \
49 do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
50#define local_irq_disable() \
51 do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
52#define local_irq_save(flags) \
53 do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
54
55#define local_irq_restore(flags) \
56 do { \
57 if (raw_irqs_disabled_flags(flags)) { \
58 raw_local_irq_restore(flags); \
59 trace_hardirqs_off(); \
60 } else { \
61 trace_hardirqs_on(); \
62 raw_local_irq_restore(flags); \
63 } \
64 } while (0)
65#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
66/*
67 * The local_irq_*() APIs are equivalent to the raw_local_irq_*()
68 * if !TRACE_IRQFLAGS.
69 */
70# define raw_local_irq_disable() local_irq_disable()
71# define raw_local_irq_enable() local_irq_enable()
72# define raw_local_irq_save(flags) local_irq_save(flags)
73# define raw_local_irq_restore(flags) local_irq_restore(flags)
74#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
75
76#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
77#define safe_halt() \
78 do { \
79 trace_hardirqs_on(); \
80 raw_safe_halt(); \
81 } while (0)
82
83#define local_save_flags(flags) raw_local_save_flags(flags)
84
85#define irqs_disabled() \
86({ \
87 unsigned long flags; \
88 \
89 raw_local_save_flags(flags); \
90 raw_irqs_disabled_flags(flags); \
91})
92
93#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
94#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
95
96#endif
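A short sketch of what the wrapped APIs expand to on an architecture with CONFIG_TRACE_IRQFLAGS_SUPPORT; the critical section is hypothetical.

#include <linux/irqflags.h>

/* Illustrative sketch only. */
static void example_irqsave_section(void)
{
	unsigned long flags;

	/* raw_local_irq_save(flags); trace_hardirqs_off(); */
	local_irq_save(flags);

	/* ... per-CPU work that must not race with interrupts ... */

	/* trace_hardirqs_on() is called only if 'flags' had irqs enabled */
	local_irq_restore(flags);
}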
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 54e2549f96ba..849043ce4ed6 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -57,10 +57,25 @@ do { \
57#define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr) 57#define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr)
58#endif 58#endif
59 59
60#define print_symbol(fmt, addr) \ 60static inline void print_symbol(const char *fmt, unsigned long addr)
61do { \ 61{
62 __check_printsym_format(fmt, ""); \ 62 __check_printsym_format(fmt, "");
63 __print_symbol(fmt, addr); \ 63 __print_symbol(fmt, (unsigned long)
64 __builtin_extract_return_addr((void *)addr));
65}
66
67#ifndef CONFIG_64BIT
68#define print_ip_sym(ip) \
69do { \
70 printk("[<%08lx>]", ip); \
71 print_symbol(" %s\n", ip); \
64} while(0) 72} while(0)
73#else
74#define print_ip_sym(ip) \
75do { \
76 printk("[<%016lx>]", ip); \
77 print_symbol(" %s\n", ip); \
78} while(0)
79#endif
65 80
66#endif /*_LINUX_KALLSYMS_H*/ 81#endif /*_LINUX_KALLSYMS_H*/
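A tiny sketch of the new print_ip_sym() helper in use; the caller is hypothetical.

#include <linux/kallsyms.h>

/* Illustrative sketch only. */
static void example_report_caller(void)
{
	/* prints "[<address>]" followed by the resolved symbol name */
	print_ip_sym((unsigned long)__builtin_return_address(0));
}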
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
new file mode 100644
index 000000000000..316e0fb8d7b1
--- /dev/null
+++ b/include/linux/lockdep.h
@@ -0,0 +1,353 @@
1/*
2 * Runtime locking correctness validator
3 *
4 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
5 *
6 * see Documentation/lockdep-design.txt for more details.
7 */
8#ifndef __LINUX_LOCKDEP_H
9#define __LINUX_LOCKDEP_H
10
11#include <linux/linkage.h>
12#include <linux/list.h>
13#include <linux/debug_locks.h>
14#include <linux/stacktrace.h>
15
16#ifdef CONFIG_LOCKDEP
17
18/*
19 * Lock-class usage-state bits:
20 */
21enum lock_usage_bit
22{
23 LOCK_USED = 0,
24 LOCK_USED_IN_HARDIRQ,
25 LOCK_USED_IN_SOFTIRQ,
26 LOCK_ENABLED_SOFTIRQS,
27 LOCK_ENABLED_HARDIRQS,
28 LOCK_USED_IN_HARDIRQ_READ,
29 LOCK_USED_IN_SOFTIRQ_READ,
30 LOCK_ENABLED_SOFTIRQS_READ,
31 LOCK_ENABLED_HARDIRQS_READ,
32 LOCK_USAGE_STATES
33};
34
35/*
36 * Usage-state bitmasks:
37 */
38#define LOCKF_USED (1 << LOCK_USED)
39#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ)
40#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ)
41#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS)
42#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS)
43
44#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS)
45#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
46
47#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ)
48#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ)
49#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ)
50#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ)
51
52#define LOCKF_ENABLED_IRQS_READ \
53 (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ)
54#define LOCKF_USED_IN_IRQ_READ \
55 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
56
57#define MAX_LOCKDEP_SUBCLASSES 8UL
58
59/*
60 * Lock-classes are keyed via unique addresses, by embedding the
61 * lockclass-key into the kernel (or module) .data section. (For
62 * static locks we use the lock address itself as the key.)
63 */
64struct lockdep_subclass_key {
65 char __one_byte;
66} __attribute__ ((__packed__));
67
68struct lock_class_key {
69 struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
70};
71
72/*
73 * The lock-class itself:
74 */
75struct lock_class {
76 /*
77 * class-hash:
78 */
79 struct list_head hash_entry;
80
81 /*
82 * global list of all lock-classes:
83 */
84 struct list_head lock_entry;
85
86 struct lockdep_subclass_key *key;
87 unsigned int subclass;
88
89 /*
90 * IRQ/softirq usage tracking bits:
91 */
92 unsigned long usage_mask;
93 struct stack_trace usage_traces[LOCK_USAGE_STATES];
94
95 /*
96 * These fields represent a directed graph of lock dependencies,
97 * to every node we attach a list of "forward" and a list of
98 * "backward" graph nodes.
99 */
100 struct list_head locks_after, locks_before;
101
102 /*
103 * Generation counter, when doing certain classes of graph walking,
104 * to ensure that we check one node only once:
105 */
106 unsigned int version;
107
108 /*
109 * Statistics counter:
110 */
111 unsigned long ops;
112
113 const char *name;
114 int name_version;
115};
116
117/*
118 * Map the lock object (the lock instance) to the lock-class object.
119 * This is embedded into specific lock instances:
120 */
121struct lockdep_map {
122 struct lock_class_key *key;
123 struct lock_class *class[MAX_LOCKDEP_SUBCLASSES];
124 const char *name;
125};
126
127/*
128 * Every lock has a list of other locks that were taken after it.
129 * We only grow the list, never remove from it:
130 */
131struct lock_list {
132 struct list_head entry;
133 struct lock_class *class;
134 struct stack_trace trace;
135};
136
137/*
138 * We record lock dependency chains, so that we can cache them:
139 */
140struct lock_chain {
141 struct list_head entry;
142 u64 chain_key;
143};
144
145struct held_lock {
146 /*
147 * One-way hash of the dependency chain up to this point. We
148 * hash the hashes step by step as the dependency chain grows.
149 *
150 * We use it for dependency-caching and we skip detection
151 * passes and dependency-updates if there is a cache-hit, so
152 * it is absolutely critical for 100% coverage of the validator
153 * to have a unique key value for every unique dependency path
154 * that can occur in the system, to make a unique hash value
155 * as likely as possible - hence the 64-bit width.
156 *
157 * The task struct holds the current hash value (initialized
158 * with zero), here we store the previous hash value:
159 */
160 u64 prev_chain_key;
161 struct lock_class *class;
162 unsigned long acquire_ip;
163 struct lockdep_map *instance;
164
165 /*
166 * The lock-stack is unified in that the lock chains of interrupt
167 * contexts nest on top of process context chains, but we 'separate'
168 * the hashes by starting with 0 if we cross into an interrupt
169 * context, and we also do not add cross-context lock
170 * dependencies - the lock usage graph walking covers that area
171 * anyway, and we'd just unnecessarily increase the number of
172 * dependencies otherwise. [Note: hardirq and softirq contexts
173 * are separated from each other too.]
174 *
175 * The following field is used to detect when we cross into an
176 * interrupt context:
177 */
178 int irq_context;
179 int trylock;
180 int read;
181 int check;
182 int hardirqs_off;
183};
184
185/*
186 * Initialization, self-test and debugging-output methods:
187 */
188extern void lockdep_init(void);
189extern void lockdep_info(void);
190extern void lockdep_reset(void);
191extern void lockdep_reset_lock(struct lockdep_map *lock);
192extern void lockdep_free_key_range(void *start, unsigned long size);
193
194extern void lockdep_off(void);
195extern void lockdep_on(void);
196extern int lockdep_internal(void);
197
198/*
199 * These methods are used by specific locking variants (spinlocks,
200 * rwlocks, mutexes and rwsems) to pass init/acquire/release events
201 * to lockdep:
202 */
203
204extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
205 struct lock_class_key *key);
206
207/*
208 * Reinitialize a lock key - for cases where there is special locking or
209 * special initialization of locks so that the validator gets the scope
210 * of dependencies wrong: they are either too broad (they need a class-split)
211 * or they are too narrow (they suffer from a false class-split):
212 */
213#define lockdep_set_class(lock, key) \
214 lockdep_init_map(&(lock)->dep_map, #key, key)
215#define lockdep_set_class_and_name(lock, key, name) \
216 lockdep_init_map(&(lock)->dep_map, name, key)
217
218/*
219 * Acquire a lock.
220 *
221 * Values for "read":
222 *
223 * 0: exclusive (write) acquire
224 * 1: read-acquire (no recursion allowed)
225 * 2: read-acquire with same-instance recursion allowed
226 *
227 * Values for check:
228 *
229 * 0: disabled
230 * 1: simple checks (freeing, held-at-exit-time, etc.)
231 * 2: full validation
232 */
233extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
234 int trylock, int read, int check, unsigned long ip);
235
236extern void lock_release(struct lockdep_map *lock, int nested,
237 unsigned long ip);
238
239# define INIT_LOCKDEP .lockdep_recursion = 0,
240
241#else /* !LOCKDEP */
242
243static inline void lockdep_off(void)
244{
245}
246
247static inline void lockdep_on(void)
248{
249}
250
251static inline int lockdep_internal(void)
252{
253 return 0;
254}
255
256# define lock_acquire(l, s, t, r, c, i) do { } while (0)
257# define lock_release(l, n, i) do { } while (0)
258# define lockdep_init() do { } while (0)
259# define lockdep_info() do { } while (0)
260# define lockdep_init_map(lock, name, key) do { (void)(key); } while (0)
261# define lockdep_set_class(lock, key) do { (void)(key); } while (0)
262# define lockdep_set_class_and_name(lock, key, name) \
263 do { (void)(key); } while (0)
264# define INIT_LOCKDEP
265# define lockdep_reset() do { debug_locks = 1; } while (0)
266# define lockdep_free_key_range(start, size) do { } while (0)
267/*
268 * The class key takes no space if lockdep is disabled:
269 */
270struct lock_class_key { };
271#endif /* !LOCKDEP */
272
273#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
274extern void early_init_irq_lock_class(void);
275#else
276# define early_init_irq_lock_class() do { } while (0)
277#endif
278
279#ifdef CONFIG_TRACE_IRQFLAGS
280extern void early_boot_irqs_off(void);
281extern void early_boot_irqs_on(void);
282#else
283# define early_boot_irqs_off() do { } while (0)
284# define early_boot_irqs_on() do { } while (0)
285#endif
286
287/*
288 * For trivial one-depth nesting of a lock-class, the following
289 * global define can be used. (Subsystems with multiple levels
290 * of nesting should define their own lock-nesting subclasses.)
291 */
292#define SINGLE_DEPTH_NESTING 1
293
294/*
295 * Map the dependency ops to NOP or to real lockdep ops, depending
296 * on the per lock-class debug mode:
297 */
298
299#ifdef CONFIG_DEBUG_LOCK_ALLOC
300# ifdef CONFIG_PROVE_LOCKING
301# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
302# else
303# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
304# endif
305# define spin_release(l, n, i) lock_release(l, n, i)
306#else
307# define spin_acquire(l, s, t, i) do { } while (0)
308# define spin_release(l, n, i) do { } while (0)
309#endif
310
311#ifdef CONFIG_DEBUG_LOCK_ALLOC
312# ifdef CONFIG_PROVE_LOCKING
313# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
314# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i)
315# else
316# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
317# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i)
318# endif
319# define rwlock_release(l, n, i) lock_release(l, n, i)
320#else
321# define rwlock_acquire(l, s, t, i) do { } while (0)
322# define rwlock_acquire_read(l, s, t, i) do { } while (0)
323# define rwlock_release(l, n, i) do { } while (0)
324#endif
325
326#ifdef CONFIG_DEBUG_LOCK_ALLOC
327# ifdef CONFIG_PROVE_LOCKING
328# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
329# else
330# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
331# endif
332# define mutex_release(l, n, i) lock_release(l, n, i)
333#else
334# define mutex_acquire(l, s, t, i) do { } while (0)
335# define mutex_release(l, n, i) do { } while (0)
336#endif
337
338#ifdef CONFIG_DEBUG_LOCK_ALLOC
339# ifdef CONFIG_PROVE_LOCKING
340# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
341# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i)
342# else
343# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
344# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i)
345# endif
346# define rwsem_release(l, n, i) lock_release(l, n, i)
347#else
348# define rwsem_acquire(l, s, t, i) do { } while (0)
349# define rwsem_acquire_read(l, s, t, i) do { } while (0)
350# define rwsem_release(l, n, i) do { } while (0)
351#endif
352
353#endif /* __LINUX_LOCKDEP_H */
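
A minimal usage sketch of the class-override helpers above (illustrative only; struct foo_dev, foo_rx_key, foo_tx_key and foo_dev_init_locks() are made-up names). One static lock_class_key per lock role keeps every instance of that lock in a single class, regardless of which call site initialised it:

    #include <linux/spinlock.h>
    #include <linux/lockdep.h>

    struct foo_dev {                            /* hypothetical device structure */
            spinlock_t      rx_lock;
            spinlock_t      tx_lock;
    };

    /* one subsystem-wide key per lock role: */
    static struct lock_class_key foo_rx_key;
    static struct lock_class_key foo_tx_key;

    static void foo_dev_init_locks(struct foo_dev *dev)
    {
            spin_lock_init(&dev->rx_lock);
            spin_lock_init(&dev->tx_lock);

            /* merge all rx locks into one class, all tx locks into another: */
            lockdep_set_class(&dev->rx_lock, &foo_rx_key);
            lockdep_set_class(&dev->tx_lock, &foo_tx_key);
    }

With lockdep disabled the helpers compile away and lock_class_key is an empty structure, so the annotation has no cost.
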
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75179529e399..990957e0929f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
14#include <linux/prio_tree.h> 14#include <linux/prio_tree.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/debug_locks.h>
17 18
18struct mempolicy; 19struct mempolicy;
19struct anon_vma; 20struct anon_vma;
@@ -1034,13 +1035,6 @@ static inline void vm_stat_account(struct mm_struct *mm,
1034} 1035}
1035#endif /* CONFIG_PROC_FS */ 1036#endif /* CONFIG_PROC_FS */
1036 1037
1037static inline void
1038debug_check_no_locks_freed(const void *from, unsigned long len)
1039{
1040 mutex_debug_check_no_locks_freed(from, len);
1041 rt_mutex_debug_check_no_locks_freed(from, len);
1042}
1043
1044#ifndef CONFIG_DEBUG_PAGEALLOC 1038#ifndef CONFIG_DEBUG_PAGEALLOC
1045static inline void 1039static inline void
1046kernel_map_pages(struct page *page, int numpages, int enable) 1040kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 27e748eb72b0..656b588a9f96 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -150,6 +150,10 @@ struct zone {
150 unsigned long lowmem_reserve[MAX_NR_ZONES]; 150 unsigned long lowmem_reserve[MAX_NR_ZONES];
151 151
152#ifdef CONFIG_NUMA 152#ifdef CONFIG_NUMA
153 /*
154 * zone reclaim becomes active if more unmapped pages exist.
155 */
156 unsigned long min_unmapped_ratio;
153 struct per_cpu_pageset *pageset[NR_CPUS]; 157 struct per_cpu_pageset *pageset[NR_CPUS];
154#else 158#else
155 struct per_cpu_pageset pageset[NR_CPUS]; 159 struct per_cpu_pageset pageset[NR_CPUS];
@@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
414 void __user *, size_t *, loff_t *); 418 void __user *, size_t *, loff_t *);
415int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, 419int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
416 void __user *, size_t *, loff_t *); 420 void __user *, size_t *, loff_t *);
421int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
422 struct file *, void __user *, size_t *, loff_t *);
417 423
418#include <linux/topology.h> 424#include <linux/topology.h>
419/* Returns the number of the current Node. */ 425/* Returns the number of the current Node. */
diff --git a/include/linux/module.h b/include/linux/module.h
index 9e9dc7c24d95..d06c74fb8c26 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -358,6 +358,7 @@ static inline int module_is_live(struct module *mod)
358/* Is this address in a module? (second is with no locks, for oops) */ 358/* Is this address in a module? (second is with no locks, for oops) */
359struct module *module_text_address(unsigned long addr); 359struct module *module_text_address(unsigned long addr);
360struct module *__module_text_address(unsigned long addr); 360struct module *__module_text_address(unsigned long addr);
361int is_module_address(unsigned long addr);
361 362
362/* Returns module and fills in value, defined and namebuf, or NULL if 363/* Returns module and fills in value, defined and namebuf, or NULL if
363 symnum out of range. */ 364 symnum out of range. */
@@ -496,6 +497,11 @@ static inline struct module *__module_text_address(unsigned long addr)
496 return NULL; 497 return NULL;
497} 498}
498 499
500static inline int is_module_address(unsigned long addr)
501{
502 return 0;
503}
504
499/* Get/put a kernel symbol (calls should be symmetric) */ 505/* Get/put a kernel symbol (calls should be symmetric) */
500#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) 506#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
501#define symbol_put(x) do { } while(0) 507#define symbol_put(x) do { } while(0)
diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
index 8b5769f00467..2537285e1064 100644
--- a/include/linux/mutex-debug.h
+++ b/include/linux/mutex-debug.h
@@ -2,22 +2,22 @@
2#define __LINUX_MUTEX_DEBUG_H 2#define __LINUX_MUTEX_DEBUG_H
3 3
4#include <linux/linkage.h> 4#include <linux/linkage.h>
5#include <linux/lockdep.h>
5 6
6/* 7/*
7 * Mutexes - debugging helpers: 8 * Mutexes - debugging helpers:
8 */ 9 */
9 10
10#define __DEBUG_MUTEX_INITIALIZER(lockname) \ 11#define __DEBUG_MUTEX_INITIALIZER(lockname) \
11 , .held_list = LIST_HEAD_INIT(lockname.held_list), \ 12 , .magic = &lockname
12 .name = #lockname , .magic = &lockname
13 13
14#define mutex_init(sem) __mutex_init(sem, __FUNCTION__) 14#define mutex_init(mutex) \
15do { \
16 static struct lock_class_key __key; \
17 \
18 __mutex_init((mutex), #mutex, &__key); \
19} while (0)
15 20
16extern void FASTCALL(mutex_destroy(struct mutex *lock)); 21extern void FASTCALL(mutex_destroy(struct mutex *lock));
17 22
18extern void mutex_debug_show_all_locks(void);
19extern void mutex_debug_show_held_locks(struct task_struct *filter);
20extern void mutex_debug_check_no_locks_held(struct task_struct *task);
21extern void mutex_debug_check_no_locks_freed(const void *from, unsigned long len);
22
23#endif 23#endif
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f1ac507fa20d..27c48daa3183 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -13,6 +13,7 @@
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/spinlock_types.h> 14#include <linux/spinlock_types.h>
15#include <linux/linkage.h> 15#include <linux/linkage.h>
16#include <linux/lockdep.h>
16 17
17#include <asm/atomic.h> 18#include <asm/atomic.h>
18 19
@@ -50,11 +51,12 @@ struct mutex {
50 struct list_head wait_list; 51 struct list_head wait_list;
51#ifdef CONFIG_DEBUG_MUTEXES 52#ifdef CONFIG_DEBUG_MUTEXES
52 struct thread_info *owner; 53 struct thread_info *owner;
53 struct list_head held_list;
54 unsigned long acquire_ip;
55 const char *name; 54 const char *name;
56 void *magic; 55 void *magic;
57#endif 56#endif
57#ifdef CONFIG_DEBUG_LOCK_ALLOC
58 struct lockdep_map dep_map;
59#endif
58}; 60};
59 61
60/* 62/*
@@ -74,24 +76,34 @@ struct mutex_waiter {
74# include <linux/mutex-debug.h> 76# include <linux/mutex-debug.h>
75#else 77#else
76# define __DEBUG_MUTEX_INITIALIZER(lockname) 78# define __DEBUG_MUTEX_INITIALIZER(lockname)
77# define mutex_init(mutex) __mutex_init(mutex, NULL) 79# define mutex_init(mutex) \
80do { \
81 static struct lock_class_key __key; \
82 \
83 __mutex_init((mutex), #mutex, &__key); \
84} while (0)
78# define mutex_destroy(mutex) do { } while (0) 85# define mutex_destroy(mutex) do { } while (0)
79# define mutex_debug_show_all_locks() do { } while (0) 86#endif
80# define mutex_debug_show_held_locks(p) do { } while (0) 87
81# define mutex_debug_check_no_locks_held(task) do { } while (0) 88#ifdef CONFIG_DEBUG_LOCK_ALLOC
82# define mutex_debug_check_no_locks_freed(from, len) do { } while (0) 89# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
90 , .dep_map = { .name = #lockname }
91#else
92# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
83#endif 93#endif
84 94
85#define __MUTEX_INITIALIZER(lockname) \ 95#define __MUTEX_INITIALIZER(lockname) \
86 { .count = ATOMIC_INIT(1) \ 96 { .count = ATOMIC_INIT(1) \
87 , .wait_lock = SPIN_LOCK_UNLOCKED \ 97 , .wait_lock = SPIN_LOCK_UNLOCKED \
88 , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ 98 , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
89 __DEBUG_MUTEX_INITIALIZER(lockname) } 99 __DEBUG_MUTEX_INITIALIZER(lockname) \
100 __DEP_MAP_MUTEX_INITIALIZER(lockname) }
90 101
91#define DEFINE_MUTEX(mutexname) \ 102#define DEFINE_MUTEX(mutexname) \
92 struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) 103 struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
93 104
94extern void fastcall __mutex_init(struct mutex *lock, const char *name); 105extern void __mutex_init(struct mutex *lock, const char *name,
106 struct lock_class_key *key);
95 107
96/*** 108/***
97 * mutex_is_locked - is the mutex locked 109 * mutex_is_locked - is the mutex locked
@@ -110,6 +122,13 @@ static inline int fastcall mutex_is_locked(struct mutex *lock)
110 */ 122 */
111extern void fastcall mutex_lock(struct mutex *lock); 123extern void fastcall mutex_lock(struct mutex *lock);
112extern int fastcall mutex_lock_interruptible(struct mutex *lock); 124extern int fastcall mutex_lock_interruptible(struct mutex *lock);
125
126#ifdef CONFIG_DEBUG_LOCK_ALLOC
127extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
128#else
129# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
130#endif
131
113/* 132/*
114 * NOTE: mutex_trylock() follows the spin_trylock() convention, 133 * NOTE: mutex_trylock() follows the spin_trylock() convention,
115 * not the down_trylock() convention! 134 * not the down_trylock() convention!
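
A sketch of how the new mutex_lock_nested() annotation can be used (illustrative only; struct foo_node, foo_lock_pair() and foo_unlock_pair() are made up, and the mutexes are assumed to have been set up with mutex_init()). Two mutexes of the same class are taken in a fixed address order, and the inner one carries a subclass so the validator accepts the intentional same-class nesting:

    #include <linux/mutex.h>

    struct foo_node {                           /* hypothetical object */
            struct mutex lock;
    };

    static void foo_lock_pair(struct foo_node *a, struct foo_node *b)
    {
            if (a > b) {                        /* fixed locking order by address */
                    struct foo_node *tmp = a;
                    a = b;
                    b = tmp;
            }
            mutex_lock(&a->lock);
            mutex_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
    }

    static void foo_unlock_pair(struct foo_node *a, struct foo_node *b)
    {
            mutex_unlock(&a->lock);
            mutex_unlock(&b->lock);
    }

Without CONFIG_DEBUG_LOCK_ALLOC the _nested variant falls back to plain mutex_lock(), as the #else branch above shows.
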
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 51dbab9710c7..7ff386a6ae87 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -65,7 +65,7 @@ struct raw_notifier_head {
65 } while (0) 65 } while (0)
66 66
67#define ATOMIC_NOTIFIER_INIT(name) { \ 67#define ATOMIC_NOTIFIER_INIT(name) { \
68 .lock = SPIN_LOCK_UNLOCKED, \ 68 .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
69 .head = NULL } 69 .head = NULL }
70#define BLOCKING_NOTIFIER_INIT(name) { \ 70#define BLOCKING_NOTIFIER_INIT(name) { \
71 .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ 71 .rwsem = __RWSEM_INITIALIZER((name).rwsem), \
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index fa4a3b82ba70..5d41dee82f80 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -29,8 +29,6 @@ struct rt_mutex {
29 struct task_struct *owner; 29 struct task_struct *owner;
30#ifdef CONFIG_DEBUG_RT_MUTEXES 30#ifdef CONFIG_DEBUG_RT_MUTEXES
31 int save_state; 31 int save_state;
32 struct list_head held_list_entry;
33 unsigned long acquire_ip;
34 const char *name, *file; 32 const char *name, *file;
35 int line; 33 int line;
36 void *magic; 34 void *magic;
@@ -98,14 +96,6 @@ extern int rt_mutex_trylock(struct rt_mutex *lock);
98 96
99extern void rt_mutex_unlock(struct rt_mutex *lock); 97extern void rt_mutex_unlock(struct rt_mutex *lock);
100 98
101#ifdef CONFIG_DEBUG_RT_MUTEXES
102# define INIT_RT_MUTEX_DEBUG(tsk) \
103 .held_list_head = LIST_HEAD_INIT(tsk.held_list_head), \
104 .held_list_lock = SPIN_LOCK_UNLOCKED
105#else
106# define INIT_RT_MUTEX_DEBUG(tsk)
107#endif
108
109#ifdef CONFIG_RT_MUTEXES 99#ifdef CONFIG_RT_MUTEXES
110# define INIT_RT_MUTEXES(tsk) \ 100# define INIT_RT_MUTEXES(tsk) \
111 .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock), \ 101 .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock), \
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index f30f805080ae..ae1fcadd598e 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -32,30 +32,37 @@ struct rw_semaphore {
32 __s32 activity; 32 __s32 activity;
33 spinlock_t wait_lock; 33 spinlock_t wait_lock;
34 struct list_head wait_list; 34 struct list_head wait_list;
35#if RWSEM_DEBUG 35#ifdef CONFIG_DEBUG_LOCK_ALLOC
36 int debug; 36 struct lockdep_map dep_map;
37#endif 37#endif
38}; 38};
39 39
40/* 40#ifdef CONFIG_DEBUG_LOCK_ALLOC
41 * initialisation 41# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
42 */
43#if RWSEM_DEBUG
44#define __RWSEM_DEBUG_INIT , 0
45#else 42#else
46#define __RWSEM_DEBUG_INIT /* */ 43# define __RWSEM_DEP_MAP_INIT(lockname)
47#endif 44#endif
48 45
49#define __RWSEM_INITIALIZER(name) \ 46#define __RWSEM_INITIALIZER(name) \
50{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } 47{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
51 48
52#define DECLARE_RWSEM(name) \ 49#define DECLARE_RWSEM(name) \
53 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 50 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
54 51
55extern void FASTCALL(init_rwsem(struct rw_semaphore *sem)); 52extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
53 struct lock_class_key *key);
54
55#define init_rwsem(sem) \
56do { \
57 static struct lock_class_key __key; \
58 \
59 __init_rwsem((sem), #sem, &__key); \
60} while (0)
61
56extern void FASTCALL(__down_read(struct rw_semaphore *sem)); 62extern void FASTCALL(__down_read(struct rw_semaphore *sem));
57extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem)); 63extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem));
58extern void FASTCALL(__down_write(struct rw_semaphore *sem)); 64extern void FASTCALL(__down_write(struct rw_semaphore *sem));
65extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass));
59extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem)); 66extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem));
60extern void FASTCALL(__up_read(struct rw_semaphore *sem)); 67extern void FASTCALL(__up_read(struct rw_semaphore *sem));
61extern void FASTCALL(__up_write(struct rw_semaphore *sem)); 68extern void FASTCALL(__up_write(struct rw_semaphore *sem));
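
For the spinlock-based rwsem implementation shown here, init_rwsem() is now a macro that supplies a per-callsite static lock_class_key, and DECLARE_RWSEM() names the class after the variable. A brief sketch (foo_global_sem, struct foo_obj and foo_obj_init() are hypothetical):

    #include <linux/rwsem.h>

    static DECLARE_RWSEM(foo_global_sem);      /* class named after the variable */

    struct foo_obj {                            /* hypothetical object */
            struct rw_semaphore sem;
    };

    static void foo_obj_init(struct foo_obj *obj)
    {
            /* every semaphore initialised at this call site shares one class: */
            init_rwsem(&obj->sem);
    }
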
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index f99fe90732ab..658afb37c3f5 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -9,8 +9,6 @@
9 9
10#include <linux/linkage.h> 10#include <linux/linkage.h>
11 11
12#define RWSEM_DEBUG 0
13
14#ifdef __KERNEL__ 12#ifdef __KERNEL__
15 13
16#include <linux/types.h> 14#include <linux/types.h>
@@ -26,89 +24,58 @@ struct rw_semaphore;
26#include <asm/rwsem.h> /* use an arch-specific implementation */ 24#include <asm/rwsem.h> /* use an arch-specific implementation */
27#endif 25#endif
28 26
29#ifndef rwsemtrace
30#if RWSEM_DEBUG
31extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str));
32#else
33#define rwsemtrace(SEM,FMT)
34#endif
35#endif
36
37/* 27/*
38 * lock for reading 28 * lock for reading
39 */ 29 */
40static inline void down_read(struct rw_semaphore *sem) 30extern void down_read(struct rw_semaphore *sem);
41{
42 might_sleep();
43 rwsemtrace(sem,"Entering down_read");
44 __down_read(sem);
45 rwsemtrace(sem,"Leaving down_read");
46}
47 31
48/* 32/*
49 * trylock for reading -- returns 1 if successful, 0 if contention 33 * trylock for reading -- returns 1 if successful, 0 if contention
50 */ 34 */
51static inline int down_read_trylock(struct rw_semaphore *sem) 35extern int down_read_trylock(struct rw_semaphore *sem);
52{
53 int ret;
54 rwsemtrace(sem,"Entering down_read_trylock");
55 ret = __down_read_trylock(sem);
56 rwsemtrace(sem,"Leaving down_read_trylock");
57 return ret;
58}
59 36
60/* 37/*
61 * lock for writing 38 * lock for writing
62 */ 39 */
63static inline void down_write(struct rw_semaphore *sem) 40extern void down_write(struct rw_semaphore *sem);
64{
65 might_sleep();
66 rwsemtrace(sem,"Entering down_write");
67 __down_write(sem);
68 rwsemtrace(sem,"Leaving down_write");
69}
70 41
71/* 42/*
72 * trylock for writing -- returns 1 if successful, 0 if contention 43 * trylock for writing -- returns 1 if successful, 0 if contention
73 */ 44 */
74static inline int down_write_trylock(struct rw_semaphore *sem) 45extern int down_write_trylock(struct rw_semaphore *sem);
75{
76 int ret;
77 rwsemtrace(sem,"Entering down_write_trylock");
78 ret = __down_write_trylock(sem);
79 rwsemtrace(sem,"Leaving down_write_trylock");
80 return ret;
81}
82 46
83/* 47/*
84 * release a read lock 48 * release a read lock
85 */ 49 */
86static inline void up_read(struct rw_semaphore *sem) 50extern void up_read(struct rw_semaphore *sem);
87{
88 rwsemtrace(sem,"Entering up_read");
89 __up_read(sem);
90 rwsemtrace(sem,"Leaving up_read");
91}
92 51
93/* 52/*
94 * release a write lock 53 * release a write lock
95 */ 54 */
96static inline void up_write(struct rw_semaphore *sem) 55extern void up_write(struct rw_semaphore *sem);
97{
98 rwsemtrace(sem,"Entering up_write");
99 __up_write(sem);
100 rwsemtrace(sem,"Leaving up_write");
101}
102 56
103/* 57/*
104 * downgrade write lock to read lock 58 * downgrade write lock to read lock
105 */ 59 */
106static inline void downgrade_write(struct rw_semaphore *sem) 60extern void downgrade_write(struct rw_semaphore *sem);
107{ 61
108 rwsemtrace(sem,"Entering downgrade_write"); 62#ifdef CONFIG_DEBUG_LOCK_ALLOC
109 __downgrade_write(sem); 63/*
110 rwsemtrace(sem,"Leaving downgrade_write"); 64 * nested locking:
111} 65 */
66extern void down_read_nested(struct rw_semaphore *sem, int subclass);
67extern void down_write_nested(struct rw_semaphore *sem, int subclass);
68/*
69 * Take/release a lock when not the owner will release it:
70 */
71extern void down_read_non_owner(struct rw_semaphore *sem);
72extern void up_read_non_owner(struct rw_semaphore *sem);
73#else
74# define down_read_nested(sem, subclass) down_read(sem)
75# define down_write_nested(sem, subclass) down_write(sem)
76# define down_read_non_owner(sem) down_read(sem)
77# define up_read_non_owner(sem) up_read(sem)
78#endif
112 79
113#endif /* __KERNEL__ */ 80#endif /* __KERNEL__ */
114#endif /* _LINUX_RWSEM_H */ 81#endif /* _LINUX_RWSEM_H */
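
A sketch of the nested rwsem annotations declared above (illustrative only; struct foo_dir and the two helpers are made-up names, and the semaphores are assumed to be initialised). A parent object and one of its children, both using the same lock class, are write-locked with the child marked as a nested acquisition:

    #include <linux/rwsem.h>
    #include <linux/lockdep.h>

    struct foo_dir {                            /* hypothetical object */
            struct rw_semaphore sem;
    };

    static void foo_lock_parent_child(struct foo_dir *parent, struct foo_dir *child)
    {
            down_write(&parent->sem);
            down_write_nested(&child->sem, SINGLE_DEPTH_NESTING);
    }

    static void foo_unlock_parent_child(struct foo_dir *parent, struct foo_dir *child)
    {
            up_write(&child->sem);
            up_write(&parent->sem);
    }

With CONFIG_DEBUG_LOCK_ALLOC disabled the _nested and _non_owner variants map back to the plain calls, so the annotations are free in production builds.
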
diff --git a/include/linux/sched.h b/include/linux/sched.h
index aaf723308ed4..1c876e27ff93 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -184,11 +184,11 @@ extern unsigned long weighted_cpuload(const int cpu);
184extern rwlock_t tasklist_lock; 184extern rwlock_t tasklist_lock;
185extern spinlock_t mmlist_lock; 185extern spinlock_t mmlist_lock;
186 186
187typedef struct task_struct task_t; 187struct task_struct;
188 188
189extern void sched_init(void); 189extern void sched_init(void);
190extern void sched_init_smp(void); 190extern void sched_init_smp(void);
191extern void init_idle(task_t *idle, int cpu); 191extern void init_idle(struct task_struct *idle, int cpu);
192 192
193extern cpumask_t nohz_cpu_mask; 193extern cpumask_t nohz_cpu_mask;
194 194
@@ -383,7 +383,7 @@ struct signal_struct {
383 wait_queue_head_t wait_chldexit; /* for wait4() */ 383 wait_queue_head_t wait_chldexit; /* for wait4() */
384 384
385 /* current thread group signal load-balancing target: */ 385 /* current thread group signal load-balancing target: */
386 task_t *curr_target; 386 struct task_struct *curr_target;
387 387
388 /* shared signal handling: */ 388 /* shared signal handling: */
389 struct sigpending shared_pending; 389 struct sigpending shared_pending;
@@ -534,7 +534,6 @@ extern struct user_struct *find_user(uid_t);
534extern struct user_struct root_user; 534extern struct user_struct root_user;
535#define INIT_USER (&root_user) 535#define INIT_USER (&root_user)
536 536
537typedef struct prio_array prio_array_t;
538struct backing_dev_info; 537struct backing_dev_info;
539struct reclaim_state; 538struct reclaim_state;
540 539
@@ -699,7 +698,7 @@ extern int groups_search(struct group_info *group_info, gid_t grp);
699 ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) 698 ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
700 699
701#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK 700#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
702extern void prefetch_stack(struct task_struct*); 701extern void prefetch_stack(struct task_struct *t);
703#else 702#else
704static inline void prefetch_stack(struct task_struct *t) { } 703static inline void prefetch_stack(struct task_struct *t) { }
705#endif 704#endif
@@ -715,6 +714,8 @@ enum sleep_type {
715 SLEEP_INTERRUPTED, 714 SLEEP_INTERRUPTED,
716}; 715};
717 716
717struct prio_array;
718
718struct task_struct { 719struct task_struct {
719 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 720 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
720 struct thread_info *thread_info; 721 struct thread_info *thread_info;
@@ -732,7 +733,7 @@ struct task_struct {
732 int load_weight; /* for niceness load balancing purposes */ 733 int load_weight; /* for niceness load balancing purposes */
733 int prio, static_prio, normal_prio; 734 int prio, static_prio, normal_prio;
734 struct list_head run_list; 735 struct list_head run_list;
735 prio_array_t *array; 736 struct prio_array *array;
736 737
737 unsigned short ioprio; 738 unsigned short ioprio;
738 unsigned int btrace_seq; 739 unsigned int btrace_seq;
@@ -865,16 +866,34 @@ struct task_struct {
865 struct plist_head pi_waiters; 866 struct plist_head pi_waiters;
866 /* Deadlock detection and priority inheritance handling */ 867 /* Deadlock detection and priority inheritance handling */
867 struct rt_mutex_waiter *pi_blocked_on; 868 struct rt_mutex_waiter *pi_blocked_on;
868# ifdef CONFIG_DEBUG_RT_MUTEXES
869 spinlock_t held_list_lock;
870 struct list_head held_list_head;
871# endif
872#endif 869#endif
873 870
874#ifdef CONFIG_DEBUG_MUTEXES 871#ifdef CONFIG_DEBUG_MUTEXES
875 /* mutex deadlock detection */ 872 /* mutex deadlock detection */
876 struct mutex_waiter *blocked_on; 873 struct mutex_waiter *blocked_on;
877#endif 874#endif
875#ifdef CONFIG_TRACE_IRQFLAGS
876 unsigned int irq_events;
877 int hardirqs_enabled;
878 unsigned long hardirq_enable_ip;
879 unsigned int hardirq_enable_event;
880 unsigned long hardirq_disable_ip;
881 unsigned int hardirq_disable_event;
882 int softirqs_enabled;
883 unsigned long softirq_disable_ip;
884 unsigned int softirq_disable_event;
885 unsigned long softirq_enable_ip;
886 unsigned int softirq_enable_event;
887 int hardirq_context;
888 int softirq_context;
889#endif
890#ifdef CONFIG_LOCKDEP
891# define MAX_LOCK_DEPTH 30UL
892 u64 curr_chain_key;
893 int lockdep_depth;
894 struct held_lock held_locks[MAX_LOCK_DEPTH];
895 unsigned int lockdep_recursion;
896#endif
878 897
879/* journalling filesystem info */ 898/* journalling filesystem info */
880 void *journal_info; 899 void *journal_info;
@@ -1013,9 +1032,9 @@ static inline void put_task_struct(struct task_struct *t)
1013#define used_math() tsk_used_math(current) 1032#define used_math() tsk_used_math(current)
1014 1033
1015#ifdef CONFIG_SMP 1034#ifdef CONFIG_SMP
1016extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); 1035extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask);
1017#else 1036#else
1018static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) 1037static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
1019{ 1038{
1020 if (!cpu_isset(0, new_mask)) 1039 if (!cpu_isset(0, new_mask))
1021 return -EINVAL; 1040 return -EINVAL;
@@ -1024,7 +1043,8 @@ static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
1024#endif 1043#endif
1025 1044
1026extern unsigned long long sched_clock(void); 1045extern unsigned long long sched_clock(void);
1027extern unsigned long long current_sched_time(const task_t *current_task); 1046extern unsigned long long
1047current_sched_time(const struct task_struct *current_task);
1028 1048
1029/* sched_exec is called by processes performing an exec */ 1049/* sched_exec is called by processes performing an exec */
1030#ifdef CONFIG_SMP 1050#ifdef CONFIG_SMP
@@ -1042,27 +1062,27 @@ static inline void idle_task_exit(void) {}
1042extern void sched_idle_next(void); 1062extern void sched_idle_next(void);
1043 1063
1044#ifdef CONFIG_RT_MUTEXES 1064#ifdef CONFIG_RT_MUTEXES
1045extern int rt_mutex_getprio(task_t *p); 1065extern int rt_mutex_getprio(struct task_struct *p);
1046extern void rt_mutex_setprio(task_t *p, int prio); 1066extern void rt_mutex_setprio(struct task_struct *p, int prio);
1047extern void rt_mutex_adjust_pi(task_t *p); 1067extern void rt_mutex_adjust_pi(struct task_struct *p);
1048#else 1068#else
1049static inline int rt_mutex_getprio(task_t *p) 1069static inline int rt_mutex_getprio(struct task_struct *p)
1050{ 1070{
1051 return p->normal_prio; 1071 return p->normal_prio;
1052} 1072}
1053# define rt_mutex_adjust_pi(p) do { } while (0) 1073# define rt_mutex_adjust_pi(p) do { } while (0)
1054#endif 1074#endif
1055 1075
1056extern void set_user_nice(task_t *p, long nice); 1076extern void set_user_nice(struct task_struct *p, long nice);
1057extern int task_prio(const task_t *p); 1077extern int task_prio(const struct task_struct *p);
1058extern int task_nice(const task_t *p); 1078extern int task_nice(const struct task_struct *p);
1059extern int can_nice(const task_t *p, const int nice); 1079extern int can_nice(const struct task_struct *p, const int nice);
1060extern int task_curr(const task_t *p); 1080extern int task_curr(const struct task_struct *p);
1061extern int idle_cpu(int cpu); 1081extern int idle_cpu(int cpu);
1062extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); 1082extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
1063extern task_t *idle_task(int cpu); 1083extern struct task_struct *idle_task(int cpu);
1064extern task_t *curr_task(int cpu); 1084extern struct task_struct *curr_task(int cpu);
1065extern void set_curr_task(int cpu, task_t *p); 1085extern void set_curr_task(int cpu, struct task_struct *p);
1066 1086
1067void yield(void); 1087void yield(void);
1068 1088
@@ -1119,8 +1139,8 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
1119#else 1139#else
1120 static inline void kick_process(struct task_struct *tsk) { } 1140 static inline void kick_process(struct task_struct *tsk) { }
1121#endif 1141#endif
1122extern void FASTCALL(sched_fork(task_t * p, int clone_flags)); 1142extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags));
1123extern void FASTCALL(sched_exit(task_t * p)); 1143extern void FASTCALL(sched_exit(struct task_struct * p));
1124 1144
1125extern int in_group_p(gid_t); 1145extern int in_group_p(gid_t);
1126extern int in_egroup_p(gid_t); 1146extern int in_egroup_p(gid_t);
@@ -1225,17 +1245,17 @@ extern NORET_TYPE void do_group_exit(int);
1225extern void daemonize(const char *, ...); 1245extern void daemonize(const char *, ...);
1226extern int allow_signal(int); 1246extern int allow_signal(int);
1227extern int disallow_signal(int); 1247extern int disallow_signal(int);
1228extern task_t *child_reaper; 1248extern struct task_struct *child_reaper;
1229 1249
1230extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); 1250extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
1231extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); 1251extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
1232task_t *fork_idle(int); 1252struct task_struct *fork_idle(int);
1233 1253
1234extern void set_task_comm(struct task_struct *tsk, char *from); 1254extern void set_task_comm(struct task_struct *tsk, char *from);
1235extern void get_task_comm(char *to, struct task_struct *tsk); 1255extern void get_task_comm(char *to, struct task_struct *tsk);
1236 1256
1237#ifdef CONFIG_SMP 1257#ifdef CONFIG_SMP
1238extern void wait_task_inactive(task_t * p); 1258extern void wait_task_inactive(struct task_struct * p);
1239#else 1259#else
1240#define wait_task_inactive(p) do { } while (0) 1260#define wait_task_inactive(p) do { } while (0)
1241#endif 1261#endif
@@ -1261,13 +1281,13 @@ extern void wait_task_inactive(task_t * p);
1261/* de_thread depends on thread_group_leader not being a pid based check */ 1281/* de_thread depends on thread_group_leader not being a pid based check */
1262#define thread_group_leader(p) (p == p->group_leader) 1282#define thread_group_leader(p) (p == p->group_leader)
1263 1283
1264static inline task_t *next_thread(const task_t *p) 1284static inline struct task_struct *next_thread(const struct task_struct *p)
1265{ 1285{
1266 return list_entry(rcu_dereference(p->thread_group.next), 1286 return list_entry(rcu_dereference(p->thread_group.next),
1267 task_t, thread_group); 1287 struct task_struct, thread_group);
1268} 1288}
1269 1289
1270static inline int thread_group_empty(task_t *p) 1290static inline int thread_group_empty(struct task_struct *p)
1271{ 1291{
1272 return list_empty(&p->thread_group); 1292 return list_empty(&p->thread_group);
1273} 1293}
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 7bc5c7c12b54..46000936f8f1 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -38,9 +38,17 @@ typedef struct {
38 * These macros triggered gcc-3.x compile-time problems. We think these are 38 * These macros triggered gcc-3.x compile-time problems. We think these are
39 * OK now. Be cautious. 39 * OK now. Be cautious.
40 */ 40 */
41#define SEQLOCK_UNLOCKED { 0, SPIN_LOCK_UNLOCKED } 41#define __SEQLOCK_UNLOCKED(lockname) \
42#define seqlock_init(x) do { *(x) = (seqlock_t) SEQLOCK_UNLOCKED; } while (0) 42 { 0, __SPIN_LOCK_UNLOCKED(lockname) }
43 43
44#define SEQLOCK_UNLOCKED \
45 __SEQLOCK_UNLOCKED(old_style_seqlock_init)
46
47#define seqlock_init(x) \
48 do { *(x) = (seqlock_t) __SEQLOCK_UNLOCKED(x); } while (0)
49
50#define DEFINE_SEQLOCK(x) \
51 seqlock_t x = __SEQLOCK_UNLOCKED(x)
44 52
45/* Lock out other writers and update the count. 53/* Lock out other writers and update the count.
46 * Acts like a normal spin_lock/unlock. 54 * Acts like a normal spin_lock/unlock.
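
The new DEFINE_SEQLOCK() gives statically defined seqlocks a class named after the variable. A minimal writer/reader sketch (foo_seqlock, foo_value, foo_set() and foo_get() are hypothetical):

    #include <linux/seqlock.h>
    #include <linux/types.h>

    static DEFINE_SEQLOCK(foo_seqlock);
    static u64 foo_value;

    static void foo_set(u64 val)
    {
            write_seqlock(&foo_seqlock);
            foo_value = val;
            write_sequnlock(&foo_seqlock);
    }

    static u64 foo_get(void)
    {
            unsigned int seq;
            u64 val;

            do {
                    seq = read_seqbegin(&foo_seqlock);
                    val = foo_value;
            } while (read_seqretry(&foo_seqlock, seq));

            return val;
    }
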
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 57d7d4965f9a..3597b4f14389 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -604,9 +604,12 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
604 return list_->qlen; 604 return list_->qlen;
605} 605}
606 606
607extern struct lock_class_key skb_queue_lock_key;
608
607static inline void skb_queue_head_init(struct sk_buff_head *list) 609static inline void skb_queue_head_init(struct sk_buff_head *list)
608{ 610{
609 spin_lock_init(&list->lock); 611 spin_lock_init(&list->lock);
612 lockdep_set_class(&list->lock, &skb_queue_lock_key);
610 list->prev = list->next = (struct sk_buff *)list; 613 list->prev = list->next = (struct sk_buff *)list;
611 list->qlen = 0; 614 list->qlen = 0;
612} 615}
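
All skb queue-head locks are put into one class (skb_queue_lock_key) by skb_queue_head_init(). A driver whose private queue genuinely nests differently can re-class just that lock with its own key; a hedged sketch, with struct foo_priv, foo_tx_queue_key and foo_init_queue() made up:

    #include <linux/skbuff.h>
    #include <linux/lockdep.h>

    struct foo_priv {                           /* hypothetical driver private data */
            struct sk_buff_head tx_queue;
    };

    static struct lock_class_key foo_tx_queue_key;

    static void foo_init_queue(struct foo_priv *priv)
    {
            skb_queue_head_init(&priv->tx_queue);
            /* only needed if this queue's lock really nests unusually: */
            lockdep_set_class(&priv->tx_queue.lock, &foo_tx_queue_key);
    }
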
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ae23beef9cc9..31473db92d3b 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -82,14 +82,40 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
82/* 82/*
83 * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them): 83 * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them):
84 */ 84 */
85#if defined(CONFIG_SMP) 85#ifdef CONFIG_SMP
86# include <asm/spinlock.h> 86# include <asm/spinlock.h>
87#else 87#else
88# include <linux/spinlock_up.h> 88# include <linux/spinlock_up.h>
89#endif 89#endif
90 90
91#define spin_lock_init(lock) do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) 91#ifdef CONFIG_DEBUG_SPINLOCK
92#define rwlock_init(lock) do { *(lock) = RW_LOCK_UNLOCKED; } while (0) 92 extern void __spin_lock_init(spinlock_t *lock, const char *name,
93 struct lock_class_key *key);
94# define spin_lock_init(lock) \
95do { \
96 static struct lock_class_key __key; \
97 \
98 __spin_lock_init((lock), #lock, &__key); \
99} while (0)
100
101#else
102# define spin_lock_init(lock) \
103 do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
104#endif
105
106#ifdef CONFIG_DEBUG_SPINLOCK
107 extern void __rwlock_init(rwlock_t *lock, const char *name,
108 struct lock_class_key *key);
109# define rwlock_init(lock) \
110do { \
111 static struct lock_class_key __key; \
112 \
113 __rwlock_init((lock), #lock, &__key); \
114} while (0)
115#else
116# define rwlock_init(lock) \
117 do { *(lock) = RW_LOCK_UNLOCKED; } while (0)
118#endif
93 119
94#define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) 120#define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock)
95 121
@@ -113,7 +139,6 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
113#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) 139#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
114 extern int _raw_spin_trylock(spinlock_t *lock); 140 extern int _raw_spin_trylock(spinlock_t *lock);
115 extern void _raw_spin_unlock(spinlock_t *lock); 141 extern void _raw_spin_unlock(spinlock_t *lock);
116
117 extern void _raw_read_lock(rwlock_t *lock); 142 extern void _raw_read_lock(rwlock_t *lock);
118 extern int _raw_read_trylock(rwlock_t *lock); 143 extern int _raw_read_trylock(rwlock_t *lock);
119 extern void _raw_read_unlock(rwlock_t *lock); 144 extern void _raw_read_unlock(rwlock_t *lock);
@@ -121,17 +146,17 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
121 extern int _raw_write_trylock(rwlock_t *lock); 146 extern int _raw_write_trylock(rwlock_t *lock);
122 extern void _raw_write_unlock(rwlock_t *lock); 147 extern void _raw_write_unlock(rwlock_t *lock);
123#else 148#else
124# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock)
125# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock)
126# define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock) 149# define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock)
127# define _raw_spin_lock_flags(lock, flags) \ 150# define _raw_spin_lock_flags(lock, flags) \
128 __raw_spin_lock_flags(&(lock)->raw_lock, *(flags)) 151 __raw_spin_lock_flags(&(lock)->raw_lock, *(flags))
152# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock)
153# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock)
129# define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock) 154# define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock)
130# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock)
131# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock)
132# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock)
133# define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock) 155# define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock)
156# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock)
157# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock)
134# define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock) 158# define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock)
159# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock)
135#endif 160#endif
136 161
137#define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock) 162#define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock)
@@ -147,6 +172,13 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
147#define write_trylock(lock) __cond_lock(_write_trylock(lock)) 172#define write_trylock(lock) __cond_lock(_write_trylock(lock))
148 173
149#define spin_lock(lock) _spin_lock(lock) 174#define spin_lock(lock) _spin_lock(lock)
175
176#ifdef CONFIG_DEBUG_LOCK_ALLOC
177# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
178#else
179# define spin_lock_nested(lock, subclass) _spin_lock(lock)
180#endif
181
150#define write_lock(lock) _write_lock(lock) 182#define write_lock(lock) _write_lock(lock)
151#define read_lock(lock) _read_lock(lock) 183#define read_lock(lock) _read_lock(lock)
152 184
@@ -172,21 +204,18 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
172/* 204/*
173 * We inline the unlock functions in the nondebug case: 205 * We inline the unlock functions in the nondebug case:
174 */ 206 */
175#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) 207#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \
208 !defined(CONFIG_SMP)
176# define spin_unlock(lock) _spin_unlock(lock) 209# define spin_unlock(lock) _spin_unlock(lock)
177# define read_unlock(lock) _read_unlock(lock) 210# define read_unlock(lock) _read_unlock(lock)
178# define write_unlock(lock) _write_unlock(lock) 211# define write_unlock(lock) _write_unlock(lock)
179#else
180# define spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock)
181# define read_unlock(lock) __raw_read_unlock(&(lock)->raw_lock)
182# define write_unlock(lock) __raw_write_unlock(&(lock)->raw_lock)
183#endif
184
185#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
186# define spin_unlock_irq(lock) _spin_unlock_irq(lock) 212# define spin_unlock_irq(lock) _spin_unlock_irq(lock)
187# define read_unlock_irq(lock) _read_unlock_irq(lock) 213# define read_unlock_irq(lock) _read_unlock_irq(lock)
188# define write_unlock_irq(lock) _write_unlock_irq(lock) 214# define write_unlock_irq(lock) _write_unlock_irq(lock)
189#else 215#else
216# define spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock)
217# define read_unlock(lock) __raw_read_unlock(&(lock)->raw_lock)
218# define write_unlock(lock) __raw_write_unlock(&(lock)->raw_lock)
190# define spin_unlock_irq(lock) \ 219# define spin_unlock_irq(lock) \
191 do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0) 220 do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0)
192# define read_unlock_irq(lock) \ 221# define read_unlock_irq(lock) \
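
A sketch of spin_lock_nested() (illustrative only; FOO_HASH_SIZE, foo_hash_lock[], foo_hash_init() and foo_double_lock() are made-up names). Two chain locks of the same class are taken in index order, the inner one with a subclass annotation, on the assumption that i != j:

    #include <linux/spinlock.h>

    #define FOO_HASH_SIZE 16
    static spinlock_t foo_hash_lock[FOO_HASH_SIZE];

    static void foo_hash_init(void)
    {
            unsigned int i;

            for (i = 0; i < FOO_HASH_SIZE; i++)
                    spin_lock_init(&foo_hash_lock[i]);
    }

    static void foo_double_lock(unsigned int i, unsigned int j)
    {
            if (i > j) {                        /* fixed locking order by index */
                    unsigned int tmp = i;
                    i = j;
                    j = tmp;
            }
            spin_lock(&foo_hash_lock[i]);
            spin_lock_nested(&foo_hash_lock[j], SINGLE_DEPTH_NESTING);
    }

Without CONFIG_DEBUG_LOCK_ALLOC, spin_lock_nested() maps to plain _spin_lock(), as the #else branch above shows.
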
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 78e6989ffb54..b2c4f8299464 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -20,6 +20,8 @@ int in_lock_functions(unsigned long addr);
20#define assert_spin_locked(x) BUG_ON(!spin_is_locked(x)) 20#define assert_spin_locked(x) BUG_ON(!spin_is_locked(x))
21 21
22void __lockfunc _spin_lock(spinlock_t *lock) __acquires(spinlock_t); 22void __lockfunc _spin_lock(spinlock_t *lock) __acquires(spinlock_t);
23void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
24 __acquires(spinlock_t);
23void __lockfunc _read_lock(rwlock_t *lock) __acquires(rwlock_t); 25void __lockfunc _read_lock(rwlock_t *lock) __acquires(rwlock_t);
24void __lockfunc _write_lock(rwlock_t *lock) __acquires(rwlock_t); 26void __lockfunc _write_lock(rwlock_t *lock) __acquires(rwlock_t);
25void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(spinlock_t); 27void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(spinlock_t);
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index cd81cee566f4..67faa044c5f5 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -49,6 +49,7 @@
49 do { local_irq_restore(flags); __UNLOCK(lock); } while (0) 49 do { local_irq_restore(flags); __UNLOCK(lock); } while (0)
50 50
51#define _spin_lock(lock) __LOCK(lock) 51#define _spin_lock(lock) __LOCK(lock)
52#define _spin_lock_nested(lock, subclass) __LOCK(lock)
52#define _read_lock(lock) __LOCK(lock) 53#define _read_lock(lock) __LOCK(lock)
53#define _write_lock(lock) __LOCK(lock) 54#define _write_lock(lock) __LOCK(lock)
54#define _spin_lock_bh(lock) __LOCK_BH(lock) 55#define _spin_lock_bh(lock) __LOCK_BH(lock)
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 9cb51e070390..dc5fb69e4de9 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -9,6 +9,8 @@
9 * Released under the General Public License (GPL). 9 * Released under the General Public License (GPL).
10 */ 10 */
11 11
12#include <linux/lockdep.h>
13
12#if defined(CONFIG_SMP) 14#if defined(CONFIG_SMP)
13# include <asm/spinlock_types.h> 15# include <asm/spinlock_types.h>
14#else 16#else
@@ -24,6 +26,9 @@ typedef struct {
24 unsigned int magic, owner_cpu; 26 unsigned int magic, owner_cpu;
25 void *owner; 27 void *owner;
26#endif 28#endif
29#ifdef CONFIG_DEBUG_LOCK_ALLOC
30 struct lockdep_map dep_map;
31#endif
27} spinlock_t; 32} spinlock_t;
28 33
29#define SPINLOCK_MAGIC 0xdead4ead 34#define SPINLOCK_MAGIC 0xdead4ead
@@ -37,31 +42,53 @@ typedef struct {
37 unsigned int magic, owner_cpu; 42 unsigned int magic, owner_cpu;
38 void *owner; 43 void *owner;
39#endif 44#endif
45#ifdef CONFIG_DEBUG_LOCK_ALLOC
46 struct lockdep_map dep_map;
47#endif
40} rwlock_t; 48} rwlock_t;
41 49
42#define RWLOCK_MAGIC 0xdeaf1eed 50#define RWLOCK_MAGIC 0xdeaf1eed
43 51
44#define SPINLOCK_OWNER_INIT ((void *)-1L) 52#define SPINLOCK_OWNER_INIT ((void *)-1L)
45 53
54#ifdef CONFIG_DEBUG_LOCK_ALLOC
55# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
56#else
57# define SPIN_DEP_MAP_INIT(lockname)
58#endif
59
60#ifdef CONFIG_DEBUG_LOCK_ALLOC
61# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
62#else
63# define RW_DEP_MAP_INIT(lockname)
64#endif
65
46#ifdef CONFIG_DEBUG_SPINLOCK 66#ifdef CONFIG_DEBUG_SPINLOCK
47# define SPIN_LOCK_UNLOCKED \ 67# define __SPIN_LOCK_UNLOCKED(lockname) \
48 (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ 68 (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \
49 .magic = SPINLOCK_MAGIC, \ 69 .magic = SPINLOCK_MAGIC, \
50 .owner = SPINLOCK_OWNER_INIT, \ 70 .owner = SPINLOCK_OWNER_INIT, \
51 .owner_cpu = -1 } 71 .owner_cpu = -1, \
52#define RW_LOCK_UNLOCKED \ 72 SPIN_DEP_MAP_INIT(lockname) }
73#define __RW_LOCK_UNLOCKED(lockname) \
53 (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ 74 (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \
54 .magic = RWLOCK_MAGIC, \ 75 .magic = RWLOCK_MAGIC, \
55 .owner = SPINLOCK_OWNER_INIT, \ 76 .owner = SPINLOCK_OWNER_INIT, \
56 .owner_cpu = -1 } 77 .owner_cpu = -1, \
78 RW_DEP_MAP_INIT(lockname) }
57#else 79#else
58# define SPIN_LOCK_UNLOCKED \ 80# define __SPIN_LOCK_UNLOCKED(lockname) \
59 (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED } 81 (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \
60#define RW_LOCK_UNLOCKED \ 82 SPIN_DEP_MAP_INIT(lockname) }
61 (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED } 83#define __RW_LOCK_UNLOCKED(lockname) \
84 (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \
85 RW_DEP_MAP_INIT(lockname) }
62#endif 86#endif
63 87
64#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED 88#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
65#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED 89#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
90
91#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
92#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
66 93
67#endif /* __LINUX_SPINLOCK_TYPES_H */ 94#endif /* __LINUX_SPINLOCK_TYPES_H */
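
Statically allocated locks should now be defined (or initialised) with the named variants so that each gets its own class; the old SPIN_LOCK_UNLOCKED / RW_LOCK_UNLOCKED initializers still work but lump all their users into a single "old_style" class. A brief sketch (foo_lock, foo_rwlock and struct foo_state are hypothetical):

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(foo_lock);           /* class named "foo_lock" */
    static DEFINE_RWLOCK(foo_rwlock);           /* class named "foo_rwlock" */

    struct foo_state {                          /* hypothetical */
            spinlock_t      lock;
            int             count;
    };

    /* inside a static aggregate, use the named initializer directly: */
    static struct foo_state foo_state = {
            .lock   = __SPIN_LOCK_UNLOCKED(foo_state.lock),
            .count  = 0,
    };
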
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 04135b0e198e..27644af20b7c 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -12,10 +12,14 @@
12 * Released under the General Public License (GPL). 12 * Released under the General Public License (GPL).
13 */ 13 */
14 14
15#ifdef CONFIG_DEBUG_SPINLOCK 15#if defined(CONFIG_DEBUG_SPINLOCK) || \
16 defined(CONFIG_DEBUG_LOCK_ALLOC)
16 17
17typedef struct { 18typedef struct {
18 volatile unsigned int slock; 19 volatile unsigned int slock;
20#ifdef CONFIG_DEBUG_LOCK_ALLOC
21 struct lockdep_map dep_map;
22#endif
19} raw_spinlock_t; 23} raw_spinlock_t;
20 24
21#define __RAW_SPIN_LOCK_UNLOCKED { 1 } 25#define __RAW_SPIN_LOCK_UNLOCKED { 1 }
@@ -30,6 +34,9 @@ typedef struct { } raw_spinlock_t;
30 34
31typedef struct { 35typedef struct {
32 /* no debug version on UP */ 36 /* no debug version on UP */
37#ifdef CONFIG_DEBUG_LOCK_ALLOC
38 struct lockdep_map dep_map;
39#endif
33} raw_rwlock_t; 40} raw_rwlock_t;
34 41
35#define __RAW_RW_LOCK_UNLOCKED { } 42#define __RAW_RW_LOCK_UNLOCKED { }
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 31accf2f0b13..ea54c4c9a4ec 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -18,7 +18,6 @@
18 */ 18 */
19 19
20#ifdef CONFIG_DEBUG_SPINLOCK 20#ifdef CONFIG_DEBUG_SPINLOCK
21
22#define __raw_spin_is_locked(x) ((x)->slock == 0) 21#define __raw_spin_is_locked(x) ((x)->slock == 0)
23 22
24static inline void __raw_spin_lock(raw_spinlock_t *lock) 23static inline void __raw_spin_lock(raw_spinlock_t *lock)
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
new file mode 100644
index 000000000000..9cc81e572224
--- /dev/null
+++ b/include/linux/stacktrace.h
@@ -0,0 +1,20 @@
1#ifndef __LINUX_STACKTRACE_H
2#define __LINUX_STACKTRACE_H
3
4#ifdef CONFIG_STACKTRACE
5struct stack_trace {
6 unsigned int nr_entries, max_entries;
7 unsigned long *entries;
8};
9
10extern void save_stack_trace(struct stack_trace *trace,
11 struct task_struct *task, int all_contexts,
12 unsigned int skip);
13
14extern void print_stack_trace(struct stack_trace *trace, int spaces);
15#else
16# define save_stack_trace(trace, task, all, skip) do { } while (0)
 17# define print_stack_trace(trace, spaces) do { } while (0)
18#endif
19
20#endif
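
A minimal sketch of the new stack-trace interface, assuming CONFIG_STACKTRACE=y (foo_dump_current_stack() and the 16-entry buffer size are arbitrary choices made up for the example):

    #include <linux/stacktrace.h>
    #include <linux/sched.h>

    static void foo_dump_current_stack(void)
    {
            unsigned long entries[16];
            struct stack_trace trace = {
                    .nr_entries     = 0,
                    .max_entries    = 16,
                    .entries        = entries,
            };

            /* current task, current context only, skip no entries: */
            save_stack_trace(&trace, current, 0, 0);
            print_stack_trace(&trace, 0);
    }
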
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cf6ca6e377bd..5e59184c9096 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -189,6 +189,7 @@ extern long vm_total_pages;
189 189
190#ifdef CONFIG_NUMA 190#ifdef CONFIG_NUMA
191extern int zone_reclaim_mode; 191extern int zone_reclaim_mode;
192extern int sysctl_min_unmapped_ratio;
192extern int zone_reclaim(struct zone *, gfp_t, unsigned int); 193extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
193#else 194#else
194#define zone_reclaim_mode 0 195#define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 46e4d8f2771f..e4b1a4d4dcf3 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -188,7 +188,7 @@ enum
188 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ 188 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
189 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ 189 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
190 VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ 190 VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
191 VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ 191 VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */
192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ 192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ 193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
194}; 194};
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 544e855c7c02..794be7af58ae 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -68,7 +68,7 @@ struct task_struct;
68 wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk) 68 wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
69 69
70#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ 70#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
71 .lock = SPIN_LOCK_UNLOCKED, \ 71 .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
72 .task_list = { &(name).task_list, &(name).task_list } } 72 .task_list = { &(name).task_list, &(name).task_list } }
73 73
74#define DECLARE_WAIT_QUEUE_HEAD(name) \ 74#define DECLARE_WAIT_QUEUE_HEAD(name) \
@@ -77,9 +77,15 @@ struct task_struct;
77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ 77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
78 { .flags = word, .bit_nr = bit, } 78 { .flags = word, .bit_nr = bit, }
79 79
80/*
81 * lockdep: we want one lock-class for all waitqueue locks.
82 */
83extern struct lock_class_key waitqueue_lock_key;
84
80static inline void init_waitqueue_head(wait_queue_head_t *q) 85static inline void init_waitqueue_head(wait_queue_head_t *q)
81{ 86{
82 spin_lock_init(&q->lock); 87 spin_lock_init(&q->lock);
88 lockdep_set_class(&q->lock, &waitqueue_lock_key);
83 INIT_LIST_HEAD(&q->task_list); 89 INIT_LIST_HEAD(&q->task_list);
84} 90}
85 91
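
Waitqueue users need no changes: DECLARE_WAIT_QUEUE_HEAD() names its lock after the variable, and init_waitqueue_head() re-classes the lock to the shared waitqueue_lock_key. A small sketch (foo_wait, struct foo_ctx and the helpers are hypothetical, and the unlocked 'ready' flag is a simplification):

    #include <linux/wait.h>

    static DECLARE_WAIT_QUEUE_HEAD(foo_wait);

    struct foo_ctx {                            /* hypothetical */
            wait_queue_head_t       wq;
            int                     ready;
    };

    static void foo_ctx_init(struct foo_ctx *ctx)
    {
            init_waitqueue_head(&ctx->wq);
            ctx->ready = 0;
    }

    static int foo_wait_ready(struct foo_ctx *ctx)
    {
            return wait_event_interruptible(ctx->wq, ctx->ready);
    }

    static void foo_set_ready(struct foo_ctx *ctx)
    {
            ctx->ready = 1;
            wake_up(&ctx->wq);
    }
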
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 5ba72d95280c..2fec827c8801 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -67,6 +67,9 @@ struct unix_skb_parms {
67#define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock) 67#define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock)
68#define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock) 68#define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock)
69#define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock) 69#define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock)
70#define unix_state_wlock_nested(s) \
71 spin_lock_nested(&unix_sk(s)->lock, \
72 SINGLE_DEPTH_NESTING)
70#define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock) 73#define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock)
71 74
72#ifdef __KERNEL__ 75#ifdef __KERNEL__
diff --git a/include/net/sock.h b/include/net/sock.h
index 7b3d6b856946..324b3ea233d6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -44,6 +44,7 @@
44#include <linux/timer.h> 44#include <linux/timer.h>
45#include <linux/cache.h> 45#include <linux/cache.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/lockdep.h>
47#include <linux/netdevice.h> 48#include <linux/netdevice.h>
48#include <linux/skbuff.h> /* struct sk_buff */ 49#include <linux/skbuff.h> /* struct sk_buff */
49#include <linux/security.h> 50#include <linux/security.h>
@@ -78,14 +79,17 @@ typedef struct {
78 spinlock_t slock; 79 spinlock_t slock;
79 struct sock_iocb *owner; 80 struct sock_iocb *owner;
80 wait_queue_head_t wq; 81 wait_queue_head_t wq;
82 /*
83 * We express the mutex-alike socket_lock semantics
84 * to the lock validator by explicitly managing
85 * the slock as a lock variant (in addition to
86 * the slock itself):
87 */
88#ifdef CONFIG_DEBUG_LOCK_ALLOC
89 struct lockdep_map dep_map;
90#endif
81} socket_lock_t; 91} socket_lock_t;
82 92
83#define sock_lock_init(__sk) \
84do { spin_lock_init(&((__sk)->sk_lock.slock)); \
85 (__sk)->sk_lock.owner = NULL; \
86 init_waitqueue_head(&((__sk)->sk_lock.wq)); \
87} while(0)
88
89struct sock; 93struct sock;
90struct proto; 94struct proto;
91 95
@@ -747,6 +751,9 @@ extern void FASTCALL(release_sock(struct sock *sk));
747 751
748/* BH context may only use the following locking interface. */ 752/* BH context may only use the following locking interface. */
749#define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) 753#define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock))
754#define bh_lock_sock_nested(__sk) \
755 spin_lock_nested(&((__sk)->sk_lock.slock), \
756 SINGLE_DEPTH_NESTING)
750#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) 757#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
751 758
752extern struct sock *sk_alloc(int family, 759extern struct sock *sk_alloc(int family,
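
A sketch of the nested socket-lock annotation in a BH-context receive path (foo_rcv() and foo_do_rcv() are made-up names; real protocols do considerably more here). The slock may already be "owned", in the mutex-like sense, by a process context, so the BH acquisition is marked as nested:

    #include <net/sock.h>

    static int foo_do_rcv(struct sock *sk, struct sk_buff *skb)
    {
            return sock_queue_rcv_skb(sk, skb); /* hypothetical handling */
    }

    static int foo_rcv(struct sock *sk, struct sk_buff *skb)
    {
            int rc = 0;

            bh_lock_sock_nested(sk);
            if (!sock_owned_by_user(sk))
                    rc = foo_do_rcv(sk, skb);
            else
                    sk_add_backlog(sk, skb);
            bh_unlock_sock(sk);

            return rc;
    }
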
diff --git a/init/main.c b/init/main.c
index b2f3b566790e..628b8e9e841a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -47,6 +47,8 @@
47#include <linux/key.h> 47#include <linux/key.h>
48#include <linux/unwind.h> 48#include <linux/unwind.h>
49#include <linux/buffer_head.h> 49#include <linux/buffer_head.h>
50#include <linux/debug_locks.h>
51#include <linux/lockdep.h>
50 52
51#include <asm/io.h> 53#include <asm/io.h>
52#include <asm/bugs.h> 54#include <asm/bugs.h>
@@ -456,6 +458,16 @@ asmlinkage void __init start_kernel(void)
456 458
457 smp_setup_processor_id(); 459 smp_setup_processor_id();
458 460
461 /*
462 * Need to run as early as possible, to initialize the
463 * lockdep hash:
464 */
465 lockdep_init();
466
467 local_irq_disable();
468 early_boot_irqs_off();
469 early_init_irq_lock_class();
470
459/* 471/*
460 * Interrupts are still disabled. Do necessary setups, then 472 * Interrupts are still disabled. Do necessary setups, then
461 * enable them 473 * enable them
@@ -496,8 +508,13 @@ asmlinkage void __init start_kernel(void)
496 init_timers(); 508 init_timers();
497 hrtimers_init(); 509 hrtimers_init();
498 softirq_init(); 510 softirq_init();
499 time_init();
500 timekeeping_init(); 511 timekeeping_init();
512 time_init();
513 profile_init();
514 if (!irqs_disabled())
515 printk("start_kernel(): bug: interrupts were enabled early\n");
516 early_boot_irqs_on();
517 local_irq_enable();
501 518
502 /* 519 /*
503 * HACK ALERT! This is early. We're enabling the console before 520 * HACK ALERT! This is early. We're enabling the console before
@@ -507,8 +524,16 @@ asmlinkage void __init start_kernel(void)
507 console_init(); 524 console_init();
508 if (panic_later) 525 if (panic_later)
509 panic(panic_later, panic_param); 526 panic(panic_later, panic_param);
510 profile_init(); 527
511 local_irq_enable(); 528 lockdep_info();
529
530 /*
531 * Need to run this when irqs are enabled, because it wants
532 * to self-test [hard/soft]-irqs on/off lock inversion bugs
533 * too:
534 */
535 locking_selftest();
536
512#ifdef CONFIG_BLK_DEV_INITRD 537#ifdef CONFIG_BLK_DEV_INITRD
513 if (initrd_start && !initrd_below_start_ok && 538 if (initrd_start && !initrd_below_start_ok &&
514 initrd_start < min_low_pfn << PAGE_SHIFT) { 539 initrd_start < min_low_pfn << PAGE_SHIFT) {
diff --git a/kernel/Makefile b/kernel/Makefile
index 82fb182f6f61..47dbcd570cd8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,10 +8,15 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o 11 hrtimer.o rwsem.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o
13obj-y += time/ 14obj-y += time/
14obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o 15obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
16obj-$(CONFIG_LOCKDEP) += lockdep.o
17ifeq ($(CONFIG_PROC_FS),y)
18obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
19endif
15obj-$(CONFIG_FUTEX) += futex.o 20obj-$(CONFIG_FUTEX) += futex.o
16ifeq ($(CONFIG_COMPAT),y) 21ifeq ($(CONFIG_COMPAT),y)
17obj-$(CONFIG_FUTEX) += futex_compat.o 22obj-$(CONFIG_FUTEX) += futex_compat.o
@@ -22,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
22obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 27obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
23obj-$(CONFIG_SMP) += cpu.o spinlock.o 28obj-$(CONFIG_SMP) += cpu.o spinlock.o
24obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o 29obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
30obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
25obj-$(CONFIG_UID16) += uid16.o 31obj-$(CONFIG_UID16) += uid16.o
26obj-$(CONFIG_MODULES) += module.o 32obj-$(CONFIG_MODULES) += module.o
27obj-$(CONFIG_KALLSYMS) += kallsyms.o 33obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/capability.c b/kernel/capability.c
index 1a4d8a40d3f9..c7685ad00a97 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
46 int ret = 0; 46 int ret = 0;
47 pid_t pid; 47 pid_t pid;
48 __u32 version; 48 __u32 version;
49 task_t *target; 49 struct task_struct *target;
50 struct __user_cap_data_struct data; 50 struct __user_cap_data_struct data;
51 51
52 if (get_user(version, &header->version)) 52 if (get_user(version, &header->version))
@@ -96,7 +96,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
96 kernel_cap_t *inheritable, 96 kernel_cap_t *inheritable,
97 kernel_cap_t *permitted) 97 kernel_cap_t *permitted)
98{ 98{
99 task_t *g, *target; 99 struct task_struct *g, *target;
100 int ret = -EPERM; 100 int ret = -EPERM;
101 int found = 0; 101 int found = 0;
102 102
@@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
128 kernel_cap_t *inheritable, 128 kernel_cap_t *inheritable,
129 kernel_cap_t *permitted) 129 kernel_cap_t *permitted)
130{ 130{
131 task_t *g, *target; 131 struct task_struct *g, *target;
132 int ret = -EPERM; 132 int ret = -EPERM;
133 int found = 0; 133 int found = 0;
134 134
@@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
172{ 172{
173 kernel_cap_t inheritable, permitted, effective; 173 kernel_cap_t inheritable, permitted, effective;
174 __u32 version; 174 __u32 version;
175 task_t *target; 175 struct task_struct *target;
176 int ret; 176 int ret;
177 pid_t pid; 177 pid_t pid;
178 178
diff --git a/kernel/exit.c b/kernel/exit.c
index 7f7ef2258553..6664c084783d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -134,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
134 134
135void release_task(struct task_struct * p) 135void release_task(struct task_struct * p)
136{ 136{
137 struct task_struct *leader;
137 int zap_leader; 138 int zap_leader;
138 task_t *leader;
139repeat: 139repeat:
140 atomic_dec(&p->user->processes); 140 atomic_dec(&p->user->processes);
141 write_lock_irq(&tasklist_lock); 141 write_lock_irq(&tasklist_lock);
@@ -209,7 +209,7 @@ out:
209 * 209 *
210 * "I ask you, have you ever known what it is to be an orphan?" 210 * "I ask you, have you ever known what it is to be an orphan?"
211 */ 211 */
212static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) 212static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
213{ 213{
214 struct task_struct *p; 214 struct task_struct *p;
215 int ret = 1; 215 int ret = 1;
@@ -582,7 +582,8 @@ static void exit_mm(struct task_struct * tsk)
582 mmput(mm); 582 mmput(mm);
583} 583}
584 584
585static inline void choose_new_parent(task_t *p, task_t *reaper) 585static inline void
586choose_new_parent(struct task_struct *p, struct task_struct *reaper)
586{ 587{
587 /* 588 /*
588 * Make sure we're not reparenting to ourselves and that 589 * Make sure we're not reparenting to ourselves and that
@@ -592,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper)
592 p->real_parent = reaper; 593 p->real_parent = reaper;
593} 594}
594 595
595static void reparent_thread(task_t *p, task_t *father, int traced) 596static void
597reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
596{ 598{
597 /* We don't want people slaying init. */ 599 /* We don't want people slaying init. */
598 if (p->exit_signal != -1) 600 if (p->exit_signal != -1)
@@ -656,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced)
656 * group, and if no such member exists, give it to 658 * group, and if no such member exists, give it to
657 * the global child reaper process (ie "init") 659 * the global child reaper process (ie "init")
658 */ 660 */
659static void forget_original_parent(struct task_struct * father, 661static void
660 struct list_head *to_release) 662forget_original_parent(struct task_struct *father, struct list_head *to_release)
661{ 663{
662 struct task_struct *p, *reaper = father; 664 struct task_struct *p, *reaper = father;
663 struct list_head *_p, *_n; 665 struct list_head *_p, *_n;
@@ -680,7 +682,7 @@ static void forget_original_parent(struct task_struct * father,
680 */ 682 */
681 list_for_each_safe(_p, _n, &father->children) { 683 list_for_each_safe(_p, _n, &father->children) {
682 int ptrace; 684 int ptrace;
683 p = list_entry(_p,struct task_struct,sibling); 685 p = list_entry(_p, struct task_struct, sibling);
684 686
685 ptrace = p->ptrace; 687 ptrace = p->ptrace;
686 688
@@ -709,7 +711,7 @@ static void forget_original_parent(struct task_struct * father,
709 list_add(&p->ptrace_list, to_release); 711 list_add(&p->ptrace_list, to_release);
710 } 712 }
711 list_for_each_safe(_p, _n, &father->ptrace_children) { 713 list_for_each_safe(_p, _n, &father->ptrace_children) {
712 p = list_entry(_p,struct task_struct,ptrace_list); 714 p = list_entry(_p, struct task_struct, ptrace_list);
713 choose_new_parent(p, reaper); 715 choose_new_parent(p, reaper);
714 reparent_thread(p, father, 1); 716 reparent_thread(p, father, 1);
715 } 717 }
@@ -829,7 +831,7 @@ static void exit_notify(struct task_struct *tsk)
829 831
830 list_for_each_safe(_p, _n, &ptrace_dead) { 832 list_for_each_safe(_p, _n, &ptrace_dead) {
831 list_del_init(_p); 833 list_del_init(_p);
832 t = list_entry(_p,struct task_struct,ptrace_list); 834 t = list_entry(_p, struct task_struct, ptrace_list);
833 release_task(t); 835 release_task(t);
834 } 836 }
835 837
@@ -933,10 +935,9 @@ fastcall NORET_TYPE void do_exit(long code)
933 if (unlikely(current->pi_state_cache)) 935 if (unlikely(current->pi_state_cache))
934 kfree(current->pi_state_cache); 936 kfree(current->pi_state_cache);
935 /* 937 /*
936 * If DEBUG_MUTEXES is on, make sure we are holding no locks: 938 * Make sure we are holding no locks:
937 */ 939 */
938 mutex_debug_check_no_locks_held(tsk); 940 debug_check_no_locks_held(tsk);
939 rt_mutex_debug_check_no_locks_held(tsk);
940 941
941 if (tsk->io_context) 942 if (tsk->io_context)
942 exit_io_context(); 943 exit_io_context();
@@ -1011,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code)
1011 do_group_exit((error_code & 0xff) << 8); 1012 do_group_exit((error_code & 0xff) << 8);
1012} 1013}
1013 1014
1014static int eligible_child(pid_t pid, int options, task_t *p) 1015static int eligible_child(pid_t pid, int options, struct task_struct *p)
1015{ 1016{
1016 if (pid > 0) { 1017 if (pid > 0) {
1017 if (p->pid != pid) 1018 if (p->pid != pid)
@@ -1052,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p)
1052 return 1; 1053 return 1;
1053} 1054}
1054 1055
1055static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, 1056static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1056 int why, int status, 1057 int why, int status,
1057 struct siginfo __user *infop, 1058 struct siginfo __user *infop,
1058 struct rusage __user *rusagep) 1059 struct rusage __user *rusagep)
1059{ 1060{
1060 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; 1061 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
1062
1061 put_task_struct(p); 1063 put_task_struct(p);
1062 if (!retval) 1064 if (!retval)
1063 retval = put_user(SIGCHLD, &infop->si_signo); 1065 retval = put_user(SIGCHLD, &infop->si_signo);
@@ -1082,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
1082 * the lock and this task is uninteresting. If we return nonzero, we have 1084 * the lock and this task is uninteresting. If we return nonzero, we have
1083 * released the lock and the system call should return. 1085 * released the lock and the system call should return.
1084 */ 1086 */
1085static int wait_task_zombie(task_t *p, int noreap, 1087static int wait_task_zombie(struct task_struct *p, int noreap,
1086 struct siginfo __user *infop, 1088 struct siginfo __user *infop,
1087 int __user *stat_addr, struct rusage __user *ru) 1089 int __user *stat_addr, struct rusage __user *ru)
1088{ 1090{
@@ -1244,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap,
1244 * the lock and this task is uninteresting. If we return nonzero, we have 1246 * the lock and this task is uninteresting. If we return nonzero, we have
1245 * released the lock and the system call should return. 1247 * released the lock and the system call should return.
1246 */ 1248 */
1247static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap, 1249static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
1248 struct siginfo __user *infop, 1250 int noreap, struct siginfo __user *infop,
1249 int __user *stat_addr, struct rusage __user *ru) 1251 int __user *stat_addr, struct rusage __user *ru)
1250{ 1252{
1251 int retval, exit_code; 1253 int retval, exit_code;
@@ -1359,7 +1361,7 @@ bail_ref:
1359 * the lock and this task is uninteresting. If we return nonzero, we have 1361 * the lock and this task is uninteresting. If we return nonzero, we have
1360 * released the lock and the system call should return. 1362 * released the lock and the system call should return.
1361 */ 1363 */
1362static int wait_task_continued(task_t *p, int noreap, 1364static int wait_task_continued(struct task_struct *p, int noreap,
1363 struct siginfo __user *infop, 1365 struct siginfo __user *infop,
1364 int __user *stat_addr, struct rusage __user *ru) 1366 int __user *stat_addr, struct rusage __user *ru)
1365{ 1367{
@@ -1445,7 +1447,7 @@ repeat:
1445 int ret; 1447 int ret;
1446 1448
1447 list_for_each(_p,&tsk->children) { 1449 list_for_each(_p,&tsk->children) {
1448 p = list_entry(_p,struct task_struct,sibling); 1450 p = list_entry(_p, struct task_struct, sibling);
1449 1451
1450 ret = eligible_child(pid, options, p); 1452 ret = eligible_child(pid, options, p);
1451 if (!ret) 1453 if (!ret)
diff --git a/kernel/fork.c b/kernel/fork.c
index 9064bf9e131b..56e4e07e45f7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
193 193
194 down_write(&oldmm->mmap_sem); 194 down_write(&oldmm->mmap_sem);
195 flush_cache_mm(oldmm); 195 flush_cache_mm(oldmm);
196 down_write(&mm->mmap_sem); 196 /*
197 * Not linked in yet - no deadlock potential:
198 */
199 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
197 200
198 mm->locked_vm = 0; 201 mm->locked_vm = 0;
199 mm->mmap = NULL; 202 mm->mmap = NULL;
@@ -919,10 +922,6 @@ static inline void rt_mutex_init_task(struct task_struct *p)
919 spin_lock_init(&p->pi_lock); 922 spin_lock_init(&p->pi_lock);
920 plist_head_init(&p->pi_waiters, &p->pi_lock); 923 plist_head_init(&p->pi_waiters, &p->pi_lock);
921 p->pi_blocked_on = NULL; 924 p->pi_blocked_on = NULL;
922# ifdef CONFIG_DEBUG_RT_MUTEXES
923 spin_lock_init(&p->held_list_lock);
924 INIT_LIST_HEAD(&p->held_list_head);
925# endif
926#endif 925#endif
927} 926}
928 927
@@ -934,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p)
934 * parts of the process environment (as per the clone 933 * parts of the process environment (as per the clone
935 * flags). The actual kick-off is left to the caller. 934 * flags). The actual kick-off is left to the caller.
936 */ 935 */
937static task_t *copy_process(unsigned long clone_flags, 936static struct task_struct *copy_process(unsigned long clone_flags,
938 unsigned long stack_start, 937 unsigned long stack_start,
939 struct pt_regs *regs, 938 struct pt_regs *regs,
940 unsigned long stack_size, 939 unsigned long stack_size,
941 int __user *parent_tidptr, 940 int __user *parent_tidptr,
942 int __user *child_tidptr, 941 int __user *child_tidptr,
943 int pid) 942 int pid)
944{ 943{
945 int retval; 944 int retval;
946 struct task_struct *p = NULL; 945 struct task_struct *p = NULL;
@@ -972,6 +971,10 @@ static task_t *copy_process(unsigned long clone_flags,
972 if (!p) 971 if (!p)
973 goto fork_out; 972 goto fork_out;
974 973
974#ifdef CONFIG_TRACE_IRQFLAGS
975 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
976 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
977#endif
975 retval = -EAGAIN; 978 retval = -EAGAIN;
976 if (atomic_read(&p->user->processes) >= 979 if (atomic_read(&p->user->processes) >=
977 p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 980 p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
@@ -1046,6 +1049,26 @@ static task_t *copy_process(unsigned long clone_flags,
1046 } 1049 }
1047 mpol_fix_fork_child_flag(p); 1050 mpol_fix_fork_child_flag(p);
1048#endif 1051#endif
1052#ifdef CONFIG_TRACE_IRQFLAGS
1053 p->irq_events = 0;
1054 p->hardirqs_enabled = 0;
1055 p->hardirq_enable_ip = 0;
1056 p->hardirq_enable_event = 0;
1057 p->hardirq_disable_ip = _THIS_IP_;
1058 p->hardirq_disable_event = 0;
1059 p->softirqs_enabled = 1;
1060 p->softirq_enable_ip = _THIS_IP_;
1061 p->softirq_enable_event = 0;
1062 p->softirq_disable_ip = 0;
1063 p->softirq_disable_event = 0;
1064 p->hardirq_context = 0;
1065 p->softirq_context = 0;
1066#endif
1067#ifdef CONFIG_LOCKDEP
1068 p->lockdep_depth = 0; /* no locks held yet */
1069 p->curr_chain_key = 0;
1070 p->lockdep_recursion = 0;
1071#endif
1049 1072
1050 rt_mutex_init_task(p); 1073 rt_mutex_init_task(p);
1051 1074
@@ -1271,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1271 return regs; 1294 return regs;
1272} 1295}
1273 1296
1274task_t * __devinit fork_idle(int cpu) 1297struct task_struct * __devinit fork_idle(int cpu)
1275{ 1298{
1276 task_t *task; 1299 struct task_struct *task;
1277 struct pt_regs regs; 1300 struct pt_regs regs;
1278 1301
1279 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0); 1302 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
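
Note: the dup_mmap() change above is the standard pattern for telling lockdep about a legitimate nested acquisition of two locks from the same class: the second rw_semaphore belongs to an mm that is not yet visible to anyone else, so the ordering cannot deadlock. A minimal sketch of the same annotation, using a hypothetical my_obj structure (not part of this patch):

    #include <linux/rwsem.h>
    #include <linux/lockdep.h>

    struct my_obj {                         /* hypothetical example structure */
            struct rw_semaphore sem;
            int data;
    };

    static void copy_guarded(struct my_obj *old, struct my_obj *new)
    {
            down_write(&old->sem);
            /* 'new' is still private to this thread - no deadlock potential: */
            down_write_nested(&new->sem, SINGLE_DEPTH_NESTING);

            new->data = old->data;

            up_write(&new->sem);
            up_write(&old->sem);
    }
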
diff --git a/kernel/futex.c b/kernel/futex.c
index 15caf93e4a43..1dc98e4dd287 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -607,6 +607,22 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
607} 607}
608 608
609/* 609/*
610 * Express the locking dependencies for lockdep:
611 */
612static inline void
613double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
614{
615 if (hb1 <= hb2) {
616 spin_lock(&hb1->lock);
617 if (hb1 < hb2)
618 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
619 } else { /* hb1 > hb2 */
620 spin_lock(&hb2->lock);
621 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
622 }
623}
624
625/*
610 * Wake up all waiters hashed on the physical page that is mapped 626 * Wake up all waiters hashed on the physical page that is mapped
611 * to this virtual address: 627 * to this virtual address:
612 */ 628 */
@@ -674,11 +690,7 @@ retryfull:
674 hb2 = hash_futex(&key2); 690 hb2 = hash_futex(&key2);
675 691
676retry: 692retry:
677 if (hb1 < hb2) 693 double_lock_hb(hb1, hb2);
678 spin_lock(&hb1->lock);
679 spin_lock(&hb2->lock);
680 if (hb1 > hb2)
681 spin_lock(&hb1->lock);
682 694
683 op_ret = futex_atomic_op_inuser(op, uaddr2); 695 op_ret = futex_atomic_op_inuser(op, uaddr2);
684 if (unlikely(op_ret < 0)) { 696 if (unlikely(op_ret < 0)) {
@@ -787,11 +799,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
787 hb1 = hash_futex(&key1); 799 hb1 = hash_futex(&key1);
788 hb2 = hash_futex(&key2); 800 hb2 = hash_futex(&key2);
789 801
790 if (hb1 < hb2) 802 double_lock_hb(hb1, hb2);
791 spin_lock(&hb1->lock);
792 spin_lock(&hb2->lock);
793 if (hb1 > hb2)
794 spin_lock(&hb1->lock);
795 803
796 if (likely(cmpval != NULL)) { 804 if (likely(cmpval != NULL)) {
797 u32 curval; 805 u32 curval;
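
Note: double_lock_hb() above encodes the two facts lockdep needs about the hash-bucket locks: they are always taken in a fixed (address) order, and when two distinct buckets are held the inner acquisition is an intentional same-class nesting, annotated with spin_lock_nested(). A matching unlock helper could look like the following sketch (not part of this hunk; lockdep only tracks acquisition order, so the release order is unconstrained - the only requirement is not to unlock a shared bucket twice when hb1 == hb2):

    static inline void
    double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
    {
            spin_unlock(&hb1->lock);
            if (hb1 != hb2)
                    spin_unlock(&hb2->lock);
    }
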
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8d3dc29ef41a..d17766d40dab 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer)
669 return HRTIMER_NORESTART; 669 return HRTIMER_NORESTART;
670} 670}
671 671
672void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) 672void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
673{ 673{
674 sl->timer.function = hrtimer_wakeup; 674 sl->timer.function = hrtimer_wakeup;
675 sl->task = task; 675 sl->task = task;
@@ -782,8 +782,10 @@ static void __devinit init_hrtimers_cpu(int cpu)
782 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); 782 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
783 int i; 783 int i;
784 784
785 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) 785 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
786 spin_lock_init(&base->lock); 786 spin_lock_init(&base->lock);
787 lockdep_set_class(&base->lock, &base->lock_key);
788 }
787} 789}
788 790
789#ifdef CONFIG_HOTPLUG_CPU 791#ifdef CONFIG_HOTPLUG_CPU
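
Note: the hrtimer change above pairs each clock base's spinlock with a lock_class_key stored next to it in the statically allocated per-CPU base, so every base lock gets its own well-defined class. The same idiom applies to any statically allocated array of objects whose locks need explicit classes; a minimal sketch with a hypothetical my_base type (not part of this patch):

    #include <linux/spinlock.h>
    #include <linux/lockdep.h>

    struct my_base {
            spinlock_t              lock;
            struct lock_class_key   lock_key;       /* distinct class per base */
    };

    /* lockdep requires class keys to live in static storage */
    static struct my_base my_bases[4];

    static void my_bases_init(void)
    {
            int i;

            for (i = 0; i < 4; i++) {
                    spin_lock_init(&my_bases[i].lock);
                    lockdep_set_class(&my_bases[i].lock, &my_bases[i].lock_key);
            }
    }
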
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 54105bdfe20d..9336f2e89e40 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -261,10 +261,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
261 * keep it masked and get out of here 261 * keep it masked and get out of here
262 */ 262 */
263 action = desc->action; 263 action = desc->action;
264 if (unlikely(!action || (desc->status & IRQ_DISABLED))) 264 if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
265 desc->status |= IRQ_PENDING;
265 goto out; 266 goto out;
267 }
266 268
267 desc->status |= IRQ_INPROGRESS; 269 desc->status |= IRQ_INPROGRESS;
270 desc->status &= ~IRQ_PENDING;
268 spin_unlock(&desc->lock); 271 spin_unlock(&desc->lock);
269 272
270 action_ret = handle_IRQ_event(irq, regs, action); 273 action_ret = handle_IRQ_event(irq, regs, action);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index aeb6e391276c..fc4e906aedbd 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -132,7 +132,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
132 handle_dynamic_tick(action); 132 handle_dynamic_tick(action);
133 133
134 if (!(action->flags & IRQF_DISABLED)) 134 if (!(action->flags & IRQF_DISABLED))
135 local_irq_enable(); 135 local_irq_enable_in_hardirq();
136 136
137 do { 137 do {
138 ret = action->handler(irq, action->dev_id, regs); 138 ret = action->handler(irq, action->dev_id, regs);
@@ -249,3 +249,19 @@ out:
249 return 1; 249 return 1;
250} 250}
251 251
252#ifdef CONFIG_TRACE_IRQFLAGS
253
254/*
255 * lockdep: we want to handle all irq_desc locks as a single lock-class:
256 */
257static struct lock_class_key irq_desc_lock_class;
258
259void early_init_irq_lock_class(void)
260{
261 int i;
262
263 for (i = 0; i < NR_IRQS; i++)
264 lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
265}
266
267#endif
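
Note: early_init_irq_lock_class() above is the complementary idiom. irq_desc[] is statically initialized, so its spinlocks carry no class key of their own; register_lock_class() in the new kernel/lockdep.c (below) would then fall back to each lock's address as its key, giving every descriptor lock its own class. Assigning one shared static key collapses them into a single class. A minimal sketch of the same pattern for a hypothetical lock array:

    #include <linux/init.h>
    #include <linux/spinlock.h>
    #include <linux/lockdep.h>

    #define MY_NR_LOCKS 16                          /* hypothetical */

    static spinlock_t my_locks[MY_NR_LOCKS] = {
            [0 ... MY_NR_LOCKS-1] = SPIN_LOCK_UNLOCKED
    };
    static struct lock_class_key my_lock_class;     /* one class for the whole array */

    static void __init my_locks_init_class(void)
    {
            int i;

            for (i = 0; i < MY_NR_LOCKS; i++)
                    lockdep_set_class(&my_locks[i], &my_lock_class);
    }
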
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c911c6ec4dd6..4e461438e48b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -410,6 +410,12 @@ int request_irq(unsigned int irq,
410 struct irqaction *action; 410 struct irqaction *action;
411 int retval; 411 int retval;
412 412
413#ifdef CONFIG_LOCKDEP
414 /*
415 * Lockdep wants atomic interrupt handlers:
416 */
417 irqflags |= SA_INTERRUPT;
418#endif
413 /* 419 /*
414 * Sanity-check: shared interrupts must pass in a real dev-ID, 420 * Sanity-check: shared interrupts must pass in a real dev-ID,
415 * otherwise we'll have trouble later trying to figure out 421 * otherwise we'll have trouble later trying to figure out
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1b7157af051c..1d32defa38ab 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -233,7 +233,7 @@ static void __call_usermodehelper(void *data)
233int call_usermodehelper_keys(char *path, char **argv, char **envp, 233int call_usermodehelper_keys(char *path, char **argv, char **envp,
234 struct key *session_keyring, int wait) 234 struct key *session_keyring, int wait)
235{ 235{
236 DECLARE_COMPLETION(done); 236 DECLARE_COMPLETION_ONSTACK(done);
237 struct subprocess_info sub_info = { 237 struct subprocess_info sub_info = {
238 .complete = &done, 238 .complete = &done,
239 .path = path, 239 .path = path,
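
Note: the kmod.c change switches an on-stack completion to DECLARE_COMPLETION_ONSTACK. With lockdep enabled, the plain static initializer would leave the completion's internal wait-queue lock without a class key, so the validator would fall back to the lock's (stack) address and reject it as a non-static key; the _ONSTACK variant initializes the lock at run time from a static per-call-site key. A minimal usage sketch (kick() is a hypothetical callback, not part of this patch):

    #include <linux/completion.h>

    static void do_and_wait(void (*kick)(struct completion *))
    {
            DECLARE_COMPLETION_ONSTACK(done);

            kick(&done);                    /* hand 'done' to asynchronous work */
            wait_for_completion(&done);     /* 'done' stays in scope until completed */
    }
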
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
new file mode 100644
index 000000000000..f32ca78c198d
--- /dev/null
+++ b/kernel/lockdep.c
@@ -0,0 +1,2702 @@
1/*
2 * kernel/lockdep.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * this code maps all the lock dependencies as they occur in a live kernel
11 * and will warn about the following classes of locking bugs:
12 *
13 * - lock inversion scenarios
14 * - circular lock dependencies
15 * - hardirq/softirq safe/unsafe locking bugs
16 *
17 * Bugs are reported even if the current locking scenario does not cause
18 * any deadlock at this point.
19 *
20 * I.e. if anytime in the past two locks were taken in a different order,
21 * even if it happened for another task, even if those were different
22 * locks (but of the same class as this lock), this code will detect it.
23 *
24 * Thanks to Arjan van de Ven for coming up with the initial idea of
25 * mapping lock dependencies runtime.
26 */
27#include <linux/mutex.h>
28#include <linux/sched.h>
29#include <linux/delay.h>
30#include <linux/module.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/spinlock.h>
34#include <linux/kallsyms.h>
35#include <linux/interrupt.h>
36#include <linux/stacktrace.h>
37#include <linux/debug_locks.h>
38#include <linux/irqflags.h>
39
40#include <asm/sections.h>
41
42#include "lockdep_internals.h"
43
44/*
45 * hash_lock: protects the lockdep hashes and class/list/hash allocators.
46 *
47 * This is one of the rare exceptions where it's justified
48 * to use a raw spinlock - we really don't want the spinlock
49 * code to recurse back into the lockdep code.
50 */
51static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
52
53static int lockdep_initialized;
54
55unsigned long nr_list_entries;
56static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
57
58/*
59 * Allocate a lockdep entry. (assumes hash_lock held, returns
60 * with NULL on failure)
61 */
62static struct lock_list *alloc_list_entry(void)
63{
64 if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
65 __raw_spin_unlock(&hash_lock);
66 debug_locks_off();
67 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
68 printk("turning off the locking correctness validator.\n");
69 return NULL;
70 }
71 return list_entries + nr_list_entries++;
72}
73
74/*
75 * All data structures here are protected by the global debug_lock.
76 *
77 * Mutex key structs only get allocated once, during bootup, and never
78 * get freed - this significantly simplifies the debugging code.
79 */
80unsigned long nr_lock_classes;
81static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
82
83/*
84 * We keep a global list of all lock classes. The list only grows,
85 * never shrinks. The list is only accessed with the lockdep
86 * spinlock lock held.
87 */
88LIST_HEAD(all_lock_classes);
89
90/*
91 * The lockdep classes are in a hash-table as well, for fast lookup:
92 */
93#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1)
94#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS)
95#define CLASSHASH_MASK (CLASSHASH_SIZE - 1)
96#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK)
97#define classhashentry(key) (classhash_table + __classhashfn((key)))
98
99static struct list_head classhash_table[CLASSHASH_SIZE];
100
101unsigned long nr_lock_chains;
102static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
103
104/*
105 * We put the lock dependency chains into a hash-table as well, to cache
106 * their existence:
107 */
108#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1)
109#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS)
110#define CHAINHASH_MASK (CHAINHASH_SIZE - 1)
111#define __chainhashfn(chain) \
112 (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK)
113#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
114
115static struct list_head chainhash_table[CHAINHASH_SIZE];
116
117/*
118 * The hash key of the lock dependency chains is a hash itself too:
119 * it's a hash of all locks taken up to that lock, including that lock.
120 * It's a 64-bit hash, because it's important for the keys to be
121 * unique.
122 */
123#define iterate_chain_key(key1, key2) \
124 (((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \
125 ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \
126 (key2))
127
128void lockdep_off(void)
129{
130 current->lockdep_recursion++;
131}
132
133EXPORT_SYMBOL(lockdep_off);
134
135void lockdep_on(void)
136{
137 current->lockdep_recursion--;
138}
139
140EXPORT_SYMBOL(lockdep_on);
141
142int lockdep_internal(void)
143{
144 return current->lockdep_recursion != 0;
145}
146
147EXPORT_SYMBOL(lockdep_internal);
148
149/*
150 * Debugging switches:
151 */
152
153#define VERBOSE 0
154#ifdef VERBOSE
155# define VERY_VERBOSE 0
156#endif
157
158#if VERBOSE
159# define HARDIRQ_VERBOSE 1
160# define SOFTIRQ_VERBOSE 1
161#else
162# define HARDIRQ_VERBOSE 0
163# define SOFTIRQ_VERBOSE 0
164#endif
165
166#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
167/*
168 * Quick filtering for interesting events:
169 */
170static int class_filter(struct lock_class *class)
171{
172 if (class->name_version == 1 &&
173 !strcmp(class->name, "&rl->lock"))
174 return 1;
175 if (class->name_version == 1 &&
176 !strcmp(class->name, "&ni->mrec_lock"))
177 return 1;
178 if (class->name_version == 1 &&
179 !strcmp(class->name, "mft_ni_runlist_lock"))
180 return 1;
181 if (class->name_version == 1 &&
182 !strcmp(class->name, "mft_ni_mrec_lock"))
183 return 1;
184 if (class->name_version == 1 &&
185 !strcmp(class->name, "&vol->lcnbmp_lock"))
186 return 1;
187 return 0;
188}
189#endif
190
191static int verbose(struct lock_class *class)
192{
193#if VERBOSE
194 return class_filter(class);
195#endif
196 return 0;
197}
198
199#ifdef CONFIG_TRACE_IRQFLAGS
200
201static int hardirq_verbose(struct lock_class *class)
202{
203#if HARDIRQ_VERBOSE
204 return class_filter(class);
205#endif
206 return 0;
207}
208
209static int softirq_verbose(struct lock_class *class)
210{
211#if SOFTIRQ_VERBOSE
212 return class_filter(class);
213#endif
214 return 0;
215}
216
217#endif
218
219/*
220 * Stack-trace: tightly packed array of stack backtrace
221 * addresses. Protected by the hash_lock.
222 */
223unsigned long nr_stack_trace_entries;
224static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
225
226static int save_trace(struct stack_trace *trace)
227{
228 trace->nr_entries = 0;
229 trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
230 trace->entries = stack_trace + nr_stack_trace_entries;
231
232 save_stack_trace(trace, NULL, 0, 3);
233
234 trace->max_entries = trace->nr_entries;
235
236 nr_stack_trace_entries += trace->nr_entries;
237 if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES))
238 return 0;
239
240 if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
241 __raw_spin_unlock(&hash_lock);
242 if (debug_locks_off()) {
243 printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
244 printk("turning off the locking correctness validator.\n");
245 dump_stack();
246 }
247 return 0;
248 }
249
250 return 1;
251}
252
253unsigned int nr_hardirq_chains;
254unsigned int nr_softirq_chains;
255unsigned int nr_process_chains;
256unsigned int max_lockdep_depth;
257unsigned int max_recursion_depth;
258
259#ifdef CONFIG_DEBUG_LOCKDEP
260/*
261 * We cannot printk in early bootup code. Not even early_printk()
262 * might work. So we mark any initialization errors and printk
263 * about it later on, in lockdep_info().
264 */
265static int lockdep_init_error;
266
267/*
268 * Various lockdep statistics:
269 */
270atomic_t chain_lookup_hits;
271atomic_t chain_lookup_misses;
272atomic_t hardirqs_on_events;
273atomic_t hardirqs_off_events;
274atomic_t redundant_hardirqs_on;
275atomic_t redundant_hardirqs_off;
276atomic_t softirqs_on_events;
277atomic_t softirqs_off_events;
278atomic_t redundant_softirqs_on;
279atomic_t redundant_softirqs_off;
280atomic_t nr_unused_locks;
281atomic_t nr_cyclic_checks;
282atomic_t nr_cyclic_check_recursions;
283atomic_t nr_find_usage_forwards_checks;
284atomic_t nr_find_usage_forwards_recursions;
285atomic_t nr_find_usage_backwards_checks;
286atomic_t nr_find_usage_backwards_recursions;
287# define debug_atomic_inc(ptr) atomic_inc(ptr)
288# define debug_atomic_dec(ptr) atomic_dec(ptr)
289# define debug_atomic_read(ptr) atomic_read(ptr)
290#else
291# define debug_atomic_inc(ptr) do { } while (0)
292# define debug_atomic_dec(ptr) do { } while (0)
293# define debug_atomic_read(ptr) 0
294#endif
295
296/*
297 * Locking printouts:
298 */
299
300static const char *usage_str[] =
301{
302 [LOCK_USED] = "initial-use ",
303 [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W",
304 [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W",
305 [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W",
306 [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W",
307 [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R",
308 [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R",
309 [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R",
310 [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R",
311};
312
313const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
314{
315 unsigned long offs, size;
316 char *modname;
317
318 return kallsyms_lookup((unsigned long)key, &size, &offs, &modname, str);
319}
320
321void
322get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
323{
324 *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.';
325
326 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
327 *c1 = '+';
328 else
329 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
330 *c1 = '-';
331
332 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
333 *c2 = '+';
334 else
335 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
336 *c2 = '-';
337
338 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
339 *c3 = '-';
340 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) {
341 *c3 = '+';
342 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
343 *c3 = '?';
344 }
345
346 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
347 *c4 = '-';
348 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) {
349 *c4 = '+';
350 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
351 *c4 = '?';
352 }
353}
354
355static void print_lock_name(struct lock_class *class)
356{
357 char str[128], c1, c2, c3, c4;
358 const char *name;
359
360 get_usage_chars(class, &c1, &c2, &c3, &c4);
361
362 name = class->name;
363 if (!name) {
364 name = __get_key_name(class->key, str);
365 printk(" (%s", name);
366 } else {
367 printk(" (%s", name);
368 if (class->name_version > 1)
369 printk("#%d", class->name_version);
370 if (class->subclass)
371 printk("/%d", class->subclass);
372 }
373 printk("){%c%c%c%c}", c1, c2, c3, c4);
374}
375
376static void print_lockdep_cache(struct lockdep_map *lock)
377{
378 const char *name;
379 char str[128];
380
381 name = lock->name;
382 if (!name)
383 name = __get_key_name(lock->key->subkeys, str);
384
385 printk("%s", name);
386}
387
388static void print_lock(struct held_lock *hlock)
389{
390 print_lock_name(hlock->class);
391 printk(", at: ");
392 print_ip_sym(hlock->acquire_ip);
393}
394
395static void lockdep_print_held_locks(struct task_struct *curr)
396{
397 int i, depth = curr->lockdep_depth;
398
399 if (!depth) {
400 printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
401 return;
402 }
403 printk("%d lock%s held by %s/%d:\n",
404 depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
405
406 for (i = 0; i < depth; i++) {
407 printk(" #%d: ", i);
408 print_lock(curr->held_locks + i);
409 }
410}
411/*
412 * Helper to print a nice hierarchy of lock dependencies:
413 */
414static void print_spaces(int nr)
415{
416 int i;
417
418 for (i = 0; i < nr; i++)
419 printk(" ");
420}
421
422static void print_lock_class_header(struct lock_class *class, int depth)
423{
424 int bit;
425
426 print_spaces(depth);
427 printk("->");
428 print_lock_name(class);
429 printk(" ops: %lu", class->ops);
430 printk(" {\n");
431
432 for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
433 if (class->usage_mask & (1 << bit)) {
434 int len = depth;
435
436 print_spaces(depth);
437 len += printk(" %s", usage_str[bit]);
438 len += printk(" at:\n");
439 print_stack_trace(class->usage_traces + bit, len);
440 }
441 }
442 print_spaces(depth);
443 printk(" }\n");
444
445 print_spaces(depth);
446 printk(" ... key at: ");
447 print_ip_sym((unsigned long)class->key);
448}
449
450/*
451 * printk all lock dependencies starting at <entry>:
452 */
453static void print_lock_dependencies(struct lock_class *class, int depth)
454{
455 struct lock_list *entry;
456
457 if (DEBUG_LOCKS_WARN_ON(depth >= 20))
458 return;
459
460 print_lock_class_header(class, depth);
461
462 list_for_each_entry(entry, &class->locks_after, entry) {
463 DEBUG_LOCKS_WARN_ON(!entry->class);
464 print_lock_dependencies(entry->class, depth + 1);
465
466 print_spaces(depth);
467 printk(" ... acquired at:\n");
468 print_stack_trace(&entry->trace, 2);
469 printk("\n");
470 }
471}
472
473/*
474 * Add a new dependency to the head of the list:
475 */
476static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
477 struct list_head *head, unsigned long ip)
478{
479 struct lock_list *entry;
480 /*
481 * Lock not present yet - get a new dependency struct and
482 * add it to the list:
483 */
484 entry = alloc_list_entry();
485 if (!entry)
486 return 0;
487
488 entry->class = this;
489 save_trace(&entry->trace);
490
491 /*
492 * Since we never remove from the dependency list, the list can
493 * be walked lockless by other CPUs, it's only allocation
494 * that must be protected by the spinlock. But this also means
495 * we must make new entries visible only once writes to the
496 * entry become visible - hence the RCU op:
497 */
498 list_add_tail_rcu(&entry->entry, head);
499
500 return 1;
501}
502
503/*
504 * Recursive, forwards-direction lock-dependency checking, used for
505 * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
506 * checking.
507 *
508 * (to keep the stackframe of the recursive functions small we
509 * use these global variables, and we also mark various helper
510 * functions as noinline.)
511 */
512static struct held_lock *check_source, *check_target;
513
514/*
515 * Print a dependency chain entry (this is only done when a deadlock
516 * has been detected):
517 */
518static noinline int
519print_circular_bug_entry(struct lock_list *target, unsigned int depth)
520{
521 if (debug_locks_silent)
522 return 0;
523 printk("\n-> #%u", depth);
524 print_lock_name(target->class);
525 printk(":\n");
526 print_stack_trace(&target->trace, 6);
527
528 return 0;
529}
530
531/*
532 * When a circular dependency is detected, print the
533 * header first:
534 */
535static noinline int
536print_circular_bug_header(struct lock_list *entry, unsigned int depth)
537{
538 struct task_struct *curr = current;
539
540 __raw_spin_unlock(&hash_lock);
541 debug_locks_off();
542 if (debug_locks_silent)
543 return 0;
544
545 printk("\n=======================================================\n");
546 printk( "[ INFO: possible circular locking dependency detected ]\n");
547 printk( "-------------------------------------------------------\n");
548 printk("%s/%d is trying to acquire lock:\n",
549 curr->comm, curr->pid);
550 print_lock(check_source);
551 printk("\nbut task is already holding lock:\n");
552 print_lock(check_target);
553 printk("\nwhich lock already depends on the new lock.\n\n");
554 printk("\nthe existing dependency chain (in reverse order) is:\n");
555
556 print_circular_bug_entry(entry, depth);
557
558 return 0;
559}
560
561static noinline int print_circular_bug_tail(void)
562{
563 struct task_struct *curr = current;
564 struct lock_list this;
565
566 if (debug_locks_silent)
567 return 0;
568
569 this.class = check_source->class;
570 save_trace(&this.trace);
571 print_circular_bug_entry(&this, 0);
572
573 printk("\nother info that might help us debug this:\n\n");
574 lockdep_print_held_locks(curr);
575
576 printk("\nstack backtrace:\n");
577 dump_stack();
578
579 return 0;
580}
581
582static int noinline print_infinite_recursion_bug(void)
583{
584 __raw_spin_unlock(&hash_lock);
585 DEBUG_LOCKS_WARN_ON(1);
586
587 return 0;
588}
589
590/*
591 * Prove that the dependency graph starting at <entry> can not
592 * lead to <target>. Print an error and return 0 if it does.
593 */
594static noinline int
595check_noncircular(struct lock_class *source, unsigned int depth)
596{
597 struct lock_list *entry;
598
599 debug_atomic_inc(&nr_cyclic_check_recursions);
600 if (depth > max_recursion_depth)
601 max_recursion_depth = depth;
602 if (depth >= 20)
603 return print_infinite_recursion_bug();
604 /*
605 * Check this lock's dependency list:
606 */
607 list_for_each_entry(entry, &source->locks_after, entry) {
608 if (entry->class == check_target->class)
609 return print_circular_bug_header(entry, depth+1);
610 debug_atomic_inc(&nr_cyclic_checks);
611 if (!check_noncircular(entry->class, depth+1))
612 return print_circular_bug_entry(entry, depth+1);
613 }
614 return 1;
615}
616
617static int very_verbose(struct lock_class *class)
618{
619#if VERY_VERBOSE
620 return class_filter(class);
621#endif
622 return 0;
623}
624#ifdef CONFIG_TRACE_IRQFLAGS
625
626/*
627 * Forwards and backwards subgraph searching, for the purposes of
628 * proving that two subgraphs can be connected by a new dependency
629 * without creating any illegal irq-safe -> irq-unsafe lock dependency.
630 */
631static enum lock_usage_bit find_usage_bit;
632static struct lock_class *forwards_match, *backwards_match;
633
634/*
635 * Find a node in the forwards-direction dependency sub-graph starting
636 * at <source> that matches <find_usage_bit>.
637 *
638 * Return 2 if such a node exists in the subgraph, and put that node
639 * into <forwards_match>.
640 *
641 * Return 1 otherwise and keep <forwards_match> unchanged.
642 * Return 0 on error.
643 */
644static noinline int
645find_usage_forwards(struct lock_class *source, unsigned int depth)
646{
647 struct lock_list *entry;
648 int ret;
649
650 if (depth > max_recursion_depth)
651 max_recursion_depth = depth;
652 if (depth >= 20)
653 return print_infinite_recursion_bug();
654
655 debug_atomic_inc(&nr_find_usage_forwards_checks);
656 if (source->usage_mask & (1 << find_usage_bit)) {
657 forwards_match = source;
658 return 2;
659 }
660
661 /*
662 * Check this lock's dependency list:
663 */
664 list_for_each_entry(entry, &source->locks_after, entry) {
665 debug_atomic_inc(&nr_find_usage_forwards_recursions);
666 ret = find_usage_forwards(entry->class, depth+1);
667 if (ret == 2 || ret == 0)
668 return ret;
669 }
670 return 1;
671}
672
673/*
674 * Find a node in the backwards-direction dependency sub-graph starting
675 * at <source> that matches <find_usage_bit>.
676 *
677 * Return 2 if such a node exists in the subgraph, and put that node
678 * into <backwards_match>.
679 *
680 * Return 1 otherwise and keep <backwards_match> unchanged.
681 * Return 0 on error.
682 */
683static noinline int
684find_usage_backwards(struct lock_class *source, unsigned int depth)
685{
686 struct lock_list *entry;
687 int ret;
688
689 if (depth > max_recursion_depth)
690 max_recursion_depth = depth;
691 if (depth >= 20)
692 return print_infinite_recursion_bug();
693
694 debug_atomic_inc(&nr_find_usage_backwards_checks);
695 if (source->usage_mask & (1 << find_usage_bit)) {
696 backwards_match = source;
697 return 2;
698 }
699
700 /*
701 * Check this lock's dependency list:
702 */
703 list_for_each_entry(entry, &source->locks_before, entry) {
704 debug_atomic_inc(&nr_find_usage_backwards_recursions);
705 ret = find_usage_backwards(entry->class, depth+1);
706 if (ret == 2 || ret == 0)
707 return ret;
708 }
709 return 1;
710}
711
712static int
713print_bad_irq_dependency(struct task_struct *curr,
714 struct held_lock *prev,
715 struct held_lock *next,
716 enum lock_usage_bit bit1,
717 enum lock_usage_bit bit2,
718 const char *irqclass)
719{
720 __raw_spin_unlock(&hash_lock);
721 debug_locks_off();
722 if (debug_locks_silent)
723 return 0;
724
725 printk("\n======================================================\n");
726 printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
727 irqclass, irqclass);
728 printk( "------------------------------------------------------\n");
729 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
730 curr->comm, curr->pid,
731 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
732 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
733 curr->hardirqs_enabled,
734 curr->softirqs_enabled);
735 print_lock(next);
736
737 printk("\nand this task is already holding:\n");
738 print_lock(prev);
739 printk("which would create a new lock dependency:\n");
740 print_lock_name(prev->class);
741 printk(" ->");
742 print_lock_name(next->class);
743 printk("\n");
744
745 printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
746 irqclass);
747 print_lock_name(backwards_match);
748 printk("\n... which became %s-irq-safe at:\n", irqclass);
749
750 print_stack_trace(backwards_match->usage_traces + bit1, 1);
751
752 printk("\nto a %s-irq-unsafe lock:\n", irqclass);
753 print_lock_name(forwards_match);
754 printk("\n... which became %s-irq-unsafe at:\n", irqclass);
755 printk("...");
756
757 print_stack_trace(forwards_match->usage_traces + bit2, 1);
758
759 printk("\nother info that might help us debug this:\n\n");
760 lockdep_print_held_locks(curr);
761
762 printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass);
763 print_lock_dependencies(backwards_match, 0);
764
765 printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass);
766 print_lock_dependencies(forwards_match, 0);
767
768 printk("\nstack backtrace:\n");
769 dump_stack();
770
771 return 0;
772}
773
774static int
775check_usage(struct task_struct *curr, struct held_lock *prev,
776 struct held_lock *next, enum lock_usage_bit bit_backwards,
777 enum lock_usage_bit bit_forwards, const char *irqclass)
778{
779 int ret;
780
781 find_usage_bit = bit_backwards;
782 /* fills in <backwards_match> */
783 ret = find_usage_backwards(prev->class, 0);
784 if (!ret || ret == 1)
785 return ret;
786
787 find_usage_bit = bit_forwards;
788 ret = find_usage_forwards(next->class, 0);
789 if (!ret || ret == 1)
790 return ret;
791 /* ret == 2 */
792 return print_bad_irq_dependency(curr, prev, next,
793 bit_backwards, bit_forwards, irqclass);
794}
795
796#endif
797
798static int
799print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
800 struct held_lock *next)
801{
802 debug_locks_off();
803 __raw_spin_unlock(&hash_lock);
804 if (debug_locks_silent)
805 return 0;
806
807 printk("\n=============================================\n");
808 printk( "[ INFO: possible recursive locking detected ]\n");
809 printk( "---------------------------------------------\n");
810 printk("%s/%d is trying to acquire lock:\n",
811 curr->comm, curr->pid);
812 print_lock(next);
813 printk("\nbut task is already holding lock:\n");
814 print_lock(prev);
815
816 printk("\nother info that might help us debug this:\n");
817 lockdep_print_held_locks(curr);
818
819 printk("\nstack backtrace:\n");
820 dump_stack();
821
822 return 0;
823}
824
825/*
826 * Check whether we are holding such a class already.
827 *
828 * (Note that this has to be done separately, because the graph cannot
829 * detect such classes of deadlocks.)
830 *
831 * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
832 */
833static int
834check_deadlock(struct task_struct *curr, struct held_lock *next,
835 struct lockdep_map *next_instance, int read)
836{
837 struct held_lock *prev;
838 int i;
839
840 for (i = 0; i < curr->lockdep_depth; i++) {
841 prev = curr->held_locks + i;
842 if (prev->class != next->class)
843 continue;
844 /*
845 * Allow read-after-read recursion of the same
846 * lock class (i.e. read_lock(lock)+read_lock(lock)):
847 */
848 if ((read == 2) && prev->read)
849 return 2;
850 return print_deadlock_bug(curr, prev, next);
851 }
852 return 1;
853}
854
855/*
856 * There was a chain-cache miss, and we are about to add a new dependency
857 * to a previous lock. We recursively validate the following rules:
858 *
859 * - would the adding of the <prev> -> <next> dependency create a
860 * circular dependency in the graph? [== circular deadlock]
861 *
862 * - does the new prev->next dependency connect any hardirq-safe lock
863 * (in the full backwards-subgraph starting at <prev>) with any
864 * hardirq-unsafe lock (in the full forwards-subgraph starting at
865 * <next>)? [== illegal lock inversion with hardirq contexts]
866 *
867 * - does the new prev->next dependency connect any softirq-safe lock
868 * (in the full backwards-subgraph starting at <prev>) with any
869 * softirq-unsafe lock (in the full forwards-subgraph starting at
870 * <next>)? [== illegal lock inversion with softirq contexts]
871 *
872 * any of these scenarios could lead to a deadlock.
873 *
874 * Then if all the validations pass, we add the forwards and backwards
875 * dependency.
876 */
877static int
878check_prev_add(struct task_struct *curr, struct held_lock *prev,
879 struct held_lock *next)
880{
881 struct lock_list *entry;
882 int ret;
883
884 /*
885 * Prove that the new <prev> -> <next> dependency would not
886 * create a circular dependency in the graph. (We do this by
887 * forward-recursing into the graph starting at <next>, and
888 * checking whether we can reach <prev>.)
889 *
890 * We are using global variables to control the recursion, to
891 * keep the stackframe size of the recursive functions low:
892 */
893 check_source = next;
894 check_target = prev;
895 if (!(check_noncircular(next->class, 0)))
896 return print_circular_bug_tail();
897
898#ifdef CONFIG_TRACE_IRQFLAGS
899 /*
900 * Prove that the new dependency does not connect a hardirq-safe
901 * lock with a hardirq-unsafe lock - to achieve this we search
902 * the backwards-subgraph starting at <prev>, and the
903 * forwards-subgraph starting at <next>:
904 */
905 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
906 LOCK_ENABLED_HARDIRQS, "hard"))
907 return 0;
908
909 /*
910 * Prove that the new dependency does not connect a hardirq-safe-read
911 * lock with a hardirq-unsafe lock - to achieve this we search
912 * the backwards-subgraph starting at <prev>, and the
913 * forwards-subgraph starting at <next>:
914 */
915 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
916 LOCK_ENABLED_HARDIRQS, "hard-read"))
917 return 0;
918
919 /*
920 * Prove that the new dependency does not connect a softirq-safe
921 * lock with a softirq-unsafe lock - to achieve this we search
922 * the backwards-subgraph starting at <prev>, and the
923 * forwards-subgraph starting at <next>:
924 */
925 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
926 LOCK_ENABLED_SOFTIRQS, "soft"))
927 return 0;
928 /*
929 * Prove that the new dependency does not connect a softirq-safe-read
930 * lock with a softirq-unsafe lock - to achieve this we search
931 * the backwards-subgraph starting at <prev>, and the
932 * forwards-subgraph starting at <next>:
933 */
934 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
935 LOCK_ENABLED_SOFTIRQS, "soft"))
936 return 0;
937#endif
938 /*
939 * For recursive read-locks we do all the dependency checks,
940 * but we don't store read-triggered dependencies (only
941 * write-triggered dependencies). This ensures that only the
942 * write-side dependencies matter, and that if for example a
943 * write-lock never takes any other locks, then the reads are
944 * equivalent to a NOP.
945 */
946 if (next->read == 2 || prev->read == 2)
947 return 1;
948 /*
949 * Is the <prev> -> <next> dependency already present?
950 *
951 * (this may occur even though this is a new chain: consider
952 * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3
953 * chains - the second one will be new, but L1 already has
954 * L2 added to its dependency list, due to the first chain.)
955 */
956 list_for_each_entry(entry, &prev->class->locks_after, entry) {
957 if (entry->class == next->class)
958 return 2;
959 }
960
961 /*
962 * Ok, all validations passed, add the new lock
963 * to the previous lock's dependency list:
964 */
965 ret = add_lock_to_list(prev->class, next->class,
966 &prev->class->locks_after, next->acquire_ip);
967 if (!ret)
968 return 0;
969 /*
970 * Return value of 2 signals 'dependency already added',
971 * in that case we don't have to add the backlink either.
972 */
973 if (ret == 2)
974 return 2;
975 ret = add_lock_to_list(next->class, prev->class,
976 &next->class->locks_before, next->acquire_ip);
977
978 /*
979 * Debugging printouts:
980 */
981 if (verbose(prev->class) || verbose(next->class)) {
982 __raw_spin_unlock(&hash_lock);
983 printk("\n new dependency: ");
984 print_lock_name(prev->class);
985 printk(" => ");
986 print_lock_name(next->class);
987 printk("\n");
988 dump_stack();
989 __raw_spin_lock(&hash_lock);
990 }
991 return 1;
992}
993
994/*
995 * Add the dependency to all directly-previous locks that are 'relevant'.
996 * The ones that are relevant are (in increasing distance from curr):
997 * all consecutive trylock entries and the final non-trylock entry - or
998 * the end of this context's lock-chain - whichever comes first.
999 */
1000static int
1001check_prevs_add(struct task_struct *curr, struct held_lock *next)
1002{
1003 int depth = curr->lockdep_depth;
1004 struct held_lock *hlock;
1005
1006 /*
1007 * Debugging checks.
1008 *
1009 * Depth must not be zero for a non-head lock:
1010 */
1011 if (!depth)
1012 goto out_bug;
1013 /*
1014 * At least two relevant locks must exist for this
1015 * to be a head:
1016 */
1017 if (curr->held_locks[depth].irq_context !=
1018 curr->held_locks[depth-1].irq_context)
1019 goto out_bug;
1020
1021 for (;;) {
1022 hlock = curr->held_locks + depth-1;
1023 /*
1024 * Only non-recursive-read entries get new dependencies
1025 * added:
1026 */
1027 if (hlock->read != 2) {
1028 check_prev_add(curr, hlock, next);
1029 /*
1030 * Stop after the first non-trylock entry,
1031 * as non-trylock entries have added their
1032 * own direct dependencies already, so this
1033 * lock is connected to them indirectly:
1034 */
1035 if (!hlock->trylock)
1036 break;
1037 }
1038 depth--;
1039 /*
1040 * End of lock-stack?
1041 */
1042 if (!depth)
1043 break;
1044 /*
1045 * Stop the search if we cross into another context:
1046 */
1047 if (curr->held_locks[depth].irq_context !=
1048 curr->held_locks[depth-1].irq_context)
1049 break;
1050 }
1051 return 1;
1052out_bug:
1053 __raw_spin_unlock(&hash_lock);
1054 DEBUG_LOCKS_WARN_ON(1);
1055
1056 return 0;
1057}
1058
1059
1060/*
1061 * Is this the address of a static object:
1062 */
1063static int static_obj(void *obj)
1064{
1065 unsigned long start = (unsigned long) &_stext,
1066 end = (unsigned long) &_end,
1067 addr = (unsigned long) obj;
1068#ifdef CONFIG_SMP
1069 int i;
1070#endif
1071
1072 /*
1073 * static variable?
1074 */
1075 if ((addr >= start) && (addr < end))
1076 return 1;
1077
1078#ifdef CONFIG_SMP
1079 /*
1080 * percpu var?
1081 */
1082 for_each_possible_cpu(i) {
1083 start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
1084 end = (unsigned long) &__per_cpu_end + per_cpu_offset(i);
1085
1086 if ((addr >= start) && (addr < end))
1087 return 1;
1088 }
1089#endif
1090
1091 /*
1092 * module var?
1093 */
1094 return is_module_address(addr);
1095}
1096
1097/*
1098 * To make lock name printouts unique, we calculate a unique
1099 * class->name_version generation counter:
1100 */
1101static int count_matching_names(struct lock_class *new_class)
1102{
1103 struct lock_class *class;
1104 int count = 0;
1105
1106 if (!new_class->name)
1107 return 0;
1108
1109 list_for_each_entry(class, &all_lock_classes, lock_entry) {
1110 if (new_class->key - new_class->subclass == class->key)
1111 return class->name_version;
1112 if (class->name && !strcmp(class->name, new_class->name))
1113 count = max(count, class->name_version);
1114 }
1115
1116 return count + 1;
1117}
1118
1119extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
1120
1121/*
1122 * Register a lock's class in the hash-table, if the class is not present
1123 * yet. Otherwise we look it up. We cache the result in the lock object
1124 * itself, so actual lookup of the hash should be once per lock object.
1125 */
1126static inline struct lock_class *
1127register_lock_class(struct lockdep_map *lock, unsigned int subclass)
1128{
1129 struct lockdep_subclass_key *key;
1130 struct list_head *hash_head;
1131 struct lock_class *class;
1132
1133#ifdef CONFIG_DEBUG_LOCKDEP
1134 /*
1135 * If the architecture calls into lockdep before initializing
1136 * the hashes then we'll warn about it later. (we cannot printk
1137 * right now)
1138 */
1139 if (unlikely(!lockdep_initialized)) {
1140 lockdep_init();
1141 lockdep_init_error = 1;
1142 }
1143#endif
1144
1145 /*
1146 * Static locks do not have their class-keys yet - for them the key
1147 * is the lock object itself:
1148 */
1149 if (unlikely(!lock->key))
1150 lock->key = (void *)lock;
1151
1152 /*
1153 * NOTE: the class-key must be unique. For dynamic locks, a static
1154 * lock_class_key variable is passed in through the mutex_init()
1155 * (or spin_lock_init()) call - which acts as the key. For static
1156 * locks we use the lock object itself as the key.
1157 */
1158 if (sizeof(struct lock_class_key) > sizeof(struct lock_class))
1159 __error_too_big_MAX_LOCKDEP_SUBCLASSES();
1160
1161 key = lock->key->subkeys + subclass;
1162
1163 hash_head = classhashentry(key);
1164
1165 /*
1166 * We can walk the hash lockfree, because the hash only
1167 * grows, and we are careful when adding entries to the end:
1168 */
1169 list_for_each_entry(class, hash_head, hash_entry)
1170 if (class->key == key)
1171 goto out_set;
1172
1173 /*
1174 * Debug-check: all keys must be persistent!
1175 */
1176 if (!static_obj(lock->key)) {
1177 debug_locks_off();
1178 printk("INFO: trying to register non-static key.\n");
1179 printk("the code is fine but needs lockdep annotation.\n");
1180 printk("turning off the locking correctness validator.\n");
1181 dump_stack();
1182
1183 return NULL;
1184 }
1185
1186 __raw_spin_lock(&hash_lock);
1187 /*
1188 * We have to do the hash-walk again, to avoid races
1189 * with another CPU:
1190 */
1191 list_for_each_entry(class, hash_head, hash_entry)
1192 if (class->key == key)
1193 goto out_unlock_set;
1194 /*
1195 * Allocate a new key from the static array, and add it to
1196 * the hash:
1197 */
1198 if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
1199 __raw_spin_unlock(&hash_lock);
1200 debug_locks_off();
1201 printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
1202 printk("turning off the locking correctness validator.\n");
1203 return NULL;
1204 }
1205 class = lock_classes + nr_lock_classes++;
1206 debug_atomic_inc(&nr_unused_locks);
1207 class->key = key;
1208 class->name = lock->name;
1209 class->subclass = subclass;
1210 INIT_LIST_HEAD(&class->lock_entry);
1211 INIT_LIST_HEAD(&class->locks_before);
1212 INIT_LIST_HEAD(&class->locks_after);
1213 class->name_version = count_matching_names(class);
1214 /*
1215 * We use RCU's safe list-add method to make
1216 * parallel walking of the hash-list safe:
1217 */
1218 list_add_tail_rcu(&class->hash_entry, hash_head);
1219
1220 if (verbose(class)) {
1221 __raw_spin_unlock(&hash_lock);
1222 printk("\nnew class %p: %s", class->key, class->name);
1223 if (class->name_version > 1)
1224 printk("#%d", class->name_version);
1225 printk("\n");
1226 dump_stack();
1227 __raw_spin_lock(&hash_lock);
1228 }
1229out_unlock_set:
1230 __raw_spin_unlock(&hash_lock);
1231
1232out_set:
1233 lock->class[subclass] = class;
1234
1235 DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
1236
1237 return class;
1238}
1239
1240/*
1241 * Look up a dependency chain. If the key is not present yet then
1242 * add it and return 0 - in this case the new dependency chain is
1243 * validated. If the key is already hashed, return 1.
1244 */
1245static inline int lookup_chain_cache(u64 chain_key)
1246{
1247 struct list_head *hash_head = chainhashentry(chain_key);
1248 struct lock_chain *chain;
1249
1250 DEBUG_LOCKS_WARN_ON(!irqs_disabled());
1251 /*
1252 * We can walk it lock-free, because entries only get added
1253 * to the hash:
1254 */
1255 list_for_each_entry(chain, hash_head, entry) {
1256 if (chain->chain_key == chain_key) {
1257cache_hit:
1258 debug_atomic_inc(&chain_lookup_hits);
1259 /*
1260 * In the debugging case, force redundant checking
1261 * by returning 1:
1262 */
1263#ifdef CONFIG_DEBUG_LOCKDEP
1264 __raw_spin_lock(&hash_lock);
1265 return 1;
1266#endif
1267 return 0;
1268 }
1269 }
1270 /*
1271 * Allocate a new chain entry from the static array, and add
1272 * it to the hash:
1273 */
1274 __raw_spin_lock(&hash_lock);
1275 /*
1276 * We have to walk the chain again locked - to avoid duplicates:
1277 */
1278 list_for_each_entry(chain, hash_head, entry) {
1279 if (chain->chain_key == chain_key) {
1280 __raw_spin_unlock(&hash_lock);
1281 goto cache_hit;
1282 }
1283 }
1284 if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
1285 __raw_spin_unlock(&hash_lock);
1286 debug_locks_off();
1287 printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
1288 printk("turning off the locking correctness validator.\n");
1289 return 0;
1290 }
1291 chain = lock_chains + nr_lock_chains++;
1292 chain->chain_key = chain_key;
1293 list_add_tail_rcu(&chain->entry, hash_head);
1294 debug_atomic_inc(&chain_lookup_misses);
1295#ifdef CONFIG_TRACE_IRQFLAGS
1296 if (current->hardirq_context)
1297 nr_hardirq_chains++;
1298 else {
1299 if (current->softirq_context)
1300 nr_softirq_chains++;
1301 else
1302 nr_process_chains++;
1303 }
1304#else
1305 nr_process_chains++;
1306#endif
1307
1308 return 1;
1309}
1310
1311/*
1312 * We are building curr_chain_key incrementally, so double-check
1313 * it from scratch, to make sure that it's done correctly:
1314 */
1315static void check_chain_key(struct task_struct *curr)
1316{
1317#ifdef CONFIG_DEBUG_LOCKDEP
1318 struct held_lock *hlock, *prev_hlock = NULL;
1319 unsigned int i, id;
1320 u64 chain_key = 0;
1321
1322 for (i = 0; i < curr->lockdep_depth; i++) {
1323 hlock = curr->held_locks + i;
1324 if (chain_key != hlock->prev_chain_key) {
1325 debug_locks_off();
1326 printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n",
1327 curr->lockdep_depth, i,
1328 (unsigned long long)chain_key,
1329 (unsigned long long)hlock->prev_chain_key);
1330 WARN_ON(1);
1331 return;
1332 }
1333 id = hlock->class - lock_classes;
1334 DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS);
1335 if (prev_hlock && (prev_hlock->irq_context !=
1336 hlock->irq_context))
1337 chain_key = 0;
1338 chain_key = iterate_chain_key(chain_key, id);
1339 prev_hlock = hlock;
1340 }
1341 if (chain_key != curr->curr_chain_key) {
1342 debug_locks_off();
1343 printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n",
1344 curr->lockdep_depth, i,
1345 (unsigned long long)chain_key,
1346 (unsigned long long)curr->curr_chain_key);
1347 WARN_ON(1);
1348 }
1349#endif
1350}
1351
1352#ifdef CONFIG_TRACE_IRQFLAGS
1353
1354/*
1355 * print irq inversion bug:
1356 */
1357static int
1358print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1359 struct held_lock *this, int forwards,
1360 const char *irqclass)
1361{
1362 __raw_spin_unlock(&hash_lock);
1363 debug_locks_off();
1364 if (debug_locks_silent)
1365 return 0;
1366
1367 printk("\n=========================================================\n");
1368 printk( "[ INFO: possible irq lock inversion dependency detected ]\n");
1369 printk( "---------------------------------------------------------\n");
1370 printk("%s/%d just changed the state of lock:\n",
1371 curr->comm, curr->pid);
1372 print_lock(this);
1373 if (forwards)
1374 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
1375 else
1376 printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass);
1377 print_lock_name(other);
1378 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
1379
1380 printk("\nother info that might help us debug this:\n");
1381 lockdep_print_held_locks(curr);
1382
1383 printk("\nthe first lock's dependencies:\n");
1384 print_lock_dependencies(this->class, 0);
1385
1386 printk("\nthe second lock's dependencies:\n");
1387 print_lock_dependencies(other, 0);
1388
1389 printk("\nstack backtrace:\n");
1390 dump_stack();
1391
1392 return 0;
1393}
1394
1395/*
1396 * Prove that in the forwards-direction subgraph starting at <this>
1397 * there is no lock matching <mask>:
1398 */
1399static int
1400check_usage_forwards(struct task_struct *curr, struct held_lock *this,
1401 enum lock_usage_bit bit, const char *irqclass)
1402{
1403 int ret;
1404
1405 find_usage_bit = bit;
1406 /* fills in <forwards_match> */
1407 ret = find_usage_forwards(this->class, 0);
1408 if (!ret || ret == 1)
1409 return ret;
1410
1411 return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass);
1412}
1413
1414/*
1415 * Prove that in the backwards-direction subgraph starting at <this>
1416 * there is no lock matching <mask>:
1417 */
1418static int
1419check_usage_backwards(struct task_struct *curr, struct held_lock *this,
1420 enum lock_usage_bit bit, const char *irqclass)
1421{
1422 int ret;
1423
1424 find_usage_bit = bit;
1425 /* fills in <backwards_match> */
1426 ret = find_usage_backwards(this->class, 0);
1427 if (!ret || ret == 1)
1428 return ret;
1429
1430 return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass);
1431}
1432
1433static inline void print_irqtrace_events(struct task_struct *curr)
1434{
1435 printk("irq event stamp: %u\n", curr->irq_events);
1436 printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event);
1437 print_ip_sym(curr->hardirq_enable_ip);
1438 printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
1439 print_ip_sym(curr->hardirq_disable_ip);
1440 printk("softirqs last enabled at (%u): ", curr->softirq_enable_event);
1441 print_ip_sym(curr->softirq_enable_ip);
1442 printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
1443 print_ip_sym(curr->softirq_disable_ip);
1444}
1445
1446#else
1447static inline void print_irqtrace_events(struct task_struct *curr)
1448{
1449}
1450#endif
1451
1452static int
1453print_usage_bug(struct task_struct *curr, struct held_lock *this,
1454 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
1455{
1456 __raw_spin_unlock(&hash_lock);
1457 debug_locks_off();
1458 if (debug_locks_silent)
1459 return 0;
1460
1461 printk("\n=================================\n");
1462 printk( "[ INFO: inconsistent lock state ]\n");
1463 printk( "---------------------------------\n");
1464
1465 printk("inconsistent {%s} -> {%s} usage.\n",
1466 usage_str[prev_bit], usage_str[new_bit]);
1467
1468 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
1469 curr->comm, curr->pid,
1470 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
1471 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
1472 trace_hardirqs_enabled(curr),
1473 trace_softirqs_enabled(curr));
1474 print_lock(this);
1475
1476 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
1477 print_stack_trace(this->class->usage_traces + prev_bit, 1);
1478
1479 print_irqtrace_events(curr);
1480 printk("\nother info that might help us debug this:\n");
1481 lockdep_print_held_locks(curr);
1482
1483 printk("\nstack backtrace:\n");
1484 dump_stack();
1485
1486 return 0;
1487}
1488
1489/*
1490 * Print out an error if an invalid bit is set:
1491 */
1492static inline int
1493valid_state(struct task_struct *curr, struct held_lock *this,
1494 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
1495{
1496 if (unlikely(this->class->usage_mask & (1 << bad_bit)))
1497 return print_usage_bug(curr, this, bad_bit, new_bit);
1498 return 1;
1499}
1500
1501#define STRICT_READ_CHECKS 1
1502
1503/*
1504 * Mark a lock with a usage bit, and validate the state transition:
1505 */
1506static int mark_lock(struct task_struct *curr, struct held_lock *this,
1507 enum lock_usage_bit new_bit, unsigned long ip)
1508{
1509 unsigned int new_mask = 1 << new_bit, ret = 1;
1510
1511 /*
1512 * If already set then do not dirty the cacheline,
1513 * nor do any checks:
1514 */
1515 if (likely(this->class->usage_mask & new_mask))
1516 return 1;
1517
1518 __raw_spin_lock(&hash_lock);
1519 /*
1520	 * Make sure we didn't race:
1521 */
1522 if (unlikely(this->class->usage_mask & new_mask)) {
1523 __raw_spin_unlock(&hash_lock);
1524 return 1;
1525 }
1526
1527 this->class->usage_mask |= new_mask;
1528
1529#ifdef CONFIG_TRACE_IRQFLAGS
1530 if (new_bit == LOCK_ENABLED_HARDIRQS ||
1531 new_bit == LOCK_ENABLED_HARDIRQS_READ)
1532 ip = curr->hardirq_enable_ip;
1533 else if (new_bit == LOCK_ENABLED_SOFTIRQS ||
1534 new_bit == LOCK_ENABLED_SOFTIRQS_READ)
1535 ip = curr->softirq_enable_ip;
1536#endif
1537 if (!save_trace(this->class->usage_traces + new_bit))
1538 return 0;
1539
1540 switch (new_bit) {
1541#ifdef CONFIG_TRACE_IRQFLAGS
1542 case LOCK_USED_IN_HARDIRQ:
1543 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1544 return 0;
1545 if (!valid_state(curr, this, new_bit,
1546 LOCK_ENABLED_HARDIRQS_READ))
1547 return 0;
1548 /*
1549 * just marked it hardirq-safe, check that this lock
1550 * took no hardirq-unsafe lock in the past:
1551 */
1552 if (!check_usage_forwards(curr, this,
1553 LOCK_ENABLED_HARDIRQS, "hard"))
1554 return 0;
1555#if STRICT_READ_CHECKS
1556 /*
1557 * just marked it hardirq-safe, check that this lock
1558 * took no hardirq-unsafe-read lock in the past:
1559 */
1560 if (!check_usage_forwards(curr, this,
1561 LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
1562 return 0;
1563#endif
1564 if (hardirq_verbose(this->class))
1565 ret = 2;
1566 break;
1567 case LOCK_USED_IN_SOFTIRQ:
1568 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
1569 return 0;
1570 if (!valid_state(curr, this, new_bit,
1571 LOCK_ENABLED_SOFTIRQS_READ))
1572 return 0;
1573 /*
1574 * just marked it softirq-safe, check that this lock
1575 * took no softirq-unsafe lock in the past:
1576 */
1577 if (!check_usage_forwards(curr, this,
1578 LOCK_ENABLED_SOFTIRQS, "soft"))
1579 return 0;
1580#if STRICT_READ_CHECKS
1581 /*
1582 * just marked it softirq-safe, check that this lock
1583 * took no softirq-unsafe-read lock in the past:
1584 */
1585 if (!check_usage_forwards(curr, this,
1586 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
1587 return 0;
1588#endif
1589 if (softirq_verbose(this->class))
1590 ret = 2;
1591 break;
1592 case LOCK_USED_IN_HARDIRQ_READ:
1593 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1594 return 0;
1595 /*
1596 * just marked it hardirq-read-safe, check that this lock
1597 * took no hardirq-unsafe lock in the past:
1598 */
1599 if (!check_usage_forwards(curr, this,
1600 LOCK_ENABLED_HARDIRQS, "hard"))
1601 return 0;
1602 if (hardirq_verbose(this->class))
1603 ret = 2;
1604 break;
1605 case LOCK_USED_IN_SOFTIRQ_READ:
1606 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
1607 return 0;
1608 /*
1609 * just marked it softirq-read-safe, check that this lock
1610 * took no softirq-unsafe lock in the past:
1611 */
1612 if (!check_usage_forwards(curr, this,
1613 LOCK_ENABLED_SOFTIRQS, "soft"))
1614 return 0;
1615 if (softirq_verbose(this->class))
1616 ret = 2;
1617 break;
1618 case LOCK_ENABLED_HARDIRQS:
1619 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
1620 return 0;
1621 if (!valid_state(curr, this, new_bit,
1622 LOCK_USED_IN_HARDIRQ_READ))
1623 return 0;
1624 /*
1625 * just marked it hardirq-unsafe, check that no hardirq-safe
1626 * lock in the system ever took it in the past:
1627 */
1628 if (!check_usage_backwards(curr, this,
1629 LOCK_USED_IN_HARDIRQ, "hard"))
1630 return 0;
1631#if STRICT_READ_CHECKS
1632 /*
1633 * just marked it hardirq-unsafe, check that no
1634 * hardirq-safe-read lock in the system ever took
1635 * it in the past:
1636 */
1637 if (!check_usage_backwards(curr, this,
1638 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
1639 return 0;
1640#endif
1641 if (hardirq_verbose(this->class))
1642 ret = 2;
1643 break;
1644 case LOCK_ENABLED_SOFTIRQS:
1645 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
1646 return 0;
1647 if (!valid_state(curr, this, new_bit,
1648 LOCK_USED_IN_SOFTIRQ_READ))
1649 return 0;
1650 /*
1651 * just marked it softirq-unsafe, check that no softirq-safe
1652 * lock in the system ever took it in the past:
1653 */
1654 if (!check_usage_backwards(curr, this,
1655 LOCK_USED_IN_SOFTIRQ, "soft"))
1656 return 0;
1657#if STRICT_READ_CHECKS
1658 /*
1659 * just marked it softirq-unsafe, check that no
1660 * softirq-safe-read lock in the system ever took
1661 * it in the past:
1662 */
1663 if (!check_usage_backwards(curr, this,
1664 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
1665 return 0;
1666#endif
1667 if (softirq_verbose(this->class))
1668 ret = 2;
1669 break;
1670 case LOCK_ENABLED_HARDIRQS_READ:
1671 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
1672 return 0;
1673#if STRICT_READ_CHECKS
1674 /*
1675 * just marked it hardirq-read-unsafe, check that no
1676 * hardirq-safe lock in the system ever took it in the past:
1677 */
1678 if (!check_usage_backwards(curr, this,
1679 LOCK_USED_IN_HARDIRQ, "hard"))
1680 return 0;
1681#endif
1682 if (hardirq_verbose(this->class))
1683 ret = 2;
1684 break;
1685 case LOCK_ENABLED_SOFTIRQS_READ:
1686 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
1687 return 0;
1688#if STRICT_READ_CHECKS
1689 /*
1690 * just marked it softirq-read-unsafe, check that no
1691 * softirq-safe lock in the system ever took it in the past:
1692 */
1693 if (!check_usage_backwards(curr, this,
1694 LOCK_USED_IN_SOFTIRQ, "soft"))
1695 return 0;
1696#endif
1697 if (softirq_verbose(this->class))
1698 ret = 2;
1699 break;
1700#endif
1701 case LOCK_USED:
1702 /*
1703 * Add it to the global list of classes:
1704 */
1705 list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
1706 debug_atomic_dec(&nr_unused_locks);
1707 break;
1708 default:
1709 debug_locks_off();
1710 WARN_ON(1);
1711 return 0;
1712 }
1713
1714 __raw_spin_unlock(&hash_lock);
1715
1716 /*
1717 * We must printk outside of the hash_lock:
1718 */
1719 if (ret == 2) {
1720 printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
1721 print_lock(this);
1722 print_irqtrace_events(curr);
1723 dump_stack();
1724 }
1725
1726 return ret;
1727}
1728
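Illustrative sketch (not part of the patch): mark_lock() records each new usage bit in class->usage_mask, and valid_state() rejects a transition when the conflicting bit was recorded earlier. A reduced, runnable illustration with only two invented bits:

#include <stdio.h>

enum usage_bit { USED_IN_HARDIRQ, ENABLED_HARDIRQS };

/* The transition is invalid if the conflicting bit is already recorded. */
static int valid_state(unsigned int usage_mask, enum usage_bit bad_bit)
{
	return !(usage_mask & (1 << bad_bit));
}

int main(void)
{
	unsigned int mask = 0;

	mask |= 1 << ENABLED_HARDIRQS;	/* lock was once taken with hardirqs on */

	/* Later observed in hardirq context: hardirq-safe vs -unsafe clash. */
	printf("may mark USED_IN_HARDIRQ: %d\n",
	       valid_state(mask, ENABLED_HARDIRQS));	/* prints 0 */
	return 0;
}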
1729#ifdef CONFIG_TRACE_IRQFLAGS
1730/*
1731 * Mark all held locks with a usage bit:
1732 */
1733static int
1734mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip)
1735{
1736 enum lock_usage_bit usage_bit;
1737 struct held_lock *hlock;
1738 int i;
1739
1740 for (i = 0; i < curr->lockdep_depth; i++) {
1741 hlock = curr->held_locks + i;
1742
1743 if (hardirq) {
1744 if (hlock->read)
1745 usage_bit = LOCK_ENABLED_HARDIRQS_READ;
1746 else
1747 usage_bit = LOCK_ENABLED_HARDIRQS;
1748 } else {
1749 if (hlock->read)
1750 usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
1751 else
1752 usage_bit = LOCK_ENABLED_SOFTIRQS;
1753 }
1754 if (!mark_lock(curr, hlock, usage_bit, ip))
1755 return 0;
1756 }
1757
1758 return 1;
1759}
1760
1761/*
1762 * Debugging helper: via this flag we know that we are in
1763 * 'early bootup code', and will warn about any invalid irqs-on event:
1764 */
1765static int early_boot_irqs_enabled;
1766
1767void early_boot_irqs_off(void)
1768{
1769 early_boot_irqs_enabled = 0;
1770}
1771
1772void early_boot_irqs_on(void)
1773{
1774 early_boot_irqs_enabled = 1;
1775}
1776
1777/*
1778 * Hardirqs will be enabled:
1779 */
1780void trace_hardirqs_on(void)
1781{
1782 struct task_struct *curr = current;
1783 unsigned long ip;
1784
1785 if (unlikely(!debug_locks || current->lockdep_recursion))
1786 return;
1787
1788 if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
1789 return;
1790
1791 if (unlikely(curr->hardirqs_enabled)) {
1792 debug_atomic_inc(&redundant_hardirqs_on);
1793 return;
1794 }
1795 /* we'll do an OFF -> ON transition: */
1796 curr->hardirqs_enabled = 1;
1797 ip = (unsigned long) __builtin_return_address(0);
1798
1799 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1800 return;
1801 if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
1802 return;
1803 /*
1804 * We are going to turn hardirqs on, so set the
1805 * usage bit for all held locks:
1806 */
1807 if (!mark_held_locks(curr, 1, ip))
1808 return;
1809 /*
1810 * If we have softirqs enabled, then set the usage
1811 * bit for all held locks. (disabled hardirqs prevented
1812 * this bit from being set before)
1813 */
1814 if (curr->softirqs_enabled)
1815 if (!mark_held_locks(curr, 0, ip))
1816 return;
1817
1818 curr->hardirq_enable_ip = ip;
1819 curr->hardirq_enable_event = ++curr->irq_events;
1820 debug_atomic_inc(&hardirqs_on_events);
1821}
1822
1823EXPORT_SYMBOL(trace_hardirqs_on);
1824
1825/*
1826 * Hardirqs were disabled:
1827 */
1828void trace_hardirqs_off(void)
1829{
1830 struct task_struct *curr = current;
1831
1832 if (unlikely(!debug_locks || current->lockdep_recursion))
1833 return;
1834
1835 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1836 return;
1837
1838 if (curr->hardirqs_enabled) {
1839 /*
1840 * We have done an ON -> OFF transition:
1841 */
1842 curr->hardirqs_enabled = 0;
1843 curr->hardirq_disable_ip = _RET_IP_;
1844 curr->hardirq_disable_event = ++curr->irq_events;
1845 debug_atomic_inc(&hardirqs_off_events);
1846 } else
1847 debug_atomic_inc(&redundant_hardirqs_off);
1848}
1849
1850EXPORT_SYMBOL(trace_hardirqs_off);
1851
1852/*
1853 * Softirqs will be enabled:
1854 */
1855void trace_softirqs_on(unsigned long ip)
1856{
1857 struct task_struct *curr = current;
1858
1859 if (unlikely(!debug_locks))
1860 return;
1861
1862 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1863 return;
1864
1865 if (curr->softirqs_enabled) {
1866 debug_atomic_inc(&redundant_softirqs_on);
1867 return;
1868 }
1869
1870 /*
1871 * We'll do an OFF -> ON transition:
1872 */
1873 curr->softirqs_enabled = 1;
1874 curr->softirq_enable_ip = ip;
1875 curr->softirq_enable_event = ++curr->irq_events;
1876 debug_atomic_inc(&softirqs_on_events);
1877 /*
1878 * We are going to turn softirqs on, so set the
1879 * usage bit for all held locks, if hardirqs are
1880 * enabled too:
1881 */
1882 if (curr->hardirqs_enabled)
1883 mark_held_locks(curr, 0, ip);
1884}
1885
1886/*
1887 * Softirqs were disabled:
1888 */
1889void trace_softirqs_off(unsigned long ip)
1890{
1891 struct task_struct *curr = current;
1892
1893 if (unlikely(!debug_locks))
1894 return;
1895
1896 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1897 return;
1898
1899 if (curr->softirqs_enabled) {
1900 /*
1901 * We have done an ON -> OFF transition:
1902 */
1903 curr->softirqs_enabled = 0;
1904 curr->softirq_disable_ip = ip;
1905 curr->softirq_disable_event = ++curr->irq_events;
1906 debug_atomic_inc(&softirqs_off_events);
1907 DEBUG_LOCKS_WARN_ON(!softirq_count());
1908 } else
1909 debug_atomic_inc(&redundant_softirqs_off);
1910}
1911
1912#endif
1913
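Illustrative sketch (not part of this hunk): the trace_hardirqs_*() hooks above are meant to be driven by the generic irq-flags wrappers added elsewhere in this series (the softirq hooks are driven by the softirq enable/disable paths). Roughly, an invented local_irq_save/restore pair would call them as below; note both hooks run while hardirqs are really disabled, which the DEBUG_LOCKS_WARN_ON(!irqs_disabled()) checks above rely on. my_local_irq_save()/my_local_irq_restore() are assumptions, not the real wrappers:

#define my_local_irq_save(flags)				\
	do {							\
		raw_local_irq_save(flags);			\
		trace_hardirqs_off();	/* record after disabling */	\
	} while (0)

#define my_local_irq_restore(flags)				\
	do {							\
		if (irqs_disabled_flags(flags)) {		\
			raw_local_irq_restore(flags);		\
			trace_hardirqs_off();			\
		} else {					\
			trace_hardirqs_on();	/* record before enabling */	\
			raw_local_irq_restore(flags);		\
		}						\
	} while (0)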
1914/*
1915 * Initialize a lock instance's lock-class mapping info:
1916 */
1917void lockdep_init_map(struct lockdep_map *lock, const char *name,
1918 struct lock_class_key *key)
1919{
1920 if (unlikely(!debug_locks))
1921 return;
1922
1923 if (DEBUG_LOCKS_WARN_ON(!key))
1924 return;
1925 if (DEBUG_LOCKS_WARN_ON(!name))
1926 return;
1927 /*
1928 * Sanity check, the lock-class key must be persistent:
1929 */
1930 if (!static_obj(key)) {
1931 printk("BUG: key %p not in .data!\n", key);
1932 DEBUG_LOCKS_WARN_ON(1);
1933 return;
1934 }
1935 lock->name = name;
1936 lock->key = key;
1937 memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES);
1938}
1939
1940EXPORT_SYMBOL_GPL(lockdep_init_map);
1941
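Illustrative sketch (not part of the patch): a lock type registers its mapping by embedding a struct lockdep_map and calling lockdep_init_map() with a key that static_obj() will accept, i.e. one with static storage. struct my_lock and my_lock_init() are invented here; the spinlock/mutex init macros in this series do the equivalent, giving every init call site its own lock class:

struct my_lock {
	/* ... the underlying lock word ... */
	struct lockdep_map dep_map;
};

#define my_lock_init(l)						\
	do {							\
		/* one static key per init site => one class per site */ \
		static struct lock_class_key __key;		\
		lockdep_init_map(&(l)->dep_map, #l, &__key);	\
	} while (0)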
1942/*
1943 * This gets called for every mutex_lock*()/spin_lock*() operation.
1944 * We maintain the dependency maps and validate the locking attempt:
1945 */
1946static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
1947 int trylock, int read, int check, int hardirqs_off,
1948 unsigned long ip)
1949{
1950 struct task_struct *curr = current;
1951 struct held_lock *hlock;
1952 struct lock_class *class;
1953 unsigned int depth, id;
1954 int chain_head = 0;
1955 u64 chain_key;
1956
1957 if (unlikely(!debug_locks))
1958 return 0;
1959
1960 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1961 return 0;
1962
1963 if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
1964 debug_locks_off();
1965 printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
1966 printk("turning off the locking correctness validator.\n");
1967 return 0;
1968 }
1969
1970 class = lock->class[subclass];
1971 /* not cached yet? */
1972 if (unlikely(!class)) {
1973 class = register_lock_class(lock, subclass);
1974 if (!class)
1975 return 0;
1976 }
1977 debug_atomic_inc((atomic_t *)&class->ops);
1978 if (very_verbose(class)) {
1979 printk("\nacquire class [%p] %s", class->key, class->name);
1980 if (class->name_version > 1)
1981 printk("#%d", class->name_version);
1982 printk("\n");
1983 dump_stack();
1984 }
1985
1986 /*
1987 * Add the lock to the list of currently held locks.
1988	 * (we don't increase the depth just yet, up until the
1989 * dependency checks are done)
1990 */
1991 depth = curr->lockdep_depth;
1992 if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
1993 return 0;
1994
1995 hlock = curr->held_locks + depth;
1996
1997 hlock->class = class;
1998 hlock->acquire_ip = ip;
1999 hlock->instance = lock;
2000 hlock->trylock = trylock;
2001 hlock->read = read;
2002 hlock->check = check;
2003 hlock->hardirqs_off = hardirqs_off;
2004
2005 if (check != 2)
2006 goto out_calc_hash;
2007#ifdef CONFIG_TRACE_IRQFLAGS
2008 /*
2009	 * If this is a non-trylock acquisition in hardirq or softirq
2010	 * context, mark the lock as used in that context:
2011 */
2012 if (!trylock) {
2013 if (read) {
2014 if (curr->hardirq_context)
2015 if (!mark_lock(curr, hlock,
2016 LOCK_USED_IN_HARDIRQ_READ, ip))
2017 return 0;
2018 if (curr->softirq_context)
2019 if (!mark_lock(curr, hlock,
2020 LOCK_USED_IN_SOFTIRQ_READ, ip))
2021 return 0;
2022 } else {
2023 if (curr->hardirq_context)
2024 if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip))
2025 return 0;
2026 if (curr->softirq_context)
2027 if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip))
2028 return 0;
2029 }
2030 }
2031 if (!hardirqs_off) {
2032 if (read) {
2033 if (!mark_lock(curr, hlock,
2034 LOCK_ENABLED_HARDIRQS_READ, ip))
2035 return 0;
2036 if (curr->softirqs_enabled)
2037 if (!mark_lock(curr, hlock,
2038 LOCK_ENABLED_SOFTIRQS_READ, ip))
2039 return 0;
2040 } else {
2041 if (!mark_lock(curr, hlock,
2042 LOCK_ENABLED_HARDIRQS, ip))
2043 return 0;
2044 if (curr->softirqs_enabled)
2045 if (!mark_lock(curr, hlock,
2046 LOCK_ENABLED_SOFTIRQS, ip))
2047 return 0;
2048 }
2049 }
2050#endif
2051 /* mark it as used: */
2052 if (!mark_lock(curr, hlock, LOCK_USED, ip))
2053 return 0;
2054out_calc_hash:
2055 /*
2056	 * Calculate the chain hash: it's the combined hash of all the
2057 * lock keys along the dependency chain. We save the hash value
2058 * at every step so that we can get the current hash easily
2059 * after unlock. The chain hash is then used to cache dependency
2060 * results.
2061 *
2062	 * The 'key ID' (the class index) is the most compact value we can
2063	 * use to drive the hash, rather than class->key itself.
2064 */
2065 id = class - lock_classes;
2066 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
2067 return 0;
2068
2069 chain_key = curr->curr_chain_key;
2070 if (!depth) {
2071 if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
2072 return 0;
2073 chain_head = 1;
2074 }
2075
2076 hlock->prev_chain_key = chain_key;
2077
2078#ifdef CONFIG_TRACE_IRQFLAGS
2079 /*
2080 * Keep track of points where we cross into an interrupt context:
2081 */
2082 hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
2083 curr->softirq_context;
2084 if (depth) {
2085 struct held_lock *prev_hlock;
2086
2087 prev_hlock = curr->held_locks + depth-1;
2088 /*
2089 * If we cross into another context, reset the
2090 * hash key (this also prevents the checking and the
2091 * adding of the dependency to 'prev'):
2092 */
2093 if (prev_hlock->irq_context != hlock->irq_context) {
2094 chain_key = 0;
2095 chain_head = 1;
2096 }
2097 }
2098#endif
2099 chain_key = iterate_chain_key(chain_key, id);
2100 curr->curr_chain_key = chain_key;
2101
2102 /*
2103 * Trylock needs to maintain the stack of held locks, but it
2104 * does not add new dependencies, because trylock can be done
2105 * in any order.
2106 *
2107 * We look up the chain_key and do the O(N^2) check and update of
2108 * the dependencies only if this is a new dependency chain.
2109 * (If lookup_chain_cache() returns with 1 it acquires
2110 * hash_lock for us)
2111 */
2112 if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) {
2113 /*
2114 * Check whether last held lock:
2115 *
2116 * - is irq-safe, if this lock is irq-unsafe
2117 * - is softirq-safe, if this lock is hardirq-unsafe
2118 *
2119 * And check whether the new lock's dependency graph
2120 * could lead back to the previous lock.
2121 *
2122		 * any of these scenarios could lead to a deadlock. If
2123		 * all validations pass, the new dependencies are added below.
2124 */
2125 int ret = check_deadlock(curr, hlock, lock, read);
2126
2127 if (!ret)
2128 return 0;
2129 /*
2130 * Mark recursive read, as we jump over it when
2131 * building dependencies (just like we jump over
2132 * trylock entries):
2133 */
2134 if (ret == 2)
2135 hlock->read = 2;
2136 /*
2137 * Add dependency only if this lock is not the head
2138 * of the chain, and if it's not a secondary read-lock:
2139 */
2140 if (!chain_head && ret != 2)
2141 if (!check_prevs_add(curr, hlock))
2142 return 0;
2143 __raw_spin_unlock(&hash_lock);
2144 }
2145 curr->lockdep_depth++;
2146 check_chain_key(curr);
2147 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
2148 debug_locks_off();
2149 printk("BUG: MAX_LOCK_DEPTH too low!\n");
2150 printk("turning off the locking correctness validator.\n");
2151 return 0;
2152 }
2153 if (unlikely(curr->lockdep_depth > max_lockdep_depth))
2154 max_lockdep_depth = curr->lockdep_depth;
2155
2156 return 1;
2157}
2158
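Illustrative sketch (not part of the patch): the chain key above is folded one class ID at a time, and the value from before each acquisition is stashed in hlock->prev_chain_key so unlock can restore it without rehashing. A runnable user-space analogue; fold_key() is invented, the kernel's mixing is done by iterate_chain_key() earlier in this file:

#include <stdio.h>

static unsigned long long fold_key(unsigned long long key, unsigned int id)
{
	return (key << 11) ^ (key >> 53) ^ id;	/* arbitrary mix for the sketch */
}

int main(void)
{
	unsigned int held_class_ids[] = { 3, 17, 42 };	/* hypothetical lock stack */
	unsigned long long chain_key = 0, prev;
	unsigned int i;

	for (i = 0; i < 3; i++) {
		prev = chain_key;		/* becomes hlock->prev_chain_key */
		chain_key = fold_key(chain_key, held_class_ids[i]);
		printf("depth %u: prev=%016llx curr=%016llx\n",
		       i, prev, chain_key);
	}
	return 0;
}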
2159static int
2160print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
2161 unsigned long ip)
2162{
2163 if (!debug_locks_off())
2164 return 0;
2165 if (debug_locks_silent)
2166 return 0;
2167
2168 printk("\n=====================================\n");
2169 printk( "[ BUG: bad unlock balance detected! ]\n");
2170 printk( "-------------------------------------\n");
2171 printk("%s/%d is trying to release lock (",
2172 curr->comm, curr->pid);
2173 print_lockdep_cache(lock);
2174 printk(") at:\n");
2175 print_ip_sym(ip);
2176 printk("but there are no more locks to release!\n");
2177 printk("\nother info that might help us debug this:\n");
2178 lockdep_print_held_locks(curr);
2179
2180 printk("\nstack backtrace:\n");
2181 dump_stack();
2182
2183 return 0;
2184}
2185
2186/*
2187 * Common debugging checks for both nested and non-nested unlock:
2188 */
2189static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
2190 unsigned long ip)
2191{
2192 if (unlikely(!debug_locks))
2193 return 0;
2194 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2195 return 0;
2196
2197 if (curr->lockdep_depth <= 0)
2198 return print_unlock_inbalance_bug(curr, lock, ip);
2199
2200 return 1;
2201}
2202
2203/*
2204 * Remove the lock from the list of currently held locks in a
2205 * potentially non-nested (out of order) manner. This is a
2206 * relatively rare operation, as all the unlock APIs default
2207 * to nested mode (which uses lock_release()):
2208 */
2209static int
2210lock_release_non_nested(struct task_struct *curr,
2211 struct lockdep_map *lock, unsigned long ip)
2212{
2213 struct held_lock *hlock, *prev_hlock;
2214 unsigned int depth;
2215 int i;
2216
2217 /*
2218 * Check whether the lock exists in the current stack
2219 * of held locks:
2220 */
2221 depth = curr->lockdep_depth;
2222 if (DEBUG_LOCKS_WARN_ON(!depth))
2223 return 0;
2224
2225 prev_hlock = NULL;
2226 for (i = depth-1; i >= 0; i--) {
2227 hlock = curr->held_locks + i;
2228 /*
2229 * We must not cross into another context:
2230 */
2231 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
2232 break;
2233 if (hlock->instance == lock)
2234 goto found_it;
2235 prev_hlock = hlock;
2236 }
2237 return print_unlock_inbalance_bug(curr, lock, ip);
2238
2239found_it:
2240 /*
2241 * We have the right lock to unlock, 'hlock' points to it.
2242 * Now we remove it from the stack, and add back the other
2243 * entries (if any), recalculating the hash along the way:
2244 */
2245 curr->lockdep_depth = i;
2246 curr->curr_chain_key = hlock->prev_chain_key;
2247
2248 for (i++; i < depth; i++) {
2249 hlock = curr->held_locks + i;
2250 if (!__lock_acquire(hlock->instance,
2251 hlock->class->subclass, hlock->trylock,
2252 hlock->read, hlock->check, hlock->hardirqs_off,
2253 hlock->acquire_ip))
2254 return 0;
2255 }
2256
2257 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
2258 return 0;
2259 return 1;
2260}
2261
2262/*
2263 * Remove the lock from the list of currently held locks - this gets
2264 * called on mutex_unlock()/spin_unlock*() (or on a failed
2265 * mutex_lock_interruptible()). This is done for unlocks that nest
2266 * perfectly. (i.e. the current top of the lock-stack is unlocked)
2267 */
2268static int lock_release_nested(struct task_struct *curr,
2269 struct lockdep_map *lock, unsigned long ip)
2270{
2271 struct held_lock *hlock;
2272 unsigned int depth;
2273
2274 /*
2275 * Pop off the top of the lock stack:
2276 */
2277 depth = curr->lockdep_depth - 1;
2278 hlock = curr->held_locks + depth;
2279
2280 /*
2281 * Is the unlock non-nested:
2282 */
2283 if (hlock->instance != lock)
2284 return lock_release_non_nested(curr, lock, ip);
2285 curr->lockdep_depth--;
2286
2287 if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
2288 return 0;
2289
2290 curr->curr_chain_key = hlock->prev_chain_key;
2291
2292#ifdef CONFIG_DEBUG_LOCKDEP
2293 hlock->prev_chain_key = 0;
2294 hlock->class = NULL;
2295 hlock->acquire_ip = 0;
2296 hlock->irq_context = 0;
2297#endif
2298 return 1;
2299}
2300
2301/*
2302 * Remove the lock from the list of currently held locks - this gets
2303 * called on mutex_unlock()/spin_unlock*() (or on a failed
2304 * mutex_lock_interruptible()). It dispatches to the nested or
2305 * non-nested unlock variant above, depending on the 'nested' argument.
2306 */
2307static void
2308__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2309{
2310 struct task_struct *curr = current;
2311
2312 if (!check_unlock(curr, lock, ip))
2313 return;
2314
2315 if (nested) {
2316 if (!lock_release_nested(curr, lock, ip))
2317 return;
2318 } else {
2319 if (!lock_release_non_nested(curr, lock, ip))
2320 return;
2321 }
2322
2323 check_chain_key(curr);
2324}
2325
2326/*
2327 * Check whether we follow the irq-flags state precisely:
2328 */
2329static void check_flags(unsigned long flags)
2330{
2331#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS)
2332 if (!debug_locks)
2333 return;
2334
2335 if (irqs_disabled_flags(flags))
2336 DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled);
2337 else
2338 DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled);
2339
2340 /*
2341	 * We don't accurately track softirq state in e.g.
2342 * hardirq contexts (such as on 4KSTACKS), so only
2343 * check if not in hardirq contexts:
2344 */
2345 if (!hardirq_count()) {
2346 if (softirq_count())
2347 DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
2348 else
2349 DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
2350 }
2351
2352 if (!debug_locks)
2353 print_irqtrace_events(current);
2354#endif
2355}
2356
2357/*
2358 * We are not always called with irqs disabled - do that here,
2359 * and also avoid lockdep recursion:
2360 */
2361void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2362 int trylock, int read, int check, unsigned long ip)
2363{
2364 unsigned long flags;
2365
2366 if (unlikely(current->lockdep_recursion))
2367 return;
2368
2369 raw_local_irq_save(flags);
2370 check_flags(flags);
2371
2372 current->lockdep_recursion = 1;
2373 __lock_acquire(lock, subclass, trylock, read, check,
2374 irqs_disabled_flags(flags), ip);
2375 current->lockdep_recursion = 0;
2376 raw_local_irq_restore(flags);
2377}
2378
2379EXPORT_SYMBOL_GPL(lock_acquire);
2380
2381void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2382{
2383 unsigned long flags;
2384
2385 if (unlikely(current->lockdep_recursion))
2386 return;
2387
2388 raw_local_irq_save(flags);
2389 check_flags(flags);
2390 current->lockdep_recursion = 1;
2391 __lock_release(lock, nested, ip);
2392 current->lockdep_recursion = 0;
2393 raw_local_irq_restore(flags);
2394}
2395
2396EXPORT_SYMBOL_GPL(lock_release);
2397
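Illustrative sketch (not part of the patch): locking primitives are annotated by calling the two entry points above around the real lock operation. This uses the invented struct my_lock from the earlier sketch; arch_do_lock()/arch_do_unlock() are placeholders, and the real wrappers are added to the lock headers elsewhere in this series:

static inline void my_spin_lock(struct my_lock *l)
{
	/* subclass 0, not a trylock, not a read lock, full checking (2) */
	lock_acquire(&l->dep_map, 0, 0, 0, 2, _RET_IP_);
	arch_do_lock(l);		/* placeholder for the real lock op */
}

static inline void my_spin_unlock(struct my_lock *l)
{
	/* nested == 1: we release the top of the held-lock stack */
	lock_release(&l->dep_map, 1, _RET_IP_);
	arch_do_unlock(l);		/* placeholder */
}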
2398/*
2399 * Used by the testsuite, sanitize the validator state
2400 * after a simulated failure:
2401 */
2402
2403void lockdep_reset(void)
2404{
2405 unsigned long flags;
2406
2407 raw_local_irq_save(flags);
2408 current->curr_chain_key = 0;
2409 current->lockdep_depth = 0;
2410 current->lockdep_recursion = 0;
2411 memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
2412 nr_hardirq_chains = 0;
2413 nr_softirq_chains = 0;
2414 nr_process_chains = 0;
2415 debug_locks = 1;
2416 raw_local_irq_restore(flags);
2417}
2418
2419static void zap_class(struct lock_class *class)
2420{
2421 int i;
2422
2423 /*
2424 * Remove all dependencies this lock is
2425 * involved in:
2426 */
2427 for (i = 0; i < nr_list_entries; i++) {
2428 if (list_entries[i].class == class)
2429 list_del_rcu(&list_entries[i].entry);
2430 }
2431 /*
2432 * Unhash the class and remove it from the all_lock_classes list:
2433 */
2434 list_del_rcu(&class->hash_entry);
2435 list_del_rcu(&class->lock_entry);
2436
2437}
2438
2439static inline int within(void *addr, void *start, unsigned long size)
2440{
2441 return addr >= start && addr < start + size;
2442}
2443
2444void lockdep_free_key_range(void *start, unsigned long size)
2445{
2446 struct lock_class *class, *next;
2447 struct list_head *head;
2448 unsigned long flags;
2449 int i;
2450
2451 raw_local_irq_save(flags);
2452 __raw_spin_lock(&hash_lock);
2453
2454 /*
2455 * Unhash all classes that were created by this module:
2456 */
2457 for (i = 0; i < CLASSHASH_SIZE; i++) {
2458 head = classhash_table + i;
2459 if (list_empty(head))
2460 continue;
2461 list_for_each_entry_safe(class, next, head, hash_entry)
2462 if (within(class->key, start, size))
2463 zap_class(class);
2464 }
2465
2466 __raw_spin_unlock(&hash_lock);
2467 raw_local_irq_restore(flags);
2468}
2469
2470void lockdep_reset_lock(struct lockdep_map *lock)
2471{
2472 struct lock_class *class, *next, *entry;
2473 struct list_head *head;
2474 unsigned long flags;
2475 int i, j;
2476
2477 raw_local_irq_save(flags);
2478 __raw_spin_lock(&hash_lock);
2479
2480 /*
2481 * Remove all classes this lock has:
2482 */
2483 for (i = 0; i < CLASSHASH_SIZE; i++) {
2484 head = classhash_table + i;
2485 if (list_empty(head))
2486 continue;
2487 list_for_each_entry_safe(class, next, head, hash_entry) {
2488 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
2489 entry = lock->class[j];
2490 if (class == entry) {
2491 zap_class(class);
2492 lock->class[j] = NULL;
2493 break;
2494 }
2495 }
2496 }
2497 }
2498
2499 /*
2500 * Debug check: in the end all mapped classes should
2501 * be gone.
2502 */
2503 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
2504 entry = lock->class[j];
2505 if (!entry)
2506 continue;
2507 __raw_spin_unlock(&hash_lock);
2508 DEBUG_LOCKS_WARN_ON(1);
2509 raw_local_irq_restore(flags);
2510 return;
2511 }
2512
2513 __raw_spin_unlock(&hash_lock);
2514 raw_local_irq_restore(flags);
2515}
2516
2517void __init lockdep_init(void)
2518{
2519 int i;
2520
2521 /*
2522 * Some architectures have their own start_kernel()
2523 * code which calls lockdep_init(), while we also
2524 * call lockdep_init() from the start_kernel() itself,
2525 * and we want to initialize the hashes only once:
2526 */
2527 if (lockdep_initialized)
2528 return;
2529
2530 for (i = 0; i < CLASSHASH_SIZE; i++)
2531 INIT_LIST_HEAD(classhash_table + i);
2532
2533 for (i = 0; i < CHAINHASH_SIZE; i++)
2534 INIT_LIST_HEAD(chainhash_table + i);
2535
2536 lockdep_initialized = 1;
2537}
2538
2539void __init lockdep_info(void)
2540{
2541 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
2542
2543 printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
2544 printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
2545 printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
2546 printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
2547 printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
2548 printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
2549 printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
2550
2551 printk(" memory used by lock dependency info: %lu kB\n",
2552 (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
2553 sizeof(struct list_head) * CLASSHASH_SIZE +
2554 sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
2555 sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
2556 sizeof(struct list_head) * CHAINHASH_SIZE) / 1024);
2557
2558 printk(" per task-struct memory footprint: %lu bytes\n",
2559 sizeof(struct held_lock) * MAX_LOCK_DEPTH);
2560
2561#ifdef CONFIG_DEBUG_LOCKDEP
2562 if (lockdep_init_error)
2563		printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n");
2564#endif
2565}
2566
2567static inline int in_range(const void *start, const void *addr, const void *end)
2568{
2569 return addr >= start && addr <= end;
2570}
2571
2572static void
2573print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
2574 const void *mem_to)
2575{
2576 if (!debug_locks_off())
2577 return;
2578 if (debug_locks_silent)
2579 return;
2580
2581 printk("\n=========================\n");
2582 printk( "[ BUG: held lock freed! ]\n");
2583 printk( "-------------------------\n");
2584 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
2585 curr->comm, curr->pid, mem_from, mem_to-1);
2586 lockdep_print_held_locks(curr);
2587
2588 printk("\nstack backtrace:\n");
2589 dump_stack();
2590}
2591
2592/*
2593 * Called when kernel memory is freed (or unmapped), or if a lock
2594 * is destroyed or reinitialized - this code checks whether there is
2595 * any held lock in the memory range of <from> to <to>:
2596 */
2597void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
2598{
2599 const void *mem_to = mem_from + mem_len, *lock_from, *lock_to;
2600 struct task_struct *curr = current;
2601 struct held_lock *hlock;
2602 unsigned long flags;
2603 int i;
2604
2605 if (unlikely(!debug_locks))
2606 return;
2607
2608 local_irq_save(flags);
2609 for (i = 0; i < curr->lockdep_depth; i++) {
2610 hlock = curr->held_locks + i;
2611
2612 lock_from = (void *)hlock->instance;
2613 lock_to = (void *)(hlock->instance + 1);
2614
2615 if (!in_range(mem_from, lock_from, mem_to) &&
2616 !in_range(mem_from, lock_to, mem_to))
2617 continue;
2618
2619 print_freed_lock_bug(curr, mem_from, mem_to);
2620 break;
2621 }
2622 local_irq_restore(flags);
2623}
2624
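Illustrative sketch (not part of the patch): allocators and teardown paths are the intended callers of debug_check_no_locks_freed(). An invented free path; my_free(), obj_size() and really_free() are placeholders:

static void my_free(void *obj)
{
	/* Complain if a currently held lock lives inside this object. */
	debug_check_no_locks_freed(obj, obj_size(obj));
	really_free(obj);
}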
2625static void print_held_locks_bug(struct task_struct *curr)
2626{
2627 if (!debug_locks_off())
2628 return;
2629 if (debug_locks_silent)
2630 return;
2631
2632 printk("\n=====================================\n");
2633 printk( "[ BUG: lock held at task exit time! ]\n");
2634 printk( "-------------------------------------\n");
2635 printk("%s/%d is exiting with locks still held!\n",
2636 curr->comm, curr->pid);
2637 lockdep_print_held_locks(curr);
2638
2639 printk("\nstack backtrace:\n");
2640 dump_stack();
2641}
2642
2643void debug_check_no_locks_held(struct task_struct *task)
2644{
2645 if (unlikely(task->lockdep_depth > 0))
2646 print_held_locks_bug(task);
2647}
2648
2649void debug_show_all_locks(void)
2650{
2651 struct task_struct *g, *p;
2652 int count = 10;
2653 int unlock = 1;
2654
2655 printk("\nShowing all locks held in the system:\n");
2656
2657 /*
2658 * Here we try to get the tasklist_lock as hard as possible,
2659 * if not successful after 2 seconds we ignore it (but keep
2660 * trying). This is to enable a debug printout even if a
2661 * tasklist_lock-holding task deadlocks or crashes.
2662 */
2663retry:
2664 if (!read_trylock(&tasklist_lock)) {
2665 if (count == 10)
2666 printk("hm, tasklist_lock locked, retrying... ");
2667 if (count) {
2668 count--;
2669 printk(" #%d", 10-count);
2670 mdelay(200);
2671 goto retry;
2672 }
2673 printk(" ignoring it.\n");
2674 unlock = 0;
2675 }
2676 if (count != 10)
2677 printk(" locked it.\n");
2678
2679 do_each_thread(g, p) {
2680 if (p->lockdep_depth)
2681 lockdep_print_held_locks(p);
2682 if (!unlock)
2683 if (read_trylock(&tasklist_lock))
2684 unlock = 1;
2685 } while_each_thread(g, p);
2686
2687 printk("\n");
2688 printk("=============================================\n\n");
2689
2690 if (unlock)
2691 read_unlock(&tasklist_lock);
2692}
2693
2694EXPORT_SYMBOL_GPL(debug_show_all_locks);
2695
2696void debug_show_held_locks(struct task_struct *task)
2697{
2698 lockdep_print_held_locks(task);
2699}
2700
2701EXPORT_SYMBOL_GPL(debug_show_held_locks);
2702
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
new file mode 100644
index 000000000000..0d355f24fe04
--- /dev/null
+++ b/kernel/lockdep_internals.h
@@ -0,0 +1,78 @@
1/*
2 * kernel/lockdep_internals.h
3 *
4 * Runtime locking correctness validator
5 *
6 * lockdep subsystem internal functions and variables.
7 */
8
9/*
10 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
11 * we track.
12 *
13 * We use the per-lock dependency maps in two ways: we grow them by adding
14 * every to-be-taken lock to each currently held lock's own dependency
15 * table (if it's not there yet), and we check them for lock-order
16 * conflicts and deadlocks.
17 */
18#define MAX_LOCKDEP_ENTRIES 8192UL
19
20#define MAX_LOCKDEP_KEYS_BITS 11
21#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
22
23#define MAX_LOCKDEP_CHAINS_BITS 13
24#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
25
26/*
27 * Stack-trace: tightly packed array of stack backtrace
28 * addresses. Protected by the hash_lock.
29 */
30#define MAX_STACK_TRACE_ENTRIES 131072UL
31
32extern struct list_head all_lock_classes;
33
34extern void
35get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4);
36
37extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
38
39extern unsigned long nr_lock_classes;
40extern unsigned long nr_list_entries;
41extern unsigned long nr_lock_chains;
42extern unsigned long nr_stack_trace_entries;
43
44extern unsigned int nr_hardirq_chains;
45extern unsigned int nr_softirq_chains;
46extern unsigned int nr_process_chains;
47extern unsigned int max_lockdep_depth;
48extern unsigned int max_recursion_depth;
49
50#ifdef CONFIG_DEBUG_LOCKDEP
51/*
52 * Various lockdep statistics:
53 */
54extern atomic_t chain_lookup_hits;
55extern atomic_t chain_lookup_misses;
56extern atomic_t hardirqs_on_events;
57extern atomic_t hardirqs_off_events;
58extern atomic_t redundant_hardirqs_on;
59extern atomic_t redundant_hardirqs_off;
60extern atomic_t softirqs_on_events;
61extern atomic_t softirqs_off_events;
62extern atomic_t redundant_softirqs_on;
63extern atomic_t redundant_softirqs_off;
64extern atomic_t nr_unused_locks;
65extern atomic_t nr_cyclic_checks;
66extern atomic_t nr_cyclic_check_recursions;
67extern atomic_t nr_find_usage_forwards_checks;
68extern atomic_t nr_find_usage_forwards_recursions;
69extern atomic_t nr_find_usage_backwards_checks;
70extern atomic_t nr_find_usage_backwards_recursions;
71# define debug_atomic_inc(ptr) atomic_inc(ptr)
72# define debug_atomic_dec(ptr) atomic_dec(ptr)
73# define debug_atomic_read(ptr) atomic_read(ptr)
74#else
75# define debug_atomic_inc(ptr) do { } while (0)
76# define debug_atomic_dec(ptr) do { } while (0)
77# define debug_atomic_read(ptr) 0
78#endif
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
new file mode 100644
index 000000000000..f6e72eaab3fa
--- /dev/null
+++ b/kernel/lockdep_proc.c
@@ -0,0 +1,345 @@
1/*
2 * kernel/lockdep_proc.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * Code for /proc/lockdep and /proc/lockdep_stats:
11 *
12 */
13#include <linux/sched.h>
14#include <linux/module.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <linux/kallsyms.h>
18#include <linux/debug_locks.h>
19
20#include "lockdep_internals.h"
21
22static void *l_next(struct seq_file *m, void *v, loff_t *pos)
23{
24 struct lock_class *class = v;
25
26 (*pos)++;
27
28 if (class->lock_entry.next != &all_lock_classes)
29 class = list_entry(class->lock_entry.next, struct lock_class,
30 lock_entry);
31 else
32 class = NULL;
33 m->private = class;
34
35 return class;
36}
37
38static void *l_start(struct seq_file *m, loff_t *pos)
39{
40 struct lock_class *class = m->private;
41
42 if (&class->lock_entry == all_lock_classes.next)
43 seq_printf(m, "all lock classes:\n");
44
45 return class;
46}
47
48static void l_stop(struct seq_file *m, void *v)
49{
50}
51
52static unsigned long count_forward_deps(struct lock_class *class)
53{
54 struct lock_list *entry;
55 unsigned long ret = 1;
56
57 /*
58 * Recurse this class's dependency list:
59 */
60 list_for_each_entry(entry, &class->locks_after, entry)
61 ret += count_forward_deps(entry->class);
62
63 return ret;
64}
65
66static unsigned long count_backward_deps(struct lock_class *class)
67{
68 struct lock_list *entry;
69 unsigned long ret = 1;
70
71 /*
72 * Recurse this class's dependency list:
73 */
74 list_for_each_entry(entry, &class->locks_before, entry)
75 ret += count_backward_deps(entry->class);
76
77 return ret;
78}
79
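Illustrative worked example (not part of the patch): the two counters above recursively count every node reachable via locks_after/locks_before, including the class itself, and a node is counted once per path that reaches it. For A -> B and A -> C -> D the forward count of A is 1 + 1 + (1 + 1) = 4. A runnable toy version with an invented toy_class type:

#include <stdio.h>

struct toy_class { int nr_after; const struct toy_class *after[4]; };

static unsigned long count_deps(const struct toy_class *c)
{
	unsigned long ret = 1;		/* the class itself, as in the code above */
	int i;

	for (i = 0; i < c->nr_after; i++)
		ret += count_deps(c->after[i]);
	return ret;
}

int main(void)
{
	static const struct toy_class D = { 0, { 0 } };
	static const struct toy_class B = { 0, { 0 } };
	static const struct toy_class C = { 1, { &D } };
	static const struct toy_class A = { 2, { &B, &C } };

	printf("FD of A: %lu\n", count_deps(&A));	/* prints 4 */
	return 0;
}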
80static int l_show(struct seq_file *m, void *v)
81{
82 unsigned long nr_forward_deps, nr_backward_deps;
83 struct lock_class *class = m->private;
84 char str[128], c1, c2, c3, c4;
85 const char *name;
86
87 seq_printf(m, "%p", class->key);
88#ifdef CONFIG_DEBUG_LOCKDEP
89 seq_printf(m, " OPS:%8ld", class->ops);
90#endif
91 nr_forward_deps = count_forward_deps(class);
92 seq_printf(m, " FD:%5ld", nr_forward_deps);
93
94 nr_backward_deps = count_backward_deps(class);
95 seq_printf(m, " BD:%5ld", nr_backward_deps);
96
97 get_usage_chars(class, &c1, &c2, &c3, &c4);
98 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
99
100 name = class->name;
101 if (!name) {
102 name = __get_key_name(class->key, str);
103 seq_printf(m, ": %s", name);
104	} else {
105 seq_printf(m, ": %s", name);
106 if (class->name_version > 1)
107 seq_printf(m, "#%d", class->name_version);
108 if (class->subclass)
109 seq_printf(m, "/%d", class->subclass);
110 }
111 seq_puts(m, "\n");
112
113 return 0;
114}
115
116static struct seq_operations lockdep_ops = {
117 .start = l_start,
118 .next = l_next,
119 .stop = l_stop,
120 .show = l_show,
121};
122
123static int lockdep_open(struct inode *inode, struct file *file)
124{
125 int res = seq_open(file, &lockdep_ops);
126 if (!res) {
127 struct seq_file *m = file->private_data;
128
129 if (!list_empty(&all_lock_classes))
130 m->private = list_entry(all_lock_classes.next,
131 struct lock_class, lock_entry);
132 else
133 m->private = NULL;
134 }
135 return res;
136}
137
138static struct file_operations proc_lockdep_operations = {
139 .open = lockdep_open,
140 .read = seq_read,
141 .llseek = seq_lseek,
142 .release = seq_release,
143};
144
145static void lockdep_stats_debug_show(struct seq_file *m)
146{
147#ifdef CONFIG_DEBUG_LOCKDEP
148 unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
149 hi2 = debug_atomic_read(&hardirqs_off_events),
150 hr1 = debug_atomic_read(&redundant_hardirqs_on),
151 hr2 = debug_atomic_read(&redundant_hardirqs_off),
152 si1 = debug_atomic_read(&softirqs_on_events),
153 si2 = debug_atomic_read(&softirqs_off_events),
154 sr1 = debug_atomic_read(&redundant_softirqs_on),
155 sr2 = debug_atomic_read(&redundant_softirqs_off);
156
157 seq_printf(m, " chain lookup misses: %11u\n",
158 debug_atomic_read(&chain_lookup_misses));
159 seq_printf(m, " chain lookup hits: %11u\n",
160 debug_atomic_read(&chain_lookup_hits));
161 seq_printf(m, " cyclic checks: %11u\n",
162 debug_atomic_read(&nr_cyclic_checks));
163 seq_printf(m, " cyclic-check recursions: %11u\n",
164 debug_atomic_read(&nr_cyclic_check_recursions));
165 seq_printf(m, " find-mask forwards checks: %11u\n",
166 debug_atomic_read(&nr_find_usage_forwards_checks));
167 seq_printf(m, " find-mask forwards recursions: %11u\n",
168 debug_atomic_read(&nr_find_usage_forwards_recursions));
169 seq_printf(m, " find-mask backwards checks: %11u\n",
170 debug_atomic_read(&nr_find_usage_backwards_checks));
171 seq_printf(m, " find-mask backwards recursions:%11u\n",
172 debug_atomic_read(&nr_find_usage_backwards_recursions));
173
174 seq_printf(m, " hardirq on events: %11u\n", hi1);
175 seq_printf(m, " hardirq off events: %11u\n", hi2);
176 seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
177 seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
178 seq_printf(m, " softirq on events: %11u\n", si1);
179 seq_printf(m, " softirq off events: %11u\n", si2);
180 seq_printf(m, " redundant softirq ons: %11u\n", sr1);
181 seq_printf(m, " redundant softirq offs: %11u\n", sr2);
182#endif
183}
184
185static int lockdep_stats_show(struct seq_file *m, void *v)
186{
187 struct lock_class *class;
188 unsigned long nr_unused = 0, nr_uncategorized = 0,
189 nr_irq_safe = 0, nr_irq_unsafe = 0,
190 nr_softirq_safe = 0, nr_softirq_unsafe = 0,
191 nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
192 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
193 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
194 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
195 sum_forward_deps = 0, factor = 0;
196
197 list_for_each_entry(class, &all_lock_classes, lock_entry) {
198
199 if (class->usage_mask == 0)
200 nr_unused++;
201 if (class->usage_mask == LOCKF_USED)
202 nr_uncategorized++;
203 if (class->usage_mask & LOCKF_USED_IN_IRQ)
204 nr_irq_safe++;
205 if (class->usage_mask & LOCKF_ENABLED_IRQS)
206 nr_irq_unsafe++;
207 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
208 nr_softirq_safe++;
209 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
210 nr_softirq_unsafe++;
211 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
212 nr_hardirq_safe++;
213 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
214 nr_hardirq_unsafe++;
215 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
216 nr_irq_read_safe++;
217 if (class->usage_mask & LOCKF_ENABLED_IRQS_READ)
218 nr_irq_read_unsafe++;
219 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
220 nr_softirq_read_safe++;
221 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
222 nr_softirq_read_unsafe++;
223 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
224 nr_hardirq_read_safe++;
225 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
226 nr_hardirq_read_unsafe++;
227
228 sum_forward_deps += count_forward_deps(class);
229 }
230#ifdef CONFIG_DEBUG_LOCKDEP
231 DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
232#endif
233 seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
234 nr_lock_classes, MAX_LOCKDEP_KEYS);
235 seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
236 nr_list_entries, MAX_LOCKDEP_ENTRIES);
237 seq_printf(m, " indirect dependencies: %11lu\n",
238 sum_forward_deps);
239
240 /*
241 * Total number of dependencies:
242 *
243 * All irq-safe locks may nest inside irq-unsafe locks,
244 * plus all the other known dependencies:
245 */
246 seq_printf(m, " all direct dependencies: %11lu\n",
247 nr_irq_unsafe * nr_irq_safe +
248 nr_hardirq_unsafe * nr_hardirq_safe +
249 nr_list_entries);
250
251 /*
252 * Estimated factor between direct and indirect
253 * dependencies:
254 */
255 if (nr_list_entries)
256 factor = sum_forward_deps / nr_list_entries;
257
258 seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
259 nr_lock_chains, MAX_LOCKDEP_CHAINS);
260
261#ifdef CONFIG_TRACE_IRQFLAGS
262 seq_printf(m, " in-hardirq chains: %11u\n",
263 nr_hardirq_chains);
264 seq_printf(m, " in-softirq chains: %11u\n",
265 nr_softirq_chains);
266#endif
267 seq_printf(m, " in-process chains: %11u\n",
268 nr_process_chains);
269 seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
270 nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
271 seq_printf(m, " combined max dependencies: %11u\n",
272 (nr_hardirq_chains + 1) *
273 (nr_softirq_chains + 1) *
274 (nr_process_chains + 1)
275 );
276 seq_printf(m, " hardirq-safe locks: %11lu\n",
277 nr_hardirq_safe);
278 seq_printf(m, " hardirq-unsafe locks: %11lu\n",
279 nr_hardirq_unsafe);
280 seq_printf(m, " softirq-safe locks: %11lu\n",
281 nr_softirq_safe);
282 seq_printf(m, " softirq-unsafe locks: %11lu\n",
283 nr_softirq_unsafe);
284 seq_printf(m, " irq-safe locks: %11lu\n",
285 nr_irq_safe);
286 seq_printf(m, " irq-unsafe locks: %11lu\n",
287 nr_irq_unsafe);
288
289 seq_printf(m, " hardirq-read-safe locks: %11lu\n",
290 nr_hardirq_read_safe);
291 seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
292 nr_hardirq_read_unsafe);
293 seq_printf(m, " softirq-read-safe locks: %11lu\n",
294 nr_softirq_read_safe);
295 seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
296 nr_softirq_read_unsafe);
297 seq_printf(m, " irq-read-safe locks: %11lu\n",
298 nr_irq_read_safe);
299 seq_printf(m, " irq-read-unsafe locks: %11lu\n",
300 nr_irq_read_unsafe);
301
302 seq_printf(m, " uncategorized locks: %11lu\n",
303 nr_uncategorized);
304 seq_printf(m, " unused locks: %11lu\n",
305 nr_unused);
306 seq_printf(m, " max locking depth: %11u\n",
307 max_lockdep_depth);
308 seq_printf(m, " max recursion depth: %11u\n",
309 max_recursion_depth);
310 lockdep_stats_debug_show(m);
311 seq_printf(m, " debug_locks: %11u\n",
312 debug_locks);
313
314 return 0;
315}
316
317static int lockdep_stats_open(struct inode *inode, struct file *file)
318{
319 return single_open(file, lockdep_stats_show, NULL);
320}
321
322static struct file_operations proc_lockdep_stats_operations = {
323 .open = lockdep_stats_open,
324 .read = seq_read,
325 .llseek = seq_lseek,
326 .release = seq_release,
327};
328
329static int __init lockdep_proc_init(void)
330{
331 struct proc_dir_entry *entry;
332
333 entry = create_proc_entry("lockdep", S_IRUSR, NULL);
334 if (entry)
335 entry->proc_fops = &proc_lockdep_operations;
336
337 entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL);
338 if (entry)
339 entry->proc_fops = &proc_lockdep_stats_operations;
340
341 return 0;
342}
343
344__initcall(lockdep_proc_init);
345
diff --git a/kernel/module.c b/kernel/module.c
index 281172f01e9a..35e1b1f859d7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1121,6 +1121,9 @@ static void free_module(struct module *mod)
1121	if (mod->percpu)
1122		percpu_modfree(mod->percpu);
1123
1124 /* Free lock-classes: */
1125 lockdep_free_key_range(mod->module_core, mod->core_size);
1126
1127	/* Finally, free the core (containing the module structure) */
1128	module_free(mod, mod->module_core);
1129}
@@ -2159,6 +2162,29 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
2162	return e;
2163}
2164
2165/*
2166 * Is this a valid module address?
2167 */
2168int is_module_address(unsigned long addr)
2169{
2170 unsigned long flags;
2171 struct module *mod;
2172
2173 spin_lock_irqsave(&modlist_lock, flags);
2174
2175 list_for_each_entry(mod, &modules, list) {
2176 if (within(addr, mod->module_core, mod->core_size)) {
2177 spin_unlock_irqrestore(&modlist_lock, flags);
2178 return 1;
2179 }
2180 }
2181
2182 spin_unlock_irqrestore(&modlist_lock, flags);
2183
2184 return 0;
2185}
2186
2187
2188/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
2189struct module *__module_text_address(unsigned long addr)
2190{
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index e38e4bac97ca..e3203c654dda 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -20,367 +20,19 @@
20#include <linux/spinlock.h>
21#include <linux/kallsyms.h>
22#include <linux/interrupt.h>
23#include <linux/debug_locks.h>
23
24#include "mutex-debug.h"
25
26/*
27 * We need a global lock when we walk through the multi-process
28 * lock tree. Only used in the deadlock-debugging case.
29 */
30DEFINE_SPINLOCK(debug_mutex_lock);
31
32/*
33 * All locks held by all tasks, in a single global list:
34 */
35LIST_HEAD(debug_mutex_held_locks);
36
37/*
38 * In the debug case we carry the caller's instruction pointer into
39 * other functions, but we dont want the function argument overhead
40 * in the nondebug case - hence these macros:
41 */
42#define __IP_DECL__ , unsigned long ip
43#define __IP__ , ip
44#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
45
46/*
47 * "mutex debugging enabled" flag. We turn it off when we detect
48 * the first problem because we dont want to recurse back
49 * into the tracing code when doing error printk or
50 * executing a BUG():
51 */
52int debug_mutex_on = 1;
53
54static void printk_task(struct task_struct *p)
55{
56 if (p)
57 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
58 else
59 printk("<none>");
60}
61
62static void printk_ti(struct thread_info *ti)
63{
64 if (ti)
65 printk_task(ti->task);
66 else
67 printk("<none>");
68}
69
70static void printk_task_short(struct task_struct *p)
71{
72 if (p)
73 printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
74 else
75 printk("<none>");
76}
77
78static void printk_lock(struct mutex *lock, int print_owner)
79{
80 printk(" [%p] {%s}\n", lock, lock->name);
81
82 if (print_owner && lock->owner) {
83 printk(".. held by: ");
84 printk_ti(lock->owner);
85 printk("\n");
86 }
87 if (lock->owner) {
88 printk("... acquired at: ");
89 print_symbol("%s\n", lock->acquire_ip);
90 }
91}
92
93/*
94 * printk locks held by a task:
95 */
96static void show_task_locks(struct task_struct *p)
97{
98 switch (p->state) {
99 case TASK_RUNNING: printk("R"); break;
100 case TASK_INTERRUPTIBLE: printk("S"); break;
101 case TASK_UNINTERRUPTIBLE: printk("D"); break;
102 case TASK_STOPPED: printk("T"); break;
103 case EXIT_ZOMBIE: printk("Z"); break;
104 case EXIT_DEAD: printk("X"); break;
105 default: printk("?"); break;
106 }
107 printk_task(p);
108 if (p->blocked_on) {
109 struct mutex *lock = p->blocked_on->lock;
110
111 printk(" blocked on mutex:");
112 printk_lock(lock, 1);
113 } else
114 printk(" (not blocked on mutex)\n");
115}
116
117/*
118 * printk all locks held in the system (if filter == NULL),
119 * or all locks belonging to a single task (if filter != NULL):
120 */
121void show_held_locks(struct task_struct *filter)
122{
123 struct list_head *curr, *cursor = NULL;
124 struct mutex *lock;
125 struct thread_info *t;
126 unsigned long flags;
127 int count = 0;
128
129 if (filter) {
130 printk("------------------------------\n");
131 printk("| showing all locks held by: | (");
132 printk_task_short(filter);
133 printk("):\n");
134 printk("------------------------------\n");
135 } else {
136 printk("---------------------------\n");
137 printk("| showing all locks held: |\n");
138 printk("---------------------------\n");
139 }
140
141 /*
142 * Play safe and acquire the global trace lock. We
143 * cannot printk with that lock held so we iterate
144 * very carefully:
145 */
146next:
147 debug_spin_lock_save(&debug_mutex_lock, flags);
148 list_for_each(curr, &debug_mutex_held_locks) {
149 if (cursor && curr != cursor)
150 continue;
151 lock = list_entry(curr, struct mutex, held_list);
152 t = lock->owner;
153 if (filter && (t != filter->thread_info))
154 continue;
155 count++;
156 cursor = curr->next;
157 debug_spin_unlock_restore(&debug_mutex_lock, flags);
158
159 printk("\n#%03d: ", count);
160 printk_lock(lock, filter ? 0 : 1);
161 goto next;
162 }
163 debug_spin_unlock_restore(&debug_mutex_lock, flags);
164 printk("\n");
165}
166
167void mutex_debug_show_all_locks(void)
168{
169 struct task_struct *g, *p;
170 int count = 10;
171 int unlock = 1;
172
173 printk("\nShowing all blocking locks in the system:\n");
174
175 /*
176 * Here we try to get the tasklist_lock as hard as possible,
177 * if not successful after 2 seconds we ignore it (but keep
178 * trying). This is to enable a debug printout even if a
179 * tasklist_lock-holding task deadlocks or crashes.
180 */
181retry:
182 if (!read_trylock(&tasklist_lock)) {
183 if (count == 10)
184 printk("hm, tasklist_lock locked, retrying... ");
185 if (count) {
186 count--;
187 printk(" #%d", 10-count);
188 mdelay(200);
189 goto retry;
190 }
191 printk(" ignoring it.\n");
192 unlock = 0;
193 }
194 if (count != 10)
195 printk(" locked it.\n");
196
197 do_each_thread(g, p) {
198 show_task_locks(p);
199 if (!unlock)
200 if (read_trylock(&tasklist_lock))
201 unlock = 1;
202 } while_each_thread(g, p);
203
204 printk("\n");
205 show_held_locks(NULL);
206 printk("=============================================\n\n");
207
208 if (unlock)
209 read_unlock(&tasklist_lock);
210}
211
212static void report_deadlock(struct task_struct *task, struct mutex *lock,
213 struct mutex *lockblk, unsigned long ip)
214{
215 printk("\n%s/%d is trying to acquire this lock:\n",
216 current->comm, current->pid);
217 printk_lock(lock, 1);
218 printk("... trying at: ");
219 print_symbol("%s\n", ip);
220 show_held_locks(current);
221
222 if (lockblk) {
223 printk("but %s/%d is deadlocking current task %s/%d!\n\n",
224 task->comm, task->pid, current->comm, current->pid);
225 printk("\n%s/%d is blocked on this lock:\n",
226 task->comm, task->pid);
227 printk_lock(lockblk, 1);
228
229 show_held_locks(task);
230
231 printk("\n%s/%d's [blocked] stackdump:\n\n",
232 task->comm, task->pid);
233 show_stack(task, NULL);
234 }
235
236 printk("\n%s/%d's [current] stackdump:\n\n",
237 current->comm, current->pid);
238 dump_stack();
239 mutex_debug_show_all_locks();
240 printk("[ turning off deadlock detection. Please report this. ]\n\n");
241 local_irq_disable();
242}
243
244/*
245 * Recursively check for mutex deadlocks:
246 */
247static int check_deadlock(struct mutex *lock, int depth,
248 struct thread_info *ti, unsigned long ip)
249{
250 struct mutex *lockblk;
251 struct task_struct *task;
252
253 if (!debug_mutex_on)
254 return 0;
255
256 ti = lock->owner;
257 if (!ti)
258 return 0;
259
260 task = ti->task;
261 lockblk = NULL;
262 if (task->blocked_on)
263 lockblk = task->blocked_on->lock;
264
265 /* Self-deadlock: */
266 if (current == task) {
267 DEBUG_OFF();
268 if (depth)
269 return 1;
270 printk("\n==========================================\n");
271 printk( "[ BUG: lock recursion deadlock detected! |\n");
272 printk( "------------------------------------------\n");
273 report_deadlock(task, lock, NULL, ip);
274 return 0;
275 }
276
277 /* Ugh, something corrupted the lock data structure? */
278 if (depth > 20) {
279 DEBUG_OFF();
280 printk("\n===========================================\n");
281 printk( "[ BUG: infinite lock dependency detected!? |\n");
282 printk( "-------------------------------------------\n");
283 report_deadlock(task, lock, lockblk, ip);
284 return 0;
285 }
286
287 /* Recursively check for dependencies: */
288 if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) {
289 printk("\n============================================\n");
290 printk( "[ BUG: circular locking deadlock detected! ]\n");
291 printk( "--------------------------------------------\n");
292 report_deadlock(task, lock, lockblk, ip);
293 return 0;
294 }
295 return 0;
296}
297
298/*
299 * Called when a task exits, this function checks whether the
300 * task is holding any locks, and reports the first one if so:
301 */
302void mutex_debug_check_no_locks_held(struct task_struct *task)
303{
304 struct list_head *curr, *next;
305 struct thread_info *t;
306 unsigned long flags;
307 struct mutex *lock;
308
309 if (!debug_mutex_on)
310 return;
311
312 debug_spin_lock_save(&debug_mutex_lock, flags);
313 list_for_each_safe(curr, next, &debug_mutex_held_locks) {
314 lock = list_entry(curr, struct mutex, held_list);
315 t = lock->owner;
316 if (t != task->thread_info)
317 continue;
318 list_del_init(curr);
319 DEBUG_OFF();
320 debug_spin_unlock_restore(&debug_mutex_lock, flags);
321
322 printk("BUG: %s/%d, lock held at task exit time!\n",
323 task->comm, task->pid);
324 printk_lock(lock, 1);
325 if (lock->owner != task->thread_info)
326 printk("exiting task is not even the owner??\n");
327 return;
328 }
329 debug_spin_unlock_restore(&debug_mutex_lock, flags);
330}
331
332/*
333 * Called when kernel memory is freed (or unmapped), or if a mutex
334 * is destroyed or reinitialized - this code checks whether there is
335 * any held lock in the memory range of <from> to <to>:
336 */
337void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
338{
339 struct list_head *curr, *next;
340 const void *to = from + len;
341 unsigned long flags;
342 struct mutex *lock;
343 void *lock_addr;
344
345 if (!debug_mutex_on)
346 return;
347
348 debug_spin_lock_save(&debug_mutex_lock, flags);
349 list_for_each_safe(curr, next, &debug_mutex_held_locks) {
350 lock = list_entry(curr, struct mutex, held_list);
351 lock_addr = lock;
352 if (lock_addr < from || lock_addr >= to)
353 continue;
354 list_del_init(curr);
355 DEBUG_OFF();
356 debug_spin_unlock_restore(&debug_mutex_lock, flags);
357
358 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
359 current->comm, current->pid, lock, from, to);
360 dump_stack();
361 printk_lock(lock, 1);
362 if (lock->owner != current_thread_info())
363 printk("freeing task is not even the owner??\n");
364 return;
365 }
366 debug_spin_unlock_restore(&debug_mutex_lock, flags);
367}
368
369/*
370 * Must be called with lock->wait_lock held. 28 * Must be called with lock->wait_lock held.
371 */ 29 */
372void debug_mutex_set_owner(struct mutex *lock, 30void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
373 struct thread_info *new_owner __IP_DECL__)
374{ 31{
375 lock->owner = new_owner; 32 lock->owner = new_owner;
376 DEBUG_WARN_ON(!list_empty(&lock->held_list));
377 if (debug_mutex_on) {
378 list_add_tail(&lock->held_list, &debug_mutex_held_locks);
379 lock->acquire_ip = ip;
380 }
381} 33}
382 34
383void debug_mutex_init_waiter(struct mutex_waiter *waiter) 35void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
384{ 36{
385 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); 37 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
386 waiter->magic = waiter; 38 waiter->magic = waiter;
@@ -389,23 +41,23 @@ void debug_mutex_init_waiter(struct mutex_waiter *waiter)
389 41
390void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) 42void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
391{ 43{
392 SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); 44 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
393 DEBUG_WARN_ON(list_empty(&lock->wait_list)); 45 DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
394 DEBUG_WARN_ON(waiter->magic != waiter); 46 DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
395 DEBUG_WARN_ON(list_empty(&waiter->list)); 47 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
396} 48}
397 49
398void debug_mutex_free_waiter(struct mutex_waiter *waiter) 50void debug_mutex_free_waiter(struct mutex_waiter *waiter)
399{ 51{
400 DEBUG_WARN_ON(!list_empty(&waiter->list)); 52 DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
401 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); 53 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
402} 54}
403 55
404void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, 56void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
405 struct thread_info *ti __IP_DECL__) 57 struct thread_info *ti)
406{ 58{
407 SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); 59 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
408 check_deadlock(lock, 0, ti, ip); 60
409 /* Mark the current thread as blocked on the lock: */ 61 /* Mark the current thread as blocked on the lock: */
410 ti->task->blocked_on = waiter; 62 ti->task->blocked_on = waiter;
411 waiter->lock = lock; 63 waiter->lock = lock;
@@ -414,9 +66,9 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
414void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 66void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
415 struct thread_info *ti) 67 struct thread_info *ti)
416{ 68{
417 DEBUG_WARN_ON(list_empty(&waiter->list)); 69 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
418 DEBUG_WARN_ON(waiter->task != ti->task); 70 DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
419 DEBUG_WARN_ON(ti->task->blocked_on != waiter); 71 DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
420 ti->task->blocked_on = NULL; 72 ti->task->blocked_on = NULL;
421 73
422 list_del_init(&waiter->list); 74 list_del_init(&waiter->list);
@@ -425,24 +77,23 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
425 77
426void debug_mutex_unlock(struct mutex *lock) 78void debug_mutex_unlock(struct mutex *lock)
427{ 79{
428 DEBUG_WARN_ON(lock->magic != lock); 80 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
429 DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 81 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
430 DEBUG_WARN_ON(lock->owner != current_thread_info()); 82 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
431 if (debug_mutex_on) { 83 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
432 DEBUG_WARN_ON(list_empty(&lock->held_list));
433 list_del_init(&lock->held_list);
434 }
435} 84}
436 85
437void debug_mutex_init(struct mutex *lock, const char *name) 86void debug_mutex_init(struct mutex *lock, const char *name,
87 struct lock_class_key *key)
438{ 88{
89#ifdef CONFIG_DEBUG_LOCK_ALLOC
439 /* 90 /*
440 * Make sure we are not reinitializing a held lock: 91 * Make sure we are not reinitializing a held lock:
441 */ 92 */
442 mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 93 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
94 lockdep_init_map(&lock->dep_map, name, key);
95#endif
443 lock->owner = NULL; 96 lock->owner = NULL;
444 INIT_LIST_HEAD(&lock->held_list);
445 lock->name = name;
446 lock->magic = lock; 97 lock->magic = lock;
447} 98}
448 99
@@ -456,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name)
456 */ 107 */
457void fastcall mutex_destroy(struct mutex *lock) 108void fastcall mutex_destroy(struct mutex *lock)
458{ 109{
459 DEBUG_WARN_ON(mutex_is_locked(lock)); 110 DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
460 lock->magic = NULL; 111 lock->magic = NULL;
461} 112}
462 113
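With CONFIG_DEBUG_LOCK_ALLOC, debug_mutex_init() above now registers the mutex with lockdep through lockdep_init_map(), keyed by a struct lock_class_key. A minimal usage sketch, assuming the mutex_init() wrapper (defined in the mutex header, not shown in this hunk) supplies one static key per initialization site so that each site becomes its own lock class; the my_dev type is illustrative only:

	#include <linux/mutex.h>

	struct my_dev {				/* illustrative type, not part of the patch */
		struct mutex lock;
	};

	static struct my_dev dev;

	static void my_dev_setup(void)
	{
		/*
		 * Assumed expansion of mutex_init():
		 *
		 *	static struct lock_class_key __key;
		 *	__mutex_init(&dev.lock, "&dev.lock", &__key);
		 *
		 * which reaches debug_mutex_init() and lockdep_init_map()
		 * above, giving this init site its own lockdep class.
		 */
		mutex_init(&dev.lock);
	}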
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index a5196c36a5fd..babfbdfc534b 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -10,110 +10,44 @@
10 * More details are in kernel/mutex-debug.c. 10 * More details are in kernel/mutex-debug.c.
11 */ 11 */
12 12
13extern spinlock_t debug_mutex_lock;
14extern struct list_head debug_mutex_held_locks;
15extern int debug_mutex_on;
16
17/*
18 * In the debug case we carry the caller's instruction pointer into
19 * other functions, but we dont want the function argument overhead
20 * in the nondebug case - hence these macros:
21 */
22#define __IP_DECL__ , unsigned long ip
23#define __IP__ , ip
24#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
25
26/* 13/*
27 * This must be called with lock->wait_lock held. 14 * This must be called with lock->wait_lock held.
28 */ 15 */
29extern void debug_mutex_set_owner(struct mutex *lock, 16extern void
30 struct thread_info *new_owner __IP_DECL__); 17debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
31 18
32static inline void debug_mutex_clear_owner(struct mutex *lock) 19static inline void debug_mutex_clear_owner(struct mutex *lock)
33{ 20{
34 lock->owner = NULL; 21 lock->owner = NULL;
35} 22}
36 23
37extern void debug_mutex_init_waiter(struct mutex_waiter *waiter); 24extern void debug_mutex_lock_common(struct mutex *lock,
25 struct mutex_waiter *waiter);
38extern void debug_mutex_wake_waiter(struct mutex *lock, 26extern void debug_mutex_wake_waiter(struct mutex *lock,
39 struct mutex_waiter *waiter); 27 struct mutex_waiter *waiter);
40extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); 28extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
41extern void debug_mutex_add_waiter(struct mutex *lock, 29extern void debug_mutex_add_waiter(struct mutex *lock,
42 struct mutex_waiter *waiter, 30 struct mutex_waiter *waiter,
43 struct thread_info *ti __IP_DECL__); 31 struct thread_info *ti);
44extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 32extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
45 struct thread_info *ti); 33 struct thread_info *ti);
46extern void debug_mutex_unlock(struct mutex *lock); 34extern void debug_mutex_unlock(struct mutex *lock);
47extern void debug_mutex_init(struct mutex *lock, const char *name); 35extern void debug_mutex_init(struct mutex *lock, const char *name,
48 36 struct lock_class_key *key);
49#define debug_spin_lock_save(lock, flags) \
50 do { \
51 local_irq_save(flags); \
52 if (debug_mutex_on) \
53 spin_lock(lock); \
54 } while (0)
55
56#define debug_spin_unlock_restore(lock, flags) \
57 do { \
58 if (debug_mutex_on) \
59 spin_unlock(lock); \
60 local_irq_restore(flags); \
61 preempt_check_resched(); \
62 } while (0)
63 37
64#define spin_lock_mutex(lock, flags) \ 38#define spin_lock_mutex(lock, flags) \
65 do { \ 39 do { \
66 struct mutex *l = container_of(lock, struct mutex, wait_lock); \ 40 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
67 \ 41 \
68 DEBUG_WARN_ON(in_interrupt()); \ 42 DEBUG_LOCKS_WARN_ON(in_interrupt()); \
69 debug_spin_lock_save(&debug_mutex_lock, flags); \ 43 local_irq_save(flags); \
70 spin_lock(lock); \ 44 __raw_spin_lock(&(lock)->raw_lock); \
71 DEBUG_WARN_ON(l->magic != l); \ 45 DEBUG_LOCKS_WARN_ON(l->magic != l); \
72 } while (0) 46 } while (0)
73 47
74#define spin_unlock_mutex(lock, flags) \ 48#define spin_unlock_mutex(lock, flags) \
75 do { \ 49 do { \
76 spin_unlock(lock); \ 50 __raw_spin_unlock(&(lock)->raw_lock); \
77 debug_spin_unlock_restore(&debug_mutex_lock, flags); \ 51 local_irq_restore(flags); \
52 preempt_check_resched(); \
78 } while (0) 53 } while (0)
79
80#define DEBUG_OFF() \
81do { \
82 if (debug_mutex_on) { \
83 debug_mutex_on = 0; \
84 console_verbose(); \
85 if (spin_is_locked(&debug_mutex_lock)) \
86 spin_unlock(&debug_mutex_lock); \
87 } \
88} while (0)
89
90#define DEBUG_BUG() \
91do { \
92 if (debug_mutex_on) { \
93 DEBUG_OFF(); \
94 BUG(); \
95 } \
96} while (0)
97
98#define DEBUG_WARN_ON(c) \
99do { \
100 if (unlikely(c && debug_mutex_on)) { \
101 DEBUG_OFF(); \
102 WARN_ON(1); \
103 } \
104} while (0)
105
106# define DEBUG_BUG_ON(c) \
107do { \
108 if (unlikely(c)) \
109 DEBUG_BUG(); \
110} while (0)
111
112#ifdef CONFIG_SMP
113# define SMP_DEBUG_WARN_ON(c) DEBUG_WARN_ON(c)
114# define SMP_DEBUG_BUG_ON(c) DEBUG_BUG_ON(c)
115#else
116# define SMP_DEBUG_WARN_ON(c) do { } while (0)
117# define SMP_DEBUG_BUG_ON(c) do { } while (0)
118#endif
119
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 7043db21bbce..8c71cf72a497 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/debug_locks.h>
20 21
21/* 22/*
22 * In the DEBUG case we are using the "NULL fastpath" for mutexes, 23 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -38,13 +39,14 @@
38 * 39 *
39 * It is not allowed to initialize an already locked mutex. 40 * It is not allowed to initialize an already locked mutex.
40 */ 41 */
41void fastcall __mutex_init(struct mutex *lock, const char *name) 42void
43__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
42{ 44{
43 atomic_set(&lock->count, 1); 45 atomic_set(&lock->count, 1);
44 spin_lock_init(&lock->wait_lock); 46 spin_lock_init(&lock->wait_lock);
45 INIT_LIST_HEAD(&lock->wait_list); 47 INIT_LIST_HEAD(&lock->wait_list);
46 48
47 debug_mutex_init(lock, name); 49 debug_mutex_init(lock, name, key);
48} 50}
49 51
50EXPORT_SYMBOL(__mutex_init); 52EXPORT_SYMBOL(__mutex_init);
@@ -56,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init);
56 * branch is predicted by the CPU as default-untaken. 58 * branch is predicted by the CPU as default-untaken.
57 */ 59 */
58static void fastcall noinline __sched 60static void fastcall noinline __sched
59__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__); 61__mutex_lock_slowpath(atomic_t *lock_count);
60 62
61/*** 63/***
62 * mutex_lock - acquire the mutex 64 * mutex_lock - acquire the mutex
@@ -79,7 +81,7 @@ __mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
79 * 81 *
80 * This function is similar to (but not equivalent to) down(). 82 * This function is similar to (but not equivalent to) down().
81 */ 83 */
82void fastcall __sched mutex_lock(struct mutex *lock) 84void inline fastcall __sched mutex_lock(struct mutex *lock)
83{ 85{
84 might_sleep(); 86 might_sleep();
85 /* 87 /*
@@ -92,7 +94,7 @@ void fastcall __sched mutex_lock(struct mutex *lock)
92EXPORT_SYMBOL(mutex_lock); 94EXPORT_SYMBOL(mutex_lock);
93 95
94static void fastcall noinline __sched 96static void fastcall noinline __sched
95__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__); 97__mutex_unlock_slowpath(atomic_t *lock_count);
96 98
97/*** 99/***
98 * mutex_unlock - release the mutex 100 * mutex_unlock - release the mutex
@@ -120,18 +122,18 @@ EXPORT_SYMBOL(mutex_unlock);
120 * Lock a mutex (possibly interruptible), slowpath: 122 * Lock a mutex (possibly interruptible), slowpath:
121 */ 123 */
122static inline int __sched 124static inline int __sched
123__mutex_lock_common(struct mutex *lock, long state __IP_DECL__) 125__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
124{ 126{
125 struct task_struct *task = current; 127 struct task_struct *task = current;
126 struct mutex_waiter waiter; 128 struct mutex_waiter waiter;
127 unsigned int old_val; 129 unsigned int old_val;
128 unsigned long flags; 130 unsigned long flags;
129 131
130 debug_mutex_init_waiter(&waiter);
131
132 spin_lock_mutex(&lock->wait_lock, flags); 132 spin_lock_mutex(&lock->wait_lock, flags);
133 133
134 debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); 134 debug_mutex_lock_common(lock, &waiter);
135 mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
136 debug_mutex_add_waiter(lock, &waiter, task->thread_info);
135 137
136 /* add waiting tasks to the end of the waitqueue (FIFO): */ 138 /* add waiting tasks to the end of the waitqueue (FIFO): */
137 list_add_tail(&waiter.list, &lock->wait_list); 139 list_add_tail(&waiter.list, &lock->wait_list);
@@ -158,6 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
158 if (unlikely(state == TASK_INTERRUPTIBLE && 160 if (unlikely(state == TASK_INTERRUPTIBLE &&
159 signal_pending(task))) { 161 signal_pending(task))) {
160 mutex_remove_waiter(lock, &waiter, task->thread_info); 162 mutex_remove_waiter(lock, &waiter, task->thread_info);
163 mutex_release(&lock->dep_map, 1, _RET_IP_);
161 spin_unlock_mutex(&lock->wait_lock, flags); 164 spin_unlock_mutex(&lock->wait_lock, flags);
162 165
163 debug_mutex_free_waiter(&waiter); 166 debug_mutex_free_waiter(&waiter);
@@ -173,7 +176,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
173 176
174 /* got the lock - rejoice! */ 177 /* got the lock - rejoice! */
175 mutex_remove_waiter(lock, &waiter, task->thread_info); 178 mutex_remove_waiter(lock, &waiter, task->thread_info);
176 debug_mutex_set_owner(lock, task->thread_info __IP__); 179 debug_mutex_set_owner(lock, task->thread_info);
177 180
178 /* set it to 0 if there are no waiters left: */ 181 /* set it to 0 if there are no waiters left: */
179 if (likely(list_empty(&lock->wait_list))) 182 if (likely(list_empty(&lock->wait_list)))
@@ -183,32 +186,40 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
183 186
184 debug_mutex_free_waiter(&waiter); 187 debug_mutex_free_waiter(&waiter);
185 188
186 DEBUG_WARN_ON(list_empty(&lock->held_list));
187 DEBUG_WARN_ON(lock->owner != task->thread_info);
188
189 return 0; 189 return 0;
190} 190}
191 191
192static void fastcall noinline __sched 192static void fastcall noinline __sched
193__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__) 193__mutex_lock_slowpath(atomic_t *lock_count)
194{ 194{
195 struct mutex *lock = container_of(lock_count, struct mutex, count); 195 struct mutex *lock = container_of(lock_count, struct mutex, count);
196 196
197 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__); 197 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
198}
199
200#ifdef CONFIG_DEBUG_LOCK_ALLOC
201void __sched
202mutex_lock_nested(struct mutex *lock, unsigned int subclass)
203{
204 might_sleep();
205 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
198} 206}
199 207
208EXPORT_SYMBOL_GPL(mutex_lock_nested);
209#endif
210
200/* 211/*
201 * Release the lock, slowpath: 212 * Release the lock, slowpath:
202 */ 213 */
203static fastcall noinline void 214static fastcall inline void
204__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) 215__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
205{ 216{
206 struct mutex *lock = container_of(lock_count, struct mutex, count); 217 struct mutex *lock = container_of(lock_count, struct mutex, count);
207 unsigned long flags; 218 unsigned long flags;
208 219
209 DEBUG_WARN_ON(lock->owner != current_thread_info());
210
211 spin_lock_mutex(&lock->wait_lock, flags); 220 spin_lock_mutex(&lock->wait_lock, flags);
221 mutex_release(&lock->dep_map, nested, _RET_IP_);
222 debug_mutex_unlock(lock);
212 223
213 /* 224 /*
214 * some architectures leave the lock unlocked in the fastpath failure 225 * some architectures leave the lock unlocked in the fastpath failure
@@ -218,8 +229,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
218 if (__mutex_slowpath_needs_to_unlock()) 229 if (__mutex_slowpath_needs_to_unlock())
219 atomic_set(&lock->count, 1); 230 atomic_set(&lock->count, 1);
220 231
221 debug_mutex_unlock(lock);
222
223 if (!list_empty(&lock->wait_list)) { 232 if (!list_empty(&lock->wait_list)) {
224 /* get the first entry from the wait-list: */ 233 /* get the first entry from the wait-list: */
225 struct mutex_waiter *waiter = 234 struct mutex_waiter *waiter =
@@ -237,11 +246,20 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
237} 246}
238 247
239/* 248/*
249 * Release the lock, slowpath:
250 */
251static fastcall noinline void
252__mutex_unlock_slowpath(atomic_t *lock_count)
253{
254 __mutex_unlock_common_slowpath(lock_count, 1);
255}
256
257/*
240 * Here come the less common (and hence less performance-critical) APIs: 258 * Here come the less common (and hence less performance-critical) APIs:
241 * mutex_lock_interruptible() and mutex_trylock(). 259 * mutex_lock_interruptible() and mutex_trylock().
242 */ 260 */
243static int fastcall noinline __sched 261static int fastcall noinline __sched
244__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__); 262__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
245 263
246/*** 264/***
247 * mutex_lock_interruptible - acquire the mutex, interruptable 265 * mutex_lock_interruptible - acquire the mutex, interruptable
@@ -264,11 +282,11 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
264EXPORT_SYMBOL(mutex_lock_interruptible); 282EXPORT_SYMBOL(mutex_lock_interruptible);
265 283
266static int fastcall noinline __sched 284static int fastcall noinline __sched
267__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) 285__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
268{ 286{
269 struct mutex *lock = container_of(lock_count, struct mutex, count); 287 struct mutex *lock = container_of(lock_count, struct mutex, count);
270 288
271 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__); 289 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
272} 290}
273 291
274/* 292/*
@@ -284,8 +302,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
284 spin_lock_mutex(&lock->wait_lock, flags); 302 spin_lock_mutex(&lock->wait_lock, flags);
285 303
286 prev = atomic_xchg(&lock->count, -1); 304 prev = atomic_xchg(&lock->count, -1);
287 if (likely(prev == 1)) 305 if (likely(prev == 1)) {
288 debug_mutex_set_owner(lock, current_thread_info() __RET_IP__); 306 debug_mutex_set_owner(lock, current_thread_info());
307 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
308 }
289 /* Set it back to 0 if there are no waiters: */ 309 /* Set it back to 0 if there are no waiters: */
290 if (likely(list_empty(&lock->wait_list))) 310 if (likely(list_empty(&lock->wait_list)))
291 atomic_set(&lock->count, 0); 311 atomic_set(&lock->count, 0);
@@ -309,7 +329,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
309 * This function must not be used in interrupt context. The 329 * This function must not be used in interrupt context. The
310 * mutex must be released by the same task that acquired it. 330 * mutex must be released by the same task that acquired it.
311 */ 331 */
312int fastcall mutex_trylock(struct mutex *lock) 332int fastcall __sched mutex_trylock(struct mutex *lock)
313{ 333{
314 return __mutex_fastpath_trylock(&lock->count, 334 return __mutex_fastpath_trylock(&lock->count,
315 __mutex_trylock_slowpath); 335 __mutex_trylock_slowpath);
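mutex_lock_nested() above lets a caller tell lockdep that taking two mutexes of the same class is an intentional, ordered nesting rather than a potential deadlock. A hedged sketch of its use; the my_node type, the parent/child ordering and the subclass value 1 are illustrative, and with !CONFIG_DEBUG_LOCK_ALLOC the call is assumed to fall back to a plain mutex_lock():

	#include <linux/mutex.h>

	struct my_node {			/* illustrative type */
		struct mutex lock;
		struct my_node *parent;
	};

	static void lock_parent_then_child(struct my_node *child)
	{
		/* both mutexes belong to the same lockdep class ... */
		mutex_lock(&child->parent->lock);
		/*
		 * ... so annotate the second acquisition with a non-zero
		 * subclass; without the annotation lockdep would report a
		 * possible recursive deadlock on this class.
		 */
		mutex_lock_nested(&child->lock, 1);
	}

	static void unlock_child_then_parent(struct my_node *child)
	{
		mutex_unlock(&child->lock);
		mutex_unlock(&child->parent->lock);
	}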
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 069189947257..a075dafbb290 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,22 +16,15 @@
16#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
18 18
19#define DEBUG_WARN_ON(c) do { } while (0)
20#define debug_mutex_set_owner(lock, new_owner) do { } while (0) 19#define debug_mutex_set_owner(lock, new_owner) do { } while (0)
21#define debug_mutex_clear_owner(lock) do { } while (0) 20#define debug_mutex_clear_owner(lock) do { } while (0)
22#define debug_mutex_init_waiter(waiter) do { } while (0)
23#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) 21#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
24#define debug_mutex_free_waiter(waiter) do { } while (0) 22#define debug_mutex_free_waiter(waiter) do { } while (0)
25#define debug_mutex_add_waiter(lock, waiter, ti, ip) do { } while (0) 23#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
26#define debug_mutex_unlock(lock) do { } while (0) 24#define debug_mutex_unlock(lock) do { } while (0)
27#define debug_mutex_init(lock, name) do { } while (0) 25#define debug_mutex_init(lock, name, key) do { } while (0)
28
29/*
30 * Return-address parameters/declarations. They are very useful for
31 * debugging, but add overhead in the !DEBUG case - so we go the
32 * trouble of using this not too elegant but zero-cost solution:
33 */
34#define __IP_DECL__
35#define __IP__
36#define __RET_IP__
37 26
27static inline void
28debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
29{
30}
diff --git a/kernel/pid.c b/kernel/pid.c
index eeb836b65ca4..93e212f20671 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr)
218 return NULL; 218 return NULL;
219} 219}
220 220
221int fastcall attach_pid(task_t *task, enum pid_type type, int nr) 221int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
222{ 222{
223 struct pid_link *link; 223 struct pid_link *link;
224 struct pid *pid; 224 struct pid *pid;
@@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
233 return 0; 233 return 0;
234} 234}
235 235
236void fastcall detach_pid(task_t *task, enum pid_type type) 236void fastcall detach_pid(struct task_struct *task, enum pid_type type)
237{ 237{
238 struct pid_link *link; 238 struct pid_link *link;
239 struct pid *pid; 239 struct pid *pid;
@@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
267/* 267/*
268 * Must be called under rcu_read_lock() or with tasklist_lock read-held. 268 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
269 */ 269 */
270task_t *find_task_by_pid_type(int type, int nr) 270struct task_struct *find_task_by_pid_type(int type, int nr)
271{ 271{
272 return pid_task(find_pid(nr), type); 272 return pid_task(find_pid(nr), type);
273} 273}
diff --git a/kernel/printk.c b/kernel/printk.c
index 39ae24d2a415..bdba5d80496c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -518,7 +518,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
518 zap_locks(); 518 zap_locks();
519 519
520 /* This stops the holder of console_sem just where we want him */ 520 /* This stops the holder of console_sem just where we want him */
521 spin_lock_irqsave(&logbuf_lock, flags); 521 local_irq_save(flags);
522 lockdep_off();
523 spin_lock(&logbuf_lock);
522 printk_cpu = smp_processor_id(); 524 printk_cpu = smp_processor_id();
523 525
524 /* Emit the output into the temporary buffer */ 526 /* Emit the output into the temporary buffer */
@@ -588,7 +590,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
588 */ 590 */
589 console_locked = 1; 591 console_locked = 1;
590 printk_cpu = UINT_MAX; 592 printk_cpu = UINT_MAX;
591 spin_unlock_irqrestore(&logbuf_lock, flags); 593 spin_unlock(&logbuf_lock);
592 594
593 /* 595 /*
594 * Console drivers may assume that per-cpu resources have 596 * Console drivers may assume that per-cpu resources have
@@ -604,6 +606,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
604 console_locked = 0; 606 console_locked = 0;
605 up(&console_sem); 607 up(&console_sem);
606 } 608 }
609 lockdep_on();
610 local_irq_restore(flags);
607 } else { 611 } else {
608 /* 612 /*
609 * Someone else owns the drivers. We drop the spinlock, which 613 * Someone else owns the drivers. We drop the spinlock, which
@@ -611,7 +615,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
611 * console drivers with the output which we just produced. 615 * console drivers with the output which we just produced.
612 */ 616 */
613 printk_cpu = UINT_MAX; 617 printk_cpu = UINT_MAX;
614 spin_unlock_irqrestore(&logbuf_lock, flags); 618 spin_unlock(&logbuf_lock);
619 lockdep_on();
620 local_irq_restore(flags);
615 } 621 }
616 622
617 preempt_enable(); 623 preempt_enable();
@@ -809,8 +815,15 @@ void release_console_sem(void)
809 console_may_schedule = 0; 815 console_may_schedule = 0;
810 up(&console_sem); 816 up(&console_sem);
811 spin_unlock_irqrestore(&logbuf_lock, flags); 817 spin_unlock_irqrestore(&logbuf_lock, flags);
812 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) 818 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
813 wake_up_interruptible(&log_wait); 819 /*
820 * If we printk from within the lock dependency code,
821 * from within the scheduler code, then do not lock
822 * up due to self-recursion:
823 */
824 if (!lockdep_internal())
825 wake_up_interruptible(&log_wait);
826 }
814} 827}
815EXPORT_SYMBOL(release_console_sem); 828EXPORT_SYMBOL(release_console_sem);
816 829
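The printk() change above splits spin_lock_irqsave() into local_irq_save() plus a bare spin_lock() and brackets the region with lockdep_off()/lockdep_on(), so that lockdep does not track logbuf_lock and cannot recurse when lockdep itself printk()s a report. The same pattern, sketched for an illustrative lock (the lockdep.h include path is an assumption; the declarations may also arrive via another header):

	#include <linux/spinlock.h>
	#include <linux/lockdep.h>

	static DEFINE_SPINLOCK(report_lock);	/* illustrative lock */

	static void emit_report(void)
	{
		unsigned long flags;

		local_irq_save(flags);
		lockdep_off();		/* keep lockdep away: this path may be entered from lockdep itself */
		spin_lock(&report_lock);

		/* ... format and emit the message ... */

		spin_unlock(&report_lock);
		lockdep_on();
		local_irq_restore(flags);
	}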
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 335c5b932e14..9a111f70145c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -28,7 +28,7 @@
28 * 28 *
29 * Must be called with the tasklist lock write-held. 29 * Must be called with the tasklist lock write-held.
30 */ 30 */
31void __ptrace_link(task_t *child, task_t *new_parent) 31void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
32{ 32{
33 BUG_ON(!list_empty(&child->ptrace_list)); 33 BUG_ON(!list_empty(&child->ptrace_list));
34 if (child->parent == new_parent) 34 if (child->parent == new_parent)
@@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent)
46 * TASK_TRACED, resume it now. 46 * TASK_TRACED, resume it now.
47 * Requires that irqs be disabled. 47 * Requires that irqs be disabled.
48 */ 48 */
49void ptrace_untrace(task_t *child) 49void ptrace_untrace(struct task_struct *child)
50{ 50{
51 spin_lock(&child->sighand->siglock); 51 spin_lock(&child->sighand->siglock);
52 if (child->state == TASK_TRACED) { 52 if (child->state == TASK_TRACED) {
@@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child)
65 * 65 *
66 * Must be called with the tasklist lock write-held. 66 * Must be called with the tasklist lock write-held.
67 */ 67 */
68void __ptrace_unlink(task_t *child) 68void __ptrace_unlink(struct task_struct *child)
69{ 69{
70 BUG_ON(!child->ptrace); 70 BUG_ON(!child->ptrace);
71 71
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f464f5ae3f11..759805c9859a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,13 +53,13 @@
53static struct rcu_ctrlblk rcu_ctrlblk = { 53static struct rcu_ctrlblk rcu_ctrlblk = {
54 .cur = -300, 54 .cur = -300,
55 .completed = -300, 55 .completed = -300,
56 .lock = SPIN_LOCK_UNLOCKED, 56 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
57 .cpumask = CPU_MASK_NONE, 57 .cpumask = CPU_MASK_NONE,
58}; 58};
59static struct rcu_ctrlblk rcu_bh_ctrlblk = { 59static struct rcu_ctrlblk rcu_bh_ctrlblk = {
60 .cur = -300, 60 .cur = -300,
61 .completed = -300, 61 .completed = -300,
62 .lock = SPIN_LOCK_UNLOCKED, 62 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
63 .cpumask = CPU_MASK_NONE, 63 .cpumask = CPU_MASK_NONE,
64}; 64};
65 65
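SPIN_LOCK_UNLOCKED is a single shared initializer, so every lock set up with it would fold into one lockdep class; __SPIN_LOCK_UNLOCKED(&...) names the specific lock, as in the rcu_ctrlblk hunks above, giving each statically initialized spinlock its own class. A sketch with an illustrative structure:

	#include <linux/spinlock.h>

	struct my_ctrlblk {			/* illustrative type */
		long cur;
		spinlock_t lock;
	};

	static struct my_ctrlblk my_ctrl = {
		.cur  = -1,
		/* per-lock initializer: the argument supplies the lockdep name/key */
		.lock = __SPIN_LOCK_UNLOCKED(&my_ctrl.lock),
	};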
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 4aa8a2c9f453..0c1faa950af7 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -26,6 +26,7 @@
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
27#include <linux/plist.h> 27#include <linux/plist.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29#include <linux/debug_locks.h>
29 30
30#include "rtmutex_common.h" 31#include "rtmutex_common.h"
31 32
@@ -45,8 +46,6 @@ do { \
45 console_verbose(); \ 46 console_verbose(); \
46 if (spin_is_locked(&current->pi_lock)) \ 47 if (spin_is_locked(&current->pi_lock)) \
47 spin_unlock(&current->pi_lock); \ 48 spin_unlock(&current->pi_lock); \
48 if (spin_is_locked(&current->held_list_lock)) \
49 spin_unlock(&current->held_list_lock); \
50 } \ 49 } \
51} while (0) 50} while (0)
52 51
@@ -97,7 +96,7 @@ void deadlock_trace_off(void)
97 rt_trace_on = 0; 96 rt_trace_on = 0;
98} 97}
99 98
100static void printk_task(task_t *p) 99static void printk_task(struct task_struct *p)
101{ 100{
102 if (p) 101 if (p)
103 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); 102 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
@@ -105,14 +104,6 @@ static void printk_task(task_t *p)
105 printk("<none>"); 104 printk("<none>");
106} 105}
107 106
108static void printk_task_short(task_t *p)
109{
110 if (p)
111 printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
112 else
113 printk("<none>");
114}
115
116static void printk_lock(struct rt_mutex *lock, int print_owner) 107static void printk_lock(struct rt_mutex *lock, int print_owner)
117{ 108{
118 if (lock->name) 109 if (lock->name)
@@ -128,222 +119,6 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
128 printk_task(rt_mutex_owner(lock)); 119 printk_task(rt_mutex_owner(lock));
129 printk("\n"); 120 printk("\n");
130 } 121 }
131 if (rt_mutex_owner(lock)) {
132 printk("... acquired at: ");
133 print_symbol("%s\n", lock->acquire_ip);
134 }
135}
136
137static void printk_waiter(struct rt_mutex_waiter *w)
138{
139 printk("-------------------------\n");
140 printk("| waiter struct %p:\n", w);
141 printk("| w->list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
142 w->list_entry.plist.prio_list.prev, w->list_entry.plist.prio_list.next,
143 w->list_entry.plist.node_list.prev, w->list_entry.plist.node_list.next,
144 w->list_entry.prio);
145 printk("| w->pi_list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
146 w->pi_list_entry.plist.prio_list.prev, w->pi_list_entry.plist.prio_list.next,
147 w->pi_list_entry.plist.node_list.prev, w->pi_list_entry.plist.node_list.next,
148 w->pi_list_entry.prio);
149 printk("\n| lock:\n");
150 printk_lock(w->lock, 1);
151 printk("| w->ti->task:\n");
152 printk_task(w->task);
153 printk("| blocked at: ");
154 print_symbol("%s\n", w->ip);
155 printk("-------------------------\n");
156}
157
158static void show_task_locks(task_t *p)
159{
160 switch (p->state) {
161 case TASK_RUNNING: printk("R"); break;
162 case TASK_INTERRUPTIBLE: printk("S"); break;
163 case TASK_UNINTERRUPTIBLE: printk("D"); break;
164 case TASK_STOPPED: printk("T"); break;
165 case EXIT_ZOMBIE: printk("Z"); break;
166 case EXIT_DEAD: printk("X"); break;
167 default: printk("?"); break;
168 }
169 printk_task(p);
170 if (p->pi_blocked_on) {
171 struct rt_mutex *lock = p->pi_blocked_on->lock;
172
173 printk(" blocked on:");
174 printk_lock(lock, 1);
175 } else
176 printk(" (not blocked)\n");
177}
178
179void rt_mutex_show_held_locks(task_t *task, int verbose)
180{
181 struct list_head *curr, *cursor = NULL;
182 struct rt_mutex *lock;
183 task_t *t;
184 unsigned long flags;
185 int count = 0;
186
187 if (!rt_trace_on)
188 return;
189
190 if (verbose) {
191 printk("------------------------------\n");
192 printk("| showing all locks held by: | (");
193 printk_task_short(task);
194 printk("):\n");
195 printk("------------------------------\n");
196 }
197
198next:
199 spin_lock_irqsave(&task->held_list_lock, flags);
200 list_for_each(curr, &task->held_list_head) {
201 if (cursor && curr != cursor)
202 continue;
203 lock = list_entry(curr, struct rt_mutex, held_list_entry);
204 t = rt_mutex_owner(lock);
205 WARN_ON(t != task);
206 count++;
207 cursor = curr->next;
208 spin_unlock_irqrestore(&task->held_list_lock, flags);
209
210 printk("\n#%03d: ", count);
211 printk_lock(lock, 0);
212 goto next;
213 }
214 spin_unlock_irqrestore(&task->held_list_lock, flags);
215
216 printk("\n");
217}
218
219void rt_mutex_show_all_locks(void)
220{
221 task_t *g, *p;
222 int count = 10;
223 int unlock = 1;
224
225 printk("\n");
226 printk("----------------------\n");
227 printk("| showing all tasks: |\n");
228 printk("----------------------\n");
229
230 /*
231 * Here we try to get the tasklist_lock as hard as possible,
232 * if not successful after 2 seconds we ignore it (but keep
233 * trying). This is to enable a debug printout even if a
234 * tasklist_lock-holding task deadlocks or crashes.
235 */
236retry:
237 if (!read_trylock(&tasklist_lock)) {
238 if (count == 10)
239 printk("hm, tasklist_lock locked, retrying... ");
240 if (count) {
241 count--;
242 printk(" #%d", 10-count);
243 mdelay(200);
244 goto retry;
245 }
246 printk(" ignoring it.\n");
247 unlock = 0;
248 }
249 if (count != 10)
250 printk(" locked it.\n");
251
252 do_each_thread(g, p) {
253 show_task_locks(p);
254 if (!unlock)
255 if (read_trylock(&tasklist_lock))
256 unlock = 1;
257 } while_each_thread(g, p);
258
259 printk("\n");
260
261 printk("-----------------------------------------\n");
262 printk("| showing all locks held in the system: |\n");
263 printk("-----------------------------------------\n");
264
265 do_each_thread(g, p) {
266 rt_mutex_show_held_locks(p, 0);
267 if (!unlock)
268 if (read_trylock(&tasklist_lock))
269 unlock = 1;
270 } while_each_thread(g, p);
271
272
273 printk("=============================================\n\n");
274
275 if (unlock)
276 read_unlock(&tasklist_lock);
277}
278
279void rt_mutex_debug_check_no_locks_held(task_t *task)
280{
281 struct rt_mutex_waiter *w;
282 struct list_head *curr;
283 struct rt_mutex *lock;
284
285 if (!rt_trace_on)
286 return;
287 if (!rt_prio(task->normal_prio) && rt_prio(task->prio)) {
288 printk("BUG: PI priority boost leaked!\n");
289 printk_task(task);
290 printk("\n");
291 }
292 if (list_empty(&task->held_list_head))
293 return;
294
295 spin_lock(&task->pi_lock);
296 plist_for_each_entry(w, &task->pi_waiters, pi_list_entry) {
297 TRACE_OFF();
298
299 printk("hm, PI interest held at exit time? Task:\n");
300 printk_task(task);
301 printk_waiter(w);
302 return;
303 }
304 spin_unlock(&task->pi_lock);
305
306 list_for_each(curr, &task->held_list_head) {
307 lock = list_entry(curr, struct rt_mutex, held_list_entry);
308
309 printk("BUG: %s/%d, lock held at task exit time!\n",
310 task->comm, task->pid);
311 printk_lock(lock, 1);
312 if (rt_mutex_owner(lock) != task)
313 printk("exiting task is not even the owner??\n");
314 }
315}
316
317int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
318{
319 const void *to = from + len;
320 struct list_head *curr;
321 struct rt_mutex *lock;
322 unsigned long flags;
323 void *lock_addr;
324
325 if (!rt_trace_on)
326 return 0;
327
328 spin_lock_irqsave(&current->held_list_lock, flags);
329 list_for_each(curr, &current->held_list_head) {
330 lock = list_entry(curr, struct rt_mutex, held_list_entry);
331 lock_addr = lock;
332 if (lock_addr < from || lock_addr >= to)
333 continue;
334 TRACE_OFF();
335
336 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
337 current->comm, current->pid, lock, from, to);
338 dump_stack();
339 printk_lock(lock, 1);
340 if (rt_mutex_owner(lock) != current)
341 printk("freeing task is not even the owner??\n");
342 return 1;
343 }
344 spin_unlock_irqrestore(&current->held_list_lock, flags);
345
346 return 0;
347} 122}
348 123
349void rt_mutex_debug_task_free(struct task_struct *task) 124void rt_mutex_debug_task_free(struct task_struct *task)
@@ -395,85 +170,41 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
395 current->comm, current->pid); 170 current->comm, current->pid);
396 printk_lock(waiter->lock, 1); 171 printk_lock(waiter->lock, 1);
397 172
398 printk("... trying at: ");
399 print_symbol("%s\n", waiter->ip);
400
401 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid); 173 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
402 printk_lock(waiter->deadlock_lock, 1); 174 printk_lock(waiter->deadlock_lock, 1);
403 175
404 rt_mutex_show_held_locks(current, 1); 176 debug_show_held_locks(current);
405 rt_mutex_show_held_locks(task, 1); 177 debug_show_held_locks(task);
406 178
407 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid); 179 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
408 show_stack(task, NULL); 180 show_stack(task, NULL);
409 printk("\n%s/%d's [current] stackdump:\n\n", 181 printk("\n%s/%d's [current] stackdump:\n\n",
410 current->comm, current->pid); 182 current->comm, current->pid);
411 dump_stack(); 183 dump_stack();
412 rt_mutex_show_all_locks(); 184 debug_show_all_locks();
185
413 printk("[ turning off deadlock detection." 186 printk("[ turning off deadlock detection."
414 "Please report this trace. ]\n\n"); 187 "Please report this trace. ]\n\n");
415 local_irq_disable(); 188 local_irq_disable();
416} 189}
417 190
418void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__) 191void debug_rt_mutex_lock(struct rt_mutex *lock)
419{ 192{
420 unsigned long flags;
421
422 if (rt_trace_on) {
423 TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
424
425 spin_lock_irqsave(&current->held_list_lock, flags);
426 list_add_tail(&lock->held_list_entry, &current->held_list_head);
427 spin_unlock_irqrestore(&current->held_list_lock, flags);
428
429 lock->acquire_ip = ip;
430 }
431} 193}
432 194
433void debug_rt_mutex_unlock(struct rt_mutex *lock) 195void debug_rt_mutex_unlock(struct rt_mutex *lock)
434{ 196{
435 unsigned long flags; 197 TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
436
437 if (rt_trace_on) {
438 TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
439 TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
440
441 spin_lock_irqsave(&current->held_list_lock, flags);
442 list_del_init(&lock->held_list_entry);
443 spin_unlock_irqrestore(&current->held_list_lock, flags);
444 }
445} 198}
446 199
447void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 200void
448 struct task_struct *powner __IP_DECL__) 201debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
449{ 202{
450 unsigned long flags;
451
452 if (rt_trace_on) {
453 TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
454
455 spin_lock_irqsave(&powner->held_list_lock, flags);
456 list_add_tail(&lock->held_list_entry, &powner->held_list_head);
457 spin_unlock_irqrestore(&powner->held_list_lock, flags);
458
459 lock->acquire_ip = ip;
460 }
461} 203}
462 204
463void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) 205void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
464{ 206{
465 unsigned long flags; 207 TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
466
467 if (rt_trace_on) {
468 struct task_struct *owner = rt_mutex_owner(lock);
469
470 TRACE_WARN_ON_LOCKED(!owner);
471 TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
472
473 spin_lock_irqsave(&owner->held_list_lock, flags);
474 list_del_init(&lock->held_list_entry);
475 spin_unlock_irqrestore(&owner->held_list_lock, flags);
476 }
477} 208}
478 209
479void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 210void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
@@ -493,17 +224,15 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
493 224
494void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) 225void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
495{ 226{
496 void *addr = lock; 227 /*
497 228 * Make sure we are not reinitializing a held lock:
498 if (rt_trace_on) { 229 */
499 rt_mutex_debug_check_no_locks_freed(addr, 230 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
500 sizeof(struct rt_mutex)); 231 lock->name = name;
501 INIT_LIST_HEAD(&lock->held_list_entry);
502 lock->name = name;
503 }
504} 232}
505 233
506void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task) 234void
235rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
507{ 236{
508} 237}
509 238
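The rt-mutex specific held-list walking removed above is replaced by the generic debug_locks helpers: debug_show_held_locks()/debug_show_all_locks() for the reports, and debug_check_no_locks_freed() to catch a lock being freed or reinitialized while still held. A sketch of the teardown-side check, with an illustrative object type:

	#include <linux/slab.h>
	#include <linux/rtmutex.h>
	#include <linux/debug_locks.h>

	struct my_object {			/* illustrative type */
		struct rt_mutex lock;
		/* ... */
	};

	static void my_object_free(struct my_object *obj)
	{
		/*
		 * Let the generic lock debugging code complain if any
		 * tracked lock inside this object is still held, then free.
		 */
		debug_check_no_locks_freed(obj, sizeof(*obj));
		kfree(obj);
	}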
diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h
index 7612fbc62d70..14193d596d78 100644
--- a/kernel/rtmutex-debug.h
+++ b/kernel/rtmutex-debug.h
@@ -9,20 +9,16 @@
9 * This file contains macros used solely by rtmutex.c. Debug version. 9 * This file contains macros used solely by rtmutex.c. Debug version.
10 */ 10 */
11 11
12#define __IP_DECL__ , unsigned long ip
13#define __IP__ , ip
14#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
15
16extern void 12extern void
17rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); 13rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
18extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); 14extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
19extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); 15extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
20extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); 16extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
21extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); 17extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
22extern void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__); 18extern void debug_rt_mutex_lock(struct rt_mutex *lock);
23extern void debug_rt_mutex_unlock(struct rt_mutex *lock); 19extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
24extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 20extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
25 struct task_struct *powner __IP_DECL__); 21 struct task_struct *powner);
26extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); 22extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
27extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, 23extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
28 struct rt_mutex *lock); 24 struct rt_mutex *lock);
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index e82c2f848249..494dac872a13 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -33,7 +33,7 @@ struct test_thread_data {
33}; 33};
34 34
35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; 35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
36static task_t *threads[MAX_RT_TEST_THREADS]; 36static struct task_struct *threads[MAX_RT_TEST_THREADS];
37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; 37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
38 38
39enum test_opcodes { 39enum test_opcodes {
@@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
361static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) 361static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)
362{ 362{
363 struct test_thread_data *td; 363 struct test_thread_data *td;
364 struct task_struct *tsk;
364 char *curr = buf; 365 char *curr = buf;
365 task_t *tsk;
366 int i; 366 int i;
367 367
368 td = container_of(dev, struct test_thread_data, sysdev); 368 td = container_of(dev, struct test_thread_data, sysdev);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 45d61016da57..d2ef13b485e7 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -157,12 +157,11 @@ int max_lock_depth = 1024;
157 * Decreases task's usage by one - may thus free the task. 157 * Decreases task's usage by one - may thus free the task.
158 * Returns 0 or -EDEADLK. 158 * Returns 0 or -EDEADLK.
159 */ 159 */
160static int rt_mutex_adjust_prio_chain(task_t *task, 160static int rt_mutex_adjust_prio_chain(struct task_struct *task,
161 int deadlock_detect, 161 int deadlock_detect,
162 struct rt_mutex *orig_lock, 162 struct rt_mutex *orig_lock,
163 struct rt_mutex_waiter *orig_waiter, 163 struct rt_mutex_waiter *orig_waiter,
164 struct task_struct *top_task 164 struct task_struct *top_task)
165 __IP_DECL__)
166{ 165{
167 struct rt_mutex *lock; 166 struct rt_mutex *lock;
168 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 167 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -283,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task,
283 spin_unlock_irqrestore(&task->pi_lock, flags); 282 spin_unlock_irqrestore(&task->pi_lock, flags);
284 out_put_task: 283 out_put_task:
285 put_task_struct(task); 284 put_task_struct(task);
285
286 return ret; 286 return ret;
287} 287}
288 288
@@ -357,7 +357,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
357 * 357 *
358 * Must be called with lock->wait_lock held. 358 * Must be called with lock->wait_lock held.
359 */ 359 */
360static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) 360static int try_to_take_rt_mutex(struct rt_mutex *lock)
361{ 361{
362 /* 362 /*
363 * We have to be careful here if the atomic speedups are 363 * We have to be careful here if the atomic speedups are
@@ -384,7 +384,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
384 return 0; 384 return 0;
385 385
386 /* We got the lock. */ 386 /* We got the lock. */
387 debug_rt_mutex_lock(lock __IP__); 387 debug_rt_mutex_lock(lock);
388 388
389 rt_mutex_set_owner(lock, current, 0); 389 rt_mutex_set_owner(lock, current, 0);
390 390
@@ -402,13 +402,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
402 */ 402 */
403static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 403static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
404 struct rt_mutex_waiter *waiter, 404 struct rt_mutex_waiter *waiter,
405 int detect_deadlock 405 int detect_deadlock)
406 __IP_DECL__)
407{ 406{
407 struct task_struct *owner = rt_mutex_owner(lock);
408 struct rt_mutex_waiter *top_waiter = waiter; 408 struct rt_mutex_waiter *top_waiter = waiter;
409 task_t *owner = rt_mutex_owner(lock);
410 int boost = 0, res;
411 unsigned long flags; 409 unsigned long flags;
410 int boost = 0, res;
412 411
413 spin_lock_irqsave(&current->pi_lock, flags); 412 spin_lock_irqsave(&current->pi_lock, flags);
414 __rt_mutex_adjust_prio(current); 413 __rt_mutex_adjust_prio(current);
@@ -454,7 +453,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
454 spin_unlock(&lock->wait_lock); 453 spin_unlock(&lock->wait_lock);
455 454
456 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 455 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
457 current __IP__); 456 current);
458 457
459 spin_lock(&lock->wait_lock); 458 spin_lock(&lock->wait_lock);
460 459
@@ -526,12 +525,12 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
526 * Must be called with lock->wait_lock held 525 * Must be called with lock->wait_lock held
527 */ 526 */
528static void remove_waiter(struct rt_mutex *lock, 527static void remove_waiter(struct rt_mutex *lock,
529 struct rt_mutex_waiter *waiter __IP_DECL__) 528 struct rt_mutex_waiter *waiter)
530{ 529{
531 int first = (waiter == rt_mutex_top_waiter(lock)); 530 int first = (waiter == rt_mutex_top_waiter(lock));
532 int boost = 0; 531 struct task_struct *owner = rt_mutex_owner(lock);
533 task_t *owner = rt_mutex_owner(lock);
534 unsigned long flags; 532 unsigned long flags;
533 int boost = 0;
535 534
536 spin_lock_irqsave(&current->pi_lock, flags); 535 spin_lock_irqsave(&current->pi_lock, flags);
537 plist_del(&waiter->list_entry, &lock->wait_list); 536 plist_del(&waiter->list_entry, &lock->wait_list);
@@ -568,7 +567,7 @@ static void remove_waiter(struct rt_mutex *lock,
568 567
569 spin_unlock(&lock->wait_lock); 568 spin_unlock(&lock->wait_lock);
570 569
571 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current __IP__); 570 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
572 571
573 spin_lock(&lock->wait_lock); 572 spin_lock(&lock->wait_lock);
574} 573}
@@ -595,7 +594,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
595 get_task_struct(task); 594 get_task_struct(task);
596 spin_unlock_irqrestore(&task->pi_lock, flags); 595 spin_unlock_irqrestore(&task->pi_lock, flags);
597 596
598 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task __RET_IP__); 597 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
599} 598}
600 599
601/* 600/*
@@ -604,7 +603,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
604static int __sched 603static int __sched
605rt_mutex_slowlock(struct rt_mutex *lock, int state, 604rt_mutex_slowlock(struct rt_mutex *lock, int state,
606 struct hrtimer_sleeper *timeout, 605 struct hrtimer_sleeper *timeout,
607 int detect_deadlock __IP_DECL__) 606 int detect_deadlock)
608{ 607{
609 struct rt_mutex_waiter waiter; 608 struct rt_mutex_waiter waiter;
610 int ret = 0; 609 int ret = 0;
@@ -615,7 +614,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
615 spin_lock(&lock->wait_lock); 614 spin_lock(&lock->wait_lock);
616 615
617 /* Try to acquire the lock again: */ 616 /* Try to acquire the lock again: */
618 if (try_to_take_rt_mutex(lock __IP__)) { 617 if (try_to_take_rt_mutex(lock)) {
619 spin_unlock(&lock->wait_lock); 618 spin_unlock(&lock->wait_lock);
620 return 0; 619 return 0;
621 } 620 }
@@ -629,7 +628,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
629 628
630 for (;;) { 629 for (;;) {
631 /* Try to acquire the lock: */ 630 /* Try to acquire the lock: */
632 if (try_to_take_rt_mutex(lock __IP__)) 631 if (try_to_take_rt_mutex(lock))
633 break; 632 break;
634 633
635 /* 634 /*
@@ -653,7 +652,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
653 */ 652 */
654 if (!waiter.task) { 653 if (!waiter.task) {
655 ret = task_blocks_on_rt_mutex(lock, &waiter, 654 ret = task_blocks_on_rt_mutex(lock, &waiter,
656 detect_deadlock __IP__); 655 detect_deadlock);
657 /* 656 /*
658 * If we got woken up by the owner then start loop 657 * If we got woken up by the owner then start loop
659 * all over without going into schedule to try 658 * all over without going into schedule to try
@@ -680,7 +679,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
680 set_current_state(TASK_RUNNING); 679 set_current_state(TASK_RUNNING);
681 680
682 if (unlikely(waiter.task)) 681 if (unlikely(waiter.task))
683 remove_waiter(lock, &waiter __IP__); 682 remove_waiter(lock, &waiter);
684 683
685 /* 684 /*
686 * try_to_take_rt_mutex() sets the waiter bit 685 * try_to_take_rt_mutex() sets the waiter bit
@@ -711,7 +710,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
711 * Slow path try-lock function: 710 * Slow path try-lock function:
712 */ 711 */
713static inline int 712static inline int
714rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__) 713rt_mutex_slowtrylock(struct rt_mutex *lock)
715{ 714{
716 int ret = 0; 715 int ret = 0;
717 716
@@ -719,7 +718,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)
719 718
720 if (likely(rt_mutex_owner(lock) != current)) { 719 if (likely(rt_mutex_owner(lock) != current)) {
721 720
722 ret = try_to_take_rt_mutex(lock __IP__); 721 ret = try_to_take_rt_mutex(lock);
723 /* 722 /*
724 * try_to_take_rt_mutex() sets the lock waiters 723 * try_to_take_rt_mutex() sets the lock waiters
725 * bit unconditionally. Clean this up. 724 * bit unconditionally. Clean this up.
@@ -769,13 +768,13 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
769 int detect_deadlock, 768 int detect_deadlock,
770 int (*slowfn)(struct rt_mutex *lock, int state, 769 int (*slowfn)(struct rt_mutex *lock, int state,
771 struct hrtimer_sleeper *timeout, 770 struct hrtimer_sleeper *timeout,
772 int detect_deadlock __IP_DECL__)) 771 int detect_deadlock))
773{ 772{
774 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { 773 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
775 rt_mutex_deadlock_account_lock(lock, current); 774 rt_mutex_deadlock_account_lock(lock, current);
776 return 0; 775 return 0;
777 } else 776 } else
778 return slowfn(lock, state, NULL, detect_deadlock __RET_IP__); 777 return slowfn(lock, state, NULL, detect_deadlock);
779} 778}
780 779
781static inline int 780static inline int
@@ -783,24 +782,24 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
783 struct hrtimer_sleeper *timeout, int detect_deadlock, 782 struct hrtimer_sleeper *timeout, int detect_deadlock,
784 int (*slowfn)(struct rt_mutex *lock, int state, 783 int (*slowfn)(struct rt_mutex *lock, int state,
785 struct hrtimer_sleeper *timeout, 784 struct hrtimer_sleeper *timeout,
786 int detect_deadlock __IP_DECL__)) 785 int detect_deadlock))
787{ 786{
788 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { 787 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
789 rt_mutex_deadlock_account_lock(lock, current); 788 rt_mutex_deadlock_account_lock(lock, current);
790 return 0; 789 return 0;
791 } else 790 } else
792 return slowfn(lock, state, timeout, detect_deadlock __RET_IP__); 791 return slowfn(lock, state, timeout, detect_deadlock);
793} 792}
794 793
795static inline int 794static inline int
796rt_mutex_fasttrylock(struct rt_mutex *lock, 795rt_mutex_fasttrylock(struct rt_mutex *lock,
797 int (*slowfn)(struct rt_mutex *lock __IP_DECL__)) 796 int (*slowfn)(struct rt_mutex *lock))
798{ 797{
799 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { 798 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
800 rt_mutex_deadlock_account_lock(lock, current); 799 rt_mutex_deadlock_account_lock(lock, current);
801 return 1; 800 return 1;
802 } 801 }
803 return slowfn(lock __RET_IP__); 802 return slowfn(lock);
804} 803}
805 804
806static inline void 805static inline void
@@ -948,7 +947,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
948 struct task_struct *proxy_owner) 947 struct task_struct *proxy_owner)
949{ 948{
950 __rt_mutex_init(lock, NULL); 949 __rt_mutex_init(lock, NULL);
951 debug_rt_mutex_proxy_lock(lock, proxy_owner __RET_IP__); 950 debug_rt_mutex_proxy_lock(lock, proxy_owner);
952 rt_mutex_set_owner(lock, proxy_owner, 0); 951 rt_mutex_set_owner(lock, proxy_owner, 0);
953 rt_mutex_deadlock_account_lock(lock, proxy_owner); 952 rt_mutex_deadlock_account_lock(lock, proxy_owner);
954} 953}
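The rtmutex.c hunks above drop the __IP__/__IP_DECL__/__RET_IP__ plumbing that threaded the caller's instruction pointer down into the slow paths; the lockdep-annotated code added further below (kernel/rwsem.c, kernel/sched.c) instead picks up the call site directly through the generic _RET_IP_ and _THIS_IP_ helpers. As a rough, self-contained illustration of what those helpers evaluate to (userspace, GCC extensions; the two macro definitions are local copies mirroring the <linux/kernel.h> ones and are not part of this patch):

/*
 * Illustration only: what _RET_IP_ and _THIS_IP_ expand to.
 * Build with gcc; both macros rely on GCC extensions.
 */
#include <stdio.h>

#define _RET_IP_	(unsigned long)__builtin_return_address(0)
#define _THIS_IP_	({ __label__ __here; __here: (unsigned long)&&__here; })

static void __attribute__((noinline)) show_call_site(void)
{
	/* Return address: the instruction after the call in the caller. */
	printf("called from %#lx\n", _RET_IP_);
}

int main(void)
{
	/* Address of this very spot in main(). */
	printf("this ip: %#lx\n", _THIS_IP_);
	show_call_site();
	return 0;
}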
diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h
index 1e0fca13ff72..a1a1dd06421d 100644
--- a/kernel/rtmutex.h
+++ b/kernel/rtmutex.h
@@ -10,9 +10,6 @@
10 * Non-debug version. 10 * Non-debug version.
11 */ 11 */
12 12
13#define __IP_DECL__
14#define __IP__
15#define __RET_IP__
16#define rt_mutex_deadlock_check(l) (0) 13#define rt_mutex_deadlock_check(l) (0)
17#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) 14#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
18#define rt_mutex_deadlock_account_unlock(l) do { } while (0) 15#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
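In the non-debug rtmutex.h above, the deleted defines were empty stubs, so the hunk leaves only the remaining no-op definitions; the debug header provides real implementations under the same names. A minimal sketch of that stub-pair idiom, with all names invented for illustration:

/*
 * Sketch of the stub-macro idiom: with debugging off the hooks compile
 * to nothing, with it on they expand to real code. Names are made up.
 */
#include <stdio.h>

#ifdef DEBUG_HOOKS
# define hook_check(x)		printf("checking %d\n", (x))
# define hook_account(x)	printf("accounting %d\n", (x))
#else
/* Compile to nothing, but keep the call sites syntactically valid. */
# define hook_check(x)		(0)
# define hook_account(x)	do { } while (0)
#endif

int main(void)
{
	int v = 42;

	hook_check(v);		/* no-op unless built with -DDEBUG_HOOKS */
	hook_account(v);
	return 0;
}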
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
new file mode 100644
index 000000000000..291ded556aa0
--- /dev/null
+++ b/kernel/rwsem.c
@@ -0,0 +1,147 @@
1/* kernel/rwsem.c: R/W semaphores, public implementation
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from asm-i386/semaphore.h
5 */
6
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/rwsem.h>
11
12#include <asm/system.h>
13#include <asm/atomic.h>
14
15/*
16 * lock for reading
17 */
18void down_read(struct rw_semaphore *sem)
19{
20 might_sleep();
21 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
22
23 __down_read(sem);
24}
25
26EXPORT_SYMBOL(down_read);
27
28/*
29 * trylock for reading -- returns 1 if successful, 0 if contention
30 */
31int down_read_trylock(struct rw_semaphore *sem)
32{
33 int ret = __down_read_trylock(sem);
34
35 if (ret == 1)
36 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
37 return ret;
38}
39
40EXPORT_SYMBOL(down_read_trylock);
41
42/*
43 * lock for writing
44 */
45void down_write(struct rw_semaphore *sem)
46{
47 might_sleep();
48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
49
50 __down_write(sem);
51}
52
53EXPORT_SYMBOL(down_write);
54
55/*
56 * trylock for writing -- returns 1 if successful, 0 if contention
57 */
58int down_write_trylock(struct rw_semaphore *sem)
59{
60 int ret = __down_write_trylock(sem);
61
62 if (ret == 1)
63 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
64 return ret;
65}
66
67EXPORT_SYMBOL(down_write_trylock);
68
69/*
70 * release a read lock
71 */
72void up_read(struct rw_semaphore *sem)
73{
74 rwsem_release(&sem->dep_map, 1, _RET_IP_);
75
76 __up_read(sem);
77}
78
79EXPORT_SYMBOL(up_read);
80
81/*
82 * release a write lock
83 */
84void up_write(struct rw_semaphore *sem)
85{
86 rwsem_release(&sem->dep_map, 1, _RET_IP_);
87
88 __up_write(sem);
89}
90
91EXPORT_SYMBOL(up_write);
92
93/*
94 * downgrade write lock to read lock
95 */
96void downgrade_write(struct rw_semaphore *sem)
97{
98 /*
99 * lockdep: a downgraded write will live on as a write
100 * dependency.
101 */
102 __downgrade_write(sem);
103}
104
105EXPORT_SYMBOL(downgrade_write);
106
107#ifdef CONFIG_DEBUG_LOCK_ALLOC
108
109void down_read_nested(struct rw_semaphore *sem, int subclass)
110{
111 might_sleep();
112 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
113
114 __down_read(sem);
115}
116
117EXPORT_SYMBOL(down_read_nested);
118
119void down_read_non_owner(struct rw_semaphore *sem)
120{
121 might_sleep();
122
123 __down_read(sem);
124}
125
126EXPORT_SYMBOL(down_read_non_owner);
127
128void down_write_nested(struct rw_semaphore *sem, int subclass)
129{
130 might_sleep();
131 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
132
133 __down_write_nested(sem, subclass);
134}
135
136EXPORT_SYMBOL(down_write_nested);
137
138void up_read_non_owner(struct rw_semaphore *sem)
139{
140 __up_read(sem);
141}
142
143EXPORT_SYMBOL(up_read_non_owner);
144
145#endif
146
147
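The new kernel/rwsem.c above is a thin layer: each entry point calls might_sleep() where appropriate, records the acquire or release with lockdep through sem->dep_map, and then defers to the architecture __down_*/__up_* primitives. A usage sketch of that API follows; the structure, field, and function names below are made up for illustration and are not part of the patch:

#include <linux/rwsem.h>

struct demo_state {
	struct rw_semaphore sem;	/* set up with init_rwsem(&s->sem) */
	int value;
};

static int demo_read_value(struct demo_state *s)
{
	int v;

	down_read(&s->sem);		/* rwsem_acquire_read() + __down_read() */
	v = s->value;
	up_read(&s->sem);		/* rwsem_release() + __up_read() */

	return v;
}

static void demo_update_value(struct demo_state *s, int v)
{
	down_write(&s->sem);		/* may sleep; lockdep records a write dep */
	s->value = v;
	downgrade_write(&s->sem);	/* keep reading without dropping the lock */
	/* ... read-side work ... */
	up_read(&s->sem);		/* a downgraded write is released as a read */
}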
diff --git a/kernel/sched.c b/kernel/sched.c
index d5e37072ea54..4ee400f9d56b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/completion.h> 31#include <linux/completion.h>
32#include <linux/kernel_stat.h> 32#include <linux/kernel_stat.h>
33#include <linux/debug_locks.h>
33#include <linux/security.h> 34#include <linux/security.h>
34#include <linux/notifier.h> 35#include <linux/notifier.h>
35#include <linux/profile.h> 36#include <linux/profile.h>
@@ -178,20 +179,15 @@ static unsigned int static_prio_timeslice(int static_prio)
178 return SCALE_PRIO(DEF_TIMESLICE, static_prio); 179 return SCALE_PRIO(DEF_TIMESLICE, static_prio);
179} 180}
180 181
181static inline unsigned int task_timeslice(task_t *p) 182static inline unsigned int task_timeslice(struct task_struct *p)
182{ 183{
183 return static_prio_timeslice(p->static_prio); 184 return static_prio_timeslice(p->static_prio);
184} 185}
185 186
186#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
187 < (long long) (sd)->cache_hot_time)
188
189/* 187/*
190 * These are the runqueue data structures: 188 * These are the runqueue data structures:
191 */ 189 */
192 190
193typedef struct runqueue runqueue_t;
194
195struct prio_array { 191struct prio_array {
196 unsigned int nr_active; 192 unsigned int nr_active;
197 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ 193 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
@@ -205,7 +201,7 @@ struct prio_array {
205 * (such as the load balancing or the thread migration code), lock 201 * (such as the load balancing or the thread migration code), lock
206 * acquire operations must be ordered by ascending &runqueue. 202 * acquire operations must be ordered by ascending &runqueue.
207 */ 203 */
208struct runqueue { 204struct rq {
209 spinlock_t lock; 205 spinlock_t lock;
210 206
211 /* 207 /*
@@ -229,9 +225,9 @@ struct runqueue {
229 225
230 unsigned long expired_timestamp; 226 unsigned long expired_timestamp;
231 unsigned long long timestamp_last_tick; 227 unsigned long long timestamp_last_tick;
232 task_t *curr, *idle; 228 struct task_struct *curr, *idle;
233 struct mm_struct *prev_mm; 229 struct mm_struct *prev_mm;
234 prio_array_t *active, *expired, arrays[2]; 230 struct prio_array *active, *expired, arrays[2];
235 int best_expired_prio; 231 int best_expired_prio;
236 atomic_t nr_iowait; 232 atomic_t nr_iowait;
237 233
@@ -242,7 +238,7 @@ struct runqueue {
242 int active_balance; 238 int active_balance;
243 int push_cpu; 239 int push_cpu;
244 240
245 task_t *migration_thread; 241 struct task_struct *migration_thread;
246 struct list_head migration_queue; 242 struct list_head migration_queue;
247#endif 243#endif
248 244
@@ -265,9 +261,10 @@ struct runqueue {
265 unsigned long ttwu_cnt; 261 unsigned long ttwu_cnt;
266 unsigned long ttwu_local; 262 unsigned long ttwu_local;
267#endif 263#endif
264 struct lock_class_key rq_lock_key;
268}; 265};
269 266
270static DEFINE_PER_CPU(struct runqueue, runqueues); 267static DEFINE_PER_CPU(struct rq, runqueues);
271 268
272/* 269/*
273 * The domain tree (rq->sd) is protected by RCU's quiescent state transition. 270 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -276,8 +273,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
276 * The domain tree of any CPU may only be accessed from within 273 * The domain tree of any CPU may only be accessed from within
277 * preempt-disabled sections. 274 * preempt-disabled sections.
278 */ 275 */
279#define for_each_domain(cpu, domain) \ 276#define for_each_domain(cpu, __sd) \
280for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) 277 for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
281 278
282#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) 279#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
283#define this_rq() (&__get_cpu_var(runqueues)) 280#define this_rq() (&__get_cpu_var(runqueues))
@@ -292,26 +289,33 @@ for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
292#endif 289#endif
293 290
294#ifndef __ARCH_WANT_UNLOCKED_CTXSW 291#ifndef __ARCH_WANT_UNLOCKED_CTXSW
295static inline int task_running(runqueue_t *rq, task_t *p) 292static inline int task_running(struct rq *rq, struct task_struct *p)
296{ 293{
297 return rq->curr == p; 294 return rq->curr == p;
298} 295}
299 296
300static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 297static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
301{ 298{
302} 299}
303 300
304static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 301static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
305{ 302{
306#ifdef CONFIG_DEBUG_SPINLOCK 303#ifdef CONFIG_DEBUG_SPINLOCK
307 /* this is a valid case when another task releases the spinlock */ 304 /* this is a valid case when another task releases the spinlock */
308 rq->lock.owner = current; 305 rq->lock.owner = current;
309#endif 306#endif
307 /*
308 * If we are tracking spinlock dependencies then we have to
309 * fix up the runqueue lock - which gets 'carried over' from
310 * prev into current:
311 */
312 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
313
310 spin_unlock_irq(&rq->lock); 314 spin_unlock_irq(&rq->lock);
311} 315}
312 316
313#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 317#else /* __ARCH_WANT_UNLOCKED_CTXSW */
314static inline int task_running(runqueue_t *rq, task_t *p) 318static inline int task_running(struct rq *rq, struct task_struct *p)
315{ 319{
316#ifdef CONFIG_SMP 320#ifdef CONFIG_SMP
317 return p->oncpu; 321 return p->oncpu;
@@ -320,7 +324,7 @@ static inline int task_running(runqueue_t *rq, task_t *p)
320#endif 324#endif
321} 325}
322 326
323static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 327static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
324{ 328{
325#ifdef CONFIG_SMP 329#ifdef CONFIG_SMP
326 /* 330 /*
@@ -337,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
337#endif 341#endif
338} 342}
339 343
340static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 344static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
341{ 345{
342#ifdef CONFIG_SMP 346#ifdef CONFIG_SMP
343 /* 347 /*
@@ -358,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
358 * __task_rq_lock - lock the runqueue a given task resides on. 362 * __task_rq_lock - lock the runqueue a given task resides on.
359 * Must be called interrupts disabled. 363 * Must be called interrupts disabled.
360 */ 364 */
361static inline runqueue_t *__task_rq_lock(task_t *p) 365static inline struct rq *__task_rq_lock(struct task_struct *p)
362 __acquires(rq->lock) 366 __acquires(rq->lock)
363{ 367{
364 struct runqueue *rq; 368 struct rq *rq;
365 369
366repeat_lock_task: 370repeat_lock_task:
367 rq = task_rq(p); 371 rq = task_rq(p);
@@ -378,10 +382,10 @@ repeat_lock_task:
378 * interrupts. Note the ordering: we can safely lookup the task_rq without 382 * interrupts. Note the ordering: we can safely lookup the task_rq without
379 * explicitly disabling preemption. 383 * explicitly disabling preemption.
380 */ 384 */
381static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) 385static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
382 __acquires(rq->lock) 386 __acquires(rq->lock)
383{ 387{
384 struct runqueue *rq; 388 struct rq *rq;
385 389
386repeat_lock_task: 390repeat_lock_task:
387 local_irq_save(*flags); 391 local_irq_save(*flags);
@@ -394,13 +398,13 @@ repeat_lock_task:
394 return rq; 398 return rq;
395} 399}
396 400
397static inline void __task_rq_unlock(runqueue_t *rq) 401static inline void __task_rq_unlock(struct rq *rq)
398 __releases(rq->lock) 402 __releases(rq->lock)
399{ 403{
400 spin_unlock(&rq->lock); 404 spin_unlock(&rq->lock);
401} 405}
402 406
403static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) 407static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
404 __releases(rq->lock) 408 __releases(rq->lock)
405{ 409{
406 spin_unlock_irqrestore(&rq->lock, *flags); 410 spin_unlock_irqrestore(&rq->lock, *flags);
@@ -420,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
420 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); 424 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
421 seq_printf(seq, "timestamp %lu\n", jiffies); 425 seq_printf(seq, "timestamp %lu\n", jiffies);
422 for_each_online_cpu(cpu) { 426 for_each_online_cpu(cpu) {
423 runqueue_t *rq = cpu_rq(cpu); 427 struct rq *rq = cpu_rq(cpu);
424#ifdef CONFIG_SMP 428#ifdef CONFIG_SMP
425 struct sched_domain *sd; 429 struct sched_domain *sd;
426 int dcnt = 0; 430 int dcnt = 0;
@@ -507,10 +511,10 @@ struct file_operations proc_schedstat_operations = {
507/* 511/*
508 * rq_lock - lock a given runqueue and disable interrupts. 512 * rq_lock - lock a given runqueue and disable interrupts.
509 */ 513 */
510static inline runqueue_t *this_rq_lock(void) 514static inline struct rq *this_rq_lock(void)
511 __acquires(rq->lock) 515 __acquires(rq->lock)
512{ 516{
513 runqueue_t *rq; 517 struct rq *rq;
514 518
515 local_irq_disable(); 519 local_irq_disable();
516 rq = this_rq(); 520 rq = this_rq();
@@ -535,7 +539,7 @@ static inline runqueue_t *this_rq_lock(void)
535 * long it was from the *first* time it was queued to the time that it 539 * long it was from the *first* time it was queued to the time that it
536 * finally hit a cpu. 540 * finally hit a cpu.
537 */ 541 */
538static inline void sched_info_dequeued(task_t *t) 542static inline void sched_info_dequeued(struct task_struct *t)
539{ 543{
540 t->sched_info.last_queued = 0; 544 t->sched_info.last_queued = 0;
541} 545}
@@ -545,10 +549,10 @@ static inline void sched_info_dequeued(task_t *t)
545 * long it was waiting to run. We also note when it began so that we 549 * long it was waiting to run. We also note when it began so that we
546 * can keep stats on how long its timeslice is. 550 * can keep stats on how long its timeslice is.
547 */ 551 */
548static void sched_info_arrive(task_t *t) 552static void sched_info_arrive(struct task_struct *t)
549{ 553{
550 unsigned long now = jiffies, diff = 0; 554 unsigned long now = jiffies, diff = 0;
551 struct runqueue *rq = task_rq(t); 555 struct rq *rq = task_rq(t);
552 556
553 if (t->sched_info.last_queued) 557 if (t->sched_info.last_queued)
554 diff = now - t->sched_info.last_queued; 558 diff = now - t->sched_info.last_queued;
@@ -579,7 +583,7 @@ static void sched_info_arrive(task_t *t)
579 * the timestamp if it is already not set. It's assumed that 583 * the timestamp if it is already not set. It's assumed that
580 * sched_info_dequeued() will clear that stamp when appropriate. 584 * sched_info_dequeued() will clear that stamp when appropriate.
581 */ 585 */
582static inline void sched_info_queued(task_t *t) 586static inline void sched_info_queued(struct task_struct *t)
583{ 587{
584 if (!t->sched_info.last_queued) 588 if (!t->sched_info.last_queued)
585 t->sched_info.last_queued = jiffies; 589 t->sched_info.last_queued = jiffies;
@@ -589,9 +593,9 @@ static inline void sched_info_queued(task_t *t)
589 * Called when a process ceases being the active-running process, either 593 * Called when a process ceases being the active-running process, either
590 * voluntarily or involuntarily. Now we can calculate how long we ran. 594 * voluntarily or involuntarily. Now we can calculate how long we ran.
591 */ 595 */
592static inline void sched_info_depart(task_t *t) 596static inline void sched_info_depart(struct task_struct *t)
593{ 597{
594 struct runqueue *rq = task_rq(t); 598 struct rq *rq = task_rq(t);
595 unsigned long diff = jiffies - t->sched_info.last_arrival; 599 unsigned long diff = jiffies - t->sched_info.last_arrival;
596 600
597 t->sched_info.cpu_time += diff; 601 t->sched_info.cpu_time += diff;
@@ -605,9 +609,10 @@ static inline void sched_info_depart(task_t *t)
605 * their time slice. (This may also be called when switching to or from 609 * their time slice. (This may also be called when switching to or from
606 * the idle task.) We are only called when prev != next. 610 * the idle task.) We are only called when prev != next.
607 */ 611 */
608static inline void sched_info_switch(task_t *prev, task_t *next) 612static inline void
613sched_info_switch(struct task_struct *prev, struct task_struct *next)
609{ 614{
610 struct runqueue *rq = task_rq(prev); 615 struct rq *rq = task_rq(prev);
611 616
612 /* 617 /*
613 * prev now departs the cpu. It's not interesting to record 618 * prev now departs the cpu. It's not interesting to record
@@ -628,7 +633,7 @@ static inline void sched_info_switch(task_t *prev, task_t *next)
628/* 633/*
629 * Adding/removing a task to/from a priority array: 634 * Adding/removing a task to/from a priority array:
630 */ 635 */
631static void dequeue_task(struct task_struct *p, prio_array_t *array) 636static void dequeue_task(struct task_struct *p, struct prio_array *array)
632{ 637{
633 array->nr_active--; 638 array->nr_active--;
634 list_del(&p->run_list); 639 list_del(&p->run_list);
@@ -636,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array)
636 __clear_bit(p->prio, array->bitmap); 641 __clear_bit(p->prio, array->bitmap);
637} 642}
638 643
639static void enqueue_task(struct task_struct *p, prio_array_t *array) 644static void enqueue_task(struct task_struct *p, struct prio_array *array)
640{ 645{
641 sched_info_queued(p); 646 sched_info_queued(p);
642 list_add_tail(&p->run_list, array->queue + p->prio); 647 list_add_tail(&p->run_list, array->queue + p->prio);
@@ -649,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)
649 * Put task to the end of the run list without the overhead of dequeue 654 * Put task to the end of the run list without the overhead of dequeue
650 * followed by enqueue. 655 * followed by enqueue.
651 */ 656 */
652static void requeue_task(struct task_struct *p, prio_array_t *array) 657static void requeue_task(struct task_struct *p, struct prio_array *array)
653{ 658{
654 list_move_tail(&p->run_list, array->queue + p->prio); 659 list_move_tail(&p->run_list, array->queue + p->prio);
655} 660}
656 661
657static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) 662static inline void
663enqueue_task_head(struct task_struct *p, struct prio_array *array)
658{ 664{
659 list_add(&p->run_list, array->queue + p->prio); 665 list_add(&p->run_list, array->queue + p->prio);
660 __set_bit(p->prio, array->bitmap); 666 __set_bit(p->prio, array->bitmap);
@@ -677,7 +683,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
677 * Both properties are important to certain workloads. 683 * Both properties are important to certain workloads.
678 */ 684 */
679 685
680static inline int __normal_prio(task_t *p) 686static inline int __normal_prio(struct task_struct *p)
681{ 687{
682 int bonus, prio; 688 int bonus, prio;
683 689
@@ -713,7 +719,7 @@ static inline int __normal_prio(task_t *p)
713#define RTPRIO_TO_LOAD_WEIGHT(rp) \ 719#define RTPRIO_TO_LOAD_WEIGHT(rp) \
714 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) 720 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
715 721
716static void set_load_weight(task_t *p) 722static void set_load_weight(struct task_struct *p)
717{ 723{
718 if (has_rt_policy(p)) { 724 if (has_rt_policy(p)) {
719#ifdef CONFIG_SMP 725#ifdef CONFIG_SMP
@@ -731,23 +737,25 @@ static void set_load_weight(task_t *p)
731 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); 737 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
732} 738}
733 739
734static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) 740static inline void
741inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
735{ 742{
736 rq->raw_weighted_load += p->load_weight; 743 rq->raw_weighted_load += p->load_weight;
737} 744}
738 745
739static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p) 746static inline void
747dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
740{ 748{
741 rq->raw_weighted_load -= p->load_weight; 749 rq->raw_weighted_load -= p->load_weight;
742} 750}
743 751
744static inline void inc_nr_running(task_t *p, runqueue_t *rq) 752static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
745{ 753{
746 rq->nr_running++; 754 rq->nr_running++;
747 inc_raw_weighted_load(rq, p); 755 inc_raw_weighted_load(rq, p);
748} 756}
749 757
750static inline void dec_nr_running(task_t *p, runqueue_t *rq) 758static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
751{ 759{
752 rq->nr_running--; 760 rq->nr_running--;
753 dec_raw_weighted_load(rq, p); 761 dec_raw_weighted_load(rq, p);
@@ -760,7 +768,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq)
760 * setprio syscalls, and whenever the interactivity 768 * setprio syscalls, and whenever the interactivity
761 * estimator recalculates. 769 * estimator recalculates.
762 */ 770 */
763static inline int normal_prio(task_t *p) 771static inline int normal_prio(struct task_struct *p)
764{ 772{
765 int prio; 773 int prio;
766 774
@@ -778,7 +786,7 @@ static inline int normal_prio(task_t *p)
778 * interactivity modifiers. Will be RT if the task got 786 * interactivity modifiers. Will be RT if the task got
779 * RT-boosted. If not then it returns p->normal_prio. 787 * RT-boosted. If not then it returns p->normal_prio.
780 */ 788 */
781static int effective_prio(task_t *p) 789static int effective_prio(struct task_struct *p)
782{ 790{
783 p->normal_prio = normal_prio(p); 791 p->normal_prio = normal_prio(p);
784 /* 792 /*
@@ -794,9 +802,9 @@ static int effective_prio(task_t *p)
794/* 802/*
795 * __activate_task - move a task to the runqueue. 803 * __activate_task - move a task to the runqueue.
796 */ 804 */
797static void __activate_task(task_t *p, runqueue_t *rq) 805static void __activate_task(struct task_struct *p, struct rq *rq)
798{ 806{
799 prio_array_t *target = rq->active; 807 struct prio_array *target = rq->active;
800 808
801 if (batch_task(p)) 809 if (batch_task(p))
802 target = rq->expired; 810 target = rq->expired;
@@ -807,7 +815,7 @@ static void __activate_task(task_t *p, runqueue_t *rq)
807/* 815/*
808 * __activate_idle_task - move idle task to the _front_ of runqueue. 816 * __activate_idle_task - move idle task to the _front_ of runqueue.
809 */ 817 */
810static inline void __activate_idle_task(task_t *p, runqueue_t *rq) 818static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
811{ 819{
812 enqueue_task_head(p, rq->active); 820 enqueue_task_head(p, rq->active);
813 inc_nr_running(p, rq); 821 inc_nr_running(p, rq);
@@ -817,7 +825,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
817 * Recalculate p->normal_prio and p->prio after having slept, 825 * Recalculate p->normal_prio and p->prio after having slept,
818 * updating the sleep-average too: 826 * updating the sleep-average too:
819 */ 827 */
820static int recalc_task_prio(task_t *p, unsigned long long now) 828static int recalc_task_prio(struct task_struct *p, unsigned long long now)
821{ 829{
822 /* Caller must always ensure 'now >= p->timestamp' */ 830 /* Caller must always ensure 'now >= p->timestamp' */
823 unsigned long sleep_time = now - p->timestamp; 831 unsigned long sleep_time = now - p->timestamp;
@@ -889,7 +897,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
889 * Update all the scheduling statistics stuff. (sleep average 897 * Update all the scheduling statistics stuff. (sleep average
890 * calculation, priority modifiers, etc.) 898 * calculation, priority modifiers, etc.)
891 */ 899 */
892static void activate_task(task_t *p, runqueue_t *rq, int local) 900static void activate_task(struct task_struct *p, struct rq *rq, int local)
893{ 901{
894 unsigned long long now; 902 unsigned long long now;
895 903
@@ -897,7 +905,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
897#ifdef CONFIG_SMP 905#ifdef CONFIG_SMP
898 if (!local) { 906 if (!local) {
899 /* Compensate for drifting sched_clock */ 907 /* Compensate for drifting sched_clock */
900 runqueue_t *this_rq = this_rq(); 908 struct rq *this_rq = this_rq();
901 now = (now - this_rq->timestamp_last_tick) 909 now = (now - this_rq->timestamp_last_tick)
902 + rq->timestamp_last_tick; 910 + rq->timestamp_last_tick;
903 } 911 }
@@ -936,7 +944,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
936/* 944/*
937 * deactivate_task - remove a task from the runqueue. 945 * deactivate_task - remove a task from the runqueue.
938 */ 946 */
939static void deactivate_task(struct task_struct *p, runqueue_t *rq) 947static void deactivate_task(struct task_struct *p, struct rq *rq)
940{ 948{
941 dec_nr_running(p, rq); 949 dec_nr_running(p, rq);
942 dequeue_task(p, p->array); 950 dequeue_task(p, p->array);
@@ -956,7 +964,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
956#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 964#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
957#endif 965#endif
958 966
959static void resched_task(task_t *p) 967static void resched_task(struct task_struct *p)
960{ 968{
961 int cpu; 969 int cpu;
962 970
@@ -977,7 +985,7 @@ static void resched_task(task_t *p)
977 smp_send_reschedule(cpu); 985 smp_send_reschedule(cpu);
978} 986}
979#else 987#else
980static inline void resched_task(task_t *p) 988static inline void resched_task(struct task_struct *p)
981{ 989{
982 assert_spin_locked(&task_rq(p)->lock); 990 assert_spin_locked(&task_rq(p)->lock);
983 set_tsk_need_resched(p); 991 set_tsk_need_resched(p);
@@ -988,7 +996,7 @@ static inline void resched_task(task_t *p)
988 * task_curr - is this task currently executing on a CPU? 996 * task_curr - is this task currently executing on a CPU?
989 * @p: the task in question. 997 * @p: the task in question.
990 */ 998 */
991inline int task_curr(const task_t *p) 999inline int task_curr(const struct task_struct *p)
992{ 1000{
993 return cpu_curr(task_cpu(p)) == p; 1001 return cpu_curr(task_cpu(p)) == p;
994} 1002}
@@ -1000,22 +1008,23 @@ unsigned long weighted_cpuload(const int cpu)
1000} 1008}
1001 1009
1002#ifdef CONFIG_SMP 1010#ifdef CONFIG_SMP
1003typedef struct { 1011struct migration_req {
1004 struct list_head list; 1012 struct list_head list;
1005 1013
1006 task_t *task; 1014 struct task_struct *task;
1007 int dest_cpu; 1015 int dest_cpu;
1008 1016
1009 struct completion done; 1017 struct completion done;
1010} migration_req_t; 1018};
1011 1019
1012/* 1020/*
1013 * The task's runqueue lock must be held. 1021 * The task's runqueue lock must be held.
1014 * Returns true if you have to wait for migration thread. 1022 * Returns true if you have to wait for migration thread.
1015 */ 1023 */
1016static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) 1024static int
1025migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
1017{ 1026{
1018 runqueue_t *rq = task_rq(p); 1027 struct rq *rq = task_rq(p);
1019 1028
1020 /* 1029 /*
1021 * If the task is not on a runqueue (and not running), then 1030 * If the task is not on a runqueue (and not running), then
@@ -1030,6 +1039,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1030 req->task = p; 1039 req->task = p;
1031 req->dest_cpu = dest_cpu; 1040 req->dest_cpu = dest_cpu;
1032 list_add(&req->list, &rq->migration_queue); 1041 list_add(&req->list, &rq->migration_queue);
1042
1033 return 1; 1043 return 1;
1034} 1044}
1035 1045
@@ -1042,10 +1052,10 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1042 * smp_call_function() if an IPI is sent by the same process we are 1052 * smp_call_function() if an IPI is sent by the same process we are
1043 * waiting to become inactive. 1053 * waiting to become inactive.
1044 */ 1054 */
1045void wait_task_inactive(task_t *p) 1055void wait_task_inactive(struct task_struct *p)
1046{ 1056{
1047 unsigned long flags; 1057 unsigned long flags;
1048 runqueue_t *rq; 1058 struct rq *rq;
1049 int preempted; 1059 int preempted;
1050 1060
1051repeat: 1061repeat:
@@ -1076,7 +1086,7 @@ repeat:
1076 * to another CPU then no harm is done and the purpose has been 1086 * to another CPU then no harm is done and the purpose has been
1077 * achieved as well. 1087 * achieved as well.
1078 */ 1088 */
1079void kick_process(task_t *p) 1089void kick_process(struct task_struct *p)
1080{ 1090{
1081 int cpu; 1091 int cpu;
1082 1092
@@ -1096,7 +1106,7 @@ void kick_process(task_t *p)
1096 */ 1106 */
1097static inline unsigned long source_load(int cpu, int type) 1107static inline unsigned long source_load(int cpu, int type)
1098{ 1108{
1099 runqueue_t *rq = cpu_rq(cpu); 1109 struct rq *rq = cpu_rq(cpu);
1100 1110
1101 if (type == 0) 1111 if (type == 0)
1102 return rq->raw_weighted_load; 1112 return rq->raw_weighted_load;
@@ -1110,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type)
1110 */ 1120 */
1111static inline unsigned long target_load(int cpu, int type) 1121static inline unsigned long target_load(int cpu, int type)
1112{ 1122{
1113 runqueue_t *rq = cpu_rq(cpu); 1123 struct rq *rq = cpu_rq(cpu);
1114 1124
1115 if (type == 0) 1125 if (type == 0)
1116 return rq->raw_weighted_load; 1126 return rq->raw_weighted_load;
@@ -1123,10 +1133,10 @@ static inline unsigned long target_load(int cpu, int type)
1123 */ 1133 */
1124static inline unsigned long cpu_avg_load_per_task(int cpu) 1134static inline unsigned long cpu_avg_load_per_task(int cpu)
1125{ 1135{
1126 runqueue_t *rq = cpu_rq(cpu); 1136 struct rq *rq = cpu_rq(cpu);
1127 unsigned long n = rq->nr_running; 1137 unsigned long n = rq->nr_running;
1128 1138
1129 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; 1139 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
1130} 1140}
1131 1141
1132/* 1142/*
@@ -1279,7 +1289,7 @@ nextlevel:
1279 * Returns the CPU we should wake onto. 1289 * Returns the CPU we should wake onto.
1280 */ 1290 */
1281#if defined(ARCH_HAS_SCHED_WAKE_IDLE) 1291#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1282static int wake_idle(int cpu, task_t *p) 1292static int wake_idle(int cpu, struct task_struct *p)
1283{ 1293{
1284 cpumask_t tmp; 1294 cpumask_t tmp;
1285 struct sched_domain *sd; 1295 struct sched_domain *sd;
@@ -1302,7 +1312,7 @@ static int wake_idle(int cpu, task_t *p)
1302 return cpu; 1312 return cpu;
1303} 1313}
1304#else 1314#else
1305static inline int wake_idle(int cpu, task_t *p) 1315static inline int wake_idle(int cpu, struct task_struct *p)
1306{ 1316{
1307 return cpu; 1317 return cpu;
1308} 1318}
@@ -1322,15 +1332,15 @@ static inline int wake_idle(int cpu, task_t *p)
1322 * 1332 *
1323 * returns failure only if the task is already active. 1333 * returns failure only if the task is already active.
1324 */ 1334 */
1325static int try_to_wake_up(task_t *p, unsigned int state, int sync) 1335static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1326{ 1336{
1327 int cpu, this_cpu, success = 0; 1337 int cpu, this_cpu, success = 0;
1328 unsigned long flags; 1338 unsigned long flags;
1329 long old_state; 1339 long old_state;
1330 runqueue_t *rq; 1340 struct rq *rq;
1331#ifdef CONFIG_SMP 1341#ifdef CONFIG_SMP
1332 unsigned long load, this_load;
1333 struct sched_domain *sd, *this_sd = NULL; 1342 struct sched_domain *sd, *this_sd = NULL;
1343 unsigned long load, this_load;
1334 int new_cpu; 1344 int new_cpu;
1335#endif 1345#endif
1336 1346
@@ -1480,15 +1490,14 @@ out:
1480 return success; 1490 return success;
1481} 1491}
1482 1492
1483int fastcall wake_up_process(task_t *p) 1493int fastcall wake_up_process(struct task_struct *p)
1484{ 1494{
1485 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1495 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
1486 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); 1496 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
1487} 1497}
1488
1489EXPORT_SYMBOL(wake_up_process); 1498EXPORT_SYMBOL(wake_up_process);
1490 1499
1491int fastcall wake_up_state(task_t *p, unsigned int state) 1500int fastcall wake_up_state(struct task_struct *p, unsigned int state)
1492{ 1501{
1493 return try_to_wake_up(p, state, 0); 1502 return try_to_wake_up(p, state, 0);
1494} 1503}
@@ -1497,7 +1506,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
1497 * Perform scheduler related setup for a newly forked process p. 1506 * Perform scheduler related setup for a newly forked process p.
1498 * p is forked by current. 1507 * p is forked by current.
1499 */ 1508 */
1500void fastcall sched_fork(task_t *p, int clone_flags) 1509void fastcall sched_fork(struct task_struct *p, int clone_flags)
1501{ 1510{
1502 int cpu = get_cpu(); 1511 int cpu = get_cpu();
1503 1512
@@ -1565,11 +1574,11 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1565 * that must be done for every newly created context, then puts the task 1574 * that must be done for every newly created context, then puts the task
1566 * on the runqueue and wakes it. 1575 * on the runqueue and wakes it.
1567 */ 1576 */
1568void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) 1577void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1569{ 1578{
1579 struct rq *rq, *this_rq;
1570 unsigned long flags; 1580 unsigned long flags;
1571 int this_cpu, cpu; 1581 int this_cpu, cpu;
1572 runqueue_t *rq, *this_rq;
1573 1582
1574 rq = task_rq_lock(p, &flags); 1583 rq = task_rq_lock(p, &flags);
1575 BUG_ON(p->state != TASK_RUNNING); 1584 BUG_ON(p->state != TASK_RUNNING);
@@ -1649,10 +1658,10 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1649 * artificially, because any timeslice recovered here 1658 * artificially, because any timeslice recovered here
1650 * was given away by the parent in the first place.) 1659 * was given away by the parent in the first place.)
1651 */ 1660 */
1652void fastcall sched_exit(task_t *p) 1661void fastcall sched_exit(struct task_struct *p)
1653{ 1662{
1654 unsigned long flags; 1663 unsigned long flags;
1655 runqueue_t *rq; 1664 struct rq *rq;
1656 1665
1657 /* 1666 /*
1658 * If the child was a (relative-) CPU hog then decrease 1667 * If the child was a (relative-) CPU hog then decrease
@@ -1683,7 +1692,7 @@ void fastcall sched_exit(task_t *p)
1683 * prepare_task_switch sets up locking and calls architecture specific 1692 * prepare_task_switch sets up locking and calls architecture specific
1684 * hooks. 1693 * hooks.
1685 */ 1694 */
1686static inline void prepare_task_switch(runqueue_t *rq, task_t *next) 1695static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
1687{ 1696{
1688 prepare_lock_switch(rq, next); 1697 prepare_lock_switch(rq, next);
1689 prepare_arch_switch(next); 1698 prepare_arch_switch(next);
@@ -1704,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
1704 * with the lock held can cause deadlocks; see schedule() for 1713 * with the lock held can cause deadlocks; see schedule() for
1705 * details.) 1714 * details.)
1706 */ 1715 */
1707static inline void finish_task_switch(runqueue_t *rq, task_t *prev) 1716static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
1708 __releases(rq->lock) 1717 __releases(rq->lock)
1709{ 1718{
1710 struct mm_struct *mm = rq->prev_mm; 1719 struct mm_struct *mm = rq->prev_mm;
@@ -1742,10 +1751,11 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
1742 * schedule_tail - first thing a freshly forked thread must call. 1751 * schedule_tail - first thing a freshly forked thread must call.
1743 * @prev: the thread we just switched away from. 1752 * @prev: the thread we just switched away from.
1744 */ 1753 */
1745asmlinkage void schedule_tail(task_t *prev) 1754asmlinkage void schedule_tail(struct task_struct *prev)
1746 __releases(rq->lock) 1755 __releases(rq->lock)
1747{ 1756{
1748 runqueue_t *rq = this_rq(); 1757 struct rq *rq = this_rq();
1758
1749 finish_task_switch(rq, prev); 1759 finish_task_switch(rq, prev);
1750#ifdef __ARCH_WANT_UNLOCKED_CTXSW 1760#ifdef __ARCH_WANT_UNLOCKED_CTXSW
1751 /* In this case, finish_task_switch does not reenable preemption */ 1761 /* In this case, finish_task_switch does not reenable preemption */
@@ -1759,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev)
1759 * context_switch - switch to the new MM and the new 1769 * context_switch - switch to the new MM and the new
1760 * thread's register state. 1770 * thread's register state.
1761 */ 1771 */
1762static inline 1772static inline struct task_struct *
1763task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) 1773context_switch(struct rq *rq, struct task_struct *prev,
1774 struct task_struct *next)
1764{ 1775{
1765 struct mm_struct *mm = next->mm; 1776 struct mm_struct *mm = next->mm;
1766 struct mm_struct *oldmm = prev->active_mm; 1777 struct mm_struct *oldmm = prev->active_mm;
@@ -1777,6 +1788,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
1777 WARN_ON(rq->prev_mm); 1788 WARN_ON(rq->prev_mm);
1778 rq->prev_mm = oldmm; 1789 rq->prev_mm = oldmm;
1779 } 1790 }
1791 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
1780 1792
1781 /* Here we just switch the register state and the stack. */ 1793 /* Here we just switch the register state and the stack. */
1782 switch_to(prev, next, prev); 1794 switch_to(prev, next, prev);
@@ -1857,12 +1869,21 @@ unsigned long nr_active(void)
1857#ifdef CONFIG_SMP 1869#ifdef CONFIG_SMP
1858 1870
1859/* 1871/*
1872 * Is this task likely cache-hot:
1873 */
1874static inline int
1875task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
1876{
1877 return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
1878}
1879
1880/*
1860 * double_rq_lock - safely lock two runqueues 1881 * double_rq_lock - safely lock two runqueues
1861 * 1882 *
1862 * Note this does not disable interrupts like task_rq_lock, 1883 * Note this does not disable interrupts like task_rq_lock,
1863 * you need to do so manually before calling. 1884 * you need to do so manually before calling.
1864 */ 1885 */
1865static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) 1886static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1866 __acquires(rq1->lock) 1887 __acquires(rq1->lock)
1867 __acquires(rq2->lock) 1888 __acquires(rq2->lock)
1868{ 1889{
@@ -1886,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1886 * Note this does not restore interrupts like task_rq_unlock, 1907 * Note this does not restore interrupts like task_rq_unlock,
1887 * you need to do so manually after calling. 1908 * you need to do so manually after calling.
1888 */ 1909 */
1889static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) 1910static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1890 __releases(rq1->lock) 1911 __releases(rq1->lock)
1891 __releases(rq2->lock) 1912 __releases(rq2->lock)
1892{ 1913{
@@ -1900,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
1900/* 1921/*
1901 * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 1922 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
1902 */ 1923 */
1903static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) 1924static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
1904 __releases(this_rq->lock) 1925 __releases(this_rq->lock)
1905 __acquires(busiest->lock) 1926 __acquires(busiest->lock)
1906 __acquires(this_rq->lock) 1927 __acquires(this_rq->lock)
@@ -1921,11 +1942,11 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
1921 * allow dest_cpu, which will force the cpu onto dest_cpu. Then 1942 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
1922 * the cpu_allowed mask is restored. 1943 * the cpu_allowed mask is restored.
1923 */ 1944 */
1924static void sched_migrate_task(task_t *p, int dest_cpu) 1945static void sched_migrate_task(struct task_struct *p, int dest_cpu)
1925{ 1946{
1926 migration_req_t req; 1947 struct migration_req req;
1927 runqueue_t *rq;
1928 unsigned long flags; 1948 unsigned long flags;
1949 struct rq *rq;
1929 1950
1930 rq = task_rq_lock(p, &flags); 1951 rq = task_rq_lock(p, &flags);
1931 if (!cpu_isset(dest_cpu, p->cpus_allowed) 1952 if (!cpu_isset(dest_cpu, p->cpus_allowed)
@@ -1936,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu)
1936 if (migrate_task(p, dest_cpu, &req)) { 1957 if (migrate_task(p, dest_cpu, &req)) {
1937 /* Need to wait for migration thread (might exit: take ref). */ 1958 /* Need to wait for migration thread (might exit: take ref). */
1938 struct task_struct *mt = rq->migration_thread; 1959 struct task_struct *mt = rq->migration_thread;
1960
1939 get_task_struct(mt); 1961 get_task_struct(mt);
1940 task_rq_unlock(rq, &flags); 1962 task_rq_unlock(rq, &flags);
1941 wake_up_process(mt); 1963 wake_up_process(mt);
1942 put_task_struct(mt); 1964 put_task_struct(mt);
1943 wait_for_completion(&req.done); 1965 wait_for_completion(&req.done);
1966
1944 return; 1967 return;
1945 } 1968 }
1946out: 1969out:
@@ -1964,9 +1987,9 @@ void sched_exec(void)
1964 * pull_task - move a task from a remote runqueue to the local runqueue. 1987 * pull_task - move a task from a remote runqueue to the local runqueue.
1965 * Both runqueues must be locked. 1988 * Both runqueues must be locked.
1966 */ 1989 */
1967static 1990static void pull_task(struct rq *src_rq, struct prio_array *src_array,
1968void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, 1991 struct task_struct *p, struct rq *this_rq,
1969 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1992 struct prio_array *this_array, int this_cpu)
1970{ 1993{
1971 dequeue_task(p, src_array); 1994 dequeue_task(p, src_array);
1972 dec_nr_running(p, src_rq); 1995 dec_nr_running(p, src_rq);
@@ -1987,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1987 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? 2010 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
1988 */ 2011 */
1989static 2012static
1990int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, 2013int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
1991 struct sched_domain *sd, enum idle_type idle, 2014 struct sched_domain *sd, enum idle_type idle,
1992 int *all_pinned) 2015 int *all_pinned)
1993{ 2016{
@@ -2019,6 +2042,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2019} 2042}
2020 2043
2021#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) 2044#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
2045
2022/* 2046/*
2023 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted 2047 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
2024 * load from busiest to this_rq, as part of a balancing operation within 2048 * load from busiest to this_rq, as part of a balancing operation within
@@ -2026,18 +2050,17 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2026 * 2050 *
2027 * Called with both runqueues locked. 2051 * Called with both runqueues locked.
2028 */ 2052 */
2029static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, 2053static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2030 unsigned long max_nr_move, unsigned long max_load_move, 2054 unsigned long max_nr_move, unsigned long max_load_move,
2031 struct sched_domain *sd, enum idle_type idle, 2055 struct sched_domain *sd, enum idle_type idle,
2032 int *all_pinned) 2056 int *all_pinned)
2033{ 2057{
2034 prio_array_t *array, *dst_array; 2058 int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
2059 best_prio_seen, skip_for_load;
2060 struct prio_array *array, *dst_array;
2035 struct list_head *head, *curr; 2061 struct list_head *head, *curr;
2036 int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; 2062 struct task_struct *tmp;
2037 int busiest_best_prio_seen;
2038 int skip_for_load; /* skip the task based on weighted load issues */
2039 long rem_load_move; 2063 long rem_load_move;
2040 task_t *tmp;
2041 2064
2042 if (max_nr_move == 0 || max_load_move == 0) 2065 if (max_nr_move == 0 || max_load_move == 0)
2043 goto out; 2066 goto out;
@@ -2045,15 +2068,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
2045 rem_load_move = max_load_move; 2068 rem_load_move = max_load_move;
2046 pinned = 1; 2069 pinned = 1;
2047 this_best_prio = rq_best_prio(this_rq); 2070 this_best_prio = rq_best_prio(this_rq);
2048 busiest_best_prio = rq_best_prio(busiest); 2071 best_prio = rq_best_prio(busiest);
2049 /* 2072 /*
2050 * Enable handling of the case where there is more than one task 2073 * Enable handling of the case where there is more than one task
2051 * with the best priority. If the current running task is one 2074 * with the best priority. If the current running task is one
2052 * of those with prio==busiest_best_prio we know it won't be moved 2075 * of those with prio==best_prio we know it won't be moved
2053 * and therefore it's safe to override the skip (based on load) of 2076 * and therefore it's safe to override the skip (based on load) of
2054 * any task we find with that prio. 2077 * any task we find with that prio.
2055 */ 2078 */
2056 busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; 2079 best_prio_seen = best_prio == busiest->curr->prio;
2057 2080
2058 /* 2081 /*
2059 * We first consider expired tasks. Those will likely not be 2082 * We first consider expired tasks. Those will likely not be
@@ -2089,7 +2112,7 @@ skip_bitmap:
2089 head = array->queue + idx; 2112 head = array->queue + idx;
2090 curr = head->prev; 2113 curr = head->prev;
2091skip_queue: 2114skip_queue:
2092 tmp = list_entry(curr, task_t, run_list); 2115 tmp = list_entry(curr, struct task_struct, run_list);
2093 2116
2094 curr = curr->prev; 2117 curr = curr->prev;
2095 2118
@@ -2100,10 +2123,11 @@ skip_queue:
2100 */ 2123 */
2101 skip_for_load = tmp->load_weight > rem_load_move; 2124 skip_for_load = tmp->load_weight > rem_load_move;
2102 if (skip_for_load && idx < this_best_prio) 2125 if (skip_for_load && idx < this_best_prio)
2103 skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; 2126 skip_for_load = !best_prio_seen && idx == best_prio;
2104 if (skip_for_load || 2127 if (skip_for_load ||
2105 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { 2128 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
2106 busiest_best_prio_seen |= idx == busiest_best_prio; 2129
2130 best_prio_seen |= idx == best_prio;
2107 if (curr != head) 2131 if (curr != head)
2108 goto skip_queue; 2132 goto skip_queue;
2109 idx++; 2133 idx++;
@@ -2146,8 +2170,8 @@ out:
2146 2170
2147/* 2171/*
2148 * find_busiest_group finds and returns the busiest CPU group within the 2172 * find_busiest_group finds and returns the busiest CPU group within the
2149 * domain. It calculates and returns the amount of weighted load which should be 2173 * domain. It calculates and returns the amount of weighted load which
2150 * moved to restore balance via the imbalance parameter. 2174 * should be moved to restore balance via the imbalance parameter.
2151 */ 2175 */
2152static struct sched_group * 2176static struct sched_group *
2153find_busiest_group(struct sched_domain *sd, int this_cpu, 2177find_busiest_group(struct sched_domain *sd, int this_cpu,
@@ -2188,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2188 sum_weighted_load = sum_nr_running = avg_load = 0; 2212 sum_weighted_load = sum_nr_running = avg_load = 0;
2189 2213
2190 for_each_cpu_mask(i, group->cpumask) { 2214 for_each_cpu_mask(i, group->cpumask) {
2191 runqueue_t *rq = cpu_rq(i); 2215 struct rq *rq = cpu_rq(i);
2192 2216
2193 if (*sd_idle && !idle_cpu(i)) 2217 if (*sd_idle && !idle_cpu(i))
2194 *sd_idle = 0; 2218 *sd_idle = 0;
@@ -2269,7 +2293,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2269 * capacity but still has some space to pick up some load 2293 * capacity but still has some space to pick up some load
2270 * from other group and save more power 2294 * from other group and save more power
2271 */ 2295 */
2272 if (sum_nr_running <= group_capacity - 1) 2296 if (sum_nr_running <= group_capacity - 1) {
2273 if (sum_nr_running > leader_nr_running || 2297 if (sum_nr_running > leader_nr_running ||
2274 (sum_nr_running == leader_nr_running && 2298 (sum_nr_running == leader_nr_running &&
2275 first_cpu(group->cpumask) > 2299 first_cpu(group->cpumask) >
@@ -2277,7 +2301,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2277 group_leader = group; 2301 group_leader = group;
2278 leader_nr_running = sum_nr_running; 2302 leader_nr_running = sum_nr_running;
2279 } 2303 }
2280 2304 }
2281group_next: 2305group_next:
2282#endif 2306#endif
2283 group = group->next; 2307 group = group->next;
@@ -2332,8 +2356,7 @@ group_next:
2332 * moved 2356 * moved
2333 */ 2357 */
2334 if (*imbalance < busiest_load_per_task) { 2358 if (*imbalance < busiest_load_per_task) {
2335 unsigned long pwr_now, pwr_move; 2359 unsigned long tmp, pwr_now, pwr_move;
2336 unsigned long tmp;
2337 unsigned int imbn; 2360 unsigned int imbn;
2338 2361
2339small_imbalance: 2362small_imbalance:
@@ -2405,22 +2428,23 @@ ret:
2405/* 2428/*
2406 * find_busiest_queue - find the busiest runqueue among the cpus in group. 2429 * find_busiest_queue - find the busiest runqueue among the cpus in group.
2407 */ 2430 */
2408static runqueue_t *find_busiest_queue(struct sched_group *group, 2431static struct rq *
2409 enum idle_type idle, unsigned long imbalance) 2432find_busiest_queue(struct sched_group *group, enum idle_type idle,
2433 unsigned long imbalance)
2410{ 2434{
2435 struct rq *busiest = NULL, *rq;
2411 unsigned long max_load = 0; 2436 unsigned long max_load = 0;
2412 runqueue_t *busiest = NULL, *rqi;
2413 int i; 2437 int i;
2414 2438
2415 for_each_cpu_mask(i, group->cpumask) { 2439 for_each_cpu_mask(i, group->cpumask) {
2416 rqi = cpu_rq(i); 2440 rq = cpu_rq(i);
2417 2441
2418 if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance) 2442 if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
2419 continue; 2443 continue;
2420 2444
2421 if (rqi->raw_weighted_load > max_load) { 2445 if (rq->raw_weighted_load > max_load) {
2422 max_load = rqi->raw_weighted_load; 2446 max_load = rq->raw_weighted_load;
2423 busiest = rqi; 2447 busiest = rq;
2424 } 2448 }
2425 } 2449 }
2426 2450
@@ -2433,22 +2457,24 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2433 */ 2457 */
2434#define MAX_PINNED_INTERVAL 512 2458#define MAX_PINNED_INTERVAL 512
2435 2459
2436#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0) 2460static inline unsigned long minus_1_or_zero(unsigned long n)
2461{
2462 return n > 0 ? n - 1 : 0;
2463}
2464
2437/* 2465/*
2438 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2466 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2439 * tasks if there is an imbalance. 2467 * tasks if there is an imbalance.
2440 * 2468 *
2441 * Called with this_rq unlocked. 2469 * Called with this_rq unlocked.
2442 */ 2470 */
2443static int load_balance(int this_cpu, runqueue_t *this_rq, 2471static int load_balance(int this_cpu, struct rq *this_rq,
2444 struct sched_domain *sd, enum idle_type idle) 2472 struct sched_domain *sd, enum idle_type idle)
2445{ 2473{
2474 int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
2446 struct sched_group *group; 2475 struct sched_group *group;
2447 runqueue_t *busiest;
2448 unsigned long imbalance; 2476 unsigned long imbalance;
2449 int nr_moved, all_pinned = 0; 2477 struct rq *busiest;
2450 int active_balance = 0;
2451 int sd_idle = 0;
2452 2478
2453 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2479 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
2454 !sched_smt_power_savings) 2480 !sched_smt_power_savings)
@@ -2482,8 +2508,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2482 */ 2508 */
2483 double_rq_lock(this_rq, busiest); 2509 double_rq_lock(this_rq, busiest);
2484 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2510 nr_moved = move_tasks(this_rq, this_cpu, busiest,
2485 minus_1_or_zero(busiest->nr_running), 2511 minus_1_or_zero(busiest->nr_running),
2486 imbalance, sd, idle, &all_pinned); 2512 imbalance, sd, idle, &all_pinned);
2487 double_rq_unlock(this_rq, busiest); 2513 double_rq_unlock(this_rq, busiest);
2488 2514
2489 /* All tasks on this runqueue were pinned by CPU affinity */ 2515 /* All tasks on this runqueue were pinned by CPU affinity */
@@ -2556,7 +2582,8 @@ out_one_pinned:
2556 (sd->balance_interval < sd->max_interval)) 2582 (sd->balance_interval < sd->max_interval))
2557 sd->balance_interval *= 2; 2583 sd->balance_interval *= 2;
2558 2584
2559 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2585 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2586 !sched_smt_power_savings)
2560 return -1; 2587 return -1;
2561 return 0; 2588 return 0;
2562} 2589}
@@ -2568,11 +2595,11 @@ out_one_pinned:
2568 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). 2595 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
2569 * this_rq is locked. 2596 * this_rq is locked.
2570 */ 2597 */
2571static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, 2598static int
2572 struct sched_domain *sd) 2599load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2573{ 2600{
2574 struct sched_group *group; 2601 struct sched_group *group;
2575 runqueue_t *busiest = NULL; 2602 struct rq *busiest = NULL;
2576 unsigned long imbalance; 2603 unsigned long imbalance;
2577 int nr_moved = 0; 2604 int nr_moved = 0;
2578 int sd_idle = 0; 2605 int sd_idle = 0;
@@ -2618,9 +2645,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
2618 2645
2619out_balanced: 2646out_balanced:
2620 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2647 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
2621 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2648 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2649 !sched_smt_power_savings)
2622 return -1; 2650 return -1;
2623 sd->nr_balance_failed = 0; 2651 sd->nr_balance_failed = 0;
2652
2624 return 0; 2653 return 0;
2625} 2654}
2626 2655
@@ -2628,16 +2657,15 @@ out_balanced:
2628 * idle_balance is called by schedule() if this_cpu is about to become 2657 * idle_balance is called by schedule() if this_cpu is about to become
2629 * idle. Attempts to pull tasks from other CPUs. 2658 * idle. Attempts to pull tasks from other CPUs.
2630 */ 2659 */
2631static void idle_balance(int this_cpu, runqueue_t *this_rq) 2660static void idle_balance(int this_cpu, struct rq *this_rq)
2632{ 2661{
2633 struct sched_domain *sd; 2662 struct sched_domain *sd;
2634 2663
2635 for_each_domain(this_cpu, sd) { 2664 for_each_domain(this_cpu, sd) {
2636 if (sd->flags & SD_BALANCE_NEWIDLE) { 2665 if (sd->flags & SD_BALANCE_NEWIDLE) {
2637 if (load_balance_newidle(this_cpu, this_rq, sd)) { 2666 /* If we've pulled tasks over stop searching: */
2638 /* We've pulled tasks over so stop searching */ 2667 if (load_balance_newidle(this_cpu, this_rq, sd))
2639 break; 2668 break;
2640 }
2641 } 2669 }
2642 } 2670 }
2643} 2671}
@@ -2650,14 +2678,14 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)
2650 * 2678 *
2651 * Called with busiest_rq locked. 2679 * Called with busiest_rq locked.
2652 */ 2680 */
2653static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) 2681static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
2654{ 2682{
2655 struct sched_domain *sd;
2656 runqueue_t *target_rq;
2657 int target_cpu = busiest_rq->push_cpu; 2683 int target_cpu = busiest_rq->push_cpu;
2684 struct sched_domain *sd;
2685 struct rq *target_rq;
2658 2686
2687 /* Is there any task to move? */
2659 if (busiest_rq->nr_running <= 1) 2688 if (busiest_rq->nr_running <= 1)
2660 /* no task to move */
2661 return; 2689 return;
2662 2690
2663 target_rq = cpu_rq(target_cpu); 2691 target_rq = cpu_rq(target_cpu);
@@ -2675,21 +2703,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
2675 /* Search for an sd spanning us and the target CPU. */ 2703 /* Search for an sd spanning us and the target CPU. */
2676 for_each_domain(target_cpu, sd) { 2704 for_each_domain(target_cpu, sd) {
2677 if ((sd->flags & SD_LOAD_BALANCE) && 2705 if ((sd->flags & SD_LOAD_BALANCE) &&
2678 cpu_isset(busiest_cpu, sd->span)) 2706 cpu_isset(busiest_cpu, sd->span))
2679 break; 2707 break;
2680 } 2708 }
2681 2709
2682 if (unlikely(sd == NULL)) 2710 if (likely(sd)) {
2683 goto out; 2711 schedstat_inc(sd, alb_cnt);
2684
2685 schedstat_inc(sd, alb_cnt);
2686 2712
2687 if (move_tasks(target_rq, target_cpu, busiest_rq, 1, 2713 if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
2688 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL)) 2714 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE,
2689 schedstat_inc(sd, alb_pushed); 2715 NULL))
2690 else 2716 schedstat_inc(sd, alb_pushed);
2691 schedstat_inc(sd, alb_failed); 2717 else
2692out: 2718 schedstat_inc(sd, alb_failed);
2719 }
2693 spin_unlock(&target_rq->lock); 2720 spin_unlock(&target_rq->lock);
2694} 2721}
2695 2722
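
The active_load_balance() hunk above drops the out: label and nests the work under if (likely(sd)), so the unlock runs on a single straight-line exit. A toy sketch of the same restructuring (likely() shown as the usual __builtin_expect wrapper, standing in for the kernel macro):

#include <stdio.h>

#define likely(x)	__builtin_expect(!!(x), 1)	/* stand-in for the kernel macro */

struct domain {
	int id;
};

static struct domain *find_domain(int want)
{
	static struct domain d = { .id = 42 };

	return want == d.id ? &d : NULL;	/* may legitimately fail */
}

/* Before: "if (!sd) goto out;" followed by the work and an out: label.
 * After:  the work nests under if (likely(sd)) and the common cleanup
 *         (the final printf standing in for spin_unlock) runs on every path. */
static void act_on(int want)
{
	struct domain *sd = find_domain(want);

	if (likely(sd))
		printf("balancing inside domain %d\n", sd->id);

	printf("unlock (runs on every path)\n");
}

int main(void)
{
	act_on(42);
	act_on(7);
	return 0;
}
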
@@ -2702,23 +2729,27 @@ out:
2702 * Balancing parameters are set up in arch_init_sched_domains. 2729 * Balancing parameters are set up in arch_init_sched_domains.
2703 */ 2730 */
2704 2731
2705/* Don't have all balancing operations going off at once */ 2732/* Don't have all balancing operations going off at once: */
2706#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) 2733static inline unsigned long cpu_offset(int cpu)
2734{
2735 return jiffies + cpu * HZ / NR_CPUS;
2736}
2707 2737
2708static void rebalance_tick(int this_cpu, runqueue_t *this_rq, 2738static void
2709 enum idle_type idle) 2739rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
2710{ 2740{
2711 unsigned long old_load, this_load; 2741 unsigned long this_load, interval, j = cpu_offset(this_cpu);
2712 unsigned long j = jiffies + CPU_OFFSET(this_cpu);
2713 struct sched_domain *sd; 2742 struct sched_domain *sd;
2714 int i; 2743 int i, scale;
2715 2744
2716 this_load = this_rq->raw_weighted_load; 2745 this_load = this_rq->raw_weighted_load;
2717 /* Update our load */ 2746
2718 for (i = 0; i < 3; i++) { 2747 /* Update our load: */
2719 unsigned long new_load = this_load; 2748 for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
2720 int scale = 1 << i; 2749 unsigned long old_load, new_load;
2750
2721 old_load = this_rq->cpu_load[i]; 2751 old_load = this_rq->cpu_load[i];
2752 new_load = this_load;
2722 /* 2753 /*
2723 * Round up the averaging division if load is increasing. This 2754 * Round up the averaging division if load is increasing. This
2724 * prevents us from getting stuck on 9 if the load is 10, for 2755 * prevents us from getting stuck on 9 if the load is 10, for
@@ -2730,8 +2761,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2730 } 2761 }
2731 2762
2732 for_each_domain(this_cpu, sd) { 2763 for_each_domain(this_cpu, sd) {
2733 unsigned long interval;
2734
2735 if (!(sd->flags & SD_LOAD_BALANCE)) 2764 if (!(sd->flags & SD_LOAD_BALANCE))
2736 continue; 2765 continue;
2737 2766
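
The rebalance_tick() changes above fold scale into the loop header (it doubles per index) and keep the round-up comment; the division itself sits outside the hunk shown. A hedged userspace sketch of the decaying cpu_load[] average, assuming the (old*(scale-1)+new)/scale form this kernel generation uses - treat the exact formula as illustrative:

#include <stdio.h>

#define NR_LOAD_IDX	3

/*
 * Decaying per-CPU load averages: index 0 tracks the instantaneous load,
 * higher indices average over longer horizons (scale = 1, 2, 4). Rounding
 * up when the load rises keeps the average from sticking just below the
 * new value (stuck on 9 while the real load is 10, as the comment says).
 */
static void update_cpu_load(unsigned long cpu_load[NR_LOAD_IDX],
			    unsigned long this_load)
{
	unsigned long scale;
	int i;

	for (i = 0, scale = 1; i < NR_LOAD_IDX; i++, scale <<= 1) {
		unsigned long old_load = cpu_load[i];
		unsigned long new_load = this_load;

		if (new_load > old_load)
			new_load += scale - 1;	/* round the division up */
		cpu_load[i] = (old_load * (scale - 1) + new_load) / scale;
	}
}

int main(void)
{
	unsigned long cpu_load[NR_LOAD_IDX] = { 0, 0, 0 };
	int tick;

	for (tick = 0; tick < 5; tick++) {
		update_cpu_load(cpu_load, 10);	/* constant load of 10 */
		printf("tick %d: %lu %lu %lu\n", tick,
		       cpu_load[0], cpu_load[1], cpu_load[2]);
	}
	return 0;
}

With a constant load of 10, index 1 walks 5, 8, 9, 10 rather than stalling at 9, which is exactly what the round-up is for.
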
@@ -2761,17 +2790,18 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2761/* 2790/*
2762 * on UP we do not need to balance between CPUs: 2791 * on UP we do not need to balance between CPUs:
2763 */ 2792 */
2764static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) 2793static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
2765{ 2794{
2766} 2795}
2767static inline void idle_balance(int cpu, runqueue_t *rq) 2796static inline void idle_balance(int cpu, struct rq *rq)
2768{ 2797{
2769} 2798}
2770#endif 2799#endif
2771 2800
2772static inline int wake_priority_sleeper(runqueue_t *rq) 2801static inline int wake_priority_sleeper(struct rq *rq)
2773{ 2802{
2774 int ret = 0; 2803 int ret = 0;
2804
2775#ifdef CONFIG_SCHED_SMT 2805#ifdef CONFIG_SCHED_SMT
2776 spin_lock(&rq->lock); 2806 spin_lock(&rq->lock);
2777 /* 2807 /*
@@ -2795,25 +2825,26 @@ EXPORT_PER_CPU_SYMBOL(kstat);
2795 * This is called on clock ticks and on context switches. 2825 * This is called on clock ticks and on context switches.
2796 * Bank in p->sched_time the ns elapsed since the last tick or switch. 2826 * Bank in p->sched_time the ns elapsed since the last tick or switch.
2797 */ 2827 */
2798static inline void update_cpu_clock(task_t *p, runqueue_t *rq, 2828static inline void
2799 unsigned long long now) 2829update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
2800{ 2830{
2801 unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); 2831 p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
2802 p->sched_time += now - last;
2803} 2832}
2804 2833
2805/* 2834/*
2806 * Return current->sched_time plus any more ns on the sched_clock 2835 * Return current->sched_time plus any more ns on the sched_clock
2807 * that have not yet been banked. 2836 * that have not yet been banked.
2808 */ 2837 */
2809unsigned long long current_sched_time(const task_t *tsk) 2838unsigned long long current_sched_time(const struct task_struct *p)
2810{ 2839{
2811 unsigned long long ns; 2840 unsigned long long ns;
2812 unsigned long flags; 2841 unsigned long flags;
2842
2813 local_irq_save(flags); 2843 local_irq_save(flags);
2814 ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); 2844 ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
2815 ns = tsk->sched_time + (sched_clock() - ns); 2845 ns = p->sched_time + sched_clock() - ns;
2816 local_irq_restore(flags); 2846 local_irq_restore(flags);
2847
2817 return ns; 2848 return ns;
2818} 2849}
2819 2850
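
update_cpu_clock() and current_sched_time() above both bank the nanoseconds elapsed since whichever is later, the task's own timestamp or the runqueue's last tick, so no interval is counted twice. A minimal sketch of that max()-then-delta accounting with plain stand-ins for the timestamps and sched_clock():

#include <stdio.h>

static unsigned long long max_ull(unsigned long long a, unsigned long long b)
{
	return a > b ? a : b;
}

struct toy_task {
	unsigned long long sched_time;	/* ns already banked */
	unsigned long long timestamp;	/* last switch-in, ns */
};

/* Bank everything run since the later of the task's own timestamp and the
 * runqueue's last tick, so the same interval is never counted twice. */
static void update_cpu_clock(struct toy_task *p, unsigned long long rq_last_tick,
			     unsigned long long now)
{
	p->sched_time += now - max_ull(p->timestamp, rq_last_tick);
}

int main(void)
{
	struct toy_task p = { .sched_time = 0, .timestamp = 100 };

	update_cpu_clock(&p, 150, 200);	/* banks 200 - max(100, 150) = 50 ns */
	printf("banked: %llu ns\n", p.sched_time);
	return 0;
}
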
@@ -2827,11 +2858,16 @@ unsigned long long current_sched_time(const task_t *tsk)
2827 * increasing number of running tasks. We also ignore the interactivity 2858 * increasing number of running tasks. We also ignore the interactivity
2828 * if a better static_prio task has expired: 2859 * if a better static_prio task has expired:
2829 */ 2860 */
2830#define EXPIRED_STARVING(rq) \ 2861static inline int expired_starving(struct rq *rq)
2831 ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ 2862{
2832 (jiffies - (rq)->expired_timestamp >= \ 2863 if (rq->curr->static_prio > rq->best_expired_prio)
2833 STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ 2864 return 1;
2834 ((rq)->curr->static_prio > (rq)->best_expired_prio)) 2865 if (!STARVATION_LIMIT || !rq->expired_timestamp)
2866 return 0;
2867 if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
2868 return 1;
2869 return 0;
2870}
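
expired_starving() above unpacks the old EXPIRED_STARVING() macro into three early returns. A standalone sketch of the same check, with toy values for STARVATION_LIMIT and a simplified runqueue struct (the real fields live in struct rq elsewhere in sched.c):

#include <stdio.h>

#define STARVATION_LIMIT	10	/* toy value: ticks per runnable task */

struct toy_rq {
	unsigned long expired_timestamp;	/* jiffies when first task expired, 0 if none */
	unsigned long nr_running;
	int curr_static_prio;
	int best_expired_prio;
};

/* Returns 1 when the expired array should run soon: either a better
 * static priority is waiting there, or expired tasks have waited longer
 * than STARVATION_LIMIT ticks per runnable task. */
static int expired_starving(const struct toy_rq *rq, unsigned long jiffies_now)
{
	if (rq->curr_static_prio > rq->best_expired_prio)
		return 1;
	if (!STARVATION_LIMIT || !rq->expired_timestamp)
		return 0;
	if (jiffies_now - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
		return 1;
	return 0;
}

int main(void)
{
	struct toy_rq rq = {
		.expired_timestamp = 1000,
		.nr_running = 2,
		.curr_static_prio = 120,
		.best_expired_prio = 125,
	};

	printf("at 1015: %d\n", expired_starving(&rq, 1015));	/* 15 <= 20 -> 0 */
	printf("at 1025: %d\n", expired_starving(&rq, 1025));	/* 25 >  20 -> 1 */
	return 0;
}
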
2835 2871
2836/* 2872/*
2837 * Account user cpu time to a process. 2873 * Account user cpu time to a process.
@@ -2864,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
2864 cputime_t cputime) 2900 cputime_t cputime)
2865{ 2901{
2866 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2902 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2867 runqueue_t *rq = this_rq(); 2903 struct rq *rq = this_rq();
2868 cputime64_t tmp; 2904 cputime64_t tmp;
2869 2905
2870 p->stime = cputime_add(p->stime, cputime); 2906 p->stime = cputime_add(p->stime, cputime);
@@ -2894,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2894{ 2930{
2895 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2931 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2896 cputime64_t tmp = cputime_to_cputime64(steal); 2932 cputime64_t tmp = cputime_to_cputime64(steal);
2897 runqueue_t *rq = this_rq(); 2933 struct rq *rq = this_rq();
2898 2934
2899 if (p == rq->idle) { 2935 if (p == rq->idle) {
2900 p->stime = cputime_add(p->stime, steal); 2936 p->stime = cputime_add(p->stime, steal);
@@ -2915,10 +2951,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2915 */ 2951 */
2916void scheduler_tick(void) 2952void scheduler_tick(void)
2917{ 2953{
2918 int cpu = smp_processor_id();
2919 runqueue_t *rq = this_rq();
2920 task_t *p = current;
2921 unsigned long long now = sched_clock(); 2954 unsigned long long now = sched_clock();
2955 struct task_struct *p = current;
2956 int cpu = smp_processor_id();
2957 struct rq *rq = cpu_rq(cpu);
2922 2958
2923 update_cpu_clock(p, rq, now); 2959 update_cpu_clock(p, rq, now);
2924 2960
@@ -2968,7 +3004,7 @@ void scheduler_tick(void)
2968 3004
2969 if (!rq->expired_timestamp) 3005 if (!rq->expired_timestamp)
2970 rq->expired_timestamp = jiffies; 3006 rq->expired_timestamp = jiffies;
2971 if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { 3007 if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
2972 enqueue_task(p, rq->expired); 3008 enqueue_task(p, rq->expired);
2973 if (p->static_prio < rq->best_expired_prio) 3009 if (p->static_prio < rq->best_expired_prio)
2974 rq->best_expired_prio = p->static_prio; 3010 rq->best_expired_prio = p->static_prio;
@@ -3007,7 +3043,7 @@ out:
3007} 3043}
3008 3044
3009#ifdef CONFIG_SCHED_SMT 3045#ifdef CONFIG_SCHED_SMT
3010static inline void wakeup_busy_runqueue(runqueue_t *rq) 3046static inline void wakeup_busy_runqueue(struct rq *rq)
3011{ 3047{
3012 /* If an SMT runqueue is sleeping due to priority reasons wake it up */ 3048 /* If an SMT runqueue is sleeping due to priority reasons wake it up */
3013 if (rq->curr == rq->idle && rq->nr_running) 3049 if (rq->curr == rq->idle && rq->nr_running)
@@ -3033,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu)
3033 return; 3069 return;
3034 3070
3035 for_each_cpu_mask(i, sd->span) { 3071 for_each_cpu_mask(i, sd->span) {
3036 runqueue_t *smt_rq = cpu_rq(i); 3072 struct rq *smt_rq = cpu_rq(i);
3037 3073
3038 if (i == this_cpu) 3074 if (i == this_cpu)
3039 continue; 3075 continue;
@@ -3050,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu)
3050 * utilize, if another task runs on a sibling. This models the 3086 * utilize, if another task runs on a sibling. This models the
3051 * slowdown effect of other tasks running on siblings: 3087 * slowdown effect of other tasks running on siblings:
3052 */ 3088 */
3053static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) 3089static inline unsigned long
3090smt_slice(struct task_struct *p, struct sched_domain *sd)
3054{ 3091{
3055 return p->time_slice * (100 - sd->per_cpu_gain) / 100; 3092 return p->time_slice * (100 - sd->per_cpu_gain) / 100;
3056} 3093}
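
smt_slice() scales a sibling's timeslice by the share of the CPU a thread keeps when its SMT sibling is also running. A one-line sketch with the conventional per_cpu_gain of 25 (the actual default is set in the sched-domain initializers, not shown here):

#include <stdio.h>

/* Portion of the timeslice a task effectively gets when an SMT sibling is
 * busy; per_cpu_gain is the percentage of extra throughput the sibling adds. */
static unsigned long smt_slice(unsigned long time_slice, int per_cpu_gain)
{
	return time_slice * (100 - per_cpu_gain) / 100;
}

int main(void)
{
	/* With per_cpu_gain = 25, a slice of 100 counts as 75 for the
	 * dependent-sleeper comparisons. */
	printf("%lu\n", smt_slice(100, 25));
	return 0;
}
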
@@ -3061,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
3061 * acquire their lock. As we only trylock the normal locking order does not 3098 * acquire their lock. As we only trylock the normal locking order does not
3062 * need to be obeyed. 3099 * need to be obeyed.
3063 */ 3100 */
3064static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) 3101static int
3102dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3065{ 3103{
3066 struct sched_domain *tmp, *sd = NULL; 3104 struct sched_domain *tmp, *sd = NULL;
3067 int ret = 0, i; 3105 int ret = 0, i;
@@ -3081,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
3081 return 0; 3119 return 0;
3082 3120
3083 for_each_cpu_mask(i, sd->span) { 3121 for_each_cpu_mask(i, sd->span) {
3084 runqueue_t *smt_rq; 3122 struct task_struct *smt_curr;
3085 task_t *smt_curr; 3123 struct rq *smt_rq;
3086 3124
3087 if (i == this_cpu) 3125 if (i == this_cpu)
3088 continue; 3126 continue;
@@ -3127,9 +3165,8 @@ unlock:
3127static inline void wake_sleeping_dependent(int this_cpu) 3165static inline void wake_sleeping_dependent(int this_cpu)
3128{ 3166{
3129} 3167}
3130 3168static inline int
3131static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, 3169dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3132 task_t *p)
3133{ 3170{
3134 return 0; 3171 return 0;
3135} 3172}
@@ -3142,12 +3179,13 @@ void fastcall add_preempt_count(int val)
3142 /* 3179 /*
3143 * Underflow? 3180 * Underflow?
3144 */ 3181 */
3145 BUG_ON((preempt_count() < 0)); 3182 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
3183 return;
3146 preempt_count() += val; 3184 preempt_count() += val;
3147 /* 3185 /*
3148 * Spinlock count overflowing soon? 3186 * Spinlock count overflowing soon?
3149 */ 3187 */
3150 BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); 3188 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
3151} 3189}
3152EXPORT_SYMBOL(add_preempt_count); 3190EXPORT_SYMBOL(add_preempt_count);
3153 3191
@@ -3156,11 +3194,15 @@ void fastcall sub_preempt_count(int val)
3156 /* 3194 /*
3157 * Underflow? 3195 * Underflow?
3158 */ 3196 */
3159 BUG_ON(val > preempt_count()); 3197 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
3198 return;
3160 /* 3199 /*
3161 * Is the spinlock portion underflowing? 3200 * Is the spinlock portion underflowing?
3162 */ 3201 */
3163 BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); 3202 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
3203 !(preempt_count() & PREEMPT_MASK)))
3204 return;
3205
3164 preempt_count() -= val; 3206 preempt_count() -= val;
3165} 3207}
3166EXPORT_SYMBOL(sub_preempt_count); 3208EXPORT_SYMBOL(sub_preempt_count);
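
The two preempt-count hunks above swap BUG_ON() for DEBUG_LOCKS_WARN_ON() plus an early return, so a corrupted counter produces a warning instead of a panic. A hedged sketch of that warn-and-bail shape with a plain stand-in for the kernel macro (the real one also switches further lock debugging off):

#include <stdio.h>

/* Stand-in for DEBUG_LOCKS_WARN_ON(): report the condition once and
 * return its truth value so the caller can bail out gracefully. */
static int debug_warn_on(int cond, const char *what)
{
	static int warned;

	if (cond && !warned) {
		warned = 1;
		fprintf(stderr, "WARNING: %s\n", what);
	}
	return cond;
}

static int preempt_count;

static void add_preempt_count(int val)
{
	/* Underflow? Warn and keep the old count instead of crashing. */
	if (debug_warn_on(preempt_count < 0, "preempt count underflow"))
		return;
	preempt_count += val;
}

static void sub_preempt_count(int val)
{
	if (debug_warn_on(val > preempt_count, "preempt count about to underflow"))
		return;
	preempt_count -= val;
}

int main(void)
{
	add_preempt_count(1);
	sub_preempt_count(2);	/* triggers the warning, count stays at 1 */
	printf("preempt_count = %d\n", preempt_count);
	return 0;
}
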
@@ -3178,14 +3220,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
3178 */ 3220 */
3179asmlinkage void __sched schedule(void) 3221asmlinkage void __sched schedule(void)
3180{ 3222{
3181 long *switch_count; 3223 struct task_struct *prev, *next;
3182 task_t *prev, *next; 3224 struct prio_array *array;
3183 runqueue_t *rq;
3184 prio_array_t *array;
3185 struct list_head *queue; 3225 struct list_head *queue;
3186 unsigned long long now; 3226 unsigned long long now;
3187 unsigned long run_time; 3227 unsigned long run_time;
3188 int cpu, idx, new_prio; 3228 int cpu, idx, new_prio;
3229 long *switch_count;
3230 struct rq *rq;
3189 3231
3190 /* 3232 /*
3191 * Test if we are atomic. Since do_exit() needs to call into 3233 * Test if we are atomic. Since do_exit() needs to call into
@@ -3275,7 +3317,7 @@ need_resched_nonpreemptible:
3275 3317
3276 idx = sched_find_first_bit(array->bitmap); 3318 idx = sched_find_first_bit(array->bitmap);
3277 queue = array->queue + idx; 3319 queue = array->queue + idx;
3278 next = list_entry(queue->next, task_t, run_list); 3320 next = list_entry(queue->next, struct task_struct, run_list);
3279 3321
3280 if (!rt_task(next) && interactive_sleep(next->sleep_type)) { 3322 if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
3281 unsigned long long delta = now - next->timestamp; 3323 unsigned long long delta = now - next->timestamp;
@@ -3338,7 +3380,6 @@ switch_tasks:
3338 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3380 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3339 goto need_resched; 3381 goto need_resched;
3340} 3382}
3341
3342EXPORT_SYMBOL(schedule); 3383EXPORT_SYMBOL(schedule);
3343 3384
3344#ifdef CONFIG_PREEMPT 3385#ifdef CONFIG_PREEMPT
@@ -3383,7 +3424,6 @@ need_resched:
3383 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3424 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3384 goto need_resched; 3425 goto need_resched;
3385} 3426}
3386
3387EXPORT_SYMBOL(preempt_schedule); 3427EXPORT_SYMBOL(preempt_schedule);
3388 3428
3389/* 3429/*
@@ -3432,10 +3472,8 @@ need_resched:
3432int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, 3472int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
3433 void *key) 3473 void *key)
3434{ 3474{
3435 task_t *p = curr->private; 3475 return try_to_wake_up(curr->private, mode, sync);
3436 return try_to_wake_up(p, mode, sync);
3437} 3476}
3438
3439EXPORT_SYMBOL(default_wake_function); 3477EXPORT_SYMBOL(default_wake_function);
3440 3478
3441/* 3479/*
@@ -3453,13 +3491,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3453 struct list_head *tmp, *next; 3491 struct list_head *tmp, *next;
3454 3492
3455 list_for_each_safe(tmp, next, &q->task_list) { 3493 list_for_each_safe(tmp, next, &q->task_list) {
3456 wait_queue_t *curr; 3494 wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
3457 unsigned flags; 3495 unsigned flags = curr->flags;
3458 curr = list_entry(tmp, wait_queue_t, task_list); 3496
3459 flags = curr->flags;
3460 if (curr->func(curr, mode, sync, key) && 3497 if (curr->func(curr, mode, sync, key) &&
3461 (flags & WQ_FLAG_EXCLUSIVE) && 3498 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
3462 !--nr_exclusive)
3463 break; 3499 break;
3464 } 3500 }
3465} 3501}
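
__wake_up_common() above still walks the queue with list_for_each_safe() and still snapshots curr->flags before calling curr->func(), because an exclusive waiter may remove itself from the list inside the callback. A userspace sketch of that iterate-while-entries-self-remove pattern over a minimal singly linked list (standing in for the kernel's list.h):

#include <stdio.h>
#include <stdlib.h>

struct waiter {
	int exclusive;
	struct waiter *next;
};

/* Wake each waiter once; an entry may be freed by its own wakeup, so its
 * successor and its flags are sampled before the "callback" runs - the
 * same reason the kernel walk uses list_for_each_safe() and copies
 * curr->flags ahead of curr->func(). */
static void wake_all(struct waiter **head, int nr_exclusive)
{
	struct waiter *curr = *head;

	while (curr) {
		struct waiter *next = curr->next;	/* sample before freeing */
		int exclusive = curr->exclusive;	/* sample before waking */

		printf("woke %s waiter\n", exclusive ? "exclusive" : "shared");
		free(curr);				/* the "self-removal" */
		curr = next;

		if (exclusive && !--nr_exclusive)
			break;
	}
	*head = curr;	/* whatever was not woken stays queued */
}

static struct waiter *push(struct waiter *head, int exclusive)
{
	struct waiter *w = malloc(sizeof(*w));

	w->exclusive = exclusive;
	w->next = head;
	return w;
}

int main(void)
{
	struct waiter *head = NULL;

	head = push(head, 1);
	head = push(head, 0);
	head = push(head, 0);
	wake_all(&head, 1);	/* two shared wakeups, then the exclusive one ends the walk */
	return 0;
}
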
@@ -3480,7 +3516,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
3480 __wake_up_common(q, mode, nr_exclusive, 0, key); 3516 __wake_up_common(q, mode, nr_exclusive, 0, key);
3481 spin_unlock_irqrestore(&q->lock, flags); 3517 spin_unlock_irqrestore(&q->lock, flags);
3482} 3518}
3483
3484EXPORT_SYMBOL(__wake_up); 3519EXPORT_SYMBOL(__wake_up);
3485 3520
3486/* 3521/*
@@ -3549,6 +3584,7 @@ EXPORT_SYMBOL(complete_all);
3549void fastcall __sched wait_for_completion(struct completion *x) 3584void fastcall __sched wait_for_completion(struct completion *x)
3550{ 3585{
3551 might_sleep(); 3586 might_sleep();
3587
3552 spin_lock_irq(&x->wait.lock); 3588 spin_lock_irq(&x->wait.lock);
3553 if (!x->done) { 3589 if (!x->done) {
3554 DECLARE_WAITQUEUE(wait, current); 3590 DECLARE_WAITQUEUE(wait, current);
@@ -3693,7 +3729,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
3693 schedule(); 3729 schedule();
3694 SLEEP_ON_TAIL 3730 SLEEP_ON_TAIL
3695} 3731}
3696
3697EXPORT_SYMBOL(interruptible_sleep_on); 3732EXPORT_SYMBOL(interruptible_sleep_on);
3698 3733
3699long fastcall __sched 3734long fastcall __sched
@@ -3709,7 +3744,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
3709 3744
3710 return timeout; 3745 return timeout;
3711} 3746}
3712
3713EXPORT_SYMBOL(interruptible_sleep_on_timeout); 3747EXPORT_SYMBOL(interruptible_sleep_on_timeout);
3714 3748
3715void fastcall __sched sleep_on(wait_queue_head_t *q) 3749void fastcall __sched sleep_on(wait_queue_head_t *q)
@@ -3722,7 +3756,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q)
3722 schedule(); 3756 schedule();
3723 SLEEP_ON_TAIL 3757 SLEEP_ON_TAIL
3724} 3758}
3725
3726EXPORT_SYMBOL(sleep_on); 3759EXPORT_SYMBOL(sleep_on);
3727 3760
3728long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) 3761long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
@@ -3752,11 +3785,11 @@ EXPORT_SYMBOL(sleep_on_timeout);
3752 * 3785 *
3753 * Used by the rt_mutex code to implement priority inheritance logic. 3786 * Used by the rt_mutex code to implement priority inheritance logic.
3754 */ 3787 */
3755void rt_mutex_setprio(task_t *p, int prio) 3788void rt_mutex_setprio(struct task_struct *p, int prio)
3756{ 3789{
3790 struct prio_array *array;
3757 unsigned long flags; 3791 unsigned long flags;
3758 prio_array_t *array; 3792 struct rq *rq;
3759 runqueue_t *rq;
3760 int oldprio; 3793 int oldprio;
3761 3794
3762 BUG_ON(prio < 0 || prio > MAX_PRIO); 3795 BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3793,12 +3826,12 @@ void rt_mutex_setprio(task_t *p, int prio)
3793 3826
3794#endif 3827#endif
3795 3828
3796void set_user_nice(task_t *p, long nice) 3829void set_user_nice(struct task_struct *p, long nice)
3797{ 3830{
3798 unsigned long flags; 3831 struct prio_array *array;
3799 prio_array_t *array;
3800 runqueue_t *rq;
3801 int old_prio, delta; 3832 int old_prio, delta;
3833 unsigned long flags;
3834 struct rq *rq;
3802 3835
3803 if (TASK_NICE(p) == nice || nice < -20 || nice > 19) 3836 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
3804 return; 3837 return;
@@ -3849,10 +3882,11 @@ EXPORT_SYMBOL(set_user_nice);
3849 * @p: task 3882 * @p: task
3850 * @nice: nice value 3883 * @nice: nice value
3851 */ 3884 */
3852int can_nice(const task_t *p, const int nice) 3885int can_nice(const struct task_struct *p, const int nice)
3853{ 3886{
3854 /* convert nice value [19,-20] to rlimit style value [1,40] */ 3887 /* convert nice value [19,-20] to rlimit style value [1,40] */
3855 int nice_rlim = 20 - nice; 3888 int nice_rlim = 20 - nice;
3889
3856 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || 3890 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
3857 capable(CAP_SYS_NICE)); 3891 capable(CAP_SYS_NICE));
3858} 3892}
@@ -3868,8 +3902,7 @@ int can_nice(const task_t *p, const int nice)
3868 */ 3902 */
3869asmlinkage long sys_nice(int increment) 3903asmlinkage long sys_nice(int increment)
3870{ 3904{
3871 int retval; 3905 long nice, retval;
3872 long nice;
3873 3906
3874 /* 3907 /*
3875 * Setpriority might change our priority at the same moment. 3908 * Setpriority might change our priority at the same moment.
@@ -3908,7 +3941,7 @@ asmlinkage long sys_nice(int increment)
3908 * RT tasks are offset by -200. Normal tasks are centered 3941 * RT tasks are offset by -200. Normal tasks are centered
3909 * around 0, value goes from -16 to +15. 3942 * around 0, value goes from -16 to +15.
3910 */ 3943 */
3911int task_prio(const task_t *p) 3944int task_prio(const struct task_struct *p)
3912{ 3945{
3913 return p->prio - MAX_RT_PRIO; 3946 return p->prio - MAX_RT_PRIO;
3914} 3947}
@@ -3917,7 +3950,7 @@ int task_prio(const task_t *p)
3917 * task_nice - return the nice value of a given task. 3950 * task_nice - return the nice value of a given task.
3918 * @p: the task in question. 3951 * @p: the task in question.
3919 */ 3952 */
3920int task_nice(const task_t *p) 3953int task_nice(const struct task_struct *p)
3921{ 3954{
3922 return TASK_NICE(p); 3955 return TASK_NICE(p);
3923} 3956}
@@ -3936,7 +3969,7 @@ int idle_cpu(int cpu)
3936 * idle_task - return the idle task for a given cpu. 3969 * idle_task - return the idle task for a given cpu.
3937 * @cpu: the processor in question. 3970 * @cpu: the processor in question.
3938 */ 3971 */
3939task_t *idle_task(int cpu) 3972struct task_struct *idle_task(int cpu)
3940{ 3973{
3941 return cpu_rq(cpu)->idle; 3974 return cpu_rq(cpu)->idle;
3942} 3975}
@@ -3945,7 +3978,7 @@ task_t *idle_task(int cpu)
3945 * find_process_by_pid - find a process with a matching PID value. 3978 * find_process_by_pid - find a process with a matching PID value.
3946 * @pid: the pid in question. 3979 * @pid: the pid in question.
3947 */ 3980 */
3948static inline task_t *find_process_by_pid(pid_t pid) 3981static inline struct task_struct *find_process_by_pid(pid_t pid)
3949{ 3982{
3950 return pid ? find_task_by_pid(pid) : current; 3983 return pid ? find_task_by_pid(pid) : current;
3951} 3984}
@@ -3954,6 +3987,7 @@ static inline task_t *find_process_by_pid(pid_t pid)
3954static void __setscheduler(struct task_struct *p, int policy, int prio) 3987static void __setscheduler(struct task_struct *p, int policy, int prio)
3955{ 3988{
3956 BUG_ON(p->array); 3989 BUG_ON(p->array);
3990
3957 p->policy = policy; 3991 p->policy = policy;
3958 p->rt_priority = prio; 3992 p->rt_priority = prio;
3959 p->normal_prio = normal_prio(p); 3993 p->normal_prio = normal_prio(p);
@@ -3977,11 +4011,10 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3977int sched_setscheduler(struct task_struct *p, int policy, 4011int sched_setscheduler(struct task_struct *p, int policy,
3978 struct sched_param *param) 4012 struct sched_param *param)
3979{ 4013{
3980 int retval; 4014 int retval, oldprio, oldpolicy = -1;
3981 int oldprio, oldpolicy = -1; 4015 struct prio_array *array;
3982 prio_array_t *array;
3983 unsigned long flags; 4016 unsigned long flags;
3984 runqueue_t *rq; 4017 struct rq *rq;
3985 4018
3986 /* may grab non-irq protected spin_locks */ 4019 /* may grab non-irq protected spin_locks */
3987 BUG_ON(in_interrupt()); 4020 BUG_ON(in_interrupt());
@@ -4079,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
4079static int 4112static int
4080do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 4113do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4081{ 4114{
4082 int retval;
4083 struct sched_param lparam; 4115 struct sched_param lparam;
4084 struct task_struct *p; 4116 struct task_struct *p;
4117 int retval;
4085 4118
4086 if (!param || pid < 0) 4119 if (!param || pid < 0)
4087 return -EINVAL; 4120 return -EINVAL;
@@ -4097,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4097 read_unlock_irq(&tasklist_lock); 4130 read_unlock_irq(&tasklist_lock);
4098 retval = sched_setscheduler(p, policy, &lparam); 4131 retval = sched_setscheduler(p, policy, &lparam);
4099 put_task_struct(p); 4132 put_task_struct(p);
4133
4100 return retval; 4134 return retval;
4101} 4135}
4102 4136
@@ -4132,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
4132 */ 4166 */
4133asmlinkage long sys_sched_getscheduler(pid_t pid) 4167asmlinkage long sys_sched_getscheduler(pid_t pid)
4134{ 4168{
4169 struct task_struct *p;
4135 int retval = -EINVAL; 4170 int retval = -EINVAL;
4136 task_t *p;
4137 4171
4138 if (pid < 0) 4172 if (pid < 0)
4139 goto out_nounlock; 4173 goto out_nounlock;
@@ -4160,8 +4194,8 @@ out_nounlock:
4160asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) 4194asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
4161{ 4195{
4162 struct sched_param lp; 4196 struct sched_param lp;
4197 struct task_struct *p;
4163 int retval = -EINVAL; 4198 int retval = -EINVAL;
4164 task_t *p;
4165 4199
4166 if (!param || pid < 0) 4200 if (!param || pid < 0)
4167 goto out_nounlock; 4201 goto out_nounlock;
@@ -4194,9 +4228,9 @@ out_unlock:
4194 4228
4195long sched_setaffinity(pid_t pid, cpumask_t new_mask) 4229long sched_setaffinity(pid_t pid, cpumask_t new_mask)
4196{ 4230{
4197 task_t *p;
4198 int retval;
4199 cpumask_t cpus_allowed; 4231 cpumask_t cpus_allowed;
4232 struct task_struct *p;
4233 int retval;
4200 4234
4201 lock_cpu_hotplug(); 4235 lock_cpu_hotplug();
4202 read_lock(&tasklist_lock); 4236 read_lock(&tasklist_lock);
@@ -4282,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
4282 4316
4283long sched_getaffinity(pid_t pid, cpumask_t *mask) 4317long sched_getaffinity(pid_t pid, cpumask_t *mask)
4284{ 4318{
4319 struct task_struct *p;
4285 int retval; 4320 int retval;
4286 task_t *p;
4287 4321
4288 lock_cpu_hotplug(); 4322 lock_cpu_hotplug();
4289 read_lock(&tasklist_lock); 4323 read_lock(&tasklist_lock);
@@ -4342,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
4342 */ 4376 */
4343asmlinkage long sys_sched_yield(void) 4377asmlinkage long sys_sched_yield(void)
4344{ 4378{
4345 runqueue_t *rq = this_rq_lock(); 4379 struct rq *rq = this_rq_lock();
4346 prio_array_t *array = current->array; 4380 struct prio_array *array = current->array, *target = rq->expired;
4347 prio_array_t *target = rq->expired;
4348 4381
4349 schedstat_inc(rq, yld_cnt); 4382 schedstat_inc(rq, yld_cnt);
4350 /* 4383 /*
@@ -4378,6 +4411,7 @@ asmlinkage long sys_sched_yield(void)
4378 * no need to preempt or enable interrupts: 4411 * no need to preempt or enable interrupts:
4379 */ 4412 */
4380 __release(rq->lock); 4413 __release(rq->lock);
4414 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
4381 _raw_spin_unlock(&rq->lock); 4415 _raw_spin_unlock(&rq->lock);
4382 preempt_enable_no_resched(); 4416 preempt_enable_no_resched();
4383 4417
@@ -4441,6 +4475,7 @@ int cond_resched_lock(spinlock_t *lock)
4441 spin_lock(lock); 4475 spin_lock(lock);
4442 } 4476 }
4443 if (need_resched() && __resched_legal()) { 4477 if (need_resched() && __resched_legal()) {
4478 spin_release(&lock->dep_map, 1, _THIS_IP_);
4444 _raw_spin_unlock(lock); 4479 _raw_spin_unlock(lock);
4445 preempt_enable_no_resched(); 4480 preempt_enable_no_resched();
4446 __cond_resched(); 4481 __cond_resched();
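
sys_sched_yield() and cond_resched_lock() above bypass the spin_unlock() wrapper and call _raw_spin_unlock() directly, so they now tell lockdep about the release first via spin_release(). A toy analogy (not lockdep itself): a shadow held-lock count that the instrumented wrappers maintain, which a fast path must update by hand when it uses the raw primitive:

#include <stdio.h>

/* Toy "lockdep": the instrumented wrappers keep a shadow count of held
 * locks.  A fast path that calls the raw primitive directly must update
 * the shadow state by hand first - the analogue of the spin_release()
 * annotation added before _raw_spin_unlock() in the hunks above. */
static int lock_word;		/* 1 while "held" */
static int locks_held;		/* what the checker believes */

static void raw_lock(void)   { lock_word = 1; }
static void raw_unlock(void) { lock_word = 0; }

static void instrumented_lock(void)
{
	raw_lock();
	locks_held++;
}

static void instrumented_unlock(void)
{
	locks_held--;		/* the release annotation */
	raw_unlock();
}

static void fast_path_drop(void)
{
	locks_held--;		/* "spin_release(&lock->dep_map, 1, _THIS_IP_)" */
	raw_unlock();		/* "_raw_spin_unlock(lock)" */
}

int main(void)
{
	instrumented_lock();
	fast_path_drop();
	printf("held=%d believed=%d\n", lock_word, locks_held);	/* 0 0 */

	instrumented_lock();
	instrumented_unlock();
	printf("held=%d believed=%d\n", lock_word, locks_held);	/* 0 0 */
	return 0;
}
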
@@ -4456,7 +4491,9 @@ int __sched cond_resched_softirq(void)
4456 BUG_ON(!in_softirq()); 4491 BUG_ON(!in_softirq());
4457 4492
4458 if (need_resched() && __resched_legal()) { 4493 if (need_resched() && __resched_legal()) {
4459 __local_bh_enable(); 4494 raw_local_irq_disable();
4495 _local_bh_enable();
4496 raw_local_irq_enable();
4460 __cond_resched(); 4497 __cond_resched();
4461 local_bh_disable(); 4498 local_bh_disable();
4462 return 1; 4499 return 1;
@@ -4476,7 +4513,6 @@ void __sched yield(void)
4476 set_current_state(TASK_RUNNING); 4513 set_current_state(TASK_RUNNING);
4477 sys_sched_yield(); 4514 sys_sched_yield();
4478} 4515}
4479
4480EXPORT_SYMBOL(yield); 4516EXPORT_SYMBOL(yield);
4481 4517
4482/* 4518/*
@@ -4488,18 +4524,17 @@ EXPORT_SYMBOL(yield);
4488 */ 4524 */
4489void __sched io_schedule(void) 4525void __sched io_schedule(void)
4490{ 4526{
4491 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4527 struct rq *rq = &__raw_get_cpu_var(runqueues);
4492 4528
4493 atomic_inc(&rq->nr_iowait); 4529 atomic_inc(&rq->nr_iowait);
4494 schedule(); 4530 schedule();
4495 atomic_dec(&rq->nr_iowait); 4531 atomic_dec(&rq->nr_iowait);
4496} 4532}
4497
4498EXPORT_SYMBOL(io_schedule); 4533EXPORT_SYMBOL(io_schedule);
4499 4534
4500long __sched io_schedule_timeout(long timeout) 4535long __sched io_schedule_timeout(long timeout)
4501{ 4536{
4502 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4537 struct rq *rq = &__raw_get_cpu_var(runqueues);
4503 long ret; 4538 long ret;
4504 4539
4505 atomic_inc(&rq->nr_iowait); 4540 atomic_inc(&rq->nr_iowait);
@@ -4566,9 +4601,9 @@ asmlinkage long sys_sched_get_priority_min(int policy)
4566asmlinkage 4601asmlinkage
4567long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) 4602long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
4568{ 4603{
4604 struct task_struct *p;
4569 int retval = -EINVAL; 4605 int retval = -EINVAL;
4570 struct timespec t; 4606 struct timespec t;
4571 task_t *p;
4572 4607
4573 if (pid < 0) 4608 if (pid < 0)
4574 goto out_nounlock; 4609 goto out_nounlock;
@@ -4596,28 +4631,32 @@ out_unlock:
4596 4631
4597static inline struct task_struct *eldest_child(struct task_struct *p) 4632static inline struct task_struct *eldest_child(struct task_struct *p)
4598{ 4633{
4599 if (list_empty(&p->children)) return NULL; 4634 if (list_empty(&p->children))
4635 return NULL;
4600 return list_entry(p->children.next,struct task_struct,sibling); 4636 return list_entry(p->children.next,struct task_struct,sibling);
4601} 4637}
4602 4638
4603static inline struct task_struct *older_sibling(struct task_struct *p) 4639static inline struct task_struct *older_sibling(struct task_struct *p)
4604{ 4640{
4605 if (p->sibling.prev==&p->parent->children) return NULL; 4641 if (p->sibling.prev==&p->parent->children)
4642 return NULL;
4606 return list_entry(p->sibling.prev,struct task_struct,sibling); 4643 return list_entry(p->sibling.prev,struct task_struct,sibling);
4607} 4644}
4608 4645
4609static inline struct task_struct *younger_sibling(struct task_struct *p) 4646static inline struct task_struct *younger_sibling(struct task_struct *p)
4610{ 4647{
4611 if (p->sibling.next==&p->parent->children) return NULL; 4648 if (p->sibling.next==&p->parent->children)
4649 return NULL;
4612 return list_entry(p->sibling.next,struct task_struct,sibling); 4650 return list_entry(p->sibling.next,struct task_struct,sibling);
4613} 4651}
4614 4652
4615static void show_task(task_t *p) 4653static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
4654
4655static void show_task(struct task_struct *p)
4616{ 4656{
4617 task_t *relative; 4657 struct task_struct *relative;
4618 unsigned state;
4619 unsigned long free = 0; 4658 unsigned long free = 0;
4620 static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; 4659 unsigned state;
4621 4660
4622 printk("%-13.13s ", p->comm); 4661 printk("%-13.13s ", p->comm);
4623 state = p->state ? __ffs(p->state) + 1 : 0; 4662 state = p->state ? __ffs(p->state) + 1 : 0;
@@ -4668,7 +4707,7 @@ static void show_task(task_t *p)
4668 4707
4669void show_state(void) 4708void show_state(void)
4670{ 4709{
4671 task_t *g, *p; 4710 struct task_struct *g, *p;
4672 4711
4673#if (BITS_PER_LONG == 32) 4712#if (BITS_PER_LONG == 32)
4674 printk("\n" 4713 printk("\n"
@@ -4690,7 +4729,7 @@ void show_state(void)
4690 } while_each_thread(g, p); 4729 } while_each_thread(g, p);
4691 4730
4692 read_unlock(&tasklist_lock); 4731 read_unlock(&tasklist_lock);
4693 mutex_debug_show_all_locks(); 4732 debug_show_all_locks();
4694} 4733}
4695 4734
4696/** 4735/**
@@ -4701,9 +4740,9 @@ void show_state(void)
4701 * NOTE: this function does not set the idle thread's NEED_RESCHED 4740 * NOTE: this function does not set the idle thread's NEED_RESCHED
4702 * flag, to make booting more robust. 4741 * flag, to make booting more robust.
4703 */ 4742 */
4704void __devinit init_idle(task_t *idle, int cpu) 4743void __devinit init_idle(struct task_struct *idle, int cpu)
4705{ 4744{
4706 runqueue_t *rq = cpu_rq(cpu); 4745 struct rq *rq = cpu_rq(cpu);
4707 unsigned long flags; 4746 unsigned long flags;
4708 4747
4709 idle->timestamp = sched_clock(); 4748 idle->timestamp = sched_clock();
@@ -4742,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4742/* 4781/*
4743 * This is how migration works: 4782 * This is how migration works:
4744 * 4783 *
4745 * 1) we queue a migration_req_t structure in the source CPU's 4784 * 1) we queue a struct migration_req structure in the source CPU's
4746 * runqueue and wake up that CPU's migration thread. 4785 * runqueue and wake up that CPU's migration thread.
4747 * 2) we down() the locked semaphore => thread blocks. 4786 * 2) we down() the locked semaphore => thread blocks.
4748 * 3) migration thread wakes up (implicitly it forces the migrated 4787 * 3) migration thread wakes up (implicitly it forces the migrated
@@ -4764,12 +4803,12 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4764 * task must not exit() & deallocate itself prematurely. The 4803 * task must not exit() & deallocate itself prematurely. The
4765 * call is not atomic; no spinlocks may be held. 4804 * call is not atomic; no spinlocks may be held.
4766 */ 4805 */
4767int set_cpus_allowed(task_t *p, cpumask_t new_mask) 4806int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
4768{ 4807{
4808 struct migration_req req;
4769 unsigned long flags; 4809 unsigned long flags;
4810 struct rq *rq;
4770 int ret = 0; 4811 int ret = 0;
4771 migration_req_t req;
4772 runqueue_t *rq;
4773 4812
4774 rq = task_rq_lock(p, &flags); 4813 rq = task_rq_lock(p, &flags);
4775 if (!cpus_intersects(new_mask, cpu_online_map)) { 4814 if (!cpus_intersects(new_mask, cpu_online_map)) {
@@ -4792,9 +4831,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
4792 } 4831 }
4793out: 4832out:
4794 task_rq_unlock(rq, &flags); 4833 task_rq_unlock(rq, &flags);
4834
4795 return ret; 4835 return ret;
4796} 4836}
4797
4798EXPORT_SYMBOL_GPL(set_cpus_allowed); 4837EXPORT_SYMBOL_GPL(set_cpus_allowed);
4799 4838
4800/* 4839/*
@@ -4810,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
4810 */ 4849 */
4811static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 4850static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4812{ 4851{
4813 runqueue_t *rq_dest, *rq_src; 4852 struct rq *rq_dest, *rq_src;
4814 int ret = 0; 4853 int ret = 0;
4815 4854
4816 if (unlikely(cpu_is_offline(dest_cpu))) 4855 if (unlikely(cpu_is_offline(dest_cpu)))
@@ -4855,16 +4894,16 @@ out:
4855 */ 4894 */
4856static int migration_thread(void *data) 4895static int migration_thread(void *data)
4857{ 4896{
4858 runqueue_t *rq;
4859 int cpu = (long)data; 4897 int cpu = (long)data;
4898 struct rq *rq;
4860 4899
4861 rq = cpu_rq(cpu); 4900 rq = cpu_rq(cpu);
4862 BUG_ON(rq->migration_thread != current); 4901 BUG_ON(rq->migration_thread != current);
4863 4902
4864 set_current_state(TASK_INTERRUPTIBLE); 4903 set_current_state(TASK_INTERRUPTIBLE);
4865 while (!kthread_should_stop()) { 4904 while (!kthread_should_stop()) {
4905 struct migration_req *req;
4866 struct list_head *head; 4906 struct list_head *head;
4867 migration_req_t *req;
4868 4907
4869 try_to_freeze(); 4908 try_to_freeze();
4870 4909
@@ -4888,7 +4927,7 @@ static int migration_thread(void *data)
4888 set_current_state(TASK_INTERRUPTIBLE); 4927 set_current_state(TASK_INTERRUPTIBLE);
4889 continue; 4928 continue;
4890 } 4929 }
4891 req = list_entry(head->next, migration_req_t, list); 4930 req = list_entry(head->next, struct migration_req, list);
4892 list_del_init(head->next); 4931 list_del_init(head->next);
4893 4932
4894 spin_unlock(&rq->lock); 4933 spin_unlock(&rq->lock);
@@ -4913,28 +4952,28 @@ wait_to_die:
4913 4952
4914#ifdef CONFIG_HOTPLUG_CPU 4953#ifdef CONFIG_HOTPLUG_CPU
4915/* Figure out where task on dead CPU should go, use force if neccessary. */ 4954/* Figure out where task on dead CPU should go, use force if neccessary. */
4916static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) 4955static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
4917{ 4956{
4918 runqueue_t *rq;
4919 unsigned long flags; 4957 unsigned long flags;
4920 int dest_cpu;
4921 cpumask_t mask; 4958 cpumask_t mask;
4959 struct rq *rq;
4960 int dest_cpu;
4922 4961
4923restart: 4962restart:
4924 /* On same node? */ 4963 /* On same node? */
4925 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 4964 mask = node_to_cpumask(cpu_to_node(dead_cpu));
4926 cpus_and(mask, mask, tsk->cpus_allowed); 4965 cpus_and(mask, mask, p->cpus_allowed);
4927 dest_cpu = any_online_cpu(mask); 4966 dest_cpu = any_online_cpu(mask);
4928 4967
4929 /* On any allowed CPU? */ 4968 /* On any allowed CPU? */
4930 if (dest_cpu == NR_CPUS) 4969 if (dest_cpu == NR_CPUS)
4931 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4970 dest_cpu = any_online_cpu(p->cpus_allowed);
4932 4971
4933 /* No more Mr. Nice Guy. */ 4972 /* No more Mr. Nice Guy. */
4934 if (dest_cpu == NR_CPUS) { 4973 if (dest_cpu == NR_CPUS) {
4935 rq = task_rq_lock(tsk, &flags); 4974 rq = task_rq_lock(p, &flags);
4936 cpus_setall(tsk->cpus_allowed); 4975 cpus_setall(p->cpus_allowed);
4937 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4976 dest_cpu = any_online_cpu(p->cpus_allowed);
4938 task_rq_unlock(rq, &flags); 4977 task_rq_unlock(rq, &flags);
4939 4978
4940 /* 4979 /*
@@ -4942,12 +4981,12 @@ restart:
4942 * kernel threads (both mm NULL), since they never 4981 * kernel threads (both mm NULL), since they never
4943 * leave kernel. 4982 * leave kernel.
4944 */ 4983 */
4945 if (tsk->mm && printk_ratelimit()) 4984 if (p->mm && printk_ratelimit())
4946 printk(KERN_INFO "process %d (%s) no " 4985 printk(KERN_INFO "process %d (%s) no "
4947 "longer affine to cpu%d\n", 4986 "longer affine to cpu%d\n",
4948 tsk->pid, tsk->comm, dead_cpu); 4987 p->pid, p->comm, dead_cpu);
4949 } 4988 }
4950 if (!__migrate_task(tsk, dead_cpu, dest_cpu)) 4989 if (!__migrate_task(p, dead_cpu, dest_cpu))
4951 goto restart; 4990 goto restart;
4952} 4991}
4953 4992
@@ -4958,9 +4997,9 @@ restart:
4958 * their home CPUs. So we just add the counter to another CPU's counter, 4997 * their home CPUs. So we just add the counter to another CPU's counter,
4959 * to keep the global sum constant after CPU-down: 4998 * to keep the global sum constant after CPU-down:
4960 */ 4999 */
4961static void migrate_nr_uninterruptible(runqueue_t *rq_src) 5000static void migrate_nr_uninterruptible(struct rq *rq_src)
4962{ 5001{
4963 runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); 5002 struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
4964 unsigned long flags; 5003 unsigned long flags;
4965 5004
4966 local_irq_save(flags); 5005 local_irq_save(flags);
@@ -4974,48 +5013,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src)
4974/* Run through task list and migrate tasks from the dead cpu. */ 5013/* Run through task list and migrate tasks from the dead cpu. */
4975static void migrate_live_tasks(int src_cpu) 5014static void migrate_live_tasks(int src_cpu)
4976{ 5015{
4977 struct task_struct *tsk, *t; 5016 struct task_struct *p, *t;
4978 5017
4979 write_lock_irq(&tasklist_lock); 5018 write_lock_irq(&tasklist_lock);
4980 5019
4981 do_each_thread(t, tsk) { 5020 do_each_thread(t, p) {
4982 if (tsk == current) 5021 if (p == current)
4983 continue; 5022 continue;
4984 5023
4985 if (task_cpu(tsk) == src_cpu) 5024 if (task_cpu(p) == src_cpu)
4986 move_task_off_dead_cpu(src_cpu, tsk); 5025 move_task_off_dead_cpu(src_cpu, p);
4987 } while_each_thread(t, tsk); 5026 } while_each_thread(t, p);
4988 5027
4989 write_unlock_irq(&tasklist_lock); 5028 write_unlock_irq(&tasklist_lock);
4990} 5029}
4991 5030
4992/* Schedules idle task to be the next runnable task on current CPU. 5031/* Schedules idle task to be the next runnable task on current CPU.
4993 * It does so by boosting its priority to highest possible and adding it to 5032 * It does so by boosting its priority to highest possible and adding it to
4994 * the _front_ of runqueue. Used by CPU offline code. 5033 * the _front_ of the runqueue. Used by CPU offline code.
4995 */ 5034 */
4996void sched_idle_next(void) 5035void sched_idle_next(void)
4997{ 5036{
4998 int cpu = smp_processor_id(); 5037 int this_cpu = smp_processor_id();
4999 runqueue_t *rq = this_rq(); 5038 struct rq *rq = cpu_rq(this_cpu);
5000 struct task_struct *p = rq->idle; 5039 struct task_struct *p = rq->idle;
5001 unsigned long flags; 5040 unsigned long flags;
5002 5041
5003 /* cpu has to be offline */ 5042 /* cpu has to be offline */
5004 BUG_ON(cpu_online(cpu)); 5043 BUG_ON(cpu_online(this_cpu));
5005 5044
5006 /* Strictly not necessary since rest of the CPUs are stopped by now 5045 /*
5007 * and interrupts disabled on current cpu. 5046 * Strictly not necessary since rest of the CPUs are stopped by now
5047 * and interrupts disabled on the current cpu.
5008 */ 5048 */
5009 spin_lock_irqsave(&rq->lock, flags); 5049 spin_lock_irqsave(&rq->lock, flags);
5010 5050
5011 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); 5051 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
5012 /* Add idle task to _front_ of it's priority queue */ 5052
5053 /* Add idle task to the _front_ of its priority queue: */
5013 __activate_idle_task(p, rq); 5054 __activate_idle_task(p, rq);
5014 5055
5015 spin_unlock_irqrestore(&rq->lock, flags); 5056 spin_unlock_irqrestore(&rq->lock, flags);
5016} 5057}
5017 5058
5018/* Ensures that the idle task is using init_mm right before its cpu goes 5059/*
5060 * Ensures that the idle task is using init_mm right before its cpu goes
5019 * offline. 5061 * offline.
5020 */ 5062 */
5021void idle_task_exit(void) 5063void idle_task_exit(void)
@@ -5029,17 +5071,17 @@ void idle_task_exit(void)
5029 mmdrop(mm); 5071 mmdrop(mm);
5030} 5072}
5031 5073
5032static void migrate_dead(unsigned int dead_cpu, task_t *tsk) 5074static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5033{ 5075{
5034 struct runqueue *rq = cpu_rq(dead_cpu); 5076 struct rq *rq = cpu_rq(dead_cpu);
5035 5077
5036 /* Must be exiting, otherwise would be on tasklist. */ 5078 /* Must be exiting, otherwise would be on tasklist. */
5037 BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); 5079 BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
5038 5080
5039 /* Cannot have done final schedule yet: would have vanished. */ 5081 /* Cannot have done final schedule yet: would have vanished. */
5040 BUG_ON(tsk->flags & PF_DEAD); 5082 BUG_ON(p->flags & PF_DEAD);
5041 5083
5042 get_task_struct(tsk); 5084 get_task_struct(p);
5043 5085
5044 /* 5086 /*
5045 * Drop lock around migration; if someone else moves it, 5087 * Drop lock around migration; if someone else moves it,
@@ -5047,25 +5089,25 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
5047 * fine. 5089 * fine.
5048 */ 5090 */
5049 spin_unlock_irq(&rq->lock); 5091 spin_unlock_irq(&rq->lock);
5050 move_task_off_dead_cpu(dead_cpu, tsk); 5092 move_task_off_dead_cpu(dead_cpu, p);
5051 spin_lock_irq(&rq->lock); 5093 spin_lock_irq(&rq->lock);
5052 5094
5053 put_task_struct(tsk); 5095 put_task_struct(p);
5054} 5096}
5055 5097
5056/* release_task() removes task from tasklist, so we won't find dead tasks. */ 5098/* release_task() removes task from tasklist, so we won't find dead tasks. */
5057static void migrate_dead_tasks(unsigned int dead_cpu) 5099static void migrate_dead_tasks(unsigned int dead_cpu)
5058{ 5100{
5059 unsigned arr, i; 5101 struct rq *rq = cpu_rq(dead_cpu);
5060 struct runqueue *rq = cpu_rq(dead_cpu); 5102 unsigned int arr, i;
5061 5103
5062 for (arr = 0; arr < 2; arr++) { 5104 for (arr = 0; arr < 2; arr++) {
5063 for (i = 0; i < MAX_PRIO; i++) { 5105 for (i = 0; i < MAX_PRIO; i++) {
5064 struct list_head *list = &rq->arrays[arr].queue[i]; 5106 struct list_head *list = &rq->arrays[arr].queue[i];
5107
5065 while (!list_empty(list)) 5108 while (!list_empty(list))
5066 migrate_dead(dead_cpu, 5109 migrate_dead(dead_cpu, list_entry(list->next,
5067 list_entry(list->next, task_t, 5110 struct task_struct, run_list));
5068 run_list));
5069 } 5111 }
5070 } 5112 }
5071} 5113}
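
migrate_dead_tasks() drains both priority arrays by repeatedly migrating the head of each per-priority list until it is empty. A toy drain loop over a 2 x MAX_PRIO table of singly linked lists, standing in for the kernel's list heads (MAX_PRIO shrunk to 4 here; the kernel uses 140):

#include <stdio.h>
#include <stdlib.h>

#define NR_ARRAYS	2
#define MAX_PRIO	4	/* toy value */

struct task {
	int pid;
	struct task *next;
};

/* One list head per (array, priority) pair, as in struct rq's prio arrays. */
static struct task *queue[NR_ARRAYS][MAX_PRIO];

static void enqueue(int arr, int prio, int pid)
{
	struct task *t = malloc(sizeof(*t));

	t->pid = pid;
	t->next = queue[arr][prio];
	queue[arr][prio] = t;
}

/* Drain every list: always take the current head until the list is empty,
 * mirroring "while (!list_empty(list)) migrate_dead(..., list->next)". */
static void migrate_dead_tasks(void)
{
	int arr, i;

	for (arr = 0; arr < NR_ARRAYS; arr++) {
		for (i = 0; i < MAX_PRIO; i++) {
			while (queue[arr][i]) {
				struct task *t = queue[arr][i];

				queue[arr][i] = t->next;
				printf("migrated pid %d (arr %d, prio %d)\n",
				       t->pid, arr, i);
				free(t);
			}
		}
	}
}

int main(void)
{
	enqueue(0, 1, 101);
	enqueue(0, 1, 102);
	enqueue(1, 3, 201);
	migrate_dead_tasks();
	return 0;
}
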
@@ -5075,14 +5117,13 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
5075 * migration_call - callback that gets triggered when a CPU is added. 5117 * migration_call - callback that gets triggered when a CPU is added.
5076 * Here we can start up the necessary migration thread for the new CPU. 5118 * Here we can start up the necessary migration thread for the new CPU.
5077 */ 5119 */
5078static int __cpuinit migration_call(struct notifier_block *nfb, 5120static int __cpuinit
5079 unsigned long action, 5121migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5080 void *hcpu)
5081{ 5122{
5082 int cpu = (long)hcpu;
5083 struct task_struct *p; 5123 struct task_struct *p;
5084 struct runqueue *rq; 5124 int cpu = (long)hcpu;
5085 unsigned long flags; 5125 unsigned long flags;
5126 struct rq *rq;
5086 5127
5087 switch (action) { 5128 switch (action) {
5088 case CPU_UP_PREPARE: 5129 case CPU_UP_PREPARE:
@@ -5097,10 +5138,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5097 task_rq_unlock(rq, &flags); 5138 task_rq_unlock(rq, &flags);
5098 cpu_rq(cpu)->migration_thread = p; 5139 cpu_rq(cpu)->migration_thread = p;
5099 break; 5140 break;
5141
5100 case CPU_ONLINE: 5142 case CPU_ONLINE:
5101 /* Strictly unneccessary, as first user will wake it. */ 5143 /* Strictly unneccessary, as first user will wake it. */
5102 wake_up_process(cpu_rq(cpu)->migration_thread); 5144 wake_up_process(cpu_rq(cpu)->migration_thread);
5103 break; 5145 break;
5146
5104#ifdef CONFIG_HOTPLUG_CPU 5147#ifdef CONFIG_HOTPLUG_CPU
5105 case CPU_UP_CANCELED: 5148 case CPU_UP_CANCELED:
5106 if (!cpu_rq(cpu)->migration_thread) 5149 if (!cpu_rq(cpu)->migration_thread)
@@ -5111,6 +5154,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5111 kthread_stop(cpu_rq(cpu)->migration_thread); 5154 kthread_stop(cpu_rq(cpu)->migration_thread);
5112 cpu_rq(cpu)->migration_thread = NULL; 5155 cpu_rq(cpu)->migration_thread = NULL;
5113 break; 5156 break;
5157
5114 case CPU_DEAD: 5158 case CPU_DEAD:
5115 migrate_live_tasks(cpu); 5159 migrate_live_tasks(cpu);
5116 rq = cpu_rq(cpu); 5160 rq = cpu_rq(cpu);
@@ -5131,9 +5175,10 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5131 * the requestors. */ 5175 * the requestors. */
5132 spin_lock_irq(&rq->lock); 5176 spin_lock_irq(&rq->lock);
5133 while (!list_empty(&rq->migration_queue)) { 5177 while (!list_empty(&rq->migration_queue)) {
5134 migration_req_t *req; 5178 struct migration_req *req;
5179
5135 req = list_entry(rq->migration_queue.next, 5180 req = list_entry(rq->migration_queue.next,
5136 migration_req_t, list); 5181 struct migration_req, list);
5137 list_del_init(&req->list); 5182 list_del_init(&req->list);
5138 complete(&req->done); 5183 complete(&req->done);
5139 } 5184 }
@@ -5155,10 +5200,12 @@ static struct notifier_block __cpuinitdata migration_notifier = {
5155int __init migration_init(void) 5200int __init migration_init(void)
5156{ 5201{
5157 void *cpu = (void *)(long)smp_processor_id(); 5202 void *cpu = (void *)(long)smp_processor_id();
5158 /* Start one for boot CPU. */ 5203
5204 /* Start one for the boot CPU: */
5159 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); 5205 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
5160 migration_call(&migration_notifier, CPU_ONLINE, cpu); 5206 migration_call(&migration_notifier, CPU_ONLINE, cpu);
5161 register_cpu_notifier(&migration_notifier); 5207 register_cpu_notifier(&migration_notifier);
5208
5162 return 0; 5209 return 0;
5163} 5210}
5164#endif 5211#endif
@@ -5254,7 +5301,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
5254 } while (sd); 5301 } while (sd);
5255} 5302}
5256#else 5303#else
5257#define sched_domain_debug(sd, cpu) {} 5304# define sched_domain_debug(sd, cpu) do { } while (0)
5258#endif 5305#endif
5259 5306
5260static int sd_degenerate(struct sched_domain *sd) 5307static int sd_degenerate(struct sched_domain *sd)
@@ -5280,8 +5327,8 @@ static int sd_degenerate(struct sched_domain *sd)
5280 return 1; 5327 return 1;
5281} 5328}
5282 5329
5283static int sd_parent_degenerate(struct sched_domain *sd, 5330static int
5284 struct sched_domain *parent) 5331sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5285{ 5332{
5286 unsigned long cflags = sd->flags, pflags = parent->flags; 5333 unsigned long cflags = sd->flags, pflags = parent->flags;
5287 5334
@@ -5314,7 +5361,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
5314 */ 5361 */
5315static void cpu_attach_domain(struct sched_domain *sd, int cpu) 5362static void cpu_attach_domain(struct sched_domain *sd, int cpu)
5316{ 5363{
5317 runqueue_t *rq = cpu_rq(cpu); 5364 struct rq *rq = cpu_rq(cpu);
5318 struct sched_domain *tmp; 5365 struct sched_domain *tmp;
5319 5366
5320 /* Remove the sched domains which do not contribute to scheduling. */ 5367 /* Remove the sched domains which do not contribute to scheduling. */
@@ -5576,8 +5623,8 @@ static void touch_cache(void *__cache, unsigned long __size)
5576/* 5623/*
5577 * Measure the cache-cost of one task migration. Returns in units of nsec. 5624 * Measure the cache-cost of one task migration. Returns in units of nsec.
5578 */ 5625 */
5579static unsigned long long measure_one(void *cache, unsigned long size, 5626static unsigned long long
5580 int source, int target) 5627measure_one(void *cache, unsigned long size, int source, int target)
5581{ 5628{
5582 cpumask_t mask, saved_mask; 5629 cpumask_t mask, saved_mask;
5583 unsigned long long t0, t1, t2, t3, cost; 5630 unsigned long long t0, t1, t2, t3, cost;
@@ -5927,9 +5974,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes)
5927 */ 5974 */
5928static cpumask_t sched_domain_node_span(int node) 5975static cpumask_t sched_domain_node_span(int node)
5929{ 5976{
5930 int i;
5931 cpumask_t span, nodemask;
5932 DECLARE_BITMAP(used_nodes, MAX_NUMNODES); 5977 DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
5978 cpumask_t span, nodemask;
5979 int i;
5933 5980
5934 cpus_clear(span); 5981 cpus_clear(span);
5935 bitmap_zero(used_nodes, MAX_NUMNODES); 5982 bitmap_zero(used_nodes, MAX_NUMNODES);
@@ -5940,6 +5987,7 @@ static cpumask_t sched_domain_node_span(int node)
5940 5987
5941 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { 5988 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
5942 int next_node = find_next_best_node(node, used_nodes); 5989 int next_node = find_next_best_node(node, used_nodes);
5990
5943 nodemask = node_to_cpumask(next_node); 5991 nodemask = node_to_cpumask(next_node);
5944 cpus_or(span, span, nodemask); 5992 cpus_or(span, span, nodemask);
5945 } 5993 }
@@ -5949,19 +5997,23 @@ static cpumask_t sched_domain_node_span(int node)
5949#endif 5997#endif
5950 5998
5951int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 5999int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6000
5952/* 6001/*
5953 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we 6002 * SMT sched-domains:
5954 * can switch it on easily if needed.
5955 */ 6003 */
5956#ifdef CONFIG_SCHED_SMT 6004#ifdef CONFIG_SCHED_SMT
5957static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 6005static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
5958static struct sched_group sched_group_cpus[NR_CPUS]; 6006static struct sched_group sched_group_cpus[NR_CPUS];
6007
5959static int cpu_to_cpu_group(int cpu) 6008static int cpu_to_cpu_group(int cpu)
5960{ 6009{
5961 return cpu; 6010 return cpu;
5962} 6011}
5963#endif 6012#endif
5964 6013
6014/*
6015 * multi-core sched-domains:
6016 */
5965#ifdef CONFIG_SCHED_MC 6017#ifdef CONFIG_SCHED_MC
5966static DEFINE_PER_CPU(struct sched_domain, core_domains); 6018static DEFINE_PER_CPU(struct sched_domain, core_domains);
5967static struct sched_group *sched_group_core_bycpu[NR_CPUS]; 6019static struct sched_group *sched_group_core_bycpu[NR_CPUS];
@@ -5981,9 +6033,10 @@ static int cpu_to_core_group(int cpu)
5981 6033
5982static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6034static DEFINE_PER_CPU(struct sched_domain, phys_domains);
5983static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; 6035static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
6036
5984static int cpu_to_phys_group(int cpu) 6037static int cpu_to_phys_group(int cpu)
5985{ 6038{
5986#if defined(CONFIG_SCHED_MC) 6039#ifdef CONFIG_SCHED_MC
5987 cpumask_t mask = cpu_coregroup_map(cpu); 6040 cpumask_t mask = cpu_coregroup_map(cpu);
5988 return first_cpu(mask); 6041 return first_cpu(mask);
5989#elif defined(CONFIG_SCHED_SMT) 6042#elif defined(CONFIG_SCHED_SMT)
@@ -6529,6 +6582,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
6529int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) 6582int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
6530{ 6583{
6531 int err = 0; 6584 int err = 0;
6585
6532#ifdef CONFIG_SCHED_SMT 6586#ifdef CONFIG_SCHED_SMT
6533 if (smt_capable()) 6587 if (smt_capable())
6534 err = sysfs_create_file(&cls->kset.kobj, 6588 err = sysfs_create_file(&cls->kset.kobj,
@@ -6548,7 +6602,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
6548{ 6602{
6549 return sprintf(page, "%u\n", sched_mc_power_savings); 6603 return sprintf(page, "%u\n", sched_mc_power_savings);
6550} 6604}
6551static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6605static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
6606 const char *buf, size_t count)
6552{ 6607{
6553 return sched_power_savings_store(buf, count, 0); 6608 return sched_power_savings_store(buf, count, 0);
6554} 6609}
@@ -6561,7 +6616,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
6561{ 6616{
6562 return sprintf(page, "%u\n", sched_smt_power_savings); 6617 return sprintf(page, "%u\n", sched_smt_power_savings);
6563} 6618}
6564static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6619static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
6620 const char *buf, size_t count)
6565{ 6621{
6566 return sched_power_savings_store(buf, count, 1); 6622 return sched_power_savings_store(buf, count, 1);
6567} 6623}
@@ -6623,6 +6679,7 @@ int in_sched_functions(unsigned long addr)
6623{ 6679{
6624 /* Linker adds these: start and end of __sched functions */ 6680 /* Linker adds these: start and end of __sched functions */
6625 extern char __sched_text_start[], __sched_text_end[]; 6681 extern char __sched_text_start[], __sched_text_end[];
6682
6626 return in_lock_functions(addr) || 6683 return in_lock_functions(addr) ||
6627 (addr >= (unsigned long)__sched_text_start 6684 (addr >= (unsigned long)__sched_text_start
6628 && addr < (unsigned long)__sched_text_end); 6685 && addr < (unsigned long)__sched_text_end);
@@ -6630,14 +6687,15 @@ int in_sched_functions(unsigned long addr)
6630 6687
6631void __init sched_init(void) 6688void __init sched_init(void)
6632{ 6689{
6633 runqueue_t *rq;
6634 int i, j, k; 6690 int i, j, k;
6635 6691
6636 for_each_possible_cpu(i) { 6692 for_each_possible_cpu(i) {
6637 prio_array_t *array; 6693 struct prio_array *array;
6694 struct rq *rq;
6638 6695
6639 rq = cpu_rq(i); 6696 rq = cpu_rq(i);
6640 spin_lock_init(&rq->lock); 6697 spin_lock_init(&rq->lock);
6698 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
6641 rq->nr_running = 0; 6699 rq->nr_running = 0;
6642 rq->active = rq->arrays; 6700 rq->active = rq->arrays;
6643 rq->expired = rq->arrays + 1; 6701 rq->expired = rq->arrays + 1;
@@ -6684,7 +6742,7 @@ void __init sched_init(void)
6684#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6742#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
6685void __might_sleep(char *file, int line) 6743void __might_sleep(char *file, int line)
6686{ 6744{
6687#if defined(in_atomic) 6745#ifdef in_atomic
6688 static unsigned long prev_jiffy; /* ratelimiting */ 6746 static unsigned long prev_jiffy; /* ratelimiting */
6689 6747
6690 if ((in_atomic() || irqs_disabled()) && 6748 if ((in_atomic() || irqs_disabled()) &&
@@ -6706,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep);
6706#ifdef CONFIG_MAGIC_SYSRQ 6764#ifdef CONFIG_MAGIC_SYSRQ
6707void normalize_rt_tasks(void) 6765void normalize_rt_tasks(void)
6708{ 6766{
6767 struct prio_array *array;
6709 struct task_struct *p; 6768 struct task_struct *p;
6710 prio_array_t *array;
6711 unsigned long flags; 6769 unsigned long flags;
6712 runqueue_t *rq; 6770 struct rq *rq;
6713 6771
6714 read_lock_irq(&tasklist_lock); 6772 read_lock_irq(&tasklist_lock);
6715 for_each_process(p) { 6773 for_each_process(p) {
@@ -6753,7 +6811,7 @@ void normalize_rt_tasks(void)
6753 * 6811 *
6754 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6812 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6755 */ 6813 */
6756task_t *curr_task(int cpu) 6814struct task_struct *curr_task(int cpu)
6757{ 6815{
6758 return cpu_curr(cpu); 6816 return cpu_curr(cpu);
6759} 6817}
@@ -6773,7 +6831,7 @@ task_t *curr_task(int cpu)
6773 * 6831 *
6774 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6832 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6775 */ 6833 */
6776void set_curr_task(int cpu, task_t *p) 6834void set_curr_task(int cpu, struct task_struct *p)
6777{ 6835{
6778 cpu_curr(cpu) = p; 6836 cpu_curr(cpu) = p;
6779} 6837}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8f03e3b89b55..215541e26c1a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -62,6 +62,119 @@ static inline void wakeup_softirqd(void)
62} 62}
63 63
64/* 64/*
65 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately:
67 */
68static void __local_bh_disable(unsigned long ip)
69{
70 unsigned long flags;
71
72 WARN_ON_ONCE(in_irq());
73
74 raw_local_irq_save(flags);
75 add_preempt_count(SOFTIRQ_OFFSET);
76 /*
77 * Were softirqs turned off above:
78 */
79 if (softirq_count() == SOFTIRQ_OFFSET)
80 trace_softirqs_off(ip);
81 raw_local_irq_restore(flags);
82}
83
84void local_bh_disable(void)
85{
86 __local_bh_disable((unsigned long)__builtin_return_address(0));
87}
88
89EXPORT_SYMBOL(local_bh_disable);
90
91void __local_bh_enable(void)
92{
93 WARN_ON_ONCE(in_irq());
94
95 /*
96 * softirqs should never be enabled by __local_bh_enable(),
97 * it always nests inside local_bh_enable() sections:
98 */
99 WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
100
101 sub_preempt_count(SOFTIRQ_OFFSET);
102}
103EXPORT_SYMBOL_GPL(__local_bh_enable);
104
105/*
106 * Special-case - softirqs can safely be enabled in
107 * cond_resched_softirq(), or by __do_softirq(),
108 * without processing still-pending softirqs:
109 */
110void _local_bh_enable(void)
111{
112 WARN_ON_ONCE(in_irq());
113 WARN_ON_ONCE(!irqs_disabled());
114
115 if (softirq_count() == SOFTIRQ_OFFSET)
116 trace_softirqs_on((unsigned long)__builtin_return_address(0));
117 sub_preempt_count(SOFTIRQ_OFFSET);
118}
119
120EXPORT_SYMBOL(_local_bh_enable);
121
122void local_bh_enable(void)
123{
124 unsigned long flags;
125
126 WARN_ON_ONCE(in_irq());
127 WARN_ON_ONCE(irqs_disabled());
128
129 local_irq_save(flags);
130 /*
131 * Are softirqs going to be turned on now:
132 */
133 if (softirq_count() == SOFTIRQ_OFFSET)
134 trace_softirqs_on((unsigned long)__builtin_return_address(0));
135 /*
136 * Keep preemption disabled until we are done with
137 * softirq processing:
138 */
139 sub_preempt_count(SOFTIRQ_OFFSET - 1);
140
141 if (unlikely(!in_interrupt() && local_softirq_pending()))
142 do_softirq();
143
144 dec_preempt_count();
145 local_irq_restore(flags);
146 preempt_check_resched();
147}
148EXPORT_SYMBOL(local_bh_enable);
149
150void local_bh_enable_ip(unsigned long ip)
151{
152 unsigned long flags;
153
154 WARN_ON_ONCE(in_irq());
155
156 local_irq_save(flags);
157 /*
158 * Are softirqs going to be turned on now:
159 */
160 if (softirq_count() == SOFTIRQ_OFFSET)
161 trace_softirqs_on(ip);
162 /*
163 * Keep preemption disabled until we are done with
164 * softirq processing:
165 */
166 sub_preempt_count(SOFTIRQ_OFFSET - 1);
167
168 if (unlikely(!in_interrupt() && local_softirq_pending()))
169 do_softirq();
170
171 dec_preempt_count();
172 local_irq_restore(flags);
173 preempt_check_resched();
174}
175EXPORT_SYMBOL(local_bh_enable_ip);
176
177/*
65 * We restart softirq processing MAX_SOFTIRQ_RESTART times, 178 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
66 * and we fall back to softirqd after that. 179 * and we fall back to softirqd after that.
67 * 180 *
@@ -80,8 +193,11 @@ asmlinkage void __do_softirq(void)
80 int cpu; 193 int cpu;
81 194
82 pending = local_softirq_pending(); 195 pending = local_softirq_pending();
196 account_system_vtime(current);
197
198 __local_bh_disable((unsigned long)__builtin_return_address(0));
199 trace_softirq_enter();
83 200
84 local_bh_disable();
85 cpu = smp_processor_id(); 201 cpu = smp_processor_id();
86restart: 202restart:
87 /* Reset the pending bitmask before enabling irqs */ 203 /* Reset the pending bitmask before enabling irqs */
@@ -109,7 +225,10 @@ restart:
109 if (pending) 225 if (pending)
110 wakeup_softirqd(); 226 wakeup_softirqd();
111 227
112 __local_bh_enable(); 228 trace_softirq_exit();
229
230 account_system_vtime(current);
231 _local_bh_enable();
113} 232}
114 233
115#ifndef __ARCH_HAS_DO_SOFTIRQ 234#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -136,23 +255,6 @@ EXPORT_SYMBOL(do_softirq);
136 255
137#endif 256#endif
138 257
139void local_bh_enable(void)
140{
141 WARN_ON(irqs_disabled());
142 /*
143 * Keep preemption disabled until we are done with
144 * softirq processing:
145 */
146 sub_preempt_count(SOFTIRQ_OFFSET - 1);
147
148 if (unlikely(!in_interrupt() && local_softirq_pending()))
149 do_softirq();
150
151 dec_preempt_count();
152 preempt_check_resched();
153}
154EXPORT_SYMBOL(local_bh_enable);
155
156#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 258#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
157# define invoke_softirq() __do_softirq() 259# define invoke_softirq() __do_softirq()
158#else 260#else
@@ -165,6 +267,7 @@ EXPORT_SYMBOL(local_bh_enable);
165void irq_exit(void) 267void irq_exit(void)
166{ 268{
167 account_system_vtime(current); 269 account_system_vtime(current);
270 trace_hardirq_exit();
168 sub_preempt_count(IRQ_EXIT_OFFSET); 271 sub_preempt_count(IRQ_EXIT_OFFSET);
169 if (!in_interrupt() && local_softirq_pending()) 272 if (!in_interrupt() && local_softirq_pending())
170 invoke_softirq(); 273 invoke_softirq();
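
For reference, a minimal sketch (not part of the diff above, function name invented) of how the reworked local_bh_*() primitives nest: only the outermost disable/enable transition is reported to the irq-flags tracer, inner levels just move the preempt count by SOFTIRQ_OFFSET, and pending softirqs run only when the outermost local_bh_enable() drops the softirq count to zero:

#include <linux/interrupt.h>

static void example_bh_nesting(void)
{
        local_bh_disable();     /* 0 -> SOFTIRQ_OFFSET: trace_softirqs_off() */
        local_bh_disable();     /* nested: count only, no tracing */

        /* ... touch data shared with softirq context ... */

        local_bh_enable();      /* still nested: no tracing, no softirq run */
        local_bh_enable();      /* outermost: trace_softirqs_on(), pending softirqs run */
}
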
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index b31e54eadf56..bfd6ad9c0330 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -13,6 +13,7 @@
13#include <linux/preempt.h> 13#include <linux/preempt.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/debug_locks.h>
16#include <linux/module.h> 17#include <linux/module.h>
17 18
18/* 19/*
@@ -29,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock);
29int __lockfunc _spin_trylock(spinlock_t *lock) 30int __lockfunc _spin_trylock(spinlock_t *lock)
30{ 31{
31 preempt_disable(); 32 preempt_disable();
32 if (_raw_spin_trylock(lock)) 33 if (_raw_spin_trylock(lock)) {
34 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
33 return 1; 35 return 1;
36 }
34 37
35 preempt_enable(); 38 preempt_enable();
36 return 0; 39 return 0;
@@ -40,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock);
40int __lockfunc _read_trylock(rwlock_t *lock) 43int __lockfunc _read_trylock(rwlock_t *lock)
41{ 44{
42 preempt_disable(); 45 preempt_disable();
43 if (_raw_read_trylock(lock)) 46 if (_raw_read_trylock(lock)) {
47 rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
44 return 1; 48 return 1;
49 }
45 50
46 preempt_enable(); 51 preempt_enable();
47 return 0; 52 return 0;
@@ -51,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock);
51int __lockfunc _write_trylock(rwlock_t *lock) 56int __lockfunc _write_trylock(rwlock_t *lock)
52{ 57{
53 preempt_disable(); 58 preempt_disable();
54 if (_raw_write_trylock(lock)) 59 if (_raw_write_trylock(lock)) {
60 rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
55 return 1; 61 return 1;
62 }
56 63
57 preempt_enable(); 64 preempt_enable();
58 return 0; 65 return 0;
59} 66}
60EXPORT_SYMBOL(_write_trylock); 67EXPORT_SYMBOL(_write_trylock);
61 68
62#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) 69/*
70 * If lockdep is enabled then we use the non-preemption spin-ops
71 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
72 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
73 */
74#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
75 defined(CONFIG_PROVE_LOCKING)
63 76
64void __lockfunc _read_lock(rwlock_t *lock) 77void __lockfunc _read_lock(rwlock_t *lock)
65{ 78{
66 preempt_disable(); 79 preempt_disable();
80 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
67 _raw_read_lock(lock); 81 _raw_read_lock(lock);
68} 82}
69EXPORT_SYMBOL(_read_lock); 83EXPORT_SYMBOL(_read_lock);
@@ -74,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
74 88
75 local_irq_save(flags); 89 local_irq_save(flags);
76 preempt_disable(); 90 preempt_disable();
91 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
92 /*
93 * On lockdep we don't want the hand-coded irq-enable of
94 * _raw_spin_lock_flags() code, because lockdep assumes
95 * that interrupts are not re-enabled during lock-acquire:
96 */
97#ifdef CONFIG_PROVE_LOCKING
98 _raw_spin_lock(lock);
99#else
77 _raw_spin_lock_flags(lock, &flags); 100 _raw_spin_lock_flags(lock, &flags);
101#endif
78 return flags; 102 return flags;
79} 103}
80EXPORT_SYMBOL(_spin_lock_irqsave); 104EXPORT_SYMBOL(_spin_lock_irqsave);
@@ -83,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
83{ 107{
84 local_irq_disable(); 108 local_irq_disable();
85 preempt_disable(); 109 preempt_disable();
110 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
86 _raw_spin_lock(lock); 111 _raw_spin_lock(lock);
87} 112}
88EXPORT_SYMBOL(_spin_lock_irq); 113EXPORT_SYMBOL(_spin_lock_irq);
@@ -91,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
91{ 116{
92 local_bh_disable(); 117 local_bh_disable();
93 preempt_disable(); 118 preempt_disable();
119 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
94 _raw_spin_lock(lock); 120 _raw_spin_lock(lock);
95} 121}
96EXPORT_SYMBOL(_spin_lock_bh); 122EXPORT_SYMBOL(_spin_lock_bh);
@@ -101,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
101 127
102 local_irq_save(flags); 128 local_irq_save(flags);
103 preempt_disable(); 129 preempt_disable();
130 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
104 _raw_read_lock(lock); 131 _raw_read_lock(lock);
105 return flags; 132 return flags;
106} 133}
@@ -110,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
110{ 137{
111 local_irq_disable(); 138 local_irq_disable();
112 preempt_disable(); 139 preempt_disable();
140 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
113 _raw_read_lock(lock); 141 _raw_read_lock(lock);
114} 142}
115EXPORT_SYMBOL(_read_lock_irq); 143EXPORT_SYMBOL(_read_lock_irq);
@@ -118,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
118{ 146{
119 local_bh_disable(); 147 local_bh_disable();
120 preempt_disable(); 148 preempt_disable();
149 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
121 _raw_read_lock(lock); 150 _raw_read_lock(lock);
122} 151}
123EXPORT_SYMBOL(_read_lock_bh); 152EXPORT_SYMBOL(_read_lock_bh);
@@ -128,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
128 157
129 local_irq_save(flags); 158 local_irq_save(flags);
130 preempt_disable(); 159 preempt_disable();
160 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
131 _raw_write_lock(lock); 161 _raw_write_lock(lock);
132 return flags; 162 return flags;
133} 163}
@@ -137,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
137{ 167{
138 local_irq_disable(); 168 local_irq_disable();
139 preempt_disable(); 169 preempt_disable();
170 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
140 _raw_write_lock(lock); 171 _raw_write_lock(lock);
141} 172}
142EXPORT_SYMBOL(_write_lock_irq); 173EXPORT_SYMBOL(_write_lock_irq);
@@ -145,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
145{ 176{
146 local_bh_disable(); 177 local_bh_disable();
147 preempt_disable(); 178 preempt_disable();
179 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
148 _raw_write_lock(lock); 180 _raw_write_lock(lock);
149} 181}
150EXPORT_SYMBOL(_write_lock_bh); 182EXPORT_SYMBOL(_write_lock_bh);
@@ -152,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh);
152void __lockfunc _spin_lock(spinlock_t *lock) 184void __lockfunc _spin_lock(spinlock_t *lock)
153{ 185{
154 preempt_disable(); 186 preempt_disable();
187 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
155 _raw_spin_lock(lock); 188 _raw_spin_lock(lock);
156} 189}
157 190
@@ -160,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock);
160void __lockfunc _write_lock(rwlock_t *lock) 193void __lockfunc _write_lock(rwlock_t *lock)
161{ 194{
162 preempt_disable(); 195 preempt_disable();
196 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
163 _raw_write_lock(lock); 197 _raw_write_lock(lock);
164} 198}
165 199
@@ -255,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock);
255 289
256#endif /* CONFIG_PREEMPT */ 290#endif /* CONFIG_PREEMPT */
257 291
292#ifdef CONFIG_DEBUG_LOCK_ALLOC
293
294void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
295{
296 preempt_disable();
297 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
298 _raw_spin_lock(lock);
299}
300
301EXPORT_SYMBOL(_spin_lock_nested);
302
303#endif
304
258void __lockfunc _spin_unlock(spinlock_t *lock) 305void __lockfunc _spin_unlock(spinlock_t *lock)
259{ 306{
307 spin_release(&lock->dep_map, 1, _RET_IP_);
260 _raw_spin_unlock(lock); 308 _raw_spin_unlock(lock);
261 preempt_enable(); 309 preempt_enable();
262} 310}
@@ -264,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock);
264 312
265void __lockfunc _write_unlock(rwlock_t *lock) 313void __lockfunc _write_unlock(rwlock_t *lock)
266{ 314{
315 rwlock_release(&lock->dep_map, 1, _RET_IP_);
267 _raw_write_unlock(lock); 316 _raw_write_unlock(lock);
268 preempt_enable(); 317 preempt_enable();
269} 318}
@@ -271,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock);
271 320
272void __lockfunc _read_unlock(rwlock_t *lock) 321void __lockfunc _read_unlock(rwlock_t *lock)
273{ 322{
323 rwlock_release(&lock->dep_map, 1, _RET_IP_);
274 _raw_read_unlock(lock); 324 _raw_read_unlock(lock);
275 preempt_enable(); 325 preempt_enable();
276} 326}
@@ -278,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock);
278 328
279void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) 329void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
280{ 330{
331 spin_release(&lock->dep_map, 1, _RET_IP_);
281 _raw_spin_unlock(lock); 332 _raw_spin_unlock(lock);
282 local_irq_restore(flags); 333 local_irq_restore(flags);
283 preempt_enable(); 334 preempt_enable();
@@ -286,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);
286 337
287void __lockfunc _spin_unlock_irq(spinlock_t *lock) 338void __lockfunc _spin_unlock_irq(spinlock_t *lock)
288{ 339{
340 spin_release(&lock->dep_map, 1, _RET_IP_);
289 _raw_spin_unlock(lock); 341 _raw_spin_unlock(lock);
290 local_irq_enable(); 342 local_irq_enable();
291 preempt_enable(); 343 preempt_enable();
@@ -294,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq);
294 346
295void __lockfunc _spin_unlock_bh(spinlock_t *lock) 347void __lockfunc _spin_unlock_bh(spinlock_t *lock)
296{ 348{
349 spin_release(&lock->dep_map, 1, _RET_IP_);
297 _raw_spin_unlock(lock); 350 _raw_spin_unlock(lock);
298 preempt_enable_no_resched(); 351 preempt_enable_no_resched();
299 local_bh_enable(); 352 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
300} 353}
301EXPORT_SYMBOL(_spin_unlock_bh); 354EXPORT_SYMBOL(_spin_unlock_bh);
302 355
303void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 356void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
304{ 357{
358 rwlock_release(&lock->dep_map, 1, _RET_IP_);
305 _raw_read_unlock(lock); 359 _raw_read_unlock(lock);
306 local_irq_restore(flags); 360 local_irq_restore(flags);
307 preempt_enable(); 361 preempt_enable();
@@ -310,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore);
310 364
311void __lockfunc _read_unlock_irq(rwlock_t *lock) 365void __lockfunc _read_unlock_irq(rwlock_t *lock)
312{ 366{
367 rwlock_release(&lock->dep_map, 1, _RET_IP_);
313 _raw_read_unlock(lock); 368 _raw_read_unlock(lock);
314 local_irq_enable(); 369 local_irq_enable();
315 preempt_enable(); 370 preempt_enable();
@@ -318,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq);
318 373
319void __lockfunc _read_unlock_bh(rwlock_t *lock) 374void __lockfunc _read_unlock_bh(rwlock_t *lock)
320{ 375{
376 rwlock_release(&lock->dep_map, 1, _RET_IP_);
321 _raw_read_unlock(lock); 377 _raw_read_unlock(lock);
322 preempt_enable_no_resched(); 378 preempt_enable_no_resched();
323 local_bh_enable(); 379 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
324} 380}
325EXPORT_SYMBOL(_read_unlock_bh); 381EXPORT_SYMBOL(_read_unlock_bh);
326 382
327void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 383void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
328{ 384{
385 rwlock_release(&lock->dep_map, 1, _RET_IP_);
329 _raw_write_unlock(lock); 386 _raw_write_unlock(lock);
330 local_irq_restore(flags); 387 local_irq_restore(flags);
331 preempt_enable(); 388 preempt_enable();
@@ -334,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore);
334 391
335void __lockfunc _write_unlock_irq(rwlock_t *lock) 392void __lockfunc _write_unlock_irq(rwlock_t *lock)
336{ 393{
394 rwlock_release(&lock->dep_map, 1, _RET_IP_);
337 _raw_write_unlock(lock); 395 _raw_write_unlock(lock);
338 local_irq_enable(); 396 local_irq_enable();
339 preempt_enable(); 397 preempt_enable();
@@ -342,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq);
342 400
343void __lockfunc _write_unlock_bh(rwlock_t *lock) 401void __lockfunc _write_unlock_bh(rwlock_t *lock)
344{ 402{
403 rwlock_release(&lock->dep_map, 1, _RET_IP_);
345 _raw_write_unlock(lock); 404 _raw_write_unlock(lock);
346 preempt_enable_no_resched(); 405 preempt_enable_no_resched();
347 local_bh_enable(); 406 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
348} 407}
349EXPORT_SYMBOL(_write_unlock_bh); 408EXPORT_SYMBOL(_write_unlock_bh);
350 409
@@ -352,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
352{ 411{
353 local_bh_disable(); 412 local_bh_disable();
354 preempt_disable(); 413 preempt_disable();
355 if (_raw_spin_trylock(lock)) 414 if (_raw_spin_trylock(lock)) {
415 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
356 return 1; 416 return 1;
417 }
357 418
358 preempt_enable_no_resched(); 419 preempt_enable_no_resched();
359 local_bh_enable(); 420 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
360 return 0; 421 return 0;
361} 422}
362EXPORT_SYMBOL(_spin_trylock_bh); 423EXPORT_SYMBOL(_spin_trylock_bh);
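
The new _spin_lock_nested() entry point (and the spin_acquire() annotations above) let intentional nesting of two locks from the same lock-class be expressed. A rough usage sketch, assuming the usual spin_lock_nested() wrapper that maps onto _spin_lock_nested(); the struct and function names are invented, and both locks are assumed to have been initialized from the same spin_lock_init() call site, which is what places them in one class:

#include <linux/spinlock.h>

struct foo {
        spinlock_t lock;
};

static void foo_double_lock(struct foo *a, struct foo *b)
{
        spin_lock(&a->lock);
        /* subclass 1: same class, deliberately nested one level deep */
        spin_lock_nested(&b->lock, 1);

        /* ... operate on both objects ... */

        spin_unlock(&b->lock);
        spin_unlock(&a->lock);
}
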
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
new file mode 100644
index 000000000000..b71816e47a30
--- /dev/null
+++ b/kernel/stacktrace.c
@@ -0,0 +1,24 @@
1/*
2 * kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/kallsyms.h>
10#include <linux/stacktrace.h>
11
12void print_stack_trace(struct stack_trace *trace, int spaces)
13{
14 int i, j;
15
16 for (i = 0; i < trace->nr_entries; i++) {
17 unsigned long ip = trace->entries[i];
18
19 for (j = 0; j < spaces + 1; j++)
20 printk(" ");
21 print_ip_sym(ip);
22 }
23}
24
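
A usage sketch (not from the patch): print_stack_trace() only consumes trace->nr_entries and trace->entries[], so any caller with a filled-in entries array can symbolize it. Here the single, purely illustrative entry is the caller's return address, and spaces=1 yields two leading blanks per printed line:

#include <linux/stacktrace.h>

static void example_print_trace(void)
{
        unsigned long entries[1] = {
                (unsigned long)__builtin_return_address(0),
        };
        struct stack_trace trace = {
                .nr_entries     = 1,
                .entries        = entries,
        };

        print_stack_trace(&trace, 1);
}
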
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2c0aacc37c55..dcfb5d731466 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -4,7 +4,6 @@
4#include <linux/cpu.h> 4#include <linux/cpu.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/syscalls.h> 6#include <linux/syscalls.h>
7#include <linux/kthread.h>
8#include <asm/atomic.h> 7#include <asm/atomic.h>
9#include <asm/semaphore.h> 8#include <asm/semaphore.h>
10#include <asm/uaccess.h> 9#include <asm/uaccess.h>
@@ -26,11 +25,13 @@ static unsigned int stopmachine_num_threads;
26static atomic_t stopmachine_thread_ack; 25static atomic_t stopmachine_thread_ack;
27static DECLARE_MUTEX(stopmachine_mutex); 26static DECLARE_MUTEX(stopmachine_mutex);
28 27
29static int stopmachine(void *unused) 28static int stopmachine(void *cpu)
30{ 29{
31 int irqs_disabled = 0; 30 int irqs_disabled = 0;
32 int prepared = 0; 31 int prepared = 0;
33 32
33 set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
34
34 /* Ack: we are alive */ 35 /* Ack: we are alive */
35 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ 36 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
36 atomic_inc(&stopmachine_thread_ack); 37 atomic_inc(&stopmachine_thread_ack);
@@ -84,8 +85,7 @@ static void stopmachine_set_state(enum stopmachine_state state)
84 85
85static int stop_machine(void) 86static int stop_machine(void)
86{ 87{
87 int ret = 0; 88 int i, ret = 0;
88 unsigned int i;
89 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 89 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
90 90
91 /* One high-prio thread per cpu. We'll do this one. */ 91 /* One high-prio thread per cpu. We'll do this one. */
@@ -96,16 +96,11 @@ static int stop_machine(void)
96 stopmachine_state = STOPMACHINE_WAIT; 96 stopmachine_state = STOPMACHINE_WAIT;
97 97
98 for_each_online_cpu(i) { 98 for_each_online_cpu(i) {
99 struct task_struct *tsk;
100 if (i == raw_smp_processor_id()) 99 if (i == raw_smp_processor_id())
101 continue; 100 continue;
102 tsk = kthread_create(stopmachine, NULL, "stopmachine"); 101 ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
103 if (IS_ERR(tsk)) { 102 if (ret < 0)
104 ret = PTR_ERR(tsk);
105 break; 103 break;
106 }
107 kthread_bind(tsk, i);
108 wake_up_process(tsk);
109 stopmachine_num_threads++; 104 stopmachine_num_threads++;
110 } 105 }
111 106
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 99a58f279077..362a0cc37138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -932,6 +932,17 @@ static ctl_table vm_table[] = {
932 .strategy = &sysctl_intvec, 932 .strategy = &sysctl_intvec,
933 .extra1 = &zero, 933 .extra1 = &zero,
934 }, 934 },
935 {
936 .ctl_name = VM_MIN_UNMAPPED,
937 .procname = "min_unmapped_ratio",
938 .data = &sysctl_min_unmapped_ratio,
939 .maxlen = sizeof(sysctl_min_unmapped_ratio),
940 .mode = 0644,
941 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
942 .strategy = &sysctl_intvec,
943 .extra1 = &zero,
944 .extra2 = &one_hundred,
945 },
935#endif 946#endif
936#ifdef CONFIG_X86_32 947#ifdef CONFIG_X86_32
937 { 948 {
diff --git a/kernel/timer.c b/kernel/timer.c
index 5a8960253063..396a3c024c2c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1208,7 +1208,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES;
1208 * playing with xtime and avenrun. 1208 * playing with xtime and avenrun.
1209 */ 1209 */
1210#ifndef ARCH_HAVE_XTIME_LOCK 1210#ifndef ARCH_HAVE_XTIME_LOCK
1211seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; 1211__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
1212 1212
1213EXPORT_SYMBOL(xtime_lock); 1213EXPORT_SYMBOL(xtime_lock);
1214#endif 1214#endif
@@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void)
1368 1368
1369static void process_timeout(unsigned long __data) 1369static void process_timeout(unsigned long __data)
1370{ 1370{
1371 wake_up_process((task_t *)__data); 1371 wake_up_process((struct task_struct *)__data);
1372} 1372}
1373 1373
1374/** 1374/**
@@ -1559,6 +1559,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1559 return 0; 1559 return 0;
1560} 1560}
1561 1561
1562/*
1563 * lockdep: we want to track each per-CPU base as a separate lock-class,
1564 * but timer-bases are kmalloc()-ed, so we need to attach separate
1565 * keys to them:
1566 */
1567static struct lock_class_key base_lock_keys[NR_CPUS];
1568
1562static int __devinit init_timers_cpu(int cpu) 1569static int __devinit init_timers_cpu(int cpu)
1563{ 1570{
1564 int j; 1571 int j;
@@ -1594,6 +1601,8 @@ static int __devinit init_timers_cpu(int cpu)
1594 } 1601 }
1595 1602
1596 spin_lock_init(&base->lock); 1603 spin_lock_init(&base->lock);
1604 lockdep_set_class(&base->lock, base_lock_keys + cpu);
1605
1597 for (j = 0; j < TVN_SIZE; j++) { 1606 for (j = 0; j < TVN_SIZE; j++) {
1598 INIT_LIST_HEAD(base->tv5.vec + j); 1607 INIT_LIST_HEAD(base->tv5.vec + j);
1599 INIT_LIST_HEAD(base->tv4.vec + j); 1608 INIT_LIST_HEAD(base->tv4.vec + j);
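
The pattern above generalizes to any lock that lives in dynamically allocated or per-CPU memory: such locks would otherwise all share the class of their common spin_lock_init() call site, so a static array of lock_class_key objects hands each instance its own class. A sketch with invented names:

#include <linux/spinlock.h>
#include <linux/threads.h>

struct my_base {
        spinlock_t lock;
};

static struct lock_class_key my_base_lock_keys[NR_CPUS];

static void my_base_init(struct my_base *base, int cpu)
{
        spin_lock_init(&base->lock);
        lockdep_set_class(&base->lock, my_base_lock_keys + cpu);
}
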
diff --git a/kernel/wait.c b/kernel/wait.c
index 5985d866531f..a1d57aeb7f75 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,6 +10,10 @@
10#include <linux/wait.h> 10#include <linux/wait.h>
11#include <linux/hash.h> 11#include <linux/hash.h>
12 12
13struct lock_class_key waitqueue_lock_key;
14
15EXPORT_SYMBOL(waitqueue_lock_key);
16
13void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 17void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
14{ 18{
15 unsigned long flags; 19 unsigned long flags;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 59f0b42bd89e..90d2c6001659 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,7 +51,7 @@ struct cpu_workqueue_struct {
51 wait_queue_head_t work_done; 51 wait_queue_head_t work_done;
52 52
53 struct workqueue_struct *wq; 53 struct workqueue_struct *wq;
54 task_t *thread; 54 struct task_struct *thread;
55 55
56 int run_depth; /* Detect run_workqueue() recursion depth */ 56 int run_depth; /* Detect run_workqueue() recursion depth */
57} ____cacheline_aligned; 57} ____cacheline_aligned;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4fcbd12cf6e..e5889b1a33ff 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -48,7 +48,7 @@ config DEBUG_KERNEL
48config LOG_BUF_SHIFT 48config LOG_BUF_SHIFT
49 int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL 49 int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
50 range 12 21 50 range 12 21
51 default 17 if S390 51 default 17 if S390 || LOCKDEP
52 default 16 if X86_NUMAQ || IA64 52 default 16 if X86_NUMAQ || IA64
53 default 15 if SMP 53 default 15 if SMP
54 default 14 54 default 14
@@ -107,7 +107,7 @@ config DEBUG_SLAB_LEAK
107 107
108config DEBUG_PREEMPT 108config DEBUG_PREEMPT
109 bool "Debug preemptible kernel" 109 bool "Debug preemptible kernel"
110 depends on DEBUG_KERNEL && PREEMPT 110 depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
111 default y 111 default y
112 help 112 help
113 If you say Y here then the kernel will use a debug variant of the 113 If you say Y here then the kernel will use a debug variant of the
@@ -115,14 +115,6 @@ config DEBUG_PREEMPT
115 if kernel code uses it in a preemption-unsafe way. Also, the kernel 115 if kernel code uses it in a preemption-unsafe way. Also, the kernel
116 will detect preemption count underflows. 116 will detect preemption count underflows.
117 117
118config DEBUG_MUTEXES
119 bool "Mutex debugging, deadlock detection"
120 default n
121 depends on DEBUG_KERNEL
122 help
123 This allows mutex semantics violations and mutex related deadlocks
124 (lockups) to be detected and reported automatically.
125
126config DEBUG_RT_MUTEXES 118config DEBUG_RT_MUTEXES
127 bool "RT Mutex debugging, deadlock detection" 119 bool "RT Mutex debugging, deadlock detection"
128 depends on DEBUG_KERNEL && RT_MUTEXES 120 depends on DEBUG_KERNEL && RT_MUTEXES
@@ -142,7 +134,7 @@ config RT_MUTEX_TESTER
142 This option enables a rt-mutex tester. 134 This option enables a rt-mutex tester.
143 135
144config DEBUG_SPINLOCK 136config DEBUG_SPINLOCK
145 bool "Spinlock debugging" 137 bool "Spinlock and rw-lock debugging: basic checks"
146 depends on DEBUG_KERNEL 138 depends on DEBUG_KERNEL
147 help 139 help
148 Say Y here and build SMP to catch missing spinlock initialization 140 Say Y here and build SMP to catch missing spinlock initialization
@@ -150,13 +142,122 @@ config DEBUG_SPINLOCK
150 best used in conjunction with the NMI watchdog so that spinlock 142 best used in conjunction with the NMI watchdog so that spinlock
151 deadlocks are also debuggable. 143 deadlocks are also debuggable.
152 144
145config DEBUG_MUTEXES
146 bool "Mutex debugging: basic checks"
147 depends on DEBUG_KERNEL
148 help
149 This feature allows mutex semantics violations to be detected and
150 reported.
151
152config DEBUG_RWSEMS
153 bool "RW-sem debugging: basic checks"
154 depends on DEBUG_KERNEL
155 help
156 This feature allows read-write semaphore semantics violations to
157 be detected and reported.
158
159config DEBUG_LOCK_ALLOC
160 bool "Lock debugging: detect incorrect freeing of live locks"
161 depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
162 select DEBUG_SPINLOCK
163 select DEBUG_MUTEXES
164 select DEBUG_RWSEMS
165 select LOCKDEP
166 help
167 This feature will check whether any held lock (spinlock, rwlock,
168 mutex or rwsem) is incorrectly freed by the kernel, via any of the
169 memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
170 vfree(), etc.), whether a live lock is incorrectly reinitialized via
171 spin_lock_init()/mutex_init()/etc., or whether there is any lock
172 held during task exit.
173
174config PROVE_LOCKING
175 bool "Lock debugging: prove locking correctness"
176 depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
177 select LOCKDEP
178 select DEBUG_SPINLOCK
179 select DEBUG_MUTEXES
180 select DEBUG_RWSEMS
181 select DEBUG_LOCK_ALLOC
182 default n
183 help
184 This feature enables the kernel to prove that all locking
185 that occurs in the kernel runtime is mathematically
186 correct: that under no circumstance could an arbitrary (and
187 not yet triggered) combination of observed locking
188 sequences (on an arbitrary number of CPUs, running an
189 arbitrary number of tasks and interrupt contexts) cause a
190 deadlock.
191
192 In short, this feature enables the kernel to report locking
193 related deadlocks before they actually occur.
194
195 The proof does not depend on how hard and complex a
196 deadlock scenario would be to trigger: how many
197 participant CPUs, tasks and irq-contexts would be needed
198 for it to trigger. The proof also does not depend on
199 timing: if a race and a resulting deadlock is possible
200 theoretically (no matter how unlikely the race scenario
201 is), it will be proven so and will immediately be
202 reported by the kernel (once the event is observed that
203 makes the deadlock theoretically possible).
204
205 If a deadlock is impossible (i.e. the locking rules, as
206 observed by the kernel, are mathematically correct), the
207 kernel reports nothing.
208
209 NOTE: this feature can also be enabled for rwlocks, mutexes
210 and rwsems - in which case all dependencies between these
211 different locking variants are observed and mapped too, and
212 the proof of observed correctness is also maintained for an
213 arbitrary combination of these separate locking variants.
214
215 For more details, see Documentation/lockdep-design.txt.
216
217config LOCKDEP
218 bool
219 depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
220 select STACKTRACE
221 select FRAME_POINTER
222 select KALLSYMS
223 select KALLSYMS_ALL
224
225config DEBUG_LOCKDEP
226 bool "Lock dependency engine debugging"
227 depends on LOCKDEP
228 help
229 If you say Y here, the lock dependency engine will do
230 additional runtime checks to debug itself, at the price
231 of more runtime overhead.
232
233config TRACE_IRQFLAGS
234 bool
235 default y
236 depends on TRACE_IRQFLAGS_SUPPORT
237 depends on PROVE_LOCKING
238
153config DEBUG_SPINLOCK_SLEEP 239config DEBUG_SPINLOCK_SLEEP
154 bool "Sleep-inside-spinlock checking" 240 bool "Spinlock debugging: sleep-inside-spinlock checking"
155 depends on DEBUG_KERNEL 241 depends on DEBUG_KERNEL
156 help 242 help
157 If you say Y here, various routines which may sleep will become very 243 If you say Y here, various routines which may sleep will become very
158 noisy if they are called with a spinlock held. 244 noisy if they are called with a spinlock held.
159 245
246config DEBUG_LOCKING_API_SELFTESTS
247 bool "Locking API boot-time self-tests"
248 depends on DEBUG_KERNEL
249 help
250 Say Y here if you want the kernel to run a short self-test during
251 bootup. The self-test checks whether common types of locking bugs
252 are detected by debugging mechanisms or not. (if you disable
253 lock debugging then those bugs wont be detected of course.)
254 The following locking APIs are covered: spinlocks, rwlocks,
255 mutexes and rwsems.
256
257config STACKTRACE
258 bool
259 depends on STACKTRACE_SUPPORT
260
160config DEBUG_KOBJECT 261config DEBUG_KOBJECT
161 bool "kobject debugging" 262 bool "kobject debugging"
162 depends on DEBUG_KERNEL 263 depends on DEBUG_KERNEL
@@ -212,7 +313,7 @@ config DEBUG_VM
212 313
213config FRAME_POINTER 314config FRAME_POINTER
214 bool "Compile the kernel with frame pointers" 315 bool "Compile the kernel with frame pointers"
215 depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML) 316 depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390)
216 default y if DEBUG_INFO && UML 317 default y if DEBUG_INFO && UML
217 help 318 help
218 If you say Y here the resulting kernel image will be slightly larger 319 If you say Y here the resulting kernel image will be slightly larger
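
Taken together, a plausible .config fragment once the new options are enabled on an architecture that provides TRACE_IRQFLAGS_SUPPORT, STACKTRACE_SUPPORT and LOCKDEP_SUPPORT (illustrative only; PROVE_LOCKING pulls most of the rest in via select):

CONFIG_PROVE_LOCKING=y
CONFIG_TRACE_IRQFLAGS=y
# selected by PROVE_LOCKING:
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_RWSEMS=y
CONFIG_LOCKDEP=y
# selected by LOCKDEP:
CONFIG_STACKTRACE=y
CONFIG_FRAME_POINTER=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
# optional extras:
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
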
diff --git a/lib/Makefile b/lib/Makefile
index 10c13c9d7824..be9719ae82d0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,13 +11,14 @@ lib-$(CONFIG_SMP) += cpumask.o
11 11
12lib-y += kobject.o kref.o kobject_uevent.o klist.o 12lib-y += kobject.o kref.o kobject_uevent.o klist.o
13 13
14obj-y += sort.o parser.o halfmd4.o iomap_copy.o 14obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o
15 15
16ifeq ($(CONFIG_DEBUG_KOBJECT),y) 16ifeq ($(CONFIG_DEBUG_KOBJECT),y)
17CFLAGS_kobject.o += -DDEBUG 17CFLAGS_kobject.o += -DDEBUG
18CFLAGS_kobject_uevent.o += -DDEBUG 18CFLAGS_kobject_uevent.o += -DDEBUG
19endif 19endif
20 20
21obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
21obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o 22obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
22lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o 23lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
23lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o 24lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
new file mode 100644
index 000000000000..0ef01d14727c
--- /dev/null
+++ b/lib/debug_locks.c
@@ -0,0 +1,45 @@
1/*
2 * lib/debug_locks.c
3 *
4 * Generic place for common debugging facilities for various locks:
5 * spinlocks, rwlocks, mutexes and rwsems.
6 *
7 * Started by Ingo Molnar:
8 *
9 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
10 */
11#include <linux/rwsem.h>
12#include <linux/mutex.h>
13#include <linux/module.h>
14#include <linux/spinlock.h>
15#include <linux/debug_locks.h>
16
17/*
18 * We want to turn all lock-debugging facilities on/off at once,
19 * via a global flag. The reason is that once a single bug has been
20 * detected and reported, there might be a cascade of followup bugs
21 * that would just muddy the log. So we report the first one and
22 * shut up after that.
23 */
24int debug_locks = 1;
25
26/*
27 * The locking-testsuite uses <debug_locks_silent> to get a
28 * 'silent failure': nothing is printed to the console when
29 * a locking bug is detected.
30 */
31int debug_locks_silent;
32
33/*
34 * Generic 'turn off all lock debugging' function:
35 */
36int debug_locks_off(void)
37{
38 if (xchg(&debug_locks, 0)) {
39 if (!debug_locks_silent) {
40 console_verbose();
41 return 1;
42 }
43 }
44 return 0;
45}
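
A typical consumer of this file (sketch only, names invented): a lock-debugging check reports its first hit and then stays quiet, because debug_locks_off() returns 1 only the first time it flips debug_locks off, and returns 0 when the testsuite runs in silent mode:

#include <linux/kernel.h>
#include <linux/debug_locks.h>

static void example_report_bad_unlock(void)
{
        if (!debug_locks_off())
                return;

        printk(KERN_ERR "example: bad unlock detected\n");
        dump_stack();
}
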
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index e713e86811ae..e0fdfddb406e 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -177,7 +177,12 @@ static inline void __lock_kernel(void)
177 177
178static inline void __unlock_kernel(void) 178static inline void __unlock_kernel(void)
179{ 179{
180 spin_unlock(&kernel_flag); 180 /*
181 * the BKL is not covered by lockdep, so we open-code the
182 * unlocking sequence (and thus avoid the dep-chain ops):
183 */
184 _raw_spin_unlock(&kernel_flag);
185 preempt_enable();
181} 186}
182 187
183/* 188/*
diff --git a/lib/locking-selftest-hardirq.h b/lib/locking-selftest-hardirq.h
new file mode 100644
index 000000000000..10d4a150b259
--- /dev/null
+++ b/lib/locking-selftest-hardirq.h
@@ -0,0 +1,9 @@
1#undef IRQ_DISABLE
2#undef IRQ_ENABLE
3#undef IRQ_ENTER
4#undef IRQ_EXIT
5
6#define IRQ_ENABLE HARDIRQ_ENABLE
7#define IRQ_DISABLE HARDIRQ_DISABLE
8#define IRQ_ENTER HARDIRQ_ENTER
9#define IRQ_EXIT HARDIRQ_EXIT
diff --git a/lib/locking-selftest-mutex.h b/lib/locking-selftest-mutex.h
new file mode 100644
index 000000000000..68601b6f584b
--- /dev/null
+++ b/lib/locking-selftest-mutex.h
@@ -0,0 +1,11 @@
1#undef LOCK
2#define LOCK ML
3
4#undef UNLOCK
5#define UNLOCK MU
6
7#undef RLOCK
8#undef WLOCK
9
10#undef INIT
11#define INIT MI
diff --git a/lib/locking-selftest-rlock-hardirq.h b/lib/locking-selftest-rlock-hardirq.h
new file mode 100644
index 000000000000..9f517ebcb786
--- /dev/null
+++ b/lib/locking-selftest-rlock-hardirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-rlock.h"
2#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-rlock-softirq.h b/lib/locking-selftest-rlock-softirq.h
new file mode 100644
index 000000000000..981455db7ff0
--- /dev/null
+++ b/lib/locking-selftest-rlock-softirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-rlock.h"
2#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-rlock.h b/lib/locking-selftest-rlock.h
new file mode 100644
index 000000000000..6789044f4d0e
--- /dev/null
+++ b/lib/locking-selftest-rlock.h
@@ -0,0 +1,14 @@
1#undef LOCK
2#define LOCK RL
3
4#undef UNLOCK
5#define UNLOCK RU
6
7#undef RLOCK
8#define RLOCK RL
9
10#undef WLOCK
11#define WLOCK WL
12
13#undef INIT
14#define INIT RWI
diff --git a/lib/locking-selftest-rsem.h b/lib/locking-selftest-rsem.h
new file mode 100644
index 000000000000..62da886680c7
--- /dev/null
+++ b/lib/locking-selftest-rsem.h
@@ -0,0 +1,14 @@
1#undef LOCK
2#define LOCK RSL
3
4#undef UNLOCK
5#define UNLOCK RSU
6
7#undef RLOCK
8#define RLOCK RSL
9
10#undef WLOCK
11#define WLOCK WSL
12
13#undef INIT
14#define INIT RWSI
diff --git a/lib/locking-selftest-softirq.h b/lib/locking-selftest-softirq.h
new file mode 100644
index 000000000000..a83de2a04ace
--- /dev/null
+++ b/lib/locking-selftest-softirq.h
@@ -0,0 +1,9 @@
1#undef IRQ_DISABLE
2#undef IRQ_ENABLE
3#undef IRQ_ENTER
4#undef IRQ_EXIT
5
6#define IRQ_DISABLE SOFTIRQ_DISABLE
7#define IRQ_ENABLE SOFTIRQ_ENABLE
8#define IRQ_ENTER SOFTIRQ_ENTER
9#define IRQ_EXIT SOFTIRQ_EXIT
diff --git a/lib/locking-selftest-spin-hardirq.h b/lib/locking-selftest-spin-hardirq.h
new file mode 100644
index 000000000000..693198dce30a
--- /dev/null
+++ b/lib/locking-selftest-spin-hardirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-spin.h"
2#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-spin-softirq.h b/lib/locking-selftest-spin-softirq.h
new file mode 100644
index 000000000000..c472e2a87ffc
--- /dev/null
+++ b/lib/locking-selftest-spin-softirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-spin.h"
2#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-spin.h b/lib/locking-selftest-spin.h
new file mode 100644
index 000000000000..ccd1b4b09757
--- /dev/null
+++ b/lib/locking-selftest-spin.h
@@ -0,0 +1,11 @@
1#undef LOCK
2#define LOCK L
3
4#undef UNLOCK
5#define UNLOCK U
6
7#undef RLOCK
8#undef WLOCK
9
10#undef INIT
11#define INIT SI
diff --git a/lib/locking-selftest-wlock-hardirq.h b/lib/locking-selftest-wlock-hardirq.h
new file mode 100644
index 000000000000..2dd2e5122caa
--- /dev/null
+++ b/lib/locking-selftest-wlock-hardirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-wlock.h"
2#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-wlock-softirq.h b/lib/locking-selftest-wlock-softirq.h
new file mode 100644
index 000000000000..cb80d1cb944e
--- /dev/null
+++ b/lib/locking-selftest-wlock-softirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-wlock.h"
2#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-wlock.h b/lib/locking-selftest-wlock.h
new file mode 100644
index 000000000000..0815322d99ed
--- /dev/null
+++ b/lib/locking-selftest-wlock.h
@@ -0,0 +1,14 @@
1#undef LOCK
2#define LOCK WL
3
4#undef UNLOCK
5#define UNLOCK WU
6
7#undef RLOCK
8#define RLOCK RL
9
10#undef WLOCK
11#define WLOCK WL
12
13#undef INIT
14#define INIT RWI
diff --git a/lib/locking-selftest-wsem.h b/lib/locking-selftest-wsem.h
new file mode 100644
index 000000000000..b88c5f2dc5f0
--- /dev/null
+++ b/lib/locking-selftest-wsem.h
@@ -0,0 +1,14 @@
1#undef LOCK
2#define LOCK WSL
3
4#undef UNLOCK
5#define UNLOCK WSU
6
7#undef RLOCK
8#define RLOCK RSL
9
10#undef WLOCK
11#define WLOCK WSL
12
13#undef INIT
14#define INIT RWSI
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
new file mode 100644
index 000000000000..7945787f439a
--- /dev/null
+++ b/lib/locking-selftest.c
@@ -0,0 +1,1216 @@
1/*
2 * lib/locking-selftest.c
3 *
4 * Testsuite for various locking APIs: spinlocks, rwlocks,
5 * mutexes and rw-semaphores.
6 *
7 * It is checking both false positives and false negatives.
8 *
9 * Started by Ingo Molnar:
10 *
11 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
12 */
13#include <linux/rwsem.h>
14#include <linux/mutex.h>
15#include <linux/sched.h>
16#include <linux/delay.h>
17#include <linux/module.h>
18#include <linux/lockdep.h>
19#include <linux/spinlock.h>
20#include <linux/kallsyms.h>
21#include <linux/interrupt.h>
22#include <linux/debug_locks.h>
23#include <linux/irqflags.h>
24
25/*
26 * Change this to 1 if you want to see the failure printouts:
27 */
28static unsigned int debug_locks_verbose;
29
30static int __init setup_debug_locks_verbose(char *str)
31{
32 get_option(&str, &debug_locks_verbose);
33
34 return 1;
35}
36
37__setup("debug_locks_verbose=", setup_debug_locks_verbose);
38
39#define FAILURE 0
40#define SUCCESS 1
41
42#define LOCKTYPE_SPIN 0x1
43#define LOCKTYPE_RWLOCK 0x2
44#define LOCKTYPE_MUTEX 0x4
45#define LOCKTYPE_RWSEM 0x8
46
47/*
48 * Normal standalone locks, for the circular and irq-context
49 * dependency tests:
50 */
51static DEFINE_SPINLOCK(lock_A);
52static DEFINE_SPINLOCK(lock_B);
53static DEFINE_SPINLOCK(lock_C);
54static DEFINE_SPINLOCK(lock_D);
55
56static DEFINE_RWLOCK(rwlock_A);
57static DEFINE_RWLOCK(rwlock_B);
58static DEFINE_RWLOCK(rwlock_C);
59static DEFINE_RWLOCK(rwlock_D);
60
61static DEFINE_MUTEX(mutex_A);
62static DEFINE_MUTEX(mutex_B);
63static DEFINE_MUTEX(mutex_C);
64static DEFINE_MUTEX(mutex_D);
65
66static DECLARE_RWSEM(rwsem_A);
67static DECLARE_RWSEM(rwsem_B);
68static DECLARE_RWSEM(rwsem_C);
69static DECLARE_RWSEM(rwsem_D);
70
71/*
72 * Locks that we initialize dynamically as well so that
73 * e.g. X1 and X2 become two instances of the same class,
74 * but X* and Y* are different classes. We do this so that
75 * we do not trigger a real lockup:
76 */
77static DEFINE_SPINLOCK(lock_X1);
78static DEFINE_SPINLOCK(lock_X2);
79static DEFINE_SPINLOCK(lock_Y1);
80static DEFINE_SPINLOCK(lock_Y2);
81static DEFINE_SPINLOCK(lock_Z1);
82static DEFINE_SPINLOCK(lock_Z2);
83
84static DEFINE_RWLOCK(rwlock_X1);
85static DEFINE_RWLOCK(rwlock_X2);
86static DEFINE_RWLOCK(rwlock_Y1);
87static DEFINE_RWLOCK(rwlock_Y2);
88static DEFINE_RWLOCK(rwlock_Z1);
89static DEFINE_RWLOCK(rwlock_Z2);
90
91static DEFINE_MUTEX(mutex_X1);
92static DEFINE_MUTEX(mutex_X2);
93static DEFINE_MUTEX(mutex_Y1);
94static DEFINE_MUTEX(mutex_Y2);
95static DEFINE_MUTEX(mutex_Z1);
96static DEFINE_MUTEX(mutex_Z2);
97
98static DECLARE_RWSEM(rwsem_X1);
99static DECLARE_RWSEM(rwsem_X2);
100static DECLARE_RWSEM(rwsem_Y1);
101static DECLARE_RWSEM(rwsem_Y2);
102static DECLARE_RWSEM(rwsem_Z1);
103static DECLARE_RWSEM(rwsem_Z2);
104
105/*
106 * non-inlined runtime initializers, to let separate locks share
107 * the same lock-class:
108 */
109#define INIT_CLASS_FUNC(class) \
110static noinline void \
111init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
112 struct rw_semaphore *rwsem) \
113{ \
114 spin_lock_init(lock); \
115 rwlock_init(rwlock); \
116 mutex_init(mutex); \
117 init_rwsem(rwsem); \
118}
119
120INIT_CLASS_FUNC(X)
121INIT_CLASS_FUNC(Y)
122INIT_CLASS_FUNC(Z)
123
124static void init_shared_classes(void)
125{
126 init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1);
127 init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2);
128
129 init_class_Y(&lock_Y1, &rwlock_Y1, &mutex_Y1, &rwsem_Y1);
130 init_class_Y(&lock_Y2, &rwlock_Y2, &mutex_Y2, &rwsem_Y2);
131
132 init_class_Z(&lock_Z1, &rwlock_Z1, &mutex_Z1, &rwsem_Z1);
133 init_class_Z(&lock_Z2, &rwlock_Z2, &mutex_Z2, &rwsem_Z2);
134}
135
136/*
137 * For spinlocks and rwlocks we also do hardirq-safe / softirq-safe tests.
138 * The following functions use a lock from a simulated hardirq/softirq
139 * context, causing the locks to be marked as hardirq-safe/softirq-safe:
140 */
141
142#define HARDIRQ_DISABLE local_irq_disable
143#define HARDIRQ_ENABLE local_irq_enable
144
145#define HARDIRQ_ENTER() \
146 local_irq_disable(); \
147 irq_enter(); \
148 WARN_ON(!in_irq());
149
150#define HARDIRQ_EXIT() \
151 __irq_exit(); \
152 local_irq_enable();
153
154#define SOFTIRQ_DISABLE local_bh_disable
155#define SOFTIRQ_ENABLE local_bh_enable
156
157#define SOFTIRQ_ENTER() \
158 local_bh_disable(); \
159 local_irq_disable(); \
160 trace_softirq_enter(); \
161 WARN_ON(!in_softirq());
162
163#define SOFTIRQ_EXIT() \
164 trace_softirq_exit(); \
165 local_irq_enable(); \
166 local_bh_enable();
167
168/*
169 * Shortcuts for lock/unlock API variants, to keep
170 * the testcases compact:
171 */
172#define L(x) spin_lock(&lock_##x)
173#define U(x) spin_unlock(&lock_##x)
174#define LU(x) L(x); U(x)
175#define SI(x) spin_lock_init(&lock_##x)
176
177#define WL(x) write_lock(&rwlock_##x)
178#define WU(x) write_unlock(&rwlock_##x)
179#define WLU(x) WL(x); WU(x)
180
181#define RL(x) read_lock(&rwlock_##x)
182#define RU(x) read_unlock(&rwlock_##x)
183#define RLU(x) RL(x); RU(x)
184#define RWI(x) rwlock_init(&rwlock_##x)
185
186#define ML(x) mutex_lock(&mutex_##x)
187#define MU(x) mutex_unlock(&mutex_##x)
188#define MI(x) mutex_init(&mutex_##x)
189
190#define WSL(x) down_write(&rwsem_##x)
191#define WSU(x) up_write(&rwsem_##x)
192
193#define RSL(x) down_read(&rwsem_##x)
194#define RSU(x) up_read(&rwsem_##x)
195#define RWSI(x) init_rwsem(&rwsem_##x)
196
197#define LOCK_UNLOCK_2(x,y) LOCK(x); LOCK(y); UNLOCK(y); UNLOCK(x)
198
199/*
200 * Generate different permutations of the same testcase, using
201 * the same basic lock-dependency/state events:
202 */
203
204#define GENERATE_TESTCASE(name) \
205 \
206static void name(void) { E(); }
207
208#define GENERATE_PERMUTATIONS_2_EVENTS(name) \
209 \
210static void name##_12(void) { E1(); E2(); } \
211static void name##_21(void) { E2(); E1(); }
212
213#define GENERATE_PERMUTATIONS_3_EVENTS(name) \
214 \
215static void name##_123(void) { E1(); E2(); E3(); } \
216static void name##_132(void) { E1(); E3(); E2(); } \
217static void name##_213(void) { E2(); E1(); E3(); } \
218static void name##_231(void) { E2(); E3(); E1(); } \
219static void name##_312(void) { E3(); E1(); E2(); } \
220static void name##_321(void) { E3(); E2(); E1(); }
221
222/*
223 * AA deadlock:
224 */
225
226#define E() \
227 \
228 LOCK(X1); \
229 LOCK(X2); /* this one should fail */
230
231/*
232 * 6 testcases:
233 */
234#include "locking-selftest-spin.h"
235GENERATE_TESTCASE(AA_spin)
236#include "locking-selftest-wlock.h"
237GENERATE_TESTCASE(AA_wlock)
238#include "locking-selftest-rlock.h"
239GENERATE_TESTCASE(AA_rlock)
240#include "locking-selftest-mutex.h"
241GENERATE_TESTCASE(AA_mutex)
242#include "locking-selftest-wsem.h"
243GENERATE_TESTCASE(AA_wsem)
244#include "locking-selftest-rsem.h"
245GENERATE_TESTCASE(AA_rsem)
246
247#undef E
248
249/*
250 * Special-case for read-locking, they are
251 * allowed to recurse on the same lock class:
252 */
253static void rlock_AA1(void)
254{
255 RL(X1);
256 RL(X1); // this one should NOT fail
257}
258
259static void rlock_AA1B(void)
260{
261 RL(X1);
262 RL(X2); // this one should NOT fail
263}
264
265static void rsem_AA1(void)
266{
267 RSL(X1);
268 RSL(X1); // this one should fail
269}
270
271static void rsem_AA1B(void)
272{
273 RSL(X1);
274 RSL(X2); // this one should fail
275}
276/*
277 * The mixing of read and write locks is not allowed:
278 */
279static void rlock_AA2(void)
280{
281 RL(X1);
282 WL(X2); // this one should fail
283}
284
285static void rsem_AA2(void)
286{
287 RSL(X1);
288 WSL(X2); // this one should fail
289}
290
291static void rlock_AA3(void)
292{
293 WL(X1);
294 RL(X2); // this one should fail
295}
296
297static void rsem_AA3(void)
298{
299 WSL(X1);
300 RSL(X2); // this one should fail
301}
302
303/*
304 * ABBA deadlock:
305 */
306
307#define E() \
308 \
309 LOCK_UNLOCK_2(A, B); \
310 LOCK_UNLOCK_2(B, A); /* fail */
311
312/*
313 * 6 testcases:
314 */
315#include "locking-selftest-spin.h"
316GENERATE_TESTCASE(ABBA_spin)
317#include "locking-selftest-wlock.h"
318GENERATE_TESTCASE(ABBA_wlock)
319#include "locking-selftest-rlock.h"
320GENERATE_TESTCASE(ABBA_rlock)
321#include "locking-selftest-mutex.h"
322GENERATE_TESTCASE(ABBA_mutex)
323#include "locking-selftest-wsem.h"
324GENERATE_TESTCASE(ABBA_wsem)
325#include "locking-selftest-rsem.h"
326GENERATE_TESTCASE(ABBA_rsem)
327
328#undef E
329
330/*
331 * AB BC CA deadlock:
332 */
333
334#define E() \
335 \
336 LOCK_UNLOCK_2(A, B); \
337 LOCK_UNLOCK_2(B, C); \
338 LOCK_UNLOCK_2(C, A); /* fail */
339
340/*
341 * 6 testcases:
342 */
343#include "locking-selftest-spin.h"
344GENERATE_TESTCASE(ABBCCA_spin)
345#include "locking-selftest-wlock.h"
346GENERATE_TESTCASE(ABBCCA_wlock)
347#include "locking-selftest-rlock.h"
348GENERATE_TESTCASE(ABBCCA_rlock)
349#include "locking-selftest-mutex.h"
350GENERATE_TESTCASE(ABBCCA_mutex)
351#include "locking-selftest-wsem.h"
352GENERATE_TESTCASE(ABBCCA_wsem)
353#include "locking-selftest-rsem.h"
354GENERATE_TESTCASE(ABBCCA_rsem)
355
356#undef E
357
358/*
359 * AB CA BC deadlock:
360 */
361
362#define E() \
363 \
364 LOCK_UNLOCK_2(A, B); \
365 LOCK_UNLOCK_2(C, A); \
366 LOCK_UNLOCK_2(B, C); /* fail */
367
368/*
369 * 6 testcases:
370 */
371#include "locking-selftest-spin.h"
372GENERATE_TESTCASE(ABCABC_spin)
373#include "locking-selftest-wlock.h"
374GENERATE_TESTCASE(ABCABC_wlock)
375#include "locking-selftest-rlock.h"
376GENERATE_TESTCASE(ABCABC_rlock)
377#include "locking-selftest-mutex.h"
378GENERATE_TESTCASE(ABCABC_mutex)
379#include "locking-selftest-wsem.h"
380GENERATE_TESTCASE(ABCABC_wsem)
381#include "locking-selftest-rsem.h"
382GENERATE_TESTCASE(ABCABC_rsem)
383
384#undef E
385
386/*
387 * AB BC CD DA deadlock:
388 */
389
390#define E() \
391 \
392 LOCK_UNLOCK_2(A, B); \
393 LOCK_UNLOCK_2(B, C); \
394 LOCK_UNLOCK_2(C, D); \
395 LOCK_UNLOCK_2(D, A); /* fail */
396
397/*
398 * 6 testcases:
399 */
400#include "locking-selftest-spin.h"
401GENERATE_TESTCASE(ABBCCDDA_spin)
402#include "locking-selftest-wlock.h"
403GENERATE_TESTCASE(ABBCCDDA_wlock)
404#include "locking-selftest-rlock.h"
405GENERATE_TESTCASE(ABBCCDDA_rlock)
406#include "locking-selftest-mutex.h"
407GENERATE_TESTCASE(ABBCCDDA_mutex)
408#include "locking-selftest-wsem.h"
409GENERATE_TESTCASE(ABBCCDDA_wsem)
410#include "locking-selftest-rsem.h"
411GENERATE_TESTCASE(ABBCCDDA_rsem)
412
413#undef E
414
415/*
416 * AB CD BD DA deadlock:
417 */
418#define E() \
419 \
420 LOCK_UNLOCK_2(A, B); \
421 LOCK_UNLOCK_2(C, D); \
422 LOCK_UNLOCK_2(B, D); \
423 LOCK_UNLOCK_2(D, A); /* fail */
424
425/*
426 * 6 testcases:
427 */
428#include "locking-selftest-spin.h"
429GENERATE_TESTCASE(ABCDBDDA_spin)
430#include "locking-selftest-wlock.h"
431GENERATE_TESTCASE(ABCDBDDA_wlock)
432#include "locking-selftest-rlock.h"
433GENERATE_TESTCASE(ABCDBDDA_rlock)
434#include "locking-selftest-mutex.h"
435GENERATE_TESTCASE(ABCDBDDA_mutex)
436#include "locking-selftest-wsem.h"
437GENERATE_TESTCASE(ABCDBDDA_wsem)
438#include "locking-selftest-rsem.h"
439GENERATE_TESTCASE(ABCDBDDA_rsem)
440
441#undef E
442
443/*
444 * AB CD BC DA deadlock:
445 */
446#define E() \
447 \
448 LOCK_UNLOCK_2(A, B); \
449 LOCK_UNLOCK_2(C, D); \
450 LOCK_UNLOCK_2(B, C); \
451 LOCK_UNLOCK_2(D, A); /* fail */
452
453/*
454 * 6 testcases:
455 */
456#include "locking-selftest-spin.h"
457GENERATE_TESTCASE(ABCDBCDA_spin)
458#include "locking-selftest-wlock.h"
459GENERATE_TESTCASE(ABCDBCDA_wlock)
460#include "locking-selftest-rlock.h"
461GENERATE_TESTCASE(ABCDBCDA_rlock)
462#include "locking-selftest-mutex.h"
463GENERATE_TESTCASE(ABCDBCDA_mutex)
464#include "locking-selftest-wsem.h"
465GENERATE_TESTCASE(ABCDBCDA_wsem)
466#include "locking-selftest-rsem.h"
467GENERATE_TESTCASE(ABCDBCDA_rsem)
468
469#undef E
470
471/*
472 * Double unlock:
473 */
474#define E() \
475 \
476 LOCK(A); \
477 UNLOCK(A); \
478 UNLOCK(A); /* fail */
479
480/*
481 * 6 testcases:
482 */
483#include "locking-selftest-spin.h"
484GENERATE_TESTCASE(double_unlock_spin)
485#include "locking-selftest-wlock.h"
486GENERATE_TESTCASE(double_unlock_wlock)
487#include "locking-selftest-rlock.h"
488GENERATE_TESTCASE(double_unlock_rlock)
489#include "locking-selftest-mutex.h"
490GENERATE_TESTCASE(double_unlock_mutex)
491#include "locking-selftest-wsem.h"
492GENERATE_TESTCASE(double_unlock_wsem)
493#include "locking-selftest-rsem.h"
494GENERATE_TESTCASE(double_unlock_rsem)
495
496#undef E
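/*
 * Editor's illustrative sketch (not part of this file): at the lock API
 * level the double-unlock cases above model this plain bug; the debug
 * checks report the second release instead of letting it silently
 * corrupt lock state (names are placeholders):
 */
static DEFINE_SPINLOCK(sketch_du_lock);

static void double_unlock_sketch(void)
{
	spin_lock(&sketch_du_lock);
	spin_unlock(&sketch_du_lock);
	spin_unlock(&sketch_du_lock);	/* already released: expected report */
}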
497
498/*
499 * Bad unlock ordering:
500 */
501#define E() \
502 \
503 LOCK(A); \
504 LOCK(B); \
505 UNLOCK(A); /* fail */ \
506 UNLOCK(B);
507
508/*
509 * 6 testcases:
510 */
511#include "locking-selftest-spin.h"
512GENERATE_TESTCASE(bad_unlock_order_spin)
513#include "locking-selftest-wlock.h"
514GENERATE_TESTCASE(bad_unlock_order_wlock)
515#include "locking-selftest-rlock.h"
516GENERATE_TESTCASE(bad_unlock_order_rlock)
517#include "locking-selftest-mutex.h"
518GENERATE_TESTCASE(bad_unlock_order_mutex)
519#include "locking-selftest-wsem.h"
520GENERATE_TESTCASE(bad_unlock_order_wsem)
521#include "locking-selftest-rsem.h"
522GENERATE_TESTCASE(bad_unlock_order_rsem)
523
524#undef E
525
526/*
527 * Initializing a held lock:
528 */
529#define E() \
530 \
531 LOCK(A); \
532 INIT(A); /* fail */
533
534/*
535 * 6 testcases:
536 */
537#include "locking-selftest-spin.h"
538GENERATE_TESTCASE(init_held_spin)
539#include "locking-selftest-wlock.h"
540GENERATE_TESTCASE(init_held_wlock)
541#include "locking-selftest-rlock.h"
542GENERATE_TESTCASE(init_held_rlock)
543#include "locking-selftest-mutex.h"
544GENERATE_TESTCASE(init_held_mutex)
545#include "locking-selftest-wsem.h"
546GENERATE_TESTCASE(init_held_wsem)
547#include "locking-selftest-rsem.h"
548GENERATE_TESTCASE(init_held_rsem)
549
550#undef E
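/*
 * Editor's illustrative sketch (not part of this file): the init_held
 * cases above correspond to re-running an initializer on a lock that is
 * currently held (names are placeholders):
 */
static DEFINE_SPINLOCK(sketch_init_lock);

static void init_held_sketch(void)
{
	spin_lock(&sketch_init_lock);
	spin_lock_init(&sketch_init_lock);	/* re-init of a held lock: expected report */
	spin_unlock(&sketch_init_lock);
}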
551
552/*
553 * Locking an irq-safe lock with irqs enabled:
554 */
555#define E1() \
556 \
557 IRQ_ENTER(); \
558 LOCK(A); \
559 UNLOCK(A); \
560 IRQ_EXIT();
561
562#define E2() \
563 \
564 LOCK(A); \
565 UNLOCK(A);
566
567/*
568 * Generate 24 testcases:
569 */
570#include "locking-selftest-spin-hardirq.h"
571GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
572
573#include "locking-selftest-rlock-hardirq.h"
574GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
575
576#include "locking-selftest-wlock-hardirq.h"
577GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
578
579#include "locking-selftest-spin-softirq.h"
580GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
581
582#include "locking-selftest-rlock-softirq.h"
583GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
584
585#include "locking-selftest-wlock-softirq.h"
586GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
587
588#undef E1
589#undef E2
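/*
 * Editor's illustrative sketch (not part of this file): a driver-style
 * instance of the irqsafe1 pattern, assuming sketch_irq_lock is also
 * taken from an interrupt handler somewhere else (all names are
 * placeholders):
 */
static DEFINE_SPINLOCK(sketch_irq_lock);

static void irqsafe1_sketch_buggy(void)
{
	/*
	 * irqs are still enabled here: the interrupt that also takes
	 * this lock can arrive on this CPU and self-deadlock.
	 */
	spin_lock(&sketch_irq_lock);
	spin_unlock(&sketch_irq_lock);
}

static void irqsafe1_sketch_fixed(void)
{
	unsigned long flags;

	spin_lock_irqsave(&sketch_irq_lock, flags);
	spin_unlock_irqrestore(&sketch_irq_lock, flags);
}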
590
591/*
592 * Enabling hardirqs with a softirq-safe lock held:
593 */
594#define E1() \
595 \
596 SOFTIRQ_ENTER(); \
597 LOCK(A); \
598 UNLOCK(A); \
599 SOFTIRQ_EXIT();
600
601#define E2() \
602 \
603 HARDIRQ_DISABLE(); \
604 LOCK(A); \
605 HARDIRQ_ENABLE(); \
606 UNLOCK(A);
607
608/*
609 * Generate 12 testcases:
610 */
611#include "locking-selftest-spin.h"
612GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_spin)
613
614#include "locking-selftest-wlock.h"
615GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_wlock)
616
617#include "locking-selftest-rlock.h"
618GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
619
620#undef E1
621#undef E2
622
623/*
624 * Enabling irqs with an irq-safe lock held:
625 */
626#define E1() \
627 \
628 IRQ_ENTER(); \
629 LOCK(A); \
630 UNLOCK(A); \
631 IRQ_EXIT();
632
633#define E2() \
634 \
635 IRQ_DISABLE(); \
636 LOCK(A); \
637 IRQ_ENABLE(); \
638 UNLOCK(A);
639
640/*
641 * Generate 24 testcases:
642 */
643#include "locking-selftest-spin-hardirq.h"
644GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
645
646#include "locking-selftest-rlock-hardirq.h"
647GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
648
649#include "locking-selftest-wlock-hardirq.h"
650GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
651
652#include "locking-selftest-spin-softirq.h"
653GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
654
655#include "locking-selftest-rlock-softirq.h"
656GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
657
658#include "locking-selftest-wlock-softirq.h"
659GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
660
661#undef E1
662#undef E2
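/*
 * Editor's illustrative sketch (not part of this file): the irqsafe2B
 * cases above correspond to re-enabling interrupts while an irq-safe
 * lock is still held (names are placeholders):
 */
static DEFINE_SPINLOCK(sketch_irq2_lock);	/* also taken from irq context elsewhere */

static void irqsafe2B_sketch(void)
{
	spin_lock_irq(&sketch_irq2_lock);
	local_irq_enable();			/* irq-safe lock still held: expected report */
	spin_unlock(&sketch_irq2_lock);
}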
663
664/*
665 * Acquiring an irq-unsafe lock while holding an irq-safe lock:
666 */
667#define E1() \
668 \
669 LOCK(A); \
670 LOCK(B); \
671 UNLOCK(B); \
672 UNLOCK(A); \
673
674#define E2() \
675 \
676 LOCK(B); \
677 UNLOCK(B);
678
679#define E3() \
680 \
681 IRQ_ENTER(); \
682 LOCK(A); \
683 UNLOCK(A); \
684 IRQ_EXIT();
685
686/*
687 * Generate 36 testcases:
688 */
689#include "locking-selftest-spin-hardirq.h"
690GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
691
692#include "locking-selftest-rlock-hardirq.h"
693GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
694
695#include "locking-selftest-wlock-hardirq.h"
696GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
697
698#include "locking-selftest-spin-softirq.h"
699GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
700
701#include "locking-selftest-rlock-softirq.h"
702GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
703
704#include "locking-selftest-wlock-softirq.h"
705GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
706
707#undef E1
708#undef E2
709#undef E3
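/*
 * Editor's illustrative sketch (not part of this file): the irqsafe3
 * cases above catch an irq-safe lock being made dependent on an
 * irq-unsafe one (names are placeholders):
 */
static DEFINE_SPINLOCK(sketch_safe_lock);	/* also taken from irq context elsewhere */
static DEFINE_SPINLOCK(sketch_unsafe_lock);	/* only ever taken with irqs enabled */

static void irqsafe3_sketch(void)
{
	spin_lock_irq(&sketch_safe_lock);
	spin_lock(&sketch_unsafe_lock);		/* irq-safe -> irq-unsafe dependency: expected report */
	spin_unlock(&sketch_unsafe_lock);
	spin_unlock_irq(&sketch_safe_lock);
}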
710
711/*
712 * If a lock becomes softirq-safe, but it earlier took
713 * a softirq-unsafe lock:
714 */
715
716#define E1() \
717 IRQ_DISABLE(); \
718 LOCK(A); \
719 LOCK(B); \
720 UNLOCK(B); \
721 UNLOCK(A); \
722 IRQ_ENABLE();
723
724#define E2() \
725 LOCK(B); \
726 UNLOCK(B);
727
728#define E3() \
729 IRQ_ENTER(); \
730 LOCK(A); \
731 UNLOCK(A); \
732 IRQ_EXIT();
733
734/*
735 * Generate 36 testcases:
736 */
737#include "locking-selftest-spin-hardirq.h"
738GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
739
740#include "locking-selftest-rlock-hardirq.h"
741GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
742
743#include "locking-selftest-wlock-hardirq.h"
744GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
745
746#include "locking-selftest-spin-softirq.h"
747GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
748
749#include "locking-selftest-rlock-softirq.h"
750GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
751
752#include "locking-selftest-wlock-softirq.h"
753GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
754
755#undef E1
756#undef E2
757#undef E3
758
759/*
760 * read-lock / write-lock irq inversion.
761 *
762 * Deadlock scenario:
763 *
764 * CPU#1 is at #1, i.e. it has write-locked A, but has not
765 * taken B yet.
766 *
767 * CPU#2 is at #2, i.e. it has locked B.
768 *
769 * Hardirq hits CPU#2 at point #2 and is trying to read-lock A.
770 *
771 * The deadlock occurs because CPU#1 will spin on B, and CPU#2
772 * will spin on A.
773 */
774
775#define E1() \
776 \
777 IRQ_DISABLE(); \
778 WL(A); \
779 LOCK(B); \
780 UNLOCK(B); \
781 WU(A); \
782 IRQ_ENABLE();
783
784#define E2() \
785 \
786 LOCK(B); \
787 UNLOCK(B);
788
789#define E3() \
790 \
791 IRQ_ENTER(); \
792 RL(A); \
793 RU(A); \
794 IRQ_EXIT();
795
796/*
797 * Generate 36 testcases:
798 */
799#include "locking-selftest-spin-hardirq.h"
800GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_spin)
801
802#include "locking-selftest-rlock-hardirq.h"
803GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock)
804
805#include "locking-selftest-wlock-hardirq.h"
806GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
807
808#include "locking-selftest-spin-softirq.h"
809GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
810
811#include "locking-selftest-rlock-softirq.h"
812GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock)
813
814#include "locking-selftest-wlock-softirq.h"
815GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
816
817#undef E1
818#undef E2
819#undef E3
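/*
 * Editor's illustrative sketch (not part of this file): the scenario
 * described in the comment above, spelled out as the two CPU paths plus
 * the interrupt handler body that closes the cycle (names are
 * placeholders):
 */
static DEFINE_RWLOCK(sketch_inv_rwlock);
static DEFINE_SPINLOCK(sketch_inv_lock);

static void sketch_cpu1_path(void)		/* runs with irqs disabled */
{
	write_lock(&sketch_inv_rwlock);		/* point #1 */
	spin_lock(&sketch_inv_lock);
	spin_unlock(&sketch_inv_lock);
	write_unlock(&sketch_inv_rwlock);
}

static void sketch_cpu2_path(void)		/* runs with irqs enabled */
{
	spin_lock(&sketch_inv_lock);		/* point #2: a hardirq may hit here */
	spin_unlock(&sketch_inv_lock);
}

static void sketch_cpu2_hardirq_body(void)
{
	read_lock(&sketch_inv_rwlock);		/* spins while CPU#1 holds the write lock */
	read_unlock(&sketch_inv_rwlock);
}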
820
821/*
822 * read-lock / write-lock recursion that is actually safe.
823 */
824
825#define E1() \
826 \
827 IRQ_DISABLE(); \
828 WL(A); \
829 WU(A); \
830 IRQ_ENABLE();
831
832#define E2() \
833 \
834 RL(A); \
835 RU(A); \
836
837#define E3() \
838 \
839 IRQ_ENTER(); \
840 RL(A); \
841 L(B); \
842 U(B); \
843 RU(A); \
844 IRQ_EXIT();
845
846/*
847 * Generate 12 testcases:
848 */
849#include "locking-selftest-hardirq.h"
850GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard)
851
852#include "locking-selftest-softirq.h"
853GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
854
855#undef E1
856#undef E2
857#undef E3
858
859/*
860 * read-lock / write-lock recursion that is unsafe.
861 */
862
863#define E1() \
864 \
865 IRQ_DISABLE(); \
866 L(B); \
867 WL(A); \
868 WU(A); \
869 U(B); \
870 IRQ_ENABLE();
871
872#define E2() \
873 \
874 RL(A); \
875 RU(A); \
876
877#define E3() \
878 \
879 IRQ_ENTER(); \
880 L(B); \
881 U(B); \
882 IRQ_EXIT();
883
884/*
885 * Generate 12 testcases:
886 */
887#include "locking-selftest-hardirq.h"
888// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard)
889
890#include "locking-selftest-softirq.h"
891// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
892
893#ifdef CONFIG_DEBUG_LOCK_ALLOC
894# define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map)
895# define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map)
896# define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map)
897# define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map)
898#else
899# define I_SPINLOCK(x)
900# define I_RWLOCK(x)
901# define I_MUTEX(x)
902# define I_RWSEM(x)
903#endif
904
905#define I1(x) \
906 do { \
907 I_SPINLOCK(x); \
908 I_RWLOCK(x); \
909 I_MUTEX(x); \
910 I_RWSEM(x); \
911 } while (0)
912
913#define I2(x) \
914 do { \
915 spin_lock_init(&lock_##x); \
916 rwlock_init(&rwlock_##x); \
917 mutex_init(&mutex_##x); \
918 init_rwsem(&rwsem_##x); \
919 } while (0)
920
921static void reset_locks(void)
922{
923 local_irq_disable();
924 I1(A); I1(B); I1(C); I1(D);
925 I1(X1); I1(X2); I1(Y1); I1(Y2); I1(Z1); I1(Z2);
926 lockdep_reset();
927 I2(A); I2(B); I2(C); I2(D);
928 init_shared_classes();
929 local_irq_enable();
930}
931
932#undef I
933
934static int testcase_total;
935static int testcase_successes;
936static int expected_testcase_failures;
937static int unexpected_testcase_failures;
938
939static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
940{
941 unsigned long saved_preempt_count = preempt_count();
942 int expected_failure = 0;
943
944 WARN_ON(irqs_disabled());
945
946 testcase_fn();
947 /*
948 * Filter out expected failures:
949 */
950#ifndef CONFIG_PROVE_LOCKING
951 if ((lockclass_mask & LOCKTYPE_SPIN) && debug_locks != expected)
952 expected_failure = 1;
953 if ((lockclass_mask & LOCKTYPE_RWLOCK) && debug_locks != expected)
954 expected_failure = 1;
955 if ((lockclass_mask & LOCKTYPE_MUTEX) && debug_locks != expected)
956 expected_failure = 1;
957 if ((lockclass_mask & LOCKTYPE_RWSEM) && debug_locks != expected)
958 expected_failure = 1;
959#endif
960 if (debug_locks != expected) {
961 if (expected_failure) {
962 expected_testcase_failures++;
963 printk("failed|");
964 } else {
965 unexpected_testcase_failures++;
966 printk("FAILED|");
967 }
968 } else {
969 testcase_successes++;
970 printk(" ok |");
971 }
972 testcase_total++;
973
974 if (debug_locks_verbose)
975 printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
976 lockclass_mask, debug_locks, expected);
977 /*
978 * Some tests (e.g. double-unlock) might corrupt the preemption
979 * count, so restore it:
980 */
981 preempt_count() = saved_preempt_count;
982#ifdef CONFIG_TRACE_IRQFLAGS
983 if (softirq_count())
984 current->softirqs_enabled = 0;
985 else
986 current->softirqs_enabled = 1;
987#endif
988
989 reset_locks();
990}
991
992static inline void print_testname(const char *testname)
993{
994 printk("%33s:", testname);
995}
996
997#define DO_TESTCASE_1(desc, name, nr) \
998 print_testname(desc"/"#nr); \
999 dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
1000 printk("\n");
1001
1002#define DO_TESTCASE_1B(desc, name, nr) \
1003 print_testname(desc"/"#nr); \
1004 dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \
1005 printk("\n");
1006
1007#define DO_TESTCASE_3(desc, name, nr) \
1008 print_testname(desc"/"#nr); \
1009 dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \
1010 dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \
1011 dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
1012 printk("\n");
1013
1014#define DO_TESTCASE_3RW(desc, name, nr) \
1015 print_testname(desc"/"#nr); \
1016 dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
1017 dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \
1018 dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
1019 printk("\n");
1020
1021#define DO_TESTCASE_6(desc, name) \
1022 print_testname(desc); \
1023 dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \
1024 dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \
1025 dotest(name##_rlock, FAILURE, LOCKTYPE_RWLOCK); \
1026 dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
1027 dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
1028 dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
1029 printk("\n");
1030
1031#define DO_TESTCASE_6_SUCCESS(desc, name) \
1032 print_testname(desc); \
1033 dotest(name##_spin, SUCCESS, LOCKTYPE_SPIN); \
1034 dotest(name##_wlock, SUCCESS, LOCKTYPE_RWLOCK); \
1035 dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \
1036 dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX); \
1037 dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM); \
1038 dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM); \
1039 printk("\n");
1040
1041/*
1042 * 'read' variant: rlocks must not trigger.
1043 */
1044#define DO_TESTCASE_6R(desc, name) \
1045 print_testname(desc); \
1046 dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \
1047 dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \
1048 dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \
1049 dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
1050 dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
1051 dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
1052 printk("\n");
1053
1054#define DO_TESTCASE_2I(desc, name, nr) \
1055 DO_TESTCASE_1("hard-"desc, name##_hard, nr); \
1056 DO_TESTCASE_1("soft-"desc, name##_soft, nr);
1057
1058#define DO_TESTCASE_2IB(desc, name, nr) \
1059 DO_TESTCASE_1B("hard-"desc, name##_hard, nr); \
1060 DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
1061
1062#define DO_TESTCASE_6I(desc, name, nr) \
1063 DO_TESTCASE_3("hard-"desc, name##_hard, nr); \
1064 DO_TESTCASE_3("soft-"desc, name##_soft, nr);
1065
1066#define DO_TESTCASE_6IRW(desc, name, nr) \
1067 DO_TESTCASE_3RW("hard-"desc, name##_hard, nr); \
1068 DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
1069
1070#define DO_TESTCASE_2x3(desc, name) \
1071 DO_TESTCASE_3(desc, name, 12); \
1072 DO_TESTCASE_3(desc, name, 21);
1073
1074#define DO_TESTCASE_2x6(desc, name) \
1075 DO_TESTCASE_6I(desc, name, 12); \
1076 DO_TESTCASE_6I(desc, name, 21);
1077
1078#define DO_TESTCASE_6x2(desc, name) \
1079 DO_TESTCASE_2I(desc, name, 123); \
1080 DO_TESTCASE_2I(desc, name, 132); \
1081 DO_TESTCASE_2I(desc, name, 213); \
1082 DO_TESTCASE_2I(desc, name, 231); \
1083 DO_TESTCASE_2I(desc, name, 312); \
1084 DO_TESTCASE_2I(desc, name, 321);
1085
1086#define DO_TESTCASE_6x2B(desc, name) \
1087 DO_TESTCASE_2IB(desc, name, 123); \
1088 DO_TESTCASE_2IB(desc, name, 132); \
1089 DO_TESTCASE_2IB(desc, name, 213); \
1090 DO_TESTCASE_2IB(desc, name, 231); \
1091 DO_TESTCASE_2IB(desc, name, 312); \
1092 DO_TESTCASE_2IB(desc, name, 321);
1093
1094#define DO_TESTCASE_6x6(desc, name) \
1095 DO_TESTCASE_6I(desc, name, 123); \
1096 DO_TESTCASE_6I(desc, name, 132); \
1097 DO_TESTCASE_6I(desc, name, 213); \
1098 DO_TESTCASE_6I(desc, name, 231); \
1099 DO_TESTCASE_6I(desc, name, 312); \
1100 DO_TESTCASE_6I(desc, name, 321);
1101
1102#define DO_TESTCASE_6x6RW(desc, name) \
1103 DO_TESTCASE_6IRW(desc, name, 123); \
1104 DO_TESTCASE_6IRW(desc, name, 132); \
1105 DO_TESTCASE_6IRW(desc, name, 213); \
1106 DO_TESTCASE_6IRW(desc, name, 231); \
1107 DO_TESTCASE_6IRW(desc, name, 312); \
1108 DO_TESTCASE_6IRW(desc, name, 321);
1109
1110
1111void locking_selftest(void)
1112{
1113 /*
1114 * Got a locking failure before the selftest ran?
1115 */
1116 if (!debug_locks) {
1117 printk("----------------------------------\n");
1118 printk("| Locking API testsuite disabled |\n");
1119 printk("----------------------------------\n");
1120 return;
1121 }
1122
1123 /*
1124 * Run the testsuite:
1125 */
1126 printk("------------------------\n");
1127 printk("| Locking API testsuite:\n");
1128 printk("----------------------------------------------------------------------------\n");
1129 printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n");
1130 printk(" --------------------------------------------------------------------------\n");
1131
1132 init_shared_classes();
1133 debug_locks_silent = !debug_locks_verbose;
1134
1135 DO_TESTCASE_6R("A-A deadlock", AA);
1136 DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
1137 DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
1138 DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
1139 DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
1140 DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
1141 DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
1142 DO_TESTCASE_6("double unlock", double_unlock);
1143 DO_TESTCASE_6("initialize held", init_held);
1144 DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
1145
1146 printk(" --------------------------------------------------------------------------\n");
1147 print_testname("recursive read-lock");
1148 printk(" |");
1149 dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
1150 printk(" |");
1151 dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
1152 printk("\n");
1153
1154 print_testname("recursive read-lock #2");
1155 printk(" |");
1156 dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
1157 printk(" |");
1158 dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
1159 printk("\n");
1160
1161 print_testname("mixed read-write-lock");
1162 printk(" |");
1163 dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
1164 printk(" |");
1165 dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
1166 printk("\n");
1167
1168 print_testname("mixed write-read-lock");
1169 printk(" |");
1170 dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
1171 printk(" |");
1172 dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
1173 printk("\n");
1174
1175 printk(" --------------------------------------------------------------------------\n");
1176
1177 /*
1178 * irq-context testcases:
1179 */
1180 DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
1181 DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
1182 DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
1183 DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
1184 DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
1185 DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);
1186
1187 DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
1188// DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
1189
1190 if (unexpected_testcase_failures) {
1191 printk("-----------------------------------------------------------------\n");
1192 debug_locks = 0;
1193 printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
1194 unexpected_testcase_failures, testcase_total);
1195 printk("-----------------------------------------------------------------\n");
1196 } else if (expected_testcase_failures && testcase_successes) {
1197 printk("--------------------------------------------------------\n");
1198 printk("%3d out of %3d testcases failed, as expected. |\n",
1199 expected_testcase_failures, testcase_total);
1200 printk("----------------------------------------------------\n");
1201 debug_locks = 1;
1202 } else if (expected_testcase_failures && !testcase_successes) {
1203 printk("--------------------------------------------------------\n");
1204 printk("All %3d testcases failed, as expected. |\n",
1205 expected_testcase_failures);
1206 printk("----------------------------------------\n");
1207 debug_locks = 1;
1208 } else {
1209 printk("-------------------------------------------------------\n");
1210 printk("Good, all %3d testcases passed! |\n",
1211 testcase_successes);
1212 printk("---------------------------------\n");
1213 debug_locks = 1;
1214 }
1215 debug_locks_silent = 0;
1216}
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 40ffde940a86..db4fed74b940 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -17,27 +17,22 @@ struct rwsem_waiter {
17#define RWSEM_WAITING_FOR_WRITE 0x00000002 17#define RWSEM_WAITING_FOR_WRITE 0x00000002
18}; 18};
19 19
20#if RWSEM_DEBUG
21void rwsemtrace(struct rw_semaphore *sem, const char *str)
22{
23 if (sem->debug)
24 printk("[%d] %s({%d,%d})\n",
25 current->pid, str, sem->activity,
26 list_empty(&sem->wait_list) ? 0 : 1);
27}
28#endif
29
30/* 20/*
31 * initialise the semaphore 21 * initialise the semaphore
32 */ 22 */
33void fastcall init_rwsem(struct rw_semaphore *sem) 23void __init_rwsem(struct rw_semaphore *sem, const char *name,
24 struct lock_class_key *key)
34{ 25{
26#ifdef CONFIG_DEBUG_LOCK_ALLOC
27 /*
28 * Make sure we are not reinitializing a held semaphore:
29 */
30 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
31 lockdep_init_map(&sem->dep_map, name, key);
32#endif
35 sem->activity = 0; 33 sem->activity = 0;
36 spin_lock_init(&sem->wait_lock); 34 spin_lock_init(&sem->wait_lock);
37 INIT_LIST_HEAD(&sem->wait_list); 35 INIT_LIST_HEAD(&sem->wait_list);
38#if RWSEM_DEBUG
39 sem->debug = 0;
40#endif
41} 36}
42 37
43/* 38/*
@@ -56,8 +51,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
56 struct task_struct *tsk; 51 struct task_struct *tsk;
57 int woken; 52 int woken;
58 53
59 rwsemtrace(sem, "Entering __rwsem_do_wake");
60
61 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 54 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
62 55
63 if (!wakewrite) { 56 if (!wakewrite) {
@@ -104,7 +97,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
104 sem->activity += woken; 97 sem->activity += woken;
105 98
106 out: 99 out:
107 rwsemtrace(sem, "Leaving __rwsem_do_wake");
108 return sem; 100 return sem;
109} 101}
110 102
@@ -138,8 +130,6 @@ void fastcall __sched __down_read(struct rw_semaphore *sem)
138 struct rwsem_waiter waiter; 130 struct rwsem_waiter waiter;
139 struct task_struct *tsk; 131 struct task_struct *tsk;
140 132
141 rwsemtrace(sem, "Entering __down_read");
142
143 spin_lock_irq(&sem->wait_lock); 133 spin_lock_irq(&sem->wait_lock);
144 134
145 if (sem->activity >= 0 && list_empty(&sem->wait_list)) { 135 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
@@ -171,9 +161,8 @@ void fastcall __sched __down_read(struct rw_semaphore *sem)
171 } 161 }
172 162
173 tsk->state = TASK_RUNNING; 163 tsk->state = TASK_RUNNING;
174
175 out: 164 out:
176 rwsemtrace(sem, "Leaving __down_read"); 165 ;
177} 166}
178 167
179/* 168/*
@@ -184,7 +173,6 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
184 unsigned long flags; 173 unsigned long flags;
185 int ret = 0; 174 int ret = 0;
186 175
187 rwsemtrace(sem, "Entering __down_read_trylock");
188 176
189 spin_lock_irqsave(&sem->wait_lock, flags); 177 spin_lock_irqsave(&sem->wait_lock, flags);
190 178
@@ -196,7 +184,6 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
196 184
197 spin_unlock_irqrestore(&sem->wait_lock, flags); 185 spin_unlock_irqrestore(&sem->wait_lock, flags);
198 186
199 rwsemtrace(sem, "Leaving __down_read_trylock");
200 return ret; 187 return ret;
201} 188}
202 189
@@ -204,13 +191,11 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
204 * get a write lock on the semaphore 191 * get a write lock on the semaphore
205 * - we increment the waiting count anyway to indicate an exclusive lock 192 * - we increment the waiting count anyway to indicate an exclusive lock
206 */ 193 */
207void fastcall __sched __down_write(struct rw_semaphore *sem) 194void fastcall __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
208{ 195{
209 struct rwsem_waiter waiter; 196 struct rwsem_waiter waiter;
210 struct task_struct *tsk; 197 struct task_struct *tsk;
211 198
212 rwsemtrace(sem, "Entering __down_write");
213
214 spin_lock_irq(&sem->wait_lock); 199 spin_lock_irq(&sem->wait_lock);
215 200
216 if (sem->activity == 0 && list_empty(&sem->wait_list)) { 201 if (sem->activity == 0 && list_empty(&sem->wait_list)) {
@@ -242,9 +227,13 @@ void fastcall __sched __down_write(struct rw_semaphore *sem)
242 } 227 }
243 228
244 tsk->state = TASK_RUNNING; 229 tsk->state = TASK_RUNNING;
245
246 out: 230 out:
247 rwsemtrace(sem, "Leaving __down_write"); 231 ;
232}
233
234void fastcall __sched __down_write(struct rw_semaphore *sem)
235{
236 __down_write_nested(sem, 0);
248} 237}
249 238
250/* 239/*
@@ -255,8 +244,6 @@ int fastcall __down_write_trylock(struct rw_semaphore *sem)
255 unsigned long flags; 244 unsigned long flags;
256 int ret = 0; 245 int ret = 0;
257 246
258 rwsemtrace(sem, "Entering __down_write_trylock");
259
260 spin_lock_irqsave(&sem->wait_lock, flags); 247 spin_lock_irqsave(&sem->wait_lock, flags);
261 248
262 if (sem->activity == 0 && list_empty(&sem->wait_list)) { 249 if (sem->activity == 0 && list_empty(&sem->wait_list)) {
@@ -267,7 +254,6 @@ int fastcall __down_write_trylock(struct rw_semaphore *sem)
267 254
268 spin_unlock_irqrestore(&sem->wait_lock, flags); 255 spin_unlock_irqrestore(&sem->wait_lock, flags);
269 256
270 rwsemtrace(sem, "Leaving __down_write_trylock");
271 return ret; 257 return ret;
272} 258}
273 259
@@ -278,16 +264,12 @@ void fastcall __up_read(struct rw_semaphore *sem)
278{ 264{
279 unsigned long flags; 265 unsigned long flags;
280 266
281 rwsemtrace(sem, "Entering __up_read");
282
283 spin_lock_irqsave(&sem->wait_lock, flags); 267 spin_lock_irqsave(&sem->wait_lock, flags);
284 268
285 if (--sem->activity == 0 && !list_empty(&sem->wait_list)) 269 if (--sem->activity == 0 && !list_empty(&sem->wait_list))
286 sem = __rwsem_wake_one_writer(sem); 270 sem = __rwsem_wake_one_writer(sem);
287 271
288 spin_unlock_irqrestore(&sem->wait_lock, flags); 272 spin_unlock_irqrestore(&sem->wait_lock, flags);
289
290 rwsemtrace(sem, "Leaving __up_read");
291} 273}
292 274
293/* 275/*
@@ -297,8 +279,6 @@ void fastcall __up_write(struct rw_semaphore *sem)
297{ 279{
298 unsigned long flags; 280 unsigned long flags;
299 281
300 rwsemtrace(sem, "Entering __up_write");
301
302 spin_lock_irqsave(&sem->wait_lock, flags); 282 spin_lock_irqsave(&sem->wait_lock, flags);
303 283
304 sem->activity = 0; 284 sem->activity = 0;
@@ -306,8 +286,6 @@ void fastcall __up_write(struct rw_semaphore *sem)
306 sem = __rwsem_do_wake(sem, 1); 286 sem = __rwsem_do_wake(sem, 1);
307 287
308 spin_unlock_irqrestore(&sem->wait_lock, flags); 288 spin_unlock_irqrestore(&sem->wait_lock, flags);
309
310 rwsemtrace(sem, "Leaving __up_write");
311} 289}
312 290
313/* 291/*
@@ -318,8 +296,6 @@ void fastcall __downgrade_write(struct rw_semaphore *sem)
318{ 296{
319 unsigned long flags; 297 unsigned long flags;
320 298
321 rwsemtrace(sem, "Entering __downgrade_write");
322
323 spin_lock_irqsave(&sem->wait_lock, flags); 299 spin_lock_irqsave(&sem->wait_lock, flags);
324 300
325 sem->activity = 1; 301 sem->activity = 1;
@@ -327,18 +303,14 @@ void fastcall __downgrade_write(struct rw_semaphore *sem)
327 sem = __rwsem_do_wake(sem, 0); 303 sem = __rwsem_do_wake(sem, 0);
328 304
329 spin_unlock_irqrestore(&sem->wait_lock, flags); 305 spin_unlock_irqrestore(&sem->wait_lock, flags);
330
331 rwsemtrace(sem, "Leaving __downgrade_write");
332} 306}
333 307
334EXPORT_SYMBOL(init_rwsem); 308EXPORT_SYMBOL(__init_rwsem);
335EXPORT_SYMBOL(__down_read); 309EXPORT_SYMBOL(__down_read);
336EXPORT_SYMBOL(__down_read_trylock); 310EXPORT_SYMBOL(__down_read_trylock);
311EXPORT_SYMBOL(__down_write_nested);
337EXPORT_SYMBOL(__down_write); 312EXPORT_SYMBOL(__down_write);
338EXPORT_SYMBOL(__down_write_trylock); 313EXPORT_SYMBOL(__down_write_trylock);
339EXPORT_SYMBOL(__up_read); 314EXPORT_SYMBOL(__up_read);
340EXPORT_SYMBOL(__up_write); 315EXPORT_SYMBOL(__up_write);
341EXPORT_SYMBOL(__downgrade_write); 316EXPORT_SYMBOL(__downgrade_write);
342#if RWSEM_DEBUG
343EXPORT_SYMBOL(rwsemtrace);
344#endif
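The rwsem-spinlock changes replace the old init_rwsem() entry point with __init_rwsem(sem, name, key), which expects the caller to pass a lock-class name and a static key. Callers are presumably routed through a wrapper macro in the rwsem headers that generates one static lock_class_key per initialization site; a minimal sketch of that wrapper pattern (illustrative, not quoted from the headers):

#define init_rwsem(sem)						\
do {								\
	static struct lock_class_key __key;			\
								\
	__init_rwsem((sem), #sem, &__key);			\
} while (0)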
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 62fa4eba9ffe..b322421c2969 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -8,6 +8,26 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/module.h> 9#include <linux/module.h>
10 10
11/*
12 * Initialize an rwsem:
13 */
14void __init_rwsem(struct rw_semaphore *sem, const char *name,
15 struct lock_class_key *key)
16{
17#ifdef CONFIG_DEBUG_LOCK_ALLOC
18 /*
19 * Make sure we are not reinitializing a held semaphore:
20 */
21 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
22 lockdep_init_map(&sem->dep_map, name, key);
23#endif
24 sem->count = RWSEM_UNLOCKED_VALUE;
25 spin_lock_init(&sem->wait_lock);
26 INIT_LIST_HEAD(&sem->wait_list);
27}
28
29EXPORT_SYMBOL(__init_rwsem);
30
11struct rwsem_waiter { 31struct rwsem_waiter {
12 struct list_head list; 32 struct list_head list;
13 struct task_struct *task; 33 struct task_struct *task;
@@ -16,17 +36,6 @@ struct rwsem_waiter {
16#define RWSEM_WAITING_FOR_WRITE 0x00000002 36#define RWSEM_WAITING_FOR_WRITE 0x00000002
17}; 37};
18 38
19#if RWSEM_DEBUG
20#undef rwsemtrace
21void rwsemtrace(struct rw_semaphore *sem, const char *str)
22{
23 printk("sem=%p\n", sem);
24 printk("(sem)=%08lx\n", sem->count);
25 if (sem->debug)
26 printk("[%d] %s({%08lx})\n", current->pid, str, sem->count);
27}
28#endif
29
30/* 39/*
31 * handle the lock release when processes blocked on it that can now run 40 * handle the lock release when processes blocked on it that can now run
32 * - if we come here from up_xxxx(), then: 41 * - if we come here from up_xxxx(), then:
@@ -45,8 +54,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
45 struct list_head *next; 54 struct list_head *next;
46 signed long oldcount, woken, loop; 55 signed long oldcount, woken, loop;
47 56
48 rwsemtrace(sem, "Entering __rwsem_do_wake");
49
50 if (downgrading) 57 if (downgrading)
51 goto dont_wake_writers; 58 goto dont_wake_writers;
52 59
@@ -127,7 +134,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
127 next->prev = &sem->wait_list; 134 next->prev = &sem->wait_list;
128 135
129 out: 136 out:
130 rwsemtrace(sem, "Leaving __rwsem_do_wake");
131 return sem; 137 return sem;
132 138
133 /* undo the change to count, but check for a transition 1->0 */ 139 /* undo the change to count, but check for a transition 1->0 */
@@ -186,13 +192,9 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
186{ 192{
187 struct rwsem_waiter waiter; 193 struct rwsem_waiter waiter;
188 194
189 rwsemtrace(sem, "Entering rwsem_down_read_failed");
190
191 waiter.flags = RWSEM_WAITING_FOR_READ; 195 waiter.flags = RWSEM_WAITING_FOR_READ;
192 rwsem_down_failed_common(sem, &waiter, 196 rwsem_down_failed_common(sem, &waiter,
193 RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); 197 RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
194
195 rwsemtrace(sem, "Leaving rwsem_down_read_failed");
196 return sem; 198 return sem;
197} 199}
198 200
@@ -204,12 +206,9 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
204{ 206{
205 struct rwsem_waiter waiter; 207 struct rwsem_waiter waiter;
206 208
207 rwsemtrace(sem, "Entering rwsem_down_write_failed");
208
209 waiter.flags = RWSEM_WAITING_FOR_WRITE; 209 waiter.flags = RWSEM_WAITING_FOR_WRITE;
210 rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); 210 rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
211 211
212 rwsemtrace(sem, "Leaving rwsem_down_write_failed");
213 return sem; 212 return sem;
214} 213}
215 214
@@ -221,8 +220,6 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
221{ 220{
222 unsigned long flags; 221 unsigned long flags;
223 222
224 rwsemtrace(sem, "Entering rwsem_wake");
225
226 spin_lock_irqsave(&sem->wait_lock, flags); 223 spin_lock_irqsave(&sem->wait_lock, flags);
227 224
228 /* do nothing if list empty */ 225 /* do nothing if list empty */
@@ -231,8 +228,6 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
231 228
232 spin_unlock_irqrestore(&sem->wait_lock, flags); 229 spin_unlock_irqrestore(&sem->wait_lock, flags);
233 230
234 rwsemtrace(sem, "Leaving rwsem_wake");
235
236 return sem; 231 return sem;
237} 232}
238 233
@@ -245,8 +240,6 @@ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
245{ 240{
246 unsigned long flags; 241 unsigned long flags;
247 242
248 rwsemtrace(sem, "Entering rwsem_downgrade_wake");
249
250 spin_lock_irqsave(&sem->wait_lock, flags); 243 spin_lock_irqsave(&sem->wait_lock, flags);
251 244
252 /* do nothing if list empty */ 245 /* do nothing if list empty */
@@ -255,7 +248,6 @@ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
255 248
256 spin_unlock_irqrestore(&sem->wait_lock, flags); 249 spin_unlock_irqrestore(&sem->wait_lock, flags);
257 250
258 rwsemtrace(sem, "Leaving rwsem_downgrade_wake");
259 return sem; 251 return sem;
260} 252}
261 253
@@ -263,6 +255,3 @@ EXPORT_SYMBOL(rwsem_down_read_failed);
263EXPORT_SYMBOL(rwsem_down_write_failed); 255EXPORT_SYMBOL(rwsem_down_write_failed);
264EXPORT_SYMBOL(rwsem_wake); 256EXPORT_SYMBOL(rwsem_wake);
265EXPORT_SYMBOL(rwsem_downgrade_wake); 257EXPORT_SYMBOL(rwsem_downgrade_wake);
266#if RWSEM_DEBUG
267EXPORT_SYMBOL(rwsemtrace);
268#endif
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 93c15ee3f8ea..3d9c4dc965ed 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -8,38 +8,71 @@
8 8
9#include <linux/spinlock.h> 9#include <linux/spinlock.h>
10#include <linux/interrupt.h> 10#include <linux/interrupt.h>
11#include <linux/debug_locks.h>
11#include <linux/delay.h> 12#include <linux/delay.h>
13#include <linux/module.h>
14
15void __spin_lock_init(spinlock_t *lock, const char *name,
16 struct lock_class_key *key)
17{
18#ifdef CONFIG_DEBUG_LOCK_ALLOC
19 /*
20 * Make sure we are not reinitializing a held lock:
21 */
22 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
23 lockdep_init_map(&lock->dep_map, name, key);
24#endif
25 lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
26 lock->magic = SPINLOCK_MAGIC;
27 lock->owner = SPINLOCK_OWNER_INIT;
28 lock->owner_cpu = -1;
29}
30
31EXPORT_SYMBOL(__spin_lock_init);
32
33void __rwlock_init(rwlock_t *lock, const char *name,
34 struct lock_class_key *key)
35{
36#ifdef CONFIG_DEBUG_LOCK_ALLOC
37 /*
38 * Make sure we are not reinitializing a held lock:
39 */
40 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
41 lockdep_init_map(&lock->dep_map, name, key);
42#endif
43 lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED;
44 lock->magic = RWLOCK_MAGIC;
45 lock->owner = SPINLOCK_OWNER_INIT;
46 lock->owner_cpu = -1;
47}
48
49EXPORT_SYMBOL(__rwlock_init);
12 50
13static void spin_bug(spinlock_t *lock, const char *msg) 51static void spin_bug(spinlock_t *lock, const char *msg)
14{ 52{
15 static long print_once = 1;
16 struct task_struct *owner = NULL; 53 struct task_struct *owner = NULL;
17 54
18 if (xchg(&print_once, 0)) { 55 if (!debug_locks_off())
19 if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) 56 return;
20 owner = lock->owner; 57
21 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", 58 if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
22 msg, raw_smp_processor_id(), 59 owner = lock->owner;
23 current->comm, current->pid); 60 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
24 printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " 61 msg, raw_smp_processor_id(),
25 ".owner_cpu: %d\n", 62 current->comm, current->pid);
26 lock, lock->magic, 63 printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
27 owner ? owner->comm : "<none>", 64 ".owner_cpu: %d\n",
28 owner ? owner->pid : -1, 65 lock, lock->magic,
29 lock->owner_cpu); 66 owner ? owner->comm : "<none>",
30 dump_stack(); 67 owner ? owner->pid : -1,
31#ifdef CONFIG_SMP 68 lock->owner_cpu);
32 /* 69 dump_stack();
33 * We cannot continue on SMP:
34 */
35// panic("bad locking");
36#endif
37 }
38} 70}
39 71
40#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) 72#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
41 73
42static inline void debug_spin_lock_before(spinlock_t *lock) 74static inline void
75debug_spin_lock_before(spinlock_t *lock)
43{ 76{
44 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); 77 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
45 SPIN_BUG_ON(lock->owner == current, lock, "recursion"); 78 SPIN_BUG_ON(lock->owner == current, lock, "recursion");
@@ -118,20 +151,13 @@ void _raw_spin_unlock(spinlock_t *lock)
118 151
119static void rwlock_bug(rwlock_t *lock, const char *msg) 152static void rwlock_bug(rwlock_t *lock, const char *msg)
120{ 153{
121 static long print_once = 1; 154 if (!debug_locks_off())
122 155 return;
123 if (xchg(&print_once, 0)) { 156
124 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", 157 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
125 msg, raw_smp_processor_id(), current->comm, 158 msg, raw_smp_processor_id(), current->comm,
126 current->pid, lock); 159 current->pid, lock);
127 dump_stack(); 160 dump_stack();
128#ifdef CONFIG_SMP
129 /*
130 * We cannot continue on SMP:
131 */
132 panic("bad locking");
133#endif
134 }
135} 161}
136 162
137#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) 163#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
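spin_bug() and rwlock_bug() now gate their output on debug_locks_off() rather than a per-site print_once flag, so the first locking bug turns off further debug-lock checking and reporting globally instead of only silencing that one site. A simplified sketch of what debug_locks_off() is expected to do (presumably lib/debug_locks.c from this series; treat as illustrative):

int debug_locks = 1;		/* cleared on the first report */
int debug_locks_silent;		/* set by the locking selftests to suppress output */

int debug_locks_off(void)
{
	if (xchg(&debug_locks, 0)) {
		if (!debug_locks_silent) {
			console_verbose();
			return 1;	/* caller should print its report */
		}
	}
	return 0;			/* already reported, or silenced */
}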
diff --git a/mm/memory.c b/mm/memory.c
index 7e2a4b1580e3..c1e14c9e67e4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -503,7 +503,7 @@ again:
503 return -ENOMEM; 503 return -ENOMEM;
504 src_pte = pte_offset_map_nested(src_pmd, addr); 504 src_pte = pte_offset_map_nested(src_pmd, addr);
505 src_ptl = pte_lockptr(src_mm, src_pmd); 505 src_ptl = pte_lockptr(src_mm, src_pmd);
506 spin_lock(src_ptl); 506 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
507 507
508 do { 508 do {
509 /* 509 /*
diff --git a/mm/mremap.c b/mm/mremap.c
index 1903bdf65e42..7c15cf3373ad 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -97,7 +97,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
97 new_pte = pte_offset_map_nested(new_pmd, new_addr); 97 new_pte = pte_offset_map_nested(new_pmd, new_addr);
98 new_ptl = pte_lockptr(mm, new_pmd); 98 new_ptl = pte_lockptr(mm, new_pmd);
99 if (new_ptl != old_ptl) 99 if (new_ptl != old_ptl)
100 spin_lock(new_ptl); 100 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
101 101
102 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, 102 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
103 new_pte++, new_addr += PAGE_SIZE) { 103 new_pte++, new_addr += PAGE_SIZE) {
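Both mm hunks above take two page-table locks of the same lock class at once (source and destination), so the second acquisition is annotated with spin_lock_nested() and SINGLE_DEPTH_NESTING to tell the validator this is an intentional, ordered nesting rather than recursion on one lock. A hedged sketch of the general pattern with placeholder locks:

static DEFINE_SPINLOCK(sketch_dst_ptl);
static DEFINE_SPINLOCK(sketch_src_ptl);	/* stands in for a second lock of the same class */

static void sketch_copy_range(void)
{
	spin_lock(&sketch_dst_ptl);
	spin_lock_nested(&sketch_src_ptl, SINGLE_DEPTH_NESTING);
	/* ... copy entries while both locks are held ... */
	spin_unlock(&sketch_src_ptl);
	spin_unlock(&sketch_dst_ptl);
}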
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d46ed0f1dc06..b9af136e5cfa 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -225,7 +225,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
225 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that 225 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
226 * we select a process with CAP_SYS_RAW_IO set). 226 * we select a process with CAP_SYS_RAW_IO set).
227 */ 227 */
228static void __oom_kill_task(task_t *p, const char *message) 228static void __oom_kill_task(struct task_struct *p, const char *message)
229{ 229{
230 if (p->pid == 1) { 230 if (p->pid == 1) {
231 WARN_ON(1); 231 WARN_ON(1);
@@ -255,10 +255,10 @@ static void __oom_kill_task(task_t *p, const char *message)
255 force_sig(SIGKILL, p); 255 force_sig(SIGKILL, p);
256} 256}
257 257
258static int oom_kill_task(task_t *p, const char *message) 258static int oom_kill_task(struct task_struct *p, const char *message)
259{ 259{
260 struct mm_struct *mm; 260 struct mm_struct *mm;
261 task_t * g, * q; 261 struct task_struct *g, *q;
262 262
263 mm = p->mm; 263 mm = p->mm;
264 264
@@ -316,7 +316,7 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
316 */ 316 */
317void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) 317void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
318{ 318{
319 task_t *p; 319 struct task_struct *p;
320 unsigned long points = 0; 320 unsigned long points = 0;
321 321
322 if (printk_ratelimit()) { 322 if (printk_ratelimit()) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e792a583f3b..54a4f5375bba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2005 2005
2006 zone->spanned_pages = size; 2006 zone->spanned_pages = size;
2007 zone->present_pages = realsize; 2007 zone->present_pages = realsize;
2008#ifdef CONFIG_NUMA
2009 zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
2010 / 100;
2011#endif
2008 zone->name = zone_names[j]; 2012 zone->name = zone_names[j];
2009 spin_lock_init(&zone->lock); 2013 spin_lock_init(&zone->lock);
2010 spin_lock_init(&zone->lru_lock); 2014 spin_lock_init(&zone->lru_lock);
@@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
2298 return 0; 2302 return 0;
2299} 2303}
2300 2304
2305#ifdef CONFIG_NUMA
2306int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
2307 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2308{
2309 struct zone *zone;
2310 int rc;
2311
2312 rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2313 if (rc)
2314 return rc;
2315
2316 for_each_zone(zone)
2317 zone->min_unmapped_ratio = (zone->present_pages *
2318 sysctl_min_unmapped_ratio) / 100;
2319 return 0;
2320}
2321#endif
2322
2301/* 2323/*
2302 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around 2324 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
2303 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() 2325 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
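The new handler recomputes each zone's min_unmapped_ratio as a percentage of its present pages; zone_reclaim() later bails out unless at least that many file-backed pages are unmapped. A hedged worked example with illustrative numbers:

static unsigned long sketch_min_unmapped_pages(void)
{
	unsigned long present_pages = 262144;		/* ~1GB worth of 4KB pages */

	/* sysctl_min_unmapped_ratio == 1 -> 262144 * 1 / 100 == 2621 pages */
	return present_pages * 1 / 100;
}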
diff --git a/mm/slab.c b/mm/slab.c
index 3936af344542..85c2e03098a7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1021,7 +1021,8 @@ static void drain_alien_cache(struct kmem_cache *cachep,
1021 } 1021 }
1022} 1022}
1023 1023
1024static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1024static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
1025 int nesting)
1025{ 1026{
1026 struct slab *slabp = virt_to_slab(objp); 1027 struct slab *slabp = virt_to_slab(objp);
1027 int nodeid = slabp->nodeid; 1028 int nodeid = slabp->nodeid;
@@ -1039,7 +1040,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1039 STATS_INC_NODEFREES(cachep); 1040 STATS_INC_NODEFREES(cachep);
1040 if (l3->alien && l3->alien[nodeid]) { 1041 if (l3->alien && l3->alien[nodeid]) {
1041 alien = l3->alien[nodeid]; 1042 alien = l3->alien[nodeid];
1042 spin_lock(&alien->lock); 1043 spin_lock_nested(&alien->lock, nesting);
1043 if (unlikely(alien->avail == alien->limit)) { 1044 if (unlikely(alien->avail == alien->limit)) {
1044 STATS_INC_ACOVERFLOW(cachep); 1045 STATS_INC_ACOVERFLOW(cachep);
1045 __drain_alien_cache(cachep, alien, nodeid); 1046 __drain_alien_cache(cachep, alien, nodeid);
@@ -1068,7 +1069,8 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
1068{ 1069{
1069} 1070}
1070 1071
1071static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1072static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
1073 int nesting)
1072{ 1074{
1073 return 0; 1075 return 0;
1074} 1076}
@@ -1272,6 +1274,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1272 1274
1273 local_irq_disable(); 1275 local_irq_disable();
1274 memcpy(ptr, list, sizeof(struct kmem_list3)); 1276 memcpy(ptr, list, sizeof(struct kmem_list3));
1277 /*
1278 * Do not assume that spinlocks can be initialized via memcpy:
1279 */
1280 spin_lock_init(&ptr->list_lock);
1281
1275 MAKE_ALL_LISTS(cachep, ptr, nodeid); 1282 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1276 cachep->nodelists[nodeid] = ptr; 1283 cachep->nodelists[nodeid] = ptr;
1277 local_irq_enable(); 1284 local_irq_enable();
@@ -1398,7 +1405,7 @@ void __init kmem_cache_init(void)
1398 } 1405 }
1399 /* 4) Replace the bootstrap head arrays */ 1406 /* 4) Replace the bootstrap head arrays */
1400 { 1407 {
1401 void *ptr; 1408 struct array_cache *ptr;
1402 1409
1403 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 1410 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1404 1411
@@ -1406,6 +1413,11 @@ void __init kmem_cache_init(void)
1406 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); 1413 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1407 memcpy(ptr, cpu_cache_get(&cache_cache), 1414 memcpy(ptr, cpu_cache_get(&cache_cache),
1408 sizeof(struct arraycache_init)); 1415 sizeof(struct arraycache_init));
1416 /*
1417 * Do not assume that spinlocks can be initialized via memcpy:
1418 */
1419 spin_lock_init(&ptr->lock);
1420
1409 cache_cache.array[smp_processor_id()] = ptr; 1421 cache_cache.array[smp_processor_id()] = ptr;
1410 local_irq_enable(); 1422 local_irq_enable();
1411 1423
@@ -1416,6 +1428,11 @@ void __init kmem_cache_init(void)
1416 != &initarray_generic.cache); 1428 != &initarray_generic.cache);
1417 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), 1429 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1418 sizeof(struct arraycache_init)); 1430 sizeof(struct arraycache_init));
1431 /*
1432 * Do not assume that spinlocks can be initialized via memcpy:
1433 */
1434 spin_lock_init(&ptr->lock);
1435
1419 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = 1436 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1420 ptr; 1437 ptr;
1421 local_irq_enable(); 1438 local_irq_enable();
@@ -1743,6 +1760,8 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1743} 1760}
1744#endif 1761#endif
1745 1762
1763static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting);
1764
1746/** 1765/**
1747 * slab_destroy - destroy and release all objects in a slab 1766 * slab_destroy - destroy and release all objects in a slab
1748 * @cachep: cache pointer being destroyed 1767 * @cachep: cache pointer being destroyed
@@ -1766,8 +1785,17 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1766 call_rcu(&slab_rcu->head, kmem_rcu_free); 1785 call_rcu(&slab_rcu->head, kmem_rcu_free);
1767 } else { 1786 } else {
1768 kmem_freepages(cachep, addr); 1787 kmem_freepages(cachep, addr);
1769 if (OFF_SLAB(cachep)) 1788 if (OFF_SLAB(cachep)) {
1770 kmem_cache_free(cachep->slabp_cache, slabp); 1789 unsigned long flags;
1790
1791 /*
1792 * lockdep: we may nest inside an already held
1793 * ac->lock, so pass in a nesting flag:
1794 */
1795 local_irq_save(flags);
1796 __cache_free(cachep->slabp_cache, slabp, 1);
1797 local_irq_restore(flags);
1798 }
1771 } 1799 }
1772} 1800}
1773 1801
@@ -3072,7 +3100,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3072 if (slabp->inuse == 0) { 3100 if (slabp->inuse == 0) {
3073 if (l3->free_objects > l3->free_limit) { 3101 if (l3->free_objects > l3->free_limit) {
3074 l3->free_objects -= cachep->num; 3102 l3->free_objects -= cachep->num;
3103 /*
3104 * It is safe to drop the lock. The slab is
3105 * no longer linked to the cache. cachep
3106 * cannot disappear - we are using it and
3107 * all destruction of caches must be
3108 * serialized properly by the user.
3109 */
3110 spin_unlock(&l3->list_lock);
3075 slab_destroy(cachep, slabp); 3111 slab_destroy(cachep, slabp);
3112 spin_lock(&l3->list_lock);
3076 } else { 3113 } else {
3077 list_add(&slabp->list, &l3->slabs_free); 3114 list_add(&slabp->list, &l3->slabs_free);
3078 } 3115 }
@@ -3098,7 +3135,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3098#endif 3135#endif
3099 check_irq_off(); 3136 check_irq_off();
3100 l3 = cachep->nodelists[node]; 3137 l3 = cachep->nodelists[node];
3101 spin_lock(&l3->list_lock); 3138 spin_lock_nested(&l3->list_lock, SINGLE_DEPTH_NESTING);
3102 if (l3->shared) { 3139 if (l3->shared) {
3103 struct array_cache *shared_array = l3->shared; 3140 struct array_cache *shared_array = l3->shared;
3104 int max = shared_array->limit - shared_array->avail; 3141 int max = shared_array->limit - shared_array->avail;
@@ -3141,14 +3178,14 @@ free_done:
3141 * Release an obj back to its cache. If the obj has a constructed state, it must 3178 * Release an obj back to its cache. If the obj has a constructed state, it must
3142 * be in this state _before_ it is released. Called with disabled ints. 3179 * be in this state _before_ it is released. Called with disabled ints.
3143 */ 3180 */
3144static inline void __cache_free(struct kmem_cache *cachep, void *objp) 3181static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting)
3145{ 3182{
3146 struct array_cache *ac = cpu_cache_get(cachep); 3183 struct array_cache *ac = cpu_cache_get(cachep);
3147 3184
3148 check_irq_off(); 3185 check_irq_off();
3149 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); 3186 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3150 3187
3151 if (cache_free_alien(cachep, objp)) 3188 if (cache_free_alien(cachep, objp, nesting))
3152 return; 3189 return;
3153 3190
3154 if (likely(ac->avail < ac->limit)) { 3191 if (likely(ac->avail < ac->limit)) {
@@ -3387,7 +3424,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3387 BUG_ON(virt_to_cache(objp) != cachep); 3424 BUG_ON(virt_to_cache(objp) != cachep);
3388 3425
3389 local_irq_save(flags); 3426 local_irq_save(flags);
3390 __cache_free(cachep, objp); 3427 __cache_free(cachep, objp, 0);
3391 local_irq_restore(flags); 3428 local_irq_restore(flags);
3392} 3429}
3393EXPORT_SYMBOL(kmem_cache_free); 3430EXPORT_SYMBOL(kmem_cache_free);
@@ -3412,7 +3449,7 @@ void kfree(const void *objp)
3412 kfree_debugcheck(objp); 3449 kfree_debugcheck(objp);
3413 c = virt_to_cache(objp); 3450 c = virt_to_cache(objp);
3414 debug_check_no_locks_freed(objp, obj_size(c)); 3451 debug_check_no_locks_freed(objp, obj_size(c));
3415 __cache_free(c, (void *)objp); 3452 __cache_free(c, (void *)objp, 0);
3416 local_irq_restore(flags); 3453 local_irq_restore(flags);
3417} 3454}
3418EXPORT_SYMBOL(kfree); 3455EXPORT_SYMBOL(kfree);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index fccbd9bba77b..5f7cf2a4cb55 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,7 +38,7 @@ static struct backing_dev_info swap_backing_dev_info = {
38 38
39struct address_space swapper_space = { 39struct address_space swapper_space = {
40 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), 40 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
41 .tree_lock = RW_LOCK_UNLOCKED, 41 .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock),
42 .a_ops = &swap_aops, 42 .a_ops = &swap_aops,
43 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), 43 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
44 .backing_dev_info = &swap_backing_dev_info, 44 .backing_dev_info = &swap_backing_dev_info,
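Statically initialized locks need a lockdep class name too, which is why the anonymous RW_LOCK_UNLOCKED initializer is replaced with the named __RW_LOCK_UNLOCKED(field) form. The same pattern applied to a made-up structure (illustrative only):

static struct sketch_space {
	rwlock_t tree_lock;
} sketch_space = {
	.tree_lock = __RW_LOCK_UNLOCKED(sketch_space.tree_lock),
};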
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35f8553f893a..7b450798b458 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -330,6 +330,8 @@ void __vunmap(void *addr, int deallocate_pages)
330 return; 330 return;
331 } 331 }
332 332
333 debug_check_no_locks_freed(addr, area->size);
334
333 if (deallocate_pages) { 335 if (deallocate_pages) {
334 int i; 336 int i;
335 337
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ff2ebe9458a3..5d4c4d02254d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1503,10 +1503,6 @@ module_init(kswapd_init)
1503 * 1503 *
1504 * If non-zero call zone_reclaim when the number of free pages falls below 1504 * If non-zero call zone_reclaim when the number of free pages falls below
1505 * the watermarks. 1505 * the watermarks.
1506 *
1507 * In the future we may add flags to the mode. However, the page allocator
1508 * should only have to check that zone_reclaim_mode != 0 before calling
1509 * zone_reclaim().
1510 */ 1506 */
1511int zone_reclaim_mode __read_mostly; 1507int zone_reclaim_mode __read_mostly;
1512 1508
@@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly;
1524#define ZONE_RECLAIM_PRIORITY 4 1520#define ZONE_RECLAIM_PRIORITY 4
1525 1521
1526/* 1522/*
1523 * Percentage of pages in a zone that must be unmapped for zone_reclaim to
1524 * occur.
1525 */
1526int sysctl_min_unmapped_ratio = 1;
1527
1528/*
1527 * Try to free up some pages from this zone through reclaim. 1529 * Try to free up some pages from this zone through reclaim.
1528 */ 1530 */
1529static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) 1531static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1590 int node_id; 1592 int node_id;
1591 1593
1592 /* 1594 /*
1593 * Do not reclaim if there are not enough reclaimable pages in this 1595 * Zone reclaim reclaims unmapped file backed pages.
1594 * zone that would satify this allocations.
1595 * 1596 *
1596 * All unmapped pagecache pages are reclaimable. 1597 * A small portion of unmapped file backed pages is needed for
1597 * 1598 * file I/O otherwise pages read by file I/O will be immediately
1598 * Both counters may be temporarily off a bit so we use 1599 * thrown out if the zone is overallocated. So we do not reclaim
1599 * SWAP_CLUSTER_MAX as the boundary. It may also be good to 1600 * if less than a specified percentage of the zone is used by
1600 * leave a few frequently used unmapped pagecache pages around. 1601 * unmapped file backed pages.
1601 */ 1602 */
1602 if (zone_page_state(zone, NR_FILE_PAGES) - 1603 if (zone_page_state(zone, NR_FILE_PAGES) -
1603 zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX) 1604 zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
1604 return 0; 1605 return 0;
1605 1606
1606 /* 1607 /*
1607 * Avoid concurrent zone reclaims, do not reclaim in a zone that does 1608 * Avoid concurrent zone reclaims, do not reclaim in a zone that does
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3948949a609a..458031bfff55 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -364,6 +364,14 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
364 } 364 }
365} 365}
366 366
367/*
368 * vlan network devices have devices nesting below them, and are a special
369 * "super class" of normal network devices; split their locks off into a
370 * separate class since they always nest.
371 */
372static struct lock_class_key vlan_netdev_xmit_lock_key;
373
374
367/* Attach a VLAN device to a mac address (ie Ethernet Card). 375/* Attach a VLAN device to a mac address (ie Ethernet Card).
368 * Returns the device that was created, or NULL if there was 376 * Returns the device that was created, or NULL if there was
369 * an error of some kind. 377 * an error of some kind.
@@ -460,6 +468,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
460 468
461 new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, 469 new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
462 vlan_setup); 470 vlan_setup);
471
463 if (new_dev == NULL) 472 if (new_dev == NULL)
464 goto out_unlock; 473 goto out_unlock;
465 474
@@ -518,6 +527,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
518 if (register_netdevice(new_dev)) 527 if (register_netdevice(new_dev))
519 goto out_free_newdev; 528 goto out_free_newdev;
520 529
530 lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
531
521 new_dev->iflink = real_dev->ifindex; 532 new_dev->iflink = real_dev->ifindex;
522 vlan_transfer_operstate(real_dev, new_dev); 533 vlan_transfer_operstate(real_dev, new_dev);
523 linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ 534 linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
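A VLAN device's transmit path nests inside the real device's transmit path, so if both _xmit_locks stayed in the default class the nesting would look like lock recursion. The change above is the generic "one static key per nesting layer" pattern; a hedged sketch with placeholder names:

static struct lock_class_key sketch_nested_xmit_lock_key;

static void sketch_mark_nested_xmit_lock(struct net_device *dev)
{
	/* move this (upper) device's xmit lock into its own lockdep class */
	lockdep_set_class(&dev->_xmit_lock, &sketch_nested_xmit_lock_key);
}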
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7cfbdb215ba2..44f6a181a754 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -71,6 +71,13 @@ static kmem_cache_t *skbuff_head_cache __read_mostly;
71static kmem_cache_t *skbuff_fclone_cache __read_mostly; 71static kmem_cache_t *skbuff_fclone_cache __read_mostly;
72 72
73/* 73/*
74 * lockdep: lock class key used by skb_queue_head_init():
75 */
76struct lock_class_key skb_queue_lock_key;
77
78EXPORT_SYMBOL(skb_queue_lock_key);
79
80/*
74 * Keep out-of-line to prevent kernel bloat. 81 * Keep out-of-line to prevent kernel bloat.
75 * __builtin_return_address is not used because it is not always 82 * __builtin_return_address is not used because it is not always
76 * reliable. 83 * reliable.
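skb_queue_lock_key is exported so that skb_queue_head_init(), an inline helper in the headers, can put every skb queue lock into one shared lockdep class. A sketch of how that inline presumably uses the key (field details are from memory, so treat this as illustrative rather than a quote of include/linux/skbuff.h):

static inline void sketch_skb_queue_head_init(struct sk_buff_head *list)
{
	spin_lock_init(&list->lock);
	lockdep_set_class(&list->lock, &skb_queue_lock_key);
	list->prev = list->next = (struct sk_buff *)list;
	list->qlen = 0;
}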
diff --git a/net/core/sock.c b/net/core/sock.c
index 533b9317144b..51fcfbc041a7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -129,6 +129,53 @@
129#include <net/tcp.h> 129#include <net/tcp.h>
130#endif 130#endif
131 131
132/*
133 * Each address family might have different locking rules, so we have
134 * one slock key per address family:
135 */
136static struct lock_class_key af_family_keys[AF_MAX];
137static struct lock_class_key af_family_slock_keys[AF_MAX];
138
139#ifdef CONFIG_DEBUG_LOCK_ALLOC
140/*
141 * Make lock validator output more readable. (we pre-construct these
142 * strings build-time, so that runtime initialization of socket
143 * locks is fast):
144 */
145static const char *af_family_key_strings[AF_MAX+1] = {
146 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
147 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
148 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
149 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
150 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
151 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
152 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
153 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
154 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
155 "sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
157};
158static const char *af_family_slock_key_strings[AF_MAX+1] = {
159 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
160 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
161 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
162 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
163 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
164 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
165 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
166 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
167 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
168 "slock-27" , "slock-28" , "slock-29" ,
169 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX"
170};
171#endif
172
173/*
174 * sk_callback_lock locking rules are per-address-family,
175 * so split the lock classes by using a per-AF key:
176 */
177static struct lock_class_key af_callback_keys[AF_MAX];
178
132/* Take into consideration the size of the struct sk_buff overhead in the 179/* Take into consideration the size of the struct sk_buff overhead in the
133 * determination of these values, since that is non-constant across 180 * determination of these values, since that is non-constant across
134 * platforms. This makes socket queueing behavior and performance 181 * platforms. This makes socket queueing behavior and performance
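
Keying both sk_lock and its slock per address family keeps, say, AF_INET and AF_UNIX socket locks in independent classes, and the build-time strings give those classes readable names in validator reports. Stripped to its essentials the idiom looks like this (names below are illustrative only):

static struct lock_class_key example_keys[AF_MAX];
static const char *example_key_names[AF_MAX] = {
        [AF_UNIX] = "example-AF_UNIX",
        [AF_INET] = "example-AF_INET",
        /* ... one entry per family that can occur ... */
};

static void example_lock_init(spinlock_t *lock, unsigned short family)
{
        spin_lock_init(lock);
        /* one lock class per family, with a constant, readable name */
        lockdep_set_class_and_name(lock, &example_keys[family],
                                   example_key_names[family]);
}
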
@@ -237,9 +284,16 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
237 skb->dev = NULL; 284 skb->dev = NULL;
238 285
239 bh_lock_sock(sk); 286 bh_lock_sock(sk);
240 if (!sock_owned_by_user(sk)) 287 if (!sock_owned_by_user(sk)) {
288 /*
289 * trylock + unlock semantics:
290 */
291 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
292
241 rc = sk->sk_backlog_rcv(sk, skb); 293 rc = sk->sk_backlog_rcv(sk, skb);
242 else 294
295 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
296 } else
243 sk_add_backlog(sk, skb); 297 sk_add_backlog(sk, skb);
244 bh_unlock_sock(sk); 298 bh_unlock_sock(sk);
245out: 299out:
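
The two annotation macros take no lock; they only feed the validator. Reading the arguments: mutex_acquire(map, subclass, trylock, ip) records an acquisition with subclass 0 and trylock = 1, and mutex_release(map, nested, ip) drops it again, so everything acquired inside the backlog callback is still ordered against the socket lock. A commented restatement of the hunk above (sketch, same calls):

        /* pretend a trylock of sk_lock succeeded for the callback ... */
        mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
        rc = sk->sk_backlog_rcv(sk, skb);
        /* ... and hand the pretend-lock back before bh_unlock_sock() */
        mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
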
@@ -749,6 +803,33 @@ lenout:
749 return 0; 803 return 0;
750} 804}
751 805
806/*
807 * Initialize an sk_lock.
808 *
809 * (We also register the sk_lock with the lock validator.)
810 */
811static void inline sock_lock_init(struct sock *sk)
812{
813 spin_lock_init(&sk->sk_lock.slock);
814 sk->sk_lock.owner = NULL;
815 init_waitqueue_head(&sk->sk_lock.wq);
816 /*
817 * Make sure we are not reinitializing a held lock:
818 */
819 debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock));
820
821 /*
822 * Mark both the sk_lock and the sk_lock.slock as a
823 * per-address-family lock class:
824 */
825 lockdep_set_class_and_name(&sk->sk_lock.slock,
826 af_family_slock_keys + sk->sk_family,
827 af_family_slock_key_strings[sk->sk_family]);
828 lockdep_init_map(&sk->sk_lock.dep_map,
829 af_family_key_strings[sk->sk_family],
830 af_family_keys + sk->sk_family);
831}
832
752/** 833/**
753 * sk_alloc - All socket objects are allocated here 834 * sk_alloc - All socket objects are allocated here
754 * @family: protocol family 835 * @family: protocol family
@@ -848,6 +929,8 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
848 929
849 rwlock_init(&newsk->sk_dst_lock); 930 rwlock_init(&newsk->sk_dst_lock);
850 rwlock_init(&newsk->sk_callback_lock); 931 rwlock_init(&newsk->sk_callback_lock);
932 lockdep_set_class(&newsk->sk_callback_lock,
933 af_callback_keys + newsk->sk_family);
851 934
852 newsk->sk_dst_cache = NULL; 935 newsk->sk_dst_cache = NULL;
853 newsk->sk_wmem_queued = 0; 936 newsk->sk_wmem_queued = 0;
@@ -1422,6 +1505,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1422 1505
1423 rwlock_init(&sk->sk_dst_lock); 1506 rwlock_init(&sk->sk_dst_lock);
1424 rwlock_init(&sk->sk_callback_lock); 1507 rwlock_init(&sk->sk_callback_lock);
1508 lockdep_set_class(&sk->sk_callback_lock,
1509 af_callback_keys + sk->sk_family);
1425 1510
1426 sk->sk_state_change = sock_def_wakeup; 1511 sk->sk_state_change = sock_def_wakeup;
1427 sk->sk_data_ready = sock_def_readable; 1512 sk->sk_data_ready = sock_def_readable;
@@ -1449,24 +1534,34 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1449void fastcall lock_sock(struct sock *sk) 1534void fastcall lock_sock(struct sock *sk)
1450{ 1535{
1451 might_sleep(); 1536 might_sleep();
1452 spin_lock_bh(&(sk->sk_lock.slock)); 1537 spin_lock_bh(&sk->sk_lock.slock);
1453 if (sk->sk_lock.owner) 1538 if (sk->sk_lock.owner)
1454 __lock_sock(sk); 1539 __lock_sock(sk);
1455 sk->sk_lock.owner = (void *)1; 1540 sk->sk_lock.owner = (void *)1;
1456 spin_unlock_bh(&(sk->sk_lock.slock)); 1541 spin_unlock(&sk->sk_lock.slock);
1542 /*
1543 * The sk_lock has mutex_lock() semantics here:
1544 */
1545 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
1546 local_bh_enable();
1457} 1547}
1458 1548
1459EXPORT_SYMBOL(lock_sock); 1549EXPORT_SYMBOL(lock_sock);
1460 1550
1461void fastcall release_sock(struct sock *sk) 1551void fastcall release_sock(struct sock *sk)
1462{ 1552{
1463 spin_lock_bh(&(sk->sk_lock.slock)); 1553 /*
1554 * The sk_lock has mutex_unlock() semantics:
1555 */
1556 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1557
1558 spin_lock_bh(&sk->sk_lock.slock);
1464 if (sk->sk_backlog.tail) 1559 if (sk->sk_backlog.tail)
1465 __release_sock(sk); 1560 __release_sock(sk);
1466 sk->sk_lock.owner = NULL; 1561 sk->sk_lock.owner = NULL;
1467 if (waitqueue_active(&(sk->sk_lock.wq))) 1562 if (waitqueue_active(&sk->sk_lock.wq))
1468 wake_up(&(sk->sk_lock.wq)); 1563 wake_up(&sk->sk_lock.wq);
1469 spin_unlock_bh(&(sk->sk_lock.slock)); 1564 spin_unlock_bh(&sk->sk_lock.slock);
1470} 1565}
1471EXPORT_SYMBOL(release_sock); 1566EXPORT_SYMBOL(release_sock);
1472 1567
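
Together with sock_lock_init() above, this is the full pattern for teaching lockdep about a home-grown sleeping lock: the real state stays protected by the inner spinlock, while a separate lockdep_map is acquired and released with mutex semantics at the points where the lock is logically taken and dropped. A condensed sketch of that shape (names are illustrative; the socket code above is the real instance):

struct example_sleeping_lock {
        spinlock_t              slock;   /* protects owner/waiters */
        struct lockdep_map      dep_map; /* set up via lockdep_init_map() */
};

static void example_lock(struct example_sleeping_lock *l)
{
        might_sleep();
        spin_lock_bh(&l->slock);
        /* ... wait until free, mark ourselves as owner ... */
        spin_unlock(&l->slock);
        /* logically owned: record a mutex-style acquire before BHs return */
        mutex_acquire(&l->dep_map, 0, 0, _RET_IP_);
        local_bh_enable();
}

static void example_unlock(struct example_sleeping_lock *l)
{
        /* mirror image: drop the annotation before touching the spinlock */
        mutex_release(&l->dep_map, 1, _RET_IP_);
        spin_lock_bh(&l->slock);
        /* ... clear the owner, wake a waiter ... */
        spin_unlock_bh(&l->slock);
}
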
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index da44fabf4dc5..2dc6dbb28467 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -205,21 +205,27 @@ __u8 ip_tos2prio[16] = {
205struct rt_hash_bucket { 205struct rt_hash_bucket {
206 struct rtable *chain; 206 struct rtable *chain;
207}; 207};
208#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) 208#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
209 defined(CONFIG_PROVE_LOCKING)
209/* 210/*
210 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks 211 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
211 * The size of this table is a power of two and depends on the number of CPUS. 212 * The size of this table is a power of two and depends on the number of CPUS.
213 * (on lockdep we have a quite big spinlock_t, so keep the size down there)
212 */ 214 */
213#if NR_CPUS >= 32 215#ifdef CONFIG_LOCKDEP
214#define RT_HASH_LOCK_SZ 4096 216# define RT_HASH_LOCK_SZ 256
215#elif NR_CPUS >= 16
216#define RT_HASH_LOCK_SZ 2048
217#elif NR_CPUS >= 8
218#define RT_HASH_LOCK_SZ 1024
219#elif NR_CPUS >= 4
220#define RT_HASH_LOCK_SZ 512
221#else 217#else
222#define RT_HASH_LOCK_SZ 256 218# if NR_CPUS >= 32
219# define RT_HASH_LOCK_SZ 4096
220# elif NR_CPUS >= 16
221# define RT_HASH_LOCK_SZ 2048
222# elif NR_CPUS >= 8
223# define RT_HASH_LOCK_SZ 1024
224# elif NR_CPUS >= 4
225# define RT_HASH_LOCK_SZ 512
226# else
227# define RT_HASH_LOCK_SZ 256
228# endif
223#endif 229#endif
224 230
225static spinlock_t *rt_hash_locks; 231static spinlock_t *rt_hash_locks;
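
The table is purely a memory/contention trade-off: many hash buckets share one spinlock, picked by masking the bucket index. With CONFIG_LOCKDEP every spinlock_t additionally carries an embedded lockdep_map, so a 4096-entry table would cost noticeably more memory, hence the 256-entry cap. A helper along these lines selects the per-bucket lock (sketch of the existing lookup, not new behaviour):

static inline spinlock_t *example_rt_hash_lock(unsigned int slot)
{
        /* RT_HASH_LOCK_SZ is a power of two, so masking is enough */
        return &rt_hash_locks[slot & (RT_HASH_LOCK_SZ - 1)];
}
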
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8355b729fa95..5a886e6efbbe 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -90,7 +90,7 @@ static struct socket *tcp_socket;
90void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); 90void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
91 91
92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
93 .lhash_lock = RW_LOCK_UNLOCKED, 93 .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
94 .lhash_users = ATOMIC_INIT(0), 94 .lhash_users = ATOMIC_INIT(0),
95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), 95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
96}; 96};
@@ -1090,7 +1090,7 @@ process:
1090 1090
1091 skb->dev = NULL; 1091 skb->dev = NULL;
1092 1092
1093 bh_lock_sock(sk); 1093 bh_lock_sock_nested(sk);
1094 ret = 0; 1094 ret = 0;
1095 if (!sock_owned_by_user(sk)) { 1095 if (!sock_owned_by_user(sk)) {
1096#ifdef CONFIG_NET_DMA 1096#ifdef CONFIG_NET_DMA
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e0851697ad5e..0ccb7cb22b15 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -40,7 +40,7 @@ int sysctl_tcp_abort_on_overflow;
40struct inet_timewait_death_row tcp_death_row = { 40struct inet_timewait_death_row tcp_death_row = {
41 .sysctl_max_tw_buckets = NR_FILE * 2, 41 .sysctl_max_tw_buckets = NR_FILE * 2,
42 .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, 42 .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
43 .death_lock = SPIN_LOCK_UNLOCKED, 43 .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
44 .hashinfo = &tcp_hashinfo, 44 .hashinfo = &tcp_hashinfo,
45 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, 45 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
46 (unsigned long)&tcp_death_row), 46 (unsigned long)&tcp_death_row),
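
Both conversions have the same motivation: the old SPIN_LOCK_UNLOCKED / RW_LOCK_UNLOCKED initializers expand to one anonymous initializer shared by every user, so all statically initialized locks would collapse into a single lockdep class. The __SPIN_LOCK_UNLOCKED(name) / __RW_LOCK_UNLOCKED(name) forms take the variable being initialized and give each lock its own class and a readable name. Typical usage (sketch; names illustrative):

static DEFINE_SPINLOCK(example_lock);   /* preferred for a standalone spinlock */

static struct example_table {
        spinlock_t      lock;
        rwlock_t        rwlock;
} example_table = {
        /* inside aggregate initializers, name the lock field explicitly: */
        .lock   = __SPIN_LOCK_UNLOCKED(example_table.lock),
        .rwlock = __RW_LOCK_UNLOCKED(example_table.rwlock),
};
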
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 70cee82a98bf..55c0adc8f115 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -156,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk)
156 156
157static void netlink_table_grab(void) 157static void netlink_table_grab(void)
158{ 158{
159 write_lock_bh(&nl_table_lock); 159 write_lock_irq(&nl_table_lock);
160 160
161 if (atomic_read(&nl_table_users)) { 161 if (atomic_read(&nl_table_users)) {
162 DECLARE_WAITQUEUE(wait, current); 162 DECLARE_WAITQUEUE(wait, current);
@@ -166,9 +166,9 @@ static void netlink_table_grab(void)
166 set_current_state(TASK_UNINTERRUPTIBLE); 166 set_current_state(TASK_UNINTERRUPTIBLE);
167 if (atomic_read(&nl_table_users) == 0) 167 if (atomic_read(&nl_table_users) == 0)
168 break; 168 break;
169 write_unlock_bh(&nl_table_lock); 169 write_unlock_irq(&nl_table_lock);
170 schedule(); 170 schedule();
171 write_lock_bh(&nl_table_lock); 171 write_lock_irq(&nl_table_lock);
172 } 172 }
173 173
174 __set_current_state(TASK_RUNNING); 174 __set_current_state(TASK_RUNNING);
@@ -178,7 +178,7 @@ static void netlink_table_grab(void)
178 178
179static __inline__ void netlink_table_ungrab(void) 179static __inline__ void netlink_table_ungrab(void)
180{ 180{
181 write_unlock_bh(&nl_table_lock); 181 write_unlock_irq(&nl_table_lock);
182 wake_up(&nl_table_wait); 182 wake_up(&nl_table_wait);
183} 183}
184 184
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 6db6006616c6..dc6cb93c8830 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -515,7 +515,7 @@ rpc_depopulate(struct dentry *parent)
515 struct dentry *dentry, *dvec[10]; 515 struct dentry *dentry, *dvec[10];
516 int n = 0; 516 int n = 0;
517 517
518 mutex_lock(&dir->i_mutex); 518 mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
519repeat: 519repeat:
520 spin_lock(&dcache_lock); 520 spin_lock(&dcache_lock);
521 list_for_each_safe(pos, next, &parent->d_subdirs) { 521 list_for_each_safe(pos, next, &parent->d_subdirs) {
@@ -631,7 +631,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
631 if ((error = rpc_lookup_parent(path, nd)) != 0) 631 if ((error = rpc_lookup_parent(path, nd)) != 0)
632 return ERR_PTR(error); 632 return ERR_PTR(error);
633 dir = nd->dentry->d_inode; 633 dir = nd->dentry->d_inode;
634 mutex_lock(&dir->i_mutex); 634 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
635 dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); 635 dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len);
636 if (IS_ERR(dentry)) 636 if (IS_ERR(dentry))
637 goto out_err; 637 goto out_err;
@@ -693,7 +693,7 @@ rpc_rmdir(char *path)
693 if ((error = rpc_lookup_parent(path, &nd)) != 0) 693 if ((error = rpc_lookup_parent(path, &nd)) != 0)
694 return error; 694 return error;
695 dir = nd.dentry->d_inode; 695 dir = nd.dentry->d_inode;
696 mutex_lock(&dir->i_mutex); 696 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
697 dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); 697 dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
698 if (IS_ERR(dentry)) { 698 if (IS_ERR(dentry)) {
699 error = PTR_ERR(dentry); 699 error = PTR_ERR(dentry);
@@ -754,7 +754,7 @@ rpc_unlink(char *path)
754 if ((error = rpc_lookup_parent(path, &nd)) != 0) 754 if ((error = rpc_lookup_parent(path, &nd)) != 0)
755 return error; 755 return error;
756 dir = nd.dentry->d_inode; 756 dir = nd.dentry->d_inode;
757 mutex_lock(&dir->i_mutex); 757 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
758 dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); 758 dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
759 if (IS_ERR(dentry)) { 759 if (IS_ERR(dentry)) {
760 error = PTR_ERR(dentry); 760 error = PTR_ERR(dentry);
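
All four call sites take i_mutex on a directory while another i_mutex in the same class is (or will be) held on a related inode, which plain mutex_lock() would make look like recursion. mutex_lock_nested() supplies a subclass, here the VFS-defined I_MUTEX_PARENT and I_MUTEX_CHILD, so the parent/child ordering is expressed explicitly. The generic two-level form of the idiom (sketch; each hunk above takes only the level appropriate to its caller):

        /* parent directory first, then the child entry underneath it */
        mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT);
        mutex_lock_nested(&child->d_inode->i_mutex, I_MUTEX_CHILD);
        /* ... create or remove the dentry ... */
        mutex_unlock(&child->d_inode->i_mutex);
        mutex_unlock(&parent->d_inode->i_mutex);
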
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aca650109425..e9a287bc3142 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -565,6 +565,14 @@ static struct proto unix_proto = {
565 .obj_size = sizeof(struct unix_sock), 565 .obj_size = sizeof(struct unix_sock),
566}; 566};
567 567
568/*
569 * AF_UNIX sockets do not interact with hardware, hence they
570 * dont trigger interrupts - so it's safe for them to have
571 * bh-unsafe locking for their sk_receive_queue.lock. Split off
572 * this special lock-class by reinitializing the spinlock key:
573 */
574static struct lock_class_key af_unix_sk_receive_queue_lock_key;
575
568static struct sock * unix_create1(struct socket *sock) 576static struct sock * unix_create1(struct socket *sock)
569{ 577{
570 struct sock *sk = NULL; 578 struct sock *sk = NULL;
@@ -580,6 +588,8 @@ static struct sock * unix_create1(struct socket *sock)
580 atomic_inc(&unix_nr_socks); 588 atomic_inc(&unix_nr_socks);
581 589
582 sock_init_data(sock,sk); 590 sock_init_data(sock,sk);
591 lockdep_set_class(&sk->sk_receive_queue.lock,
592 &af_unix_sk_receive_queue_lock_key);
583 593
584 sk->sk_write_space = unix_write_space; 594 sk->sk_write_space = unix_write_space;
585 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; 595 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
@@ -1045,7 +1055,7 @@ restart:
1045 goto out_unlock; 1055 goto out_unlock;
1046 } 1056 }
1047 1057
1048 unix_state_wlock(sk); 1058 unix_state_wlock_nested(sk);
1049 1059
1050 if (sk->sk_state != st) { 1060 if (sk->sk_state != st) {
1051 unix_state_wunlock(sk); 1061 unix_state_wunlock(sk);
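
The unix_state_wlock_nested() call addresses the other classic case: connecting two AF_UNIX sockets means taking the per-socket state lock of both endpoints, and both locks live in the same class. The _nested variant tells the validator that the inner acquisition is a deliberate, ordered nesting rather than a self-deadlock. The generic form of that annotation, shown here with a plain spinlock for brevity (sketch):

        /* both locks belong to one class; annotate the inner acquisition */
        spin_lock(&sk_a->lock);
        spin_lock_nested(&sk_b->lock, SINGLE_DEPTH_NESTING);
        /* ... link the two endpoints ... */
        spin_unlock(&sk_b->lock);
        spin_unlock(&sk_a->lock);
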
diff --git a/sound/aoa/core/snd-aoa-gpio-feature.c b/sound/aoa/core/snd-aoa-gpio-feature.c
index bab97547a052..7ae0c0bdfad8 100644
--- a/sound/aoa/core/snd-aoa-gpio-feature.c
+++ b/sound/aoa/core/snd-aoa-gpio-feature.c
@@ -112,12 +112,7 @@ static struct device_node *get_gpio(char *name,
112 112
113static void get_irq(struct device_node * np, int *irqptr) 113static void get_irq(struct device_node * np, int *irqptr)
114{ 114{
115 *irqptr = -1; 115 *irqptr = irq_of_parse_and_map(np, 0);
116 if (!np)
117 return;
118 if (np->n_intrs != 1)
119 return;
120 *irqptr = np->intrs[0].line;
121} 116}
122 117
123/* 0x4 is outenable, 0x1 is out, thus 4 or 5 */ 118/* 0x4 is outenable, 0x1 is out, thus 4 or 5 */
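
This and the remaining sound hunks belong to the powerpc interrupt rework rather than to lockdep: the pre-parsed np->intrs[] array is gone, and drivers now ask for an interrupt to be parsed out of the device tree and mapped on demand. The usual consumer-side shape is roughly (sketch; handler and device name are illustrative):

        unsigned int irq;

        /* parse interrupt specifier 0 of this node and map it into the
         * Linux virtual irq space */
        irq = irq_of_parse_and_map(np, 0);
        if (irq == NO_IRQ)
                return -ENODEV;

        if (request_irq(irq, example_interrupt, 0, "example-device", dev))
                return -EBUSY;
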
diff --git a/sound/aoa/soundbus/i2sbus/i2sbus-core.c b/sound/aoa/soundbus/i2sbus/i2sbus-core.c
index f268dacdaa00..01c0724335a3 100644
--- a/sound/aoa/soundbus/i2sbus/i2sbus-core.c
+++ b/sound/aoa/soundbus/i2sbus/i2sbus-core.c
@@ -129,7 +129,7 @@ static int i2sbus_add_dev(struct macio_dev *macio,
129 if (strncmp(np->name, "i2s-", 4)) 129 if (strncmp(np->name, "i2s-", 4))
130 return 0; 130 return 0;
131 131
132 if (np->n_intrs != 3) 132 if (macio_irq_count(macio) != 3)
133 return 0; 133 return 0;
134 134
135 dev = kzalloc(sizeof(struct i2sbus_dev), GFP_KERNEL); 135 dev = kzalloc(sizeof(struct i2sbus_dev), GFP_KERNEL);
@@ -183,9 +183,10 @@ static int i2sbus_add_dev(struct macio_dev *macio,
183 snprintf(dev->rnames[i], sizeof(dev->rnames[i]), rnames[i], np->name); 183 snprintf(dev->rnames[i], sizeof(dev->rnames[i]), rnames[i], np->name);
184 } 184 }
185 for (i=0;i<3;i++) { 185 for (i=0;i<3;i++) {
186 if (request_irq(np->intrs[i].line, ints[i], 0, dev->rnames[i], dev)) 186 if (request_irq(macio_irq(macio, i), ints[i], 0,
187 dev->rnames[i], dev))
187 goto err; 188 goto err;
188 dev->interrupts[i] = np->intrs[i].line; 189 dev->interrupts[i] = macio_irq(macio, i);
189 } 190 }
190 191
191 for (i=0;i<3;i++) { 192 for (i=0;i<3;i++) {
diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c
index d812dc886360..4260de90f36f 100644
--- a/sound/core/seq/seq_device.c
+++ b/sound/core/seq/seq_device.c
@@ -380,6 +380,12 @@ static struct ops_list * create_driver(char *id)
380 /* set up driver entry */ 380 /* set up driver entry */
381 strlcpy(ops->id, id, sizeof(ops->id)); 381 strlcpy(ops->id, id, sizeof(ops->id));
382 mutex_init(&ops->reg_mutex); 382 mutex_init(&ops->reg_mutex);
383 /*
384 * The ->reg_mutex locking rules are per-driver, so we create
385 * separate per-driver lock classes:
386 */
387 lockdep_set_class(&ops->reg_mutex, (struct lock_class_key *)id);
388
383 ops->driver = DRIVER_EMPTY; 389 ops->driver = DRIVER_EMPTY;
384 INIT_LIST_HEAD(&ops->dev_list); 390 INIT_LIST_HEAD(&ops->dev_list);
385 /* lock this instance */ 391 /* lock this instance */
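
The cast is a small trick: lockdep only needs a unique address with static lifetime to key a class, and the id string passed in by each driver (normally a string literal in that driver module) provides one, so no per-driver struct lock_class_key has to be declared. The more explicit equivalent would look like this (sketch; function and key names are illustrative):

static struct lock_class_key example_driver_reg_mutex_key;

static void example_create_driver_ops(struct ops_list *ops)
{
        mutex_init(&ops->reg_mutex);
        /* same effect as the cast above, with an explicitly declared key */
        lockdep_set_class(&ops->reg_mutex, &example_driver_reg_mutex_key);
}
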
diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c
index d467b4f0ff2b..8c64b58ff77b 100644
--- a/sound/core/seq/seq_ports.c
+++ b/sound/core/seq/seq_ports.c
@@ -514,7 +514,7 @@ int snd_seq_port_connect(struct snd_seq_client *connector,
514 atomic_set(&subs->ref_count, 2); 514 atomic_set(&subs->ref_count, 2);
515 515
516 down_write(&src->list_mutex); 516 down_write(&src->list_mutex);
517 down_write(&dest->list_mutex); 517 down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
518 518
519 exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0; 519 exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0;
520 err = -EBUSY; 520 err = -EBUSY;
@@ -587,7 +587,7 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector,
587 unsigned long flags; 587 unsigned long flags;
588 588
589 down_write(&src->list_mutex); 589 down_write(&src->list_mutex);
590 down_write(&dest->list_mutex); 590 down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
591 591
592 /* look for the connection */ 592 /* look for the connection */
593 list_for_each(p, &src->list_head) { 593 list_for_each(p, &src->list_head) {
diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c
index de454ca39226..4359903f4376 100644
--- a/sound/oss/dmasound/dmasound_awacs.c
+++ b/sound/oss/dmasound/dmasound_awacs.c
@@ -374,10 +374,7 @@ setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int*
374 *gpio_pol = *pp; 374 *gpio_pol = *pp;
375 else 375 else
376 *gpio_pol = 1; 376 *gpio_pol = 1;
377 if (np->n_intrs > 0) 377 return irq_of_parse_and_map(np, 0);
378 return np->intrs[0].line;
379
380 return 0;
381} 378}
382 379
383static inline void 380static inline void
@@ -2864,14 +2861,13 @@ printk("dmasound_pmac: couldn't find a Codec we can handle\n");
2864 * other info if necessary (early AWACS we want to read chip ids) 2861 * other info if necessary (early AWACS we want to read chip ids)
2865 */ 2862 */
2866 2863
2867 if (of_get_address(io, 2, NULL, NULL) == NULL || io->n_intrs < 3) { 2864 if (of_get_address(io, 2, NULL, NULL) == NULL) {
2868 /* OK - maybe we need to use the 'awacs' node (on earlier 2865 /* OK - maybe we need to use the 'awacs' node (on earlier
2869 * machines). 2866 * machines).
2870 */ 2867 */
2871 if (awacs_node) { 2868 if (awacs_node) {
2872 io = awacs_node ; 2869 io = awacs_node ;
2873 if (of_get_address(io, 2, NULL, NULL) == NULL || 2870 if (of_get_address(io, 2, NULL, NULL) == NULL) {
2874 io->n_intrs < 3) {
2875 printk("dmasound_pmac: can't use %s\n", 2871 printk("dmasound_pmac: can't use %s\n",
2876 io->full_name); 2872 io->full_name);
2877 return -ENODEV; 2873 return -ENODEV;
@@ -2940,9 +2936,9 @@ printk("dmasound_pmac: couldn't find a Codec we can handle\n");
2940 if (awacs_revision == AWACS_SCREAMER && awacs) 2936 if (awacs_revision == AWACS_SCREAMER && awacs)
2941 awacs_recalibrate(); 2937 awacs_recalibrate();
2942 2938
2943 awacs_irq = io->intrs[0].line; 2939 awacs_irq = irq_of_parse_and_map(io, 0);
2944 awacs_tx_irq = io->intrs[1].line; 2940 awacs_tx_irq = irq_of_parse_and_map(io, 1);
2945 awacs_rx_irq = io->intrs[2].line; 2941 awacs_rx_irq = irq_of_parse_and_map(io, 2);
2946 2942
2947 /* Hack for legacy crap that will be killed someday */ 2943 /* Hack for legacy crap that will be killed someday */
2948 awacs_node = io; 2944 awacs_node = io;
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 90db9a1d1e0a..641430631505 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -1120,6 +1120,7 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
1120 struct snd_pmac *chip; 1120 struct snd_pmac *chip;
1121 struct device_node *np; 1121 struct device_node *np;
1122 int i, err; 1122 int i, err;
1123 unsigned int irq;
1123 unsigned long ctrl_addr, txdma_addr, rxdma_addr; 1124 unsigned long ctrl_addr, txdma_addr, rxdma_addr;
1124 static struct snd_device_ops ops = { 1125 static struct snd_device_ops ops = {
1125 .dev_free = snd_pmac_dev_free, 1126 .dev_free = snd_pmac_dev_free,
@@ -1153,10 +1154,6 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
1153 if (chip->is_k2) { 1154 if (chip->is_k2) {
1154 static char *rnames[] = { 1155 static char *rnames[] = {
1155 "Sound Control", "Sound DMA" }; 1156 "Sound Control", "Sound DMA" };
1156 if (np->n_intrs < 3) {
1157 err = -ENODEV;
1158 goto __error;
1159 }
1160 for (i = 0; i < 2; i ++) { 1157 for (i = 0; i < 2; i ++) {
1161 if (of_address_to_resource(np->parent, i, 1158 if (of_address_to_resource(np->parent, i,
1162 &chip->rsrc[i])) { 1159 &chip->rsrc[i])) {
@@ -1185,10 +1182,6 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
1185 } else { 1182 } else {
1186 static char *rnames[] = { 1183 static char *rnames[] = {
1187 "Sound Control", "Sound Tx DMA", "Sound Rx DMA" }; 1184 "Sound Control", "Sound Tx DMA", "Sound Rx DMA" };
1188 if (np->n_intrs < 3) {
1189 err = -ENODEV;
1190 goto __error;
1191 }
1192 for (i = 0; i < 3; i ++) { 1185 for (i = 0; i < 3; i ++) {
1193 if (of_address_to_resource(np, i, 1186 if (of_address_to_resource(np, i,
1194 &chip->rsrc[i])) { 1187 &chip->rsrc[i])) {
@@ -1220,28 +1213,30 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
1220 chip->playback.dma = ioremap(txdma_addr, 0x100); 1213 chip->playback.dma = ioremap(txdma_addr, 0x100);
1221 chip->capture.dma = ioremap(rxdma_addr, 0x100); 1214 chip->capture.dma = ioremap(rxdma_addr, 0x100);
1222 if (chip->model <= PMAC_BURGUNDY) { 1215 if (chip->model <= PMAC_BURGUNDY) {
1223 if (request_irq(np->intrs[0].line, snd_pmac_ctrl_intr, 0, 1216 irq = irq_of_parse_and_map(np, 0);
1217 if (request_irq(irq, snd_pmac_ctrl_intr, 0,
1224 "PMac", (void*)chip)) { 1218 "PMac", (void*)chip)) {
1225 snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[0].line); 1219 snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n",
1220 irq);
1226 err = -EBUSY; 1221 err = -EBUSY;
1227 goto __error; 1222 goto __error;
1228 } 1223 }
1229 chip->irq = np->intrs[0].line; 1224 chip->irq = irq;
1230 } 1225 }
1231 if (request_irq(np->intrs[1].line, snd_pmac_tx_intr, 0, 1226 irq = irq_of_parse_and_map(np, 1);
1232 "PMac Output", (void*)chip)) { 1227 if (request_irq(irq, snd_pmac_tx_intr, 0, "PMac Output", (void*)chip)){
1233 snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[1].line); 1228 snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", irq);
1234 err = -EBUSY; 1229 err = -EBUSY;
1235 goto __error; 1230 goto __error;
1236 } 1231 }
1237 chip->tx_irq = np->intrs[1].line; 1232 chip->tx_irq = irq;
1238 if (request_irq(np->intrs[2].line, snd_pmac_rx_intr, 0, 1233 irq = irq_of_parse_and_map(np, 2);
1239 "PMac Input", (void*)chip)) { 1234 if (request_irq(irq, snd_pmac_rx_intr, 0, "PMac Input", (void*)chip)) {
1240 snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[2].line); 1235 snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", irq);
1241 err = -EBUSY; 1236 err = -EBUSY;
1242 goto __error; 1237 goto __error;
1243 } 1238 }
1244 chip->rx_irq = np->intrs[2].line; 1239 chip->rx_irq = irq;
1245 1240
1246 snd_pmac_sound_feature(chip, 1); 1241 snd_pmac_sound_feature(chip, 1);
1247 1242
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 70e4ebc70260..692c61177678 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -1121,7 +1121,7 @@ static long tumbler_find_device(const char *device, const char *platform,
1121 DBG("(I) GPIO device %s found, offset: %x, active state: %d !\n", 1121 DBG("(I) GPIO device %s found, offset: %x, active state: %d !\n",
1122 device, gp->addr, gp->active_state); 1122 device, gp->addr, gp->active_state);
1123 1123
1124 return (node->n_intrs > 0) ? node->intrs[0].line : 0; 1124 return irq_of_parse_and_map(node, 0);
1125} 1125}
1126 1126
1127/* reset audio */ 1127/* reset audio */
@@ -1264,16 +1264,16 @@ static int __init tumbler_init(struct snd_pmac *chip)
1264 &mix->line_mute, 1); 1264 &mix->line_mute, 1);
1265 irq = tumbler_find_device("headphone-detect", 1265 irq = tumbler_find_device("headphone-detect",
1266 NULL, &mix->hp_detect, 0); 1266 NULL, &mix->hp_detect, 0);
1267 if (irq < 0) 1267 if (irq <= NO_IRQ)
1268 irq = tumbler_find_device("headphone-detect", 1268 irq = tumbler_find_device("headphone-detect",
1269 NULL, &mix->hp_detect, 1); 1269 NULL, &mix->hp_detect, 1);
1270 if (irq < 0) 1270 if (irq <= NO_IRQ)
1271 irq = tumbler_find_device("keywest-gpio15", 1271 irq = tumbler_find_device("keywest-gpio15",
1272 NULL, &mix->hp_detect, 1); 1272 NULL, &mix->hp_detect, 1);
1273 mix->headphone_irq = irq; 1273 mix->headphone_irq = irq;
1274 irq = tumbler_find_device("line-output-detect", 1274 irq = tumbler_find_device("line-output-detect",
1275 NULL, &mix->line_detect, 0); 1275 NULL, &mix->line_detect, 0);
1276 if (irq < 0) 1276 if (irq <= NO_IRQ)
1277 irq = tumbler_find_device("line-output-detect", 1277 irq = tumbler_find_device("line-output-detect",
1278 NULL, &mix->line_detect, 1); 1278 NULL, &mix->line_detect, 1);
1279 mix->lineout_irq = irq; 1279 mix->lineout_irq = irq;
diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c
index db3e22efd02e..2bd8e40b8541 100644
--- a/sound/sparc/amd7930.c
+++ b/sound/sparc/amd7930.c
@@ -1033,10 +1033,10 @@ static int __init amd7930_attach_common(struct resource *rp, int irq)
1033 1033
1034 strcpy(card->driver, "AMD7930"); 1034 strcpy(card->driver, "AMD7930");
1035 strcpy(card->shortname, "Sun AMD7930"); 1035 strcpy(card->shortname, "Sun AMD7930");
1036 sprintf(card->longname, "%s at 0x%02lx:0x%08lx, irq %d", 1036 sprintf(card->longname, "%s at 0x%02lx:0x%08Lx, irq %d",
1037 card->shortname, 1037 card->shortname,
1038 rp->flags & 0xffL, 1038 rp->flags & 0xffL,
1039 rp->start, 1039 (unsigned long long)rp->start,
1040 irq); 1040 irq);
1041 1041
1042 if ((err = snd_amd7930_create(card, rp, 1042 if ((err = snd_amd7930_create(card, rp,
diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
index 5018fcf41df5..9a06c3bd6944 100644
--- a/sound/sparc/cs4231.c
+++ b/sound/sparc/cs4231.c
@@ -2036,7 +2036,7 @@ static int __init cs4231_sbus_attach(struct sbus_dev *sdev)
2036 if (err) 2036 if (err)
2037 return err; 2037 return err;
2038 2038
2039 sprintf(card->longname, "%s at 0x%02lx:0x%016lx, irq %d", 2039 sprintf(card->longname, "%s at 0x%02lx:0x%016Lx, irq %d",
2040 card->shortname, 2040 card->shortname,
2041 rp->flags & 0xffL, 2041 rp->flags & 0xffL,
2042 (unsigned long long)rp->start, 2042 (unsigned long long)rp->start,
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 59a02a0d9afc..f3ae6e23610e 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -2645,7 +2645,7 @@ static int __init dbri_attach(int prom_node, struct sbus_dev *sdev)
2645 strcpy(card->driver, "DBRI"); 2645 strcpy(card->driver, "DBRI");
2646 strcpy(card->shortname, "Sun DBRI"); 2646 strcpy(card->shortname, "Sun DBRI");
2647 rp = &sdev->resource[0]; 2647 rp = &sdev->resource[0];
2648 sprintf(card->longname, "%s at 0x%02lx:0x%016lx, irq %d", 2648 sprintf(card->longname, "%s at 0x%02lx:0x%016Lx, irq %d",
2649 card->shortname, 2649 card->shortname,
2650 rp->flags & 0xffL, (unsigned long long)rp->start, irq.pri); 2650 rp->flags & 0xffL, (unsigned long long)rp->start, irq.pri);
2651 2651