-rw-r--r--Documentation/irqflags-tracing.txt57
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--Documentation/lockdep-design.txt197
-rw-r--r--Documentation/sysctl/vm.txt14
-rw-r--r--MAINTAINERS10
-rw-r--r--arch/alpha/kernel/process.c2
-rw-r--r--arch/arm/common/time-acorn.c2
-rw-r--r--arch/arm/mach-aaec2000/core.c2
-rw-r--r--arch/arm/mach-at91rm9200/at91rm9200_time.c2
-rw-r--r--arch/arm/mach-clps711x/time.c2
-rw-r--r--arch/arm/mach-clps7500/core.c2
-rw-r--r--arch/arm/mach-ebsa110/core.c2
-rw-r--r--arch/arm/mach-ep93xx/core.c2
-rw-r--r--arch/arm/mach-footbridge/dc21285-timer.c2
-rw-r--r--arch/arm/mach-footbridge/dc21285.c10
-rw-r--r--arch/arm/mach-footbridge/isa-timer.c2
-rw-r--r--arch/arm/mach-h720x/cpu-h7201.c2
-rw-r--r--arch/arm/mach-h720x/cpu-h7202.c2
-rw-r--r--arch/arm/mach-imx/time.c2
-rw-r--r--arch/arm/mach-integrator/core.c2
-rw-r--r--arch/arm/mach-integrator/time.c2
-rw-r--r--arch/arm/mach-iop3xx/iop321-time.c2
-rw-r--r--arch/arm/mach-iop3xx/iop331-time.c2
-rw-r--r--arch/arm/mach-ixp2000/core.c2
-rw-r--r--arch/arm/mach-ixp23xx/core.c2
-rw-r--r--arch/arm/mach-ixp4xx/common.c2
-rw-r--r--arch/arm/mach-ixp4xx/nas100d-power.c2
-rw-r--r--arch/arm/mach-ixp4xx/nslu2-power.c4
-rw-r--r--arch/arm/mach-lh7a40x/time.c2
-rw-r--r--arch/arm/mach-netx/time.c2
-rw-r--r--arch/arm/mach-omap1/board-osk.c2
-rw-r--r--arch/arm/mach-omap1/fpga.c2
-rw-r--r--arch/arm/mach-omap1/pm.c2
-rw-r--r--arch/arm/mach-omap1/serial.c2
-rw-r--r--arch/arm/mach-omap1/time.c4
-rw-r--r--arch/arm/mach-omap2/board-apollon.c6
-rw-r--r--arch/arm/mach-omap2/timer-gp.c2
-rw-r--r--arch/arm/mach-pnx4008/time.c2
-rw-r--r--arch/arm/mach-pxa/corgi.c2
-rw-r--r--arch/arm/mach-pxa/lubbock.c2
-rw-r--r--arch/arm/mach-pxa/mainstone.c2
-rw-r--r--arch/arm/mach-pxa/poodle.c2
-rw-r--r--arch/arm/mach-pxa/sharpsl_pm.c8
-rw-r--r--arch/arm/mach-pxa/spitz.c2
-rw-r--r--arch/arm/mach-pxa/time.c2
-rw-r--r--arch/arm/mach-pxa/tosa.c2
-rw-r--r--arch/arm/mach-pxa/trizeps4.c4
-rw-r--r--arch/arm/mach-realview/core.c2
-rw-r--r--arch/arm/mach-rpc/dma.c2
-rw-r--r--arch/arm/mach-s3c2410/dma.c2
-rw-r--r--arch/arm/mach-s3c2410/time.c2
-rw-r--r--arch/arm/mach-s3c2410/usb-simtec.c4
-rw-r--r--arch/arm/mach-sa1100/collie_pm.c4
-rw-r--r--arch/arm/mach-sa1100/dma.c2
-rw-r--r--arch/arm/mach-sa1100/h3600.c2
-rw-r--r--arch/arm/mach-sa1100/time.c2
-rw-r--r--arch/arm/mach-shark/core.c2
-rw-r--r--arch/arm/mach-versatile/core.c2
-rw-r--r--arch/arm/mm/proc-arm720.S2
-rw-r--r--arch/arm/mm/proc-sa110.S2
-rw-r--r--arch/arm/mm/proc-sa1100.S2
-rw-r--r--arch/arm/oprofile/op_model_xscale.c2
-rw-r--r--arch/arm/plat-omap/dma.c2
-rw-r--r--arch/arm/plat-omap/gpio.c15
-rw-r--r--arch/arm/plat-omap/pm.c2
-rw-r--r--arch/arm/plat-omap/timer32k.c2
-rw-r--r--arch/i386/Kconfig8
-rw-r--r--arch/i386/Kconfig.debug13
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/alternative.c10
-rw-r--r--arch/i386/kernel/entry.S36
-rw-r--r--arch/i386/kernel/irq.c6
-rw-r--r--arch/i386/kernel/nmi.c2
-rw-r--r--arch/i386/kernel/stacktrace.c98
-rw-r--r--arch/i386/kernel/traps.c39
-rw-r--r--arch/ia64/kernel/mca.c10
-rw-r--r--arch/ia64/kernel/smpboot.c2
-rw-r--r--arch/mips/kernel/entry.S2
-rw-r--r--arch/mips/kernel/mips-mt.c6
-rw-r--r--arch/powerpc/kernel/irq.c7
-rw-r--r--arch/s390/Kconfig8
-rw-r--r--arch/s390/Kconfig.debug4
-rw-r--r--arch/s390/Makefile5
-rw-r--r--arch/s390/kernel/Makefile1
-rw-r--r--arch/s390/kernel/entry.S29
-rw-r--r--arch/s390/kernel/entry64.S21
-rw-r--r--arch/s390/kernel/irq.c8
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/stacktrace.c90
-rw-r--r--arch/um/kernel/tt/process_kern.c2
-rw-r--r--arch/um/kernel/um_arch.c2
-rw-r--r--arch/x86_64/Kconfig8
-rw-r--r--arch/x86_64/Kconfig.debug4
-rw-r--r--arch/x86_64/ia32/ia32entry.S19
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/entry.S188
-rw-r--r--arch/x86_64/kernel/head64.c5
-rw-r--r--arch/x86_64/kernel/irq.c4
-rw-r--r--arch/x86_64/kernel/nmi.c2
-rw-r--r--arch/x86_64/kernel/process.c2
-rw-r--r--arch/x86_64/kernel/smpboot.c2
-rw-r--r--arch/x86_64/kernel/stacktrace.c221
-rw-r--r--arch/x86_64/kernel/traps.c129
-rw-r--r--arch/x86_64/lib/thunk.S5
-rw-r--r--arch/x86_64/mm/fault.c1
-rw-r--r--block/ll_rw_blk.c2
-rw-r--r--drivers/block/floppy.c42
-rw-r--r--drivers/char/agp/frontend.c2
-rw-r--r--drivers/char/applicom.c2
-rw-r--r--drivers/char/cs5535_gpio.c2
-rw-r--r--drivers/char/ds1286.c2
-rw-r--r--drivers/char/ds1302.c2
-rw-r--r--drivers/char/ds1620.c2
-rw-r--r--drivers/char/dsp56k.c2
-rw-r--r--drivers/char/dtlk.c2
-rw-r--r--drivers/char/efirtc.c2
-rw-r--r--drivers/char/ftape/zftape/zftape-init.c2
-rw-r--r--drivers/char/genrtc.c2
-rw-r--r--drivers/char/hpet.c2
-rw-r--r--drivers/char/hw_random/core.c2
-rw-r--r--drivers/char/i8k.c2
-rw-r--r--drivers/char/ip2/ip2main.c2
-rw-r--r--drivers/char/ip27-rtc.c2
-rw-r--r--drivers/char/ipmi/ipmi_devintf.c2
-rw-r--r--drivers/char/ipmi/ipmi_watchdog.c2
-rw-r--r--drivers/char/istallion.c2
-rw-r--r--drivers/char/ite_gpio.c2
-rw-r--r--drivers/char/lcd.c2
-rw-r--r--drivers/char/lp.c2
-rw-r--r--drivers/char/mem.c18
-rw-r--r--drivers/char/misc.c4
-rw-r--r--drivers/char/mmtimer.c2
-rw-r--r--drivers/char/mwave/mwavedd.c2
-rw-r--r--drivers/char/nvram.c2
-rw-r--r--drivers/char/nwbutton.c2
-rw-r--r--drivers/char/nwflash.c2
-rw-r--r--drivers/char/pc8736x_gpio.c2
-rw-r--r--drivers/char/pcmcia/cm4000_cs.c2
-rw-r--r--drivers/char/pcmcia/cm4040_cs.c2
-rw-r--r--drivers/char/ppdev.c2
-rw-r--r--drivers/char/random.c6
-rw-r--r--drivers/char/raw.c6
-rw-r--r--drivers/char/rio/rio_linux.c2
-rw-r--r--drivers/char/rtc.c4
-rw-r--r--drivers/char/scx200_gpio.c2
-rw-r--r--drivers/char/snsc.c2
-rw-r--r--drivers/char/sonypi.c2
-rw-r--r--drivers/char/stallion.c2
-rw-r--r--drivers/char/sx.c2
-rw-r--r--drivers/char/sysrq.c5
-rw-r--r--drivers/char/tb0219.c2
-rw-r--r--drivers/char/tipar.c2
-rw-r--r--drivers/char/tlclk.c2
-rw-r--r--drivers/char/toshiba.c2
-rw-r--r--drivers/char/tpm/tpm_atmel.c2
-rw-r--r--drivers/char/tpm/tpm_infineon.c2
-rw-r--r--drivers/char/tpm/tpm_nsc.c2
-rw-r--r--drivers/char/tpm/tpm_tis.c2
-rw-r--r--drivers/char/tty_io.c10
-rw-r--r--drivers/char/vc_screen.c2
-rw-r--r--drivers/char/viotape.c2
-rw-r--r--drivers/char/vr41xx_giu.c2
-rw-r--r--drivers/char/vt.c1
-rw-r--r--drivers/char/watchdog/acquirewdt.c2
-rw-r--r--drivers/char/watchdog/advantechwdt.c2
-rw-r--r--drivers/char/watchdog/alim1535_wdt.c2
-rw-r--r--drivers/char/watchdog/alim7101_wdt.c2
-rw-r--r--drivers/char/watchdog/at91_wdt.c2
-rw-r--r--drivers/char/watchdog/booke_wdt.c2
-rw-r--r--drivers/char/watchdog/cpu5wdt.c2
-rw-r--r--drivers/char/watchdog/ep93xx_wdt.c2
-rw-r--r--drivers/char/watchdog/eurotechwdt.c2
-rw-r--r--drivers/char/watchdog/i6300esb.c2
-rw-r--r--drivers/char/watchdog/i8xx_tco.c2
-rw-r--r--drivers/char/watchdog/ib700wdt.c2
-rw-r--r--drivers/char/watchdog/ibmasr.c2
-rw-r--r--drivers/char/watchdog/indydog.c2
-rw-r--r--drivers/char/watchdog/ixp2000_wdt.c2
-rw-r--r--drivers/char/watchdog/ixp4xx_wdt.c2
-rw-r--r--drivers/char/watchdog/machzwd.c2
-rw-r--r--drivers/char/watchdog/mixcomwd.c2
-rw-r--r--drivers/char/watchdog/mpc83xx_wdt.c2
-rw-r--r--drivers/char/watchdog/mpc8xx_wdt.c2
-rw-r--r--drivers/char/watchdog/mpcore_wdt.c2
-rw-r--r--drivers/char/watchdog/mv64x60_wdt.c2
-rw-r--r--drivers/char/watchdog/pcwd.c4
-rw-r--r--drivers/char/watchdog/pcwd_pci.c4
-rw-r--r--drivers/char/watchdog/pcwd_usb.c4
-rw-r--r--drivers/char/watchdog/s3c2410_wdt.c2
-rw-r--r--drivers/char/watchdog/sa1100_wdt.c2
-rw-r--r--drivers/char/watchdog/sbc60xxwdt.c2
-rw-r--r--drivers/char/watchdog/sbc8360.c2
-rw-r--r--drivers/char/watchdog/sbc_epx_c3.c2
-rw-r--r--drivers/char/watchdog/sc1200wdt.c2
-rw-r--r--drivers/char/watchdog/sc520_wdt.c2
-rw-r--r--drivers/char/watchdog/scx200_wdt.c2
-rw-r--r--drivers/char/watchdog/shwdt.c2
-rw-r--r--drivers/char/watchdog/softdog.c2
-rw-r--r--drivers/char/watchdog/w83627hf_wdt.c2
-rw-r--r--drivers/char/watchdog/w83877f_wdt.c2
-rw-r--r--drivers/char/watchdog/w83977f_wdt.c2
-rw-r--r--drivers/char/watchdog/wafer5823wdt.c2
-rw-r--r--drivers/char/watchdog/wdrtas.c4
-rw-r--r--drivers/char/watchdog/wdt.c4
-rw-r--r--drivers/char/watchdog/wdt285.c2
-rw-r--r--drivers/char/watchdog/wdt977.c2
-rw-r--r--drivers/char/watchdog/wdt_pci.c4
-rw-r--r--drivers/ide/ide-floppy.c2
-rw-r--r--drivers/ide/ide-io.c8
-rw-r--r--drivers/ide/ide-taskfile.c2
-rw-r--r--drivers/ieee1394/hosts.c10
-rw-r--r--drivers/input/serio/i8042-sparcio.h2
-rw-r--r--drivers/input/serio/libps2.c2
-rw-r--r--drivers/md/md.c6
-rw-r--r--drivers/mmc/mmc.c2
-rw-r--r--drivers/mmc/sdhci.c55
-rw-r--r--drivers/net/3c59x.c4
-rw-r--r--drivers/net/8390.c4
-rw-r--r--drivers/net/forcedeth.c28
-rw-r--r--drivers/net/wireless/hostap/hostap_hw.c10
-rw-r--r--drivers/pnp/pnpacpi/rsparser.c11
-rw-r--r--drivers/s390/char/sclp.c10
-rw-r--r--drivers/s390/cio/cio.c2
-rw-r--r--drivers/s390/net/qeth_main.c6
-rw-r--r--drivers/s390/s390mach.c3
-rw-r--r--drivers/scsi/libata-core.c2
-rw-r--r--drivers/serial/8250_pnp.c2
-rw-r--r--drivers/serial/serial_core.c11
-rw-r--r--drivers/spi/spi.c2
-rw-r--r--drivers/usb/core/inode.c4
-rw-r--r--drivers/video/Kconfig15
-rw-r--r--drivers/video/Makefile2
-rw-r--r--drivers/video/pnx4008/Makefile7
-rw-r--r--drivers/video/pnx4008/dum.h211
-rw-r--r--drivers/video/pnx4008/fbcommon.h43
-rw-r--r--drivers/video/pnx4008/pnxrgbfb.c213
-rw-r--r--drivers/video/pnx4008/sdum.c872
-rw-r--r--drivers/video/pnx4008/sdum.h139
-rw-r--r--fs/binfmt_elf.c15
-rw-r--r--fs/block_dev.c102
-rw-r--r--fs/dcache.c6
-rw-r--r--fs/direct-io.c6
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/namei.c20
-rw-r--r--fs/ntfs/inode.c33
-rw-r--r--fs/ntfs/super.c31
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/super.c11
-rw-r--r--fs/ufs/super.c2
-rw-r--r--include/asm-alpha/rwsem.h14
-rw-r--r--include/asm-arm/floppy.h2
-rw-r--r--include/asm-arm/hw_irq.h11
-rw-r--r--include/asm-arm/mach/time.h1
-rw-r--r--include/asm-arm/signal.h6
-rw-r--r--include/asm-generic/mutex-null.h15
-rw-r--r--include/asm-generic/percpu.h2
-rw-r--r--include/asm-i386/irqflags.h127
-rw-r--r--include/asm-i386/rwsem.h42
-rw-r--r--include/asm-i386/spinlock.h12
-rw-r--r--include/asm-i386/system.h20
-rw-r--r--include/asm-ia64/irq.h2
-rw-r--r--include/asm-ia64/percpu.h1
-rw-r--r--include/asm-ia64/rwsem.h18
-rw-r--r--include/asm-ia64/thread_info.h2
-rw-r--r--include/asm-m32r/system.h2
-rw-r--r--include/asm-powerpc/irqflags.h31
-rw-r--r--include/asm-powerpc/percpu.h1
-rw-r--r--include/asm-powerpc/rwsem.h18
-rw-r--r--include/asm-s390/irqflags.h50
-rw-r--r--include/asm-s390/percpu.h1
-rw-r--r--include/asm-s390/rwsem.h31
-rw-r--r--include/asm-s390/semaphore.h3
-rw-r--r--include/asm-s390/system.h32
-rw-r--r--include/asm-sh/rwsem.h18
-rw-r--r--include/asm-sh/system.h2
-rw-r--r--include/asm-sparc64/percpu.h1
-rw-r--r--include/asm-x86_64/irqflags.h141
-rw-r--r--include/asm-x86_64/kdebug.h2
-rw-r--r--include/asm-x86_64/percpu.h2
-rw-r--r--include/asm-x86_64/system.h38
-rw-r--r--include/asm-xtensa/rwsem.h18
-rw-r--r--include/linux/completion.h12
-rw-r--r--include/linux/dcache.h12
-rw-r--r--include/linux/debug_locks.h69
-rw-r--r--include/linux/fs.h38
-rw-r--r--include/linux/hardirq.h27
-rw-r--r--include/linux/hrtimer.h1
-rw-r--r--include/linux/ide.h2
-rw-r--r--include/linux/idr.h2
-rw-r--r--include/linux/init_task.h15
-rw-r--r--include/linux/interrupt.h79
-rw-r--r--include/linux/ioport.h1
-rw-r--r--include/linux/irq.h4
-rw-r--r--include/linux/irqflags.h96
-rw-r--r--include/linux/kallsyms.h23
-rw-r--r--include/linux/lockdep.h353
-rw-r--r--include/linux/mm.h8
-rw-r--r--include/linux/mmzone.h6
-rw-r--r--include/linux/module.h6
-rw-r--r--include/linux/mutex-debug.h18
-rw-r--r--include/linux/mutex.h37
-rw-r--r--include/linux/notifier.h2
-rw-r--r--include/linux/pci_ids.h2
-rw-r--r--include/linux/rtmutex.h10
-rw-r--r--include/linux/rwsem-spinlock.h27
-rw-r--r--include/linux/rwsem.h83
-rw-r--r--include/linux/sched.h86
-rw-r--r--include/linux/seqlock.h12
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/spinlock.h63
-rw-r--r--include/linux/spinlock_api_smp.h2
-rw-r--r--include/linux/spinlock_api_up.h1
-rw-r--r--include/linux/spinlock_types.h47
-rw-r--r--include/linux/spinlock_types_up.h9
-rw-r--r--include/linux/spinlock_up.h1
-rw-r--r--include/linux/stacktrace.h20
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/wait.h8
-rw-r--r--include/net/af_unix.h3
-rw-r--r--include/net/sock.h19
-rw-r--r--init/main.c31
-rw-r--r--kernel/Makefile8
-rw-r--r--kernel/capability.c8
-rw-r--r--kernel/exit.c40
-rw-r--r--kernel/fork.c51
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/hrtimer.c6
-rw-r--r--kernel/irq/handle.c31
-rw-r--r--kernel/irq/manage.c10
-rw-r--r--kernel/kmod.c2
-rw-r--r--kernel/lockdep.c2702
-rw-r--r--kernel/lockdep_internals.h78
-rw-r--r--kernel/lockdep_proc.c345
-rw-r--r--kernel/module.c26
-rw-r--r--kernel/mutex-debug.c399
-rw-r--r--kernel/mutex-debug.h94
-rw-r--r--kernel/mutex.c74
-rw-r--r--kernel/mutex.h19
-rw-r--r--kernel/pid.c6
-rw-r--r--kernel/printk.c23
-rw-r--r--kernel/ptrace.c6
-rw-r--r--kernel/rcupdate.c4
-rw-r--r--kernel/rtmutex-debug.c307
-rw-r--r--kernel/rtmutex-debug.h8
-rw-r--r--kernel/rtmutex-tester.c4
-rw-r--r--kernel/rtmutex.c57
-rw-r--r--kernel/rtmutex.h3
-rw-r--r--kernel/rwsem.c147
-rw-r--r--kernel/sched.c748
-rw-r--r--kernel/softirq.c141
-rw-r--r--kernel/spinlock.c79
-rw-r--r--kernel/stacktrace.c24
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/timer.c13
-rw-r--r--kernel/wait.c4
-rw-r--r--kernel/workqueue.c2
-rw-r--r--lib/Kconfig.debug127
-rw-r--r--lib/Makefile3
-rw-r--r--lib/debug_locks.c45
-rw-r--r--lib/kernel_lock.c7
-rw-r--r--lib/locking-selftest-hardirq.h9
-rw-r--r--lib/locking-selftest-mutex.h11
-rw-r--r--lib/locking-selftest-rlock-hardirq.h2
-rw-r--r--lib/locking-selftest-rlock-softirq.h2
-rw-r--r--lib/locking-selftest-rlock.h14
-rw-r--r--lib/locking-selftest-rsem.h14
-rw-r--r--lib/locking-selftest-softirq.h9
-rw-r--r--lib/locking-selftest-spin-hardirq.h2
-rw-r--r--lib/locking-selftest-spin-softirq.h2
-rw-r--r--lib/locking-selftest-spin.h11
-rw-r--r--lib/locking-selftest-wlock-hardirq.h2
-rw-r--r--lib/locking-selftest-wlock-softirq.h2
-rw-r--r--lib/locking-selftest-wlock.h14
-rw-r--r--lib/locking-selftest-wsem.h14
-rw-r--r--lib/locking-selftest.c1216
-rw-r--r--lib/rwsem-spinlock.c66
-rw-r--r--lib/rwsem.c51
-rw-r--r--lib/spinlock_debug.c98
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mremap.c2
-rw-r--r--mm/oom_kill.c8
-rw-r--r--mm/page_alloc.c22
-rw-r--r--mm/slab.c59
-rw-r--r--mm/swap_state.c2
-rw-r--r--mm/vmalloc.c2
-rw-r--r--mm/vmscan.c27
-rw-r--r--net/8021q/vlan.c11
-rw-r--r--net/core/skbuff.c7
-rw-r--r--net/core/sock.c111
-rw-r--r--net/ipv4/route.c26
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/sunrpc/rpc_pipe.c8
-rw-r--r--net/unix/af_unix.c12
-rw-r--r--sound/core/seq/seq_device.c6
-rw-r--r--sound/core/seq/seq_ports.c4
-rw-r--r--sound/sparc/amd7930.c4
-rw-r--r--sound/sparc/cs4231.c2
-rw-r--r--sound/sparc/dbri.c2
403 files changed, 10750 insertions, 2389 deletions
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
new file mode 100644
index 000000000000..6a444877ee0b
--- /dev/null
+++ b/Documentation/irqflags-tracing.txt
@@ -0,0 +1,57 @@
1IRQ-flags state tracing
2
3started by Ingo Molnar <mingo@redhat.com>
4
5the "irq-flags tracing" feature "traces" hardirq and softirq state, in
6that it gives interested subsystems an opportunity to be notified of
7every hardirqs-off/hardirqs-on, softirqs-off/softirqs-on event that
8happens in the kernel.
9
10CONFIG_TRACE_IRQFLAGS_SUPPORT is needed for CONFIG_PROVE_SPIN_LOCKING
11and CONFIG_PROVE_RW_LOCKING to be offered by the generic lock debugging
12code. Otherwise only CONFIG_PROVE_MUTEX_LOCKING and
13CONFIG_PROVE_RWSEM_LOCKING will be offered on an architecture - these
14are locking APIs that are not used in IRQ context. (the one exception
15for rwsems is worked around)
16
17architecture support for this is certainly not in the "trivial"
18category, because lots of lowlevel assembly code deals with irq-flags
19state changes. But an architecture can be irq-flags-tracing enabled in a
20rather straightforward and risk-free manner.
21
22Architectures that want to support this need to do a couple of
23code-organizational changes first:
24
25- move their irq-flags manipulation code from their asm/system.h header
26 to asm/irqflags.h
27
28- rename local_irq_disable()/etc to raw_local_irq_disable()/etc. so that
29 the linux/irqflags.h code can inject callbacks and can construct the
30 real local_irq_disable()/etc APIs.
31
32- add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file
33
34and then a couple of functional changes are needed as well to implement
35irq-flags-tracing support:
36
37- in lowlevel entry code add (build-conditional) calls to the
38 trace_hardirqs_off()/trace_hardirqs_on() functions. The lock validator
39 closely guards whether the 'real' irq-flags matches the 'virtual'
40 irq-flags state, and complains loudly (and turns itself off) if the
41 two do not match. Usually, most of the time spent on arch support for
42 irq-flags-tracing goes into this state: look at the lockdep
43 complaint, try to figure out the assembly code we did not cover yet,
44 fix and repeat. Once the system has booted up and works without a
45 lockdep complaint in the irq-flags-tracing functions, arch support is
46 complete.
47- if the architecture has non-maskable interrupts then those need to be
48 excluded from the irq-tracing [and lock validation] mechanism via
49 lockdep_off()/lockdep_on().
50
51in general there is no risk from having an incomplete irq-flags-tracing
52implementation in an architecture: lockdep will detect that and will
53turn itself off. I.e. the lock validator will still be reliable. There
54should be no crashes due to irq-tracing bugs. (except if the assembly
55changes break other code by modifying conditions or registers that
56shouldn't be)
57
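As a rough illustration of the renaming step described in this file: once an architecture exposes raw_local_irq_disable()/raw_local_irq_enable() in asm/irqflags.h, the generic linux/irqflags.h layer can wrap them and inject the tracing callbacks. The following is a minimal sketch of that wrapping only; the exact macro bodies in the patch may differ.

/*
 * Sketch only (assumed wrapping, not the literal patch contents): the
 * generic header builds the real local_irq_*() APIs on top of the raw
 * primitives and notifies the tracer on every hardirqs-on/off transition.
 */
#define local_irq_disable() \
	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)

#define local_irq_enable() \
	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)

#define local_irq_save(flags) \
	do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)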
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 86e9282d1c20..149f62ba14a5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -435,6 +435,15 @@ running once the system is up.
435 435
436 debug [KNL] Enable kernel debugging (events log level). 436 debug [KNL] Enable kernel debugging (events log level).
437 437
438 debug_locks_verbose=
439 [KNL] verbose self-tests
440 Format: <0|1>
441 Print debugging info while doing the locking API
442 self-tests.
443 The default is 0 (no extra messages); setting it to
444 1 will print _a lot_ more information - normally
445 only useful to kernel developers.
446
438 decnet= [HW,NET] 447 decnet= [HW,NET]
439 Format: <area>[,<node>] 448 Format: <area>[,<node>]
440 See also Documentation/networking/decnet.txt. 449 See also Documentation/networking/decnet.txt.
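For context on how a boot parameter like debug_locks_verbose= is typically consumed, here is a minimal, hypothetical sketch using the standard __setup() mechanism; the variable and handler names below are assumptions, not necessarily those used by the lock-debugging code itself.

#include <linux/init.h>
#include <linux/kernel.h>

/* Illustrative only: assumed variable and handler names. */
static int debug_locks_verbose;

static int __init setup_debug_locks_verbose(char *str)
{
	get_option(&str, &debug_locks_verbose);	/* parses "debug_locks_verbose=1" */
	return 1;
}
__setup("debug_locks_verbose=", setup_debug_locks_verbose);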
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
new file mode 100644
index 000000000000..00d93605bfd3
--- /dev/null
+++ b/Documentation/lockdep-design.txt
@@ -0,0 +1,197 @@
1Runtime locking correctness validator
2=====================================
3
4started by Ingo Molnar <mingo@redhat.com>
5additions by Arjan van de Ven <arjan@linux.intel.com>
6
7Lock-class
8----------
9
10The basic object the validator operates upon is a 'class' of locks.
11
12A class of locks is a group of locks that are logically the same with
13respect to locking rules, even if the locks may have multiple (possibly
14tens of thousands of) instantiations. For example a lock in the inode
15struct is one class, while each inode has its own instantiation of that
16lock class.
17
18The validator tracks the 'state' of lock-classes, and it tracks
19dependencies between different lock-classes. The validator maintains a
20rolling proof that the state and the dependencies are correct.
21
22Unlike a lock instantiation, the lock-class itself never goes away: when
23a lock-class is used for the first time after bootup it gets registered,
24and all subsequent uses of that lock-class will be attached to this
25lock-class.
26
27State
28-----
29
30The validator tracks lock-class usage history into 5 separate state bits:
31
32- 'ever held in hardirq context' [ == hardirq-safe ]
33- 'ever held in softirq context' [ == softirq-safe ]
34- 'ever held with hardirqs enabled' [ == hardirq-unsafe ]
35- 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ]
36
37- 'ever used' [ == !unused ]
38
39Single-lock state rules:
40------------------------
41
42A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
43following states are exclusive, and only one of them is allowed to be
44set for any lock-class:
45
46 <hardirq-safe> and <hardirq-unsafe>
47 <softirq-safe> and <softirq-unsafe>
48
49The validator detects and reports lock usage that violates these
50single-lock state rules.
51
52Multi-lock dependency rules:
53----------------------------
54
55The same lock-class must not be acquired twice, because this could lead
56to lock recursion deadlocks.
57
58Furthermore, the same two locks may not be taken in opposite orders:
59
60 <L1> -> <L2>
61 <L2> -> <L1>
62
63because this could lead to lock inversion deadlocks. (The validator
64finds such dependencies in arbitrary complexity, i.e. there can be any
65other locking sequence between the acquire-lock operations, the
66validator will still track all dependencies between locks.)
67
68Furthermore, the following usage-based lock dependencies are not allowed
69between any two lock-classes:
70
71 <hardirq-safe> -> <hardirq-unsafe>
72 <softirq-safe> -> <softirq-unsafe>
73
74The first rule comes from the fact that a hardirq-safe lock could be
75taken by a hardirq context, interrupting a hardirq-unsafe lock - and
76thus could result in a lock inversion deadlock. Likewise, a softirq-safe
77lock could be taken by a softirq context, interrupting a softirq-unsafe
78lock.
79
80The above rules are enforced for any locking sequence that occurs in the
81kernel: when acquiring a new lock, the validator checks whether there is
82any rule violation between the new lock and any of the held locks.
83
84When a lock-class changes its state, the following aspects of the above
85dependency rules are enforced:
86
87- if a new hardirq-safe lock is discovered, we check whether it
88 took any hardirq-unsafe lock in the past.
89
90- if a new softirq-safe lock is discovered, we check whether it took
91 any softirq-unsafe lock in the past.
92
93- if a new hardirq-unsafe lock is discovered, we check whether any
94 hardirq-safe lock took it in the past.
95
96- if a new softirq-unsafe lock is discovered, we check whether any
97 softirq-safe lock took it in the past.
98
99(Again, we do these checks too on the basis that an interrupt context
100could interrupt _any_ of the softirq-unsafe or hardirq-unsafe locks, which
101could lead to a lock inversion deadlock - even if that lock scenario did
102not trigger in practice yet.)
103
104Exception: Nested data dependencies leading to nested locking
105-------------------------------------------------------------
106
107There are a few cases where the Linux kernel acquires more than one
108instance of the same lock-class. Such cases typically happen when there
109is some sort of hierarchy within objects of the same type. In these
110cases there is an inherent "natural" ordering between the two objects
111(defined by the properties of the hierarchy), and the kernel grabs the
112locks in this fixed order on each of the objects.
113
114An example of such an object hierarchy that results in "nested locking"
115is that of a "whole disk" block-dev object and a "partition" block-dev
116object; the partition is "part of" the whole device and as long as one
117always takes the whole disk lock as a higher lock than the partition
118lock, the lock ordering is fully correct. The validator does not
119automatically detect this natural ordering, as the locking rule behind
120the ordering is not static.
121
122In order to teach the validator about this correct usage model, new
123versions of the various locking primitives were added that allow you to
124specify a "nesting level". An example call, for the block device mutex,
125looks like this:
126
127enum bdev_bd_mutex_lock_class
128{
129 BD_MUTEX_NORMAL,
130 BD_MUTEX_WHOLE,
131 BD_MUTEX_PARTITION
132};
133
134 mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
135
136In this case the locking is done on a bdev object that is known to be a
137partition.
138
139The validator treats a lock that is taken in such a nested fashion as a
140separate (sub)class for the purposes of validation.
141
142Note: When changing code to use the _nested() primitives, be careful and
143check really thoroughly that the hierarchy is correctly mapped; otherwise
144you can get false positives or false negatives.
145
146Proof of 100% correctness:
147--------------------------
148
149The validator achieves perfect, mathematical 'closure' (proof of locking
150correctness) in the sense that for every simple, standalone single-task
150locking sequence that occurred at least once during the lifetime of the
151kernel, the validator proves with 100% certainty that no
152combination and timing of these locking sequences can cause any class of
153lock-related deadlock. [*]
155
156I.e. complex multi-CPU and multi-task locking scenarios do not have to
157occur in practice to prove a deadlock: only the simple 'component'
158locking chains have to occur at least once (anytime, in any
159task/context) for the validator to be able to prove correctness. (For
160example, complex deadlocks that would normally need more than 3 CPUs and
161a very unlikely constellation of tasks, irq-contexts and timings to
162occur, can be detected on a plain, lightly loaded single-CPU system as
163well!)
164
165This radically decreases the complexity of locking related QA of the
166kernel: what has to be done during QA is to trigger as many "simple"
167single-task locking dependencies in the kernel as possible, at least
168once, to prove locking correctness - instead of having to trigger every
169possible combination of locking interaction between CPUs, combined with
170every possible hardirq and softirq nesting scenario (which is impossible
171to do in practice).
172
173[*] assuming that the validator itself is 100% correct, and no other
174 part of the system corrupts the state of the validator in any way.
175 We also assume that all NMI/SMM paths [which could interrupt
176 even hardirq-disabled codepaths] are correct and do not interfere
177 with the validator. We also assume that the 64-bit 'chain hash'
178 value is unique for every lock-chain in the system. Also, lock
179 recursion must not be higher than 20.
180
181Performance:
182------------
183
184The above rules require _massive_ amounts of runtime checking. If we did
185that for every lock taken and for every irqs-enable event, it would
186render the system practically unusably slow. The complexity of checking
187is O(N^2), so even with just a few hundred lock-classes we'd have to do
188tens of thousands of checks for every event.
189
190This problem is solved by checking any given 'locking scenario' (unique
191sequence of locks taken after each other) only once. A simple stack of
192held locks is maintained, and a lightweight 64-bit hash value is
193calculated; this hash is unique for every lock chain. When the chain is
194validated for the first time, the hash value is put into a hash table,
195which can be checked in a lockfree manner. If the locking chain occurs
196again later on, the hash table tells us that we don't have to validate
197the chain again.
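To make the irq-safety rules above concrete, here is a small hypothetical example (not part of this patch; the lock and function names are invented) of a dependency the validator would report, even though no deadlock has actually triggered:

#include <linux/spinlock.h>
#include <linux/interrupt.h>

static DEFINE_SPINLOCK(irq_lock);	/* taken from hardirq context -> hardirq-safe */
static DEFINE_SPINLOCK(data_lock);	/* taken with hardirqs enabled -> hardirq-unsafe */

static irqreturn_t demo_isr(int irq, void *dev_id, struct pt_regs *regs)
{
	spin_lock(&irq_lock);		/* marks irq_lock as hardirq-safe */
	/* ... handle the interrupt ... */
	spin_unlock(&irq_lock);
	return IRQ_HANDLED;
}

static void demo_syscall_path(void)
{
	spin_lock(&data_lock);		/* hardirqs are on here: data_lock becomes hardirq-unsafe */
	/* ... */
	spin_unlock(&data_lock);
}

static void demo_other_path(void)
{
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	spin_lock(&data_lock);		/* hardirq-safe -> hardirq-unsafe dependency: reported */
	/* ... */
	spin_unlock(&data_lock);
	spin_unlock_irqrestore(&irq_lock, flags);
}

The report does not require the deadlock to happen: another CPU holding data_lock with interrupts enabled can be interrupted by demo_isr(), which then spins on irq_lock, while demo_other_path() holds irq_lock and waits for data_lock.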
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 86754eb390da..7cee90223d3a 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm:
28- block_dump 28- block_dump
29- drop-caches 29- drop-caches
30- zone_reclaim_mode 30- zone_reclaim_mode
31- min_unmapped_ratio
31- panic_on_oom 32- panic_on_oom
32 33
33============================================================== 34==============================================================
@@ -168,6 +169,19 @@ in all nodes of the system.
168 169
169============================================================= 170=============================================================
170 171
172min_unmapped_ratio:
173
174This is available only on NUMA kernels.
175
176A percentage of the file-backed pages in each zone. Zone reclaim will only
177occur if more than this percentage of pages are file-backed and unmapped.
178This is to ensure that a minimal amount of local pages is still available for
179file I/O even if the node is overallocated.
180
181The default is 1 percent.
182
183=============================================================
184
171panic_on_oom 185panic_on_oom
172 186
173This enables or disables panic on out-of-memory feature. If this is set to 1, 187This enables or disables panic on out-of-memory feature. If this is set to 1,
diff --git a/MAINTAINERS b/MAINTAINERS
index 42be131139c8..5f76a4f5cd4b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -861,6 +861,8 @@ S: Maintained
861DOCBOOK FOR DOCUMENTATION 861DOCBOOK FOR DOCUMENTATION
862P: Martin Waitz 862P: Martin Waitz
863M: tali@admingilde.org 863M: tali@admingilde.org
864P: Randy Dunlap
865M: rdunlap@xenotime.net
864T: git http://tali.admingilde.org/git/linux-docbook.git 866T: git http://tali.admingilde.org/git/linux-docbook.git
865S: Maintained 867S: Maintained
866 868
@@ -2298,6 +2300,14 @@ M: promise@pnd-pc.demon.co.uk
2298W: http://www.pnd-pc.demon.co.uk/promise/ 2300W: http://www.pnd-pc.demon.co.uk/promise/
2299S: Maintained 2301S: Maintained
2300 2302
2303PVRUSB2 VIDEO4LINUX DRIVER
2304P: Mike Isely
2305M: isely@pobox.com
2306L: pvrusb2@isely.net
2307L: video4linux-list@redhat.com
2308W: http://www.isely.net/pvrusb2/
2309S: Maintained
2310
2301PXA2xx SUPPORT 2311PXA2xx SUPPORT
2302P: Nicolas Pitre 2312P: Nicolas Pitre
2303M: nico@cam.org 2313M: nico@cam.org
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 01c8c8b23337..41ebf51a107a 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -474,7 +474,7 @@ out:
474 */ 474 */
475 475
476unsigned long 476unsigned long
477thread_saved_pc(task_t *t) 477thread_saved_pc(struct task_struct *t)
478{ 478{
479 unsigned long base = (unsigned long)task_stack_page(t); 479 unsigned long base = (unsigned long)task_stack_page(t);
480 unsigned long fp, sp = task_thread_info(t)->pcb.ksp; 480 unsigned long fp, sp = task_thread_info(t)->pcb.ksp;
diff --git a/arch/arm/common/time-acorn.c b/arch/arm/common/time-acorn.c
index 31b65e2231d9..3f60dd9aca80 100644
--- a/arch/arm/common/time-acorn.c
+++ b/arch/arm/common/time-acorn.c
@@ -77,7 +77,7 @@ ioc_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
77 77
78static struct irqaction ioc_timer_irq = { 78static struct irqaction ioc_timer_irq = {
79 .name = "timer", 79 .name = "timer",
80 .flags = SA_INTERRUPT, 80 .flags = IRQF_DISABLED,
81 .handler = ioc_timer_interrupt 81 .handler = ioc_timer_interrupt
82}; 82};
83 83
diff --git a/arch/arm/mach-aaec2000/core.c b/arch/arm/mach-aaec2000/core.c
index aa01d6753d6e..baa997c857dc 100644
--- a/arch/arm/mach-aaec2000/core.c
+++ b/arch/arm/mach-aaec2000/core.c
@@ -142,7 +142,7 @@ aaec2000_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
142 142
143static struct irqaction aaec2000_timer_irq = { 143static struct irqaction aaec2000_timer_irq = {
144 .name = "AAEC-2000 Timer Tick", 144 .name = "AAEC-2000 Timer Tick",
145 .flags = SA_INTERRUPT | SA_TIMER, 145 .flags = IRQF_DISABLED | IRQF_TIMER,
146 .handler = aaec2000_timer_interrupt, 146 .handler = aaec2000_timer_interrupt,
147}; 147};
148 148
diff --git a/arch/arm/mach-at91rm9200/at91rm9200_time.c b/arch/arm/mach-at91rm9200/at91rm9200_time.c
index 0aa22650a00f..a92a8622c78a 100644
--- a/arch/arm/mach-at91rm9200/at91rm9200_time.c
+++ b/arch/arm/mach-at91rm9200/at91rm9200_time.c
@@ -85,7 +85,7 @@ static irqreturn_t at91rm9200_timer_interrupt(int irq, void *dev_id, struct pt_r
85 85
86static struct irqaction at91rm9200_timer_irq = { 86static struct irqaction at91rm9200_timer_irq = {
87 .name = "at91_tick", 87 .name = "at91_tick",
88 .flags = SA_SHIRQ | SA_INTERRUPT | SA_TIMER, 88 .flags = IRQF_SHARED | IRQF_DISABLED | IRQF_TIMER,
89 .handler = at91rm9200_timer_interrupt 89 .handler = at91rm9200_timer_interrupt
90}; 90};
91 91
diff --git a/arch/arm/mach-clps711x/time.c b/arch/arm/mach-clps711x/time.c
index b0f1db258e80..a071eac4a30a 100644
--- a/arch/arm/mach-clps711x/time.c
+++ b/arch/arm/mach-clps711x/time.c
@@ -58,7 +58,7 @@ p720t_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
58 58
59static struct irqaction clps711x_timer_irq = { 59static struct irqaction clps711x_timer_irq = {
60 .name = "CLPS711x Timer Tick", 60 .name = "CLPS711x Timer Tick",
61 .flags = SA_INTERRUPT | SA_TIMER, 61 .flags = IRQF_DISABLED | IRQF_TIMER,
62 .handler = p720t_timer_interrupt, 62 .handler = p720t_timer_interrupt,
63}; 63};
64 64
diff --git a/arch/arm/mach-clps7500/core.c b/arch/arm/mach-clps7500/core.c
index cd66df896364..92eaebdd5606 100644
--- a/arch/arm/mach-clps7500/core.c
+++ b/arch/arm/mach-clps7500/core.c
@@ -316,7 +316,7 @@ clps7500_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
316 316
317static struct irqaction clps7500_timer_irq = { 317static struct irqaction clps7500_timer_irq = {
318 .name = "CLPS7500 Timer Tick", 318 .name = "CLPS7500 Timer Tick",
319 .flags = SA_INTERRUPT | SA_TIMER, 319 .flags = IRQF_DISABLED | IRQF_TIMER,
320 .handler = clps7500_timer_interrupt, 320 .handler = clps7500_timer_interrupt,
321}; 321};
322 322
diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c
index 6d620d8268cc..70dd12ef3c40 100644
--- a/arch/arm/mach-ebsa110/core.c
+++ b/arch/arm/mach-ebsa110/core.c
@@ -199,7 +199,7 @@ ebsa110_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
199 199
200static struct irqaction ebsa110_timer_irq = { 200static struct irqaction ebsa110_timer_irq = {
201 .name = "EBSA110 Timer Tick", 201 .name = "EBSA110 Timer Tick",
202 .flags = SA_INTERRUPT | SA_TIMER, 202 .flags = IRQF_DISABLED | IRQF_TIMER,
203 .handler = ebsa110_timer_interrupt, 203 .handler = ebsa110_timer_interrupt,
204}; 204};
205 205
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index 26df3b666b56..a87a784b9201 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -116,7 +116,7 @@ static int ep93xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
116 116
117static struct irqaction ep93xx_timer_irq = { 117static struct irqaction ep93xx_timer_irq = {
118 .name = "ep93xx timer", 118 .name = "ep93xx timer",
119 .flags = SA_INTERRUPT | SA_TIMER, 119 .flags = IRQF_DISABLED | IRQF_TIMER,
120 .handler = ep93xx_timer_interrupt, 120 .handler = ep93xx_timer_interrupt,
121}; 121};
122 122
diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c
index e668d4acd808..2af610811ca4 100644
--- a/arch/arm/mach-footbridge/dc21285-timer.c
+++ b/arch/arm/mach-footbridge/dc21285-timer.c
@@ -44,7 +44,7 @@ timer1_interrupt(int irq, void *dev_id, struct pt_regs *regs)
44static struct irqaction footbridge_timer_irq = { 44static struct irqaction footbridge_timer_irq = {
45 .name = "Timer1 timer tick", 45 .name = "Timer1 timer tick",
46 .handler = timer1_interrupt, 46 .handler = timer1_interrupt,
47 .flags = SA_INTERRUPT | SA_TIMER, 47 .flags = IRQF_DISABLED | IRQF_TIMER,
48}; 48};
49 49
50/* 50/*
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index 5dace2597838..607ed1f5b3f8 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -332,15 +332,15 @@ void __init dc21285_preinit(void)
332 /* 332 /*
333 * We don't care if these fail. 333 * We don't care if these fail.
334 */ 334 */
335 request_irq(IRQ_PCI_SERR, dc21285_serr_irq, SA_INTERRUPT, 335 request_irq(IRQ_PCI_SERR, dc21285_serr_irq, IRQF_DISABLED,
336 "PCI system error", &serr_timer); 336 "PCI system error", &serr_timer);
337 request_irq(IRQ_PCI_PERR, dc21285_parity_irq, SA_INTERRUPT, 337 request_irq(IRQ_PCI_PERR, dc21285_parity_irq, IRQF_DISABLED,
338 "PCI parity error", &perr_timer); 338 "PCI parity error", &perr_timer);
339 request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, SA_INTERRUPT, 339 request_irq(IRQ_PCI_ABORT, dc21285_abort_irq, IRQF_DISABLED,
340 "PCI abort", NULL); 340 "PCI abort", NULL);
341 request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, SA_INTERRUPT, 341 request_irq(IRQ_DISCARD_TIMER, dc21285_discard_irq, IRQF_DISABLED,
342 "Discard timer", NULL); 342 "Discard timer", NULL);
343 request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, SA_INTERRUPT, 343 request_irq(IRQ_PCI_DPERR, dc21285_dparity_irq, IRQF_DISABLED,
344 "PCI data parity", NULL); 344 "PCI data parity", NULL);
345 345
346 if (cfn_mode) { 346 if (cfn_mode) {
diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c
index 282b473c21f2..c4810a40c8e1 100644
--- a/arch/arm/mach-footbridge/isa-timer.c
+++ b/arch/arm/mach-footbridge/isa-timer.c
@@ -73,7 +73,7 @@ isa_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
73static struct irqaction isa_timer_irq = { 73static struct irqaction isa_timer_irq = {
74 .name = "ISA timer tick", 74 .name = "ISA timer tick",
75 .handler = isa_timer_interrupt, 75 .handler = isa_timer_interrupt,
76 .flags = SA_INTERRUPT | SA_TIMER, 76 .flags = IRQF_DISABLED | IRQF_TIMER,
77}; 77};
78 78
79static void __init isa_timer_init(void) 79static void __init isa_timer_init(void)
diff --git a/arch/arm/mach-h720x/cpu-h7201.c b/arch/arm/mach-h720x/cpu-h7201.c
index af9e4a5d5ea7..a9a8255a3a03 100644
--- a/arch/arm/mach-h720x/cpu-h7201.c
+++ b/arch/arm/mach-h720x/cpu-h7201.c
@@ -41,7 +41,7 @@ h7201_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
41 41
42static struct irqaction h7201_timer_irq = { 42static struct irqaction h7201_timer_irq = {
43 .name = "h7201 Timer Tick", 43 .name = "h7201 Timer Tick",
44 .flags = SA_INTERRUPT | SA_TIMER, 44 .flags = IRQF_DISABLED | IRQF_TIMER,
45 .handler = h7201_timer_interrupt, 45 .handler = h7201_timer_interrupt,
46}; 46};
47 47
diff --git a/arch/arm/mach-h720x/cpu-h7202.c b/arch/arm/mach-h720x/cpu-h7202.c
index a4a7c0125d03..da678d163fd9 100644
--- a/arch/arm/mach-h720x/cpu-h7202.c
+++ b/arch/arm/mach-h720x/cpu-h7202.c
@@ -171,7 +171,7 @@ static struct irqchip h7202_timerx_chip = {
171 171
172static struct irqaction h7202_timer_irq = { 172static struct irqaction h7202_timer_irq = {
173 .name = "h7202 Timer Tick", 173 .name = "h7202 Timer Tick",
174 .flags = SA_INTERRUPT | SA_TIMER, 174 .flags = IRQF_DISABLED | IRQF_TIMER,
175 .handler = h7202_timer_interrupt, 175 .handler = h7202_timer_interrupt,
176}; 176};
177 177
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
index 5f9a04775a47..6ed7523c65bb 100644
--- a/arch/arm/mach-imx/time.c
+++ b/arch/arm/mach-imx/time.c
@@ -72,7 +72,7 @@ imx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
72 72
73static struct irqaction imx_timer_irq = { 73static struct irqaction imx_timer_irq = {
74 .name = "i.MX Timer Tick", 74 .name = "i.MX Timer Tick",
75 .flags = SA_INTERRUPT | SA_TIMER, 75 .flags = IRQF_DISABLED | IRQF_TIMER,
76 .handler = imx_timer_interrupt, 76 .handler = imx_timer_interrupt,
77}; 77};
78 78
diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c
index 2d7e505e748f..42021fdfa0c6 100644
--- a/arch/arm/mach-integrator/core.c
+++ b/arch/arm/mach-integrator/core.c
@@ -282,7 +282,7 @@ integrator_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
282 282
283static struct irqaction integrator_timer_irq = { 283static struct irqaction integrator_timer_irq = {
284 .name = "Integrator Timer Tick", 284 .name = "Integrator Timer Tick",
285 .flags = SA_INTERRUPT | SA_TIMER, 285 .flags = IRQF_DISABLED | IRQF_TIMER,
286 .handler = integrator_timer_interrupt, 286 .handler = integrator_timer_interrupt,
287}; 287};
288 288
diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c
index bc07f52a6fd7..ee49cf790dab 100644
--- a/arch/arm/mach-integrator/time.c
+++ b/arch/arm/mach-integrator/time.c
@@ -125,7 +125,7 @@ static int rtc_probe(struct amba_device *dev, void *id)
125 125
126 xtime.tv_sec = __raw_readl(rtc_base + RTC_DR); 126 xtime.tv_sec = __raw_readl(rtc_base + RTC_DR);
127 127
128 ret = request_irq(dev->irq[0], arm_rtc_interrupt, SA_INTERRUPT, 128 ret = request_irq(dev->irq[0], arm_rtc_interrupt, IRQF_DISABLED,
129 "rtc-pl030", dev); 129 "rtc-pl030", dev);
130 if (ret) 130 if (ret)
131 goto map_out; 131 goto map_out;
diff --git a/arch/arm/mach-iop3xx/iop321-time.c b/arch/arm/mach-iop3xx/iop321-time.c
index d67ac0e5d438..04b1a6f7ebae 100644
--- a/arch/arm/mach-iop3xx/iop321-time.c
+++ b/arch/arm/mach-iop3xx/iop321-time.c
@@ -85,7 +85,7 @@ iop321_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
85static struct irqaction iop321_timer_irq = { 85static struct irqaction iop321_timer_irq = {
86 .name = "IOP321 Timer Tick", 86 .name = "IOP321 Timer Tick",
87 .handler = iop321_timer_interrupt, 87 .handler = iop321_timer_interrupt,
88 .flags = SA_INTERRUPT | SA_TIMER, 88 .flags = IRQF_DISABLED | IRQF_TIMER,
89}; 89};
90 90
91static void __init iop321_timer_init(void) 91static void __init iop321_timer_init(void)
diff --git a/arch/arm/mach-iop3xx/iop331-time.c b/arch/arm/mach-iop3xx/iop331-time.c
index 3c1f0ebbd636..0c09e74c5740 100644
--- a/arch/arm/mach-iop3xx/iop331-time.c
+++ b/arch/arm/mach-iop3xx/iop331-time.c
@@ -82,7 +82,7 @@ iop331_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
82static struct irqaction iop331_timer_irq = { 82static struct irqaction iop331_timer_irq = {
83 .name = "IOP331 Timer Tick", 83 .name = "IOP331 Timer Tick",
84 .handler = iop331_timer_interrupt, 84 .handler = iop331_timer_interrupt,
85 .flags = SA_INTERRUPT | SA_TIMER, 85 .flags = IRQF_DISABLED | IRQF_TIMER,
86}; 86};
87 87
88static void __init iop331_timer_init(void) 88static void __init iop331_timer_init(void)
diff --git a/arch/arm/mach-ixp2000/core.c b/arch/arm/mach-ixp2000/core.c
index b31f31ac937b..7f91f689a041 100644
--- a/arch/arm/mach-ixp2000/core.c
+++ b/arch/arm/mach-ixp2000/core.c
@@ -224,7 +224,7 @@ static int ixp2000_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
224 224
225static struct irqaction ixp2000_timer_irq = { 225static struct irqaction ixp2000_timer_irq = {
226 .name = "IXP2000 Timer Tick", 226 .name = "IXP2000 Timer Tick",
227 .flags = SA_INTERRUPT | SA_TIMER, 227 .flags = IRQF_DISABLED | IRQF_TIMER,
228 .handler = ixp2000_timer_interrupt, 228 .handler = ixp2000_timer_interrupt,
229}; 229};
230 230
diff --git a/arch/arm/mach-ixp23xx/core.c b/arch/arm/mach-ixp23xx/core.c
index 7c218aecf443..566a07821c77 100644
--- a/arch/arm/mach-ixp23xx/core.c
+++ b/arch/arm/mach-ixp23xx/core.c
@@ -363,7 +363,7 @@ ixp23xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
363static struct irqaction ixp23xx_timer_irq = { 363static struct irqaction ixp23xx_timer_irq = {
364 .name = "IXP23xx Timer Tick", 364 .name = "IXP23xx Timer Tick",
365 .handler = ixp23xx_timer_interrupt, 365 .handler = ixp23xx_timer_interrupt,
366 .flags = SA_INTERRUPT | SA_TIMER, 366 .flags = IRQF_DISABLED | IRQF_TIMER,
367}; 367};
368 368
369void __init ixp23xx_init_timer(void) 369void __init ixp23xx_init_timer(void)
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 13c7c629d037..7c25dbd5a181 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -287,7 +287,7 @@ static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id, struct pt_regs
287 287
288static struct irqaction ixp4xx_timer_irq = { 288static struct irqaction ixp4xx_timer_irq = {
289 .name = "IXP4xx Timer Tick", 289 .name = "IXP4xx Timer Tick",
290 .flags = SA_INTERRUPT | SA_TIMER, 290 .flags = IRQF_DISABLED | IRQF_TIMER,
291 .handler = ixp4xx_timer_interrupt, 291 .handler = ixp4xx_timer_interrupt,
292}; 292};
293 293
diff --git a/arch/arm/mach-ixp4xx/nas100d-power.c b/arch/arm/mach-ixp4xx/nas100d-power.c
index f58a1d05a02e..81ffcae1f56e 100644
--- a/arch/arm/mach-ixp4xx/nas100d-power.c
+++ b/arch/arm/mach-ixp4xx/nas100d-power.c
@@ -42,7 +42,7 @@ static int __init nas100d_power_init(void)
42 set_irq_type(NAS100D_RB_IRQ, IRQT_LOW); 42 set_irq_type(NAS100D_RB_IRQ, IRQT_LOW);
43 43
44 if (request_irq(NAS100D_RB_IRQ, &nas100d_reset_handler, 44 if (request_irq(NAS100D_RB_IRQ, &nas100d_reset_handler,
45 SA_INTERRUPT, "NAS100D reset button", NULL) < 0) { 45 IRQF_DISABLED, "NAS100D reset button", NULL) < 0) {
46 46
47 printk(KERN_DEBUG "Reset Button IRQ %d not available\n", 47 printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
48 NAS100D_RB_IRQ); 48 NAS100D_RB_IRQ);
diff --git a/arch/arm/mach-ixp4xx/nslu2-power.c b/arch/arm/mach-ixp4xx/nslu2-power.c
index 6d38e97142cc..e2a2230b69f0 100644
--- a/arch/arm/mach-ixp4xx/nslu2-power.c
+++ b/arch/arm/mach-ixp4xx/nslu2-power.c
@@ -54,7 +54,7 @@ static int __init nslu2_power_init(void)
54 set_irq_type(NSLU2_PB_IRQ, IRQT_HIGH); 54 set_irq_type(NSLU2_PB_IRQ, IRQT_HIGH);
55 55
56 if (request_irq(NSLU2_RB_IRQ, &nslu2_reset_handler, 56 if (request_irq(NSLU2_RB_IRQ, &nslu2_reset_handler,
57 SA_INTERRUPT, "NSLU2 reset button", NULL) < 0) { 57 IRQF_DISABLED, "NSLU2 reset button", NULL) < 0) {
58 58
59 printk(KERN_DEBUG "Reset Button IRQ %d not available\n", 59 printk(KERN_DEBUG "Reset Button IRQ %d not available\n",
60 NSLU2_RB_IRQ); 60 NSLU2_RB_IRQ);
@@ -63,7 +63,7 @@ static int __init nslu2_power_init(void)
63 } 63 }
64 64
65 if (request_irq(NSLU2_PB_IRQ, &nslu2_power_handler, 65 if (request_irq(NSLU2_PB_IRQ, &nslu2_power_handler,
66 SA_INTERRUPT, "NSLU2 power button", NULL) < 0) { 66 IRQF_DISABLED, "NSLU2 power button", NULL) < 0) {
67 67
68 printk(KERN_DEBUG "Power Button IRQ %d not available\n", 68 printk(KERN_DEBUG "Power Button IRQ %d not available\n",
69 NSLU2_PB_IRQ); 69 NSLU2_PB_IRQ);
diff --git a/arch/arm/mach-lh7a40x/time.c b/arch/arm/mach-lh7a40x/time.c
index 4d26c9f62c71..ad5652e01507 100644
--- a/arch/arm/mach-lh7a40x/time.c
+++ b/arch/arm/mach-lh7a40x/time.c
@@ -53,7 +53,7 @@ lh7a40x_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
53 53
54static struct irqaction lh7a40x_timer_irq = { 54static struct irqaction lh7a40x_timer_irq = {
55 .name = "LHA740x Timer Tick", 55 .name = "LHA740x Timer Tick",
56 .flags = SA_INTERRUPT | SA_TIMER, 56 .flags = IRQF_DISABLED | IRQF_TIMER,
57 .handler = lh7a40x_timer_interrupt, 57 .handler = lh7a40x_timer_interrupt,
58}; 58};
59 59
diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c
index edfbdf40c600..6d72c81b7d9f 100644
--- a/arch/arm/mach-netx/time.c
+++ b/arch/arm/mach-netx/time.c
@@ -54,7 +54,7 @@ netx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
54 54
55static struct irqaction netx_timer_irq = { 55static struct irqaction netx_timer_irq = {
56 .name = "NetX Timer Tick", 56 .name = "NetX Timer Tick",
57 .flags = SA_INTERRUPT | SA_TIMER, 57 .flags = IRQF_DISABLED | IRQF_TIMER,
58 .handler = netx_timer_interrupt, 58 .handler = netx_timer_interrupt,
59}; 59};
60 60
diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index 91933301bb73..b742261c97ad 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -357,7 +357,7 @@ static void __init osk_mistral_init(void)
357 */ 357 */
358 ret = request_irq(OMAP_GPIO_IRQ(OMAP_MPUIO(2)), 358 ret = request_irq(OMAP_GPIO_IRQ(OMAP_MPUIO(2)),
359 &osk_mistral_wake_interrupt, 359 &osk_mistral_wake_interrupt,
360 SA_SHIRQ, "mistral_wakeup", 360 IRQF_SHARED, "mistral_wakeup",
361 &osk_mistral_wake_interrupt); 361 &osk_mistral_wake_interrupt);
362 if (ret != 0) { 362 if (ret != 0) {
363 omap_free_gpio(OMAP_MPUIO(2)); 363 omap_free_gpio(OMAP_MPUIO(2));
diff --git a/arch/arm/mach-omap1/fpga.c b/arch/arm/mach-omap1/fpga.c
index 880cd2d8f4aa..34eb79ee6e61 100644
--- a/arch/arm/mach-omap1/fpga.c
+++ b/arch/arm/mach-omap1/fpga.c
@@ -133,7 +133,7 @@ static struct irqchip omap_fpga_irq = {
133 * mask_ack routine for all of the FPGA interrupts has been changed from 133 * mask_ack routine for all of the FPGA interrupts has been changed from
134 * fpga_mask_ack_irq() to fpga_ack_irq() so that the specific FPGA interrupt 134 * fpga_mask_ack_irq() to fpga_ack_irq() so that the specific FPGA interrupt
135 * being serviced is left unmasked. We can do this because the FPGA cascade 135 * being serviced is left unmasked. We can do this because the FPGA cascade
136 * interrupt is installed with the SA_INTERRUPT flag, which leaves all 136 * interrupt is installed with the IRQF_DISABLED flag, which leaves all
137 * interrupts masked at the CPU while an FPGA interrupt handler executes. 137 * interrupts masked at the CPU while an FPGA interrupt handler executes.
138 * 138 *
139 * Limited testing indicates that this workaround appears to be effective 139 * Limited testing indicates that this workaround appears to be effective
diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 1b4e1d57afb1..cd76185bab74 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -690,7 +690,7 @@ static irqreturn_t omap_wakeup_interrupt(int irq, void * dev,
690 690
691static struct irqaction omap_wakeup_irq = { 691static struct irqaction omap_wakeup_irq = {
692 .name = "peripheral wakeup", 692 .name = "peripheral wakeup",
693 .flags = SA_INTERRUPT, 693 .flags = IRQF_DISABLED,
694 .handler = omap_wakeup_interrupt 694 .handler = omap_wakeup_interrupt
695}; 695};
696 696
diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c
index 5615fb8a3d5b..976edfb882e2 100644
--- a/arch/arm/mach-omap1/serial.c
+++ b/arch/arm/mach-omap1/serial.c
@@ -253,7 +253,7 @@ static void __init omap_serial_set_port_wakeup(int gpio_nr)
253 } 253 }
254 omap_set_gpio_direction(gpio_nr, 1); 254 omap_set_gpio_direction(gpio_nr, 1);
255 ret = request_irq(OMAP_GPIO_IRQ(gpio_nr), &omap_serial_wake_interrupt, 255 ret = request_irq(OMAP_GPIO_IRQ(gpio_nr), &omap_serial_wake_interrupt,
256 SA_TRIGGER_RISING, "serial wakeup", NULL); 256 IRQF_TRIGGER_RISING, "serial wakeup", NULL);
257 if (ret) { 257 if (ret) {
258 omap_free_gpio(gpio_nr); 258 omap_free_gpio(gpio_nr);
259 printk(KERN_ERR "No interrupt for UART wake GPIO: %i\n", 259 printk(KERN_ERR "No interrupt for UART wake GPIO: %i\n",
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index a01f0efdae14..4d91b9f51084 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -177,7 +177,7 @@ static irqreturn_t omap_mpu_timer_interrupt(int irq, void *dev_id,
177 177
178static struct irqaction omap_mpu_timer_irq = { 178static struct irqaction omap_mpu_timer_irq = {
179 .name = "mpu timer", 179 .name = "mpu timer",
180 .flags = SA_INTERRUPT | SA_TIMER, 180 .flags = IRQF_DISABLED | IRQF_TIMER,
181 .handler = omap_mpu_timer_interrupt, 181 .handler = omap_mpu_timer_interrupt,
182}; 182};
183 183
@@ -191,7 +191,7 @@ static irqreturn_t omap_mpu_timer1_interrupt(int irq, void *dev_id,
191 191
192static struct irqaction omap_mpu_timer1_irq = { 192static struct irqaction omap_mpu_timer1_irq = {
193 .name = "mpu timer1 overflow", 193 .name = "mpu timer1 overflow",
194 .flags = SA_INTERRUPT, 194 .flags = IRQF_DISABLED,
195 .handler = omap_mpu_timer1_interrupt, 195 .handler = omap_mpu_timer1_interrupt,
196}; 196};
197 197
diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c
index 6c6ba172cdf6..7993b7bae2bd 100644
--- a/arch/arm/mach-omap2/board-apollon.c
+++ b/arch/arm/mach-omap2/board-apollon.c
@@ -234,17 +234,17 @@ static void __init apollon_sw_init(void)
234 234
235 set_irq_type(OMAP_GPIO_IRQ(SW_ENTER_GPIO16), IRQT_RISING); 235 set_irq_type(OMAP_GPIO_IRQ(SW_ENTER_GPIO16), IRQT_RISING);
236 if (request_irq(OMAP_GPIO_IRQ(SW_ENTER_GPIO16), &apollon_sw_interrupt, 236 if (request_irq(OMAP_GPIO_IRQ(SW_ENTER_GPIO16), &apollon_sw_interrupt,
237 SA_SHIRQ, "enter sw", 237 IRQF_SHARED, "enter sw",
238 &apollon_sw_interrupt)) 238 &apollon_sw_interrupt))
239 return; 239 return;
240 set_irq_type(OMAP_GPIO_IRQ(SW_UP_GPIO17), IRQT_RISING); 240 set_irq_type(OMAP_GPIO_IRQ(SW_UP_GPIO17), IRQT_RISING);
241 if (request_irq(OMAP_GPIO_IRQ(SW_UP_GPIO17), &apollon_sw_interrupt, 241 if (request_irq(OMAP_GPIO_IRQ(SW_UP_GPIO17), &apollon_sw_interrupt,
242 SA_SHIRQ, "up sw", 242 IRQF_SHARED, "up sw",
243 &apollon_sw_interrupt)) 243 &apollon_sw_interrupt))
244 return; 244 return;
245 set_irq_type(OMAP_GPIO_IRQ(SW_DOWN_GPIO58), IRQT_RISING); 245 set_irq_type(OMAP_GPIO_IRQ(SW_DOWN_GPIO58), IRQT_RISING);
246 if (request_irq(OMAP_GPIO_IRQ(SW_DOWN_GPIO58), &apollon_sw_interrupt, 246 if (request_irq(OMAP_GPIO_IRQ(SW_DOWN_GPIO58), &apollon_sw_interrupt,
247 SA_SHIRQ, "down sw", 247 IRQF_SHARED, "down sw",
248 &apollon_sw_interrupt)) 248 &apollon_sw_interrupt))
249 return; 249 return;
250} 250}
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index cf78e6c5a277..fe5fd6d42dea 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -52,7 +52,7 @@ static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id,
52 52
53static struct irqaction omap2_gp_timer_irq = { 53static struct irqaction omap2_gp_timer_irq = {
54 .name = "gp timer", 54 .name = "gp timer",
55 .flags = SA_INTERRUPT | SA_TIMER, 55 .flags = IRQF_DISABLED | IRQF_TIMER,
56 .handler = omap2_gp_timer_interrupt, 56 .handler = omap2_gp_timer_interrupt,
57}; 57};
58 58
diff --git a/arch/arm/mach-pnx4008/time.c b/arch/arm/mach-pnx4008/time.c
index 4ce680698529..888bf6cfba8a 100644
--- a/arch/arm/mach-pnx4008/time.c
+++ b/arch/arm/mach-pnx4008/time.c
@@ -86,7 +86,7 @@ static irqreturn_t pnx4008_timer_interrupt(int irq, void *dev_id,
86 86
87static struct irqaction pnx4008_timer_irq = { 87static struct irqaction pnx4008_timer_irq = {
88 .name = "PNX4008 Tick Timer", 88 .name = "PNX4008 Tick Timer",
89 .flags = SA_INTERRUPT | SA_TIMER, 89 .flags = IRQF_DISABLED | IRQF_TIMER,
90 .handler = pnx4008_timer_interrupt 90 .handler = pnx4008_timer_interrupt
91}; 91};
92 92
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index bf6648a83901..cce26576999e 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -225,7 +225,7 @@ static int corgi_mci_init(struct device *dev, irqreturn_t (*corgi_detect_int)(in
225 corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250); 225 corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250);
226 226
227 err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int, 227 err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int,
228 SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, 228 IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
229 "MMC card detect", data); 229 "MMC card detect", data);
230 if (err) { 230 if (err) {
231 printk(KERN_ERR "corgi_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); 231 printk(KERN_ERR "corgi_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index 1ab26c6914f2..6a9a669d60de 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -419,7 +419,7 @@ static int lubbock_mci_init(struct device *dev,
419 init_timer(&mmc_timer); 419 init_timer(&mmc_timer);
420 mmc_timer.data = (unsigned long) data; 420 mmc_timer.data = (unsigned long) data;
421 return request_irq(LUBBOCK_SD_IRQ, lubbock_detect_int, 421 return request_irq(LUBBOCK_SD_IRQ, lubbock_detect_int,
422 SA_SAMPLE_RANDOM, "lubbock-sd-detect", data); 422 IRQF_SAMPLE_RANDOM, "lubbock-sd-detect", data);
423} 423}
424 424
425static int lubbock_mci_get_ro(struct device *dev) 425static int lubbock_mci_get_ro(struct device *dev)
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index b307f11951df..21ddf3de2f6e 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -331,7 +331,7 @@ static int mainstone_mci_init(struct device *dev, irqreturn_t (*mstone_detect_in
331 */ 331 */
332 MST_MSCWR1 &= ~MST_MSCWR1_MS_SEL; 332 MST_MSCWR1 &= ~MST_MSCWR1_MS_SEL;
333 333
334 err = request_irq(MAINSTONE_MMC_IRQ, mstone_detect_int, SA_INTERRUPT, 334 err = request_irq(MAINSTONE_MMC_IRQ, mstone_detect_int, IRQF_DISABLED,
335 "MMC card detect", data); 335 "MMC card detect", data);
336 if (err) { 336 if (err) {
337 printk(KERN_ERR "mainstone_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); 337 printk(KERN_ERR "mainstone_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 9a9fa87cea9f..6dbff6d94801 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -212,7 +212,7 @@ static int poodle_mci_init(struct device *dev, irqreturn_t (*poodle_detect_int)(
212 poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250); 212 poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250);
213 213
214 err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int, 214 err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int,
215 SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, 215 IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
216 "MMC card detect", data); 216 "MMC card detect", data);
217 if (err) { 217 if (err) {
218 printk(KERN_ERR "poodle_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); 218 printk(KERN_ERR "poodle_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c
index 1e5e6ca693ee..db6e8f56a75f 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.c
+++ b/arch/arm/mach-pxa/sharpsl_pm.c
@@ -142,18 +142,18 @@ void sharpsl_pm_pxa_init(void)
142 pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batlock | GPIO_IN); 142 pxa_gpio_mode(sharpsl_pm.machinfo->gpio_batlock | GPIO_IN);
143 143
144 /* Register interrupt handlers */ 144 /* Register interrupt handlers */
145 if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr, SA_INTERRUPT, "AC Input Detect", sharpsl_ac_isr)) { 145 if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin), sharpsl_ac_isr, IRQF_DISABLED, "AC Input Detect", sharpsl_ac_isr)) {
146 dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin)); 146 dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin));
147 } 147 }
148 else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin),IRQT_BOTHEDGE); 148 else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_acin),IRQT_BOTHEDGE);
149 149
150 if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock), sharpsl_fatal_isr, SA_INTERRUPT, "Battery Cover", sharpsl_fatal_isr)) { 150 if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock), sharpsl_fatal_isr, IRQF_DISABLED, "Battery Cover", sharpsl_fatal_isr)) {
151 dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock)); 151 dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock));
152 } 152 }
153 else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock),IRQT_FALLING); 153 else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batlock),IRQT_FALLING);
154 154
155 if (sharpsl_pm.machinfo->gpio_fatal) { 155 if (sharpsl_pm.machinfo->gpio_fatal) {
156 if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal), sharpsl_fatal_isr, SA_INTERRUPT, "Fatal Battery", sharpsl_fatal_isr)) { 156 if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal), sharpsl_fatal_isr, IRQF_DISABLED, "Fatal Battery", sharpsl_fatal_isr)) {
157 dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal)); 157 dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal));
158 } 158 }
159 else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal),IRQT_FALLING); 159 else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_fatal),IRQT_FALLING);
@@ -162,7 +162,7 @@ void sharpsl_pm_pxa_init(void)
162 if (sharpsl_pm.machinfo->batfull_irq) 162 if (sharpsl_pm.machinfo->batfull_irq)
163 { 163 {
164 /* Register interrupt handler. */ 164 /* Register interrupt handler. */
165 if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr, SA_INTERRUPT, "CO", sharpsl_chrg_full_isr)) { 165 if (request_irq(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull), sharpsl_chrg_full_isr, IRQF_DISABLED, "CO", sharpsl_chrg_full_isr)) {
166 dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull)); 166 dev_err(sharpsl_pm.dev, "Could not get irq %d.\n", IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull));
167 } 167 }
168 else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull),IRQT_RISING); 168 else set_irq_type(IRQ_GPIO(sharpsl_pm.machinfo->gpio_batfull),IRQT_RISING);
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index eb9937f6f5cd..1c32a9310dc2 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -308,7 +308,7 @@ static int spitz_mci_init(struct device *dev, irqreturn_t (*spitz_detect_int)(in
308 spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250); 308 spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250);
309 309
310 err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int, 310 err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int,
311 SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, 311 IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
312 "MMC card detect", data); 312 "MMC card detect", data);
313 if (err) { 313 if (err) {
314 printk(KERN_ERR "spitz_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); 314 printk(KERN_ERR "spitz_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 1ef85fcc6c06..5dbd191c57c4 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -117,7 +117,7 @@ pxa_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
117 117
118static struct irqaction pxa_timer_irq = { 118static struct irqaction pxa_timer_irq = {
119 .name = "PXA Timer Tick", 119 .name = "PXA Timer Tick",
120 .flags = SA_INTERRUPT | SA_TIMER, 120 .flags = IRQF_DISABLED | IRQF_TIMER,
121 .handler = pxa_timer_interrupt, 121 .handler = pxa_timer_interrupt,
122}; 122};
123 123
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 7152bc13680f..249353616aba 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -185,7 +185,7 @@ static int tosa_mci_init(struct device *dev, irqreturn_t (*tosa_detect_int)(int,
185 185
186 tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250); 186 tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250);
187 187
188 err = request_irq(TOSA_IRQ_GPIO_nSD_DETECT, tosa_detect_int, SA_INTERRUPT, 188 err = request_irq(TOSA_IRQ_GPIO_nSD_DETECT, tosa_detect_int, IRQF_DISABLED,
189 "MMC/SD card detect", data); 189 "MMC/SD card detect", data);
190 if (err) { 190 if (err) {
191 printk(KERN_ERR "tosa_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); 191 printk(KERN_ERR "tosa_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index 4ffff9e95eca..7c3007df1bd6 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -283,7 +283,9 @@ static int trizeps4_mci_init(struct device *dev, irqreturn_t (*mci_detect_int)(i
283 283
284 pxa_gpio_mode(GPIO_MMC_DET | GPIO_IN); 284 pxa_gpio_mode(GPIO_MMC_DET | GPIO_IN);
285 285
286 err = request_irq(TRIZEPS4_MMC_IRQ, mci_detect_int, SA_INTERRUPT | SA_TRIGGER_RISING, "MMC card detect", data); 286 err = request_irq(TRIZEPS4_MMC_IRQ, mci_detect_int,
287 IRQF_DISABLED | IRQF_TRIGGER_RISING,
288 "MMC card detect", data);
287 if (err) { 289 if (err) {
288 printk(KERN_ERR "trizeps4_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); 290 printk(KERN_ERR "trizeps4_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
289 return -1; 291 return -1;
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index eb09d3859d6d..da0286973823 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -536,7 +536,7 @@ static irqreturn_t realview_timer_interrupt(int irq, void *dev_id, struct pt_reg
536 536
537static struct irqaction realview_timer_irq = { 537static struct irqaction realview_timer_irq = {
538 .name = "RealView Timer Tick", 538 .name = "RealView Timer Tick",
539 .flags = SA_INTERRUPT | SA_TIMER, 539 .flags = IRQF_DISABLED | IRQF_TIMER,
540 .handler = realview_timer_interrupt, 540 .handler = realview_timer_interrupt,
541}; 541};
542 542
diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c
index bd86ffba8810..ac511d41d4d7 100644
--- a/arch/arm/mach-rpc/dma.c
+++ b/arch/arm/mach-rpc/dma.c
@@ -128,7 +128,7 @@ static irqreturn_t iomd_dma_handle(int irq, void *dev_id, struct pt_regs *regs)
128static int iomd_request_dma(dmach_t channel, dma_t *dma) 128static int iomd_request_dma(dmach_t channel, dma_t *dma)
129{ 129{
130 return request_irq(dma->dma_irq, iomd_dma_handle, 130 return request_irq(dma->dma_irq, iomd_dma_handle,
131 SA_INTERRUPT, dma->device_id, dma); 131 IRQF_DISABLED, dma->device_id, dma);
132} 132}
133 133
134static void iomd_free_dma(dmach_t channel, dma_t *dma) 134static void iomd_free_dma(dmach_t channel, dma_t *dma)
diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index b0aaf4328732..094cc52745c5 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -718,7 +718,7 @@ int s3c2410_dma_request(unsigned int channel, s3c2410_dma_client_t *client,
718 pr_debug("dma%d: %s : requesting irq %d\n", 718 pr_debug("dma%d: %s : requesting irq %d\n",
719 channel, __FUNCTION__, chan->irq); 719 channel, __FUNCTION__, chan->irq);
720 720
721 err = request_irq(chan->irq, s3c2410_dma_irq, SA_INTERRUPT, 721 err = request_irq(chan->irq, s3c2410_dma_irq, IRQF_DISABLED,
722 client->name, (void *)chan); 722 client->name, (void *)chan);
723 723
724 if (err) { 724 if (err) {
diff --git a/arch/arm/mach-s3c2410/time.c b/arch/arm/mach-s3c2410/time.c
index c153c49c75dc..00d1cfca9712 100644
--- a/arch/arm/mach-s3c2410/time.c
+++ b/arch/arm/mach-s3c2410/time.c
@@ -138,7 +138,7 @@ s3c2410_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
138 138
139static struct irqaction s3c2410_timer_irq = { 139static struct irqaction s3c2410_timer_irq = {
140 .name = "S3C2410 Timer Tick", 140 .name = "S3C2410 Timer Tick",
141 .flags = SA_INTERRUPT | SA_TIMER, 141 .flags = IRQF_DISABLED | IRQF_TIMER,
142 .handler = s3c2410_timer_interrupt, 142 .handler = s3c2410_timer_interrupt,
143}; 143};
144 144
diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c
index 495f8c6ffcb6..6b22d8f0a00d 100644
--- a/arch/arm/mach-s3c2410/usb-simtec.c
+++ b/arch/arm/mach-s3c2410/usb-simtec.c
@@ -85,8 +85,8 @@ static void usb_simtec_enableoc(struct s3c2410_hcd_info *info, int on)
85 85
86 if (on) { 86 if (on) {
87 ret = request_irq(IRQ_USBOC, usb_simtec_ocirq, 87 ret = request_irq(IRQ_USBOC, usb_simtec_ocirq,
88 SA_INTERRUPT | SA_TRIGGER_RISING | 88 IRQF_DISABLED | IRQF_TRIGGER_RISING |
89 SA_TRIGGER_FALLING, 89 IRQF_TRIGGER_FALLING,
90 "USB Over-current", info); 90 "USB Over-current", info);
91 if (ret != 0) { 91 if (ret != 0) {
92 printk(KERN_ERR "failed to request usb oc irq\n"); 92 printk(KERN_ERR "failed to request usb oc irq\n");
diff --git a/arch/arm/mach-sa1100/collie_pm.c b/arch/arm/mach-sa1100/collie_pm.c
index 696d7d29c8a5..45b1e71f111d 100644
--- a/arch/arm/mach-sa1100/collie_pm.c
+++ b/arch/arm/mach-sa1100/collie_pm.c
@@ -45,12 +45,12 @@ static void collie_charger_init(void)
45 } 45 }
46 46
47 /* Register interrupt handler. */ 47 /* Register interrupt handler. */
48 if ((err = request_irq(COLLIE_IRQ_GPIO_AC_IN, sharpsl_ac_isr, SA_INTERRUPT, 48 if ((err = request_irq(COLLIE_IRQ_GPIO_AC_IN, sharpsl_ac_isr, IRQF_DISABLED,
49 "ACIN", sharpsl_ac_isr))) { 49 "ACIN", sharpsl_ac_isr))) {
50 printk("Could not get irq %d.\n", COLLIE_IRQ_GPIO_AC_IN); 50 printk("Could not get irq %d.\n", COLLIE_IRQ_GPIO_AC_IN);
51 return; 51 return;
52 } 52 }
53 if ((err = request_irq(COLLIE_IRQ_GPIO_CO, sharpsl_chrg_full_isr, SA_INTERRUPT, 53 if ((err = request_irq(COLLIE_IRQ_GPIO_CO, sharpsl_chrg_full_isr, IRQF_DISABLED,
54 "CO", sharpsl_chrg_full_isr))) { 54 "CO", sharpsl_chrg_full_isr))) {
55 free_irq(COLLIE_IRQ_GPIO_AC_IN, sharpsl_ac_isr); 55 free_irq(COLLIE_IRQ_GPIO_AC_IN, sharpsl_ac_isr);
56 printk("Could not get irq %d.\n", COLLIE_IRQ_GPIO_CO); 56 printk("Could not get irq %d.\n", COLLIE_IRQ_GPIO_CO);
diff --git a/arch/arm/mach-sa1100/dma.c b/arch/arm/mach-sa1100/dma.c
index be0e4427bec7..3c6441d4bc59 100644
--- a/arch/arm/mach-sa1100/dma.c
+++ b/arch/arm/mach-sa1100/dma.c
@@ -124,7 +124,7 @@ int sa1100_request_dma (dma_device_t device, const char *device_id,
124 124
125 i = dma - dma_chan; 125 i = dma - dma_chan;
126 regs = (dma_regs_t *)&DDAR(i); 126 regs = (dma_regs_t *)&DDAR(i);
127 err = request_irq(IRQ_DMA0 + i, dma_irq_handler, SA_INTERRUPT, 127 err = request_irq(IRQ_DMA0 + i, dma_irq_handler, IRQF_DISABLED,
128 device_id, regs); 128 device_id, regs);
129 if (err) { 129 if (err) {
130 printk(KERN_ERR 130 printk(KERN_ERR
diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c
index e10d661c015f..7364478cec12 100644
--- a/arch/arm/mach-sa1100/h3600.c
+++ b/arch/arm/mach-sa1100/h3600.c
@@ -740,7 +740,7 @@ static void h3800_IRQ_demux(unsigned int irq, struct irqdesc *desc, struct pt_re
740static struct irqaction h3800_irq = { 740static struct irqaction h3800_irq = {
741 .name = "h3800_asic", 741 .name = "h3800_asic",
742 .handler = h3800_IRQ_demux, 742 .handler = h3800_IRQ_demux,
743 .flags = SA_INTERRUPT | SA_TIMER, 743 .flags = IRQF_DISABLED | IRQF_TIMER,
744}; 744};
745 745
746u32 kpio_int_shadow = 0; 746u32 kpio_int_shadow = 0;
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 688b1e109a40..49ae716e16c2 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -111,7 +111,7 @@ sa1100_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
111 111
112static struct irqaction sa1100_timer_irq = { 112static struct irqaction sa1100_timer_irq = {
113 .name = "SA11xx Timer Tick", 113 .name = "SA11xx Timer Tick",
114 .flags = SA_INTERRUPT | SA_TIMER, 114 .flags = IRQF_DISABLED | IRQF_TIMER,
115 .handler = sa1100_timer_interrupt, 115 .handler = sa1100_timer_interrupt,
116}; 116};
117 117
diff --git a/arch/arm/mach-shark/core.c b/arch/arm/mach-shark/core.c
index 2f2c6e97b7a3..1095df34fec0 100644
--- a/arch/arm/mach-shark/core.c
+++ b/arch/arm/mach-shark/core.c
@@ -90,7 +90,7 @@ shark_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
90 90
91static struct irqaction shark_timer_irq = { 91static struct irqaction shark_timer_irq = {
92 .name = "Shark Timer Tick", 92 .name = "Shark Timer Tick",
93 .flags = SA_INTERRUPT | SA_TIMER, 93 .flags = IRQF_DISABLED | IRQF_TIMER,
94 .handler = shark_timer_interrupt, 94 .handler = shark_timer_interrupt,
95}; 95};
96 96
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 08de8490fb69..a432539cc1bd 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -869,7 +869,7 @@ static irqreturn_t versatile_timer_interrupt(int irq, void *dev_id, struct pt_re
869 869
870static struct irqaction versatile_timer_irq = { 870static struct irqaction versatile_timer_irq = {
871 .name = "Versatile Timer Tick", 871 .name = "Versatile Timer Tick",
872 .flags = SA_INTERRUPT | SA_TIMER, 872 .flags = IRQF_DISABLED | IRQF_TIMER,
873 .handler = versatile_timer_interrupt, 873 .handler = versatile_timer_interrupt,
874}; 874};
875 875
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 0e6946ab6e5b..c2f0705bfd49 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -41,6 +41,8 @@
41#include <asm/procinfo.h> 41#include <asm/procinfo.h>
42#include <asm/ptrace.h> 42#include <asm/ptrace.h>
43 43
44#include "proc-macros.S"
45
44/* 46/*
45 * Function: arm720_proc_init (void) 47 * Function: arm720_proc_init (void)
46 * : arm720_proc_fin (void) 48 * : arm720_proc_fin (void)
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index e812246277cf..c878064e9b88 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -23,6 +23,8 @@
23#include <asm/pgtable.h> 23#include <asm/pgtable.h>
24#include <asm/ptrace.h> 24#include <asm/ptrace.h>
25 25
26#include "proc-macros.S"
27
26/* 28/*
27 * the cache line size of the I and D cache 29 * the cache line size of the I and D cache
28 */ 30 */
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index ba32cc6296a0..b23b66a6155a 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -27,6 +27,8 @@
27#include <asm/pgtable-hwdef.h> 27#include <asm/pgtable-hwdef.h>
28#include <asm/pgtable.h> 28#include <asm/pgtable.h>
29 29
30#include "proc-macros.S"
31
30/* 32/*
31 * the cache line size of the I and D cache 33 * the cache line size of the I and D cache
32 */ 34 */
diff --git a/arch/arm/oprofile/op_model_xscale.c b/arch/arm/oprofile/op_model_xscale.c
index e0f0b320d76c..34fdc733743b 100644
--- a/arch/arm/oprofile/op_model_xscale.c
+++ b/arch/arm/oprofile/op_model_xscale.c
@@ -384,7 +384,7 @@ static int xscale_pmu_start(void)
384 int ret; 384 int ret;
385 u32 pmnc = read_pmnc(); 385 u32 pmnc = read_pmnc();
386 386
387 ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, SA_INTERRUPT, 387 ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED,
388 "XScale PMU", (void *)results); 388 "XScale PMU", (void *)results);
389 389
390 if (ret < 0) { 390 if (ret < 0) {
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index c2c05ef86348..9eddc9507147 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -939,7 +939,7 @@ static irqreturn_t omap2_dma_irq_handler(int irq, void *dev_id,
939static struct irqaction omap24xx_dma_irq = { 939static struct irqaction omap24xx_dma_irq = {
940 .name = "DMA", 940 .name = "DMA",
941 .handler = omap2_dma_irq_handler, 941 .handler = omap2_dma_irq_handler,
942 .flags = SA_INTERRUPT 942 .flags = IRQF_DISABLED
943}; 943};
944 944
945#else 945#else
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index cb0c21d384c0..fec7970e564d 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -850,7 +850,8 @@ static void gpio_irq_handler(unsigned int irq, struct irqdesc *desc,
850 /* Don't run the handler if it's already running 850 /* Don't run the handler if it's already running
 851 * or was disabled lazily. 851 * or was disabled lazily.
852 */ 852 */
853 if (unlikely((d->disable_depth || d->running))) { 853 if (unlikely((d->depth ||
854 (d->status & IRQ_INPROGRESS)))) {
854 irq_mask = 1 << 855 irq_mask = 1 <<
855 (gpio_irq - bank->virtual_irq_start); 856 (gpio_irq - bank->virtual_irq_start);
856 /* The unmasking will be done by 857 /* The unmasking will be done by
@@ -859,22 +860,22 @@ static void gpio_irq_handler(unsigned int irq, struct irqdesc *desc,
859 * it's already running. 860 * it's already running.
860 */ 861 */
861 _enable_gpio_irqbank(bank, irq_mask, 0); 862 _enable_gpio_irqbank(bank, irq_mask, 0);
862 if (!d->disable_depth) { 863 if (!d->depth) {
863 /* Level triggered interrupts 864 /* Level triggered interrupts
864 * won't ever be reentered 865 * won't ever be reentered
865 */ 866 */
866 BUG_ON(level_mask & irq_mask); 867 BUG_ON(level_mask & irq_mask);
867 d->pending = 1; 868 d->status |= IRQ_PENDING;
868 } 869 }
869 continue; 870 continue;
870 } 871 }
871 d->running = 1; 872
872 desc_handle_irq(gpio_irq, d, regs); 873 desc_handle_irq(gpio_irq, d, regs);
873 d->running = 0; 874
874 if (unlikely(d->pending && !d->disable_depth)) { 875 if (unlikely((d->status & IRQ_PENDING) && !d->depth)) {
875 irq_mask = 1 << 876 irq_mask = 1 <<
876 (gpio_irq - bank->virtual_irq_start); 877 (gpio_irq - bank->virtual_irq_start);
877 d->pending = 0; 878 d->status &= ~IRQ_PENDING;
878 _enable_gpio_irqbank(bank, irq_mask, 1); 879 _enable_gpio_irqbank(bank, irq_mask, 1);
879 retrigger |= irq_mask; 880 retrigger |= irq_mask;
880 } 881 }
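The gpio.c change above is more than a rename: the driver-private disable_depth/running/pending fields go away and the demux handler keys off the generic irq_desc bookkeeping instead, with depth tracking lazy disables, IRQ_INPROGRESS marking a live handler, and IRQ_PENDING remembering an edge that has to be replayed. A stand-alone, user-space model of that defer-and-replay logic; nothing below is kernel API, the struct and bit values are invented for the sketch:

    #include <stdio.h>

    #define IRQ_INPROGRESS 0x1      /* a handler for this line is running */
    #define IRQ_PENDING    0x2      /* an edge arrived while it could not run */

    struct fake_desc {
            unsigned int status;
            unsigned int depth;     /* > 0 means the line is (lazily) disabled */
    };

    static void demux_one(struct fake_desc *d)
    {
            if (d->depth || (d->status & IRQ_INPROGRESS)) {
                    /* can't run now: remember it unless it is simply disabled */
                    if (!d->depth)
                            d->status |= IRQ_PENDING;
                    printf("deferred (depth=%u, status=%#x)\n", d->depth, d->status);
                    return;
            }

            d->status |= IRQ_INPROGRESS;
            printf("handled\n");
            d->status &= ~IRQ_INPROGRESS;

            /* replay an edge that was swallowed while the handler was running */
            if ((d->status & IRQ_PENDING) && !d->depth) {
                    d->status &= ~IRQ_PENDING;
                    printf("retriggered\n");
            }
    }

    int main(void)
    {
            struct fake_desc d = { 0, 0 };

            demux_one(&d);                  /* runs normally */
            d.status |= IRQ_INPROGRESS;     /* pretend a nested entry is live */
            demux_one(&d);                  /* deferred, marked IRQ_PENDING */
            d.status &= ~IRQ_INPROGRESS;    /* the live entry finishes */
            demux_one(&d);                  /* runs, then replays the pending edge */
            return 0;
    }

The real handler additionally masks and unmasks the bank via _enable_gpio_irqbank() and collects a retrigger mask; the model only shows the status/depth bookkeeping.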
diff --git a/arch/arm/plat-omap/pm.c b/arch/arm/plat-omap/pm.c
index 1a24e2c10714..04b4102727a8 100644
--- a/arch/arm/plat-omap/pm.c
+++ b/arch/arm/plat-omap/pm.c
@@ -580,7 +580,7 @@ static irqreturn_t omap_wakeup_interrupt(int irq, void * dev,
580 580
581static struct irqaction omap_wakeup_irq = { 581static struct irqaction omap_wakeup_irq = {
582 .name = "peripheral wakeup", 582 .name = "peripheral wakeup",
583 .flags = SA_INTERRUPT, 583 .flags = IRQF_DISABLED,
584 .handler = omap_wakeup_interrupt 584 .handler = omap_wakeup_interrupt
585}; 585};
586 586
diff --git a/arch/arm/plat-omap/timer32k.c b/arch/arm/plat-omap/timer32k.c
index ddf4360dea72..281ecc7fcdfc 100644
--- a/arch/arm/plat-omap/timer32k.c
+++ b/arch/arm/plat-omap/timer32k.c
@@ -258,7 +258,7 @@ static struct dyn_tick_timer omap_dyn_tick_timer = {
258 258
259static struct irqaction omap_32k_timer_irq = { 259static struct irqaction omap_32k_timer_irq = {
260 .name = "32KHz timer", 260 .name = "32KHz timer",
261 .flags = SA_INTERRUPT | SA_TIMER, 261 .flags = IRQF_DISABLED | IRQF_TIMER,
262 .handler = omap_32k_timer_interrupt, 262 .handler = omap_32k_timer_interrupt,
263}; 263};
264 264
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 27d8dddbaa47..daa75ce4b777 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -18,6 +18,14 @@ config GENERIC_TIME
18 bool 18 bool
19 default y 19 default y
20 20
21config LOCKDEP_SUPPORT
22 bool
23 default y
24
25config STACKTRACE_SUPPORT
26 bool
27 default y
28
21config SEMAPHORE_SLEEPERS 29config SEMAPHORE_SLEEPERS
22 bool 30 bool
23 default y 31 default y
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index c92191b1fb67..b31c0802e1cc 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -1,5 +1,9 @@
1menu "Kernel hacking" 1menu "Kernel hacking"
2 2
3config TRACE_IRQFLAGS_SUPPORT
4 bool
5 default y
6
3source "lib/Kconfig.debug" 7source "lib/Kconfig.debug"
4 8
5config EARLY_PRINTK 9config EARLY_PRINTK
@@ -31,15 +35,6 @@ config DEBUG_STACK_USAGE
31 35
32 This option will slow down process creation somewhat. 36 This option will slow down process creation somewhat.
33 37
34config STACK_BACKTRACE_COLS
35 int "Stack backtraces per line" if DEBUG_KERNEL
36 range 1 3
37 default 2
38 help
39 Selects how many stack backtrace entries per line to display.
40
41 This can save screen space when displaying traces.
42
43comment "Page alloc debug is incompatible with Software Suspend on i386" 38comment "Page alloc debug is incompatible with Software Suspend on i386"
44 depends on DEBUG_KERNEL && SOFTWARE_SUSPEND 39 depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
45 40
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index cbc1184e9473..1b452a1665c4 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -9,6 +9,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
9 pci-dma.o i386_ksyms.o i387.o bootflag.o \ 9 pci-dma.o i386_ksyms.o i387.o bootflag.o \
10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o 10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
11 11
12obj-$(CONFIG_STACKTRACE) += stacktrace.o
12obj-y += cpu/ 13obj-y += cpu/
13obj-y += acpi/ 14obj-y += acpi/
14obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 15obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 7b421b3a053e..28ab80649764 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -303,6 +303,16 @@ void alternatives_smp_switch(int smp)
303 struct smp_alt_module *mod; 303 struct smp_alt_module *mod;
304 unsigned long flags; 304 unsigned long flags;
305 305
306#ifdef CONFIG_LOCKDEP
307 /*
308 * A not yet fixed binutils section handling bug prevents
309 * alternatives-replacement from working reliably, so turn
310 * it off:
311 */
312 printk("lockdep: not fixing up alternatives.\n");
313 return;
314#endif
315
306 if (no_replacement || smp_alt_once) 316 if (no_replacement || smp_alt_once)
307 return; 317 return;
308 BUG_ON(!smp && (num_online_cpus() > 1)); 318 BUG_ON(!smp && (num_online_cpus() > 1));
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 787190c45fdb..d9a260f2efb4 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -42,6 +42,7 @@
42 42
43#include <linux/linkage.h> 43#include <linux/linkage.h>
44#include <asm/thread_info.h> 44#include <asm/thread_info.h>
45#include <asm/irqflags.h>
45#include <asm/errno.h> 46#include <asm/errno.h>
46#include <asm/segment.h> 47#include <asm/segment.h>
47#include <asm/smp.h> 48#include <asm/smp.h>
@@ -76,12 +77,21 @@ NT_MASK = 0x00004000
76VM_MASK = 0x00020000 77VM_MASK = 0x00020000
77 78
78#ifdef CONFIG_PREEMPT 79#ifdef CONFIG_PREEMPT
79#define preempt_stop cli 80#define preempt_stop cli; TRACE_IRQS_OFF
80#else 81#else
81#define preempt_stop 82#define preempt_stop
82#define resume_kernel restore_nocheck 83#define resume_kernel restore_nocheck
83#endif 84#endif
84 85
86.macro TRACE_IRQS_IRET
87#ifdef CONFIG_TRACE_IRQFLAGS
88 testl $IF_MASK,EFLAGS(%esp) # interrupts off?
89 jz 1f
90 TRACE_IRQS_ON
911:
92#endif
93.endm
94
85#ifdef CONFIG_VM86 95#ifdef CONFIG_VM86
86#define resume_userspace_sig check_userspace 96#define resume_userspace_sig check_userspace
87#else 97#else
@@ -257,6 +267,10 @@ ENTRY(sysenter_entry)
257 CFI_REGISTER esp, ebp 267 CFI_REGISTER esp, ebp
258 movl TSS_sysenter_esp0(%esp),%esp 268 movl TSS_sysenter_esp0(%esp),%esp
259sysenter_past_esp: 269sysenter_past_esp:
270 /*
271 * No need to follow this irqs on/off section: the syscall
272 * disabled irqs and here we enable it straight after entry:
273 */
260 sti 274 sti
261 pushl $(__USER_DS) 275 pushl $(__USER_DS)
262 CFI_ADJUST_CFA_OFFSET 4 276 CFI_ADJUST_CFA_OFFSET 4
@@ -303,6 +317,7 @@ sysenter_past_esp:
303 call *sys_call_table(,%eax,4) 317 call *sys_call_table(,%eax,4)
304 movl %eax,EAX(%esp) 318 movl %eax,EAX(%esp)
305 cli 319 cli
320 TRACE_IRQS_OFF
306 movl TI_flags(%ebp), %ecx 321 movl TI_flags(%ebp), %ecx
307 testw $_TIF_ALLWORK_MASK, %cx 322 testw $_TIF_ALLWORK_MASK, %cx
308 jne syscall_exit_work 323 jne syscall_exit_work
@@ -310,6 +325,7 @@ sysenter_past_esp:
310 movl EIP(%esp), %edx 325 movl EIP(%esp), %edx
311 movl OLDESP(%esp), %ecx 326 movl OLDESP(%esp), %ecx
312 xorl %ebp,%ebp 327 xorl %ebp,%ebp
328 TRACE_IRQS_ON
313 sti 329 sti
314 sysexit 330 sysexit
315 CFI_ENDPROC 331 CFI_ENDPROC
@@ -339,6 +355,7 @@ syscall_exit:
339 cli # make sure we don't miss an interrupt 355 cli # make sure we don't miss an interrupt
340 # setting need_resched or sigpending 356 # setting need_resched or sigpending
341 # between sampling and the iret 357 # between sampling and the iret
358 TRACE_IRQS_OFF
342 movl TI_flags(%ebp), %ecx 359 movl TI_flags(%ebp), %ecx
343 testw $_TIF_ALLWORK_MASK, %cx # current->work 360 testw $_TIF_ALLWORK_MASK, %cx # current->work
344 jne syscall_exit_work 361 jne syscall_exit_work
@@ -355,12 +372,15 @@ restore_all:
355 CFI_REMEMBER_STATE 372 CFI_REMEMBER_STATE
356 je ldt_ss # returning to user-space with LDT SS 373 je ldt_ss # returning to user-space with LDT SS
357restore_nocheck: 374restore_nocheck:
375 TRACE_IRQS_IRET
376restore_nocheck_notrace:
358 RESTORE_REGS 377 RESTORE_REGS
359 addl $4, %esp 378 addl $4, %esp
360 CFI_ADJUST_CFA_OFFSET -4 379 CFI_ADJUST_CFA_OFFSET -4
3611: iret 3801: iret
362.section .fixup,"ax" 381.section .fixup,"ax"
363iret_exc: 382iret_exc:
383 TRACE_IRQS_ON
364 sti 384 sti
365 pushl $0 # no error code 385 pushl $0 # no error code
366 pushl $do_iret_error 386 pushl $do_iret_error
@@ -386,11 +406,13 @@ ldt_ss:
386 subl $8, %esp # reserve space for switch16 pointer 406 subl $8, %esp # reserve space for switch16 pointer
387 CFI_ADJUST_CFA_OFFSET 8 407 CFI_ADJUST_CFA_OFFSET 8
388 cli 408 cli
409 TRACE_IRQS_OFF
389 movl %esp, %eax 410 movl %esp, %eax
390 /* Set up the 16bit stack frame with switch32 pointer on top, 411 /* Set up the 16bit stack frame with switch32 pointer on top,
391 * and a switch16 pointer on top of the current frame. */ 412 * and a switch16 pointer on top of the current frame. */
392 call setup_x86_bogus_stack 413 call setup_x86_bogus_stack
393 CFI_ADJUST_CFA_OFFSET -8 # frame has moved 414 CFI_ADJUST_CFA_OFFSET -8 # frame has moved
415 TRACE_IRQS_IRET
394 RESTORE_REGS 416 RESTORE_REGS
395 lss 20+4(%esp), %esp # switch to 16bit stack 417 lss 20+4(%esp), %esp # switch to 16bit stack
3961: iret 4181: iret
@@ -411,6 +433,7 @@ work_resched:
411 cli # make sure we don't miss an interrupt 433 cli # make sure we don't miss an interrupt
412 # setting need_resched or sigpending 434 # setting need_resched or sigpending
413 # between sampling and the iret 435 # between sampling and the iret
436 TRACE_IRQS_OFF
414 movl TI_flags(%ebp), %ecx 437 movl TI_flags(%ebp), %ecx
415 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other 438 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
416 # than syscall tracing? 439 # than syscall tracing?
@@ -462,6 +485,7 @@ syscall_trace_entry:
462syscall_exit_work: 485syscall_exit_work:
463 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl 486 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
464 jz work_pending 487 jz work_pending
488 TRACE_IRQS_ON
465 sti # could let do_syscall_trace() call 489 sti # could let do_syscall_trace() call
466 # schedule() instead 490 # schedule() instead
467 movl %esp, %eax 491 movl %esp, %eax
@@ -535,9 +559,14 @@ ENTRY(irq_entries_start)
535vector=vector+1 559vector=vector+1
536.endr 560.endr
537 561
562/*
563 * the CPU automatically disables interrupts when executing an IRQ vector,
564 * so IRQ-flags tracing has to follow that:
565 */
538 ALIGN 566 ALIGN
539common_interrupt: 567common_interrupt:
540 SAVE_ALL 568 SAVE_ALL
569 TRACE_IRQS_OFF
541 movl %esp,%eax 570 movl %esp,%eax
542 call do_IRQ 571 call do_IRQ
543 jmp ret_from_intr 572 jmp ret_from_intr
@@ -549,9 +578,10 @@ ENTRY(name) \
549 pushl $~(nr); \ 578 pushl $~(nr); \
550 CFI_ADJUST_CFA_OFFSET 4; \ 579 CFI_ADJUST_CFA_OFFSET 4; \
551 SAVE_ALL; \ 580 SAVE_ALL; \
581 TRACE_IRQS_OFF \
552 movl %esp,%eax; \ 582 movl %esp,%eax; \
553 call smp_/**/name; \ 583 call smp_/**/name; \
554 jmp ret_from_intr; \ 584 jmp ret_from_intr; \
555 CFI_ENDPROC 585 CFI_ENDPROC
556 586
557/* The include is where all of the SMP etc. interrupts come from */ 587/* The include is where all of the SMP etc. interrupts come from */
@@ -726,7 +756,7 @@ nmi_stack_correct:
726 xorl %edx,%edx # zero error code 756 xorl %edx,%edx # zero error code
727 movl %esp,%eax # pt_regs pointer 757 movl %esp,%eax # pt_regs pointer
728 call do_nmi 758 call do_nmi
729 jmp restore_all 759 jmp restore_nocheck_notrace
730 CFI_ENDPROC 760 CFI_ENDPROC
731 761
732nmi_stack_fixup: 762nmi_stack_fixup:
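The entry.S annotations above keep lockdep's idea of the hardirq state in sync with what the low-level code does with cli/sti: TRACE_IRQS_OFF right after interrupts are hardware-disabled, TRACE_IRQS_ON just before they are re-enabled, and TRACE_IRQS_IRET on the iret path, which only reports "on" when the saved EFLAGS will actually re-enable interrupts. On the C side the same pairing is hidden inside the irq-flags wrappers; roughly, as a sketch of the idea rather than a copy of the real header:

    /* approximate shape of the lockdep-aware wrappers (illustration only) */
    #define local_irq_disable() \
            do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)

    #define local_irq_enable() \
            do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)

In both cases the trace call runs while interrupts are disabled, which is why the assembly places TRACE_IRQS_OFF after cli and TRACE_IRQS_ON before sti.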
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 16b491703967..6cb529f60dcc 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -166,7 +166,7 @@ void irq_ctx_init(int cpu)
166 irqctx->tinfo.task = NULL; 166 irqctx->tinfo.task = NULL;
167 irqctx->tinfo.exec_domain = NULL; 167 irqctx->tinfo.exec_domain = NULL;
168 irqctx->tinfo.cpu = cpu; 168 irqctx->tinfo.cpu = cpu;
169 irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET; 169 irqctx->tinfo.preempt_count = 0;
170 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 170 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
171 171
172 softirq_ctx[cpu] = irqctx; 172 softirq_ctx[cpu] = irqctx;
@@ -211,6 +211,10 @@ asmlinkage void do_softirq(void)
211 : "0"(isp) 211 : "0"(isp)
212 : "memory", "cc", "edx", "ecx", "eax" 212 : "memory", "cc", "edx", "ecx", "eax"
213 ); 213 );
214 /*
 215 * Shouldn't happen, we returned above if in_interrupt():
216 */
217 WARN_ON_ONCE(softirq_count());
214 } 218 }
215 219
216 local_irq_restore(flags); 220 local_irq_restore(flags);
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index a76e93146585..2dd928a84645 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -107,7 +107,7 @@ int nmi_active;
107static __init void nmi_cpu_busy(void *data) 107static __init void nmi_cpu_busy(void *data)
108{ 108{
109 volatile int *endflag = data; 109 volatile int *endflag = data;
110 local_irq_enable(); 110 local_irq_enable_in_hardirq();
111 /* Intentionally don't use cpu_relax here. This is 111 /* Intentionally don't use cpu_relax here. This is
112 to make sure that the performance counter really ticks, 112 to make sure that the performance counter really ticks,
113 even if there is a simulator or similar that catches the 113 even if there is a simulator or similar that catches the
diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c
new file mode 100644
index 000000000000..e62a037ab399
--- /dev/null
+++ b/arch/i386/kernel/stacktrace.c
@@ -0,0 +1,98 @@
1/*
2 * arch/i386/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/stacktrace.h>
10
11static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
12{
13 return p > (void *)tinfo &&
14 p < (void *)tinfo + THREAD_SIZE - 3;
15}
16
17/*
18 * Save stack-backtrace addresses into a stack_trace buffer:
19 */
20static inline unsigned long
21save_context_stack(struct stack_trace *trace, unsigned int skip,
22 struct thread_info *tinfo, unsigned long *stack,
23 unsigned long ebp)
24{
25 unsigned long addr;
26
27#ifdef CONFIG_FRAME_POINTER
28 while (valid_stack_ptr(tinfo, (void *)ebp)) {
29 addr = *(unsigned long *)(ebp + 4);
30 if (!skip)
31 trace->entries[trace->nr_entries++] = addr;
32 else
33 skip--;
34 if (trace->nr_entries >= trace->max_entries)
35 break;
36 /*
37 * break out of recursive entries (such as
38 * end_of_stack_stop_unwind_function):
39 */
40 if (ebp == *(unsigned long *)ebp)
41 break;
42
43 ebp = *(unsigned long *)ebp;
44 }
45#else
46 while (valid_stack_ptr(tinfo, stack)) {
47 addr = *stack++;
48 if (__kernel_text_address(addr)) {
49 if (!skip)
50 trace->entries[trace->nr_entries++] = addr;
51 else
52 skip--;
53 if (trace->nr_entries >= trace->max_entries)
54 break;
55 }
56 }
57#endif
58
59 return ebp;
60}
61
62/*
63 * Save stack-backtrace addresses into a stack_trace buffer.
64 * If all_contexts is set, all contexts (hardirq, softirq and process)
65 * are saved. If not set then only the current context is saved.
66 */
67void save_stack_trace(struct stack_trace *trace,
68 struct task_struct *task, int all_contexts,
69 unsigned int skip)
70{
71 unsigned long ebp;
72 unsigned long *stack = &ebp;
73
74 WARN_ON(trace->nr_entries || !trace->max_entries);
75
76 if (!task || task == current) {
77 /* Grab ebp right from our regs: */
78 asm ("movl %%ebp, %0" : "=r" (ebp));
79 } else {
80 /* ebp is the last reg pushed by switch_to(): */
81 ebp = *(unsigned long *) task->thread.esp;
82 }
83
84 while (1) {
85 struct thread_info *context = (struct thread_info *)
86 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
87
88 ebp = save_context_stack(trace, skip, context, stack, ebp);
89 stack = (unsigned long *)context->previous_esp;
90 if (!all_contexts || !stack ||
91 trace->nr_entries >= trace->max_entries)
92 break;
93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94 if (trace->nr_entries >= trace->max_entries)
95 break;
96 }
97}
98
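The new file above provides the i386 backend for the save_stack_trace() interface added by this series: the caller hands in a struct stack_trace with an entries[] buffer and max_entries, and gets back return addresses for the chosen task, with ULONG_MAX written as a separator when all_contexts crosses into another stack. A hedged usage sketch; the buffer size and the printing are illustrative, not part of the patch:

    #include <linux/stacktrace.h>
    #include <linux/kallsyms.h>
    #include <linux/kernel.h>

    static unsigned long example_entries[32];

    static void example_dump_current_stack(void)
    {
            struct stack_trace trace = {
                    .nr_entries  = 0,
                    .max_entries = ARRAY_SIZE(example_entries),
                    .entries     = example_entries,
            };
            unsigned int i;

            /* current task, current context only, skip no frames */
            save_stack_trace(&trace, NULL, 0, 0);

            for (i = 0; i < trace.nr_entries; i++)
                    print_symbol("  %s\n", trace.entries[i]);
    }

Lockdep records traces through this interface and resolves the addresses to symbols only when a report is printed.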
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index e8c6086b2aa1..2bf8b55b91f8 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -115,28 +115,13 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
115} 115}
116 116
117/* 117/*
 118 * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. 118 * Print one address/symbol entry per line.
119 */ 119 */
120static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, 120static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
121 int printed)
122{ 121{
123 if (!printed)
124 printk(log_lvl);
125
126#if CONFIG_STACK_BACKTRACE_COLS == 1
127 printk(" [<%08lx>] ", addr); 122 printk(" [<%08lx>] ", addr);
128#else
129 printk(" <%08lx> ", addr);
130#endif
131 print_symbol("%s", addr);
132 123
133 printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; 124 print_symbol("%s\n", addr);
134 if (printed)
135 printk(" ");
136 else
137 printk("\n");
138
139 return printed;
140} 125}
141 126
142static inline unsigned long print_context_stack(struct thread_info *tinfo, 127static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -144,12 +129,11 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
144 char *log_lvl) 129 char *log_lvl)
145{ 130{
146 unsigned long addr; 131 unsigned long addr;
147 int printed = 0; /* nr of entries already printed on current line */
148 132
149#ifdef CONFIG_FRAME_POINTER 133#ifdef CONFIG_FRAME_POINTER
150 while (valid_stack_ptr(tinfo, (void *)ebp)) { 134 while (valid_stack_ptr(tinfo, (void *)ebp)) {
151 addr = *(unsigned long *)(ebp + 4); 135 addr = *(unsigned long *)(ebp + 4);
152 printed = print_addr_and_symbol(addr, log_lvl, printed); 136 print_addr_and_symbol(addr, log_lvl);
153 /* 137 /*
154 * break out of recursive entries (such as 138 * break out of recursive entries (such as
155 * end_of_stack_stop_unwind_function): 139 * end_of_stack_stop_unwind_function):
@@ -162,28 +146,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
162 while (valid_stack_ptr(tinfo, stack)) { 146 while (valid_stack_ptr(tinfo, stack)) {
163 addr = *stack++; 147 addr = *stack++;
164 if (__kernel_text_address(addr)) 148 if (__kernel_text_address(addr))
165 printed = print_addr_and_symbol(addr, log_lvl, printed); 149 print_addr_and_symbol(addr, log_lvl);
166 } 150 }
167#endif 151#endif
168 if (printed)
169 printk("\n");
170
171 return ebp; 152 return ebp;
172} 153}
173 154
174static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) 155static asmlinkage int
156show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
175{ 157{
176 int n = 0; 158 int n = 0;
177 int printed = 0; /* nr of entries already printed on current line */
178 159
179 while (unwind(info) == 0 && UNW_PC(info)) { 160 while (unwind(info) == 0 && UNW_PC(info)) {
180 ++n; 161 n++;
181 printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); 162 print_addr_and_symbol(UNW_PC(info), log_lvl);
182 if (arch_unw_user_mode(info)) 163 if (arch_unw_user_mode(info))
183 break; 164 break;
184 } 165 }
185 if (printed)
186 printk("\n");
187 return n; 166 return n;
188} 167}
189 168
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index eb8e8dc5ac8e..2fbe4536fe18 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -678,7 +678,7 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
678 */ 678 */
679 679
680static void 680static void
681ia64_mca_modify_comm(const task_t *previous_current) 681ia64_mca_modify_comm(const struct task_struct *previous_current)
682{ 682{
683 char *p, comm[sizeof(current->comm)]; 683 char *p, comm[sizeof(current->comm)];
684 if (previous_current->pid) 684 if (previous_current->pid)
@@ -709,7 +709,7 @@ ia64_mca_modify_comm(const task_t *previous_current)
709 * that we can do backtrace on the MCA/INIT handler code itself. 709 * that we can do backtrace on the MCA/INIT handler code itself.
710 */ 710 */
711 711
712static task_t * 712static struct task_struct *
713ia64_mca_modify_original_stack(struct pt_regs *regs, 713ia64_mca_modify_original_stack(struct pt_regs *regs,
714 const struct switch_stack *sw, 714 const struct switch_stack *sw,
715 struct ia64_sal_os_state *sos, 715 struct ia64_sal_os_state *sos,
@@ -719,7 +719,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
719 ia64_va va; 719 ia64_va va;
720 extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ 720 extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */
721 const pal_min_state_area_t *ms = sos->pal_min_state; 721 const pal_min_state_area_t *ms = sos->pal_min_state;
722 task_t *previous_current; 722 struct task_struct *previous_current;
723 struct pt_regs *old_regs; 723 struct pt_regs *old_regs;
724 struct switch_stack *old_sw; 724 struct switch_stack *old_sw;
725 unsigned size = sizeof(struct pt_regs) + 725 unsigned size = sizeof(struct pt_regs) +
@@ -1023,7 +1023,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1023 pal_processor_state_info_t *psp = (pal_processor_state_info_t *) 1023 pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
1024 &sos->proc_state_param; 1024 &sos->proc_state_param;
1025 int recover, cpu = smp_processor_id(); 1025 int recover, cpu = smp_processor_id();
1026 task_t *previous_current; 1026 struct task_struct *previous_current;
1027 struct ia64_mca_notify_die nd = 1027 struct ia64_mca_notify_die nd =
1028 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1028 { .sos = sos, .monarch_cpu = &monarch_cpu };
1029 1029
@@ -1352,7 +1352,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1352{ 1352{
1353 static atomic_t slaves; 1353 static atomic_t slaves;
1354 static atomic_t monarchs; 1354 static atomic_t monarchs;
1355 task_t *previous_current; 1355 struct task_struct *previous_current;
1356 int cpu = smp_processor_id(); 1356 int cpu = smp_processor_id();
1357 struct ia64_mca_notify_die nd = 1357 struct ia64_mca_notify_die nd =
1358 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1358 { .sos = sos, .monarch_cpu = &monarch_cpu };
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index e1960979be29..6203ed4ec8cf 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -124,7 +124,7 @@ extern void __devinit calibrate_delay (void);
124extern void start_ap (void); 124extern void start_ap (void);
125extern unsigned long ia64_iobase; 125extern unsigned long ia64_iobase;
126 126
127task_t *task_for_booting_cpu; 127struct task_struct *task_for_booting_cpu;
128 128
129/* 129/*
130 * State for each CPU 130 * State for each CPU
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index ecfd637d702a..01e7fa86aa43 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -65,7 +65,7 @@ need_resched:
65#endif 65#endif
66 66
67FEXPORT(ret_from_fork) 67FEXPORT(ret_from_fork)
68 jal schedule_tail # a0 = task_t *prev 68 jal schedule_tail # a0 = struct task_struct *prev
69 69
70FEXPORT(syscall_exit) 70FEXPORT(syscall_exit)
71 local_irq_disable # make sure need_resched and 71 local_irq_disable # make sure need_resched and
diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c
index 02237a685ec7..4dcc39f42951 100644
--- a/arch/mips/kernel/mips-mt.c
+++ b/arch/mips/kernel/mips-mt.c
@@ -47,7 +47,7 @@ unsigned long mt_fpemul_threshold = 0;
47 * used in sys_sched_set/getaffinity() in kernel/sched.c, so 47 * used in sys_sched_set/getaffinity() in kernel/sched.c, so
48 * cloned here. 48 * cloned here.
49 */ 49 */
50static inline task_t *find_process_by_pid(pid_t pid) 50static inline struct task_struct *find_process_by_pid(pid_t pid)
51{ 51{
52 return pid ? find_task_by_pid(pid) : current; 52 return pid ? find_task_by_pid(pid) : current;
53} 53}
@@ -62,7 +62,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
62 cpumask_t new_mask; 62 cpumask_t new_mask;
63 cpumask_t effective_mask; 63 cpumask_t effective_mask;
64 int retval; 64 int retval;
65 task_t *p; 65 struct task_struct *p;
66 66
67 if (len < sizeof(new_mask)) 67 if (len < sizeof(new_mask))
68 return -EINVAL; 68 return -EINVAL;
@@ -127,7 +127,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
127 unsigned int real_len; 127 unsigned int real_len;
128 cpumask_t mask; 128 cpumask_t mask;
129 int retval; 129 int retval;
130 task_t *p; 130 struct task_struct *p;
131 131
132 real_len = sizeof(mask); 132 real_len = sizeof(mask);
133 if (len < real_len) 133 if (len < real_len)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 05a700940f67..8cf987809c66 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -306,13 +306,8 @@ void do_softirq(void)
306 306
307 local_irq_save(flags); 307 local_irq_save(flags);
308 308
309 if (local_softirq_pending()) { 309 if (local_softirq_pending())
310 account_system_vtime(current);
311 local_bh_disable();
312 do_softirq_onstack(); 310 do_softirq_onstack();
313 account_system_vtime(current);
314 __local_bh_enable();
315 }
316 311
317 local_irq_restore(flags); 312 local_irq_restore(flags);
318} 313}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 821a141889de..224fbff79969 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -7,6 +7,14 @@ config MMU
7 bool 7 bool
8 default y 8 default y
9 9
10config LOCKDEP_SUPPORT
11 bool
12 default y
13
14config STACKTRACE_SUPPORT
15 bool
16 default y
17
10config RWSEM_GENERIC_SPINLOCK 18config RWSEM_GENERIC_SPINLOCK
11 bool 19 bool
12 20
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index f53b6d5300e5..2283933a9a93 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -1,5 +1,9 @@
1menu "Kernel hacking" 1menu "Kernel hacking"
2 2
3config TRACE_IRQFLAGS_SUPPORT
4 bool
5 default y
6
3source "lib/Kconfig.debug" 7source "lib/Kconfig.debug"
4 8
5endmenu 9endmenu
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index b3791fb094a8..74ef57dcfa60 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -34,6 +34,11 @@ cflags-$(CONFIG_MARCH_G5) += $(call cc-option,-march=g5)
34cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) 34cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900)
35cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) 35cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990)
36 36
37#
38# Prevent tail-call optimizations, to get clearer backtraces:
39#
40cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
41
37# old style option for packed stacks 42# old style option for packed stacks
38ifeq ($(call cc-option-yn,-mkernel-backchain),y) 43ifeq ($(call cc-option-yn,-mkernel-backchain),y)
39cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK 44cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9269b5788fac..eabf00a6f770 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \
21obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o 21obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o
22 22
23obj-$(CONFIG_VIRT_TIMER) += vtime.o 23obj-$(CONFIG_VIRT_TIMER) += vtime.o
24obj-$(CONFIG_STACKTRACE) += stacktrace.o
24 25
25# Kexec part 26# Kexec part
26S390_KEXEC_OBJS := machine_kexec.o crash.o 27S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index d8948c342caf..5b5799ac8f83 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -58,6 +58,21 @@ STACK_SIZE = 1 << STACK_SHIFT
58 58
59#define BASED(name) name-system_call(%r13) 59#define BASED(name) name-system_call(%r13)
60 60
61#ifdef CONFIG_TRACE_IRQFLAGS
62 .macro TRACE_IRQS_ON
63 l %r1,BASED(.Ltrace_irq_on)
64 basr %r14,%r1
65 .endm
66
67 .macro TRACE_IRQS_OFF
68 l %r1,BASED(.Ltrace_irq_off)
69 basr %r14,%r1
70 .endm
71#else
72#define TRACE_IRQS_ON
73#define TRACE_IRQS_OFF
74#endif
75
61/* 76/*
62 * Register usage in interrupt handlers: 77 * Register usage in interrupt handlers:
63 * R9 - pointer to current task structure 78 * R9 - pointer to current task structure
@@ -361,6 +376,7 @@ ret_from_fork:
361 st %r15,SP_R15(%r15) # store stack pointer for new kthread 376 st %r15,SP_R15(%r15) # store stack pointer for new kthread
3620: l %r1,BASED(.Lschedtail) 3770: l %r1,BASED(.Lschedtail)
363 basr %r14,%r1 378 basr %r14,%r1
379 TRACE_IRQS_ON
364 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts 380 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
365 b BASED(sysc_return) 381 b BASED(sysc_return)
366 382
@@ -516,6 +532,7 @@ pgm_no_vtime3:
516 mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS 532 mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
517 mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID 533 mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
518 oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP 534 oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
535 TRACE_IRQS_ON
519 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts 536 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
520 b BASED(sysc_do_svc) 537 b BASED(sysc_do_svc)
521 538
@@ -539,9 +556,11 @@ io_int_handler:
539io_no_vtime: 556io_no_vtime:
540#endif 557#endif
541 l %r9,__LC_THREAD_INFO # load pointer to thread_info struct 558 l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
559 TRACE_IRQS_OFF
542 l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ 560 l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ
543 la %r2,SP_PTREGS(%r15) # address of register-save area 561 la %r2,SP_PTREGS(%r15) # address of register-save area
544 basr %r14,%r1 # branch to standard irq handler 562 basr %r14,%r1 # branch to standard irq handler
563 TRACE_IRQS_ON
545 564
546io_return: 565io_return:
547 tm SP_PSW+1(%r15),0x01 # returning to user ? 566 tm SP_PSW+1(%r15),0x01 # returning to user ?
@@ -651,10 +670,12 @@ ext_int_handler:
651ext_no_vtime: 670ext_no_vtime:
652#endif 671#endif
653 l %r9,__LC_THREAD_INFO # load pointer to thread_info struct 672 l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
673 TRACE_IRQS_OFF
654 la %r2,SP_PTREGS(%r15) # address of register-save area 674 la %r2,SP_PTREGS(%r15) # address of register-save area
655 lh %r3,__LC_EXT_INT_CODE # get interruption code 675 lh %r3,__LC_EXT_INT_CODE # get interruption code
656 l %r1,BASED(.Ldo_extint) 676 l %r1,BASED(.Ldo_extint)
657 basr %r14,%r1 677 basr %r14,%r1
678 TRACE_IRQS_ON
658 b BASED(io_return) 679 b BASED(io_return)
659 680
660__critical_end: 681__critical_end:
@@ -731,8 +752,10 @@ mcck_no_vtime:
731 stosm __SF_EMPTY(%r15),0x04 # turn dat on 752 stosm __SF_EMPTY(%r15),0x04 # turn dat on
732 tm __TI_flags+3(%r9),_TIF_MCCK_PENDING 753 tm __TI_flags+3(%r9),_TIF_MCCK_PENDING
733 bno BASED(mcck_return) 754 bno BASED(mcck_return)
755 TRACE_IRQS_OFF
734 l %r1,BASED(.Ls390_handle_mcck) 756 l %r1,BASED(.Ls390_handle_mcck)
735 basr %r14,%r1 # call machine check handler 757 basr %r14,%r1 # call machine check handler
758 TRACE_IRQS_ON
736mcck_return: 759mcck_return:
737 mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW 760 mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW
738 ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit 761 ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
@@ -1012,7 +1035,11 @@ cleanup_io_leave_insn:
1012.Lvfork: .long sys_vfork 1035.Lvfork: .long sys_vfork
1013.Lschedtail: .long schedule_tail 1036.Lschedtail: .long schedule_tail
1014.Lsysc_table: .long sys_call_table 1037.Lsysc_table: .long sys_call_table
1015 1038#ifdef CONFIG_TRACE_IRQFLAGS
1039.Ltrace_irq_on:.long trace_hardirqs_on
1040.Ltrace_irq_off:
1041 .long trace_hardirqs_off
1042#endif
1016.Lcritical_start: 1043.Lcritical_start:
1017 .long __critical_start + 0x80000000 1044 .long __critical_start + 0x80000000
1018.Lcritical_end: 1045.Lcritical_end:
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 1ca499fa54b4..56f5f613b868 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -58,6 +58,19 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
58 58
59#define BASED(name) name-system_call(%r13) 59#define BASED(name) name-system_call(%r13)
60 60
61#ifdef CONFIG_TRACE_IRQFLAGS
62 .macro TRACE_IRQS_ON
63 brasl %r14,trace_hardirqs_on
64 .endm
65
66 .macro TRACE_IRQS_OFF
67 brasl %r14,trace_hardirqs_off
68 .endm
69#else
70#define TRACE_IRQS_ON
71#define TRACE_IRQS_OFF
72#endif
73
61 .macro STORE_TIMER lc_offset 74 .macro STORE_TIMER lc_offset
62#ifdef CONFIG_VIRT_CPU_ACCOUNTING 75#ifdef CONFIG_VIRT_CPU_ACCOUNTING
63 stpt \lc_offset 76 stpt \lc_offset
@@ -354,6 +367,7 @@ ret_from_fork:
354 jo 0f 367 jo 0f
355 stg %r15,SP_R15(%r15) # store stack pointer for new kthread 368 stg %r15,SP_R15(%r15) # store stack pointer for new kthread
3560: brasl %r14,schedule_tail 3690: brasl %r14,schedule_tail
370 TRACE_IRQS_ON
357 stosm 24(%r15),0x03 # reenable interrupts 371 stosm 24(%r15),0x03 # reenable interrupts
358 j sysc_return 372 j sysc_return
359 373
@@ -535,6 +549,7 @@ pgm_no_vtime3:
535 mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS 549 mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
536 mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID 550 mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
537 oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP 551 oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
552 TRACE_IRQS_ON
538 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts 553 stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
539 j sysc_do_svc 554 j sysc_do_svc
540 555
@@ -557,8 +572,10 @@ io_int_handler:
557io_no_vtime: 572io_no_vtime:
558#endif 573#endif
559 lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct 574 lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
575 TRACE_IRQS_OFF
560 la %r2,SP_PTREGS(%r15) # address of register-save area 576 la %r2,SP_PTREGS(%r15) # address of register-save area
561 brasl %r14,do_IRQ # call standard irq handler 577 brasl %r14,do_IRQ # call standard irq handler
578 TRACE_IRQS_ON
562 579
563io_return: 580io_return:
564 tm SP_PSW+1(%r15),0x01 # returning to user ? 581 tm SP_PSW+1(%r15),0x01 # returning to user ?
@@ -665,9 +682,11 @@ ext_int_handler:
665ext_no_vtime: 682ext_no_vtime:
666#endif 683#endif
667 lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct 684 lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
685 TRACE_IRQS_OFF
668 la %r2,SP_PTREGS(%r15) # address of register-save area 686 la %r2,SP_PTREGS(%r15) # address of register-save area
669 llgh %r3,__LC_EXT_INT_CODE # get interruption code 687 llgh %r3,__LC_EXT_INT_CODE # get interruption code
670 brasl %r14,do_extint 688 brasl %r14,do_extint
689 TRACE_IRQS_ON
671 j io_return 690 j io_return
672 691
673__critical_end: 692__critical_end:
@@ -743,7 +762,9 @@ mcck_no_vtime:
743 stosm __SF_EMPTY(%r15),0x04 # turn dat on 762 stosm __SF_EMPTY(%r15),0x04 # turn dat on
744 tm __TI_flags+7(%r9),_TIF_MCCK_PENDING 763 tm __TI_flags+7(%r9),_TIF_MCCK_PENDING
745 jno mcck_return 764 jno mcck_return
765 TRACE_IRQS_OFF
746 brasl %r14,s390_handle_mcck 766 brasl %r14,s390_handle_mcck
767 TRACE_IRQS_ON
747mcck_return: 768mcck_return:
748 mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW 769 mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW
749 ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit 770 ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 480b6a5fef3a..1eef50918615 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -69,10 +69,6 @@ asmlinkage void do_softirq(void)
69 69
70 local_irq_save(flags); 70 local_irq_save(flags);
71 71
72 account_system_vtime(current);
73
74 local_bh_disable();
75
76 if (local_softirq_pending()) { 72 if (local_softirq_pending()) {
77 /* Get current stack pointer. */ 73 /* Get current stack pointer. */
78 asm volatile("la %0,0(15)" : "=a" (old)); 74 asm volatile("la %0,0(15)" : "=a" (old));
@@ -95,10 +91,6 @@ asmlinkage void do_softirq(void)
95 __do_softirq(); 91 __do_softirq();
96 } 92 }
97 93
98 account_system_vtime(current);
99
100 __local_bh_enable();
101
102 local_irq_restore(flags); 94 local_irq_restore(flags);
103} 95}
104 96
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 1f9399191794..78c8e5548caf 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -142,6 +142,7 @@ static void default_idle(void)
142 return; 142 return;
143 } 143 }
144 144
145 trace_hardirqs_on();
145 /* Wait for external, I/O or machine check interrupt. */ 146 /* Wait for external, I/O or machine check interrupt. */
146 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT | 147 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT |
147 PSW_MASK_IO | PSW_MASK_EXT); 148 PSW_MASK_IO | PSW_MASK_EXT);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
new file mode 100644
index 000000000000..de83f38288d0
--- /dev/null
+++ b/arch/s390/kernel/stacktrace.c
@@ -0,0 +1,90 @@
1/*
2 * arch/s390/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) IBM Corp. 2006
7 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
8 */
9
10#include <linux/sched.h>
11#include <linux/stacktrace.h>
12#include <linux/kallsyms.h>
13
14static inline unsigned long save_context_stack(struct stack_trace *trace,
15 unsigned int *skip,
16 unsigned long sp,
17 unsigned long low,
18 unsigned long high)
19{
20 struct stack_frame *sf;
21 struct pt_regs *regs;
22 unsigned long addr;
23
24 while(1) {
25 sp &= PSW_ADDR_INSN;
26 if (sp < low || sp > high)
27 return sp;
28 sf = (struct stack_frame *)sp;
29 while(1) {
30 addr = sf->gprs[8] & PSW_ADDR_INSN;
31 if (!(*skip))
32 trace->entries[trace->nr_entries++] = addr;
33 else
34 (*skip)--;
35 if (trace->nr_entries >= trace->max_entries)
36 return sp;
37 low = sp;
38 sp = sf->back_chain & PSW_ADDR_INSN;
39 if (!sp)
40 break;
41 if (sp <= low || sp > high - sizeof(*sf))
42 return sp;
43 sf = (struct stack_frame *)sp;
44 }
45 /* Zero backchain detected, check for interrupt frame. */
46 sp = (unsigned long)(sf + 1);
47 if (sp <= low || sp > high - sizeof(*regs))
48 return sp;
49 regs = (struct pt_regs *)sp;
50 addr = regs->psw.addr & PSW_ADDR_INSN;
51 if (!(*skip))
52 trace->entries[trace->nr_entries++] = addr;
53 else
54 (*skip)--;
55 if (trace->nr_entries >= trace->max_entries)
56 return sp;
57 low = sp;
58 sp = regs->gprs[15];
59 }
60}
61
62void save_stack_trace(struct stack_trace *trace,
63 struct task_struct *task, int all_contexts,
64 unsigned int skip)
65{
66 register unsigned long sp asm ("15");
67 unsigned long orig_sp;
68
69 sp &= PSW_ADDR_INSN;
70 orig_sp = sp;
71
72 sp = save_context_stack(trace, &skip, sp,
73 S390_lowcore.panic_stack - PAGE_SIZE,
74 S390_lowcore.panic_stack);
75 if ((sp != orig_sp) && !all_contexts)
76 return;
77 sp = save_context_stack(trace, &skip, sp,
78 S390_lowcore.async_stack - ASYNC_SIZE,
79 S390_lowcore.async_stack);
80 if ((sp != orig_sp) && !all_contexts)
81 return;
82 if (task)
83 save_context_stack(trace, &skip, sp,
84 (unsigned long) task_stack_page(task),
85 (unsigned long) task_stack_page(task) + THREAD_SIZE);
86 else
87 save_context_stack(trace, &skip, sp, S390_lowcore.thread_info,
88 S390_lowcore.thread_info + THREAD_SIZE);
89 return;
90}
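
For orientation, a hedged usage sketch of the interface this new file implements: the caller owns the entries[] buffer and the limits, save_stack_trace() fills it, and print_stack_trace() is the assumed generic pretty-printer from kernel/stacktrace.c (not part of this file):

	#include <linux/kernel.h>
	#include <linux/stacktrace.h>

	static unsigned long backtrace_buf[16];

	static void show_my_backtrace(void)
	{
		struct stack_trace trace = {
			.nr_entries	= 0,
			.max_entries	= ARRAY_SIZE(backtrace_buf),
			.entries	= backtrace_buf,
		};

		/* current task, current context only, skip no entries */
		save_stack_trace(&trace, NULL, 0, 0);
		print_stack_trace(&trace, 0);
	}
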
diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
index a9c1443fc548..8368c2dbe635 100644
--- a/arch/um/kernel/tt/process_kern.c
+++ b/arch/um/kernel/tt/process_kern.c
@@ -119,7 +119,7 @@ void suspend_new_thread(int fd)
119 panic("read failed in suspend_new_thread, err = %d", -err); 119 panic("read failed in suspend_new_thread, err = %d", -err);
120} 120}
121 121
122void schedule_tail(task_t *prev); 122void schedule_tail(struct task_struct *prev);
123 123
124static void new_thread_handler(int sig) 124static void new_thread_handler(int sig)
125{ 125{
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 7d51dd7201c3..37cfe7701f06 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -495,6 +495,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
495{ 495{
496} 496}
497 497
498#ifdef CONFIG_SMP
498void alternatives_smp_module_add(struct module *mod, char *name, 499void alternatives_smp_module_add(struct module *mod, char *name,
499 void *locks, void *locks_end, 500 void *locks, void *locks_end,
500 void *text, void *text_end) 501 void *text, void *text_end)
@@ -504,3 +505,4 @@ void alternatives_smp_module_add(struct module *mod, char *name,
504void alternatives_smp_module_del(struct module *mod) 505void alternatives_smp_module_del(struct module *mod)
505{ 506{
506} 507}
508#endif
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e856804c447f..28df7d88ce2c 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,14 @@ config X86
24 bool 24 bool
25 default y 25 default y
26 26
27config LOCKDEP_SUPPORT
28 bool
29 default y
30
31config STACKTRACE_SUPPORT
32 bool
33 default y
34
27config SEMAPHORE_SLEEPERS 35config SEMAPHORE_SLEEPERS
28 bool 36 bool
29 default y 37 default y
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 1d92ab56c0f9..775d211a5cf9 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -1,5 +1,9 @@
1menu "Kernel hacking" 1menu "Kernel hacking"
2 2
3config TRACE_IRQFLAGS_SUPPORT
4 bool
5 default y
6
3source "lib/Kconfig.debug" 7source "lib/Kconfig.debug"
4 8
5config DEBUG_RODATA 9config DEBUG_RODATA
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c536fa98ea37..9b5bb413a6e9 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -13,6 +13,7 @@
13#include <asm/thread_info.h> 13#include <asm/thread_info.h>
14#include <asm/segment.h> 14#include <asm/segment.h>
15#include <asm/vsyscall32.h> 15#include <asm/vsyscall32.h>
16#include <asm/irqflags.h>
16#include <linux/linkage.h> 17#include <linux/linkage.h>
17 18
18#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) 19#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target)
75 swapgs 76 swapgs
76 movq %gs:pda_kernelstack, %rsp 77 movq %gs:pda_kernelstack, %rsp
77 addq $(PDA_STACKOFFSET),%rsp 78 addq $(PDA_STACKOFFSET),%rsp
79 /*
80 * No need to follow this irqs on/off section: the syscall
81 * disabled irqs, here we enable it straight after entry:
82 */
78 sti 83 sti
79 movl %ebp,%ebp /* zero extension */ 84 movl %ebp,%ebp /* zero extension */
80 pushq $__USER32_DS 85 pushq $__USER32_DS
@@ -118,6 +123,7 @@ sysenter_do_call:
118 movq %rax,RAX-ARGOFFSET(%rsp) 123 movq %rax,RAX-ARGOFFSET(%rsp)
119 GET_THREAD_INFO(%r10) 124 GET_THREAD_INFO(%r10)
120 cli 125 cli
126 TRACE_IRQS_OFF
121 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) 127 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
122 jnz int_ret_from_sys_call 128 jnz int_ret_from_sys_call
123 andl $~TS_COMPAT,threadinfo_status(%r10) 129 andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -132,6 +138,7 @@ sysenter_do_call:
132 CFI_REGISTER rsp,rcx 138 CFI_REGISTER rsp,rcx
133 movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ 139 movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
134 CFI_REGISTER rip,rdx 140 CFI_REGISTER rip,rdx
141 TRACE_IRQS_ON
135 swapgs 142 swapgs
136 sti /* sti only takes effect after the next instruction */ 143 sti /* sti only takes effect after the next instruction */
137 /* sysexit */ 144 /* sysexit */
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target)
186 movl %esp,%r8d 193 movl %esp,%r8d
187 CFI_REGISTER rsp,r8 194 CFI_REGISTER rsp,r8
188 movq %gs:pda_kernelstack,%rsp 195 movq %gs:pda_kernelstack,%rsp
196 /*
197 * No need to follow this irqs on/off section: the syscall
198 * disabled irqs and here we enable it straight after entry:
199 */
189 sti 200 sti
190 SAVE_ARGS 8,1,1 201 SAVE_ARGS 8,1,1
191 movl %eax,%eax /* zero extension */ 202 movl %eax,%eax /* zero extension */
@@ -220,6 +231,7 @@ cstar_do_call:
220 movq %rax,RAX-ARGOFFSET(%rsp) 231 movq %rax,RAX-ARGOFFSET(%rsp)
221 GET_THREAD_INFO(%r10) 232 GET_THREAD_INFO(%r10)
222 cli 233 cli
234 TRACE_IRQS_OFF
223 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) 235 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
224 jnz int_ret_from_sys_call 236 jnz int_ret_from_sys_call
225 andl $~TS_COMPAT,threadinfo_status(%r10) 237 andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -228,6 +240,7 @@ cstar_do_call:
228 CFI_REGISTER rip,rcx 240 CFI_REGISTER rip,rcx
229 movl EFLAGS-ARGOFFSET(%rsp),%r11d 241 movl EFLAGS-ARGOFFSET(%rsp),%r11d
230 /*CFI_REGISTER rflags,r11*/ 242 /*CFI_REGISTER rflags,r11*/
243 TRACE_IRQS_ON
231 movl RSP-ARGOFFSET(%rsp),%esp 244 movl RSP-ARGOFFSET(%rsp),%esp
232 CFI_RESTORE rsp 245 CFI_RESTORE rsp
233 swapgs 246 swapgs
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall)
286 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ 299 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
287 /*CFI_REL_OFFSET cs,CS-RIP*/ 300 /*CFI_REL_OFFSET cs,CS-RIP*/
288 CFI_REL_OFFSET rip,RIP-RIP 301 CFI_REL_OFFSET rip,RIP-RIP
289 swapgs 302 swapgs
303 /*
304 * No need to follow this irqs on/off section: the syscall
305 * disabled irqs and here we enable it straight after entry:
306 */
290 sti 307 sti
291 movl %eax,%eax 308 movl %eax,%eax
292 pushq %rax 309 pushq %rax
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 819e84ec5b64..b5aaeafc1cd3 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o alternative.o 11 pci-dma.o pci-nommu.o alternative.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o
13obj-$(CONFIG_X86_MCE) += mce.o 14obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 15obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
15obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o 16obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c2983606..d464dded68c0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,13 +41,24 @@
41#include <asm/thread_info.h> 41#include <asm/thread_info.h>
42#include <asm/hw_irq.h> 42#include <asm/hw_irq.h>
43#include <asm/page.h> 43#include <asm/page.h>
44#include <asm/irqflags.h>
44 45
45 .code64 46 .code64
46 47
47#ifndef CONFIG_PREEMPT 48#ifndef CONFIG_PREEMPT
48#define retint_kernel retint_restore_args 49#define retint_kernel retint_restore_args
49#endif 50#endif
50 51
52
53.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
54#ifdef CONFIG_TRACE_IRQFLAGS
55 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
56 jnc 1f
57 TRACE_IRQS_ON
581:
59#endif
60.endm
61
51/* 62/*
52 * C code is not supposed to know about undefined top of stack. Every time 63 * C code is not supposed to know about undefined top of stack. Every time
53 * a C function with an pt_regs argument is called from the SYSCALL based 64 * a C function with an pt_regs argument is called from the SYSCALL based
@@ -194,6 +205,10 @@ ENTRY(system_call)
194 swapgs 205 swapgs
195 movq %rsp,%gs:pda_oldrsp 206 movq %rsp,%gs:pda_oldrsp
196 movq %gs:pda_kernelstack,%rsp 207 movq %gs:pda_kernelstack,%rsp
208 /*
209 * No need to follow this irqs off/on section - it's straight
210 * and short:
211 */
197 sti 212 sti
198 SAVE_ARGS 8,1 213 SAVE_ARGS 8,1
199 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 214 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
@@ -219,10 +234,15 @@ ret_from_sys_call:
219sysret_check: 234sysret_check:
220 GET_THREAD_INFO(%rcx) 235 GET_THREAD_INFO(%rcx)
221 cli 236 cli
237 TRACE_IRQS_OFF
222 movl threadinfo_flags(%rcx),%edx 238 movl threadinfo_flags(%rcx),%edx
223 andl %edi,%edx 239 andl %edi,%edx
224 CFI_REMEMBER_STATE 240 CFI_REMEMBER_STATE
225 jnz sysret_careful 241 jnz sysret_careful
242 /*
243 * sysretq will re-enable interrupts:
244 */
245 TRACE_IRQS_ON
226 movq RIP-ARGOFFSET(%rsp),%rcx 246 movq RIP-ARGOFFSET(%rsp),%rcx
227 CFI_REGISTER rip,rcx 247 CFI_REGISTER rip,rcx
228 RESTORE_ARGS 0,-ARG_SKIP,1 248 RESTORE_ARGS 0,-ARG_SKIP,1
@@ -237,6 +257,7 @@ sysret_careful:
237 CFI_RESTORE_STATE 257 CFI_RESTORE_STATE
238 bt $TIF_NEED_RESCHED,%edx 258 bt $TIF_NEED_RESCHED,%edx
239 jnc sysret_signal 259 jnc sysret_signal
260 TRACE_IRQS_ON
240 sti 261 sti
241 pushq %rdi 262 pushq %rdi
242 CFI_ADJUST_CFA_OFFSET 8 263 CFI_ADJUST_CFA_OFFSET 8
@@ -247,6 +268,7 @@ sysret_careful:
247 268
248 /* Handle a signal */ 269 /* Handle a signal */
249sysret_signal: 270sysret_signal:
271 TRACE_IRQS_ON
250 sti 272 sti
251 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx 273 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
252 jz 1f 274 jz 1f
@@ -261,6 +283,7 @@ sysret_signal:
261 /* Use IRET because user could have changed frame. This 283 /* Use IRET because user could have changed frame. This
262 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 284 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
263 cli 285 cli
286 TRACE_IRQS_OFF
264 jmp int_with_check 287 jmp int_with_check
265 288
266badsys: 289badsys:
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call)
309 CFI_REL_OFFSET r10,R10-ARGOFFSET 332 CFI_REL_OFFSET r10,R10-ARGOFFSET
310 CFI_REL_OFFSET r11,R11-ARGOFFSET 333 CFI_REL_OFFSET r11,R11-ARGOFFSET
311 cli 334 cli
335 TRACE_IRQS_OFF
312 testl $3,CS-ARGOFFSET(%rsp) 336 testl $3,CS-ARGOFFSET(%rsp)
313 je retint_restore_args 337 je retint_restore_args
314 movl $_TIF_ALLWORK_MASK,%edi 338 movl $_TIF_ALLWORK_MASK,%edi
@@ -327,6 +351,7 @@ int_with_check:
327int_careful: 351int_careful:
328 bt $TIF_NEED_RESCHED,%edx 352 bt $TIF_NEED_RESCHED,%edx
329 jnc int_very_careful 353 jnc int_very_careful
354 TRACE_IRQS_ON
330 sti 355 sti
331 pushq %rdi 356 pushq %rdi
332 CFI_ADJUST_CFA_OFFSET 8 357 CFI_ADJUST_CFA_OFFSET 8
@@ -334,10 +359,12 @@ int_careful:
334 popq %rdi 359 popq %rdi
335 CFI_ADJUST_CFA_OFFSET -8 360 CFI_ADJUST_CFA_OFFSET -8
336 cli 361 cli
362 TRACE_IRQS_OFF
337 jmp int_with_check 363 jmp int_with_check
338 364
339 /* handle signals and tracing -- both require a full stack frame */ 365 /* handle signals and tracing -- both require a full stack frame */
340int_very_careful: 366int_very_careful:
367 TRACE_IRQS_ON
341 sti 368 sti
342 SAVE_REST 369 SAVE_REST
343 /* Check for syscall exit trace */ 370 /* Check for syscall exit trace */
@@ -351,6 +378,7 @@ int_very_careful:
351 CFI_ADJUST_CFA_OFFSET -8 378 CFI_ADJUST_CFA_OFFSET -8
352 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 379 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
353 cli 380 cli
381 TRACE_IRQS_OFF
354 jmp int_restore_rest 382 jmp int_restore_rest
355 383
356int_signal: 384int_signal:
@@ -363,6 +391,7 @@ int_signal:
363int_restore_rest: 391int_restore_rest:
364 RESTORE_REST 392 RESTORE_REST
365 cli 393 cli
394 TRACE_IRQS_OFF
366 jmp int_with_check 395 jmp int_with_check
367 CFI_ENDPROC 396 CFI_ENDPROC
368END(int_ret_from_sys_call) 397END(int_ret_from_sys_call)
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn)
484 swapgs 513 swapgs
4851: incl %gs:pda_irqcount # RED-PEN should check preempt count 5141: incl %gs:pda_irqcount # RED-PEN should check preempt count
486 cmoveq %gs:pda_irqstackptr,%rsp 515 cmoveq %gs:pda_irqstackptr,%rsp
516 /*
517 * We entered an interrupt context - irqs are off:
518 */
519 TRACE_IRQS_OFF
487 call \func 520 call \func
488 .endm 521 .endm
489 522
@@ -493,6 +526,7 @@ ENTRY(common_interrupt)
493 /* 0(%rsp): oldrsp-ARGOFFSET */ 526 /* 0(%rsp): oldrsp-ARGOFFSET */
494ret_from_intr: 527ret_from_intr:
495 cli 528 cli
529 TRACE_IRQS_OFF
496 decl %gs:pda_irqcount 530 decl %gs:pda_irqcount
497 leaveq 531 leaveq
498 CFI_DEF_CFA_REGISTER rsp 532 CFI_DEF_CFA_REGISTER rsp
@@ -515,9 +549,21 @@ retint_check:
515 CFI_REMEMBER_STATE 549 CFI_REMEMBER_STATE
516 jnz retint_careful 550 jnz retint_careful
517retint_swapgs: 551retint_swapgs:
552 /*
553 * The iretq could re-enable interrupts:
554 */
555 cli
556 TRACE_IRQS_IRETQ
518 swapgs 557 swapgs
558 jmp restore_args
559
519retint_restore_args: 560retint_restore_args:
520 cli 561 cli
562 /*
563 * The iretq could re-enable interrupts:
564 */
565 TRACE_IRQS_IRETQ
566restore_args:
521 RESTORE_ARGS 0,8,0 567 RESTORE_ARGS 0,8,0
522iret_label: 568iret_label:
523 iretq 569 iretq
@@ -530,6 +576,7 @@ iret_label:
530 /* running with kernel gs */ 576 /* running with kernel gs */
531bad_iret: 577bad_iret:
532 movq $11,%rdi /* SIGSEGV */ 578 movq $11,%rdi /* SIGSEGV */
579 TRACE_IRQS_ON
533 sti 580 sti
534 jmp do_exit 581 jmp do_exit
535 .previous 582 .previous
@@ -539,6 +586,7 @@ retint_careful:
539 CFI_RESTORE_STATE 586 CFI_RESTORE_STATE
540 bt $TIF_NEED_RESCHED,%edx 587 bt $TIF_NEED_RESCHED,%edx
541 jnc retint_signal 588 jnc retint_signal
589 TRACE_IRQS_ON
542 sti 590 sti
543 pushq %rdi 591 pushq %rdi
544 CFI_ADJUST_CFA_OFFSET 8 592 CFI_ADJUST_CFA_OFFSET 8
@@ -547,11 +595,13 @@ retint_careful:
547 CFI_ADJUST_CFA_OFFSET -8 595 CFI_ADJUST_CFA_OFFSET -8
548 GET_THREAD_INFO(%rcx) 596 GET_THREAD_INFO(%rcx)
549 cli 597 cli
598 TRACE_IRQS_OFF
550 jmp retint_check 599 jmp retint_check
551 600
552retint_signal: 601retint_signal:
553 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx 602 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
554 jz retint_swapgs 603 jz retint_swapgs
604 TRACE_IRQS_ON
555 sti 605 sti
556 SAVE_REST 606 SAVE_REST
557 movq $-1,ORIG_RAX(%rsp) 607 movq $-1,ORIG_RAX(%rsp)
@@ -560,6 +610,7 @@ retint_signal:
560 call do_notify_resume 610 call do_notify_resume
561 RESTORE_REST 611 RESTORE_REST
562 cli 612 cli
613 TRACE_IRQS_OFF
563 movl $_TIF_NEED_RESCHED,%edi 614 movl $_TIF_NEED_RESCHED,%edi
564 GET_THREAD_INFO(%rcx) 615 GET_THREAD_INFO(%rcx)
565 jmp retint_check 616 jmp retint_check
@@ -666,7 +717,7 @@ END(spurious_interrupt)
666 717
667 /* error code is on the stack already */ 718 /* error code is on the stack already */
668 /* handle NMI like exceptions that can happen everywhere */ 719 /* handle NMI like exceptions that can happen everywhere */
669 .macro paranoidentry sym, ist=0 720 .macro paranoidentry sym, ist=0, irqtrace=1
670 SAVE_ALL 721 SAVE_ALL
671 cld 722 cld
672 movl $1,%ebx 723 movl $1,%ebx
@@ -691,8 +742,73 @@ END(spurious_interrupt)
691 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 742 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
692 .endif 743 .endif
693 cli 744 cli
745 .if \irqtrace
746 TRACE_IRQS_OFF
747 .endif
694 .endm 748 .endm
695 749
750 /*
751 * "Paranoid" exit path from exception stack.
752 * Paranoid because this is used by NMIs and cannot take
753 * any kernel state for granted.
754 * We don't do kernel preemption checks here, because only
755 * NMI should be common and it does not enable IRQs and
756 * cannot get reschedule ticks.
757 *
758 * "trace" is 0 for the NMI handler only, because irq-tracing
759 * is fundamentally NMI-unsafe. (we cannot change the soft and
760 * hard flags at once, atomically)
761 */
762 .macro paranoidexit trace=1
763 /* ebx: no swapgs flag */
764paranoid_exit\trace:
765 testl %ebx,%ebx /* swapgs needed? */
766 jnz paranoid_restore\trace
767 testl $3,CS(%rsp)
768 jnz paranoid_userspace\trace
769paranoid_swapgs\trace:
770 TRACE_IRQS_IRETQ 0
771 swapgs
772paranoid_restore\trace:
773 RESTORE_ALL 8
774 iretq
775paranoid_userspace\trace:
776 GET_THREAD_INFO(%rcx)
777 movl threadinfo_flags(%rcx),%ebx
778 andl $_TIF_WORK_MASK,%ebx
779 jz paranoid_swapgs\trace
780 movq %rsp,%rdi /* &pt_regs */
781 call sync_regs
782 movq %rax,%rsp /* switch stack for scheduling */
783 testl $_TIF_NEED_RESCHED,%ebx
784 jnz paranoid_schedule\trace
785 movl %ebx,%edx /* arg3: thread flags */
786 .if \trace
787 TRACE_IRQS_ON
788 .endif
789 sti
790 xorl %esi,%esi /* arg2: oldset */
791 movq %rsp,%rdi /* arg1: &pt_regs */
792 call do_notify_resume
793 cli
794 .if \trace
795 TRACE_IRQS_OFF
796 .endif
797 jmp paranoid_userspace\trace
798paranoid_schedule\trace:
799 .if \trace
800 TRACE_IRQS_ON
801 .endif
802 sti
803 call schedule
804 cli
805 .if \trace
806 TRACE_IRQS_OFF
807 .endif
808 jmp paranoid_userspace\trace
809 CFI_ENDPROC
810 .endm
811
696/* 812/*
697 * Exception entry point. This expects an error code/orig_rax on the stack 813 * Exception entry point. This expects an error code/orig_rax on the stack
698 * and the exception handler in %rax. 814 * and the exception handler in %rax.
@@ -748,6 +864,7 @@ error_exit:
748 movl %ebx,%eax 864 movl %ebx,%eax
749 RESTORE_REST 865 RESTORE_REST
750 cli 866 cli
867 TRACE_IRQS_OFF
751 GET_THREAD_INFO(%rcx) 868 GET_THREAD_INFO(%rcx)
752 testl %eax,%eax 869 testl %eax,%eax
753 jne retint_kernel 870 jne retint_kernel
@@ -755,6 +872,10 @@ error_exit:
755 movl $_TIF_WORK_MASK,%edi 872 movl $_TIF_WORK_MASK,%edi
756 andl %edi,%edx 873 andl %edi,%edx
757 jnz retint_careful 874 jnz retint_careful
875 /*
876 * The iret might restore flags:
877 */
878 TRACE_IRQS_IRETQ
758 swapgs 879 swapgs
759 RESTORE_ARGS 0,8,0 880 RESTORE_ARGS 0,8,0
760 jmp iret_label 881 jmp iret_label
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug)
916 pushq $0 1037 pushq $0
917 CFI_ADJUST_CFA_OFFSET 8 1038 CFI_ADJUST_CFA_OFFSET 8
918 paranoidentry do_debug, DEBUG_STACK 1039 paranoidentry do_debug, DEBUG_STACK
919 jmp paranoid_exit 1040 paranoidexit
920 CFI_ENDPROC
921END(debug) 1041END(debug)
922 .previous .text 1042 .previous .text
923 1043
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi)
926 INTR_FRAME 1046 INTR_FRAME
927 pushq $-1 1047 pushq $-1
928 CFI_ADJUST_CFA_OFFSET 8 1048 CFI_ADJUST_CFA_OFFSET 8
929 paranoidentry do_nmi 1049 paranoidentry do_nmi, 0, 0
930 /* 1050#ifdef CONFIG_TRACE_IRQFLAGS
931 * "Paranoid" exit path from exception stack. 1051 paranoidexit 0
932 * Paranoid because this is used by NMIs and cannot take 1052#else
933 * any kernel state for granted. 1053 jmp paranoid_exit1
934 * We don't do kernel preemption checks here, because only 1054 CFI_ENDPROC
935 * NMI should be common and it does not enable IRQs and 1055#endif
936 * cannot get reschedule ticks.
937 */
938 /* ebx: no swapgs flag */
939paranoid_exit:
940 testl %ebx,%ebx /* swapgs needed? */
941 jnz paranoid_restore
942 testl $3,CS(%rsp)
943 jnz paranoid_userspace
944paranoid_swapgs:
945 swapgs
946paranoid_restore:
947 RESTORE_ALL 8
948 iretq
949paranoid_userspace:
950 GET_THREAD_INFO(%rcx)
951 movl threadinfo_flags(%rcx),%ebx
952 andl $_TIF_WORK_MASK,%ebx
953 jz paranoid_swapgs
954 movq %rsp,%rdi /* &pt_regs */
955 call sync_regs
956 movq %rax,%rsp /* switch stack for scheduling */
957 testl $_TIF_NEED_RESCHED,%ebx
958 jnz paranoid_schedule
959 movl %ebx,%edx /* arg3: thread flags */
960 sti
961 xorl %esi,%esi /* arg2: oldset */
962 movq %rsp,%rdi /* arg1: &pt_regs */
963 call do_notify_resume
964 cli
965 jmp paranoid_userspace
966paranoid_schedule:
967 sti
968 call schedule
969 cli
970 jmp paranoid_userspace
971 CFI_ENDPROC
972END(nmi) 1056END(nmi)
973 .previous .text 1057 .previous .text
974 1058
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3)
977 pushq $0 1061 pushq $0
978 CFI_ADJUST_CFA_OFFSET 8 1062 CFI_ADJUST_CFA_OFFSET 8
979 paranoidentry do_int3, DEBUG_STACK 1063 paranoidentry do_int3, DEBUG_STACK
980 jmp paranoid_exit 1064 jmp paranoid_exit1
981 CFI_ENDPROC 1065 CFI_ENDPROC
982END(int3) 1066END(int3)
983 .previous .text 1067 .previous .text
@@ -1006,7 +1090,7 @@ END(reserved)
1006ENTRY(double_fault) 1090ENTRY(double_fault)
1007 XCPT_FRAME 1091 XCPT_FRAME
1008 paranoidentry do_double_fault 1092 paranoidentry do_double_fault
1009 jmp paranoid_exit 1093 jmp paranoid_exit1
1010 CFI_ENDPROC 1094 CFI_ENDPROC
1011END(double_fault) 1095END(double_fault)
1012 1096
@@ -1022,7 +1106,7 @@ END(segment_not_present)
1022ENTRY(stack_segment) 1106ENTRY(stack_segment)
1023 XCPT_FRAME 1107 XCPT_FRAME
1024 paranoidentry do_stack_segment 1108 paranoidentry do_stack_segment
1025 jmp paranoid_exit 1109 jmp paranoid_exit1
1026 CFI_ENDPROC 1110 CFI_ENDPROC
1027END(stack_segment) 1111END(stack_segment)
1028 1112
@@ -1050,7 +1134,7 @@ ENTRY(machine_check)
1050 pushq $0 1134 pushq $0
1051 CFI_ADJUST_CFA_OFFSET 8 1135 CFI_ADJUST_CFA_OFFSET 8
1052 paranoidentry do_machine_check 1136 paranoidentry do_machine_check
1053 jmp paranoid_exit 1137 jmp paranoid_exit1
1054 CFI_ENDPROC 1138 CFI_ENDPROC
1055END(machine_check) 1139END(machine_check)
1056#endif 1140#endif
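
The invariant the TRACE_IRQS_* annotations above maintain is that lockdep's notion of the hardirq state never lags the real flag: TRACE_IRQS_ON is issued just before sti, TRACE_IRQS_OFF just after cli, and the new TRACE_IRQS_IRETQ macro covers the paths where iretq itself may re-enable interrupts. The C-side helpers are assumed to keep the same ordering, roughly:

	#ifdef CONFIG_TRACE_IRQFLAGS
	# define local_irq_enable() \
		do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
	# define local_irq_disable() \
		do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
	#else
	# define local_irq_enable()	raw_local_irq_enable()
	# define local_irq_disable()	raw_local_irq_disable()
	#endif
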
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index e6a71c9556d9..36647ce6aecb 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -85,6 +85,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
85 clear_bss(); 85 clear_bss();
86 86
87 /* 87 /*
88 * This must be called really, really early:
89 */
90 lockdep_init();
91
92 /*
88 * switch to init_level4_pgt from boot_level4_pgt 93 * switch to init_level4_pgt from boot_level4_pgt
89 */ 94 */
90 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); 95 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
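
lockdep classifies a lock the first time it is acquired, and that lookup walks hash tables which must already be set up; since the very first spinlock or printk can fire before any other init code runs, lockdep_init() has to come first. A rough sketch of what such an init is assumed to do (paraphrased, not quoted from kernel/lockdep.c; the table names are placeholders):

	void __init lockdep_init(void)
	{
		int i;

		/*
		 * Empty the class/chain hash chains so the first
		 * register_lock_class() lookup sees valid list heads:
		 */
		for (i = 0; i < CLASSHASH_SIZE; i++)
			INIT_LIST_HEAD(classhash_table + i);

		for (i = 0; i < CHAINHASH_SIZE; i++)
			INIT_LIST_HEAD(chainhash_table + i);

		lockdep_initialized = 1;
	}
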
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5f7bfc..5221a53e90c1 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void)
177 local_irq_save(flags); 177 local_irq_save(flags);
178 pending = local_softirq_pending(); 178 pending = local_softirq_pending();
179 /* Switch to interrupt stack */ 179 /* Switch to interrupt stack */
180 if (pending) 180 if (pending) {
181 call_softirq(); 181 call_softirq();
182 WARN_ON_ONCE(softirq_count());
183 }
182 local_irq_restore(flags); 184 local_irq_restore(flags);
183} 185}
184EXPORT_SYMBOL(do_softirq); 186EXPORT_SYMBOL(do_softirq);
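
The added WARN_ON_ONCE() asserts that the softirq part of the preempt count is balanced again once call_softirq() returns; an unbalanced count would mean a handler left bottom halves disabled and would skew the irq-state tracking. A hedged reference sketch of the check, assuming softirq_count() is the SOFTIRQ bits of preempt_count():

	#include <linux/kernel.h>
	#include <linux/hardirq.h>

	static inline void assert_softirqs_balanced(void)
	{
		/* fires iff someone returned with local_bh_disable() in effect */
		WARN_ON_ONCE(softirq_count());
	}
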
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 476c1472fc07..5baa0c726e97 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void)
127static __init void nmi_cpu_busy(void *data) 127static __init void nmi_cpu_busy(void *data)
128{ 128{
129 volatile int *endflag = data; 129 volatile int *endflag = data;
130 local_irq_enable(); 130 local_irq_enable_in_hardirq();
131 /* Intentionally don't use cpu_relax here. This is 131 /* Intentionally don't use cpu_relax here. This is
132 to make sure that the performance counter really ticks, 132 to make sure that the performance counter really ticks,
133 even if there is a simulator or similar that catches the 133 even if there is a simulator or similar that catches the
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index ca56e19b8b6e..bb6745d13b8f 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -296,7 +296,7 @@ void __show_regs(struct pt_regs * regs)
296 system_utsname.version); 296 system_utsname.version);
297 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); 297 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
298 printk_address(regs->rip); 298 printk_address(regs->rip);
299 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, 299 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
300 regs->eflags); 300 regs->eflags);
301 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 301 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
302 regs->rax, regs->rbx, regs->rcx); 302 regs->rax, regs->rbx, regs->rcx);
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 9705a6a384f1..b7c705969791 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -775,6 +775,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
775 }; 775 };
776 DECLARE_WORK(work, do_fork_idle, &c_idle); 776 DECLARE_WORK(work, do_fork_idle, &c_idle);
777 777
778 lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key);
779
778 /* allocate memory for gdts of secondary cpus. Hotplug is considered */ 780 /* allocate memory for gdts of secondary cpus. Hotplug is considered */
779 if (!cpu_gdt_descr[cpu].address && 781 if (!cpu_gdt_descr[cpu].address &&
780 !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { 782 !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
new file mode 100644
index 000000000000..32cf55eb9af8
--- /dev/null
+++ b/arch/x86_64/kernel/stacktrace.c
@@ -0,0 +1,221 @@
1/*
2 * arch/x86_64/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/stacktrace.h>
10
11#include <asm/smp.h>
12
13static inline int
14in_range(unsigned long start, unsigned long addr, unsigned long end)
15{
16 return addr >= start && addr <= end;
17}
18
19static unsigned long
20get_stack_end(struct task_struct *task, unsigned long stack)
21{
22 unsigned long stack_start, stack_end, flags;
23 int i, cpu;
24
25 /*
26 * The most common case is that we are in the task stack:
27 */
28 stack_start = (unsigned long)task->thread_info;
29 stack_end = stack_start + THREAD_SIZE;
30
31 if (in_range(stack_start, stack, stack_end))
32 return stack_end;
33
34 /*
35 * We are in an interrupt if irqstackptr is set:
36 */
37 raw_local_irq_save(flags);
38 cpu = safe_smp_processor_id();
39 stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
40
41 if (stack_end) {
42 stack_start = stack_end & ~(IRQSTACKSIZE-1);
43 if (in_range(stack_start, stack, stack_end))
44 goto out_restore;
45 /*
46 * We get here if we are in an IRQ context but we
47 * are also in an exception stack.
48 */
49 }
50
51 /*
52 * Iterate over all exception stacks, and figure out whether
53 * 'stack' is in one of them:
54 */
55 for (i = 0; i < N_EXCEPTION_STACKS; i++) {
56 /*
57 * set 'end' to the end of the exception stack.
58 */
59 stack_end = per_cpu(init_tss, cpu).ist[i];
60 stack_start = stack_end - EXCEPTION_STKSZ;
61
62 /*
63 * Is 'stack' above this exception frame's end?
64 * If yes then skip to the next frame.
65 */
66 if (stack >= stack_end)
67 continue;
68 /*
69 * Is 'stack' above this exception frame's start address?
70 * If yes then we found the right frame.
71 */
72 if (stack >= stack_start)
73 goto out_restore;
74
75 /*
76 * If this is a debug stack, and if it has a larger size than
77 * the usual exception stacks, then 'stack' might still
78 * be within the lower portion of the debug stack:
79 */
80#if DEBUG_STKSZ > EXCEPTION_STKSZ
81 if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) {
82 /*
83 * Black magic. A large debug stack is composed of
84 * multiple exception stack entries, which we
85 * iterate through now. Don't look:
86 */
87 do {
88 stack_end -= EXCEPTION_STKSZ;
89 stack_start -= EXCEPTION_STKSZ;
90 } while (stack < stack_start);
91
92 goto out_restore;
93 }
94#endif
95 }
96 /*
97 * Ok, 'stack' is not pointing to any of the system stacks.
98 */
99 stack_end = 0;
100
101out_restore:
102 raw_local_irq_restore(flags);
103
104 return stack_end;
105}
106
107
108/*
109 * Save stack-backtrace addresses into a stack_trace buffer:
110 */
111static inline unsigned long
112save_context_stack(struct stack_trace *trace, unsigned int skip,
113 unsigned long stack, unsigned long stack_end)
114{
115 unsigned long addr;
116
117#ifdef CONFIG_FRAME_POINTER
118 unsigned long prev_stack = 0;
119
120 while (in_range(prev_stack, stack, stack_end)) {
121 pr_debug("stack: %p\n", (void *)stack);
122 addr = (unsigned long)(((unsigned long *)stack)[1]);
123 pr_debug("addr: %p\n", (void *)addr);
124 if (!skip)
125 trace->entries[trace->nr_entries++] = addr-1;
126 else
127 skip--;
128 if (trace->nr_entries >= trace->max_entries)
129 break;
130 if (!addr)
131 return 0;
132 /*
133 * Stack frames must go forwards (otherwise a loop could
134 * happen if the stackframe is corrupted), so we move
135 * prev_stack forwards:
136 */
137 prev_stack = stack;
138 stack = (unsigned long)(((unsigned long *)stack)[0]);
139 }
140 pr_debug("invalid: %p\n", (void *)stack);
141#else
142 while (stack < stack_end) {
143 addr = ((unsigned long *)stack)[0];
144 stack += sizeof(long);
145 if (__kernel_text_address(addr)) {
146 if (!skip)
147 trace->entries[trace->nr_entries++] = addr-1;
148 else
149 skip--;
150 if (trace->nr_entries >= trace->max_entries)
151 break;
152 }
153 }
154#endif
155 return stack;
156}
157
158#define MAX_STACKS 10
159
160/*
161 * Save stack-backtrace addresses into a stack_trace buffer.
162 * If all_contexts is set, all contexts (hardirq, softirq and process)
163 * are saved. If not set then only the current context is saved.
164 */
165void save_stack_trace(struct stack_trace *trace,
166 struct task_struct *task, int all_contexts,
167 unsigned int skip)
168{
169 unsigned long stack = (unsigned long)&stack;
170 int i, nr_stacks = 0, stacks_done[MAX_STACKS];
171
172 WARN_ON(trace->nr_entries || !trace->max_entries);
173
174 if (!task)
175 task = current;
176
177 pr_debug("task: %p, ti: %p\n", task, task->thread_info);
178
179 if (!task || task == current) {
180 /* Grab rbp right from our regs: */
181 asm ("mov %%rbp, %0" : "=r" (stack));
182 pr_debug("rbp: %p\n", (void *)stack);
183 } else {
184 /* rbp is the last reg pushed by switch_to(): */
185 stack = task->thread.rsp;
186 pr_debug("other task rsp: %p\n", (void *)stack);
187 stack = (unsigned long)(((unsigned long *)stack)[0]);
188 pr_debug("other task rbp: %p\n", (void *)stack);
189 }
190
191 while (1) {
192 unsigned long stack_end = get_stack_end(task, stack);
193
194 pr_debug("stack: %p\n", (void *)stack);
195 pr_debug("stack end: %p\n", (void *)stack_end);
196
197 /*
198 * Invalid stack address?
199 */
200 if (!stack_end)
201 return;
202 /*
203 * Were we in this stack already? (recursion)
204 */
205 for (i = 0; i < nr_stacks; i++)
206 if (stacks_done[i] == stack_end)
207 return;
208 stacks_done[nr_stacks] = stack_end;
209
210 stack = save_context_stack(trace, skip, stack, stack_end);
211 if (!all_contexts || !stack ||
212 trace->nr_entries >= trace->max_entries)
213 return;
214 trace->entries[trace->nr_entries++] = ULONG_MAX;
215 if (trace->nr_entries >= trace->max_entries)
216 return;
217 if (++nr_stacks >= MAX_STACKS)
218 return;
219 }
220}
221
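
The CONFIG_FRAME_POINTER branch above follows the rbp chain, where each frame stores the caller's rbp at offset 0 and the return address at offset 1. A self-contained user-space sketch of the same walk, assuming x86-64 and a build with -fno-omit-frame-pointer (illustrative only; it has no relation to struct stack_trace):

	#include <stdio.h>

	static void backtrace_fp(void)
	{
		unsigned long *frame;

		/* current frame pointer */
		asm volatile("mov %%rbp, %0" : "=r" (frame));

		while (frame) {
			unsigned long ret  = frame[1];	/* return address */
			unsigned long next = frame[0];	/* caller's rbp   */

			if (!ret)
				break;
			printf("  [<%016lx>]\n", ret);
			/* frames must move towards higher addresses */
			if (next <= (unsigned long)frame)
				break;
			frame = (unsigned long *)next;
		}
	}

	int main(void)
	{
		backtrace_fp();
		return 0;
	}
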
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 5a5311d3de0f..79d05c482072 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -110,28 +110,31 @@ static int kstack_depth_to_print = 12;
110static int call_trace = 1; 110static int call_trace = 1;
111 111
112#ifdef CONFIG_KALLSYMS 112#ifdef CONFIG_KALLSYMS
113#include <linux/kallsyms.h> 113# include <linux/kallsyms.h>
114int printk_address(unsigned long address) 114void printk_address(unsigned long address)
115{ 115{
116 unsigned long offset = 0, symsize; 116 unsigned long offset = 0, symsize;
117 const char *symname; 117 const char *symname;
118 char *modname; 118 char *modname;
119 char *delim = ":"; 119 char *delim = ":";
120 char namebuf[128]; 120 char namebuf[128];
121 121
122 symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); 122 symname = kallsyms_lookup(address, &symsize, &offset,
123 if (!symname) 123 &modname, namebuf);
124 return printk("[<%016lx>]", address); 124 if (!symname) {
125 if (!modname) 125 printk(" [<%016lx>]\n", address);
126 return;
127 }
128 if (!modname)
126 modname = delim = ""; 129 modname = delim = "";
127 return printk("<%016lx>{%s%s%s%s%+ld}", 130 printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
128 address, delim, modname, delim, symname, offset); 131 address, delim, modname, delim, symname, offset, symsize);
129} 132}
130#else 133#else
131int printk_address(unsigned long address) 134void printk_address(unsigned long address)
132{ 135{
133 return printk("[<%016lx>]", address); 136 printk(" [<%016lx>]\n", address);
134} 137}
135#endif 138#endif
136 139
137static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 140static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -149,10 +152,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
149 }; 152 };
150 unsigned k; 153 unsigned k;
151 154
155 /*
156 * Iterate over all exception stacks, and figure out whether
157 * 'stack' is in one of them:
158 */
152 for (k = 0; k < N_EXCEPTION_STACKS; k++) { 159 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
153 unsigned long end; 160 unsigned long end;
154 161
162 /*
163 * set 'end' to the end of the exception stack.
164 */
155 switch (k + 1) { 165 switch (k + 1) {
166 /*
167 * TODO: this block is not needed i think, because
168 * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
169 * properly too.
170 */
156#if DEBUG_STKSZ > EXCEPTION_STKSZ 171#if DEBUG_STKSZ > EXCEPTION_STKSZ
157 case DEBUG_STACK: 172 case DEBUG_STACK:
158 end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; 173 end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
@@ -162,19 +177,43 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
162 end = per_cpu(init_tss, cpu).ist[k]; 177 end = per_cpu(init_tss, cpu).ist[k];
163 break; 178 break;
164 } 179 }
180 /*
181 * Is 'stack' above this exception frame's end?
182 * If yes then skip to the next frame.
183 */
165 if (stack >= end) 184 if (stack >= end)
166 continue; 185 continue;
186 /*
187 * Is 'stack' above this exception frame's start address?
188 * If yes then we found the right frame.
189 */
167 if (stack >= end - EXCEPTION_STKSZ) { 190 if (stack >= end - EXCEPTION_STKSZ) {
191 /*
192 * Make sure we only iterate through an exception
193 * stack once. If it comes up for the second time
194 * then there's something wrong going on - just
195 * break out and return NULL:
196 */
168 if (*usedp & (1U << k)) 197 if (*usedp & (1U << k))
169 break; 198 break;
170 *usedp |= 1U << k; 199 *usedp |= 1U << k;
171 *idp = ids[k]; 200 *idp = ids[k];
172 return (unsigned long *)end; 201 return (unsigned long *)end;
173 } 202 }
203 /*
204 * If this is a debug stack, and if it has a larger size than
205 * the usual exception stacks, then 'stack' might still
206 * be within the lower portion of the debug stack:
207 */
174#if DEBUG_STKSZ > EXCEPTION_STKSZ 208#if DEBUG_STKSZ > EXCEPTION_STKSZ
175 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { 209 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
176 unsigned j = N_EXCEPTION_STACKS - 1; 210 unsigned j = N_EXCEPTION_STACKS - 1;
177 211
212 /*
213 * Black magic. A large debug stack is composed of
214 * multiple exception stack entries, which we
215 * iterate through now. Don't look:
216 */
178 do { 217 do {
179 ++j; 218 ++j;
180 end -= EXCEPTION_STKSZ; 219 end -= EXCEPTION_STKSZ;
@@ -193,20 +232,14 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
193 232
194static int show_trace_unwind(struct unwind_frame_info *info, void *context) 233static int show_trace_unwind(struct unwind_frame_info *info, void *context)
195{ 234{
196 int i = 11, n = 0; 235 int n = 0;
197 236
198 while (unwind(info) == 0 && UNW_PC(info)) { 237 while (unwind(info) == 0 && UNW_PC(info)) {
199 ++n; 238 n++;
200 if (i > 50) { 239 printk_address(UNW_PC(info));
201 printk("\n ");
202 i = 7;
203 } else
204 i += printk(" ");
205 i += printk_address(UNW_PC(info));
206 if (arch_unw_user_mode(info)) 240 if (arch_unw_user_mode(info))
207 break; 241 break;
208 } 242 }
209 printk("\n");
210 return n; 243 return n;
211} 244}
212 245
@@ -224,7 +257,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
224 int i = 11; 257 int i = 11;
225 unsigned used = 0; 258 unsigned used = 0;
226 259
227 printk("\nCall Trace:"); 260 printk("\nCall Trace:\n");
228 261
229 if (!tsk) 262 if (!tsk)
230 tsk = current; 263 tsk = current;
@@ -250,16 +283,15 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
250 } 283 }
251 } 284 }
252 285
286 /*
287 * Print function call entries within a stack. 'cond' is the
288 * "end of stackframe" condition, that the 'stack++'
289 * iteration will eventually trigger.
290 */
253#define HANDLE_STACK(cond) \ 291#define HANDLE_STACK(cond) \
254 do while (cond) { \ 292 do while (cond) { \
255 unsigned long addr = *stack++; \ 293 unsigned long addr = *stack++; \
256 if (kernel_text_address(addr)) { \ 294 if (kernel_text_address(addr)) { \
257 if (i > 50) { \
258 printk("\n "); \
259 i = 0; \
260 } \
261 else \
262 i += printk(" "); \
263 /* \ 295 /* \
264 * If the address is either in the text segment of the \ 296 * If the address is either in the text segment of the \
265 * kernel, or in the region which contains vmalloc'ed \ 297 * kernel, or in the region which contains vmalloc'ed \
@@ -268,20 +300,30 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
268 * down the cause of the crash will be able to figure \ 300 * down the cause of the crash will be able to figure \
269 * out the call path that was taken. \ 301 * out the call path that was taken. \
270 */ \ 302 */ \
271 i += printk_address(addr); \ 303 printk_address(addr); \
272 } \ 304 } \
273 } while (0) 305 } while (0)
274 306
275 for(; ; ) { 307 /*
308 * Print function call entries in all stacks, starting at the
309 * current stack address. If the stacks consist of nested
310 * exceptions, we walk them all, following the links from one stack to the next:
311 */
312 for ( ; ; ) {
276 const char *id; 313 const char *id;
277 unsigned long *estack_end; 314 unsigned long *estack_end;
278 estack_end = in_exception_stack(cpu, (unsigned long)stack, 315 estack_end = in_exception_stack(cpu, (unsigned long)stack,
279 &used, &id); 316 &used, &id);
280 317
281 if (estack_end) { 318 if (estack_end) {
282 i += printk(" <%s>", id); 319 printk(" <%s>", id);
283 HANDLE_STACK (stack < estack_end); 320 HANDLE_STACK (stack < estack_end);
284 i += printk(" <EOE>"); 321 printk(" <EOE>");
322 /*
323 * We link to the next stack via the
324 * second-to-last pointer (index -2 to end) in the
325 * exception stack:
326 */
285 stack = (unsigned long *) estack_end[-2]; 327 stack = (unsigned long *) estack_end[-2];
286 continue; 328 continue;
287 } 329 }
@@ -291,19 +333,28 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
291 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 333 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
292 334
293 if (stack >= irqstack && stack < irqstack_end) { 335 if (stack >= irqstack && stack < irqstack_end) {
294 i += printk(" <IRQ>"); 336 printk(" <IRQ>");
295 HANDLE_STACK (stack < irqstack_end); 337 HANDLE_STACK (stack < irqstack_end);
338 /*
339 * We link to the next stack (which would be
340 * the process stack normally) the last
341 * pointer (index -1 to end) in the IRQ stack:
342 */
296 stack = (unsigned long *) (irqstack_end[-1]); 343 stack = (unsigned long *) (irqstack_end[-1]);
297 irqstack_end = NULL; 344 irqstack_end = NULL;
298 i += printk(" <EOI>"); 345 printk(" <EOI>");
299 continue; 346 continue;
300 } 347 }
301 } 348 }
302 break; 349 break;
303 } 350 }
304 351
352 /*
353 * This prints the process stack:
354 */
305 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); 355 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
306#undef HANDLE_STACK 356#undef HANDLE_STACK
357
307 printk("\n"); 358 printk("\n");
308} 359}
309 360
@@ -337,8 +388,8 @@ static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned
337 break; 388 break;
338 } 389 }
339 if (i && ((i % 4) == 0)) 390 if (i && ((i % 4) == 0))
340 printk("\n "); 391 printk("\n");
341 printk("%016lx ", *stack++); 392 printk(" %016lx", *stack++);
342 touch_nmi_watchdog(); 393 touch_nmi_watchdog();
343 } 394 }
344 show_trace(tsk, regs, rsp); 395 show_trace(tsk, regs, rsp);
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index e49af0032e94..332ea5dff916 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -47,6 +47,11 @@
47 thunk_retrax __down_failed_interruptible,__down_interruptible 47 thunk_retrax __down_failed_interruptible,__down_interruptible
48 thunk_retrax __down_failed_trylock,__down_trylock 48 thunk_retrax __down_failed_trylock,__down_trylock
49 thunk __up_wakeup,__up 49 thunk __up_wakeup,__up
50
51#ifdef CONFIG_TRACE_IRQFLAGS
52 thunk trace_hardirqs_on_thunk,trace_hardirqs_on
53 thunk trace_hardirqs_off_thunk,trace_hardirqs_off
54#endif
50 55
51 /* SAVE_ARGS below is used only for the .cfi directives it contains. */ 56 /* SAVE_ARGS below is used only for the .cfi directives it contains. */
52 CFI_STARTPROC 57 CFI_STARTPROC
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 5afcf6eb00fa..ac8ea66ccb94 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -570,7 +570,6 @@ no_context:
570 printk(KERN_ALERT "Unable to handle kernel paging request"); 570 printk(KERN_ALERT "Unable to handle kernel paging request");
571 printk(" at %016lx RIP: \n" KERN_ALERT,address); 571 printk(" at %016lx RIP: \n" KERN_ALERT,address);
572 printk_address(regs->rip); 572 printk_address(regs->rip);
573 printk("\n");
574 dump_pagetable(address); 573 dump_pagetable(address);
575 tsk->thread.cr2 = address; 574 tsk->thread.cr2 = address;
576 tsk->thread.trap_no = 14; 575 tsk->thread.trap_no = 14;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 5813d63c20af..ab17c7224bb6 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2516,7 +2516,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2516int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, 2516int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2517 struct request *rq, int at_head) 2517 struct request *rq, int at_head)
2518{ 2518{
2519 DECLARE_COMPLETION(wait); 2519 DECLARE_COMPLETION_ONSTACK(wait);
2520 char sense[SCSI_SENSE_BUFFERSIZE]; 2520 char sense[SCSI_SENSE_BUFFERSIZE];
2521 int err = 0; 2521 int err = 0;
2522 2522
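
With lockdep enabled, objects that live on the stack are expected to be initialised at run time rather than through a static initializer, which is what the new _ONSTACK variant provides; statically allocated completions can keep plain DECLARE_COMPLETION(). A hedged usage sketch (start_async_work() is a hypothetical helper, used only for illustration):

	#include <linux/completion.h>

	/* hypothetical asynchronous helper, for illustration only */
	extern void start_async_work(void (*done)(void *), void *data);

	static void my_work_done(void *data)
	{
		complete(data);
	}

	static void wait_for_my_work(void)
	{
		/* on-stack: use the _ONSTACK variant for its own lockdep key */
		DECLARE_COMPLETION_ONSTACK(done);

		start_async_work(my_work_done, &done);
		wait_for_completion(&done);
	}
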
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 0242cbb86a87..5109fa37c662 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -249,18 +249,6 @@ static int irqdma_allocated;
249#include <linux/cdrom.h> /* for the compatibility eject ioctl */ 249#include <linux/cdrom.h> /* for the compatibility eject ioctl */
250#include <linux/completion.h> 250#include <linux/completion.h>
251 251
252/*
253 * Interrupt freeing also means /proc VFS work - dont do it
254 * from interrupt context. We push this work into keventd:
255 */
256static void fd_free_irq_fn(void *data)
257{
258 fd_free_irq();
259}
260
261static DECLARE_WORK(fd_free_irq_work, fd_free_irq_fn, NULL);
262
263
264static struct request *current_req; 252static struct request *current_req;
265static struct request_queue *floppy_queue; 253static struct request_queue *floppy_queue;
266static void do_fd_request(request_queue_t * q); 254static void do_fd_request(request_queue_t * q);
@@ -826,15 +814,6 @@ static int set_dor(int fdc, char mask, char data)
826 UDRS->select_date = jiffies; 814 UDRS->select_date = jiffies;
827 } 815 }
828 } 816 }
829 /*
830 * We should propagate failures to grab the resources back
831 * nicely from here. Actually we ought to rewrite the fd
832 * driver some day too.
833 */
834 if (newdor & FLOPPY_MOTOR_MASK)
835 floppy_grab_irq_and_dma();
836 if (olddor & FLOPPY_MOTOR_MASK)
837 floppy_release_irq_and_dma();
838 return olddor; 817 return olddor;
839} 818}
840 819
@@ -892,8 +871,6 @@ static int _lock_fdc(int drive, int interruptible, int line)
892 line); 871 line);
893 return -1; 872 return -1;
894 } 873 }
895 if (floppy_grab_irq_and_dma() == -1)
896 return -EBUSY;
897 874
898 if (test_and_set_bit(0, &fdc_busy)) { 875 if (test_and_set_bit(0, &fdc_busy)) {
899 DECLARE_WAITQUEUE(wait, current); 876 DECLARE_WAITQUEUE(wait, current);
@@ -915,6 +892,8 @@ static int _lock_fdc(int drive, int interruptible, int line)
915 892
916 set_current_state(TASK_RUNNING); 893 set_current_state(TASK_RUNNING);
917 remove_wait_queue(&fdc_wait, &wait); 894 remove_wait_queue(&fdc_wait, &wait);
895
896 flush_scheduled_work();
918 } 897 }
919 command_status = FD_COMMAND_NONE; 898 command_status = FD_COMMAND_NONE;
920 899
@@ -948,7 +927,6 @@ static inline void unlock_fdc(void)
948 if (elv_next_request(floppy_queue)) 927 if (elv_next_request(floppy_queue))
949 do_fd_request(floppy_queue); 928 do_fd_request(floppy_queue);
950 spin_unlock_irqrestore(&floppy_lock, flags); 929 spin_unlock_irqrestore(&floppy_lock, flags);
951 floppy_release_irq_and_dma();
952 wake_up(&fdc_wait); 930 wake_up(&fdc_wait);
953} 931}
954 932
@@ -3694,8 +3672,8 @@ static int floppy_release(struct inode *inode, struct file *filp)
3694 } 3672 }
3695 if (!UDRS->fd_ref) 3673 if (!UDRS->fd_ref)
3696 opened_bdev[drive] = NULL; 3674 opened_bdev[drive] = NULL;
3697 floppy_release_irq_and_dma();
3698 mutex_unlock(&open_lock); 3675 mutex_unlock(&open_lock);
3676
3699 return 0; 3677 return 0;
3700} 3678}
3701 3679
@@ -3726,9 +3704,6 @@ static int floppy_open(struct inode *inode, struct file *filp)
3726 if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_flags & O_EXCL))) 3704 if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_flags & O_EXCL)))
3727 goto out2; 3705 goto out2;
3728 3706
3729 if (floppy_grab_irq_and_dma())
3730 goto out2;
3731
3732 if (filp->f_flags & O_EXCL) 3707 if (filp->f_flags & O_EXCL)
3733 UDRS->fd_ref = -1; 3708 UDRS->fd_ref = -1;
3734 else 3709 else
@@ -3805,7 +3780,6 @@ out:
3805 UDRS->fd_ref--; 3780 UDRS->fd_ref--;
3806 if (!UDRS->fd_ref) 3781 if (!UDRS->fd_ref)
3807 opened_bdev[drive] = NULL; 3782 opened_bdev[drive] = NULL;
3808 floppy_release_irq_and_dma();
3809out2: 3783out2:
3810 mutex_unlock(&open_lock); 3784 mutex_unlock(&open_lock);
3811 return res; 3785 return res;
@@ -3822,14 +3796,9 @@ static int check_floppy_change(struct gendisk *disk)
3822 return 1; 3796 return 1;
3823 3797
3824 if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { 3798 if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
3825 if (floppy_grab_irq_and_dma()) {
3826 return 1;
3827 }
3828
3829 lock_fdc(drive, 0); 3799 lock_fdc(drive, 0);
3830 poll_drive(0, 0); 3800 poll_drive(0, 0);
3831 process_fd_request(); 3801 process_fd_request();
3832 floppy_release_irq_and_dma();
3833 } 3802 }
3834 3803
3835 if (UTESTF(FD_DISK_CHANGED) || 3804 if (UTESTF(FD_DISK_CHANGED) ||
@@ -4346,7 +4315,6 @@ static int __init floppy_init(void)
4346 fdc = 0; 4315 fdc = 0;
4347 del_timer(&fd_timeout); 4316 del_timer(&fd_timeout);
4348 current_drive = 0; 4317 current_drive = 0;
4349 floppy_release_irq_and_dma();
4350 initialising = 0; 4318 initialising = 0;
4351 if (have_no_fdc) { 4319 if (have_no_fdc) {
4352 DPRINT("no floppy controllers found\n"); 4320 DPRINT("no floppy controllers found\n");
@@ -4504,7 +4472,7 @@ static void floppy_release_irq_and_dma(void)
4504 if (irqdma_allocated) { 4472 if (irqdma_allocated) {
4505 fd_disable_dma(); 4473 fd_disable_dma();
4506 fd_free_dma(); 4474 fd_free_dma();
4507 schedule_work(&fd_free_irq_work); 4475 fd_free_irq();
4508 irqdma_allocated = 0; 4476 irqdma_allocated = 0;
4509 } 4477 }
4510 set_dor(0, ~0, 8); 4478 set_dor(0, ~0, 8);
@@ -4600,8 +4568,6 @@ void cleanup_module(void)
4600 /* eject disk, if any */ 4568 /* eject disk, if any */
4601 fd_eject(0); 4569 fd_eject(0);
4602 4570
4603 flush_scheduled_work(); /* fd_free_irq() might be pending */
4604
4605 wait_for_completion(&device_release); 4571 wait_for_completion(&device_release);
4606} 4572}
4607 4573
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index ffcf15c30e90..d9c5a9142ad1 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -1059,7 +1059,7 @@ ioctl_out:
1059 return ret_val; 1059 return ret_val;
1060} 1060}
1061 1061
1062static struct file_operations agp_fops = 1062static const struct file_operations agp_fops =
1063{ 1063{
1064 .owner = THIS_MODULE, 1064 .owner = THIS_MODULE,
1065 .llseek = no_llseek, 1065 .llseek = no_llseek,
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index bcc4668835b5..10a389dafd60 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -112,7 +112,7 @@ static int ac_ioctl(struct inode *, struct file *, unsigned int,
112 unsigned long); 112 unsigned long);
113static irqreturn_t ac_interrupt(int, void *, struct pt_regs *); 113static irqreturn_t ac_interrupt(int, void *, struct pt_regs *);
114 114
115static struct file_operations ac_fops = { 115static const struct file_operations ac_fops = {
116 .owner = THIS_MODULE, 116 .owner = THIS_MODULE,
117 .llseek = no_llseek, 117 .llseek = no_llseek,
118 .read = ac_read, 118 .read = ac_read,
diff --git a/drivers/char/cs5535_gpio.c b/drivers/char/cs5535_gpio.c
index 46d66037b917..8ce3f34cfc22 100644
--- a/drivers/char/cs5535_gpio.c
+++ b/drivers/char/cs5535_gpio.c
@@ -158,7 +158,7 @@ static int cs5535_gpio_open(struct inode *inode, struct file *file)
158 return nonseekable_open(inode, file); 158 return nonseekable_open(inode, file);
159} 159}
160 160
161static struct file_operations cs5535_gpio_fops = { 161static const struct file_operations cs5535_gpio_fops = {
162 .owner = THIS_MODULE, 162 .owner = THIS_MODULE,
163 .write = cs5535_gpio_write, 163 .write = cs5535_gpio_write,
164 .read = cs5535_gpio_read, 164 .read = cs5535_gpio_read,
diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c
index d755cac14bc1..21c8229f5443 100644
--- a/drivers/char/ds1286.c
+++ b/drivers/char/ds1286.c
@@ -281,7 +281,7 @@ static unsigned int ds1286_poll(struct file *file, poll_table *wait)
281 * The various file operations we support. 281 * The various file operations we support.
282 */ 282 */
283 283
284static struct file_operations ds1286_fops = { 284static const struct file_operations ds1286_fops = {
285 .llseek = no_llseek, 285 .llseek = no_llseek,
286 .read = ds1286_read, 286 .read = ds1286_read,
287 .poll = ds1286_poll, 287 .poll = ds1286_poll,
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index 625e8b517005..bcdb107aa967 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -282,7 +282,7 @@ get_rtc_status(char *buf)
282 282
283/* The various file operations we support. */ 283/* The various file operations we support. */
284 284
285static struct file_operations rtc_fops = { 285static const struct file_operations rtc_fops = {
286 .owner = THIS_MODULE, 286 .owner = THIS_MODULE,
287 .ioctl = rtc_ioctl, 287 .ioctl = rtc_ioctl,
288}; 288};
diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c
index 953e670dcd09..48cb8f0e8ebf 100644
--- a/drivers/char/ds1620.c
+++ b/drivers/char/ds1620.c
@@ -336,7 +336,7 @@ proc_therm_ds1620_read(char *buf, char **start, off_t offset,
336static struct proc_dir_entry *proc_therm_ds1620; 336static struct proc_dir_entry *proc_therm_ds1620;
337#endif 337#endif
338 338
339static struct file_operations ds1620_fops = { 339static const struct file_operations ds1620_fops = {
340 .owner = THIS_MODULE, 340 .owner = THIS_MODULE,
341 .open = nonseekable_open, 341 .open = nonseekable_open,
342 .read = ds1620_read, 342 .read = ds1620_read,
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 09b413618b57..9b1bf60ffbe7 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -483,7 +483,7 @@ static int dsp56k_release(struct inode *inode, struct file *file)
483 return 0; 483 return 0;
484} 484}
485 485
486static struct file_operations dsp56k_fops = { 486static const struct file_operations dsp56k_fops = {
487 .owner = THIS_MODULE, 487 .owner = THIS_MODULE,
488 .read = dsp56k_read, 488 .read = dsp56k_read,
489 .write = dsp56k_write, 489 .write = dsp56k_write,
diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c
index da2c89f1b8bc..5e82c3bad2e3 100644
--- a/drivers/char/dtlk.c
+++ b/drivers/char/dtlk.c
@@ -94,7 +94,7 @@ static int dtlk_release(struct inode *, struct file *);
94static int dtlk_ioctl(struct inode *inode, struct file *file, 94static int dtlk_ioctl(struct inode *inode, struct file *file,
95 unsigned int cmd, unsigned long arg); 95 unsigned int cmd, unsigned long arg);
96 96
97static struct file_operations dtlk_fops = 97static const struct file_operations dtlk_fops =
98{ 98{
99 .owner = THIS_MODULE, 99 .owner = THIS_MODULE,
100 .read = dtlk_read, 100 .read = dtlk_read,
diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c
index 0090e7a4fcd3..004141d535a2 100644
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -285,7 +285,7 @@ efi_rtc_close(struct inode *inode, struct file *file)
285 * The various file operations we support. 285 * The various file operations we support.
286 */ 286 */
287 287
288static struct file_operations efi_rtc_fops = { 288static const struct file_operations efi_rtc_fops = {
289 .owner = THIS_MODULE, 289 .owner = THIS_MODULE,
290 .ioctl = efi_rtc_ioctl, 290 .ioctl = efi_rtc_ioctl,
291 .open = efi_rtc_open, 291 .open = efi_rtc_open,
diff --git a/drivers/char/ftape/zftape/zftape-init.c b/drivers/char/ftape/zftape/zftape-init.c
index 55272566b740..164a1aa77a2f 100644
--- a/drivers/char/ftape/zftape/zftape-init.c
+++ b/drivers/char/ftape/zftape/zftape-init.c
@@ -86,7 +86,7 @@ static ssize_t zft_read (struct file *fp, char __user *buff,
86static ssize_t zft_write(struct file *fp, const char __user *buff, 86static ssize_t zft_write(struct file *fp, const char __user *buff,
87 size_t req_len, loff_t *ppos); 87 size_t req_len, loff_t *ppos);
88 88
89static struct file_operations zft_cdev = 89static const struct file_operations zft_cdev =
90{ 90{
91 .owner = THIS_MODULE, 91 .owner = THIS_MODULE,
92 .read = zft_read, 92 .read = zft_read,
diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c
index bebd7e34f792..817dc409ac20 100644
--- a/drivers/char/genrtc.c
+++ b/drivers/char/genrtc.c
@@ -482,7 +482,7 @@ static inline int gen_rtc_proc_init(void) { return 0; }
482 * The various file operations we support. 482 * The various file operations we support.
483 */ 483 */
484 484
485static struct file_operations gen_rtc_fops = { 485static const struct file_operations gen_rtc_fops = {
486 .owner = THIS_MODULE, 486 .owner = THIS_MODULE,
487#ifdef CONFIG_GEN_RTC_X 487#ifdef CONFIG_GEN_RTC_X
488 .read = gen_rtc_read, 488 .read = gen_rtc_read,
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index e5643f3aa73f..8afba339f05a 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -553,7 +553,7 @@ hpet_ioctl_common(struct hpet_dev *devp, int cmd, unsigned long arg, int kernel)
553 return err; 553 return err;
554} 554}
555 555
556static struct file_operations hpet_fops = { 556static const struct file_operations hpet_fops = {
557 .owner = THIS_MODULE, 557 .owner = THIS_MODULE,
558 .llseek = no_llseek, 558 .llseek = no_llseek,
559 .read = hpet_read, 559 .read = hpet_read,
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 88b026639f10..154a81d328c1 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -149,7 +149,7 @@ out:
 }
 
 
-static struct file_operations rng_chrdev_ops = {
+static const struct file_operations rng_chrdev_ops = {
 	.owner = THIS_MODULE,
 	.open = rng_dev_open,
 	.read = rng_dev_read,
diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index f3c3aaf4560e..353d9f3cf8d7 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -80,7 +80,7 @@ static int i8k_open_fs(struct inode *inode, struct file *file);
 static int i8k_ioctl(struct inode *, struct file *, unsigned int,
 		     unsigned long);
 
-static struct file_operations i8k_fops = {
+static const struct file_operations i8k_fops = {
 	.open = i8k_open_fs,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index a4200a2b0811..518ece7ac656 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -233,7 +233,7 @@ static void *DevTableMem[IP2_MAX_BOARDS];
 /* This is the driver descriptor for the ip2ipl device, which is used to
  * download the loadware to the boards.
  */
-static struct file_operations ip2_ipl = {
+static const struct file_operations ip2_ipl = {
 	.owner = THIS_MODULE,
 	.read = ip2_ipl_read,
 	.write = ip2_ipl_write,
diff --git a/drivers/char/ip27-rtc.c b/drivers/char/ip27-rtc.c
index 3acdac3c967e..a48da02aad2f 100644
--- a/drivers/char/ip27-rtc.c
+++ b/drivers/char/ip27-rtc.c
@@ -196,7 +196,7 @@ static int rtc_release(struct inode *inode, struct file *file)
  * The various file operations we support.
  */
 
-static struct file_operations rtc_fops = {
+static const struct file_operations rtc_fops = {
 	.owner = THIS_MODULE,
 	.ioctl = rtc_ioctl,
 	.open = rtc_open,
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
index 2fc894fef1cb..68d7c61a864e 100644
--- a/drivers/char/ipmi/ipmi_devintf.c
+++ b/drivers/char/ipmi/ipmi_devintf.c
@@ -765,7 +765,7 @@ static long compat_ipmi_ioctl(struct file *filep, unsigned int cmd,
 }
 #endif
 
-static struct file_operations ipmi_fops = {
+static const struct file_operations ipmi_fops = {
 	.owner = THIS_MODULE,
 	.ioctl = ipmi_ioctl,
 #ifdef CONFIG_COMPAT
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 74a889c58333..accaaf1a6b69 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -807,7 +807,7 @@ static int ipmi_close(struct inode *ino, struct file *filep)
 	return 0;
 }
 
-static struct file_operations ipmi_wdog_fops = {
+static const struct file_operations ipmi_wdog_fops = {
 	.owner = THIS_MODULE,
 	.read = ipmi_read,
 	.poll = ipmi_poll,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index fbce2f0669d6..84dfc4278139 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -748,7 +748,7 @@ static int stli_initpcibrd(int brdtype, struct pci_dev *devp);
  * will give access to the shared memory on the Stallion intelligent
  * board. This is also a very useful debugging tool.
  */
-static struct file_operations stli_fsiomem = {
+static const struct file_operations stli_fsiomem = {
 	.owner = THIS_MODULE,
 	.read = stli_memread,
 	.write = stli_memwrite,
diff --git a/drivers/char/ite_gpio.c b/drivers/char/ite_gpio.c
index 747ba45e50e5..cde562d70c4f 100644
--- a/drivers/char/ite_gpio.c
+++ b/drivers/char/ite_gpio.c
@@ -357,7 +357,7 @@ DEB(printk("interrupt 0x%x %d\n",ITE_GPAISR, i));
 	}
 }
 
-static struct file_operations ite_gpio_fops = {
+static const struct file_operations ite_gpio_fops = {
 	.owner = THIS_MODULE,
 	.ioctl = ite_gpio_ioctl,
 	.open = ite_gpio_open,
diff --git a/drivers/char/lcd.c b/drivers/char/lcd.c
index 7d49b241de56..da601fd6c07a 100644
--- a/drivers/char/lcd.c
+++ b/drivers/char/lcd.c
@@ -598,7 +598,7 @@ static ssize_t lcd_read(struct file *file, char *buf,
  * The various file operations we support.
  */
 
-static struct file_operations lcd_fops = {
+static const struct file_operations lcd_fops = {
 	.read = lcd_read,
 	.ioctl = lcd_ioctl,
 	.open = lcd_open,
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 582cdbdb0c42..f875fda3b089 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -666,7 +666,7 @@ static int lp_ioctl(struct inode *inode, struct file *file,
 	return retval;
 }
 
-static struct file_operations lp_fops = {
+static const struct file_operations lp_fops = {
 	.owner = THIS_MODULE,
 	.write = lp_write,
 	.ioctl = lp_ioctl,
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 70f3954d6dfd..e97c32ceb796 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -776,7 +776,7 @@ static int open_port(struct inode * inode, struct file * filp)
 #define open_kmem	open_mem
 #define open_oldmem	open_mem
 
-static struct file_operations mem_fops = {
+static const struct file_operations mem_fops = {
 	.llseek = memory_lseek,
 	.read = read_mem,
 	.write = write_mem,
@@ -784,7 +784,7 @@ static struct file_operations mem_fops = {
 	.open = open_mem,
 };
 
-static struct file_operations kmem_fops = {
+static const struct file_operations kmem_fops = {
 	.llseek = memory_lseek,
 	.read = read_kmem,
 	.write = write_kmem,
@@ -792,7 +792,7 @@ static struct file_operations kmem_fops = {
 	.open = open_kmem,
 };
 
-static struct file_operations null_fops = {
+static const struct file_operations null_fops = {
 	.llseek = null_lseek,
 	.read = read_null,
 	.write = write_null,
@@ -800,7 +800,7 @@ static struct file_operations null_fops = {
 };
 
 #if defined(CONFIG_ISA) || !defined(__mc68000__)
-static struct file_operations port_fops = {
+static const struct file_operations port_fops = {
 	.llseek = memory_lseek,
 	.read = read_port,
 	.write = write_port,
@@ -808,7 +808,7 @@ static struct file_operations port_fops = {
 };
 #endif
 
-static struct file_operations zero_fops = {
+static const struct file_operations zero_fops = {
 	.llseek = zero_lseek,
 	.read = read_zero,
 	.write = write_zero,
@@ -819,14 +819,14 @@ static struct backing_dev_info zero_bdi = {
 	.capabilities = BDI_CAP_MAP_COPY,
 };
 
-static struct file_operations full_fops = {
+static const struct file_operations full_fops = {
 	.llseek = full_lseek,
 	.read = read_full,
 	.write = write_full,
 };
 
 #ifdef CONFIG_CRASH_DUMP
-static struct file_operations oldmem_fops = {
+static const struct file_operations oldmem_fops = {
 	.read = read_oldmem,
 	.open = open_oldmem,
 };
@@ -853,7 +853,7 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf,
 	return ret;
 }
 
-static struct file_operations kmsg_fops = {
+static const struct file_operations kmsg_fops = {
 	.write = kmsg_write,
 };
 
@@ -903,7 +903,7 @@ static int memory_open(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-static struct file_operations memory_fops = {
+static const struct file_operations memory_fops = {
 	.open = memory_open,	/* just a selector for the real open */
 };
 
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index d5fa19da330b..62ebe09656e3 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -113,7 +113,7 @@ static int misc_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &misc_seq_ops);
 }
 
-static struct file_operations misc_proc_fops = {
+static const struct file_operations misc_proc_fops = {
 	.owner = THIS_MODULE,
 	.open = misc_seq_open,
 	.read = seq_read,
@@ -176,7 +176,7 @@ fail:
  */
 static struct class *misc_class;
 
-static struct file_operations misc_fops = {
+static const struct file_operations misc_fops = {
 	.owner = THIS_MODULE,
 	.open = misc_open,
 };
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index 70b774ff5aa4..1f0f2b6dae26 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -63,7 +63,7 @@ static int mmtimer_mmap(struct file *file, struct vm_area_struct *vma);
  */
 static unsigned long mmtimer_femtoperiod = 0;
 
-static struct file_operations mmtimer_fops = {
+static const struct file_operations mmtimer_fops = {
 	.owner = THIS_MODULE,
 	.mmap = mmtimer_mmap,
 	.ioctl = mmtimer_ioctl,
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index d3ba2f860ef0..39a2e661ff55 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -454,7 +454,7 @@ static int register_serial_portandirq(unsigned int port, int irq)
 }
 
 
-static struct file_operations mwave_fops = {
+static const struct file_operations mwave_fops = {
 	.owner = THIS_MODULE,
 	.read = mwave_read,
 	.write = mwave_write,
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 8c5f102622b6..a39f19c35a6a 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -437,7 +437,7 @@ nvram_read_proc(char *buffer, char **start, off_t offset,
 
 #endif /* CONFIG_PROC_FS */
 
-static struct file_operations nvram_fops = {
+static const struct file_operations nvram_fops = {
 	.owner = THIS_MODULE,
 	.llseek = nvram_llseek,
 	.read = nvram_read,
diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c
index f240a104d250..7c57ebfa8640 100644
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -183,7 +183,7 @@ static int button_read (struct file *filp, char __user *buffer,
  * attempts to perform these operations on the device.
  */
 
-static struct file_operations button_fops = {
+static const struct file_operations button_fops = {
 	.owner = THIS_MODULE,
 	.read = button_read,
 };
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index 8865387d3448..206cf6f50695 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -642,7 +642,7 @@ static void kick_open(void)
 	udelay(25);
 }
 
-static struct file_operations flash_fops =
+static const struct file_operations flash_fops =
 {
 	.owner = THIS_MODULE,
 	.llseek = flash_llseek,
diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index c860de6a6fde..4005ee0aa11e 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -236,7 +236,7 @@ static int pc8736x_gpio_open(struct inode *inode, struct file *file)
 	return nonseekable_open(inode, file);
 }
 
-static struct file_operations pc8736x_gpio_fops = {
+static const struct file_operations pc8736x_gpio_fops = {
 	.owner = THIS_MODULE,
 	.open = pc8736x_gpio_open,
 	.write = nsc_gpio_write,
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 31c8a21f9d87..50d20aafeb18 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1938,7 +1938,7 @@ static void cm4000_detach(struct pcmcia_device *link)
 	return;
 }
 
-static struct file_operations cm4000_fops = {
+static const struct file_operations cm4000_fops = {
 	.owner = THIS_MODULE,
 	.read = cmm_read,
 	.write = cmm_write,
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index 47a8465bf95b..55cf4be42976 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -688,7 +688,7 @@ static void reader_detach(struct pcmcia_device *link)
 	return;
 }
 
-static struct file_operations reader_fops = {
+static const struct file_operations reader_fops = {
 	.owner = THIS_MODULE,
 	.read = cm4040_read,
 	.write = cm4040_write,
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 24231d9743dc..520d2cf82bc0 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -739,7 +739,7 @@ static unsigned int pp_poll (struct file * file, poll_table * wait)
 
 static struct class *ppdev_class;
 
-static struct file_operations pp_fops = {
+static const struct file_operations pp_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = pp_read,
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 164bddae047f..4c3a5ca9d8f7 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -416,7 +416,7 @@ static struct entropy_store input_pool = {
 	.poolinfo = &poolinfo_table[0],
 	.name = "input",
 	.limit = 1,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
 	.pool = input_pool_data
 };
 
@@ -425,7 +425,7 @@ static struct entropy_store blocking_pool = {
 	.name = "blocking",
 	.limit = 1,
 	.pull = &input_pool,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
 	.pool = blocking_pool_data
 };
 
@@ -433,7 +433,7 @@ static struct entropy_store nonblocking_pool = {
 	.poolinfo = &poolinfo_table[1],
 	.name = "nonblocking",
 	.pull = &input_pool,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
 	.pool = nonblocking_pool_data
 };
 
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 9bf97c5e38c0..579868af4a54 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -30,7 +30,7 @@ struct raw_device_data {
 static struct class *raw_class;
 static struct raw_device_data raw_devices[MAX_RAW_MINORS];
 static DEFINE_MUTEX(raw_mutex);
-static struct file_operations raw_ctl_fops;	/* forward declaration */
+static const struct file_operations raw_ctl_fops;	/* forward declaration */
 
 /*
  * Open/close code for raw IO.
@@ -261,7 +261,7 @@ static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
 }
 
 
-static struct file_operations raw_fops = {
+static const struct file_operations raw_fops = {
 	.read = generic_file_read,
 	.aio_read = generic_file_aio_read,
 	.write = raw_file_write,
@@ -274,7 +274,7 @@ static struct file_operations raw_fops = {
 	.owner = THIS_MODULE,
 };
 
-static struct file_operations raw_ctl_fops = {
+static const struct file_operations raw_ctl_fops = {
 	.ioctl = raw_ctl_ioctl,
 	.open = raw_open,
 	.owner = THIS_MODULE,
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index 3afc6a47ebbc..3fa80aaf4527 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -243,7 +243,7 @@ static struct real_driver rio_real_driver = {
  *
  */
 
-static struct file_operations rio_fw_fops = {
+static const struct file_operations rio_fw_fops = {
 	.owner = THIS_MODULE,
 	.ioctl = rio_fw_ioctl,
 };
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index aefac4ac0bf5..cc7bd1a3095b 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -877,7 +877,7 @@ int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
  * The various file operations we support.
  */
 
-static struct file_operations rtc_fops = {
+static const struct file_operations rtc_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = rtc_read,
@@ -896,7 +896,7 @@ static struct miscdevice rtc_dev = {
 	.fops = &rtc_fops,
 };
 
-static struct file_operations rtc_proc_fops = {
+static const struct file_operations rtc_proc_fops = {
 	.owner = THIS_MODULE,
 	.open = rtc_proc_open,
 	.read = seq_read,
diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c
index 45083e5dd23b..425c58719db6 100644
--- a/drivers/char/scx200_gpio.c
+++ b/drivers/char/scx200_gpio.c
@@ -63,7 +63,7 @@ static int scx200_gpio_release(struct inode *inode, struct file *file)
 }
 
 
-static struct file_operations scx200_gpio_fops = {
+static const struct file_operations scx200_gpio_fops = {
 	.owner = THIS_MODULE,
 	.write = nsc_gpio_write,
 	.read = nsc_gpio_read,
diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c
index 203240b6c08f..afc6eda602f7 100644
--- a/drivers/char/snsc.c
+++ b/drivers/char/snsc.c
@@ -347,7 +347,7 @@ scdrv_poll(struct file *file, struct poll_table_struct *wait)
 	return mask;
 }
 
-static struct file_operations scdrv_fops = {
+static const struct file_operations scdrv_fops = {
 	.owner = THIS_MODULE,
 	.read = scdrv_read,
 	.write = scdrv_write,
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 45508a039508..d4e434d694b7 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -1106,7 +1106,7 @@ static int sonypi_misc_ioctl(struct inode *ip, struct file *fp,
 	return ret;
 }
 
-static struct file_operations sonypi_misc_fops = {
+static const struct file_operations sonypi_misc_fops = {
 	.owner = THIS_MODULE,
 	.read = sonypi_misc_read,
 	.poll = sonypi_misc_poll,
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index ed7b8eaf0367..3beb2203d24b 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -707,7 +707,7 @@ static unsigned int sc26198_baudtable[] = {
  * Define the driver info for a user level control device. Used mainly
  * to get at port stats - only not using the port device itself.
  */
-static struct file_operations stl_fsiomem = {
+static const struct file_operations stl_fsiomem = {
 	.owner = THIS_MODULE,
 	.ioctl = stl_memioctl,
 };
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 45c193aa11db..e1cd2bc4b1e4 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -410,7 +410,7 @@ static struct real_driver sx_real_driver = {
  *
  */
 
-static struct file_operations sx_fw_fops = {
+static const struct file_operations sx_fw_fops = {
 	.owner = THIS_MODULE,
 	.ioctl = sx_fw_ioctl,
 };
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index a064ee9181c0..ee3ca8f1768e 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -147,12 +147,13 @@ static struct sysrq_key_op sysrq_mountro_op = {
 	.enable_mask = SYSRQ_ENABLE_REMOUNT,
 };
 
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef CONFIG_LOCKDEP
 static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs,
 				struct tty_struct *tty)
 {
-	mutex_debug_show_all_locks();
+	debug_show_all_locks();
 }
+
 static struct sysrq_key_op sysrq_showlocks_op = {
 	.handler = sysrq_handle_showlocks,
 	.help_msg = "show-all-locks(D)",
diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c
index a80c83210872..bb1bad4c18f9 100644
--- a/drivers/char/tb0219.c
+++ b/drivers/char/tb0219.c
@@ -255,7 +255,7 @@ static int tanbac_tb0219_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations tb0219_fops = {
+static const struct file_operations tb0219_fops = {
 	.owner = THIS_MODULE,
 	.read = tanbac_tb0219_read,
 	.write = tanbac_tb0219_write,
diff --git a/drivers/char/tipar.c b/drivers/char/tipar.c
index e0633a119d29..d30dc09dbbc9 100644
--- a/drivers/char/tipar.c
+++ b/drivers/char/tipar.c
@@ -381,7 +381,7 @@ tipar_ioctl(struct inode *inode, struct file *file,
 
 /* ----- kernel module registering ------------------------------------ */
 
-static struct file_operations tipar_fops = {
+static const struct file_operations tipar_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = tipar_read,
diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c
index 952b829e2cb4..d2c5ba4e83b8 100644
--- a/drivers/char/tlclk.c
+++ b/drivers/char/tlclk.c
@@ -247,7 +247,7 @@ static ssize_t tlclk_write(struct file *filp, const char __user *buf, size_t cou
 	return 0;
 }
 
-static struct file_operations tlclk_fops = {
+static const struct file_operations tlclk_fops = {
 	.read = tlclk_read,
 	.write = tlclk_write,
 	.open = tlclk_open,
diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c
index e2fb234dee40..dd36fd04a842 100644
--- a/drivers/char/toshiba.c
+++ b/drivers/char/toshiba.c
@@ -92,7 +92,7 @@ static int tosh_ioctl(struct inode *, struct file *, unsigned int,
 		unsigned long);
 
 
-static struct file_operations tosh_fops = {
+static const struct file_operations tosh_fops = {
 	.owner = THIS_MODULE,
 	.ioctl = tosh_ioctl,
 };
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
index 58a258cec153..ad8ffe49256f 100644
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -116,7 +116,7 @@ static u8 tpm_atml_status(struct tpm_chip *chip)
 	return ioread8(chip->vendor.iobase + 1);
 }
 
-static struct file_operations atmel_ops = {
+static const struct file_operations atmel_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index adfff21beb21..1353b5a6bae8 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -338,7 +338,7 @@ static struct attribute *inf_attrs[] = {
 
 static struct attribute_group inf_attr_grp = {.attrs = inf_attrs };
 
-static struct file_operations inf_ops = {
+static const struct file_operations inf_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index 4c8bc06c7d95..26287aace87d 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -226,7 +226,7 @@ static u8 tpm_nsc_status(struct tpm_chip *chip)
 	return inb(chip->vendor.base + NSC_STATUS);
 }
 
-static struct file_operations nsc_ops = {
+static const struct file_operations nsc_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index abb0f2aeae66..3232b1932597 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -330,7 +330,7 @@ out_err:
 	return rc;
 }
 
-static struct file_operations tis_ops = {
+static const struct file_operations tis_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 615e934da05f..bfdb90242a90 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -912,7 +912,7 @@ static int hung_up_tty_ioctl(struct inode * inode, struct file * file,
 	return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
 }
 
-static struct file_operations tty_fops = {
+static const struct file_operations tty_fops = {
 	.llseek = no_llseek,
 	.read = tty_read,
 	.write = tty_write,
@@ -924,7 +924,7 @@ static struct file_operations tty_fops = {
 };
 
 #ifdef CONFIG_UNIX98_PTYS
-static struct file_operations ptmx_fops = {
+static const struct file_operations ptmx_fops = {
 	.llseek = no_llseek,
 	.read = tty_read,
 	.write = tty_write,
@@ -936,7 +936,7 @@ static struct file_operations ptmx_fops = {
 };
 #endif
 
-static struct file_operations console_fops = {
+static const struct file_operations console_fops = {
 	.llseek = no_llseek,
 	.read = tty_read,
 	.write = redirected_tty_write,
@@ -947,7 +947,7 @@ static struct file_operations console_fops = {
 	.fasync = tty_fasync,
 };
 
-static struct file_operations hung_up_tty_fops = {
+static const struct file_operations hung_up_tty_fops = {
 	.llseek = no_llseek,
 	.read = hung_up_tty_read,
 	.write = hung_up_tty_write,
@@ -2336,7 +2336,7 @@ static int fionbio(struct file *file, int __user *p)
 
 static int tiocsctty(struct tty_struct *tty, int arg)
 {
-	task_t *p;
+	struct task_struct *p;
 
 	if (current->signal->leader &&
 	    (current->signal->session == tty->session))
diff --git a/drivers/char/vc_screen.c b/drivers/char/vc_screen.c
index 45e9bd81bc0e..a9247b5213d5 100644
--- a/drivers/char/vc_screen.c
+++ b/drivers/char/vc_screen.c
@@ -465,7 +465,7 @@ vcs_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static struct file_operations vcs_fops = {
+static const struct file_operations vcs_fops = {
 	.llseek = vcs_lseek,
 	.read = vcs_read,
 	.write = vcs_write,
diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c
index 7d42c8ec8dbc..b72b2049aaae 100644
--- a/drivers/char/viotape.c
+++ b/drivers/char/viotape.c
@@ -292,7 +292,7 @@ static int proc_viotape_open(struct inode *inode, struct file *file)
 	return single_open(file, proc_viotape_show, NULL);
 }
 
-static struct file_operations proc_viotape_operations = {
+static const struct file_operations proc_viotape_operations = {
 	.open = proc_viotape_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index 073da48c092e..1b9b1f1d4c49 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -605,7 +605,7 @@ static int gpio_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations gpio_fops = {
+static const struct file_operations gpio_fops = {
 	.owner = THIS_MODULE,
 	.read = gpio_read,
 	.write = gpio_write,
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 3ef823d7d255..da7e66a2a38b 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -886,6 +886,7 @@ void vc_disallocate(unsigned int currcons)
 	if (vc_cons_allocated(currcons)) {
 	    struct vc_data *vc = vc_cons[currcons].d;
 	    vc->vc_sw->con_deinit(vc);
+	    module_put(vc->vc_sw->owner);
 	    if (vc->vc_kmalloced)
 		kfree(vc->vc_screenbuf);
 	    if (currcons >= MIN_NR_CONSOLES)
diff --git a/drivers/char/watchdog/acquirewdt.c b/drivers/char/watchdog/acquirewdt.c
index 7289f4af93d0..c77fe3cf2852 100644
--- a/drivers/char/watchdog/acquirewdt.c
+++ b/drivers/char/watchdog/acquirewdt.c
@@ -231,7 +231,7 @@ static int acq_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations acq_fops = {
+static const struct file_operations acq_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = acq_write,
diff --git a/drivers/char/watchdog/advantechwdt.c b/drivers/char/watchdog/advantechwdt.c
index 194a3fd36b91..8069be445edc 100644
--- a/drivers/char/watchdog/advantechwdt.c
+++ b/drivers/char/watchdog/advantechwdt.c
@@ -227,7 +227,7 @@ advwdt_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations advwdt_fops = {
+static const struct file_operations advwdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = advwdt_write,
diff --git a/drivers/char/watchdog/alim1535_wdt.c b/drivers/char/watchdog/alim1535_wdt.c
index 8338ca300e2e..c5c94e4c9495 100644
--- a/drivers/char/watchdog/alim1535_wdt.c
+++ b/drivers/char/watchdog/alim1535_wdt.c
@@ -362,7 +362,7 @@ static int __init ali_find_watchdog(void)
  *	Kernel Interfaces
  */
 
-static struct file_operations ali_fops = {
+static const struct file_operations ali_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = ali_write,
diff --git a/drivers/char/watchdog/alim7101_wdt.c b/drivers/char/watchdog/alim7101_wdt.c
index c05ac188a4d7..ffd7684f999b 100644
--- a/drivers/char/watchdog/alim7101_wdt.c
+++ b/drivers/char/watchdog/alim7101_wdt.c
@@ -281,7 +281,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u
 	}
 }
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner= THIS_MODULE,
 	.llseek= no_llseek,
 	.write= fop_write,
diff --git a/drivers/char/watchdog/at91_wdt.c b/drivers/char/watchdog/at91_wdt.c
index f61dedc3c96c..cc266715ea32 100644
--- a/drivers/char/watchdog/at91_wdt.c
+++ b/drivers/char/watchdog/at91_wdt.c
@@ -183,7 +183,7 @@ static ssize_t at91_wdt_write(struct file *file, const char *data, size_t len, l
 
 /* ......................................................................... */
 
-static struct file_operations at91wdt_fops = {
+static const struct file_operations at91wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.ioctl = at91_wdt_ioctl,
diff --git a/drivers/char/watchdog/booke_wdt.c b/drivers/char/watchdog/booke_wdt.c
index 537f5c6729bf..e3cefc538b40 100644
--- a/drivers/char/watchdog/booke_wdt.c
+++ b/drivers/char/watchdog/booke_wdt.c
@@ -145,7 +145,7 @@ static int booke_wdt_open (struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations booke_wdt_fops = {
+static const struct file_operations booke_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = booke_wdt_write,
diff --git a/drivers/char/watchdog/cpu5wdt.c b/drivers/char/watchdog/cpu5wdt.c
index 3e8410b5a65e..04c7e49918db 100644
--- a/drivers/char/watchdog/cpu5wdt.c
+++ b/drivers/char/watchdog/cpu5wdt.c
@@ -198,7 +198,7 @@ static ssize_t cpu5wdt_write(struct file *file, const char __user *buf, size_t c
 	return count;
 }
 
-static struct file_operations cpu5wdt_fops = {
+static const struct file_operations cpu5wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.ioctl = cpu5wdt_ioctl,
diff --git a/drivers/char/watchdog/ep93xx_wdt.c b/drivers/char/watchdog/ep93xx_wdt.c
index 9021dbb78299..77c8a955ae9e 100644
--- a/drivers/char/watchdog/ep93xx_wdt.c
+++ b/drivers/char/watchdog/ep93xx_wdt.c
@@ -187,7 +187,7 @@ static int ep93xx_wdt_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations ep93xx_wdt_fops = {
+static const struct file_operations ep93xx_wdt_fops = {
 	.owner = THIS_MODULE,
 	.write = ep93xx_wdt_write,
 	.ioctl = ep93xx_wdt_ioctl,
diff --git a/drivers/char/watchdog/eurotechwdt.c b/drivers/char/watchdog/eurotechwdt.c
index ea670de4fab7..62dbccb2f6df 100644
--- a/drivers/char/watchdog/eurotechwdt.c
+++ b/drivers/char/watchdog/eurotechwdt.c
@@ -356,7 +356,7 @@ static int eurwdt_notify_sys(struct notifier_block *this, unsigned long code,
  */
 
 
-static struct file_operations eurwdt_fops = {
+static const struct file_operations eurwdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = eurwdt_write,
diff --git a/drivers/char/watchdog/i6300esb.c b/drivers/char/watchdog/i6300esb.c
index 93785f13242e..870539eabbf3 100644
--- a/drivers/char/watchdog/i6300esb.c
+++ b/drivers/char/watchdog/i6300esb.c
@@ -337,7 +337,7 @@ static int esb_notify_sys (struct notifier_block *this, unsigned long code, void
  *      Kernel Interfaces
  */
 
-static struct file_operations esb_fops = {
+static const struct file_operations esb_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = esb_write,
diff --git a/drivers/char/watchdog/i8xx_tco.c b/drivers/char/watchdog/i8xx_tco.c
index bfbdbbf3c2f2..8385dd36eefe 100644
--- a/drivers/char/watchdog/i8xx_tco.c
+++ b/drivers/char/watchdog/i8xx_tco.c
@@ -378,7 +378,7 @@ static int i8xx_tco_notify_sys (struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations i8xx_tco_fops = {
+static const struct file_operations i8xx_tco_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = i8xx_tco_write,
diff --git a/drivers/char/watchdog/ib700wdt.c b/drivers/char/watchdog/ib700wdt.c
index a2e53c715b36..fd95f7327798 100644
--- a/drivers/char/watchdog/ib700wdt.c
+++ b/drivers/char/watchdog/ib700wdt.c
@@ -255,7 +255,7 @@ ibwdt_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations ibwdt_fops = {
+static const struct file_operations ibwdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = ibwdt_write,
diff --git a/drivers/char/watchdog/ibmasr.c b/drivers/char/watchdog/ibmasr.c
index b0741cbdc139..26ceee7a4df0 100644
--- a/drivers/char/watchdog/ibmasr.c
+++ b/drivers/char/watchdog/ibmasr.c
@@ -322,7 +322,7 @@ static int asr_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations asr_fops = {
+static const struct file_operations asr_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = asr_write,
diff --git a/drivers/char/watchdog/indydog.c b/drivers/char/watchdog/indydog.c
index d387979b2434..dacc1c20a310 100644
--- a/drivers/char/watchdog/indydog.c
+++ b/drivers/char/watchdog/indydog.c
@@ -154,7 +154,7 @@ static int indydog_notify_sys(struct notifier_block *this, unsigned long code, v
 	return NOTIFY_DONE;
 }
 
-static struct file_operations indydog_fops = {
+static const struct file_operations indydog_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = indydog_write,
diff --git a/drivers/char/watchdog/ixp2000_wdt.c b/drivers/char/watchdog/ixp2000_wdt.c
index aa29a7d68759..692908819e26 100644
--- a/drivers/char/watchdog/ixp2000_wdt.c
+++ b/drivers/char/watchdog/ixp2000_wdt.c
@@ -168,7 +168,7 @@ ixp2000_wdt_release(struct inode *inode, struct file *file)
 }
 
 
-static struct file_operations ixp2000_wdt_fops =
+static const struct file_operations ixp2000_wdt_fops =
 {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
diff --git a/drivers/char/watchdog/ixp4xx_wdt.c b/drivers/char/watchdog/ixp4xx_wdt.c
index e6a3fe83fa01..9db5cf2c38c3 100644
--- a/drivers/char/watchdog/ixp4xx_wdt.c
+++ b/drivers/char/watchdog/ixp4xx_wdt.c
@@ -162,7 +162,7 @@ ixp4xx_wdt_release(struct inode *inode, struct file *file)
 }
 
 
-static struct file_operations ixp4xx_wdt_fops =
+static const struct file_operations ixp4xx_wdt_fops =
 {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
diff --git a/drivers/char/watchdog/machzwd.c b/drivers/char/watchdog/machzwd.c
index b67b4878ae0f..23734e07fb22 100644
--- a/drivers/char/watchdog/machzwd.c
+++ b/drivers/char/watchdog/machzwd.c
@@ -388,7 +388,7 @@ static int zf_notify_sys(struct notifier_block *this, unsigned long code,
 
 
 
-static struct file_operations zf_fops = {
+static const struct file_operations zf_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = zf_write,
diff --git a/drivers/char/watchdog/mixcomwd.c b/drivers/char/watchdog/mixcomwd.c
index 433c27f98159..ae943324d251 100644
--- a/drivers/char/watchdog/mixcomwd.c
+++ b/drivers/char/watchdog/mixcomwd.c
@@ -190,7 +190,7 @@ static int mixcomwd_ioctl(struct inode *inode, struct file *file,
 	return 0;
 }
 
-static struct file_operations mixcomwd_fops=
+static const struct file_operations mixcomwd_fops=
 {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
diff --git a/drivers/char/watchdog/mpc83xx_wdt.c b/drivers/char/watchdog/mpc83xx_wdt.c
index dac1381af364..a480903ee1a5 100644
--- a/drivers/char/watchdog/mpc83xx_wdt.c
+++ b/drivers/char/watchdog/mpc83xx_wdt.c
@@ -129,7 +129,7 @@ static int mpc83xx_wdt_ioctl(struct inode *inode, struct file *file,
 	}
 }
 
-static struct file_operations mpc83xx_wdt_fops = {
+static const struct file_operations mpc83xx_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = mpc83xx_wdt_write,
diff --git a/drivers/char/watchdog/mpc8xx_wdt.c b/drivers/char/watchdog/mpc8xx_wdt.c
index 11f0ccd4c4d4..35dd9e6e1140 100644
--- a/drivers/char/watchdog/mpc8xx_wdt.c
+++ b/drivers/char/watchdog/mpc8xx_wdt.c
@@ -132,7 +132,7 @@ static int mpc8xx_wdt_ioctl(struct inode *inode, struct file *file,
 	return 0;
 }
 
-static struct file_operations mpc8xx_wdt_fops = {
+static const struct file_operations mpc8xx_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = mpc8xx_wdt_write,
diff --git a/drivers/char/watchdog/mpcore_wdt.c b/drivers/char/watchdog/mpcore_wdt.c
index c2d492c852fc..54b3c56ead0d 100644
--- a/drivers/char/watchdog/mpcore_wdt.c
+++ b/drivers/char/watchdog/mpcore_wdt.c
@@ -297,7 +297,7 @@ static void mpcore_wdt_shutdown(struct platform_device *dev)
 /*
  *	Kernel Interfaces
  */
-static struct file_operations mpcore_wdt_fops = {
+static const struct file_operations mpcore_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = mpcore_wdt_write,
diff --git a/drivers/char/watchdog/mv64x60_wdt.c b/drivers/char/watchdog/mv64x60_wdt.c
index 20a6cbb0fbb8..5c8fab345b40 100644
--- a/drivers/char/watchdog/mv64x60_wdt.c
+++ b/drivers/char/watchdog/mv64x60_wdt.c
@@ -166,7 +166,7 @@ static int mv64x60_wdt_ioctl(struct inode *inode, struct file *file,
 	return 0;
 }
 
-static struct file_operations mv64x60_wdt_fops = {
+static const struct file_operations mv64x60_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = mv64x60_wdt_write,
diff --git a/drivers/char/watchdog/pcwd.c b/drivers/char/watchdog/pcwd.c
index 6d44ca68312d..cd7d1b6a5d9f 100644
--- a/drivers/char/watchdog/pcwd.c
+++ b/drivers/char/watchdog/pcwd.c
@@ -740,7 +740,7 @@ static int pcwd_notify_sys(struct notifier_block *this, unsigned long code, void
  *	Kernel Interfaces
  */
 
-static struct file_operations pcwd_fops = {
+static const struct file_operations pcwd_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = pcwd_write,
@@ -755,7 +755,7 @@ static struct miscdevice pcwd_miscdev = {
 	.fops = &pcwd_fops,
 };
 
-static struct file_operations pcwd_temp_fops = {
+static const struct file_operations pcwd_temp_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = pcwd_temp_read,
diff --git a/drivers/char/watchdog/pcwd_pci.c b/drivers/char/watchdog/pcwd_pci.c
index 1f40ecefbf72..c7cfd6dbfe1b 100644
--- a/drivers/char/watchdog/pcwd_pci.c
+++ b/drivers/char/watchdog/pcwd_pci.c
@@ -625,7 +625,7 @@ static int pcipcwd_notify_sys(struct notifier_block *this, unsigned long code, v
  *	Kernel Interfaces
  */
 
-static struct file_operations pcipcwd_fops = {
+static const struct file_operations pcipcwd_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = pcipcwd_write,
@@ -640,7 +640,7 @@ static struct miscdevice pcipcwd_miscdev = {
 	.fops = &pcipcwd_fops,
 };
 
-static struct file_operations pcipcwd_temp_fops = {
+static const struct file_operations pcipcwd_temp_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = pcipcwd_temp_read,
diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c
index 92bf8c1a0f0d..b7ae73dcdd08 100644
--- a/drivers/char/watchdog/pcwd_usb.c
+++ b/drivers/char/watchdog/pcwd_usb.c
@@ -523,7 +523,7 @@ static int usb_pcwd_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations usb_pcwd_fops = {
+static const struct file_operations usb_pcwd_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = usb_pcwd_write,
@@ -538,7 +538,7 @@ static struct miscdevice usb_pcwd_miscdev = {
 	.fops = &usb_pcwd_fops,
 };
 
-static struct file_operations usb_pcwd_temperature_fops = {
+static const struct file_operations usb_pcwd_temperature_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = usb_pcwd_temperature_read,
diff --git a/drivers/char/watchdog/s3c2410_wdt.c b/drivers/char/watchdog/s3c2410_wdt.c
index f267dad26071..be978e8ed754 100644
--- a/drivers/char/watchdog/s3c2410_wdt.c
+++ b/drivers/char/watchdog/s3c2410_wdt.c
@@ -319,7 +319,7 @@ static int s3c2410wdt_ioctl(struct inode *inode, struct file *file,
 
 /* kernel interface */
 
-static struct file_operations s3c2410wdt_fops = {
+static const struct file_operations s3c2410wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = s3c2410wdt_write,
diff --git a/drivers/char/watchdog/sa1100_wdt.c b/drivers/char/watchdog/sa1100_wdt.c
index b22e95c5470c..1fc16d995788 100644
--- a/drivers/char/watchdog/sa1100_wdt.c
+++ b/drivers/char/watchdog/sa1100_wdt.c
@@ -135,7 +135,7 @@ static int sa1100dog_ioctl(struct inode *inode, struct file *file,
 	return ret;
 }
 
-static struct file_operations sa1100dog_fops =
+static const struct file_operations sa1100dog_fops =
 {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
diff --git a/drivers/char/watchdog/sbc60xxwdt.c b/drivers/char/watchdog/sbc60xxwdt.c
index ed0bd55fbfc1..4663c2fd53cd 100644
--- a/drivers/char/watchdog/sbc60xxwdt.c
+++ b/drivers/char/watchdog/sbc60xxwdt.c
@@ -282,7 +282,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 	}
 }
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = fop_write,
diff --git a/drivers/char/watchdog/sbc8360.c b/drivers/char/watchdog/sbc8360.c
index 6562aa910ace..1035be5b5019 100644
--- a/drivers/char/watchdog/sbc8360.c
+++ b/drivers/char/watchdog/sbc8360.c
@@ -305,7 +305,7 @@ static int sbc8360_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations sbc8360_fops = {
+static const struct file_operations sbc8360_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = sbc8360_write,
diff --git a/drivers/char/watchdog/sbc_epx_c3.c b/drivers/char/watchdog/sbc_epx_c3.c
index 09867fadc720..bfc475dabe6d 100644
--- a/drivers/char/watchdog/sbc_epx_c3.c
+++ b/drivers/char/watchdog/sbc_epx_c3.c
@@ -154,7 +154,7 @@ static int epx_c3_notify_sys(struct notifier_block *this, unsigned long code,
 	return NOTIFY_DONE;
 }
 
-static struct file_operations epx_c3_fops = {
+static const struct file_operations epx_c3_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = epx_c3_write,
diff --git a/drivers/char/watchdog/sc1200wdt.c b/drivers/char/watchdog/sc1200wdt.c
index 78ef6333c181..7c3cf293a5af 100644
--- a/drivers/char/watchdog/sc1200wdt.c
+++ b/drivers/char/watchdog/sc1200wdt.c
@@ -292,7 +292,7 @@ static struct notifier_block sc1200wdt_notifier =
 	.notifier_call = sc1200wdt_notify_sys,
 };
 
-static struct file_operations sc1200wdt_fops =
+static const struct file_operations sc1200wdt_fops =
 {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
diff --git a/drivers/char/watchdog/sc520_wdt.c b/drivers/char/watchdog/sc520_wdt.c
index 4ee9974ad8cb..2c7c9db71be8 100644
--- a/drivers/char/watchdog/sc520_wdt.c
+++ b/drivers/char/watchdog/sc520_wdt.c
@@ -336,7 +336,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 	}
 }
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = fop_write,
diff --git a/drivers/char/watchdog/scx200_wdt.c b/drivers/char/watchdog/scx200_wdt.c
index c0b4754e8de0..c561299a5537 100644
--- a/drivers/char/watchdog/scx200_wdt.c
+++ b/drivers/char/watchdog/scx200_wdt.c
@@ -194,7 +194,7 @@ static int scx200_wdt_ioctl(struct inode *inode, struct file *file,
 	}
 }
 
-static struct file_operations scx200_wdt_fops = {
+static const struct file_operations scx200_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = scx200_wdt_write,
diff --git a/drivers/char/watchdog/shwdt.c b/drivers/char/watchdog/shwdt.c
index 803701b675c0..1355038f1044 100644
--- a/drivers/char/watchdog/shwdt.c
+++ b/drivers/char/watchdog/shwdt.c
@@ -344,7 +344,7 @@ static int sh_wdt_notify_sys(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
-static struct file_operations sh_wdt_fops = {
+static const struct file_operations sh_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = sh_wdt_write,
diff --git a/drivers/char/watchdog/softdog.c b/drivers/char/watchdog/softdog.c
index 79ce5c655428..ef8da517545a 100644
--- a/drivers/char/watchdog/softdog.c
+++ b/drivers/char/watchdog/softdog.c
@@ -243,7 +243,7 @@ static int softdog_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations softdog_fops = {
+static const struct file_operations softdog_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = softdog_write,
diff --git a/drivers/char/watchdog/w83627hf_wdt.c b/drivers/char/watchdog/w83627hf_wdt.c
index d15ca9a3986f..13f16d41c2fd 100644
--- a/drivers/char/watchdog/w83627hf_wdt.c
+++ b/drivers/char/watchdog/w83627hf_wdt.c
@@ -274,7 +274,7 @@ wdt_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = wdt_write,
diff --git a/drivers/char/watchdog/w83877f_wdt.c b/drivers/char/watchdog/w83877f_wdt.c
index 52a8bd0a5988..ccf6c0915945 100644
--- a/drivers/char/watchdog/w83877f_wdt.c
+++ b/drivers/char/watchdog/w83877f_wdt.c
@@ -299,7 +299,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 	}
 }
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = fop_write,
diff --git a/drivers/char/watchdog/w83977f_wdt.c b/drivers/char/watchdog/w83977f_wdt.c
index c31849e4c5c2..98f4e17db70a 100644
--- a/drivers/char/watchdog/w83977f_wdt.c
+++ b/drivers/char/watchdog/w83977f_wdt.c
@@ -449,7 +449,7 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
 	return NOTIFY_DONE;
 }
 
-static struct file_operations wdt_fops=
+static const struct file_operations wdt_fops=
 {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
diff --git a/drivers/char/watchdog/wafer5823wdt.c b/drivers/char/watchdog/wafer5823wdt.c
index 7cf6c9bbf486..2bb6a9d6ad28 100644
--- a/drivers/char/watchdog/wafer5823wdt.c
+++ b/drivers/char/watchdog/wafer5823wdt.c
@@ -222,7 +222,7 @@ static int wafwdt_notify_sys(struct notifier_block *this, unsigned long code, vo
222 * Kernel Interfaces 222 * Kernel Interfaces
223 */ 223 */
224 224
225static struct file_operations wafwdt_fops = { 225static const struct file_operations wafwdt_fops = {
226 .owner = THIS_MODULE, 226 .owner = THIS_MODULE,
227 .llseek = no_llseek, 227 .llseek = no_llseek,
228 .write = wafwdt_write, 228 .write = wafwdt_write,
diff --git a/drivers/char/watchdog/wdrtas.c b/drivers/char/watchdog/wdrtas.c
index 3a462c34b92a..5c38cdf41731 100644
--- a/drivers/char/watchdog/wdrtas.c
+++ b/drivers/char/watchdog/wdrtas.c
@@ -520,7 +520,7 @@ wdrtas_reboot(struct notifier_block *this, unsigned long code, void *ptr)
520 520
521/*** initialization stuff */ 521/*** initialization stuff */
522 522
523static struct file_operations wdrtas_fops = { 523static const struct file_operations wdrtas_fops = {
524 .owner = THIS_MODULE, 524 .owner = THIS_MODULE,
525 .llseek = no_llseek, 525 .llseek = no_llseek,
526 .write = wdrtas_write, 526 .write = wdrtas_write,
@@ -535,7 +535,7 @@ static struct miscdevice wdrtas_miscdev = {
535 .fops = &wdrtas_fops, 535 .fops = &wdrtas_fops,
536}; 536};
537 537
538static struct file_operations wdrtas_temp_fops = { 538static const struct file_operations wdrtas_temp_fops = {
539 .owner = THIS_MODULE, 539 .owner = THIS_MODULE,
540 .llseek = no_llseek, 540 .llseek = no_llseek,
541 .read = wdrtas_temp_read, 541 .read = wdrtas_temp_read,
diff --git a/drivers/char/watchdog/wdt.c b/drivers/char/watchdog/wdt.c
index a1d972c8f44c..70be81e39a61 100644
--- a/drivers/char/watchdog/wdt.c
+++ b/drivers/char/watchdog/wdt.c
@@ -494,7 +494,7 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
494 */ 494 */
495 495
496 496
497static struct file_operations wdt_fops = { 497static const struct file_operations wdt_fops = {
498 .owner = THIS_MODULE, 498 .owner = THIS_MODULE,
499 .llseek = no_llseek, 499 .llseek = no_llseek,
500 .write = wdt_write, 500 .write = wdt_write,
@@ -510,7 +510,7 @@ static struct miscdevice wdt_miscdev = {
510}; 510};
511 511
512#ifdef CONFIG_WDT_501 512#ifdef CONFIG_WDT_501
513static struct file_operations wdt_temp_fops = { 513static const struct file_operations wdt_temp_fops = {
514 .owner = THIS_MODULE, 514 .owner = THIS_MODULE,
515 .llseek = no_llseek, 515 .llseek = no_llseek,
516 .read = wdt_temp_read, 516 .read = wdt_temp_read,
diff --git a/drivers/char/watchdog/wdt285.c b/drivers/char/watchdog/wdt285.c
index 52825a1f1779..6555fb844f23 100644
--- a/drivers/char/watchdog/wdt285.c
+++ b/drivers/char/watchdog/wdt285.c
@@ -178,7 +178,7 @@ watchdog_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
178 return ret; 178 return ret;
179} 179}
180 180
181static struct file_operations watchdog_fops = { 181static const struct file_operations watchdog_fops = {
182 .owner = THIS_MODULE, 182 .owner = THIS_MODULE,
183 .llseek = no_llseek, 183 .llseek = no_llseek,
184 .write = watchdog_write, 184 .write = watchdog_write,
diff --git a/drivers/char/watchdog/wdt977.c b/drivers/char/watchdog/wdt977.c
index 3cde2b9bb763..a0935bc775f8 100644
--- a/drivers/char/watchdog/wdt977.c
+++ b/drivers/char/watchdog/wdt977.c
@@ -418,7 +418,7 @@ static int wdt977_notify_sys(struct notifier_block *this, unsigned long code,
418 return NOTIFY_DONE; 418 return NOTIFY_DONE;
419} 419}
420 420
421static struct file_operations wdt977_fops= 421static const struct file_operations wdt977_fops=
422{ 422{
423 .owner = THIS_MODULE, 423 .owner = THIS_MODULE,
424 .llseek = no_llseek, 424 .llseek = no_llseek,
diff --git a/drivers/char/watchdog/wdt_pci.c b/drivers/char/watchdog/wdt_pci.c
index 7529ecdbabae..5918ca2c9c35 100644
--- a/drivers/char/watchdog/wdt_pci.c
+++ b/drivers/char/watchdog/wdt_pci.c
@@ -543,7 +543,7 @@ static int wdtpci_notify_sys(struct notifier_block *this, unsigned long code,
543 */ 543 */
544 544
545 545
546static struct file_operations wdtpci_fops = { 546static const struct file_operations wdtpci_fops = {
547 .owner = THIS_MODULE, 547 .owner = THIS_MODULE,
548 .llseek = no_llseek, 548 .llseek = no_llseek,
549 .write = wdtpci_write, 549 .write = wdtpci_write,
@@ -559,7 +559,7 @@ static struct miscdevice wdtpci_miscdev = {
559}; 559};
560 560
561#ifdef CONFIG_WDT_501_PCI 561#ifdef CONFIG_WDT_501_PCI
562static struct file_operations wdtpci_temp_fops = { 562static const struct file_operations wdtpci_temp_fops = {
563 .owner = THIS_MODULE, 563 .owner = THIS_MODULE,
564 .llseek = no_llseek, 564 .llseek = no_llseek,
565 .read = wdtpci_temp_read, 565 .read = wdtpci_temp_read,
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 6ca3476d02c7..adbe9f76a505 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -838,7 +838,7 @@ static ide_startstop_t idefloppy_pc_intr (ide_drive_t *drive)
838 "transferred\n", pc->actually_transferred); 838 "transferred\n", pc->actually_transferred);
839 clear_bit(PC_DMA_IN_PROGRESS, &pc->flags); 839 clear_bit(PC_DMA_IN_PROGRESS, &pc->flags);
840 840
841 local_irq_enable(); 841 local_irq_enable_in_hardirq();
842 842
843 if (status.b.check || test_bit(PC_DMA_ERROR, &pc->flags)) { 843 if (status.b.check || test_bit(PC_DMA_ERROR, &pc->flags)) {
844 /* Error detected */ 844 /* Error detected */
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 7dba9992ad30..fb6795236e76 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -693,7 +693,7 @@ static ide_startstop_t drive_cmd_intr (ide_drive_t *drive)
693 u8 stat = hwif->INB(IDE_STATUS_REG); 693 u8 stat = hwif->INB(IDE_STATUS_REG);
694 int retries = 10; 694 int retries = 10;
695 695
696 local_irq_enable(); 696 local_irq_enable_in_hardirq();
697 if ((stat & DRQ_STAT) && args && args[3]) { 697 if ((stat & DRQ_STAT) && args && args[3]) {
698 u8 io_32bit = drive->io_32bit; 698 u8 io_32bit = drive->io_32bit;
699 drive->io_32bit = 0; 699 drive->io_32bit = 0;
@@ -1286,7 +1286,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
1286 if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) 1286 if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
1287 disable_irq_nosync(hwif->irq); 1287 disable_irq_nosync(hwif->irq);
1288 spin_unlock(&ide_lock); 1288 spin_unlock(&ide_lock);
1289 local_irq_enable(); 1289 local_irq_enable_in_hardirq();
1290 /* allow other IRQs while we start this request */ 1290 /* allow other IRQs while we start this request */
1291 startstop = start_request(drive, rq); 1291 startstop = start_request(drive, rq);
1292 spin_lock_irq(&ide_lock); 1292 spin_lock_irq(&ide_lock);
@@ -1631,7 +1631,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs)
1631 spin_unlock(&ide_lock); 1631 spin_unlock(&ide_lock);
1632 1632
1633 if (drive->unmask) 1633 if (drive->unmask)
1634 local_irq_enable(); 1634 local_irq_enable_in_hardirq();
1635 /* service this interrupt, may set handler for next interrupt */ 1635 /* service this interrupt, may set handler for next interrupt */
1636 startstop = handler(drive); 1636 startstop = handler(drive);
1637 spin_lock_irq(&ide_lock); 1637 spin_lock_irq(&ide_lock);
@@ -1705,7 +1705,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
1705{ 1705{
1706 unsigned long flags; 1706 unsigned long flags;
1707 ide_hwgroup_t *hwgroup = HWGROUP(drive); 1707 ide_hwgroup_t *hwgroup = HWGROUP(drive);
1708 DECLARE_COMPLETION(wait); 1708 DECLARE_COMPLETION_ONSTACK(wait);
1709 int where = ELEVATOR_INSERT_BACK, err; 1709 int where = ELEVATOR_INSERT_BACK, err;
1710 int must_wait = (action == ide_wait || action == ide_head_wait); 1710 int must_wait = (action == ide_wait || action == ide_head_wait);
1711 1711
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 04547eb0833f..97a9244312fc 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -222,7 +222,7 @@ ide_startstop_t task_no_data_intr (ide_drive_t *drive)
222 ide_hwif_t *hwif = HWIF(drive); 222 ide_hwif_t *hwif = HWIF(drive);
223 u8 stat; 223 u8 stat;
224 224
225 local_irq_enable(); 225 local_irq_enable_in_hardirq();
226 if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) { 226 if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) {
227 return ide_error(drive, "task_no_data_intr", stat); 227 return ide_error(drive, "task_no_data_intr", stat);
228 /* calls ide_end_drive_cmd */ 228 /* calls ide_end_drive_cmd */
diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c
index 2c669287f5bd..4feead4a35c5 100644
--- a/drivers/ieee1394/hosts.c
+++ b/drivers/ieee1394/hosts.c
@@ -107,6 +107,14 @@ static int alloc_hostnum_cb(struct hpsb_host *host, void *__data)
107 */ 107 */
108static DEFINE_MUTEX(host_num_alloc); 108static DEFINE_MUTEX(host_num_alloc);
109 109
110/*
111 * The pending_packet_queue is special in that it's processed
112 * from hardirq context too (such as hpsb_bus_reset()). Hence
113 * split the lock class from the usual networking skb-head
114 * lock class by using a separate key for it:
115 */
116static struct lock_class_key pending_packet_queue_key;
117
110struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra, 118struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
111 struct device *dev) 119 struct device *dev)
112{ 120{
@@ -128,6 +136,8 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
128 h->driver = drv; 136 h->driver = drv;
129 137
130 skb_queue_head_init(&h->pending_packet_queue); 138 skb_queue_head_init(&h->pending_packet_queue);
139 lockdep_set_class(&h->pending_packet_queue.lock,
140 &pending_packet_queue_key);
131 INIT_LIST_HEAD(&h->addr_space); 141 INIT_LIST_HEAD(&h->addr_space);
132 142
133 for (i = 2; i < 16; i++) 143 for (i = 2; i < 16; i++)
diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h
index 7d9fafea9615..54adba2d8ed5 100644
--- a/drivers/input/serio/i8042-sparcio.h
+++ b/drivers/input/serio/i8042-sparcio.h
@@ -88,7 +88,7 @@ static struct of_device_id sparc_i8042_match[] = {
88 }, 88 },
89 {}, 89 {},
90}; 90};
91MODULE_DEVICE_TABLE(of, i8042_match); 91MODULE_DEVICE_TABLE(of, sparc_i8042_match);
92 92
93static struct of_platform_driver sparc_i8042_driver = { 93static struct of_platform_driver sparc_i8042_driver = {
94 .name = "i8042", 94 .name = "i8042",
diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index 79c97f94bcbd..61a6f977846f 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -177,7 +177,7 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
177 return -1; 177 return -1;
178 } 178 }
179 179
180 mutex_lock(&ps2dev->cmd_mutex); 180 mutex_lock_nested(&ps2dev->cmd_mutex, SINGLE_DEPTH_NESTING);
181 181
182 serio_pause_rx(ps2dev->serio); 182 serio_pause_rx(ps2dev->serio);
183 ps2dev->flags = command == PS2_CMD_GETID ? PS2_FLAG_WAITID : 0; 183 ps2dev->flags = command == PS2_CMD_GETID ? PS2_FLAG_WAITID : 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2fe32c261922..e4e161372a3e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1404,7 +1404,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
1404 struct block_device *bdev; 1404 struct block_device *bdev;
1405 char b[BDEVNAME_SIZE]; 1405 char b[BDEVNAME_SIZE];
1406 1406
1407 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 1407 bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE);
1408 if (IS_ERR(bdev)) { 1408 if (IS_ERR(bdev)) {
1409 printk(KERN_ERR "md: could not open %s.\n", 1409 printk(KERN_ERR "md: could not open %s.\n",
1410 __bdevname(dev, b)); 1410 __bdevname(dev, b));
@@ -1414,7 +1414,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
1414 if (err) { 1414 if (err) {
1415 printk(KERN_ERR "md: could not bd_claim %s.\n", 1415 printk(KERN_ERR "md: could not bd_claim %s.\n",
1416 bdevname(bdev, b)); 1416 bdevname(bdev, b));
1417 blkdev_put(bdev); 1417 blkdev_put_partition(bdev);
1418 return err; 1418 return err;
1419 } 1419 }
1420 rdev->bdev = bdev; 1420 rdev->bdev = bdev;
@@ -1428,7 +1428,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
1428 if (!bdev) 1428 if (!bdev)
1429 MD_BUG(); 1429 MD_BUG();
1430 bd_release(bdev); 1430 bd_release(bdev);
1431 blkdev_put(bdev); 1431 blkdev_put_partition(bdev);
1432} 1432}
1433 1433
1434void md_autodetect_dev(dev_t dev); 1434void md_autodetect_dev(dev_t dev);
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 247ff2f23ac9..33525bdf2ab6 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -128,7 +128,7 @@ static void mmc_wait_done(struct mmc_request *mrq)
128 128
129int mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq) 129int mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
130{ 130{
131 DECLARE_COMPLETION(complete); 131 DECLARE_COMPLETION_ONSTACK(complete);
132 132
133 mrq->done_data = &complete; 133 mrq->done_data = &complete;
134 mrq->done = mmc_wait_done; 134 mrq->done = mmc_wait_done;
diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c
index 74134699ccee..893319108ba4 100644
--- a/drivers/mmc/sdhci.c
+++ b/drivers/mmc/sdhci.c
@@ -32,9 +32,39 @@ static unsigned int debug_nodma = 0;
32static unsigned int debug_forcedma = 0; 32static unsigned int debug_forcedma = 0;
33static unsigned int debug_quirks = 0; 33static unsigned int debug_quirks = 0;
34 34
35#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1<<0)
36#define SDHCI_QUIRK_FORCE_DMA (1<<1)
37
35static const struct pci_device_id pci_ids[] __devinitdata = { 38static const struct pci_device_id pci_ids[] __devinitdata = {
36 /* handle any SD host controller */ 39 {
37 {PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)}, 40 .vendor = PCI_VENDOR_ID_RICOH,
41 .device = PCI_DEVICE_ID_RICOH_R5C822,
42 .subvendor = PCI_VENDOR_ID_IBM,
43 .subdevice = PCI_ANY_ID,
44 .driver_data = SDHCI_QUIRK_CLOCK_BEFORE_RESET |
45 SDHCI_QUIRK_FORCE_DMA,
46 },
47
48 {
49 .vendor = PCI_VENDOR_ID_RICOH,
50 .device = PCI_DEVICE_ID_RICOH_R5C822,
51 .subvendor = PCI_ANY_ID,
52 .subdevice = PCI_ANY_ID,
53 .driver_data = SDHCI_QUIRK_FORCE_DMA,
54 },
55
56 {
57 .vendor = PCI_VENDOR_ID_TI,
58 .device = PCI_DEVICE_ID_TI_XX21_XX11_SD,
59 .subvendor = PCI_ANY_ID,
60 .subdevice = PCI_ANY_ID,
61 .driver_data = SDHCI_QUIRK_FORCE_DMA,
62 },
63
64 { /* Generic SD host controller */
65 PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)
66 },
67
38 { /* end: all zeroes */ }, 68 { /* end: all zeroes */ },
39}; 69};
40 70
@@ -440,9 +470,7 @@ static void sdhci_finish_data(struct sdhci_host *host)
440 "though there were blocks left. Please report this " 470 "though there were blocks left. Please report this "
441 "to " BUGMAIL ".\n", mmc_hostname(host->mmc)); 471 "to " BUGMAIL ".\n", mmc_hostname(host->mmc));
442 data->error = MMC_ERR_FAILED; 472 data->error = MMC_ERR_FAILED;
443 } 473 } else if (host->size != 0) {
444
445 if (host->size != 0) {
446 printk(KERN_ERR "%s: %d bytes were left untransferred. " 474 printk(KERN_ERR "%s: %d bytes were left untransferred. "
447 "Please report this to " BUGMAIL ".\n", 475 "Please report this to " BUGMAIL ".\n",
448 mmc_hostname(host->mmc), host->size); 476 mmc_hostname(host->mmc), host->size);
@@ -808,6 +836,19 @@ static void sdhci_tasklet_finish(unsigned long param)
808 if ((mrq->cmd->error != MMC_ERR_NONE) || 836 if ((mrq->cmd->error != MMC_ERR_NONE) ||
809 (mrq->data && ((mrq->data->error != MMC_ERR_NONE) || 837 (mrq->data && ((mrq->data->error != MMC_ERR_NONE) ||
810 (mrq->data->stop && (mrq->data->stop->error != MMC_ERR_NONE))))) { 838 (mrq->data->stop && (mrq->data->stop->error != MMC_ERR_NONE))))) {
839
840 /* Some controllers need this kick or reset won't work here */
841 if (host->chip->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) {
842 unsigned int clock;
843
844 /* This is to force an update */
845 clock = host->clock;
846 host->clock = 0;
847 sdhci_set_clock(host, clock);
848 }
849
850 /* Spec says we should do both at the same time, but Ricoh
851 controllers do not like that. */
811 sdhci_reset(host, SDHCI_RESET_CMD); 852 sdhci_reset(host, SDHCI_RESET_CMD);
812 sdhci_reset(host, SDHCI_RESET_DATA); 853 sdhci_reset(host, SDHCI_RESET_DATA);
813 } 854 }
@@ -1165,7 +1206,9 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
1165 else if (debug_forcedma) { 1206 else if (debug_forcedma) {
1166 DBG("DMA forced on\n"); 1207 DBG("DMA forced on\n");
1167 host->flags |= SDHCI_USE_DMA; 1208 host->flags |= SDHCI_USE_DMA;
1168 } else if ((pdev->class & 0x0000FF) != PCI_SDHCI_IFDMA) 1209 } else if (chip->quirks & SDHCI_QUIRK_FORCE_DMA)
1210 host->flags |= SDHCI_USE_DMA;
1211 else if ((pdev->class & 0x0000FF) != PCI_SDHCI_IFDMA)
1169 DBG("Controller doesn't have DMA interface\n"); 1212 DBG("Controller doesn't have DMA interface\n");
1170 else if (!(caps & SDHCI_CAN_DO_DMA)) 1213 else if (!(caps & SDHCI_CAN_DO_DMA))
1171 DBG("Controller doesn't have DMA capability\n"); 1214 DBG("Controller doesn't have DMA capability\n");
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 8ab03b4a885e..2819de79442c 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -1897,7 +1897,7 @@ vortex_timer(unsigned long data)
1897 printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo); 1897 printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo);
1898 } 1898 }
1899 1899
1900 disable_irq(dev->irq); 1900 disable_irq_lockdep(dev->irq);
1901 old_window = ioread16(ioaddr + EL3_CMD) >> 13; 1901 old_window = ioread16(ioaddr + EL3_CMD) >> 13;
1902 EL3WINDOW(4); 1902 EL3WINDOW(4);
1903 media_status = ioread16(ioaddr + Wn4_Media); 1903 media_status = ioread16(ioaddr + Wn4_Media);
@@ -1978,7 +1978,7 @@ leave_media_alone:
1978 dev->name, media_tbl[dev->if_port].name); 1978 dev->name, media_tbl[dev->if_port].name);
1979 1979
1980 EL3WINDOW(old_window); 1980 EL3WINDOW(old_window);
1981 enable_irq(dev->irq); 1981 enable_irq_lockdep(dev->irq);
1982 mod_timer(&vp->timer, RUN_AT(next_tick)); 1982 mod_timer(&vp->timer, RUN_AT(next_tick));
1983 if (vp->deferred) 1983 if (vp->deferred)
1984 iowrite16(FakeIntr, ioaddr + EL3_CMD); 1984 iowrite16(FakeIntr, ioaddr + EL3_CMD);
diff --git a/drivers/net/8390.c b/drivers/net/8390.c
index 86be96af9c8f..d2935ae39814 100644
--- a/drivers/net/8390.c
+++ b/drivers/net/8390.c
@@ -249,7 +249,7 @@ void ei_tx_timeout(struct net_device *dev)
249 249
250 /* Ugly but a reset can be slow, yet must be protected */ 250 /* Ugly but a reset can be slow, yet must be protected */
251 251
252 disable_irq_nosync(dev->irq); 252 disable_irq_nosync_lockdep(dev->irq);
253 spin_lock(&ei_local->page_lock); 253 spin_lock(&ei_local->page_lock);
254 254
255 /* Try to restart the card. Perhaps the user has fixed something. */ 255 /* Try to restart the card. Perhaps the user has fixed something. */
@@ -257,7 +257,7 @@ void ei_tx_timeout(struct net_device *dev)
257 NS8390_init(dev, 1); 257 NS8390_init(dev, 1);
258 258
259 spin_unlock(&ei_local->page_lock); 259 spin_unlock(&ei_local->page_lock);
260 enable_irq(dev->irq); 260 enable_irq_lockdep(dev->irq);
261 netif_wake_queue(dev); 261 netif_wake_queue(dev);
262} 262}
263 263
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 3c90003f4230..037d870712ff 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2735,21 +2735,21 @@ static void nv_do_nic_poll(unsigned long data)
2735 2735
2736 if (!using_multi_irqs(dev)) { 2736 if (!using_multi_irqs(dev)) {
2737 if (np->msi_flags & NV_MSI_X_ENABLED) 2737 if (np->msi_flags & NV_MSI_X_ENABLED)
2738 disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); 2738 disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
2739 else 2739 else
2740 disable_irq(dev->irq); 2740 disable_irq_lockdep(dev->irq);
2741 mask = np->irqmask; 2741 mask = np->irqmask;
2742 } else { 2742 } else {
2743 if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { 2743 if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
2744 disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); 2744 disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
2745 mask |= NVREG_IRQ_RX_ALL; 2745 mask |= NVREG_IRQ_RX_ALL;
2746 } 2746 }
2747 if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { 2747 if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
2748 disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); 2748 disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
2749 mask |= NVREG_IRQ_TX_ALL; 2749 mask |= NVREG_IRQ_TX_ALL;
2750 } 2750 }
2751 if (np->nic_poll_irq & NVREG_IRQ_OTHER) { 2751 if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
2752 disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); 2752 disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
2753 mask |= NVREG_IRQ_OTHER; 2753 mask |= NVREG_IRQ_OTHER;
2754 } 2754 }
2755 } 2755 }
@@ -2761,23 +2761,23 @@ static void nv_do_nic_poll(unsigned long data)
2761 pci_push(base); 2761 pci_push(base);
2762 2762
2763 if (!using_multi_irqs(dev)) { 2763 if (!using_multi_irqs(dev)) {
2764 nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL); 2764 nv_nic_irq(0, dev, NULL);
2765 if (np->msi_flags & NV_MSI_X_ENABLED) 2765 if (np->msi_flags & NV_MSI_X_ENABLED)
2766 enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); 2766 enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
2767 else 2767 else
2768 enable_irq(dev->irq); 2768 enable_irq_lockdep(dev->irq);
2769 } else { 2769 } else {
2770 if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { 2770 if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
2771 nv_nic_irq_rx((int) 0, (void *) data, (struct pt_regs *) NULL); 2771 nv_nic_irq_rx(0, dev, NULL);
2772 enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); 2772 enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
2773 } 2773 }
2774 if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { 2774 if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
2775 nv_nic_irq_tx((int) 0, (void *) data, (struct pt_regs *) NULL); 2775 nv_nic_irq_tx(0, dev, NULL);
2776 enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); 2776 enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
2777 } 2777 }
2778 if (np->nic_poll_irq & NVREG_IRQ_OTHER) { 2778 if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
2779 nv_nic_irq_other((int) 0, (void *) data, (struct pt_regs *) NULL); 2779 nv_nic_irq_other(0, dev, NULL);
2780 enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); 2780 enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
2781 } 2781 }
2782 } 2782 }
2783} 2783}
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index b764cfda6e84..dafaa5ff5aa6 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -3095,6 +3095,14 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
3095} 3095}
3096 3096
3097 3097
3098/*
3099 * HostAP uses two layers of net devices, where the inner
3100 * layer gets called all the time from the outer layer.
3101 * This is a natural nesting, which needs a split lock type.
3102 */
3103static struct lock_class_key hostap_netdev_xmit_lock_key;
3104
3105
3098static struct net_device * 3106static struct net_device *
3099prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx, 3107prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx,
3100 struct device *sdev) 3108 struct device *sdev)
@@ -3259,6 +3267,8 @@ while (0)
3259 SET_NETDEV_DEV(dev, sdev); 3267 SET_NETDEV_DEV(dev, sdev);
3260 if (ret >= 0) 3268 if (ret >= 0)
3261 ret = register_netdevice(dev); 3269 ret = register_netdevice(dev);
3270
3271 lockdep_set_class(&dev->_xmit_lock, &hostap_netdev_xmit_lock_key);
3262 rtnl_unlock(); 3272 rtnl_unlock();
3263 if (ret < 0) { 3273 if (ret < 0) {
3264 printk(KERN_WARNING "%s: register netdevice failed!\n", 3274 printk(KERN_WARNING "%s: register netdevice failed!\n",
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 3a4a644c2686..212268881857 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -74,7 +74,7 @@ static void decode_irq_flags(int flag, int *triggering, int *polarity)
74 74
75static void 75static void
76pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi, 76pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi,
77 int triggering, int polarity) 77 int triggering, int polarity, int shareable)
78{ 78{
79 int i = 0; 79 int i = 0;
80 int irq; 80 int irq;
@@ -95,6 +95,9 @@ pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi,
95 return; 95 return;
96 } 96 }
97 97
98 if (shareable)
99 res->irq_resource[i].flags |= IORESOURCE_IRQ_SHAREABLE;
100
98 res->irq_resource[i].start = irq; 101 res->irq_resource[i].start = irq;
99 res->irq_resource[i].end = irq; 102 res->irq_resource[i].end = irq;
100 pcibios_penalize_isa_irq(irq, 1); 103 pcibios_penalize_isa_irq(irq, 1);
@@ -194,7 +197,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
194 pnpacpi_parse_allocated_irqresource(res_table, 197 pnpacpi_parse_allocated_irqresource(res_table,
195 res->data.irq.interrupts[i], 198 res->data.irq.interrupts[i],
196 res->data.irq.triggering, 199 res->data.irq.triggering,
197 res->data.irq.polarity); 200 res->data.irq.polarity,
201 res->data.irq.sharable);
198 } 202 }
199 break; 203 break;
200 204
@@ -255,7 +259,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
255 pnpacpi_parse_allocated_irqresource(res_table, 259 pnpacpi_parse_allocated_irqresource(res_table,
256 res->data.extended_irq.interrupts[i], 260 res->data.extended_irq.interrupts[i],
257 res->data.extended_irq.triggering, 261 res->data.extended_irq.triggering,
258 res->data.extended_irq.polarity); 262 res->data.extended_irq.polarity,
263 res->data.extended_irq.sharable);
259 } 264 }
260 break; 265 break;
261 266
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 4138564402b8..985d1613baaa 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -383,6 +383,7 @@ void
383sclp_sync_wait(void) 383sclp_sync_wait(void)
384{ 384{
385 unsigned long psw_mask; 385 unsigned long psw_mask;
386 unsigned long flags;
386 unsigned long cr0, cr0_sync; 387 unsigned long cr0, cr0_sync;
387 u64 timeout; 388 u64 timeout;
388 389
@@ -395,9 +396,11 @@ sclp_sync_wait(void)
395 sclp_tod_from_jiffies(sclp_request_timer.expires - 396 sclp_tod_from_jiffies(sclp_request_timer.expires -
396 jiffies); 397 jiffies);
397 } 398 }
399 local_irq_save(flags);
398 /* Prevent bottom half from executing once we force interrupts open */ 400 /* Prevent bottom half from executing once we force interrupts open */
399 local_bh_disable(); 401 local_bh_disable();
400 /* Enable service-signal interruption, disable timer interrupts */ 402 /* Enable service-signal interruption, disable timer interrupts */
403 trace_hardirqs_on();
401 __ctl_store(cr0, 0, 0); 404 __ctl_store(cr0, 0, 0);
402 cr0_sync = cr0; 405 cr0_sync = cr0;
403 cr0_sync |= 0x00000200; 406 cr0_sync |= 0x00000200;
@@ -415,11 +418,10 @@ sclp_sync_wait(void)
415 barrier(); 418 barrier();
416 cpu_relax(); 419 cpu_relax();
417 } 420 }
418 /* Restore interrupt settings */ 421 local_irq_disable();
419 asm volatile ("SSM 0(%0)"
420 : : "a" (&psw_mask) : "memory");
421 __ctl_load(cr0, 0, 0); 422 __ctl_load(cr0, 0, 0);
422 __local_bh_enable(); 423 _local_bh_enable();
424 local_irq_restore(flags);
423} 425}
424 426
425EXPORT_SYMBOL(sclp_sync_wait); 427EXPORT_SYMBOL(sclp_sync_wait);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index a3423267467f..6fec90eab00e 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -147,7 +147,7 @@ cio_tpi(void)
147 sch->driver->irq(&sch->dev); 147 sch->driver->irq(&sch->dev);
148 spin_unlock(&sch->lock); 148 spin_unlock(&sch->lock);
149 irq_exit (); 149 irq_exit ();
150 __local_bh_enable(); 150 _local_bh_enable();
151 return 1; 151 return 1;
152} 152}
153 153
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 36733b9823c6..8e8963f15731 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -84,6 +84,8 @@ static debug_info_t *qeth_dbf_qerr = NULL;
84 84
85DEFINE_PER_CPU(char[256], qeth_dbf_txt_buf); 85DEFINE_PER_CPU(char[256], qeth_dbf_txt_buf);
86 86
87static struct lock_class_key qdio_out_skb_queue_key;
88
87/** 89/**
88 * some more definitions and declarations 90 * some more definitions and declarations
89 */ 91 */
@@ -3229,6 +3231,9 @@ qeth_alloc_qdio_buffers(struct qeth_card *card)
3229 &card->qdio.out_qs[i]->qdio_bufs[j]; 3231 &card->qdio.out_qs[i]->qdio_bufs[j];
3230 skb_queue_head_init(&card->qdio.out_qs[i]->bufs[j]. 3232 skb_queue_head_init(&card->qdio.out_qs[i]->bufs[j].
3231 skb_list); 3233 skb_list);
3234 lockdep_set_class(
3235 &card->qdio.out_qs[i]->bufs[j].skb_list.lock,
3236 &qdio_out_skb_queue_key);
3232 INIT_LIST_HEAD(&card->qdio.out_qs[i]->bufs[j].ctx_list); 3237 INIT_LIST_HEAD(&card->qdio.out_qs[i]->bufs[j].ctx_list);
3233 } 3238 }
3234 } 3239 }
@@ -5272,6 +5277,7 @@ qeth_free_vlan_buffer(struct qeth_card *card, struct qeth_qdio_out_buffer *buf,
5272 struct sk_buff_head tmp_list; 5277 struct sk_buff_head tmp_list;
5273 5278
5274 skb_queue_head_init(&tmp_list); 5279 skb_queue_head_init(&tmp_list);
5280 lockdep_set_class(&tmp_list.lock, &qdio_out_skb_queue_key);
5275 for(i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i){ 5281 for(i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i){
5276 while ((skb = skb_dequeue(&buf->skb_list))){ 5282 while ((skb = skb_dequeue(&buf->skb_list))){
5277 if (vlan_tx_tag_present(skb) && 5283 if (vlan_tx_tag_present(skb) &&
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 432136f96e64..ffb3677e354f 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -378,6 +378,8 @@ s390_do_machine_check(struct pt_regs *regs)
378 struct mcck_struct *mcck; 378 struct mcck_struct *mcck;
379 int umode; 379 int umode;
380 380
381 lockdep_off();
382
381 mci = (struct mci *) &S390_lowcore.mcck_interruption_code; 383 mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
382 mcck = &__get_cpu_var(cpu_mcck); 384 mcck = &__get_cpu_var(cpu_mcck);
383 umode = user_mode(regs); 385 umode = user_mode(regs);
@@ -482,6 +484,7 @@ s390_do_machine_check(struct pt_regs *regs)
482 mcck->warning = 1; 484 mcck->warning = 1;
483 set_thread_flag(TIF_MCCK_PENDING); 485 set_thread_flag(TIF_MCCK_PENDING);
484 } 486 }
487 lockdep_on();
485} 488}
486 489
487/* 490/*
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 82caba464291..1c960ac1617f 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1001,7 +1001,7 @@ unsigned ata_exec_internal(struct ata_device *dev,
1001 struct ata_queued_cmd *qc; 1001 struct ata_queued_cmd *qc;
1002 unsigned int tag, preempted_tag; 1002 unsigned int tag, preempted_tag;
1003 u32 preempted_sactive, preempted_qc_active; 1003 u32 preempted_sactive, preempted_qc_active;
1004 DECLARE_COMPLETION(wait); 1004 DECLARE_COMPLETION_ONSTACK(wait);
1005 unsigned long flags; 1005 unsigned long flags;
1006 unsigned int err_mask; 1006 unsigned int err_mask;
1007 int rc; 1007 int rc;
diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c
index 739bc84f91e9..632f62d6ec7e 100644
--- a/drivers/serial/8250_pnp.c
+++ b/drivers/serial/8250_pnp.c
@@ -431,6 +431,8 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
431#endif 431#endif
432 432
433 port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF; 433 port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF;
434 if (pnp_irq_flags(dev, 0) & IORESOURCE_IRQ_SHAREABLE)
435 port.flags |= UPF_SHARE_IRQ;
434 port.uartclk = 1843200; 436 port.uartclk = 1843200;
435 port.dev = &dev->dev; 437 port.dev = &dev->dev;
436 438
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index c54af8774393..95831808334c 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -49,6 +49,12 @@
49 */ 49 */
50static DEFINE_MUTEX(port_mutex); 50static DEFINE_MUTEX(port_mutex);
51 51
52/*
53 * lockdep: port->lock is initialized in two places, but we
54 * want only one lock-class:
55 */
56static struct lock_class_key port_lock_key;
57
52#define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) 58#define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8)
53 59
54#define uart_users(state) ((state)->count + ((state)->info ? (state)->info->blocked_open : 0)) 60#define uart_users(state) ((state)->count + ((state)->info ? (state)->info->blocked_open : 0))
@@ -1865,6 +1871,7 @@ uart_set_options(struct uart_port *port, struct console *co,
1865 * early. 1871 * early.
1866 */ 1872 */
1867 spin_lock_init(&port->lock); 1873 spin_lock_init(&port->lock);
1874 lockdep_set_class(&port->lock, &port_lock_key);
1868 1875
1869 memset(&termios, 0, sizeof(struct termios)); 1876 memset(&termios, 0, sizeof(struct termios));
1870 1877
@@ -2247,8 +2254,10 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port)
2247 * If this port is a console, then the spinlock is already 2254 * If this port is a console, then the spinlock is already
2248 * initialised. 2255 * initialised.
2249 */ 2256 */
2250 if (!(uart_console(port) && (port->cons->flags & CON_ENABLED))) 2257 if (!(uart_console(port) && (port->cons->flags & CON_ENABLED))) {
2251 spin_lock_init(&port->lock); 2258 spin_lock_init(&port->lock);
2259 lockdep_set_class(&port->lock, &port_lock_key);
2260 }
2252 2261
2253 uart_configure_port(drv, state, port); 2262 uart_configure_port(drv, state, port);
2254 2263
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ed1cdf6ac8f3..146298ad7371 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -510,7 +510,7 @@ static void spi_complete(void *arg)
510 */ 510 */
511int spi_sync(struct spi_device *spi, struct spi_message *message) 511int spi_sync(struct spi_device *spi, struct spi_message *message)
512{ 512{
513 DECLARE_COMPLETION(done); 513 DECLARE_COMPLETION_ONSTACK(done);
514 int status; 514 int status;
515 515
516 message->complete = spi_complete; 516 message->complete = spi_complete;
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index e47e3a8ed6e4..f48c3dbc367a 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -200,7 +200,7 @@ static void update_sb(struct super_block *sb)
200 if (!root) 200 if (!root)
201 return; 201 return;
202 202
203 mutex_lock(&root->d_inode->i_mutex); 203 mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
204 204
205 list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) { 205 list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) {
206 if (bus->d_inode) { 206 if (bus->d_inode) {
@@ -527,7 +527,7 @@ static void fs_remove_file (struct dentry *dentry)
527 if (!parent || !parent->d_inode) 527 if (!parent || !parent->d_inode)
528 return; 528 return;
529 529
530 mutex_lock(&parent->d_inode->i_mutex); 530 mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT);
531 if (usbfs_positive(dentry)) { 531 if (usbfs_positive(dentry)) {
532 if (dentry->d_inode) { 532 if (dentry->d_inode) {
533 if (S_ISDIR(dentry->d_inode->i_mode)) 533 if (S_ISDIR(dentry->d_inode->i_mode))
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 17de4c84db69..3badb48d662b 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1557,6 +1557,21 @@ config FB_S3C2410_DEBUG
1557 Turn on debugging messages. Note that you can set/unset at run time 1557 Turn on debugging messages. Note that you can set/unset at run time
1558 through sysfs 1558 through sysfs
1559 1559
1560config FB_PNX4008_DUM
1561 tristate "Display Update Module support on Philips PNX4008 board"
1562 depends on FB && ARCH_PNX4008
1563 ---help---
1564 Say Y here to enable support for PNX4008 Display Update Module (DUM)
1565
1566config FB_PNX4008_DUM_RGB
1567 tristate "RGB Framebuffer support on Philips PNX4008 board"
1568 depends on FB_PNX4008_DUM
1569 select FB_CFB_FILLRECT
1570 select FB_CFB_COPYAREA
1571 select FB_CFB_IMAGEBLIT
1572 ---help---
1573 Say Y here to enable support for PNX4008 RGB Framebuffer
1574
1560config FB_VIRTUAL 1575config FB_VIRTUAL
1561 tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)" 1576 tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
1562 depends on FB 1577 depends on FB
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index c335e9bc3b20..6283d015f8f5 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -94,6 +94,8 @@ obj-$(CONFIG_FB_TX3912) += tx3912fb.o
94obj-$(CONFIG_FB_S1D13XXX) += s1d13xxxfb.o 94obj-$(CONFIG_FB_S1D13XXX) += s1d13xxxfb.o
95obj-$(CONFIG_FB_IMX) += imxfb.o 95obj-$(CONFIG_FB_IMX) += imxfb.o
96obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o 96obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o
97obj-$(CONFIG_FB_PNX4008_DUM) += pnx4008/
98obj-$(CONFIG_FB_PNX4008_DUM_RGB) += pnx4008/
97 99
98# Platform or fallback drivers go here 100# Platform or fallback drivers go here
99obj-$(CONFIG_FB_VESA) += vesafb.o 101obj-$(CONFIG_FB_VESA) += vesafb.o
diff --git a/drivers/video/pnx4008/Makefile b/drivers/video/pnx4008/Makefile
new file mode 100644
index 000000000000..636aaccf01fd
--- /dev/null
+++ b/drivers/video/pnx4008/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the new PNX4008 framebuffer device driver
3#
4
5obj-$(CONFIG_FB_PNX4008_DUM) += sdum.o
6obj-$(CONFIG_FB_PNX4008_DUM_RGB) += pnxrgbfb.o
7
diff --git a/drivers/video/pnx4008/dum.h b/drivers/video/pnx4008/dum.h
new file mode 100644
index 000000000000..d80a614d89ed
--- /dev/null
+++ b/drivers/video/pnx4008/dum.h
@@ -0,0 +1,211 @@
1/*
2 * linux/drivers/video/pnx4008/dum.h
3 *
4 * Internal header for SDUM
5 *
6 * 2005 (c) Koninklijke Philips N.V. This file is licensed under
7 * the terms of the GNU General Public License version 2. This program
8 * is licensed "as is" without any warranty of any kind, whether express
9 * or implied.
10 */
11
12#ifndef __PNX008_DUM_H__
13#define __PNX008_DUM_H__
14
15#include <asm/arch/platform.h>
16
17#define PNX4008_DUMCONF_VA_BASE IO_ADDRESS(PNX4008_DUMCONF_BASE)
18#define PNX4008_DUM_MAIN_VA_BASE IO_ADDRESS(PNX4008_DUM_MAINCFG_BASE)
19
20/* DUM CFG ADDRESSES */
21#define DUM_CH_BASE_ADR (PNX4008_DUMCONF_VA_BASE + 0x00)
22#define DUM_CH_MIN_ADR (PNX4008_DUMCONF_VA_BASE + 0x00)
23#define DUM_CH_MAX_ADR (PNX4008_DUMCONF_VA_BASE + 0x04)
24#define DUM_CH_CONF_ADR (PNX4008_DUMCONF_VA_BASE + 0x08)
25#define DUM_CH_STAT_ADR (PNX4008_DUMCONF_VA_BASE + 0x0C)
26#define DUM_CH_CTRL_ADR (PNX4008_DUMCONF_VA_BASE + 0x10)
27
28#define CH_MARG (0x100 / sizeof(u32))
29#define DUM_CH_MIN(i) (*((volatile u32 *)DUM_CH_MIN_ADR + (i) * CH_MARG))
30#define DUM_CH_MAX(i) (*((volatile u32 *)DUM_CH_MAX_ADR + (i) * CH_MARG))
31#define DUM_CH_CONF(i) (*((volatile u32 *)DUM_CH_CONF_ADR + (i) * CH_MARG))
32#define DUM_CH_STAT(i) (*((volatile u32 *)DUM_CH_STAT_ADR + (i) * CH_MARG))
33#define DUM_CH_CTRL(i) (*((volatile u32 *)DUM_CH_CTRL_ADR + (i) * CH_MARG))
34
35#define DUM_CONF_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x00)
36#define DUM_CTRL_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x04)
37#define DUM_STAT_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x08)
38#define DUM_DECODE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x0C)
39#define DUM_COM_BASE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x10)
40#define DUM_SYNC_C_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x14)
41#define DUM_CLK_DIV_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x18)
42#define DUM_DIRTY_LOW_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x20)
43#define DUM_DIRTY_HIGH_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x24)
44#define DUM_FORMAT_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x28)
45#define DUM_WTCFG1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x30)
46#define DUM_RTCFG1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x34)
47#define DUM_WTCFG2_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x38)
48#define DUM_RTCFG2_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x3C)
49#define DUM_TCFG_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x40)
50#define DUM_OUTP_FORMAT1_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x44)
51#define DUM_OUTP_FORMAT2_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x48)
52#define DUM_SYNC_MODE_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x4C)
53#define DUM_SYNC_OUT_C_ADR (PNX4008_DUM_MAIN_VA_BASE + 0x50)
54
55#define DUM_CONF (*(volatile u32 *)(DUM_CONF_ADR))
56#define DUM_CTRL (*(volatile u32 *)(DUM_CTRL_ADR))
57#define DUM_STAT (*(volatile u32 *)(DUM_STAT_ADR))
58#define DUM_DECODE (*(volatile u32 *)(DUM_DECODE_ADR))
59#define DUM_COM_BASE (*(volatile u32 *)(DUM_COM_BASE_ADR))
60#define DUM_SYNC_C (*(volatile u32 *)(DUM_SYNC_C_ADR))
61#define DUM_CLK_DIV (*(volatile u32 *)(DUM_CLK_DIV_ADR))
62#define DUM_DIRTY_LOW (*(volatile u32 *)(DUM_DIRTY_LOW_ADR))
63#define DUM_DIRTY_HIGH (*(volatile u32 *)(DUM_DIRTY_HIGH_ADR))
64#define DUM_FORMAT (*(volatile u32 *)(DUM_FORMAT_ADR))
65#define DUM_WTCFG1 (*(volatile u32 *)(DUM_WTCFG1_ADR))
66#define DUM_RTCFG1 (*(volatile u32 *)(DUM_RTCFG1_ADR))
67#define DUM_WTCFG2 (*(volatile u32 *)(DUM_WTCFG2_ADR))
68#define DUM_RTCFG2 (*(volatile u32 *)(DUM_RTCFG2_ADR))
69#define DUM_TCFG (*(volatile u32 *)(DUM_TCFG_ADR))
70#define DUM_OUTP_FORMAT1 (*(volatile u32 *)(DUM_OUTP_FORMAT1_ADR))
71#define DUM_OUTP_FORMAT2 (*(volatile u32 *)(DUM_OUTP_FORMAT2_ADR))
72#define DUM_SYNC_MODE (*(volatile u32 *)(DUM_SYNC_MODE_ADR))
73#define DUM_SYNC_OUT_C (*(volatile u32 *)(DUM_SYNC_OUT_C_ADR))
74
75/* DUM SLAVE ADDRESSES */
76#define DUM_SLAVE_WRITE_ADR (PNX4008_DUM_MAINCFG_BASE + 0x0000000)
77#define DUM_SLAVE_READ1_I_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000000)
78#define DUM_SLAVE_READ1_R_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000004)
79#define DUM_SLAVE_READ2_I_ADR (PNX4008_DUM_MAINCFG_BASE + 0x1000008)
80#define DUM_SLAVE_READ2_R_ADR (PNX4008_DUM_MAINCFG_BASE + 0x100000C)
81
82#define DUM_SLAVE_WRITE_W ((volatile u32 *)(DUM_SLAVE_WRITE_ADR))
83#define DUM_SLAVE_WRITE_HW ((volatile u16 *)(DUM_SLAVE_WRITE_ADR))
84#define DUM_SLAVE_READ1_I ((volatile u8 *)(DUM_SLAVE_READ1_I_ADR))
85#define DUM_SLAVE_READ1_R ((volatile u16 *)(DUM_SLAVE_READ1_R_ADR))
86#define DUM_SLAVE_READ2_I ((volatile u8 *)(DUM_SLAVE_READ2_I_ADR))
87#define DUM_SLAVE_READ2_R ((volatile u16 *)(DUM_SLAVE_READ2_R_ADR))
88
89/* Sony display register addresses */
90#define DISP_0_REG (0x00)
91#define DISP_1_REG (0x01)
92#define DISP_CAL_REG (0x20)
93#define DISP_ID_REG (0x2A)
94#define DISP_XMIN_L_REG (0x30)
95#define DISP_XMIN_H_REG (0x31)
96#define DISP_YMIN_REG (0x32)
97#define DISP_XMAX_L_REG (0x34)
98#define DISP_XMAX_H_REG (0x35)
99#define DISP_YMAX_REG (0x36)
100#define DISP_SYNC_EN_REG (0x38)
101#define DISP_SYNC_RISE_L_REG (0x3C)
102#define DISP_SYNC_RISE_H_REG (0x3D)
103#define DISP_SYNC_FALL_L_REG (0x3E)
104#define DISP_SYNC_FALL_H_REG (0x3F)
105#define DISP_PIXEL_REG (0x0B)
106#define DISP_DUMMY1_REG (0x28)
107#define DISP_DUMMY2_REG (0x29)
108#define DISP_TIMING_REG (0x98)
109#define DISP_DUMP_REG (0x99)
110
111/* Sony display constants */
112#define SONY_ID1 (0x22)
113#define SONY_ID2 (0x23)
114
115/* Philips display register addresses */
116#define PH_DISP_ORIENT_REG (0x003)
117#define PH_DISP_YPOINT_REG (0x200)
118#define PH_DISP_XPOINT_REG (0x201)
119#define PH_DISP_PIXEL_REG (0x202)
120#define PH_DISP_YMIN_REG (0x406)
121#define PH_DISP_YMAX_REG (0x407)
122#define PH_DISP_XMIN_REG (0x408)
123#define PH_DISP_XMAX_REG (0x409)
124
125/* Misc constants */
126#define NO_VALID_DISPLAY_FOUND (0)
127#define DISPLAY2_IS_NOT_CONNECTED (0)
128
129/* register values */
130#define V_BAC_ENABLE (BIT(0))
131#define V_BAC_DISABLE_IDLE (BIT(1))
132#define V_BAC_DISABLE_TRIG (BIT(2))
133#define V_DUM_RESET (BIT(3))
134#define V_MUX_RESET (BIT(4))
135#define BAC_ENABLED (BIT(0))
136#define BAC_DISABLED 0
137
138/* Sony LCD commands */
139#define V_LCD_STANDBY_OFF ((BIT(25)) | (0 << 16) | DISP_0_REG)
140#define V_LCD_USE_9BIT_BUS ((BIT(25)) | (2 << 16) | DISP_1_REG)
141#define V_LCD_SYNC_RISE_L ((BIT(25)) | (0 << 16) | DISP_SYNC_RISE_L_REG)
142#define V_LCD_SYNC_RISE_H ((BIT(25)) | (0 << 16) | DISP_SYNC_RISE_H_REG)
143#define V_LCD_SYNC_FALL_L ((BIT(25)) | (160 << 16) | DISP_SYNC_FALL_L_REG)
144#define V_LCD_SYNC_FALL_H ((BIT(25)) | (0 << 16) | DISP_SYNC_FALL_H_REG)
145#define V_LCD_SYNC_ENABLE ((BIT(25)) | (128 << 16) | DISP_SYNC_EN_REG)
146#define V_LCD_DISPLAY_ON ((BIT(25)) | (64 << 16) | DISP_0_REG)
147
148enum {
149 PAD_NONE,
150 PAD_512,
151 PAD_1024
152};
153
154enum {
155 RGB888,
156 RGB666,
157 RGB565,
158 BGR565,
159 ARGB1555,
160 ABGR1555,
161 ARGB4444,
162 ABGR4444
163};
164
165struct dum_setup {
166 int sync_neg_edge;
167 int round_robin;
168 int mux_int;
169 int synced_dirty_flag_int;
170 int dirty_flag_int;
171 int error_int;
172 int pf_empty_int;
173 int sf_empty_int;
174 int bac_dis_int;
175 u32 dirty_base_adr;
176 u32 command_base_adr;
177 u32 sync_clk_div;
178 int sync_output;
179 u32 sync_restart_val;
180 u32 set_sync_high;
181 u32 set_sync_low;
182};
183
184struct dum_ch_setup {
185 int disp_no;
186 u32 xmin;
187 u32 ymin;
188 u32 xmax;
189 u32 ymax;
190 int xmirror;
191 int ymirror;
192 int rotate;
193 u32 minadr;
194 u32 maxadr;
195 u32 dirtybuffer;
196 int pad;
197 int format;
198 int hwdirty;
199 int slave_trans;
200};
201
202struct disp_window {
203 u32 xmin_l;
204 u32 xmin_h;
205 u32 ymin;
206 u32 xmax_l;
207 u32 xmax_h;
208 u32 ymax;
209};
210
211#endif /* #ifndef __PNX008_DUM_H__ */
diff --git a/drivers/video/pnx4008/fbcommon.h b/drivers/video/pnx4008/fbcommon.h
new file mode 100644
index 000000000000..4ebc87dafafb
--- /dev/null
+++ b/drivers/video/pnx4008/fbcommon.h
@@ -0,0 +1,43 @@
1/*
2 * Copyright (C) 2005 Philips Semiconductors
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA, or http://www.gnu.org/licenses/gpl.html
18*/
19
20#define QCIF_W (176)
21#define QCIF_H (144)
22
23#define CIF_W (352)
24#define CIF_H (288)
25
26#define LCD_X_RES 208
27#define LCD_Y_RES 320
28#define LCD_X_PAD 256
29#define LCD_BBP 4 /* Bytes Per Pixel */
30
31#define DISP_MAX_X_SIZE (320)
32#define DISP_MAX_Y_SIZE (208)
33
34#define RETURNVAL_BASE (0x400)
35
36enum fb_ioctl_returntype {
37 ENORESOURCESLEFT = RETURNVAL_BASE,
38 ERESOURCESNOTFREED,
39 EPROCNOTOWNER,
40 EFBNOTOWNER,
41 ECOPYFAILED,
42 EIOREMAPFAILED,
43};
diff --git a/drivers/video/pnx4008/pnxrgbfb.c b/drivers/video/pnx4008/pnxrgbfb.c
new file mode 100644
index 000000000000..7d9453c91a42
--- /dev/null
+++ b/drivers/video/pnx4008/pnxrgbfb.c
@@ -0,0 +1,213 @@
1/*
2 * drivers/video/pnx4008/pnxrgbfb.c
3 *
4 * PNX4008's framebuffer support
5 *
6 * Author: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com>
7 * Based on Philips Semiconductors's code
8 *
9 * Copyrght (c) 2005 MontaVista Software, Inc.
10 * Copyright (c) 2005 Philips Semiconductors
11 * This file is licensed under the terms of the GNU General Public License
12 * version 2. This program is licensed "as is" without any warranty of any
13 * kind, whether express or implied.
14 */
15
16#include <linux/module.h>
17#include <linux/kernel.h>
18#include <linux/errno.h>
19#include <linux/string.h>
20#include <linux/mm.h>
21#include <linux/slab.h>
22#include <linux/vmalloc.h>
23#include <linux/delay.h>
24#include <linux/interrupt.h>
25#include <linux/fb.h>
26#include <linux/init.h>
27#include <linux/platform_device.h>
28
29#include <asm/uaccess.h>
30#include "sdum.h"
31#include "fbcommon.h"
32
33static u32 colreg[16];
34
35static struct fb_var_screeninfo rgbfb_var __initdata = {
36 .xres = LCD_X_RES,
37 .yres = LCD_Y_RES,
38 .xres_virtual = LCD_X_RES,
39 .yres_virtual = LCD_Y_RES,
40 .bits_per_pixel = 32,
41 .red.offset = 16,
42 .red.length = 8,
43 .green.offset = 8,
44 .green.length = 8,
45 .blue.offset = 0,
46 .blue.length = 8,
47 .left_margin = 0,
48 .right_margin = 0,
49 .upper_margin = 0,
50 .lower_margin = 0,
51 .vmode = FB_VMODE_NONINTERLACED,
52};
53static struct fb_fix_screeninfo rgbfb_fix __initdata = {
54 .id = "RGBFB",
55 .line_length = LCD_X_RES * LCD_BBP,
56 .type = FB_TYPE_PACKED_PIXELS,
57 .visual = FB_VISUAL_TRUECOLOR,
58 .xpanstep = 0,
59 .ypanstep = 0,
60 .ywrapstep = 0,
61 .accel = FB_ACCEL_NONE,
62};
63
64static int channel_owned;
65
66static int no_cursor(struct fb_info *info, struct fb_cursor *cursor)
67{
68 return 0;
69}
70
71static int rgbfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
72 u_int transp, struct fb_info *info)
73{
74 if (regno > 15)
75 return 1;
76
77 colreg[regno] = ((red & 0xff00) << 8) | (green & 0xff00) |
78 ((blue & 0xff00) >> 8);
79 return 0;
80}
81
82static int rgbfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
83{
84 return pnx4008_sdum_mmap(info, vma, NULL);
85}
86
87static struct fb_ops rgbfb_ops = {
88 .fb_mmap = rgbfb_mmap,
89 .fb_setcolreg = rgbfb_setcolreg,
90 .fb_fillrect = cfb_fillrect,
91 .fb_copyarea = cfb_copyarea,
92 .fb_imageblit = cfb_imageblit,
93};
94
95static int rgbfb_remove(struct platform_device *pdev)
96{
97 struct fb_info *info = platform_get_drvdata(pdev);
98
99 if (info) {
100 unregister_framebuffer(info);
101 fb_dealloc_cmap(&info->cmap);
102 framebuffer_release(info);
103 platform_set_drvdata(pdev, NULL);
104 kfree(info);
105 }
106
107 pnx4008_free_dum_channel(channel_owned, pdev->id);
108 pnx4008_set_dum_exit_notification(pdev->id);
109
110 return 0;
111}
112
113static int __devinit rgbfb_probe(struct platform_device *pdev)
114{
115 struct fb_info *info;
116 struct dumchannel_uf chan_uf;
117 int ret;
118 char *option;
119
120 info = framebuffer_alloc(sizeof(u32) * 16, &pdev->dev);
121 if (!info) {
122 ret = -ENOMEM;
123 goto err;
124 }
125
126 pnx4008_get_fb_addresses(FB_TYPE_RGB, (void **)&info->screen_base,
127 (dma_addr_t *) &rgbfb_fix.smem_start,
128 &rgbfb_fix.smem_len);
129
130 if ((ret = pnx4008_alloc_dum_channel(pdev->id)) < 0)
131 goto err0;
132 else {
133 channel_owned = ret;
134 chan_uf.channelnr = channel_owned;
135 chan_uf.dirty = (u32 *) NULL;
136 chan_uf.source = (u32 *) rgbfb_fix.smem_start;
137 chan_uf.x_offset = 0;
138 chan_uf.y_offset = 0;
139 chan_uf.width = LCD_X_RES;
140 chan_uf.height = LCD_Y_RES;
141
142 if ((ret = pnx4008_put_dum_channel_uf(chan_uf, pdev->id))< 0)
143 goto err1;
144
145 if ((ret =
146 pnx4008_set_dum_channel_sync(channel_owned, CONF_SYNC_ON,
147 pdev->id)) < 0)
148 goto err1;
149
150 if ((ret =
151 pnx4008_set_dum_channel_dirty_detect(channel_owned,
152 CONF_DIRTYDETECTION_ON,
153 pdev->id)) < 0)
154 goto err1;
155 }
156
157 if (!fb_get_options("pnxrgbfb", &option) && !strcmp(option, "nocursor"))
158 rgbfb_ops.fb_cursor = no_cursor;
159
160 info->node = -1;
161 info->flags = FBINFO_FLAG_DEFAULT;
162 info->fbops = &rgbfb_ops;
163 info->fix = rgbfb_fix;
164 info->var = rgbfb_var;
165 info->screen_size = rgbfb_fix.smem_len;
166 info->pseudo_palette = info->par;
167 info->par = NULL;
168
169 ret = fb_alloc_cmap(&info->cmap, 256, 0);
170 if (ret < 0)
171 goto err2;
172
173 ret = register_framebuffer(info);
174 if (ret < 0)
175 goto err3;
176 platform_set_drvdata(pdev, info);
177
178 return 0;
179
180err3:
181 fb_dealloc_cmap(&info->cmap);
182err2:
183 framebuffer_release(info);
184err1:
185 pnx4008_free_dum_channel(channel_owned, pdev->id);
186err0:
187 kfree(info);
188err:
189 return ret;
190}
191
192static struct platform_driver rgbfb_driver = {
193 .driver = {
194 .name = "rgbfb",
195 },
196 .probe = rgbfb_probe,
197 .remove = rgbfb_remove,
198};
199
200static int __init rgbfb_init(void)
201{
202 return platform_driver_register(&rgbfb_driver);
203}
204
205static void __exit rgbfb_exit(void)
206{
207 platform_driver_unregister(&rgbfb_driver);
208}
209
210module_init(rgbfb_init);
211module_exit(rgbfb_exit);
212
213MODULE_LICENSE("GPL");
diff --git a/drivers/video/pnx4008/sdum.c b/drivers/video/pnx4008/sdum.c
new file mode 100644
index 000000000000..51f0ecc2a511
--- /dev/null
+++ b/drivers/video/pnx4008/sdum.c
@@ -0,0 +1,872 @@
1/*
2 * drivers/video/pnx4008/sdum.c
3 *
4 * Display Update Master support
5 *
6 * Authors: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com>
7 * Vitaly Wool <vitalywool@gmail.com>
8 * Based on Philips Semiconductors's code
9 *
10 * Copyrght (c) 2005-2006 MontaVista Software, Inc.
11 * Copyright (c) 2005 Philips Semiconductors
12 * This file is licensed under the terms of the GNU General Public License
13 * version 2. This program is licensed "as is" without any warranty of any
14 * kind, whether express or implied.
15 */
16
17#include <linux/module.h>
18#include <linux/kernel.h>
19#include <linux/errno.h>
20#include <linux/string.h>
21#include <linux/mm.h>
22#include <linux/tty.h>
23#include <linux/slab.h>
24#include <linux/vmalloc.h>
25#include <linux/delay.h>
26#include <linux/interrupt.h>
27#include <linux/platform_device.h>
28#include <linux/fb.h>
29#include <linux/init.h>
30#include <linux/dma-mapping.h>
31#include <linux/clk.h>
32#include <asm/uaccess.h>
33#include <asm/arch/gpio.h>
34
35#include "sdum.h"
36#include "fbcommon.h"
37#include "dum.h"
38
39/* Framebuffers we have */
40
41static struct pnx4008_fb_addr {
42 int fb_type;
43 long addr_offset;
44 long fb_length;
45} fb_addr[] = {
46 [0] = {
47 FB_TYPE_YUV, 0, 0xB0000
48 },
49 [1] = {
50 FB_TYPE_RGB, 0xB0000, 0x50000
51 },
52};
53
54static struct dum_data {
55 u32 lcd_phys_start;
56 u32 lcd_virt_start;
57 u32 slave_phys_base;
58 u32 *slave_virt_base;
59 int fb_owning_channel[MAX_DUM_CHANNELS];
60 struct dumchannel_uf chan_uf_store[MAX_DUM_CHANNELS];
61} dum_data;
62
63/* Different local helper functions */
64
65static u32 nof_pixels_dx(struct dum_ch_setup *ch_setup)
66{
67 return (ch_setup->xmax - ch_setup->xmin + 1);
68}
69
70static u32 nof_pixels_dy(struct dum_ch_setup *ch_setup)
71{
72 return (ch_setup->ymax - ch_setup->ymin + 1);
73}
74
75static u32 nof_pixels_dxy(struct dum_ch_setup *ch_setup)
76{
77 return (nof_pixels_dx(ch_setup) * nof_pixels_dy(ch_setup));
78}
79
80static u32 nof_bytes(struct dum_ch_setup *ch_setup)
81{
82 u32 r = nof_pixels_dxy(ch_setup);
83 switch (ch_setup->format) {
84 case RGB888:
85 case RGB666:
86 r *= 4;
87 break;
88
89 default:
90 r *= 2;
91 break;
92 }
93 return r;
94}
95
96static u32 build_command(int disp_no, u32 reg, u32 val)
97{
98 return ((disp_no << 26) | BIT(25) | (val << 16) | (disp_no << 10) |
99 (reg << 0));
100}
101
102static u32 build_double_index(int disp_no, u32 val)
103{
104 return ((disp_no << 26) | (val << 16) | (disp_no << 10) | (val << 0));
105}
106
107static void build_disp_window(struct dum_ch_setup * ch_setup, struct disp_window * dw)
108{
109 dw->ymin = ch_setup->ymin;
110 dw->ymax = ch_setup->ymax;
111 dw->xmin_l = ch_setup->xmin & 0xFF;
112 dw->xmin_h = (ch_setup->xmin & BIT(8)) >> 8;
113 dw->xmax_l = ch_setup->xmax & 0xFF;
114 dw->xmax_h = (ch_setup->xmax & BIT(8)) >> 8;
115}
116
117static int put_channel(struct dumchannel chan)
118{
119 int i = chan.channelnr;
120
121 if (i < 0 || i > MAX_DUM_CHANNELS)
122 return -EINVAL;
123 else {
124 DUM_CH_MIN(i) = chan.dum_ch_min;
125 DUM_CH_MAX(i) = chan.dum_ch_max;
126 DUM_CH_CONF(i) = chan.dum_ch_conf;
127 DUM_CH_CTRL(i) = chan.dum_ch_ctrl;
128 }
129
130 return 0;
131}
132
133static void clear_channel(int channr)
134{
135 struct dumchannel chan;
136
137 chan.channelnr = channr;
138 chan.dum_ch_min = 0;
139 chan.dum_ch_max = 0;
140 chan.dum_ch_conf = 0;
141 chan.dum_ch_ctrl = 0;
142
143 put_channel(chan);
144}
145
146static int put_cmd_string(struct cmdstring cmds)
147{
148 u16 *cmd_str_virtaddr;
149 u32 *cmd_ptr0_virtaddr;
150 u32 cmd_str_physaddr;
151
152 int i = cmds.channelnr;
153
154 if (i < 0 || i > MAX_DUM_CHANNELS)
155 return -EINVAL;
156 else if ((cmd_ptr0_virtaddr =
157 (int *)ioremap_nocache(DUM_COM_BASE,
158 sizeof(int) * MAX_DUM_CHANNELS)) ==
159 NULL)
160 return -EIOREMAPFAILED;
161 else {
162 cmd_str_physaddr = ioread32(&cmd_ptr0_virtaddr[cmds.channelnr]);
163 if ((cmd_str_virtaddr =
164 (u16 *) ioremap_nocache(cmd_str_physaddr,
165 sizeof(cmds))) == NULL) {
166 iounmap(cmd_ptr0_virtaddr);
167 return -EIOREMAPFAILED;
168 } else {
169 int t;
170 for (t = 0; t < 8; t++)
171 iowrite16(*((u16 *)&cmds.prestringlen + t),
172 cmd_str_virtaddr + t);
173
174 for (t = 0; t < cmds.prestringlen / 2; t++)
175 iowrite16(*((u16 *)&cmds.precmd + t),
176 cmd_str_virtaddr + t + 8);
177
178 for (t = 0; t < cmds.poststringlen / 2; t++)
179 iowrite16(*((u16 *)&cmds.postcmd + t),
180 cmd_str_virtaddr + t + 8 +
181 cmds.prestringlen / 2);
182
183 iounmap(cmd_ptr0_virtaddr);
184 iounmap(cmd_str_virtaddr);
185 }
186 }
187
188 return 0;
189}
190
191static u32 dum_ch_setup(int ch_no, struct dum_ch_setup * ch_setup)
192{
193 struct cmdstring cmds_c;
194 struct cmdstring *cmds = &cmds_c;
195 struct disp_window dw;
196 int standard;
197 u32 orientation = 0;
198 struct dumchannel chan = { 0 };
199 int ret;
200
201 if ((ch_setup->xmirror) || (ch_setup->ymirror) || (ch_setup->rotate)) {
202 standard = 0;
203
204 orientation = BIT(1); /* always set 9-bit-bus */
205 if (ch_setup->xmirror)
206 orientation |= BIT(4);
207 if (ch_setup->ymirror)
208 orientation |= BIT(3);
209 if (ch_setup->rotate)
210 orientation |= BIT(0);
211 } else
212 standard = 1;
213
214 cmds->channelnr = ch_no;
215
216 /* build command string header */
217 if (standard) {
218 cmds->prestringlen = 32;
219 cmds->poststringlen = 0;
220 } else {
221 cmds->prestringlen = 48;
222 cmds->poststringlen = 16;
223 }
224
225 cmds->format =
226 (u16) ((ch_setup->disp_no << 4) | (BIT(3)) | (ch_setup->format));
227 cmds->reserved = 0x0;
228 cmds->startaddr_low = (ch_setup->minadr & 0xFFFF);
229 cmds->startaddr_high = (ch_setup->minadr >> 16);
230
231 if ((ch_setup->minadr == 0) && (ch_setup->maxadr == 0)
232 && (ch_setup->xmin == 0)
233 && (ch_setup->ymin == 0) && (ch_setup->xmax == 0)
234 && (ch_setup->ymax == 0)) {
235 cmds->pixdatlen_low = 0;
236 cmds->pixdatlen_high = 0;
237 } else {
238 u32 nbytes = nof_bytes(ch_setup);
239 cmds->pixdatlen_low = (nbytes & 0xFFFF);
240 cmds->pixdatlen_high = (nbytes >> 16);
241 }
242
243 if (ch_setup->slave_trans)
244 cmds->pixdatlen_high |= BIT(15);
245
246 /* build pre-string */
247 build_disp_window(ch_setup, &dw);
248
249 if (standard) {
250 cmds->precmd[0] =
251 build_command(ch_setup->disp_no, DISP_XMIN_L_REG, 0x99);
252 cmds->precmd[1] =
253 build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
254 dw.xmin_l);
255 cmds->precmd[2] =
256 build_command(ch_setup->disp_no, DISP_XMIN_H_REG,
257 dw.xmin_h);
258 cmds->precmd[3] =
259 build_command(ch_setup->disp_no, DISP_YMIN_REG, dw.ymin);
260 cmds->precmd[4] =
261 build_command(ch_setup->disp_no, DISP_XMAX_L_REG,
262 dw.xmax_l);
263 cmds->precmd[5] =
264 build_command(ch_setup->disp_no, DISP_XMAX_H_REG,
265 dw.xmax_h);
266 cmds->precmd[6] =
267 build_command(ch_setup->disp_no, DISP_YMAX_REG, dw.ymax);
268 cmds->precmd[7] =
269 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
270 } else {
271 if (dw.xmin_l == ch_no)
272 cmds->precmd[0] =
273 build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
274 0x99);
275 else
276 cmds->precmd[0] =
277 build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
278 ch_no);
279
280 cmds->precmd[1] =
281 build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
282 dw.xmin_l);
283 cmds->precmd[2] =
284 build_command(ch_setup->disp_no, DISP_XMIN_H_REG,
285 dw.xmin_h);
286 cmds->precmd[3] =
287 build_command(ch_setup->disp_no, DISP_YMIN_REG, dw.ymin);
288 cmds->precmd[4] =
289 build_command(ch_setup->disp_no, DISP_XMAX_L_REG,
290 dw.xmax_l);
291 cmds->precmd[5] =
292 build_command(ch_setup->disp_no, DISP_XMAX_H_REG,
293 dw.xmax_h);
294 cmds->precmd[6] =
295 build_command(ch_setup->disp_no, DISP_YMAX_REG, dw.ymax);
296 cmds->precmd[7] =
297 build_command(ch_setup->disp_no, DISP_1_REG, orientation);
298 cmds->precmd[8] =
299 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
300 cmds->precmd[9] =
301 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
302 cmds->precmd[0xA] =
303 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
304 cmds->precmd[0xB] =
305 build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
306 cmds->postcmd[0] =
307 build_command(ch_setup->disp_no, DISP_1_REG, BIT(1));
308 cmds->postcmd[1] =
309 build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 1);
310 cmds->postcmd[2] =
311 build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 2);
312 cmds->postcmd[3] =
313 build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 3);
314 }
315
316 if ((ret = put_cmd_string(cmds_c)) != 0) {
317 return ret;
318 }
319
320 chan.channelnr = cmds->channelnr;
321 chan.dum_ch_min = ch_setup->dirtybuffer + ch_setup->minadr;
322 chan.dum_ch_max = ch_setup->dirtybuffer + ch_setup->maxadr;
323 chan.dum_ch_conf = 0x002;
324 chan.dum_ch_ctrl = 0x04;
325
326 put_channel(chan);
327
328 return 0;
329}
330
331static u32 display_open(int ch_no, int auto_update, u32 * dirty_buffer,
332 u32 * frame_buffer, u32 xpos, u32 ypos, u32 w, u32 h)
333{
334
335 struct dum_ch_setup k;
336 int ret;
337
338 /* keep width & height within display area */
339 if ((xpos + w) > DISP_MAX_X_SIZE)
340 w = DISP_MAX_X_SIZE - xpos;
341
342 if ((ypos + h) > DISP_MAX_Y_SIZE)
343 h = DISP_MAX_Y_SIZE - ypos;
344
345 /* assume 1 display only */
346 k.disp_no = 0;
347 k.xmin = xpos;
348 k.ymin = ypos;
349 k.xmax = xpos + (w - 1);
350 k.ymax = ypos + (h - 1);
351
352 /* adjust min and max values if necessary */
353 if (k.xmin > DISP_MAX_X_SIZE - 1)
354 k.xmin = DISP_MAX_X_SIZE - 1;
355 if (k.ymin > DISP_MAX_Y_SIZE - 1)
356 k.ymin = DISP_MAX_Y_SIZE - 1;
357
358 if (k.xmax > DISP_MAX_X_SIZE - 1)
359 k.xmax = DISP_MAX_X_SIZE - 1;
360 if (k.ymax > DISP_MAX_Y_SIZE - 1)
361 k.ymax = DISP_MAX_Y_SIZE - 1;
362
363 k.xmirror = 0;
364 k.ymirror = 0;
365 k.rotate = 0;
366 k.minadr = (u32) frame_buffer;
367 k.maxadr = (u32) frame_buffer + (((w - 1) << 10) | ((h << 2) - 2));
368 k.pad = PAD_1024;
369 k.dirtybuffer = (u32) dirty_buffer;
370 k.format = RGB888;
371 k.hwdirty = 0;
372 k.slave_trans = 0;
373
374 ret = dum_ch_setup(ch_no, &k);
375
376 return ret;
377}
378
379static void lcd_reset(void)
380{
381 u32 *dum_pio_base = (u32 *)IO_ADDRESS(PNX4008_PIO_BASE);
382
383 udelay(1);
384 iowrite32(BIT(19), &dum_pio_base[2]);
385 udelay(1);
386 iowrite32(BIT(19), &dum_pio_base[1]);
387 udelay(1);
388}
389
390static int dum_init(struct platform_device *pdev)
391{
392 struct clk *clk;
393
394 /* enable DUM clock */
395 clk = clk_get(&pdev->dev, "dum_ck");
396 if (IS_ERR(clk)) {
397 printk(KERN_ERR "pnx4008_dum: Unable to access DUM clock\n");
398 return PTR_ERR(clk);
399 }
400
401 clk_set_rate(clk, 1);
402 clk_put(clk);
403
404 DUM_CTRL = V_DUM_RESET;
405
406 /* set priority to "round-robin". All other params to "false" */
407 DUM_CONF = BIT(9);
408
409 /* Display 1 */
410 DUM_WTCFG1 = PNX4008_DUM_WT_CFG;
411 DUM_RTCFG1 = PNX4008_DUM_RT_CFG;
412 DUM_TCFG = PNX4008_DUM_T_CFG;
413
414 return 0;
415}
416
417static void dum_chan_init(void)
418{
419 int i = 0, ch = 0;
420 u32 *cmdptrs;
421 u32 *cmdstrings;
422
423 DUM_COM_BASE =
424 CMDSTRING_BASEADDR + BYTES_PER_CMDSTRING * NR_OF_CMDSTRINGS;
425
426 if ((cmdptrs =
427 (u32 *) ioremap_nocache(DUM_COM_BASE,
428 sizeof(u32) * NR_OF_CMDSTRINGS)) == NULL)
429 return;
430
431 for (ch = 0; ch < NR_OF_CMDSTRINGS; ch++)
432 iowrite32(CMDSTRING_BASEADDR + BYTES_PER_CMDSTRING * ch,
433 cmdptrs + ch);
434
435 for (ch = 0; ch < MAX_DUM_CHANNELS; ch++)
436 clear_channel(ch);
437
438 /* Clear the cmdstrings */
439 cmdstrings =
440 (u32 *)ioremap_nocache(*cmdptrs,
441 BYTES_PER_CMDSTRING * NR_OF_CMDSTRINGS);
442
443 if (!cmdstrings)
444 goto out;
445
446 for (i = 0; i < NR_OF_CMDSTRINGS * BYTES_PER_CMDSTRING / sizeof(u32);
447 i++)
448 iowrite32(0, cmdstrings + i);
449
450 iounmap((u32 *)cmdstrings);
451
452out:
453 iounmap((u32 *)cmdptrs);
454}
455
456static void lcd_init(void)
457{
458 lcd_reset();
459
460 DUM_OUTP_FORMAT1 = 0; /* RGB666 */
461
462 udelay(1);
463 iowrite32(V_LCD_STANDBY_OFF, dum_data.slave_virt_base);
464 udelay(1);
465 iowrite32(V_LCD_USE_9BIT_BUS, dum_data.slave_virt_base);
466 udelay(1);
467 iowrite32(V_LCD_SYNC_RISE_L, dum_data.slave_virt_base);
468 udelay(1);
469 iowrite32(V_LCD_SYNC_RISE_H, dum_data.slave_virt_base);
470 udelay(1);
471 iowrite32(V_LCD_SYNC_FALL_L, dum_data.slave_virt_base);
472 udelay(1);
473 iowrite32(V_LCD_SYNC_FALL_H, dum_data.slave_virt_base);
474 udelay(1);
475 iowrite32(V_LCD_SYNC_ENABLE, dum_data.slave_virt_base);
476 udelay(1);
477 iowrite32(V_LCD_DISPLAY_ON, dum_data.slave_virt_base);
478 udelay(1);
479}
480
481/* Interface exported to framebuffer drivers */
482
483int pnx4008_get_fb_addresses(int fb_type, void **virt_addr,
484 dma_addr_t *phys_addr, int *fb_length)
485{
486 int i;
487 int ret = -1;
488 for (i = 0; i < ARRAY_SIZE(fb_addr); i++)
489 if (fb_addr[i].fb_type == fb_type) {
490 *virt_addr = (void *)(dum_data.lcd_virt_start +
491 fb_addr[i].addr_offset);
492 *phys_addr =
493 dum_data.lcd_phys_start + fb_addr[i].addr_offset;
494 *fb_length = fb_addr[i].fb_length;
495 ret = 0;
496 break;
497 }
498
499 return ret;
500}
501
502EXPORT_SYMBOL(pnx4008_get_fb_addresses);
503
504int pnx4008_alloc_dum_channel(int dev_id)
505{
506 int i = 0;
507
508 while ((i < MAX_DUM_CHANNELS) && (dum_data.fb_owning_channel[i] != -1))
509 i++;
510
511 if (i == MAX_DUM_CHANNELS)
512 return -ENORESOURCESLEFT;
513 else {
514 dum_data.fb_owning_channel[i] = dev_id;
515 return i;
516 }
517}
518
519EXPORT_SYMBOL(pnx4008_alloc_dum_channel);
520
521int pnx4008_free_dum_channel(int channr, int dev_id)
522{
523 if (channr < 0 || channr > MAX_DUM_CHANNELS)
524 return -EINVAL;
525 else if (dum_data.fb_owning_channel[channr] != dev_id)
526 return -EFBNOTOWNER;
527 else {
528 clear_channel(channr);
529 dum_data.fb_owning_channel[channr] = -1;
530 }
531
532 return 0;
533}
534
535EXPORT_SYMBOL(pnx4008_free_dum_channel);
536
537int pnx4008_put_dum_channel_uf(struct dumchannel_uf chan_uf, int dev_id)
538{
539 int i = chan_uf.channelnr;
540 int ret;
541
542 if (i < 0 || i > MAX_DUM_CHANNELS)
543 return -EINVAL;
544 else if (dum_data.fb_owning_channel[i] != dev_id)
545 return -EFBNOTOWNER;
546 else if ((ret =
547 display_open(chan_uf.channelnr, 0, chan_uf.dirty,
548 chan_uf.source, chan_uf.y_offset,
549 chan_uf.x_offset, chan_uf.height,
550 chan_uf.width)) != 0)
551 return ret;
552 else {
553 dum_data.chan_uf_store[i].dirty = chan_uf.dirty;
554 dum_data.chan_uf_store[i].source = chan_uf.source;
555 dum_data.chan_uf_store[i].x_offset = chan_uf.x_offset;
556 dum_data.chan_uf_store[i].y_offset = chan_uf.y_offset;
557 dum_data.chan_uf_store[i].width = chan_uf.width;
558 dum_data.chan_uf_store[i].height = chan_uf.height;
559 }
560
561 return 0;
562}
563
564EXPORT_SYMBOL(pnx4008_put_dum_channel_uf);
565
566int pnx4008_set_dum_channel_sync(int channr, int val, int dev_id)
567{
568 if (channr < 0 || channr > MAX_DUM_CHANNELS)
569 return -EINVAL;
570 else if (dum_data.fb_owning_channel[channr] != dev_id)
571 return -EFBNOTOWNER;
572 else {
573 if (val == CONF_SYNC_ON) {
574 DUM_CH_CONF(channr) |= CONF_SYNCENABLE;
575 DUM_CH_CONF(channr) |= DUM_CHANNEL_CFG_SYNC_MASK |
576 DUM_CHANNEL_CFG_SYNC_MASK_SET;
577 } else if (val == CONF_SYNC_OFF)
578 DUM_CH_CONF(channr) &= ~CONF_SYNCENABLE;
579 else
580 return -EINVAL;
581 }
582
583 return 0;
584}
585
586EXPORT_SYMBOL(pnx4008_set_dum_channel_sync);
587
588int pnx4008_set_dum_channel_dirty_detect(int channr, int val, int dev_id)
589{
590 if (channr < 0 || channr > MAX_DUM_CHANNELS)
591 return -EINVAL;
592 else if (dum_data.fb_owning_channel[channr] != dev_id)
593 return -EFBNOTOWNER;
594 else {
595 if (val == CONF_DIRTYDETECTION_ON)
596 DUM_CH_CONF(channr) |= CONF_DIRTYENABLE;
597 else if (val == CONF_DIRTYDETECTION_OFF)
598 DUM_CH_CONF(channr) &= ~CONF_DIRTYENABLE;
599 else
600 return -EINVAL;
601 }
602
603 return 0;
604}
605
606EXPORT_SYMBOL(pnx4008_set_dum_channel_dirty_detect);
607
608#if 0 /* Functions not used currently, but likely to be used in the future */
609
610static int get_channel(struct dumchannel *p_chan)
611{
612 int i = p_chan->channelnr;
613
614 if (i < 0 || i > MAX_DUM_CHANNELS)
615 return -EINVAL;
616 else {
617 p_chan->dum_ch_min = DUM_CH_MIN(i);
618 p_chan->dum_ch_max = DUM_CH_MAX(i);
619 p_chan->dum_ch_conf = DUM_CH_CONF(i);
620 p_chan->dum_ch_stat = DUM_CH_STAT(i);
621 p_chan->dum_ch_ctrl = 0; /* WriteOnly control register */
622 }
623
624 return 0;
625}
626
627int pnx4008_get_dum_channel_uf(struct dumchannel_uf *p_chan_uf, int dev_id)
628{
629 int i = p_chan_uf->channelnr;
630
631 if (i < 0 || i > MAX_DUM_CHANNELS)
632 return -EINVAL;
633 else if (dum_data.fb_owning_channel[i] != dev_id)
634 return -EFBNOTOWNER;
635 else {
636 p_chan_uf->dirty = dum_data.chan_uf_store[i].dirty;
637 p_chan_uf->source = dum_data.chan_uf_store[i].source;
638 p_chan_uf->x_offset = dum_data.chan_uf_store[i].x_offset;
639 p_chan_uf->y_offset = dum_data.chan_uf_store[i].y_offset;
640 p_chan_uf->width = dum_data.chan_uf_store[i].width;
641 p_chan_uf->height = dum_data.chan_uf_store[i].height;
642 }
643
644 return 0;
645}
646
647EXPORT_SYMBOL(pnx4008_get_dum_channel_uf);
648
649int pnx4008_get_dum_channel_config(int channr, int dev_id)
650{
651 int ret;
652 struct dumchannel chan;
653
654 if (channr < 0 || channr > MAX_DUM_CHANNELS)
655 return -EINVAL;
656 else if (dum_data.fb_owning_channel[channr] != dev_id)
657 return -EFBNOTOWNER;
658 else {
659 chan.channelnr = channr;
660 if ((ret = get_channel(&chan)) != 0)
661 return ret;
662 }
663
664 return (chan.dum_ch_conf & DUM_CHANNEL_CFG_MASK);
665}
666
667EXPORT_SYMBOL(pnx4008_get_dum_channel_config);
668
669int pnx4008_force_update_dum_channel(int channr, int dev_id)
670{
671 if (channr < 0 || channr > MAX_DUM_CHANNELS)
672 return -EINVAL;
673
674 else if (dum_data.fb_owning_channel[channr] != dev_id)
675 return -EFBNOTOWNER;
676 else
677 DUM_CH_CTRL(channr) = CTRL_SETDIRTY;
678
679 return 0;
680}
681
682EXPORT_SYMBOL(pnx4008_force_update_dum_channel);
683
684#endif
685
686int pnx4008_sdum_mmap(struct fb_info *info, struct vm_area_struct *vma,
687 struct device *dev)
688{
689 unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
690
691 if (off < info->fix.smem_len) {
692 vma->vm_pgoff += 1;
693 return dma_mmap_writecombine(dev, vma,
694 (void *)dum_data.lcd_virt_start,
695 dum_data.lcd_phys_start,
696 FB_DMA_SIZE);
697 }
698 return -EINVAL;
699}
700
701EXPORT_SYMBOL(pnx4008_sdum_mmap);
702
703int pnx4008_set_dum_exit_notification(int dev_id)
704{
705 int i;
706
707 for (i = 0; i < MAX_DUM_CHANNELS; i++)
708 if (dum_data.fb_owning_channel[i] == dev_id)
709 return -ERESOURCESNOTFREED;
710
711 return 0;
712}
713
714EXPORT_SYMBOL(pnx4008_set_dum_exit_notification);
715
716/* Platform device driver for DUM */
717
718static int sdum_suspend(struct platform_device *pdev, pm_message_t state)
719{
720 int retval = 0;
721 struct clk *clk;
722
723 clk = clk_get(0, "dum_ck");
724 if (!IS_ERR(clk)) {
725 clk_set_rate(clk, 0);
726 clk_put(clk);
727 } else
728 retval = PTR_ERR(clk);
729
730 /* disable BAC */
731 DUM_CTRL = V_BAC_DISABLE_IDLE;
732
733 /* LCD standby & turn off display */
734 lcd_reset();
735
736 return retval;
737}
738
739static int sdum_resume(struct platform_device *pdev)
740{
741 int retval = 0;
742 struct clk *clk;
743
744 clk = clk_get(0, "dum_ck");
745 if (!IS_ERR(clk)) {
746 clk_set_rate(clk, 1);
747 clk_put(clk);
748 } else
749 retval = PTR_ERR(clk);
750
751 /* wait for BAC disable */
752 DUM_CTRL = V_BAC_DISABLE_TRIG;
753
754 while (DUM_CTRL & BAC_ENABLED)
755 udelay(10);
756
757 /* re-init LCD */
758 lcd_init();
759
760 /* enable BAC and reset MUX */
761 DUM_CTRL = V_BAC_ENABLE;
762 udelay(1);
763 DUM_CTRL = V_MUX_RESET;
764 return 0;
765}
766
767static int __devinit sdum_probe(struct platform_device *pdev)
768{
769 int ret = 0, i = 0;
770
771 /* map frame buffer */
772 dum_data.lcd_virt_start = (u32) dma_alloc_writecombine(&pdev->dev,
773 FB_DMA_SIZE,
774 &dum_data.lcd_phys_start,
775 GFP_KERNEL);
776
777 if (!dum_data.lcd_virt_start) {
778 ret = -ENOMEM;
779 goto out_3;
780 }
781
782 /* map slave registers */
783 dum_data.slave_phys_base = PNX4008_DUM_SLAVE_BASE;
784 dum_data.slave_virt_base =
785 (u32 *) ioremap_nocache(dum_data.slave_phys_base, sizeof(u32));
786
787 if (dum_data.slave_virt_base == NULL) {
788 ret = -ENOMEM;
789 goto out_2;
790 }
791
792 /* initialize DUM and LCD display */
793 ret = dum_init(pdev);
794 if (ret)
795 goto out_1;
796
797 dum_chan_init();
798 lcd_init();
799
800 DUM_CTRL = V_BAC_ENABLE;
801 udelay(1);
802 DUM_CTRL = V_MUX_RESET;
803
804 /* set decode address and sync clock divider */
805 DUM_DECODE = dum_data.lcd_phys_start & DUM_DECODE_MASK;
806 DUM_CLK_DIV = PNX4008_DUM_CLK_DIV;
807
808 for (i = 0; i < MAX_DUM_CHANNELS; i++)
809 dum_data.fb_owning_channel[i] = -1;
810
811 /* set up wakeup interrupt */
812 start_int_set_rising_edge(SE_DISP_SYNC_INT);
813 start_int_ack(SE_DISP_SYNC_INT);
814 start_int_umask(SE_DISP_SYNC_INT);
815
816 return 0;
817
818out_1:
819 iounmap((void *)dum_data.slave_virt_base);
820out_2:
821 dma_free_writecombine(&pdev->dev, FB_DMA_SIZE,
822 (void *)dum_data.lcd_virt_start,
823 dum_data.lcd_phys_start);
824out_3:
825 return ret;
826}
827
828static int sdum_remove(struct platform_device *pdev)
829{
830 struct clk *clk;
831
832 start_int_mask(SE_DISP_SYNC_INT);
833
834 clk = clk_get(0, "dum_ck");
835 if (!IS_ERR(clk)) {
836 clk_set_rate(clk, 0);
837 clk_put(clk);
838 }
839
840 iounmap((void *)dum_data.slave_virt_base);
841
842 dma_free_writecombine(&pdev->dev, FB_DMA_SIZE,
843 (void *)dum_data.lcd_virt_start,
844 dum_data.lcd_phys_start);
845
846 return 0;
847}
848
849static struct platform_driver sdum_driver = {
850 .driver = {
851 .name = "sdum",
852 },
853 .probe = sdum_probe,
854 .remove = sdum_remove,
855 .suspend = sdum_suspend,
856 .resume = sdum_resume,
857};
858
859int __init sdum_init(void)
860{
861 return platform_driver_register(&sdum_driver);
862}
863
864static void __exit sdum_exit(void)
865{
866 platform_driver_unregister(&sdum_driver);
867};
868
869module_init(sdum_init);
870module_exit(sdum_exit);
871
872MODULE_LICENSE("GPL");
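
A minimal usage sketch of the channel interface exported above, for orientation only; it is not part of this patch, and the device id, function name and error handling are illustrative assumptions:

/*
 * Illustrative sketch only -- not part of this patch.  Shows the intended
 * calling sequence for the exported DUM channel interface, using a made-up
 * device id and minimal error handling.
 */
#include <linux/dma-mapping.h>
#include "sdum.h"

static int example_attach_rgb_fb(int dev_id)
{
    void *fb_virt;
    dma_addr_t fb_phys;
    int fb_len, ch, ret;

    /* Look up the RGB framebuffer window set up by the DUM driver. */
    ret = pnx4008_get_fb_addresses(FB_TYPE_RGB, &fb_virt, &fb_phys, &fb_len);
    if (ret)
        return ret;

    /* Claim a free DUM channel for this client. */
    ch = pnx4008_alloc_dum_channel(dev_id);
    if (ch < 0)
        return ch;

    /* Only push out data that was actually written to. */
    ret = pnx4008_set_dum_channel_dirty_detect(ch, CONF_DIRTYDETECTION_ON,
                                               dev_id);
    if (ret)
        pnx4008_free_dum_channel(ch, dev_id);

    return ret;
}
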
diff --git a/drivers/video/pnx4008/sdum.h b/drivers/video/pnx4008/sdum.h
new file mode 100644
index 000000000000..e8c5dcdd8813
--- /dev/null
+++ b/drivers/video/pnx4008/sdum.h
@@ -0,0 +1,139 @@
1/*
2 * Copyright (C) 2005 Philips Semiconductors
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA, or http://www.gnu.org/licenses/gpl.html
18*/
19
20#define MAX_DUM_CHANNELS 64
21
22#define RGB_MEM_WINDOW(x) (0x10000000 + (x)*0x00100000)
23
24#define QCIF_OFFSET(x) (((x) == 0) ? 0x00000: ((x) == 1) ? 0x30000: -1)
25#define CIF_OFFSET(x) (((x) == 0) ? 0x00000: ((x) == 1) ? 0x60000: -1)
26
27#define CTRL_SETDIRTY (0x00000001)
28#define CONF_DIRTYENABLE (0x00000020)
29#define CONF_SYNCENABLE (0x00000004)
30
31#define DIRTY_ENABLED(conf) ((conf) & 0x0020)
32#define SYNC_ENABLED(conf) ((conf) & 0x0004)
33
34/* Display 1 & 2 Write Timing Configuration */
35#define PNX4008_DUM_WT_CFG 0x00372000
36
37/* Display 1 & 2 Read Timing Configuration */
38#define PNX4008_DUM_RT_CFG 0x00003A47
39
40/* DUM Transit State Timing Configuration */
41#define PNX4008_DUM_T_CFG 0x1D /* 29 HCLK cycles */
42
43/* DUM Sync count clock divider */
44#define PNX4008_DUM_CLK_DIV 0x02DD
45
46/* Memory size for framebuffer, allocated through dma_alloc_writecombine().
47 * Must be PAGE aligned
48 */
49#define FB_DMA_SIZE (PAGE_ALIGN(SZ_1M + PAGE_SIZE))
50
51#define OFFSET_RGBBUFFER (0xB0000)
52#define OFFSET_YUVBUFFER (0x00000)
53
54#define YUVBUFFER (lcd_video_start + OFFSET_YUVBUFFER)
55#define RGBBUFFER (lcd_video_start + OFFSET_RGBBUFFER)
56
57#define CMDSTRING_BASEADDR (0x00C000) /* iram */
58#define BYTES_PER_CMDSTRING (0x80)
59#define NR_OF_CMDSTRINGS (64)
60
61#define MAX_NR_PRESTRINGS (0x40)
62#define MAX_NR_POSTSTRINGS (0x40)
63
64/* various mask definitions */
65#define DUM_CLK_ENABLE 0x01
66#define DUM_CLK_DISABLE 0
67#define DUM_DECODE_MASK 0x1FFFFFFF
68#define DUM_CHANNEL_CFG_MASK 0x01FF
69#define DUM_CHANNEL_CFG_SYNC_MASK 0xFFFE00FF
70#define DUM_CHANNEL_CFG_SYNC_MASK_SET 0x0CA00
71
72#define SDUM_RETURNVAL_BASE (0x500)
73
74#define CONF_SYNC_OFF (0x602)
75#define CONF_SYNC_ON (0x603)
76
77#define CONF_DIRTYDETECTION_OFF (0x600)
78#define CONF_DIRTYDETECTION_ON (0x601)
79
80/* Set the corresponding bit. */
81#define BIT(n) (0x1U << (n))
82
83struct dumchannel_uf {
84 int channelnr;
85 u32 *dirty;
86 u32 *source;
87 u32 x_offset;
88 u32 y_offset;
89 u32 width;
90 u32 height;
91};
92
93enum {
94 FB_TYPE_YUV,
95 FB_TYPE_RGB
96};
97
98struct cmdstring {
99 int channelnr;
100 uint16_t prestringlen;
101 uint16_t poststringlen;
102 uint16_t format;
103 uint16_t reserved;
104 uint16_t startaddr_low;
105 uint16_t startaddr_high;
106 uint16_t pixdatlen_low;
107 uint16_t pixdatlen_high;
108 u32 precmd[MAX_NR_PRESTRINGS];
109 u32 postcmd[MAX_NR_POSTSTRINGS];
110
111};
112
113struct dumchannel {
114 int channelnr;
115 int dum_ch_min;
116 int dum_ch_max;
117 int dum_ch_conf;
118 int dum_ch_stat;
119 int dum_ch_ctrl;
120};
121
122int pnx4008_alloc_dum_channel(int dev_id);
123int pnx4008_free_dum_channel(int channr, int dev_id);
124
125int pnx4008_get_dum_channel_uf(struct dumchannel_uf *pChan_uf, int dev_id);
126int pnx4008_put_dum_channel_uf(struct dumchannel_uf chan_uf, int dev_id);
127
128int pnx4008_set_dum_channel_sync(int channr, int val, int dev_id);
129int pnx4008_set_dum_channel_dirty_detect(int channr, int val, int dev_id);
130
131int pnx4008_force_dum_update_channel(int channr, int dev_id);
132
133int pnx4008_get_dum_channel_config(int channr, int dev_id);
134
135int pnx4008_sdum_mmap(struct fb_info *info, struct vm_area_struct *vma, struct device *dev);
136int pnx4008_set_dum_exit_notification(int dev_id);
137
138int pnx4008_get_fb_addresses(int fb_type, void **virt_addr,
139 dma_addr_t * phys_addr, int *fb_length);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d0434406eaeb..f42e64210ee5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = {
 	.min_coredump	= ELF_EXEC_PAGESIZE
 };
 
-#define BAD_ADDR(x)	((unsigned long)(x) > TASK_SIZE)
+#define BAD_ADDR(x)	((unsigned long)(x) >= TASK_SIZE)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			 * <= p_memsize so it's only necessary to check p_memsz.
 			 */
 			k = load_addr + eppnt->p_vaddr;
-			if (k > TASK_SIZE ||
+			if (BAD_ADDR(k) ||
 			    eppnt->p_filesz > eppnt->p_memsz ||
 			    eppnt->p_memsz > TASK_SIZE ||
 			    TASK_SIZE - eppnt->p_memsz < k) {
@@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		 * allowed task size. Note that p_filesz must always be
 		 * <= p_memsz so it is only necessary to check p_memsz.
 		 */
-		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
+		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 		    elf_ppnt->p_memsz > TASK_SIZE ||
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
 			/* set_brk can never work. Avoid overflows. */
@@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 						    interpreter,
 						    &interp_load_addr);
 		if (BAD_ADDR(elf_entry)) {
-			printk(KERN_ERR "Unable to load interpreter %.128s\n",
-			       elf_interpreter);
 			force_sig(SIGSEGV, current);
-			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+			retval = IS_ERR((void *)elf_entry) ?
+					(int)elf_entry : -EINVAL;
 			goto out_free_dentry;
 		}
 		reloc_func_desc = interp_load_addr;
@@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	} else {
 		elf_entry = loc->elf_ex.e_entry;
 		if (BAD_ADDR(elf_entry)) {
-			send_sig(SIGSEGV, current, 0);
-			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+			force_sig(SIGSEGV, current);
+			retval = -EINVAL;
 			goto out_free_dentry;
 		}
 	}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9633a490dab0..37534573960b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	if (!bo)
 		return -ENOMEM;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	res = bd_claim(bdev, holder);
 	if (res || !add_bd_holder(bdev, bo))
 		free_bd_holder(bo);
@@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev,
 	if (!kobj)
 		return;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	bd_release(bdev);
 	if ((bo = del_bd_holder(bdev, kobj)))
 		free_bd_holder(bo);
@@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
 
 EXPORT_SYMBOL(open_by_devnum);
 
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags);
+
+struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode)
+{
+	struct block_device *bdev = bdget(dev);
+	int err = -ENOMEM;
+	int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
+	if (bdev)
+		err = blkdev_get_partition(bdev, mode, flags);
+	return err ? ERR_PTR(err) : bdev;
+}
+
+EXPORT_SYMBOL(open_partition_by_devnum);
+
+
 /*
  * This routine checks whether a removable media has been changed,
  * and invalidates all buffer-cache-entries in that case. This
@@ -868,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
-static int do_open(struct block_device *bdev, struct file *file)
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
+
+static int
+do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
@@ -885,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file)
 	}
 	owner = disk->fops->owner;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
+
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
 		bdev->bd_contains = bdev;
@@ -912,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file)
 			ret = -ENOMEM;
 			if (!whole)
 				goto out_first;
-			ret = blkdev_get(whole, file->f_mode, file->f_flags);
+			ret = blkdev_get_whole(whole, file->f_mode, file->f_flags);
 			if (ret)
 				goto out_first;
 			bdev->bd_contains = whole;
-			mutex_lock(&whole->bd_mutex);
+			mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE);
 			whole->bd_part_count++;
 			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
@@ -944,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file)
 		if (bdev->bd_invalidated)
 			rescan_partitions(bdev->bd_disk, bdev);
 	} else {
-		mutex_lock(&bdev->bd_contains->bd_mutex);
+		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+				  BD_MUTEX_PARTITION);
 		bdev->bd_contains->bd_part_count++;
 		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
@@ -985,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
 	fake_file.f_dentry = &fake_dentry;
 	fake_dentry.d_inode = bdev->bd_inode;
 
-	return do_open(bdev, &fake_file);
+	return do_open(bdev, &fake_file, BD_MUTEX_NORMAL);
 }
 
 EXPORT_SYMBOL(blkdev_get);
 
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	/*
+	 * This crockload is due to bad choice of ->open() type.
+	 * It will go away.
+	 * For now, block device ->open() routine must _not_
+	 * examine anything in 'inode' argument except ->i_rdev.
+	 */
+	struct file fake_file = {};
+	struct dentry fake_dentry = {};
+	fake_file.f_mode = mode;
+	fake_file.f_flags = flags;
+	fake_file.f_dentry = &fake_dentry;
+	fake_dentry.d_inode = bdev->bd_inode;
+
+	return do_open(bdev, &fake_file, BD_MUTEX_WHOLE);
+}
+
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	/*
+	 * This crockload is due to bad choice of ->open() type.
+	 * It will go away.
+	 * For now, block device ->open() routine must _not_
+	 * examine anything in 'inode' argument except ->i_rdev.
+	 */
+	struct file fake_file = {};
+	struct dentry fake_dentry = {};
+	fake_file.f_mode = mode;
+	fake_file.f_flags = flags;
+	fake_file.f_dentry = &fake_dentry;
+	fake_dentry.d_inode = bdev->bd_inode;
+
+	return do_open(bdev, &fake_file, BD_MUTEX_PARTITION);
+}
+
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev;
@@ -1005,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 
 	bdev = bd_acquire(inode);
 
-	res = do_open(bdev, filp);
+	res = do_open(bdev, filp, BD_MUTEX_NORMAL);
 	if (res)
 		return res;
 
@@ -1019,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return res;
 }
 
-int blkdev_put(struct block_device *bdev)
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
 {
 	int ret = 0;
 	struct inode *bd_inode = bdev->bd_inode;
 	struct gendisk *disk = bdev->bd_disk;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
 	lock_kernel();
 	if (!--bdev->bd_openers) {
 		sync_blockdev(bdev);
@@ -1035,7 +1095,8 @@ int blkdev_put(struct block_device *bdev)
 		if (disk->fops->release)
 			ret = disk->fops->release(bd_inode, NULL);
 	} else {
-		mutex_lock(&bdev->bd_contains->bd_mutex);
+		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+				  subclass + 1);
 		bdev->bd_contains->bd_part_count--;
 		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
@@ -1051,9 +1112,8 @@ int blkdev_put(struct block_device *bdev)
 		}
 		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains) {
-			blkdev_put(bdev->bd_contains);
-		}
+		if (bdev != bdev->bd_contains)
+			__blkdev_put(bdev->bd_contains, subclass + 1);
 		bdev->bd_contains = NULL;
 	}
 	unlock_kernel();
@@ -1062,8 +1122,20 @@ int blkdev_put(struct block_device *bdev)
 	return ret;
 }
 
+int blkdev_put(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+
 EXPORT_SYMBOL(blkdev_put);
 
+int blkdev_put_partition(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+
+EXPORT_SYMBOL(blkdev_put_partition);
+
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
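
The bd_mutex changes above all follow one pattern: when the same lock class is legitimately taken twice in a fixed order (whole disk, then contained partition), the inner acquisition is annotated with a distinct lockdep subclass. A generic sketch of that annotation, with made-up names, assuming the usual mutex_lock_nested() semantics:

/*
 * Editorial sketch, not part of the patch: the generic subclass idiom.
 * Two locks of the same class are taken in a fixed whole -> part order;
 * the subclass argument tells the validator this nesting is intended.
 */
#include <linux/mutex.h>

enum { EX_MUTEX_NORMAL, EX_MUTEX_WHOLE, EX_MUTEX_PARTITION };

static void example_lock_whole_then_part(struct mutex *whole, struct mutex *part)
{
    mutex_lock_nested(whole, EX_MUTEX_WHOLE);
    mutex_lock_nested(part, EX_MUTEX_PARTITION);    /* nested inside "whole" */

    /* ... work on the partition with both locks held ... */

    mutex_unlock(part);
    mutex_unlock(whole);
}
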
diff --git a/fs/dcache.c b/fs/dcache.c
index c6e3535be192..1b4a3a34ec57 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
-static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
@@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	 */
 	if (target < dentry) {
 		spin_lock(&target->d_lock);
-		spin_lock(&dentry->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 	} else {
 		spin_lock(&dentry->d_lock);
-		spin_lock(&target->d_lock);
+		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
 	}
 
 	/* Move the dentry to the target hash queue, if on different bucket */
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 538fb0418fba..5981e17f46f0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
 	if (dio->lock_type == DIO_LOCKING)
-		up_read(&dio->inode->i_alloc_sem);
+		/* lockdep: non-owner release */
+		up_read_non_owner(&dio->inode->i_alloc_sem);
 }
 
 /*
@@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	}
 
 	if (dio_lock_type == DIO_LOCKING)
-		down_read(&inode->i_alloc_sem);
+		/* lockdep: not the owner will release it */
+		down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
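
The i_alloc_sem change is the acquire-and-release-in-different-contexts case: the semaphore is read-acquired at submission and released from the I/O completion path, so the non-owner variants keep lockdep's strict owner tracking out of the way. A stripped-down sketch of that pattern, with illustrative names only:

/*
 * Editorial sketch, not part of the patch: acquire in the submitter,
 * release from whatever context completes the I/O.
 */
#include <linux/rwsem.h>

static DECLARE_RWSEM(example_sem);

static void example_submit_io(void)
{
    /* taken here, but this task will not be the one to release it */
    down_read_non_owner(&example_sem);
    /* ... queue the I/O ... */
}

static void example_io_complete(void)
{
    /* released by the completion context */
    up_read_non_owner(&example_sem);
}
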
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9c677bbd0b08..19ffb043abbc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -120,7 +120,7 @@ struct epoll_filefd {
  */
 struct wake_task_node {
 	struct list_head llink;
-	task_t *task;
+	struct task_struct *task;
 	wait_queue_head_t *wq;
 };
 
@@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
 {
 	int wake_nests = 0;
 	unsigned long flags;
-	task_t *this_task = current;
+	struct task_struct *this_task = current;
 	struct list_head *lsthead = &psw->wake_task_list, *lnk;
 	struct wake_task_node *tncur;
 	struct wake_task_node tnode;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f43879d6d68..f2702cda9779 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1157,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 	struct buffer_head tmp_bh;
 	struct buffer_head *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f2dd71336612..813d589cc6c0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2614,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/namei.c b/fs/namei.c
index c784e8bb57a3..c9750d755aff 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	struct dentry *p;
 
 	if (p1 == p2) {
-		mutex_lock(&p1->d_inode->i_mutex);
+		mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
 		return NULL;
 	}
 
@@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 
 	for (p = p1; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p2) {
-			mutex_lock(&p2->d_inode->i_mutex);
-			mutex_lock(&p1->d_inode->i_mutex);
+			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
+			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
 			return p;
 		}
 	}
 
 	for (p = p2; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p1) {
-			mutex_lock(&p1->d_inode->i_mutex);
-			mutex_lock(&p2->d_inode->i_mutex);
+			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
 			return p;
 		}
 	}
 
-	mutex_lock(&p1->d_inode->i_mutex);
-	mutex_lock(&p2->d_inode->i_mutex);
+	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
 	return NULL;
 }
 
@@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 
-	mutex_lock(&nd->dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	/*
 	 * Yucky last component or no last component at all?
 	 * (foo/., foo/.., /////)
@@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname)
 		error = -EBUSY;
 		goto exit1;
 	}
-	mutex_lock(&nd.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
@@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 	error = -EISDIR;
 	if (nd.last_type != LAST_NORM)
 		goto exit1;
-	mutex_lock(&nd.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
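
The VFS and quota-write call sites above annotate i_mutex with a subclass describing the role of the inode; the subclasses themselves (I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_QUOTA, ...) are introduced elsewhere in this series in include/linux/fs.h. A condensed, illustrative sketch (not taken from the patch) of the parent-then-child ordering those annotations express:

/*
 * Editorial sketch: the parent -> child i_mutex ordering that the
 * I_MUTEX_* subclasses describe to the validator.
 */
#include <linux/fs.h>
#include <linux/mutex.h>

static void example_dir_op(struct inode *dir, struct inode *victim)
{
    mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
    mutex_lock_nested(&victim->i_mutex, I_MUTEX_CHILD);

    /* ... unlink/rmdir-style work on 'victim' inside 'dir' ... */

    mutex_unlock(&victim->i_mutex);
    mutex_unlock(&dir->i_mutex);
}
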
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4c86b7e1d1eb..d313f356e66a 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
 	kmem_cache_free(ntfs_inode_cache, ni);
 }
 
+/*
+ * The attribute runlist lock has separate locking rules from the
+ * normal runlist lock, so split the two lock-classes:
+ */
+static struct lock_class_key attr_list_rl_lock_class;
+
 /**
  * __ntfs_init_inode - initialize ntfs specific part of an inode
  * @sb:		super block of mounted volume
@@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 	ni->attr_list_size = 0;
 	ni->attr_list = NULL;
 	ntfs_init_runlist(&ni->attr_list_rl);
+	lockdep_set_class(&ni->attr_list_rl.lock,
+			   &attr_list_rl_lock_class);
 	ni->itype.index.bmp_ino = NULL;
 	ni->itype.index.block_size = 0;
 	ni->itype.index.vcn_size = 0;
@@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 	ni->ext.base_ntfs_ino = NULL;
 }
 
+/*
+ * Extent inodes get MFT-mapped in a nested way, while the base inode
+ * is still mapped. Teach this nesting to the lock validator by creating
+ * a separate class for nested inode's mrec_lock's:
+ */
+static struct lock_class_key extent_inode_mrec_lock_key;
+
 inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
 		unsigned long mft_no)
 {
@@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
 	ntfs_debug("Entering.");
 	if (likely(ni != NULL)) {
 		__ntfs_init_inode(sb, ni);
+		lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
 		ni->mft_no = mft_no;
 		ni->type = AT_UNUSED;
 		ni->name = NULL;
@@ -1722,6 +1738,15 @@ err_out:
 	return err;
 }
 
+/*
+ * The MFT inode has special locking, so teach the lock validator
+ * about this by splitting off the locking rules of the MFT from
+ * the locking rules of other inodes. The MFT inode can never be
+ * accessed from the VFS side (or even internally), only by the
+ * map_mft functions.
+ */
+static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
+
 /**
  * ntfs_read_inode_mount - special read_inode for mount time use only
  * @vi:		inode to read
@@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi)
 	ntfs_attr_put_search_ctx(ctx);
 	ntfs_debug("Done.");
 	ntfs_free(m);
+
+	/*
+	 * Split the locking rules of the MFT inode from the
+	 * locking rules of other inodes:
+	 */
+	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
+	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
+
 	return 0;
 
 em_put_err_out:
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0e14acea3f8b..74e0ee8fce72 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1724,6 +1724,14 @@ upcase_failed:
 	return FALSE;
 }
 
+/*
+ * The lcn and mft bitmap inodes are NTFS-internal inodes with
+ * their own special locking rules:
+ */
+static struct lock_class_key
+		lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key,
+		mftbmp_runlist_lock_key, mftbmp_mrec_lock_key;
+
 /**
  * load_system_files - open the system files using normal functions
  * @vol:	ntfs super block describing device whose system files to load
@@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol)
 		ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");
 		goto iput_mirr_err_out;
 	}
+	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock,
+			   &mftbmp_runlist_lock_key);
+	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock,
+			   &mftbmp_mrec_lock_key);
 	/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
 	if (!load_and_init_upcase(vol))
 		goto iput_mftbmp_err_out;
@@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol)
 		iput(vol->lcnbmp_ino);
 		goto bitmap_failed;
 	}
+	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock,
+			   &lcnbmp_runlist_lock_key);
+	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock,
+			   &lcnbmp_mrec_lock_key);
+
 	NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
 	if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
 		iput(vol->lcnbmp_ino);
@@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 	struct inode *tmp_ino;
 	int blocksize, result;
 
+	/*
+	 * We do a pretty difficult piece of bootstrap by reading the
+	 * MFT (and other metadata) from disk into memory. We'll only
+	 * release this metadata during umount, so the locking patterns
+	 * observed during bootstrap do not count. So turn off the
+	 * observation of locking patterns (strictly for this context
+	 * only) while mounting NTFS. [The validator is still active
+	 * otherwise, even for this context: it will for example record
+	 * lock class registrations.]
+	 */
+	lockdep_off();
 	ntfs_debug("Entering.");
 #ifndef NTFS_RW
 	sb->s_flags |= MS_RDONLY;
@@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		if (!silent)
 			ntfs_error(sb, "Allocation of NTFS volume structure "
 					"failed. Aborting mount...");
+		lockdep_on();
 		return -ENOMEM;
 	}
 	/* Initialize ntfs_volume structure. */
@@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		mutex_unlock(&ntfs_lock);
 		sb->s_export_op = &ntfs_export_ops;
 		lock_kernel();
+		lockdep_on();
 		return 0;
 	}
 	ntfs_error(sb, "Failed to allocate root directory.");
@@ -3059,6 +3089,7 @@ err_out_now:
 	sb->s_fs_info = NULL;
 	kfree(vol);
 	ntfs_debug("Failed, returning -EINVAL.");
+	lockdep_on();
 	return -EINVAL;
 }
 
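
The NTFS changes combine two lockdep facilities: lockdep_set_class() moves individual lock instances that follow special rules into their own class, and lockdep_off()/lockdep_on() suspend dependency tracking across the mount-time bootstrap. A minimal sketch of the first idiom, with invented names and assuming the standard lockdep_set_class() semantics:

/*
 * Editorial sketch, not part of the patch: give one special lock
 * instance its own class, separate from other locks initialised at
 * the same mutex_init() call site.
 */
#include <linux/lockdep.h>
#include <linux/mutex.h>

static struct lock_class_key example_special_key;

static void example_init_special_lock(struct mutex *m)
{
    mutex_init(m);
    /* this instance follows its own ordering rules */
    lockdep_set_class(m, &example_special_key);
}
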
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 28eb3c886034..5567328f1041 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head tmp_bh, *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/super.c b/fs/super.c
index 9b780c42d845..6d4e8174b6db 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -53,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock);
  * Allocates and initializes a new &struct super_block. alloc_super()
  * returns a pointer new superblock or %NULL if allocation had failed.
  */
-static struct super_block *alloc_super(void)
+static struct super_block *alloc_super(struct file_system_type *type)
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
 	static struct super_operations default_op;
@@ -72,6 +72,13 @@ static struct super_block *alloc_super(void)
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
+		lockdep_set_class(&s->s_umount, &type->s_umount_key);
+		/*
+		 * The locking rules for s_lock are up to the
+		 * filesystem. For example ext3fs has different
+		 * lock ordering than usbfs:
+		 */
+		lockdep_set_class(&s->s_lock, &type->s_lock_key);
 		down_write(&s->s_umount);
 		s->s_count = S_BIAS;
 		atomic_set(&s->s_active, 1);
@@ -295,7 +302,7 @@ retry:
 	}
 	if (!s) {
 		spin_unlock(&sb_lock);
-		s = alloc_super();
+		s = alloc_super(type);
 		if (!s)
 			return ERR_PTR(-ENOMEM);
 		goto retry;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 19a99726e58d..992ee0b87cc3 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h
index fafdd4f7010a..1570c0b54336 100644
--- a/include/asm-alpha/rwsem.h
+++ b/include/asm-alpha/rwsem.h
@@ -36,20 +36,11 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
 };
 
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT	, 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
 #define __RWSEM_INITIALIZER(name) \
 	{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
-	LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT }
+	LIST_HEAD_INIT((name).wait_list) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -59,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
 }
 
 static inline void __down_read(struct rw_semaphore *sem)
diff --git a/include/asm-arm/floppy.h b/include/asm-arm/floppy.h
index aa0c8d28d8d9..54b5ae44ed94 100644
--- a/include/asm-arm/floppy.h
+++ b/include/asm-arm/floppy.h
@@ -25,7 +25,7 @@
 
 #define fd_inb(port)		inb((port))
 #define fd_request_irq()	request_irq(IRQ_FLOPPYDISK,floppy_interrupt,\
-					    SA_INTERRUPT,"floppy",NULL)
+					    IRQF_DISABLED,"floppy",NULL)
 #define fd_free_irq()		free_irq(IRQ_FLOPPYDISK,NULL)
 #define fd_disable_irq()	disable_irq(IRQ_FLOPPYDISK)
 #define fd_enable_irq()		enable_irq(IRQ_FLOPPYDISK)
diff --git a/include/asm-arm/hw_irq.h b/include/asm-arm/hw_irq.h
index f1a08a500604..ea856971989a 100644
--- a/include/asm-arm/hw_irq.h
+++ b/include/asm-arm/hw_irq.h
@@ -6,4 +6,15 @@
 
 #include <asm/mach/irq.h>
 
+#if defined(CONFIG_NO_IDLE_HZ)
+# include <asm/dyntick.h>
+# define handle_dynamic_tick(action)					\
+	if (!(action->flags & IRQF_TIMER) && system_timer->dyn_tick) {	\
+		write_seqlock(&xtime_lock);				\
+		if (system_timer->dyn_tick->state & DYN_TICK_ENABLED)	\
+			system_timer->dyn_tick->handler(irq, 0, regs);	\
+		write_sequnlock(&xtime_lock);				\
+	}
+#endif
+
 #endif
diff --git a/include/asm-arm/mach/time.h b/include/asm-arm/mach/time.h
index 9f28073559e8..dee0bc336fe8 100644
--- a/include/asm-arm/mach/time.h
+++ b/include/asm-arm/mach/time.h
@@ -69,6 +69,7 @@ extern void timer_tick(struct pt_regs *);
 /*
  * Kernel time keeping support.
  */
+struct timespec;
 extern int (*set_rtc)(void);
 extern void save_time_delta(struct timespec *delta, struct timespec *rtc);
 extern void restore_time_delta(struct timespec *delta, struct timespec *rtc);
diff --git a/include/asm-arm/signal.h b/include/asm-arm/signal.h
index ced69161917b..d0fb487aba4f 100644
--- a/include/asm-arm/signal.h
+++ b/include/asm-arm/signal.h
@@ -82,7 +82,6 @@ typedef unsigned long sigset_t;
82 * is running in 26-bit. 82 * is running in 26-bit.
83 * SA_ONSTACK allows alternate signal stacks (see sigaltstack(2)). 83 * SA_ONSTACK allows alternate signal stacks (see sigaltstack(2)).
84 * SA_RESTART flag to get restarting signals (which were the default long ago) 84 * SA_RESTART flag to get restarting signals (which were the default long ago)
85 * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the
86 * SA_NODEFER prevents the current signal from being masked in the handler. 85 * SA_NODEFER prevents the current signal from being masked in the handler.
87 * SA_RESETHAND clears the handler when the signal is delivered. 86 * SA_RESETHAND clears the handler when the signal is delivered.
88 * 87 *
@@ -101,7 +100,6 @@ typedef unsigned long sigset_t;
101 100
102#define SA_NOMASK SA_NODEFER 101#define SA_NOMASK SA_NODEFER
103#define SA_ONESHOT SA_RESETHAND 102#define SA_ONESHOT SA_RESETHAND
104#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */
105 103
106 104
107/* 105/*
@@ -113,10 +111,6 @@ typedef unsigned long sigset_t;
113#define MINSIGSTKSZ 2048 111#define MINSIGSTKSZ 2048
114#define SIGSTKSZ 8192 112#define SIGSTKSZ 8192
115 113
116#ifdef __KERNEL__
117#define SA_TIMER 0x40000000
118#endif
119
120#include <asm-generic/signal.h> 114#include <asm-generic/signal.h>
121 115
122#ifdef __KERNEL__ 116#ifdef __KERNEL__
diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h
index 5cf8b7ce0c45..254a126ede5c 100644
--- a/include/asm-generic/mutex-null.h
+++ b/include/asm-generic/mutex-null.h
@@ -10,15 +10,10 @@
10#ifndef _ASM_GENERIC_MUTEX_NULL_H 10#ifndef _ASM_GENERIC_MUTEX_NULL_H
11#define _ASM_GENERIC_MUTEX_NULL_H 11#define _ASM_GENERIC_MUTEX_NULL_H
12 12
13/* extra parameter only needed for mutex debugging: */ 13#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count)
14#ifndef __IP__ 14#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count)
15# define __IP__ 15#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count)
16#endif 16#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count)
17 17#define __mutex_slowpath_needs_to_unlock() 1
18#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count __RET_IP__)
19#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count __RET_IP__)
20#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count __RET_IP__)
21#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count)
22#define __mutex_slowpath_needs_to_unlock() 1
23 18
24#endif 19#endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index c74521157461..e160e04290fb 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -7,6 +7,8 @@
7 7
8extern unsigned long __per_cpu_offset[NR_CPUS]; 8extern unsigned long __per_cpu_offset[NR_CPUS];
9 9
10#define per_cpu_offset(x) (__per_cpu_offset[x])
11
10/* Separate out the type, so (int[3], foo) works. */ 12/* Separate out the type, so (int[3], foo) works. */
11#define DEFINE_PER_CPU(type, name) \ 13#define DEFINE_PER_CPU(type, name) \
12 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name 14 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
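
per_cpu_offset(cpu) is added here (and to the per-arch percpu.h files below) so that generic code can locate a given CPU's copy of the static per-cpu area. A minimal sketch of the kind of consumer this enables, assuming the usual __per_cpu_start/__per_cpu_end linker symbols: the lock validator has to decide whether an address falls inside any CPU's per-cpu region.

#include <linux/percpu.h>
#include <linux/cpumask.h>

extern char __per_cpu_start[], __per_cpu_end[];	/* linker-provided bounds */

static int addr_is_percpu(const void *obj)
{
	unsigned long addr = (unsigned long)obj;
	int cpu;

	for_each_possible_cpu(cpu) {
		unsigned long start = (unsigned long)__per_cpu_start
					+ per_cpu_offset(cpu);
		unsigned long end   = (unsigned long)__per_cpu_end
					+ per_cpu_offset(cpu);

		if (addr >= start && addr < end)
			return 1;	/* lives in cpu's per-cpu area */
	}
	return 0;
}
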
diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h
new file mode 100644
index 000000000000..e1bdb97c07fa
--- /dev/null
+++ b/include/asm-i386/irqflags.h
@@ -0,0 +1,127 @@
1/*
2 * include/asm-i386/irqflags.h
3 *
4 * IRQ flags handling
5 *
6 * This file gets included from lowlevel asm headers too, to provide
7 * wrapped versions of the local_irq_*() APIs, based on the
8 * raw_local_irq_*() functions from the lowlevel headers.
9 */
10#ifndef _ASM_IRQFLAGS_H
11#define _ASM_IRQFLAGS_H
12
13#ifndef __ASSEMBLY__
14
15static inline unsigned long __raw_local_save_flags(void)
16{
17 unsigned long flags;
18
19 __asm__ __volatile__(
20 "pushfl ; popl %0"
21 : "=g" (flags)
22 : /* no input */
23 );
24
25 return flags;
26}
27
28#define raw_local_save_flags(flags) \
29 do { (flags) = __raw_local_save_flags(); } while (0)
30
31static inline void raw_local_irq_restore(unsigned long flags)
32{
33 __asm__ __volatile__(
34 "pushl %0 ; popfl"
35 : /* no output */
36 :"g" (flags)
37 :"memory", "cc"
38 );
39}
40
41static inline void raw_local_irq_disable(void)
42{
43 __asm__ __volatile__("cli" : : : "memory");
44}
45
46static inline void raw_local_irq_enable(void)
47{
48 __asm__ __volatile__("sti" : : : "memory");
49}
50
51/*
52 * Used in the idle loop; sti takes one instruction cycle
53 * to complete:
54 */
55static inline void raw_safe_halt(void)
56{
57 __asm__ __volatile__("sti; hlt" : : : "memory");
58}
59
60/*
61 * Used when interrupts are already enabled or to
62 * shutdown the processor:
63 */
64static inline void halt(void)
65{
66 __asm__ __volatile__("hlt": : :"memory");
67}
68
69static inline int raw_irqs_disabled_flags(unsigned long flags)
70{
71 return !(flags & (1 << 9));
72}
73
74static inline int raw_irqs_disabled(void)
75{
76 unsigned long flags = __raw_local_save_flags();
77
78 return raw_irqs_disabled_flags(flags);
79}
80
81/*
82 * For spinlocks, etc:
83 */
84static inline unsigned long __raw_local_irq_save(void)
85{
86 unsigned long flags = __raw_local_save_flags();
87
88 raw_local_irq_disable();
89
90 return flags;
91}
92
93#define raw_local_irq_save(flags) \
94 do { (flags) = __raw_local_irq_save(); } while (0)
95
96#endif /* __ASSEMBLY__ */
97
98/*
99 * Do the CPU's IRQ-state tracing from assembly code. We call a
100 * C function, so save all the C-clobbered registers:
101 */
102#ifdef CONFIG_TRACE_IRQFLAGS
103
104# define TRACE_IRQS_ON \
105 pushl %eax; \
106 pushl %ecx; \
107 pushl %edx; \
108 call trace_hardirqs_on; \
109 popl %edx; \
110 popl %ecx; \
111 popl %eax;
112
113# define TRACE_IRQS_OFF \
114 pushl %eax; \
115 pushl %ecx; \
116 pushl %edx; \
117 call trace_hardirqs_off; \
118 popl %edx; \
119 popl %ecx; \
120 popl %eax;
121
122#else
123# define TRACE_IRQS_ON
124# define TRACE_IRQS_OFF
125#endif
126
127#endif
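
These raw_* primitives deliberately do no tracing; the tracing is layered on top by the new generic <linux/irqflags.h>, whose beginning is visible at the very end of this diff. Roughly, as a simplified sketch rather than the verbatim header, the wrappers behave like this when CONFIG_TRACE_IRQFLAGS is enabled:

/* sketch only: the irq-state tracing hooks wrap the raw_ operations */
#define local_irq_disable() \
	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)

#define local_irq_enable() \
	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)

#define local_irq_save(flags) \
	do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)

#define local_irq_restore(flags)				\
	do {							\
		if (raw_irqs_disabled_flags(flags)) {		\
			raw_local_irq_restore(flags);		\
			trace_hardirqs_off();			\
		} else {					\
			trace_hardirqs_on();			\
			raw_local_irq_restore(flags);		\
		}						\
	} while (0)
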
diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index be4ab859238e..2f07601562e7 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -40,6 +40,7 @@
40 40
41#include <linux/list.h> 41#include <linux/list.h>
42#include <linux/spinlock.h> 42#include <linux/spinlock.h>
43#include <linux/lockdep.h>
43 44
44struct rwsem_waiter; 45struct rwsem_waiter;
45 46
@@ -61,36 +62,34 @@ struct rw_semaphore {
61#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 62#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
62 spinlock_t wait_lock; 63 spinlock_t wait_lock;
63 struct list_head wait_list; 64 struct list_head wait_list;
64#if RWSEM_DEBUG 65#ifdef CONFIG_DEBUG_LOCK_ALLOC
65 int debug; 66 struct lockdep_map dep_map;
66#endif 67#endif
67}; 68};
68 69
69/* 70#ifdef CONFIG_DEBUG_LOCK_ALLOC
70 * initialisation 71# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
71 */
72#if RWSEM_DEBUG
73#define __RWSEM_DEBUG_INIT , 0
74#else 72#else
75#define __RWSEM_DEBUG_INIT /* */ 73# define __RWSEM_DEP_MAP_INIT(lockname)
76#endif 74#endif
77 75
76
78#define __RWSEM_INITIALIZER(name) \ 77#define __RWSEM_INITIALIZER(name) \
79{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ 78{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
80 __RWSEM_DEBUG_INIT } 79 __RWSEM_DEP_MAP_INIT(name) }
81 80
82#define DECLARE_RWSEM(name) \ 81#define DECLARE_RWSEM(name) \
83 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 82 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
84 83
85static inline void init_rwsem(struct rw_semaphore *sem) 84extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
86{ 85 struct lock_class_key *key);
87 sem->count = RWSEM_UNLOCKED_VALUE; 86
88 spin_lock_init(&sem->wait_lock); 87#define init_rwsem(sem) \
89 INIT_LIST_HEAD(&sem->wait_list); 88do { \
90#if RWSEM_DEBUG 89 static struct lock_class_key __key; \
91 sem->debug = 0; 90 \
92#endif 91 __init_rwsem((sem), #sem, &__key); \
93} 92} while (0)
94 93
95/* 94/*
96 * lock for reading 95 * lock for reading
@@ -143,7 +142,7 @@ LOCK_PREFIX " cmpxchgl %2,%0\n\t"
143/* 142/*
144 * lock for writing 143 * lock for writing
145 */ 144 */
146static inline void __down_write(struct rw_semaphore *sem) 145static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
147{ 146{
148 int tmp; 147 int tmp;
149 148
@@ -167,6 +166,11 @@ LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the
167 : "memory", "cc"); 166 : "memory", "cc");
168} 167}
169 168
169static inline void __down_write(struct rw_semaphore *sem)
170{
171 __down_write_nested(sem, 0);
172}
173
170/* 174/*
171 * trylock for writing -- returns 1 if successful, 0 if contention 175 * trylock for writing -- returns 1 if successful, 0 if contention
172 */ 176 */
diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index 04ba30234c48..87c40f830653 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -31,6 +31,11 @@
31 "jmp 1b\n" \ 31 "jmp 1b\n" \
32 "3:\n\t" 32 "3:\n\t"
33 33
34/*
35 * NOTE: there's an irqs-on section here, which normally would have to be
36 * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use
37 * __raw_spin_lock_string_flags().
38 */
34#define __raw_spin_lock_string_flags \ 39#define __raw_spin_lock_string_flags \
35 "\n1:\t" \ 40 "\n1:\t" \
36 "lock ; decb %0\n\t" \ 41 "lock ; decb %0\n\t" \
@@ -63,6 +68,12 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
63 "=m" (lock->slock) : : "memory"); 68 "=m" (lock->slock) : : "memory");
64} 69}
65 70
71/*
72 * It is easier for the lock validator if interrupts are not re-enabled
73 * in the middle of a lock-acquire. This is a performance feature anyway
74 * so we turn it off:
75 */
76#ifndef CONFIG_PROVE_LOCKING
66static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) 77static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
67{ 78{
68 alternative_smp( 79 alternative_smp(
@@ -70,6 +81,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
70 __raw_spin_lock_string_up, 81 __raw_spin_lock_string_up,
71 "=m" (lock->slock) : "r" (flags) : "memory"); 82 "=m" (lock->slock) : "r" (flags) : "memory");
72} 83}
84#endif
73 85
74static inline int __raw_spin_trylock(raw_spinlock_t *lock) 86static inline int __raw_spin_trylock(raw_spinlock_t *lock)
75{ 87{
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index cab0180567f9..db398d88b1d9 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -456,25 +456,7 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l
456 456
457#define set_wmb(var, value) do { var = value; wmb(); } while (0) 457#define set_wmb(var, value) do { var = value; wmb(); } while (0)
458 458
459/* interrupt control.. */ 459#include <linux/irqflags.h>
460#define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
461#define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
462#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
463#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
464/* used in the idle loop; sti takes one instruction cycle to complete */
465#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
466/* used when interrupts are already enabled or to shutdown the processor */
467#define halt() __asm__ __volatile__("hlt": : :"memory")
468
469#define irqs_disabled() \
470({ \
471 unsigned long flags; \
472 local_save_flags(flags); \
473 !(flags & (1<<9)); \
474})
475
476/* For spinlocks etc */
477#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
478 460
479/* 461/*
480 * disable hlt during certain critical i/o operations 462 * disable hlt during certain critical i/o operations
diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h
index 8acb00190d5a..79479e2c6966 100644
--- a/include/asm-ia64/irq.h
+++ b/include/asm-ia64/irq.h
@@ -14,8 +14,6 @@
14#define NR_IRQS 256 14#define NR_IRQS 256
15#define NR_IRQ_VECTORS NR_IRQS 15#define NR_IRQ_VECTORS NR_IRQS
16 16
17#define IRQF_PERCPU 0x02000000
18
19static __inline__ int 17static __inline__ int
20irq_canonicalize (int irq) 18irq_canonicalize (int irq)
21{ 19{
diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h
index 24d898b650c5..fbe5cf3ab8dc 100644
--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -36,6 +36,7 @@
36#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
37 37
38extern unsigned long __per_cpu_offset[NR_CPUS]; 38extern unsigned long __per_cpu_offset[NR_CPUS];
39#define per_cpu_offset(x) (__per_cpu_offset(x))
39 40
40/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ 41/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
41DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); 42DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h
index 1327c91ea39c..2d1640cc240a 100644
--- a/include/asm-ia64/rwsem.h
+++ b/include/asm-ia64/rwsem.h
@@ -33,9 +33,6 @@ struct rw_semaphore {
33 signed long count; 33 signed long count;
34 spinlock_t wait_lock; 34 spinlock_t wait_lock;
35 struct list_head wait_list; 35 struct list_head wait_list;
36#if RWSEM_DEBUG
37 int debug;
38#endif
39}; 36};
40 37
41#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) 38#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
@@ -45,19 +42,9 @@ struct rw_semaphore {
45#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 42#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
46#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 43#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
47 44
48/*
49 * initialization
50 */
51#if RWSEM_DEBUG
52#define __RWSEM_DEBUG_INIT , 0
53#else
54#define __RWSEM_DEBUG_INIT /* */
55#endif
56
57#define __RWSEM_INITIALIZER(name) \ 45#define __RWSEM_INITIALIZER(name) \
58 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 46 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
59 LIST_HEAD_INIT((name).wait_list) \ 47 LIST_HEAD_INIT((name).wait_list) }
60 __RWSEM_DEBUG_INIT }
61 48
62#define DECLARE_RWSEM(name) \ 49#define DECLARE_RWSEM(name) \
63 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 50 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -73,9 +60,6 @@ init_rwsem (struct rw_semaphore *sem)
73 sem->count = RWSEM_UNLOCKED_VALUE; 60 sem->count = RWSEM_UNLOCKED_VALUE;
74 spin_lock_init(&sem->wait_lock); 61 spin_lock_init(&sem->wait_lock);
75 INIT_LIST_HEAD(&sem->wait_list); 62 INIT_LIST_HEAD(&sem->wait_list);
76#if RWSEM_DEBUG
77 sem->debug = 0;
78#endif
79} 63}
80 64
81/* 65/*
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 8bc9869e5765..8adcde0934ca 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -68,7 +68,7 @@ struct thread_info {
68#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) 68#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
69 69
70#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 70#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
71#define alloc_task_struct() ((task_t *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) 71#define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
72#define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) 72#define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
73 73
74#endif /* !__ASSEMBLY */ 74#endif /* !__ASSEMBLY */
diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h
index 66c4742f09e7..311cebf44eff 100644
--- a/include/asm-m32r/system.h
+++ b/include/asm-m32r/system.h
@@ -18,7 +18,7 @@
18 * switch_to(prev, next) should switch from task `prev' to `next' 18 * switch_to(prev, next) should switch from task `prev' to `next'
19 * `prev' will never be the same as `next'. 19 * `prev' will never be the same as `next'.
20 * 20 *
21 * `next' and `prev' should be task_t, but it isn't always defined 21 * `next' and `prev' should be struct task_struct, but it isn't always defined
22 */ 22 */
23 23
24#define switch_to(prev, next, last) do { \ 24#define switch_to(prev, next, last) do { \
diff --git a/include/asm-powerpc/irqflags.h b/include/asm-powerpc/irqflags.h
new file mode 100644
index 000000000000..7970cbaeaa54
--- /dev/null
+++ b/include/asm-powerpc/irqflags.h
@@ -0,0 +1,31 @@
1/*
2 * include/asm-powerpc/irqflags.h
3 *
4 * IRQ flags handling
5 *
6 * This file gets included from lowlevel asm headers too, to provide
7 * wrapped versions of the local_irq_*() APIs, based on the
8 * raw_local_irq_*() macros from the lowlevel headers.
9 */
10#ifndef _ASM_IRQFLAGS_H
11#define _ASM_IRQFLAGS_H
12
13/*
14 * Get definitions for raw_local_save_flags(x), etc.
15 */
16#include <asm-powerpc/hw_irq.h>
17
18/*
19 * Do the CPU's IRQ-state tracing from assembly code. We call a
20 * C function, so save all the C-clobbered registers:
21 */
22#ifdef CONFIG_TRACE_IRQFLAGS
23
24#error No support on PowerPC yet for CONFIG_TRACE_IRQFLAGS
25
26#else
27# define TRACE_IRQS_ON
28# define TRACE_IRQS_OFF
29#endif
30
31#endif
diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h
index faa1fc703053..2f2e3024fa61 100644
--- a/include/asm-powerpc/percpu.h
+++ b/include/asm-powerpc/percpu.h
@@ -14,6 +14,7 @@
14 14
15#define __per_cpu_offset(cpu) (paca[cpu].data_offset) 15#define __per_cpu_offset(cpu) (paca[cpu].data_offset)
16#define __my_cpu_offset() get_paca()->data_offset 16#define __my_cpu_offset() get_paca()->data_offset
17#define per_cpu_offset(x) (__per_cpu_offset(x))
17 18
18/* Separate out the type, so (int[3], foo) works. */ 19/* Separate out the type, so (int[3], foo) works. */
19#define DEFINE_PER_CPU(type, name) \ 20#define DEFINE_PER_CPU(type, name) \
diff --git a/include/asm-powerpc/rwsem.h b/include/asm-powerpc/rwsem.h
index 2c2fe9647595..e929145e1e46 100644
--- a/include/asm-powerpc/rwsem.h
+++ b/include/asm-powerpc/rwsem.h
@@ -28,24 +28,11 @@ struct rw_semaphore {
28#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 28#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
29 spinlock_t wait_lock; 29 spinlock_t wait_lock;
30 struct list_head wait_list; 30 struct list_head wait_list;
31#if RWSEM_DEBUG
32 int debug;
33#endif
34}; 31};
35 32
36/*
37 * initialisation
38 */
39#if RWSEM_DEBUG
40#define __RWSEM_DEBUG_INIT , 0
41#else
42#define __RWSEM_DEBUG_INIT /* */
43#endif
44
45#define __RWSEM_INITIALIZER(name) \ 33#define __RWSEM_INITIALIZER(name) \
46 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 34 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
47 LIST_HEAD_INIT((name).wait_list) \ 35 LIST_HEAD_INIT((name).wait_list) }
48 __RWSEM_DEBUG_INIT }
49 36
50#define DECLARE_RWSEM(name) \ 37#define DECLARE_RWSEM(name) \
51 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 38 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -60,9 +47,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
60 sem->count = RWSEM_UNLOCKED_VALUE; 47 sem->count = RWSEM_UNLOCKED_VALUE;
61 spin_lock_init(&sem->wait_lock); 48 spin_lock_init(&sem->wait_lock);
62 INIT_LIST_HEAD(&sem->wait_list); 49 INIT_LIST_HEAD(&sem->wait_list);
63#if RWSEM_DEBUG
64 sem->debug = 0;
65#endif
66} 50}
67 51
68/* 52/*
diff --git a/include/asm-s390/irqflags.h b/include/asm-s390/irqflags.h
new file mode 100644
index 000000000000..65f4db627e7a
--- /dev/null
+++ b/include/asm-s390/irqflags.h
@@ -0,0 +1,50 @@
1/*
2 * include/asm-s390/irqflags.h
3 *
4 * Copyright (C) IBM Corp. 2006
5 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
6 */
7
8#ifndef __ASM_IRQFLAGS_H
9#define __ASM_IRQFLAGS_H
10
11#ifdef __KERNEL__
12
13/* interrupt control.. */
14#define raw_local_irq_enable() ({ \
15 unsigned long __dummy; \
16 __asm__ __volatile__ ( \
17 "stosm 0(%1),0x03" \
18 : "=m" (__dummy) : "a" (&__dummy) : "memory" ); \
19 })
20
21#define raw_local_irq_disable() ({ \
22 unsigned long __flags; \
23 __asm__ __volatile__ ( \
24 "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \
25 __flags; \
26 })
27
28#define raw_local_save_flags(x) \
29 __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) )
30
31#define raw_local_irq_restore(x) \
32 __asm__ __volatile__("ssm 0(%0)" : : "a" (&x), "m" (x) : "memory")
33
34#define raw_irqs_disabled() \
35({ \
36 unsigned long flags; \
37 local_save_flags(flags); \
38 !((flags >> __FLAG_SHIFT) & 3); \
39})
40
41static inline int raw_irqs_disabled_flags(unsigned long flags)
42{
43 return !((flags >> __FLAG_SHIFT) & 3);
44}
45
46/* For spinlocks etc */
47#define raw_local_irq_save(x) ((x) = raw_local_irq_disable())
48
49#endif /* __KERNEL__ */
50#endif /* __ASM_IRQFLAGS_H */
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index d9a8cca9b653..28b3517e787c 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -42,6 +42,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
42#define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) 42#define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
43#define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) 43#define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
44#define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu]) 44#define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu])
45#define per_cpu_offset(x) (__per_cpu_offset[x])
45 46
46/* A macro to avoid #include hell... */ 47/* A macro to avoid #include hell... */
47#define percpu_modcopy(pcpudst, src, size) \ 48#define percpu_modcopy(pcpudst, src, size) \
diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h
index 0422a085dd56..13ec16965150 100644
--- a/include/asm-s390/rwsem.h
+++ b/include/asm-s390/rwsem.h
@@ -61,6 +61,9 @@ struct rw_semaphore {
61 signed long count; 61 signed long count;
62 spinlock_t wait_lock; 62 spinlock_t wait_lock;
63 struct list_head wait_list; 63 struct list_head wait_list;
64#ifdef CONFIG_DEBUG_LOCK_ALLOC
65 struct lockdep_map dep_map;
66#endif
64}; 67};
65 68
66#ifndef __s390x__ 69#ifndef __s390x__
@@ -80,8 +83,16 @@ struct rw_semaphore {
80/* 83/*
81 * initialisation 84 * initialisation
82 */ 85 */
86
87#ifdef CONFIG_DEBUG_LOCK_ALLOC
88# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
89#else
90# define __RWSEM_DEP_MAP_INIT(lockname)
91#endif
92
83#define __RWSEM_INITIALIZER(name) \ 93#define __RWSEM_INITIALIZER(name) \
84{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) } 94{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
95 __RWSEM_DEP_MAP_INIT(name) }
85 96
86#define DECLARE_RWSEM(name) \ 97#define DECLARE_RWSEM(name) \
87 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 98 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -93,6 +104,17 @@ static inline void init_rwsem(struct rw_semaphore *sem)
93 INIT_LIST_HEAD(&sem->wait_list); 104 INIT_LIST_HEAD(&sem->wait_list);
94} 105}
95 106
107extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
108 struct lock_class_key *key);
109
110#define init_rwsem(sem) \
111do { \
112 static struct lock_class_key __key; \
113 \
114 __init_rwsem((sem), #sem, &__key); \
115} while (0)
116
117
96/* 118/*
97 * lock for reading 119 * lock for reading
98 */ 120 */
@@ -155,7 +177,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
155/* 177/*
156 * lock for writing 178 * lock for writing
157 */ 179 */
158static inline void __down_write(struct rw_semaphore *sem) 180static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
159{ 181{
160 signed long old, new, tmp; 182 signed long old, new, tmp;
161 183
@@ -181,6 +203,11 @@ static inline void __down_write(struct rw_semaphore *sem)
181 rwsem_down_write_failed(sem); 203 rwsem_down_write_failed(sem);
182} 204}
183 205
206static inline void __down_write(struct rw_semaphore *sem)
207{
208 __down_write_nested(sem, 0);
209}
210
184/* 211/*
185 * trylock for writing -- returns 1 if successful, 0 if contention 212 * trylock for writing -- returns 1 if successful, 0 if contention
186 */ 213 */
diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h
index 702cf436698c..32cdc69f39f4 100644
--- a/include/asm-s390/semaphore.h
+++ b/include/asm-s390/semaphore.h
@@ -37,7 +37,8 @@ struct semaphore {
37 37
38static inline void sema_init (struct semaphore *sem, int val) 38static inline void sema_init (struct semaphore *sem, int val)
39{ 39{
40 *sem = (struct semaphore) __SEMAPHORE_INITIALIZER((*sem),val); 40 atomic_set(&sem->count, val);
41 init_waitqueue_head(&sem->wait);
41} 42}
42 43
43static inline void init_MUTEX (struct semaphore *sem) 44static inline void init_MUTEX (struct semaphore *sem)
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index 71a0732cd518..9ab186ffde23 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -301,34 +301,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
301#define set_mb(var, value) do { var = value; mb(); } while (0) 301#define set_mb(var, value) do { var = value; mb(); } while (0)
302#define set_wmb(var, value) do { var = value; wmb(); } while (0) 302#define set_wmb(var, value) do { var = value; wmb(); } while (0)
303 303
304/* interrupt control.. */
305#define local_irq_enable() ({ \
306 unsigned long __dummy; \
307 __asm__ __volatile__ ( \
308 "stosm 0(%1),0x03" \
309 : "=m" (__dummy) : "a" (&__dummy) : "memory" ); \
310 })
311
312#define local_irq_disable() ({ \
313 unsigned long __flags; \
314 __asm__ __volatile__ ( \
315 "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \
316 __flags; \
317 })
318
319#define local_save_flags(x) \
320 __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) )
321
322#define local_irq_restore(x) \
323 __asm__ __volatile__("ssm 0(%0)" : : "a" (&x), "m" (x) : "memory")
324
325#define irqs_disabled() \
326({ \
327 unsigned long flags; \
328 local_save_flags(flags); \
329 !((flags >> __FLAG_SHIFT) & 3); \
330})
331
332#ifdef __s390x__ 304#ifdef __s390x__
333 305
334#define __ctl_load(array, low, high) ({ \ 306#define __ctl_load(array, low, high) ({ \
@@ -442,8 +414,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
442 }) 414 })
443#endif /* __s390x__ */ 415#endif /* __s390x__ */
444 416
445/* For spinlocks etc */ 417#include <linux/irqflags.h>
446#define local_irq_save(x) ((x) = local_irq_disable())
447 418
448/* 419/*
449 * Use to set psw mask except for the first byte which 420 * Use to set psw mask except for the first byte which
@@ -482,4 +453,3 @@ extern void (*_machine_power_off)(void);
482#endif /* __KERNEL__ */ 453#endif /* __KERNEL__ */
483 454
484#endif 455#endif
485
diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h
index 0262d3d1e5e0..9d2aea5e8488 100644
--- a/include/asm-sh/rwsem.h
+++ b/include/asm-sh/rwsem.h
@@ -25,24 +25,11 @@ struct rw_semaphore {
25#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 25#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
26 spinlock_t wait_lock; 26 spinlock_t wait_lock;
27 struct list_head wait_list; 27 struct list_head wait_list;
28#if RWSEM_DEBUG
29 int debug;
30#endif
31}; 28};
32 29
33/*
34 * initialisation
35 */
36#if RWSEM_DEBUG
37#define __RWSEM_DEBUG_INIT , 0
38#else
39#define __RWSEM_DEBUG_INIT /* */
40#endif
41
42#define __RWSEM_INITIALIZER(name) \ 30#define __RWSEM_INITIALIZER(name) \
43 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 31 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
44 LIST_HEAD_INIT((name).wait_list) \ 32 LIST_HEAD_INIT((name).wait_list) }
45 __RWSEM_DEBUG_INIT }
46 33
47#define DECLARE_RWSEM(name) \ 34#define DECLARE_RWSEM(name) \
48 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 35 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -57,9 +44,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
57 sem->count = RWSEM_UNLOCKED_VALUE; 44 sem->count = RWSEM_UNLOCKED_VALUE;
58 spin_lock_init(&sem->wait_lock); 45 spin_lock_init(&sem->wait_lock);
59 INIT_LIST_HEAD(&sem->wait_list); 46 INIT_LIST_HEAD(&sem->wait_list);
60#if RWSEM_DEBUG
61 sem->debug = 0;
62#endif
63} 47}
64 48
65/* 49/*
diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h
index b752e5cbb830..ce2e60664a86 100644
--- a/include/asm-sh/system.h
+++ b/include/asm-sh/system.h
@@ -12,7 +12,7 @@
12 */ 12 */
13 13
14#define switch_to(prev, next, last) do { \ 14#define switch_to(prev, next, last) do { \
15 task_t *__last; \ 15 struct task_struct *__last; \
16 register unsigned long *__ts1 __asm__ ("r1") = &prev->thread.sp; \ 16 register unsigned long *__ts1 __asm__ ("r1") = &prev->thread.sp; \
17 register unsigned long *__ts2 __asm__ ("r2") = &prev->thread.pc; \ 17 register unsigned long *__ts2 __asm__ ("r2") = &prev->thread.pc; \
18 register unsigned long *__ts4 __asm__ ("r4") = (unsigned long *)prev; \ 18 register unsigned long *__ts4 __asm__ ("r4") = (unsigned long *)prev; \
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index a6ece06b83db..ced8cbde046d 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -11,6 +11,7 @@ extern unsigned long __per_cpu_base;
11extern unsigned long __per_cpu_shift; 11extern unsigned long __per_cpu_shift;
12#define __per_cpu_offset(__cpu) \ 12#define __per_cpu_offset(__cpu) \
13 (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) 13 (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
14#define per_cpu_offset(x) (__per_cpu_offset(x))
14 15
15/* Separate out the type, so (int[3], foo) works. */ 16/* Separate out the type, so (int[3], foo) works. */
16#define DEFINE_PER_CPU(type, name) \ 17#define DEFINE_PER_CPU(type, name) \
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
new file mode 100644
index 000000000000..cce6937e87c0
--- /dev/null
+++ b/include/asm-x86_64/irqflags.h
@@ -0,0 +1,141 @@
1/*
2 * include/asm-x86_64/irqflags.h
3 *
4 * IRQ flags handling
5 *
6 * This file gets included from lowlevel asm headers too, to provide
7 * wrapped versions of the local_irq_*() APIs, based on the
8 * raw_local_irq_*() functions from the lowlevel headers.
9 */
10#ifndef _ASM_IRQFLAGS_H
11#define _ASM_IRQFLAGS_H
12
13#ifndef __ASSEMBLY__
14/*
15 * Interrupt control:
16 */
17
18static inline unsigned long __raw_local_save_flags(void)
19{
20 unsigned long flags;
21
22 __asm__ __volatile__(
23 "# __raw_save_flags\n\t"
24 "pushfq ; popq %q0"
25 : "=g" (flags)
26 : /* no input */
27 : "memory"
28 );
29
30 return flags;
31}
32
33#define raw_local_save_flags(flags) \
34 do { (flags) = __raw_local_save_flags(); } while (0)
35
36static inline void raw_local_irq_restore(unsigned long flags)
37{
38 __asm__ __volatile__(
39 "pushq %0 ; popfq"
40 : /* no output */
41 :"g" (flags)
42 :"memory", "cc"
43 );
44}
45
46#ifdef CONFIG_X86_VSMP
47
48/*
49 * Interrupt control for the VSMP architecture:
50 */
51
52static inline void raw_local_irq_disable(void)
53{
54 unsigned long flags = __raw_local_save_flags();
55
56 raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
57}
58
59static inline void raw_local_irq_enable(void)
60{
61 unsigned long flags = __raw_local_save_flags();
62
63 raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
64}
65
66static inline int raw_irqs_disabled_flags(unsigned long flags)
67{
68 return !(flags & (1<<9)) || (flags & (1 << 18));
69}
70
71#else /* CONFIG_X86_VSMP */
72
73static inline void raw_local_irq_disable(void)
74{
75 __asm__ __volatile__("cli" : : : "memory");
76}
77
78static inline void raw_local_irq_enable(void)
79{
80 __asm__ __volatile__("sti" : : : "memory");
81}
82
83static inline int raw_irqs_disabled_flags(unsigned long flags)
84{
85 return !(flags & (1 << 9));
86}
87
88#endif
89
90/*
91 * For spinlocks, etc.:
92 */
93
94static inline unsigned long __raw_local_irq_save(void)
95{
96 unsigned long flags = __raw_local_save_flags();
97
98 raw_local_irq_disable();
99
100 return flags;
101}
102
103#define raw_local_irq_save(flags) \
104 do { (flags) = __raw_local_irq_save(); } while (0)
105
106static inline int raw_irqs_disabled(void)
107{
108 unsigned long flags = __raw_local_save_flags();
109
110 return raw_irqs_disabled_flags(flags);
111}
112
113/*
114 * Used in the idle loop; sti takes one instruction cycle
115 * to complete:
116 */
117static inline void raw_safe_halt(void)
118{
119 __asm__ __volatile__("sti; hlt" : : : "memory");
120}
121
122/*
123 * Used when interrupts are already enabled or to
124 * shutdown the processor:
125 */
126static inline void halt(void)
127{
128 __asm__ __volatile__("hlt": : :"memory");
129}
130
131#else /* __ASSEMBLY__: */
132# ifdef CONFIG_TRACE_IRQFLAGS
133# define TRACE_IRQS_ON call trace_hardirqs_on_thunk
134# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk
135# else
136# define TRACE_IRQS_ON
137# define TRACE_IRQS_OFF
138# endif
139#endif
140
141#endif
diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
index cd52c7f33bca..2b0c088e2957 100644
--- a/include/asm-x86_64/kdebug.h
+++ b/include/asm-x86_64/kdebug.h
@@ -49,7 +49,7 @@ static inline int notify_die(enum die_val val, const char *str,
49 return atomic_notifier_call_chain(&die_chain, val, &args); 49 return atomic_notifier_call_chain(&die_chain, val, &args);
50} 50}
51 51
52extern int printk_address(unsigned long address); 52extern void printk_address(unsigned long address);
53extern void die(const char *,struct pt_regs *,long); 53extern void die(const char *,struct pt_regs *,long);
54extern void __die(const char *,struct pt_regs *,long); 54extern void __die(const char *,struct pt_regs *,long);
55extern void show_registers(struct pt_regs *regs); 55extern void show_registers(struct pt_regs *regs);
diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index 549eb929b2c0..08dd9f9dda81 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -14,6 +14,8 @@
14#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) 14#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
15#define __my_cpu_offset() read_pda(data_offset) 15#define __my_cpu_offset() read_pda(data_offset)
16 16
17#define per_cpu_offset(x) (__per_cpu_offset(x))
18
17/* Separate out the type, so (int[3], foo) works. */ 19/* Separate out the type, so (int[3], foo) works. */
18#define DEFINE_PER_CPU(type, name) \ 20#define DEFINE_PER_CPU(type, name) \
19 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name 21 __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 68e559f3631c..f67f2873a922 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
244 244
245#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) 245#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
246 246
247/* interrupt control.. */ 247#include <linux/irqflags.h>
248#define local_save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
249#define local_irq_restore(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
250
251#ifdef CONFIG_X86_VSMP
252/* Interrupt control for VSMP architecture */
253#define local_irq_disable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
254#define local_irq_enable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
255
256#define irqs_disabled() \
257({ \
258 unsigned long flags; \
259 local_save_flags(flags); \
260 (flags & (1<<18)) || !(flags & (1<<9)); \
261})
262
263/* For spinlocks etc */
264#define local_irq_save(x) do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
265#else /* CONFIG_X86_VSMP */
266#define local_irq_disable() __asm__ __volatile__("cli": : :"memory")
267#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
268
269#define irqs_disabled() \
270({ \
271 unsigned long flags; \
272 local_save_flags(flags); \
273 !(flags & (1<<9)); \
274})
275
276/* For spinlocks etc */
277#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
278#endif
279
280/* used in the idle loop; sti takes one instruction cycle to complete */
281#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
282/* used when interrupts are already enabled or to shutdown the processor */
283#define halt() __asm__ __volatile__("hlt": : :"memory")
284 248
285void cpu_idle_wait(void); 249void cpu_idle_wait(void);
286 250
diff --git a/include/asm-xtensa/rwsem.h b/include/asm-xtensa/rwsem.h
index abcd86dc5ab9..0aad3a587551 100644
--- a/include/asm-xtensa/rwsem.h
+++ b/include/asm-xtensa/rwsem.h
@@ -31,24 +31,11 @@ struct rw_semaphore {
31#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 31#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
32 spinlock_t wait_lock; 32 spinlock_t wait_lock;
33 struct list_head wait_list; 33 struct list_head wait_list;
34#if RWSEM_DEBUG
35 int debug;
36#endif
37}; 34};
38 35
39/*
40 * initialisation
41 */
42#if RWSEM_DEBUG
43#define __RWSEM_DEBUG_INIT , 0
44#else
45#define __RWSEM_DEBUG_INIT /* */
46#endif
47
48#define __RWSEM_INITIALIZER(name) \ 36#define __RWSEM_INITIALIZER(name) \
49 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ 37 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
50 LIST_HEAD_INIT((name).wait_list) \ 38 LIST_HEAD_INIT((name).wait_list) }
51 __RWSEM_DEBUG_INIT }
52 39
53#define DECLARE_RWSEM(name) \ 40#define DECLARE_RWSEM(name) \
54 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 41 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -63,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
63 sem->count = RWSEM_UNLOCKED_VALUE; 50 sem->count = RWSEM_UNLOCKED_VALUE;
64 spin_lock_init(&sem->wait_lock); 51 spin_lock_init(&sem->wait_lock);
65 INIT_LIST_HEAD(&sem->wait_list); 52 INIT_LIST_HEAD(&sem->wait_list);
66#if RWSEM_DEBUG
67 sem->debug = 0;
68#endif
69} 53}
70 54
71/* 55/*
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 90663ad217f9..251c41e3ddd5 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -21,6 +21,18 @@ struct completion {
21#define DECLARE_COMPLETION(work) \ 21#define DECLARE_COMPLETION(work) \
22 struct completion work = COMPLETION_INITIALIZER(work) 22 struct completion work = COMPLETION_INITIALIZER(work)
23 23
24/*
25 * Lockdep needs to run a non-constant initializer for on-stack
26 * completions - so we use the _ONSTACK() variant for those that
27 * are on the kernel stack:
28 */
29#ifdef CONFIG_LOCKDEP
30# define DECLARE_COMPLETION_ONSTACK(work) \
31 struct completion work = ({ init_completion(&work); work; })
32#else
33# define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
34#endif
35
24static inline void init_completion(struct completion *x) 36static inline void init_completion(struct completion *x)
25{ 37{
26 x->done = 0; 38 x->done = 0;
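
A short usage sketch of the new _ONSTACK variant (start_async_work() is an illustrative placeholder, not an existing API): the statement-expression form runs init_completion() at run time, which is what gives the on-stack completion a valid lockdep key, while file-scope completions can keep using the constant initializer.

#include <linux/completion.h>

static void start_async_work(struct completion *done);	/* placeholder */

static void do_request_and_wait(void)
{
	DECLARE_COMPLETION_ONSTACK(done);	/* lives on the kernel stack */

	start_async_work(&done);		/* completer calls complete(&done) */
	wait_for_completion(&done);
}
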
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 0dd1610a94a9..471781ffeab1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -114,6 +114,18 @@ struct dentry {
114 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ 114 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
115}; 115};
116 116
117/*
118 * dentry->d_lock spinlock nesting subclasses:
119 *
120 * 0: normal
121 * 1: nested
122 */
123enum dentry_d_lock_class
124{
125 DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */
126 DENTRY_D_LOCK_NESTED
127};
128
117struct dentry_operations { 129struct dentry_operations {
118 int (*d_revalidate)(struct dentry *, struct nameidata *); 130 int (*d_revalidate)(struct dentry *, struct nameidata *);
119 int (*d_hash) (struct dentry *, struct qstr *); 131 int (*d_hash) (struct dentry *, struct qstr *);
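
The two subclasses are intended for code that legitimately holds two d_lock spinlocks at once. A minimal sketch using the spin_lock_nested() annotation introduced by this series; lock_dentry_pair() is an illustrative helper and the outer/inner ordering rule is assumed, not prescribed here:

#include <linux/dcache.h>
#include <linux/spinlock.h>

static void lock_dentry_pair(struct dentry *outer, struct dentry *inner)
{
	spin_lock(&outer->d_lock);	/* DENTRY_D_LOCK_NORMAL, subclass 0 */
	spin_lock_nested(&inner->d_lock, DENTRY_D_LOCK_NESTED);
	/* ... both dentries are now stable ... */
	spin_unlock(&inner->d_lock);
	spin_unlock(&outer->d_lock);
}
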
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
new file mode 100644
index 000000000000..6a7047851e48
--- /dev/null
+++ b/include/linux/debug_locks.h
@@ -0,0 +1,69 @@
1#ifndef __LINUX_DEBUG_LOCKING_H
2#define __LINUX_DEBUG_LOCKING_H
3
4extern int debug_locks;
5extern int debug_locks_silent;
6
7/*
8 * Generic 'turn off all lock debugging' function:
9 */
10extern int debug_locks_off(void);
11
12/*
13 * In the debug case we carry the caller's instruction pointer into
14 * other functions, but we dont want the function argument overhead
15 * in the nondebug case - hence these macros:
16 */
17#define _RET_IP_ (unsigned long)__builtin_return_address(0)
18#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
19
20#define DEBUG_LOCKS_WARN_ON(c) \
21({ \
22 int __ret = 0; \
23 \
24 if (unlikely(c)) { \
25 if (debug_locks_off()) \
26 WARN_ON(1); \
27 __ret = 1; \
28 } \
29 __ret; \
30})
31
32#ifdef CONFIG_SMP
33# define SMP_DEBUG_LOCKS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c)
34#else
35# define SMP_DEBUG_LOCKS_WARN_ON(c) do { } while (0)
36#endif
37
38#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS
39 extern void locking_selftest(void);
40#else
41# define locking_selftest() do { } while (0)
42#endif
43
44#ifdef CONFIG_LOCKDEP
45extern void debug_show_all_locks(void);
46extern void debug_show_held_locks(struct task_struct *task);
47extern void debug_check_no_locks_freed(const void *from, unsigned long len);
48extern void debug_check_no_locks_held(struct task_struct *task);
49#else
50static inline void debug_show_all_locks(void)
51{
52}
53
54static inline void debug_show_held_locks(struct task_struct *task)
55{
56}
57
58static inline void
59debug_check_no_locks_freed(const void *from, unsigned long len)
60{
61}
62
63static inline void
64debug_check_no_locks_held(struct task_struct *task)
65{
66}
67#endif
68
69#endif
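
A minimal sketch of how lock-debugging code uses these helpers (struct my_lock and its magic field are illustrative, not kernel API): the first failed check prints one warning via WARN_ON(1), and debug_locks_off() then silences further lock debugging so a single corruption does not cause a cascade of reports.

#include <linux/debug_locks.h>
#include <linux/kernel.h>

struct my_lock {			/* illustrative debug-instrumented lock */
	struct my_lock *magic;		/* should always point back at itself */
};

static int debug_my_lock_check(struct my_lock *lock)
{
	if (DEBUG_LOCKS_WARN_ON(lock->magic != lock))
		return 0;		/* corrupted; lock debugging is now off */
	return 1;
}
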
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e04a5cfe874f..134b32068246 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -436,6 +436,21 @@ struct block_device {
436}; 436};
437 437
438/* 438/*
439 * bdev->bd_mutex nesting subclasses for the lock validator:
440 *
441 * 0: normal
442 * 1: 'whole'
443 * 2: 'partition'
444 */
445enum bdev_bd_mutex_lock_class
446{
447 BD_MUTEX_NORMAL,
448 BD_MUTEX_WHOLE,
449 BD_MUTEX_PARTITION
450};
451
452
453/*
439 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache 454 * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
440 * radix trees 455 * radix trees
441 */ 456 */
@@ -543,6 +558,25 @@ struct inode {
543}; 558};
544 559
545/* 560/*
561 * inode->i_mutex nesting subclasses for the lock validator:
562 *
563 * 0: the object of the current VFS operation
564 * 1: parent
565 * 2: child/target
566 * 3: quota file
567 *
568 * The locking order between these classes is
569 * parent -> child -> normal -> quota
570 */
571enum inode_i_mutex_lock_class
572{
573 I_MUTEX_NORMAL,
574 I_MUTEX_PARENT,
575 I_MUTEX_CHILD,
576 I_MUTEX_QUOTA
577};
578
579/*
546 * NOTE: in a 32bit arch with a preemptable kernel and 580 * NOTE: in a 32bit arch with a preemptable kernel and
547 * an UP compile the i_size_read/write must be atomic 581 * an UP compile the i_size_read/write must be atomic
548 * with respect to the local cpu (unlike with preempt disabled), 582 * with respect to the local cpu (unlike with preempt disabled),
@@ -1276,6 +1310,8 @@ struct file_system_type {
1276 struct module *owner; 1310 struct module *owner;
1277 struct file_system_type * next; 1311 struct file_system_type * next;
1278 struct list_head fs_supers; 1312 struct list_head fs_supers;
1313 struct lock_class_key s_lock_key;
1314 struct lock_class_key s_umount_key;
1279}; 1315};
1280 1316
1281extern int get_sb_bdev(struct file_system_type *fs_type, 1317extern int get_sb_bdev(struct file_system_type *fs_type,
@@ -1404,6 +1440,7 @@ extern void bd_set_size(struct block_device *, loff_t size);
1404extern void bd_forget(struct inode *inode); 1440extern void bd_forget(struct inode *inode);
1405extern void bdput(struct block_device *); 1441extern void bdput(struct block_device *);
1406extern struct block_device *open_by_devnum(dev_t, unsigned); 1442extern struct block_device *open_by_devnum(dev_t, unsigned);
1443extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
1407extern const struct file_operations def_blk_fops; 1444extern const struct file_operations def_blk_fops;
1408extern const struct address_space_operations def_blk_aops; 1445extern const struct address_space_operations def_blk_aops;
1409extern const struct file_operations def_chr_fops; 1446extern const struct file_operations def_chr_fops;
@@ -1414,6 +1451,7 @@ extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
1414extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 1451extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
1415extern int blkdev_get(struct block_device *, mode_t, unsigned); 1452extern int blkdev_get(struct block_device *, mode_t, unsigned);
1416extern int blkdev_put(struct block_device *); 1453extern int blkdev_put(struct block_device *);
1454extern int blkdev_put_partition(struct block_device *);
1417extern int bd_claim(struct block_device *, void *); 1455extern int bd_claim(struct block_device *, void *);
1418extern void bd_release(struct block_device *); 1456extern void bd_release(struct block_device *);
1419#ifdef CONFIG_SYSFS 1457#ifdef CONFIG_SYSFS
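
The i_mutex subclasses are consumed through mutex_lock_nested(), which this series adds; a minimal sketch of the parent -> child ordering the comment describes (the helper name is illustrative):

#include <linux/fs.h>
#include <linux/mutex.h>

static void lock_dir_and_victim(struct inode *dir, struct inode *victim)
{
	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&victim->i_mutex, I_MUTEX_CHILD);
	/* ... perform the VFS operation ... */
	mutex_unlock(&victim->i_mutex);
	mutex_unlock(&dir->i_mutex);
}
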
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 114ae583cca9..50d8b5744cf6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/preempt.h> 4#include <linux/preempt.h>
5#include <linux/smp_lock.h> 5#include <linux/smp_lock.h>
6#include <linux/lockdep.h>
6#include <asm/hardirq.h> 7#include <asm/hardirq.h>
7#include <asm/system.h> 8#include <asm/system.h>
8 9
@@ -86,9 +87,6 @@ extern void synchronize_irq(unsigned int irq);
86# define synchronize_irq(irq) barrier() 87# define synchronize_irq(irq) barrier()
87#endif 88#endif
88 89
89#define nmi_enter() irq_enter()
90#define nmi_exit() sub_preempt_count(HARDIRQ_OFFSET)
91
92struct task_struct; 90struct task_struct;
93 91
94#ifndef CONFIG_VIRT_CPU_ACCOUNTING 92#ifndef CONFIG_VIRT_CPU_ACCOUNTING
@@ -97,12 +95,35 @@ static inline void account_system_vtime(struct task_struct *tsk)
97} 95}
98#endif 96#endif
99 97
98/*
99 * It is safe to do non-atomic ops on ->hardirq_context,
100 * because NMI handlers may not preempt and the ops are
101 * always balanced, so the interrupted value of ->hardirq_context
102 * will always be restored.
103 */
100#define irq_enter() \ 104#define irq_enter() \
101 do { \ 105 do { \
102 account_system_vtime(current); \ 106 account_system_vtime(current); \
103 add_preempt_count(HARDIRQ_OFFSET); \ 107 add_preempt_count(HARDIRQ_OFFSET); \
108 trace_hardirq_enter(); \
109 } while (0)
110
111/*
112 * Exit irq context without processing softirqs:
113 */
114#define __irq_exit() \
115 do { \
116 trace_hardirq_exit(); \
117 account_system_vtime(current); \
118 sub_preempt_count(HARDIRQ_OFFSET); \
104 } while (0) 119 } while (0)
105 120
121/*
122 * Exit irq context and process softirqs if needed:
123 */
106extern void irq_exit(void); 124extern void irq_exit(void);
107 125
126#define nmi_enter() do { lockdep_off(); irq_enter(); } while (0)
127#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
128
108#endif /* LINUX_HARDIRQ_H */ 129#endif /* LINUX_HARDIRQ_H */
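
nmi_enter()/nmi_exit() now wrap lockdep_off()/lockdep_on() around the usual hardirq accounting, and the exit path uses __irq_exit() so that no softirq processing is attempted from NMI context. A rough sketch of the pattern an architecture's NMI entry code follows; do_nmi_work() is a placeholder:

#include <linux/hardirq.h>
#include <asm/ptrace.h>

static void do_nmi_work(struct pt_regs *regs);	/* placeholder */

static void sketch_do_nmi(struct pt_regs *regs)
{
	nmi_enter();		/* lockdep_off() + irq_enter() */
	do_nmi_work(regs);
	nmi_exit();		/* __irq_exit() + lockdep_on(): no softirqs run here */
}
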
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 07d7305f131e..e4bccbcc2750 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -91,6 +91,7 @@ struct hrtimer_base {
91 ktime_t (*get_softirq_time)(void); 91 ktime_t (*get_softirq_time)(void);
92 struct hrtimer *curr_timer; 92 struct hrtimer *curr_timer;
93 ktime_t softirq_time; 93 ktime_t softirq_time;
94 struct lock_class_key lock_key;
94}; 95};
95 96
96/* 97/*
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 285316c836b5..dc7abef10965 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1359,7 +1359,7 @@ extern struct semaphore ide_cfg_sem;
1359 * ide_drive_t->hwif: constant, no locking 1359 * ide_drive_t->hwif: constant, no locking
1360 */ 1360 */
1361 1361
1362#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable(); } while (0) 1362#define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
1363 1363
1364extern struct bus_type ide_bus_type; 1364extern struct bus_type ide_bus_type;
1365 1365
diff --git a/include/linux/idr.h b/include/linux/idr.h
index f559a719dbe8..826803449db7 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -66,7 +66,7 @@ struct idr {
66 .id_free = NULL, \ 66 .id_free = NULL, \
67 .layers = 0, \ 67 .layers = 0, \
68 .id_free_cnt = 0, \ 68 .id_free_cnt = 0, \
69 .lock = SPIN_LOCK_UNLOCKED, \ 69 .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
70} 70}
71#define DEFINE_IDR(name) struct idr name = IDR_INIT(name) 71#define DEFINE_IDR(name) struct idr name = IDR_INIT(name)
72 72
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3a256957fb56..60aac2cea0cf 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -3,6 +3,8 @@
3 3
4#include <linux/file.h> 4#include <linux/file.h>
5#include <linux/rcupdate.h> 5#include <linux/rcupdate.h>
6#include <linux/irqflags.h>
7#include <linux/lockdep.h>
6 8
7#define INIT_FDTABLE \ 9#define INIT_FDTABLE \
8{ \ 10{ \
@@ -21,7 +23,7 @@
21 .count = ATOMIC_INIT(1), \ 23 .count = ATOMIC_INIT(1), \
22 .fdt = &init_files.fdtab, \ 24 .fdt = &init_files.fdtab, \
23 .fdtab = INIT_FDTABLE, \ 25 .fdtab = INIT_FDTABLE, \
24 .file_lock = SPIN_LOCK_UNLOCKED, \ 26 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), \
25 .next_fd = 0, \ 27 .next_fd = 0, \
26 .close_on_exec_init = { { 0, } }, \ 28 .close_on_exec_init = { { 0, } }, \
27 .open_fds_init = { { 0, } }, \ 29 .open_fds_init = { { 0, } }, \
@@ -36,7 +38,7 @@
36 .user_id = 0, \ 38 .user_id = 0, \
37 .next = NULL, \ 39 .next = NULL, \
38 .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ 40 .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \
39 .ctx_lock = SPIN_LOCK_UNLOCKED, \ 41 .ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \
40 .reqs_active = 0U, \ 42 .reqs_active = 0U, \
41 .max_reqs = ~0U, \ 43 .max_reqs = ~0U, \
42} 44}
@@ -48,7 +50,7 @@
48 .mm_users = ATOMIC_INIT(2), \ 50 .mm_users = ATOMIC_INIT(2), \
49 .mm_count = ATOMIC_INIT(1), \ 51 .mm_count = ATOMIC_INIT(1), \
50 .mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \ 52 .mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \
51 .page_table_lock = SPIN_LOCK_UNLOCKED, \ 53 .page_table_lock = __SPIN_LOCK_UNLOCKED(name.page_table_lock), \
52 .mmlist = LIST_HEAD_INIT(name.mmlist), \ 54 .mmlist = LIST_HEAD_INIT(name.mmlist), \
53 .cpu_vm_mask = CPU_MASK_ALL, \ 55 .cpu_vm_mask = CPU_MASK_ALL, \
54} 56}
@@ -69,7 +71,7 @@
69#define INIT_SIGHAND(sighand) { \ 71#define INIT_SIGHAND(sighand) { \
70 .count = ATOMIC_INIT(1), \ 72 .count = ATOMIC_INIT(1), \
71 .action = { { { .sa_handler = NULL, } }, }, \ 73 .action = { { { .sa_handler = NULL, } }, }, \
72 .siglock = SPIN_LOCK_UNLOCKED, \ 74 .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \
73} 75}
74 76
75extern struct group_info init_groups; 77extern struct group_info init_groups;
@@ -119,12 +121,13 @@ extern struct group_info init_groups;
119 .list = LIST_HEAD_INIT(tsk.pending.list), \ 121 .list = LIST_HEAD_INIT(tsk.pending.list), \
120 .signal = {{0}}}, \ 122 .signal = {{0}}}, \
121 .blocked = {{0}}, \ 123 .blocked = {{0}}, \
122 .alloc_lock = SPIN_LOCK_UNLOCKED, \ 124 .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
123 .journal_info = NULL, \ 125 .journal_info = NULL, \
124 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ 126 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
125 .fs_excl = ATOMIC_INIT(0), \ 127 .fs_excl = ATOMIC_INIT(0), \
126 .pi_lock = SPIN_LOCK_UNLOCKED, \ 128 .pi_lock = SPIN_LOCK_UNLOCKED, \
127 INIT_RT_MUTEXES(tsk) \ 129 INIT_TRACE_IRQFLAGS \
130 INIT_LOCKDEP \
128} 131}
129 132
130 133
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index cf682a73a6f9..d5afee95fd43 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -10,6 +10,7 @@
10#include <linux/irqreturn.h> 10#include <linux/irqreturn.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/irqflags.h>
13#include <asm/atomic.h> 14#include <asm/atomic.h>
14#include <asm/ptrace.h> 15#include <asm/ptrace.h>
15#include <asm/system.h> 16#include <asm/system.h>
@@ -45,6 +46,7 @@
45#define IRQF_SHARED 0x00000080 46#define IRQF_SHARED 0x00000080
46#define IRQF_PROBE_SHARED 0x00000100 47#define IRQF_PROBE_SHARED 0x00000100
47#define IRQF_TIMER 0x00000200 48#define IRQF_TIMER 0x00000200
49#define IRQF_PERCPU 0x00000400
48 50
49/* 51/*
50 * Migration helpers. Scheduled for removal in 1/2007 52 * Migration helpers. Scheduled for removal in 1/2007
@@ -54,6 +56,7 @@
54#define SA_SAMPLE_RANDOM IRQF_SAMPLE_RANDOM 56#define SA_SAMPLE_RANDOM IRQF_SAMPLE_RANDOM
55#define SA_SHIRQ IRQF_SHARED 57#define SA_SHIRQ IRQF_SHARED
56#define SA_PROBEIRQ IRQF_PROBE_SHARED 58#define SA_PROBEIRQ IRQF_PROBE_SHARED
59#define SA_PERCPU IRQF_PERCPU
57 60
58#define SA_TRIGGER_LOW IRQF_TRIGGER_LOW 61#define SA_TRIGGER_LOW IRQF_TRIGGER_LOW
59#define SA_TRIGGER_HIGH IRQF_TRIGGER_HIGH 62#define SA_TRIGGER_HIGH IRQF_TRIGGER_HIGH
@@ -78,12 +81,64 @@ extern int request_irq(unsigned int,
78 unsigned long, const char *, void *); 81 unsigned long, const char *, void *);
79extern void free_irq(unsigned int, void *); 82extern void free_irq(unsigned int, void *);
80 83
84/*
85 * On lockdep we dont want to enable hardirqs in hardirq
86 * context. Use local_irq_enable_in_hardirq() to annotate
87 * kernel code that has to do this nevertheless (pretty much
88 * the only valid case is for old/broken hardware that is
89 * insanely slow).
90 *
91 * NOTE: in theory this might break fragile code that relies
92 * on hardirq delivery - in practice we dont seem to have such
93 * places left. So the only effect should be slightly increased
94 * irqs-off latencies.
95 */
96#ifdef CONFIG_LOCKDEP
97# define local_irq_enable_in_hardirq() do { } while (0)
98#else
99# define local_irq_enable_in_hardirq() local_irq_enable()
100#endif
81 101
82#ifdef CONFIG_GENERIC_HARDIRQS 102#ifdef CONFIG_GENERIC_HARDIRQS
83extern void disable_irq_nosync(unsigned int irq); 103extern void disable_irq_nosync(unsigned int irq);
84extern void disable_irq(unsigned int irq); 104extern void disable_irq(unsigned int irq);
85extern void enable_irq(unsigned int irq); 105extern void enable_irq(unsigned int irq);
86 106
107/*
108 * Special lockdep variants of irq disabling/enabling.
109 * These should be used for locking constructs that
110 * know that a particular irq context which is disabled,
111 * and which is the only irq-context user of a lock,
112 * that it's safe to take the lock in the irq-disabled
113 * section without disabling hardirqs.
114 *
115 * On !CONFIG_LOCKDEP they are equivalent to the normal
116 * irq disable/enable methods.
117 */
118static inline void disable_irq_nosync_lockdep(unsigned int irq)
119{
120 disable_irq_nosync(irq);
121#ifdef CONFIG_LOCKDEP
122 local_irq_disable();
123#endif
124}
125
126static inline void disable_irq_lockdep(unsigned int irq)
127{
128 disable_irq(irq);
129#ifdef CONFIG_LOCKDEP
130 local_irq_disable();
131#endif
132}
133
134static inline void enable_irq_lockdep(unsigned int irq)
135{
136#ifdef CONFIG_LOCKDEP
137 local_irq_enable();
138#endif
139 enable_irq(irq);
140}
141
87/* IRQ wakeup (PM) control: */ 142/* IRQ wakeup (PM) control: */
88extern int set_irq_wake(unsigned int irq, unsigned int on); 143extern int set_irq_wake(unsigned int irq, unsigned int on);
89 144
@@ -97,7 +152,19 @@ static inline int disable_irq_wake(unsigned int irq)
97 return set_irq_wake(irq, 0); 152 return set_irq_wake(irq, 0);
98} 153}
99 154
100#endif 155#else /* !CONFIG_GENERIC_HARDIRQS */
156/*
157 * NOTE: non-genirq architectures, if they want to support the lock
158 * validator need to define the methods below in their asm/irq.h
159 * files, under an #ifdef CONFIG_LOCKDEP section.
160 */
161# ifndef CONFIG_LOCKDEP
162# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq)
163# define disable_irq_lockdep(irq) disable_irq(irq)
164# define enable_irq_lockdep(irq) enable_irq(irq)
165# endif
166
167#endif /* CONFIG_GENERIC_HARDIRQS */
101 168
102#ifndef __ARCH_SET_SOFTIRQ_PENDING 169#ifndef __ARCH_SET_SOFTIRQ_PENDING
103#define set_softirq_pending(x) (local_softirq_pending() = (x)) 170#define set_softirq_pending(x) (local_softirq_pending() = (x))
@@ -133,13 +200,11 @@ static inline void __deprecated save_and_cli(unsigned long *x)
133#define save_and_cli(x) save_and_cli(&x) 200#define save_and_cli(x) save_and_cli(&x)
134#endif /* CONFIG_SMP */ 201#endif /* CONFIG_SMP */
135 202
136/* SoftIRQ primitives. */ 203extern void local_bh_disable(void);
137#define local_bh_disable() \ 204extern void __local_bh_enable(void);
138 do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0) 205extern void _local_bh_enable(void);
139#define __local_bh_enable() \
140 do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0)
141
142extern void local_bh_enable(void); 206extern void local_bh_enable(void);
207extern void local_bh_enable_ip(unsigned long ip);
143 208
144/* PLEASE, avoid allocating new softirqs if you do not need _really_ high 209
145 frequency threaded job scheduling. For almost all purposes 210
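The new disable_irq_nosync_lockdep()/disable_irq_lockdep()/enable_irq_lockdep() helpers are meant for code that takes a lock which is otherwise only used from a given interrupt handler, while that irq line is disabled. A minimal sketch of the intended pattern follows; the device structure, its fields and the function name are hypothetical, and this is only an illustration of the API added above, not a drop-in driver.

  #include <linux/interrupt.h>
  #include <linux/spinlock.h>

  /* Hypothetical device: dev->lock is shared with dev's irq handler. */
  struct my_dev {
          int irq;
          spinlock_t lock;
          int shutting_down;
  };

  static void my_dev_quiesce(struct my_dev *dev)
  {
          /*
           * Disable the irq line; with CONFIG_LOCKDEP this also disables
           * local interrupts, so the validator does not see dev->lock
           * taken with hardirqs enabled even though the handler is the
           * only other user of the lock.
           */
          disable_irq_lockdep(dev->irq);

          spin_lock(&dev->lock);
          dev->shutting_down = 1;
          spin_unlock(&dev->lock);

          enable_irq_lockdep(dev->irq);
  }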
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 87a9fc039b47..5612dfeeae50 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -55,6 +55,7 @@ struct resource_list {
55#define IORESOURCE_IRQ_LOWEDGE (1<<1) 55#define IORESOURCE_IRQ_LOWEDGE (1<<1)
56#define IORESOURCE_IRQ_HIGHLEVEL (1<<2) 56#define IORESOURCE_IRQ_HIGHLEVEL (1<<2)
57#define IORESOURCE_IRQ_LOWLEVEL (1<<3) 57#define IORESOURCE_IRQ_LOWLEVEL (1<<3)
58#define IORESOURCE_IRQ_SHAREABLE (1<<4)
58 59
59/* ISA PnP DMA specific bits (IORESOURCE_BITS) */ 60/* ISA PnP DMA specific bits (IORESOURCE_BITS) */
60#define IORESOURCE_DMA_TYPE_MASK (3<<0) 61#define IORESOURCE_DMA_TYPE_MASK (3<<0)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 95d7aa7954d2..b48eae32dc61 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -182,6 +182,10 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
182 182
183#ifdef CONFIG_GENERIC_HARDIRQS 183#ifdef CONFIG_GENERIC_HARDIRQS
184 184
185#ifndef handle_dynamic_tick
186# define handle_dynamic_tick(a) do { } while (0)
187#endif
188
185#ifdef CONFIG_SMP 189#ifdef CONFIG_SMP
186static inline void set_native_irq_info(int irq, cpumask_t mask) 190static inline void set_native_irq_info(int irq, cpumask_t mask)
187{ 191{
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
new file mode 100644
index 000000000000..412e025bc5c7
--- /dev/null
+++ b/include/linux/irqflags.h
@@ -0,0 +1,96 @@
1/*
2 * include/linux/irqflags.h
3 *
4 * IRQ flags tracing: follow the state of the hardirq and softirq flags and
5 * provide callbacks for transitions between ON and OFF states.
6 *
7 * This file gets included from lowlevel asm headers too, to provide
8 * wrapped versions of the local_irq_*() APIs, based on the
9 * raw_local_irq_*() macros from the lowlevel headers.
10 */
11#ifndef _LINUX_TRACE_IRQFLAGS_H
12#define _LINUX_TRACE_IRQFLAGS_H
13
14#ifdef CONFIG_TRACE_IRQFLAGS
15 extern void trace_hardirqs_on(void);
16 extern void trace_hardirqs_off(void);
17 extern void trace_softirqs_on(unsigned long ip);
18 extern void trace_softirqs_off(unsigned long ip);
19# define trace_hardirq_context(p) ((p)->hardirq_context)
20# define trace_softirq_context(p) ((p)->softirq_context)
21# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
22# define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
23# define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
24# define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
25# define trace_softirq_enter() do { current->softirq_context++; } while (0)
26# define trace_softirq_exit() do { current->softirq_context--; } while (0)
27# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
28#else
29# define trace_hardirqs_on() do { } while (0)
30# define trace_hardirqs_off() do { } while (0)
31# define trace_softirqs_on(ip) do { } while (0)
32# define trace_softirqs_off(ip) do { } while (0)
33# define trace_hardirq_context(p) 0
34# define trace_softirq_context(p) 0
35# define trace_hardirqs_enabled(p) 0
36# define trace_softirqs_enabled(p) 0
37# define trace_hardirq_enter() do { } while (0)
38# define trace_hardirq_exit() do { } while (0)
39# define trace_softirq_enter() do { } while (0)
40# define trace_softirq_exit() do { } while (0)
41# define INIT_TRACE_IRQFLAGS
42#endif
43
44#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
45
46#include <asm/irqflags.h>
47
48#define local_irq_enable() \
49 do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
50#define local_irq_disable() \
51 do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
52#define local_irq_save(flags) \
53 do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
54
55#define local_irq_restore(flags) \
56 do { \
57 if (raw_irqs_disabled_flags(flags)) { \
58 raw_local_irq_restore(flags); \
59 trace_hardirqs_off(); \
60 } else { \
61 trace_hardirqs_on(); \
62 raw_local_irq_restore(flags); \
63 } \
64 } while (0)
65#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
66/*
67 * The local_irq_*() APIs are equivalent to the raw_local_irq_*()
68 * variants if !TRACE_IRQFLAGS.
69 */
70# define raw_local_irq_disable() local_irq_disable()
71# define raw_local_irq_enable() local_irq_enable()
72# define raw_local_irq_save(flags) local_irq_save(flags)
73# define raw_local_irq_restore(flags) local_irq_restore(flags)
74#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
75
76#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
77#define safe_halt() \
78 do { \
79 trace_hardirqs_on(); \
80 raw_safe_halt(); \
81 } while (0)
82
83#define local_save_flags(flags) raw_local_save_flags(flags)
84
85#define irqs_disabled() \
86({ \
87 unsigned long flags; \
88 \
89 raw_local_save_flags(flags); \
90 raw_irqs_disabled_flags(flags); \
91})
92
93#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
94#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
95
96#endif
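With CONFIG_TRACE_IRQFLAGS_SUPPORT the wrapped local_irq_*() macros keep their usual calling convention, so existing callers do not change; the trace_hardirqs_on()/off() callbacks are simply emitted around the raw operations. A minimal sketch of an ordinary caller, assuming a hypothetical counter and lock:

  #include <linux/irqflags.h>
  #include <linux/spinlock.h>

  static DEFINE_SPINLOCK(counter_lock);   /* illustrative lock */
  static unsigned long counter;

  static void bump_counter(void)
  {
          unsigned long flags;

          /* Expands to raw_local_irq_save() plus trace_hardirqs_off(). */
          local_irq_save(flags);

          spin_lock(&counter_lock);
          counter++;
          spin_unlock(&counter_lock);

          /*
           * local_irq_restore() calls trace_hardirqs_on() only when
           * 'flags' says interrupts were enabled on entry.
           */
          local_irq_restore(flags);
  }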
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 54e2549f96ba..849043ce4ed6 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -57,10 +57,25 @@ do { \
57#define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr) 57#define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr)
58#endif 58#endif
59 59
60#define print_symbol(fmt, addr) \ 60static inline void print_symbol(const char *fmt, unsigned long addr)
61do { \ 61{
62 __check_printsym_format(fmt, ""); \ 62 __check_printsym_format(fmt, "");
63 __print_symbol(fmt, addr); \ 63 __print_symbol(fmt, (unsigned long)
64 __builtin_extract_return_addr((void *)addr));
65}
66
67#ifndef CONFIG_64BIT
68#define print_ip_sym(ip) \
69do { \
70 printk("[<%08lx>]", ip); \
71 print_symbol(" %s\n", ip); \
64} while(0) 72} while(0)
73#else
74#define print_ip_sym(ip) \
75do { \
76 printk("[<%016lx>]", ip); \
77 print_symbol(" %s\n", ip); \
78} while(0)
79#endif
65 80
66#endif /*_LINUX_KALLSYMS_H*/ 81#endif /*_LINUX_KALLSYMS_H*/
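print_ip_sym() picks the 32-bit or 64-bit address format according to CONFIG_64BIT, so callers just pass an instruction pointer. A small sketch, with a made-up helper name, of how a debug path might report its caller:

  #include <linux/kallsyms.h>

  static void report_caller(unsigned long ip)
  {
          /* Prints e.g. "[<c0123456>] some_function+0x12/0x40". */
          print_ip_sym(ip);
  }

  /* Typical call site:
   *   report_caller((unsigned long)__builtin_return_address(0));
   */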
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
new file mode 100644
index 000000000000..316e0fb8d7b1
--- /dev/null
+++ b/include/linux/lockdep.h
@@ -0,0 +1,353 @@
1/*
2 * Runtime locking correctness validator
3 *
4 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
5 *
6 * see Documentation/lockdep-design.txt for more details.
7 */
8#ifndef __LINUX_LOCKDEP_H
9#define __LINUX_LOCKDEP_H
10
11#include <linux/linkage.h>
12#include <linux/list.h>
13#include <linux/debug_locks.h>
14#include <linux/stacktrace.h>
15
16#ifdef CONFIG_LOCKDEP
17
18/*
19 * Lock-class usage-state bits:
20 */
21enum lock_usage_bit
22{
23 LOCK_USED = 0,
24 LOCK_USED_IN_HARDIRQ,
25 LOCK_USED_IN_SOFTIRQ,
26 LOCK_ENABLED_SOFTIRQS,
27 LOCK_ENABLED_HARDIRQS,
28 LOCK_USED_IN_HARDIRQ_READ,
29 LOCK_USED_IN_SOFTIRQ_READ,
30 LOCK_ENABLED_SOFTIRQS_READ,
31 LOCK_ENABLED_HARDIRQS_READ,
32 LOCK_USAGE_STATES
33};
34
35/*
36 * Usage-state bitmasks:
37 */
38#define LOCKF_USED (1 << LOCK_USED)
39#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ)
40#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ)
41#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS)
42#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS)
43
44#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS)
45#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
46
47#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ)
48#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ)
49#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ)
50#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ)
51
52#define LOCKF_ENABLED_IRQS_READ \
53 (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ)
54#define LOCKF_USED_IN_IRQ_READ \
55 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
56
57#define MAX_LOCKDEP_SUBCLASSES 8UL
58
59/*
60 * Lock-classes are keyed via unique addresses, by embedding the
61 * lockclass-key into the kernel (or module) .data section. (For
62 * static locks we use the lock address itself as the key.)
63 */
64struct lockdep_subclass_key {
65 char __one_byte;
66} __attribute__ ((__packed__));
67
68struct lock_class_key {
69 struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
70};
71
72/*
73 * The lock-class itself:
74 */
75struct lock_class {
76 /*
77 * class-hash:
78 */
79 struct list_head hash_entry;
80
81 /*
82 * global list of all lock-classes:
83 */
84 struct list_head lock_entry;
85
86 struct lockdep_subclass_key *key;
87 unsigned int subclass;
88
89 /*
90 * IRQ/softirq usage tracking bits:
91 */
92 unsigned long usage_mask;
93 struct stack_trace usage_traces[LOCK_USAGE_STATES];
94
95 /*
96 * These fields represent a directed graph of lock dependencies,
97 * to every node we attach a list of "forward" and a list of
98 * "backward" graph nodes.
99 */
100 struct list_head locks_after, locks_before;
101
102 /*
103 * Generation counter, when doing certain classes of graph walking,
104 * to ensure that we check one node only once:
105 */
106 unsigned int version;
107
108 /*
109 * Statistics counter:
110 */
111 unsigned long ops;
112
113 const char *name;
114 int name_version;
115};
116
117/*
118 * Map the lock object (the lock instance) to the lock-class object.
119 * This is embedded into specific lock instances:
120 */
121struct lockdep_map {
122 struct lock_class_key *key;
123 struct lock_class *class[MAX_LOCKDEP_SUBCLASSES];
124 const char *name;
125};
126
127/*
128 * Every lock has a list of other locks that were taken after it.
129 * We only grow the list, never remove from it:
130 */
131struct lock_list {
132 struct list_head entry;
133 struct lock_class *class;
134 struct stack_trace trace;
135};
136
137/*
138 * We record lock dependency chains, so that we can cache them:
139 */
140struct lock_chain {
141 struct list_head entry;
142 u64 chain_key;
143};
144
145struct held_lock {
146 /*
147 * One-way hash of the dependency chain up to this point. We
148 * hash the hashes step by step as the dependency chain grows.
149 *
150 * We use it for dependency-caching and we skip detection
151 * passes and dependency-updates if there is a cache-hit, so
152 * it is absolutely critical for 100% coverage of the validator
153 * to have a unique key value for every unique dependency path
154 * that can occur in the system, to make a unique hash value
155 * as likely as possible - hence the 64-bit width.
156 *
157 * The task struct holds the current hash value (initialized
158 * with zero), here we store the previous hash value:
159 */
160 u64 prev_chain_key;
161 struct lock_class *class;
162 unsigned long acquire_ip;
163 struct lockdep_map *instance;
164
165 /*
166 * The lock-stack is unified in that the lock chains of interrupt
167 * contexts nest on top of process context chains, but we 'separate'
168 * the hashes by starting with 0 if we cross into an interrupt
169 * context, and we also do not add cross-context lock
170 * dependencies - the lock usage graph walking covers that area
171 * anyway, and we'd just unnecessarily increase the number of
172 * dependencies otherwise. [Note: hardirq and softirq contexts
173 * are separated from each other too.]
174 *
175 * The following field is used to detect when we cross into an
176 * interrupt context:
177 */
178 int irq_context;
179 int trylock;
180 int read;
181 int check;
182 int hardirqs_off;
183};
184
185/*
186 * Initialization, self-test and debugging-output methods:
187 */
188extern void lockdep_init(void);
189extern void lockdep_info(void);
190extern void lockdep_reset(void);
191extern void lockdep_reset_lock(struct lockdep_map *lock);
192extern void lockdep_free_key_range(void *start, unsigned long size);
193
194extern void lockdep_off(void);
195extern void lockdep_on(void);
196extern int lockdep_internal(void);
197
198/*
199 * These methods are used by specific locking variants (spinlocks,
200 * rwlocks, mutexes and rwsems) to pass init/acquire/release events
201 * to lockdep:
202 */
203
204extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
205 struct lock_class_key *key);
206
207/*
208 * Reinitialize a lock key - for cases where special locking or special
209 * initialization of locks makes the validator get the scope
210 * of dependencies wrong: they are either too broad (they need a class-split)
211 * or they are too narrow (they suffer from a false class-split):
212 */
213#define lockdep_set_class(lock, key) \
214 lockdep_init_map(&(lock)->dep_map, #key, key)
215#define lockdep_set_class_and_name(lock, key, name) \
216 lockdep_init_map(&(lock)->dep_map, name, key)
217
218/*
219 * Acquire a lock.
220 *
221 * Values for "read":
222 *
223 * 0: exclusive (write) acquire
224 * 1: read-acquire (no recursion allowed)
225 * 2: read-acquire with same-instance recursion allowed
226 *
227 * Values for check:
228 *
229 * 0: disabled
230 * 1: simple checks (freeing, held-at-exit-time, etc.)
231 * 2: full validation
232 */
233extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
234 int trylock, int read, int check, unsigned long ip);
235
236extern void lock_release(struct lockdep_map *lock, int nested,
237 unsigned long ip);
238
239# define INIT_LOCKDEP .lockdep_recursion = 0,
240
241#else /* !LOCKDEP */
242
243static inline void lockdep_off(void)
244{
245}
246
247static inline void lockdep_on(void)
248{
249}
250
251static inline int lockdep_internal(void)
252{
253 return 0;
254}
255
256# define lock_acquire(l, s, t, r, c, i) do { } while (0)
257# define lock_release(l, n, i) do { } while (0)
258# define lockdep_init() do { } while (0)
259# define lockdep_info() do { } while (0)
260# define lockdep_init_map(lock, name, key) do { (void)(key); } while (0)
261# define lockdep_set_class(lock, key) do { (void)(key); } while (0)
262# define lockdep_set_class_and_name(lock, key, name) \
263 do { (void)(key); } while (0)
264# define INIT_LOCKDEP
265# define lockdep_reset() do { debug_locks = 1; } while (0)
266# define lockdep_free_key_range(start, size) do { } while (0)
267/*
268 * The class key takes no space if lockdep is disabled:
269 */
270struct lock_class_key { };
271#endif /* !LOCKDEP */
272
273#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
274extern void early_init_irq_lock_class(void);
275#else
276# define early_init_irq_lock_class() do { } while (0)
277#endif
278
279#ifdef CONFIG_TRACE_IRQFLAGS
280extern void early_boot_irqs_off(void);
281extern void early_boot_irqs_on(void);
282#else
283# define early_boot_irqs_off() do { } while (0)
284# define early_boot_irqs_on() do { } while (0)
285#endif
286
287/*
288 * For trivial one-depth nesting of a lock-class, the following
289 * global define can be used. (Subsystems with multiple levels
290 * of nesting should define their own lock-nesting subclasses.)
291 */
292#define SINGLE_DEPTH_NESTING 1
293
294/*
295 * Map the dependency ops to NOP or to real lockdep ops, depending
296 * on the per lock-class debug mode:
297 */
298
299#ifdef CONFIG_DEBUG_LOCK_ALLOC
300# ifdef CONFIG_PROVE_LOCKING
301# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
302# else
303# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
304# endif
305# define spin_release(l, n, i) lock_release(l, n, i)
306#else
307# define spin_acquire(l, s, t, i) do { } while (0)
308# define spin_release(l, n, i) do { } while (0)
309#endif
310
311#ifdef CONFIG_DEBUG_LOCK_ALLOC
312# ifdef CONFIG_PROVE_LOCKING
313# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
314# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i)
315# else
316# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
317# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i)
318# endif
319# define rwlock_release(l, n, i) lock_release(l, n, i)
320#else
321# define rwlock_acquire(l, s, t, i) do { } while (0)
322# define rwlock_acquire_read(l, s, t, i) do { } while (0)
323# define rwlock_release(l, n, i) do { } while (0)
324#endif
325
326#ifdef CONFIG_DEBUG_LOCK_ALLOC
327# ifdef CONFIG_PROVE_LOCKING
328# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
329# else
330# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
331# endif
332# define mutex_release(l, n, i) lock_release(l, n, i)
333#else
334# define mutex_acquire(l, s, t, i) do { } while (0)
335# define mutex_release(l, n, i) do { } while (0)
336#endif
337
338#ifdef CONFIG_DEBUG_LOCK_ALLOC
339# ifdef CONFIG_PROVE_LOCKING
340# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i)
341# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i)
342# else
343# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i)
344# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i)
345# endif
346# define rwsem_release(l, n, i) lock_release(l, n, i)
347#else
348# define rwsem_acquire(l, s, t, i) do { } while (0)
349# define rwsem_acquire_read(l, s, t, i) do { } while (0)
350# define rwsem_release(l, n, i) do { } while (0)
351#endif
352
353#endif /* __LINUX_LOCKDEP_H */
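lockdep_set_class() is the escape hatch the comment above describes: after initializing a lock, the caller re-keys it with its own static struct lock_class_key so that the class split matches reality. A sketch under the assumption of a driver whose per-device locks should form one dedicated class (all names are illustrative):

  #include <linux/spinlock.h>
  #include <linux/lockdep.h>

  /* One static key == one lock class shared by all mydev locks. */
  static struct lock_class_key mydev_lock_key;

  static void mydev_lock_init(spinlock_t *lock)
  {
          spin_lock_init(lock);
          /* Override the class chosen by spin_lock_init()'s call site. */
          lockdep_set_class(lock, &mydev_lock_key);
  }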
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75179529e399..990957e0929f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
14#include <linux/prio_tree.h> 14#include <linux/prio_tree.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/debug_locks.h>
17 18
18struct mempolicy; 19struct mempolicy;
19struct anon_vma; 20struct anon_vma;
@@ -1034,13 +1035,6 @@ static inline void vm_stat_account(struct mm_struct *mm,
1034} 1035}
1035#endif /* CONFIG_PROC_FS */ 1036#endif /* CONFIG_PROC_FS */
1036 1037
1037static inline void
1038debug_check_no_locks_freed(const void *from, unsigned long len)
1039{
1040 mutex_debug_check_no_locks_freed(from, len);
1041 rt_mutex_debug_check_no_locks_freed(from, len);
1042}
1043
1044#ifndef CONFIG_DEBUG_PAGEALLOC 1038#ifndef CONFIG_DEBUG_PAGEALLOC
1045static inline void 1039static inline void
1046kernel_map_pages(struct page *page, int numpages, int enable) 1040kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 27e748eb72b0..656b588a9f96 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -150,6 +150,10 @@ struct zone {
150 unsigned long lowmem_reserve[MAX_NR_ZONES]; 150 unsigned long lowmem_reserve[MAX_NR_ZONES];
151 151
152#ifdef CONFIG_NUMA 152#ifdef CONFIG_NUMA
153 /*
154 * zone reclaim becomes active if more than min_unmapped_ratio unmapped pages exist.
155 */
156 unsigned long min_unmapped_ratio;
153 struct per_cpu_pageset *pageset[NR_CPUS]; 157 struct per_cpu_pageset *pageset[NR_CPUS];
154#else 158#else
155 struct per_cpu_pageset pageset[NR_CPUS]; 159 struct per_cpu_pageset pageset[NR_CPUS];
@@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
414 void __user *, size_t *, loff_t *); 418 void __user *, size_t *, loff_t *);
415int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, 419int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
416 void __user *, size_t *, loff_t *); 420 void __user *, size_t *, loff_t *);
421int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
422 struct file *, void __user *, size_t *, loff_t *);
417 423
418#include <linux/topology.h> 424#include <linux/topology.h>
419/* Returns the number of the current Node. */ 425/* Returns the number of the current Node. */
diff --git a/include/linux/module.h b/include/linux/module.h
index 9e9dc7c24d95..d06c74fb8c26 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -358,6 +358,7 @@ static inline int module_is_live(struct module *mod)
358/* Is this address in a module? (second is with no locks, for oops) */ 358/* Is this address in a module? (second is with no locks, for oops) */
359struct module *module_text_address(unsigned long addr); 359struct module *module_text_address(unsigned long addr);
360struct module *__module_text_address(unsigned long addr); 360struct module *__module_text_address(unsigned long addr);
361int is_module_address(unsigned long addr);
361 362
362/* Returns module and fills in value, defined and namebuf, or NULL if 363/* Returns module and fills in value, defined and namebuf, or NULL if
363 symnum out of range. */ 364 symnum out of range. */
@@ -496,6 +497,11 @@ static inline struct module *__module_text_address(unsigned long addr)
496 return NULL; 497 return NULL;
497} 498}
498 499
500static inline int is_module_address(unsigned long addr)
501{
502 return 0;
503}
504
499/* Get/put a kernel symbol (calls should be symmetric) */ 505/* Get/put a kernel symbol (calls should be symmetric) */
500#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) 506#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
501#define symbol_put(x) do { } while(0) 507#define symbol_put(x) do { } while(0)
diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
index 8b5769f00467..2537285e1064 100644
--- a/include/linux/mutex-debug.h
+++ b/include/linux/mutex-debug.h
@@ -2,22 +2,22 @@
2#define __LINUX_MUTEX_DEBUG_H 2#define __LINUX_MUTEX_DEBUG_H
3 3
4#include <linux/linkage.h> 4#include <linux/linkage.h>
5#include <linux/lockdep.h>
5 6
6/* 7/*
7 * Mutexes - debugging helpers: 8 * Mutexes - debugging helpers:
8 */ 9 */
9 10
10#define __DEBUG_MUTEX_INITIALIZER(lockname) \ 11#define __DEBUG_MUTEX_INITIALIZER(lockname) \
11 , .held_list = LIST_HEAD_INIT(lockname.held_list), \ 12 , .magic = &lockname
12 .name = #lockname , .magic = &lockname
13 13
14#define mutex_init(sem) __mutex_init(sem, __FUNCTION__) 14#define mutex_init(mutex) \
15do { \
16 static struct lock_class_key __key; \
17 \
18 __mutex_init((mutex), #mutex, &__key); \
19} while (0)
15 20
16extern void FASTCALL(mutex_destroy(struct mutex *lock)); 21extern void FASTCALL(mutex_destroy(struct mutex *lock));
17 22
18extern void mutex_debug_show_all_locks(void);
19extern void mutex_debug_show_held_locks(struct task_struct *filter);
20extern void mutex_debug_check_no_locks_held(struct task_struct *task);
21extern void mutex_debug_check_no_locks_freed(const void *from, unsigned long len);
22
23#endif 23#endif
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f1ac507fa20d..27c48daa3183 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -13,6 +13,7 @@
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/spinlock_types.h> 14#include <linux/spinlock_types.h>
15#include <linux/linkage.h> 15#include <linux/linkage.h>
16#include <linux/lockdep.h>
16 17
17#include <asm/atomic.h> 18#include <asm/atomic.h>
18 19
@@ -50,11 +51,12 @@ struct mutex {
50 struct list_head wait_list; 51 struct list_head wait_list;
51#ifdef CONFIG_DEBUG_MUTEXES 52#ifdef CONFIG_DEBUG_MUTEXES
52 struct thread_info *owner; 53 struct thread_info *owner;
53 struct list_head held_list;
54 unsigned long acquire_ip;
55 const char *name; 54 const char *name;
56 void *magic; 55 void *magic;
57#endif 56#endif
57#ifdef CONFIG_DEBUG_LOCK_ALLOC
58 struct lockdep_map dep_map;
59#endif
58}; 60};
59 61
60/* 62/*
@@ -74,24 +76,34 @@ struct mutex_waiter {
74# include <linux/mutex-debug.h> 76# include <linux/mutex-debug.h>
75#else 77#else
76# define __DEBUG_MUTEX_INITIALIZER(lockname) 78# define __DEBUG_MUTEX_INITIALIZER(lockname)
77# define mutex_init(mutex) __mutex_init(mutex, NULL) 79# define mutex_init(mutex) \
80do { \
81 static struct lock_class_key __key; \
82 \
83 __mutex_init((mutex), #mutex, &__key); \
84} while (0)
78# define mutex_destroy(mutex) do { } while (0) 85# define mutex_destroy(mutex) do { } while (0)
79# define mutex_debug_show_all_locks() do { } while (0) 86#endif
80# define mutex_debug_show_held_locks(p) do { } while (0) 87
81# define mutex_debug_check_no_locks_held(task) do { } while (0) 88#ifdef CONFIG_DEBUG_LOCK_ALLOC
82# define mutex_debug_check_no_locks_freed(from, len) do { } while (0) 89# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
90 , .dep_map = { .name = #lockname }
91#else
92# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
83#endif 93#endif
84 94
85#define __MUTEX_INITIALIZER(lockname) \ 95#define __MUTEX_INITIALIZER(lockname) \
86 { .count = ATOMIC_INIT(1) \ 96 { .count = ATOMIC_INIT(1) \
87 , .wait_lock = SPIN_LOCK_UNLOCKED \ 97 , .wait_lock = SPIN_LOCK_UNLOCKED \
88 , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ 98 , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
89 __DEBUG_MUTEX_INITIALIZER(lockname) } 99 __DEBUG_MUTEX_INITIALIZER(lockname) \
100 __DEP_MAP_MUTEX_INITIALIZER(lockname) }
90 101
91#define DEFINE_MUTEX(mutexname) \ 102#define DEFINE_MUTEX(mutexname) \
92 struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) 103 struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
93 104
94extern void fastcall __mutex_init(struct mutex *lock, const char *name); 105extern void __mutex_init(struct mutex *lock, const char *name,
106 struct lock_class_key *key);
95 107
96/*** 108/***
97 * mutex_is_locked - is the mutex locked 109 * mutex_is_locked - is the mutex locked
@@ -110,6 +122,13 @@ static inline int fastcall mutex_is_locked(struct mutex *lock)
110 */ 122 */
111extern void fastcall mutex_lock(struct mutex *lock); 123extern void fastcall mutex_lock(struct mutex *lock);
112extern int fastcall mutex_lock_interruptible(struct mutex *lock); 124extern int fastcall mutex_lock_interruptible(struct mutex *lock);
125
126#ifdef CONFIG_DEBUG_LOCK_ALLOC
127extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
128#else
129# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
130#endif
131
113/* 132/*
114 * NOTE: mutex_trylock() follows the spin_trylock() convention, 133 * NOTE: mutex_trylock() follows the spin_trylock() convention,
115 * not the down_trylock() convention! 134 * not the down_trylock() convention!
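mutex_lock_nested() exists so that a legitimate, ordered acquisition of two mutexes from the same lock class does not trigger a false deadlock report. A minimal sketch, assuming a hypothetical tree-like structure where the parent is always locked before the child:

  #include <linux/mutex.h>
  #include <linux/lockdep.h>

  struct node {
          struct mutex lock;
          struct node *parent;
  };

  static void lock_parent_then_child(struct node *child)
  {
          mutex_lock(&child->parent->lock);
          /* Same class as the parent mutex, so annotate the nesting level. */
          mutex_lock_nested(&child->lock, SINGLE_DEPTH_NESTING);
  }

  /* Unlock in the reverse order: the child's lock first, then the parent's. */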
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 51dbab9710c7..7ff386a6ae87 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -65,7 +65,7 @@ struct raw_notifier_head {
65 } while (0) 65 } while (0)
66 66
67#define ATOMIC_NOTIFIER_INIT(name) { \ 67#define ATOMIC_NOTIFIER_INIT(name) { \
68 .lock = SPIN_LOCK_UNLOCKED, \ 68 .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
69 .head = NULL } 69 .head = NULL }
70#define BLOCKING_NOTIFIER_INIT(name) { \ 70#define BLOCKING_NOTIFIER_INIT(name) { \
71 .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ 71 .rwsem = __RWSEM_INITIALIZER((name).rwsem), \
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b093479a531d..685081c01342 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -728,6 +728,7 @@
728#define PCI_DEVICE_ID_TI_TVP4020 0x3d07 728#define PCI_DEVICE_ID_TI_TVP4020 0x3d07
729#define PCI_DEVICE_ID_TI_4450 0x8011 729#define PCI_DEVICE_ID_TI_4450 0x8011
730#define PCI_DEVICE_ID_TI_XX21_XX11 0x8031 730#define PCI_DEVICE_ID_TI_XX21_XX11 0x8031
731#define PCI_DEVICE_ID_TI_XX21_XX11_SD 0x8034
731#define PCI_DEVICE_ID_TI_X515 0x8036 732#define PCI_DEVICE_ID_TI_X515 0x8036
732#define PCI_DEVICE_ID_TI_XX12 0x8039 733#define PCI_DEVICE_ID_TI_XX12 0x8039
733#define PCI_DEVICE_ID_TI_1130 0xac12 734#define PCI_DEVICE_ID_TI_1130 0xac12
@@ -1442,6 +1443,7 @@
1442#define PCI_DEVICE_ID_RICOH_RL5C475 0x0475 1443#define PCI_DEVICE_ID_RICOH_RL5C475 0x0475
1443#define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 1444#define PCI_DEVICE_ID_RICOH_RL5C476 0x0476
1444#define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 1445#define PCI_DEVICE_ID_RICOH_RL5C478 0x0478
1446#define PCI_DEVICE_ID_RICOH_R5C822 0x0822
1445 1447
1446#define PCI_VENDOR_ID_DLINK 0x1186 1448#define PCI_VENDOR_ID_DLINK 0x1186
1447#define PCI_DEVICE_ID_DLINK_DGE510T 0x4c00 1449#define PCI_DEVICE_ID_DLINK_DGE510T 0x4c00
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index fa4a3b82ba70..5d41dee82f80 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -29,8 +29,6 @@ struct rt_mutex {
29 struct task_struct *owner; 29 struct task_struct *owner;
30#ifdef CONFIG_DEBUG_RT_MUTEXES 30#ifdef CONFIG_DEBUG_RT_MUTEXES
31 int save_state; 31 int save_state;
32 struct list_head held_list_entry;
33 unsigned long acquire_ip;
34 const char *name, *file; 32 const char *name, *file;
35 int line; 33 int line;
36 void *magic; 34 void *magic;
@@ -98,14 +96,6 @@ extern int rt_mutex_trylock(struct rt_mutex *lock);
98 96
99extern void rt_mutex_unlock(struct rt_mutex *lock); 97extern void rt_mutex_unlock(struct rt_mutex *lock);
100 98
101#ifdef CONFIG_DEBUG_RT_MUTEXES
102# define INIT_RT_MUTEX_DEBUG(tsk) \
103 .held_list_head = LIST_HEAD_INIT(tsk.held_list_head), \
104 .held_list_lock = SPIN_LOCK_UNLOCKED
105#else
106# define INIT_RT_MUTEX_DEBUG(tsk)
107#endif
108
109#ifdef CONFIG_RT_MUTEXES 99#ifdef CONFIG_RT_MUTEXES
110# define INIT_RT_MUTEXES(tsk) \ 100# define INIT_RT_MUTEXES(tsk) \
111 .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock), \ 101 .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock), \
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index f30f805080ae..ae1fcadd598e 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -32,30 +32,37 @@ struct rw_semaphore {
32 __s32 activity; 32 __s32 activity;
33 spinlock_t wait_lock; 33 spinlock_t wait_lock;
34 struct list_head wait_list; 34 struct list_head wait_list;
35#if RWSEM_DEBUG 35#ifdef CONFIG_DEBUG_LOCK_ALLOC
36 int debug; 36 struct lockdep_map dep_map;
37#endif 37#endif
38}; 38};
39 39
40/* 40#ifdef CONFIG_DEBUG_LOCK_ALLOC
41 * initialisation 41# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
42 */
43#if RWSEM_DEBUG
44#define __RWSEM_DEBUG_INIT , 0
45#else 42#else
46#define __RWSEM_DEBUG_INIT /* */ 43# define __RWSEM_DEP_MAP_INIT(lockname)
47#endif 44#endif
48 45
49#define __RWSEM_INITIALIZER(name) \ 46#define __RWSEM_INITIALIZER(name) \
50{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } 47{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
51 48
52#define DECLARE_RWSEM(name) \ 49#define DECLARE_RWSEM(name) \
53 struct rw_semaphore name = __RWSEM_INITIALIZER(name) 50 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
54 51
55extern void FASTCALL(init_rwsem(struct rw_semaphore *sem)); 52extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
53 struct lock_class_key *key);
54
55#define init_rwsem(sem) \
56do { \
57 static struct lock_class_key __key; \
58 \
59 __init_rwsem((sem), #sem, &__key); \
60} while (0)
61
56extern void FASTCALL(__down_read(struct rw_semaphore *sem)); 62extern void FASTCALL(__down_read(struct rw_semaphore *sem));
57extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem)); 63extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem));
58extern void FASTCALL(__down_write(struct rw_semaphore *sem)); 64extern void FASTCALL(__down_write(struct rw_semaphore *sem));
65extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass));
59extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem)); 66extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem));
60extern void FASTCALL(__up_read(struct rw_semaphore *sem)); 67extern void FASTCALL(__up_read(struct rw_semaphore *sem));
61extern void FASTCALL(__up_write(struct rw_semaphore *sem)); 68extern void FASTCALL(__up_write(struct rw_semaphore *sem));
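Because init_rwsem() now declares a static struct lock_class_key at the call site, every distinct init_rwsem() location becomes its own lock class while the calling convention stays the same. A short sketch (the containing structure is made up):

  #include <linux/rwsem.h>

  struct my_object {
          struct rw_semaphore sem;
  };

  static void my_object_init(struct my_object *obj)
  {
          /*
           * Expands to __init_rwsem(&obj->sem, "&obj->sem", &__key),
           * where __key is a static key unique to this call site.
           */
          init_rwsem(&obj->sem);
  }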
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index f99fe90732ab..658afb37c3f5 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -9,8 +9,6 @@
9 9
10#include <linux/linkage.h> 10#include <linux/linkage.h>
11 11
12#define RWSEM_DEBUG 0
13
14#ifdef __KERNEL__ 12#ifdef __KERNEL__
15 13
16#include <linux/types.h> 14#include <linux/types.h>
@@ -26,89 +24,58 @@ struct rw_semaphore;
26#include <asm/rwsem.h> /* use an arch-specific implementation */ 24#include <asm/rwsem.h> /* use an arch-specific implementation */
27#endif 25#endif
28 26
29#ifndef rwsemtrace
30#if RWSEM_DEBUG
31extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str));
32#else
33#define rwsemtrace(SEM,FMT)
34#endif
35#endif
36
37/* 27/*
38 * lock for reading 28 * lock for reading
39 */ 29 */
40static inline void down_read(struct rw_semaphore *sem) 30extern void down_read(struct rw_semaphore *sem);
41{
42 might_sleep();
43 rwsemtrace(sem,"Entering down_read");
44 __down_read(sem);
45 rwsemtrace(sem,"Leaving down_read");
46}
47 31
48/* 32/*
49 * trylock for reading -- returns 1 if successful, 0 if contention 33 * trylock for reading -- returns 1 if successful, 0 if contention
50 */ 34 */
51static inline int down_read_trylock(struct rw_semaphore *sem) 35extern int down_read_trylock(struct rw_semaphore *sem);
52{
53 int ret;
54 rwsemtrace(sem,"Entering down_read_trylock");
55 ret = __down_read_trylock(sem);
56 rwsemtrace(sem,"Leaving down_read_trylock");
57 return ret;
58}
59 36
60/* 37/*
61 * lock for writing 38 * lock for writing
62 */ 39 */
63static inline void down_write(struct rw_semaphore *sem) 40extern void down_write(struct rw_semaphore *sem);
64{
65 might_sleep();
66 rwsemtrace(sem,"Entering down_write");
67 __down_write(sem);
68 rwsemtrace(sem,"Leaving down_write");
69}
70 41
71/* 42/*
72 * trylock for writing -- returns 1 if successful, 0 if contention 43 * trylock for writing -- returns 1 if successful, 0 if contention
73 */ 44 */
74static inline int down_write_trylock(struct rw_semaphore *sem) 45extern int down_write_trylock(struct rw_semaphore *sem);
75{
76 int ret;
77 rwsemtrace(sem,"Entering down_write_trylock");
78 ret = __down_write_trylock(sem);
79 rwsemtrace(sem,"Leaving down_write_trylock");
80 return ret;
81}
82 46
83/* 47/*
84 * release a read lock 48 * release a read lock
85 */ 49 */
86static inline void up_read(struct rw_semaphore *sem) 50extern void up_read(struct rw_semaphore *sem);
87{
88 rwsemtrace(sem,"Entering up_read");
89 __up_read(sem);
90 rwsemtrace(sem,"Leaving up_read");
91}
92 51
93/* 52/*
94 * release a write lock 53 * release a write lock
95 */ 54 */
96static inline void up_write(struct rw_semaphore *sem) 55extern void up_write(struct rw_semaphore *sem);
97{
98 rwsemtrace(sem,"Entering up_write");
99 __up_write(sem);
100 rwsemtrace(sem,"Leaving up_write");
101}
102 56
103/* 57/*
104 * downgrade write lock to read lock 58 * downgrade write lock to read lock
105 */ 59 */
106static inline void downgrade_write(struct rw_semaphore *sem) 60extern void downgrade_write(struct rw_semaphore *sem);
107{ 61
108 rwsemtrace(sem,"Entering downgrade_write"); 62#ifdef CONFIG_DEBUG_LOCK_ALLOC
109 __downgrade_write(sem); 63/*
110 rwsemtrace(sem,"Leaving downgrade_write"); 64 * nested locking:
111} 65 */
66extern void down_read_nested(struct rw_semaphore *sem, int subclass);
67extern void down_write_nested(struct rw_semaphore *sem, int subclass);
68/*
69 * Take/release a lock when a task other than the owner will release it:
70 */
71extern void down_read_non_owner(struct rw_semaphore *sem);
72extern void up_read_non_owner(struct rw_semaphore *sem);
73#else
74# define down_read_nested(sem, subclass) down_read(sem)
75# define down_write_nested(sem, subclass) down_write(sem)
76# define down_read_non_owner(sem) down_read(sem)
77# define up_read_non_owner(sem) up_read(sem)
78#endif
112 79
113#endif /* __KERNEL__ */ 80#endif /* __KERNEL__ */
114#endif /* _LINUX_RWSEM_H */ 81#endif /* _LINUX_RWSEM_H */
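down_read_nested()/down_write_nested() and the *_non_owner() variants cover the two rwsem patterns the validator cannot infer on its own: ordered nesting of two semaphores from the same class, and a semaphore released by a task other than the one that acquired it. A hedged sketch of the nesting case, with made-up types:

  #include <linux/rwsem.h>
  #include <linux/lockdep.h>

  struct tree_node {
          struct rw_semaphore sem;
          struct tree_node *parent;
  };

  static void read_lock_pair(struct tree_node *node)
  {
          down_read(&node->parent->sem);
          /* Same class as the parent rwsem: annotate the nesting. */
          down_read_nested(&node->sem, SINGLE_DEPTH_NESTING);
  }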
diff --git a/include/linux/sched.h b/include/linux/sched.h
index aaf723308ed4..1c876e27ff93 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -184,11 +184,11 @@ extern unsigned long weighted_cpuload(const int cpu);
184extern rwlock_t tasklist_lock; 184extern rwlock_t tasklist_lock;
185extern spinlock_t mmlist_lock; 185extern spinlock_t mmlist_lock;
186 186
187typedef struct task_struct task_t; 187struct task_struct;
188 188
189extern void sched_init(void); 189extern void sched_init(void);
190extern void sched_init_smp(void); 190extern void sched_init_smp(void);
191extern void init_idle(task_t *idle, int cpu); 191extern void init_idle(struct task_struct *idle, int cpu);
192 192
193extern cpumask_t nohz_cpu_mask; 193extern cpumask_t nohz_cpu_mask;
194 194
@@ -383,7 +383,7 @@ struct signal_struct {
383 wait_queue_head_t wait_chldexit; /* for wait4() */ 383 wait_queue_head_t wait_chldexit; /* for wait4() */
384 384
385 /* current thread group signal load-balancing target: */ 385 /* current thread group signal load-balancing target: */
386 task_t *curr_target; 386 struct task_struct *curr_target;
387 387
388 /* shared signal handling: */ 388 /* shared signal handling: */
389 struct sigpending shared_pending; 389 struct sigpending shared_pending;
@@ -534,7 +534,6 @@ extern struct user_struct *find_user(uid_t);
534extern struct user_struct root_user; 534extern struct user_struct root_user;
535#define INIT_USER (&root_user) 535#define INIT_USER (&root_user)
536 536
537typedef struct prio_array prio_array_t;
538struct backing_dev_info; 537struct backing_dev_info;
539struct reclaim_state; 538struct reclaim_state;
540 539
@@ -699,7 +698,7 @@ extern int groups_search(struct group_info *group_info, gid_t grp);
699 ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) 698 ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
700 699
701#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK 700#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
702extern void prefetch_stack(struct task_struct*); 701extern void prefetch_stack(struct task_struct *t);
703#else 702#else
704static inline void prefetch_stack(struct task_struct *t) { } 703static inline void prefetch_stack(struct task_struct *t) { }
705#endif 704#endif
@@ -715,6 +714,8 @@ enum sleep_type {
715 SLEEP_INTERRUPTED, 714 SLEEP_INTERRUPTED,
716}; 715};
717 716
717struct prio_array;
718
718struct task_struct { 719struct task_struct {
719 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 720 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
720 struct thread_info *thread_info; 721 struct thread_info *thread_info;
@@ -732,7 +733,7 @@ struct task_struct {
732 int load_weight; /* for niceness load balancing purposes */ 733 int load_weight; /* for niceness load balancing purposes */
733 int prio, static_prio, normal_prio; 734 int prio, static_prio, normal_prio;
734 struct list_head run_list; 735 struct list_head run_list;
735 prio_array_t *array; 736 struct prio_array *array;
736 737
737 unsigned short ioprio; 738 unsigned short ioprio;
738 unsigned int btrace_seq; 739 unsigned int btrace_seq;
@@ -865,16 +866,34 @@ struct task_struct {
865 struct plist_head pi_waiters; 866 struct plist_head pi_waiters;
866 /* Deadlock detection and priority inheritance handling */ 867 /* Deadlock detection and priority inheritance handling */
867 struct rt_mutex_waiter *pi_blocked_on; 868 struct rt_mutex_waiter *pi_blocked_on;
868# ifdef CONFIG_DEBUG_RT_MUTEXES
869 spinlock_t held_list_lock;
870 struct list_head held_list_head;
871# endif
872#endif 869#endif
873 870
874#ifdef CONFIG_DEBUG_MUTEXES 871#ifdef CONFIG_DEBUG_MUTEXES
875 /* mutex deadlock detection */ 872 /* mutex deadlock detection */
876 struct mutex_waiter *blocked_on; 873 struct mutex_waiter *blocked_on;
877#endif 874#endif
875#ifdef CONFIG_TRACE_IRQFLAGS
876 unsigned int irq_events;
877 int hardirqs_enabled;
878 unsigned long hardirq_enable_ip;
879 unsigned int hardirq_enable_event;
880 unsigned long hardirq_disable_ip;
881 unsigned int hardirq_disable_event;
882 int softirqs_enabled;
883 unsigned long softirq_disable_ip;
884 unsigned int softirq_disable_event;
885 unsigned long softirq_enable_ip;
886 unsigned int softirq_enable_event;
887 int hardirq_context;
888 int softirq_context;
889#endif
890#ifdef CONFIG_LOCKDEP
891# define MAX_LOCK_DEPTH 30UL
892 u64 curr_chain_key;
893 int lockdep_depth;
894 struct held_lock held_locks[MAX_LOCK_DEPTH];
895 unsigned int lockdep_recursion;
896#endif
878 897
879/* journalling filesystem info */ 898/* journalling filesystem info */
880 void *journal_info; 899 void *journal_info;
@@ -1013,9 +1032,9 @@ static inline void put_task_struct(struct task_struct *t)
1013#define used_math() tsk_used_math(current) 1032#define used_math() tsk_used_math(current)
1014 1033
1015#ifdef CONFIG_SMP 1034#ifdef CONFIG_SMP
1016extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); 1035extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask);
1017#else 1036#else
1018static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) 1037static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
1019{ 1038{
1020 if (!cpu_isset(0, new_mask)) 1039 if (!cpu_isset(0, new_mask))
1021 return -EINVAL; 1040 return -EINVAL;
@@ -1024,7 +1043,8 @@ static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
1024#endif 1043#endif
1025 1044
1026extern unsigned long long sched_clock(void); 1045extern unsigned long long sched_clock(void);
1027extern unsigned long long current_sched_time(const task_t *current_task); 1046extern unsigned long long
1047current_sched_time(const struct task_struct *current_task);
1028 1048
1029/* sched_exec is called by processes performing an exec */ 1049/* sched_exec is called by processes performing an exec */
1030#ifdef CONFIG_SMP 1050#ifdef CONFIG_SMP
@@ -1042,27 +1062,27 @@ static inline void idle_task_exit(void) {}
1042extern void sched_idle_next(void); 1062extern void sched_idle_next(void);
1043 1063
1044#ifdef CONFIG_RT_MUTEXES 1064#ifdef CONFIG_RT_MUTEXES
1045extern int rt_mutex_getprio(task_t *p); 1065extern int rt_mutex_getprio(struct task_struct *p);
1046extern void rt_mutex_setprio(task_t *p, int prio); 1066extern void rt_mutex_setprio(struct task_struct *p, int prio);
1047extern void rt_mutex_adjust_pi(task_t *p); 1067extern void rt_mutex_adjust_pi(struct task_struct *p);
1048#else 1068#else
1049static inline int rt_mutex_getprio(task_t *p) 1069static inline int rt_mutex_getprio(struct task_struct *p)
1050{ 1070{
1051 return p->normal_prio; 1071 return p->normal_prio;
1052} 1072}
1053# define rt_mutex_adjust_pi(p) do { } while (0) 1073# define rt_mutex_adjust_pi(p) do { } while (0)
1054#endif 1074#endif
1055 1075
1056extern void set_user_nice(task_t *p, long nice); 1076extern void set_user_nice(struct task_struct *p, long nice);
1057extern int task_prio(const task_t *p); 1077extern int task_prio(const struct task_struct *p);
1058extern int task_nice(const task_t *p); 1078extern int task_nice(const struct task_struct *p);
1059extern int can_nice(const task_t *p, const int nice); 1079extern int can_nice(const struct task_struct *p, const int nice);
1060extern int task_curr(const task_t *p); 1080extern int task_curr(const struct task_struct *p);
1061extern int idle_cpu(int cpu); 1081extern int idle_cpu(int cpu);
1062extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); 1082extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
1063extern task_t *idle_task(int cpu); 1083extern struct task_struct *idle_task(int cpu);
1064extern task_t *curr_task(int cpu); 1084extern struct task_struct *curr_task(int cpu);
1065extern void set_curr_task(int cpu, task_t *p); 1085extern void set_curr_task(int cpu, struct task_struct *p);
1066 1086
1067void yield(void); 1087void yield(void);
1068 1088
@@ -1119,8 +1139,8 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
1119#else 1139#else
1120 static inline void kick_process(struct task_struct *tsk) { } 1140 static inline void kick_process(struct task_struct *tsk) { }
1121#endif 1141#endif
1122extern void FASTCALL(sched_fork(task_t * p, int clone_flags)); 1142extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags));
1123extern void FASTCALL(sched_exit(task_t * p)); 1143extern void FASTCALL(sched_exit(struct task_struct * p));
1124 1144
1125extern int in_group_p(gid_t); 1145extern int in_group_p(gid_t);
1126extern int in_egroup_p(gid_t); 1146extern int in_egroup_p(gid_t);
@@ -1225,17 +1245,17 @@ extern NORET_TYPE void do_group_exit(int);
1225extern void daemonize(const char *, ...); 1245extern void daemonize(const char *, ...);
1226extern int allow_signal(int); 1246extern int allow_signal(int);
1227extern int disallow_signal(int); 1247extern int disallow_signal(int);
1228extern task_t *child_reaper; 1248extern struct task_struct *child_reaper;
1229 1249
1230extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); 1250extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
1231extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); 1251extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
1232task_t *fork_idle(int); 1252struct task_struct *fork_idle(int);
1233 1253
1234extern void set_task_comm(struct task_struct *tsk, char *from); 1254extern void set_task_comm(struct task_struct *tsk, char *from);
1235extern void get_task_comm(char *to, struct task_struct *tsk); 1255extern void get_task_comm(char *to, struct task_struct *tsk);
1236 1256
1237#ifdef CONFIG_SMP 1257#ifdef CONFIG_SMP
1238extern void wait_task_inactive(task_t * p); 1258extern void wait_task_inactive(struct task_struct * p);
1239#else 1259#else
1240#define wait_task_inactive(p) do { } while (0) 1260#define wait_task_inactive(p) do { } while (0)
1241#endif 1261#endif
@@ -1261,13 +1281,13 @@ extern void wait_task_inactive(task_t * p);
1261/* de_thread depends on thread_group_leader not being a pid based check */ 1281/* de_thread depends on thread_group_leader not being a pid based check */
1262#define thread_group_leader(p) (p == p->group_leader) 1282#define thread_group_leader(p) (p == p->group_leader)
1263 1283
1264static inline task_t *next_thread(const task_t *p) 1284static inline struct task_struct *next_thread(const struct task_struct *p)
1265{ 1285{
1266 return list_entry(rcu_dereference(p->thread_group.next), 1286 return list_entry(rcu_dereference(p->thread_group.next),
1267 task_t, thread_group); 1287 struct task_struct, thread_group);
1268} 1288}
1269 1289
1270static inline int thread_group_empty(task_t *p) 1290static inline int thread_group_empty(struct task_struct *p)
1271{ 1291{
1272 return list_empty(&p->thread_group); 1292 return list_empty(&p->thread_group);
1273} 1293}
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 7bc5c7c12b54..46000936f8f1 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -38,9 +38,17 @@ typedef struct {
38 * These macros triggered gcc-3.x compile-time problems. We think these are 38 * These macros triggered gcc-3.x compile-time problems. We think these are
39 * OK now. Be cautious. 39 * OK now. Be cautious.
40 */ 40 */
41#define SEQLOCK_UNLOCKED { 0, SPIN_LOCK_UNLOCKED } 41#define __SEQLOCK_UNLOCKED(lockname) \
42#define seqlock_init(x) do { *(x) = (seqlock_t) SEQLOCK_UNLOCKED; } while (0) 42 { 0, __SPIN_LOCK_UNLOCKED(lockname) }
43 43
44#define SEQLOCK_UNLOCKED \
45 __SEQLOCK_UNLOCKED(old_style_seqlock_init)
46
47#define seqlock_init(x) \
48 do { *(x) = (seqlock_t) __SEQLOCK_UNLOCKED(x); } while (0)
49
50#define DEFINE_SEQLOCK(x) \
51 seqlock_t x = __SEQLOCK_UNLOCKED(x)
44 52
45/* Lock out other writers and update the count. 53/* Lock out other writers and update the count.
46 * Acts like a normal spin_lock/unlock. 54 * Acts like a normal spin_lock/unlock.
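DEFINE_SEQLOCK() is the new static initializer that names the lock for the validator; the old SEQLOCK_UNLOCKED form still works but lumps all such users into one class. A small sketch of a writer/reader pair using the new initializer (the timestamp variable is illustrative):

  #include <linux/seqlock.h>

  static DEFINE_SEQLOCK(ts_lock);
  static unsigned long long timestamp;

  static void ts_update(unsigned long long now)
  {
          write_seqlock(&ts_lock);
          timestamp = now;
          write_sequnlock(&ts_lock);
  }

  static unsigned long long ts_read(void)
  {
          unsigned long long v;
          unsigned seq;

          do {
                  seq = read_seqbegin(&ts_lock);
                  v = timestamp;
          } while (read_seqretry(&ts_lock, seq));

          return v;
  }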
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 57d7d4965f9a..3597b4f14389 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -604,9 +604,12 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
604 return list_->qlen; 604 return list_->qlen;
605} 605}
606 606
607extern struct lock_class_key skb_queue_lock_key;
608
607static inline void skb_queue_head_init(struct sk_buff_head *list) 609static inline void skb_queue_head_init(struct sk_buff_head *list)
608{ 610{
609 spin_lock_init(&list->lock); 611 spin_lock_init(&list->lock);
612 lockdep_set_class(&list->lock, &skb_queue_lock_key);
610 list->prev = list->next = (struct sk_buff *)list; 613 list->prev = list->next = (struct sk_buff *)list;
611 list->qlen = 0; 614 list->qlen = 0;
612} 615}
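With the hunk above, every queue set up through skb_queue_head_init() is re-keyed to the single skb_queue_lock_key class instead of getting a per-call-site class from spin_lock_init(). A tiny sketch (the queue name is illustrative):

  #include <linux/skbuff.h>

  static struct sk_buff_head rx_queue;

  static void rx_queue_setup(void)
  {
          /* All sk_buff_head locks end up in the skb_queue_lock_key class. */
          skb_queue_head_init(&rx_queue);
  }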
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ae23beef9cc9..31473db92d3b 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -82,14 +82,40 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
82/* 82/*
83 * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them): 83 * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them):
84 */ 84 */
85#if defined(CONFIG_SMP) 85#ifdef CONFIG_SMP
86# include <asm/spinlock.h> 86# include <asm/spinlock.h>
87#else 87#else
88# include <linux/spinlock_up.h> 88# include <linux/spinlock_up.h>
89#endif 89#endif
90 90
91#define spin_lock_init(lock) do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) 91#ifdef CONFIG_DEBUG_SPINLOCK
92#define rwlock_init(lock) do { *(lock) = RW_LOCK_UNLOCKED; } while (0) 92 extern void __spin_lock_init(spinlock_t *lock, const char *name,
93 struct lock_class_key *key);
94# define spin_lock_init(lock) \
95do { \
96 static struct lock_class_key __key; \
97 \
98 __spin_lock_init((lock), #lock, &__key); \
99} while (0)
100
101#else
102# define spin_lock_init(lock) \
103 do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
104#endif
105
106#ifdef CONFIG_DEBUG_SPINLOCK
107 extern void __rwlock_init(rwlock_t *lock, const char *name,
108 struct lock_class_key *key);
109# define rwlock_init(lock) \
110do { \
111 static struct lock_class_key __key; \
112 \
113 __rwlock_init((lock), #lock, &__key); \
114} while (0)
115#else
116# define rwlock_init(lock) \
117 do { *(lock) = RW_LOCK_UNLOCKED; } while (0)
118#endif
93 119
94#define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) 120#define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock)
95 121
@@ -113,7 +139,6 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
113#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) 139#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
114 extern int _raw_spin_trylock(spinlock_t *lock); 140 extern int _raw_spin_trylock(spinlock_t *lock);
115 extern void _raw_spin_unlock(spinlock_t *lock); 141 extern void _raw_spin_unlock(spinlock_t *lock);
116
117 extern void _raw_read_lock(rwlock_t *lock); 142 extern void _raw_read_lock(rwlock_t *lock);
118 extern int _raw_read_trylock(rwlock_t *lock); 143 extern int _raw_read_trylock(rwlock_t *lock);
119 extern void _raw_read_unlock(rwlock_t *lock); 144 extern void _raw_read_unlock(rwlock_t *lock);
@@ -121,17 +146,17 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
121 extern int _raw_write_trylock(rwlock_t *lock); 146 extern int _raw_write_trylock(rwlock_t *lock);
122 extern void _raw_write_unlock(rwlock_t *lock); 147 extern void _raw_write_unlock(rwlock_t *lock);
123#else 148#else
124# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock)
125# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock)
126# define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock) 149# define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock)
127# define _raw_spin_lock_flags(lock, flags) \ 150# define _raw_spin_lock_flags(lock, flags) \
128 __raw_spin_lock_flags(&(lock)->raw_lock, *(flags)) 151 __raw_spin_lock_flags(&(lock)->raw_lock, *(flags))
152# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock)
153# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock)
129# define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock) 154# define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock)
130# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock)
131# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock)
132# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock)
133# define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock) 155# define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock)
156# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock)
157# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock)
134# define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock) 158# define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock)
159# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock)
135#endif 160#endif
136 161
137#define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock) 162#define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock)
@@ -147,6 +172,13 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
147#define write_trylock(lock) __cond_lock(_write_trylock(lock)) 172#define write_trylock(lock) __cond_lock(_write_trylock(lock))
148 173
149#define spin_lock(lock) _spin_lock(lock) 174#define spin_lock(lock) _spin_lock(lock)
175
176#ifdef CONFIG_DEBUG_LOCK_ALLOC
177# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
178#else
179# define spin_lock_nested(lock, subclass) _spin_lock(lock)
180#endif
181
150#define write_lock(lock) _write_lock(lock) 182#define write_lock(lock) _write_lock(lock)
151#define read_lock(lock) _read_lock(lock) 183#define read_lock(lock) _read_lock(lock)
152 184
@@ -172,21 +204,18 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
172/* 204/*
173 * We inline the unlock functions in the nondebug case: 205 * We inline the unlock functions in the nondebug case:
174 */ 206 */
175#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) 207#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \
208 !defined(CONFIG_SMP)
176# define spin_unlock(lock) _spin_unlock(lock) 209# define spin_unlock(lock) _spin_unlock(lock)
177# define read_unlock(lock) _read_unlock(lock) 210# define read_unlock(lock) _read_unlock(lock)
178# define write_unlock(lock) _write_unlock(lock) 211# define write_unlock(lock) _write_unlock(lock)
179#else
180# define spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock)
181# define read_unlock(lock) __raw_read_unlock(&(lock)->raw_lock)
182# define write_unlock(lock) __raw_write_unlock(&(lock)->raw_lock)
183#endif
184
185#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
186# define spin_unlock_irq(lock) _spin_unlock_irq(lock) 212# define spin_unlock_irq(lock) _spin_unlock_irq(lock)
187# define read_unlock_irq(lock) _read_unlock_irq(lock) 213# define read_unlock_irq(lock) _read_unlock_irq(lock)
188# define write_unlock_irq(lock) _write_unlock_irq(lock) 214# define write_unlock_irq(lock) _write_unlock_irq(lock)
189#else 215#else
216# define spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock)
217# define read_unlock(lock) __raw_read_unlock(&(lock)->raw_lock)
218# define write_unlock(lock) __raw_write_unlock(&(lock)->raw_lock)
190# define spin_unlock_irq(lock) \ 219# define spin_unlock_irq(lock) \
191 do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0) 220 do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0)
192# define read_unlock_irq(lock) \ 221# define read_unlock_irq(lock) \
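Note on the spin_lock_nested() API added above: it lets a caller take a second lock belonging to the same lock class without tripping lockdep's same-class recursion check, with SINGLE_DEPTH_NESTING (from the new lockdep.h) as the usual subclass for one level of nesting. A minimal, hypothetical sketch of the pattern, assuming two distinct objects whose locks share one class and are always taken in address order:

#include <linux/spinlock.h>
#include <linux/list.h>

struct queue {
	spinlock_t lock;
	struct list_head items;
};

/* move one item from src to dst; src != dst is assumed */
static void move_first_item(struct queue *src, struct queue *dst)
{
	struct queue *first = src < dst ? src : dst;
	struct queue *second = src < dst ? dst : src;

	/* fixed (address) order avoids ABBA deadlocks between callers */
	spin_lock(&first->lock);
	/* same class as first->lock: annotate the nesting for lockdep */
	spin_lock_nested(&second->lock, SINGLE_DEPTH_NESTING);

	if (!list_empty(&src->items))
		list_move_tail(src->items.next, &dst->items);

	spin_unlock(&second->lock);
	spin_unlock(&first->lock);
}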
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 78e6989ffb54..b2c4f8299464 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -20,6 +20,8 @@ int in_lock_functions(unsigned long addr);
20#define assert_spin_locked(x) BUG_ON(!spin_is_locked(x)) 20#define assert_spin_locked(x) BUG_ON(!spin_is_locked(x))
21 21
22void __lockfunc _spin_lock(spinlock_t *lock) __acquires(spinlock_t); 22void __lockfunc _spin_lock(spinlock_t *lock) __acquires(spinlock_t);
23void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
24 __acquires(spinlock_t);
23void __lockfunc _read_lock(rwlock_t *lock) __acquires(rwlock_t); 25void __lockfunc _read_lock(rwlock_t *lock) __acquires(rwlock_t);
24void __lockfunc _write_lock(rwlock_t *lock) __acquires(rwlock_t); 26void __lockfunc _write_lock(rwlock_t *lock) __acquires(rwlock_t);
25void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(spinlock_t); 27void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(spinlock_t);
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index cd81cee566f4..67faa044c5f5 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -49,6 +49,7 @@
49 do { local_irq_restore(flags); __UNLOCK(lock); } while (0) 49 do { local_irq_restore(flags); __UNLOCK(lock); } while (0)
50 50
51#define _spin_lock(lock) __LOCK(lock) 51#define _spin_lock(lock) __LOCK(lock)
52#define _spin_lock_nested(lock, subclass) __LOCK(lock)
52#define _read_lock(lock) __LOCK(lock) 53#define _read_lock(lock) __LOCK(lock)
53#define _write_lock(lock) __LOCK(lock) 54#define _write_lock(lock) __LOCK(lock)
54#define _spin_lock_bh(lock) __LOCK_BH(lock) 55#define _spin_lock_bh(lock) __LOCK_BH(lock)
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 9cb51e070390..dc5fb69e4de9 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -9,6 +9,8 @@
9 * Released under the General Public License (GPL). 9 * Released under the General Public License (GPL).
10 */ 10 */
11 11
12#include <linux/lockdep.h>
13
12#if defined(CONFIG_SMP) 14#if defined(CONFIG_SMP)
13# include <asm/spinlock_types.h> 15# include <asm/spinlock_types.h>
14#else 16#else
@@ -24,6 +26,9 @@ typedef struct {
24 unsigned int magic, owner_cpu; 26 unsigned int magic, owner_cpu;
25 void *owner; 27 void *owner;
26#endif 28#endif
29#ifdef CONFIG_DEBUG_LOCK_ALLOC
30 struct lockdep_map dep_map;
31#endif
27} spinlock_t; 32} spinlock_t;
28 33
29#define SPINLOCK_MAGIC 0xdead4ead 34#define SPINLOCK_MAGIC 0xdead4ead
@@ -37,31 +42,53 @@ typedef struct {
37 unsigned int magic, owner_cpu; 42 unsigned int magic, owner_cpu;
38 void *owner; 43 void *owner;
39#endif 44#endif
45#ifdef CONFIG_DEBUG_LOCK_ALLOC
46 struct lockdep_map dep_map;
47#endif
40} rwlock_t; 48} rwlock_t;
41 49
42#define RWLOCK_MAGIC 0xdeaf1eed 50#define RWLOCK_MAGIC 0xdeaf1eed
43 51
44#define SPINLOCK_OWNER_INIT ((void *)-1L) 52#define SPINLOCK_OWNER_INIT ((void *)-1L)
45 53
54#ifdef CONFIG_DEBUG_LOCK_ALLOC
55# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
56#else
57# define SPIN_DEP_MAP_INIT(lockname)
58#endif
59
60#ifdef CONFIG_DEBUG_LOCK_ALLOC
61# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
62#else
63# define RW_DEP_MAP_INIT(lockname)
64#endif
65
46#ifdef CONFIG_DEBUG_SPINLOCK 66#ifdef CONFIG_DEBUG_SPINLOCK
47# define SPIN_LOCK_UNLOCKED \ 67# define __SPIN_LOCK_UNLOCKED(lockname) \
48 (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ 68 (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \
49 .magic = SPINLOCK_MAGIC, \ 69 .magic = SPINLOCK_MAGIC, \
50 .owner = SPINLOCK_OWNER_INIT, \ 70 .owner = SPINLOCK_OWNER_INIT, \
51 .owner_cpu = -1 } 71 .owner_cpu = -1, \
52#define RW_LOCK_UNLOCKED \ 72 SPIN_DEP_MAP_INIT(lockname) }
73#define __RW_LOCK_UNLOCKED(lockname) \
53 (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ 74 (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \
54 .magic = RWLOCK_MAGIC, \ 75 .magic = RWLOCK_MAGIC, \
55 .owner = SPINLOCK_OWNER_INIT, \ 76 .owner = SPINLOCK_OWNER_INIT, \
56 .owner_cpu = -1 } 77 .owner_cpu = -1, \
78 RW_DEP_MAP_INIT(lockname) }
57#else 79#else
58# define SPIN_LOCK_UNLOCKED \ 80# define __SPIN_LOCK_UNLOCKED(lockname) \
59 (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED } 81 (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \
60#define RW_LOCK_UNLOCKED \ 82 SPIN_DEP_MAP_INIT(lockname) }
61 (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED } 83#define __RW_LOCK_UNLOCKED(lockname) \
84 (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \
85 RW_DEP_MAP_INIT(lockname) }
62#endif 86#endif
63 87
64#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED 88#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
65#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED 89#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
90
91#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
92#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
66 93
67#endif /* __LINUX_SPINLOCK_TYPES_H */ 94#endif /* __LINUX_SPINLOCK_TYPES_H */
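With the named initializers above, every statically initialized lock can carry its own class name, while locks still set up through the old SPIN_LOCK_UNLOCKED / RW_LOCK_UNLOCKED macros all land in the shared old_style_*_init classes. A small illustrative sketch (the identifiers are made up), showing the preferred DEFINE_SPINLOCK() form and the explicit __SPIN_LOCK_UNLOCKED(name) form inside a larger static initializer, mirroring the wait.h usage below:

#include <linux/spinlock.h>

/* each definition names its lock after the variable */
static DEFINE_SPINLOCK(driver_list_lock);
static DEFINE_RWLOCK(driver_table_lock);

/* when the lock is embedded in a bigger object, name it explicitly */
struct driver_state {
	spinlock_t	lock;
	int		users;
};

static struct driver_state driver_state = {
	.lock	= __SPIN_LOCK_UNLOCKED(driver_state.lock),
	.users	= 0,
};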
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 04135b0e198e..27644af20b7c 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -12,10 +12,14 @@
12 * Released under the General Public License (GPL). 12 * Released under the General Public License (GPL).
13 */ 13 */
14 14
15#ifdef CONFIG_DEBUG_SPINLOCK 15#if defined(CONFIG_DEBUG_SPINLOCK) || \
16 defined(CONFIG_DEBUG_LOCK_ALLOC)
16 17
17typedef struct { 18typedef struct {
18 volatile unsigned int slock; 19 volatile unsigned int slock;
20#ifdef CONFIG_DEBUG_LOCK_ALLOC
21 struct lockdep_map dep_map;
22#endif
19} raw_spinlock_t; 23} raw_spinlock_t;
20 24
21#define __RAW_SPIN_LOCK_UNLOCKED { 1 } 25#define __RAW_SPIN_LOCK_UNLOCKED { 1 }
@@ -30,6 +34,9 @@ typedef struct { } raw_spinlock_t;
30 34
31typedef struct { 35typedef struct {
32 /* no debug version on UP */ 36 /* no debug version on UP */
37#ifdef CONFIG_DEBUG_LOCK_ALLOC
38 struct lockdep_map dep_map;
39#endif
33} raw_rwlock_t; 40} raw_rwlock_t;
34 41
35#define __RAW_RW_LOCK_UNLOCKED { } 42#define __RAW_RW_LOCK_UNLOCKED { }
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 31accf2f0b13..ea54c4c9a4ec 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -18,7 +18,6 @@
18 */ 18 */
19 19
20#ifdef CONFIG_DEBUG_SPINLOCK 20#ifdef CONFIG_DEBUG_SPINLOCK
21
22#define __raw_spin_is_locked(x) ((x)->slock == 0) 21#define __raw_spin_is_locked(x) ((x)->slock == 0)
23 22
24static inline void __raw_spin_lock(raw_spinlock_t *lock) 23static inline void __raw_spin_lock(raw_spinlock_t *lock)
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
new file mode 100644
index 000000000000..9cc81e572224
--- /dev/null
+++ b/include/linux/stacktrace.h
@@ -0,0 +1,20 @@
1#ifndef __LINUX_STACKTRACE_H
2#define __LINUX_STACKTRACE_H
3
4#ifdef CONFIG_STACKTRACE
5struct stack_trace {
6 unsigned int nr_entries, max_entries;
7 unsigned long *entries;
8};
9
10extern void save_stack_trace(struct stack_trace *trace,
11 struct task_struct *task, int all_contexts,
12 unsigned int skip);
13
14extern void print_stack_trace(struct stack_trace *trace, int spaces);
15#else
16# define save_stack_trace(trace, task, all, skip) do { } while (0)
17# define print_stack_trace(trace, spaces) do { } while (0)
18#endif
19
20#endif
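A hedged sketch of how a CONFIG_STACKTRACE user would drive the interface declared above (the helper name and buffer size are made up): the caller owns the entries array, save_stack_trace() fills trace->entries for the given task, and print_stack_trace() prints the result indented by the given number of spaces.

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/stacktrace.h>

static void my_show_stack(void)
{
	unsigned long entries[16];
	struct stack_trace trace = {
		.nr_entries	= 0,
		.max_entries	= ARRAY_SIZE(entries),
		.entries	= entries,
	};

	/* current task, current context only, skip one frame (this helper) */
	save_stack_trace(&trace, current, 0, 1);
	print_stack_trace(&trace, 4);
}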
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cf6ca6e377bd..5e59184c9096 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -189,6 +189,7 @@ extern long vm_total_pages;
189 189
190#ifdef CONFIG_NUMA 190#ifdef CONFIG_NUMA
191extern int zone_reclaim_mode; 191extern int zone_reclaim_mode;
192extern int sysctl_min_unmapped_ratio;
192extern int zone_reclaim(struct zone *, gfp_t, unsigned int); 193extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
193#else 194#else
194#define zone_reclaim_mode 0 195#define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 46e4d8f2771f..e4b1a4d4dcf3 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -188,7 +188,7 @@ enum
188 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ 188 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
189 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ 189 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
190 VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ 190 VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
191 VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ 191 VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */
192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ 192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ 193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
194}; 194};
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 544e855c7c02..794be7af58ae 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -68,7 +68,7 @@ struct task_struct;
68 wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk) 68 wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
69 69
70#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ 70#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
71 .lock = SPIN_LOCK_UNLOCKED, \ 71 .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
72 .task_list = { &(name).task_list, &(name).task_list } } 72 .task_list = { &(name).task_list, &(name).task_list } }
73 73
74#define DECLARE_WAIT_QUEUE_HEAD(name) \ 74#define DECLARE_WAIT_QUEUE_HEAD(name) \
@@ -77,9 +77,15 @@ struct task_struct;
77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ 77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
78 { .flags = word, .bit_nr = bit, } 78 { .flags = word, .bit_nr = bit, }
79 79
80/*
81 * lockdep: we want one lock-class for all waitqueue locks.
82 */
83extern struct lock_class_key waitqueue_lock_key;
84
80static inline void init_waitqueue_head(wait_queue_head_t *q) 85static inline void init_waitqueue_head(wait_queue_head_t *q)
81{ 86{
82 spin_lock_init(&q->lock); 87 spin_lock_init(&q->lock);
88 lockdep_set_class(&q->lock, &waitqueue_lock_key);
83 INIT_LIST_HEAD(&q->task_list); 89 INIT_LIST_HEAD(&q->task_list);
84} 90}
85 91
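init_waitqueue_head() above shows the pattern for collapsing a whole family of runtime-initialized locks into one lockdep class: declare a single struct lock_class_key and point every lock at it with lockdep_set_class(). A hypothetical driver-side sketch of the same pattern (names are made up):

#include <linux/spinlock.h>
#include <linux/lockdep.h>

static struct lock_class_key mydev_lock_key;

struct mydev {
	spinlock_t lock;
};

static void mydev_init(struct mydev *dev)
{
	spin_lock_init(&dev->lock);
	/*
	 * All mydev locks share one class, no matter how many call
	 * sites end up inlining this initializer.
	 */
	lockdep_set_class(&dev->lock, &mydev_lock_key);
}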
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 5ba72d95280c..2fec827c8801 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -67,6 +67,9 @@ struct unix_skb_parms {
67#define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock) 67#define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock)
68#define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock) 68#define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock)
69#define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock) 69#define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock)
70#define unix_state_wlock_nested(s) \
71 spin_lock_nested(&unix_sk(s)->lock, \
72 SINGLE_DEPTH_NESTING)
70#define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock) 73#define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock)
71 74
72#ifdef __KERNEL__ 75#ifdef __KERNEL__
diff --git a/include/net/sock.h b/include/net/sock.h
index 7b3d6b856946..324b3ea233d6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -44,6 +44,7 @@
44#include <linux/timer.h> 44#include <linux/timer.h>
45#include <linux/cache.h> 45#include <linux/cache.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/lockdep.h>
47#include <linux/netdevice.h> 48#include <linux/netdevice.h>
48#include <linux/skbuff.h> /* struct sk_buff */ 49#include <linux/skbuff.h> /* struct sk_buff */
49#include <linux/security.h> 50#include <linux/security.h>
@@ -78,14 +79,17 @@ typedef struct {
78 spinlock_t slock; 79 spinlock_t slock;
79 struct sock_iocb *owner; 80 struct sock_iocb *owner;
80 wait_queue_head_t wq; 81 wait_queue_head_t wq;
82 /*
83 * We express the mutex-alike socket_lock semantics
84 * to the lock validator by explicitly managing
85 * the slock as a lock variant (in addition to
86 * the slock itself):
87 */
88#ifdef CONFIG_DEBUG_LOCK_ALLOC
89 struct lockdep_map dep_map;
90#endif
81} socket_lock_t; 91} socket_lock_t;
82 92
83#define sock_lock_init(__sk) \
84do { spin_lock_init(&((__sk)->sk_lock.slock)); \
85 (__sk)->sk_lock.owner = NULL; \
86 init_waitqueue_head(&((__sk)->sk_lock.wq)); \
87} while(0)
88
89struct sock; 93struct sock;
90struct proto; 94struct proto;
91 95
@@ -747,6 +751,9 @@ extern void FASTCALL(release_sock(struct sock *sk));
747 751
748/* BH context may only use the following locking interface. */ 752/* BH context may only use the following locking interface. */
749#define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) 753#define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock))
754#define bh_lock_sock_nested(__sk) \
755 spin_lock_nested(&((__sk)->sk_lock.slock), \
756 SINGLE_DEPTH_NESTING)
750#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) 757#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
751 758
752extern struct sock *sk_alloc(int family, 759extern struct sock *sk_alloc(int family,
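bh_lock_sock_nested() defined above is the socket-level counterpart of spin_lock_nested(): it covers the case where a BH-context path already holds one socket's slock and must take a second slock of the same class. The sketch below is a generic illustration of that shape, not the specific call site this series annotates:

#include <net/sock.h>

/* hypothetical: the parent's slock is taken first, then a second socket's */
static void handle_child_sock(struct sock *parent, struct sock *child)
{
	bh_lock_sock(parent);
	/* second lock of the same class - tell lockdep it is intentional */
	bh_lock_sock_nested(child);

	/* ... transfer state from parent to child ... */

	bh_unlock_sock(child);
	bh_unlock_sock(parent);
}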
diff --git a/init/main.c b/init/main.c
index b2f3b566790e..628b8e9e841a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -47,6 +47,8 @@
47#include <linux/key.h> 47#include <linux/key.h>
48#include <linux/unwind.h> 48#include <linux/unwind.h>
49#include <linux/buffer_head.h> 49#include <linux/buffer_head.h>
50#include <linux/debug_locks.h>
51#include <linux/lockdep.h>
50 52
51#include <asm/io.h> 53#include <asm/io.h>
52#include <asm/bugs.h> 54#include <asm/bugs.h>
@@ -456,6 +458,16 @@ asmlinkage void __init start_kernel(void)
456 458
457 smp_setup_processor_id(); 459 smp_setup_processor_id();
458 460
461 /*
462 * Need to run as early as possible, to initialize the
463 * lockdep hash:
464 */
465 lockdep_init();
466
467 local_irq_disable();
468 early_boot_irqs_off();
469 early_init_irq_lock_class();
470
459/* 471/*
460 * Interrupts are still disabled. Do necessary setups, then 472 * Interrupts are still disabled. Do necessary setups, then
461 * enable them 473 * enable them
@@ -496,8 +508,13 @@ asmlinkage void __init start_kernel(void)
496 init_timers(); 508 init_timers();
497 hrtimers_init(); 509 hrtimers_init();
498 softirq_init(); 510 softirq_init();
499 time_init();
500 timekeeping_init(); 511 timekeeping_init();
512 time_init();
513 profile_init();
514 if (!irqs_disabled())
515 printk("start_kernel(): bug: interrupts were enabled early\n");
516 early_boot_irqs_on();
517 local_irq_enable();
501 518
502 /* 519 /*
503 * HACK ALERT! This is early. We're enabling the console before 520 * HACK ALERT! This is early. We're enabling the console before
@@ -507,8 +524,16 @@ asmlinkage void __init start_kernel(void)
507 console_init(); 524 console_init();
508 if (panic_later) 525 if (panic_later)
509 panic(panic_later, panic_param); 526 panic(panic_later, panic_param);
510 profile_init(); 527
511 local_irq_enable(); 528 lockdep_info();
529
530 /*
531 * Need to run this when irqs are enabled, because it wants
532 * to self-test [hard/soft]-irqs on/off lock inversion bugs
533 * too:
534 */
535 locking_selftest();
536
512#ifdef CONFIG_BLK_DEV_INITRD 537#ifdef CONFIG_BLK_DEV_INITRD
513 if (initrd_start && !initrd_below_start_ok && 538 if (initrd_start && !initrd_below_start_ok &&
514 initrd_start < min_low_pfn << PAGE_SHIFT) { 539 initrd_start < min_low_pfn << PAGE_SHIFT) {
diff --git a/kernel/Makefile b/kernel/Makefile
index 82fb182f6f61..47dbcd570cd8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,10 +8,15 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o 11 hrtimer.o rwsem.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o
13obj-y += time/ 14obj-y += time/
14obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o 15obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
16obj-$(CONFIG_LOCKDEP) += lockdep.o
17ifeq ($(CONFIG_PROC_FS),y)
18obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
19endif
15obj-$(CONFIG_FUTEX) += futex.o 20obj-$(CONFIG_FUTEX) += futex.o
16ifeq ($(CONFIG_COMPAT),y) 21ifeq ($(CONFIG_COMPAT),y)
17obj-$(CONFIG_FUTEX) += futex_compat.o 22obj-$(CONFIG_FUTEX) += futex_compat.o
@@ -22,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
22obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 27obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
23obj-$(CONFIG_SMP) += cpu.o spinlock.o 28obj-$(CONFIG_SMP) += cpu.o spinlock.o
24obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o 29obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
30obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
25obj-$(CONFIG_UID16) += uid16.o 31obj-$(CONFIG_UID16) += uid16.o
26obj-$(CONFIG_MODULES) += module.o 32obj-$(CONFIG_MODULES) += module.o
27obj-$(CONFIG_KALLSYMS) += kallsyms.o 33obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/capability.c b/kernel/capability.c
index 1a4d8a40d3f9..c7685ad00a97 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
46 int ret = 0; 46 int ret = 0;
47 pid_t pid; 47 pid_t pid;
48 __u32 version; 48 __u32 version;
49 task_t *target; 49 struct task_struct *target;
50 struct __user_cap_data_struct data; 50 struct __user_cap_data_struct data;
51 51
52 if (get_user(version, &header->version)) 52 if (get_user(version, &header->version))
@@ -96,7 +96,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
96 kernel_cap_t *inheritable, 96 kernel_cap_t *inheritable,
97 kernel_cap_t *permitted) 97 kernel_cap_t *permitted)
98{ 98{
99 task_t *g, *target; 99 struct task_struct *g, *target;
100 int ret = -EPERM; 100 int ret = -EPERM;
101 int found = 0; 101 int found = 0;
102 102
@@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
128 kernel_cap_t *inheritable, 128 kernel_cap_t *inheritable,
129 kernel_cap_t *permitted) 129 kernel_cap_t *permitted)
130{ 130{
131 task_t *g, *target; 131 struct task_struct *g, *target;
132 int ret = -EPERM; 132 int ret = -EPERM;
133 int found = 0; 133 int found = 0;
134 134
@@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
172{ 172{
173 kernel_cap_t inheritable, permitted, effective; 173 kernel_cap_t inheritable, permitted, effective;
174 __u32 version; 174 __u32 version;
175 task_t *target; 175 struct task_struct *target;
176 int ret; 176 int ret;
177 pid_t pid; 177 pid_t pid;
178 178
diff --git a/kernel/exit.c b/kernel/exit.c
index 7f7ef2258553..6664c084783d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -134,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
134 134
135void release_task(struct task_struct * p) 135void release_task(struct task_struct * p)
136{ 136{
137 struct task_struct *leader;
137 int zap_leader; 138 int zap_leader;
138 task_t *leader;
139repeat: 139repeat:
140 atomic_dec(&p->user->processes); 140 atomic_dec(&p->user->processes);
141 write_lock_irq(&tasklist_lock); 141 write_lock_irq(&tasklist_lock);
@@ -209,7 +209,7 @@ out:
209 * 209 *
210 * "I ask you, have you ever known what it is to be an orphan?" 210 * "I ask you, have you ever known what it is to be an orphan?"
211 */ 211 */
212static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) 212static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
213{ 213{
214 struct task_struct *p; 214 struct task_struct *p;
215 int ret = 1; 215 int ret = 1;
@@ -582,7 +582,8 @@ static void exit_mm(struct task_struct * tsk)
582 mmput(mm); 582 mmput(mm);
583} 583}
584 584
585static inline void choose_new_parent(task_t *p, task_t *reaper) 585static inline void
586choose_new_parent(struct task_struct *p, struct task_struct *reaper)
586{ 587{
587 /* 588 /*
588 * Make sure we're not reparenting to ourselves and that 589 * Make sure we're not reparenting to ourselves and that
@@ -592,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper)
592 p->real_parent = reaper; 593 p->real_parent = reaper;
593} 594}
594 595
595static void reparent_thread(task_t *p, task_t *father, int traced) 596static void
597reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
596{ 598{
597 /* We don't want people slaying init. */ 599 /* We don't want people slaying init. */
598 if (p->exit_signal != -1) 600 if (p->exit_signal != -1)
@@ -656,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced)
656 * group, and if no such member exists, give it to 658 * group, and if no such member exists, give it to
657 * the global child reaper process (ie "init") 659 * the global child reaper process (ie "init")
658 */ 660 */
659static void forget_original_parent(struct task_struct * father, 661static void
660 struct list_head *to_release) 662forget_original_parent(struct task_struct *father, struct list_head *to_release)
661{ 663{
662 struct task_struct *p, *reaper = father; 664 struct task_struct *p, *reaper = father;
663 struct list_head *_p, *_n; 665 struct list_head *_p, *_n;
@@ -680,7 +682,7 @@ static void forget_original_parent(struct task_struct * father,
680 */ 682 */
681 list_for_each_safe(_p, _n, &father->children) { 683 list_for_each_safe(_p, _n, &father->children) {
682 int ptrace; 684 int ptrace;
683 p = list_entry(_p,struct task_struct,sibling); 685 p = list_entry(_p, struct task_struct, sibling);
684 686
685 ptrace = p->ptrace; 687 ptrace = p->ptrace;
686 688
@@ -709,7 +711,7 @@ static void forget_original_parent(struct task_struct * father,
709 list_add(&p->ptrace_list, to_release); 711 list_add(&p->ptrace_list, to_release);
710 } 712 }
711 list_for_each_safe(_p, _n, &father->ptrace_children) { 713 list_for_each_safe(_p, _n, &father->ptrace_children) {
712 p = list_entry(_p,struct task_struct,ptrace_list); 714 p = list_entry(_p, struct task_struct, ptrace_list);
713 choose_new_parent(p, reaper); 715 choose_new_parent(p, reaper);
714 reparent_thread(p, father, 1); 716 reparent_thread(p, father, 1);
715 } 717 }
@@ -829,7 +831,7 @@ static void exit_notify(struct task_struct *tsk)
829 831
830 list_for_each_safe(_p, _n, &ptrace_dead) { 832 list_for_each_safe(_p, _n, &ptrace_dead) {
831 list_del_init(_p); 833 list_del_init(_p);
832 t = list_entry(_p,struct task_struct,ptrace_list); 834 t = list_entry(_p, struct task_struct, ptrace_list);
833 release_task(t); 835 release_task(t);
834 } 836 }
835 837
@@ -933,10 +935,9 @@ fastcall NORET_TYPE void do_exit(long code)
933 if (unlikely(current->pi_state_cache)) 935 if (unlikely(current->pi_state_cache))
934 kfree(current->pi_state_cache); 936 kfree(current->pi_state_cache);
935 /* 937 /*
936 * If DEBUG_MUTEXES is on, make sure we are holding no locks: 938 * Make sure we are holding no locks:
937 */ 939 */
938 mutex_debug_check_no_locks_held(tsk); 940 debug_check_no_locks_held(tsk);
939 rt_mutex_debug_check_no_locks_held(tsk);
940 941
941 if (tsk->io_context) 942 if (tsk->io_context)
942 exit_io_context(); 943 exit_io_context();
@@ -1011,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code)
1011 do_group_exit((error_code & 0xff) << 8); 1012 do_group_exit((error_code & 0xff) << 8);
1012} 1013}
1013 1014
1014static int eligible_child(pid_t pid, int options, task_t *p) 1015static int eligible_child(pid_t pid, int options, struct task_struct *p)
1015{ 1016{
1016 if (pid > 0) { 1017 if (pid > 0) {
1017 if (p->pid != pid) 1018 if (p->pid != pid)
@@ -1052,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p)
1052 return 1; 1053 return 1;
1053} 1054}
1054 1055
1055static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, 1056static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1056 int why, int status, 1057 int why, int status,
1057 struct siginfo __user *infop, 1058 struct siginfo __user *infop,
1058 struct rusage __user *rusagep) 1059 struct rusage __user *rusagep)
1059{ 1060{
1060 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; 1061 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
1062
1061 put_task_struct(p); 1063 put_task_struct(p);
1062 if (!retval) 1064 if (!retval)
1063 retval = put_user(SIGCHLD, &infop->si_signo); 1065 retval = put_user(SIGCHLD, &infop->si_signo);
@@ -1082,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
1082 * the lock and this task is uninteresting. If we return nonzero, we have 1084 * the lock and this task is uninteresting. If we return nonzero, we have
1083 * released the lock and the system call should return. 1085 * released the lock and the system call should return.
1084 */ 1086 */
1085static int wait_task_zombie(task_t *p, int noreap, 1087static int wait_task_zombie(struct task_struct *p, int noreap,
1086 struct siginfo __user *infop, 1088 struct siginfo __user *infop,
1087 int __user *stat_addr, struct rusage __user *ru) 1089 int __user *stat_addr, struct rusage __user *ru)
1088{ 1090{
@@ -1244,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap,
1244 * the lock and this task is uninteresting. If we return nonzero, we have 1246 * the lock and this task is uninteresting. If we return nonzero, we have
1245 * released the lock and the system call should return. 1247 * released the lock and the system call should return.
1246 */ 1248 */
1247static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap, 1249static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
1248 struct siginfo __user *infop, 1250 int noreap, struct siginfo __user *infop,
1249 int __user *stat_addr, struct rusage __user *ru) 1251 int __user *stat_addr, struct rusage __user *ru)
1250{ 1252{
1251 int retval, exit_code; 1253 int retval, exit_code;
@@ -1359,7 +1361,7 @@ bail_ref:
1359 * the lock and this task is uninteresting. If we return nonzero, we have 1361 * the lock and this task is uninteresting. If we return nonzero, we have
1360 * released the lock and the system call should return. 1362 * released the lock and the system call should return.
1361 */ 1363 */
1362static int wait_task_continued(task_t *p, int noreap, 1364static int wait_task_continued(struct task_struct *p, int noreap,
1363 struct siginfo __user *infop, 1365 struct siginfo __user *infop,
1364 int __user *stat_addr, struct rusage __user *ru) 1366 int __user *stat_addr, struct rusage __user *ru)
1365{ 1367{
@@ -1445,7 +1447,7 @@ repeat:
1445 int ret; 1447 int ret;
1446 1448
1447 list_for_each(_p,&tsk->children) { 1449 list_for_each(_p,&tsk->children) {
1448 p = list_entry(_p,struct task_struct,sibling); 1450 p = list_entry(_p, struct task_struct, sibling);
1449 1451
1450 ret = eligible_child(pid, options, p); 1452 ret = eligible_child(pid, options, p);
1451 if (!ret) 1453 if (!ret)
diff --git a/kernel/fork.c b/kernel/fork.c
index 9064bf9e131b..56e4e07e45f7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
193 193
194 down_write(&oldmm->mmap_sem); 194 down_write(&oldmm->mmap_sem);
195 flush_cache_mm(oldmm); 195 flush_cache_mm(oldmm);
196 down_write(&mm->mmap_sem); 196 /*
197 * Not linked in yet - no deadlock potential:
198 */
199 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
197 200
198 mm->locked_vm = 0; 201 mm->locked_vm = 0;
199 mm->mmap = NULL; 202 mm->mmap = NULL;
@@ -919,10 +922,6 @@ static inline void rt_mutex_init_task(struct task_struct *p)
919 spin_lock_init(&p->pi_lock); 922 spin_lock_init(&p->pi_lock);
920 plist_head_init(&p->pi_waiters, &p->pi_lock); 923 plist_head_init(&p->pi_waiters, &p->pi_lock);
921 p->pi_blocked_on = NULL; 924 p->pi_blocked_on = NULL;
922# ifdef CONFIG_DEBUG_RT_MUTEXES
923 spin_lock_init(&p->held_list_lock);
924 INIT_LIST_HEAD(&p->held_list_head);
925# endif
926#endif 925#endif
927} 926}
928 927
@@ -934,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p)
934 * parts of the process environment (as per the clone 933 * parts of the process environment (as per the clone
935 * flags). The actual kick-off is left to the caller. 934 * flags). The actual kick-off is left to the caller.
936 */ 935 */
937static task_t *copy_process(unsigned long clone_flags, 936static struct task_struct *copy_process(unsigned long clone_flags,
938 unsigned long stack_start, 937 unsigned long stack_start,
939 struct pt_regs *regs, 938 struct pt_regs *regs,
940 unsigned long stack_size, 939 unsigned long stack_size,
941 int __user *parent_tidptr, 940 int __user *parent_tidptr,
942 int __user *child_tidptr, 941 int __user *child_tidptr,
943 int pid) 942 int pid)
944{ 943{
945 int retval; 944 int retval;
946 struct task_struct *p = NULL; 945 struct task_struct *p = NULL;
@@ -972,6 +971,10 @@ static task_t *copy_process(unsigned long clone_flags,
972 if (!p) 971 if (!p)
973 goto fork_out; 972 goto fork_out;
974 973
974#ifdef CONFIG_TRACE_IRQFLAGS
975 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
976 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
977#endif
975 retval = -EAGAIN; 978 retval = -EAGAIN;
976 if (atomic_read(&p->user->processes) >= 979 if (atomic_read(&p->user->processes) >=
977 p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 980 p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
@@ -1046,6 +1049,26 @@ static task_t *copy_process(unsigned long clone_flags,
1046 } 1049 }
1047 mpol_fix_fork_child_flag(p); 1050 mpol_fix_fork_child_flag(p);
1048#endif 1051#endif
1052#ifdef CONFIG_TRACE_IRQFLAGS
1053 p->irq_events = 0;
1054 p->hardirqs_enabled = 0;
1055 p->hardirq_enable_ip = 0;
1056 p->hardirq_enable_event = 0;
1057 p->hardirq_disable_ip = _THIS_IP_;
1058 p->hardirq_disable_event = 0;
1059 p->softirqs_enabled = 1;
1060 p->softirq_enable_ip = _THIS_IP_;
1061 p->softirq_enable_event = 0;
1062 p->softirq_disable_ip = 0;
1063 p->softirq_disable_event = 0;
1064 p->hardirq_context = 0;
1065 p->softirq_context = 0;
1066#endif
1067#ifdef CONFIG_LOCKDEP
1068 p->lockdep_depth = 0; /* no locks held yet */
1069 p->curr_chain_key = 0;
1070 p->lockdep_recursion = 0;
1071#endif
1049 1072
1050 rt_mutex_init_task(p); 1073 rt_mutex_init_task(p);
1051 1074
@@ -1271,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1271 return regs; 1294 return regs;
1272} 1295}
1273 1296
1274task_t * __devinit fork_idle(int cpu) 1297struct task_struct * __devinit fork_idle(int cpu)
1275{ 1298{
1276 task_t *task; 1299 struct task_struct *task;
1277 struct pt_regs regs; 1300 struct pt_regs regs;
1278 1301
1279 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0); 1302 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
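down_write_nested() used in dup_mmap() above is the rwsem analogue of spin_lock_nested(): the second mmap_sem belongs to the same class as the one already held, but since the new mm is not yet reachable by anyone else the nesting cannot deadlock. A minimal hypothetical sketch of the same annotation on a made-up object:

#include <linux/rwsem.h>
#include <linux/lockdep.h>

struct container {
	struct rw_semaphore sem;
	/* ... payload ... */
};

static void clone_container(struct container *src, struct container *dst)
{
	down_write(&src->sem);
	/* dst is private to this thread - no deadlock potential */
	down_write_nested(&dst->sem, SINGLE_DEPTH_NESTING);

	/* ... copy payload from src to dst ... */

	up_write(&dst->sem);
	up_write(&src->sem);
}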
diff --git a/kernel/futex.c b/kernel/futex.c
index 15caf93e4a43..1dc98e4dd287 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -607,6 +607,22 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
607} 607}
608 608
609/* 609/*
610 * Express the locking dependencies for lockdep:
611 */
612static inline void
613double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
614{
615 if (hb1 <= hb2) {
616 spin_lock(&hb1->lock);
617 if (hb1 < hb2)
618 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
619 } else { /* hb1 > hb2 */
620 spin_lock(&hb2->lock);
621 spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
622 }
623}
624
625/*
610 * Wake up all waiters hashed on the physical page that is mapped 626 * Wake up all waiters hashed on the physical page that is mapped
611 * to this virtual address: 627 * to this virtual address:
612 */ 628 */
@@ -674,11 +690,7 @@ retryfull:
674 hb2 = hash_futex(&key2); 690 hb2 = hash_futex(&key2);
675 691
676retry: 692retry:
677 if (hb1 < hb2) 693 double_lock_hb(hb1, hb2);
678 spin_lock(&hb1->lock);
679 spin_lock(&hb2->lock);
680 if (hb1 > hb2)
681 spin_lock(&hb1->lock);
682 694
683 op_ret = futex_atomic_op_inuser(op, uaddr2); 695 op_ret = futex_atomic_op_inuser(op, uaddr2);
684 if (unlikely(op_ret < 0)) { 696 if (unlikely(op_ret < 0)) {
@@ -787,11 +799,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
787 hb1 = hash_futex(&key1); 799 hb1 = hash_futex(&key1);
788 hb2 = hash_futex(&key2); 800 hb2 = hash_futex(&key2);
789 801
790 if (hb1 < hb2) 802 double_lock_hb(hb1, hb2);
791 spin_lock(&hb1->lock);
792 spin_lock(&hb2->lock);
793 if (hb1 > hb2)
794 spin_lock(&hb1->lock);
795 803
796 if (likely(cmpval != NULL)) { 804 if (likely(cmpval != NULL)) {
797 u32 curval; 805 u32 curval;
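double_lock_hb() above fixes the lock order by hash-bucket address and annotates the inner acquisition; note that when both futexes hash to the same bucket (hb1 == hb2) only one lock is taken. The existing unlock paths in futex.c handle that case inline; a hypothetical helper expressing the matching unlock side would look like:

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}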
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8d3dc29ef41a..d17766d40dab 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer)
669 return HRTIMER_NORESTART; 669 return HRTIMER_NORESTART;
670} 670}
671 671
672void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) 672void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
673{ 673{
674 sl->timer.function = hrtimer_wakeup; 674 sl->timer.function = hrtimer_wakeup;
675 sl->task = task; 675 sl->task = task;
@@ -782,8 +782,10 @@ static void __devinit init_hrtimers_cpu(int cpu)
782 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); 782 struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
783 int i; 783 int i;
784 784
785 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) 785 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
786 spin_lock_init(&base->lock); 786 spin_lock_init(&base->lock);
787 lockdep_set_class(&base->lock, &base->lock_key);
788 }
787} 789}
788 790
789#ifdef CONFIG_HOTPLUG_CPU 791#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6d8b30114961..fc4e906aedbd 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -16,10 +16,6 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
18 18
19#if defined(CONFIG_NO_IDLE_HZ) && defined(CONFIG_ARM)
20#include <asm/dyntick.h>
21#endif
22
23#include "internals.h" 19#include "internals.h"
24 20
25/** 21/**
@@ -133,17 +129,10 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
133 irqreturn_t ret, retval = IRQ_NONE; 129 irqreturn_t ret, retval = IRQ_NONE;
134 unsigned int status = 0; 130 unsigned int status = 0;
135 131
136#if defined(CONFIG_NO_IDLE_HZ) && defined(CONFIG_ARM) 132 handle_dynamic_tick(action);
137 if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) {
138 write_seqlock(&xtime_lock);
139 if (system_timer->dyn_tick->state & DYN_TICK_ENABLED)
140 system_timer->dyn_tick->handler(irq, 0, regs);
141 write_sequnlock(&xtime_lock);
142 }
143#endif
144 133
145 if (!(action->flags & IRQF_DISABLED)) 134 if (!(action->flags & IRQF_DISABLED))
146 local_irq_enable(); 135 local_irq_enable_in_hardirq();
147 136
148 do { 137 do {
149 ret = action->handler(irq, action->dev_id, regs); 138 ret = action->handler(irq, action->dev_id, regs);
@@ -260,3 +249,19 @@ out:
260 return 1; 249 return 1;
261} 250}
262 251
252#ifdef CONFIG_TRACE_IRQFLAGS
253
254/*
255 * lockdep: we want to handle all irq_desc locks as a single lock-class:
256 */
257static struct lock_class_key irq_desc_lock_class;
258
259void early_init_irq_lock_class(void)
260{
261 int i;
262
263 for (i = 0; i < NR_IRQS; i++)
264 lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
265}
266
267#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index fede5fa351df..4e461438e48b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -234,7 +234,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
234 ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) 234 ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK))
235 goto mismatch; 235 goto mismatch;
236 236
237#if defined(CONFIG_IRQ_PER_CPU) && defined(IRQF_PERCPU) 237#if defined(CONFIG_IRQ_PER_CPU)
238 /* All handlers must agree on per-cpuness */ 238 /* All handlers must agree on per-cpuness */
239 if ((old->flags & IRQF_PERCPU) != 239 if ((old->flags & IRQF_PERCPU) !=
240 (new->flags & IRQF_PERCPU)) 240 (new->flags & IRQF_PERCPU))
@@ -250,7 +250,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
250 } 250 }
251 251
252 *p = new; 252 *p = new;
253#if defined(CONFIG_IRQ_PER_CPU) && defined(IRQF_PERCPU) 253#if defined(CONFIG_IRQ_PER_CPU)
254 if (new->flags & IRQF_PERCPU) 254 if (new->flags & IRQF_PERCPU)
255 desc->status |= IRQ_PER_CPU; 255 desc->status |= IRQ_PER_CPU;
256#endif 256#endif
@@ -410,6 +410,12 @@ int request_irq(unsigned int irq,
410 struct irqaction *action; 410 struct irqaction *action;
411 int retval; 411 int retval;
412 412
413#ifdef CONFIG_LOCKDEP
414 /*
415 * Lockdep wants atomic interrupt handlers:
416 */
417 irqflags |= SA_INTERRUPT;
418#endif
413 /* 419 /*
414 * Sanity-check: shared interrupts must pass in a real dev-ID, 420 * Sanity-check: shared interrupts must pass in a real dev-ID,
415 * otherwise we'll have trouble later trying to figure out 421 * otherwise we'll have trouble later trying to figure out
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1b7157af051c..1d32defa38ab 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -233,7 +233,7 @@ static void __call_usermodehelper(void *data)
233int call_usermodehelper_keys(char *path, char **argv, char **envp, 233int call_usermodehelper_keys(char *path, char **argv, char **envp,
234 struct key *session_keyring, int wait) 234 struct key *session_keyring, int wait)
235{ 235{
236 DECLARE_COMPLETION(done); 236 DECLARE_COMPLETION_ONSTACK(done);
237 struct subprocess_info sub_info = { 237 struct subprocess_info sub_info = {
238 .complete = &done, 238 .complete = &done,
239 .path = path, 239 .path = path,
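The kmod.c hunk above switches an on-stack completion over to DECLARE_COMPLETION_ONSTACK(). Static completions can keep using DECLARE_COMPLETION(), whose embedded lock can be keyed off the static object itself; a stack object has no such stable static address, so the _ONSTACK variant initializes the completion at runtime and its lock class comes from the initialization site instead. A hypothetical sketch of the pattern (the kick-off callback is made up):

#include <linux/completion.h>

/* hand the completion to some asynchronous worker, then wait for it */
static int start_and_wait(void (*kick_off)(struct completion *))
{
	DECLARE_COMPLETION_ONSTACK(done);

	kick_off(&done);		/* worker calls complete(&done) */
	wait_for_completion(&done);
	return 0;
}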
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
new file mode 100644
index 000000000000..f32ca78c198d
--- /dev/null
+++ b/kernel/lockdep.c
@@ -0,0 +1,2702 @@
1/*
2 * kernel/lockdep.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * this code maps all the lock dependencies as they occur in a live kernel
11 * and will warn about the following classes of locking bugs:
12 *
13 * - lock inversion scenarios
14 * - circular lock dependencies
15 * - hardirq/softirq safe/unsafe locking bugs
16 *
17 * Bugs are reported even if the current locking scenario does not cause
18 * any deadlock at this point.
19 *
20 * I.e. if anytime in the past two locks were taken in a different order,
21 * even if it happened for another task, even if those were different
22 * locks (but of the same class as this lock), this code will detect it.
23 *
24 * Thanks to Arjan van de Ven for coming up with the initial idea of
25 * mapping lock dependencies runtime.
26 */
27#include <linux/mutex.h>
28#include <linux/sched.h>
29#include <linux/delay.h>
30#include <linux/module.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/spinlock.h>
34#include <linux/kallsyms.h>
35#include <linux/interrupt.h>
36#include <linux/stacktrace.h>
37#include <linux/debug_locks.h>
38#include <linux/irqflags.h>
39
40#include <asm/sections.h>
41
42#include "lockdep_internals.h"
43
44/*
45 * hash_lock: protects the lockdep hashes and class/list/hash allocators.
46 *
47 * This is one of the rare exceptions where it's justified
47 * to use a raw spinlock - we really don't want the spinlock
49 * code to recurse back into the lockdep code.
50 */
51static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
52
53static int lockdep_initialized;
54
55unsigned long nr_list_entries;
56static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
57
58/*
59 * Allocate a lockdep entry. (assumes hash_lock held, returns
60 * with NULL on failure)
61 */
62static struct lock_list *alloc_list_entry(void)
63{
64 if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
65 __raw_spin_unlock(&hash_lock);
66 debug_locks_off();
67 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
68 printk("turning off the locking correctness validator.\n");
69 return NULL;
70 }
71 return list_entries + nr_list_entries++;
72}
73
74/*
75 * All data structures here are protected by the global debug_lock.
76 *
77 * Mutex key structs only get allocated, once during bootup, and never
78 * get freed - this significantly simplifies the debugging code.
79 */
80unsigned long nr_lock_classes;
81static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
82
83/*
84 * We keep a global list of all lock classes. The list only grows,
85 * never shrinks. The list is only accessed with the lockdep
86 * spinlock lock held.
87 */
88LIST_HEAD(all_lock_classes);
89
90/*
91 * The lockdep classes are in a hash-table as well, for fast lookup:
92 */
93#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1)
94#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS)
95#define CLASSHASH_MASK (CLASSHASH_SIZE - 1)
96#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK)
97#define classhashentry(key) (classhash_table + __classhashfn((key)))
98
99static struct list_head classhash_table[CLASSHASH_SIZE];
100
101unsigned long nr_lock_chains;
102static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
103
104/*
105 * We put the lock dependency chains into a hash-table as well, to cache
106 * their existence:
107 */
108#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1)
109#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS)
110#define CHAINHASH_MASK (CHAINHASH_SIZE - 1)
111#define __chainhashfn(chain) \
112 (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK)
113#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
114
115static struct list_head chainhash_table[CHAINHASH_SIZE];
116
117/*
118 * The hash key of the lock dependency chains is a hash itself too:
119 * it's a hash of all locks taken up to that lock, including that lock.
120 * It's a 64-bit hash, because it's important for the keys to be
121 * unique.
122 */
123#define iterate_chain_key(key1, key2) \
124 (((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \
125 ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \
126 (key2))
127
128void lockdep_off(void)
129{
130 current->lockdep_recursion++;
131}
132
133EXPORT_SYMBOL(lockdep_off);
134
135void lockdep_on(void)
136{
137 current->lockdep_recursion--;
138}
139
140EXPORT_SYMBOL(lockdep_on);
141
142int lockdep_internal(void)
143{
144 return current->lockdep_recursion != 0;
145}
146
147EXPORT_SYMBOL(lockdep_internal);
148
149/*
150 * Debugging switches:
151 */
152
153#define VERBOSE 0
154#ifdef VERBOSE
155# define VERY_VERBOSE 0
156#endif
157
158#if VERBOSE
159# define HARDIRQ_VERBOSE 1
160# define SOFTIRQ_VERBOSE 1
161#else
162# define HARDIRQ_VERBOSE 0
163# define SOFTIRQ_VERBOSE 0
164#endif
165
166#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
167/*
168 * Quick filtering for interesting events:
169 */
170static int class_filter(struct lock_class *class)
171{
172 if (class->name_version == 1 &&
173 !strcmp(class->name, "&rl->lock"))
174 return 1;
175 if (class->name_version == 1 &&
176 !strcmp(class->name, "&ni->mrec_lock"))
177 return 1;
178 if (class->name_version == 1 &&
179 !strcmp(class->name, "mft_ni_runlist_lock"))
180 return 1;
181 if (class->name_version == 1 &&
182 !strcmp(class->name, "mft_ni_mrec_lock"))
183 return 1;
184 if (class->name_version == 1 &&
185 !strcmp(class->name, "&vol->lcnbmp_lock"))
186 return 1;
187 return 0;
188}
189#endif
190
191static int verbose(struct lock_class *class)
192{
193#if VERBOSE
194 return class_filter(class);
195#endif
196 return 0;
197}
198
199#ifdef CONFIG_TRACE_IRQFLAGS
200
201static int hardirq_verbose(struct lock_class *class)
202{
203#if HARDIRQ_VERBOSE
204 return class_filter(class);
205#endif
206 return 0;
207}
208
209static int softirq_verbose(struct lock_class *class)
210{
211#if SOFTIRQ_VERBOSE
212 return class_filter(class);
213#endif
214 return 0;
215}
216
217#endif
218
219/*
220 * Stack-trace: tightly packed array of stack backtrace
221 * addresses. Protected by the hash_lock.
222 */
223unsigned long nr_stack_trace_entries;
224static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
225
226static int save_trace(struct stack_trace *trace)
227{
228 trace->nr_entries = 0;
229 trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
230 trace->entries = stack_trace + nr_stack_trace_entries;
231
232 save_stack_trace(trace, NULL, 0, 3);
233
234 trace->max_entries = trace->nr_entries;
235
236 nr_stack_trace_entries += trace->nr_entries;
237 if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES))
238 return 0;
239
240 if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
241 __raw_spin_unlock(&hash_lock);
242 if (debug_locks_off()) {
243 printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
244 printk("turning off the locking correctness validator.\n");
245 dump_stack();
246 }
247 return 0;
248 }
249
250 return 1;
251}
252
253unsigned int nr_hardirq_chains;
254unsigned int nr_softirq_chains;
255unsigned int nr_process_chains;
256unsigned int max_lockdep_depth;
257unsigned int max_recursion_depth;
258
259#ifdef CONFIG_DEBUG_LOCKDEP
260/*
261 * We cannot printk in early bootup code. Not even early_printk()
262 * might work. So we mark any initialization errors and printk
263 * about it later on, in lockdep_info().
264 */
265static int lockdep_init_error;
266
267/*
268 * Various lockdep statistics:
269 */
270atomic_t chain_lookup_hits;
271atomic_t chain_lookup_misses;
272atomic_t hardirqs_on_events;
273atomic_t hardirqs_off_events;
274atomic_t redundant_hardirqs_on;
275atomic_t redundant_hardirqs_off;
276atomic_t softirqs_on_events;
277atomic_t softirqs_off_events;
278atomic_t redundant_softirqs_on;
279atomic_t redundant_softirqs_off;
280atomic_t nr_unused_locks;
281atomic_t nr_cyclic_checks;
282atomic_t nr_cyclic_check_recursions;
283atomic_t nr_find_usage_forwards_checks;
284atomic_t nr_find_usage_forwards_recursions;
285atomic_t nr_find_usage_backwards_checks;
286atomic_t nr_find_usage_backwards_recursions;
287# define debug_atomic_inc(ptr) atomic_inc(ptr)
288# define debug_atomic_dec(ptr) atomic_dec(ptr)
289# define debug_atomic_read(ptr) atomic_read(ptr)
290#else
291# define debug_atomic_inc(ptr) do { } while (0)
292# define debug_atomic_dec(ptr) do { } while (0)
293# define debug_atomic_read(ptr) 0
294#endif
295
296/*
297 * Locking printouts:
298 */
299
300static const char *usage_str[] =
301{
302 [LOCK_USED] = "initial-use ",
303 [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W",
304 [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W",
305 [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W",
306 [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W",
307 [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R",
308 [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R",
309 [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R",
310 [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R",
311};
312
313const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
314{
315 unsigned long offs, size;
316 char *modname;
317
318 return kallsyms_lookup((unsigned long)key, &size, &offs, &modname, str);
319}
320
321void
322get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
323{
324 *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.';
325
326 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
327 *c1 = '+';
328 else
329 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
330 *c1 = '-';
331
332 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
333 *c2 = '+';
334 else
335 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
336 *c2 = '-';
337
338 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
339 *c3 = '-';
340 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) {
341 *c3 = '+';
342 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
343 *c3 = '?';
344 }
345
346 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
347 *c4 = '-';
348 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) {
349 *c4 = '+';
350 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
351 *c4 = '?';
352 }
353}
354
355static void print_lock_name(struct lock_class *class)
356{
357 char str[128], c1, c2, c3, c4;
358 const char *name;
359
360 get_usage_chars(class, &c1, &c2, &c3, &c4);
361
362 name = class->name;
363 if (!name) {
364 name = __get_key_name(class->key, str);
365 printk(" (%s", name);
366 } else {
367 printk(" (%s", name);
368 if (class->name_version > 1)
369 printk("#%d", class->name_version);
370 if (class->subclass)
371 printk("/%d", class->subclass);
372 }
373 printk("){%c%c%c%c}", c1, c2, c3, c4);
374}
375
376static void print_lockdep_cache(struct lockdep_map *lock)
377{
378 const char *name;
379 char str[128];
380
381 name = lock->name;
382 if (!name)
383 name = __get_key_name(lock->key->subkeys, str);
384
385 printk("%s", name);
386}
387
388static void print_lock(struct held_lock *hlock)
389{
390 print_lock_name(hlock->class);
391 printk(", at: ");
392 print_ip_sym(hlock->acquire_ip);
393}
394
395static void lockdep_print_held_locks(struct task_struct *curr)
396{
397 int i, depth = curr->lockdep_depth;
398
399 if (!depth) {
400 printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
401 return;
402 }
403 printk("%d lock%s held by %s/%d:\n",
404 depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
405
406 for (i = 0; i < depth; i++) {
407 printk(" #%d: ", i);
408 print_lock(curr->held_locks + i);
409 }
410}
411/*
412 * Helper to print a nice hierarchy of lock dependencies:
413 */
414static void print_spaces(int nr)
415{
416 int i;
417
418 for (i = 0; i < nr; i++)
419 printk(" ");
420}
421
422static void print_lock_class_header(struct lock_class *class, int depth)
423{
424 int bit;
425
426 print_spaces(depth);
427 printk("->");
428 print_lock_name(class);
429 printk(" ops: %lu", class->ops);
430 printk(" {\n");
431
432 for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
433 if (class->usage_mask & (1 << bit)) {
434 int len = depth;
435
436 print_spaces(depth);
437 len += printk(" %s", usage_str[bit]);
438 len += printk(" at:\n");
439 print_stack_trace(class->usage_traces + bit, len);
440 }
441 }
442 print_spaces(depth);
443 printk(" }\n");
444
445 print_spaces(depth);
446 printk(" ... key at: ");
447 print_ip_sym((unsigned long)class->key);
448}
449
450/*
451 * printk all lock dependencies starting at <entry>:
452 */
453static void print_lock_dependencies(struct lock_class *class, int depth)
454{
455 struct lock_list *entry;
456
457 if (DEBUG_LOCKS_WARN_ON(depth >= 20))
458 return;
459
460 print_lock_class_header(class, depth);
461
462 list_for_each_entry(entry, &class->locks_after, entry) {
463 DEBUG_LOCKS_WARN_ON(!entry->class);
464 print_lock_dependencies(entry->class, depth + 1);
465
466 print_spaces(depth);
467 printk(" ... acquired at:\n");
468 print_stack_trace(&entry->trace, 2);
469 printk("\n");
470 }
471}
472
473/*
474 * Add a new dependency to the head of the list:
475 */
476static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
477 struct list_head *head, unsigned long ip)
478{
479 struct lock_list *entry;
480 /*
481 * Lock not present yet - get a new dependency struct and
482 * add it to the list:
483 */
484 entry = alloc_list_entry();
485 if (!entry)
486 return 0;
487
488 entry->class = this;
489 save_trace(&entry->trace);
490
491 /*
492 * Since we never remove from the dependency list, the list can
493 * be walked lockless by other CPUs, it's only allocation
494 * that must be protected by the spinlock. But this also means
495 * we must make new entries visible only once writes to the
496 * entry become visible - hence the RCU op:
497 */
498 list_add_tail_rcu(&entry->entry, head);
499
500 return 1;
501}
502
503/*
504 * Recursive, forwards-direction lock-dependency checking, used for
505 * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
506 * checking.
507 *
508 * (to keep the stackframe of the recursive functions small we
509 * use these global variables, and we also mark various helper
510 * functions as noinline.)
511 */
512static struct held_lock *check_source, *check_target;
513
514/*
515 * Print a dependency chain entry (this is only done when a deadlock
516 * has been detected):
517 */
518static noinline int
519print_circular_bug_entry(struct lock_list *target, unsigned int depth)
520{
521 if (debug_locks_silent)
522 return 0;
523 printk("\n-> #%u", depth);
524 print_lock_name(target->class);
525 printk(":\n");
526 print_stack_trace(&target->trace, 6);
527
528 return 0;
529}
530
531/*
532 * When a circular dependency is detected, print the
533 * header first:
534 */
535static noinline int
536print_circular_bug_header(struct lock_list *entry, unsigned int depth)
537{
538 struct task_struct *curr = current;
539
540 __raw_spin_unlock(&hash_lock);
541 debug_locks_off();
542 if (debug_locks_silent)
543 return 0;
544
545 printk("\n=======================================================\n");
546 printk( "[ INFO: possible circular locking dependency detected ]\n");
547 printk( "-------------------------------------------------------\n");
548 printk("%s/%d is trying to acquire lock:\n",
549 curr->comm, curr->pid);
550 print_lock(check_source);
551 printk("\nbut task is already holding lock:\n");
552 print_lock(check_target);
553 printk("\nwhich lock already depends on the new lock.\n\n");
554 printk("\nthe existing dependency chain (in reverse order) is:\n");
555
556 print_circular_bug_entry(entry, depth);
557
558 return 0;
559}
560
561static noinline int print_circular_bug_tail(void)
562{
563 struct task_struct *curr = current;
564 struct lock_list this;
565
566 if (debug_locks_silent)
567 return 0;
568
569 this.class = check_source->class;
570 save_trace(&this.trace);
571 print_circular_bug_entry(&this, 0);
572
573 printk("\nother info that might help us debug this:\n\n");
574 lockdep_print_held_locks(curr);
575
576 printk("\nstack backtrace:\n");
577 dump_stack();
578
579 return 0;
580}
581
582static int noinline print_infinite_recursion_bug(void)
583{
584 __raw_spin_unlock(&hash_lock);
585 DEBUG_LOCKS_WARN_ON(1);
586
587 return 0;
588}
589
590/*
591 * Prove that the dependency graph starting at <entry> can not
592 * lead to <target>. Print an error and return 0 if it does.
593 */
594static noinline int
595check_noncircular(struct lock_class *source, unsigned int depth)
596{
597 struct lock_list *entry;
598
599 debug_atomic_inc(&nr_cyclic_check_recursions);
600 if (depth > max_recursion_depth)
601 max_recursion_depth = depth;
602 if (depth >= 20)
603 return print_infinite_recursion_bug();
604 /*
605 * Check this lock's dependency list:
606 */
607 list_for_each_entry(entry, &source->locks_after, entry) {
608 if (entry->class == check_target->class)
609 return print_circular_bug_header(entry, depth+1);
610 debug_atomic_inc(&nr_cyclic_checks);
611 if (!check_noncircular(entry->class, depth+1))
612 return print_circular_bug_entry(entry, depth+1);
613 }
614 return 1;
615}
616
617static int very_verbose(struct lock_class *class)
618{
619#if VERY_VERBOSE
620 return class_filter(class);
621#endif
622 return 0;
623}
624#ifdef CONFIG_TRACE_IRQFLAGS
625
626/*
627 * Forwards and backwards subgraph searching, for the purposes of
628 * proving that two subgraphs can be connected by a new dependency
629 * without creating any illegal irq-safe -> irq-unsafe lock dependency.
630 */
631static enum lock_usage_bit find_usage_bit;
632static struct lock_class *forwards_match, *backwards_match;
633
634/*
635 * Find a node in the forwards-direction dependency sub-graph starting
636 * at <source> that matches <find_usage_bit>.
637 *
638 * Return 2 if such a node exists in the subgraph, and put that node
639 * into <forwards_match>.
640 *
641 * Return 1 otherwise and keep <forwards_match> unchanged.
642 * Return 0 on error.
643 */
644static noinline int
645find_usage_forwards(struct lock_class *source, unsigned int depth)
646{
647 struct lock_list *entry;
648 int ret;
649
650 if (depth > max_recursion_depth)
651 max_recursion_depth = depth;
652 if (depth >= 20)
653 return print_infinite_recursion_bug();
654
655 debug_atomic_inc(&nr_find_usage_forwards_checks);
656 if (source->usage_mask & (1 << find_usage_bit)) {
657 forwards_match = source;
658 return 2;
659 }
660
661 /*
662 * Check this lock's dependency list:
663 */
664 list_for_each_entry(entry, &source->locks_after, entry) {
665 debug_atomic_inc(&nr_find_usage_forwards_recursions);
666 ret = find_usage_forwards(entry->class, depth+1);
667 if (ret == 2 || ret == 0)
668 return ret;
669 }
670 return 1;
671}
672
673/*
674 * Find a node in the backwards-direction dependency sub-graph starting
675 * at <source> that matches <find_usage_bit>.
676 *
677 * Return 2 if such a node exists in the subgraph, and put that node
678 * into <backwards_match>.
679 *
680 * Return 1 otherwise and keep <backwards_match> unchanged.
681 * Return 0 on error.
682 */
683static noinline int
684find_usage_backwards(struct lock_class *source, unsigned int depth)
685{
686 struct lock_list *entry;
687 int ret;
688
689 if (depth > max_recursion_depth)
690 max_recursion_depth = depth;
691 if (depth >= 20)
692 return print_infinite_recursion_bug();
693
694 debug_atomic_inc(&nr_find_usage_backwards_checks);
695 if (source->usage_mask & (1 << find_usage_bit)) {
696 backwards_match = source;
697 return 2;
698 }
699
700 /*
701 * Check this lock's dependency list:
702 */
703 list_for_each_entry(entry, &source->locks_before, entry) {
704 debug_atomic_inc(&nr_find_usage_backwards_recursions);
705 ret = find_usage_backwards(entry->class, depth+1);
706 if (ret == 2 || ret == 0)
707 return ret;
708 }
709 return 1;
710}
711
712static int
713print_bad_irq_dependency(struct task_struct *curr,
714 struct held_lock *prev,
715 struct held_lock *next,
716 enum lock_usage_bit bit1,
717 enum lock_usage_bit bit2,
718 const char *irqclass)
719{
720 __raw_spin_unlock(&hash_lock);
721 debug_locks_off();
722 if (debug_locks_silent)
723 return 0;
724
725 printk("\n======================================================\n");
726 printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
727 irqclass, irqclass);
728 printk( "------------------------------------------------------\n");
729 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
730 curr->comm, curr->pid,
731 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
732 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
733 curr->hardirqs_enabled,
734 curr->softirqs_enabled);
735 print_lock(next);
736
737 printk("\nand this task is already holding:\n");
738 print_lock(prev);
739 printk("which would create a new lock dependency:\n");
740 print_lock_name(prev->class);
741 printk(" ->");
742 print_lock_name(next->class);
743 printk("\n");
744
745 printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
746 irqclass);
747 print_lock_name(backwards_match);
748 printk("\n... which became %s-irq-safe at:\n", irqclass);
749
750 print_stack_trace(backwards_match->usage_traces + bit1, 1);
751
752 printk("\nto a %s-irq-unsafe lock:\n", irqclass);
753 print_lock_name(forwards_match);
754 printk("\n... which became %s-irq-unsafe at:\n", irqclass);
755 printk("...");
756
757 print_stack_trace(forwards_match->usage_traces + bit2, 1);
758
759 printk("\nother info that might help us debug this:\n\n");
760 lockdep_print_held_locks(curr);
761
762 printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass);
763 print_lock_dependencies(backwards_match, 0);
764
765 printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass);
766 print_lock_dependencies(forwards_match, 0);
767
768 printk("\nstack backtrace:\n");
769 dump_stack();
770
771 return 0;
772}
773
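/*
 * Check that no lock reachable backwards from <prev> with usage bit
 * <bit_backwards> (e.g. hardirq-safe) gets connected, via the new
 * <prev> -> <next> dependency, to a lock reachable forwards from
 * <next> with usage bit <bit_forwards> (e.g. hardirq-unsafe):
 */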
774static int
775check_usage(struct task_struct *curr, struct held_lock *prev,
776 struct held_lock *next, enum lock_usage_bit bit_backwards,
777 enum lock_usage_bit bit_forwards, const char *irqclass)
778{
779 int ret;
780
781 find_usage_bit = bit_backwards;
782 /* fills in <backwards_match> */
783 ret = find_usage_backwards(prev->class, 0);
784 if (!ret || ret == 1)
785 return ret;
786
787 find_usage_bit = bit_forwards;
788 ret = find_usage_forwards(next->class, 0);
789 if (!ret || ret == 1)
790 return ret;
791 /* ret == 2 */
792 return print_bad_irq_dependency(curr, prev, next,
793 bit_backwards, bit_forwards, irqclass);
794}
795
796#endif
797
798static int
799print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
800 struct held_lock *next)
801{
802 debug_locks_off();
803 __raw_spin_unlock(&hash_lock);
804 if (debug_locks_silent)
805 return 0;
806
807 printk("\n=============================================\n");
808 printk( "[ INFO: possible recursive locking detected ]\n");
809 printk( "---------------------------------------------\n");
810 printk("%s/%d is trying to acquire lock:\n",
811 curr->comm, curr->pid);
812 print_lock(next);
813 printk("\nbut task is already holding lock:\n");
814 print_lock(prev);
815
816 printk("\nother info that might help us debug this:\n");
817 lockdep_print_held_locks(curr);
818
819 printk("\nstack backtrace:\n");
820 dump_stack();
821
822 return 0;
823}
824
825/*
826 * Check whether we are holding such a class already.
827 *
828 * (Note that this has to be done separately, because the graph cannot
829 * detect such classes of deadlocks.)
830 *
831 * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
832 */
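/*
 * E.g. a second spin_lock() on a lock of a class already held in this
 * context is reported here; read_lock() after read_lock() of the same
 * class is the recursive-read case (read == 2) and is allowed.
 */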
833static int
834check_deadlock(struct task_struct *curr, struct held_lock *next,
835 struct lockdep_map *next_instance, int read)
836{
837 struct held_lock *prev;
838 int i;
839
840 for (i = 0; i < curr->lockdep_depth; i++) {
841 prev = curr->held_locks + i;
842 if (prev->class != next->class)
843 continue;
844 /*
845 * Allow read-after-read recursion of the same
846 * lock class (i.e. read_lock(lock)+read_lock(lock)):
847 */
848 if ((read == 2) && prev->read)
849 return 2;
850 return print_deadlock_bug(curr, prev, next);
851 }
852 return 1;
853}
854
855/*
856 * There was a chain-cache miss, and we are about to add a new dependency
857 * to a previous lock. We recursively validate the following rules:
858 *
859 * - would the adding of the <prev> -> <next> dependency create a
860 * circular dependency in the graph? [== circular deadlock]
861 *
862 * - does the new prev->next dependency connect any hardirq-safe lock
863 * (in the full backwards-subgraph starting at <prev>) with any
864 * hardirq-unsafe lock (in the full forwards-subgraph starting at
865 * <next>)? [== illegal lock inversion with hardirq contexts]
866 *
867 * - does the new prev->next dependency connect any softirq-safe lock
868 * (in the full backwards-subgraph starting at <prev>) with any
869 * softirq-unsafe lock (in the full forwards-subgraph starting at
870 * <next>)? [== illegal lock inversion with softirq contexts]
871 *
872 * any of these scenarios could lead to a deadlock.
873 *
874 * Then if all the validations pass, we add the forwards and backwards
875 * dependency.
876 */
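/*
 * Example of the first rule: if one context takes A then B and another
 * later takes B then A, adding the B -> A dependency closes a cycle
 * with the already-recorded A -> B dependency and the circular-bug
 * report is printed.
 */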
877static int
878check_prev_add(struct task_struct *curr, struct held_lock *prev,
879 struct held_lock *next)
880{
881 struct lock_list *entry;
882 int ret;
883
884 /*
885 * Prove that the new <prev> -> <next> dependency would not
886 * create a circular dependency in the graph. (We do this by
887 * forward-recursing into the graph starting at <next>, and
888 * checking whether we can reach <prev>.)
889 *
890 * We are using global variables to control the recursion, to
891 * keep the stackframe size of the recursive functions low:
892 */
893 check_source = next;
894 check_target = prev;
895 if (!(check_noncircular(next->class, 0)))
896 return print_circular_bug_tail();
897
898#ifdef CONFIG_TRACE_IRQFLAGS
899 /*
900 * Prove that the new dependency does not connect a hardirq-safe
901 * lock with a hardirq-unsafe lock - to achieve this we search
902 * the backwards-subgraph starting at <prev>, and the
903 * forwards-subgraph starting at <next>:
904 */
905 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
906 LOCK_ENABLED_HARDIRQS, "hard"))
907 return 0;
908
909 /*
910 * Prove that the new dependency does not connect a hardirq-safe-read
911 * lock with a hardirq-unsafe lock - to achieve this we search
912 * the backwards-subgraph starting at <prev>, and the
913 * forwards-subgraph starting at <next>:
914 */
915 if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
916 LOCK_ENABLED_HARDIRQS, "hard-read"))
917 return 0;
918
919 /*
920 * Prove that the new dependency does not connect a softirq-safe
921 * lock with a softirq-unsafe lock - to achieve this we search
922 * the backwards-subgraph starting at <prev>, and the
923 * forwards-subgraph starting at <next>:
924 */
925 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
926 LOCK_ENABLED_SOFTIRQS, "soft"))
927 return 0;
928 /*
929 * Prove that the new dependency does not connect a softirq-safe-read
930 * lock with a softirq-unsafe lock - to achieve this we search
931 * the backwards-subgraph starting at <prev>, and the
932 * forwards-subgraph starting at <next>:
933 */
934 if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
935 LOCK_ENABLED_SOFTIRQS, "soft"))
936 return 0;
937#endif
938 /*
939 * For recursive read-locks we do all the dependency checks,
940 * but we dont store read-triggered dependencies (only
941 * write-triggered dependencies). This ensures that only the
942 * write-side dependencies matter, and that if for example a
943 * write-lock never takes any other locks, then the reads are
944 * equivalent to a NOP.
945 */
946 if (next->read == 2 || prev->read == 2)
947 return 1;
948 /*
949 * Is the <prev> -> <next> dependency already present?
950 *
951 * (this may occur even though this is a new chain: consider
952 * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3
953 * chains - the second one will be new, but L1 already has
954 * L2 added to its dependency list, due to the first chain.)
955 */
956 list_for_each_entry(entry, &prev->class->locks_after, entry) {
957 if (entry->class == next->class)
958 return 2;
959 }
960
961 /*
962 * Ok, all validations passed, add the new lock
963 * to the previous lock's dependency list:
964 */
965 ret = add_lock_to_list(prev->class, next->class,
966 &prev->class->locks_after, next->acquire_ip);
967 if (!ret)
968 return 0;
969 /*
970 * Return value of 2 signals 'dependency already added',
971 * in that case we dont have to add the backlink either.
972 */
973 if (ret == 2)
974 return 2;
975 ret = add_lock_to_list(next->class, prev->class,
976 &next->class->locks_before, next->acquire_ip);
977
978 /*
979 * Debugging printouts:
980 */
981 if (verbose(prev->class) || verbose(next->class)) {
982 __raw_spin_unlock(&hash_lock);
983 printk("\n new dependency: ");
984 print_lock_name(prev->class);
985 printk(" => ");
986 print_lock_name(next->class);
987 printk("\n");
988 dump_stack();
989 __raw_spin_lock(&hash_lock);
990 }
991 return 1;
992}
993
994/*
995 * Add the dependency to all directly-previous locks that are 'relevant'.
996 * The ones that are relevant are (in increasing distance from curr):
997 * all consecutive trylock entries and the final non-trylock entry - or
998 * the end of this context's lock-chain - whichever comes first.
999 */
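/*
 * E.g. with held locks ... L, T1, T2 (T1 and T2 taken via trylock) and
 * a new lock N being acquired, the dependencies T2 -> N, T1 -> N and
 * L -> N are added, and the walk stops at L, the first non-trylock
 * entry.
 */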
1000static int
1001check_prevs_add(struct task_struct *curr, struct held_lock *next)
1002{
1003 int depth = curr->lockdep_depth;
1004 struct held_lock *hlock;
1005
1006 /*
1007 * Debugging checks.
1008 *
1009 * Depth must not be zero for a non-head lock:
1010 */
1011 if (!depth)
1012 goto out_bug;
1013 /*
1014 * At least two relevant locks must exist for this
1015 * to be a head:
1016 */
1017 if (curr->held_locks[depth].irq_context !=
1018 curr->held_locks[depth-1].irq_context)
1019 goto out_bug;
1020
1021 for (;;) {
1022 hlock = curr->held_locks + depth-1;
1023 /*
1024 * Only non-recursive-read entries get new dependencies
1025 * added:
1026 */
1027 if (hlock->read != 2) {
1028 check_prev_add(curr, hlock, next);
1029 /*
1030 * Stop after the first non-trylock entry,
1031 * as non-trylock entries have added their
1032 * own direct dependencies already, so this
1033 * lock is connected to them indirectly:
1034 */
1035 if (!hlock->trylock)
1036 break;
1037 }
1038 depth--;
1039 /*
1040 * End of lock-stack?
1041 */
1042 if (!depth)
1043 break;
1044 /*
1045 * Stop the search if we cross into another context:
1046 */
1047 if (curr->held_locks[depth].irq_context !=
1048 curr->held_locks[depth-1].irq_context)
1049 break;
1050 }
1051 return 1;
1052out_bug:
1053 __raw_spin_unlock(&hash_lock);
1054 DEBUG_LOCKS_WARN_ON(1);
1055
1056 return 0;
1057}
1058
1059
1060/*
1061 * Is this the address of a static object:
1062 */
1063static int static_obj(void *obj)
1064{
1065 unsigned long start = (unsigned long) &_stext,
1066 end = (unsigned long) &_end,
1067 addr = (unsigned long) obj;
1068#ifdef CONFIG_SMP
1069 int i;
1070#endif
1071
1072 /*
1073 * static variable?
1074 */
1075 if ((addr >= start) && (addr < end))
1076 return 1;
1077
1078#ifdef CONFIG_SMP
1079 /*
1080 * percpu var?
1081 */
1082 for_each_possible_cpu(i) {
1083 start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
1084 end = (unsigned long) &__per_cpu_end + per_cpu_offset(i);
1085
1086 if ((addr >= start) && (addr < end))
1087 return 1;
1088 }
1089#endif
1090
1091 /*
1092 * module var?
1093 */
1094 return is_module_address(addr);
1095}
1096
1097/*
1098 * To make lock name printouts unique, we calculate a unique
1099 * class->name_version generation counter:
1100 */
1101static int count_matching_names(struct lock_class *new_class)
1102{
1103 struct lock_class *class;
1104 int count = 0;
1105
1106 if (!new_class->name)
1107 return 0;
1108
1109 list_for_each_entry(class, &all_lock_classes, lock_entry) {
1110 if (new_class->key - new_class->subclass == class->key)
1111 return class->name_version;
1112 if (class->name && !strcmp(class->name, new_class->name))
1113 count = max(count, class->name_version);
1114 }
1115
1116 return count + 1;
1117}
1118
1119extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
1120
1121/*
1122 * Register a lock's class in the hash-table, if the class is not present
1123 * yet. Otherwise we look it up. We cache the result in the lock object
1124 * itself, so actual lookup of the hash should be once per lock object.
1125 */
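/*
 * (The class hash is first walked lock-free; only if the class is not
 * found do we take hash_lock, re-check, and allocate a new entry - so
 * two CPUs racing on the same key still end up sharing one lock_class.)
 */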
1126static inline struct lock_class *
1127register_lock_class(struct lockdep_map *lock, unsigned int subclass)
1128{
1129 struct lockdep_subclass_key *key;
1130 struct list_head *hash_head;
1131 struct lock_class *class;
1132
1133#ifdef CONFIG_DEBUG_LOCKDEP
1134 /*
1135 * If the architecture calls into lockdep before initializing
1136 * the hashes then we'll warn about it later. (we cannot printk
1137 * right now)
1138 */
1139 if (unlikely(!lockdep_initialized)) {
1140 lockdep_init();
1141 lockdep_init_error = 1;
1142 }
1143#endif
1144
1145 /*
1146 * Static locks do not have their class-keys yet - for them the key
1147 * is the lock object itself:
1148 */
1149 if (unlikely(!lock->key))
1150 lock->key = (void *)lock;
1151
1152 /*
1153 * NOTE: the class-key must be unique. For dynamic locks, a static
1154 * lock_class_key variable is passed in through the mutex_init()
1155 * (or spin_lock_init()) call - which acts as the key. For static
1156 * locks we use the lock object itself as the key.
1157 */
1158 if (sizeof(struct lock_class_key) > sizeof(struct lock_class))
1159 __error_too_big_MAX_LOCKDEP_SUBCLASSES();
1160
1161 key = lock->key->subkeys + subclass;
1162
1163 hash_head = classhashentry(key);
1164
1165 /*
1166 * We can walk the hash lockfree, because the hash only
1167 * grows, and we are careful when adding entries to the end:
1168 */
1169 list_for_each_entry(class, hash_head, hash_entry)
1170 if (class->key == key)
1171 goto out_set;
1172
1173 /*
1174 * Debug-check: all keys must be persistent!
1175 */
1176 if (!static_obj(lock->key)) {
1177 debug_locks_off();
1178 printk("INFO: trying to register non-static key.\n");
1179 printk("the code is fine but needs lockdep annotation.\n");
1180 printk("turning off the locking correctness validator.\n");
1181 dump_stack();
1182
1183 return NULL;
1184 }
1185
1186 __raw_spin_lock(&hash_lock);
1187 /*
1188 * We have to do the hash-walk again, to avoid races
1189 * with another CPU:
1190 */
1191 list_for_each_entry(class, hash_head, hash_entry)
1192 if (class->key == key)
1193 goto out_unlock_set;
1194 /*
1195 * Allocate a new key from the static array, and add it to
1196 * the hash:
1197 */
1198 if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
1199 __raw_spin_unlock(&hash_lock);
1200 debug_locks_off();
1201 printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
1202 printk("turning off the locking correctness validator.\n");
1203 return NULL;
1204 }
1205 class = lock_classes + nr_lock_classes++;
1206 debug_atomic_inc(&nr_unused_locks);
1207 class->key = key;
1208 class->name = lock->name;
1209 class->subclass = subclass;
1210 INIT_LIST_HEAD(&class->lock_entry);
1211 INIT_LIST_HEAD(&class->locks_before);
1212 INIT_LIST_HEAD(&class->locks_after);
1213 class->name_version = count_matching_names(class);
1214 /*
1215 * We use RCU's safe list-add method to make
1216 * parallel walking of the hash-list safe:
1217 */
1218 list_add_tail_rcu(&class->hash_entry, hash_head);
1219
1220 if (verbose(class)) {
1221 __raw_spin_unlock(&hash_lock);
1222 printk("\nnew class %p: %s", class->key, class->name);
1223 if (class->name_version > 1)
1224 printk("#%d", class->name_version);
1225 printk("\n");
1226 dump_stack();
1227 __raw_spin_lock(&hash_lock);
1228 }
1229out_unlock_set:
1230 __raw_spin_unlock(&hash_lock);
1231
1232out_set:
1233 lock->class[subclass] = class;
1234
1235 DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
1236
1237 return class;
1238}
1239
1240/*
1241	 * Look up a dependency chain. If the key is not present yet then
1242	 * add it and return 1 - in this case the new dependency chain will
1243	 * be validated. If the key is already hashed, return 0.
1244 */
1245static inline int lookup_chain_cache(u64 chain_key)
1246{
1247 struct list_head *hash_head = chainhashentry(chain_key);
1248 struct lock_chain *chain;
1249
1250 DEBUG_LOCKS_WARN_ON(!irqs_disabled());
1251 /*
1252 * We can walk it lock-free, because entries only get added
1253 * to the hash:
1254 */
1255 list_for_each_entry(chain, hash_head, entry) {
1256 if (chain->chain_key == chain_key) {
1257cache_hit:
1258 debug_atomic_inc(&chain_lookup_hits);
1259 /*
1260 * In the debugging case, force redundant checking
1261 * by returning 1:
1262 */
1263#ifdef CONFIG_DEBUG_LOCKDEP
1264 __raw_spin_lock(&hash_lock);
1265 return 1;
1266#endif
1267 return 0;
1268 }
1269 }
1270 /*
1271 * Allocate a new chain entry from the static array, and add
1272 * it to the hash:
1273 */
1274 __raw_spin_lock(&hash_lock);
1275 /*
1276 * We have to walk the chain again locked - to avoid duplicates:
1277 */
1278 list_for_each_entry(chain, hash_head, entry) {
1279 if (chain->chain_key == chain_key) {
1280 __raw_spin_unlock(&hash_lock);
1281 goto cache_hit;
1282 }
1283 }
1284 if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
1285 __raw_spin_unlock(&hash_lock);
1286 debug_locks_off();
1287 printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
1288 printk("turning off the locking correctness validator.\n");
1289 return 0;
1290 }
1291 chain = lock_chains + nr_lock_chains++;
1292 chain->chain_key = chain_key;
1293 list_add_tail_rcu(&chain->entry, hash_head);
1294 debug_atomic_inc(&chain_lookup_misses);
1295#ifdef CONFIG_TRACE_IRQFLAGS
1296 if (current->hardirq_context)
1297 nr_hardirq_chains++;
1298 else {
1299 if (current->softirq_context)
1300 nr_softirq_chains++;
1301 else
1302 nr_process_chains++;
1303 }
1304#else
1305 nr_process_chains++;
1306#endif
1307
1308 return 1;
1309}
1310
1311/*
1312 * We are building curr_chain_key incrementally, so double-check
1313 * it from scratch, to make sure that it's done correctly:
1314 */
1315static void check_chain_key(struct task_struct *curr)
1316{
1317#ifdef CONFIG_DEBUG_LOCKDEP
1318 struct held_lock *hlock, *prev_hlock = NULL;
1319 unsigned int i, id;
1320 u64 chain_key = 0;
1321
1322 for (i = 0; i < curr->lockdep_depth; i++) {
1323 hlock = curr->held_locks + i;
1324 if (chain_key != hlock->prev_chain_key) {
1325 debug_locks_off();
1326 printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n",
1327 curr->lockdep_depth, i,
1328 (unsigned long long)chain_key,
1329 (unsigned long long)hlock->prev_chain_key);
1330 WARN_ON(1);
1331 return;
1332 }
1333 id = hlock->class - lock_classes;
1334 DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS);
1335 if (prev_hlock && (prev_hlock->irq_context !=
1336 hlock->irq_context))
1337 chain_key = 0;
1338 chain_key = iterate_chain_key(chain_key, id);
1339 prev_hlock = hlock;
1340 }
1341 if (chain_key != curr->curr_chain_key) {
1342 debug_locks_off();
1343 printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n",
1344 curr->lockdep_depth, i,
1345 (unsigned long long)chain_key,
1346 (unsigned long long)curr->curr_chain_key);
1347 WARN_ON(1);
1348 }
1349#endif
1350}
1351
1352#ifdef CONFIG_TRACE_IRQFLAGS
1353
1354/*
1355 * print irq inversion bug:
1356 */
1357static int
1358print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1359 struct held_lock *this, int forwards,
1360 const char *irqclass)
1361{
1362 __raw_spin_unlock(&hash_lock);
1363 debug_locks_off();
1364 if (debug_locks_silent)
1365 return 0;
1366
1367 printk("\n=========================================================\n");
1368 printk( "[ INFO: possible irq lock inversion dependency detected ]\n");
1369 printk( "---------------------------------------------------------\n");
1370 printk("%s/%d just changed the state of lock:\n",
1371 curr->comm, curr->pid);
1372 print_lock(this);
1373 if (forwards)
1374 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
1375 else
1376 printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass);
1377 print_lock_name(other);
1378 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
1379
1380 printk("\nother info that might help us debug this:\n");
1381 lockdep_print_held_locks(curr);
1382
1383 printk("\nthe first lock's dependencies:\n");
1384 print_lock_dependencies(this->class, 0);
1385
1386 printk("\nthe second lock's dependencies:\n");
1387 print_lock_dependencies(other, 0);
1388
1389 printk("\nstack backtrace:\n");
1390 dump_stack();
1391
1392 return 0;
1393}
1394
1395/*
1396 * Prove that in the forwards-direction subgraph starting at <this>
1397 * there is no lock matching <mask>:
1398 */
1399static int
1400check_usage_forwards(struct task_struct *curr, struct held_lock *this,
1401 enum lock_usage_bit bit, const char *irqclass)
1402{
1403 int ret;
1404
1405 find_usage_bit = bit;
1406 /* fills in <forwards_match> */
1407 ret = find_usage_forwards(this->class, 0);
1408 if (!ret || ret == 1)
1409 return ret;
1410
1411 return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass);
1412}
1413
1414/*
1415 * Prove that in the backwards-direction subgraph starting at <this>
1416 * there is no lock matching <mask>:
1417 */
1418static int
1419check_usage_backwards(struct task_struct *curr, struct held_lock *this,
1420 enum lock_usage_bit bit, const char *irqclass)
1421{
1422 int ret;
1423
1424 find_usage_bit = bit;
1425 /* fills in <backwards_match> */
1426 ret = find_usage_backwards(this->class, 0);
1427 if (!ret || ret == 1)
1428 return ret;
1429
1430 return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass);
1431}
1432
1433static inline void print_irqtrace_events(struct task_struct *curr)
1434{
1435 printk("irq event stamp: %u\n", curr->irq_events);
1436 printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event);
1437 print_ip_sym(curr->hardirq_enable_ip);
1438 printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
1439 print_ip_sym(curr->hardirq_disable_ip);
1440 printk("softirqs last enabled at (%u): ", curr->softirq_enable_event);
1441 print_ip_sym(curr->softirq_enable_ip);
1442 printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
1443 print_ip_sym(curr->softirq_disable_ip);
1444}
1445
1446#else
1447static inline void print_irqtrace_events(struct task_struct *curr)
1448{
1449}
1450#endif
1451
1452static int
1453print_usage_bug(struct task_struct *curr, struct held_lock *this,
1454 enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
1455{
1456 __raw_spin_unlock(&hash_lock);
1457 debug_locks_off();
1458 if (debug_locks_silent)
1459 return 0;
1460
1461 printk("\n=================================\n");
1462 printk( "[ INFO: inconsistent lock state ]\n");
1463 printk( "---------------------------------\n");
1464
1465 printk("inconsistent {%s} -> {%s} usage.\n",
1466 usage_str[prev_bit], usage_str[new_bit]);
1467
1468 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
1469 curr->comm, curr->pid,
1470 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
1471 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
1472 trace_hardirqs_enabled(curr),
1473 trace_softirqs_enabled(curr));
1474 print_lock(this);
1475
1476 printk("{%s} state was registered at:\n", usage_str[prev_bit]);
1477 print_stack_trace(this->class->usage_traces + prev_bit, 1);
1478
1479 print_irqtrace_events(curr);
1480 printk("\nother info that might help us debug this:\n");
1481 lockdep_print_held_locks(curr);
1482
1483 printk("\nstack backtrace:\n");
1484 dump_stack();
1485
1486 return 0;
1487}
1488
1489/*
1490 * Print out an error if an invalid bit is set:
1491 */
1492static inline int
1493valid_state(struct task_struct *curr, struct held_lock *this,
1494 enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
1495{
1496 if (unlikely(this->class->usage_mask & (1 << bad_bit)))
1497 return print_usage_bug(curr, this, bad_bit, new_bit);
1498 return 1;
1499}
1500
1501#define STRICT_READ_CHECKS 1
1502
1503/*
1504 * Mark a lock with a usage bit, and validate the state transition:
1505 */
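/*
 * E.g. marking a lock LOCK_USED_IN_HARDIRQ while it already carries
 * LOCK_ENABLED_HARDIRQS (or vice versa) is the inconsistent state that
 * valid_state()/print_usage_bug() report below.
 */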
1506static int mark_lock(struct task_struct *curr, struct held_lock *this,
1507 enum lock_usage_bit new_bit, unsigned long ip)
1508{
1509 unsigned int new_mask = 1 << new_bit, ret = 1;
1510
1511 /*
1512 * If already set then do not dirty the cacheline,
1513 * nor do any checks:
1514 */
1515 if (likely(this->class->usage_mask & new_mask))
1516 return 1;
1517
1518 __raw_spin_lock(&hash_lock);
1519 /*
1520 * Make sure we didnt race:
1521 */
1522 if (unlikely(this->class->usage_mask & new_mask)) {
1523 __raw_spin_unlock(&hash_lock);
1524 return 1;
1525 }
1526
1527 this->class->usage_mask |= new_mask;
1528
1529#ifdef CONFIG_TRACE_IRQFLAGS
1530 if (new_bit == LOCK_ENABLED_HARDIRQS ||
1531 new_bit == LOCK_ENABLED_HARDIRQS_READ)
1532 ip = curr->hardirq_enable_ip;
1533 else if (new_bit == LOCK_ENABLED_SOFTIRQS ||
1534 new_bit == LOCK_ENABLED_SOFTIRQS_READ)
1535 ip = curr->softirq_enable_ip;
1536#endif
1537 if (!save_trace(this->class->usage_traces + new_bit))
1538 return 0;
1539
1540 switch (new_bit) {
1541#ifdef CONFIG_TRACE_IRQFLAGS
1542 case LOCK_USED_IN_HARDIRQ:
1543 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1544 return 0;
1545 if (!valid_state(curr, this, new_bit,
1546 LOCK_ENABLED_HARDIRQS_READ))
1547 return 0;
1548 /*
1549 * just marked it hardirq-safe, check that this lock
1550 * took no hardirq-unsafe lock in the past:
1551 */
1552 if (!check_usage_forwards(curr, this,
1553 LOCK_ENABLED_HARDIRQS, "hard"))
1554 return 0;
1555#if STRICT_READ_CHECKS
1556 /*
1557 * just marked it hardirq-safe, check that this lock
1558 * took no hardirq-unsafe-read lock in the past:
1559 */
1560 if (!check_usage_forwards(curr, this,
1561 LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
1562 return 0;
1563#endif
1564 if (hardirq_verbose(this->class))
1565 ret = 2;
1566 break;
1567 case LOCK_USED_IN_SOFTIRQ:
1568 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
1569 return 0;
1570 if (!valid_state(curr, this, new_bit,
1571 LOCK_ENABLED_SOFTIRQS_READ))
1572 return 0;
1573 /*
1574 * just marked it softirq-safe, check that this lock
1575 * took no softirq-unsafe lock in the past:
1576 */
1577 if (!check_usage_forwards(curr, this,
1578 LOCK_ENABLED_SOFTIRQS, "soft"))
1579 return 0;
1580#if STRICT_READ_CHECKS
1581 /*
1582 * just marked it softirq-safe, check that this lock
1583 * took no softirq-unsafe-read lock in the past:
1584 */
1585 if (!check_usage_forwards(curr, this,
1586 LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
1587 return 0;
1588#endif
1589 if (softirq_verbose(this->class))
1590 ret = 2;
1591 break;
1592 case LOCK_USED_IN_HARDIRQ_READ:
1593 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
1594 return 0;
1595 /*
1596 * just marked it hardirq-read-safe, check that this lock
1597 * took no hardirq-unsafe lock in the past:
1598 */
1599 if (!check_usage_forwards(curr, this,
1600 LOCK_ENABLED_HARDIRQS, "hard"))
1601 return 0;
1602 if (hardirq_verbose(this->class))
1603 ret = 2;
1604 break;
1605 case LOCK_USED_IN_SOFTIRQ_READ:
1606 if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
1607 return 0;
1608 /*
1609 * just marked it softirq-read-safe, check that this lock
1610 * took no softirq-unsafe lock in the past:
1611 */
1612 if (!check_usage_forwards(curr, this,
1613 LOCK_ENABLED_SOFTIRQS, "soft"))
1614 return 0;
1615 if (softirq_verbose(this->class))
1616 ret = 2;
1617 break;
1618 case LOCK_ENABLED_HARDIRQS:
1619 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
1620 return 0;
1621 if (!valid_state(curr, this, new_bit,
1622 LOCK_USED_IN_HARDIRQ_READ))
1623 return 0;
1624 /*
1625 * just marked it hardirq-unsafe, check that no hardirq-safe
1626 * lock in the system ever took it in the past:
1627 */
1628 if (!check_usage_backwards(curr, this,
1629 LOCK_USED_IN_HARDIRQ, "hard"))
1630 return 0;
1631#if STRICT_READ_CHECKS
1632 /*
1633 * just marked it hardirq-unsafe, check that no
1634 * hardirq-safe-read lock in the system ever took
1635 * it in the past:
1636 */
1637 if (!check_usage_backwards(curr, this,
1638 LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
1639 return 0;
1640#endif
1641 if (hardirq_verbose(this->class))
1642 ret = 2;
1643 break;
1644 case LOCK_ENABLED_SOFTIRQS:
1645 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
1646 return 0;
1647 if (!valid_state(curr, this, new_bit,
1648 LOCK_USED_IN_SOFTIRQ_READ))
1649 return 0;
1650 /*
1651 * just marked it softirq-unsafe, check that no softirq-safe
1652 * lock in the system ever took it in the past:
1653 */
1654 if (!check_usage_backwards(curr, this,
1655 LOCK_USED_IN_SOFTIRQ, "soft"))
1656 return 0;
1657#if STRICT_READ_CHECKS
1658 /*
1659 * just marked it softirq-unsafe, check that no
1660 * softirq-safe-read lock in the system ever took
1661 * it in the past:
1662 */
1663 if (!check_usage_backwards(curr, this,
1664 LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
1665 return 0;
1666#endif
1667 if (softirq_verbose(this->class))
1668 ret = 2;
1669 break;
1670 case LOCK_ENABLED_HARDIRQS_READ:
1671 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
1672 return 0;
1673#if STRICT_READ_CHECKS
1674 /*
1675 * just marked it hardirq-read-unsafe, check that no
1676 * hardirq-safe lock in the system ever took it in the past:
1677 */
1678 if (!check_usage_backwards(curr, this,
1679 LOCK_USED_IN_HARDIRQ, "hard"))
1680 return 0;
1681#endif
1682 if (hardirq_verbose(this->class))
1683 ret = 2;
1684 break;
1685 case LOCK_ENABLED_SOFTIRQS_READ:
1686 if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
1687 return 0;
1688#if STRICT_READ_CHECKS
1689 /*
1690 * just marked it softirq-read-unsafe, check that no
1691 * softirq-safe lock in the system ever took it in the past:
1692 */
1693 if (!check_usage_backwards(curr, this,
1694 LOCK_USED_IN_SOFTIRQ, "soft"))
1695 return 0;
1696#endif
1697 if (softirq_verbose(this->class))
1698 ret = 2;
1699 break;
1700#endif
1701 case LOCK_USED:
1702 /*
1703 * Add it to the global list of classes:
1704 */
1705 list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
1706 debug_atomic_dec(&nr_unused_locks);
1707 break;
1708 default:
1709 debug_locks_off();
1710 WARN_ON(1);
1711 return 0;
1712 }
1713
1714 __raw_spin_unlock(&hash_lock);
1715
1716 /*
1717 * We must printk outside of the hash_lock:
1718 */
1719 if (ret == 2) {
1720 printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
1721 print_lock(this);
1722 print_irqtrace_events(curr);
1723 dump_stack();
1724 }
1725
1726 return ret;
1727}
1728
1729#ifdef CONFIG_TRACE_IRQFLAGS
1730/*
1731 * Mark all held locks with a usage bit:
1732 */
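/*
 * (Called from the trace_hardirqs_on()/trace_softirqs_on() paths below:
 * every lock held while interrupts get re-enabled becomes
 * hardirq/softirq-unsafe, or the _READ variant if it is read-held.)
 */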
1733static int
1734mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip)
1735{
1736 enum lock_usage_bit usage_bit;
1737 struct held_lock *hlock;
1738 int i;
1739
1740 for (i = 0; i < curr->lockdep_depth; i++) {
1741 hlock = curr->held_locks + i;
1742
1743 if (hardirq) {
1744 if (hlock->read)
1745 usage_bit = LOCK_ENABLED_HARDIRQS_READ;
1746 else
1747 usage_bit = LOCK_ENABLED_HARDIRQS;
1748 } else {
1749 if (hlock->read)
1750 usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
1751 else
1752 usage_bit = LOCK_ENABLED_SOFTIRQS;
1753 }
1754 if (!mark_lock(curr, hlock, usage_bit, ip))
1755 return 0;
1756 }
1757
1758 return 1;
1759}
1760
1761/*
1762 * Debugging helper: via this flag we know that we are in
1763 * 'early bootup code', and will warn about any invalid irqs-on event:
1764 */
1765static int early_boot_irqs_enabled;
1766
1767void early_boot_irqs_off(void)
1768{
1769 early_boot_irqs_enabled = 0;
1770}
1771
1772void early_boot_irqs_on(void)
1773{
1774 early_boot_irqs_enabled = 1;
1775}
1776
1777/*
1778 * Hardirqs will be enabled:
1779 */
1780void trace_hardirqs_on(void)
1781{
1782 struct task_struct *curr = current;
1783 unsigned long ip;
1784
1785 if (unlikely(!debug_locks || current->lockdep_recursion))
1786 return;
1787
1788 if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
1789 return;
1790
1791 if (unlikely(curr->hardirqs_enabled)) {
1792 debug_atomic_inc(&redundant_hardirqs_on);
1793 return;
1794 }
1795 /* we'll do an OFF -> ON transition: */
1796 curr->hardirqs_enabled = 1;
1797 ip = (unsigned long) __builtin_return_address(0);
1798
1799 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1800 return;
1801 if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
1802 return;
1803 /*
1804 * We are going to turn hardirqs on, so set the
1805 * usage bit for all held locks:
1806 */
1807 if (!mark_held_locks(curr, 1, ip))
1808 return;
1809 /*
1810 * If we have softirqs enabled, then set the usage
1811 * bit for all held locks. (disabled hardirqs prevented
1812 * this bit from being set before)
1813 */
1814 if (curr->softirqs_enabled)
1815 if (!mark_held_locks(curr, 0, ip))
1816 return;
1817
1818 curr->hardirq_enable_ip = ip;
1819 curr->hardirq_enable_event = ++curr->irq_events;
1820 debug_atomic_inc(&hardirqs_on_events);
1821}
1822
1823EXPORT_SYMBOL(trace_hardirqs_on);
1824
1825/*
1826 * Hardirqs were disabled:
1827 */
1828void trace_hardirqs_off(void)
1829{
1830 struct task_struct *curr = current;
1831
1832 if (unlikely(!debug_locks || current->lockdep_recursion))
1833 return;
1834
1835 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1836 return;
1837
1838 if (curr->hardirqs_enabled) {
1839 /*
1840 * We have done an ON -> OFF transition:
1841 */
1842 curr->hardirqs_enabled = 0;
1843 curr->hardirq_disable_ip = _RET_IP_;
1844 curr->hardirq_disable_event = ++curr->irq_events;
1845 debug_atomic_inc(&hardirqs_off_events);
1846 } else
1847 debug_atomic_inc(&redundant_hardirqs_off);
1848}
1849
1850EXPORT_SYMBOL(trace_hardirqs_off);
1851
1852/*
1853 * Softirqs will be enabled:
1854 */
1855void trace_softirqs_on(unsigned long ip)
1856{
1857 struct task_struct *curr = current;
1858
1859 if (unlikely(!debug_locks))
1860 return;
1861
1862 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1863 return;
1864
1865 if (curr->softirqs_enabled) {
1866 debug_atomic_inc(&redundant_softirqs_on);
1867 return;
1868 }
1869
1870 /*
1871 * We'll do an OFF -> ON transition:
1872 */
1873 curr->softirqs_enabled = 1;
1874 curr->softirq_enable_ip = ip;
1875 curr->softirq_enable_event = ++curr->irq_events;
1876 debug_atomic_inc(&softirqs_on_events);
1877 /*
1878 * We are going to turn softirqs on, so set the
1879 * usage bit for all held locks, if hardirqs are
1880 * enabled too:
1881 */
1882 if (curr->hardirqs_enabled)
1883 mark_held_locks(curr, 0, ip);
1884}
1885
1886/*
1887 * Softirqs were disabled:
1888 */
1889void trace_softirqs_off(unsigned long ip)
1890{
1891 struct task_struct *curr = current;
1892
1893 if (unlikely(!debug_locks))
1894 return;
1895
1896 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1897 return;
1898
1899 if (curr->softirqs_enabled) {
1900 /*
1901 * We have done an ON -> OFF transition:
1902 */
1903 curr->softirqs_enabled = 0;
1904 curr->softirq_disable_ip = ip;
1905 curr->softirq_disable_event = ++curr->irq_events;
1906 debug_atomic_inc(&softirqs_off_events);
1907 DEBUG_LOCKS_WARN_ON(!softirq_count());
1908 } else
1909 debug_atomic_inc(&redundant_softirqs_off);
1910}
1911
1912#endif
1913
1914/*
1915 * Initialize a lock instance's lock-class mapping info:
1916 */
1917void lockdep_init_map(struct lockdep_map *lock, const char *name,
1918 struct lock_class_key *key)
1919{
1920 if (unlikely(!debug_locks))
1921 return;
1922
1923 if (DEBUG_LOCKS_WARN_ON(!key))
1924 return;
1925 if (DEBUG_LOCKS_WARN_ON(!name))
1926 return;
1927 /*
1928 * Sanity check, the lock-class key must be persistent:
1929 */
1930 if (!static_obj(key)) {
1931 printk("BUG: key %p not in .data!\n", key);
1932 DEBUG_LOCKS_WARN_ON(1);
1933 return;
1934 }
1935 lock->name = name;
1936 lock->key = key;
1937 memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES);
1938}
1939
1940EXPORT_SYMBOL_GPL(lockdep_init_map);
1941
1942/*
1943 * This gets called for every mutex_lock*()/spin_lock*() operation.
1944 * We maintain the dependency maps and validate the locking attempt:
1945 */
1946static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
1947 int trylock, int read, int check, int hardirqs_off,
1948 unsigned long ip)
1949{
1950 struct task_struct *curr = current;
1951 struct held_lock *hlock;
1952 struct lock_class *class;
1953 unsigned int depth, id;
1954 int chain_head = 0;
1955 u64 chain_key;
1956
1957 if (unlikely(!debug_locks))
1958 return 0;
1959
1960 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
1961 return 0;
1962
1963 if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
1964 debug_locks_off();
1965 printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
1966 printk("turning off the locking correctness validator.\n");
1967 return 0;
1968 }
1969
1970 class = lock->class[subclass];
1971 /* not cached yet? */
1972 if (unlikely(!class)) {
1973 class = register_lock_class(lock, subclass);
1974 if (!class)
1975 return 0;
1976 }
1977 debug_atomic_inc((atomic_t *)&class->ops);
1978 if (very_verbose(class)) {
1979 printk("\nacquire class [%p] %s", class->key, class->name);
1980 if (class->name_version > 1)
1981 printk("#%d", class->name_version);
1982 printk("\n");
1983 dump_stack();
1984 }
1985
1986 /*
1987 * Add the lock to the list of currently held locks.
1988 * (we dont increase the depth just yet, up until the
1989 * dependency checks are done)
1990 */
1991 depth = curr->lockdep_depth;
1992 if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
1993 return 0;
1994
1995 hlock = curr->held_locks + depth;
1996
1997 hlock->class = class;
1998 hlock->acquire_ip = ip;
1999 hlock->instance = lock;
2000 hlock->trylock = trylock;
2001 hlock->read = read;
2002 hlock->check = check;
2003 hlock->hardirqs_off = hardirqs_off;
2004
2005 if (check != 2)
2006 goto out_calc_hash;
2007#ifdef CONFIG_TRACE_IRQFLAGS
2008 /*
2009 * If non-trylock use in a hardirq or softirq context, then
2010 * mark the lock as used in these contexts:
2011 */
2012 if (!trylock) {
2013 if (read) {
2014 if (curr->hardirq_context)
2015 if (!mark_lock(curr, hlock,
2016 LOCK_USED_IN_HARDIRQ_READ, ip))
2017 return 0;
2018 if (curr->softirq_context)
2019 if (!mark_lock(curr, hlock,
2020 LOCK_USED_IN_SOFTIRQ_READ, ip))
2021 return 0;
2022 } else {
2023 if (curr->hardirq_context)
2024 if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip))
2025 return 0;
2026 if (curr->softirq_context)
2027 if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip))
2028 return 0;
2029 }
2030 }
2031 if (!hardirqs_off) {
2032 if (read) {
2033 if (!mark_lock(curr, hlock,
2034 LOCK_ENABLED_HARDIRQS_READ, ip))
2035 return 0;
2036 if (curr->softirqs_enabled)
2037 if (!mark_lock(curr, hlock,
2038 LOCK_ENABLED_SOFTIRQS_READ, ip))
2039 return 0;
2040 } else {
2041 if (!mark_lock(curr, hlock,
2042 LOCK_ENABLED_HARDIRQS, ip))
2043 return 0;
2044 if (curr->softirqs_enabled)
2045 if (!mark_lock(curr, hlock,
2046 LOCK_ENABLED_SOFTIRQS, ip))
2047 return 0;
2048 }
2049 }
2050#endif
2051 /* mark it as used: */
2052 if (!mark_lock(curr, hlock, LOCK_USED, ip))
2053 return 0;
2054out_calc_hash:
2055 /*
2056	 * Calculate the chain hash: it's the combined hash of all the
2057 * lock keys along the dependency chain. We save the hash value
2058 * at every step so that we can get the current hash easily
2059 * after unlock. The chain hash is then used to cache dependency
2060 * results.
2061 *
2062	 * The 'key ID' (class - lock_classes) is the most compact key value,
2063	 * so it is used to drive the hash instead of class->key.
2064 */
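/*
 * I.e. chain_key is built by folding in id_1, id_2, ... id_n via
 * iterate_chain_key(), where id_i is (class - lock_classes) of the
 * i-th held lock; check_chain_key() recomputes the same value from
 * scratch as a debug cross-check.
 */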
2065 id = class - lock_classes;
2066 if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
2067 return 0;
2068
2069 chain_key = curr->curr_chain_key;
2070 if (!depth) {
2071 if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
2072 return 0;
2073 chain_head = 1;
2074 }
2075
2076 hlock->prev_chain_key = chain_key;
2077
2078#ifdef CONFIG_TRACE_IRQFLAGS
2079 /*
2080 * Keep track of points where we cross into an interrupt context:
2081 */
2082 hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
2083 curr->softirq_context;
2084 if (depth) {
2085 struct held_lock *prev_hlock;
2086
2087 prev_hlock = curr->held_locks + depth-1;
2088 /*
2089 * If we cross into another context, reset the
2090 * hash key (this also prevents the checking and the
2091 * adding of the dependency to 'prev'):
2092 */
2093 if (prev_hlock->irq_context != hlock->irq_context) {
2094 chain_key = 0;
2095 chain_head = 1;
2096 }
2097 }
2098#endif
2099 chain_key = iterate_chain_key(chain_key, id);
2100 curr->curr_chain_key = chain_key;
2101
2102 /*
2103 * Trylock needs to maintain the stack of held locks, but it
2104 * does not add new dependencies, because trylock can be done
2105 * in any order.
2106 *
2107 * We look up the chain_key and do the O(N^2) check and update of
2108 * the dependencies only if this is a new dependency chain.
2109 * (If lookup_chain_cache() returns with 1 it acquires
2110 * hash_lock for us)
2111 */
2112 if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) {
2113 /*
2114 * Check whether last held lock:
2115 *
2116 * - is irq-safe, if this lock is irq-unsafe
2117 * - is softirq-safe, if this lock is hardirq-unsafe
2118 *
2119 * And check whether the new lock's dependency graph
2120 * could lead back to the previous lock.
2121 *
2122	 * any of these scenarios could lead to a deadlock. If
2123	 * all validations pass, the new dependencies are added below.
2124 */
2125 int ret = check_deadlock(curr, hlock, lock, read);
2126
2127 if (!ret)
2128 return 0;
2129 /*
2130 * Mark recursive read, as we jump over it when
2131 * building dependencies (just like we jump over
2132 * trylock entries):
2133 */
2134 if (ret == 2)
2135 hlock->read = 2;
2136 /*
2137 * Add dependency only if this lock is not the head
2138 * of the chain, and if it's not a secondary read-lock:
2139 */
2140 if (!chain_head && ret != 2)
2141 if (!check_prevs_add(curr, hlock))
2142 return 0;
2143 __raw_spin_unlock(&hash_lock);
2144 }
2145 curr->lockdep_depth++;
2146 check_chain_key(curr);
2147 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
2148 debug_locks_off();
2149 printk("BUG: MAX_LOCK_DEPTH too low!\n");
2150 printk("turning off the locking correctness validator.\n");
2151 return 0;
2152 }
2153 if (unlikely(curr->lockdep_depth > max_lockdep_depth))
2154 max_lockdep_depth = curr->lockdep_depth;
2155
2156 return 1;
2157}
2158
2159static int
2160print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
2161 unsigned long ip)
2162{
2163 if (!debug_locks_off())
2164 return 0;
2165 if (debug_locks_silent)
2166 return 0;
2167
2168 printk("\n=====================================\n");
2169 printk( "[ BUG: bad unlock balance detected! ]\n");
2170 printk( "-------------------------------------\n");
2171 printk("%s/%d is trying to release lock (",
2172 curr->comm, curr->pid);
2173 print_lockdep_cache(lock);
2174 printk(") at:\n");
2175 print_ip_sym(ip);
2176 printk("but there are no more locks to release!\n");
2177 printk("\nother info that might help us debug this:\n");
2178 lockdep_print_held_locks(curr);
2179
2180 printk("\nstack backtrace:\n");
2181 dump_stack();
2182
2183 return 0;
2184}
2185
2186/*
2187 * Common debugging checks for both nested and non-nested unlock:
2188 */
2189static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
2190 unsigned long ip)
2191{
2192 if (unlikely(!debug_locks))
2193 return 0;
2194 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2195 return 0;
2196
2197 if (curr->lockdep_depth <= 0)
2198 return print_unlock_inbalance_bug(curr, lock, ip);
2199
2200 return 1;
2201}
2202
2203/*
2204	 * Remove the lock from the list of currently held locks in a
2205 * potentially non-nested (out of order) manner. This is a
2206 * relatively rare operation, as all the unlock APIs default
2207 * to nested mode (which uses lock_release()):
2208 */
2209static int
2210lock_release_non_nested(struct task_struct *curr,
2211 struct lockdep_map *lock, unsigned long ip)
2212{
2213 struct held_lock *hlock, *prev_hlock;
2214 unsigned int depth;
2215 int i;
2216
2217 /*
2218 * Check whether the lock exists in the current stack
2219 * of held locks:
2220 */
2221 depth = curr->lockdep_depth;
2222 if (DEBUG_LOCKS_WARN_ON(!depth))
2223 return 0;
2224
2225 prev_hlock = NULL;
2226 for (i = depth-1; i >= 0; i--) {
2227 hlock = curr->held_locks + i;
2228 /*
2229 * We must not cross into another context:
2230 */
2231 if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
2232 break;
2233 if (hlock->instance == lock)
2234 goto found_it;
2235 prev_hlock = hlock;
2236 }
2237 return print_unlock_inbalance_bug(curr, lock, ip);
2238
2239found_it:
2240 /*
2241 * We have the right lock to unlock, 'hlock' points to it.
2242 * Now we remove it from the stack, and add back the other
2243 * entries (if any), recalculating the hash along the way:
2244 */
2245 curr->lockdep_depth = i;
2246 curr->curr_chain_key = hlock->prev_chain_key;
2247
2248 for (i++; i < depth; i++) {
2249 hlock = curr->held_locks + i;
2250 if (!__lock_acquire(hlock->instance,
2251 hlock->class->subclass, hlock->trylock,
2252 hlock->read, hlock->check, hlock->hardirqs_off,
2253 hlock->acquire_ip))
2254 return 0;
2255 }
2256
2257 if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
2258 return 0;
2259 return 1;
2260}
2261
2262/*
2263	 * Remove the lock from the list of currently held locks - this gets
2264 * called on mutex_unlock()/spin_unlock*() (or on a failed
2265 * mutex_lock_interruptible()). This is done for unlocks that nest
2266 * perfectly. (i.e. the current top of the lock-stack is unlocked)
2267 */
2268static int lock_release_nested(struct task_struct *curr,
2269 struct lockdep_map *lock, unsigned long ip)
2270{
2271 struct held_lock *hlock;
2272 unsigned int depth;
2273
2274 /*
2275 * Pop off the top of the lock stack:
2276 */
2277 depth = curr->lockdep_depth - 1;
2278 hlock = curr->held_locks + depth;
2279
2280 /*
2281 * Is the unlock non-nested:
2282 */
2283 if (hlock->instance != lock)
2284 return lock_release_non_nested(curr, lock, ip);
2285 curr->lockdep_depth--;
2286
2287 if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
2288 return 0;
2289
2290 curr->curr_chain_key = hlock->prev_chain_key;
2291
2292#ifdef CONFIG_DEBUG_LOCKDEP
2293 hlock->prev_chain_key = 0;
2294 hlock->class = NULL;
2295 hlock->acquire_ip = 0;
2296 hlock->irq_context = 0;
2297#endif
2298 return 1;
2299}
2300
2301/*
2302	 * Remove the lock from the list of currently held locks - this gets
2303	 * called on mutex_unlock()/spin_unlock*() (or on a failed
2304	 * mutex_lock_interruptible()). Depending on 'nested', this either
2305	 * pops the top of the lock-stack or does a non-nested removal.
2306 */
2307static void
2308__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2309{
2310 struct task_struct *curr = current;
2311
2312 if (!check_unlock(curr, lock, ip))
2313 return;
2314
2315 if (nested) {
2316 if (!lock_release_nested(curr, lock, ip))
2317 return;
2318 } else {
2319 if (!lock_release_non_nested(curr, lock, ip))
2320 return;
2321 }
2322
2323 check_chain_key(curr);
2324}
2325
2326/*
2327 * Check whether we follow the irq-flags state precisely:
2328 */
2329static void check_flags(unsigned long flags)
2330{
2331#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS)
2332 if (!debug_locks)
2333 return;
2334
2335 if (irqs_disabled_flags(flags))
2336 DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled);
2337 else
2338 DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled);
2339
2340 /*
2341 * We dont accurately track softirq state in e.g.
2342 * hardirq contexts (such as on 4KSTACKS), so only
2343 * check if not in hardirq contexts:
2344 */
2345 if (!hardirq_count()) {
2346 if (softirq_count())
2347 DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
2348 else
2349 DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
2350 }
2351
2352 if (!debug_locks)
2353 print_irqtrace_events(current);
2354#endif
2355}
2356
2357/*
2358 * We are not always called with irqs disabled - do that here,
2359 * and also avoid lockdep recursion:
2360 */
2361void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2362 int trylock, int read, int check, unsigned long ip)
2363{
2364 unsigned long flags;
2365
2366 if (unlikely(current->lockdep_recursion))
2367 return;
2368
2369 raw_local_irq_save(flags);
2370 check_flags(flags);
2371
2372 current->lockdep_recursion = 1;
2373 __lock_acquire(lock, subclass, trylock, read, check,
2374 irqs_disabled_flags(flags), ip);
2375 current->lockdep_recursion = 0;
2376 raw_local_irq_restore(flags);
2377}
2378
2379EXPORT_SYMBOL_GPL(lock_acquire);
2380
2381void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
2382{
2383 unsigned long flags;
2384
2385 if (unlikely(current->lockdep_recursion))
2386 return;
2387
2388 raw_local_irq_save(flags);
2389 check_flags(flags);
2390 current->lockdep_recursion = 1;
2391 __lock_release(lock, nested, ip);
2392 current->lockdep_recursion = 0;
2393 raw_local_irq_restore(flags);
2394}
2395
2396EXPORT_SYMBOL_GPL(lock_release);
2397
2398/*
2399 * Used by the testsuite, sanitize the validator state
2400 * after a simulated failure:
2401 */
2402
2403void lockdep_reset(void)
2404{
2405 unsigned long flags;
2406
2407 raw_local_irq_save(flags);
2408 current->curr_chain_key = 0;
2409 current->lockdep_depth = 0;
2410 current->lockdep_recursion = 0;
2411 memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
2412 nr_hardirq_chains = 0;
2413 nr_softirq_chains = 0;
2414 nr_process_chains = 0;
2415 debug_locks = 1;
2416 raw_local_irq_restore(flags);
2417}
2418
2419static void zap_class(struct lock_class *class)
2420{
2421 int i;
2422
2423 /*
2424 * Remove all dependencies this lock is
2425 * involved in:
2426 */
2427 for (i = 0; i < nr_list_entries; i++) {
2428 if (list_entries[i].class == class)
2429 list_del_rcu(&list_entries[i].entry);
2430 }
2431 /*
2432 * Unhash the class and remove it from the all_lock_classes list:
2433 */
2434 list_del_rcu(&class->hash_entry);
2435 list_del_rcu(&class->lock_entry);
2436
2437}
2438
2439static inline int within(void *addr, void *start, unsigned long size)
2440{
2441 return addr >= start && addr < start + size;
2442}
2443
2444void lockdep_free_key_range(void *start, unsigned long size)
2445{
2446 struct lock_class *class, *next;
2447 struct list_head *head;
2448 unsigned long flags;
2449 int i;
2450
2451 raw_local_irq_save(flags);
2452 __raw_spin_lock(&hash_lock);
2453
2454 /*
2455 * Unhash all classes that were created by this module:
2456 */
2457 for (i = 0; i < CLASSHASH_SIZE; i++) {
2458 head = classhash_table + i;
2459 if (list_empty(head))
2460 continue;
2461 list_for_each_entry_safe(class, next, head, hash_entry)
2462 if (within(class->key, start, size))
2463 zap_class(class);
2464 }
2465
2466 __raw_spin_unlock(&hash_lock);
2467 raw_local_irq_restore(flags);
2468}
2469
2470void lockdep_reset_lock(struct lockdep_map *lock)
2471{
2472 struct lock_class *class, *next, *entry;
2473 struct list_head *head;
2474 unsigned long flags;
2475 int i, j;
2476
2477 raw_local_irq_save(flags);
2478 __raw_spin_lock(&hash_lock);
2479
2480 /*
2481 * Remove all classes this lock has:
2482 */
2483 for (i = 0; i < CLASSHASH_SIZE; i++) {
2484 head = classhash_table + i;
2485 if (list_empty(head))
2486 continue;
2487 list_for_each_entry_safe(class, next, head, hash_entry) {
2488 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
2489 entry = lock->class[j];
2490 if (class == entry) {
2491 zap_class(class);
2492 lock->class[j] = NULL;
2493 break;
2494 }
2495 }
2496 }
2497 }
2498
2499 /*
2500 * Debug check: in the end all mapped classes should
2501 * be gone.
2502 */
2503 for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
2504 entry = lock->class[j];
2505 if (!entry)
2506 continue;
2507 __raw_spin_unlock(&hash_lock);
2508 DEBUG_LOCKS_WARN_ON(1);
2509 raw_local_irq_restore(flags);
2510 return;
2511 }
2512
2513 __raw_spin_unlock(&hash_lock);
2514 raw_local_irq_restore(flags);
2515}
2516
2517void __init lockdep_init(void)
2518{
2519 int i;
2520
2521 /*
2522 * Some architectures have their own start_kernel()
2523 * code which calls lockdep_init(), while we also
2524 * call lockdep_init() from the start_kernel() itself,
2525 * and we want to initialize the hashes only once:
2526 */
2527 if (lockdep_initialized)
2528 return;
2529
2530 for (i = 0; i < CLASSHASH_SIZE; i++)
2531 INIT_LIST_HEAD(classhash_table + i);
2532
2533 for (i = 0; i < CHAINHASH_SIZE; i++)
2534 INIT_LIST_HEAD(chainhash_table + i);
2535
2536 lockdep_initialized = 1;
2537}
2538
2539void __init lockdep_info(void)
2540{
2541 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
2542
2543 printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
2544 printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
2545 printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
2546 printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
2547 printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
2548 printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
2549 printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
2550
2551 printk(" memory used by lock dependency info: %lu kB\n",
2552 (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
2553 sizeof(struct list_head) * CLASSHASH_SIZE +
2554 sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
2555 sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
2556 sizeof(struct list_head) * CHAINHASH_SIZE) / 1024);
2557
2558 printk(" per task-struct memory footprint: %lu bytes\n",
2559 sizeof(struct held_lock) * MAX_LOCK_DEPTH);
2560
2561#ifdef CONFIG_DEBUG_LOCKDEP
2562 if (lockdep_init_error)
2563 printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n");
2564#endif
2565}
2566
2567static inline int in_range(const void *start, const void *addr, const void *end)
2568{
2569 return addr >= start && addr <= end;
2570}
2571
2572static void
2573print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
2574 const void *mem_to)
2575{
2576 if (!debug_locks_off())
2577 return;
2578 if (debug_locks_silent)
2579 return;
2580
2581 printk("\n=========================\n");
2582 printk( "[ BUG: held lock freed! ]\n");
2583 printk( "-------------------------\n");
2584 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
2585 curr->comm, curr->pid, mem_from, mem_to-1);
2586 lockdep_print_held_locks(curr);
2587
2588 printk("\nstack backtrace:\n");
2589 dump_stack();
2590}
2591
2592/*
2593 * Called when kernel memory is freed (or unmapped), or if a lock
2594 * is destroyed or reinitialized - this code checks whether there is
2595 * any held lock in the memory range of <from> to <to>:
2596 */
2597void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
2598{
2599 const void *mem_to = mem_from + mem_len, *lock_from, *lock_to;
2600 struct task_struct *curr = current;
2601 struct held_lock *hlock;
2602 unsigned long flags;
2603 int i;
2604
2605 if (unlikely(!debug_locks))
2606 return;
2607
2608 local_irq_save(flags);
2609 for (i = 0; i < curr->lockdep_depth; i++) {
2610 hlock = curr->held_locks + i;
2611
2612 lock_from = (void *)hlock->instance;
2613 lock_to = (void *)(hlock->instance + 1);
2614
2615 if (!in_range(mem_from, lock_from, mem_to) &&
2616 !in_range(mem_from, lock_to, mem_to))
2617 continue;
2618
2619 print_freed_lock_bug(curr, mem_from, mem_to);
2620 break;
2621 }
2622 local_irq_restore(flags);
2623}
2624
2625static void print_held_locks_bug(struct task_struct *curr)
2626{
2627 if (!debug_locks_off())
2628 return;
2629 if (debug_locks_silent)
2630 return;
2631
2632 printk("\n=====================================\n");
2633 printk( "[ BUG: lock held at task exit time! ]\n");
2634 printk( "-------------------------------------\n");
2635 printk("%s/%d is exiting with locks still held!\n",
2636 curr->comm, curr->pid);
2637 lockdep_print_held_locks(curr);
2638
2639 printk("\nstack backtrace:\n");
2640 dump_stack();
2641}
2642
2643void debug_check_no_locks_held(struct task_struct *task)
2644{
2645 if (unlikely(task->lockdep_depth > 0))
2646 print_held_locks_bug(task);
2647}
2648
2649void debug_show_all_locks(void)
2650{
2651 struct task_struct *g, *p;
2652 int count = 10;
2653 int unlock = 1;
2654
2655 printk("\nShowing all locks held in the system:\n");
2656
2657 /*
2658 * Here we try to get the tasklist_lock as hard as possible,
2659 * if not successful after 2 seconds we ignore it (but keep
2660 * trying). This is to enable a debug printout even if a
2661 * tasklist_lock-holding task deadlocks or crashes.
2662 */
2663retry:
2664 if (!read_trylock(&tasklist_lock)) {
2665 if (count == 10)
2666 printk("hm, tasklist_lock locked, retrying... ");
2667 if (count) {
2668 count--;
2669 printk(" #%d", 10-count);
2670 mdelay(200);
2671 goto retry;
2672 }
2673 printk(" ignoring it.\n");
2674 unlock = 0;
2675 }
2676 if (count != 10)
2677 printk(" locked it.\n");
2678
2679 do_each_thread(g, p) {
2680 if (p->lockdep_depth)
2681 lockdep_print_held_locks(p);
2682 if (!unlock)
2683 if (read_trylock(&tasklist_lock))
2684 unlock = 1;
2685 } while_each_thread(g, p);
2686
2687 printk("\n");
2688 printk("=============================================\n\n");
2689
2690 if (unlock)
2691 read_unlock(&tasklist_lock);
2692}
2693
2694EXPORT_SYMBOL_GPL(debug_show_all_locks);
2695
2696void debug_show_held_locks(struct task_struct *task)
2697{
2698 lockdep_print_held_locks(task);
2699}
2700
2701EXPORT_SYMBOL_GPL(debug_show_held_locks);
2702
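/*
 * Hedged usage sketch, not part of the patch: the two GPL-exported
 * helpers above let other debugging code dump lock state.  A panic or
 * watchdog path might use them roughly as follows; the function name is
 * made up and the header carrying the declarations is assumed.
 */
#include <linux/sched.h>

static void example_dump_lock_state(void)
{
	debug_show_held_locks(current);	/* locks held by the current task */
	debug_show_all_locks();		/* locks held by every task */
}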
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
new file mode 100644
index 000000000000..0d355f24fe04
--- /dev/null
+++ b/kernel/lockdep_internals.h
@@ -0,0 +1,78 @@
1/*
2 * kernel/lockdep_internals.h
3 *
4 * Runtime locking correctness validator
5 *
6 * lockdep subsystem internal functions and variables.
7 */
8
9/*
10 * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
11 * we track.
12 *
13 * We use the per-lock dependency maps in two ways: we grow them by adding
14 * every to-be-taken lock to each currently held lock's own dependency
15 * table (if it's not there yet), and we check them for lock order
16 * conflicts and deadlocks.
17 */
18#define MAX_LOCKDEP_ENTRIES 8192UL
19
20#define MAX_LOCKDEP_KEYS_BITS 11
21#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
22
23#define MAX_LOCKDEP_CHAINS_BITS 13
24#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
25
26/*
27 * Stack-trace: tightly packed array of stack backtrace
28 * addresses. Protected by the hash_lock.
29 */
30#define MAX_STACK_TRACE_ENTRIES 131072UL
31
32extern struct list_head all_lock_classes;
33
34extern void
35get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4);
36
37extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
38
39extern unsigned long nr_lock_classes;
40extern unsigned long nr_list_entries;
41extern unsigned long nr_lock_chains;
42extern unsigned long nr_stack_trace_entries;
43
44extern unsigned int nr_hardirq_chains;
45extern unsigned int nr_softirq_chains;
46extern unsigned int nr_process_chains;
47extern unsigned int max_lockdep_depth;
48extern unsigned int max_recursion_depth;
49
50#ifdef CONFIG_DEBUG_LOCKDEP
51/*
52 * Various lockdep statistics:
53 */
54extern atomic_t chain_lookup_hits;
55extern atomic_t chain_lookup_misses;
56extern atomic_t hardirqs_on_events;
57extern atomic_t hardirqs_off_events;
58extern atomic_t redundant_hardirqs_on;
59extern atomic_t redundant_hardirqs_off;
60extern atomic_t softirqs_on_events;
61extern atomic_t softirqs_off_events;
62extern atomic_t redundant_softirqs_on;
63extern atomic_t redundant_softirqs_off;
64extern atomic_t nr_unused_locks;
65extern atomic_t nr_cyclic_checks;
66extern atomic_t nr_cyclic_check_recursions;
67extern atomic_t nr_find_usage_forwards_checks;
68extern atomic_t nr_find_usage_forwards_recursions;
69extern atomic_t nr_find_usage_backwards_checks;
70extern atomic_t nr_find_usage_backwards_recursions;
71# define debug_atomic_inc(ptr) atomic_inc(ptr)
72# define debug_atomic_dec(ptr) atomic_dec(ptr)
73# define debug_atomic_read(ptr) atomic_read(ptr)
74#else
75# define debug_atomic_inc(ptr) do { } while (0)
76# define debug_atomic_dec(ptr) do { } while (0)
77# define debug_atomic_read(ptr) 0
78#endif
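/*
 * For a sense of scale (plain arithmetic on the constants above, nothing
 * kernel-specific assumed): MAX_LOCKDEP_KEYS = 1 << 11 = 2048 lock
 * classes, MAX_LOCKDEP_CHAINS = 1 << 13 = 8192 lock chains, 8192 recorded
 * dependencies and 131072 saved stack-trace entries.  At 4 bytes per
 * entry on a 32-bit kernel, the stack-trace array alone comes to
 * 131072 * 4 / 1024 = 512 kB; the exact totals are what lockdep_info()
 * prints at boot.
 */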
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
new file mode 100644
index 000000000000..f6e72eaab3fa
--- /dev/null
+++ b/kernel/lockdep_proc.c
@@ -0,0 +1,345 @@
1/*
2 * kernel/lockdep_proc.c
3 *
4 * Runtime locking correctness validator
5 *
6 * Started by Ingo Molnar:
7 *
8 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
9 *
10 * Code for /proc/lockdep and /proc/lockdep_stats:
11 *
12 */
13#include <linux/sched.h>
14#include <linux/module.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <linux/kallsyms.h>
18#include <linux/debug_locks.h>
19
20#include "lockdep_internals.h"
21
22static void *l_next(struct seq_file *m, void *v, loff_t *pos)
23{
24 struct lock_class *class = v;
25
26 (*pos)++;
27
28 if (class->lock_entry.next != &all_lock_classes)
29 class = list_entry(class->lock_entry.next, struct lock_class,
30 lock_entry);
31 else
32 class = NULL;
33 m->private = class;
34
35 return class;
36}
37
38static void *l_start(struct seq_file *m, loff_t *pos)
39{
40 struct lock_class *class = m->private;
41
42 if (&class->lock_entry == all_lock_classes.next)
43 seq_printf(m, "all lock classes:\n");
44
45 return class;
46}
47
48static void l_stop(struct seq_file *m, void *v)
49{
50}
51
52static unsigned long count_forward_deps(struct lock_class *class)
53{
54 struct lock_list *entry;
55 unsigned long ret = 1;
56
57 /*
58 * Recurse this class's dependency list:
59 */
60 list_for_each_entry(entry, &class->locks_after, entry)
61 ret += count_forward_deps(entry->class);
62
63 return ret;
64}
65
66static unsigned long count_backward_deps(struct lock_class *class)
67{
68 struct lock_list *entry;
69 unsigned long ret = 1;
70
71 /*
72 * Recurse this class's dependency list:
73 */
74 list_for_each_entry(entry, &class->locks_before, entry)
75 ret += count_backward_deps(entry->class);
76
77 return ret;
78}
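/*
 * Worked example, illustrative only: for a plain chain A -> B -> C with no
 * shared sub-dependencies, count_forward_deps(A) = 1 + (1 + 1) = 3, i.e.
 * the class itself plus everything reachable after it.  A class reachable
 * via two different paths is counted once per path.
 */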
79
80static int l_show(struct seq_file *m, void *v)
81{
82 unsigned long nr_forward_deps, nr_backward_deps;
83 struct lock_class *class = m->private;
84 char str[128], c1, c2, c3, c4;
85 const char *name;
86
87 seq_printf(m, "%p", class->key);
88#ifdef CONFIG_DEBUG_LOCKDEP
89 seq_printf(m, " OPS:%8ld", class->ops);
90#endif
91 nr_forward_deps = count_forward_deps(class);
92 seq_printf(m, " FD:%5ld", nr_forward_deps);
93
94 nr_backward_deps = count_backward_deps(class);
95 seq_printf(m, " BD:%5ld", nr_backward_deps);
96
97 get_usage_chars(class, &c1, &c2, &c3, &c4);
98 seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
99
100 name = class->name;
101 if (!name) {
102 name = __get_key_name(class->key, str);
103 seq_printf(m, ": %s", name);
104	} else {
105 seq_printf(m, ": %s", name);
106 if (class->name_version > 1)
107 seq_printf(m, "#%d", class->name_version);
108 if (class->subclass)
109 seq_printf(m, "/%d", class->subclass);
110 }
111 seq_puts(m, "\n");
112
113 return 0;
114}
115
116static struct seq_operations lockdep_ops = {
117 .start = l_start,
118 .next = l_next,
119 .stop = l_stop,
120 .show = l_show,
121};
122
123static int lockdep_open(struct inode *inode, struct file *file)
124{
125 int res = seq_open(file, &lockdep_ops);
126 if (!res) {
127 struct seq_file *m = file->private_data;
128
129 if (!list_empty(&all_lock_classes))
130 m->private = list_entry(all_lock_classes.next,
131 struct lock_class, lock_entry);
132 else
133 m->private = NULL;
134 }
135 return res;
136}
137
138static struct file_operations proc_lockdep_operations = {
139 .open = lockdep_open,
140 .read = seq_read,
141 .llseek = seq_lseek,
142 .release = seq_release,
143};
144
145static void lockdep_stats_debug_show(struct seq_file *m)
146{
147#ifdef CONFIG_DEBUG_LOCKDEP
148 unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
149 hi2 = debug_atomic_read(&hardirqs_off_events),
150 hr1 = debug_atomic_read(&redundant_hardirqs_on),
151 hr2 = debug_atomic_read(&redundant_hardirqs_off),
152 si1 = debug_atomic_read(&softirqs_on_events),
153 si2 = debug_atomic_read(&softirqs_off_events),
154 sr1 = debug_atomic_read(&redundant_softirqs_on),
155 sr2 = debug_atomic_read(&redundant_softirqs_off);
156
157 seq_printf(m, " chain lookup misses: %11u\n",
158 debug_atomic_read(&chain_lookup_misses));
159 seq_printf(m, " chain lookup hits: %11u\n",
160 debug_atomic_read(&chain_lookup_hits));
161 seq_printf(m, " cyclic checks: %11u\n",
162 debug_atomic_read(&nr_cyclic_checks));
163 seq_printf(m, " cyclic-check recursions: %11u\n",
164 debug_atomic_read(&nr_cyclic_check_recursions));
165 seq_printf(m, " find-mask forwards checks: %11u\n",
166 debug_atomic_read(&nr_find_usage_forwards_checks));
167 seq_printf(m, " find-mask forwards recursions: %11u\n",
168 debug_atomic_read(&nr_find_usage_forwards_recursions));
169 seq_printf(m, " find-mask backwards checks: %11u\n",
170 debug_atomic_read(&nr_find_usage_backwards_checks));
171 seq_printf(m, " find-mask backwards recursions:%11u\n",
172 debug_atomic_read(&nr_find_usage_backwards_recursions));
173
174 seq_printf(m, " hardirq on events: %11u\n", hi1);
175 seq_printf(m, " hardirq off events: %11u\n", hi2);
176 seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
177 seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
178 seq_printf(m, " softirq on events: %11u\n", si1);
179 seq_printf(m, " softirq off events: %11u\n", si2);
180 seq_printf(m, " redundant softirq ons: %11u\n", sr1);
181 seq_printf(m, " redundant softirq offs: %11u\n", sr2);
182#endif
183}
184
185static int lockdep_stats_show(struct seq_file *m, void *v)
186{
187 struct lock_class *class;
188 unsigned long nr_unused = 0, nr_uncategorized = 0,
189 nr_irq_safe = 0, nr_irq_unsafe = 0,
190 nr_softirq_safe = 0, nr_softirq_unsafe = 0,
191 nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
192 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
193 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
194 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
195 sum_forward_deps = 0, factor = 0;
196
197 list_for_each_entry(class, &all_lock_classes, lock_entry) {
198
199 if (class->usage_mask == 0)
200 nr_unused++;
201 if (class->usage_mask == LOCKF_USED)
202 nr_uncategorized++;
203 if (class->usage_mask & LOCKF_USED_IN_IRQ)
204 nr_irq_safe++;
205 if (class->usage_mask & LOCKF_ENABLED_IRQS)
206 nr_irq_unsafe++;
207 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
208 nr_softirq_safe++;
209 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
210 nr_softirq_unsafe++;
211 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
212 nr_hardirq_safe++;
213 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
214 nr_hardirq_unsafe++;
215 if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
216 nr_irq_read_safe++;
217 if (class->usage_mask & LOCKF_ENABLED_IRQS_READ)
218 nr_irq_read_unsafe++;
219 if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
220 nr_softirq_read_safe++;
221 if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
222 nr_softirq_read_unsafe++;
223 if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
224 nr_hardirq_read_safe++;
225 if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
226 nr_hardirq_read_unsafe++;
227
228 sum_forward_deps += count_forward_deps(class);
229 }
230#ifdef CONFIG_DEBUG_LOCKDEP
231 DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
232#endif
233 seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
234 nr_lock_classes, MAX_LOCKDEP_KEYS);
235 seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
236 nr_list_entries, MAX_LOCKDEP_ENTRIES);
237 seq_printf(m, " indirect dependencies: %11lu\n",
238 sum_forward_deps);
239
240 /*
241 * Total number of dependencies:
242 *
243 * All irq-safe locks may nest inside irq-unsafe locks,
244 * plus all the other known dependencies:
245 */
246 seq_printf(m, " all direct dependencies: %11lu\n",
247 nr_irq_unsafe * nr_irq_safe +
248 nr_hardirq_unsafe * nr_hardirq_safe +
249 nr_list_entries);
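	/*
	 * Worked example with made-up numbers: 100 irq-safe and 400
	 * irq-unsafe classes, 80 hardirq-safe and 300 hardirq-unsafe
	 * classes, plus 3000 recorded list entries give
	 * 400*100 + 300*80 + 3000 = 67000 possible direct dependencies.
	 */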
250
251 /*
252 * Estimated factor between direct and indirect
253 * dependencies:
254 */
255 if (nr_list_entries)
256 factor = sum_forward_deps / nr_list_entries;
257
258 seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
259 nr_lock_chains, MAX_LOCKDEP_CHAINS);
260
261#ifdef CONFIG_TRACE_IRQFLAGS
262 seq_printf(m, " in-hardirq chains: %11u\n",
263 nr_hardirq_chains);
264 seq_printf(m, " in-softirq chains: %11u\n",
265 nr_softirq_chains);
266#endif
267 seq_printf(m, " in-process chains: %11u\n",
268 nr_process_chains);
269 seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
270 nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
271 seq_printf(m, " combined max dependencies: %11u\n",
272 (nr_hardirq_chains + 1) *
273 (nr_softirq_chains + 1) *
274 (nr_process_chains + 1)
275 );
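	/*
	 * Worked example with made-up numbers: 10 hardirq, 5 softirq and
	 * 2000 process-context chains bound the combined figure above by
	 * (10 + 1) * (5 + 1) * (2000 + 1) = 132066.
	 */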
276 seq_printf(m, " hardirq-safe locks: %11lu\n",
277 nr_hardirq_safe);
278 seq_printf(m, " hardirq-unsafe locks: %11lu\n",
279 nr_hardirq_unsafe);
280 seq_printf(m, " softirq-safe locks: %11lu\n",
281 nr_softirq_safe);
282 seq_printf(m, " softirq-unsafe locks: %11lu\n",
283 nr_softirq_unsafe);
284 seq_printf(m, " irq-safe locks: %11lu\n",
285 nr_irq_safe);
286 seq_printf(m, " irq-unsafe locks: %11lu\n",
287 nr_irq_unsafe);
288
289 seq_printf(m, " hardirq-read-safe locks: %11lu\n",
290 nr_hardirq_read_safe);
291 seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
292 nr_hardirq_read_unsafe);
293 seq_printf(m, " softirq-read-safe locks: %11lu\n",
294 nr_softirq_read_safe);
295 seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
296 nr_softirq_read_unsafe);
297 seq_printf(m, " irq-read-safe locks: %11lu\n",
298 nr_irq_read_safe);
299 seq_printf(m, " irq-read-unsafe locks: %11lu\n",
300 nr_irq_read_unsafe);
301
302 seq_printf(m, " uncategorized locks: %11lu\n",
303 nr_uncategorized);
304 seq_printf(m, " unused locks: %11lu\n",
305 nr_unused);
306 seq_printf(m, " max locking depth: %11u\n",
307 max_lockdep_depth);
308 seq_printf(m, " max recursion depth: %11u\n",
309 max_recursion_depth);
310 lockdep_stats_debug_show(m);
311 seq_printf(m, " debug_locks: %11u\n",
312 debug_locks);
313
314 return 0;
315}
316
317static int lockdep_stats_open(struct inode *inode, struct file *file)
318{
319 return single_open(file, lockdep_stats_show, NULL);
320}
321
322static struct file_operations proc_lockdep_stats_operations = {
323 .open = lockdep_stats_open,
324 .read = seq_read,
325 .llseek = seq_lseek,
326 .release = seq_release,
327};
328
329static int __init lockdep_proc_init(void)
330{
331 struct proc_dir_entry *entry;
332
333 entry = create_proc_entry("lockdep", S_IRUSR, NULL);
334 if (entry)
335 entry->proc_fops = &proc_lockdep_operations;
336
337 entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL);
338 if (entry)
339 entry->proc_fops = &proc_lockdep_stats_operations;
340
341 return 0;
342}
343
344__initcall(lockdep_proc_init);
345
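/*
 * Hedged, standalone userspace sketch: once the patch is applied, the two
 * files registered in lockdep_proc_init() above can simply be read.  Only
 * the file names come from the patch; everything else is illustrative.
 */
#include <stdio.h>

static void dump(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	dump("/proc/lockdep");		/* one line per known lock class */
	dump("/proc/lockdep_stats");	/* counters from lockdep_stats_show() */
	return 0;
}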
diff --git a/kernel/module.c b/kernel/module.c
index 281172f01e9a..35e1b1f859d7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1121,6 +1121,9 @@ static void free_module(struct module *mod)
1121 if (mod->percpu) 1121 if (mod->percpu)
1122 percpu_modfree(mod->percpu); 1122 percpu_modfree(mod->percpu);
1123 1123
1124 /* Free lock-classes: */
1125 lockdep_free_key_range(mod->module_core, mod->core_size);
1126
1124 /* Finally, free the core (containing the module structure) */ 1127 /* Finally, free the core (containing the module structure) */
1125 module_free(mod, mod->module_core); 1128 module_free(mod, mod->module_core);
1126} 1129}
@@ -2159,6 +2162,29 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
2159 return e; 2162 return e;
2160} 2163}
2161 2164
2165/*
2166 * Is this a valid module address?
2167 */
2168int is_module_address(unsigned long addr)
2169{
2170 unsigned long flags;
2171 struct module *mod;
2172
2173 spin_lock_irqsave(&modlist_lock, flags);
2174
2175 list_for_each_entry(mod, &modules, list) {
2176 if (within(addr, mod->module_core, mod->core_size)) {
2177 spin_unlock_irqrestore(&modlist_lock, flags);
2178 return 1;
2179 }
2180 }
2181
2182 spin_unlock_irqrestore(&modlist_lock, flags);
2183
2184 return 0;
2185}
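/*
 * Hedged usage sketch, not part of this hunk: elsewhere in the patch the
 * validator needs to know whether a lock (and therefore its class key)
 * lives in static storage.  Such a check might combine the core-image
 * bounds with the helper above; the function name is made up.
 */
static int example_is_static_object(void *obj)
{
	unsigned long addr = (unsigned long)obj;

	if (addr >= (unsigned long)&_stext && addr < (unsigned long)&_end)
		return 1;		/* inside the core kernel image */
	return is_module_address(addr);	/* or inside a loaded module */
}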
2186
2187
2162/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ 2188/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
2163struct module *__module_text_address(unsigned long addr) 2189struct module *__module_text_address(unsigned long addr)
2164{ 2190{
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index e38e4bac97ca..e3203c654dda 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -20,367 +20,19 @@
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/kallsyms.h> 21#include <linux/kallsyms.h>
22#include <linux/interrupt.h> 22#include <linux/interrupt.h>
23#include <linux/debug_locks.h>
23 24
24#include "mutex-debug.h" 25#include "mutex-debug.h"
25 26
26/* 27/*
27 * We need a global lock when we walk through the multi-process
28 * lock tree. Only used in the deadlock-debugging case.
29 */
30DEFINE_SPINLOCK(debug_mutex_lock);
31
32/*
33 * All locks held by all tasks, in a single global list:
34 */
35LIST_HEAD(debug_mutex_held_locks);
36
37/*
38 * In the debug case we carry the caller's instruction pointer into
39 * other functions, but we dont want the function argument overhead
40 * in the nondebug case - hence these macros:
41 */
42#define __IP_DECL__ , unsigned long ip
43#define __IP__ , ip
44#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
45
46/*
47 * "mutex debugging enabled" flag. We turn it off when we detect
48 * the first problem because we dont want to recurse back
49 * into the tracing code when doing error printk or
50 * executing a BUG():
51 */
52int debug_mutex_on = 1;
53
54static void printk_task(struct task_struct *p)
55{
56 if (p)
57 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
58 else
59 printk("<none>");
60}
61
62static void printk_ti(struct thread_info *ti)
63{
64 if (ti)
65 printk_task(ti->task);
66 else
67 printk("<none>");
68}
69
70static void printk_task_short(struct task_struct *p)
71{
72 if (p)
73 printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
74 else
75 printk("<none>");
76}
77
78static void printk_lock(struct mutex *lock, int print_owner)
79{
80 printk(" [%p] {%s}\n", lock, lock->name);
81
82 if (print_owner && lock->owner) {
83 printk(".. held by: ");
84 printk_ti(lock->owner);
85 printk("\n");
86 }
87 if (lock->owner) {
88 printk("... acquired at: ");
89 print_symbol("%s\n", lock->acquire_ip);
90 }
91}
92
93/*
94 * printk locks held by a task:
95 */
96static void show_task_locks(struct task_struct *p)
97{
98 switch (p->state) {
99 case TASK_RUNNING: printk("R"); break;
100 case TASK_INTERRUPTIBLE: printk("S"); break;
101 case TASK_UNINTERRUPTIBLE: printk("D"); break;
102 case TASK_STOPPED: printk("T"); break;
103 case EXIT_ZOMBIE: printk("Z"); break;
104 case EXIT_DEAD: printk("X"); break;
105 default: printk("?"); break;
106 }
107 printk_task(p);
108 if (p->blocked_on) {
109 struct mutex *lock = p->blocked_on->lock;
110
111 printk(" blocked on mutex:");
112 printk_lock(lock, 1);
113 } else
114 printk(" (not blocked on mutex)\n");
115}
116
117/*
118 * printk all locks held in the system (if filter == NULL),
119 * or all locks belonging to a single task (if filter != NULL):
120 */
121void show_held_locks(struct task_struct *filter)
122{
123 struct list_head *curr, *cursor = NULL;
124 struct mutex *lock;
125 struct thread_info *t;
126 unsigned long flags;
127 int count = 0;
128
129 if (filter) {
130 printk("------------------------------\n");
131 printk("| showing all locks held by: | (");
132 printk_task_short(filter);
133 printk("):\n");
134 printk("------------------------------\n");
135 } else {
136 printk("---------------------------\n");
137 printk("| showing all locks held: |\n");
138 printk("---------------------------\n");
139 }
140
141 /*
142 * Play safe and acquire the global trace lock. We
143 * cannot printk with that lock held so we iterate
144 * very carefully:
145 */
146next:
147 debug_spin_lock_save(&debug_mutex_lock, flags);
148 list_for_each(curr, &debug_mutex_held_locks) {
149 if (cursor && curr != cursor)
150 continue;
151 lock = list_entry(curr, struct mutex, held_list);
152 t = lock->owner;
153 if (filter && (t != filter->thread_info))
154 continue;
155 count++;
156 cursor = curr->next;
157 debug_spin_unlock_restore(&debug_mutex_lock, flags);
158
159 printk("\n#%03d: ", count);
160 printk_lock(lock, filter ? 0 : 1);
161 goto next;
162 }
163 debug_spin_unlock_restore(&debug_mutex_lock, flags);
164 printk("\n");
165}
166
167void mutex_debug_show_all_locks(void)
168{
169 struct task_struct *g, *p;
170 int count = 10;
171 int unlock = 1;
172
173 printk("\nShowing all blocking locks in the system:\n");
174
175 /*
176 * Here we try to get the tasklist_lock as hard as possible,
177 * if not successful after 2 seconds we ignore it (but keep
178 * trying). This is to enable a debug printout even if a
179 * tasklist_lock-holding task deadlocks or crashes.
180 */
181retry:
182 if (!read_trylock(&tasklist_lock)) {
183 if (count == 10)
184 printk("hm, tasklist_lock locked, retrying... ");
185 if (count) {
186 count--;
187 printk(" #%d", 10-count);
188 mdelay(200);
189 goto retry;
190 }
191 printk(" ignoring it.\n");
192 unlock = 0;
193 }
194 if (count != 10)
195 printk(" locked it.\n");
196
197 do_each_thread(g, p) {
198 show_task_locks(p);
199 if (!unlock)
200 if (read_trylock(&tasklist_lock))
201 unlock = 1;
202 } while_each_thread(g, p);
203
204 printk("\n");
205 show_held_locks(NULL);
206 printk("=============================================\n\n");
207
208 if (unlock)
209 read_unlock(&tasklist_lock);
210}
211
212static void report_deadlock(struct task_struct *task, struct mutex *lock,
213 struct mutex *lockblk, unsigned long ip)
214{
215 printk("\n%s/%d is trying to acquire this lock:\n",
216 current->comm, current->pid);
217 printk_lock(lock, 1);
218 printk("... trying at: ");
219 print_symbol("%s\n", ip);
220 show_held_locks(current);
221
222 if (lockblk) {
223 printk("but %s/%d is deadlocking current task %s/%d!\n\n",
224 task->comm, task->pid, current->comm, current->pid);
225 printk("\n%s/%d is blocked on this lock:\n",
226 task->comm, task->pid);
227 printk_lock(lockblk, 1);
228
229 show_held_locks(task);
230
231 printk("\n%s/%d's [blocked] stackdump:\n\n",
232 task->comm, task->pid);
233 show_stack(task, NULL);
234 }
235
236 printk("\n%s/%d's [current] stackdump:\n\n",
237 current->comm, current->pid);
238 dump_stack();
239 mutex_debug_show_all_locks();
240 printk("[ turning off deadlock detection. Please report this. ]\n\n");
241 local_irq_disable();
242}
243
244/*
245 * Recursively check for mutex deadlocks:
246 */
247static int check_deadlock(struct mutex *lock, int depth,
248 struct thread_info *ti, unsigned long ip)
249{
250 struct mutex *lockblk;
251 struct task_struct *task;
252
253 if (!debug_mutex_on)
254 return 0;
255
256 ti = lock->owner;
257 if (!ti)
258 return 0;
259
260 task = ti->task;
261 lockblk = NULL;
262 if (task->blocked_on)
263 lockblk = task->blocked_on->lock;
264
265 /* Self-deadlock: */
266 if (current == task) {
267 DEBUG_OFF();
268 if (depth)
269 return 1;
270 printk("\n==========================================\n");
271 printk( "[ BUG: lock recursion deadlock detected! |\n");
272 printk( "------------------------------------------\n");
273 report_deadlock(task, lock, NULL, ip);
274 return 0;
275 }
276
277 /* Ugh, something corrupted the lock data structure? */
278 if (depth > 20) {
279 DEBUG_OFF();
280 printk("\n===========================================\n");
281 printk( "[ BUG: infinite lock dependency detected!? |\n");
282 printk( "-------------------------------------------\n");
283 report_deadlock(task, lock, lockblk, ip);
284 return 0;
285 }
286
287 /* Recursively check for dependencies: */
288 if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) {
289 printk("\n============================================\n");
290 printk( "[ BUG: circular locking deadlock detected! ]\n");
291 printk( "--------------------------------------------\n");
292 report_deadlock(task, lock, lockblk, ip);
293 return 0;
294 }
295 return 0;
296}
297
298/*
299 * Called when a task exits, this function checks whether the
300 * task is holding any locks, and reports the first one if so:
301 */
302void mutex_debug_check_no_locks_held(struct task_struct *task)
303{
304 struct list_head *curr, *next;
305 struct thread_info *t;
306 unsigned long flags;
307 struct mutex *lock;
308
309 if (!debug_mutex_on)
310 return;
311
312 debug_spin_lock_save(&debug_mutex_lock, flags);
313 list_for_each_safe(curr, next, &debug_mutex_held_locks) {
314 lock = list_entry(curr, struct mutex, held_list);
315 t = lock->owner;
316 if (t != task->thread_info)
317 continue;
318 list_del_init(curr);
319 DEBUG_OFF();
320 debug_spin_unlock_restore(&debug_mutex_lock, flags);
321
322 printk("BUG: %s/%d, lock held at task exit time!\n",
323 task->comm, task->pid);
324 printk_lock(lock, 1);
325 if (lock->owner != task->thread_info)
326 printk("exiting task is not even the owner??\n");
327 return;
328 }
329 debug_spin_unlock_restore(&debug_mutex_lock, flags);
330}
331
332/*
333 * Called when kernel memory is freed (or unmapped), or if a mutex
334 * is destroyed or reinitialized - this code checks whether there is
335 * any held lock in the memory range of <from> to <to>:
336 */
337void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
338{
339 struct list_head *curr, *next;
340 const void *to = from + len;
341 unsigned long flags;
342 struct mutex *lock;
343 void *lock_addr;
344
345 if (!debug_mutex_on)
346 return;
347
348 debug_spin_lock_save(&debug_mutex_lock, flags);
349 list_for_each_safe(curr, next, &debug_mutex_held_locks) {
350 lock = list_entry(curr, struct mutex, held_list);
351 lock_addr = lock;
352 if (lock_addr < from || lock_addr >= to)
353 continue;
354 list_del_init(curr);
355 DEBUG_OFF();
356 debug_spin_unlock_restore(&debug_mutex_lock, flags);
357
358 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
359 current->comm, current->pid, lock, from, to);
360 dump_stack();
361 printk_lock(lock, 1);
362 if (lock->owner != current_thread_info())
363 printk("freeing task is not even the owner??\n");
364 return;
365 }
366 debug_spin_unlock_restore(&debug_mutex_lock, flags);
367}
368
369/*
370 * Must be called with lock->wait_lock held. 28 * Must be called with lock->wait_lock held.
371 */ 29 */
372void debug_mutex_set_owner(struct mutex *lock, 30void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
373 struct thread_info *new_owner __IP_DECL__)
374{ 31{
375 lock->owner = new_owner; 32 lock->owner = new_owner;
376 DEBUG_WARN_ON(!list_empty(&lock->held_list));
377 if (debug_mutex_on) {
378 list_add_tail(&lock->held_list, &debug_mutex_held_locks);
379 lock->acquire_ip = ip;
380 }
381} 33}
382 34
383void debug_mutex_init_waiter(struct mutex_waiter *waiter) 35void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
384{ 36{
385 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); 37 memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
386 waiter->magic = waiter; 38 waiter->magic = waiter;
@@ -389,23 +41,23 @@ void debug_mutex_init_waiter(struct mutex_waiter *waiter)
389 41
390void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) 42void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
391{ 43{
392 SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); 44 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
393 DEBUG_WARN_ON(list_empty(&lock->wait_list)); 45 DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
394 DEBUG_WARN_ON(waiter->magic != waiter); 46 DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
395 DEBUG_WARN_ON(list_empty(&waiter->list)); 47 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
396} 48}
397 49
398void debug_mutex_free_waiter(struct mutex_waiter *waiter) 50void debug_mutex_free_waiter(struct mutex_waiter *waiter)
399{ 51{
400 DEBUG_WARN_ON(!list_empty(&waiter->list)); 52 DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
401 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); 53 memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
402} 54}
403 55
404void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, 56void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
405 struct thread_info *ti __IP_DECL__) 57 struct thread_info *ti)
406{ 58{
407 SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); 59 SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
408 check_deadlock(lock, 0, ti, ip); 60
409 /* Mark the current thread as blocked on the lock: */ 61 /* Mark the current thread as blocked on the lock: */
410 ti->task->blocked_on = waiter; 62 ti->task->blocked_on = waiter;
411 waiter->lock = lock; 63 waiter->lock = lock;
@@ -414,9 +66,9 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
414void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 66void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
415 struct thread_info *ti) 67 struct thread_info *ti)
416{ 68{
417 DEBUG_WARN_ON(list_empty(&waiter->list)); 69 DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
418 DEBUG_WARN_ON(waiter->task != ti->task); 70 DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
419 DEBUG_WARN_ON(ti->task->blocked_on != waiter); 71 DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
420 ti->task->blocked_on = NULL; 72 ti->task->blocked_on = NULL;
421 73
422 list_del_init(&waiter->list); 74 list_del_init(&waiter->list);
@@ -425,24 +77,23 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
425 77
426void debug_mutex_unlock(struct mutex *lock) 78void debug_mutex_unlock(struct mutex *lock)
427{ 79{
428 DEBUG_WARN_ON(lock->magic != lock); 80 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
429 DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 81 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
430 DEBUG_WARN_ON(lock->owner != current_thread_info()); 82 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
431 if (debug_mutex_on) { 83 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
432 DEBUG_WARN_ON(list_empty(&lock->held_list));
433 list_del_init(&lock->held_list);
434 }
435} 84}
436 85
437void debug_mutex_init(struct mutex *lock, const char *name) 86void debug_mutex_init(struct mutex *lock, const char *name,
87 struct lock_class_key *key)
438{ 88{
89#ifdef CONFIG_DEBUG_LOCK_ALLOC
439 /* 90 /*
440 * Make sure we are not reinitializing a held lock: 91 * Make sure we are not reinitializing a held lock:
441 */ 92 */
442 mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 93 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
94 lockdep_init_map(&lock->dep_map, name, key);
95#endif
443 lock->owner = NULL; 96 lock->owner = NULL;
444 INIT_LIST_HEAD(&lock->held_list);
445 lock->name = name;
446 lock->magic = lock; 97 lock->magic = lock;
447} 98}
448 99
@@ -456,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name)
456 */ 107 */
457void fastcall mutex_destroy(struct mutex *lock) 108void fastcall mutex_destroy(struct mutex *lock)
458{ 109{
459 DEBUG_WARN_ON(mutex_is_locked(lock)); 110 DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
460 lock->magic = NULL; 111 lock->magic = NULL;
461} 112}
462 113
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index a5196c36a5fd..babfbdfc534b 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -10,110 +10,44 @@
10 * More details are in kernel/mutex-debug.c. 10 * More details are in kernel/mutex-debug.c.
11 */ 11 */
12 12
13extern spinlock_t debug_mutex_lock;
14extern struct list_head debug_mutex_held_locks;
15extern int debug_mutex_on;
16
17/*
18 * In the debug case we carry the caller's instruction pointer into
19 * other functions, but we dont want the function argument overhead
20 * in the nondebug case - hence these macros:
21 */
22#define __IP_DECL__ , unsigned long ip
23#define __IP__ , ip
24#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
25
26/* 13/*
27 * This must be called with lock->wait_lock held. 14 * This must be called with lock->wait_lock held.
28 */ 15 */
29extern void debug_mutex_set_owner(struct mutex *lock, 16extern void
30 struct thread_info *new_owner __IP_DECL__); 17debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
31 18
32static inline void debug_mutex_clear_owner(struct mutex *lock) 19static inline void debug_mutex_clear_owner(struct mutex *lock)
33{ 20{
34 lock->owner = NULL; 21 lock->owner = NULL;
35} 22}
36 23
37extern void debug_mutex_init_waiter(struct mutex_waiter *waiter); 24extern void debug_mutex_lock_common(struct mutex *lock,
25 struct mutex_waiter *waiter);
38extern void debug_mutex_wake_waiter(struct mutex *lock, 26extern void debug_mutex_wake_waiter(struct mutex *lock,
39 struct mutex_waiter *waiter); 27 struct mutex_waiter *waiter);
40extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); 28extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
41extern void debug_mutex_add_waiter(struct mutex *lock, 29extern void debug_mutex_add_waiter(struct mutex *lock,
42 struct mutex_waiter *waiter, 30 struct mutex_waiter *waiter,
43 struct thread_info *ti __IP_DECL__); 31 struct thread_info *ti);
44extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, 32extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
45 struct thread_info *ti); 33 struct thread_info *ti);
46extern void debug_mutex_unlock(struct mutex *lock); 34extern void debug_mutex_unlock(struct mutex *lock);
47extern void debug_mutex_init(struct mutex *lock, const char *name); 35extern void debug_mutex_init(struct mutex *lock, const char *name,
48 36 struct lock_class_key *key);
49#define debug_spin_lock_save(lock, flags) \
50 do { \
51 local_irq_save(flags); \
52 if (debug_mutex_on) \
53 spin_lock(lock); \
54 } while (0)
55
56#define debug_spin_unlock_restore(lock, flags) \
57 do { \
58 if (debug_mutex_on) \
59 spin_unlock(lock); \
60 local_irq_restore(flags); \
61 preempt_check_resched(); \
62 } while (0)
63 37
64#define spin_lock_mutex(lock, flags) \ 38#define spin_lock_mutex(lock, flags) \
65 do { \ 39 do { \
66 struct mutex *l = container_of(lock, struct mutex, wait_lock); \ 40 struct mutex *l = container_of(lock, struct mutex, wait_lock); \
67 \ 41 \
68 DEBUG_WARN_ON(in_interrupt()); \ 42 DEBUG_LOCKS_WARN_ON(in_interrupt()); \
69 debug_spin_lock_save(&debug_mutex_lock, flags); \ 43 local_irq_save(flags); \
70 spin_lock(lock); \ 44 __raw_spin_lock(&(lock)->raw_lock); \
71 DEBUG_WARN_ON(l->magic != l); \ 45 DEBUG_LOCKS_WARN_ON(l->magic != l); \
72 } while (0) 46 } while (0)
73 47
74#define spin_unlock_mutex(lock, flags) \ 48#define spin_unlock_mutex(lock, flags) \
75 do { \ 49 do { \
76 spin_unlock(lock); \ 50 __raw_spin_unlock(&(lock)->raw_lock); \
77 debug_spin_unlock_restore(&debug_mutex_lock, flags); \ 51 local_irq_restore(flags); \
52 preempt_check_resched(); \
78 } while (0) 53 } while (0)
79
80#define DEBUG_OFF() \
81do { \
82 if (debug_mutex_on) { \
83 debug_mutex_on = 0; \
84 console_verbose(); \
85 if (spin_is_locked(&debug_mutex_lock)) \
86 spin_unlock(&debug_mutex_lock); \
87 } \
88} while (0)
89
90#define DEBUG_BUG() \
91do { \
92 if (debug_mutex_on) { \
93 DEBUG_OFF(); \
94 BUG(); \
95 } \
96} while (0)
97
98#define DEBUG_WARN_ON(c) \
99do { \
100 if (unlikely(c && debug_mutex_on)) { \
101 DEBUG_OFF(); \
102 WARN_ON(1); \
103 } \
104} while (0)
105
106# define DEBUG_BUG_ON(c) \
107do { \
108 if (unlikely(c)) \
109 DEBUG_BUG(); \
110} while (0)
111
112#ifdef CONFIG_SMP
113# define SMP_DEBUG_WARN_ON(c) DEBUG_WARN_ON(c)
114# define SMP_DEBUG_BUG_ON(c) DEBUG_BUG_ON(c)
115#else
116# define SMP_DEBUG_WARN_ON(c) do { } while (0)
117# define SMP_DEBUG_BUG_ON(c) do { } while (0)
118#endif
119
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 7043db21bbce..8c71cf72a497 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/debug_locks.h>
20 21
21/* 22/*
22 * In the DEBUG case we are using the "NULL fastpath" for mutexes, 23 * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -38,13 +39,14 @@
38 * 39 *
39 * It is not allowed to initialize an already locked mutex. 40 * It is not allowed to initialize an already locked mutex.
40 */ 41 */
41void fastcall __mutex_init(struct mutex *lock, const char *name) 42void
43__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
42{ 44{
43 atomic_set(&lock->count, 1); 45 atomic_set(&lock->count, 1);
44 spin_lock_init(&lock->wait_lock); 46 spin_lock_init(&lock->wait_lock);
45 INIT_LIST_HEAD(&lock->wait_list); 47 INIT_LIST_HEAD(&lock->wait_list);
46 48
47 debug_mutex_init(lock, name); 49 debug_mutex_init(lock, name, key);
48} 50}
49 51
50EXPORT_SYMBOL(__mutex_init); 52EXPORT_SYMBOL(__mutex_init);
@@ -56,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init);
56 * branch is predicted by the CPU as default-untaken. 58 * branch is predicted by the CPU as default-untaken.
57 */ 59 */
58static void fastcall noinline __sched 60static void fastcall noinline __sched
59__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__); 61__mutex_lock_slowpath(atomic_t *lock_count);
60 62
61/*** 63/***
62 * mutex_lock - acquire the mutex 64 * mutex_lock - acquire the mutex
@@ -79,7 +81,7 @@ __mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
79 * 81 *
80 * This function is similar to (but not equivalent to) down(). 82 * This function is similar to (but not equivalent to) down().
81 */ 83 */
82void fastcall __sched mutex_lock(struct mutex *lock) 84void inline fastcall __sched mutex_lock(struct mutex *lock)
83{ 85{
84 might_sleep(); 86 might_sleep();
85 /* 87 /*
@@ -92,7 +94,7 @@ void fastcall __sched mutex_lock(struct mutex *lock)
92EXPORT_SYMBOL(mutex_lock); 94EXPORT_SYMBOL(mutex_lock);
93 95
94static void fastcall noinline __sched 96static void fastcall noinline __sched
95__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__); 97__mutex_unlock_slowpath(atomic_t *lock_count);
96 98
97/*** 99/***
98 * mutex_unlock - release the mutex 100 * mutex_unlock - release the mutex
@@ -120,18 +122,18 @@ EXPORT_SYMBOL(mutex_unlock);
120 * Lock a mutex (possibly interruptible), slowpath: 122 * Lock a mutex (possibly interruptible), slowpath:
121 */ 123 */
122static inline int __sched 124static inline int __sched
123__mutex_lock_common(struct mutex *lock, long state __IP_DECL__) 125__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
124{ 126{
125 struct task_struct *task = current; 127 struct task_struct *task = current;
126 struct mutex_waiter waiter; 128 struct mutex_waiter waiter;
127 unsigned int old_val; 129 unsigned int old_val;
128 unsigned long flags; 130 unsigned long flags;
129 131
130 debug_mutex_init_waiter(&waiter);
131
132 spin_lock_mutex(&lock->wait_lock, flags); 132 spin_lock_mutex(&lock->wait_lock, flags);
133 133
134 debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); 134 debug_mutex_lock_common(lock, &waiter);
135 mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
136 debug_mutex_add_waiter(lock, &waiter, task->thread_info);
135 137
136 /* add waiting tasks to the end of the waitqueue (FIFO): */ 138 /* add waiting tasks to the end of the waitqueue (FIFO): */
137 list_add_tail(&waiter.list, &lock->wait_list); 139 list_add_tail(&waiter.list, &lock->wait_list);
@@ -158,6 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
158 if (unlikely(state == TASK_INTERRUPTIBLE && 160 if (unlikely(state == TASK_INTERRUPTIBLE &&
159 signal_pending(task))) { 161 signal_pending(task))) {
160 mutex_remove_waiter(lock, &waiter, task->thread_info); 162 mutex_remove_waiter(lock, &waiter, task->thread_info);
163 mutex_release(&lock->dep_map, 1, _RET_IP_);
161 spin_unlock_mutex(&lock->wait_lock, flags); 164 spin_unlock_mutex(&lock->wait_lock, flags);
162 165
163 debug_mutex_free_waiter(&waiter); 166 debug_mutex_free_waiter(&waiter);
@@ -173,7 +176,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
173 176
174 /* got the lock - rejoice! */ 177 /* got the lock - rejoice! */
175 mutex_remove_waiter(lock, &waiter, task->thread_info); 178 mutex_remove_waiter(lock, &waiter, task->thread_info);
176 debug_mutex_set_owner(lock, task->thread_info __IP__); 179 debug_mutex_set_owner(lock, task->thread_info);
177 180
178 /* set it to 0 if there are no waiters left: */ 181 /* set it to 0 if there are no waiters left: */
179 if (likely(list_empty(&lock->wait_list))) 182 if (likely(list_empty(&lock->wait_list)))
@@ -183,32 +186,40 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
183 186
184 debug_mutex_free_waiter(&waiter); 187 debug_mutex_free_waiter(&waiter);
185 188
186 DEBUG_WARN_ON(list_empty(&lock->held_list));
187 DEBUG_WARN_ON(lock->owner != task->thread_info);
188
189 return 0; 189 return 0;
190} 190}
191 191
192static void fastcall noinline __sched 192static void fastcall noinline __sched
193__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__) 193__mutex_lock_slowpath(atomic_t *lock_count)
194{ 194{
195 struct mutex *lock = container_of(lock_count, struct mutex, count); 195 struct mutex *lock = container_of(lock_count, struct mutex, count);
196 196
197 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__); 197 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
198}
199
200#ifdef CONFIG_DEBUG_LOCK_ALLOC
201void __sched
202mutex_lock_nested(struct mutex *lock, unsigned int subclass)
203{
204 might_sleep();
205 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
198} 206}
199 207
208EXPORT_SYMBOL_GPL(mutex_lock_nested);
209#endif
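/*
 * Hedged usage sketch, not part of the patch: when two mutexes of the same
 * lock class must legitimately nest, the inner acquisition is annotated
 * with a non-zero subclass so the validator does not report a recursive
 * deadlock.  The function name is made up.
 */
static void example_double_lock(struct mutex *outer, struct mutex *inner)
{
	mutex_lock(outer);
	mutex_lock_nested(inner, 1);	/* subclass 1: one level of nesting */
}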
210
200/* 211/*
201 * Release the lock, slowpath: 212 * Release the lock, slowpath:
202 */ 213 */
203static fastcall noinline void 214static fastcall inline void
204__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) 215__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
205{ 216{
206 struct mutex *lock = container_of(lock_count, struct mutex, count); 217 struct mutex *lock = container_of(lock_count, struct mutex, count);
207 unsigned long flags; 218 unsigned long flags;
208 219
209 DEBUG_WARN_ON(lock->owner != current_thread_info());
210
211 spin_lock_mutex(&lock->wait_lock, flags); 220 spin_lock_mutex(&lock->wait_lock, flags);
221 mutex_release(&lock->dep_map, nested, _RET_IP_);
222 debug_mutex_unlock(lock);
212 223
213 /* 224 /*
214 * some architectures leave the lock unlocked in the fastpath failure 225 * some architectures leave the lock unlocked in the fastpath failure
@@ -218,8 +229,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
218 if (__mutex_slowpath_needs_to_unlock()) 229 if (__mutex_slowpath_needs_to_unlock())
219 atomic_set(&lock->count, 1); 230 atomic_set(&lock->count, 1);
220 231
221 debug_mutex_unlock(lock);
222
223 if (!list_empty(&lock->wait_list)) { 232 if (!list_empty(&lock->wait_list)) {
224 /* get the first entry from the wait-list: */ 233 /* get the first entry from the wait-list: */
225 struct mutex_waiter *waiter = 234 struct mutex_waiter *waiter =
@@ -237,11 +246,20 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
237} 246}
238 247
239/* 248/*
249 * Release the lock, slowpath:
250 */
251static fastcall noinline void
252__mutex_unlock_slowpath(atomic_t *lock_count)
253{
254 __mutex_unlock_common_slowpath(lock_count, 1);
255}
256
257/*
240 * Here come the less common (and hence less performance-critical) APIs: 258 * Here come the less common (and hence less performance-critical) APIs:
241 * mutex_lock_interruptible() and mutex_trylock(). 259 * mutex_lock_interruptible() and mutex_trylock().
242 */ 260 */
243static int fastcall noinline __sched 261static int fastcall noinline __sched
244__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__); 262__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
245 263
246/*** 264/***
247 * mutex_lock_interruptible - acquire the mutex, interruptible              265 * mutex_lock_interruptible - acquire the mutex, interruptible
@@ -264,11 +282,11 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
264EXPORT_SYMBOL(mutex_lock_interruptible); 282EXPORT_SYMBOL(mutex_lock_interruptible);
265 283
266static int fastcall noinline __sched 284static int fastcall noinline __sched
267__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) 285__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
268{ 286{
269 struct mutex *lock = container_of(lock_count, struct mutex, count); 287 struct mutex *lock = container_of(lock_count, struct mutex, count);
270 288
271 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__); 289 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
272} 290}
273 291
274/* 292/*
@@ -284,8 +302,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
284 spin_lock_mutex(&lock->wait_lock, flags); 302 spin_lock_mutex(&lock->wait_lock, flags);
285 303
286 prev = atomic_xchg(&lock->count, -1); 304 prev = atomic_xchg(&lock->count, -1);
287 if (likely(prev == 1)) 305 if (likely(prev == 1)) {
288 debug_mutex_set_owner(lock, current_thread_info() __RET_IP__); 306 debug_mutex_set_owner(lock, current_thread_info());
307 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
308 }
289 /* Set it back to 0 if there are no waiters: */ 309 /* Set it back to 0 if there are no waiters: */
290 if (likely(list_empty(&lock->wait_list))) 310 if (likely(list_empty(&lock->wait_list)))
291 atomic_set(&lock->count, 0); 311 atomic_set(&lock->count, 0);
@@ -309,7 +329,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
309 * This function must not be used in interrupt context. The 329 * This function must not be used in interrupt context. The
310 * mutex must be released by the same task that acquired it. 330 * mutex must be released by the same task that acquired it.
311 */ 331 */
312int fastcall mutex_trylock(struct mutex *lock) 332int fastcall __sched mutex_trylock(struct mutex *lock)
313{ 333{
314 return __mutex_fastpath_trylock(&lock->count, 334 return __mutex_fastpath_trylock(&lock->count,
315 __mutex_trylock_slowpath); 335 __mutex_trylock_slowpath);
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 069189947257..a075dafbb290 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,22 +16,15 @@
16#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
18 18
19#define DEBUG_WARN_ON(c) do { } while (0)
20#define debug_mutex_set_owner(lock, new_owner) do { } while (0) 19#define debug_mutex_set_owner(lock, new_owner) do { } while (0)
21#define debug_mutex_clear_owner(lock) do { } while (0) 20#define debug_mutex_clear_owner(lock) do { } while (0)
22#define debug_mutex_init_waiter(waiter) do { } while (0)
23#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) 21#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
24#define debug_mutex_free_waiter(waiter) do { } while (0) 22#define debug_mutex_free_waiter(waiter) do { } while (0)
25#define debug_mutex_add_waiter(lock, waiter, ti, ip) do { } while (0) 23#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
26#define debug_mutex_unlock(lock) do { } while (0) 24#define debug_mutex_unlock(lock) do { } while (0)
27#define debug_mutex_init(lock, name) do { } while (0) 25#define debug_mutex_init(lock, name, key) do { } while (0)
28
29/*
30 * Return-address parameters/declarations. They are very useful for
31 * debugging, but add overhead in the !DEBUG case - so we go the
32 * trouble of using this not too elegant but zero-cost solution:
33 */
34#define __IP_DECL__
35#define __IP__
36#define __RET_IP__
37 26
27static inline void
28debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
29{
30}
diff --git a/kernel/pid.c b/kernel/pid.c
index eeb836b65ca4..93e212f20671 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr)
218 return NULL; 218 return NULL;
219} 219}
220 220
221int fastcall attach_pid(task_t *task, enum pid_type type, int nr) 221int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
222{ 222{
223 struct pid_link *link; 223 struct pid_link *link;
224 struct pid *pid; 224 struct pid *pid;
@@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
233 return 0; 233 return 0;
234} 234}
235 235
236void fastcall detach_pid(task_t *task, enum pid_type type) 236void fastcall detach_pid(struct task_struct *task, enum pid_type type)
237{ 237{
238 struct pid_link *link; 238 struct pid_link *link;
239 struct pid *pid; 239 struct pid *pid;
@@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
267/* 267/*
268 * Must be called under rcu_read_lock() or with tasklist_lock read-held. 268 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
269 */ 269 */
270task_t *find_task_by_pid_type(int type, int nr) 270struct task_struct *find_task_by_pid_type(int type, int nr)
271{ 271{
272 return pid_task(find_pid(nr), type); 272 return pid_task(find_pid(nr), type);
273} 273}
diff --git a/kernel/printk.c b/kernel/printk.c
index 39ae24d2a415..bdba5d80496c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -518,7 +518,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
518 zap_locks(); 518 zap_locks();
519 519
520 /* This stops the holder of console_sem just where we want him */ 520 /* This stops the holder of console_sem just where we want him */
521 spin_lock_irqsave(&logbuf_lock, flags); 521 local_irq_save(flags);
522 lockdep_off();
523 spin_lock(&logbuf_lock);
522 printk_cpu = smp_processor_id(); 524 printk_cpu = smp_processor_id();
523 525
524 /* Emit the output into the temporary buffer */ 526 /* Emit the output into the temporary buffer */
@@ -588,7 +590,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
588 */ 590 */
589 console_locked = 1; 591 console_locked = 1;
590 printk_cpu = UINT_MAX; 592 printk_cpu = UINT_MAX;
591 spin_unlock_irqrestore(&logbuf_lock, flags); 593 spin_unlock(&logbuf_lock);
592 594
593 /* 595 /*
594 * Console drivers may assume that per-cpu resources have 596 * Console drivers may assume that per-cpu resources have
@@ -604,6 +606,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
604 console_locked = 0; 606 console_locked = 0;
605 up(&console_sem); 607 up(&console_sem);
606 } 608 }
609 lockdep_on();
610 local_irq_restore(flags);
607 } else { 611 } else {
608 /* 612 /*
609 * Someone else owns the drivers. We drop the spinlock, which 613 * Someone else owns the drivers. We drop the spinlock, which
@@ -611,7 +615,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
611 * console drivers with the output which we just produced. 615 * console drivers with the output which we just produced.
612 */ 616 */
613 printk_cpu = UINT_MAX; 617 printk_cpu = UINT_MAX;
614 spin_unlock_irqrestore(&logbuf_lock, flags); 618 spin_unlock(&logbuf_lock);
619 lockdep_on();
620 local_irq_restore(flags);
615 } 621 }
616 622
617 preempt_enable(); 623 preempt_enable();
@@ -809,8 +815,15 @@ void release_console_sem(void)
809 console_may_schedule = 0; 815 console_may_schedule = 0;
810 up(&console_sem); 816 up(&console_sem);
811 spin_unlock_irqrestore(&logbuf_lock, flags); 817 spin_unlock_irqrestore(&logbuf_lock, flags);
812 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) 818 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
813 wake_up_interruptible(&log_wait); 819 /*
820 * If we printk from within the lock dependency code,
821 * from within the scheduler code, then do not lock
822 * up due to self-recursion:
823 */
824 if (!lockdep_internal())
825 wake_up_interruptible(&log_wait);
826 }
814} 827}
815EXPORT_SYMBOL(release_console_sem); 828EXPORT_SYMBOL(release_console_sem);
816 829
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 335c5b932e14..9a111f70145c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -28,7 +28,7 @@
28 * 28 *
29 * Must be called with the tasklist lock write-held. 29 * Must be called with the tasklist lock write-held.
30 */ 30 */
31void __ptrace_link(task_t *child, task_t *new_parent) 31void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
32{ 32{
33 BUG_ON(!list_empty(&child->ptrace_list)); 33 BUG_ON(!list_empty(&child->ptrace_list));
34 if (child->parent == new_parent) 34 if (child->parent == new_parent)
@@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent)
46 * TASK_TRACED, resume it now. 46 * TASK_TRACED, resume it now.
47 * Requires that irqs be disabled. 47 * Requires that irqs be disabled.
48 */ 48 */
49void ptrace_untrace(task_t *child) 49void ptrace_untrace(struct task_struct *child)
50{ 50{
51 spin_lock(&child->sighand->siglock); 51 spin_lock(&child->sighand->siglock);
52 if (child->state == TASK_TRACED) { 52 if (child->state == TASK_TRACED) {
@@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child)
65 * 65 *
66 * Must be called with the tasklist lock write-held. 66 * Must be called with the tasklist lock write-held.
67 */ 67 */
68void __ptrace_unlink(task_t *child) 68void __ptrace_unlink(struct task_struct *child)
69{ 69{
70 BUG_ON(!child->ptrace); 70 BUG_ON(!child->ptrace);
71 71
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f464f5ae3f11..759805c9859a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,13 +53,13 @@
53static struct rcu_ctrlblk rcu_ctrlblk = { 53static struct rcu_ctrlblk rcu_ctrlblk = {
54 .cur = -300, 54 .cur = -300,
55 .completed = -300, 55 .completed = -300,
56 .lock = SPIN_LOCK_UNLOCKED, 56 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
57 .cpumask = CPU_MASK_NONE, 57 .cpumask = CPU_MASK_NONE,
58}; 58};
59static struct rcu_ctrlblk rcu_bh_ctrlblk = { 59static struct rcu_ctrlblk rcu_bh_ctrlblk = {
60 .cur = -300, 60 .cur = -300,
61 .completed = -300, 61 .completed = -300,
62 .lock = SPIN_LOCK_UNLOCKED, 62 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
63 .cpumask = CPU_MASK_NONE, 63 .cpumask = CPU_MASK_NONE,
64}; 64};
65 65
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 4aa8a2c9f453..0c1faa950af7 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -26,6 +26,7 @@
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
27#include <linux/plist.h> 27#include <linux/plist.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29#include <linux/debug_locks.h>
29 30
30#include "rtmutex_common.h" 31#include "rtmutex_common.h"
31 32
@@ -45,8 +46,6 @@ do { \
45 console_verbose(); \ 46 console_verbose(); \
46 if (spin_is_locked(&current->pi_lock)) \ 47 if (spin_is_locked(&current->pi_lock)) \
47 spin_unlock(&current->pi_lock); \ 48 spin_unlock(&current->pi_lock); \
48 if (spin_is_locked(&current->held_list_lock)) \
49 spin_unlock(&current->held_list_lock); \
50 } \ 49 } \
51} while (0) 50} while (0)
52 51
@@ -97,7 +96,7 @@ void deadlock_trace_off(void)
97 rt_trace_on = 0; 96 rt_trace_on = 0;
98} 97}
99 98
100static void printk_task(task_t *p) 99static void printk_task(struct task_struct *p)
101{ 100{
102 if (p) 101 if (p)
103 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); 102 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
@@ -105,14 +104,6 @@ static void printk_task(task_t *p)
105 printk("<none>"); 104 printk("<none>");
106} 105}
107 106
108static void printk_task_short(task_t *p)
109{
110 if (p)
111 printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
112 else
113 printk("<none>");
114}
115
116static void printk_lock(struct rt_mutex *lock, int print_owner) 107static void printk_lock(struct rt_mutex *lock, int print_owner)
117{ 108{
118 if (lock->name) 109 if (lock->name)
@@ -128,222 +119,6 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
128 printk_task(rt_mutex_owner(lock)); 119 printk_task(rt_mutex_owner(lock));
129 printk("\n"); 120 printk("\n");
130 } 121 }
131 if (rt_mutex_owner(lock)) {
132 printk("... acquired at: ");
133 print_symbol("%s\n", lock->acquire_ip);
134 }
135}
136
137static void printk_waiter(struct rt_mutex_waiter *w)
138{
139 printk("-------------------------\n");
140 printk("| waiter struct %p:\n", w);
141 printk("| w->list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
142 w->list_entry.plist.prio_list.prev, w->list_entry.plist.prio_list.next,
143 w->list_entry.plist.node_list.prev, w->list_entry.plist.node_list.next,
144 w->list_entry.prio);
145 printk("| w->pi_list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
146 w->pi_list_entry.plist.prio_list.prev, w->pi_list_entry.plist.prio_list.next,
147 w->pi_list_entry.plist.node_list.prev, w->pi_list_entry.plist.node_list.next,
148 w->pi_list_entry.prio);
149 printk("\n| lock:\n");
150 printk_lock(w->lock, 1);
151 printk("| w->ti->task:\n");
152 printk_task(w->task);
153 printk("| blocked at: ");
154 print_symbol("%s\n", w->ip);
155 printk("-------------------------\n");
156}
157
158static void show_task_locks(task_t *p)
159{
160 switch (p->state) {
161 case TASK_RUNNING: printk("R"); break;
162 case TASK_INTERRUPTIBLE: printk("S"); break;
163 case TASK_UNINTERRUPTIBLE: printk("D"); break;
164 case TASK_STOPPED: printk("T"); break;
165 case EXIT_ZOMBIE: printk("Z"); break;
166 case EXIT_DEAD: printk("X"); break;
167 default: printk("?"); break;
168 }
169 printk_task(p);
170 if (p->pi_blocked_on) {
171 struct rt_mutex *lock = p->pi_blocked_on->lock;
172
173 printk(" blocked on:");
174 printk_lock(lock, 1);
175 } else
176 printk(" (not blocked)\n");
177}
178
179void rt_mutex_show_held_locks(task_t *task, int verbose)
180{
181 struct list_head *curr, *cursor = NULL;
182 struct rt_mutex *lock;
183 task_t *t;
184 unsigned long flags;
185 int count = 0;
186
187 if (!rt_trace_on)
188 return;
189
190 if (verbose) {
191 printk("------------------------------\n");
192 printk("| showing all locks held by: | (");
193 printk_task_short(task);
194 printk("):\n");
195 printk("------------------------------\n");
196 }
197
198next:
199 spin_lock_irqsave(&task->held_list_lock, flags);
200 list_for_each(curr, &task->held_list_head) {
201 if (cursor && curr != cursor)
202 continue;
203 lock = list_entry(curr, struct rt_mutex, held_list_entry);
204 t = rt_mutex_owner(lock);
205 WARN_ON(t != task);
206 count++;
207 cursor = curr->next;
208 spin_unlock_irqrestore(&task->held_list_lock, flags);
209
210 printk("\n#%03d: ", count);
211 printk_lock(lock, 0);
212 goto next;
213 }
214 spin_unlock_irqrestore(&task->held_list_lock, flags);
215
216 printk("\n");
217}
218
219void rt_mutex_show_all_locks(void)
220{
221 task_t *g, *p;
222 int count = 10;
223 int unlock = 1;
224
225 printk("\n");
226 printk("----------------------\n");
227 printk("| showing all tasks: |\n");
228 printk("----------------------\n");
229
230 /*
231 * Here we try to get the tasklist_lock as hard as possible,
232 * if not successful after 2 seconds we ignore it (but keep
233 * trying). This is to enable a debug printout even if a
234 * tasklist_lock-holding task deadlocks or crashes.
235 */
236retry:
237 if (!read_trylock(&tasklist_lock)) {
238 if (count == 10)
239 printk("hm, tasklist_lock locked, retrying... ");
240 if (count) {
241 count--;
242 printk(" #%d", 10-count);
243 mdelay(200);
244 goto retry;
245 }
246 printk(" ignoring it.\n");
247 unlock = 0;
248 }
249 if (count != 10)
250 printk(" locked it.\n");
251
252 do_each_thread(g, p) {
253 show_task_locks(p);
254 if (!unlock)
255 if (read_trylock(&tasklist_lock))
256 unlock = 1;
257 } while_each_thread(g, p);
258
259 printk("\n");
260
261 printk("-----------------------------------------\n");
262 printk("| showing all locks held in the system: |\n");
263 printk("-----------------------------------------\n");
264
265 do_each_thread(g, p) {
266 rt_mutex_show_held_locks(p, 0);
267 if (!unlock)
268 if (read_trylock(&tasklist_lock))
269 unlock = 1;
270 } while_each_thread(g, p);
271
272
273 printk("=============================================\n\n");
274
275 if (unlock)
276 read_unlock(&tasklist_lock);
277}
278
279void rt_mutex_debug_check_no_locks_held(task_t *task)
280{
281 struct rt_mutex_waiter *w;
282 struct list_head *curr;
283 struct rt_mutex *lock;
284
285 if (!rt_trace_on)
286 return;
287 if (!rt_prio(task->normal_prio) && rt_prio(task->prio)) {
288 printk("BUG: PI priority boost leaked!\n");
289 printk_task(task);
290 printk("\n");
291 }
292 if (list_empty(&task->held_list_head))
293 return;
294
295 spin_lock(&task->pi_lock);
296 plist_for_each_entry(w, &task->pi_waiters, pi_list_entry) {
297 TRACE_OFF();
298
299 printk("hm, PI interest held at exit time? Task:\n");
300 printk_task(task);
301 printk_waiter(w);
302 return;
303 }
304 spin_unlock(&task->pi_lock);
305
306 list_for_each(curr, &task->held_list_head) {
307 lock = list_entry(curr, struct rt_mutex, held_list_entry);
308
309 printk("BUG: %s/%d, lock held at task exit time!\n",
310 task->comm, task->pid);
311 printk_lock(lock, 1);
312 if (rt_mutex_owner(lock) != task)
313 printk("exiting task is not even the owner??\n");
314 }
315}
316
317int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
318{
319 const void *to = from + len;
320 struct list_head *curr;
321 struct rt_mutex *lock;
322 unsigned long flags;
323 void *lock_addr;
324
325 if (!rt_trace_on)
326 return 0;
327
328 spin_lock_irqsave(&current->held_list_lock, flags);
329 list_for_each(curr, &current->held_list_head) {
330 lock = list_entry(curr, struct rt_mutex, held_list_entry);
331 lock_addr = lock;
332 if (lock_addr < from || lock_addr >= to)
333 continue;
334 TRACE_OFF();
335
336 printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
337 current->comm, current->pid, lock, from, to);
338 dump_stack();
339 printk_lock(lock, 1);
340 if (rt_mutex_owner(lock) != current)
341 printk("freeing task is not even the owner??\n");
342 return 1;
343 }
344 spin_unlock_irqrestore(&current->held_list_lock, flags);
345
346 return 0;
347} 122}
348 123
349void rt_mutex_debug_task_free(struct task_struct *task) 124void rt_mutex_debug_task_free(struct task_struct *task)
@@ -395,85 +170,41 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
395 current->comm, current->pid); 170 current->comm, current->pid);
396 printk_lock(waiter->lock, 1); 171 printk_lock(waiter->lock, 1);
397 172
398 printk("... trying at: ");
399 print_symbol("%s\n", waiter->ip);
400
401 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid); 173 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
402 printk_lock(waiter->deadlock_lock, 1); 174 printk_lock(waiter->deadlock_lock, 1);
403 175
404 rt_mutex_show_held_locks(current, 1); 176 debug_show_held_locks(current);
405 rt_mutex_show_held_locks(task, 1); 177 debug_show_held_locks(task);
406 178
407 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid); 179 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
408 show_stack(task, NULL); 180 show_stack(task, NULL);
409 printk("\n%s/%d's [current] stackdump:\n\n", 181 printk("\n%s/%d's [current] stackdump:\n\n",
410 current->comm, current->pid); 182 current->comm, current->pid);
411 dump_stack(); 183 dump_stack();
412 rt_mutex_show_all_locks(); 184 debug_show_all_locks();
185
413 printk("[ turning off deadlock detection." 186 printk("[ turning off deadlock detection."
414 "Please report this trace. ]\n\n"); 187 "Please report this trace. ]\n\n");
415 local_irq_disable(); 188 local_irq_disable();
416} 189}
417 190
418void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__) 191void debug_rt_mutex_lock(struct rt_mutex *lock)
419{ 192{
420 unsigned long flags;
421
422 if (rt_trace_on) {
423 TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
424
425 spin_lock_irqsave(&current->held_list_lock, flags);
426 list_add_tail(&lock->held_list_entry, &current->held_list_head);
427 spin_unlock_irqrestore(&current->held_list_lock, flags);
428
429 lock->acquire_ip = ip;
430 }
431} 193}
432 194
433void debug_rt_mutex_unlock(struct rt_mutex *lock) 195void debug_rt_mutex_unlock(struct rt_mutex *lock)
434{ 196{
435 unsigned long flags; 197 TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
436
437 if (rt_trace_on) {
438 TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
439 TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
440
441 spin_lock_irqsave(&current->held_list_lock, flags);
442 list_del_init(&lock->held_list_entry);
443 spin_unlock_irqrestore(&current->held_list_lock, flags);
444 }
445} 198}
446 199
447void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 200void
448 struct task_struct *powner __IP_DECL__) 201debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
449{ 202{
450 unsigned long flags;
451
452 if (rt_trace_on) {
453 TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
454
455 spin_lock_irqsave(&powner->held_list_lock, flags);
456 list_add_tail(&lock->held_list_entry, &powner->held_list_head);
457 spin_unlock_irqrestore(&powner->held_list_lock, flags);
458
459 lock->acquire_ip = ip;
460 }
461} 203}
462 204
463void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) 205void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
464{ 206{
465 unsigned long flags; 207 TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
466
467 if (rt_trace_on) {
468 struct task_struct *owner = rt_mutex_owner(lock);
469
470 TRACE_WARN_ON_LOCKED(!owner);
471 TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
472
473 spin_lock_irqsave(&owner->held_list_lock, flags);
474 list_del_init(&lock->held_list_entry);
475 spin_unlock_irqrestore(&owner->held_list_lock, flags);
476 }
477} 208}
478 209
479void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) 210void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
@@ -493,17 +224,15 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
493 224
494void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) 225void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
495{ 226{
496 void *addr = lock; 227 /*
497 228 * Make sure we are not reinitializing a held lock:
498 if (rt_trace_on) { 229 */
499 rt_mutex_debug_check_no_locks_freed(addr, 230 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
500 sizeof(struct rt_mutex)); 231 lock->name = name;
501 INIT_LIST_HEAD(&lock->held_list_entry);
502 lock->name = name;
503 }
504} 232}
505 233
506void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task) 234void
235rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
507{ 236{
508} 237}
509 238
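With this change the rt-mutex debug code stops maintaining its own per-task held_list and leans on the generic helpers from <linux/debug_locks.h> instead: debug_show_held_locks() and debug_show_all_locks() replace the removed rt_mutex_show_*_locks() functions, and debug_check_no_locks_freed() guards debug_rt_mutex_init(). A rough sketch of how a debug path might use the generic API, assuming CONFIG_DEBUG_LOCK_ALLOC; report_stuck_task() is a made-up example, not part of this patch:

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/debug_locks.h>

/* Hypothetical watchdog-style report using the generic lock-debug helpers. */
static void report_stuck_task(struct task_struct *tsk)
{
	printk(KERN_ERR "task %s/%d looks stuck\n", tsk->comm, tsk->pid);
	debug_show_held_locks(tsk);	/* locks held by this one task */
	debug_show_all_locks();		/* system-wide dump, as in the deadlock path above */
}
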
diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h
index 7612fbc62d70..14193d596d78 100644
--- a/kernel/rtmutex-debug.h
+++ b/kernel/rtmutex-debug.h
@@ -9,20 +9,16 @@
9 * This file contains macros used solely by rtmutex.c. Debug version. 9 * This file contains macros used solely by rtmutex.c. Debug version.
10 */ 10 */
11 11
12#define __IP_DECL__ , unsigned long ip
13#define __IP__ , ip
14#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
15
16extern void 12extern void
17rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); 13rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
18extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); 14extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
19extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); 15extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
20extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); 16extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
21extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); 17extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
22extern void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__); 18extern void debug_rt_mutex_lock(struct rt_mutex *lock);
23extern void debug_rt_mutex_unlock(struct rt_mutex *lock); 19extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
24extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, 20extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
25 struct task_struct *powner __IP_DECL__); 21 struct task_struct *powner);
26extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); 22extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
27extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, 23extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
28 struct rt_mutex *lock); 24 struct rt_mutex *lock);
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index e82c2f848249..494dac872a13 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -33,7 +33,7 @@ struct test_thread_data {
33}; 33};
34 34
35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; 35static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
36static task_t *threads[MAX_RT_TEST_THREADS]; 36static struct task_struct *threads[MAX_RT_TEST_THREADS];
37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; 37static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
38 38
39enum test_opcodes { 39enum test_opcodes {
@@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
361static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) 361static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)
362{ 362{
363 struct test_thread_data *td; 363 struct test_thread_data *td;
364 struct task_struct *tsk;
364 char *curr = buf; 365 char *curr = buf;
365 task_t *tsk;
366 int i; 366 int i;
367 367
368 td = container_of(dev, struct test_thread_data, sysdev); 368 td = container_of(dev, struct test_thread_data, sysdev);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 45d61016da57..d2ef13b485e7 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -157,12 +157,11 @@ int max_lock_depth = 1024;
157 * Decreases task's usage by one - may thus free the task. 157 * Decreases task's usage by one - may thus free the task.
158 * Returns 0 or -EDEADLK. 158 * Returns 0 or -EDEADLK.
159 */ 159 */
160static int rt_mutex_adjust_prio_chain(task_t *task, 160static int rt_mutex_adjust_prio_chain(struct task_struct *task,
161 int deadlock_detect, 161 int deadlock_detect,
162 struct rt_mutex *orig_lock, 162 struct rt_mutex *orig_lock,
163 struct rt_mutex_waiter *orig_waiter, 163 struct rt_mutex_waiter *orig_waiter,
164 struct task_struct *top_task 164 struct task_struct *top_task)
165 __IP_DECL__)
166{ 165{
167 struct rt_mutex *lock; 166 struct rt_mutex *lock;
168 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 167 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -283,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task,
283 spin_unlock_irqrestore(&task->pi_lock, flags); 282 spin_unlock_irqrestore(&task->pi_lock, flags);
284 out_put_task: 283 out_put_task:
285 put_task_struct(task); 284 put_task_struct(task);
285
286 return ret; 286 return ret;
287} 287}
288 288
@@ -357,7 +357,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
357 * 357 *
358 * Must be called with lock->wait_lock held. 358 * Must be called with lock->wait_lock held.
359 */ 359 */
360static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) 360static int try_to_take_rt_mutex(struct rt_mutex *lock)
361{ 361{
362 /* 362 /*
363 * We have to be careful here if the atomic speedups are 363 * We have to be careful here if the atomic speedups are
@@ -384,7 +384,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
384 return 0; 384 return 0;
385 385
386 /* We got the lock. */ 386 /* We got the lock. */
387 debug_rt_mutex_lock(lock __IP__); 387 debug_rt_mutex_lock(lock);
388 388
389 rt_mutex_set_owner(lock, current, 0); 389 rt_mutex_set_owner(lock, current, 0);
390 390
@@ -402,13 +402,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
402 */ 402 */
403static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 403static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
404 struct rt_mutex_waiter *waiter, 404 struct rt_mutex_waiter *waiter,
405 int detect_deadlock 405 int detect_deadlock)
406 __IP_DECL__)
407{ 406{
407 struct task_struct *owner = rt_mutex_owner(lock);
408 struct rt_mutex_waiter *top_waiter = waiter; 408 struct rt_mutex_waiter *top_waiter = waiter;
409 task_t *owner = rt_mutex_owner(lock);
410 int boost = 0, res;
411 unsigned long flags; 409 unsigned long flags;
410 int boost = 0, res;
412 411
413 spin_lock_irqsave(&current->pi_lock, flags); 412 spin_lock_irqsave(&current->pi_lock, flags);
414 __rt_mutex_adjust_prio(current); 413 __rt_mutex_adjust_prio(current);
@@ -454,7 +453,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
454 spin_unlock(&lock->wait_lock); 453 spin_unlock(&lock->wait_lock);
455 454
456 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 455 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
457 current __IP__); 456 current);
458 457
459 spin_lock(&lock->wait_lock); 458 spin_lock(&lock->wait_lock);
460 459
@@ -526,12 +525,12 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
526 * Must be called with lock->wait_lock held 525 * Must be called with lock->wait_lock held
527 */ 526 */
528static void remove_waiter(struct rt_mutex *lock, 527static void remove_waiter(struct rt_mutex *lock,
529 struct rt_mutex_waiter *waiter __IP_DECL__) 528 struct rt_mutex_waiter *waiter)
530{ 529{
531 int first = (waiter == rt_mutex_top_waiter(lock)); 530 int first = (waiter == rt_mutex_top_waiter(lock));
532 int boost = 0; 531 struct task_struct *owner = rt_mutex_owner(lock);
533 task_t *owner = rt_mutex_owner(lock);
534 unsigned long flags; 532 unsigned long flags;
533 int boost = 0;
535 534
536 spin_lock_irqsave(&current->pi_lock, flags); 535 spin_lock_irqsave(&current->pi_lock, flags);
537 plist_del(&waiter->list_entry, &lock->wait_list); 536 plist_del(&waiter->list_entry, &lock->wait_list);
@@ -568,7 +567,7 @@ static void remove_waiter(struct rt_mutex *lock,
568 567
569 spin_unlock(&lock->wait_lock); 568 spin_unlock(&lock->wait_lock);
570 569
571 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current __IP__); 570 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
572 571
573 spin_lock(&lock->wait_lock); 572 spin_lock(&lock->wait_lock);
574} 573}
@@ -595,7 +594,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
595 get_task_struct(task); 594 get_task_struct(task);
596 spin_unlock_irqrestore(&task->pi_lock, flags); 595 spin_unlock_irqrestore(&task->pi_lock, flags);
597 596
598 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task __RET_IP__); 597 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
599} 598}
600 599
601/* 600/*
@@ -604,7 +603,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
604static int __sched 603static int __sched
605rt_mutex_slowlock(struct rt_mutex *lock, int state, 604rt_mutex_slowlock(struct rt_mutex *lock, int state,
606 struct hrtimer_sleeper *timeout, 605 struct hrtimer_sleeper *timeout,
607 int detect_deadlock __IP_DECL__) 606 int detect_deadlock)
608{ 607{
609 struct rt_mutex_waiter waiter; 608 struct rt_mutex_waiter waiter;
610 int ret = 0; 609 int ret = 0;
@@ -615,7 +614,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
615 spin_lock(&lock->wait_lock); 614 spin_lock(&lock->wait_lock);
616 615
617 /* Try to acquire the lock again: */ 616 /* Try to acquire the lock again: */
618 if (try_to_take_rt_mutex(lock __IP__)) { 617 if (try_to_take_rt_mutex(lock)) {
619 spin_unlock(&lock->wait_lock); 618 spin_unlock(&lock->wait_lock);
620 return 0; 619 return 0;
621 } 620 }
@@ -629,7 +628,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
629 628
630 for (;;) { 629 for (;;) {
631 /* Try to acquire the lock: */ 630 /* Try to acquire the lock: */
632 if (try_to_take_rt_mutex(lock __IP__)) 631 if (try_to_take_rt_mutex(lock))
633 break; 632 break;
634 633
635 /* 634 /*
@@ -653,7 +652,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
653 */ 652 */
654 if (!waiter.task) { 653 if (!waiter.task) {
655 ret = task_blocks_on_rt_mutex(lock, &waiter, 654 ret = task_blocks_on_rt_mutex(lock, &waiter,
656 detect_deadlock __IP__); 655 detect_deadlock);
657 /* 656 /*
658 * If we got woken up by the owner then start loop 657 * If we got woken up by the owner then start loop
659 * all over without going into schedule to try 658 * all over without going into schedule to try
@@ -680,7 +679,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
680 set_current_state(TASK_RUNNING); 679 set_current_state(TASK_RUNNING);
681 680
682 if (unlikely(waiter.task)) 681 if (unlikely(waiter.task))
683 remove_waiter(lock, &waiter __IP__); 682 remove_waiter(lock, &waiter);
684 683
685 /* 684 /*
686 * try_to_take_rt_mutex() sets the waiter bit 685 * try_to_take_rt_mutex() sets the waiter bit
@@ -711,7 +710,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
711 * Slow path try-lock function: 710 * Slow path try-lock function:
712 */ 711 */
713static inline int 712static inline int
714rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__) 713rt_mutex_slowtrylock(struct rt_mutex *lock)
715{ 714{
716 int ret = 0; 715 int ret = 0;
717 716
@@ -719,7 +718,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)
719 718
720 if (likely(rt_mutex_owner(lock) != current)) { 719 if (likely(rt_mutex_owner(lock) != current)) {
721 720
722 ret = try_to_take_rt_mutex(lock __IP__); 721 ret = try_to_take_rt_mutex(lock);
723 /* 722 /*
724 * try_to_take_rt_mutex() sets the lock waiters 723 * try_to_take_rt_mutex() sets the lock waiters
725 * bit unconditionally. Clean this up. 724 * bit unconditionally. Clean this up.
@@ -769,13 +768,13 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
769 int detect_deadlock, 768 int detect_deadlock,
770 int (*slowfn)(struct rt_mutex *lock, int state, 769 int (*slowfn)(struct rt_mutex *lock, int state,
771 struct hrtimer_sleeper *timeout, 770 struct hrtimer_sleeper *timeout,
772 int detect_deadlock __IP_DECL__)) 771 int detect_deadlock))
773{ 772{
774 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { 773 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
775 rt_mutex_deadlock_account_lock(lock, current); 774 rt_mutex_deadlock_account_lock(lock, current);
776 return 0; 775 return 0;
777 } else 776 } else
778 return slowfn(lock, state, NULL, detect_deadlock __RET_IP__); 777 return slowfn(lock, state, NULL, detect_deadlock);
779} 778}
780 779
781static inline int 780static inline int
@@ -783,24 +782,24 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
783 struct hrtimer_sleeper *timeout, int detect_deadlock, 782 struct hrtimer_sleeper *timeout, int detect_deadlock,
784 int (*slowfn)(struct rt_mutex *lock, int state, 783 int (*slowfn)(struct rt_mutex *lock, int state,
785 struct hrtimer_sleeper *timeout, 784 struct hrtimer_sleeper *timeout,
786 int detect_deadlock __IP_DECL__)) 785 int detect_deadlock))
787{ 786{
788 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { 787 if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
789 rt_mutex_deadlock_account_lock(lock, current); 788 rt_mutex_deadlock_account_lock(lock, current);
790 return 0; 789 return 0;
791 } else 790 } else
792 return slowfn(lock, state, timeout, detect_deadlock __RET_IP__); 791 return slowfn(lock, state, timeout, detect_deadlock);
793} 792}
794 793
795static inline int 794static inline int
796rt_mutex_fasttrylock(struct rt_mutex *lock, 795rt_mutex_fasttrylock(struct rt_mutex *lock,
797 int (*slowfn)(struct rt_mutex *lock __IP_DECL__)) 796 int (*slowfn)(struct rt_mutex *lock))
798{ 797{
799 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { 798 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
800 rt_mutex_deadlock_account_lock(lock, current); 799 rt_mutex_deadlock_account_lock(lock, current);
801 return 1; 800 return 1;
802 } 801 }
803 return slowfn(lock __RET_IP__); 802 return slowfn(lock);
804} 803}
805 804
806static inline void 805static inline void
@@ -948,7 +947,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
948 struct task_struct *proxy_owner) 947 struct task_struct *proxy_owner)
949{ 948{
950 __rt_mutex_init(lock, NULL); 949 __rt_mutex_init(lock, NULL);
951 debug_rt_mutex_proxy_lock(lock, proxy_owner __RET_IP__); 950 debug_rt_mutex_proxy_lock(lock, proxy_owner);
952 rt_mutex_set_owner(lock, proxy_owner, 0); 951 rt_mutex_set_owner(lock, proxy_owner, 0);
953 rt_mutex_deadlock_account_lock(lock, proxy_owner); 952 rt_mutex_deadlock_account_lock(lock, proxy_owner);
954} 953}
diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h
index 1e0fca13ff72..a1a1dd06421d 100644
--- a/kernel/rtmutex.h
+++ b/kernel/rtmutex.h
@@ -10,9 +10,6 @@
10 * Non-debug version. 10 * Non-debug version.
11 */ 11 */
12 12
13#define __IP_DECL__
14#define __IP__
15#define __RET_IP__
16#define rt_mutex_deadlock_check(l) (0) 13#define rt_mutex_deadlock_check(l) (0)
17#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) 14#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
18#define rt_mutex_deadlock_account_unlock(l) do { } while (0) 15#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
new file mode 100644
index 000000000000..291ded556aa0
--- /dev/null
+++ b/kernel/rwsem.c
@@ -0,0 +1,147 @@
1/* kernel/rwsem.c: R/W semaphores, public implementation
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from asm-i386/semaphore.h
5 */
6
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/rwsem.h>
11
12#include <asm/system.h>
13#include <asm/atomic.h>
14
15/*
16 * lock for reading
17 */
18void down_read(struct rw_semaphore *sem)
19{
20 might_sleep();
21 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
22
23 __down_read(sem);
24}
25
26EXPORT_SYMBOL(down_read);
27
28/*
29 * trylock for reading -- returns 1 if successful, 0 if contention
30 */
31int down_read_trylock(struct rw_semaphore *sem)
32{
33 int ret = __down_read_trylock(sem);
34
35 if (ret == 1)
36 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
37 return ret;
38}
39
40EXPORT_SYMBOL(down_read_trylock);
41
42/*
43 * lock for writing
44 */
45void down_write(struct rw_semaphore *sem)
46{
47 might_sleep();
48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
49
50 __down_write(sem);
51}
52
53EXPORT_SYMBOL(down_write);
54
55/*
56 * trylock for writing -- returns 1 if successful, 0 if contention
57 */
58int down_write_trylock(struct rw_semaphore *sem)
59{
60 int ret = __down_write_trylock(sem);
61
62 if (ret == 1)
63 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
64 return ret;
65}
66
67EXPORT_SYMBOL(down_write_trylock);
68
69/*
70 * release a read lock
71 */
72void up_read(struct rw_semaphore *sem)
73{
74 rwsem_release(&sem->dep_map, 1, _RET_IP_);
75
76 __up_read(sem);
77}
78
79EXPORT_SYMBOL(up_read);
80
81/*
82 * release a write lock
83 */
84void up_write(struct rw_semaphore *sem)
85{
86 rwsem_release(&sem->dep_map, 1, _RET_IP_);
87
88 __up_write(sem);
89}
90
91EXPORT_SYMBOL(up_write);
92
93/*
94 * downgrade write lock to read lock
95 */
96void downgrade_write(struct rw_semaphore *sem)
97{
98 /*
99 * lockdep: a downgraded write will live on as a write
100 * dependency.
101 */
102 __downgrade_write(sem);
103}
104
105EXPORT_SYMBOL(downgrade_write);
106
107#ifdef CONFIG_DEBUG_LOCK_ALLOC
108
109void down_read_nested(struct rw_semaphore *sem, int subclass)
110{
111 might_sleep();
112 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
113
114 __down_read(sem);
115}
116
117EXPORT_SYMBOL(down_read_nested);
118
119void down_read_non_owner(struct rw_semaphore *sem)
120{
121 might_sleep();
122
123 __down_read(sem);
124}
125
126EXPORT_SYMBOL(down_read_non_owner);
127
128void down_write_nested(struct rw_semaphore *sem, int subclass)
129{
130 might_sleep();
131 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
132
133 __down_write_nested(sem, subclass);
134}
135
136EXPORT_SYMBOL(down_write_nested);
137
138void up_read_non_owner(struct rw_semaphore *sem)
139{
140 __up_read(sem);
141}
142
143EXPORT_SYMBOL(up_read_non_owner);
144
145#endif
146
147
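kernel/rwsem.c is new: each public rwsem entry point wraps the architecture primitive (__down_read() and friends) with a lockdep annotation, rwsem_acquire()/rwsem_acquire_read() before blocking and rwsem_release() before the actual release, while the _nested and _non_owner variants let callers describe same-class nesting or ownerless usage to the validator. A rough usage sketch, assuming CONFIG_DEBUG_LOCK_ALLOC and the SINGLE_DEPTH_NESTING constant from <linux/lockdep.h>; copy_between() and its semaphores are made up:

#include <linux/rwsem.h>
#include <linux/lockdep.h>

/*
 * Two rwsems of the same lock class taken in a fixed order: the inner
 * one is taken with down_read_nested() so the validator does not treat
 * the same-class nesting as a possible deadlock.
 */
static void copy_between(struct rw_semaphore *src, struct rw_semaphore *dst)
{
	down_read(src);
	down_read_nested(dst, SINGLE_DEPTH_NESTING);

	/* ... read from both objects ... */

	up_read(dst);
	up_read(src);
}
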
diff --git a/kernel/sched.c b/kernel/sched.c
index d5e37072ea54..4ee400f9d56b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/completion.h> 31#include <linux/completion.h>
32#include <linux/kernel_stat.h> 32#include <linux/kernel_stat.h>
33#include <linux/debug_locks.h>
33#include <linux/security.h> 34#include <linux/security.h>
34#include <linux/notifier.h> 35#include <linux/notifier.h>
35#include <linux/profile.h> 36#include <linux/profile.h>
@@ -178,20 +179,15 @@ static unsigned int static_prio_timeslice(int static_prio)
178 return SCALE_PRIO(DEF_TIMESLICE, static_prio); 179 return SCALE_PRIO(DEF_TIMESLICE, static_prio);
179} 180}
180 181
181static inline unsigned int task_timeslice(task_t *p) 182static inline unsigned int task_timeslice(struct task_struct *p)
182{ 183{
183 return static_prio_timeslice(p->static_prio); 184 return static_prio_timeslice(p->static_prio);
184} 185}
185 186
186#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
187 < (long long) (sd)->cache_hot_time)
188
189/* 187/*
190 * These are the runqueue data structures: 188 * These are the runqueue data structures:
191 */ 189 */
192 190
193typedef struct runqueue runqueue_t;
194
195struct prio_array { 191struct prio_array {
196 unsigned int nr_active; 192 unsigned int nr_active;
197 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ 193 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
@@ -205,7 +201,7 @@ struct prio_array {
205 * (such as the load balancing or the thread migration code), lock 201 * (such as the load balancing or the thread migration code), lock
206 * acquire operations must be ordered by ascending &runqueue. 202 * acquire operations must be ordered by ascending &runqueue.
207 */ 203 */
208struct runqueue { 204struct rq {
209 spinlock_t lock; 205 spinlock_t lock;
210 206
211 /* 207 /*
@@ -229,9 +225,9 @@ struct runqueue {
229 225
230 unsigned long expired_timestamp; 226 unsigned long expired_timestamp;
231 unsigned long long timestamp_last_tick; 227 unsigned long long timestamp_last_tick;
232 task_t *curr, *idle; 228 struct task_struct *curr, *idle;
233 struct mm_struct *prev_mm; 229 struct mm_struct *prev_mm;
234 prio_array_t *active, *expired, arrays[2]; 230 struct prio_array *active, *expired, arrays[2];
235 int best_expired_prio; 231 int best_expired_prio;
236 atomic_t nr_iowait; 232 atomic_t nr_iowait;
237 233
@@ -242,7 +238,7 @@ struct runqueue {
242 int active_balance; 238 int active_balance;
243 int push_cpu; 239 int push_cpu;
244 240
245 task_t *migration_thread; 241 struct task_struct *migration_thread;
246 struct list_head migration_queue; 242 struct list_head migration_queue;
247#endif 243#endif
248 244
@@ -265,9 +261,10 @@ struct runqueue {
265 unsigned long ttwu_cnt; 261 unsigned long ttwu_cnt;
266 unsigned long ttwu_local; 262 unsigned long ttwu_local;
267#endif 263#endif
264 struct lock_class_key rq_lock_key;
268}; 265};
269 266
270static DEFINE_PER_CPU(struct runqueue, runqueues); 267static DEFINE_PER_CPU(struct rq, runqueues);
271 268
272/* 269/*
273 * The domain tree (rq->sd) is protected by RCU's quiescent state transition. 270 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -276,8 +273,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
276 * The domain tree of any CPU may only be accessed from within 273 * The domain tree of any CPU may only be accessed from within
277 * preempt-disabled sections. 274 * preempt-disabled sections.
278 */ 275 */
279#define for_each_domain(cpu, domain) \ 276#define for_each_domain(cpu, __sd) \
280for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) 277 for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
281 278
282#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) 279#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
283#define this_rq() (&__get_cpu_var(runqueues)) 280#define this_rq() (&__get_cpu_var(runqueues))
@@ -292,26 +289,33 @@ for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
292#endif 289#endif
293 290
294#ifndef __ARCH_WANT_UNLOCKED_CTXSW 291#ifndef __ARCH_WANT_UNLOCKED_CTXSW
295static inline int task_running(runqueue_t *rq, task_t *p) 292static inline int task_running(struct rq *rq, struct task_struct *p)
296{ 293{
297 return rq->curr == p; 294 return rq->curr == p;
298} 295}
299 296
300static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 297static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
301{ 298{
302} 299}
303 300
304static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 301static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
305{ 302{
306#ifdef CONFIG_DEBUG_SPINLOCK 303#ifdef CONFIG_DEBUG_SPINLOCK
307 /* this is a valid case when another task releases the spinlock */ 304 /* this is a valid case when another task releases the spinlock */
308 rq->lock.owner = current; 305 rq->lock.owner = current;
309#endif 306#endif
307 /*
308 * If we are tracking spinlock dependencies then we have to
309 * fix up the runqueue lock - which gets 'carried over' from
310 * prev into current:
311 */
312 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
313
310 spin_unlock_irq(&rq->lock); 314 spin_unlock_irq(&rq->lock);
311} 315}
312 316
313#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 317#else /* __ARCH_WANT_UNLOCKED_CTXSW */
314static inline int task_running(runqueue_t *rq, task_t *p) 318static inline int task_running(struct rq *rq, struct task_struct *p)
315{ 319{
316#ifdef CONFIG_SMP 320#ifdef CONFIG_SMP
317 return p->oncpu; 321 return p->oncpu;
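The comment added to finish_lock_switch() above describes the one awkward lockdep interaction in the scheduler: rq->lock is taken by prev but released by next, so the incoming task has to tell the validator that it now owns the still-held lock. A rough sketch of that hand-over pattern in isolation (not the scheduler code itself), assuming CONFIG_DEBUG_LOCK_ALLOC so that spinlock_t carries a dep_map; the two helpers are hypothetical:

#include <linux/spinlock.h>
#include <linux/lockdep.h>

/* Outgoing context: drop the lock from lockdep's view before handing it over. */
static inline void lock_handoff_out(spinlock_t *lock)
{
	spin_release(&lock->dep_map, 1, _THIS_IP_);
}

/* Incoming context: record ownership of the still-held lock, as finish_lock_switch() does. */
static inline void lock_handoff_in(spinlock_t *lock)
{
	spin_acquire(&lock->dep_map, 0, 0, _THIS_IP_);
}
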
@@ -320,7 +324,7 @@ static inline int task_running(runqueue_t *rq, task_t *p)
320#endif 324#endif
321} 325}
322 326
323static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 327static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
324{ 328{
325#ifdef CONFIG_SMP 329#ifdef CONFIG_SMP
326 /* 330 /*
@@ -337,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
337#endif 341#endif
338} 342}
339 343
340static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 344static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
341{ 345{
342#ifdef CONFIG_SMP 346#ifdef CONFIG_SMP
343 /* 347 /*
@@ -358,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
358 * __task_rq_lock - lock the runqueue a given task resides on. 362 * __task_rq_lock - lock the runqueue a given task resides on.
359 * Must be called interrupts disabled. 363 * Must be called interrupts disabled.
360 */ 364 */
361static inline runqueue_t *__task_rq_lock(task_t *p) 365static inline struct rq *__task_rq_lock(struct task_struct *p)
362 __acquires(rq->lock) 366 __acquires(rq->lock)
363{ 367{
364 struct runqueue *rq; 368 struct rq *rq;
365 369
366repeat_lock_task: 370repeat_lock_task:
367 rq = task_rq(p); 371 rq = task_rq(p);
@@ -378,10 +382,10 @@ repeat_lock_task:
378 * interrupts. Note the ordering: we can safely lookup the task_rq without 382 * interrupts. Note the ordering: we can safely lookup the task_rq without
379 * explicitly disabling preemption. 383 * explicitly disabling preemption.
380 */ 384 */
381static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) 385static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
382 __acquires(rq->lock) 386 __acquires(rq->lock)
383{ 387{
384 struct runqueue *rq; 388 struct rq *rq;
385 389
386repeat_lock_task: 390repeat_lock_task:
387 local_irq_save(*flags); 391 local_irq_save(*flags);
@@ -394,13 +398,13 @@ repeat_lock_task:
394 return rq; 398 return rq;
395} 399}
396 400
397static inline void __task_rq_unlock(runqueue_t *rq) 401static inline void __task_rq_unlock(struct rq *rq)
398 __releases(rq->lock) 402 __releases(rq->lock)
399{ 403{
400 spin_unlock(&rq->lock); 404 spin_unlock(&rq->lock);
401} 405}
402 406
403static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) 407static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
404 __releases(rq->lock) 408 __releases(rq->lock)
405{ 409{
406 spin_unlock_irqrestore(&rq->lock, *flags); 410 spin_unlock_irqrestore(&rq->lock, *flags);
@@ -420,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
420 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); 424 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
421 seq_printf(seq, "timestamp %lu\n", jiffies); 425 seq_printf(seq, "timestamp %lu\n", jiffies);
422 for_each_online_cpu(cpu) { 426 for_each_online_cpu(cpu) {
423 runqueue_t *rq = cpu_rq(cpu); 427 struct rq *rq = cpu_rq(cpu);
424#ifdef CONFIG_SMP 428#ifdef CONFIG_SMP
425 struct sched_domain *sd; 429 struct sched_domain *sd;
426 int dcnt = 0; 430 int dcnt = 0;
@@ -507,10 +511,10 @@ struct file_operations proc_schedstat_operations = {
507/* 511/*
508 * rq_lock - lock a given runqueue and disable interrupts. 512 * rq_lock - lock a given runqueue and disable interrupts.
509 */ 513 */
510static inline runqueue_t *this_rq_lock(void) 514static inline struct rq *this_rq_lock(void)
511 __acquires(rq->lock) 515 __acquires(rq->lock)
512{ 516{
513 runqueue_t *rq; 517 struct rq *rq;
514 518
515 local_irq_disable(); 519 local_irq_disable();
516 rq = this_rq(); 520 rq = this_rq();
@@ -535,7 +539,7 @@ static inline runqueue_t *this_rq_lock(void)
535 * long it was from the *first* time it was queued to the time that it 539 * long it was from the *first* time it was queued to the time that it
536 * finally hit a cpu. 540 * finally hit a cpu.
537 */ 541 */
538static inline void sched_info_dequeued(task_t *t) 542static inline void sched_info_dequeued(struct task_struct *t)
539{ 543{
540 t->sched_info.last_queued = 0; 544 t->sched_info.last_queued = 0;
541} 545}
@@ -545,10 +549,10 @@ static inline void sched_info_dequeued(task_t *t)
545 * long it was waiting to run. We also note when it began so that we 549 * long it was waiting to run. We also note when it began so that we
546 * can keep stats on how long its timeslice is. 550 * can keep stats on how long its timeslice is.
547 */ 551 */
548static void sched_info_arrive(task_t *t) 552static void sched_info_arrive(struct task_struct *t)
549{ 553{
550 unsigned long now = jiffies, diff = 0; 554 unsigned long now = jiffies, diff = 0;
551 struct runqueue *rq = task_rq(t); 555 struct rq *rq = task_rq(t);
552 556
553 if (t->sched_info.last_queued) 557 if (t->sched_info.last_queued)
554 diff = now - t->sched_info.last_queued; 558 diff = now - t->sched_info.last_queued;
@@ -579,7 +583,7 @@ static void sched_info_arrive(task_t *t)
579 * the timestamp if it is already not set. It's assumed that 583 * the timestamp if it is already not set. It's assumed that
580 * sched_info_dequeued() will clear that stamp when appropriate. 584 * sched_info_dequeued() will clear that stamp when appropriate.
581 */ 585 */
582static inline void sched_info_queued(task_t *t) 586static inline void sched_info_queued(struct task_struct *t)
583{ 587{
584 if (!t->sched_info.last_queued) 588 if (!t->sched_info.last_queued)
585 t->sched_info.last_queued = jiffies; 589 t->sched_info.last_queued = jiffies;
@@ -589,9 +593,9 @@ static inline void sched_info_queued(task_t *t)
589 * Called when a process ceases being the active-running process, either 593 * Called when a process ceases being the active-running process, either
590 * voluntarily or involuntarily. Now we can calculate how long we ran. 594 * voluntarily or involuntarily. Now we can calculate how long we ran.
591 */ 595 */
592static inline void sched_info_depart(task_t *t) 596static inline void sched_info_depart(struct task_struct *t)
593{ 597{
594 struct runqueue *rq = task_rq(t); 598 struct rq *rq = task_rq(t);
595 unsigned long diff = jiffies - t->sched_info.last_arrival; 599 unsigned long diff = jiffies - t->sched_info.last_arrival;
596 600
597 t->sched_info.cpu_time += diff; 601 t->sched_info.cpu_time += diff;
@@ -605,9 +609,10 @@ static inline void sched_info_depart(task_t *t)
605 * their time slice. (This may also be called when switching to or from 609 * their time slice. (This may also be called when switching to or from
606 * the idle task.) We are only called when prev != next. 610 * the idle task.) We are only called when prev != next.
607 */ 611 */
608static inline void sched_info_switch(task_t *prev, task_t *next) 612static inline void
613sched_info_switch(struct task_struct *prev, struct task_struct *next)
609{ 614{
610 struct runqueue *rq = task_rq(prev); 615 struct rq *rq = task_rq(prev);
611 616
612 /* 617 /*
613 * prev now departs the cpu. It's not interesting to record 618 * prev now departs the cpu. It's not interesting to record
@@ -628,7 +633,7 @@ static inline void sched_info_switch(task_t *prev, task_t *next)
628/* 633/*
629 * Adding/removing a task to/from a priority array: 634 * Adding/removing a task to/from a priority array:
630 */ 635 */
631static void dequeue_task(struct task_struct *p, prio_array_t *array) 636static void dequeue_task(struct task_struct *p, struct prio_array *array)
632{ 637{
633 array->nr_active--; 638 array->nr_active--;
634 list_del(&p->run_list); 639 list_del(&p->run_list);
@@ -636,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array)
636 __clear_bit(p->prio, array->bitmap); 641 __clear_bit(p->prio, array->bitmap);
637} 642}
638 643
639static void enqueue_task(struct task_struct *p, prio_array_t *array) 644static void enqueue_task(struct task_struct *p, struct prio_array *array)
640{ 645{
641 sched_info_queued(p); 646 sched_info_queued(p);
642 list_add_tail(&p->run_list, array->queue + p->prio); 647 list_add_tail(&p->run_list, array->queue + p->prio);
@@ -649,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)
649 * Put task to the end of the run list without the overhead of dequeue 654 * Put task to the end of the run list without the overhead of dequeue
650 * followed by enqueue. 655 * followed by enqueue.
651 */ 656 */
652static void requeue_task(struct task_struct *p, prio_array_t *array) 657static void requeue_task(struct task_struct *p, struct prio_array *array)
653{ 658{
654 list_move_tail(&p->run_list, array->queue + p->prio); 659 list_move_tail(&p->run_list, array->queue + p->prio);
655} 660}
656 661
657static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) 662static inline void
663enqueue_task_head(struct task_struct *p, struct prio_array *array)
658{ 664{
659 list_add(&p->run_list, array->queue + p->prio); 665 list_add(&p->run_list, array->queue + p->prio);
660 __set_bit(p->prio, array->bitmap); 666 __set_bit(p->prio, array->bitmap);
@@ -677,7 +683,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
677 * Both properties are important to certain workloads. 683 * Both properties are important to certain workloads.
678 */ 684 */
679 685
680static inline int __normal_prio(task_t *p) 686static inline int __normal_prio(struct task_struct *p)
681{ 687{
682 int bonus, prio; 688 int bonus, prio;
683 689
@@ -713,7 +719,7 @@ static inline int __normal_prio(task_t *p)
713#define RTPRIO_TO_LOAD_WEIGHT(rp) \ 719#define RTPRIO_TO_LOAD_WEIGHT(rp) \
714 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) 720 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
715 721
716static void set_load_weight(task_t *p) 722static void set_load_weight(struct task_struct *p)
717{ 723{
718 if (has_rt_policy(p)) { 724 if (has_rt_policy(p)) {
719#ifdef CONFIG_SMP 725#ifdef CONFIG_SMP
@@ -731,23 +737,25 @@ static void set_load_weight(task_t *p)
731 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); 737 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
732} 738}
733 739
734static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) 740static inline void
741inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
735{ 742{
736 rq->raw_weighted_load += p->load_weight; 743 rq->raw_weighted_load += p->load_weight;
737} 744}
738 745
739static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p) 746static inline void
747dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
740{ 748{
741 rq->raw_weighted_load -= p->load_weight; 749 rq->raw_weighted_load -= p->load_weight;
742} 750}
743 751
744static inline void inc_nr_running(task_t *p, runqueue_t *rq) 752static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
745{ 753{
746 rq->nr_running++; 754 rq->nr_running++;
747 inc_raw_weighted_load(rq, p); 755 inc_raw_weighted_load(rq, p);
748} 756}
749 757
750static inline void dec_nr_running(task_t *p, runqueue_t *rq) 758static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
751{ 759{
752 rq->nr_running--; 760 rq->nr_running--;
753 dec_raw_weighted_load(rq, p); 761 dec_raw_weighted_load(rq, p);
@@ -760,7 +768,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq)
760 * setprio syscalls, and whenever the interactivity 768 * setprio syscalls, and whenever the interactivity
761 * estimator recalculates. 769 * estimator recalculates.
762 */ 770 */
763static inline int normal_prio(task_t *p) 771static inline int normal_prio(struct task_struct *p)
764{ 772{
765 int prio; 773 int prio;
766 774
@@ -778,7 +786,7 @@ static inline int normal_prio(task_t *p)
778 * interactivity modifiers. Will be RT if the task got 786 * interactivity modifiers. Will be RT if the task got
779 * RT-boosted. If not then it returns p->normal_prio. 787 * RT-boosted. If not then it returns p->normal_prio.
780 */ 788 */
781static int effective_prio(task_t *p) 789static int effective_prio(struct task_struct *p)
782{ 790{
783 p->normal_prio = normal_prio(p); 791 p->normal_prio = normal_prio(p);
784 /* 792 /*
@@ -794,9 +802,9 @@ static int effective_prio(task_t *p)
794/* 802/*
795 * __activate_task - move a task to the runqueue. 803 * __activate_task - move a task to the runqueue.
796 */ 804 */
797static void __activate_task(task_t *p, runqueue_t *rq) 805static void __activate_task(struct task_struct *p, struct rq *rq)
798{ 806{
799 prio_array_t *target = rq->active; 807 struct prio_array *target = rq->active;
800 808
801 if (batch_task(p)) 809 if (batch_task(p))
802 target = rq->expired; 810 target = rq->expired;
@@ -807,7 +815,7 @@ static void __activate_task(task_t *p, runqueue_t *rq)
807/* 815/*
808 * __activate_idle_task - move idle task to the _front_ of runqueue. 816 * __activate_idle_task - move idle task to the _front_ of runqueue.
809 */ 817 */
810static inline void __activate_idle_task(task_t *p, runqueue_t *rq) 818static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
811{ 819{
812 enqueue_task_head(p, rq->active); 820 enqueue_task_head(p, rq->active);
813 inc_nr_running(p, rq); 821 inc_nr_running(p, rq);
@@ -817,7 +825,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
817 * Recalculate p->normal_prio and p->prio after having slept, 825 * Recalculate p->normal_prio and p->prio after having slept,
818 * updating the sleep-average too: 826 * updating the sleep-average too:
819 */ 827 */
820static int recalc_task_prio(task_t *p, unsigned long long now) 828static int recalc_task_prio(struct task_struct *p, unsigned long long now)
821{ 829{
822 /* Caller must always ensure 'now >= p->timestamp' */ 830 /* Caller must always ensure 'now >= p->timestamp' */
823 unsigned long sleep_time = now - p->timestamp; 831 unsigned long sleep_time = now - p->timestamp;
@@ -889,7 +897,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
889 * Update all the scheduling statistics stuff. (sleep average 897 * Update all the scheduling statistics stuff. (sleep average
890 * calculation, priority modifiers, etc.) 898 * calculation, priority modifiers, etc.)
891 */ 899 */
892static void activate_task(task_t *p, runqueue_t *rq, int local) 900static void activate_task(struct task_struct *p, struct rq *rq, int local)
893{ 901{
894 unsigned long long now; 902 unsigned long long now;
895 903
@@ -897,7 +905,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
897#ifdef CONFIG_SMP 905#ifdef CONFIG_SMP
898 if (!local) { 906 if (!local) {
899 /* Compensate for drifting sched_clock */ 907 /* Compensate for drifting sched_clock */
900 runqueue_t *this_rq = this_rq(); 908 struct rq *this_rq = this_rq();
901 now = (now - this_rq->timestamp_last_tick) 909 now = (now - this_rq->timestamp_last_tick)
902 + rq->timestamp_last_tick; 910 + rq->timestamp_last_tick;
903 } 911 }
@@ -936,7 +944,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
936/* 944/*
937 * deactivate_task - remove a task from the runqueue. 945 * deactivate_task - remove a task from the runqueue.
938 */ 946 */
939static void deactivate_task(struct task_struct *p, runqueue_t *rq) 947static void deactivate_task(struct task_struct *p, struct rq *rq)
940{ 948{
941 dec_nr_running(p, rq); 949 dec_nr_running(p, rq);
942 dequeue_task(p, p->array); 950 dequeue_task(p, p->array);
@@ -956,7 +964,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
956#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 964#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
957#endif 965#endif
958 966
959static void resched_task(task_t *p) 967static void resched_task(struct task_struct *p)
960{ 968{
961 int cpu; 969 int cpu;
962 970
@@ -977,7 +985,7 @@ static void resched_task(task_t *p)
977 smp_send_reschedule(cpu); 985 smp_send_reschedule(cpu);
978} 986}
979#else 987#else
980static inline void resched_task(task_t *p) 988static inline void resched_task(struct task_struct *p)
981{ 989{
982 assert_spin_locked(&task_rq(p)->lock); 990 assert_spin_locked(&task_rq(p)->lock);
983 set_tsk_need_resched(p); 991 set_tsk_need_resched(p);
@@ -988,7 +996,7 @@ static inline void resched_task(task_t *p)
988 * task_curr - is this task currently executing on a CPU? 996 * task_curr - is this task currently executing on a CPU?
989 * @p: the task in question. 997 * @p: the task in question.
990 */ 998 */
991inline int task_curr(const task_t *p) 999inline int task_curr(const struct task_struct *p)
992{ 1000{
993 return cpu_curr(task_cpu(p)) == p; 1001 return cpu_curr(task_cpu(p)) == p;
994} 1002}
@@ -1000,22 +1008,23 @@ unsigned long weighted_cpuload(const int cpu)
1000} 1008}
1001 1009
1002#ifdef CONFIG_SMP 1010#ifdef CONFIG_SMP
1003typedef struct { 1011struct migration_req {
1004 struct list_head list; 1012 struct list_head list;
1005 1013
1006 task_t *task; 1014 struct task_struct *task;
1007 int dest_cpu; 1015 int dest_cpu;
1008 1016
1009 struct completion done; 1017 struct completion done;
1010} migration_req_t; 1018};
1011 1019
1012/* 1020/*
1013 * The task's runqueue lock must be held. 1021 * The task's runqueue lock must be held.
1014 * Returns true if you have to wait for migration thread. 1022 * Returns true if you have to wait for migration thread.
1015 */ 1023 */
1016static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) 1024static int
1025migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
1017{ 1026{
1018 runqueue_t *rq = task_rq(p); 1027 struct rq *rq = task_rq(p);
1019 1028
1020 /* 1029 /*
1021 * If the task is not on a runqueue (and not running), then 1030 * If the task is not on a runqueue (and not running), then
@@ -1030,6 +1039,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1030 req->task = p; 1039 req->task = p;
1031 req->dest_cpu = dest_cpu; 1040 req->dest_cpu = dest_cpu;
1032 list_add(&req->list, &rq->migration_queue); 1041 list_add(&req->list, &rq->migration_queue);
1042
1033 return 1; 1043 return 1;
1034} 1044}
1035 1045
@@ -1042,10 +1052,10 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1042 * smp_call_function() if an IPI is sent by the same process we are 1052 * smp_call_function() if an IPI is sent by the same process we are
1043 * waiting to become inactive. 1053 * waiting to become inactive.
1044 */ 1054 */
1045void wait_task_inactive(task_t *p) 1055void wait_task_inactive(struct task_struct *p)
1046{ 1056{
1047 unsigned long flags; 1057 unsigned long flags;
1048 runqueue_t *rq; 1058 struct rq *rq;
1049 int preempted; 1059 int preempted;
1050 1060
1051repeat: 1061repeat:
@@ -1076,7 +1086,7 @@ repeat:
1076 * to another CPU then no harm is done and the purpose has been 1086 * to another CPU then no harm is done and the purpose has been
1077 * achieved as well. 1087 * achieved as well.
1078 */ 1088 */
1079void kick_process(task_t *p) 1089void kick_process(struct task_struct *p)
1080{ 1090{
1081 int cpu; 1091 int cpu;
1082 1092
@@ -1096,7 +1106,7 @@ void kick_process(task_t *p)
1096 */ 1106 */
1097static inline unsigned long source_load(int cpu, int type) 1107static inline unsigned long source_load(int cpu, int type)
1098{ 1108{
1099 runqueue_t *rq = cpu_rq(cpu); 1109 struct rq *rq = cpu_rq(cpu);
1100 1110
1101 if (type == 0) 1111 if (type == 0)
1102 return rq->raw_weighted_load; 1112 return rq->raw_weighted_load;
@@ -1110,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type)
1110 */ 1120 */
1111static inline unsigned long target_load(int cpu, int type) 1121static inline unsigned long target_load(int cpu, int type)
1112{ 1122{
1113 runqueue_t *rq = cpu_rq(cpu); 1123 struct rq *rq = cpu_rq(cpu);
1114 1124
1115 if (type == 0) 1125 if (type == 0)
1116 return rq->raw_weighted_load; 1126 return rq->raw_weighted_load;
@@ -1123,10 +1133,10 @@ static inline unsigned long target_load(int cpu, int type)
1123 */ 1133 */
1124static inline unsigned long cpu_avg_load_per_task(int cpu) 1134static inline unsigned long cpu_avg_load_per_task(int cpu)
1125{ 1135{
1126 runqueue_t *rq = cpu_rq(cpu); 1136 struct rq *rq = cpu_rq(cpu);
1127 unsigned long n = rq->nr_running; 1137 unsigned long n = rq->nr_running;
1128 1138
1129 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; 1139 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
1130} 1140}
1131 1141
1132/* 1142/*
@@ -1279,7 +1289,7 @@ nextlevel:
1279 * Returns the CPU we should wake onto. 1289 * Returns the CPU we should wake onto.
1280 */ 1290 */
1281#if defined(ARCH_HAS_SCHED_WAKE_IDLE) 1291#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1282static int wake_idle(int cpu, task_t *p) 1292static int wake_idle(int cpu, struct task_struct *p)
1283{ 1293{
1284 cpumask_t tmp; 1294 cpumask_t tmp;
1285 struct sched_domain *sd; 1295 struct sched_domain *sd;
@@ -1302,7 +1312,7 @@ static int wake_idle(int cpu, task_t *p)
1302 return cpu; 1312 return cpu;
1303} 1313}
1304#else 1314#else
1305static inline int wake_idle(int cpu, task_t *p) 1315static inline int wake_idle(int cpu, struct task_struct *p)
1306{ 1316{
1307 return cpu; 1317 return cpu;
1308} 1318}
@@ -1322,15 +1332,15 @@ static inline int wake_idle(int cpu, task_t *p)
1322 * 1332 *
1323 * returns failure only if the task is already active. 1333 * returns failure only if the task is already active.
1324 */ 1334 */
1325static int try_to_wake_up(task_t *p, unsigned int state, int sync) 1335static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1326{ 1336{
1327 int cpu, this_cpu, success = 0; 1337 int cpu, this_cpu, success = 0;
1328 unsigned long flags; 1338 unsigned long flags;
1329 long old_state; 1339 long old_state;
1330 runqueue_t *rq; 1340 struct rq *rq;
1331#ifdef CONFIG_SMP 1341#ifdef CONFIG_SMP
1332 unsigned long load, this_load;
1333 struct sched_domain *sd, *this_sd = NULL; 1342 struct sched_domain *sd, *this_sd = NULL;
1343 unsigned long load, this_load;
1334 int new_cpu; 1344 int new_cpu;
1335#endif 1345#endif
1336 1346
@@ -1480,15 +1490,14 @@ out:
1480 return success; 1490 return success;
1481} 1491}
1482 1492
1483int fastcall wake_up_process(task_t *p) 1493int fastcall wake_up_process(struct task_struct *p)
1484{ 1494{
1485 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1495 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
1486 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); 1496 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
1487} 1497}
1488
1489EXPORT_SYMBOL(wake_up_process); 1498EXPORT_SYMBOL(wake_up_process);
1490 1499
1491int fastcall wake_up_state(task_t *p, unsigned int state) 1500int fastcall wake_up_state(struct task_struct *p, unsigned int state)
1492{ 1501{
1493 return try_to_wake_up(p, state, 0); 1502 return try_to_wake_up(p, state, 0);
1494} 1503}
@@ -1497,7 +1506,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
1497 * Perform scheduler related setup for a newly forked process p. 1506 * Perform scheduler related setup for a newly forked process p.
1498 * p is forked by current. 1507 * p is forked by current.
1499 */ 1508 */
1500void fastcall sched_fork(task_t *p, int clone_flags) 1509void fastcall sched_fork(struct task_struct *p, int clone_flags)
1501{ 1510{
1502 int cpu = get_cpu(); 1511 int cpu = get_cpu();
1503 1512
@@ -1565,11 +1574,11 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1565 * that must be done for every newly created context, then puts the task 1574 * that must be done for every newly created context, then puts the task
1566 * on the runqueue and wakes it. 1575 * on the runqueue and wakes it.
1567 */ 1576 */
1568void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) 1577void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1569{ 1578{
1579 struct rq *rq, *this_rq;
1570 unsigned long flags; 1580 unsigned long flags;
1571 int this_cpu, cpu; 1581 int this_cpu, cpu;
1572 runqueue_t *rq, *this_rq;
1573 1582
1574 rq = task_rq_lock(p, &flags); 1583 rq = task_rq_lock(p, &flags);
1575 BUG_ON(p->state != TASK_RUNNING); 1584 BUG_ON(p->state != TASK_RUNNING);
@@ -1649,10 +1658,10 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1649 * artificially, because any timeslice recovered here 1658 * artificially, because any timeslice recovered here
1650 * was given away by the parent in the first place.) 1659 * was given away by the parent in the first place.)
1651 */ 1660 */
1652void fastcall sched_exit(task_t *p) 1661void fastcall sched_exit(struct task_struct *p)
1653{ 1662{
1654 unsigned long flags; 1663 unsigned long flags;
1655 runqueue_t *rq; 1664 struct rq *rq;
1656 1665
1657 /* 1666 /*
1658 * If the child was a (relative-) CPU hog then decrease 1667 * If the child was a (relative-) CPU hog then decrease
@@ -1683,7 +1692,7 @@ void fastcall sched_exit(task_t *p)
1683 * prepare_task_switch sets up locking and calls architecture specific 1692 * prepare_task_switch sets up locking and calls architecture specific
1684 * hooks. 1693 * hooks.
1685 */ 1694 */
1686static inline void prepare_task_switch(runqueue_t *rq, task_t *next) 1695static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
1687{ 1696{
1688 prepare_lock_switch(rq, next); 1697 prepare_lock_switch(rq, next);
1689 prepare_arch_switch(next); 1698 prepare_arch_switch(next);
@@ -1704,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
1704 * with the lock held can cause deadlocks; see schedule() for 1713 * with the lock held can cause deadlocks; see schedule() for
1705 * details.) 1714 * details.)
1706 */ 1715 */
1707static inline void finish_task_switch(runqueue_t *rq, task_t *prev) 1716static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
1708 __releases(rq->lock) 1717 __releases(rq->lock)
1709{ 1718{
1710 struct mm_struct *mm = rq->prev_mm; 1719 struct mm_struct *mm = rq->prev_mm;
@@ -1742,10 +1751,11 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
1742 * schedule_tail - first thing a freshly forked thread must call. 1751 * schedule_tail - first thing a freshly forked thread must call.
1743 * @prev: the thread we just switched away from. 1752 * @prev: the thread we just switched away from.
1744 */ 1753 */
1745asmlinkage void schedule_tail(task_t *prev) 1754asmlinkage void schedule_tail(struct task_struct *prev)
1746 __releases(rq->lock) 1755 __releases(rq->lock)
1747{ 1756{
1748 runqueue_t *rq = this_rq(); 1757 struct rq *rq = this_rq();
1758
1749 finish_task_switch(rq, prev); 1759 finish_task_switch(rq, prev);
1750#ifdef __ARCH_WANT_UNLOCKED_CTXSW 1760#ifdef __ARCH_WANT_UNLOCKED_CTXSW
1751 /* In this case, finish_task_switch does not reenable preemption */ 1761 /* In this case, finish_task_switch does not reenable preemption */
@@ -1759,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev)
1759 * context_switch - switch to the new MM and the new 1769 * context_switch - switch to the new MM and the new
1760 * thread's register state. 1770 * thread's register state.
1761 */ 1771 */
1762static inline 1772static inline struct task_struct *
1763task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) 1773context_switch(struct rq *rq, struct task_struct *prev,
1774 struct task_struct *next)
1764{ 1775{
1765 struct mm_struct *mm = next->mm; 1776 struct mm_struct *mm = next->mm;
1766 struct mm_struct *oldmm = prev->active_mm; 1777 struct mm_struct *oldmm = prev->active_mm;
@@ -1777,6 +1788,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
1777 WARN_ON(rq->prev_mm); 1788 WARN_ON(rq->prev_mm);
1778 rq->prev_mm = oldmm; 1789 rq->prev_mm = oldmm;
1779 } 1790 }
1791 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
1780 1792
1781 /* Here we just switch the register state and the stack. */ 1793 /* Here we just switch the register state and the stack. */
1782 switch_to(prev, next, prev); 1794 switch_to(prev, next, prev);
@@ -1857,12 +1869,21 @@ unsigned long nr_active(void)
1857#ifdef CONFIG_SMP 1869#ifdef CONFIG_SMP
1858 1870
1859/* 1871/*
1872 * Is this task likely cache-hot:
1873 */
1874static inline int
1875task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
1876{
1877 return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
1878}
1879
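For reference, the cache-hot test the new task_hot() helper factors out is just a signed comparison of two nanosecond timestamps. A minimal standalone sketch (plain C; the values below stand in for p->last_ran and sd->cache_hot_time and are purely illustrative):

#include <stdio.h>

/* Sketch only: last_ran and cache_hot_time stand in for the scheduler
 * fields; both are nanosecond values in the real code. */
static int task_hot_sketch(unsigned long long now,
                           unsigned long long last_ran,
                           unsigned long long cache_hot_time)
{
        /* signed subtraction tolerates now briefly lagging last_ran */
        return (long long)(now - last_ran) < (long long)cache_hot_time;
}

int main(void)
{
        /* ran 1.5ms ago, domain treats tasks as hot for 2.5ms -> hot (1) */
        printf("%d\n", task_hot_sketch(10000000ULL, 8500000ULL, 2500000ULL));
        return 0;
}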
1880/*
1860 * double_rq_lock - safely lock two runqueues 1881 * double_rq_lock - safely lock two runqueues
1861 * 1882 *
1862 * Note this does not disable interrupts like task_rq_lock, 1883 * Note this does not disable interrupts like task_rq_lock,
1863 * you need to do so manually before calling. 1884 * you need to do so manually before calling.
1864 */ 1885 */
1865static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) 1886static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1866 __acquires(rq1->lock) 1887 __acquires(rq1->lock)
1867 __acquires(rq2->lock) 1888 __acquires(rq2->lock)
1868{ 1889{
@@ -1886,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1886 * Note this does not restore interrupts like task_rq_unlock, 1907 * Note this does not restore interrupts like task_rq_unlock,
1887 * you need to do so manually after calling. 1908 * you need to do so manually after calling.
1888 */ 1909 */
1889static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) 1910static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1890 __releases(rq1->lock) 1911 __releases(rq1->lock)
1891 __releases(rq2->lock) 1912 __releases(rq2->lock)
1892{ 1913{
@@ -1900,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
1900/* 1921/*
1901 * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 1922 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
1902 */ 1923 */
1903static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) 1924static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
1904 __releases(this_rq->lock) 1925 __releases(this_rq->lock)
1905 __acquires(busiest->lock) 1926 __acquires(busiest->lock)
1906 __acquires(this_rq->lock) 1927 __acquires(this_rq->lock)
@@ -1921,11 +1942,11 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
1921 * allow dest_cpu, which will force the cpu onto dest_cpu. Then 1942 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
1922 * the cpu_allowed mask is restored. 1943 * the cpu_allowed mask is restored.
1923 */ 1944 */
1924static void sched_migrate_task(task_t *p, int dest_cpu) 1945static void sched_migrate_task(struct task_struct *p, int dest_cpu)
1925{ 1946{
1926 migration_req_t req; 1947 struct migration_req req;
1927 runqueue_t *rq;
1928 unsigned long flags; 1948 unsigned long flags;
1949 struct rq *rq;
1929 1950
1930 rq = task_rq_lock(p, &flags); 1951 rq = task_rq_lock(p, &flags);
1931 if (!cpu_isset(dest_cpu, p->cpus_allowed) 1952 if (!cpu_isset(dest_cpu, p->cpus_allowed)
@@ -1936,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu)
1936 if (migrate_task(p, dest_cpu, &req)) { 1957 if (migrate_task(p, dest_cpu, &req)) {
1937 /* Need to wait for migration thread (might exit: take ref). */ 1958 /* Need to wait for migration thread (might exit: take ref). */
1938 struct task_struct *mt = rq->migration_thread; 1959 struct task_struct *mt = rq->migration_thread;
1960
1939 get_task_struct(mt); 1961 get_task_struct(mt);
1940 task_rq_unlock(rq, &flags); 1962 task_rq_unlock(rq, &flags);
1941 wake_up_process(mt); 1963 wake_up_process(mt);
1942 put_task_struct(mt); 1964 put_task_struct(mt);
1943 wait_for_completion(&req.done); 1965 wait_for_completion(&req.done);
1966
1944 return; 1967 return;
1945 } 1968 }
1946out: 1969out:
@@ -1964,9 +1987,9 @@ void sched_exec(void)
1964 * pull_task - move a task from a remote runqueue to the local runqueue. 1987 * pull_task - move a task from a remote runqueue to the local runqueue.
1965 * Both runqueues must be locked. 1988 * Both runqueues must be locked.
1966 */ 1989 */
1967static 1990static void pull_task(struct rq *src_rq, struct prio_array *src_array,
1968void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, 1991 struct task_struct *p, struct rq *this_rq,
1969 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1992 struct prio_array *this_array, int this_cpu)
1970{ 1993{
1971 dequeue_task(p, src_array); 1994 dequeue_task(p, src_array);
1972 dec_nr_running(p, src_rq); 1995 dec_nr_running(p, src_rq);
@@ -1987,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1987 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? 2010 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
1988 */ 2011 */
1989static 2012static
1990int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, 2013int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
1991 struct sched_domain *sd, enum idle_type idle, 2014 struct sched_domain *sd, enum idle_type idle,
1992 int *all_pinned) 2015 int *all_pinned)
1993{ 2016{
@@ -2019,6 +2042,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2019} 2042}
2020 2043
2021#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) 2044#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
2045
2022/* 2046/*
2023 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted 2047 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
2024 * load from busiest to this_rq, as part of a balancing operation within 2048 * load from busiest to this_rq, as part of a balancing operation within
@@ -2026,18 +2050,17 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2026 * 2050 *
2027 * Called with both runqueues locked. 2051 * Called with both runqueues locked.
2028 */ 2052 */
2029static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, 2053static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2030 unsigned long max_nr_move, unsigned long max_load_move, 2054 unsigned long max_nr_move, unsigned long max_load_move,
2031 struct sched_domain *sd, enum idle_type idle, 2055 struct sched_domain *sd, enum idle_type idle,
2032 int *all_pinned) 2056 int *all_pinned)
2033{ 2057{
2034 prio_array_t *array, *dst_array; 2058 int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
2059 best_prio_seen, skip_for_load;
2060 struct prio_array *array, *dst_array;
2035 struct list_head *head, *curr; 2061 struct list_head *head, *curr;
2036 int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; 2062 struct task_struct *tmp;
2037 int busiest_best_prio_seen;
2038 int skip_for_load; /* skip the task based on weighted load issues */
2039 long rem_load_move; 2063 long rem_load_move;
2040 task_t *tmp;
2041 2064
2042 if (max_nr_move == 0 || max_load_move == 0) 2065 if (max_nr_move == 0 || max_load_move == 0)
2043 goto out; 2066 goto out;
@@ -2045,15 +2068,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
2045 rem_load_move = max_load_move; 2068 rem_load_move = max_load_move;
2046 pinned = 1; 2069 pinned = 1;
2047 this_best_prio = rq_best_prio(this_rq); 2070 this_best_prio = rq_best_prio(this_rq);
2048 busiest_best_prio = rq_best_prio(busiest); 2071 best_prio = rq_best_prio(busiest);
2049 /* 2072 /*
2050 * Enable handling of the case where there is more than one task 2073 * Enable handling of the case where there is more than one task
2051 * with the best priority. If the current running task is one 2074 * with the best priority. If the current running task is one
2052 * of those with prio==busiest_best_prio we know it won't be moved 2075 * of those with prio==best_prio we know it won't be moved
2053 * and therefore it's safe to override the skip (based on load) of 2076 * and therefore it's safe to override the skip (based on load) of
2054 * any task we find with that prio. 2077 * any task we find with that prio.
2055 */ 2078 */
2056 busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; 2079 best_prio_seen = best_prio == busiest->curr->prio;
2057 2080
2058 /* 2081 /*
2059 * We first consider expired tasks. Those will likely not be 2082 * We first consider expired tasks. Those will likely not be
@@ -2089,7 +2112,7 @@ skip_bitmap:
2089 head = array->queue + idx; 2112 head = array->queue + idx;
2090 curr = head->prev; 2113 curr = head->prev;
2091skip_queue: 2114skip_queue:
2092 tmp = list_entry(curr, task_t, run_list); 2115 tmp = list_entry(curr, struct task_struct, run_list);
2093 2116
2094 curr = curr->prev; 2117 curr = curr->prev;
2095 2118
@@ -2100,10 +2123,11 @@ skip_queue:
2100 */ 2123 */
2101 skip_for_load = tmp->load_weight > rem_load_move; 2124 skip_for_load = tmp->load_weight > rem_load_move;
2102 if (skip_for_load && idx < this_best_prio) 2125 if (skip_for_load && idx < this_best_prio)
2103 skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; 2126 skip_for_load = !best_prio_seen && idx == best_prio;
2104 if (skip_for_load || 2127 if (skip_for_load ||
2105 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { 2128 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
2106 busiest_best_prio_seen |= idx == busiest_best_prio; 2129
2130 best_prio_seen |= idx == best_prio;
2107 if (curr != head) 2131 if (curr != head)
2108 goto skip_queue; 2132 goto skip_queue;
2109 idx++; 2133 idx++;
@@ -2146,8 +2170,8 @@ out:
2146 2170
2147/* 2171/*
2148 * find_busiest_group finds and returns the busiest CPU group within the 2172 * find_busiest_group finds and returns the busiest CPU group within the
2149 * domain. It calculates and returns the amount of weighted load which should be 2173 * domain. It calculates and returns the amount of weighted load which
2150 * moved to restore balance via the imbalance parameter. 2174 * should be moved to restore balance via the imbalance parameter.
2151 */ 2175 */
2152static struct sched_group * 2176static struct sched_group *
2153find_busiest_group(struct sched_domain *sd, int this_cpu, 2177find_busiest_group(struct sched_domain *sd, int this_cpu,
@@ -2188,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2188 sum_weighted_load = sum_nr_running = avg_load = 0; 2212 sum_weighted_load = sum_nr_running = avg_load = 0;
2189 2213
2190 for_each_cpu_mask(i, group->cpumask) { 2214 for_each_cpu_mask(i, group->cpumask) {
2191 runqueue_t *rq = cpu_rq(i); 2215 struct rq *rq = cpu_rq(i);
2192 2216
2193 if (*sd_idle && !idle_cpu(i)) 2217 if (*sd_idle && !idle_cpu(i))
2194 *sd_idle = 0; 2218 *sd_idle = 0;
@@ -2269,7 +2293,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2269 * capacity but still has some space to pick up some load 2293 * capacity but still has some space to pick up some load
2270 * from other group and save more power 2294 * from other group and save more power
2271 */ 2295 */
2272 if (sum_nr_running <= group_capacity - 1) 2296 if (sum_nr_running <= group_capacity - 1) {
2273 if (sum_nr_running > leader_nr_running || 2297 if (sum_nr_running > leader_nr_running ||
2274 (sum_nr_running == leader_nr_running && 2298 (sum_nr_running == leader_nr_running &&
2275 first_cpu(group->cpumask) > 2299 first_cpu(group->cpumask) >
@@ -2277,7 +2301,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2277 group_leader = group; 2301 group_leader = group;
2278 leader_nr_running = sum_nr_running; 2302 leader_nr_running = sum_nr_running;
2279 } 2303 }
2280 2304 }
2281group_next: 2305group_next:
2282#endif 2306#endif
2283 group = group->next; 2307 group = group->next;
@@ -2332,8 +2356,7 @@ group_next:
2332 * moved 2356 * moved
2333 */ 2357 */
2334 if (*imbalance < busiest_load_per_task) { 2358 if (*imbalance < busiest_load_per_task) {
2335 unsigned long pwr_now, pwr_move; 2359 unsigned long tmp, pwr_now, pwr_move;
2336 unsigned long tmp;
2337 unsigned int imbn; 2360 unsigned int imbn;
2338 2361
2339small_imbalance: 2362small_imbalance:
@@ -2405,22 +2428,23 @@ ret:
2405/* 2428/*
2406 * find_busiest_queue - find the busiest runqueue among the cpus in group. 2429 * find_busiest_queue - find the busiest runqueue among the cpus in group.
2407 */ 2430 */
2408static runqueue_t *find_busiest_queue(struct sched_group *group, 2431static struct rq *
2409 enum idle_type idle, unsigned long imbalance) 2432find_busiest_queue(struct sched_group *group, enum idle_type idle,
2433 unsigned long imbalance)
2410{ 2434{
2435 struct rq *busiest = NULL, *rq;
2411 unsigned long max_load = 0; 2436 unsigned long max_load = 0;
2412 runqueue_t *busiest = NULL, *rqi;
2413 int i; 2437 int i;
2414 2438
2415 for_each_cpu_mask(i, group->cpumask) { 2439 for_each_cpu_mask(i, group->cpumask) {
2416 rqi = cpu_rq(i); 2440 rq = cpu_rq(i);
2417 2441
2418 if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance) 2442 if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
2419 continue; 2443 continue;
2420 2444
2421 if (rqi->raw_weighted_load > max_load) { 2445 if (rq->raw_weighted_load > max_load) {
2422 max_load = rqi->raw_weighted_load; 2446 max_load = rq->raw_weighted_load;
2423 busiest = rqi; 2447 busiest = rq;
2424 } 2448 }
2425 } 2449 }
2426 2450
@@ -2433,22 +2457,24 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2433 */ 2457 */
2434#define MAX_PINNED_INTERVAL 512 2458#define MAX_PINNED_INTERVAL 512
2435 2459
2436#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0) 2460static inline unsigned long minus_1_or_zero(unsigned long n)
2461{
2462 return n > 0 ? n - 1 : 0;
2463}
2464
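Turning minus_1_or_zero() from a macro into a static inline changes nothing functionally, but the helper now has a real parameter type and evaluates its argument exactly once. A standalone sketch of the same guard against underflowing an unsigned count:

#include <stdio.h>

static inline unsigned long minus_1_or_zero(unsigned long n)
{
        return n > 0 ? n - 1 : 0;       /* never wraps around to ULONG_MAX */
}

int main(void)
{
        printf("%lu %lu\n", minus_1_or_zero(5), minus_1_or_zero(0));  /* 4 0 */
        return 0;
}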
2437/* 2465/*
2438 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2466 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2439 * tasks if there is an imbalance. 2467 * tasks if there is an imbalance.
2440 * 2468 *
2441 * Called with this_rq unlocked. 2469 * Called with this_rq unlocked.
2442 */ 2470 */
2443static int load_balance(int this_cpu, runqueue_t *this_rq, 2471static int load_balance(int this_cpu, struct rq *this_rq,
2444 struct sched_domain *sd, enum idle_type idle) 2472 struct sched_domain *sd, enum idle_type idle)
2445{ 2473{
2474 int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
2446 struct sched_group *group; 2475 struct sched_group *group;
2447 runqueue_t *busiest;
2448 unsigned long imbalance; 2476 unsigned long imbalance;
2449 int nr_moved, all_pinned = 0; 2477 struct rq *busiest;
2450 int active_balance = 0;
2451 int sd_idle = 0;
2452 2478
2453 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2479 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
2454 !sched_smt_power_savings) 2480 !sched_smt_power_savings)
@@ -2482,8 +2508,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2482 */ 2508 */
2483 double_rq_lock(this_rq, busiest); 2509 double_rq_lock(this_rq, busiest);
2484 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2510 nr_moved = move_tasks(this_rq, this_cpu, busiest,
2485 minus_1_or_zero(busiest->nr_running), 2511 minus_1_or_zero(busiest->nr_running),
2486 imbalance, sd, idle, &all_pinned); 2512 imbalance, sd, idle, &all_pinned);
2487 double_rq_unlock(this_rq, busiest); 2513 double_rq_unlock(this_rq, busiest);
2488 2514
2489 /* All tasks on this runqueue were pinned by CPU affinity */ 2515 /* All tasks on this runqueue were pinned by CPU affinity */
@@ -2556,7 +2582,8 @@ out_one_pinned:
2556 (sd->balance_interval < sd->max_interval)) 2582 (sd->balance_interval < sd->max_interval))
2557 sd->balance_interval *= 2; 2583 sd->balance_interval *= 2;
2558 2584
2559 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2585 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2586 !sched_smt_power_savings)
2560 return -1; 2587 return -1;
2561 return 0; 2588 return 0;
2562} 2589}
@@ -2568,11 +2595,11 @@ out_one_pinned:
2568 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). 2595 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
2569 * this_rq is locked. 2596 * this_rq is locked.
2570 */ 2597 */
2571static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, 2598static int
2572 struct sched_domain *sd) 2599load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2573{ 2600{
2574 struct sched_group *group; 2601 struct sched_group *group;
2575 runqueue_t *busiest = NULL; 2602 struct rq *busiest = NULL;
2576 unsigned long imbalance; 2603 unsigned long imbalance;
2577 int nr_moved = 0; 2604 int nr_moved = 0;
2578 int sd_idle = 0; 2605 int sd_idle = 0;
@@ -2618,9 +2645,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
2618 2645
2619out_balanced: 2646out_balanced:
2620 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2647 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
2621 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2648 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2649 !sched_smt_power_savings)
2622 return -1; 2650 return -1;
2623 sd->nr_balance_failed = 0; 2651 sd->nr_balance_failed = 0;
2652
2624 return 0; 2653 return 0;
2625} 2654}
2626 2655
@@ -2628,16 +2657,15 @@ out_balanced:
2628 * idle_balance is called by schedule() if this_cpu is about to become 2657 * idle_balance is called by schedule() if this_cpu is about to become
2629 * idle. Attempts to pull tasks from other CPUs. 2658 * idle. Attempts to pull tasks from other CPUs.
2630 */ 2659 */
2631static void idle_balance(int this_cpu, runqueue_t *this_rq) 2660static void idle_balance(int this_cpu, struct rq *this_rq)
2632{ 2661{
2633 struct sched_domain *sd; 2662 struct sched_domain *sd;
2634 2663
2635 for_each_domain(this_cpu, sd) { 2664 for_each_domain(this_cpu, sd) {
2636 if (sd->flags & SD_BALANCE_NEWIDLE) { 2665 if (sd->flags & SD_BALANCE_NEWIDLE) {
2637 if (load_balance_newidle(this_cpu, this_rq, sd)) { 2666 /* If we've pulled tasks over stop searching: */
2638 /* We've pulled tasks over so stop searching */ 2667 if (load_balance_newidle(this_cpu, this_rq, sd))
2639 break; 2668 break;
2640 }
2641 } 2669 }
2642 } 2670 }
2643} 2671}
@@ -2650,14 +2678,14 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)
2650 * 2678 *
2651 * Called with busiest_rq locked. 2679 * Called with busiest_rq locked.
2652 */ 2680 */
2653static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) 2681static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
2654{ 2682{
2655 struct sched_domain *sd;
2656 runqueue_t *target_rq;
2657 int target_cpu = busiest_rq->push_cpu; 2683 int target_cpu = busiest_rq->push_cpu;
2684 struct sched_domain *sd;
2685 struct rq *target_rq;
2658 2686
2687 /* Is there any task to move? */
2659 if (busiest_rq->nr_running <= 1) 2688 if (busiest_rq->nr_running <= 1)
2660 /* no task to move */
2661 return; 2689 return;
2662 2690
2663 target_rq = cpu_rq(target_cpu); 2691 target_rq = cpu_rq(target_cpu);
@@ -2675,21 +2703,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
2675 /* Search for an sd spanning us and the target CPU. */ 2703 /* Search for an sd spanning us and the target CPU. */
2676 for_each_domain(target_cpu, sd) { 2704 for_each_domain(target_cpu, sd) {
2677 if ((sd->flags & SD_LOAD_BALANCE) && 2705 if ((sd->flags & SD_LOAD_BALANCE) &&
2678 cpu_isset(busiest_cpu, sd->span)) 2706 cpu_isset(busiest_cpu, sd->span))
2679 break; 2707 break;
2680 } 2708 }
2681 2709
2682 if (unlikely(sd == NULL)) 2710 if (likely(sd)) {
2683 goto out; 2711 schedstat_inc(sd, alb_cnt);
2684
2685 schedstat_inc(sd, alb_cnt);
2686 2712
2687 if (move_tasks(target_rq, target_cpu, busiest_rq, 1, 2713 if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
2688 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL)) 2714 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE,
2689 schedstat_inc(sd, alb_pushed); 2715 NULL))
2690 else 2716 schedstat_inc(sd, alb_pushed);
2691 schedstat_inc(sd, alb_failed); 2717 else
2692out: 2718 schedstat_inc(sd, alb_failed);
2719 }
2693 spin_unlock(&target_rq->lock); 2720 spin_unlock(&target_rq->lock);
2694} 2721}
2695 2722
@@ -2702,23 +2729,27 @@ out:
2702 * Balancing parameters are set up in arch_init_sched_domains. 2729 * Balancing parameters are set up in arch_init_sched_domains.
2703 */ 2730 */
2704 2731
2705/* Don't have all balancing operations going off at once */ 2732/* Don't have all balancing operations going off at once: */
2706#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) 2733static inline unsigned long cpu_offset(int cpu)
2734{
2735 return jiffies + cpu * HZ / NR_CPUS;
2736}
2707 2737
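The cpu_offset() helper staggers each CPU's rebalance time so that not all CPUs run load balancing on the same jiffy. A hedged userspace sketch of the arithmetic (the HZ and NR_CPUS values below are illustrative, not taken from any particular config):

#include <stdio.h>

#define HZ      250     /* illustrative */
#define NR_CPUS 8       /* illustrative */

static unsigned long cpu_offset(int cpu, unsigned long jiffies_now)
{
        /* each CPU gets its own slot inside one HZ-long window */
        return jiffies_now + cpu * HZ / NR_CPUS;
}

int main(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d balances at jiffy %lu\n",
                       cpu, cpu_offset(cpu, 1000));
        return 0;
}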
2708static void rebalance_tick(int this_cpu, runqueue_t *this_rq, 2738static void
2709 enum idle_type idle) 2739rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
2710{ 2740{
2711 unsigned long old_load, this_load; 2741 unsigned long this_load, interval, j = cpu_offset(this_cpu);
2712 unsigned long j = jiffies + CPU_OFFSET(this_cpu);
2713 struct sched_domain *sd; 2742 struct sched_domain *sd;
2714 int i; 2743 int i, scale;
2715 2744
2716 this_load = this_rq->raw_weighted_load; 2745 this_load = this_rq->raw_weighted_load;
2717 /* Update our load */ 2746
2718 for (i = 0; i < 3; i++) { 2747 /* Update our load: */
2719 unsigned long new_load = this_load; 2748 for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
2720 int scale = 1 << i; 2749 unsigned long old_load, new_load;
2750
2721 old_load = this_rq->cpu_load[i]; 2751 old_load = this_rq->cpu_load[i];
2752 new_load = this_load;
2722 /* 2753 /*
2723 * Round up the averaging division if load is increasing. This 2754 * Round up the averaging division if load is increasing. This
2724 * prevents us from getting stuck on 9 if the load is 10, for 2755 * prevents us from getting stuck on 9 if the load is 10, for
@@ -2730,8 +2761,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2730 } 2761 }
2731 2762
2732 for_each_domain(this_cpu, sd) { 2763 for_each_domain(this_cpu, sd) {
2733 unsigned long interval;
2734
2735 if (!(sd->flags & SD_LOAD_BALANCE)) 2764 if (!(sd->flags & SD_LOAD_BALANCE))
2736 continue; 2765 continue;
2737 2766
@@ -2761,17 +2790,18 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2761/* 2790/*
2762 * on UP we do not need to balance between CPUs: 2791 * on UP we do not need to balance between CPUs:
2763 */ 2792 */
2764static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) 2793static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
2765{ 2794{
2766} 2795}
2767static inline void idle_balance(int cpu, runqueue_t *rq) 2796static inline void idle_balance(int cpu, struct rq *rq)
2768{ 2797{
2769} 2798}
2770#endif 2799#endif
2771 2800
2772static inline int wake_priority_sleeper(runqueue_t *rq) 2801static inline int wake_priority_sleeper(struct rq *rq)
2773{ 2802{
2774 int ret = 0; 2803 int ret = 0;
2804
2775#ifdef CONFIG_SCHED_SMT 2805#ifdef CONFIG_SCHED_SMT
2776 spin_lock(&rq->lock); 2806 spin_lock(&rq->lock);
2777 /* 2807 /*
@@ -2795,25 +2825,26 @@ EXPORT_PER_CPU_SYMBOL(kstat);
2795 * This is called on clock ticks and on context switches. 2825 * This is called on clock ticks and on context switches.
2796 * Bank in p->sched_time the ns elapsed since the last tick or switch. 2826 * Bank in p->sched_time the ns elapsed since the last tick or switch.
2797 */ 2827 */
2798static inline void update_cpu_clock(task_t *p, runqueue_t *rq, 2828static inline void
2799 unsigned long long now) 2829update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
2800{ 2830{
2801 unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); 2831 p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
2802 p->sched_time += now - last;
2803} 2832}
2804 2833
2805/* 2834/*
2806 * Return current->sched_time plus any more ns on the sched_clock 2835 * Return current->sched_time plus any more ns on the sched_clock
2807 * that have not yet been banked. 2836 * that have not yet been banked.
2808 */ 2837 */
2809unsigned long long current_sched_time(const task_t *tsk) 2838unsigned long long current_sched_time(const struct task_struct *p)
2810{ 2839{
2811 unsigned long long ns; 2840 unsigned long long ns;
2812 unsigned long flags; 2841 unsigned long flags;
2842
2813 local_irq_save(flags); 2843 local_irq_save(flags);
2814 ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); 2844 ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
2815 ns = tsk->sched_time + (sched_clock() - ns); 2845 ns = p->sched_time + sched_clock() - ns;
2816 local_irq_restore(flags); 2846 local_irq_restore(flags);
2847
2817 return ns; 2848 return ns;
2818} 2849}
2819 2850
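update_cpu_clock() and current_sched_time() share one idea: p->sched_time banks nanoseconds up to the newer of the task's own timestamp and the runqueue's last tick, and anything after that point is still unbanked and is added from sched_clock() on demand. A small sketch of that bookkeeping with plain variables (all names below are hypothetical stand-ins, not the kernel fields):

#include <stdio.h>

static unsigned long long banked, task_stamp, last_tick;

static unsigned long long max_ull(unsigned long long a, unsigned long long b)
{
        return a > b ? a : b;
}

static void bank(unsigned long long now)        /* like update_cpu_clock() */
{
        banked += now - max_ull(task_stamp, last_tick);
        task_stamp = now;
}

static unsigned long long total(unsigned long long clock_now)  /* like current_sched_time() */
{
        return banked + (clock_now - max_ull(task_stamp, last_tick));
}

int main(void)
{
        last_tick = 100; task_stamp = 120;
        bank(150);                      /* banks 30ns */
        printf("%llu\n", total(170));   /* 30 banked + 20 unbanked = 50 */
        return 0;
}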
@@ -2827,11 +2858,16 @@ unsigned long long current_sched_time(const task_t *tsk)
2827 * increasing number of running tasks. We also ignore the interactivity 2858 * increasing number of running tasks. We also ignore the interactivity
2828 * if a better static_prio task has expired: 2859 * if a better static_prio task has expired:
2829 */ 2860 */
2830#define EXPIRED_STARVING(rq) \ 2861static inline int expired_starving(struct rq *rq)
2831 ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ 2862{
2832 (jiffies - (rq)->expired_timestamp >= \ 2863 if (rq->curr->static_prio > rq->best_expired_prio)
2833 STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ 2864 return 1;
2834 ((rq)->curr->static_prio > (rq)->best_expired_prio)) 2865 if (!STARVATION_LIMIT || !rq->expired_timestamp)
2866 return 0;
2867 if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
2868 return 1;
2869 return 0;
2870}
2835 2871
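The expired_starving() rewrite unfolds the old one-line macro into three readable tests. A standalone sketch of the same decision order (the STARVATION_LIMIT value and the struct below are illustrative stand-ins only):

#include <stdio.h>

#define STARVATION_LIMIT 10     /* illustrative: jiffies per running task */

struct rq_sketch {              /* hypothetical subset of struct rq */
        int curr_static_prio, best_expired_prio;
        unsigned long expired_timestamp, nr_running;
};

static int expired_starving(struct rq_sketch *rq, unsigned long jiffies_now)
{
        if (rq->curr_static_prio > rq->best_expired_prio)
                return 1;       /* a better static_prio task already expired */
        if (!STARVATION_LIMIT || !rq->expired_timestamp)
                return 0;
        if (jiffies_now - rq->expired_timestamp >
            STARVATION_LIMIT * rq->nr_running)
                return 1;       /* expired array has waited too long */
        return 0;
}

int main(void)
{
        struct rq_sketch rq = { 120, 120, 500, 4 };
        printf("%d\n", expired_starving(&rq, 560));  /* 60 > 10*4 -> 1 */
        return 0;
}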
2836/* 2872/*
2837 * Account user cpu time to a process. 2873 * Account user cpu time to a process.
@@ -2864,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
2864 cputime_t cputime) 2900 cputime_t cputime)
2865{ 2901{
2866 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2902 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2867 runqueue_t *rq = this_rq(); 2903 struct rq *rq = this_rq();
2868 cputime64_t tmp; 2904 cputime64_t tmp;
2869 2905
2870 p->stime = cputime_add(p->stime, cputime); 2906 p->stime = cputime_add(p->stime, cputime);
@@ -2894,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2894{ 2930{
2895 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2931 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2896 cputime64_t tmp = cputime_to_cputime64(steal); 2932 cputime64_t tmp = cputime_to_cputime64(steal);
2897 runqueue_t *rq = this_rq(); 2933 struct rq *rq = this_rq();
2898 2934
2899 if (p == rq->idle) { 2935 if (p == rq->idle) {
2900 p->stime = cputime_add(p->stime, steal); 2936 p->stime = cputime_add(p->stime, steal);
@@ -2915,10 +2951,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2915 */ 2951 */
2916void scheduler_tick(void) 2952void scheduler_tick(void)
2917{ 2953{
2918 int cpu = smp_processor_id();
2919 runqueue_t *rq = this_rq();
2920 task_t *p = current;
2921 unsigned long long now = sched_clock(); 2954 unsigned long long now = sched_clock();
2955 struct task_struct *p = current;
2956 int cpu = smp_processor_id();
2957 struct rq *rq = cpu_rq(cpu);
2922 2958
2923 update_cpu_clock(p, rq, now); 2959 update_cpu_clock(p, rq, now);
2924 2960
@@ -2968,7 +3004,7 @@ void scheduler_tick(void)
2968 3004
2969 if (!rq->expired_timestamp) 3005 if (!rq->expired_timestamp)
2970 rq->expired_timestamp = jiffies; 3006 rq->expired_timestamp = jiffies;
2971 if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { 3007 if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
2972 enqueue_task(p, rq->expired); 3008 enqueue_task(p, rq->expired);
2973 if (p->static_prio < rq->best_expired_prio) 3009 if (p->static_prio < rq->best_expired_prio)
2974 rq->best_expired_prio = p->static_prio; 3010 rq->best_expired_prio = p->static_prio;
@@ -3007,7 +3043,7 @@ out:
3007} 3043}
3008 3044
3009#ifdef CONFIG_SCHED_SMT 3045#ifdef CONFIG_SCHED_SMT
3010static inline void wakeup_busy_runqueue(runqueue_t *rq) 3046static inline void wakeup_busy_runqueue(struct rq *rq)
3011{ 3047{
3012 /* If an SMT runqueue is sleeping due to priority reasons wake it up */ 3048 /* If an SMT runqueue is sleeping due to priority reasons wake it up */
3013 if (rq->curr == rq->idle && rq->nr_running) 3049 if (rq->curr == rq->idle && rq->nr_running)
@@ -3033,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu)
3033 return; 3069 return;
3034 3070
3035 for_each_cpu_mask(i, sd->span) { 3071 for_each_cpu_mask(i, sd->span) {
3036 runqueue_t *smt_rq = cpu_rq(i); 3072 struct rq *smt_rq = cpu_rq(i);
3037 3073
3038 if (i == this_cpu) 3074 if (i == this_cpu)
3039 continue; 3075 continue;
@@ -3050,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu)
3050 * utilize, if another task runs on a sibling. This models the 3086 * utilize, if another task runs on a sibling. This models the
3051 * slowdown effect of other tasks running on siblings: 3087 * slowdown effect of other tasks running on siblings:
3052 */ 3088 */
3053static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) 3089static inline unsigned long
3090smt_slice(struct task_struct *p, struct sched_domain *sd)
3054{ 3091{
3055 return p->time_slice * (100 - sd->per_cpu_gain) / 100; 3092 return p->time_slice * (100 - sd->per_cpu_gain) / 100;
3056} 3093}
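smt_slice() scales a sibling's timeslice down to the fraction of CPU it can realistically use while the other SMT thread runs. A tiny worked example (the per_cpu_gain value of 25 is used here only as an illustration):

#include <stdio.h>

/* sketch: time_slice in ms, per_cpu_gain in percent */
static unsigned long smt_slice(unsigned long time_slice, int per_cpu_gain)
{
        return time_slice * (100 - per_cpu_gain) / 100;
}

int main(void)
{
        printf("%lu\n", smt_slice(100, 25));    /* 100ms slice -> 75ms */
        return 0;
}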
@@ -3061,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
3061 * acquire their lock. As we only trylock the normal locking order does not 3098 * acquire their lock. As we only trylock the normal locking order does not
3062 * need to be obeyed. 3099 * need to be obeyed.
3063 */ 3100 */
3064static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) 3101static int
3102dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3065{ 3103{
3066 struct sched_domain *tmp, *sd = NULL; 3104 struct sched_domain *tmp, *sd = NULL;
3067 int ret = 0, i; 3105 int ret = 0, i;
@@ -3081,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
3081 return 0; 3119 return 0;
3082 3120
3083 for_each_cpu_mask(i, sd->span) { 3121 for_each_cpu_mask(i, sd->span) {
3084 runqueue_t *smt_rq; 3122 struct task_struct *smt_curr;
3085 task_t *smt_curr; 3123 struct rq *smt_rq;
3086 3124
3087 if (i == this_cpu) 3125 if (i == this_cpu)
3088 continue; 3126 continue;
@@ -3127,9 +3165,8 @@ unlock:
3127static inline void wake_sleeping_dependent(int this_cpu) 3165static inline void wake_sleeping_dependent(int this_cpu)
3128{ 3166{
3129} 3167}
3130 3168static inline int
3131static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, 3169dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3132 task_t *p)
3133{ 3170{
3134 return 0; 3171 return 0;
3135} 3172}
@@ -3142,12 +3179,13 @@ void fastcall add_preempt_count(int val)
3142 /* 3179 /*
3143 * Underflow? 3180 * Underflow?
3144 */ 3181 */
3145 BUG_ON((preempt_count() < 0)); 3182 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
3183 return;
3146 preempt_count() += val; 3184 preempt_count() += val;
3147 /* 3185 /*
3148 * Spinlock count overflowing soon? 3186 * Spinlock count overflowing soon?
3149 */ 3187 */
3150 BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); 3188 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
3151} 3189}
3152EXPORT_SYMBOL(add_preempt_count); 3190EXPORT_SYMBOL(add_preempt_count);
3153 3191
@@ -3156,11 +3194,15 @@ void fastcall sub_preempt_count(int val)
3156 /* 3194 /*
3157 * Underflow? 3195 * Underflow?
3158 */ 3196 */
3159 BUG_ON(val > preempt_count()); 3197 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
3198 return;
3160 /* 3199 /*
3161 * Is the spinlock portion underflowing? 3200 * Is the spinlock portion underflowing?
3162 */ 3201 */
3163 BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); 3202 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
3203 !(preempt_count() & PREEMPT_MASK)))
3204 return;
3205
3164 preempt_count() -= val; 3206 preempt_count() -= val;
3165} 3207}
3166EXPORT_SYMBOL(sub_preempt_count); 3208EXPORT_SYMBOL(sub_preempt_count);
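Switching from BUG_ON() to DEBUG_LOCKS_WARN_ON() changes the failure mode from killing the machine to warning once and bailing out of the accounting. A hedged sketch of that warn-once-and-return shape (the helper below is a simplified stand-in, not the lockdep implementation):

#include <stdio.h>

static int debug_locks = 1;     /* stand-in for the global lockdep switch */

/* report only the first violation, then keep running */
static int warn_on_sketch(int cond, const char *what)
{
        if (cond && debug_locks) {
                debug_locks = 0;
                fprintf(stderr, "warning: %s\n", what);
        }
        return cond;
}

static int preempt_count_sketch;

static void add_preempt_count_sketch(int val)
{
        if (warn_on_sketch(preempt_count_sketch < 0, "underflow"))
                return;         /* refuse to make things worse */
        preempt_count_sketch += val;
}

int main(void)
{
        add_preempt_count_sketch(1);
        printf("%d\n", preempt_count_sketch);   /* 1 */
        return 0;
}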
@@ -3178,14 +3220,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
3178 */ 3220 */
3179asmlinkage void __sched schedule(void) 3221asmlinkage void __sched schedule(void)
3180{ 3222{
3181 long *switch_count; 3223 struct task_struct *prev, *next;
3182 task_t *prev, *next; 3224 struct prio_array *array;
3183 runqueue_t *rq;
3184 prio_array_t *array;
3185 struct list_head *queue; 3225 struct list_head *queue;
3186 unsigned long long now; 3226 unsigned long long now;
3187 unsigned long run_time; 3227 unsigned long run_time;
3188 int cpu, idx, new_prio; 3228 int cpu, idx, new_prio;
3229 long *switch_count;
3230 struct rq *rq;
3189 3231
3190 /* 3232 /*
3191 * Test if we are atomic. Since do_exit() needs to call into 3233 * Test if we are atomic. Since do_exit() needs to call into
@@ -3275,7 +3317,7 @@ need_resched_nonpreemptible:
3275 3317
3276 idx = sched_find_first_bit(array->bitmap); 3318 idx = sched_find_first_bit(array->bitmap);
3277 queue = array->queue + idx; 3319 queue = array->queue + idx;
3278 next = list_entry(queue->next, task_t, run_list); 3320 next = list_entry(queue->next, struct task_struct, run_list);
3279 3321
3280 if (!rt_task(next) && interactive_sleep(next->sleep_type)) { 3322 if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
3281 unsigned long long delta = now - next->timestamp; 3323 unsigned long long delta = now - next->timestamp;
@@ -3338,7 +3380,6 @@ switch_tasks:
3338 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3380 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3339 goto need_resched; 3381 goto need_resched;
3340} 3382}
3341
3342EXPORT_SYMBOL(schedule); 3383EXPORT_SYMBOL(schedule);
3343 3384
3344#ifdef CONFIG_PREEMPT 3385#ifdef CONFIG_PREEMPT
@@ -3383,7 +3424,6 @@ need_resched:
3383 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3424 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3384 goto need_resched; 3425 goto need_resched;
3385} 3426}
3386
3387EXPORT_SYMBOL(preempt_schedule); 3427EXPORT_SYMBOL(preempt_schedule);
3388 3428
3389/* 3429/*
@@ -3432,10 +3472,8 @@ need_resched:
3432int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, 3472int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
3433 void *key) 3473 void *key)
3434{ 3474{
3435 task_t *p = curr->private; 3475 return try_to_wake_up(curr->private, mode, sync);
3436 return try_to_wake_up(p, mode, sync);
3437} 3476}
3438
3439EXPORT_SYMBOL(default_wake_function); 3477EXPORT_SYMBOL(default_wake_function);
3440 3478
3441/* 3479/*
@@ -3453,13 +3491,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3453 struct list_head *tmp, *next; 3491 struct list_head *tmp, *next;
3454 3492
3455 list_for_each_safe(tmp, next, &q->task_list) { 3493 list_for_each_safe(tmp, next, &q->task_list) {
3456 wait_queue_t *curr; 3494 wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
3457 unsigned flags; 3495 unsigned flags = curr->flags;
3458 curr = list_entry(tmp, wait_queue_t, task_list); 3496
3459 flags = curr->flags;
3460 if (curr->func(curr, mode, sync, key) && 3497 if (curr->func(curr, mode, sync, key) &&
3461 (flags & WQ_FLAG_EXCLUSIVE) && 3498 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
3462 !--nr_exclusive)
3463 break; 3499 break;
3464 } 3500 }
3465} 3501}
@@ -3480,7 +3516,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
3480 __wake_up_common(q, mode, nr_exclusive, 0, key); 3516 __wake_up_common(q, mode, nr_exclusive, 0, key);
3481 spin_unlock_irqrestore(&q->lock, flags); 3517 spin_unlock_irqrestore(&q->lock, flags);
3482} 3518}
3483
3484EXPORT_SYMBOL(__wake_up); 3519EXPORT_SYMBOL(__wake_up);
3485 3520
3486/* 3521/*
@@ -3549,6 +3584,7 @@ EXPORT_SYMBOL(complete_all);
3549void fastcall __sched wait_for_completion(struct completion *x) 3584void fastcall __sched wait_for_completion(struct completion *x)
3550{ 3585{
3551 might_sleep(); 3586 might_sleep();
3587
3552 spin_lock_irq(&x->wait.lock); 3588 spin_lock_irq(&x->wait.lock);
3553 if (!x->done) { 3589 if (!x->done) {
3554 DECLARE_WAITQUEUE(wait, current); 3590 DECLARE_WAITQUEUE(wait, current);
@@ -3693,7 +3729,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
3693 schedule(); 3729 schedule();
3694 SLEEP_ON_TAIL 3730 SLEEP_ON_TAIL
3695} 3731}
3696
3697EXPORT_SYMBOL(interruptible_sleep_on); 3732EXPORT_SYMBOL(interruptible_sleep_on);
3698 3733
3699long fastcall __sched 3734long fastcall __sched
@@ -3709,7 +3744,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
3709 3744
3710 return timeout; 3745 return timeout;
3711} 3746}
3712
3713EXPORT_SYMBOL(interruptible_sleep_on_timeout); 3747EXPORT_SYMBOL(interruptible_sleep_on_timeout);
3714 3748
3715void fastcall __sched sleep_on(wait_queue_head_t *q) 3749void fastcall __sched sleep_on(wait_queue_head_t *q)
@@ -3722,7 +3756,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q)
3722 schedule(); 3756 schedule();
3723 SLEEP_ON_TAIL 3757 SLEEP_ON_TAIL
3724} 3758}
3725
3726EXPORT_SYMBOL(sleep_on); 3759EXPORT_SYMBOL(sleep_on);
3727 3760
3728long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) 3761long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
@@ -3752,11 +3785,11 @@ EXPORT_SYMBOL(sleep_on_timeout);
3752 * 3785 *
3753 * Used by the rt_mutex code to implement priority inheritance logic. 3786 * Used by the rt_mutex code to implement priority inheritance logic.
3754 */ 3787 */
3755void rt_mutex_setprio(task_t *p, int prio) 3788void rt_mutex_setprio(struct task_struct *p, int prio)
3756{ 3789{
3790 struct prio_array *array;
3757 unsigned long flags; 3791 unsigned long flags;
3758 prio_array_t *array; 3792 struct rq *rq;
3759 runqueue_t *rq;
3760 int oldprio; 3793 int oldprio;
3761 3794
3762 BUG_ON(prio < 0 || prio > MAX_PRIO); 3795 BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3793,12 +3826,12 @@ void rt_mutex_setprio(task_t *p, int prio)
3793 3826
3794#endif 3827#endif
3795 3828
3796void set_user_nice(task_t *p, long nice) 3829void set_user_nice(struct task_struct *p, long nice)
3797{ 3830{
3798 unsigned long flags; 3831 struct prio_array *array;
3799 prio_array_t *array;
3800 runqueue_t *rq;
3801 int old_prio, delta; 3832 int old_prio, delta;
3833 unsigned long flags;
3834 struct rq *rq;
3802 3835
3803 if (TASK_NICE(p) == nice || nice < -20 || nice > 19) 3836 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
3804 return; 3837 return;
@@ -3849,10 +3882,11 @@ EXPORT_SYMBOL(set_user_nice);
3849 * @p: task 3882 * @p: task
3850 * @nice: nice value 3883 * @nice: nice value
3851 */ 3884 */
3852int can_nice(const task_t *p, const int nice) 3885int can_nice(const struct task_struct *p, const int nice)
3853{ 3886{
3854 /* convert nice value [19,-20] to rlimit style value [1,40] */ 3887 /* convert nice value [19,-20] to rlimit style value [1,40] */
3855 int nice_rlim = 20 - nice; 3888 int nice_rlim = 20 - nice;
3889
3856 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || 3890 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
3857 capable(CAP_SYS_NICE)); 3891 capable(CAP_SYS_NICE));
3858} 3892}
@@ -3868,8 +3902,7 @@ int can_nice(const task_t *p, const int nice)
3868 */ 3902 */
3869asmlinkage long sys_nice(int increment) 3903asmlinkage long sys_nice(int increment)
3870{ 3904{
3871 int retval; 3905 long nice, retval;
3872 long nice;
3873 3906
3874 /* 3907 /*
3875 * Setpriority might change our priority at the same moment. 3908 * Setpriority might change our priority at the same moment.
@@ -3908,7 +3941,7 @@ asmlinkage long sys_nice(int increment)
3908 * RT tasks are offset by -200. Normal tasks are centered 3941 * RT tasks are offset by -200. Normal tasks are centered
3909 * around 0, value goes from -16 to +15. 3942 * around 0, value goes from -16 to +15.
3910 */ 3943 */
3911int task_prio(const task_t *p) 3944int task_prio(const struct task_struct *p)
3912{ 3945{
3913 return p->prio - MAX_RT_PRIO; 3946 return p->prio - MAX_RT_PRIO;
3914} 3947}
@@ -3917,7 +3950,7 @@ int task_prio(const task_t *p)
3917 * task_nice - return the nice value of a given task. 3950 * task_nice - return the nice value of a given task.
3918 * @p: the task in question. 3951 * @p: the task in question.
3919 */ 3952 */
3920int task_nice(const task_t *p) 3953int task_nice(const struct task_struct *p)
3921{ 3954{
3922 return TASK_NICE(p); 3955 return TASK_NICE(p);
3923} 3956}
@@ -3936,7 +3969,7 @@ int idle_cpu(int cpu)
3936 * idle_task - return the idle task for a given cpu. 3969 * idle_task - return the idle task for a given cpu.
3937 * @cpu: the processor in question. 3970 * @cpu: the processor in question.
3938 */ 3971 */
3939task_t *idle_task(int cpu) 3972struct task_struct *idle_task(int cpu)
3940{ 3973{
3941 return cpu_rq(cpu)->idle; 3974 return cpu_rq(cpu)->idle;
3942} 3975}
@@ -3945,7 +3978,7 @@ task_t *idle_task(int cpu)
3945 * find_process_by_pid - find a process with a matching PID value. 3978 * find_process_by_pid - find a process with a matching PID value.
3946 * @pid: the pid in question. 3979 * @pid: the pid in question.
3947 */ 3980 */
3948static inline task_t *find_process_by_pid(pid_t pid) 3981static inline struct task_struct *find_process_by_pid(pid_t pid)
3949{ 3982{
3950 return pid ? find_task_by_pid(pid) : current; 3983 return pid ? find_task_by_pid(pid) : current;
3951} 3984}
@@ -3954,6 +3987,7 @@ static inline task_t *find_process_by_pid(pid_t pid)
3954static void __setscheduler(struct task_struct *p, int policy, int prio) 3987static void __setscheduler(struct task_struct *p, int policy, int prio)
3955{ 3988{
3956 BUG_ON(p->array); 3989 BUG_ON(p->array);
3990
3957 p->policy = policy; 3991 p->policy = policy;
3958 p->rt_priority = prio; 3992 p->rt_priority = prio;
3959 p->normal_prio = normal_prio(p); 3993 p->normal_prio = normal_prio(p);
@@ -3977,11 +4011,10 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3977int sched_setscheduler(struct task_struct *p, int policy, 4011int sched_setscheduler(struct task_struct *p, int policy,
3978 struct sched_param *param) 4012 struct sched_param *param)
3979{ 4013{
3980 int retval; 4014 int retval, oldprio, oldpolicy = -1;
3981 int oldprio, oldpolicy = -1; 4015 struct prio_array *array;
3982 prio_array_t *array;
3983 unsigned long flags; 4016 unsigned long flags;
3984 runqueue_t *rq; 4017 struct rq *rq;
3985 4018
3986 /* may grab non-irq protected spin_locks */ 4019 /* may grab non-irq protected spin_locks */
3987 BUG_ON(in_interrupt()); 4020 BUG_ON(in_interrupt());
@@ -4079,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
4079static int 4112static int
4080do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 4113do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4081{ 4114{
4082 int retval;
4083 struct sched_param lparam; 4115 struct sched_param lparam;
4084 struct task_struct *p; 4116 struct task_struct *p;
4117 int retval;
4085 4118
4086 if (!param || pid < 0) 4119 if (!param || pid < 0)
4087 return -EINVAL; 4120 return -EINVAL;
@@ -4097,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4097 read_unlock_irq(&tasklist_lock); 4130 read_unlock_irq(&tasklist_lock);
4098 retval = sched_setscheduler(p, policy, &lparam); 4131 retval = sched_setscheduler(p, policy, &lparam);
4099 put_task_struct(p); 4132 put_task_struct(p);
4133
4100 return retval; 4134 return retval;
4101} 4135}
4102 4136
@@ -4132,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
4132 */ 4166 */
4133asmlinkage long sys_sched_getscheduler(pid_t pid) 4167asmlinkage long sys_sched_getscheduler(pid_t pid)
4134{ 4168{
4169 struct task_struct *p;
4135 int retval = -EINVAL; 4170 int retval = -EINVAL;
4136 task_t *p;
4137 4171
4138 if (pid < 0) 4172 if (pid < 0)
4139 goto out_nounlock; 4173 goto out_nounlock;
@@ -4160,8 +4194,8 @@ out_nounlock:
4160asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) 4194asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
4161{ 4195{
4162 struct sched_param lp; 4196 struct sched_param lp;
4197 struct task_struct *p;
4163 int retval = -EINVAL; 4198 int retval = -EINVAL;
4164 task_t *p;
4165 4199
4166 if (!param || pid < 0) 4200 if (!param || pid < 0)
4167 goto out_nounlock; 4201 goto out_nounlock;
@@ -4194,9 +4228,9 @@ out_unlock:
4194 4228
4195long sched_setaffinity(pid_t pid, cpumask_t new_mask) 4229long sched_setaffinity(pid_t pid, cpumask_t new_mask)
4196{ 4230{
4197 task_t *p;
4198 int retval;
4199 cpumask_t cpus_allowed; 4231 cpumask_t cpus_allowed;
4232 struct task_struct *p;
4233 int retval;
4200 4234
4201 lock_cpu_hotplug(); 4235 lock_cpu_hotplug();
4202 read_lock(&tasklist_lock); 4236 read_lock(&tasklist_lock);
@@ -4282,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
4282 4316
4283long sched_getaffinity(pid_t pid, cpumask_t *mask) 4317long sched_getaffinity(pid_t pid, cpumask_t *mask)
4284{ 4318{
4319 struct task_struct *p;
4285 int retval; 4320 int retval;
4286 task_t *p;
4287 4321
4288 lock_cpu_hotplug(); 4322 lock_cpu_hotplug();
4289 read_lock(&tasklist_lock); 4323 read_lock(&tasklist_lock);
@@ -4342,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
4342 */ 4376 */
4343asmlinkage long sys_sched_yield(void) 4377asmlinkage long sys_sched_yield(void)
4344{ 4378{
4345 runqueue_t *rq = this_rq_lock(); 4379 struct rq *rq = this_rq_lock();
4346 prio_array_t *array = current->array; 4380 struct prio_array *array = current->array, *target = rq->expired;
4347 prio_array_t *target = rq->expired;
4348 4381
4349 schedstat_inc(rq, yld_cnt); 4382 schedstat_inc(rq, yld_cnt);
4350 /* 4383 /*
@@ -4378,6 +4411,7 @@ asmlinkage long sys_sched_yield(void)
4378 * no need to preempt or enable interrupts: 4411 * no need to preempt or enable interrupts:
4379 */ 4412 */
4380 __release(rq->lock); 4413 __release(rq->lock);
4414 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
4381 _raw_spin_unlock(&rq->lock); 4415 _raw_spin_unlock(&rq->lock);
4382 preempt_enable_no_resched(); 4416 preempt_enable_no_resched();
4383 4417
@@ -4441,6 +4475,7 @@ int cond_resched_lock(spinlock_t *lock)
4441 spin_lock(lock); 4475 spin_lock(lock);
4442 } 4476 }
4443 if (need_resched() && __resched_legal()) { 4477 if (need_resched() && __resched_legal()) {
4478 spin_release(&lock->dep_map, 1, _THIS_IP_);
4444 _raw_spin_unlock(lock); 4479 _raw_spin_unlock(lock);
4445 preempt_enable_no_resched(); 4480 preempt_enable_no_resched();
4446 __cond_resched(); 4481 __cond_resched();
@@ -4456,7 +4491,9 @@ int __sched cond_resched_softirq(void)
4456 BUG_ON(!in_softirq()); 4491 BUG_ON(!in_softirq());
4457 4492
4458 if (need_resched() && __resched_legal()) { 4493 if (need_resched() && __resched_legal()) {
4459 __local_bh_enable(); 4494 raw_local_irq_disable();
4495 _local_bh_enable();
4496 raw_local_irq_enable();
4460 __cond_resched(); 4497 __cond_resched();
4461 local_bh_disable(); 4498 local_bh_disable();
4462 return 1; 4499 return 1;
@@ -4476,7 +4513,6 @@ void __sched yield(void)
4476 set_current_state(TASK_RUNNING); 4513 set_current_state(TASK_RUNNING);
4477 sys_sched_yield(); 4514 sys_sched_yield();
4478} 4515}
4479
4480EXPORT_SYMBOL(yield); 4516EXPORT_SYMBOL(yield);
4481 4517
4482/* 4518/*
@@ -4488,18 +4524,17 @@ EXPORT_SYMBOL(yield);
4488 */ 4524 */
4489void __sched io_schedule(void) 4525void __sched io_schedule(void)
4490{ 4526{
4491 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4527 struct rq *rq = &__raw_get_cpu_var(runqueues);
4492 4528
4493 atomic_inc(&rq->nr_iowait); 4529 atomic_inc(&rq->nr_iowait);
4494 schedule(); 4530 schedule();
4495 atomic_dec(&rq->nr_iowait); 4531 atomic_dec(&rq->nr_iowait);
4496} 4532}
4497
4498EXPORT_SYMBOL(io_schedule); 4533EXPORT_SYMBOL(io_schedule);
4499 4534
4500long __sched io_schedule_timeout(long timeout) 4535long __sched io_schedule_timeout(long timeout)
4501{ 4536{
4502 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4537 struct rq *rq = &__raw_get_cpu_var(runqueues);
4503 long ret; 4538 long ret;
4504 4539
4505 atomic_inc(&rq->nr_iowait); 4540 atomic_inc(&rq->nr_iowait);
@@ -4566,9 +4601,9 @@ asmlinkage long sys_sched_get_priority_min(int policy)
4566asmlinkage 4601asmlinkage
4567long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) 4602long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
4568{ 4603{
4604 struct task_struct *p;
4569 int retval = -EINVAL; 4605 int retval = -EINVAL;
4570 struct timespec t; 4606 struct timespec t;
4571 task_t *p;
4572 4607
4573 if (pid < 0) 4608 if (pid < 0)
4574 goto out_nounlock; 4609 goto out_nounlock;
@@ -4596,28 +4631,32 @@ out_unlock:
4596 4631
4597static inline struct task_struct *eldest_child(struct task_struct *p) 4632static inline struct task_struct *eldest_child(struct task_struct *p)
4598{ 4633{
4599 if (list_empty(&p->children)) return NULL; 4634 if (list_empty(&p->children))
4635 return NULL;
4600 return list_entry(p->children.next,struct task_struct,sibling); 4636 return list_entry(p->children.next,struct task_struct,sibling);
4601} 4637}
4602 4638
4603static inline struct task_struct *older_sibling(struct task_struct *p) 4639static inline struct task_struct *older_sibling(struct task_struct *p)
4604{ 4640{
4605 if (p->sibling.prev==&p->parent->children) return NULL; 4641 if (p->sibling.prev==&p->parent->children)
4642 return NULL;
4606 return list_entry(p->sibling.prev,struct task_struct,sibling); 4643 return list_entry(p->sibling.prev,struct task_struct,sibling);
4607} 4644}
4608 4645
4609static inline struct task_struct *younger_sibling(struct task_struct *p) 4646static inline struct task_struct *younger_sibling(struct task_struct *p)
4610{ 4647{
4611 if (p->sibling.next==&p->parent->children) return NULL; 4648 if (p->sibling.next==&p->parent->children)
4649 return NULL;
4612 return list_entry(p->sibling.next,struct task_struct,sibling); 4650 return list_entry(p->sibling.next,struct task_struct,sibling);
4613} 4651}
4614 4652
4615static void show_task(task_t *p) 4653static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
4654
4655static void show_task(struct task_struct *p)
4616{ 4656{
4617 task_t *relative; 4657 struct task_struct *relative;
4618 unsigned state;
4619 unsigned long free = 0; 4658 unsigned long free = 0;
4620 static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; 4659 unsigned state;
4621 4660
4622 printk("%-13.13s ", p->comm); 4661 printk("%-13.13s ", p->comm);
4623 state = p->state ? __ffs(p->state) + 1 : 0; 4662 state = p->state ? __ffs(p->state) + 1 : 0;
@@ -4668,7 +4707,7 @@ static void show_task(task_t *p)
4668 4707
4669void show_state(void) 4708void show_state(void)
4670{ 4709{
4671 task_t *g, *p; 4710 struct task_struct *g, *p;
4672 4711
4673#if (BITS_PER_LONG == 32) 4712#if (BITS_PER_LONG == 32)
4674 printk("\n" 4713 printk("\n"
@@ -4690,7 +4729,7 @@ void show_state(void)
4690 } while_each_thread(g, p); 4729 } while_each_thread(g, p);
4691 4730
4692 read_unlock(&tasklist_lock); 4731 read_unlock(&tasklist_lock);
4693 mutex_debug_show_all_locks(); 4732 debug_show_all_locks();
4694} 4733}
4695 4734
4696/** 4735/**
@@ -4701,9 +4740,9 @@ void show_state(void)
4701 * NOTE: this function does not set the idle thread's NEED_RESCHED 4740 * NOTE: this function does not set the idle thread's NEED_RESCHED
4702 * flag, to make booting more robust. 4741 * flag, to make booting more robust.
4703 */ 4742 */
4704void __devinit init_idle(task_t *idle, int cpu) 4743void __devinit init_idle(struct task_struct *idle, int cpu)
4705{ 4744{
4706 runqueue_t *rq = cpu_rq(cpu); 4745 struct rq *rq = cpu_rq(cpu);
4707 unsigned long flags; 4746 unsigned long flags;
4708 4747
4709 idle->timestamp = sched_clock(); 4748 idle->timestamp = sched_clock();
@@ -4742,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4742/* 4781/*
4743 * This is how migration works: 4782 * This is how migration works:
4744 * 4783 *
4745 * 1) we queue a migration_req_t structure in the source CPU's 4784 * 1) we queue a struct migration_req structure in the source CPU's
4746 * runqueue and wake up that CPU's migration thread. 4785 * runqueue and wake up that CPU's migration thread.
4747 * 2) we down() the locked semaphore => thread blocks. 4786 * 2) we down() the locked semaphore => thread blocks.
4748 * 3) migration thread wakes up (implicitly it forces the migrated 4787 * 3) migration thread wakes up (implicitly it forces the migrated
@@ -4764,12 +4803,12 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4764 * task must not exit() & deallocate itself prematurely. The 4803 * task must not exit() & deallocate itself prematurely. The
4765 * call is not atomic; no spinlocks may be held. 4804 * call is not atomic; no spinlocks may be held.
4766 */ 4805 */
4767int set_cpus_allowed(task_t *p, cpumask_t new_mask) 4806int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
4768{ 4807{
4808 struct migration_req req;
4769 unsigned long flags; 4809 unsigned long flags;
4810 struct rq *rq;
4770 int ret = 0; 4811 int ret = 0;
4771 migration_req_t req;
4772 runqueue_t *rq;
4773 4812
4774 rq = task_rq_lock(p, &flags); 4813 rq = task_rq_lock(p, &flags);
4775 if (!cpus_intersects(new_mask, cpu_online_map)) { 4814 if (!cpus_intersects(new_mask, cpu_online_map)) {
@@ -4792,9 +4831,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
4792 } 4831 }
4793out: 4832out:
4794 task_rq_unlock(rq, &flags); 4833 task_rq_unlock(rq, &flags);
4834
4795 return ret; 4835 return ret;
4796} 4836}
4797
4798EXPORT_SYMBOL_GPL(set_cpus_allowed); 4837EXPORT_SYMBOL_GPL(set_cpus_allowed);
4799 4838
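To make the "This is how migration works" comment above concrete, here is a minimal, hedged usage sketch of the set_cpus_allowed() path shown in this hunk: the caller restricts a task to one CPU, and the scheduler queues a struct migration_req and wakes the migration thread if the task must move. The helper name pin_task_to_cpu() is purely illustrative and not part of the patch.

#include <linux/sched.h>
#include <linux/cpumask.h>

/*
 * Illustrative helper: restrict @p to run only on @cpu.
 * set_cpus_allowed() may block while the migration thread moves
 * the task, so it must not be called with spinlocks held.
 */
static int pin_task_to_cpu(struct task_struct *p, int cpu)
{
	cpumask_t mask = CPU_MASK_NONE;

	cpu_set(cpu, mask);
	return set_cpus_allowed(p, mask);	/* 0 on success, -EINVAL otherwise */
}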
4800/* 4839/*
@@ -4810,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
4810 */ 4849 */
4811static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 4850static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4812{ 4851{
4813 runqueue_t *rq_dest, *rq_src; 4852 struct rq *rq_dest, *rq_src;
4814 int ret = 0; 4853 int ret = 0;
4815 4854
4816 if (unlikely(cpu_is_offline(dest_cpu))) 4855 if (unlikely(cpu_is_offline(dest_cpu)))
@@ -4855,16 +4894,16 @@ out:
4855 */ 4894 */
4856static int migration_thread(void *data) 4895static int migration_thread(void *data)
4857{ 4896{
4858 runqueue_t *rq;
4859 int cpu = (long)data; 4897 int cpu = (long)data;
4898 struct rq *rq;
4860 4899
4861 rq = cpu_rq(cpu); 4900 rq = cpu_rq(cpu);
4862 BUG_ON(rq->migration_thread != current); 4901 BUG_ON(rq->migration_thread != current);
4863 4902
4864 set_current_state(TASK_INTERRUPTIBLE); 4903 set_current_state(TASK_INTERRUPTIBLE);
4865 while (!kthread_should_stop()) { 4904 while (!kthread_should_stop()) {
4905 struct migration_req *req;
4866 struct list_head *head; 4906 struct list_head *head;
4867 migration_req_t *req;
4868 4907
4869 try_to_freeze(); 4908 try_to_freeze();
4870 4909
@@ -4888,7 +4927,7 @@ static int migration_thread(void *data)
4888 set_current_state(TASK_INTERRUPTIBLE); 4927 set_current_state(TASK_INTERRUPTIBLE);
4889 continue; 4928 continue;
4890 } 4929 }
4891 req = list_entry(head->next, migration_req_t, list); 4930 req = list_entry(head->next, struct migration_req, list);
4892 list_del_init(head->next); 4931 list_del_init(head->next);
4893 4932
4894 spin_unlock(&rq->lock); 4933 spin_unlock(&rq->lock);
@@ -4913,28 +4952,28 @@ wait_to_die:
4913 4952
4914#ifdef CONFIG_HOTPLUG_CPU 4953#ifdef CONFIG_HOTPLUG_CPU
4915/* Figure out where task on dead CPU should go, use force if necessary. */ 4954static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)

4916static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) 4955static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
4917{ 4956{
4918 runqueue_t *rq;
4919 unsigned long flags; 4957 unsigned long flags;
4920 int dest_cpu;
4921 cpumask_t mask; 4958 cpumask_t mask;
4959 struct rq *rq;
4960 int dest_cpu;
4922 4961
4923restart: 4962restart:
4924 /* On same node? */ 4963 /* On same node? */
4925 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 4964 mask = node_to_cpumask(cpu_to_node(dead_cpu));
4926 cpus_and(mask, mask, tsk->cpus_allowed); 4965 cpus_and(mask, mask, p->cpus_allowed);
4927 dest_cpu = any_online_cpu(mask); 4966 dest_cpu = any_online_cpu(mask);
4928 4967
4929 /* On any allowed CPU? */ 4968 /* On any allowed CPU? */
4930 if (dest_cpu == NR_CPUS) 4969 if (dest_cpu == NR_CPUS)
4931 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4970 dest_cpu = any_online_cpu(p->cpus_allowed);
4932 4971
4933 /* No more Mr. Nice Guy. */ 4972 /* No more Mr. Nice Guy. */
4934 if (dest_cpu == NR_CPUS) { 4973 if (dest_cpu == NR_CPUS) {
4935 rq = task_rq_lock(tsk, &flags); 4974 rq = task_rq_lock(p, &flags);
4936 cpus_setall(tsk->cpus_allowed); 4975 cpus_setall(p->cpus_allowed);
4937 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4976 dest_cpu = any_online_cpu(p->cpus_allowed);
4938 task_rq_unlock(rq, &flags); 4977 task_rq_unlock(rq, &flags);
4939 4978
4940 /* 4979 /*
@@ -4942,12 +4981,12 @@ restart:
4942 * kernel threads (both mm NULL), since they never 4981 * kernel threads (both mm NULL), since they never
4943 * leave kernel. 4982 * leave kernel.
4944 */ 4983 */
4945 if (tsk->mm && printk_ratelimit()) 4984 if (p->mm && printk_ratelimit())
4946 printk(KERN_INFO "process %d (%s) no " 4985 printk(KERN_INFO "process %d (%s) no "
4947 "longer affine to cpu%d\n", 4986 "longer affine to cpu%d\n",
4948 tsk->pid, tsk->comm, dead_cpu); 4987 p->pid, p->comm, dead_cpu);
4949 } 4988 }
4950 if (!__migrate_task(tsk, dead_cpu, dest_cpu)) 4989 if (!__migrate_task(p, dead_cpu, dest_cpu))
4951 goto restart; 4990 goto restart;
4952} 4991}
4953 4992
@@ -4958,9 +4997,9 @@ restart:
4958 * their home CPUs. So we just add the counter to another CPU's counter, 4997 * their home CPUs. So we just add the counter to another CPU's counter,
4959 * to keep the global sum constant after CPU-down: 4998 * to keep the global sum constant after CPU-down:
4960 */ 4999 */
4961static void migrate_nr_uninterruptible(runqueue_t *rq_src) 5000static void migrate_nr_uninterruptible(struct rq *rq_src)
4962{ 5001{
4963 runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); 5002 struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
4964 unsigned long flags; 5003 unsigned long flags;
4965 5004
4966 local_irq_save(flags); 5005 local_irq_save(flags);
@@ -4974,48 +5013,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src)
4974/* Run through task list and migrate tasks from the dead cpu. */ 5013/* Run through task list and migrate tasks from the dead cpu. */
4975static void migrate_live_tasks(int src_cpu) 5014static void migrate_live_tasks(int src_cpu)
4976{ 5015{
4977 struct task_struct *tsk, *t; 5016 struct task_struct *p, *t;
4978 5017
4979 write_lock_irq(&tasklist_lock); 5018 write_lock_irq(&tasklist_lock);
4980 5019
4981 do_each_thread(t, tsk) { 5020 do_each_thread(t, p) {
4982 if (tsk == current) 5021 if (p == current)
4983 continue; 5022 continue;
4984 5023
4985 if (task_cpu(tsk) == src_cpu) 5024 if (task_cpu(p) == src_cpu)
4986 move_task_off_dead_cpu(src_cpu, tsk); 5025 move_task_off_dead_cpu(src_cpu, p);
4987 } while_each_thread(t, tsk); 5026 } while_each_thread(t, p);
4988 5027
4989 write_unlock_irq(&tasklist_lock); 5028 write_unlock_irq(&tasklist_lock);
4990} 5029}
4991 5030
4992/* Schedules idle task to be the next runnable task on current CPU. 5031/* Schedules idle task to be the next runnable task on current CPU.
4993 * It does so by boosting its priority to highest possible and adding it to 5032 * It does so by boosting its priority to highest possible and adding it to
4994 * the _front_ of runqueue. Used by CPU offline code. 5033 * the _front_ of the runqueue. Used by CPU offline code.
4995 */ 5034 */
4996void sched_idle_next(void) 5035void sched_idle_next(void)
4997{ 5036{
4998 int cpu = smp_processor_id(); 5037 int this_cpu = smp_processor_id();
4999 runqueue_t *rq = this_rq(); 5038 struct rq *rq = cpu_rq(this_cpu);
5000 struct task_struct *p = rq->idle; 5039 struct task_struct *p = rq->idle;
5001 unsigned long flags; 5040 unsigned long flags;
5002 5041
5003 /* cpu has to be offline */ 5042 /* cpu has to be offline */
5004 BUG_ON(cpu_online(cpu)); 5043 BUG_ON(cpu_online(this_cpu));
5005 5044
5006 /* Strictly not necessary since rest of the CPUs are stopped by now 5045 /*
5007 * and interrupts disabled on current cpu. 5046 * Strictly not necessary since rest of the CPUs are stopped by now
5047 * and interrupts disabled on the current cpu.
5008 */ 5048 */
5009 spin_lock_irqsave(&rq->lock, flags); 5049 spin_lock_irqsave(&rq->lock, flags);
5010 5050
5011 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); 5051 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
5012 /* Add idle task to _front_ of it's priority queue */ 5052
5053 /* Add idle task to the _front_ of its priority queue: */
5013 __activate_idle_task(p, rq); 5054 __activate_idle_task(p, rq);
5014 5055
5015 spin_unlock_irqrestore(&rq->lock, flags); 5056 spin_unlock_irqrestore(&rq->lock, flags);
5016} 5057}
5017 5058
5018/* Ensures that the idle task is using init_mm right before its cpu goes 5059/*
5060 * Ensures that the idle task is using init_mm right before its cpu goes
5019 * offline. 5061 * offline.
5020 */ 5062 */
5021void idle_task_exit(void) 5063void idle_task_exit(void)
@@ -5029,17 +5071,17 @@ void idle_task_exit(void)
5029 mmdrop(mm); 5071 mmdrop(mm);
5030} 5072}
5031 5073
5032static void migrate_dead(unsigned int dead_cpu, task_t *tsk) 5074static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5033{ 5075{
5034 struct runqueue *rq = cpu_rq(dead_cpu); 5076 struct rq *rq = cpu_rq(dead_cpu);
5035 5077
5036 /* Must be exiting, otherwise would be on tasklist. */ 5078 /* Must be exiting, otherwise would be on tasklist. */
5037 BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); 5079 BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
5038 5080
5039 /* Cannot have done final schedule yet: would have vanished. */ 5081 /* Cannot have done final schedule yet: would have vanished. */
5040 BUG_ON(tsk->flags & PF_DEAD); 5082 BUG_ON(p->flags & PF_DEAD);
5041 5083
5042 get_task_struct(tsk); 5084 get_task_struct(p);
5043 5085
5044 /* 5086 /*
5045 * Drop lock around migration; if someone else moves it, 5087 * Drop lock around migration; if someone else moves it,
@@ -5047,25 +5089,25 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
5047 * fine. 5089 * fine.
5048 */ 5090 */
5049 spin_unlock_irq(&rq->lock); 5091 spin_unlock_irq(&rq->lock);
5050 move_task_off_dead_cpu(dead_cpu, tsk); 5092 move_task_off_dead_cpu(dead_cpu, p);
5051 spin_lock_irq(&rq->lock); 5093 spin_lock_irq(&rq->lock);
5052 5094
5053 put_task_struct(tsk); 5095 put_task_struct(p);
5054} 5096}
5055 5097
5056/* release_task() removes task from tasklist, so we won't find dead tasks. */ 5098/* release_task() removes task from tasklist, so we won't find dead tasks. */
5057static void migrate_dead_tasks(unsigned int dead_cpu) 5099static void migrate_dead_tasks(unsigned int dead_cpu)
5058{ 5100{
5059 unsigned arr, i; 5101 struct rq *rq = cpu_rq(dead_cpu);
5060 struct runqueue *rq = cpu_rq(dead_cpu); 5102 unsigned int arr, i;
5061 5103
5062 for (arr = 0; arr < 2; arr++) { 5104 for (arr = 0; arr < 2; arr++) {
5063 for (i = 0; i < MAX_PRIO; i++) { 5105 for (i = 0; i < MAX_PRIO; i++) {
5064 struct list_head *list = &rq->arrays[arr].queue[i]; 5106 struct list_head *list = &rq->arrays[arr].queue[i];
5107
5065 while (!list_empty(list)) 5108 while (!list_empty(list))
5066 migrate_dead(dead_cpu, 5109 migrate_dead(dead_cpu, list_entry(list->next,
5067 list_entry(list->next, task_t, 5110 struct task_struct, run_list));
5068 run_list));
5069 } 5111 }
5070 } 5112 }
5071} 5113}
@@ -5075,14 +5117,13 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
5075 * migration_call - callback that gets triggered when a CPU is added. 5117 * migration_call - callback that gets triggered when a CPU is added.
5076 * Here we can start up the necessary migration thread for the new CPU. 5118 * Here we can start up the necessary migration thread for the new CPU.
5077 */ 5119 */
5078static int __cpuinit migration_call(struct notifier_block *nfb, 5120static int __cpuinit
5079 unsigned long action, 5121migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5080 void *hcpu)
5081{ 5122{
5082 int cpu = (long)hcpu;
5083 struct task_struct *p; 5123 struct task_struct *p;
5084 struct runqueue *rq; 5124 int cpu = (long)hcpu;
5085 unsigned long flags; 5125 unsigned long flags;
5126 struct rq *rq;
5086 5127
5087 switch (action) { 5128 switch (action) {
5088 case CPU_UP_PREPARE: 5129 case CPU_UP_PREPARE:
@@ -5097,10 +5138,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5097 task_rq_unlock(rq, &flags); 5138 task_rq_unlock(rq, &flags);
5098 cpu_rq(cpu)->migration_thread = p; 5139 cpu_rq(cpu)->migration_thread = p;
5099 break; 5140 break;
5141
5100 case CPU_ONLINE: 5142 case CPU_ONLINE:
5101 /* Strictly unnecessary, as first user will wake it. */ 5143 /* Strictly unnecessary, as first user will wake it. */
5102 wake_up_process(cpu_rq(cpu)->migration_thread); 5144 wake_up_process(cpu_rq(cpu)->migration_thread);
5103 break; 5145 break;
5146
5104#ifdef CONFIG_HOTPLUG_CPU 5147#ifdef CONFIG_HOTPLUG_CPU
5105 case CPU_UP_CANCELED: 5148 case CPU_UP_CANCELED:
5106 if (!cpu_rq(cpu)->migration_thread) 5149 if (!cpu_rq(cpu)->migration_thread)
@@ -5111,6 +5154,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5111 kthread_stop(cpu_rq(cpu)->migration_thread); 5154 kthread_stop(cpu_rq(cpu)->migration_thread);
5112 cpu_rq(cpu)->migration_thread = NULL; 5155 cpu_rq(cpu)->migration_thread = NULL;
5113 break; 5156 break;
5157
5114 case CPU_DEAD: 5158 case CPU_DEAD:
5115 migrate_live_tasks(cpu); 5159 migrate_live_tasks(cpu);
5116 rq = cpu_rq(cpu); 5160 rq = cpu_rq(cpu);
@@ -5131,9 +5175,10 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5131 * the requestors. */ 5175 * the requestors. */
5132 spin_lock_irq(&rq->lock); 5176 spin_lock_irq(&rq->lock);
5133 while (!list_empty(&rq->migration_queue)) { 5177 while (!list_empty(&rq->migration_queue)) {
5134 migration_req_t *req; 5178 struct migration_req *req;
5179
5135 req = list_entry(rq->migration_queue.next, 5180 req = list_entry(rq->migration_queue.next,
5136 migration_req_t, list); 5181 struct migration_req, list);
5137 list_del_init(&req->list); 5182 list_del_init(&req->list);
5138 complete(&req->done); 5183 complete(&req->done);
5139 } 5184 }
@@ -5155,10 +5200,12 @@ static struct notifier_block __cpuinitdata migration_notifier = {
5155int __init migration_init(void) 5200int __init migration_init(void)
5156{ 5201{
5157 void *cpu = (void *)(long)smp_processor_id(); 5202 void *cpu = (void *)(long)smp_processor_id();
5158 /* Start one for boot CPU. */ 5203
5204 /* Start one for the boot CPU: */
5159 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); 5205 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
5160 migration_call(&migration_notifier, CPU_ONLINE, cpu); 5206 migration_call(&migration_notifier, CPU_ONLINE, cpu);
5161 register_cpu_notifier(&migration_notifier); 5207 register_cpu_notifier(&migration_notifier);
5208
5162 return 0; 5209 return 0;
5163} 5210}
5164#endif 5211#endif
@@ -5254,7 +5301,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
5254 } while (sd); 5301 } while (sd);
5255} 5302}
5256#else 5303#else
5257#define sched_domain_debug(sd, cpu) {} 5304# define sched_domain_debug(sd, cpu) do { } while (0)
5258#endif 5305#endif
5259 5306
5260static int sd_degenerate(struct sched_domain *sd) 5307static int sd_degenerate(struct sched_domain *sd)
@@ -5280,8 +5327,8 @@ static int sd_degenerate(struct sched_domain *sd)
5280 return 1; 5327 return 1;
5281} 5328}
5282 5329
5283static int sd_parent_degenerate(struct sched_domain *sd, 5330static int
5284 struct sched_domain *parent) 5331sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5285{ 5332{
5286 unsigned long cflags = sd->flags, pflags = parent->flags; 5333 unsigned long cflags = sd->flags, pflags = parent->flags;
5287 5334
@@ -5314,7 +5361,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
5314 */ 5361 */
5315static void cpu_attach_domain(struct sched_domain *sd, int cpu) 5362static void cpu_attach_domain(struct sched_domain *sd, int cpu)
5316{ 5363{
5317 runqueue_t *rq = cpu_rq(cpu); 5364 struct rq *rq = cpu_rq(cpu);
5318 struct sched_domain *tmp; 5365 struct sched_domain *tmp;
5319 5366
5320 /* Remove the sched domains which do not contribute to scheduling. */ 5367 /* Remove the sched domains which do not contribute to scheduling. */
@@ -5576,8 +5623,8 @@ static void touch_cache(void *__cache, unsigned long __size)
5576/* 5623/*
5577 * Measure the cache-cost of one task migration. Returns in units of nsec. 5624 * Measure the cache-cost of one task migration. Returns in units of nsec.
5578 */ 5625 */
5579static unsigned long long measure_one(void *cache, unsigned long size, 5626static unsigned long long
5580 int source, int target) 5627measure_one(void *cache, unsigned long size, int source, int target)
5581{ 5628{
5582 cpumask_t mask, saved_mask; 5629 cpumask_t mask, saved_mask;
5583 unsigned long long t0, t1, t2, t3, cost; 5630 unsigned long long t0, t1, t2, t3, cost;
@@ -5927,9 +5974,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes)
5927 */ 5974 */
5928static cpumask_t sched_domain_node_span(int node) 5975static cpumask_t sched_domain_node_span(int node)
5929{ 5976{
5930 int i;
5931 cpumask_t span, nodemask;
5932 DECLARE_BITMAP(used_nodes, MAX_NUMNODES); 5977 DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
5978 cpumask_t span, nodemask;
5979 int i;
5933 5980
5934 cpus_clear(span); 5981 cpus_clear(span);
5935 bitmap_zero(used_nodes, MAX_NUMNODES); 5982 bitmap_zero(used_nodes, MAX_NUMNODES);
@@ -5940,6 +5987,7 @@ static cpumask_t sched_domain_node_span(int node)
5940 5987
5941 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { 5988 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
5942 int next_node = find_next_best_node(node, used_nodes); 5989 int next_node = find_next_best_node(node, used_nodes);
5990
5943 nodemask = node_to_cpumask(next_node); 5991 nodemask = node_to_cpumask(next_node);
5944 cpus_or(span, span, nodemask); 5992 cpus_or(span, span, nodemask);
5945 } 5993 }
@@ -5949,19 +5997,23 @@ static cpumask_t sched_domain_node_span(int node)
5949#endif 5997#endif
5950 5998
5951int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 5999int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6000
5952/* 6001/*
5953 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we 6002 * SMT sched-domains:
5954 * can switch it on easily if needed.
5955 */ 6003 */
5956#ifdef CONFIG_SCHED_SMT 6004#ifdef CONFIG_SCHED_SMT
5957static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 6005static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
5958static struct sched_group sched_group_cpus[NR_CPUS]; 6006static struct sched_group sched_group_cpus[NR_CPUS];
6007
5959static int cpu_to_cpu_group(int cpu) 6008static int cpu_to_cpu_group(int cpu)
5960{ 6009{
5961 return cpu; 6010 return cpu;
5962} 6011}
5963#endif 6012#endif
5964 6013
6014/*
6015 * multi-core sched-domains:
6016 */
5965#ifdef CONFIG_SCHED_MC 6017#ifdef CONFIG_SCHED_MC
5966static DEFINE_PER_CPU(struct sched_domain, core_domains); 6018static DEFINE_PER_CPU(struct sched_domain, core_domains);
5967static struct sched_group *sched_group_core_bycpu[NR_CPUS]; 6019static struct sched_group *sched_group_core_bycpu[NR_CPUS];
@@ -5981,9 +6033,10 @@ static int cpu_to_core_group(int cpu)
5981 6033
5982static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6034static DEFINE_PER_CPU(struct sched_domain, phys_domains);
5983static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; 6035static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
6036
5984static int cpu_to_phys_group(int cpu) 6037static int cpu_to_phys_group(int cpu)
5985{ 6038{
5986#if defined(CONFIG_SCHED_MC) 6039#ifdef CONFIG_SCHED_MC
5987 cpumask_t mask = cpu_coregroup_map(cpu); 6040 cpumask_t mask = cpu_coregroup_map(cpu);
5988 return first_cpu(mask); 6041 return first_cpu(mask);
5989#elif defined(CONFIG_SCHED_SMT) 6042#elif defined(CONFIG_SCHED_SMT)
@@ -6529,6 +6582,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
6529int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) 6582int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
6530{ 6583{
6531 int err = 0; 6584 int err = 0;
6585
6532#ifdef CONFIG_SCHED_SMT 6586#ifdef CONFIG_SCHED_SMT
6533 if (smt_capable()) 6587 if (smt_capable())
6534 err = sysfs_create_file(&cls->kset.kobj, 6588 err = sysfs_create_file(&cls->kset.kobj,
@@ -6548,7 +6602,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
6548{ 6602{
6549 return sprintf(page, "%u\n", sched_mc_power_savings); 6603 return sprintf(page, "%u\n", sched_mc_power_savings);
6550} 6604}
6551static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6605static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
6606 const char *buf, size_t count)
6552{ 6607{
6553 return sched_power_savings_store(buf, count, 0); 6608 return sched_power_savings_store(buf, count, 0);
6554} 6609}
@@ -6561,7 +6616,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
6561{ 6616{
6562 return sprintf(page, "%u\n", sched_smt_power_savings); 6617 return sprintf(page, "%u\n", sched_smt_power_savings);
6563} 6618}
6564static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6619static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
6620 const char *buf, size_t count)
6565{ 6621{
6566 return sched_power_savings_store(buf, count, 1); 6622 return sched_power_savings_store(buf, count, 1);
6567} 6623}
@@ -6623,6 +6679,7 @@ int in_sched_functions(unsigned long addr)
6623{ 6679{
6624 /* Linker adds these: start and end of __sched functions */ 6680 /* Linker adds these: start and end of __sched functions */
6625 extern char __sched_text_start[], __sched_text_end[]; 6681 extern char __sched_text_start[], __sched_text_end[];
6682
6626 return in_lock_functions(addr) || 6683 return in_lock_functions(addr) ||
6627 (addr >= (unsigned long)__sched_text_start 6684 (addr >= (unsigned long)__sched_text_start
6628 && addr < (unsigned long)__sched_text_end); 6685 && addr < (unsigned long)__sched_text_end);
@@ -6630,14 +6687,15 @@ int in_sched_functions(unsigned long addr)
6630 6687
6631void __init sched_init(void) 6688void __init sched_init(void)
6632{ 6689{
6633 runqueue_t *rq;
6634 int i, j, k; 6690 int i, j, k;
6635 6691
6636 for_each_possible_cpu(i) { 6692 for_each_possible_cpu(i) {
6637 prio_array_t *array; 6693 struct prio_array *array;
6694 struct rq *rq;
6638 6695
6639 rq = cpu_rq(i); 6696 rq = cpu_rq(i);
6640 spin_lock_init(&rq->lock); 6697 spin_lock_init(&rq->lock);
6698 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
6641 rq->nr_running = 0; 6699 rq->nr_running = 0;
6642 rq->active = rq->arrays; 6700 rq->active = rq->arrays;
6643 rq->expired = rq->arrays + 1; 6701 rq->expired = rq->arrays + 1;
@@ -6684,7 +6742,7 @@ void __init sched_init(void)
6684#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6742#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
6685void __might_sleep(char *file, int line) 6743void __might_sleep(char *file, int line)
6686{ 6744{
6687#if defined(in_atomic) 6745#ifdef in_atomic
6688 static unsigned long prev_jiffy; /* ratelimiting */ 6746 static unsigned long prev_jiffy; /* ratelimiting */
6689 6747
6690 if ((in_atomic() || irqs_disabled()) && 6748 if ((in_atomic() || irqs_disabled()) &&
@@ -6706,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep);
6706#ifdef CONFIG_MAGIC_SYSRQ 6764#ifdef CONFIG_MAGIC_SYSRQ
6707void normalize_rt_tasks(void) 6765void normalize_rt_tasks(void)
6708{ 6766{
6767 struct prio_array *array;
6709 struct task_struct *p; 6768 struct task_struct *p;
6710 prio_array_t *array;
6711 unsigned long flags; 6769 unsigned long flags;
6712 runqueue_t *rq; 6770 struct rq *rq;
6713 6771
6714 read_lock_irq(&tasklist_lock); 6772 read_lock_irq(&tasklist_lock);
6715 for_each_process(p) { 6773 for_each_process(p) {
@@ -6753,7 +6811,7 @@ void normalize_rt_tasks(void)
6753 * 6811 *
6754 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6812 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6755 */ 6813 */
6756task_t *curr_task(int cpu) 6814struct task_struct *curr_task(int cpu)
6757{ 6815{
6758 return cpu_curr(cpu); 6816 return cpu_curr(cpu);
6759} 6817}
@@ -6773,7 +6831,7 @@ task_t *curr_task(int cpu)
6773 * 6831 *
6774 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6832 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6775 */ 6833 */
6776void set_curr_task(int cpu, task_t *p) 6834void set_curr_task(int cpu, struct task_struct *p)
6777{ 6835{
6778 cpu_curr(cpu) = p; 6836 cpu_curr(cpu) = p;
6779} 6837}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8f03e3b89b55..215541e26c1a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -62,6 +62,119 @@ static inline void wakeup_softirqd(void)
62} 62}
63 63
64/* 64/*
65 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately:
67 */
68static void __local_bh_disable(unsigned long ip)
69{
70 unsigned long flags;
71
72 WARN_ON_ONCE(in_irq());
73
74 raw_local_irq_save(flags);
75 add_preempt_count(SOFTIRQ_OFFSET);
76 /*
77 * Were softirqs turned off above:
78 */
79 if (softirq_count() == SOFTIRQ_OFFSET)
80 trace_softirqs_off(ip);
81 raw_local_irq_restore(flags);
82}
83
84void local_bh_disable(void)
85{
86 __local_bh_disable((unsigned long)__builtin_return_address(0));
87}
88
89EXPORT_SYMBOL(local_bh_disable);
90
91void __local_bh_enable(void)
92{
93 WARN_ON_ONCE(in_irq());
94
95 /*
96 * softirqs should never be enabled by __local_bh_enable(),
97 * it always nests inside local_bh_enable() sections:
98 */
99 WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
100
101 sub_preempt_count(SOFTIRQ_OFFSET);
102}
103EXPORT_SYMBOL_GPL(__local_bh_enable);
104
105/*
106 * Special-case - softirqs can safely be enabled in
107 * cond_resched_softirq(), or by __do_softirq(),
108 * without processing still-pending softirqs:
109 */
110void _local_bh_enable(void)
111{
112 WARN_ON_ONCE(in_irq());
113 WARN_ON_ONCE(!irqs_disabled());
114
115 if (softirq_count() == SOFTIRQ_OFFSET)
116 trace_softirqs_on((unsigned long)__builtin_return_address(0));
117 sub_preempt_count(SOFTIRQ_OFFSET);
118}
119
120EXPORT_SYMBOL(_local_bh_enable);
121
122void local_bh_enable(void)
123{
124 unsigned long flags;
125
126 WARN_ON_ONCE(in_irq());
127 WARN_ON_ONCE(irqs_disabled());
128
129 local_irq_save(flags);
130 /*
131 * Are softirqs going to be turned on now:
132 */
133 if (softirq_count() == SOFTIRQ_OFFSET)
134 trace_softirqs_on((unsigned long)__builtin_return_address(0));
135 /*
136 * Keep preemption disabled until we are done with
137 * softirq processing:
138 */
139 sub_preempt_count(SOFTIRQ_OFFSET - 1);
140
141 if (unlikely(!in_interrupt() && local_softirq_pending()))
142 do_softirq();
143
144 dec_preempt_count();
145 local_irq_restore(flags);
146 preempt_check_resched();
147}
148EXPORT_SYMBOL(local_bh_enable);
149
150void local_bh_enable_ip(unsigned long ip)
151{
152 unsigned long flags;
153
154 WARN_ON_ONCE(in_irq());
155
156 local_irq_save(flags);
157 /*
158 * Are softirqs going to be turned on now:
159 */
160 if (softirq_count() == SOFTIRQ_OFFSET)
161 trace_softirqs_on(ip);
162 /*
163 * Keep preemption disabled until we are done with
164 * softirq processing:
165 */
166 sub_preempt_count(SOFTIRQ_OFFSET - 1);
167
168 if (unlikely(!in_interrupt() && local_softirq_pending()))
169 do_softirq();
170
171 dec_preempt_count();
172 local_irq_restore(flags);
173 preempt_check_resched();
174}
175EXPORT_SYMBOL(local_bh_enable_ip);
176
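A minimal sketch of the pairing the new helpers above are meant for, assuming some per-CPU data that a softirq handler on the same CPU also touches; the variable softirq_stats and the surrounding function are illustrative only, not part of the patch. With this change, lockdep additionally records the disable/enable points via trace_softirqs_off()/on().

#include <linux/interrupt.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, softirq_stats);	/* illustrative data */

static void update_stats(void)
{
	/*
	 * Keep softirqs off while touching data that a softirq handler
	 * on this CPU may also modify; local_bh_disable() also keeps
	 * preemption off, so __get_cpu_var() is safe here.
	 */
	local_bh_disable();
	__get_cpu_var(softirq_stats)++;
	local_bh_enable();
}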
177/*
65 * We restart softirq processing MAX_SOFTIRQ_RESTART times, 178 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
66 * and we fall back to softirqd after that. 179 * and we fall back to softirqd after that.
67 * 180 *
@@ -80,8 +193,11 @@ asmlinkage void __do_softirq(void)
80 int cpu; 193 int cpu;
81 194
82 pending = local_softirq_pending(); 195 pending = local_softirq_pending();
196 account_system_vtime(current);
197
198 __local_bh_disable((unsigned long)__builtin_return_address(0));
199 trace_softirq_enter();
83 200
84 local_bh_disable();
85 cpu = smp_processor_id(); 201 cpu = smp_processor_id();
86restart: 202restart:
87 /* Reset the pending bitmask before enabling irqs */ 203 /* Reset the pending bitmask before enabling irqs */
@@ -109,7 +225,10 @@ restart:
109 if (pending) 225 if (pending)
110 wakeup_softirqd(); 226 wakeup_softirqd();
111 227
112 __local_bh_enable(); 228 trace_softirq_exit();
229
230 account_system_vtime(current);
231 _local_bh_enable();
113} 232}
114 233
115#ifndef __ARCH_HAS_DO_SOFTIRQ 234#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -136,23 +255,6 @@ EXPORT_SYMBOL(do_softirq);
136 255
137#endif 256#endif
138 257
139void local_bh_enable(void)
140{
141 WARN_ON(irqs_disabled());
142 /*
143 * Keep preemption disabled until we are done with
144 * softirq processing:
145 */
146 sub_preempt_count(SOFTIRQ_OFFSET - 1);
147
148 if (unlikely(!in_interrupt() && local_softirq_pending()))
149 do_softirq();
150
151 dec_preempt_count();
152 preempt_check_resched();
153}
154EXPORT_SYMBOL(local_bh_enable);
155
156#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 258#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
157# define invoke_softirq() __do_softirq() 259# define invoke_softirq() __do_softirq()
158#else 260#else
@@ -165,6 +267,7 @@ EXPORT_SYMBOL(local_bh_enable);
165void irq_exit(void) 267void irq_exit(void)
166{ 268{
167 account_system_vtime(current); 269 account_system_vtime(current);
270 trace_hardirq_exit();
168 sub_preempt_count(IRQ_EXIT_OFFSET); 271 sub_preempt_count(IRQ_EXIT_OFFSET);
169 if (!in_interrupt() && local_softirq_pending()) 272 if (!in_interrupt() && local_softirq_pending())
170 invoke_softirq(); 273 invoke_softirq();
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index b31e54eadf56..bfd6ad9c0330 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -13,6 +13,7 @@
13#include <linux/preempt.h> 13#include <linux/preempt.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/debug_locks.h>
16#include <linux/module.h> 17#include <linux/module.h>
17 18
18/* 19/*
@@ -29,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock);
29int __lockfunc _spin_trylock(spinlock_t *lock) 30int __lockfunc _spin_trylock(spinlock_t *lock)
30{ 31{
31 preempt_disable(); 32 preempt_disable();
32 if (_raw_spin_trylock(lock)) 33 if (_raw_spin_trylock(lock)) {
34 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
33 return 1; 35 return 1;
36 }
34 37
35 preempt_enable(); 38 preempt_enable();
36 return 0; 39 return 0;
@@ -40,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock);
40int __lockfunc _read_trylock(rwlock_t *lock) 43int __lockfunc _read_trylock(rwlock_t *lock)
41{ 44{
42 preempt_disable(); 45 preempt_disable();
43 if (_raw_read_trylock(lock)) 46 if (_raw_read_trylock(lock)) {
47 rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
44 return 1; 48 return 1;
49 }
45 50
46 preempt_enable(); 51 preempt_enable();
47 return 0; 52 return 0;
@@ -51,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock);
51int __lockfunc _write_trylock(rwlock_t *lock) 56int __lockfunc _write_trylock(rwlock_t *lock)
52{ 57{
53 preempt_disable(); 58 preempt_disable();
54 if (_raw_write_trylock(lock)) 59 if (_raw_write_trylock(lock)) {
60 rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
55 return 1; 61 return 1;
62 }
56 63
57 preempt_enable(); 64 preempt_enable();
58 return 0; 65 return 0;
59} 66}
60EXPORT_SYMBOL(_write_trylock); 67EXPORT_SYMBOL(_write_trylock);
61 68
62#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) 69/*
70 * If lockdep is enabled then we use the non-preemption spin-ops
71 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
72 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
73 */
74#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
75 defined(CONFIG_PROVE_LOCKING)
63 76
64void __lockfunc _read_lock(rwlock_t *lock) 77void __lockfunc _read_lock(rwlock_t *lock)
65{ 78{
66 preempt_disable(); 79 preempt_disable();
80 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
67 _raw_read_lock(lock); 81 _raw_read_lock(lock);
68} 82}
69EXPORT_SYMBOL(_read_lock); 83EXPORT_SYMBOL(_read_lock);
@@ -74,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
74 88
75 local_irq_save(flags); 89 local_irq_save(flags);
76 preempt_disable(); 90 preempt_disable();
91 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
92 /*
93 * On lockdep we dont want the hand-coded irq-enable of
94 * _raw_spin_lock_flags() code, because lockdep assumes
95 * that interrupts are not re-enabled during lock-acquire:
96 */
97#ifdef CONFIG_PROVE_LOCKING
98 _raw_spin_lock(lock);
99#else
77 _raw_spin_lock_flags(lock, &flags); 100 _raw_spin_lock_flags(lock, &flags);
101#endif
78 return flags; 102 return flags;
79} 103}
80EXPORT_SYMBOL(_spin_lock_irqsave); 104EXPORT_SYMBOL(_spin_lock_irqsave);
@@ -83,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
83{ 107{
84 local_irq_disable(); 108 local_irq_disable();
85 preempt_disable(); 109 preempt_disable();
110 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
86 _raw_spin_lock(lock); 111 _raw_spin_lock(lock);
87} 112}
88EXPORT_SYMBOL(_spin_lock_irq); 113EXPORT_SYMBOL(_spin_lock_irq);
@@ -91,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
91{ 116{
92 local_bh_disable(); 117 local_bh_disable();
93 preempt_disable(); 118 preempt_disable();
119 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
94 _raw_spin_lock(lock); 120 _raw_spin_lock(lock);
95} 121}
96EXPORT_SYMBOL(_spin_lock_bh); 122EXPORT_SYMBOL(_spin_lock_bh);
@@ -101,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
101 127
102 local_irq_save(flags); 128 local_irq_save(flags);
103 preempt_disable(); 129 preempt_disable();
130 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
104 _raw_read_lock(lock); 131 _raw_read_lock(lock);
105 return flags; 132 return flags;
106} 133}
@@ -110,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
110{ 137{
111 local_irq_disable(); 138 local_irq_disable();
112 preempt_disable(); 139 preempt_disable();
140 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
113 _raw_read_lock(lock); 141 _raw_read_lock(lock);
114} 142}
115EXPORT_SYMBOL(_read_lock_irq); 143EXPORT_SYMBOL(_read_lock_irq);
@@ -118,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
118{ 146{
119 local_bh_disable(); 147 local_bh_disable();
120 preempt_disable(); 148 preempt_disable();
149 rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
121 _raw_read_lock(lock); 150 _raw_read_lock(lock);
122} 151}
123EXPORT_SYMBOL(_read_lock_bh); 152EXPORT_SYMBOL(_read_lock_bh);
@@ -128,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
128 157
129 local_irq_save(flags); 158 local_irq_save(flags);
130 preempt_disable(); 159 preempt_disable();
160 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
131 _raw_write_lock(lock); 161 _raw_write_lock(lock);
132 return flags; 162 return flags;
133} 163}
@@ -137,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
137{ 167{
138 local_irq_disable(); 168 local_irq_disable();
139 preempt_disable(); 169 preempt_disable();
170 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
140 _raw_write_lock(lock); 171 _raw_write_lock(lock);
141} 172}
142EXPORT_SYMBOL(_write_lock_irq); 173EXPORT_SYMBOL(_write_lock_irq);
@@ -145,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
145{ 176{
146 local_bh_disable(); 177 local_bh_disable();
147 preempt_disable(); 178 preempt_disable();
179 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
148 _raw_write_lock(lock); 180 _raw_write_lock(lock);
149} 181}
150EXPORT_SYMBOL(_write_lock_bh); 182EXPORT_SYMBOL(_write_lock_bh);
@@ -152,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh);
152void __lockfunc _spin_lock(spinlock_t *lock) 184void __lockfunc _spin_lock(spinlock_t *lock)
153{ 185{
154 preempt_disable(); 186 preempt_disable();
187 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
155 _raw_spin_lock(lock); 188 _raw_spin_lock(lock);
156} 189}
157 190
@@ -160,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock);
160void __lockfunc _write_lock(rwlock_t *lock) 193void __lockfunc _write_lock(rwlock_t *lock)
161{ 194{
162 preempt_disable(); 195 preempt_disable();
196 rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
163 _raw_write_lock(lock); 197 _raw_write_lock(lock);
164} 198}
165 199
@@ -255,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock);
255 289
256#endif /* CONFIG_PREEMPT */ 290#endif /* CONFIG_PREEMPT */
257 291
292#ifdef CONFIG_DEBUG_LOCK_ALLOC
293
294void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
295{
296 preempt_disable();
297 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
298 _raw_spin_lock(lock);
299}
300
301EXPORT_SYMBOL(_spin_lock_nested);
302
303#endif
304
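A hedged sketch of what the nested variant above is for: taking two locks of the same lock class in a fixed order (here parent before child), with the second acquisition annotated by a subclass so lockdep does not treat it as recursive locking. spin_lock_nested() and SINGLE_DEPTH_NESTING are assumed to be the wrapper and constant introduced alongside this series; the struct node type is purely illustrative.

#include <linux/spinlock.h>

struct node {
	spinlock_t	lock;
	struct node	*parent;
	int		value;
};

/* Both locks share one lock class; the subclass keeps lockdep quiet. */
static void copy_from_parent(struct node *n)
{
	spin_lock(&n->parent->lock);
	spin_lock_nested(&n->lock, SINGLE_DEPTH_NESTING);

	n->value = n->parent->value;

	spin_unlock(&n->lock);
	spin_unlock(&n->parent->lock);
}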
258void __lockfunc _spin_unlock(spinlock_t *lock) 305void __lockfunc _spin_unlock(spinlock_t *lock)
259{ 306{
307 spin_release(&lock->dep_map, 1, _RET_IP_);
260 _raw_spin_unlock(lock); 308 _raw_spin_unlock(lock);
261 preempt_enable(); 309 preempt_enable();
262} 310}
@@ -264,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock);
264 312
265void __lockfunc _write_unlock(rwlock_t *lock) 313void __lockfunc _write_unlock(rwlock_t *lock)
266{ 314{
315 rwlock_release(&lock->dep_map, 1, _RET_IP_);
267 _raw_write_unlock(lock); 316 _raw_write_unlock(lock);
268 preempt_enable(); 317 preempt_enable();
269} 318}
@@ -271,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock);
271 320
272void __lockfunc _read_unlock(rwlock_t *lock) 321void __lockfunc _read_unlock(rwlock_t *lock)
273{ 322{
323 rwlock_release(&lock->dep_map, 1, _RET_IP_);
274 _raw_read_unlock(lock); 324 _raw_read_unlock(lock);
275 preempt_enable(); 325 preempt_enable();
276} 326}
@@ -278,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock);
278 328
279void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) 329void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
280{ 330{
331 spin_release(&lock->dep_map, 1, _RET_IP_);
281 _raw_spin_unlock(lock); 332 _raw_spin_unlock(lock);
282 local_irq_restore(flags); 333 local_irq_restore(flags);
283 preempt_enable(); 334 preempt_enable();
@@ -286,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);
286 337
287void __lockfunc _spin_unlock_irq(spinlock_t *lock) 338void __lockfunc _spin_unlock_irq(spinlock_t *lock)
288{ 339{
340 spin_release(&lock->dep_map, 1, _RET_IP_);
289 _raw_spin_unlock(lock); 341 _raw_spin_unlock(lock);
290 local_irq_enable(); 342 local_irq_enable();
291 preempt_enable(); 343 preempt_enable();
@@ -294,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq);
294 346
295void __lockfunc _spin_unlock_bh(spinlock_t *lock) 347void __lockfunc _spin_unlock_bh(spinlock_t *lock)
296{ 348{
349 spin_release(&lock->dep_map, 1, _RET_IP_);
297 _raw_spin_unlock(lock); 350 _raw_spin_unlock(lock);
298 preempt_enable_no_resched(); 351 preempt_enable_no_resched();
299 local_bh_enable(); 352 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
300} 353}
301EXPORT_SYMBOL(_spin_unlock_bh); 354EXPORT_SYMBOL(_spin_unlock_bh);
302 355
303void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 356void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
304{ 357{
358 rwlock_release(&lock->dep_map, 1, _RET_IP_);
305 _raw_read_unlock(lock); 359 _raw_read_unlock(lock);
306 local_irq_restore(flags); 360 local_irq_restore(flags);
307 preempt_enable(); 361 preempt_enable();
@@ -310,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore);
310 364
311void __lockfunc _read_unlock_irq(rwlock_t *lock) 365void __lockfunc _read_unlock_irq(rwlock_t *lock)
312{ 366{
367 rwlock_release(&lock->dep_map, 1, _RET_IP_);
313 _raw_read_unlock(lock); 368 _raw_read_unlock(lock);
314 local_irq_enable(); 369 local_irq_enable();
315 preempt_enable(); 370 preempt_enable();
@@ -318,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq);
318 373
319void __lockfunc _read_unlock_bh(rwlock_t *lock) 374void __lockfunc _read_unlock_bh(rwlock_t *lock)
320{ 375{
376 rwlock_release(&lock->dep_map, 1, _RET_IP_);
321 _raw_read_unlock(lock); 377 _raw_read_unlock(lock);
322 preempt_enable_no_resched(); 378 preempt_enable_no_resched();
323 local_bh_enable(); 379 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
324} 380}
325EXPORT_SYMBOL(_read_unlock_bh); 381EXPORT_SYMBOL(_read_unlock_bh);
326 382
327void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) 383void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
328{ 384{
385 rwlock_release(&lock->dep_map, 1, _RET_IP_);
329 _raw_write_unlock(lock); 386 _raw_write_unlock(lock);
330 local_irq_restore(flags); 387 local_irq_restore(flags);
331 preempt_enable(); 388 preempt_enable();
@@ -334,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore);
334 391
335void __lockfunc _write_unlock_irq(rwlock_t *lock) 392void __lockfunc _write_unlock_irq(rwlock_t *lock)
336{ 393{
394 rwlock_release(&lock->dep_map, 1, _RET_IP_);
337 _raw_write_unlock(lock); 395 _raw_write_unlock(lock);
338 local_irq_enable(); 396 local_irq_enable();
339 preempt_enable(); 397 preempt_enable();
@@ -342,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq);
342 400
343void __lockfunc _write_unlock_bh(rwlock_t *lock) 401void __lockfunc _write_unlock_bh(rwlock_t *lock)
344{ 402{
403 rwlock_release(&lock->dep_map, 1, _RET_IP_);
345 _raw_write_unlock(lock); 404 _raw_write_unlock(lock);
346 preempt_enable_no_resched(); 405 preempt_enable_no_resched();
347 local_bh_enable(); 406 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
348} 407}
349EXPORT_SYMBOL(_write_unlock_bh); 408EXPORT_SYMBOL(_write_unlock_bh);
350 409
@@ -352,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
352{ 411{
353 local_bh_disable(); 412 local_bh_disable();
354 preempt_disable(); 413 preempt_disable();
355 if (_raw_spin_trylock(lock)) 414 if (_raw_spin_trylock(lock)) {
415 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
356 return 1; 416 return 1;
417 }
357 418
358 preempt_enable_no_resched(); 419 preempt_enable_no_resched();
359 local_bh_enable(); 420 local_bh_enable_ip((unsigned long)__builtin_return_address(0));
360 return 0; 421 return 0;
361} 422}
362EXPORT_SYMBOL(_spin_trylock_bh); 423EXPORT_SYMBOL(_spin_trylock_bh);
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
new file mode 100644
index 000000000000..b71816e47a30
--- /dev/null
+++ b/kernel/stacktrace.c
@@ -0,0 +1,24 @@
1/*
2 * kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/kallsyms.h>
10#include <linux/stacktrace.h>
11
12void print_stack_trace(struct stack_trace *trace, int spaces)
13{
14 int i, j;
15
16 for (i = 0; i < trace->nr_entries; i++) {
17 unsigned long ip = trace->entries[i];
18
19 for (j = 0; j < spaces + 1; j++)
20 printk(" ");
21 print_ip_sym(ip);
22 }
23}
24
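A small sketch of how the new helper can be driven, inferring only the two fields that print_stack_trace() itself dereferences above (nr_entries and entries); the hand-filled buffer stands in for whatever an architecture's stack-trace saver would normally produce, and dump_example_trace() is an illustrative name.

#include <linux/stacktrace.h>
#include <linux/kernel.h>

static void dump_example_trace(void)
{
	unsigned long entries[4];
	struct stack_trace trace = {
		.nr_entries	= 0,
		.entries	= entries,
	};

	/* Normally an arch helper fills this in; fake two frames here. */
	entries[trace.nr_entries++] = (unsigned long)dump_example_trace;
	entries[trace.nr_entries++] = _RET_IP_;

	print_stack_trace(&trace, 2);	/* each frame indented by spaces + 1 */
}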
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 99a58f279077..362a0cc37138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -932,6 +932,17 @@ static ctl_table vm_table[] = {
932 .strategy = &sysctl_intvec, 932 .strategy = &sysctl_intvec,
933 .extra1 = &zero, 933 .extra1 = &zero,
934 }, 934 },
935 {
936 .ctl_name = VM_MIN_UNMAPPED,
937 .procname = "min_unmapped_ratio",
938 .data = &sysctl_min_unmapped_ratio,
939 .maxlen = sizeof(sysctl_min_unmapped_ratio),
940 .mode = 0644,
941 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
942 .strategy = &sysctl_intvec,
943 .extra1 = &zero,
944 .extra2 = &one_hundred,
945 },
935#endif 946#endif
936#ifdef CONFIG_X86_32 947#ifdef CONFIG_X86_32
937 { 948 {
diff --git a/kernel/timer.c b/kernel/timer.c
index 5a8960253063..396a3c024c2c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1208,7 +1208,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES;
1208 * playing with xtime and avenrun. 1208 * playing with xtime and avenrun.
1209 */ 1209 */
1210#ifndef ARCH_HAVE_XTIME_LOCK 1210#ifndef ARCH_HAVE_XTIME_LOCK
1211seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; 1211__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
1212 1212
1213EXPORT_SYMBOL(xtime_lock); 1213EXPORT_SYMBOL(xtime_lock);
1214#endif 1214#endif
@@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void)
1368 1368
1369static void process_timeout(unsigned long __data) 1369static void process_timeout(unsigned long __data)
1370{ 1370{
1371 wake_up_process((task_t *)__data); 1371 wake_up_process((struct task_struct *)__data);
1372} 1372}
1373 1373
1374/** 1374/**
@@ -1559,6 +1559,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1559 return 0; 1559 return 0;
1560} 1560}
1561 1561
1562/*
1563 * lockdep: we want to track each per-CPU base as a separate lock-class,
1564 * but timer-bases are kmalloc()-ed, so we need to attach separate
1565 * keys to them:
1566 */
1567static struct lock_class_key base_lock_keys[NR_CPUS];
1568
1562static int __devinit init_timers_cpu(int cpu) 1569static int __devinit init_timers_cpu(int cpu)
1563{ 1570{
1564 int j; 1571 int j;
@@ -1594,6 +1601,8 @@ static int __devinit init_timers_cpu(int cpu)
1594 } 1601 }
1595 1602
1596 spin_lock_init(&base->lock); 1603 spin_lock_init(&base->lock);
1604 lockdep_set_class(&base->lock, base_lock_keys + cpu);
1605
1597 for (j = 0; j < TVN_SIZE; j++) { 1606 for (j = 0; j < TVN_SIZE; j++) {
1598 INIT_LIST_HEAD(base->tv5.vec + j); 1607 INIT_LIST_HEAD(base->tv5.vec + j);
1599 INIT_LIST_HEAD(base->tv4.vec + j); 1608 INIT_LIST_HEAD(base->tv4.vec + j);
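The per-CPU key trick above generalizes to any dynamically allocated lock: a static struct lock_class_key gives every lock of a kmalloc()-ed object type its own class, instead of sharing the generic class of its init site. A hedged sketch follows, with struct my_dev, my_dev_lock_key and my_dev_alloc() as illustrative names; lockdep_set_class() and struct lock_class_key are the interfaces used by the hunk above.

#include <linux/spinlock.h>
#include <linux/slab.h>

struct my_dev {
	spinlock_t	lock;
	/* ... device state ... */
};

/* One lock class for every my_dev lock, distinct from other heap locks. */
static struct lock_class_key my_dev_lock_key;

static struct my_dev *my_dev_alloc(void)
{
	struct my_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	spin_lock_init(&dev->lock);
	lockdep_set_class(&dev->lock, &my_dev_lock_key);

	return dev;
}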
diff --git a/kernel/wait.c b/kernel/wait.c
index 5985d866531f..a1d57aeb7f75 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,6 +10,10 @@
10#include <linux/wait.h> 10#include <linux/wait.h>
11#include <linux/hash.h> 11#include <linux/hash.h>
12 12
13struct lock_class_key waitqueue_lock_key;
14
15EXPORT_SYMBOL(waitqueue_lock_key);
16
13void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 17void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
14{ 18{
15 unsigned long flags; 19 unsigned long flags;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 59f0b42bd89e..90d2c6001659 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,7 +51,7 @@ struct cpu_workqueue_struct {
51 wait_queue_head_t work_done; 51 wait_queue_head_t work_done;
52 52
53 struct workqueue_struct *wq; 53 struct workqueue_struct *wq;
54 task_t *thread; 54 struct task_struct *thread;
55 55
56 int run_depth; /* Detect run_workqueue() recursion depth */ 56 int run_depth; /* Detect run_workqueue() recursion depth */
57} ____cacheline_aligned; 57} ____cacheline_aligned;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4fcbd12cf6e..e5889b1a33ff 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -48,7 +48,7 @@ config DEBUG_KERNEL
48config LOG_BUF_SHIFT 48config LOG_BUF_SHIFT
49 int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL 49 int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
50 range 12 21 50 range 12 21
51 default 17 if S390 51 default 17 if S390 || LOCKDEP
52 default 16 if X86_NUMAQ || IA64 52 default 16 if X86_NUMAQ || IA64
53 default 15 if SMP 53 default 15 if SMP
54 default 14 54 default 14
@@ -107,7 +107,7 @@ config DEBUG_SLAB_LEAK
107 107
108config DEBUG_PREEMPT 108config DEBUG_PREEMPT
109 bool "Debug preemptible kernel" 109 bool "Debug preemptible kernel"
110 depends on DEBUG_KERNEL && PREEMPT 110 depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
111 default y 111 default y
112 help 112 help
113 If you say Y here then the kernel will use a debug variant of the 113 If you say Y here then the kernel will use a debug variant of the
@@ -115,14 +115,6 @@ config DEBUG_PREEMPT
115 if kernel code uses it in a preemption-unsafe way. Also, the kernel 115 if kernel code uses it in a preemption-unsafe way. Also, the kernel
116 will detect preemption count underflows. 116 will detect preemption count underflows.
117 117
118config DEBUG_MUTEXES
119 bool "Mutex debugging, deadlock detection"
120 default n
121 depends on DEBUG_KERNEL
122 help
123 This allows mutex semantics violations and mutex related deadlocks
124 (lockups) to be detected and reported automatically.
125
126config DEBUG_RT_MUTEXES 118config DEBUG_RT_MUTEXES
127 bool "RT Mutex debugging, deadlock detection" 119 bool "RT Mutex debugging, deadlock detection"
128 depends on DEBUG_KERNEL && RT_MUTEXES 120 depends on DEBUG_KERNEL && RT_MUTEXES
@@ -142,7 +134,7 @@ config RT_MUTEX_TESTER
142 This option enables a rt-mutex tester. 134 This option enables a rt-mutex tester.
143 135
144config DEBUG_SPINLOCK 136config DEBUG_SPINLOCK
145 bool "Spinlock debugging" 137 bool "Spinlock and rw-lock debugging: basic checks"
146 depends on DEBUG_KERNEL 138 depends on DEBUG_KERNEL
147 help 139 help
148 Say Y here and build SMP to catch missing spinlock initialization 140 Say Y here and build SMP to catch missing spinlock initialization
@@ -150,13 +142,122 @@ config DEBUG_SPINLOCK
150 best used in conjunction with the NMI watchdog so that spinlock 142 best used in conjunction with the NMI watchdog so that spinlock
151 deadlocks are also debuggable. 143 deadlocks are also debuggable.
152 144
145config DEBUG_MUTEXES
146 bool "Mutex debugging: basic checks"
147 depends on DEBUG_KERNEL
148 help
149 This feature allows mutex semantics violations to be detected and
150 reported.
151
152config DEBUG_RWSEMS
153 bool "RW-sem debugging: basic checks"
154 depends on DEBUG_KERNEL
155 help
156 This feature allows read-write semaphore semantics violations to
157 be detected and reported.
158
159config DEBUG_LOCK_ALLOC
160 bool "Lock debugging: detect incorrect freeing of live locks"
161 depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
162 select DEBUG_SPINLOCK
163 select DEBUG_MUTEXES
164 select DEBUG_RWSEMS
165 select LOCKDEP
166 help
167 This feature will check whether any held lock (spinlock, rwlock,
168 mutex or rwsem) is incorrectly freed by the kernel, via any of the
169 memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
170 vfree(), etc.), whether a live lock is incorrectly reinitialized via
171 spin_lock_init()/mutex_init()/etc., or whether there is any lock
172 held during task exit.
173
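A short illustration of the class of bug the option above is after: kfree() of memory that still contains a held (live) lock gets reported. The structure and function names below are purely illustrative, and the lock is assumed to have been initialized elsewhere.

#include <linux/spinlock.h>
#include <linux/slab.h>

struct item {
	spinlock_t	lock;	/* assumed spin_lock_init()-ed at allocation */
	int		data;
};

static void buggy_free(struct item *it)
{
	spin_lock(&it->lock);
	it->data = 0;
	/* BUG: freeing memory that contains a held, live lock */
	kfree(it);
}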
174config PROVE_LOCKING
175 bool "Lock debugging: prove locking correctness"
176 depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
177 select LOCKDEP
178 select DEBUG_SPINLOCK
179 select DEBUG_MUTEXES
180 select DEBUG_RWSEMS
181 select DEBUG_LOCK_ALLOC
182 default n
183 help
184 This feature enables the kernel to prove that all locking
185 that occurs in the kernel runtime is mathematically
186 correct: that under no circumstance could an arbitrary (and
187 not yet triggered) combination of observed locking
188 sequences (on an arbitrary number of CPUs, running an
189 arbitrary number of tasks and interrupt contexts) cause a
190 deadlock.
191
192 In short, this feature enables the kernel to report locking
193 related deadlocks before they actually occur.
194
195 The proof does not depend on how hard and complex a
196 deadlock scenario would be to trigger: how many
197 participant CPUs, tasks and irq-contexts would be needed
198 for it to trigger. The proof also does not depend on
199 timing: if a race and a resulting deadlock is possible
200 theoretically (no matter how unlikely the race scenario
201 is), it will be proven so and will immediately be
202 reported by the kernel (once the event is observed that
203 makes the deadlock theoretically possible).
204
205 If a deadlock is impossible (i.e. the locking rules, as
206 observed by the kernel, are mathematically correct), the
207 kernel reports nothing.
208
209 NOTE: this feature can also be enabled for rwlocks, mutexes
210 and rwsems - in which case all dependencies between these
211 different locking variants are observed and mapped too, and
212 the proof of observed correctness is also maintained for an
213 arbitrary combination of these separate locking variants.
214
215 For more details, see Documentation/lockdep-design.txt.
216
217config LOCKDEP
218 bool
219 depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
220 select STACKTRACE
221 select FRAME_POINTER
222 select KALLSYMS
223 select KALLSYMS_ALL
224
225config DEBUG_LOCKDEP
226 bool "Lock dependency engine debugging"
227 depends on LOCKDEP
228 help
229 If you say Y here, the lock dependency engine will do
230 additional runtime checks to debug itself, at the price
231 of more runtime overhead.
232
233config TRACE_IRQFLAGS
234 bool
235 default y
236 depends on TRACE_IRQFLAGS_SUPPORT
237 depends on PROVE_LOCKING
238
153config DEBUG_SPINLOCK_SLEEP 239config DEBUG_SPINLOCK_SLEEP
154 bool "Sleep-inside-spinlock checking" 240 bool "Spinlock debugging: sleep-inside-spinlock checking"
155 depends on DEBUG_KERNEL 241 depends on DEBUG_KERNEL
156 help 242 help
157 If you say Y here, various routines which may sleep will become very 243 If you say Y here, various routines which may sleep will become very
158 noisy if they are called with a spinlock held. 244 noisy if they are called with a spinlock held.
159 245
246config DEBUG_LOCKING_API_SELFTESTS
247 bool "Locking API boot-time self-tests"
248 depends on DEBUG_KERNEL
249 help
250 Say Y here if you want the kernel to run a short self-test during
251 bootup. The self-test checks whether common types of locking bugs
252 are detected by debugging mechanisms or not. (if you disable
253 lock debugging then those bugs wont be detected of course.)
254 The following locking APIs are covered: spinlocks, rwlocks,
255 mutexes and rwsems.
256
257config STACKTRACE
258 bool
259 depends on STACKTRACE_SUPPORT
260
160config DEBUG_KOBJECT 261config DEBUG_KOBJECT
161 bool "kobject debugging" 262 bool "kobject debugging"
162 depends on DEBUG_KERNEL 263 depends on DEBUG_KERNEL
@@ -212,7 +313,7 @@ config DEBUG_VM
212 313
213config FRAME_POINTER 314config FRAME_POINTER
214 bool "Compile the kernel with frame pointers" 315 bool "Compile the kernel with frame pointers"
215 depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML) 316 depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390)
216 default y if DEBUG_INFO && UML 317 default y if DEBUG_INFO && UML
217 help 318 help
218 If you say Y here the resulting kernel image will be slightly larger 319 If you say Y here the resulting kernel image will be slightly larger
diff --git a/lib/Makefile b/lib/Makefile
index 10c13c9d7824..be9719ae82d0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,13 +11,14 @@ lib-$(CONFIG_SMP) += cpumask.o
11 11
12lib-y += kobject.o kref.o kobject_uevent.o klist.o 12lib-y += kobject.o kref.o kobject_uevent.o klist.o
13 13
14obj-y += sort.o parser.o halfmd4.o iomap_copy.o 14obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o
15 15
16ifeq ($(CONFIG_DEBUG_KOBJECT),y) 16ifeq ($(CONFIG_DEBUG_KOBJECT),y)
17CFLAGS_kobject.o += -DDEBUG 17CFLAGS_kobject.o += -DDEBUG
18CFLAGS_kobject_uevent.o += -DDEBUG 18CFLAGS_kobject_uevent.o += -DDEBUG
19endif 19endif
20 20
21obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
21obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o 22obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
22lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o 23lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
23lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o 24lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
new file mode 100644
index 000000000000..0ef01d14727c
--- /dev/null
+++ b/lib/debug_locks.c
@@ -0,0 +1,45 @@
1/*
2 * lib/debug_locks.c
3 *
4 * Generic place for common debugging facilities for various locks:
5 * spinlocks, rwlocks, mutexes and rwsems.
6 *
7 * Started by Ingo Molnar:
8 *
9 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
10 */
11#include <linux/rwsem.h>
12#include <linux/mutex.h>
13#include <linux/module.h>
14#include <linux/spinlock.h>
15#include <linux/debug_locks.h>
16
17/*
18 * We want to turn all lock-debugging facilities on/off at once,
19 * via a global flag. The reason is that once a single bug has been
20 * detected and reported, there might be cascade of followup bugs
21 * that would just muddy the log. So we report the first one and
22 * shut up after that.
23 */
24int debug_locks = 1;
25
26/*
27 * The locking-testsuite uses <debug_locks_silent> to get a
28 * 'silent failure': nothing is printed to the console when
29 * a locking bug is detected.
30 */
31int debug_locks_silent;
32
33/*
34 * Generic 'turn off all lock debugging' function:
35 */
36int debug_locks_off(void)
37{
38 if (xchg(&debug_locks, 0)) {
39 if (!debug_locks_silent) {
40 console_verbose();
41 return 1;
42 }
43 }
44 return 0;
45}
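
For reference, the intended call pattern for debug_locks_off() is visible further down in this series (lib/spinlock_debug.c): a check site reports the first violation and then stays quiet. A minimal illustrative sketch, with a hypothetical report helper:

	/*
	 * Hypothetical report helper: print the first violation only;
	 * debug_locks_off() returns 0 once debugging has already been
	 * turned off (or when the selftest has silenced reporting).
	 */
	static void report_bad_lock(void *lock)
	{
		if (!debug_locks_off())
			return;

		printk(KERN_EMERG "BUG: bad lock state on %p\n", lock);
		dump_stack();
	}
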
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index e713e86811ae..e0fdfddb406e 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -177,7 +177,12 @@ static inline void __lock_kernel(void)
177 177
178static inline void __unlock_kernel(void) 178static inline void __unlock_kernel(void)
179{ 179{
180 spin_unlock(&kernel_flag); 180 /*
181 * the BKL is not covered by lockdep, so we open-code the
182 * unlocking sequence (and thus avoid the dep-chain ops):
183 */
184 _raw_spin_unlock(&kernel_flag);
185 preempt_enable();
181} 186}
182 187
183/* 188/*
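
A rough way to read the open-coded sequence above: on a lock-debugging kernel, spin_unlock() is approximately the lockdep release notification followed by the raw unlock and preempt_enable(). Because the BKL is intentionally kept outside the validator, only the last two steps are retained. A sketch of that decomposition, under that assumption rather than quoting the real headers:

	/* spin_unlock(&kernel_flag) would roughly perform: */
	/*   1. lockdep release of kernel_flag.dep_map  -- skipped for the BKL */
	_raw_spin_unlock(&kernel_flag);	/* 2. drop the raw lock */
	preempt_enable();		/* 3. re-enable preemption */
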
diff --git a/lib/locking-selftest-hardirq.h b/lib/locking-selftest-hardirq.h
new file mode 100644
index 000000000000..10d4a150b259
--- /dev/null
+++ b/lib/locking-selftest-hardirq.h
@@ -0,0 +1,9 @@
1#undef IRQ_DISABLE
2#undef IRQ_ENABLE
3#undef IRQ_ENTER
4#undef IRQ_EXIT
5
6#define IRQ_ENABLE HARDIRQ_ENABLE
7#define IRQ_DISABLE HARDIRQ_DISABLE
8#define IRQ_ENTER HARDIRQ_ENTER
9#define IRQ_EXIT HARDIRQ_EXIT
diff --git a/lib/locking-selftest-mutex.h b/lib/locking-selftest-mutex.h
new file mode 100644
index 000000000000..68601b6f584b
--- /dev/null
+++ b/lib/locking-selftest-mutex.h
@@ -0,0 +1,11 @@
1#undef LOCK
2#define LOCK ML
3
4#undef UNLOCK
5#define UNLOCK MU
6
7#undef RLOCK
8#undef WLOCK
9
10#undef INIT
11#define INIT MI
diff --git a/lib/locking-selftest-rlock-hardirq.h b/lib/locking-selftest-rlock-hardirq.h
new file mode 100644
index 000000000000..9f517ebcb786
--- /dev/null
+++ b/lib/locking-selftest-rlock-hardirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-rlock.h"
2#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-rlock-softirq.h b/lib/locking-selftest-rlock-softirq.h
new file mode 100644
index 000000000000..981455db7ff0
--- /dev/null
+++ b/lib/locking-selftest-rlock-softirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-rlock.h"
2#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-rlock.h b/lib/locking-selftest-rlock.h
new file mode 100644
index 000000000000..6789044f4d0e
--- /dev/null
+++ b/lib/locking-selftest-rlock.h
@@ -0,0 +1,14 @@
1#undef LOCK
2#define LOCK RL
3
4#undef UNLOCK
5#define UNLOCK RU
6
7#undef RLOCK
8#define RLOCK RL
9
10#undef WLOCK
11#define WLOCK WL
12
13#undef INIT
14#define INIT RWI
diff --git a/lib/locking-selftest-rsem.h b/lib/locking-selftest-rsem.h
new file mode 100644
index 000000000000..62da886680c7
--- /dev/null
+++ b/lib/locking-selftest-rsem.h
@@ -0,0 +1,14 @@
1#undef LOCK
2#define LOCK RSL
3
4#undef UNLOCK
5#define UNLOCK RSU
6
7#undef RLOCK
8#define RLOCK RSL
9
10#undef WLOCK
11#define WLOCK WSL
12
13#undef INIT
14#define INIT RWSI
diff --git a/lib/locking-selftest-softirq.h b/lib/locking-selftest-softirq.h
new file mode 100644
index 000000000000..a83de2a04ace
--- /dev/null
+++ b/lib/locking-selftest-softirq.h
@@ -0,0 +1,9 @@
1#undef IRQ_DISABLE
2#undef IRQ_ENABLE
3#undef IRQ_ENTER
4#undef IRQ_EXIT
5
6#define IRQ_DISABLE SOFTIRQ_DISABLE
7#define IRQ_ENABLE SOFTIRQ_ENABLE
8#define IRQ_ENTER SOFTIRQ_ENTER
9#define IRQ_EXIT SOFTIRQ_EXIT
diff --git a/lib/locking-selftest-spin-hardirq.h b/lib/locking-selftest-spin-hardirq.h
new file mode 100644
index 000000000000..693198dce30a
--- /dev/null
+++ b/lib/locking-selftest-spin-hardirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-spin.h"
2#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-spin-softirq.h b/lib/locking-selftest-spin-softirq.h
new file mode 100644
index 000000000000..c472e2a87ffc
--- /dev/null
+++ b/lib/locking-selftest-spin-softirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-spin.h"
2#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-spin.h b/lib/locking-selftest-spin.h
new file mode 100644
index 000000000000..ccd1b4b09757
--- /dev/null
+++ b/lib/locking-selftest-spin.h
@@ -0,0 +1,11 @@
1#undef LOCK
2#define LOCK L
3
4#undef UNLOCK
5#define UNLOCK U
6
7#undef RLOCK
8#undef WLOCK
9
10#undef INIT
11#define INIT SI
diff --git a/lib/locking-selftest-wlock-hardirq.h b/lib/locking-selftest-wlock-hardirq.h
new file mode 100644
index 000000000000..2dd2e5122caa
--- /dev/null
+++ b/lib/locking-selftest-wlock-hardirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-wlock.h"
2#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-wlock-softirq.h b/lib/locking-selftest-wlock-softirq.h
new file mode 100644
index 000000000000..cb80d1cb944e
--- /dev/null
+++ b/lib/locking-selftest-wlock-softirq.h
@@ -0,0 +1,2 @@
1#include "locking-selftest-wlock.h"
2#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-wlock.h b/lib/locking-selftest-wlock.h
new file mode 100644
index 000000000000..0815322d99ed
--- /dev/null
+++ b/lib/locking-selftest-wlock.h
@@ -0,0 +1,14 @@
1#undef LOCK
2#define LOCK WL
3
4#undef UNLOCK
5#define UNLOCK WU
6
7#undef RLOCK
8#define RLOCK RL
9
10#undef WLOCK
11#define WLOCK WL
12
13#undef INIT
14#define INIT RWI
diff --git a/lib/locking-selftest-wsem.h b/lib/locking-selftest-wsem.h
new file mode 100644
index 000000000000..b88c5f2dc5f0
--- /dev/null
+++ b/lib/locking-selftest-wsem.h
@@ -0,0 +1,14 @@
1#undef LOCK
2#define LOCK WSL
3
4#undef UNLOCK
5#define UNLOCK WSU
6
7#undef RLOCK
8#define RLOCK RSL
9
10#undef WLOCK
11#define WLOCK WSL
12
13#undef INIT
14#define INIT RWSI
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
new file mode 100644
index 000000000000..7945787f439a
--- /dev/null
+++ b/lib/locking-selftest.c
@@ -0,0 +1,1216 @@
1/*
2 * lib/locking-selftest.c
3 *
4 * Testsuite for various locking APIs: spinlocks, rwlocks,
5 * mutexes and rw-semaphores.
6 *
7 * It checks both false positives and false negatives.
8 *
9 * Started by Ingo Molnar:
10 *
11 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
12 */
13#include <linux/rwsem.h>
14#include <linux/mutex.h>
15#include <linux/sched.h>
16#include <linux/delay.h>
17#include <linux/module.h>
18#include <linux/lockdep.h>
19#include <linux/spinlock.h>
20#include <linux/kallsyms.h>
21#include <linux/interrupt.h>
22#include <linux/debug_locks.h>
23#include <linux/irqflags.h>
24
25/*
26 * Change this to 1 if you want to see the failure printouts:
27 */
28static unsigned int debug_locks_verbose;
29
30static int __init setup_debug_locks_verbose(char *str)
31{
32 get_option(&str, &debug_locks_verbose);
33
34 return 1;
35}
36
37__setup("debug_locks_verbose=", setup_debug_locks_verbose);
38
39#define FAILURE 0
40#define SUCCESS 1
41
42#define LOCKTYPE_SPIN 0x1
43#define LOCKTYPE_RWLOCK 0x2
44#define LOCKTYPE_MUTEX 0x4
45#define LOCKTYPE_RWSEM 0x8
46
47/*
48 * Normal standalone locks, for the circular and irq-context
49 * dependency tests:
50 */
51static DEFINE_SPINLOCK(lock_A);
52static DEFINE_SPINLOCK(lock_B);
53static DEFINE_SPINLOCK(lock_C);
54static DEFINE_SPINLOCK(lock_D);
55
56static DEFINE_RWLOCK(rwlock_A);
57static DEFINE_RWLOCK(rwlock_B);
58static DEFINE_RWLOCK(rwlock_C);
59static DEFINE_RWLOCK(rwlock_D);
60
61static DEFINE_MUTEX(mutex_A);
62static DEFINE_MUTEX(mutex_B);
63static DEFINE_MUTEX(mutex_C);
64static DEFINE_MUTEX(mutex_D);
65
66static DECLARE_RWSEM(rwsem_A);
67static DECLARE_RWSEM(rwsem_B);
68static DECLARE_RWSEM(rwsem_C);
69static DECLARE_RWSEM(rwsem_D);
70
71/*
72 * Locks that we initialize dynamically as well so that
73 * e.g. X1 and X2 becomes two instances of the same class,
74 * but X* and Y* are different classes. We do this so that
75 * we do not trigger a real lockup:
76 */
77static DEFINE_SPINLOCK(lock_X1);
78static DEFINE_SPINLOCK(lock_X2);
79static DEFINE_SPINLOCK(lock_Y1);
80static DEFINE_SPINLOCK(lock_Y2);
81static DEFINE_SPINLOCK(lock_Z1);
82static DEFINE_SPINLOCK(lock_Z2);
83
84static DEFINE_RWLOCK(rwlock_X1);
85static DEFINE_RWLOCK(rwlock_X2);
86static DEFINE_RWLOCK(rwlock_Y1);
87static DEFINE_RWLOCK(rwlock_Y2);
88static DEFINE_RWLOCK(rwlock_Z1);
89static DEFINE_RWLOCK(rwlock_Z2);
90
91static DEFINE_MUTEX(mutex_X1);
92static DEFINE_MUTEX(mutex_X2);
93static DEFINE_MUTEX(mutex_Y1);
94static DEFINE_MUTEX(mutex_Y2);
95static DEFINE_MUTEX(mutex_Z1);
96static DEFINE_MUTEX(mutex_Z2);
97
98static DECLARE_RWSEM(rwsem_X1);
99static DECLARE_RWSEM(rwsem_X2);
100static DECLARE_RWSEM(rwsem_Y1);
101static DECLARE_RWSEM(rwsem_Y2);
102static DECLARE_RWSEM(rwsem_Z1);
103static DECLARE_RWSEM(rwsem_Z2);
104
105/*
106 * non-inlined runtime initializers, to let separate locks share
107 * the same lock-class:
108 */
109#define INIT_CLASS_FUNC(class) \
110static noinline void \
111init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
112 struct rw_semaphore *rwsem) \
113{ \
114 spin_lock_init(lock); \
115 rwlock_init(rwlock); \
116 mutex_init(mutex); \
117 init_rwsem(rwsem); \
118}
119
120INIT_CLASS_FUNC(X)
121INIT_CLASS_FUNC(Y)
122INIT_CLASS_FUNC(Z)
123
124static void init_shared_classes(void)
125{
126 init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1);
127 init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2);
128
129 init_class_Y(&lock_Y1, &rwlock_Y1, &mutex_Y1, &rwsem_Y1);
130 init_class_Y(&lock_Y2, &rwlock_Y2, &mutex_Y2, &rwsem_Y2);
131
132 init_class_Z(&lock_Z1, &rwlock_Z1, &mutex_Z1, &rwsem_Z1);
133 init_class_Z(&lock_Z2, &rwlock_Z2, &mutex_Z2, &rwsem_Z2);
134}
135
136/*
137 * For spinlocks and rwlocks we also do hardirq-safe / softirq-safe tests.
138 * The following functions use a lock from a simulated hardirq/softirq
139 * context, causing the locks to be marked as hardirq-safe/softirq-safe:
140 */
141
142#define HARDIRQ_DISABLE local_irq_disable
143#define HARDIRQ_ENABLE local_irq_enable
144
145#define HARDIRQ_ENTER() \
146 local_irq_disable(); \
147 irq_enter(); \
148 WARN_ON(!in_irq());
149
150#define HARDIRQ_EXIT() \
151 __irq_exit(); \
152 local_irq_enable();
153
154#define SOFTIRQ_DISABLE local_bh_disable
155#define SOFTIRQ_ENABLE local_bh_enable
156
157#define SOFTIRQ_ENTER() \
158 local_bh_disable(); \
159 local_irq_disable(); \
160 trace_softirq_enter(); \
161 WARN_ON(!in_softirq());
162
163#define SOFTIRQ_EXIT() \
164 trace_softirq_exit(); \
165 local_irq_enable(); \
166 local_bh_enable();
167
168/*
169 * Shortcuts for lock/unlock API variants, to keep
170 * the testcases compact:
171 */
172#define L(x) spin_lock(&lock_##x)
173#define U(x) spin_unlock(&lock_##x)
174#define LU(x) L(x); U(x)
175#define SI(x) spin_lock_init(&lock_##x)
176
177#define WL(x) write_lock(&rwlock_##x)
178#define WU(x) write_unlock(&rwlock_##x)
179#define WLU(x) WL(x); WU(x)
180
181#define RL(x) read_lock(&rwlock_##x)
182#define RU(x) read_unlock(&rwlock_##x)
183#define RLU(x) RL(x); RU(x)
184#define RWI(x) rwlock_init(&rwlock_##x)
185
186#define ML(x) mutex_lock(&mutex_##x)
187#define MU(x) mutex_unlock(&mutex_##x)
188#define MI(x) mutex_init(&mutex_##x)
189
190#define WSL(x) down_write(&rwsem_##x)
191#define WSU(x) up_write(&rwsem_##x)
192
193#define RSL(x) down_read(&rwsem_##x)
194#define RSU(x) up_read(&rwsem_##x)
195#define RWSI(x) init_rwsem(&rwsem_##x)
196
197#define LOCK_UNLOCK_2(x,y) LOCK(x); LOCK(y); UNLOCK(y); UNLOCK(x)
198
199/*
200 * Generate different permutations of the same testcase, using
201 * the same basic lock-dependency/state events:
202 */
203
204#define GENERATE_TESTCASE(name) \
205 \
206static void name(void) { E(); }
207
208#define GENERATE_PERMUTATIONS_2_EVENTS(name) \
209 \
210static void name##_12(void) { E1(); E2(); } \
211static void name##_21(void) { E2(); E1(); }
212
213#define GENERATE_PERMUTATIONS_3_EVENTS(name) \
214 \
215static void name##_123(void) { E1(); E2(); E3(); } \
216static void name##_132(void) { E1(); E3(); E2(); } \
217static void name##_213(void) { E2(); E1(); E3(); } \
218static void name##_231(void) { E2(); E3(); E1(); } \
219static void name##_312(void) { E3(); E1(); E2(); } \
220static void name##_321(void) { E3(); E2(); E1(); }
221
222/*
223 * AA deadlock:
224 */
225
226#define E() \
227 \
228 LOCK(X1); \
229 LOCK(X2); /* this one should fail */
230
231/*
232 * 6 testcases:
233 */
234#include "locking-selftest-spin.h"
235GENERATE_TESTCASE(AA_spin)
236#include "locking-selftest-wlock.h"
237GENERATE_TESTCASE(AA_wlock)
238#include "locking-selftest-rlock.h"
239GENERATE_TESTCASE(AA_rlock)
240#include "locking-selftest-mutex.h"
241GENERATE_TESTCASE(AA_mutex)
242#include "locking-selftest-wsem.h"
243GENERATE_TESTCASE(AA_wsem)
244#include "locking-selftest-rsem.h"
245GENERATE_TESTCASE(AA_rsem)
246
247#undef E
248
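
To make the shorthand concrete: with locking-selftest-spin.h in effect, LOCK maps to L() (spin_lock), so the AA case above expands via GENERATE_TESTCASE to approximately:

	static void AA_spin(void)
	{
		spin_lock(&lock_X1);
		spin_lock(&lock_X2);	/* same lock class: should be flagged */
	}
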
249/*
250 * Special case for read-locking: read-locks are
251 * allowed to recurse on the same lock class:
252 */
253static void rlock_AA1(void)
254{
255 RL(X1);
256 RL(X1); // this one should NOT fail
257}
258
259static void rlock_AA1B(void)
260{
261 RL(X1);
262 RL(X2); // this one should NOT fail
263}
264
265static void rsem_AA1(void)
266{
267 RSL(X1);
268 RSL(X1); // this one should fail
269}
270
271static void rsem_AA1B(void)
272{
273 RSL(X1);
274 RSL(X2); // this one should fail
275}
276/*
277 * The mixing of read and write locks is not allowed:
278 */
279static void rlock_AA2(void)
280{
281 RL(X1);
282 WL(X2); // this one should fail
283}
284
285static void rsem_AA2(void)
286{
287 RSL(X1);
288 WSL(X2); // this one should fail
289}
290
291static void rlock_AA3(void)
292{
293 WL(X1);
294 RL(X2); // this one should fail
295}
296
297static void rsem_AA3(void)
298{
299 WSL(X1);
300 RSL(X2); // this one should fail
301}
302
303/*
304 * ABBA deadlock:
305 */
306
307#define E() \
308 \
309 LOCK_UNLOCK_2(A, B); \
310 LOCK_UNLOCK_2(B, A); /* fail */
311
312/*
313 * 6 testcases:
314 */
315#include "locking-selftest-spin.h"
316GENERATE_TESTCASE(ABBA_spin)
317#include "locking-selftest-wlock.h"
318GENERATE_TESTCASE(ABBA_wlock)
319#include "locking-selftest-rlock.h"
320GENERATE_TESTCASE(ABBA_rlock)
321#include "locking-selftest-mutex.h"
322GENERATE_TESTCASE(ABBA_mutex)
323#include "locking-selftest-wsem.h"
324GENERATE_TESTCASE(ABBA_wsem)
325#include "locking-selftest-rsem.h"
326GENERATE_TESTCASE(ABBA_rsem)
327
328#undef E
329
330/*
331 * AB BC CA deadlock:
332 */
333
334#define E() \
335 \
336 LOCK_UNLOCK_2(A, B); \
337 LOCK_UNLOCK_2(B, C); \
338 LOCK_UNLOCK_2(C, A); /* fail */
339
340/*
341 * 6 testcases:
342 */
343#include "locking-selftest-spin.h"
344GENERATE_TESTCASE(ABBCCA_spin)
345#include "locking-selftest-wlock.h"
346GENERATE_TESTCASE(ABBCCA_wlock)
347#include "locking-selftest-rlock.h"
348GENERATE_TESTCASE(ABBCCA_rlock)
349#include "locking-selftest-mutex.h"
350GENERATE_TESTCASE(ABBCCA_mutex)
351#include "locking-selftest-wsem.h"
352GENERATE_TESTCASE(ABBCCA_wsem)
353#include "locking-selftest-rsem.h"
354GENERATE_TESTCASE(ABBCCA_rsem)
355
356#undef E
357
358/*
359 * AB CA BC deadlock:
360 */
361
362#define E() \
363 \
364 LOCK_UNLOCK_2(A, B); \
365 LOCK_UNLOCK_2(C, A); \
366 LOCK_UNLOCK_2(B, C); /* fail */
367
368/*
369 * 6 testcases:
370 */
371#include "locking-selftest-spin.h"
372GENERATE_TESTCASE(ABCABC_spin)
373#include "locking-selftest-wlock.h"
374GENERATE_TESTCASE(ABCABC_wlock)
375#include "locking-selftest-rlock.h"
376GENERATE_TESTCASE(ABCABC_rlock)
377#include "locking-selftest-mutex.h"
378GENERATE_TESTCASE(ABCABC_mutex)
379#include "locking-selftest-wsem.h"
380GENERATE_TESTCASE(ABCABC_wsem)
381#include "locking-selftest-rsem.h"
382GENERATE_TESTCASE(ABCABC_rsem)
383
384#undef E
385
386/*
387 * AB BC CD DA deadlock:
388 */
389
390#define E() \
391 \
392 LOCK_UNLOCK_2(A, B); \
393 LOCK_UNLOCK_2(B, C); \
394 LOCK_UNLOCK_2(C, D); \
395 LOCK_UNLOCK_2(D, A); /* fail */
396
397/*
398 * 6 testcases:
399 */
400#include "locking-selftest-spin.h"
401GENERATE_TESTCASE(ABBCCDDA_spin)
402#include "locking-selftest-wlock.h"
403GENERATE_TESTCASE(ABBCCDDA_wlock)
404#include "locking-selftest-rlock.h"
405GENERATE_TESTCASE(ABBCCDDA_rlock)
406#include "locking-selftest-mutex.h"
407GENERATE_TESTCASE(ABBCCDDA_mutex)
408#include "locking-selftest-wsem.h"
409GENERATE_TESTCASE(ABBCCDDA_wsem)
410#include "locking-selftest-rsem.h"
411GENERATE_TESTCASE(ABBCCDDA_rsem)
412
413#undef E
414
415/*
416 * AB CD BD DA deadlock:
417 */
418#define E() \
419 \
420 LOCK_UNLOCK_2(A, B); \
421 LOCK_UNLOCK_2(C, D); \
422 LOCK_UNLOCK_2(B, D); \
423 LOCK_UNLOCK_2(D, A); /* fail */
424
425/*
426 * 6 testcases:
427 */
428#include "locking-selftest-spin.h"
429GENERATE_TESTCASE(ABCDBDDA_spin)
430#include "locking-selftest-wlock.h"
431GENERATE_TESTCASE(ABCDBDDA_wlock)
432#include "locking-selftest-rlock.h"
433GENERATE_TESTCASE(ABCDBDDA_rlock)
434#include "locking-selftest-mutex.h"
435GENERATE_TESTCASE(ABCDBDDA_mutex)
436#include "locking-selftest-wsem.h"
437GENERATE_TESTCASE(ABCDBDDA_wsem)
438#include "locking-selftest-rsem.h"
439GENERATE_TESTCASE(ABCDBDDA_rsem)
440
441#undef E
442
443/*
444 * AB CD BC DA deadlock:
445 */
446#define E() \
447 \
448 LOCK_UNLOCK_2(A, B); \
449 LOCK_UNLOCK_2(C, D); \
450 LOCK_UNLOCK_2(B, C); \
451 LOCK_UNLOCK_2(D, A); /* fail */
452
453/*
454 * 6 testcases:
455 */
456#include "locking-selftest-spin.h"
457GENERATE_TESTCASE(ABCDBCDA_spin)
458#include "locking-selftest-wlock.h"
459GENERATE_TESTCASE(ABCDBCDA_wlock)
460#include "locking-selftest-rlock.h"
461GENERATE_TESTCASE(ABCDBCDA_rlock)
462#include "locking-selftest-mutex.h"
463GENERATE_TESTCASE(ABCDBCDA_mutex)
464#include "locking-selftest-wsem.h"
465GENERATE_TESTCASE(ABCDBCDA_wsem)
466#include "locking-selftest-rsem.h"
467GENERATE_TESTCASE(ABCDBCDA_rsem)
468
469#undef E
470
471/*
472 * Double unlock:
473 */
474#define E() \
475 \
476 LOCK(A); \
477 UNLOCK(A); \
478 UNLOCK(A); /* fail */
479
480/*
481 * 6 testcases:
482 */
483#include "locking-selftest-spin.h"
484GENERATE_TESTCASE(double_unlock_spin)
485#include "locking-selftest-wlock.h"
486GENERATE_TESTCASE(double_unlock_wlock)
487#include "locking-selftest-rlock.h"
488GENERATE_TESTCASE(double_unlock_rlock)
489#include "locking-selftest-mutex.h"
490GENERATE_TESTCASE(double_unlock_mutex)
491#include "locking-selftest-wsem.h"
492GENERATE_TESTCASE(double_unlock_wsem)
493#include "locking-selftest-rsem.h"
494GENERATE_TESTCASE(double_unlock_rsem)
495
496#undef E
497
498/*
499 * Bad unlock ordering:
500 */
501#define E() \
502 \
503 LOCK(A); \
504 LOCK(B); \
505 UNLOCK(A); /* fail */ \
506 UNLOCK(B);
507
508/*
509 * 6 testcases:
510 */
511#include "locking-selftest-spin.h"
512GENERATE_TESTCASE(bad_unlock_order_spin)
513#include "locking-selftest-wlock.h"
514GENERATE_TESTCASE(bad_unlock_order_wlock)
515#include "locking-selftest-rlock.h"
516GENERATE_TESTCASE(bad_unlock_order_rlock)
517#include "locking-selftest-mutex.h"
518GENERATE_TESTCASE(bad_unlock_order_mutex)
519#include "locking-selftest-wsem.h"
520GENERATE_TESTCASE(bad_unlock_order_wsem)
521#include "locking-selftest-rsem.h"
522GENERATE_TESTCASE(bad_unlock_order_rsem)
523
524#undef E
525
526/*
527 * initializing a held lock:
528 */
529#define E() \
530 \
531 LOCK(A); \
532 INIT(A); /* fail */
533
534/*
535 * 6 testcases:
536 */
537#include "locking-selftest-spin.h"
538GENERATE_TESTCASE(init_held_spin)
539#include "locking-selftest-wlock.h"
540GENERATE_TESTCASE(init_held_wlock)
541#include "locking-selftest-rlock.h"
542GENERATE_TESTCASE(init_held_rlock)
543#include "locking-selftest-mutex.h"
544GENERATE_TESTCASE(init_held_mutex)
545#include "locking-selftest-wsem.h"
546GENERATE_TESTCASE(init_held_wsem)
547#include "locking-selftest-rsem.h"
548GENERATE_TESTCASE(init_held_rsem)
549
550#undef E
551
552/*
553 * locking an irq-safe lock with irqs enabled:
554 */
555#define E1() \
556 \
557 IRQ_ENTER(); \
558 LOCK(A); \
559 UNLOCK(A); \
560 IRQ_EXIT();
561
562#define E2() \
563 \
564 LOCK(A); \
565 UNLOCK(A);
566
567/*
568 * Generate 24 testcases:
569 */
570#include "locking-selftest-spin-hardirq.h"
571GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
572
573#include "locking-selftest-rlock-hardirq.h"
574GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
575
576#include "locking-selftest-wlock-hardirq.h"
577GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
578
579#include "locking-selftest-spin-softirq.h"
580GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
581
582#include "locking-selftest-rlock-softirq.h"
583GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
584
585#include "locking-selftest-wlock-softirq.h"
586GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
587
588#undef E1
589#undef E2
590
591/*
592 * Enabling hardirqs with a softirq-safe lock held:
593 */
594#define E1() \
595 \
596 SOFTIRQ_ENTER(); \
597 LOCK(A); \
598 UNLOCK(A); \
599 SOFTIRQ_EXIT();
600
601#define E2() \
602 \
603 HARDIRQ_DISABLE(); \
604 LOCK(A); \
605 HARDIRQ_ENABLE(); \
606 UNLOCK(A);
607
608/*
609 * Generate 12 testcases:
610 */
611#include "locking-selftest-spin.h"
612GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_spin)
613
614#include "locking-selftest-wlock.h"
615GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_wlock)
616
617#include "locking-selftest-rlock.h"
618GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
619
620#undef E1
621#undef E2
622
623/*
624 * Enabling irqs with an irq-safe lock held:
625 */
626#define E1() \
627 \
628 IRQ_ENTER(); \
629 LOCK(A); \
630 UNLOCK(A); \
631 IRQ_EXIT();
632
633#define E2() \
634 \
635 IRQ_DISABLE(); \
636 LOCK(A); \
637 IRQ_ENABLE(); \
638 UNLOCK(A);
639
640/*
641 * Generate 24 testcases:
642 */
643#include "locking-selftest-spin-hardirq.h"
644GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
645
646#include "locking-selftest-rlock-hardirq.h"
647GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
648
649#include "locking-selftest-wlock-hardirq.h"
650GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
651
652#include "locking-selftest-spin-softirq.h"
653GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
654
655#include "locking-selftest-rlock-softirq.h"
656GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
657
658#include "locking-selftest-wlock-softirq.h"
659GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
660
661#undef E1
662#undef E2
663
664/*
664 * Acquiring an irq-unsafe lock while holding an irq-safe lock:
666 */
667#define E1() \
668 \
669 LOCK(A); \
670 LOCK(B); \
671 UNLOCK(B); \
672 UNLOCK(A); \
673
674#define E2() \
675 \
676 LOCK(B); \
677 UNLOCK(B);
678
679#define E3() \
680 \
681 IRQ_ENTER(); \
682 LOCK(A); \
683 UNLOCK(A); \
684 IRQ_EXIT();
685
686/*
687 * Generate 36 testcases:
688 */
689#include "locking-selftest-spin-hardirq.h"
690GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
691
692#include "locking-selftest-rlock-hardirq.h"
693GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
694
695#include "locking-selftest-wlock-hardirq.h"
696GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
697
698#include "locking-selftest-spin-softirq.h"
699GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
700
701#include "locking-selftest-rlock-softirq.h"
702GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
703
704#include "locking-selftest-wlock-softirq.h"
705GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
706
707#undef E1
708#undef E2
709#undef E3
710
711/*
712 * If a lock becomes softirq-safe, but earlier it took
713 * a softirq-unsafe lock:
714 */
715
716#define E1() \
717 IRQ_DISABLE(); \
718 LOCK(A); \
719 LOCK(B); \
720 UNLOCK(B); \
721 UNLOCK(A); \
722 IRQ_ENABLE();
723
724#define E2() \
725 LOCK(B); \
726 UNLOCK(B);
727
728#define E3() \
729 IRQ_ENTER(); \
730 LOCK(A); \
731 UNLOCK(A); \
732 IRQ_EXIT();
733
734/*
735 * Generate 36 testcases:
736 */
737#include "locking-selftest-spin-hardirq.h"
738GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
739
740#include "locking-selftest-rlock-hardirq.h"
741GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
742
743#include "locking-selftest-wlock-hardirq.h"
744GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
745
746#include "locking-selftest-spin-softirq.h"
747GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
748
749#include "locking-selftest-rlock-softirq.h"
750GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
751
752#include "locking-selftest-wlock-softirq.h"
753GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
754
755#undef E1
756#undef E2
757#undef E3
758
759/*
760 * read-lock / write-lock irq inversion.
761 *
762 * Deadlock scenario:
763 *
764 * CPU#1 is at #1, i.e. it has write-locked A, but has not
765 * taken B yet.
766 *
767 * CPU#2 is at #2, i.e. it has locked B.
768 *
769 * Hardirq hits CPU#2 at point #2 and is trying to read-lock A.
770 *
771 * The deadlock occurs because CPU#1 will spin on B, and CPU#2
772 * will spin on A.
773 */
774
775#define E1() \
776 \
777 IRQ_DISABLE(); \
778 WL(A); \
779 LOCK(B); \
780 UNLOCK(B); \
781 WU(A); \
782 IRQ_ENABLE();
783
784#define E2() \
785 \
786 LOCK(B); \
787 UNLOCK(B);
788
789#define E3() \
790 \
791 IRQ_ENTER(); \
792 RL(A); \
793 RU(A); \
794 IRQ_EXIT();
795
796/*
797 * Generate 36 testcases:
798 */
799#include "locking-selftest-spin-hardirq.h"
800GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_spin)
801
802#include "locking-selftest-rlock-hardirq.h"
803GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock)
804
805#include "locking-selftest-wlock-hardirq.h"
806GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
807
808#include "locking-selftest-spin-softirq.h"
809GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
810
811#include "locking-selftest-rlock-softirq.h"
812GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock)
813
814#include "locking-selftest-wlock-softirq.h"
815GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
816
817#undef E1
818#undef E2
819#undef E3
820
821/*
822 * read-lock / write-lock recursion that is actually safe.
823 */
824
825#define E1() \
826 \
827 IRQ_DISABLE(); \
828 WL(A); \
829 WU(A); \
830 IRQ_ENABLE();
831
832#define E2() \
833 \
834 RL(A); \
835 RU(A); \
836
837#define E3() \
838 \
839 IRQ_ENTER(); \
840 RL(A); \
841 L(B); \
842 U(B); \
843 RU(A); \
844 IRQ_EXIT();
845
846/*
847 * Generate 12 testcases:
848 */
849#include "locking-selftest-hardirq.h"
850GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard)
851
852#include "locking-selftest-softirq.h"
853GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
854
855#undef E1
856#undef E2
857#undef E3
858
859/*
860 * read-lock / write-lock recursion that is unsafe.
861 */
862
863#define E1() \
864 \
865 IRQ_DISABLE(); \
866 L(B); \
867 WL(A); \
868 WU(A); \
869 U(B); \
870 IRQ_ENABLE();
871
872#define E2() \
873 \
874 RL(A); \
875 RU(A); \
876
877#define E3() \
878 \
879 IRQ_ENTER(); \
880 L(B); \
881 U(B); \
882 IRQ_EXIT();
883
884/*
885 * Generate 12 testcases:
886 */
887#include "locking-selftest-hardirq.h"
888// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard)
889
890#include "locking-selftest-softirq.h"
891// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
892
893#ifdef CONFIG_DEBUG_LOCK_ALLOC
894# define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map)
895# define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map)
896# define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map)
897# define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map)
898#else
899# define I_SPINLOCK(x)
900# define I_RWLOCK(x)
901# define I_MUTEX(x)
902# define I_RWSEM(x)
903#endif
904
905#define I1(x) \
906 do { \
907 I_SPINLOCK(x); \
908 I_RWLOCK(x); \
909 I_MUTEX(x); \
910 I_RWSEM(x); \
911 } while (0)
912
913#define I2(x) \
914 do { \
915 spin_lock_init(&lock_##x); \
916 rwlock_init(&rwlock_##x); \
917 mutex_init(&mutex_##x); \
918 init_rwsem(&rwsem_##x); \
919 } while (0)
920
921static void reset_locks(void)
922{
923 local_irq_disable();
924 I1(A); I1(B); I1(C); I1(D);
925 I1(X1); I1(X2); I1(Y1); I1(Y2); I1(Z1); I1(Z2);
926 lockdep_reset();
927 I2(A); I2(B); I2(C); I2(D);
928 init_shared_classes();
929 local_irq_enable();
930}
931
932#undef I
933
934static int testcase_total;
935static int testcase_successes;
936static int expected_testcase_failures;
937static int unexpected_testcase_failures;
938
939static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
940{
941 unsigned long saved_preempt_count = preempt_count();
942 int expected_failure = 0;
943
944 WARN_ON(irqs_disabled());
945
946 testcase_fn();
947 /*
948 * Filter out expected failures:
949 */
950#ifndef CONFIG_PROVE_LOCKING
951 if ((lockclass_mask & LOCKTYPE_SPIN) && debug_locks != expected)
952 expected_failure = 1;
953 if ((lockclass_mask & LOCKTYPE_RWLOCK) && debug_locks != expected)
954 expected_failure = 1;
955 if ((lockclass_mask & LOCKTYPE_MUTEX) && debug_locks != expected)
956 expected_failure = 1;
957 if ((lockclass_mask & LOCKTYPE_RWSEM) && debug_locks != expected)
958 expected_failure = 1;
959#endif
960 if (debug_locks != expected) {
961 if (expected_failure) {
962 expected_testcase_failures++;
963 printk("failed|");
964 } else {
965 unexpected_testcase_failures++;
966 printk("FAILED|");
967 }
968 } else {
969 testcase_successes++;
970 printk(" ok |");
971 }
972 testcase_total++;
973
974 if (debug_locks_verbose)
975 printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
976 lockclass_mask, debug_locks, expected);
977 /*
978 * Some tests (e.g. double-unlock) might corrupt the preemption
979 * count, so restore it:
980 */
981 preempt_count() = saved_preempt_count;
982#ifdef CONFIG_TRACE_IRQFLAGS
983 if (softirq_count())
984 current->softirqs_enabled = 0;
985 else
986 current->softirqs_enabled = 1;
987#endif
988
989 reset_locks();
990}
991
992static inline void print_testname(const char *testname)
993{
994 printk("%33s:", testname);
995}
996
997#define DO_TESTCASE_1(desc, name, nr) \
998 print_testname(desc"/"#nr); \
999 dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
1000 printk("\n");
1001
1002#define DO_TESTCASE_1B(desc, name, nr) \
1003 print_testname(desc"/"#nr); \
1004 dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \
1005 printk("\n");
1006
1007#define DO_TESTCASE_3(desc, name, nr) \
1008 print_testname(desc"/"#nr); \
1009 dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \
1010 dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \
1011 dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
1012 printk("\n");
1013
1014#define DO_TESTCASE_3RW(desc, name, nr) \
1015 print_testname(desc"/"#nr); \
1016 dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
1017 dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \
1018 dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
1019 printk("\n");
1020
1021#define DO_TESTCASE_6(desc, name) \
1022 print_testname(desc); \
1023 dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \
1024 dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \
1025 dotest(name##_rlock, FAILURE, LOCKTYPE_RWLOCK); \
1026 dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
1027 dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
1028 dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
1029 printk("\n");
1030
1031#define DO_TESTCASE_6_SUCCESS(desc, name) \
1032 print_testname(desc); \
1033 dotest(name##_spin, SUCCESS, LOCKTYPE_SPIN); \
1034 dotest(name##_wlock, SUCCESS, LOCKTYPE_RWLOCK); \
1035 dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \
1036 dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX); \
1037 dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM); \
1038 dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM); \
1039 printk("\n");
1040
1041/*
1042 * 'read' variant: rlocks must not trigger.
1043 */
1044#define DO_TESTCASE_6R(desc, name) \
1045 print_testname(desc); \
1046 dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \
1047 dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \
1048 dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \
1049 dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
1050 dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
1051 dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
1052 printk("\n");
1053
1054#define DO_TESTCASE_2I(desc, name, nr) \
1055 DO_TESTCASE_1("hard-"desc, name##_hard, nr); \
1056 DO_TESTCASE_1("soft-"desc, name##_soft, nr);
1057
1058#define DO_TESTCASE_2IB(desc, name, nr) \
1059 DO_TESTCASE_1B("hard-"desc, name##_hard, nr); \
1060 DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
1061
1062#define DO_TESTCASE_6I(desc, name, nr) \
1063 DO_TESTCASE_3("hard-"desc, name##_hard, nr); \
1064 DO_TESTCASE_3("soft-"desc, name##_soft, nr);
1065
1066#define DO_TESTCASE_6IRW(desc, name, nr) \
1067 DO_TESTCASE_3RW("hard-"desc, name##_hard, nr); \
1068 DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
1069
1070#define DO_TESTCASE_2x3(desc, name) \
1071 DO_TESTCASE_3(desc, name, 12); \
1072 DO_TESTCASE_3(desc, name, 21);
1073
1074#define DO_TESTCASE_2x6(desc, name) \
1075 DO_TESTCASE_6I(desc, name, 12); \
1076 DO_TESTCASE_6I(desc, name, 21);
1077
1078#define DO_TESTCASE_6x2(desc, name) \
1079 DO_TESTCASE_2I(desc, name, 123); \
1080 DO_TESTCASE_2I(desc, name, 132); \
1081 DO_TESTCASE_2I(desc, name, 213); \
1082 DO_TESTCASE_2I(desc, name, 231); \
1083 DO_TESTCASE_2I(desc, name, 312); \
1084 DO_TESTCASE_2I(desc, name, 321);
1085
1086#define DO_TESTCASE_6x2B(desc, name) \
1087 DO_TESTCASE_2IB(desc, name, 123); \
1088 DO_TESTCASE_2IB(desc, name, 132); \
1089 DO_TESTCASE_2IB(desc, name, 213); \
1090 DO_TESTCASE_2IB(desc, name, 231); \
1091 DO_TESTCASE_2IB(desc, name, 312); \
1092 DO_TESTCASE_2IB(desc, name, 321);
1093
1094#define DO_TESTCASE_6x6(desc, name) \
1095 DO_TESTCASE_6I(desc, name, 123); \
1096 DO_TESTCASE_6I(desc, name, 132); \
1097 DO_TESTCASE_6I(desc, name, 213); \
1098 DO_TESTCASE_6I(desc, name, 231); \
1099 DO_TESTCASE_6I(desc, name, 312); \
1100 DO_TESTCASE_6I(desc, name, 321);
1101
1102#define DO_TESTCASE_6x6RW(desc, name) \
1103 DO_TESTCASE_6IRW(desc, name, 123); \
1104 DO_TESTCASE_6IRW(desc, name, 132); \
1105 DO_TESTCASE_6IRW(desc, name, 213); \
1106 DO_TESTCASE_6IRW(desc, name, 231); \
1107 DO_TESTCASE_6IRW(desc, name, 312); \
1108 DO_TESTCASE_6IRW(desc, name, 321);
1109
1110
1111void locking_selftest(void)
1112{
1113 /*
1114 * Got a locking failure before the selftest ran?
1115 */
1116 if (!debug_locks) {
1117 printk("----------------------------------\n");
1118 printk("| Locking API testsuite disabled |\n");
1119 printk("----------------------------------\n");
1120 return;
1121 }
1122
1123 /*
1124 * Run the testsuite:
1125 */
1126 printk("------------------------\n");
1127 printk("| Locking API testsuite:\n");
1128 printk("----------------------------------------------------------------------------\n");
1129 printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n");
1130 printk(" --------------------------------------------------------------------------\n");
1131
1132 init_shared_classes();
1133 debug_locks_silent = !debug_locks_verbose;
1134
1135 DO_TESTCASE_6R("A-A deadlock", AA);
1136 DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
1137 DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
1138 DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
1139 DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
1140 DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
1141 DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
1142 DO_TESTCASE_6("double unlock", double_unlock);
1143 DO_TESTCASE_6("initialize held", init_held);
1144 DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
1145
1146 printk(" --------------------------------------------------------------------------\n");
1147 print_testname("recursive read-lock");
1148 printk(" |");
1149 dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
1150 printk(" |");
1151 dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
1152 printk("\n");
1153
1154 print_testname("recursive read-lock #2");
1155 printk(" |");
1156 dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
1157 printk(" |");
1158 dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
1159 printk("\n");
1160
1161 print_testname("mixed read-write-lock");
1162 printk(" |");
1163 dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
1164 printk(" |");
1165 dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
1166 printk("\n");
1167
1168 print_testname("mixed write-read-lock");
1169 printk(" |");
1170 dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
1171 printk(" |");
1172 dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
1173 printk("\n");
1174
1175 printk(" --------------------------------------------------------------------------\n");
1176
1177 /*
1178 * irq-context testcases:
1179 */
1180 DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
1181 DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
1182 DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
1183 DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
1184 DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
1185 DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);
1186
1187 DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
1188// DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
1189
1190 if (unexpected_testcase_failures) {
1191 printk("-----------------------------------------------------------------\n");
1192 debug_locks = 0;
1193 printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
1194 unexpected_testcase_failures, testcase_total);
1195 printk("-----------------------------------------------------------------\n");
1196 } else if (expected_testcase_failures && testcase_successes) {
1197 printk("--------------------------------------------------------\n");
1198 printk("%3d out of %3d testcases failed, as expected. |\n",
1199 expected_testcase_failures, testcase_total);
1200 printk("----------------------------------------------------\n");
1201 debug_locks = 1;
1202 } else if (expected_testcase_failures && !testcase_successes) {
1203 printk("--------------------------------------------------------\n");
1204 printk("All %3d testcases failed, as expected. |\n",
1205 expected_testcase_failures);
1206 printk("----------------------------------------\n");
1207 debug_locks = 1;
1208 } else {
1209 printk("-------------------------------------------------------\n");
1210 printk("Good, all %3d testcases passed! |\n",
1211 testcase_successes);
1212 printk("---------------------------------\n");
1213 debug_locks = 1;
1214 }
1215 debug_locks_silent = 0;
1216}
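
Each DO_TESTCASE_* line above is a compact table row; for example, DO_TESTCASE_6R("A-A deadlock", AA) expands, per the macros earlier in this file, to roughly:

	print_testname("A-A deadlock");
	dotest(AA_spin,  FAILURE, LOCKTYPE_SPIN);
	dotest(AA_wlock, FAILURE, LOCKTYPE_RWLOCK);
	dotest(AA_rlock, SUCCESS, LOCKTYPE_RWLOCK);	/* read-locks may recurse */
	dotest(AA_mutex, FAILURE, LOCKTYPE_MUTEX);
	dotest(AA_wsem,  FAILURE, LOCKTYPE_RWSEM);
	dotest(AA_rsem,  FAILURE, LOCKTYPE_RWSEM);
	printk("\n");
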
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 40ffde940a86..db4fed74b940 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -17,27 +17,22 @@ struct rwsem_waiter {
17#define RWSEM_WAITING_FOR_WRITE 0x00000002 17#define RWSEM_WAITING_FOR_WRITE 0x00000002
18}; 18};
19 19
20#if RWSEM_DEBUG
21void rwsemtrace(struct rw_semaphore *sem, const char *str)
22{
23 if (sem->debug)
24 printk("[%d] %s({%d,%d})\n",
25 current->pid, str, sem->activity,
26 list_empty(&sem->wait_list) ? 0 : 1);
27}
28#endif
29
30/* 20/*
31 * initialise the semaphore 21 * initialise the semaphore
32 */ 22 */
33void fastcall init_rwsem(struct rw_semaphore *sem) 23void __init_rwsem(struct rw_semaphore *sem, const char *name,
24 struct lock_class_key *key)
34{ 25{
26#ifdef CONFIG_DEBUG_LOCK_ALLOC
27 /*
28 * Make sure we are not reinitializing a held semaphore:
29 */
30 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
31 lockdep_init_map(&sem->dep_map, name, key);
32#endif
35 sem->activity = 0; 33 sem->activity = 0;
36 spin_lock_init(&sem->wait_lock); 34 spin_lock_init(&sem->wait_lock);
37 INIT_LIST_HEAD(&sem->wait_list); 35 INIT_LIST_HEAD(&sem->wait_list);
38#if RWSEM_DEBUG
39 sem->debug = 0;
40#endif
41} 36}
42 37
43/* 38/*
@@ -56,8 +51,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
56 struct task_struct *tsk; 51 struct task_struct *tsk;
57 int woken; 52 int woken;
58 53
59 rwsemtrace(sem, "Entering __rwsem_do_wake");
60
61 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 54 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
62 55
63 if (!wakewrite) { 56 if (!wakewrite) {
@@ -104,7 +97,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
104 sem->activity += woken; 97 sem->activity += woken;
105 98
106 out: 99 out:
107 rwsemtrace(sem, "Leaving __rwsem_do_wake");
108 return sem; 100 return sem;
109} 101}
110 102
@@ -138,8 +130,6 @@ void fastcall __sched __down_read(struct rw_semaphore *sem)
138 struct rwsem_waiter waiter; 130 struct rwsem_waiter waiter;
139 struct task_struct *tsk; 131 struct task_struct *tsk;
140 132
141 rwsemtrace(sem, "Entering __down_read");
142
143 spin_lock_irq(&sem->wait_lock); 133 spin_lock_irq(&sem->wait_lock);
144 134
145 if (sem->activity >= 0 && list_empty(&sem->wait_list)) { 135 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
@@ -171,9 +161,8 @@ void fastcall __sched __down_read(struct rw_semaphore *sem)
171 } 161 }
172 162
173 tsk->state = TASK_RUNNING; 163 tsk->state = TASK_RUNNING;
174
175 out: 164 out:
176 rwsemtrace(sem, "Leaving __down_read"); 165 ;
177} 166}
178 167
179/* 168/*
@@ -184,7 +173,6 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
184 unsigned long flags; 173 unsigned long flags;
185 int ret = 0; 174 int ret = 0;
186 175
187 rwsemtrace(sem, "Entering __down_read_trylock");
188 176
189 spin_lock_irqsave(&sem->wait_lock, flags); 177 spin_lock_irqsave(&sem->wait_lock, flags);
190 178
@@ -196,7 +184,6 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
196 184
197 spin_unlock_irqrestore(&sem->wait_lock, flags); 185 spin_unlock_irqrestore(&sem->wait_lock, flags);
198 186
199 rwsemtrace(sem, "Leaving __down_read_trylock");
200 return ret; 187 return ret;
201} 188}
202 189
@@ -204,13 +191,11 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
204 * get a write lock on the semaphore 191 * get a write lock on the semaphore
205 * - we increment the waiting count anyway to indicate an exclusive lock 192 * - we increment the waiting count anyway to indicate an exclusive lock
206 */ 193 */
207void fastcall __sched __down_write(struct rw_semaphore *sem) 194void fastcall __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
208{ 195{
209 struct rwsem_waiter waiter; 196 struct rwsem_waiter waiter;
210 struct task_struct *tsk; 197 struct task_struct *tsk;
211 198
212 rwsemtrace(sem, "Entering __down_write");
213
214 spin_lock_irq(&sem->wait_lock); 199 spin_lock_irq(&sem->wait_lock);
215 200
216 if (sem->activity == 0 && list_empty(&sem->wait_list)) { 201 if (sem->activity == 0 && list_empty(&sem->wait_list)) {
@@ -242,9 +227,13 @@ void fastcall __sched __down_write(struct rw_semaphore *sem)
242 } 227 }
243 228
244 tsk->state = TASK_RUNNING; 229 tsk->state = TASK_RUNNING;
245
246 out: 230 out:
247 rwsemtrace(sem, "Leaving __down_write"); 231 ;
232}
233
234void fastcall __sched __down_write(struct rw_semaphore *sem)
235{
236 __down_write_nested(sem, 0);
248} 237}
249 238
250/* 239/*
@@ -255,8 +244,6 @@ int fastcall __down_write_trylock(struct rw_semaphore *sem)
255 unsigned long flags; 244 unsigned long flags;
256 int ret = 0; 245 int ret = 0;
257 246
258 rwsemtrace(sem, "Entering __down_write_trylock");
259
260 spin_lock_irqsave(&sem->wait_lock, flags); 247 spin_lock_irqsave(&sem->wait_lock, flags);
261 248
262 if (sem->activity == 0 && list_empty(&sem->wait_list)) { 249 if (sem->activity == 0 && list_empty(&sem->wait_list)) {
@@ -267,7 +254,6 @@ int fastcall __down_write_trylock(struct rw_semaphore *sem)
267 254
268 spin_unlock_irqrestore(&sem->wait_lock, flags); 255 spin_unlock_irqrestore(&sem->wait_lock, flags);
269 256
270 rwsemtrace(sem, "Leaving __down_write_trylock");
271 return ret; 257 return ret;
272} 258}
273 259
@@ -278,16 +264,12 @@ void fastcall __up_read(struct rw_semaphore *sem)
278{ 264{
279 unsigned long flags; 265 unsigned long flags;
280 266
281 rwsemtrace(sem, "Entering __up_read");
282
283 spin_lock_irqsave(&sem->wait_lock, flags); 267 spin_lock_irqsave(&sem->wait_lock, flags);
284 268
285 if (--sem->activity == 0 && !list_empty(&sem->wait_list)) 269 if (--sem->activity == 0 && !list_empty(&sem->wait_list))
286 sem = __rwsem_wake_one_writer(sem); 270 sem = __rwsem_wake_one_writer(sem);
287 271
288 spin_unlock_irqrestore(&sem->wait_lock, flags); 272 spin_unlock_irqrestore(&sem->wait_lock, flags);
289
290 rwsemtrace(sem, "Leaving __up_read");
291} 273}
292 274
293/* 275/*
@@ -297,8 +279,6 @@ void fastcall __up_write(struct rw_semaphore *sem)
297{ 279{
298 unsigned long flags; 280 unsigned long flags;
299 281
300 rwsemtrace(sem, "Entering __up_write");
301
302 spin_lock_irqsave(&sem->wait_lock, flags); 282 spin_lock_irqsave(&sem->wait_lock, flags);
303 283
304 sem->activity = 0; 284 sem->activity = 0;
@@ -306,8 +286,6 @@ void fastcall __up_write(struct rw_semaphore *sem)
306 sem = __rwsem_do_wake(sem, 1); 286 sem = __rwsem_do_wake(sem, 1);
307 287
308 spin_unlock_irqrestore(&sem->wait_lock, flags); 288 spin_unlock_irqrestore(&sem->wait_lock, flags);
309
310 rwsemtrace(sem, "Leaving __up_write");
311} 289}
312 290
313/* 291/*
@@ -318,8 +296,6 @@ void fastcall __downgrade_write(struct rw_semaphore *sem)
318{ 296{
319 unsigned long flags; 297 unsigned long flags;
320 298
321 rwsemtrace(sem, "Entering __downgrade_write");
322
323 spin_lock_irqsave(&sem->wait_lock, flags); 299 spin_lock_irqsave(&sem->wait_lock, flags);
324 300
325 sem->activity = 1; 301 sem->activity = 1;
@@ -327,18 +303,14 @@ void fastcall __downgrade_write(struct rw_semaphore *sem)
327 sem = __rwsem_do_wake(sem, 0); 303 sem = __rwsem_do_wake(sem, 0);
328 304
329 spin_unlock_irqrestore(&sem->wait_lock, flags); 305 spin_unlock_irqrestore(&sem->wait_lock, flags);
330
331 rwsemtrace(sem, "Leaving __downgrade_write");
332} 306}
333 307
334EXPORT_SYMBOL(init_rwsem); 308EXPORT_SYMBOL(__init_rwsem);
335EXPORT_SYMBOL(__down_read); 309EXPORT_SYMBOL(__down_read);
336EXPORT_SYMBOL(__down_read_trylock); 310EXPORT_SYMBOL(__down_read_trylock);
311EXPORT_SYMBOL(__down_write_nested);
337EXPORT_SYMBOL(__down_write); 312EXPORT_SYMBOL(__down_write);
338EXPORT_SYMBOL(__down_write_trylock); 313EXPORT_SYMBOL(__down_write_trylock);
339EXPORT_SYMBOL(__up_read); 314EXPORT_SYMBOL(__up_read);
340EXPORT_SYMBOL(__up_write); 315EXPORT_SYMBOL(__up_write);
341EXPORT_SYMBOL(__downgrade_write); 316EXPORT_SYMBOL(__downgrade_write);
342#if RWSEM_DEBUG
343EXPORT_SYMBOL(rwsemtrace);
344#endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 62fa4eba9ffe..b322421c2969 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -8,6 +8,26 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/module.h> 9#include <linux/module.h>
10 10
11/*
12 * Initialize an rwsem:
13 */
14void __init_rwsem(struct rw_semaphore *sem, const char *name,
15 struct lock_class_key *key)
16{
17#ifdef CONFIG_DEBUG_LOCK_ALLOC
18 /*
19 * Make sure we are not reinitializing a held semaphore:
20 */
21 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
22 lockdep_init_map(&sem->dep_map, name, key);
23#endif
24 sem->count = RWSEM_UNLOCKED_VALUE;
25 spin_lock_init(&sem->wait_lock);
26 INIT_LIST_HEAD(&sem->wait_list);
27}
28
29EXPORT_SYMBOL(__init_rwsem);
30
11struct rwsem_waiter { 31struct rwsem_waiter {
12 struct list_head list; 32 struct list_head list;
13 struct task_struct *task; 33 struct task_struct *task;
@@ -16,17 +36,6 @@ struct rwsem_waiter {
16#define RWSEM_WAITING_FOR_WRITE 0x00000002 36#define RWSEM_WAITING_FOR_WRITE 0x00000002
17}; 37};
18 38
19#if RWSEM_DEBUG
20#undef rwsemtrace
21void rwsemtrace(struct rw_semaphore *sem, const char *str)
22{
23 printk("sem=%p\n", sem);
24 printk("(sem)=%08lx\n", sem->count);
25 if (sem->debug)
26 printk("[%d] %s({%08lx})\n", current->pid, str, sem->count);
27}
28#endif
29
30/* 39/*
31 * handle the lock release when processes blocked on it that can now run 40 * handle the lock release when processes blocked on it that can now run
32 * - if we come here from up_xxxx(), then: 41 * - if we come here from up_xxxx(), then:
@@ -45,8 +54,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
45 struct list_head *next; 54 struct list_head *next;
46 signed long oldcount, woken, loop; 55 signed long oldcount, woken, loop;
47 56
48 rwsemtrace(sem, "Entering __rwsem_do_wake");
49
50 if (downgrading) 57 if (downgrading)
51 goto dont_wake_writers; 58 goto dont_wake_writers;
52 59
@@ -127,7 +134,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
127 next->prev = &sem->wait_list; 134 next->prev = &sem->wait_list;
128 135
129 out: 136 out:
130 rwsemtrace(sem, "Leaving __rwsem_do_wake");
131 return sem; 137 return sem;
132 138
133 /* undo the change to count, but check for a transition 1->0 */ 139 /* undo the change to count, but check for a transition 1->0 */
@@ -186,13 +192,9 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
186{ 192{
187 struct rwsem_waiter waiter; 193 struct rwsem_waiter waiter;
188 194
189 rwsemtrace(sem, "Entering rwsem_down_read_failed");
190
191 waiter.flags = RWSEM_WAITING_FOR_READ; 195 waiter.flags = RWSEM_WAITING_FOR_READ;
192 rwsem_down_failed_common(sem, &waiter, 196 rwsem_down_failed_common(sem, &waiter,
193 RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); 197 RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
194
195 rwsemtrace(sem, "Leaving rwsem_down_read_failed");
196 return sem; 198 return sem;
197} 199}
198 200
@@ -204,12 +206,9 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
204{ 206{
205 struct rwsem_waiter waiter; 207 struct rwsem_waiter waiter;
206 208
207 rwsemtrace(sem, "Entering rwsem_down_write_failed");
208
209 waiter.flags = RWSEM_WAITING_FOR_WRITE; 209 waiter.flags = RWSEM_WAITING_FOR_WRITE;
210 rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); 210 rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
211 211
212 rwsemtrace(sem, "Leaving rwsem_down_write_failed");
213 return sem; 212 return sem;
214} 213}
215 214
@@ -221,8 +220,6 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
221{ 220{
222 unsigned long flags; 221 unsigned long flags;
223 222
224 rwsemtrace(sem, "Entering rwsem_wake");
225
226 spin_lock_irqsave(&sem->wait_lock, flags); 223 spin_lock_irqsave(&sem->wait_lock, flags);
227 224
228 /* do nothing if list empty */ 225 /* do nothing if list empty */
@@ -231,8 +228,6 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
231 228
232 spin_unlock_irqrestore(&sem->wait_lock, flags); 229 spin_unlock_irqrestore(&sem->wait_lock, flags);
233 230
234 rwsemtrace(sem, "Leaving rwsem_wake");
235
236 return sem; 231 return sem;
237} 232}
238 233
@@ -245,8 +240,6 @@ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
245{ 240{
246 unsigned long flags; 241 unsigned long flags;
247 242
248 rwsemtrace(sem, "Entering rwsem_downgrade_wake");
249
250 spin_lock_irqsave(&sem->wait_lock, flags); 243 spin_lock_irqsave(&sem->wait_lock, flags);
251 244
252 /* do nothing if list empty */ 245 /* do nothing if list empty */
@@ -255,7 +248,6 @@ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
255 248
256 spin_unlock_irqrestore(&sem->wait_lock, flags); 249 spin_unlock_irqrestore(&sem->wait_lock, flags);
257 250
258 rwsemtrace(sem, "Leaving rwsem_downgrade_wake");
259 return sem; 251 return sem;
260} 252}
261 253
@@ -263,6 +255,3 @@ EXPORT_SYMBOL(rwsem_down_read_failed);
263EXPORT_SYMBOL(rwsem_down_write_failed); 255EXPORT_SYMBOL(rwsem_down_write_failed);
264EXPORT_SYMBOL(rwsem_wake); 256EXPORT_SYMBOL(rwsem_wake);
265EXPORT_SYMBOL(rwsem_downgrade_wake); 257EXPORT_SYMBOL(rwsem_downgrade_wake);
266#if RWSEM_DEBUG
267EXPORT_SYMBOL(rwsemtrace);
268#endif
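
Both rwsem variants now take a name and a struct lock_class_key in __init_rwsem(). The header side is not part of these hunks, but the usual shape of the wrapper (shown only as an illustrative sketch, not quoted from the patch) plants one static key per initialization site, so every semaphore initialized at the same call site shares a lock class:

	/* sketch only -- the real macro lives in the rwsem headers */
	#define init_rwsem(sem)						\
	do {								\
		static struct lock_class_key __key;			\
									\
		__init_rwsem((sem), #sem, &__key);			\
	} while (0)
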
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 93c15ee3f8ea..3d9c4dc965ed 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -8,38 +8,71 @@
8 8
9#include <linux/spinlock.h> 9#include <linux/spinlock.h>
10#include <linux/interrupt.h> 10#include <linux/interrupt.h>
11#include <linux/debug_locks.h>
11#include <linux/delay.h> 12#include <linux/delay.h>
13#include <linux/module.h>
14
15void __spin_lock_init(spinlock_t *lock, const char *name,
16 struct lock_class_key *key)
17{
18#ifdef CONFIG_DEBUG_LOCK_ALLOC
19 /*
20 * Make sure we are not reinitializing a held lock:
21 */
22 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
23 lockdep_init_map(&lock->dep_map, name, key);
24#endif
25 lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
26 lock->magic = SPINLOCK_MAGIC;
27 lock->owner = SPINLOCK_OWNER_INIT;
28 lock->owner_cpu = -1;
29}
30
31EXPORT_SYMBOL(__spin_lock_init);
32
33void __rwlock_init(rwlock_t *lock, const char *name,
34 struct lock_class_key *key)
35{
36#ifdef CONFIG_DEBUG_LOCK_ALLOC
37 /*
38 * Make sure we are not reinitializing a held lock:
39 */
40 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
41 lockdep_init_map(&lock->dep_map, name, key);
42#endif
43 lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED;
44 lock->magic = RWLOCK_MAGIC;
45 lock->owner = SPINLOCK_OWNER_INIT;
46 lock->owner_cpu = -1;
47}
48
49EXPORT_SYMBOL(__rwlock_init);
12 50
13static void spin_bug(spinlock_t *lock, const char *msg) 51static void spin_bug(spinlock_t *lock, const char *msg)
14{ 52{
15 static long print_once = 1;
16 struct task_struct *owner = NULL; 53 struct task_struct *owner = NULL;
17 54
18 if (xchg(&print_once, 0)) { 55 if (!debug_locks_off())
19 if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) 56 return;
20 owner = lock->owner; 57
21 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", 58 if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
22 msg, raw_smp_processor_id(), 59 owner = lock->owner;
23 current->comm, current->pid); 60 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
24 printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " 61 msg, raw_smp_processor_id(),
25 ".owner_cpu: %d\n", 62 current->comm, current->pid);
26 lock, lock->magic, 63 printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
27 owner ? owner->comm : "<none>", 64 ".owner_cpu: %d\n",
28 owner ? owner->pid : -1, 65 lock, lock->magic,
29 lock->owner_cpu); 66 owner ? owner->comm : "<none>",
30 dump_stack(); 67 owner ? owner->pid : -1,
31#ifdef CONFIG_SMP 68 lock->owner_cpu);
32 /* 69 dump_stack();
33 * We cannot continue on SMP:
34 */
35// panic("bad locking");
36#endif
37 }
38} 70}
39 71
40#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) 72#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
41 73
42static inline void debug_spin_lock_before(spinlock_t *lock) 74static inline void
75debug_spin_lock_before(spinlock_t *lock)
43{ 76{
44 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); 77 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
45 SPIN_BUG_ON(lock->owner == current, lock, "recursion"); 78 SPIN_BUG_ON(lock->owner == current, lock, "recursion");
@@ -118,20 +151,13 @@ void _raw_spin_unlock(spinlock_t *lock)
118 151
119static void rwlock_bug(rwlock_t *lock, const char *msg) 152static void rwlock_bug(rwlock_t *lock, const char *msg)
120{ 153{
121 static long print_once = 1; 154 if (!debug_locks_off())
122 155 return;
123 if (xchg(&print_once, 0)) { 156
124 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", 157 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
125 msg, raw_smp_processor_id(), current->comm, 158 msg, raw_smp_processor_id(), current->comm,
126 current->pid, lock); 159 current->pid, lock);
127 dump_stack(); 160 dump_stack();
128#ifdef CONFIG_SMP
129 /*
130 * We cannot continue on SMP:
131 */
132 panic("bad locking");
133#endif
134 }
135} 161}
136 162
137#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) 163#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
diff --git a/mm/memory.c b/mm/memory.c
index 7e2a4b1580e3..c1e14c9e67e4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -503,7 +503,7 @@ again:
503 return -ENOMEM; 503 return -ENOMEM;
504 src_pte = pte_offset_map_nested(src_pmd, addr); 504 src_pte = pte_offset_map_nested(src_pmd, addr);
505 src_ptl = pte_lockptr(src_mm, src_pmd); 505 src_ptl = pte_lockptr(src_mm, src_pmd);
506 spin_lock(src_ptl); 506 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
507 507
508 do { 508 do {
509 /* 509 /*
diff --git a/mm/mremap.c b/mm/mremap.c
index 1903bdf65e42..7c15cf3373ad 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -97,7 +97,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
97 new_pte = pte_offset_map_nested(new_pmd, new_addr); 97 new_pte = pte_offset_map_nested(new_pmd, new_addr);
98 new_ptl = pte_lockptr(mm, new_pmd); 98 new_ptl = pte_lockptr(mm, new_pmd);
99 if (new_ptl != old_ptl) 99 if (new_ptl != old_ptl)
100 spin_lock(new_ptl); 100 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
101 101
102 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, 102 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
103 new_pte++, new_addr += PAGE_SIZE) { 103 new_pte++, new_addr += PAGE_SIZE) {
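
The two mm/ hunks above annotate the second page-table lock with spin_lock_nested(): both pte locks belong to the same lock class, so without a nesting annotation the validator would see same-class recursion and report a false positive. A small sketch of the pattern with a hypothetical helper (not kernel code):

	static void lock_both_ptls(spinlock_t *dst_ptl, spinlock_t *src_ptl)
	{
		spin_lock(dst_ptl);
		if (src_ptl != dst_ptl)
			/* same class, known-safe order: annotate as nested */
			spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
	}
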
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d46ed0f1dc06..b9af136e5cfa 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -225,7 +225,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
225 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that 225 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
226 * we select a process with CAP_SYS_RAW_IO set). 226 * we select a process with CAP_SYS_RAW_IO set).
227 */ 227 */
228static void __oom_kill_task(task_t *p, const char *message) 228static void __oom_kill_task(struct task_struct *p, const char *message)
229{ 229{
230 if (p->pid == 1) { 230 if (p->pid == 1) {
231 WARN_ON(1); 231 WARN_ON(1);
@@ -255,10 +255,10 @@ static void __oom_kill_task(task_t *p, const char *message)
255 force_sig(SIGKILL, p); 255 force_sig(SIGKILL, p);
256} 256}
257 257
258static int oom_kill_task(task_t *p, const char *message) 258static int oom_kill_task(struct task_struct *p, const char *message)
259{ 259{
260 struct mm_struct *mm; 260 struct mm_struct *mm;
261 task_t * g, * q; 261 struct task_struct *g, *q;
262 262
263 mm = p->mm; 263 mm = p->mm;
264 264
@@ -316,7 +316,7 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
316 */ 316 */
317void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) 317void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
318{ 318{
319 task_t *p; 319 struct task_struct *p;
320 unsigned long points = 0; 320 unsigned long points = 0;
321 321
322 if (printk_ratelimit()) { 322 if (printk_ratelimit()) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e792a583f3b..54a4f5375bba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2005 2005
2006 zone->spanned_pages = size; 2006 zone->spanned_pages = size;
2007 zone->present_pages = realsize; 2007 zone->present_pages = realsize;
2008#ifdef CONFIG_NUMA
2009 zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
2010 / 100;
2011#endif
2008 zone->name = zone_names[j]; 2012 zone->name = zone_names[j];
2009 spin_lock_init(&zone->lock); 2013 spin_lock_init(&zone->lock);
2010 spin_lock_init(&zone->lru_lock); 2014 spin_lock_init(&zone->lru_lock);
@@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
2298 return 0; 2302 return 0;
2299} 2303}
2300 2304
2305#ifdef CONFIG_NUMA
2306int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
2307 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2308{
2309 struct zone *zone;
2310 int rc;
2311
2312 rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2313 if (rc)
2314 return rc;
2315
2316 for_each_zone(zone)
2317 zone->min_unmapped_ratio = (zone->present_pages *
2318 sysctl_min_unmapped_ratio) / 100;
2319 return 0;
2320}
2321#endif
2322
2301/* 2323/*
2302 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around 2324 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
2303 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() 2325 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
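The new handler follows the standard pattern for per-zone tunables: proc_dointvec_minmax() updates sysctl_min_unmapped_ratio, and on a successful write every zone's cached threshold is recomputed. As a worked example (numbers illustrative only): with the default ratio of 1 percent, a zone with 262144 present pages (1 GB of 4 KB pages) gets min_unmapped_ratio = 262144 * 1 / 100 = 2621 pages, the minimum amount of unmapped file-backed pagecache that must be present before zone_reclaim() is allowed to run (see the mm/vmscan.c hunks further down).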
diff --git a/mm/slab.c b/mm/slab.c
index 3936af344542..85c2e03098a7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1021,7 +1021,8 @@ static void drain_alien_cache(struct kmem_cache *cachep,
1021 } 1021 }
1022} 1022}
1023 1023
1024static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1024static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
1025 int nesting)
1025{ 1026{
1026 struct slab *slabp = virt_to_slab(objp); 1027 struct slab *slabp = virt_to_slab(objp);
1027 int nodeid = slabp->nodeid; 1028 int nodeid = slabp->nodeid;
@@ -1039,7 +1040,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1039 STATS_INC_NODEFREES(cachep); 1040 STATS_INC_NODEFREES(cachep);
1040 if (l3->alien && l3->alien[nodeid]) { 1041 if (l3->alien && l3->alien[nodeid]) {
1041 alien = l3->alien[nodeid]; 1042 alien = l3->alien[nodeid];
1042 spin_lock(&alien->lock); 1043 spin_lock_nested(&alien->lock, nesting);
1043 if (unlikely(alien->avail == alien->limit)) { 1044 if (unlikely(alien->avail == alien->limit)) {
1044 STATS_INC_ACOVERFLOW(cachep); 1045 STATS_INC_ACOVERFLOW(cachep);
1045 __drain_alien_cache(cachep, alien, nodeid); 1046 __drain_alien_cache(cachep, alien, nodeid);
@@ -1068,7 +1069,8 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
1068{ 1069{
1069} 1070}
1070 1071
1071static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1072static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
1073 int nesting)
1072{ 1074{
1073 return 0; 1075 return 0;
1074} 1076}
@@ -1272,6 +1274,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1272 1274
1273 local_irq_disable(); 1275 local_irq_disable();
1274 memcpy(ptr, list, sizeof(struct kmem_list3)); 1276 memcpy(ptr, list, sizeof(struct kmem_list3));
1277 /*
1278 * Do not assume that spinlocks can be initialized via memcpy:
1279 */
1280 spin_lock_init(&ptr->list_lock);
1281
1275 MAKE_ALL_LISTS(cachep, ptr, nodeid); 1282 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1276 cachep->nodelists[nodeid] = ptr; 1283 cachep->nodelists[nodeid] = ptr;
1277 local_irq_enable(); 1284 local_irq_enable();
@@ -1398,7 +1405,7 @@ void __init kmem_cache_init(void)
1398 } 1405 }
1399 /* 4) Replace the bootstrap head arrays */ 1406 /* 4) Replace the bootstrap head arrays */
1400 { 1407 {
1401 void *ptr; 1408 struct array_cache *ptr;
1402 1409
1403 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 1410 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1404 1411
@@ -1406,6 +1413,11 @@ void __init kmem_cache_init(void)
1406 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); 1413 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1407 memcpy(ptr, cpu_cache_get(&cache_cache), 1414 memcpy(ptr, cpu_cache_get(&cache_cache),
1408 sizeof(struct arraycache_init)); 1415 sizeof(struct arraycache_init));
1416 /*
1417 * Do not assume that spinlocks can be initialized via memcpy:
1418 */
1419 spin_lock_init(&ptr->lock);
1420
1409 cache_cache.array[smp_processor_id()] = ptr; 1421 cache_cache.array[smp_processor_id()] = ptr;
1410 local_irq_enable(); 1422 local_irq_enable();
1411 1423
@@ -1416,6 +1428,11 @@ void __init kmem_cache_init(void)
1416 != &initarray_generic.cache); 1428 != &initarray_generic.cache);
1417 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), 1429 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1418 sizeof(struct arraycache_init)); 1430 sizeof(struct arraycache_init));
1431 /*
1432 * Do not assume that spinlocks can be initialized via memcpy:
1433 */
1434 spin_lock_init(&ptr->lock);
1435
1419 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = 1436 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1420 ptr; 1437 ptr;
1421 local_irq_enable(); 1438 local_irq_enable();
@@ -1743,6 +1760,8 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1743} 1760}
1744#endif 1761#endif
1745 1762
1763static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting);
1764
1746/** 1765/**
1747 * slab_destroy - destroy and release all objects in a slab 1766 * slab_destroy - destroy and release all objects in a slab
1748 * @cachep: cache pointer being destroyed 1767 * @cachep: cache pointer being destroyed
@@ -1766,8 +1785,17 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1766 call_rcu(&slab_rcu->head, kmem_rcu_free); 1785 call_rcu(&slab_rcu->head, kmem_rcu_free);
1767 } else { 1786 } else {
1768 kmem_freepages(cachep, addr); 1787 kmem_freepages(cachep, addr);
1769 if (OFF_SLAB(cachep)) 1788 if (OFF_SLAB(cachep)) {
1770 kmem_cache_free(cachep->slabp_cache, slabp); 1789 unsigned long flags;
1790
1791 /*
1792 * lockdep: we may nest inside an already held
1793 * ac->lock, so pass in a nesting flag:
1794 */
1795 local_irq_save(flags);
1796 __cache_free(cachep->slabp_cache, slabp, 1);
1797 local_irq_restore(flags);
1798 }
1771 } 1799 }
1772} 1800}
1773 1801
@@ -3072,7 +3100,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3072 if (slabp->inuse == 0) { 3100 if (slabp->inuse == 0) {
3073 if (l3->free_objects > l3->free_limit) { 3101 if (l3->free_objects > l3->free_limit) {
3074 l3->free_objects -= cachep->num; 3102 l3->free_objects -= cachep->num;
3103 /*
3104 * It is safe to drop the lock. The slab is
3105 * no longer linked to the cache. cachep
3106 * cannot disappear - we are using it and
3107 * all destruction of caches must be
3108 * serialized properly by the user.
3109 */
3110 spin_unlock(&l3->list_lock);
3075 slab_destroy(cachep, slabp); 3111 slab_destroy(cachep, slabp);
3112 spin_lock(&l3->list_lock);
3076 } else { 3113 } else {
3077 list_add(&slabp->list, &l3->slabs_free); 3114 list_add(&slabp->list, &l3->slabs_free);
3078 } 3115 }
@@ -3098,7 +3135,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3098#endif 3135#endif
3099 check_irq_off(); 3136 check_irq_off();
3100 l3 = cachep->nodelists[node]; 3137 l3 = cachep->nodelists[node];
3101 spin_lock(&l3->list_lock); 3138 spin_lock_nested(&l3->list_lock, SINGLE_DEPTH_NESTING);
3102 if (l3->shared) { 3139 if (l3->shared) {
3103 struct array_cache *shared_array = l3->shared; 3140 struct array_cache *shared_array = l3->shared;
3104 int max = shared_array->limit - shared_array->avail; 3141 int max = shared_array->limit - shared_array->avail;
@@ -3141,14 +3178,14 @@ free_done:
3141 * Release an obj back to its cache. If the obj has a constructed state, it must 3178 * Release an obj back to its cache. If the obj has a constructed state, it must
3142 * be in this state _before_ it is released. Called with disabled ints. 3179 * be in this state _before_ it is released. Called with disabled ints.
3143 */ 3180 */
3144static inline void __cache_free(struct kmem_cache *cachep, void *objp) 3181static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting)
3145{ 3182{
3146 struct array_cache *ac = cpu_cache_get(cachep); 3183 struct array_cache *ac = cpu_cache_get(cachep);
3147 3184
3148 check_irq_off(); 3185 check_irq_off();
3149 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); 3186 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3150 3187
3151 if (cache_free_alien(cachep, objp)) 3188 if (cache_free_alien(cachep, objp, nesting))
3152 return; 3189 return;
3153 3190
3154 if (likely(ac->avail < ac->limit)) { 3191 if (likely(ac->avail < ac->limit)) {
@@ -3387,7 +3424,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3387 BUG_ON(virt_to_cache(objp) != cachep); 3424 BUG_ON(virt_to_cache(objp) != cachep);
3388 3425
3389 local_irq_save(flags); 3426 local_irq_save(flags);
3390 __cache_free(cachep, objp); 3427 __cache_free(cachep, objp, 0);
3391 local_irq_restore(flags); 3428 local_irq_restore(flags);
3392} 3429}
3393EXPORT_SYMBOL(kmem_cache_free); 3430EXPORT_SYMBOL(kmem_cache_free);
@@ -3412,7 +3449,7 @@ void kfree(const void *objp)
3412 kfree_debugcheck(objp); 3449 kfree_debugcheck(objp);
3413 c = virt_to_cache(objp); 3450 c = virt_to_cache(objp);
3414 debug_check_no_locks_freed(objp, obj_size(c)); 3451 debug_check_no_locks_freed(objp, obj_size(c));
3415 __cache_free(c, (void *)objp); 3452 __cache_free(c, (void *)objp, 0);
3416 local_irq_restore(flags); 3453 local_irq_restore(flags);
3417} 3454}
3418EXPORT_SYMBOL(kfree); 3455EXPORT_SYMBOL(kfree);
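Two recurring themes in the slab hunks: bootstrap code that builds its real per-node and per-cpu structures by memcpy()ing the static ones must re-run spin_lock_init() on any embedded lock, because copying a lock also copies its lockdep state (and, with spinlock debugging on, its magic/owner fields); and the free path now carries an explicit nesting argument down to cache_free_alien() and cache_flusharray(), so that freeing an off-slab management object from inside slab_destroy() can annotate the second list_lock/alien-lock acquisition instead of tripping the validator. A hedged sketch of the first rule, with an invented structure:

#include <linux/spinlock.h>
#include <linux/string.h>

struct node_lists {
	spinlock_t list_lock;
	/* ... partial/free/full lists, counters ... */
};

/*
 * When promoting a statically allocated bootstrap object to its final,
 * dynamically allocated home, do not assume the embedded spinlock can
 * be initialized via memcpy(): reinitialize it explicitly.
 */
static void promote_bootstrap(struct node_lists *dst,
			      const struct node_lists *src)
{
	memcpy(dst, src, sizeof(*dst));
	spin_lock_init(&dst->list_lock);
}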
diff --git a/mm/swap_state.c b/mm/swap_state.c
index fccbd9bba77b..5f7cf2a4cb55 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,7 +38,7 @@ static struct backing_dev_info swap_backing_dev_info = {
38 38
39struct address_space swapper_space = { 39struct address_space swapper_space = {
40 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), 40 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
41 .tree_lock = RW_LOCK_UNLOCKED, 41 .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock),
42 .a_ops = &swap_aops, 42 .a_ops = &swap_aops,
43 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), 43 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
44 .backing_dev_info = &swap_backing_dev_info, 44 .backing_dev_info = &swap_backing_dev_info,
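The RW_LOCK_UNLOCKED / SPIN_LOCK_UNLOCKED initializers build every such lock from one anonymous template, which would merge all statically initialized locks of that type into a single lockdep class. The __RW_LOCK_UNLOCKED(name) and __SPIN_LOCK_UNLOCKED(name) forms (used here for swapper_space.tree_lock and, further down, for tcp_hashinfo.lhash_lock and tcp_death_row.death_lock) take the lock's own name, so each lock gets its own class and a readable name in validator reports. A hedged sketch with an invented structure:

#include <linux/spinlock.h>

struct my_table {
	rwlock_t   tree_lock;
	spinlock_t death_lock;
};

/* Name-carrying static initializers: one lockdep class per lock. */
static struct my_table my_table = {
	.tree_lock  = __RW_LOCK_UNLOCKED(my_table.tree_lock),
	.death_lock = __SPIN_LOCK_UNLOCKED(my_table.death_lock),
};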
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35f8553f893a..7b450798b458 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -330,6 +330,8 @@ void __vunmap(void *addr, int deallocate_pages)
330 return; 330 return;
331 } 331 }
332 332
333 debug_check_no_locks_freed(addr, area->size);
334
333 if (deallocate_pages) { 335 if (deallocate_pages) {
334 int i; 336 int i;
335 337
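debug_check_no_locks_freed(addr, size) asks the lock-debugging core to verify that no currently held lock lives inside a memory range that is about to be released; kfree() gained the same check in the slab hunk above, and vmalloc'd areas can embed locks just as easily, hence the call in __vunmap(). A hedged sketch of using the check before freeing an object with an embedded lock (the structure is invented, and the header housing the declaration has moved around between kernel versions, so no specific include is shown for it):

#include <linux/slab.h>
#include <linux/spinlock.h>

struct widget {
	spinlock_t lock;
	/* ... */
};

static void widget_free(struct widget *w)
{
	/* Complain via the validator if w->lock is still held anywhere. */
	debug_check_no_locks_freed(w, sizeof(*w));
	kfree(w);
}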
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ff2ebe9458a3..5d4c4d02254d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1503,10 +1503,6 @@ module_init(kswapd_init)
1503 * 1503 *
1504 * If non-zero call zone_reclaim when the number of free pages falls below 1504 * If non-zero call zone_reclaim when the number of free pages falls below
1505 * the watermarks. 1505 * the watermarks.
1506 *
1507 * In the future we may add flags to the mode. However, the page allocator
1508 * should only have to check that zone_reclaim_mode != 0 before calling
1509 * zone_reclaim().
1510 */ 1506 */
1511int zone_reclaim_mode __read_mostly; 1507int zone_reclaim_mode __read_mostly;
1512 1508
@@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly;
1524#define ZONE_RECLAIM_PRIORITY 4 1520#define ZONE_RECLAIM_PRIORITY 4
1525 1521
1526/* 1522/*
1523 * Percentage of pages in a zone that must be unmapped for zone_reclaim to
1524 * occur.
1525 */
1526int sysctl_min_unmapped_ratio = 1;
1527
1528/*
1527 * Try to free up some pages from this zone through reclaim. 1529 * Try to free up some pages from this zone through reclaim.
1528 */ 1530 */
1529static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) 1531static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1590 int node_id; 1592 int node_id;
1591 1593
1592 /* 1594 /*
1593 * Do not reclaim if there are not enough reclaimable pages in this 1595 * Zone reclaim reclaims unmapped file backed pages.
1594 * zone that would satify this allocations.
1595 * 1596 *
1596 * All unmapped pagecache pages are reclaimable. 1597 * A small portion of unmapped file backed pages is needed for
1597 * 1598 * file I/O otherwise pages read by file I/O will be immediately
1598 * Both counters may be temporarily off a bit so we use 1599 * thrown out if the zone is overallocated. So we do not reclaim
1599 * SWAP_CLUSTER_MAX as the boundary. It may also be good to 1600 * if less than a specified percentage of the zone is used by
1600 * leave a few frequently used unmapped pagecache pages around. 1601 * unmapped file backed pages.
1601 */ 1602 */
1602 if (zone_page_state(zone, NR_FILE_PAGES) - 1603 if (zone_page_state(zone, NR_FILE_PAGES) -
1603 zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX) 1604 zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
1604 return 0; 1605 return 0;
1605 1606
1606 /* 1607 /*
1607 * Avoid concurrent zone reclaims, do not reclaim in a zone that does 1608 * Avoid concurrent zone reclaims, do not reclaim in a zone that does
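The rewritten comment and check replace the old SWAP_CLUSTER_MAX heuristic: zone_reclaim() now runs only when the zone holds more unmapped file-backed pagecache than the precomputed per-zone threshold, so a small working set of file pages survives for file I/O. A hedged restatement of the new gate as a helper (the function is illustrative, not in the patch):

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/* Reclaim only if the zone's unmapped file-backed pagecache exceeds
 * min_unmapped_ratio percent of its present pages (precomputed into
 * zone->min_unmapped_ratio by mm/page_alloc.c). */
static int zone_has_enough_unmapped_pagecache(struct zone *zone)
{
	unsigned long unmapped = zone_page_state(zone, NR_FILE_PAGES) -
				 zone_page_state(zone, NR_FILE_MAPPED);

	return unmapped > zone->min_unmapped_ratio;
}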
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3948949a609a..458031bfff55 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -364,6 +364,14 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
364 } 364 }
365} 365}
366 366
367/*
368 * vlan network devices have devices nesting below it, and are a special
369 * "super class" of normal network devices; split their locks off into a
370 * separate class since they always nest.
371 */
372static struct lock_class_key vlan_netdev_xmit_lock_key;
373
374
367/* Attach a VLAN device to a mac address (ie Ethernet Card). 375/* Attach a VLAN device to a mac address (ie Ethernet Card).
368 * Returns the device that was created, or NULL if there was 376 * Returns the device that was created, or NULL if there was
369 * an error of some kind. 377 * an error of some kind.
@@ -460,6 +468,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
460 468
461 new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, 469 new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
462 vlan_setup); 470 vlan_setup);
471
463 if (new_dev == NULL) 472 if (new_dev == NULL)
464 goto out_unlock; 473 goto out_unlock;
465 474
@@ -518,6 +527,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
518 if (register_netdevice(new_dev)) 527 if (register_netdevice(new_dev))
519 goto out_free_newdev; 528 goto out_free_newdev;
520 529
530 lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
531
521 new_dev->iflink = real_dev->ifindex; 532 new_dev->iflink = real_dev->ifindex;
522 vlan_transfer_operstate(real_dev, new_dev); 533 vlan_transfer_operstate(real_dev, new_dev);
523 linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ 534 linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
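The pattern above, a file-scope struct lock_class_key plus a lockdep_set_class() call once the device is registered, is how one particular lock is moved out of its default class. VLAN devices transmit while the underlying real device's _xmit_lock (same lock type, hence same default class) is already held, so giving the vlan lock its own class turns a false recursion report into an ordinary two-class dependency. A hedged sketch of the same pattern for an invented stacked device:

#include <linux/lockdep.h>
#include <linux/spinlock.h>

/* One key per "kind" of lock: all upper-device xmit locks share this
 * class, distinct from the real devices' default xmit-lock class. */
static struct lock_class_key upper_xmit_lock_key;

static void upper_dev_registered(spinlock_t *upper_xmit_lock)
{
	lockdep_set_class(upper_xmit_lock, &upper_xmit_lock_key);
}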
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7cfbdb215ba2..44f6a181a754 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -71,6 +71,13 @@ static kmem_cache_t *skbuff_head_cache __read_mostly;
71static kmem_cache_t *skbuff_fclone_cache __read_mostly; 71static kmem_cache_t *skbuff_fclone_cache __read_mostly;
72 72
73/* 73/*
74 * lockdep: lock class key used by skb_queue_head_init():
75 */
76struct lock_class_key skb_queue_lock_key;
77
78EXPORT_SYMBOL(skb_queue_lock_key);
79
80/*
74 * Keep out-of-line to prevent kernel bloat. 81 * Keep out-of-line to prevent kernel bloat.
75 * __builtin_return_address is not used because it is not always 82 * __builtin_return_address is not used because it is not always
76 * reliable. 83 * reliable.
diff --git a/net/core/sock.c b/net/core/sock.c
index 533b9317144b..51fcfbc041a7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -129,6 +129,53 @@
129#include <net/tcp.h> 129#include <net/tcp.h>
130#endif 130#endif
131 131
132/*
133 * Each address family might have different locking rules, so we have
134 * one slock key per address family:
135 */
136static struct lock_class_key af_family_keys[AF_MAX];
137static struct lock_class_key af_family_slock_keys[AF_MAX];
138
139#ifdef CONFIG_DEBUG_LOCK_ALLOC
140/*
141 * Make lock validator output more readable. (we pre-construct these
142 * strings build-time, so that runtime initialization of socket
143 * locks is fast):
144 */
145static const char *af_family_key_strings[AF_MAX+1] = {
146 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
147 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
148 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
149 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
150 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
151 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
152 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
153 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
154 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
155 "sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
157};
158static const char *af_family_slock_key_strings[AF_MAX+1] = {
159 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
160 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
161 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
162 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
163 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
164 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
165 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
166 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
167 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
168 "slock-27" , "slock-28" , "slock-29" ,
169 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX"
170};
171#endif
172
173/*
174 * sk_callback_lock locking rules are per-address-family,
175 * so split the lock classes by using a per-AF key:
176 */
177static struct lock_class_key af_callback_keys[AF_MAX];
178
132/* Take into consideration the size of the struct sk_buff overhead in the 179/* Take into consideration the size of the struct sk_buff overhead in the
133 * determination of these values, since that is non-constant across 180 * determination of these values, since that is non-constant across
134 * platforms. This makes socket queueing behavior and performance 181 * platforms. This makes socket queueing behavior and performance
@@ -237,9 +284,16 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
237 skb->dev = NULL; 284 skb->dev = NULL;
238 285
239 bh_lock_sock(sk); 286 bh_lock_sock(sk);
240 if (!sock_owned_by_user(sk)) 287 if (!sock_owned_by_user(sk)) {
288 /*
289 * trylock + unlock semantics:
290 */
291 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
292
241 rc = sk->sk_backlog_rcv(sk, skb); 293 rc = sk->sk_backlog_rcv(sk, skb);
242 else 294
295 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
296 } else
243 sk_add_backlog(sk, skb); 297 sk_add_backlog(sk, skb);
244 bh_unlock_sock(sk); 298 bh_unlock_sock(sk);
245out: 299out:
@@ -749,6 +803,33 @@ lenout:
749 return 0; 803 return 0;
750} 804}
751 805
806/*
807 * Initialize an sk_lock.
808 *
809 * (We also register the sk_lock with the lock validator.)
810 */
811static void inline sock_lock_init(struct sock *sk)
812{
813 spin_lock_init(&sk->sk_lock.slock);
814 sk->sk_lock.owner = NULL;
815 init_waitqueue_head(&sk->sk_lock.wq);
816 /*
817 * Make sure we are not reinitializing a held lock:
818 */
819 debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock));
820
821 /*
822 * Mark both the sk_lock and the sk_lock.slock as a
823 * per-address-family lock class:
824 */
825 lockdep_set_class_and_name(&sk->sk_lock.slock,
826 af_family_slock_keys + sk->sk_family,
827 af_family_slock_key_strings[sk->sk_family]);
828 lockdep_init_map(&sk->sk_lock.dep_map,
829 af_family_key_strings[sk->sk_family],
830 af_family_keys + sk->sk_family);
831}
832
752/** 833/**
753 * sk_alloc - All socket objects are allocated here 834 * sk_alloc - All socket objects are allocated here
754 * @family: protocol family 835 * @family: protocol family
@@ -848,6 +929,8 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
848 929
849 rwlock_init(&newsk->sk_dst_lock); 930 rwlock_init(&newsk->sk_dst_lock);
850 rwlock_init(&newsk->sk_callback_lock); 931 rwlock_init(&newsk->sk_callback_lock);
932 lockdep_set_class(&newsk->sk_callback_lock,
933 af_callback_keys + newsk->sk_family);
851 934
852 newsk->sk_dst_cache = NULL; 935 newsk->sk_dst_cache = NULL;
853 newsk->sk_wmem_queued = 0; 936 newsk->sk_wmem_queued = 0;
@@ -1422,6 +1505,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1422 1505
1423 rwlock_init(&sk->sk_dst_lock); 1506 rwlock_init(&sk->sk_dst_lock);
1424 rwlock_init(&sk->sk_callback_lock); 1507 rwlock_init(&sk->sk_callback_lock);
1508 lockdep_set_class(&sk->sk_callback_lock,
1509 af_callback_keys + sk->sk_family);
1425 1510
1426 sk->sk_state_change = sock_def_wakeup; 1511 sk->sk_state_change = sock_def_wakeup;
1427 sk->sk_data_ready = sock_def_readable; 1512 sk->sk_data_ready = sock_def_readable;
@@ -1449,24 +1534,34 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1449void fastcall lock_sock(struct sock *sk) 1534void fastcall lock_sock(struct sock *sk)
1450{ 1535{
1451 might_sleep(); 1536 might_sleep();
1452 spin_lock_bh(&(sk->sk_lock.slock)); 1537 spin_lock_bh(&sk->sk_lock.slock);
1453 if (sk->sk_lock.owner) 1538 if (sk->sk_lock.owner)
1454 __lock_sock(sk); 1539 __lock_sock(sk);
1455 sk->sk_lock.owner = (void *)1; 1540 sk->sk_lock.owner = (void *)1;
1456 spin_unlock_bh(&(sk->sk_lock.slock)); 1541 spin_unlock(&sk->sk_lock.slock);
1542 /*
1543 * The sk_lock has mutex_lock() semantics here:
1544 */
1545 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
1546 local_bh_enable();
1457} 1547}
1458 1548
1459EXPORT_SYMBOL(lock_sock); 1549EXPORT_SYMBOL(lock_sock);
1460 1550
1461void fastcall release_sock(struct sock *sk) 1551void fastcall release_sock(struct sock *sk)
1462{ 1552{
1463 spin_lock_bh(&(sk->sk_lock.slock)); 1553 /*
1554 * The sk_lock has mutex_unlock() semantics:
1555 */
1556 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1557
1558 spin_lock_bh(&sk->sk_lock.slock);
1464 if (sk->sk_backlog.tail) 1559 if (sk->sk_backlog.tail)
1465 __release_sock(sk); 1560 __release_sock(sk);
1466 sk->sk_lock.owner = NULL; 1561 sk->sk_lock.owner = NULL;
1467 if (waitqueue_active(&(sk->sk_lock.wq))) 1562 if (waitqueue_active(&sk->sk_lock.wq))
1468 wake_up(&(sk->sk_lock.wq)); 1563 wake_up(&sk->sk_lock.wq);
1469 spin_unlock_bh(&(sk->sk_lock.slock)); 1564 spin_unlock_bh(&sk->sk_lock.slock);
1470} 1565}
1471EXPORT_SYMBOL(release_sock); 1566EXPORT_SYMBOL(release_sock);
1472 1567
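sock_lock_init() above shows both halves of classifying the hybrid socket lock: the underlying slock spinlock gets a per-address-family class and name via lockdep_set_class_and_name(), and the "owned by user" mutex-like semantics get their own dep_map via lockdep_init_map(), which lock_sock(), release_sock() and sk_receive_skb() then drive with mutex_acquire()/mutex_release(). The class arrays are indexed by sk->sk_family, so AF_INET and AF_UNIX sockets, which follow different locking rules, no longer share one class. A condensed, hedged sketch of the spinlock half (array and field naming follows the hunk; the helper itself is illustrative):

#include <linux/lockdep.h>
#include <linux/socket.h>
#include <linux/spinlock.h>

static struct lock_class_key family_slock_keys[AF_MAX];
static const char *family_slock_names[AF_MAX];	/* "slock-AF_INET", ... */

/* Give a per-family lockdep class and name to a per-socket spinlock. */
static void classify_slock(spinlock_t *slock, int family)
{
	lockdep_set_class_and_name(slock, &family_slock_keys[family],
				   family_slock_names[family]);
}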
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index da44fabf4dc5..2dc6dbb28467 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -205,21 +205,27 @@ __u8 ip_tos2prio[16] = {
205struct rt_hash_bucket { 205struct rt_hash_bucket {
206 struct rtable *chain; 206 struct rtable *chain;
207}; 207};
208#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) 208#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
209 defined(CONFIG_PROVE_LOCKING)
209/* 210/*
210 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks 211 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
211 * The size of this table is a power of two and depends on the number of CPUS. 212 * The size of this table is a power of two and depends on the number of CPUS.
213 * (on lockdep we have a quite big spinlock_t, so keep the size down there)
212 */ 214 */
213#if NR_CPUS >= 32 215#ifdef CONFIG_LOCKDEP
214#define RT_HASH_LOCK_SZ 4096 216# define RT_HASH_LOCK_SZ 256
215#elif NR_CPUS >= 16
216#define RT_HASH_LOCK_SZ 2048
217#elif NR_CPUS >= 8
218#define RT_HASH_LOCK_SZ 1024
219#elif NR_CPUS >= 4
220#define RT_HASH_LOCK_SZ 512
221#else 217#else
222#define RT_HASH_LOCK_SZ 256 218# if NR_CPUS >= 32
219# define RT_HASH_LOCK_SZ 4096
220# elif NR_CPUS >= 16
221# define RT_HASH_LOCK_SZ 2048
222# elif NR_CPUS >= 8
223# define RT_HASH_LOCK_SZ 1024
224# elif NR_CPUS >= 4
225# define RT_HASH_LOCK_SZ 512
226# else
227# define RT_HASH_LOCK_SZ 256
228# endif
223#endif 229#endif
224 230
225static spinlock_t *rt_hash_locks; 231static spinlock_t *rt_hash_locks;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8355b729fa95..5a886e6efbbe 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -90,7 +90,7 @@ static struct socket *tcp_socket;
90void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); 90void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
91 91
92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
93 .lhash_lock = RW_LOCK_UNLOCKED, 93 .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
94 .lhash_users = ATOMIC_INIT(0), 94 .lhash_users = ATOMIC_INIT(0),
95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), 95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
96}; 96};
@@ -1090,7 +1090,7 @@ process:
1090 1090
1091 skb->dev = NULL; 1091 skb->dev = NULL;
1092 1092
1093 bh_lock_sock(sk); 1093 bh_lock_sock_nested(sk);
1094 ret = 0; 1094 ret = 0;
1095 if (!sock_owned_by_user(sk)) { 1095 if (!sock_owned_by_user(sk)) {
1096#ifdef CONFIG_NET_DMA 1096#ifdef CONFIG_NET_DMA
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e0851697ad5e..0ccb7cb22b15 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -40,7 +40,7 @@ int sysctl_tcp_abort_on_overflow;
40struct inet_timewait_death_row tcp_death_row = { 40struct inet_timewait_death_row tcp_death_row = {
41 .sysctl_max_tw_buckets = NR_FILE * 2, 41 .sysctl_max_tw_buckets = NR_FILE * 2,
42 .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, 42 .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
43 .death_lock = SPIN_LOCK_UNLOCKED, 43 .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
44 .hashinfo = &tcp_hashinfo, 44 .hashinfo = &tcp_hashinfo,
45 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, 45 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
46 (unsigned long)&tcp_death_row), 46 (unsigned long)&tcp_death_row),
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 70cee82a98bf..55c0adc8f115 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -156,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk)
156 156
157static void netlink_table_grab(void) 157static void netlink_table_grab(void)
158{ 158{
159 write_lock_bh(&nl_table_lock); 159 write_lock_irq(&nl_table_lock);
160 160
161 if (atomic_read(&nl_table_users)) { 161 if (atomic_read(&nl_table_users)) {
162 DECLARE_WAITQUEUE(wait, current); 162 DECLARE_WAITQUEUE(wait, current);
@@ -166,9 +166,9 @@ static void netlink_table_grab(void)
166 set_current_state(TASK_UNINTERRUPTIBLE); 166 set_current_state(TASK_UNINTERRUPTIBLE);
167 if (atomic_read(&nl_table_users) == 0) 167 if (atomic_read(&nl_table_users) == 0)
168 break; 168 break;
169 write_unlock_bh(&nl_table_lock); 169 write_unlock_irq(&nl_table_lock);
170 schedule(); 170 schedule();
171 write_lock_bh(&nl_table_lock); 171 write_lock_irq(&nl_table_lock);
172 } 172 }
173 173
174 __set_current_state(TASK_RUNNING); 174 __set_current_state(TASK_RUNNING);
@@ -178,7 +178,7 @@ static void netlink_table_grab(void)
178 178
179static __inline__ void netlink_table_ungrab(void) 179static __inline__ void netlink_table_ungrab(void)
180{ 180{
181 write_unlock_bh(&nl_table_lock); 181 write_unlock_irq(&nl_table_lock);
182 wake_up(&nl_table_wait); 182 wake_up(&nl_table_wait);
183} 183}
184 184
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 6db6006616c6..dc6cb93c8830 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -515,7 +515,7 @@ rpc_depopulate(struct dentry *parent)
515 struct dentry *dentry, *dvec[10]; 515 struct dentry *dentry, *dvec[10];
516 int n = 0; 516 int n = 0;
517 517
518 mutex_lock(&dir->i_mutex); 518 mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
519repeat: 519repeat:
520 spin_lock(&dcache_lock); 520 spin_lock(&dcache_lock);
521 list_for_each_safe(pos, next, &parent->d_subdirs) { 521 list_for_each_safe(pos, next, &parent->d_subdirs) {
@@ -631,7 +631,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
631 if ((error = rpc_lookup_parent(path, nd)) != 0) 631 if ((error = rpc_lookup_parent(path, nd)) != 0)
632 return ERR_PTR(error); 632 return ERR_PTR(error);
633 dir = nd->dentry->d_inode; 633 dir = nd->dentry->d_inode;
634 mutex_lock(&dir->i_mutex); 634 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
635 dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); 635 dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len);
636 if (IS_ERR(dentry)) 636 if (IS_ERR(dentry))
637 goto out_err; 637 goto out_err;
@@ -693,7 +693,7 @@ rpc_rmdir(char *path)
693 if ((error = rpc_lookup_parent(path, &nd)) != 0) 693 if ((error = rpc_lookup_parent(path, &nd)) != 0)
694 return error; 694 return error;
695 dir = nd.dentry->d_inode; 695 dir = nd.dentry->d_inode;
696 mutex_lock(&dir->i_mutex); 696 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
697 dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); 697 dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
698 if (IS_ERR(dentry)) { 698 if (IS_ERR(dentry)) {
699 error = PTR_ERR(dentry); 699 error = PTR_ERR(dentry);
@@ -754,7 +754,7 @@ rpc_unlink(char *path)
754 if ((error = rpc_lookup_parent(path, &nd)) != 0) 754 if ((error = rpc_lookup_parent(path, &nd)) != 0)
755 return error; 755 return error;
756 dir = nd.dentry->d_inode; 756 dir = nd.dentry->d_inode;
757 mutex_lock(&dir->i_mutex); 757 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
758 dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); 758 dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
759 if (IS_ERR(dentry)) { 759 if (IS_ERR(dentry)) {
760 error = PTR_ERR(dentry); 760 error = PTR_ERR(dentry);
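mutex_lock_nested() is the mutex counterpart of spin_lock_nested(): the second argument names a subclass, and the VFS defines I_MUTEX_PARENT / I_MUTEX_CHILD so that holding a parent directory's i_mutex while taking a child's (same class) is annotated as the expected ordering rather than recursion, which is exactly what the rpc_pipe lookup, rmdir and unlink paths above do. A hedged sketch of the parent-then-child pattern (the helpers are invented; the constants are the ones used in the hunks):

#include <linux/fs.h>
#include <linux/mutex.h>

/* Lock a directory as "parent" and one of its entries as "child" so
 * lockdep accepts two i_mutex acquisitions of the same class. */
static void lock_parent_and_child(struct inode *dir, struct inode *child)
{
	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&child->i_mutex, I_MUTEX_CHILD);
}

static void unlock_parent_and_child(struct inode *dir, struct inode *child)
{
	mutex_unlock(&child->i_mutex);
	mutex_unlock(&dir->i_mutex);
}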
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aca650109425..e9a287bc3142 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -565,6 +565,14 @@ static struct proto unix_proto = {
565 .obj_size = sizeof(struct unix_sock), 565 .obj_size = sizeof(struct unix_sock),
566}; 566};
567 567
568/*
569 * AF_UNIX sockets do not interact with hardware, hence they
570 * dont trigger interrupts - so it's safe for them to have
571 * bh-unsafe locking for their sk_receive_queue.lock. Split off
572 * this special lock-class by reinitializing the spinlock key:
573 */
574static struct lock_class_key af_unix_sk_receive_queue_lock_key;
575
568static struct sock * unix_create1(struct socket *sock) 576static struct sock * unix_create1(struct socket *sock)
569{ 577{
570 struct sock *sk = NULL; 578 struct sock *sk = NULL;
@@ -580,6 +588,8 @@ static struct sock * unix_create1(struct socket *sock)
580 atomic_inc(&unix_nr_socks); 588 atomic_inc(&unix_nr_socks);
581 589
582 sock_init_data(sock,sk); 590 sock_init_data(sock,sk);
591 lockdep_set_class(&sk->sk_receive_queue.lock,
592 &af_unix_sk_receive_queue_lock_key);
583 593
584 sk->sk_write_space = unix_write_space; 594 sk->sk_write_space = unix_write_space;
585 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; 595 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
@@ -1045,7 +1055,7 @@ restart:
1045 goto out_unlock; 1055 goto out_unlock;
1046 } 1056 }
1047 1057
1048 unix_state_wlock(sk); 1058 unix_state_wlock_nested(sk);
1049 1059
1050 if (sk->sk_state != st) { 1060 if (sk->sk_state != st) {
1051 unix_state_wunlock(sk); 1061 unix_state_wunlock(sk);
diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c
index d812dc886360..4260de90f36f 100644
--- a/sound/core/seq/seq_device.c
+++ b/sound/core/seq/seq_device.c
@@ -380,6 +380,12 @@ static struct ops_list * create_driver(char *id)
380 /* set up driver entry */ 380 /* set up driver entry */
381 strlcpy(ops->id, id, sizeof(ops->id)); 381 strlcpy(ops->id, id, sizeof(ops->id));
382 mutex_init(&ops->reg_mutex); 382 mutex_init(&ops->reg_mutex);
383 /*
384 * The ->reg_mutex locking rules are per-driver, so we create
385 * separate per-driver lock classes:
386 */
387 lockdep_set_class(&ops->reg_mutex, (struct lock_class_key *)id);
388
383 ops->driver = DRIVER_EMPTY; 389 ops->driver = DRIVER_EMPTY;
384 INIT_LIST_HEAD(&ops->dev_list); 390 INIT_LIST_HEAD(&ops->dev_list);
385 /* lock this instance */ 391 /* lock this instance */
diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c
index d467b4f0ff2b..8c64b58ff77b 100644
--- a/sound/core/seq/seq_ports.c
+++ b/sound/core/seq/seq_ports.c
@@ -514,7 +514,7 @@ int snd_seq_port_connect(struct snd_seq_client *connector,
514 atomic_set(&subs->ref_count, 2); 514 atomic_set(&subs->ref_count, 2);
515 515
516 down_write(&src->list_mutex); 516 down_write(&src->list_mutex);
517 down_write(&dest->list_mutex); 517 down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
518 518
519 exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0; 519 exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0;
520 err = -EBUSY; 520 err = -EBUSY;
@@ -587,7 +587,7 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector,
587 unsigned long flags; 587 unsigned long flags;
588 588
589 down_write(&src->list_mutex); 589 down_write(&src->list_mutex);
590 down_write(&dest->list_mutex); 590 down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
591 591
592 /* look for the connection */ 592 /* look for the connection */
593 list_for_each(p, &src->list_head) { 593 list_for_each(p, &src->list_head) {
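The same nesting annotation exists for rw-semaphores: connecting or disconnecting two sequencer ports takes both ports' list_mutex rwsems, which share a class, so the second one is taken with down_write_nested(..., SINGLE_DEPTH_NESTING). A hedged sketch (helper invented for illustration):

#include <linux/rwsem.h>

/* Hold two rw-semaphores of the same class: fixed order, with the
 * inner acquisition annotated by a nesting subclass. */
static void update_both_lists(struct rw_semaphore *src,
			      struct rw_semaphore *dst)
{
	down_write(src);
	down_write_nested(dst, SINGLE_DEPTH_NESTING);

	/* ... update both subscription lists ... */

	up_write(dst);
	up_write(src);
}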
diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c
index db3e22efd02e..2bd8e40b8541 100644
--- a/sound/sparc/amd7930.c
+++ b/sound/sparc/amd7930.c
@@ -1033,10 +1033,10 @@ static int __init amd7930_attach_common(struct resource *rp, int irq)
1033 1033
1034 strcpy(card->driver, "AMD7930"); 1034 strcpy(card->driver, "AMD7930");
1035 strcpy(card->shortname, "Sun AMD7930"); 1035 strcpy(card->shortname, "Sun AMD7930");
1036 sprintf(card->longname, "%s at 0x%02lx:0x%08lx, irq %d", 1036 sprintf(card->longname, "%s at 0x%02lx:0x%08Lx, irq %d",
1037 card->shortname, 1037 card->shortname,
1038 rp->flags & 0xffL, 1038 rp->flags & 0xffL,
1039 rp->start, 1039 (unsigned long long)rp->start,
1040 irq); 1040 irq);
1041 1041
1042 if ((err = snd_amd7930_create(card, rp, 1042 if ((err = snd_amd7930_create(card, rp,
diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
index 5018fcf41df5..9a06c3bd6944 100644
--- a/sound/sparc/cs4231.c
+++ b/sound/sparc/cs4231.c
@@ -2036,7 +2036,7 @@ static int __init cs4231_sbus_attach(struct sbus_dev *sdev)
2036 if (err) 2036 if (err)
2037 return err; 2037 return err;
2038 2038
2039 sprintf(card->longname, "%s at 0x%02lx:0x%016lx, irq %d", 2039 sprintf(card->longname, "%s at 0x%02lx:0x%016Lx, irq %d",
2040 card->shortname, 2040 card->shortname,
2041 rp->flags & 0xffL, 2041 rp->flags & 0xffL,
2042 (unsigned long long)rp->start, 2042 (unsigned long long)rp->start,
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 59a02a0d9afc..f3ae6e23610e 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -2645,7 +2645,7 @@ static int __init dbri_attach(int prom_node, struct sbus_dev *sdev)
2645 strcpy(card->driver, "DBRI"); 2645 strcpy(card->driver, "DBRI");
2646 strcpy(card->shortname, "Sun DBRI"); 2646 strcpy(card->shortname, "Sun DBRI");
2647 rp = &sdev->resource[0]; 2647 rp = &sdev->resource[0];
2648 sprintf(card->longname, "%s at 0x%02lx:0x%016lx, irq %d", 2648 sprintf(card->longname, "%s at 0x%02lx:0x%016Lx, irq %d",
2649 card->shortname, 2649 card->shortname,
2650 rp->flags & 0xffL, (unsigned long long)rp->start, irq.pri); 2650 rp->flags & 0xffL, (unsigned long long)rp->start, irq.pri);
2651 2651