-rw-r--r--  Documentation/00-INDEX | 2
-rw-r--r--  Documentation/CodingStyle | 42
-rw-r--r--  Documentation/DocBook/procfs-guide.tmpl | 4
-rw-r--r--  Documentation/accounting/delay-accounting.txt | 11
-rw-r--r--  Documentation/accounting/getdelays.c | 8
-rw-r--r--  Documentation/accounting/taskstats-struct.txt | 7
-rw-r--r--  Documentation/bt8xxgpio.txt | 67
-rw-r--r--  Documentation/controllers/memory.txt | 3
-rw-r--r--  Documentation/edac.txt | 151
-rw-r--r--  Documentation/feature-removal-schedule.txt | 18
-rw-r--r--  Documentation/filesystems/vfat.txt | 8
-rw-r--r--  Documentation/gpio.txt | 135
-rw-r--r--  Documentation/kernel-parameters.txt | 7
-rw-r--r--  Documentation/moxa-smartio | 392
-rw-r--r--  Documentation/powerpc/booting-without-of.txt | 57
-rw-r--r--  Documentation/unaligned-memory-access.txt | 32
-rw-r--r--  MAINTAINERS | 6
-rw-r--r--  arch/Kconfig | 19
-rw-r--r--  arch/alpha/boot/misc.c | 39
-rw-r--r--  arch/arm/Kconfig | 8
-rw-r--r--  arch/arm/boot/compressed/misc.c | 59
-rw-r--r--  arch/arm/kernel/kprobes.c | 6
-rw-r--r--  arch/arm/plat-omap/gpio.c | 3
-rw-r--r--  arch/avr32/Kconfig | 2
-rw-r--r--  arch/avr32/mach-at32ap/pio.c | 2
-rw-r--r--  arch/cris/arch-v10/boot/compressed/misc.c | 36
-rw-r--r--  arch/cris/arch-v32/boot/compressed/misc.c | 39
-rw-r--r--  arch/h8300/Kconfig | 14
-rw-r--r--  arch/h8300/boot/compressed/misc.c | 38
-rw-r--r--  arch/ia64/kernel/kprobes.c | 6
-rw-r--r--  arch/m32r/boot/compressed/misc.c | 37
-rw-r--r--  arch/mips/Kconfig | 2
-rw-r--r--  arch/mips/kernel/linux32.c | 1
-rw-r--r--  arch/mn10300/boot/compressed/misc.c | 37
-rw-r--r--  arch/powerpc/Kconfig | 2
-rw-r--r--  arch/powerpc/kernel/cputable.c | 11
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 6
-rw-r--r--  arch/powerpc/kernel/iommu.c | 28
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 6
-rw-r--r--  arch/powerpc/kernel/lparcfg.c | 386
-rw-r--r--  arch/powerpc/kernel/process.c | 46
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 9
-rw-r--r--  arch/powerpc/kernel/ptrace.c | 72
-rw-r--r--  arch/powerpc/kernel/signal.c | 6
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 3
-rw-r--r--  arch/powerpc/kernel/traps.c | 16
-rw-r--r--  arch/powerpc/kernel/vio.c | 1033
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/powerpc/mm/fault.c | 25
-rw-r--r--  arch/powerpc/platforms/52xx/Kconfig | 4
-rw-r--r--  arch/powerpc/platforms/cell/iommu.c | 16
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sched.c | 35
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sputrace.c | 3
-rw-r--r--  arch/powerpc/platforms/iseries/iommu.c | 3
-rw-r--r--  arch/powerpc/platforms/pasemi/iommu.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig | 23
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile | 1
-rw-r--r--  arch/powerpc/platforms/pseries/cmm.c | 468
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 42
-rw-r--r--  arch/powerpc/platforms/pseries/plpar_wrappers.h | 10
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 71
-rw-r--r--  arch/powerpc/sysdev/dart_iommu.c | 3
-rw-r--r--  arch/powerpc/sysdev/qe_lib/Kconfig | 2
-rw-r--r--  arch/s390/Kconfig | 1
-rw-r--r--  arch/s390/kernel/kprobes.c | 6
-rw-r--r--  arch/s390/kernel/setup.c | 4
-rw-r--r--  arch/s390/kernel/topology.c | 14
-rw-r--r--  arch/sh/boot/compressed/misc_32.c | 38
-rw-r--r--  arch/sh/boot/compressed/misc_64.c | 40
-rw-r--r--  arch/sparc/Kconfig | 14
-rw-r--r--  arch/sparc64/kernel/kprobes.c | 11
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/boot/compressed/misc.c | 39
-rw-r--r--  arch/x86/kernel/kprobes.c | 6
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 85
-rw-r--r--  block/ioctl.c | 5
-rw-r--r--  drivers/Makefile | 2
-rw-r--r--  drivers/ata/ahci.c | 2
-rw-r--r--  drivers/base/firmware_class.c | 12
-rw-r--r--  drivers/block/aoe/aoechr.c | 9
-rw-r--r--  drivers/block/virtio_blk.c | 10
-rw-r--r--  drivers/char/Kconfig | 11
-rw-r--r--  drivers/char/Makefile | 2
-rw-r--r--  drivers/char/ds1302.c | 17
-rw-r--r--  drivers/char/dsp56k.c | 20
-rw-r--r--  drivers/char/efirtc.c | 35
-rw-r--r--  drivers/char/hpet.c | 2
-rw-r--r--  drivers/char/hvc_console.c | 85
-rw-r--r--  drivers/char/hvc_console.h | 35
-rw-r--r--  drivers/char/hvc_irq.c | 44
-rw-r--r--  drivers/char/hvc_iseries.c | 2
-rw-r--r--  drivers/char/hvc_vio.c | 2
-rw-r--r--  drivers/char/hvc_xen.c | 2
-rw-r--r--  drivers/char/ip2/ip2main.c | 13
-rw-r--r--  drivers/char/mwave/mwavedd.c | 39
-rw-r--r--  drivers/char/mwave/mwavedd.h | 2
-rw-r--r--  drivers/char/mwave/tp3780i.c | 2
-rw-r--r--  drivers/char/mxser.c | 337
-rw-r--r--  drivers/char/nwflash.c | 31
-rw-r--r--  drivers/char/ppdev.c | 18
-rw-r--r--  drivers/char/rio/rio_linux.c | 8
-rw-r--r--  drivers/char/sx.c | 73
-rw-r--r--  drivers/char/tty_io.c | 1
-rw-r--r--  drivers/char/virtio_console.c | 40
-rw-r--r--  drivers/char/xilinx_hwicap/xilinx_hwicap.c | 1
-rw-r--r--  drivers/edac/Kconfig | 7
-rw-r--r--  drivers/edac/Makefile | 1
-rw-r--r--  drivers/edac/e752x_edac.c | 59
-rw-r--r--  drivers/edac/edac_mc_sysfs.c | 158
-rw-r--r--  drivers/edac/edac_pci_sysfs.c | 30
-rw-r--r--  drivers/edac/i5100_edac.c | 981
-rw-r--r--  drivers/edac/mpc85xx_edac.c | 67
-rw-r--r--  drivers/edac/mv64x60_edac.c | 37
-rw-r--r--  drivers/firmware/dcdbas.c | 13
-rw-r--r--  drivers/firmware/dell_rbu.c | 28
-rw-r--r--  drivers/gpio/Kconfig | 85
-rw-r--r--  drivers/gpio/Makefile | 4
-rw-r--r--  drivers/gpio/bt8xxgpio.c | 348
-rw-r--r--  drivers/gpio/gpiolib.c | 536
-rw-r--r--  drivers/gpio/max732x.c | 385
-rw-r--r--  drivers/gpio/mcp23s08.c | 134
-rw-r--r--  drivers/gpio/pca953x.c | 1
-rw-r--r--  drivers/gpio/pcf857x.c | 1
-rw-r--r--  drivers/i2c/chips/Kconfig | 2
-rw-r--r--  drivers/i2c/chips/tps65010.c | 2
-rw-r--r--  drivers/input/serio/hp_sdc.c | 1
-rw-r--r--  drivers/isdn/hisax/st5481.h | 4
-rw-r--r--  drivers/isdn/hisax/st5481_b.c | 4
-rw-r--r--  drivers/isdn/hisax/st5481_d.c | 6
-rw-r--r--  drivers/isdn/hisax/st5481_usb.c | 18
-rw-r--r--  drivers/lguest/lguest_device.c | 14
-rw-r--r--  drivers/mfd/Kconfig | 12
-rw-r--r--  drivers/mfd/htc-egpio.c | 2
-rw-r--r--  drivers/mfd/htc-pasic3.c | 2
-rw-r--r--  drivers/mfd/mcp-sa11x0.c | 2
-rw-r--r--  drivers/mfd/mfd-core.c | 2
-rw-r--r--  drivers/mfd/sm501.c | 439
-rw-r--r--  drivers/misc/Kconfig | 17
-rw-r--r--  drivers/misc/Makefile | 1
-rw-r--r--  drivers/misc/hp-wmi.c | 494
-rw-r--r--  drivers/misc/phantom.c | 2
-rw-r--r--  drivers/mtd/ubi/build.c | 99
-rw-r--r--  drivers/mtd/ubi/cdev.c | 234
-rw-r--r--  drivers/mtd/ubi/debug.c | 158
-rw-r--r--  drivers/mtd/ubi/debug.h | 74
-rw-r--r--  drivers/mtd/ubi/eba.c | 77
-rw-r--r--  drivers/mtd/ubi/gluebi.c | 16
-rw-r--r--  drivers/mtd/ubi/io.c | 48
-rw-r--r--  drivers/mtd/ubi/kapi.c | 50
-rw-r--r--  drivers/mtd/ubi/misc.c | 2
-rw-r--r--  drivers/mtd/ubi/scan.c | 136
-rw-r--r--  drivers/mtd/ubi/scan.h | 21
-rw-r--r--  drivers/mtd/ubi/ubi-media.h | 38
-rw-r--r--  drivers/mtd/ubi/ubi.h | 75
-rw-r--r--  drivers/mtd/ubi/upd.c | 32
-rw-r--r--  drivers/mtd/ubi/vmt.c | 148
-rw-r--r--  drivers/mtd/ubi/vtbl.c | 127
-rw-r--r--  drivers/mtd/ubi/wl.c | 208
-rw-r--r--  drivers/net/ibmveth.c | 189
-rw-r--r--  drivers/net/ibmveth.h | 5
-rw-r--r--  drivers/net/virtio_net.c | 114
-rw-r--r--  drivers/of/Kconfig | 2
-rw-r--r--  drivers/of/of_i2c.c | 2
-rw-r--r--  drivers/parport/parport_ax88796.c | 2
-rw-r--r--  drivers/power/ds2760_battery.c | 2
-rw-r--r--  drivers/power/pda_power.c | 2
-rw-r--r--  drivers/s390/kvm/kvm_virtio.c | 34
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvfc.c | 15
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvscsi.c | 45
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvscsi.h | 2
-rw-r--r--  drivers/telephony/ixj.c | 17
-rw-r--r--  drivers/usb/gadget/at91_udc.h | 2
-rw-r--r--  drivers/usb/gadget/cdc2.c | 2
-rw-r--r--  drivers/usb/gadget/ether.c | 2
-rw-r--r--  drivers/usb/gadget/file_storage.c | 14
-rw-r--r--  drivers/usb/gadget/fsl_usb2_udc.c | 2
-rw-r--r--  drivers/usb/gadget/fsl_usb2_udc.h | 2
-rw-r--r--  drivers/usb/gadget/gmidi.c | 2
-rw-r--r--  drivers/usb/gadget/goku_udc.c | 2
-rw-r--r--  drivers/usb/gadget/goku_udc.h | 2
-rw-r--r--  drivers/usb/gadget/inode.c | 2
-rw-r--r--  drivers/usb/gadget/net2280.c | 2
-rw-r--r--  drivers/usb/gadget/net2280.h | 2
-rw-r--r--  drivers/usb/gadget/omap_udc.c | 6
-rw-r--r--  drivers/usb/gadget/omap_udc.h | 2
-rw-r--r--  drivers/usb/gadget/printer.c | 2
-rw-r--r--  drivers/usb/gadget/pxa25x_udc.c | 6
-rw-r--r--  drivers/usb/gadget/pxa25x_udc.h | 2
-rw-r--r--  drivers/usb/gadget/u_ether.c | 3
-rw-r--r--  drivers/usb/host/isp116x-hcd.c | 2
-rw-r--r--  drivers/usb/host/isp116x.h | 2
-rw-r--r--  drivers/usb/host/sl811-hcd.c | 2
-rw-r--r--  drivers/usb/host/sl811.h | 2
-rw-r--r--  drivers/usb/misc/usbtest.c | 4
-rw-r--r--  drivers/virtio/virtio.c | 26
-rw-r--r--  drivers/virtio/virtio_pci.c | 13
-rw-r--r--  drivers/virtio/virtio_ring.c | 23
-rw-r--r--  fs/Kconfig | 74
-rw-r--r--  fs/aio.c | 2
-rw-r--r--  fs/binfmt_elf.c | 99
-rw-r--r--  fs/binfmt_elf_fdpic.c | 26
-rw-r--r--  fs/coda/coda_linux.c | 6
-rw-r--r--  fs/coda/psdev.c | 4
-rw-r--r--  fs/coda/upcall.c | 15
-rw-r--r--  fs/compat_ioctl.c | 1
-rw-r--r--  fs/dlm/plock.c | 2
-rw-r--r--  fs/dquot.c | 129
-rw-r--r--  fs/exec.c | 139
-rw-r--r--  fs/ext2/super.c | 1
-rw-r--r--  fs/ext2/xattr_security.c | 2
-rw-r--r--  fs/ext2/xattr_trusted.c | 4
-rw-r--r--  fs/ext2/xattr_user.c | 4
-rw-r--r--  fs/ext3/dir.c | 14
-rw-r--r--  fs/ext3/ialloc.c | 9
-rw-r--r--  fs/ext3/inode.c | 46
-rw-r--r--  fs/ext3/namei.c | 26
-rw-r--r--  fs/ext3/super.c | 78
-rw-r--r--  fs/ext3/xattr_security.c | 2
-rw-r--r--  fs/ext3/xattr_trusted.c | 4
-rw-r--r--  fs/ext3/xattr_user.c | 4
-rw-r--r--  fs/fat/dir.c | 229
-rw-r--r--  fs/fat/inode.c | 34
-rw-r--r--  fs/fat/misc.c | 10
-rw-r--r--  fs/fuse/dir.c | 139
-rw-r--r--  fs/fuse/file.c | 11
-rw-r--r--  fs/fuse/fuse_i.h | 10
-rw-r--r--  fs/fuse/inode.c | 177
-rw-r--r--  fs/hfs/bitmap.c | 8
-rw-r--r--  fs/hfs/btree.c | 2
-rw-r--r--  fs/hfs/extent.c | 14
-rw-r--r--  fs/hfs/hfs_fs.h | 5
-rw-r--r--  fs/hfs/inode.c | 4
-rw-r--r--  fs/hfs/super.c | 2
-rw-r--r--  fs/hfsplus/extents.c | 14
-rw-r--r--  fs/hfsplus/hfsplus_fs.h | 3
-rw-r--r--  fs/hfsplus/inode.c | 4
-rw-r--r--  fs/hfsplus/super.c | 2
-rw-r--r--  fs/isofs/rock.c | 22
-rw-r--r--  fs/jbd/commit.c | 64
-rw-r--r--  fs/jbd/journal.c | 8
-rw-r--r--  fs/jbd/revoke.c | 163
-rw-r--r--  fs/jbd/transaction.c | 57
-rw-r--r--  fs/jfs/super.c | 1
-rw-r--r--  fs/lockd/clntproc.c | 10
-rw-r--r--  fs/lockd/svclock.c | 13
-rw-r--r--  fs/locks.c | 90
-rw-r--r--  fs/minix/inode.c | 3
-rw-r--r--  fs/minix/minix.h | 6
-rw-r--r--  fs/minix/namei.c | 24
-rw-r--r--  fs/msdos/namei.c | 21
-rw-r--r--  fs/nfsd/lockd.c | 13
-rw-r--r--  fs/partitions/check.c | 38
-rw-r--r--  fs/partitions/efi.c | 42
-rw-r--r--  fs/partitions/ldm.c | 70
-rw-r--r--  fs/partitions/ldm.h | 5
-rw-r--r--  fs/proc/Kconfig | 59
-rw-r--r--  fs/proc/base.c | 86
-rw-r--r--  fs/proc/generic.c | 14
-rw-r--r--  fs/proc/inode.c | 81
-rw-r--r--  fs/proc/internal.h | 8
-rw-r--r--  fs/proc/kcore.c | 10
-rw-r--r--  fs/proc/kmsg.c | 2
-rw-r--r--  fs/quota.c | 18
-rw-r--r--  fs/quota_v1.c | 1
-rw-r--r--  fs/quota_v2.c | 1
-rw-r--r--  fs/reiserfs/journal.c | 42
-rw-r--r--  fs/reiserfs/super.c | 124
-rw-r--r--  fs/reiserfs/xattr_security.c | 2
-rw-r--r--  fs/reiserfs/xattr_trusted.c | 2
-rw-r--r--  fs/reiserfs/xattr_user.c | 2
-rw-r--r--  fs/smbfs/cache.c | 1
-rw-r--r--  fs/smbfs/proc.c | 1
-rw-r--r--  fs/ufs/super.c | 1
-rw-r--r--  fs/vfat/namei.c | 2
-rw-r--r--  include/asm-alpha/thread_info.h | 4
-rw-r--r--  include/asm-arm/ptrace.h | 6
-rw-r--r--  include/asm-arm/thread_info.h | 13
-rw-r--r--  include/asm-avr32/thread_info.h | 4
-rw-r--r--  include/asm-blackfin/ptrace.h | 6
-rw-r--r--  include/asm-blackfin/thread_info.h | 5
-rw-r--r--  include/asm-cris/arch-v10/Kbuild | 1
-rw-r--r--  include/asm-cris/arch-v10/ptrace.h | 4
-rw-r--r--  include/asm-cris/arch-v32/Kbuild | 1
-rw-r--r--  include/asm-cris/arch-v32/ptrace.h | 4
-rw-r--r--  include/asm-cris/ptrace.h | 4
-rw-r--r--  include/asm-cris/thread_info.h | 2
-rw-r--r--  include/asm-frv/Kbuild | 1
-rw-r--r--  include/asm-frv/thread_info.h | 2
-rw-r--r--  include/asm-generic/bug.h | 25
-rw-r--r--  include/asm-generic/gpio.h | 35
-rw-r--r--  include/asm-generic/int-ll64.h | 2
-rw-r--r--  include/asm-h8300/elf.h | 4
-rw-r--r--  include/asm-h8300/thread_info.h | 5
-rw-r--r--  include/asm-ia64/thread_info.h | 2
-rw-r--r--  include/asm-m32r/thread_info.h | 2
-rw-r--r--  include/asm-m68k/thread_info.h | 8
-rw-r--r--  include/asm-m68knommu/ptrace.h | 2
-rw-r--r--  include/asm-m68knommu/thread_info.h | 4
-rw-r--r--  include/asm-mips/mach-generic/gpio.h | 2
-rw-r--r--  include/asm-mips/thread_info.h | 2
-rw-r--r--  include/asm-mn10300/ptrace.h | 8
-rw-r--r--  include/asm-mn10300/thread_info.h | 2
-rw-r--r--  include/asm-parisc/ptrace.h | 4
-rw-r--r--  include/asm-parisc/thread_info.h | 10
-rw-r--r--  include/asm-powerpc/Kbuild | 1
-rw-r--r--  include/asm-powerpc/cputable.h | 2
-rw-r--r--  include/asm-powerpc/elf.h | 8
-rw-r--r--  include/asm-powerpc/firmware.h | 3
-rw-r--r--  include/asm-powerpc/gpio.h | 4
-rw-r--r--  include/asm-powerpc/hvcall.h | 23
-rw-r--r--  include/asm-powerpc/lppaca.h | 5
-rw-r--r--  include/asm-powerpc/machdep.h | 2
-rw-r--r--  include/asm-powerpc/mpc52xx_psc.h | 40
-rw-r--r--  include/asm-powerpc/pgtable.h | 13
-rw-r--r--  include/asm-powerpc/syscalls.h | 1
-rw-r--r--  include/asm-powerpc/systbl.h | 6
-rw-r--r--  include/asm-powerpc/system.h | 2
-rw-r--r--  include/asm-powerpc/thread_info.h | 14
-rw-r--r--  include/asm-powerpc/tlbflush.h | 11
-rw-r--r--  include/asm-powerpc/unistd.h | 8
-rw-r--r--  include/asm-powerpc/vio.h | 27
-rw-r--r--  include/asm-s390/kvm_virtio.h | 10
-rw-r--r--  include/asm-s390/thread_info.h | 5
-rw-r--r--  include/asm-sh/ptrace.h | 2
-rw-r--r--  include/asm-sh/thread_info.h | 2
-rw-r--r--  include/asm-sparc/thread_info_32.h | 2
-rw-r--r--  include/asm-sparc/thread_info_64.h | 2
-rw-r--r--  include/asm-um/thread_info.h | 16
-rw-r--r--  include/asm-x86/Kbuild | 1
-rw-r--r--  include/asm-x86/gpio.h | 56
-rw-r--r--  include/asm-x86/thread_info.h | 2
-rw-r--r--  include/asm-xtensa/ptrace.h | 10
-rw-r--r--  include/asm-xtensa/thread_info.h | 5
-rw-r--r--  include/linux/Kbuild | 1
-rw-r--r--  include/linux/acct.h | 3
-rw-r--r--  include/linux/auxvec.h | 6
-rw-r--r--  include/linux/bootmem.h | 9
-rw-r--r--  include/linux/byteorder/big_endian.h | 12
-rw-r--r--  include/linux/byteorder/little_endian.h | 12
-rw-r--r--  include/linux/cgroup.h | 51
-rw-r--r--  include/linux/coda.h | 43
-rw-r--r--  include/linux/cpu.h | 15
-rw-r--r--  include/linux/crash_dump.h | 8
-rw-r--r--  include/linux/delayacct.h | 19
-rw-r--r--  include/linux/dirent.h | 20
-rw-r--r--  include/linux/ext2_fs.h | 4
-rw-r--r--  include/linux/ext3_fs.h | 1
-rw-r--r--  include/linux/fd1772.h | 80
-rw-r--r--  include/linux/fs.h | 6
-rw-r--r--  include/linux/fuse.h | 3
-rw-r--r--  include/linux/genhd.h | 2
-rw-r--r--  include/linux/gpio.h | 13
-rw-r--r--  include/linux/i2c/max732x.h | 19
-rw-r--r--  include/linux/idr.h | 24
-rw-r--r--  include/linux/init.h | 8
-rw-r--r--  include/linux/init_task.h | 2
-rw-r--r--  include/linux/ipc_namespace.h | 3
-rw-r--r--  include/linux/irqflags.h | 54
-rw-r--r--  include/linux/kallsyms.h | 19
-rw-r--r--  include/linux/kernel.h | 29
-rw-r--r--  include/linux/kmod.h | 11
-rw-r--r--  include/linux/kprobes.h | 7
-rw-r--r--  include/linux/kthread.h | 3
-rw-r--r--  include/linux/list.h | 4
-rw-r--r--  include/linux/memcontrol.h | 24
-rw-r--r--  include/linux/mm_types.h | 15
-rw-r--r--  include/linux/msdos_fs.h | 56
-rw-r--r--  include/linux/mtd/ubi.h | 5
-rw-r--r--  include/linux/net.h | 3
-rw-r--r--  include/linux/nfsd/nfsd.h | 1
-rw-r--r--  include/linux/notifier.h | 2
-rw-r--r--  include/linux/nsproxy.h | 7
-rw-r--r--  include/linux/pci_ids.h | 3
-rw-r--r--  include/linux/pid.h | 8
-rw-r--r--  include/linux/pid_namespace.h | 8
-rw-r--r--  include/linux/proc_fs.h | 2
-rw-r--r--  include/linux/profile.h | 58
-rw-r--r--  include/linux/quota.h | 33
-rw-r--r--  include/linux/quotaops.h | 281
-rw-r--r--  include/linux/ratelimit.h | 27
-rw-r--r--  include/linux/rcupreempt.h | 9
-rw-r--r--  include/linux/reiserfs_fs.h | 4
-rw-r--r--  include/linux/reiserfs_fs_sb.h | 6
-rw-r--r--  include/linux/res_counter.h | 33
-rw-r--r--  include/linux/sched.h | 24
-rw-r--r--  include/linux/sem.h | 30
-rw-r--r--  include/linux/sm501.h | 37
-rw-r--r--  include/linux/smb_fs.h | 19
-rw-r--r--  include/linux/spi/mcp23s08.h | 25
-rw-r--r--  include/linux/spinlock.h | 72
-rw-r--r--  include/linux/taskstats.h | 6
-rw-r--r--  include/linux/typecheck.h | 24
-rw-r--r--  include/linux/usb/composite.h | 2
-rw-r--r--  include/linux/virtio_9p.h | 2
-rw-r--r--  include/linux/virtio_balloon.h | 2
-rw-r--r--  include/linux/virtio_blk.h | 5
-rw-r--r--  include/linux/virtio_config.h | 16
-rw-r--r--  include/linux/virtio_console.h | 2
-rw-r--r--  include/linux/virtio_net.h | 2
-rw-r--r--  include/linux/virtio_pci.h | 5
-rw-r--r--  include/linux/virtio_ring.h | 2
-rw-r--r--  include/linux/virtio_rng.h | 2
-rw-r--r--  include/linux/workqueue.h | 2
-rw-r--r--  include/mtd/ubi-user.h | 76
-rw-r--r--  include/net/ieee80211_radiotap.h | 2
-rw-r--r--  init/do_mounts.c | 1
-rw-r--r--  init/do_mounts_rd.c | 37
-rw-r--r--  init/initramfs.c | 22
-rw-r--r--  init/main.c | 2
-rw-r--r--  init/version.c | 3
-rw-r--r--  ipc/ipc_sysctl.c | 72
-rw-r--r--  ipc/ipcns_notifier.c | 20
-rw-r--r--  ipc/mqueue.c | 25
-rw-r--r--  ipc/sem.c | 316
-rw-r--r--  ipc/shm.c | 21
-rw-r--r--  ipc/util.c | 61
-rw-r--r--  ipc/util.h | 6
-rw-r--r--  kernel/Makefile | 3
-rw-r--r--  kernel/acct.c | 222
-rw-r--r--  kernel/cgroup.c | 309
-rw-r--r--  kernel/cpu.c | 5
-rw-r--r--  kernel/cpuset.c | 357
-rw-r--r--  kernel/delayacct.c | 16
-rw-r--r--  kernel/exit.c | 61
-rw-r--r--  kernel/fork.c | 35
-rw-r--r--  kernel/irq/manage.c | 4
-rw-r--r--  kernel/kallsyms.c | 2
-rw-r--r--  kernel/kmod.c | 9
-rw-r--r--  kernel/kprobes.c | 132
-rw-r--r--  kernel/marker.c | 25
-rw-r--r--  kernel/ns_cgroup.c | 8
-rw-r--r--  kernel/nsproxy.c | 8
-rw-r--r--  kernel/panic.c | 22
-rw-r--r--  kernel/pid.c | 10
-rw-r--r--  kernel/pid_namespace.c | 10
-rw-r--r--  kernel/posix-timers.c | 21
-rw-r--r--  kernel/printk.c | 17
-rw-r--r--  kernel/profile.c | 4
-rw-r--r--  kernel/res_counter.c | 48
-rw-r--r--  kernel/sched.c | 2
-rw-r--r--  kernel/signal.c | 80
-rw-r--r--  kernel/sys.c | 4
-rw-r--r--  kernel/sys_ni.c | 2
-rw-r--r--  kernel/sysctl.c | 4
-rw-r--r--  kernel/sysctl_check.c | 2
-rw-r--r--  kernel/taskstats.c | 2
-rw-r--r--  kernel/trace/trace_sysprof.c | 2
-rw-r--r--  kernel/tsacct.c | 25
-rw-r--r--  kernel/workqueue.c | 112
-rw-r--r--  lib/cmdline.c | 16
-rw-r--r--  lib/idr.c | 140
-rw-r--r--  lib/inflate.c | 52
-rw-r--r--  lib/kobject.c | 9
-rw-r--r--  lib/list_debug.c | 50
-rw-r--r--  lib/lzo/lzo1x_decompress.c | 6
-rw-r--r--  lib/ratelimit.c | 55
-rw-r--r--  mm/filemap.c | 9
-rw-r--r--  mm/hugetlb.c | 22
-rw-r--r--  mm/memcontrol.c | 364
-rw-r--r--  mm/migrate.c | 21
-rw-r--r--  mm/pdflush.c | 4
-rw-r--r--  mm/rmap.c | 14
-rw-r--r--  mm/shmem.c | 44
-rw-r--r--  mm/vmscan.c | 5
-rw-r--r--  net/802/psnap.c | 4
-rw-r--r--  net/core/sysctl_net_core.c | 4
-rw-r--r--  net/core/utils.c | 5
-rw-r--r--  net/ipv4/af_inet.c | 9
-rw-r--r--  net/ipv6/af_inet6.c | 9
-rw-r--r--  net/sysctl_net.c | 14
-rwxr-xr-x  scripts/checkstack.pl | 27
-rw-r--r--  security/device_cgroup.c | 158
472 files changed, 12817 insertions, 5488 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 1977fab38656..6de71308a906 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -361,8 +361,6 @@ telephony/
 	- directory with info on telephony (e.g. voice over IP) support.
 time_interpolators.txt
 	- info on time interpolators.
-tipar.txt
-	- information about Parallel link cable for Texas Instruments handhelds.
 tty.txt
 	- guide to the locking policies of the tty layer.
 uml/
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 6caa14615578..1875e502f872 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -474,25 +474,29 @@ make a good program).
 So, you can either get rid of GNU emacs, or change it to use saner
 values.  To do the latter, you can stick the following in your .emacs file:
 
-(defun linux-c-mode ()
-  "C mode with adjusted defaults for use with the Linux kernel."
-  (interactive)
-  (c-mode)
-  (c-set-style "K&R")
-  (setq tab-width 8)
-  (setq indent-tabs-mode t)
-  (setq c-basic-offset 8))
+(defun c-lineup-arglist-tabs-only (ignored)
+  "Line up argument lists by tabs, not spaces"
+  (let* ((anchor (c-langelem-pos c-syntactic-element))
+         (column (c-langelem-2nd-pos c-syntactic-element))
+         (offset (- (1+ column) anchor))
+         (steps (floor offset c-basic-offset)))
+    (* (max steps 1)
+       c-basic-offset)))
 
-This will define the M-x linux-c-mode command.  When hacking on a
-module, if you put the string -*- linux-c -*- somewhere on the first
-two lines, this mode will be automatically invoked. Also, you may want
-to add
-
-(setq auto-mode-alist (cons '("/usr/src/linux.*/.*\.[ch]$" . linux-c-mode)
-			auto-mode-alist))
-
-to your .emacs file if you want to have linux-c-mode switched on
-automagically when you edit source files under /usr/src/linux.
+(add-hook 'c-mode-hook
+          (lambda ()
+            (let ((filename (buffer-file-name)))
+              ;; Enable kernel mode for the appropriate files
+              (when (and filename
+                         (string-match "~/src/linux-trees" filename))
+                (setq indent-tabs-mode t)
+                (c-set-style "linux")
+                (c-set-offset 'arglist-cont-nonempty
+                              '(c-lineup-gcc-asm-reg
+                                c-lineup-arglist-tabs-only))))))
+
+This will make emacs go better with the kernel coding style for C
+files below ~/src/linux-trees.
 
 But even if you fail in getting emacs to do sane formatting, not
 everything is lost: use "indent".
diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl
index 1fd6a1ec7591..8a5dc6e021ff 100644
--- a/Documentation/DocBook/procfs-guide.tmpl
+++ b/Documentation/DocBook/procfs-guide.tmpl
@@ -29,12 +29,12 @@
 
     <revhistory>
       <revision>
-        <revnumber>1.0&nbsp;</revnumber>
+        <revnumber>1.0</revnumber>
         <date>May 30, 2001</date>
         <revremark>Initial revision posted to linux-kernel</revremark>
       </revision>
       <revision>
-        <revnumber>1.1&nbsp;</revnumber>
+        <revnumber>1.1</revnumber>
         <date>June 3, 2001</date>
         <revremark>Revised after comments from linux-kernel</revremark>
       </revision>
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
index 1443cd71d263..8a12f0730c94 100644
--- a/Documentation/accounting/delay-accounting.txt
+++ b/Documentation/accounting/delay-accounting.txt
@@ -11,6 +11,7 @@ the delays experienced by a task while
 a) waiting for a CPU (while being runnable)
 b) completion of synchronous block I/O initiated by the task
 c) swapping in pages
+d) memory reclaim
 
 and makes these statistics available to userspace through
 the taskstats interface.
@@ -41,7 +42,7 @@ this structure. See
      include/linux/taskstats.h
 for a description of the fields pertaining to delay accounting.
 It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
 
 Taking the difference of two successive readings of a given
 counter (say cpu_delay_total) for a task will give the delay
@@ -94,7 +95,9 @@ CPU count real total virtual total delay total
 	7876	92005750	100000000	24001500
 IO	count	delay total
 	0	0
-MEM	count	delay total
+SWAP	count	delay total
+	0	0
+RECLAIM	count	delay total
 	0	0
 
 Get delays seen in executing a given simple command
@@ -108,5 +111,7 @@ CPU count real total virtual total delay total
 	6	4000250		4000000		0
 IO	count	delay total
 	0	0
-MEM	count	delay total
+SWAP	count	delay total
+	0	0
+RECLAIM	count	delay total
 	0	0
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index 40121b5cca14..3f7755f3963f 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -196,14 +196,18 @@ void print_delayacct(struct taskstats *t)
 	       "      %15llu%15llu%15llu%15llu\n"
 	       "IO    %15s%15s\n"
 	       "      %15llu%15llu\n"
-	       "MEM   %15s%15s\n"
+	       "SWAP  %15s%15s\n"
+	       "      %15llu%15llu\n"
+	       "RECLAIM  %12s%15s\n"
 	       "      %15llu%15llu\n",
 	       "count", "real total", "virtual total", "delay total",
 	       t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
 	       t->cpu_delay_total,
 	       "count", "delay total",
 	       t->blkio_count, t->blkio_delay_total,
-	       "count", "delay total", t->swapin_count, t->swapin_delay_total);
+	       "count", "delay total", t->swapin_count, t->swapin_delay_total,
+	       "count", "delay total",
+	       t->freepages_count, t->freepages_delay_total);
 }
 
 void task_context_switch_counts(struct taskstats *t)
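A quick way to exercise the new SWAP and RECLAIM rows is the getdelays tool
itself; a minimal sketch, assuming taskstats is enabled in the kernel and
using a made-up PID:

	$ gcc -o getdelays Documentation/accounting/getdelays.c
	$ ./getdelays -d -p 1234	# print delay accounting stats for PID 1234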
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index cd784f46bf8a..b988d110db59 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -26,6 +26,8 @@ There are three different groups of fields in the struct taskstats:
 
 5) Time accounting for SMT machines
 
+6) Extended delay accounting fields for memory reclaim
+
 Future extension should add fields to the end of the taskstats struct, and
 should not change the relative position of each field within the struct.
 
@@ -170,4 +172,9 @@ struct taskstats {
 	__u64	ac_utimescaled;		/* utime scaled on frequency etc */
 	__u64	ac_stimescaled;		/* stime scaled on frequency etc */
 	__u64	cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim
+	/* Delay waiting for memory reclaim */
+	__u64	freepages_count;
+	__u64	freepages_delay_total;
 }
diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt
new file mode 100644
index 000000000000..d8297e4ebd26
--- /dev/null
+++ b/Documentation/bt8xxgpio.txt
@@ -0,0 +1,67 @@
+===============================================================
+==  BT8XXGPIO driver                                         ==
+==                                                           ==
+==  A driver for a selfmade cheap BT8xx based PCI GPIO-card  ==
+==                                                           ==
+==  For advanced documentation, see                          ==
+==  http://www.bu3sch.de/btgpio.php                          ==
+===============================================================
+
+
+A generic digital 24-port PCI GPIO card can be built out of an ordinary
+Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
+Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
+find them used for low prices on the net.
+
+The bt8xx chip does have 24 digital GPIO ports.
+These ports are accessible via 24 pins on the SMD chip package.
+
+
+==============================================
+==  How to physically access the GPIO pins  ==
+==============================================
+
+There are several ways to access these pins. One might unsolder the whole chip
+and put it on a custom PCI board, or one might only unsolder each individual
+GPIO pin and solder that to some tiny wire. As the chip package really is tiny
+there are some advanced soldering skills needed in any case.
+
+The physical pinouts are drawn in the following ASCII art.
+The GPIO pins are marked with G00-G23.
+
+                 G G G G G G G G G G G G     G G G G G G
+                 0 0 0 0 0 0 0 0 0 0 1 1     1 1 1 1 1 1
+                 0 1 2 3 4 5 6 7 8 9 0 1     2 3 4 5 6 7
+        | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+---------------------------------------------------------------------------
+--|    ^                                                  ^             |--
+--|    pin 86                                             pin 67        |--
+--|                                                                     |--
+--|                                                pin 61 >             |-- G18
+--|                                                                     |-- G19
+--|                                                                     |-- G20
+--|                                                                     |-- G21
+--|                                                                     |-- G22
+--|                                                pin 56 >             |-- G23
+--|                                                                     |--
+--|                   Brooktree 878/879                                 |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|                                                                     |--
+--|    O                                                                |--
+--|                                                                     |--
+---------------------------------------------------------------------------
+  | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+  ^
+  This is pin 1
+
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 866b9cd9a959..9b53d5827361 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -242,8 +242,7 @@ rmdir() if there are no tasks.
 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
 3. Teach controller to account for shared-pages
-4. Start reclamation when the limit is lowered
-5. Start reclamation in the background when the limit is
+4. Start reclamation in the background when the limit is
    not yet hit but the usage is getting closer
 
 Summary
diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index a5c36842ecef..ced527388001 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -222,74 +222,9 @@ both csrow2 and csrow3 are populated, this indicates a dual ranked
 set of DIMMs for channels 0 and 1.
 
 
-Within each of the 'mc','mcX' and 'csrowX' directories are several
+Within each of the 'mcX' and 'csrowX' directories are several
 EDAC control and attribute files.
 
-
-============================================================================
-DIRECTORY 'mc'
-
-In directory 'mc' are EDAC system overall control and attribute files:
-
-
-Panic on UE control file:
-
-	'edac_mc_panic_on_ue'
-
-	An uncorrectable error will cause a machine panic.  This is usually
-	desirable.  It is a bad idea to continue when an uncorrectable error
-	occurs - it is indeterminate what was uncorrected and the operating
-	system context might be so mangled that continuing will lead to further
-	corruption. If the kernel has MCE configured, then EDAC will never
-	notice the UE.
-
-	LOAD TIME: module/kernel parameter: panic_on_ue=[0|1]
-
-	RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue
-
-
-Log UE control file:
-
-	'edac_mc_log_ue'
-
-	Generate kernel messages describing uncorrectable errors.  These errors
-	are reported through the system message log system.  UE statistics
-	will be accumulated even when UE logging is disabled.
-
-	LOAD TIME: module/kernel parameter: log_ue=[0|1]
-
-	RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue
-
-
-Log CE control file:
-
-	'edac_mc_log_ce'
-
-	Generate kernel messages describing correctable errors.  These
-	errors are reported through the system message log system.
-	CE statistics will be accumulated even when CE logging is disabled.
-
-	LOAD TIME: module/kernel parameter: log_ce=[0|1]
-
-	RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce
-
-
-Polling period control file:
-
-	'edac_mc_poll_msec'
-
-	The time period, in milliseconds, for polling for error information.
-	Too small a value wastes resources.  Too large a value might delay
-	necessary handling of errors and might loose valuable information for
-	locating the error.  1000 milliseconds (once each second) is the current
-	default. Systems which require all the bandwidth they can get, may
-	increase this.
-
-	LOAD TIME: module/kernel parameter: poll_msec=[0|1]
-
-	RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec
-
-
 ============================================================================
 'mcX' DIRECTORIES
 
@@ -537,7 +472,6 @@ Channel 1 DIMM Label control file:
 	motherboard specific and determination of this information
 	must occur in userland at this time.
 
-
 ============================================================================
 SYSTEM LOGGING
 
@@ -570,7 +504,6 @@ error type, a notice of "no info" and then an optional,
 driver-specific error message.
 
 
-
 ============================================================================
 PCI Bus Parity Detection
 
@@ -604,6 +537,74 @@ Enable/Disable PCI Parity checking control file:
 	echo "0" >/sys/devices/system/edac/pci/check_pci_parity
 
 
+Parity Count:
+
+	'pci_parity_count'
+
+	This attribute file will display the number of parity errors that
+	have been detected.
+
+
+============================================================================
+MODULE PARAMETERS
+
+Panic on UE control file:
+
+	'edac_mc_panic_on_ue'
+
+	An uncorrectable error will cause a machine panic.  This is usually
+	desirable.  It is a bad idea to continue when an uncorrectable error
+	occurs - it is indeterminate what was uncorrected and the operating
+	system context might be so mangled that continuing will lead to further
+	corruption. If the kernel has MCE configured, then EDAC will never
+	notice the UE.
+
+	LOAD TIME: module/kernel parameter: edac_mc_panic_on_ue=[0|1]
+
+	RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
+
+
+Log UE control file:
+
+	'edac_mc_log_ue'
+
+	Generate kernel messages describing uncorrectable errors.  These errors
+	are reported through the system message log system.  UE statistics
+	will be accumulated even when UE logging is disabled.
+
+	LOAD TIME: module/kernel parameter: edac_mc_log_ue=[0|1]
+
+	RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
+
+
+Log CE control file:
+
+	'edac_mc_log_ce'
+
+	Generate kernel messages describing correctable errors.  These
+	errors are reported through the system message log system.
+	CE statistics will be accumulated even when CE logging is disabled.
+
+	LOAD TIME: module/kernel parameter: edac_mc_log_ce=[0|1]
+
+	RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
+
+
+Polling period control file:
+
+	'edac_mc_poll_msec'
+
+	The time period, in milliseconds, for polling for error information.
+	Too small a value wastes resources.  Too large a value might delay
+	necessary handling of errors and might lose valuable information for
+	locating the error.  1000 milliseconds (once each second) is the current
+	default. Systems which require all the bandwidth they can get, may
+	increase this.
+
+	LOAD TIME: module/kernel parameter: edac_mc_poll_msec=[0|1]
+
+	RUN TIME: echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
+
+
 
 Panic on PCI PARITY Error:
 
@@ -614,21 +615,13 @@ Panic on PCI PARITY Error:
 	error has been detected.
 
 
-	module/kernel parameter: panic_on_pci_parity=[0|1]
+	module/kernel parameter: edac_panic_on_pci_pe=[0|1]
 
 	Enable:
-	echo "1" >/sys/devices/system/edac/pci/panic_on_pci_parity
+	echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
 
 	Disable:
-	echo "0" >/sys/devices/system/edac/pci/panic_on_pci_parity
-
-
-Parity Count:
-
-	'pci_parity_count'
-
-	This attribute file will display the number of parity errors that
-	have been detected.
+	echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
 
 
 
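With the controls moved out of /sys/devices/system/edac, both styles remain
available through the module parameter interface; a minimal sketch, assuming
the edac_core module named in the RUN TIME paths above is in use:

	# modprobe edac_core edac_mc_panic_on_ue=1		# load time
	# echo 1 > /sys/module/edac_core/parameters/edac_mc_log_ce	# run time
	# cat /sys/module/edac_core/parameters/edac_mc_poll_msec	# read back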
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 09c4a1efb8e3..721c71b86e06 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -138,24 +138,6 @@ Who: Kay Sievers <kay.sievers@suse.de>
 
 ---------------------------
 
-What:	find_task_by_pid
-When:	2.6.26
-Why:	With pid namespaces, calling this funciton will return the
-	wrong task when called from inside a namespace.
-
-	The best way to save a task pid and find a task by this
-	pid later, is to find this task's struct pid pointer (or get
-	it directly from the task) and call pid_task() later.
-
-	If someone really needs to get a task by its pid_t, then
-	he most likely needs the find_task_by_vpid() to get the
-	task from the same namespace as the current task is in, but
-	this may be not so in general.
-
-Who:	Pavel Emelyanov <xemul@openvz.org>
-
----------------------------
-
 What:	ACPI procfs interface
 When:	July 2008
 Why:	ACPI sysfs conversion should be finished by January 2008.
161Why: ACPI sysfs conversion should be finished by January 2008. 143Why: ACPI sysfs conversion should be finished by January 2008.
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 2d5e1e582e13..bbac4f1d9056 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -96,6 +96,14 @@ shortname=lower|win95|winnt|mixed
 		 emulate the Windows 95 rule for create.
 		 Default setting is `lower'.
 
+tz=UTC	      -- Interpret timestamps as UTC rather than local time.
+		 This option disables the conversion of timestamps
+		 between local time (as used by Windows on FAT) and UTC
+		 (which Linux uses internally).  This is particularly
+		 useful when mounting devices (like digital cameras)
+		 that are set to UTC in order to avoid the pitfalls of
+		 local time.
+
 <bool>: 0,1,yes,no,true,false
 
 TODO
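A minimal usage sketch for the new option, with a hypothetical device node
and mount point:

	# mount -t vfat -o tz=UTC /dev/sdb1 /mnt/camera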
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index c35ca9e40d4c..18022e249c53 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -347,15 +347,12 @@ necessarily be nonportable.
 Dynamic definition of GPIOs is not currently standard; for example, as
 a side effect of configuring an add-on board with some GPIO expanders.
 
-These calls are purely for kernel space, but a userspace API could be built
-on top of them.
-
 
 GPIO implementor's framework (OPTIONAL)
 =======================================
 As noted earlier, there is an optional implementation framework making it
 easier for platforms to support different kinds of GPIO controller using
-the same programming interface.
+the same programming interface.  This framework is called "gpiolib".
 
 As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
 will be found there.  That will list all the controllers registered through
@@ -392,11 +389,21 @@ either NULL or the label associated with that GPIO when it was requested.
 
 Platform Support
 ----------------
-To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB"
+To support this framework, a platform's Kconfig will "select" either
+ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB
 and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
 three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
 They may also want to provide a custom value for ARCH_NR_GPIOS.
 
+ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled
+into the kernel on that architecture.
+
+ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user
+can enable it and build it into the kernel optionally.
+
+If neither of these options is selected, the platform does not support
+GPIOs through GPIO-lib and the code cannot be enabled by the user.
+
 Trivial implementations of those functions can directly use framework
 code, which always dispatches through the gpio_chip:
 
@@ -439,4 +446,120 @@ becomes available. That may mean the device should not be registered until
 calls for that GPIO can work.  One way to address such dependencies is for
 such gpio_chip controllers to provide setup() and teardown() callbacks to
 board specific code; those board specific callbacks would register devices
-once all the necessary resources are available.
+once all the necessary resources are available, and remove them later when
+the GPIO controller device becomes unavailable.
+
+
+Sysfs Interface for Userspace (OPTIONAL)
+========================================
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs.  This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary.  Plus, it could be
+present on production systems without debugging support.
+
+Given appropriate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory.  System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection.  In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about.  And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+Note that standard kernel drivers exist for common "LEDs and Buttons"
+GPIO tasks:  "leds-gpio" and "gpio_keys", respectively.  Use those
+instead of talking directly to the GPIOs; they integrate with kernel
+frameworks better than your userspace code could.
+
+
+Paths in Sysfs
+--------------
+There are three kinds of entry in /sys/class/gpio:
+
+   -	Control interfaces used to get userspace control over GPIOs;
+
+   -	GPIOs themselves; and
+
+   -	GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+    /sys/class/gpio/
+
+    	"export" ... Userspace may ask the kernel to export control of
+		a GPIO to userspace by writing its number to this file.
+
+		Example:  "echo 19 > export" will create a "gpio19" node
+		for GPIO #19, if that's not requested by kernel code.
+
+    	"unexport" ... Reverses the effect of exporting to userspace.
+
+		Example:  "echo 19 > unexport" will remove a "gpio19"
+		node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+    /sys/class/gpio/gpioN/
+
+	"direction" ... reads as either "in" or "out".  This value may
+		normally be written.  Writing as "out" defaults to
+		initializing the value as low.  To ensure glitch free
+		operation, values "low" and "high" may be written to
+		configure the GPIO as an output with that initial value.
+
+		Note that this attribute *will not exist* if the kernel
+		doesn't support changing the direction of a GPIO, or
+		it was exported by kernel code that didn't explicitly
+		allow userspace to reconfigure this GPIO's direction.
+
+	"value" ... reads as either 0 (low) or 1 (high).  If the GPIO
+		is configured as an output, this value may be written;
+		any nonzero value is treated as high.
+
+GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+    /sys/class/gpio/gpiochipN/
+
+    	"base" ... same as N, the first GPIO managed by this chip
+
+    	"label" ... provided for diagnostics (not always unique)
+
+    	"ngpio" ... how many GPIOs this manages (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes.  However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack.  In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request():
+
+	/* export the GPIO to userspace */
+	int gpio_export(unsigned gpio, bool direction_may_change);
+
+	/* reverse gpio_export() */
+	void gpio_unexport(unsigned gpio);
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpio_export().  The driver can control whether the
+signal direction may change.  This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
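Putting the sysfs pieces above together, a minimal shell sketch, assuming
GPIO #23 exists on the target board and has not been claimed by kernel code:

	# echo 23 > /sys/class/gpio/export		 # creates /sys/class/gpio/gpio23
	# echo high > /sys/class/gpio/gpio23/direction	 # output, driven high from the start
	# cat /sys/class/gpio/gpio23/value		 # reads back 1
	# echo 0 > /sys/class/gpio/gpio23/value		 # drive the line low
	# echo 23 > /sys/class/gpio/unexport		 # remove the gpio23 node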
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 497a98dafdaa..e7bea3e85304 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2159,13 +2159,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			<deci-seconds>: poll all this frequency
 			0: no polling (default)
 
-	tipar.timeout=	[HW,PPT]
-			Set communications timeout in tenths of a second
-			(default 15).
-
-	tipar.delay=	[HW,PPT]
-			Set inter-bit delay in microseconds (default 10).
-
 	tmscsim=	[HW,SCSI]
 			See comment before function dc390_setup() in
 			drivers/scsi/tmscsim.c.
diff --git a/Documentation/moxa-smartio b/Documentation/moxa-smartio
index fe24ecc6372e..5337e80a5b96 100644
--- a/Documentation/moxa-smartio
+++ b/Documentation/moxa-smartio
@@ -1,14 +1,22 @@
 =============================================================================
-
-	  MOXA Smartio Family Device Driver Ver 1.1 Installation Guide
-		 for Linux Kernel 2.2.x and 2.0.3x
-	   Copyright (C) 1999, Moxa Technologies Co, Ltd.
+	  MOXA Smartio/Industio Family Device Driver Installation Guide
+		    for Linux Kernel 2.4.x, 2.6.x
+	       Copyright (C) 2008, Moxa Inc.
 =============================================================================
+Date: 01/21/2008
+
 Content
 
 1. Introduction
 2. System Requirement
 3. Installation
+   3.1 Hardware installation
+   3.2 Driver files
+   3.3 Device naming convention
+   3.4 Module driver configuration
+   3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x.
+   3.6 Custom configuration
+   3.7 Verify driver installation
 4. Utilities
 5. Setserial
 6. Troubleshooting
@@ -16,27 +24,48 @@ Content
 -----------------------------------------------------------------------------
 1. Introduction
 
-   The Smartio family Linux driver, Ver. 1.1, supports following multiport
+   The Smartio/Industio/UPCI family Linux driver supports following multiport
    boards.
 
-    -C104P/H/HS, C104H/PCI, C104HS/PCI, CI-104J 4 port multiport board.
-    -C168P/H/HS, C168H/PCI 8 port multiport board.
-
-   This driver has been modified a little and cleaned up from the Moxa
-   contributed driver code and merged into Linux 2.2.14pre. In particular
-   official major/minor numbers have been assigned which are different to
-   those the original Moxa supplied driver used.
+    - 2 ports multiport board
+	CP-102U, CP-102UL, CP-102UF
+	CP-132U-I, CP-132UL,
+	CP-132, CP-132I, CP132S, CP-132IS,
+	CI-132, CI-132I, CI-132IS,
+	(C102H, C102HI, C102HIS, C102P, CP-102, CP-102S)
+
+    - 4 ports multiport board
+	CP-104EL,
+	CP-104UL, CP-104JU,
+	CP-134U, CP-134U-I,
+	C104H/PCI, C104HS/PCI,
+	CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL,
+	C104H, C104HS,
+	CI-104J, CI-104JS,
+	CI-134, CI-134I, CI-134IS,
+	(C114HI, CT-114I, C104P)
+	POS-104UL,
+	CB-114,
+	CB-134I
+
+    - 8 ports multiport board
+	CP-118EL, CP-168EL,
+	CP-118U, CP-168U,
+	C168H/PCI,
+	C168H, C168HS,
+	(C168P),
+	CB-108
 
    This driver and installation procedure have been developed upon Linux Kernel
-   2.2.5 and backward compatible to 2.0.3x. This driver supports Intel x86 and
-   Alpha hardware platform. In order to maintain compatibility, this version
-   has also been properly tested with RedHat, OpenLinux, TurboLinux and
-   S.u.S.E Linux. However, if compatibility problem occurs, please contact
-   Moxa at support@moxa.com.tw.
+   2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order
+   to maintain compatibility, this version has also been properly tested with
+   RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem
+   occurs, please contact Moxa at support@moxa.com.tw.
 
    In addition to device driver, useful utilities are also provided in this
    version. They are
-   - msdiag Diagnostic program for detecting installed Moxa Smartio boards.
+   - msdiag Diagnostic program for displaying installed Moxa
+	    Smartio/Industio boards.
    - msmon  Monitor program to observe data count and line status signals.
    - msterm A simple terminal program which is useful in testing serial
 	    ports.
@@ -47,8 +76,7 @@ Content
    GNU General Public License in this version. Please refer to GNU General
    Public License announcement in each source code file for more detail.
 
-   In Moxa's ftp sites, you may always find latest driver at
-   ftp://ftp.moxa.com or ftp://ftp.moxa.com.tw.
+   In Moxa's Web sites, you may always find latest driver at http://web.moxa.com.
 
    This version of driver can be installed as Loadable Module (Module driver)
    or built-in into kernel (Static driver). You may refer to following
@@ -61,8 +89,8 @@ Content
 
 -----------------------------------------------------------------------------
 2. System Requirement
-   - Hardware platform: Intel x86 or Alpha machine
-   - Kernel version: 2.0.3x or 2.2.x
+   - Hardware platform: Intel x86 machine
+   - Kernel version: 2.4.x or 2.6.x
    - gcc version 2.72 or later
    - Maximum 4 boards can be installed in combination
 
@@ -70,9 +98,18 @@ Content
 3. Installation
 
    3.1 Hardware installation
+   3.2 Driver files
+   3.3 Device naming convention
+   3.4 Module driver configuration
+   3.5 Static driver configuration for Linux kernel 2.4.x, 2.6.x.
+   3.6 Custom configuration
+   3.7 Verify driver installation
+
+
+   3.1 Hardware installation
 
-   There are two types of buses, ISA and PCI, for Smartio family multiport
-   board.
+   There are two types of buses, ISA and PCI, for Smartio/Industio
+   family multiport board.
 
    ISA board
    ---------
@@ -81,47 +118,57 @@ Content
    installation procedure in User's Manual before proceed any further.
    Please make sure the JP1 is open after the ISA board is set properly.
 
-   PCI board
-   ---------
+   PCI/UPCI board
+   --------------
    You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict
    with other ISA devices. Please refer to hardware installation
    procedure in User's Manual in advance.
 
-   IRQ Sharing
+   PCI IRQ Sharing
    -----------
    Each port within the same multiport board shares the same IRQ. Up to
-   4 Moxa Smartio Family multiport boards can be installed together on
-   one system and they can share the same IRQ.
+   4 Moxa Smartio/Industio PCI Family multiport boards can be installed
+   together on one system and they can share the same IRQ.
+
 
-   3.2 Driver files and device naming convention
+   3.2 Driver files
 
    The driver file may be obtained from ftp, CD-ROM or floppy disk. The
    first step, anyway, is to copy driver file "mxser.tgz" into specified
    directory. e.g. /moxa. The execute commands as below.
 
+	# cd /
+	# mkdir moxa
 	# cd /moxa
 	# tar xvf /dev/fd0
+
        or
+
+	# cd /
+	# mkdir moxa
 	# cd /moxa
 	# cp /mnt/cdrom/<driver directory>/mxser.tgz .
 	# tar xvfz mxser.tgz
 
+
+   3.3 Device naming convention
+
    You may find all the driver and utilities files in /moxa/mxser.
    Following installation procedure depends on the model you'd like to
-   run the driver. If you prefer module driver, please refer to 3.3.
-   If static driver is required, please refer to 3.4.
+   run the driver. If you prefer module driver, please refer to 3.4.
+   If static driver is required, please refer to 3.5.
 
    Dialin and callout port
    -----------------------
-   This driver remains traditional serial device properties. There're
+   This driver remains traditional serial device properties. There are
    two special file name for each serial port. One is dial-in port
    which is named "ttyMxx". For callout port, the naming convention
    is "cumxx".
 
    Device naming when more than 2 boards installed
    -----------------------------------------------
-   Naming convention for each Smartio multiport board is pre-defined
-   as below.
+   Naming convention for each Smartio/Industio multiport board is
+   pre-defined as below.
 
    Board Num.	 Dial-in Port	      Callout port
    1st board	ttyM0  - ttyM7	      cum0  - cum7
@@ -129,6 +176,12 @@ Content
129 3rd board ttyM16 - ttyM23 cum16 - cum23 176 3rd board ttyM16 - ttyM23 cum16 - cum23
130 4th board ttyM24 - ttyM31 cum24 - cum31 177 4th board ttyM24 - ttyM31 cum24 - cum31
131 178
179
180 !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 181 Under kernel 2.6 the cum device is obsolete, so use the ttyM*
182 device instead.
183 !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
184
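        For instance, with two boards installed, the special files created
        by "msmknod" can be listed with (an illustrative check, not part of
        the original procedure):

        # ls -l /dev/ttyM* /dev/cum*
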
132 Board sequence 185 Board sequence
133 -------------- 186 --------------
134 This driver will activate ISA boards according to the parameter set 187 This driver will activate ISA boards according to the parameter set
@@ -138,69 +191,131 @@ Content
138 For PCI boards, their sequence will be after ISA boards and C168H/PCI 191 For PCI boards, their sequence will be after ISA boards and C168H/PCI
139 has higher priority than C104H/PCI boards. 192 has higher priority than C104H/PCI boards.
140 193
141 3.3 Module driver configuration 194 3.4 Module driver configuration
142 The module driver is the easiest way to install. If you prefer static 195 The module driver is the easiest way to install. If you prefer static
143 driver installation, please skip this section. 196 driver installation, please skip this section.
144 1. Find "Makefile" in /moxa/mxser, then run
145 197
146 # make install 198
 199 ------------- Prepare to use the MOXA driver --------------------
 200 3.4.1 Create tty devices with the correct major number
 201 Before using the MOXA driver, your system must have the tty devices
 202 created with the driver's major number. We offer a shell
 203 script, "msmknod", to simplify the procedure.
 204 This step only needs to be executed once, but you still
 205 need to repeat it when:
 206 a. You change the driver's major number. Please refer to the "3.7"
 207 section.
 208 b. The number of installed MOXA boards changes, e.g. you
 209 add or delete a MOXA board.
 210 c. You want to change the tty names. This requires modifying the
 211 shell script "msmknod".
212
213 The procedure is:
214 # cd /moxa/mxser/driver
215 # ./msmknod
216
 217 This shell script will ask for the major numbers of the dial-in
 218 and callout devices in order to create the tty devices. You also need
 219 to specify the total number of installed MOXA boards. The default major
 220 numbers for the dial-in and callout devices are 30 and 35. If
 221 you need to change them, please refer to section "3.7"
 222 for the detailed procedure.
 223 Msmknod will delete any special files occupying the same device
 224 names.
225
226 3.4.2 Build the MOXA driver and utilities
 227 Before using the MOXA driver and utilities, you need to compile
 228 all the source code. This step only needs to be executed once,
 229 but you must re-compile whenever you modify the source
 230 code. For example, if you change the driver's major number (see
 231 section "3.7"), then you need to do this step again.
232
233 Find "Makefile" in /moxa/mxser, then run
234
235 # make clean; make install
236
237 !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
238 For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1:
239 # make clean; make installsp1
240
241 For Red Hat Enterprise Linux AS4/ES4/WS4:
242 # make clean; make installsp2
243 !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
147 244
148 The driver files "mxser.o" and utilities will be properly compiled 245 The driver files "mxser.o" and utilities will be properly compiled
149 and copied to system directories respectively.Then run 246 and copied to system directories respectively.
150 247
151 # insmod mxser 248 ------------- Load MOXA driver--------------------
249 3.4.3 Load the MOXA driver
152 250
153 to activate the modular driver. You may run "lsmod" to check 251 # modprobe mxser <argument>
154 if "mxser.o" is activated.
155 252
156 2. Create special files by executing "msmknod". 253 will activate the module driver. You may run "lsmod" to check
157 # cd /moxa/mxser/driver 254 if "mxser" is activated. If the MOXA board is an ISA board, the
158 # ./msmknod 255 <argument> is needed. Please refer to section "3.4.5" for more
256 information.
257
258
259 ------------- Load MOXA driver on boot --------------------
 260 3.4.4 As described above, you may manually execute
 261 "modprobe mxser" to activate this driver and run
 262 "rmmod mxser" to remove it.
 263 However, it's better to have a boot-time configuration to
 264 eliminate manual operation. Boot-time configuration can be
 265 achieved with an rc file. We offer an "rc.mxser" file to simplify
266 the procedure under "moxa/mxser/driver".
159 267
160 Default major numbers for dial-in device and callout device are 268 But if you use an ISA board, please modify the "modprobe ..." command
161 174, 175. Msmknod will delete any special files occupying the same 269 to add the argument (see section "3.4.5"). After modifying
162 device naming. 270 rc.mxser, please execute "/moxa/mxser/driver/rc.mxser"
 271 manually to make sure the modification is OK. If any error
 272 is encountered, please modify the file again. Once the modification
 273 is complete, follow the steps below.
163 274
164 3. Up to now, you may manually execute "insmod mxser" to activate 275 Run the following commands to set up the rc files.
165 this driver and run "rmmod mxser" to remove it. However, it's
166 better to have a boot time configuration to eliminate manual
167 operation.
168 Boot time configuration can be achieved by rc file. Run following
169 command for setting rc files.
170 276
171 # cd /moxa/mxser/driver 277 # cd /moxa/mxser/driver
172 # cp ./rc.mxser /etc/rc.d 278 # cp ./rc.mxser /etc/rc.d
173 # cd /etc/rc.d 279 # cd /etc/rc.d
174 280
175 You may have to modify part of the content in rc.mxser to specify 281 Check whether "rc.serial" exists. If it doesn't,
176 parameters for ISA board. Please refer to rc.mxser for more detail. 282 create it with vi and run "chmod 755 rc.serial" to set its permissions.
177 Find "rc.serial". If "rc.serial" doesn't exist, create it by vi. 283 Add "/etc/rc.d/rc.mxser" as its last line.
178 Add "rc.mxser" in last line. Next, open rc.local by vi
179 and append following content.
180 284
181 if [ -f /etc/rc.d/rc.serial ]; then 285 Reboot and check with the "lsmod" command that mxser is activated.
182 sh /etc/rc.d/rc.serial
183 fi
184 286
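        As a sketch only (this document does not mandate the exact contents),
        the rc.serial created above could be as minimal as:

        #!/bin/sh
        # Assumed minimal /etc/rc.d/rc.serial: its only job here is to
        # invoke the Moxa rc file installed above.
        /etc/rc.d/rc.mxser
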
185 4. Reboot and check if mxser.o activated by "lsmod" command. 287 3.4.5. If you'd like to drive Smartio/Industio ISA boards in the system,
186 5. If you'd like to drive Smartio ISA boards in the system, you'll 288 you'll have to add a parameter specifying the CAP address of each
187 have to add parameter to specify CAP address of given board while 289 board when activating "mxser.o". The format for the parameters is
188 activating "mxser.o". The format for parameters are as follows. 290 as follows.
189 291
190 insmod mxser ioaddr=0x???,0x???,0x???,0x??? 292 modprobe mxser ioaddr=0x???,0x???,0x???,0x???
191 | | | | 293 | | | |
192 | | | +- 4th ISA board 294 | | | +- 4th ISA board
193 | | +------ 3rd ISA board 295 | | +------ 3rd ISA board
194 | +------------ 2nd ISA board 296 | +------------ 2nd ISA board
195 +------------------- 1st ISA board 297 +------------------- 1st ISA board
196 298
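        For example, assuming two ISA boards at the CAP addresses also used
        in the mxserBoardCAP example later in this document, the module
        would be loaded as:

        # modprobe mxser ioaddr=0x280,0x180
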
197 3.4 Static driver configuration 299 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x
300
 301 Note: To use the static driver, you must install the Linux kernel
302 source package.
303
304 3.5.1 Backup the built-in driver in the kernel.
305 # cd /usr/src/linux/drivers/char
306 # mv mxser.c mxser.c.old
307
 308 For Red Hat 7.x users, you need to create a link:
309 # cd /usr/src
310 # ln -s linux-2.4 linux
198 311
199 1. Create link 312 3.5.2 Create link
200 # cd /usr/src/linux/drivers/char 313 # cd /usr/src/linux/drivers/char
201 # ln -s /moxa/mxser/driver/mxser.c mxser.c 314 # ln -s /moxa/mxser/driver/mxser.c mxser.c
202 315
203 2. Add CAP address list for ISA boards 316 3.5.3 Add CAP address list for ISA boards. For PCI board users,
317 please skip this step.
318
204 In module mode, the CAP address for an ISA board is given by 319 In module mode, the CAP address for an ISA board is given by
205 a parameter. In the static driver configuration, you'll have to 320 a parameter. In the static driver configuration, you'll have to
206 assign it within the driver's source code. If you will not 321 assign it within the driver's source code. If you will not
@@ -222,73 +337,55 @@ Content
222 static int mxserBoardCAP[] 337 static int mxserBoardCAP[]
223 = {0x280, 0x180, 0x00, 0x00}; 338 = {0x280, 0x180, 0x00, 0x00};
224 339
225 3. Modify tty_io.c 340 3.5.4 Setup kernel configuration
226 # cd /usr/src/linux/drivers/char/
227 # vi tty_io.c
228 Find pty_init(), insert "mxser_init()" as
229 341
230 pty_init(); 342 Configure the kernel:
231 mxser_init();
232 343
233 4. Modify tty.h 344 # cd /usr/src/linux
234 # cd /usr/src/linux/include/linux 345 # make menuconfig
235 # vi tty.h
236 Find extern int tty_init(void), insert "mxser_init()" as
237 346
238 extern int tty_init(void); 347 You will go into a menu-driven system. Please select [Character
239 extern int mxser_init(void); 348 devices][Non-standard serial port support], enable the [Moxa
240 349 SmartIO support] driver with "[*]" for built-in (not "[M]"), then
241 5. Modify Makefile 350 select [Exit] to exit this program.
242 # cd /usr/src/linux/drivers/char
243 # vi Makefile
244 Find L_OBJS := tty_io.o ...... random.o, add
245 "mxser.o" at last of this line as
246 L_OBJS := tty_io.o ....... mxser.o
247 351
248 6. Rebuild kernel 352 3.5.5 Rebuild kernel
249 The following are for Linux kernel rebuilding,for your reference only. 353 The following are for Linux kernel rebuilding, for your
354 reference only.
250 For appropriate details, please refer to the Linux document. 355 For appropriate details, please refer to the Linux document.
251 356
252 If 'lilo' utility is installed, please use 'make zlilo' to rebuild
253 kernel. If 'lilo' is not installed, please follow the following steps.
254
255 a. cd /usr/src/linux 357 a. cd /usr/src/linux
256 b. make clean /* take a few minutes */ 358 b. make clean /* take a few minutes */
257 c. make bzImage /* take probably 10-20 minutes */ 359 c. make dep /* take a few minutes */
258 d. Backup original boot kernel. /* optional step */ 360 d. make bzImage /* take probably 10-20 minutes */
259 e. cp /usr/src/linux/arch/i386/boot/bzImage /boot/vmlinuz 361 e. make install /* copy boot image to correct position */
260 f. Please make sure the boot kernel (vmlinuz) is in the 362 f. Please make sure the boot kernel (vmlinuz) is in the
261 correct position. If you use 'lilo' utility, you should 363 correct position.
262 check /etc/lilo.conf 'image' item specified the path 364 g. If you use 'lilo' utility, you should check /etc/lilo.conf
263 which is the 'vmlinuz' path, or you will load wrong 365 'image' item specified the path which is the 'vmlinuz' path,
264 (or old) boot kernel image (vmlinuz). 366 or you will load wrong (or old) boot kernel image (vmlinuz).
265 g. chmod 400 /vmlinuz 367 After checking /etc/lilo.conf, please run "lilo".
266 h. lilo 368
267 i. rdev -R /vmlinuz 1 369 Note that if "make bzImage" fails with an error, you have to
268 j. sync 370 go back to the Linux configuration setup: type "make menuconfig" in
269 371 the directory /usr/src/linux.
270 Note that if the result of "make zImage" is ERROR, then you have to 372
271 go back to Linux configuration Setup. Type "make config" in directory 373
272 /usr/src/linux or "setup". 374 3.5.6 Make tty device and special file
273
274 Since system include file, /usr/src/linux/include/linux/interrupt.h,
275 is modified each time the MOXA driver is installed, kernel rebuilding
276 is inevitable. And it takes about 10 to 20 minutes depends on the
277 machine.
278
279 7. Make utility
280 # cd /moxa/mxser/utility
281 # make install
282
283 8. Make special file
284 # cd /moxa/mxser/driver 375 # cd /moxa/mxser/driver
285 # ./msmknod 376 # ./msmknod
286 377
287 9. Reboot 378 3.5.7 Make utility
379 # cd /moxa/mxser/utility
380 # make clean; make install
381
382 3.5.8 Reboot
288 383
289 3.5 Custom configuration 384
385
386 3.6 Custom configuration
290 Although this driver already provides a default configuration, you 387 Although this driver already provides a default configuration, you
291 still can change the device name and major number.The instruction to 388 still can change the device name and major number. The instructions to
292 change these parameters are shown as below. 389 change these parameters are shown below.
293 390
294 Change Device name 391 Change Device name
@@ -306,33 +403,37 @@ Content
306 2 free major numbers for this driver. There are 3 steps to change 403 2 free major numbers for this driver. There are 3 steps to change
307 major numbers. 404 major numbers.
308 405
309 1. Find free major numbers 406 3.6.1 Find free major numbers
310 In /proc/devices, you may find all the major numbers occupied 407 In /proc/devices, you may find all the major numbers occupied
311 in the system. Please select 2 major numbers that are available. 408 in the system. Please select 2 major numbers that are available.
312 e.g. 40, 45. 409 e.g. 40, 45.
313 2. Create special files 410 3.6.2 Create special files
314 Run /moxa/mxser/driver/msmknod to create special files with 411 Run /moxa/mxser/driver/msmknod to create special files with
315 specified major numbers. 412 specified major numbers.
316 3. Modify driver with new major number 413 3.6.3 Modify driver with new major number
317 Run vi to open /moxa/mxser/driver/mxser.c. Locate the line 414 Run vi to open /moxa/mxser/driver/mxser.c. Locate the line
318 containing "MXSERMAJOR". Change the content as below. 415 containing "MXSERMAJOR". Change the content as below.
319 #define MXSERMAJOR 40 416 #define MXSERMAJOR 40
320 #define MXSERCUMAJOR 45 417 #define MXSERCUMAJOR 45
321 4. Run # make install in /moxa/mxser/driver. 418 3.6.4 Run "make clean; make install" in /moxa/mxser/driver.
322 419
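        Putting 3.6.1 to 3.6.4 together, a typical session might look like
        this (the majors 40 and 45 are examples only):

        # cat /proc/devices              (pick two unused majors, e.g. 40, 45)
        # cd /moxa/mxser/driver
        # ./msmknod                      (create special files with the new majors)
        # vi mxser.c                     (set MXSERMAJOR/MXSERCUMAJOR accordingly)
        # make clean; make install
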
323 3.6 Verify driver installation 420 3.7 Verify driver installation
324 You may refer to /var/log/messages to check the latest status 421 You may refer to /var/log/messages to check the latest status
325 log reported by this driver whenever it's activated. 422 log reported by this driver whenever it's activated.
423
326----------------------------------------------------------------------------- 424-----------------------------------------------------------------------------
3274. Utilities 4254. Utilities
328 There are 3 utilities contained in this driver: msdiag, msmon and 426 There are 3 utilities contained in this driver: msdiag, msmon and
329 msterm. These 3 utilities are released in the form of source code. They should 427 msterm. These 3 utilities are released in the form of source code. They should
330 be compiled into executables and copied into /usr/bin. 428 be compiled into executables and copied into /usr/bin.
331 429
 430 Before using these utilities, please load the driver (refer to 3.4 & 3.5)
 431 and make sure you have run the "msmknod" script.
432
332 msdiag - Diagnostic 433 msdiag - Diagnostic
333 -------------------- 434 --------------------
334 This utility provides the function to detect what Moxa Smartio multiport 435 This utility displays which Moxa Smartio/Industio
335 board exists in the system. 436 boards the driver found in the system.
336 437
337 msmon - Port Monitoring 438 msmon - Port Monitoring
338 ----------------------- 439 -----------------------
@@ -353,12 +454,13 @@ Content
353 application, for example, sending AT commands to a modem connected to the 454 application, for example, sending AT commands to a modem connected to the
354 port, or as a terminal for login purposes. Note that this is only a 455 port, or as a terminal for login purposes. Note that this is only a
355 dumb terminal emulation without full-screen handling. 456 dumb terminal emulation without full-screen handling.
457
356----------------------------------------------------------------------------- 458-----------------------------------------------------------------------------
3575. Setserial 4595. Setserial
358 460
359 The supported setserial parameters are listed below. 461 The supported setserial parameters are listed below.
360 462
361 uart set UART type (16450 --> disable FIFO, 16550A --> enable FIFO) 463 uart set UART type (16450 --> disable FIFO, 16550A --> enable FIFO)
362 close_delay set the amount of time (in 1/100 of a second) that DTR 464 close_delay set the amount of time (in 1/100 of a second) that DTR
363 should be kept low while being closed. 465 should be kept low while being closed.
364 closing_wait set the amount of time (in 1/100 of a second) that the 466 closing_wait set the amount of time (in 1/100 of a second) that the
@@ -366,7 +468,13 @@ Content
366 being closed, before the receiver is disabled. 468 being closed, before the receiver is disabled.
367 spd_hi Use 57.6kb when the application requests 38.4kb. 469 spd_hi Use 57.6kb when the application requests 38.4kb.
368 spd_vhi Use 115.2kb when the application requests 38.4kb. 470 spd_vhi Use 115.2kb when the application requests 38.4kb.
471 spd_shi Use 230.4kb when the application requests 38.4kb.
472 spd_warp Use 460.8kb when the application requests 38.4kb.
369 spd_normal Use 38.4kb when the application requests 38.4kb. 473 spd_normal Use 38.4kb when the application requests 38.4kb.
474 spd_cust Use the custom divisor to set the speed when the
475 application requests 38.4kb.
 476 divisor This option sets the custom divisor.
 477 baud_base This option sets the base baud rate.
370 478
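        As an illustration (the device name and setting are examples only),
        a port can be remapped to 115.2kb and then inspected with:

        # setserial /dev/ttyM0 spd_vhi
        # setserial -a /dev/ttyM0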
371----------------------------------------------------------------------------- 479-----------------------------------------------------------------------------
3726. Troubleshooting 4806. Troubleshooting
@@ -375,8 +483,9 @@ Content
375 possible. If all the possible solutions fail, please contact our technical 483 possible. If all the possible solutions fail, please contact our technical
376 support team to get more help. 484 support team to get more help.
377 485
378 Error msg: More than 4 Moxa Smartio family boards found. Fifth board and 486
379 after are ignored. 487 Error msg: More than 4 Moxa Smartio/Industio family boards found. Fifth board
488 and after are ignored.
380 Solution: 489 Solution:
381 To avoid this problem, please unplug the fifth and subsequent boards, 490 To avoid this problem, please unplug the fifth and subsequent boards,
382 because the Moxa driver supports up to 4 boards. 491 because the Moxa driver supports up to 4 boards.
@@ -384,7 +493,7 @@ Content
384 Error msg: Request_irq fail, IRQ(?) may be conflict with another device. 493 Error msg: Request_irq fail, IRQ(?) may be conflict with another device.
385 Solution: 494 Solution:
386 Other PCI or ISA devices occupy the assigned IRQ. If you are not sure 495 Other PCI or ISA devices occupy the assigned IRQ. If you are not sure
387 which device causes the situation,please check /proc/interrupts to find 496 which device causes the situation, please check /proc/interrupts to find
388 a free IRQ and simply assign another free IRQ to the Moxa board. 497 a free IRQ and simply assign another free IRQ to the Moxa board.
389 498
390 Error msg: Board #: C1xx Series(CAP=xxx) interrupt number invalid. 499 Error msg: Board #: C1xx Series(CAP=xxx) interrupt number invalid.
@@ -397,15 +506,18 @@ Content
397 The Moxa ISA board needs an interrupt vector. Please refer to the user's manual 506 The Moxa ISA board needs an interrupt vector. Please refer to the user's manual
398 "Hardware Installation" chapter to set the interrupt vector. 507 "Hardware Installation" chapter to set the interrupt vector.
399 508
400 Error msg: Couldn't install MOXA Smartio family driver! 509 Error msg: Couldn't install MOXA Smartio/Industio family driver!
401 Solution: 510 Solution:
402 Loading the Moxa driver failed; the major number may conflict with other devices. 511 Loading the Moxa driver failed; the major number may conflict with other devices.
403 Please refer to previous section 3.5 to change a free major number for 512 Please refer to section 3.7 above to change to a free major number for
404 the Moxa driver. 513 the Moxa driver.
405 514
406 Error msg: Couldn't install MOXA Smartio family callout driver! 515 Error msg: Couldn't install MOXA Smartio/Industio family callout driver!
407 Solution: 516 Solution:
408 Loading the Moxa callout driver failed; the callout device major number may 517 Loading the Moxa callout driver failed; the callout device major number may
409 conflict with other devices. Please refer to previous section 3.5 to 518 conflict with other devices. Please refer to section 3.7 above to
410 change to a free callout device major number for the Moxa driver. 519 change to a free callout device major number for the Moxa driver.
520
521
411----------------------------------------------------------------------------- 522-----------------------------------------------------------------------------
523
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index ea1b70b35793..99514ced82c5 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -59,6 +59,7 @@ Table of Contents
59 p) Freescale Synchronous Serial Interface 59 p) Freescale Synchronous Serial Interface
60 q) USB EHCI controllers 60 q) USB EHCI controllers
61 r) MDIO on GPIOs 61 r) MDIO on GPIOs
62 s) SPI busses
62 63
63 VII - Marvell Discovery mv64[345]6x System Controller chips 64 VII - Marvell Discovery mv64[345]6x System Controller chips
64 1) The /system-controller node 65 1) The /system-controller node
@@ -1883,6 +1884,62 @@ platforms are moved over to use the flattened-device-tree model.
1883 &qe_pio_c 6>; 1884 &qe_pio_c 6>;
1884 }; 1885 };
1885 1886
1887 s) SPI (Serial Peripheral Interface) busses
1888
1889 SPI busses can be described with a node for the SPI master device
1890 and a set of child nodes for each SPI slave on the bus. For this
1891 discussion, it is assumed that the system's SPI controller is in
1892 SPI master mode. This binding does not describe SPI controllers
1893 in slave mode.
1894
1895 The SPI master node requires the following properties:
1896 - #address-cells - number of cells required to define a chip select
1897 address on the SPI bus.
1898 - #size-cells - should be zero.
1899 - compatible - name of SPI bus controller following generic names
1900 recommended practice.
1901 No other properties are required in the SPI bus node. It is assumed
1902 that a driver for an SPI bus device will understand that it is an SPI bus.
1903 However, the binding does not attempt to define the specific method for
1904 assigning chip select numbers. Since SPI chip select configuration is
1905 flexible and non-standardized, it is left out of this binding with the
1906 assumption that board specific platform code will be used to manage
1907 chip selects. Individual drivers can define additional properties to
1908 support describing the chip select layout.
1909
1910 SPI slave nodes must be children of the SPI master node and can
1911 contain the following properties.
1912 - reg - (required) chip select address of device.
1913 - compatible - (required) name of SPI device following generic names
1914 recommended practice
1915 - spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz
1916 - spi-cpol - (optional) Empty property indicating device requires
1917 inverse clock polarity (CPOL) mode
1918 - spi-cpha - (optional) Empty property indicating device requires
1919 shifted clock phase (CPHA) mode
1920
1921 SPI example for an MPC5200 SPI bus:
1922 spi@f00 {
1923 #address-cells = <1>;
1924 #size-cells = <0>;
1925 compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
1926 reg = <0xf00 0x20>;
1927 interrupts = <2 13 0 2 14 0>;
1928 interrupt-parent = <&mpc5200_pic>;
1929
1930 ethernet-switch@0 {
1931 compatible = "micrel,ks8995m";
1932 spi-max-frequency = <1000000>;
1933 reg = <0>;
1934 };
1935
1936 codec@1 {
1937 compatible = "ti,tlv320aic26";
1938 spi-max-frequency = <100000>;
1939 reg = <1>;
1940 };
1941 };
1942
1886VII - Marvell Discovery mv64[345]6x System Controller chips 1943VII - Marvell Discovery mv64[345]6x System Controller chips
1887=========================================================== 1944===========================================================
1888 1945
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
index b0472ac5226a..f866c72291bf 100644
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/unaligned-memory-access.txt
@@ -218,9 +218,35 @@ If use of such macros is not convenient, another option is to use memcpy(),
218where the source or destination (or both) are of type u8* or unsigned char*. 218where the source or destination (or both) are of type u8* or unsigned char*.
219Due to the byte-wise nature of this operation, unaligned accesses are avoided. 219Due to the byte-wise nature of this operation, unaligned accesses are avoided.
220 220
221
222Alignment vs. Networking
223========================
224
225On architectures that require aligned loads, networking requires that the IP
226header is aligned on a four-byte boundary to optimise the IP stack. For
227regular ethernet hardware, the constant NET_IP_ALIGN is used. On most
228architectures this constant has the value 2 because the normal ethernet
229header is 14 bytes long, so in order to get proper alignment one needs to
230DMA to an address which can be expressed as 4*n + 2. One notable exception
231here is powerpc which defines NET_IP_ALIGN to 0 because DMA to unaligned
232addresses can be very expensive and dwarf the cost of unaligned loads.
233
234For some ethernet hardware that cannot DMA to unaligned addresses like
2354*n+2, or for non-ethernet hardware, this can be a problem, and it is then
236required to copy the incoming frame into an aligned buffer. Because this is
237unnecessary on architectures that can do unaligned accesses, the code can be
238made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so:
239
240#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
241 skb = original skb
242#else
243 skb = copy skb
244#endif
245
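As a concrete sketch of that pattern, consider hypothetical receive-path code
(the helper and its policy are illustrative, not taken from any real driver;
it assumes a linear skb and the declarations from <linux/skbuff.h> and
<linux/netdevice.h>):

static struct sk_buff *rx_realign(struct sk_buff *skb, struct net_device *dev)
{
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	return skb;		/* unaligned IP header is cheap here */
#else
	struct sk_buff *nskb = netdev_alloc_skb(dev, skb->len + NET_IP_ALIGN);

	if (!nskb)
		return skb;	/* no memory; a real driver might drop instead */

	/* Reserve 2 bytes so the 14-byte ethernet header starts at 4*n + 2
	 * and the IP header therefore lands on a 4*n boundary. */
	skb_reserve(nskb, NET_IP_ALIGN);
	skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len);
	dev_kfree_skb(skb);
	return nskb;
#endif
}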
221-- 246--
222Author: Daniel Drake <dsd@gentoo.org> 247Authors: Daniel Drake <dsd@gentoo.org>,
248 Johannes Berg <johannes@sipsolutions.net>
223With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt, 249With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
224Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, 250Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
225Uli Kunitz, Vadim Lobanov 251Vadim Lobanov
226 252
diff --git a/MAINTAINERS b/MAINTAINERS
index be05ef9b7b42..4cbf6016a9b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1043,6 +1043,12 @@ M: fujita.tomonori@lab.ntt.co.jp
1043L: linux-scsi@vger.kernel.org 1043L: linux-scsi@vger.kernel.org
1044S: Supported 1044S: Supported
1045 1045
1046BT8XXGPIO DRIVER
1047P: Michael Buesch
1048M: mb@bu3sch.de
1049W: http://bu3sch.de/btgpio.php
1050S: Maintained
1051
1046BTTV VIDEO4LINUX DRIVER 1052BTTV VIDEO4LINUX DRIVER
1047P: Mauro Carvalho Chehab 1053P: Mauro Carvalho Chehab
1048M: mchehab@infradead.org 1054M: mchehab@infradead.org
diff --git a/arch/Kconfig b/arch/Kconfig
index 6093c0be58b0..b0fabfa864ff 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -27,6 +27,25 @@ config KPROBES
27 for kernel debugging, non-intrusive instrumentation and testing. 27 for kernel debugging, non-intrusive instrumentation and testing.
28 If in doubt, say "N". 28 If in doubt, say "N".
29 29
30config HAVE_EFFICIENT_UNALIGNED_ACCESS
31 def_bool n
32 help
33 Some architectures are unable to perform unaligned accesses
34 without the use of get_unaligned/put_unaligned. Others are
35 unable to perform such accesses efficiently (e.g. trap on
36 unaligned access and require fixing it up in the exception
37 handler.)
38
39 This symbol should be selected by an architecture if it can
40 perform unaligned accesses efficiently to allow different
41 code paths to be selected for these cases. Some network
42 drivers, for example, could opt to not fix up alignment
43 problems with received packets if doing so would not help
44 much.
45
46 See Documentation/unaligned-memory-access.txt for more
47 information on the topic of unaligned memory accesses.
48
30config KRETPROBES 49config KRETPROBES
31 def_bool y 50 def_bool y
32 depends on KPROBES && HAVE_KRETPROBES 51 depends on KPROBES && HAVE_KRETPROBES
diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c
index c00646b25f6e..3047a1b3a517 100644
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -78,8 +78,6 @@ static unsigned outcnt; /* bytes in output buffer */
78static int fill_inbuf(void); 78static int fill_inbuf(void);
79static void flush_window(void); 79static void flush_window(void);
80static void error(char *m); 80static void error(char *m);
81static void gzip_mark(void **);
82static void gzip_release(void **);
83 81
84static char *input_data; 82static char *input_data;
85static int input_data_size; 83static int input_data_size;
@@ -88,51 +86,18 @@ static uch *output_data;
88static ulg output_ptr; 86static ulg output_ptr;
89static ulg bytes_out; 87static ulg bytes_out;
90 88
91static void *malloc(int size);
92static void free(void *where);
93static void error(char *m); 89static void error(char *m);
94static void gzip_mark(void **); 90static void gzip_mark(void **);
95static void gzip_release(void **); 91static void gzip_release(void **);
96 92
97extern int end; 93extern int end;
98static ulg free_mem_ptr; 94static ulg free_mem_ptr;
99static ulg free_mem_ptr_end; 95static ulg free_mem_end_ptr;
100 96
101#define HEAP_SIZE 0x3000 97#define HEAP_SIZE 0x3000
102 98
103#include "../../../lib/inflate.c" 99#include "../../../lib/inflate.c"
104 100
105static void *malloc(int size)
106{
107 void *p;
108
109 if (size <0) error("Malloc error");
110 if (free_mem_ptr <= 0) error("Memory error");
111
112 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
113
114 p = (void *)free_mem_ptr;
115 free_mem_ptr += size;
116
117 if (free_mem_ptr >= free_mem_ptr_end)
118 error("Out of memory");
119 return p;
120}
121
122static void free(void *where)
123{ /* gzip_mark & gzip_release do the free */
124}
125
126static void gzip_mark(void **ptr)
127{
128 *ptr = (void *) free_mem_ptr;
129}
130
131static void gzip_release(void **ptr)
132{
133 free_mem_ptr = (long) *ptr;
134}
135
136/* =========================================================================== 101/* ===========================================================================
137 * Fill the input buffer. This is called only when the buffer is empty 102 * Fill the input buffer. This is called only when the buffer is empty
138 * and at least one byte is really needed. 103 * and at least one byte is really needed.
@@ -193,7 +158,7 @@ decompress_kernel(void *output_start,
193 158
194 /* FIXME FIXME FIXME */ 159 /* FIXME FIXME FIXME */
195 free_mem_ptr = (ulg)output_start + ksize; 160 free_mem_ptr = (ulg)output_start + ksize;
196 free_mem_ptr_end = (ulg)output_start + ksize + 0x200000; 161 free_mem_end_ptr = (ulg)output_start + ksize + 0x200000;
197 /* FIXME FIXME FIXME */ 162 /* FIXME FIXME FIXME */
198 163
199 /* put in temp area to reduce initial footprint */ 164 /* put in temp area to reduce initial footprint */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6fb4f03369f2..dabb015aa40b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -268,7 +268,7 @@ config ARCH_EP93XX
268 select GENERIC_GPIO 268 select GENERIC_GPIO
269 select HAVE_CLK 269 select HAVE_CLK
270 select HAVE_CLK 270 select HAVE_CLK
271 select HAVE_GPIO_LIB 271 select ARCH_REQUIRE_GPIOLIB
272 help 272 help
273 This enables support for the Cirrus EP93xx series of CPUs. 273 This enables support for the Cirrus EP93xx series of CPUs.
274 274
@@ -447,7 +447,7 @@ config ARCH_PXA
447 select ARCH_MTD_XIP 447 select ARCH_MTD_XIP
448 select GENERIC_GPIO 448 select GENERIC_GPIO
449 select HAVE_CLK 449 select HAVE_CLK
450 select HAVE_GPIO_LIB 450 select ARCH_REQUIRE_GPIOLIB
451 select GENERIC_TIME 451 select GENERIC_TIME
452 select GENERIC_CLOCKEVENTS 452 select GENERIC_CLOCKEVENTS
453 select TICK_ONESHOT 453 select TICK_ONESHOT
@@ -479,7 +479,7 @@ config ARCH_SA1100
479 select GENERIC_CLOCKEVENTS 479 select GENERIC_CLOCKEVENTS
480 select HAVE_CLK 480 select HAVE_CLK
481 select TICK_ONESHOT 481 select TICK_ONESHOT
482 select HAVE_GPIO_LIB 482 select ARCH_REQUIRE_GPIOLIB
483 help 483 help
484 Support for StrongARM 11x0 based boards. 484 Support for StrongARM 11x0 based boards.
485 485
@@ -522,7 +522,7 @@ config ARCH_OMAP
522 bool "TI OMAP" 522 bool "TI OMAP"
523 select GENERIC_GPIO 523 select GENERIC_GPIO
524 select HAVE_CLK 524 select HAVE_CLK
525 select HAVE_GPIO_LIB 525 select ARCH_REQUIRE_GPIOLIB
526 select GENERIC_TIME 526 select GENERIC_TIME
527 select GENERIC_CLOCKEVENTS 527 select GENERIC_CLOCKEVENTS
528 help 528 help
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 9b444022cb9b..7145cc7c04f0 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -217,8 +217,6 @@ static unsigned outcnt; /* bytes in output buffer */
217static int fill_inbuf(void); 217static int fill_inbuf(void);
218static void flush_window(void); 218static void flush_window(void);
219static void error(char *m); 219static void error(char *m);
220static void gzip_mark(void **);
221static void gzip_release(void **);
222 220
223extern char input_data[]; 221extern char input_data[];
224extern char input_data_end[]; 222extern char input_data_end[];
@@ -227,64 +225,21 @@ static uch *output_data;
227static ulg output_ptr; 225static ulg output_ptr;
228static ulg bytes_out; 226static ulg bytes_out;
229 227
230static void *malloc(int size);
231static void free(void *where);
232static void error(char *m); 228static void error(char *m);
233static void gzip_mark(void **);
234static void gzip_release(void **);
235 229
236static void putstr(const char *); 230static void putstr(const char *);
237 231
238extern int end; 232extern int end;
239static ulg free_mem_ptr; 233static ulg free_mem_ptr;
240static ulg free_mem_ptr_end; 234static ulg free_mem_end_ptr;
241 235
242#define HEAP_SIZE 0x3000 236#ifdef STANDALONE_DEBUG
243 237#define NO_INFLATE_MALLOC
244#include "../../../../lib/inflate.c" 238#endif
245
246#ifndef STANDALONE_DEBUG
247static void *malloc(int size)
248{
249 void *p;
250
251 if (size <0) error("Malloc error");
252 if (free_mem_ptr <= 0) error("Memory error");
253
254 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
255
256 p = (void *)free_mem_ptr;
257 free_mem_ptr += size;
258
259 if (free_mem_ptr >= free_mem_ptr_end)
260 error("Out of memory");
261 return p;
262}
263
264static void free(void *where)
265{ /* gzip_mark & gzip_release do the free */
266}
267
268static void gzip_mark(void **ptr)
269{
270 arch_decomp_wdog();
271 *ptr = (void *) free_mem_ptr;
272}
273 239
274static void gzip_release(void **ptr) 240#define ARCH_HAS_DECOMP_WDOG
275{
276 arch_decomp_wdog();
277 free_mem_ptr = (long) *ptr;
278}
279#else
280static void gzip_mark(void **ptr)
281{
282}
283 241
284static void gzip_release(void **ptr) 242#include "../../../../lib/inflate.c"
285{
286}
287#endif
288 243
289/* =========================================================================== 244/* ===========================================================================
290 * Fill the input buffer. This is called only when the buffer is empty 245 * Fill the input buffer. This is called only when the buffer is empty
@@ -348,7 +303,7 @@ decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p,
348{ 303{
349 output_data = (uch *)output_start; /* Points to kernel start */ 304 output_data = (uch *)output_start; /* Points to kernel start */
350 free_mem_ptr = free_mem_ptr_p; 305 free_mem_ptr = free_mem_ptr_p;
351 free_mem_ptr_end = free_mem_ptr_end_p; 306 free_mem_end_ptr = free_mem_ptr_end_p;
352 __machine_arch_type = arch_id; 307 __machine_arch_type = arch_id;
353 308
354 arch_decomp_setup(); 309 arch_decomp_setup();
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5ee39e10c8d1..d28513f14d05 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
296 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 296 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
297 297
298 INIT_HLIST_HEAD(&empty_rp); 298 INIT_HLIST_HEAD(&empty_rp);
299 spin_lock_irqsave(&kretprobe_lock, flags); 299 kretprobe_hash_lock(current, &head, &flags);
300 head = kretprobe_inst_table_head(current);
301 300
302 /* 301 /*
303 * It is possible to have multiple instances associated with a given 302 * It is possible to have multiple instances associated with a given
@@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
337 } 336 }
338 337
339 kretprobe_assert(ri, orig_ret_address, trampoline_address); 338 kretprobe_assert(ri, orig_ret_address, trampoline_address);
340 spin_unlock_irqrestore(&kretprobe_lock, flags); 339 kretprobe_hash_unlock(current, &flags);
341 340
342 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 341 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
343 hlist_del(&ri->hlist); 342 hlist_del(&ri->hlist);
@@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
347 return (void *)orig_ret_address; 346 return (void *)orig_ret_address;
348} 347}
349 348
350/* Called with kretprobe_lock held. */
351void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 349void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
352 struct pt_regs *regs) 350 struct pt_regs *regs)
353{ 351{
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index 1903a3491ee9..d8e9c2c3f0f6 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -1488,6 +1488,9 @@ static int __init _omap_gpio_init(void)
1488 bank->chip.set = gpio_set; 1488 bank->chip.set = gpio_set;
1489 if (bank_is_mpuio(bank)) { 1489 if (bank_is_mpuio(bank)) {
1490 bank->chip.label = "mpuio"; 1490 bank->chip.label = "mpuio";
1491#ifdef CONFIG_ARCH_OMAP1
1492 bank->chip.dev = &omap_mpuio_device.dev;
1493#endif
1491 bank->chip.base = OMAP_MPUIO(0); 1494 bank->chip.base = OMAP_MPUIO(0);
1492 } else { 1495 } else {
1493 bank->chip.label = "gpio"; 1496 bank->chip.label = "gpio";
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index df4adefedb42..7c239a916275 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -88,7 +88,7 @@ config PLATFORM_AT32AP
88 select SUBARCH_AVR32B 88 select SUBARCH_AVR32B
89 select MMU 89 select MMU
90 select PERFORMANCE_COUNTERS 90 select PERFORMANCE_COUNTERS
91 select HAVE_GPIO_LIB 91 select ARCH_REQUIRE_GPIOLIB
92 select GENERIC_ALLOCATOR 92 select GENERIC_ALLOCATOR
93 93
94# 94#
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index 60da03ba7117..296294f8ed81 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -360,6 +360,8 @@ static int __init pio_probe(struct platform_device *pdev)
360 pio->chip.label = pio->name; 360 pio->chip.label = pio->name;
361 pio->chip.base = pdev->id * 32; 361 pio->chip.base = pdev->id * 32;
362 pio->chip.ngpio = 32; 362 pio->chip.ngpio = 32;
363 pio->chip.dev = &pdev->dev;
364 pio->chip.owner = THIS_MODULE;
363 365
364 pio->chip.direction_input = direction_input; 366 pio->chip.direction_input = direction_input;
365 pio->chip.get = gpio_get; 367 pio->chip.get = gpio_get;
diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c
index 18e13bce1400..d933c89889db 100644
--- a/arch/cris/arch-v10/boot/compressed/misc.c
+++ b/arch/cris/arch-v10/boot/compressed/misc.c
@@ -102,50 +102,16 @@ extern char *input_data; /* lives in head.S */
102static long bytes_out = 0; 102static long bytes_out = 0;
103static uch *output_data; 103static uch *output_data;
104static unsigned long output_ptr = 0; 104static unsigned long output_ptr = 0;
105
106static void *malloc(int size);
107static void free(void *where);
108static void gzip_mark(void **);
109static void gzip_release(void **);
110
111static void puts(const char *); 105static void puts(const char *);
112 106
113/* the "heap" is put directly after the BSS ends, at end */ 107/* the "heap" is put directly after the BSS ends, at end */
114 108
115extern int _end; 109extern int _end;
116static long free_mem_ptr = (long)&_end; 110static long free_mem_ptr = (long)&_end;
111static long free_mem_end_ptr;
117 112
118#include "../../../../../lib/inflate.c" 113#include "../../../../../lib/inflate.c"
119 114
120static void *malloc(int size)
121{
122 void *p;
123
124 if (size < 0)
125 error("Malloc error");
126
127 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
128
129 p = (void *)free_mem_ptr;
130 free_mem_ptr += size;
131
132 return p;
133}
134
135static void free(void *where)
136{ /* Don't care */
137}
138
139static void gzip_mark(void **ptr)
140{
141 *ptr = (void *) free_mem_ptr;
142}
143
144static void gzip_release(void **ptr)
145{
146 free_mem_ptr = (long) *ptr;
147}
148
149/* decompressor info and error messages to serial console */ 115/* decompressor info and error messages to serial console */
150 116
151static void 117static void
diff --git a/arch/cris/arch-v32/boot/compressed/misc.c b/arch/cris/arch-v32/boot/compressed/misc.c
index 55b2695c5d70..3595e16e82bc 100644
--- a/arch/cris/arch-v32/boot/compressed/misc.c
+++ b/arch/cris/arch-v32/boot/compressed/misc.c
@@ -89,20 +89,14 @@ static unsigned outcnt = 0; /* bytes in output buffer */
89 89
90static void flush_window(void); 90static void flush_window(void);
91static void error(char *m); 91static void error(char *m);
92static void gzip_mark(void **);
93static void gzip_release(void **);
94 92
95extern char *input_data; /* lives in head.S */ 93extern char *input_data; /* lives in head.S */
96 94
97static long bytes_out = 0; 95static long bytes_out;
98static uch *output_data; 96static uch *output_data;
99static unsigned long output_ptr = 0; 97static unsigned long output_ptr;
100 98
101static void *malloc(int size);
102static void free(void *where);
103static void error(char *m); 99static void error(char *m);
104static void gzip_mark(void **);
105static void gzip_release(void **);
106 100
107static void puts(const char *); 101static void puts(const char *);
108 102
@@ -110,37 +104,10 @@ static void puts(const char *);
110 104
111extern int _end; 105extern int _end;
112static long free_mem_ptr = (long)&_end; 106static long free_mem_ptr = (long)&_end;
107static long free_mem_end_ptr;
113 108
114#include "../../../../../lib/inflate.c" 109#include "../../../../../lib/inflate.c"
115 110
116static void *malloc(int size)
117{
118 void *p;
119
120 if (size <0) error("Malloc error");
121
122 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
123
124 p = (void *)free_mem_ptr;
125 free_mem_ptr += size;
126
127 return p;
128}
129
130static void free(void *where)
131{ /* Don't care */
132}
133
134static void gzip_mark(void **ptr)
135{
136 *ptr = (void *) free_mem_ptr;
137}
138
139static void gzip_release(void **ptr)
140{
141 free_mem_ptr = (long) *ptr;
142}
143
144/* decompressor info and error messages to serial console */ 111/* decompressor info and error messages to serial console */
145 112
146static inline void 113static inline void
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 085dc6ec152b..396ab059efa3 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -203,20 +203,6 @@ config UNIX98_PTYS
203 Read the instructions in <file:Documentation/Changes> pertaining to 203 Read the instructions in <file:Documentation/Changes> pertaining to
204 pseudo terminals. It's safe to say N. 204 pseudo terminals. It's safe to say N.
205 205
206config UNIX98_PTY_COUNT
207 int "Maximum number of Unix98 PTYs in use (0-2048)"
208 depends on UNIX98_PTYS
209 default "256"
210 help
211 The maximum number of Unix98 PTYs that can be used at any one time.
212 The default is 256, and should be enough for desktop systems. Server
213 machines which support incoming telnet/rlogin/ssh connections and/or
214 serve several X terminals may want to increase this: every incoming
215 connection and every xterm uses up one PTY.
216
217 When not in use, each additional set of 256 PTYs occupy
218 approximately 8 KB of kernel memory on 32-bit architectures.
219
220source "drivers/char/pcmcia/Kconfig" 206source "drivers/char/pcmcia/Kconfig"
221 207
222source "drivers/serial/Kconfig" 208source "drivers/serial/Kconfig"
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
index 845074588af0..51ab6cbd030f 100644
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -67,8 +67,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */
67static int fill_inbuf(void); 67static int fill_inbuf(void);
68static void flush_window(void); 68static void flush_window(void);
69static void error(char *m); 69static void error(char *m);
70static void gzip_mark(void **);
71static void gzip_release(void **);
72 70
73extern char input_data[]; 71extern char input_data[];
74extern int input_len; 72extern int input_len;
@@ -77,11 +75,7 @@ static long bytes_out = 0;
77static uch *output_data; 75static uch *output_data;
78static unsigned long output_ptr = 0; 76static unsigned long output_ptr = 0;
79 77
80static void *malloc(int size);
81static void free(void *where);
82static void error(char *m); 78static void error(char *m);
83static void gzip_mark(void **);
84static void gzip_release(void **);
85 79
86int puts(const char *); 80int puts(const char *);
87 81
@@ -98,38 +92,6 @@ static unsigned long free_mem_end_ptr;
98#define TDR *((volatile unsigned char *)0xffff8b) 92#define TDR *((volatile unsigned char *)0xffff8b)
99#define SSR *((volatile unsigned char *)0xffff8c) 93#define SSR *((volatile unsigned char *)0xffff8c)
100 94
101static void *malloc(int size)
102{
103 void *p;
104
105 if (size <0) error("Malloc error");
106 if (free_mem_ptr == 0) error("Memory error");
107
108 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
109
110 p = (void *)free_mem_ptr;
111 free_mem_ptr += size;
112
113 if (free_mem_ptr >= free_mem_end_ptr)
114 error("Out of memory");
115
116 return p;
117}
118
119static void free(void *where)
120{ /* Don't care */
121}
122
123static void gzip_mark(void **ptr)
124{
125 *ptr = (void *) free_mem_ptr;
126}
127
128static void gzip_release(void **ptr)
129{
130 free_mem_ptr = (long) *ptr;
131}
132
133int puts(const char *s) 95int puts(const char *s)
134{ 96{
135 return 0; 97 return 0;
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 233434f4f88f..f07688da947c 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
429 ((struct fnptr *)kretprobe_trampoline)->ip; 429 ((struct fnptr *)kretprobe_trampoline)->ip;
430 430
431 INIT_HLIST_HEAD(&empty_rp); 431 INIT_HLIST_HEAD(&empty_rp);
432 spin_lock_irqsave(&kretprobe_lock, flags); 432 kretprobe_hash_lock(current, &head, &flags);
433 head = kretprobe_inst_table_head(current);
434 433
435 /* 434 /*
436 * It is possible to have multiple instances associated with a given 435 * It is possible to have multiple instances associated with a given
@@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
485 kretprobe_assert(ri, orig_ret_address, trampoline_address); 484 kretprobe_assert(ri, orig_ret_address, trampoline_address);
486 485
487 reset_current_kprobe(); 486 reset_current_kprobe();
488 spin_unlock_irqrestore(&kretprobe_lock, flags); 487 kretprobe_hash_unlock(current, &flags);
489 preempt_enable_no_resched(); 488 preempt_enable_no_resched();
490 489
491 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 490 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
500 return 1; 499 return 1;
501} 500}
502 501
503/* Called with kretprobe_lock held */
504void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 502void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
505 struct pt_regs *regs) 503 struct pt_regs *regs)
506{ 504{
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 600d40e33495..d394292498c0 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -70,8 +70,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */
70static int fill_inbuf(void); 70static int fill_inbuf(void);
71static void flush_window(void); 71static void flush_window(void);
72static void error(char *m); 72static void error(char *m);
73static void gzip_mark(void **);
74static void gzip_release(void **);
75 73
76static unsigned char *input_data; 74static unsigned char *input_data;
77static int input_len; 75static int input_len;
@@ -82,9 +80,6 @@ static unsigned long output_ptr = 0;
82 80
83#include "m32r_sio.c" 81#include "m32r_sio.c"
84 82
85static void *malloc(int size);
86static void free(void *where);
87
88static unsigned long free_mem_ptr; 83static unsigned long free_mem_ptr;
89static unsigned long free_mem_end_ptr; 84static unsigned long free_mem_end_ptr;
90 85
@@ -92,38 +87,6 @@ static unsigned long free_mem_end_ptr;
92 87
93#include "../../../../lib/inflate.c" 88#include "../../../../lib/inflate.c"
94 89
95static void *malloc(int size)
96{
97 void *p;
98
99 if (size <0) error("Malloc error");
100 if (free_mem_ptr == 0) error("Memory error");
101
102 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
103
104 p = (void *)free_mem_ptr;
105 free_mem_ptr += size;
106
107 if (free_mem_ptr >= free_mem_end_ptr)
108 error("Out of memory");
109
110 return p;
111}
112
113static void free(void *where)
114{ /* Don't care */
115}
116
117static void gzip_mark(void **ptr)
118{
119 *ptr = (void *) free_mem_ptr;
120}
121
122static void gzip_release(void **ptr)
123{
124 free_mem_ptr = (long) *ptr;
125}
126
127void* memset(void* s, int c, size_t n) 90void* memset(void* s, int c, size_t n)
128{ 91{
129 int i; 92 int i;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b9c754f4070c..b4c4eaa5dd26 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -713,7 +713,7 @@ config CSRC_SB1250
713 713
714config GPIO_TXX9 714config GPIO_TXX9
715 select GENERIC_GPIO 715 select GENERIC_GPIO
716 select HAVE_GPIO_LIB 716 select ARCH_REQUIRE_GPIOLIB
717 bool 717 bool
718 718
719config CFE 719config CFE
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index c266211ed653..2fefb14414b7 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -11,7 +11,6 @@
11#include <linux/file.h> 11#include <linux/file.h>
12#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include <linux/highuid.h> 13#include <linux/highuid.h>
14#include <linux/dirent.h>
15#include <linux/resource.h> 14#include <linux/resource.h>
16#include <linux/highmem.h> 15#include <linux/highmem.h>
17#include <linux/time.h> 16#include <linux/time.h>
diff --git a/arch/mn10300/boot/compressed/misc.c b/arch/mn10300/boot/compressed/misc.c
index ded207efc97a..f673383518e4 100644
--- a/arch/mn10300/boot/compressed/misc.c
+++ b/arch/mn10300/boot/compressed/misc.c
@@ -153,26 +153,9 @@ static uch *output_data;
153static unsigned long output_ptr; 153static unsigned long output_ptr;
154 154
155 155
156static void *malloc(int size);
157
158static inline void free(void *where)
159{ /* Don't care */
160}
161
162static unsigned long free_mem_ptr = (unsigned long) &end; 156static unsigned long free_mem_ptr = (unsigned long) &end;
163static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000; 157static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000;
164 158
165static inline void gzip_mark(void **ptr)
166{
167 kputs(".");
168 *ptr = (void *) free_mem_ptr;
169}
170
171static inline void gzip_release(void **ptr)
172{
173 free_mem_ptr = (unsigned long) *ptr;
174}
175
176#define INPLACE_MOVE_ROUTINE 0x1000 159#define INPLACE_MOVE_ROUTINE 0x1000
177#define LOW_BUFFER_START 0x2000 160#define LOW_BUFFER_START 0x2000
178#define LOW_BUFFER_END 0x90000 161#define LOW_BUFFER_END 0x90000
@@ -186,26 +169,6 @@ static int lines, cols;
186 169
187#include "../../../../lib/inflate.c" 170#include "../../../../lib/inflate.c"
188 171
189static void *malloc(int size)
190{
191 void *p;
192
193 if (size < 0)
194 error("Malloc error\n");
195 if (!free_mem_ptr)
196 error("Memory error\n");
197
198 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
199
200 p = (void *) free_mem_ptr;
201 free_mem_ptr += size;
202
203 if (free_mem_ptr >= free_mem_end_ptr)
204 error("\nOut of memory\n");
205
206 return p;
207}
208
209static inline void scroll(void) 172static inline void scroll(void)
210{ 173{
211 int i; 174 int i;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a487671c282f..fe88418167c5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -110,8 +110,10 @@ config PPC
110 default y 110 default y
111 select HAVE_DYNAMIC_FTRACE 111 select HAVE_DYNAMIC_FTRACE
112 select HAVE_FTRACE 112 select HAVE_FTRACE
113 select ARCH_WANT_OPTIONAL_GPIOLIB
113 select HAVE_IDE 114 select HAVE_IDE
114 select HAVE_IOREMAP_PROT 115 select HAVE_IOREMAP_PROT
116 select HAVE_EFFICIENT_UNALIGNED_ACCESS
115 select HAVE_KPROBES 117 select HAVE_KPROBES
116 select HAVE_ARCH_KGDB 118 select HAVE_ARCH_KGDB
117 select HAVE_KRETPROBES 119 select HAVE_KRETPROBES
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b936a1dd0a50..25a052c16754 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,6 +23,9 @@
23struct cpu_spec* cur_cpu_spec = NULL; 23struct cpu_spec* cur_cpu_spec = NULL;
24EXPORT_SYMBOL(cur_cpu_spec); 24EXPORT_SYMBOL(cur_cpu_spec);
25 25
26/* The platform string corresponding to the real PVR */
27const char *powerpc_base_platform;
28
26/* NOTE: 29/* NOTE:
27 * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's 30 * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
28 * the responsibility of the appropriate CPU save/restore functions to 31 * the responsibility of the appropriate CPU save/restore functions to
@@ -1652,6 +1655,14 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
1652 } else 1655 } else
1653 *t = *s; 1656 *t = *s;
1654 *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; 1657 *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
1658
1659 /*
1660 * Set the base platform string once; assumes
1661 * we're called with real pvr first.
1662 */
1663 if (powerpc_base_platform == NULL)
1664 powerpc_base_platform = t->platform;
1665
1655#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE) 1666#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
1656 /* ppc64 and booke expect identify_cpu to also call 1667 /* ppc64 and booke expect identify_cpu to also call
1657 * setup_cpu for that processor. I will consolidate 1668 * setup_cpu for that processor. I will consolidate
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index da52269aec1e..81c8324a4a3c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -148,7 +148,7 @@ transfer_to_handler:
148 /* Check to see if the dbcr0 register is set up to debug. Use the 148 /* Check to see if the dbcr0 register is set up to debug. Use the
149 internal debug mode bit to do this. */ 149 internal debug mode bit to do this. */
150 lwz r12,THREAD_DBCR0(r12) 150 lwz r12,THREAD_DBCR0(r12)
151 andis. r12,r12,DBCR0_IDM@h 151 andis. r12,r12,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
152 beq+ 3f 152 beq+ 3f
153 /* From user and task is ptraced - load up global dbcr0 */ 153 /* From user and task is ptraced - load up global dbcr0 */
154 li r12,-1 /* clear all pending debug events */ 154 li r12,-1 /* clear all pending debug events */
@@ -292,7 +292,7 @@ syscall_exit_cont:
292 /* If the process has its own DBCR0 value, load it up. The internal 292 /* If the process has its own DBCR0 value, load it up. The internal
293 debug mode bit tells us that dbcr0 should be loaded. */ 293 debug mode bit tells us that dbcr0 should be loaded. */
294 lwz r0,THREAD+THREAD_DBCR0(r2) 294 lwz r0,THREAD+THREAD_DBCR0(r2)
295 andis. r10,r0,DBCR0_IDM@h 295 andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
296 bnel- load_dbcr0 296 bnel- load_dbcr0
297#endif 297#endif
298#ifdef CONFIG_44x 298#ifdef CONFIG_44x
@@ -720,7 +720,7 @@ restore_user:
720 /* Check whether this process has its own DBCR0 value. The internal 720 /* Check whether this process has its own DBCR0 value. The internal
721 debug mode bit tells us that dbcr0 should be loaded. */ 721 debug mode bit tells us that dbcr0 should be loaded. */
722 lwz r0,THREAD+THREAD_DBCR0(r2) 722 lwz r0,THREAD+THREAD_DBCR0(r2)
723 andis. r10,r0,DBCR0_IDM@h 723 andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
724 bnel- load_dbcr0 724 bnel- load_dbcr0
725#endif 725#endif
726 726
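The three hunks above widen the same test: andis. compares only the upper halfword of the register against MASK@h, so every flag being checked must live in the top 16 bits of DBCR0. A compilable model of that check (bit positions invented for illustration, not the real SPR layout):

#include <stdio.h>
#include <stdint.h>

#define DBCR0_IDM   0x40000000u  /* illustrative positions only */
#define DBSR_DAC1R  0x04000000u
#define DBSR_DAC1W  0x02000000u

static int needs_dbcr0_load(uint32_t thread_dbcr0)
{
	/* andis. rX,rY,MASK@h tests against (MASK >> 16) << 16 */
	uint32_t high_mask = (DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)
				& 0xffff0000u;
	return (thread_dbcr0 & high_mask) != 0;
}

int main(void)
{
	printf("%d\n", needs_dbcr0_load(DBSR_DAC1W)); /* 1: DAC write set */
	printf("%d\n", needs_dbcr0_load(0));          /* 0: nothing set  */
	return 0;
}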
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 2385f68c1751..550a19399bfa 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -49,6 +49,8 @@ static int novmerge = 1;
49 49
50static int protect4gb = 1; 50static int protect4gb = 1;
51 51
52static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
53
52static inline unsigned long iommu_num_pages(unsigned long vaddr, 54static inline unsigned long iommu_num_pages(unsigned long vaddr,
53 unsigned long slen) 55 unsigned long slen)
54{ 56{
@@ -191,6 +193,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
191{ 193{
192 unsigned long entry, flags; 194 unsigned long entry, flags;
193 dma_addr_t ret = DMA_ERROR_CODE; 195 dma_addr_t ret = DMA_ERROR_CODE;
196 int build_fail;
194 197
195 spin_lock_irqsave(&(tbl->it_lock), flags); 198 spin_lock_irqsave(&(tbl->it_lock), flags);
196 199
@@ -205,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
205 ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ 208 ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
206 209
207 /* Put the TCEs in the HW table */ 210 /* Put the TCEs in the HW table */
208 ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK, 211 build_fail = ppc_md.tce_build(tbl, entry, npages,
209 direction, attrs); 212 (unsigned long)page & IOMMU_PAGE_MASK,
213 direction, attrs);
214
215 /* ppc_md.tce_build() only returns non-zero for transient errors.
216 * Clean up the table bitmap in this case and return
217 * DMA_ERROR_CODE. For all other errors the functionality is
218 * not altered.
219 */
220 if (unlikely(build_fail)) {
221 __iommu_free(tbl, ret, npages);
210 222
223 spin_unlock_irqrestore(&(tbl->it_lock), flags);
224 return DMA_ERROR_CODE;
225 }
211 226
212 /* Flush/invalidate TLB caches if necessary */ 227 /* Flush/invalidate TLB caches if necessary */
213 if (ppc_md.tce_flush) 228 if (ppc_md.tce_flush)
@@ -276,7 +291,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
276 dma_addr_t dma_next = 0, dma_addr; 291 dma_addr_t dma_next = 0, dma_addr;
277 unsigned long flags; 292 unsigned long flags;
278 struct scatterlist *s, *outs, *segstart; 293 struct scatterlist *s, *outs, *segstart;
279 int outcount, incount, i; 294 int outcount, incount, i, build_fail = 0;
280 unsigned int align; 295 unsigned int align;
281 unsigned long handle; 296 unsigned long handle;
282 unsigned int max_seg_size; 297 unsigned int max_seg_size;
@@ -337,8 +352,11 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
337 npages, entry, dma_addr); 352 npages, entry, dma_addr);
338 353
339 /* Insert into HW table */ 354 /* Insert into HW table */
340 ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, 355 build_fail = ppc_md.tce_build(tbl, entry, npages,
341 direction, attrs); 356 vaddr & IOMMU_PAGE_MASK,
357 direction, attrs);
 358 if (unlikely(build_fail))
359 goto failure;
342 360
343 /* If we are in an open segment, try merging */ 361 /* If we are in an open segment, try merging */
344 if (segstart != s) { 362 if (segstart != s) {
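Both iommu.c hunks add the same error path: capture the return value of ppc_md.tce_build() and, on a transient failure, free the just-reserved table entries before reporting DMA_ERROR_CODE. The shape of that reserve/try/roll-back pattern, with stand-in helpers rather than the kernel API:

#include <stdio.h>

#define DMA_ERROR_CODE (~0UL)

/* stand-in for ppc_md.tce_build(): pretend large requests fail */
static int fake_tce_build(unsigned long entry, unsigned int npages)
{
	(void)entry;
	return npages > 8 ? -1 : 0;
}

/* stand-in for __iommu_free(): releases the bitmap reservation */
static void fake_iommu_free(unsigned long entry, unsigned int npages)
{
	printf("rolled back %u pages at entry %lu\n", npages, entry);
}

static unsigned long map_pages(unsigned long entry, unsigned int npages)
{
	int build_fail = fake_tce_build(entry, npages);

	if (build_fail) {
		/* clean the table bitmap so the slots can be reused */
		fake_iommu_free(entry, npages);
		return DMA_ERROR_CODE;
	}
	return entry << 12;             /* IOMMU_PAGE_SHIFT stand-in */
}

int main(void)
{
	printf("ok:   %lx\n", map_pages(3, 4));
	printf("fail: %lx\n", map_pages(3, 16));
	return 0;
}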
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 4ba2af125450..de79915452c8 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
144 kcb->kprobe_saved_msr = regs->msr; 144 kcb->kprobe_saved_msr = regs->msr;
145} 145}
146 146
147/* Called with kretprobe_lock held */
148void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 147void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
149 struct pt_regs *regs) 148 struct pt_regs *regs)
150{ 149{
@@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
312 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; 311 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
313 312
314 INIT_HLIST_HEAD(&empty_rp); 313 INIT_HLIST_HEAD(&empty_rp);
315 spin_lock_irqsave(&kretprobe_lock, flags); 314 kretprobe_hash_lock(current, &head, &flags);
316 head = kretprobe_inst_table_head(current);
317 315
318 /* 316 /*
319 * It is possible to have multiple instances associated with a given 317 * It is possible to have multiple instances associated with a given
@@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
352 regs->nip = orig_ret_address; 350 regs->nip = orig_ret_address;
353 351
354 reset_current_kprobe(); 352 reset_current_kprobe();
355 spin_unlock_irqrestore(&kretprobe_lock, flags); 353 kretprobe_hash_unlock(current, &flags);
356 preempt_enable_no_resched(); 354 preempt_enable_no_resched();
357 355
358 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 356 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
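kretprobe_hash_lock()/kretprobe_hash_unlock() replace the single global kretprobe_lock with a lock chosen by hashing the task, so return-probe hits on unrelated tasks stop contending. A rough userspace model of hashed bucket locking (bucket count and hash function are made up):

#include <stdio.h>
#include <stdint.h>
#include <pthread.h>

#define NBUCKETS 64

static pthread_mutex_t bucket_lock[NBUCKETS];

static unsigned int hash_task(const void *task)
{
	return (unsigned int)(((uintptr_t)task >> 4) % NBUCKETS);
}

static void hash_lock(const void *task)
{
	pthread_mutex_lock(&bucket_lock[hash_task(task)]);
}

static void hash_unlock(const void *task)
{
	pthread_mutex_unlock(&bucket_lock[hash_task(task)]);
}

int main(void)
{
	int t1, t2;
	unsigned int i;

	for (i = 0; i < NBUCKETS; i++)
		pthread_mutex_init(&bucket_lock[i], NULL);

	hash_lock(&t1);   /* serializes only tasks that share this bucket */
	hash_unlock(&t1);
	hash_lock(&t2);
	hash_unlock(&t2);
	printf("bucket(t1)=%u bucket(t2)=%u\n",
	       hash_task(&t1), hash_task(&t2));
	return 0;
}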
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 827a5726a035..9f856a0c3e38 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -34,8 +34,9 @@
34#include <asm/time.h> 34#include <asm/time.h>
35#include <asm/prom.h> 35#include <asm/prom.h>
36#include <asm/vdso_datapage.h> 36#include <asm/vdso_datapage.h>
37#include <asm/vio.h>
37 38
38#define MODULE_VERS "1.7" 39#define MODULE_VERS "1.8"
39#define MODULE_NAME "lparcfg" 40#define MODULE_NAME "lparcfg"
40 41
41/* #define LPARCFG_DEBUG */ 42/* #define LPARCFG_DEBUG */
@@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
129/* 130/*
130 * Methods used to fetch LPAR data when running on a pSeries platform. 131 * Methods used to fetch LPAR data when running on a pSeries platform.
131 */ 132 */
132static void log_plpar_hcall_return(unsigned long rc, char *tag) 133/**
134 * h_get_mpp
135 * H_GET_MPP hcall returns info in 7 parms
136 */
137int h_get_mpp(struct hvcall_mpp_data *mpp_data)
133{ 138{
134 switch(rc) { 139 int rc;
135 case 0: 140 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
136 return; 141
137 case H_HARDWARE: 142 rc = plpar_hcall9(H_GET_MPP, retbuf);
138 printk(KERN_INFO "plpar-hcall (%s) " 143
139 "Hardware fault\n", tag); 144 mpp_data->entitled_mem = retbuf[0];
140 return; 145 mpp_data->mapped_mem = retbuf[1];
141 case H_FUNCTION: 146
142 printk(KERN_INFO "plpar-hcall (%s) " 147 mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
143 "Function not allowed\n", tag); 148 mpp_data->pool_num = retbuf[2] & 0xffff;
144 return; 149
145 case H_AUTHORITY: 150 mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
146 printk(KERN_INFO "plpar-hcall (%s) " 151 mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
147 "Not authorized to this function\n", tag); 152 mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
148 return; 153
149 case H_PARAMETER: 154 mpp_data->pool_size = retbuf[4];
150 printk(KERN_INFO "plpar-hcall (%s) " 155 mpp_data->loan_request = retbuf[5];
151 "Bad parameter(s)\n",tag); 156 mpp_data->backing_mem = retbuf[6];
152 return; 157
153 default: 158 return rc;
154 printk(KERN_INFO "plpar-hcall (%s) "
155 "Unexpected rc(0x%lx)\n", tag, rc);
156 }
157} 159}
160EXPORT_SYMBOL(h_get_mpp);
161
162struct hvcall_ppp_data {
163 u64 entitlement;
164 u64 unallocated_entitlement;
165 u16 group_num;
166 u16 pool_num;
167 u8 capped;
168 u8 weight;
169 u8 unallocated_weight;
170 u16 active_procs_in_pool;
171 u16 active_system_procs;
172};
158 173
159/* 174/*
160 * H_GET_PPP hcall returns info in 4 parms. 175 * H_GET_PPP hcall returns info in 4 parms.
@@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag)
176 * XXXX - Active processors in Physical Processor Pool. 191 * XXXX - Active processors in Physical Processor Pool.
177 * XXXX - Processors active on platform. 192 * XXXX - Processors active on platform.
178 */ 193 */
179static unsigned int h_get_ppp(unsigned long *entitled, 194static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
180 unsigned long *unallocated,
181 unsigned long *aggregation,
182 unsigned long *resource)
183{ 195{
184 unsigned long rc; 196 unsigned long rc;
185 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 197 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
186 198
187 rc = plpar_hcall(H_GET_PPP, retbuf); 199 rc = plpar_hcall(H_GET_PPP, retbuf);
188 200
189 *entitled = retbuf[0]; 201 ppp_data->entitlement = retbuf[0];
190 *unallocated = retbuf[1]; 202 ppp_data->unallocated_entitlement = retbuf[1];
191 *aggregation = retbuf[2]; 203
192 *resource = retbuf[3]; 204 ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
205 ppp_data->pool_num = retbuf[2] & 0xffff;
193 206
194 log_plpar_hcall_return(rc, "H_GET_PPP"); 207 ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
208 ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
209 ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
210 ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
211 ppp_data->active_system_procs = retbuf[3] & 0xffff;
195 212
196 return rc; 213 return rc;
197} 214}
198 215
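h_get_ppp() now unpacks the fields that H_GET_PPP packs into retbuf[2] and retbuf[3]; each "(>> n * 8) & mask" peels one byte-aligned field out of the 64-bit word. The same arithmetic, checkable in isolation (field layout mirrors the code above, test value invented):

#include <stdio.h>
#include <stdint.h>

struct ppp_word3 {                      /* models retbuf[3] */
	unsigned capped;                /* byte 6, low bit */
	unsigned weight;                /* byte 5 */
	unsigned unallocated_weight;    /* byte 4 */
	unsigned active_procs_in_pool;  /* bytes 2-3 */
	unsigned active_system_procs;   /* bytes 0-1 */
};

static struct ppp_word3 unpack(uint64_t r3)
{
	struct ppp_word3 w;

	w.capped               = (r3 >> 6 * 8) & 0x01;
	w.weight               = (r3 >> 5 * 8) & 0xff;
	w.unallocated_weight   = (r3 >> 4 * 8) & 0xff;
	w.active_procs_in_pool = (r3 >> 2 * 8) & 0xffff;
	w.active_system_procs  = r3 & 0xffff;
	return w;
}

int main(void)
{
	/* capped=1, weight=0x80, unalloc=0x10, in_pool=8, system=16 */
	uint64_t r3 = (1ULL << 48) | (0x80ULL << 40) | (0x10ULL << 32)
			| (8ULL << 16) | 16;
	struct ppp_word3 w = unpack(r3);

	printf("capped=%u weight=%u in_pool=%u system=%u\n",
	       w.capped, w.weight, w.active_procs_in_pool,
	       w.active_system_procs);
	return 0;
}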
199static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) 216static unsigned h_pic(unsigned long *pool_idle_time,
217 unsigned long *num_procs)
200{ 218{
201 unsigned long rc; 219 unsigned long rc;
202 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 220 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
206 *pool_idle_time = retbuf[0]; 224 *pool_idle_time = retbuf[0];
207 *num_procs = retbuf[1]; 225 *num_procs = retbuf[1];
208 226
209 if (rc != H_AUTHORITY) 227 return rc;
210 log_plpar_hcall_return(rc, "H_PIC"); 228}
229
230/*
231 * parse_ppp_data
232 * Parse out the data returned from h_get_ppp and h_pic
233 */
234static void parse_ppp_data(struct seq_file *m)
235{
236 struct hvcall_ppp_data ppp_data;
237 int rc;
238
239 rc = h_get_ppp(&ppp_data);
240 if (rc)
241 return;
242
243 seq_printf(m, "partition_entitled_capacity=%ld\n",
244 ppp_data.entitlement);
245 seq_printf(m, "group=%d\n", ppp_data.group_num);
246 seq_printf(m, "system_active_processors=%d\n",
247 ppp_data.active_system_procs);
248
 249 /* pool related entries are appropriate for shared configs */
250 if (lppaca[0].shared_proc) {
251 unsigned long pool_idle_time, pool_procs;
252
253 seq_printf(m, "pool=%d\n", ppp_data.pool_num);
254
255 /* report pool_capacity in percentage */
256 seq_printf(m, "pool_capacity=%d\n",
257 ppp_data.active_procs_in_pool * 100);
258
259 h_pic(&pool_idle_time, &pool_procs);
260 seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
261 seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
262 }
263
264 seq_printf(m, "unallocated_capacity_weight=%d\n",
265 ppp_data.unallocated_weight);
266 seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
267 seq_printf(m, "capped=%d\n", ppp_data.capped);
268 seq_printf(m, "unallocated_capacity=%ld\n",
269 ppp_data.unallocated_entitlement);
270}
271
272/**
273 * parse_mpp_data
274 * Parse out data returned from h_get_mpp
275 */
276static void parse_mpp_data(struct seq_file *m)
277{
278 struct hvcall_mpp_data mpp_data;
279 int rc;
280
281 rc = h_get_mpp(&mpp_data);
282 if (rc)
283 return;
284
285 seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
286
287 if (mpp_data.mapped_mem != -1)
288 seq_printf(m, "mapped_entitled_memory=%ld\n",
289 mpp_data.mapped_mem);
290
291 seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
292 seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
293
294 seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
295 seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
296 mpp_data.unallocated_mem_weight);
297 seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
298 mpp_data.unallocated_entitlement);
299
300 if (mpp_data.pool_size != -1)
301 seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
302 mpp_data.pool_size);
303
304 seq_printf(m, "entitled_memory_loan_request=%ld\n",
305 mpp_data.loan_request);
306
307 seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
211} 308}
212 309
213#define SPLPAR_CHARACTERISTICS_TOKEN 20 310#define SPLPAR_CHARACTERISTICS_TOKEN 20
@@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void)
313 return count; 410 return count;
314} 411}
315 412
413static void pseries_cmo_data(struct seq_file *m)
414{
415 int cpu;
416 unsigned long cmo_faults = 0;
417 unsigned long cmo_fault_time = 0;
418
419 if (!firmware_has_feature(FW_FEATURE_CMO))
420 return;
421
422 for_each_possible_cpu(cpu) {
423 cmo_faults += lppaca[cpu].cmo_faults;
424 cmo_fault_time += lppaca[cpu].cmo_fault_time;
425 }
426
427 seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
428 seq_printf(m, "cmo_fault_time_usec=%lu\n",
429 cmo_fault_time / tb_ticks_per_usec);
430}
431
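pseries_cmo_data() folds the per-CPU CMO counters into totals and converts the accumulated timebase ticks to microseconds for the report. A standalone model of that reduction (array contents and the tb_ticks_per_usec rate are invented):

#include <stdio.h>

#define NR_CPUS 4

struct cpu_cmo { unsigned long faults, fault_time; };

int main(void)
{
	/* models the per-CPU lppaca fields read by pseries_cmo_data() */
	struct cpu_cmo lppaca[NR_CPUS] = {
		{ 3, 512000 }, { 1, 128000 }, { 0, 0 }, { 5, 896000 },
	};
	unsigned long tb_ticks_per_usec = 512;  /* invented rate */
	unsigned long faults = 0, fault_time = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		faults += lppaca[cpu].faults;
		fault_time += lppaca[cpu].fault_time;
	}
	printf("cmo_faults=%lu\n", faults);
	printf("cmo_fault_time_usec=%lu\n", fault_time / tb_ticks_per_usec);
	return 0;
}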
316static int pseries_lparcfg_data(struct seq_file *m, void *v) 432static int pseries_lparcfg_data(struct seq_file *m, void *v)
317{ 433{
318 int partition_potential_processors; 434 int partition_potential_processors;
@@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
334 partition_active_processors = lparcfg_count_active_processors(); 450 partition_active_processors = lparcfg_count_active_processors();
335 451
336 if (firmware_has_feature(FW_FEATURE_SPLPAR)) { 452 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
337 unsigned long h_entitled, h_unallocated;
338 unsigned long h_aggregation, h_resource;
339 unsigned long pool_idle_time, pool_procs;
340 unsigned long purr;
341
342 h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
343 &h_resource);
344
345 seq_printf(m, "R4=0x%lx\n", h_entitled);
346 seq_printf(m, "R5=0x%lx\n", h_unallocated);
347 seq_printf(m, "R6=0x%lx\n", h_aggregation);
348 seq_printf(m, "R7=0x%lx\n", h_resource);
349
350 purr = get_purr();
351
352 /* this call handles the ibm,get-system-parameter contents */ 453 /* this call handles the ibm,get-system-parameter contents */
353 parse_system_parameter_string(m); 454 parse_system_parameter_string(m);
455 parse_ppp_data(m);
456 parse_mpp_data(m);
457 pseries_cmo_data(m);
354 458
355 seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); 459 seq_printf(m, "purr=%ld\n", get_purr());
356
357 seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
358
359 seq_printf(m, "system_active_processors=%ld\n",
360 (h_resource >> 0 * 8) & 0xffff);
361
362 /* pool related entries are apropriate for shared configs */
363 if (lppaca[0].shared_proc) {
364
365 h_pic(&pool_idle_time, &pool_procs);
366
367 seq_printf(m, "pool=%ld\n",
368 (h_aggregation >> 0 * 8) & 0xffff);
369
370 /* report pool_capacity in percentage */
371 seq_printf(m, "pool_capacity=%ld\n",
372 ((h_resource >> 2 * 8) & 0xffff) * 100);
373
374 seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
375
376 seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
377 }
378
379 seq_printf(m, "unallocated_capacity_weight=%ld\n",
380 (h_resource >> 4 * 8) & 0xFF);
381
382 seq_printf(m, "capacity_weight=%ld\n",
383 (h_resource >> 5 * 8) & 0xFF);
384
385 seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
386
387 seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
388
389 seq_printf(m, "purr=%ld\n", purr);
390
391 } else { /* non SPLPAR case */ 460 } else { /* non SPLPAR case */
392 461
393 seq_printf(m, "system_active_processors=%d\n", 462 seq_printf(m, "system_active_processors=%d\n",
@@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
414 return 0; 483 return 0;
415} 484}
416 485
486static ssize_t update_ppp(u64 *entitlement, u8 *weight)
487{
488 struct hvcall_ppp_data ppp_data;
489 u8 new_weight;
490 u64 new_entitled;
491 ssize_t retval;
492
493 /* Get our current parameters */
494 retval = h_get_ppp(&ppp_data);
495 if (retval)
496 return retval;
497
498 if (entitlement) {
499 new_weight = ppp_data.weight;
500 new_entitled = *entitlement;
501 } else if (weight) {
502 new_weight = *weight;
503 new_entitled = ppp_data.entitlement;
504 } else
505 return -EINVAL;
506
507 pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
508 __FUNCTION__, ppp_data.entitlement, ppp_data.weight);
509
510 pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
511 __FUNCTION__, new_entitled, new_weight);
512
513 retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
514 return retval;
515}
516
517/**
518 * update_mpp
519 *
520 * Update the memory entitlement and weight for the partition. Caller must
521 * specify either a new entitlement or weight, not both, to be updated
522 * since the h_set_mpp call takes both entitlement and weight as parameters.
523 */
524static ssize_t update_mpp(u64 *entitlement, u8 *weight)
525{
526 struct hvcall_mpp_data mpp_data;
527 u64 new_entitled;
528 u8 new_weight;
529 ssize_t rc;
530
531 if (entitlement) {
532 /* Check with vio to ensure the new memory entitlement
533 * can be handled.
534 */
535 rc = vio_cmo_entitlement_update(*entitlement);
536 if (rc)
537 return rc;
538 }
539
540 rc = h_get_mpp(&mpp_data);
541 if (rc)
542 return rc;
543
544 if (entitlement) {
545 new_weight = mpp_data.mem_weight;
546 new_entitled = *entitlement;
547 } else if (weight) {
548 new_weight = *weight;
549 new_entitled = mpp_data.entitled_mem;
550 } else
551 return -EINVAL;
552
553 pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
554 __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight);
555
556 pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
557 __FUNCTION__, new_entitled, new_weight);
558
559 rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
560 return rc;
561}
562
417/* 563/*
418 * Interface for changing system parameters (variable capacity weight 564 * Interface for changing system parameters (variable capacity weight
419 * and entitled capacity). Format of input is "param_name=value"; 565 * and entitled capacity). Format of input is "param_name=value";
@@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
427static ssize_t lparcfg_write(struct file *file, const char __user * buf, 573static ssize_t lparcfg_write(struct file *file, const char __user * buf,
428 size_t count, loff_t * off) 574 size_t count, loff_t * off)
429{ 575{
430 char *kbuf; 576 int kbuf_sz = 64;
577 char kbuf[kbuf_sz];
431 char *tmp; 578 char *tmp;
432 u64 new_entitled, *new_entitled_ptr = &new_entitled; 579 u64 new_entitled, *new_entitled_ptr = &new_entitled;
433 u8 new_weight, *new_weight_ptr = &new_weight; 580 u8 new_weight, *new_weight_ptr = &new_weight;
434 581 ssize_t retval;
435 unsigned long current_entitled; /* parameters for h_get_ppp */
436 unsigned long dummy;
437 unsigned long resource;
438 u8 current_weight;
439
440 ssize_t retval = -ENOMEM;
441 582
442 if (!firmware_has_feature(FW_FEATURE_SPLPAR) || 583 if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
443 firmware_has_feature(FW_FEATURE_ISERIES)) 584 firmware_has_feature(FW_FEATURE_ISERIES))
444 return -EINVAL; 585 return -EINVAL;
445 586
446 kbuf = kmalloc(count, GFP_KERNEL); 587 if (count > kbuf_sz)
447 if (!kbuf) 588 return -EINVAL;
448 goto out;
449 589
450 retval = -EFAULT;
451 if (copy_from_user(kbuf, buf, count)) 590 if (copy_from_user(kbuf, buf, count))
452 goto out; 591 return -EFAULT;
453 592
454 retval = -EINVAL;
455 kbuf[count - 1] = '\0'; 593 kbuf[count - 1] = '\0';
456 tmp = strchr(kbuf, '='); 594 tmp = strchr(kbuf, '=');
457 if (!tmp) 595 if (!tmp)
458 goto out; 596 return -EINVAL;
459 597
460 *tmp++ = '\0'; 598 *tmp++ = '\0';
461 599
@@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
463 char *endp; 601 char *endp;
464 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); 602 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
465 if (endp == tmp) 603 if (endp == tmp)
466 goto out; 604 return -EINVAL;
467 new_weight_ptr = &current_weight; 605
606 retval = update_ppp(new_entitled_ptr, NULL);
468 } else if (!strcmp(kbuf, "capacity_weight")) { 607 } else if (!strcmp(kbuf, "capacity_weight")) {
469 char *endp; 608 char *endp;
470 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); 609 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
471 if (endp == tmp) 610 if (endp == tmp)
472 goto out; 611 return -EINVAL;
473 new_entitled_ptr = &current_entitled;
474 } else
475 goto out;
476
477 /* Get our current parameters */
478 retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
479 if (retval) {
480 retval = -EIO;
481 goto out;
482 }
483
484 current_weight = (resource >> 5 * 8) & 0xFF;
485 612
486 pr_debug("%s: current_entitled = %lu, current_weight = %u\n", 613 retval = update_ppp(NULL, new_weight_ptr);
487 __func__, current_entitled, current_weight); 614 } else if (!strcmp(kbuf, "entitled_memory")) {
615 char *endp;
616 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
617 if (endp == tmp)
618 return -EINVAL;
488 619
489 pr_debug("%s: new_entitled = %lu, new_weight = %u\n", 620 retval = update_mpp(new_entitled_ptr, NULL);
490 __func__, *new_entitled_ptr, *new_weight_ptr); 621 } else if (!strcmp(kbuf, "entitled_memory_weight")) {
622 char *endp;
623 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
624 if (endp == tmp)
625 return -EINVAL;
491 626
492 retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, 627 retval = update_mpp(NULL, new_weight_ptr);
493 *new_weight_ptr); 628 } else
629 return -EINVAL;
494 630
495 if (retval == H_SUCCESS || retval == H_CONSTRAINED) { 631 if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
496 retval = count; 632 retval = count;
@@ -506,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
506 retval = -EIO; 642 retval = -EIO;
507 } 643 }
508 644
509out:
510 kfree(kbuf);
511 return retval; 645 return retval;
512} 646}
513 647
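The rewritten lparcfg_write() splits its input at '=' and dispatches on the parameter name. The same strchr/strtoul shape as a standalone parser (simple_strtoul is kernel-only, strtoul is the userspace equivalent; the printfs stand in for update_ppp/update_mpp):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_param(char *kbuf)
{
	char *tmp = strchr(kbuf, '=');
	char *endp;
	unsigned long val;

	if (!tmp)
		return -1;              /* -EINVAL in the kernel */
	*tmp++ = '\0';

	val = strtoul(tmp, &endp, 10);
	if (endp == tmp)
		return -1;              /* no digits after '=' */

	if (!strcmp(kbuf, "partition_entitled_capacity"))
		printf("update_ppp(entitlement=%lu)\n", val);
	else if (!strcmp(kbuf, "entitled_memory"))
		printf("update_mpp(entitlement=%lu)\n", val);
	else
		return -1;              /* unknown parameter name */
	return 0;
}

int main(void)
{
	char buf[] = "entitled_memory=4194304";

	return parse_param(buf) ? 1 : 0;
}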
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 219f3634115e..db2497ccc111 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -47,6 +47,8 @@
47#ifdef CONFIG_PPC64 47#ifdef CONFIG_PPC64
48#include <asm/firmware.h> 48#include <asm/firmware.h>
49#endif 49#endif
50#include <linux/kprobes.h>
51#include <linux/kdebug.h>
50 52
51extern unsigned long _get_SP(void); 53extern unsigned long _get_SP(void);
52 54
@@ -239,6 +241,35 @@ void discard_lazy_cpu_state(void)
239} 241}
240#endif /* CONFIG_SMP */ 242#endif /* CONFIG_SMP */
241 243
244void do_dabr(struct pt_regs *regs, unsigned long address,
245 unsigned long error_code)
246{
247 siginfo_t info;
248
249 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
250 11, SIGSEGV) == NOTIFY_STOP)
251 return;
252
253 if (debugger_dabr_match(regs))
254 return;
255
256 /* Clear the DAC and struct entries. One shot trigger */
257#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
258 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
259 | DBCR0_IDM));
260#endif
261
262 /* Clear the DABR */
263 set_dabr(0);
264
265 /* Deliver the signal to userspace */
266 info.si_signo = SIGTRAP;
267 info.si_errno = 0;
268 info.si_code = TRAP_HWBKPT;
269 info.si_addr = (void __user *)address;
270 force_sig_info(SIGTRAP, &info, current);
271}
272
242static DEFINE_PER_CPU(unsigned long, current_dabr); 273static DEFINE_PER_CPU(unsigned long, current_dabr);
243 274
244int set_dabr(unsigned long dabr) 275int set_dabr(unsigned long dabr)
@@ -254,6 +285,11 @@ int set_dabr(unsigned long dabr)
254#if defined(CONFIG_PPC64) || defined(CONFIG_6xx) 285#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
255 mtspr(SPRN_DABR, dabr); 286 mtspr(SPRN_DABR, dabr);
256#endif 287#endif
288
289#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
290 mtspr(SPRN_DAC1, dabr);
291#endif
292
257 return 0; 293 return 0;
258} 294}
259 295
@@ -337,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev,
337 if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) 373 if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
338 set_dabr(new->thread.dabr); 374 set_dabr(new->thread.dabr);
339 375
376#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
 377 /* Re-arm the new thread's DAC (HW breakpoint) if set; it is a one-shot trigger */
378 if (new->thread.dabr)
379 set_dabr(new->thread.dabr);
380#endif
381
340 new_thread = &new->thread; 382 new_thread = &new->thread;
341 old_thread = &current->thread; 383 old_thread = &current->thread;
342 384
@@ -525,6 +567,10 @@ void flush_thread(void)
525 if (current->thread.dabr) { 567 if (current->thread.dabr) {
526 current->thread.dabr = 0; 568 current->thread.dabr = 0;
527 set_dabr(0); 569 set_dabr(0);
570
571#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
572 current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
573#endif
528 } 574 }
529} 575}
530 576
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1ea8c8d3ce89..c4ab2195b9cb 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -643,6 +643,11 @@ static void __init early_cmdline_parse(void)
643#else 643#else
644#define OV5_MSI 0x00 644#define OV5_MSI 0x00
645#endif /* CONFIG_PCI_MSI */ 645#endif /* CONFIG_PCI_MSI */
646#ifdef CONFIG_PPC_SMLPAR
647#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */
648#else
649#define OV5_CMO 0x00
650#endif
646 651
647/* 652/*
648 * The architecture vector has an array of PVR mask/value pairs, 653 * The architecture vector has an array of PVR mask/value pairs,
@@ -687,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = {
687 0, /* don't halt */ 692 0, /* don't halt */
688 693
689 /* option vector 5: PAPR/OF options */ 694 /* option vector 5: PAPR/OF options */
690 3 - 2, /* length */ 695 5 - 2, /* length */
691 0, /* don't ignore, don't halt */ 696 0, /* don't ignore, don't halt */
692 OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | 697 OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
693 OV5_DONATE_DEDICATE_CPU | OV5_MSI, 698 OV5_DONATE_DEDICATE_CPU | OV5_MSI,
699 0,
700 OV5_CMO,
694}; 701};
695 702
696/* Old method - ELF header with PT_NOTE sections */ 703/* Old method - ELF header with PT_NOTE sections */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8feb93e7890c..a5d0e78779c8 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task)
703 703
704 if (regs != NULL) { 704 if (regs != NULL) {
705#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 705#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
706 task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC; 706 task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
707 regs->msr |= MSR_DE; 707 regs->msr |= MSR_DE;
708#else 708#else
709 regs->msr |= MSR_SE; 709 regs->msr |= MSR_SE;
@@ -716,9 +716,16 @@ void user_disable_single_step(struct task_struct *task)
716{ 716{
717 struct pt_regs *regs = task->thread.regs; 717 struct pt_regs *regs = task->thread.regs;
718 718
719
720#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
 721 /* If a DAC is set, do not single step; skip */
722 if (task->thread.dabr)
723 return;
724#endif
725
719 if (regs != NULL) { 726 if (regs != NULL) {
720#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 727#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
721 task->thread.dbcr0 = 0; 728 task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM);
722 regs->msr &= ~MSR_DE; 729 regs->msr &= ~MSR_DE;
723#else 730#else
724 regs->msr &= ~MSR_SE; 731 regs->msr &= ~MSR_SE;
@@ -727,22 +734,75 @@ void user_disable_single_step(struct task_struct *task)
727 clear_tsk_thread_flag(task, TIF_SINGLESTEP); 734 clear_tsk_thread_flag(task, TIF_SINGLESTEP);
728} 735}
729 736
730static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, 737int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
731 unsigned long data) 738 unsigned long data)
732{ 739{
733 /* We only support one DABR and no IABRS at the moment */ 740 /* For ppc64 we support one DABR and no IABRs at the moment.
 741 * For embedded processors we support one DAC and no IACs at the
 742 * moment.
743 */
734 if (addr > 0) 744 if (addr > 0)
735 return -EINVAL; 745 return -EINVAL;
736 746
737 /* The bottom 3 bits are flags */
738 if ((data & ~0x7UL) >= TASK_SIZE) 747 if ((data & ~0x7UL) >= TASK_SIZE)
739 return -EIO; 748 return -EIO;
740 749
741 /* Ensure translation is on */ 750#ifdef CONFIG_PPC64
751
752 /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
 753 * It was assumed, in previous implementations, that 3 bits were
754 * passed together with the data address, fitting the design of the
755 * DABR register, as follows:
756 *
757 * bit 0: Read flag
758 * bit 1: Write flag
759 * bit 2: Breakpoint translation
760 *
 761 * Thus, we use them here as such.
762 */
763
764 /* Ensure breakpoint translation bit is set */
742 if (data && !(data & DABR_TRANSLATION)) 765 if (data && !(data & DABR_TRANSLATION))
743 return -EIO; 766 return -EIO;
744 767
768 /* Move contents to the DABR register */
745 task->thread.dabr = data; 769 task->thread.dabr = data;
770
771#endif
772#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
773
774 /* As described above, it was assumed 3 bits were passed with the data
 775 * address, but we will assume only the mode bits will be passed
 776 * so as not to cause alignment restrictions for DAC-based processors.
777 */
778
779 /* DAC's hold the whole address without any mode flags */
780 task->thread.dabr = data & ~0x3UL;
781
782 if (task->thread.dabr == 0) {
783 task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM);
784 task->thread.regs->msr &= ~MSR_DE;
785 return 0;
786 }
787
788 /* Read or Write bits must be set */
789
790 if (!(data & 0x3UL))
791 return -EINVAL;
792
793 /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
794 register */
795 task->thread.dbcr0 = DBCR0_IDM;
796
797 /* Check for write and read flags and set DBCR0
798 accordingly */
799 if (data & 0x1UL)
800 task->thread.dbcr0 |= DBSR_DAC1R;
801 if (data & 0x2UL)
802 task->thread.dbcr0 |= DBSR_DAC1W;
803
804 task->thread.regs->msr |= MSR_DE;
805#endif
746 return 0; 806 return 0;
747} 807}
748 808
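The low bits of the ptrace data word carry the breakpoint mode: DABR parts additionally require the translation bit, while DAC parts honour only the read/write bits so the stored address keeps no alignment restriction. A sketch of the DAC-side decoding (flag values as in the hunk, addresses invented):

#include <stdio.h>

#define DABR_READ        0x1UL
#define DABR_WRITE       0x2UL
#define DABR_TRANSLATION 0x4UL   /* honoured on DABR parts only */

static int decode_dac(unsigned long data)
{
	unsigned long addr = data & ~0x3UL;     /* strip mode bits only */
	unsigned long mode = data & 0x3UL;

	if (addr && !mode)
		return -1;                      /* need read and/or write */
	printf("DAC addr=%#lx%s%s\n", addr,
	       (mode & DABR_READ)  ? " R" : "",
	       (mode & DABR_WRITE) ? " W" : "");
	return 0;
}

int main(void)
{
	decode_dac(0x10000003UL);                /* read+write watchpoint */
	return decode_dac(0x10000000UL) ? 0 : 1; /* rejected: no mode bits */
}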
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index ad55488939c3..7aada783ec6a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -145,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
145 * user space. The DABR will have been cleared if it 145 * user space. The DABR will have been cleared if it
146 * triggered inside the kernel. 146 * triggered inside the kernel.
147 */ 147 */
148 if (current->thread.dabr) 148 if (current->thread.dabr) {
149 set_dabr(current->thread.dabr); 149 set_dabr(current->thread.dabr);
150#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
151 mtspr(SPRN_DBCR0, current->thread.dbcr0);
152#endif
153 }
150 154
151 if (is32) { 155 if (is32) {
152 if (ka.sa.sa_flags & SA_SIGINFO) 156 if (ka.sa.sa_flags & SA_SIGINFO)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index aba0ba95f062..800e5e9a087b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -529,7 +529,8 @@ static void register_nodes(void)
529#endif 529#endif
530 530
531/* Only valid if CPU is present. */ 531/* Only valid if CPU is present. */
532static ssize_t show_physical_id(struct sys_device *dev, char *buf) 532static ssize_t show_physical_id(struct sys_device *dev,
533 struct sysdev_attribute *attr, char *buf)
533{ 534{
534 struct cpu *cpu = container_of(dev, struct cpu, sysdev); 535 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
535 536
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 878fbddb6ae1..81ccb8dd1a54 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1067,6 +1067,22 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
1067 } 1067 }
1068 1068
1069 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); 1069 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
1070 } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
1071 regs->msr &= ~MSR_DE;
1072
1073 if (user_mode(regs)) {
1074 current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W |
1075 DBCR0_IDM);
1076 } else {
 1077 /* Disable DAC interrupts */
1078 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R |
1079 DBSR_DAC1W | DBCR0_IDM));
1080
1081 /* Clear the DAC event */
1082 mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W));
1083 }
 1084 /* Set up and send the trap to the handler */
1085 do_dabr(regs, mfspr(SPRN_DAC1), debug_status);
1070 } 1086 }
1071} 1087}
1072#endif /* CONFIG_4xx || CONFIG_BOOKE */ 1088#endif /* CONFIG_4xx || CONFIG_BOOKE */
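The kernel-mode branch above acknowledges the event by writing the DAC bits back to DBSR, which is a write-one-to-clear status register. A toy model of that semantic (bit positions illustrative):

#include <stdio.h>
#include <stdint.h>

#define DBSR_DAC1R 0x00080000u   /* illustrative bit positions */
#define DBSR_DAC1W 0x00040000u
#define DBSR_IC    0x08000000u

static uint32_t dbsr;            /* models the status SPR */

static void mtspr_dbsr(uint32_t v)
{
	dbsr &= ~v;              /* W1C: writing 1 clears the bit */
}

int main(void)
{
	dbsr = DBSR_DAC1W | DBSR_IC;             /* two pending events   */
	mtspr_dbsr(DBSR_DAC1R | DBSR_DAC1W);     /* ack DAC events only  */
	printf("remaining events: %#x\n", dbsr); /* IC is still pending  */
	return 0;
}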
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index b77f8af7ddde..ade8aeaa2e70 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1,11 +1,12 @@
1/* 1/*
2 * IBM PowerPC Virtual I/O Infrastructure Support. 2 * IBM PowerPC Virtual I/O Infrastructure Support.
3 * 3 *
4 * Copyright (c) 2003-2005 IBM Corp. 4 * Copyright (c) 2003,2008 IBM Corp.
5 * Dave Engebretsen engebret@us.ibm.com 5 * Dave Engebretsen engebret@us.ibm.com
6 * Santiago Leon santil@us.ibm.com 6 * Santiago Leon santil@us.ibm.com
7 * Hollis Blanchard <hollisb@us.ibm.com> 7 * Hollis Blanchard <hollisb@us.ibm.com>
8 * Stephen Rothwell 8 * Stephen Rothwell
9 * Robert Jennings <rcjenn@us.ibm.com>
9 * 10 *
10 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */
46 .dev.bus = &vio_bus_type, 47 .dev.bus = &vio_bus_type,
47}; 48};
48 49
50#ifdef CONFIG_PPC_SMLPAR
51/**
52 * vio_cmo_pool - A pool of IO memory for CMO use
53 *
54 * @size: The size of the pool in bytes
55 * @free: The amount of free memory in the pool
56 */
57struct vio_cmo_pool {
58 size_t size;
59 size_t free;
60};
61
62/* How many ms to delay queued balance work */
63#define VIO_CMO_BALANCE_DELAY 100
64
65/* Portion out IO memory to CMO devices by this chunk size */
66#define VIO_CMO_BALANCE_CHUNK 131072
67
68/**
69 * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
70 *
 71 * @viodev: struct vio_dev pointer
72 * @list: pointer to other devices on bus that are being tracked
73 */
74struct vio_cmo_dev_entry {
75 struct vio_dev *viodev;
76 struct list_head list;
77};
78
79/**
80 * vio_cmo - VIO bus accounting structure for CMO entitlement
81 *
82 * @lock: spinlock for entire structure
83 * @balance_q: work queue for balancing system entitlement
84 * @device_list: list of CMO-enabled devices requiring entitlement
85 * @entitled: total system entitlement in bytes
86 * @reserve: pool of memory from which devices reserve entitlement, incl. spare
87 * @excess: pool of excess entitlement not needed for device reserves or spare
88 * @spare: IO memory for device hotplug functionality
89 * @min: minimum necessary for system operation
90 * @desired: desired memory for system operation
91 * @curr: bytes currently allocated
92 * @high: high water mark for IO data usage
93 */
94struct vio_cmo {
95 spinlock_t lock;
96 struct delayed_work balance_q;
97 struct list_head device_list;
98 size_t entitled;
99 struct vio_cmo_pool reserve;
100 struct vio_cmo_pool excess;
101 size_t spare;
102 size_t min;
103 size_t desired;
104 size_t curr;
105 size_t high;
106} vio_cmo;
107
108/**
 109 * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
110 */
111static int vio_cmo_num_OF_devs(void)
112{
113 struct device_node *node_vroot;
114 int count = 0;
115
116 /*
117 * Count the number of vdevice entries with an
118 * ibm,my-dma-window OF property
119 */
120 node_vroot = of_find_node_by_name(NULL, "vdevice");
121 if (node_vroot) {
122 struct device_node *of_node;
123 struct property *prop;
124
125 for_each_child_of_node(node_vroot, of_node) {
126 prop = of_find_property(of_node, "ibm,my-dma-window",
127 NULL);
128 if (prop)
129 count++;
130 }
131 }
132 of_node_put(node_vroot);
133 return count;
134}
135
136/**
 137 * vio_cmo_alloc - allocate IO memory for CMO-enabled devices
138 *
139 * @viodev: VIO device requesting IO memory
140 * @size: size of allocation requested
141 *
142 * Allocations come from memory reserved for the devices and any excess
143 * IO memory available to all devices. The spare pool used to service
144 * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
145 * made available.
146 *
147 * Return codes:
148 * 0 for successful allocation and -ENOMEM for a failure
149 */
150static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
151{
152 unsigned long flags;
153 size_t reserve_free = 0;
154 size_t excess_free = 0;
155 int ret = -ENOMEM;
156
157 spin_lock_irqsave(&vio_cmo.lock, flags);
158
159 /* Determine the amount of free entitlement available in reserve */
160 if (viodev->cmo.entitled > viodev->cmo.allocated)
161 reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
162
163 /* If spare is not fulfilled, the excess pool can not be used. */
164 if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
165 excess_free = vio_cmo.excess.free;
166
167 /* The request can be satisfied */
168 if ((reserve_free + excess_free) >= size) {
169 vio_cmo.curr += size;
170 if (vio_cmo.curr > vio_cmo.high)
171 vio_cmo.high = vio_cmo.curr;
172 viodev->cmo.allocated += size;
173 size -= min(reserve_free, size);
174 vio_cmo.excess.free -= size;
175 ret = 0;
176 }
177
178 spin_unlock_irqrestore(&vio_cmo.lock, flags);
179 return ret;
180}
181
182/**
183 * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
184 * @viodev: VIO device freeing IO memory
185 * @size: size of deallocation
186 *
187 * IO memory is freed by the device back to the correct memory pools.
188 * The spare pool is replenished first from either memory pool, then
189 * the reserve pool is used to reduce device entitlement, the excess
190 * pool is used to increase the reserve pool toward the desired entitlement
191 * target, and then the remaining memory is returned to the pools.
192 *
193 */
194static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
195{
196 unsigned long flags;
197 size_t spare_needed = 0;
198 size_t excess_freed = 0;
199 size_t reserve_freed = size;
200 size_t tmp;
201 int balance = 0;
202
203 spin_lock_irqsave(&vio_cmo.lock, flags);
204 vio_cmo.curr -= size;
205
206 /* Amount of memory freed from the excess pool */
207 if (viodev->cmo.allocated > viodev->cmo.entitled) {
208 excess_freed = min(reserve_freed, (viodev->cmo.allocated -
209 viodev->cmo.entitled));
210 reserve_freed -= excess_freed;
211 }
212
213 /* Remove allocation from device */
214 viodev->cmo.allocated -= (reserve_freed + excess_freed);
215
216 /* Spare is a subset of the reserve pool, replenish it first. */
217 spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
218
219 /*
220 * Replenish the spare in the reserve pool from the excess pool.
221 * This moves entitlement into the reserve pool.
222 */
223 if (spare_needed && excess_freed) {
224 tmp = min(excess_freed, spare_needed);
225 vio_cmo.excess.size -= tmp;
226 vio_cmo.reserve.size += tmp;
227 vio_cmo.spare += tmp;
228 excess_freed -= tmp;
229 spare_needed -= tmp;
230 balance = 1;
231 }
232
233 /*
234 * Replenish the spare in the reserve pool from the reserve pool.
235 * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
236 * if needed, and gives it to the spare pool. The amount of used
237 * memory in this pool does not change.
238 */
239 if (spare_needed && reserve_freed) {
240 tmp = min(spare_needed, min(reserve_freed,
241 (viodev->cmo.entitled -
242 VIO_CMO_MIN_ENT)));
243
244 vio_cmo.spare += tmp;
245 viodev->cmo.entitled -= tmp;
246 reserve_freed -= tmp;
247 spare_needed -= tmp;
248 balance = 1;
249 }
250
251 /*
252 * Increase the reserve pool until the desired allocation is met.
253 * Move an allocation freed from the excess pool into the reserve
254 * pool and schedule a balance operation.
255 */
256 if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
257 tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
258
259 vio_cmo.excess.size -= tmp;
260 vio_cmo.reserve.size += tmp;
261 excess_freed -= tmp;
262 balance = 1;
263 }
264
265 /* Return memory from the excess pool to that pool */
266 if (excess_freed)
267 vio_cmo.excess.free += excess_freed;
268
269 if (balance)
270 schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
271 spin_unlock_irqrestore(&vio_cmo.lock, flags);
272}
273
274/**
275 * vio_cmo_entitlement_update - Manage system entitlement changes
276 *
277 * @new_entitlement: new system entitlement to attempt to accommodate
278 *
279 * Increases in entitlement will be used to fulfill the spare entitlement
280 * and the rest is given to the excess pool. Decreases, if they are
 281 * possible, come from the excess pool and from unused device entitlement.
 282 *
 283 * Returns: 0 on success, -ENOMEM when the change cannot be made
284 */
285int vio_cmo_entitlement_update(size_t new_entitlement)
286{
287 struct vio_dev *viodev;
288 struct vio_cmo_dev_entry *dev_ent;
289 unsigned long flags;
290 size_t avail, delta, tmp;
291
292 spin_lock_irqsave(&vio_cmo.lock, flags);
293
294 /* Entitlement increases */
295 if (new_entitlement > vio_cmo.entitled) {
296 delta = new_entitlement - vio_cmo.entitled;
297
298 /* Fulfill spare allocation */
299 if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
300 tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
301 vio_cmo.spare += tmp;
302 vio_cmo.reserve.size += tmp;
303 delta -= tmp;
304 }
305
306 /* Remaining new allocation goes to the excess pool */
307 vio_cmo.entitled += delta;
308 vio_cmo.excess.size += delta;
309 vio_cmo.excess.free += delta;
310
311 goto out;
312 }
313
314 /* Entitlement decreases */
315 delta = vio_cmo.entitled - new_entitlement;
316 avail = vio_cmo.excess.free;
317
318 /*
319 * Need to check how much unused entitlement each device can
320 * sacrifice to fulfill entitlement change.
321 */
322 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
323 if (avail >= delta)
324 break;
325
326 viodev = dev_ent->viodev;
327 if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
328 (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
329 avail += viodev->cmo.entitled -
330 max_t(size_t, viodev->cmo.allocated,
331 VIO_CMO_MIN_ENT);
332 }
333
334 if (delta <= avail) {
335 vio_cmo.entitled -= delta;
336
337 /* Take entitlement from the excess pool first */
338 tmp = min(vio_cmo.excess.free, delta);
339 vio_cmo.excess.size -= tmp;
340 vio_cmo.excess.free -= tmp;
341 delta -= tmp;
342
343 /*
344 * Remove all but VIO_CMO_MIN_ENT bytes from devices
345 * until entitlement change is served
346 */
347 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
348 if (!delta)
349 break;
350
351 viodev = dev_ent->viodev;
352 tmp = 0;
353 if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
354 (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
355 tmp = viodev->cmo.entitled -
356 max_t(size_t, viodev->cmo.allocated,
357 VIO_CMO_MIN_ENT);
358 viodev->cmo.entitled -= min(tmp, delta);
359 delta -= min(tmp, delta);
360 }
361 } else {
362 spin_unlock_irqrestore(&vio_cmo.lock, flags);
363 return -ENOMEM;
364 }
365
366out:
367 schedule_delayed_work(&vio_cmo.balance_q, 0);
368 spin_unlock_irqrestore(&vio_cmo.lock, flags);
369 return 0;
370}
371
372/**
373 * vio_cmo_balance - Balance entitlement among devices
374 *
375 * @work: work queue structure for this operation
376 *
377 * Any system entitlement above the minimum needed for devices, or
378 * already allocated to devices, can be distributed to the devices.
379 * The list of devices is iterated through to recalculate the desired
380 * entitlement level and to determine how much entitlement above the
381 * minimum entitlement is allocated to devices.
382 *
383 * Small chunks of the available entitlement are given to devices until
384 * their requirements are fulfilled or there is no entitlement left to give.
385 * Upon completion sizes of the reserve and excess pools are calculated.
386 *
387 * The system minimum entitlement level is also recalculated here.
388 * Entitlement will be reserved for devices even after vio_bus_remove to
389 * accommodate reloading the driver. The OF tree is walked to count the
 390 * number of devices present, and entitlement is removed for devices
 391 * that have actually left the system after vio_bus_remove was called.
392 */
393static void vio_cmo_balance(struct work_struct *work)
394{
395 struct vio_cmo *cmo;
396 struct vio_dev *viodev;
397 struct vio_cmo_dev_entry *dev_ent;
398 unsigned long flags;
399 size_t avail = 0, level, chunk, need;
400 int devcount = 0, fulfilled;
401
402 cmo = container_of(work, struct vio_cmo, balance_q.work);
403
404 spin_lock_irqsave(&vio_cmo.lock, flags);
405
406 /* Calculate minimum entitlement and fulfill spare */
407 cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
408 BUG_ON(cmo->min > cmo->entitled);
409 cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
410 cmo->min += cmo->spare;
411 cmo->desired = cmo->min;
412
413 /*
414 * Determine how much entitlement is available and reset device
415 * entitlements
416 */
417 avail = cmo->entitled - cmo->spare;
418 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
419 viodev = dev_ent->viodev;
420 devcount++;
421 viodev->cmo.entitled = VIO_CMO_MIN_ENT;
422 cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
423 avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
424 }
425
426 /*
427 * Having provided each device with the minimum entitlement, loop
428 * over the devices portioning out the remaining entitlement
429 * until there is nothing left.
430 */
431 level = VIO_CMO_MIN_ENT;
432 while (avail) {
433 fulfilled = 0;
434 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
435 viodev = dev_ent->viodev;
436
437 if (viodev->cmo.desired <= level) {
438 fulfilled++;
439 continue;
440 }
441
442 /*
443 * Give the device up to VIO_CMO_BALANCE_CHUNK
444 * bytes of entitlement, but do not exceed the
445 * desired level of entitlement for the device.
446 */
447 chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
448 chunk = min(chunk, (viodev->cmo.desired -
449 viodev->cmo.entitled));
450 viodev->cmo.entitled += chunk;
451
452 /*
453 * If the memory for this entitlement increase was
454 * already allocated to the device it does not come
455 * from the available pool being portioned out.
456 */
 457 need = max(viodev->cmo.allocated, viodev->cmo.entitled) -
458 max(viodev->cmo.allocated, level);
459 avail -= need;
460
461 }
462 if (fulfilled == devcount)
463 break;
464 level += VIO_CMO_BALANCE_CHUNK;
465 }
466
467 /* Calculate new reserve and excess pool sizes */
468 cmo->reserve.size = cmo->min;
469 cmo->excess.free = 0;
470 cmo->excess.size = 0;
471 need = 0;
472 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
473 viodev = dev_ent->viodev;
474 /* Calculated reserve size above the minimum entitlement */
475 if (viodev->cmo.entitled)
476 cmo->reserve.size += (viodev->cmo.entitled -
477 VIO_CMO_MIN_ENT);
478 /* Calculated used excess entitlement */
479 if (viodev->cmo.allocated > viodev->cmo.entitled)
480 need += viodev->cmo.allocated - viodev->cmo.entitled;
481 }
482 cmo->excess.size = cmo->entitled - cmo->reserve.size;
483 cmo->excess.free = cmo->excess.size - need;
484
485 cancel_delayed_work(container_of(work, struct delayed_work, work));
486 spin_unlock_irqrestore(&vio_cmo.lock, flags);
487}
488
489static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
490 dma_addr_t *dma_handle, gfp_t flag)
491{
492 struct vio_dev *viodev = to_vio_dev(dev);
493 void *ret;
494
495 if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
496 atomic_inc(&viodev->cmo.allocs_failed);
497 return NULL;
498 }
499
500 ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag);
501 if (unlikely(ret == NULL)) {
502 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
503 atomic_inc(&viodev->cmo.allocs_failed);
504 }
505
506 return ret;
507}
508
509static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
510 void *vaddr, dma_addr_t dma_handle)
511{
512 struct vio_dev *viodev = to_vio_dev(dev);
513
514 dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);
515
516 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
517}
518
519static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
520 size_t size,
521 enum dma_data_direction direction,
522 struct dma_attrs *attrs)
523{
524 struct vio_dev *viodev = to_vio_dev(dev);
525 dma_addr_t ret = DMA_ERROR_CODE;
526
527 if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
528 atomic_inc(&viodev->cmo.allocs_failed);
529 return ret;
530 }
531
532 ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
533 if (unlikely(dma_mapping_error(ret))) {
534 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
535 atomic_inc(&viodev->cmo.allocs_failed);
536 }
537
538 return ret;
539}
540
541static void vio_dma_iommu_unmap_single(struct device *dev,
542 dma_addr_t dma_handle, size_t size,
543 enum dma_data_direction direction,
544 struct dma_attrs *attrs)
545{
546 struct vio_dev *viodev = to_vio_dev(dev);
547
548 dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
549
550 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
551}
552
553static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
554 int nelems, enum dma_data_direction direction,
555 struct dma_attrs *attrs)
556{
557 struct vio_dev *viodev = to_vio_dev(dev);
558 struct scatterlist *sgl;
559 int ret, count = 0;
560 size_t alloc_size = 0;
561
562 for (sgl = sglist; count < nelems; count++, sgl++)
563 alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
564
565 if (vio_cmo_alloc(viodev, alloc_size)) {
566 atomic_inc(&viodev->cmo.allocs_failed);
567 return 0;
568 }
569
570 ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
571
572 if (unlikely(!ret)) {
573 vio_cmo_dealloc(viodev, alloc_size);
574 atomic_inc(&viodev->cmo.allocs_failed);
575 }
576
577 for (sgl = sglist, count = 0; count < ret; count++, sgl++)
578 alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
579 if (alloc_size)
580 vio_cmo_dealloc(viodev, alloc_size);
581
582 return ret;
583}
584
585static void vio_dma_iommu_unmap_sg(struct device *dev,
586 struct scatterlist *sglist, int nelems,
587 enum dma_data_direction direction,
588 struct dma_attrs *attrs)
589{
590 struct vio_dev *viodev = to_vio_dev(dev);
591 struct scatterlist *sgl;
592 size_t alloc_size = 0;
593 int count = 0;
594
595 for (sgl = sglist; count < nelems; count++, sgl++)
596 alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
597
598 dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
599
600 vio_cmo_dealloc(viodev, alloc_size);
601}
602
603struct dma_mapping_ops vio_dma_mapping_ops = {
604 .alloc_coherent = vio_dma_iommu_alloc_coherent,
605 .free_coherent = vio_dma_iommu_free_coherent,
606 .map_single = vio_dma_iommu_map_single,
607 .unmap_single = vio_dma_iommu_unmap_single,
608 .map_sg = vio_dma_iommu_map_sg,
609 .unmap_sg = vio_dma_iommu_unmap_sg,
610};
611
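Every op in vio_dma_mapping_ops follows the same wrapper discipline: charge the CMO budget, delegate to the underlying dma_iommu_ops op, and refund the charge if the backend fails. Reduced to its shape (stand-in functions, not the kernel API):

#include <stdio.h>

static int budget = 2;                  /* stands in for the CMO pools */

static int charge(void)  { return budget > 0 ? (budget--, 0) : -1; }
static void refund(void) { budget++; }

static int backend_map(void) { return 0; }  /* dma_iommu_ops stand-in */

static int wrapped_map(void)
{
	if (charge())
		return -1;              /* bump allocs_failed here */

	if (backend_map() != 0) {
		refund();               /* undo the accounting on failure */
		return -1;
	}
	return 0;
}

int main(void)
{
	int a = wrapped_map();
	int b = wrapped_map();
	int c = wrapped_map();          /* fails: budget exhausted */

	printf("%d %d %d\n", a, b, c);
	return 0;
}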
612/**
613 * vio_cmo_set_dev_desired - Set desired entitlement for a device
614 *
615 * @viodev: struct vio_dev for device to alter
 616 * @desired: new desired entitlement level in bytes
617 *
618 * For use by devices to request a change to their entitlement at runtime or
619 * through sysfs. The desired entitlement level is changed and a balancing
620 * of system resources is scheduled to run in the future.
621 */
622void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
623{
624 unsigned long flags;
625 struct vio_cmo_dev_entry *dev_ent;
626 int found = 0;
627
628 if (!firmware_has_feature(FW_FEATURE_CMO))
629 return;
630
631 spin_lock_irqsave(&vio_cmo.lock, flags);
632 if (desired < VIO_CMO_MIN_ENT)
633 desired = VIO_CMO_MIN_ENT;
634
635 /*
636 * Changes will not be made for devices not in the device list.
637 * If it is not in the device list, then no driver is loaded
 638 * for the device and it cannot receive entitlement.
639 */
640 list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
641 if (viodev == dev_ent->viodev) {
642 found = 1;
643 break;
644 }
 645 if (!found) {
 646 spin_unlock_irqrestore(&vio_cmo.lock, flags);
 647 return;
 648 }
647
648 /* Increase/decrease in desired device entitlement */
649 if (desired >= viodev->cmo.desired) {
650 /* Just bump the bus and device values prior to a balance*/
651 vio_cmo.desired += desired - viodev->cmo.desired;
652 viodev->cmo.desired = desired;
653 } else {
654 /* Decrease bus and device values for desired entitlement */
655 vio_cmo.desired -= viodev->cmo.desired - desired;
656 viodev->cmo.desired = desired;
657 /*
658 * If less entitlement is desired than current entitlement, move
659 * any reserve memory in the change region to the excess pool.
660 */
661 if (viodev->cmo.entitled > desired) {
662 vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
663 vio_cmo.excess.size += viodev->cmo.entitled - desired;
664 /*
665 * If entitlement moving from the reserve pool to the
666 * excess pool is currently unused, add to the excess
667 * free counter.
668 */
669 if (viodev->cmo.allocated < viodev->cmo.entitled)
670 vio_cmo.excess.free += viodev->cmo.entitled -
671 max(viodev->cmo.allocated, desired);
672 viodev->cmo.entitled = desired;
673 }
674 }
675 schedule_delayed_work(&vio_cmo.balance_q, 0);
676 spin_unlock_irqrestore(&vio_cmo.lock, flags);
677}
678
679/**
680 * vio_cmo_bus_probe - Handle CMO specific bus probe activities
681 *
 682 * @viodev: Pointer to struct vio_dev for device
683 *
 684 * Determine the device's IO memory entitlement needs, attempting
685 * to satisfy the system minimum entitlement at first and scheduling
686 * a balance operation to take care of the rest at a later time.
687 *
688 * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
689 * -ENOMEM when entitlement is not available for device or
690 * device entry.
691 *
692 */
693static int vio_cmo_bus_probe(struct vio_dev *viodev)
694{
695 struct vio_cmo_dev_entry *dev_ent;
696 struct device *dev = &viodev->dev;
697 struct vio_driver *viodrv = to_vio_driver(dev->driver);
698 unsigned long flags;
699 size_t size;
700
701 /*
702 * Check to see that device has a DMA window and configure
703 * entitlement for the device.
704 */
705 if (of_get_property(viodev->dev.archdata.of_node,
706 "ibm,my-dma-window", NULL)) {
707 /* Check that the driver is CMO enabled and get desired DMA */
708 if (!viodrv->get_desired_dma) {
709 dev_err(dev, "%s: device driver does not support CMO\n",
710 __func__);
711 return -EINVAL;
712 }
713
714 viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
715 if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
716 viodev->cmo.desired = VIO_CMO_MIN_ENT;
717 size = VIO_CMO_MIN_ENT;
718
719 dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
720 GFP_KERNEL);
721 if (!dev_ent)
722 return -ENOMEM;
723
724 dev_ent->viodev = viodev;
725 spin_lock_irqsave(&vio_cmo.lock, flags);
726 list_add(&dev_ent->list, &vio_cmo.device_list);
727 } else {
728 viodev->cmo.desired = 0;
729 size = 0;
730 spin_lock_irqsave(&vio_cmo.lock, flags);
731 }
732
733 /*
734 * If the needs for vio_cmo.min have not changed since they
735 * were last set, the number of devices in the OF tree has
736 * been constant and the IO memory for this is already in
737 * the reserve pool.
738 */
739 if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
740 VIO_CMO_MIN_ENT)) {
 741 /* Update desired entitlement if the device requires it */
742 if (size)
743 vio_cmo.desired += (viodev->cmo.desired -
744 VIO_CMO_MIN_ENT);
745 } else {
746 size_t tmp;
747
748 tmp = vio_cmo.spare + vio_cmo.excess.free;
749 if (tmp < size) {
750 dev_err(dev, "%s: insufficient free "
751 "entitlement to add device. "
752 "Need %lu, have %lu\n", __func__,
753				size, tmp);
754 spin_unlock_irqrestore(&vio_cmo.lock, flags);
755 return -ENOMEM;
756 }
757
758 /* Use excess pool first to fulfill request */
759 tmp = min(size, vio_cmo.excess.free);
760 vio_cmo.excess.free -= tmp;
761 vio_cmo.excess.size -= tmp;
762 vio_cmo.reserve.size += tmp;
763
764 /* Use spare if excess pool was insufficient */
765 vio_cmo.spare -= size - tmp;
766
767 /* Update bus accounting */
768 vio_cmo.min += size;
769 vio_cmo.desired += viodev->cmo.desired;
770 }
771 spin_unlock_irqrestore(&vio_cmo.lock, flags);
772 return 0;
773}
774
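On the driver side of this probe path, a CMO-capable vio driver supplies the get_desired_dma callback that vio_cmo_bus_probe() dereferences above; a minimal sketch, with invented buffer counts:

	/* Illustrative only: report how much IO memory this driver wants. */
	static unsigned long example_get_desired_dma(struct vio_dev *vdev)
	{
		/* Assumption: 16 one-page receive buffers plus one page of
		 * descriptors. vio_cmo_bus_probe() aligns the result to the
		 * IOMMU page size and raises it to at least VIO_CMO_MIN_ENT. */
		return 17 * 4096;
	}

The callback is wired in through the get_desired_dma member of struct vio_driver.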
775/**
776 * vio_cmo_bus_remove - Handle CMO specific bus removal activities
777 *
778 * @viodev - Pointer to struct vio_dev for device
779 *
 780 * Remove the device from the CMO device list. The minimum entitlement
 781 * remains reserved for the device as long as it is in the system. Any
 782 * remaining entitlement the device held is returned
 783 * to the system.
784 */
785static void vio_cmo_bus_remove(struct vio_dev *viodev)
786{
787 struct vio_cmo_dev_entry *dev_ent;
788 unsigned long flags;
789 size_t tmp;
790
791 spin_lock_irqsave(&vio_cmo.lock, flags);
792 if (viodev->cmo.allocated) {
793 dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
794 "allocated after remove operation.\n",
795 __func__, viodev->cmo.allocated);
796 BUG();
797 }
798
799 /*
800 * Remove the device from the device list being maintained for
801 * CMO-enabled devices.
802 */
803 list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
804 if (viodev == dev_ent->viodev) {
805 list_del(&dev_ent->list);
806 kfree(dev_ent);
807 break;
808 }
809
810 /*
 811 * Devices that do not require any entitlement need no further
 812 * processing. Otherwise, return the device's entitlement
 813 * back to the pools.
814 */
815 if (viodev->cmo.entitled) {
816 /*
 817 * This device has not yet left the OF tree; its
 818 * minimum entitlement remains in vio_cmo.min and
 819 * vio_cmo.desired.
820 */
821 vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
822
823 /*
 824 * Keep the device's minimum allocation in the reserve
 825 * pool as long as the device exists in the OF tree, as
 826 * determined by a later balance operation.
827 */
828 viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
829
830 /* Replenish spare from freed reserve pool */
831 if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
832 tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
833 vio_cmo.spare));
834 vio_cmo.spare += tmp;
835 viodev->cmo.entitled -= tmp;
836 }
837
838 /* Remaining reserve goes to excess pool */
839 vio_cmo.excess.size += viodev->cmo.entitled;
840 vio_cmo.excess.free += viodev->cmo.entitled;
841 vio_cmo.reserve.size -= viodev->cmo.entitled;
842
843 /*
844 * Until the device is removed it will keep a
 845 * minimum entitlement; this guarantees that
 846 * a module unload/load cycle can succeed.
847 */
848 viodev->cmo.entitled = VIO_CMO_MIN_ENT;
849 viodev->cmo.desired = VIO_CMO_MIN_ENT;
850 atomic_set(&viodev->cmo.allocs_failed, 0);
851 }
852
853 spin_unlock_irqrestore(&vio_cmo.lock, flags);
854}
855
856static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
857{
858 vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
859 viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
860}
861
862/**
863 * vio_cmo_bus_init - CMO entitlement initialization at bus init time
864 *
865 * Set up the reserve and excess entitlement pools based on available
866 * system entitlement and the number of devices in the OF tree that
867 * require entitlement in the reserve pool.
868 */
869static void vio_cmo_bus_init(void)
870{
871 struct hvcall_mpp_data mpp_data;
872 int err;
873
874 memset(&vio_cmo, 0, sizeof(struct vio_cmo));
875 spin_lock_init(&vio_cmo.lock);
876 INIT_LIST_HEAD(&vio_cmo.device_list);
877 INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
878
879 /* Get current system entitlement */
880 err = h_get_mpp(&mpp_data);
881
882 /*
 883 * On failure, continue with entitlement set to 0; we will panic()
 884 * later when the spare is reserved.
885 */
886 if (err != H_SUCCESS) {
887		printk(KERN_ERR "%s: unable to determine system IO "
888 "entitlement. (%d)\n", __func__, err);
889 vio_cmo.entitled = 0;
890 } else {
891 vio_cmo.entitled = mpp_data.entitled_mem;
892 }
893
894 /* Set reservation and check against entitlement */
895 vio_cmo.spare = VIO_CMO_MIN_ENT;
896 vio_cmo.reserve.size = vio_cmo.spare;
897 vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
898 VIO_CMO_MIN_ENT);
899 if (vio_cmo.reserve.size > vio_cmo.entitled) {
900 printk(KERN_ERR "%s: insufficient system entitlement\n",
901 __func__);
902 panic("%s: Insufficient system entitlement", __func__);
903 }
904
905 /* Set the remaining accounting variables */
906 vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
907 vio_cmo.excess.free = vio_cmo.excess.size;
908 vio_cmo.min = vio_cmo.reserve.size;
909 vio_cmo.desired = vio_cmo.reserve.size;
910}
911
912/* sysfs device functions and data structures for CMO */
913
914#define viodev_cmo_rd_attr(name) \
915static ssize_t viodev_cmo_##name##_show(struct device *dev, \
916 struct device_attribute *attr, \
917 char *buf) \
918{ \
919 return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
920}
921
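For reference, viodev_cmo_rd_attr(desired) expands to the following (whitespace adjusted):

	static ssize_t viodev_cmo_desired_show(struct device *dev,
					       struct device_attribute *attr,
					       char *buf)
	{
		return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.desired);
	}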
922static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
923 struct device_attribute *attr, char *buf)
924{
925 struct vio_dev *viodev = to_vio_dev(dev);
926 return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
927}
928
929static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
930 struct device_attribute *attr, const char *buf, size_t count)
931{
932 struct vio_dev *viodev = to_vio_dev(dev);
933 atomic_set(&viodev->cmo.allocs_failed, 0);
934 return count;
935}
936
937static ssize_t viodev_cmo_desired_set(struct device *dev,
938 struct device_attribute *attr, const char *buf, size_t count)
939{
940 struct vio_dev *viodev = to_vio_dev(dev);
941 size_t new_desired;
942 int ret;
943
944 ret = strict_strtoul(buf, 10, &new_desired);
945 if (ret)
946 return ret;
947
948 vio_cmo_set_dev_desired(viodev, new_desired);
949 return count;
950}
951
952viodev_cmo_rd_attr(desired);
953viodev_cmo_rd_attr(entitled);
954viodev_cmo_rd_attr(allocated);
955
956static ssize_t name_show(struct device *, struct device_attribute *, char *);
957static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
958static struct device_attribute vio_cmo_dev_attrs[] = {
959 __ATTR_RO(name),
960 __ATTR_RO(devspec),
961 __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
962 viodev_cmo_desired_show, viodev_cmo_desired_set),
963 __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
964 __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
965 __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
966 viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
967 __ATTR_NULL
968};
969
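These attributes appear per device in sysfs; a minimal userspace sketch of reading one (the device name below is a made-up example, real names come from the device tree):

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical vio device name. */
		FILE *f = fopen("/sys/devices/vio/30000002/cmo_entitled", "r");
		unsigned long entitled;

		if (f && fscanf(f, "%lu", &entitled) == 1)
			printf("entitled: %lu bytes\n", entitled);
		if (f)
			fclose(f);
		return 0;
	}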
970/* sysfs bus functions and data structures for CMO */
971
972#define viobus_cmo_rd_attr(name) \
973static ssize_t \
974viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \
975{ \
976 return sprintf(buf, "%lu\n", vio_cmo.name); \
977}
978
979#define viobus_cmo_pool_rd_attr(name, var) \
980static ssize_t \
981viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \
982{ \
983 return sprintf(buf, "%lu\n", vio_cmo.name.var); \
984}
985
986static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
987 size_t count)
988{
989 unsigned long flags;
990
991 spin_lock_irqsave(&vio_cmo.lock, flags);
992 vio_cmo.high = vio_cmo.curr;
993 spin_unlock_irqrestore(&vio_cmo.lock, flags);
994
995 return count;
996}
997
998viobus_cmo_rd_attr(entitled);
999viobus_cmo_pool_rd_attr(reserve, size);
1000viobus_cmo_pool_rd_attr(excess, size);
1001viobus_cmo_pool_rd_attr(excess, free);
1002viobus_cmo_rd_attr(spare);
1003viobus_cmo_rd_attr(min);
1004viobus_cmo_rd_attr(desired);
1005viobus_cmo_rd_attr(curr);
1006viobus_cmo_rd_attr(high);
1007
1008static struct bus_attribute vio_cmo_bus_attrs[] = {
1009 __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
1010 __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
1011 __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
1012 __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
1013 __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL),
1014 __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL),
1015 __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
1016 __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL),
1017 __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
1018 viobus_cmo_high_show, viobus_cmo_high_reset),
1019 __ATTR_NULL
1020};
1021
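The bus-wide counters surface under the vio bus directory, and any write to cmo_high snapshots current usage as the new high-water mark; a hedged userspace sketch (the sysfs path is assumed):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/bus/vio/cmo_high", "w"); /* assumed path */

		if (f) {
			fputs("0\n", f); /* the written value is ignored */
			fclose(f);
		}
		return 0;
	}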
1022static void vio_cmo_sysfs_init(void)
1023{
1024 vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
1025 vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
1026}
1027#else /* CONFIG_PPC_SMLPAR */
1028/* Stub functions for platforms without CMO support (e.g. iSeries) */
1029int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
1030void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
1031static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
1032static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
1033static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
1034static void vio_cmo_bus_init(void) {}
1035static void vio_cmo_sysfs_init(void) {}
1036#endif /* CONFIG_PPC_SMLPAR */
1037EXPORT_SYMBOL(vio_cmo_entitlement_update);
1038EXPORT_SYMBOL(vio_cmo_set_dev_desired);
1039
49static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) 1040static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
50{ 1041{
51 const unsigned char *dma_window; 1042 const unsigned char *dma_window;
@@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev)
114 return error; 1105 return error;
115 1106
116 id = vio_match_device(viodrv->id_table, viodev); 1107 id = vio_match_device(viodrv->id_table, viodev);
117 if (id) 1108 if (id) {
1109 memset(&viodev->cmo, 0, sizeof(viodev->cmo));
1110 if (firmware_has_feature(FW_FEATURE_CMO)) {
1111 error = vio_cmo_bus_probe(viodev);
1112 if (error)
1113 return error;
1114 }
118 error = viodrv->probe(viodev, id); 1115 error = viodrv->probe(viodev, id);
1116 if (error)
1117 vio_cmo_bus_remove(viodev);
1118 }
119 1119
120 return error; 1120 return error;
121} 1121}
@@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev)
125{ 1125{
126 struct vio_dev *viodev = to_vio_dev(dev); 1126 struct vio_dev *viodev = to_vio_dev(dev);
127 struct vio_driver *viodrv = to_vio_driver(dev->driver); 1127 struct vio_driver *viodrv = to_vio_driver(dev->driver);
1128 struct device *devptr;
1129 int ret = 1;
1130
1131 /*
1132 * Hold a reference to the device after the remove function is called
1133 * to allow for CMO accounting cleanup for the device.
1134 */
1135 devptr = get_device(dev);
128 1136
129 if (viodrv->remove) 1137 if (viodrv->remove)
130 return viodrv->remove(viodev); 1138 ret = viodrv->remove(viodev);
1139
1140 if (!ret && firmware_has_feature(FW_FEATURE_CMO))
1141 vio_cmo_bus_remove(viodev);
131 1142
132 /* driver can't remove */ 1143 put_device(devptr);
133 return 1; 1144 return ret;
134} 1145}
135 1146
136/** 1147/**
@@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
215 viodev->unit_address = *unit_address; 1226 viodev->unit_address = *unit_address;
216 } 1227 }
217 viodev->dev.archdata.of_node = of_node_get(of_node); 1228 viodev->dev.archdata.of_node = of_node_get(of_node);
218 viodev->dev.archdata.dma_ops = &dma_iommu_ops; 1229
1230 if (firmware_has_feature(FW_FEATURE_CMO))
1231 vio_cmo_set_dma_ops(viodev);
1232 else
1233 viodev->dev.archdata.dma_ops = &dma_iommu_ops;
219 viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); 1234 viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
220 viodev->dev.archdata.numa_node = of_node_to_nid(of_node); 1235 viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
221 1236
@@ -245,6 +1260,9 @@ static int __init vio_bus_init(void)
245 int err; 1260 int err;
246 struct device_node *node_vroot; 1261 struct device_node *node_vroot;
247 1262
1263 if (firmware_has_feature(FW_FEATURE_CMO))
1264 vio_cmo_sysfs_init();
1265
248 err = bus_register(&vio_bus_type); 1266 err = bus_register(&vio_bus_type);
249 if (err) { 1267 if (err) {
250 printk(KERN_ERR "failed to register VIO bus\n"); 1268 printk(KERN_ERR "failed to register VIO bus\n");
@@ -262,6 +1280,9 @@ static int __init vio_bus_init(void)
262 return err; 1280 return err;
263 } 1281 }
264 1282
1283 if (firmware_has_feature(FW_FEATURE_CMO))
1284 vio_cmo_bus_init();
1285
265 node_vroot = of_find_node_by_name(NULL, "vdevice"); 1286 node_vroot = of_find_node_by_name(NULL, "vdevice");
266 if (node_vroot) { 1287 if (node_vroot) {
267 struct device_node *of_node; 1288 struct device_node *of_node;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index a914411bced5..4a8ce62fe112 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
85 85
86 /* The dummy segment contents for the bug workaround mentioned above 86 /* The dummy segment contents for the bug workaround mentioned above
87 near PHDRS. */ 87 near PHDRS. */
88 .dummy : { 88 .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
89 LONG(0xf177) 89 LONG(0xf177)
90 } :kernel :dummy 90 } :kernel :dummy
91 91
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1707d00331fc..565b7a237c84 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs)
100 return 0; 100 return 0;
101} 101}
102 102
103#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
104static void do_dabr(struct pt_regs *regs, unsigned long address,
105 unsigned long error_code)
106{
107 siginfo_t info;
108
109 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
110 11, SIGSEGV) == NOTIFY_STOP)
111 return;
112
113 if (debugger_dabr_match(regs))
114 return;
115
116 /* Clear the DABR */
117 set_dabr(0);
118
119 /* Deliver the signal to userspace */
120 info.si_signo = SIGTRAP;
121 info.si_errno = 0;
122 info.si_code = TRAP_HWBKPT;
123 info.si_addr = (void __user *)address;
124 force_sig_info(SIGTRAP, &info, current);
125}
126#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
127
128/* 103/*
129 * For 600- and 800-family processors, the error_code parameter is DSISR 104 * For 600- and 800-family processors, the error_code parameter is DSISR
130 * for a data fault, SRR1 for an instruction fault. For 400-family processors 105 * for a data fault, SRR1 for an instruction fault. For 400-family processors
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index d664b1bce381..696a5ee4962d 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,7 +1,6 @@
1config PPC_MPC52xx 1config PPC_MPC52xx
2 bool "52xx-based boards" 2 bool "52xx-based boards"
3 depends on PPC_MULTIPLATFORM && PPC32 3 depends on PPC_MULTIPLATFORM && PPC32
4 select FSL_SOC
5 select PPC_CLOCK 4 select PPC_CLOCK
6 select PPC_PCI_CHOICE 5 select PPC_PCI_CHOICE
7 6
@@ -48,6 +47,7 @@ config PPC_MPC5200_BUGFIX
48config PPC_MPC5200_GPIO 47config PPC_MPC5200_GPIO
49 bool "MPC5200 GPIO support" 48 bool "MPC5200 GPIO support"
50 depends on PPC_MPC52xx 49 depends on PPC_MPC52xx
51 select HAVE_GPIO_LIB 50 select ARCH_REQUIRE_GPIOLIB
51 select GENERIC_GPIO
52 help 52 help
53 Enable gpiolib support for mpc5200 based boards 53 Enable gpiolib support for mpc5200 based boards
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 208005ca262c..e06420af5fe9 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
172 } 172 }
173} 173}
174 174
175static void tce_build_cell(struct iommu_table *tbl, long index, long npages, 175static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
176 unsigned long uaddr, enum dma_data_direction direction, 176 unsigned long uaddr, enum dma_data_direction direction,
177 struct dma_attrs *attrs) 177 struct dma_attrs *attrs)
178{ 178{
@@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
213 213
214 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", 214 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
215 index, npages, direction, base_pte); 215 index, npages, direction, base_pte);
216 return 0;
216} 217}
217 218
218static void tce_free_cell(struct iommu_table *tbl, long index, long npages) 219static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
@@ -1150,12 +1151,23 @@ static int iommu_fixed_disabled;
1150 1151
1151static int __init setup_iommu_fixed(char *str) 1152static int __init setup_iommu_fixed(char *str)
1152{ 1153{
1154 struct device_node *pciep;
1155
1153 if (strcmp(str, "off") == 0) 1156 if (strcmp(str, "off") == 0)
1154 iommu_fixed_disabled = 1; 1157 iommu_fixed_disabled = 1;
1155 1158
1156	else if (strcmp(str, "weak") == 0	 1159	/* If we can find a pcie-endpoint in the device tree, assume that
	 1160	 * we're on a triblade or a CAB, so the fixed mapping should
	 1161	 * default to weakly ordered; but only if the boot option
	 1162	 * wasn't set for strong ordering.
1163 */
1164 pciep = of_find_node_by_type(NULL, "pcie-endpoint");
1165
1166 if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
1157 iommu_fixed_is_weak = 1; 1167 iommu_fixed_is_weak = 1;
1158 1168
1169 of_node_put(pciep);
1170
1159 return 1; 1171 return 1;
1160} 1172}
1161__setup("iommu_fixed=", setup_iommu_fixed); 1173__setup("iommu_fixed=", setup_iommu_fixed);
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 34654743363d..2deeeba7eccf 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -312,11 +312,28 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
312 */ 312 */
313 node = cpu_to_node(raw_smp_processor_id()); 313 node = cpu_to_node(raw_smp_processor_id());
314 for (n = 0; n < MAX_NUMNODES; n++, node++) { 314 for (n = 0; n < MAX_NUMNODES; n++, node++) {
315 int available_spus;
316
315 node = (node < MAX_NUMNODES) ? node : 0; 317 node = (node < MAX_NUMNODES) ? node : 0;
316 if (!node_allowed(ctx, node)) 318 if (!node_allowed(ctx, node))
317 continue; 319 continue;
320
321 available_spus = 0;
318 mutex_lock(&cbe_spu_info[node].list_mutex); 322 mutex_lock(&cbe_spu_info[node].list_mutex);
319 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 323 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
324 if (spu->ctx && spu->ctx->gang
325 && spu->ctx->aff_offset == 0)
326 available_spus -=
327 (spu->ctx->gang->contexts - 1);
328 else
329 available_spus++;
330 }
331 if (available_spus < ctx->gang->contexts) {
332 mutex_unlock(&cbe_spu_info[node].list_mutex);
333 continue;
334 }
335
336 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
320 if ((!mem_aff || spu->has_mem_affinity) && 337 if ((!mem_aff || spu->has_mem_affinity) &&
321 sched_spu(spu)) { 338 sched_spu(spu)) {
322 mutex_unlock(&cbe_spu_info[node].list_mutex); 339 mutex_unlock(&cbe_spu_info[node].list_mutex);
@@ -389,6 +406,9 @@ static int has_affinity(struct spu_context *ctx)
389 if (list_empty(&ctx->aff_list)) 406 if (list_empty(&ctx->aff_list))
390 return 0; 407 return 0;
391 408
409 if (atomic_read(&ctx->gang->aff_sched_count) == 0)
410 ctx->gang->aff_ref_spu = NULL;
411
392 if (!gang->aff_ref_spu) { 412 if (!gang->aff_ref_spu) {
393 if (!(gang->aff_flags & AFF_MERGED)) 413 if (!(gang->aff_flags & AFF_MERGED))
394 aff_merge_remaining_ctxs(gang); 414 aff_merge_remaining_ctxs(gang);
@@ -416,14 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
416 if (spu->ctx->flags & SPU_CREATE_NOSCHED) 436 if (spu->ctx->flags & SPU_CREATE_NOSCHED)
417 atomic_dec(&cbe_spu_info[spu->node].reserved_spus); 437 atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
418 438
419 if (ctx->gang){ 439 if (ctx->gang)
420 mutex_lock(&ctx->gang->aff_mutex); 440 atomic_dec_if_positive(&ctx->gang->aff_sched_count);
421 if (has_affinity(ctx)) {
422 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
423 ctx->gang->aff_ref_spu = NULL;
424 }
425 mutex_unlock(&ctx->gang->aff_mutex);
426 }
427 441
428 spu_switch_notify(spu, NULL); 442 spu_switch_notify(spu, NULL);
429 spu_unmap_mappings(ctx); 443 spu_unmap_mappings(ctx);
@@ -562,10 +576,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
562 goto found; 576 goto found;
563 mutex_unlock(&cbe_spu_info[node].list_mutex); 577 mutex_unlock(&cbe_spu_info[node].list_mutex);
564 578
565 mutex_lock(&ctx->gang->aff_mutex); 579 atomic_dec(&ctx->gang->aff_sched_count);
566 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
567 ctx->gang->aff_ref_spu = NULL;
568 mutex_unlock(&ctx->gang->aff_mutex);
569 goto not_found; 580 goto not_found;
570 } 581 }
571 mutex_unlock(&ctx->gang->aff_mutex); 582 mutex_unlock(&ctx->gang->aff_mutex);
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 8c0e95766a62..92d20e993ede 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -196,8 +196,7 @@ static int __init sputrace_init(void)
196 struct proc_dir_entry *entry; 196 struct proc_dir_entry *entry;
197 int i, error = -ENOMEM; 197 int i, error = -ENOMEM;
198 198
199 sputrace_log = kcalloc(sizeof(struct sputrace), 199 sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
200 bufsize, GFP_KERNEL);
201 if (!sputrace_log) 200 if (!sputrace_log)
202 goto out; 201 goto out;
203 202
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index bc818e4e2033..bb464d1211b2 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -41,7 +41,7 @@
41#include <asm/iseries/hv_call_event.h> 41#include <asm/iseries/hv_call_event.h>
42#include <asm/iseries/iommu.h> 42#include <asm/iseries/iommu.h>
43 43
44static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, 44static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
45 unsigned long uaddr, enum dma_data_direction direction, 45 unsigned long uaddr, enum dma_data_direction direction,
46 struct dma_attrs *attrs) 46 struct dma_attrs *attrs)
47{ 47{
@@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
71 index++; 71 index++;
72 uaddr += TCE_PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
73 } 73 }
74 return 0;
74} 75}
75 76
76static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) 77static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 70541b7a5013..a0ff03a3d8da 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
83static struct iommu_table iommu_table_iobmap; 83static struct iommu_table iommu_table_iobmap;
84static int iommu_table_iobmap_inited; 84static int iommu_table_iobmap_inited;
85 85
86static void iobmap_build(struct iommu_table *tbl, long index, 86static int iobmap_build(struct iommu_table *tbl, long index,
87 long npages, unsigned long uaddr, 87 long npages, unsigned long uaddr,
88 enum dma_data_direction direction, 88 enum dma_data_direction direction,
89 struct dma_attrs *attrs) 89 struct dma_attrs *attrs)
@@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index,
108 uaddr += IOBMAP_PAGE_SIZE; 108 uaddr += IOBMAP_PAGE_SIZE;
109 bus_addr += IOBMAP_PAGE_SIZE; 109 bus_addr += IOBMAP_PAGE_SIZE;
110 } 110 }
111 return 0;
111} 112}
112 113
113 114
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 757c0296e0b8..97619fd51e39 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG
40 depends on PPC_PSERIES && PPC_EARLY_DEBUG 40 depends on PPC_PSERIES && PPC_EARLY_DEBUG
41 bool "Enable extra debug logging in platforms/pseries" 41 bool "Enable extra debug logging in platforms/pseries"
42 default y 42 default y
43
44config PPC_SMLPAR
45 bool "Support for shared-memory logical partitions"
46 depends on PPC_PSERIES
47 select LPARCFG
48 default n
49 help
50 Select this option to enable shared memory partition support.
 51	  With this option, a system running in an LPAR can be given more
 52	  memory than is physically available, allowing firmware to
 53	  balance memory across many LPARs.
54
55config CMM
56 tristate "Collaborative memory management"
57 depends on PPC_SMLPAR
58 default y
59 help
 60	  Select this option if you want to enable the kernel interface
 61	  that reduces the memory size of the system. This is accomplished
 62	  by allocating pages of memory and putting them "on hold". This only
 63	  makes sense for a system running in an LPAR, where the unused pages
 64	  can be reused by other LPARs. The interface allows firmware to
 65	  balance memory across many LPARs.
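As a hedged example, a .config fragment enabling this stack on pseries might look like:

	CONFIG_PPC_PSERIES=y
	CONFIG_PPC_SMLPAR=y
	CONFIG_LPARCFG=y
	CONFIG_CMM=m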
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 554c6e42ef2a..dfe574af2dc0 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
24obj-$(CONFIG_HVCS) += hvcserver.o 24obj-$(CONFIG_HVCS) += hvcserver.o
25obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o 25obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
26obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o 26obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
27obj-$(CONFIG_CMM) += cmm.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
new file mode 100644
index 000000000000..c6b3be03168b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -0,0 +1,468 @@
1/*
2 * Collaborative memory management interface.
3 *
4 * Copyright (C) 2008 IBM Corporation
5 * Author(s): Brian King (brking@linux.vnet.ibm.com),
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22
23#include <linux/ctype.h>
24#include <linux/delay.h>
25#include <linux/errno.h>
26#include <linux/fs.h>
27#include <linux/init.h>
28#include <linux/kthread.h>
29#include <linux/module.h>
30#include <linux/oom.h>
31#include <linux/sched.h>
32#include <linux/stringify.h>
33#include <linux/swap.h>
34#include <linux/sysdev.h>
35#include <asm/firmware.h>
36#include <asm/hvcall.h>
37#include <asm/mmu.h>
38#include <asm/pgalloc.h>
39#include <asm/uaccess.h>
40
41#include "plpar_wrappers.h"
42
43#define CMM_DRIVER_VERSION "1.0.0"
44#define CMM_DEFAULT_DELAY 1
45#define CMM_DEBUG 0
46#define CMM_DISABLE 0
47#define CMM_OOM_KB 1024
48#define CMM_MIN_MEM_MB 256
49#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
50#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
51
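A quick sanity check of these conversions, assuming the common 4K page size (PAGE_SHIFT == 12):

	/* PAGE_SHIFT - 10 == 2, so:
	 *   KB2PAGES(1024) == 1024 >> 2 == 256 pages
	 *   PAGES2KB(256)  == 256 << 2  == 1024 kB
	 * i.e. 1 MB converts to 256 four-kilobyte pages and back.
	 */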
52static unsigned int delay = CMM_DEFAULT_DELAY;
53static unsigned int oom_kb = CMM_OOM_KB;
54static unsigned int cmm_debug = CMM_DEBUG;
55static unsigned int cmm_disabled = CMM_DISABLE;
56static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
57static struct sys_device cmm_sysdev;
58
59MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
60MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
61MODULE_LICENSE("GPL");
62MODULE_VERSION(CMM_DRIVER_VERSION);
63
64module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
65MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
66 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
67module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
68MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
69 "[Default=" __stringify(CMM_OOM_KB) "]");
70module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
71MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
72 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
73module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
74MODULE_PARM_DESC(debug, "Enable module debug logging. Set to 1 to enable. "
75 "[Default=" __stringify(CMM_DEBUG) "]");
76
77#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
78
79#define cmm_dbg(...) do { if (cmm_debug) printk(KERN_INFO "cmm: "__VA_ARGS__); } while (0)
80
81struct cmm_page_array {
82 struct cmm_page_array *next;
83 unsigned long index;
84 unsigned long page[CMM_NR_PAGES];
85};
86
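Assuming 4K pages and 8-byte pointers/longs (a 64-bit build), each tracking page holds CMM_NR_PAGES == (4096 - 8 - 8) / 8 == 510 entries:

	/* Layout of one struct cmm_page_array page under those assumptions:
	 *
	 *   next      8 bytes      link to the previous tracking page (or NULL)
	 *   index     8 bytes      number of page[] slots in use
	 *   page[510] 4080 bytes   addresses of loaned pages
	 *
	 * 8 + 8 + 510 * 8 == 4096 bytes, exactly one page.
	 */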
87static unsigned long loaned_pages;
88static unsigned long loaned_pages_target;
89static unsigned long oom_freed_pages;
90
91static struct cmm_page_array *cmm_page_list;
92static DEFINE_SPINLOCK(cmm_lock);
93
94static struct task_struct *cmm_thread_ptr;
95
96/**
97 * cmm_alloc_pages - Allocate pages and mark them as loaned
98 * @nr: number of pages to allocate
99 *
100 * Return value:
 101 * the number of requested pages that could not be allocated
102 **/
103static long cmm_alloc_pages(long nr)
104{
105 struct cmm_page_array *pa, *npa;
106 unsigned long addr;
107 long rc;
108
109 cmm_dbg("Begin request for %ld pages\n", nr);
110
111 while (nr) {
112 addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
113 __GFP_NORETRY | __GFP_NOMEMALLOC);
114 if (!addr)
115 break;
116 spin_lock(&cmm_lock);
117 pa = cmm_page_list;
118 if (!pa || pa->index >= CMM_NR_PAGES) {
119 /* Need a new page for the page list. */
120 spin_unlock(&cmm_lock);
121 npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
122 __GFP_NORETRY | __GFP_NOMEMALLOC);
123 if (!npa) {
 124				pr_info("%s: Cannot allocate new page list\n", __func__);
125 free_page(addr);
126 break;
127 }
128 spin_lock(&cmm_lock);
129 pa = cmm_page_list;
130
131 if (!pa || pa->index >= CMM_NR_PAGES) {
132 npa->next = pa;
133 npa->index = 0;
134 pa = npa;
135 cmm_page_list = pa;
136 } else
137 free_page((unsigned long) npa);
138 }
139
140 if ((rc = plpar_page_set_loaned(__pa(addr)))) {
 141			pr_err("%s: Cannot set page to loaned. rc=%ld\n", __func__, rc);
142 spin_unlock(&cmm_lock);
143 free_page(addr);
144 break;
145 }
146
147 pa->page[pa->index++] = addr;
148 loaned_pages++;
149 totalram_pages--;
150 spin_unlock(&cmm_lock);
151 nr--;
152 }
153
154 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
155 return nr;
156}
157
158/**
159 * cmm_free_pages - Free pages and mark them as active
160 * @nr: number of pages to free
161 *
162 * Return value:
 163 * the number of requested pages that could not be freed
164 **/
165static long cmm_free_pages(long nr)
166{
167 struct cmm_page_array *pa;
168 unsigned long addr;
169
170 cmm_dbg("Begin free of %ld pages.\n", nr);
171 spin_lock(&cmm_lock);
172 pa = cmm_page_list;
173 while (nr) {
174 if (!pa || pa->index <= 0)
175 break;
176 addr = pa->page[--pa->index];
177
178 if (pa->index == 0) {
179 pa = pa->next;
180 free_page((unsigned long) cmm_page_list);
181 cmm_page_list = pa;
182 }
183
184 plpar_page_set_active(__pa(addr));
185 free_page(addr);
186 loaned_pages--;
187 nr--;
188 totalram_pages++;
189 }
190 spin_unlock(&cmm_lock);
191 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
192 return nr;
193}
194
195/**
196 * cmm_oom_notify - OOM notifier
197 * @self: notifier block struct
198 * @dummy: not used
199 * @parm: returned - number of pages freed
200 *
201 * Return value:
202 * NOTIFY_OK
203 **/
204static int cmm_oom_notify(struct notifier_block *self,
205 unsigned long dummy, void *parm)
206{
207 unsigned long *freed = parm;
208 long nr = KB2PAGES(oom_kb);
209
210 cmm_dbg("OOM processing started\n");
211 nr = cmm_free_pages(nr);
212 loaned_pages_target = loaned_pages;
213 *freed += KB2PAGES(oom_kb) - nr;
214 oom_freed_pages += KB2PAGES(oom_kb) - nr;
215 cmm_dbg("OOM processing complete\n");
216 return NOTIFY_OK;
217}
218
219/**
220 * cmm_get_mpp - Read memory performance parameters
221 *
222 * Makes hcall to query the current page loan request from the hypervisor.
223 *
224 * Return value:
225 * nothing
226 **/
227static void cmm_get_mpp(void)
228{
229 int rc;
230 struct hvcall_mpp_data mpp_data;
231 unsigned long active_pages_target;
232 signed long page_loan_request;
233
234 rc = h_get_mpp(&mpp_data);
235
236 if (rc != H_SUCCESS)
237 return;
238
239 page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
240 loaned_pages_target = page_loan_request + loaned_pages;
241 if (loaned_pages_target > oom_freed_pages)
242 loaned_pages_target -= oom_freed_pages;
243 else
244 loaned_pages_target = 0;
245
246 active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
247
248 if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
249 loaned_pages_target = totalram_pages + loaned_pages -
250 ((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
251
252 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
253 page_loan_request, loaned_pages, loaned_pages_target,
254 oom_freed_pages, totalram_pages);
255}
256
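A worked example of the target arithmetic above, with invented numbers (all values in 4K pages):

	/* Suppose the hypervisor asks to loan 32 MB:
	 *   page_loan_request = 32 MB / 4 KB        = 8192
	 *   loaned_pages                            = 1024
	 *   oom_freed_pages                         = 256
	 *
	 *   loaned_pages_target = 8192 + 1024 - 256 = 8960
	 *
	 * If loaning 8960 pages would leave fewer than min_mem_mb resident,
	 * the target is trimmed so that at least min_mem_mb stays active.
	 */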
257static struct notifier_block cmm_oom_nb = {
258 .notifier_call = cmm_oom_notify
259};
260
261/**
262 * cmm_thread - CMM task thread
263 * @dummy: not used
264 *
265 * Return value:
266 * 0
267 **/
268static int cmm_thread(void *dummy)
269{
270 unsigned long timeleft;
271
272 while (1) {
273 timeleft = msleep_interruptible(delay * 1000);
274
275 if (kthread_should_stop() || timeleft) {
276 loaned_pages_target = loaned_pages;
277 break;
278 }
279
280 cmm_get_mpp();
281
282 if (loaned_pages_target > loaned_pages) {
283 if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
284 loaned_pages_target = loaned_pages;
285 } else if (loaned_pages_target < loaned_pages)
286 cmm_free_pages(loaned_pages - loaned_pages_target);
287 }
288 return 0;
289}
290
291#define CMM_SHOW(name, format, args...) \
292 static ssize_t show_##name(struct sys_device *dev, char *buf) \
293 { \
294 return sprintf(buf, format, ##args); \
295 } \
296 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
297
298CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
299CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
300
301static ssize_t show_oom_pages(struct sys_device *dev, char *buf)
302{
303 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
304}
305
306static ssize_t store_oom_pages(struct sys_device *dev,
307 const char *buf, size_t count)
308{
 309	unsigned long val = simple_strtoul(buf, NULL, 10);
310
311 if (!capable(CAP_SYS_ADMIN))
312 return -EPERM;
313 if (val != 0)
314 return -EBADMSG;
315
316 oom_freed_pages = 0;
317 return count;
318}
319
320static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
321 show_oom_pages, store_oom_pages);
322
323static struct sysdev_attribute *cmm_attrs[] = {
324 &attr_loaned_kb,
325 &attr_loaned_target_kb,
326 &attr_oom_freed_kb,
327};
328
329static struct sysdev_class cmm_sysdev_class = {
330 .name = "cmm",
331};
332
333/**
334 * cmm_sysfs_register - Register with sysfs
335 *
336 * Return value:
337 * 0 on success / other on failure
338 **/
339static int cmm_sysfs_register(struct sys_device *sysdev)
340{
341 int i, rc;
342
343 if ((rc = sysdev_class_register(&cmm_sysdev_class)))
344 return rc;
345
346 sysdev->id = 0;
347 sysdev->cls = &cmm_sysdev_class;
348
349 if ((rc = sysdev_register(sysdev)))
350 goto class_unregister;
351
352 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
353 if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
354 goto fail;
355 }
356
357 return 0;
358
359fail:
360 while (--i >= 0)
361 sysdev_remove_file(sysdev, cmm_attrs[i]);
362 sysdev_unregister(sysdev);
363class_unregister:
364 sysdev_class_unregister(&cmm_sysdev_class);
365 return rc;
366}
367
368/**
369 * cmm_unregister_sysfs - Unregister from sysfs
370 *
371 **/
372static void cmm_unregister_sysfs(struct sys_device *sysdev)
373{
374 int i;
375
376 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
377 sysdev_remove_file(sysdev, cmm_attrs[i]);
378 sysdev_unregister(sysdev);
379 sysdev_class_unregister(&cmm_sysdev_class);
380}
381
382/**
383 * cmm_init - Module initialization
384 *
385 * Return value:
386 * 0 on success / other on failure
387 **/
388static int cmm_init(void)
389{
390 int rc = -ENOMEM;
391
392 if (!firmware_has_feature(FW_FEATURE_CMO))
393 return -EOPNOTSUPP;
394
395 if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
396 return rc;
397
398 if ((rc = cmm_sysfs_register(&cmm_sysdev)))
399 goto out_oom_notifier;
400
401 if (cmm_disabled)
402 return rc;
403
404 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
405 if (IS_ERR(cmm_thread_ptr)) {
406 rc = PTR_ERR(cmm_thread_ptr);
407 goto out_unregister_sysfs;
408 }
409
410 return rc;
411
412out_unregister_sysfs:
413 cmm_unregister_sysfs(&cmm_sysdev);
414out_oom_notifier:
415 unregister_oom_notifier(&cmm_oom_nb);
416 return rc;
417}
418
419/**
420 * cmm_exit - Module exit
421 *
422 * Return value:
423 * nothing
424 **/
425static void cmm_exit(void)
426{
427 if (cmm_thread_ptr)
428 kthread_stop(cmm_thread_ptr);
429 unregister_oom_notifier(&cmm_oom_nb);
430 cmm_free_pages(loaned_pages);
431 cmm_unregister_sysfs(&cmm_sysdev);
432}
433
434/**
435 * cmm_set_disable - Disable/Enable CMM
436 *
437 * Return value:
438 * 0 on success / other on failure
439 **/
440static int cmm_set_disable(const char *val, struct kernel_param *kp)
441{
442 int disable = simple_strtoul(val, NULL, 10);
443
444 if (disable != 0 && disable != 1)
445 return -EINVAL;
446
447 if (disable && !cmm_disabled) {
448 if (cmm_thread_ptr)
449 kthread_stop(cmm_thread_ptr);
450 cmm_thread_ptr = NULL;
451 cmm_free_pages(loaned_pages);
452 } else if (!disable && cmm_disabled) {
453 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
454 if (IS_ERR(cmm_thread_ptr))
455 return PTR_ERR(cmm_thread_ptr);
456 }
457
458 cmm_disabled = disable;
459 return 0;
460}
461
462module_param_call(disable, cmm_set_disable, param_get_uint,
463 &cmm_disabled, S_IRUGO | S_IWUSR);
464MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
465 "[Default=" __stringify(CMM_DISABLE) "]");
466
467module_init(cmm_init);
468module_exit(cmm_exit);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 5377dd4b849a..a8c446697f9e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -48,7 +48,7 @@
48#include "plpar_wrappers.h" 48#include "plpar_wrappers.h"
49 49
50 50
51static void tce_build_pSeries(struct iommu_table *tbl, long index, 51static int tce_build_pSeries(struct iommu_table *tbl, long index,
52 long npages, unsigned long uaddr, 52 long npages, unsigned long uaddr,
53 enum dma_data_direction direction, 53 enum dma_data_direction direction,
54 struct dma_attrs *attrs) 54 struct dma_attrs *attrs)
@@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index,
72 uaddr += TCE_PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
73 tcep++; 73 tcep++;
74 } 74 }
75 return 0;
75} 76}
76 77
77 78
@@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
94 return *tcep; 95 return *tcep;
95} 96}
96 97
97static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, 98static void tce_free_pSeriesLP(struct iommu_table*, long, long);
99static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
100
101static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
98 long npages, unsigned long uaddr, 102 long npages, unsigned long uaddr,
99 enum dma_data_direction direction, 103 enum dma_data_direction direction,
100 struct dma_attrs *attrs) 104 struct dma_attrs *attrs)
101{ 105{
102 u64 rc; 106 u64 rc = 0;
103 u64 proto_tce, tce; 107 u64 proto_tce, tce;
104 u64 rpn; 108 u64 rpn;
109 int ret = 0;
110 long tcenum_start = tcenum, npages_start = npages;
105 111
106 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; 112 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
107 proto_tce = TCE_PCI_READ; 113 proto_tce = TCE_PCI_READ;
@@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
112 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; 118 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
113 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); 119 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
114 120
121 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
122 ret = (int)rc;
123 tce_free_pSeriesLP(tbl, tcenum_start,
124 (npages_start - (npages + 1)));
125 break;
126 }
127
115 if (rc && printk_ratelimit()) { 128 if (rc && printk_ratelimit()) {
116 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); 129 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
117 printk("\tindex = 0x%lx\n", (u64)tbl->it_index); 130 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
123 tcenum++; 136 tcenum++;
124 rpn++; 137 rpn++;
125 } 138 }
139 return ret;
126} 140}
127 141
128static DEFINE_PER_CPU(u64 *, tce_page) = NULL; 142static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
129 143
130static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, 144static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
131 long npages, unsigned long uaddr, 145 long npages, unsigned long uaddr,
132 enum dma_data_direction direction, 146 enum dma_data_direction direction,
133 struct dma_attrs *attrs) 147 struct dma_attrs *attrs)
134{ 148{
135 u64 rc; 149 u64 rc = 0;
136 u64 proto_tce; 150 u64 proto_tce;
137 u64 *tcep; 151 u64 *tcep;
138 u64 rpn; 152 u64 rpn;
139 long l, limit; 153 long l, limit;
154 long tcenum_start = tcenum, npages_start = npages;
155 int ret = 0;
140 156
141 if (npages == 1) { 157 if (npages == 1) {
142 tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, 158 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
143 direction, attrs); 159 direction, attrs);
144 return;
145 } 160 }
146 161
147 tcep = __get_cpu_var(tce_page); 162 tcep = __get_cpu_var(tce_page);
@@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
153 tcep = (u64 *)__get_free_page(GFP_ATOMIC); 168 tcep = (u64 *)__get_free_page(GFP_ATOMIC);
154 /* If allocation fails, fall back to the loop implementation */ 169 /* If allocation fails, fall back to the loop implementation */
155 if (!tcep) { 170 if (!tcep) {
156 tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, 171 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
157 direction, attrs); 172 direction, attrs);
158 return;
159 } 173 }
160 __get_cpu_var(tce_page) = tcep; 174 __get_cpu_var(tce_page) = tcep;
161 } 175 }
@@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
187 tcenum += limit; 201 tcenum += limit;
188 } while (npages > 0 && !rc); 202 } while (npages > 0 && !rc);
189 203
204 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
205 ret = (int)rc;
206 tce_freemulti_pSeriesLP(tbl, tcenum_start,
207 (npages_start - (npages + limit)));
208 return ret;
209 }
210
190 if (rc && printk_ratelimit()) { 211 if (rc && printk_ratelimit()) {
191 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); 212 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
192 printk("\tindex = 0x%lx\n", (u64)tbl->it_index); 213 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
194 printk("\ttce[0] val = 0x%lx\n", tcep[0]); 215 printk("\ttce[0] val = 0x%lx\n", tcep[0]);
195 show_stack(current, (unsigned long *)__get_SP()); 216 show_stack(current, (unsigned long *)__get_SP());
196 } 217 }
218 return ret;
197} 219}
198 220
199static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) 221static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d8680b589dc9..a437267c6bf8 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
42 return vpa_call(0x3, cpu, vpa); 42 return vpa_call(0x3, cpu, vpa);
43} 43}
44 44
45static inline long plpar_page_set_loaned(unsigned long vpa)
46{
47 return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0);
48}
49
50static inline long plpar_page_set_active(unsigned long vpa)
51{
52 return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0);
53}
54
45extern void vpa_init(int cpu); 55extern void vpa_init(int cpu);
46 56
47static inline long plpar_pte_enter(unsigned long flags, 57static inline long plpar_pte_enter(unsigned long flags,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 90beb444e1dd..063a0d2fba30 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr)
314 H_DABRX_KERNEL | H_DABRX_USER); 314 H_DABRX_KERNEL | H_DABRX_USER);
315} 315}
316 316
317#define CMO_CHARACTERISTICS_TOKEN 44
318#define CMO_MAXLENGTH 1026
319
320/**
 321 * pSeries_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
 322 * so handle that here. (Stolen from parse_system_parameter_string)
323 */
324void pSeries_cmo_feature_init(void)
325{
326 char *ptr, *key, *value, *end;
327 int call_status;
328 int PrPSP = -1;
329 int SecPSP = -1;
330
331 pr_debug(" -> fw_cmo_feature_init()\n");
332 spin_lock(&rtas_data_buf_lock);
333 memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
334 call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
335 NULL,
336 CMO_CHARACTERISTICS_TOKEN,
337 __pa(rtas_data_buf),
338 RTAS_DATA_BUF_SIZE);
339
340 if (call_status != 0) {
341 spin_unlock(&rtas_data_buf_lock);
342 pr_debug("CMO not available\n");
343 pr_debug(" <- fw_cmo_feature_init()\n");
344 return;
345 }
346
347 end = rtas_data_buf + CMO_MAXLENGTH - 2;
348 ptr = rtas_data_buf + 2; /* step over strlen value */
349 key = value = ptr;
350
351 while (*ptr && (ptr <= end)) {
352 /* Separate the key and value by replacing '=' with '\0' and
353 * point the value at the string after the '='
354 */
355 if (ptr[0] == '=') {
356 ptr[0] = '\0';
357 value = ptr + 1;
358 } else if (ptr[0] == '\0' || ptr[0] == ',') {
359 /* Terminate the string containing the key/value pair */
360 ptr[0] = '\0';
361
362 if (key == value) {
363 pr_debug("Malformed key/value pair\n");
364 /* Never found a '=', end processing */
365 break;
366 }
367
368 if (0 == strcmp(key, "PrPSP"))
369 PrPSP = simple_strtol(value, NULL, 10);
370 else if (0 == strcmp(key, "SecPSP"))
371 SecPSP = simple_strtol(value, NULL, 10);
372 value = key = ptr + 1;
373 }
374 ptr++;
375 }
376
377 if (PrPSP != -1 || SecPSP != -1) {
378 pr_info("CMO enabled\n");
379 pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
380 powerpc_firmware_features |= FW_FEATURE_CMO;
381 } else
382 pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
383 spin_unlock(&rtas_data_buf_lock);
384 pr_debug(" <- fw_cmo_feature_init()\n");
385}
386
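For reference, the buffer parsed above is a comma-separated list of key=value pairs; a hypothetical payload (the values are invented):

	/* After the two-byte length prefix, the loop above would see e.g.:
	 *
	 *   "PrPSP=42,SecPSP=8,OtherKey=ignored"
	 *
	 * Only PrPSP and SecPSP are extracted; any other key is skipped,
	 * and if either key is present (leaving its parsed value != -1),
	 * FW_FEATURE_CMO is enabled.
	 */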
317/* 387/*
318 * Early initialization. Relocation is on but do not reference unbolted pages 388 * Early initialization. Relocation is on but do not reference unbolted pages
319 */ 389 */
@@ -329,6 +399,7 @@ static void __init pSeries_init_early(void)
329 else if (firmware_has_feature(FW_FEATURE_XDABR)) 399 else if (firmware_has_feature(FW_FEATURE_XDABR))
330 ppc_md.set_dabr = pseries_set_xdabr; 400 ppc_md.set_dabr = pseries_set_xdabr;
331 401
402 pSeries_cmo_feature_init();
332 iommu_init_early_pSeries(); 403 iommu_init_early_pSeries();
333 404
334 pr_debug(" <- pSeries_init_early()\n"); 405 pr_debug(" <- pSeries_init_early()\n");
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index de8c8b542cfa..89639ecbf381 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -147,7 +147,7 @@ static void dart_flush(struct iommu_table *tbl)
147 } 147 }
148} 148}
149 149
150static void dart_build(struct iommu_table *tbl, long index, 150static int dart_build(struct iommu_table *tbl, long index,
151 long npages, unsigned long uaddr, 151 long npages, unsigned long uaddr,
152 enum dma_data_direction direction, 152 enum dma_data_direction direction,
153 struct dma_attrs *attrs) 153 struct dma_attrs *attrs)
@@ -184,6 +184,7 @@ static void dart_build(struct iommu_table *tbl, long index,
184 } else { 184 } else {
185 dart_dirty = 1; 185 dart_dirty = 1;
186 } 186 }
187 return 0;
187} 188}
188 189
189 190
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig
index 4bb18f57901e..1ce546462be5 100644
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ b/arch/powerpc/sysdev/qe_lib/Kconfig
@@ -29,7 +29,7 @@ config QE_GPIO
29 bool "QE GPIO support" 29 bool "QE GPIO support"
30 depends on QUICC_ENGINE 30 depends on QUICC_ENGINE
31 select GENERIC_GPIO 31 select GENERIC_GPIO
32 select HAVE_GPIO_LIB 32 select ARCH_REQUIRE_GPIOLIB
33 help 33 help
34 Say Y here if you're going to use hardware that connects to the 34 Say Y here if you're going to use hardware that connects to the
35 QE GPIOs. 35 QE GPIOs.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index eb530b4128ba..2ed88122be93 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -565,6 +565,7 @@ bool "s390 guest support (EXPERIMENTAL)"
565 depends on 64BIT && EXPERIMENTAL 565 depends on 64BIT && EXPERIMENTAL
566 select VIRTIO 566 select VIRTIO
567 select VIRTIO_RING 567 select VIRTIO_RING
568 select VIRTIO_CONSOLE
568 help 569 help
569 Select this option if you want to run the kernel under s390 linux 570 Select this option if you want to run the kernel under s390 linux
570endmenu 571endmenu
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 288ad490a6dd..4f82e5b5f879 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
270 __ctl_store(kcb->kprobe_saved_ctl, 9, 11); 270 __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
271} 271}
272 272
273/* Called with kretprobe_lock held */
274void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 273void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
275 struct pt_regs *regs) 274 struct pt_regs *regs)
276{ 275{
@@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
377 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 376 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
378 377
379 INIT_HLIST_HEAD(&empty_rp); 378 INIT_HLIST_HEAD(&empty_rp);
380 spin_lock_irqsave(&kretprobe_lock, flags); 379 kretprobe_hash_lock(current, &head, &flags);
381 head = kretprobe_inst_table_head(current);
382 380
383 /* 381 /*
384 * It is possible to have multiple instances associated with a given 382 * It is possible to have multiple instances associated with a given
@@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
417 regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; 415 regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
418 416
419 reset_current_kprobe(); 417 reset_current_kprobe();
420 spin_unlock_irqrestore(&kretprobe_lock, flags); 418 kretprobe_hash_unlock(current, &flags);
421 preempt_enable_no_resched(); 419 preempt_enable_no_resched();
422 420
423 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 421 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b358e18273b0..62122bad1e33 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -54,6 +54,7 @@
54#include <asm/sections.h> 54#include <asm/sections.h>
55#include <asm/ebcdic.h> 55#include <asm/ebcdic.h>
56#include <asm/compat.h> 56#include <asm/compat.h>
57#include <asm/kvm_virtio.h>
57 58
58long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | 59long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
59 PSW_MASK_MCHECK | PSW_DEFAULT_KEY); 60 PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
@@ -766,7 +767,8 @@ setup_arch(char **cmdline_p)
766 printk("We are running under VM (64 bit mode)\n"); 767 printk("We are running under VM (64 bit mode)\n");
767 else if (MACHINE_IS_KVM) { 768 else if (MACHINE_IS_KVM) {
768 printk("We are running under KVM (64 bit mode)\n"); 769 printk("We are running under KVM (64 bit mode)\n");
769 add_preferred_console("ttyS", 1, NULL); 770 add_preferred_console("hvc", 0, NULL);
771 s390_virtio_console_init();
770 } else 772 } else
771 printk("We are running native (64 bit mode)\n"); 773 printk("We are running native (64 bit mode)\n");
772#endif /* CONFIG_64BIT */ 774#endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 212d618b0095..632b13e10053 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -9,7 +9,6 @@
9#include <linux/device.h> 9#include <linux/device.h>
10#include <linux/bootmem.h> 10#include <linux/bootmem.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/kthread.h>
13#include <linux/workqueue.h> 12#include <linux/workqueue.h>
14#include <linux/cpu.h> 13#include <linux/cpu.h>
15#include <linux/smp.h> 14#include <linux/smp.h>
@@ -230,20 +229,9 @@ void arch_update_cpu_topology(void)
230 } 229 }
231} 230}
232 231
233static int topology_kthread(void *data)
234{
235 arch_reinit_sched_domains();
236 return 0;
237}
238
239static void topology_work_fn(struct work_struct *work) 232static void topology_work_fn(struct work_struct *work)
240{ 233{
241 /* We can't call arch_reinit_sched_domains() from a multi-threaded 234 arch_reinit_sched_domains();
242 * workqueue context since it may deadlock in case of cpu hotplug.
243 * So we have to create a kernel thread in order to call
244 * arch_reinit_sched_domains().
245 */
246 kthread_run(topology_kthread, NULL, "topology_update");
247} 235}
248 236
249void topology_schedule_update(void) 237void topology_schedule_update(void)
diff --git a/arch/sh/boot/compressed/misc_32.c b/arch/sh/boot/compressed/misc_32.c
index adcea31e663e..f386997e4d9c 100644
--- a/arch/sh/boot/compressed/misc_32.c
+++ b/arch/sh/boot/compressed/misc_32.c
@@ -74,8 +74,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */
74static int fill_inbuf(void); 74static int fill_inbuf(void);
75static void flush_window(void); 75static void flush_window(void);
76static void error(char *m); 76static void error(char *m);
77static void gzip_mark(void **);
78static void gzip_release(void **);
79 77
80extern char input_data[]; 78extern char input_data[];
81extern int input_len; 79extern int input_len;
@@ -84,11 +82,7 @@ static long bytes_out = 0;
84static uch *output_data; 82static uch *output_data;
85static unsigned long output_ptr = 0; 83static unsigned long output_ptr = 0;
86 84
87static void *malloc(int size);
88static void free(void *where);
89static void error(char *m); 85static void error(char *m);
90static void gzip_mark(void **);
91static void gzip_release(void **);
92 86
93int puts(const char *); 87int puts(const char *);
94 88
@@ -101,38 +95,6 @@ static unsigned long free_mem_end_ptr;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size <0) error("Malloc error");
-	if (free_mem_ptr == 0) error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("Out of memory");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 #ifdef CONFIG_SH_STANDARD_BIOS
 size_t strlen(const char *s)
 {
diff --git a/arch/sh/boot/compressed/misc_64.c b/arch/sh/boot/compressed/misc_64.c
index a006ef89b9dd..2941657e18aa 100644
--- a/arch/sh/boot/compressed/misc_64.c
+++ b/arch/sh/boot/compressed/misc_64.c
@@ -72,8 +72,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */
 static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 extern char input_data[];
 extern int input_len;
@@ -82,11 +80,7 @@ static long bytes_out = 0;
 static uch *output_data;
 static unsigned long output_ptr = 0;
 
-static void *malloc(int size);
-static void free(void *where);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 static void puts(const char *);
 
@@ -99,40 +93,6 @@ static unsigned long free_mem_end_ptr;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size < 0)
-		error("Malloc error\n");
-	if (free_mem_ptr == 0)
-		error("Memory error\n");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *) free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("\nOut of memory\n");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (long) *ptr;
-}
-
 void puts(const char *s)
 {
 }
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 789724e61e83..375de7c6d082 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -298,20 +298,6 @@ config UNIX98_PTYS
 	  Read the instructions in <file:Documentation/Changes> pertaining to
 	  pseudo terminals. It's safe to say N.
 
-config UNIX98_PTY_COUNT
-	int "Maximum number of Unix98 PTYs in use (0-2048)"
-	depends on UNIX98_PTYS
-	default "256"
-	help
-	  The maximum number of Unix98 PTYs that can be used at any one time.
-	  The default is 256, and should be enough for desktop systems. Server
-	  machines which support incoming telnet/rlogin/ssh connections and/or
-	  serve several X terminals may want to increase this: every incoming
-	  connection and every xterm uses up one PTY.
-
-	  When not in use, each additional set of 256 PTYs occupy
-	  approximately 8 KB of kernel memory on 32-bit architectures.
-
 endmenu
 
 source "fs/Kconfig"
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index f43b5d755354..201a6e547e4a 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-/* Called with kretprobe_lock held.  The value stored in the return
- * address register is actually 2 instructions before where the
- * callee will return to.  Sequences usually look something like this
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
  *
  *		call	some_function	<--- return register points here
  *		 nop			<--- call delay slot
@@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 
 	/*
 	 * It is possible to have multiple instances associated with a given
@@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->tnpc = orig_ret_address + 4;
 
 	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b2ddfcf01728..e3cba0b45600 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,11 +23,13 @@ config X86
 	select HAVE_OPROFILE
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
+	select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X
 	select HAVE_KRETPROBES
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index bc5553b496f7..9fea73706479 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -182,8 +182,6 @@ static unsigned outcnt;
 static int  fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 /*
  * This is set up by the setup-routine at boot-time
@@ -196,9 +194,6 @@ extern int input_len;
 
 static long bytes_out;
 
-static void *malloc(int size);
-static void free(void *where);
-
 static void *memset(void *s, int c, unsigned n);
 static void *memcpy(void *dest, const void *src, unsigned n);
 
@@ -220,40 +215,6 @@ static int lines, cols;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size < 0)
-		error("Malloc error");
-	if (free_mem_ptr <= 0)
-		error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("Out of memory");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (memptr) *ptr;
-}
-
 static void scroll(void)
 {
 	int i;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f85f0d..6c27679ec6aa 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 	regs->ip = (unsigned long)p->ainsn.insn;
 }
 
-/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
 	INIT_HLIST_HEAD(&empty_rp);
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	head = kretprobe_inst_table_head(current);
+	kretprobe_hash_lock(current, &head, &flags);
 	/* fixup registers */
 #ifdef CONFIG_X86_64
 	regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
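The sparc64 and x86 kretprobe hunks above are two halves of the same interface change: the single global kretprobe_lock spinlock is retired in favor of a per-task hash of locks taken through kretprobe_hash_lock()/kretprobe_hash_unlock(). A minimal sketch of the new calling pattern in a trampoline handler (the hunks above are the authoritative usage; this only isolates the locking shape):

	struct hlist_head *head;
	unsigned long flags;

	/* Look up and lock this task's kretprobe instance list. */
	kretprobe_hash_lock(current, &head, &flags);
	/* ... walk 'head' and recover the original return address ... */
	kretprobe_hash_unlock(current, &flags);

The old code took the global lock and then called kretprobe_inst_table_head(current) separately; the new helper pair combines the lookup with a finer-grained lock.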
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 151f2d171f7c..19e7fc7c2c4f 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/crash_dump.h>
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
 #include <linux/pci_ids.h>
@@ -167,6 +168,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl);
167static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); 168static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
168static void calioc2_tce_cache_blast(struct iommu_table *tbl); 169static void calioc2_tce_cache_blast(struct iommu_table *tbl);
169static void calioc2_dump_error_regs(struct iommu_table *tbl); 170static void calioc2_dump_error_regs(struct iommu_table *tbl);
171static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
172static void get_tce_space_from_tar(void);
170 173
171static struct cal_chipset_ops calgary_chip_ops = { 174static struct cal_chipset_ops calgary_chip_ops = {
172 .handle_quirks = calgary_handle_quirks, 175 .handle_quirks = calgary_handle_quirks,
@@ -830,7 +833,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
 
 	tbl = pci_iommu(dev->bus);
 	tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-	tce_free(tbl, 0, tbl->it_size);
+
+	if (is_kdump_kernel())
+		calgary_init_bitmap_from_tce_table(tbl);
+	else
+		tce_free(tbl, 0, tbl->it_size);
 
 	if (is_calgary(dev->device))
 		tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1216,10 @@ static int __init calgary_init(void)
 	if (ret)
 		return ret;
 
+	/* Purely for kdump kernel case */
+	if (is_kdump_kernel())
+		get_tce_space_from_tar();
+
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
 		if (!dev)
@@ -1339,6 +1350,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
 	return (val != 0xffffffff);
 }
 
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Function for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+	u64 *tp;
+	unsigned int index;
+	tp = ((u64 *)tbl->it_base);
+	for (index = 0; index < tbl->it_size; index++) {
+		if (*tp != 0x0)
+			set_bit(index, tbl->it_map);
+		tp++;
+	}
+}
+
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base address register of calgary iommu
+ */
+static void get_tce_space_from_tar(void)
+{
+	int bus;
+	void __iomem *target;
+	unsigned long tce_space;
+
+	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+		struct calgary_bus_info *info = &bus_info[bus];
+		unsigned short pci_device;
+		u32 val;
+
+		val = read_pci_config(bus, 0, 0, 0);
+		pci_device = (val & 0xFFFF0000) >> 16;
+
+		if (!is_cal_pci_dev(pci_device))
+			continue;
+		if (info->translation_disabled)
+			continue;
+
+		if (calgary_bus_has_devices(bus, pci_device) ||
+		    translate_empty_slots) {
+			target = calgary_reg(bus_info[bus].bbar,
+					     tar_offset(bus));
+			tce_space = be64_to_cpu(readq(target));
+			tce_space = tce_space & TAR_SW_BITS;
+
+			tce_space = tce_space & (~specified_table_size);
+			info->tce_space = (u64 *)__va(tce_space);
+		}
+	}
+	return;
+}
+
 void __init detect_calgary(void)
 {
 	int bus;
@@ -1394,7 +1460,8 @@ void __init detect_calgary(void)
 		return;
 	}
 
-	specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
+	specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
+					saved_max_pfn : max_pfn) * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1479,16 @@ void __init detect_calgary(void)
 
 		if (calgary_bus_has_devices(bus, pci_device) ||
 		    translate_empty_slots) {
-			tbl = alloc_tce_table();
-			if (!tbl)
-				goto cleanup;
-			info->tce_space = tbl;
+			/*
+			 * If it is kdump kernel, find and use tce tables
+			 * from first kernel, else allocate tce tables here
+			 */
+			if (!is_kdump_kernel()) {
+				tbl = alloc_tce_table();
+				if (!tbl)
+					goto cleanup;
+				info->tce_space = tbl;
+			}
 			calgary_found = 1;
 		}
 	}
diff --git a/block/ioctl.c b/block/ioctl.c
index 52d6385216ad..77185e5c026a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -17,6 +17,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 			long long start, length;
 			int part;
 			int i;
+			int err;
 
 			if (!capable(CAP_SYS_ADMIN))
 				return -EACCES;
@@ -61,9 +62,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 				}
 			}
 			/* all seems OK */
-			add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
+			err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
 			mutex_unlock(&bdev->bd_mutex);
-			return 0;
+			return err;
 		case BLKPG_DEL_PARTITION:
 			if (!disk->part[part-1])
 				return -ENXIO;
diff --git a/drivers/Makefile b/drivers/Makefile
index 808e0ae66aa8..54ec5e718c0e 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -5,7 +5,7 @@
 # Rewritten to use lists instead of if-statements.
 #
 
-obj-$(CONFIG_HAVE_GPIO_LIB)	+= gpio/
+obj-y				+= gpio/
 obj-$(CONFIG_PCI)		+= pci/
 obj-$(CONFIG_PARISC)		+= parisc/
 obj-$(CONFIG_RAPIDIO)		+= rapidio/
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index dc7596f028b6..ef3e5522e1a4 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1273,7 +1273,7 @@ static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
 	void __iomem *mmio = ap->host->iomap[AHCI_PCI_BAR];
 	u32 em_ctl;
 	u32 message[] = {0, 0};
-	unsigned int flags;
+	unsigned long flags;
 	int pmp;
 	struct ahci_em_priv *emp;
 
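This one-word ahci fix is worth a note: the flags argument of spin_lock_irqsave() must be an unsigned long, because the macro stashes the full saved IRQ state there; declaring it unsigned int can truncate that state on 64-bit builds. A minimal sketch of the invariant (generic, not ahci-specific):

	unsigned long flags;	/* never int: holds the saved IRQ state */

	spin_lock_irqsave(&lock, flags);
	/* ... critical section ... */
	spin_unlock_irqrestore(&lock, flags);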
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index b0be1d18fee2..c9c92b00fd55 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -184,7 +184,7 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 	struct device *dev = to_dev(kobj);
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
 	struct firmware *fw;
-	ssize_t ret_count = count;
+	ssize_t ret_count;
 
 	mutex_lock(&fw_lock);
 	fw = fw_priv->fw;
@@ -192,14 +192,8 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 		ret_count = -ENODEV;
 		goto out;
 	}
-	if (offset > fw->size) {
-		ret_count = 0;
-		goto out;
-	}
-	if (offset + ret_count > fw->size)
-		ret_count = fw->size - offset;
-
-	memcpy(buffer, fw->data + offset, ret_count);
+	ret_count = memory_read_from_buffer(buffer, count, &offset,
+						fw->data, fw->size);
 out:
 	mutex_unlock(&fw_lock);
 	return ret_count;
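The firmware_data_read() hunk replaces open-coded bounds checks with the memory_read_from_buffer() helper. Its semantics, roughly sketched (an illustration of the contract, not a copy of the in-tree implementation):

	ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
					const void *from, size_t available)
	{
		loff_t pos = *ppos;

		if (pos < 0)
			return -EINVAL;
		if (pos >= available)
			return 0;			/* EOF */
		if (count > available - pos)
			count = available - pos;	/* clamp to the source */
		memcpy(to, from + pos, count);
		*ppos = pos + count;
		return count;
	}

Callers therefore get the short-read, EOF, and offset-advance behavior for free, which is exactly the logic the hunk deletes.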
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index c04440cd6a32..181ebb85f0be 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -6,6 +6,7 @@
 
 #include <linux/hdreg.h>
 #include <linux/blkdev.h>
+#include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/smp_lock.h>
 #include "aoe.h"
@@ -36,7 +37,7 @@ struct ErrMsg {
 
 static struct ErrMsg emsgs[NMSG];
 static int emsgs_head_idx, emsgs_tail_idx;
-static struct semaphore emsgs_sema;
+static struct completion emsgs_comp;
 static spinlock_t emsgs_lock;
 static int nblocked_emsgs_readers;
 static struct class *aoe_class;
@@ -141,7 +142,7 @@ bail: spin_unlock_irqrestore(&emsgs_lock, flags);
 	spin_unlock_irqrestore(&emsgs_lock, flags);
 
 	if (nblocked_emsgs_readers)
-		up(&emsgs_sema);
+		complete(&emsgs_comp);
 }
 
 static ssize_t
@@ -221,7 +222,7 @@ aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off)
 
 		spin_unlock_irqrestore(&emsgs_lock, flags);
 
-		n = down_interruptible(&emsgs_sema);
+		n = wait_for_completion_interruptible(&emsgs_comp);
 
 		spin_lock_irqsave(&emsgs_lock, flags);
 
@@ -269,7 +270,7 @@ aoechr_init(void)
 		printk(KERN_ERR "aoe: can't register char device\n");
 		return n;
 	}
-	sema_init(&emsgs_sema, 0);
+	init_completion(&emsgs_comp);
 	spin_lock_init(&emsgs_lock);
 	aoe_class = class_create(THIS_MODULE, "aoe");
 	if (IS_ERR(aoe_class)) {
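The aoechr conversion is the standard semaphore-to-completion migration: a semaphore initialized to zero and used purely as a sleep/wake event maps one-to-one onto a struct completion. The pairing, sketched with the usual completion API:

	static struct completion evt;

	init_completion(&evt);		/* was: sema_init(&sem, 0) */

	/* consumer side */
	if (wait_for_completion_interruptible(&evt))	/* was: down_interruptible() */
		return -ERESTARTSYS;

	/* producer side */
	complete(&evt);			/* was: up(&sem) */

Completions make the wake-up intent explicit and avoid the counting semantics that semaphores drag along.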
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index dd7ea203f940..42251095134f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	int err;
 	u64 cap;
 	u32 v;
+	u32 blk_size;
 
 	if (index_to_minor(index) >= 1 << MINORBITS)
 		return -ENOSPC;
@@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err)
 		blk_queue_max_hw_segments(vblk->disk->queue, v);
 
+	/* Host can optionally specify the block size of the device */
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
+				offsetof(struct virtio_blk_config, blk_size),
+				&blk_size);
+	if (!err)
+		blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+
 	add_disk(vblk->disk);
 	return 0;
 
@@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = {
 
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
-	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO,
+	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
 };
 
 static struct virtio_driver virtio_blk = {
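virtio_config_val() succeeds only when the named feature bit was actually negotiated with the host, so the probe code can attempt the read unconditionally and simply skip blk_queue_hardsect_size() on failure. Conceptually the helper behaves like the following sketch (an illustration of the guard, not the exact macro from virtio_config.h):

	if (!virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) {
		err = -ENOENT;		/* host never offered the field */
	} else {
		vdev->config->get(vdev,
				  offsetof(struct virtio_blk_config, blk_size),
				  &blk_size, sizeof(blk_size));
		err = 0;
	}

Adding VIRTIO_BLK_F_BLK_SIZE to the features[] array is what advertises the driver's willingness to negotiate the bit in the first place.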
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 67b07576f8bf..6c070dc5f2d4 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -578,11 +578,14 @@ config HVC_DRIVER
 	  It will automatically be selected if one of the back-end console drivers
 	  is selected.
 
+config HVC_IRQ
+	bool
 
 config HVC_CONSOLE
 	bool "pSeries Hypervisor Virtual Console support"
 	depends on PPC_PSERIES
 	select HVC_DRIVER
+	select HVC_IRQ
 	help
 	  pSeries machines when partitioned support a hypervisor virtual
 	  console. This driver allows each pSeries partition to have a console
@@ -593,6 +596,7 @@ config HVC_ISERIES
 	depends on PPC_ISERIES
 	default y
 	select HVC_DRIVER
+	select HVC_IRQ
 	help
 	  iSeries machines support a hypervisor virtual console.
 
@@ -614,13 +618,18 @@ config HVC_XEN
 	bool "Xen Hypervisor Console support"
 	depends on XEN
 	select HVC_DRIVER
+	select HVC_IRQ
 	default y
 	help
 	  Xen virtual console device driver
 
 config VIRTIO_CONSOLE
-	bool
+	tristate "Virtio console"
+	depends on VIRTIO
 	select HVC_DRIVER
+	help
+	  Virtio console for use with lguest and other hypervisors.
+
 
 config HVCS
 	tristate "IBM Hypervisor Virtual Console Server support"
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 4b6e736cfa02..f7a0d1a754fc 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o
 obj-$(CONFIG_HVC_RTAS)		+= hvc_rtas.o
 obj-$(CONFIG_HVC_BEAT)		+= hvc_beat.o
 obj-$(CONFIG_HVC_DRIVER)	+= hvc_console.o
+obj-$(CONFIG_HVC_IRQ)		+= hvc_irq.o
 obj-$(CONFIG_HVC_XEN)		+= hvc_xen.o
 obj-$(CONFIG_VIRTIO_CONSOLE)	+= virtio_console.o
 obj-$(CONFIG_RAW_DRIVER)	+= raw.o
@@ -63,7 +64,6 @@ obj-$(CONFIG_BRIQ_PANEL) += briq_panel.o
 obj-$(CONFIG_BFIN_OTP)		+= bfin-otp.o
 
 obj-$(CONFIG_PRINTER)		+= lp.o
-obj-$(CONFIG_TIPAR)		+= tipar.o
 
 obj-$(CONFIG_APM_EMULATION)	+= apm-emulation.o
 
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index fada6ddefbae..c5e67a623951 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -20,10 +20,11 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <linux/bcd.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/rtc.h>
 #if defined(CONFIG_M32R)
 #include <asm/m32r.h>
@@ -153,9 +154,7 @@ static unsigned char days_in_mo[] =
 
 /* ioctl that supports RTC_RD_TIME and RTC_SET_TIME (read and set time/date). */
 
-static int
-rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-	  unsigned long arg)
+static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	unsigned long flags;
 
@@ -165,7 +164,9 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			struct rtc_time rtc_tm;
 
 			memset(&rtc_tm, 0, sizeof (struct rtc_time));
+			lock_kernel();
 			get_rtc_time(&rtc_tm);
+			unlock_kernel();
 			if (copy_to_user((struct rtc_time*)arg, &rtc_tm, sizeof(struct rtc_time)))
 				return -EFAULT;
 			return 0;
@@ -217,6 +218,7 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			BIN_TO_BCD(mon);
 			BIN_TO_BCD(yrs);
 
+			lock_kernel();
 			local_irq_save(flags);
 			CMOS_WRITE(yrs, RTC_YEAR);
 			CMOS_WRITE(mon, RTC_MONTH);
@@ -225,6 +227,7 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			CMOS_WRITE(min, RTC_MINUTES);
 			CMOS_WRITE(sec, RTC_SECONDS);
 			local_irq_restore(flags);
+			unlock_kernel();
 
 			/* Notice that at this point, the RTC is updated but
 			 * the kernel is still running with the old time.
@@ -244,8 +247,10 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 			if(copy_from_user(&tcs_val, (int*)arg, sizeof(int)))
 				return -EFAULT;
 
+			lock_kernel();
 			tcs_val = RTC_TCR_PATTERN | (tcs_val & 0x0F);
 			ds1302_writereg(RTC_TRICKLECHARGER, tcs_val);
+			unlock_kernel();
 			return 0;
 		}
 		default:
@@ -282,7 +287,7 @@ get_rtc_status(char *buf)
 
 static const struct file_operations rtc_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= rtc_ioctl,
+	.unlocked_ioctl	= rtc_ioctl,
 };
 
 /* Probe for the chip by writing something to its RAM and try reading it back. */
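ds1302 is the first of several character drivers below (dsp56k, efirtc, ip2, mwave) converted with the same BKL-pushdown recipe: the legacy .ioctl entry point was always called with the Big Kernel Lock held, while .unlocked_ioctl is called lockless, so the conversion switches the file_operations slot, drops the inode argument, and takes lock_kernel()/unlock_kernel() explicitly around only the regions that still depend on BKL serialization. The general shape, sketched with hypothetical names (FOO_CMD and foo_do_cmd are placeholders, not real interfaces):

	static long foo_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
	{
		long ret = -ENOTTY;

		switch (cmd) {
		case FOO_CMD:
			lock_kernel();	/* protect only the legacy shared state */
			ret = foo_do_cmd(arg);
			unlock_kernel();
			break;
		}
		return ret;
	}

	static const struct file_operations foo_fops = {
		.owner		= THIS_MODULE,
		.unlocked_ioctl	= foo_ioctl,
	};

Narrowing the locked region like this is what later allows the BKL to be removed from these paths entirely.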
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 33c466a4888f..19b88504e960 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -36,10 +36,10 @@
 #include <linux/smp_lock.h>
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
+#include <linux/uaccess.h>	/* For put_user and get_user */
 
 #include <asm/atarihw.h>
 #include <asm/traps.h>
-#include <asm/uaccess.h>	/* For put_user and get_user */
 
 #include <asm/dsp56k.h>
 
@@ -303,8 +303,8 @@ static ssize_t dsp56k_write(struct file *file, const char __user *buf, size_t co
 	}
 }
 
-static int dsp56k_ioctl(struct inode *inode, struct file *file,
-			unsigned int cmd, unsigned long arg)
+static long dsp56k_ioctl(struct file *file, unsigned int cmd,
+			 unsigned long arg)
 {
 	int dev = iminor(inode) & 0x0f;
 	void __user *argp = (void __user *)arg;
@@ -331,8 +331,9 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
 		if (len > DSP56K_MAX_BINARY_LENGTH) {
 			return -EINVAL;
 		}
-
+		lock_kernel();
 		r = dsp56k_upload(bin, len);
+		unlock_kernel();
 		if (r < 0) {
 			return r;
 		}
@@ -342,12 +343,16 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
 	case DSP56K_SET_TX_WSIZE:
 		if (arg > 4 || arg < 1)
 			return -EINVAL;
+		lock_kernel();
 		dsp56k.tx_wsize = (int) arg;
+		unlock_kernel();
 		break;
 	case DSP56K_SET_RX_WSIZE:
 		if (arg > 4 || arg < 1)
 			return -EINVAL;
+		lock_kernel();
 		dsp56k.rx_wsize = (int) arg;
+		unlock_kernel();
 		break;
 	case DSP56K_HOST_FLAGS:
 	{
@@ -359,6 +364,7 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
 		if(get_user(out, &hf->out) < 0)
 			return -EFAULT;
 
+		lock_kernel();
 		if ((dir & 0x1) && (out & 0x1))
 			dsp56k_host_interface.icr |= DSP56K_ICR_HF0;
 		else if (dir & 0x1)
@@ -373,14 +379,16 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
 		if (dsp56k_host_interface.icr & DSP56K_ICR_HF1) status |= 0x2;
 		if (dsp56k_host_interface.isr & DSP56K_ISR_HF2) status |= 0x4;
 		if (dsp56k_host_interface.isr & DSP56K_ISR_HF3) status |= 0x8;
-
+		unlock_kernel();
 		return put_user(status, &hf->status);
 	}
 	case DSP56K_HOST_CMD:
 		if (arg > 31 || arg < 0)
 			return -EINVAL;
+		lock_kernel();
 		dsp56k_host_interface.cvr = (u_char)((arg & DSP56K_CVR_HV_MASK) |
 						     DSP56K_CVR_HC);
+		unlock_kernel();
 		break;
 	default:
 		return -EINVAL;
@@ -472,7 +480,7 @@ static const struct file_operations dsp56k_fops = {
 	.owner		= THIS_MODULE,
 	.read		= dsp56k_read,
 	.write		= dsp56k_write,
-	.ioctl		= dsp56k_ioctl,
+	.unlocked_ioctl	= dsp56k_ioctl,
 	.open		= dsp56k_open,
 	.release	= dsp56k_release,
 };
diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c
index d57ca3e4e534..67fbd7aab5db 100644
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -37,8 +37,9 @@
 #include <linux/rtc.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
 
 #define EFI_RTC_VERSION		"0.4"
@@ -51,8 +52,8 @@
 
 static DEFINE_SPINLOCK(efi_rtc_lock);
 
-static int efi_rtc_ioctl(struct inode *inode, struct file *file,
-			 unsigned int cmd, unsigned long arg);
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+			  unsigned long arg);
 
 #define is_leap(year) \
           ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
@@ -146,9 +147,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
 	}
 }
 
-static int
-efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-	      unsigned long arg)
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+			  unsigned long arg)
 {
 
 	efi_status_t	status;
@@ -175,13 +175,13 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		return -EINVAL;
 
 	case RTC_RD_TIME:
-
+		lock_kernel();
 		spin_lock_irqsave(&efi_rtc_lock, flags);
 
 		status = efi.get_time(&eft, &cap);
 
 		spin_unlock_irqrestore(&efi_rtc_lock,flags);
-
+		unlock_kernel();
 		if (status != EFI_SUCCESS) {
 			/* should never happen */
 			printk(KERN_ERR "efitime: can't read time\n");
@@ -203,11 +203,13 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 
 		convert_to_efi_time(&wtime, &eft);
 
+		lock_kernel();
 		spin_lock_irqsave(&efi_rtc_lock, flags);
 
 		status = efi.set_time(&eft);
 
 		spin_unlock_irqrestore(&efi_rtc_lock,flags);
+		unlock_kernel();
 
 		return status == EFI_SUCCESS ? 0 : -EINVAL;
 
@@ -223,6 +225,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 
 		convert_to_efi_time(&wtime, &eft);
 
+		lock_kernel();
 		spin_lock_irqsave(&efi_rtc_lock, flags);
 		/*
 		 * XXX Fixme:
@@ -233,16 +236,19 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		status = efi.set_wakeup_time((efi_bool_t)enabled, &eft);
 
 		spin_unlock_irqrestore(&efi_rtc_lock,flags);
+		unlock_kernel();
 
 		return status == EFI_SUCCESS ? 0 : -EINVAL;
 
 	case RTC_WKALM_RD:
 
+		lock_kernel();
 		spin_lock_irqsave(&efi_rtc_lock, flags);
 
 		status = efi.get_wakeup_time((efi_bool_t *)&enabled, (efi_bool_t *)&pending, &eft);
 
 		spin_unlock_irqrestore(&efi_rtc_lock,flags);
+		unlock_kernel();
 
 		if (status != EFI_SUCCESS) return -EINVAL;
 
@@ -256,7 +262,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		return copy_to_user(&ewp->time, &wtime,
 				    sizeof(struct rtc_time)) ? -EFAULT : 0;
 	}
-	return -EINVAL;
+	return -ENOTTY;
 }
 
 /*
@@ -265,8 +271,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
  * up things on a close.
  */
 
-static int
-efi_rtc_open(struct inode *inode, struct file *file)
+static int efi_rtc_open(struct inode *inode, struct file *file)
 {
 	/*
 	 * nothing special to do here
@@ -277,8 +282,7 @@ efi_rtc_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int
-efi_rtc_close(struct inode *inode, struct file *file)
+static int efi_rtc_close(struct inode *inode, struct file *file)
 {
 	return 0;
 }
@@ -289,13 +293,12 @@ efi_rtc_close(struct inode *inode, struct file *file)
 
 static const struct file_operations efi_rtc_fops = {
 	.owner		= THIS_MODULE,
-	.ioctl		= efi_rtc_ioctl,
+	.unlocked_ioctl	= efi_rtc_ioctl,
 	.open		= efi_rtc_open,
 	.release	= efi_rtc_close,
 };
 
-static struct miscdevice efi_rtc_dev=
-{
+static struct miscdevice efi_rtc_dev= {
 	EFI_RTC_MINOR,
 	"efirtc",
 	&efi_rtc_fops
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index fb0a85a1eb36..b3f5dbc6d880 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -623,6 +623,7 @@ static inline int hpet_tpcheck(struct hpet_task *tp)
 	return -ENXIO;
 }
 
+#if 0
 int hpet_unregister(struct hpet_task *tp)
 {
 	struct hpet_dev *devp;
@@ -652,6 +653,7 @@ int hpet_unregister(struct hpet_task *tp)
 
 	return 0;
 }
+#endif  /*  0  */
 
 static ctl_table hpet_table[] = {
 	{
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 2f9759d625cc..02aac104842d 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/kbd_kern.h>
 #include <linux/kernel.h>
-#include <linux/kref.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
 #include <linux/module.h>
@@ -75,23 +74,6 @@ static int hvc_init(void);
 static int sysrq_pressed;
 #endif
 
-struct hvc_struct {
-	spinlock_t lock;
-	int index;
-	struct tty_struct *tty;
-	unsigned int count;
-	int do_wakeup;
-	char *outbuf;
-	int outbuf_size;
-	int n_outbuf;
-	uint32_t vtermno;
-	struct hv_ops *ops;
-	int irq_requested;
-	int irq;
-	struct list_head next;
-	struct kref kref; /* ref count & hvc_struct lifetime */
-};
-
 /* dynamic list of hvc_struct instances */
 static LIST_HEAD(hvc_structs);
 
@@ -298,27 +280,15 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(hvc_instantiate);
 
 /* Wake the sleeping khvcd */
-static void hvc_kick(void)
+void hvc_kick(void)
 {
 	hvc_kicked = 1;
 	wake_up_process(hvc_task);
 }
-
-static int hvc_poll(struct hvc_struct *hp);
-
-/*
- * NOTE: This API isn't used if the console adapter doesn't support interrupts.
- * In this case the console is poll driven.
- */
-static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
-{
-	/* if hvc_poll request a repoll, then kick the hvcd thread */
-	if (hvc_poll(dev_instance))
-		hvc_kick();
-	return IRQ_HANDLED;
-}
+EXPORT_SYMBOL_GPL(hvc_kick);
 
 static void hvc_unthrottle(struct tty_struct *tty)
 {
@@ -333,7 +303,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 {
 	struct hvc_struct *hp;
 	unsigned long flags;
-	int irq = 0;
 	int rc = 0;
 
 	/* Auto increments kref reference if found. */
@@ -352,18 +321,15 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 	tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */
 
 	hp->tty = tty;
-	/* Save for request_irq outside of spin_lock. */
-	irq = hp->irq;
-	if (irq)
-		hp->irq_requested = 1;
+
+	if (hp->ops->notifier_add)
+		rc = hp->ops->notifier_add(hp, hp->data);
 
 	spin_unlock_irqrestore(&hp->lock, flags);
-	/* check error, fallback to non-irq */
-	if (irq)
-		rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp);
+
 
 	/*
-	 * If the request_irq() fails and we return an error. The tty layer
+	 * If the notifier fails we return an error. The tty layer
 	 * will call hvc_close() after a failed open but we don't want to clean
 	 * up there so we'll clean up here and clear out the previously set
 	 * tty fields and return the kref reference.
@@ -371,7 +337,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 	if (rc) {
 		spin_lock_irqsave(&hp->lock, flags);
 		hp->tty = NULL;
-		hp->irq_requested = 0;
 		spin_unlock_irqrestore(&hp->lock, flags);
 		tty->driver_data = NULL;
 		kref_put(&hp->kref, destroy_hvc_struct);
@@ -386,7 +351,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
 static void hvc_close(struct tty_struct *tty, struct file * filp)
 {
 	struct hvc_struct *hp;
-	int irq = 0;
 	unsigned long flags;
 
 	if (tty_hung_up_p(filp))
@@ -404,9 +368,8 @@ static void hvc_close(struct tty_struct *tty, struct file * filp)
 	spin_lock_irqsave(&hp->lock, flags);
 
 	if (--hp->count == 0) {
-		if (hp->irq_requested)
-			irq = hp->irq;
-		hp->irq_requested = 0;
+		if (hp->ops->notifier_del)
+			hp->ops->notifier_del(hp, hp->data);
 
 		/* We are done with the tty pointer now. */
 		hp->tty = NULL;
@@ -418,10 +381,6 @@ static void hvc_close(struct tty_struct *tty, struct file * filp)
 		 * waking periodically to check chars_in_buffer().
 		 */
 		tty_wait_until_sent(tty, HVC_CLOSE_WAIT);
-
-		if (irq)
-			free_irq(irq, hp);
-
 	} else {
 		if (hp->count < 0)
 			printk(KERN_ERR "hvc_close %X: oops, count is %d\n",
@@ -436,7 +395,6 @@ static void hvc_hangup(struct tty_struct *tty)
 {
 	struct hvc_struct *hp = tty->driver_data;
 	unsigned long flags;
-	int irq = 0;
 	int temp_open_count;
 
 	if (!hp)
@@ -458,13 +416,12 @@ static void hvc_hangup(struct tty_struct *tty)
 	hp->count = 0;
 	hp->n_outbuf = 0;
 	hp->tty = NULL;
-	if (hp->irq_requested)
-		/* Saved for use outside of spin_lock. */
-		irq = hp->irq;
-	hp->irq_requested = 0;
+
+	if (hp->ops->notifier_del)
+		hp->ops->notifier_del(hp, hp->data);
+
 	spin_unlock_irqrestore(&hp->lock, flags);
-	if (irq)
-		free_irq(irq, hp);
+
 	while(temp_open_count) {
 		--temp_open_count;
 		kref_put(&hp->kref, destroy_hvc_struct);
@@ -575,7 +532,7 @@ static u32 timeout = MIN_TIMEOUT;
 #define HVC_POLL_READ	0x00000001
 #define HVC_POLL_WRITE	0x00000002
 
-static int hvc_poll(struct hvc_struct *hp)
+int hvc_poll(struct hvc_struct *hp)
 {
 	struct tty_struct *tty;
 	int i, n, poll_mask = 0;
@@ -602,10 +559,10 @@ static int hvc_poll(struct hvc_struct *hp)
 	if (test_bit(TTY_THROTTLED, &tty->flags))
 		goto throttled;
 
-	/* If we aren't interrupt driven and aren't throttled, we always
+	/* If we aren't notifier driven and aren't throttled, we always
 	 * request a reschedule
 	 */
-	if (hp->irq == 0)
+	if (!hp->irq_requested)
 		poll_mask |= HVC_POLL_READ;
 
 	/* Read data if any */
@@ -674,6 +631,7 @@ static int hvc_poll(struct hvc_struct *hp)
 
 	return poll_mask;
 }
+EXPORT_SYMBOL_GPL(hvc_poll);
 
 /*
  * This kthread is either polling or interrupt driven. This is determined by
@@ -733,7 +691,7 @@ static const struct tty_operations hvc_ops = {
 	.chars_in_buffer = hvc_chars_in_buffer,
 };
 
-struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
+struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data,
 					struct hv_ops *ops, int outbuf_size)
 {
 	struct hvc_struct *hp;
@@ -754,7 +712,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
 	memset(hp, 0x00, sizeof(*hp));
 
 	hp->vtermno = vtermno;
-	hp->irq = irq;
+	hp->data = data;
 	hp->ops = ops;
 	hp->outbuf_size = outbuf_size;
 	hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))];
@@ -784,6 +742,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
 
 	return hp;
 }
+EXPORT_SYMBOL_GPL(hvc_alloc);
 
 int __devexit hvc_remove(struct hvc_struct *hp)
 {
diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h
index 42ffb17e15df..d9ce10915625 100644
--- a/drivers/char/hvc_console.h
+++ b/drivers/char/hvc_console.h
@@ -26,6 +26,7 @@
 
 #ifndef HVC_CONSOLE_H
 #define HVC_CONSOLE_H
+#include <linux/kref.h>
 
 /*
  * This is the max number of console adapters that can/will be found as
@@ -42,24 +43,50 @@
  */
 #define HVC_ALLOC_TTY_ADAPTERS	8
 
+struct hvc_struct {
+	spinlock_t lock;
+	int index;
+	struct tty_struct *tty;
+	unsigned int count;
+	int do_wakeup;
+	char *outbuf;
+	int outbuf_size;
+	int n_outbuf;
+	uint32_t vtermno;
+	struct hv_ops *ops;
+	int irq_requested;
+	int data;
+	struct list_head next;
+	struct kref kref; /* ref count & hvc_struct lifetime */
+};
 
 /* implemented by a low level driver */
 struct hv_ops {
 	int (*get_chars)(uint32_t vtermno, char *buf, int count);
 	int (*put_chars)(uint32_t vtermno, const char *buf, int count);
-};
 
-struct hvc_struct;
+	/* Callbacks for notification. Called in open and close */
+	int (*notifier_add)(struct hvc_struct *hp, int irq);
+	void (*notifier_del)(struct hvc_struct *hp, int irq);
+};
 
 /* Register a vterm and a slot index for use as a console (console_init) */
 extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
 
 /* register a vterm for hvc tty operation (module_init or hotplug add) */
-extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
+extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data,
 				struct hv_ops *ops, int outbuf_size);
-/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
+/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */
 extern int __devexit hvc_remove(struct hvc_struct *hp);
 
+/* data available */
+int hvc_poll(struct hvc_struct *hp);
+void hvc_kick(void);
+
+/* default notifier for irq based notification */
+extern int notifier_add_irq(struct hvc_struct *hp, int data);
+extern void notifier_del_irq(struct hvc_struct *hp, int data);
+
 
 #if defined(CONFIG_XMON) && defined(CONFIG_SMP)
 #include <asm/xmon.h>
diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c
new file mode 100644
index 000000000000..73a59cdb8947
--- /dev/null
+++ b/drivers/char/hvc_irq.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright IBM Corp. 2001,2008
+ *
+ * This file contains the IRQ specific code for hvc_console
+ *
+ */
+
+#include <linux/interrupt.h>
+
+#include "hvc_console.h"
+
+static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
+{
+	/* if hvc_poll request a repoll, then kick the hvcd thread */
+	if (hvc_poll(dev_instance))
+		hvc_kick();
+	return IRQ_HANDLED;
+}
+
+/*
+ * For IRQ based systems these callbacks can be used
+ */
+int notifier_add_irq(struct hvc_struct *hp, int irq)
+{
+	int rc;
+
+	if (!irq) {
+		hp->irq_requested = 0;
+		return 0;
+	}
+	rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED,
+			   "hvc_console", hp);
+	if (!rc)
+		hp->irq_requested = 1;
+	return rc;
+}
+
+void notifier_del_irq(struct hvc_struct *hp, int irq)
+{
+	if (!irq)
+		return;
+	free_irq(irq, hp);
+	hp->irq_requested = 0;
+}
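With hvc_irq.c in place, an interrupt-capable hvc backend no longer hands a raw IRQ to the core; it passes the IRQ through the opaque data argument of hvc_alloc() and points the new hv_ops callbacks at these helpers, as the iSeries, vio, and Xen hunks below do. A sketch of the wiring (the example_* names are hypothetical):

	static struct hv_ops example_hv_ops = {
		.get_chars	= example_get_chars,
		.put_chars	= example_put_chars,
		.notifier_add	= notifier_add_irq,	/* request_irq() at first open */
		.notifier_del	= notifier_del_irq,	/* free_irq() at last close */
	};

	/* in the probe routine: the IRQ travels via the 'data' argument */
	hp = hvc_alloc(vtermno, irq, &example_hv_ops, 256);

Poll-only backends simply leave notifier_add/notifier_del NULL, and the core falls back to the khvcd polling loop.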
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c
index a08f8f981c11..b71c610fe5ae 100644
--- a/drivers/char/hvc_iseries.c
+++ b/drivers/char/hvc_iseries.c
@@ -200,6 +200,8 @@ done:
 static struct hv_ops hvc_get_put_ops = {
 	.get_chars = get_chars,
 	.put_chars = put_chars,
+	.notifier_add = notifier_add_irq,
+	.notifier_del = notifier_del_irq,
 };
 
 static int __devinit hvc_vio_probe(struct vio_dev *vdev,
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c
index 79711aa4b41d..93f3840c1682 100644
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -80,6 +80,8 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count)
 static struct hv_ops hvc_get_put_ops = {
 	.get_chars = filtered_get_chars,
 	.put_chars = hvc_put_chars,
+	.notifier_add = notifier_add_irq,
+	.notifier_del = notifier_del_irq,
 };
 
 static int __devinit hvc_vio_probe(struct vio_dev *vdev,
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index db2ae4216279..6b70aa66a587 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -100,6 +100,8 @@ static int read_console(uint32_t vtermno, char *buf, int len)
 static struct hv_ops hvc_ops = {
 	.get_chars = read_console,
 	.put_chars = write_console,
+	.notifier_add = notifier_add_irq,
+	.notifier_del = notifier_del_irq,
 };
 
 static int __init xen_init(void)
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index 9cb48fcd316c..689f9dcd3b86 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -203,7 +203,7 @@ static int set_serial_info(i2ChanStrPtr, struct serial_struct __user *);
 
 static ssize_t ip2_ipl_read(struct file *, char __user *, size_t, loff_t *);
 static ssize_t ip2_ipl_write(struct file *, const char __user *, size_t, loff_t *);
-static int ip2_ipl_ioctl(struct inode *, struct file *, UINT, ULONG);
+static long ip2_ipl_ioctl(struct file *, UINT, ULONG);
 static int ip2_ipl_open(struct inode *, struct file *);
 
 static int DumpTraceBuffer(char __user *, int);
@@ -236,7 +236,7 @@ static const struct file_operations ip2_ipl = {
 	.owner		= THIS_MODULE,
 	.read		= ip2_ipl_read,
 	.write		= ip2_ipl_write,
-	.ioctl		= ip2_ipl_ioctl,
+	.unlocked_ioctl	= ip2_ipl_ioctl,
 	.open		= ip2_ipl_open,
 };
 
@@ -2845,10 +2845,10 @@ ip2_ipl_write(struct file *pFile, const char __user *pData, size_t count, loff_t
2845/* */ 2845/* */
2846/* */ 2846/* */
2847/******************************************************************************/ 2847/******************************************************************************/
2848static int 2848static long
2849ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg ) 2849ip2_ipl_ioctl (struct file *pFile, UINT cmd, ULONG arg )
2850{ 2850{
2851 unsigned int iplminor = iminor(pInode); 2851 unsigned int iplminor = iminor(pFile->f_path.dentry->d_inode);
2852 int rc = 0; 2852 int rc = 0;
2853 void __user *argp = (void __user *)arg; 2853 void __user *argp = (void __user *)arg;
2854 ULONG __user *pIndex = argp; 2854 ULONG __user *pIndex = argp;
@@ -2859,6 +2859,8 @@ ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
2859 printk (KERN_DEBUG "IP2IPL: ioctl cmd %d, arg %ld\n", cmd, arg ); 2859 printk (KERN_DEBUG "IP2IPL: ioctl cmd %d, arg %ld\n", cmd, arg );
2860#endif 2860#endif
2861 2861
2862 lock_kernel();
2863
2862 switch ( iplminor ) { 2864 switch ( iplminor ) {
2863 case 0: // IPL device 2865 case 0: // IPL device
2864 rc = -EINVAL; 2866 rc = -EINVAL;
@@ -2919,6 +2921,7 @@ ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
2919 rc = -ENODEV; 2921 rc = -ENODEV;
2920 break; 2922 break;
2921 } 2923 }
2924 unlock_kernel();
2922 return rc; 2925 return rc;
2923} 2926}
2924 2927
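
The ip2 conversion is the pattern this series applies driver by driver: the handler drops its inode argument, returns long, moves from .ioctl to .unlocked_ioctl, and takes the big kernel lock itself instead of inheriting it from the VFS. A minimal sketch of the pattern (foo_do_ioctl() is a hypothetical helper, not from the patch):

        #include <linux/fs.h>
        #include <linux/smp_lock.h>

        static long foo_do_ioctl(struct inode *inode, unsigned int cmd,
                                 unsigned long arg);    /* hypothetical */

        static long foo_ioctl(struct file *file, unsigned int cmd,
                              unsigned long arg)
        {
                /* the inode is still reachable when a handler needs it */
                struct inode *inode = file->f_path.dentry->d_inode;
                long ret;

                lock_kernel();          /* BKL pushed down into the driver */
                ret = foo_do_ioctl(inode, cmd, arg);
                unlock_kernel();
                return ret;
        }

        static const struct file_operations foo_fops = {
                .owner          = THIS_MODULE,
                .unlocked_ioctl = foo_ioctl,
        };

Pushing the lock down keeps behaviour identical for now while making each driver's locking explicit, so it can later be narrowed or replaced with a private mutex.
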
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 50243fcd87e8..4f8d67fed292 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -86,8 +86,8 @@ module_param(mwave_uart_io, int, 0);
86 86
87static int mwave_open(struct inode *inode, struct file *file); 87static int mwave_open(struct inode *inode, struct file *file);
88static int mwave_close(struct inode *inode, struct file *file); 88static int mwave_close(struct inode *inode, struct file *file);
89static int mwave_ioctl(struct inode *inode, struct file *filp, 89static long mwave_ioctl(struct file *filp, unsigned int iocmd,
90 unsigned int iocmd, unsigned long ioarg); 90 unsigned long ioarg);
91 91
92MWAVE_DEVICE_DATA mwave_s_mdd; 92MWAVE_DEVICE_DATA mwave_s_mdd;
93 93
@@ -119,16 +119,16 @@ static int mwave_close(struct inode *inode, struct file *file)
119 return retval; 119 return retval;
120} 120}
121 121
122static int mwave_ioctl(struct inode *inode, struct file *file, 122static long mwave_ioctl(struct file *file, unsigned int iocmd,
123 unsigned int iocmd, unsigned long ioarg) 123 unsigned long ioarg)
124{ 124{
125 unsigned int retval = 0; 125 unsigned int retval = 0;
126 pMWAVE_DEVICE_DATA pDrvData = &mwave_s_mdd; 126 pMWAVE_DEVICE_DATA pDrvData = &mwave_s_mdd;
127 void __user *arg = (void __user *)ioarg; 127 void __user *arg = (void __user *)ioarg;
128 128
129 PRINTK_5(TRACE_MWAVE, 129 PRINTK_4(TRACE_MWAVE,
130 "mwavedd::mwave_ioctl, entry inode %p file %p cmd %x arg %x\n", 130 "mwavedd::mwave_ioctl, entry file %p cmd %x arg %x\n",
131 inode, file, iocmd, (int) ioarg); 131 file, iocmd, (int) ioarg);
132 132
133 switch (iocmd) { 133 switch (iocmd) {
134 134
@@ -136,7 +136,9 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
136 PRINTK_1(TRACE_MWAVE, 136 PRINTK_1(TRACE_MWAVE,
137 "mwavedd::mwave_ioctl, IOCTL_MW_RESET" 137 "mwavedd::mwave_ioctl, IOCTL_MW_RESET"
138 " calling tp3780I_ResetDSP\n"); 138 " calling tp3780I_ResetDSP\n");
139 lock_kernel();
139 retval = tp3780I_ResetDSP(&pDrvData->rBDData); 140 retval = tp3780I_ResetDSP(&pDrvData->rBDData);
141 unlock_kernel();
140 PRINTK_2(TRACE_MWAVE, 142 PRINTK_2(TRACE_MWAVE,
141 "mwavedd::mwave_ioctl, IOCTL_MW_RESET" 143 "mwavedd::mwave_ioctl, IOCTL_MW_RESET"
142 " retval %x from tp3780I_ResetDSP\n", 144 " retval %x from tp3780I_ResetDSP\n",
@@ -147,7 +149,9 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
147 PRINTK_1(TRACE_MWAVE, 149 PRINTK_1(TRACE_MWAVE,
148 "mwavedd::mwave_ioctl, IOCTL_MW_RUN" 150 "mwavedd::mwave_ioctl, IOCTL_MW_RUN"
149 " calling tp3780I_StartDSP\n"); 151 " calling tp3780I_StartDSP\n");
152 lock_kernel();
150 retval = tp3780I_StartDSP(&pDrvData->rBDData); 153 retval = tp3780I_StartDSP(&pDrvData->rBDData);
154 unlock_kernel();
151 PRINTK_2(TRACE_MWAVE, 155 PRINTK_2(TRACE_MWAVE,
152 "mwavedd::mwave_ioctl, IOCTL_MW_RUN" 156 "mwavedd::mwave_ioctl, IOCTL_MW_RUN"
153 " retval %x from tp3780I_StartDSP\n", 157 " retval %x from tp3780I_StartDSP\n",
@@ -161,8 +165,10 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
161 "mwavedd::mwave_ioctl," 165 "mwavedd::mwave_ioctl,"
162 " IOCTL_MW_DSP_ABILITIES calling" 166 " IOCTL_MW_DSP_ABILITIES calling"
163 " tp3780I_QueryAbilities\n"); 167 " tp3780I_QueryAbilities\n");
168 lock_kernel();
164 retval = tp3780I_QueryAbilities(&pDrvData->rBDData, 169 retval = tp3780I_QueryAbilities(&pDrvData->rBDData,
165 &rAbilities); 170 &rAbilities);
171 unlock_kernel();
166 PRINTK_2(TRACE_MWAVE, 172 PRINTK_2(TRACE_MWAVE,
167 "mwavedd::mwave_ioctl, IOCTL_MW_DSP_ABILITIES" 173 "mwavedd::mwave_ioctl, IOCTL_MW_DSP_ABILITIES"
168 " retval %x from tp3780I_QueryAbilities\n", 174 " retval %x from tp3780I_QueryAbilities\n",
@@ -193,11 +199,13 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
193 "mwavedd::mwave_ioctl IOCTL_MW_READ_DATA," 199 "mwavedd::mwave_ioctl IOCTL_MW_READ_DATA,"
194 " size %lx, ioarg %lx pusBuffer %p\n", 200 " size %lx, ioarg %lx pusBuffer %p\n",
195 rReadData.ulDataLength, ioarg, pusBuffer); 201 rReadData.ulDataLength, ioarg, pusBuffer);
202 lock_kernel();
196 retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData, 203 retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
197 iocmd, 204 iocmd,
198 pusBuffer, 205 pusBuffer,
199 rReadData.ulDataLength, 206 rReadData.ulDataLength,
200 rReadData.usDspAddress); 207 rReadData.usDspAddress);
208 unlock_kernel();
201 } 209 }
202 break; 210 break;
203 211
@@ -215,10 +223,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
215 " size %lx, ioarg %lx pusBuffer %p\n", 223 " size %lx, ioarg %lx pusBuffer %p\n",
216 rReadData.ulDataLength / 2, ioarg, 224 rReadData.ulDataLength / 2, ioarg,
217 pusBuffer); 225 pusBuffer);
226 lock_kernel();
218 retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData, 227 retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
219 iocmd, pusBuffer, 228 iocmd, pusBuffer,
220 rReadData.ulDataLength / 2, 229 rReadData.ulDataLength / 2,
221 rReadData.usDspAddress); 230 rReadData.usDspAddress);
231 unlock_kernel();
222 } 232 }
223 break; 233 break;
224 234
@@ -236,10 +246,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
236 " size %lx, ioarg %lx pusBuffer %p\n", 246 " size %lx, ioarg %lx pusBuffer %p\n",
237 rWriteData.ulDataLength, ioarg, 247 rWriteData.ulDataLength, ioarg,
238 pusBuffer); 248 pusBuffer);
249 lock_kernel();
239 retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData, 250 retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
240 iocmd, pusBuffer, 251 iocmd, pusBuffer,
241 rWriteData.ulDataLength, 252 rWriteData.ulDataLength,
242 rWriteData.usDspAddress); 253 rWriteData.usDspAddress);
254 unlock_kernel();
243 } 255 }
244 break; 256 break;
245 257
@@ -257,10 +269,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
257 " size %lx, ioarg %lx pusBuffer %p\n", 269 " size %lx, ioarg %lx pusBuffer %p\n",
258 rWriteData.ulDataLength, ioarg, 270 rWriteData.ulDataLength, ioarg,
259 pusBuffer); 271 pusBuffer);
272 lock_kernel();
260 retval = tp3780I_ReadWriteDspIStore(&pDrvData->rBDData, 273 retval = tp3780I_ReadWriteDspIStore(&pDrvData->rBDData,
261 iocmd, pusBuffer, 274 iocmd, pusBuffer,
262 rWriteData.ulDataLength, 275 rWriteData.ulDataLength,
263 rWriteData.usDspAddress); 276 rWriteData.usDspAddress);
277 unlock_kernel();
264 } 278 }
265 break; 279 break;
266 280
@@ -281,8 +295,10 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
281 ipcnum); 295 ipcnum);
282 return -EINVAL; 296 return -EINVAL;
283 } 297 }
298 lock_kernel();
284 pDrvData->IPCs[ipcnum].bIsHere = FALSE; 299 pDrvData->IPCs[ipcnum].bIsHere = FALSE;
285 pDrvData->IPCs[ipcnum].bIsEnabled = TRUE; 300 pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
301 unlock_kernel();
286 302
287 PRINTK_2(TRACE_MWAVE, 303 PRINTK_2(TRACE_MWAVE,
288 "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" 304 "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
@@ -307,6 +323,7 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
307 return -EINVAL; 323 return -EINVAL;
308 } 324 }
309 325
326 lock_kernel();
310 if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { 327 if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
311 DECLARE_WAITQUEUE(wait, current); 328 DECLARE_WAITQUEUE(wait, current);
312 329
@@ -347,6 +364,7 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
347 " processing\n", 364 " processing\n",
348 ipcnum); 365 ipcnum);
349 } 366 }
367 unlock_kernel();
350 } 368 }
351 break; 369 break;
352 370
@@ -365,19 +383,18 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
365 ipcnum); 383 ipcnum);
366 return -EINVAL; 384 return -EINVAL;
367 } 385 }
386 lock_kernel();
368 if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { 387 if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
369 pDrvData->IPCs[ipcnum].bIsEnabled = FALSE; 388 pDrvData->IPCs[ipcnum].bIsEnabled = FALSE;
370 if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) { 389 if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) {
371 wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue); 390 wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue);
372 } 391 }
373 } 392 }
393 unlock_kernel();
374 } 394 }
375 break; 395 break;
376 396
377 default: 397 default:
378 PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:"
379 " Error: Unrecognized iocmd %x\n",
380 iocmd);
381 return -ENOTTY; 398 return -ENOTTY;
382 break; 399 break;
383 } /* switch */ 400 } /* switch */
@@ -460,7 +477,7 @@ static const struct file_operations mwave_fops = {
460 .owner = THIS_MODULE, 477 .owner = THIS_MODULE,
461 .read = mwave_read, 478 .read = mwave_read,
462 .write = mwave_write, 479 .write = mwave_write,
463 .ioctl = mwave_ioctl, 480 .unlocked_ioctl = mwave_ioctl,
464 .open = mwave_open, 481 .open = mwave_open,
465 .release = mwave_close 482 .release = mwave_close
466}; 483};
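
mwave takes the finer-grained variant of the same conversion: rather than one lock_kernel() around the whole handler, each command body is bracketed individually, so the argument-validation paths returning -EINVAL never touch the BKL, and the unrecognized-command printk is dropped in favour of a bare -ENOTTY. The shape, sketched with hypothetical names:

        case IOCTL_FOO:                         /* hypothetical command */
                if (bad_argument(arg))          /* checked outside the lock */
                        return -EINVAL;
                lock_kernel();
                retval = do_the_work();         /* only the work is serialized */
                unlock_kernel();
                break;
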
diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h
index 8eca61e0a19c..7e0d530e2e07 100644
--- a/drivers/char/mwave/mwavedd.h
+++ b/drivers/char/mwave/mwavedd.h
@@ -147,4 +147,6 @@ typedef struct _MWAVE_DEVICE_DATA {
147 147
148} MWAVE_DEVICE_DATA, *pMWAVE_DEVICE_DATA; 148} MWAVE_DEVICE_DATA, *pMWAVE_DEVICE_DATA;
149 149
150extern MWAVE_DEVICE_DATA mwave_s_mdd;
151
150#endif 152#endif
diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c
index f282976daaac..c68969708068 100644
--- a/drivers/char/mwave/tp3780i.c
+++ b/drivers/char/mwave/tp3780i.c
@@ -57,8 +57,6 @@
57#include "3780i.h" 57#include "3780i.h"
58#include "mwavepub.h" 58#include "mwavepub.h"
59 59
60extern MWAVE_DEVICE_DATA mwave_s_mdd;
61
62static unsigned short s_ausThinkpadIrqToField[16] = 60static unsigned short s_ausThinkpadIrqToField[16] =
63 { 0xFFFF, 0xFFFF, 0xFFFF, 0x0001, 0x0002, 0x0003, 0xFFFF, 0x0004, 61 { 0xFFFF, 0xFFFF, 0xFFFF, 0x0001, 0x0002, 0x0003, 0xFFFF, 0x0004,
64 0xFFFF, 0xFFFF, 0x0005, 0x0006, 0xFFFF, 0xFFFF, 0xFFFF, 0x0007 }; 62 0xFFFF, 0xFFFF, 0x0005, 0x0006, 0xFFFF, 0xFFFF, 0xFFFF, 0x0007 };
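
Moving the declaration of mwave_s_mdd out of tp3780i.c and into mwavedd.h is the standard cleanup for a shared global: one extern next to the type it refers to, one definition in mwavedd.c, and every other file picks it up through the header instead of repeating (and possibly mistyping) the declaration:

        /* mwavedd.h -- single shared declaration */
        extern MWAVE_DEVICE_DATA mwave_s_mdd;

        /* mwavedd.c -- single definition */
        MWAVE_DEVICE_DATA mwave_s_mdd;
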
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 4c756bbba948..e30575e87648 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -16,7 +16,6 @@
16 * Fed through a cleanup, indent and remove of non 2.6 code by Alan Cox 16 * Fed through a cleanup, indent and remove of non 2.6 code by Alan Cox
17 * <alan@redhat.com>. The original 1.8 code is available on www.moxa.com. 17 * <alan@redhat.com>. The original 1.8 code is available on www.moxa.com.
18 * - Fixed x86_64 cleanness 18 * - Fixed x86_64 cleanness
19 * - Fixed sleep with spinlock held in mxser_send_break
20 */ 19 */
21 20
22#include <linux/module.h> 21#include <linux/module.h>
@@ -49,18 +48,12 @@
49 48
50#define MXSER_VERSION "2.0.4" /* 1.12 */ 49#define MXSER_VERSION "2.0.4" /* 1.12 */
51#define MXSERMAJOR 174 50#define MXSERMAJOR 174
52#define MXSERCUMAJOR 175
53 51
54#define MXSER_BOARDS 4 /* Max. boards */ 52#define MXSER_BOARDS 4 /* Max. boards */
55#define MXSER_PORTS_PER_BOARD 8 /* Max. ports per board */ 53#define MXSER_PORTS_PER_BOARD 8 /* Max. ports per board */
56#define MXSER_PORTS (MXSER_BOARDS * MXSER_PORTS_PER_BOARD) 54#define MXSER_PORTS (MXSER_BOARDS * MXSER_PORTS_PER_BOARD)
57#define MXSER_ISR_PASS_LIMIT 100 55#define MXSER_ISR_PASS_LIMIT 100
58 56
59#define MXSER_ERR_IOADDR -1
60#define MXSER_ERR_IRQ -2
61#define MXSER_ERR_IRQ_CONFLIT -3
62#define MXSER_ERR_VECTOR -4
63
64/*CheckIsMoxaMust return value*/ 57/*CheckIsMoxaMust return value*/
65#define MOXA_OTHER_UART 0x00 58#define MOXA_OTHER_UART 0x00
66#define MOXA_MUST_MU150_HWID 0x01 59#define MOXA_MUST_MU150_HWID 0x01
@@ -179,14 +172,15 @@ static struct pci_device_id mxser_pcibrds[] = {
179}; 172};
180MODULE_DEVICE_TABLE(pci, mxser_pcibrds); 173MODULE_DEVICE_TABLE(pci, mxser_pcibrds);
181 174
182static int ioaddr[MXSER_BOARDS] = { 0, 0, 0, 0 }; 175static unsigned long ioaddr[MXSER_BOARDS];
183static int ttymajor = MXSERMAJOR; 176static int ttymajor = MXSERMAJOR;
184 177
185/* Variables for insmod */ 178/* Variables for insmod */
186 179
187MODULE_AUTHOR("Casper Yang"); 180MODULE_AUTHOR("Casper Yang");
188MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver"); 181MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver");
189module_param_array(ioaddr, int, NULL, 0); 182module_param_array(ioaddr, ulong, NULL, 0);
183MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board");
190module_param(ttymajor, int, 0); 184module_param(ttymajor, int, 0);
191MODULE_LICENSE("GPL"); 185MODULE_LICENSE("GPL");
192 186
@@ -196,7 +190,6 @@ struct mxser_log {
196 unsigned long txcnt[MXSER_PORTS]; 190 unsigned long txcnt[MXSER_PORTS];
197}; 191};
198 192
199
200struct mxser_mon { 193struct mxser_mon {
201 unsigned long rxcnt; 194 unsigned long rxcnt;
202 unsigned long txcnt; 195 unsigned long txcnt;
@@ -287,19 +280,9 @@ struct mxser_mstatus {
287 int dcd; 280 int dcd;
288}; 281};
289 282
290static struct mxser_mstatus GMStatus[MXSER_PORTS];
291
292static int mxserBoardCAP[MXSER_BOARDS] = {
293 0, 0, 0, 0
294 /* 0x180, 0x280, 0x200, 0x320 */
295};
296
297static struct mxser_board mxser_boards[MXSER_BOARDS]; 283static struct mxser_board mxser_boards[MXSER_BOARDS];
298static struct tty_driver *mxvar_sdriver; 284static struct tty_driver *mxvar_sdriver;
299static struct mxser_log mxvar_log; 285static struct mxser_log mxvar_log;
300static int mxvar_diagflag;
301static unsigned char mxser_msr[MXSER_PORTS + 1];
302static struct mxser_mon_ext mon_data_ext;
303static int mxser_set_baud_method[MXSER_PORTS + 1]; 286static int mxser_set_baud_method[MXSER_PORTS + 1];
304 287
305static void mxser_enable_must_enchance_mode(unsigned long baseio) 288static void mxser_enable_must_enchance_mode(unsigned long baseio)
@@ -543,6 +526,7 @@ static void process_txrx_fifo(struct mxser_port *info)
543 526
544static unsigned char mxser_get_msr(int baseaddr, int mode, int port) 527static unsigned char mxser_get_msr(int baseaddr, int mode, int port)
545{ 528{
529 static unsigned char mxser_msr[MXSER_PORTS + 1];
546 unsigned char status = 0; 530 unsigned char status = 0;
547 531
548 status = inb(baseaddr + UART_MSR); 532 status = inb(baseaddr + UART_MSR);
@@ -1319,13 +1303,9 @@ static void mxser_flush_chars(struct tty_struct *tty)
1319 struct mxser_port *info = tty->driver_data; 1303 struct mxser_port *info = tty->driver_data;
1320 unsigned long flags; 1304 unsigned long flags;
1321 1305
1322 if (info->xmit_cnt <= 0 || 1306 if (info->xmit_cnt <= 0 || tty->stopped || !info->port.xmit_buf ||
1323 tty->stopped || 1307 (tty->hw_stopped && info->type != PORT_16550A &&
1324 !info->port.xmit_buf || 1308 !info->board->chip_flag))
1325 (tty->hw_stopped &&
1326 (info->type != PORT_16550A) &&
1327 (!info->board->chip_flag)
1328 ))
1329 return; 1309 return;
1330 1310
1331 spin_lock_irqsave(&info->slock, flags); 1311 spin_lock_irqsave(&info->slock, flags);
@@ -1343,9 +1323,7 @@ static int mxser_write_room(struct tty_struct *tty)
1343 int ret; 1323 int ret;
1344 1324
1345 ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1; 1325 ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1;
1346 if (ret < 0) 1326 return ret < 0 ? 0 : ret;
1347 ret = 0;
1348 return ret;
1349} 1327}
1350 1328
1351static int mxser_chars_in_buffer(struct tty_struct *tty) 1329static int mxser_chars_in_buffer(struct tty_struct *tty)
@@ -1634,6 +1612,8 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
1634 1612
1635 switch (cmd) { 1613 switch (cmd) {
1636 case MOXA_GET_MAJOR: 1614 case MOXA_GET_MAJOR:
1615 printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix "
1616 "your userspace\n", current->comm, cmd);
1637 return put_user(ttymajor, (int __user *)argp); 1617 return put_user(ttymajor, (int __user *)argp);
1638 1618
1639 case MOXA_CHKPORTENABLE: 1619 case MOXA_CHKPORTENABLE:
@@ -1651,62 +1631,60 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
1651 ret = -EFAULT; 1631 ret = -EFAULT;
1652 unlock_kernel(); 1632 unlock_kernel();
1653 return ret; 1633 return ret;
1654 case MOXA_GETMSTATUS: 1634 case MOXA_GETMSTATUS: {
1635 struct mxser_mstatus ms, __user *msu = argp;
1655 lock_kernel(); 1636 lock_kernel();
1656 for (i = 0; i < MXSER_BOARDS; i++) 1637 for (i = 0; i < MXSER_BOARDS; i++)
1657 for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) { 1638 for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
1658 port = &mxser_boards[i].ports[j]; 1639 port = &mxser_boards[i].ports[j];
1640 memset(&ms, 0, sizeof(ms));
1659 1641
1660 GMStatus[i].ri = 0; 1642 if (!port->ioaddr)
1661 if (!port->ioaddr) { 1643 goto copy;
1662 GMStatus[i].dcd = 0;
1663 GMStatus[i].dsr = 0;
1664 GMStatus[i].cts = 0;
1665 continue;
1666 }
1667 1644
1668 if (!port->port.tty || !port->port.tty->termios) 1645 if (!port->port.tty || !port->port.tty->termios)
1669 GMStatus[i].cflag = 1646 ms.cflag = port->normal_termios.c_cflag;
1670 port->normal_termios.c_cflag;
1671 else 1647 else
1672 GMStatus[i].cflag = 1648 ms.cflag = port->port.tty->termios->c_cflag;
1673 port->port.tty->termios->c_cflag;
1674 1649
1675 status = inb(port->ioaddr + UART_MSR); 1650 status = inb(port->ioaddr + UART_MSR);
1676 if (status & 0x80 /*UART_MSR_DCD */ ) 1651 if (status & UART_MSR_DCD)
1677 GMStatus[i].dcd = 1; 1652 ms.dcd = 1;
1678 else 1653 if (status & UART_MSR_DSR)
1679 GMStatus[i].dcd = 0; 1654 ms.dsr = 1;
1680 1655 if (status & UART_MSR_CTS)
1681 if (status & 0x20 /*UART_MSR_DSR */ ) 1656 ms.cts = 1;
1682 GMStatus[i].dsr = 1; 1657 copy:
1683 else 1658 if (copy_to_user(msu, &ms, sizeof(ms))) {
1684 GMStatus[i].dsr = 0; 1659 unlock_kernel();
1685 1660 return -EFAULT;
1686 1661 }
1687 if (status & 0x10 /*UART_MSR_CTS */ ) 1662 msu++;
1688 GMStatus[i].cts = 1;
1689 else
1690 GMStatus[i].cts = 0;
1691 } 1663 }
1692 unlock_kernel(); 1664 unlock_kernel();
1693 if (copy_to_user(argp, GMStatus,
1694 sizeof(struct mxser_mstatus) * MXSER_PORTS))
1695 return -EFAULT;
1696 return 0; 1665 return 0;
1666 }
1697 case MOXA_ASPP_MON_EXT: { 1667 case MOXA_ASPP_MON_EXT: {
1698 int p, shiftbit; 1668 struct mxser_mon_ext *me; /* it's 2k, stack unfriendly */
1699 unsigned long opmode; 1669 unsigned int cflag, iflag, p;
1700 unsigned cflag, iflag; 1670 u8 opmode;
1671
1672 me = kzalloc(sizeof(*me), GFP_KERNEL);
1673 if (!me)
1674 return -ENOMEM;
1701 1675
1702 lock_kernel(); 1676 lock_kernel();
1703 for (i = 0; i < MXSER_BOARDS; i++) { 1677 for (i = 0, p = 0; i < MXSER_BOARDS; i++) {
1704 for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) { 1678 for (j = 0; j < MXSER_PORTS_PER_BOARD; j++, p++) {
1679 if (p >= ARRAY_SIZE(me->rx_cnt)) {
1680 i = MXSER_BOARDS;
1681 break;
1682 }
1705 port = &mxser_boards[i].ports[j]; 1683 port = &mxser_boards[i].ports[j];
1706 if (!port->ioaddr) 1684 if (!port->ioaddr)
1707 continue; 1685 continue;
1708 1686
1709 status = mxser_get_msr(port->ioaddr, 0, i); 1687 status = mxser_get_msr(port->ioaddr, 0, p);
1710 1688
1711 if (status & UART_MSR_TERI) 1689 if (status & UART_MSR_TERI)
1712 port->icount.rng++; 1690 port->icount.rng++;
@@ -1718,16 +1696,13 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
1718 port->icount.cts++; 1696 port->icount.cts++;
1719 1697
1720 port->mon_data.modem_status = status; 1698 port->mon_data.modem_status = status;
1721 mon_data_ext.rx_cnt[i] = port->mon_data.rxcnt; 1699 me->rx_cnt[p] = port->mon_data.rxcnt;
1722 mon_data_ext.tx_cnt[i] = port->mon_data.txcnt; 1700 me->tx_cnt[p] = port->mon_data.txcnt;
1723 mon_data_ext.up_rxcnt[i] = 1701 me->up_rxcnt[p] = port->mon_data.up_rxcnt;
1724 port->mon_data.up_rxcnt; 1702 me->up_txcnt[p] = port->mon_data.up_txcnt;
1725 mon_data_ext.up_txcnt[i] = 1703 me->modem_status[p] =
1726 port->mon_data.up_txcnt;
1727 mon_data_ext.modem_status[i] =
1728 port->mon_data.modem_status; 1704 port->mon_data.modem_status;
1729 mon_data_ext.baudrate[i] = 1705 me->baudrate[p] = tty_get_baud_rate(port->port.tty);
1730 tty_get_baud_rate(port->port.tty);
1731 1706
1732 if (!port->port.tty || !port->port.tty->termios) { 1707 if (!port->port.tty || !port->port.tty->termios) {
1733 cflag = port->normal_termios.c_cflag; 1708 cflag = port->normal_termios.c_cflag;
@@ -1737,40 +1712,31 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
1737 iflag = port->port.tty->termios->c_iflag; 1712 iflag = port->port.tty->termios->c_iflag;
1738 } 1713 }
1739 1714
1740 mon_data_ext.databits[i] = cflag & CSIZE; 1715 me->databits[p] = cflag & CSIZE;
1741 1716 me->stopbits[p] = cflag & CSTOPB;
1742 mon_data_ext.stopbits[i] = cflag & CSTOPB; 1717 me->parity[p] = cflag & (PARENB | PARODD |
1743 1718 CMSPAR);
1744 mon_data_ext.parity[i] =
1745 cflag & (PARENB | PARODD | CMSPAR);
1746
1747 mon_data_ext.flowctrl[i] = 0x00;
1748 1719
1749 if (cflag & CRTSCTS) 1720 if (cflag & CRTSCTS)
1750 mon_data_ext.flowctrl[i] |= 0x03; 1721 me->flowctrl[p] |= 0x03;
1751 1722
1752 if (iflag & (IXON | IXOFF)) 1723 if (iflag & (IXON | IXOFF))
1753 mon_data_ext.flowctrl[i] |= 0x0C; 1724 me->flowctrl[p] |= 0x0C;
1754 1725
1755 if (port->type == PORT_16550A) 1726 if (port->type == PORT_16550A)
1756 mon_data_ext.fifo[i] = 1; 1727 me->fifo[p] = 1;
1757 else
1758 mon_data_ext.fifo[i] = 0;
1759 1728
1760 p = i % 4; 1729 opmode = inb(port->opmode_ioaddr) >>
1761 shiftbit = p * 2; 1730 ((p % 4) * 2);
1762 opmode = inb(port->opmode_ioaddr) >> shiftbit;
1763 opmode &= OP_MODE_MASK; 1731 opmode &= OP_MODE_MASK;
1764 1732 me->iftype[p] = opmode;
1765 mon_data_ext.iftype[i] = opmode;
1766
1767 } 1733 }
1768 } 1734 }
1769 unlock_kernel(); 1735 unlock_kernel();
1770 if (copy_to_user(argp, &mon_data_ext, 1736 if (copy_to_user(argp, me, sizeof(*me)))
1771 sizeof(mon_data_ext))) 1737 ret = -EFAULT;
1772 return -EFAULT; 1738 kfree(me);
1773 return 0; 1739 return ret;
1774 } 1740 }
1775 default: 1741 default:
1776 return -ENOIOCTLCMD; 1742 return -ENOIOCTLCMD;
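
The MOXA_ASPP_MON_EXT rewrite shows the usual answer to a reply structure too large for the kernel stack (struct mxser_mon_ext is about 2k): allocate it per call with kzalloc() instead of keeping a file-scope static. That removes the shared mon_data_ext buffer, which two concurrent ioctls could have scribbled over, and guarantees zeroed memory so no stale bytes reach userspace. The generic shape, with hypothetical names (argp is the usual void __user * from the ioctl):

        struct big_reply *r;                    /* hypothetical ~2k reply */
        long ret = 0;

        r = kzalloc(sizeof(*r), GFP_KERNEL);    /* heap, pre-zeroed */
        if (!r)
                return -ENOMEM;
        fill_reply(r);                          /* hypothetical */
        if (copy_to_user(argp, r, sizeof(*r)))
                ret = -EFAULT;
        kfree(r);
        return ret;

The rewrite also indexes the per-port arrays with a running port counter p instead of the board index i, which the old code used by mistake.
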
@@ -1804,7 +1770,6 @@ static int mxser_ioctl(struct tty_struct *tty, struct file *file,
1804{ 1770{
1805 struct mxser_port *info = tty->driver_data; 1771 struct mxser_port *info = tty->driver_data;
1806 struct async_icount cnow; 1772 struct async_icount cnow;
1807 struct serial_icounter_struct __user *p_cuser;
1808 unsigned long flags; 1773 unsigned long flags;
1809 void __user *argp = (void __user *)arg; 1774 void __user *argp = (void __user *)arg;
1810 int retval; 1775 int retval;
@@ -1884,30 +1849,26 @@ static int mxser_ioctl(struct tty_struct *tty, struct file *file,
1884 * NB: both 1->0 and 0->1 transitions are counted except for 1849 * NB: both 1->0 and 0->1 transitions are counted except for
1885 * RI where only 0->1 is counted. 1850 * RI where only 0->1 is counted.
1886 */ 1851 */
1887 case TIOCGICOUNT: 1852 case TIOCGICOUNT: {
1853 struct serial_icounter_struct icnt = { 0 };
1888 spin_lock_irqsave(&info->slock, flags); 1854 spin_lock_irqsave(&info->slock, flags);
1889 cnow = info->icount; 1855 cnow = info->icount;
1890 spin_unlock_irqrestore(&info->slock, flags); 1856 spin_unlock_irqrestore(&info->slock, flags);
1891 p_cuser = argp; 1857
1892 if (put_user(cnow.frame, &p_cuser->frame)) 1858 icnt.frame = cnow.frame;
1893 return -EFAULT; 1859 icnt.brk = cnow.brk;
1894 if (put_user(cnow.brk, &p_cuser->brk)) 1860 icnt.overrun = cnow.overrun;
1895 return -EFAULT; 1861 icnt.buf_overrun = cnow.buf_overrun;
1896 if (put_user(cnow.overrun, &p_cuser->overrun)) 1862 icnt.parity = cnow.parity;
1897 return -EFAULT; 1863 icnt.rx = cnow.rx;
1898 if (put_user(cnow.buf_overrun, &p_cuser->buf_overrun)) 1864 icnt.tx = cnow.tx;
1899 return -EFAULT; 1865 icnt.cts = cnow.cts;
1900 if (put_user(cnow.parity, &p_cuser->parity)) 1866 icnt.dsr = cnow.dsr;
1901 return -EFAULT; 1867 icnt.rng = cnow.rng;
1902 if (put_user(cnow.rx, &p_cuser->rx)) 1868 icnt.dcd = cnow.dcd;
1903 return -EFAULT; 1869
1904 if (put_user(cnow.tx, &p_cuser->tx)) 1870 return copy_to_user(argp, &icnt, sizeof(icnt)) ? -EFAULT : 0;
1905 return -EFAULT; 1871 }
1906 put_user(cnow.cts, &p_cuser->cts);
1907 put_user(cnow.dsr, &p_cuser->dsr);
1908 put_user(cnow.rng, &p_cuser->rng);
1909 put_user(cnow.dcd, &p_cuser->dcd);
1910 return 0;
1911 case MOXA_HighSpeedOn: 1872 case MOXA_HighSpeedOn:
1912 return put_user(info->baud_base != 115200 ? 1 : 0, (int __user *)argp); 1873 return put_user(info->baud_base != 115200 ? 1 : 0, (int __user *)argp);
1913 case MOXA_SDS_RSTICOUNTER: 1874 case MOXA_SDS_RSTICOUNTER:
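
The TIOCGICOUNT change applies the same principle in the other direction: the counters are snapshotted once under the spinlock into a zero-initialized struct serial_icounter_struct on the stack, then handed to userspace with a single copy_to_user(). Besides being shorter, this fixes real defects in the old sequence, where the last four put_user() calls (cts, dsr, rng, dcd) ignored their return values, so a fault in the tail of the user buffer went unreported.
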
@@ -2503,7 +2464,8 @@ static int __devinit mxser_initbrd(struct mxser_board *brd,
2503 unsigned int i; 2464 unsigned int i;
2504 int retval; 2465 int retval;
2505 2466
2506 printk(KERN_INFO "max. baud rate = %d bps.\n", brd->ports[0].max_baud); 2467 printk(KERN_INFO "mxser: max. baud rate = %d bps\n",
2468 brd->ports[0].max_baud);
2507 2469
2508 for (i = 0; i < brd->info->nports; i++) { 2470 for (i = 0; i < brd->info->nports; i++) {
2509 info = &brd->ports[i]; 2471 info = &brd->ports[i];
@@ -2586,28 +2548,32 @@ static int __init mxser_get_ISA_conf(int cap, struct mxser_board *brd)
2586 irq = regs[9] & 0xF000; 2548 irq = regs[9] & 0xF000;
2587 irq = irq | (irq >> 4); 2549 irq = irq | (irq >> 4);
2588 if (irq != (regs[9] & 0xFF00)) 2550 if (irq != (regs[9] & 0xFF00))
2589 return MXSER_ERR_IRQ_CONFLIT; 2551 goto err_irqconflict;
2590 } else if (brd->info->nports == 4) { 2552 } else if (brd->info->nports == 4) {
2591 irq = regs[9] & 0xF000; 2553 irq = regs[9] & 0xF000;
2592 irq = irq | (irq >> 4); 2554 irq = irq | (irq >> 4);
2593 irq = irq | (irq >> 8); 2555 irq = irq | (irq >> 8);
2594 if (irq != regs[9]) 2556 if (irq != regs[9])
2595 return MXSER_ERR_IRQ_CONFLIT; 2557 goto err_irqconflict;
2596 } else if (brd->info->nports == 8) { 2558 } else if (brd->info->nports == 8) {
2597 irq = regs[9] & 0xF000; 2559 irq = regs[9] & 0xF000;
2598 irq = irq | (irq >> 4); 2560 irq = irq | (irq >> 4);
2599 irq = irq | (irq >> 8); 2561 irq = irq | (irq >> 8);
2600 if ((irq != regs[9]) || (irq != regs[10])) 2562 if ((irq != regs[9]) || (irq != regs[10]))
2601 return MXSER_ERR_IRQ_CONFLIT; 2563 goto err_irqconflict;
2602 } 2564 }
2603 2565
2604 if (!irq) 2566 if (!irq) {
2605 return MXSER_ERR_IRQ; 2567 printk(KERN_ERR "mxser: interrupt number unset\n");
2568 return -EIO;
2569 }
2606 brd->irq = ((int)(irq & 0xF000) >> 12); 2570 brd->irq = ((int)(irq & 0xF000) >> 12);
2607 for (i = 0; i < 8; i++) 2571 for (i = 0; i < 8; i++)
2608 brd->ports[i].ioaddr = (int) regs[i + 1] & 0xFFF8; 2572 brd->ports[i].ioaddr = (int) regs[i + 1] & 0xFFF8;
2609 if ((regs[12] & 0x80) == 0) 2573 if ((regs[12] & 0x80) == 0) {
2610 return MXSER_ERR_VECTOR; 2574 printk(KERN_ERR "mxser: invalid interrupt vector\n");
2575 return -EIO;
2576 }
2611 brd->vector = (int)regs[11]; /* interrupt vector */ 2577 brd->vector = (int)regs[11]; /* interrupt vector */
2612 if (id == 1) 2578 if (id == 1)
2613 brd->vector_mask = 0x00FF; 2579 brd->vector_mask = 0x00FF;
@@ -2634,13 +2600,26 @@ static int __init mxser_get_ISA_conf(int cap, struct mxser_board *brd)
2634 else 2600 else
2635 brd->uart_type = PORT_16450; 2601 brd->uart_type = PORT_16450;
2636 if (!request_region(brd->ports[0].ioaddr, 8 * brd->info->nports, 2602 if (!request_region(brd->ports[0].ioaddr, 8 * brd->info->nports,
2637 "mxser(IO)")) 2603 "mxser(IO)")) {
2638 return MXSER_ERR_IOADDR; 2604 printk(KERN_ERR "mxser: can't request ports I/O region: "
2605 "0x%.8lx-0x%.8lx\n",
2606 brd->ports[0].ioaddr, brd->ports[0].ioaddr +
2607 8 * brd->info->nports - 1);
2608 return -EIO;
2609 }
2639 if (!request_region(brd->vector, 1, "mxser(vector)")) { 2610 if (!request_region(brd->vector, 1, "mxser(vector)")) {
2640 release_region(brd->ports[0].ioaddr, 8 * brd->info->nports); 2611 release_region(brd->ports[0].ioaddr, 8 * brd->info->nports);
2641 return MXSER_ERR_VECTOR; 2612 printk(KERN_ERR "mxser: can't request interrupt vector region: "
2613 "0x%.8lx-0x%.8lx\n",
2614 brd->ports[0].ioaddr, brd->ports[0].ioaddr +
2615 8 * brd->info->nports - 1);
2616 return -EIO;
2642 } 2617 }
2643 return brd->info->nports; 2618 return brd->info->nports;
2619
2620err_irqconflict:
2621 printk(KERN_ERR "mxser: invalid interrupt number\n");
2622 return -EIO;
2644} 2623}
2645 2624
2646static int __devinit mxser_probe(struct pci_dev *pdev, 2625static int __devinit mxser_probe(struct pci_dev *pdev,
@@ -2657,20 +2636,20 @@ static int __devinit mxser_probe(struct pci_dev *pdev,
2657 break; 2636 break;
2658 2637
2659 if (i >= MXSER_BOARDS) { 2638 if (i >= MXSER_BOARDS) {
2660 printk(KERN_ERR "Too many Smartio/Industio family boards found " 2639 dev_err(&pdev->dev, "too many boards found (maximum %d), board "
2661 "(maximum %d), board not configured\n", MXSER_BOARDS); 2640 "not configured\n", MXSER_BOARDS);
2662 goto err; 2641 goto err;
2663 } 2642 }
2664 2643
2665 brd = &mxser_boards[i]; 2644 brd = &mxser_boards[i];
2666 brd->idx = i * MXSER_PORTS_PER_BOARD; 2645 brd->idx = i * MXSER_PORTS_PER_BOARD;
2667 printk(KERN_INFO "Found MOXA %s board (BusNo=%d, DevNo=%d)\n", 2646 dev_info(&pdev->dev, "found MOXA %s board (BusNo=%d, DevNo=%d)\n",
2668 mxser_cards[ent->driver_data].name, 2647 mxser_cards[ent->driver_data].name,
2669 pdev->bus->number, PCI_SLOT(pdev->devfn)); 2648 pdev->bus->number, PCI_SLOT(pdev->devfn));
2670 2649
2671 retval = pci_enable_device(pdev); 2650 retval = pci_enable_device(pdev);
2672 if (retval) { 2651 if (retval) {
2673 printk(KERN_ERR "Moxa SmartI/O PCI enable fail !\n"); 2652 dev_err(&pdev->dev, "PCI enable failed\n");
2674 goto err; 2653 goto err;
2675 } 2654 }
2676 2655
@@ -2772,11 +2751,8 @@ static struct pci_driver mxser_driver = {
2772static int __init mxser_module_init(void) 2751static int __init mxser_module_init(void)
2773{ 2752{
2774 struct mxser_board *brd; 2753 struct mxser_board *brd;
2775 unsigned long cap; 2754 unsigned int b, i, m;
2776 unsigned int i, m, isaloop; 2755 int retval;
2777 int retval, b;
2778
2779 pr_debug("Loading module mxser ...\n");
2780 2756
2781 mxvar_sdriver = alloc_tty_driver(MXSER_PORTS + 1); 2757 mxvar_sdriver = alloc_tty_driver(MXSER_PORTS + 1);
2782 if (!mxvar_sdriver) 2758 if (!mxvar_sdriver)
@@ -2806,74 +2782,43 @@ static int __init mxser_module_init(void)
2806 goto err_put; 2782 goto err_put;
2807 } 2783 }
2808 2784
2809 mxvar_diagflag = 0;
2810
2811 m = 0;
2812 /* Start finding ISA boards here */ 2785 /* Start finding ISA boards here */
2813 for (isaloop = 0; isaloop < 2; isaloop++) 2786 for (m = 0, b = 0; b < MXSER_BOARDS; b++) {
2814 for (b = 0; b < MXSER_BOARDS && m < MXSER_BOARDS; b++) { 2787 if (!ioaddr[b])
2815 if (!isaloop) 2788 continue;
2816 cap = mxserBoardCAP[b]; /* predefined */ 2789
2817 else 2790 brd = &mxser_boards[m];
2818 cap = ioaddr[b]; /* module param */ 2791 retval = mxser_get_ISA_conf(!ioaddr[b], brd);
2819 2792 if (retval <= 0) {
2820 if (!cap) 2793 brd->info = NULL;
2821 continue; 2794 continue;
2795 }
2822 2796
2823 brd = &mxser_boards[m]; 2797 printk(KERN_INFO "mxser: found MOXA %s board (CAP=0x%lx)\n",
2824 retval = mxser_get_ISA_conf(cap, brd); 2798 brd->info->name, ioaddr[b]);
2825
2826 if (retval != 0)
2827 printk(KERN_INFO "Found MOXA %s board "
2828 "(CAP=0x%x)\n",
2829 brd->info->name, ioaddr[b]);
2830
2831 if (retval <= 0) {
2832 if (retval == MXSER_ERR_IRQ)
2833 printk(KERN_ERR "Invalid interrupt "
2834 "number, board not "
2835 "configured\n");
2836 else if (retval == MXSER_ERR_IRQ_CONFLIT)
2837 printk(KERN_ERR "Invalid interrupt "
2838 "number, board not "
2839 "configured\n");
2840 else if (retval == MXSER_ERR_VECTOR)
2841 printk(KERN_ERR "Invalid interrupt "
2842 "vector, board not "
2843 "configured\n");
2844 else if (retval == MXSER_ERR_IOADDR)
2845 printk(KERN_ERR "Invalid I/O address, "
2846 "board not configured\n");
2847
2848 brd->info = NULL;
2849 continue;
2850 }
2851 2799
2852 /* mxser_initbrd will hook ISR. */ 2800 /* mxser_initbrd will hook ISR. */
2853 if (mxser_initbrd(brd, NULL) < 0) { 2801 if (mxser_initbrd(brd, NULL) < 0) {
2854 brd->info = NULL; 2802 brd->info = NULL;
2855 continue; 2803 continue;
2856 } 2804 }
2857 2805
2858 brd->idx = m * MXSER_PORTS_PER_BOARD; 2806 brd->idx = m * MXSER_PORTS_PER_BOARD;
2859 for (i = 0; i < brd->info->nports; i++) 2807 for (i = 0; i < brd->info->nports; i++)
2860 tty_register_device(mxvar_sdriver, brd->idx + i, 2808 tty_register_device(mxvar_sdriver, brd->idx + i, NULL);
2861 NULL);
2862 2809
2863 m++; 2810 m++;
2864 } 2811 }
2865 2812
2866 retval = pci_register_driver(&mxser_driver); 2813 retval = pci_register_driver(&mxser_driver);
2867 if (retval) { 2814 if (retval) {
2868 printk(KERN_ERR "Can't register pci driver\n"); 2815 printk(KERN_ERR "mxser: can't register pci driver\n");
2869 if (!m) { 2816 if (!m) {
2870 retval = -ENODEV; 2817 retval = -ENODEV;
2871 goto err_unr; 2818 goto err_unr;
2872 } /* else: we have some ISA cards under control */ 2819 } /* else: we have some ISA cards under control */
2873 } 2820 }
2874 2821
2875 pr_debug("Done.\n");
2876
2877 return 0; 2822 return 0;
2878err_unr: 2823err_unr:
2879 tty_unregister_driver(mxvar_sdriver); 2824 tty_unregister_driver(mxvar_sdriver);
@@ -2886,8 +2831,6 @@ static void __exit mxser_module_exit(void)
2886{ 2831{
2887 unsigned int i, j; 2832 unsigned int i, j;
2888 2833
2889 pr_debug("Unloading module mxser ...\n");
2890
2891 pci_unregister_driver(&mxser_driver); 2834 pci_unregister_driver(&mxser_driver);
2892 2835
2893 for (i = 0; i < MXSER_BOARDS; i++) /* ISA remains */ 2836 for (i = 0; i < MXSER_BOARDS; i++) /* ISA remains */
@@ -2901,8 +2844,6 @@ static void __exit mxser_module_exit(void)
2901 for (i = 0; i < MXSER_BOARDS; i++) 2844 for (i = 0; i < MXSER_BOARDS; i++)
2902 if (mxser_boards[i].info != NULL) 2845 if (mxser_boards[i].info != NULL)
2903 mxser_release_res(&mxser_boards[i], NULL, 1); 2846 mxser_release_res(&mxser_boards[i], NULL, 1);
2904
2905 pr_debug("Done.\n");
2906} 2847}
2907 2848
2908module_init(mxser_module_init); 2849module_init(mxser_module_init);
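
On the module-parameter side, ioaddr[] becomes an array of unsigned long with a proper description, and the old two-pass probe over the (all-zero) mxserBoardCAP table collapses into a single loop over user-supplied addresses, with mxser_get_ISA_conf() now printing its own errors instead of returning the removed MXSER_ERR_* codes. Usage is unchanged:

        /* ISA boards are found only via the module parameter, e.g.
         *   modprobe mxser ioaddr=0x180,0x280
         */
        static unsigned long ioaddr[MXSER_BOARDS];
        module_param_array(ioaddr, ulong, NULL, 0);
        MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board");
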
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index ba012c2bdf7a..f9f72a211292 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -122,35 +122,20 @@ static int flash_ioctl(struct inode *inodep, struct file *filep, unsigned int cm
122static ssize_t flash_read(struct file *file, char __user *buf, size_t size, 122static ssize_t flash_read(struct file *file, char __user *buf, size_t size,
123 loff_t *ppos) 123 loff_t *ppos)
124{ 124{
125 unsigned long p = *ppos; 125 ssize_t ret;
126 unsigned int count = size;
127 int ret = 0;
128 126
129 if (flashdebug) 127 if (flashdebug)
 130 printk(KERN_DEBUG "flash_read: flash_read: offset=0x%lX, " 128 printk(KERN_DEBUG "flash_read: flash_read: offset=0x%lX, "
 131 "buffer=%p, count=0x%X.\n", p, buf, count); 129 "buffer=%p, count=0x%zX.\n", (unsigned long)*ppos, buf, size);
130 /*
131 * We now lock against reads and writes. --rmk
132 */
133 if (mutex_lock_interruptible(&nwflash_mutex))
134 return -ERESTARTSYS;
132 135
133 if (count) 136 ret = simple_read_from_buffer(buf, size, ppos, FLASH_BASE, gbFlashSize);
134 ret = -ENXIO; 137 mutex_unlock(&nwflash_mutex);
135
136 if (p < gbFlashSize) {
137 if (count > gbFlashSize - p)
138 count = gbFlashSize - p;
139 138
140 /*
141 * We now lock against reads and writes. --rmk
142 */
143 if (mutex_lock_interruptible(&nwflash_mutex))
144 return -ERESTARTSYS;
145
146 ret = copy_to_user(buf, (void *)(FLASH_BASE + p), count);
147 if (ret == 0) {
148 ret = count;
149 *ppos += count;
150 } else
151 ret = -EFAULT;
152 mutex_unlock(&nwflash_mutex);
153 }
154 return ret; 139 return ret;
155} 140}
156 141
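
The nwflash read path shrinks to a single call because simple_read_from_buffer() already implements the whole contract: clamp the offset against the buffer size, copy out at most the requested bytes, advance *ppos, and return the byte count or -EFAULT. A sketch of the resulting idiom for any driver exposing an in-kernel buffer (foo_buf and foo_buf_size are hypothetical):

        static ssize_t foo_read(struct file *file, char __user *buf,
                                size_t size, loff_t *ppos)
        {
                return simple_read_from_buffer(buf, size, ppos,
                                               foo_buf, foo_buf_size);
        }

Here the source is the flash window at FLASH_BASE with length gbFlashSize, still guarded by nwflash_mutex so reads and writes cannot interleave.
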
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 7af7a7e6b9c2..bee39fdfba73 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -67,7 +67,7 @@
67#include <linux/major.h> 67#include <linux/major.h>
68#include <linux/ppdev.h> 68#include <linux/ppdev.h>
69#include <linux/smp_lock.h> 69#include <linux/smp_lock.h>
70#include <asm/uaccess.h> 70#include <linux/uaccess.h>
71 71
72#define PP_VERSION "ppdev: user-space parallel port driver" 72#define PP_VERSION "ppdev: user-space parallel port driver"
73#define CHRDEV "ppdev" 73#define CHRDEV "ppdev"
@@ -328,10 +328,9 @@ static enum ieee1284_phase init_phase (int mode)
328 return IEEE1284_PH_FWD_IDLE; 328 return IEEE1284_PH_FWD_IDLE;
329} 329}
330 330
331static int pp_ioctl(struct inode *inode, struct file *file, 331static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
332 unsigned int cmd, unsigned long arg)
333{ 332{
334 unsigned int minor = iminor(inode); 333 unsigned int minor = iminor(file->f_path.dentry->d_inode);
335 struct pp_struct *pp = file->private_data; 334 struct pp_struct *pp = file->private_data;
336 struct parport * port; 335 struct parport * port;
337 void __user *argp = (void __user *)arg; 336 void __user *argp = (void __user *)arg;
@@ -634,6 +633,15 @@ static int pp_ioctl(struct inode *inode, struct file *file,
634 return 0; 633 return 0;
635} 634}
636 635
636static long pp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
637{
638 long ret;
639 lock_kernel();
640 ret = pp_do_ioctl(file, cmd, arg);
641 unlock_kernel();
642 return ret;
643}
644
637static int pp_open (struct inode * inode, struct file * file) 645static int pp_open (struct inode * inode, struct file * file)
638{ 646{
639 unsigned int minor = iminor(inode); 647 unsigned int minor = iminor(inode);
@@ -745,7 +753,7 @@ static const struct file_operations pp_fops = {
745 .read = pp_read, 753 .read = pp_read,
746 .write = pp_write, 754 .write = pp_write,
747 .poll = pp_poll, 755 .poll = pp_poll,
748 .ioctl = pp_ioctl, 756 .unlocked_ioctl = pp_ioctl,
749 .open = pp_open, 757 .open = pp_open,
750 .release = pp_release, 758 .release = pp_release,
751}; 759};
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index 0cdfee152916..a8f68a3f14dd 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -179,7 +179,7 @@ static int rio_set_real_termios(void *ptr);
179static void rio_hungup(void *ptr); 179static void rio_hungup(void *ptr);
180static void rio_close(void *ptr); 180static void rio_close(void *ptr);
181static int rio_chars_in_buffer(void *ptr); 181static int rio_chars_in_buffer(void *ptr);
182static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); 182static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
183static int rio_init_drivers(void); 183static int rio_init_drivers(void);
184 184
185static void my_hd(void *addr, int len); 185static void my_hd(void *addr, int len);
@@ -240,7 +240,7 @@ static struct real_driver rio_real_driver = {
240 240
241static const struct file_operations rio_fw_fops = { 241static const struct file_operations rio_fw_fops = {
242 .owner = THIS_MODULE, 242 .owner = THIS_MODULE,
243 .ioctl = rio_fw_ioctl, 243 .unlocked_ioctl = rio_fw_ioctl,
244}; 244};
245 245
246static struct miscdevice rio_fw_device = { 246static struct miscdevice rio_fw_device = {
@@ -560,13 +560,15 @@ static void rio_close(void *ptr)
560 560
561 561
562 562
563static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) 563static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
564{ 564{
565 int rc = 0; 565 int rc = 0;
566 func_enter(); 566 func_enter();
567 567
568 /* The "dev" argument isn't used. */ 568 /* The "dev" argument isn't used. */
569 lock_kernel();
569 rc = riocontrol(p, 0, cmd, arg, capable(CAP_SYS_ADMIN)); 570 rc = riocontrol(p, 0, cmd, arg, capable(CAP_SYS_ADMIN));
571 unlock_kernel();
570 572
571 func_exit(); 573 func_exit();
572 return rc; 574 return rc;
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 2162439bbe48..c385206f9db5 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -286,8 +286,8 @@ static void sx_close(void *ptr);
286static int sx_chars_in_buffer(void *ptr); 286static int sx_chars_in_buffer(void *ptr);
287static int sx_init_board(struct sx_board *board); 287static int sx_init_board(struct sx_board *board);
288static int sx_init_portstructs(int nboards, int nports); 288static int sx_init_portstructs(int nboards, int nports);
289static int sx_fw_ioctl(struct inode *inode, struct file *filp, 289static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
290 unsigned int cmd, unsigned long arg); 290 unsigned long arg);
291static int sx_init_drivers(void); 291static int sx_init_drivers(void);
292 292
293static struct tty_driver *sx_driver; 293static struct tty_driver *sx_driver;
@@ -396,7 +396,7 @@ static struct real_driver sx_real_driver = {
396 396
397static const struct file_operations sx_fw_fops = { 397static const struct file_operations sx_fw_fops = {
398 .owner = THIS_MODULE, 398 .owner = THIS_MODULE,
399 .ioctl = sx_fw_ioctl, 399 .unlocked_ioctl = sx_fw_ioctl,
400}; 400};
401 401
402static struct miscdevice sx_fw_device = { 402static struct miscdevice sx_fw_device = {
@@ -1686,10 +1686,10 @@ static int do_memtest_w(struct sx_board *board, int min, int max)
1686} 1686}
1687#endif 1687#endif
1688 1688
1689static int sx_fw_ioctl(struct inode *inode, struct file *filp, 1689static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
1690 unsigned int cmd, unsigned long arg) 1690 unsigned long arg)
1691{ 1691{
1692 int rc = 0; 1692 long rc = 0;
1693 int __user *descr = (int __user *)arg; 1693 int __user *descr = (int __user *)arg;
1694 int i; 1694 int i;
1695 static struct sx_board *board = NULL; 1695 static struct sx_board *board = NULL;
@@ -1699,13 +1699,10 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
1699 1699
1700 func_enter(); 1700 func_enter();
1701 1701
1702#if 0 1702 if (!capable(CAP_SYS_RAWIO))
1703 /* Removed superuser check: Sysops can use the permissions on the device
1704 file to restrict access. Recommendation: Root only. (root.root 600) */
1705 if (!capable(CAP_SYS_ADMIN)) {
1706 return -EPERM; 1703 return -EPERM;
1707 } 1704
1708#endif 1705 lock_kernel();
1709 1706
1710 sx_dprintk(SX_DEBUG_FIRMWARE, "IOCTL %x: %lx\n", cmd, arg); 1707 sx_dprintk(SX_DEBUG_FIRMWARE, "IOCTL %x: %lx\n", cmd, arg);
1711 1708
@@ -1720,19 +1717,23 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
1720 for (i = 0; i < SX_NBOARDS; i++) 1717 for (i = 0; i < SX_NBOARDS; i++)
1721 sx_dprintk(SX_DEBUG_FIRMWARE, "<%x> ", boards[i].flags); 1718 sx_dprintk(SX_DEBUG_FIRMWARE, "<%x> ", boards[i].flags);
1722 sx_dprintk(SX_DEBUG_FIRMWARE, "\n"); 1719 sx_dprintk(SX_DEBUG_FIRMWARE, "\n");
1720 unlock_kernel();
1723 return -EIO; 1721 return -EIO;
1724 } 1722 }
1725 1723
1726 switch (cmd) { 1724 switch (cmd) {
1727 case SXIO_SET_BOARD: 1725 case SXIO_SET_BOARD:
1728 sx_dprintk(SX_DEBUG_FIRMWARE, "set board to %ld\n", arg); 1726 sx_dprintk(SX_DEBUG_FIRMWARE, "set board to %ld\n", arg);
1727 rc = -EIO;
1729 if (arg >= SX_NBOARDS) 1728 if (arg >= SX_NBOARDS)
1730 return -EIO; 1729 break;
1731 sx_dprintk(SX_DEBUG_FIRMWARE, "not out of range\n"); 1730 sx_dprintk(SX_DEBUG_FIRMWARE, "not out of range\n");
1732 if (!(boards[arg].flags & SX_BOARD_PRESENT)) 1731 if (!(boards[arg].flags & SX_BOARD_PRESENT))
1733 return -EIO; 1732 break;
1734 sx_dprintk(SX_DEBUG_FIRMWARE, ".. and present!\n"); 1733 sx_dprintk(SX_DEBUG_FIRMWARE, ".. and present!\n");
1735 board = &boards[arg]; 1734 board = &boards[arg];
1735 rc = 0;
1736 /* FIXME: And this does ... nothing?? */
1736 break; 1737 break;
1737 case SXIO_GET_TYPE: 1738 case SXIO_GET_TYPE:
1738 rc = -ENOENT; /* If we manage to miss one, return error. */ 1739 rc = -ENOENT; /* If we manage to miss one, return error. */
@@ -1746,7 +1747,7 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
1746 rc = SX_TYPE_SI; 1747 rc = SX_TYPE_SI;
1747 if (IS_EISA_BOARD(board)) 1748 if (IS_EISA_BOARD(board))
1748 rc = SX_TYPE_SI; 1749 rc = SX_TYPE_SI;
1749 sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %d\n", rc); 1750 sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %ld\n", rc);
1750 break; 1751 break;
1751 case SXIO_DO_RAMTEST: 1752 case SXIO_DO_RAMTEST:
1752 if (sx_initialized) /* Already initialized: better not ramtest the board. */ 1753 if (sx_initialized) /* Already initialized: better not ramtest the board. */
@@ -1760,19 +1761,26 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
1760 rc = do_memtest(board, 0, 0x7ff8); 1761 rc = do_memtest(board, 0, 0x7ff8);
1761 /* if (!rc) rc = do_memtest_w (board, 0, 0x7ff8); */ 1762 /* if (!rc) rc = do_memtest_w (board, 0, 0x7ff8); */
1762 } 1763 }
1763 sx_dprintk(SX_DEBUG_FIRMWARE, "returning memtest result= %d\n", 1764 sx_dprintk(SX_DEBUG_FIRMWARE,
1764 rc); 1765 "returning memtest result= %ld\n", rc);
1765 break; 1766 break;
1766 case SXIO_DOWNLOAD: 1767 case SXIO_DOWNLOAD:
1767 if (sx_initialized) /* Already initialized */ 1768 if (sx_initialized) {/* Already initialized */
1768 return -EEXIST; 1769 rc = -EEXIST;
1769 if (!sx_reset(board)) 1770 break;
1770 return -EIO; 1771 }
1772 if (!sx_reset(board)) {
1773 rc = -EIO;
1774 break;
1775 }
1771 sx_dprintk(SX_DEBUG_INIT, "reset the board...\n"); 1776 sx_dprintk(SX_DEBUG_INIT, "reset the board...\n");
1772 1777
1773 tmp = kmalloc(SX_CHUNK_SIZE, GFP_USER); 1778 tmp = kmalloc(SX_CHUNK_SIZE, GFP_USER);
1774 if (!tmp) 1779 if (!tmp) {
1775 return -ENOMEM; 1780 rc = -ENOMEM;
1781 break;
1782 }
1783 /* FIXME: check returns */
1776 get_user(nbytes, descr++); 1784 get_user(nbytes, descr++);
1777 get_user(offset, descr++); 1785 get_user(offset, descr++);
1778 get_user(data, descr++); 1786 get_user(data, descr++);
@@ -1782,7 +1790,8 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
1782 (i + SX_CHUNK_SIZE > nbytes) ? 1790 (i + SX_CHUNK_SIZE > nbytes) ?
1783 nbytes - i : SX_CHUNK_SIZE)) { 1791 nbytes - i : SX_CHUNK_SIZE)) {
1784 kfree(tmp); 1792 kfree(tmp);
1785 return -EFAULT; 1793 rc = -EFAULT;
1794 break;
1786 } 1795 }
1787 memcpy_toio(board->base2 + offset + i, tmp, 1796 memcpy_toio(board->base2 + offset + i, tmp,
1788 (i + SX_CHUNK_SIZE > nbytes) ? 1797 (i + SX_CHUNK_SIZE > nbytes) ?
@@ -1798,13 +1807,17 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
1798 rc = sx_nports; 1807 rc = sx_nports;
1799 break; 1808 break;
1800 case SXIO_INIT: 1809 case SXIO_INIT:
1801 if (sx_initialized) /* Already initialized */ 1810 if (sx_initialized) { /* Already initialized */
1802 return -EEXIST; 1811 rc = -EEXIST;
1812 break;
1813 }
1803 /* This is not allowed until all boards are initialized... */ 1814 /* This is not allowed until all boards are initialized... */
1804 for (i = 0; i < SX_NBOARDS; i++) { 1815 for (i = 0; i < SX_NBOARDS; i++) {
1805 if ((boards[i].flags & SX_BOARD_PRESENT) && 1816 if ((boards[i].flags & SX_BOARD_PRESENT) &&
1806 !(boards[i].flags & SX_BOARD_INITIALIZED)) 1817 !(boards[i].flags & SX_BOARD_INITIALIZED)) {
1807 return -EIO; 1818 rc = -EIO;
1819 break;
1820 }
1808 } 1821 }
1809 for (i = 0; i < SX_NBOARDS; i++) 1822 for (i = 0; i < SX_NBOARDS; i++)
1810 if (!(boards[i].flags & SX_BOARD_PRESENT)) 1823 if (!(boards[i].flags & SX_BOARD_PRESENT))
@@ -1832,10 +1845,10 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
1832 rc = sx_nports; 1845 rc = sx_nports;
1833 break; 1846 break;
1834 default: 1847 default:
1835 printk(KERN_WARNING "Unknown ioctl on firmware device (%x).\n", 1848 rc = -ENOTTY;
1836 cmd);
1837 break; 1849 break;
1838 } 1850 }
1851 unlock_kernel();
1839 func_exit(); 1852 func_exit();
1840 return rc; 1853 return rc;
1841} 1854}
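
Because sx_fw_ioctl() now holds the BKL itself, every early return inside the switch had to become an rc assignment plus break, so control always reaches the single unlock_kernel() at the bottom; the commented-out superuser check is also revived as a real capable(CAP_SYS_RAWIO) test, and the unknown-ioctl warning becomes a plain -ENOTTY. The resulting shape, sketched with a hypothetical command:

        lock_kernel();
        switch (cmd) {
        case SXIO_FOO:                          /* hypothetical */
                if (!board_ready) {
                        rc = -EIO;              /* no early return under the lock */
                        break;
                }
                rc = do_foo();
                break;
        default:
                rc = -ENOTTY;
                break;
        }
        unlock_kernel();
        return rc;
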
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 6f4d856df987..e1b46bc7e43c 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3580,7 +3580,6 @@ void proc_clear_tty(struct task_struct *p)
3580 p->signal->tty = NULL; 3580 p->signal->tty = NULL;
3581 spin_unlock_irq(&p->sighand->siglock); 3581 spin_unlock_irq(&p->sighand->siglock);
3582} 3582}
3583EXPORT_SYMBOL(proc_clear_tty);
3584 3583
3585/* Called under the sighand lock */ 3584/* Called under the sighand lock */
3586 3585
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index dc17fe3a88bc..d0f4eb6fdb7f 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -46,6 +46,9 @@ static char *in, *inbuf;
46/* The operations for our console. */ 46/* The operations for our console. */
47static struct hv_ops virtio_cons; 47static struct hv_ops virtio_cons;
48 48
49/* The hvc device */
50static struct hvc_struct *hvc;
51
49/*D:310 The put_chars() callback is pretty straightforward. 52/*D:310 The put_chars() callback is pretty straightforward.
50 * 53 *
51 * We turn the characters into a scatter-gather list, add it to the output 54 * We turn the characters into a scatter-gather list, add it to the output
@@ -134,6 +137,27 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
134 return hvc_instantiate(0, 0, &virtio_cons); 137 return hvc_instantiate(0, 0, &virtio_cons);
135} 138}
136 139
140/*
141 * We support only one console; the hvc struct is a global var,
142 * so there is nothing to do here beyond tracking the flag.
143 */
144static int notifier_add_vio(struct hvc_struct *hp, int data)
145{
146 hp->irq_requested = 1;
147 return 0;
148}
149
150static void notifier_del_vio(struct hvc_struct *hp, int data)
151{
152 hp->irq_requested = 0;
153}
154
155static void hvc_handle_input(struct virtqueue *vq)
156{
157 if (hvc_poll(hvc))
158 hvc_kick();
159}
160
137/*D:370 Once we're further in boot, we get probed like any other virtio device. 161/*D:370 Once we're further in boot, we get probed like any other virtio device.
138 * At this stage we set up the output virtqueue. 162 * At this stage we set up the output virtqueue.
139 * 163 *
@@ -144,7 +168,6 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
144static int __devinit virtcons_probe(struct virtio_device *dev) 168static int __devinit virtcons_probe(struct virtio_device *dev)
145{ 169{
146 int err; 170 int err;
147 struct hvc_struct *hvc;
148 171
149 vdev = dev; 172 vdev = dev;
150 173
@@ -158,7 +181,7 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
158 /* Find the input queue. */ 181 /* Find the input queue. */
159 /* FIXME: This is why we want to wean off hvc: we do nothing 182 /* FIXME: This is why we want to wean off hvc: we do nothing
160 * when input comes in. */ 183 * when input comes in. */
161 in_vq = vdev->config->find_vq(vdev, 0, NULL); 184 in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
162 if (IS_ERR(in_vq)) { 185 if (IS_ERR(in_vq)) {
163 err = PTR_ERR(in_vq); 186 err = PTR_ERR(in_vq);
164 goto free; 187 goto free;
@@ -173,15 +196,18 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
173 /* Start using the new console output. */ 196 /* Start using the new console output. */
174 virtio_cons.get_chars = get_chars; 197 virtio_cons.get_chars = get_chars;
175 virtio_cons.put_chars = put_chars; 198 virtio_cons.put_chars = put_chars;
199 virtio_cons.notifier_add = notifier_add_vio;
200 virtio_cons.notifier_del = notifier_del_vio;
176 201
177 /* The first argument of hvc_alloc() is the virtual console number, so 202 /* The first argument of hvc_alloc() is the virtual console number, so
178 * we use zero. The second argument is the interrupt number; we 203 * we use zero. The second argument is the parameter for the
179 * currently leave this as zero: it would be better not to use the 204 * notification mechanism (like irq number). We currently leave this
 180 * hvc mechanism and fix this (FIXME!). 205 * as zero; virtqueues have implicit notifications.
181 * 206 *
182 * The third argument is a "struct hv_ops" containing the put_chars() 207 * The third argument is a "struct hv_ops" containing the put_chars()
183 * and get_chars() pointers. The final argument is the output buffer 208 * get_chars(), notifier_add() and notifier_del() pointers.
184 * size: we can do any size, so we put PAGE_SIZE here. */ 209 * The final argument is the output buffer size: we can do any size,
210 * so we put PAGE_SIZE here. */
185 hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); 211 hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
186 if (IS_ERR(hvc)) { 212 if (IS_ERR(hvc)) {
187 err = PTR_ERR(hvc); 213 err = PTR_ERR(hvc);
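
With hvc promoted to a file-scope variable, the virtio console finally gets a working input path: a notification on in_vq runs hvc_handle_input(), which lets hvc_poll() drain the queue and hvc_kick() push the characters up to the tty layer. The notifier_add_vio()/notifier_del_vio() hooks only toggle irq_requested, since virtqueues deliver their own notifications and there is no interrupt to request, which is exactly why hvc_alloc()'s second argument stays zero.
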
diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
index 51966ccf4ea3..8bfee5fb7223 100644
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
@@ -87,7 +87,6 @@
87#include <linux/mutex.h> 87#include <linux/mutex.h>
88#include <linux/smp_lock.h> 88#include <linux/smp_lock.h>
89#include <linux/sysctl.h> 89#include <linux/sysctl.h>
90#include <linux/version.h>
91#include <linux/fs.h> 90#include <linux/fs.h>
92#include <linux/cdev.h> 91#include <linux/cdev.h>
93#include <linux/platform_device.h> 92#include <linux/platform_device.h>
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 6e6c3c4aea6b..5a11e3cbcae2 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -123,6 +123,13 @@ config EDAC_I5000
123 Support for error detection and correction the Intel 123 Support for error detection and correction the Intel
124 Greekcreek/Blackford chipsets. 124 Greekcreek/Blackford chipsets.
125 125
126config EDAC_I5100
127 tristate "Intel San Clemente MCH"
128 depends on EDAC_MM_EDAC && X86 && PCI
129 help
 130 Support for error detection and correction on the Intel
 131 San Clemente MCH.
132
126config EDAC_MPC85XX 133config EDAC_MPC85XX
127 tristate "Freescale MPC85xx" 134 tristate "Freescale MPC85xx"
128 depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx 135 depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 83807731d4a9..e5e9104b5520 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -19,6 +19,7 @@ endif
19 19
20obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o 20obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
21obj-$(CONFIG_EDAC_I5000) += i5000_edac.o 21obj-$(CONFIG_EDAC_I5000) += i5000_edac.o
22obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
22obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o 23obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
23obj-$(CONFIG_EDAC_E752X) += e752x_edac.o 24obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
24obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o 25obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index c94a0eb492cb..facfdb1fa71c 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -28,6 +28,7 @@
28#define E752X_REVISION " Ver: 2.0.2 " __DATE__ 28#define E752X_REVISION " Ver: 2.0.2 " __DATE__
29#define EDAC_MOD_STR "e752x_edac" 29#define EDAC_MOD_STR "e752x_edac"
30 30
31static int report_non_memory_errors;
31static int force_function_unhide; 32static int force_function_unhide;
32static int sysbus_parity = -1; 33static int sysbus_parity = -1;
33 34
@@ -117,7 +118,7 @@ static struct edac_pci_ctl_info *e752x_pci;
117#define E752X_BUF_FERR 0x70 /* Memory buffer first error reg (8b) */ 118#define E752X_BUF_FERR 0x70 /* Memory buffer first error reg (8b) */
118#define E752X_BUF_NERR 0x72 /* Memory buffer next error reg (8b) */ 119#define E752X_BUF_NERR 0x72 /* Memory buffer next error reg (8b) */
119#define E752X_BUF_ERRMASK 0x74 /* Memory buffer error mask reg (8b) */ 120#define E752X_BUF_ERRMASK 0x74 /* Memory buffer error mask reg (8b) */
120#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI command reg (8b) */ 121#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI cmd reg (8b) */
121#define E752X_DRAM_FERR 0x80 /* DRAM first error register (16b) */ 122#define E752X_DRAM_FERR 0x80 /* DRAM first error register (16b) */
122#define E752X_DRAM_NERR 0x82 /* DRAM next error register (16b) */ 123#define E752X_DRAM_NERR 0x82 /* DRAM next error register (16b) */
123#define E752X_DRAM_ERRMASK 0x84 /* DRAM error mask register (8b) */ 124#define E752X_DRAM_ERRMASK 0x84 /* DRAM error mask register (8b) */
@@ -127,7 +128,7 @@ static struct edac_pci_ctl_info *e752x_pci;
127 /* error address register (32b) */ 128 /* error address register (32b) */
128 /* 129 /*
129 * 31 Reserved 130 * 31 Reserved
130 * 30:2 CE address (64 byte block 34:6) 131 * 30:2 CE address (64 byte block 34:6)
131 * 1 Reserved 132 * 1 Reserved
132 * 0 HiLoCS 133 * 0 HiLoCS
133 */ 134 */
@@ -147,11 +148,11 @@ static struct edac_pci_ctl_info *e752x_pci;
147 * 1 Reserved 148 * 1 Reserved
148 * 0 HiLoCS 149 * 0 HiLoCS
149 */ 150 */
150#define E752X_DRAM_SCRB_ADD 0xA8 /* DRAM first uncorrectable scrub memory */ 151#define E752X_DRAM_SCRB_ADD 0xA8 /* DRAM 1st uncorrectable scrub mem */
151 /* error address register (32b) */ 152 /* error address register (32b) */
152 /* 153 /*
153 * 31 Reserved 154 * 31 Reserved
154 * 30:2 CE address (64 byte block 34:6) 155 * 30:2 CE address (64 byte block 34:6)
155 * 1 Reserved 156 * 1 Reserved
156 * 0 HiLoCS 157 * 0 HiLoCS
157 */ 158 */
@@ -394,9 +395,12 @@ static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error,
394 struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; 395 struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info;
395 396
396 error_1b = retry_add; 397 error_1b = retry_add;
397 page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */ 398 page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */
398 row = pvt->mc_symmetric ? ((page >> 1) & 3) : /* chip select are bits 14 & 13 */ 399
400 /* chip select are bits 14 & 13 */
401 row = pvt->mc_symmetric ? ((page >> 1) & 3) :
399 edac_mc_find_csrow_by_page(mci, page); 402 edac_mc_find_csrow_by_page(mci, page);
403
400 e752x_mc_printk(mci, KERN_WARNING, 404 e752x_mc_printk(mci, KERN_WARNING,
401 "CE page 0x%lx, row %d : Memory read retry\n", 405 "CE page 0x%lx, row %d : Memory read retry\n",
402 (long unsigned int)page, row); 406 (long unsigned int)page, row);
@@ -422,12 +426,21 @@ static inline void process_threshold_ce(struct mem_ctl_info *mci, u16 error,
422} 426}
423 427
424static char *global_message[11] = { 428static char *global_message[11] = {
425 "PCI Express C1", "PCI Express C", "PCI Express B1", 429 "PCI Express C1",
426 "PCI Express B", "PCI Express A1", "PCI Express A", 430 "PCI Express C",
427 "DMA Controler", "HUB or NS Interface", "System Bus", 431 "PCI Express B1",
428 "DRAM Controler", "Internal Buffer" 432 "PCI Express B",
433 "PCI Express A1",
434 "PCI Express A",
435 "DMA Controller",
436 "HUB or NS Interface",
437 "System Bus",
438 "DRAM Controller", /* 9th entry */
439 "Internal Buffer"
429}; 440};
430 441
442#define DRAM_ENTRY 9
443
431static char *fatal_message[2] = { "Non-Fatal ", "Fatal " }; 444static char *fatal_message[2] = { "Non-Fatal ", "Fatal " };
432 445
433static void do_global_error(int fatal, u32 errors) 446static void do_global_error(int fatal, u32 errors)
@@ -435,9 +448,16 @@ static void do_global_error(int fatal, u32 errors)
435 int i; 448 int i;
436 449
437 for (i = 0; i < 11; i++) { 450 for (i = 0; i < 11; i++) {
438 if (errors & (1 << i)) 451 if (errors & (1 << i)) {
439 e752x_printk(KERN_WARNING, "%sError %s\n", 452 /* If the error is from DRAM Controller OR
440 fatal_message[fatal], global_message[i]); 453 * we are to report ALL errors, then
454 * report the error
455 */
456 if ((i == DRAM_ENTRY) || report_non_memory_errors)
457 e752x_printk(KERN_WARNING, "%sError %s\n",
458 fatal_message[fatal],
459 global_message[i]);
460 }
441 } 461 }
442} 462}
443 463
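
The filtering added above is easiest to see in isolation. Below is a minimal standalone C restatement of the same scan-the-bitmap-and-filter logic (userspace, with printf() standing in for e752x_printk(); a sketch, not the driver code itself):

	#include <stdio.h>

	#define DRAM_ENTRY 9

	static const char *global_message[11] = {
		"PCI Express C1", "PCI Express C", "PCI Express B1",
		"PCI Express B", "PCI Express A1", "PCI Express A",
		"DMA Controller", "HUB or NS Interface", "System Bus",
		"DRAM Controller", "Internal Buffer"
	};

	static void do_global_error(int fatal, unsigned int errors,
				    int report_non_memory_errors)
	{
		int i;

		for (i = 0; i < 11; i++) {
			if (!(errors & (1u << i)))
				continue;
			/* DRAM errors always print; everything else is opt-in */
			if (i == DRAM_ENTRY || report_non_memory_errors)
				printf("%sError %s\n",
				       fatal ? "Fatal " : "Non-Fatal ",
				       global_message[i]);
		}
	}

	int main(void)
	{
		/* only the DRAM Controller line prints; bit 2 is filtered out */
		do_global_error(0, (1u << DRAM_ENTRY) | (1u << 2), 0);
		return 0;
	}
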
@@ -1021,7 +1041,7 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx,
1021 struct pci_dev *dev; 1041 struct pci_dev *dev;
1022 1042
1023 pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL, 1043 pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
1024 pvt->dev_info->err_dev, pvt->bridge_ck); 1044 pvt->dev_info->err_dev, pvt->bridge_ck);
1025 1045
1026 if (pvt->bridge_ck == NULL) 1046 if (pvt->bridge_ck == NULL)
1027 pvt->bridge_ck = pci_scan_single_device(pdev->bus, 1047 pvt->bridge_ck = pci_scan_single_device(pdev->bus,
@@ -1034,8 +1054,9 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx,
1034 return 1; 1054 return 1;
1035 } 1055 }
1036 1056
1037 dev = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev, 1057 dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1038 NULL); 1058 e752x_devs[dev_idx].ctl_dev,
1059 NULL);
1039 1060
1040 if (dev == NULL) 1061 if (dev == NULL)
1041 goto fail; 1062 goto fail;
@@ -1316,7 +1337,8 @@ MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers");
1316 1337
1317module_param(force_function_unhide, int, 0444); 1338module_param(force_function_unhide, int, 0444);
1318MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:" 1339MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:"
1319 " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access"); 1340 " 1=force unhide and hope BIOS doesn't fight driver for "
1341 "Dev0:Fun1 access");
1320 1342
1321module_param(edac_op_state, int, 0444); 1343module_param(edac_op_state, int, 0444);
1322MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); 1344MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
@@ -1324,3 +1346,6 @@ MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
1324module_param(sysbus_parity, int, 0444); 1346module_param(sysbus_parity, int, 0444);
1325MODULE_PARM_DESC(sysbus_parity, "0=disable system bus parity checking," 1347MODULE_PARM_DESC(sysbus_parity, "0=disable system bus parity checking,"
1326 " 1=enable system bus parity checking, default=auto-detect"); 1348 " 1=enable system bus parity checking, default=auto-detect");
1349module_param(report_non_memory_errors, int, 0644);
1350MODULE_PARM_DESC(report_non_memory_errors, "0=disable non-memory error "
1351 "reporting, 1=enable non-memory error reporting");
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 021d18795145..ad218fe4942d 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -44,6 +44,25 @@ int edac_mc_get_poll_msec(void)
44 return edac_mc_poll_msec; 44 return edac_mc_poll_msec;
45} 45}
46 46
47static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
48{
49 long l;
50 int ret;
51
52 if (!val)
53 return -EINVAL;
54
55 ret = strict_strtol(val, 0, &l);
56 if (ret == -EINVAL || ((int)l != l))
57 return -EINVAL;
58 *((int *)kp->arg) = l;
59
60 /* notify edac_mc engine to reset the poll period */
61 edac_mc_reset_delay_period(l);
62
63 return 0;
64}
65
47/* Parameter declarations for above */ 66/* Parameter declarations for above */
48module_param(edac_mc_panic_on_ue, int, 0644); 67module_param(edac_mc_panic_on_ue, int, 0644);
49MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); 68MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
@@ -53,7 +72,8 @@ MODULE_PARM_DESC(edac_mc_log_ue,
53module_param(edac_mc_log_ce, int, 0644); 72module_param(edac_mc_log_ce, int, 0644);
54MODULE_PARM_DESC(edac_mc_log_ce, 73MODULE_PARM_DESC(edac_mc_log_ce,
55 "Log correctable error to console: 0=off 1=on"); 74 "Log correctable error to console: 0=off 1=on");
56module_param(edac_mc_poll_msec, int, 0644); 75module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int,
76 &edac_mc_poll_msec, 0644);
57MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds"); 77MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds");
58 78
59/* 79/*
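
The conversion above swaps a plain module_param() for module_param_call(), so writes run a validating setter with a side effect while reads fall back to the stock param_get_int. A condensed sketch of that pattern under the 2.6.26-era API (reset_delay_period() stands in for edac_mc_reset_delay_period(); names are illustrative):

	#include <linux/module.h>
	#include <linux/moduleparam.h>
	#include <linux/kernel.h>

	static int poll_msec = 1000;

	/* stand-in for edac_mc_reset_delay_period() */
	static void reset_delay_period(int msec)
	{
		printk(KERN_INFO "demo: poll period now %d msec\n", msec);
	}

	static int set_poll_msec(const char *val, struct kernel_param *kp)
	{
		long l;
		int ret;

		if (!val)
			return -EINVAL;

		ret = strict_strtol(val, 0, &l);	/* rejects trailing junk */
		if (ret == -EINVAL || ((int)l != l))	/* must fit in an int */
			return -EINVAL;
		*((int *)kp->arg) = l;

		reset_delay_period(l);
		return 0;
	}

	/* writes go through the validator, reads use the stock int getter */
	module_param_call(poll_msec, set_poll_msec, param_get_int,
			  &poll_msec, 0644);
	MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds");
	MODULE_LICENSE("GPL");
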
@@ -103,16 +123,6 @@ static const char *edac_caps[] = {
103 123
104 124
105 125
106/*
107 * /sys/devices/system/edac/mc;
108 * data structures and methods
109 */
110static ssize_t memctrl_int_show(void *ptr, char *buffer)
111{
112 int *value = (int *)ptr;
113 return sprintf(buffer, "%u\n", *value);
114}
115
116static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) 126static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
117{ 127{
118 int *value = (int *)ptr; 128 int *value = (int *)ptr;
@@ -123,23 +133,6 @@ static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
123 return count; 133 return count;
124} 134}
125 135
126/*
127 * mc poll_msec time value
128 */
129static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count)
130{
131 int *value = (int *)ptr;
132
133 if (isdigit(*buffer)) {
134 *value = simple_strtoul(buffer, NULL, 0);
135
136 /* notify edac_mc engine to reset the poll period */
137 edac_mc_reset_delay_period(*value);
138 }
139
140 return count;
141}
142
143 136
144/* EDAC sysfs CSROW data structures and methods 137/* EDAC sysfs CSROW data structures and methods
145 */ 138 */
@@ -185,7 +178,11 @@ static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data,
185static ssize_t channel_dimm_label_show(struct csrow_info *csrow, 178static ssize_t channel_dimm_label_show(struct csrow_info *csrow,
186 char *data, int channel) 179 char *data, int channel)
187{ 180{
188 return snprintf(data, EDAC_MC_LABEL_LEN, "%s", 181 /* if field has not been initialized, there is nothing to send */
182 if (!csrow->channels[channel].label[0])
183 return 0;
184
185 return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
189 csrow->channels[channel].label); 186 csrow->channels[channel].label);
190} 187}
191 188
@@ -649,98 +646,10 @@ static struct kobj_type ktype_mci = {
649 .default_attrs = (struct attribute **)mci_attr, 646 .default_attrs = (struct attribute **)mci_attr,
650}; 647};
651 648
652/* show/store, tables, etc for the MC kset */
653
654
655struct memctrl_dev_attribute {
656 struct attribute attr;
657 void *value;
658 ssize_t(*show) (void *, char *);
659 ssize_t(*store) (void *, const char *, size_t);
660};
661
662/* Set of show/store abstract level functions for memory control object */
663static ssize_t memctrl_dev_show(struct kobject *kobj,
664 struct attribute *attr, char *buffer)
665{
666 struct memctrl_dev_attribute *memctrl_dev;
667 memctrl_dev = (struct memctrl_dev_attribute *)attr;
668
669 if (memctrl_dev->show)
670 return memctrl_dev->show(memctrl_dev->value, buffer);
671
672 return -EIO;
673}
674
675static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
676 const char *buffer, size_t count)
677{
678 struct memctrl_dev_attribute *memctrl_dev;
679 memctrl_dev = (struct memctrl_dev_attribute *)attr;
680
681 if (memctrl_dev->store)
682 return memctrl_dev->store(memctrl_dev->value, buffer, count);
683
684 return -EIO;
685}
686
687static struct sysfs_ops memctrlfs_ops = {
688 .show = memctrl_dev_show,
689 .store = memctrl_dev_store
690};
691
692#define MEMCTRL_ATTR(_name, _mode, _show, _store) \
693static struct memctrl_dev_attribute attr_##_name = { \
694 .attr = {.name = __stringify(_name), .mode = _mode }, \
695 .value = &_name, \
696 .show = _show, \
697 .store = _store, \
698};
699
700#define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store) \
701static struct memctrl_dev_attribute attr_##_name = { \
702 .attr = {.name = __stringify(_name), .mode = _mode }, \
703 .value = _data, \
704 .show = _show, \
705 .store = _store, \
706};
707
708/* csrow<id> control files */
709MEMCTRL_ATTR(edac_mc_panic_on_ue,
710 S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
711
712MEMCTRL_ATTR(edac_mc_log_ue,
713 S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
714
715MEMCTRL_ATTR(edac_mc_log_ce,
716 S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
717
718MEMCTRL_ATTR(edac_mc_poll_msec,
719 S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store);
720
721/* Base Attributes of the memory ECC object */
722static struct memctrl_dev_attribute *memctrl_attr[] = {
723 &attr_edac_mc_panic_on_ue,
724 &attr_edac_mc_log_ue,
725 &attr_edac_mc_log_ce,
726 &attr_edac_mc_poll_msec,
727 NULL,
728};
729
730
731/* the ktype for the mc_kset internal kobj */
732static struct kobj_type ktype_mc_set_attribs = {
733 .sysfs_ops = &memctrlfs_ops,
734 .default_attrs = (struct attribute **)memctrl_attr,
735};
736
737/* EDAC memory controller sysfs kset: 649/* EDAC memory controller sysfs kset:
738 * /sys/devices/system/edac/mc 650 * /sys/devices/system/edac/mc
739 */ 651 */
740static struct kset mc_kset = { 652static struct kset *mc_kset;
741 .kobj = {.ktype = &ktype_mc_set_attribs },
742};
743
744 653
745/* 654/*
746 * edac_mc_register_sysfs_main_kobj 655 * edac_mc_register_sysfs_main_kobj
@@ -771,7 +680,7 @@ int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci)
771 } 680 }
772 681
773 /* this instance become part of the mc_kset */ 682 /* this instance become part of the mc_kset */
774 kobj_mci->kset = &mc_kset; 683 kobj_mci->kset = mc_kset;
775 684
776 /* register the mc<id> kobject to the mc_kset */ 685 /* register the mc<id> kobject to the mc_kset */
777 err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL, 686 err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL,
@@ -1001,12 +910,9 @@ int edac_sysfs_setup_mc_kset(void)
1001 } 910 }
1002 911
1003 /* Init the MC's kobject */ 912 /* Init the MC's kobject */
1004 kobject_set_name(&mc_kset.kobj, "mc"); 913 mc_kset = kset_create_and_add("mc", NULL, &edac_class->kset.kobj);
1005 mc_kset.kobj.parent = &edac_class->kset.kobj; 914 if (!mc_kset) {
1006 915 err = -ENOMEM;
1007 /* register the mc_kset */
1008 err = kset_register(&mc_kset);
1009 if (err) {
1010 debugf1("%s() Failed to register '.../edac/mc'\n", __func__); 916 debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
1011 goto fail_out; 917 goto fail_out;
1012 } 918 }
@@ -1028,6 +934,6 @@ fail_out:
1028 */ 934 */
1029void edac_sysfs_teardown_mc_kset(void) 935void edac_sysfs_teardown_mc_kset(void)
1030{ 936{
1031 kset_unregister(&mc_kset); 937 kset_unregister(mc_kset);
1032} 938}
1033 939
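
The net effect of this hunk is replacing a statically embedded struct kset (plus its hand-rolled sysfs_ops boilerplate) with a pointer obtained from kset_create_and_add(), which allocates, names, and registers the kset in one call. A minimal sketch of the new shape; the NULL parent puts the kset at the top of /sys here purely for brevity, whereas the driver parents it under the edac class:

	#include <linux/kobject.h>
	#include <linux/module.h>

	static struct kset *demo_kset;

	static int __init demo_init(void)
	{
		demo_kset = kset_create_and_add("demo_mc", NULL, NULL);
		if (!demo_kset)
			return -ENOMEM;
		return 0;
	}

	static void __exit demo_exit(void)
	{
		kset_unregister(demo_kset);	/* drops the ref, frees the kset */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
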
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 2c1fa1bb6df2..5c153dccc95e 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -28,7 +28,7 @@ static int edac_pci_poll_msec = 1000; /* one second workq period */
28static atomic_t pci_parity_count = ATOMIC_INIT(0); 28static atomic_t pci_parity_count = ATOMIC_INIT(0);
29static atomic_t pci_nonparity_count = ATOMIC_INIT(0); 29static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
30 30
31static struct kobject edac_pci_top_main_kobj; 31static struct kobject *edac_pci_top_main_kobj;
32static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0); 32static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
33 33
34/* getter functions for the data variables */ 34/* getter functions for the data variables */
@@ -83,7 +83,7 @@ static void edac_pci_instance_release(struct kobject *kobj)
83 pci = to_instance(kobj); 83 pci = to_instance(kobj);
84 84
85 /* decrement reference count on top main kobj */ 85 /* decrement reference count on top main kobj */
86 kobject_put(&edac_pci_top_main_kobj); 86 kobject_put(edac_pci_top_main_kobj);
87 87
88 kfree(pci); /* Free the control struct */ 88 kfree(pci); /* Free the control struct */
89} 89}
@@ -166,7 +166,7 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
166 * track the number of PCI instances we have, and thus nest 166 * track the number of PCI instances we have, and thus nest
167 * properly on keeping the module loaded 167 * properly on keeping the module loaded
168 */ 168 */
169 main_kobj = kobject_get(&edac_pci_top_main_kobj); 169 main_kobj = kobject_get(edac_pci_top_main_kobj);
170 if (!main_kobj) { 170 if (!main_kobj) {
171 err = -ENODEV; 171 err = -ENODEV;
172 goto error_out; 172 goto error_out;
@@ -174,11 +174,11 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
174 174
175 /* And now register this new kobject under the main kobj */ 175 /* And now register this new kobject under the main kobj */
176 err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance, 176 err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance,
177 &edac_pci_top_main_kobj, "pci%d", idx); 177 edac_pci_top_main_kobj, "pci%d", idx);
178 if (err != 0) { 178 if (err != 0) {
179 debugf2("%s() failed to register instance pci%d\n", 179 debugf2("%s() failed to register instance pci%d\n",
180 __func__, idx); 180 __func__, idx);
181 kobject_put(&edac_pci_top_main_kobj); 181 kobject_put(edac_pci_top_main_kobj);
182 goto error_out; 182 goto error_out;
183 } 183 }
184 184
@@ -316,9 +316,10 @@ static struct edac_pci_dev_attribute *edac_pci_attr[] = {
316 */ 316 */
317static void edac_pci_release_main_kobj(struct kobject *kobj) 317static void edac_pci_release_main_kobj(struct kobject *kobj)
318{ 318{
319
320 debugf0("%s() here to module_put(THIS_MODULE)\n", __func__); 319 debugf0("%s() here to module_put(THIS_MODULE)\n", __func__);
321 320
321 kfree(kobj);
322
322 /* last reference to top EDAC PCI kobject has been removed, 323 /* last reference to top EDAC PCI kobject has been removed,
323 * NOW release our ref count on the core module 324 * NOW release our ref count on the core module
324 */ 325 */
@@ -369,8 +370,16 @@ static int edac_pci_main_kobj_setup(void)
369 goto decrement_count_fail; 370 goto decrement_count_fail;
370 } 371 }
371 372
373 edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
374 if (!edac_pci_top_main_kobj) {
375 debugf1("Failed to allocate\n");
376 err = -ENOMEM;
377 goto kzalloc_fail;
378 }
379
372 /* Instanstiate the pci object */ 380 /* Instanstiate the pci object */
373 err = kobject_init_and_add(&edac_pci_top_main_kobj, &ktype_edac_pci_main_kobj, 381 err = kobject_init_and_add(edac_pci_top_main_kobj,
382 &ktype_edac_pci_main_kobj,
374 &edac_class->kset.kobj, "pci"); 383 &edac_class->kset.kobj, "pci");
375 if (err) { 384 if (err) {
376 debugf1("Failed to register '.../edac/pci'\n"); 385 debugf1("Failed to register '.../edac/pci'\n");
@@ -381,13 +390,16 @@ static int edac_pci_main_kobj_setup(void)
381 * for EDAC PCI, then edac_pci_main_kobj_teardown() 390 * for EDAC PCI, then edac_pci_main_kobj_teardown()
382 * must be used, for resources to be cleaned up properly 391 * must be used, for resources to be cleaned up properly
383 */ 392 */
384 kobject_uevent(&edac_pci_top_main_kobj, KOBJ_ADD); 393 kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD);
385 debugf1("Registered '.../edac/pci' kobject\n"); 394 debugf1("Registered '.../edac/pci' kobject\n");
386 395
387 return 0; 396 return 0;
388 397
389 /* Error unwind stack */ 398 /* Error unwind stack */
390kobject_init_and_add_fail: 399kobject_init_and_add_fail:
400 kfree(edac_pci_top_main_kobj);
401
402kzalloc_fail:
391 module_put(THIS_MODULE); 403 module_put(THIS_MODULE);
392 404
393decrement_count_fail: 405decrement_count_fail:
@@ -414,7 +426,7 @@ static void edac_pci_main_kobj_teardown(void)
414 if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { 426 if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
415 debugf0("%s() called kobject_put on main kobj\n", 427 debugf0("%s() called kobject_put on main kobj\n",
416 __func__); 428 __func__);
417 kobject_put(&edac_pci_top_main_kobj); 429 kobject_put(edac_pci_top_main_kobj);
418 } 430 }
419} 431}
420 432
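
The same file also moves the top-level kobject from a static object to one allocated with kzalloc() and registered with kobject_init_and_add(); the ktype's release() then does the kfree() when the last kobject_put() drops the reference. A minimal sketch of that lifecycle (demo_* names are made up; the driver additionally pins THIS_MODULE):

	#include <linux/kobject.h>
	#include <linux/slab.h>
	#include <linux/module.h>

	static struct kobject *demo_kobj;

	static void demo_release(struct kobject *kobj)
	{
		kfree(kobj);	/* object was kzalloc()ed, so free it here */
	}

	static struct kobj_type demo_ktype = {
		.release = demo_release,
	};

	static int __init demo_init(void)
	{
		int err;

		demo_kobj = kzalloc(sizeof(*demo_kobj), GFP_KERNEL);
		if (!demo_kobj)
			return -ENOMEM;

		err = kobject_init_and_add(demo_kobj, &demo_ktype, NULL, "demo");
		if (err) {
			kfree(demo_kobj);	/* mirrors the driver's unwind path */
			return err;
		}
		return 0;
	}

	static void __exit demo_exit(void)
	{
		kobject_put(demo_kobj);	/* release() runs on the last reference */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
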
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
new file mode 100644
index 000000000000..22db05a67bfb
--- /dev/null
+++ b/drivers/edac/i5100_edac.c
@@ -0,0 +1,981 @@
1/*
2 * Intel 5100 Memory Controllers kernel module
3 *
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * This module is based on the following document:
8 *
9 * Intel 5100X Chipset Memory Controller Hub (MCH) - Datasheet
10 * http://download.intel.com/design/chipsets/datashts/318378.pdf
11 *
12 */
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/slab.h>
18#include <linux/edac.h>
19#include <linux/delay.h>
20#include <linux/mmzone.h>
21
22#include "edac_core.h"
23
24/* register addresses */
25
26/* device 16, func 1 */
27#define I5100_MC 0x40 /* Memory Control Register */
28#define I5100_MS 0x44 /* Memory Status Register */
29#define I5100_SPDDATA 0x48 /* Serial Presence Detect Status Reg */
30#define I5100_SPDCMD 0x4c /* Serial Presence Detect Command Reg */
31#define I5100_TOLM 0x6c /* Top of Low Memory */
32#define I5100_MIR0 0x80 /* Memory Interleave Range 0 */
33#define I5100_MIR1 0x84 /* Memory Interleave Range 1 */
34#define I5100_AMIR_0 0x8c /* Adjusted Memory Interleave Range 0 */
35#define I5100_AMIR_1 0x90 /* Adjusted Memory Interleave Range 1 */
36#define I5100_FERR_NF_MEM 0xa0 /* MC First Non Fatal Errors */
37#define I5100_FERR_NF_MEM_M16ERR_MASK (1 << 16)
38#define I5100_FERR_NF_MEM_M15ERR_MASK (1 << 15)
39#define I5100_FERR_NF_MEM_M14ERR_MASK (1 << 14)
40#define I5100_FERR_NF_MEM_M12ERR_MASK (1 << 12)
41#define I5100_FERR_NF_MEM_M11ERR_MASK (1 << 11)
42#define I5100_FERR_NF_MEM_M10ERR_MASK (1 << 10)
43#define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6)
44#define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5)
45#define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4)
46#define I5100_FERR_NF_MEM_M1ERR_MASK 1
47#define I5100_FERR_NF_MEM_ANY_MASK \
48 (I5100_FERR_NF_MEM_M16ERR_MASK | \
49 I5100_FERR_NF_MEM_M15ERR_MASK | \
50 I5100_FERR_NF_MEM_M14ERR_MASK | \
51 I5100_FERR_NF_MEM_M12ERR_MASK | \
52 I5100_FERR_NF_MEM_M11ERR_MASK | \
53 I5100_FERR_NF_MEM_M10ERR_MASK | \
54 I5100_FERR_NF_MEM_M6ERR_MASK | \
55 I5100_FERR_NF_MEM_M5ERR_MASK | \
56 I5100_FERR_NF_MEM_M4ERR_MASK | \
57 I5100_FERR_NF_MEM_M1ERR_MASK)
58#define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */
59#define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */
60
61/* device 21 and 22, func 0 */
62#define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */
63#define I5100_DMIR 0x15c /* DIMM Interleave Range */
64#define I5100_VALIDLOG 0x18c /* Valid Log Markers */
65#define I5100_NRECMEMA 0x190 /* Non-Recoverable Memory Error Log Reg A */
66#define I5100_NRECMEMB 0x194 /* Non-Recoverable Memory Error Log Reg B */
67#define I5100_REDMEMA 0x198 /* Recoverable Memory Data Error Log Reg A */
68#define I5100_REDMEMB 0x19c /* Recoverable Memory Data Error Log Reg B */
69#define I5100_RECMEMA 0x1a0 /* Recoverable Memory Error Log Reg A */
70#define I5100_RECMEMB 0x1a4 /* Recoverable Memory Error Log Reg B */
71#define I5100_MTR_4 0x1b0 /* Memory Technology Registers 4,5 */
72
73/* bit field accessors */
74
75static inline u32 i5100_mc_errdeten(u32 mc)
76{
77 return mc >> 5 & 1;
78}
79
80static inline u16 i5100_spddata_rdo(u16 a)
81{
82 return a >> 15 & 1;
83}
84
85static inline u16 i5100_spddata_sbe(u16 a)
86{
87 return a >> 13 & 1;
88}
89
90static inline u16 i5100_spddata_busy(u16 a)
91{
92 return a >> 12 & 1;
93}
94
95static inline u16 i5100_spddata_data(u16 a)
96{
97 return a & ((1 << 8) - 1);
98}
99
100static inline u32 i5100_spdcmd_create(u32 dti, u32 ckovrd, u32 sa, u32 ba,
101 u32 data, u32 cmd)
102{
103 return ((dti & ((1 << 4) - 1)) << 28) |
104 ((ckovrd & 1) << 27) |
105 ((sa & ((1 << 3) - 1)) << 24) |
106 ((ba & ((1 << 8) - 1)) << 16) |
107 ((data & ((1 << 8) - 1)) << 8) |
108 (cmd & 1);
109}
110
111static inline u16 i5100_tolm_tolm(u16 a)
112{
113 return a >> 12 & ((1 << 4) - 1);
114}
115
116static inline u16 i5100_mir_limit(u16 a)
117{
118 return a >> 4 & ((1 << 12) - 1);
119}
120
121static inline u16 i5100_mir_way1(u16 a)
122{
123 return a >> 1 & 1;
124}
125
126static inline u16 i5100_mir_way0(u16 a)
127{
128 return a & 1;
129}
130
131static inline u32 i5100_ferr_nf_mem_chan_indx(u32 a)
132{
133 return a >> 28 & 1;
134}
135
136static inline u32 i5100_ferr_nf_mem_any(u32 a)
137{
138 return a & I5100_FERR_NF_MEM_ANY_MASK;
139}
140
141static inline u32 i5100_nerr_nf_mem_any(u32 a)
142{
143 return i5100_ferr_nf_mem_any(a);
144}
145
146static inline u32 i5100_dmir_limit(u32 a)
147{
148 return a >> 16 & ((1 << 11) - 1);
149}
150
151static inline u32 i5100_dmir_rank(u32 a, u32 i)
152{
153 return a >> (4 * i) & ((1 << 2) - 1);
154}
155
156static inline u16 i5100_mtr_present(u16 a)
157{
158 return a >> 10 & 1;
159}
160
161static inline u16 i5100_mtr_ethrottle(u16 a)
162{
163 return a >> 9 & 1;
164}
165
166static inline u16 i5100_mtr_width(u16 a)
167{
168 return a >> 8 & 1;
169}
170
171static inline u16 i5100_mtr_numbank(u16 a)
172{
173 return a >> 6 & 1;
174}
175
176static inline u16 i5100_mtr_numrow(u16 a)
177{
178 return a >> 2 & ((1 << 2) - 1);
179}
180
181static inline u16 i5100_mtr_numcol(u16 a)
182{
183 return a & ((1 << 2) - 1);
184}
185
186
187static inline u32 i5100_validlog_redmemvalid(u32 a)
188{
189 return a >> 2 & 1;
190}
191
192static inline u32 i5100_validlog_recmemvalid(u32 a)
193{
194 return a >> 1 & 1;
195}
196
197static inline u32 i5100_validlog_nrecmemvalid(u32 a)
198{
199 return a & 1;
200}
201
202static inline u32 i5100_nrecmema_merr(u32 a)
203{
204 return a >> 15 & ((1 << 5) - 1);
205}
206
207static inline u32 i5100_nrecmema_bank(u32 a)
208{
209 return a >> 12 & ((1 << 3) - 1);
210}
211
212static inline u32 i5100_nrecmema_rank(u32 a)
213{
214 return a >> 8 & ((1 << 3) - 1);
215}
216
217static inline u32 i5100_nrecmema_dm_buf_id(u32 a)
218{
219 return a & ((1 << 8) - 1);
220}
221
222static inline u32 i5100_nrecmemb_cas(u32 a)
223{
224 return a >> 16 & ((1 << 13) - 1);
225}
226
227static inline u32 i5100_nrecmemb_ras(u32 a)
228{
229 return a & ((1 << 16) - 1);
230}
231
232static inline u32 i5100_redmemb_ecc_locator(u32 a)
233{
234 return a & ((1 << 18) - 1);
235}
236
237static inline u32 i5100_recmema_merr(u32 a)
238{
239 return i5100_nrecmema_merr(a);
240}
241
242static inline u32 i5100_recmema_bank(u32 a)
243{
244 return i5100_nrecmema_bank(a);
245}
246
247static inline u32 i5100_recmema_rank(u32 a)
248{
249 return i5100_nrecmema_rank(a);
250}
251
252static inline u32 i5100_recmema_dm_buf_id(u32 a)
253{
254 return i5100_nrecmema_dm_buf_id(a);
255}
256
257static inline u32 i5100_recmemb_cas(u32 a)
258{
259 return i5100_nrecmemb_cas(a);
260}
261
262static inline u32 i5100_recmemb_ras(u32 a)
263{
264 return i5100_nrecmemb_ras(a);
265}
266
267/* some generic limits */
268#define I5100_MAX_RANKS_PER_CTLR 6
269#define I5100_MAX_CTLRS 2
270#define I5100_MAX_RANKS_PER_DIMM 4
271#define I5100_DIMM_ADDR_LINES (6 - 3) /* 64 bits / 8 bits per byte */
272#define I5100_MAX_DIMM_SLOTS_PER_CTLR 4
273#define I5100_MAX_RANK_INTERLEAVE 4
274#define I5100_MAX_DMIRS 5
275
276struct i5100_priv {
277 /* ranks on each dimm -- 0 maps to not present -- obtained via SPD */
278 int dimm_numrank[I5100_MAX_CTLRS][I5100_MAX_DIMM_SLOTS_PER_CTLR];
279
280 /*
281 * mainboard chip select map -- maps i5100 chip selects to
282 * DIMM slot chip selects. In the case of only 4 ranks per
283 * controller, the mapping is fairly obvious but not unique.
284 * we map -1 -> NC and assume both controllers use the same
285 * map...
286 *
287 */
288 int dimm_csmap[I5100_MAX_DIMM_SLOTS_PER_CTLR][I5100_MAX_RANKS_PER_DIMM];
289
290 /* memory interleave range */
291 struct {
292 u64 limit;
293 unsigned way[2];
294 } mir[I5100_MAX_CTLRS];
295
296 /* adjusted memory interleave range register */
297 unsigned amir[I5100_MAX_CTLRS];
298
299 /* dimm interleave range */
300 struct {
301 unsigned rank[I5100_MAX_RANK_INTERLEAVE];
302 u64 limit;
303 } dmir[I5100_MAX_CTLRS][I5100_MAX_DMIRS];
304
305 /* memory technology registers... */
306 struct {
307 unsigned present; /* 0 or 1 */
308 unsigned ethrottle; /* 0 or 1 */
309 unsigned width; /* 4 or 8 bits */
310 unsigned numbank; /* 2 or 3 lines */
311 unsigned numrow; /* 13 .. 16 lines */
312 unsigned numcol; /* 11 .. 12 lines */
313 } mtr[I5100_MAX_CTLRS][I5100_MAX_RANKS_PER_CTLR];
314
315 u64 tolm; /* top of low memory in bytes */
316 unsigned ranksperctlr; /* number of ranks per controller */
317
318 struct pci_dev *mc; /* device 16 func 1 */
319 struct pci_dev *ch0mm; /* device 21 func 0 */
320 struct pci_dev *ch1mm; /* device 22 func 0 */
321};
322
323/* map a rank/ctlr to a slot number on the mainboard */
324static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
325 int ctlr, int rank)
326{
327 const struct i5100_priv *priv = mci->pvt_info;
328 int i;
329
330 for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
331 int j;
332 const int numrank = priv->dimm_numrank[ctlr][i];
333
334 for (j = 0; j < numrank; j++)
335 if (priv->dimm_csmap[i][j] == rank)
336 return i * 2 + ctlr;
337 }
338
339 return -1;
340}
341
342static const char *i5100_err_msg(unsigned err)
343{
344 static const char *merrs[] = {
345 "unknown", /* 0 */
346 "uncorrectable data ECC on replay", /* 1 */
347 "unknown", /* 2 */
348 "unknown", /* 3 */
349 "aliased uncorrectable demand data ECC", /* 4 */
350 "aliased uncorrectable spare-copy data ECC", /* 5 */
351 "aliased uncorrectable patrol data ECC", /* 6 */
352 "unknown", /* 7 */
353 "unknown", /* 8 */
354 "unknown", /* 9 */
355 "non-aliased uncorrectable demand data ECC", /* 10 */
356 "non-aliased uncorrectable spare-copy data ECC", /* 11 */
357 "non-aliased uncorrectable patrol data ECC", /* 12 */
358 "unknown", /* 13 */
359 "correctable demand data ECC", /* 14 */
360 "correctable spare-copy data ECC", /* 15 */
361 "correctable patrol data ECC", /* 16 */
362 "unknown", /* 17 */
363 "SPD protocol error", /* 18 */
364 "unknown", /* 19 */
365 "spare copy initiated", /* 20 */
366 "spare copy completed", /* 21 */
367 };
368 unsigned i;
369
370 for (i = 0; i < ARRAY_SIZE(merrs); i++)
371 if (1 << i & err)
372 return merrs[i];
373
374 return "none";
375}
376
377/* convert csrow index into a rank (per controller -- 0..5) */
378static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
379{
380 const struct i5100_priv *priv = mci->pvt_info;
381
382 return csrow % priv->ranksperctlr;
383}
384
385/* convert csrow index into a controller (0..1) */
386static int i5100_csrow_to_cntlr(const struct mem_ctl_info *mci, int csrow)
387{
388 const struct i5100_priv *priv = mci->pvt_info;
389
390 return csrow / priv->ranksperctlr;
391}
392
393static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci,
394 int ctlr, int rank)
395{
396 const struct i5100_priv *priv = mci->pvt_info;
397
398 return ctlr * priv->ranksperctlr + rank;
399}
400
401static void i5100_handle_ce(struct mem_ctl_info *mci,
402 int ctlr,
403 unsigned bank,
404 unsigned rank,
405 unsigned long syndrome,
406 unsigned cas,
407 unsigned ras,
408 const char *msg)
409{
410 const int csrow = i5100_rank_to_csrow(mci, ctlr, rank);
411
412 printk(KERN_ERR
413 "CE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
414 "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
415 ctlr, bank, rank, syndrome, cas, ras,
416 csrow, mci->csrows[csrow].channels[0].label, msg);
417
418 mci->ce_count++;
419 mci->csrows[csrow].ce_count++;
420 mci->csrows[csrow].channels[0].ce_count++;
421}
422
423static void i5100_handle_ue(struct mem_ctl_info *mci,
424 int ctlr,
425 unsigned bank,
426 unsigned rank,
427 unsigned long syndrome,
428 unsigned cas,
429 unsigned ras,
430 const char *msg)
431{
432 const int csrow = i5100_rank_to_csrow(mci, ctlr, rank);
433
434 printk(KERN_ERR
435 "UE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
436 "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
437 ctlr, bank, rank, syndrome, cas, ras,
438 csrow, mci->csrows[csrow].channels[0].label, msg);
439
440 mci->ue_count++;
441 mci->csrows[csrow].ue_count++;
442}
443
444static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
445 u32 ferr, u32 nerr)
446{
447 struct i5100_priv *priv = mci->pvt_info;
448 struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm;
449 u32 dw;
450 u32 dw2;
451 unsigned syndrome = 0;
452 unsigned ecc_loc = 0;
453 unsigned merr;
454 unsigned bank;
455 unsigned rank;
456 unsigned cas;
457 unsigned ras;
458
459 pci_read_config_dword(pdev, I5100_VALIDLOG, &dw);
460
461 if (i5100_validlog_redmemvalid(dw)) {
462 pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
463 syndrome = dw2;
464 pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
465 ecc_loc = i5100_redmemb_ecc_locator(dw2);
466 }
467
468 if (i5100_validlog_recmemvalid(dw)) {
469 const char *msg;
470
471 pci_read_config_dword(pdev, I5100_RECMEMA, &dw2);
472 merr = i5100_recmema_merr(dw2);
473 bank = i5100_recmema_bank(dw2);
474 rank = i5100_recmema_rank(dw2);
475
476 pci_read_config_dword(pdev, I5100_RECMEMB, &dw2);
477 cas = i5100_recmemb_cas(dw2);
478 ras = i5100_recmemb_ras(dw2);
479
480 /* FIXME: not really sure if this is what merr is...
481 */
482 if (!merr)
483 msg = i5100_err_msg(ferr);
484 else
485 msg = i5100_err_msg(nerr);
486
487 i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
488 }
489
490 if (i5100_validlog_nrecmemvalid(dw)) {
491 const char *msg;
492
493 pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2);
494 merr = i5100_nrecmema_merr(dw2);
495 bank = i5100_nrecmema_bank(dw2);
496 rank = i5100_nrecmema_rank(dw2);
497
498 pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2);
499 cas = i5100_nrecmemb_cas(dw2);
500 ras = i5100_nrecmemb_ras(dw2);
501
502 /* FIXME: not really sure if this is what merr is...
503 */
504 if (!merr)
505 msg = i5100_err_msg(ferr);
506 else
507 msg = i5100_err_msg(nerr);
508
509 i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
510 }
511
512 pci_write_config_dword(pdev, I5100_VALIDLOG, dw);
513}
514
515static void i5100_check_error(struct mem_ctl_info *mci)
516{
517 struct i5100_priv *priv = mci->pvt_info;
518 u32 dw;
519
520
521 pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
522 if (i5100_ferr_nf_mem_any(dw)) {
523 u32 dw2;
524
525 pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
526 if (dw2)
527 pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM,
528 dw2);
529 pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
530
531 i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw),
532 i5100_ferr_nf_mem_any(dw),
533 i5100_nerr_nf_mem_any(dw2));
534 }
535}
536
537static struct pci_dev *pci_get_device_func(unsigned vendor,
538 unsigned device,
539 unsigned func)
540{
541 struct pci_dev *ret = NULL;
542
543 while (1) {
544 ret = pci_get_device(vendor, device, ret);
545
546 if (!ret)
547 break;
548
549 if (PCI_FUNC(ret->devfn) == func)
550 break;
551 }
552
553 return ret;
554}
555
556static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
557 int csrow)
558{
559 struct i5100_priv *priv = mci->pvt_info;
560 const unsigned ctlr_rank = i5100_csrow_to_rank(mci, csrow);
561 const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow);
562 unsigned addr_lines;
563
564 /* dimm present? */
565 if (!priv->mtr[ctlr][ctlr_rank].present)
566 return 0ULL;
567
568 addr_lines =
569 I5100_DIMM_ADDR_LINES +
570 priv->mtr[ctlr][ctlr_rank].numcol +
571 priv->mtr[ctlr][ctlr_rank].numrow +
572 priv->mtr[ctlr][ctlr_rank].numbank;
573
574 return (unsigned long)
575 ((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE);
576}
577
578static void __devinit i5100_init_mtr(struct mem_ctl_info *mci)
579{
580 struct i5100_priv *priv = mci->pvt_info;
581 struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
582 int i;
583
584 for (i = 0; i < I5100_MAX_CTLRS; i++) {
585 int j;
586 struct pci_dev *pdev = mms[i];
587
588 for (j = 0; j < I5100_MAX_RANKS_PER_CTLR; j++) {
589 const unsigned addr =
590 (j < 4) ? I5100_MTR_0 + j * 2 :
591 I5100_MTR_4 + (j - 4) * 2;
592 u16 w;
593
594 pci_read_config_word(pdev, addr, &w);
595
596 priv->mtr[i][j].present = i5100_mtr_present(w);
597 priv->mtr[i][j].ethrottle = i5100_mtr_ethrottle(w);
598 priv->mtr[i][j].width = 4 + 4 * i5100_mtr_width(w);
599 priv->mtr[i][j].numbank = 2 + i5100_mtr_numbank(w);
600 priv->mtr[i][j].numrow = 13 + i5100_mtr_numrow(w);
601 priv->mtr[i][j].numcol = 10 + i5100_mtr_numcol(w);
602 }
603 }
604}
605
606/*
607 * FIXME: make this into a real i2c adapter (so that dimm-decode
608 * will work)?
609 */
610static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
611 u8 ch, u8 slot, u8 addr, u8 *byte)
612{
613 struct i5100_priv *priv = mci->pvt_info;
614 u16 w;
615 unsigned long et;
616
617 pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
618 if (i5100_spddata_busy(w))
619 return -1;
620
621 pci_write_config_dword(priv->mc, I5100_SPDCMD,
622 i5100_spdcmd_create(0xa, 1, ch * 4 + slot, addr,
623 0, 0));
624
625 /* wait up to 100ms */
626 et = jiffies + HZ / 10;
627 udelay(100);
628 while (1) {
629 pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
630 if (!i5100_spddata_busy(w))
631 break;
632 udelay(100);
633 }
634
635 if (!i5100_spddata_rdo(w) || i5100_spddata_sbe(w))
636 return -1;
637
638 *byte = i5100_spddata_data(w);
639
640 return 0;
641}
642
643/*
644 * fill dimm chip select map
645 *
646 * FIXME:
647 * o only valid for 4 ranks per controller
648 * o not the only way to map chip selects to dimm slots
649 * o investigate if there is some way to obtain this map from the bios
650 */
651static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci)
652{
653 struct i5100_priv *priv = mci->pvt_info;
654 int i;
655
656 WARN_ON(priv->ranksperctlr != 4);
657
658 for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
659 int j;
660
661 for (j = 0; j < I5100_MAX_RANKS_PER_DIMM; j++)
662 priv->dimm_csmap[i][j] = -1; /* default NC */
663 }
664
665 /* only 2 chip selects per slot... */
666 priv->dimm_csmap[0][0] = 0;
667 priv->dimm_csmap[0][1] = 3;
668 priv->dimm_csmap[1][0] = 1;
669 priv->dimm_csmap[1][1] = 2;
670 priv->dimm_csmap[2][0] = 2;
671 priv->dimm_csmap[3][0] = 3;
672}
673
674static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev,
675 struct mem_ctl_info *mci)
676{
677 struct i5100_priv *priv = mci->pvt_info;
678 int i;
679
680 for (i = 0; i < I5100_MAX_CTLRS; i++) {
681 int j;
682
683 for (j = 0; j < I5100_MAX_DIMM_SLOTS_PER_CTLR; j++) {
684 u8 rank;
685
686 if (i5100_read_spd_byte(mci, i, j, 5, &rank) < 0)
687 priv->dimm_numrank[i][j] = 0;
688 else
689 priv->dimm_numrank[i][j] = (rank & 3) + 1;
690 }
691 }
692
693 i5100_init_dimm_csmap(mci);
694}
695
696static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
697 struct mem_ctl_info *mci)
698{
699 u16 w;
700 u32 dw;
701 struct i5100_priv *priv = mci->pvt_info;
702 struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
703 int i;
704
705 pci_read_config_word(pdev, I5100_TOLM, &w);
706 priv->tolm = (u64) i5100_tolm_tolm(w) * 256 * 1024 * 1024;
707
708 pci_read_config_word(pdev, I5100_MIR0, &w);
709 priv->mir[0].limit = (u64) i5100_mir_limit(w) << 28;
710 priv->mir[0].way[1] = i5100_mir_way1(w);
711 priv->mir[0].way[0] = i5100_mir_way0(w);
712
713 pci_read_config_word(pdev, I5100_MIR1, &w);
714 priv->mir[1].limit = (u64) i5100_mir_limit(w) << 28;
715 priv->mir[1].way[1] = i5100_mir_way1(w);
716 priv->mir[1].way[0] = i5100_mir_way0(w);
717
718 pci_read_config_word(pdev, I5100_AMIR_0, &w);
719 priv->amir[0] = w;
720 pci_read_config_word(pdev, I5100_AMIR_1, &w);
721 priv->amir[1] = w;
722
723 for (i = 0; i < I5100_MAX_CTLRS; i++) {
724 int j;
725
726 for (j = 0; j < 5; j++) {
727 int k;
728
729 pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw);
730
731 priv->dmir[i][j].limit =
732 (u64) i5100_dmir_limit(dw) << 28;
733 for (k = 0; k < I5100_MAX_RANKS_PER_DIMM; k++)
734 priv->dmir[i][j].rank[k] =
735 i5100_dmir_rank(dw, k);
736 }
737 }
738
739 i5100_init_mtr(mci);
740}
741
742static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
743{
744 int i;
745 unsigned long total_pages = 0UL;
746 struct i5100_priv *priv = mci->pvt_info;
747
748 for (i = 0; i < mci->nr_csrows; i++) {
749 const unsigned long npages = i5100_npages(mci, i);
750 const unsigned cntlr = i5100_csrow_to_cntlr(mci, i);
751 const unsigned rank = i5100_csrow_to_rank(mci, i);
752
753 if (!npages)
754 continue;
755
756 /*
757 * FIXME: these two are totally bogus -- I don't see how to
758 * map them correctly to this structure...
759 */
760 mci->csrows[i].first_page = total_pages;
761 mci->csrows[i].last_page = total_pages + npages - 1;
762 mci->csrows[i].page_mask = 0UL;
763
764 mci->csrows[i].nr_pages = npages;
765 mci->csrows[i].grain = 32;
766 mci->csrows[i].csrow_idx = i;
767 mci->csrows[i].dtype =
768 (priv->mtr[cntlr][rank].width == 4) ? DEV_X4 : DEV_X8;
769 mci->csrows[i].ue_count = 0;
770 mci->csrows[i].ce_count = 0;
771 mci->csrows[i].mtype = MEM_RDDR2;
772 mci->csrows[i].edac_mode = EDAC_SECDED;
773 mci->csrows[i].mci = mci;
774 mci->csrows[i].nr_channels = 1;
775 mci->csrows[i].channels[0].chan_idx = 0;
776 mci->csrows[i].channels[0].ce_count = 0;
777 mci->csrows[i].channels[0].csrow = mci->csrows + i;
778 snprintf(mci->csrows[i].channels[0].label,
779 sizeof(mci->csrows[i].channels[0].label),
780 "DIMM%u", i5100_rank_to_slot(mci, cntlr, rank));
781
782 total_pages += npages;
783 }
784}
785
786static int __devinit i5100_init_one(struct pci_dev *pdev,
787 const struct pci_device_id *id)
788{
789 int rc;
790 struct mem_ctl_info *mci;
791 struct i5100_priv *priv;
792 struct pci_dev *ch0mm, *ch1mm;
793 int ret = 0;
794 u32 dw;
795 int ranksperch;
796
797 if (PCI_FUNC(pdev->devfn) != 1)
798 return -ENODEV;
799
800 rc = pci_enable_device(pdev);
801 if (rc < 0) {
802 ret = rc;
803 goto bail;
804 }
805
806 /* ECC enabled? */
807 pci_read_config_dword(pdev, I5100_MC, &dw);
808 if (!i5100_mc_errdeten(dw)) {
809 printk(KERN_INFO "i5100_edac: ECC not enabled.\n");
810 ret = -ENODEV;
811 goto bail_pdev;
812 }
813
814 /* figure out how many ranks, from strapped state of 48GB_Mode input */
815 pci_read_config_dword(pdev, I5100_MS, &dw);
816 ranksperch = !!(dw & (1 << 8)) * 2 + 4;
817
818 if (ranksperch != 4) {
819 /* FIXME: get 6 ranks / controller to work - need hw... */
820 printk(KERN_INFO "i5100_edac: unsupported configuration.\n");
821 ret = -ENODEV;
822 goto bail_pdev;
823 }
824
825 /* enable error reporting... */
826 pci_read_config_dword(pdev, I5100_EMASK_MEM, &dw);
827 dw &= ~I5100_FERR_NF_MEM_ANY_MASK;
828 pci_write_config_dword(pdev, I5100_EMASK_MEM, dw);
829
830 /* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
831 ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
832 PCI_DEVICE_ID_INTEL_5100_21, 0);
833 if (!ch0mm) {
834 ret = -ENODEV;
835 goto bail_pdev;
836 }
837
838 rc = pci_enable_device(ch0mm);
839 if (rc < 0) {
840 ret = rc;
841 goto bail_ch0;
842 }
843
844 /* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */
845 ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
846 PCI_DEVICE_ID_INTEL_5100_22, 0);
847 if (!ch1mm) {
848 ret = -ENODEV;
849 goto bail_disable_ch0;
850 }
851
852 rc = pci_enable_device(ch1mm);
853 if (rc < 0) {
854 ret = rc;
855 goto bail_ch1;
856 }
857
858 mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
859 if (!mci) {
860 ret = -ENOMEM;
861 goto bail_disable_ch1;
862 }
863
864 mci->dev = &pdev->dev;
865
866 priv = mci->pvt_info;
867 priv->ranksperctlr = ranksperch;
868 priv->mc = pdev;
869 priv->ch0mm = ch0mm;
870 priv->ch1mm = ch1mm;
871
872 i5100_init_dimm_layout(pdev, mci);
873 i5100_init_interleaving(pdev, mci);
874
875 mci->mtype_cap = MEM_FLAG_FB_DDR2;
876 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
877 mci->edac_cap = EDAC_FLAG_SECDED;
878 mci->mod_name = "i5100_edac.c";
879 mci->mod_ver = "not versioned";
880 mci->ctl_name = "i5100";
881 mci->dev_name = pci_name(pdev);
882 mci->ctl_page_to_phys = NULL;
883
884 mci->edac_check = i5100_check_error;
885
886 i5100_init_csrows(mci);
887
888 /* this strange construction seems to be in every driver, dunno why */
889 switch (edac_op_state) {
890 case EDAC_OPSTATE_POLL:
891 case EDAC_OPSTATE_NMI:
892 break;
893 default:
894 edac_op_state = EDAC_OPSTATE_POLL;
895 break;
896 }
897
898 if (edac_mc_add_mc(mci)) {
899 ret = -ENODEV;
900 goto bail_mc;
901 }
902
903 return ret;
904
905bail_mc:
906 edac_mc_free(mci);
907
908bail_disable_ch1:
909 pci_disable_device(ch1mm);
910
911bail_ch1:
912 pci_dev_put(ch1mm);
913
914bail_disable_ch0:
915 pci_disable_device(ch0mm);
916
917bail_ch0:
918 pci_dev_put(ch0mm);
919
920bail_pdev:
921 pci_disable_device(pdev);
922
923bail:
924 return ret;
925}
926
927static void __devexit i5100_remove_one(struct pci_dev *pdev)
928{
929 struct mem_ctl_info *mci;
930 struct i5100_priv *priv;
931
932 mci = edac_mc_del_mc(&pdev->dev);
933
934 if (!mci)
935 return;
936
937 priv = mci->pvt_info;
938 pci_disable_device(pdev);
939 pci_disable_device(priv->ch0mm);
940 pci_disable_device(priv->ch1mm);
941 pci_dev_put(priv->ch0mm);
942 pci_dev_put(priv->ch1mm);
943
944 edac_mc_free(mci);
945}
946
947static const struct pci_device_id i5100_pci_tbl[] __devinitdata = {
948 /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */
949 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) },
950 { 0, }
951};
952MODULE_DEVICE_TABLE(pci, i5100_pci_tbl);
953
954static struct pci_driver i5100_driver = {
955 .name = KBUILD_BASENAME,
956 .probe = i5100_init_one,
957 .remove = __devexit_p(i5100_remove_one),
958 .id_table = i5100_pci_tbl,
959};
960
961static int __init i5100_init(void)
962{
963 int pci_rc;
964
965 pci_rc = pci_register_driver(&i5100_driver);
966
967 return (pci_rc < 0) ? pci_rc : 0;
968}
969
970static void __exit i5100_exit(void)
971{
972 pci_unregister_driver(&i5100_driver);
973}
974
975module_init(i5100_init);
976module_exit(i5100_exit);
977
978MODULE_LICENSE("GPL");
979MODULE_AUTHOR
980 ("Arthur Jones <ajones@riverbed.com>");
981MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers");
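
One pattern worth noting in the new driver: register fields are decoded through small inline shift-and-mask helpers rather than macro soup, which keeps the bit arithmetic in one auditable place. A standalone userspace sketch of the same technique, reusing the MTR encodings from the code above (the sample register value is made up):

	#include <stdio.h>

	typedef unsigned short u16;

	static inline u16 mtr_present(u16 a) { return a >> 10 & 1; }
	static inline u16 mtr_width(u16 a)   { return a >> 8 & 1; }
	static inline u16 mtr_numbank(u16 a) { return a >> 6 & 1; }
	static inline u16 mtr_numrow(u16 a)  { return a >> 2 & ((1 << 2) - 1); }
	static inline u16 mtr_numcol(u16 a)  { return a & ((1 << 2) - 1); }

	int main(void)
	{
		u16 w = (1 << 10) | (1 << 8) | (2 << 2) | 1; /* sample MTR value */

		/* same decode arithmetic as i5100_init_mtr() */
		if (mtr_present(w))
			printf("x%u DIMM: %u bank, %u row, %u column lines\n",
			       4 + 4 * mtr_width(w), 2 + mtr_numbank(w),
			       13 + mtr_numrow(w), 10 + mtr_numcol(w));
		return 0;
	}
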
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index d49361bfe670..2265d9ca1535 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -195,14 +195,15 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id)
195 return IRQ_HANDLED; 195 return IRQ_HANDLED;
196} 196}
197 197
198static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev) 198static int __devinit mpc85xx_pci_err_probe(struct of_device *op,
199 const struct of_device_id *match)
199{ 200{
200 struct edac_pci_ctl_info *pci; 201 struct edac_pci_ctl_info *pci;
201 struct mpc85xx_pci_pdata *pdata; 202 struct mpc85xx_pci_pdata *pdata;
202 struct resource *r; 203 struct resource r;
203 int res = 0; 204 int res = 0;
204 205
205 if (!devres_open_group(&pdev->dev, mpc85xx_pci_err_probe, GFP_KERNEL)) 206 if (!devres_open_group(&op->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
206 return -ENOMEM; 207 return -ENOMEM;
207 208
208 pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mpc85xx_pci_err"); 209 pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mpc85xx_pci_err");
@@ -212,34 +213,37 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
212 pdata = pci->pvt_info; 213 pdata = pci->pvt_info;
213 pdata->name = "mpc85xx_pci_err"; 214 pdata->name = "mpc85xx_pci_err";
214 pdata->irq = NO_IRQ; 215 pdata->irq = NO_IRQ;
215 platform_set_drvdata(pdev, pci); 216 dev_set_drvdata(&op->dev, pci);
216 pci->dev = &pdev->dev; 217 pci->dev = &op->dev;
217 pci->mod_name = EDAC_MOD_STR; 218 pci->mod_name = EDAC_MOD_STR;
218 pci->ctl_name = pdata->name; 219 pci->ctl_name = pdata->name;
219 pci->dev_name = pdev->dev.bus_id; 220 pci->dev_name = op->dev.bus_id;
220 221
221 if (edac_op_state == EDAC_OPSTATE_POLL) 222 if (edac_op_state == EDAC_OPSTATE_POLL)
222 pci->edac_check = mpc85xx_pci_check; 223 pci->edac_check = mpc85xx_pci_check;
223 224
224 pdata->edac_idx = edac_pci_idx++; 225 pdata->edac_idx = edac_pci_idx++;
225 226
226 r = platform_get_resource(pdev, IORESOURCE_MEM, 0); 227 res = of_address_to_resource(op->node, 0, &r);
227 if (!r) { 228 if (res) {
228 printk(KERN_ERR "%s: Unable to get resource for " 229 printk(KERN_ERR "%s: Unable to get resource for "
229 "PCI err regs\n", __func__); 230 "PCI err regs\n", __func__);
230 goto err; 231 goto err;
231 } 232 }
232 233
233 if (!devm_request_mem_region(&pdev->dev, r->start, 234 /* we only need the error registers */
234 r->end - r->start + 1, pdata->name)) { 235 r.start += 0xe00;
236
237 if (!devm_request_mem_region(&op->dev, r.start,
238 r.end - r.start + 1, pdata->name)) {
235 printk(KERN_ERR "%s: Error while requesting mem region\n", 239 printk(KERN_ERR "%s: Error while requesting mem region\n",
236 __func__); 240 __func__);
237 res = -EBUSY; 241 res = -EBUSY;
238 goto err; 242 goto err;
239 } 243 }
240 244
241 pdata->pci_vbase = devm_ioremap(&pdev->dev, r->start, 245 pdata->pci_vbase = devm_ioremap(&op->dev, r.start,
242 r->end - r->start + 1); 246 r.end - r.start + 1);
243 if (!pdata->pci_vbase) { 247 if (!pdata->pci_vbase) {
244 printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); 248 printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
245 res = -ENOMEM; 249 res = -ENOMEM;
@@ -266,14 +270,15 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
266 } 270 }
267 271
268 if (edac_op_state == EDAC_OPSTATE_INT) { 272 if (edac_op_state == EDAC_OPSTATE_INT) {
269 pdata->irq = platform_get_irq(pdev, 0); 273 pdata->irq = irq_of_parse_and_map(op->node, 0);
270 res = devm_request_irq(&pdev->dev, pdata->irq, 274 res = devm_request_irq(&op->dev, pdata->irq,
271 mpc85xx_pci_isr, IRQF_DISABLED, 275 mpc85xx_pci_isr, IRQF_DISABLED,
272 "[EDAC] PCI err", pci); 276 "[EDAC] PCI err", pci);
273 if (res < 0) { 277 if (res < 0) {
274 printk(KERN_ERR 278 printk(KERN_ERR
275 "%s: Unable to requiest irq %d for " 279 "%s: Unable to requiest irq %d for "
276 "MPC85xx PCI err\n", __func__, pdata->irq); 280 "MPC85xx PCI err\n", __func__, pdata->irq);
281 irq_dispose_mapping(pdata->irq);
277 res = -ENODEV; 282 res = -ENODEV;
278 goto err2; 283 goto err2;
279 } 284 }
@@ -282,23 +287,23 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
282 pdata->irq); 287 pdata->irq);
283 } 288 }
284 289
285 devres_remove_group(&pdev->dev, mpc85xx_pci_err_probe); 290 devres_remove_group(&op->dev, mpc85xx_pci_err_probe);
286 debugf3("%s(): success\n", __func__); 291 debugf3("%s(): success\n", __func__);
287 printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n"); 292 printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n");
288 293
289 return 0; 294 return 0;
290 295
291err2: 296err2:
292 edac_pci_del_device(&pdev->dev); 297 edac_pci_del_device(&op->dev);
293err: 298err:
294 edac_pci_free_ctl_info(pci); 299 edac_pci_free_ctl_info(pci);
295 devres_release_group(&pdev->dev, mpc85xx_pci_err_probe); 300 devres_release_group(&op->dev, mpc85xx_pci_err_probe);
296 return res; 301 return res;
297} 302}
298 303
299static int mpc85xx_pci_err_remove(struct platform_device *pdev) 304static int mpc85xx_pci_err_remove(struct of_device *op)
300{ 305{
301 struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); 306 struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev);
302 struct mpc85xx_pci_pdata *pdata = pci->pvt_info; 307 struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
303 308
304 debugf0("%s()\n", __func__); 309 debugf0("%s()\n", __func__);
@@ -318,12 +323,26 @@ static int mpc85xx_pci_err_remove(struct platform_device *pdev)
318 return 0; 323 return 0;
319} 324}
320 325
321static struct platform_driver mpc85xx_pci_err_driver = { 326static struct of_device_id mpc85xx_pci_err_of_match[] = {
327 {
328 .compatible = "fsl,mpc8540-pcix",
329 },
330 {
331 .compatible = "fsl,mpc8540-pci",
332 },
333 {},
334};
335
336static struct of_platform_driver mpc85xx_pci_err_driver = {
337 .owner = THIS_MODULE,
338 .name = "mpc85xx_pci_err",
339 .match_table = mpc85xx_pci_err_of_match,
322 .probe = mpc85xx_pci_err_probe, 340 .probe = mpc85xx_pci_err_probe,
323 .remove = __devexit_p(mpc85xx_pci_err_remove), 341 .remove = __devexit_p(mpc85xx_pci_err_remove),
324 .driver = { 342 .driver = {
325 .name = "mpc85xx_pci_err", 343 .name = "mpc85xx_pci_err",
326 } 344 .owner = THIS_MODULE,
345 },
327}; 346};
328 347
329#endif /* CONFIG_PCI */ 348#endif /* CONFIG_PCI */
@@ -1002,7 +1021,7 @@ static int __init mpc85xx_mc_init(void)
1002 printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n"); 1021 printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n");
1003 1022
1004#ifdef CONFIG_PCI 1023#ifdef CONFIG_PCI
1005 res = platform_driver_register(&mpc85xx_pci_err_driver); 1024 res = of_register_platform_driver(&mpc85xx_pci_err_driver);
1006 if (res) 1025 if (res)
1007 printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n"); 1026 printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
1008#endif 1027#endif
@@ -1025,7 +1044,7 @@ static void __exit mpc85xx_mc_exit(void)
1025{ 1044{
1026 mtspr(SPRN_HID1, orig_hid1); 1045 mtspr(SPRN_HID1, orig_hid1);
1027#ifdef CONFIG_PCI 1046#ifdef CONFIG_PCI
1028 platform_driver_unregister(&mpc85xx_pci_err_driver); 1047 of_unregister_platform_driver(&mpc85xx_pci_err_driver);
1029#endif 1048#endif
1030 of_unregister_platform_driver(&mpc85xx_l2_err_driver); 1049 of_unregister_platform_driver(&mpc85xx_l2_err_driver);
1031 of_unregister_platform_driver(&mpc85xx_mc_err_driver); 1050 of_unregister_platform_driver(&mpc85xx_mc_err_driver);
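
The probe/remove conversion above moves the PCI error device from the platform bus to the OF platform bus, so the register block and interrupt now come from the device tree node via of_address_to_resource() and irq_of_parse_and_map(). A trimmed sketch of the resulting driver shape with 2.6.26-era types (demo_* names are placeholders, error handling is abbreviated, and exact header locations varied by architecture in this era):

	#include <linux/module.h>
	#include <linux/of_device.h>
	#include <linux/of_platform.h>

	static int __devinit demo_probe(struct of_device *op,
					const struct of_device_id *match)
	{
		struct resource r;
		int irq;

		/* address and interrupt both come from the device tree node */
		if (of_address_to_resource(op->node, 0, &r))
			return -ENOENT;
		irq = irq_of_parse_and_map(op->node, 0);

		dev_info(&op->dev, "regs at 0x%llx, irq %d\n",
			 (unsigned long long)r.start, irq);
		return 0;
	}

	static struct of_device_id demo_of_match[] = {
		{ .compatible = "fsl,mpc8540-pci", },
		{},
	};

	static struct of_platform_driver demo_driver = {
		.name = "demo_pci_err",
		.match_table = demo_of_match,
		.probe = demo_probe,
	};

	static int __init demo_init(void)
	{
		return of_register_platform_driver(&demo_driver);
	}

	static void __exit demo_exit(void)
	{
		of_unregister_platform_driver(&demo_driver);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
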
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index bf071f140a05..083ce8d0c63d 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -71,6 +71,35 @@ static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id)
71 return IRQ_HANDLED; 71 return IRQ_HANDLED;
72} 72}
73 73
74/*
75 * Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of
76 * errata FEr-#11 and FEr-##16 for the 64460, it should be 0 on that chip as
77 * well. IOW, don't set bit 0.
78 */
79
80/* Erratum FEr PCI-#16: clear bit 0 of PCI SERRn Mask reg. */
81static int __init mv64x60_pci_fixup(struct platform_device *pdev)
82{
83 struct resource *r;
84 void __iomem *pci_serr;
85
86 r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
87 if (!r) {
88 printk(KERN_ERR "%s: Unable to get resource for "
89 "PCI err regs\n", __func__);
90 return -ENOENT;
91 }
92
93 pci_serr = ioremap(r->start, r->end - r->start + 1);
94 if (!pci_serr)
95 return -ENOMEM;
96
97 out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
98 iounmap(pci_serr);
99
100 return 0;
101}
102
74static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) 103static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
75{ 104{
76 struct edac_pci_ctl_info *pci; 105 struct edac_pci_ctl_info *pci;
@@ -128,6 +157,12 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
128 goto err; 157 goto err;
129 } 158 }
130 159
160 res = mv64x60_pci_fixup(pdev);
161 if (res < 0) {
162 printk(KERN_ERR "%s: PCI fixup failed\n", __func__);
163 goto err;
164 }
165
131 out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0); 166 out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
132 out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0); 167 out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
133 out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 168 out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
@@ -612,7 +647,7 @@ static void get_total_mem(struct mv64x60_mc_pdata *pdata)
612 if (!np) 647 if (!np)
613 return; 648 return;
614 649
615 reg = get_property(np, "reg", NULL); 650 reg = of_get_property(np, "reg", NULL);
616 651
617 pdata->total_mem = reg[1]; 652 pdata->total_mem = reg[1];
618} 653}
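
The erratum fixup above follows the usual map, read-modify-write, unmap recipe for a one-off register poke. A minimal sketch of that recipe using the same powerpc little-endian accessors as the driver (the register address and length are caller-supplied; illustrative only):

	#include <linux/io.h>
	#include <linux/errno.h>

	/* clear bit 0 of a 32-bit little-endian register, then unmap again */
	static int clear_serr_bit0(resource_size_t start, unsigned long len)
	{
		void __iomem *pci_serr;

		pci_serr = ioremap(start, len);
		if (!pci_serr)
			return -ENOMEM;

		out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
		iounmap(pci_serr);
		return 0;
	}
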
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 0b624e927a6f..c66817e7717b 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -152,20 +152,11 @@ static ssize_t smi_data_read(struct kobject *kobj,
152 struct bin_attribute *bin_attr, 152 struct bin_attribute *bin_attr,
153 char *buf, loff_t pos, size_t count) 153 char *buf, loff_t pos, size_t count)
154{ 154{
155 size_t max_read;
156 ssize_t ret; 155 ssize_t ret;
157 156
158 mutex_lock(&smi_data_lock); 157 mutex_lock(&smi_data_lock);
159 158 ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf,
160 if (pos >= smi_data_buf_size) { 159 smi_data_buf_size);
161 ret = 0;
162 goto out;
163 }
164
165 max_read = smi_data_buf_size - pos;
166 ret = min(max_read, count);
167 memcpy(buf, smi_data_buf + pos, ret);
168out:
169 mutex_unlock(&smi_data_lock); 160 mutex_unlock(&smi_data_lock);
170 return ret; 161 return ret;
171} 162}
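
memory_read_from_buffer() (declared in <linux/string.h>) folds the deleted open-coded bounds check, clamp, and copy into one helper; the dell_rbu conversion below uses it the same way. A userspace restatement of its semantics, for illustration only:

	#include <stdio.h>
	#include <string.h>
	#include <errno.h>

	/* same contract as the kernel helper: copy at most count bytes from
	 * from[*ppos .. available), advance *ppos, and return the byte count
	 * (0 once the position is at or past the end)
	 */
	static long read_from_buffer(void *to, size_t count, long long *ppos,
				     const void *from, size_t available)
	{
		long long pos = *ppos;

		if (pos < 0)
			return -EINVAL;
		if ((size_t)pos >= available)
			return 0;
		if (count > available - (size_t)pos)
			count = available - (size_t)pos;
		memcpy(to, (const char *)from + pos, count);
		*ppos = pos + count;
		return (long)count;
	}

	int main(void)
	{
		char src[] = "smi data buffer";
		char dst[4];
		long long pos = 4;
		long n = read_from_buffer(dst, sizeof(dst), &pos,
					  src, strlen(src));

		printf("read %ld bytes at offset 4, pos now %lld\n", n, pos);
		return 0;
	}
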
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 7430e218cda6..13946ebd77d6 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -507,11 +507,6 @@ static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count)
507 507
508static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count) 508static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
509{ 509{
510 unsigned char *ptemp = NULL;
511 size_t bytes_left = 0;
512 size_t data_length = 0;
513 ssize_t ret_count = 0;
514
515 /* check to see if we have something to return */ 510 /* check to see if we have something to return */
516 if ((rbu_data.image_update_buffer == NULL) || 511 if ((rbu_data.image_update_buffer == NULL) ||
517 (rbu_data.bios_image_size == 0)) { 512 (rbu_data.bios_image_size == 0)) {
@@ -519,28 +514,11 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
519 "bios_image_size %lu\n", 514 "bios_image_size %lu\n",
520 rbu_data.image_update_buffer, 515 rbu_data.image_update_buffer,
521 rbu_data.bios_image_size); 516 rbu_data.bios_image_size);
522 ret_count = -ENOMEM; 517 return -ENOMEM;
523 goto read_rbu_data_exit;
524 }
525
526 if (pos > rbu_data.bios_image_size) {
527 ret_count = 0;
528 goto read_rbu_data_exit;
529 } 518 }
530 519
531 bytes_left = rbu_data.bios_image_size - pos; 520 return memory_read_from_buffer(buffer, count, &pos,
532 data_length = min(bytes_left, count); 521 rbu_data.image_update_buffer, rbu_data.bios_image_size);
533
534 ptemp = rbu_data.image_update_buffer;
535 memcpy(buffer, (ptemp + pos), data_length);
536
537 if ((pos + count) > rbu_data.bios_image_size)
538 /* this was the last copy */
539 ret_count = bytes_left;
540 else
541 ret_count = count;
542 read_rbu_data_exit:
543 return ret_count;
544} 522}
545 523
546static ssize_t read_rbu_data(struct kobject *kobj, 524static ssize_t read_rbu_data(struct kobject *kobj,
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index fced1909cbba..dbd42d6c93a7 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -2,15 +2,40 @@
2# GPIO infrastructure and expanders 2# GPIO infrastructure and expanders
3# 3#
4 4
5config HAVE_GPIO_LIB 5config ARCH_WANT_OPTIONAL_GPIOLIB
6 bool 6 bool
7 help 7 help
8	  Select this config option from the architecture Kconfig if
9	  it is possible to use gpiolib on the architecture, but the
10	  user should decide whether to actually build it.
11	  Select this instead of ARCH_REQUIRE_GPIOLIB if your architecture
12	  does not depend on GPIOs being available, but you want to let
13	  the user decide whether gpiolib is needed.
14
15config ARCH_REQUIRE_GPIOLIB
16 bool
17 select GPIOLIB
18 help
8 Platforms select gpiolib if they use this infrastructure 19 Platforms select gpiolib if they use this infrastructure
9 for all their GPIOs, usually starting with ones integrated 20 for all their GPIOs, usually starting with ones integrated
10 into SOC processors. 21 into SOC processors.
22 Selecting this from the architecture code will cause the gpiolib
23 code to always get built in.
24
25
26
27menuconfig GPIOLIB
28 bool "GPIO Support"
29 depends on ARCH_WANT_OPTIONAL_GPIOLIB || ARCH_REQUIRE_GPIOLIB
30 select GENERIC_GPIO
31 help
32 This enables GPIO support through the generic GPIO library.
33	  This enables GPIO support through the generic GPIO library.
34	  You only need to enable this if you also want to enable
35	  one or more of the GPIO expansion card drivers below.
11 35
12menu "GPIO Support" 36 If unsure, say N.
13 depends on HAVE_GPIO_LIB 37
38if GPIOLIB
14 39
15config DEBUG_GPIO 40config DEBUG_GPIO
16 bool "Debug GPIO calls" 41 bool "Debug GPIO calls"
@@ -23,10 +48,44 @@ config DEBUG_GPIO
23 slower. The diagnostics help catch the type of setup errors 48 slower. The diagnostics help catch the type of setup errors
24 that are most common when setting up new platforms or boards. 49 that are most common when setting up new platforms or boards.
25 50
51config GPIO_SYSFS
52 bool "/sys/class/gpio/... (sysfs interface)"
53 depends on SYSFS && EXPERIMENTAL
54 help
55 Say Y here to add a sysfs interface for GPIOs.
56
57 This is mostly useful to work around omissions in a system's
58 kernel support. Those are common in custom and semicustom
59 hardware assembled using standard kernels with a minimum of
60 custom patches. In those cases, userspace code may import
61	  a given GPIO from the kernel if no kernel driver requested it.
62
63 Kernel drivers may also request that a particular GPIO be
64 exported to userspace; this can be useful when debugging.
65
26# put expanders in the right section, in alphabetical order 66# put expanders in the right section, in alphabetical order
27 67
28comment "I2C GPIO expanders:" 68comment "I2C GPIO expanders:"
29 69
70config GPIO_MAX732X
71 tristate "MAX7319, MAX7320-7327 I2C Port Expanders"
72 depends on I2C
73 help
74 Say yes here to support the MAX7319, MAX7320-7327 series of I2C
75 Port Expanders. Each IO port on these chips has a fixed role of
76 Input (designated by 'I'), Push-Pull Output ('O'), or Open-Drain
77 Input and Output (designed by 'P'). The combinations are listed
78 below:
79
80 8 bits: max7319 (8I), max7320 (8O), max7321 (8P),
81 max7322 (4I4O), max7323 (4P4O)
82
83 16 bits: max7324 (8I8O), max7325 (8P8O),
84 max7326 (4I12O), max7327 (4P12O)
85
86 Board setup code must specify the model to use, and the start
87 number for these GPIOs.
88
30config GPIO_PCA953X 89config GPIO_PCA953X
31 tristate "PCA953x, PCA955x, and MAX7310 I/O ports" 90 tristate "PCA953x, PCA955x, and MAX7310 I/O ports"
32 depends on I2C 91 depends on I2C
@@ -68,6 +127,24 @@ config GPIO_PCF857X
68 This driver provides an in-kernel interface to those GPIOs using 127 This driver provides an in-kernel interface to those GPIOs using
69 platform-neutral GPIO calls. 128 platform-neutral GPIO calls.
70 129
130comment "PCI GPIO expanders:"
131
132config GPIO_BT8XX
133 tristate "BT8XX GPIO abuser"
134 depends on PCI && VIDEO_BT848=n
135 help
136	  The BT8xx frame grabber chip has 24 GPIO pins that can be abused
137 as a cheap PCI GPIO card.
138
139 This chip can be found on Miro, Hauppauge and STB TV-cards.
140
141	  The card needs to be physically altered before it can be used
142	  as a GPIO card. For more information on how to build a GPIO card
143 from a BT8xx TV card, see the documentation file at
144 Documentation/bt8xxgpio.txt
145
146 If unsure, say N.
147
71comment "SPI GPIO expanders:" 148comment "SPI GPIO expanders:"
72 149
73config GPIO_MAX7301 150config GPIO_MAX7301
@@ -83,4 +160,4 @@ config GPIO_MCP23S08
83 SPI driver for Microchip MCP23S08 I/O expander. This provides 160 SPI driver for Microchip MCP23S08 I/O expander. This provides
84 a GPIO interface supporting inputs and outputs. 161 a GPIO interface supporting inputs and outputs.
85 162
86endmenu 163endif
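
With HAVE_GPIO_LIB replaced by the user-visible GPIOLIB option (plus the two ARCH_*_GPIOLIB helper symbols), code that uses gpiolib only opportunistically can key off CONFIG_GPIOLIB. A hypothetical driver fragment, with the GPIO number and label invented for the example:

	#include <linux/gpio.h>

	#define EXAMPLE_LED_GPIO 42	/* assumption: board-specific wiring */

	static int example_led_init(void)
	{
	#ifdef CONFIG_GPIOLIB
		int err = gpio_request(EXAMPLE_LED_GPIO, "example-led");

		if (err)
			return err;
		return gpio_direction_output(EXAMPLE_LED_GPIO, 0);
	#else
		return 0;		/* no gpiolib: nothing to do */
	#endif
	}
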
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 16e796dc5410..01b4bbde1956 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -2,9 +2,11 @@
2 2
3ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG 3ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG
4 4
5obj-$(CONFIG_HAVE_GPIO_LIB) += gpiolib.o 5obj-$(CONFIG_GPIOLIB) += gpiolib.o
6 6
7obj-$(CONFIG_GPIO_MAX7301) += max7301.o 7obj-$(CONFIG_GPIO_MAX7301) += max7301.o
8obj-$(CONFIG_GPIO_MAX732X) += max732x.o
8obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o 9obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o
9obj-$(CONFIG_GPIO_PCA953X) += pca953x.o 10obj-$(CONFIG_GPIO_PCA953X) += pca953x.o
10obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o 11obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o
12obj-$(CONFIG_GPIO_BT8XX) += bt8xxgpio.o
diff --git a/drivers/gpio/bt8xxgpio.c b/drivers/gpio/bt8xxgpio.c
new file mode 100644
index 000000000000..7a1168249dd5
--- /dev/null
+++ b/drivers/gpio/bt8xxgpio.c
@@ -0,0 +1,348 @@
1/*
2
3 bt8xx GPIO abuser
4
5 Copyright (C) 2008 Michael Buesch <mb@bu3sch.de>
6
7 Please contact _only_ the people listed _above_ with issues related to this driver.
8 All the other people listed below are not related to this driver. Their names
9 are only here because this driver is derived from the bt848 driver.
10
11
12 Derived from the bt848 driver:
13
14 Copyright (C) 1996,97,98 Ralph Metzler
15 & Marcus Metzler
16 (c) 1999-2002 Gerd Knorr
17
18 some v4l2 code lines are taken from Justin's bttv2 driver which is
19 (c) 2000 Justin Schoeman
20
21 V4L1 removal from:
22 (c) 2005-2006 Nickolay V. Shmyrev
23
24 Fixes to be fully V4L2 compliant by
25 (c) 2006 Mauro Carvalho Chehab
26
27 Cropping and overscan support
28 Copyright (C) 2005, 2006 Michael H. Schimek
29 Sponsored by OPQ Systems AB
30
31 This program is free software; you can redistribute it and/or modify
32 it under the terms of the GNU General Public License as published by
33 the Free Software Foundation; either version 2 of the License, or
34 (at your option) any later version.
35
36 This program is distributed in the hope that it will be useful,
37 but WITHOUT ANY WARRANTY; without even the implied warranty of
38 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39 GNU General Public License for more details.
40
41 You should have received a copy of the GNU General Public License
42 along with this program; if not, write to the Free Software
43 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
44*/
45
46#include <linux/module.h>
47#include <linux/pci.h>
48#include <linux/spinlock.h>
49
50#include <asm/gpio.h>
51
52/* Steal the hardware definitions from the bttv driver. */
53#include "../media/video/bt8xx/bt848.h"
54
55
56#define BT8XXGPIO_NR_GPIOS 24 /* We have 24 GPIO pins */
57
58
59struct bt8xxgpio {
60 spinlock_t lock;
61
62 void __iomem *mmio;
63 struct pci_dev *pdev;
64 struct gpio_chip gpio;
65
66#ifdef CONFIG_PM
67 u32 saved_outen;
68 u32 saved_data;
69#endif
70};
71
72#define bgwrite(dat, adr) writel((dat), bg->mmio+(adr))
73#define bgread(adr) readl(bg->mmio+(adr))
74
75
76static int modparam_gpiobase = -1; /* dynamic */
77module_param_named(gpiobase, modparam_gpiobase, int, 0444);
78MODULE_PARM_DESC(gpiobase, "The GPIO number base. -1 means dynamic, which is the default.");
79
80
81static int bt8xxgpio_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
82{
83 struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
84 unsigned long flags;
85 u32 outen, data;
86
87 spin_lock_irqsave(&bg->lock, flags);
88
89 data = bgread(BT848_GPIO_DATA);
90 data &= ~(1 << nr);
91 bgwrite(data, BT848_GPIO_DATA);
92
93 outen = bgread(BT848_GPIO_OUT_EN);
94 outen &= ~(1 << nr);
95 bgwrite(outen, BT848_GPIO_OUT_EN);
96
97 spin_unlock_irqrestore(&bg->lock, flags);
98
99 return 0;
100}
101
102static int bt8xxgpio_gpio_get(struct gpio_chip *gpio, unsigned nr)
103{
104 struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
105 unsigned long flags;
106 u32 val;
107
108 spin_lock_irqsave(&bg->lock, flags);
109 val = bgread(BT848_GPIO_DATA);
110 spin_unlock_irqrestore(&bg->lock, flags);
111
112 return !!(val & (1 << nr));
113}
114
115static int bt8xxgpio_gpio_direction_output(struct gpio_chip *gpio,
116 unsigned nr, int val)
117{
118 struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
119 unsigned long flags;
120 u32 outen, data;
121
122 spin_lock_irqsave(&bg->lock, flags);
123
124 outen = bgread(BT848_GPIO_OUT_EN);
125 outen |= (1 << nr);
126 bgwrite(outen, BT848_GPIO_OUT_EN);
127
128 data = bgread(BT848_GPIO_DATA);
129 if (val)
130 data |= (1 << nr);
131 else
132 data &= ~(1 << nr);
133 bgwrite(data, BT848_GPIO_DATA);
134
135 spin_unlock_irqrestore(&bg->lock, flags);
136
137 return 0;
138}
139
140static void bt8xxgpio_gpio_set(struct gpio_chip *gpio,
141 unsigned nr, int val)
142{
143 struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
144 unsigned long flags;
145 u32 data;
146
147 spin_lock_irqsave(&bg->lock, flags);
148
149 data = bgread(BT848_GPIO_DATA);
150 if (val)
151 data |= (1 << nr);
152 else
153 data &= ~(1 << nr);
154 bgwrite(data, BT848_GPIO_DATA);
155
156 spin_unlock_irqrestore(&bg->lock, flags);
157}
158
159static void bt8xxgpio_gpio_setup(struct bt8xxgpio *bg)
160{
161 struct gpio_chip *c = &bg->gpio;
162
163 c->label = bg->pdev->dev.bus_id;
164 c->owner = THIS_MODULE;
165 c->direction_input = bt8xxgpio_gpio_direction_input;
166 c->get = bt8xxgpio_gpio_get;
167 c->direction_output = bt8xxgpio_gpio_direction_output;
168 c->set = bt8xxgpio_gpio_set;
169 c->dbg_show = NULL;
170 c->base = modparam_gpiobase;
171 c->ngpio = BT8XXGPIO_NR_GPIOS;
172 c->can_sleep = 0;
173}
174
175static int bt8xxgpio_probe(struct pci_dev *dev,
176 const struct pci_device_id *pci_id)
177{
178 struct bt8xxgpio *bg;
179 int err;
180
181 bg = kzalloc(sizeof(*bg), GFP_KERNEL);
182 if (!bg)
183 return -ENOMEM;
184
185 bg->pdev = dev;
186 spin_lock_init(&bg->lock);
187
188 err = pci_enable_device(dev);
189 if (err) {
190 printk(KERN_ERR "bt8xxgpio: Can't enable device.\n");
191 goto err_freebg;
192 }
193 if (!request_mem_region(pci_resource_start(dev, 0),
194 pci_resource_len(dev, 0),
195 "bt8xxgpio")) {
196 printk(KERN_WARNING "bt8xxgpio: Can't request iomem (0x%llx).\n",
197 (unsigned long long)pci_resource_start(dev, 0));
198 err = -EBUSY;
199 goto err_disable;
200 }
201 pci_set_master(dev);
202 pci_set_drvdata(dev, bg);
203
204 bg->mmio = ioremap(pci_resource_start(dev, 0), 0x1000);
205 if (!bg->mmio) {
206 printk(KERN_ERR "bt8xxgpio: ioremap() failed\n");
207 err = -EIO;
208 goto err_release_mem;
209 }
210
211 /* Disable interrupts */
212 bgwrite(0, BT848_INT_MASK);
213
214 /* gpio init */
215 bgwrite(0, BT848_GPIO_DMA_CTL);
216 bgwrite(0, BT848_GPIO_REG_INP);
217 bgwrite(0, BT848_GPIO_OUT_EN);
218
219 bt8xxgpio_gpio_setup(bg);
220 err = gpiochip_add(&bg->gpio);
221 if (err) {
222 printk(KERN_ERR "bt8xxgpio: Failed to register GPIOs\n");
223 goto err_release_mem;
224 }
225
226 printk(KERN_INFO "bt8xxgpio: Abusing BT8xx card for GPIOs %d to %d\n",
227 bg->gpio.base, bg->gpio.base + BT8XXGPIO_NR_GPIOS - 1);
228
229 return 0;
230
231err_release_mem:
232 release_mem_region(pci_resource_start(dev, 0),
233 pci_resource_len(dev, 0));
234 pci_set_drvdata(dev, NULL);
235err_disable:
236 pci_disable_device(dev);
237err_freebg:
238 kfree(bg);
239
240 return err;
241}
242
243static void bt8xxgpio_remove(struct pci_dev *pdev)
244{
245 struct bt8xxgpio *bg = pci_get_drvdata(pdev);
246
247 gpiochip_remove(&bg->gpio);
248
249 bgwrite(0, BT848_INT_MASK);
250 bgwrite(~0x0, BT848_INT_STAT);
251 bgwrite(0x0, BT848_GPIO_OUT_EN);
252
253 iounmap(bg->mmio);
254 release_mem_region(pci_resource_start(pdev, 0),
255 pci_resource_len(pdev, 0));
256 pci_disable_device(pdev);
257
258 pci_set_drvdata(pdev, NULL);
259 kfree(bg);
260}
261
262#ifdef CONFIG_PM
263static int bt8xxgpio_suspend(struct pci_dev *pdev, pm_message_t state)
264{
265 struct bt8xxgpio *bg = pci_get_drvdata(pdev);
266 unsigned long flags;
267
268 spin_lock_irqsave(&bg->lock, flags);
269
270 bg->saved_outen = bgread(BT848_GPIO_OUT_EN);
271 bg->saved_data = bgread(BT848_GPIO_DATA);
272
273 bgwrite(0, BT848_INT_MASK);
274 bgwrite(~0x0, BT848_INT_STAT);
275 bgwrite(0x0, BT848_GPIO_OUT_EN);
276
277 spin_unlock_irqrestore(&bg->lock, flags);
278
279 pci_save_state(pdev);
280 pci_disable_device(pdev);
281 pci_set_power_state(pdev, pci_choose_state(pdev, state));
282
283 return 0;
284}
285
286static int bt8xxgpio_resume(struct pci_dev *pdev)
287{
288 struct bt8xxgpio *bg = pci_get_drvdata(pdev);
289 unsigned long flags;
290 int err;
291
292 pci_set_power_state(pdev, 0);
293 err = pci_enable_device(pdev);
294 if (err)
295 return err;
296 pci_restore_state(pdev);
297
298 spin_lock_irqsave(&bg->lock, flags);
299
300 bgwrite(0, BT848_INT_MASK);
301 bgwrite(0, BT848_GPIO_DMA_CTL);
302 bgwrite(0, BT848_GPIO_REG_INP);
303 bgwrite(bg->saved_outen, BT848_GPIO_OUT_EN);
304 bgwrite(bg->saved_data & bg->saved_outen,
305 BT848_GPIO_DATA);
306
307 spin_unlock_irqrestore(&bg->lock, flags);
308
309 return 0;
310}
311#else
312#define bt8xxgpio_suspend NULL
313#define bt8xxgpio_resume NULL
314#endif /* CONFIG_PM */
315
316static struct pci_device_id bt8xxgpio_pci_tbl[] = {
317 { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT848) },
318 { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT849) },
319 { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT878) },
320 { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT879) },
321 { 0, },
322};
323MODULE_DEVICE_TABLE(pci, bt8xxgpio_pci_tbl);
324
325static struct pci_driver bt8xxgpio_pci_driver = {
326 .name = "bt8xxgpio",
327 .id_table = bt8xxgpio_pci_tbl,
328 .probe = bt8xxgpio_probe,
329 .remove = bt8xxgpio_remove,
330 .suspend = bt8xxgpio_suspend,
331 .resume = bt8xxgpio_resume,
332};
333
334static int bt8xxgpio_init(void)
335{
336 return pci_register_driver(&bt8xxgpio_pci_driver);
337}
338module_init(bt8xxgpio_init)
339
340static void bt8xxgpio_exit(void)
341{
342 pci_unregister_driver(&bt8xxgpio_pci_driver);
343}
344module_exit(bt8xxgpio_exit)
345
346MODULE_LICENSE("GPL");
347MODULE_AUTHOR("Michael Buesch");
348MODULE_DESCRIPTION("Abuse a BT8xx framegrabber card as generic GPIO card");
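
Once registered, the 24 bt8xxgpio pins are ordinary gpiolib GPIOs, numbered from the base announced at probe time (or forced with the gpiobase module parameter). An illustrative consumer, with the pin role invented for the example:

	#include <linux/gpio.h>

	static int example_claim_bt8xx_pin(unsigned base)
	{
		unsigned gpio = base + 3;	/* assumption: pin 3 drives a relay */
		int err;

		err = gpio_request(gpio, "relay");
		if (err)
			return err;

		err = gpio_direction_output(gpio, 1);
		if (err)
			gpio_free(gpio);
		return err;
	}
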
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index beaf6b3a37dc..8d2940517c99 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2,8 +2,11 @@
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/irq.h> 3#include <linux/irq.h>
4#include <linux/spinlock.h> 4#include <linux/spinlock.h>
5 5#include <linux/device.h>
6#include <asm/gpio.h> 6#include <linux/err.h>
7#include <linux/debugfs.h>
8#include <linux/seq_file.h>
9#include <linux/gpio.h>
7 10
8 11
9/* Optional implementation infrastructure for GPIO interfaces. 12/* Optional implementation infrastructure for GPIO interfaces.
@@ -44,6 +47,8 @@ struct gpio_desc {
44#define FLAG_REQUESTED 0 47#define FLAG_REQUESTED 0
45#define FLAG_IS_OUT 1 48#define FLAG_IS_OUT 1
46#define FLAG_RESERVED 2 49#define FLAG_RESERVED 2
50#define FLAG_EXPORT 3 /* protected by sysfs_lock */
51#define FLAG_SYSFS 4 /* exported via /sys/class/gpio/control */
47 52
48#ifdef CONFIG_DEBUG_FS 53#ifdef CONFIG_DEBUG_FS
49 const char *label; 54 const char *label;
@@ -151,6 +156,482 @@ err:
151 return ret; 156 return ret;
152} 157}
153 158
159#ifdef CONFIG_GPIO_SYSFS
160
161/* lock protects against unexport_gpio() being called while
162 * sysfs files are active.
163 */
164static DEFINE_MUTEX(sysfs_lock);
165
166/*
167 * /sys/class/gpio/gpioN... only for GPIOs that are exported
168 * /direction
169 * * MAY BE OMITTED if kernel won't allow direction changes
170 * * is read/write as "in" or "out"
171 * * may also be written as "high" or "low", initializing
172 * output value as specified ("out" implies "low")
173 * /value
174 * * always readable, subject to hardware behavior
175 * * may be writable, as zero/nonzero
176 *
177 * REVISIT there will likely be an attribute for configuring async
178 * notifications, e.g. to specify polling interval or IRQ trigger type
179 * that would for example trigger a poll() on the "value".
180 */
181
182static ssize_t gpio_direction_show(struct device *dev,
183 struct device_attribute *attr, char *buf)
184{
185 const struct gpio_desc *desc = dev_get_drvdata(dev);
186 ssize_t status;
187
188 mutex_lock(&sysfs_lock);
189
190 if (!test_bit(FLAG_EXPORT, &desc->flags))
191 status = -EIO;
192 else
193 status = sprintf(buf, "%s\n",
194 test_bit(FLAG_IS_OUT, &desc->flags)
195 ? "out" : "in");
196
197 mutex_unlock(&sysfs_lock);
198 return status;
199}
200
201static ssize_t gpio_direction_store(struct device *dev,
202 struct device_attribute *attr, const char *buf, size_t size)
203{
204 const struct gpio_desc *desc = dev_get_drvdata(dev);
205 unsigned gpio = desc - gpio_desc;
206 ssize_t status;
207
208 mutex_lock(&sysfs_lock);
209
210 if (!test_bit(FLAG_EXPORT, &desc->flags))
211 status = -EIO;
212 else if (sysfs_streq(buf, "high"))
213 status = gpio_direction_output(gpio, 1);
214 else if (sysfs_streq(buf, "out") || sysfs_streq(buf, "low"))
215 status = gpio_direction_output(gpio, 0);
216 else if (sysfs_streq(buf, "in"))
217 status = gpio_direction_input(gpio);
218 else
219 status = -EINVAL;
220
221 mutex_unlock(&sysfs_lock);
222 return status ? : size;
223}
224
225static const DEVICE_ATTR(direction, 0644,
226 gpio_direction_show, gpio_direction_store);
227
228static ssize_t gpio_value_show(struct device *dev,
229 struct device_attribute *attr, char *buf)
230{
231 const struct gpio_desc *desc = dev_get_drvdata(dev);
232 unsigned gpio = desc - gpio_desc;
233 ssize_t status;
234
235 mutex_lock(&sysfs_lock);
236
237 if (!test_bit(FLAG_EXPORT, &desc->flags))
238 status = -EIO;
239 else
240 status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio));
241
242 mutex_unlock(&sysfs_lock);
243 return status;
244}
245
246static ssize_t gpio_value_store(struct device *dev,
247 struct device_attribute *attr, const char *buf, size_t size)
248{
249 const struct gpio_desc *desc = dev_get_drvdata(dev);
250 unsigned gpio = desc - gpio_desc;
251 ssize_t status;
252
253 mutex_lock(&sysfs_lock);
254
255 if (!test_bit(FLAG_EXPORT, &desc->flags))
256 status = -EIO;
257 else if (!test_bit(FLAG_IS_OUT, &desc->flags))
258 status = -EPERM;
259 else {
260 long value;
261
262 status = strict_strtol(buf, 0, &value);
263 if (status == 0) {
264 gpio_set_value_cansleep(gpio, value != 0);
265 status = size;
266 }
267 }
268
269 mutex_unlock(&sysfs_lock);
270 return status;
271}
272
273static /*const*/ DEVICE_ATTR(value, 0644,
274 gpio_value_show, gpio_value_store);
275
276static const struct attribute *gpio_attrs[] = {
277 &dev_attr_direction.attr,
278 &dev_attr_value.attr,
279 NULL,
280};
281
282static const struct attribute_group gpio_attr_group = {
283 .attrs = (struct attribute **) gpio_attrs,
284};
285
286/*
287 * /sys/class/gpio/gpiochipN/
288 * /base ... matching gpio_chip.base (N)
289 * /label ... matching gpio_chip.label
290 * /ngpio ... matching gpio_chip.ngpio
291 */
292
293static ssize_t chip_base_show(struct device *dev,
294 struct device_attribute *attr, char *buf)
295{
296 const struct gpio_chip *chip = dev_get_drvdata(dev);
297
298 return sprintf(buf, "%d\n", chip->base);
299}
300static DEVICE_ATTR(base, 0444, chip_base_show, NULL);
301
302static ssize_t chip_label_show(struct device *dev,
303 struct device_attribute *attr, char *buf)
304{
305 const struct gpio_chip *chip = dev_get_drvdata(dev);
306
307 return sprintf(buf, "%s\n", chip->label ? : "");
308}
309static DEVICE_ATTR(label, 0444, chip_label_show, NULL);
310
311static ssize_t chip_ngpio_show(struct device *dev,
312 struct device_attribute *attr, char *buf)
313{
314 const struct gpio_chip *chip = dev_get_drvdata(dev);
315
316 return sprintf(buf, "%u\n", chip->ngpio);
317}
318static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL);
319
320static const struct attribute *gpiochip_attrs[] = {
321 &dev_attr_base.attr,
322 &dev_attr_label.attr,
323 &dev_attr_ngpio.attr,
324 NULL,
325};
326
327static const struct attribute_group gpiochip_attr_group = {
328 .attrs = (struct attribute **) gpiochip_attrs,
329};
330
331/*
332 * /sys/class/gpio/export ... write-only
333 * integer N ... number of GPIO to export (full access)
334 * /sys/class/gpio/unexport ... write-only
335 * integer N ... number of GPIO to unexport
336 */
337static ssize_t export_store(struct class *class, const char *buf, size_t len)
338{
339 long gpio;
340 int status;
341
342 status = strict_strtol(buf, 0, &gpio);
343 if (status < 0)
344 goto done;
345
346 /* No extra locking here; FLAG_SYSFS just signifies that the
347	 * request and export were done on behalf of userspace, so
348 * they may be undone on its behalf too.
349 */
350
351 status = gpio_request(gpio, "sysfs");
352 if (status < 0)
353 goto done;
354
355 status = gpio_export(gpio, true);
356 if (status < 0)
357 gpio_free(gpio);
358 else
359 set_bit(FLAG_SYSFS, &gpio_desc[gpio].flags);
360
361done:
362 if (status)
363 pr_debug("%s: status %d\n", __func__, status);
364 return status ? : len;
365}
366
367static ssize_t unexport_store(struct class *class, const char *buf, size_t len)
368{
369 long gpio;
370 int status;
371
372 status = strict_strtol(buf, 0, &gpio);
373 if (status < 0)
374 goto done;
375
376 status = -EINVAL;
377
378 /* reject bogus commands (gpio_unexport ignores them) */
379 if (!gpio_is_valid(gpio))
380 goto done;
381
382 /* No extra locking here; FLAG_SYSFS just signifies that the
383	 * request and export were done on behalf of userspace, so
384 * they may be undone on its behalf too.
385 */
386 if (test_and_clear_bit(FLAG_SYSFS, &gpio_desc[gpio].flags)) {
387 status = 0;
388 gpio_free(gpio);
389 }
390done:
391 if (status)
392 pr_debug("%s: status %d\n", __func__, status);
393 return status ? : len;
394}
395
396static struct class_attribute gpio_class_attrs[] = {
397 __ATTR(export, 0200, NULL, export_store),
398 __ATTR(unexport, 0200, NULL, unexport_store),
399 __ATTR_NULL,
400};
401
402static struct class gpio_class = {
403 .name = "gpio",
404 .owner = THIS_MODULE,
405
406 .class_attrs = gpio_class_attrs,
407};
408
409
410/**
411 * gpio_export - export a GPIO through sysfs
412 * @gpio: gpio to make available, already requested
413 * @direction_may_change: true if userspace may change gpio direction
414 * Context: arch_initcall or later
415 *
416 * When drivers want to make a GPIO accessible to userspace after they
417 * have requested it -- perhaps while debugging, or as part of their
418 * public interface -- they may use this routine. If the GPIO can
419 * change direction (some can't) and the caller allows it, userspace
420 * will see a "direction" sysfs attribute which may be used to change
421 * the gpio's direction. A "value" attribute will always be provided.
422 *
423 * Returns zero on success, else an error.
424 */
425int gpio_export(unsigned gpio, bool direction_may_change)
426{
427 unsigned long flags;
428 struct gpio_desc *desc;
429 int status = -EINVAL;
430
431 /* can't export until sysfs is available ... */
432 if (!gpio_class.p) {
433 pr_debug("%s: called too early!\n", __func__);
434 return -ENOENT;
435 }
436
437 if (!gpio_is_valid(gpio))
438 goto done;
439
440 mutex_lock(&sysfs_lock);
441
442 spin_lock_irqsave(&gpio_lock, flags);
443 desc = &gpio_desc[gpio];
444 if (test_bit(FLAG_REQUESTED, &desc->flags)
445 && !test_bit(FLAG_EXPORT, &desc->flags)) {
446 status = 0;
447 if (!desc->chip->direction_input
448 || !desc->chip->direction_output)
449 direction_may_change = false;
450 }
451 spin_unlock_irqrestore(&gpio_lock, flags);
452
453 if (status == 0) {
454 struct device *dev;
455
456 dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
457 desc, "gpio%d", gpio);
458 if (dev) {
459 if (direction_may_change)
460 status = sysfs_create_group(&dev->kobj,
461 &gpio_attr_group);
462 else
463 status = device_create_file(dev,
464 &dev_attr_value);
465 if (status != 0)
466 device_unregister(dev);
467 } else
468 status = -ENODEV;
469 if (status == 0)
470 set_bit(FLAG_EXPORT, &desc->flags);
471 }
472
473 mutex_unlock(&sysfs_lock);
474
475done:
476 if (status)
477 pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
478
479 return status;
480}
481EXPORT_SYMBOL_GPL(gpio_export);
482
483static int match_export(struct device *dev, void *data)
484{
485 return dev_get_drvdata(dev) == data;
486}
487
488/**
489 * gpio_unexport - reverse effect of gpio_export()
490 * @gpio: gpio to make unavailable
491 *
492 * This is implicit on gpio_free().
493 */
494void gpio_unexport(unsigned gpio)
495{
496 struct gpio_desc *desc;
497 int status = -EINVAL;
498
499 if (!gpio_is_valid(gpio))
500 goto done;
501
502 mutex_lock(&sysfs_lock);
503
504 desc = &gpio_desc[gpio];
505 if (test_bit(FLAG_EXPORT, &desc->flags)) {
506 struct device *dev = NULL;
507
508 dev = class_find_device(&gpio_class, NULL, desc, match_export);
509 if (dev) {
510 clear_bit(FLAG_EXPORT, &desc->flags);
511 put_device(dev);
512 device_unregister(dev);
513 status = 0;
514 } else
515 status = -ENODEV;
516 }
517
518 mutex_unlock(&sysfs_lock);
519done:
520 if (status)
521 pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
522}
523EXPORT_SYMBOL_GPL(gpio_unexport);
524
525static int gpiochip_export(struct gpio_chip *chip)
526{
527 int status;
528 struct device *dev;
529
530 /* Many systems register gpio chips for SOC support very early,
531 * before driver model support is available. In those cases we
532 * export this later, in gpiolib_sysfs_init() ... here we just
533 * verify that _some_ field of gpio_class got initialized.
534 */
535 if (!gpio_class.p)
536 return 0;
537
538 /* use chip->base for the ID; it's already known to be unique */
539 mutex_lock(&sysfs_lock);
540 dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip,
541 "gpiochip%d", chip->base);
542 if (dev) {
543 status = sysfs_create_group(&dev->kobj,
544 &gpiochip_attr_group);
545 } else
546 status = -ENODEV;
547 chip->exported = (status == 0);
548 mutex_unlock(&sysfs_lock);
549
550 if (status) {
551 unsigned long flags;
552 unsigned gpio;
553
554 spin_lock_irqsave(&gpio_lock, flags);
555 gpio = chip->base;
556 while (gpio_desc[gpio].chip == chip)
557 gpio_desc[gpio++].chip = NULL;
558 spin_unlock_irqrestore(&gpio_lock, flags);
559
560 pr_debug("%s: chip %s status %d\n", __func__,
561 chip->label, status);
562 }
563
564 return status;
565}
566
567static void gpiochip_unexport(struct gpio_chip *chip)
568{
569 int status;
570 struct device *dev;
571
572 mutex_lock(&sysfs_lock);
573 dev = class_find_device(&gpio_class, NULL, chip, match_export);
574 if (dev) {
575 put_device(dev);
576 device_unregister(dev);
577 chip->exported = 0;
578 status = 0;
579 } else
580 status = -ENODEV;
581 mutex_unlock(&sysfs_lock);
582
583 if (status)
584 pr_debug("%s: chip %s status %d\n", __func__,
585 chip->label, status);
586}
587
588static int __init gpiolib_sysfs_init(void)
589{
590 int status;
591 unsigned long flags;
592 unsigned gpio;
593
594 status = class_register(&gpio_class);
595 if (status < 0)
596 return status;
597
598 /* Scan and register the gpio_chips which registered very
599 * early (e.g. before the class_register above was called).
600 *
601	 * We run before arch_initcall() so chip->dev nodes can have been
602	 * registered, and so arch_initcall() can always gpio_export().
603 */
604 spin_lock_irqsave(&gpio_lock, flags);
605 for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) {
606 struct gpio_chip *chip;
607
608 chip = gpio_desc[gpio].chip;
609 if (!chip || chip->exported)
610 continue;
611
612 spin_unlock_irqrestore(&gpio_lock, flags);
613 status = gpiochip_export(chip);
614 spin_lock_irqsave(&gpio_lock, flags);
615 }
616 spin_unlock_irqrestore(&gpio_lock, flags);
617
618
619 return status;
620}
621postcore_initcall(gpiolib_sysfs_init);
622
623#else
624static inline int gpiochip_export(struct gpio_chip *chip)
625{
626 return 0;
627}
628
629static inline void gpiochip_unexport(struct gpio_chip *chip)
630{
631}
632
633#endif /* CONFIG_GPIO_SYSFS */
634
154/** 635/**
155 * gpiochip_add() - register a gpio_chip 636 * gpiochip_add() - register a gpio_chip
156 * @chip: the chip to register, with chip->base initialized 637 * @chip: the chip to register, with chip->base initialized
@@ -160,6 +641,11 @@ err:
160 * because the chip->base is invalid or already associated with a 641 * because the chip->base is invalid or already associated with a
161 * different chip. Otherwise it returns zero as a success code. 642 * different chip. Otherwise it returns zero as a success code.
162 * 643 *
644 * When gpiochip_add() is called very early during boot, so that GPIOs
645 * can be freely used, the chip->dev device must be registered before
646 * the gpio framework's arch_initcall(). Otherwise sysfs initialization
647 * for GPIOs will fail rudely.
648 *
163 * If chip->base is negative, this requests dynamic assignment of 649 * If chip->base is negative, this requests dynamic assignment of
164 * a range of valid GPIOs. 650 * a range of valid GPIOs.
165 */ 651 */
@@ -182,7 +668,7 @@ int gpiochip_add(struct gpio_chip *chip)
182 base = gpiochip_find_base(chip->ngpio); 668 base = gpiochip_find_base(chip->ngpio);
183 if (base < 0) { 669 if (base < 0) {
184 status = base; 670 status = base;
185 goto fail_unlock; 671 goto unlock;
186 } 672 }
187 chip->base = base; 673 chip->base = base;
188 } 674 }
@@ -197,12 +683,23 @@ int gpiochip_add(struct gpio_chip *chip)
197 if (status == 0) { 683 if (status == 0) {
198 for (id = base; id < base + chip->ngpio; id++) { 684 for (id = base; id < base + chip->ngpio; id++) {
199 gpio_desc[id].chip = chip; 685 gpio_desc[id].chip = chip;
200 gpio_desc[id].flags = 0; 686
687 /* REVISIT: most hardware initializes GPIOs as
688 * inputs (often with pullups enabled) so power
689 * usage is minimized. Linux code should set the
690 * gpio direction first thing; but until it does,
691 * we may expose the wrong direction in sysfs.
692 */
693 gpio_desc[id].flags = !chip->direction_input
694 ? (1 << FLAG_IS_OUT)
695 : 0;
201 } 696 }
202 } 697 }
203 698
204fail_unlock: 699unlock:
205 spin_unlock_irqrestore(&gpio_lock, flags); 700 spin_unlock_irqrestore(&gpio_lock, flags);
701 if (status == 0)
702 status = gpiochip_export(chip);
206fail: 703fail:
207 /* failures here can mean systems won't boot... */ 704 /* failures here can mean systems won't boot... */
208 if (status) 705 if (status)
@@ -239,6 +736,10 @@ int gpiochip_remove(struct gpio_chip *chip)
239 } 736 }
240 737
241 spin_unlock_irqrestore(&gpio_lock, flags); 738 spin_unlock_irqrestore(&gpio_lock, flags);
739
740 if (status == 0)
741 gpiochip_unexport(chip);
742
242 return status; 743 return status;
243} 744}
244EXPORT_SYMBOL_GPL(gpiochip_remove); 745EXPORT_SYMBOL_GPL(gpiochip_remove);
@@ -296,6 +797,8 @@ void gpio_free(unsigned gpio)
296 return; 797 return;
297 } 798 }
298 799
800 gpio_unexport(gpio);
801
299 spin_lock_irqsave(&gpio_lock, flags); 802 spin_lock_irqsave(&gpio_lock, flags);
300 803
301 desc = &gpio_desc[gpio]; 804 desc = &gpio_desc[gpio];
@@ -534,10 +1037,6 @@ EXPORT_SYMBOL_GPL(gpio_set_value_cansleep);
534 1037
535#ifdef CONFIG_DEBUG_FS 1038#ifdef CONFIG_DEBUG_FS
536 1039
537#include <linux/debugfs.h>
538#include <linux/seq_file.h>
539
540
541static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip) 1040static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
542{ 1041{
543 unsigned i; 1042 unsigned i;
@@ -614,17 +1113,28 @@ static int gpiolib_show(struct seq_file *s, void *unused)
614 /* REVISIT this isn't locked against gpio_chip removal ... */ 1113 /* REVISIT this isn't locked against gpio_chip removal ... */
615 1114
616 for (gpio = 0; gpio_is_valid(gpio); gpio++) { 1115 for (gpio = 0; gpio_is_valid(gpio); gpio++) {
1116 struct device *dev;
1117
617 if (chip == gpio_desc[gpio].chip) 1118 if (chip == gpio_desc[gpio].chip)
618 continue; 1119 continue;
619 chip = gpio_desc[gpio].chip; 1120 chip = gpio_desc[gpio].chip;
620 if (!chip) 1121 if (!chip)
621 continue; 1122 continue;
622 1123
623 seq_printf(s, "%sGPIOs %d-%d, %s%s:\n", 1124 seq_printf(s, "%sGPIOs %d-%d",
624 started ? "\n" : "", 1125 started ? "\n" : "",
625 chip->base, chip->base + chip->ngpio - 1, 1126 chip->base, chip->base + chip->ngpio - 1);
626 chip->label ? : "generic", 1127 dev = chip->dev;
627 chip->can_sleep ? ", can sleep" : ""); 1128 if (dev)
1129 seq_printf(s, ", %s/%s",
1130 dev->bus ? dev->bus->name : "no-bus",
1131 dev->bus_id);
1132 if (chip->label)
1133 seq_printf(s, ", %s", chip->label);
1134 if (chip->can_sleep)
1135 seq_printf(s, ", can sleep");
1136 seq_printf(s, ":\n");
1137
628 started = 1; 1138 started = 1;
629 if (chip->dbg_show) 1139 if (chip->dbg_show)
630 chip->dbg_show(s, chip); 1140 chip->dbg_show(s, chip);
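
The sysfs class added above lets userspace claim a pin by writing its number to /sys/class/gpio/export and then using the per-pin direction and value files; kernel drivers reach the same machinery through gpio_export(). A minimal in-kernel sketch against the signature introduced in this patch (the GPIO number and label are placeholders):

	#include <linux/gpio.h>

	static int example_export_debug_pin(unsigned gpio)
	{
		int err;

		err = gpio_request(gpio, "debug-pin");
		if (err)
			return err;

		err = gpio_direction_input(gpio);
		if (err)
			goto out_free;

		/* false: userspace may read the value, not flip direction */
		err = gpio_export(gpio, false);
		if (err)
			goto out_free;
		return 0;

	out_free:
		gpio_free(gpio);
		return err;
	}
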
diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c
new file mode 100644
index 000000000000..b51c8135ca28
--- /dev/null
+++ b/drivers/gpio/max732x.c
@@ -0,0 +1,385 @@
1/*
2 * max732x.c - I2C Port Expander with 8/16 I/O
3 *
4 * Copyright (C) 2007 Marvell International Ltd.
5 * Copyright (C) 2008 Jack Ren <jack.ren@marvell.com>
6 * Copyright (C) 2008 Eric Miao <eric.miao@marvell.com>
7 *
8 * Derived from drivers/gpio/pca953x.c
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 */
14
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/slab.h>
18#include <linux/string.h>
19#include <linux/gpio.h>
20
21#include <linux/i2c.h>
22#include <linux/i2c/max732x.h>
23
24
25/*
26 * Each port of MAX732x (including MAX7319) falls into one of the
27 * following three types:
28 *
29 * - Push Pull Output
30 * - Input
31 * - Open Drain I/O
32 *
33 * designated by 'O', 'I' and 'P' respectively, according to MAXIM's
34 * datasheets.
35 *
36 * There are two groups of I/O ports, each group usually includes
37 * up to 8 I/O ports, and is accessed by a specific I2C address:
38 *
39 * - Group A : by I2C address 0b'110xxxx
40 * - Group B : by I2C address 0b'101xxxx
41 *
42 * where 'xxxx' is decided by the connections of pin AD2/AD0. The
43 * address used also affects the initial state of output signals.
44 *
45 * Within each group of ports, there are five known combinations of
46 * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for
47 * the detailed organization of these ports.
48 *
49 * GPIO numbers range from 'gpio_base + 0' to 'gpio_base + 8/16',
50 * and GPIOs from GROUP_A are numbered before those from GROUP_B
51 * (if there are two groups).
52 *
53 * NOTE: MAX7328/MAX7329 are drop-in replacements for PCF8574/a, so
54 * they are not supported by this driver.
55 */
56
57#define PORT_NONE 0x0 /* '/' No Port */
58#define PORT_OUTPUT 0x1 /* 'O' Push-Pull, Output Only */
59#define PORT_INPUT 0x2 /* 'I' Input Only */
60#define PORT_OPENDRAIN 0x3 /* 'P' Open-Drain, I/O */
61
62#define IO_4I4O 0x5AA5 /* O7 O6 I5 I4 I3 I2 O1 O0 */
63#define IO_4P4O 0x5FF5 /* O7 O6 P5 P4 P3 P2 O1 O0 */
64#define IO_8I 0xAAAA /* I7 I6 I5 I4 I3 I2 I1 I0 */
65#define IO_8P 0xFFFF /* P7 P6 P5 P4 P3 P2 P1 P0 */
66#define IO_8O 0x5555 /* O7 O6 O5 O4 O3 O2 O1 O0 */
67
68#define GROUP_A(x) ((x) & 0xffff) /* I2C Addr: 0b'110xxxx */
69#define GROUP_B(x) ((x) << 16) /* I2C Addr: 0b'101xxxx */
70
71static const struct i2c_device_id max732x_id[] = {
72 { "max7319", GROUP_A(IO_8I) },
73 { "max7320", GROUP_B(IO_8O) },
74 { "max7321", GROUP_A(IO_8P) },
75 { "max7322", GROUP_A(IO_4I4O) },
76 { "max7323", GROUP_A(IO_4P4O) },
77 { "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) },
78 { "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) },
79 { "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) },
80 { "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) },
81 { },
82};
83MODULE_DEVICE_TABLE(i2c, max732x_id);
84
85struct max732x_chip {
86 struct gpio_chip gpio_chip;
87
88 struct i2c_client *client; /* "main" client */
89 struct i2c_client *client_dummy;
90 struct i2c_client *client_group_a;
91 struct i2c_client *client_group_b;
92
93 unsigned int mask_group_a;
94 unsigned int dir_input;
95 unsigned int dir_output;
96
97 struct mutex lock;
98 uint8_t reg_out[2];
99};
100
101static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val)
102{
103 struct i2c_client *client;
104 int ret;
105
106 client = group_a ? chip->client_group_a : chip->client_group_b;
107 ret = i2c_smbus_write_byte(client, val);
108 if (ret < 0) {
109 dev_err(&client->dev, "failed writing\n");
110 return ret;
111 }
112
113 return 0;
114}
115
116static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val)
117{
118 struct i2c_client *client;
119 int ret;
120
121 client = group_a ? chip->client_group_a : chip->client_group_b;
122 ret = i2c_smbus_read_byte(client);
123 if (ret < 0) {
124 dev_err(&client->dev, "failed reading\n");
125 return ret;
126 }
127
128 *val = (uint8_t)ret;
129 return 0;
130}
131
132static inline int is_group_a(struct max732x_chip *chip, unsigned off)
133{
134 return (1u << off) & chip->mask_group_a;
135}
136
137static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off)
138{
139 struct max732x_chip *chip;
140 uint8_t reg_val;
141 int ret;
142
143 chip = container_of(gc, struct max732x_chip, gpio_chip);
144
145 ret = max732x_read(chip, is_group_a(chip, off), &reg_val);
146 if (ret < 0)
147 return 0;
148
149 return reg_val & (1u << (off & 0x7));
150}
151
152static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
153{
154 struct max732x_chip *chip;
155 uint8_t reg_out, mask = 1u << (off & 0x7);
156 int ret;
157
158 chip = container_of(gc, struct max732x_chip, gpio_chip);
159
160 mutex_lock(&chip->lock);
161
162 reg_out = (off > 7) ? chip->reg_out[1] : chip->reg_out[0];
163 reg_out = (val) ? reg_out | mask : reg_out & ~mask;
164
165 ret = max732x_write(chip, is_group_a(chip, off), reg_out);
166 if (ret < 0)
167 goto out;
168
169 /* update the shadow register then */
170 if (off > 7)
171 chip->reg_out[1] = reg_out;
172 else
173 chip->reg_out[0] = reg_out;
174out:
175 mutex_unlock(&chip->lock);
176}
177
178static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
179{
180 struct max732x_chip *chip;
181 unsigned int mask = 1u << off;
182
183 chip = container_of(gc, struct max732x_chip, gpio_chip);
184
185 if ((mask & chip->dir_input) == 0) {
186 dev_dbg(&chip->client->dev, "%s port %d is output only\n",
187 chip->client->name, off);
188 return -EACCES;
189 }
190
191 return 0;
192}
193
194static int max732x_gpio_direction_output(struct gpio_chip *gc,
195 unsigned off, int val)
196{
197 struct max732x_chip *chip;
198 unsigned int mask = 1u << off;
199
200 chip = container_of(gc, struct max732x_chip, gpio_chip);
201
202 if ((mask & chip->dir_output) == 0) {
203 dev_dbg(&chip->client->dev, "%s port %d is input only\n",
204 chip->client->name, off);
205 return -EACCES;
206 }
207
208 max732x_gpio_set_value(gc, off, val);
209 return 0;
210}
211
212static int __devinit max732x_setup_gpio(struct max732x_chip *chip,
213 const struct i2c_device_id *id,
214 unsigned gpio_start)
215{
216 struct gpio_chip *gc = &chip->gpio_chip;
217 uint32_t id_data = id->driver_data;
218 int i, port = 0;
219
220 for (i = 0; i < 16; i++, id_data >>= 2) {
221 unsigned int mask = 1 << port;
222
223 switch (id_data & 0x3) {
224 case PORT_OUTPUT:
225 chip->dir_output |= mask;
226 break;
227 case PORT_INPUT:
228 chip->dir_input |= mask;
229 break;
230 case PORT_OPENDRAIN:
231 chip->dir_output |= mask;
232 chip->dir_input |= mask;
233 break;
234 default:
235 continue;
236 }
237
238 if (i < 8)
239 chip->mask_group_a |= mask;
240 port++;
241 }
242
243 if (chip->dir_input)
244 gc->direction_input = max732x_gpio_direction_input;
245 if (chip->dir_output) {
246 gc->direction_output = max732x_gpio_direction_output;
247 gc->set = max732x_gpio_set_value;
248 }
249 gc->get = max732x_gpio_get_value;
250 gc->can_sleep = 1;
251
252 gc->base = gpio_start;
253 gc->ngpio = port;
254 gc->label = chip->client->name;
255 gc->owner = THIS_MODULE;
256
257 return port;
258}
259
260static int __devinit max732x_probe(struct i2c_client *client,
261 const struct i2c_device_id *id)
262{
263 struct max732x_platform_data *pdata;
264 struct max732x_chip *chip;
265 struct i2c_client *c;
266 uint16_t addr_a, addr_b;
267 int ret, nr_port;
268
269 pdata = client->dev.platform_data;
270 if (pdata == NULL)
271 return -ENODEV;
272
273 chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL);
274 if (chip == NULL)
275 return -ENOMEM;
276 chip->client = client;
277
278 nr_port = max732x_setup_gpio(chip, id, pdata->gpio_base);
279
280 addr_a = (client->addr & 0x0f) | 0x60;
281 addr_b = (client->addr & 0x0f) | 0x50;
282
283 switch (client->addr & 0x70) {
284 case 0x60:
285 chip->client_group_a = client;
286 if (nr_port > 7) {
287 c = i2c_new_dummy(client->adapter, addr_b);
288 chip->client_group_b = chip->client_dummy = c;
289 }
290 break;
291 case 0x50:
292 chip->client_group_b = client;
293 if (nr_port > 7) {
294 c = i2c_new_dummy(client->adapter, addr_a);
295 chip->client_group_a = chip->client_dummy = c;
296 }
297 break;
298 default:
299 dev_err(&client->dev, "invalid I2C address specified %02x\n",
300 client->addr);
301 ret = -EINVAL;
302 goto out_failed;
303 }
304
305 mutex_init(&chip->lock);
306
307 max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]);
308 if (nr_port > 7)
309 max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]);
310
311 ret = gpiochip_add(&chip->gpio_chip);
312 if (ret)
313 goto out_failed;
314
315 if (pdata->setup) {
316 ret = pdata->setup(client, chip->gpio_chip.base,
317 chip->gpio_chip.ngpio, pdata->context);
318 if (ret < 0)
319 dev_warn(&client->dev, "setup failed, %d\n", ret);
320 }
321
322 i2c_set_clientdata(client, chip);
323 return 0;
324
325out_failed:
326 kfree(chip);
327 return ret;
328}
329
330static int __devexit max732x_remove(struct i2c_client *client)
331{
332 struct max732x_platform_data *pdata = client->dev.platform_data;
333 struct max732x_chip *chip = i2c_get_clientdata(client);
334 int ret;
335
336 if (pdata->teardown) {
337 ret = pdata->teardown(client, chip->gpio_chip.base,
338 chip->gpio_chip.ngpio, pdata->context);
339 if (ret < 0) {
340 dev_err(&client->dev, "%s failed, %d\n",
341 "teardown", ret);
342 return ret;
343 }
344 }
345
346 ret = gpiochip_remove(&chip->gpio_chip);
347 if (ret) {
348 dev_err(&client->dev, "%s failed, %d\n",
349 "gpiochip_remove()", ret);
350 return ret;
351 }
352
353 /* unregister any dummy i2c_client */
354 if (chip->client_dummy)
355 i2c_unregister_device(chip->client_dummy);
356
357 kfree(chip);
358 return 0;
359}
360
361static struct i2c_driver max732x_driver = {
362 .driver = {
363 .name = "max732x",
364 .owner = THIS_MODULE,
365 },
366 .probe = max732x_probe,
367 .remove = __devexit_p(max732x_remove),
368 .id_table = max732x_id,
369};
370
371static int __init max732x_init(void)
372{
373 return i2c_add_driver(&max732x_driver);
374}
375module_init(max732x_init);
376
377static void __exit max732x_exit(void)
378{
379 i2c_del_driver(&max732x_driver);
380}
381module_exit(max732x_exit);
382
383MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
384MODULE_DESCRIPTION("GPIO expander driver for MAX732X");
385MODULE_LICENSE("GPL");
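
The driver is configured purely through platform data. A hypothetical board-setup fragment for a MAX7325 on group-A address 0x68, with the bus number and gpio_base invented for illustration; the field name mirrors the pdata->gpio_base access in max732x_probe() above, but <linux/i2c/max732x.h> is authoritative:

	#include <linux/init.h>
	#include <linux/i2c.h>
	#include <linux/i2c/max732x.h>

	static struct max732x_platform_data example_max732x_pdata = {
		.gpio_base	= 208,		/* assumption: free GPIO range */
	};

	static struct i2c_board_info example_i2c_devs[] __initdata = {
		{
			I2C_BOARD_INFO("max7325", 0x68),	/* 0b'110xxxx */
			.platform_data = &example_max732x_pdata,
		},
	};

	/* From board init code, e.g.:
	 *	i2c_register_board_info(0, example_i2c_devs,
	 *				ARRAY_SIZE(example_i2c_devs));
	 */
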
diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
index 7f92fdd5f0e2..8a1b405fefda 100644
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -40,15 +40,26 @@ struct mcp23s08 {
40 struct spi_device *spi; 40 struct spi_device *spi;
41 u8 addr; 41 u8 addr;
42 42
43 u8 cache[11];
43 /* lock protects the cached values */ 44 /* lock protects the cached values */
44 struct mutex lock; 45 struct mutex lock;
45 u8 cache[11];
46 46
47 struct gpio_chip chip; 47 struct gpio_chip chip;
48 48
49 struct work_struct work; 49 struct work_struct work;
50}; 50};
51 51
52/* A given spi_device can represent up to four mcp23s08 chips
53 * sharing the same chipselect but using different addresses
54 * (e.g. chips #0 and #3 might be populated, but not #1 or #2).
55 * Driver data holds all the per-chip data.
56 */
57struct mcp23s08_driver_data {
58 unsigned ngpio;
59 struct mcp23s08 *mcp[4];
60 struct mcp23s08 chip[];
61};
62
52static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg) 63static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg)
53{ 64{
54 u8 tx[2], rx[1]; 65 u8 tx[2], rx[1];
@@ -208,25 +219,18 @@ done:
208 219
209/*----------------------------------------------------------------------*/ 220/*----------------------------------------------------------------------*/
210 221
211static int mcp23s08_probe(struct spi_device *spi) 222static int mcp23s08_probe_one(struct spi_device *spi, unsigned addr,
223 unsigned base, unsigned pullups)
212{ 224{
213 struct mcp23s08 *mcp; 225 struct mcp23s08_driver_data *data = spi_get_drvdata(spi);
214 struct mcp23s08_platform_data *pdata; 226 struct mcp23s08 *mcp = data->mcp[addr];
215 int status; 227 int status;
216 int do_update = 0; 228 int do_update = 0;
217 229
218 pdata = spi->dev.platform_data;
219 if (!pdata || pdata->slave > 3 || !pdata->base)
220 return -ENODEV;
221
222 mcp = kzalloc(sizeof *mcp, GFP_KERNEL);
223 if (!mcp)
224 return -ENOMEM;
225
226 mutex_init(&mcp->lock); 230 mutex_init(&mcp->lock);
227 231
228 mcp->spi = spi; 232 mcp->spi = spi;
229 mcp->addr = 0x40 | (pdata->slave << 1); 233 mcp->addr = 0x40 | (addr << 1);
230 234
231 mcp->chip.label = "mcp23s08", 235 mcp->chip.label = "mcp23s08",
232 236
@@ -236,26 +240,28 @@ static int mcp23s08_probe(struct spi_device *spi)
236 mcp->chip.set = mcp23s08_set; 240 mcp->chip.set = mcp23s08_set;
237 mcp->chip.dbg_show = mcp23s08_dbg_show; 241 mcp->chip.dbg_show = mcp23s08_dbg_show;
238 242
239 mcp->chip.base = pdata->base; 243 mcp->chip.base = base;
240 mcp->chip.ngpio = 8; 244 mcp->chip.ngpio = 8;
241 mcp->chip.can_sleep = 1; 245 mcp->chip.can_sleep = 1;
246 mcp->chip.dev = &spi->dev;
242 mcp->chip.owner = THIS_MODULE; 247 mcp->chip.owner = THIS_MODULE;
243 248
244 spi_set_drvdata(spi, mcp); 249 /* verify MCP_IOCON.SEQOP = 0, so sequential reads work,
245 250 * and MCP_IOCON.HAEN = 1, so we work with all chips.
246 /* verify MCP_IOCON.SEQOP = 0, so sequential reads work */ 251 */
247 status = mcp23s08_read(mcp, MCP_IOCON); 252 status = mcp23s08_read(mcp, MCP_IOCON);
248 if (status < 0) 253 if (status < 0)
249 goto fail; 254 goto fail;
250 if (status & IOCON_SEQOP) { 255 if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN)) {
251 status &= ~IOCON_SEQOP; 256 status &= ~IOCON_SEQOP;
257 status |= IOCON_HAEN;
252 status = mcp23s08_write(mcp, MCP_IOCON, (u8) status); 258 status = mcp23s08_write(mcp, MCP_IOCON, (u8) status);
253 if (status < 0) 259 if (status < 0)
254 goto fail; 260 goto fail;
255 } 261 }
256 262
257 /* configure ~100K pullups */ 263 /* configure ~100K pullups */
258 status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups); 264 status = mcp23s08_write(mcp, MCP_GPPU, pullups);
259 if (status < 0) 265 if (status < 0)
260 goto fail; 266 goto fail;
261 267
@@ -282,11 +288,58 @@ static int mcp23s08_probe(struct spi_device *spi)
282 tx[1] = MCP_IPOL; 288 tx[1] = MCP_IPOL;
283 memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2); 289 memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2);
284 status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0); 290 status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0);
285 291 if (status < 0)
286 /* FIXME check status... */ 292 goto fail;
287 } 293 }
288 294
289 status = gpiochip_add(&mcp->chip); 295 status = gpiochip_add(&mcp->chip);
296fail:
297 if (status < 0)
298 dev_dbg(&spi->dev, "can't setup chip %d, --> %d\n",
299 addr, status);
300 return status;
301}
302
303static int mcp23s08_probe(struct spi_device *spi)
304{
305 struct mcp23s08_platform_data *pdata;
306 unsigned addr;
307 unsigned chips = 0;
308 struct mcp23s08_driver_data *data;
309 int status;
310 unsigned base;
311
312 pdata = spi->dev.platform_data;
313 if (!pdata || !gpio_is_valid(pdata->base))
314 return -ENODEV;
315
316 for (addr = 0; addr < 4; addr++) {
317 if (!pdata->chip[addr].is_present)
318 continue;
319 chips++;
320 }
321 if (!chips)
322 return -ENODEV;
323
324 data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08),
325 GFP_KERNEL);
326 if (!data)
327 return -ENOMEM;
328 spi_set_drvdata(spi, data);
329
330 base = pdata->base;
331 for (addr = 0; addr < 4; addr++) {
332 if (!pdata->chip[addr].is_present)
333 continue;
334 chips--;
335 data->mcp[addr] = &data->chip[chips];
336 status = mcp23s08_probe_one(spi, addr, base,
337 pdata->chip[addr].pullups);
338 if (status < 0)
339 goto fail;
340 base += 8;
341 }
342 data->ngpio = base - pdata->base;
290 343
291 /* NOTE: these chips have a relatively sane IRQ framework, with 344 /* NOTE: these chips have a relatively sane IRQ framework, with
292 * per-signal masking and level/edge triggering. It's not yet 345 * per-signal masking and level/edge triggering. It's not yet
@@ -294,8 +347,9 @@ static int mcp23s08_probe(struct spi_device *spi)
294 */ 347 */
295 348
296 if (pdata->setup) { 349 if (pdata->setup) {
297 status = pdata->setup(spi, mcp->chip.base, 350 status = pdata->setup(spi,
298 mcp->chip.ngpio, pdata->context); 351 pdata->base, data->ngpio,
352 pdata->context);
299 if (status < 0) 353 if (status < 0)
300 dev_dbg(&spi->dev, "setup --> %d\n", status); 354 dev_dbg(&spi->dev, "setup --> %d\n", status);
301 } 355 }
@@ -303,19 +357,29 @@ static int mcp23s08_probe(struct spi_device *spi)
303 return 0; 357 return 0;
304 358
305fail: 359fail:
306 kfree(mcp); 360 for (addr = 0; addr < 4; addr++) {
361 int tmp;
362
363 if (!data->mcp[addr])
364 continue;
365 tmp = gpiochip_remove(&data->mcp[addr]->chip);
366 if (tmp < 0)
367 dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
368 }
369 kfree(data);
307 return status; 370 return status;
308} 371}
309 372
310static int mcp23s08_remove(struct spi_device *spi) 373static int mcp23s08_remove(struct spi_device *spi)
311{ 374{
312 struct mcp23s08 *mcp = spi_get_drvdata(spi); 375 struct mcp23s08_driver_data *data = spi_get_drvdata(spi);
313 struct mcp23s08_platform_data *pdata = spi->dev.platform_data; 376 struct mcp23s08_platform_data *pdata = spi->dev.platform_data;
377 unsigned addr;
314 int status = 0; 378 int status = 0;
315 379
316 if (pdata->teardown) { 380 if (pdata->teardown) {
317 status = pdata->teardown(spi, 381 status = pdata->teardown(spi,
318 mcp->chip.base, mcp->chip.ngpio, 382 pdata->base, data->ngpio,
319 pdata->context); 383 pdata->context);
320 if (status < 0) { 384 if (status < 0) {
321 dev_err(&spi->dev, "%s --> %d\n", "teardown", status); 385 dev_err(&spi->dev, "%s --> %d\n", "teardown", status);
@@ -323,11 +387,20 @@ static int mcp23s08_remove(struct spi_device *spi)
323 } 387 }
324 } 388 }
325 389
326 status = gpiochip_remove(&mcp->chip); 390 for (addr = 0; addr < 4; addr++) {
391 int tmp;
392
393 if (!data->mcp[addr])
394 continue;
395
396 tmp = gpiochip_remove(&data->mcp[addr]->chip);
397 if (tmp < 0) {
398 dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
399 status = tmp;
400 }
401 }
327 if (status == 0) 402 if (status == 0)
328 kfree(mcp); 403 kfree(data);
329 else
330 dev_err(&spi->dev, "%s --> %d\n", "remove", status);
331 return status; 404 return status;
332} 405}
333 406
@@ -355,4 +428,3 @@ static void __exit mcp23s08_exit(void)
355module_exit(mcp23s08_exit); 428module_exit(mcp23s08_exit);
356 429
357MODULE_LICENSE("GPL"); 430MODULE_LICENSE("GPL");
358
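
After this rework, one spi_device can describe up to four addressed chips. A hypothetical platform-data fragment for chips at hardware addresses 0 and 3, mirroring the pdata->chip[addr].is_present / .pullups / pdata->base accesses in mcp23s08_probe() above; <linux/spi/mcp23s08.h> has the real structure:

	#include <linux/spi/mcp23s08.h>

	static struct mcp23s08_platform_data example_mcp_pdata = {
		.chip[0] = {
			.is_present	= 1,
			.pullups	= 0x0f,	/* ~100K pullups, GP0..GP3 */
		},
		.chip[3] = {
			.is_present	= 1,
			.pullups	= 0,
		},
		.base = 224,			/* assumption: free GPIO range */
	};

	/* Passed as spi_board_info.platform_data for the "mcp23s08"
	 * entry; with these values the probe code above assigns GPIOs
	 * 224..231 to chip #0 and 232..239 to chip #3.
	 */
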
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index a380730b61ab..cc8468692ae0 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -188,6 +188,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios)
188 gc->base = chip->gpio_start; 188 gc->base = chip->gpio_start;
189 gc->ngpio = gpios; 189 gc->ngpio = gpios;
190 gc->label = chip->client->name; 190 gc->label = chip->client->name;
191 gc->dev = &chip->client->dev;
191 gc->owner = THIS_MODULE; 192 gc->owner = THIS_MODULE;
192} 193}
193 194
diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c
index d25d356c4f20..fc9c6ae739ee 100644
--- a/drivers/gpio/pcf857x.c
+++ b/drivers/gpio/pcf857x.c
@@ -200,6 +200,7 @@ static int pcf857x_probe(struct i2c_client *client,
200 200
201 gpio->chip.base = pdata->gpio_base; 201 gpio->chip.base = pdata->gpio_base;
202 gpio->chip.can_sleep = 1; 202 gpio->chip.can_sleep = 1;
203 gpio->chip.dev = &client->dev;
203 gpio->chip.owner = THIS_MODULE; 204 gpio->chip.owner = THIS_MODULE;
204 205
205 /* NOTE: the OnSemi jlc1562b is also largely compatible with 206 /* NOTE: the OnSemi jlc1562b is also largely compatible with
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index 50e0a4653741..a95cb9465d65 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -126,7 +126,7 @@ config ISP1301_OMAP
126 126
127config TPS65010 127config TPS65010
128 tristate "TPS6501x Power Management chips" 128 tristate "TPS6501x Power Management chips"
129 depends on HAVE_GPIO_LIB 129 depends on GPIOLIB
130 default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK 130 default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
131 help 131 help
132 If you say yes here you get support for the TPS6501x series of 132 If you say yes here you get support for the TPS6501x series of
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index 85949685191b..cf02e8fceb42 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -636,6 +636,8 @@ static int tps65010_probe(struct i2c_client *client,
636 tps->outmask = board->outmask; 636 tps->outmask = board->outmask;
637 637
638 tps->chip.label = client->name; 638 tps->chip.label = client->name;
639 tps->chip.dev = &client->dev;
640 tps->chip.owner = THIS_MODULE;
639 641
640 tps->chip.set = tps65010_gpio_set; 642 tps->chip.set = tps65010_gpio_set;
641 tps->chip.direction_output = tps65010_output; 643 tps->chip.direction_output = tps65010_output;
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index aad664d5259f..0d395979b2d1 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -70,7 +70,6 @@
70#include <linux/semaphore.h> 70#include <linux/semaphore.h>
71#include <linux/slab.h> 71#include <linux/slab.h>
72#include <linux/hil.h> 72#include <linux/hil.h>
73#include <linux/semaphore.h>
74#include <asm/io.h> 73#include <asm/io.h>
75#include <asm/system.h> 74#include <asm/system.h>
76 75
diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h
index 2044e7173ab4..cff7a6354334 100644
--- a/drivers/isdn/hisax/st5481.h
+++ b/drivers/isdn/hisax/st5481.h
@@ -220,7 +220,7 @@ enum {
220#define ERR(format, arg...) \ 220#define ERR(format, arg...) \
221printk(KERN_ERR "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) 221printk(KERN_ERR "%s:%s: " format "\n" , __FILE__, __func__ , ## arg)
222 222
223#define WARN(format, arg...) \ 223#define WARNING(format, arg...) \
224printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) 224printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__, __func__ , ## arg)
225 225
226#define INFO(format, arg...) \ 226#define INFO(format, arg...) \
@@ -412,7 +412,7 @@ struct st5481_adapter {
412({ \ 412({ \
413 int status; \ 413 int status; \
414 if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \ 414 if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \
415 WARN("usb_submit_urb failed,status=%d", status); \ 415 WARNING("usb_submit_urb failed,status=%d", status); \
416 } \ 416 } \
417 status; \ 417 status; \
418}) 418})
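
The WARN() -> WARNING() rename throughout this driver is presumably to avoid colliding with the kernel-wide WARN(condition, format...) macro added to <linux/kernel.h> around this time; a driver-local printf-style WARN(format, ...) has a different first argument and cannot coexist with it. A minimal userspace sketch of the two shapes coexisting after the rename (the kernel macro is simplified here):

	#include <stdio.h>

	/* Kernel-style macro (simplified): first argument is a condition. */
	#define WARN(cond, fmt, ...) \
		do { if (cond) fprintf(stderr, "WARN: " fmt "\n", ##__VA_ARGS__); } while (0)

	/* Driver-local printf-style macro, renamed so it no longer shadows WARN(). */
	#define WARNING(fmt, ...) \
		fprintf(stderr, "%s:%s: " fmt "\n", __FILE__, __func__, ##__VA_ARGS__)

	int main(void)
	{
		WARNING("urb status %d", -71);   /* unconditional, printf-style */
		WARN(2 + 2 != 4, "math broke");  /* conditional, kernel-style */
		return 0;
	}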
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c
index fa64115cd7c7..0074b600a0ef 100644
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -180,7 +180,7 @@ static void usb_b_out_complete(struct urb *urb)
180 DBG(4,"urb killed status %d", urb->status); 180 DBG(4,"urb killed status %d", urb->status);
181 return; // Give up 181 return; // Give up
182 default: 182 default:
183 WARN("urb status %d",urb->status); 183 WARNING("urb status %d",urb->status);
184 if (b_out->busy == 0) { 184 if (b_out->busy == 0) {
185 st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL); 185 st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL);
186 } 186 }
@@ -372,6 +372,6 @@ void st5481_b_l2l1(struct hisax_if *ifc, int pr, void *arg)
372 B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL); 372 B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL);
373 break; 373 break;
374 default: 374 default:
375 WARN("pr %#x\n", pr); 375 WARNING("pr %#x\n", pr);
376 } 376 }
377} 377}
diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c
index b8c4855cc889..077991c1cd05 100644
--- a/drivers/isdn/hisax/st5481_d.c
+++ b/drivers/isdn/hisax/st5481_d.c
@@ -389,7 +389,7 @@ static void usb_d_out_complete(struct urb *urb)
389 DBG(1,"urb killed status %d", urb->status); 389 DBG(1,"urb killed status %d", urb->status);
390 break; 390 break;
391 default: 391 default:
392 WARN("urb status %d",urb->status); 392 WARNING("urb status %d",urb->status);
393 if (d_out->busy == 0) { 393 if (d_out->busy == 0) {
394 st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter); 394 st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter);
395 } 395 }
@@ -420,7 +420,7 @@ static void dout_start_xmit(struct FsmInst *fsm, int event, void *arg)
420 isdnhdlc_out_init(&d_out->hdlc_state, 1, 0); 420 isdnhdlc_out_init(&d_out->hdlc_state, 1, 0);
421 421
422 if (test_and_set_bit(buf_nr, &d_out->busy)) { 422 if (test_and_set_bit(buf_nr, &d_out->busy)) {
423 WARN("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy); 423 WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
424 return; 424 return;
425 } 425 }
426 urb = d_out->urb[buf_nr]; 426 urb = d_out->urb[buf_nr];
@@ -601,7 +601,7 @@ void st5481_d_l2l1(struct hisax_if *hisax_d_if, int pr, void *arg)
601 FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL); 601 FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL);
602 break; 602 break;
603 default: 603 default:
604 WARN("pr %#x\n", pr); 604 WARNING("pr %#x\n", pr);
605 break; 605 break;
606 } 606 }
607} 607}
diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c
index 427a8b0520f5..ec3c0e507669 100644
--- a/drivers/isdn/hisax/st5481_usb.c
+++ b/drivers/isdn/hisax/st5481_usb.c
@@ -66,7 +66,7 @@ static void usb_ctrl_msg(struct st5481_adapter *adapter,
66 struct ctrl_msg *ctrl_msg; 66 struct ctrl_msg *ctrl_msg;
67 67
68 if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) { 68 if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) {
69 WARN("control msg FIFO full"); 69 WARNING("control msg FIFO full");
70 return; 70 return;
71 } 71 }
72 ctrl_msg = &ctrl->msg_fifo.data[w_index]; 72 ctrl_msg = &ctrl->msg_fifo.data[w_index];
@@ -139,7 +139,7 @@ static void usb_ctrl_complete(struct urb *urb)
139 DBG(1,"urb killed status %d", urb->status); 139 DBG(1,"urb killed status %d", urb->status);
140 return; // Give up 140 return; // Give up
141 default: 141 default:
142 WARN("urb status %d",urb->status); 142 WARNING("urb status %d",urb->status);
143 break; 143 break;
144 } 144 }
145 } 145 }
@@ -198,7 +198,7 @@ static void usb_int_complete(struct urb *urb)
198 DBG(2, "urb shutting down with status: %d", urb->status); 198 DBG(2, "urb shutting down with status: %d", urb->status);
199 return; 199 return;
200 default: 200 default:
201 WARN("nonzero urb status received: %d", urb->status); 201 WARNING("nonzero urb status received: %d", urb->status);
202 goto exit; 202 goto exit;
203 } 203 }
204 204
@@ -235,7 +235,7 @@ static void usb_int_complete(struct urb *urb)
235exit: 235exit:
236 status = usb_submit_urb (urb, GFP_ATOMIC); 236 status = usb_submit_urb (urb, GFP_ATOMIC);
237 if (status) 237 if (status)
238 WARN("usb_submit_urb failed with result %d", status); 238 WARNING("usb_submit_urb failed with result %d", status);
239} 239}
240 240
241/* ====================================================================== 241/* ======================================================================
@@ -257,7 +257,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
257 DBG(2,""); 257 DBG(2,"");
258 258
259 if ((status = usb_reset_configuration (dev)) < 0) { 259 if ((status = usb_reset_configuration (dev)) < 0) {
260 WARN("reset_configuration failed,status=%d",status); 260 WARNING("reset_configuration failed,status=%d",status);
261 return status; 261 return status;
262 } 262 }
263 263
@@ -269,7 +269,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
269 269
270 // Check if the config is sane 270 // Check if the config is sane
271 if ( altsetting->desc.bNumEndpoints != 7 ) { 271 if ( altsetting->desc.bNumEndpoints != 7 ) {
272 WARN("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints); 272 WARNING("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
273 return -EINVAL; 273 return -EINVAL;
274 } 274 }
275 275
@@ -279,7 +279,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
279 279
280 // Use alternative setting 3 on interface 0 to have 2B+D 280 // Use alternative setting 3 on interface 0 to have 2B+D
281 if ((status = usb_set_interface (dev, 0, 3)) < 0) { 281 if ((status = usb_set_interface (dev, 0, 3)) < 0) {
282 WARN("usb_set_interface failed,status=%d",status); 282 WARNING("usb_set_interface failed,status=%d",status);
283 return status; 283 return status;
284 } 284 }
285 285
@@ -497,7 +497,7 @@ static void usb_in_complete(struct urb *urb)
497 DBG(1,"urb killed status %d", urb->status); 497 DBG(1,"urb killed status %d", urb->status);
498 return; // Give up 498 return; // Give up
499 default: 499 default:
500 WARN("urb status %d",urb->status); 500 WARNING("urb status %d",urb->status);
501 break; 501 break;
502 } 502 }
503 } 503 }
@@ -523,7 +523,7 @@ static void usb_in_complete(struct urb *urb)
523 DBG(4,"count=%d",status); 523 DBG(4,"count=%d",status);
524 DBG_PACKET(0x400, in->rcvbuf, status); 524 DBG_PACKET(0x400, in->rcvbuf, status);
525 if (!(skb = dev_alloc_skb(status))) { 525 if (!(skb = dev_alloc_skb(status))) {
526 WARN("receive out of memory\n"); 526 WARNING("receive out of memory\n");
527 break; 527 break;
528 } 528 }
529 memcpy(skb_put(skb, status), in->rcvbuf, status); 529 memcpy(skb_put(skb, status), in->rcvbuf, status);
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 1a8de57289eb..37344aaee22f 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -98,16 +98,20 @@ static u32 lg_get_features(struct virtio_device *vdev)
98 return features; 98 return features;
99} 99}
100 100
101static void lg_set_features(struct virtio_device *vdev, u32 features) 101static void lg_finalize_features(struct virtio_device *vdev)
102{ 102{
103 unsigned int i; 103 unsigned int i, bits;
104 struct lguest_device_desc *desc = to_lgdev(vdev)->desc; 104 struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
105 /* Second half of bitmap is features we accept. */ 105 /* Second half of bitmap is features we accept. */
106 u8 *out_features = lg_features(desc) + desc->feature_len; 106 u8 *out_features = lg_features(desc) + desc->feature_len;
107 107
108 /* Give virtio_ring a chance to accept features. */
109 vring_transport_features(vdev);
110
108 memset(out_features, 0, desc->feature_len); 111 memset(out_features, 0, desc->feature_len);
109 for (i = 0; i < min(desc->feature_len * 8, 32); i++) { 112 bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
110 if (features & (1 << i)) 113 for (i = 0; i < bits; i++) {
114 if (test_bit(i, vdev->features))
111 out_features[i / 8] |= (1 << (i % 8)); 115 out_features[i / 8] |= (1 << (i % 8));
112 } 116 }
113} 117}
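
lg_finalize_features() above copies the bits the guest accepted out of vdev->features (a kernel bitmap, hence test_bit()) into the byte array in the device descriptor that the host reads, clamping the loop to whichever side is smaller. A standalone sketch of that bitmap-to-byte-array copy, with illustrative sizes and a hand-rolled test_bit():

	#include <stdio.h>
	#include <string.h>

	#define FEATURE_LEN 4	/* bytes available in the descriptor (illustrative) */

	static int test_bit(unsigned nr, const unsigned long *addr)
	{
		return (addr[nr / (8 * sizeof(long))] >> (nr % (8 * sizeof(long)))) & 1;
	}

	int main(void)
	{
		unsigned long features[1] = { 0x29 };	/* bits 0, 3, 5 accepted */
		unsigned char out[FEATURE_LEN];
		unsigned i, bits;

		memset(out, 0, sizeof(out));
		/* Clamp as the driver does with min_t(). */
		bits = (FEATURE_LEN < sizeof(features) ? FEATURE_LEN : sizeof(features)) * 8;
		for (i = 0; i < bits; i++)
			if (test_bit(i, features))
				out[i / 8] |= 1 << (i % 8);

		for (i = 0; i < FEATURE_LEN; i++)
			printf("byte %u = 0x%02x\n", i, out[i]);
		return 0;
	}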
@@ -297,7 +301,7 @@ static void lg_del_vq(struct virtqueue *vq)
297/* The ops structure which hooks everything together. */ 301/* The ops structure which hooks everything together. */
298static struct virtio_config_ops lguest_config_ops = { 302static struct virtio_config_ops lguest_config_ops = {
299 .get_features = lg_get_features, 303 .get_features = lg_get_features,
300 .set_features = lg_set_features, 304 .finalize_features = lg_finalize_features,
301 .get = lg_get, 305 .get = lg_get,
302 .set = lg_set, 306 .set = lg_set,
303 .get_status = lg_get_status, 307 .get_status = lg_get_status,
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 9f93c29fed35..1f57a99fd968 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -19,6 +19,14 @@ config MFD_SM501
19 interface. The device may be connected by PCI or local bus with 19 interface. The device may be connected by PCI or local bus with
20 varying functions enabled. 20 varying functions enabled.
21 21
22config MFD_SM501_GPIO
23 bool "Export GPIO via GPIO layer"
24 depends on MFD_SM501 && HAVE_GPIO_LIB
25 ---help---
26 This option uses the gpio library layer to export the 64 GPIO
27 lines on the SM501. The platform data is used to supply the
28 base number for the first GPIO line to register.
29
22config MFD_ASIC3 30config MFD_ASIC3
23 bool "Support for Compaq ASIC3" 31 bool "Support for Compaq ASIC3"
24 depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM 32 depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
@@ -28,7 +36,7 @@ config MFD_ASIC3
28 36
29config HTC_EGPIO 37config HTC_EGPIO
30 bool "HTC EGPIO support" 38 bool "HTC EGPIO support"
31 depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM 39 depends on GENERIC_HARDIRQS && GPIOLIB && ARM
32 help 40 help
33 This driver supports the CPLD egpio chip present on 41 This driver supports the CPLD egpio chip present on
34 several HTC phones. It provides basic support for input 42 several HTC phones. It provides basic support for input
@@ -44,7 +52,7 @@ config HTC_PASIC3
44 52
45config MFD_TC6393XB 53config MFD_TC6393XB
46 bool "Support Toshiba TC6393XB" 54 bool "Support Toshiba TC6393XB"
47 depends on HAVE_GPIO_LIB 55 depends on GPIOLIB
48 select MFD_CORE 56 select MFD_CORE
49 help 57 help
50 Support for Toshiba Mobile IO Controller TC6393XB 58 Support for Toshiba Mobile IO Controller TC6393XB
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 8872cc077519..6be43172dc65 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -318,6 +318,8 @@ static int __init egpio_probe(struct platform_device *pdev)
318 ei->chip[i].dev = &(pdev->dev); 318 ei->chip[i].dev = &(pdev->dev);
319 chip = &(ei->chip[i].chip); 319 chip = &(ei->chip[i].chip);
320 chip->label = "htc-egpio"; 320 chip->label = "htc-egpio";
321 chip->dev = &pdev->dev;
322 chip->owner = THIS_MODULE;
321 chip->get = egpio_get; 323 chip->get = egpio_get;
322 chip->set = egpio_set; 324 chip->set = egpio_set;
323 chip->direction_input = egpio_direction_input; 325 chip->direction_input = egpio_direction_input;
diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c
index 633cbba072f0..91b294dcc133 100644
--- a/drivers/mfd/htc-pasic3.c
+++ b/drivers/mfd/htc-pasic3.c
@@ -238,6 +238,8 @@ static int pasic3_remove(struct platform_device *pdev)
238 return 0; 238 return 0;
239} 239}
240 240
241MODULE_ALIAS("platform:pasic3");
242
241static struct platform_driver pasic3_driver = { 243static struct platform_driver pasic3_driver = {
242 .driver = { 244 .driver = {
243 .name = "pasic3", 245 .name = "pasic3",
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 1eab7cffceaa..b5272b5ce3fa 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -242,6 +242,8 @@ static int mcp_sa11x0_resume(struct platform_device *dev)
242/* 242/*
243 * The driver for the SA11x0 MCP port. 243 * The driver for the SA11x0 MCP port.
244 */ 244 */
245MODULE_ALIAS("platform:sa11x0-mcp");
246
245static struct platform_driver mcp_sa11x0_driver = { 247static struct platform_driver mcp_sa11x0_driver = {
246 .probe = mcp_sa11x0_probe, 248 .probe = mcp_sa11x0_probe,
247 .remove = mcp_sa11x0_remove, 249 .remove = mcp_sa11x0_remove,
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index d7d88ce053a6..0454be4266c1 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -36,7 +36,7 @@ static int mfd_add_device(struct platform_device *parent,
36 if (ret) 36 if (ret)
37 goto fail_device; 37 goto fail_device;
38 38
39 memzero(res, sizeof(res)); 39 memset(res, 0, sizeof(res));
40 for (r = 0; r < cell->num_resources; r++) { 40 for (r = 0; r < cell->num_resources; r++) {
41 res[r].name = cell->resources[r].name; 41 res[r].name = cell->resources[r].name;
42 res[r].flags = cell->resources[r].flags; 42 res[r].flags = cell->resources[r].flags;
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 2fe64734d8af..7aebad4c06ff 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -19,6 +19,7 @@
19#include <linux/device.h> 19#include <linux/device.h>
20#include <linux/platform_device.h> 20#include <linux/platform_device.h>
21#include <linux/pci.h> 21#include <linux/pci.h>
22#include <linux/i2c-gpio.h>
22 23
23#include <linux/sm501.h> 24#include <linux/sm501.h>
24#include <linux/sm501-regs.h> 25#include <linux/sm501-regs.h>
@@ -31,10 +32,37 @@ struct sm501_device {
31 struct platform_device pdev; 32 struct platform_device pdev;
32}; 33};
33 34
35struct sm501_gpio;
36
37#ifdef CONFIG_MFD_SM501_GPIO
38#include <linux/gpio.h>
39
40struct sm501_gpio_chip {
41 struct gpio_chip gpio;
42 struct sm501_gpio *ourgpio; /* to get back to parent. */
43 void __iomem *regbase;
44};
45
46struct sm501_gpio {
47 struct sm501_gpio_chip low;
48 struct sm501_gpio_chip high;
49 spinlock_t lock;
50
51 unsigned int registered : 1;
52 void __iomem *regs;
53 struct resource *regs_res;
54};
55#else
56struct sm501_gpio {
57 /* no gpio support, empty definition for sm501_devdata. */
58};
59#endif
60
34struct sm501_devdata { 61struct sm501_devdata {
35 spinlock_t reg_lock; 62 spinlock_t reg_lock;
36 struct mutex clock_lock; 63 struct mutex clock_lock;
37 struct list_head devices; 64 struct list_head devices;
65 struct sm501_gpio gpio;
38 66
39 struct device *dev; 67 struct device *dev;
40 struct resource *io_res; 68 struct resource *io_res;
@@ -42,6 +70,7 @@ struct sm501_devdata {
42 struct resource *regs_claim; 70 struct resource *regs_claim;
43 struct sm501_platdata *platdata; 71 struct sm501_platdata *platdata;
44 72
73
45 unsigned int in_suspend; 74 unsigned int in_suspend;
46 unsigned long pm_misc; 75 unsigned long pm_misc;
47 76
@@ -52,6 +81,7 @@ struct sm501_devdata {
52 unsigned int rev; 81 unsigned int rev;
53}; 82};
54 83
84
55#define MHZ (1000 * 1000) 85#define MHZ (1000 * 1000)
56 86
57#ifdef DEBUG 87#ifdef DEBUG
@@ -276,58 +306,6 @@ unsigned long sm501_modify_reg(struct device *dev,
276 306
277EXPORT_SYMBOL_GPL(sm501_modify_reg); 307EXPORT_SYMBOL_GPL(sm501_modify_reg);
278 308
279unsigned long sm501_gpio_get(struct device *dev,
280 unsigned long gpio)
281{
282 struct sm501_devdata *sm = dev_get_drvdata(dev);
283 unsigned long result;
284 unsigned long reg;
285
286 reg = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
287 result = readl(sm->regs + reg);
288
289 result >>= (gpio & 31);
290 return result & 1UL;
291}
292
293EXPORT_SYMBOL_GPL(sm501_gpio_get);
294
295void sm501_gpio_set(struct device *dev,
296 unsigned long gpio,
297 unsigned int to,
298 unsigned int dir)
299{
300 struct sm501_devdata *sm = dev_get_drvdata(dev);
301
302 unsigned long bit = 1 << (gpio & 31);
303 unsigned long base;
304 unsigned long save;
305 unsigned long val;
306
307 base = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
308 base += SM501_GPIO;
309
310 spin_lock_irqsave(&sm->reg_lock, save);
311
312 val = readl(sm->regs + base) & ~bit;
313 if (to)
314 val |= bit;
315 writel(val, sm->regs + base);
316
317 val = readl(sm->regs + SM501_GPIO_DDR_LOW) & ~bit;
318 if (dir)
319 val |= bit;
320
321 writel(val, sm->regs + SM501_GPIO_DDR_LOW);
322 sm501_sync_regs(sm);
323
324 spin_unlock_irqrestore(&sm->reg_lock, save);
325
326}
327
328EXPORT_SYMBOL_GPL(sm501_gpio_set);
329
330
331/* sm501_unit_power 309/* sm501_unit_power
332 * 310 *
333 * alters the power active gate to set specific units on or off 311 * alters the power active gate to set specific units on or off
@@ -906,6 +884,313 @@ static int sm501_register_display(struct sm501_devdata *sm,
906 return sm501_register_device(sm, pdev); 884 return sm501_register_device(sm, pdev);
907} 885}
908 886
887#ifdef CONFIG_MFD_SM501_GPIO
888
889static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc)
890{
891 return container_of(gc, struct sm501_gpio_chip, gpio);
892}
893
894static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio)
895{
896 return container_of(gpio, struct sm501_devdata, gpio);
897}
898
899static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset)
900
901{
902 struct sm501_gpio_chip *smgpio = to_sm501_gpio(chip);
903 unsigned long result;
904
905 result = readl(smgpio->regbase + SM501_GPIO_DATA_LOW);
906 result >>= offset;
907
908 return result & 1UL;
909}
910
911static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
912
913{
914 struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
915 struct sm501_gpio *smgpio = smchip->ourgpio;
916 unsigned long bit = 1 << offset;
917 void __iomem *regs = smchip->regbase;
918 unsigned long save;
919 unsigned long val;
920
921 dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
922 __func__, chip, offset);
923
924 spin_lock_irqsave(&smgpio->lock, save);
925
926 val = readl(regs + SM501_GPIO_DATA_LOW) & ~bit;
927 if (value)
928 val |= bit;
929 writel(val, regs);
930
931 sm501_sync_regs(sm501_gpio_to_dev(smgpio));
932 spin_unlock_irqrestore(&smgpio->lock, save);
933}
934
935static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset)
936{
937 struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
938 struct sm501_gpio *smgpio = smchip->ourgpio;
939 void __iomem *regs = smchip->regbase;
940 unsigned long bit = 1 << offset;
941 unsigned long save;
942 unsigned long ddr;
943
944 dev_info(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
945 __func__, chip, offset);
946
947 spin_lock_irqsave(&smgpio->lock, save);
948
949 ddr = readl(regs + SM501_GPIO_DDR_LOW);
950 writel(ddr & ~bit, regs + SM501_GPIO_DDR_LOW);
951
952 sm501_sync_regs(sm501_gpio_to_dev(smgpio));
953 spin_unlock_irqrestore(&smgpio->lock, save);
954
955 return 0;
956}
957
958static int sm501_gpio_output(struct gpio_chip *chip,
959 unsigned offset, int value)
960{
961 struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
962 struct sm501_gpio *smgpio = smchip->ourgpio;
963 unsigned long bit = 1 << offset;
964 void __iomem *regs = smchip->regbase;
965 unsigned long save;
966 unsigned long val;
967 unsigned long ddr;
968
969 dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d,%d)\n",
970 __func__, chip, offset, value);
971
972 spin_lock_irqsave(&smgpio->lock, save);
973
974 val = readl(regs + SM501_GPIO_DATA_LOW);
975 if (value)
976 val |= bit;
977 else
978 val &= ~bit;
979 writel(val, regs);
980
981 ddr = readl(regs + SM501_GPIO_DDR_LOW);
982 writel(ddr | bit, regs + SM501_GPIO_DDR_LOW);
983
984 sm501_sync_regs(sm501_gpio_to_dev(smgpio));
985 writel(val, regs + SM501_GPIO_DATA_LOW);
986
987 sm501_sync_regs(sm501_gpio_to_dev(smgpio));
988 spin_unlock_irqrestore(&smgpio->lock, save);
989
990 return 0;
991}
992
993static struct gpio_chip gpio_chip_template = {
994 .ngpio = 32,
995 .direction_input = sm501_gpio_input,
996 .direction_output = sm501_gpio_output,
997 .set = sm501_gpio_set,
998 .get = sm501_gpio_get,
999};
1000
1001static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
1002 struct sm501_gpio *gpio,
1003 struct sm501_gpio_chip *chip)
1004{
1005 struct sm501_platdata *pdata = sm->platdata;
1006 struct gpio_chip *gchip = &chip->gpio;
1007 int base = pdata->gpio_base;
1008
1009 chip->gpio = gpio_chip_template;
1010
1011 if (chip == &gpio->high) {
1012 if (base > 0)
1013 base += 32;
1014 chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH;
1015 gchip->label = "SM501-HIGH";
1016 } else {
1017 chip->regbase = gpio->regs + SM501_GPIO_DATA_LOW;
1018 gchip->label = "SM501-LOW";
1019 }
1020
1021 gchip->base = base;
1022 chip->ourgpio = gpio;
1023
1024 return gpiochip_add(gchip);
1025}
1026
1027static int sm501_register_gpio(struct sm501_devdata *sm)
1028{
1029 struct sm501_gpio *gpio = &sm->gpio;
1030 resource_size_t iobase = sm->io_res->start + SM501_GPIO;
1031 int ret;
1032 int tmp;
1033
1034 dev_dbg(sm->dev, "registering gpio block %08llx\n",
1035 (unsigned long long)iobase);
1036
1037 spin_lock_init(&gpio->lock);
1038
1039 gpio->regs_res = request_mem_region(iobase, 0x20, "sm501-gpio");
1040 if (gpio->regs_res == NULL) {
1041 dev_err(sm->dev, "gpio: failed to request region\n");
1042 return -ENXIO;
1043 }
1044
1045 gpio->regs = ioremap(iobase, 0x20);
1046 if (gpio->regs == NULL) {
1047 dev_err(sm->dev, "gpio: failed to remap registers\n");
1048 ret = -ENXIO;
1049 goto err_claimed;
1050 }
1051
1052 /* Register both our chips. */
1053
1054 ret = sm501_gpio_register_chip(sm, gpio, &gpio->low);
1055 if (ret) {
1056 dev_err(sm->dev, "failed to add low chip\n");
1057 goto err_mapped;
1058 }
1059
1060 ret = sm501_gpio_register_chip(sm, gpio, &gpio->high);
1061 if (ret) {
1062 dev_err(sm->dev, "failed to add high chip\n");
1063 goto err_low_chip;
1064 }
1065
1066 gpio->registered = 1;
1067
1068 return 0;
1069
1070 err_low_chip:
1071 tmp = gpiochip_remove(&gpio->low.gpio);
1072 if (tmp) {
1073 dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
1074 return ret;
1075 }
1076
1077 err_mapped:
1078 iounmap(gpio->regs);
1079
1080 err_claimed:
1081 release_resource(gpio->regs_res);
1082 kfree(gpio->regs_res);
1083
1084 return ret;
1085}
1086
1087static void sm501_gpio_remove(struct sm501_devdata *sm)
1088{
1089 struct sm501_gpio *gpio = &sm->gpio;
1090 int ret;
1091
1092 if (!sm->gpio.registered)
1093 return;
1094
1095 ret = gpiochip_remove(&gpio->low.gpio);
1096 if (ret)
1097 dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
1098
1099 ret = gpiochip_remove(&gpio->high.gpio);
1100 if (ret)
1101 dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
1102
1103 iounmap(gpio->regs);
1104 release_resource(gpio->regs_res);
1105 kfree(gpio->regs_res);
1106}
1107
1108static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
1109{
1110 struct sm501_gpio *gpio = &sm->gpio;
1111 int base = (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
1112
1113 return (pin % 32) + base;
1114}
1115
1116static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
1117{
1118 return sm->gpio.registered;
1119}
1120#else
1121static inline int sm501_register_gpio(struct sm501_devdata *sm)
1122{
1123 return 0;
1124}
1125
1126static inline void sm501_gpio_remove(struct sm501_devdata *sm)
1127{
1128}
1129
1130static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
1131{
1132 return -1;
1133}
1134
1135static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
1136{
1137 return 0;
1138}
1139#endif
1140
1141static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm,
1142 struct sm501_platdata_gpio_i2c *iic)
1143{
1144 struct i2c_gpio_platform_data *icd;
1145 struct platform_device *pdev;
1146
1147 pdev = sm501_create_subdev(sm, "i2c-gpio", 0,
1148 sizeof(struct i2c_gpio_platform_data));
1149 if (!pdev)
1150 return -ENOMEM;
1151
1152 icd = pdev->dev.platform_data;
1153
1154 /* We keep the pin_sda and pin_scl fields relative in case the
1155 * same platform data is passed to >1 SM501.
1156 */
1157
1158 icd->sda_pin = sm501_gpio_pin2nr(sm, iic->pin_sda);
1159 icd->scl_pin = sm501_gpio_pin2nr(sm, iic->pin_scl);
1160 icd->timeout = iic->timeout;
1161 icd->udelay = iic->udelay;
1162
1163 /* note, we can't use either of the pin numbers, as the i2c-gpio
1164 * driver uses the platform.id field to generate the bus number
 1165 * to register with the i2c core; the i2c core doesn't have enough
1166 * entries to deal with anything we currently use.
1167 */
1168
1169 pdev->id = iic->bus_num;
1170
1171 dev_info(sm->dev, "registering i2c-%d: sda=%d (%d), scl=%d (%d)\n",
1172 iic->bus_num,
1173 icd->sda_pin, iic->pin_sda, icd->scl_pin, iic->pin_scl);
1174
1175 return sm501_register_device(sm, pdev);
1176}
1177
1178static int sm501_register_gpio_i2c(struct sm501_devdata *sm,
1179 struct sm501_platdata *pdata)
1180{
1181 struct sm501_platdata_gpio_i2c *iic = pdata->gpio_i2c;
1182 int index;
1183 int ret;
1184
1185 for (index = 0; index < pdata->gpio_i2c_nr; index++, iic++) {
1186 ret = sm501_register_gpio_i2c_instance(sm, iic);
1187 if (ret < 0)
1188 return ret;
1189 }
1190
1191 return 0;
1192}
1193
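
The relative pin numbering described in the comments above means a board file supplies SM501-local pin indices and sm501_gpio_pin2nr() translates them into global GPIO numbers (with gpio_base = 160, the low chip covers 160..191 and the high chip 192..223, so pin 35 becomes 195). A hypothetical board-file fragment wiring one bit-banged I2C bus through this path; field names follow the driver above, all values are illustrative:

	static struct sm501_platdata_gpio_i2c board_gpio_i2c[] = {
		{
			.bus_num = 1,	/* becomes the i2c-gpio platform device id */
			.pin_sda = 0,	/* relative to this SM501's GPIO block */
			.pin_scl = 1,
			.udelay  = 5,
			.timeout = 100,
		},
	};

	static struct sm501_platdata board_sm501_pdata = {
		.gpio_base   = 160,
		.gpio_i2c    = board_gpio_i2c,
		.gpio_i2c_nr = ARRAY_SIZE(board_gpio_i2c),
		/* .init, .fb and friends as the board requires */
	};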
909/* sm501_dbg_regs 1194/* sm501_dbg_regs
910 * 1195 *
911 * Debug attribute to attach to parent device to show core registers 1196 * Debug attribute to attach to parent device to show core registers
@@ -1013,6 +1298,7 @@ static unsigned int sm501_mem_local[] = {
1013static int sm501_init_dev(struct sm501_devdata *sm) 1298static int sm501_init_dev(struct sm501_devdata *sm)
1014{ 1299{
1015 struct sm501_initdata *idata; 1300 struct sm501_initdata *idata;
1301 struct sm501_platdata *pdata;
1016 resource_size_t mem_avail; 1302 resource_size_t mem_avail;
1017 unsigned long dramctrl; 1303 unsigned long dramctrl;
1018 unsigned long devid; 1304 unsigned long devid;
@@ -1051,7 +1337,9 @@ static int sm501_init_dev(struct sm501_devdata *sm)
1051 1337
1052 /* check to see if we have some device initialisation */ 1338 /* check to see if we have some device initialisation */
1053 1339
1054 idata = sm->platdata ? sm->platdata->init : NULL; 1340 pdata = sm->platdata;
1341 idata = pdata ? pdata->init : NULL;
1342
1055 if (idata) { 1343 if (idata) {
1056 sm501_init_regs(sm, idata); 1344 sm501_init_regs(sm, idata);
1057 1345
@@ -1059,6 +1347,15 @@ static int sm501_init_dev(struct sm501_devdata *sm)
1059 sm501_register_usbhost(sm, &mem_avail); 1347 sm501_register_usbhost(sm, &mem_avail);
1060 if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1)) 1348 if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1))
1061 sm501_register_uart(sm, idata->devices); 1349 sm501_register_uart(sm, idata->devices);
1350 if (idata->devices & SM501_USE_GPIO)
1351 sm501_register_gpio(sm);
1352 }
1353
1354 if (pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) {
1355 if (!sm501_gpio_isregistered(sm))
1356 dev_err(sm->dev, "no gpio available for i2c gpio.\n");
1357 else
1358 sm501_register_gpio_i2c(sm, pdata);
1062 } 1359 }
1063 1360
1064 ret = sm501_check_clocks(sm); 1361 ret = sm501_check_clocks(sm);
@@ -1138,8 +1435,31 @@ static int sm501_plat_probe(struct platform_device *dev)
1138} 1435}
1139 1436
1140#ifdef CONFIG_PM 1437#ifdef CONFIG_PM
1438
1141/* power management support */ 1439/* power management support */
1142 1440
1441static void sm501_set_power(struct sm501_devdata *sm, int on)
1442{
1443 struct sm501_platdata *pd = sm->platdata;
1444
1445 if (pd == NULL)
1446 return;
1447
1448 if (pd->get_power) {
1449 if (pd->get_power(sm->dev) == on) {
1450 dev_dbg(sm->dev, "is already %d\n", on);
1451 return;
1452 }
1453 }
1454
1455 if (pd->set_power) {
1456 dev_dbg(sm->dev, "setting power to %d\n", on);
1457
1458 pd->set_power(sm->dev, on);
1459 sm501_mdelay(sm, 10);
1460 }
1461}
1462
1143static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state) 1463static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
1144{ 1464{
1145 struct sm501_devdata *sm = platform_get_drvdata(pdev); 1465 struct sm501_devdata *sm = platform_get_drvdata(pdev);
@@ -1148,6 +1468,12 @@ static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
1148 sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL); 1468 sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL);
1149 1469
1150 sm501_dump_regs(sm); 1470 sm501_dump_regs(sm);
1471
1472 if (sm->platdata) {
1473 if (sm->platdata->flags & SM501_FLAG_SUSPEND_OFF)
1474 sm501_set_power(sm, 0);
1475 }
1476
1151 return 0; 1477 return 0;
1152} 1478}
1153 1479
@@ -1155,6 +1481,8 @@ static int sm501_plat_resume(struct platform_device *pdev)
1155{ 1481{
1156 struct sm501_devdata *sm = platform_get_drvdata(pdev); 1482 struct sm501_devdata *sm = platform_get_drvdata(pdev);
1157 1483
1484 sm501_set_power(sm, 1);
1485
1158 sm501_dump_regs(sm); 1486 sm501_dump_regs(sm);
1159 sm501_dump_gate(sm); 1487 sm501_dump_gate(sm);
1160 sm501_dump_clk(sm); 1488 sm501_dump_clk(sm);
@@ -1229,6 +1557,7 @@ static struct sm501_platdata_fb sm501_fb_pdata = {
1229static struct sm501_platdata sm501_pci_platdata = { 1557static struct sm501_platdata sm501_pci_platdata = {
1230 .init = &sm501_pci_initdata, 1558 .init = &sm501_pci_initdata,
1231 .fb = &sm501_fb_pdata, 1559 .fb = &sm501_fb_pdata,
1560 .gpio_base = -1,
1232}; 1561};
1233 1562
1234static int sm501_pci_probe(struct pci_dev *dev, 1563static int sm501_pci_probe(struct pci_dev *dev,
@@ -1335,6 +1664,8 @@ static void sm501_dev_remove(struct sm501_devdata *sm)
1335 sm501_remove_sub(sm, smdev); 1664 sm501_remove_sub(sm, smdev);
1336 1665
1337 device_remove_file(sm->dev, &dev_attr_dbg_regs); 1666 device_remove_file(sm->dev, &dev_attr_dbg_regs);
1667
1668 sm501_gpio_remove(sm);
1338} 1669}
1339 1670
1340static void sm501_pci_remove(struct pci_dev *dev) 1671static void sm501_pci_remove(struct pci_dev *dev)
@@ -1378,6 +1709,8 @@ static struct pci_driver sm501_pci_drv = {
1378 .remove = sm501_pci_remove, 1709 .remove = sm501_pci_remove,
1379}; 1710};
1380 1711
1712MODULE_ALIAS("platform:sm501");
1713
1381static struct platform_driver sm501_plat_drv = { 1714static struct platform_driver sm501_plat_drv = {
1382 .driver = { 1715 .driver = {
1383 .name = "sm501", 1716 .name = "sm501",
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index d5bc288b1b0d..321eb9134635 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -77,11 +77,13 @@ config IBM_ASM
77 for your IBM server. 77 for your IBM server.
78 78
79config PHANTOM 79config PHANTOM
80 tristate "Sensable PHANToM" 80 tristate "Sensable PHANToM (PCI)"
81 depends on PCI 81 depends on PCI
82 help 82 help
83 Say Y here if you want to build a driver for Sensable PHANToM device. 83 Say Y here if you want to build a driver for Sensable PHANToM device.
84 84
85 This driver is only for PCI PHANToMs.
86
85 If you choose to build module, its name will be phantom. If unsure, 87 If you choose to build module, its name will be phantom. If unsure,
86 say N here. 88 say N here.
87 89
@@ -212,6 +214,18 @@ config TC1100_WMI
212 This is a driver for the WMI extensions (wireless and bluetooth power 214 This is a driver for the WMI extensions (wireless and bluetooth power
213 control) of the HP Compaq TC1100 tablet. 215 control) of the HP Compaq TC1100 tablet.
214 216
217config HP_WMI
218 tristate "HP WMI extras"
219 depends on ACPI_WMI
220 depends on INPUT
221 depends on RFKILL
222 help
223 Say Y here if you want to support WMI-based hotkeys on HP laptops and
224 to read data from WMI such as docking or ambient light sensor state.
225
226 To compile this driver as a module, choose M here: the module will
227 be called hp-wmi.
228
215config MSI_LAPTOP 229config MSI_LAPTOP
216 tristate "MSI Laptop Extras" 230 tristate "MSI Laptop Extras"
217 depends on X86 231 depends on X86
@@ -424,6 +438,7 @@ config SGI_XP
424 438
425config HP_ILO 439config HP_ILO
426 tristate "Channel interface driver for HP iLO/iLO2 processor" 440 tristate "Channel interface driver for HP iLO/iLO2 processor"
441 depends on PCI
427 default n 442 default n
428 help 443 help
429 The channel interface driver allows applications to communicate 444 The channel interface driver allows applications to communicate
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 688fe76135e0..f5e273420c09 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ACER_WMI) += acer-wmi.o
13obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o 13obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o
14obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o 14obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o
15obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o 15obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
16obj-$(CONFIG_HP_WMI) += hp-wmi.o
16obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o 17obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
17obj-$(CONFIG_LKDTM) += lkdtm.o 18obj-$(CONFIG_LKDTM) += lkdtm.o
18obj-$(CONFIG_TIFM_CORE) += tifm_core.o 19obj-$(CONFIG_TIFM_CORE) += tifm_core.o
diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
new file mode 100644
index 000000000000..1dbcbcb323a2
--- /dev/null
+++ b/drivers/misc/hp-wmi.c
@@ -0,0 +1,494 @@
1/*
2 * HP WMI hotkeys
3 *
4 * Copyright (C) 2008 Red Hat <mjg@redhat.com>
5 *
6 * Portions based on wistron_btns.c:
7 * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
8 * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
9 * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#include <linux/kernel.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/types.h>
30#include <linux/input.h>
31#include <acpi/acpi_drivers.h>
32#include <linux/platform_device.h>
33#include <linux/acpi.h>
34#include <linux/rfkill.h>
35#include <linux/string.h>
36
37MODULE_AUTHOR("Matthew Garrett <mjg59@srcf.ucam.org>");
38MODULE_DESCRIPTION("HP laptop WMI hotkeys driver");
39MODULE_LICENSE("GPL");
40
41MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C");
42MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
43
44#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
45#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
46
47#define HPWMI_DISPLAY_QUERY 0x1
48#define HPWMI_HDDTEMP_QUERY 0x2
49#define HPWMI_ALS_QUERY 0x3
50#define HPWMI_DOCK_QUERY 0x4
51#define HPWMI_WIRELESS_QUERY 0x5
52
53static int __init hp_wmi_bios_setup(struct platform_device *device);
54static int __exit hp_wmi_bios_remove(struct platform_device *device);
55
56struct bios_args {
57 u32 signature;
58 u32 command;
59 u32 commandtype;
60 u32 datasize;
61 u32 data;
62};
63
64struct bios_return {
65 u32 sigpass;
66 u32 return_code;
67 u32 value;
68};
69
70struct key_entry {
71 char type; /* See KE_* below */
72 u8 code;
73 u16 keycode;
74};
75
76enum { KE_KEY, KE_SW, KE_END };
77
78static struct key_entry hp_wmi_keymap[] = {
79 {KE_SW, 0x01, SW_DOCK},
80 {KE_KEY, 0x02, KEY_BRIGHTNESSUP},
81 {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
82 {KE_KEY, 0x04, KEY_HELP},
83 {KE_END, 0}
84};
85
86static struct input_dev *hp_wmi_input_dev;
87static struct platform_device *hp_wmi_platform_dev;
88
89static struct rfkill *wifi_rfkill;
90static struct rfkill *bluetooth_rfkill;
91static struct rfkill *wwan_rfkill;
92
93static struct platform_driver hp_wmi_driver = {
94 .driver = {
95 .name = "hp-wmi",
96 .owner = THIS_MODULE,
97 },
98 .probe = hp_wmi_bios_setup,
99 .remove = hp_wmi_bios_remove,
100};
101
102static int hp_wmi_perform_query(int query, int write, int value)
103{
104 struct bios_return bios_return;
105 acpi_status status;
106 union acpi_object *obj;
107 struct bios_args args = {
108 .signature = 0x55434553,
109 .command = write ? 0x2 : 0x1,
110 .commandtype = query,
111 .datasize = write ? 0x4 : 0,
112 .data = value,
113 };
114 struct acpi_buffer input = { sizeof(struct bios_args), &args };
115 struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
116
117 status = wmi_evaluate_method(HPWMI_BIOS_GUID, 0, 0x3, &input, &output);
118
119 obj = output.pointer;
120
121 if (!obj || obj->type != ACPI_TYPE_BUFFER)
122 return -EINVAL;
123
124 bios_return = *((struct bios_return *)obj->buffer.pointer);
125 if (bios_return.return_code > 0)
126 return bios_return.return_code * -1;
127 else
128 return bios_return.value;
129}
130
131static int hp_wmi_display_state(void)
132{
133 return hp_wmi_perform_query(HPWMI_DISPLAY_QUERY, 0, 0);
134}
135
136static int hp_wmi_hddtemp_state(void)
137{
138 return hp_wmi_perform_query(HPWMI_HDDTEMP_QUERY, 0, 0);
139}
140
141static int hp_wmi_als_state(void)
142{
143 return hp_wmi_perform_query(HPWMI_ALS_QUERY, 0, 0);
144}
145
146static int hp_wmi_dock_state(void)
147{
148 return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0);
149}
150
151static int hp_wmi_wifi_set(void *data, enum rfkill_state state)
152{
153 if (state)
154 return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x101);
155 else
156 return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x100);
157}
158
159static int hp_wmi_bluetooth_set(void *data, enum rfkill_state state)
160{
161 if (state)
162 return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x202);
163 else
164 return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x200);
165}
166
167static int hp_wmi_wwan_set(void *data, enum rfkill_state state)
168{
169 if (state)
170 return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x404);
171 else
172 return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x400);
173}
174
175static int hp_wmi_wifi_state(void)
176{
177 int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
178
179 if (wireless & 0x100)
180 return 1;
181 else
182 return 0;
183}
184
185static int hp_wmi_bluetooth_state(void)
186{
187 int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
188
189 if (wireless & 0x10000)
190 return 1;
191 else
192 return 0;
193}
194
195static int hp_wmi_wwan_state(void)
196{
197 int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
198
199 if (wireless & 0x1000000)
200 return 1;
201 else
202 return 0;
203}
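
The wireless-query constants used above follow a visible pattern (an observation from the values, not a documented ABI): radio n (0 = wifi, 1 = bluetooth, 2 = wwan) is selected by bit 8 + n of the written word, its requested state by bit n, and its current state reads back in bit 8 * (n + 1). A sketch that reproduces exactly the constants in the code:

	#include <stdio.h>

	int main(void)
	{
		const char *name[] = { "wifi", "bluetooth", "wwan" };
		int n;

		for (n = 0; n < 3; n++)
			printf("%-9s on=0x%-3x off=0x%-3x state-mask=0x%x\n",
			       name[n], (0x100 << n) | (1 << n), 0x100 << n,
			       0x100 << (8 * n));
		return 0;
	}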
204
205static ssize_t show_display(struct device *dev, struct device_attribute *attr,
206 char *buf)
207{
208 int value = hp_wmi_display_state();
209 if (value < 0)
210 return -EINVAL;
211 return sprintf(buf, "%d\n", value);
212}
213
214static ssize_t show_hddtemp(struct device *dev, struct device_attribute *attr,
215 char *buf)
216{
217 int value = hp_wmi_hddtemp_state();
218 if (value < 0)
219 return -EINVAL;
220 return sprintf(buf, "%d\n", value);
221}
222
223static ssize_t show_als(struct device *dev, struct device_attribute *attr,
224 char *buf)
225{
226 int value = hp_wmi_als_state();
227 if (value < 0)
228 return -EINVAL;
229 return sprintf(buf, "%d\n", value);
230}
231
232static ssize_t show_dock(struct device *dev, struct device_attribute *attr,
233 char *buf)
234{
235 int value = hp_wmi_dock_state();
236 if (value < 0)
237 return -EINVAL;
238 return sprintf(buf, "%d\n", value);
239}
240
241static ssize_t set_als(struct device *dev, struct device_attribute *attr,
242 const char *buf, size_t count)
243{
244 u32 tmp = simple_strtoul(buf, NULL, 10);
245 hp_wmi_perform_query(HPWMI_ALS_QUERY, 1, tmp);
246 return count;
247}
248
249static DEVICE_ATTR(display, S_IRUGO, show_display, NULL);
250static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL);
251static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als);
252static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL);
253
254static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
255{
256 struct key_entry *key;
257
258 for (key = hp_wmi_keymap; key->type != KE_END; key++)
259 if (code == key->code)
260 return key;
261
262 return NULL;
263}
264
265static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode)
266{
267 struct key_entry *key;
268
269 for (key = hp_wmi_keymap; key->type != KE_END; key++)
270 if (key->type == KE_KEY && keycode == key->keycode)
271 return key;
272
273 return NULL;
274}
275
276static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode)
277{
278 struct key_entry *key = hp_wmi_get_entry_by_scancode(scancode);
279
280 if (key && key->type == KE_KEY) {
281 *keycode = key->keycode;
282 return 0;
283 }
284
285 return -EINVAL;
286}
287
288static int hp_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode)
289{
290 struct key_entry *key;
291 int old_keycode;
292
293 if (keycode < 0 || keycode > KEY_MAX)
294 return -EINVAL;
295
296 key = hp_wmi_get_entry_by_scancode(scancode);
297 if (key && key->type == KE_KEY) {
298 old_keycode = key->keycode;
299 key->keycode = keycode;
300 set_bit(keycode, dev->keybit);
301 if (!hp_wmi_get_entry_by_keycode(old_keycode))
302 clear_bit(old_keycode, dev->keybit);
303 return 0;
304 }
305
306 return -EINVAL;
307}
308
309void hp_wmi_notify(u32 value, void *context)
310{
311 struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
312 static struct key_entry *key;
313 union acpi_object *obj;
314
315 wmi_get_event_data(value, &response);
316
317 obj = (union acpi_object *)response.pointer;
318
319 if (obj && obj->type == ACPI_TYPE_BUFFER && obj->buffer.length == 8) {
320 int eventcode = *((u8 *) obj->buffer.pointer);
321 key = hp_wmi_get_entry_by_scancode(eventcode);
322 if (key) {
323 switch (key->type) {
324 case KE_KEY:
325 input_report_key(hp_wmi_input_dev,
326 key->keycode, 1);
327 input_sync(hp_wmi_input_dev);
328 input_report_key(hp_wmi_input_dev,
329 key->keycode, 0);
330 input_sync(hp_wmi_input_dev);
331 break;
332 case KE_SW:
333 input_report_switch(hp_wmi_input_dev,
334 key->keycode,
335 hp_wmi_dock_state());
336 input_sync(hp_wmi_input_dev);
337 break;
338 }
339 } else if (eventcode == 0x5) {
340 if (wifi_rfkill)
341 wifi_rfkill->state = hp_wmi_wifi_state();
342 if (bluetooth_rfkill)
343 bluetooth_rfkill->state =
344 hp_wmi_bluetooth_state();
345 if (wwan_rfkill)
346 wwan_rfkill->state = hp_wmi_wwan_state();
347 } else
348 printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
349 eventcode);
350 } else
351 printk(KERN_INFO "HP WMI: Unknown response received\n");
352}
353
354static int __init hp_wmi_input_setup(void)
355{
356 struct key_entry *key;
357 int err;
358
359 hp_wmi_input_dev = input_allocate_device();
360
361 hp_wmi_input_dev->name = "HP WMI hotkeys";
362 hp_wmi_input_dev->phys = "wmi/input0";
363 hp_wmi_input_dev->id.bustype = BUS_HOST;
364 hp_wmi_input_dev->getkeycode = hp_wmi_getkeycode;
365 hp_wmi_input_dev->setkeycode = hp_wmi_setkeycode;
366
367 for (key = hp_wmi_keymap; key->type != KE_END; key++) {
368 switch (key->type) {
369 case KE_KEY:
370 set_bit(EV_KEY, hp_wmi_input_dev->evbit);
371 set_bit(key->keycode, hp_wmi_input_dev->keybit);
372 break;
373 case KE_SW:
374 set_bit(EV_SW, hp_wmi_input_dev->evbit);
375 set_bit(key->keycode, hp_wmi_input_dev->swbit);
376 break;
377 }
378 }
379
380 err = input_register_device(hp_wmi_input_dev);
381
382 if (err) {
383 input_free_device(hp_wmi_input_dev);
384 return err;
385 }
386
387 return 0;
388}
389
390static void cleanup_sysfs(struct platform_device *device)
391{
392 device_remove_file(&device->dev, &dev_attr_display);
393 device_remove_file(&device->dev, &dev_attr_hddtemp);
394 device_remove_file(&device->dev, &dev_attr_als);
395 device_remove_file(&device->dev, &dev_attr_dock);
396}
397
398static int __init hp_wmi_bios_setup(struct platform_device *device)
399{
400 int err;
401
402 err = device_create_file(&device->dev, &dev_attr_display);
403 if (err)
404 goto add_sysfs_error;
405 err = device_create_file(&device->dev, &dev_attr_hddtemp);
406 if (err)
407 goto add_sysfs_error;
408 err = device_create_file(&device->dev, &dev_attr_als);
409 if (err)
410 goto add_sysfs_error;
411 err = device_create_file(&device->dev, &dev_attr_dock);
412 if (err)
413 goto add_sysfs_error;
414
415 wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
416 wifi_rfkill->name = "hp-wifi";
417 wifi_rfkill->state = hp_wmi_wifi_state();
418 wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
419 wifi_rfkill->user_claim_unsupported = 1;
420
421 bluetooth_rfkill = rfkill_allocate(&device->dev,
422 RFKILL_TYPE_BLUETOOTH);
423 bluetooth_rfkill->name = "hp-bluetooth";
424 bluetooth_rfkill->state = hp_wmi_bluetooth_state();
425 bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
426 bluetooth_rfkill->user_claim_unsupported = 1;
427
428 wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX);
429 wwan_rfkill->name = "hp-wwan";
430 wwan_rfkill->state = hp_wmi_wwan_state();
431 wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
432 wwan_rfkill->user_claim_unsupported = 1;
433
434 rfkill_register(wifi_rfkill);
435 rfkill_register(bluetooth_rfkill);
436 rfkill_register(wwan_rfkill);
437
438 return 0;
439add_sysfs_error:
440 cleanup_sysfs(device);
441 return err;
442}
443
444static int __exit hp_wmi_bios_remove(struct platform_device *device)
445{
446 cleanup_sysfs(device);
447
448 rfkill_unregister(wifi_rfkill);
449 rfkill_unregister(bluetooth_rfkill);
450 rfkill_unregister(wwan_rfkill);
451
452 return 0;
453}
454
455static int __init hp_wmi_init(void)
456{
457 int err;
458
459 if (wmi_has_guid(HPWMI_EVENT_GUID)) {
460 err = wmi_install_notify_handler(HPWMI_EVENT_GUID,
461 hp_wmi_notify, NULL);
462 if (!err)
463 hp_wmi_input_setup();
464 }
465
466 if (wmi_has_guid(HPWMI_BIOS_GUID)) {
467 err = platform_driver_register(&hp_wmi_driver);
468 if (err)
469 return 0;
470 hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1);
471 if (!hp_wmi_platform_dev) {
472 platform_driver_unregister(&hp_wmi_driver);
473 return 0;
474 }
475 platform_device_add(hp_wmi_platform_dev);
476 }
477
478 return 0;
479}
480
481static void __exit hp_wmi_exit(void)
482{
483 if (wmi_has_guid(HPWMI_EVENT_GUID)) {
484 wmi_remove_notify_handler(HPWMI_EVENT_GUID);
485 input_unregister_device(hp_wmi_input_dev);
486 }
487 if (hp_wmi_platform_dev) {
488 platform_device_del(hp_wmi_platform_dev);
489 platform_driver_unregister(&hp_wmi_driver);
490 }
491}
492
493module_init(hp_wmi_init);
494module_exit(hp_wmi_exit);
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 4ce3bdc2f959..daf585689ce3 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -563,6 +563,6 @@ module_init(phantom_init);
563module_exit(phantom_exit); 563module_exit(phantom_exit);
564 564
565MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>"); 565MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>");
566MODULE_DESCRIPTION("Sensable Phantom driver"); 566MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)");
567MODULE_LICENSE("GPL"); 567MODULE_LICENSE("GPL");
568MODULE_VERSION(PHANTOM_VERSION); 568MODULE_VERSION(PHANTOM_VERSION);
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 961416ac0616..c7630a228310 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -51,14 +51,13 @@
51 * @name: MTD device name or number string 51 * @name: MTD device name or number string
52 * @vid_hdr_offs: VID header offset 52 * @vid_hdr_offs: VID header offset
53 */ 53 */
54struct mtd_dev_param 54struct mtd_dev_param {
55{
56 char name[MTD_PARAM_LEN_MAX]; 55 char name[MTD_PARAM_LEN_MAX];
57 int vid_hdr_offs; 56 int vid_hdr_offs;
58}; 57};
59 58
60/* Numbers of elements set in the @mtd_dev_param array */ 59/* Numbers of elements set in the @mtd_dev_param array */
61static int mtd_devs = 0; 60static int mtd_devs;
62 61
63/* MTD devices specification parameters */ 62/* MTD devices specification parameters */
64static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; 63static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
@@ -160,8 +159,7 @@ void ubi_put_device(struct ubi_device *ubi)
160} 159}
161 160
162/** 161/**
163 * ubi_get_by_major - get UBI device description object by character device 162 * ubi_get_by_major - get UBI device by character device major number.
164 * major number.
165 * @major: major number 163 * @major: major number
166 * 164 *
167 * This function is similar to 'ubi_get_device()', but it searches the device 165 * This function is similar to 'ubi_get_device()', but it searches the device
@@ -355,15 +353,34 @@ static void kill_volumes(struct ubi_device *ubi)
355} 353}
356 354
357/** 355/**
356 * free_user_volumes - free all user volumes.
357 * @ubi: UBI device description object
358 *
359 * Normally the volumes are freed at the release function of the volume device
360 * objects. However, on error paths the volumes have to be freed before the
361 * device objects have been initialized.
362 */
363static void free_user_volumes(struct ubi_device *ubi)
364{
365 int i;
366
367 for (i = 0; i < ubi->vtbl_slots; i++)
368 if (ubi->volumes[i]) {
369 kfree(ubi->volumes[i]->eba_tbl);
370 kfree(ubi->volumes[i]);
371 }
372}
373
374/**
358 * uif_init - initialize user interfaces for an UBI device. 375 * uif_init - initialize user interfaces for an UBI device.
359 * @ubi: UBI device description object 376 * @ubi: UBI device description object
360 * 377 *
361 * This function returns zero in case of success and a negative error code in 378 * This function returns zero in case of success and a negative error code in
 362 * case of failure. 379 * case of failure. Note, this function destroys all volumes if it fails.
363 */ 380 */
364static int uif_init(struct ubi_device *ubi) 381static int uif_init(struct ubi_device *ubi)
365{ 382{
366 int i, err; 383 int i, err, do_free = 0;
367 dev_t dev; 384 dev_t dev;
368 385
369 sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); 386 sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num);
@@ -384,7 +401,7 @@ static int uif_init(struct ubi_device *ubi)
384 401
385 ubi_assert(MINOR(dev) == 0); 402 ubi_assert(MINOR(dev) == 0);
386 cdev_init(&ubi->cdev, &ubi_cdev_operations); 403 cdev_init(&ubi->cdev, &ubi_cdev_operations);
387 dbg_msg("%s major is %u", ubi->ubi_name, MAJOR(dev)); 404 dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev));
388 ubi->cdev.owner = THIS_MODULE; 405 ubi->cdev.owner = THIS_MODULE;
389 406
390 err = cdev_add(&ubi->cdev, dev, 1); 407 err = cdev_add(&ubi->cdev, dev, 1);
@@ -410,10 +427,13 @@ static int uif_init(struct ubi_device *ubi)
410 427
411out_volumes: 428out_volumes:
412 kill_volumes(ubi); 429 kill_volumes(ubi);
430 do_free = 0;
413out_sysfs: 431out_sysfs:
414 ubi_sysfs_close(ubi); 432 ubi_sysfs_close(ubi);
415 cdev_del(&ubi->cdev); 433 cdev_del(&ubi->cdev);
416out_unreg: 434out_unreg:
435 if (do_free)
436 free_user_volumes(ubi);
417 unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); 437 unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
418 ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err); 438 ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err);
419 return err; 439 return err;
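
The do_free flag threaded through uif_init() exists because ownership of the user-volume memory changes hands part way through: once the volume device objects are registered, their release functions free the memory, so the error ladder must only call free_user_volumes() while the memory is still ours. A generic userspace sketch of that conditional-cleanup idiom (names illustrative):

	#include <stdlib.h>

	static void release(char *buf)	/* stand-in for a device release callback */
	{
		free(buf);
	}

	static int setup(int fail_mid, int fail_late)
	{
		int do_free = 1;
		char *buf = malloc(64);

		if (!buf)
			return -1;
		if (fail_mid)
			goto out;	/* memory still ours */

		do_free = 0;		/* the release path owns buf from here */
		if (fail_late)
			goto out;	/* must NOT free buf directly here */

		release(buf);		/* normally runs later, at teardown */
		return 0;
	out:
		if (do_free)
			free(buf);	/* failed before ownership moved */
		else
			release(buf);	/* failed after: owner's teardown frees it */
		return -1;
	}

	int main(void)
	{
		setup(1, 0);
		setup(0, 1);
		return setup(0, 0);
	}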
@@ -422,6 +442,10 @@ out_unreg:
422/** 442/**
423 * uif_close - close user interfaces for an UBI device. 443 * uif_close - close user interfaces for an UBI device.
424 * @ubi: UBI device description object 444 * @ubi: UBI device description object
445 *
446 * Note, since this function un-registers UBI volume device objects (@vol->dev),
 447 * the memory allocated for the volumes is freed as well (in the release
448 * function).
425 */ 449 */
426static void uif_close(struct ubi_device *ubi) 450static void uif_close(struct ubi_device *ubi)
427{ 451{
@@ -432,6 +456,21 @@ static void uif_close(struct ubi_device *ubi)
432} 456}
433 457
434/** 458/**
459 * free_internal_volumes - free internal volumes.
460 * @ubi: UBI device description object
461 */
462static void free_internal_volumes(struct ubi_device *ubi)
463{
464 int i;
465
466 for (i = ubi->vtbl_slots;
467 i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
468 kfree(ubi->volumes[i]->eba_tbl);
469 kfree(ubi->volumes[i]);
470 }
471}
472
473/**
435 * attach_by_scanning - attach an MTD device using scanning method. 474 * attach_by_scanning - attach an MTD device using scanning method.
436 * @ubi: UBI device descriptor 475 * @ubi: UBI device descriptor
437 * 476 *
@@ -475,6 +514,7 @@ static int attach_by_scanning(struct ubi_device *ubi)
475out_wl: 514out_wl:
476 ubi_wl_close(ubi); 515 ubi_wl_close(ubi);
477out_vtbl: 516out_vtbl:
517 free_internal_volumes(ubi);
478 vfree(ubi->vtbl); 518 vfree(ubi->vtbl);
479out_si: 519out_si:
480 ubi_scan_destroy_si(si); 520 ubi_scan_destroy_si(si);
@@ -482,7 +522,7 @@ out_si:
482} 522}
483 523
484/** 524/**
485 * io_init - initialize I/O unit for a given UBI device. 525 * io_init - initialize I/O sub-system for a given UBI device.
486 * @ubi: UBI device description object 526 * @ubi: UBI device description object
487 * 527 *
488 * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are 528 * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are
@@ -530,7 +570,11 @@ static int io_init(struct ubi_device *ubi)
530 ubi->min_io_size = ubi->mtd->writesize; 570 ubi->min_io_size = ubi->mtd->writesize;
531 ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; 571 ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
532 572
533 /* Make sure minimal I/O unit is power of 2 */ 573 /*
574 * Make sure minimal I/O unit is power of 2. Note, there is no
575 * fundamental reason for this assumption. It is just an optimization
576 * which allows us to avoid costly division operations.
577 */
534 if (!is_power_of_2(ubi->min_io_size)) { 578 if (!is_power_of_2(ubi->min_io_size)) {
535 ubi_err("min. I/O unit (%d) is not power of 2", 579 ubi_err("min. I/O unit (%d) is not power of 2",
536 ubi->min_io_size); 580 ubi->min_io_size);
@@ -581,7 +625,7 @@ static int io_init(struct ubi_device *ubi)
581 if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || 625 if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE ||
582 ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || 626 ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE ||
583 ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || 627 ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE ||
584 ubi->leb_start % ubi->min_io_size) { 628 ubi->leb_start & (ubi->min_io_size - 1)) {
585 ubi_err("bad VID header (%d) or data offsets (%d)", 629 ubi_err("bad VID header (%d) or data offsets (%d)",
586 ubi->vid_hdr_offset, ubi->leb_start); 630 ubi->vid_hdr_offset, ubi->leb_start);
587 return -EINVAL; 631 return -EINVAL;
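
The hunk above replaces ubi->leb_start % ubi->min_io_size with ubi->leb_start & (ubi->min_io_size - 1), which is safe only because io_init() has just rejected any min_io_size that is not a power of two: for a power-of-two n, the mask n - 1 keeps exactly the remainder bits, and an AND is far cheaper than a division by a non-constant. A short demonstration:

	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned x = 70000, n = 512;	/* n must be a power of 2 */

		assert(n && (n & (n - 1)) == 0);	/* the is_power_of_2() test */
		printf("%u %% %u = %u, masked = %u\n", x, n, x % n, x & (n - 1));
		return 0;
	}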
@@ -646,7 +690,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
646 690
647 /* 691 /*
648 * Clear the auto-resize flag in the volume in-memory copy of the 692 * Clear the auto-resize flag in the volume in-memory copy of the
649 * volume table, and 'ubi_resize_volume()' will propogate this change 693 * volume table, and 'ubi_resize_volume()' will propagate this change
650 * to the flash. 694 * to the flash.
651 */ 695 */
652 ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG; 696 ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG;
@@ -655,7 +699,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
655 struct ubi_vtbl_record vtbl_rec; 699 struct ubi_vtbl_record vtbl_rec;
656 700
657 /* 701 /*
658 * No avalilable PEBs to re-size the volume, clear the flag on 702 * No available PEBs to re-size the volume, clear the flag on
659 * flash and exit. 703 * flash and exit.
660 */ 704 */
661 memcpy(&vtbl_rec, &ubi->vtbl[vol_id], 705 memcpy(&vtbl_rec, &ubi->vtbl[vol_id],
@@ -682,13 +726,13 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
682 726
683/** 727/**
684 * ubi_attach_mtd_dev - attach an MTD device. 728 * ubi_attach_mtd_dev - attach an MTD device.
685 * @mtd_dev: MTD device description object 729 * @mtd: MTD device description object
686 * @ubi_num: number to assign to the new UBI device 730 * @ubi_num: number to assign to the new UBI device
687 * @vid_hdr_offset: VID header offset 731 * @vid_hdr_offset: VID header offset
688 * 732 *
 689 * This function attaches MTD device @mtd_dev to UBI and assigns @ubi_num number 733 * This function attaches MTD device @mtd_dev to UBI and assigns @ubi_num number
690 * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in 734 * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in
691 * which case this function finds a vacant device nubert and assings it 735 * which case this function finds a vacant device number and assigns it
692 * automatically. Returns the new UBI device number in case of success and a 736 * automatically. Returns the new UBI device number in case of success and a
693 * negative error code in case of failure. 737 * negative error code in case of failure.
694 * 738 *
@@ -698,7 +742,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
698int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) 742int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
699{ 743{
700 struct ubi_device *ubi; 744 struct ubi_device *ubi;
701 int i, err; 745 int i, err, do_free = 1;
702 746
703 /* 747 /*
704 * Check if we already have the same MTD device attached. 748 * Check if we already have the same MTD device attached.
@@ -735,7 +779,8 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
735 if (!ubi_devices[ubi_num]) 779 if (!ubi_devices[ubi_num])
736 break; 780 break;
737 if (ubi_num == UBI_MAX_DEVICES) { 781 if (ubi_num == UBI_MAX_DEVICES) {
738 dbg_err("only %d UBI devices may be created", UBI_MAX_DEVICES); 782 dbg_err("only %d UBI devices may be created",
783 UBI_MAX_DEVICES);
739 return -ENFILE; 784 return -ENFILE;
740 } 785 }
741 } else { 786 } else {
@@ -760,6 +805,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
760 805
761 mutex_init(&ubi->buf_mutex); 806 mutex_init(&ubi->buf_mutex);
762 mutex_init(&ubi->ckvol_mutex); 807 mutex_init(&ubi->ckvol_mutex);
808 mutex_init(&ubi->mult_mutex);
763 mutex_init(&ubi->volumes_mutex); 809 mutex_init(&ubi->volumes_mutex);
764 spin_lock_init(&ubi->volumes_lock); 810 spin_lock_init(&ubi->volumes_lock);
765 811
@@ -798,7 +844,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
798 844
799 err = uif_init(ubi); 845 err = uif_init(ubi);
800 if (err) 846 if (err)
801 goto out_detach; 847 goto out_nofree;
802 848
803 ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); 849 ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
804 if (IS_ERR(ubi->bgt_thread)) { 850 if (IS_ERR(ubi->bgt_thread)) {
@@ -824,20 +870,22 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
824 ubi->beb_rsvd_pebs); 870 ubi->beb_rsvd_pebs);
825 ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); 871 ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec);
826 872
827 /* Enable the background thread */ 873 if (!DBG_DISABLE_BGT)
828 if (!DBG_DISABLE_BGT) {
829 ubi->thread_enabled = 1; 874 ubi->thread_enabled = 1;
830 wake_up_process(ubi->bgt_thread); 875 wake_up_process(ubi->bgt_thread);
831 }
832 876
833 ubi_devices[ubi_num] = ubi; 877 ubi_devices[ubi_num] = ubi;
834 return ubi_num; 878 return ubi_num;
835 879
836out_uif: 880out_uif:
837 uif_close(ubi); 881 uif_close(ubi);
882out_nofree:
883 do_free = 0;
838out_detach: 884out_detach:
839 ubi_eba_close(ubi);
840 ubi_wl_close(ubi); 885 ubi_wl_close(ubi);
886 if (do_free)
887 free_user_volumes(ubi);
888 free_internal_volumes(ubi);
841 vfree(ubi->vtbl); 889 vfree(ubi->vtbl);
842out_free: 890out_free:
843 vfree(ubi->peb_buf1); 891 vfree(ubi->peb_buf1);
@@ -899,8 +947,8 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
899 kthread_stop(ubi->bgt_thread); 947 kthread_stop(ubi->bgt_thread);
900 948
901 uif_close(ubi); 949 uif_close(ubi);
902 ubi_eba_close(ubi);
903 ubi_wl_close(ubi); 950 ubi_wl_close(ubi);
951 free_internal_volumes(ubi);
904 vfree(ubi->vtbl); 952 vfree(ubi->vtbl);
905 put_mtd_device(ubi->mtd); 953 put_mtd_device(ubi->mtd);
906 vfree(ubi->peb_buf1); 954 vfree(ubi->peb_buf1);
@@ -1044,8 +1092,7 @@ static void __exit ubi_exit(void)
1044module_exit(ubi_exit); 1092module_exit(ubi_exit);
1045 1093
1046/** 1094/**
1047 * bytes_str_to_int - convert a string representing number of bytes to an 1095 * bytes_str_to_int - convert a number of bytes string into an integer.
1048 * integer.
1049 * @str: the string to convert 1096 * @str: the string to convert
1050 * 1097 *
1051 * This function returns positive resulting integer in case of success and a 1098 * This function returns positive resulting integer in case of success and a
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 89193ba9451e..03c759b4eeb5 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -39,9 +39,9 @@
39#include <linux/stat.h> 39#include <linux/stat.h>
40#include <linux/ioctl.h> 40#include <linux/ioctl.h>
41#include <linux/capability.h> 41#include <linux/capability.h>
42#include <linux/uaccess.h>
42#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
43#include <mtd/ubi-user.h> 44#include <mtd/ubi-user.h>
44#include <asm/uaccess.h>
45#include <asm/div64.h> 45#include <asm/div64.h>
46#include "ubi.h" 46#include "ubi.h"
47 47
@@ -116,7 +116,7 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
116 else 116 else
117 mode = UBI_READONLY; 117 mode = UBI_READONLY;
118 118
119 dbg_msg("open volume %d, mode %d", vol_id, mode); 119 dbg_gen("open volume %d, mode %d", vol_id, mode);
120 120
121 desc = ubi_open_volume(ubi_num, vol_id, mode); 121 desc = ubi_open_volume(ubi_num, vol_id, mode);
122 unlock_kernel(); 122 unlock_kernel();
@@ -132,7 +132,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file)
132 struct ubi_volume_desc *desc = file->private_data; 132 struct ubi_volume_desc *desc = file->private_data;
133 struct ubi_volume *vol = desc->vol; 133 struct ubi_volume *vol = desc->vol;
134 134
135 dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode); 135 dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode);
136 136
137 if (vol->updating) { 137 if (vol->updating) {
138 ubi_warn("update of volume %d not finished, volume is damaged", 138 ubi_warn("update of volume %d not finished, volume is damaged",
@@ -141,7 +141,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file)
141 vol->updating = 0; 141 vol->updating = 0;
142 vfree(vol->upd_buf); 142 vfree(vol->upd_buf);
143 } else if (vol->changing_leb) { 143 } else if (vol->changing_leb) {
144 dbg_msg("only %lld of %lld bytes received for atomic LEB change" 144 dbg_gen("only %lld of %lld bytes received for atomic LEB change"
145 " for volume %d:%d, cancel", vol->upd_received, 145 " for volume %d:%d, cancel", vol->upd_received,
146 vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id); 146 vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id);
147 vol->changing_leb = 0; 147 vol->changing_leb = 0;
@@ -183,7 +183,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin)
183 return -EINVAL; 183 return -EINVAL;
184 } 184 }
185 185
186 dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld", 186 dbg_gen("seek volume %d, offset %lld, origin %d, new offset %lld",
187 vol->vol_id, offset, origin, new_offset); 187 vol->vol_id, offset, origin, new_offset);
188 188
189 file->f_pos = new_offset; 189 file->f_pos = new_offset;
@@ -201,7 +201,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
201 void *tbuf; 201 void *tbuf;
202 uint64_t tmp; 202 uint64_t tmp;
203 203
204 dbg_msg("read %zd bytes from offset %lld of volume %d", 204 dbg_gen("read %zd bytes from offset %lld of volume %d",
205 count, *offp, vol->vol_id); 205 count, *offp, vol->vol_id);
206 206
207 if (vol->updating) { 207 if (vol->updating) {
@@ -216,7 +216,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
216 return 0; 216 return 0;
217 217
218 if (vol->corrupted) 218 if (vol->corrupted)
219 dbg_msg("read from corrupted volume %d", vol->vol_id); 219 dbg_gen("read from corrupted volume %d", vol->vol_id);
220 220
221 if (*offp + count > vol->used_bytes) 221 if (*offp + count > vol->used_bytes)
222 count_save = count = vol->used_bytes - *offp; 222 count_save = count = vol->used_bytes - *offp;
@@ -285,7 +285,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
285 char *tbuf; 285 char *tbuf;
286 uint64_t tmp; 286 uint64_t tmp;
287 287
288 dbg_msg("requested: write %zd bytes to offset %lld of volume %u", 288 dbg_gen("requested: write %zd bytes to offset %lld of volume %u",
289 count, *offp, vol->vol_id); 289 count, *offp, vol->vol_id);
290 290
291 if (vol->vol_type == UBI_STATIC_VOLUME) 291 if (vol->vol_type == UBI_STATIC_VOLUME)
@@ -295,7 +295,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
295 off = do_div(tmp, vol->usable_leb_size); 295 off = do_div(tmp, vol->usable_leb_size);
296 lnum = tmp; 296 lnum = tmp;
297 297
298 if (off % ubi->min_io_size) { 298 if (off & (ubi->min_io_size - 1)) {
299 dbg_err("unaligned position"); 299 dbg_err("unaligned position");
300 return -EINVAL; 300 return -EINVAL;
301 } 301 }
@@ -304,7 +304,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
304 count_save = count = vol->used_bytes - *offp; 304 count_save = count = vol->used_bytes - *offp;
305 305
306 /* We can write only in fractions of the minimum I/O unit */ 306 /* We can write only in fractions of the minimum I/O unit */
307 if (count % ubi->min_io_size) { 307 if (count & (ubi->min_io_size - 1)) {
308 dbg_err("unaligned write length"); 308 dbg_err("unaligned write length");
309 return -EINVAL; 309 return -EINVAL;
310 } 310 }
@@ -352,7 +352,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
352} 352}
353 353
354#else 354#else
355#define vol_cdev_direct_write(file, buf, count, offp) -EPERM 355#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM)
356#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */ 356#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */
357 357
358static ssize_t vol_cdev_write(struct file *file, const char __user *buf, 358static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
@@ -437,7 +437,8 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file,
437 break; 437 break;
438 } 438 }
439 439
 440 rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad); 440 rsvd_bytes = (long long)vol->reserved_pebs *
 441 (ubi->leb_size - vol->data_pad);
441 if (bytes < 0 || bytes > rsvd_bytes) { 442 if (bytes < 0 || bytes > rsvd_bytes) {
442 err = -EINVAL; 443 err = -EINVAL;
443 break; 444 break;
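
The cast above matters because both operands are int: with, say, 20,000 reserved PEBs of roughly 128 KiB each, the 32-bit product overflows before it is widened to long long. A user-space sketch with made-up sizes:

#include <stdio.h>

int main(void)
{
	int reserved_pebs = 20000;	/* assumed example values */
	int leb_size = 130944;
	int data_pad = 0;

	/* int * int overflows first (undefined behavior), then widens */
	long long wrong = reserved_pebs * (leb_size - data_pad);
	/* widening one operand makes the whole multiply 64-bit */
	long long right = (long long)reserved_pebs * (leb_size - data_pad);

	printf("wrong=%lld right=%lld\n", wrong, right);
	return 0;
}
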
@@ -513,7 +514,7 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file,
513 break; 514 break;
514 } 515 }
515 516
516 dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); 517 dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
517 err = ubi_eba_unmap_leb(ubi, vol, lnum); 518 err = ubi_eba_unmap_leb(ubi, vol, lnum);
518 if (err) 519 if (err)
519 break; 520 break;
@@ -564,7 +565,7 @@ static int verify_mkvol_req(const struct ubi_device *ubi,
564 if (req->alignment > ubi->leb_size) 565 if (req->alignment > ubi->leb_size)
565 goto bad; 566 goto bad;
566 567
567 n = req->alignment % ubi->min_io_size; 568 n = req->alignment & (ubi->min_io_size - 1);
568 if (req->alignment != 1 && n) 569 if (req->alignment != 1 && n)
569 goto bad; 570 goto bad;
570 571
@@ -573,6 +574,10 @@ static int verify_mkvol_req(const struct ubi_device *ubi,
573 goto bad; 574 goto bad;
574 } 575 }
575 576
577 n = strnlen(req->name, req->name_len + 1);
578 if (n != req->name_len)
579 goto bad;
580
576 return 0; 581 return 0;
577 582
578bad: 583bad:
@@ -600,6 +605,166 @@ static int verify_rsvol_req(const struct ubi_device *ubi,
600 return 0; 605 return 0;
601} 606}
602 607
608/**
609 * rename_volumes - rename UBI volumes.
610 * @ubi: UBI device description object
611 * @req: volumes re-name request
612 *
 613 * This is a helper function for the volume re-name IOCTL which validates
 614 * the request, opens the volumes and calls the corresponding volume management
615 * function. Returns zero in case of success and a negative error code in case
616 * of failure.
617 */
618static int rename_volumes(struct ubi_device *ubi,
619 struct ubi_rnvol_req *req)
620{
621 int i, n, err;
622 struct list_head rename_list;
623 struct ubi_rename_entry *re, *re1;
624
625 if (req->count < 0 || req->count > UBI_MAX_RNVOL)
626 return -EINVAL;
627
628 if (req->count == 0)
629 return 0;
630
631 /* Validate volume IDs and names in the request */
632 for (i = 0; i < req->count; i++) {
633 if (req->ents[i].vol_id < 0 ||
634 req->ents[i].vol_id >= ubi->vtbl_slots)
635 return -EINVAL;
636 if (req->ents[i].name_len < 0)
637 return -EINVAL;
638 if (req->ents[i].name_len > UBI_VOL_NAME_MAX)
639 return -ENAMETOOLONG;
640 req->ents[i].name[req->ents[i].name_len] = '\0';
641 n = strlen(req->ents[i].name);
642 if (n != req->ents[i].name_len)
 643 return -EINVAL;
644 }
645
646 /* Make sure volume IDs and names are unique */
647 for (i = 0; i < req->count - 1; i++) {
648 for (n = i + 1; n < req->count; n++) {
649 if (req->ents[i].vol_id == req->ents[n].vol_id) {
650 dbg_err("duplicated volume id %d",
651 req->ents[i].vol_id);
652 return -EINVAL;
653 }
654 if (!strcmp(req->ents[i].name, req->ents[n].name)) {
655 dbg_err("duplicated volume name \"%s\"",
656 req->ents[i].name);
657 return -EINVAL;
658 }
659 }
660 }
661
662 /* Create the re-name list */
663 INIT_LIST_HEAD(&rename_list);
664 for (i = 0; i < req->count; i++) {
665 int vol_id = req->ents[i].vol_id;
666 int name_len = req->ents[i].name_len;
667 const char *name = req->ents[i].name;
668
669 re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
670 if (!re) {
671 err = -ENOMEM;
672 goto out_free;
673 }
674
675 re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE);
676 if (IS_ERR(re->desc)) {
677 err = PTR_ERR(re->desc);
678 dbg_err("cannot open volume %d, error %d", vol_id, err);
679 kfree(re);
680 goto out_free;
681 }
682
683 /* Skip this re-naming if the name does not really change */
684 if (re->desc->vol->name_len == name_len &&
685 !memcmp(re->desc->vol->name, name, name_len)) {
686 ubi_close_volume(re->desc);
687 kfree(re);
688 continue;
689 }
690
691 re->new_name_len = name_len;
692 memcpy(re->new_name, name, name_len);
693 list_add_tail(&re->list, &rename_list);
694 dbg_msg("will rename volume %d from \"%s\" to \"%s\"",
695 vol_id, re->desc->vol->name, name);
696 }
697
698 if (list_empty(&rename_list))
699 return 0;
700
701 /* Find out the volumes which have to be removed */
702 list_for_each_entry(re, &rename_list, list) {
703 struct ubi_volume_desc *desc;
704 int no_remove_needed = 0;
705
706 /*
707 * Volume @re->vol_id is going to be re-named to
708 * @re->new_name, while its current name is @name. If a volume
709 * with name @re->new_name currently exists, it has to be
710 * removed, unless it is also re-named in the request (@req).
711 */
712 list_for_each_entry(re1, &rename_list, list) {
713 if (re->new_name_len == re1->desc->vol->name_len &&
714 !memcmp(re->new_name, re1->desc->vol->name,
715 re1->desc->vol->name_len)) {
716 no_remove_needed = 1;
717 break;
718 }
719 }
720
721 if (no_remove_needed)
722 continue;
723
724 /*
725 * It seems we need to remove volume with name @re->new_name,
726 * if it exists.
727 */
728 desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE);
729 if (IS_ERR(desc)) {
730 err = PTR_ERR(desc);
731 if (err == -ENODEV)
732 /* Re-naming into a non-existing volume name */
733 continue;
734
 735 /* The volume exists but is busy, or an error occurred */
736 dbg_err("cannot open volume \"%s\", error %d",
737 re->new_name, err);
738 goto out_free;
739 }
740
741 re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
742 if (!re) {
743 err = -ENOMEM;
744 ubi_close_volume(desc);
745 goto out_free;
746 }
747
748 re->remove = 1;
749 re->desc = desc;
750 list_add(&re->list, &rename_list);
751 dbg_msg("will remove volume %d, name \"%s\"",
752 re->desc->vol->vol_id, re->desc->vol->name);
753 }
754
755 mutex_lock(&ubi->volumes_mutex);
756 err = ubi_rename_volumes(ubi, &rename_list);
757 mutex_unlock(&ubi->volumes_mutex);
758
759out_free:
760 list_for_each_entry_safe(re, re1, &rename_list, list) {
761 ubi_close_volume(re->desc);
762 list_del(&re->list);
763 kfree(re);
764 }
765 return err;
766}
767
603static int ubi_cdev_ioctl(struct inode *inode, struct file *file, 768static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
604 unsigned int cmd, unsigned long arg) 769 unsigned int cmd, unsigned long arg)
605{ 770{
@@ -621,19 +786,18 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
621 { 786 {
622 struct ubi_mkvol_req req; 787 struct ubi_mkvol_req req;
623 788
624 dbg_msg("create volume"); 789 dbg_gen("create volume");
625 err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req)); 790 err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req));
626 if (err) { 791 if (err) {
627 err = -EFAULT; 792 err = -EFAULT;
628 break; 793 break;
629 } 794 }
630 795
796 req.name[req.name_len] = '\0';
631 err = verify_mkvol_req(ubi, &req); 797 err = verify_mkvol_req(ubi, &req);
632 if (err) 798 if (err)
633 break; 799 break;
634 800
635 req.name[req.name_len] = '\0';
636
637 mutex_lock(&ubi->volumes_mutex); 801 mutex_lock(&ubi->volumes_mutex);
638 err = ubi_create_volume(ubi, &req); 802 err = ubi_create_volume(ubi, &req);
639 mutex_unlock(&ubi->volumes_mutex); 803 mutex_unlock(&ubi->volumes_mutex);
@@ -652,7 +816,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
652 { 816 {
653 int vol_id; 817 int vol_id;
654 818
655 dbg_msg("remove volume"); 819 dbg_gen("remove volume");
656 err = get_user(vol_id, (__user int32_t *)argp); 820 err = get_user(vol_id, (__user int32_t *)argp);
657 if (err) { 821 if (err) {
658 err = -EFAULT; 822 err = -EFAULT;
@@ -666,7 +830,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
666 } 830 }
667 831
668 mutex_lock(&ubi->volumes_mutex); 832 mutex_lock(&ubi->volumes_mutex);
669 err = ubi_remove_volume(desc); 833 err = ubi_remove_volume(desc, 0);
670 mutex_unlock(&ubi->volumes_mutex); 834 mutex_unlock(&ubi->volumes_mutex);
671 835
672 /* 836 /*
@@ -685,7 +849,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
685 uint64_t tmp; 849 uint64_t tmp;
686 struct ubi_rsvol_req req; 850 struct ubi_rsvol_req req;
687 851
688 dbg_msg("re-size volume"); 852 dbg_gen("re-size volume");
689 err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req)); 853 err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req));
690 if (err) { 854 if (err) {
691 err = -EFAULT; 855 err = -EFAULT;
@@ -713,6 +877,32 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
713 break; 877 break;
714 } 878 }
715 879
880 /* Re-name volumes command */
881 case UBI_IOCRNVOL:
882 {
883 struct ubi_rnvol_req *req;
884
885 dbg_msg("re-name volumes");
886 req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL);
887 if (!req) {
888 err = -ENOMEM;
889 break;
 890 }
891
892 err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req));
893 if (err) {
894 err = -EFAULT;
895 kfree(req);
896 break;
897 }
898
899 mutex_lock(&ubi->mult_mutex);
900 err = rename_volumes(ubi, req);
901 mutex_unlock(&ubi->mult_mutex);
902 kfree(req);
903 break;
904 }
905
716 default: 906 default:
717 err = -ENOTTY; 907 err = -ENOTTY;
718 break; 908 break;
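
For illustration, a hypothetical user-space caller of the new ioctl might look like the sketch below. It assumes the struct ubi_rnvol_req layout and UBI_IOCRNVOL number this series adds to <mtd/ubi-user.h>, and renames one volume on ubi0; error handling is minimal:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <mtd/ubi-user.h>

int main(void)
{
	struct ubi_rnvol_req req;
	int fd = open("/dev/ubi0", O_RDWR);	/* the UBI device, not a volume */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&req, 0, sizeof(req));
	req.count = 1;				/* one rename entry */
	req.ents[0].vol_id = 1;			/* volume to rename (example) */
	req.ents[0].name_len = strlen("rootfs-new");
	strcpy(req.ents[0].name, "rootfs-new");

	if (ioctl(fd, UBI_IOCRNVOL, &req) < 0)
		perror("UBI_IOCRNVOL");

	close(fd);
	return 0;
}
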
@@ -738,7 +928,7 @@ static int ctrl_cdev_ioctl(struct inode *inode, struct file *file,
738 struct ubi_attach_req req; 928 struct ubi_attach_req req;
739 struct mtd_info *mtd; 929 struct mtd_info *mtd;
740 930
741 dbg_msg("attach MTD device"); 931 dbg_gen("attach MTD device");
742 err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req)); 932 err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req));
743 if (err) { 933 if (err) {
744 err = -EFAULT; 934 err = -EFAULT;
@@ -778,7 +968,7 @@ static int ctrl_cdev_ioctl(struct inode *inode, struct file *file,
778 { 968 {
779 int ubi_num; 969 int ubi_num;
780 970
781 dbg_msg("dettach MTD device"); 971 dbg_gen("dettach MTD device");
782 err = get_user(ubi_num, (__user int32_t *)argp); 972 err = get_user(ubi_num, (__user int32_t *)argp);
783 if (err) { 973 if (err) {
784 err = -EFAULT; 974 err = -EFAULT;
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index 56956ec2845f..c0ed60e8ade9 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -24,7 +24,7 @@
24 * changes. 24 * changes.
25 */ 25 */
26 26
27#ifdef CONFIG_MTD_UBI_DEBUG_MSG 27#ifdef CONFIG_MTD_UBI_DEBUG
28 28
29#include "ubi.h" 29#include "ubi.h"
30 30
@@ -34,14 +34,19 @@
34 */ 34 */
35void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) 35void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
36{ 36{
37 dbg_msg("erase counter header dump:"); 37 printk(KERN_DEBUG "Erase counter header dump:\n");
38 dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic)); 38 printk(KERN_DEBUG "\tmagic %#08x\n",
39 dbg_msg("version %d", (int)ec_hdr->version); 39 be32_to_cpu(ec_hdr->magic));
40 dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec)); 40 printk(KERN_DEBUG "\tversion %d\n", (int)ec_hdr->version);
41 dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset)); 41 printk(KERN_DEBUG "\tec %llu\n",
42 dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset)); 42 (long long)be64_to_cpu(ec_hdr->ec));
43 dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc)); 43 printk(KERN_DEBUG "\tvid_hdr_offset %d\n",
44 dbg_msg("erase counter header hexdump:"); 44 be32_to_cpu(ec_hdr->vid_hdr_offset));
45 printk(KERN_DEBUG "\tdata_offset %d\n",
46 be32_to_cpu(ec_hdr->data_offset));
47 printk(KERN_DEBUG "\thdr_crc %#08x\n",
48 be32_to_cpu(ec_hdr->hdr_crc));
49 printk(KERN_DEBUG "erase counter header hexdump:\n");
45 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, 50 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
46 ec_hdr, UBI_EC_HDR_SIZE, 1); 51 ec_hdr, UBI_EC_HDR_SIZE, 1);
47} 52}
@@ -52,22 +57,23 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
52 */ 57 */
53void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) 58void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
54{ 59{
55 dbg_msg("volume identifier header dump:"); 60 printk(KERN_DEBUG "Volume identifier header dump:\n");
56 dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic)); 61 printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic));
57 dbg_msg("version %d", (int)vid_hdr->version); 62 printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version);
58 dbg_msg("vol_type %d", (int)vid_hdr->vol_type); 63 printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type);
59 dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); 64 printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag);
60 dbg_msg("compat %d", (int)vid_hdr->compat); 65 printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat);
61 dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id)); 66 printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id));
62 dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum)); 67 printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum));
63 dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver)); 68 printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size));
64 dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size)); 69 printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs));
65 dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs)); 70 printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad));
66 dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad)); 71 printk(KERN_DEBUG "\tsqnum %llu\n",
67 dbg_msg("sqnum %llu",
68 (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); 72 (unsigned long long)be64_to_cpu(vid_hdr->sqnum));
69 dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc)); 73 printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc));
70 dbg_msg("volume identifier header hexdump:"); 74 printk(KERN_DEBUG "Volume identifier header hexdump:\n");
75 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
76 vid_hdr, UBI_VID_HDR_SIZE, 1);
71} 77}
72 78
73/** 79/**
@@ -76,27 +82,27 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
76 */ 82 */
77void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) 83void ubi_dbg_dump_vol_info(const struct ubi_volume *vol)
78{ 84{
79 dbg_msg("volume information dump:"); 85 printk(KERN_DEBUG "Volume information dump:\n");
80 dbg_msg("vol_id %d", vol->vol_id); 86 printk(KERN_DEBUG "\tvol_id %d\n", vol->vol_id);
81 dbg_msg("reserved_pebs %d", vol->reserved_pebs); 87 printk(KERN_DEBUG "\treserved_pebs %d\n", vol->reserved_pebs);
82 dbg_msg("alignment %d", vol->alignment); 88 printk(KERN_DEBUG "\talignment %d\n", vol->alignment);
83 dbg_msg("data_pad %d", vol->data_pad); 89 printk(KERN_DEBUG "\tdata_pad %d\n", vol->data_pad);
84 dbg_msg("vol_type %d", vol->vol_type); 90 printk(KERN_DEBUG "\tvol_type %d\n", vol->vol_type);
85 dbg_msg("name_len %d", vol->name_len); 91 printk(KERN_DEBUG "\tname_len %d\n", vol->name_len);
86 dbg_msg("usable_leb_size %d", vol->usable_leb_size); 92 printk(KERN_DEBUG "\tusable_leb_size %d\n", vol->usable_leb_size);
87 dbg_msg("used_ebs %d", vol->used_ebs); 93 printk(KERN_DEBUG "\tused_ebs %d\n", vol->used_ebs);
88 dbg_msg("used_bytes %lld", vol->used_bytes); 94 printk(KERN_DEBUG "\tused_bytes %lld\n", vol->used_bytes);
89 dbg_msg("last_eb_bytes %d", vol->last_eb_bytes); 95 printk(KERN_DEBUG "\tlast_eb_bytes %d\n", vol->last_eb_bytes);
90 dbg_msg("corrupted %d", vol->corrupted); 96 printk(KERN_DEBUG "\tcorrupted %d\n", vol->corrupted);
91 dbg_msg("upd_marker %d", vol->upd_marker); 97 printk(KERN_DEBUG "\tupd_marker %d\n", vol->upd_marker);
92 98
93 if (vol->name_len <= UBI_VOL_NAME_MAX && 99 if (vol->name_len <= UBI_VOL_NAME_MAX &&
94 strnlen(vol->name, vol->name_len + 1) == vol->name_len) { 100 strnlen(vol->name, vol->name_len + 1) == vol->name_len) {
95 dbg_msg("name %s", vol->name); 101 printk(KERN_DEBUG "\tname %s\n", vol->name);
96 } else { 102 } else {
97 dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", 103 printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
98 vol->name[0], vol->name[1], vol->name[2], 104 vol->name[0], vol->name[1], vol->name[2],
99 vol->name[3], vol->name[4]); 105 vol->name[3], vol->name[4]);
100 } 106 }
101} 107}
102 108
@@ -109,28 +115,29 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx)
109{ 115{
110 int name_len = be16_to_cpu(r->name_len); 116 int name_len = be16_to_cpu(r->name_len);
111 117
112 dbg_msg("volume table record %d dump:", idx); 118 printk(KERN_DEBUG "Volume table record %d dump:\n", idx);
113 dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs)); 119 printk(KERN_DEBUG "\treserved_pebs %d\n",
114 dbg_msg("alignment %d", be32_to_cpu(r->alignment)); 120 be32_to_cpu(r->reserved_pebs));
115 dbg_msg("data_pad %d", be32_to_cpu(r->data_pad)); 121 printk(KERN_DEBUG "\talignment %d\n", be32_to_cpu(r->alignment));
116 dbg_msg("vol_type %d", (int)r->vol_type); 122 printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(r->data_pad));
117 dbg_msg("upd_marker %d", (int)r->upd_marker); 123 printk(KERN_DEBUG "\tvol_type %d\n", (int)r->vol_type);
118 dbg_msg("name_len %d", name_len); 124 printk(KERN_DEBUG "\tupd_marker %d\n", (int)r->upd_marker);
125 printk(KERN_DEBUG "\tname_len %d\n", name_len);
119 126
120 if (r->name[0] == '\0') { 127 if (r->name[0] == '\0') {
121 dbg_msg("name NULL"); 128 printk(KERN_DEBUG "\tname NULL\n");
122 return; 129 return;
123 } 130 }
124 131
125 if (name_len <= UBI_VOL_NAME_MAX && 132 if (name_len <= UBI_VOL_NAME_MAX &&
126 strnlen(&r->name[0], name_len + 1) == name_len) { 133 strnlen(&r->name[0], name_len + 1) == name_len) {
127 dbg_msg("name %s", &r->name[0]); 134 printk(KERN_DEBUG "\tname %s\n", &r->name[0]);
128 } else { 135 } else {
129 dbg_msg("1st 5 characters of the name: %c%c%c%c%c", 136 printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
130 r->name[0], r->name[1], r->name[2], r->name[3], 137 r->name[0], r->name[1], r->name[2], r->name[3],
131 r->name[4]); 138 r->name[4]);
132 } 139 }
133 dbg_msg("crc %#08x", be32_to_cpu(r->crc)); 140 printk(KERN_DEBUG "\tcrc %#08x\n", be32_to_cpu(r->crc));
134} 141}
135 142
136/** 143/**
@@ -139,15 +146,15 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx)
139 */ 146 */
140void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) 147void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
141{ 148{
142 dbg_msg("volume scanning information dump:"); 149 printk(KERN_DEBUG "Volume scanning information dump:\n");
143 dbg_msg("vol_id %d", sv->vol_id); 150 printk(KERN_DEBUG "\tvol_id %d\n", sv->vol_id);
144 dbg_msg("highest_lnum %d", sv->highest_lnum); 151 printk(KERN_DEBUG "\thighest_lnum %d\n", sv->highest_lnum);
145 dbg_msg("leb_count %d", sv->leb_count); 152 printk(KERN_DEBUG "\tleb_count %d\n", sv->leb_count);
146 dbg_msg("compat %d", sv->compat); 153 printk(KERN_DEBUG "\tcompat %d\n", sv->compat);
147 dbg_msg("vol_type %d", sv->vol_type); 154 printk(KERN_DEBUG "\tvol_type %d\n", sv->vol_type);
148 dbg_msg("used_ebs %d", sv->used_ebs); 155 printk(KERN_DEBUG "\tused_ebs %d\n", sv->used_ebs);
149 dbg_msg("last_data_size %d", sv->last_data_size); 156 printk(KERN_DEBUG "\tlast_data_size %d\n", sv->last_data_size);
150 dbg_msg("data_pad %d", sv->data_pad); 157 printk(KERN_DEBUG "\tdata_pad %d\n", sv->data_pad);
151} 158}
152 159
153/** 160/**
@@ -157,14 +164,13 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
157 */ 164 */
158void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type) 165void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type)
159{ 166{
160 dbg_msg("eraseblock scanning information dump:"); 167 printk(KERN_DEBUG "eraseblock scanning information dump:\n");
161 dbg_msg("ec %d", seb->ec); 168 printk(KERN_DEBUG "\tec %d\n", seb->ec);
162 dbg_msg("pnum %d", seb->pnum); 169 printk(KERN_DEBUG "\tpnum %d\n", seb->pnum);
163 if (type == 0) { 170 if (type == 0) {
164 dbg_msg("lnum %d", seb->lnum); 171 printk(KERN_DEBUG "\tlnum %d\n", seb->lnum);
165 dbg_msg("scrub %d", seb->scrub); 172 printk(KERN_DEBUG "\tscrub %d\n", seb->scrub);
166 dbg_msg("sqnum %llu", seb->sqnum); 173 printk(KERN_DEBUG "\tsqnum %llu\n", seb->sqnum);
167 dbg_msg("leb_ver %u", seb->leb_ver);
168 } 174 }
169} 175}
170 176
@@ -176,16 +182,16 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req)
176{ 182{
177 char nm[17]; 183 char nm[17];
178 184
179 dbg_msg("volume creation request dump:"); 185 printk(KERN_DEBUG "Volume creation request dump:\n");
180 dbg_msg("vol_id %d", req->vol_id); 186 printk(KERN_DEBUG "\tvol_id %d\n", req->vol_id);
181 dbg_msg("alignment %d", req->alignment); 187 printk(KERN_DEBUG "\talignment %d\n", req->alignment);
182 dbg_msg("bytes %lld", (long long)req->bytes); 188 printk(KERN_DEBUG "\tbytes %lld\n", (long long)req->bytes);
183 dbg_msg("vol_type %d", req->vol_type); 189 printk(KERN_DEBUG "\tvol_type %d\n", req->vol_type);
184 dbg_msg("name_len %d", req->name_len); 190 printk(KERN_DEBUG "\tname_len %d\n", req->name_len);
185 191
186 memcpy(nm, req->name, 16); 192 memcpy(nm, req->name, 16);
187 nm[16] = 0; 193 nm[16] = 0;
188 dbg_msg("the 1st 16 characters of the name: %s", nm); 194 printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm);
189} 195}
190 196
191#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ 197#endif /* CONFIG_MTD_UBI_DEBUG */
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 8ea99d8c9e1f..78e914d23ece 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -24,21 +24,16 @@
24#ifdef CONFIG_MTD_UBI_DEBUG 24#ifdef CONFIG_MTD_UBI_DEBUG
25#include <linux/random.h> 25#include <linux/random.h>
26 26
27#define ubi_assert(expr) BUG_ON(!(expr))
28#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) 27#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
29#else
30#define ubi_assert(expr) ({})
31#define dbg_err(fmt, ...) ({})
32#endif
33 28
34#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT 29#define ubi_assert(expr) do { \
35#define DBG_DISABLE_BGT 1 30 if (unlikely(!(expr))) { \
36#else 31 printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
37#define DBG_DISABLE_BGT 0 32 __func__, __LINE__, current->pid); \
38#endif 33 ubi_dbg_dump_stack(); \
34 } \
35} while (0)
39 36
40#ifdef CONFIG_MTD_UBI_DEBUG_MSG
41/* Generic debugging message */
42#define dbg_msg(fmt, ...) \ 37#define dbg_msg(fmt, ...) \
43 printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \ 38 printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \
44 current->pid, __func__, ##__VA_ARGS__) 39 current->pid, __func__, ##__VA_ARGS__)
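
The reworked ubi_assert() trades the old BUG_ON() for a report-and-continue policy, and wraps its body in do { } while (0) so the multi-statement macro still parses as a single statement. A user-space analogue of the idiom (a sketch, not the kernel macro):

#include <stdio.h>

#define soft_assert(expr)						\
do {									\
	if (!(expr))							\
		fprintf(stderr, "assert failed in %s at %d: %s\n",	\
			__func__, __LINE__, #expr);			\
} while (0)

int main(void)
{
	int users = -1;

	if (1)
		soft_assert(users >= 0);	/* expands safely inside if/else */
	else
		soft_assert(users < 0);

	return 0;
}

Without the do/while wrapper, the two statements in the body would break an unbraced if/else around the macro call.
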
@@ -61,36 +56,29 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv);
61void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); 56void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
62void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); 57void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
63 58
59#ifdef CONFIG_MTD_UBI_DEBUG_MSG
60/* General debugging messages */
61#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
64#else 62#else
65 63#define dbg_gen(fmt, ...) ({})
66#define dbg_msg(fmt, ...) ({}) 64#endif
67#define ubi_dbg_dump_stack() ({})
68#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
69#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
70#define ubi_dbg_dump_vol_info(vol) ({})
71#define ubi_dbg_dump_vtbl_record(r, idx) ({})
72#define ubi_dbg_dump_sv(sv) ({})
73#define ubi_dbg_dump_seb(seb, type) ({})
74#define ubi_dbg_dump_mkvol_req(req) ({})
75
76#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
77 65
78#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA 66#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
79/* Messages from the eraseblock association unit */ 67/* Messages from the eraseblock association sub-system */
80#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 68#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
81#else 69#else
82#define dbg_eba(fmt, ...) ({}) 70#define dbg_eba(fmt, ...) ({})
83#endif 71#endif
84 72
85#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL 73#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
86/* Messages from the wear-leveling unit */ 74/* Messages from the wear-leveling sub-system */
87#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 75#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
88#else 76#else
89#define dbg_wl(fmt, ...) ({}) 77#define dbg_wl(fmt, ...) ({})
90#endif 78#endif
91 79
92#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO 80#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
93/* Messages from the input/output unit */ 81/* Messages from the input/output sub-system */
94#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 82#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
95#else 83#else
96#define dbg_io(fmt, ...) ({}) 84#define dbg_io(fmt, ...) ({})
@@ -105,6 +93,12 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
105#define UBI_IO_DEBUG 0 93#define UBI_IO_DEBUG 0
106#endif 94#endif
107 95
96#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
97#define DBG_DISABLE_BGT 1
98#else
99#define DBG_DISABLE_BGT 0
100#endif
101
108#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS 102#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS
109/** 103/**
110 * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. 104 * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
@@ -149,4 +143,30 @@ static inline int ubi_dbg_is_erase_failure(void)
149#define ubi_dbg_is_erase_failure() 0 143#define ubi_dbg_is_erase_failure() 0
150#endif 144#endif
151 145
146#else
147
148#define ubi_assert(expr) ({})
149#define dbg_err(fmt, ...) ({})
150#define dbg_msg(fmt, ...) ({})
151#define dbg_gen(fmt, ...) ({})
152#define dbg_eba(fmt, ...) ({})
153#define dbg_wl(fmt, ...) ({})
154#define dbg_io(fmt, ...) ({})
155#define dbg_bld(fmt, ...) ({})
156#define ubi_dbg_dump_stack() ({})
157#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
158#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
159#define ubi_dbg_dump_vol_info(vol) ({})
160#define ubi_dbg_dump_vtbl_record(r, idx) ({})
161#define ubi_dbg_dump_sv(sv) ({})
162#define ubi_dbg_dump_seb(seb, type) ({})
163#define ubi_dbg_dump_mkvol_req(req) ({})
164
165#define UBI_IO_DEBUG 0
166#define DBG_DISABLE_BGT 0
167#define ubi_dbg_is_bitflip() 0
168#define ubi_dbg_is_write_failure() 0
169#define ubi_dbg_is_erase_failure() 0
170
171#endif /* !CONFIG_MTD_UBI_DEBUG */
152#endif /* !__UBI_DEBUG_H__ */ 172#endif /* !__UBI_DEBUG_H__ */
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 7ce91ca742b1..e04bcf1dff87 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -19,20 +19,20 @@
19 */ 19 */
20 20
21/* 21/*
22 * The UBI Eraseblock Association (EBA) unit. 22 * The UBI Eraseblock Association (EBA) sub-system.
23 * 23 *
24 * This unit is responsible for I/O to/from logical eraseblock. 24 * This sub-system is responsible for I/O to/from logical eraseblock.
25 * 25 *
26 * Although in this implementation the EBA table is fully kept and managed in 26 * Although in this implementation the EBA table is fully kept and managed in
27 * RAM, which assumes poor scalability, it might be (partially) maintained on 27 * RAM, which assumes poor scalability, it might be (partially) maintained on
28 * flash in future implementations. 28 * flash in future implementations.
29 * 29 *
30 * The EBA unit implements per-logical eraseblock locking. Before accessing a 30 * The EBA sub-system implements per-logical eraseblock locking. Before
31 * logical eraseblock it is locked for reading or writing. The per-logical 31 * accessing a logical eraseblock it is locked for reading or writing. The
32 * eraseblock locking is implemented by means of the lock tree. The lock tree 32 * per-logical eraseblock locking is implemented by means of the lock tree. The
33 * is an RB-tree which refers all the currently locked logical eraseblocks. The 33 * lock tree is an RB-tree which refers all the currently locked logical
34 * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by 34 * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
35 * (@vol_id, @lnum) pairs. 35 * They are indexed by (@vol_id, @lnum) pairs.
36 * 36 *
37 * EBA also maintains the global sequence counter which is incremented each 37 * EBA also maintains the global sequence counter which is incremented each
38 * time a logical eraseblock is mapped to a physical eraseblock and it is 38 * time a logical eraseblock is mapped to a physical eraseblock and it is
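
A sketch of the lookup the comment describes, keyed by the (@vol_id, @lnum) pair; the in-tree ltree_lookup() performs the same ordered walk over struct ubi_ltree_entry nodes with the kernel rb-tree helpers, under ubi->ltree_lock:

#include <stddef.h>

struct ltree_entry {
	struct ltree_entry *left, *right;	/* stand-in for struct rb_node */
	int vol_id;
	int lnum;
	int users;				/* readers/writers holding the LEB */
};

static struct ltree_entry *ltree_lookup_sketch(struct ltree_entry *root,
					       int vol_id, int lnum)
{
	while (root) {
		if (vol_id < root->vol_id)
			root = root->left;
		else if (vol_id > root->vol_id)
			root = root->right;
		else if (lnum < root->lnum)	/* same volume: order by lnum */
			root = root->left;
		else if (lnum > root->lnum)
			root = root->right;
		else
			return root;		/* (vol_id, lnum) is locked */
	}
	return NULL;				/* not in the tree: not locked */
}
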
@@ -189,9 +189,7 @@ static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi,
189 le->users += 1; 189 le->users += 1;
190 spin_unlock(&ubi->ltree_lock); 190 spin_unlock(&ubi->ltree_lock);
191 191
192 if (le_free) 192 kfree(le_free);
193 kfree(le_free);
194
195 return le; 193 return le;
196} 194}
197 195
@@ -223,22 +221,18 @@ static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum)
223 */ 221 */
224static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) 222static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
225{ 223{
226 int free = 0;
227 struct ubi_ltree_entry *le; 224 struct ubi_ltree_entry *le;
228 225
229 spin_lock(&ubi->ltree_lock); 226 spin_lock(&ubi->ltree_lock);
230 le = ltree_lookup(ubi, vol_id, lnum); 227 le = ltree_lookup(ubi, vol_id, lnum);
231 le->users -= 1; 228 le->users -= 1;
232 ubi_assert(le->users >= 0); 229 ubi_assert(le->users >= 0);
230 up_read(&le->mutex);
233 if (le->users == 0) { 231 if (le->users == 0) {
234 rb_erase(&le->rb, &ubi->ltree); 232 rb_erase(&le->rb, &ubi->ltree);
235 free = 1; 233 kfree(le);
236 } 234 }
237 spin_unlock(&ubi->ltree_lock); 235 spin_unlock(&ubi->ltree_lock);
238
239 up_read(&le->mutex);
240 if (free)
241 kfree(le);
242} 236}
243 237
244/** 238/**
@@ -274,7 +268,6 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
274 */ 268 */
275static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) 269static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
276{ 270{
277 int free;
278 struct ubi_ltree_entry *le; 271 struct ubi_ltree_entry *le;
279 272
280 le = ltree_add_entry(ubi, vol_id, lnum); 273 le = ltree_add_entry(ubi, vol_id, lnum);
@@ -289,12 +282,9 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
289 ubi_assert(le->users >= 0); 282 ubi_assert(le->users >= 0);
290 if (le->users == 0) { 283 if (le->users == 0) {
291 rb_erase(&le->rb, &ubi->ltree); 284 rb_erase(&le->rb, &ubi->ltree);
292 free = 1;
293 } else
294 free = 0;
295 spin_unlock(&ubi->ltree_lock);
296 if (free)
297 kfree(le); 285 kfree(le);
286 }
287 spin_unlock(&ubi->ltree_lock);
298 288
299 return 1; 289 return 1;
300} 290}
@@ -307,23 +297,18 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
307 */ 297 */
308static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) 298static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
309{ 299{
310 int free;
311 struct ubi_ltree_entry *le; 300 struct ubi_ltree_entry *le;
312 301
313 spin_lock(&ubi->ltree_lock); 302 spin_lock(&ubi->ltree_lock);
314 le = ltree_lookup(ubi, vol_id, lnum); 303 le = ltree_lookup(ubi, vol_id, lnum);
315 le->users -= 1; 304 le->users -= 1;
316 ubi_assert(le->users >= 0); 305 ubi_assert(le->users >= 0);
306 up_write(&le->mutex);
317 if (le->users == 0) { 307 if (le->users == 0) {
318 rb_erase(&le->rb, &ubi->ltree); 308 rb_erase(&le->rb, &ubi->ltree);
319 free = 1;
320 } else
321 free = 0;
322 spin_unlock(&ubi->ltree_lock);
323
324 up_write(&le->mutex);
325 if (free)
326 kfree(le); 309 kfree(le);
310 }
311 spin_unlock(&ubi->ltree_lock);
327} 312}
328 313
329/** 314/**
@@ -516,9 +501,8 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
516 struct ubi_vid_hdr *vid_hdr; 501 struct ubi_vid_hdr *vid_hdr;
517 502
518 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 503 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
519 if (!vid_hdr) { 504 if (!vid_hdr)
520 return -ENOMEM; 505 return -ENOMEM;
521 }
522 506
523 mutex_lock(&ubi->buf_mutex); 507 mutex_lock(&ubi->buf_mutex);
524 508
@@ -752,7 +736,7 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol,
752 /* If this is the last LEB @len may be unaligned */ 736 /* If this is the last LEB @len may be unaligned */
753 len = ALIGN(data_size, ubi->min_io_size); 737 len = ALIGN(data_size, ubi->min_io_size);
754 else 738 else
755 ubi_assert(len % ubi->min_io_size == 0); 739 ubi_assert(!(len & (ubi->min_io_size - 1)));
756 740
757 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 741 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
758 if (!vid_hdr) 742 if (!vid_hdr)
@@ -919,7 +903,7 @@ retry:
919 } 903 }
920 904
921 if (vol->eba_tbl[lnum] >= 0) { 905 if (vol->eba_tbl[lnum] >= 0) {
922 err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); 906 err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0);
923 if (err) 907 if (err)
924 goto out_leb_unlock; 908 goto out_leb_unlock;
925 } 909 }
@@ -1141,7 +1125,7 @@ out_unlock_leb:
1141} 1125}
1142 1126
1143/** 1127/**
1144 * ubi_eba_init_scan - initialize the EBA unit using scanning information. 1128 * ubi_eba_init_scan - initialize the EBA sub-system using scanning information.
1145 * @ubi: UBI device description object 1129 * @ubi: UBI device description object
1146 * @si: scanning information 1130 * @si: scanning information
1147 * 1131 *
@@ -1156,7 +1140,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1156 struct ubi_scan_leb *seb; 1140 struct ubi_scan_leb *seb;
1157 struct rb_node *rb; 1141 struct rb_node *rb;
1158 1142
1159 dbg_eba("initialize EBA unit"); 1143 dbg_eba("initialize EBA sub-system");
1160 1144
1161 spin_lock_init(&ubi->ltree_lock); 1145 spin_lock_init(&ubi->ltree_lock);
1162 mutex_init(&ubi->alc_mutex); 1146 mutex_init(&ubi->alc_mutex);
@@ -1222,7 +1206,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1222 ubi->rsvd_pebs += ubi->beb_rsvd_pebs; 1206 ubi->rsvd_pebs += ubi->beb_rsvd_pebs;
1223 } 1207 }
1224 1208
1225 dbg_eba("EBA unit is initialized"); 1209 dbg_eba("EBA sub-system is initialized");
1226 return 0; 1210 return 0;
1227 1211
1228out_free: 1212out_free:
@@ -1233,20 +1217,3 @@ out_free:
1233 } 1217 }
1234 return err; 1218 return err;
1235} 1219}
1236
1237/**
1238 * ubi_eba_close - close EBA unit.
1239 * @ubi: UBI device description object
1240 */
1241void ubi_eba_close(const struct ubi_device *ubi)
1242{
1243 int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
1244
1245 dbg_eba("close EBA unit");
1246
1247 for (i = 0; i < num_volumes; i++) {
1248 if (!ubi->volumes[i])
1249 continue;
1250 kfree(ubi->volumes[i]->eba_tbl);
1251 }
1252}
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index e909b390069a..605812bb0b1a 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -111,7 +111,7 @@ static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len,
111 struct ubi_device *ubi; 111 struct ubi_device *ubi;
112 uint64_t tmp = from; 112 uint64_t tmp = from;
113 113
114 dbg_msg("read %zd bytes from offset %lld", len, from); 114 dbg_gen("read %zd bytes from offset %lld", len, from);
115 115
116 if (len < 0 || from < 0 || from + len > mtd->size) 116 if (len < 0 || from < 0 || from + len > mtd->size)
117 return -EINVAL; 117 return -EINVAL;
@@ -162,7 +162,7 @@ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len,
162 struct ubi_device *ubi; 162 struct ubi_device *ubi;
163 uint64_t tmp = to; 163 uint64_t tmp = to;
164 164
165 dbg_msg("write %zd bytes to offset %lld", len, to); 165 dbg_gen("write %zd bytes to offset %lld", len, to);
166 166
167 if (len < 0 || to < 0 || len + to > mtd->size) 167 if (len < 0 || to < 0 || len + to > mtd->size)
168 return -EINVAL; 168 return -EINVAL;
@@ -215,7 +215,7 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
215 struct ubi_volume *vol; 215 struct ubi_volume *vol;
216 struct ubi_device *ubi; 216 struct ubi_device *ubi;
217 217
218 dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr); 218 dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr);
219 219
220 if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize) 220 if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
221 return -EINVAL; 221 return -EINVAL;
@@ -249,8 +249,8 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
249 if (err) 249 if (err)
250 goto out_err; 250 goto out_err;
251 251
252 instr->state = MTD_ERASE_DONE; 252 instr->state = MTD_ERASE_DONE;
253 mtd_erase_callback(instr); 253 mtd_erase_callback(instr);
254 return 0; 254 return 0;
255 255
256out_err: 256out_err:
@@ -299,12 +299,12 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
299 mtd->size = vol->used_bytes; 299 mtd->size = vol->used_bytes;
300 300
301 if (add_mtd_device(mtd)) { 301 if (add_mtd_device(mtd)) {
302 ubi_err("cannot not add MTD device\n"); 302 ubi_err("cannot not add MTD device");
303 kfree(mtd->name); 303 kfree(mtd->name);
304 return -ENFILE; 304 return -ENFILE;
305 } 305 }
306 306
307 dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u", 307 dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u",
308 mtd->index, mtd->name, mtd->size, mtd->erasesize); 308 mtd->index, mtd->name, mtd->size, mtd->erasesize);
309 return 0; 309 return 0;
310} 310}
@@ -322,7 +322,7 @@ int ubi_destroy_gluebi(struct ubi_volume *vol)
322 int err; 322 int err;
323 struct mtd_info *mtd = &vol->gluebi_mtd; 323 struct mtd_info *mtd = &vol->gluebi_mtd;
324 324
325 dbg_msg("remove mtd%d", mtd->index); 325 dbg_gen("remove mtd%d", mtd->index);
326 err = del_mtd_device(mtd); 326 err = del_mtd_device(mtd);
327 if (err) 327 if (err)
328 return err; 328 return err;
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 4ac11df7b048..2fb64be44f1b 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -20,15 +20,15 @@
20 */ 20 */
21 21
22/* 22/*
23 * UBI input/output unit. 23 * UBI input/output sub-system.
24 * 24 *
25 * This unit provides a uniform way to work with all kinds of the underlying 25 * This sub-system provides a uniform way to work with all kinds of the
26 * MTD devices. It also implements handy functions for reading and writing UBI 26 * underlying MTD devices. It also implements handy functions for reading and
27 * headers. 27 * writing UBI headers.
28 * 28 *
29 * We are trying to have a paranoid mindset and not to trust to what we read 29 * We are trying to have a paranoid mindset and not to trust to what we read
30 * from the flash media in order to be more secure and robust. So this unit 30 * from the flash media in order to be more secure and robust. So this
31 * validates every single header it reads from the flash media. 31 * sub-system validates every single header it reads from the flash media.
32 * 32 *
33 * Some words about how the eraseblock headers are stored. 33 * Some words about how the eraseblock headers are stored.
34 * 34 *
@@ -79,11 +79,11 @@
79 * 512-byte chunks, we have to allocate one more buffer and copy our VID header 79 * 512-byte chunks, we have to allocate one more buffer and copy our VID header
80 * to offset 448 of this buffer. 80 * to offset 448 of this buffer.
81 * 81 *
82 * The I/O unit does the following trick in order to avoid this extra copy. 82 * The I/O sub-system does the following trick in order to avoid this extra
83 * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header 83 * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID
84 * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the 84 * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer.
85 * VID header is being written out, it shifts the VID header pointer back and 85 * When the VID header is being written out, it shifts the VID header pointer
86 * writes the whole sub-page. 86 * back and writes the whole sub-page.
87 */ 87 */
88 88
89#include <linux/crc32.h> 89#include <linux/crc32.h>
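
The trick reads more concretely as a small sketch (assumed example sizes: 512-byte sub-pages and a 64-byte VID header, so the header lives at offset 448 of the sub-page buffer):

#include <stdlib.h>
#include <string.h>

#define SUBPAGE_SIZE	512	/* example sub-page size */
#define VID_HDR_SIZE	64	/* example VID header size */
#define VID_HDR_SHIFT	(SUBPAGE_SIZE - VID_HDR_SIZE)	/* header at offset 448 */

int main(void)
{
	/* Allocate the whole sub-page, as ubi_zalloc_vid_hdr() would. */
	char *buf = calloc(1, SUBPAGE_SIZE);
	if (!buf)
		return 1;

	/* Callers only ever see the shifted pointer... */
	char *vid_hdr = buf + VID_HDR_SHIFT;
	memset(vid_hdr, 0xab, VID_HDR_SIZE);	/* fill in the header */

	/* ...while I/O shifts back and writes the whole sub-page from buf. */
	/* write_subpage(pnum, offset, buf, SUBPAGE_SIZE); -- hypothetical */

	free(buf);
	return 0;
}
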
@@ -156,15 +156,19 @@ retry:
156 /* 156 /*
157 * -EUCLEAN is reported if there was a bit-flip which 157 * -EUCLEAN is reported if there was a bit-flip which
158 * was corrected, so this is harmless. 158 * was corrected, so this is harmless.
159 *
160 * We do not report about it here unless debugging is
161 * enabled. A corresponding message will be printed
 162 * later, when it has been scrubbed.
159 */ 163 */
160 ubi_msg("fixable bit-flip detected at PEB %d", pnum); 164 dbg_msg("fixable bit-flip detected at PEB %d", pnum);
161 ubi_assert(len == read); 165 ubi_assert(len == read);
162 return UBI_IO_BITFLIPS; 166 return UBI_IO_BITFLIPS;
163 } 167 }
164 168
165 if (read != len && retries++ < UBI_IO_RETRIES) { 169 if (read != len && retries++ < UBI_IO_RETRIES) {
166 dbg_io("error %d while reading %d bytes from PEB %d:%d, " 170 dbg_io("error %d while reading %d bytes from PEB %d:%d,"
167 "read only %zd bytes, retry", 171 " read only %zd bytes, retry",
168 err, len, pnum, offset, read); 172 err, len, pnum, offset, read);
169 yield(); 173 yield();
170 goto retry; 174 goto retry;
@@ -187,7 +191,7 @@ retry:
187 ubi_assert(len == read); 191 ubi_assert(len == read);
188 192
189 if (ubi_dbg_is_bitflip()) { 193 if (ubi_dbg_is_bitflip()) {
190 dbg_msg("bit-flip (emulated)"); 194 dbg_gen("bit-flip (emulated)");
191 err = UBI_IO_BITFLIPS; 195 err = UBI_IO_BITFLIPS;
192 } 196 }
193 } 197 }
@@ -391,6 +395,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum)
391{ 395{
392 int err, i, patt_count; 396 int err, i, patt_count;
393 397
398 ubi_msg("run torture test for PEB %d", pnum);
394 patt_count = ARRAY_SIZE(patterns); 399 patt_count = ARRAY_SIZE(patterns);
395 ubi_assert(patt_count > 0); 400 ubi_assert(patt_count > 0);
396 401
@@ -434,6 +439,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum)
434 } 439 }
435 440
436 err = patt_count; 441 err = patt_count;
442 ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum);
437 443
438out: 444out:
439 mutex_unlock(&ubi->buf_mutex); 445 mutex_unlock(&ubi->buf_mutex);
@@ -699,8 +705,8 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
699 705
700 if (hdr_crc != crc) { 706 if (hdr_crc != crc) {
701 if (verbose) { 707 if (verbose) {
702 ubi_warn("bad EC header CRC at PEB %d, calculated %#08x," 708 ubi_warn("bad EC header CRC at PEB %d, calculated "
703 " read %#08x", pnum, crc, hdr_crc); 709 "%#08x, read %#08x", pnum, crc, hdr_crc);
704 ubi_dbg_dump_ec_hdr(ec_hdr); 710 ubi_dbg_dump_ec_hdr(ec_hdr);
705 } 711 }
706 return UBI_IO_BAD_EC_HDR; 712 return UBI_IO_BAD_EC_HDR;
@@ -1095,8 +1101,7 @@ fail:
1095} 1101}
1096 1102
1097/** 1103/**
1098 * paranoid_check_peb_ec_hdr - check that the erase counter header of a 1104 * paranoid_check_peb_ec_hdr - check erase counter header.
1099 * physical eraseblock is in-place and is all right.
1100 * @ubi: UBI device description object 1105 * @ubi: UBI device description object
1101 * @pnum: the physical eraseblock number to check 1106 * @pnum: the physical eraseblock number to check
1102 * 1107 *
@@ -1174,8 +1179,7 @@ fail:
1174} 1179}
1175 1180
1176/** 1181/**
1177 * paranoid_check_peb_vid_hdr - check that the volume identifier header of a 1182 * paranoid_check_peb_vid_hdr - check volume identifier header.
1178 * physical eraseblock is in-place and is all right.
1179 * @ubi: UBI device description object 1183 * @ubi: UBI device description object
1180 * @pnum: the physical eraseblock number to check 1184 * @pnum: the physical eraseblock number to check
1181 * 1185 *
@@ -1256,7 +1260,7 @@ static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
1256 1260
1257fail: 1261fail:
1258 ubi_err("paranoid check failed for PEB %d", pnum); 1262 ubi_err("paranoid check failed for PEB %d", pnum);
1259 dbg_msg("hex dump of the %d-%d region", offset, offset + len); 1263 ubi_msg("hex dump of the %d-%d region", offset, offset + len);
1260 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, 1264 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
1261 ubi->dbg_peb_buf, len, 1); 1265 ubi->dbg_peb_buf, len, 1);
1262 err = 1; 1266 err = 1;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index a70d58823f8d..5d9bcf109c13 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -106,7 +106,7 @@ struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode)
106 struct ubi_device *ubi; 106 struct ubi_device *ubi;
107 struct ubi_volume *vol; 107 struct ubi_volume *vol;
108 108
109 dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode); 109 dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
110 110
111 if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) 111 if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
112 return ERR_PTR(-EINVAL); 112 return ERR_PTR(-EINVAL);
@@ -215,7 +215,7 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
215 struct ubi_device *ubi; 215 struct ubi_device *ubi;
216 struct ubi_volume_desc *ret; 216 struct ubi_volume_desc *ret;
217 217
218 dbg_msg("open volume %s, mode %d", name, mode); 218 dbg_gen("open volume %s, mode %d", name, mode);
219 219
220 if (!name) 220 if (!name)
221 return ERR_PTR(-EINVAL); 221 return ERR_PTR(-EINVAL);
@@ -266,7 +266,7 @@ void ubi_close_volume(struct ubi_volume_desc *desc)
266 struct ubi_volume *vol = desc->vol; 266 struct ubi_volume *vol = desc->vol;
267 struct ubi_device *ubi = vol->ubi; 267 struct ubi_device *ubi = vol->ubi;
268 268
269 dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode); 269 dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode);
270 270
271 spin_lock(&ubi->volumes_lock); 271 spin_lock(&ubi->volumes_lock);
272 switch (desc->mode) { 272 switch (desc->mode) {
@@ -323,7 +323,7 @@ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
323 struct ubi_device *ubi = vol->ubi; 323 struct ubi_device *ubi = vol->ubi;
324 int err, vol_id = vol->vol_id; 324 int err, vol_id = vol->vol_id;
325 325
326 dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); 326 dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
327 327
328 if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 || 328 if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 ||
329 lnum >= vol->used_ebs || offset < 0 || len < 0 || 329 lnum >= vol->used_ebs || offset < 0 || len < 0 ||
@@ -388,7 +388,7 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
388 struct ubi_device *ubi = vol->ubi; 388 struct ubi_device *ubi = vol->ubi;
389 int vol_id = vol->vol_id; 389 int vol_id = vol->vol_id;
390 390
391 dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); 391 dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
392 392
393 if (vol_id < 0 || vol_id >= ubi->vtbl_slots) 393 if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
394 return -EINVAL; 394 return -EINVAL;
@@ -397,8 +397,8 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
397 return -EROFS; 397 return -EROFS;
398 398
399 if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 || 399 if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 ||
400 offset + len > vol->usable_leb_size || offset % ubi->min_io_size || 400 offset + len > vol->usable_leb_size ||
401 len % ubi->min_io_size) 401 offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1))
402 return -EINVAL; 402 return -EINVAL;
403 403
404 if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && 404 if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
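Note on the new alignment checks above: replacing "offset % ubi->min_io_size" with "offset & (ubi->min_io_size - 1)" is only correct because UBI requires min_io_size to be a power of two (the I/O sub-system verifies this at attach time). A stand-alone sketch of the equivalence, assuming a hypothetical 2048-byte minimal I/O unit:

	/* For any power-of-two n, (x % n) == (x & (n - 1)). */
	#include <assert.h>

	int main(void)
	{
		const int min_io_size = 2048;	/* assumed NAND page size */
		int offset;

		for (offset = 0; offset < 3 * min_io_size; offset++)
			assert((offset % min_io_size) ==
			       (offset & (min_io_size - 1)));
		return 0;
	}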
@@ -438,7 +438,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
438 struct ubi_device *ubi = vol->ubi; 438 struct ubi_device *ubi = vol->ubi;
439 int vol_id = vol->vol_id; 439 int vol_id = vol->vol_id;
440 440
441 dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); 441 dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
442 442
443 if (vol_id < 0 || vol_id >= ubi->vtbl_slots) 443 if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
444 return -EINVAL; 444 return -EINVAL;
@@ -447,7 +447,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
447 return -EROFS; 447 return -EROFS;
448 448
449 if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 || 449 if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 ||
450 len > vol->usable_leb_size || len % ubi->min_io_size) 450 len > vol->usable_leb_size || len & (ubi->min_io_size - 1))
451 return -EINVAL; 451 return -EINVAL;
452 452
453 if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && 453 if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
@@ -482,7 +482,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum)
482 struct ubi_device *ubi = vol->ubi; 482 struct ubi_device *ubi = vol->ubi;
483 int err; 483 int err;
484 484
485 dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); 485 dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
486 486
487 if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) 487 if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
488 return -EROFS; 488 return -EROFS;
@@ -542,7 +542,7 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum)
542 struct ubi_volume *vol = desc->vol; 542 struct ubi_volume *vol = desc->vol;
543 struct ubi_device *ubi = vol->ubi; 543 struct ubi_device *ubi = vol->ubi;
544 544
545 dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum); 545 dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
546 546
547 if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) 547 if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
548 return -EROFS; 548 return -EROFS;
@@ -579,7 +579,7 @@ int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
579 struct ubi_volume *vol = desc->vol; 579 struct ubi_volume *vol = desc->vol;
580 struct ubi_device *ubi = vol->ubi; 580 struct ubi_device *ubi = vol->ubi;
581 581
582 dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum); 582 dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
583 583
584 if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) 584 if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
585 return -EROFS; 585 return -EROFS;
@@ -621,7 +621,7 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
621{ 621{
622 struct ubi_volume *vol = desc->vol; 622 struct ubi_volume *vol = desc->vol;
623 623
624 dbg_msg("test LEB %d:%d", vol->vol_id, lnum); 624 dbg_gen("test LEB %d:%d", vol->vol_id, lnum);
625 625
626 if (lnum < 0 || lnum >= vol->reserved_pebs) 626 if (lnum < 0 || lnum >= vol->reserved_pebs)
627 return -EINVAL; 627 return -EINVAL;
@@ -632,3 +632,27 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
632 return vol->eba_tbl[lnum] >= 0; 632 return vol->eba_tbl[lnum] >= 0;
633} 633}
634EXPORT_SYMBOL_GPL(ubi_is_mapped); 634EXPORT_SYMBOL_GPL(ubi_is_mapped);
635
636/**
637 * ubi_sync - synchronize UBI device buffers.
638 * @ubi_num: UBI device to synchronize
639 *
640 * The underlying MTD device may cache data in hardware or in software. This
641 * function ensures the caches are flushed. Returns zero in case of success and
642 * a negative error code in case of failure.
643 */
644int ubi_sync(int ubi_num)
645{
646 struct ubi_device *ubi;
647
648 ubi = ubi_get_device(ubi_num);
649 if (!ubi)
650 return -ENODEV;
651
652 if (ubi->mtd->sync)
653 ubi->mtd->sync(ubi->mtd);
654
655 ubi_put_device(ubi);
656 return 0;
657}
658EXPORT_SYMBOL_GPL(ubi_sync);
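The ubi_sync() helper exported above just calls through to the MTD layer's sync method when the driver provides one. A hedged sketch of how an in-kernel UBI user might pair it with ubi_leb_write(); the UBI device number 0 and the already-opened descriptor are assumptions for illustration only:

	#include <linux/mtd/ubi.h>

	/* Write one LEB, then flush anything the underlying MTD device
	 * may still be caching.  'desc' must be opened read-write. */
	static int write_and_flush(struct ubi_volume_desc *desc, int lnum,
				   const void *buf, int len)
	{
		int err;

		err = ubi_leb_write(desc, lnum, buf, 0, len, UBI_UNKNOWN);
		if (err)
			return err;

		return ubi_sync(0);	/* assumed UBI device number 0 */
	}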
diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c
index 93e052812012..22ad31402945 100644
--- a/drivers/mtd/ubi/misc.c
+++ b/drivers/mtd/ubi/misc.c
@@ -37,7 +37,7 @@ int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
37{ 37{
38 int i; 38 int i;
39 39
40 ubi_assert(length % ubi->min_io_size == 0); 40 ubi_assert(!(length & (ubi->min_io_size - 1)));
41 41
42 for (i = length - 1; i >= 0; i--) 42 for (i = length - 1; i >= 0; i--)
43 if (((const uint8_t *)buf)[i] != 0xFF) 43 if (((const uint8_t *)buf)[i] != 0xFF)
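For reference, ubi_calc_data_len() finds the last non-0xFF byte and returns its index plus one, aligned up to the minimal I/O unit, so all-0xFF tails are never written. A stand-alone model of the calculation (user-space sketch, power-of-two min_io assumed, as in the kernel):

	static int calc_data_len(const unsigned char *buf, int length,
				 int min_io)
	{
		int i;

		for (i = length - 1; i >= 0; i--)
			if (buf[i] != 0xFF)
				break;

		/* i == -1 means the whole buffer is 0xFF: nothing to write */
		return (i + 1 + min_io - 1) & ~(min_io - 1);
	}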
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 96d410e106ab..967bb4406df9 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -19,9 +19,9 @@
19 */ 19 */
20 20
21/* 21/*
22 * UBI scanning unit. 22 * UBI scanning sub-system.
23 * 23 *
24 * This unit is responsible for scanning the flash media, checking UBI 24 * This sub-system is responsible for scanning the flash media, checking UBI
25 * headers and providing complete information about the UBI flash image. 25 * headers and providing complete information about the UBI flash image.
26 * 26 *
27 * The scanning information is represented by a &struct ubi_scan_info object. 27 * The scanning information is represented by a &struct ubi_scan_info object.
@@ -93,8 +93,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
93} 93}
94 94
95/** 95/**
96 * validate_vid_hdr - check that volume identifier header is correct and 96 * validate_vid_hdr - check volume identifier header.
97 * consistent.
98 * @vid_hdr: the volume identifier header to check 97 * @vid_hdr: the volume identifier header to check
99 * @sv: information about the volume this logical eraseblock belongs to 98 * @sv: information about the volume this logical eraseblock belongs to
100 * @pnum: physical eraseblock number the VID header came from 99 * @pnum: physical eraseblock number the VID header came from
@@ -103,7 +102,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
103 * non-zero if an inconsistency was found and zero if not. 102 * non-zero if an inconsistency was found and zero if not.
104 * 103 *
105 * Note, UBI sanity-checks everything it reads from the flash media. 104 * Note, UBI sanity-checks everything it reads from the flash media.
106 * Most of the checks are done in the I/O unit. Here we check that the 105 * Most of the checks are done in the I/O sub-system. Here we check that the
107 * information in the VID header is consistent with the information in other VID 106 * information in the VID header is consistent with the information in other VID
108 * headers of the same volume. 107 * headers of the same volume.
109 */ 108 */
@@ -247,45 +246,21 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
247 struct ubi_vid_hdr *vh = NULL; 246 struct ubi_vid_hdr *vh = NULL;
248 unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); 247 unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);
249 248
250 if (seb->sqnum == 0 && sqnum2 == 0) { 249 if (sqnum2 == seb->sqnum) {
251 long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);
252
253 /* 250 /*
254 * UBI constantly increases the logical eraseblock version 251 * This must be a really ancient UBI image which has been
255 * number and it can overflow. Thus, we have to bear in mind 252 * created before sequence numbers support has been added. At
256 * that versions that are close to %0xFFFFFFFF are less then 253 * that time we used 32-bit LEB versions stored in logical
257 * versions that are close to %0. 254 * eraseblocks. That was before UBI got into mainline. We do not
258 * 255 * support these images anymore. Well, those images still
259 * The UBI WL unit guarantees that the number of pending tasks 256 * work, but only if no unclean reboots happened.
260 * is not greater then %0x7FFFFFFF. So, if the difference
261 * between any two versions is greater or equivalent to
262 * %0x7FFFFFFF, there was an overflow and the logical
263 * eraseblock with lower version is actually newer then the one
264 * with higher version.
265 *
266 * FIXME: but this is anyway obsolete and will be removed at
267 * some point.
268 */ 257 */
269 dbg_bld("using old crappy leb_ver stuff"); 258 ubi_err("unsupported on-flash UBI format\n");
270 259 return -EINVAL;
271 if (v1 == v2) { 260 }
272 ubi_err("PEB %d and PEB %d have the same version %lld",
273 seb->pnum, pnum, v1);
274 return -EINVAL;
275 }
276
277 abs = v1 - v2;
278 if (abs < 0)
279 abs = -abs;
280 261
281 if (abs < 0x7FFFFFFF) 262 /* Obviously the LEB with lower sequence counter is older */
282 /* Non-overflow situation */ 263 second_is_newer = !!(sqnum2 > seb->sqnum);
283 second_is_newer = (v2 > v1);
284 else
285 second_is_newer = (v2 < v1);
286 } else
287 /* Obviously the LEB with lower sequence counter is older */
288 second_is_newer = sqnum2 > seb->sqnum;
289 264
290 /* 265 /*
291 * Now we know which copy is newer. If the copy flag of the PEB with 266 * Now we know which copy is newer. If the copy flag of the PEB with
@@ -293,7 +268,7 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
293 * check data CRC. For the second PEB we already have the VID header, 268 * check data CRC. For the second PEB we already have the VID header,
294 * for the first one - we'll need to re-read it from flash. 269 * for the first one - we'll need to re-read it from flash.
295 * 270 *
296 * FIXME: this may be optimized so that we wouldn't read twice. 271 * Note: this may be optimized so that we wouldn't read twice.
297 */ 272 */
298 273
299 if (second_is_newer) { 274 if (second_is_newer) {
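Dropping the 32-bit leb_ver overflow handling is safe because @sqnum is a 64-bit counter incremented once per LEB write: even at a sustained million writes per second, exhausting 2^64 values would take on the order of half a million years, so wrap-around never needs to be handled and the plain "sqnum2 > seb->sqnum" comparison is sufficient.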
@@ -379,8 +354,7 @@ out_free_vidh:
379} 354}
380 355
381/** 356/**
382 * ubi_scan_add_used - add information about a physical eraseblock to the 357 * ubi_scan_add_used - add physical eraseblock to the scanning information.
383 * scanning information.
384 * @ubi: UBI device description object 358 * @ubi: UBI device description object
385 * @si: scanning information 359 * @si: scanning information
386 * @pnum: the physical eraseblock number 360 * @pnum: the physical eraseblock number
@@ -400,7 +374,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
400 int bitflips) 374 int bitflips)
401{ 375{
402 int err, vol_id, lnum; 376 int err, vol_id, lnum;
403 uint32_t leb_ver;
404 unsigned long long sqnum; 377 unsigned long long sqnum;
405 struct ubi_scan_volume *sv; 378 struct ubi_scan_volume *sv;
406 struct ubi_scan_leb *seb; 379 struct ubi_scan_leb *seb;
@@ -409,10 +382,9 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
409 vol_id = be32_to_cpu(vid_hdr->vol_id); 382 vol_id = be32_to_cpu(vid_hdr->vol_id);
410 lnum = be32_to_cpu(vid_hdr->lnum); 383 lnum = be32_to_cpu(vid_hdr->lnum);
411 sqnum = be64_to_cpu(vid_hdr->sqnum); 384 sqnum = be64_to_cpu(vid_hdr->sqnum);
412 leb_ver = be32_to_cpu(vid_hdr->leb_ver);
413 385
414 dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", 386 dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d",
415 pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); 387 pnum, vol_id, lnum, ec, sqnum, bitflips);
416 388
417 sv = add_volume(si, vol_id, pnum, vid_hdr); 389 sv = add_volume(si, vol_id, pnum, vid_hdr);
418 if (IS_ERR(sv) < 0) 390 if (IS_ERR(sv) < 0)
@@ -445,25 +417,20 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
445 */ 417 */
446 418
447 dbg_bld("this LEB already exists: PEB %d, sqnum %llu, " 419 dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "
448 "LEB ver %u, EC %d", seb->pnum, seb->sqnum, 420 "EC %d", seb->pnum, seb->sqnum, seb->ec);
449 seb->leb_ver, seb->ec);
450
451 /*
452 * Make sure that the logical eraseblocks have different
453 * versions. Otherwise the image is bad.
454 */
455 if (seb->leb_ver == leb_ver && leb_ver != 0) {
456 ubi_err("two LEBs with same version %u", leb_ver);
457 ubi_dbg_dump_seb(seb, 0);
458 ubi_dbg_dump_vid_hdr(vid_hdr);
459 return -EINVAL;
460 }
461 421
462 /* 422 /*
463 * Make sure that the logical eraseblocks have different 423 * Make sure that the logical eraseblocks have different
464 * sequence numbers. Otherwise the image is bad. 424 * sequence numbers. Otherwise the image is bad.
465 * 425 *
466 * FIXME: remove 'sqnum != 0' check when leb_ver is removed. 426 * However, if the sequence number is zero, we assume it must
427 * be an ancient UBI image from the era when UBI did not have
428 * sequence numbers. We still can attach these images, unless
429 * there is a need to distinguish between old and new
430 * eraseblocks, in which case we'll refuse the image in
431 * 'compare_lebs()'. In other words, we attach old clean
432 * images, but refuse attaching old images with duplicated
433 * logical eraseblocks because there was an unclean reboot.
467 */ 434 */
468 if (seb->sqnum == sqnum && sqnum != 0) { 435 if (seb->sqnum == sqnum && sqnum != 0) {
469 ubi_err("two LEBs with same sequence number %llu", 436 ubi_err("two LEBs with same sequence number %llu",
@@ -503,7 +470,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
503 seb->pnum = pnum; 470 seb->pnum = pnum;
504 seb->scrub = ((cmp_res & 2) || bitflips); 471 seb->scrub = ((cmp_res & 2) || bitflips);
505 seb->sqnum = sqnum; 472 seb->sqnum = sqnum;
506 seb->leb_ver = leb_ver;
507 473
508 if (sv->highest_lnum == lnum) 474 if (sv->highest_lnum == lnum)
509 sv->last_data_size = 475 sv->last_data_size =
@@ -540,7 +506,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
540 seb->lnum = lnum; 506 seb->lnum = lnum;
541 seb->sqnum = sqnum; 507 seb->sqnum = sqnum;
542 seb->scrub = bitflips; 508 seb->scrub = bitflips;
543 seb->leb_ver = leb_ver;
544 509
545 if (sv->highest_lnum <= lnum) { 510 if (sv->highest_lnum <= lnum) {
546 sv->highest_lnum = lnum; 511 sv->highest_lnum = lnum;
@@ -554,8 +519,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
554} 519}
555 520
556/** 521/**
557 * ubi_scan_find_sv - find information about a particular volume in the 522 * ubi_scan_find_sv - find volume in the scanning information.
558 * scanning information.
559 * @si: scanning information 523 * @si: scanning information
560 * @vol_id: the requested volume ID 524 * @vol_id: the requested volume ID
561 * 525 *
@@ -584,8 +548,7 @@ struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si,
584} 548}
585 549
586/** 550/**
587 * ubi_scan_find_seb - find information about a particular logical 551 * ubi_scan_find_seb - find LEB in the volume scanning information.
588 * eraseblock in the volume scanning information.
589 * @sv: a pointer to the volume scanning information 552 * @sv: a pointer to the volume scanning information
590 * @lnum: the requested logical eraseblock 553 * @lnum: the requested logical eraseblock
591 * 554 *
@@ -645,9 +608,9 @@ void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv)
645 * 608 *
646 * This function erases physical eraseblock 'pnum', and writes the erase 609 * This function erases physical eraseblock 'pnum', and writes the erase
647 * counter header to it. This function should only be used on UBI device 610 * counter header to it. This function should only be used on UBI device
648 * initialization stages, when the EBA unit had not been yet initialized. This 611 * initialization stages, when the EBA sub-system had not been yet initialized.
649 * function returns zero in case of success and a negative error code in case 612 * This function returns zero in case of success and a negative error code in
650 * of failure. 613 * case of failure.
651 */ 614 */
652int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, 615int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si,
653 int pnum, int ec) 616 int pnum, int ec)
@@ -687,9 +650,10 @@ out_free:
687 * @si: scanning information 650 * @si: scanning information
688 * 651 *
689 * This function returns a free physical eraseblock. It is supposed to be 652 * This function returns a free physical eraseblock. It is supposed to be
690 * called on the UBI initialization stages when the wear-leveling unit is not 653 * called on the UBI initialization stages when the wear-leveling sub-system is
691 * initialized yet. This function picks a physical eraseblock from one of the 654 * not initialized yet. This function picks a physical eraseblock from one of
692 * lists, writes the EC header if it is needed, and removes it from the list. 655 * the lists, writes the EC header if it is needed, and removes it from the
656 * list.
693 * 657 *
694 * This function returns scanning physical eraseblock information in case of 658 * This function returns scanning physical eraseblock information in case of
695 * success and an error code in case of failure. 659 * success and an error code in case of failure.
@@ -742,8 +706,7 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
742} 706}
743 707
744/** 708/**
745 * process_eb - read UBI headers, check them and add corresponding data 709 * process_eb - read, check UBI headers, and add them to scanning information.
746 * to the scanning information.
747 * @ubi: UBI device description object 710 * @ubi: UBI device description object
748 * @si: scanning information 711 * @si: scanning information
749 * @pnum: the physical eraseblock number 712 * @pnum: the physical eraseblock number
@@ -751,7 +714,8 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
751 * This function returns a zero if the physical eraseblock was successfully 714 * This function returns a zero if the physical eraseblock was successfully
752 * handled and a negative error code in case of failure. 715 * handled and a negative error code in case of failure.
753 */ 716 */
754static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) 717static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
718 int pnum)
755{ 719{
756 long long uninitialized_var(ec); 720 long long uninitialized_var(ec);
757 int err, bitflips = 0, vol_id, ec_corr = 0; 721 int err, bitflips = 0, vol_id, ec_corr = 0;
@@ -764,8 +728,9 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum
764 return err; 728 return err;
765 else if (err) { 729 else if (err) {
766 /* 730 /*
767 * FIXME: this is actually duty of the I/O unit to initialize 731 * FIXME: this is actually duty of the I/O sub-system to
768 * this, but MTD does not provide enough information. 732 * initialize this, but MTD does not provide enough
733 * information.
769 */ 734 */
770 si->bad_peb_count += 1; 735 si->bad_peb_count += 1;
771 return 0; 736 return 0;
@@ -930,7 +895,7 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi)
930 for (pnum = 0; pnum < ubi->peb_count; pnum++) { 895 for (pnum = 0; pnum < ubi->peb_count; pnum++) {
931 cond_resched(); 896 cond_resched();
932 897
933 dbg_msg("process PEB %d", pnum); 898 dbg_gen("process PEB %d", pnum);
934 err = process_eb(ubi, si, pnum); 899 err = process_eb(ubi, si, pnum);
935 if (err < 0) 900 if (err < 0)
936 goto out_vidh; 901 goto out_vidh;
@@ -1079,8 +1044,7 @@ void ubi_scan_destroy_si(struct ubi_scan_info *si)
1079#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID 1044#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
1080 1045
1081/** 1046/**
1082 * paranoid_check_si - check if the scanning information is correct and 1047 * paranoid_check_si - check the scanning information.
1083 * consistent.
1084 * @ubi: UBI device description object 1048 * @ubi: UBI device description object
1085 * @si: scanning information 1049 * @si: scanning information
1086 * 1050 *
@@ -1265,11 +1229,6 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
1265 ubi_err("bad data_pad %d", sv->data_pad); 1229 ubi_err("bad data_pad %d", sv->data_pad);
1266 goto bad_vid_hdr; 1230 goto bad_vid_hdr;
1267 } 1231 }
1268
1269 if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) {
1270 ubi_err("bad leb_ver %u", seb->leb_ver);
1271 goto bad_vid_hdr;
1272 }
1273 } 1232 }
1274 1233
1275 if (!last_seb) 1234 if (!last_seb)
@@ -1299,8 +1258,7 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
1299 if (err < 0) { 1258 if (err < 0) {
1300 kfree(buf); 1259 kfree(buf);
1301 return err; 1260 return err;
1302 } 1261 } else if (err)
1303 else if (err)
1304 buf[pnum] = 1; 1262 buf[pnum] = 1;
1305 } 1263 }
1306 1264
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h
index 966b9b682a42..61df208e2f20 100644
--- a/drivers/mtd/ubi/scan.h
+++ b/drivers/mtd/ubi/scan.h
@@ -34,7 +34,6 @@
34 * @u: unions RB-tree or @list links 34 * @u: unions RB-tree or @list links
35 * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects 35 * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
36 * @u.list: link in one of the eraseblock lists 36 * @u.list: link in one of the eraseblock lists
37 * @leb_ver: logical eraseblock version (obsolete)
38 * 37 *
39 * One object of this type is allocated for each physical eraseblock during 38 * One object of this type is allocated for each physical eraseblock during
40 * scanning. 39 * scanning.
@@ -49,7 +48,6 @@ struct ubi_scan_leb {
49 struct rb_node rb; 48 struct rb_node rb;
50 struct list_head list; 49 struct list_head list;
51 } u; 50 } u;
52 uint32_t leb_ver;
53}; 51};
54 52
55/** 53/**
@@ -59,16 +57,16 @@ struct ubi_scan_leb {
59 * @leb_count: number of logical eraseblocks in this volume 57 * @leb_count: number of logical eraseblocks in this volume
60 * @vol_type: volume type 58 * @vol_type: volume type
61 * @used_ebs: number of used logical eraseblocks in this volume (only for 59 * @used_ebs: number of used logical eraseblocks in this volume (only for
62 * static volumes) 60 * static volumes)
63 * @last_data_size: amount of data in the last logical eraseblock of this 61 * @last_data_size: amount of data in the last logical eraseblock of this
64 * volume (always equivalent to the usable logical eraseblock size in case of 62 * volume (always equivalent to the usable logical eraseblock
65 * dynamic volumes) 63 * size in case of dynamic volumes)
66 * @data_pad: how many bytes at the end of logical eraseblocks of this volume 64 * @data_pad: how many bytes at the end of logical eraseblocks of this volume
67 * are not used (due to volume alignment) 65 * are not used (due to volume alignment)
68 * @compat: compatibility flags of this volume 66 * @compat: compatibility flags of this volume
69 * @rb: link in the volume RB-tree 67 * @rb: link in the volume RB-tree
70 * @root: root of the RB-tree containing all the eraseblock belonging to this 68 * @root: root of the RB-tree containing all the eraseblock belonging to this
71 * volume (&struct ubi_scan_leb objects) 69 * volume (&struct ubi_scan_leb objects)
72 * 70 *
73 * One object of this type is allocated for each volume during scanning. 71 * One object of this type is allocated for each volume during scanning.
74 */ 72 */
@@ -92,8 +90,8 @@ struct ubi_scan_volume {
92 * @free: list of free physical eraseblocks 90 * @free: list of free physical eraseblocks
93 * @erase: list of physical eraseblocks which have to be erased 91 * @erase: list of physical eraseblocks which have to be erased
94 * @alien: list of physical eraseblocks which should not be used by UBI (e.g., 92 * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
93 * those belonging to "preserve"-compatible internal volumes)
95 * @bad_peb_count: count of bad physical eraseblocks 94 * @bad_peb_count: count of bad physical eraseblocks
96 * those belonging to "preserve"-compatible internal volumes)
97 * @vols_found: number of volumes found during scanning 95 * @vols_found: number of volumes found during scanning
98 * @highest_vol_id: highest volume ID 96 * @highest_vol_id: highest volume ID
99 * @alien_peb_count: count of physical eraseblocks in the @alien list 97 * @alien_peb_count: count of physical eraseblocks in the @alien list
@@ -106,8 +104,8 @@ struct ubi_scan_volume {
106 * @ec_count: a temporary variable used when calculating @mean_ec 104 * @ec_count: a temporary variable used when calculating @mean_ec
107 * 105 *
108 * This data structure contains the result of scanning and may be used by other 106 * This data structure contains the result of scanning and may be used by other
109 * UBI units to build final UBI data structures, further error-recovery and so 107 * UBI sub-systems to build final UBI data structures, further error-recovery
110 * on. 108 * and so on.
111 */ 109 */
112struct ubi_scan_info { 110struct ubi_scan_info {
113 struct rb_root volumes; 111 struct rb_root volumes;
@@ -132,8 +130,7 @@ struct ubi_device;
132struct ubi_vid_hdr; 130struct ubi_vid_hdr;
133 131
134/* 132/*
135 * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a 133 * ubi_scan_move_to_list - move a PEB from the volume tree to a list.
136 * list.
137 * 134 *
138 * @sv: volume scanning information 135 * @sv: volume scanning information
139 * @seb: scanning eraseblock information 136 * @seb: scanning eraseblock information
diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h
index c3185d9fd048..2ad940409053 100644
--- a/drivers/mtd/ubi/ubi-media.h
+++ b/drivers/mtd/ubi/ubi-media.h
@@ -98,10 +98,11 @@ enum {
98 * Compatibility constants used by internal volumes. 98 * Compatibility constants used by internal volumes.
99 * 99 *
100 * @UBI_COMPAT_DELETE: delete this internal volume before anything is written 100 * @UBI_COMPAT_DELETE: delete this internal volume before anything is written
101 * to the flash 101 * to the flash
102 * @UBI_COMPAT_RO: attach this device in read-only mode 102 * @UBI_COMPAT_RO: attach this device in read-only mode
103 * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its 103 * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its
104 * physical eraseblocks, don't allow the wear-leveling unit to move them 104 * physical eraseblocks, don't allow the wear-leveling
105 * sub-system to move them
105 * @UBI_COMPAT_REJECT: reject this UBI image 106 * @UBI_COMPAT_REJECT: reject this UBI image
106 */ 107 */
107enum { 108enum {
@@ -123,7 +124,7 @@ enum {
123 * struct ubi_ec_hdr - UBI erase counter header. 124 * struct ubi_ec_hdr - UBI erase counter header.
124 * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) 125 * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC)
125 * @version: version of UBI implementation which is supposed to accept this 126 * @version: version of UBI implementation which is supposed to accept this
126 * UBI image 127 * UBI image
127 * @padding1: reserved for future, zeroes 128 * @padding1: reserved for future, zeroes
128 * @ec: the erase counter 129 * @ec: the erase counter
129 * @vid_hdr_offset: where the VID header starts 130 * @vid_hdr_offset: where the VID header starts
@@ -159,24 +160,23 @@ struct ubi_ec_hdr {
159 * struct ubi_vid_hdr - on-flash UBI volume identifier header. 160 * struct ubi_vid_hdr - on-flash UBI volume identifier header.
160 * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) 161 * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC)
161 * @version: UBI implementation version which is supposed to accept this UBI 162 * @version: UBI implementation version which is supposed to accept this UBI
162 * image (%UBI_VERSION) 163 * image (%UBI_VERSION)
163 * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) 164 * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC)
164 * @copy_flag: if this logical eraseblock was copied from another physical 165 * @copy_flag: if this logical eraseblock was copied from another physical
165 * eraseblock (for wear-leveling reasons) 166 * eraseblock (for wear-leveling reasons)
166 * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, 167 * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE,
167 * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) 168 * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
168 * @vol_id: ID of this volume 169 * @vol_id: ID of this volume
169 * @lnum: logical eraseblock number 170 * @lnum: logical eraseblock number
170 * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be 171 * @padding1: reserved for future, zeroes
171 * removed, kept only for not breaking older UBI users)
172 * @data_size: how many bytes of data this logical eraseblock contains 172 * @data_size: how many bytes of data this logical eraseblock contains
173 * @used_ebs: total number of used logical eraseblocks in this volume 173 * @used_ebs: total number of used logical eraseblocks in this volume
174 * @data_pad: how many bytes at the end of this physical eraseblock are not 174 * @data_pad: how many bytes at the end of this physical eraseblock are not
175 * used 175 * used
176 * @data_crc: CRC checksum of the data stored in this logical eraseblock 176 * @data_crc: CRC checksum of the data stored in this logical eraseblock
177 * @padding1: reserved for future, zeroes
178 * @sqnum: sequence number
179 * @padding2: reserved for future, zeroes 177 * @padding2: reserved for future, zeroes
178 * @sqnum: sequence number
179 * @padding3: reserved for future, zeroes
180 * @hdr_crc: volume identifier header CRC checksum 180 * @hdr_crc: volume identifier header CRC checksum
181 * 181 *
182 * The @sqnum is the value of the global sequence counter at the time when this 182 * The @sqnum is the value of the global sequence counter at the time when this
@@ -224,10 +224,6 @@ struct ubi_ec_hdr {
224 * checksum is correct, this physical eraseblock is selected (P1). Otherwise 224 * checksum is correct, this physical eraseblock is selected (P1). Otherwise
225 * the older one (P) is selected. 225 * the older one (P) is selected.
226 * 226 *
227 * Note, there is an obsolete @leb_ver field which was used instead of @sqnum
228 * in the past. But it is not used anymore and we keep it in order to be able
229 * to deal with old UBI images. It will be removed at some point.
230 *
231 * There are 2 sorts of volumes in UBI: user volumes and internal volumes. 227 * There are 2 sorts of volumes in UBI: user volumes and internal volumes.
232 * Internal volumes are not seen from outside and are used for various internal 228 * Internal volumes are not seen from outside and are used for various internal
233 * UBI purposes. In this implementation there is only one internal volume - the 229 * UBI purposes. In this implementation there is only one internal volume - the
@@ -248,9 +244,9 @@ struct ubi_ec_hdr {
248 * The @data_crc field contains the CRC checksum of the contents of the logical 244 * The @data_crc field contains the CRC checksum of the contents of the logical
249 * eraseblock if this is a static volume. In case of dynamic volumes, it does 245 * eraseblock if this is a static volume. In case of dynamic volumes, it does
250 * not contain the CRC checksum as a rule. The only exception is when the 246 * not contain the CRC checksum as a rule. The only exception is when the
251 * data of the physical eraseblock was moved by the wear-leveling unit, then 247 * data of the physical eraseblock was moved by the wear-leveling sub-system,
252 * the wear-leveling unit calculates the data CRC and stores it in the 248 * then the wear-leveling sub-system calculates the data CRC and stores it in
253 * @data_crc field. And of course, the @copy_flag is %1 in this case. 249 * the @data_crc field. And of course, the @copy_flag is %1 in this case.
254 * 250 *
255 * The @data_size field is used only for static volumes because UBI has to know 251 * The @data_size field is used only for static volumes because UBI has to know
256 * how many bytes of data are stored in this eraseblock. For dynamic volumes, 252 * how many bytes of data are stored in this eraseblock. For dynamic volumes,
@@ -277,14 +273,14 @@ struct ubi_vid_hdr {
277 __u8 compat; 273 __u8 compat;
278 __be32 vol_id; 274 __be32 vol_id;
279 __be32 lnum; 275 __be32 lnum;
280 __be32 leb_ver; /* obsolete, to be removed, don't use */ 276 __u8 padding1[4];
281 __be32 data_size; 277 __be32 data_size;
282 __be32 used_ebs; 278 __be32 used_ebs;
283 __be32 data_pad; 279 __be32 data_pad;
284 __be32 data_crc; 280 __be32 data_crc;
285 __u8 padding1[4]; 281 __u8 padding2[4];
286 __be64 sqnum; 282 __be64 sqnum;
287 __u8 padding2[12]; 283 __u8 padding3[12];
288 __be32 hdr_crc; 284 __be32 hdr_crc;
289} __attribute__ ((packed)); 285} __attribute__ ((packed));
290 286
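Note that turning @leb_ver into @padding1 (and renumbering the later padding fields) does not change the on-flash format: every field keeps its previous offset and the header remains 64 bytes, so images written by older UBI versions stay readable; only the meaning of those four bytes changes from "version" to "reserved, zeroes".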
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 67dcbd11c15c..1c3fa18c26a7 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -74,15 +74,15 @@
74#define UBI_IO_RETRIES 3 74#define UBI_IO_RETRIES 3
75 75
76/* 76/*
77 * Error codes returned by the I/O unit. 77 * Error codes returned by the I/O sub-system.
78 * 78 *
79 * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only 79 * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
80 * 0xFF bytes 80 * %0xFF bytes
81 * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a 81 * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
82 * valid erase counter header, and the rest are %0xFF bytes 82 * valid erase counter header, and the rest are %0xFF bytes
83 * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) 83 * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
84 * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or 84 * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
85 * CRC) 85 * CRC)
86 * UBI_IO_BITFLIPS: bit-flips were detected and corrected 86 * UBI_IO_BITFLIPS: bit-flips were detected and corrected
87 */ 87 */
88enum { 88enum {
@@ -99,9 +99,9 @@ enum {
99 * @ec: erase counter 99 * @ec: erase counter
100 * @pnum: physical eraseblock number 100 * @pnum: physical eraseblock number
101 * 101 *
102 * This data structure is used in the WL unit. Each physical eraseblock has a 102 * This data structure is used in the WL sub-system. Each physical eraseblock
103 * corresponding &struct wl_entry object which may be kept in different 103 * has a corresponding &struct wl_entry object which may be kept in different
104 * RB-trees. See WL unit for details. 104 * RB-trees. See WL sub-system for details.
105 */ 105 */
106struct ubi_wl_entry { 106struct ubi_wl_entry {
107 struct rb_node rb; 107 struct rb_node rb;
@@ -118,10 +118,10 @@ struct ubi_wl_entry {
118 * @mutex: read/write mutex to implement read/write access serialization to 118 * @mutex: read/write mutex to implement read/write access serialization to
119 * the (@vol_id, @lnum) logical eraseblock 119 * the (@vol_id, @lnum) logical eraseblock
120 * 120 *
121 * This data structure is used in the EBA unit to implement per-LEB locking. 121 * This data structure is used in the EBA sub-system to implement per-LEB
122 * When a logical eraseblock is being locked - a corresponding 122 * locking. When a logical eraseblock is being locked - a corresponding
123 * &struct ubi_ltree_entry object is inserted into the lock tree (@ubi->ltree). 123 * &struct ubi_ltree_entry object is inserted into the lock tree (@ubi->ltree).
124 * See EBA unit for details. 124 * See EBA sub-system for details.
125 */ 125 */
126struct ubi_ltree_entry { 126struct ubi_ltree_entry {
127 struct rb_node rb; 127 struct rb_node rb;
@@ -131,6 +131,27 @@ struct ubi_ltree_entry {
131 struct rw_semaphore mutex; 131 struct rw_semaphore mutex;
132}; 132};
133 133
134/**
135 * struct ubi_rename_entry - volume re-name description data structure.
136 * @new_name_len: new volume name length
137 * @new_name: new volume name
138 * @remove: if not zero, this volume should be removed, not re-named
139 * @desc: descriptor of the volume
140 * @list: links re-name entries into a list
141 *
142 * This data structure is utilized in the multiple volume re-name code. Namely,
143 * UBI first creates a list of &struct ubi_rename_entry objects from the
144 * &struct ubi_rnvol_req request object, and then utilizes this list to do all
145 * the job.
146 */
147struct ubi_rename_entry {
148 int new_name_len;
149 char new_name[UBI_VOL_NAME_MAX + 1];
150 int remove;
151 struct ubi_volume_desc *desc;
152 struct list_head list;
153};
154
134struct ubi_volume_desc; 155struct ubi_volume_desc;
135 156
136/** 157/**
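A hedged sketch of how the re-name code can build such a list from a &struct ubi_rnvol_req (the request field names follow the UBI user-space header; error handling and the IS_ERR() check on the descriptor are omitted):

	struct ubi_rename_entry *re;
	LIST_HEAD(rename_list);
	int i;

	for (i = 0; i < req->count; i++) {
		re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
		if (!re)
			goto out_free;	/* free entries already on the list */

		/* each volume is opened exclusively for the re-name */
		re->desc = ubi_open_volume(ubi->ubi_num, req->ents[i].vol_id,
					   UBI_EXCLUSIVE);
		re->new_name_len = req->ents[i].name_len;
		memcpy(re->new_name, req->ents[i].name, req->ents[i].name_len);
		list_add_tail(&re->list, &rename_list);
	}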
@@ -206,7 +227,7 @@ struct ubi_volume {
206 int alignment; 227 int alignment;
207 int data_pad; 228 int data_pad;
208 int name_len; 229 int name_len;
209 char name[UBI_VOL_NAME_MAX+1]; 230 char name[UBI_VOL_NAME_MAX + 1];
210 231
211 int upd_ebs; 232 int upd_ebs;
212 int ch_lnum; 233 int ch_lnum;
@@ -225,7 +246,7 @@ struct ubi_volume {
225#ifdef CONFIG_MTD_UBI_GLUEBI 246#ifdef CONFIG_MTD_UBI_GLUEBI
226 /* 247 /*
227 * Gluebi-related stuff may be compiled out. 248 * Gluebi-related stuff may be compiled out.
228 * TODO: this should not be built into UBI but should be a separate 249 * Note: this should not be built into UBI but should be a separate
229 * ubimtd driver which works on top of UBI and emulates MTD devices. 250 * ubimtd driver which works on top of UBI and emulates MTD devices.
230 */ 251 */
231 struct ubi_volume_desc *gluebi_desc; 252 struct ubi_volume_desc *gluebi_desc;
@@ -235,8 +256,7 @@ struct ubi_volume {
235}; 256};
236 257
237/** 258/**
238 * struct ubi_volume_desc - descriptor of the UBI volume returned when it is 259 * struct ubi_volume_desc - UBI volume descriptor returned when it is opened.
239 * opened.
240 * @vol: reference to the corresponding volume description object 260 * @vol: reference to the corresponding volume description object
241 * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) 261 * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
242 */ 262 */
@@ -273,7 +293,7 @@ struct ubi_wl_entry;
273 * @vtbl_size: size of the volume table in bytes 293 * @vtbl_size: size of the volume table in bytes
274 * @vtbl: in-RAM volume table copy 294 * @vtbl: in-RAM volume table copy
275 * @volumes_mutex: protects on-flash volume table and serializes volume 295 * @volumes_mutex: protects on-flash volume table and serializes volume
276 * changes, like creation, deletion, update, resize 296 * changes, like creation, deletion, update, re-size and re-name
277 * 297 *
278 * @max_ec: current highest erase counter value 298 * @max_ec: current highest erase counter value
279 * @mean_ec: current mean erase counter value 299 * @mean_ec: current mean erase counter value
@@ -293,6 +313,7 @@ struct ubi_wl_entry;
293 * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works 313 * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
294 * fields 314 * fields
295 * @move_mutex: serializes eraseblock moves 315 * @move_mutex: serializes eraseblock moves
316 * @work_sem: synchronizes the WL worker with user tasks
296 * @wl_scheduled: non-zero if the wear-leveling was scheduled 317 * @wl_scheduled: non-zero if the wear-leveling was scheduled
297 * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any 318 * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
298 * physical eraseblock 319 * physical eraseblock
@@ -316,11 +337,11 @@ struct ubi_wl_entry;
316 * @ro_mode: if the UBI device is in read-only mode 337 * @ro_mode: if the UBI device is in read-only mode
317 * @leb_size: logical eraseblock size 338 * @leb_size: logical eraseblock size
318 * @leb_start: starting offset of logical eraseblocks within physical 339 * @leb_start: starting offset of logical eraseblocks within physical
319 * eraseblocks 340 * eraseblocks
320 * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size 341 * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
321 * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size 342 * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
322 * @vid_hdr_offset: starting offset of the volume identifier header (might be 343 * @vid_hdr_offset: starting offset of the volume identifier header (might be
323 * unaligned) 344 * unaligned)
324 * @vid_hdr_aloffset: starting offset of the VID header aligned to 345 * @vid_hdr_aloffset: starting offset of the VID header aligned to
325 * @hdrs_min_io_size 346 * @hdrs_min_io_size
326 * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset 347 * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
@@ -331,6 +352,8 @@ struct ubi_wl_entry;
331 * @peb_buf1: a buffer of PEB size used for different purposes 352 * @peb_buf1: a buffer of PEB size used for different purposes
332 * @peb_buf2: another buffer of PEB size used for different purposes 353 * @peb_buf2: another buffer of PEB size used for different purposes
333 * @buf_mutex: protects @peb_buf1 and @peb_buf2 354 * @buf_mutex: protects @peb_buf1 and @peb_buf2
355 * @ckvol_mutex: serializes static volume checking when opening
356 * @mult_mutex: serializes operations on multiple volumes, like re-naming
334 * @dbg_peb_buf: buffer of PEB size used for debugging 357 * @dbg_peb_buf: buffer of PEB size used for debugging
335 * @dbg_buf_mutex: protects @dbg_peb_buf 358 * @dbg_buf_mutex: protects @dbg_peb_buf
336 */ 359 */
@@ -356,16 +379,16 @@ struct ubi_device {
356 struct mutex volumes_mutex; 379 struct mutex volumes_mutex;
357 380
358 int max_ec; 381 int max_ec;
359 /* TODO: mean_ec is not updated run-time, fix */ 382 /* Note, mean_ec is not updated run-time - should be fixed */
360 int mean_ec; 383 int mean_ec;
361 384
362 /* EBA unit's stuff */ 385 /* EBA sub-system's stuff */
363 unsigned long long global_sqnum; 386 unsigned long long global_sqnum;
364 spinlock_t ltree_lock; 387 spinlock_t ltree_lock;
365 struct rb_root ltree; 388 struct rb_root ltree;
366 struct mutex alc_mutex; 389 struct mutex alc_mutex;
367 390
368 /* Wear-leveling unit's stuff */ 391 /* Wear-leveling sub-system's stuff */
369 struct rb_root used; 392 struct rb_root used;
370 struct rb_root free; 393 struct rb_root free;
371 struct rb_root scrub; 394 struct rb_root scrub;
@@ -388,7 +411,7 @@ struct ubi_device {
388 int thread_enabled; 411 int thread_enabled;
389 char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; 412 char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
390 413
391 /* I/O unit's stuff */ 414 /* I/O sub-system's stuff */
392 long long flash_size; 415 long long flash_size;
393 int peb_count; 416 int peb_count;
394 int peb_size; 417 int peb_size;
@@ -411,6 +434,7 @@ struct ubi_device {
411 void *peb_buf2; 434 void *peb_buf2;
412 struct mutex buf_mutex; 435 struct mutex buf_mutex;
413 struct mutex ckvol_mutex; 436 struct mutex ckvol_mutex;
437 struct mutex mult_mutex;
414#ifdef CONFIG_MTD_UBI_DEBUG 438#ifdef CONFIG_MTD_UBI_DEBUG
415 void *dbg_peb_buf; 439 void *dbg_peb_buf;
416 struct mutex dbg_buf_mutex; 440 struct mutex dbg_buf_mutex;
@@ -427,12 +451,15 @@ extern struct mutex ubi_devices_mutex;
427/* vtbl.c */ 451/* vtbl.c */
428int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, 452int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
429 struct ubi_vtbl_record *vtbl_rec); 453 struct ubi_vtbl_record *vtbl_rec);
454int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
455 struct list_head *rename_list);
430int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si); 456int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si);
431 457
432/* vmt.c */ 458/* vmt.c */
433int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); 459int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req);
434int ubi_remove_volume(struct ubi_volume_desc *desc); 460int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl);
435int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); 461int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs);
462int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list);
436int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol); 463int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol);
437void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol); 464void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol);
438 465
@@ -447,7 +474,8 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
447 const void __user *buf, int count); 474 const void __user *buf, int count);
448 475
449/* misc.c */ 476/* misc.c */
450int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); 477int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
478 int length);
451int ubi_check_volume(struct ubi_device *ubi, int vol_id); 479int ubi_check_volume(struct ubi_device *ubi, int vol_id);
452void ubi_calculate_reserved(struct ubi_device *ubi); 480void ubi_calculate_reserved(struct ubi_device *ubi);
453 481
@@ -477,7 +505,6 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
477int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, 505int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
478 struct ubi_vid_hdr *vid_hdr); 506 struct ubi_vid_hdr *vid_hdr);
479int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); 507int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
480void ubi_eba_close(const struct ubi_device *ubi);
481 508
482/* wl.c */ 509/* wl.c */
483int ubi_wl_get_peb(struct ubi_device *ubi, int dtype); 510int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index ddaa1a56cc69..8b89cc18ff0b 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -39,7 +39,7 @@
39 */ 39 */
40 40
41#include <linux/err.h> 41#include <linux/err.h>
42#include <asm/uaccess.h> 42#include <linux/uaccess.h>
43#include <asm/div64.h> 43#include <asm/div64.h>
44#include "ubi.h" 44#include "ubi.h"
45 45
@@ -56,11 +56,11 @@ static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol)
56 int err; 56 int err;
57 struct ubi_vtbl_record vtbl_rec; 57 struct ubi_vtbl_record vtbl_rec;
58 58
59 dbg_msg("set update marker for volume %d", vol->vol_id); 59 dbg_gen("set update marker for volume %d", vol->vol_id);
60 60
61 if (vol->upd_marker) { 61 if (vol->upd_marker) {
62 ubi_assert(ubi->vtbl[vol->vol_id].upd_marker); 62 ubi_assert(ubi->vtbl[vol->vol_id].upd_marker);
63 dbg_msg("already set"); 63 dbg_gen("already set");
64 return 0; 64 return 0;
65 } 65 }
66 66
@@ -92,7 +92,7 @@ static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol,
92 uint64_t tmp; 92 uint64_t tmp;
93 struct ubi_vtbl_record vtbl_rec; 93 struct ubi_vtbl_record vtbl_rec;
94 94
95 dbg_msg("clear update marker for volume %d", vol->vol_id); 95 dbg_gen("clear update marker for volume %d", vol->vol_id);
96 96
97 memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], 97 memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id],
98 sizeof(struct ubi_vtbl_record)); 98 sizeof(struct ubi_vtbl_record));
@@ -133,7 +133,7 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol,
133 int i, err; 133 int i, err;
134 uint64_t tmp; 134 uint64_t tmp;
135 135
136 dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes); 136 dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes);
137 ubi_assert(!vol->updating && !vol->changing_leb); 137 ubi_assert(!vol->updating && !vol->changing_leb);
138 vol->updating = 1; 138 vol->updating = 1;
139 139
@@ -183,7 +183,7 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
183{ 183{
184 ubi_assert(!vol->updating && !vol->changing_leb); 184 ubi_assert(!vol->updating && !vol->changing_leb);
185 185
186 dbg_msg("start changing LEB %d:%d, %u bytes", 186 dbg_gen("start changing LEB %d:%d, %u bytes",
187 vol->vol_id, req->lnum, req->bytes); 187 vol->vol_id, req->lnum, req->bytes);
188 if (req->bytes == 0) 188 if (req->bytes == 0)
189 return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0, 189 return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0,
@@ -237,16 +237,17 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
237 int err; 237 int err;
238 238
239 if (vol->vol_type == UBI_DYNAMIC_VOLUME) { 239 if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
240 len = ALIGN(len, ubi->min_io_size); 240 int l = ALIGN(len, ubi->min_io_size);
241 memset(buf + len, 0xFF, len - len);
242 241
243 len = ubi_calc_data_len(ubi, buf, len); 242 memset(buf + len, 0xFF, l - len);
243 len = ubi_calc_data_len(ubi, buf, l);
244 if (len == 0) { 244 if (len == 0) {
245 dbg_msg("all %d bytes contain 0xFF - skip", len); 245 dbg_gen("all %d bytes contain 0xFF - skip", len);
246 return 0; 246 return 0;
247 } 247 }
248 248
249 err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN); 249 err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len,
250 UBI_UNKNOWN);
250 } else { 251 } else {
251 /* 252 /*
252 * When writing static volume, and this is the last logical 253 * When writing static volume, and this is the last logical
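The hunk above also fixes a real bug: the old code reassigned "len" to the aligned length before the memset, so the "len - len" size was always zero and the 0xFF padding was never written. Keeping the aligned length in the separate variable "l" lets "memset(buf + len, 0xFF, l - len)" pad the tail as intended before ubi_calc_data_len() trims the all-0xFF part back off.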
@@ -267,6 +268,7 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
267 268
268/** 269/**
269 * ubi_more_update_data - write more update data. 270 * ubi_more_update_data - write more update data.
271 * @ubi: UBI device description object
270 * @vol: volume description object 272 * @vol: volume description object
271 * @buf: write data (user-space memory buffer) 273 * @buf: write data (user-space memory buffer)
272 * @count: how many bytes to write 274 * @count: how many bytes to write
@@ -283,7 +285,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
283 uint64_t tmp; 285 uint64_t tmp;
284 int lnum, offs, err = 0, len, to_write = count; 286 int lnum, offs, err = 0, len, to_write = count;
285 287
286 dbg_msg("write %d of %lld bytes, %lld already passed", 288 dbg_gen("write %d of %lld bytes, %lld already passed",
287 count, vol->upd_bytes, vol->upd_received); 289 count, vol->upd_bytes, vol->upd_received);
288 290
289 if (ubi->ro_mode) 291 if (ubi->ro_mode)
@@ -384,6 +386,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
384 386
385/** 387/**
386 * ubi_more_leb_change_data - accept more data for atomic LEB change. 388 * ubi_more_leb_change_data - accept more data for atomic LEB change.
389 * @ubi: UBI device description object
387 * @vol: volume description object 390 * @vol: volume description object
388 * @buf: write data (user-space memory buffer) 391 * @buf: write data (user-space memory buffer)
389 * @count: how many bytes to write 392 * @count: how many bytes to write
@@ -400,7 +403,7 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
400{ 403{
401 int err; 404 int err;
402 405
403 dbg_msg("write %d of %lld bytes, %lld already passed", 406 dbg_gen("write %d of %lld bytes, %lld already passed",
404 count, vol->upd_bytes, vol->upd_received); 407 count, vol->upd_bytes, vol->upd_received);
405 408
406 if (ubi->ro_mode) 409 if (ubi->ro_mode)
@@ -418,7 +421,8 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
418 if (vol->upd_received == vol->upd_bytes) { 421 if (vol->upd_received == vol->upd_bytes) {
419 int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size); 422 int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size);
420 423
421 memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes); 424 memset(vol->upd_buf + vol->upd_bytes, 0xFF,
425 len - vol->upd_bytes);
422 len = ubi_calc_data_len(ubi, vol->upd_buf, len); 426 len = ubi_calc_data_len(ubi, vol->upd_buf, len);
423 err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum, 427 err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum,
424 vol->upd_buf, len, UBI_UNKNOWN); 428 vol->upd_buf, len, UBI_UNKNOWN);
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 5be58d85c639..3531ca9a1e24 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -28,9 +28,9 @@
28#include "ubi.h" 28#include "ubi.h"
29 29
30#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID 30#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
31static void paranoid_check_volumes(struct ubi_device *ubi); 31static int paranoid_check_volumes(struct ubi_device *ubi);
32#else 32#else
33#define paranoid_check_volumes(ubi) 33#define paranoid_check_volumes(ubi) 0
34#endif 34#endif
35 35
36static ssize_t vol_attribute_show(struct device *dev, 36static ssize_t vol_attribute_show(struct device *dev,
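Making paranoid_check_volumes() return int, with the compiled-out stub defined as the expression 0 rather than as nothing, lets callers propagate the result unconditionally - e.g. the "err = paranoid_check_volumes(ubi); return err;" sequence in ubi_create_volume() below simply collapses to "err = 0" when the paranoid checks are configured out.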
@@ -127,6 +127,7 @@ static void vol_release(struct device *dev)
127{ 127{
128 struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); 128 struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev);
129 129
130 kfree(vol->eba_tbl);
130 kfree(vol); 131 kfree(vol);
131} 132}
132 133
@@ -201,7 +202,7 @@ static void volume_sysfs_close(struct ubi_volume *vol)
201 */ 202 */
202int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) 203int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
203{ 204{
204 int i, err, vol_id = req->vol_id, dont_free = 0; 205 int i, err, vol_id = req->vol_id, do_free = 1;
205 struct ubi_volume *vol; 206 struct ubi_volume *vol;
206 struct ubi_vtbl_record vtbl_rec; 207 struct ubi_vtbl_record vtbl_rec;
207 uint64_t bytes; 208 uint64_t bytes;
@@ -217,7 +218,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
217 spin_lock(&ubi->volumes_lock); 218 spin_lock(&ubi->volumes_lock);
218 if (vol_id == UBI_VOL_NUM_AUTO) { 219 if (vol_id == UBI_VOL_NUM_AUTO) {
219 /* Find unused volume ID */ 220 /* Find unused volume ID */
220 dbg_msg("search for vacant volume ID"); 221 dbg_gen("search for vacant volume ID");
221 for (i = 0; i < ubi->vtbl_slots; i++) 222 for (i = 0; i < ubi->vtbl_slots; i++)
222 if (!ubi->volumes[i]) { 223 if (!ubi->volumes[i]) {
223 vol_id = i; 224 vol_id = i;
@@ -232,7 +233,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
232 req->vol_id = vol_id; 233 req->vol_id = vol_id;
233 } 234 }
234 235
235 dbg_msg("volume ID %d, %llu bytes, type %d, name %s", 236 dbg_gen("volume ID %d, %llu bytes, type %d, name %s",
236 vol_id, (unsigned long long)req->bytes, 237 vol_id, (unsigned long long)req->bytes,
237 (int)req->vol_type, req->name); 238 (int)req->vol_type, req->name);
238 239
@@ -252,7 +253,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
252 goto out_unlock; 253 goto out_unlock;
253 } 254 }
254 255
255 /* Calculate how many eraseblocks are requested */ 256 /* Calculate how many eraseblocks are requested */
256 vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; 257 vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;
257 bytes = req->bytes; 258 bytes = req->bytes;
258 if (do_div(bytes, vol->usable_leb_size)) 259 if (do_div(bytes, vol->usable_leb_size))
@@ -274,7 +275,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
274 vol->data_pad = ubi->leb_size % vol->alignment; 275 vol->data_pad = ubi->leb_size % vol->alignment;
275 vol->vol_type = req->vol_type; 276 vol->vol_type = req->vol_type;
276 vol->name_len = req->name_len; 277 vol->name_len = req->name_len;
277 memcpy(vol->name, req->name, vol->name_len + 1); 278 memcpy(vol->name, req->name, vol->name_len);
278 vol->ubi = ubi; 279 vol->ubi = ubi;
279 280
280 /* 281 /*
@@ -349,7 +350,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
349 vtbl_rec.vol_type = UBI_VID_DYNAMIC; 350 vtbl_rec.vol_type = UBI_VID_DYNAMIC;
350 else 351 else
351 vtbl_rec.vol_type = UBI_VID_STATIC; 352 vtbl_rec.vol_type = UBI_VID_STATIC;
352 memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); 353 memcpy(vtbl_rec.name, vol->name, vol->name_len);
353 354
354 err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); 355 err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
355 if (err) 356 if (err)
@@ -360,19 +361,19 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
360 ubi->vol_count += 1; 361 ubi->vol_count += 1;
361 spin_unlock(&ubi->volumes_lock); 362 spin_unlock(&ubi->volumes_lock);
362 363
363 paranoid_check_volumes(ubi); 364 err = paranoid_check_volumes(ubi);
364 return 0; 365 return err;
365 366
366out_sysfs: 367out_sysfs:
367 /* 368 /*
368 * We have registered our device, we should not free the volume* 369 * We have registered our device, we should not free the volume
369 * description object in this function in case of an error - it is 370 * description object in this function in case of an error - it is
370 * freed by the release function. 371 * freed by the release function.
371 * 372 *
372 * Get device reference to prevent the release function from being 373 * Get device reference to prevent the release function from being
373 * called just after sysfs has been closed. 374 * called just after sysfs has been closed.
374 */ 375 */
375 dont_free = 1; 376 do_free = 0;
376 get_device(&vol->dev); 377 get_device(&vol->dev);
377 volume_sysfs_close(vol); 378 volume_sysfs_close(vol);
378out_gluebi: 379out_gluebi:
@@ -382,17 +383,18 @@ out_gluebi:
382out_cdev: 383out_cdev:
383 cdev_del(&vol->cdev); 384 cdev_del(&vol->cdev);
384out_mapping: 385out_mapping:
385 kfree(vol->eba_tbl); 386 if (do_free)
387 kfree(vol->eba_tbl);
386out_acc: 388out_acc:
387 spin_lock(&ubi->volumes_lock); 389 spin_lock(&ubi->volumes_lock);
388 ubi->rsvd_pebs -= vol->reserved_pebs; 390 ubi->rsvd_pebs -= vol->reserved_pebs;
389 ubi->avail_pebs += vol->reserved_pebs; 391 ubi->avail_pebs += vol->reserved_pebs;
390out_unlock: 392out_unlock:
391 spin_unlock(&ubi->volumes_lock); 393 spin_unlock(&ubi->volumes_lock);
392 if (dont_free) 394 if (do_free)
393 put_device(&vol->dev);
394 else
395 kfree(vol); 395 kfree(vol);
396 else
397 put_device(&vol->dev);
396 ubi_err("cannot create volume %d, error %d", vol_id, err); 398 ubi_err("cannot create volume %d, error %d", vol_id, err);
397 return err; 399 return err;
398} 400}
@@ -400,19 +402,20 @@ out_unlock:
400/** 402/**
401 * ubi_remove_volume - remove volume. 403 * ubi_remove_volume - remove volume.
402 * @desc: volume descriptor 404 * @desc: volume descriptor
405 * @no_vtbl: do not change volume table if non-zero
403 * 406 *
404 * This function removes volume described by @desc. The volume has to be opened 407 * This function removes volume described by @desc. The volume has to be opened
405 * in "exclusive" mode. Returns zero in case of success and a negative error 408 * in "exclusive" mode. Returns zero in case of success and a negative error
406 * code in case of failure. The caller has to have the @ubi->volumes_mutex 409 * code in case of failure. The caller has to have the @ubi->volumes_mutex
407 * locked. 410 * locked.
408 */ 411 */
409int ubi_remove_volume(struct ubi_volume_desc *desc) 412int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
410{ 413{
411 struct ubi_volume *vol = desc->vol; 414 struct ubi_volume *vol = desc->vol;
412 struct ubi_device *ubi = vol->ubi; 415 struct ubi_device *ubi = vol->ubi;
413 int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; 416 int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs;
414 417
415 dbg_msg("remove UBI volume %d", vol_id); 418 dbg_gen("remove UBI volume %d", vol_id);
416 ubi_assert(desc->mode == UBI_EXCLUSIVE); 419 ubi_assert(desc->mode == UBI_EXCLUSIVE);
417 ubi_assert(vol == ubi->volumes[vol_id]); 420 ubi_assert(vol == ubi->volumes[vol_id]);
418 421
@@ -435,9 +438,11 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
435 if (err) 438 if (err)
436 goto out_err; 439 goto out_err;
437 440
438 err = ubi_change_vtbl_record(ubi, vol_id, NULL); 441 if (!no_vtbl) {
439 if (err) 442 err = ubi_change_vtbl_record(ubi, vol_id, NULL);
440 goto out_err; 443 if (err)
444 goto out_err;
445 }
441 446
442 for (i = 0; i < vol->reserved_pebs; i++) { 447 for (i = 0; i < vol->reserved_pebs; i++) {
443 err = ubi_eba_unmap_leb(ubi, vol, i); 448 err = ubi_eba_unmap_leb(ubi, vol, i);
@@ -445,8 +450,6 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
445 goto out_err; 450 goto out_err;
446 } 451 }
447 452
448 kfree(vol->eba_tbl);
449 vol->eba_tbl = NULL;
450 cdev_del(&vol->cdev); 453 cdev_del(&vol->cdev);
451 volume_sysfs_close(vol); 454 volume_sysfs_close(vol);
452 455
@@ -465,8 +468,9 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
465 ubi->vol_count -= 1; 468 ubi->vol_count -= 1;
466 spin_unlock(&ubi->volumes_lock); 469 spin_unlock(&ubi->volumes_lock);
467 470
468 paranoid_check_volumes(ubi); 471 if (!no_vtbl)
469 return 0; 472 err = paranoid_check_volumes(ubi);
473 return err;
470 474
471out_err: 475out_err:
472 ubi_err("cannot remove volume %d, error %d", vol_id, err); 476 ubi_err("cannot remove volume %d, error %d", vol_id, err);
@@ -497,7 +501,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
497 if (ubi->ro_mode) 501 if (ubi->ro_mode)
498 return -EROFS; 502 return -EROFS;
499 503
500 dbg_msg("re-size volume %d from %d to %d PEBs", 504 dbg_gen("re-size volume %d from %d to %d PEBs",
501 vol_id, vol->reserved_pebs, reserved_pebs); 505 vol_id, vol->reserved_pebs, reserved_pebs);
502 506
503 if (vol->vol_type == UBI_STATIC_VOLUME && 507 if (vol->vol_type == UBI_STATIC_VOLUME &&
@@ -586,8 +590,8 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
586 (long long)vol->used_ebs * vol->usable_leb_size; 590 (long long)vol->used_ebs * vol->usable_leb_size;
587 } 591 }
588 592
589 paranoid_check_volumes(ubi); 593 err = paranoid_check_volumes(ubi);
590 return 0; 594 return err;
591 595
592out_acc: 596out_acc:
593 if (pebs > 0) { 597 if (pebs > 0) {
@@ -602,6 +606,44 @@ out_free:
602} 606}
603 607
604/** 608/**
609 * ubi_rename_volumes - re-name UBI volumes.
610 * @ubi: UBI device description object
611 * @rename_list: list of &struct ubi_rename_entry objects
612 *
613 * This function re-names or removes volumes specified in the re-name list.
614 * Returns zero in case of success and a negative error code in case of
615 * failure.
616 */
617int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list)
618{
619 int err;
620 struct ubi_rename_entry *re;
621
622 err = ubi_vtbl_rename_volumes(ubi, rename_list);
623 if (err)
624 return err;
625
626 list_for_each_entry(re, rename_list, list) {
627 if (re->remove) {
628 err = ubi_remove_volume(re->desc, 1);
629 if (err)
630 break;
631 } else {
632 struct ubi_volume *vol = re->desc->vol;
633
634 spin_lock(&ubi->volumes_lock);
635 vol->name_len = re->new_name_len;
636 memcpy(vol->name, re->new_name, re->new_name_len + 1);
637 spin_unlock(&ubi->volumes_lock);
638 }
639 }
640
641 if (!err)
642 err = paranoid_check_volumes(ubi);
643 return err;
644}
645
646/**
605 * ubi_add_volume - add volume. 647 * ubi_add_volume - add volume.
606 * @ubi: UBI device description object 648 * @ubi: UBI device description object
607 * @vol: volume description object 649 * @vol: volume description object
@@ -615,8 +657,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
615 int err, vol_id = vol->vol_id; 657 int err, vol_id = vol->vol_id;
616 dev_t dev; 658 dev_t dev;
617 659
618 dbg_msg("add volume %d", vol_id); 660 dbg_gen("add volume %d", vol_id);
619 ubi_dbg_dump_vol_info(vol);
620 661
621 /* Register character device for the volume */ 662 /* Register character device for the volume */
622 cdev_init(&vol->cdev, &ubi_vol_cdev_operations); 663 cdev_init(&vol->cdev, &ubi_vol_cdev_operations);
@@ -650,8 +691,8 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
650 return err; 691 return err;
651 } 692 }
652 693
653 paranoid_check_volumes(ubi); 694 err = paranoid_check_volumes(ubi);
654 return 0; 695 return err;
655 696
656out_gluebi: 697out_gluebi:
657 err = ubi_destroy_gluebi(vol); 698 err = ubi_destroy_gluebi(vol);
@@ -672,7 +713,7 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol)
672{ 713{
673 int err; 714 int err;
674 715
675 dbg_msg("free volume %d", vol->vol_id); 716 dbg_gen("free volume %d", vol->vol_id);
676 717
677 ubi->volumes[vol->vol_id] = NULL; 718 ubi->volumes[vol->vol_id] = NULL;
678 err = ubi_destroy_gluebi(vol); 719 err = ubi_destroy_gluebi(vol);
@@ -686,8 +727,10 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol)
686 * paranoid_check_volume - check volume information. 727 * paranoid_check_volume - check volume information.
687 * @ubi: UBI device description object 728 * @ubi: UBI device description object
688 * @vol_id: volume ID 729 * @vol_id: volume ID
730 *
731 * Returns zero if volume is all right and a negative error code if not.
689 */ 732 */
690static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) 733static int paranoid_check_volume(struct ubi_device *ubi, int vol_id)
691{ 734{
692 int idx = vol_id2idx(ubi, vol_id); 735 int idx = vol_id2idx(ubi, vol_id);
693 int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; 736 int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker;
@@ -705,16 +748,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
705 goto fail; 748 goto fail;
706 } 749 }
707 spin_unlock(&ubi->volumes_lock); 750 spin_unlock(&ubi->volumes_lock);
708 return; 751 return 0;
709 }
710
711 if (vol->exclusive) {
712 /*
713 * The volume may be being created at the moment, do not check
714 * it (e.g., it may be in the middle of ubi_create_volume()).
715 */
716 spin_unlock(&ubi->volumes_lock);
717 return;
718 } 752 }
719 753
720 if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || 754 if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 ||
@@ -727,7 +761,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
727 goto fail; 761 goto fail;
728 } 762 }
729 763
730 n = vol->alignment % ubi->min_io_size; 764 n = vol->alignment & (ubi->min_io_size - 1);
731 if (vol->alignment != 1 && n) { 765 if (vol->alignment != 1 && n) {
732 ubi_err("alignment is not multiple of min I/O unit"); 766 ubi_err("alignment is not multiple of min I/O unit");
733 goto fail; 767 goto fail;
@@ -824,31 +858,39 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
824 858
825 if (alignment != vol->alignment || data_pad != vol->data_pad || 859 if (alignment != vol->alignment || data_pad != vol->data_pad ||
826 upd_marker != vol->upd_marker || vol_type != vol->vol_type || 860 upd_marker != vol->upd_marker || vol_type != vol->vol_type ||
827 name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { 861 name_len != vol->name_len || strncmp(name, vol->name, name_len)) {
828 ubi_err("volume info is different"); 862 ubi_err("volume info is different");
829 goto fail; 863 goto fail;
830 } 864 }
831 865
832 spin_unlock(&ubi->volumes_lock); 866 spin_unlock(&ubi->volumes_lock);
833 return; 867 return 0;
834 868
835fail: 869fail:
836 ubi_err("paranoid check failed for volume %d", vol_id); 870 ubi_err("paranoid check failed for volume %d", vol_id);
837 ubi_dbg_dump_vol_info(vol); 871 if (vol)
872 ubi_dbg_dump_vol_info(vol);
838 ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); 873 ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
839 spin_unlock(&ubi->volumes_lock); 874 spin_unlock(&ubi->volumes_lock);
840 BUG(); 875 return -EINVAL;
841} 876}
842 877
843/** 878/**
844 * paranoid_check_volumes - check information about all volumes. 879 * paranoid_check_volumes - check information about all volumes.
845 * @ubi: UBI device description object 880 * @ubi: UBI device description object
881 *
882 * Returns zero if volumes are all right and a negative error code if not.
846 */ 883 */
847static void paranoid_check_volumes(struct ubi_device *ubi) 884static int paranoid_check_volumes(struct ubi_device *ubi)
848{ 885{
849 int i; 886 int i, err = 0;
850 887
851 for (i = 0; i < ubi->vtbl_slots; i++) 888 for (i = 0; i < ubi->vtbl_slots; i++) {
852 paranoid_check_volume(ubi, i); 889 err = paranoid_check_volume(ubi, i);
890 if (err)
891 break;
892 }
893
894 return err;
853} 895}
854#endif 896#endif
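
Several hunks above share one shape: paranoid_check_volume() used to BUG() on an inconsistency and return void; it now returns -EINVAL, and paranoid_check_volumes() stops at the first bad slot and hands the error back to ubi_create_volume() and friends. A compilable sketch of that error-propagation shape, with made-up checker logic:

#include <errno.h>
#include <stdio.h>

/* stand-in checker: pretend slot 3 is inconsistent */
static int check_slot(int slot)
{
	return slot == 3 ? -EINVAL : 0;
}

/* was: a loop over a void checker that BUG()s; now: propagate */
static int check_all(int nslots)
{
	int i, err = 0;

	for (i = 0; i < nslots; i++) {
		err = check_slot(i);
		if (err)
			break;
	}
	return err;
}

int main(void)
{
	printf("check_all() = %d\n", check_all(8));	/* -22 (-EINVAL) */
	return 0;
}
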
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index af36b12be278..217d0e111b2a 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -115,8 +115,58 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
115} 115}
116 116
117/** 117/**
118 * vtbl_check - check if volume table is not corrupted and contains sensible 118 * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table.
119 * data. 119 * @ubi: UBI device description object
120 * @rename_list: list of &struct ubi_rename_entry objects
121 *
122 * This function re-names multiple volumes specified in @rename_list in the volume
123 * table. Returns zero in case of success and a negative error code in case of
124 * failure.
125 */
126int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
127 struct list_head *rename_list)
128{
129 int i, err;
130 struct ubi_rename_entry *re;
131 struct ubi_volume *layout_vol;
132
133 list_for_each_entry(re, rename_list, list) {
134 uint32_t crc;
135 struct ubi_volume *vol = re->desc->vol;
136 struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id];
137
138 if (re->remove) {
139 memcpy(vtbl_rec, &empty_vtbl_record,
140 sizeof(struct ubi_vtbl_record));
141 continue;
142 }
143
144 vtbl_rec->name_len = cpu_to_be16(re->new_name_len);
145 memcpy(vtbl_rec->name, re->new_name, re->new_name_len);
146 memset(vtbl_rec->name + re->new_name_len, 0,
147 UBI_VOL_NAME_MAX + 1 - re->new_name_len);
148 crc = crc32(UBI_CRC32_INIT, vtbl_rec,
149 UBI_VTBL_RECORD_SIZE_CRC);
150 vtbl_rec->crc = cpu_to_be32(crc);
151 }
152
153 layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)];
154 for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
155 err = ubi_eba_unmap_leb(ubi, layout_vol, i);
156 if (err)
157 return err;
158
159 err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0,
160 ubi->vtbl_size, UBI_LONGTERM);
161 if (err)
162 return err;
163 }
164
165 return 0;
166}
167
168/**
169 * vtbl_check - check if volume table is not corrupted and sensible.
120 * @ubi: UBI device description object 170 * @ubi: UBI device description object
121 * @vtbl: volume table 171 * @vtbl: volume table
122 * 172 *
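
The ubi_vtbl_rename_volumes() hunk above rewrites the name field of a fixed-size on-flash record, zero-pads the tail so no bytes of the old name survive, and recomputes the CRC over everything except the trailing CRC word itself. A user-space sketch of that sequence; the record layout and the bitwise CRC-32 below are stand-ins for the kernel's structures and crc32(), not copies of them:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NAME_MAX_LEN 127

struct rec {
	uint16_t name_len;
	char	 name[NAME_MAX_LEN + 1];
	uint8_t	 pad[2];	/* explicit padding, stays zeroed */
	uint32_t crc;		/* covers everything before this field */
};

/* plain reflected CRC-32 (IEEE polynomial), bit by bit */
static uint32_t crc32_le(uint32_t crc, const void *buf, size_t len)
{
	const unsigned char *p = buf;
	size_t i;
	int b;

	for (i = 0; i < len; i++) {
		crc ^= p[i];
		for (b = 0; b < 8; b++)
			crc = (crc >> 1) ^ (0xedb88320U & -(crc & 1));
	}
	return crc;
}

static void rec_rename(struct rec *r, const char *name, size_t len)
{
	r->name_len = (uint16_t)len;
	memcpy(r->name, name, len);
	/* zero-pad so the CRC never sees bytes of the old name */
	memset(r->name + len, 0, NAME_MAX_LEN + 1 - len);
	r->crc = crc32_le(0xffffffffU, r, offsetof(struct rec, crc));
}

int main(void)
{
	struct rec r = { 0 };

	rec_rename(&r, "volume-a", 8);
	printf("crc = 0x%08x\n", (unsigned int)r.crc);
	return 0;
}
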
@@ -127,7 +177,7 @@ static int vtbl_check(const struct ubi_device *ubi,
127 const struct ubi_vtbl_record *vtbl) 177 const struct ubi_vtbl_record *vtbl)
128{ 178{
129 int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; 179 int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len;
130 int upd_marker; 180 int upd_marker, err;
131 uint32_t crc; 181 uint32_t crc;
132 const char *name; 182 const char *name;
133 183
@@ -153,7 +203,7 @@ static int vtbl_check(const struct ubi_device *ubi,
153 if (reserved_pebs == 0) { 203 if (reserved_pebs == 0) {
154 if (memcmp(&vtbl[i], &empty_vtbl_record, 204 if (memcmp(&vtbl[i], &empty_vtbl_record,
155 UBI_VTBL_RECORD_SIZE)) { 205 UBI_VTBL_RECORD_SIZE)) {
156 dbg_err("bad empty record"); 206 err = 2;
157 goto bad; 207 goto bad;
158 } 208 }
159 continue; 209 continue;
@@ -161,56 +211,57 @@ static int vtbl_check(const struct ubi_device *ubi,
161 211
162 if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || 212 if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 ||
163 name_len < 0) { 213 name_len < 0) {
164 dbg_err("negative values"); 214 err = 3;
165 goto bad; 215 goto bad;
166 } 216 }
167 217
168 if (alignment > ubi->leb_size || alignment == 0) { 218 if (alignment > ubi->leb_size || alignment == 0) {
169 dbg_err("bad alignment"); 219 err = 4;
170 goto bad; 220 goto bad;
171 } 221 }
172 222
173 n = alignment % ubi->min_io_size; 223 n = alignment & (ubi->min_io_size - 1);
174 if (alignment != 1 && n) { 224 if (alignment != 1 && n) {
175 dbg_err("alignment is not multiple of min I/O unit"); 225 err = 5;
176 goto bad; 226 goto bad;
177 } 227 }
178 228
179 n = ubi->leb_size % alignment; 229 n = ubi->leb_size % alignment;
180 if (data_pad != n) { 230 if (data_pad != n) {
181 dbg_err("bad data_pad, has to be %d", n); 231 dbg_err("bad data_pad, has to be %d", n);
232 err = 6;
182 goto bad; 233 goto bad;
183 } 234 }
184 235
185 if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { 236 if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
186 dbg_err("bad vol_type"); 237 err = 7;
187 goto bad; 238 goto bad;
188 } 239 }
189 240
190 if (upd_marker != 0 && upd_marker != 1) { 241 if (upd_marker != 0 && upd_marker != 1) {
191 dbg_err("bad upd_marker"); 242 err = 8;
192 goto bad; 243 goto bad;
193 } 244 }
194 245
195 if (reserved_pebs > ubi->good_peb_count) { 246 if (reserved_pebs > ubi->good_peb_count) {
196 dbg_err("too large reserved_pebs, good PEBs %d", 247 dbg_err("too large reserved_pebs, good PEBs %d",
197 ubi->good_peb_count); 248 ubi->good_peb_count);
249 err = 9;
198 goto bad; 250 goto bad;
199 } 251 }
200 252
201 if (name_len > UBI_VOL_NAME_MAX) { 253 if (name_len > UBI_VOL_NAME_MAX) {
202 dbg_err("too long volume name, max %d", 254 err = 10;
203 UBI_VOL_NAME_MAX);
204 goto bad; 255 goto bad;
205 } 256 }
206 257
207 if (name[0] == '\0') { 258 if (name[0] == '\0') {
208 dbg_err("NULL volume name"); 259 err = 11;
209 goto bad; 260 goto bad;
210 } 261 }
211 262
212 if (name_len != strnlen(name, name_len + 1)) { 263 if (name_len != strnlen(name, name_len + 1)) {
213 dbg_err("bad name_len"); 264 err = 12;
214 goto bad; 265 goto bad;
215 } 266 }
216 } 267 }
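
Both vtbl_check() here and paranoid_check_volume() in vol.c above swap "x % min_io_size" for "x & (min_io_size - 1)". The two are equivalent only when the divisor is a power of two, which UBI guarantees for min_io_size; a quick sketch of the identity:

#include <assert.h>

/* x % p == x & (p - 1) whenever p is a power of two */
static int is_multiple_of_pow2(unsigned int x, unsigned int p)
{
	assert(p != 0 && (p & (p - 1)) == 0);
	return (x & (p - 1)) == 0;
}

int main(void)
{
	assert(is_multiple_of_pow2(4096, 512));
	assert(!is_multiple_of_pow2(4100, 512));
	return 0;
}
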
@@ -235,7 +286,7 @@ static int vtbl_check(const struct ubi_device *ubi,
235 return 0; 286 return 0;
236 287
237bad: 288bad:
238 ubi_err("volume table check failed, record %d", i); 289 ubi_err("volume table check failed: record %d, error %d", i, err);
239 ubi_dbg_dump_vtbl_record(&vtbl[i], i); 290 ubi_dbg_dump_vtbl_record(&vtbl[i], i);
240 return -EINVAL; 291 return -EINVAL;
241} 292}
@@ -287,7 +338,6 @@ retry:
287 vid_hdr->data_pad = cpu_to_be32(0); 338 vid_hdr->data_pad = cpu_to_be32(0);
288 vid_hdr->lnum = cpu_to_be32(copy); 339 vid_hdr->lnum = cpu_to_be32(copy);
289 vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); 340 vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum);
290 vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0);
291 341
292 /* The EC header is already there, write the VID header */ 342 /* The EC header is already there, write the VID header */
293 err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); 343 err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
@@ -370,7 +420,7 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
370 * to LEB 0. 420 * to LEB 0.
371 */ 421 */
372 422
373 dbg_msg("check layout volume"); 423 dbg_gen("check layout volume");
374 424
375 /* Read both LEB 0 and LEB 1 into memory */ 425 /* Read both LEB 0 and LEB 1 into memory */
376 ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { 426 ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
@@ -384,7 +434,16 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
384 err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, 434 err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
385 ubi->vtbl_size); 435 ubi->vtbl_size);
386 if (err == UBI_IO_BITFLIPS || err == -EBADMSG) 436 if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
387 /* Scrub the PEB later */ 437 /*
438 * Scrub the PEB later. Note, -EBADMSG indicates an
439 * uncorrectable ECC error, but we have our own CRC and
440 * the data will be checked later. If the data is OK,
441 * the PEB will be scrubbed (because we set
442 * seb->scrub). If the data is not OK, the contents of
443 * the PEB will be recovered from the second copy, and
444 * seb->scrub will be cleared in
445 * 'ubi_scan_add_used()'.
446 */
388 seb->scrub = 1; 447 seb->scrub = 1;
389 else if (err) 448 else if (err)
390 goto out_free; 449 goto out_free;
@@ -400,7 +459,8 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
400 if (!leb_corrupted[0]) { 459 if (!leb_corrupted[0]) {
401 /* LEB 0 is OK */ 460 /* LEB 0 is OK */
402 if (leb[1]) 461 if (leb[1])
403 leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); 462 leb_corrupted[1] = memcmp(leb[0], leb[1],
463 ubi->vtbl_size);
404 if (leb_corrupted[1]) { 464 if (leb_corrupted[1]) {
405 ubi_warn("volume table copy #2 is corrupted"); 465 ubi_warn("volume table copy #2 is corrupted");
406 err = create_vtbl(ubi, si, 1, leb[0]); 466 err = create_vtbl(ubi, si, 1, leb[0]);
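
The surrounding code keeps the volume table twice and recovers from either copy: if copy 0 passes its own checks it is authoritative, and copy 1 is re-created whenever the two differ; otherwise copy 1, if sane, is used to rebuild copy 0. A compilable sketch of that selection logic, with stub helpers standing in for vtbl_check() and create_vtbl():

#include <string.h>

struct vtbl { unsigned char data[4096]; };

/* stubs for the sketch; always succeed */
static int check(const struct vtbl *v)			/* vtbl_check() */
{
	(void)v;
	return 0;
}

static int rewrite(int copy, const struct vtbl *from)	/* create_vtbl() */
{
	(void)copy; (void)from;
	return 0;
}

/* pick the authoritative table; repair whichever copy disagrees */
static const struct vtbl *pick_vtbl(struct vtbl *leb0, struct vtbl *leb1)
{
	if (leb0 && !check(leb0)) {
		if (leb1 && memcmp(leb0, leb1, sizeof(*leb0)))
			if (rewrite(1, leb0))
				return NULL;	/* could not repair copy 2 */
		return leb0;
	}
	if (leb1 && !check(leb1)) {
		if (rewrite(0, leb1))
			return NULL;		/* could not repair copy 1 */
		return leb1;
	}
	return NULL;				/* both copies corrupted */
}

int main(void)
{
	struct vtbl a = { { 0 } }, b = { { 1 } };

	return pick_vtbl(&a, &b) ? 0 : 1;
}
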
@@ -620,30 +680,32 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
620static int check_sv(const struct ubi_volume *vol, 680static int check_sv(const struct ubi_volume *vol,
621 const struct ubi_scan_volume *sv) 681 const struct ubi_scan_volume *sv)
622{ 682{
683 int err;
684
623 if (sv->highest_lnum >= vol->reserved_pebs) { 685 if (sv->highest_lnum >= vol->reserved_pebs) {
624 dbg_err("bad highest_lnum"); 686 err = 1;
625 goto bad; 687 goto bad;
626 } 688 }
627 if (sv->leb_count > vol->reserved_pebs) { 689 if (sv->leb_count > vol->reserved_pebs) {
628 dbg_err("bad leb_count"); 690 err = 2;
629 goto bad; 691 goto bad;
630 } 692 }
631 if (sv->vol_type != vol->vol_type) { 693 if (sv->vol_type != vol->vol_type) {
632 dbg_err("bad vol_type"); 694 err = 3;
633 goto bad; 695 goto bad;
634 } 696 }
635 if (sv->used_ebs > vol->reserved_pebs) { 697 if (sv->used_ebs > vol->reserved_pebs) {
636 dbg_err("bad used_ebs"); 698 err = 4;
637 goto bad; 699 goto bad;
638 } 700 }
639 if (sv->data_pad != vol->data_pad) { 701 if (sv->data_pad != vol->data_pad) {
640 dbg_err("bad data_pad"); 702 err = 5;
641 goto bad; 703 goto bad;
642 } 704 }
643 return 0; 705 return 0;
644 706
645bad: 707bad:
646 ubi_err("bad scanning information"); 708 ubi_err("bad scanning information, error %d", err);
647 ubi_dbg_dump_sv(sv); 709 ubi_dbg_dump_sv(sv);
648 ubi_dbg_dump_vol_info(vol); 710 ubi_dbg_dump_vol_info(vol);
649 return -EINVAL; 711 return -EINVAL;
@@ -672,14 +734,13 @@ static int check_scanning_info(const struct ubi_device *ubi,
672 return -EINVAL; 734 return -EINVAL;
673 } 735 }
674 736
675 if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&& 737 if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT &&
676 si->highest_vol_id < UBI_INTERNAL_VOL_START) { 738 si->highest_vol_id < UBI_INTERNAL_VOL_START) {
677 ubi_err("too large volume ID %d found by scanning", 739 ubi_err("too large volume ID %d found by scanning",
678 si->highest_vol_id); 740 si->highest_vol_id);
679 return -EINVAL; 741 return -EINVAL;
680 } 742 }
681 743
682
683 for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { 744 for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
684 cond_resched(); 745 cond_resched();
685 746
@@ -717,8 +778,7 @@ static int check_scanning_info(const struct ubi_device *ubi,
717} 778}
718 779
719/** 780/**
720 * ubi_read_volume_table - read volume table. 781 * ubi_read_volume_table - read the volume table.
721 * information.
722 * @ubi: UBI device description object 782 * @ubi: UBI device description object
723 * @si: scanning information 783 * @si: scanning information
724 * 784 *
@@ -797,11 +857,10 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
797 857
798out_free: 858out_free:
799 vfree(ubi->vtbl); 859 vfree(ubi->vtbl);
800 for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) 860 for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
801 if (ubi->volumes[i]) { 861 kfree(ubi->volumes[i]);
802 kfree(ubi->volumes[i]); 862 ubi->volumes[i] = NULL;
803 ubi->volumes[i] = NULL; 863 }
804 }
805 return err; 864 return err;
806} 865}
807 866
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index a471a491f0ab..05d70937b543 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -19,22 +19,22 @@
19 */ 19 */
20 20
21/* 21/*
22 * UBI wear-leveling unit. 22 * UBI wear-leveling sub-system.
23 * 23 *
24 * This unit is responsible for wear-leveling. It works in terms of physical 24 * This sub-system is responsible for wear-leveling. It works in terms of
25 * eraseblocks and erase counters and knows nothing about logical eraseblocks, 25 * physical eraseblocks and erase counters and knows nothing about logical
26 * volumes, etc. From this unit's perspective all physical eraseblocks are of 26 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
27 * two types - used and free. Used physical eraseblocks are those that were 27 * eraseblocks are of two types - used and free. Used physical eraseblocks are
28 * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are 28 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
29 * those that were put by the 'ubi_wl_put_peb()' function. 29 * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
30 * 30 *
31 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter 31 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
32 * header. The rest of the physical eraseblock contains only 0xFF bytes. 32 * header. The rest of the physical eraseblock contains only %0xFF bytes.
33 * 33 *
34 * When physical eraseblocks are returned to the WL unit by means of the 34 * When physical eraseblocks are returned to the WL sub-system by means of the
35 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is 35 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
36 * done asynchronously in context of the per-UBI device background thread, 36 * done asynchronously in context of the per-UBI device background thread,
37 * which is also managed by the WL unit. 37 * which is also managed by the WL sub-system.
38 * 38 *
39 * The wear-leveling is ensured by means of moving the contents of used 39 * The wear-leveling is ensured by means of moving the contents of used
40 * physical eraseblocks with low erase counter to free physical eraseblocks 40 * physical eraseblocks with low erase counter to free physical eraseblocks
@@ -43,34 +43,36 @@
43 * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick 43 * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
44 * an "optimal" physical eraseblock. For example, when it is known that the 44 * an "optimal" physical eraseblock. For example, when it is known that the
45 * physical eraseblock will be "put" soon because it contains short-term data, 45 * physical eraseblock will be "put" soon because it contains short-term data,
46 * the WL unit may pick a free physical eraseblock with low erase counter, and 46 * the WL sub-system may pick a free physical eraseblock with low erase
47 * so forth. 47 * counter, and so forth.
48 * 48 *
49 * If the WL unit fails to erase a physical eraseblock, it marks it as bad. 49 * If the WL sub-system fails to erase a physical eraseblock, it marks it as
50 * bad.
50 * 51 *
51 * This unit is also responsible for scrubbing. If a bit-flip is detected in a 52 * This sub-system is also responsible for scrubbing. If a bit-flip is detected
52 * physical eraseblock, it has to be moved. Technically this is the same as 53 * in a physical eraseblock, it has to be moved. Technically this is the same
53 * moving it for wear-leveling reasons. 54 * as moving it for wear-leveling reasons.
54 * 55 *
55 * As it was said, for the UBI unit all physical eraseblocks are either "free" 56 * As it was said, for the UBI sub-system all physical eraseblocks are either
56 * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used 57 * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
57 * eraseblocks are kept in a set of different RB-trees: @wl->used, 58 * used eraseblocks are kept in a set of different RB-trees: @wl->used,
58 * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. 59 * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
59 * 60 *
60 * Note, in this implementation, we keep a small in-RAM object for each physical 61 * Note, in this implementation, we keep a small in-RAM object for each physical
61 * eraseblock. This is surely not a scalable solution. But it appears to be good 62 * eraseblock. This is surely not a scalable solution. But it appears to be good
62 * enough for moderately large flashes and it is simple. In future, one may 63 * enough for moderately large flashes and it is simple. In future, one may
63 * re-work this unit and make it more scalable. 64 * re-work this sub-system and make it more scalable.
64 * 65 *
65 * At the moment this unit does not utilize the sequence number, which was 66 * At the moment this sub-system does not utilize the sequence number, which
66 * introduced relatively recently. But it would be wise to do this because the 67 * was introduced relatively recently. But it would be wise to do this because
67 * sequence number of a logical eraseblock characterizes how old it is. For 68 * the sequence number of a logical eraseblock characterizes how old it is. For
68 * example, when we move a PEB with low erase counter, and we need to pick the 69 * example, when we move a PEB with low erase counter, and we need to pick the
69 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we 70 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
70 * pick target PEB with an average EC if our PEB is not very "old". This leaves 71 * pick target PEB with an average EC if our PEB is not very "old". This leaves
71 * room for future re-works of the WL unit. 72 * room for future re-works of the WL sub-system.
72 * 73 *
73 * FIXME: looks too complex, should be simplified (later). 74 * Note: the stuff with protection trees looks too complex and is difficult to
75 * understand. Should be fixed.
74 */ 76 */
75 77
76#include <linux/slab.h> 78#include <linux/slab.h>
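
The comments above describe the WL_FREE_MAX_DIFF window: a free PEB may be handed out only if its erase counter does not exceed the lowest free counter plus the threshold. A user-space sketch of the long-term pick, with a sorted array standing in for the @wl->free RB-tree:

#include <stdio.h>

#define WL_FREE_MAX_DIFF 8	/* stand-in for 2 * UBI_WL_THRESHOLD */

/* ecs[] holds free-PEB erase counters, sorted ascending */
static int pick_longterm(const int *ecs, int n)
{
	int cap = ecs[0] + WL_FREE_MAX_DIFF;
	int i;

	for (i = n - 1; i >= 0; i--)	/* try the most-worn blocks first */
		if (ecs[i] <= cap)
			return i;
	return 0;			/* unreachable: ecs[0] <= cap */
}

int main(void)
{
	int ecs[] = { 10, 11, 14, 17, 30 };

	/* 30 exceeds 10 + 8, so the pick is the PEB with EC 17 */
	printf("pick: ec=%d\n", ecs[pick_longterm(ecs, 5)]);
	return 0;
}
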
@@ -92,20 +94,21 @@
92 94
93/* 95/*
94 * Maximum difference between two erase counters. If this threshold is 96 * Maximum difference between two erase counters. If this threshold is
95 * exceeded, the WL unit starts moving data from used physical eraseblocks with 97 * exceeded, the WL sub-system starts moving data from used physical
96 * low erase counter to free physical eraseblocks with high erase counter. 98 * eraseblocks with low erase counter to free physical eraseblocks with high
99 * erase counter.
97 */ 100 */
98#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD 101#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
99 102
100/* 103/*
101 * When a physical eraseblock is moved, the WL unit has to pick the target 104 * When a physical eraseblock is moved, the WL sub-system has to pick the target
102 * physical eraseblock to move to. The simplest way would be just to pick the 105 * physical eraseblock to move to. The simplest way would be just to pick the
103 * one with the highest erase counter. But in certain workloads this could lead 106 * one with the highest erase counter. But in certain workloads this could lead
104 * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a 107 * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
105 * situation when the picked physical eraseblock is constantly erased after the 108 * situation when the picked physical eraseblock is constantly erased after the
106 * data is written to it. So, we have a constant which limits the highest erase 109 * data is written to it. So, we have a constant which limits the highest erase
107 * counter of the free physical eraseblock to pick. Namely, the WL unit does 110 * counter of the free physical eraseblock to pick. Namely, the WL sub-system
108 * not pick eraseblocks with erase counter greater than the lowest erase 111 * does not pick eraseblocks with erase counter greater than the lowest erase
109 * counter plus %WL_FREE_MAX_DIFF. 112 * counter plus %WL_FREE_MAX_DIFF.
110 */ 113 */
111#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) 114#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
@@ -123,11 +126,11 @@
123 * @abs_ec: the absolute erase counter value when the protection ends 126 * @abs_ec: the absolute erase counter value when the protection ends
124 * @e: the wear-leveling entry of the physical eraseblock under protection 127 * @e: the wear-leveling entry of the physical eraseblock under protection
125 * 128 *
126 * When the WL unit returns a physical eraseblock, the physical eraseblock is 129 * When the WL sub-system returns a physical eraseblock, the physical
127 * protected from being moved for some "time". For this reason, the physical 130 * eraseblock is protected from being moved for some "time". For this reason,
128 * eraseblock is not directly moved from the @wl->free tree to the @wl->used 131 * the physical eraseblock is not directly moved from the @wl->free tree to the
129 * tree. There is one more tree in between where this physical eraseblock is 132 * @wl->used tree. There is one more tree in between where this physical
130 * temporarily stored (@wl->prot). 133 * eraseblock is temporarily stored (@wl->prot).
131 * 134 *
132 * All this protection stuff is needed because: 135 * All this protection stuff is needed because:
133 * o we don't want to move physical eraseblocks just after we have given them 136 * o we don't want to move physical eraseblocks just after we have given them
@@ -175,7 +178,6 @@ struct ubi_wl_prot_entry {
175 * @list: a link in the list of pending works 178 * @list: a link in the list of pending works
176 * @func: worker function 179 * @func: worker function
177 * @priv: private data of the worker function 180 * @priv: private data of the worker function
178 *
179 * @e: physical eraseblock to erase 181 * @e: physical eraseblock to erase
180 * @torture: if the physical eraseblock has to be tortured 182 * @torture: if the physical eraseblock has to be tortured
181 * 183 *
@@ -473,52 +475,47 @@ retry:
473 } 475 }
474 476
475 switch (dtype) { 477 switch (dtype) {
476 case UBI_LONGTERM: 478 case UBI_LONGTERM:
477 /* 479 /*
478 * For long term data we pick a physical eraseblock 480 * For long term data we pick a physical eraseblock with high
479 * with high erase counter. But the highest erase 481 * erase counter. But the highest erase counter we can pick is
480 * counter we can pick is bounded by the lowest 482 * bounded by the lowest erase counter plus
481 * erase counter plus %WL_FREE_MAX_DIFF. 483 * %WL_FREE_MAX_DIFF.
482 */ 484 */
483 e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); 485 e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
484 protect = LT_PROTECTION; 486 protect = LT_PROTECTION;
485 break; 487 break;
486 case UBI_UNKNOWN: 488 case UBI_UNKNOWN:
487 /* 489 /*
488 * For unknown data we pick a physical eraseblock with 490 * For unknown data we pick a physical eraseblock with medium
489 * medium erase counter. But we by no means can pick a 491 * erase counter. But we by no means can pick a physical
490 * physical eraseblock with erase counter greater or 492 * eraseblock with erase counter greater or equivalent than the
491 * equivalent than the lowest erase counter plus 493 * lowest erase counter plus %WL_FREE_MAX_DIFF.
492 * %WL_FREE_MAX_DIFF. 494 */
493 */ 495 first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
494 first = rb_entry(rb_first(&ubi->free), 496 last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb);
495 struct ubi_wl_entry, rb);
496 last = rb_entry(rb_last(&ubi->free),
497 struct ubi_wl_entry, rb);
498 497
499 if (last->ec - first->ec < WL_FREE_MAX_DIFF) 498 if (last->ec - first->ec < WL_FREE_MAX_DIFF)
500 e = rb_entry(ubi->free.rb_node, 499 e = rb_entry(ubi->free.rb_node,
501 struct ubi_wl_entry, rb); 500 struct ubi_wl_entry, rb);
502 else { 501 else {
503 medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; 502 medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
504 e = find_wl_entry(&ubi->free, medium_ec); 503 e = find_wl_entry(&ubi->free, medium_ec);
505 } 504 }
506 protect = U_PROTECTION; 505 protect = U_PROTECTION;
507 break; 506 break;
508 case UBI_SHORTTERM: 507 case UBI_SHORTTERM:
509 /* 508 /*
510 * For short term data we pick a physical eraseblock 509 * For short term data we pick a physical eraseblock with the
511 * with the lowest erase counter as we expect it will 510 * lowest erase counter as we expect it will be erased soon.
512 * be erased soon. 511 */
513 */ 512 e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
514 e = rb_entry(rb_first(&ubi->free), 513 protect = ST_PROTECTION;
515 struct ubi_wl_entry, rb); 514 break;
516 protect = ST_PROTECTION; 515 default:
517 break; 516 protect = 0;
518 default: 517 e = NULL;
519 protect = 0; 518 BUG();
520 e = NULL;
521 BUG();
522 } 519 }
523 520
524 /* 521 /*
@@ -582,7 +579,8 @@ found:
582 * This function returns zero in case of success and a negative error code in 579 * This function returns zero in case of success and a negative error code in
583 * case of failure. 580 * case of failure.
584 */ 581 */
585static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) 582static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
583 int torture)
586{ 584{
587 int err; 585 int err;
588 struct ubi_ec_hdr *ec_hdr; 586 struct ubi_ec_hdr *ec_hdr;
@@ -634,8 +632,7 @@ out_free:
634} 632}
635 633
636/** 634/**
637 * check_protection_over - check if it is time to stop protecting some 635 * check_protection_over - check if it is time to stop protecting some PEBs.
638 * physical eraseblocks.
639 * @ubi: UBI device description object 636 * @ubi: UBI device description object
640 * 637 *
641 * This function is called after each erase operation, when the absolute erase 638 * This function is called after each erase operation, when the absolute erase
@@ -871,6 +868,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
871 } 868 }
872 869
873 ubi_free_vid_hdr(ubi, vid_hdr); 870 ubi_free_vid_hdr(ubi, vid_hdr);
871 if (scrubbing && !protect)
872 ubi_msg("scrubbed PEB %d, data moved to PEB %d",
873 e1->pnum, e2->pnum);
874
874 spin_lock(&ubi->wl_lock); 875 spin_lock(&ubi->wl_lock);
875 if (protect) 876 if (protect)
876 prot_tree_add(ubi, e1, pe, protect); 877 prot_tree_add(ubi, e1, pe, protect);
@@ -1054,8 +1055,8 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
1054 spin_unlock(&ubi->wl_lock); 1055 spin_unlock(&ubi->wl_lock);
1055 1056
1056 /* 1057 /*
1057 * One more erase operation has happened, take care about protected 1058 * One more erase operation has happened, take care about
1058 * physical eraseblocks. 1059 * protected physical eraseblocks.
1059 */ 1060 */
1060 check_protection_over(ubi); 1061 check_protection_over(ubi);
1061 1062
@@ -1136,7 +1137,7 @@ out_ro:
1136} 1137}
1137 1138
1138/** 1139/**
1139 * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. 1140 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
1140 * @ubi: UBI device description object 1141 * @ubi: UBI device description object
1141 * @pnum: physical eraseblock to return 1142 * @pnum: physical eraseblock to return
1142 * @torture: if this physical eraseblock has to be tortured 1143 * @torture: if this physical eraseblock has to be tortured
@@ -1175,11 +1176,11 @@ retry:
1175 /* 1176 /*
1176 * User is putting the physical eraseblock which was selected 1177 * User is putting the physical eraseblock which was selected
1177 * as the target the data is moved to. It may happen if the EBA 1178 * as the target the data is moved to. It may happen if the EBA
1178 * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but 1179 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
1179 * the WL unit has not put the PEB to the "used" tree yet, but 1180 * but the WL sub-system has not put the PEB to the "used" tree
1180 * it is about to do this. So we just set a flag which will 1181 * yet, but it is about to do this. So we just set a flag which
1181 * tell the WL worker that the PEB is not needed anymore and 1182 * will tell the WL worker that the PEB is not needed anymore
1182 * should be scheduled for erasure. 1183 * and should be scheduled for erasure.
1183 */ 1184 */
1184 dbg_wl("PEB %d is the target of data moving", pnum); 1185 dbg_wl("PEB %d is the target of data moving", pnum);
1185 ubi_assert(!ubi->move_to_put); 1186 ubi_assert(!ubi->move_to_put);
@@ -1229,7 +1230,7 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
1229{ 1230{
1230 struct ubi_wl_entry *e; 1231 struct ubi_wl_entry *e;
1231 1232
1232 ubi_msg("schedule PEB %d for scrubbing", pnum); 1233 dbg_msg("schedule PEB %d for scrubbing", pnum);
1233 1234
1234retry: 1235retry:
1235 spin_lock(&ubi->wl_lock); 1236 spin_lock(&ubi->wl_lock);
@@ -1368,7 +1369,7 @@ int ubi_thread(void *u)
1368 int err; 1369 int err;
1369 1370
1370 if (kthread_should_stop()) 1371 if (kthread_should_stop())
1371 goto out; 1372 break;
1372 1373
1373 if (try_to_freeze()) 1374 if (try_to_freeze())
1374 continue; 1375 continue;
@@ -1403,7 +1404,6 @@ int ubi_thread(void *u)
1403 cond_resched(); 1404 cond_resched();
1404 } 1405 }
1405 1406
1406out:
1407 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); 1407 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
1408 return 0; 1408 return 0;
1409} 1409}
@@ -1426,8 +1426,7 @@ static void cancel_pending(struct ubi_device *ubi)
1426} 1426}
1427 1427
1428/** 1428/**
1429 * ubi_wl_init_scan - initialize the wear-leveling unit using scanning 1429 * ubi_wl_init_scan - initialize the WL sub-system using scanning information.
1430 * information.
1431 * @ubi: UBI device description object 1430 * @ubi: UBI device description object
1432 * @si: scanning information 1431 * @si: scanning information
1433 * 1432 *
@@ -1584,13 +1583,12 @@ static void protection_trees_destroy(struct ubi_device *ubi)
1584} 1583}
1585 1584
1586/** 1585/**
1587 * ubi_wl_close - close the wear-leveling unit. 1586 * ubi_wl_close - close the wear-leveling sub-system.
1588 * @ubi: UBI device description object 1587 * @ubi: UBI device description object
1589 */ 1588 */
1590void ubi_wl_close(struct ubi_device *ubi) 1589void ubi_wl_close(struct ubi_device *ubi)
1591{ 1590{
1592 dbg_wl("close the UBI wear-leveling unit"); 1591 dbg_wl("close the WL sub-system");
1593
1594 cancel_pending(ubi); 1592 cancel_pending(ubi);
1595 protection_trees_destroy(ubi); 1593 protection_trees_destroy(ubi);
1596 tree_destroy(&ubi->used); 1594 tree_destroy(&ubi->used);
@@ -1602,8 +1600,7 @@ void ubi_wl_close(struct ubi_device *ubi)
1602#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID 1600#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
1603 1601
1604/** 1602/**
1605 * paranoid_check_ec - make sure that the erase counter of a physical eraseblock 1603 * paranoid_check_ec - make sure that the erase counter of a PEB is correct.
1606 * is correct.
1607 * @ubi: UBI device description object 1604 * @ubi: UBI device description object
1608 * @pnum: the physical eraseblock number to check 1605 * @pnum: the physical eraseblock number to check
1609 * @ec: the erase counter to check 1606 * @ec: the erase counter to check
@@ -1644,13 +1641,12 @@ out_free:
1644} 1641}
1645 1642
1646/** 1643/**
1647 * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present 1644 * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
1648 * in a WL RB-tree.
1649 * @e: the wear-leveling entry to check 1645 * @e: the wear-leveling entry to check
1650 * @root: the root of the tree 1646 * @root: the root of the tree
1651 * 1647 *
1652 * This function returns zero if @e is in the @root RB-tree and %1 if it 1648 * This function returns zero if @e is in the @root RB-tree and %1 if it is
1653 * is not. 1649 * not.
1654 */ 1650 */
1655static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, 1651static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
1656 struct rb_root *root) 1652 struct rb_root *root)
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 00527805e4f1..e5a6e2e84540 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -33,6 +33,7 @@
33*/ 33*/
34 34
35#include <linux/module.h> 35#include <linux/module.h>
36#include <linux/moduleparam.h>
36#include <linux/types.h> 37#include <linux/types.h>
37#include <linux/errno.h> 38#include <linux/errno.h>
38#include <linux/ioport.h> 39#include <linux/ioport.h>
@@ -52,7 +53,9 @@
52#include <asm/hvcall.h> 53#include <asm/hvcall.h>
53#include <asm/atomic.h> 54#include <asm/atomic.h>
54#include <asm/vio.h> 55#include <asm/vio.h>
56#include <asm/iommu.h>
55#include <asm/uaccess.h> 57#include <asm/uaccess.h>
58#include <asm/firmware.h>
56#include <linux/seq_file.h> 59#include <linux/seq_file.h>
57 60
58#include "ibmveth.h" 61#include "ibmveth.h"
@@ -94,8 +97,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter);
94static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter); 97static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter);
95static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); 98static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
96static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); 99static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
100static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
97static struct kobj_type ktype_veth_pool; 101static struct kobj_type ktype_veth_pool;
98 102
103
99#ifdef CONFIG_PROC_FS 104#ifdef CONFIG_PROC_FS
100#define IBMVETH_PROC_DIR "ibmveth" 105#define IBMVETH_PROC_DIR "ibmveth"
101static struct proc_dir_entry *ibmveth_proc_dir; 106static struct proc_dir_entry *ibmveth_proc_dir;
@@ -226,16 +231,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
226 u32 i; 231 u32 i;
227 u32 count = pool->size - atomic_read(&pool->available); 232 u32 count = pool->size - atomic_read(&pool->available);
228 u32 buffers_added = 0; 233 u32 buffers_added = 0;
234 struct sk_buff *skb;
235 unsigned int free_index, index;
236 u64 correlator;
237 unsigned long lpar_rc;
238 dma_addr_t dma_addr;
229 239
230 mb(); 240 mb();
231 241
232 for(i = 0; i < count; ++i) { 242 for(i = 0; i < count; ++i) {
233 struct sk_buff *skb;
234 unsigned int free_index, index;
235 u64 correlator;
236 union ibmveth_buf_desc desc; 243 union ibmveth_buf_desc desc;
237 unsigned long lpar_rc;
238 dma_addr_t dma_addr;
239 244
240 skb = alloc_skb(pool->buff_size, GFP_ATOMIC); 245 skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
241 246
@@ -255,6 +260,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
255 dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, 260 dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
256 pool->buff_size, DMA_FROM_DEVICE); 261 pool->buff_size, DMA_FROM_DEVICE);
257 262
263 if (dma_mapping_error(dma_addr))
264 goto failure;
265
258 pool->free_map[free_index] = IBM_VETH_INVALID_MAP; 266 pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
259 pool->dma_addr[index] = dma_addr; 267 pool->dma_addr[index] = dma_addr;
260 pool->skbuff[index] = skb; 268 pool->skbuff[index] = skb;
@@ -267,20 +275,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
267 275
268 lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); 276 lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
269 277
270 if(lpar_rc != H_SUCCESS) { 278 if (lpar_rc != H_SUCCESS)
271 pool->free_map[free_index] = index; 279 goto failure;
272 pool->skbuff[index] = NULL; 280 else {
273 if (pool->consumer_index == 0)
274 pool->consumer_index = pool->size - 1;
275 else
276 pool->consumer_index--;
277 dma_unmap_single(&adapter->vdev->dev,
278 pool->dma_addr[index], pool->buff_size,
279 DMA_FROM_DEVICE);
280 dev_kfree_skb_any(skb);
281 adapter->replenish_add_buff_failure++;
282 break;
283 } else {
284 buffers_added++; 281 buffers_added++;
285 adapter->replenish_add_buff_success++; 282 adapter->replenish_add_buff_success++;
286 } 283 }
@@ -288,6 +285,24 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
288 285
289 mb(); 286 mb();
290 atomic_add(buffers_added, &(pool->available)); 287 atomic_add(buffers_added, &(pool->available));
288 return;
289
290failure:
291 pool->free_map[free_index] = index;
292 pool->skbuff[index] = NULL;
293 if (pool->consumer_index == 0)
294 pool->consumer_index = pool->size - 1;
295 else
296 pool->consumer_index--;
297 if (!dma_mapping_error(dma_addr))
298 dma_unmap_single(&adapter->vdev->dev,
299 pool->dma_addr[index], pool->buff_size,
300 DMA_FROM_DEVICE);
301 dev_kfree_skb_any(skb);
302 adapter->replenish_add_buff_failure++;
303
304 mb();
305 atomic_add(buffers_added, &(pool->available));
291} 306}
292 307
293/* replenish routine */ 308/* replenish routine */
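
The hunks above consolidate the per-buffer error handling of ibmveth_replenish_buffer_pool() under one failure: label, which also serves the newly checked dma_mapping_error() case. A user-space sketch of that single-unwind-point shape, with stand-in resources:

#include <stdlib.h>

struct buf {
	void *skb;	/* stand-in for the socket buffer */
	int   mapped;	/* stand-in for a valid DMA mapping */
};

static int map_buf(struct buf *b)    { b->mapped = 1; return 0; }
static void unmap_buf(struct buf *b) { b->mapped = 0; }
static int hand_to_fw(struct buf *b) { (void)b; return -1; /* pretend failure */ }

static int replenish_one(void)
{
	struct buf b = { 0, 0 };

	b.skb = malloc(64);
	if (!b.skb)
		return -1;
	if (map_buf(&b))
		goto failure;
	if (hand_to_fw(&b))
		goto failure;
	return 0;

failure:
	/* one unwind point, like the new "failure:" label above */
	if (b.mapped)		/* mirrors the !dma_mapping_error() test */
		unmap_buf(&b);
	free(b.skb);
	return -1;
}

int main(void)
{
	return replenish_one() ? 1 : 0;
}
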
@@ -297,7 +312,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
297 312
298 adapter->replenish_task_cycles++; 313 adapter->replenish_task_cycles++;
299 314
300 for(i = 0; i < IbmVethNumBufferPools; i++) 315 for (i = (IbmVethNumBufferPools - 1); i >= 0; i--)
301 if(adapter->rx_buff_pool[i].active) 316 if(adapter->rx_buff_pool[i].active)
302 ibmveth_replenish_buffer_pool(adapter, 317 ibmveth_replenish_buffer_pool(adapter,
303 &adapter->rx_buff_pool[i]); 318 &adapter->rx_buff_pool[i]);
@@ -472,6 +487,18 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
472 if (adapter->rx_buff_pool[i].active) 487 if (adapter->rx_buff_pool[i].active)
473 ibmveth_free_buffer_pool(adapter, 488 ibmveth_free_buffer_pool(adapter,
474 &adapter->rx_buff_pool[i]); 489 &adapter->rx_buff_pool[i]);
490
491 if (adapter->bounce_buffer != NULL) {
492 if (!dma_mapping_error(adapter->bounce_buffer_dma)) {
493 dma_unmap_single(&adapter->vdev->dev,
494 adapter->bounce_buffer_dma,
495 adapter->netdev->mtu + IBMVETH_BUFF_OH,
496 DMA_BIDIRECTIONAL);
497 adapter->bounce_buffer_dma = DMA_ERROR_CODE;
498 }
499 kfree(adapter->bounce_buffer);
500 adapter->bounce_buffer = NULL;
501 }
475} 502}
476 503
477static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, 504static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
@@ -607,6 +634,24 @@ static int ibmveth_open(struct net_device *netdev)
607 return rc; 634 return rc;
608 } 635 }
609 636
637 adapter->bounce_buffer =
638 kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
639 if (!adapter->bounce_buffer) {
640 ibmveth_error_printk("unable to allocate bounce buffer\n");
641 ibmveth_cleanup(adapter);
642 napi_disable(&adapter->napi);
643 return -ENOMEM;
644 }
645 adapter->bounce_buffer_dma =
646 dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
647 netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
648 if (dma_mapping_error(adapter->bounce_buffer_dma)) {
649 ibmveth_error_printk("unable to map bounce buffer\n");
650 ibmveth_cleanup(adapter);
651 napi_disable(&adapter->napi);
652 return -ENOMEM;
653 }
654
610 ibmveth_debug_printk("initial replenish cycle\n"); 655 ibmveth_debug_printk("initial replenish cycle\n");
611 ibmveth_interrupt(netdev->irq, netdev); 656 ibmveth_interrupt(netdev->irq, netdev);
612 657
@@ -853,10 +898,12 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
853 unsigned int tx_packets = 0; 898 unsigned int tx_packets = 0;
854 unsigned int tx_send_failed = 0; 899 unsigned int tx_send_failed = 0;
855 unsigned int tx_map_failed = 0; 900 unsigned int tx_map_failed = 0;
901 int used_bounce = 0;
902 unsigned long data_dma_addr;
856 903
857 desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len; 904 desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len;
858 desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data, 905 data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
859 skb->len, DMA_TO_DEVICE); 906 skb->len, DMA_TO_DEVICE);
860 907
861 if (skb->ip_summed == CHECKSUM_PARTIAL && 908 if (skb->ip_summed == CHECKSUM_PARTIAL &&
862 ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) { 909 ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) {
@@ -875,12 +922,16 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
875 buf[1] = 0; 922 buf[1] = 0;
876 } 923 }
877 924
878 if (dma_mapping_error(desc.fields.address)) { 925 if (dma_mapping_error(data_dma_addr)) {
879 ibmveth_error_printk("tx: unable to map xmit buffer\n"); 926 if (!firmware_has_feature(FW_FEATURE_CMO))
927 ibmveth_error_printk("tx: unable to map xmit buffer\n");
928 skb_copy_from_linear_data(skb, adapter->bounce_buffer,
929 skb->len);
930 desc.fields.address = adapter->bounce_buffer_dma;
880 tx_map_failed++; 931 tx_map_failed++;
881 tx_dropped++; 932 used_bounce = 1;
882 goto out; 933 } else
883 } 934 desc.fields.address = data_dma_addr;
884 935
885 /* send the frame. Arbitrarily set retrycount to 1024 */ 936 /* send the frame. Arbitrarily set retrycount to 1024 */
886 correlator = 0; 937 correlator = 0;
@@ -904,8 +955,9 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
904 netdev->trans_start = jiffies; 955 netdev->trans_start = jiffies;
905 } 956 }
906 957
907 dma_unmap_single(&adapter->vdev->dev, desc.fields.address, 958 if (!used_bounce)
908 skb->len, DMA_TO_DEVICE); 959 dma_unmap_single(&adapter->vdev->dev, data_dma_addr,
960 skb->len, DMA_TO_DEVICE);
909 961
910out: spin_lock_irqsave(&adapter->stats_lock, flags); 962out: spin_lock_irqsave(&adapter->stats_lock, flags);
911 netdev->stats.tx_dropped += tx_dropped; 963 netdev->stats.tx_dropped += tx_dropped;
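
With the hunks above, a failed dma_map_single() no longer drops the frame: the data is copied into a bounce buffer that ibmveth_open() mapped once up front, the descriptor takes that premapped address, and used_bounce tells the completion path not to unmap. A user-space sketch of the fallback, with stand-in DMA types:

#include <stdint.h>
#include <string.h>

#define DMA_ERROR ((uint64_t)-1)

struct txctx {
	uint64_t bounce_dma;	/* mapped once at open time */
	char	 bounce[2048];
};

/* stand-in mapping that always fails, as if the IOMMU were exhausted */
static uint64_t map_frame(const void *data, size_t len)
{
	(void)data; (void)len;
	return DMA_ERROR;
}

/* pick the DMA address for the descriptor; *used_bounce = don't unmap */
static uint64_t tx_addr(struct txctx *c, const void *frame, size_t len,
			int *used_bounce)
{
	uint64_t dma = map_frame(frame, len);

	if (dma == DMA_ERROR) {
		memcpy(c->bounce, frame, len);	/* fall back to the bounce buffer */
		*used_bounce = 1;
		return c->bounce_dma;
	}
	*used_bounce = 0;
	return dma;
}

int main(void)
{
	struct txctx c = { 0x1000, { 0 } };
	int used_bounce;

	return tx_addr(&c, "frame", 5, &used_bounce) == 0x1000 ? 0 : 1;
}
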
@@ -1053,9 +1105,9 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
1053static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) 1105static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
1054{ 1106{
1055 struct ibmveth_adapter *adapter = dev->priv; 1107 struct ibmveth_adapter *adapter = dev->priv;
1108 struct vio_dev *viodev = adapter->vdev;
1056 int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH; 1109 int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
1057 int reinit = 0; 1110 int i;
1058 int i, rc;
1059 1111
1060 if (new_mtu < IBMVETH_MAX_MTU) 1112 if (new_mtu < IBMVETH_MAX_MTU)
1061 return -EINVAL; 1113 return -EINVAL;
@@ -1067,23 +1119,34 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
1067 if (i == IbmVethNumBufferPools) 1119 if (i == IbmVethNumBufferPools)
1068 return -EINVAL; 1120 return -EINVAL;
1069 1121
1122 /* Deactivate all the buffer pools so that the next loop can activate
1123 only the buffer pools necessary to hold the new MTU */
1124 for (i = 0; i < IbmVethNumBufferPools; i++)
1125 if (adapter->rx_buff_pool[i].active) {
1126 ibmveth_free_buffer_pool(adapter,
1127 &adapter->rx_buff_pool[i]);
1128 adapter->rx_buff_pool[i].active = 0;
1129 }
1130
1070 /* Look for an active buffer pool that can hold the new MTU */ 1131 /* Look for an active buffer pool that can hold the new MTU */
1071 for(i = 0; i<IbmVethNumBufferPools; i++) { 1132 for(i = 0; i<IbmVethNumBufferPools; i++) {
1072 if (!adapter->rx_buff_pool[i].active) { 1133 adapter->rx_buff_pool[i].active = 1;
1073 adapter->rx_buff_pool[i].active = 1;
1074 reinit = 1;
1075 }
1076 1134
1077 if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) { 1135 if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
1078 if (reinit && netif_running(adapter->netdev)) { 1136 if (netif_running(adapter->netdev)) {
1079 adapter->pool_config = 1; 1137 adapter->pool_config = 1;
1080 ibmveth_close(adapter->netdev); 1138 ibmveth_close(adapter->netdev);
1081 adapter->pool_config = 0; 1139 adapter->pool_config = 0;
1082 dev->mtu = new_mtu; 1140 dev->mtu = new_mtu;
1083 if ((rc = ibmveth_open(adapter->netdev))) 1141 vio_cmo_set_dev_desired(viodev,
1084 return rc; 1142 ibmveth_get_desired_dma
1085 } else 1143 (viodev));
1086 dev->mtu = new_mtu; 1144 return ibmveth_open(adapter->netdev);
1145 }
1146 dev->mtu = new_mtu;
1147 vio_cmo_set_dev_desired(viodev,
1148 ibmveth_get_desired_dma
1149 (viodev));
1087 return 0; 1150 return 0;
1088 } 1151 }
1089 } 1152 }
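
The re-worked MTU path above first parks every receive pool, then walks them in ascending buffer size, marking each one active until a pool can hold the new MTU plus IBMVETH_BUFF_OH. A user-space sketch of that loop:

#include <stdio.h>

#define NPOOLS 5
#define OH     22	/* IBMVETH_BUFF_OH: 14 ethernet header + 8 handle */

static int buff_size[NPOOLS] = { 512, 2048, 16384, 32768, 65536 };
static int active[NPOOLS];

static int set_mtu(int new_mtu)
{
	int need = new_mtu + OH;
	int i;

	for (i = 0; i < NPOOLS; i++)	/* start from a clean slate */
		active[i] = 0;

	for (i = 0; i < NPOOLS; i++) {
		active[i] = 1;		/* this pool is now needed */
		if (need < buff_size[i])
			return 0;	/* big enough, keep the rest off */
	}
	return -1;			/* no pool can hold this MTU */
}

int main(void)
{
	int i;

	if (set_mtu(9000))		/* needs the 16 KiB pool */
		return 1;
	for (i = 0; i < NPOOLS; i++)
		if (active[i])
			printf("pool %d active (%d bytes)\n", i, buff_size[i]);
	return 0;
}
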
@@ -1098,6 +1161,46 @@ static void ibmveth_poll_controller(struct net_device *dev)
1098} 1161}
1099#endif 1162#endif
1100 1163
1164/**
1165 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
1166 *
1167 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
1168 *
1169 * Return value:
1170 * Number of bytes of IO data the driver will need to perform well.
1171 */
1172static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
1173{
1174 struct net_device *netdev = dev_get_drvdata(&vdev->dev);
1175 struct ibmveth_adapter *adapter;
1176 unsigned long ret;
1177 int i;
1178 int rxqentries = 1;
1179
1180 /* netdev inits at probe time along with the structures we need below */
1181 if (netdev == NULL)
1182 return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT);
1183
1184 adapter = netdev_priv(netdev);
1185
1186 ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
1187 ret += IOMMU_PAGE_ALIGN(netdev->mtu);
1188
1189 for (i = 0; i < IbmVethNumBufferPools; i++) {
1190 /* add the size of the active receive buffers */
1191 if (adapter->rx_buff_pool[i].active)
1192 ret +=
1193 adapter->rx_buff_pool[i].size *
1194 IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
1195 buff_size);
1196 rxqentries += adapter->rx_buff_pool[i].size;
1197 }
1198 /* add the size of the receive queue entries */
1199 ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry));
1200
1201 return ret;
1202}
1203
1101static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) 1204static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
1102{ 1205{
1103 int rc, i; 1206 int rc, i;
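
ibmveth_get_desired_dma() above totals the driver's IO entitlement: the two fixed lists, one MTU-sized mapping, every active pool's buffers, and the receive queue, each rounded up to IOMMU pages. A user-space sketch of the arithmetic; the page size, list sizes, and the 16-byte receive-queue entry are assumptions, not values taken from the driver:

#include <stdio.h>

#define IOMMU_PAGE	4096UL			/* assumed page size */
#define ALIGN_UP(x)	(((x) + IOMMU_PAGE - 1) & ~(IOMMU_PAGE - 1))

struct pool { int active; unsigned long size, buff_size; };

static unsigned long desired_dma(unsigned long mtu,
				 const struct pool *p, int npools)
{
	unsigned long ret = 4096 + 4096;	/* buffer list + filter list */
	unsigned long rxqentries = 1;
	int i;

	ret += ALIGN_UP(mtu);			/* the bounce-buffer mapping */
	for (i = 0; i < npools; i++) {
		if (p[i].active)		/* active pools map every buffer */
			ret += p[i].size * ALIGN_UP(p[i].buff_size);
		rxqentries += p[i].size;
	}
	ret += ALIGN_UP(rxqentries * 16);	/* 16: assumed rx queue entry size */
	return ret;
}

int main(void)
{
	struct pool pools[] = { { 1, 512, 2048 }, { 0, 256, 65536 } };

	printf("%lu bytes\n", desired_dma(1500, pools, 2));
	return 0;
}
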
@@ -1242,6 +1345,8 @@ static int __devexit ibmveth_remove(struct vio_dev *dev)
 	ibmveth_proc_unregister_adapter(adapter);
 
 	free_netdev(netdev);
+	dev_set_drvdata(&dev->dev, NULL);
+
 	return 0;
 }
 
@@ -1402,14 +1507,15 @@ const char * buf, size_t count)
 			return -EPERM;
 		}
 
-		pool->active = 0;
 		if (netif_running(netdev)) {
 			adapter->pool_config = 1;
 			ibmveth_close(netdev);
+			pool->active = 0;
 			adapter->pool_config = 0;
 			if ((rc = ibmveth_open(netdev)))
 				return rc;
 		}
+		pool->active = 0;
 	}
 } else if (attr == &veth_num_attr) {
 	if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
@@ -1485,6 +1591,7 @@ static struct vio_driver ibmveth_driver = {
 	.id_table	= ibmveth_device_table,
 	.probe		= ibmveth_probe,
 	.remove		= ibmveth_remove,
+	.get_desired_dma = ibmveth_get_desired_dma,
 	.driver	= {
 		.name	= ibmveth_driver_name,
 		.owner	= THIS_MODULE,
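The entitlement math in ibmveth_get_desired_dma() above is easier to see with concrete numbers. Below is a minimal stand-alone sketch of the same calculation, not part of the patch; the 4 KiB IOMMU page (the usual pSeries TCE page size), the single active pool of 512 two-kilobyte buffers, the 1500-byte MTU, and the 16-byte receive-queue entry size are all illustrative assumptions.

#include <stdio.h>

/* Assumed 4 KiB IOMMU page; the alignment macro mirrors the kernel's. */
#define IOMMU_PAGE_SIZE 4096UL
#define IOMMU_PAGE_ALIGN(x) (((x) + IOMMU_PAGE_SIZE - 1) & ~(IOMMU_PAGE_SIZE - 1))

int main(void)
{
	unsigned long pool_count = 512, buff_size = 2048, mtu = 1500;
	unsigned long rxqentries = 1 + pool_count;
	unsigned long ret = 4096 + 4096;	/* buffer list + filter list */

	ret += IOMMU_PAGE_ALIGN(mtu);		/* one MTU-sized mapping */
	ret += pool_count * IOMMU_PAGE_ALIGN(buff_size);	/* active rx buffers */
	ret += IOMMU_PAGE_ALIGN(rxqentries * 16);	/* rx queue entries, 16 B each assumed */
	printf("desired IO entitlement: %lu bytes\n", ret);
	return 0;
}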
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index 41f61cd18852..d28186948752 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -93,9 +93,12 @@ static inline long h_illan_attributes(unsigned long unit_address,
 	plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac)
 
 #define IbmVethNumBufferPools 5
+#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */
 #define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
 #define IBMVETH_MAX_MTU 68
 #define IBMVETH_MAX_POOL_COUNT 4096
+#define IBMVETH_BUFF_LIST_SIZE 4096
+#define IBMVETH_FILT_LIST_SIZE 4096
 #define IBMVETH_MAX_BUF_SIZE (1024 * 128)
 
 static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
@@ -143,6 +146,8 @@ struct ibmveth_adapter {
 	struct ibmveth_rx_q rx_queue;
 	int pool_config;
 	int rx_csum;
+	void *bounce_buffer;
+	dma_addr_t bounce_buffer_dma;
 
 	/* adapter specific stats */
 	u64 replenish_task_cycles;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c28d7cb2035b..0196a0df9021 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -19,6 +19,7 @@
 //#define DEBUG
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ethtool.h>
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
@@ -54,9 +55,15 @@ struct virtnet_info
 	struct tasklet_struct tasklet;
 	bool free_in_tasklet;
 
+	/* I like... big packets and I cannot lie! */
+	bool big_packets;
+
 	/* Receive & send queues. */
 	struct sk_buff_head recv;
 	struct sk_buff_head send;
+
+	/* Chain pages by the private ptr. */
+	struct page *pages;
 };
 
 static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
@@ -69,6 +76,23 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
 	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
 }
 
+static void give_a_page(struct virtnet_info *vi, struct page *page)
+{
+	page->private = (unsigned long)vi->pages;
+	vi->pages = page;
+}
+
+static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+{
+	struct page *p = vi->pages;
+
+	if (p)
+		vi->pages = (struct page *)p->private;
+	else
+		p = alloc_page(gfp_mask);
+	return p;
+}
+
 static void skb_xmit_done(struct virtqueue *svq)
 {
 	struct virtnet_info *vi = svq->vdev->priv;
@@ -88,6 +112,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 			unsigned len)
 {
 	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
+	int err;
 
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
@@ -95,10 +120,23 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 		goto drop;
 	}
 	len -= sizeof(struct virtio_net_hdr);
-	BUG_ON(len > MAX_PACKET_LEN);
 
-	skb_trim(skb, len);
+	if (len <= MAX_PACKET_LEN) {
+		unsigned int i;
 
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page);
+		skb->data_len = 0;
+		skb_shinfo(skb)->nr_frags = 0;
+	}
+
+	err = pskb_trim(skb, len);
+	if (err) {
+		pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
+		dev->stats.rx_dropped++;
+		goto drop;
+	}
+	skb->truesize += skb->data_len;
 	dev->stats.rx_bytes += skb->len;
 	dev->stats.rx_packets++;
 
@@ -160,7 +198,7 @@ static void try_fill_recv(struct virtnet_info *vi)
 {
 	struct sk_buff *skb;
 	struct scatterlist sg[2+MAX_SKB_FRAGS];
-	int num, err;
+	int num, err, i;
 
 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 	for (;;) {
@@ -170,6 +208,24 @@ static void try_fill_recv(struct virtnet_info *vi)
 
 		skb_put(skb, MAX_PACKET_LEN);
 		vnet_hdr_to_sg(sg, skb);
+
+		if (vi->big_packets) {
+			for (i = 0; i < MAX_SKB_FRAGS; i++) {
+				skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+				f->page = get_a_page(vi, GFP_ATOMIC);
+				if (!f->page)
+					break;
+
+				f->page_offset = 0;
+				f->size = PAGE_SIZE;
+
+				skb->data_len += PAGE_SIZE;
+				skb->len += PAGE_SIZE;
+
+				skb_shinfo(skb)->nr_frags++;
+			}
+		}
+
 		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
 		skb_queue_head(&vi->recv, skb);
 
@@ -335,16 +391,11 @@ again:
 	free_old_xmit_skbs(vi);
 
 	/* If we have a buffer left over from last time, send it now. */
-	if (unlikely(vi->last_xmit_skb)) {
-		if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
-			/* Drop this skb: we only queue one. */
-			vi->dev->stats.tx_dropped++;
-			kfree_skb(skb);
-			skb = NULL;
-			goto stop_queue;
-		}
-		vi->last_xmit_skb = NULL;
-	}
+	if (unlikely(vi->last_xmit_skb) &&
+	    xmit_skb(vi, vi->last_xmit_skb) != 0)
+		goto stop_queue;
+
+	vi->last_xmit_skb = NULL;
 
 	/* Put new one in send queue and do transmit */
 	if (likely(skb)) {
@@ -370,6 +421,11 @@ stop_queue:
 		netif_start_queue(dev);
 		goto again;
 	}
+	if (skb) {
+		/* Drop this skb: we only queue one. */
+		vi->dev->stats.tx_dropped++;
+		kfree_skb(skb);
+	}
 	goto done;
 }
 
@@ -408,6 +464,22 @@ static int virtnet_close(struct net_device *dev)
 	return 0;
 }
 
+static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtio_device *vdev = vi->vdev;
+
+	if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
+		return -ENOSYS;
+
+	return ethtool_op_set_tx_hw_csum(dev, data);
+}
+
+static struct ethtool_ops virtnet_ethtool_ops = {
+	.set_tx_csum = virtnet_set_tx_csum,
+	.set_sg = ethtool_op_set_sg,
+};
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int err;
@@ -427,6 +499,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = virtnet_netpoll;
 #endif
+	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */
@@ -462,11 +535,18 @@ static int virtnet_probe(struct virtio_device *vdev)
 	vi->dev = dev;
 	vi->vdev = vdev;
 	vdev->priv = vi;
+	vi->pages = NULL;
 
 	/* If they give us a callback when all buffers are done, we don't need
 	 * the timer. */
 	vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
 
+	/* If we can receive ANY GSO packets, we must allocate large ones. */
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
+	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
+	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+		vi->big_packets = true;
+
 	/* We expect two virtqueues, receive then send. */
 	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
 	if (IS_ERR(vi->rvq)) {
@@ -541,6 +621,10 @@ static void virtnet_remove(struct virtio_device *vdev)
 	vdev->config->del_vq(vi->svq);
 	vdev->config->del_vq(vi->rvq);
 	unregister_netdev(vi->dev);
+
+	while (vi->pages)
+		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
+
 	free_netdev(vi->dev);
 }
 
@@ -553,7 +637,9 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
 	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
-	VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY,
+	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
+	VIRTIO_NET_F_GUEST_ECN,	/* We don't yet handle UFO input. */
+	VIRTIO_F_NOTIFY_ON_EMPTY,
 };
 
 static struct virtio_driver virtio_net = {
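A quick capacity check on the big-packets receive path above, as a sketch with assumed constants (MAX_PACKET_LEN of 1514 bytes, 4 KiB pages, and the usual MAX_SKB_FRAGS definition — none taken verbatim from this tree): the fragment pages pre-attached in try_fill_recv() must cover a maximal 64 KiB GSO frame, and they do with room to spare.

#include <assert.h>
#include <stdio.h>

/* Illustrative values only; verify against the driver's own headers. */
#define PAGE_SIZE      4096
#define MAX_SKB_FRAGS  (65536 / PAGE_SIZE + 2)
#define MAX_PACKET_LEN 1514

int main(void)
{
	unsigned long cap = MAX_PACKET_LEN + MAX_SKB_FRAGS * PAGE_SIZE;

	/* A maximal GSO frame is 64 KiB of payload plus a 14-byte header. */
	assert(cap >= 65536 + 14);
	printf("big-packet skb capacity: %lu bytes\n", cap);
	return 0;
}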
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 3a7a11a75fb4..1d7ec3129349 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -4,7 +4,7 @@ config OF_DEVICE
 
 config OF_GPIO
 	def_bool y
-	depends on OF && PPC_OF && HAVE_GPIO_LIB
+	depends on OF && PPC_OF && GPIOLIB
 	help
 	  OpenFirmware GPIO accessors
 
diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c
index 5c015d310d4a..344e1b03dd8b 100644
--- a/drivers/of/of_i2c.c
+++ b/drivers/of/of_i2c.c
@@ -91,8 +91,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap,
 	}
 
 	info.irq = irq_of_parse_and_map(node, 0);
-	if (info.irq == NO_IRQ)
-		info.irq = -1;
 
 	if (of_find_i2c_driver(node, &info) < 0) {
 		irq_dispose_mapping(info.irq);
diff --git a/drivers/parport/parport_ax88796.c b/drivers/parport/parport_ax88796.c
index 4ec220b2eae7..6938d2e9f18f 100644
--- a/drivers/parport/parport_ax88796.c
+++ b/drivers/parport/parport_ax88796.c
@@ -406,6 +406,8 @@ static int parport_ax88796_resume(struct platform_device *dev)
 #define parport_ax88796_resume NULL
 #endif
 
+MODULE_ALIAS("platform:ax88796-pp");
+
 static struct platform_driver axdrv = {
 	.driver	= {
 		.name	= "ax88796-pp",
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 71be36f18709..308ddb201b66 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -433,6 +433,8 @@ static int ds2760_battery_resume(struct platform_device *pdev)
 
 #endif /* CONFIG_PM */
 
+MODULE_ALIAS("platform:ds2760-battery");
+
 static struct platform_driver ds2760_battery_driver = {
 	.driver = {
 		.name = "ds2760-battery",
diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c
index 82810b7bff9c..0471ec743ab9 100644
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -362,6 +362,8 @@ static int pda_power_resume(struct platform_device *pdev)
 #define pda_power_resume NULL
 #endif /* CONFIG_PM */
 
+MODULE_ALIAS("platform:pda-power");
+
 static struct platform_driver pda_power_pdrv = {
 	.driver = {
 		.name = "pda-power",
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 5ab34340919b..79954bd6bfa5 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -15,6 +15,7 @@
 #include <linux/err.h>
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
 #include <linux/interrupt.h>
 #include <linux/virtio_ring.h>
 #include <linux/pfn.h>
@@ -87,16 +88,20 @@ static u32 kvm_get_features(struct virtio_device *vdev)
 	return features;
 }
 
-static void kvm_set_features(struct virtio_device *vdev, u32 features)
+static void kvm_finalize_features(struct virtio_device *vdev)
 {
-	unsigned int i;
+	unsigned int i, bits;
 	struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
 	/* Second half of bitmap is features we accept. */
 	u8 *out_features = kvm_vq_features(desc) + desc->feature_len;
 
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
 	memset(out_features, 0, desc->feature_len);
-	for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
-		if (features & (1 << i))
+	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++) {
+		if (test_bit(i, vdev->features))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 }
@@ -222,7 +227,7 @@ static void kvm_del_vq(struct virtqueue *vq)
  */
 static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.get_features = kvm_get_features,
-	.set_features = kvm_set_features,
+	.finalize_features = kvm_finalize_features,
 	.get = kvm_get,
 	.set = kvm_set,
 	.get_status = kvm_get_status,
@@ -333,6 +338,25 @@ static int __init kvm_devices_init(void)
 	return 0;
 }
 
+/* code for early console output with virtio_console */
+static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+{
+	char scratch[17];
+	unsigned int len = count;
+
+	if (len > sizeof(scratch) - 1)
+		len = sizeof(scratch) - 1;
+	scratch[len] = '\0';
+	memcpy(scratch, buf, len);
+	kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, __pa(scratch));
+	return len;
+}
+
+void s390_virtio_console_init(void)
+{
+	virtio_cons_early_init(early_put_chars);
+}
+
 /*
  * We do this after core stuff, but before the drivers.
  */
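Note that early_put_chars() consumes at most 16 bytes per KVM notify hypercall and reports how much it took. A sketch (not in the patch) of the retry loop a caller would need to drain a longer buffer, assuming the virtio_console early-init core keeps calling back with the unwritten remainder:

/* Hypothetical helper for illustration only. */
static void early_write_all(u32 vtermno, const char *buf, int count)
{
	while (count > 0) {
		int done = early_put_chars(vtermno, buf, count);
		buf += done;	/* advance past what the hypercall consumed */
		count -= done;
	}
}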
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index eb702b96d57c..c4a7c06793c5 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3819,6 +3819,20 @@ static int ibmvfc_remove(struct vio_dev *vdev)
 	return 0;
 }
 
+/**
+ * ibmvfc_get_desired_dma - Calculate DMA resources needed by the driver
+ * @vdev:	vio device struct
+ *
+ * Return value:
+ *	Number of bytes the driver will need to DMA map at the same time in
+ *	order to perform well.
+ */
+static unsigned long ibmvfc_get_desired_dma(struct vio_dev *vdev)
+{
+	unsigned long pool_dma = max_requests * sizeof(union ibmvfc_iu);
+	return pool_dma + ((512 * 1024) * driver_template.cmd_per_lun);
+}
+
 static struct vio_device_id ibmvfc_device_table[] __devinitdata = {
 	{"fcp", "IBM,vfc-client"},
 	{ "", "" }
@@ -3829,6 +3843,7 @@ static struct vio_driver ibmvfc_driver = {
 	.id_table = ibmvfc_device_table,
 	.probe = ibmvfc_probe,
 	.remove = ibmvfc_remove,
+	.get_desired_dma = ibmvfc_get_desired_dma,
 	.driver = {
 		.name = IBMVFC_NAME,
 		.owner = THIS_MODULE,
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 5d23368a1bce..20000ec79b04 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -72,6 +72,7 @@
 #include <linux/delay.h>
 #include <asm/firmware.h>
 #include <asm/vio.h>
+#include <asm/firmware.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
@@ -426,8 +427,10 @@ static int map_sg_data(struct scsi_cmnd *cmd,
 							   SG_ALL * sizeof(struct srp_direct_buf),
 							   &evt_struct->ext_list_token, 0);
 		if (!evt_struct->ext_list) {
-			sdev_printk(KERN_ERR, cmd->device,
-				    "Can't allocate memory for indirect table\n");
+			if (!firmware_has_feature(FW_FEATURE_CMO))
+				sdev_printk(KERN_ERR, cmd->device,
+					    "Can't allocate memory "
+					    "for indirect table\n");
 			return 0;
 		}
 	}
@@ -743,7 +746,9 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
 	srp_cmd->lun = ((u64) lun) << 48;
 
 	if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
-		sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n");
+		if (!firmware_has_feature(FW_FEATURE_CMO))
+			sdev_printk(KERN_ERR, cmnd->device,
+				    "couldn't convert cmd to srp_cmd\n");
 		free_event_struct(&hostdata->pool, evt_struct);
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
@@ -855,7 +860,10 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
 					    DMA_BIDIRECTIONAL);
 
 	if (dma_mapping_error(req->buffer)) {
-		dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n");
+		if (!firmware_has_feature(FW_FEATURE_CMO))
+			dev_err(hostdata->dev,
+				"Unable to map request_buffer for "
+				"adapter_info!\n");
 		free_event_struct(&hostdata->pool, evt_struct);
 		return;
 	}
@@ -1400,7 +1408,9 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
 						    DMA_BIDIRECTIONAL);
 
 	if (dma_mapping_error(host_config->buffer)) {
-		dev_err(hostdata->dev, "dma_mapping error getting host config\n");
+		if (!firmware_has_feature(FW_FEATURE_CMO))
+			dev_err(hostdata->dev,
+				"dma_mapping error getting host config\n");
 		free_event_struct(&hostdata->pool, evt_struct);
 		return -1;
 	}
@@ -1604,7 +1614,7 @@ static struct scsi_host_template driver_template = {
 	.eh_host_reset_handler = ibmvscsi_eh_host_reset_handler,
 	.slave_configure = ibmvscsi_slave_configure,
 	.change_queue_depth = ibmvscsi_change_queue_depth,
-	.cmd_per_lun = 16,
+	.cmd_per_lun = IBMVSCSI_CMDS_PER_LUN_DEFAULT,
 	.can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
 	.this_id = -1,
 	.sg_tablesize = SG_ALL,
@@ -1613,6 +1623,26 @@ static struct scsi_host_template driver_template = {
 };
 
 /**
+ * ibmvscsi_get_desired_dma - Calculate IO memory desired by the driver
+ *
+ * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
+ *
+ * Return value:
+ *	Number of bytes of IO data the driver will need to perform well.
+ */
+static unsigned long ibmvscsi_get_desired_dma(struct vio_dev *vdev)
+{
+	/* iu_storage data allocated in initialize_event_pool */
+	unsigned long desired_io = max_requests * sizeof(union viosrp_iu);
+
+	/* add io space for sg data */
+	desired_io += (IBMVSCSI_MAX_SECTORS_DEFAULT * 512 *
+	                     IBMVSCSI_CMDS_PER_LUN_DEFAULT);
+
+	return desired_io;
+}
+
+/**
  * Called by bus code for each adapter
  */
 static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
@@ -1641,7 +1671,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	hostdata->host = host;
 	hostdata->dev = dev;
 	atomic_set(&hostdata->request_limit, -1);
-	hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */
+	hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
 
 	rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests);
 	if (rc != 0 && rc != H_RESOURCE) {
@@ -1735,6 +1765,7 @@ static struct vio_driver ibmvscsi_driver = {
 	.id_table = ibmvscsi_device_table,
 	.probe = ibmvscsi_probe,
 	.remove = ibmvscsi_remove,
+	.get_desired_dma = ibmvscsi_get_desired_dma,
 	.driver = {
 		.name = "ibmvscsi",
 		.owner = THIS_MODULE,
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h
index 46e850e302c7..2d4339d5e16e 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.h
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.h
@@ -45,6 +45,8 @@ struct Scsi_Host;
 #define MAX_INDIRECT_BUFS 10
 
 #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100
+#define IBMVSCSI_CMDS_PER_LUN_DEFAULT 16
+#define IBMVSCSI_MAX_SECTORS_DEFAULT 256 /* 32 * 8 = default max I/O 32 pages */
 #define IBMVSCSI_MAX_CMDS_PER_LUN 64
 
 /* ------------------------------------------------------------
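For a rough sense of the figure ibmvscsi_get_desired_dma() produces with these new defaults, here is a back-of-envelope sketch; the 128-byte size assumed for union viosrp_iu is illustrative, not taken from the header.

#include <stdio.h>

int main(void)
{
	unsigned long max_requests = 100;	/* IBMVSCSI_MAX_REQUESTS_DEFAULT */
	unsigned long iu_size = 128;		/* assumed sizeof(union viosrp_iu) */
	unsigned long desired = max_requests * iu_size;

	/* sg data: max_sectors (256) * 512-byte sectors * cmds_per_lun (16) */
	desired += 256UL * 512 * 16;
	printf("desired_io = %lu bytes (~%lu MiB)\n", desired, desired >> 20);
	return 0;
}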
diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c
index 49cd9793404f..ec7aeb502d15 100644
--- a/drivers/telephony/ixj.c
+++ b/drivers/telephony/ixj.c
@@ -6095,15 +6095,15 @@ static int capabilities_check(IXJ *j, struct phone_capability *pcreq)
 	return retval;
 }
 
-static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd, unsigned long arg)
+static long do_ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
 {
 	IXJ_TONE ti;
 	IXJ_FILTER jf;
 	IXJ_FILTER_RAW jfr;
 	void __user *argp = (void __user *)arg;
-
-	unsigned int raise, mant;
+	struct inode *inode = file_p->f_path.dentry->d_inode;
 	unsigned int minor = iminor(inode);
+	unsigned int raise, mant;
 	int board = NUM(inode);
 
 	IXJ *j = get_ixj(NUM(inode));
@@ -6661,6 +6661,15 @@ static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd,
 	return retval;
 }
 
+static long ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
+{
+	long ret;
+	lock_kernel();
+	ret = do_ixj_ioctl(file_p, cmd, arg);
+	unlock_kernel();
+	return ret;
+}
+
 static int ixj_fasync(int fd, struct file *file_p, int mode)
 {
 	IXJ *j = get_ixj(NUM(file_p->f_path.dentry->d_inode));
@@ -6674,7 +6683,7 @@ static const struct file_operations ixj_fops =
 	.read = ixj_enhanced_read,
 	.write = ixj_enhanced_write,
 	.poll = ixj_poll,
-	.ioctl = ixj_ioctl,
+	.unlocked_ioctl = ixj_ioctl,
 	.release = ixj_release,
 	.fasync = ixj_fasync
 };
diff --git a/drivers/usb/gadget/at91_udc.h b/drivers/usb/gadget/at91_udc.h
index a973f2a50fb9..c65d62295890 100644
--- a/drivers/usb/gadget/at91_udc.h
+++ b/drivers/usb/gadget/at91_udc.h
@@ -171,7 +171,7 @@ struct at91_request {
 #endif
 
 #define ERR(stuff...)	pr_err("udc: " stuff)
-#define WARN(stuff...)	pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)	pr_info("udc: " stuff)
 #define DBG(stuff...)	pr_debug("udc: " stuff)
 
diff --git a/drivers/usb/gadget/cdc2.c b/drivers/usb/gadget/cdc2.c
index d490d0289507..a39a4b940c33 100644
--- a/drivers/usb/gadget/cdc2.c
+++ b/drivers/usb/gadget/cdc2.c
@@ -170,7 +170,7 @@ static int __init cdc_bind(struct usb_composite_dev *cdev)
 	 * but if the controller isn't recognized at all then
 	 * that assumption is a bit more likely to be wrong.
 	 */
-	WARN(cdev, "controller '%s' not recognized; trying %s\n",
+	WARNING(cdev, "controller '%s' not recognized; trying %s\n",
 			gadget->name,
 			cdc_config_driver.label);
 	device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index d7aaaa29b1e1..bcac2e68660d 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -293,7 +293,7 @@ static int __init eth_bind(struct usb_composite_dev *cdev)
 	 * but if the controller isn't recognized at all then
 	 * that assumption is a bit more likely to be wrong.
 	 */
-	WARN(cdev, "controller '%s' not recognized; trying %s\n",
+	WARNING(cdev, "controller '%s' not recognized; trying %s\n",
 			gadget->name,
 			eth_config_driver.label);
 	device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 15c24edbb61a..ea2c31d18080 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -308,7 +308,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 	dev_vdbg(&(d)->gadget->dev , fmt , ## args)
 #define ERROR(d, fmt, args...) \
 	dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
 	dev_warn(&(d)->gadget->dev , fmt , ## args)
 #define INFO(d, fmt, args...) \
 	dev_info(&(d)->gadget->dev , fmt , ## args)
@@ -1091,7 +1091,7 @@ static int ep0_queue(struct fsg_dev *fsg)
 	if (rc != 0 && rc != -ESHUTDOWN) {
 
 		/* We can't do much more than wait for a reset */
-		WARN(fsg, "error in submission: %s --> %d\n",
+		WARNING(fsg, "error in submission: %s --> %d\n",
 				fsg->ep0->name, rc);
 	}
 	return rc;
@@ -1227,7 +1227,7 @@ static void received_cbi_adsc(struct fsg_dev *fsg, struct fsg_buffhd *bh)
 
 	/* Save the command for later */
 	if (fsg->cbbuf_cmnd_size)
-		WARN(fsg, "CB[I] overwriting previous command\n");
+		WARNING(fsg, "CB[I] overwriting previous command\n");
 	fsg->cbbuf_cmnd_size = req->actual;
 	memcpy(fsg->cbbuf_cmnd, req->buf, fsg->cbbuf_cmnd_size);
 
@@ -1506,7 +1506,7 @@ static void start_transfer(struct fsg_dev *fsg, struct usb_ep *ep,
 		 * submissions if DMA is enabled. */
 		if (rc != -ESHUTDOWN && !(rc == -EOPNOTSUPP &&
 						req->length == 0))
-			WARN(fsg, "error in submission: %s --> %d\n",
+			WARNING(fsg, "error in submission: %s --> %d\n",
 					ep->name, rc);
 	}
 }
@@ -2294,7 +2294,7 @@ static int halt_bulk_in_endpoint(struct fsg_dev *fsg)
 	VDBG(fsg, "delayed bulk-in endpoint halt\n");
 	while (rc != 0) {
 		if (rc != -EAGAIN) {
-			WARN(fsg, "usb_ep_set_halt -> %d\n", rc);
+			WARNING(fsg, "usb_ep_set_halt -> %d\n", rc);
 			rc = 0;
 			break;
 		}
@@ -2317,7 +2317,7 @@ static int wedge_bulk_in_endpoint(struct fsg_dev *fsg)
 	VDBG(fsg, "delayed bulk-in endpoint wedge\n");
 	while (rc != 0) {
 		if (rc != -EAGAIN) {
-			WARN(fsg, "usb_ep_set_wedge -> %d\n", rc);
+			WARNING(fsg, "usb_ep_set_wedge -> %d\n", rc);
 			rc = 0;
 			break;
 		}
@@ -3755,7 +3755,7 @@ static int __init check_parameters(struct fsg_dev *fsg)
 	if (gcnum >= 0)
 		mod_data.release = 0x0300 + gcnum;
 	else {
-		WARN(fsg, "controller '%s' not recognized\n",
+		WARNING(fsg, "controller '%s' not recognized\n",
 				fsg->gadget->name);
 		mod_data.release = 0x0399;
 	}
diff --git a/drivers/usb/gadget/fsl_usb2_udc.c b/drivers/usb/gadget/fsl_usb2_udc.c
index 1695382f30fe..1cfccf102a2d 100644
--- a/drivers/usb/gadget/fsl_usb2_udc.c
+++ b/drivers/usb/gadget/fsl_usb2_udc.c
@@ -1538,7 +1538,7 @@ static void dtd_complete_irq(struct fsl_udc *udc)
 
 		/* If the ep is configured */
 		if (curr_ep->name == NULL) {
-			WARN("Invalid EP?");
+			WARNING("Invalid EP?");
 			continue;
 		}
 
diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h
index 98b1483ef6a5..6131752a38bc 100644
--- a/drivers/usb/gadget/fsl_usb2_udc.h
+++ b/drivers/usb/gadget/fsl_usb2_udc.h
@@ -552,7 +552,7 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length)
 #endif
 
 #define ERR(stuff...)	pr_err("udc: " stuff)
-#define WARN(stuff...)	pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)	pr_info("udc: " stuff)
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index 7f4d4828e3aa..ea8651e3da1a 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -138,8 +138,6 @@ static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req);
 	dev_vdbg(&(d)->gadget->dev , fmt , ## args)
 #define ERROR(d, fmt, args...) \
 	dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
-	dev_warn(&(d)->gadget->dev , fmt , ## args)
 #define INFO(d, fmt, args...) \
 	dev_info(&(d)->gadget->dev , fmt , ## args)
 
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index 48f1c63b7013..60aa04847b18 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -1768,7 +1768,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * usb_gadget_driver_{register,unregister}() must change.
 	 */
 	if (the_controller) {
-		WARN(dev, "ignoring %s\n", pci_name(pdev));
+		WARNING(dev, "ignoring %s\n", pci_name(pdev));
 		return -EBUSY;
 	}
 	if (!pdev->irq) {
diff --git a/drivers/usb/gadget/goku_udc.h b/drivers/usb/gadget/goku_udc.h
index bc4eb1e0b507..566cb2319056 100644
--- a/drivers/usb/gadget/goku_udc.h
+++ b/drivers/usb/gadget/goku_udc.h
@@ -285,7 +285,7 @@ struct goku_udc {
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
 	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 04692d59fc1c..f4585d3e90d7 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -262,8 +262,6 @@ static const char *CHIP;
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
-	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
 
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index b67ab677af72..5cfb5ebf3881 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -1007,7 +1007,7 @@ static void scan_dma_completions (struct net2280_ep *ep)
 		 * 0122, and 0124; not all cases trigger the warning.
 		 */
 		if ((tmp & (1 << NAK_OUT_PACKETS)) == 0) {
-			WARN (ep->dev, "%s lost packet sync!\n",
+			WARNING (ep->dev, "%s lost packet sync!\n",
 					ep->ep.name);
 			req->req.status = -EOVERFLOW;
 		} else if ((tmp = readl (&ep->regs->ep_avail)) != 0) {
diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h
index 1f2af398a9a4..81a71dbdc2c6 100644
--- a/drivers/usb/gadget/net2280.h
+++ b/drivers/usb/gadget/net2280.h
@@ -272,7 +272,7 @@ static inline void net2280_led_shutdown (struct net2280 *dev)
 
 #define ERROR(dev,fmt,args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
 	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev,fmt,args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 4b79a8509e84..395bd1844482 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -1120,7 +1120,7 @@ static int omap_ep_set_halt(struct usb_ep *_ep, int value)
 		status = -EINVAL;
 	else if (value) {
 		if (ep->udc->ep0_set_config) {
-			WARN("error changing config?\n");
+			WARNING("error changing config?\n");
 			omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
 		}
 		omap_writew(UDC_STALL_CMD, UDC_SYSCON2);
@@ -1764,7 +1764,7 @@ do_stall:
 			u.r.bRequestType, u.r.bRequest, status);
 		if (udc->ep0_set_config) {
 			if (udc->ep0_reset_config)
-				WARN("error resetting config?\n");
+				WARNING("error resetting config?\n");
 			else
 				omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
 		}
@@ -3076,7 +3076,7 @@ static int omap_udc_suspend(struct platform_device *dev, pm_message_t message)
 	 * which would prevent entry to deep sleep...
 	 */
 	if ((devstat & UDC_ATT) != 0 && (devstat & UDC_SUS) == 0) {
-		WARN("session active; suspend requires disconnect\n");
+		WARNING("session active; suspend requires disconnect\n");
 		omap_pullup(&udc->gadget, 0);
 	}
 
diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h
index 8522bbb12278..29edc51b6b22 100644
--- a/drivers/usb/gadget/omap_udc.h
+++ b/drivers/usb/gadget/omap_udc.h
@@ -188,7 +188,7 @@ struct omap_udc {
 #endif
 
 #define ERR(stuff...)	pr_err("udc: " stuff)
-#define WARN(stuff...)	pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)	pr_info("udc: " stuff)
 #define DBG(stuff...)	pr_debug("udc: " stuff)
 
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index 49cd9e145a9b..e0090085b78e 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -179,7 +179,7 @@ module_param(qlen, uint, S_IRUGO|S_IWUSR);
 
 #define ERROR(dev, fmt, args...) \
 	xprintk(dev, KERN_ERR, fmt, ## args)
-#define WARN(dev, fmt, args...) \
+#define WARNING(dev, fmt, args...) \
 	xprintk(dev, KERN_WARNING, fmt, ## args)
 #define INFO(dev, fmt, args...) \
 	xprintk(dev, KERN_INFO, fmt, ## args)
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index 8fb0066609bb..7e6725d89976 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -342,7 +342,7 @@ pxa25x_ep_free_request (struct usb_ep *_ep, struct usb_request *_req)
 	struct pxa25x_request	*req;
 
 	req = container_of (_req, struct pxa25x_request, req);
-	WARN_ON (!list_empty (&req->queue));
+	WARN_ON(!list_empty (&req->queue));
 	kfree(req);
 }
 
@@ -1556,7 +1556,7 @@ config_change:
 			 * tell us about config change events,
 			 * so later ones may fail...
 			 */
-			WARN("config change %02x fail %d?\n",
+			WARNING("config change %02x fail %d?\n",
 					u.r.bRequest, i);
 			return;
 			/* TODO experiment: if has_cfr,
@@ -2330,7 +2330,7 @@ static int pxa25x_udc_suspend(struct platform_device *dev, pm_message_t state)
 	unsigned long flags;
 
 	if (!udc->mach->gpio_pullup && !udc->mach->udc_command)
-		WARN("USB host won't detect disconnect!\n");
+		WARNING("USB host won't detect disconnect!\n");
 	udc->suspended = 1;
 
 	local_irq_save(flags);
diff --git a/drivers/usb/gadget/pxa25x_udc.h b/drivers/usb/gadget/pxa25x_udc.h
index 4d11ece7c95f..c8a13215e02c 100644
--- a/drivers/usb/gadget/pxa25x_udc.h
+++ b/drivers/usb/gadget/pxa25x_udc.h
@@ -259,7 +259,7 @@ dump_state(struct pxa25x_udc *dev)
 #define DBG(lvl, stuff...) do{if ((lvl) <= UDC_DEBUG) DMSG(stuff);}while(0)
 
 #define ERR(stuff...)	pr_err("udc: " stuff)
-#define WARN(stuff...)	pr_warning("udc: " stuff)
+#define WARNING(stuff...)	pr_warning("udc: " stuff)
 #define INFO(stuff...)	pr_info("udc: " stuff)
 
 
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 5458f43a8668..3791e6271903 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -116,7 +116,6 @@ static inline int qlen(struct usb_gadget *gadget)
 #undef DBG
 #undef VDBG
 #undef ERROR
-#undef WARN
 #undef INFO
 
 #define xprintk(d, level, fmt, args...) \
@@ -140,8 +139,6 @@ static inline int qlen(struct usb_gadget *gadget)
 
 #define ERROR(dev, fmt, args...) \
 	xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev, fmt, args...) \
-	xprintk(dev , KERN_WARNING , fmt , ## args)
 #define INFO(dev, fmt, args...) \
 	xprintk(dev , KERN_INFO , fmt , ## args)
 
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index 31178e10cbbe..ce1ca0ba0515 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -882,7 +882,7 @@ static void isp116x_endpoint_disable(struct usb_hcd *hcd,
 	for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++)
 		msleep(3);
 	if (!list_empty(&hep->urb_list))
-		WARN("ep %p not empty?\n", ep);
+		WARNING("ep %p not empty?\n", ep);
 
 	kfree(ep);
 	hep->hcpriv = NULL;
diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h
index 595b90a99848..aa211bafcff9 100644
--- a/drivers/usb/host/isp116x.h
+++ b/drivers/usb/host/isp116x.h
@@ -338,7 +338,7 @@ struct isp116x_ep {
 #endif
 
 #define ERR(stuff...)	printk(KERN_ERR "116x: " stuff)
-#define WARN(stuff...)	printk(KERN_WARNING "116x: " stuff)
+#define WARNING(stuff...)	printk(KERN_WARNING "116x: " stuff)
 #define INFO(stuff...)	printk(KERN_INFO "116x: " stuff)
 
 /* ------------------------------------------------- */
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 340d72da554a..8a74bbb57d08 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -1026,7 +1026,7 @@ sl811h_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep)
 	if (!list_empty(&hep->urb_list))
 		msleep(3);
 	if (!list_empty(&hep->urb_list))
-		WARN("ep %p not empty?\n", ep);
+		WARNING("ep %p not empty?\n", ep);
 
 	kfree(ep);
 	hep->hcpriv = NULL;
diff --git a/drivers/usb/host/sl811.h b/drivers/usb/host/sl811.h
index 7690d98e42a7..b6b8c1f233dd 100644
--- a/drivers/usb/host/sl811.h
+++ b/drivers/usb/host/sl811.h
@@ -261,6 +261,6 @@ sl811_read_buf(struct sl811 *sl811, int addr, void *buf, size_t count)
 #endif
 
 #define ERR(stuff...)	printk(KERN_ERR "sl811: " stuff)
-#define WARN(stuff...)	printk(KERN_WARNING "sl811: " stuff)
+#define WARNING(stuff...)	printk(KERN_WARNING "sl811: " stuff)
 #define INFO(stuff...)	printk(KERN_INFO "sl811: " stuff)
 
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index 054dedd28127..b358c4e1cf21 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -81,7 +81,7 @@ static struct usb_device *testdev_to_usbdev (struct usbtest_dev *test)
 
 #define ERROR(tdev, fmt, args...) \
 	dev_err(&(tdev)->intf->dev , fmt , ## args)
-#define WARN(tdev, fmt, args...) \
+#define WARNING(tdev, fmt, args...) \
 	dev_warn(&(tdev)->intf->dev , fmt , ## args)
 
 /*-------------------------------------------------------------------------*/
@@ -1946,7 +1946,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id)
 
 	status = get_endpoints (dev, intf);
 	if (status < 0) {
-		WARN(dev, "couldn't get endpoints, %d\n",
+		WARNING(dev, "couldn't get endpoints, %d\n",
 				status);
 		return status;
 	}
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 7084e7e146c0..5b78fd0aff0a 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -71,13 +71,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
 			      dev->id.device, dev->id.vendor);
 }
 
-static struct bus_type virtio_bus = {
-	.name  = "virtio",
-	.match = virtio_dev_match,
-	.dev_attrs = virtio_dev_attrs,
-	.uevent = virtio_uevent,
-};
-
 static void add_status(struct virtio_device *dev, unsigned status)
 {
 	dev->config->set_status(dev, dev->config->get_status(dev) | status);
@@ -120,12 +113,16 @@ static int virtio_dev_probe(struct device *_d)
 			set_bit(f, dev->features);
 	}
 
+	/* Transport features always preserved to pass to finalize_features. */
+	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
+		if (device_features & (1 << i))
+			set_bit(i, dev->features);
+
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	else {
-		/* They should never have set feature bits beyond 32 */
-		dev->config->set_features(dev, dev->features[0]);
+		dev->config->finalize_features(dev);
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 	}
 	return err;
@@ -147,13 +144,20 @@ static int virtio_dev_remove(struct device *_d)
 	return 0;
 }
 
+static struct bus_type virtio_bus = {
+	.name  = "virtio",
+	.match = virtio_dev_match,
+	.dev_attrs = virtio_dev_attrs,
+	.uevent = virtio_uevent,
+	.probe = virtio_dev_probe,
+	.remove = virtio_dev_remove,
+};
+
 int register_virtio_driver(struct virtio_driver *driver)
 {
 	/* Catch this early. */
 	BUG_ON(driver->feature_table_size && !driver->feature_table);
 	driver->driver.bus = &virtio_bus;
-	driver->driver.probe = virtio_dev_probe;
-	driver->driver.remove = virtio_dev_remove;
 	return driver_register(&driver->driver);
 }
 EXPORT_SYMBOL_GPL(register_virtio_driver);
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index eae7236310e4..c7dc37c7cce9 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -94,12 +94,17 @@ static u32 vp_get_features(struct virtio_device *vdev)
 	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
 }
 
-/* virtio config->set_features() implementation */
-static void vp_set_features(struct virtio_device *vdev, u32 features)
+/* virtio config->finalize_features() implementation */
+static void vp_finalize_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
+	/* We only support 32 feature bits. */
+	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
+	iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
 }
 
 /* virtio config->get() implementation */
@@ -297,7 +302,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.find_vq	= vp_find_vq,
 	.del_vq		= vp_del_vq,
 	.get_features	= vp_get_features,
-	.set_features	= vp_set_features,
+	.finalize_features = vp_finalize_features,
 };
 
 /* the PCI probing function */
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 72bf8bc09014..6eb5303fed11 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -18,6 +18,7 @@
  */
 #include <linux/virtio.h>
 #include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
 #include <linux/device.h>
 
 #ifdef DEBUG
@@ -87,8 +88,11 @@ static int vring_add_buf(struct virtqueue *_vq,
 	if (vq->num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 out + in, vq->num_free);
-		/* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */
-		vq->notify(&vq->vq);
+		/* FIXME: for historical reasons, we force a notify here if
+		 * there are outgoing parts to the buffer.  Presumably the
+		 * host should service the ring ASAP. */
+		if (out)
+			vq->notify(&vq->vq);
 		END_USE(vq);
 		return -ENOSPC;
 	}
@@ -320,4 +324,19 @@ void vring_del_virtqueue(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
 
+/* Manipulates transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev)
+{
+	unsigned int i;
+
+	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
+		switch (i) {
+		default:
+			/* We don't understand this bit. */
+			clear_bit(i, vdev->features);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(vring_transport_features);
+
 MODULE_LICENSE("GPL");
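Taken together with the virtio.c and virtio_pci.c hunks above, this series replaces the old set_features() hook: the core now records device and transport feature bits in dev->features, and each transport acknowledges them in one place via finalize_features(). A sketch of the shape a transport's hook takes under the new contract; the transport name and its foo_write_features() register accessor are hypothetical, not real kernel symbols.

/* Hypothetical transport "foo" under the finalize_features() API. */
static void foo_finalize_features(struct virtio_device *vdev)
{
	/* Let virtio_ring drop transport bits it does not understand. */
	vring_transport_features(vdev);

	/* Only 32 feature bits exist at this point in the API. */
	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
	foo_write_features(vdev, vdev->features[0]);	/* assumed accessor */
}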
diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a2ff95..97e3bdedb1e6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@ endif # BLOCK
902 902
903menu "Pseudo filesystems" 903menu "Pseudo filesystems"
904 904
905config PROC_FS 905source "fs/proc/Kconfig"
906 bool "/proc file system support" if EMBEDDED
907 default y
908 help
909 This is a virtual file system providing information about the status
910 of the system. "Virtual" means that it doesn't take up any space on
911 your hard disk: the files are created on the fly by the kernel when
912 you try to access them. Also, you cannot read the files with older
913 version of the program less: you need to use more or cat.
914
915 It's totally cool; for example, "cat /proc/interrupts" gives
916 information about what the different IRQs are used for at the moment
917 (there is a small number of Interrupt ReQuest lines in your computer
918 that are used by the attached devices to gain the CPU's attention --
919 often a source of trouble if two devices are mistakenly configured
920 to use the same IRQ). The program procinfo to display some
921 information about your system gathered from the /proc file system.
922
923 Before you can use the /proc file system, it has to be mounted,
924 meaning it has to be given a location in the directory hierarchy.
925 That location should be /proc. A command such as "mount -t proc proc
926 /proc" or the equivalent line in /etc/fstab does the job.
927
928 The /proc file system is explained in the file
929 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
930 ("man 5 proc").
931
932 This option will enlarge your kernel by about 67 KB. Several
933 programs depend on this, so everyone should say Y here.
934
935config PROC_KCORE
936 bool "/proc/kcore support" if !ARM
937 depends on PROC_FS && MMU
938
939config PROC_VMCORE
940 bool "/proc/vmcore support (EXPERIMENTAL)"
941 depends on PROC_FS && CRASH_DUMP
942 default y
943 help
944 Exports the dump image of crashed kernel in ELF format.
945
946config PROC_SYSCTL
947 bool "Sysctl support (/proc/sys)" if EMBEDDED
948 depends on PROC_FS
949 select SYSCTL
950 default y
951 ---help---
952 The sysctl interface provides a means of dynamically changing
953 certain kernel parameters and variables on the fly without requiring
954 a recompile of the kernel or reboot of the system. The primary
955 interface is through /proc/sys. If you say Y here a tree of
956 modifiable sysctl entries will be generated beneath the
957 /proc/sys directory. They are explained in the files
958 in <file:Documentation/sysctl/>. Note that enabling this
959 option will enlarge the kernel by at least 8 KB.
960
961 As it is generally a good thing, you should say Y here unless
962 building a kernel for install/rescue disks or your system is very
963 limited in memory.
964 906
965config SYSFS 907config SYSFS
966 bool "sysfs file system support" if EMBEDDED 908 bool "sysfs file system support" if EMBEDDED
@@ -2093,20 +2035,6 @@ config CODA_FS
2093 To compile the coda client support as a module, choose M here: the 2035 To compile the coda client support as a module, choose M here: the
2094 module will be called coda. 2036 module will be called coda.
2095 2037
2096config CODA_FS_OLD_API
2097 bool "Use 96-bit Coda file identifiers"
2098 depends on CODA_FS
2099 help
2100 A new kernel-userspace API had to be introduced for Coda v6.0
2101 to support larger 128-bit file identifiers as needed by the
2102 new realms implementation.
2103
2104 However this new API is not backward compatible with older
2105 clients. If you really need to run the old Coda userspace
2106 cache manager then say Y.
2107
2108 For most cases you probably want to say N.
2109
2110config AFS_FS 2038config AFS_FS
2111 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2039 tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
2112 depends on INET && EXPERIMENTAL 2040 depends on INET && EXPERIMENTAL
diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117ddd93..0051fd94b44e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
586 struct task_struct *tsk = current; 586 struct task_struct *tsk = current;
587 587
588 task_lock(tsk); 588 task_lock(tsk);
589 tsk->flags |= PF_BORROWED_MM;
590 active_mm = tsk->active_mm; 589 active_mm = tsk->active_mm;
591 atomic_inc(&mm->mm_count); 590 atomic_inc(&mm->mm_count);
592 tsk->mm = mm; 591 tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
610 struct task_struct *tsk = current; 609 struct task_struct *tsk = current;
611 610
612 task_lock(tsk); 611 task_lock(tsk);
613 tsk->flags &= ~PF_BORROWED_MM;
614 tsk->mm = NULL; 612 tsk->mm = NULL;
615 /* active_mm is still 'mm' */ 613 /* active_mm is still 'mm' */
616 enter_lazy_tlb(mm, tsk); 614 enter_lazy_tlb(mm, tsk);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 639d2d8b5710..3b6ff854d983 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) 131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132#endif 132#endif
133 133
134#ifndef ELF_BASE_PLATFORM
135/*
136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138 * will be copied to the user stack in the same manner as AT_PLATFORM.
139 */
140#define ELF_BASE_PLATFORM NULL
141#endif
142
134static int 143static int
135create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, 144create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 unsigned long load_addr, unsigned long interp_load_addr) 145 unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
142 elf_addr_t __user *envp; 151 elf_addr_t __user *envp;
143 elf_addr_t __user *sp; 152 elf_addr_t __user *sp;
144 elf_addr_t __user *u_platform; 153 elf_addr_t __user *u_platform;
154 elf_addr_t __user *u_base_platform;
145 const char *k_platform = ELF_PLATFORM; 155 const char *k_platform = ELF_PLATFORM;
156 const char *k_base_platform = ELF_BASE_PLATFORM;
146 int items; 157 int items;
147 elf_addr_t *elf_info; 158 elf_addr_t *elf_info;
148 int ei_index = 0; 159 int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
172 return -EFAULT; 183 return -EFAULT;
173 } 184 }
174 185
186 /*
187 * If this architecture has a "base" platform capability
188 * string, copy it to userspace.
189 */
190 u_base_platform = NULL;
191 if (k_base_platform) {
192 size_t len = strlen(k_base_platform) + 1;
193
194 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
195 if (__copy_to_user(u_base_platform, k_base_platform, len))
196 return -EFAULT;
197 }
198
175 /* Create the ELF interpreter info */ 199 /* Create the ELF interpreter info */
176 elf_info = (elf_addr_t *)current->mm->saved_auxv; 200 elf_info = (elf_addr_t *)current->mm->saved_auxv;
177 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ 201 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
209 NEW_AUX_ENT(AT_PLATFORM, 233 NEW_AUX_ENT(AT_PLATFORM,
210 (elf_addr_t)(unsigned long)u_platform); 234 (elf_addr_t)(unsigned long)u_platform);
211 } 235 }
236 if (k_base_platform) {
237 NEW_AUX_ENT(AT_BASE_PLATFORM,
238 (elf_addr_t)(unsigned long)u_base_platform);
239 }
212 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { 240 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
213 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); 241 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
214 } 242 }
@@ -1478,7 +1506,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1478 const struct user_regset_view *view = task_user_regset_view(dump_task); 1506 const struct user_regset_view *view = task_user_regset_view(dump_task);
1479 struct elf_thread_core_info *t; 1507 struct elf_thread_core_info *t;
1480 struct elf_prpsinfo *psinfo; 1508 struct elf_prpsinfo *psinfo;
1481 struct task_struct *g, *p; 1509 struct core_thread *ct;
1482 unsigned int i; 1510 unsigned int i;
1483 1511
1484 info->size = 0; 1512 info->size = 0;
@@ -1517,31 +1545,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1517 /* 1545 /*
1518 * Allocate a structure for each thread. 1546 * Allocate a structure for each thread.
1519 */ 1547 */
1520 rcu_read_lock(); 1548 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1521 do_each_thread(g, p) 1549 t = kzalloc(offsetof(struct elf_thread_core_info,
1522 if (p->mm == dump_task->mm) { 1550 notes[info->thread_notes]),
1523 t = kzalloc(offsetof(struct elf_thread_core_info, 1551 GFP_KERNEL);
1524 notes[info->thread_notes]), 1552 if (unlikely(!t))
1525 GFP_ATOMIC); 1553 return 0;
1526 if (unlikely(!t)) { 1554
1527 rcu_read_unlock(); 1555 t->task = ct->task;
1528 return 0; 1556 if (ct->task == dump_task || !info->thread) {
1529 } 1557 t->next = info->thread;
1530 t->task = p; 1558 info->thread = t;
1531 if (p == dump_task || !info->thread) { 1559 } else {
1532 t->next = info->thread; 1560 /*
1533 info->thread = t; 1561 * Make sure to keep the original task at
1534 } else { 1562 * the head of the list.
1535 /* 1563 */
1536 * Make sure to keep the original task at 1564 t->next = info->thread->next;
1537 * the head of the list. 1565 info->thread->next = t;
1538 */
1539 t->next = info->thread->next;
1540 info->thread->next = t;
1541 }
1542 } 1566 }
1543 while_each_thread(g, p); 1567 }
1544 rcu_read_unlock();
1545 1568
1546 /* 1569 /*
1547 * Now fill in each thread's information. 1570 * Now fill in each thread's information.
@@ -1688,7 +1711,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1688{ 1711{
1689#define NUM_NOTES 6 1712#define NUM_NOTES 6
1690 struct list_head *t; 1713 struct list_head *t;
1691 struct task_struct *g, *p;
1692 1714
1693 info->notes = NULL; 1715 info->notes = NULL;
1694 info->prstatus = NULL; 1716 info->prstatus = NULL;
@@ -1720,20 +1742,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1720 1742
1721 info->thread_status_size = 0; 1743 info->thread_status_size = 0;
1722 if (signr) { 1744 if (signr) {
1745 struct core_thread *ct;
1723 struct elf_thread_status *ets; 1746 struct elf_thread_status *ets;
1724 rcu_read_lock(); 1747
1725 do_each_thread(g, p) 1748 for (ct = current->mm->core_state->dumper.next;
1726 if (current->mm == p->mm && current != p) { 1749 ct; ct = ct->next) {
1727 ets = kzalloc(sizeof(*ets), GFP_ATOMIC); 1750 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1728 if (!ets) { 1751 if (!ets)
1729 rcu_read_unlock(); 1752 return 0;
1730 return 0; 1753
1731 } 1754 ets->thread = ct->task;
1732 ets->thread = p; 1755 list_add(&ets->list, &info->thread_list);
1733 list_add(&ets->list, &info->thread_list); 1756 }
1734 } 1757
1735 while_each_thread(g, p);
1736 rcu_read_unlock();
1737 list_for_each(t, &info->thread_list) { 1758 list_for_each(t, &info->thread_list) {
1738 int sz; 1759 int sz;
1739 1760
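
The new aux vector entry sits next to AT_PLATFORM on the user stack, so an ordinary auxv walker picks it up unchanged. Here is a sketch of a userspace reader, assuming a glibc-style layout where the aux vector follows the environment block; AT_BASE_PLATFORM is defined locally in case the installed headers predate this change, and the value 24 is an assumption taken from the matching auxvec.h update:

/*
 * Sketch: read AT_BASE_PLATFORM from the aux vector that
 * create_elf_tables() lays out above the environment. Assumes a
 * glibc-style process layout and a three-argument main().
 */
#include <link.h>   /* ElfW(), <elf.h> */
#include <stdio.h>

#ifndef AT_BASE_PLATFORM
#define AT_BASE_PLATFORM 24  /* assumed; see the auxvec.h change */
#endif

int main(int argc, char *argv[], char *envp[])
{
	char **p = envp;
	ElfW(auxv_t) *aux;

	(void)argc; (void)argv;

	while (*p)              /* skip the environment strings */
		p++;
	p++;                    /* step over the terminating NULL */

	for (aux = (ElfW(auxv_t) *)p; aux->a_type != AT_NULL; aux++) {
		if (aux->a_type == AT_PLATFORM)
			printf("AT_PLATFORM:      %s\n",
			       (char *)aux->a_un.a_val);
		if (aux->a_type == AT_BASE_PLATFORM)
			printf("AT_BASE_PLATFORM: %s\n",
			       (char *)aux->a_un.a_val);
	}
	return 0;
}
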
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d051a32e6270..1b59b1edf26d 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1573,7 +1573,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1573 struct memelfnote *notes = NULL; 1573 struct memelfnote *notes = NULL;
1574 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ 1574 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1575 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ 1575 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1576 struct task_struct *g, *p;
1577 LIST_HEAD(thread_list); 1576 LIST_HEAD(thread_list);
1578 struct list_head *t; 1577 struct list_head *t;
1579 elf_fpregset_t *fpu = NULL; 1578 elf_fpregset_t *fpu = NULL;
@@ -1622,20 +1621,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1622#endif 1621#endif
1623 1622
1624 if (signr) { 1623 if (signr) {
1624 struct core_thread *ct;
1625 struct elf_thread_status *tmp; 1625 struct elf_thread_status *tmp;
1626 rcu_read_lock(); 1626
1627 do_each_thread(g,p) 1627 for (ct = current->mm->core_state->dumper.next;
1628 if (current->mm == p->mm && current != p) { 1628 ct; ct = ct->next) {
1629 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1629 tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1630 if (!tmp) { 1630 if (!tmp)
1631 rcu_read_unlock(); 1631 goto cleanup;
1632 goto cleanup; 1632
1633 } 1633 tmp->thread = ct->task;
1634 tmp->thread = p; 1634 list_add(&tmp->list, &thread_list);
1635 list_add(&tmp->list, &thread_list); 1635 }
1636 } 1636
1637 while_each_thread(g,p);
1638 rcu_read_unlock();
1639 list_for_each(t, &thread_list) { 1637 list_for_each(t, &thread_list) {
1640 struct elf_thread_status *tmp; 1638 struct elf_thread_status *tmp;
1641 int sz; 1639 int sz;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c854890f94..bf4a3fd3c8e3 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
28char * coda_f2s(struct CodaFid *f) 28char * coda_f2s(struct CodaFid *f)
29{ 29{
30 static char s[60]; 30 static char s[60];
31#ifdef CONFIG_CODA_FS_OLD_API 31
32 sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
33#else
34 sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]); 32 sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
35#endif 33
36 return s; 34 return s;
37} 35}
38 36
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 40c36f7352a6..0d9b80ec689c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -378,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
378MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); 378MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
379MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); 379MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
380MODULE_LICENSE("GPL"); 380MODULE_LICENSE("GPL");
381#ifdef CONFIG_CODA_FS_OLD_API
382MODULE_VERSION("5.3.21");
383#else
384MODULE_VERSION("6.6"); 381MODULE_VERSION("6.6");
385#endif
386 382
387static int __init init_coda(void) 383static int __init init_coda(void)
388{ 384{
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531094dd..ce432bca95d1 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = current->pid;
54 inp->ih.pgid = task_pgrp_nr(current); 54 inp->ih.pgid = task_pgrp_nr(current);
55#ifdef CONFIG_CODA_FS_OLD_API
56 memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
57 inp->ih.cred.cr_fsuid = current->fsuid;
58#else
59 inp->ih.uid = current->fsuid; 55 inp->ih.uid = current->fsuid;
60#endif 56
61 return (void*)inp; 57 return (void*)inp;
62} 58}
63 59
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
166 union inputArgs *inp; 162 union inputArgs *inp;
167 union outputArgs *outp; 163 union outputArgs *outp;
168 int insize, outsize, error; 164 int insize, outsize, error;
169#ifdef CONFIG_CODA_FS_OLD_API
170 struct coda_cred cred = { 0, };
171 cred.cr_fsuid = uid;
172#endif
173 165
174 insize = SIZE(release); 166 insize = SIZE(release);
175 UPARG(CODA_CLOSE); 167 UPARG(CODA_CLOSE);
176 168
177#ifdef CONFIG_CODA_FS_OLD_API
178 memcpy(&(inp->ih.cred), &cred, sizeof(cred));
179#else
180 inp->ih.uid = uid; 169 inp->ih.uid = uid;
181#endif
182
183 inp->coda_close.VFid = *fid; 170 inp->coda_close.VFid = *fid;
184 inp->coda_close.flags = flags; 171 inp->coda_close.flags = flags;
185 172
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 18e2c548161d..5235c67e7594 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/raid/md.h> 26#include <linux/raid/md.h>
27#include <linux/kd.h> 27#include <linux/kd.h>
28#include <linux/dirent.h>
29#include <linux/route.h> 28#include <linux/route.h>
30#include <linux/in6.h> 29#include <linux/in6.h>
31#include <linux/ipv6_route.h> 30#include <linux/ipv6_route.h>
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5781ca..eba87ff3177b 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
116 if (xop->callback == NULL) 116 if (xop->callback == NULL)
117 wait_event(recv_wq, (op->done != 0)); 117 wait_event(recv_wq, (op->done != 0));
118 else { 118 else {
119 rv = -EINPROGRESS; 119 rv = FILE_LOCK_DEFERRED;
120 goto out; 120 goto out;
121 } 121 }
122 122
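
The dlm change adopts the VFS-wide convention where an asynchronous posix lock request returns FILE_LOCK_DEFERRED rather than overloading -EINPROGRESS. A sketch of the calling convention only, with an assumed value for the constant (the kernel defines it in <linux/fs.h>):

/*
 * Sketch of the convention adopted above: a lock request that has an
 * asynchronous callback returns FILE_LOCK_DEFERRED; the result arrives
 * later through the callback. The constant's value is an assumption.
 */
#include <stdio.h>

#define FILE_LOCK_DEFERRED 1  /* assumed */

typedef void (*plock_callback_t)(int result);

static int posix_lock_request(plock_callback_t cb)
{
	if (cb == NULL)
		return 0;  /* synchronous: blocked until done, granted */
	/* Asynchronous: the result will be delivered through cb(). */
	return FILE_LOCK_DEFERRED;
}

static void on_granted(int result)
{
	printf("async lock completed: %d\n", result);
}

int main(void)
{
	int rv = posix_lock_request(on_granted);

	if (rv == FILE_LOCK_DEFERRED)
		printf("request queued; waiting for callback\n");
	return 0;
}
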
diff --git a/fs/dquot.c b/fs/dquot.c
index 5ac77da19959..1346eebe74ce 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = {
562 */ 562 */
563static void dqput(struct dquot *dquot) 563static void dqput(struct dquot *dquot)
564{ 564{
565 int ret;
566
565 if (!dquot) 567 if (!dquot)
566 return; 568 return;
567#ifdef __DQUOT_PARANOIA 569#ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@ we_slept:
594 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { 596 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
595 spin_unlock(&dq_list_lock); 597 spin_unlock(&dq_list_lock);
596 /* Commit dquot before releasing */ 598 /* Commit dquot before releasing */
597 dquot->dq_sb->dq_op->write_dquot(dquot); 599 ret = dquot->dq_sb->dq_op->write_dquot(dquot);
600 if (ret < 0) {
601 printk(KERN_ERR "VFS: cannot write quota structure on "
602 "device %s (error %d). Quota may get out of "
603 "sync!\n", dquot->dq_sb->s_id, ret);
604 /*
605 * We clear dirty bit anyway, so that we avoid
606 * infinite loop here
607 */
608 spin_lock(&dq_list_lock);
609 clear_dquot_dirty(dquot);
610 spin_unlock(&dq_list_lock);
611 }
598 goto we_slept; 612 goto we_slept;
599 } 613 }
600 /* Clear flag in case dquot was inactive (something bad happened) */ 614 /* Clear flag in case dquot was inactive (something bad happened) */
@@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
875 char *msg = NULL; 889 char *msg = NULL;
876 struct tty_struct *tty; 890 struct tty_struct *tty;
877 891
878 if (!need_print_warning(dquot)) 892 if (warntype == QUOTA_NL_IHARDBELOW ||
893 warntype == QUOTA_NL_ISOFTBELOW ||
894 warntype == QUOTA_NL_BHARDBELOW ||
895 warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
879 return; 896 return;
880 897
881 mutex_lock(&tty_mutex); 898 mutex_lock(&tty_mutex);
@@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
1083 return QUOTA_OK; 1100 return QUOTA_OK;
1084} 1101}
1085 1102
1103static int info_idq_free(struct dquot *dquot, ulong inodes)
1104{
1105 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1106 dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
1107 return QUOTA_NL_NOWARN;
1108
1109 if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
1110 return QUOTA_NL_ISOFTBELOW;
1111 if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
1112 dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
1113 return QUOTA_NL_IHARDBELOW;
1114 return QUOTA_NL_NOWARN;
1115}
1116
1117static int info_bdq_free(struct dquot *dquot, qsize_t space)
1118{
1119 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1120 toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
1121 return QUOTA_NL_NOWARN;
1122
1123 if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
1124 dquot->dq_dqb.dqb_bsoftlimit)
1125 return QUOTA_NL_BSOFTBELOW;
1126 if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
1127 toqb(dquot->dq_dqb.dqb_curspace - space) <
1128 dquot->dq_dqb.dqb_bhardlimit)
1129 return QUOTA_NL_BHARDBELOW;
1130 return QUOTA_NL_NOWARN;
1131}
1086/* 1132/*
1087 * Initialize quota pointers in inode 1133 * Initialize quota pointers in inode
1088 * Transaction must be started at entry 1134 * Transaction must be started at entry
@@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode)
1139 return 0; 1185 return 0;
1140} 1186}
1141 1187
1188/* Wrapper to remove references to quota structures from inode */
1189void vfs_dq_drop(struct inode *inode)
1190{
1191 /* Here we can get arbitrary inode from clear_inode() so we have
1192 * to be careful. OTOH we don't need locking as quota operations
1193 * are allowed to change only at mount time */
1194 if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
1195 && inode->i_sb->dq_op->drop) {
1196 int cnt;
1197 /* Test before calling to rule out calls from proc and such
1198 * where we are not allowed to block. Note that this is
1199	 * actually a reliable test even without the lock - the caller
1200 * must assure that nobody can come after the DQUOT_DROP and
1201 * add quota pointers back anyway */
1202 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1203 if (inode->i_dquot[cnt] != NODQUOT)
1204 break;
1205 if (cnt < MAXQUOTAS)
1206 inode->i_sb->dq_op->drop(inode);
1207 }
1208}
1209
1142/* 1210/*
1143 * Following four functions update i_blocks+i_bytes fields and 1211 * Following four functions update i_blocks+i_bytes fields and
1144 * quota information (together with appropriate checks) 1212 * quota information (together with appropriate checks)
@@ -1248,6 +1316,7 @@ warn_put_all:
1248int dquot_free_space(struct inode *inode, qsize_t number) 1316int dquot_free_space(struct inode *inode, qsize_t number)
1249{ 1317{
1250 unsigned int cnt; 1318 unsigned int cnt;
1319 char warntype[MAXQUOTAS];
1251 1320
1252 /* First test before acquiring mutex - solves deadlocks when we 1321 /* First test before acquiring mutex - solves deadlocks when we
1253 * re-enter the quota code and are already holding the mutex */ 1322 * re-enter the quota code and are already holding the mutex */
@@ -1256,6 +1325,7 @@ out_sub:
1256 inode_sub_bytes(inode, number); 1325 inode_sub_bytes(inode, number);
1257 return QUOTA_OK; 1326 return QUOTA_OK;
1258 } 1327 }
1328
1259 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1329 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1260 /* Now recheck reliably when holding dqptr_sem */ 1330 /* Now recheck reliably when holding dqptr_sem */
1261 if (IS_NOQUOTA(inode)) { 1331 if (IS_NOQUOTA(inode)) {
@@ -1266,6 +1336,7 @@ out_sub:
1266 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1336 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1267 if (inode->i_dquot[cnt] == NODQUOT) 1337 if (inode->i_dquot[cnt] == NODQUOT)
1268 continue; 1338 continue;
1339 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
1269 dquot_decr_space(inode->i_dquot[cnt], number); 1340 dquot_decr_space(inode->i_dquot[cnt], number);
1270 } 1341 }
1271 inode_sub_bytes(inode, number); 1342 inode_sub_bytes(inode, number);
@@ -1274,6 +1345,7 @@ out_sub:
1274 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1345 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1275 if (inode->i_dquot[cnt]) 1346 if (inode->i_dquot[cnt])
1276 mark_dquot_dirty(inode->i_dquot[cnt]); 1347 mark_dquot_dirty(inode->i_dquot[cnt]);
1348 flush_warnings(inode->i_dquot, warntype);
1277 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1349 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1278 return QUOTA_OK; 1350 return QUOTA_OK;
1279} 1351}
@@ -1284,11 +1356,13 @@ out_sub:
1284int dquot_free_inode(const struct inode *inode, unsigned long number) 1356int dquot_free_inode(const struct inode *inode, unsigned long number)
1285{ 1357{
1286 unsigned int cnt; 1358 unsigned int cnt;
1359 char warntype[MAXQUOTAS];
1287 1360
1288 /* First test before acquiring mutex - solves deadlocks when we 1361 /* First test before acquiring mutex - solves deadlocks when we
1289 * re-enter the quota code and are already holding the mutex */ 1362 * re-enter the quota code and are already holding the mutex */
1290 if (IS_NOQUOTA(inode)) 1363 if (IS_NOQUOTA(inode))
1291 return QUOTA_OK; 1364 return QUOTA_OK;
1365
1292 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1366 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1293 /* Now recheck reliably when holding dqptr_sem */ 1367 /* Now recheck reliably when holding dqptr_sem */
1294 if (IS_NOQUOTA(inode)) { 1368 if (IS_NOQUOTA(inode)) {
@@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
1299 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1373 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1300 if (inode->i_dquot[cnt] == NODQUOT) 1374 if (inode->i_dquot[cnt] == NODQUOT)
1301 continue; 1375 continue;
1376 warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
1302 dquot_decr_inodes(inode->i_dquot[cnt], number); 1377 dquot_decr_inodes(inode->i_dquot[cnt], number);
1303 } 1378 }
1304 spin_unlock(&dq_data_lock); 1379 spin_unlock(&dq_data_lock);
@@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
1306 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1381 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1307 if (inode->i_dquot[cnt]) 1382 if (inode->i_dquot[cnt])
1308 mark_dquot_dirty(inode->i_dquot[cnt]); 1383 mark_dquot_dirty(inode->i_dquot[cnt]);
1384 flush_warnings(inode->i_dquot, warntype);
1309 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1385 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1310 return QUOTA_OK; 1386 return QUOTA_OK;
1311} 1387}
@@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1323 struct dquot *transfer_to[MAXQUOTAS]; 1399 struct dquot *transfer_to[MAXQUOTAS];
1324 int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, 1400 int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
1325 chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; 1401 chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
1326 char warntype[MAXQUOTAS]; 1402 char warntype_to[MAXQUOTAS];
1403 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
1327 1404
1328 /* First test before acquiring mutex - solves deadlocks when we 1405 /* First test before acquiring mutex - solves deadlocks when we
1329 * re-enter the quota code and are already holding the mutex */ 1406 * re-enter the quota code and are already holding the mutex */
@@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1332 /* Clear the arrays */ 1409 /* Clear the arrays */
1333 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1410 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1334 transfer_to[cnt] = transfer_from[cnt] = NODQUOT; 1411 transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
1335 warntype[cnt] = QUOTA_NL_NOWARN; 1412 warntype_to[cnt] = QUOTA_NL_NOWARN;
1336 } 1413 }
1337 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1414 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1338 /* Now recheck reliably when holding dqptr_sem */ 1415 /* Now recheck reliably when holding dqptr_sem */
@@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1364 if (transfer_to[cnt] == NODQUOT) 1441 if (transfer_to[cnt] == NODQUOT)
1365 continue; 1442 continue;
1366 transfer_from[cnt] = inode->i_dquot[cnt]; 1443 transfer_from[cnt] = inode->i_dquot[cnt];
1367 if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || 1444 if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
1368 check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) 1445 NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
1446 warntype_to + cnt) == NO_QUOTA)
1369 goto warn_put_all; 1447 goto warn_put_all;
1370 } 1448 }
1371 1449
@@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1381 1459
1382 /* Due to IO error we might not have transfer_from[] structure */ 1460 /* Due to IO error we might not have transfer_from[] structure */
1383 if (transfer_from[cnt]) { 1461 if (transfer_from[cnt]) {
1462 warntype_from_inodes[cnt] =
1463 info_idq_free(transfer_from[cnt], 1);
1464 warntype_from_space[cnt] =
1465 info_bdq_free(transfer_from[cnt], space);
1384 dquot_decr_inodes(transfer_from[cnt], 1); 1466 dquot_decr_inodes(transfer_from[cnt], 1);
1385 dquot_decr_space(transfer_from[cnt], space); 1467 dquot_decr_space(transfer_from[cnt], space);
1386 } 1468 }
@@ -1400,7 +1482,9 @@ warn_put_all:
1400 if (transfer_to[cnt]) 1482 if (transfer_to[cnt])
1401 mark_dquot_dirty(transfer_to[cnt]); 1483 mark_dquot_dirty(transfer_to[cnt]);
1402 } 1484 }
1403 flush_warnings(transfer_to, warntype); 1485 flush_warnings(transfer_to, warntype_to);
1486 flush_warnings(transfer_from, warntype_from_inodes);
1487 flush_warnings(transfer_from, warntype_from_space);
1404 1488
1405 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1489 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1406 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT) 1490 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
@@ -1412,6 +1496,18 @@ warn_put_all:
1412 return ret; 1496 return ret;
1413} 1497}
1414 1498
1499/* Wrapper for transferring ownership of an inode */
1500int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
1501{
1502 if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
1503 vfs_dq_init(inode);
1504 if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
1505 return 1;
1506 }
1507 return 0;
1508}
1509
1510
1415/* 1511/*
1416 * Write info of quota file to disk 1512 * Write info of quota file to disk
1417 */ 1513 */
@@ -1752,6 +1848,22 @@ out:
1752 return error; 1848 return error;
1753} 1849}
1754 1850
1851/* Wrapper to turn on quotas when remounting rw */
1852int vfs_dq_quota_on_remount(struct super_block *sb)
1853{
1854 int cnt;
1855 int ret = 0, err;
1856
1857 if (!sb->s_qcop || !sb->s_qcop->quota_on)
1858 return -ENOSYS;
1859 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1860 err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
1861 if (err < 0 && !ret)
1862 ret = err;
1863 }
1864 return ret;
1865}
1866
1755/* Generic routine for getting common part of quota structure */ 1867/* Generic routine for getting common part of quota structure */
1756static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) 1868static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
1757{ 1869{
@@ -2087,8 +2199,11 @@ EXPORT_SYMBOL(dquot_release);
2087EXPORT_SYMBOL(dquot_mark_dquot_dirty); 2199EXPORT_SYMBOL(dquot_mark_dquot_dirty);
2088EXPORT_SYMBOL(dquot_initialize); 2200EXPORT_SYMBOL(dquot_initialize);
2089EXPORT_SYMBOL(dquot_drop); 2201EXPORT_SYMBOL(dquot_drop);
2202EXPORT_SYMBOL(vfs_dq_drop);
2090EXPORT_SYMBOL(dquot_alloc_space); 2203EXPORT_SYMBOL(dquot_alloc_space);
2091EXPORT_SYMBOL(dquot_alloc_inode); 2204EXPORT_SYMBOL(dquot_alloc_inode);
2092EXPORT_SYMBOL(dquot_free_space); 2205EXPORT_SYMBOL(dquot_free_space);
2093EXPORT_SYMBOL(dquot_free_inode); 2206EXPORT_SYMBOL(dquot_free_inode);
2094EXPORT_SYMBOL(dquot_transfer); 2207EXPORT_SYMBOL(dquot_transfer);
2208EXPORT_SYMBOL(vfs_dq_transfer);
2209EXPORT_SYMBOL(vfs_dq_quota_on_remount);
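
The info_idq_free()/info_bdq_free() helpers decide whether a free operation drops usage back below a hard or soft limit, so the matching "below" warning can be queued (print_warning() now suppresses those types on the console and leaves them to netlink). A standalone sketch of the inode-side test, with local stand-ins for the QUOTA_NL_* codes:

/*
 * Userspace sketch of the info_idq_free() threshold test added above:
 * report when freeing "inodes" drops usage back below the soft or hard
 * limit. The enum values are local stand-ins for QUOTA_NL_*.
 */
#include <stdio.h>

enum { NL_NOWARN, NL_ISOFTBELOW, NL_IHARDBELOW };  /* assumed stand-ins */

struct dqb {
	unsigned long curinodes, isoftlimit, ihardlimit;
};

static int idq_free_warning(const struct dqb *q, unsigned long inodes)
{
	if (q->curinodes <= q->isoftlimit)
		return NL_NOWARN;                 /* was not over quota */
	if (q->curinodes - inodes <= q->isoftlimit)
		return NL_ISOFTBELOW;
	if (q->curinodes >= q->ihardlimit &&
	    q->curinodes - inodes < q->ihardlimit)
		return NL_IHARDBELOW;
	return NL_NOWARN;
}

int main(void)
{
	struct dqb q = { .curinodes = 105, .isoftlimit = 90,
			 .ihardlimit = 100 };

	/* freeing 20 inodes: 105 -> 85, back below the soft limit */
	printf("warning=%d\n", idq_free_warning(&q, 20)); /* ISOFTBELOW */
	/* freeing 10 inodes: 105 -> 95, back below the hard limit only */
	printf("warning=%d\n", idq_free_warning(&q, 10)); /* IHARDBELOW */
	return 0;
}

The block-side test in info_bdq_free() is the same comparison after converting current space usage to quota blocks with toqb().
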
diff --git a/fs/exec.c b/fs/exec.c
index 190ed1f92774..5e559013e303 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,19 +25,18 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mman.h> 28#include <linux/mm.h>
29#include <linux/stat.h> 29#include <linux/stat.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
31#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
32#include <linux/swap.h>
32#include <linux/string.h> 33#include <linux/string.h>
33#include <linux/init.h> 34#include <linux/init.h>
34#include <linux/pagemap.h>
35#include <linux/highmem.h> 35#include <linux/highmem.h>
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/key.h> 37#include <linux/key.h>
38#include <linux/personality.h> 38#include <linux/personality.h>
39#include <linux/binfmts.h> 39#include <linux/binfmts.h>
40#include <linux/swap.h>
41#include <linux/utsname.h> 40#include <linux/utsname.h>
42#include <linux/pid_namespace.h> 41#include <linux/pid_namespace.h>
43#include <linux/module.h> 42#include <linux/module.h>
@@ -47,7 +46,6 @@
47#include <linux/mount.h> 46#include <linux/mount.h>
48#include <linux/security.h> 47#include <linux/security.h>
49#include <linux/syscalls.h> 48#include <linux/syscalls.h>
50#include <linux/rmap.h>
51#include <linux/tsacct_kern.h> 49#include <linux/tsacct_kern.h>
52#include <linux/cn_proc.h> 50#include <linux/cn_proc.h>
53#include <linux/audit.h> 51#include <linux/audit.h>
@@ -724,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm)
724 * Make sure that if there is a core dump in progress 722 * Make sure that if there is a core dump in progress
725 * for the old mm, we get out and die instead of going 723 * for the old mm, we get out and die instead of going
726 * through with the exec. We must hold mmap_sem around 724 * through with the exec. We must hold mmap_sem around
727 * checking core_waiters and changing tsk->mm. The 725 * checking core_state and changing tsk->mm.
728 * core-inducing thread will increment core_waiters for
729 * each thread whose ->mm == old_mm.
730 */ 726 */
731 down_read(&old_mm->mmap_sem); 727 down_read(&old_mm->mmap_sem);
732 if (unlikely(old_mm->core_waiters)) { 728 if (unlikely(old_mm->core_state)) {
733 up_read(&old_mm->mmap_sem); 729 up_read(&old_mm->mmap_sem);
734 return -EINTR; 730 return -EINTR;
735 } 731 }
@@ -1328,6 +1324,7 @@ int do_execve(char * filename,
1328 if (retval < 0) 1324 if (retval < 0)
1329 goto out; 1325 goto out;
1330 1326
1327 current->flags &= ~PF_KTHREAD;
1331 retval = search_binary_handler(bprm,regs); 1328 retval = search_binary_handler(bprm,regs);
1332 if (retval >= 0) { 1329 if (retval >= 0) {
1333 /* execve success */ 1330 /* execve success */
@@ -1382,17 +1379,14 @@ EXPORT_SYMBOL(set_binfmt);
1382 * name into corename, which must have space for at least 1379 * name into corename, which must have space for at least
1383 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1380 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1384 */ 1381 */
1385static int format_corename(char *corename, const char *pattern, long signr) 1382static int format_corename(char *corename, int nr_threads, long signr)
1386{ 1383{
1387 const char *pat_ptr = pattern; 1384 const char *pat_ptr = core_pattern;
1385 int ispipe = (*pat_ptr == '|');
1388 char *out_ptr = corename; 1386 char *out_ptr = corename;
1389 char *const out_end = corename + CORENAME_MAX_SIZE; 1387 char *const out_end = corename + CORENAME_MAX_SIZE;
1390 int rc; 1388 int rc;
1391 int pid_in_pattern = 0; 1389 int pid_in_pattern = 0;
1392 int ispipe = 0;
1393
1394 if (*pattern == '|')
1395 ispipe = 1;
1396 1390
1397 /* Repeat as long as we have more pattern to process and more output 1391 /* Repeat as long as we have more pattern to process and more output
1398 space */ 1392 space */
@@ -1493,7 +1487,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
1493 * and core_uses_pid is set, then .%pid will be appended to 1487 * and core_uses_pid is set, then .%pid will be appended to
1494 * the filename. Do not do this for piped commands. */ 1488 * the filename. Do not do this for piped commands. */
1495 if (!ispipe && !pid_in_pattern 1489 if (!ispipe && !pid_in_pattern
1496 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) { 1490 && (core_uses_pid || nr_threads)) {
1497 rc = snprintf(out_ptr, out_end - out_ptr, 1491 rc = snprintf(out_ptr, out_end - out_ptr,
1498 ".%d", task_tgid_vnr(current)); 1492 ".%d", task_tgid_vnr(current));
1499 if (rc > out_end - out_ptr) 1493 if (rc > out_end - out_ptr)
@@ -1505,9 +1499,10 @@ out:
1505 return ispipe; 1499 return ispipe;
1506} 1500}
1507 1501
1508static void zap_process(struct task_struct *start) 1502static int zap_process(struct task_struct *start)
1509{ 1503{
1510 struct task_struct *t; 1504 struct task_struct *t;
1505 int nr = 0;
1511 1506
1512 start->signal->flags = SIGNAL_GROUP_EXIT; 1507 start->signal->flags = SIGNAL_GROUP_EXIT;
1513 start->signal->group_stop_count = 0; 1508 start->signal->group_stop_count = 0;
@@ -1515,72 +1510,99 @@ static void zap_process(struct task_struct *start)
1515 t = start; 1510 t = start;
1516 do { 1511 do {
1517 if (t != current && t->mm) { 1512 if (t != current && t->mm) {
1518 t->mm->core_waiters++;
1519 sigaddset(&t->pending.signal, SIGKILL); 1513 sigaddset(&t->pending.signal, SIGKILL);
1520 signal_wake_up(t, 1); 1514 signal_wake_up(t, 1);
1515 nr++;
1521 } 1516 }
1522 } while ((t = next_thread(t)) != start); 1517 } while_each_thread(start, t);
1518
1519 return nr;
1523} 1520}
1524 1521
1525static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 1522static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1526 int exit_code) 1523 struct core_state *core_state, int exit_code)
1527{ 1524{
1528 struct task_struct *g, *p; 1525 struct task_struct *g, *p;
1529 unsigned long flags; 1526 unsigned long flags;
1530 int err = -EAGAIN; 1527 int nr = -EAGAIN;
1531 1528
1532 spin_lock_irq(&tsk->sighand->siglock); 1529 spin_lock_irq(&tsk->sighand->siglock);
1533 if (!signal_group_exit(tsk->signal)) { 1530 if (!signal_group_exit(tsk->signal)) {
1531 mm->core_state = core_state;
1534 tsk->signal->group_exit_code = exit_code; 1532 tsk->signal->group_exit_code = exit_code;
1535 zap_process(tsk); 1533 nr = zap_process(tsk);
1536 err = 0;
1537 } 1534 }
1538 spin_unlock_irq(&tsk->sighand->siglock); 1535 spin_unlock_irq(&tsk->sighand->siglock);
1539 if (err) 1536 if (unlikely(nr < 0))
1540 return err; 1537 return nr;
1541 1538
1542 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) 1539 if (atomic_read(&mm->mm_users) == nr + 1)
1543 goto done; 1540 goto done;
1544 1541 /*
1542 * We should find and kill all tasks which use this mm, and we should
1543 * count them correctly into ->nr_threads. We don't take tasklist
1544 * lock, but this is safe wrt:
1545 *
1546 * fork:
1547 * None of sub-threads can fork after zap_process(leader). All
1548 * processes which were created before this point should be
1549 * visible to zap_threads() because copy_process() adds the new
1550 * process to the tail of init_task.tasks list, and lock/unlock
1551 * of ->siglock provides a memory barrier.
1552 *
1553 * do_exit:
1554 * The caller holds mm->mmap_sem. This means that the task which
1555 * uses this mm can't pass exit_mm(), so it can't exit or clear
1556 * its ->mm.
1557 *
1558 * de_thread:
1559 * It does list_replace_rcu(&leader->tasks, &current->tasks),
1560 * we must see either old or new leader, this does not matter.
1561 * However, it can change p->sighand, so lock_task_sighand(p)
1562 * must be used. Since p->mm != NULL and we hold ->mmap_sem
1563 * it can't fail.
1564 *
1565 * Note also that "g" can be the old leader with ->mm == NULL
1566 * and already unhashed and thus removed from ->thread_group.
1567 * This is OK, __unhash_process()->list_del_rcu() does not
1568 * clear the ->next pointer, we will find the new leader via
1569 * next_thread().
1570 */
1545 rcu_read_lock(); 1571 rcu_read_lock();
1546 for_each_process(g) { 1572 for_each_process(g) {
1547 if (g == tsk->group_leader) 1573 if (g == tsk->group_leader)
1548 continue; 1574 continue;
1549 1575 if (g->flags & PF_KTHREAD)
1576 continue;
1550 p = g; 1577 p = g;
1551 do { 1578 do {
1552 if (p->mm) { 1579 if (p->mm) {
1553 if (p->mm == mm) { 1580 if (unlikely(p->mm == mm)) {
1554 /*
1555 * p->sighand can't disappear, but
1556 * may be changed by de_thread()
1557 */
1558 lock_task_sighand(p, &flags); 1581 lock_task_sighand(p, &flags);
1559 zap_process(p); 1582 nr += zap_process(p);
1560 unlock_task_sighand(p, &flags); 1583 unlock_task_sighand(p, &flags);
1561 } 1584 }
1562 break; 1585 break;
1563 } 1586 }
1564 } while ((p = next_thread(p)) != g); 1587 } while_each_thread(g, p);
1565 } 1588 }
1566 rcu_read_unlock(); 1589 rcu_read_unlock();
1567done: 1590done:
1568 return mm->core_waiters; 1591 atomic_set(&core_state->nr_threads, nr);
1592 return nr;
1569} 1593}
1570 1594
1571static int coredump_wait(int exit_code) 1595static int coredump_wait(int exit_code, struct core_state *core_state)
1572{ 1596{
1573 struct task_struct *tsk = current; 1597 struct task_struct *tsk = current;
1574 struct mm_struct *mm = tsk->mm; 1598 struct mm_struct *mm = tsk->mm;
1575 struct completion startup_done;
1576 struct completion *vfork_done; 1599 struct completion *vfork_done;
1577 int core_waiters; 1600 int core_waiters;
1578 1601
1579 init_completion(&mm->core_done); 1602 init_completion(&core_state->startup);
1580 init_completion(&startup_done); 1603 core_state->dumper.task = tsk;
1581 mm->core_startup_done = &startup_done; 1604 core_state->dumper.next = NULL;
1582 1605 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1583 core_waiters = zap_threads(tsk, mm, exit_code);
1584 up_write(&mm->mmap_sem); 1606 up_write(&mm->mmap_sem);
1585 1607
1586 if (unlikely(core_waiters < 0)) 1608 if (unlikely(core_waiters < 0))
@@ -1597,12 +1619,32 @@ static int coredump_wait(int exit_code)
1597 } 1619 }
1598 1620
1599 if (core_waiters) 1621 if (core_waiters)
1600 wait_for_completion(&startup_done); 1622 wait_for_completion(&core_state->startup);
1601fail: 1623fail:
1602 BUG_ON(mm->core_waiters);
1603 return core_waiters; 1624 return core_waiters;
1604} 1625}
1605 1626
1627static void coredump_finish(struct mm_struct *mm)
1628{
1629 struct core_thread *curr, *next;
1630 struct task_struct *task;
1631
1632 next = mm->core_state->dumper.next;
1633 while ((curr = next) != NULL) {
1634 next = curr->next;
1635 task = curr->task;
1636 /*
1637 * see exit_mm(), curr->task must not see
1638 * ->task == NULL before we read ->next.
1639 */
1640 smp_mb();
1641 curr->task = NULL;
1642 wake_up_process(task);
1643 }
1644
1645 mm->core_state = NULL;
1646}
1647
1606/* 1648/*
1607 * set_dumpable converts traditional three-value dumpable to two flags and 1649 * set_dumpable converts traditional three-value dumpable to two flags and
1608 * stores them into mm->flags. It modifies lower two bits of mm->flags, but 1650 * stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -1654,6 +1696,7 @@ int get_dumpable(struct mm_struct *mm)
1654 1696
1655int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1697int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1656{ 1698{
1699 struct core_state core_state;
1657 char corename[CORENAME_MAX_SIZE + 1]; 1700 char corename[CORENAME_MAX_SIZE + 1];
1658 struct mm_struct *mm = current->mm; 1701 struct mm_struct *mm = current->mm;
1659 struct linux_binfmt * binfmt; 1702 struct linux_binfmt * binfmt;
@@ -1677,7 +1720,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1677 /* 1720 /*
1678 * If another thread got here first, or we are not dumpable, bail out. 1721 * If another thread got here first, or we are not dumpable, bail out.
1679 */ 1722 */
1680 if (mm->core_waiters || !get_dumpable(mm)) { 1723 if (mm->core_state || !get_dumpable(mm)) {
1681 up_write(&mm->mmap_sem); 1724 up_write(&mm->mmap_sem);
1682 goto fail; 1725 goto fail;
1683 } 1726 }
@@ -1692,7 +1735,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1692 current->fsuid = 0; /* Dump root private */ 1735 current->fsuid = 0; /* Dump root private */
1693 } 1736 }
1694 1737
1695 retval = coredump_wait(exit_code); 1738 retval = coredump_wait(exit_code, &core_state);
1696 if (retval < 0) 1739 if (retval < 0)
1697 goto fail; 1740 goto fail;
1698 1741
@@ -1707,7 +1750,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1707 * uses lock_kernel() 1750 * uses lock_kernel()
1708 */ 1751 */
1709 lock_kernel(); 1752 lock_kernel();
1710 ispipe = format_corename(corename, core_pattern, signr); 1753 ispipe = format_corename(corename, retval, signr);
1711 unlock_kernel(); 1754 unlock_kernel();
1712 /* 1755 /*
1713 * Don't bother to check the RLIMIT_CORE value if core_pattern points 1756 * Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1829,7 @@ fail_unlock:
1786 argv_free(helper_argv); 1829 argv_free(helper_argv);
1787 1830
1788 current->fsuid = fsuid; 1831 current->fsuid = fsuid;
1789 complete_all(&mm->core_done); 1832 coredump_finish(mm);
1790fail: 1833fail:
1791 return retval; 1834 return retval;
1792} 1835}
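
format_corename() now reads the global core_pattern itself and appends ".%pid" when the pattern named no pid and other threads shared the mm (the retval from coredump_wait() is the count of such threads). A simplified userspace sketch of the expansion, handling only %p and %%, with the core_uses_pid sysctl omitted:

/*
 * Simplified sketch of format_corename(): expand a core_pattern into a
 * name, detect the '|' pipe prefix, and append ".pid" when the pattern
 * contained no pid and other threads shared the mm. Only %p and %% are
 * handled; the kernel supports many more specifiers and also honors
 * the core_uses_pid sysctl, which is omitted here.
 */
#include <stdio.h>
#include <string.h>

static int expand_core_pattern(const char *pattern, int pid,
			       int nr_threads, char *out, size_t len)
{
	int ispipe = (*pattern == '|');
	int pid_in_pattern = 0;
	size_t n = 0;

	for (const char *p = pattern; *p && n + 32 < len; p++) {
		if (*p == '%' && p[1] == 'p') {
			n += snprintf(out + n, len - n, "%d", pid);
			pid_in_pattern = 1;
			p++;
		} else if (*p == '%' && p[1] == '%') {
			out[n++] = '%';
			p++;
		} else {
			out[n++] = *p;
		}
	}
	if (!ispipe && !pid_in_pattern && nr_threads && n + 32 < len)
		n += snprintf(out + n, len - n, ".%d", pid);
	if (n >= len)
		n = len - 1;
	out[n] = '\0';
	return ispipe;
}

int main(void)
{
	char name[256];
	int ispipe = expand_core_pattern("core.%p", 1234, 3,
					 name, sizeof(name));

	printf("ispipe=%d name=%s\n", ispipe, name);  /* core.1234 */
	expand_core_pattern("core", 1234, 3, name, sizeof(name));
	printf("name=%s\n", name);                    /* core.1234 */
	return 0;
}
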
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc792db..31308a3b0b8b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
31#include <linux/seq_file.h> 31#include <linux/seq_file.h>
32#include <linux/mount.h> 32#include <linux/mount.h>
33#include <linux/log2.h> 33#include <linux/log2.h>
34#include <linux/quotaops.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35#include "ext2.h" 36#include "ext2.h"
36#include "xattr.h" 37#include "xattr.h"
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index eaa23d2d5213..70c0dbdcdcb7 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@ static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, 14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
15 const char *name, size_t name_len) 15 const char *name, size_t name_len)
16{ 16{
17 const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
18 const size_t total_len = prefix_len + name_len + 1; 18 const size_t total_len = prefix_len + name_len + 1;
19 19
20 if (list && total_len <= list_size) { 20 if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 83ee149f353d..e8219f8eae9f 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
12#include <linux/ext2_fs.h> 12#include <linux/ext2_fs.h>
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_TRUSTED_PREFIX "trusted."
16
17static size_t 15static size_t
18ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 16ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!capable(CAP_SYS_ADMIN)) 22 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index f383e7c3a7b5..92495d28c62f 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
11#include "ext2.h" 11#include "ext2.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14#define XATTR_USER_PREFIX "user."
15
16static size_t 14static size_t
17ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, 15ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
18 const char *name, size_t name_len) 16 const char *name, size_t name_len)
19{ 17{
20 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 18 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
22 20
23 if (!test_opt(inode->i_sb, XATTR_USER)) 21 if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd72427..2eea96ec78ed 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root)
272 272
273 while (n) { 273 while (n) {
274 /* Do the node's children first */ 274 /* Do the node's children first */
275 if ((n)->rb_left) { 275 if (n->rb_left) {
276 n = n->rb_left; 276 n = n->rb_left;
277 continue; 277 continue;
278 } 278 }
@@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root)
301 parent->rb_right = NULL; 301 parent->rb_right = NULL;
302 n = parent; 302 n = parent;
303 } 303 }
304 root->rb_node = NULL;
305} 304}
306 305
307 306
308static struct dir_private_info *create_dir_info(loff_t pos) 307static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
309{ 308{
310 struct dir_private_info *p; 309 struct dir_private_info *p;
311 310
312 p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); 311 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
313 if (!p) 312 if (!p)
314 return NULL; 313 return NULL;
315 p->root.rb_node = NULL;
316 p->curr_node = NULL;
317 p->extra_fname = NULL;
318 p->last_pos = 0;
319 p->curr_hash = pos2maj_hash(pos); 314 p->curr_hash = pos2maj_hash(pos);
320 p->curr_minor_hash = pos2min_hash(pos); 315 p->curr_minor_hash = pos2min_hash(pos);
321 p->next_hash = 0;
322 return p; 316 return p;
323} 317}
324 318
@@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp,
433 int ret; 427 int ret;
434 428
435 if (!info) { 429 if (!info) {
436 info = create_dir_info(filp->f_pos); 430 info = ext3_htree_create_dir_info(filp->f_pos);
437 if (!info) 431 if (!info)
438 return -ENOMEM; 432 return -ENOMEM;
439 filp->private_data = info; 433 filp->private_data = info;
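
free_rb_tree_fname() above tears the tree down iteratively in post order: descend to a leaf, unlink it from its parent, free it, and climb back up, so no freed node is ever revisited and no recursion depth is needed. A sketch of the same pattern on a plain binary tree:

/*
 * Sketch of the iterative post-order teardown used by
 * free_rb_tree_fname(): children are freed first, and the parent's
 * child pointer is nulled before freeing so the walk never revisits
 * freed memory. A plain binary tree stands in for the rbtree.
 */
#include <stdlib.h>

struct node {
	struct node *left, *right, *parent;
};

static void free_tree(struct node **root)
{
	struct node *n = *root;

	while (n) {
		if (n->left) {             /* do the children first */
			n = n->left;
			continue;
		}
		if (n->right) {
			n = n->right;
			continue;
		}
		struct node *parent = n->parent;

		if (parent) {              /* unlink before freeing */
			if (parent->left == n)
				parent->left = NULL;
			else
				parent->right = NULL;
		}
		free(n);
		n = parent;                /* climb back up */
	}
	*root = NULL;                      /* callers see an empty tree */
}

int main(void)
{
	struct node *root = calloc(1, sizeof(*root));

	root->left = calloc(1, sizeof(*root));
	root->left->parent = root;
	free_tree(&root);
	return root == NULL ? 0 : 1;
}
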
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 77126821b2e9..47b678d73e7a 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
669 if (IS_ERR(inode)) 669 if (IS_ERR(inode))
670 goto iget_failed; 670 goto iget_failed;
671 671
672 /*
 673	 * If the orphan has i_nlink > 0 then it must be possible to
 674	 * truncate it; otherwise it won't be removed from the orphan list
675 * during processing and an infinite loop will result.
676 */
677 if (inode->i_nlink && !ext3_can_truncate(inode))
678 goto bad_orphan;
679
672 if (NEXT_ORPHAN(inode) > max_ino) 680 if (NEXT_ORPHAN(inode) > max_ino)
673 goto bad_orphan; 681 goto bad_orphan;
674 brelse(bitmap_bh); 682 brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
690 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 698 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
691 NEXT_ORPHAN(inode)); 699 NEXT_ORPHAN(inode));
692 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 700 printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
701 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
693 /* Avoid freeing blocks if we got a bad deleted inode */ 702 /* Avoid freeing blocks if we got a bad deleted inode */
694 if (inode->i_nlink == 0) 703 if (inode->i_nlink == 0)
695 inode->i_blocks = 0; 704 inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf3ce40..3bf07d70b914 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2127 2127
2128 if (this_bh) { 2128 if (this_bh) {
2129 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); 2129 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
2130 ext3_journal_dirty_metadata(handle, this_bh); 2130
2131 /*
2132 * The buffer head should have an attached journal head at this
2133 * point. However, if the data is corrupted and an indirect
2134 * block pointed to itself, it would have been detached when
2135 * the block was cleared. Check for this instead of OOPSing.
2136 */
2137 if (bh2jh(this_bh))
2138 ext3_journal_dirty_metadata(handle, this_bh);
2139 else
2140 ext3_error(inode->i_sb, "ext3_free_data",
2141 "circular indirect block detected, "
2142 "inode=%lu, block=%llu",
2143 inode->i_ino,
2144 (unsigned long long)this_bh->b_blocknr);
2131 } 2145 }
2132} 2146}
2133 2147
@@ -2253,6 +2267,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2253 } 2267 }
2254} 2268}
2255 2269
2270int ext3_can_truncate(struct inode *inode)
2271{
2272 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2273 return 0;
2274 if (S_ISREG(inode->i_mode))
2275 return 1;
2276 if (S_ISDIR(inode->i_mode))
2277 return 1;
2278 if (S_ISLNK(inode->i_mode))
2279 return !ext3_inode_is_fast_symlink(inode);
2280 return 0;
2281}
2282
2256/* 2283/*
2257 * ext3_truncate() 2284 * ext3_truncate()
2258 * 2285 *
@@ -2297,12 +2324,7 @@ void ext3_truncate(struct inode *inode)
2297 unsigned blocksize = inode->i_sb->s_blocksize; 2324 unsigned blocksize = inode->i_sb->s_blocksize;
2298 struct page *page; 2325 struct page *page;
2299 2326
2300 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2327 if (!ext3_can_truncate(inode))
2301 S_ISLNK(inode->i_mode)))
2302 return;
2303 if (ext3_inode_is_fast_symlink(inode))
2304 return;
2305 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2306 return; 2328 return;
2307 2329
2308 /* 2330 /*
@@ -2513,6 +2535,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
2513 } 2535 }
2514 if (!buffer_uptodate(bh)) { 2536 if (!buffer_uptodate(bh)) {
2515 lock_buffer(bh); 2537 lock_buffer(bh);
2538
2539 /*
2540 * If the buffer has the write error flag, we have failed
2541 * to write out another inode in the same block. In this
2542 * case, we don't have to read the block because we may
2543 * read the old inode data successfully.
2544 */
2545 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
2546 set_buffer_uptodate(bh);
2547
2516 if (buffer_uptodate(bh)) { 2548 if (buffer_uptodate(bh)) {
2517 /* someone brought it uptodate while we waited */ 2549 /* someone brought it uptodate while we waited */
2518 unlock_buffer(bh); 2550 unlock_buffer(bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80154f1..de13e919cd81 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
240{ 240{
241 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - 241 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
242 EXT3_DIR_REC_LEN(2) - infosize; 242 EXT3_DIR_REC_LEN(2) - infosize;
243 return 0? 20: entry_space / sizeof(struct dx_entry); 243 return entry_space / sizeof(struct dx_entry);
244} 244}
245 245
246static inline unsigned dx_node_limit (struct inode *dir) 246static inline unsigned dx_node_limit (struct inode *dir)
247{ 247{
248 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); 248 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
249 return 0? 22: entry_space / sizeof(struct dx_entry); 249 return entry_space / sizeof(struct dx_entry);
250} 250}
251 251
252/* 252/*
@@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
991 de = (struct ext3_dir_entry_2 *) bh->b_data; 991 de = (struct ext3_dir_entry_2 *) bh->b_data;
992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - 992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
993 EXT3_DIR_REC_LEN(0)); 993 EXT3_DIR_REC_LEN(0));
994 for (; de < top; de = ext3_next_entry(de)) 994 for (; de < top; de = ext3_next_entry(de)) {
995 if (ext3_match (namelen, name, de)) { 995 int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
996 if (!ext3_check_dir_entry("ext3_find_entry", 996 + ((char *) de - bh->b_data);
997 dir, de, bh, 997
998 (block<<EXT3_BLOCK_SIZE_BITS(sb)) 998 if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
999 +((char *)de - bh->b_data))) { 999 brelse(bh);
1000 brelse (bh);
1001 *err = ERR_BAD_DX_DIR; 1000 *err = ERR_BAD_DX_DIR;
1002 goto errout; 1001 goto errout;
1003 } 1002 }
1004 *res_dir = de; 1003
1005 dx_release (frames); 1004 if (ext3_match(namelen, name, de)) {
1006 return bh; 1005 *res_dir = de;
1006 dx_release(frames);
1007 return bh;
1008 }
1007 } 1009 }
1008 brelse (bh); 1010 brelse (bh);
1009 /* Check to see if we should continue to search */ 1011 /* Check to see if we should continue to search */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2845425077e8..615788c6843a 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb,
842 int data_opt = 0; 842 int data_opt = 0;
843 int option; 843 int option;
844#ifdef CONFIG_QUOTA 844#ifdef CONFIG_QUOTA
845 int qtype; 845 int qtype, qfmt;
846 char *qname; 846 char *qname;
847#endif 847#endif
848 848
@@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb,
1018 case Opt_grpjquota: 1018 case Opt_grpjquota:
1019 qtype = GRPQUOTA; 1019 qtype = GRPQUOTA;
1020set_qf_name: 1020set_qf_name:
1021 if (sb_any_quota_enabled(sb)) { 1021 if ((sb_any_quota_enabled(sb) ||
1022 sb_any_quota_suspended(sb)) &&
1023 !sbi->s_qf_names[qtype]) {
1022 printk(KERN_ERR 1024 printk(KERN_ERR
1023 "EXT3-fs: Cannot change journalled " 1025 "EXT3-fs: Cannot change journaled "
1024 "quota options when quota turned on.\n"); 1026 "quota options when quota turned on.\n");
1025 return 0; 1027 return 0;
1026 } 1028 }
@@ -1056,9 +1058,11 @@ set_qf_name:
1056 case Opt_offgrpjquota: 1058 case Opt_offgrpjquota:
1057 qtype = GRPQUOTA; 1059 qtype = GRPQUOTA;
1058clear_qf_name: 1060clear_qf_name:
1059 if (sb_any_quota_enabled(sb)) { 1061 if ((sb_any_quota_enabled(sb) ||
1062 sb_any_quota_suspended(sb)) &&
1063 sbi->s_qf_names[qtype]) {
1060 printk(KERN_ERR "EXT3-fs: Cannot change " 1064 printk(KERN_ERR "EXT3-fs: Cannot change "
1061 "journalled quota options when " 1065 "journaled quota options when "
1062 "quota turned on.\n"); 1066 "quota turned on.\n");
1063 return 0; 1067 return 0;
1064 } 1068 }
@@ -1069,10 +1073,20 @@ clear_qf_name:
1069 sbi->s_qf_names[qtype] = NULL; 1073 sbi->s_qf_names[qtype] = NULL;
1070 break; 1074 break;
1071 case Opt_jqfmt_vfsold: 1075 case Opt_jqfmt_vfsold:
1072 sbi->s_jquota_fmt = QFMT_VFS_OLD; 1076 qfmt = QFMT_VFS_OLD;
1073 break; 1077 goto set_qf_format;
1074 case Opt_jqfmt_vfsv0: 1078 case Opt_jqfmt_vfsv0:
1075 sbi->s_jquota_fmt = QFMT_VFS_V0; 1079 qfmt = QFMT_VFS_V0;
1080set_qf_format:
1081 if ((sb_any_quota_enabled(sb) ||
1082 sb_any_quota_suspended(sb)) &&
1083 sbi->s_jquota_fmt != qfmt) {
1084 printk(KERN_ERR "EXT3-fs: Cannot change "
1085 "journaled quota options when "
1086 "quota turned on.\n");
1087 return 0;
1088 }
1089 sbi->s_jquota_fmt = qfmt;
1076 break; 1090 break;
1077 case Opt_quota: 1091 case Opt_quota:
1078 case Opt_usrquota: 1092 case Opt_usrquota:
@@ -1084,7 +1098,8 @@ clear_qf_name:
1084 set_opt(sbi->s_mount_opt, GRPQUOTA); 1098 set_opt(sbi->s_mount_opt, GRPQUOTA);
1085 break; 1099 break;
1086 case Opt_noquota: 1100 case Opt_noquota:
1087 if (sb_any_quota_enabled(sb)) { 1101 if (sb_any_quota_enabled(sb) ||
1102 sb_any_quota_suspended(sb)) {
1088 printk(KERN_ERR "EXT3-fs: Cannot change quota " 1103 printk(KERN_ERR "EXT3-fs: Cannot change quota "
1089 "options when quota turned on.\n"); 1104 "options when quota turned on.\n");
1090 return 0; 1105 return 0;
@@ -1169,14 +1184,14 @@ clear_qf_name:
1169 } 1184 }
1170 1185
1171 if (!sbi->s_jquota_fmt) { 1186 if (!sbi->s_jquota_fmt) {
1172 printk(KERN_ERR "EXT3-fs: journalled quota format " 1187 printk(KERN_ERR "EXT3-fs: journaled quota format "
1173 "not specified.\n"); 1188 "not specified.\n");
1174 return 0; 1189 return 0;
1175 } 1190 }
1176 } else { 1191 } else {
1177 if (sbi->s_jquota_fmt) { 1192 if (sbi->s_jquota_fmt) {
1178 printk(KERN_ERR "EXT3-fs: journalled quota format " 1193 printk(KERN_ERR "EXT3-fs: journaled quota format "
1179 "specified with no journalling " 1194 "specified with no journaling "
1180 "enabled.\n"); 1195 "enabled.\n");
1181 return 0; 1196 return 0;
1182 } 1197 }
@@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1370 int ret = ext3_quota_on_mount(sb, i); 1385 int ret = ext3_quota_on_mount(sb, i);
1371 if (ret < 0) 1386 if (ret < 0)
1372 printk(KERN_ERR 1387 printk(KERN_ERR
1373 "EXT3-fs: Cannot turn on journalled " 1388 "EXT3-fs: Cannot turn on journaled "
1374 "quota: error %d\n", ret); 1389 "quota: error %d\n", ret);
1375 } 1390 }
1376 } 1391 }
@@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot)
2712 2727
2713static int ext3_mark_dquot_dirty(struct dquot *dquot) 2728static int ext3_mark_dquot_dirty(struct dquot *dquot)
2714{ 2729{
2715 /* Are we journalling quotas? */ 2730 /* Are we journaling quotas? */
2716 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2731 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2717 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2732 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2718 dquot_mark_dquot_dirty(dquot); 2733 dquot_mark_dquot_dirty(dquot);
@@ -2759,23 +2774,42 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2759 2774
2760 if (!test_opt(sb, QUOTA)) 2775 if (!test_opt(sb, QUOTA))
2761 return -EINVAL; 2776 return -EINVAL;
2762 /* Not journalling quota or remount? */ 2777 /* When remounting, no checks are needed and in fact, path is NULL */
2763 if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2778 if (remount)
2764 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
2765 return vfs_quota_on(sb, type, format_id, path, remount); 2779 return vfs_quota_on(sb, type, format_id, path, remount);
2780
2766 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2781 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2767 if (err) 2782 if (err)
2768 return err; 2783 return err;
2784
2769 /* Quotafile not on the same filesystem? */ 2785 /* Quotafile not on the same filesystem? */
2770 if (nd.path.mnt->mnt_sb != sb) { 2786 if (nd.path.mnt->mnt_sb != sb) {
2771 path_put(&nd.path); 2787 path_put(&nd.path);
2772 return -EXDEV; 2788 return -EXDEV;
2773 } 2789 }
2774 /* Quotafile not in fs root? */ 2790 /* Journaling quota? */
2775 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2791 if (EXT3_SB(sb)->s_qf_names[type]) {
2776 printk(KERN_WARNING 2792 /* Quotafile not of fs root? */
2777 "EXT3-fs: Quota file not on filesystem root. " 2793 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2778 "Journalled quota will not work.\n"); 2794 printk(KERN_WARNING
2795 "EXT3-fs: Quota file not on filesystem root. "
2796 "Journaled quota will not work.\n");
2797 }
2798
2799 /*
2800 * When we journal data on quota file, we have to flush journal to see
2801 * all updates to the file when we bypass pagecache...
2802 */
2803 if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
2804 /*
2805 * We don't need to lock updates but journal_flush() could
2806 * otherwise be livelocked...
2807 */
2808 journal_lock_updates(EXT3_SB(sb)->s_journal);
2809 journal_flush(EXT3_SB(sb)->s_journal);
2810 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2811 }
2812
2779 path_put(&nd.path); 2813 path_put(&nd.path);
2780 return vfs_quota_on(sb, type, format_id, path, remount); 2814 return vfs_quota_on(sb, type, format_id, path, remount);
2781} 2815}
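
The Opt_jqfmt_vfsold/Opt_jqfmt_vfsv0 hunk stops assigning s_jquota_fmt directly and routes both cases through a shared set_qf_format check, mirroring the existing set_qf_name/clear_qf_name labels. A self-contained sketch of that goto-into-the-next-case shape (names and format values here are illustrative, not the ext3 ones):

#include <stdio.h>

enum { OPT_JQFMT_OLD, OPT_JQFMT_V0 };

static int s_jquota_fmt = -1;	/* -1: not set yet */
static int quota_active;

static int parse_jqfmt(int token)
{
	int qfmt;

	switch (token) {
	case OPT_JQFMT_OLD:
		qfmt = 1;
		goto set_qf_format;
	case OPT_JQFMT_V0:
		qfmt = 2;
set_qf_format:
		/* refuse to switch formats while quota is already on */
		if (quota_active && s_jquota_fmt != -1 && s_jquota_fmt != qfmt)
			return -1;
		s_jquota_fmt = qfmt;
		return 0;
	}
	return -1;
}

int main(void)
{
	printf("%d\n", parse_jqfmt(OPT_JQFMT_OLD));	/* 0 */
	quota_active = 1;
	printf("%d\n", parse_jqfmt(OPT_JQFMT_V0));	/* -1: can't change */
	return 0;
}
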
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 821efaf2b94e..37b81097bdf2 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@ static size_t
15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len)
17{ 17{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
20 20
21 21
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 0327497a55ce..c7c41a410c4b 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
13#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted."
17
18static size_t 16static size_t
19ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
20 const char *name, size_t name_len) 18 const char *name, size_t name_len)
21{ 19{
22 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
23 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
24 22
25 if (!capable(CAP_SYS_ADMIN)) 23 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 1abd8f92c440..430fe63b31b3 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
12#include <linux/ext3_fs.h> 12#include <linux/ext3_fs.h>
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user."
16
17static size_t 15static size_t
18ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(inode->i_sb, XATTR_USER))
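
All three xattr hunks drop local prefix macros and open-coded sizeof(...)-1 lengths in favour of the shared XATTR_*_PREFIX_LEN constants. The identity they rely on, checked standalone (the macro bodies below mirror what <linux/xattr.h> is assumed to define):

#include <assert.h>
#include <string.h>

#define XATTR_USER_PREFIX "user."
#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)

int main(void)
{
	/* sizeof counts the terminating NUL of a string literal, so
	 * subtracting one yields strlen() as a compile-time constant */
	assert(XATTR_USER_PREFIX_LEN == strlen(XATTR_USER_PREFIX));
	return 0;
}
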
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 34541d06e626..cd4a0162e10d 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/msdos_fs.h> 19#include <linux/msdos_fs.h>
20#include <linux/dirent.h>
21#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
22#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
23#include <linux/compat.h> 22#include <linux/compat.h>
@@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
124 * but ignore that right now. 123 * but ignore that right now.
125 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 124 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
126 */ 125 */
127static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len, 126static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
128 int uni_xlate, struct nls_table *nls) 127 int uni_xlate, struct nls_table *nls)
129{ 128{
130 wchar_t *ip, ec; 129 const wchar_t *ip;
130 wchar_t ec;
131 unsigned char *op, nc; 131 unsigned char *op, nc;
132 int charlen; 132 int charlen;
133 int k; 133 int k;
@@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
167 return (op - ascii); 167 return (op - ascii);
168} 168}
169 169
170static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
171 unsigned char *buf, int size)
172{
173 if (sbi->options.utf8)
174 return utf8_wcstombs(buf, uni, size);
175 else
176 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
177 sbi->nls_io);
178}
179
170static inline int 180static inline int
171fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) 181fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
172{ 182{
@@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
227 return len; 237 return len;
228} 238}
229 239
240static inline int fat_name_match(struct msdos_sb_info *sbi,
241 const unsigned char *a, int a_len,
242 const unsigned char *b, int b_len)
243{
244 if (a_len != b_len)
245 return 0;
246
247 if (sbi->options.name_check != 's')
248 return !nls_strnicmp(sbi->nls_io, a, b, a_len);
249 else
250 return !memcmp(a, b, a_len);
251}
252
230enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, }; 253enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
231 254
232/** 255/**
@@ -302,6 +325,19 @@ parse_long:
302} 325}
303 326
304/* 327/*
328 * Maximum buffer size of short name.
329 * [(MSDOS_NAME + '.') * max one char + nul]
330 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
331 */
332#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
333/*
334 * Maximum buffer size of unicode chars from slots.
335 * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
336 */
337#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
338#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
339
340/*
305 * Return values: negative -> error, 0 -> not found, positive -> found, 341 * Return values: negative -> error, 0 -> not found, positive -> found,
306 * value is the total amount of slots, including the shortname entry. 342 * value is the total amount of slots, including the shortname entry.
307 */ 343 */
@@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
312 struct msdos_sb_info *sbi = MSDOS_SB(sb); 348 struct msdos_sb_info *sbi = MSDOS_SB(sb);
313 struct buffer_head *bh = NULL; 349 struct buffer_head *bh = NULL;
314 struct msdos_dir_entry *de; 350 struct msdos_dir_entry *de;
315 struct nls_table *nls_io = sbi->nls_io;
316 struct nls_table *nls_disk = sbi->nls_disk; 351 struct nls_table *nls_disk = sbi->nls_disk;
317 wchar_t bufuname[14];
318 unsigned char nr_slots; 352 unsigned char nr_slots;
319 int xlate_len; 353 wchar_t bufuname[14];
320 wchar_t *unicode = NULL; 354 wchar_t *unicode = NULL;
321 unsigned char work[MSDOS_NAME]; 355 unsigned char work[MSDOS_NAME];
322 unsigned char *bufname = NULL; 356 unsigned char bufname[FAT_MAX_SHORT_SIZE];
323 int uni_xlate = sbi->options.unicode_xlate;
324 int utf8 = sbi->options.utf8;
325 int anycase = (sbi->options.name_check != 's');
326 unsigned short opt_shortname = sbi->options.shortname; 357 unsigned short opt_shortname = sbi->options.shortname;
327 loff_t cpos = 0; 358 loff_t cpos = 0;
328 int chl, i, j, last_u, err; 359 int chl, i, j, last_u, err, len;
329
330 bufname = __getname();
331 if (!bufname)
332 return -ENOMEM;
333 360
334 err = -ENOENT; 361 err = -ENOENT;
335 while(1) { 362 while (1) {
336 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 363 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
337 goto EODir; 364 goto end_of_dir;
338parse_record: 365parse_record:
339 nr_slots = 0; 366 nr_slots = 0;
340 if (de->name[0] == DELETED_FLAG) 367 if (de->name[0] == DELETED_FLAG)
@@ -353,7 +380,7 @@ parse_record:
353 else if (status == PARSE_NOT_LONGNAME) 380 else if (status == PARSE_NOT_LONGNAME)
354 goto parse_record; 381 goto parse_record;
355 else if (status == PARSE_EOF) 382 else if (status == PARSE_EOF)
356 goto EODir; 383 goto end_of_dir;
357 } 384 }
358 385
359 memcpy(work, de->name, sizeof(de->name)); 386 memcpy(work, de->name, sizeof(de->name));
@@ -394,30 +421,24 @@ parse_record:
394 if (!last_u) 421 if (!last_u)
395 continue; 422 continue;
396 423
424 /* Compare shortname */
397 bufuname[last_u] = 0x0000; 425 bufuname[last_u] = 0x0000;
398 xlate_len = utf8 426 len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
399 ?utf8_wcstombs(bufname, bufuname, PATH_MAX) 427 if (fat_name_match(sbi, name, name_len, bufname, len))
400 :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io); 428 goto found;
401 if (xlate_len == name_len)
402 if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
403 (anycase && !nls_strnicmp(nls_io, name, bufname,
404 xlate_len)))
405 goto Found;
406 429
407 if (nr_slots) { 430 if (nr_slots) {
408 xlate_len = utf8 431 void *longname = unicode + FAT_MAX_UNI_CHARS;
409 ?utf8_wcstombs(bufname, unicode, PATH_MAX) 432 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
410 :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io); 433
411 if (xlate_len != name_len) 434 /* Compare longname */
412 continue; 435 len = fat_uni_to_x8(sbi, unicode, longname, size);
413 if ((!anycase && !memcmp(name, bufname, xlate_len)) || 436 if (fat_name_match(sbi, name, name_len, longname, len))
414 (anycase && !nls_strnicmp(nls_io, name, bufname, 437 goto found;
415 xlate_len)))
416 goto Found;
417 } 438 }
418 } 439 }
419 440
420Found: 441found:
421 nr_slots++; /* include the de */ 442 nr_slots++; /* include the de */
422 sinfo->slot_off = cpos - nr_slots * sizeof(*de); 443 sinfo->slot_off = cpos - nr_slots * sizeof(*de);
423 sinfo->nr_slots = nr_slots; 444 sinfo->nr_slots = nr_slots;
@@ -425,9 +446,7 @@ Found:
425 sinfo->bh = bh; 446 sinfo->bh = bh;
426 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); 447 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
427 err = 0; 448 err = 0;
428EODir: 449end_of_dir:
429 if (bufname)
430 __putname(bufname);
431 if (unicode) 450 if (unicode)
432 __putname(unicode); 451 __putname(unicode);
433 452
@@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
453 struct msdos_sb_info *sbi = MSDOS_SB(sb); 472 struct msdos_sb_info *sbi = MSDOS_SB(sb);
454 struct buffer_head *bh; 473 struct buffer_head *bh;
455 struct msdos_dir_entry *de; 474 struct msdos_dir_entry *de;
456 struct nls_table *nls_io = sbi->nls_io;
457 struct nls_table *nls_disk = sbi->nls_disk; 475 struct nls_table *nls_disk = sbi->nls_disk;
458 unsigned char long_slots; 476 unsigned char nr_slots;
459 const char *fill_name;
460 int fill_len;
461 wchar_t bufuname[14]; 477 wchar_t bufuname[14];
462 wchar_t *unicode = NULL; 478 wchar_t *unicode = NULL;
463 unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname; 479 unsigned char c, work[MSDOS_NAME];
464 unsigned long lpos, dummy, *furrfu = &lpos; 480 unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
465 int uni_xlate = sbi->options.unicode_xlate; 481 unsigned short opt_shortname = sbi->options.shortname;
466 int isvfat = sbi->options.isvfat; 482 int isvfat = sbi->options.isvfat;
467 int utf8 = sbi->options.utf8;
468 int nocase = sbi->options.nocase; 483 int nocase = sbi->options.nocase;
469 unsigned short opt_shortname = sbi->options.shortname; 484 const char *fill_name = NULL;
470 unsigned long inum; 485 unsigned long inum;
471 int chi, chl, i, i2, j, last, last_u, dotoffset = 0; 486 unsigned long lpos, dummy, *furrfu = &lpos;
472 loff_t cpos; 487 loff_t cpos;
488 int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
473 int ret = 0; 489 int ret = 0;
474 490
475 lock_super(sb); 491 lock_super(sb);
@@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
489 cpos = 0; 505 cpos = 0;
490 } 506 }
491 } 507 }
492 if (cpos & (sizeof(struct msdos_dir_entry)-1)) { 508 if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
493 ret = -ENOENT; 509 ret = -ENOENT;
494 goto out; 510 goto out;
495 } 511 }
496 512
497 bh = NULL; 513 bh = NULL;
498GetNew: 514get_new:
499 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 515 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
500 goto EODir; 516 goto end_of_dir;
501parse_record: 517parse_record:
502 long_slots = 0; 518 nr_slots = 0;
503 /* Check for long filename entry */ 519 /*
504 if (isvfat) { 520 * Check for long filename entry, but if short_only, we don't
521 * need to parse long filename.
522 */
523 if (isvfat && !short_only) {
505 if (de->name[0] == DELETED_FLAG) 524 if (de->name[0] == DELETED_FLAG)
506 goto RecEnd; 525 goto record_end;
507 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME)) 526 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
508 goto RecEnd; 527 goto record_end;
509 if (de->attr != ATTR_EXT && IS_FREE(de->name)) 528 if (de->attr != ATTR_EXT && IS_FREE(de->name))
510 goto RecEnd; 529 goto record_end;
511 } else { 530 } else {
512 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name)) 531 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
513 goto RecEnd; 532 goto record_end;
514 } 533 }
515 534
516 if (isvfat && de->attr == ATTR_EXT) { 535 if (isvfat && de->attr == ATTR_EXT) {
517 int status = fat_parse_long(inode, &cpos, &bh, &de, 536 int status = fat_parse_long(inode, &cpos, &bh, &de,
518 &unicode, &long_slots); 537 &unicode, &nr_slots);
519 if (status < 0) { 538 if (status < 0) {
520 filp->f_pos = cpos; 539 filp->f_pos = cpos;
521 ret = status; 540 ret = status;
522 goto out; 541 goto out;
523 } else if (status == PARSE_INVALID) 542 } else if (status == PARSE_INVALID)
524 goto RecEnd; 543 goto record_end;
525 else if (status == PARSE_NOT_LONGNAME) 544 else if (status == PARSE_NOT_LONGNAME)
526 goto parse_record; 545 goto parse_record;
527 else if (status == PARSE_EOF) 546 else if (status == PARSE_EOF)
528 goto EODir; 547 goto end_of_dir;
548
549 if (nr_slots) {
550 void *longname = unicode + FAT_MAX_UNI_CHARS;
551 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
552 int len = fat_uni_to_x8(sbi, unicode, longname, size);
553
554 fill_name = longname;
555 fill_len = len;
556 /* !both && !short_only, so we don't need shortname. */
557 if (!both)
558 goto start_filldir;
559 }
529 } 560 }
530 561
531 if (sbi->options.dotsOK) { 562 if (sbi->options.dotsOK) {
@@ -587,12 +618,32 @@ parse_record:
587 } 618 }
588 } 619 }
589 if (!last) 620 if (!last)
590 goto RecEnd; 621 goto record_end;
591 622
592 i = last + dotoffset; 623 i = last + dotoffset;
593 j = last_u; 624 j = last_u;
594 625
595 lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry); 626 if (isvfat) {
627 bufuname[j] = 0x0000;
628 i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
629 }
630 if (nr_slots) {
631 /* hack for fat_ioctl_filldir() */
632 struct fat_ioctl_filldir_callback *p = dirent;
633
634 p->longname = fill_name;
635 p->long_len = fill_len;
636 p->shortname = bufname;
637 p->short_len = i;
638 fill_name = NULL;
639 fill_len = 0;
640 } else {
641 fill_name = bufname;
642 fill_len = i;
643 }
644
645start_filldir:
646 lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
596 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) 647 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
597 inum = inode->i_ino; 648 inum = inode->i_ino;
598 else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { 649 else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -607,49 +658,17 @@ parse_record:
607 inum = iunique(sb, MSDOS_ROOT_INO); 658 inum = iunique(sb, MSDOS_ROOT_INO);
608 } 659 }
609 660
610 if (isvfat) {
611 bufuname[j] = 0x0000;
612 i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
613 : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
614 }
615
616 fill_name = bufname;
617 fill_len = i;
618 if (!short_only && long_slots) {
619 /* convert the unicode long name. 261 is maximum size
620 * of unicode buffer. (13 * slots + nul) */
621 void *longname = unicode + 261;
622 int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
623 int long_len = utf8
624 ? utf8_wcstombs(longname, unicode, buf_size)
625 : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
626
627 if (!both) {
628 fill_name = longname;
629 fill_len = long_len;
630 } else {
631 /* hack for fat_ioctl_filldir() */
632 struct fat_ioctl_filldir_callback *p = dirent;
633
634 p->longname = longname;
635 p->long_len = long_len;
636 p->shortname = bufname;
637 p->short_len = i;
638 fill_name = NULL;
639 fill_len = 0;
640 }
641 }
642 if (filldir(dirent, fill_name, fill_len, *furrfu, inum, 661 if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
643 (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0) 662 (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
644 goto FillFailed; 663 goto fill_failed;
645 664
646RecEnd: 665record_end:
647 furrfu = &lpos; 666 furrfu = &lpos;
648 filp->f_pos = cpos; 667 filp->f_pos = cpos;
649 goto GetNew; 668 goto get_new;
650EODir: 669end_of_dir:
651 filp->f_pos = cpos; 670 filp->f_pos = cpos;
652FillFailed: 671fill_failed:
653 brelse(bh); 672 brelse(bh);
654 if (unicode) 673 if (unicode)
655 __putname(unicode); 674 __putname(unicode);
@@ -715,7 +734,7 @@ efault: \
715 return -EFAULT; \ 734 return -EFAULT; \
716} 735}
717 736
718FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent) 737FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
719 738
720static int fat_ioctl_readdir(struct inode *inode, struct file *filp, 739static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
721 void __user *dirent, filldir_t filldir, 740 void __user *dirent, filldir_t filldir,
@@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
741static int fat_dir_ioctl(struct inode *inode, struct file *filp, 760static int fat_dir_ioctl(struct inode *inode, struct file *filp,
742 unsigned int cmd, unsigned long arg) 761 unsigned int cmd, unsigned long arg)
743{ 762{
744 struct dirent __user *d1 = (struct dirent __user *)arg; 763 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
745 int short_only, both; 764 int short_only, both;
746 765
747 switch (cmd) { 766 switch (cmd) {
@@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
757 return fat_generic_ioctl(inode, filp, cmd, arg); 776 return fat_generic_ioctl(inode, filp, cmd, arg);
758 } 777 }
759 778
760 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2]))) 779 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
761 return -EFAULT; 780 return -EFAULT;
762 /* 781 /*
763 * Yes, we don't need this put_user() absolutely. However old 782 * Yes, we don't need this put_user() absolutely. However old
@@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1082 goto error_free; 1101 goto error_free;
1083 } 1102 }
1084 1103
1085 fat_date_unix2dos(ts->tv_sec, &time, &date); 1104 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
1086 1105
1087 de = (struct msdos_dir_entry *)bhs[0]->b_data; 1106 de = (struct msdos_dir_entry *)bhs[0]->b_data;
1088 /* filling the new directory slots ("." and ".." entries) */ 1107 /* filling the new directory slots ("." and ".." entries) */
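
The fat_search_long()/__fat_readdir() rework drops the second __getname() buffer: the short name now lives in a stack array of FAT_MAX_SHORT_SIZE bytes, and the converted long name is carved out of the tail of the single unicode page. The arithmetic, checked standalone; the header constants are assumptions taken from 2.6-era kernel headers, not shown in the diff:

#include <stdio.h>

#define MSDOS_NAME		11	/* 8.3 name without the dot */
#define MSDOS_SLOTS		21	/* long-name slots + short entry */
#define NLS_MAX_CHARSET_SIZE	6
#define PATH_MAX		4096	/* size of one __getname() buffer */

#define FAT_MAX_SHORT_SIZE	((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
#define FAT_MAX_UNI_CHARS	((MSDOS_SLOTS - 1) * 13 + 1)
#define FAT_MAX_UNI_SIZE	(FAT_MAX_UNI_CHARS * 2)	/* kernel wchar_t is 16-bit */

int main(void)
{
	printf("short-name buffer: %d bytes (small enough for the stack)\n",
	       FAT_MAX_SHORT_SIZE);
	printf("unicode slots    : %d of %d bytes in the __getname() page\n",
	       FAT_MAX_UNI_SIZE, PATH_MAX);
	printf("converted name   : %d bytes left behind the slots\n",
	       PATH_MAX - FAT_MAX_UNI_SIZE);
	return 0;
}
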
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 46a4508ffd2e..23676f9d79ce 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
383 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 383 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
384 inode->i_mtime.tv_sec = 384 inode->i_mtime.tv_sec =
385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); 385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
386 sbi->options.tz_utc);
386 inode->i_mtime.tv_nsec = 0; 387 inode->i_mtime.tv_nsec = 0;
387 if (sbi->options.isvfat) { 388 if (sbi->options.isvfat) {
388 int secs = de->ctime_cs / 100; 389 int secs = de->ctime_cs / 100;
389 int csecs = de->ctime_cs % 100; 390 int csecs = de->ctime_cs % 100;
390 inode->i_ctime.tv_sec = 391 inode->i_ctime.tv_sec =
391 date_dos2unix(le16_to_cpu(de->ctime), 392 date_dos2unix(le16_to_cpu(de->ctime),
392 le16_to_cpu(de->cdate)) + secs; 393 le16_to_cpu(de->cdate),
394 sbi->options.tz_utc) + secs;
393 inode->i_ctime.tv_nsec = csecs * 10000000; 395 inode->i_ctime.tv_nsec = csecs * 10000000;
394 inode->i_atime.tv_sec = 396 inode->i_atime.tv_sec =
395 date_dos2unix(0, le16_to_cpu(de->adate)); 397 date_dos2unix(0, le16_to_cpu(de->adate),
398 sbi->options.tz_utc);
396 inode->i_atime.tv_nsec = 0; 399 inode->i_atime.tv_nsec = 0;
397 } else 400 } else
398 inode->i_ctime = inode->i_atime = inode->i_mtime; 401 inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -591,11 +594,14 @@ retry:
591 raw_entry->attr = fat_attr(inode); 594 raw_entry->attr = fat_attr(inode);
592 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); 595 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
593 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); 596 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
594 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); 597 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
598 &raw_entry->date, sbi->options.tz_utc);
595 if (sbi->options.isvfat) { 599 if (sbi->options.isvfat) {
596 __le16 atime; 600 __le16 atime;
597 fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); 601 fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
598 fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); 602 &raw_entry->cdate, sbi->options.tz_utc);
603 fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
604 &raw_entry->adate, sbi->options.tz_utc);
599 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + 605 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
600 inode->i_ctime.tv_nsec / 10000000; 606 inode->i_ctime.tv_nsec / 10000000;
601 } 607 }
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
836 } 842 }
837 if (sbi->options.flush) 843 if (sbi->options.flush)
838 seq_puts(m, ",flush"); 844 seq_puts(m, ",flush");
845 if (opts->tz_utc)
846 seq_puts(m, ",tz=UTC");
839 847
840 return 0; 848 return 0;
841} 849}
@@ -848,7 +856,7 @@ enum {
848 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 856 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
849 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 857 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
850 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 858 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
851 Opt_obsolate, Opt_flush, Opt_err, 859 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
852}; 860};
853 861
854static match_table_t fat_tokens = { 862static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
883 {Opt_obsolate, "cvf_options=%100s"}, 891 {Opt_obsolate, "cvf_options=%100s"},
884 {Opt_obsolate, "posix"}, 892 {Opt_obsolate, "posix"},
885 {Opt_flush, "flush"}, 893 {Opt_flush, "flush"},
894 {Opt_tz_utc, "tz=UTC"},
886 {Opt_err, NULL}, 895 {Opt_err, NULL},
887}; 896};
888static match_table_t msdos_tokens = { 897static match_table_t msdos_tokens = {
@@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
947 opts->utf8 = opts->unicode_xlate = 0; 956 opts->utf8 = opts->unicode_xlate = 0;
948 opts->numtail = 1; 957 opts->numtail = 1;
949 opts->usefree = opts->nocase = 0; 958 opts->usefree = opts->nocase = 0;
959 opts->tz_utc = 0;
950 *debug = 0; 960 *debug = 0;
951 961
952 if (!options) 962 if (!options)
953 return 0; 963 goto out;
954 964
955 while ((p = strsep(&options, ",")) != NULL) { 965 while ((p = strsep(&options, ",")) != NULL) {
956 int token; 966 int token;
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1036 case Opt_flush: 1046 case Opt_flush:
1037 opts->flush = 1; 1047 opts->flush = 1;
1038 break; 1048 break;
1049 case Opt_tz_utc:
1050 opts->tz_utc = 1;
1051 break;
1039 1052
1040 /* msdos specific */ 1053 /* msdos specific */
1041 case Opt_dots: 1054 case Opt_dots:
@@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1104 return -EINVAL; 1117 return -EINVAL;
1105 } 1118 }
1106 } 1119 }
1120
1121out:
1107 /* UTF-8 doesn't provide FAT semantics */ 1122 /* UTF-8 doesn't provide FAT semantics */
1108 if (!strcmp(opts->iocharset, "utf8")) { 1123 if (!strcmp(opts->iocharset, "utf8")) {
1109 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" 1124 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
1110 " for FAT filesystems, filesystem will be case sensitive!\n"); 1125 " for FAT filesystems, filesystem will be "
1126 "case sensitive!\n");
1111 } 1127 }
1112 1128
1113 /* If user doesn't specify allow_utime, it's initialized from dmask. */ 1129 /* If user doesn't specify allow_utime, it's initialized from dmask. */
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f23511eacf..79fb98ad36d4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
142}; 142};
143 143
144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ 144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
145int date_dos2unix(unsigned short time, unsigned short date) 145int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
146{ 146{
147 int month, year, secs; 147 int month, year, secs;
148 148
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && 156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
157 month < 2 ? 1 : 0)+3653); 157 month < 2 ? 1 : 0)+3653);
158 /* days since 1.1.70 plus 80's leap day */ 158 /* days since 1.1.70 plus 80's leap day */
159 secs += sys_tz.tz_minuteswest*60; 159 if (!tz_utc)
160 secs += sys_tz.tz_minuteswest*60;
160 return secs; 161 return secs;
161} 162}
162 163
163/* Convert linear UNIX date to a MS-DOS time/date pair. */ 164/* Convert linear UNIX date to a MS-DOS time/date pair. */
164void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) 165void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
165{ 166{
166 int day, year, nl_day, month; 167 int day, year, nl_day, month;
167 168
168 unix_date -= sys_tz.tz_minuteswest*60; 169 if (!tz_utc)
170 unix_date -= sys_tz.tz_minuteswest*60;
169 171
170 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ 172 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
171 if (unix_date < 315532800) 173 if (unix_date < 315532800)
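
With the new tz=UTC mount option, date_dos2unix()/fat_date_unix2dos() skip the sys_tz.tz_minuteswest adjustment, i.e. on-disk FAT timestamps are taken as UTC rather than local time. A userspace analogue of the decode side, using timegm()/mktime() instead of the kernel's sys_tz arithmetic (timegm() is a glibc/BSD extension):

#define _DEFAULT_SOURCE		/* for timegm() on glibc */
#include <stdio.h>
#include <time.h>

static time_t dos2unix(unsigned short t, unsigned short d, int tz_utc)
{
	struct tm tm = { 0 };

	tm.tm_sec  = (t & 0x1f) * 2;		/* FAT stores 2 s units */
	tm.tm_min  = (t >> 5) & 0x3f;
	tm.tm_hour = t >> 11;
	tm.tm_mday = d & 0x1f;
	tm.tm_mon  = ((d >> 5) & 0xf) - 1;
	tm.tm_year = (d >> 9) + 80;		/* DOS epoch is 1980 */
	if (tz_utc)
		return timegm(&tm);	/* tz=UTC: no local-time shift */
	tm.tm_isdst = -1;
	return mktime(&tm);		/* default: interpret as local time */
}

int main(void)
{
	/* 2008-07-01 12:34:56 in DOS encoding */
	unsigned short d = ((2008 - 1980) << 9) | (7 << 5) | 1;
	unsigned short t = (12 << 11) | (34 << 5) | (56 / 2);

	printf("utc=%ld local=%ld\n",
	       (long)dos2unix(t, d, 1), (long)dos2unix(t, d, 0));
	return 0;
}
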
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2060bf06b906..51d0035ff07e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
97 * timeout is unknown (unlink, rmdir, rename and in some cases 97 * timeout is unknown (unlink, rmdir, rename and in some cases
98 * lookup) 98 * lookup)
99 */ 99 */
100static void fuse_invalidate_entry_cache(struct dentry *entry) 100void fuse_invalidate_entry_cache(struct dentry *entry)
101{ 101{
102 fuse_dentry_settime(entry, 0); 102 fuse_dentry_settime(entry, 0);
103} 103}
@@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry)
112 fuse_invalidate_entry_cache(entry); 112 fuse_invalidate_entry_cache(entry);
113} 113}
114 114
115static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, 115static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
116 struct dentry *entry, 116 u64 nodeid, struct qstr *name,
117 struct fuse_entry_out *outarg) 117 struct fuse_entry_out *outarg)
118{ 118{
119 struct fuse_conn *fc = get_fuse_conn(dir);
120
121 memset(outarg, 0, sizeof(struct fuse_entry_out)); 119 memset(outarg, 0, sizeof(struct fuse_entry_out));
122 req->in.h.opcode = FUSE_LOOKUP; 120 req->in.h.opcode = FUSE_LOOKUP;
123 req->in.h.nodeid = get_node_id(dir); 121 req->in.h.nodeid = nodeid;
124 req->in.numargs = 1; 122 req->in.numargs = 1;
125 req->in.args[0].size = entry->d_name.len + 1; 123 req->in.args[0].size = name->len + 1;
126 req->in.args[0].value = entry->d_name.name; 124 req->in.args[0].value = name->name;
127 req->out.numargs = 1; 125 req->out.numargs = 1;
128 if (fc->minor < 9) 126 if (fc->minor < 9)
129 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; 127 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
189 attr_version = fuse_get_attr_version(fc); 187 attr_version = fuse_get_attr_version(fc);
190 188
191 parent = dget_parent(entry); 189 parent = dget_parent(entry);
192 fuse_lookup_init(req, parent->d_inode, entry, &outarg); 190 fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
191 &entry->d_name, &outarg);
193 request_send(fc, req); 192 request_send(fc, req);
194 dput(parent); 193 dput(parent);
195 err = req->out.h.error; 194 err = req->out.h.error;
@@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid)
225 return !nodeid || nodeid == FUSE_ROOT_ID; 224 return !nodeid || nodeid == FUSE_ROOT_ID;
226} 225}
227 226
228static struct dentry_operations fuse_dentry_operations = { 227struct dentry_operations fuse_dentry_operations = {
229 .d_revalidate = fuse_dentry_revalidate, 228 .d_revalidate = fuse_dentry_revalidate,
230}; 229};
231 230
@@ -239,85 +238,127 @@ int fuse_valid_type(int m)
239 * Add a directory inode to a dentry, ensuring that no other dentry 238 * Add a directory inode to a dentry, ensuring that no other dentry
240 * refers to this inode. Called with fc->inst_mutex. 239 * refers to this inode. Called with fc->inst_mutex.
241 */ 240 */
242static int fuse_d_add_directory(struct dentry *entry, struct inode *inode) 241static struct dentry *fuse_d_add_directory(struct dentry *entry,
242 struct inode *inode)
243{ 243{
244 struct dentry *alias = d_find_alias(inode); 244 struct dentry *alias = d_find_alias(inode);
245 if (alias) { 245 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
246 /* This tries to shrink the subtree below alias */ 246 /* This tries to shrink the subtree below alias */
247 fuse_invalidate_entry(alias); 247 fuse_invalidate_entry(alias);
248 dput(alias); 248 dput(alias);
249 if (!list_empty(&inode->i_dentry)) 249 if (!list_empty(&inode->i_dentry))
250 return -EBUSY; 250 return ERR_PTR(-EBUSY);
251 } else {
252 dput(alias);
251 } 253 }
252 d_add(entry, inode); 254 return d_splice_alias(inode, entry);
253 return 0;
254} 255}
255 256
256static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 257int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
257 struct nameidata *nd) 258 struct fuse_entry_out *outarg, struct inode **inode)
258{ 259{
259 int err; 260 struct fuse_conn *fc = get_fuse_conn_super(sb);
260 struct fuse_entry_out outarg;
261 struct inode *inode = NULL;
262 struct fuse_conn *fc = get_fuse_conn(dir);
263 struct fuse_req *req; 261 struct fuse_req *req;
264 struct fuse_req *forget_req; 262 struct fuse_req *forget_req;
265 u64 attr_version; 263 u64 attr_version;
264 int err;
266 265
267 if (entry->d_name.len > FUSE_NAME_MAX) 266 *inode = NULL;
268 return ERR_PTR(-ENAMETOOLONG); 267 err = -ENAMETOOLONG;
268 if (name->len > FUSE_NAME_MAX)
269 goto out;
269 270
270 req = fuse_get_req(fc); 271 req = fuse_get_req(fc);
272 err = PTR_ERR(req);
271 if (IS_ERR(req)) 273 if (IS_ERR(req))
272 return ERR_CAST(req); 274 goto out;
273 275
274 forget_req = fuse_get_req(fc); 276 forget_req = fuse_get_req(fc);
277 err = PTR_ERR(forget_req);
275 if (IS_ERR(forget_req)) { 278 if (IS_ERR(forget_req)) {
276 fuse_put_request(fc, req); 279 fuse_put_request(fc, req);
277 return ERR_CAST(forget_req); 280 goto out;
278 } 281 }
279 282
280 attr_version = fuse_get_attr_version(fc); 283 attr_version = fuse_get_attr_version(fc);
281 284
282 fuse_lookup_init(req, dir, entry, &outarg); 285 fuse_lookup_init(fc, req, nodeid, name, outarg);
283 request_send(fc, req); 286 request_send(fc, req);
284 err = req->out.h.error; 287 err = req->out.h.error;
285 fuse_put_request(fc, req); 288 fuse_put_request(fc, req);
286 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 289 /* Zero nodeid is same as -ENOENT, but with valid timeout */
287 if (!err && outarg.nodeid && 290 if (err || !outarg->nodeid)
288 (invalid_nodeid(outarg.nodeid) || 291 goto out_put_forget;
289 !fuse_valid_type(outarg.attr.mode))) 292
290 err = -EIO; 293 err = -EIO;
291 if (!err && outarg.nodeid) { 294 if (!outarg->nodeid)
292 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 295 goto out_put_forget;
293 &outarg.attr, entry_attr_timeout(&outarg), 296 if (!fuse_valid_type(outarg->attr.mode))
294 attr_version); 297 goto out_put_forget;
295 if (!inode) { 298
296 fuse_send_forget(fc, forget_req, outarg.nodeid, 1); 299 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
297 return ERR_PTR(-ENOMEM); 300 &outarg->attr, entry_attr_timeout(outarg),
298 } 301 attr_version);
302 err = -ENOMEM;
303 if (!*inode) {
304 fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
305 goto out;
299 } 306 }
307 err = 0;
308
309 out_put_forget:
300 fuse_put_request(fc, forget_req); 310 fuse_put_request(fc, forget_req);
301 if (err && err != -ENOENT) 311 out:
302 return ERR_PTR(err); 312 return err;
313}
314
315static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
316 struct nameidata *nd)
317{
318 int err;
319 struct fuse_entry_out outarg;
320 struct inode *inode;
321 struct dentry *newent;
322 struct fuse_conn *fc = get_fuse_conn(dir);
323 bool outarg_valid = true;
324
325 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
326 &outarg, &inode);
327 if (err == -ENOENT) {
328 outarg_valid = false;
329 err = 0;
330 }
331 if (err)
332 goto out_err;
333
334 err = -EIO;
335 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
336 goto out_iput;
303 337
304 if (inode && S_ISDIR(inode->i_mode)) { 338 if (inode && S_ISDIR(inode->i_mode)) {
305 mutex_lock(&fc->inst_mutex); 339 mutex_lock(&fc->inst_mutex);
306 err = fuse_d_add_directory(entry, inode); 340 newent = fuse_d_add_directory(entry, inode);
307 mutex_unlock(&fc->inst_mutex); 341 mutex_unlock(&fc->inst_mutex);
308 if (err) { 342 err = PTR_ERR(newent);
309 iput(inode); 343 if (IS_ERR(newent))
310 return ERR_PTR(err); 344 goto out_iput;
311 } 345 } else {
312 } else 346 newent = d_splice_alias(inode, entry);
313 d_add(entry, inode); 347 }
314 348
349 entry = newent ? newent : entry;
315 entry->d_op = &fuse_dentry_operations; 350 entry->d_op = &fuse_dentry_operations;
316 if (!err) 351 if (outarg_valid)
317 fuse_change_entry_timeout(entry, &outarg); 352 fuse_change_entry_timeout(entry, &outarg);
318 else 353 else
319 fuse_invalidate_entry_cache(entry); 354 fuse_invalidate_entry_cache(entry);
320 return NULL; 355
356 return newent;
357
358 out_iput:
359 iput(inode);
360 out_err:
361 return ERR_PTR(err);
321} 362}
322 363
323/* 364/*
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8092f0d9fd1f..67ff2c6a8f63 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1341 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; 1341 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
1342 int err; 1342 int err;
1343 1343
1344 if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
1345 /* NLM needs asynchronous locks, which we don't support yet */
1346 return -ENOLCK;
1347 }
1348
1344 /* Unlock on close is handled by the flush method */ 1349 /* Unlock on close is handled by the flush method */
1345 if (fl->fl_flags & FL_CLOSE) 1350 if (fl->fl_flags & FL_CLOSE)
1346 return 0; 1351 return 0;
@@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1365 struct fuse_conn *fc = get_fuse_conn(inode); 1370 struct fuse_conn *fc = get_fuse_conn(inode);
1366 int err; 1371 int err;
1367 1372
1368 if (cmd == F_GETLK) { 1373 if (cmd == F_CANCELLK) {
1374 err = 0;
1375 } else if (cmd == F_GETLK) {
1369 if (fc->no_lock) { 1376 if (fc->no_lock) {
1370 posix_test_lock(file, fl); 1377 posix_test_lock(file, fl);
1371 err = 0; 1378 err = 0;
@@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1373 err = fuse_getlk(file, fl); 1380 err = fuse_getlk(file, fl);
1374 } else { 1381 } else {
1375 if (fc->no_lock) 1382 if (fc->no_lock)
1376 err = posix_lock_file_wait(file, fl); 1383 err = posix_lock_file(file, fl, NULL);
1377 else 1384 else
1378 err = fuse_setlk(file, fl, 0); 1385 err = fuse_setlk(file, fl, 0);
1379 } 1386 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bae948657c4f..3a876076bdd1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
363 /** Do not send separate SETATTR request before open(O_TRUNC) */ 363 /** Do not send separate SETATTR request before open(O_TRUNC) */
364 unsigned atomic_o_trunc : 1; 364 unsigned atomic_o_trunc : 1;
365 365
366 /** Filesystem supports NFS exporting. Only set in INIT */
367 unsigned export_support : 1;
368
366 /* 369 /*
367 * The following bitfields are only for optimization purposes 370 * The following bitfields are only for optimization purposes
368 * and hence races in setting them will not cause malfunction 371 * and hence races in setting them will not cause malfunction
@@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode)
464/** Device operations */ 467/** Device operations */
465extern const struct file_operations fuse_dev_operations; 468extern const struct file_operations fuse_dev_operations;
466 469
470extern struct dentry_operations fuse_dentry_operations;
471
467/** 472/**
468 * Get a filled in inode 473 * Get a filled in inode
469 */ 474 */
@@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
471 int generation, struct fuse_attr *attr, 476 int generation, struct fuse_attr *attr,
472 u64 attr_valid, u64 attr_version); 477 u64 attr_valid, u64 attr_version);
473 478
479int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
480 struct fuse_entry_out *outarg, struct inode **inode);
481
474/** 482/**
475 * Send FORGET command 483 * Send FORGET command
476 */ 484 */
@@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc);
604 */ 612 */
605void fuse_invalidate_attr(struct inode *inode); 613void fuse_invalidate_attr(struct inode *inode);
606 614
615void fuse_invalidate_entry_cache(struct dentry *entry);
616
607/** 617/**
608 * Acquire reference to fuse_conn 618 * Acquire reference to fuse_conn
609 */ 619 */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3141690558c8..7d2f7d6e22e2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
18#include <linux/statfs.h> 18#include <linux/statfs.h>
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/exportfs.h>
21 22
22MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
23MODULE_DESCRIPTION("Filesystem in Userspace"); 24MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
552 return fuse_iget(sb, 1, 0, &attr, 0, 0); 553 return fuse_iget(sb, 1, 0, &attr, 0, 0);
553} 554}
554 555
556struct fuse_inode_handle
557{
558 u64 nodeid;
559 u32 generation;
560};
561
562static struct dentry *fuse_get_dentry(struct super_block *sb,
563 struct fuse_inode_handle *handle)
564{
565 struct fuse_conn *fc = get_fuse_conn_super(sb);
566 struct inode *inode;
567 struct dentry *entry;
568 int err = -ESTALE;
569
570 if (handle->nodeid == 0)
571 goto out_err;
572
573 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
574 if (!inode) {
575 struct fuse_entry_out outarg;
576 struct qstr name;
577
578 if (!fc->export_support)
579 goto out_err;
580
581 name.len = 1;
582 name.name = ".";
583 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
584 &inode);
585 if (err && err != -ENOENT)
586 goto out_err;
587 if (err || !inode) {
588 err = -ESTALE;
589 goto out_err;
590 }
591 err = -EIO;
592 if (get_node_id(inode) != handle->nodeid)
593 goto out_iput;
594 }
595 err = -ESTALE;
596 if (inode->i_generation != handle->generation)
597 goto out_iput;
598
599 entry = d_alloc_anon(inode);
600 err = -ENOMEM;
601 if (!entry)
602 goto out_iput;
603
604 if (get_node_id(inode) != FUSE_ROOT_ID) {
605 entry->d_op = &fuse_dentry_operations;
606 fuse_invalidate_entry_cache(entry);
607 }
608
609 return entry;
610
611 out_iput:
612 iput(inode);
613 out_err:
614 return ERR_PTR(err);
615}
616
617static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
618 int connectable)
619{
620 struct inode *inode = dentry->d_inode;
621 bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
622 int len = encode_parent ? 6 : 3;
623 u64 nodeid;
624 u32 generation;
625
626 if (*max_len < len)
627 return 255;
628
629 nodeid = get_fuse_inode(inode)->nodeid;
630 generation = inode->i_generation;
631
632 fh[0] = (u32)(nodeid >> 32);
633 fh[1] = (u32)(nodeid & 0xffffffff);
634 fh[2] = generation;
635
636 if (encode_parent) {
637 struct inode *parent;
638
639 spin_lock(&dentry->d_lock);
640 parent = dentry->d_parent->d_inode;
641 nodeid = get_fuse_inode(parent)->nodeid;
642 generation = parent->i_generation;
643 spin_unlock(&dentry->d_lock);
644
645 fh[3] = (u32)(nodeid >> 32);
646 fh[4] = (u32)(nodeid & 0xffffffff);
647 fh[5] = generation;
648 }
649
650 *max_len = len;
651 return encode_parent ? 0x82 : 0x81;
652}
653
654static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
655 struct fid *fid, int fh_len, int fh_type)
656{
657 struct fuse_inode_handle handle;
658
659 if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
660 return NULL;
661
662 handle.nodeid = (u64) fid->raw[0] << 32;
663 handle.nodeid |= (u64) fid->raw[1];
664 handle.generation = fid->raw[2];
665 return fuse_get_dentry(sb, &handle);
666}
667
668static struct dentry *fuse_fh_to_parent(struct super_block *sb,
669 struct fid *fid, int fh_len, int fh_type)
670{
671 struct fuse_inode_handle parent;
672
673 if (fh_type != 0x82 || fh_len < 6)
674 return NULL;
675
676 parent.nodeid = (u64) fid->raw[3] << 32;
677 parent.nodeid |= (u64) fid->raw[4];
678 parent.generation = fid->raw[5];
679 return fuse_get_dentry(sb, &parent);
680}
681
682static struct dentry *fuse_get_parent(struct dentry *child)
683{
684 struct inode *child_inode = child->d_inode;
685 struct fuse_conn *fc = get_fuse_conn(child_inode);
686 struct inode *inode;
687 struct dentry *parent;
688 struct fuse_entry_out outarg;
689 struct qstr name;
690 int err;
691
692 if (!fc->export_support)
693 return ERR_PTR(-ESTALE);
694
695 name.len = 2;
696 name.name = "..";
697 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
698 &name, &outarg, &inode);
699 if (err && err != -ENOENT)
700 return ERR_PTR(err);
701 if (err || !inode)
702 return ERR_PTR(-ESTALE);
703
704 parent = d_alloc_anon(inode);
705 if (!parent) {
706 iput(inode);
707 return ERR_PTR(-ENOMEM);
708 }
709 if (get_node_id(inode) != FUSE_ROOT_ID) {
710 parent->d_op = &fuse_dentry_operations;
711 fuse_invalidate_entry_cache(parent);
712 }
713
714 return parent;
715}
716
717static const struct export_operations fuse_export_operations = {
718 .fh_to_dentry = fuse_fh_to_dentry,
719 .fh_to_parent = fuse_fh_to_parent,
720 .encode_fh = fuse_encode_fh,
721 .get_parent = fuse_get_parent,
722};
723
555static const struct super_operations fuse_super_operations = { 724static const struct super_operations fuse_super_operations = {
556 .alloc_inode = fuse_alloc_inode, 725 .alloc_inode = fuse_alloc_inode,
557 .destroy_inode = fuse_destroy_inode, 726 .destroy_inode = fuse_destroy_inode,
@@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
581 fc->no_lock = 1; 750 fc->no_lock = 1;
582 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 751 if (arg->flags & FUSE_ATOMIC_O_TRUNC)
583 fc->atomic_o_trunc = 1; 752 fc->atomic_o_trunc = 1;
753 if (arg->minor >= 9) {
754 /* LOOKUP has dependency on proto version */
755 if (arg->flags & FUSE_EXPORT_SUPPORT)
756 fc->export_support = 1;
757 }
584 if (arg->flags & FUSE_BIG_WRITES) 758 if (arg->flags & FUSE_BIG_WRITES)
585 fc->big_writes = 1; 759 fc->big_writes = 1;
586 } else { 760 } else {
@@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
607 arg->minor = FUSE_KERNEL_MINOR_VERSION; 781 arg->minor = FUSE_KERNEL_MINOR_VERSION;
608 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 782 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
609 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 783 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
610 FUSE_BIG_WRITES; 784 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
611 req->in.h.opcode = FUSE_INIT; 785 req->in.h.opcode = FUSE_INIT;
612 req->in.numargs = 1; 786 req->in.numargs = 1;
613 req->in.args[0].size = sizeof(*arg); 787 req->in.args[0].size = sizeof(*arg);
@@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
652 sb->s_magic = FUSE_SUPER_MAGIC; 826 sb->s_magic = FUSE_SUPER_MAGIC;
653 sb->s_op = &fuse_super_operations; 827 sb->s_op = &fuse_super_operations;
654 sb->s_maxbytes = MAX_LFS_FILESIZE; 828 sb->s_maxbytes = MAX_LFS_FILESIZE;
829 sb->s_export_op = &fuse_export_operations;
655 830
656 file = fget(d.fd); 831 file = fget(d.fd);
657 if (!file) 832 if (!file)
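
fuse_encode_fh() and fuse_fh_to_dentry()/fuse_fh_to_parent() above agree on a simple layout: the 64-bit nodeid is split across two u32 words followed by the 32-bit generation, and the connectable variant repeats the triple for the parent. The round trip in isolation (illustrative struct, not the kernel's struct fid):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct fh { uint32_t raw[6]; };

static void encode(struct fh *fh, uint64_t nodeid, uint32_t generation)
{
	fh->raw[0] = (uint32_t)(nodeid >> 32);		/* high word */
	fh->raw[1] = (uint32_t)(nodeid & 0xffffffff);	/* low word */
	fh->raw[2] = generation;
}

static uint64_t decode_nodeid(const struct fh *fh)
{
	return ((uint64_t)fh->raw[0] << 32) | fh->raw[1];
}

int main(void)
{
	struct fh fh;

	encode(&fh, 0x123456789abcdef0ULL, 42);
	assert(decode_nodeid(&fh) == 0x123456789abcdef0ULL);
	assert(fh.raw[2] == 42);
	printf("round trip ok\n");
	return 0;
}
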
diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c
index 24e75798ddf0..c6e97366e8ac 100644
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
145 if (!*num_bits) 145 if (!*num_bits)
146 return 0; 146 return 0;
147 147
148 down(&HFS_SB(sb)->bitmap_lock); 148 mutex_lock(&HFS_SB(sb)->bitmap_lock);
149 bitmap = HFS_SB(sb)->bitmap; 149 bitmap = HFS_SB(sb)->bitmap;
150 150
151 pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits); 151 pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
162 HFS_SB(sb)->free_ablocks -= *num_bits; 162 HFS_SB(sb)->free_ablocks -= *num_bits;
163 hfs_bitmap_dirty(sb); 163 hfs_bitmap_dirty(sb);
164out: 164out:
165 up(&HFS_SB(sb)->bitmap_lock); 165 mutex_unlock(&HFS_SB(sb)->bitmap_lock);
166 return pos; 166 return pos;
167} 167}
168 168
@@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
205 if ((start + count) > HFS_SB(sb)->fs_ablocks) 205 if ((start + count) > HFS_SB(sb)->fs_ablocks)
206 return -2; 206 return -2;
207 207
208 down(&HFS_SB(sb)->bitmap_lock); 208 mutex_lock(&HFS_SB(sb)->bitmap_lock);
209 /* bitmap is always on a 32-bit boundary */ 209 /* bitmap is always on a 32-bit boundary */
210 curr = HFS_SB(sb)->bitmap + (start / 32); 210 curr = HFS_SB(sb)->bitmap + (start / 32);
211 len = count; 211 len = count;
@@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
236 } 236 }
237out: 237out:
238 HFS_SB(sb)->free_ablocks += len; 238 HFS_SB(sb)->free_ablocks += len;
239 up(&HFS_SB(sb)->bitmap_lock); 239 mutex_unlock(&HFS_SB(sb)->bitmap_lock);
240 hfs_bitmap_dirty(sb); 240 hfs_bitmap_dirty(sb);
241 241
242 return 0; 242 return 0;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index f6621a785202..9b9d6395bad3 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
40 { 40 {
41 struct hfs_mdb *mdb = HFS_SB(sb)->mdb; 41 struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
42 HFS_I(tree->inode)->flags = 0; 42 HFS_I(tree->inode)->flags = 0;
43 init_MUTEX(&HFS_I(tree->inode)->extents_lock); 43 mutex_init(&HFS_I(tree->inode)->extents_lock);
44 switch (id) { 44 switch (id) {
45 case HFS_EXT_CNID: 45 case HFS_EXT_CNID:
46 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, 46 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index c176f67ba0a5..2c16316d2917 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block,
343 goto done; 343 goto done;
344 } 344 }
345 345
346 down(&HFS_I(inode)->extents_lock); 346 mutex_lock(&HFS_I(inode)->extents_lock);
347 res = hfs_ext_read_extent(inode, ablock); 347 res = hfs_ext_read_extent(inode, ablock);
348 if (!res) 348 if (!res)
349 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents, 349 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
350 ablock - HFS_I(inode)->cached_start); 350 ablock - HFS_I(inode)->cached_start);
351 else { 351 else {
352 up(&HFS_I(inode)->extents_lock); 352 mutex_unlock(&HFS_I(inode)->extents_lock);
353 return -EIO; 353 return -EIO;
354 } 354 }
355 up(&HFS_I(inode)->extents_lock); 355 mutex_unlock(&HFS_I(inode)->extents_lock);
356 356
357done: 357done:
358 map_bh(bh_result, sb, HFS_SB(sb)->fs_start + 358 map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode)
375 u32 start, len, goal; 375 u32 start, len, goal;
376 int res; 376 int res;
377 377
378 down(&HFS_I(inode)->extents_lock); 378 mutex_lock(&HFS_I(inode)->extents_lock);
379 if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) 379 if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
380 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents); 380 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
381 else { 381 else {
@@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode)
425 goto insert_extent; 425 goto insert_extent;
426 } 426 }
427out: 427out:
428 up(&HFS_I(inode)->extents_lock); 428 mutex_unlock(&HFS_I(inode)->extents_lock);
429 if (!res) { 429 if (!res) {
430 HFS_I(inode)->alloc_blocks += len; 430 HFS_I(inode)->alloc_blocks += len;
431 mark_inode_dirty(inode); 431 mark_inode_dirty(inode);
@@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode)
487 if (blk_cnt == alloc_cnt) 487 if (blk_cnt == alloc_cnt)
488 goto out; 488 goto out;
489 489
490 down(&HFS_I(inode)->extents_lock); 490 mutex_lock(&HFS_I(inode)->extents_lock);
491 hfs_find_init(HFS_SB(sb)->ext_tree, &fd); 491 hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
492 while (1) { 492 while (1) {
493 if (alloc_cnt == HFS_I(inode)->first_blocks) { 493 if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode)
514 hfs_brec_remove(&fd); 514 hfs_brec_remove(&fd);
515 } 515 }
516 hfs_find_exit(&fd); 516 hfs_find_exit(&fd);
517 up(&HFS_I(inode)->extents_lock); 517 mutex_unlock(&HFS_I(inode)->extents_lock);
518 518
519 HFS_I(inode)->alloc_blocks = blk_cnt; 519 HFS_I(inode)->alloc_blocks = blk_cnt;
520out: 520out:
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 147374b6f675..9955232fdf8c 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/mutex.h>
14#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
15#include <linux/fs.h> 16#include <linux/fs.h>
16 17
@@ -53,7 +54,7 @@ struct hfs_inode_info {
53 struct list_head open_dir_list; 54 struct list_head open_dir_list;
54 struct inode *rsrc_inode; 55 struct inode *rsrc_inode;
55 56
56 struct semaphore extents_lock; 57 struct mutex extents_lock;
57 58
58 u16 alloc_blocks, clump_blocks; 59 u16 alloc_blocks, clump_blocks;
59 sector_t fs_blocks; 60 sector_t fs_blocks;
@@ -139,7 +140,7 @@ struct hfs_sb_info {
139 140
140 struct nls_table *nls_io, *nls_disk; 141 struct nls_table *nls_io, *nls_disk;
141 142
142 struct semaphore bitmap_lock; 143 struct mutex bitmap_lock;
143 144
144 unsigned long flags; 145 unsigned long flags;
145 146
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 97f8446c4ff4..dc4ec640e875 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
150 if (!inode) 150 if (!inode)
151 return NULL; 151 return NULL;
152 152
153 init_MUTEX(&HFS_I(inode)->extents_lock); 153 mutex_init(&HFS_I(inode)->extents_lock);
154 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 154 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); 155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
156 inode->i_ino = HFS_SB(sb)->next_id++; 156 inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
281 281
282 HFS_I(inode)->flags = 0; 282 HFS_I(inode)->flags = 0;
283 HFS_I(inode)->rsrc_inode = NULL; 283 HFS_I(inode)->rsrc_inode = NULL;
284 init_MUTEX(&HFS_I(inode)->extents_lock); 284 mutex_init(&HFS_I(inode)->extents_lock);
285 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 285 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
286 286
287 /* Initialize the inode */ 287 /* Initialize the inode */
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8cf67974adf6..ac2ec5ef66e4 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
372 372
373 sb->s_op = &hfs_super_operations; 373 sb->s_op = &hfs_super_operations;
374 sb->s_flags |= MS_NODIRATIME; 374 sb->s_flags |= MS_NODIRATIME;
375 init_MUTEX(&sbi->bitmap_lock); 375 mutex_init(&sbi->bitmap_lock);
376 376
377 res = hfs_mdb_get(sb); 377 res = hfs_mdb_get(sb);
378 if (res) { 378 if (res) {
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 12e899cd7886..fec8f61227ff 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
199 goto done; 199 goto done;
200 } 200 }
201 201
202 down(&HFSPLUS_I(inode).extents_lock); 202 mutex_lock(&HFSPLUS_I(inode).extents_lock);
203 res = hfsplus_ext_read_extent(inode, ablock); 203 res = hfsplus_ext_read_extent(inode, ablock);
204 if (!res) { 204 if (!res) {
205 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - 205 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
206 HFSPLUS_I(inode).cached_start); 206 HFSPLUS_I(inode).cached_start);
207 } else { 207 } else {
208 up(&HFSPLUS_I(inode).extents_lock); 208 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
209 return -EIO; 209 return -EIO;
210 } 210 }
211 up(&HFSPLUS_I(inode).extents_lock); 211 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
212 212
213done: 213done:
214 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); 214 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode)
355 return -ENOSPC; 355 return -ENOSPC;
356 } 356 }
357 357
358 down(&HFSPLUS_I(inode).extents_lock); 358 mutex_lock(&HFSPLUS_I(inode).extents_lock);
359 if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) 359 if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
360 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); 360 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
361 else { 361 else {
@@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode)
408 goto insert_extent; 408 goto insert_extent;
409 } 409 }
410out: 410out:
411 up(&HFSPLUS_I(inode).extents_lock); 411 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
412 if (!res) { 412 if (!res) {
413 HFSPLUS_I(inode).alloc_blocks += len; 413 HFSPLUS_I(inode).alloc_blocks += len;
414 mark_inode_dirty(inode); 414 mark_inode_dirty(inode);
@@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode)
465 if (blk_cnt == alloc_cnt) 465 if (blk_cnt == alloc_cnt)
466 goto out; 466 goto out;
467 467
468 down(&HFSPLUS_I(inode).extents_lock); 468 mutex_lock(&HFSPLUS_I(inode).extents_lock);
469 hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); 469 hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
470 while (1) { 470 while (1) {
471 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { 471 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode)
492 hfs_brec_remove(&fd); 492 hfs_brec_remove(&fd);
493 } 493 }
494 hfs_find_exit(&fd); 494 hfs_find_exit(&fd);
495 up(&HFSPLUS_I(inode).extents_lock); 495 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
496 496
497 HFSPLUS_I(inode).alloc_blocks = blk_cnt; 497 HFSPLUS_I(inode).alloc_blocks = blk_cnt;
498out: 498out:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e59537b43d5..f027a905225f 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
11#define _LINUX_HFSPLUS_FS_H 11#define _LINUX_HFSPLUS_FS_H
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/mutex.h>
14#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
15#include "hfsplus_raw.h" 16#include "hfsplus_raw.h"
16 17
@@ -154,7 +155,7 @@ struct hfsplus_sb_info {
154 155
155 156
156struct hfsplus_inode_info { 157struct hfsplus_inode_info {
157 struct semaphore extents_lock; 158 struct mutex extents_lock;
158 u32 clump_blocks, alloc_blocks; 159 u32 clump_blocks, alloc_blocks;
159 sector_t fs_blocks; 160 sector_t fs_blocks;
160 /* Allocation extents from catalog record or volume header */ 161 /* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 67e1c8b467c4..cc3b5e24339b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
163 163
164 inode->i_ino = dir->i_ino; 164 inode->i_ino = dir->i_ino;
165 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 165 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
166 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 166 mutex_init(&HFSPLUS_I(inode).extents_lock);
167 HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; 167 HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
168 168
169 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); 169 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -316,7 +316,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
316 inode->i_nlink = 1; 316 inode->i_nlink = 1;
317 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 317 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
318 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 318 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
319 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 319 mutex_init(&HFSPLUS_I(inode).extents_lock);
320 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 320 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
321 HFSPLUS_I(inode).flags = 0; 321 HFSPLUS_I(inode).flags = 0;
322 memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); 322 memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ce97a54518d8..3859118531c7 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
34 return inode; 34 return inode;
35 35
36 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 36 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
37 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 37 mutex_init(&HFSPLUS_I(inode).extents_lock);
38 HFSPLUS_I(inode).flags = 0; 38 HFSPLUS_I(inode).flags = 0;
39 HFSPLUS_I(inode).rsrc_inode = NULL; 39 HFSPLUS_I(inode).rsrc_inode = NULL;
40 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 40 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 6bd48f0a7047..c2fb2dd0131f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@ repeat:
209 209
210 while (rs.len > 2) { /* There may be one byte for padding somewhere */ 210 while (rs.len > 2) { /* There may be one byte for padding somewhere */
211 rr = (struct rock_ridge *)rs.chr; 211 rr = (struct rock_ridge *)rs.chr;
212 /*
213 * Ignore rock ridge info if rr->len is out of range, but
214 * don't return -EIO because that would make the file
215 * invisible.
216 */
212 if (rr->len < 3) 217 if (rr->len < 3)
213 goto out; /* Something got screwed up here */ 218 goto out; /* Something got screwed up here */
214 sig = isonum_721(rs.chr); 219 sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@ repeat:
216 goto eio; 221 goto eio;
217 rs.chr += rr->len; 222 rs.chr += rr->len;
218 rs.len -= rr->len; 223 rs.len -= rr->len;
224 /*
225 * As above, just ignore the rock ridge info if rr->len
226 * is bogus.
227 */
219 if (rs.len < 0) 228 if (rs.len < 0)
220 goto eio; /* corrupted isofs */ 229 goto out; /* Something got screwed up here */
221 230
222 switch (sig) { 231 switch (sig) {
223 case SIG('R', 'R'): 232 case SIG('R', 'R'):
@@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
307repeat: 316repeat:
308 while (rs.len > 2) { /* There may be one byte for padding somewhere */ 317 while (rs.len > 2) { /* There may be one byte for padding somewhere */
309 rr = (struct rock_ridge *)rs.chr; 318 rr = (struct rock_ridge *)rs.chr;
319 /*
320 * Ignore rock ridge info if rr->len is out of range, but
321 * don't return -EIO because that would make the file
322 * invisible.
323 */
310 if (rr->len < 3) 324 if (rr->len < 3)
311 goto out; /* Something got screwed up here */ 325 goto out; /* Something got screwed up here */
312 sig = isonum_721(rs.chr); 326 sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@ repeat:
314 goto eio; 328 goto eio;
315 rs.chr += rr->len; 329 rs.chr += rr->len;
316 rs.len -= rr->len; 330 rs.len -= rr->len;
331 /*
332 * As above, just ignore the rock ridge info if rr->len
333 * is bogus.
334 */
317 if (rs.len < 0) 335 if (rs.len < 0)
318 goto eio; /* corrupted isofs */ 336 goto out; /* Something got screwed up here */
319 337
320 switch (sig) { 338 switch (sig) {
321#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ 339#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
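
Both rock.c hunks adopt the same policy for a corrupt Rock Ridge record stream: stop parsing and keep what was already recovered, rather than returning -EIO and making the file invisible. A standalone sketch of that policy, using a hypothetical record type rather than the real isofs structures:

struct demo_rec {
	unsigned char sig[2];
	unsigned char len;	/* total record length, must be >= 3 */
};

static void demo_parse(const unsigned char *chr, int len)
{
	while (len > 2) {	/* there may be one byte of padding */
		const struct demo_rec *rr = (const struct demo_rec *)chr;

		if (rr->len < 3)
			return;		/* bogus length: ignore the rest */
		chr += rr->len;
		len -= rr->len;
		if (len < 0)
			return;		/* record overran the buffer: ignore */
		/* ... dispatch on rr->sig ... */
	}
}
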
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498ca..2eccbfaa1d48 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
36 36
37/* 37/*
38 * When an ext3-ordered file is truncated, it is possible that many pages are 38 * When an ext3-ordered file is truncated, it is possible that many pages are
39 * not sucessfully freed, because they are attached to a committing transaction. 39 * not successfully freed, because they are attached to a committing transaction.
40 * After the transaction commits, these pages are left on the LRU, with no 40 * After the transaction commits, these pages are left on the LRU, with no
41 * ->mapping, and with attached buffers. These pages are trivially reclaimable 41 * ->mapping, and with attached buffers. These pages are trivially reclaimable
42 * by the VM, but their apparent absence upsets the VM accounting, and it makes 42 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
45 * So here, we have a buffer which has just come off the forget list. Look to 45 * So here, we have a buffer which has just come off the forget list. Look to
46 * see if we can strip all buffers from the backing page. 46 * see if we can strip all buffers from the backing page.
47 * 47 *
48 * Called under lock_journal(), and possibly under journal_datalist_lock. The 48 * Called under journal->j_list_lock. The caller provided us with a ref
49 * caller provided us with a ref against the buffer, and we drop that here. 49 * against the buffer, and we drop that here.
50 */ 50 */
51static void release_buffer_page(struct buffer_head *bh) 51static void release_buffer_page(struct buffer_head *bh)
52{ 52{
@@ -78,6 +78,19 @@ nope:
78} 78}
79 79
80/* 80/*
81 * Decrement reference counter for data buffer. If it has been marked
82 * 'BH_Freed', release it and the page to which it belongs if possible.
83 */
84static void release_data_buffer(struct buffer_head *bh)
85{
86 if (buffer_freed(bh)) {
87 clear_buffer_freed(bh);
88 release_buffer_page(bh);
89 } else
90 put_bh(bh);
91}
92
93/*
81 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is 94 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
82 * held. For ranking reasons we must trylock. If we lose, schedule away and 95 * held. For ranking reasons we must trylock. If we lose, schedule away and
83 * return 0. j_list_lock is dropped in this case. 96 * return 0. j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
172/* 185/*
173 * Submit all the data buffers to disk 186 * Submit all the data buffers to disk
174 */ 187 */
175static void journal_submit_data_buffers(journal_t *journal, 188static int journal_submit_data_buffers(journal_t *journal,
176 transaction_t *commit_transaction) 189 transaction_t *commit_transaction)
177{ 190{
178 struct journal_head *jh; 191 struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
180 int locked; 193 int locked;
181 int bufs = 0; 194 int bufs = 0;
182 struct buffer_head **wbuf = journal->j_wbuf; 195 struct buffer_head **wbuf = journal->j_wbuf;
196 int err = 0;
183 197
184 /* 198 /*
185 * Whenever we unlock the journal and sleep, things can get added 199 * Whenever we unlock the journal and sleep, things can get added
@@ -231,7 +245,7 @@ write_out_data:
231 if (locked) 245 if (locked)
232 unlock_buffer(bh); 246 unlock_buffer(bh);
233 BUFFER_TRACE(bh, "already cleaned up"); 247 BUFFER_TRACE(bh, "already cleaned up");
234 put_bh(bh); 248 release_data_buffer(bh);
235 continue; 249 continue;
236 } 250 }
237 if (locked && test_clear_buffer_dirty(bh)) { 251 if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
253 put_bh(bh); 267 put_bh(bh);
254 } else { 268 } else {
255 BUFFER_TRACE(bh, "writeout complete: unfile"); 269 BUFFER_TRACE(bh, "writeout complete: unfile");
270 if (unlikely(!buffer_uptodate(bh)))
271 err = -EIO;
256 __journal_unfile_buffer(jh); 272 __journal_unfile_buffer(jh);
257 jbd_unlock_bh_state(bh); 273 jbd_unlock_bh_state(bh);
258 if (locked) 274 if (locked)
259 unlock_buffer(bh); 275 unlock_buffer(bh);
260 journal_remove_journal_head(bh); 276 journal_remove_journal_head(bh);
261 /* Once for our safety reference, once for 277 /* One for our safety reference, the other for
262 * journal_remove_journal_head() */ 278 * journal_remove_journal_head() */
263 put_bh(bh); 279 put_bh(bh);
264 put_bh(bh); 280 release_data_buffer(bh);
265 } 281 }
266 282
267 if (need_resched() || spin_needbreak(&journal->j_list_lock)) { 283 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
271 } 287 }
272 spin_unlock(&journal->j_list_lock); 288 spin_unlock(&journal->j_list_lock);
273 journal_do_submit_data(wbuf, bufs); 289 journal_do_submit_data(wbuf, bufs);
290
291 return err;
274} 292}
275 293
276/* 294/*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
410 * Now start flushing things to disk, in the order they appear 428 * Now start flushing things to disk, in the order they appear
411 * on the transaction lists. Data blocks go first. 429 * on the transaction lists. Data blocks go first.
412 */ 430 */
413 err = 0; 431 err = journal_submit_data_buffers(journal, commit_transaction);
414 journal_submit_data_buffers(journal, commit_transaction);
415 432
416 /* 433 /*
417 * Wait for all previously submitted IO to complete. 434 * Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
426 if (buffer_locked(bh)) { 443 if (buffer_locked(bh)) {
427 spin_unlock(&journal->j_list_lock); 444 spin_unlock(&journal->j_list_lock);
428 wait_on_buffer(bh); 445 wait_on_buffer(bh);
429 if (unlikely(!buffer_uptodate(bh)))
430 err = -EIO;
431 spin_lock(&journal->j_list_lock); 446 spin_lock(&journal->j_list_lock);
432 } 447 }
448 if (unlikely(!buffer_uptodate(bh))) {
449 if (TestSetPageLocked(bh->b_page)) {
450 spin_unlock(&journal->j_list_lock);
451 lock_page(bh->b_page);
452 spin_lock(&journal->j_list_lock);
453 }
454 if (bh->b_page->mapping)
455 set_bit(AS_EIO, &bh->b_page->mapping->flags);
456
457 unlock_page(bh->b_page);
458 SetPageError(bh->b_page);
459 err = -EIO;
460 }
433 if (!inverted_lock(journal, bh)) { 461 if (!inverted_lock(journal, bh)) {
434 put_bh(bh); 462 put_bh(bh);
435 spin_lock(&journal->j_list_lock); 463 spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
443 } else { 471 } else {
444 jbd_unlock_bh_state(bh); 472 jbd_unlock_bh_state(bh);
445 } 473 }
446 put_bh(bh); 474 release_data_buffer(bh);
447 cond_resched_lock(&journal->j_list_lock); 475 cond_resched_lock(&journal->j_list_lock);
448 } 476 }
449 spin_unlock(&journal->j_list_lock); 477 spin_unlock(&journal->j_list_lock);
450 478
451 if (err) 479 if (err) {
452 journal_abort(journal, err); 480 char b[BDEVNAME_SIZE];
453 481
454 journal_write_revoke_records(journal, commit_transaction); 482 printk(KERN_WARNING
483 "JBD: Detected IO errors while flushing file data "
484 "on %s\n", bdevname(journal->j_fs_dev, b));
485 err = 0;
486 }
455 487
456 jbd_debug(3, "JBD: commit phase 2\n"); 488 journal_write_revoke_records(journal, commit_transaction);
457 489
458 /* 490 /*
459 * If we found any dirty or locked buffers, then we should have 491 * If we found any dirty or locked buffers, then we should have
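
The commit-path changes stop aborting the journal when a data (as opposed to metadata) write fails; the error is instead recorded on the page and its mapping, so a later fsync() returns -EIO. A simplified sketch of that reporting step; the real hunk first tries TestSetPageLocked() because it runs under j_list_lock and must drop it before sleeping in lock_page():

#include <linux/buffer_head.h>
#include <linux/pagemap.h>

static void demo_note_data_write_error(struct buffer_head *bh)
{
	struct page *page = bh->b_page;

	lock_page(page);
	if (page->mapping)	/* the page may have been truncated */
		set_bit(AS_EIO, &page->mapping->flags);
	SetPageError(page);
	unlock_page(page);
}
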
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c4..aa7143a8349b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
68EXPORT_SYMBOL(journal_create); 68EXPORT_SYMBOL(journal_create);
69EXPORT_SYMBOL(journal_load); 69EXPORT_SYMBOL(journal_load);
70EXPORT_SYMBOL(journal_destroy); 70EXPORT_SYMBOL(journal_destroy);
71EXPORT_SYMBOL(journal_update_superblock);
72EXPORT_SYMBOL(journal_abort); 71EXPORT_SYMBOL(journal_abort);
73EXPORT_SYMBOL(journal_errno); 72EXPORT_SYMBOL(journal_errno);
74EXPORT_SYMBOL(journal_ack_err); 73EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
1636 1635
1637static void journal_destroy_journal_head_cache(void) 1636static void journal_destroy_journal_head_cache(void)
1638{ 1637{
1639 J_ASSERT(journal_head_cache != NULL); 1638 if (journal_head_cache) {
1640 kmem_cache_destroy(journal_head_cache); 1639 kmem_cache_destroy(journal_head_cache);
1641 journal_head_cache = NULL; 1640 journal_head_cache = NULL;
1641 }
1642} 1642}
1643 1643
1644/* 1644/*
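
journal_destroy_journal_head_cache() moves from asserting that the cache exists to a null-safe form, so teardown can run even when initialization failed part way through. The shape of the pattern, with a hypothetical cache:

#include <linux/slab.h>

static struct kmem_cache *demo_cache;

static void demo_destroy_cache(void)
{
	if (demo_cache) {
		kmem_cache_destroy(demo_cache);
		demo_cache = NULL;	/* calling this again is now a no-op */
	}
}
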
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4b..c7bd649bbbdc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
166 return NULL; 166 return NULL;
167} 167}
168 168
169void journal_destroy_revoke_caches(void)
170{
171 if (revoke_record_cache) {
172 kmem_cache_destroy(revoke_record_cache);
173 revoke_record_cache = NULL;
174 }
175 if (revoke_table_cache) {
176 kmem_cache_destroy(revoke_table_cache);
177 revoke_table_cache = NULL;
178 }
179}
180
169int __init journal_init_revoke_caches(void) 181int __init journal_init_revoke_caches(void)
170{ 182{
183 J_ASSERT(!revoke_record_cache);
184 J_ASSERT(!revoke_table_cache);
185
171 revoke_record_cache = kmem_cache_create("revoke_record", 186 revoke_record_cache = kmem_cache_create("revoke_record",
172 sizeof(struct jbd_revoke_record_s), 187 sizeof(struct jbd_revoke_record_s),
173 0, 188 0,
174 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 189 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
175 NULL); 190 NULL);
176 if (!revoke_record_cache) 191 if (!revoke_record_cache)
177 return -ENOMEM; 192 goto record_cache_failure;
178 193
179 revoke_table_cache = kmem_cache_create("revoke_table", 194 revoke_table_cache = kmem_cache_create("revoke_table",
180 sizeof(struct jbd_revoke_table_s), 195 sizeof(struct jbd_revoke_table_s),
181 0, SLAB_TEMPORARY, NULL); 196 0, SLAB_TEMPORARY, NULL);
182 if (!revoke_table_cache) { 197 if (!revoke_table_cache)
183 kmem_cache_destroy(revoke_record_cache); 198 goto table_cache_failure;
184 revoke_record_cache = NULL; 199
185 return -ENOMEM;
186 }
187 return 0; 200 return 0;
188}
189 201
190void journal_destroy_revoke_caches(void) 202table_cache_failure:
191{ 203 journal_destroy_revoke_caches();
192 kmem_cache_destroy(revoke_record_cache); 204record_cache_failure:
193 revoke_record_cache = NULL; 205 return -ENOMEM;
194 kmem_cache_destroy(revoke_table_cache);
195 revoke_table_cache = NULL;
196} 206}
197 207
198/* Initialise the revoke table for a given journal to a given size. */ 208static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
199
200int journal_init_revoke(journal_t *journal, int hash_size)
201{ 209{
202 int shift, tmp; 210 int shift = 0;
211 int tmp = hash_size;
212 struct jbd_revoke_table_s *table;
203 213
204 J_ASSERT (journal->j_revoke_table[0] == NULL); 214 table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
215 if (!table)
216 goto out;
205 217
206 shift = 0;
207 tmp = hash_size;
208 while((tmp >>= 1UL) != 0UL) 218 while((tmp >>= 1UL) != 0UL)
209 shift++; 219 shift++;
210 220
211 journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 221 table->hash_size = hash_size;
212 if (!journal->j_revoke_table[0]) 222 table->hash_shift = shift;
213 return -ENOMEM; 223 table->hash_table =
214 journal->j_revoke = journal->j_revoke_table[0];
215
216 /* Check that the hash_size is a power of two */
217 J_ASSERT(is_power_of_2(hash_size));
218
219 journal->j_revoke->hash_size = hash_size;
220
221 journal->j_revoke->hash_shift = shift;
222
223 journal->j_revoke->hash_table =
224 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 224 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
225 if (!journal->j_revoke->hash_table) { 225 if (!table->hash_table) {
226 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); 226 kmem_cache_free(revoke_table_cache, table);
227 journal->j_revoke = NULL; 227 table = NULL;
228 return -ENOMEM; 228 goto out;
229 } 229 }
230 230
231 for (tmp = 0; tmp < hash_size; tmp++) 231 for (tmp = 0; tmp < hash_size; tmp++)
232 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 232 INIT_LIST_HEAD(&table->hash_table[tmp]);
233 233
234 journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 234out:
235 if (!journal->j_revoke_table[1]) { 235 return table;
236 kfree(journal->j_revoke_table[0]->hash_table); 236}
237 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); 237
238 return -ENOMEM; 238static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
239{
240 int i;
241 struct list_head *hash_list;
242
243 for (i = 0; i < table->hash_size; i++) {
244 hash_list = &table->hash_table[i];
245 J_ASSERT(list_empty(hash_list));
239 } 246 }
240 247
241 journal->j_revoke = journal->j_revoke_table[1]; 248 kfree(table->hash_table);
249 kmem_cache_free(revoke_table_cache, table);
250}
242 251
243 /* Check that the hash_size is a power of two */ 252/* Initialise the revoke table for a given journal to a given size. */
253int journal_init_revoke(journal_t *journal, int hash_size)
254{
255 J_ASSERT(journal->j_revoke_table[0] == NULL);
244 J_ASSERT(is_power_of_2(hash_size)); 256 J_ASSERT(is_power_of_2(hash_size));
245 257
246 journal->j_revoke->hash_size = hash_size; 258 journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
259 if (!journal->j_revoke_table[0])
260 goto fail0;
247 261
248 journal->j_revoke->hash_shift = shift; 262 journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
263 if (!journal->j_revoke_table[1])
264 goto fail1;
249 265
250 journal->j_revoke->hash_table = 266 journal->j_revoke = journal->j_revoke_table[1];
251 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
252 if (!journal->j_revoke->hash_table) {
253 kfree(journal->j_revoke_table[0]->hash_table);
254 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
255 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
256 journal->j_revoke = NULL;
257 return -ENOMEM;
258 }
259
260 for (tmp = 0; tmp < hash_size; tmp++)
261 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
262 267
263 spin_lock_init(&journal->j_revoke_lock); 268 spin_lock_init(&journal->j_revoke_lock);
264 269
265 return 0; 270 return 0;
266}
267 271
268/* Destoy a journal's revoke table. The table must already be empty! */ 272fail1:
273 journal_destroy_revoke_table(journal->j_revoke_table[0]);
274fail0:
275 return -ENOMEM;
276}
269 277
278/* Destroy a journal's revoke table. The table must already be empty! */
270void journal_destroy_revoke(journal_t *journal) 279void journal_destroy_revoke(journal_t *journal)
271{ 280{
272 struct jbd_revoke_table_s *table;
273 struct list_head *hash_list;
274 int i;
275
276 table = journal->j_revoke_table[0];
277 if (!table)
278 return;
279
280 for (i=0; i<table->hash_size; i++) {
281 hash_list = &table->hash_table[i];
282 J_ASSERT (list_empty(hash_list));
283 }
284
285 kfree(table->hash_table);
286 kmem_cache_free(revoke_table_cache, table);
287 journal->j_revoke = NULL;
288
289 table = journal->j_revoke_table[1];
290 if (!table)
291 return;
292
293 for (i=0; i<table->hash_size; i++) {
294 hash_list = &table->hash_table[i];
295 J_ASSERT (list_empty(hash_list));
296 }
297
298 kfree(table->hash_table);
299 kmem_cache_free(revoke_table_cache, table);
300 journal->j_revoke = NULL; 281 journal->j_revoke = NULL;
282 if (journal->j_revoke_table[0])
283 journal_destroy_revoke_table(journal->j_revoke_table[0]);
284 if (journal->j_revoke_table[1])
285 journal_destroy_revoke_table(journal->j_revoke_table[1]);
301} 286}
302 287
303 288
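
The revoke.c rewrite factors the duplicated table setup into journal_init_revoke_table() and unwinds failures with gotos instead of repeating the cleanup inline. The resulting control flow, reduced to a skeleton with hypothetical names:

struct demo_table;
struct demo_journal { struct demo_table *table[2]; struct demo_table *active; };
struct demo_table *demo_init_table(int hash_size);	/* hypothetical */
void demo_destroy_table(struct demo_table *t);		/* hypothetical */

static int demo_init(struct demo_journal *j, int hash_size)
{
	j->table[0] = demo_init_table(hash_size);
	if (!j->table[0])
		goto fail0;

	j->table[1] = demo_init_table(hash_size);
	if (!j->table[1])
		goto fail1;

	j->active = j->table[1];
	return 0;

fail1:
	demo_destroy_table(j->table[0]);
fail0:
	return -ENOMEM;
}
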
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23c..8dee32007500 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@ out:
1648 return; 1648 return;
1649} 1649}
1650 1650
1651/*
1652 * journal_try_to_free_buffers() could race with journal_commit_transaction().
1653 * The latter might still hold a reference count on buffers when
1654 * inspecting them on t_syncdata_list or t_locked_list.
1655 *
1656 * journal_try_to_free_buffers() calls this function to wait for the
1657 * current transaction to finish syncing data buffers before trying to
1658 * free those buffers.
1659 *
1660 * Takes and releases journal->j_state_lock internally.
1661 */
1662static void journal_wait_for_transaction_sync_data(journal_t *journal)
1663{
1664 transaction_t *transaction = NULL;
1665 tid_t tid;
1666
1667 spin_lock(&journal->j_state_lock);
1668 transaction = journal->j_committing_transaction;
1669
1670 if (!transaction) {
1671 spin_unlock(&journal->j_state_lock);
1672 return;
1673 }
1674
1675 tid = transaction->t_tid;
1676 spin_unlock(&journal->j_state_lock);
1677 log_wait_commit(journal, tid);
1678}
1651 1679
1652/** 1680/**
1653 * int journal_try_to_free_buffers() - try to free page buffers. 1681 * int journal_try_to_free_buffers() - try to free page buffers.
1654 * @journal: journal for operation 1682 * @journal: journal for operation
1655 * @page: to try and free 1683 * @page: to try and free
1656 * @unused_gfp_mask: unused 1684 * @gfp_mask: we use the mask to detect how hard we should try to release
1685 * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for the commit code to
1686 * release the buffers.
1657 * 1687 *
1658 * 1688 *
1659 * For all the buffers on this page, 1689 * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
1682 * journal_try_to_free_buffer() is changing its state. But that 1712 * journal_try_to_free_buffer() is changing its state. But that
1683 * cannot happen because we never reallocate freed data as metadata 1713 * cannot happen because we never reallocate freed data as metadata
1684 * while the data is part of a transaction. Yes? 1714 * while the data is part of a transaction. Yes?
1715 *
1716 * Return 0 on failure, 1 on success
1685 */ 1717 */
1686int journal_try_to_free_buffers(journal_t *journal, 1718int journal_try_to_free_buffers(journal_t *journal,
1687 struct page *page, gfp_t unused_gfp_mask) 1719 struct page *page, gfp_t gfp_mask)
1688{ 1720{
1689 struct buffer_head *head; 1721 struct buffer_head *head;
1690 struct buffer_head *bh; 1722 struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
1713 if (buffer_jbd(bh)) 1745 if (buffer_jbd(bh))
1714 goto busy; 1746 goto busy;
1715 } while ((bh = bh->b_this_page) != head); 1747 } while ((bh = bh->b_this_page) != head);
1748
1716 ret = try_to_free_buffers(page); 1749 ret = try_to_free_buffers(page);
1750
1751 /*
1752 * There are a number of places where journal_try_to_free_buffers()
1753 * could race with journal_commit_transaction(); the latter still
1754 * holds a reference to the buffers while processing them, so
1755 * try_to_free_buffers() fails to free them. Some callers of
1756 * releasepage() require the page buffers to be dropped and otherwise
1757 * treat a failure to free as an error (such as generic_file_direct_IO()).
1758 *
1759 * So, if the caller of try_to_release_page() wants synchronous
1760 * behaviour (i.e. a guarantee that buffers are dropped upon return),
1761 * wait for the current transaction to finish flushing its dirty data
1762 * buffers, which drops the commit code's references, and then try to
1763 * free those buffers again.
1764 */
1765 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1766 journal_wait_for_transaction_sync_data(journal);
1767 ret = try_to_free_buffers(page);
1768 }
1769
1717busy: 1770busy:
1718 return ret; 1771 return ret;
1719} 1772}
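
Putting the transaction.c pieces together: releasepage now retries the free synchronously, but only when the gfp mask says the caller may sleep and re-enter the filesystem. A condensed sketch of the final flow, with the jbd state checks elided:

int demo_try_to_free_buffers(journal_t *journal, struct page *page,
			     gfp_t gfp_mask)
{
	int ret = try_to_free_buffers(page);

	if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
		/* wait out the committing transaction's data writeback */
		journal_wait_for_transaction_sync_data(journal);
		ret = try_to_free_buffers(page);
	}
	return ret;	/* 0 on failure, 1 on success */
}
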
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d7936a..359c091d8965 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
22#include <linux/parser.h> 22#include <linux/parser.h>
23#include <linux/completion.h> 23#include <linux/completion.h>
24#include <linux/vfs.h> 24#include <linux/vfs.h>
25#include <linux/quotaops.h>
25#include <linux/mount.h> 26#include <linux/mount.h>
26#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
27#include <linux/kthread.h> 28#include <linux/kthread.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 1f6dc518505c..31668b690e03 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@ again:
582 } 582 }
583 if (status < 0) 583 if (status < 0)
584 goto out_unlock; 584 goto out_unlock;
585 status = nlm_stat_to_errno(resp->status); 585 /*
586 * EAGAIN doesn't make sense for sleeping locks, and in some
587 * cases NLM_LCK_DENIED is returned for a permanent error. So
588 * turn it into an ENOLCK.
589 */
590 if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
591 status = -ENOLCK;
592 else
593 status = nlm_stat_to_errno(resp->status);
586out_unblock: 594out_unblock:
587 nlmclnt_finish_block(block); 595 nlmclnt_finish_block(block);
588out: 596out:
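
The clntproc.c hunk special-cases a denied sleeping lock before the generic status translation runs. Its decision, pulled out into a small helper with a hypothetical name (nlm_stat_to_errno() is the existing translator):

static int demo_map_lock_status(__be32 status, unsigned int fl_flags)
{
	/*
	 * A server that meant "try again later" would have answered
	 * nlm_lck_blocked to a blocking request, so "denied" here is
	 * a permanent failure rather than EAGAIN.
	 */
	if (status == nlm_lck_denied && (fl_flags & FL_SLEEP))
		return -ENOLCK;
	return nlm_stat_to_errno(status);
}
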
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 821b9acdfb66..cf0d5c2c318d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
418 goto out; 418 goto out;
419 case -EAGAIN: 419 case -EAGAIN:
420 ret = nlm_lck_denied; 420 ret = nlm_lck_denied;
421 break; 421 goto out;
422 case -EINPROGRESS: 422 case FILE_LOCK_DEFERRED:
423 if (wait) 423 if (wait)
424 break; 424 break;
425 /* Filesystem lock operation is in progress 425 /* Filesystem lock operation is in progress
@@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
434 goto out; 434 goto out;
435 } 435 }
436 436
437 ret = nlm_lck_denied;
438 if (!wait)
439 goto out;
440
441 ret = nlm_lck_blocked; 437 ret = nlm_lck_blocked;
442 438
443 /* Append to list of blocked */ 439 /* Append to list of blocked */
@@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
507 } 503 }
508 504
509 error = vfs_test_lock(file->f_file, &lock->fl); 505 error = vfs_test_lock(file->f_file, &lock->fl);
510 if (error == -EINPROGRESS) { 506 if (error == FILE_LOCK_DEFERRED) {
511 ret = nlmsvc_defer_lock_rqst(rqstp, block); 507 ret = nlmsvc_defer_lock_rqst(rqstp, block);
512 goto out; 508 goto out;
513 } 509 }
@@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
731 switch (error) { 727 switch (error) {
732 case 0: 728 case 0:
733 break; 729 break;
734 case -EAGAIN: 730 case FILE_LOCK_DEFERRED:
735 case -EINPROGRESS:
736 dprintk("lockd: lock still blocked error %d\n", error); 731 dprintk("lockd: lock still blocked error %d\n", error);
737 nlmsvc_insert_block(block, NLM_NEVER); 732 nlmsvc_insert_block(block, NLM_NEVER);
738 nlmsvc_release_block(block); 733 nlmsvc_release_block(block);
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747371c..01490300f7cb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -779,8 +779,10 @@ find_conflict:
779 if (!flock_locks_conflict(request, fl)) 779 if (!flock_locks_conflict(request, fl))
780 continue; 780 continue;
781 error = -EAGAIN; 781 error = -EAGAIN;
782 if (request->fl_flags & FL_SLEEP) 782 if (!(request->fl_flags & FL_SLEEP))
783 locks_insert_block(fl, request); 783 goto out;
784 error = FILE_LOCK_DEFERRED;
785 locks_insert_block(fl, request);
784 goto out; 786 goto out;
785 } 787 }
786 if (request->fl_flags & FL_ACCESS) 788 if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
836 error = -EDEADLK; 838 error = -EDEADLK;
837 if (posix_locks_deadlock(request, fl)) 839 if (posix_locks_deadlock(request, fl))
838 goto out; 840 goto out;
839 error = -EAGAIN; 841 error = FILE_LOCK_DEFERRED;
840 locks_insert_block(fl, request); 842 locks_insert_block(fl, request);
841 goto out; 843 goto out;
842 } 844 }
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1035 might_sleep (); 1037 might_sleep ();
1036 for (;;) { 1038 for (;;) {
1037 error = posix_lock_file(filp, fl, NULL); 1039 error = posix_lock_file(filp, fl, NULL);
1038 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1040 if (error != FILE_LOCK_DEFERRED)
1039 break; 1041 break;
1040 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1042 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1041 if (!error) 1043 if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1107 1109
1108 for (;;) { 1110 for (;;) {
1109 error = __posix_lock_file(inode, &fl, NULL); 1111 error = __posix_lock_file(inode, &fl, NULL);
1110 if (error != -EAGAIN) 1112 if (error != FILE_LOCK_DEFERRED)
1111 break;
1112 if (!(fl.fl_flags & FL_SLEEP))
1113 break; 1113 break;
1114 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); 1114 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
1115 if (!error) { 1115 if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
1531 might_sleep(); 1531 might_sleep();
1532 for (;;) { 1532 for (;;) {
1533 error = flock_lock_file(filp, fl); 1533 error = flock_lock_file(filp, fl);
1534 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1534 if (error != FILE_LOCK_DEFERRED)
1535 break; 1535 break;
1536 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1536 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1537 if (!error) 1537 if (!error)
@@ -1716,17 +1716,17 @@ out:
1716 * fl_grant is set. Callers expecting ->lock() to return asynchronously 1716 * fl_grant is set. Callers expecting ->lock() to return asynchronously
1717 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) 1717 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
1718 * the request is for a blocking lock. When ->lock() does return asynchronously, 1718 * the request is for a blocking lock. When ->lock() does return asynchronously,
1719 * it must return -EINPROGRESS, and call ->fl_grant() when the lock 1719 * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
1720 * request completes. 1720 * request completes.
1721 * If the request is for a non-blocking lock, the file system should return 1721 * If the request is for a non-blocking lock, the file system should return
1722 * -EINPROGRESS then try to get the lock and call the callback routine with 1722 * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
1723 * the result. If the request timed out the callback routine will return a 1723 * with the result. If the request timed out the callback routine will return a
1724 * nonzero return code and the file system should release the lock. The file 1724 * nonzero return code and the file system should release the lock. The file
1725 * system is also responsible for keeping a corresponding posix lock when it 1725 * system is also responsible for keeping a corresponding posix lock when it
1726 * grants a lock so the VFS can find out which locks are locally held and do 1726 * grants a lock so the VFS can find out which locks are locally held and do
1727 * the correct lock cleanup when required. 1727 * the correct lock cleanup when required.
1728 * The underlying filesystem must not drop the kernel lock or call 1728 * The underlying filesystem must not drop the kernel lock or call
1729 * ->fl_grant() before returning to the caller with a -EINPROGRESS 1729 * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
1730 * return code. 1730 * return code.
1731 */ 1731 */
1732int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) 1732int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str
1738} 1738}
1739EXPORT_SYMBOL_GPL(vfs_lock_file); 1739EXPORT_SYMBOL_GPL(vfs_lock_file);
1740 1740
1741static int do_lock_file_wait(struct file *filp, unsigned int cmd,
1742 struct file_lock *fl)
1743{
1744 int error;
1745
1746 error = security_file_lock(filp, fl->fl_type);
1747 if (error)
1748 return error;
1749
1750 for (;;) {
1751 error = vfs_lock_file(filp, cmd, fl, NULL);
1752 if (error != FILE_LOCK_DEFERRED)
1753 break;
1754 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1755 if (!error)
1756 continue;
1757
1758 locks_delete_block(fl);
1759 break;
1760 }
1761
1762 return error;
1763}
1764
1741/* Apply the lock described by l to an open file descriptor. 1765/* Apply the lock described by l to an open file descriptor.
1742 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1766 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1743 */ 1767 */
@@ -1795,26 +1819,7 @@ again:
1795 goto out; 1819 goto out;
1796 } 1820 }
1797 1821
1798 error = security_file_lock(filp, file_lock->fl_type); 1822 error = do_lock_file_wait(filp, cmd, file_lock);
1799 if (error)
1800 goto out;
1801
1802 if (filp->f_op && filp->f_op->lock != NULL)
1803 error = filp->f_op->lock(filp, cmd, file_lock);
1804 else {
1805 for (;;) {
1806 error = posix_lock_file(filp, file_lock, NULL);
1807 if (error != -EAGAIN || cmd == F_SETLK)
1808 break;
1809 error = wait_event_interruptible(file_lock->fl_wait,
1810 !file_lock->fl_next);
1811 if (!error)
1812 continue;
1813
1814 locks_delete_block(file_lock);
1815 break;
1816 }
1817 }
1818 1823
1819 /* 1824 /*
1820 * Attempt to detect a close/fcntl race and recover by 1825 * Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1937,7 @@ again:
1932 goto out; 1937 goto out;
1933 } 1938 }
1934 1939
1935 error = security_file_lock(filp, file_lock->fl_type); 1940 error = do_lock_file_wait(filp, cmd, file_lock);
1936 if (error)
1937 goto out;
1938
1939 if (filp->f_op && filp->f_op->lock != NULL)
1940 error = filp->f_op->lock(filp, cmd, file_lock);
1941 else {
1942 for (;;) {
1943 error = posix_lock_file(filp, file_lock, NULL);
1944 if (error != -EAGAIN || cmd == F_SETLK64)
1945 break;
1946 error = wait_event_interruptible(file_lock->fl_wait,
1947 !file_lock->fl_next);
1948 if (!error)
1949 continue;
1950
1951 locks_delete_block(file_lock);
1952 break;
1953 }
1954 }
1955 1941
1956 /* 1942 /*
1957 * Attempt to detect a close/fcntl race and recover by 1943 * Attempt to detect a close/fcntl race and recover by
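
For a filesystem that implements asynchronous locking, the contract documented above comes down to roughly this; the demo_* pieces are hypothetical:

static int demo_fs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
	if (demo_grant_immediately(filp, fl))	/* hypothetical fast path */
		return 0;

	/*
	 * Hand the request to the lock manager, which must call
	 * fl_grant() (and keep a matching posix lock) once a verdict
	 * is reached.
	 */
	demo_queue_request(filp, fl);		/* hypothetical */
	return FILE_LOCK_DEFERRED;
}

The new do_lock_file_wait() then sleeps on fl->fl_wait until fl_next clears, and cleans up with locks_delete_block() if the wait is interrupted.
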
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 84f6242ba6fc..523d73713418 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
256 if (!s->s_root) 256 if (!s->s_root)
257 goto out_iput; 257 goto out_iput;
258 258
259 if (!NO_TRUNCATE)
260 s->s_root->d_op = &minix_dentry_operations;
261
262 if (!(s->s_flags & MS_RDONLY)) { 259 if (!(s->s_flags & MS_RDONLY)) {
263 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ 260 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
264 ms->s_state &= ~MINIX_VALID_FS; 261 ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 326edfe96108..e6a0b193bea4 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
2#include <linux/pagemap.h> 2#include <linux/pagemap.h>
3#include <linux/minix_fs.h> 3#include <linux/minix_fs.h>
4 4
5/*
6 * change the define below to 0 if you want names > info->s_namelen chars to be
7 * truncated. Else they will be disallowed (ENAMETOOLONG).
8 */
9#define NO_TRUNCATE 1
10#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version 5#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
11#define MINIX_V1 0x0001 /* original minix fs */ 6#define MINIX_V1 0x0001 /* original minix fs */
12#define MINIX_V2 0x0002 /* minix V2 fs */ 7#define MINIX_V2 0x0002 /* minix V2 fs */
@@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations;
83extern const struct inode_operations minix_dir_inode_operations; 78extern const struct inode_operations minix_dir_inode_operations;
84extern const struct file_operations minix_file_operations; 79extern const struct file_operations minix_file_operations;
85extern const struct file_operations minix_dir_operations; 80extern const struct file_operations minix_dir_operations;
86extern struct dentry_operations minix_dentry_operations;
87 81
88static inline struct minix_sb_info *minix_sb(struct super_block *sb) 82static inline struct minix_sb_info *minix_sb(struct super_block *sb)
89{ 83{
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 102241bc9c79..32b131cd6121 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
18 return err; 18 return err;
19} 19}
20 20
21static int minix_hash(struct dentry *dentry, struct qstr *qstr)
22{
23 unsigned long hash;
24 int i;
25 const unsigned char *name;
26
27 i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
28 if (i >= qstr->len)
29 return 0;
30 /* Truncate the name in place, avoids having to define a compare
31 function. */
32 qstr->len = i;
33 name = qstr->name;
34 hash = init_name_hash();
35 while (i--)
36 hash = partial_name_hash(*name++, hash);
37 qstr->hash = end_name_hash(hash);
38 return 0;
39}
40
41struct dentry_operations minix_dentry_operations = {
42 .d_hash = minix_hash,
43};
44
45static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 21static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
46{ 22{
47 struct inode * inode = NULL; 23 struct inode * inode = NULL;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f2956412a..e844b9809d27 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
14 14
15/* Characters that are undesirable in an MS-DOS file name */ 15/* Characters that are undesirable in an MS-DOS file name */
16static unsigned char bad_chars[] = "*?<>|\""; 16static unsigned char bad_chars[] = "*?<>|\"";
17static unsigned char bad_if_strict_pc[] = "+=,; "; 17static unsigned char bad_if_strict[] = "+=,; ";
18/* GEMDOS is less restrictive */
19static unsigned char bad_if_strict_atari[] = " ";
20
21#define bad_if_strict(opts) \
22 ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
23 18
24/***** Formats an MS-DOS file name. Rejects invalid names. */ 19/***** Formats an MS-DOS file name. Rejects invalid names. */
25static int msdos_format_name(const unsigned char *name, int len, 20static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
40 /* Get rid of dot - test for it elsewhere */ 35 /* Get rid of dot - test for it elsewhere */
41 name++; 36 name++;
42 len--; 37 len--;
43 } else if (!opts->atari) 38 } else
44 return -EINVAL; 39 return -EINVAL;
45 } 40 }
46 /* 41 /*
47 * disallow names that _really_ start with a dot for MS-DOS, 42 * disallow names that _really_ start with a dot
48 * GEMDOS does not care
49 */ 43 */
50 space = !opts->atari; 44 space = 1;
51 c = 0; 45 c = 0;
52 for (walk = res; len && walk - res < 8; walk++) { 46 for (walk = res; len && walk - res < 8; walk++) {
53 c = *name++; 47 c = *name++;
54 len--; 48 len--;
55 if (opts->name_check != 'r' && strchr(bad_chars, c)) 49 if (opts->name_check != 'r' && strchr(bad_chars, c))
56 return -EINVAL; 50 return -EINVAL;
57 if (opts->name_check == 's' && strchr(bad_if_strict(opts), c)) 51 if (opts->name_check == 's' && strchr(bad_if_strict, c))
58 return -EINVAL; 52 return -EINVAL;
59 if (c >= 'A' && c <= 'Z' && opts->name_check == 's') 53 if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
60 return -EINVAL; 54 return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
94 if (opts->name_check != 'r' && strchr(bad_chars, c)) 88 if (opts->name_check != 'r' && strchr(bad_chars, c))
95 return -EINVAL; 89 return -EINVAL;
96 if (opts->name_check == 's' && 90 if (opts->name_check == 's' &&
97 strchr(bad_if_strict(opts), c)) 91 strchr(bad_if_strict, c))
98 return -EINVAL; 92 return -EINVAL;
99 if (c < ' ' || c == ':' || c == '\\') 93 if (c < ' ' || c == ':' || c == '\\')
100 return -EINVAL; 94 return -EINVAL;
@@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
243 int is_dir, int is_hid, int cluster, 237 int is_dir, int is_hid, int cluster,
244 struct timespec *ts, struct fat_slot_info *sinfo) 238 struct timespec *ts, struct fat_slot_info *sinfo)
245{ 239{
240 struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
246 struct msdos_dir_entry de; 241 struct msdos_dir_entry de;
247 __le16 time, date; 242 __le16 time, date;
248 int err; 243 int err;
@@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
252 if (is_hid) 247 if (is_hid)
253 de.attr |= ATTR_HIDDEN; 248 de.attr |= ATTR_HIDDEN;
254 de.lcase = 0; 249 de.lcase = 0;
255 fat_date_unix2dos(ts->tv_sec, &time, &date); 250 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
256 de.cdate = de.adate = 0; 251 de.cdate = de.adate = 0;
257 de.ctime = 0; 252 de.ctime = 0;
258 de.ctime_cs = 0; 253 de.ctime_cs = 0;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 6b6225ac4926..15c6faeec77c 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
19 19
20#define NFSDDBG_FACILITY NFSDDBG_LOCKD 20#define NFSDDBG_FACILITY NFSDDBG_LOCKD
21 21
22#ifdef CONFIG_LOCKD_V4
23#define nlm_stale_fh nlm4_stale_fh
24#define nlm_failed nlm4_failed
25#else
26#define nlm_stale_fh nlm_lck_denied_nolocks
27#define nlm_failed nlm_lck_denied_nolocks
28#endif
22/* 29/*
23 * Note: we hold the dentry use count while the file is open. 30 * Note: we hold the dentry use count while the file is open.
24 */ 31 */
@@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
47 return 0; 54 return 0;
48 case nfserr_dropit: 55 case nfserr_dropit:
49 return nlm_drop_reply; 56 return nlm_drop_reply;
50#ifdef CONFIG_LOCKD_V4
51 case nfserr_stale: 57 case nfserr_stale:
52 return nlm4_stale_fh; 58 return nlm_stale_fh;
53#endif
54 default: 59 default:
55 return nlm_lck_denied; 60 return nlm_failed;
56 } 61 }
57} 62}
58 63
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index efef715135d3..7d6b34e201db 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
345 whole_disk_show, NULL); 345 whole_disk_show, NULL);
346 346
347void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) 347int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
348{ 348{
349 struct hd_struct *p; 349 struct hd_struct *p;
350 int err; 350 int err;
351 351
352 p = kzalloc(sizeof(*p), GFP_KERNEL); 352 p = kzalloc(sizeof(*p), GFP_KERNEL);
353 if (!p) 353 if (!p)
354 return; 354 return -ENOMEM;
355 355
356 if (!init_part_stats(p)) { 356 if (!init_part_stats(p)) {
357 kfree(p); 357 err = -ENOMEM;
358 return; 358 goto out0;
359 } 359 }
360 p->start_sect = start; 360 p->start_sect = start;
361 p->nr_sects = len; 361 p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
378 378
379 /* delay uevent until 'holders' subdir is created */ 379 /* delay uevent until 'holders' subdir is created */
380 p->dev.uevent_suppress = 1; 380 p->dev.uevent_suppress = 1;
381 device_add(&p->dev); 381 err = device_add(&p->dev);
382 if (err)
383 goto out1;
382 partition_sysfs_add_subdir(p); 384 partition_sysfs_add_subdir(p);
383 p->dev.uevent_suppress = 0; 385 p->dev.uevent_suppress = 0;
384 if (flags & ADDPART_FLAG_WHOLEDISK) 386 if (flags & ADDPART_FLAG_WHOLEDISK) {
385 err = device_create_file(&p->dev, &dev_attr_whole_disk); 387 err = device_create_file(&p->dev, &dev_attr_whole_disk);
388 if (err)
389 goto out2;
390 }
386 391
387 /* suppress uevent if the disk suppresses it */ 392 /* suppress uevent if the disk suppresses it */
388 if (!disk->dev.uevent_suppress) 393 if (!disk->dev.uevent_suppress)
389 kobject_uevent(&p->dev.kobj, KOBJ_ADD); 394 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
395
396 return 0;
397
398out2:
399 device_del(&p->dev);
400out1:
401 put_device(&p->dev);
402 free_part_stats(p);
403out0:
404 kfree(p);
405 return err;
390} 406}
391 407
392/* Not exported, helper to add_disk(). */ 408/* Not exported, helper to add_disk(). */
@@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
483 if (!size) 499 if (!size)
484 continue; 500 continue;
485 if (from + size > get_capacity(disk)) { 501 if (from + size > get_capacity(disk)) {
486 printk(" %s: p%d exceeds device capacity\n", 502 printk(KERN_ERR " %s: p%d exceeds device capacity\n",
487 disk->disk_name, p); 503 disk->disk_name, p);
504 continue;
505 }
506 res = add_partition(disk, p, from, size, state->parts[p].flags);
507 if (res) {
508 printk(KERN_ERR " %s: p%d could not be added: %d\n",
509 disk->disk_name, p, -res);
510 continue;
488 } 511 }
489 add_partition(disk, p, from, size, state->parts[p].flags);
490#ifdef CONFIG_BLK_DEV_MD 512#ifdef CONFIG_BLK_DEV_MD
491 if (state->parts[p].flags & ADDPART_FLAG_RAID) 513 if (state->parts[p].flags & ADDPART_FLAG_RAID)
492 md_autodetect_dev(bdev->bd_dev+p); 514 md_autodetect_dev(bdev->bd_dev+p);
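
add_partition() now returns an error instead of failing silently, and its new error path unwinds in reverse order of construction. A generic sketch of that style of unwind, with hypothetical resources rather than the partition code itself:

#include <linux/slab.h>

struct demo_obj { int id; };
int demo_register(struct demo_obj *obj);		/* hypothetical */
void demo_unregister(struct demo_obj *obj);		/* hypothetical */
int demo_add_sysfs_file(struct demo_obj *obj);		/* hypothetical */

static int demo_create(void)
{
	struct demo_obj *obj;
	int err;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	err = demo_register(obj);
	if (err)
		goto out_free;

	err = demo_add_sysfs_file(obj);
	if (err)
		goto out_unregister;

	return 0;

out_unregister:
	demo_unregister(obj);			/* undo in reverse order */
out_free:
	kfree(obj);
	return err;
}
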
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index e7b07006bc41..038a6022152f 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
95#include "check.h" 95#include "check.h"
96#include "efi.h" 96#include "efi.h"
97 97
98#undef EFI_DEBUG
99#ifdef EFI_DEBUG
100#define Dprintk(x...) printk(KERN_DEBUG x)
101#else
102#define Dprintk(x...)
103#endif
104
105/* This allows a kernel command line option 'gpt' to override 98/* This allows a kernel command line option 'gpt' to override
106 * the test for invalid PMBR. Not __initdata because reloading 99 * the test for invalid PMBR. Not __initdata because reloading
107 * the partition tables happens after init too. 100 * the partition tables happens after init too.
@@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
305 298
306 /* Check the GUID Partition Table signature */ 299 /* Check the GUID Partition Table signature */
307 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) { 300 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
308 Dprintk("GUID Partition Table Header signature is wrong:" 301 pr_debug("GUID Partition Table Header signature is wrong:"
309 "%lld != %lld\n", 302 "%lld != %lld\n",
310 (unsigned long long)le64_to_cpu((*gpt)->signature), 303 (unsigned long long)le64_to_cpu((*gpt)->signature),
311 (unsigned long long)GPT_HEADER_SIGNATURE); 304 (unsigned long long)GPT_HEADER_SIGNATURE);
312 goto fail; 305 goto fail;
313 } 306 }
314 307
@@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
318 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size)); 311 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
319 312
320 if (crc != origcrc) { 313 if (crc != origcrc) {
321 Dprintk 314 pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
322 ("GUID Partition Table Header CRC is wrong: %x != %x\n", 315 crc, origcrc);
323 crc, origcrc);
324 goto fail; 316 goto fail;
325 } 317 }
326 (*gpt)->header_crc32 = cpu_to_le32(origcrc); 318 (*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
328 /* Check that the my_lba entry points to the LBA that contains 320 /* Check that the my_lba entry points to the LBA that contains
329 * the GUID Partition Table */ 321 * the GUID Partition Table */
330 if (le64_to_cpu((*gpt)->my_lba) != lba) { 322 if (le64_to_cpu((*gpt)->my_lba) != lba) {
331 Dprintk("GPT my_lba incorrect: %lld != %lld\n", 323 pr_debug("GPT my_lba incorrect: %lld != %lld\n",
332 (unsigned long long)le64_to_cpu((*gpt)->my_lba), 324 (unsigned long long)le64_to_cpu((*gpt)->my_lba),
333 (unsigned long long)lba); 325 (unsigned long long)lba);
334 goto fail; 326 goto fail;
335 } 327 }
336 328
@@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
339 */ 331 */
340 lastlba = last_lba(bdev); 332 lastlba = last_lba(bdev);
341 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { 333 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
342 Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n", 334 pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
343 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), 335 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
344 (unsigned long long)lastlba); 336 (unsigned long long)lastlba);
345 goto fail; 337 goto fail;
346 } 338 }
347 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) { 339 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
348 Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n", 340 pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
349 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), 341 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
350 (unsigned long long)lastlba); 342 (unsigned long long)lastlba);
351 goto fail; 343 goto fail;
352 } 344 }
353 345
@@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
360 le32_to_cpu((*gpt)->sizeof_partition_entry)); 352 le32_to_cpu((*gpt)->sizeof_partition_entry));
361 353
362 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { 354 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
363 Dprintk("GUID Partitition Entry Array CRC check failed.\n"); 355 pr_debug("GUID Partitition Entry Array CRC check failed.\n");
364 goto fail_ptes; 356 goto fail_ptes;
365 } 357 }
366 358
@@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
616 return 0; 608 return 0;
617 } 609 }
618 610
619 Dprintk("GUID Partition Table is valid! Yea!\n"); 611 pr_debug("GUID Partition Table is valid! Yea!\n");
620 612
621 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { 613 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
622 if (!is_pte_valid(&ptes[i], last_lba(bdev))) 614 if (!is_pte_valid(&ptes[i], last_lba(bdev)))
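
The efi.c hunks delete the file-private Dprintk() macro in favour of the kernel-wide pr_debug(), which expands to nothing unless DEBUG is defined for the file (or dynamic debug is enabled in later kernels). The equivalent opt-in, sketched:

#define DEBUG			/* per-file switch, replacing EFI_DEBUG */
#include <linux/kernel.h>

static void demo_report_bad_signature(u64 got, u64 want)
{
	pr_debug("GPT signature is wrong: %llu != %llu\n",
		 (unsigned long long)got, (unsigned long long)want);
}
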
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 0fdda2e8a4cc..8652fb99e962 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
133 bool is_vista = false; 133 bool is_vista = false;
134 134
135 BUG_ON(!data || !ph); 135 BUG_ON(!data || !ph);
136 if (MAGIC_PRIVHEAD != BE64(data)) { 136 if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
137 ldm_error("Cannot find PRIVHEAD structure. LDM database is" 137 ldm_error("Cannot find PRIVHEAD structure. LDM database is"
138 " corrupt. Aborting."); 138 " corrupt. Aborting.");
139 return false; 139 return false;
140 } 140 }
141 ph->ver_major = BE16(data + 0x000C); 141 ph->ver_major = get_unaligned_be16(data + 0x000C);
142 ph->ver_minor = BE16(data + 0x000E); 142 ph->ver_minor = get_unaligned_be16(data + 0x000E);
143 ph->logical_disk_start = BE64(data + 0x011B); 143 ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
144 ph->logical_disk_size = BE64(data + 0x0123); 144 ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
145 ph->config_start = BE64(data + 0x012B); 145 ph->config_start = get_unaligned_be64(data + 0x012B);
146 ph->config_size = BE64(data + 0x0133); 146 ph->config_size = get_unaligned_be64(data + 0x0133);
147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ 147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
148 if (ph->ver_major == 2 && ph->ver_minor == 12) 148 if (ph->ver_major == 2 && ph->ver_minor == 12)
149 is_vista = true; 149 is_vista = true;
@@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
191{ 191{
192 BUG_ON (!data || !toc); 192 BUG_ON (!data || !toc);
193 193
194 if (MAGIC_TOCBLOCK != BE64 (data)) { 194 if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
195 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt."); 195 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
196 return false; 196 return false;
197 } 197 }
198 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name)); 198 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
199 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0; 199 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
200 toc->bitmap1_start = BE64 (data + 0x2E); 200 toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
201 toc->bitmap1_size = BE64 (data + 0x36); 201 toc->bitmap1_size = get_unaligned_be64(data + 0x36);
202 202
203 if (strncmp (toc->bitmap1_name, TOC_BITMAP1, 203 if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
204 sizeof (toc->bitmap1_name)) != 0) { 204 sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
208 } 208 }
209 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name)); 209 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
210 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0; 210 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
211 toc->bitmap2_start = BE64 (data + 0x50); 211 toc->bitmap2_start = get_unaligned_be64(data + 0x50);
212 toc->bitmap2_size = BE64 (data + 0x58); 212 toc->bitmap2_size = get_unaligned_be64(data + 0x58);
213 if (strncmp (toc->bitmap2_name, TOC_BITMAP2, 213 if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
214 sizeof (toc->bitmap2_name)) != 0) { 214 sizeof (toc->bitmap2_name)) != 0) {
215 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.", 215 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
237{ 237{
238 BUG_ON (!data || !vm); 238 BUG_ON (!data || !vm);
239 239
240 if (MAGIC_VMDB != BE32 (data)) { 240 if (MAGIC_VMDB != get_unaligned_be32(data)) {
241 ldm_crit ("Cannot find the VMDB, database may be corrupt."); 241 ldm_crit ("Cannot find the VMDB, database may be corrupt.");
242 return false; 242 return false;
243 } 243 }
244 244
245 vm->ver_major = BE16 (data + 0x12); 245 vm->ver_major = get_unaligned_be16(data + 0x12);
246 vm->ver_minor = BE16 (data + 0x14); 246 vm->ver_minor = get_unaligned_be16(data + 0x14);
247 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) { 247 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
248 ldm_error ("Expected VMDB version %d.%d, got %d.%d. " 248 ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
249 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor); 249 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
250 return false; 250 return false;
251 } 251 }
252 252
253 vm->vblk_size = BE32 (data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 vm->vblk_offset = BE32 (data + 0x0C); 254 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = BE32 (data + 0x04); 255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 256
257 ldm_debug ("Parsed VMDB successfully."); 257 ldm_debug ("Parsed VMDB successfully.");
258 return true; 258 return true;
@@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
507 goto out; /* Already logged */ 507 goto out; /* Already logged */
508 508
509 /* Are there uncommitted transactions? */ 509 /* Are there uncommitted transactions? */
510 if (BE16(data + 0x10) != 0x01) { 510 if (get_unaligned_be16(data + 0x10) != 0x01) {
511 ldm_crit ("Database is not in a consistent state. Aborting."); 511 ldm_crit ("Database is not in a consistent state. Aborting.");
512 goto out; 512 goto out;
513 } 513 }
@@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
802 return false; 802 return false;
803 803
804 len += VBLK_SIZE_CMP3; 804 len += VBLK_SIZE_CMP3;
805 if (len != BE32 (buffer + 0x14)) 805 if (len != get_unaligned_be32(buffer + 0x14))
806 return false; 806 return false;
807 807
808 comp = &vb->vblk.comp; 808 comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
851 return false; 851 return false;
852 852
853 len += VBLK_SIZE_DGR3; 853 len += VBLK_SIZE_DGR3;
854 if (len != BE32 (buffer + 0x14)) 854 if (len != get_unaligned_be32(buffer + 0x14))
855 return false; 855 return false;
856 856
857 dgrp = &vb->vblk.dgrp; 857 dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
895 return false; 895 return false;
896 896
897 len += VBLK_SIZE_DGR4; 897 len += VBLK_SIZE_DGR4;
898 if (len != BE32 (buffer + 0x14)) 898 if (len != get_unaligned_be32(buffer + 0x14))
899 return false; 899 return false;
900 900
901 dgrp = &vb->vblk.dgrp; 901 dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
931 return false; 931 return false;
932 932
933 len += VBLK_SIZE_DSK3; 933 len += VBLK_SIZE_DSK3;
934 if (len != BE32 (buffer + 0x14)) 934 if (len != get_unaligned_be32(buffer + 0x14))
935 return false; 935 return false;
936 936
937 disk = &vb->vblk.disk; 937 disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
968 return false; 968 return false;
969 969
970 len += VBLK_SIZE_DSK4; 970 len += VBLK_SIZE_DSK4;
971 if (len != BE32 (buffer + 0x14)) 971 if (len != get_unaligned_be32(buffer + 0x14))
972 return false; 972 return false;
973 973
974 disk = &vb->vblk.disk; 974 disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
1034 return false; 1034 return false;
1035 } 1035 }
1036 len += VBLK_SIZE_PRT3; 1036 len += VBLK_SIZE_PRT3;
1037 if (len > BE32(buffer + 0x14)) { 1037 if (len > get_unaligned_be32(buffer + 0x14)) {
1038 ldm_error("len %d > BE32(buffer + 0x14) %d", len, 1038 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1039 BE32(buffer + 0x14)); 1039 get_unaligned_be32(buffer + 0x14));
1040 return false; 1040 return false;
1041 } 1041 }
1042 part = &vb->vblk.part; 1042 part = &vb->vblk.part;
1043 part->start = BE64(buffer + 0x24 + r_name); 1043 part->start = get_unaligned_be64(buffer + 0x24 + r_name);
1044 part->volume_offset = BE64(buffer + 0x2C + r_name); 1044 part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
1045 part->size = ldm_get_vnum(buffer + 0x34 + r_name); 1045 part->size = ldm_get_vnum(buffer + 0x34 + r_name);
1046 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); 1046 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
1047 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); 1047 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
1139 return false; 1139 return false;
1140 } 1140 }
1141 len += VBLK_SIZE_VOL5; 1141 len += VBLK_SIZE_VOL5;
1142 if (len > BE32(buffer + 0x14)) { 1142 if (len > get_unaligned_be32(buffer + 0x14)) {
1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len, 1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1144 BE32(buffer + 0x14)); 1144 get_unaligned_be32(buffer + 0x14));
1145 return false; 1145 return false;
1146 } 1146 }
1147 volu = &vb->vblk.volu; 1147 volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1294 1294
1295 BUG_ON (!data || !frags); 1295 BUG_ON (!data || !frags);
1296 1296
1297 group = BE32 (data + 0x08); 1297 group = get_unaligned_be32(data + 0x08);
1298 rec = BE16 (data + 0x0C); 1298 rec = get_unaligned_be16(data + 0x0C);
1299 num = BE16 (data + 0x0E); 1299 num = get_unaligned_be16(data + 0x0E);
1300 if ((num < 1) || (num > 4)) { 1300 if ((num < 1) || (num > 4)) {
1301 ldm_error ("A VBLK claims to have %d parts.", num); 1301 ldm_error ("A VBLK claims to have %d parts.", num);
1302 return false; 1302 return false;
@@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
1425 } 1425 }
1426 1426
1427 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */ 1427 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
1428 if (MAGIC_VBLK != BE32 (data)) { 1428 if (MAGIC_VBLK != get_unaligned_be32(data)) {
1429 ldm_error ("Expected to find a VBLK."); 1429 ldm_error ("Expected to find a VBLK.");
1430 goto out; 1430 goto out;
1431 } 1431 }
1432 1432
1433 recs = BE16 (data + 0x0E); /* Number of records */ 1433 recs = get_unaligned_be16(data + 0x0E); /* Number of records */
1434 if (recs == 1) { 1434 if (recs == 1) {
1435 if (!ldm_ldmdb_add (data, size, ldb)) 1435 if (!ldm_ldmdb_add (data, size, ldb))
1436 goto out; /* Already logged */ 1436 goto out; /* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 80f63b5fdd9f..30e08e809c1d 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@ struct parsed_partitions;
98#define TOC_BITMAP1 "config" /* Names of the two defined */ 98#define TOC_BITMAP1 "config" /* Names of the two defined */
99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ 99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
100 100
101/* Most numbers we deal with are big-endian and won't be aligned. */
102#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
103#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
104#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
105
106/* Borrowed from msdos.c */ 101/* Borrowed from msdos.c */
107#define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) 102#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
108 103
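The BE16/BE32/BE64 macros deleted above were local spellings of the generic get_unaligned_be*() helpers: read a big-endian value from a possibly unaligned address. A self-contained userspace sketch of the 32-bit case (the byte-by-byte load is one portable way to express it; the kernel may use faster arch-specific code under the hood):

#include <stdint.h>
#include <stdio.h>

/* Read a big-endian u32 from an address with no alignment guarantee. */
static uint32_t get_unaligned_be32(const void *p)
{
	const uint8_t *b = p;

	return ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) |
	       ((uint32_t)b[2] << 8) | (uint32_t)b[3];
}

int main(void)
{
	/* The "VMDB" magic placed at an odd, unaligned offset. */
	const uint8_t buf[] = { 0x00, 0x56, 0x4d, 0x44, 0x42 };

	printf("0x%08x\n", get_unaligned_be32(buf + 1)); /* 0x564d4442 */
	return 0;
}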
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 000000000000..73cd7a418f06
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
1config PROC_FS
2 bool "/proc file system support" if EMBEDDED
3 default y
4 help
5 This is a virtual file system providing information about the status
6 of the system. "Virtual" means that it doesn't take up any space on
7 your hard disk: the files are created on the fly by the kernel when
8 you try to access them. Also, you cannot read the files with older
9 version of the program less: you need to use more or cat.
10
11 It's totally cool; for example, "cat /proc/interrupts" gives
12 information about what the different IRQs are used for at the moment
13 (there is a small number of Interrupt ReQuest lines in your computer
14 that are used by the attached devices to gain the CPU's attention --
15 often a source of trouble if two devices are mistakenly configured
 16	  to use the same IRQ). The procinfo program can display some
 17	  information about your system gathered from the /proc file system.
18
19 Before you can use the /proc file system, it has to be mounted,
20 meaning it has to be given a location in the directory hierarchy.
21 That location should be /proc. A command such as "mount -t proc proc
22 /proc" or the equivalent line in /etc/fstab does the job.
23
24 The /proc file system is explained in the file
25 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
26 ("man 5 proc").
27
28 This option will enlarge your kernel by about 67 KB. Several
29 programs depend on this, so everyone should say Y here.
30
31config PROC_KCORE
32 bool "/proc/kcore support" if !ARM
33 depends on PROC_FS && MMU
34
35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)"
37 depends on PROC_FS && CRASH_DUMP
38 default y
39 help
40 Exports the dump image of crashed kernel in ELF format.
41
42config PROC_SYSCTL
43 bool "Sysctl support (/proc/sys)" if EMBEDDED
44 depends on PROC_FS
45 select SYSCTL
46 default y
47 ---help---
48 The sysctl interface provides a means of dynamically changing
49 certain kernel parameters and variables on the fly without requiring
50 a recompile of the kernel or reboot of the system. The primary
51 interface is through /proc/sys. If you say Y here a tree of
52 modifiable sysctl entries will be generated beneath the
53 /proc/sys directory. They are explained in the files
54 in <file:Documentation/sysctl/>. Note that enabling this
55 option will enlarge the kernel by at least 8 KB.
56
57 As it is generally a good thing, you should say Y here unless
58 building a kernel for install/rescue disks or your system is very
59 limited in memory.
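Moving these options into fs/proc/Kconfig changes where they are defined, not how they are consumed: code still keys off the generated CONFIG_* symbols. A hedged kernel-style fragment (example_init, example_fops and the entry name are hypothetical, not from this patch):

#ifdef CONFIG_PROC_FS
static int __init example_init(void)
{
	/* Register a file under /proc; the name here is made up. */
	if (!proc_create("example", 0444, NULL, &example_fops))
		return -ENOMEM;
	return 0;
}
#endif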
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..a891fe4cb43b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
2376} 2376}
2377 2377
2378#ifdef CONFIG_TASK_IO_ACCOUNTING 2378#ifdef CONFIG_TASK_IO_ACCOUNTING
2379static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2379static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2380{ 2380{
2381 u64 rchar, wchar, syscr, syscw;
2382 struct task_io_accounting ioac;
2383
2384 if (!whole) {
2385 rchar = task->rchar;
2386 wchar = task->wchar;
2387 syscr = task->syscr;
2388 syscw = task->syscw;
2389 memcpy(&ioac, &task->ioac, sizeof(ioac));
2390 } else {
2391 unsigned long flags;
2392 struct task_struct *t = task;
2393 rchar = wchar = syscr = syscw = 0;
2394 memset(&ioac, 0, sizeof(ioac));
2395
2396 rcu_read_lock();
2397 do {
2398 rchar += t->rchar;
2399 wchar += t->wchar;
2400 syscr += t->syscr;
2401 syscw += t->syscw;
2402
2403 ioac.read_bytes += t->ioac.read_bytes;
2404 ioac.write_bytes += t->ioac.write_bytes;
2405 ioac.cancelled_write_bytes +=
2406 t->ioac.cancelled_write_bytes;
2407 t = next_thread(t);
2408 } while (t != task);
2409 rcu_read_unlock();
2410
2411 if (lock_task_sighand(task, &flags)) {
2412 struct signal_struct *sig = task->signal;
2413
2414 rchar += sig->rchar;
2415 wchar += sig->wchar;
2416 syscr += sig->syscr;
2417 syscw += sig->syscw;
2418
2419 ioac.read_bytes += sig->ioac.read_bytes;
2420 ioac.write_bytes += sig->ioac.write_bytes;
2421 ioac.cancelled_write_bytes +=
2422 sig->ioac.cancelled_write_bytes;
2423
2424 unlock_task_sighand(task, &flags);
2425 }
2426 }
2427
2381 return sprintf(buffer, 2428 return sprintf(buffer,
2382#ifdef CONFIG_TASK_XACCT
2383 "rchar: %llu\n" 2429 "rchar: %llu\n"
2384 "wchar: %llu\n" 2430 "wchar: %llu\n"
2385 "syscr: %llu\n" 2431 "syscr: %llu\n"
2386 "syscw: %llu\n" 2432 "syscw: %llu\n"
2387#endif
2388 "read_bytes: %llu\n" 2433 "read_bytes: %llu\n"
2389 "write_bytes: %llu\n" 2434 "write_bytes: %llu\n"
2390 "cancelled_write_bytes: %llu\n", 2435 "cancelled_write_bytes: %llu\n",
2391#ifdef CONFIG_TASK_XACCT 2436 (unsigned long long)rchar,
2392 (unsigned long long)task->rchar, 2437 (unsigned long long)wchar,
2393 (unsigned long long)task->wchar, 2438 (unsigned long long)syscr,
2394 (unsigned long long)task->syscr, 2439 (unsigned long long)syscw,
2395 (unsigned long long)task->syscw, 2440 (unsigned long long)ioac.read_bytes,
2396#endif 2441 (unsigned long long)ioac.write_bytes,
2397 (unsigned long long)task->ioac.read_bytes, 2442 (unsigned long long)ioac.cancelled_write_bytes);
2398 (unsigned long long)task->ioac.write_bytes, 2443}
2399 (unsigned long long)task->ioac.cancelled_write_bytes); 2444
2445static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2446{
2447 return do_io_accounting(task, buffer, 0);
2400} 2448}
2401#endif 2449
2450static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2451{
2452 return do_io_accounting(task, buffer, 1);
2453}
2454#endif /* CONFIG_TASK_IO_ACCOUNTING */
2402 2455
2403/* 2456/*
2404 * Thread groups 2457 * Thread groups
@@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2470 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2523 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2471#endif 2524#endif
2472#ifdef CONFIG_TASK_IO_ACCOUNTING 2525#ifdef CONFIG_TASK_IO_ACCOUNTING
2473 INF("io", S_IRUGO, pid_io_accounting), 2526 INF("io", S_IRUGO, tgid_io_accounting),
2474#endif 2527#endif
2475}; 2528};
2476 2529
@@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = {
2797#ifdef CONFIG_FAULT_INJECTION 2850#ifdef CONFIG_FAULT_INJECTION
2798 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2851 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2799#endif 2852#endif
2853#ifdef CONFIG_TASK_IO_ACCOUNTING
2854 INF("io", S_IRUGO, tid_io_accounting),
2855#endif
2800}; 2856};
2801 2857
2802static int proc_tid_base_readdir(struct file * filp, 2858static int proc_tid_base_readdir(struct file * filp,
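With the split above, /proc/<pid>/io reports counters summed over every thread in the group (the whole=1 path), while /proc/<pid>/task/<tid>/io keeps the single-task view (whole=0). A small runnable reader, assuming a kernel built with CONFIG_TASK_IO_ACCOUNTING:

#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/self/io", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* rchar, wchar, syscr, syscw, read_bytes, write_bytes, ... */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}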
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..bc0a0dd2d844 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
597 ent->pde_users = 0; 597 ent->pde_users = 0;
598 spin_lock_init(&ent->pde_unload_lock); 598 spin_lock_init(&ent->pde_unload_lock);
599 ent->pde_unload_completion = NULL; 599 ent->pde_unload_completion = NULL;
600 INIT_LIST_HEAD(&ent->pde_openers);
600 out: 601 out:
601 return ent; 602 return ent;
602} 603}
@@ -789,6 +790,19 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
789 spin_unlock(&de->pde_unload_lock); 790 spin_unlock(&de->pde_unload_lock);
790 791
791continue_removing: 792continue_removing:
793 spin_lock(&de->pde_unload_lock);
794 while (!list_empty(&de->pde_openers)) {
795 struct pde_opener *pdeo;
796
797 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
798 list_del(&pdeo->lh);
799 spin_unlock(&de->pde_unload_lock);
800 pdeo->release(pdeo->inode, pdeo->file);
801 kfree(pdeo);
802 spin_lock(&de->pde_unload_lock);
803 }
804 spin_unlock(&de->pde_unload_lock);
805
792 if (S_ISDIR(de->mode)) 806 if (S_ISDIR(de->mode))
793 parent->nlink--; 807 parent->nlink--;
794 de->nlink = 0; 808 de->nlink = 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..02eca2ed9dd7 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -111,27 +111,25 @@ int __init proc_init_inodecache(void)
111 return 0; 111 return 0;
112} 112}
113 113
114static int proc_remount(struct super_block *sb, int *flags, char *data)
115{
116 *flags |= MS_NODIRATIME;
117 return 0;
118}
119
120static const struct super_operations proc_sops = { 114static const struct super_operations proc_sops = {
121 .alloc_inode = proc_alloc_inode, 115 .alloc_inode = proc_alloc_inode,
122 .destroy_inode = proc_destroy_inode, 116 .destroy_inode = proc_destroy_inode,
123 .drop_inode = generic_delete_inode, 117 .drop_inode = generic_delete_inode,
124 .delete_inode = proc_delete_inode, 118 .delete_inode = proc_delete_inode,
125 .statfs = simple_statfs, 119 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127}; 120};
128 121
129static void pde_users_dec(struct proc_dir_entry *pde) 122static void __pde_users_dec(struct proc_dir_entry *pde)
130{ 123{
131 spin_lock(&pde->pde_unload_lock);
132 pde->pde_users--; 124 pde->pde_users--;
133 if (pde->pde_unload_completion && pde->pde_users == 0) 125 if (pde->pde_unload_completion && pde->pde_users == 0)
134 complete(pde->pde_unload_completion); 126 complete(pde->pde_unload_completion);
127}
128
129static void pde_users_dec(struct proc_dir_entry *pde)
130{
131 spin_lock(&pde->pde_unload_lock);
132 __pde_users_dec(pde);
135 spin_unlock(&pde->pde_unload_lock); 133 spin_unlock(&pde->pde_unload_lock);
136} 134}
137 135
@@ -318,36 +316,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
318 struct proc_dir_entry *pde = PDE(inode); 316 struct proc_dir_entry *pde = PDE(inode);
319 int rv = 0; 317 int rv = 0;
320 int (*open)(struct inode *, struct file *); 318 int (*open)(struct inode *, struct file *);
319 int (*release)(struct inode *, struct file *);
320 struct pde_opener *pdeo;
321
322 /*
323 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
324 * sequence. ->release won't be called because ->proc_fops will be
325 * cleared. Depending on complexity of ->release, consequences vary.
326 *
327 * We can't wait for mercy when close will be done for real, it's
328 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
329 * by hand in remove_proc_entry(). For this, save opener's credentials
330 * for later.
331 */
332 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
333 if (!pdeo)
334 return -ENOMEM;
321 335
322 spin_lock(&pde->pde_unload_lock); 336 spin_lock(&pde->pde_unload_lock);
323 if (!pde->proc_fops) { 337 if (!pde->proc_fops) {
324 spin_unlock(&pde->pde_unload_lock); 338 spin_unlock(&pde->pde_unload_lock);
339 kfree(pdeo);
325 return rv; 340 return rv;
326 } 341 }
327 pde->pde_users++; 342 pde->pde_users++;
328 open = pde->proc_fops->open; 343 open = pde->proc_fops->open;
344 release = pde->proc_fops->release;
329 spin_unlock(&pde->pde_unload_lock); 345 spin_unlock(&pde->pde_unload_lock);
330 346
331 if (open) 347 if (open)
332 rv = open(inode, file); 348 rv = open(inode, file);
333 349
334 pde_users_dec(pde); 350 spin_lock(&pde->pde_unload_lock);
351 if (rv == 0 && release) {
352 /* To know what to release. */
353 pdeo->inode = inode;
354 pdeo->file = file;
355 /* Strictly for "too late" ->release in proc_reg_release(). */
356 pdeo->release = release;
357 list_add(&pdeo->lh, &pde->pde_openers);
358 } else
359 kfree(pdeo);
360 __pde_users_dec(pde);
361 spin_unlock(&pde->pde_unload_lock);
335 return rv; 362 return rv;
336} 363}
337 364
365static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
366 struct inode *inode, struct file *file)
367{
368 struct pde_opener *pdeo;
369
370 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
371 if (pdeo->inode == inode && pdeo->file == file)
372 return pdeo;
373 }
374 return NULL;
375}
376
338static int proc_reg_release(struct inode *inode, struct file *file) 377static int proc_reg_release(struct inode *inode, struct file *file)
339{ 378{
340 struct proc_dir_entry *pde = PDE(inode); 379 struct proc_dir_entry *pde = PDE(inode);
341 int rv = 0; 380 int rv = 0;
342 int (*release)(struct inode *, struct file *); 381 int (*release)(struct inode *, struct file *);
382 struct pde_opener *pdeo;
343 383
344 spin_lock(&pde->pde_unload_lock); 384 spin_lock(&pde->pde_unload_lock);
385 pdeo = find_pde_opener(pde, inode, file);
345 if (!pde->proc_fops) { 386 if (!pde->proc_fops) {
346 spin_unlock(&pde->pde_unload_lock); 387 /*
388 * Can't simply exit, __fput() will think that everything is OK,
389 * and move on to freeing struct file. remove_proc_entry() will
390 * find slacker in opener's list and will try to do non-trivial
391 * things with struct file. Therefore, remove opener from list.
392 *
393 * But if opener is removed from list, who will ->release it?
394 */
395 if (pdeo) {
396 list_del(&pdeo->lh);
397 spin_unlock(&pde->pde_unload_lock);
398 rv = pdeo->release(inode, file);
399 kfree(pdeo);
400 } else
401 spin_unlock(&pde->pde_unload_lock);
347 return rv; 402 return rv;
348 } 403 }
349 pde->pde_users++; 404 pde->pde_users++;
350 release = pde->proc_fops->release; 405 release = pde->proc_fops->release;
406 if (pdeo) {
407 list_del(&pdeo->lh);
408 kfree(pdeo);
409 }
351 spin_unlock(&pde->pde_unload_lock); 410 spin_unlock(&pde->pde_unload_lock);
352 411
353 if (release) 412 if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..442202314d53 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
63extern const struct file_operations proc_clear_refs_operations; 63extern const struct file_operations proc_clear_refs_operations;
64extern const struct file_operations proc_pagemap_operations; 64extern const struct file_operations proc_pagemap_operations;
65extern const struct file_operations proc_net_operations; 65extern const struct file_operations proc_net_operations;
66extern const struct file_operations proc_kmsg_operations;
66extern const struct inode_operations proc_net_inode_operations; 67extern const struct inode_operations proc_net_inode_operations;
67 68
68void free_proc_entry(struct proc_dir_entry *de); 69void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
88 struct dentry *dentry); 89 struct dentry *dentry);
89int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 90int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
90 filldir_t filldir); 91 filldir_t filldir);
92
93struct pde_opener {
94 struct inode *inode;
95 struct file *file;
96 int (*release)(struct inode *, struct file *);
97 struct list_head lh;
98};
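Taken together, the generic.c, inode.c and internal.h hunks above record every successful open on pde->pde_openers, with the ->release pointer captured at open time, so remove_proc_entry() can invoke it for openers whose ->proc_fops has already been cleared. A compressed, runnable userspace model of the idea (the single global list and all names are illustrative, and the kernel version is of course lock-protected):

#include <stdio.h>
#include <stdlib.h>

struct opener {
	int id;
	void (*release)(int id);
	struct opener *next;
};

static struct opener *openers;

static void my_release(int id)
{
	printf("release(%d)\n", id);
}

static void record_open(int id)
{
	struct opener *o = malloc(sizeof(*o));

	if (!o)
		return;
	o->id = id;
	o->release = my_release;	/* captured now, usable after teardown */
	o->next = openers;
	openers = o;
}

static void remove_entry(void)
{
	while (openers) {
		struct opener *o = openers;

		openers = o->next;
		o->release(o->id);	/* ->release "by hand", as in remove_proc_entry() */
		free(o);
	}
}

int main(void)
{
	record_open(1);
	record_open(2);
	remove_entry();
	return 0;
}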
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf547..c2370c76fb71 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
23 23
24#define CORE_STR "CORE" 24#define CORE_STR "CORE"
25 25
26#ifndef ELF_CORE_EFLAGS
27#define ELF_CORE_EFLAGS 0
28#endif
29
26static int open_kcore(struct inode * inode, struct file * filp) 30static int open_kcore(struct inode * inode, struct file * filp)
27{ 31{
28 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 32 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
164 elf->e_entry = 0; 168 elf->e_entry = 0;
165 elf->e_phoff = sizeof(struct elfhdr); 169 elf->e_phoff = sizeof(struct elfhdr);
166 elf->e_shoff = 0; 170 elf->e_shoff = 0;
167#if defined(CONFIG_H8300) 171 elf->e_flags = ELF_CORE_EFLAGS;
168 elf->e_flags = ELF_FLAGS;
169#else
170 elf->e_flags = 0;
171#endif
172 elf->e_ehsize = sizeof(struct elfhdr); 172 elf->e_ehsize = sizeof(struct elfhdr);
173 elf->e_phentsize= sizeof(struct elf_phdr); 173 elf->e_phentsize= sizeof(struct elf_phdr);
174 elf->e_phnum = nphdr; 174 elf->e_phnum = nphdr;
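The #ifndef/#define pair is the usual overridable-default idiom: an architecture that needs special core flags (h8300, presumably via its asm/elf.h) defines ELF_CORE_EFLAGS first, and everyone else silently gets 0, which lets the #if defined(CONFIG_H8300) special case disappear from common code. The idiom in runnable form:

#include <stdio.h>

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0	/* arch headers may predefine this */
#endif

int main(void)
{
	printf("e_flags = %d\n", ELF_CORE_EFLAGS);
	return 0;
}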
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#include "internal.h"
19
18extern wait_queue_head_t log_wait; 20extern wait_queue_head_t log_wait;
19 21
 20 extern int do_syslog(int type, char __user *buf, int count); 22 extern int do_syslog(int type, char __user *buf, int count);
diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f3c7aa..7f4386ebc23a 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
186 186
187void sync_dquots(struct super_block *sb, int type) 187void sync_dquots(struct super_block *sb, int type)
188{ 188{
189 int cnt, dirty; 189 int cnt;
190 190
191 if (sb) { 191 if (sb) {
192 if (sb->s_qcop->quota_sync) 192 if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
198restart: 198restart:
199 list_for_each_entry(sb, &super_blocks, s_list) { 199 list_for_each_entry(sb, &super_blocks, s_list) {
200 /* This test just improves performance so it needn't be reliable... */ 200 /* This test just improves performance so it needn't be reliable... */
201 for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) 201 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
202 if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) 202 if (type != -1 && type != cnt)
203 && info_any_dirty(&sb_dqopt(sb)->info[cnt])) 203 continue;
204 dirty = 1; 204 if (!sb_has_quota_enabled(sb, cnt))
205 if (!dirty) 205 continue;
206 if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
207 list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
208 continue;
209 break;
210 }
211 if (cnt == MAXQUOTAS)
206 continue; 212 continue;
207 sb->s_count++; 213 sb->s_count++;
208 spin_unlock(&sb_lock); 214 spin_unlock(&sb_lock);
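The rewrite drops the separate dirty flag: the inner loop now breaks on the first quota type that is both enabled and dirty, so reaching cnt == MAXQUOTAS means the superblock has nothing to sync and can be skipped. A standalone model of the control flow, with stand-in predicates for sb_has_quota_enabled() and the dirtiness test:

#include <stdio.h>

#define MAXQUOTAS 2

static int quota_enabled(int cnt) { return cnt == 1; }
static int quota_dirty(int cnt)   { return cnt == 1; }

int main(void)
{
	int type = -1, cnt;

	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (type != -1 && type != cnt)
			continue;
		if (!quota_enabled(cnt) || !quota_dirty(cnt))
			continue;
		break;		/* found work for this superblock */
	}
	if (cnt == MAXQUOTAS)
		printf("skip superblock\n");
	else
		printf("sync quota type %d\n", cnt);
	return 0;
}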
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf9269105c..5ae15b13eeb0 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
1#include <linux/errno.h> 1#include <linux/errno.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/quota.h> 3#include <linux/quota.h>
4#include <linux/quotaops.h>
4#include <linux/dqblk_v1.h> 5#include <linux/dqblk_v1.h>
5#include <linux/quotaio_v1.h> 6#include <linux/quotaio_v1.h>
6#include <linux/kernel.h> 7#include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada903633..b53827dc02d9 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/quotaops.h>
14 15
15#include <asm/byteorder.h> 16#include <asm/byteorder.h>
16 17
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2fa4743..c8f60ee183b5 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
34** from within kupdate, it will ignore the immediate flag 34** from within kupdate, it will ignore the immediate flag
35*/ 35*/
36 36
37#include <asm/uaccess.h>
38#include <asm/system.h>
39
40#include <linux/time.h> 37#include <linux/time.h>
41#include <linux/semaphore.h> 38#include <linux/semaphore.h>
42
43#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
44#include <linux/reiserfs_fs.h> 40#include <linux/reiserfs_fs.h>
45
46#include <linux/kernel.h> 41#include <linux/kernel.h>
47#include <linux/errno.h> 42#include <linux/errno.h>
48#include <linux/fcntl.h> 43#include <linux/fcntl.h>
@@ -54,6 +49,9 @@
54#include <linux/writeback.h> 49#include <linux/writeback.h>
55#include <linux/blkdev.h> 50#include <linux/blkdev.h>
56#include <linux/backing-dev.h> 51#include <linux/backing-dev.h>
52#include <linux/uaccess.h>
53
54#include <asm/system.h>
57 55
58/* gets a struct reiserfs_journal_list * from a list head */ 56/* gets a struct reiserfs_journal_list * from a list head */
59#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 57#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
558static inline void lock_journal(struct super_block *p_s_sb) 556static inline void lock_journal(struct super_block *p_s_sb)
559{ 557{
560 PROC_INFO_INC(p_s_sb, journal.lock_journal); 558 PROC_INFO_INC(p_s_sb, journal.lock_journal);
561 down(&SB_JOURNAL(p_s_sb)->j_lock); 559 mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
562} 560}
563 561
564/* unlock the current transaction */ 562/* unlock the current transaction */
565static inline void unlock_journal(struct super_block *p_s_sb) 563static inline void unlock_journal(struct super_block *p_s_sb)
566{ 564{
567 up(&SB_JOURNAL(p_s_sb)->j_lock); 565 mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
568} 566}
569 567
570static inline void get_journal_list(struct reiserfs_journal_list *jl) 568static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
1045 } 1043 }
1046 1044
1047 /* make sure nobody is trying to flush this one at the same time */ 1045 /* make sure nobody is trying to flush this one at the same time */
1048 down(&jl->j_commit_lock); 1046 mutex_lock(&jl->j_commit_mutex);
1049 if (!journal_list_still_alive(s, trans_id)) { 1047 if (!journal_list_still_alive(s, trans_id)) {
1050 up(&jl->j_commit_lock); 1048 mutex_unlock(&jl->j_commit_mutex);
1051 goto put_jl; 1049 goto put_jl;
1052 } 1050 }
1053 BUG_ON(jl->j_trans_id == 0); 1051 BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
1057 if (flushall) { 1055 if (flushall) {
1058 atomic_set(&(jl->j_older_commits_done), 1); 1056 atomic_set(&(jl->j_older_commits_done), 1);
1059 } 1057 }
1060 up(&jl->j_commit_lock); 1058 mutex_unlock(&jl->j_commit_mutex);
1061 goto put_jl; 1059 goto put_jl;
1062 } 1060 }
1063 1061
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
1181 if (flushall) { 1179 if (flushall) {
1182 atomic_set(&(jl->j_older_commits_done), 1); 1180 atomic_set(&(jl->j_older_commits_done), 1);
1183 } 1181 }
1184 up(&jl->j_commit_lock); 1182 mutex_unlock(&jl->j_commit_mutex);
1185 put_jl: 1183 put_jl:
1186 put_journal_list(s, jl); 1184 put_journal_list(s, jl);
1187 1185
@@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s,
1411 1409
1412 /* if flushall == 0, the lock is already held */ 1410 /* if flushall == 0, the lock is already held */
1413 if (flushall) { 1411 if (flushall) {
1414 down(&journal->j_flush_sem); 1412 mutex_lock(&journal->j_flush_mutex);
1415 } else if (!down_trylock(&journal->j_flush_sem)) { 1413 } else if (mutex_trylock(&journal->j_flush_mutex)) {
1416 BUG(); 1414 BUG();
1417 } 1415 }
1418 1416
@@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s,
1642 jl->j_state = 0; 1640 jl->j_state = 0;
1643 put_journal_list(s, jl); 1641 put_journal_list(s, jl);
1644 if (flushall) 1642 if (flushall)
1645 up(&journal->j_flush_sem); 1643 mutex_unlock(&journal->j_flush_mutex);
1646 put_fs_excl(); 1644 put_fs_excl();
1647 return err; 1645 return err;
1648} 1646}
@@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s,
1772 struct reiserfs_journal *journal = SB_JOURNAL(s); 1770 struct reiserfs_journal *journal = SB_JOURNAL(s);
1773 chunk.nr = 0; 1771 chunk.nr = 0;
1774 1772
1775 down(&journal->j_flush_sem); 1773 mutex_lock(&journal->j_flush_mutex);
1776 if (!journal_list_still_alive(s, orig_trans_id)) { 1774 if (!journal_list_still_alive(s, orig_trans_id)) {
1777 goto done; 1775 goto done;
1778 } 1776 }
1779 1777
1780 /* we've got j_flush_sem held, nobody is going to delete any 1778 /* we've got j_flush_mutex held, nobody is going to delete any
1781 * of these lists out from underneath us 1779 * of these lists out from underneath us
1782 */ 1780 */
1783 while ((num_trans && transactions_flushed < num_trans) || 1781 while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s,
1812 } 1810 }
1813 1811
1814 done: 1812 done:
1815 up(&journal->j_flush_sem); 1813 mutex_unlock(&journal->j_flush_mutex);
1816 return ret; 1814 return ret;
1817} 1815}
1818 1816
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2556 INIT_LIST_HEAD(&jl->j_working_list); 2554 INIT_LIST_HEAD(&jl->j_working_list);
2557 INIT_LIST_HEAD(&jl->j_tail_bh_list); 2555 INIT_LIST_HEAD(&jl->j_tail_bh_list);
2558 INIT_LIST_HEAD(&jl->j_bh_list); 2556 INIT_LIST_HEAD(&jl->j_bh_list);
2559 sema_init(&jl->j_commit_lock, 1); 2557 mutex_init(&jl->j_commit_mutex);
2560 SB_JOURNAL(s)->j_num_lists++; 2558 SB_JOURNAL(s)->j_num_lists++;
2561 get_journal_list(jl); 2559 get_journal_list(jl);
2562 return jl; 2560 return jl;
@@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2837 journal->j_last = NULL; 2835 journal->j_last = NULL;
2838 journal->j_first = NULL; 2836 journal->j_first = NULL;
2839 init_waitqueue_head(&(journal->j_join_wait)); 2837 init_waitqueue_head(&(journal->j_join_wait));
2840 sema_init(&journal->j_lock, 1); 2838 mutex_init(&journal->j_mutex);
2841 sema_init(&journal->j_flush_sem, 1); 2839 mutex_init(&journal->j_flush_mutex);
2842 2840
2843 journal->j_trans_id = 10; 2841 journal->j_trans_id = 10;
2844 journal->j_mount_id = 10; 2842 journal->j_mount_id = 10;
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4030 * the new transaction is fully setup, and we've already flushed the 4028 * the new transaction is fully setup, and we've already flushed the
4031 * ordered bh list 4029 * ordered bh list
4032 */ 4030 */
4033 down(&jl->j_commit_lock); 4031 mutex_lock(&jl->j_commit_mutex);
4034 4032
4035 /* save the transaction id in case we need to commit it later */ 4033 /* save the transaction id in case we need to commit it later */
4036 commit_trans_id = jl->j_trans_id; 4034 commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4196 lock_kernel(); 4194 lock_kernel();
4197 } 4195 }
4198 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4196 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4199 up(&jl->j_commit_lock); 4197 mutex_unlock(&jl->j_commit_mutex);
4200 4198
4201 /* honor the flush wishes from the caller, simple commits can 4199 /* honor the flush wishes from the caller, simple commits can
4202 ** be done outside the journal lock, they are done below 4200 ** be done outside the journal lock, they are done below
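The j_lock and j_flush_sem semaphores converted above were only ever used as sleeping locks, so they map one-for-one onto mutexes. The single subtlety is the trylock: down_trylock() returns 0 on success, while mutex_trylock() returns nonzero on success, which is why the old !down_trylock() assertion becomes a bare mutex_trylock(). A kernel-style sketch of the converted pieces (struct and function names here are illustrative, not the file's real ones):

struct journal_sketch {
	struct mutex j_mutex;		/* was: struct semaphore j_lock */
	struct mutex j_flush_mutex;	/* was: struct semaphore j_flush_sem */
};

static void lock_journal_sketch(struct journal_sketch *j)
{
	mutex_lock(&j->j_mutex);	/* was: down(&j->j_lock) */
}

static void assert_flush_mutex_held(struct journal_sketch *j)
{
	/* was: if (!down_trylock(&j->j_flush_sem)) BUG(); */
	if (mutex_trylock(&j->j_flush_mutex))
		BUG();			/* the lock was free: caller lied */
}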
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d40f2bd1970..2ec748ba0bd3 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,6 +22,7 @@
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/exportfs.h> 24#include <linux/exportfs.h>
25#include <linux/quotaops.h>
25#include <linux/vfs.h> 26#include <linux/vfs.h>
26#include <linux/mnt_namespace.h> 27#include <linux/mnt_namespace.h>
27#include <linux/mount.h> 28#include <linux/mount.h>
@@ -182,7 +183,7 @@ static int finish_unfinished(struct super_block *s)
182 int ret = reiserfs_quota_on_mount(s, i); 183 int ret = reiserfs_quota_on_mount(s, i);
183 if (ret < 0) 184 if (ret < 0)
184 reiserfs_warning(s, 185 reiserfs_warning(s,
185 "reiserfs: cannot turn on journalled quota: error %d", 186 "reiserfs: cannot turn on journaled quota: error %d",
186 ret); 187 ret);
187 } 188 }
188 } 189 }
@@ -876,7 +877,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
876 mount options were selected. */ 877 mount options were selected. */
877 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ 878 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
878 char **jdev_name, 879 char **jdev_name,
879 unsigned int *commit_max_age) 880 unsigned int *commit_max_age,
881 char **qf_names,
882 unsigned int *qfmt)
880{ 883{
881 int c; 884 int c;
882 char *arg = NULL; 885 char *arg = NULL;
@@ -992,9 +995,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
992 if (c == 'u' || c == 'g') { 995 if (c == 'u' || c == 'g') {
993 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; 996 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
994 997
995 if (sb_any_quota_enabled(s)) { 998 if ((sb_any_quota_enabled(s) ||
999 sb_any_quota_suspended(s)) &&
1000 (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
996 reiserfs_warning(s, 1001 reiserfs_warning(s,
997 "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); 1002 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
998 return 0; 1003 return 0;
999 } 1004 }
1000 if (*arg) { /* Some filename specified? */ 1005 if (*arg) { /* Some filename specified? */
@@ -1011,46 +1016,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1011 "reiserfs_parse_options: quotafile must be on filesystem root."); 1016 "reiserfs_parse_options: quotafile must be on filesystem root.");
1012 return 0; 1017 return 0;
1013 } 1018 }
1014 REISERFS_SB(s)->s_qf_names[qtype] = 1019 qf_names[qtype] =
1015 kmalloc(strlen(arg) + 1, GFP_KERNEL); 1020 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1016 if (!REISERFS_SB(s)->s_qf_names[qtype]) { 1021 if (!qf_names[qtype]) {
1017 reiserfs_warning(s, 1022 reiserfs_warning(s,
1018 "reiserfs_parse_options: not enough memory for storing quotafile name."); 1023 "reiserfs_parse_options: not enough memory for storing quotafile name.");
1019 return 0; 1024 return 0;
1020 } 1025 }
1021 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); 1026 strcpy(qf_names[qtype], arg);
1022 *mount_options |= 1 << REISERFS_QUOTA; 1027 *mount_options |= 1 << REISERFS_QUOTA;
1023 } else { 1028 } else {
1024 kfree(REISERFS_SB(s)->s_qf_names[qtype]); 1029 if (qf_names[qtype] !=
1025 REISERFS_SB(s)->s_qf_names[qtype] = NULL; 1030 REISERFS_SB(s)->s_qf_names[qtype])
1031 kfree(qf_names[qtype]);
1032 qf_names[qtype] = NULL;
1026 } 1033 }
1027 } 1034 }
1028 if (c == 'f') { 1035 if (c == 'f') {
1029 if (!strcmp(arg, "vfsold")) 1036 if (!strcmp(arg, "vfsold"))
1030 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; 1037 *qfmt = QFMT_VFS_OLD;
1031 else if (!strcmp(arg, "vfsv0")) 1038 else if (!strcmp(arg, "vfsv0"))
1032 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; 1039 *qfmt = QFMT_VFS_V0;
1033 else { 1040 else {
1034 reiserfs_warning(s, 1041 reiserfs_warning(s,
1035 "reiserfs_parse_options: unknown quota format specified."); 1042 "reiserfs_parse_options: unknown quota format specified.");
1036 return 0; 1043 return 0;
1037 } 1044 }
1045 if ((sb_any_quota_enabled(s) ||
1046 sb_any_quota_suspended(s)) &&
1047 *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
1048 reiserfs_warning(s,
1049 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
1050 return 0;
1051 }
1038 } 1052 }
1039#else 1053#else
1040 if (c == 'u' || c == 'g' || c == 'f') { 1054 if (c == 'u' || c == 'g' || c == 'f') {
1041 reiserfs_warning(s, 1055 reiserfs_warning(s,
1042 "reiserfs_parse_options: journalled quota options not supported."); 1056 "reiserfs_parse_options: journaled quota options not supported.");
1043 return 0; 1057 return 0;
1044 } 1058 }
1045#endif 1059#endif
1046 } 1060 }
1047 1061
1048#ifdef CONFIG_QUOTA 1062#ifdef CONFIG_QUOTA
1049 if (!REISERFS_SB(s)->s_jquota_fmt 1063 if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
1050 && (REISERFS_SB(s)->s_qf_names[USRQUOTA] 1064 && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
1051 || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
1052 reiserfs_warning(s, 1065 reiserfs_warning(s,
1053 "reiserfs_parse_options: journalled quota format not specified."); 1066 "reiserfs_parse_options: journaled quota format not specified.");
1054 return 0; 1067 return 0;
1055 } 1068 }
1056 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ 1069 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1130,6 +1143,21 @@ static void handle_attrs(struct super_block *s)
1130 } 1143 }
1131} 1144}
1132 1145
1146#ifdef CONFIG_QUOTA
1147static void handle_quota_files(struct super_block *s, char **qf_names,
1148 unsigned int *qfmt)
1149{
1150 int i;
1151
1152 for (i = 0; i < MAXQUOTAS; i++) {
1153 if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
1154 kfree(REISERFS_SB(s)->s_qf_names[i]);
1155 REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
1156 }
1157 REISERFS_SB(s)->s_jquota_fmt = *qfmt;
1158}
1159#endif
1160
1133static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) 1161static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1134{ 1162{
1135 struct reiserfs_super_block *rs; 1163 struct reiserfs_super_block *rs;
@@ -1141,23 +1169,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1141 struct reiserfs_journal *journal = SB_JOURNAL(s); 1169 struct reiserfs_journal *journal = SB_JOURNAL(s);
1142 char *new_opts = kstrdup(arg, GFP_KERNEL); 1170 char *new_opts = kstrdup(arg, GFP_KERNEL);
1143 int err; 1171 int err;
1172 char *qf_names[MAXQUOTAS];
1173 unsigned int qfmt = 0;
1144#ifdef CONFIG_QUOTA 1174#ifdef CONFIG_QUOTA
1145 int i; 1175 int i;
1176
1177 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
1146#endif 1178#endif
1147 1179
1148 rs = SB_DISK_SUPER_BLOCK(s); 1180 rs = SB_DISK_SUPER_BLOCK(s);
1149 1181
1150 if (!reiserfs_parse_options 1182 if (!reiserfs_parse_options
1151 (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { 1183 (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
1184 qf_names, &qfmt)) {
1152#ifdef CONFIG_QUOTA 1185#ifdef CONFIG_QUOTA
1153 for (i = 0; i < MAXQUOTAS; i++) { 1186 for (i = 0; i < MAXQUOTAS; i++)
1154 kfree(REISERFS_SB(s)->s_qf_names[i]); 1187 if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
1155 REISERFS_SB(s)->s_qf_names[i] = NULL; 1188 kfree(qf_names[i]);
1156 }
1157#endif 1189#endif
1158 err = -EINVAL; 1190 err = -EINVAL;
1159 goto out_err; 1191 goto out_err;
1160 } 1192 }
1193#ifdef CONFIG_QUOTA
1194 handle_quota_files(s, qf_names, &qfmt);
1195#endif
1161 1196
1162 handle_attrs(s); 1197 handle_attrs(s);
1163 1198
@@ -1570,6 +1605,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1570 char *jdev_name; 1605 char *jdev_name;
1571 struct reiserfs_sb_info *sbi; 1606 struct reiserfs_sb_info *sbi;
1572 int errval = -EINVAL; 1607 int errval = -EINVAL;
1608 char *qf_names[MAXQUOTAS] = {};
1609 unsigned int qfmt = 0;
1573 1610
1574 save_mount_options(s, data); 1611 save_mount_options(s, data);
1575 1612
@@ -1597,9 +1634,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1597 jdev_name = NULL; 1634 jdev_name = NULL;
1598 if (reiserfs_parse_options 1635 if (reiserfs_parse_options
1599 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1636 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
1600 &commit_max_age) == 0) { 1637 &commit_max_age, qf_names, &qfmt) == 0) {
1601 goto error; 1638 goto error;
1602 } 1639 }
1640#ifdef CONFIG_QUOTA
1641 handle_quota_files(s, qf_names, &qfmt);
1642#endif
1603 1643
1604 if (blocks) { 1644 if (blocks) {
1605 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " 1645 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1859,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1819 1859
1820 return (0); 1860 return (0);
1821 1861
1822 error: 1862error:
1823 if (jinit_done) { /* kill the commit thread, free journal ram */ 1863 if (jinit_done) { /* kill the commit thread, free journal ram */
1824 journal_release_error(NULL, s); 1864 journal_release_error(NULL, s);
1825 } 1865 }
@@ -1830,10 +1870,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1830#ifdef CONFIG_QUOTA 1870#ifdef CONFIG_QUOTA
1831 { 1871 {
1832 int j; 1872 int j;
1833 for (j = 0; j < MAXQUOTAS; j++) { 1873 for (j = 0; j < MAXQUOTAS; j++)
1834 kfree(sbi->s_qf_names[j]); 1874 kfree(qf_names[j]);
1835 sbi->s_qf_names[j] = NULL;
1836 }
1837 } 1875 }
1838#endif 1876#endif
1839 kfree(sbi); 1877 kfree(sbi);
@@ -1980,7 +2018,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
1980 2018
1981static int reiserfs_mark_dquot_dirty(struct dquot *dquot) 2019static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
1982{ 2020{
1983 /* Are we journalling quotas? */ 2021 /* Are we journaling quotas? */
1984 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2022 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
1985 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2023 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
1986 dquot_mark_dquot_dirty(dquot); 2024 dquot_mark_dquot_dirty(dquot);
@@ -2026,6 +2064,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2026 int err; 2064 int err;
2027 struct nameidata nd; 2065 struct nameidata nd;
2028 struct inode *inode; 2066 struct inode *inode;
2067 struct reiserfs_transaction_handle th;
2029 2068
2030 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) 2069 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
2031 return -EINVAL; 2070 return -EINVAL;
@@ -2053,17 +2092,28 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2053 } 2092 }
2054 mark_inode_dirty(inode); 2093 mark_inode_dirty(inode);
2055 } 2094 }
2056 /* Not journalling quota? No more tests needed... */ 2095 /* Journaling quota? */
2057 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && 2096 if (REISERFS_SB(sb)->s_qf_names[type]) {
2058 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { 2097 /* Quotafile not of fs root? */
2059 path_put(&nd.path); 2098 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2060 return vfs_quota_on(sb, type, format_id, path, 0); 2099 reiserfs_warning(sb,
2061 }
2062 /* Quotafile not of fs root? */
2063 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2064 reiserfs_warning(sb,
2065 "reiserfs: Quota file not on filesystem root. " 2100 "reiserfs: Quota file not on filesystem root. "
2066 "Journalled quota will not work."); 2101 "Journalled quota will not work.");
2102 }
2103
2104 /*
2105 * When we journal data on quota file, we have to flush journal to see
2106 * all updates to the file when we bypass pagecache...
2107 */
2108 if (reiserfs_file_data_log(inode)) {
2109 /* Just start temporary transaction and finish it */
2110 err = journal_begin(&th, sb, 1);
2111 if (err)
2112 return err;
2113 err = journal_end_sync(&th, sb, 1);
2114 if (err)
2115 return err;
2116 }
2067 path_put(&nd.path); 2117 path_put(&nd.path);
2068 return vfs_quota_on(sb, type, format_id, path, 0); 2118 return vfs_quota_on(sb, type, format_id, path, 0);
2069} 2119}
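The thread running through the super.c changes is parse-then-commit: reiserfs_parse_options() now fills caller-owned qf_names[]/qfmt temporaries, and only after parsing fully succeeds does handle_quota_files() copy them into the superblock, so a rejected remount can no longer leave s_qf_names half-updated. A standalone model of the pattern:

#include <stdio.h>
#include <string.h>

struct sb { int jquota_fmt; };

/* Stand-in for reiserfs_parse_options(): touches temporaries, never sb. */
static int parse_options(const char *arg, int *tmp_fmt)
{
	if (!strcmp(arg, "vfsold")) { *tmp_fmt = 1; return 1; }
	if (!strcmp(arg, "vfsv0"))  { *tmp_fmt = 2; return 1; }
	return 0;	/* unknown option: the caller commits nothing */
}

int main(void)
{
	struct sb s = { .jquota_fmt = 0 };
	int tmp_fmt = s.jquota_fmt;

	if (parse_options("vfsv0", &tmp_fmt))
		s.jquota_fmt = tmp_fmt;	/* the handle_quota_files() step */
	else
		printf("options rejected, superblock untouched\n");
	printf("jquota_fmt = %d\n", s.jquota_fmt);
	return 0;
}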
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 5e90a95ad60b..056008db1377 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
6#include <linux/reiserfs_xattr.h> 6#include <linux/reiserfs_xattr.h>
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9#define XATTR_SECURITY_PREFIX "security."
10
11static int 9static int
12security_get(struct inode *inode, const char *name, void *buffer, size_t size) 10security_get(struct inode *inode, const char *name, void *buffer, size_t size)
13{ 11{
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 024a938ca60f..60abe2bb1f98 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
7#include <linux/reiserfs_xattr.h> 7#include <linux/reiserfs_xattr.h>
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10#define XATTR_TRUSTED_PREFIX "trusted."
11
12static int 10static int
13trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) 11trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
14{ 12{
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 073f39364b11..1384efcb938e 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
10# include <linux/reiserfs_acl.h> 10# include <linux/reiserfs_acl.h>
11#endif 11#endif
12 12
13#define XATTR_USER_PREFIX "user."
14
15static int 13static int
16user_get(struct inode *inode, const char *name, void *buffer, size_t size) 14user_get(struct inode *inode, const char *name, void *buffer, size_t size)
17{ 15{
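All three xattr handlers drop their private prefix defines; presumably <linux/xattr.h>, reached through linux/reiserfs_xattr.h, now supplies the shared constants, along the lines of:

/* Assumed central definitions (not shown in this diff): */
#define XATTR_SECURITY_PREFIX	"security."
#define XATTR_TRUSTED_PREFIX	"trusted."
#define XATTR_USER_PREFIX	"user."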
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f0542a21..8c177eb7e344 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/dirent.h>
17#include <linux/smb_fs.h> 16#include <linux/smb_fs.h>
18#include <linux/pagemap.h> 17#include <linux/pagemap.h>
19#include <linux/net.h> 18#include <linux/net.h>
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27b7f4b..ee536e8a649a 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
16#include <linux/stat.h> 16#include <linux/stat.h>
17#include <linux/fcntl.h> 17#include <linux/fcntl.h>
18#include <linux/dcache.h> 18#include <linux/dcache.h>
19#include <linux/dirent.h>
20#include <linux/nls.h> 19#include <linux/nls.h>
21#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
22#include <linux/net.h> 21#include <linux/net.h>
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 506f724055c2..227c9d700040 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
76 76
77#include <linux/errno.h> 77#include <linux/errno.h>
78#include <linux/fs.h> 78#include <linux/fs.h>
79#include <linux/quotaops.h>
79#include <linux/slab.h> 80#include <linux/slab.h>
80#include <linux/time.h> 81#include <linux/time.h>
81#include <linux/stat.h> 82#include <linux/stat.h>
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba69be82..155c10b4adbd 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
621 memcpy(de->name, msdos_name, MSDOS_NAME); 621 memcpy(de->name, msdos_name, MSDOS_NAME);
622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; 622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
623 de->lcase = lcase; 623 de->lcase = lcase;
624 fat_date_unix2dos(ts->tv_sec, &time, &date); 624 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
625 de->time = de->ctime = time; 625 de->time = de->ctime = time;
626 de->date = de->cdate = de->adate = date; 626 de->date = de->cdate = de->adate = date;
627 de->ctime_cs = 0; 627 de->ctime_cs = 0;
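The new argument threads the vfat tz=UTC mount option down into the timestamp conversion: with sbi->options.tz_utc set, on-disk FAT times are taken as UTC and no local-timezone correction is applied. A runnable userspace sketch of the difference (the date packing follows the FAT on-disk layout: year-since-1980 in bits 15-9, month in 8-5, day in 4-0):

#include <stdio.h>
#include <time.h>

static unsigned short fat_date(const struct tm *tm)
{
	return ((tm->tm_year - 80) << 9) | ((tm->tm_mon + 1) << 5) |
	       tm->tm_mday;
}

int main(void)
{
	time_t now = time(NULL);
	struct tm utc, local;

	gmtime_r(&now, &utc);		/* what tz_utc = 1 would use */
	localtime_r(&now, &local);	/* what tz_utc = 0 would use */
	printf("UTC date 0x%04x, local date 0x%04x\n",
	       fat_date(&utc), fat_date(&local));
	return 0;
}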
diff --git a/include/asm-alpha/thread_info.h b/include/asm-alpha/thread_info.h
index fb3185196298..15fda4344424 100644
--- a/include/asm-alpha/thread_info.h
+++ b/include/asm-alpha/thread_info.h
@@ -50,10 +50,8 @@ register struct thread_info *__current_thread_info __asm__("$8");
50#define current_thread_info() __current_thread_info 50#define current_thread_info() __current_thread_info
51 51
52/* Thread information allocation. */ 52/* Thread information allocation. */
53#define THREAD_SIZE_ORDER 1
53#define THREAD_SIZE (2*PAGE_SIZE) 54#define THREAD_SIZE (2*PAGE_SIZE)
54#define alloc_thread_info(tsk) \
55 ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
56#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
57 55
58#endif /* __ASSEMBLY__ */ 56#endif /* __ASSEMBLY__ */
59 57
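The per-arch alloc_thread_info()/free_thread_info() macros can go because, once an architecture states its stack size as THREAD_SIZE_ORDER, common code is able to provide the allocator, presumably something close to:

/* Kernel-style sketch of the assumed generic fallback (not standalone): */
static struct thread_info *alloc_thread_info(struct task_struct *tsk)
{
	return (struct thread_info *)__get_free_pages(GFP_KERNEL,
						      THREAD_SIZE_ORDER);
}

static void free_thread_info(struct thread_info *ti)
{
	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}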
diff --git a/include/asm-arm/ptrace.h b/include/asm-arm/ptrace.h
index 7aaa206cb54e..8382b7510f94 100644
--- a/include/asm-arm/ptrace.h
+++ b/include/asm-arm/ptrace.h
@@ -139,8 +139,6 @@ static inline int valid_user_regs(struct pt_regs *regs)
139 return 0; 139 return 0;
140} 140}
141 141
142#endif /* __KERNEL__ */
143
144#define pc_pointer(v) \ 142#define pc_pointer(v) \
145 ((v) & ~PCMASK) 143 ((v) & ~PCMASK)
146 144
@@ -153,10 +151,10 @@ extern unsigned long profile_pc(struct pt_regs *regs);
153#define profile_pc(regs) instruction_pointer(regs) 151#define profile_pc(regs) instruction_pointer(regs)
154#endif 152#endif
155 153
156#ifdef __KERNEL__
157#define predicate(x) ((x) & 0xf0000000) 154#define predicate(x) ((x) & 0xf0000000)
158#define PREDICATE_ALWAYS 0xe0000000 155#define PREDICATE_ALWAYS 0xe0000000
159#endif 156
157#endif /* __KERNEL__ */
160 158
161#endif /* __ASSEMBLY__ */ 159#endif /* __ASSEMBLY__ */
162 160
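The reshuffle leaves one #ifdef __KERNEL__ region instead of two: exported headers are stripped of everything inside __KERNEL__, so kernel-only helpers such as pc_pointer() and predicate() no longer leak into the ptrace header userspace receives. Schematically, the header now reads:

#ifdef __KERNEL__
#define pc_pointer(v)		((v) & ~PCMASK)
#define predicate(x)		((x) & 0xf0000000)
#define PREDICATE_ALWAYS	0xe0000000
#endif /* __KERNEL__ */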
diff --git a/include/asm-arm/thread_info.h b/include/asm-arm/thread_info.h
index f5a664786311..d4be2d646160 100644
--- a/include/asm-arm/thread_info.h
+++ b/include/asm-arm/thread_info.h
@@ -97,19 +97,6 @@ static inline struct thread_info *current_thread_info(void)
97 return (struct thread_info *)(sp & ~(THREAD_SIZE - 1)); 97 return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
98} 98}
99 99
100/* thread information allocation */
101#ifdef CONFIG_DEBUG_STACK_USAGE
102#define alloc_thread_info(tsk) \
103 ((struct thread_info *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, \
104 THREAD_SIZE_ORDER))
105#else
106#define alloc_thread_info(tsk) \
107 ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
108#endif
109
110#define free_thread_info(info) \
111 free_pages((unsigned long)info, THREAD_SIZE_ORDER);
112
113#define thread_saved_pc(tsk) \ 100#define thread_saved_pc(tsk) \
114 ((unsigned long)(pc_pointer(task_thread_info(tsk)->cpu_context.pc))) 101 ((unsigned long)(pc_pointer(task_thread_info(tsk)->cpu_context.pc)))
115#define thread_saved_fp(tsk) \ 102#define thread_saved_fp(tsk) \
diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h
index df68631b7b27..294b25f9323d 100644
--- a/include/asm-avr32/thread_info.h
+++ b/include/asm-avr32/thread_info.h
@@ -61,10 +61,6 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)addr;
 }
 
-/* thread information allocation */
-#define alloc_thread_info(ti) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long)(ti), 1)
 #define get_thread_info(ti) get_task_struct((ti)->task)
 #define put_thread_info(ti) put_task_struct((ti)->task)
 
diff --git a/include/asm-blackfin/ptrace.h b/include/asm-blackfin/ptrace.h
index b8346cd3a6f6..a45a80e54adc 100644
--- a/include/asm-blackfin/ptrace.h
+++ b/include/asm-blackfin/ptrace.h
@@ -83,14 +83,14 @@ struct pt_regs {
 #define PTRACE_GETREGS            12
 #define PTRACE_SETREGS            13	/* ptrace signal  */
 
-#ifdef CONFIG_BINFMT_ELF_FDPIC
 #define PTRACE_GETFDPIC           31
 #define PTRACE_GETFDPIC_EXEC      0
 #define PTRACE_GETFDPIC_INTERP    1
-#endif
 
 #define PS_S  (0x0002)
 
+#ifdef __KERNEL__
+
 /* user_mode returns true if only one bit is set in IPEND, other than the
    master interrupt enable. */
 #define user_mode(regs) (!(((regs)->ipend & ~0x10) & (((regs)->ipend & ~0x10) - 1)))
@@ -98,6 +98,8 @@ struct pt_regs {
 #define profile_pc(regs) instruction_pointer(regs)
 extern void show_regs(struct pt_regs *);
 
+#endif /* __KERNEL__ */
+
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/include/asm-blackfin/thread_info.h b/include/asm-blackfin/thread_info.h
index bc2fe5accf20..642769329d12 100644
--- a/include/asm-blackfin/thread_info.h
+++ b/include/asm-blackfin/thread_info.h
@@ -42,6 +42,7 @@
 /*
  * Size of kernel stack for each process. This must be a power of 2...
  */
+#define THREAD_SIZE_ORDER	1
 #define THREAD_SIZE		8192	/* 2 pages */
 
 #ifndef __ASSEMBLY__
@@ -94,10 +95,6 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)((long)ti & ~((long)THREAD_SIZE-1));
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-	__get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/include/asm-cris/arch-v10/Kbuild b/include/asm-cris/arch-v10/Kbuild
index 60e7e1b73cec..7a192e1290b1 100644
--- a/include/asm-cris/arch-v10/Kbuild
+++ b/include/asm-cris/arch-v10/Kbuild
@@ -1,4 +1,3 @@
-header-y += ptrace.h
 header-y += user.h
 header-y += svinto.h
 header-y += sv_addr_ag.h
diff --git a/include/asm-cris/arch-v10/ptrace.h b/include/asm-cris/arch-v10/ptrace.h
index fb14c5ee37f9..2f464eab3a51 100644
--- a/include/asm-cris/arch-v10/ptrace.h
+++ b/include/asm-cris/arch-v10/ptrace.h
@@ -106,10 +106,14 @@ struct switch_stack {
 	unsigned long return_ip;     /* ip that _resume will return to */
 };
 
+#ifdef __KERNEL__
+
 /* bit 8 is user-mode flag */
 #define user_mode(regs) (((regs)->dccr & 0x100) != 0)
 #define instruction_pointer(regs) ((regs)->irp)
 #define profile_pc(regs) instruction_pointer(regs)
 extern void show_regs(struct pt_regs *);
 
+#endif /* __KERNEL__ */
+
 #endif
diff --git a/include/asm-cris/arch-v32/Kbuild b/include/asm-cris/arch-v32/Kbuild
index a0ec545e242e..35f2fc4f993e 100644
--- a/include/asm-cris/arch-v32/Kbuild
+++ b/include/asm-cris/arch-v32/Kbuild
@@ -1,3 +1,2 @@
-header-y += ptrace.h
 header-y += user.h
 header-y += cryptocop.h
diff --git a/include/asm-cris/arch-v32/ptrace.h b/include/asm-cris/arch-v32/ptrace.h
index 516cc7062d94..41f4e8662bc2 100644
--- a/include/asm-cris/arch-v32/ptrace.h
+++ b/include/asm-cris/arch-v32/ptrace.h
@@ -106,9 +106,13 @@ struct switch_stack {
 	unsigned long return_ip;	/* ip that _resume will return to */
 };
 
+#ifdef __KERNEL__
+
 #define user_mode(regs) (((regs)->ccs & (1 << (U_CCS_BITNR + CCS_SHIFT))) != 0)
 #define instruction_pointer(regs) ((regs)->erp)
 extern void show_regs(struct pt_regs *);
 #define profile_pc(regs) instruction_pointer(regs)
 
+#endif /* __KERNEL__ */
+
 #endif
diff --git a/include/asm-cris/ptrace.h b/include/asm-cris/ptrace.h
index 1ec69a7ea836..d910925e3174 100644
--- a/include/asm-cris/ptrace.h
+++ b/include/asm-cris/ptrace.h
@@ -4,11 +4,13 @@
 #include <asm/arch/ptrace.h>
 
 #ifdef __KERNEL__
+
 /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
 #define PTRACE_GETREGS            12
 #define PTRACE_SETREGS            13
-#endif
 
 #define profile_pc(regs) instruction_pointer(regs)
 
+#endif /* __KERNEL__ */
+
 #endif /* _CRIS_PTRACE_H */
diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h
index 784668ab0fa2..7efe1000f99d 100644
--- a/include/asm-cris/thread_info.h
+++ b/include/asm-cris/thread_info.h
@@ -11,6 +11,8 @@
 
 #ifdef __KERNEL__
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 #include <asm/processor.h>
diff --git a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild
index bc3f12c5b7e0..0f8956def738 100644
--- a/include/asm-frv/Kbuild
+++ b/include/asm-frv/Kbuild
@@ -3,4 +3,3 @@ include include/asm-generic/Kbuild.asm
 header-y += registers.h
 
 unifdef-y += termios.h
-unifdef-y += ptrace.h
diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h
index 348b8f1df17e..b7ac6bf2844c 100644
--- a/include/asm-frv/thread_info.h
+++ b/include/asm-frv/thread_info.h
@@ -82,6 +82,8 @@ register struct thread_info *__current_thread_info asm("gr15");
 
 #define current_thread_info() ({ __current_thread_info; })
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 2632328d8646..a3f738cffdb6 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -34,9 +34,14 @@ struct bug_entry {
 #ifndef __WARN
 #ifndef __ASSEMBLY__
 extern void warn_on_slowpath(const char *file, const int line);
+extern void warn_slowpath(const char *file, const int line,
+		const char *fmt, ...) __attribute__((format(printf, 3, 4)));
 #define WANT_WARN_ON_SLOWPATH
 #endif
 #define __WARN()	warn_on_slowpath(__FILE__, __LINE__)
+#define __WARN_printf(arg...)	warn_slowpath(__FILE__, __LINE__, arg)
+#else
+#define __WARN_printf(arg...)	__WARN()
 #endif
 
 #ifndef WARN_ON
@@ -48,6 +53,15 @@ extern void warn_on_slowpath(const char *file, const int line);
 })
 #endif
 
+#ifndef WARN
+#define WARN(condition, format...) ({				\
+	int __ret_warn_on = !!(condition);			\
+	if (unlikely(__ret_warn_on))				\
+		__WARN_printf(format);				\
+	unlikely(__ret_warn_on);				\
+})
+#endif
+
 #else /* !CONFIG_BUG */
 #ifndef HAVE_ARCH_BUG
 #define BUG()
@@ -63,6 +77,14 @@ extern void warn_on_slowpath(const char *file, const int line);
 	unlikely(__ret_warn_on);				\
 })
 #endif
+
+#ifndef WARN
+#define WARN(condition, format...) ({				\
+	int __ret_warn_on = !!(condition);			\
+	unlikely(__ret_warn_on);				\
+})
+#endif
+
 #endif
 
 #define WARN_ON_ONCE(condition)	({				\
68#define WARN_ON_ONCE(condition) ({ \ 90#define WARN_ON_ONCE(condition) ({ \
@@ -75,6 +97,9 @@ extern void warn_on_slowpath(const char *file, const int line);
 	unlikely(__ret_warn_once);				\
 })
 
+#define WARN_ON_RATELIMIT(condition, state)			\
+		WARN_ON((condition) && __ratelimit(state))
+
 #ifdef CONFIG_SMP
 # define WARN_ON_SMP(x)			WARN_ON(x)
 #else
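The new WARN() above behaves like WARN_ON() but also feeds a printk-style
message into the log before the backtrace, and (like WARN_ON) evaluates to
the truth value of the condition so it can sit directly in an if(). A minimal
usage sketch; the function and its size check are hypothetical, not from this
patch:

	#include <linux/kernel.h>
	#include <linux/bug.h>		/* pulls in asm-generic/bug.h */

	/* Hypothetical caller: refuse oversized buffers, with context. */
	static int frob_buffer(const void *buf, size_t len)
	{
		if (WARN(len > PAGE_SIZE,
			 "frob: oversized buffer (%zu bytes)\n", len))
			return -EINVAL;
		/* ... use buf ... */
		return 0;
	}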
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 6be061d09da9..a3034d20ebd5 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -3,7 +3,7 @@
 
 #include <linux/types.h>
 
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
 
 #include <linux/compiler.h>
 
@@ -32,6 +32,8 @@ struct module;
 /**
  * struct gpio_chip - abstract a GPIO controller
  * @label: for diagnostics
+ * @dev: optional device providing the GPIOs
+ * @owner: helps prevent removal of modules exporting active GPIOs
  * @direction_input: configures signal "offset" as input, or returns error
  * @get: returns value for signal "offset"; for output signals this
  *	returns either the value actually sensed, or zero
@@ -59,6 +61,7 @@ struct module;
  */
 struct gpio_chip {
 	char			*label;
+	struct device		*dev;
 	struct module		*owner;
 
 	int			(*direction_input)(struct gpio_chip *chip,
@@ -74,6 +77,7 @@ struct gpio_chip {
 	int			base;
 	u16			ngpio;
 	unsigned		can_sleep:1;
+	unsigned		exported:1;
 };
 
 extern const char *gpiochip_is_requested(struct gpio_chip *chip,
@@ -108,7 +112,18 @@ extern void __gpio_set_value(unsigned gpio, int value);
 extern int __gpio_cansleep(unsigned gpio);
 
 
-#else
+#ifdef CONFIG_GPIO_SYSFS
+
+/*
+ * A sysfs interface can be exported by individual drivers if they want,
+ * but more typically is configured entirely from userspace.
+ */
+extern int gpio_export(unsigned gpio, bool direction_may_change);
+extern void gpio_unexport(unsigned gpio);
+
+#endif /* CONFIG_GPIO_SYSFS */
+
+#else /* !CONFIG_HAVE_GPIO_LIB */
 
 static inline int gpio_is_valid(int number)
 {
@@ -137,6 +152,20 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
 	gpio_set_value(gpio, value);
 }
 
-#endif
+#endif /* !CONFIG_HAVE_GPIO_LIB */
+
+#ifndef CONFIG_GPIO_SYSFS
+
+/* sysfs support is only available with gpiolib, where it's optional */
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return -ENOSYS;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+#endif /* CONFIG_GPIO_SYSFS */
 
 #endif /* _ASM_GENERIC_GPIO_H */
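With the optional sysfs support above, a driver that owns a GPIO can publish
it to userspace; the inline stubs return -ENOSYS when CONFIG_GPIO_SYSFS is
off, so callers need no #ifdefs. A hedged sketch of a caller (the GPIO number
and label are made up for illustration):

	#include <linux/gpio.h>		/* gpio_request() etc., via asm/gpio.h */

	static int example_export_led(void)
	{
		const unsigned gpio = 42;	/* hypothetical line */
		int err;

		err = gpio_request(gpio, "example-led");
		if (err)
			return err;
		err = gpio_direction_output(gpio, 0);
		if (err)
			goto out_free;
		/* false: userspace may not flip the direction via sysfs */
		err = gpio_export(gpio, false);
		if (err)
			goto out_free;
		return 0;
	out_free:
		gpio_free(gpio);
		return err;
	}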
diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h
index 260948905e4e..f9bc9ac29b36 100644
--- a/include/asm-generic/int-ll64.h
+++ b/include/asm-generic/int-ll64.h
@@ -26,7 +26,7 @@ typedef unsigned int __u32;
 #ifdef __GNUC__
 __extension__ typedef __signed__ long long __s64;
 __extension__ typedef unsigned long long __u64;
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#else
 typedef __signed__ long long __s64;
 typedef unsigned long long __u64;
 #endif
diff --git a/include/asm-h8300/elf.h b/include/asm-h8300/elf.h
index 26bfc7e641da..a8b57d1f4128 100644
--- a/include/asm-h8300/elf.h
+++ b/include/asm-h8300/elf.h
@@ -26,10 +26,10 @@ typedef unsigned long elf_fpregset_t;
 #define ELF_DATA	ELFDATA2MSB
 #define ELF_ARCH	EM_H8_300
 #if defined(__H8300H__)
-#define ELF_FLAGS       0x810000
+#define ELF_CORE_EFLAGS 0x810000
 #endif
 #if defined(__H8300S__)
-#define ELF_FLAGS       0x820000
+#define ELF_CORE_EFLAGS 0x820000
 #endif
 
 #define ELF_PLAT_INIT(_r)	_r->er1 = 0
diff --git a/include/asm-h8300/thread_info.h b/include/asm-h8300/thread_info.h
index 27bb95e2944c..aafd4d322ec3 100644
--- a/include/asm-h8300/thread_info.h
+++ b/include/asm-h8300/thread_info.h
@@ -49,6 +49,7 @@ struct thread_info {
 /*
  * Size of kernel stack for each process. This must be a power of 2...
  */
+#define THREAD_SIZE_ORDER	1
 #define THREAD_SIZE		8192	/* 2 pages */
 
 
@@ -65,10 +66,6 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-				__get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti)	free_pages((unsigned long) (ti), 1)
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 2422ac61658a..7c60fcdd2efd 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -54,6 +54,8 @@ struct thread_info {
 	},			\
 }
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifndef ASM_OFFSETS_C
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
diff --git a/include/asm-m32r/thread_info.h b/include/asm-m32r/thread_info.h
index 1effcd0f5e63..8589d462df27 100644
--- a/include/asm-m32r/thread_info.h
+++ b/include/asm-m32r/thread_info.h
@@ -94,6 +94,8 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index d635a3752488..abc002798a2b 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -25,13 +25,7 @@ struct thread_info {
 }
 
 /* THREAD_SIZE should be 8k, so handle differently for 4k and 8k machines */
-#if PAGE_SHIFT == 13 /* 8k machines */
-#define alloc_thread_info(tsk)   ((struct thread_info *)__get_free_pages(GFP_KERNEL,0))
-#define free_thread_info(ti)  free_pages((unsigned long)(ti),0)
-#else /* otherwise assume 4k pages */
-#define alloc_thread_info(tsk)   ((struct thread_info *)__get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti)  free_pages((unsigned long)(ti),1)
-#endif /* PAGE_SHIFT == 13 */
+#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT)
 
 #define init_thread_info	(init_task.thread.info)
 #define init_stack		(init_thread_union.stack)
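This hunk is typical of the whole series: instead of every architecture
open-coding alloc_thread_info()/free_thread_info(), each one now just states
its stack order and lets common code do the allocation; architectures that
need something special define __HAVE_ARCH_THREAD_INFO_ALLOCATOR and keep
their own. A paraphrased sketch of what the generic fallback in
kernel/fork.c looks like after this change (not the verbatim patch):

	#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
	static struct thread_info *alloc_thread_info(struct task_struct *tsk)
	{
	#ifdef CONFIG_DEBUG_STACK_USAGE
		gfp_t mask = GFP_KERNEL | __GFP_ZERO;	/* zero for stack-depth tracking */
	#else
		gfp_t mask = GFP_KERNEL;
	#endif
		/* THREAD_SIZE_ORDER now comes from asm/thread_info.h */
		return (struct thread_info *)__get_free_pages(mask,
							      THREAD_SIZE_ORDER);
	}

	static void free_thread_info(struct thread_info *ti)
	{
		free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
	}
	#endif

This is why the per-arch CONFIG_DEBUG_STACK_USAGE variants could be deleted
wholesale in the hunks above and below.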
diff --git a/include/asm-m68knommu/ptrace.h b/include/asm-m68knommu/ptrace.h
index 47258e86e8c4..8c9194b98548 100644
--- a/include/asm-m68knommu/ptrace.h
+++ b/include/asm-m68knommu/ptrace.h
@@ -68,10 +68,8 @@ struct switch_stack {
 /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
 #define PTRACE_GETREGS            12
 #define PTRACE_SETREGS            13
-#ifdef CONFIG_FPU
 #define PTRACE_GETFPREGS          14
 #define PTRACE_SETFPREGS          15
-#endif
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-m68knommu/thread_info.h b/include/asm-m68knommu/thread_info.h
index 95996d978bed..0c9bc095f3f0 100644
--- a/include/asm-m68knommu/thread_info.h
+++ b/include/asm-m68knommu/thread_info.h
@@ -71,10 +71,6 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-				__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti)	free_pages((unsigned long) (ti), THREAD_SIZE_ORDER)
 #endif /* __ASSEMBLY__ */
 
 #define	PREEMPT_ACTIVE	0x4000000
diff --git a/include/asm-mips/mach-generic/gpio.h b/include/asm-mips/mach-generic/gpio.h
index e6b376bd9d06..b4e70208da64 100644
--- a/include/asm-mips/mach-generic/gpio.h
+++ b/include/asm-mips/mach-generic/gpio.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_MACH_GENERIC_GPIO_H
 #define __ASM_MACH_GENERIC_GPIO_H
 
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
 #define gpio_get_value	__gpio_get_value
 #define gpio_set_value	__gpio_set_value
 #define gpio_cansleep	__gpio_cansleep
diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h
index b2772df1a1bd..bb3060699df2 100644
--- a/include/asm-mips/thread_info.h
+++ b/include/asm-mips/thread_info.h
@@ -82,6 +82,8 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_MASK (THREAD_SIZE - 1UL)
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
 ({								\
diff --git a/include/asm-mn10300/ptrace.h b/include/asm-mn10300/ptrace.h
index b3684689fcce..7b06cc623d8b 100644
--- a/include/asm-mn10300/ptrace.h
+++ b/include/asm-mn10300/ptrace.h
@@ -88,12 +88,16 @@ extern struct pt_regs *__frame; /* current frame pointer */
 /* options set using PTRACE_SETOPTIONS */
 #define PTRACE_O_TRACESYSGOOD		0x00000001
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+
+#if !defined(__ASSEMBLY__)
 #define user_mode(regs)			(((regs)->epsw & EPSW_nSL) == EPSW_nSL)
 #define instruction_pointer(regs)	((regs)->pc)
 extern void show_regs(struct pt_regs *);
-#endif
+#endif /* !__ASSEMBLY */
 
 #define profile_pc(regs) ((regs)->pc)
 
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_PTRACE_H */
diff --git a/include/asm-mn10300/thread_info.h b/include/asm-mn10300/thread_info.h
index e397e7192785..78a3881f3c12 100644
--- a/include/asm-mn10300/thread_info.h
+++ b/include/asm-mn10300/thread_info.h
@@ -112,6 +112,8 @@ static inline unsigned long current_stack_pointer(void)
 	return sp;
 }
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
diff --git a/include/asm-parisc/ptrace.h b/include/asm-parisc/ptrace.h
index 93f990e418f1..3e94c5d85ff5 100644
--- a/include/asm-parisc/ptrace.h
+++ b/include/asm-parisc/ptrace.h
@@ -33,7 +33,6 @@ struct pt_regs {
 	unsigned long ipsw;	/* CR22 */
 };
 
-#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
 /*
  * The numbers chosen here are somewhat arbitrary but absolutely MUST
  * not overlap with any of the number assigned in <linux/ptrace.h>.
@@ -43,8 +42,11 @@ struct pt_regs {
  * since we have taken branch traps too)
  */
 #define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+
 #ifdef __KERNEL__
 
+#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
+
 /* XXX should we use iaoq[1] or iaoq[0] ? */
 #define user_mode(regs)			(((regs)->iaoq[0] & 3) ? 1 : 0)
 #define user_space(regs)		(((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h
index 2d9c7500867b..9f812741c355 100644
--- a/include/asm-parisc/thread_info.h
+++ b/include/asm-parisc/thread_info.h
@@ -34,15 +34,11 @@ struct thread_info {
 
 /* thread information allocation */
 
-#define THREAD_ORDER            2
+#define THREAD_SIZE_ORDER            2
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
-#define THREAD_SIZE             (PAGE_SIZE << THREAD_ORDER)
-#define THREAD_SHIFT            (PAGE_SHIFT + THREAD_ORDER)
-
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-			__get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#define free_thread_info(ti)    free_pages((unsigned long) (ti), THREAD_ORDER)
+#define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
 
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *)mfctl(30))
diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild
index 04ce8f8a2ee7..5ab7d7fe198c 100644
--- a/include/asm-powerpc/Kbuild
+++ b/include/asm-powerpc/Kbuild
@@ -29,7 +29,6 @@ unifdef-y += elf.h
 unifdef-y += nvram.h
 unifdef-y += param.h
 unifdef-y += posix_types.h
-unifdef-y += ptrace.h
 unifdef-y += seccomp.h
 unifdef-y += signal.h
 unifdef-y += spu_info.h
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 2a3e9075a5a0..ef8a248dfd55 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -127,6 +127,8 @@ extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
 extern void do_feature_fixups(unsigned long value, void *fixup_start,
 			      void *fixup_end);
 
+extern const char *powerpc_base_platform;
+
 #endif /* __ASSEMBLY__ */
 
 /* CPU kernel features */
diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h
index 89664675b469..80d1f399ee51 100644
--- a/include/asm-powerpc/elf.h
+++ b/include/asm-powerpc/elf.h
@@ -217,6 +217,14 @@ typedef elf_vrregset_t elf_fpxregset_t;
 
 #define ELF_PLATFORM	(cur_cpu_spec->platform)
 
+/* While ELF_PLATFORM indicates the ISA supported by the platform, it
+ * may not accurately reflect the underlying behavior of the hardware
+ * (as in the case of running in Power5+ compatibility mode on a
+ * Power6 machine). ELF_BASE_PLATFORM allows ld.so to load libraries
+ * that are tuned for the real hardware.
+ */
+#define ELF_BASE_PLATFORM (powerpc_base_platform)
+
 #ifdef __powerpc64__
 # define ELF_PLAT_INIT(_r, load_addr)	do {	\
 	_r->gpr[2] = load_addr; 		\
diff --git a/include/asm-powerpc/firmware.h b/include/asm-powerpc/firmware.h
index ef328995ba9d..3a179827528d 100644
--- a/include/asm-powerpc/firmware.h
+++ b/include/asm-powerpc/firmware.h
@@ -46,6 +46,7 @@
 #define FW_FEATURE_PS3_LV1	ASM_CONST(0x0000000000800000)
 #define FW_FEATURE_BEAT		ASM_CONST(0x0000000001000000)
 #define FW_FEATURE_BULK_REMOVE	ASM_CONST(0x0000000002000000)
+#define FW_FEATURE_CMO		ASM_CONST(0x0000000004000000)
 
 #ifndef __ASSEMBLY__
 
@@ -58,7 +59,7 @@ enum {
 		FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
 		FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
 		FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE |
-		FW_FEATURE_SPLPAR | FW_FEATURE_LPAR,
+		FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
 	FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
diff --git a/include/asm-powerpc/gpio.h b/include/asm-powerpc/gpio.h
index 77ad3a890f30..ea04632399d8 100644
--- a/include/asm-powerpc/gpio.h
+++ b/include/asm-powerpc/gpio.h
@@ -17,7 +17,7 @@
 #include <linux/errno.h>
 #include <asm-generic/gpio.h>
 
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
 
 /*
  * We don't (yet) implement inlined/rapid versions for on-chip gpios.
@@ -51,6 +51,6 @@ static inline int irq_to_gpio(unsigned int irq)
 	return -EINVAL;
 }
 
-#endif /* CONFIG_HAVE_GPIO_LIB */
+#endif /* CONFIG_GPIOLIB */
 
 #endif /* __ASM_POWERPC_GPIO_H */
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index bf6cd7cb996c..fbe2932fa9e9 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -92,6 +92,11 @@
 #define H_EXACT			(1UL<<(63-24))	/* Use exact PTE or return H_PTEG_FULL */
 #define H_R_XLATE		(1UL<<(63-25))	/* include a valid logical page num in the pte if the valid bit is set */
 #define H_READ_4		(1UL<<(63-26))	/* Return 4 PTEs */
+#define H_PAGE_STATE_CHANGE	(1UL<<(63-28))
+#define H_PAGE_UNUSED		((1UL<<(63-29)) | (1UL<<(63-30)))
+#define H_PAGE_SET_UNUSED	(H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
+#define H_PAGE_SET_LOANED	(H_PAGE_SET_UNUSED | (1UL<<(63-31)))
+#define H_PAGE_SET_ACTIVE	H_PAGE_STATE_CHANGE
 #define H_AVPN			(1UL<<(63-32))	/* An avpn is provided as a sanity test */
 #define H_ANDCOND		(1UL<<(63-33))
 #define H_ICACHE_INVALIDATE	(1UL<<(63-40))	/* icbi, etc.  (ignored for IO pages) */
@@ -210,7 +215,9 @@
 #define H_JOIN			0x298
 #define H_VASI_STATE		0x2A4
 #define H_ENABLE_CRQ		0x2B0
-#define MAX_HCALL_OPCODE	H_ENABLE_CRQ
+#define H_SET_MPP		0x2D0
+#define H_GET_MPP		0x2D4
+#define MAX_HCALL_OPCODE	H_GET_MPP
 
 #ifndef __ASSEMBLY__
 
@@ -270,6 +277,20 @@ struct hcall_stats {
 };
 #define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
+struct hvcall_mpp_data {
+	unsigned long entitled_mem;
+	unsigned long mapped_mem;
+	unsigned short group_num;
+	unsigned short pool_num;
+	unsigned char mem_weight;
+	unsigned char unallocated_mem_weight;
+	unsigned long unallocated_entitlement;  /* value in bytes */
+	unsigned long pool_size;
+	signed long loan_request;
+	unsigned long backing_mem;
+};
+
+int h_get_mpp(struct hvcall_mpp_data *);
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
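H_GET_MPP returns the partition's Memory Partition Parameters for the CMO
support added in this merge; h_get_mpp() is a wrapper that unpacks the hcall
return registers into struct hvcall_mpp_data. A hedged caller sketch (the
reporting function is hypothetical; a non-zero return is assumed to mean the
hcall did not succeed):

	#include <linux/kernel.h>
	#include <asm/hvcall.h>

	static void report_entitlement(void)
	{
		struct hvcall_mpp_data mpp;

		if (h_get_mpp(&mpp))
			return;		/* hcall failed; nothing to report */
		printk(KERN_INFO
		       "entitled %lu bytes, mapped %lu, loan request %ld\n",
		       mpp.entitled_mem, mpp.mapped_mem, mpp.loan_request);
	}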
diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h
index 567ed92cd91f..2fe268b10333 100644
--- a/include/asm-powerpc/lppaca.h
+++ b/include/asm-powerpc/lppaca.h
@@ -125,7 +125,10 @@ struct lppaca {
 	// NOTE: This value will ALWAYS be zero for dedicated processors and
 	// will NEVER be zero for shared processors (ie, initialized to a 1).
 	volatile u32 yield_count;	// PLIC increments each dispatch	x00-x03
-	u8 reserved6[124];		// Reserved			x04-x7F
+	u32 reserved6;
+	volatile u64 cmo_faults;	// CMO page fault count		x08-x0F
+	volatile u64 cmo_fault_time;	// CMO page fault time		x10-x17
+	u8 reserved7[104];		// Reserved			x18-x7F
 
 //=============================================================================
 // CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index 1233d735fd28..893aafd87fde 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -76,7 +76,7 @@ struct machdep_calls {
 	 * destroyed as well */
 	void		(*hpte_clear_all)(void);
 
-	void		(*tce_build)(struct iommu_table * tbl,
+	int		(*tce_build)(struct iommu_table *tbl,
 				     long index,
 				     long npages,
 				     unsigned long uaddr,
diff --git a/include/asm-powerpc/mpc52xx_psc.h b/include/asm-powerpc/mpc52xx_psc.h
index 710c5d36efaa..8917ed630565 100644
--- a/include/asm-powerpc/mpc52xx_psc.h
+++ b/include/asm-powerpc/mpc52xx_psc.h
@@ -60,10 +60,12 @@
 #define MPC52xx_PSC_RXTX_FIFO_ALARM	0x0002
 #define MPC52xx_PSC_RXTX_FIFO_EMPTY	0x0001
 
-/* PSC interrupt mask bits */
+/* PSC interrupt status/mask bits */
 #define MPC52xx_PSC_IMR_TXRDY		0x0100
 #define MPC52xx_PSC_IMR_RXRDY		0x0200
 #define MPC52xx_PSC_IMR_DB		0x0400
+#define MPC52xx_PSC_IMR_TXEMP		0x0800
+#define MPC52xx_PSC_IMR_ORERR		0x1000
 #define MPC52xx_PSC_IMR_IPC		0x8000
 
 /* PSC input port change bit */
@@ -92,6 +94,34 @@
 
 #define MPC52xx_PSC_RFNUM_MASK	0x01ff
 
+#define MPC52xx_PSC_SICR_DTS1			(1 << 29)
+#define MPC52xx_PSC_SICR_SHDR			(1 << 28)
+#define MPC52xx_PSC_SICR_SIM_MASK		(0xf << 24)
+#define MPC52xx_PSC_SICR_SIM_UART		(0x0 << 24)
+#define MPC52xx_PSC_SICR_SIM_UART_DCD		(0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_8		(0x1 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_16		(0x2 << 24)
+#define MPC52xx_PSC_SICR_SIM_AC97		(0x3 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR		(0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR_DCD		(0xc << 24)
+#define MPC52xx_PSC_SICR_SIM_MIR		(0x5 << 24)
+#define MPC52xx_PSC_SICR_SIM_FIR		(0x6 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_24		(0x7 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_32		(0xf << 24)
+#define MPC52xx_PSC_SICR_GENCLK			(1 << 23)
+#define MPC52xx_PSC_SICR_I2S			(1 << 22)
+#define MPC52xx_PSC_SICR_CLKPOL			(1 << 21)
+#define MPC52xx_PSC_SICR_SYNCPOL		(1 << 20)
+#define MPC52xx_PSC_SICR_CELLSLAVE		(1 << 19)
+#define MPC52xx_PSC_SICR_CELL2XCLK		(1 << 18)
+#define MPC52xx_PSC_SICR_ESAI			(1 << 17)
+#define MPC52xx_PSC_SICR_ENAC97			(1 << 16)
+#define MPC52xx_PSC_SICR_SPI			(1 << 15)
+#define MPC52xx_PSC_SICR_MSTR			(1 << 14)
+#define MPC52xx_PSC_SICR_CPOL			(1 << 13)
+#define MPC52xx_PSC_SICR_CPHA			(1 << 12)
+#define MPC52xx_PSC_SICR_USEEOF			(1 << 11)
+#define MPC52xx_PSC_SICR_DISABLEEOF		(1 << 10)
 
 /* Structure of the hardware registers */
 struct mpc52xx_psc {
@@ -132,8 +162,12 @@ struct mpc52xx_psc {
 	u8		reserved5[3];
 	u8		ctlr;		/* PSC + 0x1c */
 	u8		reserved6[3];
-	u16		ccr;		/* PSC + 0x20 */
-	u8		reserved7[14];
+	/* BitClkDiv field of CCR is byte swapped in
+	 * the hardware for mpc5200/b compatibility */
+	u32		ccr;		/* PSC + 0x20 */
+	u32		ac97_slots;	/* PSC + 0x24 */
+	u32		ac97_cmd;	/* PSC + 0x28 */
+	u32		ac97_data;	/* PSC + 0x2c */
 	u8		ivr;		/* PSC + 0x30 */
 	u8		reserved8[3];
 	u8		ip;		/* PSC + 0x34 */
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h
index d18ffe7bc7c4..dbb8ca172e44 100644
--- a/include/asm-powerpc/pgtable.h
+++ b/include/asm-powerpc/pgtable.h
@@ -38,6 +38,19 @@ extern void paging_init(void);
 	       remap_pfn_range(vma, vaddr, pfn, size, prot)
 
 #include <asm-generic/pgtable.h>
+
+
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ * On machines which use an MMU hash table, we use this to put a
+ * corresponding HPTE into the hash table ahead of time, instead of
+ * waiting for the inevitable extra hash-table miss exception.
+ */
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h
index 2b8a458f990a..eb8eb400c664 100644
--- a/include/asm-powerpc/syscalls.h
+++ b/include/asm-powerpc/syscalls.h
@@ -31,6 +31,7 @@ asmlinkage int sys_vfork(unsigned long p1, unsigned long p2,
 		unsigned long p3, unsigned long p4, unsigned long p5,
 		unsigned long p6, struct pt_regs *regs);
 asmlinkage long sys_pipe(int __user *fildes);
+asmlinkage long sys_pipe2(int __user *fildes, int flags);
 asmlinkage long sys_rt_sigaction(int sig,
 		const struct sigaction __user *act,
 		struct sigaction __user *oact, size_t sigsetsize);
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index ae7085c65692..e084272ed1c2 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -316,3 +316,9 @@ COMPAT_SYS(fallocate)
 SYSCALL(subpage_prot)
 COMPAT_SYS_SPU(timerfd_settime)
 COMPAT_SYS_SPU(timerfd_gettime)
+COMPAT_SYS_SPU(signalfd4)
+SYSCALL_SPU(eventfd2)
+SYSCALL_SPU(epoll_create1)
+SYSCALL_SPU(dup3)
+SYSCALL_SPU(pipe2)
+SYSCALL(inotify_init1)
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index e6e25e2364eb..d6648c143322 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -110,6 +110,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
 extern int set_dabr(unsigned long dabr);
+extern void do_dabr(struct pt_regs *regs, unsigned long address,
+		    unsigned long error_code);
 extern void print_backtrace(unsigned long *);
 extern void show_regs(struct pt_regs * regs);
 extern void flush_instruction_cache(void);
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index b705c2a7651a..a9db562df69a 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -66,20 +66,12 @@ struct thread_info {
 
 #if THREAD_SHIFT >= PAGE_SHIFT
 
-#define THREAD_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
-
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk)	\
-	((struct thread_info *)__get_free_pages(GFP_KERNEL | \
-		__GFP_ZERO, THREAD_ORDER))
-#else
-#define alloc_thread_info(tsk)	\
-	((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#endif
-#define free_thread_info(ti)	free_pages((unsigned long)ti, THREAD_ORDER)
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
 
 #else /* THREAD_SHIFT < PAGE_SHIFT */
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
 extern void free_thread_info(struct thread_info *ti);
 
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index 5c9108147644..361cd5c7a32b 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -162,16 +162,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 
 #endif
 
-/*
- * This gets called at the end of handling a page fault, when
- * the kernel has put a new PTE into the page table for the process.
- * We use it to ensure coherency between the i-cache and d-cache
- * for the page which has just been mapped in.
- * On machines which use an MMU hash table, we use this to put a
- * corresponding HPTE into the hash table ahead of time, instead of
- * waiting for the inevitable extra hash-table miss exception.
- */
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
-
 #endif /*__KERNEL__ */
 #endif /* _ASM_POWERPC_TLBFLUSH_H */
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index ce91bb662063..e07d0c76ed77 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -335,10 +335,16 @@
 #define __NR_subpage_prot	310
 #define __NR_timerfd_settime	311
 #define __NR_timerfd_gettime	312
+#define __NR_signalfd4		313
+#define __NR_eventfd2		314
+#define __NR_epoll_create1	315
+#define __NR_dup3		316
+#define __NR_pipe2		317
+#define __NR_inotify_init1	318
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		313
+#define __NR_syscalls		319
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
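The six new numbers wire up the O_CLOEXEC-aware syscall variants added in
this merge window (see the matching systbl.h hunk above); pipe2() is pipe()
with a flags argument. A userspace example, assuming a glibc new enough
(2.9+) to expose pipe2:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fds[2];

		/* Both pipe ends are created with close-on-exec already set,
		 * avoiding the pipe()+fcntl(F_SETFD) race in threaded code. */
		if (pipe2(fds, O_CLOEXEC) < 0) {
			perror("pipe2");
			return 1;
		}
		printf("read end %d, write end %d\n", fds[0], fds[1]);
		close(fds[0]);
		close(fds[1]);
		return 0;
	}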
diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h
index 56512a968dab..0a290a195946 100644
--- a/include/asm-powerpc/vio.h
+++ b/include/asm-powerpc/vio.h
@@ -39,16 +39,32 @@
 #define VIO_IRQ_DISABLE		0UL
 #define VIO_IRQ_ENABLE		1UL
 
+/*
+ * VIO CMO minimum entitlement for all devices and spare entitlement
+ */
+#define VIO_CMO_MIN_ENT 1562624
+
 struct iommu_table;
 
-/*
- * The vio_dev structure is used to describe virtual I/O devices.
+/**
+ * vio_dev - This structure is used to describe virtual I/O devices.
+ *
+ * @desired: set from return of driver's get_desired_dma() function
+ * @entitled: bytes of IO data that has been reserved for this device.
+ * @allocated: bytes of IO data currently in use by the device.
+ * @allocs_failed: number of DMA failures due to insufficient entitlement.
  */
 struct vio_dev {
 	const char *name;
 	const char *type;
 	uint32_t unit_address;
 	unsigned int irq;
+	struct {
+		size_t desired;
+		size_t entitled;
+		size_t allocated;
+		atomic_t allocs_failed;
+	} cmo;
 	struct device dev;
 };
 
@@ -56,12 +72,19 @@ struct vio_driver {
 	const struct vio_device_id *id_table;
 	int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
 	int (*remove)(struct vio_dev *dev);
+	/* A driver must have a get_desired_dma() function to
+	 * be loaded in a CMO environment if it uses DMA.
+	 */
+	unsigned long (*get_desired_dma)(struct vio_dev *dev);
 	struct device_driver driver;
 };
 
 extern int vio_register_driver(struct vio_driver *drv);
 extern void vio_unregister_driver(struct vio_driver *drv);
 
+extern int vio_cmo_entitlement_update(size_t);
+extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired);
+
 extern void __devinit vio_unregister_device(struct vio_dev *dev);
 
 struct device_node;
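Under Cooperative Memory Overcommit the hypervisor only guarantees DMA
mappings up to a device's entitlement, so a DMA-using vio driver must now
advertise its worst-case need via get_desired_dma(). A hedged sketch of how
a driver might fill this in; every "example_*" name, the device table
strings, and the 16 x 64KiB sizing are illustrative, not from this patch:

	#include <asm/vio.h>	/* struct vio_dev, struct vio_driver */

	static struct vio_device_id example_device_table[] = {
		{ "example", "example-compat" },	/* hypothetical IDs */
		{ "", "" },
	};

	static int example_probe(struct vio_dev *vdev,
				 const struct vio_device_id *id)
	{
		return 0;	/* real driver would set up the device here */
	}

	static int example_remove(struct vio_dev *vdev)
	{
		return 0;
	}

	/* Worst case: 16 buffers of 64KiB mapped concurrently. */
	static unsigned long example_get_desired_dma(struct vio_dev *vdev)
	{
		return 16 * 65536;
	}

	static struct vio_driver example_driver = {
		.id_table	 = example_device_table,
		.probe		 = example_probe,
		.remove		 = example_remove,
		.get_desired_dma = example_get_desired_dma,
		.driver = {
			.name = "example",
		},
	};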
diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h
index 5c871a990c29..146100224def 100644
--- a/include/asm-s390/kvm_virtio.h
+++ b/include/asm-s390/kvm_virtio.h
@@ -50,4 +50,14 @@ struct kvm_vqconfig {
 #define KVM_S390_VIRTIO_RESET		1
 #define KVM_S390_VIRTIO_SET_STATUS	2
 
+#ifdef __KERNEL__
+/* early virtio console setup */
+#ifdef CONFIG_VIRTIO_CONSOLE
+extern void s390_virtio_console_init(void);
+#else
+static inline void s390_virtio_console_init(void)
+{
+}
+#endif /* CONFIG_VIRTIO_CONSOLE */
+#endif /* __KERNEL__ */
 #endif
diff --git a/include/asm-s390/thread_info.h b/include/asm-s390/thread_info.h
index 99bbed99a3b2..91a8f93ad355 100644
--- a/include/asm-s390/thread_info.h
+++ b/include/asm-s390/thread_info.h
@@ -78,10 +78,7 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)((*(unsigned long *) __LC_KERNEL_STACK)-THREAD_SIZE);
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-	__get_free_pages(GFP_KERNEL,THREAD_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti),THREAD_ORDER)
+#define THREAD_SIZE_ORDER THREAD_ORDER
 
 #endif
 
diff --git a/include/asm-sh/ptrace.h b/include/asm-sh/ptrace.h
index 8d6c92b3e770..7d36dc3bee69 100644
--- a/include/asm-sh/ptrace.h
+++ b/include/asm-sh/ptrace.h
@@ -5,7 +5,7 @@
  * Copyright (C) 1999, 2000  Niibe Yutaka
  *
  */
-#if defined(__SH5__) || defined(CONFIG_SUPERH64)
+#if defined(__SH5__)
 struct pt_regs {
 	unsigned long long pc;
 	unsigned long long sr;
diff --git a/include/asm-sh/thread_info.h b/include/asm-sh/thread_info.h
index c50e5d35fe84..5131e3907525 100644
--- a/include/asm-sh/thread_info.h
+++ b/include/asm-sh/thread_info.h
@@ -92,6 +92,8 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(ti)	kzalloc(THREAD_SIZE, GFP_KERNEL)
diff --git a/include/asm-sparc/thread_info_32.h b/include/asm-sparc/thread_info_32.h
index 91b9f5888c85..2cf9db044055 100644
--- a/include/asm-sparc/thread_info_32.h
+++ b/include/asm-sparc/thread_info_32.h
@@ -86,6 +86,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define THREAD_INFO_ORDER  1
 #endif
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info, void)
 #define alloc_thread_info(tsk) BTFIXUP_CALL(alloc_thread_info)()
 
diff --git a/include/asm-sparc/thread_info_64.h b/include/asm-sparc/thread_info_64.h
index c6d2e6c7f844..960969d5ad06 100644
--- a/include/asm-sparc/thread_info_64.h
+++ b/include/asm-sparc/thread_info_64.h
@@ -155,6 +155,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define __THREAD_INFO_ORDER	0
 #endif /* PAGE_SHIFT == 13 */
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
 ({								\
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
index 356b83e2c22e..e07e72846c7a 100644
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -53,21 +53,7 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
-#ifdef CONFIG_DEBUG_STACK_USAGE
-
-#define alloc_thread_info(tsk) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \
-		CONFIG_KERNEL_STACK_ORDER))
-#else
-
-/* thread information allocation */
-#define alloc_thread_info(tsk) \
-	((struct thread_info *) __get_free_pages(GFP_KERNEL, \
-		CONFIG_KERNEL_STACK_ORDER))
-#endif
-
-#define free_thread_info(ti) \
-	free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER)
-
+#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER
 
 #endif
 
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 811e9828ccb3..4a8e80cdcfa5 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -18,7 +18,6 @@ unifdef-y += msr.h
 unifdef-y += mtrr.h
 unifdef-y += posix_types_32.h
 unifdef-y += posix_types_64.h
-unifdef-y += ptrace.h
 unifdef-y += unistd_32.h
 unifdef-y += unistd_64.h
 unifdef-y += vm86.h
diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
index ff87fca0caf9..116e9147fe66 100644
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h
@@ -1,6 +1,62 @@
+/*
+ * Generic GPIO API implementation for x86.
+ *
+ * Derived from the generic GPIO API for powerpc:
+ *
+ * Copyright (c) 2007-2008  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
 #ifndef _ASM_I386_GPIO_H
 #define _ASM_I386_GPIO_H
 
+#ifdef CONFIG_X86_RDC321X
 #include <gpio.h>
+#else /* CONFIG_X86_RDC321X */
+
+#include <asm-generic/gpio.h>
+
+#ifdef CONFIG_GPIOLIB
+
+/*
+ * Just call gpiolib.
+ */
+static inline int gpio_get_value(unsigned int gpio)
+{
+	return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+	__gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+	return __gpio_cansleep(gpio);
+}
+
+/*
+ * Not implemented, yet.
+ */
+static inline int gpio_to_irq(unsigned int gpio)
+{
+	return -ENOSYS;
+}
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_GPIOLIB */
+
+#endif /* CONFIG_X86_RDC321X */
 
 #endif /* _ASM_I386_GPIO_H */
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 3f2de1050988..da0a675adf94 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -152,6 +152,8 @@ struct thread_info {
 #define THREAD_FLAGS GFP_KERNEL
 #endif
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #define alloc_thread_info(tsk)						\
 	((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
 
diff --git a/include/asm-xtensa/ptrace.h b/include/asm-xtensa/ptrace.h
index 422c73e26937..089b0db44816 100644
--- a/include/asm-xtensa/ptrace.h
+++ b/include/asm-xtensa/ptrace.h
@@ -73,10 +73,10 @@
 #define PTRACE_GETXTREGS	18
 #define PTRACE_SETXTREGS	19
 
-#ifndef __ASSEMBLY__
-
 #ifdef __KERNEL__
 
+#ifndef __ASSEMBLY__
+
 /*
  * This struct defines the way the registers are stored on the
  * kernel stack during a system call or other kernel entry.
@@ -122,14 +122,14 @@ extern void show_regs(struct pt_regs *);
 # ifndef CONFIG_SMP
 #  define profile_pc(regs) instruction_pointer(regs)
 # endif
-#endif /* __KERNEL__ */
 
 #else	/* __ASSEMBLY__ */
 
-#ifdef __KERNEL__
 # include <asm/asm-offsets.h>
 #define PT_REGS_OFFSET	  (KERNEL_STACK_SIZE - PT_USER_SIZE)
-#endif
 
 #endif	/* !__ASSEMBLY__ */
+
+#endif	/* __KERNEL__ */
+
 #endif	/* _XTENSA_PTRACE_H */
diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h
index a2c640682ed9..7e4131dd546c 100644
--- a/include/asm-xtensa/thread_info.h
+++ b/include/asm-xtensa/thread_info.h
@@ -111,10 +111,6 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
-
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
@@ -160,6 +156,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TS_USEDFPU		0x0001	/* FPU was used by this task this quantum (SMP) */
 
 #define THREAD_SIZE 8192	//(2*PAGE_SIZE)
+#define THREAD_SIZE_ORDER 1
 
 #endif	/* __KERNEL__ */
 #endif	/* _XTENSA_THREAD_INFO */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 402c8f55d713..4c4142c5aa6e 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -189,7 +189,6 @@ unifdef-y += connector.h
 unifdef-y += cuda.h
 unifdef-y += cyclades.h
 unifdef-y += dccp.h
-unifdef-y += dirent.h
 unifdef-y += dlm.h
 unifdef-y += dlm_plock.h
 unifdef-y += edd.h
diff --git a/include/linux/acct.h b/include/linux/acct.h
index e8cae54e8d88..882dc7248766 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -120,17 +120,20 @@ struct acct_v3
120struct vfsmount; 120struct vfsmount;
121struct super_block; 121struct super_block;
122struct pacct_struct; 122struct pacct_struct;
123struct pid_namespace;
123extern void acct_auto_close_mnt(struct vfsmount *m); 124extern void acct_auto_close_mnt(struct vfsmount *m);
124extern void acct_auto_close(struct super_block *sb); 125extern void acct_auto_close(struct super_block *sb);
125extern void acct_init_pacct(struct pacct_struct *pacct); 126extern void acct_init_pacct(struct pacct_struct *pacct);
126extern void acct_collect(long exitcode, int group_dead); 127extern void acct_collect(long exitcode, int group_dead);
127extern void acct_process(void); 128extern void acct_process(void);
129extern void acct_exit_ns(struct pid_namespace *);
128#else 130#else
129#define acct_auto_close_mnt(x) do { } while (0) 131#define acct_auto_close_mnt(x) do { } while (0)
130#define acct_auto_close(x) do { } while (0) 132#define acct_auto_close(x) do { } while (0)
131#define acct_init_pacct(x) do { } while (0) 133#define acct_init_pacct(x) do { } while (0)
132#define acct_collect(x,y) do { } while (0) 134#define acct_collect(x,y) do { } while (0)
133#define acct_process() do { } while (0) 135#define acct_process() do { } while (0)
136#define acct_exit_ns(ns) do { } while (0)
134#endif 137#endif
135 138
136/* 139/*
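The new acct_exit_ns() hook gives BSD process accounting a per-pid-namespace teardown point; with CONFIG_BSD_PROCESS_ACCT=n it collapses into the empty do-while stub above. A minimal sketch of the intended call site (the surrounding function is illustrative; only the acct_exit_ns() call reflects this patch):

#include <linux/acct.h>
#include <linux/pid_namespace.h>

/* hypothetical pid-namespace teardown path */
static void example_destroy_pid_ns(struct pid_namespace *ns)
{
        acct_exit_ns(ns);       /* drop any accounting state tied to ns */
        /* ... free the namespace itself ... */
}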
diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index 0da17d14fd13..d7afa9dd6635 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -26,9 +26,13 @@
26 26
27#define AT_SECURE 23 /* secure mode boolean */ 27#define AT_SECURE 23 /* secure mode boolean */
28 28
29#define AT_BASE_PLATFORM 24 /* string identifying real platform, may
30 * differ from AT_PLATFORM. */
31
29#define AT_EXECFN 31 /* filename of program */ 32#define AT_EXECFN 31 /* filename of program */
33
30#ifdef __KERNEL__ 34#ifdef __KERNEL__
31#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */ 35#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
32 /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ 36 /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
33#endif 37#endif
34 38
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 4ddf2922fc8d..652470b687c9 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -103,17 +103,16 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
103 __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) 103 __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
104#define alloc_bootmem_low_pages(x) \ 104#define alloc_bootmem_low_pages(x) \
105 __alloc_bootmem_low(x, PAGE_SIZE, 0) 105 __alloc_bootmem_low(x, PAGE_SIZE, 0)
106#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
107
108extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
109 int flags);
110
111#define alloc_bootmem_node(pgdat, x) \ 106#define alloc_bootmem_node(pgdat, x) \
112 __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) 107 __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
113#define alloc_bootmem_pages_node(pgdat, x) \ 108#define alloc_bootmem_pages_node(pgdat, x) \
114 __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) 109 __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
115#define alloc_bootmem_low_pages_node(pgdat, x) \ 110#define alloc_bootmem_low_pages_node(pgdat, x) \
116 __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) 111 __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
112#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
113
114extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
115 int flags);
117 116
118extern void *alloc_bootmem_section(unsigned long size, 117extern void *alloc_bootmem_section(unsigned long size,
119 unsigned long section_nr); 118 unsigned long section_nr);
diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 961ed4b48d8e..44f95b92393b 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -94,12 +94,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p)
94#define __le32_to_cpus(x) __swab32s((x)) 94#define __le32_to_cpus(x) __swab32s((x))
95#define __cpu_to_le16s(x) __swab16s((x)) 95#define __cpu_to_le16s(x) __swab16s((x))
96#define __le16_to_cpus(x) __swab16s((x)) 96#define __le16_to_cpus(x) __swab16s((x))
97#define __cpu_to_be64s(x) do {} while (0) 97#define __cpu_to_be64s(x) do { (void)(x); } while (0)
98#define __be64_to_cpus(x) do {} while (0) 98#define __be64_to_cpus(x) do { (void)(x); } while (0)
99#define __cpu_to_be32s(x) do {} while (0) 99#define __cpu_to_be32s(x) do { (void)(x); } while (0)
100#define __be32_to_cpus(x) do {} while (0) 100#define __be32_to_cpus(x) do { (void)(x); } while (0)
101#define __cpu_to_be16s(x) do {} while (0) 101#define __cpu_to_be16s(x) do { (void)(x); } while (0)
102#define __be16_to_cpus(x) do {} while (0) 102#define __be16_to_cpus(x) do { (void)(x); } while (0)
103 103
104#ifdef __KERNEL__ 104#ifdef __KERNEL__
105#include <linux/byteorder/generic.h> 105#include <linux/byteorder/generic.h>
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index 05dc7c35b3b2..4cc170a31762 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -88,12 +88,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p)
88{ 88{
89 return __swab16p((__u16 *)p); 89 return __swab16p((__u16 *)p);
90} 90}
91#define __cpu_to_le64s(x) do {} while (0) 91#define __cpu_to_le64s(x) do { (void)(x); } while (0)
92#define __le64_to_cpus(x) do {} while (0) 92#define __le64_to_cpus(x) do { (void)(x); } while (0)
93#define __cpu_to_le32s(x) do {} while (0) 93#define __cpu_to_le32s(x) do { (void)(x); } while (0)
94#define __le32_to_cpus(x) do {} while (0) 94#define __le32_to_cpus(x) do { (void)(x); } while (0)
95#define __cpu_to_le16s(x) do {} while (0) 95#define __cpu_to_le16s(x) do { (void)(x); } while (0)
96#define __le16_to_cpus(x) do {} while (0) 96#define __le16_to_cpus(x) do { (void)(x); } while (0)
97#define __cpu_to_be64s(x) __swab64s((x)) 97#define __cpu_to_be64s(x) __swab64s((x))
98#define __be64_to_cpus(x) __swab64s((x)) 98#define __be64_to_cpus(x) __swab64s((x))
99#define __cpu_to_be32s(x) __swab32s((x)) 99#define __cpu_to_be32s(x) __swab32s((x))
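On the native-endian side these helpers are no-ops, and the old empty bodies could provoke spurious "unused variable" warnings for data touched only through them; do { (void)(x); } while (0) marks the argument as used while still generating no code. A minimal sketch, assuming a big-endian configuration:

#include <linux/byteorder/big_endian.h>

static void example(void)
{
        __be32 v = __cpu_to_be32(42);

        /* expands to do { (void)(&v); } while (0): no code is
         * generated, but v counts as used, so no warning */
        __be32_to_cpus(&v);
}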
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e155aa78d859..c98dd7cb7076 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -21,11 +21,13 @@
21struct cgroupfs_root; 21struct cgroupfs_root;
22struct cgroup_subsys; 22struct cgroup_subsys;
23struct inode; 23struct inode;
24struct cgroup;
24 25
25extern int cgroup_init_early(void); 26extern int cgroup_init_early(void);
26extern int cgroup_init(void); 27extern int cgroup_init(void);
27extern void cgroup_init_smp(void); 28extern void cgroup_init_smp(void);
28extern void cgroup_lock(void); 29extern void cgroup_lock(void);
30extern bool cgroup_lock_live_group(struct cgroup *cgrp);
29extern void cgroup_unlock(void); 31extern void cgroup_unlock(void);
30extern void cgroup_fork(struct task_struct *p); 32extern void cgroup_fork(struct task_struct *p);
31extern void cgroup_fork_callbacks(struct task_struct *p); 33extern void cgroup_fork_callbacks(struct task_struct *p);
@@ -205,50 +207,64 @@ struct cftype {
205 * subsystem, followed by a period */ 207 * subsystem, followed by a period */
206 char name[MAX_CFTYPE_NAME]; 208 char name[MAX_CFTYPE_NAME];
207 int private; 209 int private;
208 int (*open) (struct inode *inode, struct file *file); 210
209 ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft, 211 /*
210 struct file *file, 212 * If non-zero, defines the maximum length of string that can
211 char __user *buf, size_t nbytes, loff_t *ppos); 213 * be passed to write_string; defaults to 64
214 */
215 size_t max_write_len;
216
217 int (*open)(struct inode *inode, struct file *file);
218 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
219 struct file *file,
220 char __user *buf, size_t nbytes, loff_t *ppos);
212 /* 221 /*
213 * read_u64() is a shortcut for the common case of returning a 222 * read_u64() is a shortcut for the common case of returning a
214 * single integer. Use it in place of read() 223 * single integer. Use it in place of read()
215 */ 224 */
216 u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft); 225 u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
217 /* 226 /*
218 * read_s64() is a signed version of read_u64() 227 * read_s64() is a signed version of read_u64()
219 */ 228 */
220 s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft); 229 s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
221 /* 230 /*
222 * read_map() is used for defining a map of key/value 231 * read_map() is used for defining a map of key/value
223 * pairs. It should call cb->fill(cb, key, value) for each 232 * pairs. It should call cb->fill(cb, key, value) for each
224 * entry. The key/value pairs (and their ordering) should not 233 * entry. The key/value pairs (and their ordering) should not
225 * change between reboots. 234 * change between reboots.
226 */ 235 */
227 int (*read_map) (struct cgroup *cont, struct cftype *cft, 236 int (*read_map)(struct cgroup *cont, struct cftype *cft,
228 struct cgroup_map_cb *cb); 237 struct cgroup_map_cb *cb);
229 /* 238 /*
230 * read_seq_string() is used for outputting a simple sequence 239 * read_seq_string() is used for outputting a simple sequence
231 * using seqfile. 240 * using seqfile.
232 */ 241 */
233 int (*read_seq_string) (struct cgroup *cont, struct cftype *cft, 242 int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
234 struct seq_file *m); 243 struct seq_file *m);
235 244
236 ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft, 245 ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
237 struct file *file, 246 struct file *file,
238 const char __user *buf, size_t nbytes, loff_t *ppos); 247 const char __user *buf, size_t nbytes, loff_t *ppos);
239 248
240 /* 249 /*
241 * write_u64() is a shortcut for the common case of accepting 250 * write_u64() is a shortcut for the common case of accepting
242 * a single integer (as parsed by simple_strtoull) from 251 * a single integer (as parsed by simple_strtoull) from
243 * userspace. Use in place of write(); return 0 or error. 252 * userspace. Use in place of write(); return 0 or error.
244 */ 253 */
245 int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val); 254 int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
246 /* 255 /*
247 * write_s64() is a signed version of write_u64() 256 * write_s64() is a signed version of write_u64()
248 */ 257 */
249 int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val); 258 int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
250 259
251 /* 260 /*
261 * write_string() is passed a nul-terminated kernelspace
262 * buffer of maximum length determined by max_write_len.
263 * Returns 0 or -ve error code.
264 */
265 int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
266 const char *buffer);
267 /*
252 * trigger() callback can be used to get some kick from the 268 * trigger() callback can be used to get some kick from the
253 * userspace, when the actual string written is not important 269 * userspace, when the actual string written is not important
254 * at all. The private field can be used to determine the 270 * at all. The private field can be used to determine the
@@ -256,7 +272,7 @@ struct cftype {
256 */ 272 */
257 int (*trigger)(struct cgroup *cgrp, unsigned int event); 273 int (*trigger)(struct cgroup *cgrp, unsigned int event);
258 274
259 int (*release) (struct inode *inode, struct file *file); 275 int (*release)(struct inode *inode, struct file *file);
260}; 276};
261 277
262struct cgroup_scanner { 278struct cgroup_scanner {
@@ -348,7 +364,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
348 return task_subsys_state(task, subsys_id)->cgroup; 364 return task_subsys_state(task, subsys_id)->cgroup;
349} 365}
350 366
351int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); 367int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
368 char *nodename);
352 369
353/* A cgroup_iter should be treated as an opaque object */ 370/* A cgroup_iter should be treated as an opaque object */
354struct cgroup_iter { 371struct cgroup_iter {
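With the write_string() callback added above, the cgroup core copies and nul-terminates the user buffer (bounded by max_write_len, default 64) before the handler runs. A hedged sketch of a control file using it; the example_* names are invented:

#include <linux/cgroup.h>
#include <linux/errno.h>
#include <linux/string.h>

static int example_write(struct cgroup *cgrp, struct cftype *cft,
                         const char *buffer)
{
        /* buffer is kernelspace, nul-terminated, length bounded by
         * max_write_len (here 16) */
        if (strcmp(buffer, "reset") != 0)
                return -EINVAL;
        /* ... perform the reset ... */
        return 0;
}

static struct cftype example_file = {
        .name           = "example.command",
        .max_write_len  = 16,
        .write_string   = example_write,
};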
diff --git a/include/linux/coda.h b/include/linux/coda.h
index b5cf0780c51a..96c87693800b 100644
--- a/include/linux/coda.h
+++ b/include/linux/coda.h
@@ -199,28 +199,6 @@ typedef u_int32_t vuid_t;
199typedef u_int32_t vgid_t; 199typedef u_int32_t vgid_t;
200#endif /*_VUID_T_ */ 200#endif /*_VUID_T_ */
201 201
202#ifdef CONFIG_CODA_FS_OLD_API
203struct CodaFid {
204 u_int32_t opaque[3];
205};
206
207static __inline__ ino_t coda_f2i(struct CodaFid *fid)
208{
209 if ( ! fid )
210 return 0;
211 if (fid->opaque[1] == 0xfffffffe || fid->opaque[1] == 0xffffffff)
212 return ((fid->opaque[0] << 20) | (fid->opaque[2] & 0xfffff));
213 else
214 return (fid->opaque[2] + (fid->opaque[1]<<10) + (fid->opaque[0]<<20));
215}
216
217struct coda_cred {
218 vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
219 vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
220};
221
222#else /* not defined(CONFIG_CODA_FS_OLD_API) */
223
224struct CodaFid { 202struct CodaFid {
225 u_int32_t opaque[4]; 203 u_int32_t opaque[4];
226}; 204};
@@ -228,8 +206,6 @@ struct CodaFid {
228#define coda_f2i(fid)\ 206#define coda_f2i(fid)\
229 (fid ? (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0) 207 (fid ? (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0)
230 208
231#endif
232
233#ifndef _VENUS_VATTR_T_ 209#ifndef _VENUS_VATTR_T_
234#define _VENUS_VATTR_T_ 210#define _VENUS_VATTR_T_
235/* 211/*
@@ -313,15 +289,7 @@ struct coda_statfs {
313 289
314#define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t) 290#define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t)
315 291
316#if 0
317#define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */
318#define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */
319#endif
320#ifdef CONFIG_CODA_FS_OLD_API
321#define CODA_KERNEL_VERSION 2 /* venus_lookup got an extra parameter */
322#else
323#define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */ 292#define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */
324#endif
325 293
326/* 294/*
327 * Venus <-> Coda RPC arguments 295 * Venus <-> Coda RPC arguments
@@ -329,16 +297,9 @@ struct coda_statfs {
329struct coda_in_hdr { 297struct coda_in_hdr {
330 u_int32_t opcode; 298 u_int32_t opcode;
331 u_int32_t unique; /* Keep multiple outstanding msgs distinct */ 299 u_int32_t unique; /* Keep multiple outstanding msgs distinct */
332#ifdef CONFIG_CODA_FS_OLD_API
333 u_int16_t pid; /* Common to all */
334 u_int16_t pgid; /* Common to all */
335 u_int16_t sid; /* Common to all */
336 struct coda_cred cred; /* Common to all */
337#else
338 pid_t pid; 300 pid_t pid;
339 pid_t pgid; 301 pid_t pgid;
340 vuid_t uid; 302 vuid_t uid;
341#endif
342}; 303};
343 304
344/* Really important that opcode and unique are 1st two fields! */ 305/* Really important that opcode and unique are 1st two fields! */
@@ -613,11 +574,7 @@ struct coda_vget_out {
613/* CODA_PURGEUSER is a venus->kernel call */ 574/* CODA_PURGEUSER is a venus->kernel call */
614struct coda_purgeuser_out { 575struct coda_purgeuser_out {
615 struct coda_out_hdr oh; 576 struct coda_out_hdr oh;
616#ifdef CONFIG_CODA_FS_OLD_API
617 struct coda_cred cred;
618#else
619 vuid_t uid; 577 vuid_t uid;
620#endif
621}; 578};
622 579
623/* coda_zapfile: */ 580/* coda_zapfile: */
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7464ba3b4333..d7faf8808497 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,10 +69,11 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
69#endif 69#endif
70 70
71int cpu_up(unsigned int cpu); 71int cpu_up(unsigned int cpu);
72
73extern void cpu_hotplug_init(void); 72extern void cpu_hotplug_init(void);
73extern void cpu_maps_update_begin(void);
74extern void cpu_maps_update_done(void);
74 75
75#else 76#else /* CONFIG_SMP */
76 77
77static inline int register_cpu_notifier(struct notifier_block *nb) 78static inline int register_cpu_notifier(struct notifier_block *nb)
78{ 79{
@@ -87,10 +88,16 @@ static inline void cpu_hotplug_init(void)
87{ 88{
88} 89}
89 90
91static inline void cpu_maps_update_begin(void)
92{
93}
94
95static inline void cpu_maps_update_done(void)
96{
97}
98
90#endif /* CONFIG_SMP */ 99#endif /* CONFIG_SMP */
91extern struct sysdev_class cpu_sysdev_class; 100extern struct sysdev_class cpu_sysdev_class;
92extern void cpu_maps_update_begin(void);
93extern void cpu_maps_update_done(void);
94 101
95#ifdef CONFIG_HOTPLUG_CPU 102#ifdef CONFIG_HOTPLUG_CPU
96/* Stop CPUs going up and down. */ 103/* Stop CPUs going up and down. */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 22c7ac5cd80c..6cd39a927e1f 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcore;
22 22
23#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) 23#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
24 24
25static inline int is_kdump_kernel(void)
26{
27 return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
28}
29#else /* !CONFIG_CRASH_DUMP */
30static inline int is_kdump_kernel(void) { return 0; }
25#endif /* CONFIG_CRASH_DUMP */ 31#endif /* CONFIG_CRASH_DUMP */
32
33extern unsigned long saved_max_pfn;
26#endif /* LINUX_CRASHDUMP_H */ 34#endif /* LINUX_CRASHDUMP_H */
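Because the new !CONFIG_CRASH_DUMP stub returns 0, callers can test is_kdump_kernel() unconditionally instead of wrapping the check in #ifdefs. An illustrative caller (the driver function is invented):

#include <linux/crash_dump.h>

static int example_driver_init(void)
{
        if (is_kdump_kernel()) {
                /* elfcorehdr= was passed: we are the capture kernel,
                 * so keep initialisation minimal */
                return 0;
        }
        /* ... full initialisation ... */
        return 0;
}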
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index ab94bc083558..f352f06fa063 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -39,6 +39,8 @@ extern void __delayacct_blkio_start(void);
39extern void __delayacct_blkio_end(void); 39extern void __delayacct_blkio_end(void);
40extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); 40extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
41extern __u64 __delayacct_blkio_ticks(struct task_struct *); 41extern __u64 __delayacct_blkio_ticks(struct task_struct *);
42extern void __delayacct_freepages_start(void);
43extern void __delayacct_freepages_end(void);
42 44
43static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) 45static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
44{ 46{
@@ -107,6 +109,18 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
107 return 0; 109 return 0;
108} 110}
109 111
112static inline void delayacct_freepages_start(void)
113{
114 if (current->delays)
115 __delayacct_freepages_start();
116}
117
118static inline void delayacct_freepages_end(void)
119{
120 if (current->delays)
121 __delayacct_freepages_end();
122}
123
110#else 124#else
111static inline void delayacct_set_flag(int flag) 125static inline void delayacct_set_flag(int flag)
112{} 126{}
@@ -129,6 +143,11 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
129{ return 0; } 143{ return 0; }
130static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) 144static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
131{ return 0; } 145{ return 0; }
146static inline void delayacct_freepages_start(void)
147{}
148static inline void delayacct_freepages_end(void)
149{}
150
132#endif /* CONFIG_TASK_DELAY_ACCT */ 151#endif /* CONFIG_TASK_DELAY_ACCT */
133 152
134#endif 153#endif
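The freepages pair brackets the time a task spends synchronously reclaiming memory, mirroring the existing blkio hooks, and compiles away entirely when CONFIG_TASK_DELAY_ACCT is off. A sketch of the intended bracketing; the reclaim body is a stand-in:

#include <linux/delayacct.h>

static unsigned long example_direct_reclaim(void)
{
        unsigned long nr_freed = 0;

        delayacct_freepages_start();
        /* ... synchronous page reclaim would run here ... */
        delayacct_freepages_end();

        return nr_freed;
}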
diff --git a/include/linux/dirent.h b/include/linux/dirent.h
index 5d6023b87800..f072fb8d10a3 100644
--- a/include/linux/dirent.h
+++ b/include/linux/dirent.h
@@ -1,23 +1,6 @@
1#ifndef _LINUX_DIRENT_H 1#ifndef _LINUX_DIRENT_H
2#define _LINUX_DIRENT_H 2#define _LINUX_DIRENT_H
3 3
4struct dirent {
5 long d_ino;
6 __kernel_off_t d_off;
7 unsigned short d_reclen;
8 char d_name[256]; /* We must not include limits.h! */
9};
10
11struct dirent64 {
12 __u64 d_ino;
13 __s64 d_off;
14 unsigned short d_reclen;
15 unsigned char d_type;
16 char d_name[256];
17};
18
19#ifdef __KERNEL__
20
21struct linux_dirent64 { 4struct linux_dirent64 {
22 u64 d_ino; 5 u64 d_ino;
23 s64 d_off; 6 s64 d_off;
@@ -26,7 +9,4 @@ struct linux_dirent64 {
26 char d_name[0]; 9 char d_name[0];
27}; 10};
28 11
29#endif /* __KERNEL__ */
30
31
32#endif 12#endif
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 84cec2aa9f1e..2efe7b863cff 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -284,8 +284,8 @@ struct ext2_inode {
284 284
285#ifdef __hurd__ 285#ifdef __hurd__
286#define i_translator osd1.hurd1.h_i_translator 286#define i_translator osd1.hurd1.h_i_translator
287#define i_frag osd2.hurd2.h_i_frag; 287#define i_frag osd2.hurd2.h_i_frag
288#define i_fsize osd2.hurd2.h_i_fsize; 288#define i_fsize osd2.hurd2.h_i_fsize
289#define i_uid_high osd2.hurd2.h_i_uid_high 289#define i_uid_high osd2.hurd2.h_i_uid_high
290#define i_gid_high osd2.hurd2.h_i_gid_high 290#define i_gid_high osd2.hurd2.h_i_gid_high
291#define i_author osd2.hurd2.h_i_author 291#define i_author osd2.hurd2.h_i_author
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 36c540396377..80171ee89a22 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *);
832extern void ext3_dirty_inode(struct inode *); 832extern void ext3_dirty_inode(struct inode *);
833extern int ext3_change_inode_journal_flag(struct inode *, int); 833extern int ext3_change_inode_journal_flag(struct inode *, int);
834extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); 834extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
835extern int ext3_can_truncate(struct inode *inode);
835extern void ext3_truncate (struct inode *); 836extern void ext3_truncate (struct inode *);
836extern void ext3_set_inode_flags(struct inode *); 837extern void ext3_set_inode_flags(struct inode *);
837extern void ext3_get_inode_flags(struct ext3_inode_info *); 838extern void ext3_get_inode_flags(struct ext3_inode_info *);
diff --git a/include/linux/fd1772.h b/include/linux/fd1772.h
deleted file mode 100644
index 871d6e4c677e..000000000000
--- a/include/linux/fd1772.h
+++ /dev/null
@@ -1,80 +0,0 @@
1#ifndef _LINUX_FD1772REG_H
2#define _LINUX_FD1772REG_H
3
4/*
5** WD1772 stuff - originally from the M68K Linux
6 * Modified for Archimedes by Dave Gilbert (gilbertd@cs.man.ac.uk)
7 */
8
9/* register codes */
10
11#define FDC1772SELREG_STP (0x80) /* command/status register */
12#define FDC1772SELREG_TRA (0x82) /* track register */
13#define FDC1772SELREG_SEC (0x84) /* sector register */
14#define FDC1772SELREG_DTA (0x86) /* data register */
15
16/* register names for FDC1772_READ/WRITE macros */
17
18#define FDC1772REG_CMD 0
19#define FDC1772REG_STATUS 0
20#define FDC1772REG_TRACK 2
21#define FDC1772REG_SECTOR 4
22#define FDC1772REG_DATA 6
23
24/* command opcodes */
25
26#define FDC1772CMD_RESTORE (0x00) /* - */
27#define FDC1772CMD_SEEK (0x10) /* | */
28#define FDC1772CMD_STEP (0x20) /* | TYP 1 Commands */
29#define FDC1772CMD_STIN (0x40) /* | */
30#define FDC1772CMD_STOT (0x60) /* - */
31#define FDC1772CMD_RDSEC (0x80) /* - TYP 2 Commands */
32#define FDC1772CMD_WRSEC (0xa0) /* - " */
33#define FDC1772CMD_RDADR (0xc0) /* - */
34#define FDC1772CMD_RDTRA (0xe0) /* | TYP 3 Commands */
35#define FDC1772CMD_WRTRA (0xf0) /* - */
36#define FDC1772CMD_FORCI (0xd0) /* - TYP 4 Command */
37
38/* command modifier bits */
39
40#define FDC1772CMDADD_SR6 (0x00) /* step rate settings */
41#define FDC1772CMDADD_SR12 (0x01)
42#define FDC1772CMDADD_SR2 (0x02)
43#define FDC1772CMDADD_SR3 (0x03)
44#define FDC1772CMDADD_V (0x04) /* verify */
45#define FDC1772CMDADD_H (0x08) /* wait for spin-up */
46#define FDC1772CMDADD_U (0x10) /* update track register */
47#define FDC1772CMDADD_M (0x10) /* multiple sector access */
48#define FDC1772CMDADD_E (0x04) /* head settling flag */
49#define FDC1772CMDADD_P (0x02) /* precompensation */
50#define FDC1772CMDADD_A0 (0x01) /* DAM flag */
51
52/* status register bits */
53
54#define FDC1772STAT_MOTORON (0x80) /* motor on */
55#define FDC1772STAT_WPROT (0x40) /* write protected (FDC1772CMD_WR*) */
56#define FDC1772STAT_SPINUP (0x20) /* motor speed stable (Type I) */
57#define FDC1772STAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */
58#define FDC1772STAT_RECNF (0x10) /* record not found */
59#define FDC1772STAT_CRC (0x08) /* CRC error */
60#define FDC1772STAT_TR00 (0x04) /* Track 00 flag (Type I) */
61#define FDC1772STAT_LOST (0x04) /* Lost Data (Type II+III) */
62#define FDC1772STAT_IDX (0x02) /* Index status (Type I) */
63#define FDC1772STAT_DRQ (0x02) /* DRQ status (Type II+III) */
64#define FDC1772STAT_BUSY (0x01) /* FDC1772 is busy */
65
66
67/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */
68#define DSKSIDE (0x01)
69
70#define DSKDRVNONE (0x06)
71#define DSKDRV0 (0x02)
72#define DSKDRV1 (0x04)
73
74/* step rates */
75#define FDC1772STEP_6 0x00
76#define FDC1772STEP_12 0x01
77#define FDC1772STEP_2 0x02
78#define FDC1772STEP_3 0x03
79
80#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b86f806014c..49d8eb7a71be 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -886,6 +886,12 @@ static inline int file_check_writeable(struct file *filp)
886#define FL_SLEEP 128 /* A blocking lock */ 886#define FL_SLEEP 128 /* A blocking lock */
887 887
888/* 888/*
889 * Special return value from posix_lock_file() and vfs_lock_file() for
890 * asynchronous locking.
891 */
892#define FILE_LOCK_DEFERRED 1
893
894/*
889 * The POSIX file lock owner is determined by 895 * The POSIX file lock owner is determined by
890 * the "struct files_struct" in the thread group 896 * the "struct files_struct" in the thread group
891 * (or NULL for no owner - BSD locks). 897 * (or NULL for no owner - BSD locks).
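FILE_LOCK_DEFERRED gives asynchronous lock requests a result that is distinct from both success (0) and failure (negative errno). A sketch of how a caller such as a network lock manager might branch on it; error handling is trimmed:

#include <linux/fcntl.h>
#include <linux/fs.h>

static int example_apply_lock(struct file *filp, struct file_lock *fl)
{
        int err = vfs_lock_file(filp, F_SETLK, fl, NULL);

        if (err == FILE_LOCK_DEFERRED)
                return 0;       /* queued; grant/deny arrives asynchronously */
        return err;             /* 0 on success, -ve on failure */
}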
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index d48282197696..265635dc9908 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -104,11 +104,14 @@ struct fuse_file_lock {
104 104
105/** 105/**
106 * INIT request/reply flags 106 * INIT request/reply flags
107 *
108 * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
107 */ 109 */
108#define FUSE_ASYNC_READ (1 << 0) 110#define FUSE_ASYNC_READ (1 << 0)
109#define FUSE_POSIX_LOCKS (1 << 1) 111#define FUSE_POSIX_LOCKS (1 << 1)
110#define FUSE_FILE_OPS (1 << 2) 112#define FUSE_FILE_OPS (1 << 2)
111#define FUSE_ATOMIC_O_TRUNC (1 << 3) 113#define FUSE_ATOMIC_O_TRUNC (1 << 3)
114#define FUSE_EXPORT_SUPPORT (1 << 4)
112#define FUSE_BIG_WRITES (1 << 5) 115#define FUSE_BIG_WRITES (1 << 5)
113 116
114/** 117/**
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e8787417f65a..118216f1bd3c 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -541,7 +541,7 @@ extern dev_t blk_lookup_devt(const char *name, int part);
541extern char *disk_name (struct gendisk *hd, int part, char *buf); 541extern char *disk_name (struct gendisk *hd, int part, char *buf);
542 542
543extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); 543extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
544extern void add_partition(struct gendisk *, int, sector_t, sector_t, int); 544extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
545extern void delete_partition(struct gendisk *, int); 545extern void delete_partition(struct gendisk *, int);
546extern void printk_all_partitions(void); 546extern void printk_all_partitions(void);
547 547
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 98be6c5762b9..730a20b83576 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -79,6 +79,19 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
79 WARN_ON(1); 79 WARN_ON(1);
80} 80}
81 81
82static inline int gpio_export(unsigned gpio, bool direction_may_change)
83{
84 /* GPIO can never have been requested or set as {in,out}put */
85 WARN_ON(1);
86 return -EINVAL;
87}
88
89static inline void gpio_unexport(unsigned gpio)
90{
91 /* GPIO can never have been exported */
92 WARN_ON(1);
93}
94
82static inline int gpio_to_irq(unsigned gpio) 95static inline int gpio_to_irq(unsigned gpio)
83{ 96{
84 /* GPIO can never have been requested or set as input */ 97 /* GPIO can never have been requested or set as input */
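gpio_export() and gpio_unexport() are the gpiolib sysfs entry points; the stubs above only WARN because without gpiolib no GPIO can have been requested in the first place. Illustrative use on a gpiolib platform (the GPIO number and label are made up):

#include <linux/gpio.h>

static int example_export_led(void)
{
        int err = gpio_request(42, "status-led");

        if (err)
                return err;
        gpio_direction_output(42, 0);
        /* false: userspace may not change the direction via sysfs */
        return gpio_export(42, false);
}

static void example_unexport_led(void)
{
        gpio_unexport(42);
        gpio_free(42);
}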
diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h
new file mode 100644
index 000000000000..e10336631c62
--- /dev/null
+++ b/include/linux/i2c/max732x.h
@@ -0,0 +1,19 @@
1#ifndef __LINUX_I2C_MAX732X_H
2#define __LINUX_I2C_MAX732X_H
3
4/* platform data for the MAX732x 8/16-bit I/O expander driver */
5
6struct max732x_platform_data {
7 /* number of the first GPIO */
8 unsigned gpio_base;
9
10 void *context; /* param to setup/teardown */
11
12 int (*setup)(struct i2c_client *client,
13 unsigned gpio, unsigned ngpio,
14 void *context);
15 int (*teardown)(struct i2c_client *client,
16 unsigned gpio, unsigned ngpio,
17 void *context);
18};
19#endif /* __LINUX_I2C_MAX732X_H */
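As with other I2C expander drivers of this kind, the platform data fixes where the expander's pins land in the global GPIO number space and lets board code hook chip setup/teardown. A hypothetical board registration; the chip name, address and gpio_base are made-up values:

#include <linux/i2c.h>
#include <linux/i2c/max732x.h>

static struct max732x_platform_data example_pdata = {
        .gpio_base = 224,               /* expander pins become GPIO 224.. */
};

static struct i2c_board_info example_i2c_devices[] __initdata = {
        {
                I2C_BOARD_INFO("max7312", 0x20),
                .platform_data = &example_pdata,
        },
};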
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 9a2d762124de..fa035f96f2a3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/rcupdate.h>
18 19
19#if BITS_PER_LONG == 32 20#if BITS_PER_LONG == 32
20# define IDR_BITS 5 21# define IDR_BITS 5
@@ -51,6 +52,7 @@ struct idr_layer {
51 unsigned long bitmap; /* A zero bit means "space here" */ 52 unsigned long bitmap; /* A zero bit means "space here" */
52 struct idr_layer *ary[1<<IDR_BITS]; 53 struct idr_layer *ary[1<<IDR_BITS];
53 int count; /* When zero, we can release it */ 54 int count; /* When zero, we can release it */
55 struct rcu_head rcu_head;
54}; 56};
55 57
56struct idr { 58struct idr {
@@ -71,6 +73,28 @@ struct idr {
71} 73}
72#define DEFINE_IDR(name) struct idr name = IDR_INIT(name) 74#define DEFINE_IDR(name) struct idr name = IDR_INIT(name)
73 75
76/* Actions to be taken after a call to _idr_sub_alloc */
77#define IDR_NEED_TO_GROW -2
78#define IDR_NOMORE_SPACE -3
79
80#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
81
82/**
83 * idr synchronization (stolen from radix-tree.h)
84 *
85 * idr_find() is able to be called locklessly, using RCU. The caller must
86 * ensure calls to this function are made within rcu_read_lock() regions.
87 * Other readers (lock-free or otherwise) and modifications may be running
88 * concurrently.
89 *
90 * It is still required that the caller manage the synchronization and
91 * lifetimes of the items. So if RCU lock-free lookups are used, typically
92 * this would mean that the items have their own locks, or are amenable to
93 * lock-free access; and that the items are freed by RCU (or only freed after
94 * having been deleted from the idr tree *and* a synchronize_rcu() grace
95 * period).
96 */
97
74/* 98/*
75 * This is what we export. 99 * This is what we export.
76 */ 100 */
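With idr_layer now freed through the new rcu_head, lookups may run under rcu_read_lock() exactly as the comment above prescribes; object lifetime stays the caller's responsibility. A hedged sketch of the pattern (my_idr, struct my_obj and my_obj_tryget() are all illustrative):

#include <linux/idr.h>
#include <linux/rcupdate.h>

struct my_obj;                          /* caller-defined, freed via RCU */
extern struct idr my_idr;
extern int my_obj_tryget(struct my_obj *obj);

static struct my_obj *example_lookup(int id)
{
        struct my_obj *obj;

        rcu_read_lock();
        obj = idr_find(&my_idr, id);
        /* a per-object refcount still guards the object's lifetime */
        if (obj && !my_obj_tryget(obj))
                obj = NULL;
        rcu_read_unlock();
        return obj;
}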
diff --git a/include/linux/init.h b/include/linux/init.h
index 21d658cdfa27..42ae95411a93 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -275,13 +275,7 @@ void __init parse_early_param(void);
275 275
276#define security_initcall(fn) module_init(fn) 276#define security_initcall(fn) module_init(fn)
277 277
278/* These macros create a dummy inline: gcc 2.9x does not count alias 278/* Each module must use one module_init(). */
279 as usage, hence the `unused function' warning when __init functions
280 are declared static. We use the dummy __*_module_inline functions
281 both to kill the warning and check the type of the init/cleanup
282 function. */
283
284/* Each module must use one module_init(), or one no_module_init */
285#define module_init(initfn) \ 279#define module_init(initfn) \
286 static inline initcall_t __inittest(void) \ 280 static inline initcall_t __inittest(void) \
287 { return initfn; } \ 281 { return initfn; } \
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 93c45acf249a..021d8e720c79 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -122,7 +122,7 @@ extern struct group_info init_groups;
122 .state = 0, \ 122 .state = 0, \
123 .stack = &init_thread_info, \ 123 .stack = &init_thread_info, \
124 .usage = ATOMIC_INIT(2), \ 124 .usage = ATOMIC_INIT(2), \
125 .flags = 0, \ 125 .flags = PF_KTHREAD, \
126 .lock_depth = -1, \ 126 .lock_depth = -1, \
127 .prio = MAX_PRIO-20, \ 127 .prio = MAX_PRIO-20, \
128 .static_prio = MAX_PRIO-20, \ 128 .static_prio = MAX_PRIO-20, \
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index ea6c18a8b0d4..ea330f9e7100 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -36,6 +36,7 @@ struct ipc_namespace {
36 int msg_ctlmni; 36 int msg_ctlmni;
37 atomic_t msg_bytes; 37 atomic_t msg_bytes;
38 atomic_t msg_hdrs; 38 atomic_t msg_hdrs;
39 int auto_msgmni;
39 40
40 size_t shm_ctlmax; 41 size_t shm_ctlmax;
41 size_t shm_ctlall; 42 size_t shm_ctlall;
@@ -53,7 +54,7 @@ extern atomic_t nr_ipc_ns;
53 54
54extern int register_ipcns_notifier(struct ipc_namespace *); 55extern int register_ipcns_notifier(struct ipc_namespace *);
55extern int cond_register_ipcns_notifier(struct ipc_namespace *); 56extern int cond_register_ipcns_notifier(struct ipc_namespace *);
56extern int unregister_ipcns_notifier(struct ipc_namespace *); 57extern void unregister_ipcns_notifier(struct ipc_namespace *);
57extern int ipcns_notify(unsigned long); 58extern int ipcns_notify(unsigned long);
58 59
59#else /* CONFIG_SYSVIPC */ 60#else /* CONFIG_SYSVIPC */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 2b1c2e58566e..74bde13224c9 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -11,6 +11,8 @@
11#ifndef _LINUX_TRACE_IRQFLAGS_H 11#ifndef _LINUX_TRACE_IRQFLAGS_H
12#define _LINUX_TRACE_IRQFLAGS_H 12#define _LINUX_TRACE_IRQFLAGS_H
13 13
14#include <linux/typecheck.h>
15
14#ifdef CONFIG_TRACE_IRQFLAGS 16#ifdef CONFIG_TRACE_IRQFLAGS
15 extern void trace_softirqs_on(unsigned long ip); 17 extern void trace_softirqs_on(unsigned long ip);
16 extern void trace_softirqs_off(unsigned long ip); 18 extern void trace_softirqs_off(unsigned long ip);
@@ -58,18 +60,24 @@
58 do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) 60 do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
59#define local_irq_disable() \ 61#define local_irq_disable() \
60 do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) 62 do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
61#define local_irq_save(flags) \ 63#define local_irq_save(flags) \
62 do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) 64 do { \
65 typecheck(unsigned long, flags); \
66 raw_local_irq_save(flags); \
67 trace_hardirqs_off(); \
68 } while (0)
63 69
64#define local_irq_restore(flags) \ 70
65 do { \ 71#define local_irq_restore(flags) \
66 if (raw_irqs_disabled_flags(flags)) { \ 72 do { \
67 raw_local_irq_restore(flags); \ 73 typecheck(unsigned long, flags); \
68 trace_hardirqs_off(); \ 74 if (raw_irqs_disabled_flags(flags)) { \
69 } else { \ 75 raw_local_irq_restore(flags); \
70 trace_hardirqs_on(); \ 76 trace_hardirqs_off(); \
71 raw_local_irq_restore(flags); \ 77 } else { \
72 } \ 78 trace_hardirqs_on(); \
79 raw_local_irq_restore(flags); \
80 } \
73 } while (0) 81 } while (0)
74#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ 82#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
75/* 83/*
@@ -78,8 +86,16 @@
78 */ 86 */
79# define raw_local_irq_disable() local_irq_disable() 87# define raw_local_irq_disable() local_irq_disable()
80# define raw_local_irq_enable() local_irq_enable() 88# define raw_local_irq_enable() local_irq_enable()
81# define raw_local_irq_save(flags) local_irq_save(flags) 89# define raw_local_irq_save(flags) \
82# define raw_local_irq_restore(flags) local_irq_restore(flags) 90 do { \
91 typecheck(unsigned long, flags); \
92 local_irq_save(flags); \
93 } while (0)
94# define raw_local_irq_restore(flags) \
95 do { \
96 typecheck(unsigned long, flags); \
97 local_irq_restore(flags); \
98 } while (0)
83#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ 99#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
84 100
85#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 101#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
@@ -89,7 +105,11 @@
89 raw_safe_halt(); \ 105 raw_safe_halt(); \
90 } while (0) 106 } while (0)
91 107
92#define local_save_flags(flags) raw_local_save_flags(flags) 108#define local_save_flags(flags) \
109 do { \
110 typecheck(unsigned long, flags); \
111 raw_local_save_flags(flags); \
112 } while (0)
93 113
94#define irqs_disabled() \ 114#define irqs_disabled() \
95({ \ 115({ \
@@ -99,7 +119,11 @@
99 raw_irqs_disabled_flags(_flags); \ 119 raw_irqs_disabled_flags(_flags); \
100}) 120})
101 121
102#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) 122#define irqs_disabled_flags(flags) \
123({ \
124 typecheck(unsigned long, flags); \
125 raw_irqs_disabled_flags(flags); \
126})
103#endif /* CONFIG_X86 */ 127#endif /* CONFIG_X86 */
104 128
105#endif 129#endif
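Embedding typecheck() in these macros turns the classic "flags declared with the wrong type" bug into a compile-time warning on every architecture, not just those whose raw_* implementations happened to catch it. What now gets flagged:

#include <linux/irqflags.h>

static void example(void)
{
        unsigned int flags;     /* BUG: must be unsigned long */

        local_irq_save(flags);  /* typecheck() warns here at compile time */
        local_irq_restore(flags);
}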
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 00c1801099fa..57aefa160a92 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -6,6 +6,7 @@
6#define _LINUX_KALLSYMS_H 6#define _LINUX_KALLSYMS_H
7 7
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/kernel.h>
9#include <linux/stddef.h> 10#include <linux/stddef.h>
10 11
11#define KSYM_NAME_LEN 128 12#define KSYM_NAME_LEN 128
@@ -105,18 +106,10 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr)
105 print_symbol(fmt, (unsigned long)addr); 106 print_symbol(fmt, (unsigned long)addr);
106} 107}
107 108
108#ifndef CONFIG_64BIT 109static inline void print_ip_sym(unsigned long ip)
109#define print_ip_sym(ip) \ 110{
110do { \ 111 printk("[<%p>]", (void *) ip);
111 printk("[<%08lx>]", ip); \ 112 print_symbol(" %s\n", ip);
112 print_symbol(" %s\n", ip); \ 113}
113} while(0)
114#else
115#define print_ip_sym(ip) \
116do { \
117 printk("[<%016lx>]", ip); \
118 print_symbol(" %s\n", ip); \
119} while(0)
120#endif
121 114
122#endif /*_LINUX_KALLSYMS_H*/ 115#endif /*_LINUX_KALLSYMS_H*/
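Since %p prints at native pointer width, one inline definition replaces the 32/64-bit macro pair and call sites are unchanged. For instance (assuming _THIS_IP_ from <linux/kernel.h>; the output shown is indicative only):

#include <linux/kallsyms.h>
#include <linux/kernel.h>

static void example(void)
{
        /* e.g. "[<c01234ab>] example+0x2b/0x40" on 32-bit, with a
         * 16-digit address on 64-bit */
        print_ip_sym(_THIS_IP_);
}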
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f9cd7a513f9c..fdbbf72ca2eb 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,8 @@
14#include <linux/compiler.h> 14#include <linux/compiler.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/log2.h> 16#include <linux/log2.h>
17#include <linux/typecheck.h>
18#include <linux/ratelimit.h>
17#include <asm/byteorder.h> 19#include <asm/byteorder.h>
18#include <asm/bug.h> 20#include <asm/bug.h>
19 21
@@ -188,11 +190,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
188asmlinkage int printk(const char * fmt, ...) 190asmlinkage int printk(const char * fmt, ...)
189 __attribute__ ((format (printf, 1, 2))) __cold; 191 __attribute__ ((format (printf, 1, 2))) __cold;
190 192
191extern int printk_ratelimit_jiffies; 193extern struct ratelimit_state printk_ratelimit_state;
192extern int printk_ratelimit_burst;
193extern int printk_ratelimit(void); 194extern int printk_ratelimit(void);
194extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst);
195extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
196extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, 195extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
197 unsigned int interval_msec); 196 unsigned int interval_msec);
198#else 197#else
@@ -203,8 +202,6 @@ static inline int printk(const char *s, ...)
203 __attribute__ ((format (printf, 1, 2))); 202 __attribute__ ((format (printf, 1, 2)));
204static inline int __cold printk(const char *s, ...) { return 0; } 203static inline int __cold printk(const char *s, ...) { return 0; }
205static inline int printk_ratelimit(void) { return 0; } 204static inline int printk_ratelimit(void) { return 0; }
206static inline int __printk_ratelimit(int ratelimit_jiffies, \
207 int ratelimit_burst) { return 0; }
208static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ 205static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
209 unsigned int interval_msec) \ 206 unsigned int interval_msec) \
210 { return false; } 207 { return false; }
@@ -441,26 +438,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
441 const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 438 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
442 (type *)( (char *)__mptr - offsetof(type,member) );}) 439 (type *)( (char *)__mptr - offsetof(type,member) );})
443 440
444/*
445 * Check at compile time that something is of a particular type.
446 * Always evaluates to 1 so you may use it easily in comparisons.
447 */
448#define typecheck(type,x) \
449({ type __dummy; \
450 typeof(x) __dummy2; \
451 (void)(&__dummy == &__dummy2); \
452 1; \
453})
454
455/*
456 * Check at compile time that 'function' is a certain type, or is a pointer
457 * to that type (needs to use typedef for the function type.)
458 */
459#define typecheck_fn(type,function) \
460({ typeof(type) __tmp = function; \
461 (void)__tmp; \
462})
463
464struct sysinfo; 441struct sysinfo;
465extern int do_sysinfo(struct sysinfo *info); 442extern int do_sysinfo(struct sysinfo *info);
466 443
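The ad-hoc printk_ratelimit_jiffies/printk_ratelimit_burst knobs give way to a shared struct ratelimit_state from the new <linux/ratelimit.h>, and __ratelimit() moves there with it. A hedged sketch, assuming the state keeps interval/burst members:

#include <linux/kernel.h>
#include <linux/ratelimit.h>

static void example_noisy_event(void)
{
        /* tune the global printk limiter through the shared state */
        printk_ratelimit_state.interval = 5 * HZ;
        printk_ratelimit_state.burst = 10;

        if (printk_ratelimit())
                printk(KERN_WARNING "example: event storm\n");
}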
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 0509c4ce4857..a1a91577813c 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -19,6 +19,7 @@
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */ 20 */
21 21
22#include <linux/gfp.h>
22#include <linux/stddef.h> 23#include <linux/stddef.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
24#include <linux/compiler.h> 25#include <linux/compiler.h>
@@ -41,8 +42,8 @@ struct file;
41struct subprocess_info; 42struct subprocess_info;
42 43
43/* Allocate a subprocess_info structure */ 44/* Allocate a subprocess_info structure */
44struct subprocess_info *call_usermodehelper_setup(char *path, 45struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
45 char **argv, char **envp); 46 char **envp, gfp_t gfp_mask);
46 47
47/* Set various pieces of state into the subprocess_info structure */ 48/* Set various pieces of state into the subprocess_info structure */
48void call_usermodehelper_setkeys(struct subprocess_info *info, 49void call_usermodehelper_setkeys(struct subprocess_info *info,
@@ -69,8 +70,9 @@ static inline int
69call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) 70call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
70{ 71{
71 struct subprocess_info *info; 72 struct subprocess_info *info;
73 gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
72 74
73 info = call_usermodehelper_setup(path, argv, envp); 75 info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
74 if (info == NULL) 76 if (info == NULL)
75 return -ENOMEM; 77 return -ENOMEM;
76 return call_usermodehelper_exec(info, wait); 78 return call_usermodehelper_exec(info, wait);
@@ -81,8 +83,9 @@ call_usermodehelper_keys(char *path, char **argv, char **envp,
81 struct key *session_keyring, enum umh_wait wait) 83 struct key *session_keyring, enum umh_wait wait)
82{ 84{
83 struct subprocess_info *info; 85 struct subprocess_info *info;
86 gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
84 87
85 info = call_usermodehelper_setup(path, argv, envp); 88 info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
86 if (info == NULL) 89 if (info == NULL)
87 return -ENOMEM; 90 return -ENOMEM;
88 91
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 04a3556bdea6..0be7795655fa 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -157,11 +157,10 @@ struct kretprobe {
157 int nmissed; 157 int nmissed;
158 size_t data_size; 158 size_t data_size;
159 struct hlist_head free_instances; 159 struct hlist_head free_instances;
160 struct hlist_head used_instances; 160 spinlock_t lock;
161}; 161};
162 162
163struct kretprobe_instance { 163struct kretprobe_instance {
164 struct hlist_node uflist; /* either on free list or used list */
165 struct hlist_node hlist; 164 struct hlist_node hlist;
166 struct kretprobe *rp; 165 struct kretprobe *rp;
167 kprobe_opcode_t *ret_addr; 166 kprobe_opcode_t *ret_addr;
@@ -201,7 +200,6 @@ static inline int init_test_probes(void)
201} 200}
202#endif /* CONFIG_KPROBES_SANITY_TEST */ 201#endif /* CONFIG_KPROBES_SANITY_TEST */
203 202
204extern spinlock_t kretprobe_lock;
205extern struct mutex kprobe_mutex; 203extern struct mutex kprobe_mutex;
206extern int arch_prepare_kprobe(struct kprobe *p); 204extern int arch_prepare_kprobe(struct kprobe *p);
207extern void arch_arm_kprobe(struct kprobe *p); 205extern void arch_arm_kprobe(struct kprobe *p);
@@ -214,6 +212,9 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p);
214 212
215/* Get the kprobe at this addr (if any) - called with preemption disabled */ 213/* Get the kprobe at this addr (if any) - called with preemption disabled */
216struct kprobe *get_kprobe(void *addr); 214struct kprobe *get_kprobe(void *addr);
215void kretprobe_hash_lock(struct task_struct *tsk,
216 struct hlist_head **head, unsigned long *flags);
217void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags);
217struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); 218struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
218 219
219/* kprobe_running() will just return the current_kprobe on this CPU */ 220/* kprobe_running() will just return the current_kprobe on this CPU */
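Return-probe bookkeeping moves from the global kretprobe_lock and per-kretprobe used_instances list to a per-task hash guarded by the kretprobe_hash_lock()/unlock() pair declared above. A sketch of the locking pattern an arch trampoline handler would follow; the walk itself is elided:

#include <linux/kprobes.h>
#include <linux/sched.h>

static void example_recycle_instances(struct task_struct *tsk)
{
        struct hlist_head *head;
        unsigned long flags;

        kretprobe_hash_lock(tsk, &head, &flags);
        /* ... walk 'head' for kretprobe_instance entries belonging
         * to tsk and recycle them ... */
        kretprobe_hash_unlock(tsk, &flags);
}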
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 00dd957e245b..aabc8a13ba71 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -6,7 +6,8 @@
6 6
7struct task_struct *kthread_create(int (*threadfn)(void *data), 7struct task_struct *kthread_create(int (*threadfn)(void *data),
8 void *data, 8 void *data,
9 const char namefmt[], ...); 9 const char namefmt[], ...)
10 __attribute__((format(printf, 3, 4)));
10 11
11/** 12/**
12 * kthread_run - create and wake a thread. 13 * kthread_run - create and wake a thread.
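The new printf attribute lets gcc check namefmt against its arguments. What it now catches (example_threadfn is invented):

#include <linux/kthread.h>

extern int example_threadfn(void *data);

static void example(int cpu)
{
        kthread_create(example_threadfn, NULL, "worker/%d", cpu);  /* fine */
        kthread_create(example_threadfn, NULL, "worker/%d");
        /* ^ now warns: format '%d' expects a matching 'int' argument */
}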
diff --git a/include/linux/list.h b/include/linux/list.h
index 139ec41d9c2e..453916bc0412 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -61,14 +61,10 @@ extern void __list_add(struct list_head *new,
61 * Insert a new entry after the specified head. 61 * Insert a new entry after the specified head.
62 * This is good for implementing stacks. 62 * This is good for implementing stacks.
63 */ 63 */
64#ifndef CONFIG_DEBUG_LIST
65static inline void list_add(struct list_head *new, struct list_head *head) 64static inline void list_add(struct list_head *new, struct list_head *head)
66{ 65{
67 __list_add(new, head, head->next); 66 __list_add(new, head, head->next);
68} 67}
69#else
70extern void list_add(struct list_head *new, struct list_head *head);
71#endif
72 68
73 69
74/** 70/**
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e6608776bc96..fdf3967e1397 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,7 +35,10 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
35extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 35extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
36 gfp_t gfp_mask); 36 gfp_t gfp_mask);
37extern void mem_cgroup_uncharge_page(struct page *page); 37extern void mem_cgroup_uncharge_page(struct page *page);
38extern void mem_cgroup_uncharge_cache_page(struct page *page);
38extern void mem_cgroup_move_lists(struct page *page, bool active); 39extern void mem_cgroup_move_lists(struct page *page, bool active);
40extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
41
39extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 42extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
40 struct list_head *dst, 43 struct list_head *dst,
41 unsigned long *scanned, int order, 44 unsigned long *scanned, int order,
@@ -50,9 +53,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
50#define mm_match_cgroup(mm, cgroup) \ 53#define mm_match_cgroup(mm, cgroup) \
51 ((cgroup) == mem_cgroup_from_task((mm)->owner)) 54 ((cgroup) == mem_cgroup_from_task((mm)->owner))
52 55
53extern int mem_cgroup_prepare_migration(struct page *page); 56extern int
57mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
54extern void mem_cgroup_end_migration(struct page *page); 58extern void mem_cgroup_end_migration(struct page *page);
55extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
56 59
57/* 60/*
58 * For memory reclaim. 61 * For memory reclaim.
@@ -97,6 +100,15 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
97{ 100{
98} 101}
99 102
103static inline void mem_cgroup_uncharge_cache_page(struct page *page)
104{
105}
106
107static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
108{
109 return 0;
110}
111
100static inline void mem_cgroup_move_lists(struct page *page, bool active) 112static inline void mem_cgroup_move_lists(struct page *page, bool active)
101{ 113{
102} 114}
@@ -112,7 +124,8 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
112 return 1; 124 return 1;
113} 125}
114 126
115static inline int mem_cgroup_prepare_migration(struct page *page) 127static inline int
128mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
116{ 129{
117 return 0; 130 return 0;
118} 131}
@@ -121,11 +134,6 @@ static inline void mem_cgroup_end_migration(struct page *page)
121{ 134{
122} 135}
123 136
124static inline void
125mem_cgroup_page_migration(struct page *page, struct page *newpage)
126{
127}
128
129static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) 137static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
130{ 138{
131 return 0; 139 return 0;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02a27ae78539..746f975b58ef 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,6 +159,17 @@ struct vm_area_struct {
159#endif 159#endif
160}; 160};
161 161
162struct core_thread {
163 struct task_struct *task;
164 struct core_thread *next;
165};
166
167struct core_state {
168 atomic_t nr_threads;
169 struct core_thread dumper;
170 struct completion startup;
171};
172
162struct mm_struct { 173struct mm_struct {
163 struct vm_area_struct * mmap; /* list of VMAs */ 174 struct vm_area_struct * mmap; /* list of VMAs */
164 struct rb_root mm_rb; 175 struct rb_root mm_rb;
@@ -175,7 +186,6 @@ struct mm_struct {
175 atomic_t mm_users; /* How many users with user space? */ 186 atomic_t mm_users; /* How many users with user space? */
176 atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ 187 atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
177 int map_count; /* number of VMAs */ 188 int map_count; /* number of VMAs */
178 int core_waiters;
179 struct rw_semaphore mmap_sem; 189 struct rw_semaphore mmap_sem;
180 spinlock_t page_table_lock; /* Protects page tables and some counters */ 190 spinlock_t page_table_lock; /* Protects page tables and some counters */
181 191
@@ -219,8 +229,7 @@ struct mm_struct {
219 229
220 unsigned long flags; /* Must use atomic bitops to access the bits */ 230 unsigned long flags; /* Must use atomic bitops to access the bits */
221 231
222 /* coredumping support */ 232 struct core_state *core_state; /* coredumping support */
223 struct completion *core_startup_done, core_done;
224 233
225 /* aio bits */ 234 /* aio bits */
226 rwlock_t ioctx_list_lock; /* aio lock */ 235 rwlock_t ioctx_list_lock; /* aio lock */
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 81cd36b735b0..ba63858056c7 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -2,11 +2,11 @@
2#define _LINUX_MSDOS_FS_H 2#define _LINUX_MSDOS_FS_H
3 3
4#include <linux/magic.h> 4#include <linux/magic.h>
5#include <asm/byteorder.h>
5 6
6/* 7/*
7 * The MS-DOS filesystem constants/structures 8 * The MS-DOS filesystem constants/structures
8 */ 9 */
9#include <asm/byteorder.h>
10 10
11#define SECTOR_SIZE 512 /* sector size (bytes) */ 11#define SECTOR_SIZE 512 /* sector size (bytes) */
12#define SECTOR_BITS 9 /* log2(SECTOR_SIZE) */ 12#define SECTOR_BITS 9 /* log2(SECTOR_SIZE) */
@@ -89,24 +89,22 @@
89#define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \ 89#define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \
90 && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2) 90 && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2)
91 91
92struct __fat_dirent {
93 long d_ino;
94 __kernel_off_t d_off;
95 unsigned short d_reclen;
96 char d_name[256]; /* We must not include limits.h! */
97};
98
92/* 99/*
93 * ioctl commands 100 * ioctl commands
94 */ 101 */
95#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2]) 102#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct __fat_dirent[2])
96#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct dirent [2]) 103#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct __fat_dirent[2])
97/* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */ 104/* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */
98#define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32) 105#define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32)
99#define FAT_IOCTL_SET_ATTRIBUTES _IOW('r', 0x11, __u32) 106#define FAT_IOCTL_SET_ATTRIBUTES _IOW('r', 0x11, __u32)
100 107
101/*
102 * vfat shortname flags
103 */
104#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
105#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
106#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
107#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
108#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
109
110struct fat_boot_sector { 108struct fat_boot_sector {
111 __u8 ignored[3]; /* Boot strap short or near jump */ 109 __u8 ignored[3]; /* Boot strap short or near jump */
112 __u8 system_id[8]; /* Name - can be used to special case 110 __u8 system_id[8]; /* Name - can be used to special case
@@ -168,14 +166,6 @@ struct msdos_dir_slot {
168 __u8 name11_12[4]; /* last 2 characters in name */ 166 __u8 name11_12[4]; /* last 2 characters in name */
169}; 167};
170 168
171struct fat_slot_info {
172 loff_t i_pos; /* on-disk position of directory entry */
173 loff_t slot_off; /* offset for slot or de start */
174 int nr_slots; /* number of slots + 1(de) in filename */
175 struct msdos_dir_entry *de;
176 struct buffer_head *bh;
177};
178
179#ifdef __KERNEL__ 169#ifdef __KERNEL__
180 170
181#include <linux/buffer_head.h> 171#include <linux/buffer_head.h>
@@ -184,6 +174,15 @@ struct fat_slot_info {
184#include <linux/fs.h> 174#include <linux/fs.h>
185#include <linux/mutex.h> 175#include <linux/mutex.h>
186 176
177/*
178 * vfat shortname flags
179 */
180#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
181#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
182#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
183#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
184#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
185
187struct fat_mount_options { 186struct fat_mount_options {
188 uid_t fs_uid; 187 uid_t fs_uid;
189 gid_t fs_gid; 188 gid_t fs_gid;
@@ -202,10 +201,10 @@ struct fat_mount_options {
202 utf8:1, /* Use of UTF-8 character set (Default) */ 201 utf8:1, /* Use of UTF-8 character set (Default) */
203 unicode_xlate:1, /* create escape sequences for unhandled Unicode */ 202 unicode_xlate:1, /* create escape sequences for unhandled Unicode */
204 numtail:1, /* Does first alias have a numeric '~1' type tail? */ 203 numtail:1, /* Does first alias have a numeric '~1' type tail? */
205 atari:1, /* Use Atari GEMDOS variation of MS-DOS fs */
206 flush:1, /* write things quickly */ 204 flush:1, /* write things quickly */
207 nocase:1, /* Does this need case conversion? 0=need case conversion*/ 205 nocase:1, /* Does this need case conversion? 0=need case conversion*/
208 usefree:1; /* Use free_clusters for FAT32 */ 206 usefree:1, /* Use free_clusters for FAT32 */
207 tz_utc:1; /* Filesystem timestamps are in UTC */
209}; 208};
210 209
211#define FAT_HASH_BITS 8 210#define FAT_HASH_BITS 8
@@ -267,6 +266,14 @@ struct msdos_inode_info {
267 struct inode vfs_inode; 266 struct inode vfs_inode;
268}; 267};
269 268
269struct fat_slot_info {
270 loff_t i_pos; /* on-disk position of directory entry */
271 loff_t slot_off; /* offset for slot or de start */
272 int nr_slots; /* number of slots + 1(de) in filename */
273 struct msdos_dir_entry *de;
274 struct buffer_head *bh;
275};
276
270static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) 277static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
271{ 278{
272 return sb->s_fs_info; 279 return sb->s_fs_info;
@@ -428,8 +435,9 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
428extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); 435extern void fat_fs_panic(struct super_block *s, const char *fmt, ...);
429extern void fat_clusters_flush(struct super_block *sb); 436extern void fat_clusters_flush(struct super_block *sb);
430extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); 437extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
431extern int date_dos2unix(unsigned short time, unsigned short date); 438extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc);
432extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date); 439extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date,
440 int tz_utc);
433extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); 441extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
434 442
435int fat_cache_init(void); 443int fat_cache_init(void);
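The extra tz_utc argument threads the new "tz=UTC" mount option through both timestamp helpers. A minimal sketch of the intended effect, assuming (as elsewhere in the kernel) that sys_tz carries the local timezone offset; this is an illustration, not the full fat implementation:

#include <linux/time.h>		/* sys_tz */

/* FAT stores local time on disk; with tz_utc set, the on-disk value is
 * taken as UTC and the timezone correction is skipped entirely.
 */
static int example_fat_secs_to_unix(int secs, int tz_utc)
{
	if (!tz_utc)
		secs += sys_tz.tz_minuteswest * 60;
	return secs;
}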
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index f71201d0f3e7..6316fafe5c2a 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -45,13 +45,13 @@ enum {
45 * @size: how many physical eraseblocks are reserved for this volume 45 * @size: how many physical eraseblocks are reserved for this volume
46 * @used_bytes: how many bytes of data this volume contains 46 * @used_bytes: how many bytes of data this volume contains
47 * @used_ebs: how many physical eraseblocks of this volume actually contain any 47 * @used_ebs: how many physical eraseblocks of this volume actually contain any
48 * data 48 * data
49 * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) 49 * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
50 * @corrupted: non-zero if the volume is corrupted (static volumes only) 50 * @corrupted: non-zero if the volume is corrupted (static volumes only)
51 * @upd_marker: non-zero if the volume has update marker set 51 * @upd_marker: non-zero if the volume has update marker set
52 * @alignment: volume alignment 52 * @alignment: volume alignment
53 * @usable_leb_size: how many bytes are available in logical eraseblocks of 53 * @usable_leb_size: how many bytes are available in logical eraseblocks of
54 * this volume 54 * this volume
55 * @name_len: volume name length 55 * @name_len: volume name length
56 * @name: volume name 56 * @name: volume name
57 * @cdev: UBI volume character device major and minor numbers 57 * @cdev: UBI volume character device major and minor numbers
@@ -152,6 +152,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum);
152int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum); 152int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum);
153int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); 153int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
154int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum); 154int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum);
155int ubi_sync(int ubi_num);
155 156
156/* 157/*
157 * This function is the same as the 'ubi_leb_read()' function, but it does not 158 * This function is the same as the 'ubi_leb_read()' function, but it does not
diff --git a/include/linux/net.h b/include/linux/net.h
index 2f999fbb188d..4a9a30f2d68f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -351,8 +351,7 @@ static const struct proto_ops name##_ops = { \
351 351
352#ifdef CONFIG_SYSCTL 352#ifdef CONFIG_SYSCTL
353#include <linux/sysctl.h> 353#include <linux/sysctl.h>
354extern int net_msg_cost; 354extern struct ratelimit_state net_ratelimit_state;
355extern int net_msg_burst;
356#endif 355#endif
357 356
358#endif /* __KERNEL__ */ 357#endif /* __KERNEL__ */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index a2861d95ecc3..108f47e5fd95 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -12,7 +12,6 @@
12 12
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/unistd.h> 14#include <linux/unistd.h>
15#include <linux/dirent.h>
16#include <linux/fs.h> 15#include <linux/fs.h>
17#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
18#include <linux/mount.h> 17#include <linux/mount.h>
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index bd3d72ddf333..da2698b0fdd1 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -214,6 +214,8 @@ static inline int notifier_to_errno(int ret)
214#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ 214#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
215#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, 215#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
216 * not handling interrupts, soon dead */ 216 * not handling interrupts, soon dead */
217#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
218 * lock is dropped */
217 219
218/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend 220
219 * operation in progress 221 * operation in progress
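CPU_POST_DEAD lets a hotplug callback defer work that must not run under the cpu_hotplug lock. A hedged sketch of a hypothetical callback distinguishing the two phases:

#include <linux/notifier.h>
#include <linux/cpu.h>

static int example_cpu_callback(struct notifier_block *nb,
				unsigned long action, void *hcpu)
{
	switch (action) {
	case CPU_DEAD:
		/* runs with the cpu_hotplug lock still held */
		break;
	case CPU_POST_DEAD:
		/* the lock has been dropped: safe for cleanup that
		 * could otherwise deadlock against hotplug */
		break;
	}
	return NOTIFY_OK;
}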
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 0e66b57631fc..c8a768e59640 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -82,9 +82,12 @@ static inline void get_nsproxy(struct nsproxy *ns)
82} 82}
83 83
84#ifdef CONFIG_CGROUP_NS 84#ifdef CONFIG_CGROUP_NS
85int ns_cgroup_clone(struct task_struct *tsk); 85int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid);
86#else 86#else
87static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; } 87static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid)
88{
89 return 0;
90}
88#endif 91#endif
89 92
90#endif 93#endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 119ae7b8f028..c3b1761aba26 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2400,6 +2400,9 @@
2400#define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30 2400#define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30
2401#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 2401#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60
2402#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f 2402#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
2403#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0
2404#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
2405#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6
2403#define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 2406#define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030
2404#define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035 2407#define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035
2405#define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036 2408#define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036
diff --git a/include/linux/pid.h b/include/linux/pid.h
index c21c7e8124a7..22921ac4cfd9 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -48,7 +48,7 @@ enum pid_type
48 */ 48 */
49 49
50struct upid { 50struct upid {
51 /* Try to keep pid_chain in the same cacheline as nr for find_pid */ 51 /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
52 int nr; 52 int nr;
53 struct pid_namespace *ns; 53 struct pid_namespace *ns;
54 struct hlist_node pid_chain; 54 struct hlist_node pid_chain;
@@ -57,10 +57,10 @@ struct upid {
57struct pid 57struct pid
58{ 58{
59 atomic_t count; 59 atomic_t count;
60 unsigned int level;
60 /* lists of tasks that use this pid */ 61 /* lists of tasks that use this pid */
61 struct hlist_head tasks[PIDTYPE_MAX]; 62 struct hlist_head tasks[PIDTYPE_MAX];
62 struct rcu_head rcu; 63 struct rcu_head rcu;
63 unsigned int level;
64 struct upid numbers[1]; 64 struct upid numbers[1];
65}; 65};
66 66
@@ -105,14 +105,12 @@ extern struct pid_namespace init_pid_ns;
105 * or rcu_read_lock() held. 105 * or rcu_read_lock() held.
106 * 106 *
107 * find_pid_ns() finds the pid in the namespace specified 107 * find_pid_ns() finds the pid in the namespace specified
108 * find_pid() finds the pid by its global id, i.e. in the init namespace
109 * find_vpid() finds the pid by its virtual id, i.e. in the current namespace 108 * find_vpid() finds the pid by its virtual id, i.e. in the current namespace
110 * 109 *
111 * see also find_task_by_pid() set in include/linux/sched.h 110 * see also find_task_by_vpid() set in include/linux/sched.h
112 */ 111 */
113extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); 112extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
114extern struct pid *find_vpid(int nr); 113extern struct pid *find_vpid(int nr);
115extern struct pid *find_pid(int nr);
116 114
117/* 115/*
118 * Look up a PID in the hash table, and return with its count elevated. 116
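With find_pid() removed, id-to-task lookups go through the namespace-aware helpers. A hedged sketch of a lookup in the caller's own namespace; pid_task() and get_task_struct() are the usual companions, and the caller must later put_task_struct():

#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>

static struct task_struct *example_get_task(pid_t nr)
{
	struct task_struct *task = NULL;
	struct pid *pid;

	rcu_read_lock();			/* required, see above */
	pid = find_vpid(nr);			/* nr as seen by current */
	if (pid) {
		task = pid_task(pid, PIDTYPE_PID);
		if (task)
			get_task_struct(task);	/* pin before unlock */
	}
	rcu_read_unlock();
	return task;				/* caller: put_task_struct() */
}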
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index caff5283d15c..1af82c4e17d4 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -14,6 +14,8 @@ struct pidmap {
14 14
15#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) 15#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
16 16
17struct bsd_acct_struct;
18
17struct pid_namespace { 19struct pid_namespace {
18 struct kref kref; 20 struct kref kref;
19 struct pidmap pidmap[PIDMAP_ENTRIES]; 21 struct pidmap pidmap[PIDMAP_ENTRIES];
@@ -25,6 +27,9 @@ struct pid_namespace {
25#ifdef CONFIG_PROC_FS 27#ifdef CONFIG_PROC_FS
26 struct vfsmount *proc_mnt; 28 struct vfsmount *proc_mnt;
27#endif 29#endif
30#ifdef CONFIG_BSD_PROCESS_ACCT
31 struct bsd_acct_struct *bacct;
32#endif
28}; 33};
29 34
30extern struct pid_namespace init_pid_ns; 35extern struct pid_namespace init_pid_ns;
@@ -85,4 +90,7 @@ static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
85 return tsk->nsproxy->pid_ns->child_reaper; 90 return tsk->nsproxy->pid_ns->child_reaper;
86} 91}
87 92
93void pidhash_init(void);
94void pidmap_init(void);
95
88#endif /* _LINUX_PID_NS_H */ 96#endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 15a9eaf4a802..f560d1705afe 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -79,6 +79,7 @@ struct proc_dir_entry {
79 int pde_users; /* number of callers into module in progress */ 79 int pde_users; /* number of callers into module in progress */
80 spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ 80 spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
81 struct completion *pde_unload_completion; 81 struct completion *pde_unload_completion;
82 struct list_head pde_openers; /* who did ->open, but not ->release */
82}; 83};
83 84
84struct kcore_list { 85struct kcore_list {
@@ -138,7 +139,6 @@ extern int proc_readdir(struct file *, void *, filldir_t);
138extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); 139extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
139 140
140extern const struct file_operations proc_kcore_operations; 141extern const struct file_operations proc_kcore_operations;
141extern const struct file_operations proc_kmsg_operations;
142extern const struct file_operations ppc_htab_operations; 142extern const struct file_operations ppc_htab_operations;
143 143
144extern int pid_ns_prepare_proc(struct pid_namespace *ns); 144extern int pid_ns_prepare_proc(struct pid_namespace *ns);
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 05c1cc736937..7e7087239af5 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -8,8 +8,6 @@
8 8
9#include <asm/errno.h> 9#include <asm/errno.h>
10 10
11extern int prof_on __read_mostly;
12
13#define CPU_PROFILING 1 11#define CPU_PROFILING 1
14#define SCHED_PROFILING 2 12#define SCHED_PROFILING 2
15#define SLEEP_PROFILING 3 13#define SLEEP_PROFILING 3
@@ -19,14 +17,31 @@ struct proc_dir_entry;
19struct pt_regs; 17struct pt_regs;
20struct notifier_block; 18struct notifier_block;
21 19
20#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
21void create_prof_cpu_mask(struct proc_dir_entry *de);
22#else
23static inline void create_prof_cpu_mask(struct proc_dir_entry *de)
24{
25}
26#endif
27
28enum profile_type {
29 PROFILE_TASK_EXIT,
30 PROFILE_MUNMAP
31};
32
33#ifdef CONFIG_PROFILING
34
35extern int prof_on __read_mostly;
36
22/* init basic kernel profiler */ 37/* init basic kernel profiler */
23void __init profile_init(void); 38void __init profile_init(void);
24void profile_tick(int); 39void profile_tick(int type);
25 40
26/* 41/*
27 * Add multiple profiler hits to a given address: 42 * Add multiple profiler hits to a given address:
28 */ 43 */
29void profile_hits(int, void *ip, unsigned int nr_hits); 44void profile_hits(int type, void *ip, unsigned int nr_hits);
30 45
31/* 46/*
32 * Single profiler hit: 47 * Single profiler hit:
@@ -40,19 +55,6 @@ static inline void profile_hit(int type, void *ip)
40 profile_hits(type, ip, 1); 55 profile_hits(type, ip, 1);
41} 56}
42 57
43#ifdef CONFIG_PROC_FS
44void create_prof_cpu_mask(struct proc_dir_entry *);
45#else
46#define create_prof_cpu_mask(x) do { (void)(x); } while (0)
47#endif
48
49enum profile_type {
50 PROFILE_TASK_EXIT,
51 PROFILE_MUNMAP
52};
53
54#ifdef CONFIG_PROFILING
55
56struct task_struct; 58struct task_struct;
57struct mm_struct; 59struct mm_struct;
58 60
@@ -80,6 +82,28 @@ struct pt_regs;
80 82
81#else 83#else
82 84
85#define prof_on 0
86
87static inline void profile_init(void)
88{
89 return;
90}
91
92static inline void profile_tick(int type)
93{
94 return;
95}
96
97static inline void profile_hits(int type, void *ip, unsigned int nr_hits)
98{
99 return;
100}
101
102static inline void profile_hit(int type, void *ip)
103{
104 return;
105}
106
83static inline int task_handoff_register(struct notifier_block * n) 107static inline int task_handoff_register(struct notifier_block * n)
84{ 108{
85 return -ENOSYS; 109 return -ENOSYS;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index dcddfb200947..376a05048bc5 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -41,9 +41,6 @@
41#define __DQUOT_VERSION__ "dquot_6.5.1" 41#define __DQUOT_VERSION__ "dquot_6.5.1"
42#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 42#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
43 43
44typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
45typedef __u64 qsize_t; /* Type in which we store sizes */
46
47/* Size of blocks in which are counted size limits */ 44/* Size of blocks in which are counted size limits */
48#define QUOTABLOCK_BITS 10 45#define QUOTABLOCK_BITS 10
49#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) 46#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -138,6 +135,10 @@ struct if_dqinfo {
138#define QUOTA_NL_BHARDWARN 4 /* Block hardlimit reached */ 135#define QUOTA_NL_BHARDWARN 4 /* Block hardlimit reached */
139#define QUOTA_NL_BSOFTLONGWARN 5 /* Block grace time expired */ 136#define QUOTA_NL_BSOFTLONGWARN 5 /* Block grace time expired */
140#define QUOTA_NL_BSOFTWARN 6 /* Block softlimit reached */ 137#define QUOTA_NL_BSOFTWARN 6 /* Block softlimit reached */
138#define QUOTA_NL_IHARDBELOW 7 /* Usage got below inode hardlimit */
139#define QUOTA_NL_ISOFTBELOW 8 /* Usage got below inode softlimit */
140#define QUOTA_NL_BHARDBELOW 9 /* Usage got below block hardlimit */
141#define QUOTA_NL_BSOFTBELOW 10 /* Usage got below block softlimit */
141 142
142enum { 143enum {
143 QUOTA_NL_C_UNSPEC, 144 QUOTA_NL_C_UNSPEC,
@@ -172,6 +173,9 @@ enum {
172 173
173#include <asm/atomic.h> 174#include <asm/atomic.h>
174 175
176typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
177typedef __u64 qsize_t; /* Type in which we store sizes */
178
175extern spinlock_t dq_data_lock; 179extern spinlock_t dq_data_lock;
176 180
177/* Maximal numbers of writes for quota operation (insert/delete/update) 181/* Maximal numbers of writes for quota operation (insert/delete/update)
@@ -223,12 +227,10 @@ struct super_block;
223#define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */ 227#define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */
224 228
225extern void mark_info_dirty(struct super_block *sb, int type); 229extern void mark_info_dirty(struct super_block *sb, int type);
226#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) 230static inline int info_dirty(struct mem_dqinfo *info)
227#define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list)) 231{
228#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info)) 232 return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
229 233}
230#define sb_dqopt(sb) (&(sb)->s_dquot)
231#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
232 234
233struct dqstats { 235struct dqstats {
234 int lookups; 236 int lookups;
@@ -337,19 +339,6 @@ struct quota_info {
337 struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ 339 struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
338}; 340};
339 341
340#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
341 (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
342
343#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
344 sb_has_quota_enabled(sb, GRPQUOTA))
345
346#define sb_has_quota_suspended(sb, type) \
347 ((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
348 (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
349
350#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
351 sb_has_quota_suspended(sb, GRPQUOTA))
352
353int register_quota_format(struct quota_format_type *fmt); 342int register_quota_format(struct quota_format_type *fmt);
354void unregister_quota_format(struct quota_format_type *fmt); 343void unregister_quota_format(struct quota_format_type *fmt);
355 344
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index f86702053853..742187f7a05c 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -11,42 +11,85 @@
11#define _LINUX_QUOTAOPS_ 11#define _LINUX_QUOTAOPS_
12 12
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14
15#include <linux/fs.h> 14#include <linux/fs.h>
16 15
16static inline struct quota_info *sb_dqopt(struct super_block *sb)
17{
18 return &sb->s_dquot;
19}
20
17#if defined(CONFIG_QUOTA) 21#if defined(CONFIG_QUOTA)
18 22
19/* 23/*
20 * declaration of quota_function calls in kernel. 24 * declaration of quota_function calls in kernel.
21 */ 25 */
22extern void sync_dquots(struct super_block *sb, int type); 26void sync_dquots(struct super_block *sb, int type);
23 27
24extern int dquot_initialize(struct inode *inode, int type); 28int dquot_initialize(struct inode *inode, int type);
25extern int dquot_drop(struct inode *inode); 29int dquot_drop(struct inode *inode);
26 30
27extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); 31int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
28extern int dquot_alloc_inode(const struct inode *inode, unsigned long number); 32int dquot_alloc_inode(const struct inode *inode, unsigned long number);
29 33
30extern int dquot_free_space(struct inode *inode, qsize_t number); 34int dquot_free_space(struct inode *inode, qsize_t number);
31extern int dquot_free_inode(const struct inode *inode, unsigned long number); 35int dquot_free_inode(const struct inode *inode, unsigned long number);
32 36
33extern int dquot_transfer(struct inode *inode, struct iattr *iattr); 37int dquot_transfer(struct inode *inode, struct iattr *iattr);
34extern int dquot_commit(struct dquot *dquot); 38int dquot_commit(struct dquot *dquot);
35extern int dquot_acquire(struct dquot *dquot); 39int dquot_acquire(struct dquot *dquot);
36extern int dquot_release(struct dquot *dquot); 40int dquot_release(struct dquot *dquot);
37extern int dquot_commit_info(struct super_block *sb, int type); 41int dquot_commit_info(struct super_block *sb, int type);
38extern int dquot_mark_dquot_dirty(struct dquot *dquot); 42int dquot_mark_dquot_dirty(struct dquot *dquot);
39 43
40extern int vfs_quota_on(struct super_block *sb, int type, int format_id, 44int vfs_quota_on(struct super_block *sb, int type, int format_id,
41 char *path, int remount); 45 char *path, int remount);
42extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, 46int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
43 int format_id, int type); 47 int format_id, int type);
44extern int vfs_quota_off(struct super_block *sb, int type, int remount); 48int vfs_quota_off(struct super_block *sb, int type, int remount);
45extern int vfs_quota_sync(struct super_block *sb, int type); 49int vfs_quota_sync(struct super_block *sb, int type);
46extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); 50int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
47extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); 51int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
48extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); 52int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
49extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); 53int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
54
55void vfs_dq_drop(struct inode *inode);
56int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
57int vfs_dq_quota_on_remount(struct super_block *sb);
58
59static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
60{
61 return sb_dqopt(sb)->info + type;
62}
63
64/*
65 * Functions for checking status of quota
66 */
67
68static inline int sb_has_quota_enabled(struct super_block *sb, int type)
69{
70 if (type == USRQUOTA)
71 return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
72 return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
73}
74
75static inline int sb_any_quota_enabled(struct super_block *sb)
76{
77 return sb_has_quota_enabled(sb, USRQUOTA) ||
78 sb_has_quota_enabled(sb, GRPQUOTA);
79}
80
81static inline int sb_has_quota_suspended(struct super_block *sb, int type)
82{
83 if (type == USRQUOTA)
84 return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
85 return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
86}
87
88static inline int sb_any_quota_suspended(struct super_block *sb)
89{
90 return sb_has_quota_suspended(sb, USRQUOTA) ||
91 sb_has_quota_suspended(sb, GRPQUOTA);
92}
50 93
51/* 94/*
52 * Operations supported for diskquotas. 95 * Operations supported for diskquotas.
@@ -59,38 +102,16 @@ extern struct quotactl_ops vfs_quotactl_ops;
59 102
60/* It is better to call this function outside of any transaction as it might 103/* It is better to call this function outside of any transaction as it might
61 * need a lot of space in journal for dquot structure allocation. */ 104 * need a lot of space in journal for dquot structure allocation. */
62static inline void DQUOT_INIT(struct inode *inode) 105static inline void vfs_dq_init(struct inode *inode)
63{ 106{
64 BUG_ON(!inode->i_sb); 107 BUG_ON(!inode->i_sb);
65 if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) 108 if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
66 inode->i_sb->dq_op->initialize(inode, -1); 109 inode->i_sb->dq_op->initialize(inode, -1);
67} 110}
68 111
69/* The same as with DQUOT_INIT */
70static inline void DQUOT_DROP(struct inode *inode)
71{
72 /* Here we can get arbitrary inode from clear_inode() so we have
73 * to be careful. OTOH we don't need locking as quota operations
74 * are allowed to change only at mount time */
75 if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
76 && inode->i_sb->dq_op->drop) {
77 int cnt;
78 /* Test before calling to rule out calls from proc and such
79 * where we are not allowed to block. Note that this is
80 * actually reliable test even without the lock - the caller
81 * must assure that nobody can come after the DQUOT_DROP and
82 * add quota pointers back anyway */
83 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
84 if (inode->i_dquot[cnt] != NODQUOT)
85 break;
86 if (cnt < MAXQUOTAS)
87 inode->i_sb->dq_op->drop(inode);
88 }
89}
90
91/* The following allocation/freeing/transfer functions *must* be called inside 112/* The following allocation/freeing/transfer functions *must* be called inside
92 * a transaction (deadlocks possible otherwise) */ 113 * a transaction (deadlocks possible otherwise) */
93static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) 114static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
94{ 115{
95 if (sb_any_quota_enabled(inode->i_sb)) { 116 if (sb_any_quota_enabled(inode->i_sb)) {
96 /* Used space is updated in alloc_space() */ 117 /* Used space is updated in alloc_space() */
@@ -102,15 +123,15 @@ static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
102 return 0; 123 return 0;
103} 124}
104 125
105static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) 126static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
106{ 127{
107 int ret; 128 int ret;
108 if (!(ret = DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr))) 129 if (!(ret = vfs_dq_prealloc_space_nodirty(inode, nr)))
109 mark_inode_dirty(inode); 130 mark_inode_dirty(inode);
110 return ret; 131 return ret;
111} 132}
112 133
113static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) 134static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
114{ 135{
115 if (sb_any_quota_enabled(inode->i_sb)) { 136 if (sb_any_quota_enabled(inode->i_sb)) {
116 /* Used space is updated in alloc_space() */ 137 /* Used space is updated in alloc_space() */
@@ -122,25 +143,25 @@ static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
122 return 0; 143 return 0;
123} 144}
124 145
125static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) 146static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
126{ 147{
127 int ret; 148 int ret;
128 if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr))) 149 if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr)))
129 mark_inode_dirty(inode); 150 mark_inode_dirty(inode);
130 return ret; 151 return ret;
131} 152}
132 153
133static inline int DQUOT_ALLOC_INODE(struct inode *inode) 154static inline int vfs_dq_alloc_inode(struct inode *inode)
134{ 155{
135 if (sb_any_quota_enabled(inode->i_sb)) { 156 if (sb_any_quota_enabled(inode->i_sb)) {
136 DQUOT_INIT(inode); 157 vfs_dq_init(inode);
137 if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) 158 if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
138 return 1; 159 return 1;
139 } 160 }
140 return 0; 161 return 0;
141} 162}
142 163
143static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) 164static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
144{ 165{
145 if (sb_any_quota_enabled(inode->i_sb)) 166 if (sb_any_quota_enabled(inode->i_sb))
146 inode->i_sb->dq_op->free_space(inode, nr); 167 inode->i_sb->dq_op->free_space(inode, nr);
@@ -148,35 +169,25 @@ static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
148 inode_sub_bytes(inode, nr); 169 inode_sub_bytes(inode, nr);
149} 170}
150 171
151static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) 172static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
152{ 173{
153 DQUOT_FREE_SPACE_NODIRTY(inode, nr); 174 vfs_dq_free_space_nodirty(inode, nr);
154 mark_inode_dirty(inode); 175 mark_inode_dirty(inode);
155} 176}
156 177
157static inline void DQUOT_FREE_INODE(struct inode *inode) 178static inline void vfs_dq_free_inode(struct inode *inode)
158{ 179{
159 if (sb_any_quota_enabled(inode->i_sb)) 180 if (sb_any_quota_enabled(inode->i_sb))
160 inode->i_sb->dq_op->free_inode(inode, 1); 181 inode->i_sb->dq_op->free_inode(inode, 1);
161} 182}
162 183
163static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
164{
165 if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
166 DQUOT_INIT(inode);
167 if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
168 return 1;
169 }
170 return 0;
171}
172
173/* The following two functions cannot be called inside a transaction */ 184/* The following two functions cannot be called inside a transaction */
174static inline void DQUOT_SYNC(struct super_block *sb) 185static inline void vfs_dq_sync(struct super_block *sb)
175{ 186{
176 sync_dquots(sb, -1); 187 sync_dquots(sb, -1);
177} 188}
178 189
179static inline int DQUOT_OFF(struct super_block *sb, int remount) 190static inline int vfs_dq_off(struct super_block *sb, int remount)
180{ 191{
181 int ret = -ENOSYS; 192 int ret = -ENOSYS;
182 193
@@ -185,22 +196,27 @@ static inline int DQUOT_OFF(struct super_block *sb, int remount)
185 return ret; 196 return ret;
186} 197}
187 198
188static inline int DQUOT_ON_REMOUNT(struct super_block *sb) 199#else
200
201static inline int sb_has_quota_enabled(struct super_block *sb, int type)
189{ 202{
190 int cnt; 203 return 0;
191 int ret = 0, err; 204}
192 205
193 if (!sb->s_qcop || !sb->s_qcop->quota_on) 206static inline int sb_any_quota_enabled(struct super_block *sb)
194 return -ENOSYS; 207{
195 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 208 return 0;
196 err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
197 if (err < 0 && !ret)
198 ret = err;
199 }
200 return ret;
201} 209}
202 210
203#else 211static inline int sb_has_quota_suspended(struct super_block *sb, int type)
212{
213 return 0;
214}
215
216static inline int sb_any_quota_suspended(struct super_block *sb)
217{
218 return 0;
219}
204 220
205/* 221/*
206 * NO-OP when quota not configured. 222 * NO-OP when quota not configured.
@@ -208,113 +224,144 @@ static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
208#define sb_dquot_ops (NULL) 224#define sb_dquot_ops (NULL)
209#define sb_quotactl_ops (NULL) 225#define sb_quotactl_ops (NULL)
210 226
211static inline void DQUOT_INIT(struct inode *inode) 227static inline void vfs_dq_init(struct inode *inode)
212{ 228{
213} 229}
214 230
215static inline void DQUOT_DROP(struct inode *inode) 231static inline void vfs_dq_drop(struct inode *inode)
216{ 232{
217} 233}
218 234
219static inline int DQUOT_ALLOC_INODE(struct inode *inode) 235static inline int vfs_dq_alloc_inode(struct inode *inode)
220{ 236{
221 return 0; 237 return 0;
222} 238}
223 239
224static inline void DQUOT_FREE_INODE(struct inode *inode) 240static inline void vfs_dq_free_inode(struct inode *inode)
225{ 241{
226} 242}
227 243
228static inline void DQUOT_SYNC(struct super_block *sb) 244static inline void vfs_dq_sync(struct super_block *sb)
229{ 245{
230} 246}
231 247
232static inline int DQUOT_OFF(struct super_block *sb, int remount) 248static inline int vfs_dq_off(struct super_block *sb, int remount)
233{ 249{
234 return 0; 250 return 0;
235} 251}
236 252
237static inline int DQUOT_ON_REMOUNT(struct super_block *sb) 253static inline int vfs_dq_quota_on_remount(struct super_block *sb)
238{ 254{
239 return 0; 255 return 0;
240} 256}
241 257
242static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) 258static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
243{ 259{
244 return 0; 260 return 0;
245} 261}
246 262
247static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) 263static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
248{ 264{
249 inode_add_bytes(inode, nr); 265 inode_add_bytes(inode, nr);
250 return 0; 266 return 0;
251} 267}
252 268
253static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) 269static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
254{ 270{
255 DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr); 271 vfs_dq_prealloc_space_nodirty(inode, nr);
256 mark_inode_dirty(inode); 272 mark_inode_dirty(inode);
257 return 0; 273 return 0;
258} 274}
259 275
260static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) 276static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
261{ 277{
262 inode_add_bytes(inode, nr); 278 inode_add_bytes(inode, nr);
263 return 0; 279 return 0;
264} 280}
265 281
266static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) 282static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
267{ 283{
268 DQUOT_ALLOC_SPACE_NODIRTY(inode, nr); 284 vfs_dq_alloc_space_nodirty(inode, nr);
269 mark_inode_dirty(inode); 285 mark_inode_dirty(inode);
270 return 0; 286 return 0;
271} 287}
272 288
273static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) 289static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
274{ 290{
275 inode_sub_bytes(inode, nr); 291 inode_sub_bytes(inode, nr);
276} 292}
277 293
278static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) 294static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
279{ 295{
280 DQUOT_FREE_SPACE_NODIRTY(inode, nr); 296 vfs_dq_free_space_nodirty(inode, nr);
281 mark_inode_dirty(inode); 297 mark_inode_dirty(inode);
282} 298}
283 299
284#endif /* CONFIG_QUOTA */ 300#endif /* CONFIG_QUOTA */
285 301
286static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) 302static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
287{ 303{
288 return DQUOT_PREALLOC_SPACE_NODIRTY(inode, 304 return vfs_dq_prealloc_space_nodirty(inode,
289 nr << inode->i_sb->s_blocksize_bits); 305 nr << inode->i_sb->s_blocksize_bits);
290} 306}
291 307
292static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr) 308static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
293{ 309{
294 return DQUOT_PREALLOC_SPACE(inode, 310 return vfs_dq_prealloc_space(inode,
295 nr << inode->i_sb->s_blocksize_bits); 311 nr << inode->i_sb->s_blocksize_bits);
296} 312}
297 313
298static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) 314static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
299{ 315{
300 return DQUOT_ALLOC_SPACE_NODIRTY(inode, 316 return vfs_dq_alloc_space_nodirty(inode,
301 nr << inode->i_sb->s_blocksize_bits); 317 nr << inode->i_sb->s_blocksize_bits);
302} 318}
303 319
304static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr) 320static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
305{ 321{
306 return DQUOT_ALLOC_SPACE(inode, 322 return vfs_dq_alloc_space(inode,
307 nr << inode->i_sb->s_blocksize_bits); 323 nr << inode->i_sb->s_blocksize_bits);
308} 324}
309 325
310static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) 326static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
311{ 327{
312 DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits); 328 vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
313} 329}
314 330
315static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr) 331static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
316{ 332{
317 DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits); 333 vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits);
318} 334}
319 335
336/*
337 * Define uppercase equivalents for compatibility with old function names
338 * Can go away when we think all users have been converted (15/04/2008)
339 */
340#define DQUOT_INIT(inode) vfs_dq_init(inode)
341#define DQUOT_DROP(inode) vfs_dq_drop(inode)
342#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \
343 vfs_dq_prealloc_space_nodirty(inode, nr)
344#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr)
345#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \
346 vfs_dq_alloc_space_nodirty(inode, nr)
347#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr)
348#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \
349 vfs_dq_prealloc_block_nodirty(inode, nr)
350#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr)
351#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \
352 vfs_dq_alloc_block_nodirty(inode, nr)
353#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr)
354#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode)
355#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \
356 vfs_dq_free_space_nodirty(inode, nr)
357#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr)
358#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \
359 vfs_dq_free_block_nodirty(inode, nr)
360#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr)
361#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode)
362#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr)
363#define DQUOT_SYNC(sb) vfs_dq_sync(sb)
364#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount)
365#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb)
366
320#endif /* _LINUX_QUOTAOPS_ */ 367#endif /* _LINUX_QUOTAOPS_ */
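The uppercase DQUOT_* names above are now thin compatibility wrappers; new code is expected to call the lowercase vfs_dq_* inlines directly. A hedged sketch of a hypothetical filesystem block-allocation path using the new names:

#include <linux/quotaops.h>

static int example_alloc_blocks(struct inode *inode, qsize_t nr)
{
	/* Charge quota inside the transaction; nonzero means over quota. */
	if (vfs_dq_alloc_block(inode, nr))
		return -EDQUOT;

	/* ... allocate nr blocks on disk; on failure, roll back with */
	/* vfs_dq_free_block(inode, nr); */

	return 0;
}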
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
new file mode 100644
index 000000000000..18a5b9ba9d40
--- /dev/null
+++ b/include/linux/ratelimit.h
@@ -0,0 +1,27 @@
1#ifndef _LINUX_RATELIMIT_H
2#define _LINUX_RATELIMIT_H
3#include <linux/param.h>
4
5#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
6#define DEFAULT_RATELIMIT_BURST 10
7
8struct ratelimit_state {
9 int interval;
10 int burst;
11 int printed;
12 int missed;
13 unsigned long begin;
14};
15
16#define DEFINE_RATELIMIT_STATE(name, interval, burst) \
17 struct ratelimit_state name = {interval, burst,}
18
19extern int __ratelimit(struct ratelimit_state *rs);
20
21static inline int ratelimit(void)
22{
23 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
24 DEFAULT_RATELIMIT_BURST);
25 return __ratelimit(&rs);
26}
27#endif
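Typical use of the new interface is a static per-call-site state plus a gate on __ratelimit(), which returns nonzero while the caller is still under its burst for the current interval. A minimal hedged sketch with a hypothetical caller:

#include <linux/kernel.h>
#include <linux/ratelimit.h>

/* Allow at most 10 messages per 5-second window (the defaults above). */
static void example_report(int cpu)
{
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (__ratelimit(&rs))
		printk(KERN_WARNING "example: overrun on cpu %d\n", cpu);
}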
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index f04b64eca636..0967f03b0705 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -115,16 +115,21 @@ DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
115 115
116static inline void rcu_enter_nohz(void) 116static inline void rcu_enter_nohz(void)
117{ 117{
118 static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
119
118 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ 120 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
119 __get_cpu_var(rcu_dyntick_sched).dynticks++; 121 __get_cpu_var(rcu_dyntick_sched).dynticks++;
120 WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); 122 WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
121} 123}
122 124
123static inline void rcu_exit_nohz(void) 125static inline void rcu_exit_nohz(void)
124{ 126{
127 static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
128
125 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 129 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
126 __get_cpu_var(rcu_dyntick_sched).dynticks++; 130 __get_cpu_var(rcu_dyntick_sched).dynticks++;
127 WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); 131 WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
132 &rs);
128} 133}
129 134
130#else /* CONFIG_NO_HZ */ 135#else /* CONFIG_NO_HZ */
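WARN_ON_RATELIMIT() itself is not shown in this hunk; the assumption here is that it is defined elsewhere in this series (likely <asm-generic/bug.h>) as roughly the following, pairing the condition with a ratelimit state so at most a burst of warnings fires per interval:

/* Assumed shape of the helper used above; treat as a sketch. */
#define WARN_ON_RATELIMIT(condition, state) \
	WARN_ON((condition) && __ratelimit(state))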
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 4aacaeecb56f..e9963af16cda 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -526,8 +526,8 @@ struct item_head {
526** p is the array of __u32, i is the index into the array, v is the value 526** p is the array of __u32, i is the index into the array, v is the value
527** to store there. 527** to store there.
528*/ 528*/
529#define get_block_num(p, i) le32_to_cpu(get_unaligned((p) + (i))) 529#define get_block_num(p, i) get_unaligned_le32((p) + (i))
530#define put_block_num(p, i, v) put_unaligned(cpu_to_le32(v), (p) + (i)) 530#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
531 531
532// 532//
533// in old version uniqueness field shows key type 533// in old version uniqueness field shows key type
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 336ee43ed7d8..315517e8bfa1 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -152,7 +152,7 @@ struct reiserfs_journal_list {
152 atomic_t j_nonzerolen; 152 atomic_t j_nonzerolen;
153 atomic_t j_commit_left; 153 atomic_t j_commit_left;
154 atomic_t j_older_commits_done; /* all commits older than this on disk */ 154 atomic_t j_older_commits_done; /* all commits older than this on disk */
155 struct semaphore j_commit_lock; 155 struct mutex j_commit_mutex;
156 unsigned long j_trans_id; 156 unsigned long j_trans_id;
157 time_t j_timestamp; 157 time_t j_timestamp;
158 struct reiserfs_list_bitmap *j_list_bitmap; 158 struct reiserfs_list_bitmap *j_list_bitmap;
@@ -193,8 +193,8 @@ struct reiserfs_journal {
193 struct buffer_head *j_header_bh; 193 struct buffer_head *j_header_bh;
194 194
195 time_t j_trans_start_time; /* time this transaction started */ 195 time_t j_trans_start_time; /* time this transaction started */
196 struct semaphore j_lock; 196 struct mutex j_mutex;
197 struct semaphore j_flush_sem; 197 struct mutex j_flush_mutex;
198 wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ 198 wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */
199 atomic_t j_jlock; /* lock for j_join_wait */ 199 atomic_t j_jlock; /* lock for j_join_wait */
200 int j_list_bitmap_index; /* number of next list bitmap to use */ 200 int j_list_bitmap_index; /* number of next list bitmap to use */
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6d9e1fca098c..fdeadd9740dc 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -63,9 +63,14 @@ u64 res_counter_read_u64(struct res_counter *counter, int member);
63ssize_t res_counter_read(struct res_counter *counter, int member, 63ssize_t res_counter_read(struct res_counter *counter, int member,
64 const char __user *buf, size_t nbytes, loff_t *pos, 64 const char __user *buf, size_t nbytes, loff_t *pos,
65 int (*read_strategy)(unsigned long long val, char *s)); 65 int (*read_strategy)(unsigned long long val, char *s));
66ssize_t res_counter_write(struct res_counter *counter, int member, 66
67 const char __user *buf, size_t nbytes, loff_t *pos, 67typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
68 int (*write_strategy)(char *buf, unsigned long long *val)); 68
69int res_counter_memparse_write_strategy(const char *buf,
70 unsigned long long *res);
71
72int res_counter_write(struct res_counter *counter, int member,
73 const char *buffer, write_strategy_fn write_strategy);
69 74
70/* 75/*
71 * the field descriptors. one for each member of res_counter 76 * the field descriptors. one for each member of res_counter
@@ -95,8 +100,10 @@ void res_counter_init(struct res_counter *counter);
95 * counter->limit _locked call expects the counter->lock to be taken 100 * counter->limit _locked call expects the counter->lock to be taken
96 */ 101 */
97 102
98int res_counter_charge_locked(struct res_counter *counter, unsigned long val); 103int __must_check res_counter_charge_locked(struct res_counter *counter,
99int res_counter_charge(struct res_counter *counter, unsigned long val); 104 unsigned long val);
105int __must_check res_counter_charge(struct res_counter *counter,
106 unsigned long val);
100 107
101/* 108/*
102 * uncharge - tell that some portion of the resource is released 109 * uncharge - tell that some portion of the resource is released
@@ -151,4 +158,20 @@ static inline void res_counter_reset_failcnt(struct res_counter *cnt)
151 cnt->failcnt = 0; 158 cnt->failcnt = 0;
152 spin_unlock_irqrestore(&cnt->lock, flags); 159 spin_unlock_irqrestore(&cnt->lock, flags);
153} 160}
161
162static inline int res_counter_set_limit(struct res_counter *cnt,
163 unsigned long long limit)
164{
165 unsigned long flags;
166 int ret = -EBUSY;
167
168 spin_lock_irqsave(&cnt->lock, flags);
169 if (cnt->usage < limit) {
170 cnt->limit = limit;
171 ret = 0;
172 }
173 spin_unlock_irqrestore(&cnt->lock, flags);
174 return ret;
175}
176
154#endif 177#endif
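With __must_check on the charge paths, callers have to handle failure and unwind. A hedged sketch of a charge/uncharge pair; res_counter_uncharge() is the existing release helper:

#include <linux/res_counter.h>

static int example_charge(struct res_counter *cnt, unsigned long bytes)
{
	int ret = res_counter_charge(cnt, bytes);

	if (ret)		/* over the limit: failcnt was bumped */
		return ret;

	/* ... do the work; if it fails later, give the charge back: */
	/* res_counter_uncharge(cnt, bytes); */

	return 0;
}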
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6aca4a16e377..42036ffe6b00 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -506,6 +506,10 @@ struct signal_struct {
506 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; 506 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
507 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; 507 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
508 unsigned long inblock, oublock, cinblock, coublock; 508 unsigned long inblock, oublock, cinblock, coublock;
509#ifdef CONFIG_TASK_XACCT
510 u64 rchar, wchar, syscr, syscw;
511#endif
512 struct task_io_accounting ioac;
509 513
510 /* 514 /*
511 * Cumulative ns of scheduled CPU time for dead threads in the 515 * Cumulative ns of scheduled CPU time for dead threads in the
@@ -668,6 +672,10 @@ struct task_delay_info {
668 /* io operations performed */ 672 /* io operations performed */
669 u32 swapin_count; /* total count of the number of swapin block */ 673 u32 swapin_count; /* total count of the number of swapin block */
670 /* io operations performed */ 674 /* io operations performed */
675
676 struct timespec freepages_start, freepages_end;
677 u64 freepages_delay; /* wait for memory reclaim */
678 u32 freepages_count; /* total count of memory reclaim */
671}; 679};
672#endif /* CONFIG_TASK_DELAY_ACCT */ 680#endif /* CONFIG_TASK_DELAY_ACCT */
673 681
@@ -1257,7 +1265,7 @@ struct task_struct {
1257#if defined(CONFIG_TASK_XACCT) 1265#if defined(CONFIG_TASK_XACCT)
1258 u64 acct_rss_mem1; /* accumulated rss usage */ 1266 u64 acct_rss_mem1; /* accumulated rss usage */
1259 u64 acct_vm_mem1; /* accumulated virtual memory usage */ 1267 u64 acct_vm_mem1; /* accumulated virtual memory usage */
1260 cputime_t acct_stimexpd;/* stime since last update */ 1268 cputime_t acct_timexpd; /* stime + utime since last update */
1261#endif 1269#endif
1262#ifdef CONFIG_CPUSETS 1270#ifdef CONFIG_CPUSETS
1263 nodemask_t mems_allowed; 1271 nodemask_t mems_allowed;
@@ -1496,7 +1504,7 @@ static inline void put_task_struct(struct task_struct *t)
1496#define PF_KSWAPD 0x00040000 /* I am kswapd */ 1504#define PF_KSWAPD 0x00040000 /* I am kswapd */
1497#define PF_SWAPOFF 0x00080000 /* I am in swapoff */ 1505#define PF_SWAPOFF 0x00080000 /* I am in swapoff */
1498#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ 1506#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
1499#define PF_BORROWED_MM 0x00200000 /* I am a kthread doing use_mm */ 1507#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
1500#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ 1508#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
1501#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ 1509#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1502#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ 1510#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
@@ -1715,19 +1723,13 @@ extern struct pid_namespace init_pid_ns;
1715 * finds a task by its pid in the specified namespace 1723 * finds a task by its pid in the specified namespace
1716 * find_task_by_vpid(): 1724 * find_task_by_vpid():
1717 * finds a task by its virtual pid 1725 * finds a task by its virtual pid
1718 * find_task_by_pid():
1719 * finds a task by its global pid
1720 * 1726 *
1721 * see also find_pid() etc in include/linux/pid.h 1727 * see also find_vpid() etc in include/linux/pid.h
1722 */ 1728 */
1723 1729
1724extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, 1730extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
1725 struct pid_namespace *ns); 1731 struct pid_namespace *ns);
1726 1732
1727static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr)
1728{
1729 return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
1730}
1731extern struct task_struct *find_task_by_vpid(pid_t nr); 1733extern struct task_struct *find_task_by_vpid(pid_t nr);
1732extern struct task_struct *find_task_by_pid_ns(pid_t nr, 1734extern struct task_struct *find_task_by_pid_ns(pid_t nr,
1733 struct pid_namespace *ns); 1735 struct pid_namespace *ns);
@@ -1800,7 +1802,6 @@ extern void force_sig(int, struct task_struct *);
1800extern void force_sig_specific(int, struct task_struct *); 1802extern void force_sig_specific(int, struct task_struct *);
1801extern int send_sig(int, struct task_struct *, int); 1803extern int send_sig(int, struct task_struct *, int);
1802extern void zap_other_threads(struct task_struct *p); 1804extern void zap_other_threads(struct task_struct *p);
1803extern int kill_proc(pid_t, int, int);
1804extern struct sigqueue *sigqueue_alloc(void); 1805extern struct sigqueue *sigqueue_alloc(void);
1805extern void sigqueue_free(struct sigqueue *); 1806extern void sigqueue_free(struct sigqueue *);
1806extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); 1807extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group);
@@ -2054,9 +2055,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
2054 if (!signal_pending(p)) 2055 if (!signal_pending(p))
2055 return 0; 2056 return 0;
2056 2057
2057 if (state & (__TASK_STOPPED | __TASK_TRACED))
2058 return 0;
2059
2060 return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); 2058 return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
2061} 2059}
2062 2060
diff --git a/include/linux/sem.h b/include/linux/sem.h
index c8eaad9e4b72..1b191c176bcd 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -78,6 +78,7 @@ struct seminfo {
78 78
79#ifdef __KERNEL__ 79#ifdef __KERNEL__
80#include <asm/atomic.h> 80#include <asm/atomic.h>
81#include <linux/rcupdate.h>
81 82
82struct task_struct; 83struct task_struct;
83 84
@@ -93,23 +94,19 @@ struct sem_array {
93 time_t sem_otime; /* last semop time */ 94 time_t sem_otime; /* last semop time */
94 time_t sem_ctime; /* last change time */ 95 time_t sem_ctime; /* last change time */
95 struct sem *sem_base; /* ptr to first semaphore in array */ 96 struct sem *sem_base; /* ptr to first semaphore in array */
96 struct sem_queue *sem_pending; /* pending operations to be processed */ 97 struct list_head sem_pending; /* pending operations to be processed */
97 struct sem_queue **sem_pending_last; /* last pending operation */ 98 struct list_head list_id; /* undo requests on this array */
98 struct sem_undo *undo; /* undo requests on this array */
99 unsigned long sem_nsems; /* no. of semaphores in array */ 99 unsigned long sem_nsems; /* no. of semaphores in array */
100}; 100};
101 101
102/* One queue for each sleeping process in the system. */ 102/* One queue for each sleeping process in the system. */
103struct sem_queue { 103struct sem_queue {
104 struct sem_queue * next; /* next entry in the queue */ 104 struct list_head list; /* queue of pending operations */
105 struct sem_queue ** prev; /* previous entry in the queue, *(q->prev) == q */ 105 struct task_struct *sleeper; /* this process */
106 struct task_struct* sleeper; /* this process */ 106 struct sem_undo *undo; /* undo structure */
107 struct sem_undo * undo; /* undo structure */
108 int pid; /* process id of requesting process */ 107 int pid; /* process id of requesting process */
109 int status; /* completion status of operation */ 108 int status; /* completion status of operation */
110 struct sem_array * sma; /* semaphore array for operations */ 109 struct sembuf *sops; /* array of pending operations */
111 int id; /* internal sem id */
112 struct sembuf * sops; /* array of pending operations */
113 int nsops; /* number of operations */ 110 int nsops; /* number of operations */
114 int alter; /* does the operation alter the array? */ 111 int alter; /* does the operation alter the array? */
115}; 112};
@@ -118,8 +115,11 @@ struct sem_queue {
118 * when the process exits. 115 * when the process exits.
119 */ 116 */
120struct sem_undo { 117struct sem_undo {
121 struct sem_undo * proc_next; /* next entry on this process */ 118 struct list_head list_proc; /* per-process list: all undos from one process. */
122 struct sem_undo * id_next; /* next entry on this semaphore set */ 119 /* rcu protected */
120 struct rcu_head rcu; /* rcu struct for sem_undo() */
121 struct sem_undo_list *ulp; /* sem_undo_list for the process */
122 struct list_head list_id; /* per semaphore array list: all undos for one array */
123 int semid; /* semaphore set identifier */ 123 int semid; /* semaphore set identifier */
124 short * semadj; /* array of adjustments, one per semaphore */ 124 short * semadj; /* array of adjustments, one per semaphore */
125}; 125};
@@ -128,9 +128,9 @@ struct sem_undo {
128 * that may be shared among all a CLONE_SYSVSEM task group. 128 * that may be shared among all a CLONE_SYSVSEM task group.
129 */ 129 */
130struct sem_undo_list { 130struct sem_undo_list {
131 atomic_t refcnt; 131 atomic_t refcnt;
132 spinlock_t lock; 132 spinlock_t lock;
133 struct sem_undo *proc_list; 133 struct list_head list_proc;
134}; 134};
135 135
136struct sysv_sem { 136struct sysv_sem {
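The hand-rolled next/prev pointers are replaced by standard list_heads, so the pending queue can now be walked with the generic list iterators. A hedged sketch; the real ipc/sem.c walker does considerably more per entry:

#include <linux/list.h>
#include <linux/sem.h>

static void example_scan_pending(struct sem_array *sma)
{
	struct sem_queue *q;

	/* sma->sem_pending is now a list_head; 'list' is the member
	 * linking each sem_queue into it.
	 */
	list_for_each_entry(q, &sma->sem_pending, list) {
		/* inspect q->sops[0 .. q->nsops-1], q->sleeper, ... */
	}
}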
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index b530fa6a1d34..214f93209b8c 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -46,24 +46,6 @@ extern unsigned long sm501_modify_reg(struct device *dev,
46 unsigned long set, 46 unsigned long set,
47 unsigned long clear); 47 unsigned long clear);
48 48
49/* sm501_gpio_set
50 *
51 * set the state of the given GPIO line
52*/
53
54extern void sm501_gpio_set(struct device *dev,
55 unsigned long gpio,
56 unsigned int to,
57 unsigned int dir);
58
59/* sm501_gpio_get
60 *
61 * get the state of the given GPIO line
62*/
63
64extern unsigned long sm501_gpio_get(struct device *dev,
65 unsigned long gpio);
66
67 49
68/* Platform data definitions */ 50/* Platform data definitions */
69 51
@@ -104,11 +86,19 @@ struct sm501_platdata_fb {
104 struct sm501_platdata_fbsub *fb_pnl; 86 struct sm501_platdata_fbsub *fb_pnl;
105}; 87};
106 88
107/* gpio i2c */ 89/* gpio i2c
90 *
91 * Note, we have to pass in the bus number, as the number used will be
92 * passed to the i2c-gpio driver's platform_device.id, subsequently used
93 * to register the i2c bus.
94*/
108 95
109struct sm501_platdata_gpio_i2c { 96struct sm501_platdata_gpio_i2c {
97 unsigned int bus_num;
110 unsigned int pin_sda; 98 unsigned int pin_sda;
111 unsigned int pin_scl; 99 unsigned int pin_scl;
100 int udelay;
101 int timeout;
112}; 102};
113 103
114/* sm501_initdata 104/* sm501_initdata
@@ -131,6 +121,7 @@ struct sm501_reg_init {
131#define SM501_USE_FBACCEL (1<<6) 121#define SM501_USE_FBACCEL (1<<6)
132#define SM501_USE_AC97 (1<<7) 122#define SM501_USE_AC97 (1<<7)
133#define SM501_USE_I2S (1<<8) 123#define SM501_USE_I2S (1<<8)
124#define SM501_USE_GPIO (1<<9)
134 125
135#define SM501_USE_ALL (0xffffffff) 126#define SM501_USE_ALL (0xffffffff)
136 127
@@ -157,6 +148,8 @@ struct sm501_init_gpio {
157 struct sm501_reg_init gpio_ddr_high; 148 struct sm501_reg_init gpio_ddr_high;
158}; 149};
159 150
151#define SM501_FLAG_SUSPEND_OFF (1<<4)
152
160/* sm501_platdata 153/* sm501_platdata
161 * 154 *
162 * This is passed with the platform device to allow the board 155 * This is passed with the platform device to allow the board
@@ -170,6 +163,12 @@ struct sm501_platdata {
170 struct sm501_init_gpio *init_gpiop; 163 struct sm501_init_gpio *init_gpiop;
171 struct sm501_platdata_fb *fb; 164 struct sm501_platdata_fb *fb;
172 165
166 int flags;
167 int gpio_base;
168
169 int (*get_power)(struct device *dev);
170 int (*set_power)(struct device *dev, unsigned int on);
171
173 struct sm501_platdata_gpio_i2c *gpio_i2c; 172 struct sm501_platdata_gpio_i2c *gpio_i2c;
174 unsigned int gpio_i2c_nr; 173 unsigned int gpio_i2c_nr;
175}; 174};
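Board code now has to pick the bus number explicitly, since it becomes the i2c-gpio platform_device.id. A hedged sketch with made-up pin numbers and timings:

#include <linux/sm501.h>

static struct sm501_platdata_gpio_i2c example_sm501_i2c[] = {
	[0] = {
		.bus_num = 1,	/* registered as i2c bus 1 */
		.pin_sda = 0,	/* hypothetical GPIO numbers */
		.pin_scl = 1,
		.udelay	 = 5,	/* half-period in us, roughly 100 kHz */
		.timeout = 100,	/* bus timeout, driver-defined units */
	},
};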
diff --git a/include/linux/smb_fs.h b/include/linux/smb_fs.h
index 2c5cd55f44ff..923cd8a247b1 100644
--- a/include/linux/smb_fs.h
+++ b/include/linux/smb_fs.h
@@ -43,18 +43,13 @@ static inline struct smb_inode_info *SMB_I(struct inode *inode)
43} 43}
44 44
45/* macro names are short for word, double-word, long value (?) */ 45/* macro names are short for word, double-word, long value (?) */
46#define WVAL(buf,pos) \ 46#define WVAL(buf, pos) (get_unaligned_le16((u8 *)(buf) + (pos)))
47 (le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos))))) 47#define DVAL(buf, pos) (get_unaligned_le32((u8 *)(buf) + (pos)))
48#define DVAL(buf,pos) \ 48#define LVAL(buf, pos) (get_unaligned_le64((u8 *)(buf) + (pos)))
49 (le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos))))) 49
50#define LVAL(buf,pos) \ 50#define WSET(buf, pos, val) put_unaligned_le16((val), (u8 *)(buf) + (pos))
51 (le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos))))) 51#define DSET(buf, pos, val) put_unaligned_le32((val), (u8 *)(buf) + (pos))
52#define WSET(buf,pos,val) \ 52#define LSET(buf, pos, val) put_unaligned_le64((val), (u8 *)(buf) + (pos))
53 put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos)))
54#define DSET(buf,pos,val) \
55 put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos)))
56#define LSET(buf,pos,val) \
57 put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos)))
58 53
59/* where to find the base of the SMB packet proper */ 54/* where to find the base of the SMB packet proper */
60#define smb_base(buf) ((u8 *)(((u8 *)(buf))+4)) 55#define smb_base(buf) ((u8 *)(((u8 *)(buf))+4))
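
The rewritten macros behave as before but lean on the get/put_unaligned_le*() helpers; a usage sketch with an illustrative buffer and offset:

static void example_smb(void)
{
	u8 pkt[64] = { 0 };
	u16 wc;

	WSET(pkt, 32, 0x1234);	/* put_unaligned_le16() underneath */
	wc = WVAL(pkt, 32);	/* reads back 0x1234 at any alignment */
	(void)wc;
}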
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 835ddf47d45c..22ef107d7704 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -1,18 +1,25 @@
1 1
2/* FIXME driver should be able to handle all four slaves that 2/* FIXME driver should be able to handle IRQs... */
3 * can be hooked up to each chipselect, as well as IRQs... 3
4 */ 4struct mcp23s08_chip_info {
5 bool is_present; /* true iff populated */
6 u8 pullups; /* BIT(x) means enable pullup x */
7};
5 8
6struct mcp23s08_platform_data { 9struct mcp23s08_platform_data {
7 /* four slaves can share one SPI chipselect */ 10 /* Four slaves (numbered 0..3) can share one SPI chipselect, and
8 u8 slave; 11 * will provide 8..32 GPIOs using 1..4 gpio_chip instances.
12 */
13 struct mcp23s08_chip_info chip[4];
9 14
10 /* number assigned to the first GPIO */ 15 /* "base" is the number of the first GPIO. Dynamic assignment is
16 * not currently supported, and even if there are gaps in chip
17 * addressing the GPIO numbers are sequential .. so for example
18 * if only slaves 0 and 3 are present, their GPIOs range from
19 * base to base+15.
20 */
11 unsigned base; 21 unsigned base;
12 22
13 /* pins with pullups */
14 u8 pullups;
15
16 void *context; /* param to setup/teardown */ 23 void *context; /* param to setup/teardown */
17 24
18 int (*setup)(struct spi_device *spi, 25 int (*setup)(struct spi_device *spi,
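
A board using the new per-slave layout might fill the platform data as below; a sketch, with the slave population, pullups and GPIO base made up. With only slaves 0 and 3 present, the GPIOs still number sequentially from base to base+15, as the comment above describes:

static struct mcp23s08_platform_data example_mcp_pdata = {
	.chip[0] = { .is_present = true, .pullups = BIT(0) | BIT(1) },
	.chip[3] = { .is_present = true, .pullups = 0 },
	.base	 = 240,
};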
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d311a090fae7..61e5610ad165 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -46,6 +46,7 @@
46 * linux/spinlock.h: builds the final spin_*() APIs. 46 * linux/spinlock.h: builds the final spin_*() APIs.
47 */ 47 */
48 48
49#include <linux/typecheck.h>
49#include <linux/preempt.h> 50#include <linux/preempt.h>
50#include <linux/linkage.h> 51#include <linux/linkage.h>
51#include <linux/compiler.h> 52#include <linux/compiler.h>
@@ -191,23 +192,53 @@ do { \
191 192
192#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) 193#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
193 194
194#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock) 195#define spin_lock_irqsave(lock, flags) \
195#define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock) 196 do { \
196#define write_lock_irqsave(lock, flags) flags = _write_lock_irqsave(lock) 197 typecheck(unsigned long, flags); \
198 flags = _spin_lock_irqsave(lock); \
199 } while (0)
200#define read_lock_irqsave(lock, flags) \
201 do { \
202 typecheck(unsigned long, flags); \
203 flags = _read_lock_irqsave(lock); \
204 } while (0)
205#define write_lock_irqsave(lock, flags) \
206 do { \
207 typecheck(unsigned long, flags); \
208 flags = _write_lock_irqsave(lock); \
209 } while (0)
197 210
198#ifdef CONFIG_DEBUG_LOCK_ALLOC 211#ifdef CONFIG_DEBUG_LOCK_ALLOC
199#define spin_lock_irqsave_nested(lock, flags, subclass) \ 212#define spin_lock_irqsave_nested(lock, flags, subclass) \
200 flags = _spin_lock_irqsave_nested(lock, subclass) 213 do { \
214 typecheck(unsigned long, flags); \
215 flags = _spin_lock_irqsave_nested(lock, subclass); \
216 } while (0)
201#else 217#else
202#define spin_lock_irqsave_nested(lock, flags, subclass) \ 218#define spin_lock_irqsave_nested(lock, flags, subclass) \
203 flags = _spin_lock_irqsave(lock) 219 do { \
220 typecheck(unsigned long, flags); \
221 flags = _spin_lock_irqsave(lock); \
222 } while (0)
204#endif 223#endif
205 224
206#else 225#else
207 226
208#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags) 227#define spin_lock_irqsave(lock, flags) \
209#define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags) 228 do { \
210#define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags) 229 typecheck(unsigned long, flags); \
230 _spin_lock_irqsave(lock, flags); \
231 } while (0)
232#define read_lock_irqsave(lock, flags) \
233 do { \
234 typecheck(unsigned long, flags); \
235 _read_lock_irqsave(lock, flags); \
236 } while (0)
237#define write_lock_irqsave(lock, flags) \
238 do { \
239 typecheck(unsigned long, flags); \
240 _write_lock_irqsave(lock, flags); \
241 } while (0)
211#define spin_lock_irqsave_nested(lock, flags, subclass) \ 242#define spin_lock_irqsave_nested(lock, flags, subclass) \
212 spin_lock_irqsave(lock, flags) 243 spin_lock_irqsave(lock, flags)
213 244
@@ -260,16 +291,25 @@ do { \
260} while (0) 291} while (0)
261#endif 292#endif
262 293
263#define spin_unlock_irqrestore(lock, flags) \ 294#define spin_unlock_irqrestore(lock, flags) \
264 _spin_unlock_irqrestore(lock, flags) 295 do { \
296 typecheck(unsigned long, flags); \
297 _spin_unlock_irqrestore(lock, flags); \
298 } while (0)
265#define spin_unlock_bh(lock) _spin_unlock_bh(lock) 299#define spin_unlock_bh(lock) _spin_unlock_bh(lock)
266 300
267#define read_unlock_irqrestore(lock, flags) \ 301#define read_unlock_irqrestore(lock, flags) \
268 _read_unlock_irqrestore(lock, flags) 302 do { \
303 typecheck(unsigned long, flags); \
304 _read_unlock_irqrestore(lock, flags); \
305 } while (0)
269#define read_unlock_bh(lock) _read_unlock_bh(lock) 306#define read_unlock_bh(lock) _read_unlock_bh(lock)
270 307
271#define write_unlock_irqrestore(lock, flags) \ 308#define write_unlock_irqrestore(lock, flags) \
272 _write_unlock_irqrestore(lock, flags) 309 do { \
310 typecheck(unsigned long, flags); \
311 _write_unlock_irqrestore(lock, flags); \
312 } while (0)
273#define write_unlock_bh(lock) _write_unlock_bh(lock) 313#define write_unlock_bh(lock) _write_unlock_bh(lock)
274 314
275#define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock)) 315#define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock))
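
The practical effect of wrapping the irqsave variants in typecheck() is that a mistyped flags variable is now caught at compile time instead of being silently truncated on 64-bit. A sketch:

static DEFINE_SPINLOCK(example_lock);

static void example_critical(void)
{
	unsigned long flags;		/* must be unsigned long */

	spin_lock_irqsave(&example_lock, flags);
	/* ... critical section ... */
	spin_unlock_irqrestore(&example_lock, flags);
}

/* Declaring "int flags;" instead now draws a "comparison of distinct
 * pointer types" warning from the typecheck() inside the macro. */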
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 5d69c0744fff..18269e956a71 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
31 */ 31 */
32 32
33 33
34#define TASKSTATS_VERSION 6 34#define TASKSTATS_VERSION 7
35#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN 35#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
36 * in linux/sched.h */ 36 * in linux/sched.h */
37 37
@@ -157,6 +157,10 @@ struct taskstats {
157 __u64 ac_utimescaled; /* utime scaled on frequency etc */ 157 __u64 ac_utimescaled; /* utime scaled on frequency etc */
158 __u64 ac_stimescaled; /* stime scaled on frequency etc */ 158 __u64 ac_stimescaled; /* stime scaled on frequency etc */
159 __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ 159 __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
160
161 /* Delay waiting for memory reclaim */
162 __u64 freepages_count;
163 __u64 freepages_delay_total;
160}; 164};
161 165
162 166
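
Userspace consumers (e.g. getdelays) can derive a mean reclaim delay from the two new fields; a sketch, assuming a struct taskstats already received over the taskstats netlink socket and, like the other delay counters, nanosecond units:

static void example_print_reclaim(const struct taskstats *ts)
{
	unsigned long long avg = 0;

	if (ts->freepages_count)
		avg = ts->freepages_delay_total / ts->freepages_count;
	printf("reclaim: %llu delays, avg %llu ns\n",
	       (unsigned long long)ts->freepages_count, avg);
}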
diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h
new file mode 100644
index 000000000000..eb5b74a575be
--- /dev/null
+++ b/include/linux/typecheck.h
@@ -0,0 +1,24 @@
1#ifndef TYPECHECK_H_INCLUDED
2#define TYPECHECK_H_INCLUDED
3
4/*
5 * Check at compile time that something is of a particular type.
6 * Always evaluates to 1 so you may use it easily in comparisons.
7 */
8#define typecheck(type,x) \
9({ type __dummy; \
10 typeof(x) __dummy2; \
11 (void)(&__dummy == &__dummy2); \
12 1; \
13})
14
15/*
16 * Check at compile time that 'function' is a certain type, or is a pointer
 17 * to that type (this needs a typedef for the function type).
18 */
19#define typecheck_fn(type,function) \
20({ typeof(type) __tmp = function; \
21 (void)__tmp; \
22})
23
24#endif /* TYPECHECK_H_INCLUDED */
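
typecheck_fn() complements this for function pointers; a sketch using an assumed typedef and handler:

typedef void (*example_handler_t)(int);

static void example_handler(int arg) { }

static void example_check(void)
{
	/* Fails to compile if example_handler's signature drifts: */
	typecheck_fn(example_handler_t, example_handler);
}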
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 747c3a49cdc9..c932390c6da0 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -330,7 +330,7 @@ extern int usb_string_id(struct usb_composite_dev *c);
330 dev_vdbg(&(d)->gadget->dev , fmt , ## args) 330 dev_vdbg(&(d)->gadget->dev , fmt , ## args)
331#define ERROR(d, fmt, args...) \ 331#define ERROR(d, fmt, args...) \
332 dev_err(&(d)->gadget->dev , fmt , ## args) 332 dev_err(&(d)->gadget->dev , fmt , ## args)
333#define WARN(d, fmt, args...) \ 333#define WARNING(d, fmt, args...) \
334 dev_warn(&(d)->gadget->dev , fmt , ## args) 334 dev_warn(&(d)->gadget->dev , fmt , ## args)
335#define INFO(d, fmt, args...) \ 335#define INFO(d, fmt, args...) \
336 dev_info(&(d)->gadget->dev , fmt , ## args) 336 dev_info(&(d)->gadget->dev , fmt , ## args)
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 8eff0b53910b..b3c4a60ceeb3 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -1,5 +1,7 @@
1#ifndef _LINUX_VIRTIO_9P_H 1#ifndef _LINUX_VIRTIO_9P_H
2#define _LINUX_VIRTIO_9P_H 2#define _LINUX_VIRTIO_9P_H
3/* This header is BSD licensed so anyone can use the definitions to implement
4 * compatible drivers/servers. */
3#include <linux/virtio_config.h> 5#include <linux/virtio_config.h>
4 6
5/* The ID for virtio console */ 7/* The ID for virtio console */
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 979524ee75b7..c30c7bfbf39b 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -1,5 +1,7 @@
1#ifndef _LINUX_VIRTIO_BALLOON_H 1#ifndef _LINUX_VIRTIO_BALLOON_H
2#define _LINUX_VIRTIO_BALLOON_H 2#define _LINUX_VIRTIO_BALLOON_H
3/* This header is BSD licensed so anyone can use the definitions to implement
4 * compatible drivers/servers. */
3#include <linux/virtio_config.h> 5#include <linux/virtio_config.h>
4 6
5/* The ID for virtio_balloon */ 7/* The ID for virtio_balloon */
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 5f79a5f9de79..c1aef85243bf 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -1,5 +1,7 @@
1#ifndef _LINUX_VIRTIO_BLK_H 1#ifndef _LINUX_VIRTIO_BLK_H
2#define _LINUX_VIRTIO_BLK_H 2#define _LINUX_VIRTIO_BLK_H
3/* This header is BSD licensed so anyone can use the definitions to implement
4 * compatible drivers/servers. */
3#include <linux/virtio_config.h> 5#include <linux/virtio_config.h>
4 6
5/* The ID for virtio_block */ 7/* The ID for virtio_block */
@@ -11,6 +13,7 @@
11#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ 13#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
12#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ 14#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */
13#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ 15#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
16#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
14 17
15struct virtio_blk_config 18struct virtio_blk_config
16{ 19{
@@ -26,6 +29,8 @@ struct virtio_blk_config
26 __u8 heads; 29 __u8 heads;
27 __u8 sectors; 30 __u8 sectors;
28 } geometry; 31 } geometry;
32 /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
33 __u32 blk_size;
29} __attribute__((packed)); 34} __attribute__((packed));
30 35
31/* These two define direction. */ 36/* These two define direction. */
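
A driver would gate its use of the new field on the feature bit; a sketch, with error handling elided and blk_queue_hardsect_size() as one plausible consumer:

static void example_set_sector_size(struct virtio_device *vdev,
				    struct request_queue *q)
{
	u32 blk_size = 512;		/* default sector size */

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE))
		vdev->config->get(vdev,
				  offsetof(struct virtio_blk_config, blk_size),
				  &blk_size, sizeof(blk_size));
	blk_queue_hardsect_size(q, blk_size);
}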
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index f364bbf63c34..bf8ec283b232 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -1,5 +1,8 @@
1#ifndef _LINUX_VIRTIO_CONFIG_H 1#ifndef _LINUX_VIRTIO_CONFIG_H
2#define _LINUX_VIRTIO_CONFIG_H 2#define _LINUX_VIRTIO_CONFIG_H
3/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
4 * anyone can use the definitions to implement compatible drivers/servers. */
5
3/* Virtio devices use a standardized configuration space to define their 6/* Virtio devices use a standardized configuration space to define their
4 * features and pass configuration information, but each implementation can 7 * features and pass configuration information, but each implementation can
5 * store and access that space differently. */ 8 * store and access that space differently. */
@@ -15,6 +18,12 @@
15/* We've given up on this device. */ 18/* We've given up on this device. */
16#define VIRTIO_CONFIG_S_FAILED 0x80 19#define VIRTIO_CONFIG_S_FAILED 0x80
17 20
21/* Some virtio feature bits (currently bits 28 through 31) are reserved for the
 22 * transport being used (e.g. virtio_ring); the rest are per-device feature
23 * bits. */
24#define VIRTIO_TRANSPORT_F_START 28
25#define VIRTIO_TRANSPORT_F_END 32
26
18/* Do we get callbacks when the ring is completely used, even if we've 27/* Do we get callbacks when the ring is completely used, even if we've
19 * suppressed them? */ 28 * suppressed them? */
20#define VIRTIO_F_NOTIFY_ON_EMPTY 24 29#define VIRTIO_F_NOTIFY_ON_EMPTY 24
@@ -52,9 +61,10 @@
52 * @get_features: get the array of feature bits for this device. 61 * @get_features: get the array of feature bits for this device.
53 * vdev: the virtio_device 62 * vdev: the virtio_device
54 * Returns the first 32 feature bits (all we currently need). 63 * Returns the first 32 feature bits (all we currently need).
55 * @set_features: confirm what device features we'll be using. 64 * @finalize_features: confirm what device features we'll be using.
56 * vdev: the virtio_device 65 * vdev: the virtio_device
57 * feature: the first 32 feature bits 66 * This gives the final feature bits for the device: it can change
67 * the dev->feature bits if it wants.
58 */ 68 */
59struct virtio_config_ops 69struct virtio_config_ops
60{ 70{
@@ -70,7 +80,7 @@ struct virtio_config_ops
70 void (*callback)(struct virtqueue *)); 80 void (*callback)(struct virtqueue *));
71 void (*del_vq)(struct virtqueue *vq); 81 void (*del_vq)(struct virtqueue *vq);
72 u32 (*get_features)(struct virtio_device *vdev); 82 u32 (*get_features)(struct virtio_device *vdev);
73 void (*set_features)(struct virtio_device *vdev, u32 features); 83 void (*finalize_features)(struct virtio_device *vdev);
74}; 84};
75 85
76/* If driver didn't advertise the feature, it will never appear. */ 86/* If driver didn't advertise the feature, it will never appear. */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index ed2d4ead7eb7..19a0da0dba41 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -1,6 +1,8 @@
1#ifndef _LINUX_VIRTIO_CONSOLE_H 1#ifndef _LINUX_VIRTIO_CONSOLE_H
2#define _LINUX_VIRTIO_CONSOLE_H 2#define _LINUX_VIRTIO_CONSOLE_H
3#include <linux/virtio_config.h> 3#include <linux/virtio_config.h>
4/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
5 * anyone can use the definitions to implement compatible drivers/servers. */
4 6
5/* The ID for virtio console */ 7/* The ID for virtio console */
6#define VIRTIO_ID_CONSOLE 3 8#define VIRTIO_ID_CONSOLE 3
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 38c0571820fb..5e33761b9b8a 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -1,5 +1,7 @@
1#ifndef _LINUX_VIRTIO_NET_H 1#ifndef _LINUX_VIRTIO_NET_H
2#define _LINUX_VIRTIO_NET_H 2#define _LINUX_VIRTIO_NET_H
3/* This header is BSD licensed so anyone can use the definitions to implement
4 * compatible drivers/servers. */
3#include <linux/virtio_config.h> 5#include <linux/virtio_config.h>
4 6
5/* The ID for virtio_net */ 7/* The ID for virtio_net */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index b3151659cf49..cdef35742932 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -9,9 +9,8 @@
9 * Authors: 9 * Authors:
10 * Anthony Liguori <aliguori@us.ibm.com> 10 * Anthony Liguori <aliguori@us.ibm.com>
11 * 11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 12 * This header is BSD licensed so anyone can use the definitions to implement
13 * See the COPYING file in the top-level directory. 13 * compatible drivers/servers.
14 *
15 */ 14 */
16 15
17#ifndef _LINUX_VIRTIO_PCI_H 16#ifndef _LINUX_VIRTIO_PCI_H
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index abe481ed990e..c4a598fb3826 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -120,6 +120,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
120 void (*notify)(struct virtqueue *vq), 120 void (*notify)(struct virtqueue *vq),
121 void (*callback)(struct virtqueue *vq)); 121 void (*callback)(struct virtqueue *vq));
122void vring_del_virtqueue(struct virtqueue *vq); 122void vring_del_virtqueue(struct virtqueue *vq);
123/* Filter out transport-specific feature bits. */
124void vring_transport_features(struct virtio_device *vdev);
123 125
124irqreturn_t vring_interrupt(int irq, void *_vq); 126irqreturn_t vring_interrupt(int irq, void *_vq);
125#endif /* __KERNEL__ */ 127#endif /* __KERNEL__ */
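
Together the virtio_config.h and virtio_ring.h hunks let each transport fix up the negotiated bits in one place. A sketch of a finalize_features hook for a vring-based transport; the device write-back is transport-specific and only indicated by a comment:

static void example_finalize_features(struct virtio_device *vdev)
{
	/* Mask out bits 28..31, which belong to the transport layer: */
	vring_transport_features(vdev);

	/* ...then tell the device which features were accepted, e.g. by
	 * writing the feature word to the transport's config space. */
}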
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h
index 331afb6c9f62..1a85dab8a940 100644
--- a/include/linux/virtio_rng.h
+++ b/include/linux/virtio_rng.h
@@ -1,5 +1,7 @@
1#ifndef _LINUX_VIRTIO_RNG_H 1#ifndef _LINUX_VIRTIO_RNG_H
2#define _LINUX_VIRTIO_RNG_H 2#define _LINUX_VIRTIO_RNG_H
3/* This header is BSD licensed so anyone can use the definitions to implement
4 * compatible drivers/servers. */
3#include <linux/virtio_config.h> 5#include <linux/virtio_config.h>
4 6
5/* The ID for virtio_rng */ 7/* The ID for virtio_rng */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 14d47120682b..5c158c477ac7 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -201,6 +201,8 @@ extern int keventd_up(void);
201extern void init_workqueues(void); 201extern void init_workqueues(void);
202int execute_in_process_context(work_func_t fn, struct execute_work *); 202int execute_in_process_context(work_func_t fn, struct execute_work *);
203 203
204extern int flush_work(struct work_struct *work);
205
204extern int cancel_work_sync(struct work_struct *work); 206extern int cancel_work_sync(struct work_struct *work);
205 207
206/* 208/*
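
flush_work() waits for one specific work item instead of draining the whole workqueue; a usage sketch with an illustrative handler:

static void example_work_fn(struct work_struct *work) { }
static DECLARE_WORK(example_work, example_work_fn);

static void example_sync(void)
{
	schedule_work(&example_work);
	/* ... */
	flush_work(&example_work);  /* returns once example_work_fn has run */
}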
diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h
index a7421f130cc0..ccdc562e444e 100644
--- a/include/mtd/ubi-user.h
+++ b/include/mtd/ubi-user.h
@@ -58,6 +58,13 @@
58 * device should be used. A &struct ubi_rsvol_req object has to be properly 58 * device should be used. A &struct ubi_rsvol_req object has to be properly
59 * filled and a pointer to it has to be passed to the IOCTL. 59 * filled and a pointer to it has to be passed to the IOCTL.
60 * 60 *
61 * UBI volumes re-name
62 * ~~~~~~~~~~~~~~~~~~~
63 *
64 * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command
65 * of the UBI character device should be used. A &struct ubi_rnvol_req object
66 * has to be properly filled and a pointer to it has to be passed to the IOCTL.
67 *
61 * UBI volume update 68 * UBI volume update
62 * ~~~~~~~~~~~~~~~~~ 69 * ~~~~~~~~~~~~~~~~~
63 * 70 *
@@ -104,6 +111,8 @@
104#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t) 111#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t)
105/* Re-size an UBI volume */ 112/* Re-size an UBI volume */
106#define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req) 113#define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req)
114/* Re-name volumes */
115#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req)
107 116
108/* IOCTL commands of the UBI control character device */ 117/* IOCTL commands of the UBI control character device */
109 118
@@ -128,6 +137,9 @@
128/* Maximum MTD device name length supported by UBI */ 137/* Maximum MTD device name length supported by UBI */
129#define MAX_UBI_MTD_NAME_LEN 127 138#define MAX_UBI_MTD_NAME_LEN 127
130 139
140/* Maximum amount of UBI volumes that can be re-named at one go */
141#define UBI_MAX_RNVOL 32
142
131/* 143/*
132 * UBI data type hint constants. 144 * UBI data type hint constants.
133 * 145 *
@@ -176,20 +188,20 @@ enum {
176 * it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages. 188 * it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages.
177 * 189 *
178 * But in rare cases, if this optimizes things, the VID header may be placed to 190 * But in rare cases, if this optimizes things, the VID header may be placed to
179 * a different offset. For example, the boot-loader might do things faster if the 191 * a different offset. For example, the boot-loader might do things faster if
180 * VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. As 192 * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages.
181 * the boot-loader would not normally need to read EC headers (unless it needs 193 * As the boot-loader would not normally need to read EC headers (unless it
182 * UBI in RW mode), it might be faster to calculate ECC. This is weird example, 194 * needs UBI in RW mode), it might be faster to calculate ECC. This is weird
183 * but it real-life example. So, in this example, @vid_hdr_offer would be 195 * example, but it real-life example. So, in this example, @vid_hdr_offer would
184 * 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes 196 * be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
185 * aligned, which is OK, as UBI is clever enough to realize this is 4th sub-page 197 * aligned, which is OK, as UBI is clever enough to realize this is 4th
186 * of the first page and add needed padding. 198 * sub-page of the first page and add needed padding.
187 */ 199 */
188struct ubi_attach_req { 200struct ubi_attach_req {
189 int32_t ubi_num; 201 int32_t ubi_num;
190 int32_t mtd_num; 202 int32_t mtd_num;
191 int32_t vid_hdr_offset; 203 int32_t vid_hdr_offset;
192 uint8_t padding[12]; 204 int8_t padding[12];
193}; 205};
194 206
195/** 207/**
@@ -251,6 +263,48 @@ struct ubi_rsvol_req {
251} __attribute__ ((packed)); 263} __attribute__ ((packed));
252 264
253/** 265/**
266 * struct ubi_rnvol_req - volumes re-name request.
267 * @count: count of volumes to re-name
268 * @padding1: reserved for future, not used, has to be zeroed
269 * @vol_id: ID of the volume to re-name
270 * @name_len: name length
271 * @padding2: reserved for future, not used, has to be zeroed
272 * @name: new volume name
273 *
 274 * UBI allows re-naming up to %32 volumes in one go. The count of volumes to
 275 * re-name is specified in the @count field. The IDs of the volumes to re-name
 276 * and the new names are specified in the @vol_id and @name fields.
277 *
 278 * The UBI volume re-name operation is atomic, which means that should a power
 279 * cut happen, the volumes will have either the old or the new names. So one
 280 * possible use-case of this command is an atomic upgrade. Indeed, to upgrade,
 281 * say, volumes A and B, one may create temporary volumes %A1 and %B1 with the
 282 * new contents, then atomically re-name A1->A and B1->B, in which case the old
 283 * %A and %B will be removed.
284 *
285 * If it is not desirable to remove old A and B, the re-name request has to
286 * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1
287 * become A and B, and old A and B will become A1 and B1.
288 *
289 * It is also OK to request: A1->A, A1->X, B1->B, B->Y, in which case old A1
290 * and B1 become A and B, and old A and B become X and Y.
291 *
292 * In other words, in case of re-naming into an existing volume name, the
 293 * existing volume is removed, unless it is re-named as well in the same
294 * re-name request.
295 */
296struct ubi_rnvol_req {
297 int32_t count;
298 int8_t padding1[12];
299 struct {
300 int32_t vol_id;
301 int16_t name_len;
302 int8_t padding2[2];
303 char name[UBI_MAX_VOLUME_NAME + 1];
304 } ents[UBI_MAX_RNVOL];
305} __attribute__ ((packed));
306
307/**
254 * struct ubi_leb_change_req - a data structure used in atomic logical 308 * struct ubi_leb_change_req - a data structure used in atomic logical
255 * eraseblock change requests. 309 * eraseblock change requests.
256 * @lnum: logical eraseblock number to change 310 * @lnum: logical eraseblock number to change
@@ -261,8 +315,8 @@ struct ubi_rsvol_req {
261struct ubi_leb_change_req { 315struct ubi_leb_change_req {
262 int32_t lnum; 316 int32_t lnum;
263 int32_t bytes; 317 int32_t bytes;
264 uint8_t dtype; 318 int8_t dtype;
265 uint8_t padding[7]; 319 int8_t padding[7];
266} __attribute__ ((packed)); 320} __attribute__ ((packed));
267 321
268#endif /* __UBI_USER_H__ */ 322#endif /* __UBI_USER_H__ */
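
From userspace the new ioctl is driven as below; a sketch renaming one volume, with the device path, volume ID and name illustrative and error checks elided. The designated initializer zeroes the rest of the request, including the padding fields, as required:

static int example_rename(void)
{
	struct ubi_rnvol_req req = { .count = 1 };
	int fd = open("/dev/ubi0", O_RDWR);

	req.ents[0].vol_id = 3;
	req.ents[0].name_len = strlen("rootfs");
	strcpy(req.ents[0].name, "rootfs");
	ioctl(fd, UBI_IOCRNVOL, &req);
	return close(fd);
}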
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index dfd8bf66ce27..d364fd594ea4 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -262,7 +262,7 @@ static inline int ieee80211_get_radiotap_len(unsigned char *data)
262 struct ieee80211_radiotap_header *hdr = 262 struct ieee80211_radiotap_header *hdr =
263 (struct ieee80211_radiotap_header *)data; 263 (struct ieee80211_radiotap_header *)data;
264 264
265 return le16_to_cpu(get_unaligned(&hdr->it_len)); 265 return get_unaligned_le16(&hdr->it_len);
266} 266}
267 267
268#endif /* IEEE80211_RADIOTAP_H */ 268#endif /* IEEE80211_RADIOTAP_H */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index a1de1bf3d6b9..f769fac4f4c0 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -12,6 +12,7 @@
12#include <linux/device.h> 12#include <linux/device.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/initrd.h>
15 16
16#include <linux/nfs_fs.h> 17#include <linux/nfs_fs.h>
17#include <linux/nfs_fs_sb.h> 18#include <linux/nfs_fs_sb.h>
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 46dfd64ae8fb..fedef93b586f 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -10,8 +10,6 @@
10 10
11#include "do_mounts.h" 11#include "do_mounts.h"
12 12
13#define BUILD_CRAMDISK
14
15int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ 13int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
16 14
17static int __init prompt_ramdisk(char *str) 15static int __init prompt_ramdisk(char *str)
@@ -162,14 +160,8 @@ int __init rd_load_image(char *from)
162 goto done; 160 goto done;
163 161
164 if (nblocks == 0) { 162 if (nblocks == 0) {
165#ifdef BUILD_CRAMDISK
166 if (crd_load(in_fd, out_fd) == 0) 163 if (crd_load(in_fd, out_fd) == 0)
167 goto successful_load; 164 goto successful_load;
168#else
169 printk(KERN_NOTICE
170 "RAMDISK: Kernel does not support compressed "
171 "RAM disk images\n");
172#endif
173 goto done; 165 goto done;
174 } 166 }
175 167
@@ -267,8 +259,6 @@ int __init rd_load_disk(int n)
267 return rd_load_image("/dev/root"); 259 return rd_load_image("/dev/root");
268} 260}
269 261
270#ifdef BUILD_CRAMDISK
271
272/* 262/*
273 * gzip declarations 263 * gzip declarations
274 */ 264 */
@@ -313,32 +303,11 @@ static int crd_infd, crd_outfd;
313 303
314static int __init fill_inbuf(void); 304static int __init fill_inbuf(void);
315static void __init flush_window(void); 305static void __init flush_window(void);
316static void __init *malloc(size_t size);
317static void __init free(void *where);
318static void __init error(char *m); 306static void __init error(char *m);
319static void __init gzip_mark(void **);
320static void __init gzip_release(void **);
321
322#include "../lib/inflate.c"
323 307
324static void __init *malloc(size_t size) 308#define NO_INFLATE_MALLOC
325{
326 return kmalloc(size, GFP_KERNEL);
327}
328
329static void __init free(void *where)
330{
331 kfree(where);
332}
333
334static void __init gzip_mark(void **ptr)
335{
336}
337
338static void __init gzip_release(void **ptr)
339{
340}
341 309
310#include "../lib/inflate.c"
342 311
343/* =========================================================================== 312/* ===========================================================================
344 * Fill the input buffer. This is called only when the buffer is empty 313 * Fill the input buffer. This is called only when the buffer is empty
@@ -425,5 +394,3 @@ static int __init crd_load(int in_fd, int out_fd)
425 kfree(window); 394 kfree(window);
426 return result; 395 return result;
427} 396}
428
429#endif /* BUILD_CRAMDISK */
diff --git a/init/initramfs.c b/init/initramfs.c
index 8eeeccb328c9..644fc01ad5f0 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -14,16 +14,6 @@ static void __init error(char *x)
14 message = x; 14 message = x;
15} 15}
16 16
17static void __init *malloc(size_t size)
18{
19 return kmalloc(size, GFP_KERNEL);
20}
21
22static void __init free(void *where)
23{
24 kfree(where);
25}
26
27/* link hash */ 17/* link hash */
28 18
29#define N_ALIGN(len) ((((len) + 1) & ~3) + 2) 19#define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
@@ -407,18 +397,10 @@ static long bytes_out;
407 397
408static void __init flush_window(void); 398static void __init flush_window(void);
409static void __init error(char *m); 399static void __init error(char *m);
410static void __init gzip_mark(void **);
411static void __init gzip_release(void **);
412 400
413#include "../lib/inflate.c" 401#define NO_INFLATE_MALLOC
414 402
415static void __init gzip_mark(void **ptr) 403#include "../lib/inflate.c"
416{
417}
418
419static void __init gzip_release(void **ptr)
420{
421}
422 404
423/* =========================================================================== 405/* ===========================================================================
424 * Write the output window window[0..outcnt-1] and update crc and bytes_out. 406 * Write the output window window[0..outcnt-1] and update crc and bytes_out.
diff --git a/init/main.c b/init/main.c
index 2769dc031c62..0604cbcaf1e4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -87,8 +87,6 @@ extern void init_IRQ(void);
87extern void fork_init(unsigned long); 87extern void fork_init(unsigned long);
88extern void mca_init(void); 88extern void mca_init(void);
89extern void sbus_init(void); 89extern void sbus_init(void);
90extern void pidhash_init(void);
91extern void pidmap_init(void);
92extern void prio_tree_init(void); 90extern void prio_tree_init(void);
93extern void radix_tree_init(void); 91extern void radix_tree_init(void);
94extern void free_initmem(void); 92extern void free_initmem(void);
diff --git a/init/version.c b/init/version.c
index 9d17d70ee02d..52a8b98642b8 100644
--- a/init/version.c
+++ b/init/version.c
@@ -13,10 +13,13 @@
13#include <linux/utsrelease.h> 13#include <linux/utsrelease.h>
14#include <linux/version.h> 14#include <linux/version.h>
15 15
16#ifndef CONFIG_KALLSYMS
16#define version(a) Version_ ## a 17#define version(a) Version_ ## a
17#define version_string(a) version(a) 18#define version_string(a) version(a)
18 19
20extern int version_string(LINUX_VERSION_CODE);
19int version_string(LINUX_VERSION_CODE); 21int version_string(LINUX_VERSION_CODE);
22#endif
20 23
21struct uts_namespace init_uts_ns = { 24struct uts_namespace init_uts_ns = {
22 .kref = { 25 .kref = {
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index d3497465cc0a..69bc85978ba0 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table)
27} 27}
28 28
29/* 29/*
30 * Routine that is called when a tunable has successfully been changed by 30 * Routine that is called when the file "auto_msgmni" has successfully been
31 * hand and it has a callback routine registered on the ipc namespace notifier 31 * written.
32 * chain: we don't want such tunables to be recomputed anymore upon memory 32 * Two values are allowed:
33 * add/remove or ipc namespace creation/removal. 33 * 0: unregister msgmni's callback routine from the ipc namespace notifier
34 * They can come back to a recomputable state by being set to a <0 value. 34 * chain. This means that msgmni won't be recomputed anymore upon memory
35 * add/remove or ipc namespace creation/removal.
 36 * 1: re-register the callback routine.
35 */ 37 */
36static void tunable_set_callback(int val) 38static void ipc_auto_callback(int val)
37{ 39{
38 if (val >= 0) 40 if (!val)
39 unregister_ipcns_notifier(current->nsproxy->ipc_ns); 41 unregister_ipcns_notifier(current->nsproxy->ipc_ns);
40 else { 42 else {
41 /* 43 /*
@@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
71 rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); 73 rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
72 74
73 if (write && !rc && lenp_bef == *lenp) 75 if (write && !rc && lenp_bef == *lenp)
74 tunable_set_callback(*((int *)(ipc_table.data))); 76 /*
77 * Tunable has successfully been changed by hand. Disable its
78 * automatic adjustment. This simply requires unregistering
79 * the notifiers that trigger recalculation.
80 */
81 unregister_ipcns_notifier(current->nsproxy->ipc_ns);
75 82
76 return rc; 83 return rc;
77} 84}
@@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
87 lenp, ppos); 94 lenp, ppos);
88} 95}
89 96
97static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
98 struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
99{
100 struct ctl_table ipc_table;
101 size_t lenp_bef = *lenp;
102 int oldval;
103 int rc;
104
105 memcpy(&ipc_table, table, sizeof(ipc_table));
106 ipc_table.data = get_ipc(table);
107 oldval = *((int *)(ipc_table.data));
108
109 rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
110
111 if (write && !rc && lenp_bef == *lenp) {
112 int newval = *((int *)(ipc_table.data));
113 /*
114 * The file "auto_msgmni" has correctly been set.
115 * React by (un)registering the corresponding tunable, if the
116 * value has changed.
117 */
118 if (newval != oldval)
119 ipc_auto_callback(newval);
120 }
121
122 return rc;
123}
124
90#else 125#else
91#define proc_ipc_doulongvec_minmax NULL 126#define proc_ipc_doulongvec_minmax NULL
92#define proc_ipc_dointvec NULL 127#define proc_ipc_dointvec NULL
93#define proc_ipc_callback_dointvec NULL 128#define proc_ipc_callback_dointvec NULL
129#define proc_ipcauto_dointvec_minmax NULL
94#endif 130#endif
95 131
96#ifdef CONFIG_SYSCTL_SYSCALL 132#ifdef CONFIG_SYSCTL_SYSCALL
@@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
142 rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, 178 rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval,
143 newlen); 179 newlen);
144 180
145 if (newval && newlen && rc > 0) { 181 if (newval && newlen && rc > 0)
146 /* 182 /*
147 * Tunable has successfully been changed from userland 183 * Tunable has successfully been changed from userland
148 */ 184 */
149 int *data = get_ipc(table); 185 unregister_ipcns_notifier(current->nsproxy->ipc_ns);
150
151 tunable_set_callback(*data);
152 }
153 186
154 return rc; 187 return rc;
155} 188}
@@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
158#define sysctl_ipc_registered_data NULL 191#define sysctl_ipc_registered_data NULL
159#endif 192#endif
160 193
194static int zero;
195static int one = 1;
196
161static struct ctl_table ipc_kern_table[] = { 197static struct ctl_table ipc_kern_table[] = {
162 { 198 {
163 .ctl_name = KERN_SHMMAX, 199 .ctl_name = KERN_SHMMAX,
@@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = {
222 .proc_handler = proc_ipc_dointvec, 258 .proc_handler = proc_ipc_dointvec,
223 .strategy = sysctl_ipc_data, 259 .strategy = sysctl_ipc_data,
224 }, 260 },
261 {
262 .ctl_name = CTL_UNNUMBERED,
263 .procname = "auto_msgmni",
264 .data = &init_ipc_ns.auto_msgmni,
265 .maxlen = sizeof(int),
266 .mode = 0644,
267 .proc_handler = proc_ipcauto_dointvec_minmax,
268 .extra1 = &zero,
269 .extra2 = &one,
270 },
225 {} 271 {}
226}; 272};
227 273
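
The net effect for administrators is a boolean sysctl; a sketch of toggling it from a C program (the usual fcntl.h/unistd.h includes elided; shell redirection to /proc/sys/kernel/auto_msgmni works equally well):

static void example_toggle_auto_msgmni(void)
{
	int fd = open("/proc/sys/kernel/auto_msgmni", O_WRONLY);

	write(fd, "0", 1);  /* freeze msgmni: unregister the recompute notifier */
	write(fd, "1", 1);  /* resume automatic recomputation */
	close(fd);
}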
diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c
index 70ff09183f7b..b9b31a4f77e1 100644
--- a/ipc/ipcns_notifier.c
+++ b/ipc/ipcns_notifier.c
@@ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self,
55 55
56int register_ipcns_notifier(struct ipc_namespace *ns) 56int register_ipcns_notifier(struct ipc_namespace *ns)
57{ 57{
58 int rc;
59
58 memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); 60 memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
59 ns->ipcns_nb.notifier_call = ipcns_callback; 61 ns->ipcns_nb.notifier_call = ipcns_callback;
60 ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; 62 ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
61 return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); 63 rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
64 if (!rc)
65 ns->auto_msgmni = 1;
66 return rc;
62} 67}
63 68
64int cond_register_ipcns_notifier(struct ipc_namespace *ns) 69int cond_register_ipcns_notifier(struct ipc_namespace *ns)
65{ 70{
71 int rc;
72
66 memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); 73 memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
67 ns->ipcns_nb.notifier_call = ipcns_callback; 74 ns->ipcns_nb.notifier_call = ipcns_callback;
68 ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; 75 ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
69 return blocking_notifier_chain_cond_register(&ipcns_chain, 76 rc = blocking_notifier_chain_cond_register(&ipcns_chain,
70 &ns->ipcns_nb); 77 &ns->ipcns_nb);
78 if (!rc)
79 ns->auto_msgmni = 1;
80 return rc;
71} 81}
72 82
73int unregister_ipcns_notifier(struct ipc_namespace *ns) 83void unregister_ipcns_notifier(struct ipc_namespace *ns)
74{ 84{
75 return blocking_notifier_chain_unregister(&ipcns_chain, 85 blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb);
76 &ns->ipcns_nb); 86 ns->auto_msgmni = 0;
77} 87}
78 88
79int ipcns_notify(unsigned long val) 89int ipcns_notify(unsigned long val)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 3e84b958186b..1fdc2eb2f6d8 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -314,15 +314,11 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
314* through std routines) 314* through std routines)
315*/ 315*/
316static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, 316static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
317 size_t count, loff_t * off) 317 size_t count, loff_t *off)
318{ 318{
319 struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode); 319 struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
320 char buffer[FILENT_SIZE]; 320 char buffer[FILENT_SIZE];
321 size_t slen; 321 ssize_t ret;
322 loff_t o;
323
324 if (!count)
325 return 0;
326 322
327 spin_lock(&info->lock); 323 spin_lock(&info->lock);
328 snprintf(buffer, sizeof(buffer), 324 snprintf(buffer, sizeof(buffer),
@@ -335,21 +331,14 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
335 pid_vnr(info->notify_owner)); 331 pid_vnr(info->notify_owner));
336 spin_unlock(&info->lock); 332 spin_unlock(&info->lock);
337 buffer[sizeof(buffer)-1] = '\0'; 333 buffer[sizeof(buffer)-1] = '\0';
338 slen = strlen(buffer)+1;
339
340 o = *off;
341 if (o > slen)
342 return 0;
343
344 if (o + count > slen)
345 count = slen - o;
346 334
347 if (copy_to_user(u_data, buffer + o, count)) 335 ret = simple_read_from_buffer(u_data, count, off, buffer,
348 return -EFAULT; 336 strlen(buffer));
337 if (ret <= 0)
338 return ret;
349 339
350 *off = o + count;
351 filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME; 340 filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME;
352 return count; 341 return ret;
353} 342}
354 343
355static int mqueue_flush_file(struct file *filp, fl_owner_t id) 344static int mqueue_flush_file(struct file *filp, fl_owner_t id)
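
simple_read_from_buffer() absorbs the offset clamping and copy_to_user() that mqueue_read_file used to open-code; the minimal calling pattern looks like this (a sketch):

static ssize_t example_read(struct file *filp, char __user *buf,
			    size_t count, loff_t *ppos)
{
	static const char msg[] = "hello\n";

	return simple_read_from_buffer(buf, count, ppos, msg, sizeof(msg) - 1);
}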
diff --git a/ipc/sem.c b/ipc/sem.c
index e9418df5ff3e..bf1bc36cb7ee 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -272,9 +272,8 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
272 ns->used_sems += nsems; 272 ns->used_sems += nsems;
273 273
274 sma->sem_base = (struct sem *) &sma[1]; 274 sma->sem_base = (struct sem *) &sma[1];
275 /* sma->sem_pending = NULL; */ 275 INIT_LIST_HEAD(&sma->sem_pending);
276 sma->sem_pending_last = &sma->sem_pending; 276 INIT_LIST_HEAD(&sma->list_id);
277 /* sma->undo = NULL; */
278 sma->sem_nsems = nsems; 277 sma->sem_nsems = nsems;
279 sma->sem_ctime = get_seconds(); 278 sma->sem_ctime = get_seconds();
280 sem_unlock(sma); 279 sem_unlock(sma);
@@ -331,38 +330,6 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg)
331 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 330 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
332} 331}
333 332
334/* Manage the doubly linked list sma->sem_pending as a FIFO:
335 * insert new queue elements at the tail sma->sem_pending_last.
336 */
337static inline void append_to_queue (struct sem_array * sma,
338 struct sem_queue * q)
339{
340 *(q->prev = sma->sem_pending_last) = q;
341 *(sma->sem_pending_last = &q->next) = NULL;
342}
343
344static inline void prepend_to_queue (struct sem_array * sma,
345 struct sem_queue * q)
346{
347 q->next = sma->sem_pending;
348 *(q->prev = &sma->sem_pending) = q;
349 if (q->next)
350 q->next->prev = &q->next;
351 else /* sma->sem_pending_last == &sma->sem_pending */
352 sma->sem_pending_last = &q->next;
353}
354
355static inline void remove_from_queue (struct sem_array * sma,
356 struct sem_queue * q)
357{
358 *(q->prev) = q->next;
359 if (q->next)
360 q->next->prev = q->prev;
361 else /* sma->sem_pending_last == &q->next */
362 sma->sem_pending_last = q->prev;
363 q->prev = NULL; /* mark as removed */
364}
365
366/* 333/*
367 * Determine whether a sequence of semaphore operations would succeed 334 * Determine whether a sequence of semaphore operations would succeed
368 * all at once. Return 0 if yes, 1 if need to sleep, else return error code. 335 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
@@ -438,16 +405,15 @@ static void update_queue (struct sem_array * sma)
438 int error; 405 int error;
439 struct sem_queue * q; 406 struct sem_queue * q;
440 407
441 q = sma->sem_pending; 408 q = list_entry(sma->sem_pending.next, struct sem_queue, list);
442 while(q) { 409 while (&q->list != &sma->sem_pending) {
443 error = try_atomic_semop(sma, q->sops, q->nsops, 410 error = try_atomic_semop(sma, q->sops, q->nsops,
444 q->undo, q->pid); 411 q->undo, q->pid);
445 412
446 /* Does q->sleeper still need to sleep? */ 413 /* Does q->sleeper still need to sleep? */
447 if (error <= 0) { 414 if (error <= 0) {
448 struct sem_queue *n; 415 struct sem_queue *n;
449 remove_from_queue(sma,q); 416
450 q->status = IN_WAKEUP;
451 /* 417 /*
452 * Continue scanning. The next operation 418 * Continue scanning. The next operation
453 * that must be checked depends on the type of the 419 * that must be checked depends on the type of the
@@ -458,11 +424,26 @@ static void update_queue (struct sem_array * sma)
458 * for semaphore values to become 0. 424 * for semaphore values to become 0.
459 * - if the operation didn't modify the array, 425 * - if the operation didn't modify the array,
460 * then just continue. 426 * then just continue.
427 * The order of list_del() and reading ->next
428 * is crucial: In the former case, the list_del()
429 * must be done first [because we might be the
430 * first entry in ->sem_pending], in the latter
431 * case the list_del() must be done last
432 * [because the list is invalid after the list_del()]
461 */ 433 */
462 if (q->alter) 434 if (q->alter) {
463 n = sma->sem_pending; 435 list_del(&q->list);
464 else 436 n = list_entry(sma->sem_pending.next,
465 n = q->next; 437 struct sem_queue, list);
438 } else {
439 n = list_entry(q->list.next, struct sem_queue,
440 list);
441 list_del(&q->list);
442 }
443
444 /* wake up the waiting thread */
445 q->status = IN_WAKEUP;
446
466 wake_up_process(q->sleeper); 447 wake_up_process(q->sleeper);
467 /* hands-off: q will disappear immediately after 448 /* hands-off: q will disappear immediately after
468 * writing q->status. 449 * writing q->status.
@@ -471,7 +452,7 @@ static void update_queue (struct sem_array * sma)
471 q->status = error; 452 q->status = error;
472 q = n; 453 q = n;
473 } else { 454 } else {
474 q = q->next; 455 q = list_entry(q->list.next, struct sem_queue, list);
475 } 456 }
476 } 457 }
477} 458}
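
The open-coded prev/next queue manipulation throughout sem.c collapses into standard list.h idioms; the walk-and-unlink pattern that replaces remove_from_queue() is the usual safe iteration, sketched here outside its real locking context:

static void example_drain(struct sem_array *sma)
{
	struct sem_queue *q, *tq;

	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
		list_del(&q->list);
		/* ... wake q->sleeper, then hands-off via q->status ... */
	}
}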
@@ -491,7 +472,7 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
491 struct sem_queue * q; 472 struct sem_queue * q;
492 473
493 semncnt = 0; 474 semncnt = 0;
494 for (q = sma->sem_pending; q; q = q->next) { 475 list_for_each_entry(q, &sma->sem_pending, list) {
495 struct sembuf * sops = q->sops; 476 struct sembuf * sops = q->sops;
496 int nsops = q->nsops; 477 int nsops = q->nsops;
497 int i; 478 int i;
@@ -503,13 +484,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
503 } 484 }
504 return semncnt; 485 return semncnt;
505} 486}
487
506static int count_semzcnt (struct sem_array * sma, ushort semnum) 488static int count_semzcnt (struct sem_array * sma, ushort semnum)
507{ 489{
508 int semzcnt; 490 int semzcnt;
509 struct sem_queue * q; 491 struct sem_queue * q;
510 492
511 semzcnt = 0; 493 semzcnt = 0;
512 for (q = sma->sem_pending; q; q = q->next) { 494 list_for_each_entry(q, &sma->sem_pending, list) {
513 struct sembuf * sops = q->sops; 495 struct sembuf * sops = q->sops;
514 int nsops = q->nsops; 496 int nsops = q->nsops;
515 int i; 497 int i;
@@ -522,35 +504,41 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
522 return semzcnt; 504 return semzcnt;
523} 505}
524 506
507void free_un(struct rcu_head *head)
508{
509 struct sem_undo *un = container_of(head, struct sem_undo, rcu);
510 kfree(un);
511}
512
525/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked 513/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
526 * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex 514 * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
527 * remains locked on exit. 515 * remains locked on exit.
528 */ 516 */
529static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 517static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
530{ 518{
531 struct sem_undo *un; 519 struct sem_undo *un, *tu;
532 struct sem_queue *q; 520 struct sem_queue *q, *tq;
533 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 521 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
534 522
535 /* Invalidate the existing undo structures for this semaphore set. 523 /* Free the existing undo structures for this semaphore set. */
536 * (They will be freed without any further action in exit_sem() 524 assert_spin_locked(&sma->sem_perm.lock);
537 * or during the next semop.) 525 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
538 */ 526 list_del(&un->list_id);
539 for (un = sma->undo; un; un = un->id_next) 527 spin_lock(&un->ulp->lock);
540 un->semid = -1; 528 un->semid = -1;
529 list_del_rcu(&un->list_proc);
530 spin_unlock(&un->ulp->lock);
531 call_rcu(&un->rcu, free_un);
532 }
541 533
542 /* Wake up all pending processes and let them fail with EIDRM. */ 534 /* Wake up all pending processes and let them fail with EIDRM. */
543 q = sma->sem_pending; 535 list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
544 while(q) { 536 list_del(&q->list);
545 struct sem_queue *n; 537
546 /* lazy remove_from_queue: we are killing the whole queue */
547 q->prev = NULL;
548 n = q->next;
549 q->status = IN_WAKEUP; 538 q->status = IN_WAKEUP;
550 wake_up_process(q->sleeper); /* doesn't sleep */ 539 wake_up_process(q->sleeper); /* doesn't sleep */
551 smp_wmb(); 540 smp_wmb();
552 q->status = -EIDRM; /* hands-off q */ 541 q->status = -EIDRM; /* hands-off q */
553 q = n;
554 } 542 }
555 543
556 /* Remove the semaphore set from the IDR */ 544 /* Remove the semaphore set from the IDR */
@@ -763,9 +751,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
763 751
764 for (i = 0; i < nsems; i++) 752 for (i = 0; i < nsems; i++)
765 sma->sem_base[i].semval = sem_io[i]; 753 sma->sem_base[i].semval = sem_io[i];
766 for (un = sma->undo; un; un = un->id_next) 754
755 assert_spin_locked(&sma->sem_perm.lock);
756 list_for_each_entry(un, &sma->list_id, list_id) {
767 for (i = 0; i < nsems; i++) 757 for (i = 0; i < nsems; i++)
768 un->semadj[i] = 0; 758 un->semadj[i] = 0;
759 }
769 sma->sem_ctime = get_seconds(); 760 sma->sem_ctime = get_seconds();
770 /* maybe some queued-up processes were waiting for this */ 761 /* maybe some queued-up processes were waiting for this */
771 update_queue(sma); 762 update_queue(sma);
@@ -797,12 +788,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
797 { 788 {
798 int val = arg.val; 789 int val = arg.val;
799 struct sem_undo *un; 790 struct sem_undo *un;
791
800 err = -ERANGE; 792 err = -ERANGE;
801 if (val > SEMVMX || val < 0) 793 if (val > SEMVMX || val < 0)
802 goto out_unlock; 794 goto out_unlock;
803 795
804 for (un = sma->undo; un; un = un->id_next) 796 assert_spin_locked(&sma->sem_perm.lock);
797 list_for_each_entry(un, &sma->list_id, list_id)
805 un->semadj[semnum] = 0; 798 un->semadj[semnum] = 0;
799
806 curr->semval = val; 800 curr->semval = val;
807 curr->sempid = task_tgid_vnr(current); 801 curr->sempid = task_tgid_vnr(current);
808 sma->sem_ctime = get_seconds(); 802 sma->sem_ctime = get_seconds();
@@ -952,6 +946,8 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
952 return -ENOMEM; 946 return -ENOMEM;
953 spin_lock_init(&undo_list->lock); 947 spin_lock_init(&undo_list->lock);
954 atomic_set(&undo_list->refcnt, 1); 948 atomic_set(&undo_list->refcnt, 1);
949 INIT_LIST_HEAD(&undo_list->list_proc);
950
955 current->sysvsem.undo_list = undo_list; 951 current->sysvsem.undo_list = undo_list;
956 } 952 }
957 *undo_listp = undo_list; 953 *undo_listp = undo_list;
@@ -960,25 +956,27 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
960 956
961static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 957static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
962{ 958{
963 struct sem_undo **last, *un; 959 struct sem_undo *walk;
964 960
965 last = &ulp->proc_list; 961 list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
966 un = *last; 962 if (walk->semid == semid)
967 while(un != NULL) { 963 return walk;
968 if(un->semid==semid)
969 break;
970 if(un->semid==-1) {
971 *last=un->proc_next;
972 kfree(un);
973 } else {
974 last=&un->proc_next;
975 }
976 un=*last;
977 } 964 }
978 return un; 965 return NULL;
979} 966}
980 967
981static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) 968/**
969 * find_alloc_undo - Lookup (and if not present create) undo array
970 * @ns: namespace
971 * @semid: semaphore array id
972 *
973 * The function looks up (and if not present creates) the undo structure.
974 * The size of the undo structure depends on the size of the semaphore
975 * array, thus the alloc path is not that straightforward.
976 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
977 * performs a rcu_read_lock().
978 */
979static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
982{ 980{
983 struct sem_array *sma; 981 struct sem_array *sma;
984 struct sem_undo_list *ulp; 982 struct sem_undo_list *ulp;
@@ -990,13 +988,16 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
990 if (error) 988 if (error)
991 return ERR_PTR(error); 989 return ERR_PTR(error);
992 990
991 rcu_read_lock();
993 spin_lock(&ulp->lock); 992 spin_lock(&ulp->lock);
994 un = lookup_undo(ulp, semid); 993 un = lookup_undo(ulp, semid);
995 spin_unlock(&ulp->lock); 994 spin_unlock(&ulp->lock);
996 if (likely(un!=NULL)) 995 if (likely(un!=NULL))
997 goto out; 996 goto out;
997 rcu_read_unlock();
998 998
999 /* no undo structure around - allocate one. */ 999 /* no undo structure around - allocate one. */
1000 /* step 1: figure out the size of the semaphore array */
1000 sma = sem_lock_check(ns, semid); 1001 sma = sem_lock_check(ns, semid);
1001 if (IS_ERR(sma)) 1002 if (IS_ERR(sma))
1002 return ERR_PTR(PTR_ERR(sma)); 1003 return ERR_PTR(PTR_ERR(sma));
@@ -1004,37 +1005,45 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
1004 nsems = sma->sem_nsems; 1005 nsems = sma->sem_nsems;
1005 sem_getref_and_unlock(sma); 1006 sem_getref_and_unlock(sma);
1006 1007
1008 /* step 2: allocate new undo structure */
1007 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1009 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1008 if (!new) { 1010 if (!new) {
1009 sem_putref(sma); 1011 sem_putref(sma);
1010 return ERR_PTR(-ENOMEM); 1012 return ERR_PTR(-ENOMEM);
1011 } 1013 }
1012 new->semadj = (short *) &new[1];
1013 new->semid = semid;
1014 1014
1015 spin_lock(&ulp->lock); 1015 /* step 3: Acquire the lock on semaphore array */
1016 un = lookup_undo(ulp, semid);
1017 if (un) {
1018 spin_unlock(&ulp->lock);
1019 kfree(new);
1020 sem_putref(sma);
1021 goto out;
1022 }
1023 sem_lock_and_putref(sma); 1016 sem_lock_and_putref(sma);
1024 if (sma->sem_perm.deleted) { 1017 if (sma->sem_perm.deleted) {
1025 sem_unlock(sma); 1018 sem_unlock(sma);
1026 spin_unlock(&ulp->lock);
1027 kfree(new); 1019 kfree(new);
1028 un = ERR_PTR(-EIDRM); 1020 un = ERR_PTR(-EIDRM);
1029 goto out; 1021 goto out;
1030 } 1022 }
1031 new->proc_next = ulp->proc_list; 1023 spin_lock(&ulp->lock);
1032 ulp->proc_list = new; 1024
1033 new->id_next = sma->undo; 1025 /*
1034 sma->undo = new; 1026 * step 4: check for races: did someone else allocate the undo struct?
1035 sem_unlock(sma); 1027 */
1028 un = lookup_undo(ulp, semid);
1029 if (un) {
1030 kfree(new);
1031 goto success;
1032 }
1033 /* step 5: initialize & link new undo structure */
1034 new->semadj = (short *) &new[1];
1035 new->ulp = ulp;
1036 new->semid = semid;
1037 assert_spin_locked(&ulp->lock);
1038 list_add_rcu(&new->list_proc, &ulp->list_proc);
1039 assert_spin_locked(&sma->sem_perm.lock);
1040 list_add(&new->list_id, &sma->list_id);
1036 un = new; 1041 un = new;
1042
1043success:
1037 spin_unlock(&ulp->lock); 1044 spin_unlock(&ulp->lock);
1045 rcu_read_lock();
1046 sem_unlock(sma);
1038out: 1047out:
1039 return un; 1048 return un;
1040} 1049}
@@ -1090,9 +1099,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
1090 alter = 1; 1099 alter = 1;
1091 } 1100 }
1092 1101
1093retry_undos:
1094 if (undos) { 1102 if (undos) {
1095 un = find_undo(ns, semid); 1103 un = find_alloc_undo(ns, semid);
1096 if (IS_ERR(un)) { 1104 if (IS_ERR(un)) {
1097 error = PTR_ERR(un); 1105 error = PTR_ERR(un);
1098 goto out_free; 1106 goto out_free;
@@ -1102,19 +1110,37 @@ retry_undos:
1102 1110
1103 sma = sem_lock_check(ns, semid); 1111 sma = sem_lock_check(ns, semid);
1104 if (IS_ERR(sma)) { 1112 if (IS_ERR(sma)) {
1113 if (un)
1114 rcu_read_unlock();
1105 error = PTR_ERR(sma); 1115 error = PTR_ERR(sma);
1106 goto out_free; 1116 goto out_free;
1107 } 1117 }
1108 1118
1109 /* 1119 /*
1110 * semid identifiers are not unique - find_undo may have 1120 * semid identifiers are not unique - find_alloc_undo may have
1111 * allocated an undo structure, it was invalidated by an RMID 1121 * allocated an undo structure, it was invalidated by an RMID
1112 * and now a new array with received the same id. Check and retry. 1122 * and now a new array with received the same id. Check and fail.
 1123 * This case can be detected by checking un->semid. The existence of
1124 * "un" itself is guaranteed by rcu.
1113 */ 1125 */
1114 if (un && un->semid == -1) { 1126 error = -EIDRM;
1115 sem_unlock(sma); 1127 if (un) {
1116 goto retry_undos; 1128 if (un->semid == -1) {
1129 rcu_read_unlock();
1130 goto out_unlock_free;
1131 } else {
1132 /*
1133 * rcu lock can be released, "un" cannot disappear:
1134 * - sem_lock is acquired, thus IPC_RMID is
1135 * impossible.
1136 * - exit_sem is impossible, it always operates on
1137 * current (or a dead task).
1138 */
1139
1140 rcu_read_unlock();
1141 }
1117 } 1142 }
1143
1118 error = -EFBIG; 1144 error = -EFBIG;
1119 if (max >= sma->sem_nsems) 1145 if (max >= sma->sem_nsems)
1120 goto out_unlock_free; 1146 goto out_unlock_free;
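The un->semid == -1 test above guards against an array that was deleted and re-created under the same numeric id. The general technique is a generation tag per slot: an id encodes both the slot index and the sequence number seen at creation time, so a stale handle fails even when the index matches. A hypothetical sketch (table layout and id encoding invented for illustration):

#include <stddef.h>

#define NSLOTS 64

struct slot {
	int seq;		/* bumped on every removal */
	void *obj;		/* NULL while the slot is free */
};

static struct slot table[NSLOTS];

/* An id packs the slot index and the creation-time sequence number. */
static void *lookup_checked(int id)
{
	int idx = id % NSLOTS;
	int seq = id / NSLOTS;

	if (table[idx].obj == NULL || table[idx].seq != seq)
		return NULL;	/* deleted or recycled: caller reports -EIDRM */
	return table[idx].obj;
}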
@@ -1138,17 +1164,15 @@ retry_undos:
1138 * task into the pending queue and go to sleep. 1164 * task into the pending queue and go to sleep.
1139 */ 1165 */
1140 1166
1141 queue.sma = sma;
1142 queue.sops = sops; 1167 queue.sops = sops;
1143 queue.nsops = nsops; 1168 queue.nsops = nsops;
1144 queue.undo = un; 1169 queue.undo = un;
1145 queue.pid = task_tgid_vnr(current); 1170 queue.pid = task_tgid_vnr(current);
1146 queue.id = semid;
1147 queue.alter = alter; 1171 queue.alter = alter;
1148 if (alter) 1172 if (alter)
1149 append_to_queue(sma ,&queue); 1173 list_add_tail(&queue.list, &sma->sem_pending);
1150 else 1174 else
1151 prepend_to_queue(sma ,&queue); 1175 list_add(&queue.list, &sma->sem_pending);
1152 1176
1153 queue.status = -EINTR; 1177 queue.status = -EINTR;
1154 queue.sleeper = current; 1178 queue.sleeper = current;
@@ -1174,7 +1198,6 @@ retry_undos:
1174 1198
1175 sma = sem_lock(ns, semid); 1199 sma = sem_lock(ns, semid);
1176 if (IS_ERR(sma)) { 1200 if (IS_ERR(sma)) {
1177 BUG_ON(queue.prev != NULL);
1178 error = -EIDRM; 1201 error = -EIDRM;
1179 goto out_free; 1202 goto out_free;
1180 } 1203 }
@@ -1192,7 +1215,7 @@ retry_undos:
1192 */ 1215 */
1193 if (timeout && jiffies_left == 0) 1216 if (timeout && jiffies_left == 0)
1194 error = -EAGAIN; 1217 error = -EAGAIN;
1195 remove_from_queue(sma,&queue); 1218 list_del(&queue.list);
1196 goto out_unlock_free; 1219 goto out_unlock_free;
1197 1220
1198out_unlock_free: 1221out_unlock_free:
@@ -1243,56 +1266,62 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
1243 */ 1266 */
1244void exit_sem(struct task_struct *tsk) 1267void exit_sem(struct task_struct *tsk)
1245{ 1268{
1246 struct sem_undo_list *undo_list; 1269 struct sem_undo_list *ulp;
1247 struct sem_undo *u, **up;
1248 struct ipc_namespace *ns;
1249 1270
1250 undo_list = tsk->sysvsem.undo_list; 1271 ulp = tsk->sysvsem.undo_list;
1251 if (!undo_list) 1272 if (!ulp)
1252 return; 1273 return;
1253 tsk->sysvsem.undo_list = NULL; 1274 tsk->sysvsem.undo_list = NULL;
1254 1275
1255 if (!atomic_dec_and_test(&undo_list->refcnt)) 1276 if (!atomic_dec_and_test(&ulp->refcnt))
1256 return; 1277 return;
1257 1278
1258 ns = tsk->nsproxy->ipc_ns; 1279 for (;;) {
1259 /* There's no need to hold the semundo list lock, as current
1260 * is the last task exiting for this undo list.
1261 */
1262 for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
1263 struct sem_array *sma; 1280 struct sem_array *sma;
1264 int nsems, i; 1281 struct sem_undo *un;
1265 struct sem_undo *un, **unp;
1266 int semid; 1282 int semid;
1267 1283 int i;
1268 semid = u->semid;
1269 1284
1270 if(semid == -1) 1285 rcu_read_lock();
1271 continue; 1286 un = list_entry(rcu_dereference(ulp->list_proc.next),
1272 sma = sem_lock(ns, semid); 1287 struct sem_undo, list_proc);
1288 if (&un->list_proc == &ulp->list_proc)
1289 semid = -1;
1290 else
1291 semid = un->semid;
1292 rcu_read_unlock();
1293
1294 if (semid == -1)
1295 break;
1296
1297 sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
1298
1299 /* exit_sem raced with IPC_RMID, nothing to do */
1273 if (IS_ERR(sma)) 1300 if (IS_ERR(sma))
1274 continue; 1301 continue;
1275 1302
1276 if (u->semid == -1) 1303 un = lookup_undo(ulp, semid);
1277 goto next_entry; 1304 if (un == NULL) {
1305 /* exit_sem raced with IPC_RMID+semget() that created
1306 * exactly the same semid. Nothing to do.
1307 */
1308 sem_unlock(sma);
1309 continue;
1310 }
1278 1311
1279 BUG_ON(sem_checkid(sma, u->semid)); 1312 /* remove un from the linked lists */
1313 assert_spin_locked(&sma->sem_perm.lock);
1314 list_del(&un->list_id);
1280 1315
1281 /* remove u from the sma->undo list */ 1316 spin_lock(&ulp->lock);
1282 for (unp = &sma->undo; (un = *unp); unp = &un->id_next) { 1317 list_del_rcu(&un->list_proc);
1283 if (u == un) 1318 spin_unlock(&ulp->lock);
1284 goto found; 1319
1285 } 1320 /* perform adjustments registered in un */
1286 printk ("exit_sem undo list error id=%d\n", u->semid); 1321 for (i = 0; i < sma->sem_nsems; i++) {
1287 goto next_entry;
1288found:
1289 *unp = un->id_next;
1290 /* perform adjustments registered in u */
1291 nsems = sma->sem_nsems;
1292 for (i = 0; i < nsems; i++) {
1293 struct sem * semaphore = &sma->sem_base[i]; 1322 struct sem * semaphore = &sma->sem_base[i];
1294 if (u->semadj[i]) { 1323 if (un->semadj[i]) {
1295 semaphore->semval += u->semadj[i]; 1324 semaphore->semval += un->semadj[i];
1296 /* 1325 /*
1297 * Range checks of the new semaphore value, 1326 * Range checks of the new semaphore value,
1298 * not defined by sus: 1327 * not defined by sus:
@@ -1316,10 +1345,11 @@ found:
1316 sma->sem_otime = get_seconds(); 1345 sma->sem_otime = get_seconds();
1317 /* maybe some queued-up processes were waiting for this */ 1346 /* maybe some queued-up processes were waiting for this */
1318 update_queue(sma); 1347 update_queue(sma);
1319next_entry:
1320 sem_unlock(sma); 1348 sem_unlock(sma);
1349
1350 call_rcu(&un->rcu, free_un);
1321 } 1351 }
1322 kfree(undo_list); 1352 kfree(ulp);
1323} 1353}
1324 1354
1325#ifdef CONFIG_PROC_FS 1355#ifdef CONFIG_PROC_FS
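The semadj values that exit_sem() replays are recorded by semop() calls made with SEM_UNDO. A small self-contained userspace program showing the behaviour the code above implements (the kernel reverts the child's adjustment when it exits, so the program prints 0):

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	struct sembuf op = { .sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO };

	if (fork() == 0) {
		semop(id, &op, 1);	/* +1, recorded in the child's semadj */
		_exit(0);		/* exit_sem() applies the -1 adjustment */
	}
	wait(NULL);
	printf("semval after child exit: %d\n", semctl(id, 0, GETVAL));
	semctl(id, 0, IPC_RMID);
	return 0;
}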
diff --git a/ipc/shm.c b/ipc/shm.c
index a726aebce7d7..e77ec698cf40 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -112,23 +112,8 @@ void __init shm_init (void)
112} 112}
113 113
114/* 114/*
115 * shm_lock_(check_)down routines are called in the paths where the rw_mutex
116 * is held to protect access to the idr tree.
117 */
118static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
119 int id)
120{
121 struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
122
123 if (IS_ERR(ipcp))
124 return (struct shmid_kernel *)ipcp;
125
126 return container_of(ipcp, struct shmid_kernel, shm_perm);
127}
128
129/*
130 * shm_lock_(check_) routines are called in the paths where the rw_mutex 115 * shm_lock_(check_) routines are called in the paths where the rw_mutex
131 * is not held. 116 * is not necessarily held.
132 */ 117 */
133static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) 118static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
134{ 119{
@@ -211,7 +196,7 @@ static void shm_close(struct vm_area_struct *vma)
211 196
212 down_write(&shm_ids(ns).rw_mutex); 197 down_write(&shm_ids(ns).rw_mutex);
213 /* remove from the list of attaches of the shm segment */ 198 /* remove from the list of attaches of the shm segment */
214 shp = shm_lock_down(ns, sfd->id); 199 shp = shm_lock(ns, sfd->id);
215 BUG_ON(IS_ERR(shp)); 200 BUG_ON(IS_ERR(shp));
216 shp->shm_lprid = task_tgid_vnr(current); 201 shp->shm_lprid = task_tgid_vnr(current);
217 shp->shm_dtim = get_seconds(); 202 shp->shm_dtim = get_seconds();
@@ -932,7 +917,7 @@ invalid:
932 917
933out_nattch: 918out_nattch:
934 down_write(&shm_ids(ns).rw_mutex); 919 down_write(&shm_ids(ns).rw_mutex);
935 shp = shm_lock_down(ns, shmid); 920 shp = shm_lock(ns, shmid);
936 BUG_ON(IS_ERR(shp)); 921 BUG_ON(IS_ERR(shp));
937 shp->shm_nattch--; 922 shp->shm_nattch--;
938 if(shp->shm_nattch == 0 && 923 if(shp->shm_nattch == 0 &&
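With the _down variants gone, shm_lock() is the single path that recovers the containing shmid_kernel from the embedded kern_ipc_perm via container_of(). A userspace sketch of that idiom, using the standard macro definition and simplified struct bodies:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kern_ipc_perm { int id; };

struct shmid_kernel {
	struct kern_ipc_perm shm_perm;	/* common IPC header, embedded */
	long shm_nattch;
};

int main(void)
{
	struct shmid_kernel shp = { .shm_perm = { .id = 42 } };
	struct kern_ipc_perm *ipcp = &shp.shm_perm;

	/* Recover the outer structure from a pointer to the member. */
	struct shmid_kernel *back =
		container_of(ipcp, struct shmid_kernel, shm_perm);

	printf("id=%d same=%d\n", back->shm_perm.id, back == &shp);
	return 0;
}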
diff --git a/ipc/util.c b/ipc/util.c
index 3339177b336c..49b3ea615dc5 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -688,10 +688,6 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
688 * Look for an id in the ipc ids idr and lock the associated ipc object. 688 * Look for an id in the ipc ids idr and lock the associated ipc object.
689 * 689 *
690 * The ipc object is locked on exit. 690 * The ipc object is locked on exit.
691 *
692 * This is the routine that should be called when the rw_mutex is not already
693 * held, i.e. idr tree not protected: it protects the idr tree in read mode
694 * during the idr_find().
695 */ 691 */
696 692
697struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) 693struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
@@ -699,18 +695,13 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
699 struct kern_ipc_perm *out; 695 struct kern_ipc_perm *out;
700 int lid = ipcid_to_idx(id); 696 int lid = ipcid_to_idx(id);
701 697
702 down_read(&ids->rw_mutex);
703
704 rcu_read_lock(); 698 rcu_read_lock();
705 out = idr_find(&ids->ipcs_idr, lid); 699 out = idr_find(&ids->ipcs_idr, lid);
706 if (out == NULL) { 700 if (out == NULL) {
707 rcu_read_unlock(); 701 rcu_read_unlock();
708 up_read(&ids->rw_mutex);
709 return ERR_PTR(-EINVAL); 702 return ERR_PTR(-EINVAL);
710 } 703 }
711 704
712 up_read(&ids->rw_mutex);
713
714 spin_lock(&out->lock); 705 spin_lock(&out->lock);
715 706
716 /* ipc_rmid() may have already freed the ID while ipc_lock 707 /* ipc_rmid() may have already freed the ID while ipc_lock
@@ -725,56 +716,6 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
725 return out; 716 return out;
726} 717}
727 718
728/**
729 * ipc_lock_down - Lock an ipc structure with rw_sem held
730 * @ids: IPC identifier set
731 * @id: ipc id to look for
732 *
733 * Look for an id in the ipc ids idr and lock the associated ipc object.
734 *
735 * The ipc object is locked on exit.
736 *
737 * This is the routine that should be called when the rw_mutex is already
738 * held, i.e. idr tree protected.
739 */
740
741struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
742{
743 struct kern_ipc_perm *out;
744 int lid = ipcid_to_idx(id);
745
746 rcu_read_lock();
747 out = idr_find(&ids->ipcs_idr, lid);
748 if (out == NULL) {
749 rcu_read_unlock();
750 return ERR_PTR(-EINVAL);
751 }
752
753 spin_lock(&out->lock);
754
755 /*
756 * No need to verify that the structure is still valid since the
757 * rw_mutex is held.
758 */
759 return out;
760}
761
762struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id)
763{
764 struct kern_ipc_perm *out;
765
766 out = ipc_lock_down(ids, id);
767 if (IS_ERR(out))
768 return out;
769
770 if (ipc_checkid(out, id)) {
771 ipc_unlock(out);
772 return ERR_PTR(-EIDRM);
773 }
774
775 return out;
776}
777
778struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id) 719struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
779{ 720{
780 struct kern_ipc_perm *out; 721 struct kern_ipc_perm *out;
@@ -846,7 +787,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
846 int err; 787 int err;
847 788
848 down_write(&ids->rw_mutex); 789 down_write(&ids->rw_mutex);
849 ipcp = ipc_lock_check_down(ids, id); 790 ipcp = ipc_lock_check(ids, id);
850 if (IS_ERR(ipcp)) { 791 if (IS_ERR(ipcp)) {
851 err = PTR_ERR(ipcp); 792 err = PTR_ERR(ipcp);
852 goto out_up; 793 goto out_up;
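ipc_lock() now always performs the lockless idr lookup and then revalidates the object after taking its spinlock, which is what made the rw_mutex-holding _down variants redundant. A sketch of that lookup/lock/recheck shape, with a pthread mutex in place of the spinlock (in the kernel, the RCU read section keeps the object's memory valid across the window; this sketch assumes the caller guarantees that):

#include <pthread.h>
#include <stddef.h>

struct obj {
	pthread_mutex_t lock;
	int deleted;		/* set by the remover before unlinking */
};

static struct obj *obj_lock(struct obj *(*find)(int id), int id)
{
	struct obj *o = find(id);	/* lockless lookup (RCU-protected) */

	if (!o)
		return NULL;
	pthread_mutex_lock(&o->lock);
	if (o->deleted) {
		/* the remover got in between lookup and lock */
		pthread_mutex_unlock(&o->lock);
		return NULL;
	}
	return o;
}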
diff --git a/ipc/util.h b/ipc/util.h
index cdb966aebe07..3646b45a03c9 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -102,11 +102,6 @@ void* ipc_rcu_alloc(int size);
102void ipc_rcu_getref(void *ptr); 102void ipc_rcu_getref(void *ptr);
103void ipc_rcu_putref(void *ptr); 103void ipc_rcu_putref(void *ptr);
104 104
105/*
106 * ipc_lock_down: called with rw_mutex held
107 * ipc_lock: called without that lock held
108 */
109struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
110struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); 105struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
111 106
112void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); 107void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
@@ -155,7 +150,6 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
155 rcu_read_unlock(); 150 rcu_read_unlock();
156} 151}
157 152
158struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id);
159struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); 153struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
160int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, 154int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
161 struct ipc_ops *ops, struct ipc_params *params); 155 struct ipc_ops *ops, struct ipc_params *params);
diff --git a/kernel/Makefile b/kernel/Makefile
index 15ab63ffe64d..54f69837d35a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ 5obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
6 cpu.o exit.o itimer.o time.o softirq.o resource.o \ 6 cpu.o exit.o itimer.o time.o softirq.o resource.o \
7 sysctl.o capability.o ptrace.o timer.o user.o \ 7 sysctl.o capability.o ptrace.o timer.o user.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
24CFLAGS_REMOVE_sched.o = -mno-spe -pg 24CFLAGS_REMOVE_sched.o = -mno-spe -pg
25endif 25endif
26 26
27obj-$(CONFIG_PROFILING) += profile.o
27obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o 28obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
28obj-$(CONFIG_STACKTRACE) += stacktrace.o 29obj-$(CONFIG_STACKTRACE) += stacktrace.o
29obj-y += time/ 30obj-y += time/
diff --git a/kernel/acct.c b/kernel/acct.c
index 91e1cfd734d2..dd68b9059418 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,37 +75,39 @@ int acct_parm[3] = {4, 2, 30};
75/* 75/*
76 * External references and all of the globals. 76 * External references and all of the globals.
77 */ 77 */
78static void do_acct_process(struct pid_namespace *ns, struct file *); 78static void do_acct_process(struct bsd_acct_struct *acct,
79 struct pid_namespace *ns, struct file *);
79 80
80/* 81/*
81 * This structure is used so that all the data protected by lock 82 * This structure is used so that all the data protected by lock
82 * can be placed in the same cache line as the lock. This primes 83 * can be placed in the same cache line as the lock. This primes
83 * the cache line to have the data after getting the lock. 84 * the cache line to have the data after getting the lock.
84 */ 85 */
85struct acct_glbs { 86struct bsd_acct_struct {
86 spinlock_t lock;
87 volatile int active; 87 volatile int active;
88 volatile int needcheck; 88 volatile int needcheck;
89 struct file *file; 89 struct file *file;
90 struct pid_namespace *ns; 90 struct pid_namespace *ns;
91 struct timer_list timer; 91 struct timer_list timer;
92 struct list_head list;
92}; 93};
93 94
94static struct acct_glbs acct_globals __cacheline_aligned = 95static DEFINE_SPINLOCK(acct_lock);
95 {__SPIN_LOCK_UNLOCKED(acct_globals.lock)}; 96static LIST_HEAD(acct_list);
96 97
97/* 98/*
98 * Called whenever the timer says to check the free space. 99 * Called whenever the timer says to check the free space.
99 */ 100 */
100static void acct_timeout(unsigned long unused) 101static void acct_timeout(unsigned long x)
101{ 102{
102 acct_globals.needcheck = 1; 103 struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
104 acct->needcheck = 1;
103} 105}
104 106
105/* 107/*
106 * Check the amount of free space and suspend/resume accordingly. 108 * Check the amount of free space and suspend/resume accordingly.
107 */ 109 */
108static int check_free_space(struct file *file) 110static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
109{ 111{
110 struct kstatfs sbuf; 112 struct kstatfs sbuf;
111 int res; 113 int res;
@@ -113,11 +115,11 @@ static int check_free_space(struct file *file)
113 sector_t resume; 115 sector_t resume;
114 sector_t suspend; 116 sector_t suspend;
115 117
116 spin_lock(&acct_globals.lock); 118 spin_lock(&acct_lock);
117 res = acct_globals.active; 119 res = acct->active;
118 if (!file || !acct_globals.needcheck) 120 if (!file || !acct->needcheck)
119 goto out; 121 goto out;
120 spin_unlock(&acct_globals.lock); 122 spin_unlock(&acct_lock);
121 123
122 /* May block */ 124 /* May block */
123 if (vfs_statfs(file->f_path.dentry, &sbuf)) 125 if (vfs_statfs(file->f_path.dentry, &sbuf))
@@ -136,35 +138,35 @@ static int check_free_space(struct file *file)
136 act = 0; 138 act = 0;
137 139
138 /* 140 /*
139 * If some joker switched acct_globals.file under us we'd better be 141 * If some joker switched acct->file under us we'd better be
140 * silent and _not_ touch anything. 142 * silent and _not_ touch anything.
141 */ 143 */
142 spin_lock(&acct_globals.lock); 144 spin_lock(&acct_lock);
143 if (file != acct_globals.file) { 145 if (file != acct->file) {
144 if (act) 146 if (act)
145 res = act>0; 147 res = act>0;
146 goto out; 148 goto out;
147 } 149 }
148 150
149 if (acct_globals.active) { 151 if (acct->active) {
150 if (act < 0) { 152 if (act < 0) {
151 acct_globals.active = 0; 153 acct->active = 0;
152 printk(KERN_INFO "Process accounting paused\n"); 154 printk(KERN_INFO "Process accounting paused\n");
153 } 155 }
154 } else { 156 } else {
155 if (act > 0) { 157 if (act > 0) {
156 acct_globals.active = 1; 158 acct->active = 1;
157 printk(KERN_INFO "Process accounting resumed\n"); 159 printk(KERN_INFO "Process accounting resumed\n");
158 } 160 }
159 } 161 }
160 162
161 del_timer(&acct_globals.timer); 163 del_timer(&acct->timer);
162 acct_globals.needcheck = 0; 164 acct->needcheck = 0;
163 acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ; 165 acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
164 add_timer(&acct_globals.timer); 166 add_timer(&acct->timer);
165 res = acct_globals.active; 167 res = acct->active;
166out: 168out:
167 spin_unlock(&acct_globals.lock); 169 spin_unlock(&acct_lock);
168 return res; 170 return res;
169} 171}
170 172
@@ -172,39 +174,41 @@ out:
172 * Close the old accounting file (if currently open) and then replace 174 * Close the old accounting file (if currently open) and then replace
173 * it with file (if non-NULL). 175 * it with file (if non-NULL).
174 * 176 *
175 * NOTE: acct_globals.lock MUST be held on entry and exit. 177 * NOTE: acct_lock MUST be held on entry and exit.
176 */ 178 */
177static void acct_file_reopen(struct file *file) 179static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
180 struct pid_namespace *ns)
178{ 181{
179 struct file *old_acct = NULL; 182 struct file *old_acct = NULL;
180 struct pid_namespace *old_ns = NULL; 183 struct pid_namespace *old_ns = NULL;
181 184
182 if (acct_globals.file) { 185 if (acct->file) {
183 old_acct = acct_globals.file; 186 old_acct = acct->file;
184 old_ns = acct_globals.ns; 187 old_ns = acct->ns;
185 del_timer(&acct_globals.timer); 188 del_timer(&acct->timer);
186 acct_globals.active = 0; 189 acct->active = 0;
187 acct_globals.needcheck = 0; 190 acct->needcheck = 0;
188 acct_globals.file = NULL; 191 acct->file = NULL;
192 acct->ns = NULL;
193 list_del(&acct->list);
189 } 194 }
190 if (file) { 195 if (file) {
191 acct_globals.file = file; 196 acct->file = file;
192 acct_globals.ns = get_pid_ns(task_active_pid_ns(current)); 197 acct->ns = ns;
193 acct_globals.needcheck = 0; 198 acct->needcheck = 0;
194 acct_globals.active = 1; 199 acct->active = 1;
200 list_add(&acct->list, &acct_list);
195 /* It's been deleted if it was used before so this is safe */ 201 /* It's been deleted if it was used before so this is safe */
196 init_timer(&acct_globals.timer); 202 setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
197 acct_globals.timer.function = acct_timeout; 203 acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
198 acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ; 204 add_timer(&acct->timer);
199 add_timer(&acct_globals.timer);
200 } 205 }
201 if (old_acct) { 206 if (old_acct) {
202 mnt_unpin(old_acct->f_path.mnt); 207 mnt_unpin(old_acct->f_path.mnt);
203 spin_unlock(&acct_globals.lock); 208 spin_unlock(&acct_lock);
204 do_acct_process(old_ns, old_acct); 209 do_acct_process(acct, old_ns, old_acct);
205 filp_close(old_acct, NULL); 210 filp_close(old_acct, NULL);
206 put_pid_ns(old_ns); 211 spin_lock(&acct_lock);
207 spin_lock(&acct_globals.lock);
208 } 212 }
209} 213}
210 214
@@ -212,6 +216,8 @@ static int acct_on(char *name)
212{ 216{
213 struct file *file; 217 struct file *file;
214 int error; 218 int error;
219 struct pid_namespace *ns;
220 struct bsd_acct_struct *acct = NULL;
215 221
216 /* Difference from BSD - they don't do O_APPEND */ 222 /* Difference from BSD - they don't do O_APPEND */
217 file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); 223 file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -228,18 +234,34 @@ static int acct_on(char *name)
228 return -EIO; 234 return -EIO;
229 } 235 }
230 236
237 ns = task_active_pid_ns(current);
238 if (ns->bacct == NULL) {
239 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
240 if (acct == NULL) {
241 filp_close(file, NULL);
242 return -ENOMEM;
243 }
244 }
245
231 error = security_acct(file); 246 error = security_acct(file);
232 if (error) { 247 if (error) {
248 kfree(acct);
233 filp_close(file, NULL); 249 filp_close(file, NULL);
234 return error; 250 return error;
235 } 251 }
236 252
237 spin_lock(&acct_globals.lock); 253 spin_lock(&acct_lock);
254 if (ns->bacct == NULL) {
255 ns->bacct = acct;
256 acct = NULL;
257 }
258
238 mnt_pin(file->f_path.mnt); 259 mnt_pin(file->f_path.mnt);
239 acct_file_reopen(file); 260 acct_file_reopen(ns->bacct, file, ns);
240 spin_unlock(&acct_globals.lock); 261 spin_unlock(&acct_lock);
241 262
242 mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ 263 mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
264 kfree(acct);
243 265
244 return 0; 266 return 0;
245} 267}
@@ -269,11 +291,17 @@ asmlinkage long sys_acct(const char __user *name)
269 error = acct_on(tmp); 291 error = acct_on(tmp);
270 putname(tmp); 292 putname(tmp);
271 } else { 293 } else {
294 struct bsd_acct_struct *acct;
295
296 acct = task_active_pid_ns(current)->bacct;
297 if (acct == NULL)
298 return 0;
299
272 error = security_acct(NULL); 300 error = security_acct(NULL);
273 if (!error) { 301 if (!error) {
274 spin_lock(&acct_globals.lock); 302 spin_lock(&acct_lock);
275 acct_file_reopen(NULL); 303 acct_file_reopen(acct, NULL, NULL);
276 spin_unlock(&acct_globals.lock); 304 spin_unlock(&acct_lock);
277 } 305 }
278 } 306 }
279 return error; 307 return error;
@@ -288,10 +316,16 @@ asmlinkage long sys_acct(const char __user *name)
288 */ 316 */
289void acct_auto_close_mnt(struct vfsmount *m) 317void acct_auto_close_mnt(struct vfsmount *m)
290{ 318{
291 spin_lock(&acct_globals.lock); 319 struct bsd_acct_struct *acct;
292 if (acct_globals.file && acct_globals.file->f_path.mnt == m) 320
293 acct_file_reopen(NULL); 321 spin_lock(&acct_lock);
294 spin_unlock(&acct_globals.lock); 322restart:
323 list_for_each_entry(acct, &acct_list, list)
324 if (acct->file && acct->file->f_path.mnt == m) {
325 acct_file_reopen(acct, NULL, NULL);
326 goto restart;
327 }
328 spin_unlock(&acct_lock);
295} 329}
296 330
297/** 331/**
@@ -303,12 +337,31 @@ void acct_auto_close_mnt(struct vfsmount *m)
303 */ 337 */
304void acct_auto_close(struct super_block *sb) 338void acct_auto_close(struct super_block *sb)
305{ 339{
306 spin_lock(&acct_globals.lock); 340 struct bsd_acct_struct *acct;
307 if (acct_globals.file && 341
308 acct_globals.file->f_path.mnt->mnt_sb == sb) { 342 spin_lock(&acct_lock);
309 acct_file_reopen(NULL); 343restart:
344 list_for_each_entry(acct, &acct_list, list)
345 if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
346 acct_file_reopen(acct, NULL, NULL);
347 goto restart;
348 }
349 spin_unlock(&acct_lock);
350}
351
352void acct_exit_ns(struct pid_namespace *ns)
353{
354 struct bsd_acct_struct *acct;
355
356 spin_lock(&acct_lock);
357 acct = ns->bacct;
358 if (acct != NULL) {
359 if (acct->file != NULL)
360 acct_file_reopen(acct, NULL, NULL);
361
362 kfree(acct);
310 } 363 }
311 spin_unlock(&acct_globals.lock); 364 spin_unlock(&acct_lock);
312} 365}
313 366
314/* 367/*
@@ -425,7 +478,8 @@ static u32 encode_float(u64 value)
425/* 478/*
426 * do_acct_process does all actual work. Caller holds the reference to file. 479 * do_acct_process does all actual work. Caller holds the reference to file.
427 */ 480 */
428static void do_acct_process(struct pid_namespace *ns, struct file *file) 481static void do_acct_process(struct bsd_acct_struct *acct,
482 struct pid_namespace *ns, struct file *file)
429{ 483{
430 struct pacct_struct *pacct = &current->signal->pacct; 484 struct pacct_struct *pacct = &current->signal->pacct;
431 acct_t ac; 485 acct_t ac;
@@ -440,7 +494,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
440 * First check to see if there is enough free_space to continue 494 * First check to see if there is enough free_space to continue
441 * the process accounting system. 495 * the process accounting system.
442 */ 496 */
443 if (!check_free_space(file)) 497 if (!check_free_space(acct, file))
444 return; 498 return;
445 499
446 /* 500 /*
@@ -577,34 +631,46 @@ void acct_collect(long exitcode, int group_dead)
577 spin_unlock_irq(&current->sighand->siglock); 631 spin_unlock_irq(&current->sighand->siglock);
578} 632}
579 633
580/** 634static void acct_process_in_ns(struct pid_namespace *ns)
581 * acct_process - now just a wrapper around do_acct_process
582 * @exitcode: task exit code
583 *
584 * handles process accounting for an exiting task
585 */
586void acct_process(void)
587{ 635{
588 struct file *file = NULL; 636 struct file *file = NULL;
589 struct pid_namespace *ns; 637 struct bsd_acct_struct *acct;
590 638
639 acct = ns->bacct;
591 /* 640 /*
592 * accelerate the common fastpath: 641 * accelerate the common fastpath:
593 */ 642 */
594 if (!acct_globals.file) 643 if (!acct || !acct->file)
595 return; 644 return;
596 645
597 spin_lock(&acct_globals.lock); 646 spin_lock(&acct_lock);
598 file = acct_globals.file; 647 file = acct->file;
599 if (unlikely(!file)) { 648 if (unlikely(!file)) {
600 spin_unlock(&acct_globals.lock); 649 spin_unlock(&acct_lock);
601 return; 650 return;
602 } 651 }
603 get_file(file); 652 get_file(file);
604 ns = get_pid_ns(acct_globals.ns); 653 spin_unlock(&acct_lock);
605 spin_unlock(&acct_globals.lock);
606 654
607 do_acct_process(ns, file); 655 do_acct_process(acct, ns, file);
608 fput(file); 656 fput(file);
609 put_pid_ns(ns); 657}
658
659/**
660 * acct_process - now just a wrapper around acct_process_in_ns,
661 * which in turn is a wrapper around do_acct_process.
662 *
663 * handles process accounting for an exiting task
664 */
665void acct_process(void)
666{
667 struct pid_namespace *ns;
668
669 /*
670 * This loop is safe to run without locking, since current is still
671 * alive and holds its namespace, which in turn holds
672 * its parent.
673 */
674 for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
675 acct_process_in_ns(ns);
610} 676}
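The accounting rework above is a standard de-globalization: one static singleton with an embedded lock becomes one instance per pid namespace, all strung on a global list under a single lock, with a race-tolerant publish in acct_on(). A condensed userspace sketch of that shape (pthread mutex for acct_lock; struct bodies trimmed; names illustrative):

#include <pthread.h>
#include <stdlib.h>

struct acct {
	struct acct *next;	/* global acct_list linkage */
	void *file;		/* stands in for struct file * */
};

struct ns { struct acct *bacct; };

static struct acct *acct_list;
static pthread_mutex_t acct_lock = PTHREAD_MUTEX_INITIALIZER;

static int acct_on(struct ns *ns, void *file)
{
	struct acct *a = NULL;

	if (!ns->bacct) {
		a = calloc(1, sizeof(*a));	/* may block: done unlocked */
		if (!a)
			return -1;
	}
	pthread_mutex_lock(&acct_lock);
	if (!ns->bacct) {		/* publish ours, link it globally */
		ns->bacct = a;
		a->next = acct_list;
		acct_list = a;
		a = NULL;
	}
	ns->bacct->file = file;
	pthread_mutex_unlock(&acct_lock);
	free(a);			/* spare from a lost race, or NULL */
	return 0;
}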
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 15ac0e1e4f4d..66ec9fd21e0c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -89,11 +89,7 @@ struct cgroupfs_root {
89 /* Hierarchy-specific flags */ 89 /* Hierarchy-specific flags */
90 unsigned long flags; 90 unsigned long flags;
91 91
92 /* The path to use for release notifications. No locking 92 /* The path to use for release notifications. */
93 * between setting and use - so if userspace updates this
94 * while child cgroups exist, you could miss a
95 * notification. We ensure that it's always a valid
96 * NUL-terminated string */
97 char release_agent_path[PATH_MAX]; 93 char release_agent_path[PATH_MAX];
98}; 94};
99 95
@@ -118,7 +114,7 @@ static int root_count;
118 * extra work in the fork/exit path if none of the subsystems need to 114 * extra work in the fork/exit path if none of the subsystems need to
119 * be called. 115 * be called.
120 */ 116 */
121static int need_forkexit_callback; 117static int need_forkexit_callback __read_mostly;
122static int need_mm_owner_callback __read_mostly; 118static int need_mm_owner_callback __read_mostly;
123 119
124/* convenient tests for these bits */ 120/* convenient tests for these bits */
@@ -220,7 +216,7 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
220 * task until after the first call to cgroup_iter_start(). This 216 * task until after the first call to cgroup_iter_start(). This
221 * reduces the fork()/exit() overhead for people who have cgroups 217 * reduces the fork()/exit() overhead for people who have cgroups
222 * compiled into their kernel but not actually in use */ 218 * compiled into their kernel but not actually in use */
223static int use_task_css_set_links; 219static int use_task_css_set_links __read_mostly;
224 220
225/* When we create or destroy a css_set, the operation simply 221/* When we create or destroy a css_set, the operation simply
226 * takes/releases a reference count on all the cgroups referenced 222 * takes/releases a reference count on all the cgroups referenced
@@ -241,17 +237,20 @@ static int use_task_css_set_links;
241 */ 237 */
242static void unlink_css_set(struct css_set *cg) 238static void unlink_css_set(struct css_set *cg)
243{ 239{
240 struct cg_cgroup_link *link;
241 struct cg_cgroup_link *saved_link;
242
244 write_lock(&css_set_lock); 243 write_lock(&css_set_lock);
245 hlist_del(&cg->hlist); 244 hlist_del(&cg->hlist);
246 css_set_count--; 245 css_set_count--;
247 while (!list_empty(&cg->cg_links)) { 246
248 struct cg_cgroup_link *link; 247 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
249 link = list_entry(cg->cg_links.next, 248 cg_link_list) {
250 struct cg_cgroup_link, cg_link_list);
251 list_del(&link->cg_link_list); 249 list_del(&link->cg_link_list);
252 list_del(&link->cgrp_link_list); 250 list_del(&link->cgrp_link_list);
253 kfree(link); 251 kfree(link);
254 } 252 }
253
255 write_unlock(&css_set_lock); 254 write_unlock(&css_set_lock);
256} 255}
257 256
@@ -363,15 +362,14 @@ static struct css_set *find_existing_css_set(
363static int allocate_cg_links(int count, struct list_head *tmp) 362static int allocate_cg_links(int count, struct list_head *tmp)
364{ 363{
365 struct cg_cgroup_link *link; 364 struct cg_cgroup_link *link;
365 struct cg_cgroup_link *saved_link;
366 int i; 366 int i;
367 INIT_LIST_HEAD(tmp); 367 INIT_LIST_HEAD(tmp);
368 for (i = 0; i < count; i++) { 368 for (i = 0; i < count; i++) {
369 link = kmalloc(sizeof(*link), GFP_KERNEL); 369 link = kmalloc(sizeof(*link), GFP_KERNEL);
370 if (!link) { 370 if (!link) {
371 while (!list_empty(tmp)) { 371 list_for_each_entry_safe(link, saved_link, tmp,
372 link = list_entry(tmp->next, 372 cgrp_link_list) {
373 struct cg_cgroup_link,
374 cgrp_link_list);
375 list_del(&link->cgrp_link_list); 373 list_del(&link->cgrp_link_list);
376 kfree(link); 374 kfree(link);
377 } 375 }
@@ -384,11 +382,10 @@ static int allocate_cg_links(int count, struct list_head *tmp)
384 382
385static void free_cg_links(struct list_head *tmp) 383static void free_cg_links(struct list_head *tmp)
386{ 384{
387 while (!list_empty(tmp)) { 385 struct cg_cgroup_link *link;
388 struct cg_cgroup_link *link; 386 struct cg_cgroup_link *saved_link;
389 link = list_entry(tmp->next, 387
390 struct cg_cgroup_link, 388 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
391 cgrp_link_list);
392 list_del(&link->cgrp_link_list); 389 list_del(&link->cgrp_link_list);
393 kfree(link); 390 kfree(link);
394 } 391 }
@@ -415,11 +412,11 @@ static struct css_set *find_css_set(
415 412
416 /* First see if we already have a cgroup group that matches 413 /* First see if we already have a cgroup group that matches
417 * the desired set */ 414 * the desired set */
418 write_lock(&css_set_lock); 415 read_lock(&css_set_lock);
419 res = find_existing_css_set(oldcg, cgrp, template); 416 res = find_existing_css_set(oldcg, cgrp, template);
420 if (res) 417 if (res)
421 get_css_set(res); 418 get_css_set(res);
422 write_unlock(&css_set_lock); 419 read_unlock(&css_set_lock);
423 420
424 if (res) 421 if (res)
425 return res; 422 return res;
@@ -507,10 +504,6 @@ static struct css_set *find_css_set(
507 * knows that the cgroup won't be removed, as cgroup_rmdir() 504 * knows that the cgroup won't be removed, as cgroup_rmdir()
508 * needs that mutex. 505 * needs that mutex.
509 * 506 *
510 * The cgroup_common_file_write handler for operations that modify
511 * the cgroup hierarchy holds cgroup_mutex across the entire operation,
512 * single threading all such cgroup modifications across the system.
513 *
514 * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't 507 * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
515 * (usually) take cgroup_mutex. These are the two most performance 508 * (usually) take cgroup_mutex. These are the two most performance
516 * critical pieces of code here. The exception occurs on cgroup_exit(), 509 * critical pieces of code here. The exception occurs on cgroup_exit(),
@@ -1093,6 +1086,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
1093 struct cgroupfs_root *root = sb->s_fs_info; 1086 struct cgroupfs_root *root = sb->s_fs_info;
1094 struct cgroup *cgrp = &root->top_cgroup; 1087 struct cgroup *cgrp = &root->top_cgroup;
1095 int ret; 1088 int ret;
1089 struct cg_cgroup_link *link;
1090 struct cg_cgroup_link *saved_link;
1096 1091
1097 BUG_ON(!root); 1092 BUG_ON(!root);
1098 1093
@@ -1112,10 +1107,9 @@ static void cgroup_kill_sb(struct super_block *sb) {
1112 * root cgroup 1107 * root cgroup
1113 */ 1108 */
1114 write_lock(&css_set_lock); 1109 write_lock(&css_set_lock);
1115 while (!list_empty(&cgrp->css_sets)) { 1110
1116 struct cg_cgroup_link *link; 1111 list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
1117 link = list_entry(cgrp->css_sets.next, 1112 cgrp_link_list) {
1118 struct cg_cgroup_link, cgrp_link_list);
1119 list_del(&link->cg_link_list); 1113 list_del(&link->cg_link_list);
1120 list_del(&link->cgrp_link_list); 1114 list_del(&link->cgrp_link_list);
1121 kfree(link); 1115 kfree(link);
@@ -1281,18 +1275,14 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1281} 1275}
1282 1276
1283/* 1277/*
1284 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with 1278 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
1285 * cgroup_mutex, may take task_lock of task 1279 * held. May take task_lock of task
1286 */ 1280 */
1287static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) 1281static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
1288{ 1282{
1289 pid_t pid;
1290 struct task_struct *tsk; 1283 struct task_struct *tsk;
1291 int ret; 1284 int ret;
1292 1285
1293 if (sscanf(pidbuf, "%d", &pid) != 1)
1294 return -EIO;
1295
1296 if (pid) { 1286 if (pid) {
1297 rcu_read_lock(); 1287 rcu_read_lock();
1298 tsk = find_task_by_vpid(pid); 1288 tsk = find_task_by_vpid(pid);
@@ -1318,6 +1308,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
1318 return ret; 1308 return ret;
1319} 1309}
1320 1310
1311static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1312{
1313 int ret;
1314 if (!cgroup_lock_live_group(cgrp))
1315 return -ENODEV;
1316 ret = attach_task_by_pid(cgrp, pid);
1317 cgroup_unlock();
1318 return ret;
1319}
1320
1321/* The various types of files and directories in a cgroup file system */ 1321/* The various types of files and directories in a cgroup file system */
1322enum cgroup_filetype { 1322enum cgroup_filetype {
1323 FILE_ROOT, 1323 FILE_ROOT,
@@ -1327,12 +1327,54 @@ enum cgroup_filetype {
1327 FILE_RELEASE_AGENT, 1327 FILE_RELEASE_AGENT,
1328}; 1328};
1329 1329
1330/**
1331 * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
1332 * @cgrp: the cgroup to be checked for liveness
1333 *
1334 * On success, returns true; the lock should be later released with
1335 * cgroup_unlock(). On failure returns false with no lock held.
1336 */
1337bool cgroup_lock_live_group(struct cgroup *cgrp)
1338{
1339 mutex_lock(&cgroup_mutex);
1340 if (cgroup_is_removed(cgrp)) {
1341 mutex_unlock(&cgroup_mutex);
1342 return false;
1343 }
1344 return true;
1345}
1346
1347static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1348 const char *buffer)
1349{
1350 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1351 if (!cgroup_lock_live_group(cgrp))
1352 return -ENODEV;
1353 strcpy(cgrp->root->release_agent_path, buffer);
1354 cgroup_unlock();
1355 return 0;
1356}
1357
1358static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
1359 struct seq_file *seq)
1360{
1361 if (!cgroup_lock_live_group(cgrp))
1362 return -ENODEV;
1363 seq_puts(seq, cgrp->root->release_agent_path);
1364 seq_putc(seq, '\n');
1365 cgroup_unlock();
1366 return 0;
1367}
1368
1369/* A buffer size big enough for numbers or short strings */
1370#define CGROUP_LOCAL_BUFFER_SIZE 64
1371
1330static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, 1372static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
1331 struct file *file, 1373 struct file *file,
1332 const char __user *userbuf, 1374 const char __user *userbuf,
1333 size_t nbytes, loff_t *unused_ppos) 1375 size_t nbytes, loff_t *unused_ppos)
1334{ 1376{
1335 char buffer[64]; 1377 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
1336 int retval = 0; 1378 int retval = 0;
1337 char *end; 1379 char *end;
1338 1380
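cgroup_lock_live_group() bundles "take the global mutex, then confirm the group is still alive" so every write handler gets the ENODEV case handled uniformly, with the lock already dropped on failure. The same guard in a userspace sketch (pthread mutex in place of cgroup_mutex):

#include <pthread.h>
#include <stdbool.h>

struct group { bool removed; };

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* On success the lock is held and the caller must release it;
 * on failure the lock has already been dropped. */
static bool lock_live_group(struct group *g)
{
	pthread_mutex_lock(&big_lock);
	if (g->removed) {
		pthread_mutex_unlock(&big_lock);
		return false;		/* handler returns -ENODEV */
	}
	return true;
}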
@@ -1361,68 +1403,36 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
1361 return retval; 1403 return retval;
1362} 1404}
1363 1405
1364static ssize_t cgroup_common_file_write(struct cgroup *cgrp, 1406static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
1365 struct cftype *cft, 1407 struct file *file,
1366 struct file *file, 1408 const char __user *userbuf,
1367 const char __user *userbuf, 1409 size_t nbytes, loff_t *unused_ppos)
1368 size_t nbytes, loff_t *unused_ppos)
1369{ 1410{
1370 enum cgroup_filetype type = cft->private; 1411 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
1371 char *buffer;
1372 int retval = 0; 1412 int retval = 0;
1413 size_t max_bytes = cft->max_write_len;
1414 char *buffer = local_buffer;
1373 1415
1374 if (nbytes >= PATH_MAX) 1416 if (!max_bytes)
1417 max_bytes = sizeof(local_buffer) - 1;
1418 if (nbytes >= max_bytes)
1375 return -E2BIG; 1419 return -E2BIG;
1376 1420 /* Allocate a dynamic buffer if we need one */
1377 /* +1 for nul-terminator */ 1421 if (nbytes >= sizeof(local_buffer)) {
1378 buffer = kmalloc(nbytes + 1, GFP_KERNEL); 1422 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
1379 if (buffer == NULL) 1423 if (buffer == NULL)
1380 return -ENOMEM; 1424 return -ENOMEM;
1381
1382 if (copy_from_user(buffer, userbuf, nbytes)) {
1383 retval = -EFAULT;
1384 goto out1;
1385 } 1425 }
1386 buffer[nbytes] = 0; /* nul-terminate */ 1426 if (nbytes && copy_from_user(buffer, userbuf, nbytes))
1387 strstrip(buffer); /* strip -just- trailing whitespace */ 1427 return -EFAULT;
1388
1389 mutex_lock(&cgroup_mutex);
1390 1428
1391 /* 1429 buffer[nbytes] = 0; /* nul-terminate */
1392 * This was already checked for in cgroup_file_write(), but 1430 strstrip(buffer);
1393 * check again now we're holding cgroup_mutex. 1431 retval = cft->write_string(cgrp, cft, buffer);
1394 */ 1432 if (!retval)
1395 if (cgroup_is_removed(cgrp)) {
1396 retval = -ENODEV;
1397 goto out2;
1398 }
1399
1400 switch (type) {
1401 case FILE_TASKLIST:
1402 retval = attach_task_by_pid(cgrp, buffer);
1403 break;
1404 case FILE_NOTIFY_ON_RELEASE:
1405 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
1406 if (simple_strtoul(buffer, NULL, 10) != 0)
1407 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
1408 else
1409 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
1410 break;
1411 case FILE_RELEASE_AGENT:
1412 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1413 strcpy(cgrp->root->release_agent_path, buffer);
1414 break;
1415 default:
1416 retval = -EINVAL;
1417 goto out2;
1418 }
1419
1420 if (retval == 0)
1421 retval = nbytes; 1433 retval = nbytes;
1422out2: 1434 if (buffer != local_buffer)
1423 mutex_unlock(&cgroup_mutex); 1435 kfree(buffer);
1424out1:
1425 kfree(buffer);
1426 return retval; 1436 return retval;
1427} 1437}
1428 1438
@@ -1438,6 +1448,8 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1438 return cft->write(cgrp, cft, file, buf, nbytes, ppos); 1448 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
1439 if (cft->write_u64 || cft->write_s64) 1449 if (cft->write_u64 || cft->write_s64)
1440 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); 1450 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
1451 if (cft->write_string)
1452 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
1441 if (cft->trigger) { 1453 if (cft->trigger) {
1442 int ret = cft->trigger(cgrp, (unsigned int)cft->private); 1454 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
1443 return ret ? ret : nbytes; 1455 return ret ? ret : nbytes;
@@ -1450,7 +1462,7 @@ static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
1450 char __user *buf, size_t nbytes, 1462 char __user *buf, size_t nbytes,
1451 loff_t *ppos) 1463 loff_t *ppos)
1452{ 1464{
1453 char tmp[64]; 1465 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1454 u64 val = cft->read_u64(cgrp, cft); 1466 u64 val = cft->read_u64(cgrp, cft);
1455 int len = sprintf(tmp, "%llu\n", (unsigned long long) val); 1467 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
1456 1468
@@ -1462,56 +1474,13 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
1462 char __user *buf, size_t nbytes, 1474 char __user *buf, size_t nbytes,
1463 loff_t *ppos) 1475 loff_t *ppos)
1464{ 1476{
1465 char tmp[64]; 1477 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1466 s64 val = cft->read_s64(cgrp, cft); 1478 s64 val = cft->read_s64(cgrp, cft);
1467 int len = sprintf(tmp, "%lld\n", (long long) val); 1479 int len = sprintf(tmp, "%lld\n", (long long) val);
1468 1480
1469 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); 1481 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1470} 1482}
1471 1483
1472static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
1473 struct cftype *cft,
1474 struct file *file,
1475 char __user *buf,
1476 size_t nbytes, loff_t *ppos)
1477{
1478 enum cgroup_filetype type = cft->private;
1479 char *page;
1480 ssize_t retval = 0;
1481 char *s;
1482
1483 if (!(page = (char *)__get_free_page(GFP_KERNEL)))
1484 return -ENOMEM;
1485
1486 s = page;
1487
1488 switch (type) {
1489 case FILE_RELEASE_AGENT:
1490 {
1491 struct cgroupfs_root *root;
1492 size_t n;
1493 mutex_lock(&cgroup_mutex);
1494 root = cgrp->root;
1495 n = strnlen(root->release_agent_path,
1496 sizeof(root->release_agent_path));
1497 n = min(n, (size_t) PAGE_SIZE);
1498 strncpy(s, root->release_agent_path, n);
1499 mutex_unlock(&cgroup_mutex);
1500 s += n;
1501 break;
1502 }
1503 default:
1504 retval = -EINVAL;
1505 goto out;
1506 }
1507 *s++ = '\n';
1508
1509 retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
1510out:
1511 free_page((unsigned long)page);
1512 return retval;
1513}
1514
1515static ssize_t cgroup_file_read(struct file *file, char __user *buf, 1484static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1516 size_t nbytes, loff_t *ppos) 1485 size_t nbytes, loff_t *ppos)
1517{ 1486{
@@ -1569,6 +1538,7 @@ int cgroup_seqfile_release(struct inode *inode, struct file *file)
1569 1538
1570static struct file_operations cgroup_seqfile_operations = { 1539static struct file_operations cgroup_seqfile_operations = {
1571 .read = seq_read, 1540 .read = seq_read,
1541 .write = cgroup_file_write,
1572 .llseek = seq_lseek, 1542 .llseek = seq_lseek,
1573 .release = cgroup_seqfile_release, 1543 .release = cgroup_seqfile_release,
1574}; 1544};
@@ -1756,15 +1726,11 @@ int cgroup_add_files(struct cgroup *cgrp,
1756int cgroup_task_count(const struct cgroup *cgrp) 1726int cgroup_task_count(const struct cgroup *cgrp)
1757{ 1727{
1758 int count = 0; 1728 int count = 0;
1759 struct list_head *l; 1729 struct cg_cgroup_link *link;
1760 1730
1761 read_lock(&css_set_lock); 1731 read_lock(&css_set_lock);
1762 l = cgrp->css_sets.next; 1732 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
1763 while (l != &cgrp->css_sets) {
1764 struct cg_cgroup_link *link =
1765 list_entry(l, struct cg_cgroup_link, cgrp_link_list);
1766 count += atomic_read(&link->cg->ref.refcount); 1733 count += atomic_read(&link->cg->ref.refcount);
1767 l = l->next;
1768 } 1734 }
1769 read_unlock(&css_set_lock); 1735 read_unlock(&css_set_lock);
1770 return count; 1736 return count;
@@ -2227,6 +2193,18 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
2227 return notify_on_release(cgrp); 2193 return notify_on_release(cgrp);
2228} 2194}
2229 2195
2196static int cgroup_write_notify_on_release(struct cgroup *cgrp,
2197 struct cftype *cft,
2198 u64 val)
2199{
2200 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
2201 if (val)
2202 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2203 else
2204 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2205 return 0;
2206}
2207
2230/* 2208/*
2231 * for the common functions, 'private' gives the type of file 2209 * for the common functions, 'private' gives the type of file
2232 */ 2210 */
@@ -2235,7 +2213,7 @@ static struct cftype files[] = {
2235 .name = "tasks", 2213 .name = "tasks",
2236 .open = cgroup_tasks_open, 2214 .open = cgroup_tasks_open,
2237 .read = cgroup_tasks_read, 2215 .read = cgroup_tasks_read,
2238 .write = cgroup_common_file_write, 2216 .write_u64 = cgroup_tasks_write,
2239 .release = cgroup_tasks_release, 2217 .release = cgroup_tasks_release,
2240 .private = FILE_TASKLIST, 2218 .private = FILE_TASKLIST,
2241 }, 2219 },
@@ -2243,15 +2221,16 @@ static struct cftype files[] = {
2243 { 2221 {
2244 .name = "notify_on_release", 2222 .name = "notify_on_release",
2245 .read_u64 = cgroup_read_notify_on_release, 2223 .read_u64 = cgroup_read_notify_on_release,
2246 .write = cgroup_common_file_write, 2224 .write_u64 = cgroup_write_notify_on_release,
2247 .private = FILE_NOTIFY_ON_RELEASE, 2225 .private = FILE_NOTIFY_ON_RELEASE,
2248 }, 2226 },
2249}; 2227};
2250 2228
2251static struct cftype cft_release_agent = { 2229static struct cftype cft_release_agent = {
2252 .name = "release_agent", 2230 .name = "release_agent",
2253 .read = cgroup_common_file_read, 2231 .read_seq_string = cgroup_release_agent_show,
2254 .write = cgroup_common_file_write, 2232 .write_string = cgroup_release_agent_write,
2233 .max_write_len = PATH_MAX,
2255 .private = FILE_RELEASE_AGENT, 2234 .private = FILE_RELEASE_AGENT,
2256}; 2235};
2257 2236
@@ -2869,16 +2848,17 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
2869 * cgroup_clone - clone the cgroup the given subsystem is attached to 2848 * cgroup_clone - clone the cgroup the given subsystem is attached to
2870 * @tsk: the task to be moved 2849 * @tsk: the task to be moved
2871 * @subsys: the given subsystem 2850 * @subsys: the given subsystem
2851 * @nodename: the name for the new cgroup
2872 * 2852 *
2873 * Duplicate the current cgroup in the hierarchy that the given 2853 * Duplicate the current cgroup in the hierarchy that the given
2874 * subsystem is attached to, and move this task into the new 2854 * subsystem is attached to, and move this task into the new
2875 * child. 2855 * child.
2876 */ 2856 */
2877int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) 2857int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
2858 char *nodename)
2878{ 2859{
2879 struct dentry *dentry; 2860 struct dentry *dentry;
2880 int ret = 0; 2861 int ret = 0;
2881 char nodename[MAX_CGROUP_TYPE_NAMELEN];
2882 struct cgroup *parent, *child; 2862 struct cgroup *parent, *child;
2883 struct inode *inode; 2863 struct inode *inode;
2884 struct css_set *cg; 2864 struct css_set *cg;
@@ -2903,8 +2883,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
2903 cg = tsk->cgroups; 2883 cg = tsk->cgroups;
2904 parent = task_cgroup(tsk, subsys->subsys_id); 2884 parent = task_cgroup(tsk, subsys->subsys_id);
2905 2885
2906 snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid);
2907
2908 /* Pin the hierarchy */ 2886 /* Pin the hierarchy */
2909 atomic_inc(&parent->root->sb->s_active); 2887 atomic_inc(&parent->root->sb->s_active);
2910 2888
@@ -3078,27 +3056,24 @@ static void cgroup_release_agent(struct work_struct *work)
3078 while (!list_empty(&release_list)) { 3056 while (!list_empty(&release_list)) {
3079 char *argv[3], *envp[3]; 3057 char *argv[3], *envp[3];
3080 int i; 3058 int i;
3081 char *pathbuf; 3059 char *pathbuf = NULL, *agentbuf = NULL;
3082 struct cgroup *cgrp = list_entry(release_list.next, 3060 struct cgroup *cgrp = list_entry(release_list.next,
3083 struct cgroup, 3061 struct cgroup,
3084 release_list); 3062 release_list);
3085 list_del_init(&cgrp->release_list); 3063 list_del_init(&cgrp->release_list);
3086 spin_unlock(&release_list_lock); 3064 spin_unlock(&release_list_lock);
3087 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); 3065 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
3088 if (!pathbuf) { 3066 if (!pathbuf)
3089 spin_lock(&release_list_lock); 3067 goto continue_free;
3090 continue; 3068 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
3091 } 3069 goto continue_free;
3092 3070 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
3093 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) { 3071 if (!agentbuf)
3094 kfree(pathbuf); 3072 goto continue_free;
3095 spin_lock(&release_list_lock);
3096 continue;
3097 }
3098 3073
3099 i = 0; 3074 i = 0;
3100 argv[i++] = cgrp->root->release_agent_path; 3075 argv[i++] = agentbuf;
3101 argv[i++] = (char *)pathbuf; 3076 argv[i++] = pathbuf;
3102 argv[i] = NULL; 3077 argv[i] = NULL;
3103 3078
3104 i = 0; 3079 i = 0;
@@ -3112,8 +3087,10 @@ static void cgroup_release_agent(struct work_struct *work)
3112 * be a slow process */ 3087 * be a slow process */
3113 mutex_unlock(&cgroup_mutex); 3088 mutex_unlock(&cgroup_mutex);
3114 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); 3089 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
3115 kfree(pathbuf);
3116 mutex_lock(&cgroup_mutex); 3090 mutex_lock(&cgroup_mutex);
3091 continue_free:
3092 kfree(pathbuf);
3093 kfree(agentbuf);
3117 spin_lock(&release_list_lock); 3094 spin_lock(&release_list_lock);
3118 } 3095 }
3119 spin_unlock(&release_list_lock); 3096 spin_unlock(&release_list_lock);
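Several loops in this file move from hand-rolled while (!list_empty()) walks to list_for_each_entry_safe(), whose point is to cache the next node before the body runs so the current node may be unlinked and freed. The idiom reduced to a plain singly linked list:

#include <stdlib.h>

struct node { struct node *next; };

/* Free every node: 'saved' is taken before free(), which is exactly
 * the job the extra cursor does in list_for_each_entry_safe(). */
static void free_all(struct node **head)
{
	struct node *cur = *head, *saved;

	while (cur) {
		saved = cur->next;
		free(cur);
		cur = saved;
	}
	*head = NULL;
}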
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2cc409ce0a8f..10ba5f1004a5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -285,6 +285,11 @@ out_allowed:
285 set_cpus_allowed_ptr(current, &old_allowed); 285 set_cpus_allowed_ptr(current, &old_allowed);
286out_release: 286out_release:
287 cpu_hotplug_done(); 287 cpu_hotplug_done();
288 if (!err) {
289 if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
290 hcpu) == NOTIFY_BAD)
291 BUG();
292 }
288 return err; 293 return err;
289} 294}
290 295
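The new CPU_POST_DEAD notification fires after cpu_hotplug_done(), i.e. with the hotplug lock already released, so callbacks may block or take locks that would deadlock if held inside the teardown. A schematic of calling a second notifier round outside the lock (plain function-pointer chain standing in for raw_notifier_call_chain; names illustrative):

#include <pthread.h>

enum { EV_DEAD, EV_POST_DEAD };

typedef int (*notifier_fn)(int event, void *arg);

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;

static int call_chain(notifier_fn *chain, int n, int event, void *arg)
{
	for (int i = 0; i < n; i++)
		if (chain[i](event, arg))
			return -1;	/* NOTIFY_BAD in the kernel */
	return 0;
}

static int cpu_down_sketch(notifier_fn *chain, int n, void *cpu)
{
	int err;

	pthread_mutex_lock(&hotplug_lock);
	err = call_chain(chain, n, EV_DEAD, cpu);
	pthread_mutex_unlock(&hotplug_lock);

	/* POST_DEAD runs with the lock dropped: callbacks may block */
	if (!err)
		err = call_chain(chain, n, EV_POST_DEAD, cpu);
	return err;
}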
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d5738910c34c..91cf85b36dd5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -227,10 +227,6 @@ static struct cpuset top_cpuset = {
227 * The task_struct fields mems_allowed and mems_generation may only 227 * The task_struct fields mems_allowed and mems_generation may only
228 * be accessed in the context of that task, so require no locks. 228 * be accessed in the context of that task, so require no locks.
229 * 229 *
230 * The cpuset_common_file_write handler for operations that modify
231 * the cpuset hierarchy holds cgroup_mutex across the entire operation,
232 * single threading all such cpuset modifications across the system.
233 *
234 * The cpuset_common_file_read() handlers only hold callback_mutex across 230 * The cpuset_common_file_read() handlers only hold callback_mutex across
235 * small pieces of code, such as when reading out possibly multi-word 231 * small pieces of code, such as when reading out possibly multi-word
236 * cpumasks and nodemasks. 232 * cpumasks and nodemasks.
@@ -369,7 +365,7 @@ void cpuset_update_task_memory_state(void)
369 my_cpusets_mem_gen = top_cpuset.mems_generation; 365 my_cpusets_mem_gen = top_cpuset.mems_generation;
370 } else { 366 } else {
371 rcu_read_lock(); 367 rcu_read_lock();
372 my_cpusets_mem_gen = task_cs(current)->mems_generation; 368 my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
373 rcu_read_unlock(); 369 rcu_read_unlock();
374 } 370 }
375 371
@@ -500,11 +496,16 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
500/* 496/*
501 * rebuild_sched_domains() 497 * rebuild_sched_domains()
502 * 498 *
503 * If the flag 'sched_load_balance' of any cpuset with non-empty 499 * This routine will be called to rebuild the scheduler's dynamic
504 * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset 500 * sched domains:
505 * which has that flag enabled, or if any cpuset with a non-empty 501 * - if the flag 'sched_load_balance' of any cpuset with non-empty
506 * 'cpus' is removed, then call this routine to rebuild the 502 * 'cpus' changes,
507 * scheduler's dynamic sched domains. 503 * - or if the 'cpus' allowed changes in any cpuset which has that
504 * flag enabled,
505 * - or if the 'sched_relax_domain_level' of any cpuset which has
506 * that flag enabled and with non-empty 'cpus' changes,
507 * - or if any cpuset with non-empty 'cpus' is removed,
508 * - or if a cpu gets offlined.
508 * 509 *
509 * This routine builds a partial partition of the systems CPUs 510 * This routine builds a partial partition of the systems CPUs
510 * (the set of non-overlapping cpumask_t's in the array 'part' 511 * (the set of non-overlapping cpumask_t's in the array 'part'
@@ -609,8 +610,13 @@ void rebuild_sched_domains(void)
609 while (__kfifo_get(q, (void *)&cp, sizeof(cp))) { 610 while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
610 struct cgroup *cont; 611 struct cgroup *cont;
611 struct cpuset *child; /* scans child cpusets of cp */ 612 struct cpuset *child; /* scans child cpusets of cp */
613
614 if (cpus_empty(cp->cpus_allowed))
615 continue;
616
612 if (is_sched_load_balance(cp)) 617 if (is_sched_load_balance(cp))
613 csa[csn++] = cp; 618 csa[csn++] = cp;
619
614 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { 620 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
615 child = cgroup_cs(cont); 621 child = cgroup_cs(cont);
616 __kfifo_put(q, (void *)&child, sizeof(cp)); 622 __kfifo_put(q, (void *)&child, sizeof(cp));
@@ -703,36 +709,6 @@ done:
703 /* Don't kfree(dattr) -- partition_sched_domains() does that. */ 709 /* Don't kfree(dattr) -- partition_sched_domains() does that. */
704} 710}
705 711
706static inline int started_after_time(struct task_struct *t1,
707 struct timespec *time,
708 struct task_struct *t2)
709{
710 int start_diff = timespec_compare(&t1->start_time, time);
711 if (start_diff > 0) {
712 return 1;
713 } else if (start_diff < 0) {
714 return 0;
715 } else {
716 /*
717 * Arbitrarily, if two processes started at the same
718 * time, we'll say that the lower pointer value
719 * started first. Note that t2 may have exited by now
720 * so this may not be a valid pointer any longer, but
721 * that's fine - it still serves to distinguish
722 * between two tasks started (effectively)
723 * simultaneously.
724 */
725 return t1 > t2;
726 }
727}
728
729static inline int started_after(void *p1, void *p2)
730{
731 struct task_struct *t1 = p1;
732 struct task_struct *t2 = p2;
733 return started_after_time(t1, &t2->start_time, t2);
734}
735
736/** 712/**
737 * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's 713 * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
738 * @tsk: task to test 714 * @tsk: task to test
@@ -768,15 +744,49 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
768} 744}
769 745
770/** 746/**
747 * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
748 * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
749 *
750 * Called with cgroup_mutex held
751 *
752 * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
753 * calling callback functions for each.
754 *
755 * Return 0 if successful, -errno if not.
756 */
757static int update_tasks_cpumask(struct cpuset *cs)
758{
759 struct cgroup_scanner scan;
760 struct ptr_heap heap;
761 int retval;
762
763 /*
764 * cgroup_scan_tasks() will initialize heap->gt for us.
765 * heap_init() is still needed here for we should not change
766 * cs->cpus_allowed when heap_init() fails.
767 */
768 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
769 if (retval)
770 return retval;
771
772 scan.cg = cs->css.cgroup;
773 scan.test_task = cpuset_test_cpumask;
774 scan.process_task = cpuset_change_cpumask;
775 scan.heap = &heap;
776 retval = cgroup_scan_tasks(&scan);
777
778 heap_free(&heap);
779 return retval;
780}
781
782/**
771 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it 783 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
772 * @cs: the cpuset to consider 784 * @cs: the cpuset to consider
773 * @buf: buffer of cpu numbers written to this cpuset 785 * @buf: buffer of cpu numbers written to this cpuset
774 */ 786 */
775static int update_cpumask(struct cpuset *cs, char *buf) 787static int update_cpumask(struct cpuset *cs, const char *buf)
776{ 788{
777 struct cpuset trialcs; 789 struct cpuset trialcs;
778 struct cgroup_scanner scan;
779 struct ptr_heap heap;
780 int retval; 790 int retval;
781 int is_load_balanced; 791 int is_load_balanced;
782 792
@@ -792,7 +802,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
792 * that parsing. The validate_change() call ensures that cpusets 802 * that parsing. The validate_change() call ensures that cpusets
793 * with tasks have cpus. 803 * with tasks have cpus.
794 */ 804 */
795 buf = strstrip(buf);
796 if (!*buf) { 805 if (!*buf) {
797 cpus_clear(trialcs.cpus_allowed); 806 cpus_clear(trialcs.cpus_allowed);
798 } else { 807 } else {
@@ -811,10 +820,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
811 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) 820 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
812 return 0; 821 return 0;
813 822
814 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
815 if (retval)
816 return retval;
817
818 is_load_balanced = is_sched_load_balance(&trialcs); 823 is_load_balanced = is_sched_load_balance(&trialcs);
819 824
820 mutex_lock(&callback_mutex); 825 mutex_lock(&callback_mutex);
@@ -825,12 +830,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
825 * Scan tasks in the cpuset, and update the cpumasks of any 830 * Scan tasks in the cpuset, and update the cpumasks of any
826 * that need an update. 831 * that need an update.
827 */ 832 */
828 scan.cg = cs->css.cgroup; 833 retval = update_tasks_cpumask(cs);
829 scan.test_task = cpuset_test_cpumask; 834 if (retval < 0)
830 scan.process_task = cpuset_change_cpumask; 835 return retval;
831 scan.heap = &heap;
832 cgroup_scan_tasks(&scan);
833 heap_free(&heap);
834 836
835 if (is_load_balanced) 837 if (is_load_balanced)
836 rebuild_sched_domains(); 838 rebuild_sched_domains();
@@ -886,74 +888,25 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
886 mutex_unlock(&callback_mutex); 888 mutex_unlock(&callback_mutex);
887} 889}
888 890
889/*
890 * Handle user request to change the 'mems' memory placement
891 * of a cpuset. Needs to validate the request, update the
892 * cpuset's mems_allowed and mems_generation, and for each
893 * task in the cpuset, rebind any vma mempolicies and if
894 * the cpuset is marked 'memory_migrate', migrate the task's
895 * pages to the new memory.
896 *
897 * Call with cgroup_mutex held. May take callback_mutex during call.
898 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
899 * lock each such tasks mm->mmap_sem, scan its vma's and rebind
900 * their mempolicies to the cpusets new mems_allowed.
901 */
902
903static void *cpuset_being_rebound; 891static void *cpuset_being_rebound;
904 892
905static int update_nodemask(struct cpuset *cs, char *buf) 893/**
894 * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
895 * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
896 * @oldmem: old mems_allowed of cpuset cs
897 *
898 * Called with cgroup_mutex held
899 * Return 0 if successful, -errno if not.
900 */
901static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
906{ 902{
907 struct cpuset trialcs;
908 nodemask_t oldmem;
909 struct task_struct *p; 903 struct task_struct *p;
910 struct mm_struct **mmarray; 904 struct mm_struct **mmarray;
911 int i, n, ntasks; 905 int i, n, ntasks;
912 int migrate; 906 int migrate;
913 int fudge; 907 int fudge;
914 int retval;
915 struct cgroup_iter it; 908 struct cgroup_iter it;
916 909 int retval;
917 /*
918 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
919 * it's read-only
920 */
921 if (cs == &top_cpuset)
922 return -EACCES;
923
924 trialcs = *cs;
925
926 /*
927 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
928 * Since nodelist_parse() fails on an empty mask, we special case
929 * that parsing. The validate_change() call ensures that cpusets
930 * with tasks have memory.
931 */
932 buf = strstrip(buf);
933 if (!*buf) {
934 nodes_clear(trialcs.mems_allowed);
935 } else {
936 retval = nodelist_parse(buf, trialcs.mems_allowed);
937 if (retval < 0)
938 goto done;
939
940 if (!nodes_subset(trialcs.mems_allowed,
941 node_states[N_HIGH_MEMORY]))
942 return -EINVAL;
943 }
944 oldmem = cs->mems_allowed;
945 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
946 retval = 0; /* Too easy - nothing to do */
947 goto done;
948 }
949 retval = validate_change(cs, &trialcs);
950 if (retval < 0)
951 goto done;
952
953 mutex_lock(&callback_mutex);
954 cs->mems_allowed = trialcs.mems_allowed;
955 cs->mems_generation = cpuset_mems_generation++;
956 mutex_unlock(&callback_mutex);
957 910
958 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ 911 cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
959 912
@@ -1020,7 +973,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
1020 973
1021 mpol_rebind_mm(mm, &cs->mems_allowed); 974 mpol_rebind_mm(mm, &cs->mems_allowed);
1022 if (migrate) 975 if (migrate)
1023 cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed); 976 cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
1024 mmput(mm); 977 mmput(mm);
1025 } 978 }
1026 979
@@ -1032,6 +985,70 @@ done:
1032 return retval; 985 return retval;
1033} 986}
1034 987
988/*
989 * Handle user request to change the 'mems' memory placement
990 * of a cpuset. Needs to validate the request, update the
991 * cpuset's mems_allowed and mems_generation, and for each
992 * task in the cpuset, rebind any vma mempolicies and if
993 * the cpuset is marked 'memory_migrate', migrate the task's
994 * pages to the new memory.
995 *
996 * Call with cgroup_mutex held. May take callback_mutex during call.
997 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
998 * lock each such task's mm->mmap_sem, scan its vma's and rebind
999 * their mempolicies to the cpuset's new mems_allowed.
1000 */
1001static int update_nodemask(struct cpuset *cs, const char *buf)
1002{
1003 struct cpuset trialcs;
1004 nodemask_t oldmem;
1005 int retval;
1006
1007 /*
1008 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
1009 * it's read-only
1010 */
1011 if (cs == &top_cpuset)
1012 return -EACCES;
1013
1014 trialcs = *cs;
1015
1016 /*
1017 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
1018 * Since nodelist_parse() fails on an empty mask, we special case
1019 * that parsing. The validate_change() call ensures that cpusets
1020 * with tasks have memory.
1021 */
1022 if (!*buf) {
1023 nodes_clear(trialcs.mems_allowed);
1024 } else {
1025 retval = nodelist_parse(buf, trialcs.mems_allowed);
1026 if (retval < 0)
1027 goto done;
1028
1029 if (!nodes_subset(trialcs.mems_allowed,
1030 node_states[N_HIGH_MEMORY]))
1031 return -EINVAL;
1032 }
1033 oldmem = cs->mems_allowed;
1034 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
1035 retval = 0; /* Too easy - nothing to do */
1036 goto done;
1037 }
1038 retval = validate_change(cs, &trialcs);
1039 if (retval < 0)
1040 goto done;
1041
1042 mutex_lock(&callback_mutex);
1043 cs->mems_allowed = trialcs.mems_allowed;
1044 cs->mems_generation = cpuset_mems_generation++;
1045 mutex_unlock(&callback_mutex);
1046
1047 retval = update_tasks_nodemask(cs, &oldmem);
1048done:
1049 return retval;
1050}
1051
1035int current_cpuset_is_being_rebound(void) 1052int current_cpuset_is_being_rebound(void)
1036{ 1053{
1037 return task_cs(current) == cpuset_being_rebound; 1054 return task_cs(current) == cpuset_being_rebound;
@@ -1044,7 +1061,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
1044 1061
1045 if (val != cs->relax_domain_level) { 1062 if (val != cs->relax_domain_level) {
1046 cs->relax_domain_level = val; 1063 cs->relax_domain_level = val;
1047 rebuild_sched_domains(); 1064 if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
1065 rebuild_sched_domains();
1048 } 1066 }
1049 1067
1050 return 0; 1068 return 0;
@@ -1256,72 +1274,14 @@ typedef enum {
1256 FILE_SPREAD_SLAB, 1274 FILE_SPREAD_SLAB,
1257} cpuset_filetype_t; 1275} cpuset_filetype_t;
1258 1276
1259static ssize_t cpuset_common_file_write(struct cgroup *cont,
1260 struct cftype *cft,
1261 struct file *file,
1262 const char __user *userbuf,
1263 size_t nbytes, loff_t *unused_ppos)
1264{
1265 struct cpuset *cs = cgroup_cs(cont);
1266 cpuset_filetype_t type = cft->private;
1267 char *buffer;
1268 int retval = 0;
1269
1270 /* Crude upper limit on largest legitimate cpulist user might write. */
1271 if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
1272 return -E2BIG;
1273
1274 /* +1 for nul-terminator */
1275 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
1276 if (!buffer)
1277 return -ENOMEM;
1278
1279 if (copy_from_user(buffer, userbuf, nbytes)) {
1280 retval = -EFAULT;
1281 goto out1;
1282 }
1283 buffer[nbytes] = 0; /* nul-terminate */
1284
1285 cgroup_lock();
1286
1287 if (cgroup_is_removed(cont)) {
1288 retval = -ENODEV;
1289 goto out2;
1290 }
1291
1292 switch (type) {
1293 case FILE_CPULIST:
1294 retval = update_cpumask(cs, buffer);
1295 break;
1296 case FILE_MEMLIST:
1297 retval = update_nodemask(cs, buffer);
1298 break;
1299 default:
1300 retval = -EINVAL;
1301 goto out2;
1302 }
1303
1304 if (retval == 0)
1305 retval = nbytes;
1306out2:
1307 cgroup_unlock();
1308out1:
1309 kfree(buffer);
1310 return retval;
1311}
1312
1313static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) 1277static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1314{ 1278{
1315 int retval = 0; 1279 int retval = 0;
1316 struct cpuset *cs = cgroup_cs(cgrp); 1280 struct cpuset *cs = cgroup_cs(cgrp);
1317 cpuset_filetype_t type = cft->private; 1281 cpuset_filetype_t type = cft->private;
1318 1282
1319 cgroup_lock(); 1283 if (!cgroup_lock_live_group(cgrp))
1320
1321 if (cgroup_is_removed(cgrp)) {
1322 cgroup_unlock();
1323 return -ENODEV; 1284 return -ENODEV;
1324 }
1325 1285
1326 switch (type) { 1286 switch (type) {
1327 case FILE_CPU_EXCLUSIVE: 1287 case FILE_CPU_EXCLUSIVE:
@@ -1367,12 +1327,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
1367 struct cpuset *cs = cgroup_cs(cgrp); 1327 struct cpuset *cs = cgroup_cs(cgrp);
1368 cpuset_filetype_t type = cft->private; 1328 cpuset_filetype_t type = cft->private;
1369 1329
1370 cgroup_lock(); 1330 if (!cgroup_lock_live_group(cgrp))
1371
1372 if (cgroup_is_removed(cgrp)) {
1373 cgroup_unlock();
1374 return -ENODEV; 1331 return -ENODEV;
1375 } 1332
1376 switch (type) { 1333 switch (type) {
1377 case FILE_SCHED_RELAX_DOMAIN_LEVEL: 1334 case FILE_SCHED_RELAX_DOMAIN_LEVEL:
1378 retval = update_relax_domain_level(cs, val); 1335 retval = update_relax_domain_level(cs, val);
@@ -1386,6 +1343,32 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
1386} 1343}
1387 1344
1388/* 1345/*
1346 * Common handling for a write to a "cpus" or "mems" file.
1347 */
1348static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1349 const char *buf)
1350{
1351 int retval = 0;
1352
1353 if (!cgroup_lock_live_group(cgrp))
1354 return -ENODEV;
1355
1356 switch (cft->private) {
1357 case FILE_CPULIST:
1358 retval = update_cpumask(cgroup_cs(cgrp), buf);
1359 break;
1360 case FILE_MEMLIST:
1361 retval = update_nodemask(cgroup_cs(cgrp), buf);
1362 break;
1363 default:
1364 retval = -EINVAL;
1365 break;
1366 }
1367 cgroup_unlock();
1368 return retval;
1369}
1370
1371/*
1389 * These ascii lists should be read in a single call, by using a user 1372 * These ascii lists should be read in a single call, by using a user
1390 * buffer large enough to hold the entire map. If read in smaller 1373 * buffer large enough to hold the entire map. If read in smaller
1391 * chunks, there is no guarantee of atomicity. Since the display format 1374 * chunks, there is no guarantee of atomicity. Since the display format
@@ -1504,14 +1487,16 @@ static struct cftype files[] = {
1504 { 1487 {
1505 .name = "cpus", 1488 .name = "cpus",
1506 .read = cpuset_common_file_read, 1489 .read = cpuset_common_file_read,
1507 .write = cpuset_common_file_write, 1490 .write_string = cpuset_write_resmask,
1491 .max_write_len = (100U + 6 * NR_CPUS),
1508 .private = FILE_CPULIST, 1492 .private = FILE_CPULIST,
1509 }, 1493 },
1510 1494
1511 { 1495 {
1512 .name = "mems", 1496 .name = "mems",
1513 .read = cpuset_common_file_read, 1497 .read = cpuset_common_file_read,
1514 .write = cpuset_common_file_write, 1498 .write_string = cpuset_write_resmask,
1499 .max_write_len = (100U + 6 * MAX_NUMNODES),
1515 .private = FILE_MEMLIST, 1500 .private = FILE_MEMLIST,
1516 }, 1501 },
1517 1502
@@ -1792,7 +1777,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
1792 scan.scan.heap = NULL; 1777 scan.scan.heap = NULL;
1793 scan.to = to->css.cgroup; 1778 scan.to = to->css.cgroup;
1794 1779
1795 if (cgroup_scan_tasks((struct cgroup_scanner *)&scan)) 1780 if (cgroup_scan_tasks(&scan.scan))
1796 printk(KERN_ERR "move_member_tasks_to_cpuset: " 1781 printk(KERN_ERR "move_member_tasks_to_cpuset: "
1797 "cgroup_scan_tasks failed\n"); 1782 "cgroup_scan_tasks failed\n");
1798} 1783}
@@ -1852,6 +1837,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
1852 struct cpuset *child; /* scans child cpusets of cp */ 1837 struct cpuset *child; /* scans child cpusets of cp */
1853 struct list_head queue; 1838 struct list_head queue;
1854 struct cgroup *cont; 1839 struct cgroup *cont;
1840 nodemask_t oldmems;
1855 1841
1856 INIT_LIST_HEAD(&queue); 1842 INIT_LIST_HEAD(&queue);
1857 1843
@@ -1871,6 +1857,8 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
1871 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 1857 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
1872 continue; 1858 continue;
1873 1859
1860 oldmems = cp->mems_allowed;
1861
1874 /* Remove offline cpus and mems from this cpuset. */ 1862 /* Remove offline cpus and mems from this cpuset. */
1875 mutex_lock(&callback_mutex); 1863 mutex_lock(&callback_mutex);
1876 cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); 1864 cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
@@ -1882,6 +1870,10 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
1882 if (cpus_empty(cp->cpus_allowed) || 1870 if (cpus_empty(cp->cpus_allowed) ||
1883 nodes_empty(cp->mems_allowed)) 1871 nodes_empty(cp->mems_allowed))
1884 remove_tasks_in_empty_cpuset(cp); 1872 remove_tasks_in_empty_cpuset(cp);
1873 else {
1874 update_tasks_cpumask(cp);
1875 update_tasks_nodemask(cp, &oldmems);
1876 }
1885 } 1877 }
1886} 1878}
1887 1879
@@ -1974,7 +1966,6 @@ void __init cpuset_init_smp(void)
1974} 1966}
1975 1967
1976/** 1968/**
1977
1978 * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset. 1969 * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
1979 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. 1970 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
1980 * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. 1971 * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
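
The cpuset hunks above pull the per-task rescan loops out of update_cpumask() and update_nodemask() into update_tasks_cpumask() and update_tasks_nodemask(), so the hotplug path in scan_for_empty_cpusets() can rebind the tasks of a still-populated cpuset without re-running user-input parsing and validation. A minimal userspace sketch of the same split; every name below is invented for illustration:

/* Hypothetical sketch, not kernel code: the write path parses and
 * validates before mutating anything, then both it and the hotplug
 * path reuse one apply helper. */
#include <stdio.h>

struct cpuset_like {
	unsigned long cpus_allowed;	/* stand-in for cpumask_t */
	int ntasks;
};

/* Shared helper, analogous to update_tasks_cpumask() */
static int apply_mask_to_tasks(struct cpuset_like *cs)
{
	for (int i = 0; i < cs->ntasks; i++)
		printf("task %d -> mask %#lx\n", i, cs->cpus_allowed);
	return 0;
}

/* Write path: parse and validate first, then apply. */
static int write_cpus(struct cpuset_like *cs, const char *buf)
{
	unsigned long mask;

	if (sscanf(buf, "%lx", &mask) != 1)
		return -1;		/* reject before touching cs */
	cs->cpus_allowed = mask;
	return apply_mask_to_tasks(cs);
}

/* Hotplug path: nothing to parse, just re-apply the trimmed mask. */
static void on_cpu_offline(struct cpuset_like *cs, unsigned long online)
{
	cs->cpus_allowed &= online;
	apply_mask_to_tasks(cs);
}

int main(void)
{
	struct cpuset_like cs = { .cpus_allowed = 0xf, .ntasks = 2 };

	write_cpus(&cs, "3");
	on_cpu_offline(&cs, 0x1);
	return 0;
}
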
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 10e43fd8b721..b3179dad71be 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -145,8 +145,11 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
145 d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; 145 d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
146 tmp = d->swapin_delay_total + tsk->delays->swapin_delay; 146 tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
147 d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; 147 d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
148 tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
149 d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
148 d->blkio_count += tsk->delays->blkio_count; 150 d->blkio_count += tsk->delays->blkio_count;
149 d->swapin_count += tsk->delays->swapin_count; 151 d->swapin_count += tsk->delays->swapin_count;
152 d->freepages_count += tsk->delays->freepages_count;
150 spin_unlock_irqrestore(&tsk->delays->lock, flags); 153 spin_unlock_irqrestore(&tsk->delays->lock, flags);
151 154
152done: 155done:
@@ -165,3 +168,16 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
165 return ret; 168 return ret;
166} 169}
167 170
171void __delayacct_freepages_start(void)
172{
173 delayacct_start(&current->delays->freepages_start);
174}
175
176void __delayacct_freepages_end(void)
177{
178 delayacct_end(&current->delays->freepages_start,
179 &current->delays->freepages_end,
180 &current->delays->freepages_delay,
181 &current->delays->freepages_count);
182}
183
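
The freepages lines added above follow the same wrap-around guard the function already uses for the blkio and swapin delays: sum into a temporary, and reset the total to zero if the unsigned addition wrapped. A small standalone illustration of that guard (helper name is hypothetical):

/* Sketch: saturate-to-zero accumulation for u64 counters. */
#include <stdint.h>
#include <stdio.h>

static uint64_t add_clamped(uint64_t total, uint64_t delta)
{
	uint64_t tmp = total + delta;

	return (tmp < total) ? 0 : tmp;	/* tmp < total => it wrapped */
}

int main(void)
{
	printf("%llu\n", (unsigned long long)add_clamped(10, 5));	  /* 15 */
	printf("%llu\n", (unsigned long long)add_clamped(UINT64_MAX, 2)); /* 0 */
	return 0;
}
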
diff --git a/kernel/exit.c b/kernel/exit.c
index 93d2711b9381..ad933bb29ec7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,6 @@ static void __exit_signal(struct task_struct *tsk)
85 BUG_ON(!sig); 85 BUG_ON(!sig);
86 BUG_ON(!atomic_read(&sig->count)); 86 BUG_ON(!atomic_read(&sig->count));
87 87
88 rcu_read_lock();
89 sighand = rcu_dereference(tsk->sighand); 88 sighand = rcu_dereference(tsk->sighand);
90 spin_lock(&sighand->siglock); 89 spin_lock(&sighand->siglock);
91 90
@@ -121,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk)
121 sig->nivcsw += tsk->nivcsw; 120 sig->nivcsw += tsk->nivcsw;
122 sig->inblock += task_io_get_inblock(tsk); 121 sig->inblock += task_io_get_inblock(tsk);
123 sig->oublock += task_io_get_oublock(tsk); 122 sig->oublock += task_io_get_oublock(tsk);
123#ifdef CONFIG_TASK_XACCT
124 sig->rchar += tsk->rchar;
125 sig->wchar += tsk->wchar;
126 sig->syscr += tsk->syscr;
127 sig->syscw += tsk->syscw;
128#endif /* CONFIG_TASK_XACCT */
129#ifdef CONFIG_TASK_IO_ACCOUNTING
130 sig->ioac.read_bytes += tsk->ioac.read_bytes;
131 sig->ioac.write_bytes += tsk->ioac.write_bytes;
132 sig->ioac.cancelled_write_bytes +=
133 tsk->ioac.cancelled_write_bytes;
134#endif /* CONFIG_TASK_IO_ACCOUNTING */
124 sig->sum_sched_runtime += tsk->se.sum_exec_runtime; 135 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
125 sig = NULL; /* Marker for below. */ 136 sig = NULL; /* Marker for below. */
126 } 137 }
@@ -136,7 +147,6 @@ static void __exit_signal(struct task_struct *tsk)
136 tsk->signal = NULL; 147 tsk->signal = NULL;
137 tsk->sighand = NULL; 148 tsk->sighand = NULL;
138 spin_unlock(&sighand->siglock); 149 spin_unlock(&sighand->siglock);
139 rcu_read_unlock();
140 150
141 __cleanup_sighand(sighand); 151 __cleanup_sighand(sighand);
142 clear_tsk_thread_flag(tsk,TIF_SIGPENDING); 152 clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
@@ -432,7 +442,7 @@ void daemonize(const char *name, ...)
432 * We don't want to have TIF_FREEZE set if the system-wide hibernation 442 * We don't want to have TIF_FREEZE set if the system-wide hibernation
433 * or suspend transition begins right now. 443 * or suspend transition begins right now.
434 */ 444 */
435 current->flags |= PF_NOFREEZE; 445 current->flags |= (PF_NOFREEZE | PF_KTHREAD);
436 446
437 if (current->nsproxy != &init_nsproxy) { 447 if (current->nsproxy != &init_nsproxy) {
438 get_nsproxy(&init_nsproxy); 448 get_nsproxy(&init_nsproxy);
@@ -666,26 +676,40 @@ assign_new_owner:
666static void exit_mm(struct task_struct * tsk) 676static void exit_mm(struct task_struct * tsk)
667{ 677{
668 struct mm_struct *mm = tsk->mm; 678 struct mm_struct *mm = tsk->mm;
679 struct core_state *core_state;
669 680
670 mm_release(tsk, mm); 681 mm_release(tsk, mm);
671 if (!mm) 682 if (!mm)
672 return; 683 return;
673 /* 684 /*
674 * Serialize with any possible pending coredump. 685 * Serialize with any possible pending coredump.
675 * We must hold mmap_sem around checking core_waiters 686 * We must hold mmap_sem around checking core_state
676 * and clearing tsk->mm. The core-inducing thread 687 * and clearing tsk->mm. The core-inducing thread
677 * will increment core_waiters for each thread in the 688 * will increment ->nr_threads for each thread in the
678 * group with ->mm != NULL. 689 * group with ->mm != NULL.
679 */ 690 */
680 down_read(&mm->mmap_sem); 691 down_read(&mm->mmap_sem);
681 if (mm->core_waiters) { 692 core_state = mm->core_state;
693 if (core_state) {
694 struct core_thread self;
682 up_read(&mm->mmap_sem); 695 up_read(&mm->mmap_sem);
683 down_write(&mm->mmap_sem);
684 if (!--mm->core_waiters)
685 complete(mm->core_startup_done);
686 up_write(&mm->mmap_sem);
687 696
688 wait_for_completion(&mm->core_done); 697 self.task = tsk;
698 self.next = xchg(&core_state->dumper.next, &self);
699 /*
700 * Implies mb(), the result of xchg() must be visible
701 * to core_state->dumper.
702 */
703 if (atomic_dec_and_test(&core_state->nr_threads))
704 complete(&core_state->startup);
705
706 for (;;) {
707 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
708 if (!self.task) /* see coredump_finish() */
709 break;
710 schedule();
711 }
712 __set_task_state(tsk, TASK_RUNNING);
689 down_read(&mm->mmap_sem); 713 down_read(&mm->mmap_sem);
690 } 714 }
691 atomic_inc(&mm->mm_count); 715 atomic_inc(&mm->mm_count);
@@ -1354,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options,
1354 psig->coublock += 1378 psig->coublock +=
1355 task_io_get_oublock(p) + 1379 task_io_get_oublock(p) +
1356 sig->oublock + sig->coublock; 1380 sig->oublock + sig->coublock;
1381#ifdef CONFIG_TASK_XACCT
1382 psig->rchar += p->rchar + sig->rchar;
1383 psig->wchar += p->wchar + sig->wchar;
1384 psig->syscr += p->syscr + sig->syscr;
1385 psig->syscw += p->syscw + sig->syscw;
1386#endif /* CONFIG_TASK_XACCT */
1387#ifdef CONFIG_TASK_IO_ACCOUNTING
1388 psig->ioac.read_bytes +=
1389 p->ioac.read_bytes + sig->ioac.read_bytes;
1390 psig->ioac.write_bytes +=
1391 p->ioac.write_bytes + sig->ioac.write_bytes;
1392 psig->ioac.cancelled_write_bytes +=
1393 p->ioac.cancelled_write_bytes +
1394 sig->ioac.cancelled_write_bytes;
1395#endif /* CONFIG_TASK_IO_ACCOUNTING */
1357 spin_unlock_irq(&p->parent->sighand->siglock); 1396 spin_unlock_irq(&p->parent->sighand->siglock);
1358 } 1397 }
1359 1398
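
The exit_mm() rewrite above replaces the core_waiters counter with a core_state structure: each exiting thread links itself onto the dumper's list with an atomic exchange, and whichever thread drops nr_threads to zero completes the dumper's startup. A userspace sketch of that handshake using C11 atomics; all type and field names here are invented:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct core_thread_like {
	struct core_thread_like *next;
	int id;
};

struct core_state_like {
	_Atomic(struct core_thread_like *) dumper;
	atomic_int nr_threads;
};

static void exiting_thread(struct core_state_like *cs,
			   struct core_thread_like *self)
{
	/* xchg: atomically push self in front of the current list head;
	 * the exchange also orders the write for the dumper's benefit. */
	self->next = atomic_exchange(&cs->dumper, self);

	if (atomic_fetch_sub(&cs->nr_threads, 1) == 1)
		printf("thread %d is last: complete(&startup)\n", self->id);
}

int main(void)
{
	struct core_state_like cs;
	struct core_thread_like a = { .id = 1 }, b = { .id = 2 };

	atomic_init(&cs.dumper, NULL);
	atomic_init(&cs.nr_threads, 2);
	exiting_thread(&cs, &a);
	exiting_thread(&cs, &b);
	return 0;
}
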
diff --git a/kernel/fork.c b/kernel/fork.c
index 552c8d8e77ad..b99d73e971a4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -93,6 +93,23 @@ int nr_processes(void)
93static struct kmem_cache *task_struct_cachep; 93static struct kmem_cache *task_struct_cachep;
94#endif 94#endif
95 95
96#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
97static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
98{
99#ifdef CONFIG_DEBUG_STACK_USAGE
100 gfp_t mask = GFP_KERNEL | __GFP_ZERO;
101#else
102 gfp_t mask = GFP_KERNEL;
103#endif
104 return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
105}
106
107static inline void free_thread_info(struct thread_info *ti)
108{
109 free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
110}
111#endif
112
96/* SLAB cache for signal_struct structures (tsk->signal) */ 113/* SLAB cache for signal_struct structures (tsk->signal) */
97static struct kmem_cache *signal_cachep; 114static struct kmem_cache *signal_cachep;
98 115
@@ -383,7 +400,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
383 INIT_LIST_HEAD(&mm->mmlist); 400 INIT_LIST_HEAD(&mm->mmlist);
384 mm->flags = (current->mm) ? current->mm->flags 401 mm->flags = (current->mm) ? current->mm->flags
385 : MMF_DUMP_FILTER_DEFAULT; 402 : MMF_DUMP_FILTER_DEFAULT;
386 mm->core_waiters = 0; 403 mm->core_state = NULL;
387 mm->nr_ptes = 0; 404 mm->nr_ptes = 0;
388 set_mm_counter(mm, file_rss, 0); 405 set_mm_counter(mm, file_rss, 0);
389 set_mm_counter(mm, anon_rss, 0); 406 set_mm_counter(mm, anon_rss, 0);
@@ -457,7 +474,7 @@ EXPORT_SYMBOL_GPL(mmput);
457/** 474/**
458 * get_task_mm - acquire a reference to the task's mm 475 * get_task_mm - acquire a reference to the task's mm
459 * 476 *
460 * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning 477 * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
461 * this kernel workthread has transiently adopted a user mm with use_mm, 478 * this kernel workthread has transiently adopted a user mm with use_mm,
462 * to do its AIO) is not set and if so returns a reference to it, after 479 * to do its AIO) is not set and if so returns a reference to it, after
463 * bumping up the use count. User must release the mm via mmput() 480 * bumping up the use count. User must release the mm via mmput()
@@ -470,7 +487,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
470 task_lock(task); 487 task_lock(task);
471 mm = task->mm; 488 mm = task->mm;
472 if (mm) { 489 if (mm) {
473 if (task->flags & PF_BORROWED_MM) 490 if (task->flags & PF_KTHREAD)
474 mm = NULL; 491 mm = NULL;
475 else 492 else
476 atomic_inc(&mm->mm_users); 493 atomic_inc(&mm->mm_users);
@@ -795,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
795 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 812 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
796 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 813 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
797 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 814 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
815#ifdef CONFIG_TASK_XACCT
816 sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
817#endif
818#ifdef CONFIG_TASK_IO_ACCOUNTING
819 memset(&sig->ioac, 0, sizeof(sig->ioac));
820#endif
798 sig->sum_sched_runtime = 0; 821 sig->sum_sched_runtime = 0;
799 INIT_LIST_HEAD(&sig->cpu_timers[0]); 822 INIT_LIST_HEAD(&sig->cpu_timers[0]);
800 INIT_LIST_HEAD(&sig->cpu_timers[1]); 823 INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -1090,6 +1113,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1090 if (clone_flags & CLONE_THREAD) 1113 if (clone_flags & CLONE_THREAD)
1091 p->tgid = current->tgid; 1114 p->tgid = current->tgid;
1092 1115
1116 if (current->nsproxy != p->nsproxy) {
1117 retval = ns_cgroup_clone(p, pid);
1118 if (retval)
1119 goto bad_fork_free_pid;
1120 }
1121
1093 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1122 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1094 /* 1123 /*
1095 * Clear TID on mm_release()? 1124 * Clear TID on mm_release()?
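
The alloc_thread_info()/free_thread_info() block added above is the generic fallback: architectures that define __HAVE_ARCH_THREAD_INFO_ALLOCATOR keep their own versions, everyone else now shares this page-based one, with debug builds zeroing the stack pages. The same guard-macro pattern in a self-contained sketch (macro and function names invented):

#include <stdio.h>
#include <stdlib.h>

/* A port-specific header would #define HAVE_ARCH_BUF_ALLOCATOR and
 * provide its own alloc_buf()/free_buf() pair instead. */
#ifndef HAVE_ARCH_BUF_ALLOCATOR
static inline void *alloc_buf(size_t size, int debug)
{
	/* zero the buffer in debug builds, mirroring __GFP_ZERO */
	return debug ? calloc(1, size) : malloc(size);
}

static inline void free_buf(void *p)
{
	free(p);
}
#endif

int main(void)
{
	void *p = alloc_buf(64, 1);

	printf("buf at %p\n", p);
	free_buf(p);
	return 0;
}
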
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5bc6e5ecc493..f8914b92b664 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -260,9 +260,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
260 } 260 }
261 } else { 261 } else {
262 if (desc->wake_depth == 0) { 262 if (desc->wake_depth == 0) {
263 printk(KERN_WARNING "Unbalanced IRQ %d " 263 WARN(1, "Unbalanced IRQ %d wake disable\n", irq);
264 "wake disable\n", irq);
265 WARN_ON(1);
266 } else if (--desc->wake_depth == 0) { 264 } else if (--desc->wake_depth == 0) {
267 ret = set_irq_wake_real(irq, on); 265 ret = set_irq_wake_real(irq, on);
268 if (ret) 266 if (ret)
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6fc0040f3e3a..38fc10ac7541 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -176,7 +176,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
176 high = kallsyms_num_syms; 176 high = kallsyms_num_syms;
177 177
178 while (high - low > 1) { 178 while (high - low > 1) {
179 mid = (low + high) / 2; 179 mid = low + (high - low) / 2;
180 if (kallsyms_addresses[mid] <= addr) 180 if (kallsyms_addresses[mid] <= addr)
181 low = mid; 181 low = mid;
182 else 182 else
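
The one-line kallsyms change is the classic binary-search fix: (low + high) / 2 can wrap when both indexes are large, whereas low + (high - low) / 2 never exceeds high. Demonstrated with unsigned arithmetic, where the wraparound is well defined and silently wrong:

#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned int low = UINT_MAX - 10, high = UINT_MAX - 2;

	unsigned int bad  = (low + high) / 2;		/* wraps to a tiny value */
	unsigned int good = low + (high - low) / 2;	/* stays between low and high */

	printf("bad=%u good=%u\n", bad, good);
	return 0;
}
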
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 2989f67c4446..2456d1a0befb 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -352,16 +352,17 @@ static inline void register_pm_notifier_callback(void) {}
352 * @path: path to usermode executable 352 * @path: path to usermode executable
353 * @argv: arg vector for process 353 * @argv: arg vector for process
354 * @envp: environment for process 354 * @envp: environment for process
355 * @gfp_mask: gfp mask for memory allocation
355 * 356 *
356 * Returns either %NULL on allocation failure, or a subprocess_info 357 * Returns either %NULL on allocation failure, or a subprocess_info
357 * structure. This should be passed to call_usermodehelper_exec to 358 * structure. This should be passed to call_usermodehelper_exec to
358 * exec the process and free the structure. 359 * exec the process and free the structure.
359 */ 360 */
360struct subprocess_info *call_usermodehelper_setup(char *path, 361struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
361 char **argv, char **envp) 362 char **envp, gfp_t gfp_mask)
362{ 363{
363 struct subprocess_info *sub_info; 364 struct subprocess_info *sub_info;
364 sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); 365 sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
365 if (!sub_info) 366 if (!sub_info)
366 goto out; 367 goto out;
367 368
@@ -494,7 +495,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
494 struct subprocess_info *sub_info; 495 struct subprocess_info *sub_info;
495 int ret; 496 int ret;
496 497
497 sub_info = call_usermodehelper_setup(path, argv, envp); 498 sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
498 if (sub_info == NULL) 499 if (sub_info == NULL)
499 return -ENOMEM; 500 return -ENOMEM;
500 501
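
The kmod change threads a gfp_mask parameter down to the subprocess_info allocation, so callers in sleepable context can pass GFP_KERNEL instead of the previously hardcoded GFP_ATOMIC. The general shape of the API change, as a userspace sketch with an enum standing in for gfp_t (all names invented):

#include <stdio.h>
#include <stdlib.h>

typedef enum { CTX_MAY_SLEEP, CTX_ATOMIC } alloc_ctx_t;

struct helper_info { const char *path; };

/* The allocation policy now comes from the caller, not the callee. */
static struct helper_info *helper_setup(const char *path, alloc_ctx_t ctx)
{
	struct helper_info *info = calloc(1, sizeof(*info));

	if (info)
		info->path = path;
	(void)ctx;	/* a kernel would pick allocator behavior from this */
	return info;
}

int main(void)
{
	struct helper_info *info = helper_setup("/sbin/helper", CTX_MAY_SLEEP);

	if (info)
		printf("helper: %s\n", info->path);
	free(info);
	return 0;
}
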
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1485ca8d0e00..75bc2cd9ebc6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -62,6 +62,7 @@
62 addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name))) 62 addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
63#endif 63#endif
64 64
65static int kprobes_initialized;
65static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; 66static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
66static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 67static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
67 68
@@ -69,8 +70,15 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
69static bool kprobe_enabled; 70static bool kprobe_enabled;
70 71
71DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 72DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
72DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
74static struct {
75 spinlock_t lock ____cacheline_aligned;
76} kretprobe_table_locks[KPROBE_TABLE_SIZE];
77
78static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
79{
80 return &(kretprobe_table_locks[hash].lock);
81}
74 82
75/* 83/*
76 * Normally, functions that we'd want to prohibit kprobes in, are marked 84 * Normally, functions that we'd want to prohibit kprobes in, are marked
@@ -368,26 +376,53 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
368 return; 376 return;
369} 377}
370 378
371/* Called with kretprobe_lock held */
372void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, 379void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
373 struct hlist_head *head) 380 struct hlist_head *head)
374{ 381{
382 struct kretprobe *rp = ri->rp;
383
375 /* remove rp inst off the kretprobe_inst_table */ 384 /* remove rp inst off the kretprobe_inst_table */
376 hlist_del(&ri->hlist); 385 hlist_del(&ri->hlist);
377 if (ri->rp) { 386 INIT_HLIST_NODE(&ri->hlist);
378 /* remove rp inst off the used list */ 387 if (likely(rp)) {
379 hlist_del(&ri->uflist); 388 spin_lock(&rp->lock);
380 /* put rp inst back onto the free list */ 389 hlist_add_head(&ri->hlist, &rp->free_instances);
381 INIT_HLIST_NODE(&ri->uflist); 390 spin_unlock(&rp->lock);
382 hlist_add_head(&ri->uflist, &ri->rp->free_instances);
383 } else 391 } else
384 /* Unregistering */ 392 /* Unregistering */
385 hlist_add_head(&ri->hlist, head); 393 hlist_add_head(&ri->hlist, head);
386} 394}
387 395
388struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) 396void kretprobe_hash_lock(struct task_struct *tsk,
397 struct hlist_head **head, unsigned long *flags)
389{ 398{
390 return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; 399 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
400 spinlock_t *hlist_lock;
401
402 *head = &kretprobe_inst_table[hash];
403 hlist_lock = kretprobe_table_lock_ptr(hash);
404 spin_lock_irqsave(hlist_lock, *flags);
405}
406
407void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
408{
409 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
410 spin_lock_irqsave(hlist_lock, *flags);
411}
412
413void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
414{
415 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
416 spinlock_t *hlist_lock;
417
418 hlist_lock = kretprobe_table_lock_ptr(hash);
419 spin_unlock_irqrestore(hlist_lock, *flags);
420}
421
422void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
423{
424 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
425 spin_unlock_irqrestore(hlist_lock, *flags);
391} 426}
392 427
393/* 428/*
@@ -401,17 +436,21 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
401 struct kretprobe_instance *ri; 436 struct kretprobe_instance *ri;
402 struct hlist_head *head, empty_rp; 437 struct hlist_head *head, empty_rp;
403 struct hlist_node *node, *tmp; 438 struct hlist_node *node, *tmp;
404 unsigned long flags = 0; 439 unsigned long hash, flags = 0;
405 440
406 INIT_HLIST_HEAD(&empty_rp); 441 if (unlikely(!kprobes_initialized))
407 spin_lock_irqsave(&kretprobe_lock, flags); 442 /* Early boot. kretprobe_table_locks not yet initialized. */
408 head = kretprobe_inst_table_head(tk); 443 return;
444
445 hash = hash_ptr(tk, KPROBE_HASH_BITS);
446 head = &kretprobe_inst_table[hash];
447 kretprobe_table_lock(hash, &flags);
409 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 448 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
410 if (ri->task == tk) 449 if (ri->task == tk)
411 recycle_rp_inst(ri, &empty_rp); 450 recycle_rp_inst(ri, &empty_rp);
412 } 451 }
413 spin_unlock_irqrestore(&kretprobe_lock, flags); 452 kretprobe_table_unlock(hash, &flags);
414 453 INIT_HLIST_HEAD(&empty_rp);
415 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 454 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
416 hlist_del(&ri->hlist); 455 hlist_del(&ri->hlist);
417 kfree(ri); 456 kfree(ri);
@@ -423,24 +462,29 @@ static inline void free_rp_inst(struct kretprobe *rp)
423 struct kretprobe_instance *ri; 462 struct kretprobe_instance *ri;
424 struct hlist_node *pos, *next; 463 struct hlist_node *pos, *next;
425 464
426 hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) { 465 hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
427 hlist_del(&ri->uflist); 466 hlist_del(&ri->hlist);
428 kfree(ri); 467 kfree(ri);
429 } 468 }
430} 469}
431 470
432static void __kprobes cleanup_rp_inst(struct kretprobe *rp) 471static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
433{ 472{
434 unsigned long flags; 473 unsigned long flags, hash;
435 struct kretprobe_instance *ri; 474 struct kretprobe_instance *ri;
436 struct hlist_node *pos, *next; 475 struct hlist_node *pos, *next;
476 struct hlist_head *head;
477
437 /* No race here */ 478 /* No race here */
438 spin_lock_irqsave(&kretprobe_lock, flags); 479 for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
439 hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) { 480 kretprobe_table_lock(hash, &flags);
440 ri->rp = NULL; 481 head = &kretprobe_inst_table[hash];
441 hlist_del(&ri->uflist); 482 hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
483 if (ri->rp == rp)
484 ri->rp = NULL;
485 }
486 kretprobe_table_unlock(hash, &flags);
442 } 487 }
443 spin_unlock_irqrestore(&kretprobe_lock, flags);
444 free_rp_inst(rp); 488 free_rp_inst(rp);
445} 489}
446 490
@@ -831,32 +875,37 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
831 struct pt_regs *regs) 875 struct pt_regs *regs)
832{ 876{
833 struct kretprobe *rp = container_of(p, struct kretprobe, kp); 877 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
834 unsigned long flags = 0; 878 unsigned long hash, flags = 0;
879 struct kretprobe_instance *ri;
835 880
836 /*TODO: consider to only swap the RA after the last pre_handler fired */ 881 /*TODO: consider to only swap the RA after the last pre_handler fired */
837 spin_lock_irqsave(&kretprobe_lock, flags); 882 hash = hash_ptr(current, KPROBE_HASH_BITS);
883 spin_lock_irqsave(&rp->lock, flags);
838 if (!hlist_empty(&rp->free_instances)) { 884 if (!hlist_empty(&rp->free_instances)) {
839 struct kretprobe_instance *ri;
840
841 ri = hlist_entry(rp->free_instances.first, 885 ri = hlist_entry(rp->free_instances.first,
842 struct kretprobe_instance, uflist); 886 struct kretprobe_instance, hlist);
887 hlist_del(&ri->hlist);
888 spin_unlock_irqrestore(&rp->lock, flags);
889
843 ri->rp = rp; 890 ri->rp = rp;
844 ri->task = current; 891 ri->task = current;
845 892
846 if (rp->entry_handler && rp->entry_handler(ri, regs)) { 893 if (rp->entry_handler && rp->entry_handler(ri, regs)) {
847 spin_unlock_irqrestore(&kretprobe_lock, flags); 894 spin_unlock_irqrestore(&rp->lock, flags);
848 return 0; 895 return 0;
849 } 896 }
850 897
851 arch_prepare_kretprobe(ri, regs); 898 arch_prepare_kretprobe(ri, regs);
852 899
853 /* XXX(hch): why is there no hlist_move_head? */ 900 /* XXX(hch): why is there no hlist_move_head? */
854 hlist_del(&ri->uflist); 901 INIT_HLIST_NODE(&ri->hlist);
855 hlist_add_head(&ri->uflist, &ri->rp->used_instances); 902 kretprobe_table_lock(hash, &flags);
856 hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task)); 903 hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
857 } else 904 kretprobe_table_unlock(hash, &flags);
905 } else {
858 rp->nmissed++; 906 rp->nmissed++;
859 spin_unlock_irqrestore(&kretprobe_lock, flags); 907 spin_unlock_irqrestore(&rp->lock, flags);
908 }
860 return 0; 909 return 0;
861} 910}
862 911
@@ -892,7 +941,7 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
892 rp->maxactive = NR_CPUS; 941 rp->maxactive = NR_CPUS;
893#endif 942#endif
894 } 943 }
895 INIT_HLIST_HEAD(&rp->used_instances); 944 spin_lock_init(&rp->lock);
896 INIT_HLIST_HEAD(&rp->free_instances); 945 INIT_HLIST_HEAD(&rp->free_instances);
897 for (i = 0; i < rp->maxactive; i++) { 946 for (i = 0; i < rp->maxactive; i++) {
898 inst = kmalloc(sizeof(struct kretprobe_instance) + 947 inst = kmalloc(sizeof(struct kretprobe_instance) +
@@ -901,8 +950,8 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
901 free_rp_inst(rp); 950 free_rp_inst(rp);
902 return -ENOMEM; 951 return -ENOMEM;
903 } 952 }
904 INIT_HLIST_NODE(&inst->uflist); 953 INIT_HLIST_NODE(&inst->hlist);
905 hlist_add_head(&inst->uflist, &rp->free_instances); 954 hlist_add_head(&inst->hlist, &rp->free_instances);
906 } 955 }
907 956
908 rp->nmissed = 0; 957 rp->nmissed = 0;
@@ -1009,6 +1058,7 @@ static int __init init_kprobes(void)
1009 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1058 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1010 INIT_HLIST_HEAD(&kprobe_table[i]); 1059 INIT_HLIST_HEAD(&kprobe_table[i]);
1011 INIT_HLIST_HEAD(&kretprobe_inst_table[i]); 1060 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
1061 spin_lock_init(&(kretprobe_table_locks[i].lock));
1012 } 1062 }
1013 1063
1014 /* 1064 /*
@@ -1050,6 +1100,7 @@ static int __init init_kprobes(void)
1050 err = arch_init_kprobes(); 1100 err = arch_init_kprobes();
1051 if (!err) 1101 if (!err)
1052 err = register_die_notifier(&kprobe_exceptions_nb); 1102 err = register_die_notifier(&kprobe_exceptions_nb);
1103 kprobes_initialized = (err == 0);
1053 1104
1054 if (!err) 1105 if (!err)
1055 init_test_probes(); 1106 init_test_probes();
@@ -1286,13 +1337,8 @@ EXPORT_SYMBOL_GPL(register_jprobe);
1286EXPORT_SYMBOL_GPL(unregister_jprobe); 1337EXPORT_SYMBOL_GPL(unregister_jprobe);
1287EXPORT_SYMBOL_GPL(register_jprobes); 1338EXPORT_SYMBOL_GPL(register_jprobes);
1288EXPORT_SYMBOL_GPL(unregister_jprobes); 1339EXPORT_SYMBOL_GPL(unregister_jprobes);
1289#ifdef CONFIG_KPROBES
1290EXPORT_SYMBOL_GPL(jprobe_return); 1340EXPORT_SYMBOL_GPL(jprobe_return);
1291#endif
1292
1293#ifdef CONFIG_KPROBES
1294EXPORT_SYMBOL_GPL(register_kretprobe); 1341EXPORT_SYMBOL_GPL(register_kretprobe);
1295EXPORT_SYMBOL_GPL(unregister_kretprobe); 1342EXPORT_SYMBOL_GPL(unregister_kretprobe);
1296EXPORT_SYMBOL_GPL(register_kretprobes); 1343EXPORT_SYMBOL_GPL(register_kretprobes);
1297EXPORT_SYMBOL_GPL(unregister_kretprobes); 1344EXPORT_SYMBOL_GPL(unregister_kretprobes);
1298#endif
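
The kprobes diff replaces the single global kretprobe_lock with kretprobe_table_locks[], an array of cacheline-aligned spinlocks indexed by hash_ptr(task), plus a per-kretprobe lock for the free list; unrelated tasks now hash to different buckets and stop contending on one lock. A compact pthreads sketch of hashed bucket locking (the hash constant and all names are illustrative, not the kernel's hash_ptr()):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define TABLE_BITS 6
#define TABLE_SIZE (1u << TABLE_BITS)

static pthread_mutex_t bucket_lock[TABLE_SIZE];

/* Cheap multiplicative pointer hash; result is already < TABLE_SIZE. */
static unsigned long hash_ptr_like(const void *p)
{
	uint64_t x = (uint64_t)(uintptr_t)p >> 4;

	return (unsigned long)((x * 0x9e3779b97f4a7c15ULL) >> (64 - TABLE_BITS));
}

static void with_bucket(const void *task)
{
	unsigned long h = hash_ptr_like(task);

	pthread_mutex_lock(&bucket_lock[h]);	/* only this bucket is held */
	printf("task %p -> bucket %lu\n", task, h);
	pthread_mutex_unlock(&bucket_lock[h]);
}

int main(void)
{
	for (unsigned int i = 0; i < TABLE_SIZE; i++)
		pthread_mutex_init(&bucket_lock[i], NULL);

	int a, b;

	with_bucket(&a);
	with_bucket(&b);
	return 0;
}
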
diff --git a/kernel/marker.c b/kernel/marker.c
index 1abfb923b761..971da5317903 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -441,7 +441,7 @@ static int remove_marker(const char *name)
441 hlist_del(&e->hlist); 441 hlist_del(&e->hlist);
442 /* Make sure the call_rcu has been executed */ 442 /* Make sure the call_rcu has been executed */
443 if (e->rcu_pending) 443 if (e->rcu_pending)
444 rcu_barrier(); 444 rcu_barrier_sched();
445 kfree(e); 445 kfree(e);
446 return 0; 446 return 0;
447} 447}
@@ -476,7 +476,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
476 hlist_del(&(*entry)->hlist); 476 hlist_del(&(*entry)->hlist);
477 /* Make sure the call_rcu has been executed */ 477 /* Make sure the call_rcu has been executed */
478 if ((*entry)->rcu_pending) 478 if ((*entry)->rcu_pending)
479 rcu_barrier(); 479 rcu_barrier_sched();
480 kfree(*entry); 480 kfree(*entry);
481 *entry = e; 481 *entry = e;
482 trace_mark(core_marker_format, "name %s format %s", 482 trace_mark(core_marker_format, "name %s format %s",
@@ -655,7 +655,7 @@ int marker_probe_register(const char *name, const char *format,
655 * make sure it's executed now. 655 * make sure it's executed now.
656 */ 656 */
657 if (entry->rcu_pending) 657 if (entry->rcu_pending)
658 rcu_barrier(); 658 rcu_barrier_sched();
659 old = marker_entry_add_probe(entry, probe, probe_private); 659 old = marker_entry_add_probe(entry, probe, probe_private);
660 if (IS_ERR(old)) { 660 if (IS_ERR(old)) {
661 ret = PTR_ERR(old); 661 ret = PTR_ERR(old);
@@ -670,10 +670,7 @@ int marker_probe_register(const char *name, const char *format,
670 entry->rcu_pending = 1; 670 entry->rcu_pending = 1;
671 /* write rcu_pending before calling the RCU callback */ 671 /* write rcu_pending before calling the RCU callback */
672 smp_wmb(); 672 smp_wmb();
673#ifdef CONFIG_PREEMPT_RCU 673 call_rcu_sched(&entry->rcu, free_old_closure);
674 synchronize_sched(); /* Until we have the call_rcu_sched() */
675#endif
676 call_rcu(&entry->rcu, free_old_closure);
677end: 674end:
678 mutex_unlock(&markers_mutex); 675 mutex_unlock(&markers_mutex);
679 return ret; 676 return ret;
@@ -704,7 +701,7 @@ int marker_probe_unregister(const char *name,
704 if (!entry) 701 if (!entry)
705 goto end; 702 goto end;
706 if (entry->rcu_pending) 703 if (entry->rcu_pending)
707 rcu_barrier(); 704 rcu_barrier_sched();
708 old = marker_entry_remove_probe(entry, probe, probe_private); 705 old = marker_entry_remove_probe(entry, probe, probe_private);
709 mutex_unlock(&markers_mutex); 706 mutex_unlock(&markers_mutex);
710 marker_update_probes(); /* may update entry */ 707 marker_update_probes(); /* may update entry */
@@ -716,10 +713,7 @@ int marker_probe_unregister(const char *name,
716 entry->rcu_pending = 1; 713 entry->rcu_pending = 1;
717 /* write rcu_pending before calling the RCU callback */ 714 /* write rcu_pending before calling the RCU callback */
718 smp_wmb(); 715 smp_wmb();
719#ifdef CONFIG_PREEMPT_RCU 716 call_rcu_sched(&entry->rcu, free_old_closure);
720 synchronize_sched(); /* Until we have the call_rcu_sched() */
721#endif
722 call_rcu(&entry->rcu, free_old_closure);
723 remove_marker(name); /* Ignore busy error message */ 717 remove_marker(name); /* Ignore busy error message */
724 ret = 0; 718 ret = 0;
725end: 719end:
@@ -786,7 +780,7 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
786 goto end; 780 goto end;
787 } 781 }
788 if (entry->rcu_pending) 782 if (entry->rcu_pending)
789 rcu_barrier(); 783 rcu_barrier_sched();
790 old = marker_entry_remove_probe(entry, NULL, probe_private); 784 old = marker_entry_remove_probe(entry, NULL, probe_private);
791 mutex_unlock(&markers_mutex); 785 mutex_unlock(&markers_mutex);
792 marker_update_probes(); /* may update entry */ 786 marker_update_probes(); /* may update entry */
@@ -797,10 +791,7 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
797 entry->rcu_pending = 1; 791 entry->rcu_pending = 1;
798 /* write rcu_pending before calling the RCU callback */ 792 /* write rcu_pending before calling the RCU callback */
799 smp_wmb(); 793 smp_wmb();
800#ifdef CONFIG_PREEMPT_RCU 794 call_rcu_sched(&entry->rcu, free_old_closure);
801 synchronize_sched(); /* Until we have the call_rcu_sched() */
802#endif
803 call_rcu(&entry->rcu, free_old_closure);
804 remove_marker(entry->name); /* Ignore busy error message */ 795 remove_marker(entry->name); /* Ignore busy error message */
805end: 796end:
806 mutex_unlock(&markers_mutex); 797 mutex_unlock(&markers_mutex);
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 48d7ed6fc3a4..43c2111cd54d 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/cgroup.h> 8#include <linux/cgroup.h>
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/proc_fs.h>
10#include <linux/slab.h> 11#include <linux/slab.h>
11#include <linux/nsproxy.h> 12#include <linux/nsproxy.h>
12 13
@@ -24,9 +25,12 @@ static inline struct ns_cgroup *cgroup_to_ns(
24 struct ns_cgroup, css); 25 struct ns_cgroup, css);
25} 26}
26 27
27int ns_cgroup_clone(struct task_struct *task) 28int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
28{ 29{
29 return cgroup_clone(task, &ns_subsys); 30 char name[PROC_NUMBUF];
31
32 snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
33 return cgroup_clone(task, &ns_subsys, name);
30} 34}
31 35
32/* 36/*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index adc785146a1c..21575fc46d05 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -157,12 +157,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
157 goto out; 157 goto out;
158 } 158 }
159 159
160 err = ns_cgroup_clone(tsk);
161 if (err) {
162 put_nsproxy(new_ns);
163 goto out;
164 }
165
166 tsk->nsproxy = new_ns; 160 tsk->nsproxy = new_ns;
167 161
168out: 162out:
@@ -209,7 +203,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
209 goto out; 203 goto out;
210 } 204 }
211 205
212 err = ns_cgroup_clone(current); 206 err = ns_cgroup_clone(current, task_pid(current));
213 if (err) 207 if (err)
214 put_nsproxy(*new_nsp); 208 put_nsproxy(*new_nsp);
215 209
diff --git a/kernel/panic.c b/kernel/panic.c
index 425567f45b9f..12c5a0a6c89b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -318,6 +318,28 @@ void warn_on_slowpath(const char *file, int line)
318 add_taint(TAINT_WARN); 318 add_taint(TAINT_WARN);
319} 319}
320EXPORT_SYMBOL(warn_on_slowpath); 320EXPORT_SYMBOL(warn_on_slowpath);
321
322
323void warn_slowpath(const char *file, int line, const char *fmt, ...)
324{
325 va_list args;
326 char function[KSYM_SYMBOL_LEN];
327 unsigned long caller = (unsigned long)__builtin_return_address(0);
328 sprint_symbol(function, caller);
329
330 printk(KERN_WARNING "------------[ cut here ]------------\n");
331 printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
332 line, function);
333 va_start(args, fmt);
334 vprintk(fmt, args);
335 va_end(args);
336
337 print_modules();
338 dump_stack();
339 print_oops_end_marker();
340 add_taint(TAINT_WARN);
341}
342EXPORT_SYMBOL(warn_slowpath);
321#endif 343#endif
322 344
323#ifdef CONFIG_CC_STACKPROTECTOR 345#ifdef CONFIG_CC_STACKPROTECTOR
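
warn_slowpath() above is what lets WARN() take a format string (put to use in the kernel/irq/manage.c hunk earlier): a variadic function captures the caller's arguments with va_start() and forwards them to vprintk(). The same forwarding pattern in plain C (names invented; ##__VA_ARGS__ is the GNU extension the kernel itself relies on):

#include <stdarg.h>
#include <stdio.h>

static void warn_like(const char *file, int line, const char *fmt, ...)
{
	va_list args;

	printf("------------[ cut here ]------------\n");
	printf("WARNING: at %s:%d\n", file, line);

	va_start(args, fmt);
	vprintf(fmt, args);	/* forward the caller's arguments */
	va_end(args);
}

#define WARN_LIKE(fmt, ...) \
	warn_like(__FILE__, __LINE__, fmt, ##__VA_ARGS__)

int main(void)
{
	WARN_LIKE("Unbalanced IRQ %d wake disable\n", 9);
	return 0;
}
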
diff --git a/kernel/pid.c b/kernel/pid.c
index 30bd5d4b2ac7..064e76afa507 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -309,12 +309,6 @@ struct pid *find_vpid(int nr)
309} 309}
310EXPORT_SYMBOL_GPL(find_vpid); 310EXPORT_SYMBOL_GPL(find_vpid);
311 311
312struct pid *find_pid(int nr)
313{
314 return find_pid_ns(nr, &init_pid_ns);
315}
316EXPORT_SYMBOL_GPL(find_pid);
317
318/* 312/*
319 * attach_pid() must be called with the tasklist_lock write-held. 313 * attach_pid() must be called with the tasklist_lock write-held.
320 */ 314 */
@@ -435,6 +429,7 @@ struct pid *find_get_pid(pid_t nr)
435 429
436 return pid; 430 return pid;
437} 431}
432EXPORT_SYMBOL_GPL(find_get_pid);
438 433
439pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) 434pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
440{ 435{
@@ -482,7 +477,7 @@ EXPORT_SYMBOL(task_session_nr_ns);
482/* 477/*
483 * Used by proc to find the first pid that is greater than or equal to nr. 478 * Used by proc to find the first pid that is greater than or equal to nr.
484 * 479 *
485 * If there is a pid at nr this function is exactly the same as find_pid. 480 * If there is a pid at nr this function is exactly the same as find_pid_ns.
486 */ 481 */
487struct pid *find_ge_pid(int nr, struct pid_namespace *ns) 482struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
488{ 483{
@@ -497,7 +492,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
497 492
498 return pid; 493 return pid;
499} 494}
500EXPORT_SYMBOL_GPL(find_get_pid);
501 495
502/* 496/*
503 * The pid hash table is scaled according to the amount of memory in the 497 * The pid hash table is scaled according to the amount of memory in the
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 98702b4b8851..ea567b78d1aa 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -12,6 +12,7 @@
12#include <linux/pid_namespace.h> 12#include <linux/pid_namespace.h>
13#include <linux/syscalls.h> 13#include <linux/syscalls.h>
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/acct.h>
15 16
16#define BITS_PER_PAGE (PAGE_SIZE*8) 17#define BITS_PER_PAGE (PAGE_SIZE*8)
17 18
@@ -71,7 +72,7 @@ static struct pid_namespace *create_pid_namespace(unsigned int level)
71 struct pid_namespace *ns; 72 struct pid_namespace *ns;
72 int i; 73 int i;
73 74
74 ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); 75 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
75 if (ns == NULL) 76 if (ns == NULL)
76 goto out; 77 goto out;
77 78
@@ -84,17 +85,13 @@ static struct pid_namespace *create_pid_namespace(unsigned int level)
84 goto out_free_map; 85 goto out_free_map;
85 86
86 kref_init(&ns->kref); 87 kref_init(&ns->kref);
87 ns->last_pid = 0;
88 ns->child_reaper = NULL;
89 ns->level = level; 88 ns->level = level;
90 89
91 set_bit(0, ns->pidmap[0].page); 90 set_bit(0, ns->pidmap[0].page);
92 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); 91 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
93 92
94 for (i = 1; i < PIDMAP_ENTRIES; i++) { 93 for (i = 1; i < PIDMAP_ENTRIES; i++)
95 ns->pidmap[i].page = NULL;
96 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); 94 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
97 }
98 95
99 return ns; 96 return ns;
100 97
@@ -185,6 +182,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
185 182
186 /* Child reaper for the pid namespace is going away */ 183 /* Child reaper for the pid namespace is going away */
187 pid_ns->child_reaper = NULL; 184 pid_ns->child_reaper = NULL;
185 acct_exit_ns(pid_ns);
188 return; 186 return;
189} 187}
190 188
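
Switching create_pid_namespace() to kmem_cache_zalloc() returns a fully zeroed object, which is why the explicit last_pid/child_reaper/pidmap page initialization can be deleted. The userspace analogue is calloc() versus malloc() plus field-by-field zeroing:

#include <stdio.h>
#include <stdlib.h>

struct ns_like {
	int last_pid;
	void *child_reaper;
	int level;
};

int main(void)
{
	/* Before: allocate, then hand-zero each field. */
	struct ns_like *a = malloc(sizeof(*a));
	if (a) {
		a->last_pid = 0;
		a->child_reaper = NULL;
		a->level = 0;
	}

	/* After: one zeroing allocation covers every field. */
	struct ns_like *b = calloc(1, sizeof(*b));

	printf("%d %d\n", a ? a->last_pid : -1, b ? b->level : -1);
	free(a);
	free(b);
	return 0;
}
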
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index dbd8398ddb0b..9a21681aa80f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -449,9 +449,6 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
449 spin_unlock_irqrestore(&idr_lock, flags); 449 spin_unlock_irqrestore(&idr_lock, flags);
450 } 450 }
451 sigqueue_free(tmr->sigq); 451 sigqueue_free(tmr->sigq);
452 if (unlikely(tmr->it_process) &&
453 tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
454 put_task_struct(tmr->it_process);
455 kmem_cache_free(posix_timers_cache, tmr); 452 kmem_cache_free(posix_timers_cache, tmr);
456} 453}
457 454
@@ -856,11 +853,10 @@ retry_delete:
856 * This keeps any tasks waiting on the spin lock from thinking 853 * This keeps any tasks waiting on the spin lock from thinking
857 * they got something (see the lock code above). 854 * they got something (see the lock code above).
858 */ 855 */
859 if (timer->it_process) { 856 if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
860 if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) 857 put_task_struct(timer->it_process);
861 put_task_struct(timer->it_process); 858 timer->it_process = NULL;
862 timer->it_process = NULL; 859
863 }
864 unlock_timer(timer, flags); 860 unlock_timer(timer, flags);
865 release_posix_timer(timer, IT_ID_SET); 861 release_posix_timer(timer, IT_ID_SET);
866 return 0; 862 return 0;
@@ -885,11 +881,10 @@ retry_delete:
885 * This keeps any tasks waiting on the spin lock from thinking 881 * This keeps any tasks waiting on the spin lock from thinking
886 * they got something (see the lock code above). 882 * they got something (see the lock code above).
887 */ 883 */
888 if (timer->it_process) { 884 if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
889 if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) 885 put_task_struct(timer->it_process);
890 put_task_struct(timer->it_process); 886 timer->it_process = NULL;
891 timer->it_process = NULL; 887
892 }
893 unlock_timer(timer, flags); 888 unlock_timer(timer, flags);
894 release_posix_timer(timer, IT_ID_SET); 889 release_posix_timer(timer, IT_ID_SET);
895} 890}
diff --git a/kernel/printk.c b/kernel/printk.c
index 3f7a2a94583b..a7f7559c5f6c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1308,6 +1308,8 @@ void tty_write_message(struct tty_struct *tty, char *msg)
1308} 1308}
1309 1309
1310#if defined CONFIG_PRINTK 1310#if defined CONFIG_PRINTK
1311
1312DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
1311/* 1313/*
1312 * printk rate limiting, lifted from the networking subsystem. 1314 * printk rate limiting, lifted from the networking subsystem.
1313 * 1315 *
@@ -1315,22 +1317,9 @@ void tty_write_message(struct tty_struct *tty, char *msg)
1315 * every printk_ratelimit_jiffies to make a denial-of-service 1317 * every printk_ratelimit_jiffies to make a denial-of-service
1316 * attack impossible. 1318 * attack impossible.
1317 */ 1319 */
1318int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
1319{
1320 return __ratelimit(ratelimit_jiffies, ratelimit_burst);
1321}
1322EXPORT_SYMBOL(__printk_ratelimit);
1323
1324/* minimum time in jiffies between messages */
1325int printk_ratelimit_jiffies = 5 * HZ;
1326
1327/* number of messages we send before ratelimiting */
1328int printk_ratelimit_burst = 10;
1329
1330int printk_ratelimit(void) 1320int printk_ratelimit(void)
1331{ 1321{
1332 return __printk_ratelimit(printk_ratelimit_jiffies, 1322 return __ratelimit(&printk_ratelimit_state);
1333 printk_ratelimit_burst);
1334} 1323}
1335EXPORT_SYMBOL(printk_ratelimit); 1324EXPORT_SYMBOL(printk_ratelimit);
1336 1325
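
The printk hunk retires the loose printk_ratelimit_jiffies/printk_ratelimit_burst globals in favor of one DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10) consulted through __ratelimit(). Roughly, such a state allows a burst of messages per interval and suppresses the rest; a simplified userspace model (this is not lib/ratelimit.c, and it ignores locking):

#include <stdio.h>
#include <time.h>

struct ratelimit_like {
	time_t interval;	/* seconds per window */
	int burst;		/* messages allowed per window */
	time_t begin;
	int printed;
};

#define DEFINE_RATELIMIT_LIKE(name, iv, b) \
	struct ratelimit_like name = { .interval = (iv), .burst = (b) }

static int ratelimit_ok(struct ratelimit_like *rs)
{
	time_t now = time(NULL);

	if (now - rs->begin >= rs->interval) {	/* window expired: reset */
		rs->begin = now;
		rs->printed = 0;
	}
	if (rs->printed < rs->burst) {
		rs->printed++;
		return 1;
	}
	return 0;
}

int main(void)
{
	DEFINE_RATELIMIT_LIKE(rs, 5, 10);	/* mirrors 5 * HZ, 10 above */

	for (int i = 0; i < 12; i++)
		if (ratelimit_ok(&rs))
			printf("message %d\n", i);	/* only the first 10 print */
	return 0;
}
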
diff --git a/kernel/profile.c b/kernel/profile.c
index 58926411eb2a..cd26bed4cc26 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -112,8 +112,6 @@ void __init profile_init(void)
112 112
113/* Profile event notifications */ 113/* Profile event notifications */
114 114
115#ifdef CONFIG_PROFILING
116
117static BLOCKING_NOTIFIER_HEAD(task_exit_notifier); 115static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
118static ATOMIC_NOTIFIER_HEAD(task_free_notifier); 116static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
119static BLOCKING_NOTIFIER_HEAD(munmap_notifier); 117static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
@@ -203,8 +201,6 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *))
203} 201}
204EXPORT_SYMBOL_GPL(unregister_timer_hook); 202EXPORT_SYMBOL_GPL(unregister_timer_hook);
205 203
206#endif /* CONFIG_PROFILING */
207
208 204
209#ifdef CONFIG_SMP 205#ifdef CONFIG_SMP
210/* 206/*
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index d3c61b4ebef2..f275c8eca772 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/res_counter.h> 14#include <linux/res_counter.h>
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/mm.h>
16 17
17void res_counter_init(struct res_counter *counter) 18void res_counter_init(struct res_counter *counter)
18{ 19{
@@ -102,44 +103,37 @@ u64 res_counter_read_u64(struct res_counter *counter, int member)
102 return *res_counter_member(counter, member); 103 return *res_counter_member(counter, member);
103} 104}
104 105
105ssize_t res_counter_write(struct res_counter *counter, int member, 106int res_counter_memparse_write_strategy(const char *buf,
106 const char __user *userbuf, size_t nbytes, loff_t *pos, 107 unsigned long long *res)
107 int (*write_strategy)(char *st_buf, unsigned long long *val))
108{ 108{
109 int ret; 109 char *end;
110 char *buf, *end; 110 /* FIXME - make memparse() take const char* args */
111 unsigned long flags; 111 *res = memparse((char *)buf, &end);
112 unsigned long long tmp, *val; 112 if (*end != '\0')
113 113 return -EINVAL;
114 buf = kmalloc(nbytes + 1, GFP_KERNEL);
115 ret = -ENOMEM;
116 if (buf == NULL)
117 goto out;
118 114
119 buf[nbytes] = '\0'; 115 *res = PAGE_ALIGN(*res);
120 ret = -EFAULT; 116 return 0;
121 if (copy_from_user(buf, userbuf, nbytes)) 117}
122 goto out_free;
123 118
124 ret = -EINVAL; 119int res_counter_write(struct res_counter *counter, int member,
120 const char *buf, write_strategy_fn write_strategy)
121{
122 char *end;
123 unsigned long flags;
124 unsigned long long tmp, *val;
125 125
126 strstrip(buf);
127 if (write_strategy) { 126 if (write_strategy) {
128 if (write_strategy(buf, &tmp)) { 127 if (write_strategy(buf, &tmp))
129 goto out_free; 128 return -EINVAL;
130 }
131 } else { 129 } else {
132 tmp = simple_strtoull(buf, &end, 10); 130 tmp = simple_strtoull(buf, &end, 10);
133 if (*end != '\0') 131 if (*end != '\0')
134 goto out_free; 132 return -EINVAL;
135 } 133 }
136 spin_lock_irqsave(&counter->lock, flags); 134 spin_lock_irqsave(&counter->lock, flags);
137 val = res_counter_member(counter, member); 135 val = res_counter_member(counter, member);
138 *val = tmp; 136 *val = tmp;
139 spin_unlock_irqrestore(&counter->lock, flags); 137 spin_unlock_irqrestore(&counter->lock, flags);
140 ret = nbytes; 138 return 0;
141out_free:
142 kfree(buf);
143out:
144 return ret;
145} 139}
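
res_counter_write() now receives a NUL-terminated kernel buffer from the cgroup core instead of copying from user space itself, so the kmalloc/copy_from_user/kfree scaffolding disappears and the function returns 0 or -EINVAL rather than a byte count. Parsing is pluggable through write_strategy_fn, and the new res_counter_memparse_write_strategy() accepts K/M/G suffixes and page-aligns the result. A hedged sketch of what a caller of the new strategy helper sees, assuming only the signatures in this hunk:

static int show_parsed_limit(void)
{
	unsigned long long limit;

	/* "4M" parses to 4194304 and comes back PAGE_ALIGN()ed;
	 * trailing junk after the suffix yields -EINVAL */
	if (res_counter_memparse_write_strategy("4M", &limit))
		return -EINVAL;
	pr_debug("limit: %llu bytes\n", limit);
	return 0;
}
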
diff --git a/kernel/sched.c b/kernel/sched.c
index 6acf749d3336..0047bd9b96aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4046,6 +4046,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
4046 cpustat->nice = cputime64_add(cpustat->nice, tmp); 4046 cpustat->nice = cputime64_add(cpustat->nice, tmp);
4047 else 4047 else
4048 cpustat->user = cputime64_add(cpustat->user, tmp); 4048 cpustat->user = cputime64_add(cpustat->user, tmp);
4049 /* Account for user time used */
4050 acct_update_integrals(p);
4049} 4051}
4050 4052
4051/* 4053/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 6c0958e52ea7..82c3545596c5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -338,13 +338,9 @@ unblock_all_signals(void)
338 spin_unlock_irqrestore(&current->sighand->siglock, flags); 338 spin_unlock_irqrestore(&current->sighand->siglock, flags);
339} 339}
340 340
341static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) 341static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
342{ 342{
343 struct sigqueue *q, *first = NULL; 343 struct sigqueue *q, *first = NULL;
344 int still_pending = 0;
345
346 if (unlikely(!sigismember(&list->signal, sig)))
347 return 0;
348 344
349 /* 345 /*
350 * Collect the siginfo appropriate to this signal. Check if 346 * Collect the siginfo appropriate to this signal. Check if
@@ -352,33 +348,30 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
352 */ 348 */
353 list_for_each_entry(q, &list->list, list) { 349 list_for_each_entry(q, &list->list, list) {
354 if (q->info.si_signo == sig) { 350 if (q->info.si_signo == sig) {
355 if (first) { 351 if (first)
356 still_pending = 1; 352 goto still_pending;
357 break;
358 }
359 first = q; 353 first = q;
360 } 354 }
361 } 355 }
356
357 sigdelset(&list->signal, sig);
358
362 if (first) { 359 if (first) {
360still_pending:
363 list_del_init(&first->list); 361 list_del_init(&first->list);
364 copy_siginfo(info, &first->info); 362 copy_siginfo(info, &first->info);
365 __sigqueue_free(first); 363 __sigqueue_free(first);
366 if (!still_pending)
367 sigdelset(&list->signal, sig);
368 } else { 364 } else {
369
370 /* Ok, it wasn't in the queue. This must be 365 /* Ok, it wasn't in the queue. This must be
371 a fast-pathed signal or we must have been 366 a fast-pathed signal or we must have been
372 out of queue space. So zero out the info. 367 out of queue space. So zero out the info.
373 */ 368 */
374 sigdelset(&list->signal, sig);
375 info->si_signo = sig; 369 info->si_signo = sig;
376 info->si_errno = 0; 370 info->si_errno = 0;
377 info->si_code = 0; 371 info->si_code = 0;
378 info->si_pid = 0; 372 info->si_pid = 0;
379 info->si_uid = 0; 373 info->si_uid = 0;
380 } 374 }
381 return 1;
382} 375}
383 376
384static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, 377static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
@@ -396,8 +389,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
396 } 389 }
397 } 390 }
398 391
399 if (!collect_signal(sig, pending, info)) 392 collect_signal(sig, pending, info);
400 sig = 0;
401 } 393 }
402 394
403 return sig; 395 return sig;
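
collect_signal() can drop its return value because __dequeue_signal() only calls it once sigismember() has already established that the signal is pending. The still_pending flag is replaced by clearing the bit up front and jumping over the sigdelset() when a second queued instance of the signal was seen; the goto lands inside the if-body, which is legal C as long as no variable-length-array scope is entered. A compilable toy of that control flow, illustrative only:

static int collect(int instances_queued)
{
	int pending = 1;

	if (instances_queued > 1)
		goto still_pending;	/* more queued: keep the bit */
	pending = 0;			/* last instance: clear it */
	if (instances_queued) {
still_pending:
		return pending;
	}
	return pending;			/* nothing was queued */
}
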
@@ -462,8 +454,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
462 * is to alert stop-signal processing code when another 454 * is to alert stop-signal processing code when another
463 * processor has come along and cleared the flag. 455 * processor has come along and cleared the flag.
464 */ 456 */
465 if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) 457 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
466 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
467 } 458 }
468 if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { 459 if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
469 /* 460 /*
@@ -1125,7 +1116,7 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
1125 * is probably wrong. Should make it like BSD or SYSV. 1116 * is probably wrong. Should make it like BSD or SYSV.
1126 */ 1117 */
1127 1118
1128static int kill_something_info(int sig, struct siginfo *info, int pid) 1119static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1129{ 1120{
1130 int ret; 1121 int ret;
1131 1122
@@ -1237,17 +1228,6 @@ int kill_pid(struct pid *pid, int sig, int priv)
1237} 1228}
1238EXPORT_SYMBOL(kill_pid); 1229EXPORT_SYMBOL(kill_pid);
1239 1230
1240int
1241kill_proc(pid_t pid, int sig, int priv)
1242{
1243 int ret;
1244
1245 rcu_read_lock();
1246 ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
1247 rcu_read_unlock();
1248 return ret;
1249}
1250
1251/* 1231/*
1252 * These functions support sending signals using preallocated sigqueue 1232 * These functions support sending signals using preallocated sigqueue
1253 * structures. This is needed "because realtime applications cannot 1233 * structures. This is needed "because realtime applications cannot
@@ -1379,10 +1359,9 @@ void do_notify_parent(struct task_struct *tsk, int sig)
1379 1359
1380 info.si_uid = tsk->uid; 1360 info.si_uid = tsk->uid;
1381 1361
1382 /* FIXME: find out whether or not this is supposed to be c*time. */ 1362 info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
1383 info.si_utime = cputime_to_jiffies(cputime_add(tsk->utime,
1384 tsk->signal->utime)); 1363 tsk->signal->utime));
1385 info.si_stime = cputime_to_jiffies(cputime_add(tsk->stime, 1364 info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
1386 tsk->signal->stime)); 1365 tsk->signal->stime));
1387 1366
1388 info.si_status = tsk->exit_code & 0x7f; 1367 info.si_status = tsk->exit_code & 0x7f;
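
This resolves the long-standing FIXME: si_utime and si_stime in the parent's SIGCHLD siginfo are now reported in clock_t units (USER_HZ ticks), the scale userspace already assumes for times() and the wait family, instead of raw jiffies whose meaning varies with CONFIG_HZ. A hedged userspace-side sketch of interpreting the fixed fields:

#include <signal.h>
#include <unistd.h>

/* si_utime is now clock_t ticks, so scaling by _SC_CLK_TCK
 * (nominally 100) gives seconds regardless of the kernel HZ. */
static double child_user_seconds(const siginfo_t *info)
{
	return (double)info->si_utime / sysconf(_SC_CLK_TCK);
}
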
@@ -1450,9 +1429,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1450 1429
1451 info.si_uid = tsk->uid; 1430 info.si_uid = tsk->uid;
1452 1431
1453 /* FIXME: find out whether or not this is supposed to be c*time. */ 1432 info.si_utime = cputime_to_clock_t(tsk->utime);
1454 info.si_utime = cputime_to_jiffies(tsk->utime); 1433 info.si_stime = cputime_to_clock_t(tsk->stime);
1455 info.si_stime = cputime_to_jiffies(tsk->stime);
1456 1434
1457 info.si_code = why; 1435 info.si_code = why;
1458 switch (why) { 1436 switch (why) {
@@ -1491,10 +1469,10 @@ static inline int may_ptrace_stop(void)
1491 * is a deadlock situation, and pointless because our tracer 1469 * is a deadlock situation, and pointless because our tracer
1492 * is dead so don't allow us to stop. 1470 * is dead so don't allow us to stop.
1493 * If SIGKILL was already sent before the caller unlocked 1471 * If SIGKILL was already sent before the caller unlocked
1494 * ->siglock we must see ->core_waiters != 0. Otherwise it 1472 * ->siglock we must see ->core_state != NULL. Otherwise it
1495 * is safe to enter schedule(). 1473 * is safe to enter schedule().
1496 */ 1474 */
1497 if (unlikely(current->mm->core_waiters) && 1475 if (unlikely(current->mm->core_state) &&
1498 unlikely(current->mm == current->parent->mm)) 1476 unlikely(current->mm == current->parent->mm))
1499 return 0; 1477 return 0;
1500 1478
@@ -1507,9 +1485,8 @@ static inline int may_ptrace_stop(void)
1507 */ 1485 */
1508static int sigkill_pending(struct task_struct *tsk) 1486static int sigkill_pending(struct task_struct *tsk)
1509{ 1487{
1510 return ((sigismember(&tsk->pending.signal, SIGKILL) || 1488 return sigismember(&tsk->pending.signal, SIGKILL) ||
1511 sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) && 1489 sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
1512 !unlikely(sigismember(&tsk->blocked, SIGKILL)));
1513} 1490}
1514 1491
1515/* 1492/*
@@ -1525,8 +1502,6 @@ static int sigkill_pending(struct task_struct *tsk)
1525 */ 1502 */
1526static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) 1503static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1527{ 1504{
1528 int killed = 0;
1529
1530 if (arch_ptrace_stop_needed(exit_code, info)) { 1505 if (arch_ptrace_stop_needed(exit_code, info)) {
1531 /* 1506 /*
1532 * The arch code has something special to do before a 1507 * The arch code has something special to do before a
@@ -1542,7 +1517,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1542 spin_unlock_irq(&current->sighand->siglock); 1517 spin_unlock_irq(&current->sighand->siglock);
1543 arch_ptrace_stop(exit_code, info); 1518 arch_ptrace_stop(exit_code, info);
1544 spin_lock_irq(&current->sighand->siglock); 1519 spin_lock_irq(&current->sighand->siglock);
1545 killed = sigkill_pending(current); 1520 if (sigkill_pending(current))
1521 return;
1546 } 1522 }
1547 1523
1548 /* 1524 /*
@@ -1559,7 +1535,7 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1559 __set_current_state(TASK_TRACED); 1535 __set_current_state(TASK_TRACED);
1560 spin_unlock_irq(&current->sighand->siglock); 1536 spin_unlock_irq(&current->sighand->siglock);
1561 read_lock(&tasklist_lock); 1537 read_lock(&tasklist_lock);
1562 if (!unlikely(killed) && may_ptrace_stop()) { 1538 if (may_ptrace_stop()) {
1563 do_notify_parent_cldstop(current, CLD_TRAPPED); 1539 do_notify_parent_cldstop(current, CLD_TRAPPED);
1564 read_unlock(&tasklist_lock); 1540 read_unlock(&tasklist_lock);
1565 schedule(); 1541 schedule();
@@ -1658,8 +1634,7 @@ static int do_signal_stop(int signr)
1658 } else { 1634 } else {
1659 struct task_struct *t; 1635 struct task_struct *t;
1660 1636
1661 if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE)) 1637 if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
1662 != SIGNAL_STOP_DEQUEUED) ||
1663 unlikely(signal_group_exit(sig))) 1638 unlikely(signal_group_exit(sig)))
1664 return 0; 1639 return 0;
1665 /* 1640 /*
@@ -1920,7 +1895,6 @@ EXPORT_SYMBOL(recalc_sigpending);
1920EXPORT_SYMBOL_GPL(dequeue_signal); 1895EXPORT_SYMBOL_GPL(dequeue_signal);
1921EXPORT_SYMBOL(flush_signals); 1896EXPORT_SYMBOL(flush_signals);
1922EXPORT_SYMBOL(force_sig); 1897EXPORT_SYMBOL(force_sig);
1923EXPORT_SYMBOL(kill_proc);
1924EXPORT_SYMBOL(ptrace_notify); 1898EXPORT_SYMBOL(ptrace_notify);
1925EXPORT_SYMBOL(send_sig); 1899EXPORT_SYMBOL(send_sig);
1926EXPORT_SYMBOL(send_sig_info); 1900EXPORT_SYMBOL(send_sig_info);
@@ -2196,7 +2170,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese,
2196} 2170}
2197 2171
2198asmlinkage long 2172asmlinkage long
2199sys_kill(int pid, int sig) 2173sys_kill(pid_t pid, int sig)
2200{ 2174{
2201 struct siginfo info; 2175 struct siginfo info;
2202 2176
@@ -2209,7 +2183,7 @@ sys_kill(int pid, int sig)
2209 return kill_something_info(sig, &info, pid); 2183 return kill_something_info(sig, &info, pid);
2210} 2184}
2211 2185
2212static int do_tkill(int tgid, int pid, int sig) 2186static int do_tkill(pid_t tgid, pid_t pid, int sig)
2213{ 2187{
2214 int error; 2188 int error;
2215 struct siginfo info; 2189 struct siginfo info;
@@ -2255,7 +2229,7 @@ static int do_tkill(int tgid, int pid, int sig)
2255 * exists but it's not belonging to the target process anymore. This 2229 * exists but it's not belonging to the target process anymore. This
2256 * method solves the problem of threads exiting and PIDs getting reused. 2230 * method solves the problem of threads exiting and PIDs getting reused.
2257 */ 2231 */
2258asmlinkage long sys_tgkill(int tgid, int pid, int sig) 2232asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig)
2259{ 2233{
2260 /* This is only valid for single tasks */ 2234 /* This is only valid for single tasks */
2261 if (pid <= 0 || tgid <= 0) 2235 if (pid <= 0 || tgid <= 0)
@@ -2268,7 +2242,7 @@ asmlinkage long sys_tgkill(int tgid, int pid, int sig)
2268 * Send a signal to only one task, even if it's a CLONE_THREAD task. 2242 * Send a signal to only one task, even if it's a CLONE_THREAD task.
2269 */ 2243 */
2270asmlinkage long 2244asmlinkage long
2271sys_tkill(int pid, int sig) 2245sys_tkill(pid_t pid, int sig)
2272{ 2246{
2273 /* This is only valid for single tasks */ 2247 /* This is only valid for single tasks */
2274 if (pid <= 0) 2248 if (pid <= 0)
@@ -2278,7 +2252,7 @@ sys_tkill(int pid, int sig)
2278} 2252}
2279 2253
2280asmlinkage long 2254asmlinkage long
2281sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) 2255sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
2282{ 2256{
2283 siginfo_t info; 2257 siginfo_t info;
2284 2258
diff --git a/kernel/sys.c b/kernel/sys.c
index 14e97282eb6c..0c9d3fa1f5ff 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1343,8 +1343,6 @@ EXPORT_SYMBOL(in_egroup_p);
1343 1343
1344DECLARE_RWSEM(uts_sem); 1344DECLARE_RWSEM(uts_sem);
1345 1345
1346EXPORT_SYMBOL(uts_sem);
1347
1348asmlinkage long sys_newuname(struct new_utsname __user * name) 1346asmlinkage long sys_newuname(struct new_utsname __user * name)
1349{ 1347{
1350 int errno = 0; 1348 int errno = 0;
@@ -1795,7 +1793,7 @@ int orderly_poweroff(bool force)
1795 goto out; 1793 goto out;
1796 } 1794 }
1797 1795
1798 info = call_usermodehelper_setup(argv[0], argv, envp); 1796 info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
1799 if (info == NULL) { 1797 if (info == NULL) {
1800 argv_free(argv); 1798 argv_free(argv);
1801 goto out; 1799 goto out;
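
call_usermodehelper_setup() grows a gfp_t parameter for its internal allocation; orderly_poweroff() passes GFP_ATOMIC because it can be invoked from contexts that must not sleep. A hedged sketch of the extended call, assuming the usual exec step that follows setup:

static int run_helper(void)
{
	static char *argv[] = { "/sbin/poweroff", NULL };
	static char *envp[] = { "HOME=/", "PATH=/sbin:/bin", NULL };
	struct subprocess_info *info;

	/* process context keeps GFP_KERNEL; atomic callers pass GFP_ATOMIC */
	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_KERNEL);
	if (info == NULL)
		return -ENOMEM;
	return call_usermodehelper_exec(info, UMH_WAIT_EXEC);
}
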
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index bd66ac5406f3..08d6e1bb99ac 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -57,6 +57,7 @@ cond_syscall(compat_sys_set_robust_list);
57cond_syscall(sys_get_robust_list); 57cond_syscall(sys_get_robust_list);
58cond_syscall(compat_sys_get_robust_list); 58cond_syscall(compat_sys_get_robust_list);
59cond_syscall(sys_epoll_create); 59cond_syscall(sys_epoll_create);
60cond_syscall(sys_epoll_create1);
60cond_syscall(sys_epoll_ctl); 61cond_syscall(sys_epoll_ctl);
61cond_syscall(sys_epoll_wait); 62cond_syscall(sys_epoll_wait);
62cond_syscall(sys_epoll_pwait); 63cond_syscall(sys_epoll_pwait);
@@ -159,6 +160,7 @@ cond_syscall(sys_ioprio_get);
159cond_syscall(sys_signalfd); 160cond_syscall(sys_signalfd);
160cond_syscall(sys_signalfd4); 161cond_syscall(sys_signalfd4);
161cond_syscall(compat_sys_signalfd); 162cond_syscall(compat_sys_signalfd);
163cond_syscall(compat_sys_signalfd4);
162cond_syscall(sys_timerfd_create); 164cond_syscall(sys_timerfd_create);
163cond_syscall(sys_timerfd_settime); 165cond_syscall(sys_timerfd_settime);
164cond_syscall(sys_timerfd_gettime); 166cond_syscall(sys_timerfd_gettime);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a8299d1fe59..35a50db9b6ce 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = {
624 { 624 {
625 .ctl_name = KERN_PRINTK_RATELIMIT, 625 .ctl_name = KERN_PRINTK_RATELIMIT,
626 .procname = "printk_ratelimit", 626 .procname = "printk_ratelimit",
627 .data = &printk_ratelimit_jiffies, 627 .data = &printk_ratelimit_state.interval,
628 .maxlen = sizeof(int), 628 .maxlen = sizeof(int),
629 .mode = 0644, 629 .mode = 0644,
630 .proc_handler = &proc_dointvec_jiffies, 630 .proc_handler = &proc_dointvec_jiffies,
@@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = {
633 { 633 {
634 .ctl_name = KERN_PRINTK_RATELIMIT_BURST, 634 .ctl_name = KERN_PRINTK_RATELIMIT_BURST,
635 .procname = "printk_ratelimit_burst", 635 .procname = "printk_ratelimit_burst",
636 .data = &printk_ratelimit_burst, 636 .data = &printk_ratelimit_state.burst,
637 .maxlen = sizeof(int), 637 .maxlen = sizeof(int),
638 .mode = 0644, 638 .mode = 0644,
639 .proc_handler = &proc_dointvec, 639 .proc_handler = &proc_dointvec,
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index c09350d564f2..c35da23ab8fb 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1532,6 +1532,8 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
1532 sysctl_check_leaf(namespaces, table, &fail); 1532 sysctl_check_leaf(namespaces, table, &fail);
1533 } 1533 }
1534 sysctl_check_bin_path(table, &fail); 1534 sysctl_check_bin_path(table, &fail);
1535 if (table->mode > 0777)
1536 set_fail(&fail, table, "bogus .mode");
1535 if (fail) { 1537 if (fail) {
1536 set_fail(&fail, table, NULL); 1538 set_fail(&fail, table, NULL);
1537 error = -EINVAL; 1539 error = -EINVAL;
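
sysctl_check_table() now rejects any ctl_table entry whose mode has bits above 0777, which catches the classic mistake of writing a decimal permission where octal was intended. An illustrative entry the new check would flag:

static struct ctl_table bad_table[] = {
	{
		.procname	= "example",
		.mode		= 644,	/* decimal 644 == octal 01204 > 0777 */
	},
	{ }
};
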
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 06b17547f4e7..bd6be76303cf 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -35,7 +35,7 @@
35 */ 35 */
36#define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) 36#define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS)
37 37
38static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; 38static DEFINE_PER_CPU(__u32, taskstats_seqnum);
39static int family_registered; 39static int family_registered;
40struct kmem_cache *taskstats_cache; 40struct kmem_cache *taskstats_cache;
41 41
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 63528086337c..ce2d723c10e1 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -161,7 +161,7 @@ static void timer_notify(struct pt_regs *regs, int cpu)
161 __trace_special(tr, data, 2, regs->ip, 0); 161 __trace_special(tr, data, 2, regs->ip, 0);
162 162
163 while (i < sample_max_depth) { 163 while (i < sample_max_depth) {
164 frame.next_fp = 0; 164 frame.next_fp = NULL;
165 frame.return_address = 0; 165 frame.return_address = 0;
166 if (!copy_stack_frame(fp, &frame)) 166 if (!copy_stack_frame(fp, &frame))
167 break; 167 break;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 4ab1b584961b..3da47ccdc5e5 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -28,14 +28,14 @@
28void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) 28void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
29{ 29{
30 struct timespec uptime, ts; 30 struct timespec uptime, ts;
31 s64 ac_etime; 31 u64 ac_etime;
32 32
33 BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); 33 BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
34 34
35 /* calculate task elapsed time in timespec */ 35 /* calculate task elapsed time in timespec */
36 do_posix_clock_monotonic_gettime(&uptime); 36 do_posix_clock_monotonic_gettime(&uptime);
37 ts = timespec_sub(uptime, tsk->start_time); 37 ts = timespec_sub(uptime, tsk->start_time);
38 /* rebase elapsed time to usec */ 38 /* rebase elapsed time to usec (should never be negative) */
39 ac_etime = timespec_to_ns(&ts); 39 ac_etime = timespec_to_ns(&ts);
40 do_div(ac_etime, NSEC_PER_USEC); 40 do_div(ac_etime, NSEC_PER_USEC);
41 stats->ac_etime = ac_etime; 41 stats->ac_etime = ac_etime;
@@ -84,9 +84,9 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
84{ 84{
85 struct mm_struct *mm; 85 struct mm_struct *mm;
86 86
87 /* convert pages-jiffies to Mbyte-usec */ 87 /* convert pages-usec to Mbyte-usec */
88 stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; 88 stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
89 stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; 89 stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
90 mm = get_task_mm(p); 90 mm = get_task_mm(p);
91 if (mm) { 91 if (mm) {
92 /* adjust to KB unit */ 92 /* adjust to KB unit */
@@ -118,12 +118,19 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
118void acct_update_integrals(struct task_struct *tsk) 118void acct_update_integrals(struct task_struct *tsk)
119{ 119{
120 if (likely(tsk->mm)) { 120 if (likely(tsk->mm)) {
121 long delta = cputime_to_jiffies( 121 cputime_t time, dtime;
122 cputime_sub(tsk->stime, tsk->acct_stimexpd)); 122 struct timeval value;
123 u64 delta;
124
125 time = tsk->stime + tsk->utime;
126 dtime = cputime_sub(time, tsk->acct_timexpd);
127 jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
128 delta = value.tv_sec;
129 delta = delta * USEC_PER_SEC + value.tv_usec;
123 130
124 if (delta == 0) 131 if (delta == 0)
125 return; 132 return;
126 tsk->acct_stimexpd = tsk->stime; 133 tsk->acct_timexpd = time;
127 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); 134 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
128 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; 135 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
129 } 136 }
@@ -135,7 +142,7 @@ void acct_update_integrals(struct task_struct *tsk)
135 */ 142 */
136void acct_clear_integrals(struct task_struct *tsk) 143void acct_clear_integrals(struct task_struct *tsk)
137{ 144{
138 tsk->acct_stimexpd = 0; 145 tsk->acct_timexpd = 0;
139 tsk->acct_rss_mem1 = 0; 146 tsk->acct_rss_mem1 = 0;
140 tsk->acct_vm_mem1 = 0; 147 tsk->acct_vm_mem1 = 0;
141} 148}
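
acct_update_integrals() previously sampled only stime and counted deltas in jiffies; it now tracks stime plus utime against the renamed acct_timexpd watermark and converts the delta to microseconds before multiplying by the current RSS and VM sizes, so acct_rss_mem1 and acct_vm_mem1 accumulate pages-usec. That lets xacct_add_tsk() drop its jiffies_to_usecs() step, and the kernel/sched.c hunk above hooks the update into user-time accounting as well. A worked example of the report-time conversion, using the MB constant from this file:

/* 1000 resident pages held for 2000 usec of CPU time: */
u64 pages_usec = 1000ULL * 2000;			 /* pages-usec */
u64 mb_usec    = pages_usec * PAGE_SIZE / (1024 * 1024); /* Mbyte-usec */
/* with 4 KiB pages: 2000000 * 4096 / 1048576 = 7812 Mbyte-usec */
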
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6fd158b21026..ec7e4f62aaff 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -125,7 +125,7 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
125} 125}
126 126
127static void insert_work(struct cpu_workqueue_struct *cwq, 127static void insert_work(struct cpu_workqueue_struct *cwq,
128 struct work_struct *work, int tail) 128 struct work_struct *work, struct list_head *head)
129{ 129{
130 set_wq_data(work, cwq); 130 set_wq_data(work, cwq);
131 /* 131 /*
@@ -133,10 +133,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
133 * result of list_add() below, see try_to_grab_pending(). 133 * result of list_add() below, see try_to_grab_pending().
134 */ 134 */
135 smp_wmb(); 135 smp_wmb();
136 if (tail) 136 list_add_tail(&work->entry, head);
137 list_add_tail(&work->entry, &cwq->worklist);
138 else
139 list_add(&work->entry, &cwq->worklist);
140 wake_up(&cwq->more_work); 137 wake_up(&cwq->more_work);
141} 138}
142 139
@@ -146,7 +143,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
146 unsigned long flags; 143 unsigned long flags;
147 144
148 spin_lock_irqsave(&cwq->lock, flags); 145 spin_lock_irqsave(&cwq->lock, flags);
149 insert_work(cwq, work, 1); 146 insert_work(cwq, work, &cwq->worklist);
150 spin_unlock_irqrestore(&cwq->lock, flags); 147 spin_unlock_irqrestore(&cwq->lock, flags);
151} 148}
152 149
@@ -162,14 +159,11 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
162 */ 159 */
163int queue_work(struct workqueue_struct *wq, struct work_struct *work) 160int queue_work(struct workqueue_struct *wq, struct work_struct *work)
164{ 161{
165 int ret = 0; 162 int ret;
163
164 ret = queue_work_on(get_cpu(), wq, work);
165 put_cpu();
166 166
167 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
168 BUG_ON(!list_empty(&work->entry));
169 __queue_work(wq_per_cpu(wq, get_cpu()), work);
170 put_cpu();
171 ret = 1;
172 }
173 return ret; 167 return ret;
174} 168}
175EXPORT_SYMBOL_GPL(queue_work); 169EXPORT_SYMBOL_GPL(queue_work);
@@ -361,14 +355,14 @@ static void wq_barrier_func(struct work_struct *work)
361} 355}
362 356
363static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, 357static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
364 struct wq_barrier *barr, int tail) 358 struct wq_barrier *barr, struct list_head *head)
365{ 359{
366 INIT_WORK(&barr->work, wq_barrier_func); 360 INIT_WORK(&barr->work, wq_barrier_func);
367 __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); 361 __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
368 362
369 init_completion(&barr->done); 363 init_completion(&barr->done);
370 364
371 insert_work(cwq, &barr->work, tail); 365 insert_work(cwq, &barr->work, head);
372} 366}
373 367
374static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) 368static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -388,7 +382,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
388 active = 0; 382 active = 0;
389 spin_lock_irq(&cwq->lock); 383 spin_lock_irq(&cwq->lock);
390 if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) { 384 if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
391 insert_wq_barrier(cwq, &barr, 1); 385 insert_wq_barrier(cwq, &barr, &cwq->worklist);
392 active = 1; 386 active = 1;
393 } 387 }
394 spin_unlock_irq(&cwq->lock); 388 spin_unlock_irq(&cwq->lock);
@@ -426,6 +420,57 @@ void flush_workqueue(struct workqueue_struct *wq)
426} 420}
427EXPORT_SYMBOL_GPL(flush_workqueue); 421EXPORT_SYMBOL_GPL(flush_workqueue);
428 422
423/**
424 * flush_work - block until a work_struct's callback has terminated
425 * @work: the work which is to be flushed
426 *
427 * Returns false if @work has already terminated.
428 *
429 * It is expected that, prior to calling flush_work(), the caller has
430 * arranged for the work to not be requeued, otherwise it doesn't make
431 * sense to use this function.
432 */
433int flush_work(struct work_struct *work)
434{
435 struct cpu_workqueue_struct *cwq;
436 struct list_head *prev;
437 struct wq_barrier barr;
438
439 might_sleep();
440 cwq = get_wq_data(work);
441 if (!cwq)
442 return 0;
443
444 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
445 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
446
447 prev = NULL;
448 spin_lock_irq(&cwq->lock);
449 if (!list_empty(&work->entry)) {
450 /*
451 * See the comment near try_to_grab_pending()->smp_rmb().
452 * If it was re-queued under us we are not going to wait.
453 */
454 smp_rmb();
455 if (unlikely(cwq != get_wq_data(work)))
456 goto out;
457 prev = &work->entry;
458 } else {
459 if (cwq->current_work != work)
460 goto out;
461 prev = &cwq->worklist;
462 }
463 insert_wq_barrier(cwq, &barr, prev->next);
464out:
465 spin_unlock_irq(&cwq->lock);
466 if (!prev)
467 return 0;
468
469 wait_for_completion(&barr.done);
470 return 1;
471}
472EXPORT_SYMBOL_GPL(flush_work);
473
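
flush_work() is new infrastructure: instead of draining a whole workqueue, it inserts a barrier work directly behind one specific work_struct (or behind the running item if that work is currently executing) and waits on the barrier's completion. The lock_acquire/lock_release pair teaches lockdep that waiting here is equivalent to flushing the queue. A hedged usage sketch with illustrative names:

static void my_fn(struct work_struct *w)
{
	/* ... */
}
static DECLARE_WORK(my_work, my_fn);

static void quiesce(struct workqueue_struct *wq)
{
	queue_work(wq, &my_work);
	/* caller guarantees my_work is not requeued past this point */
	if (!flush_work(&my_work))
		pr_debug("my_work had already finished\n");
}
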
429/* 474/*
430 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, 475 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
431 * so this work can't be re-armed in any way. 476 * so this work can't be re-armed in any way.
@@ -473,7 +518,7 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
473 518
474 spin_lock_irq(&cwq->lock); 519 spin_lock_irq(&cwq->lock);
475 if (unlikely(cwq->current_work == work)) { 520 if (unlikely(cwq->current_work == work)) {
476 insert_wq_barrier(cwq, &barr, 0); 521 insert_wq_barrier(cwq, &barr, cwq->worklist.next);
477 running = 1; 522 running = 1;
478 } 523 }
479 spin_unlock_irq(&cwq->lock); 524 spin_unlock_irq(&cwq->lock);
@@ -644,10 +689,10 @@ int schedule_on_each_cpu(work_func_t func)
644 struct work_struct *work = per_cpu_ptr(works, cpu); 689 struct work_struct *work = per_cpu_ptr(works, cpu);
645 690
646 INIT_WORK(work, func); 691 INIT_WORK(work, func);
647 set_bit(WORK_STRUCT_PENDING, work_data_bits(work)); 692 schedule_work_on(cpu, work);
648 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
649 } 693 }
650 flush_workqueue(keventd_wq); 694 for_each_online_cpu(cpu)
695 flush_work(per_cpu_ptr(works, cpu));
651 put_online_cpus(); 696 put_online_cpus();
652 free_percpu(works); 697 free_percpu(works);
653 return 0; 698 return 0;
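
queue_work() now simply wraps queue_work_on() for the local CPU, and schedule_on_each_cpu() stops poking keventd internals: it queues each per-CPU item with schedule_work_on() and then flush_work()s each one, so it no longer has to flush every unrelated work on keventd_wq. A hedged usage sketch:

static DEFINE_PER_CPU(unsigned long, hits);

static void count_hit(struct work_struct *unused)
{
	__get_cpu_var(hits)++;	/* runs on each online CPU in turn */
}

static int __init count_everywhere(void)
{
	return schedule_on_each_cpu(count_hit);	/* blocks until all ran */
}
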
@@ -784,7 +829,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
784 err = create_workqueue_thread(cwq, singlethread_cpu); 829 err = create_workqueue_thread(cwq, singlethread_cpu);
785 start_workqueue_thread(cwq, -1); 830 start_workqueue_thread(cwq, -1);
786 } else { 831 } else {
787 get_online_cpus(); 832 cpu_maps_update_begin();
788 spin_lock(&workqueue_lock); 833 spin_lock(&workqueue_lock);
789 list_add(&wq->list, &workqueues); 834 list_add(&wq->list, &workqueues);
790 spin_unlock(&workqueue_lock); 835 spin_unlock(&workqueue_lock);
@@ -796,7 +841,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
796 err = create_workqueue_thread(cwq, cpu); 841 err = create_workqueue_thread(cwq, cpu);
797 start_workqueue_thread(cwq, cpu); 842 start_workqueue_thread(cwq, cpu);
798 } 843 }
799 put_online_cpus(); 844 cpu_maps_update_done();
800 } 845 }
801 846
802 if (err) { 847 if (err) {
@@ -810,8 +855,8 @@ EXPORT_SYMBOL_GPL(__create_workqueue_key);
810static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) 855static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
811{ 856{
812 /* 857 /*
813 * Our caller is either destroy_workqueue() or CPU_DEAD, 858 * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
814 * get_online_cpus() protects cwq->thread. 859 * cpu_add_remove_lock protects cwq->thread.
815 */ 860 */
816 if (cwq->thread == NULL) 861 if (cwq->thread == NULL)
817 return; 862 return;
@@ -821,7 +866,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
821 866
822 flush_cpu_workqueue(cwq); 867 flush_cpu_workqueue(cwq);
823 /* 868 /*
824 * If the caller is CPU_DEAD and cwq->worklist was not empty, 869 * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
825 * a concurrent flush_workqueue() can insert a barrier after us. 870 * a concurrent flush_workqueue() can insert a barrier after us.
826 * However, in that case run_workqueue() won't return and check 871 * However, in that case run_workqueue() won't return and check
827 * kthread_should_stop() until it flushes all work_struct's. 872 * kthread_should_stop() until it flushes all work_struct's.
@@ -845,14 +890,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
845 const cpumask_t *cpu_map = wq_cpu_map(wq); 890 const cpumask_t *cpu_map = wq_cpu_map(wq);
846 int cpu; 891 int cpu;
847 892
848 get_online_cpus(); 893 cpu_maps_update_begin();
849 spin_lock(&workqueue_lock); 894 spin_lock(&workqueue_lock);
850 list_del(&wq->list); 895 list_del(&wq->list);
851 spin_unlock(&workqueue_lock); 896 spin_unlock(&workqueue_lock);
852 897
853 for_each_cpu_mask_nr(cpu, *cpu_map) 898 for_each_cpu_mask_nr(cpu, *cpu_map)
854 cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu)); 899 cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
855 put_online_cpus(); 900 cpu_maps_update_done();
856 901
857 free_percpu(wq->cpu_wq); 902 free_percpu(wq->cpu_wq);
858 kfree(wq); 903 kfree(wq);
@@ -866,6 +911,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
866 unsigned int cpu = (unsigned long)hcpu; 911 unsigned int cpu = (unsigned long)hcpu;
867 struct cpu_workqueue_struct *cwq; 912 struct cpu_workqueue_struct *cwq;
868 struct workqueue_struct *wq; 913 struct workqueue_struct *wq;
914 int ret = NOTIFY_OK;
869 915
870 action &= ~CPU_TASKS_FROZEN; 916 action &= ~CPU_TASKS_FROZEN;
871 917
@@ -873,7 +919,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
873 case CPU_UP_PREPARE: 919 case CPU_UP_PREPARE:
874 cpu_set(cpu, cpu_populated_map); 920 cpu_set(cpu, cpu_populated_map);
875 } 921 }
876 922undo:
877 list_for_each_entry(wq, &workqueues, list) { 923 list_for_each_entry(wq, &workqueues, list) {
878 cwq = per_cpu_ptr(wq->cpu_wq, cpu); 924 cwq = per_cpu_ptr(wq->cpu_wq, cpu);
879 925
@@ -883,7 +929,9 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
883 break; 929 break;
884 printk(KERN_ERR "workqueue [%s] for %i failed\n", 930 printk(KERN_ERR "workqueue [%s] for %i failed\n",
885 wq->name, cpu); 931 wq->name, cpu);
886 return NOTIFY_BAD; 932 action = CPU_UP_CANCELED;
933 ret = NOTIFY_BAD;
934 goto undo;
887 935
888 case CPU_ONLINE: 936 case CPU_ONLINE:
889 start_workqueue_thread(cwq, cpu); 937 start_workqueue_thread(cwq, cpu);
@@ -891,7 +939,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
891 939
892 case CPU_UP_CANCELED: 940 case CPU_UP_CANCELED:
893 start_workqueue_thread(cwq, -1); 941 start_workqueue_thread(cwq, -1);
894 case CPU_DEAD: 942 case CPU_POST_DEAD:
895 cleanup_workqueue_thread(cwq); 943 cleanup_workqueue_thread(cwq);
896 break; 944 break;
897 } 945 }
@@ -899,11 +947,11 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
899 947
900 switch (action) { 948 switch (action) {
901 case CPU_UP_CANCELED: 949 case CPU_UP_CANCELED:
902 case CPU_DEAD: 950 case CPU_POST_DEAD:
903 cpu_clear(cpu, cpu_populated_map); 951 cpu_clear(cpu, cpu_populated_map);
904 } 952 }
905 953
906 return NOTIFY_OK; 954 return ret;
907} 955}
908 956
909void __init init_workqueues(void) 957void __init init_workqueues(void)
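
The hotplug notifier changes are twofold. Cleanup moves from CPU_DEAD to CPU_POST_DEAD, which is delivered with cpu_add_remove_lock held, hence the cpu_maps_update_begin/done pairs that replace get/put_online_cpus() around workqueue creation and destruction. And a failed CPU_UP_PREPARE now unwinds by replaying the loop with the action flipped to CPU_UP_CANCELED before returning NOTIFY_BAD. A compilable toy of that unwind idiom, illustrative only:

enum { DO_SETUP, DO_CANCEL };

static int setup_all(int n, int fail_at)
{
	int action = DO_SETUP, ret = 0, i;
undo:
	for (i = 0; i < n; i++) {
		if (action == DO_SETUP && i == fail_at) {
			action = DO_CANCEL;	/* tear down from the top */
			ret = -1;
			goto undo;
		}
		/* DO_SETUP: bring up item i; DO_CANCEL: shut it down */
	}
	return ret;
}
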
diff --git a/lib/cmdline.c b/lib/cmdline.c
index f596c08d213a..5ba8a942a478 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -116,7 +116,7 @@ char *get_options(const char *str, int nints, int *ints)
116/** 116/**
117 * memparse - parse a string with mem suffixes into a number 117 * memparse - parse a string with mem suffixes into a number
118 * @ptr: Where parse begins 118 * @ptr: Where parse begins
119 * @retptr: (output) Pointer to next char after parse completes 119 * @retptr: (output) Optional pointer to next char after parse completes
120 * 120 *
121 * Parses a string into a number. The number stored at @ptr is 121 * Parses a string into a number. The number stored at @ptr is
122 * potentially suffixed with %K (for kilobytes, or 1024 bytes), 122 * potentially suffixed with %K (for kilobytes, or 1024 bytes),
@@ -126,11 +126,13 @@ char *get_options(const char *str, int nints, int *ints)
126 * megabyte, or one gigabyte, respectively. 126 * megabyte, or one gigabyte, respectively.
127 */ 127 */
128 128
129unsigned long long memparse (char *ptr, char **retptr) 129unsigned long long memparse(char *ptr, char **retptr)
130{ 130{
131 unsigned long long ret = simple_strtoull (ptr, retptr, 0); 131 char *endptr; /* local pointer to end of parsed string */
132 132
133 switch (**retptr) { 133 unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
134
135 switch (*endptr) {
134 case 'G': 136 case 'G':
135 case 'g': 137 case 'g':
136 ret <<= 10; 138 ret <<= 10;
@@ -140,10 +142,14 @@ unsigned long long memparse (char *ptr, char **retptr)
140 case 'K': 142 case 'K':
141 case 'k': 143 case 'k':
142 ret <<= 10; 144 ret <<= 10;
143 (*retptr)++; 145 endptr++;
144 default: 146 default:
145 break; 147 break;
146 } 148 }
149
150 if (retptr)
151 *retptr = endptr;
152
147 return ret; 153 return ret;
148} 154}
149 155
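
memparse() gains a local end pointer so @retptr becomes optional: callers that only want the value can now pass NULL, as the updated kerneldoc notes. The lib/res_counter.c hunk above leans on this parser, and the cast there carries a FIXME because memparse() still takes a non-const char pointer. A hedged sketch:

char arg[] = "64M";
unsigned long long bytes;

/* suffixes K/M/G each add a 10-bit shift; retptr may now be NULL */
bytes = memparse(arg, NULL);	/* 67108864 */
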
diff --git a/lib/idr.c b/lib/idr.c
index 7a02e173f027..3476f8203e97 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,6 +6,8 @@
6 * Modified by George Anzinger to reuse immediately and to use 6 * Modified by George Anzinger to reuse immediately and to use
7 * find bit instructions. Also removed _irq on spinlocks. 7 * find bit instructions. Also removed _irq on spinlocks.
8 * 8 *
9 * Modified by Nadia Derbey to make it RCU safe.
10 *
9 * Small id to pointer translation service. 11 * Small id to pointer translation service.
10 * 12 *
11 * It uses a radix tree like structure as a sparse array indexed 13 * It uses a radix tree like structure as a sparse array indexed
@@ -35,7 +37,7 @@
35 37
36static struct kmem_cache *idr_layer_cache; 38static struct kmem_cache *idr_layer_cache;
37 39
38static struct idr_layer *alloc_layer(struct idr *idp) 40static struct idr_layer *get_from_free_list(struct idr *idp)
39{ 41{
40 struct idr_layer *p; 42 struct idr_layer *p;
41 unsigned long flags; 43 unsigned long flags;
@@ -50,15 +52,28 @@ static struct idr_layer *alloc_layer(struct idr *idp)
50 return(p); 52 return(p);
51} 53}
52 54
55static void idr_layer_rcu_free(struct rcu_head *head)
56{
57 struct idr_layer *layer;
58
59 layer = container_of(head, struct idr_layer, rcu_head);
60 kmem_cache_free(idr_layer_cache, layer);
61}
62
63static inline void free_layer(struct idr_layer *p)
64{
65 call_rcu(&p->rcu_head, idr_layer_rcu_free);
66}
67
53/* only called when idp->lock is held */ 68/* only called when idp->lock is held */
54static void __free_layer(struct idr *idp, struct idr_layer *p) 69static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
55{ 70{
56 p->ary[0] = idp->id_free; 71 p->ary[0] = idp->id_free;
57 idp->id_free = p; 72 idp->id_free = p;
58 idp->id_free_cnt++; 73 idp->id_free_cnt++;
59} 74}
60 75
61static void free_layer(struct idr *idp, struct idr_layer *p) 76static void move_to_free_list(struct idr *idp, struct idr_layer *p)
62{ 77{
63 unsigned long flags; 78 unsigned long flags;
64 79
@@ -66,7 +81,7 @@ static void free_layer(struct idr *idp, struct idr_layer *p)
66 * Depends on the return element being zeroed. 81 * Depends on the return element being zeroed.
67 */ 82 */
68 spin_lock_irqsave(&idp->lock, flags); 83 spin_lock_irqsave(&idp->lock, flags);
69 __free_layer(idp, p); 84 __move_to_free_list(idp, p);
70 spin_unlock_irqrestore(&idp->lock, flags); 85 spin_unlock_irqrestore(&idp->lock, flags);
71} 86}
72 87
@@ -96,7 +111,7 @@ static void idr_mark_full(struct idr_layer **pa, int id)
96 * @gfp_mask: memory allocation flags 111 * @gfp_mask: memory allocation flags
97 * 112 *
98 * This function should be called prior to locking and calling the 113 * This function should be called prior to locking and calling the
99 * following function. It preallocates enough memory to satisfy 114 * idr_get_new* functions. It preallocates enough memory to satisfy
100 * the worst possible allocation. 115 * the worst possible allocation.
101 * 116 *
102 * If the system is REALLY out of memory this function returns 0, 117 * If the system is REALLY out of memory this function returns 0,
@@ -109,7 +124,7 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
109 new = kmem_cache_alloc(idr_layer_cache, gfp_mask); 124 new = kmem_cache_alloc(idr_layer_cache, gfp_mask);
110 if (new == NULL) 125 if (new == NULL)
111 return (0); 126 return (0);
112 free_layer(idp, new); 127 move_to_free_list(idp, new);
113 } 128 }
114 return 1; 129 return 1;
115} 130}
@@ -143,7 +158,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
143 /* if already at the top layer, we need to grow */ 158 /* if already at the top layer, we need to grow */
144 if (!(p = pa[l])) { 159 if (!(p = pa[l])) {
145 *starting_id = id; 160 *starting_id = id;
146 return -2; 161 return IDR_NEED_TO_GROW;
147 } 162 }
148 163
149 /* If we need to go up one layer, continue the 164 /* If we need to go up one layer, continue the
@@ -160,16 +175,17 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
160 id = ((id >> sh) ^ n ^ m) << sh; 175 id = ((id >> sh) ^ n ^ m) << sh;
161 } 176 }
162 if ((id >= MAX_ID_BIT) || (id < 0)) 177 if ((id >= MAX_ID_BIT) || (id < 0))
163 return -3; 178 return IDR_NOMORE_SPACE;
164 if (l == 0) 179 if (l == 0)
165 break; 180 break;
166 /* 181 /*
167 * Create the layer below if it is missing. 182 * Create the layer below if it is missing.
168 */ 183 */
169 if (!p->ary[m]) { 184 if (!p->ary[m]) {
170 if (!(new = alloc_layer(idp))) 185 new = get_from_free_list(idp);
186 if (!new)
171 return -1; 187 return -1;
172 p->ary[m] = new; 188 rcu_assign_pointer(p->ary[m], new);
173 p->count++; 189 p->count++;
174 } 190 }
175 pa[l--] = p; 191 pa[l--] = p;
@@ -192,7 +208,7 @@ build_up:
192 p = idp->top; 208 p = idp->top;
193 layers = idp->layers; 209 layers = idp->layers;
194 if (unlikely(!p)) { 210 if (unlikely(!p)) {
195 if (!(p = alloc_layer(idp))) 211 if (!(p = get_from_free_list(idp)))
196 return -1; 212 return -1;
197 layers = 1; 213 layers = 1;
198 } 214 }
@@ -204,7 +220,7 @@ build_up:
204 layers++; 220 layers++;
205 if (!p->count) 221 if (!p->count)
206 continue; 222 continue;
207 if (!(new = alloc_layer(idp))) { 223 if (!(new = get_from_free_list(idp))) {
208 /* 224 /*
209 * The allocation failed. If we built part of 225 * The allocation failed. If we built part of
210 * the structure tear it down. 226 * the structure tear it down.
@@ -214,7 +230,7 @@ build_up:
214 p = p->ary[0]; 230 p = p->ary[0];
215 new->ary[0] = NULL; 231 new->ary[0] = NULL;
216 new->bitmap = new->count = 0; 232 new->bitmap = new->count = 0;
217 __free_layer(idp, new); 233 __move_to_free_list(idp, new);
218 } 234 }
219 spin_unlock_irqrestore(&idp->lock, flags); 235 spin_unlock_irqrestore(&idp->lock, flags);
220 return -1; 236 return -1;
@@ -225,10 +241,10 @@ build_up:
225 __set_bit(0, &new->bitmap); 241 __set_bit(0, &new->bitmap);
226 p = new; 242 p = new;
227 } 243 }
228 idp->top = p; 244 rcu_assign_pointer(idp->top, p);
229 idp->layers = layers; 245 idp->layers = layers;
230 v = sub_alloc(idp, &id, pa); 246 v = sub_alloc(idp, &id, pa);
231 if (v == -2) 247 if (v == IDR_NEED_TO_GROW)
232 goto build_up; 248 goto build_up;
233 return(v); 249 return(v);
234} 250}
@@ -244,7 +260,8 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
244 * Successfully found an empty slot. Install the user 260 * Successfully found an empty slot. Install the user
245 * pointer and mark the slot full. 261 * pointer and mark the slot full.
246 */ 262 */
247 pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr; 263 rcu_assign_pointer(pa[0]->ary[id & IDR_MASK],
264 (struct idr_layer *)ptr);
248 pa[0]->count++; 265 pa[0]->count++;
249 idr_mark_full(pa, id); 266 idr_mark_full(pa, id);
250 } 267 }
@@ -277,12 +294,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
277 * This is a cheap hack until the IDR code can be fixed to 294 * This is a cheap hack until the IDR code can be fixed to
278 * return proper error values. 295 * return proper error values.
279 */ 296 */
280 if (rv < 0) { 297 if (rv < 0)
281 if (rv == -1) 298 return _idr_rc_to_errno(rv);
282 return -EAGAIN;
283 else /* Will be -3 */
284 return -ENOSPC;
285 }
286 *id = rv; 299 *id = rv;
287 return 0; 300 return 0;
288} 301}
@@ -312,12 +325,8 @@ int idr_get_new(struct idr *idp, void *ptr, int *id)
312 * This is a cheap hack until the IDR code can be fixed to 325 * This is a cheap hack until the IDR code can be fixed to
313 * return proper error values. 326 * return proper error values.
314 */ 327 */
315 if (rv < 0) { 328 if (rv < 0)
316 if (rv == -1) 329 return _idr_rc_to_errno(rv);
317 return -EAGAIN;
318 else /* Will be -3 */
319 return -ENOSPC;
320 }
321 *id = rv; 330 *id = rv;
322 return 0; 331 return 0;
323} 332}
@@ -325,7 +334,8 @@ EXPORT_SYMBOL(idr_get_new);
325 334
326static void idr_remove_warning(int id) 335static void idr_remove_warning(int id)
327{ 336{
328 printk("idr_remove called for id=%d which is not allocated.\n", id); 337 printk(KERN_WARNING
338 "idr_remove called for id=%d which is not allocated.\n", id);
329 dump_stack(); 339 dump_stack();
330} 340}
331 341
@@ -334,6 +344,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
334 struct idr_layer *p = idp->top; 344 struct idr_layer *p = idp->top;
335 struct idr_layer **pa[MAX_LEVEL]; 345 struct idr_layer **pa[MAX_LEVEL];
336 struct idr_layer ***paa = &pa[0]; 346 struct idr_layer ***paa = &pa[0];
347 struct idr_layer *to_free;
337 int n; 348 int n;
338 349
339 *paa = NULL; 350 *paa = NULL;
@@ -349,13 +360,18 @@ static void sub_remove(struct idr *idp, int shift, int id)
349 n = id & IDR_MASK; 360 n = id & IDR_MASK;
350 if (likely(p != NULL && test_bit(n, &p->bitmap))){ 361 if (likely(p != NULL && test_bit(n, &p->bitmap))){
351 __clear_bit(n, &p->bitmap); 362 __clear_bit(n, &p->bitmap);
352 p->ary[n] = NULL; 363 rcu_assign_pointer(p->ary[n], NULL);
364 to_free = NULL;
353 while(*paa && ! --((**paa)->count)){ 365 while(*paa && ! --((**paa)->count)){
354 free_layer(idp, **paa); 366 if (to_free)
367 free_layer(to_free);
368 to_free = **paa;
355 **paa-- = NULL; 369 **paa-- = NULL;
356 } 370 }
357 if (!*paa) 371 if (!*paa)
358 idp->layers = 0; 372 idp->layers = 0;
373 if (to_free)
374 free_layer(to_free);
359 } else 375 } else
360 idr_remove_warning(id); 376 idr_remove_warning(id);
361} 377}
@@ -368,22 +384,34 @@ static void sub_remove(struct idr *idp, int shift, int id)
368void idr_remove(struct idr *idp, int id) 384void idr_remove(struct idr *idp, int id)
369{ 385{
370 struct idr_layer *p; 386 struct idr_layer *p;
387 struct idr_layer *to_free;
371 388
372 /* Mask off upper bits we don't use for the search. */ 389 /* Mask off upper bits we don't use for the search. */
373 id &= MAX_ID_MASK; 390 id &= MAX_ID_MASK;
374 391
375 sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); 392 sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
376 if (idp->top && idp->top->count == 1 && (idp->layers > 1) && 393 if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
377 idp->top->ary[0]) { // We can drop a layer 394 idp->top->ary[0]) {
378 395 /*
396 * Single child at leftmost slot: we can shrink the tree.
397 * This level is not needed anymore since when layers are
398 * inserted, they are inserted at the top of the existing
399 * tree.
400 */
401 to_free = idp->top;
379 p = idp->top->ary[0]; 402 p = idp->top->ary[0];
380 idp->top->bitmap = idp->top->count = 0; 403 rcu_assign_pointer(idp->top, p);
381 free_layer(idp, idp->top);
382 idp->top = p;
383 --idp->layers; 404 --idp->layers;
405 to_free->bitmap = to_free->count = 0;
406 free_layer(to_free);
384 } 407 }
385 while (idp->id_free_cnt >= IDR_FREE_MAX) { 408 while (idp->id_free_cnt >= IDR_FREE_MAX) {
386 p = alloc_layer(idp); 409 p = get_from_free_list(idp);
410 /*
411 * Note: we don't call the rcu callback here, since the only
412 * layers that fall into the freelist are those that have been
413 * preallocated.
414 */
387 kmem_cache_free(idr_layer_cache, p); 415 kmem_cache_free(idr_layer_cache, p);
388 } 416 }
389 return; 417 return;
@@ -424,15 +452,13 @@ void idr_remove_all(struct idr *idp)
424 452
425 id += 1 << n; 453 id += 1 << n;
426 while (n < fls(id)) { 454 while (n < fls(id)) {
427 if (p) { 455 if (p)
428 memset(p, 0, sizeof *p); 456 free_layer(p);
429 free_layer(idp, p);
430 }
431 n += IDR_BITS; 457 n += IDR_BITS;
432 p = *--paa; 458 p = *--paa;
433 } 459 }
434 } 460 }
435 idp->top = NULL; 461 rcu_assign_pointer(idp->top, NULL);
436 idp->layers = 0; 462 idp->layers = 0;
437} 463}
438EXPORT_SYMBOL(idr_remove_all); 464EXPORT_SYMBOL(idr_remove_all);
@@ -444,7 +470,7 @@ EXPORT_SYMBOL(idr_remove_all);
444void idr_destroy(struct idr *idp) 470void idr_destroy(struct idr *idp)
445{ 471{
446 while (idp->id_free_cnt) { 472 while (idp->id_free_cnt) {
447 struct idr_layer *p = alloc_layer(idp); 473 struct idr_layer *p = get_from_free_list(idp);
448 kmem_cache_free(idr_layer_cache, p); 474 kmem_cache_free(idr_layer_cache, p);
449 } 475 }
450} 476}
@@ -459,7 +485,8 @@ EXPORT_SYMBOL(idr_destroy);
459 * return indicates that @id is not valid or you passed %NULL in 485 * return indicates that @id is not valid or you passed %NULL in
460 * idr_get_new(). 486 * idr_get_new().
461 * 487 *
462 * The caller must serialize idr_find() vs idr_get_new() and idr_remove(). 488 * This function can be called under rcu_read_lock(), given that the leaf
489 * pointers lifetimes are correctly managed.
463 */ 490 */
464void *idr_find(struct idr *idp, int id) 491void *idr_find(struct idr *idp, int id)
465{ 492{
@@ -467,7 +494,7 @@ void *idr_find(struct idr *idp, int id)
467 struct idr_layer *p; 494 struct idr_layer *p;
468 495
469 n = idp->layers * IDR_BITS; 496 n = idp->layers * IDR_BITS;
470 p = idp->top; 497 p = rcu_dereference(idp->top);
471 498
472 /* Mask off upper bits we don't use for the search. */ 499 /* Mask off upper bits we don't use for the search. */
473 id &= MAX_ID_MASK; 500 id &= MAX_ID_MASK;
@@ -477,7 +504,7 @@ void *idr_find(struct idr *idp, int id)
477 504
478 while (n > 0 && p) { 505 while (n > 0 && p) {
479 n -= IDR_BITS; 506 n -= IDR_BITS;
480 p = p->ary[(id >> n) & IDR_MASK]; 507 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
481 } 508 }
482 return((void *)p); 509 return((void *)p);
483} 510}
@@ -510,7 +537,7 @@ int idr_for_each(struct idr *idp,
510 struct idr_layer **paa = &pa[0]; 537 struct idr_layer **paa = &pa[0];
511 538
512 n = idp->layers * IDR_BITS; 539 n = idp->layers * IDR_BITS;
513 p = idp->top; 540 p = rcu_dereference(idp->top);
514 max = 1 << n; 541 max = 1 << n;
515 542
516 id = 0; 543 id = 0;
@@ -518,7 +545,7 @@ int idr_for_each(struct idr *idp,
518 while (n > 0 && p) { 545 while (n > 0 && p) {
519 n -= IDR_BITS; 546 n -= IDR_BITS;
520 *paa++ = p; 547 *paa++ = p;
521 p = p->ary[(id >> n) & IDR_MASK]; 548 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
522 } 549 }
523 550
524 if (p) { 551 if (p) {
@@ -548,7 +575,7 @@ EXPORT_SYMBOL(idr_for_each);
548 * A -ENOENT return indicates that @id was not found. 575 * A -ENOENT return indicates that @id was not found.
549 * A -EINVAL return indicates that @id was not within valid constraints. 576 * A -EINVAL return indicates that @id was not within valid constraints.
550 * 577 *
551 * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove(). 578 * The caller must serialize with writers.
552 */ 579 */
553void *idr_replace(struct idr *idp, void *ptr, int id) 580void *idr_replace(struct idr *idp, void *ptr, int id)
554{ 581{
@@ -574,7 +601,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
574 return ERR_PTR(-ENOENT); 601 return ERR_PTR(-ENOENT);
575 602
576 old_p = p->ary[n]; 603 old_p = p->ary[n];
577 p->ary[n] = ptr; 604 rcu_assign_pointer(p->ary[n], ptr);
578 605
579 return old_p; 606 return old_p;
580} 607}
@@ -694,12 +721,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
694 restart: 721 restart:
695 /* get vacant slot */ 722 /* get vacant slot */
696 t = idr_get_empty_slot(&ida->idr, idr_id, pa); 723 t = idr_get_empty_slot(&ida->idr, idr_id, pa);
697 if (t < 0) { 724 if (t < 0)
698 if (t == -1) 725 return _idr_rc_to_errno(t);
699 return -EAGAIN;
700 else /* will be -3 */
701 return -ENOSPC;
702 }
703 726
704 if (t * IDA_BITMAP_BITS >= MAX_ID_BIT) 727 if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
705 return -ENOSPC; 728 return -ENOSPC;
@@ -720,7 +743,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
720 return -EAGAIN; 743 return -EAGAIN;
721 744
722 memset(bitmap, 0, sizeof(struct ida_bitmap)); 745 memset(bitmap, 0, sizeof(struct ida_bitmap));
723 pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap; 746 rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
747 (void *)bitmap);
724 pa[0]->count++; 748 pa[0]->count++;
725 } 749 }
726 750
@@ -749,7 +773,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
749 * allocation. 773 * allocation.
750 */ 774 */
751 if (ida->idr.id_free_cnt || ida->free_bitmap) { 775 if (ida->idr.id_free_cnt || ida->free_bitmap) {
752 struct idr_layer *p = alloc_layer(&ida->idr); 776 struct idr_layer *p = get_from_free_list(&ida->idr);
753 if (p) 777 if (p)
754 kmem_cache_free(idr_layer_cache, p); 778 kmem_cache_free(idr_layer_cache, p);
755 } 779 }
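
The idr rework makes lookups RCU-safe: every pointer store goes through rcu_assign_pointer(), traversal uses rcu_dereference(), and layers are returned through call_rcu() in free_layer() instead of being recycled immediately, while get_from_free_list()/move_to_free_list() name the preallocation pool honestly. idr_find() may therefore run under rcu_read_lock() as long as the leaf objects' lifetimes are themselves RCU-managed; writers still serialize externally. A hedged lookup sketch where my_obj, my_idr and the refcount field are illustrative:

struct my_obj {
	atomic_t refcnt;
	/* payload */
};

static DEFINE_IDR(my_idr);

static struct my_obj *my_lookup(int id)
{
	struct my_obj *obj;

	rcu_read_lock();
	obj = idr_find(&my_idr, id);
	if (obj && !atomic_inc_not_zero(&obj->refcnt))
		obj = NULL;	/* raced with the final put */
	rcu_read_unlock();
	return obj;
}
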
diff --git a/lib/inflate.c b/lib/inflate.c
index 9762294be062..1a8e8a978128 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -230,6 +230,45 @@ STATIC const ush mask_bits[] = {
230#define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}} 230#define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
231#define DUMPBITS(n) {b>>=(n);k-=(n);} 231#define DUMPBITS(n) {b>>=(n);k-=(n);}
232 232
233#ifndef NO_INFLATE_MALLOC
234/* A trivial malloc implementation, adapted from
235 * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
236 */
237
238static unsigned long malloc_ptr;
239static int malloc_count;
240
241static void *malloc(int size)
242{
243 void *p;
244
245 if (size < 0)
246 error("Malloc error");
247 if (!malloc_ptr)
248 malloc_ptr = free_mem_ptr;
249
250 malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
251
252 p = (void *)malloc_ptr;
253 malloc_ptr += size;
254
255 if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
256 error("Out of memory");
257
258 malloc_count++;
259 return p;
260}
261
262static void free(void *where)
263{
264 malloc_count--;
265 if (!malloc_count)
266 malloc_ptr = free_mem_ptr;
267}
268#else
269#define malloc(a) kmalloc(a, GFP_KERNEL)
270#define free(a) kfree(a)
271#endif
233 272
234/* 273/*
235 Huffman code decoding is performed using a multi-level table lookup. 274 Huffman code decoding is performed using a multi-level table lookup.
@@ -1045,7 +1084,6 @@ STATIC int INIT inflate(void)
1045 int e; /* last block flag */ 1084 int e; /* last block flag */
1046 int r; /* result code */ 1085 int r; /* result code */
1047 unsigned h; /* maximum struct huft's malloc'ed */ 1086 unsigned h; /* maximum struct huft's malloc'ed */
1048 void *ptr;
1049 1087
1050 /* initialize window, bit buffer */ 1088 /* initialize window, bit buffer */
1051 wp = 0; 1089 wp = 0;
@@ -1057,12 +1095,12 @@ STATIC int INIT inflate(void)
1057 h = 0; 1095 h = 0;
1058 do { 1096 do {
1059 hufts = 0; 1097 hufts = 0;
1060 gzip_mark(&ptr); 1098#ifdef ARCH_HAS_DECOMP_WDOG
1061 if ((r = inflate_block(&e)) != 0) { 1099 arch_decomp_wdog();
1062 gzip_release(&ptr); 1100#endif
1063 return r; 1101 r = inflate_block(&e);
1064 } 1102 if (r)
1065 gzip_release(&ptr); 1103 return r;
1066 if (hufts > h) 1104 if (hufts > h)
1067 h = hufts; 1105 h = hufts;
1068 } while (!e); 1106 } while (!e);
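
The gzip_mark()/gzip_release() hooks that each caller previously had to provide are replaced by a trivial bump allocator inside lib/inflate.c itself: malloc() advances malloc_ptr through the free_mem area, and free() only resets the arena once the outstanding-allocation count drops to zero, which matches inflate's free-everything-per-block pattern. The 4-byte alignment step it uses generalizes to any power of two:

/* the allocator's alignment step, generalized (align = 4 above) */
static unsigned long align_up(unsigned long ptr, unsigned long align)
{
	return (ptr + align - 1) & ~(align - 1);
}
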
diff --git a/lib/kobject.c b/lib/kobject.c
index 744401571ed7..bd732ffebc85 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -164,9 +164,8 @@ static int kobject_add_internal(struct kobject *kobj)
164 return -ENOENT; 164 return -ENOENT;
165 165
166 if (!kobj->name || !kobj->name[0]) { 166 if (!kobj->name || !kobj->name[0]) {
167 pr_debug("kobject: (%p): attempted to be registered with empty " 167 WARN(1, "kobject: (%p): attempted to be registered with empty "
168 "name!\n", kobj); 168 "name!\n", kobj);
169 WARN_ON(1);
170 return -EINVAL; 169 return -EINVAL;
171 } 170 }
172 171
@@ -583,12 +582,10 @@ static void kobject_release(struct kref *kref)
583void kobject_put(struct kobject *kobj) 582void kobject_put(struct kobject *kobj)
584{ 583{
585 if (kobj) { 584 if (kobj) {
586 if (!kobj->state_initialized) { 585 if (!kobj->state_initialized)
587 printk(KERN_WARNING "kobject: '%s' (%p): is not " 586 WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
588 "initialized, yet kobject_put() is being " 587 "initialized, yet kobject_put() is being "
589 "called.\n", kobject_name(kobj), kobj); 588 "called.\n", kobject_name(kobj), kobj);
590 WARN_ON(1);
591 }
592 kref_put(&kobj->kref, kobject_release); 589 kref_put(&kobj->kref, kobject_release);
593 } 590 }
594} 591}
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 4350ba9655bd..1a39f4e3ae1f 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,18 +20,14 @@ void __list_add(struct list_head *new,
20 struct list_head *prev, 20 struct list_head *prev,
21 struct list_head *next) 21 struct list_head *next)
22{ 22{
23 if (unlikely(next->prev != prev)) { 23 WARN(next->prev != prev,
24 printk(KERN_ERR "list_add corruption. next->prev should be " 24 "list_add corruption. next->prev should be "
25 "prev (%p), but was %p. (next=%p).\n", 25 "prev (%p), but was %p. (next=%p).\n",
26 prev, next->prev, next); 26 prev, next->prev, next);
27 BUG(); 27 WARN(prev->next != next,
28 } 28 "list_add corruption. prev->next should be "
29 if (unlikely(prev->next != next)) { 29 "next (%p), but was %p. (prev=%p).\n",
30 printk(KERN_ERR "list_add corruption. prev->next should be " 30 next, prev->next, prev);
31 "next (%p), but was %p. (prev=%p).\n",
32 next, prev->next, prev);
33 BUG();
34 }
35 next->prev = new; 31 next->prev = new;
36 new->next = next; 32 new->next = next;
37 new->prev = prev; 33 new->prev = prev;
@@ -40,20 +36,6 @@ void __list_add(struct list_head *new,
40EXPORT_SYMBOL(__list_add); 36EXPORT_SYMBOL(__list_add);
41 37
42/** 38/**
43 * list_add - add a new entry
44 * @new: new entry to be added
45 * @head: list head to add it after
46 *
47 * Insert a new entry after the specified head.
48 * This is good for implementing stacks.
49 */
50void list_add(struct list_head *new, struct list_head *head)
51{
52 __list_add(new, head, head->next);
53}
54EXPORT_SYMBOL(list_add);
55
56/**
57 * list_del - deletes entry from list. 39 * list_del - deletes entry from list.
58 * @entry: the element to delete from the list. 40 * @entry: the element to delete from the list.
59 * Note: list_empty on entry does not return true after this, the entry is 41 * Note: list_empty on entry does not return true after this, the entry is
@@ -61,16 +43,12 @@ EXPORT_SYMBOL(list_add);
61 */ 43 */
62void list_del(struct list_head *entry) 44void list_del(struct list_head *entry)
63{ 45{
64 if (unlikely(entry->prev->next != entry)) { 46 WARN(entry->prev->next != entry,
65 printk(KERN_ERR "list_del corruption. prev->next should be %p, " 47 "list_del corruption. prev->next should be %p, "
66 "but was %p\n", entry, entry->prev->next); 48 "but was %p\n", entry, entry->prev->next);
67 BUG(); 49 WARN(entry->next->prev != entry,
68 } 50 "list_del corruption. next->prev should be %p, "
69 if (unlikely(entry->next->prev != entry)) { 51 "but was %p\n", entry, entry->next->prev);
70 printk(KERN_ERR "list_del corruption. next->prev should be %p, "
71 "but was %p\n", entry, entry->next->prev);
72 BUG();
73 }
74 __list_del(entry->prev, entry->next); 52 __list_del(entry->prev, entry->next);
75 entry->next = LIST_POISON1; 53 entry->next = LIST_POISON1;
76 entry->prev = LIST_POISON2; 54 entry->prev = LIST_POISON2;
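
Note the semantic shift in list_debug.c: the corruption checks used to BUG(), halting the task, while WARN() logs and lets the list operation proceed. Since WARN() evaluates to its condition, a caller that cannot tolerate continuing could still bail out, as in this sketch (the early-return policy is hypothetical; the hunk above deliberately proceeds):

    #include <linux/kernel.h>
    #include <linux/list.h>

    static bool checked_link(struct list_head *prev, struct list_head *next)
    {
            if (WARN(next->prev != prev, "list corruption detected\n"))
                    return false;   /* hypothetical early exit */
            return true;
    }
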
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 77f0f9b775a9..5dc6b29c1575 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -138,8 +138,7 @@ match:
138 t += 31 + *ip++; 138 t += 31 + *ip++;
139 } 139 }
140 m_pos = op - 1; 140 m_pos = op - 1;
141 m_pos -= le16_to_cpu(get_unaligned( 141 m_pos -= get_unaligned_le16(ip) >> 2;
142 (const unsigned short *)ip)) >> 2;
143 ip += 2; 142 ip += 2;
144 } else if (t >= 16) { 143 } else if (t >= 16) {
145 m_pos = op; 144 m_pos = op;
@@ -157,8 +156,7 @@ match:
157 } 156 }
158 t += 7 + *ip++; 157 t += 7 + *ip++;
159 } 158 }
160 m_pos -= le16_to_cpu(get_unaligned( 159 m_pos -= get_unaligned_le16(ip) >> 2;
161 (const unsigned short *)ip)) >> 2;
162 ip += 2; 160 ip += 2;
163 if (m_pos == op) 161 if (m_pos == op)
164 goto eof_found; 162 goto eof_found;
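
The two LZO hunks replace an open-coded le16_to_cpu(get_unaligned(...)) with get_unaligned_le16(), which folds the unaligned load and the little-endian conversion into one helper. A sketch of the equivalence, not taken from the diff:

    #include <linux/types.h>
    #include <asm/unaligned.h>

    static u16 read_match_distance(const unsigned char *ip)
    {
            /* old spelling: le16_to_cpu(get_unaligned((const u16 *)ip)) */
            return get_unaligned_le16(ip);
    }
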
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 485e3040dcd4..35136671b215 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -3,6 +3,9 @@
3 * 3 *
4 * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com> 4 * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
5 * 5 *
 6 * 2008-05-01 rewrote the function to take a ratelimit_state data struct as
 7 * its parameter. Now every user can use their own standalone ratelimit_state.
8 *
6 * This file is released under the GPLv2. 9 * This file is released under the GPLv2.
7 * 10 *
8 */ 11 */
@@ -11,41 +14,43 @@
11#include <linux/jiffies.h> 14#include <linux/jiffies.h>
12#include <linux/module.h> 15#include <linux/module.h>
13 16
17static DEFINE_SPINLOCK(ratelimit_lock);
18static unsigned long flags;
19
14/* 20/*
15 * __ratelimit - rate limiting 21 * __ratelimit - rate limiting
16 * @ratelimit_jiffies: minimum time in jiffies between two callbacks 22 * @rs: ratelimit_state data
17 * @ratelimit_burst: number of callbacks we do before ratelimiting
18 * 23 *
19 * This enforces a rate limit: not more than @ratelimit_burst callbacks 24 * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
20 * in every ratelimit_jiffies 25 * in every @rs->ratelimit_jiffies
21 */ 26 */
22int __ratelimit(int ratelimit_jiffies, int ratelimit_burst) 27int __ratelimit(struct ratelimit_state *rs)
23{ 28{
24 static DEFINE_SPINLOCK(ratelimit_lock); 29 if (!rs->interval)
25 static unsigned toks = 10 * 5 * HZ; 30 return 1;
26 static unsigned long last_msg;
27 static int missed;
28 unsigned long flags;
29 unsigned long now = jiffies;
30 31
31 spin_lock_irqsave(&ratelimit_lock, flags); 32 spin_lock_irqsave(&ratelimit_lock, flags);
32 toks += now - last_msg; 33 if (!rs->begin)
33 last_msg = now; 34 rs->begin = jiffies;
34 if (toks > (ratelimit_burst * ratelimit_jiffies))
35 toks = ratelimit_burst * ratelimit_jiffies;
36 if (toks >= ratelimit_jiffies) {
37 int lost = missed;
38 35
39 missed = 0; 36 if (time_is_before_jiffies(rs->begin + rs->interval)) {
40 toks -= ratelimit_jiffies; 37 if (rs->missed)
41 spin_unlock_irqrestore(&ratelimit_lock, flags); 38 printk(KERN_WARNING "%s: %d callbacks suppressed\n",
42 if (lost) 39 __func__, rs->missed);
43 printk(KERN_WARNING "%s: %d messages suppressed\n", 40 rs->begin = 0;
44 __func__, lost); 41 rs->printed = 0;
45 return 1; 42 rs->missed = 0;
46 } 43 }
47 missed++; 44 if (rs->burst && rs->burst > rs->printed)
45 goto print;
46
47 rs->missed++;
48 spin_unlock_irqrestore(&ratelimit_lock, flags); 48 spin_unlock_irqrestore(&ratelimit_lock, flags);
49 return 0; 49 return 0;
50
51print:
52 rs->printed++;
53 spin_unlock_irqrestore(&ratelimit_lock, flags);
54 return 1;
50} 55}
51EXPORT_SYMBOL(__ratelimit); 56EXPORT_SYMBOL(__ratelimit);
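
After this rewrite the token-bucket state is no longer a function-local static; every call site owns a struct ratelimit_state (though note the lock and flags above are still file-scope and shared). A minimal usage sketch with illustrative names, mirroring the 5-second/10-message policy the networking code adopts later in this series:

    #include <linux/kernel.h>
    #include <linux/ratelimit.h>

    static DEFINE_RATELIMIT_STATE(my_rs, 5 * HZ, 10);  /* interval, burst */

    static void report_overrun(int cpu)
    {
            if (__ratelimit(&my_rs))
                    printk(KERN_WARNING "overrun on cpu %d\n", cpu);
    }
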
diff --git a/mm/filemap.c b/mm/filemap.c
index 7675b91f4f63..2d3ec1ffc66e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page)
115{ 115{
116 struct address_space *mapping = page->mapping; 116 struct address_space *mapping = page->mapping;
117 117
118 mem_cgroup_uncharge_page(page); 118 mem_cgroup_uncharge_cache_page(page);
119 radix_tree_delete(&mapping->page_tree, page->index); 119 radix_tree_delete(&mapping->page_tree, page->index);
120 page->mapping = NULL; 120 page->mapping = NULL;
121 mapping->nrpages--; 121 mapping->nrpages--;
@@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
474 mapping->nrpages++; 474 mapping->nrpages++;
475 __inc_zone_page_state(page, NR_FILE_PAGES); 475 __inc_zone_page_state(page, NR_FILE_PAGES);
476 } else 476 } else
477 mem_cgroup_uncharge_page(page); 477 mem_cgroup_uncharge_cache_page(page);
478 478
479 write_unlock_irq(&mapping->tree_lock); 479 write_unlock_irq(&mapping->tree_lock);
480 radix_tree_preload_end(); 480 radix_tree_preload_end();
481 } else 481 } else
482 mem_cgroup_uncharge_page(page); 482 mem_cgroup_uncharge_cache_page(page);
483out: 483out:
484 return error; 484 return error;
485} 485}
@@ -2563,9 +2563,8 @@ EXPORT_SYMBOL(generic_file_aio_write);
2563 * Otherwise return zero. 2563 * Otherwise return zero.
2564 * 2564 *
2565 * The @gfp_mask argument specifies whether I/O may be performed to release 2565 * The @gfp_mask argument specifies whether I/O may be performed to release
2566 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). 2566 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
2567 * 2567 *
2568 * NOTE: @gfp_mask may go away, and this function may become non-blocking.
2569 */ 2568 */
2570int try_to_release_page(struct page *page, gfp_t gfp_mask) 2569int try_to_release_page(struct page *page, gfp_t gfp_mask)
2571{ 2570{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 41341c414194..a8bf4ab01f86 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1026,6 +1026,17 @@ static void __init report_hugepages(void)
1026 } 1026 }
1027} 1027}
1028 1028
1029static unsigned int cpuset_mems_nr(unsigned int *array)
1030{
1031 int node;
1032 unsigned int nr = 0;
1033
1034 for_each_node_mask(node, cpuset_current_mems_allowed)
1035 nr += array[node];
1036
1037 return nr;
1038}
1039
1029#ifdef CONFIG_SYSCTL 1040#ifdef CONFIG_SYSCTL
1030#ifdef CONFIG_HIGHMEM 1041#ifdef CONFIG_HIGHMEM
1031static void try_to_free_low(struct hstate *h, unsigned long count) 1042static void try_to_free_low(struct hstate *h, unsigned long count)
@@ -1375,17 +1386,6 @@ static int __init hugetlb_default_setup(char *s)
1375} 1386}
1376__setup("default_hugepagesz=", hugetlb_default_setup); 1387__setup("default_hugepagesz=", hugetlb_default_setup);
1377 1388
1378static unsigned int cpuset_mems_nr(unsigned int *array)
1379{
1380 int node;
1381 unsigned int nr = 0;
1382
1383 for_each_node_mask(node, cpuset_current_mems_allowed)
1384 nr += array[node];
1385
1386 return nr;
1387}
1388
1389int hugetlb_sysctl_handler(struct ctl_table *table, int write, 1389int hugetlb_sysctl_handler(struct ctl_table *table, int write,
1390 struct file *file, void __user *buffer, 1390 struct file *file, void __user *buffer,
1391 size_t *length, loff_t *ppos) 1391 size_t *length, loff_t *ppos)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b7..fba566c51322 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
35 35
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37 37
38struct cgroup_subsys mem_cgroup_subsys; 38struct cgroup_subsys mem_cgroup_subsys __read_mostly;
39static const int MEM_CGROUP_RECLAIM_RETRIES = 5; 39static struct kmem_cache *page_cgroup_cache __read_mostly;
40static struct kmem_cache *page_cgroup_cache; 40#define MEM_CGROUP_RECLAIM_RETRIES 5
41 41
42/* 42/*
43 * Statistics for memory cgroup. 43 * Statistics for memory cgroup.
@@ -166,7 +166,6 @@ struct page_cgroup {
166 struct list_head lru; /* per cgroup LRU list */ 166 struct list_head lru; /* per cgroup LRU list */
167 struct page *page; 167 struct page *page;
168 struct mem_cgroup *mem_cgroup; 168 struct mem_cgroup *mem_cgroup;
169 int ref_cnt; /* cached, mapped, migrating */
170 int flags; 169 int flags;
171}; 170};
172#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ 171#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
185enum charge_type { 184enum charge_type {
186 MEM_CGROUP_CHARGE_TYPE_CACHE = 0, 185 MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
187 MEM_CGROUP_CHARGE_TYPE_MAPPED, 186 MEM_CGROUP_CHARGE_TYPE_MAPPED,
187 MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
188}; 188};
189 189
190/* 190/*
@@ -296,7 +296,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
296 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1; 296 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
297 297
298 mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false); 298 mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
299 list_del_init(&pc->lru); 299 list_del(&pc->lru);
300} 300}
301 301
302static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, 302static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -354,6 +354,9 @@ void mem_cgroup_move_lists(struct page *page, bool active)
354 struct mem_cgroup_per_zone *mz; 354 struct mem_cgroup_per_zone *mz;
355 unsigned long flags; 355 unsigned long flags;
356 356
357 if (mem_cgroup_subsys.disabled)
358 return;
359
357 /* 360 /*
358 * We cannot lock_page_cgroup while holding zone's lru_lock, 361 * We cannot lock_page_cgroup while holding zone's lru_lock,
359 * because other holders of lock_page_cgroup can be interrupted 362 * because other holders of lock_page_cgroup can be interrupted
@@ -524,7 +527,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
524 * < 0 if the cgroup is over its limit 527 * < 0 if the cgroup is over its limit
525 */ 528 */
526static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, 529static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
527 gfp_t gfp_mask, enum charge_type ctype) 530 gfp_t gfp_mask, enum charge_type ctype,
531 struct mem_cgroup *memcg)
528{ 532{
529 struct mem_cgroup *mem; 533 struct mem_cgroup *mem;
530 struct page_cgroup *pc; 534 struct page_cgroup *pc;
@@ -532,35 +536,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
532 unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 536 unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
533 struct mem_cgroup_per_zone *mz; 537 struct mem_cgroup_per_zone *mz;
534 538
535 if (mem_cgroup_subsys.disabled) 539 pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
536 return 0; 540 if (unlikely(pc == NULL))
537
538 /*
539 * Should page_cgroup's go to their own slab?
540 * One could optimize the performance of the charging routine
541 * by saving a bit in the page_flags and using it as a lock
542 * to see if the cgroup page already has a page_cgroup associated
543 * with it
544 */
545retry:
546 lock_page_cgroup(page);
547 pc = page_get_page_cgroup(page);
548 /*
549 * The page_cgroup exists and
550 * the page has already been accounted.
551 */
552 if (pc) {
553 VM_BUG_ON(pc->page != page);
554 VM_BUG_ON(pc->ref_cnt <= 0);
555
556 pc->ref_cnt++;
557 unlock_page_cgroup(page);
558 goto done;
559 }
560 unlock_page_cgroup(page);
561
562 pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
563 if (pc == NULL)
564 goto err; 541 goto err;
565 542
566 /* 543 /*
@@ -569,16 +546,18 @@ retry:
569 * thread group leader migrates. It's possible that mm is not 546 * thread group leader migrates. It's possible that mm is not
570 * set, if so charge the init_mm (happens for pagecache usage). 547 * set, if so charge the init_mm (happens for pagecache usage).
571 */ 548 */
572 if (!mm) 549 if (likely(!memcg)) {
573 mm = &init_mm; 550 rcu_read_lock();
574 551 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
575 rcu_read_lock(); 552 /*
576 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 553 * For every charge from the cgroup, increment reference count
577 /* 554 */
578 * For every charge from the cgroup, increment reference count 555 css_get(&mem->css);
579 */ 556 rcu_read_unlock();
580 css_get(&mem->css); 557 } else {
581 rcu_read_unlock(); 558 mem = memcg;
559 css_get(&memcg->css);
560 }
582 561
583 while (res_counter_charge(&mem->res, PAGE_SIZE)) { 562 while (res_counter_charge(&mem->res, PAGE_SIZE)) {
584 if (!(gfp_mask & __GFP_WAIT)) 563 if (!(gfp_mask & __GFP_WAIT))
@@ -603,25 +582,24 @@ retry:
603 } 582 }
604 } 583 }
605 584
606 pc->ref_cnt = 1;
607 pc->mem_cgroup = mem; 585 pc->mem_cgroup = mem;
608 pc->page = page; 586 pc->page = page;
609 pc->flags = PAGE_CGROUP_FLAG_ACTIVE; 587 /*
 588 * If a page is accounted as page cache, insert it into the inactive list.
 589 * If anon, insert it into the active list.
590 */
610 if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) 591 if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
611 pc->flags = PAGE_CGROUP_FLAG_CACHE; 592 pc->flags = PAGE_CGROUP_FLAG_CACHE;
593 else
594 pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
612 595
613 lock_page_cgroup(page); 596 lock_page_cgroup(page);
614 if (page_get_page_cgroup(page)) { 597 if (unlikely(page_get_page_cgroup(page))) {
615 unlock_page_cgroup(page); 598 unlock_page_cgroup(page);
616 /*
617 * Another charge has been added to this page already.
618 * We take lock_page_cgroup(page) again and read
619 * page->cgroup, increment refcnt.... just retry is OK.
620 */
621 res_counter_uncharge(&mem->res, PAGE_SIZE); 599 res_counter_uncharge(&mem->res, PAGE_SIZE);
622 css_put(&mem->css); 600 css_put(&mem->css);
623 kmem_cache_free(page_cgroup_cache, pc); 601 kmem_cache_free(page_cgroup_cache, pc);
624 goto retry; 602 goto done;
625 } 603 }
626 page_assign_page_cgroup(page, pc); 604 page_assign_page_cgroup(page, pc);
627 605
@@ -642,24 +620,65 @@ err:
642 620
643int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) 621int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
644{ 622{
623 if (mem_cgroup_subsys.disabled)
624 return 0;
625
626 /*
627 * If already mapped, we don't have to account.
628 * If page cache, page->mapping has address_space.
 629 * But page->mapping may hold an out-of-use anon_vma pointer;
 630 * detect it with a PageAnon() check. A newly-mapped anon page's
 631 * page->mapping is NULL.
632 */
633 if (page_mapped(page) || (page->mapping && !PageAnon(page)))
634 return 0;
635 if (unlikely(!mm))
636 mm = &init_mm;
645 return mem_cgroup_charge_common(page, mm, gfp_mask, 637 return mem_cgroup_charge_common(page, mm, gfp_mask,
646 MEM_CGROUP_CHARGE_TYPE_MAPPED); 638 MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
647} 639}
648 640
649int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 641int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
650 gfp_t gfp_mask) 642 gfp_t gfp_mask)
651{ 643{
652 if (!mm) 644 if (mem_cgroup_subsys.disabled)
645 return 0;
646
647 /*
648 * Corner case handling. This is called from add_to_page_cache()
649 * in usual. But some FS (shmem) precharges this page before calling it
650 * and call add_to_page_cache() with GFP_NOWAIT.
651 *
652 * For GFP_NOWAIT case, the page may be pre-charged before calling
653 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
654 * charge twice. (It works but has to pay a bit larger cost.)
655 */
656 if (!(gfp_mask & __GFP_WAIT)) {
657 struct page_cgroup *pc;
658
659 lock_page_cgroup(page);
660 pc = page_get_page_cgroup(page);
661 if (pc) {
662 VM_BUG_ON(pc->page != page);
663 VM_BUG_ON(!pc->mem_cgroup);
664 unlock_page_cgroup(page);
665 return 0;
666 }
667 unlock_page_cgroup(page);
668 }
669
670 if (unlikely(!mm))
653 mm = &init_mm; 671 mm = &init_mm;
672
654 return mem_cgroup_charge_common(page, mm, gfp_mask, 673 return mem_cgroup_charge_common(page, mm, gfp_mask,
655 MEM_CGROUP_CHARGE_TYPE_CACHE); 674 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
656} 675}
657 676
658/* 677/*
659 * Uncharging is always a welcome operation, we never complain, simply 678 * uncharge if !page_mapped(page)
660 * uncharge.
661 */ 679 */
662void mem_cgroup_uncharge_page(struct page *page) 680static void
681__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
663{ 682{
664 struct page_cgroup *pc; 683 struct page_cgroup *pc;
665 struct mem_cgroup *mem; 684 struct mem_cgroup *mem;
@@ -674,98 +693,151 @@ void mem_cgroup_uncharge_page(struct page *page)
674 */ 693 */
675 lock_page_cgroup(page); 694 lock_page_cgroup(page);
676 pc = page_get_page_cgroup(page); 695 pc = page_get_page_cgroup(page);
677 if (!pc) 696 if (unlikely(!pc))
678 goto unlock; 697 goto unlock;
679 698
680 VM_BUG_ON(pc->page != page); 699 VM_BUG_ON(pc->page != page);
681 VM_BUG_ON(pc->ref_cnt <= 0);
682 700
683 if (--(pc->ref_cnt) == 0) { 701 if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
684 mz = page_cgroup_zoneinfo(pc); 702 && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
685 spin_lock_irqsave(&mz->lru_lock, flags); 703 || page_mapped(page)))
686 __mem_cgroup_remove_list(mz, pc); 704 goto unlock;
687 spin_unlock_irqrestore(&mz->lru_lock, flags);
688 705
689 page_assign_page_cgroup(page, NULL); 706 mz = page_cgroup_zoneinfo(pc);
690 unlock_page_cgroup(page); 707 spin_lock_irqsave(&mz->lru_lock, flags);
708 __mem_cgroup_remove_list(mz, pc);
709 spin_unlock_irqrestore(&mz->lru_lock, flags);
691 710
692 mem = pc->mem_cgroup; 711 page_assign_page_cgroup(page, NULL);
693 res_counter_uncharge(&mem->res, PAGE_SIZE); 712 unlock_page_cgroup(page);
694 css_put(&mem->css);
695 713
696 kmem_cache_free(page_cgroup_cache, pc); 714 mem = pc->mem_cgroup;
697 return; 715 res_counter_uncharge(&mem->res, PAGE_SIZE);
698 } 716 css_put(&mem->css);
699 717
718 kmem_cache_free(page_cgroup_cache, pc);
719 return;
700unlock: 720unlock:
701 unlock_page_cgroup(page); 721 unlock_page_cgroup(page);
702} 722}
703 723
724void mem_cgroup_uncharge_page(struct page *page)
725{
726 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
727}
728
729void mem_cgroup_uncharge_cache_page(struct page *page)
730{
731 VM_BUG_ON(page_mapped(page));
732 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
733}
734
704/* 735/*
705 * Returns non-zero if a page (under migration) has valid page_cgroup member. 736 * Before starting migration, account against new page.
706 * Refcnt of page_cgroup is incremented.
707 */ 737 */
708int mem_cgroup_prepare_migration(struct page *page) 738int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
709{ 739{
710 struct page_cgroup *pc; 740 struct page_cgroup *pc;
741 struct mem_cgroup *mem = NULL;
742 enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
743 int ret = 0;
711 744
712 if (mem_cgroup_subsys.disabled) 745 if (mem_cgroup_subsys.disabled)
713 return 0; 746 return 0;
714 747
715 lock_page_cgroup(page); 748 lock_page_cgroup(page);
716 pc = page_get_page_cgroup(page); 749 pc = page_get_page_cgroup(page);
717 if (pc) 750 if (pc) {
718 pc->ref_cnt++; 751 mem = pc->mem_cgroup;
752 css_get(&mem->css);
753 if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
754 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
755 }
719 unlock_page_cgroup(page); 756 unlock_page_cgroup(page);
720 return pc != NULL; 757 if (mem) {
758 ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
759 ctype, mem);
760 css_put(&mem->css);
761 }
762 return ret;
721} 763}
722 764
723void mem_cgroup_end_migration(struct page *page) 765/* remove redundant charge if migration failed */
766void mem_cgroup_end_migration(struct page *newpage)
724{ 767{
725 mem_cgroup_uncharge_page(page); 768 /*
769 * At success, page->mapping is not NULL.
770 * special rollback care is necessary when
771 * 1. at migration failure. (newpage->mapping is cleared in this case)
772 * 2. the newpage was moved but not remapped again because the task
773 * exits and the newpage is obsolete. In this case, the new page
774 * may be a swapcache. So, we just call mem_cgroup_uncharge_page()
775 * always for avoiding mess. The page_cgroup will be removed if
776 * unnecessary. File cache pages is still on radix-tree. Don't
777 * care it.
778 */
779 if (!newpage->mapping)
780 __mem_cgroup_uncharge_common(newpage,
781 MEM_CGROUP_CHARGE_TYPE_FORCE);
782 else if (PageAnon(newpage))
783 mem_cgroup_uncharge_page(newpage);
726} 784}
727 785
728/* 786/*
729 * We know both *page* and *newpage* are now not-on-LRU and PG_locked. 787 * A call to try to shrink memory usage under a specified resource controller.
730 * And no race with uncharge() routines because page_cgroup for *page* 788 * This is typically used for page reclaim on shmem, to reduce the side
731 * has extra one reference by mem_cgroup_prepare_migration. 789 * effects of page allocation from shmem, which some mem_cgroups use.
732 */ 790 */
733void mem_cgroup_page_migration(struct page *page, struct page *newpage) 791int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
734{ 792{
735 struct page_cgroup *pc; 793 struct mem_cgroup *mem;
736 struct mem_cgroup_per_zone *mz; 794 int progress = 0;
737 unsigned long flags; 795 int retry = MEM_CGROUP_RECLAIM_RETRIES;
738 796
739 lock_page_cgroup(page); 797 if (mem_cgroup_subsys.disabled)
740 pc = page_get_page_cgroup(page); 798 return 0;
741 if (!pc) {
742 unlock_page_cgroup(page);
743 return;
744 }
745 799
746 mz = page_cgroup_zoneinfo(pc); 800 rcu_read_lock();
747 spin_lock_irqsave(&mz->lru_lock, flags); 801 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
748 __mem_cgroup_remove_list(mz, pc); 802 css_get(&mem->css);
749 spin_unlock_irqrestore(&mz->lru_lock, flags); 803 rcu_read_unlock();
750 804
751 page_assign_page_cgroup(page, NULL); 805 do {
752 unlock_page_cgroup(page); 806 progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
807 } while (!progress && --retry);
753 808
754 pc->page = newpage; 809 css_put(&mem->css);
755 lock_page_cgroup(newpage); 810 if (!retry)
756 page_assign_page_cgroup(newpage, pc); 811 return -ENOMEM;
812 return 0;
813}
757 814
758 mz = page_cgroup_zoneinfo(pc); 815int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
759 spin_lock_irqsave(&mz->lru_lock, flags); 816{
760 __mem_cgroup_add_list(mz, pc); 817
761 spin_unlock_irqrestore(&mz->lru_lock, flags); 818 int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
819 int progress;
820 int ret = 0;
762 821
763 unlock_page_cgroup(newpage); 822 while (res_counter_set_limit(&memcg->res, val)) {
823 if (signal_pending(current)) {
824 ret = -EINTR;
825 break;
826 }
827 if (!retry_count) {
828 ret = -EBUSY;
829 break;
830 }
831 progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
832 if (!progress)
833 retry_count--;
834 }
835 return ret;
764} 836}
765 837
838
766/* 839/*
767 * This routine traverses the page_cgroups in a given list and drops them all. 840 * This routine traverses the page_cgroups in a given list and drops them all.
768 * This routine ignores page_cgroup->ref_cnt.
769 * *And* this routine doesn't reclaim page itself, just removes page_cgroup. 841 * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
770 */ 842 */
771#define FORCE_UNCHARGE_BATCH (128) 843#define FORCE_UNCHARGE_BATCH (128)
@@ -790,12 +862,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
790 page = pc->page; 862 page = pc->page;
791 get_page(page); 863 get_page(page);
792 spin_unlock_irqrestore(&mz->lru_lock, flags); 864 spin_unlock_irqrestore(&mz->lru_lock, flags);
793 mem_cgroup_uncharge_page(page); 865 /*
794 put_page(page); 866 * Check if this page is on LRU. !LRU page can be found
795 if (--count <= 0) { 867 * if it's under page migration.
796 count = FORCE_UNCHARGE_BATCH; 868 */
869 if (PageLRU(page)) {
870 __mem_cgroup_uncharge_common(page,
871 MEM_CGROUP_CHARGE_TYPE_FORCE);
872 put_page(page);
873 if (--count <= 0) {
874 count = FORCE_UNCHARGE_BATCH;
875 cond_resched();
876 }
877 } else
797 cond_resched(); 878 cond_resched();
798 }
799 spin_lock_irqsave(&mz->lru_lock, flags); 879 spin_lock_irqsave(&mz->lru_lock, flags);
800 } 880 }
801 spin_unlock_irqrestore(&mz->lru_lock, flags); 881 spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -810,9 +890,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
810 int ret = -EBUSY; 890 int ret = -EBUSY;
811 int node, zid; 891 int node, zid;
812 892
813 if (mem_cgroup_subsys.disabled)
814 return 0;
815
816 css_get(&mem->css); 893 css_get(&mem->css);
817 /* 894 /*
818 * page reclaim code (kswapd etc..) will move pages between 895 * page reclaim code (kswapd etc..) will move pages between
@@ -838,32 +915,34 @@ out:
838 return ret; 915 return ret;
839} 916}
840 917
841static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
842{
843 *tmp = memparse(buf, &buf);
844 if (*buf != '\0')
845 return -EINVAL;
846
847 /*
848 * Round up the value to the closest page size
849 */
850 *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
851 return 0;
852}
853
854static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) 918static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
855{ 919{
856 return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, 920 return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
857 cft->private); 921 cft->private);
858} 922}
859 923/*
860static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, 924 * The user of this function is...
861 struct file *file, const char __user *userbuf, 925 * RES_LIMIT.
862 size_t nbytes, loff_t *ppos) 926 */
927static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
928 const char *buffer)
863{ 929{
864 return res_counter_write(&mem_cgroup_from_cont(cont)->res, 930 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
865 cft->private, userbuf, nbytes, ppos, 931 unsigned long long val;
866 mem_cgroup_write_strategy); 932 int ret;
933
934 switch (cft->private) {
935 case RES_LIMIT:
 936 /* This function does all the necessary parsing; reuse it. */
937 ret = res_counter_memparse_write_strategy(buffer, &val);
938 if (!ret)
939 ret = mem_cgroup_resize_limit(memcg, val);
940 break;
941 default:
942 ret = -EINVAL; /* should be BUG() ? */
943 break;
944 }
945 return ret;
867} 946}
868 947
869static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) 948static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -940,7 +1019,7 @@ static struct cftype mem_cgroup_files[] = {
940 { 1019 {
941 .name = "limit_in_bytes", 1020 .name = "limit_in_bytes",
942 .private = RES_LIMIT, 1021 .private = RES_LIMIT,
943 .write = mem_cgroup_write, 1022 .write_string = mem_cgroup_write,
944 .read_u64 = mem_cgroup_read, 1023 .read_u64 = mem_cgroup_read,
945 }, 1024 },
946 { 1025 {
@@ -1070,8 +1149,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
1070static int mem_cgroup_populate(struct cgroup_subsys *ss, 1149static int mem_cgroup_populate(struct cgroup_subsys *ss,
1071 struct cgroup *cont) 1150 struct cgroup *cont)
1072{ 1151{
1073 if (mem_cgroup_subsys.disabled)
1074 return 0;
1075 return cgroup_add_files(cont, ss, mem_cgroup_files, 1152 return cgroup_add_files(cont, ss, mem_cgroup_files,
1076 ARRAY_SIZE(mem_cgroup_files)); 1153 ARRAY_SIZE(mem_cgroup_files));
1077} 1154}
@@ -1084,9 +1161,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
1084 struct mm_struct *mm; 1161 struct mm_struct *mm;
1085 struct mem_cgroup *mem, *old_mem; 1162 struct mem_cgroup *mem, *old_mem;
1086 1163
1087 if (mem_cgroup_subsys.disabled)
1088 return;
1089
1090 mm = get_task_mm(p); 1164 mm = get_task_mm(p);
1091 if (mm == NULL) 1165 if (mm == NULL)
1092 return; 1166 return;
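
mem_cgroup_write() above now delegates parsing to res_counter_memparse_write_strategy(), so the limit file accepts memparse-style suffixes, and mem_cgroup_resize_limit() retries reclaim before giving up. A condensed sketch of what a write of "64M" turns into; the wrapper function is illustrative:

    static int set_limit_64m(struct mem_cgroup *memcg)
    {
            unsigned long long val;
            int ret;

            ret = res_counter_memparse_write_strategy("64M", &val);
            if (ret)
                    return ret;     /* trailing junk -> -EINVAL */
            /* val == 64 << 20; the resize loop retries reclaim up to
             * MEM_CGROUP_RECLAIM_RETRIES times, then returns -EBUSY */
            return mem_cgroup_resize_limit(memcg, val);
    }
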
diff --git a/mm/migrate.c b/mm/migrate.c
index 376cceba82f9..d8c65a65c61d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -358,6 +358,9 @@ static int migrate_page_move_mapping(struct address_space *mapping,
358 __inc_zone_page_state(newpage, NR_FILE_PAGES); 358 __inc_zone_page_state(newpage, NR_FILE_PAGES);
359 359
360 write_unlock_irq(&mapping->tree_lock); 360 write_unlock_irq(&mapping->tree_lock);
361 if (!PageSwapCache(newpage)) {
362 mem_cgroup_uncharge_cache_page(page);
363 }
361 364
362 return 0; 365 return 0;
363} 366}
@@ -611,7 +614,6 @@ static int move_to_new_page(struct page *newpage, struct page *page)
611 rc = fallback_migrate_page(mapping, newpage, page); 614 rc = fallback_migrate_page(mapping, newpage, page);
612 615
613 if (!rc) { 616 if (!rc) {
614 mem_cgroup_page_migration(page, newpage);
615 remove_migration_ptes(page, newpage); 617 remove_migration_ptes(page, newpage);
616 } else 618 } else
617 newpage->mapping = NULL; 619 newpage->mapping = NULL;
@@ -641,6 +643,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
641 /* page was freed from under us. So we are done. */ 643 /* page was freed from under us. So we are done. */
642 goto move_newpage; 644 goto move_newpage;
643 645
646 charge = mem_cgroup_prepare_migration(page, newpage);
647 if (charge == -ENOMEM) {
648 rc = -ENOMEM;
649 goto move_newpage;
650 }
651 /* prepare cgroup just returns 0 or -ENOMEM */
652 BUG_ON(charge);
653
644 rc = -EAGAIN; 654 rc = -EAGAIN;
645 if (TestSetPageLocked(page)) { 655 if (TestSetPageLocked(page)) {
646 if (!force) 656 if (!force)
@@ -692,19 +702,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
692 goto rcu_unlock; 702 goto rcu_unlock;
693 } 703 }
694 704
695 charge = mem_cgroup_prepare_migration(page);
696 /* Establish migration ptes or remove ptes */ 705 /* Establish migration ptes or remove ptes */
697 try_to_unmap(page, 1); 706 try_to_unmap(page, 1);
698 707
699 if (!page_mapped(page)) 708 if (!page_mapped(page))
700 rc = move_to_new_page(newpage, page); 709 rc = move_to_new_page(newpage, page);
701 710
702 if (rc) { 711 if (rc)
703 remove_migration_ptes(page, page); 712 remove_migration_ptes(page, page);
704 if (charge)
705 mem_cgroup_end_migration(page);
706 } else if (charge)
707 mem_cgroup_end_migration(newpage);
708rcu_unlock: 713rcu_unlock:
709 if (rcu_locked) 714 if (rcu_locked)
710 rcu_read_unlock(); 715 rcu_read_unlock();
@@ -725,6 +730,8 @@ unlock:
725 } 730 }
726 731
727move_newpage: 732move_newpage:
733 if (!charge)
734 mem_cgroup_end_migration(newpage);
728 /* 735 /*
729 * Move the new page to the LRU. If migration was not successful 736 * Move the new page to the LRU. If migration was not successful
730 * then this will free the page. 737 * then this will free the page.
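
The migrate.c hunks move memory-cgroup charging to the front of unmap_and_move(): the new page is charged before the move, and mem_cgroup_end_migration() drops the charge afterwards if it proved redundant. A condensed sketch of the resulting call order; do_the_move() is a hypothetical stand-in for the lock/unmap/move steps:

    static int migrate_one(struct page *page, struct page *newpage)
    {
            int rc, charge;

            charge = mem_cgroup_prepare_migration(page, newpage);
            if (charge == -ENOMEM)
                    return -ENOMEM;         /* newpage was never charged */

            rc = do_the_move(page, newpage);

            mem_cgroup_end_migration(newpage);      /* rolls back if the
                                                       charge is redundant */
            return rc;
    }
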
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 9d834aa4b979..0cbe0c60c6bf 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -130,7 +130,7 @@ static int __pdflush(struct pdflush_work *my_work)
130 * Thread creation: For how long have there been zero 130 * Thread creation: For how long have there been zero
131 * available threads? 131 * available threads?
132 */ 132 */
133 if (jiffies - last_empty_jifs > 1 * HZ) { 133 if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
134 /* unlocked list_empty() test is OK here */ 134 /* unlocked list_empty() test is OK here */
135 if (list_empty(&pdflush_list)) { 135 if (list_empty(&pdflush_list)) {
136 /* unlocked test is OK here */ 136 /* unlocked test is OK here */
@@ -151,7 +151,7 @@ static int __pdflush(struct pdflush_work *my_work)
151 if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS) 151 if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
152 continue; 152 continue;
153 pdf = list_entry(pdflush_list.prev, struct pdflush_work, list); 153 pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
154 if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) { 154 if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
155 /* Limit exit rate */ 155 /* Limit exit rate */
156 pdf->when_i_went_to_sleep = jiffies; 156 pdf->when_i_went_to_sleep = jiffies;
157 break; /* exeunt */ 157 break; /* exeunt */
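
The pdflush hunks swap raw jiffies arithmetic for time_after(), which expands to (long)((b) - (a)) < 0 and therefore stays correct across a jiffies wrap, besides reading as intent rather than arithmetic. A small sketch:

    #include <linux/jiffies.h>

    static int one_second_elapsed(unsigned long stamp)
    {
            /* safe even if jiffies wrapped since stamp was taken */
            return time_after(jiffies, stamp + 1 * HZ);
    }
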
diff --git a/mm/rmap.c b/mm/rmap.c
index bf0a5b7cfb8e..abbd29f7c43f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page,
576 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); 576 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
577 if (atomic_inc_and_test(&page->_mapcount)) 577 if (atomic_inc_and_test(&page->_mapcount))
578 __page_set_anon_rmap(page, vma, address); 578 __page_set_anon_rmap(page, vma, address);
579 else { 579 else
580 __page_check_anon_rmap(page, vma, address); 580 __page_check_anon_rmap(page, vma, address);
581 /*
582 * We unconditionally charged during prepare, we uncharge here
583 * This takes care of balancing the reference counts
584 */
585 mem_cgroup_uncharge_page(page);
586 }
587} 581}
588 582
589/** 583/**
@@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page)
614{ 608{
615 if (atomic_inc_and_test(&page->_mapcount)) 609 if (atomic_inc_and_test(&page->_mapcount))
616 __inc_zone_page_state(page, NR_FILE_MAPPED); 610 __inc_zone_page_state(page, NR_FILE_MAPPED);
617 else
618 /*
619 * We unconditionally charged during prepare, we uncharge here
620 * This takes care of balancing the reference counts
621 */
622 mem_cgroup_uncharge_page(page);
623} 611}
624 612
625#ifdef CONFIG_DEBUG_VM 613#ifdef CONFIG_DEBUG_VM
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ffbea9b79e1..f92fea94d037 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,20 +922,26 @@ found:
922 error = 1; 922 error = 1;
923 if (!inode) 923 if (!inode)
924 goto out; 924 goto out;
925 /* Precharge page while we can wait, compensate afterwards */ 925 /* Precharge page using GFP_KERNEL while we can wait */
926 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); 926 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
927 if (error) 927 if (error)
928 goto out; 928 goto out;
929 error = radix_tree_preload(GFP_KERNEL); 929 error = radix_tree_preload(GFP_KERNEL);
930 if (error) 930 if (error) {
931 goto uncharge; 931 mem_cgroup_uncharge_cache_page(page);
932 goto out;
933 }
932 error = 1; 934 error = 1;
933 935
934 spin_lock(&info->lock); 936 spin_lock(&info->lock);
935 ptr = shmem_swp_entry(info, idx, NULL); 937 ptr = shmem_swp_entry(info, idx, NULL);
936 if (ptr && ptr->val == entry.val) 938 if (ptr && ptr->val == entry.val) {
937 error = add_to_page_cache(page, inode->i_mapping, 939 error = add_to_page_cache(page, inode->i_mapping,
938 idx, GFP_NOWAIT); 940 idx, GFP_NOWAIT);
941 /* does mem_cgroup_uncharge_cache_page on error */
942 } else /* we must compensate for our precharge above */
943 mem_cgroup_uncharge_cache_page(page);
944
939 if (error == -EEXIST) { 945 if (error == -EEXIST) {
940 struct page *filepage = find_get_page(inode->i_mapping, idx); 946 struct page *filepage = find_get_page(inode->i_mapping, idx);
941 error = 1; 947 error = 1;
@@ -961,8 +967,6 @@ found:
961 shmem_swp_unmap(ptr); 967 shmem_swp_unmap(ptr);
962 spin_unlock(&info->lock); 968 spin_unlock(&info->lock);
963 radix_tree_preload_end(); 969 radix_tree_preload_end();
964uncharge:
965 mem_cgroup_uncharge_page(page);
966out: 970out:
967 unlock_page(page); 971 unlock_page(page);
968 page_cache_release(page); 972 page_cache_release(page);
@@ -1311,17 +1315,14 @@ repeat:
1311 shmem_swp_unmap(entry); 1315 shmem_swp_unmap(entry);
1312 spin_unlock(&info->lock); 1316 spin_unlock(&info->lock);
1313 unlock_page(swappage); 1317 unlock_page(swappage);
1318 page_cache_release(swappage);
1314 if (error == -ENOMEM) { 1319 if (error == -ENOMEM) {
1315 /* allow reclaim from this memory cgroup */ 1320 /* allow reclaim from this memory cgroup */
1316 error = mem_cgroup_cache_charge(swappage, 1321 error = mem_cgroup_shrink_usage(current->mm,
1317 current->mm, gfp & ~__GFP_HIGHMEM); 1322 gfp);
1318 if (error) { 1323 if (error)
1319 page_cache_release(swappage);
1320 goto failed; 1324 goto failed;
1321 }
1322 mem_cgroup_uncharge_page(swappage);
1323 } 1325 }
1324 page_cache_release(swappage);
1325 goto repeat; 1326 goto repeat;
1326 } 1327 }
1327 } else if (sgp == SGP_READ && !filepage) { 1328 } else if (sgp == SGP_READ && !filepage) {
@@ -1358,6 +1359,8 @@ repeat:
1358 } 1359 }
1359 1360
1360 if (!filepage) { 1361 if (!filepage) {
1362 int ret;
1363
1361 spin_unlock(&info->lock); 1364 spin_unlock(&info->lock);
1362 filepage = shmem_alloc_page(gfp, info, idx); 1365 filepage = shmem_alloc_page(gfp, info, idx);
1363 if (!filepage) { 1366 if (!filepage) {
@@ -1386,10 +1389,18 @@ repeat:
1386 swap = *entry; 1389 swap = *entry;
1387 shmem_swp_unmap(entry); 1390 shmem_swp_unmap(entry);
1388 } 1391 }
1389 if (error || swap.val || 0 != add_to_page_cache_lru( 1392 ret = error || swap.val;
1390 filepage, mapping, idx, GFP_NOWAIT)) { 1393 if (ret)
1394 mem_cgroup_uncharge_cache_page(filepage);
1395 else
1396 ret = add_to_page_cache_lru(filepage, mapping,
1397 idx, GFP_NOWAIT);
1398 /*
 1399 * On add_to_page_cache_lru() failure, the uncharge will
1400 * be done automatically.
1401 */
1402 if (ret) {
1391 spin_unlock(&info->lock); 1403 spin_unlock(&info->lock);
1392 mem_cgroup_uncharge_page(filepage);
1393 page_cache_release(filepage); 1404 page_cache_release(filepage);
1394 shmem_unacct_blocks(info->flags, 1); 1405 shmem_unacct_blocks(info->flags, 1);
1395 shmem_free_blocks(inode, 1); 1406 shmem_free_blocks(inode, 1);
@@ -1398,7 +1409,6 @@ repeat:
1398 goto failed; 1409 goto failed;
1399 goto repeat; 1410 goto repeat;
1400 } 1411 }
1401 mem_cgroup_uncharge_page(filepage);
1402 info->flags |= SHMEM_PAGEIN; 1412 info->flags |= SHMEM_PAGEIN;
1403 } 1413 }
1404 1414
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 967d30ccd92b..26672c6cd3ce 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
38#include <linux/kthread.h> 38#include <linux/kthread.h>
39#include <linux/freezer.h> 39#include <linux/freezer.h>
40#include <linux/memcontrol.h> 40#include <linux/memcontrol.h>
41#include <linux/delayacct.h>
41 42
42#include <asm/tlbflush.h> 43#include <asm/tlbflush.h>
43#include <asm/div64.h> 44#include <asm/div64.h>
@@ -1316,6 +1317,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1316 struct zone *zone; 1317 struct zone *zone;
1317 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); 1318 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
1318 1319
1320 delayacct_freepages_start();
1321
1319 if (scan_global_lru(sc)) 1322 if (scan_global_lru(sc))
1320 count_vm_event(ALLOCSTALL); 1323 count_vm_event(ALLOCSTALL);
1321 /* 1324 /*
@@ -1396,6 +1399,8 @@ out:
1396 } else 1399 } else
1397 mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority); 1400 mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
1398 1401
1402 delayacct_freepages_end();
1403
1399 return ret; 1404 return ret;
1400} 1405}
1401 1406
diff --git a/net/802/psnap.c b/net/802/psnap.c
index ea4643931446..b3cfe5a14fca 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -31,11 +31,9 @@ static struct llc_sap *snap_sap;
31 */ 31 */
32static struct datalink_proto *find_snap_client(unsigned char *desc) 32static struct datalink_proto *find_snap_client(unsigned char *desc)
33{ 33{
34 struct list_head *entry;
35 struct datalink_proto *proto = NULL, *p; 34 struct datalink_proto *proto = NULL, *p;
36 35
37 list_for_each_rcu(entry, &snap_list) { 36 list_for_each_entry_rcu(p, &snap_list, node) {
38 p = list_entry(entry, struct datalink_proto, node);
39 if (!memcmp(p->type, desc, 5)) { 37 if (!memcmp(p->type, desc, 5)) {
40 proto = p; 38 proto = p;
41 break; 39 break;
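
This hunk, like the af_inet/af_inet6 ones below, replaces list_for_each_rcu() plus a manual list_entry() with list_for_each_entry_rcu(), whose loop variable is already the containing structure. A self-contained sketch with an illustrative item type:

    #include <linux/list.h>
    #include <linux/rcupdate.h>

    struct item {
            int type;
            struct list_head node;
    };

    static int count_items(struct list_head *head, int type)
    {
            struct item *p;
            int n = 0;

            rcu_read_lock();
            list_for_each_entry_rcu(p, head, node)
                    if (p->type == type)
                            n++;
            rcu_read_unlock();
            return n;
    }
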
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a570e2af22cb..f686467ff12b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = {
67 { 67 {
68 .ctl_name = NET_CORE_MSG_COST, 68 .ctl_name = NET_CORE_MSG_COST,
69 .procname = "message_cost", 69 .procname = "message_cost",
70 .data = &net_msg_cost, 70 .data = &net_ratelimit_state.interval,
71 .maxlen = sizeof(int), 71 .maxlen = sizeof(int),
72 .mode = 0644, 72 .mode = 0644,
73 .proc_handler = &proc_dointvec_jiffies, 73 .proc_handler = &proc_dointvec_jiffies,
@@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = {
76 { 76 {
77 .ctl_name = NET_CORE_MSG_BURST, 77 .ctl_name = NET_CORE_MSG_BURST,
78 .procname = "message_burst", 78 .procname = "message_burst",
79 .data = &net_msg_burst, 79 .data = &net_ratelimit_state.burst,
80 .maxlen = sizeof(int), 80 .maxlen = sizeof(int),
81 .mode = 0644, 81 .mode = 0644,
82 .proc_handler = &proc_dointvec, 82 .proc_handler = &proc_dointvec,
diff --git a/net/core/utils.c b/net/core/utils.c
index 8031eb59054e..72e0ebe964a0 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,17 +31,16 @@
31#include <asm/system.h> 31#include <asm/system.h>
32#include <asm/uaccess.h> 32#include <asm/uaccess.h>
33 33
34int net_msg_cost __read_mostly = 5*HZ;
35int net_msg_burst __read_mostly = 10;
36int net_msg_warn __read_mostly = 1; 34int net_msg_warn __read_mostly = 1;
37EXPORT_SYMBOL(net_msg_warn); 35EXPORT_SYMBOL(net_msg_warn);
38 36
37DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
39/* 38/*
40 * All net warning printk()s should be guarded by this function. 39 * All net warning printk()s should be guarded by this function.
41 */ 40 */
42int net_ratelimit(void) 41int net_ratelimit(void)
43{ 42{
44 return __printk_ratelimit(net_msg_cost, net_msg_burst); 43 return __ratelimit(&net_ratelimit_state);
45} 44}
46EXPORT_SYMBOL(net_ratelimit); 45EXPORT_SYMBOL(net_ratelimit);
47 46
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index dd919d84285f..f440a9f54924 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -264,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol)
264static int inet_create(struct net *net, struct socket *sock, int protocol) 264static int inet_create(struct net *net, struct socket *sock, int protocol)
265{ 265{
266 struct sock *sk; 266 struct sock *sk;
267 struct list_head *p;
268 struct inet_protosw *answer; 267 struct inet_protosw *answer;
269 struct inet_sock *inet; 268 struct inet_sock *inet;
270 struct proto *answer_prot; 269 struct proto *answer_prot;
@@ -281,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol)
281 sock->state = SS_UNCONNECTED; 280 sock->state = SS_UNCONNECTED;
282 281
283 /* Look for the requested type/protocol pair. */ 282 /* Look for the requested type/protocol pair. */
284 answer = NULL;
285lookup_protocol: 283lookup_protocol:
286 err = -ESOCKTNOSUPPORT; 284 err = -ESOCKTNOSUPPORT;
287 rcu_read_lock(); 285 rcu_read_lock();
288 list_for_each_rcu(p, &inetsw[sock->type]) { 286 list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
289 answer = list_entry(p, struct inet_protosw, list);
290 287
288 err = 0;
291 /* Check the non-wild match. */ 289 /* Check the non-wild match. */
292 if (protocol == answer->protocol) { 290 if (protocol == answer->protocol) {
293 if (protocol != IPPROTO_IP) 291 if (protocol != IPPROTO_IP)
@@ -302,10 +300,9 @@ lookup_protocol:
302 break; 300 break;
303 } 301 }
304 err = -EPROTONOSUPPORT; 302 err = -EPROTONOSUPPORT;
305 answer = NULL;
306 } 303 }
307 304
308 if (unlikely(answer == NULL)) { 305 if (unlikely(err)) {
309 if (try_loading_module < 2) { 306 if (try_loading_module < 2) {
310 rcu_read_unlock(); 307 rcu_read_unlock();
311 /* 308 /*
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3d828bc4b1cf..60461ad7fa6f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -83,7 +83,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
83 struct inet_sock *inet; 83 struct inet_sock *inet;
84 struct ipv6_pinfo *np; 84 struct ipv6_pinfo *np;
85 struct sock *sk; 85 struct sock *sk;
86 struct list_head *p;
87 struct inet_protosw *answer; 86 struct inet_protosw *answer;
88 struct proto *answer_prot; 87 struct proto *answer_prot;
89 unsigned char answer_flags; 88 unsigned char answer_flags;
@@ -97,13 +96,12 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
97 build_ehash_secret(); 96 build_ehash_secret();
98 97
99 /* Look for the requested type/protocol pair. */ 98 /* Look for the requested type/protocol pair. */
100 answer = NULL;
101lookup_protocol: 99lookup_protocol:
102 err = -ESOCKTNOSUPPORT; 100 err = -ESOCKTNOSUPPORT;
103 rcu_read_lock(); 101 rcu_read_lock();
104 list_for_each_rcu(p, &inetsw6[sock->type]) { 102 list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
105 answer = list_entry(p, struct inet_protosw, list);
106 103
104 err = 0;
107 /* Check the non-wild match. */ 105 /* Check the non-wild match. */
108 if (protocol == answer->protocol) { 106 if (protocol == answer->protocol) {
109 if (protocol != IPPROTO_IP) 107 if (protocol != IPPROTO_IP)
@@ -118,10 +116,9 @@ lookup_protocol:
118 break; 116 break;
119 } 117 }
120 err = -EPROTONOSUPPORT; 118 err = -EPROTONOSUPPORT;
121 answer = NULL;
122 } 119 }
123 120
124 if (!answer) { 121 if (err) {
125 if (try_loading_module < 2) { 122 if (try_loading_module < 2) {
126 rcu_read_unlock(); 123 rcu_read_unlock();
127 /* 124 /*
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 007c1a6708ee..63ada437fc2f 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -35,8 +35,22 @@ net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
35 return &namespaces->net_ns->sysctl_table_headers; 35 return &namespaces->net_ns->sysctl_table_headers;
36} 36}
37 37
38/* Return standard mode bits for table entry. */
39static int net_ctl_permissions(struct ctl_table_root *root,
40 struct nsproxy *nsproxy,
41 struct ctl_table *table)
42{
43 /* Allow network administrator to have same access as root. */
44 if (capable(CAP_NET_ADMIN)) {
45 int mode = (table->mode >> 6) & 7;
46 return (mode << 6) | (mode << 3) | mode;
47 }
48 return table->mode;
49}
50
38static struct ctl_table_root net_sysctl_root = { 51static struct ctl_table_root net_sysctl_root = {
39 .lookup = net_ctl_header_lookup, 52 .lookup = net_ctl_header_lookup,
53 .permissions = net_ctl_permissions,
40}; 54};
41 55
42static LIST_HEAD(net_sysctl_ro_tables); 56static LIST_HEAD(net_sysctl_ro_tables);
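
net_ctl_permissions() above widens a CAP_NET_ADMIN holder's access by copying the table owner's mode bits into the group and other positions. A worked sketch of the bit math; the helper name and sample mode are illustrative:

    static int widen_owner_bits(int mode)
    {
            int owner = (mode >> 6) & 7;    /* 0644 -> 6 (rw-) */

            /* replicate: 6 -> 0666, so the capable task sees the same
             * access root would, regardless of its own uid/gid */
            return (owner << 6) | (owner << 3) | owner;
    }
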
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 340ad6920511..3eca62566d6b 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -26,12 +26,17 @@
26# $& (whole re) matches the complete objdump line with the stack growth 26# $& (whole re) matches the complete objdump line with the stack growth
27# $1 (first bracket) matches the size of the stack growth 27# $1 (first bracket) matches the size of the stack growth
28# 28#
29# $dre is similar, but for dynamic stack reductions:
30# $& (whole re) matches the complete objdump line with the stack growth
31# $1 (first bracket) matches the dynamic amount of the stack growth
32#
29# use anything else and feel the pain ;) 33# use anything else and feel the pain ;)
30my (@stack, $re, $x, $xs); 34my (@stack, $re, $dre, $x, $xs);
31{ 35{
32 my $arch = shift; 36 my $arch = shift;
33 if ($arch eq "") { 37 if ($arch eq "") {
34 $arch = `uname -m`; 38 $arch = `uname -m`;
39 chomp($arch);
35 } 40 }
36 41
37 $x = "[0-9a-f]"; # hex character 42 $x = "[0-9a-f]"; # hex character
@@ -46,9 +51,11 @@ my (@stack, $re, $x, $xs);
46 } elsif ($arch =~ /^i[3456]86$/) { 51 } elsif ($arch =~ /^i[3456]86$/) {
47 #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp 52 #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp
48 $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o; 53 $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o;
54 $dre = qr/^.*[as][du][db] (%.*),\%esp$/o;
49 } elsif ($arch eq 'x86_64') { 55 } elsif ($arch eq 'x86_64') {
50 # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp 56 # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
51 $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o; 57 $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o;
58 $dre = qr/^.*[as][du][db] (\%.*),\%rsp$/o;
52 } elsif ($arch eq 'ia64') { 59 } elsif ($arch eq 'ia64') {
53 #e0000000044011fc: 01 0f fc 8c adds r12=-384,r12 60 #e0000000044011fc: 01 0f fc 8c adds r12=-384,r12
54 $re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o; 61 $re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
@@ -85,7 +92,7 @@ my (@stack, $re, $x, $xs);
85 # 0: 00 e8 38 01 LINK 0x4e0; 92 # 0: 00 e8 38 01 LINK 0x4e0;
86 $re = qr/.*[[:space:]]LINK[[:space:]]*(0x$x{1,8})/o; 93 $re = qr/.*[[:space:]]LINK[[:space:]]*(0x$x{1,8})/o;
87 } else { 94 } else {
88 print("wrong or unknown architecture\n"); 95 print("wrong or unknown architecture \"$arch\"\n");
89 exit 96 exit
90 } 97 }
91} 98}
@@ -141,6 +148,22 @@ while (my $line = <STDIN>) {
141 next if ($size < 100); 148 next if ($size < 100);
142 push @stack, "$intro$size\n"; 149 push @stack, "$intro$size\n";
143 } 150 }
151 elsif (defined $dre && $line =~ m/$dre/) {
152 my $size = "Dynamic ($1)";
153
154 next if $line !~ m/^($xs*)/;
155 my $addr = $1;
156 $addr =~ s/ /0/g;
157 $addr = "0x$addr";
158
159 my $intro = "$addr $func [$file]:";
160 my $padlen = 56 - length($intro);
161 while ($padlen > 0) {
162 $intro .= ' ';
163 $padlen -= 8;
164 }
165 push @stack, "$intro$size\n";
166 }
144} 167}
145 168
146print sort bysize @stack; 169print sort bysize @stack;
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index ddd92cec78ed..7bd296cca041 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -41,6 +41,7 @@ struct dev_whitelist_item {
41 short type; 41 short type;
42 short access; 42 short access;
43 struct list_head list; 43 struct list_head list;
44 struct rcu_head rcu;
44}; 45};
45 46
46struct dev_cgroup { 47struct dev_cgroup {
@@ -59,6 +60,11 @@ static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
59 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 60 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
60} 61}
61 62
63static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
64{
65 return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
66}
67
62struct cgroup_subsys devices_subsys; 68struct cgroup_subsys devices_subsys;
63 69
64static int devcgroup_can_attach(struct cgroup_subsys *ss, 70static int devcgroup_can_attach(struct cgroup_subsys *ss,
@@ -128,11 +134,19 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
128 } 134 }
129 135
130 if (whcopy != NULL) 136 if (whcopy != NULL)
131 list_add_tail(&whcopy->list, &dev_cgroup->whitelist); 137 list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
132 spin_unlock(&dev_cgroup->lock); 138 spin_unlock(&dev_cgroup->lock);
133 return 0; 139 return 0;
134} 140}
135 141
142static void whitelist_item_free(struct rcu_head *rcu)
143{
144 struct dev_whitelist_item *item;
145
146 item = container_of(rcu, struct dev_whitelist_item, rcu);
147 kfree(item);
148}
149
136/* 150/*
137 * called under cgroup_lock() 151 * called under cgroup_lock()
138 * since the list is visible to other tasks, we need the spinlock also 152 * since the list is visible to other tasks, we need the spinlock also
@@ -156,8 +170,8 @@ static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
156remove: 170remove:
157 walk->access &= ~wh->access; 171 walk->access &= ~wh->access;
158 if (!walk->access) { 172 if (!walk->access) {
159 list_del(&walk->list); 173 list_del_rcu(&walk->list);
160 kfree(walk); 174 call_rcu(&walk->rcu, whitelist_item_free);
161 } 175 }
162 } 176 }
163 spin_unlock(&dev_cgroup->lock); 177 spin_unlock(&dev_cgroup->lock);
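
The device-cgroup hunks convert whitelist removal to the classic RCU deferred-free pairing: unlink with list_del_rcu(), then kfree() from a call_rcu() callback once pre-existing readers are done, which is what lets devcgroup_seq_read() below walk the list under rcu_read_lock() instead of the spinlock. A self-contained sketch with an illustrative item type:

    #include <linux/list.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct wl_item {
            struct list_head list;
            struct rcu_head rcu;
    };

    static void wl_item_free(struct rcu_head *rcu)
    {
            kfree(container_of(rcu, struct wl_item, rcu));
    }

    static void wl_item_remove(struct wl_item *item)
    {
            list_del_rcu(&item->list);          /* readers may still see it */
            call_rcu(&item->rcu, wl_item_free); /* free after grace period */
    }
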
@@ -188,7 +202,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
188 } 202 }
189 wh->minor = wh->major = ~0; 203 wh->minor = wh->major = ~0;
190 wh->type = DEV_ALL; 204 wh->type = DEV_ALL;
191 wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE; 205 wh->access = ACC_MASK;
192 list_add(&wh->list, &dev_cgroup->whitelist); 206 list_add(&wh->list, &dev_cgroup->whitelist);
193 } else { 207 } else {
194 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 208 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
@@ -250,11 +264,10 @@ static char type_to_char(short type)
250 264
251static void set_majmin(char *str, unsigned m) 265static void set_majmin(char *str, unsigned m)
252{ 266{
253 memset(str, 0, MAJMINLEN);
254 if (m == ~0) 267 if (m == ~0)
255 sprintf(str, "*"); 268 strcpy(str, "*");
256 else 269 else
257 snprintf(str, MAJMINLEN, "%u", m); 270 sprintf(str, "%u", m);
258} 271}
259 272
260static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 273static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
@@ -264,15 +277,15 @@ static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
264 struct dev_whitelist_item *wh; 277 struct dev_whitelist_item *wh;
265 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 278 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
266 279
267 spin_lock(&devcgroup->lock); 280 rcu_read_lock();
268 list_for_each_entry(wh, &devcgroup->whitelist, list) { 281 list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
269 set_access(acc, wh->access); 282 set_access(acc, wh->access);
270 set_majmin(maj, wh->major); 283 set_majmin(maj, wh->major);
271 set_majmin(min, wh->minor); 284 set_majmin(min, wh->minor);
272 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 285 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
273 maj, min, acc); 286 maj, min, acc);
274 } 287 }
275 spin_unlock(&devcgroup->lock); 288 rcu_read_unlock();
276 289
277 return 0; 290 return 0;
278} 291}
@@ -312,10 +325,10 @@ static int may_access_whitelist(struct dev_cgroup *c,
312 * when adding a new allow rule to a device whitelist, the rule 325 * when adding a new allow rule to a device whitelist, the rule
313 * must be allowed in the parent device 326 * must be allowed in the parent device
314 */ 327 */
315static int parent_has_perm(struct cgroup *childcg, 328static int parent_has_perm(struct dev_cgroup *childcg,
316 struct dev_whitelist_item *wh) 329 struct dev_whitelist_item *wh)
317{ 330{
318 struct cgroup *pcg = childcg->parent; 331 struct cgroup *pcg = childcg->css.cgroup->parent;
319 struct dev_cgroup *parent; 332 struct dev_cgroup *parent;
320 int ret; 333 int ret;
321 334
@@ -341,39 +354,19 @@ static int parent_has_perm(struct cgroup *childcg,
  * new access is only allowed if you're in the top-level cgroup, or your
  * parent cgroup has the access you're asking for.
  */
-static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
-				      struct file *file, const char __user *userbuf,
-				      size_t nbytes, loff_t *ppos)
+static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+				   int filetype, const char *buffer)
 {
-	struct cgroup *cur_cgroup;
-	struct dev_cgroup *devcgroup, *cur_devcgroup;
-	int filetype = cft->private;
-	char *buffer, *b;
+	struct dev_cgroup *cur_devcgroup;
+	const char *b;
+	char *endp;
 	int retval = 0, count;
 	struct dev_whitelist_item wh;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	devcgroup = cgroup_to_devcgroup(cgroup);
-	cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
-	cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
-
-	buffer = kmalloc(nbytes+1, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	if (copy_from_user(buffer, userbuf, nbytes)) {
-		retval = -EFAULT;
-		goto out1;
-	}
-	buffer[nbytes] = 0;	/* nul-terminate */
-
-	cgroup_lock();
-	if (cgroup_is_removed(cgroup)) {
-		retval = -ENODEV;
-		goto out2;
-	}
+	cur_devcgroup = task_devcgroup(current);
 
 	memset(&wh, 0, sizeof(wh));
 	b = buffer;
@@ -392,32 +385,23 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 		wh.type = DEV_CHAR;
 		break;
 	default:
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
 	b++;
-	if (!isspace(*b)) {
-		retval = -EINVAL;
-		goto out2;
-	}
+	if (!isspace(*b))
+		return -EINVAL;
 	b++;
 	if (*b == '*') {
 		wh.major = ~0;
 		b++;
 	} else if (isdigit(*b)) {
-		wh.major = 0;
-		while (isdigit(*b)) {
-			wh.major = wh.major*10+(*b-'0');
-			b++;
-		}
+		wh.major = simple_strtoul(b, &endp, 10);
+		b = endp;
 	} else {
-		retval = -EINVAL;
-		goto out2;
-	}
-	if (*b != ':') {
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	if (*b != ':')
+		return -EINVAL;
 	b++;
 
 	/* read minor */
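
The open-coded digit loops for major and minor give way to simple_strtoul(), the kernel's counterpart of userspace strtoul(); endp records where parsing stopped, so b can resume at the delimiter. A standalone, runnable userspace C equivalent of the same step (parse_majmin and the sample string "8:0" are illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    /* Parse "major:minor", advancing the cursor the way the patch
     * advances b via endp after each simple_strtoul() call. */
    static int parse_majmin(const char *b, unsigned *major, unsigned *minor)
    {
            char *endp;

            *major = strtoul(b, &endp, 10);
            if (endp == b || *endp != ':')
                    return -1;
            b = endp + 1;
            *minor = strtoul(b, &endp, 10);
            if (endp == b)
                    return -1;
            return 0;
    }

    int main(void)
    {
            unsigned maj, min;

            if (parse_majmin("8:0", &maj, &min) == 0)
                    printf("major=%u minor=%u\n", maj, min); /* major=8 minor=0 */
            return 0;
    }
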
@@ -425,19 +409,13 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 		wh.minor = ~0;
 		b++;
 	} else if (isdigit(*b)) {
-		wh.minor = 0;
-		while (isdigit(*b)) {
-			wh.minor = wh.minor*10+(*b-'0');
-			b++;
-		}
+		wh.minor = simple_strtoul(b, &endp, 10);
+		b = endp;
 	} else {
-		retval = -EINVAL;
-		goto out2;
-	}
-	if (!isspace(*b)) {
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	if (!isspace(*b))
+		return -EINVAL;
 	for (b++, count = 0; count < 3; count++, b++) {
 		switch (*b) {
 		case 'r':
@@ -454,8 +432,7 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 			count = 3;
 			break;
 		default:
-			retval = -EINVAL;
-			goto out2;
+			return -EINVAL;
 		}
 	}
 
@@ -463,38 +440,39 @@ handle:
 	retval = 0;
 	switch (filetype) {
 	case DEVCG_ALLOW:
-		if (!parent_has_perm(cgroup, &wh))
-			retval = -EPERM;
-		else
-			retval = dev_whitelist_add(devcgroup, &wh);
-		break;
+		if (!parent_has_perm(devcgroup, &wh))
+			return -EPERM;
+		return dev_whitelist_add(devcgroup, &wh);
 	case DEVCG_DENY:
 		dev_whitelist_rm(devcgroup, &wh);
 		break;
 	default:
-		retval = -EINVAL;
-		goto out2;
+		return -EINVAL;
 	}
+	return 0;
+}
 
-	if (retval == 0)
-		retval = nbytes;
-
-out2:
+static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
+				  const char *buffer)
+{
+	int retval;
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+	retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+					 cft->private, buffer);
 	cgroup_unlock();
-out1:
-	kfree(buffer);
 	return retval;
 }
 
 static struct cftype dev_cgroup_files[] = {
 	{
 		.name = "allow",
-		.write = devcgroup_access_write,
+		.write_string = devcgroup_access_write,
 		.private = DEVCG_ALLOW,
 	},
 	{
 		.name = "deny",
-		.write = devcgroup_access_write,
+		.write_string = devcgroup_access_write,
 		.private = DEVCG_DENY,
 	},
 	{
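
With the parser factored out into devcgroup_update_access(), the cftype entries switch from the raw ->write method to ->write_string, which hands the handler a kernel-side, NUL-terminated copy of the user's write; the kmalloc/copy_from_user/kfree boilerplate and the out1/out2 labels disappear with it, and cgroup_lock_live_group() replaces the cgroup_lock()/cgroup_is_removed() pair. A sketch of the resulting handler shape under the cgroup API used in this hunk (my_write, my_update, and my_files are illustrative):

    #include <linux/cgroup.h>

    /* Illustrative updater; the real parsing work would live here. */
    static int my_update(struct cgroup *cgrp, int filetype, const char *buf)
    {
            return 0;
    }

    /* ->write_string handlers get a NUL-terminated kernel buffer, so the
     * handler only has to pin the cgroup alive while it works. */
    static int my_write(struct cgroup *cgrp, struct cftype *cft,
                        const char *buffer)
    {
            int retval;

            if (!cgroup_lock_live_group(cgrp))  /* fails if cgrp was removed */
                    return -ENODEV;
            retval = my_update(cgrp, cft->private, buffer);
            cgroup_unlock();
            return retval;
    }

    static struct cftype my_files[] = {
            {
                    .name = "allow",
                    .write_string = my_write,
            },
    };
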
@@ -535,8 +513,8 @@ int devcgroup_inode_permission(struct inode *inode, int mask)
 	if (!dev_cgroup)
 		return 0;
 
-	spin_lock(&dev_cgroup->lock);
-	list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
 		if (wh->type & DEV_ALL)
 			goto acc_check;
 		if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
@@ -552,10 +530,10 @@ acc_check:
 			continue;
 		if ((mask & MAY_READ) && !(wh->access & ACC_READ))
 			continue;
-		spin_unlock(&dev_cgroup->lock);
+		rcu_read_unlock();
 		return 0;
 	}
-	spin_unlock(&dev_cgroup->lock);
+	rcu_read_unlock();
 
 	return -EPERM;
 }
@@ -570,7 +548,7 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
 	if (!dev_cgroup)
 		return 0;
 
-	spin_lock(&dev_cgroup->lock);
+	rcu_read_lock();
 	list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
 		if (wh->type & DEV_ALL)
 			goto acc_check;
@@ -585,9 +563,9 @@ acc_check:
 acc_check:
 		if (!(wh->access & ACC_MKNOD))
 			continue;
-		spin_unlock(&dev_cgroup->lock);
+		rcu_read_unlock();
 		return 0;
 	}
-	spin_unlock(&dev_cgroup->lock);
+	rcu_read_unlock();
 	return -EPERM;
 }
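
Two reviewer-style notes on the RCU conversion. First, devcgroup_inode_mknod() above keeps the plain list_for_each_entry() under rcu_read_lock(); the _rcu iterator, as used in the other two read paths, would be the consistent choice. Second, these read paths can drop the spinlock only if the update side (dev_whitelist_add()/dev_whitelist_rm(), not shown in these hunks) publishes and retires entries with the RCU list primitives. A kernel-style sketch of that assumed counterpart, with whitelist_lock, struct item, and the function names as illustrative stand-ins:

    #include <linux/kernel.h>
    #include <linux/list.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct item {
            struct list_head list;
            struct rcu_head rcu;
    };

    static DEFINE_SPINLOCK(whitelist_lock);  /* serializes writers only */

    static void item_free_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct item, rcu));
    }

    /* Writers still serialize against each other, but publish with
     * list_add_tail_rcu() and defer the free past any running readers. */
    static void item_insert(struct list_head *whitelist, struct item *it)
    {
            spin_lock(&whitelist_lock);
            list_add_tail_rcu(&it->list, whitelist);
            spin_unlock(&whitelist_lock);
    }

    static void item_remove(struct item *it)
    {
            spin_lock(&whitelist_lock);
            list_del_rcu(&it->list);
            spin_unlock(&whitelist_lock);
            call_rcu(&it->rcu, item_free_rcu);
    }
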