author		Russell King <rmk@dyn-67.arm.linux.org.uk>	2006-01-09 14:18:33 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2006-01-09 14:18:33 -0500
commit		0a3a98f6dd4e8f4d928a09302c0d1c56f2192ac3 (patch)
tree		92f55e374a84d06ce8213a4540454760fdecf137
parent		8ef12c9f01afba47c2d33bb939085111ca0d0f7d (diff)
parent		5367f2d67c7d0bf1faae90e6e7b4e2ac3c9b5e0f (diff)
Merge Linus' tree.
-rw-r--r--  CREDITS | 2
-rw-r--r--  Documentation/Changes | 31
-rw-r--r--  Documentation/CodingStyle | 41
-rw-r--r--  Documentation/RCU/rcuref.txt | 87
-rw-r--r--  Documentation/SubmittingDrivers | 24
-rw-r--r--  Documentation/SubmittingPatches | 63
-rw-r--r--  Documentation/applying-patches.txt | 29
-rw-r--r--  Documentation/block/stat.txt | 82
-rw-r--r--  Documentation/cpu-hotplug.txt | 357
-rw-r--r--  Documentation/cpusets.txt | 161
-rw-r--r--  Documentation/filesystems/ext3.txt | 5
-rw-r--r--  Documentation/filesystems/proc.txt | 17
-rw-r--r--  Documentation/filesystems/ramfs-rootfs-initramfs.txt | 72
-rw-r--r--  Documentation/filesystems/relayfs.txt | 126
-rw-r--r--  Documentation/keys-request-key.txt | 22
-rw-r--r--  Documentation/keys.txt | 43
-rw-r--r--  Documentation/sysctl/vm.txt | 20
-rw-r--r--  MAINTAINERS | 1
-rw-r--r--  README | 7
-rw-r--r--  arch/alpha/Kconfig | 3
-rw-r--r--  arch/alpha/kernel/process.c | 5
-rw-r--r--  arch/alpha/kernel/ptrace.c | 24
-rw-r--r--  arch/arm/Kconfig | 4
-rw-r--r--  arch/arm/common/scoop.c | 1
-rw-r--r--  arch/arm/kernel/asm-offsets.c | 9
-rw-r--r--  arch/arm/kernel/irq.c | 14
-rw-r--r--  arch/arm/mach-footbridge/netwinder-hw.c | 1
-rw-r--r--  arch/arm/mach-integrator/time.c | 5
-rw-r--r--  arch/arm/mach-omap1/serial.c | 3
-rw-r--r--  arch/arm/mach-pxa/corgi.c | 7
-rw-r--r--  arch/arm/mach-pxa/poodle.c | 7
-rw-r--r--  arch/arm/mach-pxa/spitz.c | 7
-rw-r--r--  arch/arm/mach-realview/localtimer.c | 1
-rw-r--r--  arch/arm/mach-s3c2410/usb-simtec.c | 6
-rw-r--r--  arch/arm26/Kconfig | 4
-rw-r--r--  arch/arm26/kernel/asm-offsets.c | 7
-rw-r--r--  arch/cris/Kconfig | 4
-rw-r--r--  arch/frv/Kconfig | 23
-rw-r--r--  arch/frv/Kconfig.debug | 22
-rw-r--r--  arch/frv/Makefile | 6
-rw-r--r--  arch/frv/kernel/Makefile | 1
-rw-r--r--  arch/frv/kernel/frv_ksyms.c | 25
-rw-r--r--  arch/frv/kernel/irq.c | 17
-rw-r--r--  arch/frv/kernel/module.c | 80
-rw-r--r--  arch/frv/kernel/pm.c | 2
-rw-r--r--  arch/frv/kernel/setup.c | 2
-rw-r--r--  arch/frv/kernel/time.c | 3
-rw-r--r--  arch/frv/kernel/traps.c | 3
-rw-r--r--  arch/frv/kernel/uaccess.c | 7
-rw-r--r--  arch/frv/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/frv/lib/Makefile | 2
-rw-r--r--  arch/frv/lib/__ucmpdi2.S | 45
-rw-r--r--  arch/frv/lib/atomic-ops.S | 92
-rw-r--r--  arch/frv/lib/checksum.c | 31
-rw-r--r--  arch/frv/mb93090-mb00/Makefile | 2
-rw-r--r--  arch/frv/mb93090-mb00/pci-dma-nommu.c | 8
-rw-r--r--  arch/frv/mb93090-mb00/pci-dma.c | 10
-rw-r--r--  arch/frv/mb93090-mb00/pci-iomap.c | 29
-rw-r--r--  arch/frv/mm/cache-page.c | 5
-rw-r--r--  arch/frv/mm/extable.c | 34
-rw-r--r--  arch/frv/mm/highmem.c | 8
-rw-r--r--  arch/h8300/Kconfig | 4
-rw-r--r--  arch/i386/Kconfig | 10
-rw-r--r--  arch/i386/Makefile | 5
-rw-r--r--  arch/i386/Makefile.cpu | 10
-rw-r--r--  arch/i386/boot/compressed/misc.c | 2
-rw-r--r--  arch/i386/kernel/Makefile | 6
-rw-r--r--  arch/i386/kernel/apm.c | 4
-rw-r--r--  arch/i386/kernel/cpu/common.c | 2
-rw-r--r--  arch/i386/kernel/entry.S | 2
-rw-r--r--  arch/i386/kernel/init_task.c | 2
-rw-r--r--  arch/i386/kernel/irq.c | 2
-rw-r--r--  arch/i386/kernel/process.c | 1
-rw-r--r--  arch/i386/kernel/syscall_table.S | 1
-rw-r--r--  arch/i386/kernel/time_hpet.c | 2
-rw-r--r--  arch/ia64/Makefile | 4
-rw-r--r--  arch/ia64/ia32/sys_ia32.c | 16
-rw-r--r--  arch/ia64/kernel/efi.c | 160
-rw-r--r--  arch/ia64/kernel/entry.S | 1
-rw-r--r--  arch/ia64/kernel/head.S | 2
-rw-r--r--  arch/ia64/kernel/ia64_ksyms.c | 2
-rw-r--r--  arch/ia64/kernel/ptrace.c | 9
-rw-r--r--  arch/ia64/oprofile/backtrace.c | 2
-rw-r--r--  arch/m32r/kernel/process.c | 4
-rw-r--r--  arch/m32r/kernel/ptrace.c | 22
-rw-r--r--  arch/m68k/Kconfig | 4
-rw-r--r--  arch/m68knommu/Kconfig | 4
-rw-r--r--  arch/mips/kernel/ptrace32.c | 26
-rw-r--r--  arch/mips/sgi-ip27/ip27-berr.c | 1
-rw-r--r--  arch/parisc/Kconfig | 3
-rw-r--r--  arch/powerpc/Kconfig | 3
-rw-r--r--  arch/powerpc/kernel/ptrace32.c | 28
-rw-r--r--  arch/ppc/Kconfig | 3
-rw-r--r--  arch/s390/kernel/ptrace.c | 29
-rw-r--r--  arch/sh/Kconfig | 4
-rw-r--r--  arch/sh64/kernel/time.c | 7
-rw-r--r--  arch/sparc/Kconfig | 4
-rw-r--r--  arch/sparc/kernel/ptrace.c | 35
-rw-r--r--  arch/sparc64/Kconfig | 5
-rw-r--r--  arch/sparc64/kernel/ptrace.c | 34
-rw-r--r--  arch/um/Kconfig | 6
-rw-r--r--  arch/um/drivers/ubd_kern.c | 21
-rw-r--r--  arch/um/include/kern_util.h | 19
-rw-r--r--  arch/um/include/os.h | 16
-rw-r--r--  arch/um/include/signal_user.h | 28
-rw-r--r--  arch/um/include/user_util.h | 10
-rw-r--r--  arch/um/kernel/Makefile | 4
-rw-r--r--  arch/um/kernel/irq_user.c | 1
-rw-r--r--  arch/um/kernel/process_kern.c | 1
-rw-r--r--  arch/um/kernel/reboot.c | 2
-rw-r--r--  arch/um/kernel/signal_kern.c | 1
-rw-r--r--  arch/um/kernel/signal_user.c | 157
-rw-r--r--  arch/um/kernel/skas/Makefile | 2
-rw-r--r--  arch/um/kernel/skas/include/skas.h | 1
-rw-r--r--  arch/um/kernel/skas/process.c | 11
-rw-r--r--  arch/um/kernel/skas/process_kern.c | 1
-rw-r--r--  arch/um/kernel/time.c | 2
-rw-r--r--  arch/um/kernel/trap_kern.c | 25
-rw-r--r--  arch/um/kernel/trap_user.c | 98
-rw-r--r--  arch/um/kernel/tt/exec_kern.c | 1
-rw-r--r--  arch/um/kernel/tt/process_kern.c | 1
-rw-r--r--  arch/um/kernel/tt/tracer.c | 1
-rw-r--r--  arch/um/kernel/tt/trap_user.c | 16
-rw-r--r--  arch/um/kernel/um_arch.c | 7
-rw-r--r--  arch/um/os-Linux/Makefile | 8
-rw-r--r--  arch/um/os-Linux/main.c | 1
-rw-r--r--  arch/um/os-Linux/process.c | 1
-rw-r--r--  arch/um/os-Linux/signal.c | 158
-rw-r--r--  arch/um/os-Linux/skas/Makefile | 10
-rw-r--r--  arch/um/os-Linux/skas/trap.c (renamed from arch/um/kernel/skas/trap_user.c) | 49
-rw-r--r--  arch/um/os-Linux/start_up.c | 1
-rw-r--r--  arch/um/os-Linux/trap.c | 40
-rw-r--r--  arch/um/os-Linux/tt.c | 15
-rw-r--r--  arch/um/sys-i386/signal.c | 1
-rw-r--r--  arch/v850/Kconfig | 3
-rw-r--r--  arch/x86_64/Kconfig | 5
-rw-r--r--  arch/x86_64/boot/compressed/misc.c | 2
-rw-r--r--  arch/x86_64/boot/compressed/miscsetup.h | 39
-rw-r--r--  arch/x86_64/ia32/ia32entry.S | 1
-rw-r--r--  arch/x86_64/ia32/ptrace32.c | 44
-rw-r--r--  arch/x86_64/kernel/init_task.c | 2
-rw-r--r--  arch/x86_64/kernel/time.c | 2
-rw-r--r--  block/ioctl.c | 22
-rw-r--r--  drivers/acorn/block/mfmhd.c | 36
-rw-r--r--  drivers/acpi/osl.c | 2
-rw-r--r--  drivers/atm/nicstar.c | 5
-rw-r--r--  drivers/block/DAC960.c | 37
-rw-r--r--  drivers/block/acsi.c | 26
-rw-r--r--  drivers/block/amiflop.c | 25
-rw-r--r--  drivers/block/aoe/aoeblk.c | 26
-rw-r--r--  drivers/block/ataflop.c | 2
-rw-r--r--  drivers/block/cciss.c | 124
-rw-r--r--  drivers/block/cciss.h | 10
-rw-r--r--  drivers/block/cciss_scsi.c | 2
-rw-r--r--  drivers/block/cpqarray.c | 40
-rw-r--r--  drivers/block/floppy.c | 44
-rw-r--r--  drivers/block/nbd.c | 1
-rw-r--r--  drivers/block/paride/pd.c | 34
-rw-r--r--  drivers/block/paride/pf.c | 50
-rw-r--r--  drivers/block/pktcdvd.c | 12
-rw-r--r--  drivers/block/ps2esdi.c | 25
-rw-r--r--  drivers/block/sx8.c | 35
-rw-r--r--  drivers/block/umem.c | 41
-rw-r--r--  drivers/block/viodasd.c | 44
-rw-r--r--  drivers/block/xd.c | 31
-rw-r--r--  drivers/char/Kconfig | 12
-rw-r--r--  drivers/char/Makefile | 1
-rw-r--r--  drivers/char/agp/sworks-agp.c | 1
-rw-r--r--  drivers/char/hw_random.c | 1
-rw-r--r--  drivers/char/mem.c | 42
-rw-r--r--  drivers/char/sonypi.c | 381
-rw-r--r--  drivers/char/synclink_gt.c | 4501
-rw-r--r--  drivers/char/tpm/Makefile | 3
-rw-r--r--  drivers/char/tpm/tpm.c | 3
-rw-r--r--  drivers/char/tpm/tpm.h | 15
-rw-r--r--  drivers/char/tpm/tpm_bios.c | 540
-rw-r--r--  drivers/char/vr41xx_giu.c | 4
-rw-r--r--  drivers/char/watchdog/wdt977.c | 216
-rw-r--r--  drivers/connector/cn_proc.c | 4
-rw-r--r--  drivers/i2c/chips/tps65010.c | 11
-rw-r--r--  drivers/ide/ide-disk.c | 12
-rw-r--r--  drivers/ide/ide-floppy.c | 12
-rw-r--r--  drivers/ide/ide.c | 13
-rw-r--r--  drivers/ide/legacy/hd.c | 24
-rw-r--r--  drivers/ide/pci/serverworks.c | 2
-rw-r--r--  drivers/infiniband/core/cm.c | 16
-rw-r--r--  drivers/infiniband/core/user_mad.c | 4
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 21
-rw-r--r--  drivers/infiniband/core/verbs.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c | 12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c | 23
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h | 1
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mcg.c | 54
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 265
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c | 2
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 1
-rw-r--r--  drivers/input/keyboard/corgikbd.c | 6
-rw-r--r--  drivers/input/keyboard/spitzkbd.c | 27
-rw-r--r--  drivers/input/misc/hp_sdc_rtc.c | 9
-rw-r--r--  drivers/isdn/act2000/act2000.h | 6
-rw-r--r--  drivers/isdn/act2000/capi.h | 90
-rw-r--r--  drivers/isdn/capi/capifs.c | 2
-rw-r--r--  drivers/isdn/hardware/eicon/os_4bri.c | 3
-rw-r--r--  drivers/isdn/hardware/eicon/os_bri.c | 1
-rw-r--r--  drivers/isdn/hardware/eicon/os_pri.c | 1
-rw-r--r--  drivers/isdn/hisax/Kconfig | 10
-rw-r--r--  drivers/isdn/hisax/hisax.h | 18
-rw-r--r--  drivers/isdn/hisax/hisax_fcpcipnp.h | 18
-rw-r--r--  drivers/isdn/sc/command.c | 1
-rw-r--r--  drivers/macintosh/windfarm_smu_controls.c | 1
-rw-r--r--  drivers/macintosh/windfarm_smu_sensors.c | 1
-rw-r--r--  drivers/md/md.c | 30
-rw-r--r--  drivers/md/raid0.c | 6
-rw-r--r--  drivers/media/video/v4l2-common.c | 2
-rw-r--r--  drivers/message/i2o/i2o_block.c | 18
-rw-r--r--  drivers/mfd/ucb1x00-core.c | 5
-rw-r--r--  drivers/mfd/ucb1x00-ts.c | 1
-rw-r--r--  drivers/mmc/mmc_block.c | 25
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 25
-rw-r--r--  drivers/mtd/onenand/generic.c | 1
-rw-r--r--  drivers/mtd/rfd_ftl.c | 1
-rw-r--r--  drivers/net/3c527.h | 50
-rw-r--r--  drivers/net/Kconfig | 2
-rw-r--r--  drivers/net/cs89x0.c | 138
-rw-r--r--  drivers/net/cs89x0.h | 19
-rw-r--r--  drivers/net/hamradio/mkiss.c | 1
-rw-r--r--  drivers/net/irda/vlsi_ir.h | 4
-rw-r--r--  drivers/net/smc91x.c | 5
-rw-r--r--  drivers/net/smc91x.h | 18
-rw-r--r--  drivers/net/wan/sdla.c | 6
-rw-r--r--  drivers/oprofile/buffer_sync.c | 30
-rw-r--r--  drivers/oprofile/cpu_buffer.c | 3
-rw-r--r--  drivers/parport/Kconfig | 2
-rw-r--r--  drivers/parport/parport_pc.c | 6
-rw-r--r--  drivers/pci/hotplug/pciehp.h | 1
-rw-r--r--  drivers/pci/hotplug/pciehp_hpc.c | 3
-rw-r--r--  drivers/rapidio/rio-scan.c | 2
-rw-r--r--  drivers/rapidio/rio-sysfs.c | 1
-rw-r--r--  drivers/rapidio/rio.c | 1
-rw-r--r--  drivers/s390/block/dasd.c | 23
-rw-r--r--  drivers/s390/block/dasd_ioctl.c | 28
-rw-r--r--  drivers/s390/block/xpram.c | 18
-rw-r--r--  drivers/scsi/sd.c | 21
-rw-r--r--  drivers/usb/core/inode.c | 6
-rw-r--r--  drivers/usb/host/ohci-au1xxx.c | 1
-rw-r--r--  drivers/usb/host/ohci-lh7a404.c | 1
-rw-r--r--  drivers/usb/host/ohci-ppc-soc.c | 1
-rw-r--r--  drivers/video/console/Kconfig | 2
-rw-r--r--  drivers/video/console/vgacon.c | 36
-rw-r--r--  fs/9p/9p.c | 314
-rw-r--r--  fs/9p/9p.h | 77
-rw-r--r--  fs/9p/Makefile | 10
-rw-r--r--  fs/9p/conv.c | 884
-rw-r--r--  fs/9p/conv.h | 35
-rw-r--r--  fs/9p/debug.h | 23
-rw-r--r--  fs/9p/error.c | 10
-rw-r--r--  fs/9p/error.h | 2
-rw-r--r--  fs/9p/fid.c | 5
-rw-r--r--  fs/9p/mux.c | 1145
-rw-r--r--  fs/9p/mux.h | 41
-rw-r--r--  fs/9p/trans_fd.c | 53
-rw-r--r--  fs/9p/trans_sock.c | 160
-rw-r--r--  fs/9p/transport.h | 4
-rw-r--r--  fs/9p/v9fs.c | 59
-rw-r--r--  fs/9p/v9fs.h | 17
-rw-r--r--  fs/9p/v9fs_vfs.h | 5
-rw-r--r--  fs/9p/vfs_dentry.c | 15
-rw-r--r--  fs/9p/vfs_dir.c | 47
-rw-r--r--  fs/9p/vfs_file.c | 28
-rw-r--r--  fs/9p/vfs_inode.c | 617
-rw-r--r--  fs/9p/vfs_super.c | 12
-rw-r--r--  fs/Kconfig.binfmt | 2
-rw-r--r--  fs/Makefile | 2
-rw-r--r--  fs/afs/dir.c | 2
-rw-r--r--  fs/afs/volume.h | 4
-rw-r--r--  fs/aio.c | 3
-rw-r--r--  fs/attr.c | 24
-rw-r--r--  fs/autofs4/autofs_i.h | 2
-rw-r--r--  fs/autofs4/expire.c | 12
-rw-r--r--  fs/autofs4/inode.c | 4
-rw-r--r--  fs/autofs4/root.c | 3
-rw-r--r--  fs/binfmt_elf.c | 16
-rw-r--r--  fs/buffer.c | 76
-rw-r--r--  fs/cifs/file.c | 6
-rw-r--r--  fs/cifs/inode.c | 3
-rw-r--r--  fs/coda/cache.c | 2
-rw-r--r--  fs/compat.c | 22
-rw-r--r--  fs/dcache.c | 34
-rw-r--r--  fs/drop_caches.c | 68
-rw-r--r--  fs/exec.c | 8
-rw-r--r--  fs/ext3/ialloc.c | 6
-rw-r--r--  fs/ext3/namei.c | 2
-rw-r--r--  fs/ext3/resize.c | 32
-rw-r--r--  fs/ext3/super.c | 54
-rw-r--r--  fs/fat/cache.c | 14
-rw-r--r--  fs/fat/dir.c | 24
-rw-r--r--  fs/fat/fatent.c | 10
-rw-r--r--  fs/fat/file.c | 33
-rw-r--r--  fs/fat/inode.c | 119
-rw-r--r--  fs/fat/misc.c | 8
-rw-r--r--  fs/fcntl.c | 6
-rw-r--r--  fs/file_table.c | 8
-rw-r--r--  fs/freevxfs/vxfs_immed.c | 4
-rw-r--r--  fs/inode.c | 4
-rw-r--r--  fs/jffs/inode-v23.c | 4
-rw-r--r--  fs/jfs/jfs_dmap.c | 3
-rw-r--r--  fs/jfs/jfs_imap.c | 6
-rw-r--r--  fs/jfs/jfs_txnmgr.c | 6
-rw-r--r--  fs/jfs/jfs_umount.c | 6
-rw-r--r--  fs/jfs/resize.c | 3
-rw-r--r--  fs/jfs/super.c | 3
-rw-r--r--  fs/libfs.c | 12
-rw-r--r--  fs/locks.c | 7
-rw-r--r--  fs/mpage.c | 4
-rw-r--r--  fs/namei.c | 2
-rw-r--r--  fs/namespace.c | 6
-rw-r--r--  fs/ncpfs/dir.c | 2
-rw-r--r--  fs/ncpfs/ncplib_kernel.h | 4
-rw-r--r--  fs/nfs/inode.c | 8
-rw-r--r--  fs/nfs/nfsroot.c | 4
-rw-r--r--  fs/ocfs2/cluster/masklog.h | 7
-rw-r--r--  fs/open.c | 15
-rw-r--r--  fs/pnode.c | 2
-rw-r--r--  fs/proc/generic.c | 2
-rw-r--r--  fs/proc/inode.c | 2
-rw-r--r--  fs/proc/internal.h | 4
-rw-r--r--  fs/proc/proc_misc.c | 4
-rw-r--r--  fs/proc/root.c | 3
-rw-r--r--  fs/proc/task_mmu.c | 127
-rw-r--r--  fs/relayfs/buffers.c | 3
-rw-r--r--  fs/relayfs/inode.c | 214
-rw-r--r--  fs/relayfs/relay.c | 69
-rw-r--r--  fs/relayfs/relay.h | 4
-rw-r--r--  fs/romfs/inode.c | 6
-rw-r--r--  fs/smbfs/cache.c | 4
-rw-r--r--  fs/smbfs/file.c | 7
-rw-r--r--  fs/smbfs/inode.c | 3
-rw-r--r--  fs/smbfs/proc.c | 2
-rw-r--r--  fs/super.c | 3
-rw-r--r--  fs/sysv/dir.c | 4
-rw-r--r--  fs/udf/balloc.c | 2
-rw-r--r--  fs/udf/inode.c | 5
-rw-r--r--  fs/ufs/super.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_fs_subr.c | 3
-rw-r--r--  fs/xfs/xfs_log.h | 8
-rw-r--r--  include/asm-alpha/cache.h | 1
-rw-r--r--  include/asm-alpha/compiler.h | 2
-rw-r--r--  include/asm-alpha/futex.h | 49
-rw-r--r--  include/asm-alpha/processor.h | 21
-rw-r--r--  include/asm-arm/cache.h | 5
-rw-r--r--  include/asm-arm/futex.h | 49
-rw-r--r--  include/asm-arm/irq.h | 12
-rw-r--r--  include/asm-arm26/futex.h | 49
-rw-r--r--  include/asm-cris/arch-v10/cache.h | 1
-rw-r--r--  include/asm-cris/arch-v32/cache.h | 1
-rw-r--r--  include/asm-cris/dma-mapping.h | 2
-rw-r--r--  include/asm-cris/futex.h | 49
-rw-r--r--  include/asm-frv/atomic.h | 96
-rw-r--r--  include/asm-frv/bug.h | 1
-rw-r--r--  include/asm-frv/dma-mapping.h | 2
-rw-r--r--  include/asm-frv/io.h | 123
-rw-r--r--  include/asm-frv/mb-regs.h | 4
-rw-r--r--  include/asm-frv/mc146818rtc.h | 16
-rw-r--r--  include/asm-frv/module.h | 16
-rw-r--r--  include/asm-frv/pci.h | 8
-rw-r--r--  include/asm-frv/pgtable.h | 5
-rw-r--r--  include/asm-frv/types.h | 1
-rw-r--r--  include/asm-frv/uaccess.h | 8
-rw-r--r--  include/asm-frv/unistd.h | 2
-rw-r--r--  include/asm-frv/vga.h | 17
-rw-r--r--  include/asm-frv/xor.h | 1
-rw-r--r--  include/asm-generic/atomic.h | 1
-rw-r--r--  include/asm-generic/dma-mapping.h | 2
-rw-r--r--  include/asm-generic/futex.h | 53
-rw-r--r--  include/asm-h8300/futex.h | 49
-rw-r--r--  include/asm-i386/cache.h | 2
-rw-r--r--  include/asm-i386/dma-mapping.h | 2
-rw-r--r--  include/asm-i386/irq.h | 2
-rw-r--r--  include/asm-i386/ptrace.h | 3
-rw-r--r--  include/asm-i386/unistd.h | 3
-rw-r--r--  include/asm-i386/vm86.h | 20
-rw-r--r--  include/asm-ia64/bug.h | 6
-rw-r--r--  include/asm-ia64/cache.h | 2
-rw-r--r--  include/asm-ia64/futex.h | 49
-rw-r--r--  include/asm-ia64/io.h | 1
-rw-r--r--  include/asm-ia64/spinlock.h | 2
-rw-r--r--  include/asm-ia64/unistd.h | 3
-rw-r--r--  include/asm-m32r/cache.h | 2
-rw-r--r--  include/asm-m32r/futex.h | 49
-rw-r--r--  include/asm-m68k/cache.h | 2
-rw-r--r--  include/asm-m68k/futex.h | 49
-rw-r--r--  include/asm-m68knommu/futex.h | 49
-rw-r--r--  include/asm-mips/cache.h | 1
-rw-r--r--  include/asm-parisc/cache.h | 1
-rw-r--r--  include/asm-parisc/futex.h | 49
-rw-r--r--  include/asm-powerpc/cache.h | 1
-rw-r--r--  include/asm-powerpc/dma-mapping.h | 2
-rw-r--r--  include/asm-s390/cache.h | 1
-rw-r--r--  include/asm-s390/futex.h | 49
-rw-r--r--  include/asm-sh/cache.h | 2
-rw-r--r--  include/asm-sh/futex.h | 49
-rw-r--r--  include/asm-sh64/cache.h | 2
-rw-r--r--  include/asm-sh64/futex.h | 49
-rw-r--r--  include/asm-sparc/cache.h | 1
-rw-r--r--  include/asm-sparc/futex.h | 49
-rw-r--r--  include/asm-sparc64/cache.h | 1
-rw-r--r--  include/asm-sparc64/futex.h | 49
-rw-r--r--  include/asm-sparc64/system.h | 4
-rw-r--r--  include/asm-um/cache.h | 3
-rw-r--r--  include/asm-um/futex.h | 12
-rw-r--r--  include/asm-um/rwsem.h | 4
-rw-r--r--  include/asm-v850/cache.h | 2
-rw-r--r--  include/asm-v850/futex.h | 49
-rw-r--r--  include/asm-v850/unistd.h | 18
-rw-r--r--  include/asm-x86_64/cache.h | 1
-rw-r--r--  include/asm-x86_64/ia32_unistd.h | 3
-rw-r--r--  include/asm-x86_64/unistd.h | 4
-rw-r--r--  include/linux/aio.h | 12
-rw-r--r--  include/linux/atalk.h | 18
-rw-r--r--  include/linux/buffer_head.h | 3
-rw-r--r--  include/linux/byteorder/generic.h | 2
-rw-r--r--  include/linux/byteorder/swab.h | 2
-rw-r--r--  include/linux/byteorder/swabb.h | 2
-rw-r--r--  include/linux/cache.h | 17
-rw-r--r--  include/linux/compiler-gcc.h | 9
-rw-r--r--  include/linux/compiler-gcc2.h | 29
-rw-r--r--  include/linux/compiler-gcc3.h | 17
-rw-r--r--  include/linux/compiler-gcc4.h | 7
-rw-r--r--  include/linux/compiler.h | 2
-rw-r--r--  include/linux/cpuset.h | 40
-rw-r--r--  include/linux/cycx_x25.h | 66
-rw-r--r--  include/linux/dcache.h | 9
-rw-r--r--  include/linux/elf.h | 2
-rw-r--r--  include/linux/fs.h | 11
-rw-r--r--  include/linux/ide.h | 2
-rw-r--r--  include/linux/if_frad.h | 12
-rw-r--r--  include/linux/interrupt.h | 2
-rw-r--r--  include/linux/ipv6.h | 4
-rw-r--r--  include/linux/isdnif.h | 70
-rw-r--r--  include/linux/kernel.h | 2
-rw-r--r--  include/linux/key.h | 12
-rw-r--r--  include/linux/keyctl.h | 3
-rw-r--r--  include/linux/memory.h | 8
-rw-r--r--  include/linux/mempolicy.h | 46
-rw-r--r--  include/linux/mm.h | 50
-rw-r--r--  include/linux/mm_inline.h | 22
-rw-r--r--  include/linux/mmzone.h | 6
-rw-r--r--  include/linux/mount.h | 3
-rw-r--r--  include/linux/msdos_fs.h | 3
-rw-r--r--  include/linux/ncp.h | 126
-rw-r--r--  include/linux/netfilter.h | 92
-rw-r--r--  include/linux/netfilter_ipv4/ipt_policy.h | 52
-rw-r--r--  include/linux/netfilter_ipv6/ip6t_policy.h | 52
-rw-r--r--  include/linux/pagevec.h | 5
-rw-r--r--  include/linux/parport.h | 4
-rw-r--r--  include/linux/percpu.h | 8
-rw-r--r--  include/linux/ptrace.h | 4
-rw-r--r--  include/linux/radix-tree.h | 1
-rw-r--r--  include/linux/rcupdate.h | 2
-rw-r--r--  include/linux/rcuref.h | 220
-rw-r--r--  include/linux/relayfs_fs.h | 65
-rw-r--r--  include/linux/rio_drv.h | 1
-rw-r--r--  include/linux/rtc.h | 3
-rw-r--r--  include/linux/sched.h | 22
-rw-r--r--  include/linux/screen_info.h | 77
-rw-r--r--  include/linux/sdla.h | 64
-rw-r--r--  include/linux/seccomp.h | 6
-rw-r--r--  include/linux/signal.h | 30
-rw-r--r--  include/linux/skbuff.h | 2
-rw-r--r--  include/linux/slab.h | 35
-rw-r--r--  include/linux/spinlock_types_up.h | 14
-rw-r--r--  include/linux/swap.h | 9
-rw-r--r--  include/linux/synclink.h | 9
-rw-r--r--  include/linux/syscalls.h | 2
-rw-r--r--  include/linux/sysctl.h | 2
-rw-r--r--  include/linux/tty.h | 72
-rw-r--r--  include/linux/wavefront.h | 36
-rw-r--r--  include/linux/workqueue.h | 1
-rw-r--r--  include/linux/writeback.h | 4
-rw-r--r--  include/net/dn_dev.h | 84
-rw-r--r--  include/net/dn_nsp.h | 74
-rw-r--r--  include/net/dst.h | 11
-rw-r--r--  include/net/ip.h | 10
-rw-r--r--  include/net/ipv6.h | 2
-rw-r--r--  include/net/protocol.h | 2
-rw-r--r--  include/net/xfrm.h | 9
-rw-r--r--  include/sound/wavefront.h | 36
-rw-r--r--  init/Kconfig | 47
-rw-r--r--  init/main.c | 8
-rw-r--r--  ipc/shm.c | 22
-rw-r--r--  kernel/audit.c | 2
-rw-r--r--  kernel/cpuset.c | 537
-rw-r--r--  kernel/exit.c | 4
-rw-r--r--  kernel/fork.c | 33
-rw-r--r--  kernel/irq/proc.c | 2
-rw-r--r--  kernel/module.c | 56
-rw-r--r--  kernel/pid.c | 22
-rw-r--r--  kernel/printk.c | 6
-rw-r--r--  kernel/ptrace.c | 77
-rw-r--r--  kernel/rcupdate.c | 19
-rw-r--r--  kernel/rcutorture.c | 99
-rw-r--r--  kernel/sched.c | 7
-rw-r--r--  kernel/signal.c | 137
-rw-r--r--  kernel/sys.c | 62
-rw-r--r--  kernel/sys_ni.c | 22
-rw-r--r--  kernel/sysctl.c | 22
-rw-r--r--  kernel/timer.c | 1
-rw-r--r--  kernel/workqueue.c | 40
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  lib/bitmap.c | 89
-rw-r--r--  lib/dec_and_lock.c | 49
-rw-r--r--  lib/find_next_bit.c | 3
-rw-r--r--  lib/radix-tree.c | 143
-rw-r--r--  mm/Kconfig | 7
-rw-r--r--  mm/Makefile | 6
-rw-r--r--  mm/fadvise.c | 5
-rw-r--r--  mm/filemap.c | 48
-rw-r--r--  mm/hugetlb.c | 4
-rw-r--r--  mm/mempolicy.c | 561
-rw-r--r--  mm/oom_kill.c | 5
-rw-r--r--  mm/page_alloc.c | 129
-rw-r--r--  mm/pdflush.c | 2
-rw-r--r--  mm/rmap.c | 7
-rw-r--r--  mm/slab.c | 1139
-rw-r--r--  mm/slob.c | 385
-rw-r--r--  mm/sparse.c | 4
-rw-r--r--  mm/swap_state.c | 4
-rw-r--r--  mm/swapfile.c | 12
-rw-r--r--  mm/truncate.c | 1
-rw-r--r--  mm/util.c | 39
-rw-r--r--  mm/vmscan.c | 343
-rw-r--r--  net/802/Makefile | 4
-rw-r--r--  net/dccp/ipv4.c | 1
-rw-r--r--  net/dccp/ipv6.c | 3
-rw-r--r--  net/ipv4/ip_gre.c | 1
-rw-r--r--  net/ipv4/ip_input.c | 15
-rw-r--r--  net/ipv4/ip_output.c | 10
-rw-r--r--  net/ipv4/ipip.c | 1
-rw-r--r--  net/ipv4/netfilter.c | 15
-rw-r--r--  net/ipv4/netfilter/Kconfig | 10
-rw-r--r--  net/ipv4/netfilter/Makefile | 1
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 1
-rw-r--r--  net/ipv4/netfilter/ip_nat_standalone.c | 109
-rw-r--r--  net/ipv4/netfilter/ipt_policy.c | 170
-rw-r--r--  net/ipv4/raw.c | 1
-rw-r--r--  net/ipv4/tcp_ipv4.c | 1
-rw-r--r--  net/ipv4/udp.c | 2
-rw-r--r--  net/ipv4/xfrm4_input.c | 31
-rw-r--r--  net/ipv4/xfrm4_output.c | 72
-rw-r--r--  net/ipv6/addrconf.c | 2
-rw-r--r--  net/ipv6/af_inet6.c | 4
-rw-r--r--  net/ipv6/exthdrs.c | 19
-rw-r--r--  net/ipv6/icmp.c | 4
-rw-r--r--  net/ipv6/inet6_connection_sock.c | 1
-rw-r--r--  net/ipv6/ip6_input.c | 23
-rw-r--r--  net/ipv6/ip6_tunnel.c | 2
-rw-r--r--  net/ipv6/netfilter.c | 9
-rw-r--r--  net/ipv6/netfilter/Kconfig | 10
-rw-r--r--  net/ipv6/netfilter/Makefile | 1
-rw-r--r--  net/ipv6/netfilter/ip6t_policy.c | 175
-rw-r--r--  net/ipv6/reassembly.c | 11
-rw-r--r--  net/ipv6/sit.c | 2
-rw-r--r--  net/ipv6/tcp_ipv6.c | 2
-rw-r--r--  net/ipv6/udp.c | 2
-rw-r--r--  net/ipv6/xfrm6_input.c | 21
-rw-r--r--  net/ipv6/xfrm6_output.c | 76
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 6
-rw-r--r--  net/sctp/input.c | 1
-rw-r--r--  net/sctp/ipv6.c | 2
-rw-r--r--  net/sunrpc/rpc_pipe.c | 2
-rw-r--r--  net/xfrm/xfrm_policy.c | 11
-rw-r--r--  scripts/bloat-o-meter | 58
-rw-r--r--  scripts/kconfig/conf.c | 18
-rw-r--r--  scripts/kconfig/qconf.h | 6
-rw-r--r--  security/keys/compat.c | 6
-rw-r--r--  security/keys/internal.h | 5
-rw-r--r--  security/keys/keyctl.c | 151
-rw-r--r--  security/keys/keyring.c | 132
-rw-r--r--  security/keys/permission.c | 32
-rw-r--r--  security/keys/process_keys.c | 71
-rw-r--r--  security/keys/request_key.c | 108
-rw-r--r--  security/keys/request_key_auth.c | 192
-rw-r--r--  security/selinux/hooks.c | 2
-rw-r--r--  security/selinux/selinuxfs.c | 2
-rw-r--r--  security/selinux/xfrm.c | 2
-rw-r--r--  sound/isa/wavefront/wavefront_synth.c | 7
-rw-r--r--  sound/oss/i810_audio.c | 18
590 files changed, 16671 insertions, 7654 deletions
diff --git a/CREDITS b/CREDITS
index 521f00d1b549..8e577ce4abeb 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3203,7 +3203,7 @@ N: Eugene Surovegin
 E: ebs@ebshome.net
 W: http://kernel.ebshome.net/
 P: 1024D/AE5467F1 FF22 39F1 6728 89F6 6E6C 2365 7602 F33D AE54 67F1
-D: Embedded PowerPC 4xx: I2C, PIC and random hacks/fixes
+D: Embedded PowerPC 4xx: EMAC, I2C, PIC and random hacks/fixes
 S: Sunnyvale, California 94085
 S: USA
 
diff --git a/Documentation/Changes b/Documentation/Changes
index 86b86399d61d..fe5ae0f55020 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -31,8 +31,6 @@ al español de este documento en varios formatos.
 Eine deutsche Version dieser Datei finden Sie unter
 <http://www.stefan-winter.de/Changes-2.4.0.txt>.
 
-Last updated: October 29th, 2002
-
 Chris Ricker (kaboom@gatech.edu or chris.ricker@genetics.utah.edu).
 
 Current Minimal Requirements
@@ -48,7 +46,7 @@ necessary on all systems; obviously, if you don't have any ISDN
 hardware, for example, you probably needn't concern yourself with
 isdn4k-utils.
 
-o  Gnu C                  2.95.3                  # gcc --version
+o  Gnu C                  3.2                     # gcc --version
 o  Gnu make               3.79.1                  # make --version
 o  binutils               2.12                    # ld -v
 o  util-linux             2.10o                   # fdformat --version
@@ -74,26 +72,7 @@ GCC
 ---
 
 The gcc version requirements may vary depending on the type of CPU in your
-computer. The next paragraph applies to users of x86 CPUs, but not
-necessarily to users of other CPUs. Users of other CPUs should obtain
-information about their gcc version requirements from another source.
-
-The recommended compiler for the kernel is gcc 2.95.x (x >= 3), and it
-should be used when you need absolute stability. You may use gcc 3.0.x
-instead if you wish, although it may cause problems. Later versions of gcc
-have not received much testing for Linux kernel compilation, and there are
-almost certainly bugs (mainly, but not exclusively, in the kernel) that
-will need to be fixed in order to use these compilers. In any case, using
-pgcc instead of plain gcc is just asking for trouble.
-
-The Red Hat gcc 2.96 compiler subtree can also be used to build this tree.
-You should ensure you use gcc-2.96-74 or later. gcc-2.96-54 will not build
-the kernel correctly.
-
-In addition, please pay attention to compiler optimization. Anything
-greater than -O2 may not be wise. Similarly, if you choose to use gcc-2.95.x
-or derivatives, be sure not to use -fstrict-aliasing (which, depending on
-your version of gcc 2.95.x, may necessitate using -fno-strict-aliasing).
+computer.
 
 Make
 ----
@@ -322,9 +301,9 @@ Getting updated software
 Kernel compilation
 ******************
 
-gcc 2.95.3
-----------
-o  <ftp://ftp.gnu.org/gnu/gcc/gcc-2.95.3.tar.gz>
+gcc
+---
+o  <ftp://ftp.gnu.org/gnu/gcc/>
 
 Make
 ----
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index eb7db3c19227..ce780ef648f1 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -344,7 +344,7 @@ Remember: if another thread can find your data structure, and you don't
 have a reference count on it, you almost certainly have a bug.
 
 
-		Chapter 11: Macros, Enums, Inline functions and RTL
+		Chapter 11: Macros, Enums and RTL
 
 Names of macros defining constants and labels in enums are capitalized.
 
@@ -429,7 +429,35 @@ from void pointer to any other pointer type is guaranteed by the C programming
 language.
 
 
-		Chapter 14: References
+		Chapter 14: The inline disease
+
+There appears to be a common misperception that gcc has a magic "make me
+faster" speedup option called "inline". While the use of inlines can be
+appropriate (for example as a means of replacing macros, see Chapter 11), it
+very often is not. Abundant use of the inline keyword leads to a much bigger
+kernel, which in turn slows the system as a whole down, due to a bigger
+icache footprint for the CPU and simply because there is less memory
+available for the pagecache. Just think about it; a pagecache miss causes a
+disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles
+that can go into these 5 milliseconds.
+
+A reasonable rule of thumb is to not put inline at functions that have more
+than 3 lines of code in them. An exception to this rule are the cases where
+a parameter is known to be a compile-time constant, and as a result of this
+constantness you *know* the compiler will be able to optimize most of your
+function away at compile time. For a good example of this latter case, see
+the kmalloc() inline function.
+
+Often people argue that adding inline to functions that are static and used
+only once is always a win since there is no space tradeoff. While this is
+technically correct, gcc is capable of inlining these automatically without
+help, and the maintenance issue of removing the inline when a second user
+appears outweighs the potential value of the hint that tells gcc to do
+something it would have done anyway.
+
+
+
+		Chapter 15: References
 
 The C Programming Language, Second Edition
 by Brian W. Kernighan and Dennis M. Ritchie.
@@ -444,10 +472,13 @@ ISBN 0-201-61586-X.
 URL: http://cm.bell-labs.com/cm/cs/tpop/
 
 GNU manuals - where in compliance with K&R and this text - for cpp, gcc,
-gcc internals and indent, all available from http://www.gnu.org
+gcc internals and indent, all available from http://www.gnu.org/manual/
 
 WG14 is the international standardization working group for the programming
-language C, URL: http://std.dkuug.dk/JTC1/SC22/WG14/
+language C, URL: http://www.open-std.org/JTC1/SC22/WG14/
+
+Kernel CodingStyle, by greg@kroah.com at OLS 2002:
+http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
 
 --
-Last updated on 16 February 2004 by a community effort on LKML.
+Last updated on 30 December 2005 by a community effort on LKML.
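To make the rule of thumb in the new Chapter 14 concrete, here is a minimal
sketch (not part of the patch; the struct and both helpers are hypothetical
names): a one-line accessor is a reasonable inline, while the longer function
below it is better left for gcc to decide on its own.

	/* Hypothetical example for illustration only. */
	struct counter {
		int len;
		const unsigned char *data;
	};

	/* ~1 line of code: marking this inline is reasonable. */
	static inline int counter_is_empty(const struct counter *c)
	{
		return c->len == 0;
	}

	/* More than ~3 lines: do not mark this inline; if it has a single
	 * static caller, gcc will usually inline it by itself anyway. */
	static int counter_checksum(const struct counter *c)
	{
		int i, sum = 0;

		for (i = 0; i < c->len; i++)
			sum += c->data[i];
		return sum & 0xff;
	}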
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
index a23fee66064d..3f60db41b2f0 100644
--- a/Documentation/RCU/rcuref.txt
+++ b/Documentation/RCU/rcuref.txt
@@ -1,74 +1,67 @@
-Refcounter framework for elements of lists/arrays protected by
-RCU.
+Refcounter design for elements of lists/arrays protected by RCU.
 
 Refcounting on elements of lists which are protected by traditional
 reader/writer spinlocks or semaphores are straight forward as in:
 
 1.                                      2.
 add()                                   search_and_reference()
 {                                       {
     alloc_object                            read_lock(&list_lock);
     ...                                     search_for_element
     atomic_set(&el->rc, 1);                 atomic_inc(&el->rc);
     write_lock(&list_lock);                 ...
     add_element                             read_unlock(&list_lock);
     ...                                 ...
     write_unlock(&list_lock);           }
 }
 
 3.                                      4.
 release_referenced()                    delete()
 {                                       {
     ...                                     write_lock(&list_lock);
     atomic_dec(&el->rc, relfunc)            ...
     ...                                     delete_element
 }                                           write_unlock(&list_lock);
                                             ...
                                             if (atomic_dec_and_test(&el->rc))
                                                 kfree(el);
                                             ...
                                         }
 
 If this list/array is made lock free using rcu as in changing the
 write_lock in add() and delete() to spin_lock and changing read_lock
-in search_and_reference to rcu_read_lock(), the rcuref_get in
+in search_and_reference to rcu_read_lock(), the atomic_get in
 search_and_reference could potentially hold reference to an element which
-has already been deleted from the list/array. rcuref_lf_get_rcu takes
+has already been deleted from the list/array. atomic_inc_not_zero takes
 care of this scenario. search_and_reference should look as;
 
 1.                                      2.
 add()                                   search_and_reference()
 {                                       {
     alloc_object                            rcu_read_lock();
     ...                                     search_for_element
-    atomic_set(&el->rc, 1);                 if (rcuref_inc_lf(&el->rc)) {
+    atomic_set(&el->rc, 1);                 if (atomic_inc_not_zero(&el->rc)) {
     write_lock(&list_lock);                     rcu_read_unlock();
                                                 return FAIL;
     add_element                             }
     ...                                     ...
     write_unlock(&list_lock);               rcu_read_unlock();
 }                                       }
 3.                                      4.
 release_referenced()                    delete()
 {                                       {
     ...                                     write_lock(&list_lock);
-    rcuref_dec(&el->rc, relfunc)            ...
+    atomic_dec(&el->rc, relfunc)            ...
     ...                                     delete_element
 }                                           write_unlock(&list_lock);
                                             ...
-                                            if (rcuref_dec_and_test(&el->rc))
+                                            if (atomic_dec_and_test(&el->rc))
                                                 call_rcu(&el->head, el_free);
                                             ...
                                         }
 
 Sometimes, reference to the element need to be obtained in the
-update (write) stream. In such cases, rcuref_inc_lf might be an overkill
-since the spinlock serialising list updates are held. rcuref_inc
+update (write) stream. In such cases, atomic_inc_not_zero might be an
+overkill since the spinlock serialising list updates are held. atomic_inc
 is to be used in such cases.
-For arches which do not have cmpxchg rcuref_inc_lf
-api uses a hashed spinlock implementation and the same hashed spinlock
-is acquired in all rcuref_xxx primitives to preserve atomicity.
-Note: Use rcuref_inc api only if you need to use rcuref_inc_lf on the
-refcounter atleast at one place. Mixing rcuref_inc and atomic_xxx api
-might lead to races. rcuref_inc_lf() must be used in lockfree
-RCU critical sections only.
+
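For a concrete picture of the lock-free lookup the new text describes, here
is a minimal sketch of the intended semantics using the 2.6-era list, RCU and
atomic primitives; struct element, its key field and the list head are
hypothetical names used only for illustration:

	#include <linux/list.h>
	#include <linux/rcupdate.h>
	#include <asm/atomic.h>

	struct element {
		struct list_head list;
		atomic_t rc;
		struct rcu_head head;	/* used by call_rcu() in delete() */
		int key;
	};

	static struct element *search_and_reference(struct list_head *head,
						    int key)
	{
		struct element *el;

		rcu_read_lock();
		list_for_each_entry_rcu(el, head, list) {
			if (el->key == key && atomic_inc_not_zero(&el->rc)) {
				/* The count was non-zero, so a concurrent
				 * delete() cannot have freed the element. */
				rcu_read_unlock();
				return el;
			}
		}
		rcu_read_unlock();
		return NULL;	/* the FAIL case in the diagram above */
	}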
diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers
index c3cca924e94b..dd311cff1cc3 100644
--- a/Documentation/SubmittingDrivers
+++ b/Documentation/SubmittingDrivers
@@ -27,18 +27,17 @@ Who To Submit Drivers To
 ------------------------
 
 Linux 2.0:
-	No new drivers are accepted for this kernel tree
+	No new drivers are accepted for this kernel tree.
 
 Linux 2.2:
+	No new drivers are accepted for this kernel tree.
+
+Linux 2.4:
 	If the code area has a general maintainer then please submit it to
 	the maintainer listed in MAINTAINERS in the kernel file. If the
 	maintainer does not respond or you cannot find the appropriate
-	maintainer then please contact the 2.2 kernel maintainer:
-	Marc-Christian Petersen <m.c.p@wolk-project.de>.
-
-Linux 2.4:
-	The same rules apply as 2.2. The final contact point for Linux 2.4
-	submissions is Marcelo Tosatti <marcelo.tosatti@cyclades.com>.
+	maintainer then please contact Marcelo Tosatti
+	<marcelo.tosatti@cyclades.com>.
 
 Linux 2.6:
 	The same rules apply as 2.4 except that you should follow linux-kernel
@@ -53,6 +52,7 @@ Licensing: The code must be released to us under the
 	of exclusive GPL licensing, and if you wish the driver
 	to be useful to other communities such as BSD you may well
 	wish to release under multiple licenses.
+	See accepted licenses at include/linux/module.h
 
 Copyright: The copyright owner must agree to use of GPL.
 	It's best if the submitter and copyright owner
@@ -143,5 +143,13 @@ KernelNewbies:
 	http://kernelnewbies.org/
 
 Linux USB project:
-	http://sourceforge.net/projects/linux-usb/
+	http://linux-usb.sourceforge.net/
+
+How to NOT write kernel driver by arjanv@redhat.com
+	http://people.redhat.com/arjanv/olspaper.pdf
+
+Kernel Janitor:
+	http://janitor.kernelnewbies.org/
 
+--
+Last updated on 17 Nov 2005.
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 1d47e6c09dc6..6198e5ebcf65 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -78,7 +78,9 @@ Randy Dunlap's patch scripts:
 http://www.xenotime.net/linux/scripts/patching-scripts-002.tar.gz
 
 Andrew Morton's patch scripts:
-http://www.zip.com.au/~akpm/linux/patches/patch-scripts-0.20
+http://www.zip.com.au/~akpm/linux/patches/
+Instead of these scripts, quilt is the recommended patch management
+tool (see above).
 
 
 
@@ -97,7 +99,7 @@ need to split up your patch. See #3, next.
 
 3) Separate your changes.
 
-Separate each logical change into its own patch.
+Separate _logical changes_ into a single patch file.
 
 For example, if your changes include both bug fixes and performance
 enhancements for a single driver, separate those changes into two
@@ -112,6 +114,10 @@ If one patch depends on another patch in order for a change to be
 complete, that is OK. Simply note "this patch depends on patch X"
 in your patch description.
 
+If you cannot condense your patch set into a smaller set of patches,
+then only post say 15 or so at a time and wait for review and integration.
+
+
 
 4) Select e-mail destination.
 
@@ -124,6 +130,10 @@ your patch to the primary Linux kernel developer's mailing list,
 linux-kernel@vger.kernel.org. Most kernel developers monitor this
 e-mail list, and can comment on your changes.
 
+
+Do not send more than 15 patches at once to the vger mailing lists!!!
+
+
 Linus Torvalds is the final arbiter of all changes accepted into the
 Linux kernel. His e-mail address is <torvalds@osdl.org>. He gets
 a lot of e-mail, so typically you should do your best to -avoid- sending
@@ -149,6 +159,9 @@ USB, framebuffer devices, the VFS, the SCSI subsystem, etc. See the
 MAINTAINERS file for a mailing list that relates specifically to
 your change.
 
+Majordomo lists of VGER.KERNEL.ORG at:
+	<http://vger.kernel.org/vger-lists.html>
+
 If changes affect userland-kernel interfaces, please send
 the MAN-PAGES maintainer (as listed in the MAINTAINERS file)
 a man-pages patch, or at least a notification of the change,
@@ -373,27 +386,14 @@ a diffstat, to show what files have changed, and the number of inserted
 and deleted lines per file. A diffstat is especially useful on bigger
 patches. Other comments relevant only to the moment or the maintainer,
 not suitable for the permanent changelog, should also go here.
+Use diffstat options "-p 1 -w 70" so that filenames are listed from the
+top of the kernel source tree and don't use too much horizontal space
+(easily fit in 80 columns, maybe with some indentation).
 
 See more details on the proper patch format in the following
 references.
 
 
-13) More references for submitting patches
-
-Andrew Morton, "The perfect patch" (tpp).
-  <http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt>
-
-Jeff Garzik, "Linux kernel patch submission format."
-  <http://linux.yyz.us/patch-format.html>
-
-Greg KH, "How to piss off a kernel subsystem maintainer"
-  <http://www.kroah.com/log/2005/03/31/>
-
-Kernel Documentation/CodingStyle
-  <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
-
-Linus Torvald's mail on the canonical patch format:
-  <http://lkml.org/lkml/2005/4/7/183>
 
 
 -----------------------------------
@@ -466,3 +466,30 @@ and 'extern __inline__'.
 Don't try to anticipate nebulous future cases which may or may not
 be useful: "Make it as simple as you can, and no simpler."
 
+
+
+----------------------
+SECTION 3 - REFERENCES
+----------------------
+
+Andrew Morton, "The perfect patch" (tpp).
+  <http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt>
+
+Jeff Garzik, "Linux kernel patch submission format."
+  <http://linux.yyz.us/patch-format.html>
+
+Greg Kroah, "How to piss off a kernel subsystem maintainer".
+  <http://www.kroah.com/log/2005/03/31/>
+  <http://www.kroah.com/log/2005/07/08/>
+  <http://www.kroah.com/log/2005/10/19/>
+
+NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!.
+  <http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2>
+
+Kernel Documentation/CodingStyle
+  <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
+
+Linus Torvalds' mail on the canonical patch format:
+  <http://lkml.org/lkml/2005/4/7/183>
+--
+Last updated on 17 Nov 2005.
diff --git a/Documentation/applying-patches.txt b/Documentation/applying-patches.txt
index 681e426e2482..05a08c2c1889 100644
--- a/Documentation/applying-patches.txt
+++ b/Documentation/applying-patches.txt
@@ -2,7 +2,8 @@
 	Applying Patches To The Linux Kernel
 	------------------------------------
 
-	(Written by Jesper Juhl, August 2005)
+	Original by: Jesper Juhl, August 2005
+	Last update: 2005-12-02
 
 
 
@@ -118,7 +119,7 @@ wrong.
 
 When patch encounters a change that it can't fix up with fuzz it rejects it
 outright and leaves a file with a .rej extension (a reject file). You can
-read this file to see exactely what change couldn't be applied, so you can
+read this file to see exactly what change couldn't be applied, so you can
 go fix it up by hand if you wish.
 
 If you don't have any third party patches applied to your kernel source, but
@@ -127,7 +128,7 @@ and have made no modifications yourself to the source files, then you should
 never see a fuzz or reject message from patch. If you do see such messages
 anyway, then there's a high risk that either your local source tree or the
 patch file is corrupted in some way. In that case you should probably try
-redownloading the patch and if things are still not OK then you'd be advised
+re-downloading the patch and if things are still not OK then you'd be advised
 to start with a fresh tree downloaded in full from kernel.org.
 
 Let's look a bit more at some of the messages patch can produce.
@@ -180,9 +181,11 @@ wish to apply.
 
 Are there any alternatives to `patch'?
 ---
- Yes there are alternatives. You can use the `interdiff' program
-(http://cyberelk.net/tim/patchutils/) to generate a patch representing the
-differences between two patches and then apply the result.
+ Yes there are alternatives.
+
+ You can use the `interdiff' program (http://cyberelk.net/tim/patchutils/) to
+generate a patch representing the differences between two patches and then
+apply the result.
 This will let you move from something like 2.6.12.2 to 2.6.12.3 in a single
 step. The -z flag to interdiff will even let you feed it patches in gzip or
 bzip2 compressed form directly without the use of zcat or bzcat or manual
@@ -197,7 +200,7 @@ do the additional steps since interdiff can get things wrong in some cases.
  Another alternative is `ketchup', which is a python script for automatic
 downloading and applying of patches (http://www.selenic.com/ketchup/).
 
-Other nice tools are diffstat which shows a summary of changes made by a
+ Other nice tools are diffstat which shows a summary of changes made by a
 patch, lsdiff which displays a short listing of affected files in a patch
 file, along with (optionally) the line numbers of the start of each patch
 and grepdiff which displays a list of the files modified by a patch where
@@ -258,7 +261,7 @@ $ patch -p1 -R < ../patch-2.6.11.1 # revert the 2.6.11.1 patch
 			# source dir is now 2.6.11
 $ patch -p1 < ../patch-2.6.12	# apply new 2.6.12 patch
 $ cd ..
-$ mv linux-2.6.11.1 inux-2.6.12	# rename source dir
+$ mv linux-2.6.11.1 linux-2.6.12	# rename source dir
 
 
 The 2.6.x.y kernels
@@ -433,7 +436,11 @@ $ cd ..
 $ mv linux-2.6.12-mm1 linux-2.6.13-rc3-mm3	# rename the source dir
 
 
-This concludes this list of explanations of the various kernel trees and I
-hope you are now crystal clear on how to apply the various patches and help
-testing the kernel.
+This concludes this list of explanations of the various kernel trees.
+I hope you are now clear on how to apply the various patches and help testing
+the kernel.
+
+Thank you's to Randy Dunlap, Rolf Eike Beer, Linus Torvalds, Bodo Eggert,
+Johannes Stezenbach, Grant Coady, Pavel Machek and others that I may have
+forgotten for their reviews and contributions to this document.
 
diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.txt
new file mode 100644
index 000000000000..0dbc946de2ea
--- /dev/null
+++ b/Documentation/block/stat.txt
@@ -0,0 +1,82 @@
+Block layer statistics in /sys/block/<dev>/stat
+===============================================
+
+This file documents the contents of the /sys/block/<dev>/stat file.
+
+The stat file provides several statistics about the state of block
+device <dev>.
+
+Q. Why are there multiple statistics in a single file?  Doesn't sysfs
+   normally contain a single value per file?
+A. By having a single file, the kernel can guarantee that the statistics
+   represent a consistent snapshot of the state of the device.  If the
+   statistics were exported as multiple files containing one statistic
+   each, it would be impossible to guarantee that a set of readings
+   represent a single point in time.
+
+The stat file consists of a single line of text containing 11 decimal
+values separated by whitespace.  The fields are summarized in the
+following table, and described in more detail below.
+
+Name            units         description
+----            -----         -----------
+read I/Os       requests      number of read I/Os processed
+read merges     requests      number of read I/Os merged with in-queue I/O
+read sectors    sectors       number of sectors read
+read ticks      milliseconds  total wait time for read requests
+write I/Os      requests      number of write I/Os processed
+write merges    requests      number of write I/Os merged with in-queue I/O
+write sectors   sectors       number of sectors written
+write ticks     milliseconds  total wait time for write requests
+in_flight       requests      number of I/Os currently in flight
+io_ticks        milliseconds  total time this block device has been active
+time_in_queue   milliseconds  total wait time for all requests
+
+read I/Os, write I/Os
+=====================
+
+These values increment when an I/O request completes.
+
+read merges, write merges
+=========================
+
+These values increment when an I/O request is merged with an
+already-queued I/O request.
+
+read sectors, write sectors
+===========================
+
+These values count the number of sectors read from or written to this
+block device.  The "sectors" in question are the standard UNIX 512-byte
+sectors, not any device- or filesystem-specific block size.  The
+counters are incremented when the I/O completes.
+
+read ticks, write ticks
+=======================
+
+These values count the number of milliseconds that I/O requests have
+waited on this block device.  If there are multiple I/O requests waiting,
+these values will increase at a rate greater than 1000/second; for
+example, if 60 read requests wait for an average of 30 ms, the read_ticks
+field will increase by 60*30 = 1800.
+
+in_flight
+=========
+
+This value counts the number of I/O requests that have been issued to
+the device driver but have not yet completed.  It does not include I/O
+requests that are in the queue but not yet issued to the device driver.
+
+io_ticks
+========
+
+This value counts the number of milliseconds during which the device has
+had I/O requests queued.
+
+time_in_queue
+=============
+
+This value counts the number of milliseconds that I/O requests have waited
+on this block device.  If there are multiple I/O requests waiting, this
+value will increase as the product of the number of milliseconds times the
+number of requests waiting (see "read ticks" above for an example).
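As a usage illustration (not part of the patch), here is a minimal user-space
C sketch that reads the 11 fields in the order given by the table above; the
device name "sda" is only an example:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long f[11];
		int i;
		FILE *fp = fopen("/sys/block/sda/stat", "r");

		if (!fp)
			return 1;
		/* The file is a single line of 11 whitespace-separated
		 * decimal values, so fscanf() can walk it directly. */
		for (i = 0; i < 11; i++)
			if (fscanf(fp, "%llu", &f[i]) != 1) {
				fclose(fp);
				return 1;
			}
		fclose(fp);
		printf("read I/Os %llu, write I/Os %llu, in_flight %llu\n",
		       f[0], f[4], f[8]);
		return 0;
	}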
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
new file mode 100644
index 000000000000..08c5d04f3086
--- /dev/null
+++ b/Documentation/cpu-hotplug.txt
@@ -0,0 +1,357 @@
1 CPU hotplug Support in Linux(tm) Kernel
2
3 Maintainers:
4 CPU Hotplug Core:
5 Rusty Russell <rusty@rustycorp.com.au>
6 Srivatsa Vaddagiri <vatsa@in.ibm.com>
7 i386:
8 Zwane Mwaikambo <zwane@arm.linux.org.uk>
9 ppc64:
10 Nathan Lynch <nathanl@austin.ibm.com>
11 Joel Schopp <jschopp@austin.ibm.com>
12 ia64/x86_64:
13 Ashok Raj <ashok.raj@intel.com>
14
15Authors: Ashok Raj <ashok.raj@intel.com>
16Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
17 Joel Schopp <jschopp@austin.ibm.com>
18
19Introduction
20
21Modern advances in system architectures have introduced advanced error
22reporting and correction capabilities in processors. CPU architectures permit
23partitioning support, where compute resources of a single CPU could be made
24available to virtual machine environments. There are couple OEMS that
25support NUMA hardware which are hot pluggable as well, where physical
26node insertion and removal require support for CPU hotplug.
27
28Such advances require CPUs available to a kernel to be removed either for
29provisioning reasons, or for RAS purposes to keep an offending CPU off
30system execution path. Hence the need for CPU hotplug support in the
31Linux kernel.
32
33A more novel use of CPU-hotplug support is its use today in suspend
34resume support for SMP. Dual-core and HT support makes even
35a laptop run SMP kernels which didn't support these methods. SMP support
36for suspend/resume is a work in progress.
37
38General Stuff about CPU Hotplug
39--------------------------------
40
41Command Line Switches
42---------------------
43maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using
44 maxcpus=2 will only boot 2. You can choose to bring the
45 other cpus later online, read FAQ's for more info.
46
47additional_cpus=n [x86_64 only] use this to limit hotpluggable cpus.
48 This option sets
49 cpu_possible_map = cpu_present_map + additional_cpus
50
51CPU maps and such
52-----------------
53[More on cpumaps and primitive to manipulate, please check
54include/linux/cpumask.h that has more descriptive text.]
55
56cpu_possible_map: Bitmap of possible CPUs that can ever be available in the
57system. This is used to allocate some boot time memory for per_cpu variables
58that aren't designed to grow/shrink as CPUs are made available or removed.
59Once set during boot time discovery phase, the map is static, i.e no bits
60are added or removed anytime. Trimming it accurately for your system needs
61upfront can save some boot time memory. See below for how we use heuristics
62in x86_64 case to keep this under check.
63
64cpu_online_map: Bitmap of all CPUs currently online. Its set in __cpu_up()
65after a cpu is available for kernel scheduling and ready to receive
66interrupts from devices. Its cleared when a cpu is brought down using
67__cpu_disable(), before which all OS services including interrupts are
68migrated to another target CPU.
69
70cpu_present_map: Bitmap of CPUs currently present in the system. Not all
71of them may be online. When physical hotplug is processed by the relevant
72subsystem (e.g ACPI) can change and new bit either be added or removed
73from the map depending on the event is hot-add/hot-remove. There are currently
74no locking rules as of now. Typical usage is to init topology during boot,
75at which time hotplug is disabled.
76
77You really dont need to manipulate any of the system cpu maps. They should
78be read-only for most use. When setting up per-cpu resources almost always use
79cpu_possible_map/for_each_cpu() to iterate.
80
81Never use anything other than cpumask_t to represent bitmap of CPUs.
82
83#include <linux/cpumask.h>
84
85for_each_cpu - Iterate over cpu_possible_map
86for_each_online_cpu - Iterate over cpu_online_map
87for_each_present_cpu - Iterate over cpu_present_map
88for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.
89
90#include <linux/cpu.h>
91lock_cpu_hotplug() and unlock_cpu_hotplug():
92
93The above calls are used to inhibit cpu hotplug operations. While holding the
94cpucontrol mutex, cpu_online_map will not change. If you merely need to avoid
95cpus going away, you could also use preempt_disable() and preempt_enable()
96for those sections. Just remember the critical section cannot call any
97function that can sleep or schedule this process away. The preempt_disable()
98will work as long as stop_machine_run() is used to take a cpu down.
99
100CPU Hotplug - Frequently Asked Questions.
101
102Q: How to i enable my kernel to support CPU hotplug?
103A: When doing make defconfig, Enable CPU hotplug support
104
105 "Processor type and Features" -> Support for Hotpluggable CPUs
106
107Make sure that you have CONFIG_HOTPLUG, and CONFIG_SMP turned on as well.
108
109You would need to enable CONFIG_HOTPLUG_CPU for SMP suspend/resume support
110as well.
111
112Q: What architectures support CPU hotplug?
113A: As of 2.6.14, the following architectures support CPU hotplug.
114
115i386 (Intel), ppc, ppc64, parisc, s390, ia64 and x86_64
116
117Q: How to test if hotplug is supported on the newly built kernel?
118A: You should now notice an entry in sysfs.
119
120Check if sysfs is mounted, using the "mount" command. You should notice
121an entry as shown below in the output.
122
123....
124none on /sys type sysfs (rw)
125....
126
127if this is not mounted, do the following.
128
129#mkdir /sysfs
130#mount -t sysfs sys /sys
131
132now you should see entries for all present cpu, the following is an example
133in a 8-way system.
134
135#pwd
136#/sys/devices/system/cpu
137#ls -l
138total 0
139drwxr-xr-x 10 root root 0 Sep 19 07:44 .
140drwxr-xr-x 13 root root 0 Sep 19 07:45 ..
141drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu0
142drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu1
143drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu2
144drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu3
145drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu4
146drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu5
147drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu6
148drwxr-xr-x 3 root root 0 Sep 19 07:48 cpu7
149
150Under each directory you will find an "online" file, which is the control
151file used to logically online/offline a processor.
152
153Q: Does hot-add/hot-remove refer to physical add/remove of cpus?
154A: The terms hot-add/remove are not used very consistently in the code.
155CONFIG_HOTPLUG_CPU enables logical online/offline capability in the kernel.
156To support physical addition/removal, one would need some BIOS hooks and
157the platform should have something like an attention button in PCI hotplug.
158CONFIG_ACPI_HOTPLUG_CPU enables ACPI support for physical add/remove of CPUs.
159
160Q: How do I logically offline a CPU?
161A: Do the following.
162
163#echo 0 > /sys/devices/system/cpu/cpuX/online
164
165Once the logical offline is successful, check
166
167#cat /proc/interrupts
168
169you should no longer see the CPU that you removed. Also, the online file will
170report the state as 0 when a cpu is offline and 1 when it is online.
171
172#To display the current cpu state.
173#cat /sys/devices/system/cpu/cpuX/online
174
175Q: Why can't I remove CPU0 on some systems?
176A: Some architectures may have some special dependency on a certain CPU.
177
178For example, IA64 platforms have the ability to send platform interrupts to
179the OS, a.k.a. Corrected Platform Error Interrupts (CPEI). Current ACPI
180specifications provide no way to change the target CPU; hence, if the
181running ACPI version doesn't support such redirection, we disable that CPU
182by making it not-removable.
183
184In such cases you will also notice that the online file is missing under cpu0.
185
186Q: How do I find out if a particular CPU is not removable?
187A: Depending on the implementation, some architectures may show this by the
188absence of the "online" file. This is done if it can be determined ahead of
189time that this CPU cannot be removed.
190
191In some situations, this can be a run time check, i.e. if you try to remove the
192last CPU, this will not be permitted. You can find such failures by
193investigating the return value of the "echo" command.
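
For example (a hypothetical session; the exact error text is
implementation-dependent), trying to offline the last remaining online CPU
fails with a non-zero exit status:

	#echo 0 > /sys/devices/system/cpu/cpuX/online
	bash: echo: write error: Device or resource busy
	#echo $?
	1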
194
195Q: What happens when a CPU is being logically offlined?
196A: The following happen, listed in no particular order :-)
197
198- A notification is sent to in-kernel registered modules via a
199  CPU_DOWN_PREPARE event
200- All processes are migrated away from this outgoing CPU to new CPUs
201- All interrupts targeted to this CPU are migrated to a new CPU
202- timers/bottom halves/tasklets are also migrated to a new CPU
203- Once all services are migrated, the kernel calls an arch specific routine
204  __cpu_disable() to perform arch specific cleanup.
205- Once this is successful, a CPU_DEAD event is sent to signal successful
206  cleanup.
207
208 "It is expected that each service cleans up when the CPU_DOWN_PREPARE
209 notifier is called; when CPU_DEAD is called, it is expected that nothing
210 is running on behalf of the CPU that was offlined"
211
212Q: If I have some kernel code that needs to be aware of CPU arrival and
213   departure, how do I arrange for proper notification?
214A: This is what you would need in your kernel code to receive notifications.
215
216 #include <linux/cpu.h>
217 static int __cpuinit foobar_cpu_callback(struct notifier_block *nfb,
218 unsigned long action, void *hcpu)
219 {
220 unsigned int cpu = (unsigned long)hcpu;
221
222 switch (action) {
223 case CPU_ONLINE:
224 foobar_online_action(cpu);
225 break;
226 case CPU_DEAD:
227 foobar_dead_action(cpu);
228 break;
229 }
230 return NOTIFY_OK;
231 }
232
233	static struct notifier_block foobar_cpu_notifier =
234 {
235 .notifier_call = foobar_cpu_callback,
236 };
237
238
239In your init function,
240
241 register_cpu_notifier(&foobar_cpu_notifier);
242
243You can fail PREPARE notifiers if something doesn't work to prepare resources.
244This will stop the activity and send a subsequent CANCELED event back.
245
246CPU_DEAD should not be failed; it is just a goodness indication, but bad
247things will happen if a notifier in the path sends a BAD notify code.
248
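As a hedged sketch of vetoing a bring-up (foobar_alloc_for() and
foobar_free_for() are hypothetical helpers, not existing kernel functions),
a callback might fail CPU_UP_PREPARE like this:

	#include <linux/cpu.h>

	static int __cpuinit foobar_cpu_callback(struct notifier_block *nfb,
						 unsigned long action, void *hcpu)
	{
		unsigned int cpu = (unsigned long)hcpu;

		switch (action) {
		case CPU_UP_PREPARE:
			/* Allocate per-cpu resources; veto bring-up on failure */
			if (foobar_alloc_for(cpu) < 0)
				return NOTIFY_BAD;
			break;
		case CPU_UP_CANCELED:	/* bring-up failed elsewhere: roll back */
		case CPU_DEAD:		/* cleanup only; never fail these */
			foobar_free_for(cpu);
			break;
		}
		return NOTIFY_OK;
	}
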
249Q: I don't see my action being called for CPUs that are already up and running?
250A: Yes, CPU notifiers are called only when new CPUs are onlined or offlined.
251   If you need to perform some action for each cpu already in the system, then:
252
253	for_each_online_cpu(i) {
254		foobar_cpu_callback(&foobar_cpu_notifier, CPU_UP_PREPARE, (void *)(long)i);
255		foobar_cpu_callback(&foobar_cpu_notifier, CPU_ONLINE, (void *)(long)i);
256	}
257
258Q: If I would like to develop cpu hotplug support for a new architecture,
259   what do I need at a minimum?
260A: The following is required for the CPU hotplug infrastructure to work
261   correctly.
262
263 - Make sure you have an entry in Kconfig to enable CONFIG_HOTPLUG_CPU
264 - __cpu_up() - Arch interface to bring up a CPU
265    - __cpu_disable() - Arch interface to shut down a CPU; no more interrupts
266                        can be handled by the kernel after the routine
267                        returns. This includes shutting down local APIC
268                        timers, etc.
269    - __cpu_die() - This is supposed to ensure the death of the CPU. Look
270                        at some example code in other archs that implement
271                        CPU hotplug. The processor is taken down from the
272                        idle() loop for that specific architecture.
273                        __cpu_die() typically waits for some per_cpu state
274                        to be set, to positively ensure the processor-dead
275                        routine has been called. A sketch follows below.
276
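A minimal, hypothetical sketch of such a wait, assuming the dying cpu sets a
per-cpu flag from its idle loop (foobar_cpu_dead is an invented name, not an
existing kernel symbol):

	#include <linux/delay.h>
	#include <linux/kernel.h>
	#include <linux/percpu.h>

	/* Hypothetical flag the dying cpu sets once it reaches its dead loop */
	static DEFINE_PER_CPU(int, foobar_cpu_dead);

	void __cpu_die(unsigned int cpu)
	{
		int i;

		/* Poll for the dying cpu to announce its death */
		for (i = 0; i < 100; i++) {
			if (per_cpu(foobar_cpu_dead, cpu))
				return;
			msleep(10);
		}
		printk(KERN_ERR "CPU%u did not die\n", cpu);
	}
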
277Q: I need to ensure that a particular cpu is not removed when there is some
278   work specific to this cpu in progress.
279A: First switch the current thread context to the preferred cpu:
280
281 int my_func_on_cpu(int cpu)
282 {
283 cpumask_t saved_mask, new_mask = CPU_MASK_NONE;
284 int curr_cpu, err = 0;
285
286 saved_mask = current->cpus_allowed;
287 cpu_set(cpu, new_mask);
288 err = set_cpus_allowed(current, new_mask);
289
290 if (err)
291 return err;
292
293 /*
294	 * get_cpu() disables preemption; the check below catches the
295	 * case where we got scheduled onto a different cpu just after
296	 * set_cpus_allowed() returned and before the work started.
297 */
298 curr_cpu = get_cpu();
299
300 if (curr_cpu != cpu) {
301 err = -EAGAIN;
302 goto ret;
303 } else {
304 /*
305	 * Do work: but we can't sleep, since get_cpu() disables preemption
306 */
307 }
308 ret:
309 put_cpu();
310 set_cpus_allowed(current, saved_mask);
311 return err;
312 }
313
314
315Q: How do we determine how many CPUs are available for hotplug?
316A: There is no clear spec-defined way from ACPI that can give us that
317   information today. Based on some input from Natalie of Unisys,
318   the ACPI MADT (Multiple APIC Description Table) marks the possible
319   CPUs in a system with disabled status.
320
321   Andi implemented some simple heuristics that count the number of disabled
322   CPUs in the MADT as hotpluggable CPUs. If there are no disabled CPUs, we
323   assume half the number of CPUs currently present can be hotplugged.
324
325 Caveat: Today's ACPI MADT can only provide 256 entries since the apicid field
326 in MADT is only 8 bits.
327
328User Space Notification
329
330Hotplug support for devices is common in Linux today. It is being used to
331support automatic configuration of network, usb and pci devices. A hotplug
332event can be used to invoke an agent script to perform the configuration task.
333
334You can add /etc/hotplug/cpu.agent to handle hotplug notifications in user
335space, as in the following example script.
336
337 #!/bin/bash
338 # $Id: cpu.agent
339 # Kernel hotplug params include:
340 #ACTION=%s [online or offline]
341 #DEVPATH=%s
342 #
343 cd /etc/hotplug
344 . ./hotplug.functions
345
346 case $ACTION in
347 online)
348 echo `date` ":cpu.agent" add cpu >> /tmp/hotplug.txt
349 ;;
350 offline)
351 echo `date` ":cpu.agent" remove cpu >>/tmp/hotplug.txt
352 ;;
353 *)
354 debug_mesg CPU $ACTION event not supported
355 exit 1
356 ;;
357 esac
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index a09a8eb80665..9e49b1c35729 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -14,7 +14,10 @@ CONTENTS:
14 1.1 What are cpusets ? 14 1.1 What are cpusets ?
15 1.2 Why are cpusets needed ? 15 1.2 Why are cpusets needed ?
16 1.3 How are cpusets implemented ? 16 1.3 How are cpusets implemented ?
17 1.4 How do I use cpusets ? 17 1.4 What are exclusive cpusets ?
18 1.5 What does notify_on_release do ?
19 1.6 What is memory_pressure ?
20 1.7 How do I use cpusets ?
182. Usage Examples and Syntax 212. Usage Examples and Syntax
19 2.1 Basic Usage 22 2.1 Basic Usage
20 2.2 Adding/removing cpus 23 2.2 Adding/removing cpus
@@ -49,29 +52,6 @@ its cpus_allowed vector, and the kernel page allocator will not
49allocate a page on a node that is not allowed in the requesting tasks 52allocate a page on a node that is not allowed in the requesting tasks
50mems_allowed vector. 53mems_allowed vector.
51 54
52If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct
53ancestor or descendent, may share any of the same CPUs or Memory Nodes.
54A cpuset that is cpu exclusive has a sched domain associated with it.
55The sched domain consists of all cpus in the current cpuset that are not
56part of any exclusive child cpusets.
57This ensures that the scheduler load balacing code only balances
58against the cpus that are in the sched domain as defined above and not
59all of the cpus in the system. This removes any overhead due to
60load balancing code trying to pull tasks outside of the cpu exclusive
61cpuset only to be prevented by the tasks' cpus_allowed mask.
62
63A cpuset that is mem_exclusive restricts kernel allocations for
64page, buffer and other data commonly shared by the kernel across
65multiple users. All cpusets, whether mem_exclusive or not, restrict
66allocations of memory for user space. This enables configuring a
67system so that several independent jobs can share common kernel
68data, such as file system pages, while isolating each jobs user
69allocation in its own cpuset. To do this, construct a large
70mem_exclusive cpuset to hold all the jobs, and construct child,
71non-mem_exclusive cpusets for each individual job. Only a small
72amount of typical kernel memory, such as requests from interrupt
73handlers, is allowed to be taken outside even a mem_exclusive cpuset.
74
75User level code may create and destroy cpusets by name in the cpuset 55User level code may create and destroy cpusets by name in the cpuset
76virtual file system, manage the attributes and permissions of these 56virtual file system, manage the attributes and permissions of these
77cpusets and which CPUs and Memory Nodes are assigned to each cpuset, 57cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
@@ -192,9 +172,15 @@ containing the following files describing that cpuset:
192 172
193 - cpus: list of CPUs in that cpuset 173 - cpus: list of CPUs in that cpuset
194 - mems: list of Memory Nodes in that cpuset 174 - mems: list of Memory Nodes in that cpuset
175 - memory_migrate flag: if set, move pages to cpusets nodes
195 - cpu_exclusive flag: is cpu placement exclusive? 176 - cpu_exclusive flag: is cpu placement exclusive?
196 - mem_exclusive flag: is memory placement exclusive? 177 - mem_exclusive flag: is memory placement exclusive?
197 - tasks: list of tasks (by pid) attached to that cpuset 178 - tasks: list of tasks (by pid) attached to that cpuset
179 - notify_on_release flag: run /sbin/cpuset_release_agent on exit?
180 - memory_pressure: measure of how much paging pressure in cpuset
181
182In addition, the root cpuset only has the following file:
183 - memory_pressure_enabled flag: compute memory_pressure?
198 184
199New cpusets are created using the mkdir system call or shell 185New cpusets are created using the mkdir system call or shell
200command. The properties of a cpuset, such as its flags, allowed 186command. The properties of a cpuset, such as its flags, allowed
@@ -228,7 +214,108 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs)
228to represent the cpuset hierarchy provides for a familiar permission 214to represent the cpuset hierarchy provides for a familiar permission
229and name space for cpusets, with a minimum of additional kernel code. 215and name space for cpusets, with a minimum of additional kernel code.
230 216
2311.4 How do I use cpusets ? 217
2181.4 What are exclusive cpusets ?
219--------------------------------
220
221If a cpuset is cpu or mem exclusive, no other cpuset, other than
222a direct ancestor or descendent, may share any of the same CPUs or
223Memory Nodes.
224
225A cpuset that is cpu_exclusive has a scheduler (sched) domain
226associated with it. The sched domain consists of all CPUs in the
227current cpuset that are not part of any exclusive child cpusets.
228This ensures that the scheduler load balancing code only balances
229against the CPUs that are in the sched domain as defined above and
230not all of the CPUs in the system. This removes any overhead due to
231load balancing code trying to pull tasks outside of the cpu_exclusive
232cpuset only to be prevented by the tasks' cpus_allowed mask.
233
234A cpuset that is mem_exclusive restricts kernel allocations for
235page, buffer and other data commonly shared by the kernel across
236multiple users. All cpusets, whether mem_exclusive or not, restrict
237allocations of memory for user space. This enables configuring a
238system so that several independent jobs can share common kernel data,
239such as file system pages, while isolating each jobs user allocation in
240its own cpuset. To do this, construct a large mem_exclusive cpuset to
241hold all the jobs, and construct child, non-mem_exclusive cpusets for
242each individual job. Only a small amount of typical kernel memory,
243such as requests from interrupt handlers, is allowed to be taken
244outside even a mem_exclusive cpuset.
245
246
2471.5 What does notify_on_release do ?
248------------------------------------
249
250If the notify_on_release flag is enabled (1) in a cpuset, then whenever
251the last task in the cpuset leaves (exits or attaches to some other
252cpuset) and the last child cpuset of that cpuset is removed, then
253the kernel runs the command /sbin/cpuset_release_agent, supplying the
254pathname (relative to the mount point of the cpuset file system) of the
255abandoned cpuset. This enables automatic removal of abandoned cpusets.
256The default value of notify_on_release in the root cpuset at system
257boot is disabled (0). The default value of other cpusets at creation
258is the current value of their parents notify_on_release setting.
259
260
2611.6 What is memory_pressure ?
262-----------------------------
263The memory_pressure of a cpuset provides a simple per-cpuset metric
264of the rate that the tasks in a cpuset are attempting to free up in
265use memory on the nodes of the cpuset to satisfy additional memory
266requests.
267
268This enables batch managers monitoring jobs running in dedicated
269cpusets to efficiently detect what level of memory pressure that job
270is causing.
271
272This is useful both on tightly managed systems running a wide mix of
273submitted jobs, which may choose to terminate or re-prioritize jobs that
274are trying to use more memory than allowed on the nodes assigned them,
275and with tightly coupled, long running, massively parallel scientific
276computing jobs that will dramatically fail to meet required performance
277goals if they start to use more memory than allowed to them.
278
279This mechanism provides a very economical way for the batch manager
280to monitor a cpuset for signs of memory pressure. It's up to the
281batch manager or other user code to decide what to do about it and
282take action.
283
284==> Unless this feature is enabled by writing "1" to the special file
285 /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
286 code of __alloc_pages() for this metric reduces to simply noticing
287 that the cpuset_memory_pressure_enabled flag is zero. So only
288 systems that enable this feature will compute the metric.
289
290Why a per-cpuset, running average:
291
292 Because this meter is per-cpuset, rather than per-task or mm,
293 the system load imposed by a batch scheduler monitoring this
294 metric is sharply reduced on large systems, because a scan of
295 the tasklist can be avoided on each set of queries.
296
297 Because this meter is a running average, instead of an accumulating
298 counter, a batch scheduler can detect memory pressure with a
299 single read, instead of having to read and accumulate results
300 for a period of time.
301
302 Because this meter is per-cpuset rather than per-task or mm,
303 the batch scheduler can obtain the key information, memory
304 pressure in a cpuset, with a single read, rather than having to
305 query and accumulate results over all the (dynamically changing)
306 set of tasks in the cpuset.
307
308A per-cpuset simple digital filter (requires a spinlock and 3 words
309of data per-cpuset) is kept, and updated by any task attached to that
310cpuset, if it enters the synchronous (direct) page reclaim code.
311
312A per-cpuset file provides an integer number representing the recent
313(half-life of 10 seconds) rate of direct page reclaims caused by
314the tasks in the cpuset, in units of reclaims attempted per second,
315times 1000.
316
317
3181.7 How do I use cpusets ?
232-------------------------- 319--------------------------
233 320
234In order to minimize the impact of cpusets on critical kernel 321In order to minimize the impact of cpusets on critical kernel
@@ -277,6 +364,30 @@ rewritten to the 'tasks' file of its cpuset. This is done to avoid
277impacting the scheduler code in the kernel with a check for changes 364impacting the scheduler code in the kernel with a check for changes
278in a tasks processor placement. 365in a tasks processor placement.
279 366
367Normally, once a page is allocated (given a physical page
368of main memory) then that page stays on whatever node it
369was allocated, so long as it remains allocated, even if the
370cpusets memory placement policy 'mems' subsequently changes.
371If the cpuset flag file 'memory_migrate' is set true, then when
372tasks are attached to that cpuset, any pages that task had
373allocated to it on nodes in its previous cpuset are migrated
374to the tasks new cpuset. Depending on the implementation,
375this migration may either be done by swapping the page out,
376so that the next time the page is referenced, it will be paged
377into the tasks new cpuset, usually on the node where it was
378referenced, or this migration may be done by directly copying
379the pages from the tasks previous cpuset to the new cpuset,
380where possible to the same node, relative to the new cpuset,
381as the node that held the page, relative to the old cpuset.
382Also if 'memory_migrate' is set true, then if that cpusets
383'mems' file is modified, pages allocated to tasks in that
384cpuset, that were on nodes in the previous setting of 'mems',
385will be moved to nodes in the new setting of 'mems.' Again,
386depending on the implementation, this might be done by swapping,
387or by direct copying. In either case, pages that were not in
388the tasks prior cpuset, or in the cpusets prior 'mems' setting,
389will not be moved.
390
280There is an exception to the above. If hotplug functionality is used 391There is an exception to the above. If hotplug functionality is used
281to remove all the CPUs that are currently assigned to a cpuset, 392to remove all the CPUs that are currently assigned to a cpuset,
282then the kernel will automatically update the cpus_allowed of all 393then the kernel will automatically update the cpus_allowed of all
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 9840d5b8d5b9..22e4040564d5 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -22,6 +22,11 @@ journal=inum When a journal already exists, this option is
22 the inode which will represent the ext3 file 22 the inode which will represent the ext3 file
23 system's journal file. 23 system's journal file.
24 24
25journal_dev=devnum When the external journal device's major/minor numbers
26 have changed, this option allows one to specify the new
27 journal location. The journal device is identified
28 through its new major/minor numbers encoded in devnum.
29
25noload Don't load the journal on mounting. 30noload Don't load the journal on mounting.
26 31
27data=journal All data are committed into the journal prior 32data=journal All data are committed into the journal prior
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index d4773565ea2f..a4dcf42c2fd9 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent
1302unnecessary page faults in thrashing situation. The unit of the value is 1302unnecessary page faults in thrashing situation. The unit of the value is
1303second. The value would be useful to tune thrashing behavior. 1303second. The value would be useful to tune thrashing behavior.
1304 1304
1305drop_caches
1306-----------
1307
1308Writing to this will cause the kernel to drop clean caches, dentries and
1309inodes from memory, causing that memory to become free.
1310
1311To free pagecache:
1312 echo 1 > /proc/sys/vm/drop_caches
1313To free dentries and inodes:
1314 echo 2 > /proc/sys/vm/drop_caches
1315To free pagecache, dentries and inodes:
1316 echo 3 > /proc/sys/vm/drop_caches
1317
1318As this is a non-destructive operation and dirty objects are not freeable, the
1319user should run `sync' first.
1320
1321
13052.5 /proc/sys/dev - Device specific parameters 13222.5 /proc/sys/dev - Device specific parameters
1306---------------------------------------------- 1323----------------------------------------------
1307 1324
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
index b3404a032596..60ab61e54e8a 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
@@ -143,12 +143,26 @@ as the following example:
143 dir /mnt 755 0 0 143 dir /mnt 755 0 0
144 file /init initramfs/init.sh 755 0 0 144 file /init initramfs/init.sh 755 0 0
145 145
146Run "usr/gen_init_cpio" (after the kernel build) to get a usage message
147documenting the above file format.
148
146One advantage of the text file is that root access is not required to 149One advantage of the text file is that root access is not required to
147set permissions or create device nodes in the new archive. (Note that those 150set permissions or create device nodes in the new archive. (Note that those
148two example "file" entries expect to find files named "init.sh" and "busybox" in 151two example "file" entries expect to find files named "init.sh" and "busybox" in
149a directory called "initramfs", under the linux-2.6.* directory. See 152a directory called "initramfs", under the linux-2.6.* directory. See
150Documentation/early-userspace/README for more details.) 153Documentation/early-userspace/README for more details.)
151 154
155The kernel does not depend on external cpio tools, gen_init_cpio is created
156from usr/gen_init_cpio.c which is entirely self-contained, and the kernel's
157boot-time extractor is also (obviously) self-contained. However, if you _do_
158happen to have cpio installed, the following command line can extract the
159generated cpio image back into its component files:
160
161 cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames
162
163Contents of initramfs:
164----------------------
165
152If you don't already understand what shared libraries, devices, and paths 166If you don't already understand what shared libraries, devices, and paths
153you need to get a minimal root filesystem up and running, here are some 167you need to get a minimal root filesystem up and running, here are some
154references: 168references:
@@ -161,13 +175,69 @@ designed to be a tiny C library to statically link early userspace
161code against, along with some related utilities. It is BSD licensed. 175code against, along with some related utilities. It is BSD licensed.
162 176
163I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net) 177I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
164myself. These are LGPL and GPL, respectively. 178myself. These are LGPL and GPL, respectively. (A self-contained initramfs
179package is planned for the busybox 1.2 release.)
165 180
166In theory you could use glibc, but that's not well suited for small embedded 181In theory you could use glibc, but that's not well suited for small embedded
167uses like this. (A "hello world" program statically linked against glibc is 182uses like this. (A "hello world" program statically linked against glibc is
168over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do 183over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do
169name lookups, even when otherwise statically linked.) 184name lookups, even when otherwise statically linked.)
170 185
186Why cpio rather than tar?
187-------------------------
188
189This decision was made back in December, 2001. The discussion started here:
190
191 http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1538.html
192
193And spawned a second thread (specifically on tar vs cpio), starting here:
194
195 http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1587.html
196
197The quick and dirty summary version (which is no substitute for reading
198the above threads) is:
199
2001) cpio is a standard. It's decades old (from the AT&T days), and already
201 widely used on Linux (inside RPM, Red Hat's device driver disks). Here's
202 a Linux Journal article about it from 1996:
203
204 http://www.linuxjournal.com/article/1213
205
206 It's not as popular as tar because the traditional cpio command line tools
207 require _truly_hideous_ command line arguments. But that says nothing
208 either way about the archive format, and there are alternative tools,
209 such as:
210
211 http://freshmeat.net/projects/afio/
212
2132) The cpio archive format chosen by the kernel is simpler and cleaner (and
214 thus easier to create and parse) than any of the (literally dozens of)
215 various tar archive formats. The complete initramfs archive format is
216 explained in buffer-format.txt, created in usr/gen_init_cpio.c, and
217 extracted in init/initramfs.c. All three together come to less than 26k
218 total of human-readable text.
219
2203) The GNU project standardizing on tar is approximately as relevant as
221 Windows standardizing on zip. Linux is not part of either, and is free
222 to make its own technical decisions.
223
2244) Since this is a kernel internal format, it could easily have been
225 something brand new. The kernel provides its own tools to create and
226 extract this format anyway. Using an existing standard was preferable,
227 but not essential.
228
2295) Al Viro made the decision (quote: "tar is ugly as hell and not going to be
230 supported on the kernel side"):
231
232 http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1540.html
233
234 explained his reasoning:
235
236 http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html
237 http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html
238
239 and, most importantly, designed and implemented the initramfs code.
240
171Future directions: 241Future directions:
172------------------ 242------------------
173 243
diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt
index d803abed29f0..5832377b7340 100644
--- a/Documentation/filesystems/relayfs.txt
+++ b/Documentation/filesystems/relayfs.txt
@@ -44,30 +44,41 @@ relayfs can operate in a mode where it will overwrite data not yet
44collected by userspace, and not wait for it to consume it. 44collected by userspace, and not wait for it to consume it.
45 45
46relayfs itself does not provide for communication of such data between 46relayfs itself does not provide for communication of such data between
47userspace and kernel, allowing the kernel side to remain simple and not 47userspace and kernel, allowing the kernel side to remain simple and
48impose a single interface on userspace. It does provide a separate 48not impose a single interface on userspace. It does provide a set of
49helper though, described below. 49examples and a separate helper though, described below.
50
51klog and relay-apps example code
52================================
53
54relayfs itself is ready to use, but to make things easier, a couple
55simple utility functions and a set of examples are provided.
56
57The relay-apps example tarball, available on the relayfs sourceforge
58site, contains a set of self-contained examples, each consisting of a
59pair of .c files containing boilerplate code for each of the user and
60kernel sides of a relayfs application; combined these two sets of
61boilerplate code provide glue to easily stream data to disk, without
62having to bother with mundane housekeeping chores.
63
64The 'klog debugging functions' patch (klog.patch in the relay-apps
65tarball) provides a couple of high-level logging functions to the
66kernel which allow writing formatted text or raw data to a channel,
67regardless of whether a channel to write into exists or not, or
68whether relayfs is compiled into the kernel or is configured as a
69module. These functions allow you to put unconditional 'trace'
70statements anywhere in the kernel or kernel modules; only when there
71is a 'klog handler' registered will data actually be logged (see the
72klog and kleak examples for details).
73
74It is of course possible to use relayfs from scratch i.e. without
75using any of the relay-apps example code or klog, but you'll have to
76implement communication between userspace and kernel, allowing both to
77convey the state of buffers (full, empty, amount of padding).
78
79klog and the relay-apps examples can be found in the relay-apps
80tarball on http://relayfs.sourceforge.net
50 81
51klog, relay-app & librelay
52==========================
53
54relayfs itself is ready to use, but to make things easier, two
55additional systems are provided. klog is a simple wrapper to make
56writing formatted text or raw data to a channel simpler, regardless of
57whether a channel to write into exists or not, or whether relayfs is
58compiled into the kernel or is configured as a module. relay-app is
59the kernel counterpart of userspace librelay.c, combined these two
60files provide glue to easily stream data to disk, without having to
61bother with housekeeping. klog and relay-app can be used together,
62with klog providing high-level logging functions to the kernel and
63relay-app taking care of kernel-user control and disk-logging chores.
64
65It is possible to use relayfs without relay-app & librelay, but you'll
66have to implement communication between userspace and kernel, allowing
67both to convey the state of buffers (full, empty, amount of padding).
68
69klog, relay-app and librelay can be found in the relay-apps tarball on
70http://relayfs.sourceforge.net
71 82
72The relayfs user space API 83The relayfs user space API
73========================== 84==========================
@@ -125,6 +136,8 @@ Here's a summary of the API relayfs provides to in-kernel clients:
125 relay_reset(chan) 136 relay_reset(chan)
126 relayfs_create_dir(name, parent) 137 relayfs_create_dir(name, parent)
127 relayfs_remove_dir(dentry) 138 relayfs_remove_dir(dentry)
139 relayfs_create_file(name, parent, mode, fops, data)
140 relayfs_remove_file(dentry)
128 141
129 channel management typically called on instigation of userspace: 142 channel management typically called on instigation of userspace:
130 143
@@ -141,6 +154,8 @@ Here's a summary of the API relayfs provides to in-kernel clients:
141 subbuf_start(buf, subbuf, prev_subbuf, prev_padding) 154 subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
142 buf_mapped(buf, filp) 155 buf_mapped(buf, filp)
143 buf_unmapped(buf, filp) 156 buf_unmapped(buf, filp)
157 create_buf_file(filename, parent, mode, buf, is_global)
158 remove_buf_file(dentry)
144 159
145 helper functions: 160 helper functions:
146 161
@@ -320,6 +335,71 @@ forces a sub-buffer switch on all the channel buffers, and can be used
320to finalize and process the last sub-buffers before the channel is 335to finalize and process the last sub-buffers before the channel is
321closed. 336closed.
322 337
338Creating non-relay files
339------------------------
340
341relay_open() automatically creates files in the relayfs filesystem to
342represent the per-cpu kernel buffers; it's often useful for
343applications to be able to create their own files alongside the relay
344files in the relayfs filesystem as well e.g. 'control' files much like
345those created in /proc or debugfs for similar purposes, used to
346communicate control information between the kernel and user sides of a
347relayfs application. For this purpose the relayfs_create_file() and
348relayfs_remove_file() API functions exist. For relayfs_create_file(),
349the caller passes in a set of user-defined file operations to be used
350for the file and an optional void * to a user-specified data item,
351which will be accessible via inode->u.generic_ip (see the relay-apps
352tarball for examples). The file_operations are a required parameter
353to relayfs_create_file() and thus the semantics of these files are
354completely defined by the caller.
355
356See the relay-apps tarball at http://relayfs.sourceforge.net for
357examples of how these non-relay files are meant to be used.
358
359Creating relay files in other filesystems
360-----------------------------------------
361
362By default of course, relay_open() creates relay files in the relayfs
363filesystem. Because relay_file_operations is exported, however, it's
364also possible to create and use relay files in other pseudo-filesystems
365such as debugfs.
366
367For this purpose, two callback functions are provided,
368create_buf_file() and remove_buf_file(). create_buf_file() is called
369once for each per-cpu buffer from relay_open() to allow the client to
370create a file to be used to represent the corresponding buffer; if
371this callback is not defined, the default implementation will create
372and return a file in the relayfs filesystem to represent the buffer.
373The callback should return the dentry of the file created to represent
374the relay buffer. Note that the parent directory passed to
375relay_open() (and passed along to the callback), if specified, must
376exist in the same filesystem the new relay file is created in. If
377create_buf_file() is defined, remove_buf_file() must also be defined;
378it's responsible for deleting the file(s) created in create_buf_file()
379and is called during relay_close().
380
381The create_buf_file() implementation can also be defined in such a way
382as to allow the creation of a single 'global' buffer instead of the
383default per-cpu set. This can be useful for applications interested
384mainly in seeing the relative ordering of system-wide events without
385the need to bother with saving explicit timestamps for the purpose of
386merging/sorting per-cpu files in a postprocessing step.
387
388To have relay_open() create a global buffer, the create_buf_file()
389implementation should set the value of the is_global outparam to a
390non-zero value in addition to creating the file that will be used to
391represent the single buffer. In the case of a global buffer,
392create_buf_file() and remove_buf_file() will be called only once. The
393normal channel-writing functions e.g. relay_write() can still be used
394- writes from any cpu will transparently end up in the global buffer -
395but since it is a global buffer, callers should make sure they use the
396proper locking for such a buffer, either by wrapping writes in a
397spinlock, or by copying a write function from relayfs_fs.h and
398creating a local version that internally does the proper locking.
399
400See the 'exported-relayfile' examples in the relay-apps tarball for
401examples of creating and using relay files in debugfs.
402
323Misc 403Misc
324---- 404----
325 405
diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt
index 5f2b9c5edbb5..22488d791168 100644
--- a/Documentation/keys-request-key.txt
+++ b/Documentation/keys-request-key.txt
@@ -56,10 +56,12 @@ A request proceeds in the following manner:
56 (4) request_key() then forks and executes /sbin/request-key with a new session 56 (4) request_key() then forks and executes /sbin/request-key with a new session
57 keyring that contains a link to auth key V. 57 keyring that contains a link to auth key V.
58 58
59 (5) /sbin/request-key execs an appropriate program to perform the actual 59 (5) /sbin/request-key assumes the authority associated with key U.
60
61 (6) /sbin/request-key execs an appropriate program to perform the actual
60 instantiation. 62 instantiation.
61 63
62 (6) The program may want to access another key from A's context (say a 64 (7) The program may want to access another key from A's context (say a
63 Kerberos TGT key). It just requests the appropriate key, and the keyring 65 Kerberos TGT key). It just requests the appropriate key, and the keyring
64 search notes that the session keyring has auth key V in its bottom level. 66 search notes that the session keyring has auth key V in its bottom level.
65 67
@@ -67,19 +69,19 @@ A request proceeds in the following manner:
67 UID, GID, groups and security info of process A as if it was process A, 69 UID, GID, groups and security info of process A as if it was process A,
68 and come up with key W. 70 and come up with key W.
69 71
70 (7) The program then does what it must to get the data with which to 72 (8) The program then does what it must to get the data with which to
71 instantiate key U, using key W as a reference (perhaps it contacts a 73 instantiate key U, using key W as a reference (perhaps it contacts a
72 Kerberos server using the TGT) and then instantiates key U. 74 Kerberos server using the TGT) and then instantiates key U.
73 75
74 (8) Upon instantiating key U, auth key V is automatically revoked so that it 76 (9) Upon instantiating key U, auth key V is automatically revoked so that it
75 may not be used again. 77 may not be used again.
76 78
77 (9) The program then exits 0 and request_key() deletes key V and returns key 79(10) The program then exits 0 and request_key() deletes key V and returns key
78 U to the caller. 80 U to the caller.
79 81
80This also extends further. If key W (step 5 above) didn't exist, key W would be 82This also extends further. If key W (step 7 above) didn't exist, key W would be
81created uninstantiated, another auth key (X) would be created [as per step 3] 83created uninstantiated, another auth key (X) would be created (as per step 3)
82and another copy of /sbin/request-key spawned [as per step 4]; but the context 84and another copy of /sbin/request-key spawned (as per step 4); but the context
83specified by auth key X will still be process A, as it was in auth key V. 85specified by auth key X will still be process A, as it was in auth key V.
84 86
85This is because process A's keyrings can't simply be attached to 87This is because process A's keyrings can't simply be attached to
@@ -138,8 +140,8 @@ until one succeeds:
138 140
139 (3) The process's session keyring is searched. 141 (3) The process's session keyring is searched.
140 142
141 (4) If the process has a request_key() authorisation key in its session 143 (4) If the process has assumed the authority associated with a request_key()
142 keyring then: 144 authorisation key then:
143 145
144 (a) If extant, the calling process's thread keyring is searched. 146 (a) If extant, the calling process's thread keyring is searched.
145 147
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index 6304db59bfe4..aaa01b0e3ee9 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -308,6 +308,8 @@ process making the call:
308 KEY_SPEC_USER_KEYRING -4 UID-specific keyring 308 KEY_SPEC_USER_KEYRING -4 UID-specific keyring
309 KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring 309 KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring
310 KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring 310 KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring
311 KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key()
312 authorisation key
311 313
312 314
313The main syscalls are: 315The main syscalls are:
@@ -498,7 +500,11 @@ The keyctl syscall functions are:
498 keyring is full, error ENFILE will result. 500 keyring is full, error ENFILE will result.
499 501
500 The link procedure checks the nesting of the keyrings, returning ELOOP if 502 The link procedure checks the nesting of the keyrings, returning ELOOP if
501 it appears to deep or EDEADLK if the link would introduce a cycle. 503 it appears too deep or EDEADLK if the link would introduce a cycle.
504
505 Any links within the keyring to keys that match the new key in terms of
506 type and description will be discarded from the keyring as the new one is
507 added.
502 508
503 509
504 (*) Unlink a key or keyring from another keyring: 510 (*) Unlink a key or keyring from another keyring:
@@ -628,6 +634,41 @@ The keyctl syscall functions are:
628 there is one, otherwise the user default session keyring. 634 there is one, otherwise the user default session keyring.
629 635
630 636
637 (*) Set the timeout on a key.
638
639 long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout);
640
641 This sets or clears the timeout on a key. The timeout can be 0 to clear
642 the timeout or a number of seconds to set the expiry time that far into
643 the future.
644
645 The process must have attribute modification access on a key to set its
646 timeout. Timeouts may not be set with this function on negative, revoked
647 or expired keys.
648
649
650 (*) Assume the authority granted to instantiate a key
651
652 long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key);
653
654 This assumes or divests the authority required to instantiate the
655 specified key. Authority can only be assumed if the thread has the
656 authorisation key associated with the specified key in its keyrings
657 somewhere.
658
659 Once authority is assumed, searches for keys will also search the
660 requester's keyrings using the requester's security label, UID, GID and
661 groups.
662
663 If the requested authority is unavailable, error EPERM will be returned,
664 likewise if the authority has been revoked because the target key is
665 already instantiated.
666
667 If the specified key is 0, then any assumed authority will be divested.
668
669 The assumed authoritative key is inherited across fork and exec.
670
671
631=============== 672===============
632KERNEL SERVICES 673KERNEL SERVICES
633=============== 674===============
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 2f1aae32a5d9..6910c0136f8d 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm:
26- min_free_kbytes 26- min_free_kbytes
27- laptop_mode 27- laptop_mode
28- block_dump 28- block_dump
29- drop-caches
29 30
30============================================================== 31==============================================================
31 32
32dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, 33dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
33dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, 34dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
34block_dump, swap_token_timeout: 35block_dump, swap_token_timeout, drop-caches:
35 36
36See Documentation/filesystems/proc.txt 37See Documentation/filesystems/proc.txt
37 38
@@ -102,3 +103,20 @@ This is used to force the Linux VM to keep a minimum number
102of kilobytes free. The VM uses this number to compute a pages_min 103of kilobytes free. The VM uses this number to compute a pages_min
103value for each lowmem zone in the system. Each lowmem zone gets 104value for each lowmem zone in the system. Each lowmem zone gets
104a number of reserved free pages based proportionally on its size. 105a number of reserved free pages based proportionally on its size.
106
107==============================================================
108
109percpu_pagelist_fraction
110
111This is the fraction of pages at most (high mark pcp->high) in each zone that
112are allocated for each per cpu page list. The min value for this is 8. It
113means that we don't allow more than 1/8th of pages in each zone to be
114allocated in any single per_cpu_pagelist. This entry only changes the value
115of hot per cpu pagelists. User can specify a number like 100 to allocate
1161/100th of each zone to each per cpu page list.
117
118The batch value of each per cpu pagelist is also updated as a result. It is
119set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
120
121The initial value is zero. The kernel does not use this value at boot time to set
122the high water marks for each per cpu page list.
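
As a hypothetical usage example (the value 50 is arbitrary), limiting each per
cpu page list to at most 1/50th of its zone would look like:

	echo 50 > /proc/sys/vm/percpu_pagelist_fraction
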
diff --git a/MAINTAINERS b/MAINTAINERS
index 7e780906d34c..76dc820bc889 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -927,7 +927,6 @@ S: Maintained
927FARSYNC SYNCHRONOUS DRIVER 927FARSYNC SYNCHRONOUS DRIVER
928P: Kevin Curtis 928P: Kevin Curtis
929M: kevin.curtis@farsite.co.uk 929M: kevin.curtis@farsite.co.uk
930M: kevin.curtis@farsite.co.uk
931W: http://www.farsite.co.uk/ 930W: http://www.farsite.co.uk/
932S: Supported 931S: Supported
933 932
diff --git a/README b/README
index 61c4f7429233..cd5e2eb6213b 100644
--- a/README
+++ b/README
@@ -183,11 +183,8 @@ CONFIGURING the kernel:
183 183
184COMPILING the kernel: 184COMPILING the kernel:
185 185
186 - Make sure you have gcc 2.95.3 available. 186 - Make sure you have at least gcc 3.2 available.
187 gcc 2.91.66 (egcs-1.1.2), and gcc 2.7.2.3 are known to miscompile 187 For more information, refer to Documentation/Changes.
188 some parts of the kernel, and are *no longer supported*.
189 Also remember to upgrade your binutils package (for as/ld/nm and company)
190 if necessary. For more information, refer to Documentation/Changes.
191 188
192 Please note that you can still run a.out user programs with this kernel. 189 Please note that you can still run a.out user programs with this kernel.
193 190
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 153337ff1d7b..eedf41bf7057 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -18,9 +18,6 @@ config MMU
18 bool 18 bool
19 default y 19 default y
20 20
21config UID16
22 bool
23
24config RWSEM_GENERIC_SPINLOCK 21config RWSEM_GENERIC_SPINLOCK
25 bool 22 bool
26 23
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index a8682612abc0..abb739b88ed1 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -43,6 +43,11 @@
43#include "proto.h" 43#include "proto.h"
44#include "pci_impl.h" 44#include "pci_impl.h"
45 45
46/*
47 * Power off function, if any
48 */
49void (*pm_power_off)(void) = machine_power_off;
50
46void 51void
47cpu_idle(void) 52cpu_idle(void)
48{ 53{
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index bbd37536d14e..9969d212e94d 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -265,30 +265,16 @@ do_sys_ptrace(long request, long pid, long addr, long data,
265 lock_kernel(); 265 lock_kernel();
266 DBG(DBG_MEM, ("request=%ld pid=%ld addr=0x%lx data=0x%lx\n", 266 DBG(DBG_MEM, ("request=%ld pid=%ld addr=0x%lx data=0x%lx\n",
267 request, pid, addr, data)); 267 request, pid, addr, data));
268 ret = -EPERM;
269 if (request == PTRACE_TRACEME) { 268 if (request == PTRACE_TRACEME) {
270 /* are we already being traced? */ 269 ret = ptrace_traceme();
271 if (current->ptrace & PT_PTRACED)
272 goto out_notsk;
273 ret = security_ptrace(current->parent, current);
274 if (ret)
275 goto out_notsk;
276 /* set the ptrace bit in the process ptrace flags. */
277 current->ptrace |= PT_PTRACED;
278 ret = 0;
279 goto out_notsk; 270 goto out_notsk;
280 } 271 }
281 if (pid == 1) /* you may not mess with init */
282 goto out_notsk;
283 272
284 ret = -ESRCH; 273 child = ptrace_get_task_struct(pid);
285 read_lock(&tasklist_lock); 274 if (IS_ERR(child)) {
286 child = find_task_by_pid(pid); 275 ret = PTR_ERR(child);
287 if (child)
288 get_task_struct(child);
289 read_unlock(&tasklist_lock);
290 if (!child)
291 goto out_notsk; 276 goto out_notsk;
277 }
292 278
293 if (request == PTRACE_ATTACH) { 279 if (request == PTRACE_ATTACH) {
294 ret = ptrace_attach(child); 280 ret = ptrace_attach(child);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e149f152e70b..50b9afa8ae6d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -46,10 +46,6 @@ config MCA
46 <file:Documentation/mca.txt> (and especially the web page given 46 <file:Documentation/mca.txt> (and especially the web page given
47 there) before attempting to build an MCA bus kernel. 47 there) before attempting to build an MCA bus kernel.
48 48
49config UID16
50 bool
51 default y
52
53config RWSEM_GENERIC_SPINLOCK 49config RWSEM_GENERIC_SPINLOCK
54 bool 50 bool
55 default y 51 default y
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index b6de43e73699..a2dfe0b0f1ec 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/device.h> 14#include <linux/device.h>
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/slab.h>
16#include <linux/platform_device.h> 17#include <linux/platform_device.h>
17#include <asm/io.h> 18#include <asm/io.h>
18#include <asm/hardware/scoop.h> 19#include <asm/hardware/scoop.h>
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 04d3082a7b94..0abbce8c70bc 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -23,20 +23,15 @@
23#error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32 23#error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32
24#endif 24#endif
25/* 25/*
26 * GCC 2.95.1, 2.95.2: ignores register clobber list in asm().
27 * GCC 3.0, 3.1: general bad code generation. 26 * GCC 3.0, 3.1: general bad code generation.
28 * GCC 3.2.0: incorrect function argument offset calculation. 27 * GCC 3.2.0: incorrect function argument offset calculation.
29 * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c 28 * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c
30 * (http://gcc.gnu.org/PR8896) and incorrect structure 29 * (http://gcc.gnu.org/PR8896) and incorrect structure
31 * initialisation in fs/jffs2/erase.c 30 * initialisation in fs/jffs2/erase.c
32 */ 31 */
33#if __GNUC__ < 2 || \ 32#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
34 (__GNUC__ == 2 && __GNUC_MINOR__ < 95) || \
35 (__GNUC__ == 2 && __GNUC_MINOR__ == 95 && __GNUC_PATCHLEVEL__ != 0 && \
36 __GNUC_PATCHLEVEL__ < 3) || \
37 (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
38#error Your compiler is too buggy; it is known to miscompile kernels. 33#error Your compiler is too buggy; it is known to miscompile kernels.
39#error Known good compilers: 2.95.3, 2.95.4, 2.96, 3.3 34#error Known good compilers: 3.3
40#endif 35#endif
41 36
42/* Use marker if you need to separate the values later */ 37/* Use marker if you need to separate the values later */
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 869c466e6258..b5645c4462cf 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -684,8 +684,12 @@ int setup_irq(unsigned int irq, struct irqaction *new)
684 spin_lock_irqsave(&irq_controller_lock, flags); 684 spin_lock_irqsave(&irq_controller_lock, flags);
685 p = &desc->action; 685 p = &desc->action;
686 if ((old = *p) != NULL) { 686 if ((old = *p) != NULL) {
687 /* Can't share interrupts unless both agree to */ 687 /*
688 if (!(old->flags & new->flags & SA_SHIRQ)) { 688 * Can't share interrupts unless both agree to and are
689 * the same type.
690 */
691 if (!(old->flags & new->flags & SA_SHIRQ) ||
692 (~old->flags & new->flags) & SA_TRIGGER_MASK) {
689 spin_unlock_irqrestore(&irq_controller_lock, flags); 693 spin_unlock_irqrestore(&irq_controller_lock, flags);
690 return -EBUSY; 694 return -EBUSY;
691 } 695 }
@@ -705,6 +709,12 @@ int setup_irq(unsigned int irq, struct irqaction *new)
705 desc->running = 0; 709 desc->running = 0;
706 desc->pending = 0; 710 desc->pending = 0;
707 desc->disable_depth = 1; 711 desc->disable_depth = 1;
712
713 if (new->flags & SA_TRIGGER_MASK) {
714 unsigned int type = new->flags & SA_TRIGGER_MASK;
715 desc->chip->set_type(irq, type);
716 }
717
708 if (!desc->noautoenable) { 718 if (!desc->noautoenable) {
709 desc->disable_depth = 0; 719 desc->disable_depth = 0;
710 desc->chip->unmask(irq); 720 desc->chip->unmask(irq);
diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c
index 775f85fc8513..9e563de465b5 100644
--- a/arch/arm/mach-footbridge/netwinder-hw.c
+++ b/arch/arm/mach-footbridge/netwinder-hw.c
@@ -601,6 +601,7 @@ EXPORT_SYMBOL(gpio_lock);
601EXPORT_SYMBOL(gpio_modify_op); 601EXPORT_SYMBOL(gpio_modify_op);
602EXPORT_SYMBOL(gpio_modify_io); 602EXPORT_SYMBOL(gpio_modify_io);
603EXPORT_SYMBOL(cpld_modify); 603EXPORT_SYMBOL(cpld_modify);
604EXPORT_SYMBOL(gpio_read);
604 605
605/* 606/*
606 * Initialise any other hardware after we've got the PCI bus 607 * Initialise any other hardware after we've got the PCI bus
diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c
index 9f46aaef8968..3c22c16b38bf 100644
--- a/arch/arm/mach-integrator/time.c
+++ b/arch/arm/mach-integrator/time.c
@@ -96,7 +96,8 @@ static struct rtc_ops rtc_ops = {
96 .set_alarm = rtc_set_alarm, 96 .set_alarm = rtc_set_alarm,
97}; 97};
98 98
99static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) 99static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id,
100 struct pt_regs *regs)
100{ 101{
101 writel(0, rtc_base + RTC_EOI); 102 writel(0, rtc_base + RTC_EOI);
102 return IRQ_HANDLED; 103 return IRQ_HANDLED;
@@ -124,7 +125,7 @@ static int rtc_probe(struct amba_device *dev, void *id)
124 125
125 xtime.tv_sec = __raw_readl(rtc_base + RTC_DR); 126 xtime.tv_sec = __raw_readl(rtc_base + RTC_DR);
126 127
127 ret = request_irq(dev->irq[0], rtc_interrupt, SA_INTERRUPT, 128 ret = request_irq(dev->irq[0], arm_rtc_interrupt, SA_INTERRUPT,
128 "rtc-pl030", dev); 129 "rtc-pl030", dev);
129 if (ret) 130 if (ret)
130 goto map_out; 131 goto map_out;
diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c
index fcfb81d13cfe..7a68f098a025 100644
--- a/arch/arm/mach-omap1/serial.c
+++ b/arch/arm/mach-omap1/serial.c
@@ -252,9 +252,8 @@ static void __init omap_serial_set_port_wakeup(int gpio_nr)
252 return; 252 return;
253 } 253 }
254 omap_set_gpio_direction(gpio_nr, 1); 254 omap_set_gpio_direction(gpio_nr, 1);
255 set_irq_type(OMAP_GPIO_IRQ(gpio_nr), IRQT_RISING);
256 ret = request_irq(OMAP_GPIO_IRQ(gpio_nr), &omap_serial_wake_interrupt, 255 ret = request_irq(OMAP_GPIO_IRQ(gpio_nr), &omap_serial_wake_interrupt,
257 0, "serial wakeup", NULL); 256 SA_TRIGGER_RISING, "serial wakeup", NULL);
258 if (ret) { 257 if (ret) {
259 omap_free_gpio(gpio_nr); 258 omap_free_gpio(gpio_nr);
260 printk(KERN_ERR "No interrupt for UART wake GPIO: %i\n", 259 printk(KERN_ERR "No interrupt for UART wake GPIO: %i\n",
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 100fb31b5156..5a7b873f29b3 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -213,15 +213,14 @@ static int corgi_mci_init(struct device *dev, irqreturn_t (*corgi_detect_int)(in
213 213
214 corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250); 214 corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250);
215 215
216 err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int, SA_INTERRUPT, 216 err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int,
217 "MMC card detect", data); 217 SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
218 "MMC card detect", data);
218 if (err) { 219 if (err) {
219 printk(KERN_ERR "corgi_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); 220 printk(KERN_ERR "corgi_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
220 return -1; 221 return -1;
221 } 222 }
222 223
223 set_irq_type(CORGI_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE);
224
225 return 0; 224 return 0;
226} 225}
227 226
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index eef3de26ad37..663c95005985 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -146,15 +146,14 @@ static int poodle_mci_init(struct device *dev, irqreturn_t (*poodle_detect_int)(
 
 	poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250);
 
-	err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int, SA_INTERRUPT,
-			  "MMC card detect", data);
+	err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int,
+			  SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+			  "MMC card detect", data);
 	if (err) {
 		printk(KERN_ERR "poodle_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
 		return -1;
 	}
 
-	set_irq_type(POODLE_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE);
-
 	return 0;
 }
 
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index f2007db0cda5..a9eacc06555f 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -296,15 +296,14 @@ static int spitz_mci_init(struct device *dev, irqreturn_t (*spitz_detect_int)(in
 
 	spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250);
 
-	err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int, SA_INTERRUPT,
-			  "MMC card detect", data);
+	err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int,
+			  SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+			  "MMC card detect", data);
 	if (err) {
 		printk(KERN_ERR "spitz_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
 		return -1;
 	}
 
-	set_irq_type(SPITZ_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE);
-
 	return 0;
 }
 
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index c9d7c596b200..caf6b8bb6c95 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/smp.h>
+#include <linux/jiffies.h>
 
 #include <asm/mach/time.h>
 #include <asm/hardware/arm_twd.h>
diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c
index 5098b50158a3..495f8c6ffcb6 100644
--- a/arch/arm/mach-s3c2410/usb-simtec.c
+++ b/arch/arm/mach-s3c2410/usb-simtec.c
@@ -84,13 +84,13 @@ static void usb_simtec_enableoc(struct s3c2410_hcd_info *info, int on)
 	int ret;
 
 	if (on) {
-		ret = request_irq(IRQ_USBOC, usb_simtec_ocirq, SA_INTERRUPT,
+		ret = request_irq(IRQ_USBOC, usb_simtec_ocirq,
+				  SA_INTERRUPT | SA_TRIGGER_RISING |
+				  SA_TRIGGER_FALLING,
 				  "USB Over-current", info);
 		if (ret != 0) {
 			printk(KERN_ERR "failed to request usb oc irq\n");
 		}
-
-		set_irq_type(IRQ_USBOC, IRQT_BOTHEDGE);
 	} else {
 		free_irq(IRQ_USBOC, info);
 	}
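All of the ARM conversions above follow one pattern: the edge configuration that used to need a separate set_irq_type() call is now handed to request_irq() as SA_TRIGGER_* flags, so the trigger type is applied while the handler is installed. A minimal before/after sketch (the handler, IRQ number and device name here are hypothetical, for illustration only):

	/* before: two steps, with a window where the IRQ is live
	 * but not yet configured for the desired edges */
	err = request_irq(MY_IRQ, my_handler, SA_INTERRUPT, "mydev", dev);
	set_irq_type(MY_IRQ, IRQT_BOTHEDGE);

	/* after: one step; the core sets the trigger type as part
	 * of installing the action */
	err = request_irq(MY_IRQ, my_handler,
			  SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
			  "mydev", dev);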
diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig
index 1f00b3d03a07..274e07019b46 100644
--- a/arch/arm26/Kconfig
+++ b/arch/arm26/Kconfig
@@ -34,10 +34,6 @@ config FORCE_MAX_ZONEORDER
 	int
 	default 9
 
-config UID16
-	bool
-	default y
-
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
diff --git a/arch/arm26/kernel/asm-offsets.c b/arch/arm26/kernel/asm-offsets.c
index 4ccacaef94df..ac682d5fd039 100644
--- a/arch/arm26/kernel/asm-offsets.c
+++ b/arch/arm26/kernel/asm-offsets.c
@@ -25,13 +25,6 @@
 #if defined(__APCS_32__) && defined(CONFIG_CPU_26)
 #error Sorry, your compiler targets APCS-32 but this kernel requires APCS-26
 #endif
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95)
-#error Sorry, your compiler is known to miscompile kernels. Only use gcc 2.95.3 and later.
-#endif
-#if __GNUC__ == 2 && __GNUC_MINOR__ == 95
-/* shame we can't detect the .1 or .2 releases */
-#warning GCC 2.95.2 and earlier miscompiles kernels.
-#endif
 
 /* Use marker if you need to separate the values later */
 
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index e5979d68e352..b83261949737 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -9,10 +9,6 @@ config MMU
 	bool
 	default y
 
-config UID16
-	bool
-	default y
-
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index ec85c0d6c6da..61261b78ced7 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -274,6 +274,11 @@ config GPREL_DATA_NONE
 
 endchoice
 
+config FRV_ONCPU_SERIAL
+	bool "Use on-CPU serial ports"
+	select SERIAL_8250
+	default y
+
 config PCI
 	bool "Use PCI"
 	depends on MB93090_MB00
@@ -305,23 +310,7 @@ config RESERVE_DMA_COHERENT
 
 source "drivers/pci/Kconfig"
 
-config PCMCIA
-	tristate "Use PCMCIA"
-	help
-	  Say Y here if you want to attach PCMCIA- or PC-cards to your FR-V
-	  board. These are credit-card size devices such as network cards,
-	  modems or hard drives often used with laptops computers. There are
-	  actually two varieties of these cards: the older 16 bit PCMCIA cards
-	  and the newer 32 bit CardBus cards. If you want to use CardBus
-	  cards, you need to say Y here and also to "CardBus support" below.
-
-	  To use your PC-cards, you will need supporting software from David
-	  Hinds pcmcia-cs package (see the file <file:Documentation/Changes>
-	  for location). Please also read the PCMCIA-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>.
-
-	  To compile this driver as modules, choose M here: the
-	  modules will be called pcmcia_core and ds.
+source "drivers/pcmcia/Kconfig"
 
 #config MATH_EMULATION
 #	bool "Math emulation support (EXPERIMENTAL)"
diff --git a/arch/frv/Kconfig.debug b/arch/frv/Kconfig.debug
index 0034b654995d..211f01bc4caa 100644
--- a/arch/frv/Kconfig.debug
+++ b/arch/frv/Kconfig.debug
@@ -2,32 +2,10 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-config EARLY_PRINTK
-	bool "Early printk"
-	depends on EMBEDDED && DEBUG_KERNEL
-	default n
-	help
-	  Write kernel log output directly into the VGA buffer or to a serial
-	  port.
-
-	  This is useful for kernel debugging when your machine crashes very
-	  early before the console code is initialized. For normal operation
-	  it is not recommended because it looks ugly and doesn't cooperate
-	  with klogd/syslogd or the X server. You should normally N here,
-	  unless you want to debug such a crash.
-
 config DEBUG_STACKOVERFLOW
 	bool "Check for stack overflows"
 	depends on DEBUG_KERNEL
 
-config DEBUG_PAGEALLOC
-	bool "Page alloc debugging"
-	depends on DEBUG_KERNEL
-	help
-	  Unmap pages from the kernel linear mapping after free_pages().
-	  This results in a large slowdown, but helps to find certain types
-	  of memory corruptions.
-
 config GDBSTUB
 	bool "Remote GDB kernel debugging"
 	depends on DEBUG_KERNEL
diff --git a/arch/frv/Makefile b/arch/frv/Makefile
index 54046d2386f5..90c0fb8d9dc3 100644
--- a/arch/frv/Makefile
+++ b/arch/frv/Makefile
@@ -109,10 +109,10 @@ bootstrap:
 	$(Q)$(MAKEBOOT) bootstrap
 
 archmrproper:
-	$(Q)$(MAKE) -C arch/frv/boot mrproper
+	$(Q)$(MAKE) $(build)=arch/frv/boot mrproper
 
 archclean:
-	$(Q)$(MAKE) -C arch/frv/boot clean
+	$(Q)$(MAKE) $(build)=arch/frv/boot clean
 
 archdep: scripts/mkdep symlinks
-	$(Q)$(MAKE) -C arch/frv/boot dep
+	$(Q)$(MAKE) $(build)=arch/frv/boot dep
diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile
index 422f30ede575..5a827b349b5e 100644
--- a/arch/frv/kernel/Makefile
+++ b/arch/frv/kernel/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_PM) += pm.o cmode.o
 obj-$(CONFIG_MB93093_PDK)	+= pm-mb93093.o
 obj-$(CONFIG_SYSCTL)		+= sysctl.o
 obj-$(CONFIG_FUTEX)		+= futex.o
+obj-$(CONFIG_MODULES)		+= module.o
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c
index 1a76d5247190..5f118c89d091 100644
--- a/arch/frv/kernel/frv_ksyms.c
+++ b/arch/frv/kernel/frv_ksyms.c
@@ -16,10 +16,11 @@
 #include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/hardirq.h>
-#include <asm/current.h>
+#include <asm/cacheflush.h>
 
 extern void dump_thread(struct pt_regs *, struct user *);
 extern long __memcpy_user(void *dst, const void *src, size_t count);
+extern long __memset_user(void *dst, const void *src, size_t count);
 
 /* platform dependent support */
 
@@ -50,7 +51,11 @@ EXPORT_SYMBOL(disable_irq);
 EXPORT_SYMBOL(__res_bus_clock_speed_HZ);
 EXPORT_SYMBOL(__page_offset);
 EXPORT_SYMBOL(__memcpy_user);
-EXPORT_SYMBOL(flush_dcache_page);
+EXPORT_SYMBOL(__memset_user);
+EXPORT_SYMBOL(frv_dcache_writeback);
+EXPORT_SYMBOL(frv_cache_invalidate);
+EXPORT_SYMBOL(frv_icache_invalidate);
+EXPORT_SYMBOL(frv_cache_wback_inv);
 
 #ifndef CONFIG_MMU
 EXPORT_SYMBOL(memory_start);
@@ -72,6 +77,9 @@ EXPORT_SYMBOL(memcmp);
 EXPORT_SYMBOL(memscan);
 EXPORT_SYMBOL(memmove);
 
+EXPORT_SYMBOL(__outsl_ns);
+EXPORT_SYMBOL(__insl_ns);
+
 EXPORT_SYMBOL(get_wchan);
 
 #ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
@@ -80,14 +88,13 @@ EXPORT_SYMBOL(atomic_test_and_OR_mask);
 EXPORT_SYMBOL(atomic_test_and_XOR_mask);
 EXPORT_SYMBOL(atomic_add_return);
 EXPORT_SYMBOL(atomic_sub_return);
-EXPORT_SYMBOL(__xchg_8);
-EXPORT_SYMBOL(__xchg_16);
 EXPORT_SYMBOL(__xchg_32);
-EXPORT_SYMBOL(__cmpxchg_8);
-EXPORT_SYMBOL(__cmpxchg_16);
 EXPORT_SYMBOL(__cmpxchg_32);
 #endif
 
+EXPORT_SYMBOL(__debug_bug_printk);
+EXPORT_SYMBOL(__delay_loops_MHz);
+
 /*
  * libgcc functions - functions that are used internally by the
  * compiler... (prototypes are not correct though, but that
@@ -101,6 +108,8 @@ extern void __divdi3(void);
 extern void __lshrdi3(void);
 extern void __moddi3(void);
 extern void __muldi3(void);
+extern void __mulll(void);
+extern void __umulll(void);
 extern void __negdi2(void);
 extern void __ucmpdi2(void);
 extern void __udivdi3(void);
@@ -116,8 +125,10 @@ EXPORT_SYMBOL(__ashrdi3);
 EXPORT_SYMBOL(__lshrdi3);
 //EXPORT_SYMBOL(__moddi3);
 EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__mulll);
+EXPORT_SYMBOL(__umulll);
 EXPORT_SYMBOL(__negdi2);
-//EXPORT_SYMBOL(__ucmpdi2);
+EXPORT_SYMBOL(__ucmpdi2);
 //EXPORT_SYMBOL(__udivdi3);
 //EXPORT_SYMBOL(__udivmoddi4);
 //EXPORT_SYMBOL(__umoddi3);
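Most of the frv changes in this merge are of this shape: symbols that loadable modules will need are exported with EXPORT_SYMBOL() next to their definitions, and the files gain <linux/module.h> for the macro. A generic sketch of the pattern, with a hypothetical function name:

	#include <linux/module.h>

	/* linkable only by the core kernel until exported */
	long my_arch_helper(long x)
	{
		return x;
	}

	/* after this, a loadable .ko can resolve the symbol too */
	EXPORT_SYMBOL(my_arch_helper);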
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index 8c524cdd2717..59580c59c62c 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -32,6 +32,7 @@
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/module.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
@@ -178,6 +179,8 @@ void disable_irq_nosync(unsigned int irq)
 	spin_unlock_irqrestore(&level->lock, flags);
 }
 
+EXPORT_SYMBOL(disable_irq_nosync);
+
 /**
  *	disable_irq - disable an irq and wait for completion
  *	@irq: Interrupt to disable
@@ -204,6 +207,8 @@ void disable_irq(unsigned int irq)
 #endif
 }
 
+EXPORT_SYMBOL(disable_irq);
+
 /**
  *	enable_irq - enable handling of an irq
  *	@irq: Interrupt to enable
@@ -268,6 +273,8 @@ void enable_irq(unsigned int irq)
 	spin_unlock_irqrestore(&level->lock, flags);
 }
 
+EXPORT_SYMBOL(enable_irq);
+
 /*****************************************************************************/
 /*
  * handles all normal device IRQ's
@@ -425,6 +432,8 @@ int request_irq(unsigned int irq,
 	return retval;
 }
 
+EXPORT_SYMBOL(request_irq);
+
 /**
  *	free_irq - free an interrupt
  *	@irq: Interrupt line to free
@@ -496,6 +505,8 @@ void free_irq(unsigned int irq, void *dev_id)
 	}
 }
 
+EXPORT_SYMBOL(free_irq);
+
 /*
  * IRQ autodetection code..
  *
@@ -519,6 +530,8 @@ unsigned long probe_irq_on(void)
 	return 0;
 }
 
+EXPORT_SYMBOL(probe_irq_on);
+
 /*
  * Return a mask of triggered interrupts (this
  * can handle only legacy ISA interrupts).
@@ -542,6 +555,8 @@ unsigned int probe_irq_mask(unsigned long xmask)
 	return 0;
 }
 
+EXPORT_SYMBOL(probe_irq_mask);
+
 /*
  * Return the one interrupt that triggered (this can
  * handle any interrupt source).
@@ -571,6 +586,8 @@ int probe_irq_off(unsigned long xmask)
 	return -1;
 }
 
+EXPORT_SYMBOL(probe_irq_off);
+
 /* this was setup_x86_irq but it seems pretty generic */
 int setup_irq(unsigned int irq, struct irqaction *new)
 {
diff --git a/arch/frv/kernel/module.c b/arch/frv/kernel/module.c
new file mode 100644
index 000000000000..850d168f69fc
--- /dev/null
+++ b/arch/frv/kernel/module.c
@@ -0,0 +1,80 @@
+/* module.c: FRV specific module loading bits
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from arch/i386/kernel/module.c, Copyright (C) 2001 Rusty Russell.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+
+	return vmalloc_exec(size);
+}
+
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+	   table entries. */
+}
+
+/* We don't need anything special. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	return 0;
+}
+
+int apply_relocate(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", me->name);
+	return -ENOEXEC;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", me->name);
+	return -ENOEXEC;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+}
diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index 712c3c24c954..f0b8fff3e733 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -13,6 +13,7 @@
 
 #include <linux/config.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/pm_legacy.h>
 #include <linux/sched.h>
@@ -27,6 +28,7 @@
 #include "local.h"
 
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 extern void frv_change_cmode(int);
 
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index 767ebb55bd83..5908deae9607 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -787,6 +787,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	/* register those serial ports that are available */
+#ifdef CONFIG_FRV_ONCPU_SERIAL
 #ifndef CONFIG_GDBSTUB_UART0
 	__reg(UART0_BASE + UART_IER * 8) = 0;
 	early_serial_setup(&__frv_uart0);
@@ -795,6 +796,7 @@ void __init setup_arch(char **cmdline_p)
 	__reg(UART1_BASE + UART_IER * 8) = 0;
 	early_serial_setup(&__frv_uart1);
 #endif
+#endif
 
 #if defined(CONFIG_CHR_DEV_FLASH) || defined(CONFIG_BLK_DEV_FLASH)
 	/* we need to initialize the Flashrom device here since we might
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 2e9741227b73..24cf85f89e40 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -189,6 +189,8 @@ void do_gettimeofday(struct timeval *tv)
 	tv->tv_usec = usec;
 }
 
+EXPORT_SYMBOL(do_gettimeofday);
+
 int do_settimeofday(struct timespec *tv)
 {
 	time_t wtm_sec, sec = tv->tv_sec;
@@ -218,6 +220,7 @@ int do_settimeofday(struct timespec *tv)
 	clock_was_set();
 	return 0;
 }
+
 EXPORT_SYMBOL(do_settimeofday);
 
 /*
diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c
index 89073cae4b5d..9eb84b2e6abc 100644
--- a/arch/frv/kernel/traps.c
+++ b/arch/frv/kernel/traps.c
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <linux/module.h>
 
 #include <asm/setup.h>
 #include <asm/fpu.h>
@@ -250,6 +251,8 @@ void dump_stack(void)
 	show_stack(NULL, NULL);
 }
 
+EXPORT_SYMBOL(dump_stack);
+
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
 }
diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c
index f3fd58a5bc4a..9b751c0f0e84 100644
--- a/arch/frv/kernel/uaccess.c
+++ b/arch/frv/kernel/uaccess.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <asm/uaccess.h>
 
 /*****************************************************************************/
@@ -58,8 +59,11 @@ long strncpy_from_user(char *dst, const char *src, long count)
 	memset(p, 0, count);	/* clear remainder of buffer [security] */
 
 	return err;
+
 } /* end strncpy_from_user() */
 
+EXPORT_SYMBOL(strncpy_from_user);
+
 /*****************************************************************************/
 /*
  * Return the size of a string (including the ending 0)
@@ -92,4 +96,7 @@ long strnlen_user(const char *src, long count)
 	}
 
 	return p - src + 1; /* return length including NUL */
+
 } /* end strnlen_user() */
+
+EXPORT_SYMBOL(strnlen_user);
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S
index fceafd2cc202..f474534ba78a 100644
--- a/arch/frv/kernel/vmlinux.lds.S
+++ b/arch/frv/kernel/vmlinux.lds.S
@@ -112,6 +112,7 @@ SECTIONS
 #endif
 	)
 	SCHED_TEXT
+	LOCK_TEXT
 	*(.fixup)
 	*(.gnu.warning)
 	*(.exitcall.exit)
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile
index 19be2626d5e6..08be305c9f44 100644
--- a/arch/frv/lib/Makefile
+++ b/arch/frv/lib/Makefile
@@ -3,6 +3,6 @@
 #
 
 lib-y := \
-	__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o \
+	__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
 	checksum.o memcpy.o memset.o atomic-ops.o \
 	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
diff --git a/arch/frv/lib/__ucmpdi2.S b/arch/frv/lib/__ucmpdi2.S
new file mode 100644
index 000000000000..d892f16ffaa9
--- /dev/null
+++ b/arch/frv/lib/__ucmpdi2.S
@@ -0,0 +1,45 @@
+/* __ucmpdi2.S: 64-bit unsigned compare
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+	.text
+	.p2align	4
+
+###############################################################################
+#
+# int __ucmpdi2(unsigned long long a [GR8:GR9],
+#		unsigned long long b [GR10:GR11])
+#
+# - returns 0, 1, or 2 as a <, =, > b respectively.
+#
+###############################################################################
+	.globl		__ucmpdi2
+	.type		__ucmpdi2,@function
+__ucmpdi2:
+	or.p		gr8,gr0,gr4
+	subcc		gr8,gr10,gr0,icc0
+	setlos.p	#0,gr8
+	bclr		icc0,#2			; a.msw < b.msw
+
+	setlos.p	#2,gr8
+	bhilr		icc0,#0			; a.msw > b.msw
+
+	subcc.p		gr9,gr11,gr0,icc1
+	setlos		#0,gr8
+	setlos.p	#2,gr9
+	setlos		#1,gr7
+	cknc		icc1,cc6
+	cor.p		gr9,gr0,gr8,	cc6,#1
+	cckls		icc1,cc4,	cc6,#1
+	andcr		cc6,cc4,cc4
+	cor		gr7,gr0,gr8,	cc4,#1
+	bralr
+	.size		__ucmpdi2, .-__ucmpdi2
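The libgcc contract this assembly implements is small; a plain C rendering of the same semantics (a reference sketch, not the kernel's code) would be:

	/* returns 0, 1 or 2 as a <, ==, > b, per the comment block above */
	int ucmpdi2(unsigned long long a, unsigned long long b)
	{
		if (a < b)
			return 0;
		if (a > b)
			return 2;
		return 1;
	}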
diff --git a/arch/frv/lib/atomic-ops.S b/arch/frv/lib/atomic-ops.S
index b03d510a89e4..545cd325ac57 100644
--- a/arch/frv/lib/atomic-ops.S
+++ b/arch/frv/lib/atomic-ops.S
@@ -129,48 +129,6 @@ atomic_sub_return:
 
 ###############################################################################
 #
-# uint8_t __xchg_8(uint8_t i, uint8_t *v)
-#
-###############################################################################
-	.globl		__xchg_8
-	.type		__xchg_8,@function
-__xchg_8:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldub.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	cstb.p		gr10,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		__xchg_8, .-__xchg_8
-
-###############################################################################
-#
-# uint16_t __xchg_16(uint16_t i, uint16_t *v)
-#
-###############################################################################
-	.globl		__xchg_16
-	.type		__xchg_16,@function
-__xchg_16:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	lduh.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	csth.p		gr10,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		__xchg_16, .-__xchg_16
-
-###############################################################################
-#
 # uint32_t __xchg_32(uint32_t i, uint32_t *v)
 #
 ###############################################################################
@@ -192,56 +150,6 @@ __xchg_32:
 
 ###############################################################################
 #
-# uint8_t __cmpxchg_8(uint8_t *v, uint8_t test, uint8_t new)
-#
-###############################################################################
-	.globl		__cmpxchg_8
-	.type		__cmpxchg_8,@function
-__cmpxchg_8:
-	or.p		gr8,gr8,gr11
-0:
-	orcc		gr0,gr0,gr0,icc3
-	ckeq		icc3,cc7
-	ldub.p		@(gr11,gr0),gr8
-	orcr		cc7,cc7,cc3
-	sub		gr8,gr9,gr7
-	sllicc		gr7,#24,gr0,icc0
-	bne		icc0,#0,1f
-	cstb.p		gr10,@(gr11,gr0)	,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1
-	beq		icc3,#0,0b
-1:
-	bralr
-
-	.size		__cmpxchg_8, .-__cmpxchg_8
-
-###############################################################################
-#
-# uint16_t __cmpxchg_16(uint16_t *v, uint16_t test, uint16_t new)
-#
-###############################################################################
-	.globl		__cmpxchg_16
-	.type		__cmpxchg_16,@function
-__cmpxchg_16:
-	or.p		gr8,gr8,gr11
-0:
-	orcc		gr0,gr0,gr0,icc3
-	ckeq		icc3,cc7
-	lduh.p		@(gr11,gr0),gr8
-	orcr		cc7,cc7,cc3
-	sub		gr8,gr9,gr7
-	sllicc		gr7,#16,gr0,icc0
-	bne		icc0,#0,1f
-	csth.p		gr10,@(gr11,gr0)	,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1
-	beq		icc3,#0,0b
-1:
-	bralr
-
-	.size		__cmpxchg_16, .-__cmpxchg_16
-
-###############################################################################
-#
 # uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new)
 #
 ###############################################################################
diff --git a/arch/frv/lib/checksum.c b/arch/frv/lib/checksum.c
index 7bf5bd6cac8a..20e7dfc474ef 100644
--- a/arch/frv/lib/checksum.c
+++ b/arch/frv/lib/checksum.c
@@ -33,6 +33,7 @@
 
 #include <net/checksum.h>
 #include <asm/checksum.h>
+#include <linux/module.h>
 
 static inline unsigned short from32to16(unsigned long x)
 {
@@ -115,34 +116,52 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 	return result;
 }
 
+EXPORT_SYMBOL(csum_partial);
+
 /*
  * this routine is used for miscellaneous IP-like checksums, mainly
  * in icmp.c
  */
 unsigned short ip_compute_csum(const unsigned char * buff, int len)
 {
-	return ~do_csum(buff,len);
+	return ~do_csum(buff, len);
 }
 
+EXPORT_SYMBOL(ip_compute_csum);
+
 /*
  * copy from fs while checksumming, otherwise like csum_partial
 */
-
 unsigned int
-csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *csum_err)
+csum_partial_copy_from_user(const char __user *src, char *dst,
+			    int len, int sum, int *csum_err)
 {
-	if (csum_err) *csum_err = 0;
-	memcpy(dst, src, len);
+	int rem;
+
+	if (csum_err)
+		*csum_err = 0;
+
+	rem = copy_from_user(dst, src, len);
+	if (rem != 0) {
+		if (csum_err)
+			*csum_err = -EFAULT;
+		memset(dst + len - rem, 0, rem);
+		len = rem;
+	}
+
 	return csum_partial(dst, len, sum);
 }
 
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
 /*
  * copy from ds while checksumming, otherwise like csum_partial
 */
-
 unsigned int
 csum_partial_copy(const char *src, char *dst, int len, int sum)
 {
	memcpy(dst, src, len);
 	return csum_partial(dst, len, sum);
 }
+
+EXPORT_SYMBOL(csum_partial_copy);
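The csum_partial_copy_from_user() rewrite above replaces a bare memcpy() with copy_from_user(), which returns the number of bytes it could not copy; the new code zeroes the unwritten tail and flags -EFAULT through *csum_err rather than checksumming stale data. The same defensive idiom, sketched with hypothetical buffer names:

	unsigned long rem;

	rem = copy_from_user(kbuf, ubuf, len);
	if (rem != 0) {
		/* zero whatever was never written, as the frv code does */
		memset(kbuf + len - rem, 0, rem);
		return -EFAULT;
	}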
diff --git a/arch/frv/mb93090-mb00/Makefile b/arch/frv/mb93090-mb00/Makefile
index 3faf0f8cf9b5..76595e870733 100644
--- a/arch/frv/mb93090-mb00/Makefile
+++ b/arch/frv/mb93090-mb00/Makefile
@@ -3,7 +3,7 @@
 #
 
 ifeq "$(CONFIG_PCI)" "y"
-obj-y := pci-frv.o pci-irq.o pci-vdk.o
+obj-y := pci-frv.o pci-irq.o pci-vdk.o pci-iomap.o
 
 ifeq "$(CONFIG_MMU)" "y"
 obj-y += pci-dma.o
diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c
index 2082a9647f4f..4985466b1a7c 100644
--- a/arch/frv/mb93090-mb00/pci-dma-nommu.c
+++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c
@@ -83,6 +83,8 @@ void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_hand
 	return NULL;
 }
 
+EXPORT_SYMBOL(dma_alloc_coherent);
+
 void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
 {
 	struct dma_alloc_record *rec;
@@ -102,6 +104,8 @@ void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_
 	BUG();
 }
 
+EXPORT_SYMBOL(dma_free_coherent);
+
 /*
  * Map a single buffer of the indicated size for DMA in streaming mode.
  * The 32-bit bus address to use is returned.
@@ -120,6 +124,8 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 	return virt_to_bus(ptr);
 }
 
+EXPORT_SYMBOL(dma_map_single);
+
 /*
  * Map a set of buffers described by scatterlist in streaming
 * mode for DMA. This is the scather-gather version of the
@@ -150,3 +156,5 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 
 	return nents;
 }
+
+EXPORT_SYMBOL(dma_map_sg);
diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c
index 86fbdadc51b6..671ce1e8434f 100644
--- a/arch/frv/mb93090-mb00/pci-dma.c
+++ b/arch/frv/mb93090-mb00/pci-dma.c
@@ -28,11 +28,15 @@ void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_hand
 	return ret;
 }
 
+EXPORT_SYMBOL(dma_alloc_coherent);
+
 void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
 {
 	consistent_free(vaddr);
 }
 
+EXPORT_SYMBOL(dma_free_coherent);
+
 /*
  * Map a single buffer of the indicated size for DMA in streaming mode.
  * The 32-bit bus address to use is returned.
@@ -51,6 +55,8 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
 	return virt_to_bus(ptr);
 }
 
+EXPORT_SYMBOL(dma_map_single);
+
 /*
  * Map a set of buffers described by scatterlist in streaming
 * mode for DMA. This is the scather-gather version of the
@@ -96,6 +102,8 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 	return nents;
 }
 
+EXPORT_SYMBOL(dma_map_sg);
+
 dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset,
 			size_t size, enum dma_data_direction direction)
 {
@@ -103,3 +111,5 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long off
 	flush_dcache_page(page);
 	return (dma_addr_t) page_to_phys(page) + offset;
 }
+
+EXPORT_SYMBOL(dma_map_page);
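With dma_map_single() and friends exported, a PCI driver built as a module can set up streaming DMA on FR-V. A hedged usage sketch (the device, buffer and length names are hypothetical, and the unmap call is the generic DMA-API counterpart rather than anything shown in this diff):

	dma_addr_t bus;

	/* hand the device a bus address for a CPU-owned buffer */
	bus = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);

	/* ... start the transfer and wait for completion here ... */

	dma_unmap_single(&pdev->dev, bus, len, DMA_TO_DEVICE);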
diff --git a/arch/frv/mb93090-mb00/pci-iomap.c b/arch/frv/mb93090-mb00/pci-iomap.c
new file mode 100644
index 000000000000..068fa04bd527
--- /dev/null
+++ b/arch/frv/mb93090-mb00/pci-iomap.c
@@ -0,0 +1,29 @@
+/* pci-iomap.c: description
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/pci.h>
+#include <linux/module.h>
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	unsigned long start = pci_resource_start(dev, bar);
+	unsigned long len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (!len || !start)
+		return NULL;
+
+	if ((flags & IORESOURCE_IO) || (flags & IORESOURCE_MEM))
+		return (void __iomem *) start;
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(pci_iomap);
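pci_iomap() hands drivers a BAR-type-agnostic cookie that works with the ioread*/iowrite* accessors whether the BAR is I/O- or memory-mapped. A minimal probe-time sketch (the BAR number and register offset are hypothetical, and the unmap helper is assumed from the generic iomap library):

	void __iomem *regs;
	u32 status;

	regs = pci_iomap(pdev, 0, 0);	/* BAR 0; maxlen 0 = whole BAR */
	if (!regs)
		return -ENOMEM;

	status = ioread32(regs + 0x10);	/* hypothetical status register */
	pci_iounmap(pdev, regs);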
diff --git a/arch/frv/mm/cache-page.c b/arch/frv/mm/cache-page.c
index 683b5e344318..0261cbe153b5 100644
--- a/arch/frv/mm/cache-page.c
+++ b/arch/frv/mm/cache-page.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/module.h>
 #include <asm/pgalloc.h>
 
 /*****************************************************************************/
@@ -38,6 +39,8 @@ void flush_dcache_page(struct page *page)
 
 } /* end flush_dcache_page() */
 
+EXPORT_SYMBOL(flush_dcache_page);
+
 /*****************************************************************************/
 /*
  * ICI takes a virtual address and the page may not currently have one
@@ -64,3 +67,5 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
 	}
 
 } /* end flush_icache_user_range() */
+
+EXPORT_SYMBOL(flush_icache_user_range);
diff --git a/arch/frv/mm/extable.c b/arch/frv/mm/extable.c
index 41be1128dc64..caacf030ac75 100644
--- a/arch/frv/mm/extable.c
+++ b/arch/frv/mm/extable.c
@@ -43,7 +43,7 @@ static inline unsigned long search_one_table(const struct exception_table_entry
  */
 unsigned long search_exception_table(unsigned long pc)
 {
-	unsigned long ret = 0;
+	const struct exception_table_entry *extab;
 
 	/* determine if the fault lay during a memcpy_user or a memset_user */
 	if (__frame->lr == (unsigned long) &__memset_user_error_lr &&
@@ -55,9 +55,10 @@ unsigned long search_exception_table(unsigned long pc)
 	 */
 		return (unsigned long) &__memset_user_error_handler;
 	}
-	else if (__frame->lr == (unsigned long) &__memcpy_user_error_lr &&
-		 (unsigned long) &memcpy <= pc && pc < (unsigned long) &__memcpy_end
-		 ) {
+
+	if (__frame->lr == (unsigned long) &__memcpy_user_error_lr &&
+	    (unsigned long) &memcpy <= pc && pc < (unsigned long) &__memcpy_end
+	    ) {
 		/* the fault occurred in a protected memset
 		 * - we search for the return address (in LR) instead of the program counter
 		 * - it was probably during a copy_to/from_user()
@@ -65,27 +66,10 @@ unsigned long search_exception_table(unsigned long pc)
 		return (unsigned long) &__memcpy_user_error_handler;
 	}
 
-#ifndef CONFIG_MODULES
-	/* there is only the kernel to search. */
-	ret = search_one_table(__start___ex_table, __stop___ex_table - 1, pc);
-	return ret;
-
-#else
-	/* the kernel is the last "module" -- no need to treat it special */
-	unsigned long flags;
-	struct module *mp;
+	extab = search_exception_tables(pc);
+	if (extab)
+		return extab->fixup;
 
-	spin_lock_irqsave(&modlist_lock, flags);
-
-	for (mp = module_list; mp != NULL; mp = mp->next) {
-		if (mp->ex_table_start == NULL || !(mp->flags & (MOD_RUNNING | MOD_INITIALIZING)))
-			continue;
-		ret = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, pc);
-		if (ret)
-			break;
-	}
+	return 0;
 
-	spin_unlock_irqrestore(&modlist_lock, flags);
-	return ret;
-#endif
 } /* end search_exception_table() */
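The rewrite above drops the open-coded module-list walk in favour of the generic search_exception_tables(), which already covers the core kernel and every loaded module under the proper locking. The caller-side idiom in a fault handler looks roughly like this (the register layout is hypothetical):

	const struct exception_table_entry *fixup;

	fixup = search_exception_tables(regs->pc);
	if (fixup) {
		/* resume at the fixup landing pad instead of oopsing */
		regs->pc = fixup->fixup;
		return;
	}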
diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c
index 7dc8fbf3af97..7f77db7fabc7 100644
--- a/arch/frv/mm/highmem.c
+++ b/arch/frv/mm/highmem.c
@@ -9,6 +9,7 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11#include <linux/highmem.h> 11#include <linux/highmem.h>
12#include <linux/module.h>
12 13
13void *kmap(struct page *page) 14void *kmap(struct page *page)
14{ 15{
@@ -18,6 +19,8 @@ void *kmap(struct page *page)
18 return kmap_high(page); 19 return kmap_high(page);
19} 20}
20 21
22EXPORT_SYMBOL(kmap);
23
21void kunmap(struct page *page) 24void kunmap(struct page *page)
22{ 25{
23 if (in_interrupt()) 26 if (in_interrupt())
@@ -27,7 +30,12 @@ void kunmap(struct page *page)
27 kunmap_high(page); 30 kunmap_high(page);
28} 31}
29 32
33EXPORT_SYMBOL(kunmap);
34
30struct page *kmap_atomic_to_page(void *ptr) 35struct page *kmap_atomic_to_page(void *ptr)
31{ 36{
32 return virt_to_page(ptr); 37 return virt_to_page(ptr);
33} 38}
39
40
41EXPORT_SYMBOL(kmap_atomic_to_page);
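kmap()/kunmap() are the sleeping highmem mapping primitives, so exporting them lets modules touch highmem pages outside atomic context. A minimal sketch (the page pointer is assumed to come from elsewhere):

	void *vaddr;

	vaddr = kmap(page);	/* may sleep; not for interrupt context */
	memset(vaddr, 0, PAGE_SIZE);
	kunmap(page);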
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 26698a49f153..80940d712acf 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -21,10 +21,6 @@ config FPU
 	bool
 	default n
 
-config UID16
-	bool
-	default y
-
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 968fabd8723f..d849c6870e3a 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -29,10 +29,6 @@ config MMU
 config SBUS
 	bool
 
-config UID16
-	bool
-	default y
-
 config GENERIC_ISA_DMA
 	bool
 	default y
@@ -630,10 +626,6 @@ config REGPARM
 	  and passes the first three arguments of a function call in registers.
 	  This will probably break binary only modules.
 
-	  This feature is only enabled for gcc-3.0 and later - earlier compilers
-	  generate incorrect output with certain kernel constructs when
-	  -mregparm=3 is used.
-
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
@@ -703,7 +695,7 @@ depends on PM && !X86_VISWS
 
 config APM
 	tristate "APM (Advanced Power Management) BIOS support"
-	depends on PM && PM_LEGACY
+	depends on PM
 	---help---
 	  APM is a BIOS specification for saving power using several different
 	  techniques. This is mostly useful for battery powered laptops with
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index d121ea18460f..b84119f9cc63 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -37,10 +37,7 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
 # CPU-specific tuning. Anything which can be shared with UML should go here.
 include $(srctree)/arch/i386/Makefile.cpu
 
-# -mregparm=3 works ok on gcc-3.0 and later
-#
-GCC_VERSION			:= $(call cc-version)
-cflags-$(CONFIG_REGPARM) 	+= $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
+cflags-$(CONFIG_REGPARM) += -mregparm=3
 
 # Disable unit-at-a-time mode, it makes gcc use a lot more stack
 # due to the lack of sharing of stacklots.
diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu
index 8e51456df23d..dcd936ef45db 100644
--- a/arch/i386/Makefile.cpu
+++ b/arch/i386/Makefile.cpu
@@ -1,7 +1,7 @@
 # CPU tuning section - shared with UML.
 # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML.
 
-#-mtune exists since gcc 3.4, and some -mcpu flavors didn't exist in gcc 2.95.
+#-mtune exists since gcc 3.4
 HAS_MTUNE	:= $(call cc-option-yn, -mtune=i386)
 ifeq ($(HAS_MTUNE),y)
 tune		= $(call cc-option,-mtune=$(1),)
@@ -14,7 +14,7 @@ cflags-$(CONFIG_M386)		+= -march=i386
 cflags-$(CONFIG_M486)		+= -march=i486
 cflags-$(CONFIG_M586)		+= -march=i586
 cflags-$(CONFIG_M586TSC)	+= -march=i586
-cflags-$(CONFIG_M586MMX)	+= $(call cc-option,-march=pentium-mmx,-march=i586)
+cflags-$(CONFIG_M586MMX)	+= -march=pentium-mmx
 cflags-$(CONFIG_M686)		+= -march=i686
 cflags-$(CONFIG_MPENTIUMII)	+= -march=i686 $(call tune,pentium2)
 cflags-$(CONFIG_MPENTIUMIII)	+= -march=i686 $(call tune,pentium3)
@@ -23,8 +23,8 @@ cflags-$(CONFIG_MPENTIUM4)	+= -march=i686 $(call tune,pentium4)
 cflags-$(CONFIG_MK6)		+= -march=k6
 # Please note, that patches that add -march=athlon-xp and friends are pointless.
 # They make zero difference whatsosever to performance at this time.
-cflags-$(CONFIG_MK7)		+= $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)
-cflags-$(CONFIG_MK8)		+= $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4))
+cflags-$(CONFIG_MK7)		+= -march=athlon
+cflags-$(CONFIG_MK8)		+= $(call cc-option,-march=k8,-march=athlon)
 cflags-$(CONFIG_MCRUSOE)	+= -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
 cflags-$(CONFIG_MEFFICEON)	+= -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
 cflags-$(CONFIG_MWINCHIPC6)	+= $(call cc-option,-march=winchip-c6,-march=i586)
@@ -37,5 +37,5 @@ cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
 cflags-$(CONFIG_X86_ELAN)	+= -march=i486
 
 # Geode GX1 support
-cflags-$(CONFIG_MGEODEGX1)	+= $(call cc-option,-march=pentium-mmx,-march=i486)
+cflags-$(CONFIG_MGEODEGX1)	+= -march=pentium-mmx
 
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index 82a807f9f5e6..f19f3a7492a5 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -11,7 +11,7 @@
 
 #include <linux/linkage.h>
 #include <linux/vmalloc.h>
-#include <linux/tty.h>
+#include <linux/screen_info.h>
 #include <asm/io.h>
 #include <asm/page.h>
 
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index f10de0f2c5e6..be1880bb75b4 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -4,10 +4,10 @@
 
 extra-y := head.o init_task.o vmlinux.lds
 
-obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
+obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-		doublefault.o quirks.o i8237.o
+		quirks.o i8237.o
 
 obj-y				+= cpu/
 obj-y				+= timers/
@@ -33,6 +33,8 @@ obj-y				+= sysenter.o vsyscall.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat.o
 obj-$(CONFIG_HPET_TIMER) 	+= time_hpet.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_stub.o
+obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault.o
+obj-$(CONFIG_VM86) 		+= vm86.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 EXTRA_AFLAGS   := -traditional
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 2d793d4aef1a..9d8827156e54 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -2291,7 +2291,9 @@ static int __init apm_init(void)
 		apm_info.disabled = 1;
 		return -ENODEV;
 	}
+#ifdef CONFIG_PM_LEGACY
 	pm_active = 1;
+#endif
 
 	/*
 	 * Set up a segment that references the real mode segment 0x40
@@ -2382,7 +2384,9 @@ static void __exit apm_exit(void)
 	exit_kapmd = 1;
 	while (kapmd_running)
 		schedule();
+#ifdef CONFIG_PM_LEGACY
 	pm_active = 0;
+#endif
 }
 
 module_init(apm_init);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index cca655688ffc..170400879f44 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -609,8 +609,10 @@ void __devinit cpu_init(void)
609 load_TR_desc(); 609 load_TR_desc();
610 load_LDT(&init_mm.context); 610 load_LDT(&init_mm.context);
611 611
612#ifdef CONFIG_DOUBLEFAULT
612 /* Set up doublefault TSS pointer in the GDT */ 613 /* Set up doublefault TSS pointer in the GDT */
613 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 614 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
615#endif
614 616
615 /* Clear %fs and %gs. */ 617 /* Clear %fs and %gs. */
616 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); 618 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 607c06007508..4d704724b2f5 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -323,6 +323,7 @@ work_notifysig: # deal with pending signals and
 
 	ALIGN
 work_notifysig_v86:
+#ifdef CONFIG_VM86
 	pushl %ecx			# save ti_flags for do_notify_resume
 	call save_v86_state		# %eax contains pt_regs pointer
 	popl %ecx
@@ -330,6 +331,7 @@ work_notifysig_v86:
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace
+#endif
 
 	# perform syscall exit tracing
 	ALIGN
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
index 9caa8e8db80c..cff95d10a4d8 100644
--- a/arch/i386/kernel/init_task.c
+++ b/arch/i386/kernel/init_task.c
@@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task);
42 * per-CPU TSS segments. Threads are completely 'soft' on Linux, 42 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
43 * no more per-task TSS's. 43 * no more per-task TSS's.
44 */ 44 */
45DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; 45DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
46 46
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 1a201a932865..f3a9c78c4a24 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -19,7 +19,7 @@
19#include <linux/cpu.h> 19#include <linux/cpu.h>
20#include <linux/delay.h> 20#include <linux/delay.h>
21 21
22DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; 22DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
23EXPORT_PER_CPU_SYMBOL(irq_stat); 23EXPORT_PER_CPU_SYMBOL(irq_stat);
24 24
25#ifndef CONFIG_X86_LOCAL_APIC 25#ifndef CONFIG_X86_LOCAL_APIC
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 45e7f0ac4b04..035928f3f6c1 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -48,6 +48,7 @@
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/desc.h>
+#include <asm/vm86.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index f7ba4acc20ec..6ff3e5243226 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -293,3 +293,4 @@ ENTRY(sys_call_table)
 	.long sys_inotify_init
 	.long sys_inotify_add_watch
 	.long sys_inotify_rm_watch
+	.long sys_migrate_pages
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 9caeaa315cd7..a529f0cdce17 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup);
 #include <linux/mc146818rtc.h>
 #include <linux/rtc.h>
 
-extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-
 #define DEFAULT_RTC_INT_FREQ	64
 #define RTC_NUM_INTS		1
 
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 67932ad53082..57b047c27e46 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -37,10 +37,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from
 	ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
 endif
 
-ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),)
-$(error Sorry, your compiler is too old. GCC v2.96 is known to generate bad code.)
-endif
-
 ifeq ($(GCC_VERSION),0304)
 	cflags-$(CONFIG_ITANIUM)	+= -mtune=merced
 	cflags-$(CONFIG_MCKINLEY)	+= -mtune=mckinley
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index dc282710421a..9f8e8d558873 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1761,21 +1761,15 @@ sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data)
 
 	lock_kernel();
 	if (request == PTRACE_TRACEME) {
-		ret = sys_ptrace(request, pid, addr, data);
+		ret = ptrace_traceme();
 		goto out;
 	}
 
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
 		goto out;
-	ret = -EPERM;
-	if (pid == 1)		/* no messing around with init! */
-		goto out_tsk;
+	}
 
 	if (request == PTRACE_ATTACH) {
 		ret = sys_ptrace(request, pid, addr, data);
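This hunk, and the ptrace hunks for the other architectures below, replace the same open-coded child lookup with the new common helpers ptrace_traceme() and ptrace_get_task_struct(). Reconstructed from the deleted lines alone, the lookup helper presumably amounts to the following sketch (a guess at kernel/ptrace.c, not quoted from it):

	struct task_struct *ptrace_get_task_struct(pid_t pid)
	{
		struct task_struct *child;

		if (pid == 1)		/* no messing around with init! */
			return ERR_PTR(-EPERM);

		read_lock(&tasklist_lock);
		child = find_task_by_pid(pid);
		if (child)
			get_task_struct(child); /* caller must put_task_struct() */
		read_unlock(&tasklist_lock);
		if (!child)
			return ERR_PTR(-ESRCH);
		return child;
	}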
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index a3aa45cbcfa0..c485a3b32ba8 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -247,6 +247,32 @@ typedef struct kern_memdesc {
 
 static kern_memdesc_t *kern_memmap;
 
+#define efi_md_size(md)	(md->num_pages << EFI_PAGE_SHIFT)
+
+static inline u64
+kmd_end(kern_memdesc_t *kmd)
+{
+	return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
+}
+
+static inline u64
+efi_md_end(efi_memory_desc_t *md)
+{
+	return (md->phys_addr + efi_md_size(md));
+}
+
+static inline int
+efi_wb(efi_memory_desc_t *md)
+{
+	return (md->attribute & EFI_MEMORY_WB);
+}
+
+static inline int
+efi_uc(efi_memory_desc_t *md)
+{
+	return (md->attribute & EFI_MEMORY_UC);
+}
+
 static void
 walk (efi_freemem_callback_t callback, void *arg, u64 attr)
 {
@@ -595,8 +621,8 @@ efi_get_iobase (void)
 		return 0;
 }
 
-u32
-efi_mem_type (unsigned long phys_addr)
+static efi_memory_desc_t *
+efi_memory_descriptor (unsigned long phys_addr)
 {
 	void *efi_map_start, *efi_map_end, *p;
 	efi_memory_desc_t *md;
@@ -610,13 +636,13 @@ efi_mem_type (unsigned long phys_addr)
 		md = p;
 
 		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
-			 return md->type;
+			 return md;
 	}
 	return 0;
 }
 
-u64
-efi_mem_attributes (unsigned long phys_addr)
+static int
+efi_memmap_has_mmio (void)
 {
 	void *efi_map_start, *efi_map_end, *p;
 	efi_memory_desc_t *md;
@@ -629,36 +655,98 @@ efi_mem_attributes (unsigned long phys_addr)
 	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
 		md = p;
 
-		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
-			return md->attribute;
+		if (md->type == EFI_MEMORY_MAPPED_IO)
+			return 1;
 	}
 	return 0;
 }
 
+u32
+efi_mem_type (unsigned long phys_addr)
+{
+	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+
+	if (md)
+		return md->type;
+	return 0;
+}
+
+u64
+efi_mem_attributes (unsigned long phys_addr)
+{
+	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+
+	if (md)
+		return md->attribute;
+	return 0;
+}
 EXPORT_SYMBOL(efi_mem_attributes);
 
+/*
+ * Determines whether the memory at phys_addr supports the desired
+ * attribute (WB, UC, etc).  If this returns 1, the caller can safely
+ * access *size bytes at phys_addr with the specified attribute.
+ */
+static int
+efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr)
+{
+	efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+	unsigned long md_end;
+
+	if (!md || (md->attribute & attr) != attr)
+		return 0;
+
+	do {
+		md_end = efi_md_end(md);
+		if (phys_addr + *size <= md_end)
+			return 1;
+
+		md = efi_memory_descriptor(md_end);
+		if (!md || (md->attribute & attr) != attr) {
+			*size = md_end - phys_addr;
+			return 1;
+		}
+	} while (md);
+	return 0;
+}
+
+/*
+ * For /dev/mem, we only allow read & write system calls to access
+ * write-back memory, because read & write don't allow the user to
+ * control access size.
+ */
 int
 valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
 {
-	void *efi_map_start, *efi_map_end, *p;
-	efi_memory_desc_t *md;
-	u64 efi_desc_size;
-
-	efi_map_start = __va(ia64_boot_param->efi_memmap);
-	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
-	efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-		md = p;
-
-		if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) {
-			if (!(md->attribute & EFI_MEMORY_WB))
-				return 0;
-
-			if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr)
-				*size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr;
-			return 1;
-		}
-	}
-	return 0;
+	return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB);
+}
+
+/*
+ * We allow mmap of anything in the EFI memory map that supports
+ * either write-back or uncacheable access.  For uncacheable regions,
+ * the supported access sizes are system-dependent, and the user is
+ * responsible for using the correct size.
+ *
+ * Note that this doesn't currently allow access to hot-added memory,
+ * because that doesn't appear in the boot-time EFI memory map.
+ */
+int
+valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size)
+{
+	if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB))
+		return 1;
+
+	if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC))
+		return 1;
+
+	/*
+	 * Some firmware doesn't report MMIO regions in the EFI memory map.
+	 * The Intel BigSur (a.k.a. HP i2000) has this problem.  In this
+	 * case, we can't use the EFI memory map to validate mmap requests.
+	 */
+	if (!efi_memmap_has_mmio())
+		return 1;
+
 	return 0;
 }
 
@@ -707,32 +795,6 @@ efi_uart_console_only(void)
 	return 0;
 }
 
-#define efi_md_size(md)	(md->num_pages << EFI_PAGE_SHIFT)
-
-static inline u64
-kmd_end(kern_memdesc_t *kmd)
-{
-	return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
-}
-
-static inline u64
-efi_md_end(efi_memory_desc_t *md)
-{
-	return (md->phys_addr + efi_md_size(md));
-}
-
-static inline int
-efi_wb(efi_memory_desc_t *md)
-{
-	return (md->attribute & EFI_MEMORY_WB);
-}
-
-static inline int
-efi_uc(efi_memory_desc_t *md)
-{
-	return (md->attribute & EFI_MEMORY_UC);
-}
-
 /*
  * Look for the first granule aligned memory descriptor memory
  * that is big enough to hold EFI memory map. Make sure this
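Taken together, the efi.c hunks above let efi_mem_attribute_range() walk physically contiguous descriptors and shrink *size to the largest attribute-homogeneous extent, which is what the new valid_mmap_phys_addr_range() relies on. A hedged caller sketch (phys and requested_len are hypothetical names, not from this patch):

	unsigned long len = requested_len;

	if (!valid_mmap_phys_addr_range(phys, &len))
		return -EINVAL;		/* no WB or UC mapping covers phys */
	if (len < requested_len)
		requested_len = len;	/* clamp to the uniformly-attributed range */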
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 0741b066b98f..7a6ffd613789 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1600,5 +1600,6 @@ sys_call_table:
 	data8 sys_inotify_init
 	data8 sys_inotify_add_watch
 	data8 sys_inotify_rm_watch
+	data8 sys_migrate_pages			// 1280
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index bfe65b2e8621..fbc7ea35dd57 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1060,7 +1060,7 @@ SET_REG(b5);
  * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
  */
 
-#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 
 GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
 	.prologue
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 5db9d3bcbbcb..e72de580ebbf 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -103,7 +103,7 @@ EXPORT_SYMBOL(unw_init_running);
 
 #ifdef ASM_SUPPORTED
 # ifdef CONFIG_SMP
-#  if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#  if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 /*
  * This is not a normal routine and we don't want a function descriptor for it, so we use
  * a fake declaration here.
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 4b19d0410632..8d88eeea02d1 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1422,14 +1422,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
 	lock_kernel();
 	ret = -EPERM;
 	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
+		ret = ptrace_traceme();
 		goto out;
 	}
 
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
index b7dabbfb0d61..adb01566bd57 100644
--- a/arch/ia64/oprofile/backtrace.c
+++ b/arch/ia64/oprofile/backtrace.c
@@ -32,7 +32,7 @@ typedef struct
 	u64 *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
 } ia64_backtrace_t;
 
-#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 /*
  * Returns non-zero if the PC is in the spinlock contention out-of-line code
  * with non-standard calling sequence (on older compilers).
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index cc4b571e5db7..3bf55d92933f 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -50,6 +50,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void) = NULL;
+EXPORT_SYMBOL(pm_idle);
+
+void (*pm_power_off)(void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
 
 void disable_hlt(void)
 {
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 078d2a0e71c2..9b75caaf5cec 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -762,28 +762,16 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 	int ret;
 
 	lock_kernel();
-	ret = -EPERM;
 	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
+		ret = ptrace_traceme();
 		goto out;
 	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
 
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
 		goto out;
+	}
 
 	if (request == PTRACE_ATTACH) {
 		ret = ptrace_attach(child);
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 1dd5d18b2201..96b919828053 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -10,10 +10,6 @@ config MMU
 	bool
 	default y
 
-config UID16
-	bool
-	default y
-
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index b96498120fe9..e2a6e8648960 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -17,10 +17,6 @@ config FPU
 	bool
 	default n
 
-config UID16
-	bool
-	default y
-
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 9a9b04972132..7e55457a491f 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -57,30 +57,16 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data)
 		       (unsigned long) data);
 #endif
 	lock_kernel();
-	ret = -EPERM;
 	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		if ((ret = security_ptrace(current->parent, current)))
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
+		ret = ptrace_traceme();
 		goto out;
 	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
 
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
+		goto out;
+	}
 
 	if (request == PTRACE_ATTACH) {
 		ret = ptrace_attach(child);
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 07631a97670b..ce907eda221b 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/signal.h>	/* for SIGBUS */
+#include <linux/sched.h>	/* schow_regs(), force_sig() */
 
 #include <asm/module.h>
 #include <asm/sn/addrs.h>
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 874a283edb95..e77a06e9621e 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -19,9 +19,6 @@ config MMU
 config STACK_GROWSUP
 	def_bool y
 
-config UID16
-	bool
-
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index db93dbc0e21a..331483ace0d9 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -26,9 +26,6 @@ config MMU
 	bool
 	default y
 
-config UID16
-	bool
-
 config GENERIC_HARDIRQS
 	bool
 	default y
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 61762640b877..826ee3d056de 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -45,33 +45,19 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr,
 			       unsigned long data)
 {
 	struct task_struct *child;
-	int ret = -EPERM;
+	int ret;
 
 	lock_kernel();
 	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
+		ret = ptrace_traceme();
 		goto out;
 	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
 
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
+		goto out;
+	}
 
 	if (request == PTRACE_ATTACH) {
 		ret = ptrace_attach(child);
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index cc3f64c084c5..e396f4591d59 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -8,9 +8,6 @@ config MMU
 	bool
 	default y
 
-config UID16
-	bool
-
 config GENERIC_HARDIRQS
 	bool
 	default y
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 8ecda6d66de4..cc02232aa96e 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -712,35 +712,18 @@ sys_ptrace(long request, long pid, long addr, long data)
 	int ret;
 
 	lock_kernel();
-
 	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		ret = -EPERM;
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		goto out;
+		ret = ptrace_traceme();
+		goto out;
 	}
 
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out;
-
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
 		goto out;
+	}
 
 	ret = do_ptrace(child, request, addr, data);
-
 	put_task_struct(child);
 out:
 	unlock_kernel();
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 64f5ae0ff96d..8cf6d437a630 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -14,10 +14,6 @@ config SUPERH
 	  gaming console. The SuperH port has a home page at
 	  <http://www.linux-sh.org/>.
 
-config UID16
-	bool
-	default y
-
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c
index 870fe5327e09..1195af37ee5a 100644
--- a/arch/sh64/kernel/time.c
+++ b/arch/sh64/kernel/time.c
@@ -417,7 +417,7 @@ static __init unsigned int get_cpu_hz(void)
 	/*
 	** Regardless the toolchain, force the compiler to use the
 	** arbitrary register r3 as a clock tick counter.
-	** NOTE: r3 must be in accordance with rtc_interrupt()
+	** NOTE: r3 must be in accordance with sh64_rtc_interrupt()
 	*/
 	register unsigned long long __rtc_irq_flag __asm__ ("r3");
 
@@ -482,7 +482,8 @@ static __init unsigned int get_cpu_hz(void)
 #endif
 }
 
-static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t sh64_rtc_interrupt(int irq, void *dev_id,
+				      struct pt_regs *regs)
 {
 	ctrl_outb(0, RCR1);	/* Disable Carry Interrupts */
 	regs->regs[3] = 1;	/* Using r3 */
@@ -491,7 +492,7 @@ static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 }
 
 static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL};
-static struct irqaction irq1  = { rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL};
+static struct irqaction irq1  = { sh64_rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL};
 
 void __init time_init(void)
 {
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 56c34e7fd4ee..f944b58cdfe7 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -9,10 +9,6 @@ config MMU
 	bool
 	default y
 
-config UID16
-	bool
-	default y
-
 config HIGHMEM
 	bool
 	default y
diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c
index 475c4c13462c..fc470c0e9dc6 100644
--- a/arch/sparc/kernel/ptrace.c
+++ b/arch/sparc/kernel/ptrace.c
@@ -286,40 +286,17 @@ asmlinkage void do_ptrace(struct pt_regs *regs)
 		       s, (int) request, (int) pid, addr, data, addr2);
 	}
 #endif
-	if (request == PTRACE_TRACEME) {
-		int my_ret;
-
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED) {
-			pt_error_return(regs, EPERM);
-			goto out;
-		}
-		my_ret = security_ptrace(current->parent, current);
-		if (my_ret) {
-			pt_error_return(regs, -my_ret);
-			goto out;
-		}
 
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
+	if (request == PTRACE_TRACEME) {
+		ret = ptrace_traceme();
 		pt_succ_return(regs, 0);
 		goto out;
 	}
-#ifndef ALLOW_INIT_TRACING
-	if (pid == 1) {
-		/* Can't dork with init. */
-		pt_error_return(regs, EPERM);
-		goto out;
-	}
-#endif
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
 
-	if (!child) {
-		pt_error_return(regs, ESRCH);
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
+		pt_error_return(regs, -ret);
 		goto out;
 	}
 
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index c4b7ad70cd7c..b775ceb4cf98 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -309,11 +309,6 @@ config COMPAT
 	depends on SPARC32_COMPAT
 	default y
 
-config UID16
-	bool
-	depends on SPARC32_COMPAT
-	default y
-
 config BINFMT_ELF32
 	tristate "Kernel support for 32-bit ELF binaries"
 	depends on SPARC32_COMPAT
diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c
index 774ecbb8a031..84d3df2264cb 100644
--- a/arch/sparc64/kernel/ptrace.c
+++ b/arch/sparc64/kernel/ptrace.c
@@ -198,39 +198,15 @@ asmlinkage void do_ptrace(struct pt_regs *regs)
 	}
 #endif
 	if (request == PTRACE_TRACEME) {
-		int ret;
-
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED) {
-			pt_error_return(regs, EPERM);
-			goto out;
-		}
-		ret = security_ptrace(current->parent, current);
-		if (ret) {
-			pt_error_return(regs, -ret);
-			goto out;
-		}
-
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
+		ret = ptrace_traceme();
 		pt_succ_return(regs, 0);
 		goto out;
 	}
-#ifndef ALLOW_INIT_TRACING
-	if (pid == 1) {
-		/* Can't dork with init. */
-		pt_error_return(regs, EPERM);
-		goto out;
-	}
-#endif
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
 
-	if (!child) {
-		pt_error_return(regs, ESRCH);
+	child = ptrace_get_task_struct(pid);
+	if (IS_ERR(child)) {
+		ret = PTR_ERR(child);
+		pt_error_return(regs, -ret);
 		goto out;
 	}
 
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 1eb21de9d1b5..b4ff2e576021 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -22,10 +22,6 @@ config SBUS
 config PCI
 	bool
 
-config UID16
-	bool
-	default y
-
 config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
@@ -83,7 +79,7 @@ config KERNEL_HALF_GIGS
 	  of physical memory.
 
 config MODE_SKAS
-	bool "Separate Kernel Address Space support"
+	bool "Separate Kernel Address Space support" if MODE_TT
 	default y
 	help
 	  This option controls whether skas (separate kernel address space)
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 73f9652b2ee9..3a93c6f772fa 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -117,6 +117,7 @@ static int ubd_open(struct inode * inode, struct file * filp);
 static int ubd_release(struct inode * inode, struct file * file);
 static int ubd_ioctl(struct inode * inode, struct file * file,
 		     unsigned int cmd, unsigned long arg);
+static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 #define MAX_DEV (8)
 
122 123
@@ -125,6 +126,7 @@ static struct block_device_operations ubd_blops = {
 	.open = ubd_open,
 	.release = ubd_release,
 	.ioctl = ubd_ioctl,
+	.getgeo = ubd_getgeo,
 };
 
 /* Protected by the queue_lock */
130/* Protected by the queue_lock */ 132/* Protected by the queue_lock */
@@ -1058,6 +1060,16 @@ static void do_ubd_request(request_queue_t *q)
 	}
 }
 
+static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct ubd *dev = bdev->bd_disk->private_data;
+
+	geo->heads = 128;
+	geo->sectors = 32;
+	geo->cylinders = dev->size / (128 * 32 * 512);
+	return 0;
+}
+
 static int ubd_ioctl(struct inode * inode, struct file * file,
 		     unsigned int cmd, unsigned long arg)
 {
@@ -1070,16 +1082,7 @@ static int ubd_ioctl(struct inode * inode, struct file * file,
 	};
 
 	switch (cmd) {
-		struct hd_geometry g;
 		struct cdrom_volctrl volume;
-	case HDIO_GETGEO:
-		if(!loc) return(-EINVAL);
-		g.heads = 128;
-		g.sectors = 32;
-		g.cylinders = dev->size / (128 * 32 * 512);
-		g.start = get_start_sect(inode->i_bdev);
-		return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
-
 	case HDIO_GET_IDENTITY:
 		ubd_id.cyls = dev->size / (128 * 32 * 512);
 		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
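The HDIO_GETGEO case can leave the driver because the block core now services it through the new getgeo method, filling in the partition offset and doing the user copy itself; the driver only reports its fixed geometry. A sketch of the plausible generic path (helper names inferred from the removed code, not quoted from block/ioctl.c):

	case HDIO_GETGEO: {
		struct hd_geometry geo = { 0 };
		int ret;

		if (!bdev->bd_disk->fops->getgeo)
			return -ENOTTY;

		geo.start = get_start_sect(bdev);	/* partition offset */
		ret = bdev->bd_disk->fops->getgeo(bdev, &geo);
		if (ret)
			return ret;
		return copy_to_user((void __user *)arg, &geo, sizeof(geo))
			? -EFAULT : 0;
	}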
diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
index e5fec5570199..8f4e46d677ab 100644
--- a/arch/um/include/kern_util.h
+++ b/arch/um/include/kern_util.h
@@ -1,4 +1,4 @@
 /*
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -10,6 +10,19 @@
10#include "sysdep/ptrace.h" 10#include "sysdep/ptrace.h"
11#include "sysdep/faultinfo.h" 11#include "sysdep/faultinfo.h"
12 12
13typedef void (*kern_hndl)(int, union uml_pt_regs *);
14
15struct kern_handlers {
16 kern_hndl relay_signal;
17 kern_hndl winch;
18 kern_hndl bus_handler;
19 kern_hndl page_fault;
20 kern_hndl sigio_handler;
21 kern_hndl timer_handler;
22};
23
24extern struct kern_handlers handlinfo_kern;
25
13extern int ncpus; 26extern int ncpus;
14extern char *linux_prog; 27extern char *linux_prog;
15extern char *gdb_init; 28extern char *gdb_init;
@@ -51,8 +64,6 @@ extern void timer_handler(int sig, union uml_pt_regs *regs);
 extern int set_signals(int enable);
 extern void force_sigbus(void);
 extern int pid_to_processor_id(int pid);
-extern void block_signals(void);
-extern void unblock_signals(void);
 extern void deliver_signals(void *t);
 extern int next_syscall_index(int max);
 extern int next_trap_index(int max);
@@ -111,6 +122,8 @@ extern void arch_switch(void);
 extern void free_irq(unsigned int, void *);
 extern int um_in_interrupt(void);
 extern int cpu(void);
+extern void segv_handler(int sig, union uml_pt_regs *regs);
+extern void sigio_handler(int sig, union uml_pt_regs *regs);
 
 #endif
 
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index c279ee6d89e4..dd72d66cf0ed 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -9,6 +9,8 @@
9#include "uml-config.h" 9#include "uml-config.h"
10#include "asm/types.h" 10#include "asm/types.h"
11#include "../os/include/file.h" 11#include "../os/include/file.h"
12#include "sysdep/ptrace.h"
13#include "kern_util.h"
12 14
13#define OS_TYPE_FILE 1 15#define OS_TYPE_FILE 1
14#define OS_TYPE_DIR 2 16#define OS_TYPE_DIR 2
@@ -219,4 +221,18 @@ extern int umid_file_name(char *name, char *buf, int len);
 extern int set_umid(char *name);
 extern char *get_umid(void);
 
+/* signal.c */
+extern void set_sigstack(void *sig_stack, int size);
+extern void remove_sigstack(void);
+extern void set_handler(int sig, void (*handler)(int), int flags, ...);
+extern int change_sig(int signal, int on);
+extern void block_signals(void);
+extern void unblock_signals(void);
+extern int get_signals(void);
+extern int set_signals(int enable);
+
+/* trap.c */
+extern void os_fill_handlinfo(struct kern_handlers h);
+extern void do_longjmp(void *p, int val);
+
 #endif
diff --git a/arch/um/include/signal_user.h b/arch/um/include/signal_user.h
deleted file mode 100644
index b075e543d864..000000000000
--- a/arch/um/include/signal_user.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SIGNAL_USER_H__
-#define __SIGNAL_USER_H__
-
-extern int signal_stack_size;
-
-extern int change_sig(int signal, int on);
-extern void set_sigstack(void *stack, int size);
-extern void set_handler(int sig, void (*handler)(int), int flags, ...);
-extern int set_signals(int enable);
-extern int get_signals(void);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h
index b9984003e603..c1dbd77b073f 100644
--- a/arch/um/include/user_util.h
+++ b/arch/um/include/user_util.h
@@ -1,4 +1,4 @@
 /*
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -23,12 +23,7 @@ struct cpu_task {
 
 extern struct cpu_task cpu_tasks[];
 
-struct signal_info {
-	void (*handler)(int, union uml_pt_regs *);
-	int is_irq;
-};
-
-extern struct signal_info sig_info[];
+extern void (*sig_info[])(int, union uml_pt_regs *);
 
 extern unsigned long low_physmem;
 extern unsigned long high_physmem;
@@ -64,7 +59,6 @@ extern void setup_machinename(char *machine_out);
 extern void setup_hostinfo(void);
 extern void do_exec(int old_pid, int new_pid);
 extern void tracer_panic(char *msg, ...);
-extern void do_longjmp(void *p, int val);
 extern int detach(int pid, int sig);
 extern int attach(int pid);
 extern void kill_child_dead(int pid);
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 6f7700593a6f..193cc2b7448d 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -9,8 +9,8 @@ clean-files :=
 obj-y = config.o exec_kern.o exitcode.o \
 	init_task.o irq.o irq_user.o ksyms.o mem.o physmem.o \
 	process_kern.o ptrace.o reboot.o resource.o sigio_user.o sigio_kern.o \
-	signal_kern.o signal_user.o smp.o syscall_kern.o sysrq.o time.o \
-	time_kern.o tlb.o trap_kern.o trap_user.o uaccess.o um_arch.o umid.o \
+	signal_kern.o smp.o syscall_kern.o sysrq.o time.o \
+	time_kern.o tlb.o trap_kern.o uaccess.o um_arch.o umid.o \
 	user_util.o
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c
index 50a2aa35cda9..0e32f5f4a887 100644
--- a/arch/um/kernel/irq_user.c
+++ b/arch/um/kernel/irq_user.c
@@ -15,7 +15,6 @@
15#include "kern_util.h" 15#include "kern_util.h"
16#include "user.h" 16#include "user.h"
17#include "process.h" 17#include "process.h"
18#include "signal_user.h"
19#include "sigio.h" 18#include "sigio.h"
20#include "irq_user.h" 19#include "irq_user.h"
21#include "os.h" 20#include "os.h"
diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
index 651abf255bc5..d2d3f256778c 100644
--- a/arch/um/kernel/process_kern.c
+++ b/arch/um/kernel/process_kern.c
@@ -36,7 +36,6 @@
36#include "kern_util.h" 36#include "kern_util.h"
37#include "kern.h" 37#include "kern.h"
38#include "signal_kern.h" 38#include "signal_kern.h"
39#include "signal_user.h"
40#include "init.h" 39#include "init.h"
41#include "irq_user.h" 40#include "irq_user.h"
42#include "mem_user.h" 41#include "mem_user.h"
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index a637e885c583..6f1a3a288117 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -12,6 +12,8 @@
12#include "mode.h" 12#include "mode.h"
13#include "choose-mode.h" 13#include "choose-mode.h"
14 14
15void (*pm_power_off)(void);
16
15#ifdef CONFIG_SMP 17#ifdef CONFIG_SMP
16static void kill_idlers(int me) 18static void kill_idlers(int me)
17{ 19{
diff --git a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c
index 03618bd13d55..7b0e0e81c161 100644
--- a/arch/um/kernel/signal_kern.c
+++ b/arch/um/kernel/signal_kern.c
@@ -22,7 +22,6 @@
22#include "asm/ucontext.h" 22#include "asm/ucontext.h"
23#include "kern_util.h" 23#include "kern_util.h"
24#include "signal_kern.h" 24#include "signal_kern.h"
25#include "signal_user.h"
26#include "kern.h" 25#include "kern.h"
27#include "frame_kern.h" 26#include "frame_kern.h"
28#include "sigcontext.h" 27#include "sigcontext.h"
diff --git a/arch/um/kernel/signal_user.c b/arch/um/kernel/signal_user.c
deleted file mode 100644
index 62f457835fb1..000000000000
--- a/arch/um/kernel/signal_user.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <string.h>
-#include <sys/mman.h>
-#include "user_util.h"
-#include "kern_util.h"
-#include "user.h"
-#include "signal_user.h"
-#include "signal_kern.h"
-#include "sysdep/sigcontext.h"
-#include "sigcontext.h"
-
-void set_sigstack(void *sig_stack, int size)
-{
-	stack_t stack = ((stack_t) { .ss_flags = 0,
-				     .ss_sp = (__ptr_t) sig_stack,
-				     .ss_size = size - sizeof(void *) });
-
-	if(sigaltstack(&stack, NULL) != 0)
-		panic("enabling signal stack failed, errno = %d\n", errno);
-}
-
-void set_handler(int sig, void (*handler)(int), int flags, ...)
-{
-	struct sigaction action;
-	va_list ap;
-	int mask;
-
-	va_start(ap, flags);
-	action.sa_handler = handler;
-	sigemptyset(&action.sa_mask);
-	while((mask = va_arg(ap, int)) != -1){
-		sigaddset(&action.sa_mask, mask);
-	}
-	va_end(ap);
-	action.sa_flags = flags;
-	action.sa_restorer = NULL;
-	if(sigaction(sig, &action, NULL) < 0)
-		panic("sigaction failed");
-}
-
-int change_sig(int signal, int on)
-{
-	sigset_t sigset, old;
-
-	sigemptyset(&sigset);
-	sigaddset(&sigset, signal);
-	sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old);
-	return(!sigismember(&old, signal));
-}
-
-/* Both here and in set/get_signal we don't touch SIGPROF, because we must not
- * disable profiling; it's safe because the profiling code does not interact
- * with the kernel code at all.*/
-
-static void change_signals(int type)
-{
-	sigset_t mask;
-
-	sigemptyset(&mask);
-	sigaddset(&mask, SIGVTALRM);
-	sigaddset(&mask, SIGALRM);
-	sigaddset(&mask, SIGIO);
-	if(sigprocmask(type, &mask, NULL) < 0)
-		panic("Failed to change signal mask - errno = %d", errno);
-}
-
-void block_signals(void)
-{
-	change_signals(SIG_BLOCK);
-}
-
-void unblock_signals(void)
-{
-	change_signals(SIG_UNBLOCK);
-}
-
-/* These are the asynchronous signals.  SIGVTALRM and SIGARLM are handled
- * together under SIGVTALRM_BIT.  SIGPROF is excluded because we want to
- * be able to profile all of UML, not just the non-critical sections.  If
- * profiling is not thread-safe, then that is not my problem.  We can disable
- * profiling when SMP is enabled in that case.
- */
-#define SIGIO_BIT 0
-#define SIGVTALRM_BIT 1
-
-static int enable_mask(sigset_t *mask)
-{
-	int sigs;
-
-	sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT;
-	sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT;
-	sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT;
-	return(sigs);
-}
-
-int get_signals(void)
-{
-	sigset_t mask;
-
-	if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0)
-		panic("Failed to get signal mask");
-	return(enable_mask(&mask));
-}
-
-int set_signals(int enable)
-{
-	sigset_t mask;
-	int ret;
-
-	sigemptyset(&mask);
-	if(enable & (1 << SIGIO_BIT))
-		sigaddset(&mask, SIGIO);
-	if(enable & (1 << SIGVTALRM_BIT)){
-		sigaddset(&mask, SIGVTALRM);
-		sigaddset(&mask, SIGALRM);
-	}
-
-	/* This is safe - sigprocmask is guaranteed to copy locally the
-	 * value of new_set, do his work and then, at the end, write to
-	 * old_set.
-	 */
-	if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0)
-		panic("Failed to enable signals");
-	ret = enable_mask(&mask);
-	sigemptyset(&mask);
-	if((enable & (1 << SIGIO_BIT)) == 0)
-		sigaddset(&mask, SIGIO);
-	if((enable & (1 << SIGVTALRM_BIT)) == 0){
-		sigaddset(&mask, SIGVTALRM);
-		sigaddset(&mask, SIGALRM);
-	}
-	if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
-		panic("Failed to block signals");
-
-	return(ret);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index 8de471b59c1c..7a9fc16d71d4 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -4,7 +4,7 @@
 #
 
 obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \
-	syscall.o tlb.o trap_user.o uaccess.o
+	syscall.o tlb.o uaccess.o
 
 USER_OBJS := process.o clone.o
 
diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h
index daa2f85b684c..01d489de3986 100644
--- a/arch/um/kernel/skas/include/skas.h
+++ b/arch/um/kernel/skas/include/skas.h
@@ -22,7 +22,6 @@ extern int start_idle_thread(void *stack, void *switch_buf_ptr,
 extern int user_thread(unsigned long stack, int flags);
 extern void userspace(union uml_pt_regs *regs);
 extern void new_thread_proc(void *stack, void (*handler)(int sig));
-extern void remove_sigstack(void);
 extern void new_thread_handler(int sig);
 extern void handle_syscall(union uml_pt_regs *regs);
 extern int map(struct mm_id * mm_idp, unsigned long virt,
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 599d679bd4fc..9264d4021dfe 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -31,7 +31,6 @@
31#include "proc_mm.h" 31#include "proc_mm.h"
32#include "skas_ptrace.h" 32#include "skas_ptrace.h"
33#include "chan_user.h" 33#include "chan_user.h"
34#include "signal_user.h"
35#include "registers.h" 34#include "registers.h"
36#include "mem.h" 35#include "mem.h"
37#include "uml-config.h" 36#include "uml-config.h"
@@ -514,16 +513,6 @@ int start_idle_thread(void *stack, void *switch_buf_ptr, void **fork_buf_ptr)
 	siglongjmp(**switch_buf, 1);
 }
 
-void remove_sigstack(void)
-{
-	stack_t stack = ((stack_t) { .ss_flags	= SS_DISABLE,
-				     .ss_sp	= NULL,
-				     .ss_size	= 0 });
-
-	if(sigaltstack(&stack, NULL) != 0)
-		panic("disabling signal stack failed, errno = %d\n", errno);
-}
-
 void initial_thread_cb_skas(void (*proc)(void *), void *arg)
 {
 	sigjmp_buf here;
diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c
index 9c990253966c..09790ccb161c 100644
--- a/arch/um/kernel/skas/process_kern.c
+++ b/arch/um/kernel/skas/process_kern.c
@@ -14,7 +14,6 @@
14#include "asm/atomic.h" 14#include "asm/atomic.h"
15#include "kern_util.h" 15#include "kern_util.h"
16#include "time_user.h" 16#include "time_user.h"
17#include "signal_user.h"
18#include "skas.h" 17#include "skas.h"
19#include "os.h" 18#include "os.h"
20#include "user_util.h" 19#include "user_util.h"
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index c40b611e3d93..11f518a7e156 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -14,9 +14,9 @@
14#include "kern_util.h" 14#include "kern_util.h"
15#include "user.h" 15#include "user.h"
16#include "process.h" 16#include "process.h"
17#include "signal_user.h"
18#include "time_user.h" 17#include "time_user.h"
19#include "kern_constants.h" 18#include "kern_constants.h"
19#include "os.h"
20 20
21/* XXX This really needs to be declared and initialized in a kernel file since 21/* XXX This really needs to be declared and initialized in a kernel file since
22 * it's in <linux/time.h> 22 * it's in <linux/time.h>
diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c
index 0d4c10a73607..d56046c2aba2 100644
--- a/arch/um/kernel/trap_kern.c
+++ b/arch/um/kernel/trap_kern.c
@@ -1,4 +1,4 @@
 /*
  * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -26,9 +26,13 @@
26#include "mconsole_kern.h" 26#include "mconsole_kern.h"
27#include "mem.h" 27#include "mem.h"
28#include "mem_kern.h" 28#include "mem_kern.h"
29#include "sysdep/sigcontext.h"
30#include "sysdep/ptrace.h"
31#include "os.h"
29#ifdef CONFIG_MODE_SKAS 32#ifdef CONFIG_MODE_SKAS
30#include "skas.h" 33#include "skas.h"
31#endif 34#endif
35#include "os.h"
32 36
33/* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by segv(). */ 37/* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by segv(). */
34int handle_page_fault(unsigned long address, unsigned long ip, 38int handle_page_fault(unsigned long address, unsigned long ip,
@@ -125,6 +129,25 @@ out_of_memory:
 		goto out;
 }
 
+void segv_handler(int sig, union uml_pt_regs *regs)
+{
+	struct faultinfo * fi = UPT_FAULTINFO(regs);
+
+	if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){
+		bad_segv(*fi, UPT_IP(regs));
+		return;
+	}
+	segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
+}
+
+struct kern_handlers handlinfo_kern = {
+	.relay_signal = relay_signal,
+	.winch = winch,
+	.bus_handler = relay_signal,
+	.page_fault = segv_handler,
+	.sigio_handler = sigio_handler,
+	.timer_handler = timer_handler
+};
 /*
  * We give a *copy* of the faultinfo in the regs to segv.
  * This must be done, since nesting SEGVs could overwrite
diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c
deleted file mode 100644
index e9ccd6b8d3c7..000000000000
--- a/arch/um/kernel/trap_user.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdlib.h>
-#include <errno.h>
-#include <setjmp.h>
-#include <signal.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include <asm/page.h>
-#include <asm/unistd.h>
-#include <asm/ptrace.h>
-#include "init.h"
-#include "sysdep/ptrace.h"
-#include "sigcontext.h"
-#include "sysdep/sigcontext.h"
-#include "irq_user.h"
-#include "signal_user.h"
-#include "time_user.h"
-#include "task.h"
-#include "mode.h"
-#include "choose-mode.h"
-#include "kern_util.h"
-#include "user_util.h"
-#include "os.h"
-
-void kill_child_dead(int pid)
-{
-	kill(pid, SIGKILL);
-	kill(pid, SIGCONT);
-	do {
-		int n;
-		CATCH_EINTR(n = waitpid(pid, NULL, 0));
-		if (n > 0)
-			kill(pid, SIGCONT);
-		else
-			break;
-	} while(1);
-}
-
-void segv_handler(int sig, union uml_pt_regs *regs)
-{
-	struct faultinfo * fi = UPT_FAULTINFO(regs);
-
-	if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){
-		bad_segv(*fi, UPT_IP(regs));
-		return;
-	}
-	segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
-}
-
-void usr2_handler(int sig, union uml_pt_regs *regs)
-{
-	CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0);
-}
-
-struct signal_info sig_info[] = {
-	[ SIGTRAP ] { .handler		= relay_signal,
-		      .is_irq		= 0 },
-	[ SIGFPE ] { .handler		= relay_signal,
-		     .is_irq		= 0 },
-	[ SIGILL ] { .handler		= relay_signal,
-		     .is_irq		= 0 },
-	[ SIGWINCH ] { .handler		= winch,
-		       .is_irq		= 1 },
-	[ SIGBUS ] { .handler		= bus_handler,
-		     .is_irq		= 0 },
-	[ SIGSEGV] { .handler		= segv_handler,
-		     .is_irq		= 0 },
-	[ SIGIO ] { .handler		= sigio_handler,
-		    .is_irq		= 1 },
-	[ SIGVTALRM ] { .handler	= timer_handler,
-			.is_irq		= 1 },
-	[ SIGALRM ] { .handler		= timer_handler,
-		      .is_irq		= 1 },
-	[ SIGUSR2 ] { .handler		= usr2_handler,
-		      .is_irq		= 0 },
-};
-
-void do_longjmp(void *b, int val)
-{
-	sigjmp_buf *buf = b;
-
-	siglongjmp(*buf, val);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
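The deleted sig_info[] initializer above is the reference point for the new registration path: os_fill_handlinfo() (declared in os.h earlier in this diff) presumably copies the handlinfo_kern table from trap_kern.c into the os layer's plain function-pointer array, roughly as in this sketch; the SIGUSR2 entry is a guess, since usr2_handler is mode-specific and stays on the user side:

	void os_fill_handlinfo(struct kern_handlers h)
	{
		sig_info[SIGTRAP]   = h.relay_signal;
		sig_info[SIGFPE]    = h.relay_signal;
		sig_info[SIGILL]    = h.relay_signal;
		sig_info[SIGWINCH]  = h.winch;
		sig_info[SIGBUS]    = h.bus_handler;
		sig_info[SIGSEGV]   = h.page_fault;
		sig_info[SIGIO]     = h.sigio_handler;
		sig_info[SIGVTALRM] = h.timer_handler;
		sig_info[SIGALRM]   = h.timer_handler;
		sig_info[SIGUSR2]   = usr2_handler;
	}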
diff --git a/arch/um/kernel/tt/exec_kern.c b/arch/um/kernel/tt/exec_kern.c
index 065b504a653b..136e54c47d37 100644
--- a/arch/um/kernel/tt/exec_kern.c
+++ b/arch/um/kernel/tt/exec_kern.c
@@ -14,7 +14,6 @@
14#include "kern_util.h" 14#include "kern_util.h"
15#include "irq_user.h" 15#include "irq_user.h"
16#include "time_user.h" 16#include "time_user.h"
17#include "signal_user.h"
18#include "mem_user.h" 17#include "mem_user.h"
19#include "os.h" 18#include "os.h"
20#include "tlb.h" 19#include "tlb.h"
diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
index cfaa373a6e77..14d4622a5fb8 100644
--- a/arch/um/kernel/tt/process_kern.c
+++ b/arch/um/kernel/tt/process_kern.c
@@ -13,7 +13,6 @@
13#include "asm/ptrace.h" 13#include "asm/ptrace.h"
14#include "asm/tlbflush.h" 14#include "asm/tlbflush.h"
15#include "irq_user.h" 15#include "irq_user.h"
16#include "signal_user.h"
17#include "kern_util.h" 16#include "kern_util.h"
18#include "user_util.h" 17#include "user_util.h"
19#include "os.h" 18#include "os.h"
diff --git a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c
index d11e7399d7a1..71daae24e48a 100644
--- a/arch/um/kernel/tt/tracer.c
+++ b/arch/um/kernel/tt/tracer.c
@@ -19,7 +19,6 @@
19#include "sigcontext.h" 19#include "sigcontext.h"
20#include "sysdep/sigcontext.h" 20#include "sysdep/sigcontext.h"
21#include "os.h" 21#include "os.h"
22#include "signal_user.h"
23#include "user_util.h" 22#include "user_util.h"
24#include "mem_user.h" 23#include "mem_user.h"
25#include "process.h" 24#include "process.h"
diff --git a/arch/um/kernel/tt/trap_user.c b/arch/um/kernel/tt/trap_user.c
index fc108615beaf..a414c529fbcd 100644
--- a/arch/um/kernel/tt/trap_user.c
+++ b/arch/um/kernel/tt/trap_user.c
@@ -1,4 +1,4 @@
 /*
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -8,18 +8,18 @@
 #include <signal.h>
 #include "sysdep/ptrace.h"
 #include "sysdep/sigcontext.h"
-#include "signal_user.h"
 #include "user_util.h"
 #include "kern_util.h"
 #include "task.h"
 #include "tt.h"
+#include "os.h"
 
 void sig_handler_common_tt(int sig, void *sc_ptr)
 {
 	struct sigcontext *sc = sc_ptr;
 	struct tt_regs save_regs, *r;
-	struct signal_info *info;
 	int save_errno = errno, is_user;
+	void (*handler)(int, union uml_pt_regs *);
 
 	/* This is done because to allow SIGSEGV to be delivered inside a SEGV
 	 * handler.  This can happen in copy_user, and if SEGV is disabled,
@@ -40,10 +40,14 @@ void sig_handler_common_tt(int sig, void *sc_ptr)
 	if(sig != SIGUSR2)
 		r->syscall = -1;
 
-	info = &sig_info[sig];
-	if(!info->is_irq) unblock_signals();
+	handler = sig_info[sig];
+
+	/* unblock SIGALRM, SIGVTALRM, SIGIO if sig isn't IRQ signal */
+	if (sig != SIGIO && sig != SIGWINCH &&
+	    sig != SIGVTALRM && sig != SIGALRM)
+		unblock_signals();
 
-	(*info->handler)(sig, (union uml_pt_regs *) r);
+	handler(sig, (union uml_pt_regs *) r);
 
 	if(is_user){
 		interrupt_end();
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 26626b2b9172..73747ac19774 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -1,4 +1,4 @@
 /*
  * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -363,6 +363,11 @@ int linux_main(int argc, char **argv)
 	uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0,
 				     &host_task_size, &task_size);
 
+	/*
+	 * Setting up handlers to 'sig_info' struct
+	 */
+	os_fill_handlinfo(handlinfo_kern);
+
 	brk_start = (unsigned long) sbrk(0);
 	CHOOSE_MODE_PROC(before_mem_tt, before_mem_skas, brk_start);
 	/* Increase physical memory size for exec-shield users
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 11e30b13e318..40c7d6b1df68 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -4,11 +4,13 @@
 #
 
 obj-y = aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \
-	start_up.o time.o tt.o tty.o uaccess.o umid.o user_syms.o drivers/ \
-	sys-$(SUBARCH)/
+	start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o user_syms.o \
+	drivers/ sys-$(SUBARCH)/
+
+obj-$(CONFIG_MODE_SKAS) += skas/
 
 USER_OBJS := aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \
-	start_up.o time.o tt.o tty.o uaccess.o umid.o
+	start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o
 
 elf_aux.o: $(ARCH_DIR)/kernel-offsets.h
 CFLAGS_elf_aux.o += -I$(objtree)/arch/um
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 23da27d22569..172c8474453c 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -16,7 +16,6 @@
16#include "user_util.h" 16#include "user_util.h"
17#include "kern_util.h" 17#include "kern_util.h"
18#include "mem_user.h" 18#include "mem_user.h"
19#include "signal_user.h"
20#include "time_user.h" 19#include "time_user.h"
21#include "irq_user.h" 20#include "irq_user.h"
22#include "user.h" 21#include "user.h"
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index d9c52387c4a1..39815c6b5e45 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -15,7 +15,6 @@
15#include "os.h" 15#include "os.h"
16#include "user.h" 16#include "user.h"
17#include "user_util.h" 17#include "user_util.h"
18#include "signal_user.h"
19#include "process.h" 18#include "process.h"
20#include "irq_user.h" 19#include "irq_user.h"
21#include "kern_util.h" 20#include "kern_util.h"
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index c7bfd5ee3925..c1f46a0fef13 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -4,9 +4,22 @@
  */
 
 #include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <string.h>
+#include <sys/mman.h>
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "signal_kern.h"
+#include "sysdep/sigcontext.h"
+#include "sysdep/signal.h"
+#include "sigcontext.h"
 #include "time_user.h"
 #include "mode.h"
-#include "sysdep/signal.h"
 
 void sig_handler(ARCH_SIGHDLR_PARAM)
 {
@@ -36,13 +49,138 @@ void alarm_handler(ARCH_SIGHDLR_PARAM)
36 switch_timers(1); 49 switch_timers(1);
37} 50}
38 51
39/* 52void set_sigstack(void *sig_stack, int size)
40 * Overrides for Emacs so that we follow Linus's tabbing style. 53{
41 * Emacs will notice this stuff at the end of the file and automatically 54 stack_t stack = ((stack_t) { .ss_flags = 0,
42 * adjust the settings for this buffer only. This must remain at the end 55 .ss_sp = (__ptr_t) sig_stack,
43 * of the file. 56 .ss_size = size - sizeof(void *) });
44 * --------------------------------------------------------------------------- 57
45 * Local variables: 58 if(sigaltstack(&stack, NULL) != 0)
46 * c-file-style: "linux" 59 panic("enabling signal stack failed, errno = %d\n", errno);
47 * End: 60}
61
62void remove_sigstack(void)
63{
64 stack_t stack = ((stack_t) { .ss_flags = SS_DISABLE,
65 .ss_sp = NULL,
66 .ss_size = 0 });
67
68 if(sigaltstack(&stack, NULL) != 0)
69 panic("disabling signal stack failed, errno = %d\n", errno);
70}
71
72void set_handler(int sig, void (*handler)(int), int flags, ...)
73{
74 struct sigaction action;
75 va_list ap;
76 int mask;
77
78 va_start(ap, flags);
79 action.sa_handler = handler;
80 sigemptyset(&action.sa_mask);
81 while((mask = va_arg(ap, int)) != -1){
82 sigaddset(&action.sa_mask, mask);
83 }
84 va_end(ap);
85 action.sa_flags = flags;
86 action.sa_restorer = NULL;
87 if(sigaction(sig, &action, NULL) < 0)
88 panic("sigaction failed");
89}
90
91int change_sig(int signal, int on)
92{
93 sigset_t sigset, old;
94
95 sigemptyset(&sigset);
96 sigaddset(&sigset, signal);
97 sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old);
98 return(!sigismember(&old, signal));
99}
100
 101/* Both here and in set/get_signals we don't touch SIGPROF, because we must
 102 * not disable profiling; it's safe because the profiling code does not
 103 * interact with the kernel code at all. */
104
105static void change_signals(int type)
106{
107 sigset_t mask;
108
109 sigemptyset(&mask);
110 sigaddset(&mask, SIGVTALRM);
111 sigaddset(&mask, SIGALRM);
112 sigaddset(&mask, SIGIO);
113 if(sigprocmask(type, &mask, NULL) < 0)
114 panic("Failed to change signal mask - errno = %d", errno);
115}
116
117void block_signals(void)
118{
119 change_signals(SIG_BLOCK);
120}
121
122void unblock_signals(void)
123{
124 change_signals(SIG_UNBLOCK);
125}
126
 127/* These are the asynchronous signals. SIGVTALRM and SIGALRM are handled
128 * together under SIGVTALRM_BIT. SIGPROF is excluded because we want to
129 * be able to profile all of UML, not just the non-critical sections. If
130 * profiling is not thread-safe, then that is not my problem. We can disable
131 * profiling when SMP is enabled in that case.
48 */ 132 */
133#define SIGIO_BIT 0
134#define SIGVTALRM_BIT 1
135
136static int enable_mask(sigset_t *mask)
137{
138 int sigs;
139
140 sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT;
141 sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT;
142 sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT;
143 return(sigs);
144}
145
146int get_signals(void)
147{
148 sigset_t mask;
149
150 if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0)
151 panic("Failed to get signal mask");
152 return(enable_mask(&mask));
153}
154
155int set_signals(int enable)
156{
157 sigset_t mask;
158 int ret;
159
160 sigemptyset(&mask);
161 if(enable & (1 << SIGIO_BIT))
162 sigaddset(&mask, SIGIO);
163 if(enable & (1 << SIGVTALRM_BIT)){
164 sigaddset(&mask, SIGVTALRM);
165 sigaddset(&mask, SIGALRM);
166 }
167
 168 /* This is safe - sigprocmask is guaranteed to copy the value of
 169 * new_set locally, do its work and then, at the end, write to
 170 * old_set.
171 */
172 if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0)
173 panic("Failed to enable signals");
174 ret = enable_mask(&mask);
175 sigemptyset(&mask);
176 if((enable & (1 << SIGIO_BIT)) == 0)
177 sigaddset(&mask, SIGIO);
178 if((enable & (1 << SIGVTALRM_BIT)) == 0){
179 sigaddset(&mask, SIGVTALRM);
180 sigaddset(&mask, SIGALRM);
181 }
182 if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
183 panic("Failed to block signals");
184
185 return(ret);
186}
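
A note on get_signals()/set_signals() above: both lean on the fact that sigprocmask() writes the previous mask to its third argument, so one call can change which signals are deliverable and report the prior state with no window in between. A minimal userspace sketch of the same idiom (a hypothetical demo, not UML code):

	#include <signal.h>
	#include <stdio.h>

	/* unblock 'sig' and report whether it was already deliverable -
	 * a single atomic sigprocmask() call does both */
	static int enable_sig(int sig)
	{
		sigset_t set, old;

		sigemptyset(&set);
		sigaddset(&set, sig);
		if (sigprocmask(SIG_UNBLOCK, &set, &old) < 0)
			return -1;
		return !sigismember(&old, sig);
	}

	int main(void)
	{
		printf("SIGALRM was %s\n",
		       enable_sig(SIGALRM) ? "already enabled" : "blocked");
		return 0;
	}
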
diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile
new file mode 100644
index 000000000000..eab5386d60a7
--- /dev/null
+++ b/arch/um/os-Linux/skas/Makefile
@@ -0,0 +1,10 @@
1#
2# Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com)
3# Licensed under the GPL
4#
5
6obj-y := trap.o
7
8USER_OBJS := trap.o
9
10include arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/skas/trap_user.c b/arch/um/os-Linux/skas/trap.c
index 9950a6716fe5..9ad5fbec4593 100644
--- a/arch/um/kernel/skas/trap_user.c
+++ b/arch/um/os-Linux/skas/trap.c
@@ -1,11 +1,10 @@
1/* 1/*
2 * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) 2 * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL 3 * Licensed under the GPL
4 */ 4 */
5 5
6#include <signal.h> 6#include <signal.h>
7#include <errno.h> 7#include <errno.h>
8#include "signal_user.h"
9#include "user_util.h" 8#include "user_util.h"
10#include "kern_util.h" 9#include "kern_util.h"
11#include "task.h" 10#include "task.h"
@@ -14,12 +13,13 @@
14#include "ptrace_user.h" 13#include "ptrace_user.h"
15#include "sysdep/ptrace.h" 14#include "sysdep/ptrace.h"
16#include "sysdep/ptrace_user.h" 15#include "sysdep/ptrace_user.h"
16#include "os.h"
17 17
18void sig_handler_common_skas(int sig, void *sc_ptr) 18void sig_handler_common_skas(int sig, void *sc_ptr)
19{ 19{
20 struct sigcontext *sc = sc_ptr; 20 struct sigcontext *sc = sc_ptr;
21 struct skas_regs *r; 21 struct skas_regs *r;
22 struct signal_info *info; 22 void (*handler)(int, union uml_pt_regs *);
23 int save_errno = errno; 23 int save_errno = errno;
24 int save_user; 24 int save_user;
25 25
@@ -34,17 +34,22 @@ void sig_handler_common_skas(int sig, void *sc_ptr)
34 r = &TASK_REGS(get_current())->skas; 34 r = &TASK_REGS(get_current())->skas;
35 save_user = r->is_user; 35 save_user = r->is_user;
36 r->is_user = 0; 36 r->is_user = 0;
37 if ( sig == SIGFPE || sig == SIGSEGV || 37 if ( sig == SIGFPE || sig == SIGSEGV ||
38 sig == SIGBUS || sig == SIGILL || 38 sig == SIGBUS || sig == SIGILL ||
39 sig == SIGTRAP ) { 39 sig == SIGTRAP ) {
40 GET_FAULTINFO_FROM_SC(r->faultinfo, sc); 40 GET_FAULTINFO_FROM_SC(r->faultinfo, sc);
41 } 41 }
42 42
43 change_sig(SIGUSR1, 1); 43 change_sig(SIGUSR1, 1);
44 info = &sig_info[sig];
45 if(!info->is_irq) unblock_signals();
46 44
47 (*info->handler)(sig, (union uml_pt_regs *) r); 45 handler = sig_info[sig];
46
 47 /* unblock SIGALRM, SIGVTALRM and SIGIO if sig isn't an IRQ signal */
48 if (sig != SIGIO && sig != SIGWINCH &&
49 sig != SIGVTALRM && sig != SIGALRM)
50 unblock_signals();
51
52 handler(sig, (union uml_pt_regs *) r);
48 53
49 errno = save_errno; 54 errno = save_errno;
50 r->is_user = save_user; 55 r->is_user = save_user;
@@ -54,25 +59,15 @@ extern int ptrace_faultinfo;
54 59
55void user_signal(int sig, union uml_pt_regs *regs, int pid) 60void user_signal(int sig, union uml_pt_regs *regs, int pid)
56{ 61{
57 struct signal_info *info; 62 void (*handler)(int, union uml_pt_regs *);
58 int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || 63 int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) ||
59 (sig == SIGILL) || (sig == SIGTRAP)); 64 (sig == SIGILL) || (sig == SIGTRAP));
60 65
61 if (segv) 66 if (segv)
62 get_skas_faultinfo(pid, &regs->skas.faultinfo); 67 get_skas_faultinfo(pid, &regs->skas.faultinfo);
63 info = &sig_info[sig]; 68
64 (*info->handler)(sig, regs); 69 handler = sig_info[sig];
70 handler(sig, (union uml_pt_regs *) regs);
65 71
66 unblock_signals(); 72 unblock_signals();
67} 73}
68
69/*
70 * Overrides for Emacs so that we follow Linus's tabbing style.
71 * Emacs will notice this stuff at the end of the file and automatically
72 * adjust the settings for this buffer only. This must remain at the end
73 * of the file.
74 * ---------------------------------------------------------------------------
75 * Local variables:
76 * c-file-style: "linux"
77 * End:
78 */
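
The conversion above shows the shape of the whole series: sig_info[] goes from an array of struct signal_info (a handler plus an is_irq flag) to a bare array of handler pointers, with the IRQ-ness of a signal decided from its number at the one place that cares. Reduced to a sketch (names hypothetical):

	#include <signal.h>

	union uml_pt_regs;	/* opaque here */

	/* before: handler plus metadata */
	struct signal_info {
		void (*handler)(int, union uml_pt_regs *);
		int is_irq;
	};

	/* after: just the handler */
	void (*sig_table[NSIG])(int, union uml_pt_regs *);

	static int is_irq_signal(int sig)
	{
		return sig == SIGIO || sig == SIGWINCH ||
		       sig == SIGVTALRM || sig == SIGALRM;
	}

	void dispatch(int sig, union uml_pt_regs *regs)
	{
		if (!is_irq_signal(sig)) {
			/* unblock_signals() would go here */
		}
		if (sig_table[sig])
			sig_table[sig](sig, regs);
	}
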
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 29a9e3f43763..b47e5e71d1a5 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -24,7 +24,6 @@
24#include "kern_util.h" 24#include "kern_util.h"
25#include "user.h" 25#include "user.h"
26#include "signal_kern.h" 26#include "signal_kern.h"
27#include "signal_user.h"
28#include "sysdep/ptrace.h" 27#include "sysdep/ptrace.h"
29#include "sysdep/sigcontext.h" 28#include "sysdep/sigcontext.h"
30#include "irq_user.h" 29#include "irq_user.h"
diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c
new file mode 100644
index 000000000000..321e1c8e227d
--- /dev/null
+++ b/arch/um/os-Linux/trap.c
@@ -0,0 +1,40 @@
1/*
2 * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <signal.h>
8#include <setjmp.h>
9#include "kern_util.h"
10#include "user_util.h"
11#include "os.h"
12#include "mode.h"
13
14void usr2_handler(int sig, union uml_pt_regs *regs)
15{
16 CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0);
17}
18
19void (*sig_info[NSIG])(int, union uml_pt_regs *);
20
21void os_fill_handlinfo(struct kern_handlers h)
22{
23 sig_info[SIGTRAP] = h.relay_signal;
24 sig_info[SIGFPE] = h.relay_signal;
25 sig_info[SIGILL] = h.relay_signal;
26 sig_info[SIGWINCH] = h.winch;
27 sig_info[SIGBUS] = h.bus_handler;
28 sig_info[SIGSEGV] = h.page_fault;
29 sig_info[SIGIO] = h.sigio_handler;
30 sig_info[SIGVTALRM] = h.timer_handler;
31 sig_info[SIGALRM] = h.timer_handler;
32 sig_info[SIGUSR2] = usr2_handler;
33}
34
35void do_longjmp(void *b, int val)
36{
37 sigjmp_buf *buf = b;
38
39 siglongjmp(*buf, val);
40}
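
do_longjmp() above exists so callers that must not include <setjmp.h> (the kernel side of UML) can still carry a jump buffer around as a plain void *. A self-contained illustration of the pattern (hypothetical demo):

	#include <setjmp.h>
	#include <stdio.h>

	/* the only function that needs to know what 'b' really is */
	static void do_longjmp(void *b, int val)
	{
		sigjmp_buf *buf = b;

		siglongjmp(*buf, val);
	}

	int main(void)
	{
		sigjmp_buf env;
		int n = sigsetjmp(env, 1);	/* 1 = also save the signal mask */

		if (n == 0) {
			puts("first pass");
			do_longjmp(&env, 42);	/* never returns */
		}
		printf("resumed with %d\n", n);	/* 42 */
		return 0;
	}
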
diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c
index a6db8877931a..cb2648b79d0f 100644
--- a/arch/um/os-Linux/tt.c
+++ b/arch/um/os-Linux/tt.c
@@ -23,7 +23,6 @@
23#include "kern_util.h" 23#include "kern_util.h"
24#include "user.h" 24#include "user.h"
25#include "signal_kern.h" 25#include "signal_kern.h"
26#include "signal_user.h"
27#include "sysdep/ptrace.h" 26#include "sysdep/ptrace.h"
28#include "sysdep/sigcontext.h" 27#include "sysdep/sigcontext.h"
29#include "irq_user.h" 28#include "irq_user.h"
@@ -50,6 +49,20 @@ int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x,
50 return(0); 49 return(0);
51} 50}
52 51
52void kill_child_dead(int pid)
53{
54 kill(pid, SIGKILL);
55 kill(pid, SIGCONT);
56 do {
57 int n;
58 CATCH_EINTR(n = waitpid(pid, NULL, 0));
59 if (n > 0)
60 kill(pid, SIGCONT);
61 else
62 break;
63 } while(1);
64}
65
53/* 66/*
54 *------------------------- 67 *-------------------------
55 * only for tt mode (will be deleted in future...) 68 * only for tt mode (will be deleted in future...)
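
kill_child_dead() above has to loop: a stopped or traced child may need more than one SIGCONT before it dies, and waitpid() itself can be interrupted. CATCH_EINTR is UML's retry wrapper; a plausible stand-alone equivalent (an assumption - the real macro lives in the UML headers):

	#include <errno.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	/* retry an expression for as long as it fails with EINTR */
	#define CATCH_EINTR(expr) \
		while ((expr) < 0 && errno == EINTR)

	pid_t reap(pid_t pid)
	{
		pid_t n;

		CATCH_EINTR(n = waitpid(pid, NULL, 0));
		return n;
	}
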
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index 16bc19928b3c..7cd1a82dc8c2 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -10,7 +10,6 @@
10#include "asm/uaccess.h" 10#include "asm/uaccess.h"
11#include "asm/unistd.h" 11#include "asm/unistd.h"
12#include "frame_kern.h" 12#include "frame_kern.h"
13#include "signal_user.h"
14#include "sigcontext.h" 13#include "sigcontext.h"
15#include "registers.h" 14#include "registers.h"
16#include "mode.h" 15#include "mode.h"
diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig
index 310865903234..04494638b963 100644
--- a/arch/v850/Kconfig
+++ b/arch/v850/Kconfig
@@ -10,9 +10,6 @@ mainmenu "uClinux/v850 (w/o MMU) Kernel Configuration"
10config MMU 10config MMU
11 bool 11 bool
12 default n 12 default n
13config UID16
14 bool
15 default n
16config RWSEM_GENERIC_SPINLOCK 13config RWSEM_GENERIC_SPINLOCK
17 bool 14 bool
18 default y 15 default y
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 6ece645e4dbe..4f3e925962c3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -542,11 +542,6 @@ config SYSVIPC_COMPAT
542 depends on COMPAT && SYSVIPC 542 depends on COMPAT && SYSVIPC
543 default y 543 default y
544 544
545config UID16
546 bool
547 depends on IA32_EMULATION
548 default y
549
550endmenu 545endmenu
551 546
552source "net/Kconfig" 547source "net/Kconfig"
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 0e10fd84c7cc..cf4b88c416dc 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -9,7 +9,7 @@
9 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 9 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
10 */ 10 */
11 11
12#include "miscsetup.h" 12#include <linux/screen_info.h>
13#include <asm/io.h> 13#include <asm/io.h>
14#include <asm/page.h> 14#include <asm/page.h>
15 15
diff --git a/arch/x86_64/boot/compressed/miscsetup.h b/arch/x86_64/boot/compressed/miscsetup.h
deleted file mode 100644
index bb1620531703..000000000000
--- a/arch/x86_64/boot/compressed/miscsetup.h
+++ /dev/null
@@ -1,39 +0,0 @@
1#define NULL 0
2//typedef unsigned int size_t;
3
4
5struct screen_info {
6 unsigned char orig_x; /* 0x00 */
7 unsigned char orig_y; /* 0x01 */
8 unsigned short dontuse1; /* 0x02 -- EXT_MEM_K sits here */
9 unsigned short orig_video_page; /* 0x04 */
10 unsigned char orig_video_mode; /* 0x06 */
11 unsigned char orig_video_cols; /* 0x07 */
12 unsigned short unused2; /* 0x08 */
13 unsigned short orig_video_ega_bx; /* 0x0a */
14 unsigned short unused3; /* 0x0c */
15 unsigned char orig_video_lines; /* 0x0e */
16 unsigned char orig_video_isVGA; /* 0x0f */
17 unsigned short orig_video_points; /* 0x10 */
18
19 /* VESA graphic mode -- linear frame buffer */
20 unsigned short lfb_width; /* 0x12 */
21 unsigned short lfb_height; /* 0x14 */
22 unsigned short lfb_depth; /* 0x16 */
23 unsigned long lfb_base; /* 0x18 */
24 unsigned long lfb_size; /* 0x1c */
25 unsigned short dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */
26 unsigned short lfb_linelength; /* 0x24 */
27 unsigned char red_size; /* 0x26 */
28 unsigned char red_pos; /* 0x27 */
29 unsigned char green_size; /* 0x28 */
30 unsigned char green_pos; /* 0x29 */
31 unsigned char blue_size; /* 0x2a */
32 unsigned char blue_pos; /* 0x2b */
33 unsigned char rsvd_size; /* 0x2c */
34 unsigned char rsvd_pos; /* 0x2d */
35 unsigned short vesapm_seg; /* 0x2e */
36 unsigned short vesapm_off; /* 0x30 */
37 unsigned short pages; /* 0x32 */
38 /* 0x34 -- 0x3f reserved for future expansion */
39};
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index df0773c9bdbe..1f0ff5adc80e 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -643,6 +643,7 @@ ia32_sys_call_table:
643 .quad sys_inotify_init 643 .quad sys_inotify_init
644 .quad sys_inotify_add_watch 644 .quad sys_inotify_add_watch
645 .quad sys_inotify_rm_watch 645 .quad sys_inotify_rm_watch
646 .quad sys_migrate_pages
646ia32_syscall_end: 647ia32_syscall_end:
647 .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 648 .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
648 .quad ni_syscall 649 .quad ni_syscall
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 2a925e2af390..5f4cdfa56901 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -196,36 +196,6 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
196 196
197#undef R32 197#undef R32
198 198
199static struct task_struct *find_target(int request, int pid, int *err)
200{
201 struct task_struct *child;
202
203 *err = -EPERM;
204 if (pid == 1)
205 return NULL;
206
207 *err = -ESRCH;
208 read_lock(&tasklist_lock);
209 child = find_task_by_pid(pid);
210 if (child)
211 get_task_struct(child);
212 read_unlock(&tasklist_lock);
213 if (child) {
214 *err = -EPERM;
215 if (child->pid == 1)
216 goto out;
217 *err = ptrace_check_attach(child, request == PTRACE_KILL);
218 if (*err < 0)
219 goto out;
220 return child;
221 }
222 out:
223 if (child)
224 put_task_struct(child);
225 return NULL;
226
227}
228
229asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) 199asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
230{ 200{
231 struct task_struct *child; 201 struct task_struct *child;
@@ -254,9 +224,16 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
254 break; 224 break;
255 } 225 }
256 226
257 child = find_target(request, pid, &ret); 227 if (request == PTRACE_TRACEME)
258 if (!child) 228 return ptrace_traceme();
259 return ret; 229
230 child = ptrace_get_task_struct(pid);
231 if (IS_ERR(child))
232 return PTR_ERR(child);
233
234 ret = ptrace_check_attach(child, request == PTRACE_KILL);
235 if (ret < 0)
236 goto out;
260 237
261 childregs = (struct pt_regs *)(child->thread.rsp0 - sizeof(struct pt_regs)); 238 childregs = (struct pt_regs *)(child->thread.rsp0 - sizeof(struct pt_regs));
262 239
@@ -373,6 +350,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
373 break; 350 break;
374 } 351 }
375 352
353 out:
376 put_task_struct(child); 354 put_task_struct(child);
377 return ret; 355 return ret;
378} 356}
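
The rewrite above trades the file-local find_target() (NULL return plus an error out-parameter) for the common ptrace_get_task_struct(), which encodes the errno in the returned pointer. The ERR_PTR()/IS_ERR()/PTR_ERR() trio works by parking small negative errno values at the very top of the address space, where no valid kernel pointer lives; a minimal re-implementation of the idiom for illustration (not the kernel headers):

	#include <stdio.h>

	#define MAX_ERRNO 4095

	static void *ERR_PTR(long error)	/* error is a -errno value */
	{
		return (void *)error;
	}

	static long PTR_ERR(const void *ptr)
	{
		return (long)ptr;
	}

	static int IS_ERR(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	static void *lookup(int pid)
	{
		if (pid != 1)
			return ERR_PTR(-3);	/* -ESRCH */
		return "task";			/* stand-in for a task_struct */
	}

	int main(void)
	{
		void *p = lookup(0);

		if (IS_ERR(p))
			printf("error %ld\n", PTR_ERR(p));	/* error -3 */
		return 0;
	}
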
diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c
index e0ba5c1043fd..ce31d904d601 100644
--- a/arch/x86_64/kernel/init_task.c
+++ b/arch/x86_64/kernel/init_task.c
@@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task);
44 * section. Since TSS's are completely CPU-local, we want them 44 * section. Since TSS's are completely CPU-local, we want them
45 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 45 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
46 */ 46 */
47DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; 47DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
48 48
49#define ALIGN_TO_4K __attribute__((section(".data.init_task"))) 49#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 74102796e5c0..43c9fa0f8d5f 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -1075,8 +1075,6 @@ device_initcall(time_init_device);
1075 */ 1075 */
1076#include <linux/rtc.h> 1076#include <linux/rtc.h>
1077 1077
1078extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
1079
1080#define DEFAULT_RTC_INT_FREQ 64 1078#define DEFAULT_RTC_INT_FREQ 64
1081#define RTC_NUM_INTS 1 1079#define RTC_NUM_INTS 1
1082 1080
diff --git a/block/ioctl.c b/block/ioctl.c
index 6e278474f9a8..82030e1dfd63 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -1,6 +1,7 @@
1#include <linux/sched.h> /* for capable() */ 1#include <linux/sched.h> /* for capable() */
2#include <linux/blkdev.h> 2#include <linux/blkdev.h>
3#include <linux/blkpg.h> 3#include <linux/blkpg.h>
4#include <linux/hdreg.h>
4#include <linux/backing-dev.h> 5#include <linux/backing-dev.h>
5#include <linux/buffer_head.h> 6#include <linux/buffer_head.h>
6#include <linux/smp_lock.h> 7#include <linux/smp_lock.h>
@@ -245,6 +246,27 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
245 set_device_ro(bdev, n); 246 set_device_ro(bdev, n);
246 unlock_kernel(); 247 unlock_kernel();
247 return 0; 248 return 0;
249 case HDIO_GETGEO: {
250 struct hd_geometry geo;
251
252 if (!arg)
253 return -EINVAL;
254 if (!disk->fops->getgeo)
255 return -ENOTTY;
256
257 /*
 258 * We need to set the start sector first, since the driver
 259 * may want to override it.
260 */
261 geo.start = get_start_sect(bdev);
262 ret = disk->fops->getgeo(bdev, &geo);
263 if (ret)
264 return ret;
265 if (copy_to_user((struct hd_geometry __user *)arg, &geo,
266 sizeof(geo)))
267 return -EFAULT;
268 return 0;
269 }
248 } 270 }
249 271
250 lock_kernel(); 272 lock_kernel();
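
The hunk above gives HDIO_GETGEO a generic path: blkdev_ioctl() pre-fills geo.start from the partition offset, asks the driver's new ->getgeo() method for heads/sectors/cylinders, and does the copy_to_user() itself, so the per-driver ioctl cases converted throughout this merge can all be deleted. Userspace is unaffected; a typical caller looks like this (device path assumed):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/hdreg.h>

	int main(void)
	{
		struct hd_geometry geo;
		int fd = open("/dev/sda1", O_RDONLY);	/* hypothetical device */

		if (fd < 0 || ioctl(fd, HDIO_GETGEO, &geo) < 0) {
			perror("HDIO_GETGEO");
			return 1;
		}
		printf("chs=%d/%d/%d start=%lu\n",
		       geo.cylinders, geo.heads, geo.sectors, geo.start);
		return 0;
	}
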
diff --git a/drivers/acorn/block/mfmhd.c b/drivers/acorn/block/mfmhd.c
index 4b65f74d66b1..ce074f6f3369 100644
--- a/drivers/acorn/block/mfmhd.c
+++ b/drivers/acorn/block/mfmhd.c
@@ -129,19 +129,6 @@ static DEFINE_SPINLOCK(mfm_lock);
129#define MAJOR_NR MFM_ACORN_MAJOR 129#define MAJOR_NR MFM_ACORN_MAJOR
130#define QUEUE (mfm_queue) 130#define QUEUE (mfm_queue)
131#define CURRENT elv_next_request(mfm_queue) 131#define CURRENT elv_next_request(mfm_queue)
132/*
133 * This sort of stuff should be in a header file shared with ide.c, hd.c, xd.c etc
134 */
135#ifndef HDIO_GETGEO
136#define HDIO_GETGEO 0x301
137struct hd_geometry {
138 unsigned char heads;
139 unsigned char sectors;
140 unsigned short cylinders;
141 unsigned long start;
142};
143#endif
144
145 132
146/* 133/*
147 * Configuration section 134 * Configuration section
@@ -1153,22 +1140,13 @@ static int mfm_initdrives(void)
1153 * The 'front' end of the mfm driver follows... 1140 * The 'front' end of the mfm driver follows...
1154 */ 1141 */
1155 1142
1156static int mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg) 1143static int mfm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1157{ 1144{
1158 struct mfm_info *p = inode->i_bdev->bd_disk->private_data; 1145 struct mfm_info *p = bdev->bd_disk->private_data;
1159 struct hd_geometry *geo = (struct hd_geometry *) arg; 1146
1160 if (cmd != HDIO_GETGEO) 1147 geo->heads = p->heads;
1161 return -EINVAL; 1148 geo->sectors = p->sectors;
1162 if (!arg) 1149 geo->cylinders = p->cylinders;
1163 return -EINVAL;
1164 if (put_user (p->heads, &geo->heads))
1165 return -EFAULT;
1166 if (put_user (p->sectors, &geo->sectors))
1167 return -EFAULT;
1168 if (put_user (p->cylinders, &geo->cylinders))
1169 return -EFAULT;
1170 if (put_user (get_start_sect(inode->i_bdev), &geo->start))
1171 return -EFAULT;
1172 return 0; 1150 return 0;
1173} 1151}
1174 1152
@@ -1219,7 +1197,7 @@ void xd_set_geometry(struct block_device *bdev, unsigned char secsptrack,
1219static struct block_device_operations mfm_fops = 1197static struct block_device_operations mfm_fops =
1220{ 1198{
1221 .owner = THIS_MODULE, 1199 .owner = THIS_MODULE,
1222 .ioctl = mfm_ioctl, 1200 .getgeo = mfm_getgeo,
1223}; 1201};
1224 1202
1225/* 1203/*
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index e3cd0b16031a..20c9a37643c7 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -204,11 +204,13 @@ acpi_os_map_memory(acpi_physical_address phys, acpi_size size,
204 204
205 return AE_OK; 205 return AE_OK;
206} 206}
207EXPORT_SYMBOL_GPL(acpi_os_map_memory);
207 208
208void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) 209void acpi_os_unmap_memory(void __iomem * virt, acpi_size size)
209{ 210{
210 iounmap(virt); 211 iounmap(virt);
211} 212}
213EXPORT_SYMBOL_GPL(acpi_os_unmap_memory);
212 214
213#ifdef ACPI_FUTURE_USAGE 215#ifdef ACPI_FUTURE_USAGE
214acpi_status 216acpi_status
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index c57e20dcb0f8..074abc81ec3d 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -2126,8 +2126,7 @@ static void process_rsq(ns_dev *card)
2126 2126
2127 if (!ns_rsqe_valid(card->rsq.next)) 2127 if (!ns_rsqe_valid(card->rsq.next))
2128 return; 2128 return;
2129 while (ns_rsqe_valid(card->rsq.next)) 2129 do {
2130 {
2131 dequeue_rx(card, card->rsq.next); 2130 dequeue_rx(card, card->rsq.next);
2132 ns_rsqe_init(card->rsq.next); 2131 ns_rsqe_init(card->rsq.next);
2133 previous = card->rsq.next; 2132 previous = card->rsq.next;
@@ -2135,7 +2134,7 @@ static void process_rsq(ns_dev *card)
2135 card->rsq.next = card->rsq.base; 2134 card->rsq.next = card->rsq.base;
2136 else 2135 else
2137 card->rsq.next++; 2136 card->rsq.next++;
2138 } 2137 } while (ns_rsqe_valid(card->rsq.next));
2139 writel((((u32) previous) - ((u32) card->rsq.base)), 2138 writel((((u32) previous) - ((u32) card->rsq.base)),
2140 card->membase + RSQH); 2139 card->membase + RSQH);
2141} 2140}
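
The nicstar change above is a small control-flow cleanup: the function already returns early when the first entry is invalid, so a do/while tests ns_rsqe_valid() once per entry instead of once on entry plus once per iteration. The general shape, as a sketch:

	struct entry { int valid; struct entry *next; };

	void drain(struct entry *e)
	{
		if (!e->valid)
			return;
		do {			/* each entry now tested exactly once */
			/* ...consume e... */
			e = e->next;
		} while (e->valid);
	}
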
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 21097a39a057..4a7bb7dfce85 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -92,34 +92,28 @@ static int DAC960_open(struct inode *inode, struct file *file)
92 return 0; 92 return 0;
93} 93}
94 94
95static int DAC960_ioctl(struct inode *inode, struct file *file, 95static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo)
96 unsigned int cmd, unsigned long arg)
97{ 96{
98 struct gendisk *disk = inode->i_bdev->bd_disk; 97 struct gendisk *disk = bdev->bd_disk;
99 DAC960_Controller_T *p = disk->queue->queuedata; 98 DAC960_Controller_T *p = disk->queue->queuedata;
100 int drive_nr = (long)disk->private_data; 99 int drive_nr = (long)disk->private_data;
101 struct hd_geometry g;
102 struct hd_geometry __user *loc = (struct hd_geometry __user *)arg;
103
104 if (cmd != HDIO_GETGEO || !loc)
105 return -EINVAL;
106 100
107 if (p->FirmwareType == DAC960_V1_Controller) { 101 if (p->FirmwareType == DAC960_V1_Controller) {
108 g.heads = p->V1.GeometryTranslationHeads; 102 geo->heads = p->V1.GeometryTranslationHeads;
109 g.sectors = p->V1.GeometryTranslationSectors; 103 geo->sectors = p->V1.GeometryTranslationSectors;
110 g.cylinders = p->V1.LogicalDriveInformation[drive_nr]. 104 geo->cylinders = p->V1.LogicalDriveInformation[drive_nr].
111 LogicalDriveSize / (g.heads * g.sectors); 105 LogicalDriveSize / (geo->heads * geo->sectors);
112 } else { 106 } else {
113 DAC960_V2_LogicalDeviceInfo_T *i = 107 DAC960_V2_LogicalDeviceInfo_T *i =
114 p->V2.LogicalDeviceInformation[drive_nr]; 108 p->V2.LogicalDeviceInformation[drive_nr];
115 switch (i->DriveGeometry) { 109 switch (i->DriveGeometry) {
116 case DAC960_V2_Geometry_128_32: 110 case DAC960_V2_Geometry_128_32:
117 g.heads = 128; 111 geo->heads = 128;
118 g.sectors = 32; 112 geo->sectors = 32;
119 break; 113 break;
120 case DAC960_V2_Geometry_255_63: 114 case DAC960_V2_Geometry_255_63:
121 g.heads = 255; 115 geo->heads = 255;
122 g.sectors = 63; 116 geo->sectors = 63;
123 break; 117 break;
124 default: 118 default:
125 DAC960_Error("Illegal Logical Device Geometry %d\n", 119 DAC960_Error("Illegal Logical Device Geometry %d\n",
@@ -127,12 +121,11 @@ static int DAC960_ioctl(struct inode *inode, struct file *file,
127 return -EINVAL; 121 return -EINVAL;
128 } 122 }
129 123
130 g.cylinders = i->ConfigurableDeviceSize / (g.heads * g.sectors); 124 geo->cylinders = i->ConfigurableDeviceSize /
125 (geo->heads * geo->sectors);
131 } 126 }
132 127
133 g.start = get_start_sect(inode->i_bdev); 128 return 0;
134
135 return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0;
136} 129}
137 130
138static int DAC960_media_changed(struct gendisk *disk) 131static int DAC960_media_changed(struct gendisk *disk)
@@ -157,7 +150,7 @@ static int DAC960_revalidate_disk(struct gendisk *disk)
157static struct block_device_operations DAC960_BlockDeviceOperations = { 150static struct block_device_operations DAC960_BlockDeviceOperations = {
158 .owner = THIS_MODULE, 151 .owner = THIS_MODULE,
159 .open = DAC960_open, 152 .open = DAC960_open,
160 .ioctl = DAC960_ioctl, 153 .getgeo = DAC960_getgeo,
161 .media_changed = DAC960_media_changed, 154 .media_changed = DAC960_media_changed,
162 .revalidate_disk = DAC960_revalidate_disk, 155 .revalidate_disk = DAC960_revalidate_disk,
163}; 156};
@@ -3767,7 +3760,7 @@ static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command)
3767 if (SenseKey == DAC960_SenseKey_VendorSpecific && 3760 if (SenseKey == DAC960_SenseKey_VendorSpecific &&
3768 AdditionalSenseCode == 0x80 && 3761 AdditionalSenseCode == 0x80 &&
3769 AdditionalSenseCodeQualifier < 3762 AdditionalSenseCodeQualifier <
3770 sizeof(DAC960_EventMessages) / sizeof(char *)) 3763 ARRAY_SIZE(DAC960_EventMessages))
3771 DAC960_Critical("Physical Device %d:%d %s\n", Controller, 3764 DAC960_Critical("Physical Device %d:%d %s\n", Controller,
3772 EventLogEntry->Channel, 3765 EventLogEntry->Channel,
3773 EventLogEntry->TargetID, 3766 EventLogEntry->TargetID,
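
The DAC960 conversion above is the template that the rest of this merge applies driver by driver: drop the HDIO_GETGEO arm of the ioctl handler, along with its copy_to_user() and get_start_sect() boilerplate, and wire a three-assignment ->getgeo() into block_device_operations instead. In outline, for a hypothetical driver foo:

	static int foo_getgeo(struct block_device *bdev, struct hd_geometry *geo)
	{
		struct foo_disk *d = bdev->bd_disk->private_data;

		/* geo->start is pre-filled by blkdev_ioctl(); only
		 * override it if the device really needs to */
		geo->heads     = d->heads;
		geo->sectors   = d->sectors;
		geo->cylinders = d->cylinders;
		return 0;
	}

	static struct block_device_operations foo_fops = {
		.owner	= THIS_MODULE,
		.getgeo	= foo_getgeo,
	};
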
diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c
index 5d2d649f7e8d..196c0ec9cd54 100644
--- a/drivers/block/acsi.c
+++ b/drivers/block/acsi.c
@@ -1079,6 +1079,19 @@ static void redo_acsi_request( void )
1079 * 1079 *
1080 ***********************************************************************/ 1080 ***********************************************************************/
1081 1081
1082static int acsi_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1083{
1084 struct acsi_info_struct *aip = bdev->bd_disk->private_data;
1085
1086 /*
 1087 * Just fake some geometry here; it's nonsense anyway.
 1088 * To make it easy, use Adaptec's usual 64/32 mapping.
1089 */
1090 geo->heads = 64;
1091 geo->sectors = 32;
1092 geo->cylinders = aip->size >> 11;
1093 return 0;
1094}
1082 1095
1083static int acsi_ioctl( struct inode *inode, struct file *file, 1096static int acsi_ioctl( struct inode *inode, struct file *file,
1084 unsigned int cmd, unsigned long arg ) 1097 unsigned int cmd, unsigned long arg )
@@ -1086,18 +1099,6 @@ static int acsi_ioctl( struct inode *inode, struct file *file,
1086 struct gendisk *disk = inode->i_bdev->bd_disk; 1099 struct gendisk *disk = inode->i_bdev->bd_disk;
1087 struct acsi_info_struct *aip = disk->private_data; 1100 struct acsi_info_struct *aip = disk->private_data;
1088 switch (cmd) { 1101 switch (cmd) {
1089 case HDIO_GETGEO:
1090 /* HDIO_GETGEO is supported more for getting the partition's
1091 * start sector... */
1092 { struct hd_geometry *geo = (struct hd_geometry *)arg;
1093 /* just fake some geometry here, it's nonsense anyway; to make it
1094 * easy, use Adaptec's usual 64/32 mapping */
1095 put_user( 64, &geo->heads );
1096 put_user( 32, &geo->sectors );
1097 put_user( aip->size >> 11, &geo->cylinders );
1098 put_user(get_start_sect(inode->i_bdev), &geo->start);
1099 return 0;
1100 }
1101 case SCSI_IOCTL_GET_IDLUN: 1102 case SCSI_IOCTL_GET_IDLUN:
1102 /* SCSI compatible GET_IDLUN call to get target's ID and LUN number */ 1103 /* SCSI compatible GET_IDLUN call to get target's ID and LUN number */
1103 put_user( aip->target | (aip->lun << 8), 1104 put_user( aip->target | (aip->lun << 8),
@@ -1592,6 +1593,7 @@ static struct block_device_operations acsi_fops = {
1592 .open = acsi_open, 1593 .open = acsi_open,
1593 .release = acsi_release, 1594 .release = acsi_release,
1594 .ioctl = acsi_ioctl, 1595 .ioctl = acsi_ioctl,
1596 .getgeo = acsi_getgeo,
1595 .media_changed = acsi_media_change, 1597 .media_changed = acsi_media_change,
1596 .revalidate_disk= acsi_revalidate, 1598 .revalidate_disk= acsi_revalidate,
1597}; 1599};
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 0acbfff8ad28..3c679d30b698 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -131,7 +131,7 @@ static struct fd_drive_type drive_types[] = {
131{ FD_DD_5, "DD 5.25", 40, 2, 14716, 13630, 1, 40, 81, 6, 30, 2}, 131{ FD_DD_5, "DD 5.25", 40, 2, 14716, 13630, 1, 40, 81, 6, 30, 2},
132{ FD_NODRIVE, "No Drive", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} 132{ FD_NODRIVE, "No Drive", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
133}; 133};
134static int num_dr_types = sizeof(drive_types) / sizeof(drive_types[0]); 134static int num_dr_types = ARRAY_SIZE(drive_types);
135 135
136static int amiga_read(int), dos_read(int); 136static int amiga_read(int), dos_read(int);
137static void amiga_write(int), dos_write(int); 137static void amiga_write(int), dos_write(int);
@@ -1424,6 +1424,16 @@ static void do_fd_request(request_queue_t * q)
1424 redo_fd_request(); 1424 redo_fd_request();
1425} 1425}
1426 1426
1427static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1428{
1429 int drive = MINOR(bdev->bd_dev) & 3;
1430
1431 geo->heads = unit[drive].type->heads;
1432 geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult;
1433 geo->cylinders = unit[drive].type->tracks;
1434 return 0;
1435}
1436
1427static int fd_ioctl(struct inode *inode, struct file *filp, 1437static int fd_ioctl(struct inode *inode, struct file *filp,
1428 unsigned int cmd, unsigned long param) 1438 unsigned int cmd, unsigned long param)
1429{ 1439{
@@ -1431,18 +1441,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp,
1431 static struct floppy_struct getprm; 1441 static struct floppy_struct getprm;
1432 1442
1433 switch(cmd){ 1443 switch(cmd){
1434 case HDIO_GETGEO:
1435 {
1436 struct hd_geometry loc;
1437 loc.heads = unit[drive].type->heads;
1438 loc.sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult;
1439 loc.cylinders = unit[drive].type->tracks;
1440 loc.start = 0;
1441 if (copy_to_user((void *)param, (void *)&loc,
1442 sizeof(struct hd_geometry)))
1443 return -EFAULT;
1444 break;
1445 }
1446 case FDFMTBEG: 1444 case FDFMTBEG:
1447 get_fdc(drive); 1445 get_fdc(drive);
1448 if (fd_ref[drive] > 1) { 1446 if (fd_ref[drive] > 1) {
@@ -1652,6 +1650,7 @@ static struct block_device_operations floppy_fops = {
1652 .open = floppy_open, 1650 .open = floppy_open,
1653 .release = floppy_release, 1651 .release = floppy_release,
1654 .ioctl = fd_ioctl, 1652 .ioctl = fd_ioctl,
1653 .getgeo = fd_getgeo,
1655 .media_changed = amiga_floppy_change, 1654 .media_changed = amiga_floppy_change,
1656}; 1655};
1657 1656
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 0e97fcb9f3a1..c05ee8bffd97 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -169,38 +169,26 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
169 return 0; 169 return 0;
170} 170}
171 171
172/* This ioctl implementation expects userland to have the device node
 173 * permissions set so that only privileged users can open an aoe
174 * block device directly.
175 */
176static int 172static int
177aoeblk_ioctl(struct inode *inode, struct file *filp, uint cmd, ulong arg) 173aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
178{ 174{
179 struct aoedev *d; 175 struct aoedev *d = bdev->bd_disk->private_data;
180
181 if (!arg)
182 return -EINVAL;
183 176
184 d = inode->i_bdev->bd_disk->private_data;
185 if ((d->flags & DEVFL_UP) == 0) { 177 if ((d->flags & DEVFL_UP) == 0) {
186 printk(KERN_ERR "aoe: aoeblk_ioctl: disk not up\n"); 178 printk(KERN_ERR "aoe: aoeblk_ioctl: disk not up\n");
187 return -ENODEV; 179 return -ENODEV;
188 } 180 }
189 181
190 if (cmd == HDIO_GETGEO) { 182 geo->cylinders = d->geo.cylinders;
191 d->geo.start = get_start_sect(inode->i_bdev); 183 geo->heads = d->geo.heads;
192 if (!copy_to_user((void __user *) arg, &d->geo, sizeof d->geo)) 184 geo->sectors = d->geo.sectors;
193 return 0; 185 return 0;
194 return -EFAULT;
195 }
196 printk(KERN_INFO "aoe: aoeblk_ioctl: unknown ioctl %d\n", cmd);
197 return -EINVAL;
198} 186}
199 187
200static struct block_device_operations aoe_bdops = { 188static struct block_device_operations aoe_bdops = {
201 .open = aoeblk_open, 189 .open = aoeblk_open,
202 .release = aoeblk_release, 190 .release = aoeblk_release,
203 .ioctl = aoeblk_ioctl, 191 .getgeo = aoeblk_getgeo,
204 .owner = THIS_MODULE, 192 .owner = THIS_MODULE,
205}; 193};
206 194
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 22bda05fc693..3aa68a5447d6 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -181,7 +181,7 @@ static struct {
181 { 6, TYPE_HD }, /* 31: H1640 <- was H1600 == h1600 for PC */ 181 { 6, TYPE_HD }, /* 31: H1640 <- was H1600 == h1600 for PC */
182}; 182};
183 183
184#define NUM_DISK_MINORS (sizeof(minor2disktype)/sizeof(*minor2disktype)) 184#define NUM_DISK_MINORS ARRAY_SIZE(minor2disktype)
185 185
186/* 186/*
187 * Maximum disk size (in kilobytes). This default is used whenever the 187 * Maximum disk size (in kilobytes). This default is used whenever the
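
NUM_DISK_MINORS above is one of many open-coded sizeof divisions that this merge converts to ARRAY_SIZE(). The macro (from linux/kernel.h) is simply:

	#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

which reads better and cannot drift out of sync when the array's element type changes, since the divisor is derived from the array itself.
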
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2815b7a9150..88452c79fb64 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Disk Array driver for HP SA 5xxx and 6xxx Controllers 2 * Disk Array driver for HP SA 5xxx and 6xxx Controllers
3 * Copyright 2000, 2005 Hewlett-Packard Development Company, L.P. 3 * Copyright 2000, 2006 Hewlett-Packard Development Company, L.P.
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -47,12 +47,12 @@
47#include <linux/completion.h> 47#include <linux/completion.h>
48 48
49#define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin)) 49#define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
50#define DRIVER_NAME "HP CISS Driver (v 2.6.8)" 50#define DRIVER_NAME "HP CISS Driver (v 2.6.10)"
51#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,8) 51#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,10)
52 52
53/* Embedded module documentation macros - see modules.h */ 53/* Embedded module documentation macros - see modules.h */
54MODULE_AUTHOR("Hewlett-Packard Company"); 54MODULE_AUTHOR("Hewlett-Packard Company");
55MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.8"); 55MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.10");
56MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400" 56MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
57 " SA6i P600 P800 P400 P400i E200 E200i"); 57 " SA6i P600 P800 P400 P400i E200 E200i");
58MODULE_LICENSE("GPL"); 58MODULE_LICENSE("GPL");
@@ -103,7 +103,7 @@ static const struct pci_device_id cciss_pci_device_id[] = {
103}; 103};
104MODULE_DEVICE_TABLE(pci, cciss_pci_device_id); 104MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
105 105
106#define NR_PRODUCTS (sizeof(products)/sizeof(struct board_type)) 106#define NR_PRODUCTS ARRAY_SIZE(products)
107 107
108/* board_id = Subsystem Device ID & Vendor ID 108/* board_id = Subsystem Device ID & Vendor ID
109 * product = Marketing Name for the board 109 * product = Marketing Name for the board
@@ -153,6 +153,7 @@ static int cciss_open(struct inode *inode, struct file *filep);
153static int cciss_release(struct inode *inode, struct file *filep); 153static int cciss_release(struct inode *inode, struct file *filep);
154static int cciss_ioctl(struct inode *inode, struct file *filep, 154static int cciss_ioctl(struct inode *inode, struct file *filep,
155 unsigned int cmd, unsigned long arg); 155 unsigned int cmd, unsigned long arg);
156static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
156 157
157static int revalidate_allvol(ctlr_info_t *host); 158static int revalidate_allvol(ctlr_info_t *host);
158static int cciss_revalidate(struct gendisk *disk); 159static int cciss_revalidate(struct gendisk *disk);
@@ -166,7 +167,7 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
166 unsigned int block_size, InquiryData_struct *inq_buff, 167 unsigned int block_size, InquiryData_struct *inq_buff,
167 drive_info_struct *drv); 168 drive_info_struct *drv);
168static void cciss_getgeometry(int cntl_num); 169static void cciss_getgeometry(int cntl_num);
169 170static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *, __u32);
170static void start_io( ctlr_info_t *h); 171static void start_io( ctlr_info_t *h);
171static int sendcmd( __u8 cmd, int ctlr, void *buff, size_t size, 172static int sendcmd( __u8 cmd, int ctlr, void *buff, size_t size,
172 unsigned int use_unit_num, unsigned int log_unit, __u8 page_code, 173 unsigned int use_unit_num, unsigned int log_unit, __u8 page_code,
@@ -194,6 +195,7 @@ static struct block_device_operations cciss_fops = {
194 .open = cciss_open, 195 .open = cciss_open,
195 .release = cciss_release, 196 .release = cciss_release,
196 .ioctl = cciss_ioctl, 197 .ioctl = cciss_ioctl,
198 .getgeo = cciss_getgeo,
197#ifdef CONFIG_COMPAT 199#ifdef CONFIG_COMPAT
198 .compat_ioctl = cciss_compat_ioctl, 200 .compat_ioctl = cciss_compat_ioctl,
199#endif 201#endif
@@ -282,7 +284,7 @@ static int cciss_proc_get_info(char *buffer, char **start, off_t offset,
282 h->product_name, 284 h->product_name,
283 (unsigned long)h->board_id, 285 (unsigned long)h->board_id,
284 h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], h->firm_ver[3], 286 h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], h->firm_ver[3],
285 (unsigned int)h->intr, 287 (unsigned int)h->intr[SIMPLE_MODE_INT],
286 h->num_luns, 288 h->num_luns,
287 h->Qdepth, h->commands_outstanding, 289 h->Qdepth, h->commands_outstanding,
288 h->maxQsinceinit, h->max_outstanding, h->maxSG); 290 h->maxQsinceinit, h->max_outstanding, h->maxSG);
@@ -633,6 +635,20 @@ static int cciss_ioctl32_big_passthru(struct file *file, unsigned cmd, unsigned
633 return err; 635 return err;
634} 636}
635#endif 637#endif
638
639static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
640{
641 drive_info_struct *drv = get_drv(bdev->bd_disk);
642
643 if (!drv->cylinders)
644 return -ENXIO;
645
646 geo->heads = drv->heads;
647 geo->sectors = drv->sectors;
648 geo->cylinders = drv->cylinders;
649 return 0;
650}
651
636/* 652/*
637 * ioctl 653 * ioctl
638 */ 654 */
@@ -651,21 +667,6 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
651#endif /* CCISS_DEBUG */ 667#endif /* CCISS_DEBUG */
652 668
653 switch(cmd) { 669 switch(cmd) {
654 case HDIO_GETGEO:
655 {
656 struct hd_geometry driver_geo;
657 if (drv->cylinders) {
658 driver_geo.heads = drv->heads;
659 driver_geo.sectors = drv->sectors;
660 driver_geo.cylinders = drv->cylinders;
661 } else
662 return -ENXIO;
663 driver_geo.start= get_start_sect(inode->i_bdev);
664 if (copy_to_user(argp, &driver_geo, sizeof(struct hd_geometry)))
665 return -EFAULT;
666 return(0);
667 }
668
669 case CCISS_GETPCIINFO: 670 case CCISS_GETPCIINFO:
670 { 671 {
671 cciss_pci_info_struct pciinfo; 672 cciss_pci_info_struct pciinfo;
@@ -2661,6 +2662,60 @@ static int find_PCI_BAR_index(struct pci_dev *pdev,
2661 return -1; 2662 return -1;
2662} 2663}
2663 2664
2665/* If MSI/MSI-X is supported by the kernel we will try to enable it on
2666 * controllers that are capable. If not, we use IO-APIC mode.
2667 */
2668
2669static void __devinit cciss_interrupt_mode(ctlr_info_t *c, struct pci_dev *pdev, __u32 board_id)
2670{
2671#ifdef CONFIG_PCI_MSI
2672 int err;
2673 struct msix_entry cciss_msix_entries[4] = {{0,0}, {0,1},
2674 {0,2}, {0,3}};
2675
2676 /* Some boards advertise MSI but don't really support it */
2677 if ((board_id == 0x40700E11) ||
2678 (board_id == 0x40800E11) ||
2679 (board_id == 0x40820E11) ||
2680 (board_id == 0x40830E11))
2681 goto default_int_mode;
2682
2683 if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
2684 err = pci_enable_msix(pdev, cciss_msix_entries, 4);
2685 if (!err) {
2686 c->intr[0] = cciss_msix_entries[0].vector;
2687 c->intr[1] = cciss_msix_entries[1].vector;
2688 c->intr[2] = cciss_msix_entries[2].vector;
2689 c->intr[3] = cciss_msix_entries[3].vector;
2690 c->msix_vector = 1;
2691 return;
2692 }
2693 if (err > 0) {
2694 printk(KERN_WARNING "cciss: only %d MSI-X vectors "
2695 "available\n", err);
2696 } else {
2697 printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
2698 err);
2699 }
2700 }
2701 if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
2702 if (!pci_enable_msi(pdev)) {
2703 c->intr[SIMPLE_MODE_INT] = pdev->irq;
2704 c->msi_vector = 1;
2705 return;
2706 } else {
2707 printk(KERN_WARNING "cciss: MSI init failed\n");
2708 c->intr[SIMPLE_MODE_INT] = pdev->irq;
2709 return;
2710 }
2711 }
2712#endif /* CONFIG_PCI_MSI */
2713 /* if we get here we're going to use the default interrupt mode */
2714default_int_mode:
2715 c->intr[SIMPLE_MODE_INT] = pdev->irq;
2716 return;
2717}
2718
2664static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) 2719static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
2665{ 2720{
2666 ushort subsystem_vendor_id, subsystem_device_id, command; 2721 ushort subsystem_vendor_id, subsystem_device_id, command;
@@ -2721,7 +2776,10 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
2721 printk("board_id = %x\n", board_id); 2776 printk("board_id = %x\n", board_id);
2722#endif /* CCISS_DEBUG */ 2777#endif /* CCISS_DEBUG */
2723 2778
 2724 c->intr = pdev->irq; 2779/* If the kernel supports MSI/MSI-X we will try to enable that functionality;
 2780 * otherwise we use the IO-APIC interrupt assigned to us by the system ROM.
2781 */
2782 cciss_interrupt_mode(c, pdev, board_id);
2725 2783
2726 /* 2784 /*
2727 * Memory base addr is first addr , the second points to the config 2785 * Memory base addr is first addr , the second points to the config
@@ -2775,7 +2833,7 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
2775 c->board_id = board_id; 2833 c->board_id = board_id;
2776 2834
2777#ifdef CCISS_DEBUG 2835#ifdef CCISS_DEBUG
2778 print_cfg_table(c->cfgtable); 2836 print_cfg_table(c->cfgtable);
2779#endif /* CCISS_DEBUG */ 2837#endif /* CCISS_DEBUG */
2780 2838
2781 for(i=0; i<NR_PRODUCTS; i++) { 2839 for(i=0; i<NR_PRODUCTS; i++) {
@@ -3060,7 +3118,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
3060 * 8 controller support. 3118 * 8 controller support.
3061 */ 3119 */
3062 if (i < MAX_CTLR_ORIG) 3120 if (i < MAX_CTLR_ORIG)
3063 hba[i]->major = MAJOR_NR + i; 3121 hba[i]->major = COMPAQ_CISS_MAJOR + i;
3064 rc = register_blkdev(hba[i]->major, hba[i]->devname); 3122 rc = register_blkdev(hba[i]->major, hba[i]->devname);
3065 if(rc == -EBUSY || rc == -EINVAL) { 3123 if(rc == -EBUSY || rc == -EINVAL) {
3066 printk(KERN_ERR 3124 printk(KERN_ERR
@@ -3075,11 +3133,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
3075 3133
3076 /* make sure the board interrupts are off */ 3134 /* make sure the board interrupts are off */
3077 hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF); 3135 hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
3078 if( request_irq(hba[i]->intr, do_cciss_intr, 3136 if( request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
3079 SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, 3137 SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM,
3080 hba[i]->devname, hba[i])) { 3138 hba[i]->devname, hba[i])) {
3081 printk(KERN_ERR "cciss: Unable to get irq %d for %s\n", 3139 printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
3082 hba[i]->intr, hba[i]->devname); 3140 hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
3083 goto clean2; 3141 goto clean2;
3084 } 3142 }
3085 hba[i]->cmd_pool_bits = kmalloc(((NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG)*sizeof(unsigned long), GFP_KERNEL); 3143 hba[i]->cmd_pool_bits = kmalloc(((NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG)*sizeof(unsigned long), GFP_KERNEL);
@@ -3185,7 +3243,7 @@ clean4:
3185 NR_CMDS * sizeof( ErrorInfo_struct), 3243 NR_CMDS * sizeof( ErrorInfo_struct),
3186 hba[i]->errinfo_pool, 3244 hba[i]->errinfo_pool,
3187 hba[i]->errinfo_pool_dhandle); 3245 hba[i]->errinfo_pool_dhandle);
3188 free_irq(hba[i]->intr, hba[i]); 3246 free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
3189clean2: 3247clean2:
3190 unregister_blkdev(hba[i]->major, hba[i]->devname); 3248 unregister_blkdev(hba[i]->major, hba[i]->devname);
3191clean1: 3249clean1:
@@ -3226,7 +3284,15 @@ static void __devexit cciss_remove_one (struct pci_dev *pdev)
3226 printk(KERN_WARNING "Error Flushing cache on controller %d\n", 3284 printk(KERN_WARNING "Error Flushing cache on controller %d\n",
3227 i); 3285 i);
3228 } 3286 }
3229 free_irq(hba[i]->intr, hba[i]); 3287 free_irq(hba[i]->intr[2], hba[i]);
3288
3289#ifdef CONFIG_PCI_MSI
3290 if (hba[i]->msix_vector)
3291 pci_disable_msix(hba[i]->pdev);
3292 else if (hba[i]->msi_vector)
3293 pci_disable_msi(hba[i]->pdev);
3294#endif /* CONFIG_PCI_MSI */
3295
3230 pci_set_drvdata(pdev, NULL); 3296 pci_set_drvdata(pdev, NULL);
3231 iounmap(hba[i]->vaddr); 3297 iounmap(hba[i]->vaddr);
3232 cciss_unregister_scsi(i); /* unhook from SCSI subsystem */ 3298 cciss_unregister_scsi(i); /* unhook from SCSI subsystem */
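
The new cciss_interrupt_mode() above probes in descending order of preference - four MSI-X vectors, then single-vector MSI, then the pin-based IRQ the ROM assigned - and whatever wins ends up in c->intr[SIMPLE_MODE_INT], which is why every request_irq()/free_irq() in the file now indexes that array. The 2.6-era probe skeleton, reduced to a sketch (controller fields hypothetical):

	static void pick_irq(struct my_ctlr *c, struct pci_dev *pdev)
	{
	#ifdef CONFIG_PCI_MSI
		struct msix_entry e[4] = { {0, 0}, {0, 1}, {0, 2}, {0, 3} };
		int i;

		if (pci_find_capability(pdev, PCI_CAP_ID_MSIX) &&
		    pci_enable_msix(pdev, e, 4) == 0) {
			for (i = 0; i < 4; i++)
				c->intr[i] = e[i].vector;
			c->msix_vector = 1;
			return;
		}
		if (pci_find_capability(pdev, PCI_CAP_ID_MSI) &&
		    pci_enable_msi(pdev) == 0)
			c->msi_vector = 1;	/* pdev->irq is now the MSI vector */
	#endif
		c->intr[SIMPLE_MODE_INT] = pdev->irq;	/* INTx or MSI */
	}
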
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 3b0858c83897..b24fc0553ccf 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -13,8 +13,6 @@
13#define IO_OK 0 13#define IO_OK 0
14#define IO_ERROR 1 14#define IO_ERROR 1
15 15
16#define MAJOR_NR COMPAQ_CISS_MAJOR
17
18struct ctlr_info; 16struct ctlr_info;
19typedef struct ctlr_info ctlr_info_t; 17typedef struct ctlr_info ctlr_info_t;
20 18
@@ -65,7 +63,6 @@ struct ctlr_info
65 unsigned long io_mem_addr; 63 unsigned long io_mem_addr;
66 unsigned long io_mem_length; 64 unsigned long io_mem_length;
67 CfgTable_struct __iomem *cfgtable; 65 CfgTable_struct __iomem *cfgtable;
68 unsigned int intr;
69 int interrupts_enabled; 66 int interrupts_enabled;
70 int major; 67 int major;
71 int max_commands; 68 int max_commands;
@@ -74,6 +71,13 @@ struct ctlr_info
74 int num_luns; 71 int num_luns;
75 int highest_lun; 72 int highest_lun;
 76 int usage_count; /* number of opens on all minor devices */ 73 int usage_count; /* number of opens on all minor devices */
74# define DOORBELL_INT 0
75# define PERF_MODE_INT 1
76# define SIMPLE_MODE_INT 2
77# define MEMQ_MODE_INT 3
78 unsigned int intr[4];
79 unsigned int msix_vector;
80 unsigned int msi_vector;
77 81
78 // information about each logical volume 82 // information about each logical volume
79 drive_info_struct drv[CISS_MAX_LUN]; 83 drive_info_struct drv[CISS_MAX_LUN];
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 2942d32280a5..9e35de05d5c5 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -714,7 +714,7 @@ cciss_scsi_detect(int ctlr)
714 ((struct cciss_scsi_adapter_data_t *) 714 ((struct cciss_scsi_adapter_data_t *)
715 hba[ctlr]->scsi_ctlr)->scsi_host = (void *) sh; 715 hba[ctlr]->scsi_ctlr)->scsi_host = (void *) sh;
716 sh->hostdata[0] = (unsigned long) hba[ctlr]; 716 sh->hostdata[0] = (unsigned long) hba[ctlr];
717 sh->irq = hba[ctlr]->intr; 717 sh->irq = hba[ctlr]->intr[SIMPLE_MODE_INT];
718 sh->unique_id = sh->irq; 718 sh->unique_id = sh->irq;
719 error = scsi_add_host(sh, &hba[ctlr]->pdev->dev); 719 error = scsi_add_host(sh, &hba[ctlr]->pdev->dev);
720 if (error) 720 if (error)
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 9bddb6874873..862b9abac0ae 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -72,11 +72,11 @@ static ctlr_info_t *hba[MAX_CTLR];
72 72
73static int eisa[8]; 73static int eisa[8];
74 74
75#define NR_PRODUCTS (sizeof(products)/sizeof(struct board_type)) 75#define NR_PRODUCTS ARRAY_SIZE(products)
76 76
77/* board_id = Subsystem Device ID & Vendor ID 77/* board_id = Subsystem Device ID & Vendor ID
78 * product = Marketing Name for the board 78 * product = Marketing Name for the board
79 * access = Address of the struct of function pointers 79 * access = Address of the struct of function pointers
80 */ 80 */
81static struct board_type products[] = { 81static struct board_type products[] = {
82 { 0x0040110E, "IDA", &smart1_access }, 82 { 0x0040110E, "IDA", &smart1_access },
@@ -160,6 +160,7 @@ static int sendcmd(
160static int ida_open(struct inode *inode, struct file *filep); 160static int ida_open(struct inode *inode, struct file *filep);
161static int ida_release(struct inode *inode, struct file *filep); 161static int ida_release(struct inode *inode, struct file *filep);
162static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg); 162static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg);
163static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo);
163static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io); 164static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io);
164 165
165static void do_ida_request(request_queue_t *q); 166static void do_ida_request(request_queue_t *q);
@@ -199,6 +200,7 @@ static struct block_device_operations ida_fops = {
199 .open = ida_open, 200 .open = ida_open,
200 .release = ida_release, 201 .release = ida_release,
201 .ioctl = ida_ioctl, 202 .ioctl = ida_ioctl,
203 .getgeo = ida_getgeo,
202 .revalidate_disk= ida_revalidate, 204 .revalidate_disk= ida_revalidate,
203}; 205};
204 206
@@ -1124,6 +1126,23 @@ static void ida_timer(unsigned long tdata)
1124 h->misc_tflags = 0; 1126 h->misc_tflags = 0;
1125} 1127}
1126 1128
1129static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1130{
1131 drv_info_t *drv = get_drv(bdev->bd_disk);
1132
1133 if (drv->cylinders) {
1134 geo->heads = drv->heads;
1135 geo->sectors = drv->sectors;
1136 geo->cylinders = drv->cylinders;
1137 } else {
1138 geo->heads = 0xff;
1139 geo->sectors = 0x3f;
1140 geo->cylinders = drv->nr_blks / (0xff*0x3f);
1141 }
1142
1143 return 0;
1144}
1145
1127/* 1146/*
1128 * ida_ioctl does some miscellaneous stuff like reporting drive geometry, 1147 * ida_ioctl does some miscellaneous stuff like reporting drive geometry,
1129 * setting readahead and submitting commands from userspace to the controller. 1148 * setting readahead and submitting commands from userspace to the controller.
@@ -1133,27 +1152,10 @@ static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd,
1133 drv_info_t *drv = get_drv(inode->i_bdev->bd_disk); 1152 drv_info_t *drv = get_drv(inode->i_bdev->bd_disk);
1134 ctlr_info_t *host = get_host(inode->i_bdev->bd_disk); 1153 ctlr_info_t *host = get_host(inode->i_bdev->bd_disk);
1135 int error; 1154 int error;
1136 int diskinfo[4];
1137 struct hd_geometry __user *geo = (struct hd_geometry __user *)arg;
1138 ida_ioctl_t __user *io = (ida_ioctl_t __user *)arg; 1155 ida_ioctl_t __user *io = (ida_ioctl_t __user *)arg;
1139 ida_ioctl_t *my_io; 1156 ida_ioctl_t *my_io;
1140 1157
1141 switch(cmd) { 1158 switch(cmd) {
1142 case HDIO_GETGEO:
1143 if (drv->cylinders) {
1144 diskinfo[0] = drv->heads;
1145 diskinfo[1] = drv->sectors;
1146 diskinfo[2] = drv->cylinders;
1147 } else {
1148 diskinfo[0] = 0xff;
1149 diskinfo[1] = 0x3f;
1150 diskinfo[2] = drv->nr_blks / (0xff*0x3f);
1151 }
1152 put_user(diskinfo[0], &geo->heads);
1153 put_user(diskinfo[1], &geo->sectors);
1154 put_user(diskinfo[2], &geo->cylinders);
1155 put_user(get_start_sect(inode->i_bdev), &geo->start);
1156 return 0;
1157 case IDAGETDRVINFO: 1159 case IDAGETDRVINFO:
1158 if (copy_to_user(&io->c.drv, drv, sizeof(drv_info_t))) 1160 if (copy_to_user(&io->c.drv, drv, sizeof(drv_info_t)))
1159 return -EFAULT; 1161 return -EFAULT;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index a5b857c5c4b8..374621a512e0 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -479,7 +479,6 @@ static struct floppy_struct floppy_type[32] = {
479 { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */ 479 { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */
480}; 480};
481 481
482#define NUMBER(x) (sizeof(x) / sizeof(*(x)))
483#define SECTSIZE (_FD_SECTSIZE(*floppy)) 482#define SECTSIZE (_FD_SECTSIZE(*floppy))
484 483
485/* Auto-detection: Disk type used until the next media change occurs. */ 484/* Auto-detection: Disk type used until the next media change occurs. */
@@ -3445,6 +3444,23 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g)
3445 return 0; 3444 return 0;
3446} 3445}
3447 3446
3447static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
3448{
3449 int drive = (long)bdev->bd_disk->private_data;
3450 int type = ITYPE(drive_state[drive].fd_device);
3451 struct floppy_struct *g;
3452 int ret;
3453
3454 ret = get_floppy_geometry(drive, type, &g);
3455 if (ret)
3456 return ret;
3457
3458 geo->heads = g->head;
3459 geo->sectors = g->sect;
3460 geo->cylinders = g->track;
3461 return 0;
3462}
3463
3448static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 3464static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
3449 unsigned long param) 3465 unsigned long param)
3450{ 3466{
@@ -3474,23 +3490,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
3474 cmd = FDEJECT; 3490 cmd = FDEJECT;
3475 } 3491 }
3476 3492
3477 /* generic block device ioctls */
3478 switch (cmd) {
3479 /* the following have been inspired by the corresponding
3480 * code for other block devices. */
3481 struct floppy_struct *g;
3482 case HDIO_GETGEO:
3483 {
3484 struct hd_geometry loc;
3485 ECALL(get_floppy_geometry(drive, type, &g));
3486 loc.heads = g->head;
3487 loc.sectors = g->sect;
3488 loc.cylinders = g->track;
3489 loc.start = 0;
3490 return _COPYOUT(loc);
3491 }
3492 }
3493
3494 /* convert the old style command into a new style command */ 3493 /* convert the old style command into a new style command */
3495 if ((cmd & 0xff00) == 0x0200) { 3494 if ((cmd & 0xff00) == 0x0200) {
3496 ECALL(normalize_ioctl(&cmd, &size)); 3495 ECALL(normalize_ioctl(&cmd, &size));
@@ -3645,7 +3644,7 @@ static void __init config_types(void)
3645 const char *name = NULL; 3644 const char *name = NULL;
3646 static char temparea[32]; 3645 static char temparea[32];
3647 3646
3648 if (type < NUMBER(default_drive_params)) { 3647 if (type < ARRAY_SIZE(default_drive_params)) {
3649 params = &default_drive_params[type].params; 3648 params = &default_drive_params[type].params;
3650 if (type) { 3649 if (type) {
3651 name = default_drive_params[type].name; 3650 name = default_drive_params[type].name;
@@ -3938,6 +3937,7 @@ static struct block_device_operations floppy_fops = {
3938 .open = floppy_open, 3937 .open = floppy_open,
3939 .release = floppy_release, 3938 .release = floppy_release,
3940 .ioctl = fd_ioctl, 3939 .ioctl = fd_ioctl,
3940 .getgeo = fd_getgeo,
3941 .media_changed = check_floppy_change, 3941 .media_changed = check_floppy_change,
3942 .revalidate_disk = floppy_revalidate, 3942 .revalidate_disk = floppy_revalidate,
3943}; 3943};
@@ -3960,7 +3960,7 @@ static void __init register_devfs_entries(int drive)
3960{ 3960{
3961 int base_minor = (drive < 4) ? drive : (124 + drive); 3961 int base_minor = (drive < 4) ? drive : (124 + drive);
3962 3962
3963 if (UDP->cmos < NUMBER(default_drive_params)) { 3963 if (UDP->cmos < ARRAY_SIZE(default_drive_params)) {
3964 int i = 0; 3964 int i = 0;
3965 do { 3965 do {
3966 int minor = base_minor + (table_sup[UDP->cmos][i] << 2); 3966 int minor = base_minor + (table_sup[UDP->cmos][i] << 2);
@@ -4218,7 +4218,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
4218 !(allowed_drive_mask & (1 << drive)) || 4218 !(allowed_drive_mask & (1 << drive)) ||
4219 fdc_state[FDC(drive)].version == FDC_NONE) 4219 fdc_state[FDC(drive)].version == FDC_NONE)
4220 return NULL; 4220 return NULL;
4221 if (((*part >> 2) & 0x1f) >= NUMBER(floppy_type)) 4221 if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type))
4222 return NULL; 4222 return NULL;
4223 *part = 0; 4223 *part = 0;
4224 return get_disk(disks[drive]); 4224 return get_disk(disks[drive]);
@@ -4570,7 +4570,7 @@ static void unregister_devfs_entries(int drive)
4570{ 4570{
4571 int i; 4571 int i;
4572 4572
4573 if (UDP->cmos < NUMBER(default_drive_params)) { 4573 if (UDP->cmos < ARRAY_SIZE(default_drive_params)) {
4574 i = 0; 4574 i = 0;
4575 do { 4575 do {
4576 devfs_remove("floppy/%d%s", drive, 4576 devfs_remove("floppy/%d%s", drive,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 33d6f237b2ed..6997d8e6bfb5 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -174,7 +174,6 @@ static int sock_xmit(struct socket *sock, int send, void *buf, int size,
174 msg.msg_namelen = 0; 174 msg.msg_namelen = 0;
175 msg.msg_control = NULL; 175 msg.msg_control = NULL;
176 msg.msg_controllen = 0; 176 msg.msg_controllen = 0;
177 msg.msg_namelen = 0;
178 msg.msg_flags = msg_flags | MSG_NOSIGNAL; 177 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
179 178
180 if (send) 179 if (send)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index fa49d62626ba..62d2464c12f2 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -747,32 +747,33 @@ static int pd_open(struct inode *inode, struct file *file)
747 return 0; 747 return 0;
748} 748}
749 749
750static int pd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
751{
752 struct pd_unit *disk = bdev->bd_disk->private_data;
753
754 if (disk->alt_geom) {
755 geo->heads = PD_LOG_HEADS;
756 geo->sectors = PD_LOG_SECTS;
757 geo->cylinders = disk->capacity / (geo->heads * geo->sectors);
758 } else {
759 geo->heads = disk->heads;
760 geo->sectors = disk->sectors;
761 geo->cylinders = disk->cylinders;
762 }
763
764 return 0;
765}
766
750static int pd_ioctl(struct inode *inode, struct file *file, 767static int pd_ioctl(struct inode *inode, struct file *file,
751 unsigned int cmd, unsigned long arg) 768 unsigned int cmd, unsigned long arg)
752{ 769{
753 struct pd_unit *disk = inode->i_bdev->bd_disk->private_data; 770 struct pd_unit *disk = inode->i_bdev->bd_disk->private_data;
754 struct hd_geometry __user *geo = (struct hd_geometry __user *) arg;
755 struct hd_geometry g;
756 771
757 switch (cmd) { 772 switch (cmd) {
758 case CDROMEJECT: 773 case CDROMEJECT:
759 if (disk->access == 1) 774 if (disk->access == 1)
760 pd_special_command(disk, pd_eject); 775 pd_special_command(disk, pd_eject);
761 return 0; 776 return 0;
762 case HDIO_GETGEO:
763 if (disk->alt_geom) {
764 g.heads = PD_LOG_HEADS;
765 g.sectors = PD_LOG_SECTS;
766 g.cylinders = disk->capacity / (g.heads * g.sectors);
767 } else {
768 g.heads = disk->heads;
769 g.sectors = disk->sectors;
770 g.cylinders = disk->cylinders;
771 }
772 g.start = get_start_sect(inode->i_bdev);
773 if (copy_to_user(geo, &g, sizeof(struct hd_geometry)))
774 return -EFAULT;
775 return 0;
776 default: 777 default:
777 return -EINVAL; 778 return -EINVAL;
778 } 779 }
@@ -815,6 +816,7 @@ static struct block_device_operations pd_fops = {
815 .open = pd_open, 816 .open = pd_open,
816 .release = pd_release, 817 .release = pd_release,
817 .ioctl = pd_ioctl, 818 .ioctl = pd_ioctl,
819 .getgeo = pd_getgeo,
818 .media_changed = pd_check_media, 820 .media_changed = pd_check_media,
819 .revalidate_disk= pd_revalidate 821 .revalidate_disk= pd_revalidate
820}; 822};
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index e9746af29b9f..852b564e903a 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -205,6 +205,7 @@ static int pf_open(struct inode *inode, struct file *file);
205static void do_pf_request(request_queue_t * q); 205static void do_pf_request(request_queue_t * q);
206static int pf_ioctl(struct inode *inode, struct file *file, 206static int pf_ioctl(struct inode *inode, struct file *file,
207 unsigned int cmd, unsigned long arg); 207 unsigned int cmd, unsigned long arg);
208static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo);
208 209
209static int pf_release(struct inode *inode, struct file *file); 210static int pf_release(struct inode *inode, struct file *file);
210 211
@@ -266,6 +267,7 @@ static struct block_device_operations pf_fops = {
266 .open = pf_open, 267 .open = pf_open,
267 .release = pf_release, 268 .release = pf_release,
268 .ioctl = pf_ioctl, 269 .ioctl = pf_ioctl,
270 .getgeo = pf_getgeo,
269 .media_changed = pf_check_media, 271 .media_changed = pf_check_media,
270}; 272};
271 273
@@ -313,34 +315,34 @@ static int pf_open(struct inode *inode, struct file *file)
313 return 0; 315 return 0;
314} 316}
315 317
316static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) 318static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo)
317{ 319{
318 struct pf_unit *pf = inode->i_bdev->bd_disk->private_data; 320 struct pf_unit *pf = bdev->bd_disk->private_data;
319 struct hd_geometry __user *geo = (struct hd_geometry __user *) arg; 321 sector_t capacity = get_capacity(pf->disk);
320 struct hd_geometry g; 322
321 sector_t capacity;
322
323 if (cmd == CDROMEJECT) {
324 if (pf->access == 1) {
325 pf_eject(pf);
326 return 0;
327 }
328 return -EBUSY;
329 }
330 if (cmd != HDIO_GETGEO)
331 return -EINVAL;
332 capacity = get_capacity(pf->disk);
333 if (capacity < PF_FD_MAX) { 323 if (capacity < PF_FD_MAX) {
334 g.cylinders = sector_div(capacity, PF_FD_HDS * PF_FD_SPT); 324 geo->cylinders = sector_div(capacity, PF_FD_HDS * PF_FD_SPT);
335 g.heads = PF_FD_HDS; 325 geo->heads = PF_FD_HDS;
336 g.sectors = PF_FD_SPT; 326 geo->sectors = PF_FD_SPT;
337 } else { 327 } else {
338 g.cylinders = sector_div(capacity, PF_HD_HDS * PF_HD_SPT); 328 geo->cylinders = sector_div(capacity, PF_HD_HDS * PF_HD_SPT);
339 g.heads = PF_HD_HDS; 329 geo->heads = PF_HD_HDS;
340 g.sectors = PF_HD_SPT; 330 geo->sectors = PF_HD_SPT;
341 } 331 }
342 if (copy_to_user(geo, &g, sizeof(g))) 332
343 return -EFAULT; 333 return 0;
334}
335
336static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
337{
338 struct pf_unit *pf = inode->i_bdev->bd_disk->private_data;
339
340 if (cmd != CDROMEJECT)
341 return -EINVAL;
342
343 if (pf->access != 1)
344 return -EBUSY;
345 pf_eject(pf);
344 return 0; 346 return 0;
345} 347}
346 348
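pf_getgeo() above fakes a CHS geometry from the disk capacity using sector_div(). Because sector_t can be 64 bits wide (CONFIG_LBD) and 32-bit kernels cannot apply a plain '/' to it, sector_div(n, base) divides n by base in place and returns the remainder. A sketch of the usual quotient-based idiom under those assumptions (fake_chs and its parameters are illustrative, not from the driver):

	#include <linux/genhd.h>	/* sector_t, sector_div() */
	#include <linux/hdreg.h>	/* struct hd_geometry */

	static void fake_chs(sector_t capacity, int heads, int spt,
			     struct hd_geometry *geo)
	{
		sector_t cyl = capacity;

		sector_div(cyl, heads * spt);	/* cyl now holds the quotient */
		geo->heads     = heads;
		geo->sectors   = spt;
		geo->cylinders = cyl;
	}

Note that pf_getgeo() assigns the return value of sector_div(), i.e. the remainder, to geo->cylinders; that behaviour is carried over unchanged from the removed ioctl code.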
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index c0233efabeba..51b7a5c5b77a 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1955,9 +1955,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write)
1955 if ((ret = blkdev_get(pd->bdev, FMODE_READ, O_RDONLY))) 1955 if ((ret = blkdev_get(pd->bdev, FMODE_READ, O_RDONLY)))
1956 goto out; 1956 goto out;
1957 1957
1958 if ((ret = bd_claim(pd->bdev, pd)))
1959 goto out_putdev;
1960
1958 if ((ret = pkt_get_last_written(pd, &lba))) { 1961 if ((ret = pkt_get_last_written(pd, &lba))) {
1959 printk("pktcdvd: pkt_get_last_written failed\n"); 1962 printk("pktcdvd: pkt_get_last_written failed\n");
1960 goto out_putdev; 1963 goto out_unclaim;
1961 } 1964 }
1962 1965
1963 set_capacity(pd->disk, lba << 2); 1966 set_capacity(pd->disk, lba << 2);
@@ -1967,7 +1970,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write)
1967 q = bdev_get_queue(pd->bdev); 1970 q = bdev_get_queue(pd->bdev);
1968 if (write) { 1971 if (write) {
1969 if ((ret = pkt_open_write(pd))) 1972 if ((ret = pkt_open_write(pd)))
1970 goto out_putdev; 1973 goto out_unclaim;
1971 /* 1974 /*
1972 * Some CDRW drives can not handle writes larger than one packet, 1975 * Some CDRW drives can not handle writes larger than one packet,
1973 * even if the size is a multiple of the packet size. 1976 * even if the size is a multiple of the packet size.
@@ -1982,13 +1985,15 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write)
1982 } 1985 }
1983 1986
1984 if ((ret = pkt_set_segment_merging(pd, q))) 1987 if ((ret = pkt_set_segment_merging(pd, q)))
1985 goto out_putdev; 1988 goto out_unclaim;
1986 1989
1987 if (write) 1990 if (write)
1988 printk("pktcdvd: %lukB available on disc\n", lba << 1); 1991 printk("pktcdvd: %lukB available on disc\n", lba << 1);
1989 1992
1990 return 0; 1993 return 0;
1991 1994
1995out_unclaim:
1996 bd_release(pd->bdev);
1992out_putdev: 1997out_putdev:
1993 blkdev_put(pd->bdev); 1998 blkdev_put(pd->bdev);
1994out: 1999out:
@@ -2007,6 +2012,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
2007 pkt_lock_door(pd, 0); 2012 pkt_lock_door(pd, 0);
2008 2013
2009 pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); 2014 pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
2015 bd_release(pd->bdev);
2010 blkdev_put(pd->bdev); 2016 blkdev_put(pd->bdev);
2011} 2017}
2012 2018
diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c
index 29d1518be72a..43415f69839f 100644
--- a/drivers/block/ps2esdi.c
+++ b/drivers/block/ps2esdi.c
@@ -81,8 +81,7 @@ static void (*current_int_handler) (u_int) = NULL;
81static void ps2esdi_normal_interrupt_handler(u_int); 81static void ps2esdi_normal_interrupt_handler(u_int);
82static void ps2esdi_initial_reset_int_handler(u_int); 82static void ps2esdi_initial_reset_int_handler(u_int);
83static void ps2esdi_geometry_int_handler(u_int); 83static void ps2esdi_geometry_int_handler(u_int);
84static int ps2esdi_ioctl(struct inode *inode, struct file *file, 84static int ps2esdi_getgeo(struct block_device *bdev, struct hd_geometry *geo);
85 u_int cmd, u_long arg);
86 85
87static int ps2esdi_read_status_words(int num_words, int max_words, u_short * buffer); 86static int ps2esdi_read_status_words(int num_words, int max_words, u_short * buffer);
88 87
@@ -132,7 +131,7 @@ static struct ps2esdi_i_struct ps2esdi_info[MAX_HD] =
132static struct block_device_operations ps2esdi_fops = 131static struct block_device_operations ps2esdi_fops =
133{ 132{
134 .owner = THIS_MODULE, 133 .owner = THIS_MODULE,
135 .ioctl = ps2esdi_ioctl, 134 .getgeo = ps2esdi_getgeo,
136}; 135};
137 136
138static struct gendisk *ps2esdi_gendisk[2]; 137static struct gendisk *ps2esdi_gendisk[2];
@@ -1058,21 +1057,13 @@ static void dump_cmd_complete_status(u_int int_ret_code)
1058 1057
1059} 1058}
1060 1059
1061static int ps2esdi_ioctl(struct inode *inode, 1060static int ps2esdi_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1062 struct file *file, u_int cmd, u_long arg)
1063{ 1061{
1064 struct ps2esdi_i_struct *p = inode->i_bdev->bd_disk->private_data; 1062 struct ps2esdi_i_struct *p = bdev->bd_disk->private_data;
1065 struct ps2esdi_geometry geom; 1063
1066 1064 geo->heads = p->head;
1067 if (cmd != HDIO_GETGEO) 1065 geo->sectors = p->sect;
1068 return -EINVAL; 1066 geo->cylinders = p->cyl;
1069 memset(&geom, 0, sizeof(geom));
1070 geom.heads = p->head;
1071 geom.sectors = p->sect;
1072 geom.cylinders = p->cyl;
1073 geom.start = get_start_sect(inode->i_bdev);
1074 if (copy_to_user((void __user *)arg, &geom, sizeof(geom)))
1075 return -EFAULT;
1076 return 0; 1067 return 0;
1077} 1068}
1078 1069
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 9251f4131b53..c0cdc182a8b0 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -407,8 +407,7 @@ struct carm_array_info {
407 407
408static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); 408static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
409static void carm_remove_one (struct pci_dev *pdev); 409static void carm_remove_one (struct pci_dev *pdev);
410static int carm_bdev_ioctl(struct inode *ino, struct file *fil, 410static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo);
411 unsigned int cmd, unsigned long arg);
412 411
413static struct pci_device_id carm_pci_tbl[] = { 412static struct pci_device_id carm_pci_tbl[] = {
414 { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, }, 413 { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
@@ -426,7 +425,7 @@ static struct pci_driver carm_driver = {
426 425
427static struct block_device_operations carm_bd_ops = { 426static struct block_device_operations carm_bd_ops = {
428 .owner = THIS_MODULE, 427 .owner = THIS_MODULE,
429 .ioctl = carm_bdev_ioctl, 428 .getgeo = carm_bdev_getgeo,
430}; 429};
431 430
432static unsigned int carm_host_id; 431static unsigned int carm_host_id;
@@ -434,32 +433,14 @@ static unsigned long carm_major_alloc;
434 433
435 434
436 435
437static int carm_bdev_ioctl(struct inode *ino, struct file *fil, 436static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
438 unsigned int cmd, unsigned long arg)
439{ 437{
440 void __user *usermem = (void __user *) arg; 438 struct carm_port *port = bdev->bd_disk->private_data;
441 struct carm_port *port = ino->i_bdev->bd_disk->private_data;
442 struct hd_geometry geom;
443 439
444 switch (cmd) { 440 geo->heads = (u8) port->dev_geom_head;
445 case HDIO_GETGEO: 441 geo->sectors = (u8) port->dev_geom_sect;
446 if (!usermem) 442 geo->cylinders = port->dev_geom_cyl;
447 return -EINVAL; 443 return 0;
448
449 geom.heads = (u8) port->dev_geom_head;
450 geom.sectors = (u8) port->dev_geom_sect;
451 geom.cylinders = port->dev_geom_cyl;
452 geom.start = get_start_sect(ino->i_bdev);
453
454 if (copy_to_user(usermem, &geom, sizeof(geom)))
455 return -EFAULT;
456 return 0;
457
458 default:
459 break;
460 }
461
462 return -EOPNOTSUPP;
463} 444}
464 445
465static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE }; 446static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE };
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 0f48301342da..15299e7a1ade 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -809,34 +809,23 @@ static int mm_revalidate(struct gendisk *disk)
809 set_capacity(disk, card->mm_size << 1); 809 set_capacity(disk, card->mm_size << 1);
810 return 0; 810 return 0;
811} 811}
812/* 812
813----------------------------------------------------------------------------------- 813static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
814-- mm_ioctl
815-----------------------------------------------------------------------------------
816*/
817static int mm_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg)
818{ 814{
819 if (cmd == HDIO_GETGEO) { 815 struct cardinfo *card = bdev->bd_disk->private_data;
820 struct cardinfo *card = i->i_bdev->bd_disk->private_data; 816 int size = card->mm_size * (1024 / MM_HARDSECT);
821 int size = card->mm_size * (1024 / MM_HARDSECT);
822 struct hd_geometry geo;
823 /*
824 * get geometry: we have to fake one... trim the size to a
825 * multiple of 2048 (1M): tell we have 32 sectors, 64 heads,
826 * whatever cylinders.
827 */
828 geo.heads = 64;
829 geo.sectors = 32;
830 geo.start = get_start_sect(i->i_bdev);
831 geo.cylinders = size / (geo.heads * geo.sectors);
832
833 if (copy_to_user((void __user *) arg, &geo, sizeof(geo)))
834 return -EFAULT;
835 return 0;
836 }
837 817
838 return -EINVAL; 818 /*
819 * get geometry: we have to fake one... trim the size to a
820 * multiple of 2048 (1M): tell we have 32 sectors, 64 heads,
821 * whatever cylinders.
822 */
823 geo->heads = 64;
824 geo->sectors = 32;
825 geo->cylinders = size / (geo->heads * geo->sectors);
826 return 0;
839} 827}
828
840/* 829/*
841----------------------------------------------------------------------------------- 830-----------------------------------------------------------------------------------
842-- mm_check_change 831-- mm_check_change
@@ -855,7 +844,7 @@ static int mm_check_change(struct gendisk *disk)
855*/ 844*/
856static struct block_device_operations mm_fops = { 845static struct block_device_operations mm_fops = {
857 .owner = THIS_MODULE, 846 .owner = THIS_MODULE,
858 .ioctl = mm_ioctl, 847 .getgeo = mm_getgeo,
859 .revalidate_disk= mm_revalidate, 848 .revalidate_disk= mm_revalidate,
860 .media_changed = mm_check_change, 849 .media_changed = mm_check_change,
861}; 850};
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 063f0304a163..d1aaf31bd97e 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -247,43 +247,17 @@ static int viodasd_release(struct inode *ino, struct file *fil)
247 247
248/* External ioctl entry point. 248/* External ioctl entry point.
249 */ 249 */
250static int viodasd_ioctl(struct inode *ino, struct file *fil, 250static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
251 unsigned int cmd, unsigned long arg)
252{ 251{
253 unsigned char sectors; 252 struct gendisk *disk = bdev->bd_disk;
254 unsigned char heads; 253 struct viodasd_device *d = disk->private_data;
255 unsigned short cylinders;
256 struct hd_geometry *geo;
257 struct gendisk *gendisk;
258 struct viodasd_device *d;
259 254
 260 switch (cmd) { 255 geo->sectors = d->sectors ? d->sectors : 32;
 261 case HDIO_GETGEO: 256 geo->heads = d->tracks ? d->tracks : 64;
 262 geo = (struct hd_geometry *)arg; 257 geo->cylinders = d->cylinders ? d->cylinders :
 263 if (geo == NULL) 258 get_capacity(disk) / (geo->sectors * geo->heads);
264 return -EINVAL;
265 if (!access_ok(VERIFY_WRITE, geo, sizeof(*geo)))
266 return -EFAULT;
267 gendisk = ino->i_bdev->bd_disk;
268 d = gendisk->private_data;
269 sectors = d->sectors;
270 if (sectors == 0)
271 sectors = 32;
272 heads = d->tracks;
273 if (heads == 0)
274 heads = 64;
275 cylinders = d->cylinders;
276 if (cylinders == 0)
277 cylinders = get_capacity(gendisk) / (sectors * heads);
278 if (__put_user(sectors, &geo->sectors) ||
279 __put_user(heads, &geo->heads) ||
280 __put_user(cylinders, &geo->cylinders) ||
281 __put_user(get_start_sect(ino->i_bdev), &geo->start))
282 return -EFAULT;
283 return 0;
284 }
285 259
286 return -EINVAL; 260 return 0;
287} 261}
288 262
289/* 263/*
@@ -293,7 +267,7 @@ static struct block_device_operations viodasd_fops = {
293 .owner = THIS_MODULE, 267 .owner = THIS_MODULE,
294 .open = viodasd_open, 268 .open = viodasd_open,
295 .release = viodasd_release, 269 .release = viodasd_release,
296 .ioctl = viodasd_ioctl, 270 .getgeo = viodasd_getgeo,
297}; 271};
298 272
299/* 273/*
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 68b6d7b154cf..cbce7c5e9445 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -128,9 +128,12 @@ static DEFINE_SPINLOCK(xd_lock);
128 128
129static struct gendisk *xd_gendisk[2]; 129static struct gendisk *xd_gendisk[2];
130 130
131static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
132
131static struct block_device_operations xd_fops = { 133static struct block_device_operations xd_fops = {
132 .owner = THIS_MODULE, 134 .owner = THIS_MODULE,
133 .ioctl = xd_ioctl, 135 .ioctl = xd_ioctl,
136 .getgeo = xd_getgeo,
134}; 137};
135static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); 138static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int);
136static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors; 139static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors;
@@ -276,11 +279,11 @@ static u_char __init xd_detect (u_char *controller, unsigned int *address)
276 return(1); 279 return(1);
277 } 280 }
278 281
279 for (i = 0; i < (sizeof(xd_bases) / sizeof(xd_bases[0])); i++) { 282 for (i = 0; i < ARRAY_SIZE(xd_bases); i++) {
280 void __iomem *p = ioremap(xd_bases[i], 0x2000); 283 void __iomem *p = ioremap(xd_bases[i], 0x2000);
281 if (!p) 284 if (!p)
282 continue; 285 continue;
283 for (j = 1; j < (sizeof(xd_sigs) / sizeof(xd_sigs[0])); j++) { 286 for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) {
284 const char *s = xd_sigs[j].string; 287 const char *s = xd_sigs[j].string;
285 if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) { 288 if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) {
286 *controller = j; 289 *controller = j;
@@ -330,22 +333,20 @@ static void do_xd_request (request_queue_t * q)
330 } 333 }
331} 334}
332 335
336static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
337{
338 XD_INFO *p = bdev->bd_disk->private_data;
339
340 geo->heads = p->heads;
341 geo->sectors = p->sectors;
342 geo->cylinders = p->cylinders;
343 return 0;
344}
345
333/* xd_ioctl: handle device ioctl's */ 346/* xd_ioctl: handle device ioctl's */
334static int xd_ioctl (struct inode *inode,struct file *file,u_int cmd,u_long arg) 347static int xd_ioctl (struct inode *inode,struct file *file,u_int cmd,u_long arg)
335{ 348{
336 XD_INFO *p = inode->i_bdev->bd_disk->private_data;
337
338 switch (cmd) { 349 switch (cmd) {
339 case HDIO_GETGEO:
340 {
341 struct hd_geometry g;
342 struct hd_geometry __user *geom= (void __user *)arg;
343 g.heads = p->heads;
344 g.sectors = p->sectors;
345 g.cylinders = p->cylinders;
346 g.start = get_start_sect(inode->i_bdev);
347 return copy_to_user(geom, &g, sizeof(g)) ? -EFAULT : 0;
348 }
349 case HDIO_SET_DMA: 350 case HDIO_SET_DMA:
350 if (!capable(CAP_SYS_ADMIN)) return -EACCES; 351 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
351 if (xdc_busy) return -EBUSY; 352 if (xdc_busy) return -EBUSY;
@@ -1017,7 +1018,7 @@ static void __init do_xd_setup (int *integers)
1017 case 2: if ((integers[2] > 0) && (integers[2] < 16)) 1018 case 2: if ((integers[2] > 0) && (integers[2] < 16))
1018 xd_irq = integers[2]; 1019 xd_irq = integers[2];
1019 case 1: xd_override = 1; 1020 case 1: xd_override = 1;
1020 if ((integers[1] >= 0) && (integers[1] < (sizeof(xd_sigs) / sizeof(xd_sigs[0])))) 1021 if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs)))
1021 xd_type = integers[1]; 1022 xd_type = integers[1];
1022 case 0: break; 1023 case 0: break;
1023 default:printk("xd: too many parameters for xd\n"); 1024 default:printk("xd: too many parameters for xd\n");
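The NUMBER() and sizeof(x)/sizeof(x[0]) replacements in floppy.c and xd.c above converge on ARRAY_SIZE() from <linux/kernel.h>, which at this point is simply:

	#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

The expansion is identical to the open-coded forms, so these hunks are pure cleanup. A short illustration (xd_bases_demo is a made-up table, not the driver's):

	#include <linux/kernel.h>	/* ARRAY_SIZE(), printk() */

	static unsigned int xd_bases_demo[] = { 0xc8000, 0xca000, 0xd0000 };

	static void scan_bases_demo(void)
	{
		unsigned int i;

		/* ARRAY_SIZE() works only on true arrays; applied to a
		 * pointer it would silently compute sizeof(ptr)/sizeof(elem),
		 * which is one reason to keep the idiom in a single macro. */
		for (i = 0; i < ARRAY_SIZE(xd_bases_demo); i++)
			printk(KERN_DEBUG "probe at 0x%x\n", xd_bases_demo[i]);
	}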
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 5ebd06b1b4ca..dd7e6901c575 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -220,6 +220,14 @@ config SYNCLINKMP
220 The module will be called synclinkmp. If you want to do that, say M 220 The module will be called synclinkmp. If you want to do that, say M
221 here. 221 here.
222 222
223config SYNCLINK_GT
224 tristate "SyncLink GT/AC support"
225 depends on SERIAL_NONSTANDARD
226 help
227 Support for SyncLink GT and SyncLink AC families of
228 synchronous and asynchronous serial adapters
229 manufactured by Microgate Systems, Ltd. (www.microgate.com)
230
223config N_HDLC 231config N_HDLC
224 tristate "HDLC line discipline support" 232 tristate "HDLC line discipline support"
225 depends on SERIAL_NONSTANDARD 233 depends on SERIAL_NONSTANDARD
@@ -687,7 +695,7 @@ config NVRAM
687 695
688config RTC 696config RTC
689 tristate "Enhanced Real Time Clock Support" 697 tristate "Enhanced Real Time Clock Support"
690 depends on !PPC32 && !PARISC && !IA64 && !M68K && (!SPARC || PCI) 698 depends on !PPC32 && !PARISC && !IA64 && !M68K && (!SPARC || PCI) && !FRV
691 ---help--- 699 ---help---
692 If you say Y here and create a character special file /dev/rtc with 700 If you say Y here and create a character special file /dev/rtc with
693 major number 10 and minor number 135 using mknod ("man mknod"), you 701 major number 10 and minor number 135 using mknod ("man mknod"), you
@@ -735,7 +743,7 @@ config SGI_IP27_RTC
735 743
736config GEN_RTC 744config GEN_RTC
737 tristate "Generic /dev/rtc emulation" 745 tristate "Generic /dev/rtc emulation"
738 depends on RTC!=y && !IA64 && !ARM && !M32R && !SPARC 746 depends on RTC!=y && !IA64 && !ARM && !M32R && !SPARC && !FRV
739 ---help--- 747 ---help---
740 If you say Y here and create a character special file /dev/rtc with 748 If you say Y here and create a character special file /dev/rtc with
741 major number 10 and minor number 135 using mknod ("man mknod"), you 749 major number 10 and minor number 135 using mknod ("man mknod"), you
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 4aeae687e88a..d973d14d8f7f 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_RISCOM8) += riscom8.o
36obj-$(CONFIG_ISI) += isicom.o 36obj-$(CONFIG_ISI) += isicom.o
37obj-$(CONFIG_SYNCLINK) += synclink.o 37obj-$(CONFIG_SYNCLINK) += synclink.o
38obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o 38obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o
39obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o
39obj-$(CONFIG_N_HDLC) += n_hdlc.o 40obj-$(CONFIG_N_HDLC) += n_hdlc.o
40obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o 41obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
41obj-$(CONFIG_SX) += sx.o generic_serial.o 42obj-$(CONFIG_SX) += sx.o generic_serial.o
diff --git a/drivers/char/agp/sworks-agp.c b/drivers/char/agp/sworks-agp.c
index 3f8f7fa6b0ff..268f78d926d3 100644
--- a/drivers/char/agp/sworks-agp.c
+++ b/drivers/char/agp/sworks-agp.c
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/jiffies.h>
10#include <linux/agp_backend.h> 11#include <linux/agp_backend.h>
11#include "agp.h" 12#include "agp.h"
12 13
diff --git a/drivers/char/hw_random.c b/drivers/char/hw_random.c
index 49769f59ea1b..b3bc2e37e616 100644
--- a/drivers/char/hw_random.c
+++ b/drivers/char/hw_random.c
@@ -169,6 +169,7 @@ static struct pci_device_id rng_pci_tbl[] = {
169 169
170 { 0x8086, 0x2418, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, 170 { 0x8086, 0x2418, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
171 { 0x8086, 0x2428, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, 171 { 0x8086, 0x2428, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
172 { 0x8086, 0x2430, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
172 { 0x8086, 0x2448, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, 173 { 0x8086, 0x2448, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
173 { 0x8086, 0x244e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, 174 { 0x8086, 0x244e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
174 { 0x8086, 0x245e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, 175 { 0x8086, 0x245e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 91dd669273e0..5b2d18035073 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -101,6 +101,11 @@ static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
101 101
102 return 1; 102 return 1;
103} 103}
104
105static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t *size)
106{
107 return 1;
108}
104#endif 109#endif
105 110
106/* 111/*
@@ -228,26 +233,36 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
228 return written; 233 return written;
229} 234}
230 235
236#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
237static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
238 unsigned long size, pgprot_t vma_prot)
239{
240#ifdef pgprot_noncached
241 unsigned long offset = pfn << PAGE_SHIFT;
242
243 if (uncached_access(file, offset))
244 return pgprot_noncached(vma_prot);
245#endif
246 return vma_prot;
247}
248#endif
249
231static int mmap_mem(struct file * file, struct vm_area_struct * vma) 250static int mmap_mem(struct file * file, struct vm_area_struct * vma)
232{ 251{
233#if defined(__HAVE_PHYS_MEM_ACCESS_PROT) 252 size_t size = vma->vm_end - vma->vm_start;
253
254 if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, &size))
255 return -EINVAL;
256
234 vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, 257 vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
235 vma->vm_end - vma->vm_start, 258 size,
236 vma->vm_page_prot); 259 vma->vm_page_prot);
237#elif defined(pgprot_noncached)
238 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
239 int uncached;
240
241 uncached = uncached_access(file, offset);
242 if (uncached)
243 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
244#endif
245 260
246 /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ 261 /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
247 if (remap_pfn_range(vma, 262 if (remap_pfn_range(vma,
248 vma->vm_start, 263 vma->vm_start,
249 vma->vm_pgoff, 264 vma->vm_pgoff,
250 vma->vm_end-vma->vm_start, 265 size,
251 vma->vm_page_prot)) 266 vma->vm_page_prot))
252 return -EAGAIN; 267 return -EAGAIN;
253 return 0; 268 return 0;
@@ -817,7 +832,7 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf,
817 size_t count, loff_t *ppos) 832 size_t count, loff_t *ppos)
818{ 833{
819 char *tmp; 834 char *tmp;
820 int ret; 835 ssize_t ret;
821 836
822 tmp = kmalloc(count + 1, GFP_KERNEL); 837 tmp = kmalloc(count + 1, GFP_KERNEL);
823 if (tmp == NULL) 838 if (tmp == NULL)
@@ -826,6 +841,9 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf,
826 if (!copy_from_user(tmp, buf, count)) { 841 if (!copy_from_user(tmp, buf, count)) {
827 tmp[count] = 0; 842 tmp[count] = 0;
828 ret = printk("%s", tmp); 843 ret = printk("%s", tmp);
844 if (ret > count)
845 /* printk can add a prefix */
846 ret = count;
829 } 847 }
830 kfree(tmp); 848 kfree(tmp);
831 return ret; 849 return ret;
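Two independent fixes sit in the mem.c hunk: mmap_mem() now always goes through phys_mem_access_prot() (architectures defining __HAVE_PHYS_MEM_ACCESS_PROT supply their own, everyone else gets the new generic fallback that applies pgprot_noncached() when uncached_access() asks for it), and kmsg_write() widens ret from int to ssize_t to match the write() return type, clamping it because printk() also counts any log-level prefix it prepends. A sketch of the clamp pattern (demo_kmsg_write is illustrative; initialising ret to -EFAULT is an assumption about the lines outside this hunk):

	#include <linux/kernel.h>	/* printk() */
	#include <linux/slab.h>		/* kmalloc(), kfree() */
	#include <asm/uaccess.h>	/* copy_from_user() */

	static ssize_t demo_kmsg_write(const char __user *buf, size_t count)
	{
		char *tmp = kmalloc(count + 1, GFP_KERNEL);
		ssize_t ret = -EFAULT;	/* assumed default when the copy fails */

		if (!tmp)
			return -ENOMEM;
		if (!copy_from_user(tmp, buf, count)) {
			tmp[count] = 0;
			ret = printk("%s", tmp);
			if (ret > count)	/* printk may add a prefix */
				ret = count;	/* never report more than consumed */
		}
		kfree(tmp);
		return ret;
	}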
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 51a07370e636..f8dd8527c6aa 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -471,7 +471,6 @@ struct sonypi_keypress {
471 471
472static struct sonypi_device { 472static struct sonypi_device {
473 struct pci_dev *dev; 473 struct pci_dev *dev;
474 struct platform_device *pdev;
475 u16 irq; 474 u16 irq;
476 u16 bits; 475 u16 bits;
477 u16 ioport1; 476 u16 ioport1;
@@ -511,6 +510,11 @@ static struct sonypi_device {
511#define SONYPI_ACPI_ACTIVE 0 510#define SONYPI_ACPI_ACTIVE 0
512#endif /* CONFIG_ACPI */ 511#endif /* CONFIG_ACPI */
513 512
513#ifdef CONFIG_ACPI
514static struct acpi_device *sonypi_acpi_device;
515static int acpi_enabled;
516#endif
517
514static int sonypi_ec_write(u8 addr, u8 value) 518static int sonypi_ec_write(u8 addr, u8 value)
515{ 519{
516#ifdef CONFIG_ACPI_EC 520#ifdef CONFIG_ACPI_EC
@@ -864,6 +868,11 @@ found:
864 if (useinput) 868 if (useinput)
865 sonypi_report_input_event(event); 869 sonypi_report_input_event(event);
866 870
871#ifdef CONFIG_ACPI
872 if (acpi_enabled)
873 acpi_bus_generate_event(sonypi_acpi_device, 1, event);
874#endif
875
867 kfifo_put(sonypi_device.fifo, (unsigned char *)&event, sizeof(event)); 876 kfifo_put(sonypi_device.fifo, (unsigned char *)&event, sizeof(event));
868 kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN); 877 kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN);
869 wake_up_interruptible(&sonypi_device.fifo_proc_list); 878 wake_up_interruptible(&sonypi_device.fifo_proc_list);
@@ -1165,45 +1174,38 @@ static int sonypi_disable(void)
1165 return 0; 1174 return 0;
1166} 1175}
1167 1176
1168#ifdef CONFIG_PM 1177#ifdef CONFIG_ACPI
1169static int old_camera_power; 1178static int sonypi_acpi_add(struct acpi_device *device)
1170
1171static int sonypi_suspend(struct platform_device *dev, pm_message_t state)
1172{ 1179{
1173 old_camera_power = sonypi_device.camera_power; 1180 sonypi_acpi_device = device;
1174 sonypi_disable(); 1181 strcpy(acpi_device_name(device), "Sony laptop hotkeys");
1175 1182 strcpy(acpi_device_class(device), "sony/hotkey");
1176 return 0; 1183 return 0;
1177} 1184}
1178 1185
1179static int sonypi_resume(struct platform_device *dev) 1186static int sonypi_acpi_remove(struct acpi_device *device, int type)
1180{ 1187{
1181 sonypi_enable(old_camera_power); 1188 sonypi_acpi_device = NULL;
1182 return 0; 1189 return 0;
1183} 1190}
1184#endif
1185
1186static void sonypi_shutdown(struct platform_device *dev)
1187{
1188 sonypi_disable();
1189}
1190 1191
1191static struct platform_driver sonypi_driver = { 1192static struct acpi_driver sonypi_acpi_driver = {
1192#ifdef CONFIG_PM 1193 .name = "sonypi",
1193 .suspend = sonypi_suspend, 1194 .class = "hkey",
1194 .resume = sonypi_resume, 1195 .ids = "SNY6001",
1195#endif 1196 .ops = {
1196 .shutdown = sonypi_shutdown, 1197 .add = sonypi_acpi_add,
1197 .driver = { 1198 .remove = sonypi_acpi_remove,
1198 .name = "sonypi",
1199 }, 1199 },
1200}; 1200};
1201#endif
1201 1202
1202static int __devinit sonypi_create_input_devices(void) 1203static int __devinit sonypi_create_input_devices(void)
1203{ 1204{
1204 struct input_dev *jog_dev; 1205 struct input_dev *jog_dev;
1205 struct input_dev *key_dev; 1206 struct input_dev *key_dev;
1206 int i; 1207 int i;
1208 int error;
1207 1209
1208 sonypi_device.input_jog_dev = jog_dev = input_allocate_device(); 1210 sonypi_device.input_jog_dev = jog_dev = input_allocate_device();
1209 if (!jog_dev) 1211 if (!jog_dev)
@@ -1219,9 +1221,8 @@ static int __devinit sonypi_create_input_devices(void)
1219 1221
1220 sonypi_device.input_key_dev = key_dev = input_allocate_device(); 1222 sonypi_device.input_key_dev = key_dev = input_allocate_device();
1221 if (!key_dev) { 1223 if (!key_dev) {
1222 input_free_device(jog_dev); 1224 error = -ENOMEM;
1223 sonypi_device.input_jog_dev = NULL; 1225 goto err_free_jogdev;
1224 return -ENOMEM;
1225 } 1226 }
1226 1227
1227 key_dev->name = "Sony Vaio Keys"; 1228 key_dev->name = "Sony Vaio Keys";
@@ -1234,56 +1235,122 @@ static int __devinit sonypi_create_input_devices(void)
1234 if (sonypi_inputkeys[i].inputev) 1235 if (sonypi_inputkeys[i].inputev)
1235 set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit); 1236 set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit);
1236 1237
1237 input_register_device(jog_dev); 1238 error = input_register_device(jog_dev);
1238 input_register_device(key_dev); 1239 if (error)
1240 goto err_free_keydev;
1241
1242 error = input_register_device(key_dev);
1243 if (error)
1244 goto err_unregister_jogdev;
1239 1245
1240 return 0; 1246 return 0;
1247
1248 err_unregister_jogdev:
1249 input_unregister_device(jog_dev);
1250 /* Set to NULL so we don't free it again below */
1251 jog_dev = NULL;
1252 err_free_keydev:
1253 input_free_device(key_dev);
1254 sonypi_device.input_key_dev = NULL;
1255 err_free_jogdev:
1256 input_free_device(jog_dev);
1257 sonypi_device.input_jog_dev = NULL;
1258
1259 return error;
1241} 1260}
1242 1261
1243static int __devinit sonypi_probe(void) 1262static int __devinit sonypi_setup_ioports(struct sonypi_device *dev,
1263 const struct sonypi_ioport_list *ioport_list)
1244{ 1264{
1245 int i, ret; 1265 while (ioport_list->port1) {
1246 struct sonypi_ioport_list *ioport_list;
1247 struct sonypi_irq_list *irq_list;
1248 struct pci_dev *pcidev;
1249 1266
1250 if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, 1267 if (request_region(ioport_list->port1,
1251 PCI_DEVICE_ID_INTEL_82371AB_3, NULL))) 1268 sonypi_device.region_size,
1252 sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE1; 1269 "Sony Programable I/O Device")) {
1253 else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, 1270 dev->ioport1 = ioport_list->port1;
1254 PCI_DEVICE_ID_INTEL_ICH6_1, NULL))) 1271 dev->ioport2 = ioport_list->port2;
1255 sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3; 1272 return 0;
1256 else 1273 }
1257 sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2; 1274 ioport_list++;
1275 }
1258 1276
1259 sonypi_device.dev = pcidev; 1277 return -EBUSY;
1278}
1279
1280static int __devinit sonypi_setup_irq(struct sonypi_device *dev,
1281 const struct sonypi_irq_list *irq_list)
1282{
1283 while (irq_list->irq) {
1284
1285 if (!request_irq(irq_list->irq, sonypi_irq,
1286 SA_SHIRQ, "sonypi", sonypi_irq)) {
1287 dev->irq = irq_list->irq;
1288 dev->bits = irq_list->bits;
1289 return 0;
1290 }
1291 irq_list++;
1292 }
1293
1294 return -EBUSY;
1295}
1296
1297static void __devinit sonypi_display_info(void)
1298{
1299 printk(KERN_INFO "sonypi: detected type%d model, "
1300 "verbose = %d, fnkeyinit = %s, camera = %s, "
1301 "compat = %s, mask = 0x%08lx, useinput = %s, acpi = %s\n",
1302 sonypi_device.model,
1303 verbose,
1304 fnkeyinit ? "on" : "off",
1305 camera ? "on" : "off",
1306 compat ? "on" : "off",
1307 mask,
1308 useinput ? "on" : "off",
1309 SONYPI_ACPI_ACTIVE ? "on" : "off");
1310 printk(KERN_INFO "sonypi: enabled at irq=%d, port1=0x%x, port2=0x%x\n",
1311 sonypi_device.irq,
1312 sonypi_device.ioport1, sonypi_device.ioport2);
1313
1314 if (minor == -1)
1315 printk(KERN_INFO "sonypi: device allocated minor is %d\n",
1316 sonypi_misc_device.minor);
1317}
1318
1319static int __devinit sonypi_probe(struct platform_device *dev)
1320{
1321 const struct sonypi_ioport_list *ioport_list;
1322 const struct sonypi_irq_list *irq_list;
1323 struct pci_dev *pcidev;
1324 int error;
1260 1325
1261 spin_lock_init(&sonypi_device.fifo_lock); 1326 spin_lock_init(&sonypi_device.fifo_lock);
1262 sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL, 1327 sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL,
1263 &sonypi_device.fifo_lock); 1328 &sonypi_device.fifo_lock);
1264 if (IS_ERR(sonypi_device.fifo)) { 1329 if (IS_ERR(sonypi_device.fifo)) {
1265 printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); 1330 printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
1266 ret = PTR_ERR(sonypi_device.fifo); 1331 return PTR_ERR(sonypi_device.fifo);
1267 goto out_fifo;
1268 } 1332 }
1269 1333
1270 init_waitqueue_head(&sonypi_device.fifo_proc_list); 1334 init_waitqueue_head(&sonypi_device.fifo_proc_list);
1271 init_MUTEX(&sonypi_device.lock); 1335 init_MUTEX(&sonypi_device.lock);
1272 sonypi_device.bluetooth_power = -1; 1336 sonypi_device.bluetooth_power = -1;
1273 1337
1338 if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
1339 PCI_DEVICE_ID_INTEL_82371AB_3, NULL)))
1340 sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE1;
1341 else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
1342 PCI_DEVICE_ID_INTEL_ICH6_1, NULL)))
1343 sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3;
1344 else
1345 sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2;
1346
1274 if (pcidev && pci_enable_device(pcidev)) { 1347 if (pcidev && pci_enable_device(pcidev)) {
1275 printk(KERN_ERR "sonypi: pci_enable_device failed\n"); 1348 printk(KERN_ERR "sonypi: pci_enable_device failed\n");
1276 ret = -EIO; 1349 error = -EIO;
1277 goto out_pcienable; 1350 goto err_put_pcidev;
1278 }
1279
1280 if (minor != -1)
1281 sonypi_misc_device.minor = minor;
1282 if ((ret = misc_register(&sonypi_misc_device))) {
1283 printk(KERN_ERR "sonypi: misc_register failed\n");
1284 goto out_miscreg;
1285 } 1351 }
1286 1352
1353 sonypi_device.dev = pcidev;
1287 1354
1288 if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE1) { 1355 if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE1) {
1289 ioport_list = sonypi_type1_ioport_list; 1356 ioport_list = sonypi_type1_ioport_list;
@@ -1302,43 +1369,36 @@ static int __devinit sonypi_probe(void)
1302 irq_list = sonypi_type3_irq_list; 1369 irq_list = sonypi_type3_irq_list;
1303 } 1370 }
1304 1371
1305 for (i = 0; ioport_list[i].port1; i++) { 1372 error = sonypi_setup_ioports(&sonypi_device, ioport_list);
1306 if (request_region(ioport_list[i].port1, 1373 if (error) {
1307 sonypi_device.region_size, 1374 printk(KERN_ERR "sonypi: failed to request ioports\n");
1308 "Sony Programable I/O Device")) { 1375 goto err_disable_pcidev;
1309 /* get the ioport */
1310 sonypi_device.ioport1 = ioport_list[i].port1;
1311 sonypi_device.ioport2 = ioport_list[i].port2;
1312 break;
1313 }
1314 }
1315 if (!sonypi_device.ioport1) {
1316 printk(KERN_ERR "sonypi: request_region failed\n");
1317 ret = -ENODEV;
1318 goto out_reqreg;
1319 } 1376 }
1320 1377
1321 for (i = 0; irq_list[i].irq; i++) { 1378 error = sonypi_setup_irq(&sonypi_device, irq_list);
1322 1379 if (error) {
1323 sonypi_device.irq = irq_list[i].irq; 1380 printk(KERN_ERR "sonypi: request_irq failed\n");
1324 sonypi_device.bits = irq_list[i].bits; 1381 goto err_free_ioports;
1325
1326 if (!request_irq(sonypi_device.irq, sonypi_irq,
1327 SA_SHIRQ, "sonypi", sonypi_irq))
1328 break;
1329 } 1382 }
1330 1383
1331 if (!irq_list[i].irq) { 1384 if (minor != -1)
1332 printk(KERN_ERR "sonypi: request_irq failed\n"); 1385 sonypi_misc_device.minor = minor;
1333 ret = -ENODEV; 1386 error = misc_register(&sonypi_misc_device);
1334 goto out_reqirq; 1387 if (error) {
1388 printk(KERN_ERR "sonypi: misc_register failed\n");
1389 goto err_free_irq;
1335 } 1390 }
1336 1391
1392 sonypi_display_info();
1393
1337 if (useinput) { 1394 if (useinput) {
1338 1395
1339 ret = sonypi_create_input_devices(); 1396 error = sonypi_create_input_devices();
1340 if (ret) 1397 if (error) {
1341 goto out_inputdevices; 1398 printk(KERN_ERR
1399 "sonypi: failed to create input devices\n");
1400 goto err_miscdev_unregister;
1401 }
1342 1402
1343 spin_lock_init(&sonypi_device.input_fifo_lock); 1403 spin_lock_init(&sonypi_device.input_fifo_lock);
1344 sonypi_device.input_fifo = 1404 sonypi_device.input_fifo =
@@ -1346,91 +1406,104 @@ static int __devinit sonypi_probe(void)
1346 &sonypi_device.input_fifo_lock); 1406 &sonypi_device.input_fifo_lock);
1347 if (IS_ERR(sonypi_device.input_fifo)) { 1407 if (IS_ERR(sonypi_device.input_fifo)) {
1348 printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); 1408 printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
1349 ret = PTR_ERR(sonypi_device.input_fifo); 1409 error = PTR_ERR(sonypi_device.input_fifo);
1350 goto out_infifo; 1410 goto err_inpdev_unregister;
1351 } 1411 }
1352 1412
1353 INIT_WORK(&sonypi_device.input_work, input_keyrelease, NULL); 1413 INIT_WORK(&sonypi_device.input_work, input_keyrelease, NULL);
1354 } 1414 }
1355 1415
1356 sonypi_device.pdev = platform_device_register_simple("sonypi", -1,
1357 NULL, 0);
1358 if (IS_ERR(sonypi_device.pdev)) {
1359 ret = PTR_ERR(sonypi_device.pdev);
1360 goto out_platformdev;
1361 }
1362
1363 sonypi_enable(0); 1416 sonypi_enable(0);
1364 1417
1365 printk(KERN_INFO "sonypi: Sony Programmable I/O Controller Driver"
1366 "v%s.\n", SONYPI_DRIVER_VERSION);
1367 printk(KERN_INFO "sonypi: detected type%d model, "
1368 "verbose = %d, fnkeyinit = %s, camera = %s, "
1369 "compat = %s, mask = 0x%08lx, useinput = %s, acpi = %s\n",
1370 sonypi_device.model,
1371 verbose,
1372 fnkeyinit ? "on" : "off",
1373 camera ? "on" : "off",
1374 compat ? "on" : "off",
1375 mask,
1376 useinput ? "on" : "off",
1377 SONYPI_ACPI_ACTIVE ? "on" : "off");
1378 printk(KERN_INFO "sonypi: enabled at irq=%d, port1=0x%x, port2=0x%x\n",
1379 sonypi_device.irq,
1380 sonypi_device.ioport1, sonypi_device.ioport2);
1381
1382 if (minor == -1)
1383 printk(KERN_INFO "sonypi: device allocated minor is %d\n",
1384 sonypi_misc_device.minor);
1385
1386 return 0; 1418 return 0;
1387 1419
1388out_platformdev: 1420 err_inpdev_unregister:
1389 kfifo_free(sonypi_device.input_fifo);
1390out_infifo:
1391 input_unregister_device(sonypi_device.input_key_dev); 1421 input_unregister_device(sonypi_device.input_key_dev);
1392 input_unregister_device(sonypi_device.input_jog_dev); 1422 input_unregister_device(sonypi_device.input_jog_dev);
1393out_inputdevices: 1423 err_miscdev_unregister:
1424 misc_deregister(&sonypi_misc_device);
1425 err_free_irq:
1394 free_irq(sonypi_device.irq, sonypi_irq); 1426 free_irq(sonypi_device.irq, sonypi_irq);
1395out_reqirq: 1427 err_free_ioports:
1396 release_region(sonypi_device.ioport1, sonypi_device.region_size); 1428 release_region(sonypi_device.ioport1, sonypi_device.region_size);
1397out_reqreg: 1429 err_disable_pcidev:
1398 misc_deregister(&sonypi_misc_device);
1399out_miscreg:
1400 if (pcidev) 1430 if (pcidev)
1401 pci_disable_device(pcidev); 1431 pci_disable_device(pcidev);
1402out_pcienable: 1432 err_put_pcidev:
1433 pci_dev_put(pcidev);
1403 kfifo_free(sonypi_device.fifo); 1434 kfifo_free(sonypi_device.fifo);
1404out_fifo: 1435
1405 pci_dev_put(sonypi_device.dev); 1436 return error;
1406 return ret;
1407} 1437}
1408 1438
1409static void __devexit sonypi_remove(void) 1439static int __devexit sonypi_remove(struct platform_device *dev)
1410{ 1440{
1411 sonypi_disable(); 1441 sonypi_disable();
1412 1442
1413 synchronize_sched(); /* Allow sonypi interrupt to complete. */ 1443 synchronize_sched(); /* Allow sonypi interrupt to complete. */
1414 flush_scheduled_work(); 1444 flush_scheduled_work();
1415 1445
1416 platform_device_unregister(sonypi_device.pdev);
1417
1418 if (useinput) { 1446 if (useinput) {
1419 input_unregister_device(sonypi_device.input_key_dev); 1447 input_unregister_device(sonypi_device.input_key_dev);
1420 input_unregister_device(sonypi_device.input_jog_dev); 1448 input_unregister_device(sonypi_device.input_jog_dev);
1421 kfifo_free(sonypi_device.input_fifo); 1449 kfifo_free(sonypi_device.input_fifo);
1422 } 1450 }
1423 1451
1452 misc_deregister(&sonypi_misc_device);
1453
1424 free_irq(sonypi_device.irq, sonypi_irq); 1454 free_irq(sonypi_device.irq, sonypi_irq);
1425 release_region(sonypi_device.ioport1, sonypi_device.region_size); 1455 release_region(sonypi_device.ioport1, sonypi_device.region_size);
1426 misc_deregister(&sonypi_misc_device); 1456
1427 if (sonypi_device.dev) 1457 if (sonypi_device.dev) {
1428 pci_disable_device(sonypi_device.dev); 1458 pci_disable_device(sonypi_device.dev);
1459 pci_dev_put(sonypi_device.dev);
1460 }
1461
1429 kfifo_free(sonypi_device.fifo); 1462 kfifo_free(sonypi_device.fifo);
1430 pci_dev_put(sonypi_device.dev); 1463
1431 printk(KERN_INFO "sonypi: removed.\n"); 1464 return 0;
1432} 1465}
1433 1466
1467#ifdef CONFIG_PM
1468static int old_camera_power;
1469
1470static int sonypi_suspend(struct platform_device *dev, pm_message_t state)
1471{
1472 old_camera_power = sonypi_device.camera_power;
1473 sonypi_disable();
1474
1475 return 0;
1476}
1477
1478static int sonypi_resume(struct platform_device *dev)
1479{
1480 sonypi_enable(old_camera_power);
1481 return 0;
1482}
1483#else
1484#define sonypi_suspend NULL
1485#define sonypi_resume NULL
1486#endif
1487
1488static void sonypi_shutdown(struct platform_device *dev)
1489{
1490 sonypi_disable();
1491}
1492
1493static struct platform_driver sonypi_driver = {
1494 .driver = {
1495 .name = "sonypi",
1496 .owner = THIS_MODULE,
1497 },
1498 .probe = sonypi_probe,
1499 .remove = __devexit_p(sonypi_remove),
1500 .shutdown = sonypi_shutdown,
1501 .suspend = sonypi_suspend,
1502 .resume = sonypi_resume,
1503};
1504
1505static struct platform_device *sonypi_platform_device;
1506
1434static struct dmi_system_id __initdata sonypi_dmi_table[] = { 1507static struct dmi_system_id __initdata sonypi_dmi_table[] = {
1435 { 1508 {
1436 .ident = "Sony Vaio", 1509 .ident = "Sony Vaio",
@@ -1451,26 +1524,52 @@ static struct dmi_system_id __initdata sonypi_dmi_table[] = {
1451 1524
1452static int __init sonypi_init(void) 1525static int __init sonypi_init(void)
1453{ 1526{
1454 int ret; 1527 int error;
1528
1529 printk(KERN_INFO
1530 "sonypi: Sony Programmable I/O Controller Driver v%s.\n",
1531 SONYPI_DRIVER_VERSION);
1455 1532
1456 if (!dmi_check_system(sonypi_dmi_table)) 1533 if (!dmi_check_system(sonypi_dmi_table))
1457 return -ENODEV; 1534 return -ENODEV;
1458 1535
1459 ret = platform_driver_register(&sonypi_driver); 1536 error = platform_driver_register(&sonypi_driver);
1460 if (ret) 1537 if (error)
1461 return ret; 1538 return error;
1462 1539
1463 ret = sonypi_probe(); 1540 sonypi_platform_device = platform_device_alloc("sonypi", -1);
1464 if (ret) 1541 if (!sonypi_platform_device) {
1465 platform_driver_unregister(&sonypi_driver); 1542 error = -ENOMEM;
1543 goto err_driver_unregister;
1544 }
1466 1545
1467 return ret; 1546 error = platform_device_add(sonypi_platform_device);
1547 if (error)
1548 goto err_free_device;
1549
1550#ifdef CONFIG_ACPI
1551 if (acpi_bus_register_driver(&sonypi_acpi_driver) > 0)
1552 acpi_enabled = 1;
1553#endif
1554
1555 return 0;
1556
1557 err_free_device:
1558 platform_device_put(sonypi_platform_device);
1559 err_driver_unregister:
1560 platform_driver_unregister(&sonypi_driver);
1561 return error;
1468} 1562}
1469 1563
1470static void __exit sonypi_exit(void) 1564static void __exit sonypi_exit(void)
1471{ 1565{
1566#ifdef CONFIG_ACPI
1567 if (acpi_enabled)
1568 acpi_bus_unregister_driver(&sonypi_acpi_driver);
1569#endif
1570 platform_device_unregister(sonypi_platform_device);
1472 platform_driver_unregister(&sonypi_driver); 1571 platform_driver_unregister(&sonypi_driver);
1473 sonypi_remove(); 1572 printk(KERN_INFO "sonypi: removed.\n");
1474} 1573}
1475 1574
1476module_init(sonypi_init); 1575module_init(sonypi_init);
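Structurally, the sonypi rework turns an open-coded singleton into a real platform driver: probe, remove, suspend, resume and shutdown all hang off struct platform_driver, and sonypi_init() instantiates the single device with the platform_device_alloc()/platform_device_add() pair. The unwind rule the hunk follows is worth calling out: a device that failed platform_device_add() is dropped with platform_device_put(), while a successfully added one is torn down with platform_device_unregister(). The idiom in isolation (the demo_* names are placeholders):

	#include <linux/module.h>	/* THIS_MODULE */
	#include <linux/init.h>		/* __init, __exit */
	#include <linux/platform_device.h>

	static struct platform_driver demo_driver = {
		.driver = {
			.name	= "demo",
			.owner	= THIS_MODULE,
		},
	};

	static struct platform_device *demo_pdev;

	static int __init demo_init(void)
	{
		int error = platform_driver_register(&demo_driver);

		if (error)
			return error;

		demo_pdev = platform_device_alloc("demo", -1);
		if (!demo_pdev) {
			error = -ENOMEM;
			goto err_driver_unregister;
		}

		error = platform_device_add(demo_pdev);
		if (error)
			goto err_free_device;

		return 0;

	err_free_device:
		platform_device_put(demo_pdev);	/* never added: put, not unregister */
	err_driver_unregister:
		platform_driver_unregister(&demo_driver);
		return error;
	}

	static void __exit demo_exit(void)
	{
		platform_device_unregister(demo_pdev);	/* added: unregister */
		platform_driver_unregister(&demo_driver);
	}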
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
new file mode 100644
index 000000000000..2b9cde94e2f7
--- /dev/null
+++ b/drivers/char/synclink_gt.c
@@ -0,0 +1,4501 @@
1/*
2 * $Id: synclink_gt.c,v 4.20 2005/11/08 19:51:55 paulkf Exp $
3 *
4 * Device driver for Microgate SyncLink GT serial adapters.
5 *
6 * written by Paul Fulghum for Microgate Corporation
7 * paulkf@microgate.com
8 *
9 * Microgate and SyncLink are trademarks of Microgate Corporation
10 *
11 * This code is released under the GNU General Public License (GPL)
12 *
13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
14 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
17 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
23 * OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26/*
27 * DEBUG OUTPUT DEFINITIONS
28 *
29 * uncomment lines below to enable specific types of debug output
30 *
31 * DBGINFO information - most verbose output
32 * DBGERR serious errors
33 * DBGBH bottom half service routine debugging
34 * DBGISR interrupt service routine debugging
35 * DBGDATA output receive and transmit data
36 * DBGTBUF output transmit DMA buffers and registers
37 * DBGRBUF output receive DMA buffers and registers
38 */
39
40#define DBGINFO(fmt) if (debug_level >= DEBUG_LEVEL_INFO) printk fmt
41#define DBGERR(fmt) if (debug_level >= DEBUG_LEVEL_ERROR) printk fmt
42#define DBGBH(fmt) if (debug_level >= DEBUG_LEVEL_BH) printk fmt
43#define DBGISR(fmt) if (debug_level >= DEBUG_LEVEL_ISR) printk fmt
44#define DBGDATA(info, buf, size, label) if (debug_level >= DEBUG_LEVEL_DATA) trace_block((info), (buf), (size), (label))
45//#define DBGTBUF(info) dump_tbufs(info)
46//#define DBGRBUF(info) dump_rbufs(info)
47
48
49#include <linux/config.h>
50#include <linux/module.h>
51#include <linux/version.h>
52#include <linux/errno.h>
53#include <linux/signal.h>
54#include <linux/sched.h>
55#include <linux/timer.h>
56#include <linux/interrupt.h>
57#include <linux/pci.h>
58#include <linux/tty.h>
59#include <linux/tty_flip.h>
60#include <linux/serial.h>
61#include <linux/major.h>
62#include <linux/string.h>
63#include <linux/fcntl.h>
64#include <linux/ptrace.h>
65#include <linux/ioport.h>
66#include <linux/mm.h>
67#include <linux/slab.h>
68#include <linux/netdevice.h>
69#include <linux/vmalloc.h>
70#include <linux/init.h>
71#include <linux/delay.h>
72#include <linux/ioctl.h>
73#include <linux/termios.h>
74#include <linux/bitops.h>
75#include <linux/workqueue.h>
76#include <linux/hdlc.h>
77
78#include <asm/serial.h>
79#include <asm/system.h>
80#include <asm/io.h>
81#include <asm/irq.h>
82#include <asm/dma.h>
83#include <asm/types.h>
84#include <asm/uaccess.h>
85
86#include "linux/synclink.h"
87
88#ifdef CONFIG_HDLC_MODULE
89#define CONFIG_HDLC 1
90#endif
91
92/*
93 * module identification
94 */
95static char *driver_name = "SyncLink GT";
96static char *driver_version = "$Revision: 4.20 $";
97static char *tty_driver_name = "synclink_gt";
98static char *tty_dev_prefix = "ttySLG";
99MODULE_LICENSE("GPL");
100#define MGSL_MAGIC 0x5401
101#define MAX_DEVICES 12
102
103static struct pci_device_id pci_table[] = {
104 {PCI_VENDOR_ID_MICROGATE, SYNCLINK_GT_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
105 {PCI_VENDOR_ID_MICROGATE, SYNCLINK_GT4_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
106 {PCI_VENDOR_ID_MICROGATE, SYNCLINK_AC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
107 {0,}, /* terminate list */
108};
109MODULE_DEVICE_TABLE(pci, pci_table);
110
111static int init_one(struct pci_dev *dev,const struct pci_device_id *ent);
112static void remove_one(struct pci_dev *dev);
113static struct pci_driver pci_driver = {
114 .name = "synclink_gt",
115 .id_table = pci_table,
116 .probe = init_one,
117 .remove = __devexit_p(remove_one),
118};
119
120static int pci_registered;
121
122/*
123 * module configuration and status
124 */
125static struct slgt_info *slgt_device_list;
126static int slgt_device_count;
127
128static int ttymajor;
129static int debug_level;
130static int maxframe[MAX_DEVICES];
131static int dosyncppp[MAX_DEVICES];
132
133module_param(ttymajor, int, 0);
134module_param(debug_level, int, 0);
135module_param_array(maxframe, int, NULL, 0);
136module_param_array(dosyncppp, int, NULL, 0);
137
138MODULE_PARM_DESC(ttymajor, "TTY major device number override: 0=auto assigned");
139MODULE_PARM_DESC(debug_level, "Debug syslog output: 0=disabled, 1 to 5=increasing detail");
140MODULE_PARM_DESC(maxframe, "Maximum frame size used by device (4096 to 65535)");
141MODULE_PARM_DESC(dosyncppp, "Enable synchronous net device, 0=disable 1=enable");
142
143/*
144 * tty support and callbacks
145 */
146#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
147
148static struct tty_driver *serial_driver;
149
150static int open(struct tty_struct *tty, struct file * filp);
151static void close(struct tty_struct *tty, struct file * filp);
152static void hangup(struct tty_struct *tty);
153static void set_termios(struct tty_struct *tty, struct termios *old_termios);
154
155static int write(struct tty_struct *tty, const unsigned char *buf, int count);
156static void put_char(struct tty_struct *tty, unsigned char ch);
157static void send_xchar(struct tty_struct *tty, char ch);
158static void wait_until_sent(struct tty_struct *tty, int timeout);
159static int write_room(struct tty_struct *tty);
160static void flush_chars(struct tty_struct *tty);
161static void flush_buffer(struct tty_struct *tty);
162static void tx_hold(struct tty_struct *tty);
163static void tx_release(struct tty_struct *tty);
164
165static int ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg);
166static int read_proc(char *page, char **start, off_t off, int count,int *eof, void *data);
167static int chars_in_buffer(struct tty_struct *tty);
168static void throttle(struct tty_struct * tty);
169static void unthrottle(struct tty_struct * tty);
170static void set_break(struct tty_struct *tty, int break_state);
171
172/*
173 * generic HDLC support and callbacks
174 */
175#ifdef CONFIG_HDLC
176#define dev_to_port(D) (dev_to_hdlc(D)->priv)
177static void hdlcdev_tx_done(struct slgt_info *info);
178static void hdlcdev_rx(struct slgt_info *info, char *buf, int size);
179static int hdlcdev_init(struct slgt_info *info);
180static void hdlcdev_exit(struct slgt_info *info);
181#endif
182
183
184/*
185 * device specific structures, macros and functions
186 */
187
188#define SLGT_MAX_PORTS 4
189#define SLGT_REG_SIZE 256
190
191/*
192 * DMA buffer descriptor and access macros
193 */
194struct slgt_desc
195{
196 unsigned short count;
197 unsigned short status;
198 unsigned int pbuf; /* physical address of data buffer */
199 unsigned int next; /* physical address of next descriptor */
200
201 /* driver book keeping */
202 char *buf; /* virtual address of data buffer */
203 unsigned int pdesc; /* physical address of this descriptor */
204 dma_addr_t buf_dma_addr;
205};
206
207#define set_desc_buffer(a,b) (a).pbuf = cpu_to_le32((unsigned int)(b))
208#define set_desc_next(a,b) (a).next = cpu_to_le32((unsigned int)(b))
209#define set_desc_count(a,b) (a).count = cpu_to_le16((unsigned short)(b))
210#define set_desc_eof(a,b) (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0))
211#define desc_count(a) (le16_to_cpu((a).count))
212#define desc_status(a) (le16_to_cpu((a).status))
213#define desc_complete(a) (le16_to_cpu((a).status) & BIT15)
214#define desc_eof(a) (le16_to_cpu((a).status) & BIT2)
215#define desc_crc_error(a) (le16_to_cpu((a).status) & BIT1)
216#define desc_abort(a) (le16_to_cpu((a).status) & BIT0)
217#define desc_residue(a) ((le16_to_cpu((a).status) & 0x38) >> 3)
218
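/*
 * Editor's sketch (illustrative only, not part of the driver): how the
 * endian-safe accessors above might be used to link a circular receive
 * descriptor list. buf_dma_addr and pdesc are fields of struct slgt_desc
 * above; rbufs/rbuf_count live in struct slgt_info below and DMABUFSIZE
 * is defined later in this file.
 *
 *	unsigned int i;
 *	for (i = 0; i < info->rbuf_count; i++) {
 *		struct slgt_desc *d = &info->rbufs[i];
 *		set_desc_buffer(*d, d->buf_dma_addr);
 *		set_desc_count(*d, DMABUFSIZE);
 *		set_desc_next(*d, info->rbufs[(i + 1) % info->rbuf_count].pdesc);
 *	}
 */
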
219struct _input_signal_events {
220 int ri_up;
221 int ri_down;
222 int dsr_up;
223 int dsr_down;
224 int dcd_up;
225 int dcd_down;
226 int cts_up;
227 int cts_down;
228};
229
230/*
231 * device instance data structure
232 */
233struct slgt_info {
234 void *if_ptr; /* General purpose pointer (used by SPPP) */
235
236 struct slgt_info *next_device; /* device list link */
237
238 int magic;
239 int flags;
240
241 char device_name[25];
242 struct pci_dev *pdev;
243
244 int port_count; /* count of ports on adapter */
245 int adapter_num; /* adapter instance number */
246 int port_num; /* port instance number */
247
248 /* array of pointers to port contexts on this adapter */
249 struct slgt_info *port_array[SLGT_MAX_PORTS];
250
251 int count; /* count of opens */
252 int line; /* tty line instance number */
253 unsigned short close_delay;
254 unsigned short closing_wait; /* time to wait before closing */
255
256 struct mgsl_icount icount;
257
258 struct tty_struct *tty;
259 int timeout;
260 int x_char; /* xon/xoff character */
261 int blocked_open; /* # of blocked opens */
262 unsigned int read_status_mask;
263 unsigned int ignore_status_mask;
264
265 wait_queue_head_t open_wait;
266 wait_queue_head_t close_wait;
267
268 wait_queue_head_t status_event_wait_q;
269 wait_queue_head_t event_wait_q;
270 struct timer_list tx_timer;
271 struct timer_list rx_timer;
272
273 spinlock_t lock; /* spinlock for synchronizing with ISR */
274
275 struct work_struct task;
276 u32 pending_bh;
277 int bh_requested;
278 int bh_running;
279
280 int isr_overflow;
281 int irq_requested; /* nonzero if IRQ requested */
282 int irq_occurred; /* for diagnostics use */
283
284 /* device configuration */
285
286 unsigned int bus_type;
287 unsigned int irq_level;
288 unsigned long irq_flags;
289
290 unsigned char __iomem * reg_addr; /* memory mapped registers address */
291 u32 phys_reg_addr;
292 u32 reg_offset;
293 int reg_addr_requested;
294
295 MGSL_PARAMS params; /* communications parameters */
296 u32 idle_mode;
297 u32 max_frame_size; /* as set by device config */
298
299 unsigned int raw_rx_size;
300 unsigned int if_mode;
301
302 /* device status */
303
304 int rx_enabled;
305 int rx_restart;
306
307 int tx_enabled;
308 int tx_active;
309
310 unsigned char signals; /* serial signal states */
311 unsigned int init_error; /* initialization error */
312
313 unsigned char *tx_buf;
314 int tx_count;
315
316 char flag_buf[MAX_ASYNC_BUFFER_SIZE];
317 char char_buf[MAX_ASYNC_BUFFER_SIZE];
318 BOOLEAN drop_rts_on_tx_done;
319 struct _input_signal_events input_signal_events;
320
321 int dcd_chkcount; /* check counts to prevent */
322 int cts_chkcount; /* too many IRQs if a signal */
323 int dsr_chkcount; /* is floating */
324 int ri_chkcount;
325
326 char *bufs; /* virtual address of DMA buffer lists */
327 dma_addr_t bufs_dma_addr; /* physical address of buffer descriptors */
328
329 unsigned int rbuf_count;
330 struct slgt_desc *rbufs;
331 unsigned int rbuf_current;
332 unsigned int rbuf_index;
333
334 unsigned int tbuf_count;
335 struct slgt_desc *tbufs;
336 unsigned int tbuf_current;
337 unsigned int tbuf_start;
338
339 unsigned char *tmp_rbuf;
340 unsigned int tmp_rbuf_count;
341
342 /* SPPP/Cisco HDLC device parts */
343
344 int netcount;
345 int dosyncppp;
346 spinlock_t netlock;
347#ifdef CONFIG_HDLC
348 struct net_device *netdev;
349#endif
350
351};
352
353static MGSL_PARAMS default_params = {
354 .mode = MGSL_MODE_HDLC,
355 .loopback = 0,
356 .flags = HDLC_FLAG_UNDERRUN_ABORT15,
357 .encoding = HDLC_ENCODING_NRZI_SPACE,
358 .clock_speed = 0,
359 .addr_filter = 0xff,
360 .crc_type = HDLC_CRC_16_CCITT,
361 .preamble_length = HDLC_PREAMBLE_LENGTH_8BITS,
362 .preamble = HDLC_PREAMBLE_PATTERN_NONE,
363 .data_rate = 9600,
364 .data_bits = 8,
365 .stop_bits = 1,
366 .parity = ASYNC_PARITY_NONE
367};
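
/*
 * Illustrative user-space sketch (editor's note): applications replace
 * these defaults at run time through the MGSL_IOCGPARAMS and
 * MGSL_IOCSPARAMS ioctls handled later in this file. fd is an open
 * port device; error handling omitted.
 *
 *	MGSL_PARAMS params;
 *	ioctl(fd, MGSL_IOCGPARAMS, &params);
 *	params.mode = MGSL_MODE_HDLC;
 *	params.clock_speed = 64000;
 *	params.flags = HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG;
 *	ioctl(fd, MGSL_IOCSPARAMS, &params);
 */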
368
369
370#define BH_RECEIVE 1
371#define BH_TRANSMIT 2
372#define BH_STATUS 4
373#define IO_PIN_SHUTDOWN_LIMIT 100
374
375#define DMABUFSIZE 256
376#define DESC_LIST_SIZE 4096
377
378#define MASK_PARITY BIT1 /* rx status byte bit 1 (see rx_async) */
379#define MASK_FRAMING BIT0 /* rx status byte bit 0 (see rx_async) */
380#define MASK_BREAK BIT14 /* SSR RXBREAK bit (see isr_serial) */
381#define MASK_OVERRUN BIT4
382
383#define GSR 0x00 /* global status */
384#define TDR 0x80 /* tx data */
385#define RDR 0x80 /* rx data */
386#define TCR 0x82 /* tx control */
387#define TIR 0x84 /* tx idle */
388#define TPR 0x85 /* tx preamble */
389#define RCR 0x86 /* rx control */
390#define VCR 0x88 /* V.24 control */
391#define CCR 0x89 /* clock control */
392#define BDR 0x8a /* baud divisor */
393#define SCR 0x8c /* serial control */
394#define SSR 0x8e /* serial status */
395#define RDCSR 0x90 /* rx DMA control/status */
396#define TDCSR 0x94 /* tx DMA control/status */
397#define RDDAR 0x98 /* rx DMA descriptor address */
398#define TDDAR 0x9c /* tx DMA descriptor address */
399
400#define RXIDLE BIT14 /* serial status, HDLC mode */
401#define RXBREAK BIT14 /* serial status, async mode (same bit, mode dependent) */
402#define IRQ_TXDATA BIT13
403#define IRQ_TXIDLE BIT12
404#define IRQ_TXUNDER BIT11 /* HDLC */
405#define IRQ_RXDATA BIT10
406#define IRQ_RXIDLE BIT9 /* HDLC */
407#define IRQ_RXBREAK BIT9 /* async */
408#define IRQ_RXOVER BIT8
409#define IRQ_DSR BIT7
410#define IRQ_CTS BIT6
411#define IRQ_DCD BIT5
412#define IRQ_RI BIT4
413#define IRQ_ALL 0x3ff0
414#define IRQ_MASTER BIT0
415
416#define slgt_irq_on(info, mask) \
417 wr_reg16((info), SCR, (unsigned short)(rd_reg16((info), SCR) | (mask)))
418#define slgt_irq_off(info, mask) \
419 wr_reg16((info), SCR, (unsigned short)(rd_reg16((info), SCR) & ~(mask)))
420
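/*
 * Editor's note: the two macros above perform a read-modify-write of the
 * serial control register (SCR) so individual interrupt sources can be
 * toggled without disturbing the others, for example:
 *
 *	slgt_irq_on(info, IRQ_DCD + IRQ_CTS);	(enable modem signal IRQs)
 *	slgt_irq_off(info, IRQ_ALL);		(mask all serial IRQs)
 *
 * Callers run under info->lock or in interrupt context, which keeps the
 * read-modify-write sequence atomic with respect to the ISR.
 */
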
421static __u8 rd_reg8(struct slgt_info *info, unsigned int addr);
422static void wr_reg8(struct slgt_info *info, unsigned int addr, __u8 value);
423static __u16 rd_reg16(struct slgt_info *info, unsigned int addr);
424static void wr_reg16(struct slgt_info *info, unsigned int addr, __u16 value);
425static __u32 rd_reg32(struct slgt_info *info, unsigned int addr);
426static void wr_reg32(struct slgt_info *info, unsigned int addr, __u32 value);
427
428static void msc_set_vcr(struct slgt_info *info);
429
430static int startup(struct slgt_info *info);
431static int block_til_ready(struct tty_struct *tty, struct file *filp, struct slgt_info *info);
432static void shutdown(struct slgt_info *info);
433static void program_hw(struct slgt_info *info);
434static void change_params(struct slgt_info *info);
435
436static int register_test(struct slgt_info *info);
437static int irq_test(struct slgt_info *info);
438static int loopback_test(struct slgt_info *info);
439static int adapter_test(struct slgt_info *info);
440
441static void reset_adapter(struct slgt_info *info);
442static void reset_port(struct slgt_info *info);
443static void async_mode(struct slgt_info *info);
444static void hdlc_mode(struct slgt_info *info);
445
446static void rx_stop(struct slgt_info *info);
447static void rx_start(struct slgt_info *info);
448static void reset_rbufs(struct slgt_info *info);
449static void free_rbufs(struct slgt_info *info, unsigned int first, unsigned int last);
450static void rdma_reset(struct slgt_info *info);
451static int rx_get_frame(struct slgt_info *info);
452static int rx_get_buf(struct slgt_info *info);
453
454static void tx_start(struct slgt_info *info);
455static void tx_stop(struct slgt_info *info);
456static void tx_set_idle(struct slgt_info *info);
457static unsigned int free_tbuf_count(struct slgt_info *info);
458static void reset_tbufs(struct slgt_info *info);
459static void tdma_reset(struct slgt_info *info);
460static void tx_load(struct slgt_info *info, const char *buf, unsigned int count);
461
462static void get_signals(struct slgt_info *info);
463static void set_signals(struct slgt_info *info);
464static void enable_loopback(struct slgt_info *info);
465static void set_rate(struct slgt_info *info, u32 data_rate);
466
467static int bh_action(struct slgt_info *info);
468static void bh_handler(void* context);
469static void bh_transmit(struct slgt_info *info);
470static void isr_serial(struct slgt_info *info);
471static void isr_rdma(struct slgt_info *info);
472static void isr_txeom(struct slgt_info *info, unsigned short status);
473static void isr_tdma(struct slgt_info *info);
474static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs);
475
476static int alloc_dma_bufs(struct slgt_info *info);
477static void free_dma_bufs(struct slgt_info *info);
478static int alloc_desc(struct slgt_info *info);
479static void free_desc(struct slgt_info *info);
480static int alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count);
481static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count);
482
483static int alloc_tmp_rbuf(struct slgt_info *info);
484static void free_tmp_rbuf(struct slgt_info *info);
485
486static void tx_timeout(unsigned long context);
487static void rx_timeout(unsigned long context);
488
489/*
490 * ioctl handlers
491 */
492static int get_stats(struct slgt_info *info, struct mgsl_icount __user *user_icount);
493static int get_params(struct slgt_info *info, MGSL_PARAMS __user *params);
494static int set_params(struct slgt_info *info, MGSL_PARAMS __user *params);
495static int get_txidle(struct slgt_info *info, int __user *idle_mode);
496static int set_txidle(struct slgt_info *info, int idle_mode);
497static int tx_enable(struct slgt_info *info, int enable);
498static int tx_abort(struct slgt_info *info);
499static int rx_enable(struct slgt_info *info, int enable);
500static int modem_input_wait(struct slgt_info *info, int arg);
501static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr);
502static int tiocmget(struct tty_struct *tty, struct file *file);
503static int tiocmset(struct tty_struct *tty, struct file *file,
504 unsigned int set, unsigned int clear);
505static void set_break(struct tty_struct *tty, int break_state);
506static int get_interface(struct slgt_info *info, int __user *if_mode);
507static int set_interface(struct slgt_info *info, int if_mode);
508
509/*
510 * driver functions
511 */
512static void add_device(struct slgt_info *info);
513static void device_init(int adapter_num, struct pci_dev *pdev);
514static int claim_resources(struct slgt_info *info);
515static void release_resources(struct slgt_info *info);
516
517/*
518 * DEBUG OUTPUT CODE
519 */
520#ifndef DBGINFO
521#define DBGINFO(fmt)
522#endif
523#ifndef DBGERR
524#define DBGERR(fmt)
525#endif
526#ifndef DBGBH
527#define DBGBH(fmt)
528#endif
529#ifndef DBGISR
530#define DBGISR(fmt)
531#endif
532
533#ifdef DBGDATA
534static void trace_block(struct slgt_info *info, const char *data, int count, const char *label)
535{
536 int i;
537 int linecount;
538 printk("%s %s data:\n",info->device_name, label);
539 while(count) {
540 linecount = (count > 16) ? 16 : count;
541 for(i=0; i < linecount; i++)
542 printk("%02X ",(unsigned char)data[i]);
543 for(;i<17;i++)
544 printk(" ");
545 for(i=0;i<linecount;i++) {
546 if (data[i]>=040 && data[i]<=0176) /* printable ASCII */
547 printk("%c",data[i]);
548 else
549 printk(".");
550 }
551 printk("\n");
552 data += linecount;
553 count -= linecount;
554 }
555}
556#else
557#define DBGDATA(info, buf, size, label)
558#endif
559
560#ifdef DBGTBUF
561static void dump_tbufs(struct slgt_info *info)
562{
563 int i;
564 printk("tbuf_current=%d\n", info->tbuf_current);
565 for (i=0 ; i < info->tbuf_count ; i++) {
566 printk("%d: count=%04X status=%04X\n",
567 i, le16_to_cpu(info->tbufs[i].count), le16_to_cpu(info->tbufs[i].status));
568 }
569}
570#else
571#define DBGTBUF(info)
572#endif
573
574#ifdef DBGRBUF
575static void dump_rbufs(struct slgt_info *info)
576{
577 int i;
578 printk("rbuf_current=%d\n", info->rbuf_current);
579 for (i=0 ; i < info->rbuf_count ; i++) {
580 printk("%d: count=%04X status=%04X\n",
581 i, le16_to_cpu(info->rbufs[i].count), le16_to_cpu(info->rbufs[i].status));
582 }
583}
584#else
585#define DBGRBUF(info)
586#endif
587
588static inline int sanity_check(struct slgt_info *info, char *devname, const char *name)
589{
590#ifdef SANITY_CHECK
591 if (!info) {
592 printk("null struct slgt_info for (%s) in %s\n", devname, name);
593 return 1;
594 }
595 if (info->magic != MGSL_MAGIC) {
596 printk("bad magic number struct slgt_info (%s) in %s\n", devname, name);
597 return 1;
598 }
599#else
600 if (!info)
601 return 1;
602#endif
603 return 0;
604}
605
606/**
607 * line discipline callback wrappers
608 *
609 * These wrappers take and release a line discipline reference
610 * around each call into the line discipline.
611 *
612 * ldisc_receive_buf - pass receive data to the line discipline
613 */
614static void ldisc_receive_buf(struct tty_struct *tty,
615 const __u8 *data, char *flags, int count)
616{
617 struct tty_ldisc *ld;
618 if (!tty)
619 return;
620 ld = tty_ldisc_ref(tty);
621 if (ld) {
622 if (ld->receive_buf)
623 ld->receive_buf(tty, data, flags, count);
624 tty_ldisc_deref(ld);
625 }
626}
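
/*
 * Editor's note: tty_ldisc_ref()/tty_ldisc_deref() pin the line
 * discipline so it cannot be changed or unloaded while receive_buf()
 * runs. The same take/call/release pattern applies to any ldisc entry
 * point invoked from driver context, e.g. (illustrative only):
 *
 *	struct tty_ldisc *ld = tty_ldisc_ref(tty);
 *	if (ld) {
 *		if (ld->write_wakeup)
 *			ld->write_wakeup(tty);
 *		tty_ldisc_deref(ld);
 *	}
 */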
627
628/* tty callbacks */
629
630static int open(struct tty_struct *tty, struct file *filp)
631{
632 struct slgt_info *info;
633 int retval, line;
634 unsigned long flags;
635
636 line = tty->index;
637 if ((line < 0) || (line >= slgt_device_count)) {
638 DBGERR(("%s: open with invalid line #%d.\n", driver_name, line));
639 return -ENODEV;
640 }
641
642 info = slgt_device_list;
643 while(info && info->line != line)
644 info = info->next_device;
645 if (sanity_check(info, tty->name, "open"))
646 return -ENODEV;
647 if (info->init_error) {
648 DBGERR(("%s init error=%d\n", info->device_name, info->init_error));
649 return -ENODEV;
650 }
651
652 tty->driver_data = info;
653 info->tty = tty;
654
655 DBGINFO(("%s open, old ref count = %d\n", info->device_name, info->count));
656
657 /* If port is closing, signal caller to try again */
658 if (tty_hung_up_p(filp) || (info->flags & ASYNC_CLOSING)) {
659 if (info->flags & ASYNC_CLOSING)
660 interruptible_sleep_on(&info->close_wait);
661 retval = ((info->flags & ASYNC_HUP_NOTIFY) ?
662 -EAGAIN : -ERESTARTSYS);
663 goto cleanup;
664 }
665
666 info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
667
668 spin_lock_irqsave(&info->netlock, flags);
669 if (info->netcount) {
670 retval = -EBUSY;
671 spin_unlock_irqrestore(&info->netlock, flags);
672 goto cleanup;
673 }
674 info->count++;
675 spin_unlock_irqrestore(&info->netlock, flags);
676
677 if (info->count == 1) {
678 /* 1st open on this device, init hardware */
679 retval = startup(info);
680 if (retval < 0)
681 goto cleanup;
682 }
683
684 retval = block_til_ready(tty, filp, info);
685 if (retval) {
686 DBGINFO(("%s block_til_ready rc=%d\n", info->device_name, retval));
687 goto cleanup;
688 }
689
690 retval = 0;
691
692cleanup:
693 if (retval) {
694 if (tty->count == 1)
695 info->tty = NULL; /* tty layer will release tty struct */
696 if(info->count)
697 info->count--;
698 }
699
700 DBGINFO(("%s open rc=%d\n", info->device_name, retval));
701 return retval;
702}
703
704static void close(struct tty_struct *tty, struct file *filp)
705{
706 struct slgt_info *info = tty->driver_data;
707
708 if (sanity_check(info, tty->name, "close"))
709 return;
710 DBGINFO(("%s close entry, count=%d\n", info->device_name, info->count));
711
712 if (!info->count)
713 return;
714
715 if (tty_hung_up_p(filp))
716 goto cleanup;
717
718 if ((tty->count == 1) && (info->count != 1)) {
719 /*
720 * tty->count is 1 and the tty structure will be freed.
721 * info->count should be one in this case.
722 * If it is not, correct it so that the port is shut down.
723 */
724 DBGERR(("%s close: bad refcount; tty->count=1, "
725 "info->count=%d\n", info->device_name, info->count));
726 info->count = 1;
727 }
728
729 info->count--;
730
731 /* if at least one open remaining, leave hardware active */
732 if (info->count)
733 goto cleanup;
734
735 info->flags |= ASYNC_CLOSING;
736
737 /* set tty->closing to notify line discipline to
738 * only process XON/XOFF characters. Only the N_TTY
739 * discipline appears to use this (ppp does not).
740 */
741 tty->closing = 1;
742
743 /* wait for transmit data to clear all layers */
744
745 if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE) {
746 DBGINFO(("%s call tty_wait_until_sent\n", info->device_name));
747 tty_wait_until_sent(tty, info->closing_wait);
748 }
749
750 if (info->flags & ASYNC_INITIALIZED)
751 wait_until_sent(tty, info->timeout);
752 if (tty->driver->flush_buffer)
753 tty->driver->flush_buffer(tty);
754 tty_ldisc_flush(tty);
755
756 shutdown(info);
757
758 tty->closing = 0;
759 info->tty = NULL;
760
761 if (info->blocked_open) {
762 if (info->close_delay) {
763 msleep_interruptible(jiffies_to_msecs(info->close_delay));
764 }
765 wake_up_interruptible(&info->open_wait);
766 }
767
768 info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
769
770 wake_up_interruptible(&info->close_wait);
771
772cleanup:
773 DBGINFO(("%s close exit, count=%d\n", tty->driver->name, info->count));
774}
775
776static void hangup(struct tty_struct *tty)
777{
778 struct slgt_info *info = tty->driver_data;
779
780 if (sanity_check(info, tty->name, "hangup"))
781 return;
782 DBGINFO(("%s hangup\n", info->device_name));
783
784 flush_buffer(tty);
785 shutdown(info);
786
787 info->count = 0;
788 info->flags &= ~ASYNC_NORMAL_ACTIVE;
789 info->tty = NULL;
790
791 wake_up_interruptible(&info->open_wait);
792}
793
794static void set_termios(struct tty_struct *tty, struct termios *old_termios)
795{
796 struct slgt_info *info = tty->driver_data;
797 unsigned long flags;
798
799 DBGINFO(("%s set_termios\n", tty->driver->name));
800
801 /* just return if nothing has changed */
802 if ((tty->termios->c_cflag == old_termios->c_cflag)
803 && (RELEVANT_IFLAG(tty->termios->c_iflag)
804 == RELEVANT_IFLAG(old_termios->c_iflag)))
805 return;
806
807 change_params(info);
808
809 /* Handle transition to B0 status */
810 if (old_termios->c_cflag & CBAUD &&
811 !(tty->termios->c_cflag & CBAUD)) {
812 info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
813 spin_lock_irqsave(&info->lock,flags);
814 set_signals(info);
815 spin_unlock_irqrestore(&info->lock,flags);
816 }
817
818 /* Handle transition away from B0 status */
819 if (!(old_termios->c_cflag & CBAUD) &&
820 tty->termios->c_cflag & CBAUD) {
821 info->signals |= SerialSignal_DTR;
822 if (!(tty->termios->c_cflag & CRTSCTS) ||
823 !test_bit(TTY_THROTTLED, &tty->flags)) {
824 info->signals |= SerialSignal_RTS;
825 }
826 spin_lock_irqsave(&info->lock,flags);
827 set_signals(info);
828 spin_unlock_irqrestore(&info->lock,flags);
829 }
830
831 /* Handle turning off CRTSCTS */
832 if (old_termios->c_cflag & CRTSCTS &&
833 !(tty->termios->c_cflag & CRTSCTS)) {
834 tty->hw_stopped = 0;
835 tx_release(tty);
836 }
837}
838
839static int write(struct tty_struct *tty,
840 const unsigned char *buf, int count)
841{
842 int ret = 0;
843 struct slgt_info *info = tty->driver_data;
844 unsigned long flags;
845
846 if (sanity_check(info, tty->name, "write"))
847 goto cleanup;
848 DBGINFO(("%s write count=%d\n", info->device_name, count));
849
850 if (!tty || !info->tx_buf)
851 goto cleanup;
852
853 if (count > info->max_frame_size) {
854 ret = -EIO;
855 goto cleanup;
856 }
857
858 if (!count)
859 goto cleanup;
860
861 if (info->params.mode == MGSL_MODE_RAW) {
862 unsigned int bufs_needed = (count/DMABUFSIZE);
863 unsigned int bufs_free = free_tbuf_count(info);
864 if (count % DMABUFSIZE)
865 ++bufs_needed;
866 if (bufs_needed > bufs_free)
867 goto cleanup;
868 } else {
869 if (info->tx_active)
870 goto cleanup;
871 if (info->tx_count) {
872 /* send data accumulated by put_char() calls */
873 /* as a frame and wait before accepting more data. */
874 tx_load(info, info->tx_buf, info->tx_count);
875 goto start;
876 }
877 }
878
879 ret = info->tx_count = count;
880 tx_load(info, buf, count);
881 /* fall through to transmit start */
882
883start:
884 if (info->tx_count && !tty->stopped && !tty->hw_stopped) {
885 spin_lock_irqsave(&info->lock,flags);
886 if (!info->tx_active)
887 tx_start(info);
888 spin_unlock_irqrestore(&info->lock,flags);
889 }
890
891cleanup:
892 DBGINFO(("%s write rc=%d\n", info->device_name, ret));
893 return ret;
894}
895
896static void put_char(struct tty_struct *tty, unsigned char ch)
897{
898 struct slgt_info *info = tty->driver_data;
899 unsigned long flags;
900
901 if (sanity_check(info, tty->name, "put_char"))
902 return;
903 DBGINFO(("%s put_char(%d)\n", info->device_name, ch));
904 if (!tty || !info->tx_buf)
905 return;
906 spin_lock_irqsave(&info->lock,flags);
907 if (!info->tx_active && (info->tx_count < info->max_frame_size))
908 info->tx_buf[info->tx_count++] = ch;
909 spin_unlock_irqrestore(&info->lock,flags);
910}
911
912static void send_xchar(struct tty_struct *tty, char ch)
913{
914 struct slgt_info *info = tty->driver_data;
915 unsigned long flags;
916
917 if (sanity_check(info, tty->name, "send_xchar"))
918 return;
919 DBGINFO(("%s send_xchar(%d)\n", info->device_name, ch));
920 info->x_char = ch;
921 if (ch) {
922 spin_lock_irqsave(&info->lock,flags);
923 if (!info->tx_enabled)
924 tx_start(info);
925 spin_unlock_irqrestore(&info->lock,flags);
926 }
927}
928
929static void wait_until_sent(struct tty_struct *tty, int timeout)
930{
931 struct slgt_info *info = tty->driver_data;
932 unsigned long orig_jiffies, char_time;
933
934 if (!info)
935 return;
936 if (sanity_check(info, tty->name, "wait_until_sent"))
937 return;
938 DBGINFO(("%s wait_until_sent entry\n", info->device_name));
939 if (!(info->flags & ASYNC_INITIALIZED))
940 goto exit;
941
942 orig_jiffies = jiffies;
943
944 /* Set check interval to 1/5 of estimated time to
945 * send a character, and make it at least 1. The check
946 * interval should also be less than the timeout.
947 * Note: use tight timings here to satisfy the NIST-PCTS.
948 */
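
 /* Editor's worked example: the 32*5 divisor implies info->timeout
 * covers roughly 32 character times. At 9600 bps with ~10 bits per
 * async character, one character takes about 1 ms, so char_time
 * works out to about a fifth of a character time, clamped below
 * to one jiffy by the code that follows.
 */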
949
950 if (info->params.data_rate) {
951 char_time = info->timeout/(32 * 5);
952 if (!char_time)
953 char_time++;
954 } else
955 char_time = 1;
956
957 if (timeout)
958 char_time = min_t(unsigned long, char_time, timeout);
959
960 while (info->tx_active) {
961 msleep_interruptible(jiffies_to_msecs(char_time));
962 if (signal_pending(current))
963 break;
964 if (timeout && time_after(jiffies, orig_jiffies + timeout))
965 break;
966 }
967
968exit:
969 DBGINFO(("%s wait_until_sent exit\n", info->device_name));
970}
971
972static int write_room(struct tty_struct *tty)
973{
974 struct slgt_info *info = tty->driver_data;
975 int ret;
976
977 if (sanity_check(info, tty->name, "write_room"))
978 return 0;
979 ret = (info->tx_active) ? 0 : HDLC_MAX_FRAME_SIZE;
980 DBGINFO(("%s write_room=%d\n", info->device_name, ret));
981 return ret;
982}
983
984static void flush_chars(struct tty_struct *tty)
985{
986 struct slgt_info *info = tty->driver_data;
987 unsigned long flags;
988
989 if (sanity_check(info, tty->name, "flush_chars"))
990 return;
991 DBGINFO(("%s flush_chars entry tx_count=%d\n", info->device_name, info->tx_count));
992
993 if (info->tx_count <= 0 || tty->stopped ||
994 tty->hw_stopped || !info->tx_buf)
995 return;
996
997 DBGINFO(("%s flush_chars start transmit\n", info->device_name));
998
999 spin_lock_irqsave(&info->lock,flags);
1000 if (!info->tx_active && info->tx_count) {
1001 tx_load(info, info->tx_buf,info->tx_count);
1002 tx_start(info);
1003 }
1004 spin_unlock_irqrestore(&info->lock,flags);
1005}
1006
1007static void flush_buffer(struct tty_struct *tty)
1008{
1009 struct slgt_info *info = tty->driver_data;
1010 unsigned long flags;
1011
1012 if (sanity_check(info, tty->name, "flush_buffer"))
1013 return;
1014 DBGINFO(("%s flush_buffer\n", info->device_name));
1015
1016 spin_lock_irqsave(&info->lock,flags);
1017 if (!info->tx_active)
1018 info->tx_count = 0;
1019 spin_unlock_irqrestore(&info->lock,flags);
1020
1021 wake_up_interruptible(&tty->write_wait);
1022 tty_wakeup(tty);
1023}
1024
1025/*
1026 * throttle (stop) transmitter
1027 */
1028static void tx_hold(struct tty_struct *tty)
1029{
1030 struct slgt_info *info = tty->driver_data;
1031 unsigned long flags;
1032
1033 if (sanity_check(info, tty->name, "tx_hold"))
1034 return;
1035 DBGINFO(("%s tx_hold\n", info->device_name));
1036 spin_lock_irqsave(&info->lock,flags);
1037 if (info->tx_enabled && info->params.mode == MGSL_MODE_ASYNC)
1038 tx_stop(info);
1039 spin_unlock_irqrestore(&info->lock,flags);
1040}
1041
1042/*
1043 * release (start) transmitter
1044 */
1045static void tx_release(struct tty_struct *tty)
1046{
1047 struct slgt_info *info = tty->driver_data;
1048 unsigned long flags;
1049
1050 if (sanity_check(info, tty->name, "tx_release"))
1051 return;
1052 DBGINFO(("%s tx_release\n", info->device_name));
1053 spin_lock_irqsave(&info->lock,flags);
1054 if (!info->tx_active && info->tx_count) {
1055 tx_load(info, info->tx_buf, info->tx_count);
1056 tx_start(info);
1057 }
1058 spin_unlock_irqrestore(&info->lock,flags);
1059}
1060
1061/*
1062 * Service an IOCTL request
1063 *
1064 * Arguments
1065 *
1066 * tty pointer to tty instance data
1067 * file pointer to associated file object for device
1068 * cmd IOCTL command code
1069 * arg command argument/context
1070 *
1071 * Return 0 if success, otherwise error code
1072 */
1073static int ioctl(struct tty_struct *tty, struct file *file,
1074 unsigned int cmd, unsigned long arg)
1075{
1076 struct slgt_info *info = tty->driver_data;
1077 struct mgsl_icount cnow; /* kernel counter temps */
1078 struct serial_icounter_struct __user *p_cuser; /* user space */
1079 unsigned long flags;
1080 void __user *argp = (void __user *)arg;
1081
1082 if (sanity_check(info, tty->name, "ioctl"))
1083 return -ENODEV;
1084 DBGINFO(("%s ioctl() cmd=%08X\n", info->device_name, cmd));
1085
1086 if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
1087 (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) {
1088 if (tty->flags & (1 << TTY_IO_ERROR))
1089 return -EIO;
1090 }
1091
1092 switch (cmd) {
1093 case MGSL_IOCGPARAMS:
1094 return get_params(info, argp);
1095 case MGSL_IOCSPARAMS:
1096 return set_params(info, argp);
1097 case MGSL_IOCGTXIDLE:
1098 return get_txidle(info, argp);
1099 case MGSL_IOCSTXIDLE:
1100 return set_txidle(info, (int)arg);
1101 case MGSL_IOCTXENABLE:
1102 return tx_enable(info, (int)arg);
1103 case MGSL_IOCRXENABLE:
1104 return rx_enable(info, (int)arg);
1105 case MGSL_IOCTXABORT:
1106 return tx_abort(info);
1107 case MGSL_IOCGSTATS:
1108 return get_stats(info, argp);
1109 case MGSL_IOCWAITEVENT:
1110 return wait_mgsl_event(info, argp);
1111 case TIOCMIWAIT:
1112 return modem_input_wait(info,(int)arg);
1113 case MGSL_IOCGIF:
1114 return get_interface(info, argp);
1115 case MGSL_IOCSIF:
1116 return set_interface(info,(int)arg);
1117 case TIOCGICOUNT:
1118 spin_lock_irqsave(&info->lock,flags);
1119 cnow = info->icount;
1120 spin_unlock_irqrestore(&info->lock,flags);
1121 p_cuser = argp;
1122 if (put_user(cnow.cts, &p_cuser->cts) ||
1123 put_user(cnow.dsr, &p_cuser->dsr) ||
1124 put_user(cnow.rng, &p_cuser->rng) ||
1125 put_user(cnow.dcd, &p_cuser->dcd) ||
1126 put_user(cnow.rx, &p_cuser->rx) ||
1127 put_user(cnow.tx, &p_cuser->tx) ||
1128 put_user(cnow.frame, &p_cuser->frame) ||
1129 put_user(cnow.overrun, &p_cuser->overrun) ||
1130 put_user(cnow.parity, &p_cuser->parity) ||
1131 put_user(cnow.brk, &p_cuser->brk) ||
1132 put_user(cnow.buf_overrun, &p_cuser->buf_overrun))
1133 return -EFAULT;
1134 return 0;
1135 default:
1136 return -ENOIOCTLCMD;
1137 }
1138 return 0;
1139}
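
/*
 * Illustrative user-space sketch (editor's note): the TIOCGICOUNT case
 * above fills a standard struct serial_icounter_struct, so a monitoring
 * program can poll error counters without any driver-specific ioctls:
 *
 *	struct serial_icounter_struct icnt;
 *	if (ioctl(fd, TIOCGICOUNT, &icnt) == 0)
 *		printf("rx=%d tx=%d parity=%d overrun=%d\n",
 *		       icnt.rx, icnt.tx, icnt.parity, icnt.overrun);
 */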
1140
1141/*
1142 * proc fs support
1143 */
1144static inline int line_info(char *buf, struct slgt_info *info)
1145{
1146 char stat_buf[30];
1147 int ret;
1148 unsigned long flags;
1149
1150 ret = sprintf(buf, "%s: IO=%08X IRQ=%d MaxFrameSize=%u\n",
1151 info->device_name, info->phys_reg_addr,
1152 info->irq_level, info->max_frame_size);
1153
1154 /* output current serial signal states */
1155 spin_lock_irqsave(&info->lock,flags);
1156 get_signals(info);
1157 spin_unlock_irqrestore(&info->lock,flags);
1158
1159 stat_buf[0] = 0;
1160 stat_buf[1] = 0;
1161 if (info->signals & SerialSignal_RTS)
1162 strcat(stat_buf, "|RTS");
1163 if (info->signals & SerialSignal_CTS)
1164 strcat(stat_buf, "|CTS");
1165 if (info->signals & SerialSignal_DTR)
1166 strcat(stat_buf, "|DTR");
1167 if (info->signals & SerialSignal_DSR)
1168 strcat(stat_buf, "|DSR");
1169 if (info->signals & SerialSignal_DCD)
1170 strcat(stat_buf, "|CD");
1171 if (info->signals & SerialSignal_RI)
1172 strcat(stat_buf, "|RI");
1173
1174 if (info->params.mode != MGSL_MODE_ASYNC) {
1175 ret += sprintf(buf+ret, "\tHDLC txok:%d rxok:%d",
1176 info->icount.txok, info->icount.rxok);
1177 if (info->icount.txunder)
1178 ret += sprintf(buf+ret, " txunder:%d", info->icount.txunder);
1179 if (info->icount.txabort)
1180 ret += sprintf(buf+ret, " txabort:%d", info->icount.txabort);
1181 if (info->icount.rxshort)
1182 ret += sprintf(buf+ret, " rxshort:%d", info->icount.rxshort);
1183 if (info->icount.rxlong)
1184 ret += sprintf(buf+ret, " rxlong:%d", info->icount.rxlong);
1185 if (info->icount.rxover)
1186 ret += sprintf(buf+ret, " rxover:%d", info->icount.rxover);
1187 if (info->icount.rxcrc)
1188 ret += sprintf(buf+ret, " rxcrc:%d", info->icount.rxcrc);
1189 } else {
1190 ret += sprintf(buf+ret, "\tASYNC tx:%d rx:%d",
1191 info->icount.tx, info->icount.rx);
1192 if (info->icount.frame)
1193 ret += sprintf(buf+ret, " fe:%d", info->icount.frame);
1194 if (info->icount.parity)
1195 ret += sprintf(buf+ret, " pe:%d", info->icount.parity);
1196 if (info->icount.brk)
1197 ret += sprintf(buf+ret, " brk:%d", info->icount.brk);
1198 if (info->icount.overrun)
1199 ret += sprintf(buf+ret, " oe:%d", info->icount.overrun);
1200 }
1201
1202 /* Append serial signal status to end */
1203 ret += sprintf(buf+ret, " %s\n", stat_buf+1);
1204
1205 ret += sprintf(buf+ret, "\ttxactive=%d bh_req=%d bh_run=%d pending_bh=%x\n",
1206 info->tx_active,info->bh_requested,info->bh_running,
1207 info->pending_bh);
1208
1209 return ret;
1210}
1211
1212/* Called to print information about devices
1213 */
1214static int read_proc(char *page, char **start, off_t off, int count,
1215 int *eof, void *data)
1216{
1217 int len = 0, l;
1218 off_t begin = 0;
1219 struct slgt_info *info;
1220
1221 len += sprintf(page, "synclink_gt driver:%s\n", driver_version);
1222
1223 info = slgt_device_list;
1224 while( info ) {
1225 l = line_info(page + len, info);
1226 len += l;
1227 if (len+begin > off+count)
1228 goto done;
1229 if (len+begin < off) {
1230 begin += len;
1231 len = 0;
1232 }
1233 info = info->next_device;
1234 }
1235
1236 *eof = 1;
1237done:
1238 if (off >= len+begin)
1239 return 0;
1240 *start = page + (off-begin);
1241 return ((count < begin+len-off) ? count : begin+len-off);
1242}
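
/*
 * Editor's note on the read_proc protocol above: the caller hands in a
 * page buffer and an (off, count) window. The handler reports where the
 * requested window begins via *start, sets *eof once all devices have
 * been formatted, and returns the byte count within the window. A
 * minimal handler for a single short string would look like
 * (illustrative only):
 *
 *	len = sprintf(page, "hello\n");
 *	*eof = 1;
 *	if (off >= len)
 *		return 0;
 *	*start = page + off;
 *	return (count < len - off) ? count : len - off;
 */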
1243
1244/*
1245 * return count of bytes in transmit buffer
1246 */
1247static int chars_in_buffer(struct tty_struct *tty)
1248{
1249 struct slgt_info *info = tty->driver_data;
1250 if (sanity_check(info, tty->name, "chars_in_buffer"))
1251 return 0;
1252 DBGINFO(("%s chars_in_buffer()=%d\n", info->device_name, info->tx_count));
1253 return info->tx_count;
1254}
1255
1256/*
1257 * signal remote device to throttle send data (our receive data)
1258 */
1259static void throttle(struct tty_struct * tty)
1260{
1261 struct slgt_info *info = tty->driver_data;
1262 unsigned long flags;
1263
1264 if (sanity_check(info, tty->name, "throttle"))
1265 return;
1266 DBGINFO(("%s throttle\n", info->device_name));
1267 if (I_IXOFF(tty))
1268 send_xchar(tty, STOP_CHAR(tty));
1269 if (tty->termios->c_cflag & CRTSCTS) {
1270 spin_lock_irqsave(&info->lock,flags);
1271 info->signals &= ~SerialSignal_RTS;
1272 set_signals(info);
1273 spin_unlock_irqrestore(&info->lock,flags);
1274 }
1275}
1276
1277/*
1278 * signal remote device to stop throttling send data (our receive data)
1279 */
1280static void unthrottle(struct tty_struct * tty)
1281{
1282 struct slgt_info *info = tty->driver_data;
1283 unsigned long flags;
1284
1285 if (sanity_check(info, tty->name, "unthrottle"))
1286 return;
1287 DBGINFO(("%s unthrottle\n", info->device_name));
1288 if (I_IXOFF(tty)) {
1289 if (info->x_char)
1290 info->x_char = 0;
1291 else
1292 send_xchar(tty, START_CHAR(tty));
1293 }
1294 if (tty->termios->c_cflag & CRTSCTS) {
1295 spin_lock_irqsave(&info->lock,flags);
1296 info->signals |= SerialSignal_RTS;
1297 set_signals(info);
1298 spin_unlock_irqrestore(&info->lock,flags);
1299 }
1300}
1301
1302/*
1303 * set or clear transmit break condition
1304 * break_state -1=set break condition, 0=clear
1305 */
1306static void set_break(struct tty_struct *tty, int break_state)
1307{
1308 struct slgt_info *info = tty->driver_data;
1309 unsigned short value;
1310 unsigned long flags;
1311
1312 if (sanity_check(info, tty->name, "set_break"))
1313 return;
1314 DBGINFO(("%s set_break(%d)\n", info->device_name, break_state));
1315
1316 spin_lock_irqsave(&info->lock,flags);
1317 value = rd_reg16(info, TCR);
1318 if (break_state == -1)
1319 value |= BIT6;
1320 else
1321 value &= ~BIT6;
1322 wr_reg16(info, TCR, value);
1323 spin_unlock_irqrestore(&info->lock,flags);
1324}
1325
1326#ifdef CONFIG_HDLC
1327
1328/**
1329 * called by generic HDLC layer when protocol selected (PPP, frame relay, etc.)
1330 * set encoding and frame check sequence (FCS) options
1331 *
1332 * dev pointer to network device structure
1333 * encoding serial encoding setting
1334 * parity FCS setting
1335 *
1336 * returns 0 if success, otherwise error code
1337 */
1338static int hdlcdev_attach(struct net_device *dev, unsigned short encoding,
1339 unsigned short parity)
1340{
1341 struct slgt_info *info = dev_to_port(dev);
1342 unsigned char new_encoding;
1343 unsigned short new_crctype;
1344
1345 /* return error if TTY interface open */
1346 if (info->count)
1347 return -EBUSY;
1348
1349 DBGINFO(("%s hdlcdev_attach\n", info->device_name));
1350
1351 switch (encoding)
1352 {
1353 case ENCODING_NRZ: new_encoding = HDLC_ENCODING_NRZ; break;
1354 case ENCODING_NRZI: new_encoding = HDLC_ENCODING_NRZI_SPACE; break;
1355 case ENCODING_FM_MARK: new_encoding = HDLC_ENCODING_BIPHASE_MARK; break;
1356 case ENCODING_FM_SPACE: new_encoding = HDLC_ENCODING_BIPHASE_SPACE; break;
1357 case ENCODING_MANCHESTER: new_encoding = HDLC_ENCODING_BIPHASE_LEVEL; break;
1358 default: return -EINVAL;
1359 }
1360
1361 switch (parity)
1362 {
1363 case PARITY_NONE: new_crctype = HDLC_CRC_NONE; break;
1364 case PARITY_CRC16_PR1_CCITT: new_crctype = HDLC_CRC_16_CCITT; break;
1365 case PARITY_CRC32_PR1_CCITT: new_crctype = HDLC_CRC_32_CCITT; break;
1366 default: return -EINVAL;
1367 }
1368
1369 info->params.encoding = new_encoding;
1370 info->params.crc_type = new_crctype;
1371
1372 /* if network interface up, reprogram hardware */
1373 if (info->netcount)
1374 program_hw(info);
1375
1376 return 0;
1377}
1378
1379/**
1380 * called by generic HDLC layer to send frame
1381 *
1382 * skb socket buffer containing HDLC frame
1383 * dev pointer to network device structure
1384 *
1385 * returns 0 if success, otherwise error code
1386 */
1387static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
1388{
1389 struct slgt_info *info = dev_to_port(dev);
1390 struct net_device_stats *stats = hdlc_stats(dev);
1391 unsigned long flags;
1392
1393 DBGINFO(("%s hdlc_xmit\n", dev->name));
1394
1395 /* stop sending until this frame completes */
1396 netif_stop_queue(dev);
1397
1398 /* copy data to device buffers */
1399 info->tx_count = skb->len;
1400 tx_load(info, skb->data, skb->len);
1401
1402 /* update network statistics */
1403 stats->tx_packets++;
1404 stats->tx_bytes += skb->len;
1405
1406 /* done with socket buffer, so free it */
1407 dev_kfree_skb(skb);
1408
1409 /* save start time for transmit timeout detection */
1410 dev->trans_start = jiffies;
1411
1412 /* start hardware transmitter if necessary */
1413 spin_lock_irqsave(&info->lock,flags);
1414 if (!info->tx_active)
1415 tx_start(info);
1416 spin_unlock_irqrestore(&info->lock,flags);
1417
1418 return 0;
1419}
1420
1421/**
1422 * called by network layer when interface enabled
1423 * claim resources and initialize hardware
1424 *
1425 * dev pointer to network device structure
1426 *
1427 * returns 0 if success, otherwise error code
1428 */
1429static int hdlcdev_open(struct net_device *dev)
1430{
1431 struct slgt_info *info = dev_to_port(dev);
1432 int rc;
1433 unsigned long flags;
1434
1435 DBGINFO(("%s hdlcdev_open\n", dev->name));
1436
1437 /* generic HDLC layer open processing */
1438 if ((rc = hdlc_open(dev)))
1439 return rc;
1440
1441 /* arbitrate between network and tty opens */
1442 spin_lock_irqsave(&info->netlock, flags);
1443 if (info->count != 0 || info->netcount != 0) {
1444 DBGINFO(("%s hdlc_open busy\n", dev->name));
1445 spin_unlock_irqrestore(&info->netlock, flags);
1446 return -EBUSY;
1447 }
1448 info->netcount=1;
1449 spin_unlock_irqrestore(&info->netlock, flags);
1450
1451 /* claim resources and init adapter */
1452 if ((rc = startup(info)) != 0) {
1453 spin_lock_irqsave(&info->netlock, flags);
1454 info->netcount=0;
1455 spin_unlock_irqrestore(&info->netlock, flags);
1456 return rc;
1457 }
1458
1459 /* assert DTR and RTS, apply hardware settings */
1460 info->signals |= SerialSignal_RTS + SerialSignal_DTR;
1461 program_hw(info);
1462
1463 /* enable network layer transmit */
1464 dev->trans_start = jiffies;
1465 netif_start_queue(dev);
1466
1467 /* inform generic HDLC layer of current DCD status */
1468 spin_lock_irqsave(&info->lock, flags);
1469 get_signals(info);
1470 spin_unlock_irqrestore(&info->lock, flags);
1471 hdlc_set_carrier(info->signals & SerialSignal_DCD, dev);
1472
1473 return 0;
1474}
1475
1476/**
1477 * called by network layer when interface is disabled
1478 * shutdown hardware and release resources
1479 *
1480 * dev pointer to network device structure
1481 *
1482 * returns 0 if success, otherwise error code
1483 */
1484static int hdlcdev_close(struct net_device *dev)
1485{
1486 struct slgt_info *info = dev_to_port(dev);
1487 unsigned long flags;
1488
1489 DBGINFO(("%s hdlcdev_close\n", dev->name));
1490
1491 netif_stop_queue(dev);
1492
1493 /* shutdown adapter and release resources */
1494 shutdown(info);
1495
1496 hdlc_close(dev);
1497
1498 spin_lock_irqsave(&info->netlock, flags);
1499 info->netcount=0;
1500 spin_unlock_irqrestore(&info->netlock, flags);
1501
1502 return 0;
1503}
1504
1505/**
1506 * called by network layer to process IOCTL call to network device
1507 *
1508 * dev pointer to network device structure
1509 * ifr pointer to network interface request structure
1510 * cmd IOCTL command code
1511 *
1512 * returns 0 if success, otherwise error code
1513 */
1514static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1515{
1516 const size_t size = sizeof(sync_serial_settings);
1517 sync_serial_settings new_line;
1518 sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
1519 struct slgt_info *info = dev_to_port(dev);
1520 unsigned int flags;
1521
1522 DBGINFO(("%s hdlcdev_ioctl\n", dev->name));
1523
1524 /* return error if TTY interface open */
1525 if (info->count)
1526 return -EBUSY;
1527
1528 if (cmd != SIOCWANDEV)
1529 return hdlc_ioctl(dev, ifr, cmd);
1530
1531 switch(ifr->ifr_settings.type) {
1532 case IF_GET_IFACE: /* return current sync_serial_settings */
1533
1534 ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
1535 if (ifr->ifr_settings.size < size) {
1536 ifr->ifr_settings.size = size; /* data size wanted */
1537 return -ENOBUFS;
1538 }
1539
1540 flags = info->params.flags & (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL |
1541 HDLC_FLAG_RXC_BRG | HDLC_FLAG_RXC_TXCPIN |
1542 HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL |
1543 HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN);
1544
1545 switch (flags){
1546 case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_TXCPIN): new_line.clock_type = CLOCK_EXT; break;
1547 case (HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG): new_line.clock_type = CLOCK_INT; break;
1548 case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_BRG): new_line.clock_type = CLOCK_TXINT; break;
1549 case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_RXCPIN): new_line.clock_type = CLOCK_TXFROMRX; break;
1550 default: new_line.clock_type = CLOCK_DEFAULT;
1551 }
1552
1553 new_line.clock_rate = info->params.clock_speed;
1554 new_line.loopback = info->params.loopback ? 1:0;
1555
1556 if (copy_to_user(line, &new_line, size))
1557 return -EFAULT;
1558 return 0;
1559
1560 case IF_IFACE_SYNC_SERIAL: /* set sync_serial_settings */
1561
1562 if(!capable(CAP_NET_ADMIN))
1563 return -EPERM;
1564 if (copy_from_user(&new_line, line, size))
1565 return -EFAULT;
1566
1567 switch (new_line.clock_type)
1568 {
1569 case CLOCK_EXT: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_TXCPIN; break;
1570 case CLOCK_TXFROMRX: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_RXCPIN; break;
1571 case CLOCK_INT: flags = HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG; break;
1572 case CLOCK_TXINT: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_BRG; break;
1573 case CLOCK_DEFAULT: flags = info->params.flags &
1574 (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL |
1575 HDLC_FLAG_RXC_BRG | HDLC_FLAG_RXC_TXCPIN |
1576 HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL |
1577 HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN); break;
1578 default: return -EINVAL;
1579 }
1580
1581 if (new_line.loopback != 0 && new_line.loopback != 1)
1582 return -EINVAL;
1583
1584 info->params.flags &= ~(HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL |
1585 HDLC_FLAG_RXC_BRG | HDLC_FLAG_RXC_TXCPIN |
1586 HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL |
1587 HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN);
1588 info->params.flags |= flags;
1589
1590 info->params.loopback = new_line.loopback;
1591
1592 if (flags & (HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG))
1593 info->params.clock_speed = new_line.clock_rate;
1594 else
1595 info->params.clock_speed = 0;
1596
1597 /* if network interface up, reprogram hardware */
1598 if (info->netcount)
1599 program_hw(info);
1600 return 0;
1601
1602 default:
1603 return hdlc_ioctl(dev, ifr, cmd);
1604 }
1605}
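
/*
 * Example (editor's note): the IF_IFACE_SYNC_SERIAL branch above is what
 * the generic-HDLC sethdlc utility exercises via SIOCWANDEV; the exact
 * command syntax depends on the sethdlc version, but is roughly:
 *
 *	sethdlc hdlc0 clock int rate 9600     (internal BRG clocking)
 *	sethdlc hdlc0 clock ext               (clocks from the RxC/TxC pins)
 */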
1606
1607/**
1608 * called by network layer when transmit timeout is detected
1609 *
1610 * dev pointer to network device structure
1611 */
1612static void hdlcdev_tx_timeout(struct net_device *dev)
1613{
1614 struct slgt_info *info = dev_to_port(dev);
1615 struct net_device_stats *stats = hdlc_stats(dev);
1616 unsigned long flags;
1617
1618 DBGINFO(("%s hdlcdev_tx_timeout\n", dev->name));
1619
1620 stats->tx_errors++;
1621 stats->tx_aborted_errors++;
1622
1623 spin_lock_irqsave(&info->lock,flags);
1624 tx_stop(info);
1625 spin_unlock_irqrestore(&info->lock,flags);
1626
1627 netif_wake_queue(dev);
1628}
1629
1630/**
1631 * called by device driver when transmit completes
1632 * reenable network layer transmit if stopped
1633 *
1634 * info pointer to device instance information
1635 */
1636static void hdlcdev_tx_done(struct slgt_info *info)
1637{
1638 if (netif_queue_stopped(info->netdev))
1639 netif_wake_queue(info->netdev);
1640}
1641
1642/**
1643 * called by device driver when frame received
1644 * pass frame to network layer
1645 *
1646 * info pointer to device instance information
1647 * buf pointer to buffer containing frame data
1648 * size count of data bytes in buf
1649 */
1650static void hdlcdev_rx(struct slgt_info *info, char *buf, int size)
1651{
1652 struct sk_buff *skb = dev_alloc_skb(size);
1653 struct net_device *dev = info->netdev;
1654 struct net_device_stats *stats = hdlc_stats(dev);
1655
1656 DBGINFO(("%s hdlcdev_rx\n", dev->name));
1657
1658 if (skb == NULL) {
1659 DBGERR(("%s: can't alloc skb, drop packet\n", dev->name));
1660 stats->rx_dropped++;
1661 return;
1662 }
1663
1664 memcpy(skb_put(skb, size),buf,size);
1665
1666 skb->protocol = hdlc_type_trans(skb, info->netdev);
1667
1668 stats->rx_packets++;
1669 stats->rx_bytes += size;
1670
1671 netif_rx(skb);
1672
1673 info->netdev->last_rx = jiffies;
1674}
1675
1676/**
1677 * called by device driver when adding device instance
1678 * do generic HDLC initialization
1679 *
1680 * info pointer to device instance information
1681 *
1682 * returns 0 if success, otherwise error code
1683 */
1684static int hdlcdev_init(struct slgt_info *info)
1685{
1686 int rc;
1687 struct net_device *dev;
1688 hdlc_device *hdlc;
1689
1690 /* allocate and initialize network and HDLC layer objects */
1691
1692 if (!(dev = alloc_hdlcdev(info))) {
1693 printk(KERN_ERR "%s hdlc device alloc failure\n", info->device_name);
1694 return -ENOMEM;
1695 }
1696
1697 /* for network layer reporting purposes only */
1698 dev->mem_start = info->phys_reg_addr;
1699 dev->mem_end = info->phys_reg_addr + SLGT_REG_SIZE - 1;
1700 dev->irq = info->irq_level;
1701
1702 /* network layer callbacks and settings */
1703 dev->do_ioctl = hdlcdev_ioctl;
1704 dev->open = hdlcdev_open;
1705 dev->stop = hdlcdev_close;
1706 dev->tx_timeout = hdlcdev_tx_timeout;
1707 dev->watchdog_timeo = 10*HZ;
1708 dev->tx_queue_len = 50;
1709
1710 /* generic HDLC layer callbacks and settings */
1711 hdlc = dev_to_hdlc(dev);
1712 hdlc->attach = hdlcdev_attach;
1713 hdlc->xmit = hdlcdev_xmit;
1714
1715 /* register objects with HDLC layer */
1716 if ((rc = register_hdlc_device(dev))) {
1717 printk(KERN_WARNING "%s: unable to register hdlc device\n", __FILE__);
1718 free_netdev(dev);
1719 return rc;
1720 }
1721
1722 info->netdev = dev;
1723 return 0;
1724}
1725
1726/**
1727 * called by device driver when removing device instance
1728 * do generic HDLC cleanup
1729 *
1730 * info pointer to device instance information
1731 */
1732static void hdlcdev_exit(struct slgt_info *info)
1733{
1734 unregister_hdlc_device(info->netdev);
1735 free_netdev(info->netdev);
1736 info->netdev = NULL;
1737}
1738
1739#endif /* ifdef CONFIG_HDLC */
1740
1741/*
1742 * get async data from rx DMA buffers
1743 */
1744static void rx_async(struct slgt_info *info)
1745{
1746 struct tty_struct *tty = info->tty;
1747 struct mgsl_icount *icount = &info->icount;
1748 unsigned int start, end;
1749 unsigned char *p;
1750 unsigned char status;
1751 struct slgt_desc *bufs = info->rbufs;
1752 int i, count;
1753
1754 start = end = info->rbuf_current;
1755
1756 while(desc_complete(bufs[end])) {
1757 count = desc_count(bufs[end]) - info->rbuf_index;
1758 p = bufs[end].buf + info->rbuf_index;
1759
1760 DBGISR(("%s rx_async count=%d\n", info->device_name, count));
1761 DBGDATA(info, p, count, "rx");
1762
1763 for(i=0 ; i < count; i+=2, p+=2) {
1764 if (tty) {
1765 if (tty->flip.count >= TTY_FLIPBUF_SIZE)
1766 tty_flip_buffer_push(tty);
1767 if (tty->flip.count >= TTY_FLIPBUF_SIZE)
1768 break;
1769 *tty->flip.char_buf_ptr = *p;
1770 *tty->flip.flag_buf_ptr = 0;
1771 }
1772 icount->rx++;
1773 /* status byte (odd offsets): BIT1=parity error, BIT0=framing error */
1774 if ((status = *(p+1) & (BIT1 + BIT0))) {
1775 if (status & BIT1)
1776 icount->parity++;
1777 else if (status & BIT0)
1778 icount->frame++;
1779 /* discard char if tty control flags say so */
1780 if (status & info->ignore_status_mask)
1781 continue;
1782 if (tty) {
1783 if (status & BIT1)
1784 *tty->flip.flag_buf_ptr = TTY_PARITY;
1785 else if (status & BIT0)
1786 *tty->flip.flag_buf_ptr = TTY_FRAME;
1787 }
1788 }
1789 if (tty) {
1790 tty->flip.flag_buf_ptr++;
1791 tty->flip.char_buf_ptr++;
1792 tty->flip.count++;
1793 }
1794 }
1795
1796 if (i < count) {
1797 /* receive buffer not completed */
1798 info->rbuf_index += i;
1799 info->rx_timer.expires = jiffies + 1;
1800 add_timer(&info->rx_timer);
1801 break;
1802 }
1803
1804 info->rbuf_index = 0;
1805 free_rbufs(info, end, end);
1806
1807 if (++end == info->rbuf_count)
1808 end = 0;
1809
1810 /* if entire list searched then no frame available */
1811 if (end == start)
1812 break;
1813 }
1814
1815 if (tty && tty->flip.count)
1816 tty_flip_buffer_push(tty);
1817}
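
/*
 * Editor's note: rx_async() consumes DMA data as two-byte pairs (hence
 * the i += 2, p += 2 stride): even offsets hold received characters and
 * odd offsets hold the per-character status byte, whose low bits flag
 * parity (BIT1) and framing (BIT0) errors.
 */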
1818
1819/*
1820 * return next bottom half action to perform
1821 */
1822static int bh_action(struct slgt_info *info)
1823{
1824 unsigned long flags;
1825 int rc;
1826
1827 spin_lock_irqsave(&info->lock,flags);
1828
1829 if (info->pending_bh & BH_RECEIVE) {
1830 info->pending_bh &= ~BH_RECEIVE;
1831 rc = BH_RECEIVE;
1832 } else if (info->pending_bh & BH_TRANSMIT) {
1833 info->pending_bh &= ~BH_TRANSMIT;
1834 rc = BH_TRANSMIT;
1835 } else if (info->pending_bh & BH_STATUS) {
1836 info->pending_bh &= ~BH_STATUS;
1837 rc = BH_STATUS;
1838 } else {
1839 /* Mark BH routine as complete */
1840 info->bh_running = 0;
1841 info->bh_requested = 0;
1842 rc = 0;
1843 }
1844
1845 spin_unlock_irqrestore(&info->lock,flags);
1846
1847 return rc;
1848}
1849
1850/*
1851 * perform bottom half processing
1852 */
1853static void bh_handler(void* context)
1854{
1855 struct slgt_info *info = context;
1856 int action;
1857
1858 if (!info)
1859 return;
1860 info->bh_running = 1;
1861
1862 while((action = bh_action(info))) {
1863 switch (action) {
1864 case BH_RECEIVE:
1865 DBGBH(("%s bh receive\n", info->device_name));
1866 switch(info->params.mode) {
1867 case MGSL_MODE_ASYNC:
1868 rx_async(info);
1869 break;
1870 case MGSL_MODE_HDLC:
1871 while(rx_get_frame(info));
1872 break;
1873 case MGSL_MODE_RAW:
1874 while(rx_get_buf(info));
1875 break;
1876 }
1877 /* restart receiver if rx DMA buffers exhausted */
1878 if (info->rx_restart)
1879 rx_start(info);
1880 break;
1881 case BH_TRANSMIT:
1882 bh_transmit(info);
1883 break;
1884 case BH_STATUS:
1885 DBGBH(("%s bh status\n", info->device_name));
1886 info->ri_chkcount = 0;
1887 info->dsr_chkcount = 0;
1888 info->dcd_chkcount = 0;
1889 info->cts_chkcount = 0;
1890 break;
1891 default:
1892 DBGBH(("%s unknown action\n", info->device_name));
1893 break;
1894 }
1895 }
1896 DBGBH(("%s bh_handler exit\n", info->device_name));
1897}
1898
1899static void bh_transmit(struct slgt_info *info)
1900{
1901 struct tty_struct *tty = info->tty;
1902
1903 DBGBH(("%s bh_transmit\n", info->device_name));
1904 if (tty) {
1905 tty_wakeup(tty);
1906 wake_up_interruptible(&tty->write_wait);
1907 }
1908}
1909
1910static void dsr_change(struct slgt_info *info)
1911{
1912 get_signals(info);
1913 DBGISR(("dsr_change %s signals=%04X\n", info->device_name, info->signals));
1914 if ((info->dsr_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) {
1915 slgt_irq_off(info, IRQ_DSR);
1916 return;
1917 }
1918 info->icount.dsr++;
1919 if (info->signals & SerialSignal_DSR)
1920 info->input_signal_events.dsr_up++;
1921 else
1922 info->input_signal_events.dsr_down++;
1923 wake_up_interruptible(&info->status_event_wait_q);
1924 wake_up_interruptible(&info->event_wait_q);
1925 info->pending_bh |= BH_STATUS;
1926}
1927
1928static void cts_change(struct slgt_info *info)
1929{
1930 get_signals(info);
1931 DBGISR(("cts_change %s signals=%04X\n", info->device_name, info->signals));
1932 if ((info->cts_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) {
1933 slgt_irq_off(info, IRQ_CTS);
1934 return;
1935 }
1936 info->icount.cts++;
1937 if (info->signals & SerialSignal_CTS)
1938 info->input_signal_events.cts_up++;
1939 else
1940 info->input_signal_events.cts_down++;
1941 wake_up_interruptible(&info->status_event_wait_q);
1942 wake_up_interruptible(&info->event_wait_q);
1943 info->pending_bh |= BH_STATUS;
1944
1945 if (info->flags & ASYNC_CTS_FLOW) {
1946 if (info->tty) {
1947 if (info->tty->hw_stopped) {
1948 if (info->signals & SerialSignal_CTS) {
1949 info->tty->hw_stopped = 0;
1950 info->pending_bh |= BH_TRANSMIT;
1951 return;
1952 }
1953 } else {
1954 if (!(info->signals & SerialSignal_CTS))
1955 info->tty->hw_stopped = 1;
1956 }
1957 }
1958 }
1959}
1960
1961static void dcd_change(struct slgt_info *info)
1962{
1963 get_signals(info);
1964 DBGISR(("dcd_change %s signals=%04X\n", info->device_name, info->signals));
1965 if ((info->dcd_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) {
1966 slgt_irq_off(info, IRQ_DCD);
1967 return;
1968 }
1969 info->icount.dcd++;
1970 if (info->signals & SerialSignal_DCD) {
1971 info->input_signal_events.dcd_up++;
1972 } else {
1973 info->input_signal_events.dcd_down++;
1974 }
1975#ifdef CONFIG_HDLC
1976 if (info->netcount)
1977 hdlc_set_carrier(info->signals & SerialSignal_DCD, info->netdev);
1978#endif
1979 wake_up_interruptible(&info->status_event_wait_q);
1980 wake_up_interruptible(&info->event_wait_q);
1981 info->pending_bh |= BH_STATUS;
1982
1983 if (info->flags & ASYNC_CHECK_CD) {
1984 if (info->signals & SerialSignal_DCD)
1985 wake_up_interruptible(&info->open_wait);
1986 else {
1987 if (info->tty)
1988 tty_hangup(info->tty);
1989 }
1990 }
1991}
1992
1993static void ri_change(struct slgt_info *info)
1994{
1995 get_signals(info);
1996 DBGISR(("ri_change %s signals=%04X\n", info->device_name, info->signals));
1997 if ((info->ri_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) {
1998 slgt_irq_off(info, IRQ_RI);
1999 return;
2000 }
2001 info->icount.rng++; /* ring indicator transitions */
2002 if (info->signals & SerialSignal_RI) {
2003 info->input_signal_events.ri_up++;
2004 } else {
2005 info->input_signal_events.ri_down++;
2006 }
2007 wake_up_interruptible(&info->status_event_wait_q);
2008 wake_up_interruptible(&info->event_wait_q);
2009 info->pending_bh |= BH_STATUS;
2010}
2011
2012static void isr_serial(struct slgt_info *info)
2013{
2014 unsigned short status = rd_reg16(info, SSR);
2015
2016 DBGISR(("%s isr_serial status=%04X\n", info->device_name, status));
2017
2018 wr_reg16(info, SSR, status); /* clear pending */
2019
2020 info->irq_occurred = 1;
2021
2022 if (info->params.mode == MGSL_MODE_ASYNC) {
2023 if (status & IRQ_TXIDLE) {
2024 if (info->tx_count)
2025 isr_txeom(info, status);
2026 }
2027 if ((status & IRQ_RXBREAK) && (status & RXBREAK)) {
2028 info->icount.brk++;
2029 /* process break detection if tty control allows */
2030 if (info->tty) {
2031 if (!(status & info->ignore_status_mask)) {
2032 if (info->read_status_mask & MASK_BREAK) {
2033 *info->tty->flip.flag_buf_ptr = TTY_BREAK;
2034 if (info->flags & ASYNC_SAK)
2035 do_SAK(info->tty);
2036 }
2037 }
2038 }
2039 }
2040 } else {
2041 if (status & (IRQ_TXIDLE + IRQ_TXUNDER))
2042 isr_txeom(info, status);
2043
2044 if (status & IRQ_RXIDLE) {
2045 if (status & RXIDLE)
2046 info->icount.rxidle++;
2047 else
2048 info->icount.exithunt++;
2049 wake_up_interruptible(&info->event_wait_q);
2050 }
2051
2052 if (status & IRQ_RXOVER)
2053 rx_start(info);
2054 }
2055
2056 if (status & IRQ_DSR)
2057 dsr_change(info);
2058 if (status & IRQ_CTS)
2059 cts_change(info);
2060 if (status & IRQ_DCD)
2061 dcd_change(info);
2062 if (status & IRQ_RI)
2063 ri_change(info);
2064}
2065
2066static void isr_rdma(struct slgt_info *info)
2067{
2068 unsigned int status = rd_reg32(info, RDCSR);
2069
2070 DBGISR(("%s isr_rdma status=%08x\n", info->device_name, status));
2071
2072 /* RDCSR (rx DMA control/status)
2073 *
2074 * 31..07 reserved
2075 * 06 save status byte to DMA buffer
2076 * 05 error
2077 * 04 eol (end of list)
2078 * 03 eob (end of buffer)
2079 * 02 IRQ enable
2080 * 01 reset
2081 * 00 enable
2082 */
2083 wr_reg32(info, RDCSR, status); /* clear pending */
2084
2085 if (status & (BIT5 + BIT4)) {
2086 DBGISR(("%s isr_rdma rx_restart=1\n", info->device_name));
2087 info->rx_restart = 1;
2088 }
2089 info->pending_bh |= BH_RECEIVE;
2090}
2091
2092static void isr_tdma(struct slgt_info *info)
2093{
2094 unsigned int status = rd_reg32(info, TDCSR);
2095
2096 DBGISR(("%s isr_tdma status=%08x\n", info->device_name, status));
2097
2098 /* TDCSR (tx DMA control/status)
2099 *
2100 * 31..06 reserved
2101 * 05 error
2102 * 04 eol (end of list)
2103 * 03 eob (end of buffer)
2104 * 02 IRQ enable
2105 * 01 reset
2106 * 00 enable
2107 */
2108 wr_reg32(info, TDCSR, status); /* clear pending */
2109
2110 if (status & (BIT5 + BIT4 + BIT3)) {
2111 /* another transmit buffer has completed; */
2112 /* run bottom half to get more send data from user */
2113 info->pending_bh |= BH_TRANSMIT;
2114 }
2115}
2116
2117static void isr_txeom(struct slgt_info *info, unsigned short status)
2118{
2119 DBGISR(("%s txeom status=%04x\n", info->device_name, status));
2120
2121 slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER);
2122 tdma_reset(info);
2123 reset_tbufs(info);
2124 if (status & IRQ_TXUNDER) {
2125 unsigned short val = rd_reg16(info, TCR);
2126 wr_reg16(info, TCR, (unsigned short)(val | BIT2)); /* set reset bit */
2127 wr_reg16(info, TCR, val); /* clear reset bit */
2128 }
2129
2130 if (info->tx_active) {
2131 if (info->params.mode != MGSL_MODE_ASYNC) {
2132 if (status & IRQ_TXUNDER)
2133 info->icount.txunder++;
2134 else if (status & IRQ_TXIDLE)
2135 info->icount.txok++;
2136 }
2137
2138 info->tx_active = 0;
2139 info->tx_count = 0;
2140
2141 del_timer(&info->tx_timer);
2142
2143 if (info->params.mode != MGSL_MODE_ASYNC && info->drop_rts_on_tx_done) {
2144 info->signals &= ~SerialSignal_RTS;
2145 info->drop_rts_on_tx_done = 0;
2146 set_signals(info);
2147 }
2148
2149#ifdef CONFIG_HDLC
2150 if (info->netcount)
2151 hdlcdev_tx_done(info);
2152 else
2153#endif
2154 {
2155 if (info->tty && (info->tty->stopped || info->tty->hw_stopped)) {
2156 tx_stop(info);
2157 return;
2158 }
2159 info->pending_bh |= BH_TRANSMIT;
2160 }
2161 }
2162}
2163
2164/* interrupt service routine
2165 *
2166 * irq interrupt number
2167 * dev_id device ID supplied during interrupt registration
2168 * regs interrupted processor context
2169 */
2170static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs)
2171{
2172 struct slgt_info *info;
2173 unsigned int gsr;
2174 unsigned int i;
2175
2176 DBGISR(("slgt_interrupt irq=%d entry\n", irq));
2177
2178 info = dev_id;
2179 if (!info)
2180 return IRQ_NONE;
2181
2182 spin_lock(&info->lock);
2183
2184 while((gsr = rd_reg32(info, GSR) & 0xffffff00)) {
2185 DBGISR(("%s gsr=%08x\n", info->device_name, gsr));
2186 info->irq_occurred = 1;
2187 for(i=0; i < info->port_count ; i++) {
2188 if (info->port_array[i] == NULL)
2189 continue;
2190 if (gsr & (BIT8 << i))
2191 isr_serial(info->port_array[i]);
2192 if (gsr & (BIT16 << (i*2)))
2193 isr_rdma(info->port_array[i]);
2194 if (gsr & (BIT17 << (i*2)))
2195 isr_tdma(info->port_array[i]);
2196 }
2197 }
2198
2199 for(i=0; i < info->port_count ; i++) {
2200 struct slgt_info *port = info->port_array[i];
2201
2202 if (port && (port->count || port->netcount) &&
2203 port->pending_bh && !port->bh_running &&
2204 !port->bh_requested) {
2205 DBGISR(("%s bh queued\n", port->device_name));
2206 schedule_work(&port->task);
2207 port->bh_requested = 1;
2208 }
2209 }
2210
2211 spin_unlock(&info->lock);
2212
2213 DBGISR(("slgt_interrupt irq=%d exit\n", irq));
2214 return IRQ_HANDLED;
2215}
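/* For reference (added note, derived from the dispatch loop above and
 * assuming BITn is (1 << n)): on a 4-port GT4 adapter the GSR interrupt
 * bits decode per port as
 *
 *	port 0: serial=BIT8   rx DMA=BIT16  tx DMA=BIT17
 *	port 1: serial=BIT9   rx DMA=BIT18  tx DMA=BIT19
 *	port 2: serial=BIT10  rx DMA=BIT20  tx DMA=BIT21
 *	port 3: serial=BIT11  rx DMA=BIT22  tx DMA=BIT23
 */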
2216
2217static int startup(struct slgt_info *info)
2218{
2219 DBGINFO(("%s startup\n", info->device_name));
2220
2221 if (info->flags & ASYNC_INITIALIZED)
2222 return 0;
2223
2224 if (!info->tx_buf) {
2225 info->tx_buf = kmalloc(info->max_frame_size, GFP_KERNEL);
2226 if (!info->tx_buf) {
2227 DBGERR(("%s can't allocate tx buffer\n", info->device_name));
2228 return -ENOMEM;
2229 }
2230 }
2231
2232 info->pending_bh = 0;
2233
2234 memset(&info->icount, 0, sizeof(info->icount));
2235
2236 /* program hardware for current parameters */
2237 change_params(info);
2238
2239 if (info->tty)
2240 clear_bit(TTY_IO_ERROR, &info->tty->flags);
2241
2242 info->flags |= ASYNC_INITIALIZED;
2243
2244 return 0;
2245}
2246
2247/*
2248 * called by close() and hangup() to shutdown hardware
2249 */
2250static void shutdown(struct slgt_info *info)
2251{
2252 unsigned long flags;
2253
2254 if (!(info->flags & ASYNC_INITIALIZED))
2255 return;
2256
2257 DBGINFO(("%s shutdown\n", info->device_name));
2258
2259 /* clear status wait queue because status changes */
2260 /* can't happen after shutting down the hardware */
2261 wake_up_interruptible(&info->status_event_wait_q);
2262 wake_up_interruptible(&info->event_wait_q);
2263
2264 del_timer_sync(&info->tx_timer);
2265 del_timer_sync(&info->rx_timer);
2266
2267 kfree(info->tx_buf);
2268 info->tx_buf = NULL;
2269
2270 spin_lock_irqsave(&info->lock,flags);
2271
2272 tx_stop(info);
2273 rx_stop(info);
2274
2275 slgt_irq_off(info, IRQ_ALL | IRQ_MASTER);
2276
2277 if (!info->tty || info->tty->termios->c_cflag & HUPCL) {
2278 info->signals &= ~(SerialSignal_DTR + SerialSignal_RTS);
2279 set_signals(info);
2280 }
2281
2282 spin_unlock_irqrestore(&info->lock,flags);
2283
2284 if (info->tty)
2285 set_bit(TTY_IO_ERROR, &info->tty->flags);
2286
2287 info->flags &= ~ASYNC_INITIALIZED;
2288}
2289
2290static void program_hw(struct slgt_info *info)
2291{
2292 unsigned long flags;
2293
2294 spin_lock_irqsave(&info->lock,flags);
2295
2296 rx_stop(info);
2297 tx_stop(info);
2298
2299 if (info->params.mode == MGSL_MODE_HDLC ||
2300 info->params.mode == MGSL_MODE_RAW ||
2301 info->netcount)
2302 hdlc_mode(info);
2303 else
2304 async_mode(info);
2305
2306 set_signals(info);
2307
2308 info->dcd_chkcount = 0;
2309 info->cts_chkcount = 0;
2310 info->ri_chkcount = 0;
2311 info->dsr_chkcount = 0;
2312
2313 slgt_irq_on(info, IRQ_DCD | IRQ_CTS | IRQ_DSR);
2314 get_signals(info);
2315
2316 if (info->netcount ||
2317 (info->tty && info->tty->termios->c_cflag & CREAD))
2318 rx_start(info);
2319
2320 spin_unlock_irqrestore(&info->lock,flags);
2321}
2322
2323/*
2324 * reconfigure adapter based on new parameters
2325 */
2326static void change_params(struct slgt_info *info)
2327{
2328 unsigned cflag;
2329 int bits_per_char;
2330
2331 if (!info->tty || !info->tty->termios)
2332 return;
2333 DBGINFO(("%s change_params\n", info->device_name));
2334
2335 cflag = info->tty->termios->c_cflag;
2336
2337 /* if B0 rate (hangup) specified then negate DTR and RTS */
2338 /* otherwise assert DTR and RTS */
2339 if (cflag & CBAUD)
2340 info->signals |= SerialSignal_RTS + SerialSignal_DTR;
2341 else
2342 info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
2343
2344 /* byte size and parity */
2345
2346 switch (cflag & CSIZE) {
2347 case CS5: info->params.data_bits = 5; break;
2348 case CS6: info->params.data_bits = 6; break;
2349 case CS7: info->params.data_bits = 7; break;
2350 case CS8: info->params.data_bits = 8; break;
2351 default: info->params.data_bits = 7; break;
2352 }
2353
2354 info->params.stop_bits = (cflag & CSTOPB) ? 2 : 1;
2355
2356 if (cflag & PARENB)
2357 info->params.parity = (cflag & PARODD) ? ASYNC_PARITY_ODD : ASYNC_PARITY_EVEN;
2358 else
2359 info->params.parity = ASYNC_PARITY_NONE;
2360
2361 /* calculate number of jiffies to transmit a full
2362 * FIFO (32 bytes) at specified data rate
2363 */
2364 bits_per_char = info->params.data_bits +
2365 info->params.stop_bits + 1;
2366
2367 info->params.data_rate = tty_get_baud_rate(info->tty);
2368
2369 if (info->params.data_rate) {
2370 info->timeout = (32*HZ*bits_per_char) /
2371 info->params.data_rate;
2372 }
2373 info->timeout += HZ/50; /* Add .02 seconds of slop */
2374
2375 if (cflag & CRTSCTS)
2376 info->flags |= ASYNC_CTS_FLOW;
2377 else
2378 info->flags &= ~ASYNC_CTS_FLOW;
2379
2380 if (cflag & CLOCAL)
2381 info->flags &= ~ASYNC_CHECK_CD;
2382 else
2383 info->flags |= ASYNC_CHECK_CD;
2384
2385 /* process tty input control flags */
2386
2387 info->read_status_mask = IRQ_RXOVER;
2388 if (I_INPCK(info->tty))
2389 info->read_status_mask |= MASK_PARITY | MASK_FRAMING;
2390 if (I_BRKINT(info->tty) || I_PARMRK(info->tty))
2391 info->read_status_mask |= MASK_BREAK;
2392 if (I_IGNPAR(info->tty))
2393 info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING;
2394 if (I_IGNBRK(info->tty)) {
2395 info->ignore_status_mask |= MASK_BREAK;
2396 /* If ignoring parity and break indicators, ignore
2397 * overruns too. (For real raw support).
2398 */
2399 if (I_IGNPAR(info->tty))
2400 info->ignore_status_mask |= MASK_OVERRUN;
2401 }
2402
2403 program_hw(info);
2404}
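/* Worked example (added note): at 9600 bps with 8 data bits and 1 stop
 * bit, bits_per_char = 8 + 1 + 1 = 10 (start + data + stop), so the
 * timeout above is 32*HZ*10/9600 jiffies (about 33ms of FIFO drain time)
 * plus HZ/50 (20ms) of slop, roughly 53ms total regardless of HZ.
 */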
2405
2406static int get_stats(struct slgt_info *info, struct mgsl_icount __user *user_icount)
2407{
2408 DBGINFO(("%s get_stats\n", info->device_name));
2409 if (!user_icount) {
2410 memset(&info->icount, 0, sizeof(info->icount));
2411 } else {
2412 if (copy_to_user(user_icount, &info->icount, sizeof(struct mgsl_icount)))
2413 return -EFAULT;
2414 }
2415 return 0;
2416}
2417
2418static int get_params(struct slgt_info *info, MGSL_PARAMS __user *user_params)
2419{
2420 DBGINFO(("%s get_params\n", info->device_name));
2421 if (copy_to_user(user_params, &info->params, sizeof(MGSL_PARAMS)))
2422 return -EFAULT;
2423 return 0;
2424}
2425
2426static int set_params(struct slgt_info *info, MGSL_PARAMS __user *new_params)
2427{
2428 unsigned long flags;
2429 MGSL_PARAMS tmp_params;
2430
2431 DBGINFO(("%s set_params\n", info->device_name));
2432 if (copy_from_user(&tmp_params, new_params, sizeof(MGSL_PARAMS)))
2433 return -EFAULT;
2434
2435 spin_lock_irqsave(&info->lock, flags);
2436 memcpy(&info->params, &tmp_params, sizeof(MGSL_PARAMS));
2437 spin_unlock_irqrestore(&info->lock, flags);
2438
2439 change_params(info);
2440
2441 return 0;
2442}
2443
2444static int get_txidle(struct slgt_info *info, int __user *idle_mode)
2445{
2446 DBGINFO(("%s get_txidle=%d\n", info->device_name, info->idle_mode));
2447 if (put_user(info->idle_mode, idle_mode))
2448 return -EFAULT;
2449 return 0;
2450}
2451
2452static int set_txidle(struct slgt_info *info, int idle_mode)
2453{
2454 unsigned long flags;
2455 DBGINFO(("%s set_txidle(%d)\n", info->device_name, idle_mode));
2456 spin_lock_irqsave(&info->lock,flags);
2457 info->idle_mode = idle_mode;
2458 tx_set_idle(info);
2459 spin_unlock_irqrestore(&info->lock,flags);
2460 return 0;
2461}
2462
2463static int tx_enable(struct slgt_info *info, int enable)
2464{
2465 unsigned long flags;
2466 DBGINFO(("%s tx_enable(%d)\n", info->device_name, enable));
2467 spin_lock_irqsave(&info->lock,flags);
2468 if (enable) {
2469 if (!info->tx_enabled)
2470 tx_start(info);
2471 } else {
2472 if (info->tx_enabled)
2473 tx_stop(info);
2474 }
2475 spin_unlock_irqrestore(&info->lock,flags);
2476 return 0;
2477}
2478
2479/*
2480 * abort transmit HDLC frame
2481 */
2482static int tx_abort(struct slgt_info *info)
2483{
2484 unsigned long flags;
2485 DBGINFO(("%s tx_abort\n", info->device_name));
2486 spin_lock_irqsave(&info->lock,flags);
2487 tdma_reset(info);
2488 spin_unlock_irqrestore(&info->lock,flags);
2489 return 0;
2490}
2491
2492static int rx_enable(struct slgt_info *info, int enable)
2493{
2494 unsigned long flags;
2495 DBGINFO(("%s rx_enable(%d)\n", info->device_name, enable));
2496 spin_lock_irqsave(&info->lock,flags);
2497 if (enable) {
2498 if (!info->rx_enabled)
2499 rx_start(info);
2500 } else {
2501 if (info->rx_enabled)
2502 rx_stop(info);
2503 }
2504 spin_unlock_irqrestore(&info->lock,flags);
2505 return 0;
2506}
2507
2508/*
2509 * wait for specified event to occur
2510 */
2511static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr)
2512{
2513 unsigned long flags;
2514 int s;
2515 int rc=0;
2516 struct mgsl_icount cprev, cnow;
2517 int events;
2518 int mask;
2519 struct _input_signal_events oldsigs, newsigs;
2520 DECLARE_WAITQUEUE(wait, current);
2521
2522 if (get_user(mask, mask_ptr))
2523 return -EFAULT;
2524
2525 DBGINFO(("%s wait_mgsl_event(%d)\n", info->device_name, mask));
2526
2527 spin_lock_irqsave(&info->lock,flags);
2528
2529 /* return immediately if state matches requested events */
2530 get_signals(info);
2531 s = info->signals;
2532
2533 events = mask &
2534 ( ((s & SerialSignal_DSR) ? MgslEvent_DsrActive:MgslEvent_DsrInactive) +
2535 ((s & SerialSignal_DCD) ? MgslEvent_DcdActive:MgslEvent_DcdInactive) +
2536 ((s & SerialSignal_CTS) ? MgslEvent_CtsActive:MgslEvent_CtsInactive) +
2537 ((s & SerialSignal_RI) ? MgslEvent_RiActive :MgslEvent_RiInactive) );
2538 if (events) {
2539 spin_unlock_irqrestore(&info->lock,flags);
2540 goto exit;
2541 }
2542
2543 /* save current irq counts */
2544 cprev = info->icount;
2545 oldsigs = info->input_signal_events;
2546
2547 /* enable hunt and idle irqs if needed */
2548 if (mask & (MgslEvent_ExitHuntMode+MgslEvent_IdleReceived)) {
2549 unsigned short val = rd_reg16(info, SCR);
2550 if (!(val & IRQ_RXIDLE))
2551 wr_reg16(info, SCR, (unsigned short)(val | IRQ_RXIDLE));
2552 }
2553
2554 set_current_state(TASK_INTERRUPTIBLE);
2555 add_wait_queue(&info->event_wait_q, &wait);
2556
2557 spin_unlock_irqrestore(&info->lock,flags);
2558
2559 for(;;) {
2560 schedule();
2561 if (signal_pending(current)) {
2562 rc = -ERESTARTSYS;
2563 break;
2564 }
2565
2566 /* get current irq counts */
2567 spin_lock_irqsave(&info->lock,flags);
2568 cnow = info->icount;
2569 newsigs = info->input_signal_events;
2570 set_current_state(TASK_INTERRUPTIBLE);
2571 spin_unlock_irqrestore(&info->lock,flags);
2572
2573 /* if no change, wait aborted for some reason */
2574 if (newsigs.dsr_up == oldsigs.dsr_up &&
2575 newsigs.dsr_down == oldsigs.dsr_down &&
2576 newsigs.dcd_up == oldsigs.dcd_up &&
2577 newsigs.dcd_down == oldsigs.dcd_down &&
2578 newsigs.cts_up == oldsigs.cts_up &&
2579 newsigs.cts_down == oldsigs.cts_down &&
2580 newsigs.ri_up == oldsigs.ri_up &&
2581 newsigs.ri_down == oldsigs.ri_down &&
2582 cnow.exithunt == cprev.exithunt &&
2583 cnow.rxidle == cprev.rxidle) {
2584 rc = -EIO;
2585 break;
2586 }
2587
2588 events = mask &
2589 ( (newsigs.dsr_up != oldsigs.dsr_up ? MgslEvent_DsrActive:0) +
2590 (newsigs.dsr_down != oldsigs.dsr_down ? MgslEvent_DsrInactive:0) +
2591 (newsigs.dcd_up != oldsigs.dcd_up ? MgslEvent_DcdActive:0) +
2592 (newsigs.dcd_down != oldsigs.dcd_down ? MgslEvent_DcdInactive:0) +
2593 (newsigs.cts_up != oldsigs.cts_up ? MgslEvent_CtsActive:0) +
2594 (newsigs.cts_down != oldsigs.cts_down ? MgslEvent_CtsInactive:0) +
2595 (newsigs.ri_up != oldsigs.ri_up ? MgslEvent_RiActive:0) +
2596 (newsigs.ri_down != oldsigs.ri_down ? MgslEvent_RiInactive:0) +
2597 (cnow.exithunt != cprev.exithunt ? MgslEvent_ExitHuntMode:0) +
2598 (cnow.rxidle != cprev.rxidle ? MgslEvent_IdleReceived:0) );
2599 if (events)
2600 break;
2601
2602 cprev = cnow;
2603 oldsigs = newsigs;
2604 }
2605
2606 remove_wait_queue(&info->event_wait_q, &wait);
2607 set_current_state(TASK_RUNNING);
2608
2609
2610 if (mask & (MgslEvent_ExitHuntMode + MgslEvent_IdleReceived)) {
2611 spin_lock_irqsave(&info->lock,flags);
2612 if (!waitqueue_active(&info->event_wait_q)) {
2613			/* disable exit hunt mode/idle rcvd IRQs */
2614 wr_reg16(info, SCR,
2615 (unsigned short)(rd_reg16(info, SCR) & ~IRQ_RXIDLE));
2616 }
2617 spin_unlock_irqrestore(&info->lock,flags);
2618 }
2619exit:
2620 if (rc == 0)
2621 rc = put_user(events, mask_ptr);
2622 return rc;
2623}
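/* Illustrative user-space usage (added sketch, assuming the
 * MGSL_IOCWAITEVENT ioctl and MgslEvent_* masks from <linux/synclink.h>
 * reach wait_mgsl_event() above):
 *
 *	int mask = MgslEvent_DcdActive + MgslEvent_DcdInactive;
 *	if (!ioctl(fd, MGSL_IOCWAITEVENT, &mask))
 *		printf("DCD events=%#x\n", mask);
 *
 * The call blocks until a requested event occurs, then writes the events
 * actually seen back through the same int pointer.
 */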
2624
2625static int get_interface(struct slgt_info *info, int __user *if_mode)
2626{
2627 DBGINFO(("%s get_interface=%x\n", info->device_name, info->if_mode));
2628 if (put_user(info->if_mode, if_mode))
2629 return -EFAULT;
2630 return 0;
2631}
2632
2633static int set_interface(struct slgt_info *info, int if_mode)
2634{
2635 unsigned long flags;
2636 unsigned char val;
2637
2638	DBGINFO(("%s set_interface=%x\n", info->device_name, if_mode));
2639 spin_lock_irqsave(&info->lock,flags);
2640 info->if_mode = if_mode;
2641
2642 msc_set_vcr(info);
2643
2644 /* TCR (tx control) 07 1=RTS driver control */
2645 val = rd_reg16(info, TCR);
2646 if (info->if_mode & MGSL_INTERFACE_RTS_EN)
2647 val |= BIT7;
2648 else
2649 val &= ~BIT7;
2650 wr_reg16(info, TCR, val);
2651
2652 spin_unlock_irqrestore(&info->lock,flags);
2653 return 0;
2654}
2655
2656static int modem_input_wait(struct slgt_info *info,int arg)
2657{
2658 unsigned long flags;
2659 int rc;
2660 struct mgsl_icount cprev, cnow;
2661 DECLARE_WAITQUEUE(wait, current);
2662
2663 /* save current irq counts */
2664 spin_lock_irqsave(&info->lock,flags);
2665 cprev = info->icount;
2666 add_wait_queue(&info->status_event_wait_q, &wait);
2667 set_current_state(TASK_INTERRUPTIBLE);
2668 spin_unlock_irqrestore(&info->lock,flags);
2669
2670 for(;;) {
2671 schedule();
2672 if (signal_pending(current)) {
2673 rc = -ERESTARTSYS;
2674 break;
2675 }
2676
2677 /* get new irq counts */
2678 spin_lock_irqsave(&info->lock,flags);
2679 cnow = info->icount;
2680 set_current_state(TASK_INTERRUPTIBLE);
2681 spin_unlock_irqrestore(&info->lock,flags);
2682
2683 /* if no change, wait aborted for some reason */
2684 if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr &&
2685 cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) {
2686 rc = -EIO;
2687 break;
2688 }
2689
2690 /* check for change in caller specified modem input */
2691 if ((arg & TIOCM_RNG && cnow.rng != cprev.rng) ||
2692 (arg & TIOCM_DSR && cnow.dsr != cprev.dsr) ||
2693 (arg & TIOCM_CD && cnow.dcd != cprev.dcd) ||
2694 (arg & TIOCM_CTS && cnow.cts != cprev.cts)) {
2695 rc = 0;
2696 break;
2697 }
2698
2699 cprev = cnow;
2700 }
2701 remove_wait_queue(&info->status_event_wait_q, &wait);
2702 set_current_state(TASK_RUNNING);
2703 return rc;
2704}
2705
2706/*
2707 * return state of serial control and status signals
2708 */
2709static int tiocmget(struct tty_struct *tty, struct file *file)
2710{
2711 struct slgt_info *info = tty->driver_data;
2712 unsigned int result;
2713 unsigned long flags;
2714
2715 spin_lock_irqsave(&info->lock,flags);
2716 get_signals(info);
2717 spin_unlock_irqrestore(&info->lock,flags);
2718
2719 result = ((info->signals & SerialSignal_RTS) ? TIOCM_RTS:0) +
2720 ((info->signals & SerialSignal_DTR) ? TIOCM_DTR:0) +
2721 ((info->signals & SerialSignal_DCD) ? TIOCM_CAR:0) +
2722 ((info->signals & SerialSignal_RI) ? TIOCM_RNG:0) +
2723 ((info->signals & SerialSignal_DSR) ? TIOCM_DSR:0) +
2724 ((info->signals & SerialSignal_CTS) ? TIOCM_CTS:0);
2725
2726 DBGINFO(("%s tiocmget value=%08X\n", info->device_name, result));
2727 return result;
2728}
2729
2730/*
2731 * set modem control signals (DTR/RTS)
2732 *
2733 * cmd signal command: TIOCMBIS = set bit TIOCMBIC = clear bit
2734 * TIOCMSET = set/clear signal values
2735 * value bit mask for command
2736 */
2737static int tiocmset(struct tty_struct *tty, struct file *file,
2738 unsigned int set, unsigned int clear)
2739{
2740 struct slgt_info *info = tty->driver_data;
2741 unsigned long flags;
2742
2743 DBGINFO(("%s tiocmset(%x,%x)\n", info->device_name, set, clear));
2744
2745 if (set & TIOCM_RTS)
2746 info->signals |= SerialSignal_RTS;
2747 if (set & TIOCM_DTR)
2748 info->signals |= SerialSignal_DTR;
2749 if (clear & TIOCM_RTS)
2750 info->signals &= ~SerialSignal_RTS;
2751 if (clear & TIOCM_DTR)
2752 info->signals &= ~SerialSignal_DTR;
2753
2754 spin_lock_irqsave(&info->lock,flags);
2755 set_signals(info);
2756 spin_unlock_irqrestore(&info->lock,flags);
2757 return 0;
2758}
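/* Illustrative user-space usage (added sketch): the standard termios
 * modem control ioctls that reach tiocmget()/tiocmset() above.
 *
 *	int bits;
 *	ioctl(fd, TIOCMGET, &bits);	    read current signal states
 *	bits = TIOCM_RTS | TIOCM_DTR;
 *	ioctl(fd, TIOCMBIS, &bits);	    assert RTS and DTR
 *	ioctl(fd, TIOCMBIC, &bits);	    negate RTS and DTR
 */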
2759
2760/*
2761 * block current process until the device is ready to open
2762 */
2763static int block_til_ready(struct tty_struct *tty, struct file *filp,
2764 struct slgt_info *info)
2765{
2766 DECLARE_WAITQUEUE(wait, current);
2767 int retval;
2768 int do_clocal = 0, extra_count = 0;
2769 unsigned long flags;
2770
2771 DBGINFO(("%s block_til_ready\n", tty->driver->name));
2772
2773 if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
2774 /* nonblock mode is set or port is not enabled */
2775 info->flags |= ASYNC_NORMAL_ACTIVE;
2776 return 0;
2777 }
2778
2779 if (tty->termios->c_cflag & CLOCAL)
2780 do_clocal = 1;
2781
2782 /* Wait for carrier detect and the line to become
2783 * free (i.e., not in use by the callout). While we are in
2784 * this loop, info->count is dropped by one, so that
2785 * close() knows when to free things. We restore it upon
2786 * exit, either normal or abnormal.
2787 */
2788
2789 retval = 0;
2790 add_wait_queue(&info->open_wait, &wait);
2791
2792 spin_lock_irqsave(&info->lock, flags);
2793 if (!tty_hung_up_p(filp)) {
2794 extra_count = 1;
2795 info->count--;
2796 }
2797 spin_unlock_irqrestore(&info->lock, flags);
2798 info->blocked_open++;
2799
2800 while (1) {
2801 if ((tty->termios->c_cflag & CBAUD)) {
2802 spin_lock_irqsave(&info->lock,flags);
2803 info->signals |= SerialSignal_RTS + SerialSignal_DTR;
2804 set_signals(info);
2805 spin_unlock_irqrestore(&info->lock,flags);
2806 }
2807
2808 set_current_state(TASK_INTERRUPTIBLE);
2809
2810 if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)){
2811 retval = (info->flags & ASYNC_HUP_NOTIFY) ?
2812 -EAGAIN : -ERESTARTSYS;
2813 break;
2814 }
2815
2816 spin_lock_irqsave(&info->lock,flags);
2817 get_signals(info);
2818 spin_unlock_irqrestore(&info->lock,flags);
2819
2820 if (!(info->flags & ASYNC_CLOSING) &&
2821 (do_clocal || (info->signals & SerialSignal_DCD)) ) {
2822 break;
2823 }
2824
2825 if (signal_pending(current)) {
2826 retval = -ERESTARTSYS;
2827 break;
2828 }
2829
2830 DBGINFO(("%s block_til_ready wait\n", tty->driver->name));
2831 schedule();
2832 }
2833
2834 set_current_state(TASK_RUNNING);
2835 remove_wait_queue(&info->open_wait, &wait);
2836
2837 if (extra_count)
2838 info->count++;
2839 info->blocked_open--;
2840
2841 if (!retval)
2842 info->flags |= ASYNC_NORMAL_ACTIVE;
2843
2844 DBGINFO(("%s block_til_ready ready, rc=%d\n", tty->driver->name, retval));
2845 return retval;
2846}
2847
2848static int alloc_tmp_rbuf(struct slgt_info *info)
2849{
2850 info->tmp_rbuf = kmalloc(info->max_frame_size, GFP_KERNEL);
2851 if (info->tmp_rbuf == NULL)
2852 return -ENOMEM;
2853 return 0;
2854}
2855
2856static void free_tmp_rbuf(struct slgt_info *info)
2857{
2858 kfree(info->tmp_rbuf);
2859 info->tmp_rbuf = NULL;
2860}
2861
2862/*
2863 * allocate DMA descriptor lists.
2864 */
2865static int alloc_desc(struct slgt_info *info)
2866{
2867 unsigned int i;
2868 unsigned int pbufs;
2869
2870 /* allocate memory to hold descriptor lists */
2871 info->bufs = pci_alloc_consistent(info->pdev, DESC_LIST_SIZE, &info->bufs_dma_addr);
2872 if (info->bufs == NULL)
2873 return -ENOMEM;
2874
2875 memset(info->bufs, 0, DESC_LIST_SIZE);
2876
2877 info->rbufs = (struct slgt_desc*)info->bufs;
2878 info->tbufs = ((struct slgt_desc*)info->bufs) + info->rbuf_count;
2879
2880 pbufs = (unsigned int)info->bufs_dma_addr;
2881
2882 /*
2883 * Build circular lists of descriptors
2884 */
2885
2886 for (i=0; i < info->rbuf_count; i++) {
2887 /* physical address of this descriptor */
2888 info->rbufs[i].pdesc = pbufs + (i * sizeof(struct slgt_desc));
2889
2890 /* physical address of next descriptor */
2891 if (i == info->rbuf_count - 1)
2892 info->rbufs[i].next = cpu_to_le32(pbufs);
2893 else
2894 info->rbufs[i].next = cpu_to_le32(pbufs + ((i+1) * sizeof(struct slgt_desc)));
2895 set_desc_count(info->rbufs[i], DMABUFSIZE);
2896 }
2897
2898 for (i=0; i < info->tbuf_count; i++) {
2899 /* physical address of this descriptor */
2900 info->tbufs[i].pdesc = pbufs + ((info->rbuf_count + i) * sizeof(struct slgt_desc));
2901
2902 /* physical address of next descriptor */
2903 if (i == info->tbuf_count - 1)
2904 info->tbufs[i].next = cpu_to_le32(pbufs + info->rbuf_count * sizeof(struct slgt_desc));
2905 else
2906 info->tbufs[i].next = cpu_to_le32(pbufs + ((info->rbuf_count + i + 1) * sizeof(struct slgt_desc)));
2907 }
2908
2909 return 0;
2910}
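/* For reference (added note): alloc_desc() places every descriptor in a
 * single physically contiguous block. With S = sizeof(struct slgt_desc):
 *
 *	rbufs[i].pdesc = base + i*S			i = 0..rbuf_count-1
 *	tbufs[i].pdesc = base + (rbuf_count + i)*S	i = 0..tbuf_count-1
 *
 * Each list is circular: the last rx descriptor's next pointer wraps to
 * base, and the last tx descriptor's wraps to base + rbuf_count*S.
 */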
2911
2912static void free_desc(struct slgt_info *info)
2913{
2914 if (info->bufs != NULL) {
2915 pci_free_consistent(info->pdev, DESC_LIST_SIZE, info->bufs, info->bufs_dma_addr);
2916 info->bufs = NULL;
2917 info->rbufs = NULL;
2918 info->tbufs = NULL;
2919 }
2920}
2921
2922static int alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count)
2923{
2924 int i;
2925 for (i=0; i < count; i++) {
2926 if ((bufs[i].buf = pci_alloc_consistent(info->pdev, DMABUFSIZE, &bufs[i].buf_dma_addr)) == NULL)
2927 return -ENOMEM;
2928 bufs[i].pbuf = cpu_to_le32((unsigned int)bufs[i].buf_dma_addr);
2929 }
2930 return 0;
2931}
2932
2933static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count)
2934{
2935 int i;
2936 for (i=0; i < count; i++) {
2937 if (bufs[i].buf == NULL)
2938 continue;
2939 pci_free_consistent(info->pdev, DMABUFSIZE, bufs[i].buf, bufs[i].buf_dma_addr);
2940 bufs[i].buf = NULL;
2941 }
2942}
2943
2944static int alloc_dma_bufs(struct slgt_info *info)
2945{
2946 info->rbuf_count = 32;
2947 info->tbuf_count = 32;
2948
2949 if (alloc_desc(info) < 0 ||
2950 alloc_bufs(info, info->rbufs, info->rbuf_count) < 0 ||
2951 alloc_bufs(info, info->tbufs, info->tbuf_count) < 0 ||
2952 alloc_tmp_rbuf(info) < 0) {
2953 DBGERR(("%s DMA buffer alloc fail\n", info->device_name));
2954 return -ENOMEM;
2955 }
2956 reset_rbufs(info);
2957 return 0;
2958}
2959
2960static void free_dma_bufs(struct slgt_info *info)
2961{
2962 if (info->bufs) {
2963 free_bufs(info, info->rbufs, info->rbuf_count);
2964 free_bufs(info, info->tbufs, info->tbuf_count);
2965 free_desc(info);
2966 }
2967 free_tmp_rbuf(info);
2968}
2969
2970static int claim_resources(struct slgt_info *info)
2971{
2972 if (request_mem_region(info->phys_reg_addr, SLGT_REG_SIZE, "synclink_gt") == NULL) {
2973 DBGERR(("%s reg addr conflict, addr=%08X\n",
2974 info->device_name, info->phys_reg_addr));
2975 info->init_error = DiagStatus_AddressConflict;
2976 goto errout;
2977 }
2978 else
2979 info->reg_addr_requested = 1;
2980
2981 info->reg_addr = ioremap(info->phys_reg_addr, PAGE_SIZE);
2982 if (!info->reg_addr) {
2983		DBGERR(("%s can't map device registers, addr=%08X\n",
2984 info->device_name, info->phys_reg_addr));
2985 info->init_error = DiagStatus_CantAssignPciResources;
2986 goto errout;
2987 }
2988 info->reg_addr += info->reg_offset;
2989 return 0;
2990
2991errout:
2992 release_resources(info);
2993 return -ENODEV;
2994}
2995
2996static void release_resources(struct slgt_info *info)
2997{
2998 if (info->irq_requested) {
2999 free_irq(info->irq_level, info);
3000 info->irq_requested = 0;
3001 }
3002
3003 if (info->reg_addr_requested) {
3004 release_mem_region(info->phys_reg_addr, SLGT_REG_SIZE);
3005 info->reg_addr_requested = 0;
3006 }
3007
3008 if (info->reg_addr) {
3009 iounmap(info->reg_addr - info->reg_offset);
3010 info->reg_addr = NULL;
3011 }
3012}
3013
3014/* Add the specified device instance data structure to the
3015 * global linked list of devices and increment the device count.
3016 */
3017static void add_device(struct slgt_info *info)
3018{
3019 char *devstr;
3020
3021 info->next_device = NULL;
3022 info->line = slgt_device_count;
3023 sprintf(info->device_name, "%s%d", tty_dev_prefix, info->line);
3024
3025 if (info->line < MAX_DEVICES) {
3026 if (maxframe[info->line])
3027 info->max_frame_size = maxframe[info->line];
3028 info->dosyncppp = dosyncppp[info->line];
3029 }
3030
3031 slgt_device_count++;
3032
3033 if (!slgt_device_list)
3034 slgt_device_list = info;
3035 else {
3036 struct slgt_info *current_dev = slgt_device_list;
3037 while(current_dev->next_device)
3038 current_dev = current_dev->next_device;
3039 current_dev->next_device = info;
3040 }
3041
3042 if (info->max_frame_size < 4096)
3043 info->max_frame_size = 4096;
3044 else if (info->max_frame_size > 65535)
3045 info->max_frame_size = 65535;
3046
3047 switch(info->pdev->device) {
3048 case SYNCLINK_GT_DEVICE_ID:
3049 devstr = "GT";
3050 break;
3051 case SYNCLINK_GT4_DEVICE_ID:
3052 devstr = "GT4";
3053 break;
3054 case SYNCLINK_AC_DEVICE_ID:
3055 devstr = "AC";
3056 info->params.mode = MGSL_MODE_ASYNC;
3057 break;
3058 default:
3059 devstr = "(unknown model)";
3060 }
3061 printk("SyncLink %s %s IO=%08x IRQ=%d MaxFrameSize=%u\n",
3062 devstr, info->device_name, info->phys_reg_addr,
3063 info->irq_level, info->max_frame_size);
3064
3065#ifdef CONFIG_HDLC
3066 hdlcdev_init(info);
3067#endif
3068}
3069
3070/*
3071 * allocate device instance structure, return NULL on failure
3072 */
3073static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev)
3074{
3075 struct slgt_info *info;
3076
3077 info = kmalloc(sizeof(struct slgt_info), GFP_KERNEL);
3078
3079 if (!info) {
3080 DBGERR(("%s device alloc failed adapter=%d port=%d\n",
3081 driver_name, adapter_num, port_num));
3082 } else {
3083 memset(info, 0, sizeof(struct slgt_info));
3084 info->magic = MGSL_MAGIC;
3085 INIT_WORK(&info->task, bh_handler, info);
3086 info->max_frame_size = 4096;
3087 info->raw_rx_size = DMABUFSIZE;
3088 info->close_delay = 5*HZ/10;
3089 info->closing_wait = 30*HZ;
3090 init_waitqueue_head(&info->open_wait);
3091 init_waitqueue_head(&info->close_wait);
3092 init_waitqueue_head(&info->status_event_wait_q);
3093 init_waitqueue_head(&info->event_wait_q);
3094 spin_lock_init(&info->netlock);
3095 memcpy(&info->params,&default_params,sizeof(MGSL_PARAMS));
3096 info->idle_mode = HDLC_TXIDLE_FLAGS;
3097 info->adapter_num = adapter_num;
3098 info->port_num = port_num;
3099
3100 init_timer(&info->tx_timer);
3101 info->tx_timer.data = (unsigned long)info;
3102 info->tx_timer.function = tx_timeout;
3103
3104 init_timer(&info->rx_timer);
3105 info->rx_timer.data = (unsigned long)info;
3106 info->rx_timer.function = rx_timeout;
3107
3108 /* Copy configuration info to device instance data */
3109 info->pdev = pdev;
3110 info->irq_level = pdev->irq;
3111 info->phys_reg_addr = pci_resource_start(pdev,0);
3112
3113		/* ioremap works on page boundaries
3114		 * map the full page starting at the page boundary
3115		 */
3116 info->reg_offset = info->phys_reg_addr & (PAGE_SIZE-1);
3117 info->phys_reg_addr &= ~(PAGE_SIZE-1);
3118
3119 info->bus_type = MGSL_BUS_TYPE_PCI;
3120 info->irq_flags = SA_SHIRQ;
3121
3122 info->init_error = -1; /* assume error, set to 0 on successful init */
3123 }
3124
3125 return info;
3126}
3127
3128static void device_init(int adapter_num, struct pci_dev *pdev)
3129{
3130 struct slgt_info *port_array[SLGT_MAX_PORTS];
3131 int i;
3132 int port_count = 1;
3133
3134 if (pdev->device == SYNCLINK_GT4_DEVICE_ID)
3135 port_count = 4;
3136
3137 /* allocate device instances for all ports */
3138 for (i=0; i < port_count; ++i) {
3139 port_array[i] = alloc_dev(adapter_num, i, pdev);
3140 if (port_array[i] == NULL) {
3141 for (--i; i >= 0; --i)
3142 kfree(port_array[i]);
3143 return;
3144 }
3145 }
3146
3147 /* give copy of port_array to all ports and add to device list */
3148 for (i=0; i < port_count; ++i) {
3149 memcpy(port_array[i]->port_array, port_array, sizeof(port_array));
3150 add_device(port_array[i]);
3151 port_array[i]->port_count = port_count;
3152 spin_lock_init(&port_array[i]->lock);
3153 }
3154
3155 /* Allocate and claim adapter resources */
3156 if (!claim_resources(port_array[0])) {
3157
3158 alloc_dma_bufs(port_array[0]);
3159
3160 /* copy resource information from first port to others */
3161 for (i = 1; i < port_count; ++i) {
3162 port_array[i]->lock = port_array[0]->lock;
3163 port_array[i]->irq_level = port_array[0]->irq_level;
3164 port_array[i]->reg_addr = port_array[0]->reg_addr;
3165 alloc_dma_bufs(port_array[i]);
3166 }
3167
3168 if (request_irq(port_array[0]->irq_level,
3169 slgt_interrupt,
3170 port_array[0]->irq_flags,
3171 port_array[0]->device_name,
3172 port_array[0]) < 0) {
3173 DBGERR(("%s request_irq failed IRQ=%d\n",
3174 port_array[0]->device_name,
3175 port_array[0]->irq_level));
3176 } else {
3177 port_array[0]->irq_requested = 1;
3178 adapter_test(port_array[0]);
3179 for (i=1 ; i < port_count ; i++)
3180 port_array[i]->init_error = port_array[0]->init_error;
3181 }
3182 }
3183}
3184
3185static int __devinit init_one(struct pci_dev *dev,
3186 const struct pci_device_id *ent)
3187{
3188 if (pci_enable_device(dev)) {
3189 printk("error enabling pci device %p\n", dev);
3190 return -EIO;
3191 }
3192 pci_set_master(dev);
3193 device_init(slgt_device_count, dev);
3194 return 0;
3195}
3196
3197static void __devexit remove_one(struct pci_dev *dev)
3198{
3199}
3200
3201static struct tty_operations ops = {
3202 .open = open,
3203 .close = close,
3204 .write = write,
3205 .put_char = put_char,
3206 .flush_chars = flush_chars,
3207 .write_room = write_room,
3208 .chars_in_buffer = chars_in_buffer,
3209 .flush_buffer = flush_buffer,
3210 .ioctl = ioctl,
3211 .throttle = throttle,
3212 .unthrottle = unthrottle,
3213 .send_xchar = send_xchar,
3214 .break_ctl = set_break,
3215 .wait_until_sent = wait_until_sent,
3216 .read_proc = read_proc,
3217 .set_termios = set_termios,
3218 .stop = tx_hold,
3219 .start = tx_release,
3220 .hangup = hangup,
3221 .tiocmget = tiocmget,
3222 .tiocmset = tiocmset,
3223};
3224
3225static void slgt_cleanup(void)
3226{
3227 int rc;
3228 struct slgt_info *info;
3229 struct slgt_info *tmp;
3230
3231 printk("unload %s %s\n", driver_name, driver_version);
3232
3233 if (serial_driver) {
3234 if ((rc = tty_unregister_driver(serial_driver)))
3235 DBGERR(("tty_unregister_driver error=%d\n", rc));
3236 put_tty_driver(serial_driver);
3237 }
3238
3239 /* reset devices */
3240 info = slgt_device_list;
3241 while(info) {
3242 reset_port(info);
3243 info = info->next_device;
3244 }
3245
3246 /* release devices */
3247 info = slgt_device_list;
3248 while(info) {
3249#ifdef CONFIG_HDLC
3250 hdlcdev_exit(info);
3251#endif
3252 free_dma_bufs(info);
3253 free_tmp_rbuf(info);
3254 if (info->port_num == 0)
3255 release_resources(info);
3256 tmp = info;
3257 info = info->next_device;
3258 kfree(tmp);
3259 }
3260
3261 if (pci_registered)
3262 pci_unregister_driver(&pci_driver);
3263}
3264
3265/*
3266 * Driver initialization entry point.
3267 */
3268static int __init slgt_init(void)
3269{
3270 int rc;
3271
3272 printk("%s %s\n", driver_name, driver_version);
3273
3274 slgt_device_count = 0;
3275 if ((rc = pci_register_driver(&pci_driver)) < 0) {
3276 printk("%s pci_register_driver error=%d\n", driver_name, rc);
3277 return rc;
3278 }
3279 pci_registered = 1;
3280
3281 if (!slgt_device_list) {
3282 printk("%s no devices found\n",driver_name);
3283		rc = -ENODEV; goto error; /* unwind PCI registration via slgt_cleanup() */
3284 }
3285
3286 serial_driver = alloc_tty_driver(MAX_DEVICES);
3287 if (!serial_driver) {
3288 rc = -ENOMEM;
3289 goto error;
3290 }
3291
3292 /* Initialize the tty_driver structure */
3293
3294 serial_driver->owner = THIS_MODULE;
3295 serial_driver->driver_name = tty_driver_name;
3296 serial_driver->name = tty_dev_prefix;
3297 serial_driver->major = ttymajor;
3298 serial_driver->minor_start = 64;
3299 serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
3300 serial_driver->subtype = SERIAL_TYPE_NORMAL;
3301 serial_driver->init_termios = tty_std_termios;
3302 serial_driver->init_termios.c_cflag =
3303 B9600 | CS8 | CREAD | HUPCL | CLOCAL;
3304 serial_driver->flags = TTY_DRIVER_REAL_RAW;
3305 tty_set_operations(serial_driver, &ops);
3306 if ((rc = tty_register_driver(serial_driver)) < 0) {
3307 DBGERR(("%s can't register serial driver\n", driver_name));
3308 put_tty_driver(serial_driver);
3309 serial_driver = NULL;
3310 goto error;
3311 }
3312
3313 printk("%s %s, tty major#%d\n",
3314 driver_name, driver_version,
3315 serial_driver->major);
3316
3317 return 0;
3318
3319error:
3320 slgt_cleanup();
3321 return rc;
3322}
3323
3324static void __exit slgt_exit(void)
3325{
3326 slgt_cleanup();
3327}
3328
3329module_init(slgt_init);
3330module_exit(slgt_exit);
3331
3332/*
3333 * register access routines
3334 */
3335
3336#define CALC_REGADDR() \
3337 unsigned long reg_addr = ((unsigned long)info->reg_addr) + addr; \
3338 if (addr >= 0x80) \
3339 reg_addr += (info->port_num) * 32;
3340
3341static __u8 rd_reg8(struct slgt_info *info, unsigned int addr)
3342{
3343 CALC_REGADDR();
3344 return readb((void __iomem *)reg_addr);
3345}
3346
3347static void wr_reg8(struct slgt_info *info, unsigned int addr, __u8 value)
3348{
3349 CALC_REGADDR();
3350 writeb(value, (void __iomem *)reg_addr);
3351}
3352
3353static __u16 rd_reg16(struct slgt_info *info, unsigned int addr)
3354{
3355 CALC_REGADDR();
3356 return readw((void __iomem *)reg_addr);
3357}
3358
3359static void wr_reg16(struct slgt_info *info, unsigned int addr, __u16 value)
3360{
3361 CALC_REGADDR();
3362 writew(value, (void __iomem *)reg_addr);
3363}
3364
3365static __u32 rd_reg32(struct slgt_info *info, unsigned int addr)
3366{
3367 CALC_REGADDR();
3368 return readl((void __iomem *)reg_addr);
3369}
3370
3371static void wr_reg32(struct slgt_info *info, unsigned int addr, __u32 value)
3372{
3373 CALC_REGADDR();
3374 writel(value, (void __iomem *)reg_addr);
3375}
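/* Worked example (added note, register offset chosen arbitrarily for
 * illustration): registers below offset 0x80 are adapter-global, while
 * per-port registers start at 0x80 and repeat every 32 bytes. So for a
 * per-port register at offset 0x8c, CALC_REGADDR() yields
 *
 *	port 0: reg_addr + 0x8c
 *	port 2: reg_addr + 0x8c + 2*32 = reg_addr + 0xcc
 */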
3376
3377static void rdma_reset(struct slgt_info *info)
3378{
3379 unsigned int i;
3380
3381 /* set reset bit */
3382 wr_reg32(info, RDCSR, BIT1);
3383
3384 /* wait for enable bit cleared */
3385 for(i=0 ; i < 1000 ; i++)
3386 if (!(rd_reg32(info, RDCSR) & BIT0))
3387 break;
3388}
3389
3390static void tdma_reset(struct slgt_info *info)
3391{
3392 unsigned int i;
3393
3394 /* set reset bit */
3395 wr_reg32(info, TDCSR, BIT1);
3396
3397 /* wait for enable bit cleared */
3398 for(i=0 ; i < 1000 ; i++)
3399 if (!(rd_reg32(info, TDCSR) & BIT0))
3400 break;
3401}
3402
3403/*
3404 * enable internal loopback
3405 * TxCLK and RxCLK are generated from BRG
3406 * and TxD is looped back to RxD internally.
3407 */
3408static void enable_loopback(struct slgt_info *info)
3409{
3410	/* SCR (serial control) BIT2=loopback enable */
3411 wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT2));
3412
3413 if (info->params.mode != MGSL_MODE_ASYNC) {
3414 /* CCR (clock control)
3415 * 07..05 tx clock source (010 = BRG)
3416 * 04..02 rx clock source (010 = BRG)
3417 * 01 auxclk enable (0 = disable)
3418 * 00 BRG enable (1 = enable)
3419 *
3420 * 0100 1001
3421 */
3422 wr_reg8(info, CCR, 0x49);
3423
3424 /* set speed if available, otherwise use default */
3425 if (info->params.clock_speed)
3426 set_rate(info, info->params.clock_speed);
3427 else
3428 set_rate(info, 3686400);
3429 }
3430}
3431
3432/*
3433 * set baud rate generator to specified rate
3434 */
3435static void set_rate(struct slgt_info *info, u32 rate)
3436{
3437 unsigned int div;
3438 static unsigned int osc = 14745600;
3439
3440 /* div = osc/rate - 1
3441 *
3442 * Round div up if osc/rate is not integer to
3443 * force to next slowest rate.
3444 */
3445
3446 if (rate) {
3447 div = osc/rate;
3448 if (!(osc % rate) && div)
3449 div--;
3450 wr_reg16(info, BDR, (unsigned short)div);
3451 }
3452}
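/* Worked examples (added note): the hardware divides osc by (div + 1).
 * For rate=9600, osc/rate = 14745600/9600 = 1536 exactly, so div becomes
 * 1535 and the output is exactly 9600. For rate=14000, osc/rate = 1053
 * with a remainder, so div stays 1053 and the output is 14745600/1054,
 * about 13990 bps - the next slowest rate, as the comment above requires.
 */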
3453
3454static void rx_stop(struct slgt_info *info)
3455{
3456 unsigned short val;
3457
3458 /* disable and reset receiver */
3459 val = rd_reg16(info, RCR) & ~BIT1; /* clear enable bit */
3460 wr_reg16(info, RCR, (unsigned short)(val | BIT2)); /* set reset bit */
3461 wr_reg16(info, RCR, val); /* clear reset bit */
3462
3463 slgt_irq_off(info, IRQ_RXOVER + IRQ_RXDATA + IRQ_RXIDLE);
3464
3465 /* clear pending rx interrupts */
3466 wr_reg16(info, SSR, IRQ_RXIDLE + IRQ_RXOVER);
3467
3468 rdma_reset(info);
3469
3470 info->rx_enabled = 0;
3471 info->rx_restart = 0;
3472}
3473
3474static void rx_start(struct slgt_info *info)
3475{
3476 unsigned short val;
3477
3478 slgt_irq_off(info, IRQ_RXOVER + IRQ_RXDATA);
3479
3480 /* clear pending rx overrun IRQ */
3481 wr_reg16(info, SSR, IRQ_RXOVER);
3482
3483 /* reset and disable receiver */
3484 val = rd_reg16(info, RCR) & ~BIT1; /* clear enable bit */
3485 wr_reg16(info, RCR, (unsigned short)(val | BIT2)); /* set reset bit */
3486 wr_reg16(info, RCR, val); /* clear reset bit */
3487
3488 rdma_reset(info);
3489 reset_rbufs(info);
3490
3491 /* set 1st descriptor address */
3492 wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
3493
3494 if (info->params.mode != MGSL_MODE_ASYNC) {
3495 /* enable rx DMA and DMA interrupt */
3496 wr_reg32(info, RDCSR, (BIT2 + BIT0));
3497 } else {
3498 /* enable saving of rx status, rx DMA and DMA interrupt */
3499 wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
3500 }
3501
3502 slgt_irq_on(info, IRQ_RXOVER);
3503
3504 /* enable receiver */
3505 wr_reg16(info, RCR, (unsigned short)(rd_reg16(info, RCR) | BIT1));
3506
3507 info->rx_restart = 0;
3508 info->rx_enabled = 1;
3509}
3510
3511static void tx_start(struct slgt_info *info)
3512{
3513 if (!info->tx_enabled) {
3514 wr_reg16(info, TCR,
3515 (unsigned short)(rd_reg16(info, TCR) | BIT1));
3516		info->tx_enabled = 1;
3517 }
3518
3519 if (info->tx_count) {
3520 info->drop_rts_on_tx_done = 0;
3521
3522 if (info->params.mode != MGSL_MODE_ASYNC) {
3523 if (info->params.flags & HDLC_FLAG_AUTO_RTS) {
3524 get_signals(info);
3525 if (!(info->signals & SerialSignal_RTS)) {
3526 info->signals |= SerialSignal_RTS;
3527 set_signals(info);
3528 info->drop_rts_on_tx_done = 1;
3529 }
3530 }
3531
3532 slgt_irq_off(info, IRQ_TXDATA);
3533 slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE);
3534 /* clear tx idle and underrun status bits */
3535 wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER));
3536
3537 if (!(rd_reg32(info, TDCSR) & BIT0)) {
3538 /* tx DMA stopped, restart tx DMA */
3539 tdma_reset(info);
3540 /* set 1st descriptor address */
3541 wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
3542 if (info->params.mode == MGSL_MODE_RAW)
3543 wr_reg32(info, TDCSR, BIT2 + BIT0); /* IRQ + DMA enable */
3544 else
3545 wr_reg32(info, TDCSR, BIT0); /* DMA enable */
3546 }
3547
3548 if (info->params.mode != MGSL_MODE_RAW) {
3549 info->tx_timer.expires = jiffies + msecs_to_jiffies(5000);
3550 add_timer(&info->tx_timer);
3551 }
3552 } else {
3553 tdma_reset(info);
3554 /* set 1st descriptor address */
3555 wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
3556
3557 slgt_irq_off(info, IRQ_TXDATA);
3558 slgt_irq_on(info, IRQ_TXIDLE);
3559 /* clear tx idle status bit */
3560 wr_reg16(info, SSR, IRQ_TXIDLE);
3561
3562 /* enable tx DMA */
3563 wr_reg32(info, TDCSR, BIT0);
3564 }
3565
3566 info->tx_active = 1;
3567 }
3568}
3569
3570static void tx_stop(struct slgt_info *info)
3571{
3572 unsigned short val;
3573
3574 del_timer(&info->tx_timer);
3575
3576 tdma_reset(info);
3577
3578 /* reset and disable transmitter */
3579 val = rd_reg16(info, TCR) & ~BIT1; /* clear enable bit */
3580 wr_reg16(info, TCR, (unsigned short)(val | BIT2)); /* set reset bit */
3581 wr_reg16(info, TCR, val); /* clear reset */
3582
3583 slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER);
3584
3585 /* clear tx idle and underrun status bit */
3586 wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER));
3587
3588 reset_tbufs(info);
3589
3590 info->tx_enabled = 0;
3591 info->tx_active = 0;
3592}
3593
3594static void reset_port(struct slgt_info *info)
3595{
3596 if (!info->reg_addr)
3597 return;
3598
3599 tx_stop(info);
3600 rx_stop(info);
3601
3602 info->signals &= ~(SerialSignal_DTR + SerialSignal_RTS);
3603 set_signals(info);
3604
3605 slgt_irq_off(info, IRQ_ALL | IRQ_MASTER);
3606}
3607
3608static void reset_adapter(struct slgt_info *info)
3609{
3610 int i;
3611 for (i=0; i < info->port_count; ++i) {
3612 if (info->port_array[i])
3613 reset_port(info->port_array[i]);
3614 }
3615}
3616
3617static void async_mode(struct slgt_info *info)
3618{
3619 unsigned short val;
3620
3621 slgt_irq_off(info, IRQ_ALL | IRQ_MASTER);
3622 tx_stop(info);
3623 rx_stop(info);
3624
3625 /* TCR (tx control)
3626 *
3627 * 15..13 mode, 010=async
3628 * 12..10 encoding, 000=NRZ
3629 * 09 parity enable
3630 * 08 1=odd parity, 0=even parity
3631 * 07 1=RTS driver control
3632 * 06 1=break enable
3633 * 05..04 character length
3634 * 00=5 bits
3635 * 01=6 bits
3636 * 10=7 bits
3637 * 11=8 bits
3638 * 03 0=1 stop bit, 1=2 stop bits
3639 * 02 reset
3640 * 01 enable
3641 * 00 auto-CTS enable
3642 */
3643 val = 0x4000;
3644
3645 if (info->if_mode & MGSL_INTERFACE_RTS_EN)
3646 val |= BIT7;
3647
3648 if (info->params.parity != ASYNC_PARITY_NONE) {
3649 val |= BIT9;
3650 if (info->params.parity == ASYNC_PARITY_ODD)
3651 val |= BIT8;
3652 }
3653
3654 switch (info->params.data_bits)
3655 {
3656 case 6: val |= BIT4; break;
3657 case 7: val |= BIT5; break;
3658 case 8: val |= BIT5 + BIT4; break;
3659 }
3660
3661 if (info->params.stop_bits != 1)
3662 val |= BIT3;
3663
3664 if (info->params.flags & HDLC_FLAG_AUTO_CTS)
3665 val |= BIT0;
3666
3667 wr_reg16(info, TCR, val);
3668
3669 /* RCR (rx control)
3670 *
3671 * 15..13 mode, 010=async
3672 * 12..10 encoding, 000=NRZ
3673 * 09 parity enable
3674 * 08 1=odd parity, 0=even parity
3675 * 07..06 reserved, must be 0
3676 * 05..04 character length
3677 * 00=5 bits
3678 * 01=6 bits
3679 * 10=7 bits
3680 * 11=8 bits
3681 * 03 reserved, must be zero
3682 * 02 reset
3683 * 01 enable
3684 * 00 auto-DCD enable
3685 */
3686 val = 0x4000;
3687
3688 if (info->params.parity != ASYNC_PARITY_NONE) {
3689 val |= BIT9;
3690 if (info->params.parity == ASYNC_PARITY_ODD)
3691 val |= BIT8;
3692 }
3693
3694 switch (info->params.data_bits)
3695 {
3696 case 6: val |= BIT4; break;
3697 case 7: val |= BIT5; break;
3698 case 8: val |= BIT5 + BIT4; break;
3699 }
3700
3701 if (info->params.flags & HDLC_FLAG_AUTO_DCD)
3702 val |= BIT0;
3703
3704 wr_reg16(info, RCR, val);
3705
3706 /* CCR (clock control)
3707 *
3708 * 07..05 011 = tx clock source is BRG/16
3709 * 04..02 010 = rx clock source is BRG
3710 * 01 0 = auxclk disabled
3711 * 00 1 = BRG enabled
3712 *
3713 * 0110 1001
3714 */
3715 wr_reg8(info, CCR, 0x69);
3716
3717 msc_set_vcr(info);
3718
3719 tx_set_idle(info);
3720
3721 /* SCR (serial control)
3722 *
3723 * 15 1=tx req on FIFO half empty
3724 * 14 1=rx req on FIFO half full
3725 * 13 tx data IRQ enable
3726 * 12 tx idle IRQ enable
3727 * 11 rx break on IRQ enable
3728 * 10 rx data IRQ enable
3729 * 09 rx break off IRQ enable
3730 * 08 overrun IRQ enable
3731 * 07 DSR IRQ enable
3732 * 06 CTS IRQ enable
3733 * 05 DCD IRQ enable
3734 * 04 RI IRQ enable
3735 * 03 reserved, must be zero
3736 * 02 1=txd->rxd internal loopback enable
3737 * 01 reserved, must be zero
3738 * 00 1=master IRQ enable
3739 */
3740 val = BIT15 + BIT14 + BIT0;
3741 wr_reg16(info, SCR, val);
3742
3743 slgt_irq_on(info, IRQ_RXBREAK | IRQ_RXOVER);
3744
3745 set_rate(info, info->params.data_rate * 16);
3746
3747 if (info->params.loopback)
3748 enable_loopback(info);
3749}
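/* Worked example (added note): for 8 data bits, no parity, 1 stop bit
 * and no auto-CTS or RTS driver control, the TCR value built above is
 *
 *	0x4000 (mode 010 = async) + BIT5 + BIT4 (8 data bits) = 0x4030
 */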
3750
3751static void hdlc_mode(struct slgt_info *info)
3752{
3753 unsigned short val;
3754
3755 slgt_irq_off(info, IRQ_ALL | IRQ_MASTER);
3756 tx_stop(info);
3757 rx_stop(info);
3758
3759 /* TCR (tx control)
3760 *
3761 * 15..13 mode, 000=HDLC 001=raw sync
3762 * 12..10 encoding
3763 * 09 CRC enable
3764 * 08 CRC32
3765 * 07 1=RTS driver control
3766 * 06 preamble enable
3767 * 05..04 preamble length
3768 * 03 share open/close flag
3769 * 02 reset
3770 * 01 enable
3771 * 00 auto-CTS enable
3772 */
3773 val = 0;
3774
3775 if (info->params.mode == MGSL_MODE_RAW)
3776 val |= BIT13;
3777 if (info->if_mode & MGSL_INTERFACE_RTS_EN)
3778 val |= BIT7;
3779
3780 switch(info->params.encoding)
3781 {
3782 case HDLC_ENCODING_NRZB: val |= BIT10; break;
3783 case HDLC_ENCODING_NRZI_MARK: val |= BIT11; break;
3784 case HDLC_ENCODING_NRZI: val |= BIT11 + BIT10; break;
3785 case HDLC_ENCODING_BIPHASE_MARK: val |= BIT12; break;
3786 case HDLC_ENCODING_BIPHASE_SPACE: val |= BIT12 + BIT10; break;
3787 case HDLC_ENCODING_BIPHASE_LEVEL: val |= BIT12 + BIT11; break;
3788 case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: val |= BIT12 + BIT11 + BIT10; break;
3789 }
3790
3791 switch (info->params.crc_type)
3792 {
3793 case HDLC_CRC_16_CCITT: val |= BIT9; break;
3794 case HDLC_CRC_32_CCITT: val |= BIT9 + BIT8; break;
3795 }
3796
3797 if (info->params.preamble != HDLC_PREAMBLE_PATTERN_NONE)
3798 val |= BIT6;
3799
3800 switch (info->params.preamble_length)
3801 {
3802 case HDLC_PREAMBLE_LENGTH_16BITS: val |= BIT5; break;
3803 case HDLC_PREAMBLE_LENGTH_32BITS: val |= BIT4; break;
3804 case HDLC_PREAMBLE_LENGTH_64BITS: val |= BIT5 + BIT4; break;
3805 }
3806
3807 if (info->params.flags & HDLC_FLAG_AUTO_CTS)
3808 val |= BIT0;
3809
3810 wr_reg16(info, TCR, val);
3811
3812 /* TPR (transmit preamble) */
3813
3814 switch (info->params.preamble)
3815 {
3816 case HDLC_PREAMBLE_PATTERN_FLAGS: val = 0x7e; break;
3817 case HDLC_PREAMBLE_PATTERN_ONES: val = 0xff; break;
3818 case HDLC_PREAMBLE_PATTERN_ZEROS: val = 0x00; break;
3819 case HDLC_PREAMBLE_PATTERN_10: val = 0x55; break;
3820 case HDLC_PREAMBLE_PATTERN_01: val = 0xaa; break;
3821 default: val = 0x7e; break;
3822 }
3823 wr_reg8(info, TPR, (unsigned char)val);
3824
3825 /* RCR (rx control)
3826 *
3827 * 15..13 mode, 000=HDLC 001=raw sync
3828 * 12..10 encoding
3829 * 09 CRC enable
3830 * 08 CRC32
3831 * 07..03 reserved, must be 0
3832 * 02 reset
3833 * 01 enable
3834 * 00 auto-DCD enable
3835 */
3836 val = 0;
3837
3838 if (info->params.mode == MGSL_MODE_RAW)
3839 val |= BIT13;
3840
3841 switch(info->params.encoding)
3842 {
3843 case HDLC_ENCODING_NRZB: val |= BIT10; break;
3844 case HDLC_ENCODING_NRZI_MARK: val |= BIT11; break;
3845 case HDLC_ENCODING_NRZI: val |= BIT11 + BIT10; break;
3846 case HDLC_ENCODING_BIPHASE_MARK: val |= BIT12; break;
3847 case HDLC_ENCODING_BIPHASE_SPACE: val |= BIT12 + BIT10; break;
3848 case HDLC_ENCODING_BIPHASE_LEVEL: val |= BIT12 + BIT11; break;
3849 case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: val |= BIT12 + BIT11 + BIT10; break;
3850 }
3851
3852 switch (info->params.crc_type)
3853 {
3854 case HDLC_CRC_16_CCITT: val |= BIT9; break;
3855 case HDLC_CRC_32_CCITT: val |= BIT9 + BIT8; break;
3856 }
3857
3858 if (info->params.flags & HDLC_FLAG_AUTO_DCD)
3859 val |= BIT0;
3860
3861 wr_reg16(info, RCR, val);
3862
3863 /* CCR (clock control)
3864 *
3865 * 07..05 tx clock source
3866 * 04..02 rx clock source
3867 * 01 auxclk enable
3868 * 00 BRG enable
3869 */
3870 val = 0;
3871
3872 if (info->params.flags & HDLC_FLAG_TXC_BRG)
3873 {
3874 // when RxC source is DPLL, BRG generates 16X DPLL
3875 // reference clock, so take TxC from BRG/16 to get
3876 // transmit clock at actual data rate
3877 if (info->params.flags & HDLC_FLAG_RXC_DPLL)
3878 val |= BIT6 + BIT5; /* 011, txclk = BRG/16 */
3879 else
3880 val |= BIT6; /* 010, txclk = BRG */
3881 }
3882 else if (info->params.flags & HDLC_FLAG_TXC_DPLL)
3883 val |= BIT7; /* 100, txclk = DPLL Input */
3884 else if (info->params.flags & HDLC_FLAG_TXC_RXCPIN)
3885 val |= BIT5; /* 001, txclk = RXC Input */
3886
3887 if (info->params.flags & HDLC_FLAG_RXC_BRG)
3888 val |= BIT3; /* 010, rxclk = BRG */
3889 else if (info->params.flags & HDLC_FLAG_RXC_DPLL)
3890 val |= BIT4; /* 100, rxclk = DPLL */
3891 else if (info->params.flags & HDLC_FLAG_RXC_TXCPIN)
3892 val |= BIT2; /* 001, rxclk = TXC Input */
3893
3894 if (info->params.clock_speed)
3895 val |= BIT1 + BIT0;
3896
3897 wr_reg8(info, CCR, (unsigned char)val);
3898
3899 if (info->params.flags & (HDLC_FLAG_TXC_DPLL + HDLC_FLAG_RXC_DPLL))
3900 {
3901 // program DPLL mode
3902 switch(info->params.encoding)
3903 {
3904 case HDLC_ENCODING_BIPHASE_MARK:
3905 case HDLC_ENCODING_BIPHASE_SPACE:
3906 val = BIT7; break;
3907 case HDLC_ENCODING_BIPHASE_LEVEL:
3908 case HDLC_ENCODING_DIFF_BIPHASE_LEVEL:
3909 val = BIT7 + BIT6; break;
3910 default: val = BIT6; // NRZ encodings
3911 }
3912 wr_reg16(info, RCR, (unsigned short)(rd_reg16(info, RCR) | val));
3913
3914 // DPLL requires a 16X reference clock from BRG
3915 set_rate(info, info->params.clock_speed * 16);
3916 }
3917 else
3918 set_rate(info, info->params.clock_speed);
3919
3920 tx_set_idle(info);
3921
3922 msc_set_vcr(info);
3923
3924 /* SCR (serial control)
3925 *
3926 * 15 1=tx req on FIFO half empty
3927 * 14 1=rx req on FIFO half full
3928 * 13 tx data IRQ enable
3929 * 12 tx idle IRQ enable
3930 * 11 underrun IRQ enable
3931 * 10 rx data IRQ enable
3932 * 09 rx idle IRQ enable
3933 * 08 overrun IRQ enable
3934 * 07 DSR IRQ enable
3935 * 06 CTS IRQ enable
3936 * 05 DCD IRQ enable
3937 * 04 RI IRQ enable
3938 * 03 reserved, must be zero
3939 * 02 1=txd->rxd internal loopback enable
3940 * 01 reserved, must be zero
3941 * 00 1=master IRQ enable
3942 */
3943 wr_reg16(info, SCR, BIT15 + BIT14 + BIT0);
3944
3945 if (info->params.loopback)
3946 enable_loopback(info);
3947}
3948
3949/*
3950 * set transmit idle mode
3951 */
3952static void tx_set_idle(struct slgt_info *info)
3953{
3954 unsigned char val = 0xff;
3955
3956 switch(info->idle_mode)
3957 {
3958 case HDLC_TXIDLE_FLAGS: val = 0x7e; break;
3959 case HDLC_TXIDLE_ALT_ZEROS_ONES: val = 0xaa; break;
3960 case HDLC_TXIDLE_ZEROS: val = 0x00; break;
3961 case HDLC_TXIDLE_ONES: val = 0xff; break;
3962 case HDLC_TXIDLE_ALT_MARK_SPACE: val = 0xaa; break;
3963 case HDLC_TXIDLE_SPACE: val = 0x00; break;
3964 case HDLC_TXIDLE_MARK: val = 0xff; break;
3965 }
3966
3967 wr_reg8(info, TIR, val);
3968}
3969
3970/*
3971 * get state of V24 status (input) signals
3972 */
3973static void get_signals(struct slgt_info *info)
3974{
3975 unsigned short status = rd_reg16(info, SSR);
3976
3977 /* clear all serial signals except DTR and RTS */
3978 info->signals &= SerialSignal_DTR + SerialSignal_RTS;
3979
3980 if (status & BIT3)
3981 info->signals |= SerialSignal_DSR;
3982 if (status & BIT2)
3983 info->signals |= SerialSignal_CTS;
3984 if (status & BIT1)
3985 info->signals |= SerialSignal_DCD;
3986 if (status & BIT0)
3987 info->signals |= SerialSignal_RI;
3988}
3989
3990/*
3991 * set V.24 Control Register based on current configuration
3992 */
3993static void msc_set_vcr(struct slgt_info *info)
3994{
3995 unsigned char val = 0;
3996
3997 /* VCR (V.24 control)
3998 *
3999 * 07..04 serial IF select
4000 * 03 DTR
4001 * 02 RTS
4002 * 01 LL
4003 * 00 RL
4004 */
4005
4006 switch(info->if_mode & MGSL_INTERFACE_MASK)
4007 {
4008 case MGSL_INTERFACE_RS232:
4009 val |= BIT5; /* 0010 */
4010 break;
4011 case MGSL_INTERFACE_V35:
4012 val |= BIT7 + BIT6 + BIT5; /* 1110 */
4013 break;
4014 case MGSL_INTERFACE_RS422:
4015 val |= BIT6; /* 0100 */
4016 break;
4017 }
4018
4019 if (info->signals & SerialSignal_DTR)
4020 val |= BIT3;
4021 if (info->signals & SerialSignal_RTS)
4022 val |= BIT2;
4023 if (info->if_mode & MGSL_INTERFACE_LL)
4024 val |= BIT1;
4025 if (info->if_mode & MGSL_INTERFACE_RL)
4026 val |= BIT0;
4027 wr_reg8(info, VCR, val);
4028}
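/* Worked example (added note): an RS-232 interface with DTR and RTS both
 * asserted gives VCR = BIT5 (0010 = RS-232) + BIT3 (DTR) + BIT2 (RTS),
 * i.e. 0x2c.
 */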
4029
4030/*
4031 * set state of V24 control (output) signals
4032 */
4033static void set_signals(struct slgt_info *info)
4034{
4035 unsigned char val = rd_reg8(info, VCR);
4036 if (info->signals & SerialSignal_DTR)
4037 val |= BIT3;
4038 else
4039 val &= ~BIT3;
4040 if (info->signals & SerialSignal_RTS)
4041 val |= BIT2;
4042 else
4043 val &= ~BIT2;
4044 wr_reg8(info, VCR, val);
4045}
4046
4047/*
4048 * free range of receive DMA buffers (i to last)
4049 */
4050static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last)
4051{
4052 int done = 0;
4053
4054 while(!done) {
4055 /* reset current buffer for reuse */
4056 info->rbufs[i].status = 0;
4057 if (info->params.mode == MGSL_MODE_RAW)
4058 set_desc_count(info->rbufs[i], info->raw_rx_size);
4059 else
4060 set_desc_count(info->rbufs[i], DMABUFSIZE);
4061
4062 if (i == last)
4063 done = 1;
4064 if (++i == info->rbuf_count)
4065 i = 0;
4066 }
4067 info->rbuf_current = i;
4068}
4069
4070/*
4071 * mark all receive DMA buffers as free
4072 */
4073static void reset_rbufs(struct slgt_info *info)
4074{
4075 free_rbufs(info, 0, info->rbuf_count - 1);
4076}
4077
4078/*
4079 * pass receive HDLC frame to upper layer
4080 *
4081 * return 1 if frame available, otherwise 0
4082 */
4083static int rx_get_frame(struct slgt_info *info)
4084{
4085 unsigned int start, end;
4086 unsigned short status;
4087 unsigned int framesize = 0;
4088 int rc = 0;
4089 unsigned long flags;
4090 struct tty_struct *tty = info->tty;
4091 unsigned char addr_field = 0xff;
4092
4093check_again:
4094
4095 framesize = 0;
4096 addr_field = 0xff;
4097 start = end = info->rbuf_current;
4098
4099 for (;;) {
4100 if (!desc_complete(info->rbufs[end]))
4101 goto cleanup;
4102
4103 if (framesize == 0 && info->params.addr_filter != 0xff)
4104 addr_field = info->rbufs[end].buf[0];
4105
4106 framesize += desc_count(info->rbufs[end]);
4107
4108 if (desc_eof(info->rbufs[end]))
4109 break;
4110
4111 if (++end == info->rbuf_count)
4112 end = 0;
4113
4114 if (end == info->rbuf_current) {
4115 if (info->rx_enabled){
4116 spin_lock_irqsave(&info->lock,flags);
4117 rx_start(info);
4118 spin_unlock_irqrestore(&info->lock,flags);
4119 }
4120 goto cleanup;
4121 }
4122 }
4123
4124 /* status
4125 *
4126 * 15 buffer complete
4127 * 14..06 reserved
4128 * 05..04 residue
4129 * 02 eof (end of frame)
4130 * 01 CRC error
4131 * 00 abort
4132 */
4133 status = desc_status(info->rbufs[end]);
4134
4135 /* ignore CRC bit if not using CRC (bit is undefined) */
4136 if (info->params.crc_type == HDLC_CRC_NONE)
4137 status &= ~BIT1;
4138
4139 if (framesize == 0 ||
4140 (addr_field != 0xff && addr_field != info->params.addr_filter)) {
4141 free_rbufs(info, start, end);
4142 goto check_again;
4143 }
4144
4145 if (framesize < 2 || status & (BIT1+BIT0)) {
4146 if (framesize < 2 || (status & BIT0))
4147 info->icount.rxshort++;
4148 else
4149 info->icount.rxcrc++;
4150 framesize = 0;
4151
4152#ifdef CONFIG_HDLC
4153 {
4154 struct net_device_stats *stats = hdlc_stats(info->netdev);
4155 stats->rx_errors++;
4156 stats->rx_frame_errors++;
4157 }
4158#endif
4159 } else {
4160 /* adjust frame size for CRC, if any */
4161 if (info->params.crc_type == HDLC_CRC_16_CCITT)
4162 framesize -= 2;
4163 else if (info->params.crc_type == HDLC_CRC_32_CCITT)
4164 framesize -= 4;
4165 }
4166
4167 DBGBH(("%s rx frame status=%04X size=%d\n",
4168 info->device_name, status, framesize));
4169 DBGDATA(info, info->rbufs[start].buf, min_t(int, framesize, DMABUFSIZE), "rx");
4170
4171 if (framesize) {
4172 if (framesize > info->max_frame_size)
4173 info->icount.rxlong++;
4174 else {
4175 /* copy dma buffer(s) to contiguous temp buffer */
4176 int copy_count = framesize;
4177 int i = start;
4178 unsigned char *p = info->tmp_rbuf;
4179 info->tmp_rbuf_count = framesize;
4180
4181 info->icount.rxok++;
4182
4183 while(copy_count) {
4184 int partial_count = min(copy_count, DMABUFSIZE);
4185 memcpy(p, info->rbufs[i].buf, partial_count);
4186 p += partial_count;
4187 copy_count -= partial_count;
4188 if (++i == info->rbuf_count)
4189 i = 0;
4190 }
4191
4192#ifdef CONFIG_HDLC
4193 if (info->netcount)
4194 hdlcdev_rx(info,info->tmp_rbuf, framesize);
4195 else
4196#endif
4197 ldisc_receive_buf(tty, info->tmp_rbuf, info->flag_buf, framesize);
4198 }
4199 }
4200 free_rbufs(info, start, end);
4201 rc = 1;
4202
4203cleanup:
4204 return rc;
4205}
4206
/*
 * pass receive buffer (RAW synchronous mode) to tty layer
 * return 1 if buffer available, otherwise 0
 */
static int rx_get_buf(struct slgt_info *info)
{
	unsigned int i = info->rbuf_current;

	if (!desc_complete(info->rbufs[i]))
		return 0;
	DBGDATA(info, info->rbufs[i].buf, desc_count(info->rbufs[i]), "rx");
	DBGINFO(("rx_get_buf size=%d\n", desc_count(info->rbufs[i])));
	ldisc_receive_buf(info->tty, info->rbufs[i].buf,
			  info->flag_buf, desc_count(info->rbufs[i]));
	free_rbufs(info, i, i);
	return 1;
}

static void reset_tbufs(struct slgt_info *info)
{
	unsigned int i;
	info->tbuf_current = 0;
	for (i = 0; i < info->tbuf_count; i++) {
		info->tbufs[i].status = 0;
		info->tbufs[i].count = 0;
	}
}

/*
 * return number of free transmit DMA buffers
 */
static unsigned int free_tbuf_count(struct slgt_info *info)
{
	unsigned int count = 0;
	unsigned int i = info->tbuf_current;

	do {
		if (desc_count(info->tbufs[i]))
			break; /* buffer in use */
		++count;
		if (++i == info->tbuf_count)
			i = 0;
	} while (i != info->tbuf_current);

	/* last buffer with zero count may be in use, assume it is */
	if (count)
		--count;

	return count;
}

/*
 * load transmit DMA buffer(s) with data
 */
static void tx_load(struct slgt_info *info, const char *buf, unsigned int size)
{
	unsigned short count;
	unsigned int i;
	struct slgt_desc *d;

	if (size == 0)
		return;

	DBGDATA(info, buf, size, "tx");

	info->tbuf_start = i = info->tbuf_current;

	while (size) {
		d = &info->tbufs[i];
		if (++i == info->tbuf_count)
			i = 0;

		count = (unsigned short)((size > DMABUFSIZE) ? DMABUFSIZE : size);
		memcpy(d->buf, buf, count);

		size -= count;
		buf += count;

		if (!size && info->params.mode != MGSL_MODE_RAW)
			set_desc_eof(*d, 1); /* HDLC: set EOF of last desc */
		else
			set_desc_eof(*d, 0);

		set_desc_count(*d, count);
	}

	info->tbuf_current = i;
}

static int register_test(struct slgt_info *info)
{
	static unsigned short patterns[] =
		{0x0000, 0xffff, 0xaaaa, 0x5555, 0x6969, 0x9696};
	static unsigned int count = sizeof(patterns)/sizeof(patterns[0]);
	unsigned int i;
	int rc = 0;

	for (i = 0; i < count; i++) {
		wr_reg16(info, TIR, patterns[i]);
		wr_reg16(info, BDR, patterns[(i+1)%count]);
		if ((rd_reg16(info, TIR) != patterns[i]) ||
		    (rd_reg16(info, BDR) != patterns[(i+1)%count])) {
			rc = -ENODEV;
			break;
		}
	}

	/* record the failure code only when the test actually failed */
	info->init_error = rc ? DiagStatus_AddressFailure : 0;
	return rc;
}

static int irq_test(struct slgt_info *info)
{
	unsigned long timeout;
	unsigned long flags;
	struct tty_struct *oldtty = info->tty;
	u32 speed = info->params.data_rate;

	info->params.data_rate = 921600;
	info->tty = NULL;

	spin_lock_irqsave(&info->lock, flags);
	async_mode(info);
	slgt_irq_on(info, IRQ_TXIDLE);

	/* enable transmitter */
	wr_reg16(info, TCR,
		 (unsigned short)(rd_reg16(info, TCR) | BIT1));

	/* write one byte and wait for tx idle */
	wr_reg16(info, TDR, 0);

	/* assume failure */
	info->init_error = DiagStatus_IrqFailure;
	info->irq_occurred = FALSE;

	spin_unlock_irqrestore(&info->lock, flags);

	timeout = 100;
	while (timeout-- && !info->irq_occurred)
		msleep_interruptible(10);

	spin_lock_irqsave(&info->lock, flags);
	reset_port(info);
	spin_unlock_irqrestore(&info->lock, flags);

	info->params.data_rate = speed;
	info->tty = oldtty;

	info->init_error = info->irq_occurred ? 0 : DiagStatus_IrqFailure;
	return info->irq_occurred ? 0 : -ENODEV;
}

static int loopback_test_rx(struct slgt_info *info)
{
	unsigned char *src, *dest;
	int count;

	if (desc_complete(info->rbufs[0])) {
		count = desc_count(info->rbufs[0]);
		src = info->rbufs[0].buf;
		dest = info->tmp_rbuf;

		for ( ; count ; count -= 2, src += 2) {
			/* src=data byte (src+1)=status byte */
			if (!(*(src+1) & (BIT9 + BIT8))) {
				*dest = *src;
				dest++;
				info->tmp_rbuf_count++;
			}
		}
		DBGDATA(info, info->tmp_rbuf, info->tmp_rbuf_count, "rx");
		return 1;
	}
	return 0;
}

static int loopback_test(struct slgt_info *info)
{
#define TESTFRAMESIZE 20

	unsigned long timeout;
	u16 count = TESTFRAMESIZE;
	unsigned char buf[TESTFRAMESIZE];
	int rc = -ENODEV;
	unsigned long flags;

	struct tty_struct *oldtty = info->tty;
	MGSL_PARAMS params;

	memcpy(&params, &info->params, sizeof(params));

	info->params.mode = MGSL_MODE_ASYNC;
	info->params.data_rate = 921600;
	info->params.loopback = 1;
	info->tty = NULL;

	/* build and send transmit frame */
	for (count = 0; count < TESTFRAMESIZE; ++count)
		buf[count] = (unsigned char)count;

	info->tmp_rbuf_count = 0;
	memset(info->tmp_rbuf, 0, TESTFRAMESIZE);

	/* program hardware for async mode and enable receiver */
	spin_lock_irqsave(&info->lock, flags);
	async_mode(info);
	rx_start(info);
	info->tx_count = count;
	tx_load(info, buf, count);
	tx_start(info);
	spin_unlock_irqrestore(&info->lock, flags);

	/* wait for receive complete */
	for (timeout = 100; timeout; --timeout) {
		msleep_interruptible(10);
		if (loopback_test_rx(info)) {
			rc = 0;
			break;
		}
	}

	/* verify received frame length and contents */
	if (!rc && (info->tmp_rbuf_count != count ||
		    memcmp(buf, info->tmp_rbuf, count))) {
		rc = -ENODEV;
	}

	spin_lock_irqsave(&info->lock, flags);
	reset_adapter(info);
	spin_unlock_irqrestore(&info->lock, flags);

	memcpy(&info->params, &params, sizeof(info->params));
	info->tty = oldtty;

	info->init_error = rc ? DiagStatus_DmaFailure : 0;
	return rc;
}

static int adapter_test(struct slgt_info *info)
{
	DBGINFO(("testing %s\n", info->device_name));
	if ((info->init_error = register_test(info)) < 0) {
		printk(KERN_ERR "register test failure %s addr=%08X\n",
		       info->device_name, info->phys_reg_addr);
	} else if ((info->init_error = irq_test(info)) < 0) {
		printk(KERN_ERR "IRQ test failure %s IRQ=%d\n",
		       info->device_name, info->irq_level);
	} else if ((info->init_error = loopback_test(info)) < 0) {
		printk(KERN_ERR "loopback test failure %s\n", info->device_name);
	}
	return info->init_error;
}

/*
 * transmit timeout handler
 */
static void tx_timeout(unsigned long context)
{
	struct slgt_info *info = (struct slgt_info*)context;
	unsigned long flags;

	DBGINFO(("%s tx_timeout\n", info->device_name));
	if (info->tx_active && info->params.mode == MGSL_MODE_HDLC) {
		info->icount.txtimeout++;
	}
	spin_lock_irqsave(&info->lock, flags);
	info->tx_active = 0;
	info->tx_count = 0;
	spin_unlock_irqrestore(&info->lock, flags);

#ifdef CONFIG_HDLC
	if (info->netcount)
		hdlcdev_tx_done(info);
	else
#endif
		bh_transmit(info);
}

/*
 * receive buffer polling timer
 */
static void rx_timeout(unsigned long context)
{
	struct slgt_info *info = (struct slgt_info*)context;
	unsigned long flags;

	DBGINFO(("%s rx_timeout\n", info->device_name));
	spin_lock_irqsave(&info->lock, flags);
	info->pending_bh |= BH_RECEIVE;
	spin_unlock_irqrestore(&info->lock, flags);
	bh_handler(info);
}

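The transmit path above is easiest to see in isolation: tx_load() chops a frame into fixed-size DMA descriptors around a ring and flags EOF only on the final chunk, while free_tbuf_count() scans the ring and deliberately under-reports by one because a zero-count descriptor may still be owned by the hardware. Below is a minimal user-space sketch of that logic with simplified stand-in types and sizes; BUFSZ and NDESC are illustrative, not the driver's real DMABUFSIZE or tbuf_count.

#include <stdio.h>
#include <string.h>

#define BUFSZ 8    /* stand-in for DMABUFSIZE */
#define NDESC 4    /* stand-in for tbuf_count */

struct desc { unsigned short count; int eof; char buf[BUFSZ]; };

static struct desc ring[NDESC];
static unsigned int current_idx;

static void load(const char *data, unsigned int size, int hdlc)
{
	unsigned int i = current_idx;

	while (size) {
		struct desc *d = &ring[i];
		unsigned short count = size > BUFSZ ? BUFSZ : size;

		if (++i == NDESC)
			i = 0;
		memcpy(d->buf, data, count);
		size -= count;
		data += count;
		d->eof = (hdlc && !size);	/* EOF only on last chunk */
		d->count = count;		/* nonzero marks "in use" */
	}
	current_idx = i;
}

static unsigned int free_count(void)
{
	unsigned int count = 0, i = current_idx;

	do {
		if (ring[i].count)
			break;			/* buffer in use */
		++count;
		if (++i == NDESC)
			i = 0;
	} while (i != current_idx);

	return count ? count - 1 : 0;		/* last one may be in use */
}

int main(void)
{
	printf("free before load: %u\n", free_count());	/* 3 of 4, one held back */
	load("0123456789", 10, 1);			/* uses 2 descriptors */
	printf("free after load:  %u\n", free_count());	/* 1: scan stops at first in-use */
	return 0;
}
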
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 2392e404e8d1..ba4582d160fd 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -2,6 +2,9 @@
 # Makefile for the kernel tpm device drivers.
 #
 obj-$(CONFIG_TCG_TPM) += tpm.o
+ifdef CONFIG_ACPI
+	obj-$(CONFIG_TCG_TPM) += tpm_bios.o
+endif
 obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
 obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index a9be0e8eaea5..5a3870477ef1 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -466,6 +466,7 @@ void tpm_remove_hardware(struct device *dev)
 	kfree(chip->vendor->miscdev.name);
 
 	sysfs_remove_group(&dev->kobj, chip->vendor->attr_group);
+	tpm_bios_log_teardown(chip->bios_dir);
 
 	dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &=
 		~(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
@@ -593,6 +594,8 @@ dev_num_search_complete:
 
 	sysfs_create_group(&dev->kobj, chip->vendor->attr_group);
 
+	chip->bios_dir = tpm_bios_log_setup(devname);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(tpm_register_hardware);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 159882ca69dd..fd3a4beaa53d 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -82,6 +82,8 @@ struct tpm_chip {
 
 	struct tpm_vendor_specific *vendor;
 
+	struct dentry **bios_dir;
+
 	struct list_head list;
 };
 
@@ -107,3 +109,16 @@ extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *);
 extern void tpm_remove_hardware(struct device *);
 extern int tpm_pm_suspend(struct device *, pm_message_t);
 extern int tpm_pm_resume(struct device *);
+
+#ifdef CONFIG_ACPI
+extern struct dentry **tpm_bios_log_setup(char *);
+extern void tpm_bios_log_teardown(struct dentry **);
+#else
+/* the stub must match the real signature: struct dentry **, not dentry * */
+static inline struct dentry **tpm_bios_log_setup(char *name)
+{
+	return NULL;
+}
+static inline void tpm_bios_log_teardown(struct dentry **dir)
+{
+}
+#endif
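The #else branch above is the usual config-stub idiom: when the option is off, the header supplies static inline no-ops with identical signatures, so callers such as tpm_remove_hardware() need no #ifdef of their own. A minimal sketch of the pattern follows, with an illustrative FEATURE_X macro and function names that are not the TPM API:

#include <stdio.h>

#ifdef FEATURE_X
extern int feature_setup(const char *name);
extern void feature_teardown(void);
#else
static inline int feature_setup(const char *name)
{
	(void)name;
	return 0;	/* pretend success; nothing to set up */
}
static inline void feature_teardown(void)
{
}
#endif

int main(void)
{
	/* Compiles and links either way; with FEATURE_X unset the calls
	 * vanish after inlining. */
	if (feature_setup("demo") == 0)
		printf("setup ok\n");
	feature_teardown();
	return 0;
}
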
diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c
new file mode 100644
index 000000000000..aedf7a8e6da7
--- /dev/null
+++ b/drivers/char/tpm/tpm_bios.c
@@ -0,0 +1,540 @@
/*
 * Copyright (C) 2005 IBM Corporation
 *
 * Authors:
 *	Seiji Munetoh <munetoh@jp.ibm.com>
 *	Stefan Berger <stefanb@us.ibm.com>
 *	Reiner Sailer <sailer@watson.ibm.com>
 *	Kylene Hall <kjhall@us.ibm.com>
 *
 * Access to the eventlog extended by the TCG BIOS of PC platform
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/module.h>
#include <acpi/acpi.h>
#include <acpi/actypes.h>
#include <acpi/actbl.h>
#include "tpm.h"

#define TCG_EVENT_NAME_LEN_MAX	255
#define MAX_TEXT_EVENT		1000	/* Max event string length */
#define ACPI_TCPA_SIG		"TCPA"	/* 0x41504354 /'TCPA' */

struct tpm_bios_log {
	void *bios_event_log;
	void *bios_event_log_end;
};

struct acpi_tcpa {
	struct acpi_table_header hdr;
	u16 reserved;
	u32 log_max_len __attribute__ ((packed));
	u32 log_start_addr __attribute__ ((packed));
};

struct tcpa_event {
	u32 pcr_index;
	u32 event_type;
	u8 pcr_value[20];	/* SHA1 */
	u32 event_size;
	u8 event_data[0];
};

enum tcpa_event_types {
	PREBOOT = 0,
	POST_CODE,
	UNUSED,
	NO_ACTION,
	SEPARATOR,
	ACTION,
	EVENT_TAG,
	SCRTM_CONTENTS,
	SCRTM_VERSION,
	CPU_MICROCODE,
	PLATFORM_CONFIG_FLAGS,
	TABLE_OF_DEVICES,
	COMPACT_HASH,
	IPL,
	IPL_PARTITION_DATA,
	NONHOST_CODE,
	NONHOST_CONFIG,
	NONHOST_INFO,
};

static const char* tcpa_event_type_strings[] = {
	"PREBOOT",
	"POST CODE",
	"",
	"NO ACTION",
	"SEPARATOR",
	"ACTION",
	"EVENT TAG",
	"S-CRTM Contents",
	"S-CRTM Version",
	"CPU Microcode",
	"Platform Config Flags",
	"Table of Devices",
	"Compact Hash",
	"IPL",
	"IPL Partition Data",
	"Non-Host Code",
	"Non-Host Config",
	"Non-Host Info"
};

enum tcpa_pc_event_ids {
	SMBIOS = 1,
	BIS_CERT,
	POST_BIOS_ROM,
	ESCD,
	CMOS,
	NVRAM,
	OPTION_ROM_EXEC,
	OPTION_ROM_CONFIG,
	OPTION_ROM_MICROCODE,
	S_CRTM_VERSION,
	S_CRTM_CONTENTS,
	POST_CONTENTS,
};

static const char* tcpa_pc_event_id_strings[] = {
	"",			/* index 0 unused; event IDs start at SMBIOS = 1 */
	"SMBIOS",
	"BIS Certificate",
	"POST BIOS ",
	"ESCD ",
	"CMOS",
	"NVRAM",
	"Option ROM",
	"Option ROM config",
	"Option ROM microcode",
	"S-CRTM Version",
	"S-CRTM Contents",
	"S-CRTM POST Contents",
};

/* returns pointer to start of pos. entry of tcg log */
static void *tpm_bios_measurements_start(struct seq_file *m, loff_t *pos)
{
	loff_t i;
	struct tpm_bios_log *log = m->private;
	void *addr = log->bios_event_log;
	void *limit = log->bios_event_log_end;
	struct tcpa_event *event;

	/* read over *pos measurements */
	for (i = 0; i < *pos; i++) {
		event = addr;

		if ((addr + sizeof(struct tcpa_event)) < limit) {
			if (event->event_type == 0 && event->event_size == 0)
				return NULL;
			addr += sizeof(struct tcpa_event) + event->event_size;
		}
	}

	/* now check if current entry is valid */
	if ((addr + sizeof(struct tcpa_event)) >= limit)
		return NULL;

	event = addr;

	if ((event->event_type == 0 && event->event_size == 0) ||
	    ((addr + sizeof(struct tcpa_event) + event->event_size) >= limit))
		return NULL;

	return addr;
}

static void *tpm_bios_measurements_next(struct seq_file *m, void *v,
					loff_t *pos)
{
	struct tcpa_event *event = v;
	struct tpm_bios_log *log = m->private;
	void *limit = log->bios_event_log_end;

	v += sizeof(struct tcpa_event) + event->event_size;

	/* now check if current entry is valid */
	if ((v + sizeof(struct tcpa_event)) >= limit)
		return NULL;

	event = v;

	if ((event->event_type == 0 && event->event_size == 0) ||
	    ((v + sizeof(struct tcpa_event) + event->event_size) >= limit))
		return NULL;

	(*pos)++;
	return v;
}

static void tpm_bios_measurements_stop(struct seq_file *m, void *v)
{
}

static int get_event_name(char *dest, struct tcpa_event *event,
			  unsigned char * event_entry)
{
	const char *name = "";
	char data[41] = "";	/* 20 bytes as hex plus terminator */
	int i, n_len = 0, d_len = 0;
	u32 event_id, event_data_size;

	switch (event->event_type) {
	case PREBOOT:
	case POST_CODE:
	case UNUSED:
	case NO_ACTION:
	case SCRTM_CONTENTS:
	case SCRTM_VERSION:
	case CPU_MICROCODE:
	case PLATFORM_CONFIG_FLAGS:
	case TABLE_OF_DEVICES:
	case COMPACT_HASH:
	case IPL:
	case IPL_PARTITION_DATA:
	case NONHOST_CODE:
	case NONHOST_CONFIG:
	case NONHOST_INFO:
		name = tcpa_event_type_strings[event->event_type];
		n_len = strlen(name);
		break;
	case SEPARATOR:
	case ACTION:
		if (MAX_TEXT_EVENT > event->event_size) {
			name = event_entry;
			n_len = event->event_size;
		}
		break;
	case EVENT_TAG:
		event_id = be32_to_cpu(*((u32 *)event_entry));
		event_data_size = be32_to_cpu(*((u32 *)&event_entry[4]));

		/* ToDo Row data -> Base64 */

		switch (event_id) {
		case SMBIOS:
		case BIS_CERT:
		case CMOS:
		case NVRAM:
		case OPTION_ROM_EXEC:
		case OPTION_ROM_CONFIG:
		case OPTION_ROM_MICROCODE:
		case S_CRTM_VERSION:
		case S_CRTM_CONTENTS:
		case POST_CONTENTS:
			name = tcpa_pc_event_id_strings[event_id];
			n_len = strlen(name);
			break;
		case POST_BIOS_ROM:
		case ESCD:
			name = tcpa_pc_event_id_strings[event_id];
			n_len = strlen(name);
			/* append the digest as hex, advancing the output
			 * position so earlier bytes are not overwritten */
			for (i = 0; i < 20; i++)
				d_len += sprintf(&data[d_len], "%02x",
						 event_entry[8 + i]);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	return snprintf(dest, MAX_TEXT_EVENT, "[%.*s%.*s]",
			n_len, name, d_len, data);

}

static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v)
{

	char *eventname;
	char data[4];
	u32 help;
	int i, len;
	struct tcpa_event *event = (struct tcpa_event *) v;
	unsigned char *event_entry =
	    (unsigned char *) (v + sizeof(struct tcpa_event));

	eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL);
	if (!eventname) {
		printk(KERN_ERR "%s: ERROR - No Memory for event name\n ",
		       __func__);
		return -ENOMEM;
	}

	/* 1st: PCR used is in little-endian format (4 bytes) */
	help = le32_to_cpu(event->pcr_index);
	memcpy(data, &help, 4);
	for (i = 0; i < 4; i++)
		seq_putc(m, data[i]);

	/* 2nd: SHA1 (20 bytes) */
	for (i = 0; i < 20; i++)
		seq_putc(m, event->pcr_value[i]);

	/* 3rd: event type identifier (4 bytes) */
	help = le32_to_cpu(event->event_type);
	memcpy(data, &help, 4);
	for (i = 0; i < 4; i++)
		seq_putc(m, data[i]);

	len = 0;

	len += get_event_name(eventname, event, event_entry);

	/* 4th: filename <= 255 + '\0' delimiter */
	if (len > TCG_EVENT_NAME_LEN_MAX)
		len = TCG_EVENT_NAME_LEN_MAX;

	for (i = 0; i < len; i++)
		seq_putc(m, eventname[i]);

	/* 5th: delimiter */
	seq_putc(m, '\0');

	kfree(eventname);
	return 0;
}

static int tpm_bios_measurements_release(struct inode *inode,
					 struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct tpm_bios_log *log = seq->private;

	if (log) {
		kfree(log->bios_event_log);
		kfree(log);
	}

	return seq_release(inode, file);
}

static int tpm_ascii_bios_measurements_show(struct seq_file *m, void *v)
{
	int len = 0;
	int i;
	char *eventname;
	struct tcpa_event *event = v;
	unsigned char *event_entry =
	    (unsigned char *) (v + sizeof(struct tcpa_event));

	eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL);
	if (!eventname) {
		printk(KERN_ERR "%s: ERROR - No Memory for event name\n ",
		       __func__);
		return -ENOMEM;
	}

	seq_printf(m, "%2d ", event->pcr_index);

	/* 2nd: SHA1 */
	for (i = 0; i < 20; i++)
		seq_printf(m, "%02x", event->pcr_value[i]);

	/* 3rd: event type identifier */
	seq_printf(m, " %02x", event->event_type);

	len += get_event_name(eventname, event, event_entry);

	/* 4th: eventname <= max + '\0' delimiter */
	seq_printf(m, " %s\n", eventname);

	kfree(eventname);
	return 0;
}

static struct seq_operations tpm_ascii_b_measurements_seqops = {
	.start = tpm_bios_measurements_start,
	.next = tpm_bios_measurements_next,
	.stop = tpm_bios_measurements_stop,
	.show = tpm_ascii_bios_measurements_show,
};

static struct seq_operations tpm_binary_b_measurements_seqops = {
	.start = tpm_bios_measurements_start,
	.next = tpm_bios_measurements_next,
	.stop = tpm_bios_measurements_stop,
	.show = tpm_binary_bios_measurements_show,
};

/* read binary bios log */
static int read_log(struct tpm_bios_log *log)
{
	struct acpi_tcpa *buff;
	acpi_status status;
	void *virt;

	if (log->bios_event_log != NULL) {
		printk(KERN_ERR
		       "%s: ERROR - Eventlog already initialized\n",
		       __func__);
		return -EFAULT;
	}

	/* Find TCPA entry in RSDT (ACPI_LOGICAL_ADDRESSING) */
	status = acpi_get_firmware_table(ACPI_TCPA_SIG, 1,
					 ACPI_LOGICAL_ADDRESSING,
					 (struct acpi_table_header **)
					 &buff);

	if (ACPI_FAILURE(status)) {
		printk(KERN_ERR "%s: ERROR - Could not get TCPA table\n",
		       __func__);
		return -EIO;
	}

	if (buff->log_max_len == 0) {
		printk(KERN_ERR "%s: ERROR - TCPA log area empty\n", __func__);
		return -EIO;
	}

	/* malloc EventLog space */
	log->bios_event_log = kmalloc(buff->log_max_len, GFP_KERNEL);
	if (!log->bios_event_log) {
		printk(KERN_ERR
		       "%s: ERROR - Not enough Memory for BIOS measurements\n",
		       __func__);
		return -ENOMEM;
	}

	log->bios_event_log_end = log->bios_event_log + buff->log_max_len;

	acpi_os_map_memory(buff->log_start_addr, buff->log_max_len, &virt);

	memcpy(log->bios_event_log, virt, buff->log_max_len);

	acpi_os_unmap_memory(virt, buff->log_max_len);
	return 0;
}

static int tpm_ascii_bios_measurements_open(struct inode *inode,
					    struct file *file)
{
	int err;
	struct tpm_bios_log *log;
	struct seq_file *seq;

	log = kzalloc(sizeof(struct tpm_bios_log), GFP_KERNEL);
	if (!log)
		return -ENOMEM;

	if ((err = read_log(log))) {
		kfree(log);	/* do not leak the log on failure */
		return err;
	}

	/* now register seq file */
	err = seq_open(file, &tpm_ascii_b_measurements_seqops);
	if (!err) {
		seq = file->private_data;
		seq->private = log;
	} else {
		kfree(log->bios_event_log);
		kfree(log);
	}
	return err;
}

struct file_operations tpm_ascii_bios_measurements_ops = {
	.open = tpm_ascii_bios_measurements_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tpm_bios_measurements_release,
};

static int tpm_binary_bios_measurements_open(struct inode *inode,
					     struct file *file)
{
	int err;
	struct tpm_bios_log *log;
	struct seq_file *seq;

	log = kzalloc(sizeof(struct tpm_bios_log), GFP_KERNEL);
	if (!log)
		return -ENOMEM;

	if ((err = read_log(log))) {
		kfree(log);	/* do not leak the log on failure */
		return err;
	}

	/* now register seq file */
	err = seq_open(file, &tpm_binary_b_measurements_seqops);
	if (!err) {
		seq = file->private_data;
		seq->private = log;
	} else {
		kfree(log->bios_event_log);
		kfree(log);
	}
	return err;
}

struct file_operations tpm_binary_bios_measurements_ops = {
	.open = tpm_binary_bios_measurements_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tpm_bios_measurements_release,
};

struct dentry **tpm_bios_log_setup(char *name)
{
	struct dentry **ret = NULL, *tpm_dir, *bin_file, *ascii_file;

	tpm_dir = securityfs_create_dir(name, NULL);
	if (!tpm_dir)
		goto out;

	bin_file =
	    securityfs_create_file("binary_bios_measurements",
				   S_IRUSR | S_IRGRP, tpm_dir, NULL,
				   &tpm_binary_bios_measurements_ops);
	if (!bin_file)
		goto out_tpm;

	ascii_file =
	    securityfs_create_file("ascii_bios_measurements",
				   S_IRUSR | S_IRGRP, tpm_dir, NULL,
				   &tpm_ascii_bios_measurements_ops);
	if (!ascii_file)
		goto out_bin;

	ret = kmalloc(3 * sizeof(struct dentry *), GFP_KERNEL);
	if (!ret)
		goto out_ascii;

	ret[0] = ascii_file;
	ret[1] = bin_file;
	ret[2] = tpm_dir;

	return ret;

out_ascii:
	securityfs_remove(ascii_file);
out_bin:
	securityfs_remove(bin_file);
out_tpm:
	securityfs_remove(tpm_dir);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(tpm_bios_log_setup);

void tpm_bios_log_teardown(struct dentry **lst)
{
	int i;

	for (i = 0; i < 3; i++)
		securityfs_remove(lst[i]);
}
EXPORT_SYMBOL_GPL(tpm_bios_log_teardown);
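tpm_bios_measurements_start/next above walk a log of variable-length records: a fixed tcpa_event header followed by event_size bytes of payload, terminated by a zeroed header or by running off the end of the mapped buffer. The following is a standalone user-space sketch of that walk over fabricated data; the header layout mirrors struct tcpa_event, but the entries and values are made up.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct event_hdr {
	uint32_t pcr_index;
	uint32_t event_type;
	uint8_t  pcr_value[20];
	uint32_t event_size;
	/* event_size bytes of data follow */
};

static void walk(const uint8_t *log, size_t len)
{
	const uint8_t *p = log, *limit = log + len;

	while (p + sizeof(struct event_hdr) < limit) {
		struct event_hdr h;

		memcpy(&h, p, sizeof(h));	/* avoid unaligned access */
		if (h.event_type == 0 && h.event_size == 0)
			break;			/* zeroed header terminates */
		if (p + sizeof(h) + h.event_size >= limit)
			break;			/* truncated entry */
		printf("pcr=%u type=%u size=%u\n",
		       h.pcr_index, h.event_type, h.event_size);
		p += sizeof(h) + h.event_size;
	}
}

int main(void)
{
	/* Two fabricated entries followed by a zeroed terminator. */
	uint8_t buf[256] = { 0 };
	struct event_hdr h = { .pcr_index = 0, .event_type = 4, /* SEPARATOR */
			       .event_size = 4 };
	size_t off = 0;

	memcpy(buf + off, &h, sizeof(h));
	off += sizeof(h) + h.event_size;
	h.pcr_index = 1;
	h.event_type = 6;			/* EVENT_TAG */
	memcpy(buf + off, &h, sizeof(h));

	walk(buf, sizeof(buf));
	return 0;
}
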
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index 9ac6d43437b3..a5b18e086a94 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -718,7 +718,7 @@ static struct platform_driver giu_device_driver = {
 	},
 };
 
-static int __devinit vr41xx_giu_init(void)
+static int __init vr41xx_giu_init(void)
 {
 	int retval;
 
@@ -733,7 +733,7 @@ static int __devinit vr41xx_giu_init(void)
 	return retval;
 }
 
-static void __devexit vr41xx_giu_exit(void)
+static void __exit vr41xx_giu_exit(void)
 {
 	platform_driver_unregister(&giu_device_driver);
 
diff --git a/drivers/char/watchdog/wdt977.c b/drivers/char/watchdog/wdt977.c
index 44d49dfacbb3..3843900e94c4 100644
--- a/drivers/char/watchdog/wdt977.c
+++ b/drivers/char/watchdog/wdt977.c
@@ -1,5 +1,5 @@
 /*
- *	Wdt977	0.03:	A Watchdog Device for Netwinder W83977AF chip
+ *	Wdt977	0.04:	A Watchdog Device for Netwinder W83977AF chip
  *
  *	(c) Copyright 1998  Rebel.com (Woody Suwalski <woody@netwinder.org>)
  *
@@ -18,6 +18,8 @@
  *	from minutes to seconds.
  *	07-Jul-2003 Daniele Bellucci: Audit return code of misc_register in
  *	nwwatchdog_init.
+ *	25-Oct-2005 Woody Suwalski: Convert addresses to #defs, add spinlocks,
+ *				    remove limitation to be used on Netwinders only
  */
 
 #include <linux/module.h>
@@ -28,6 +30,7 @@
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
 #include <linux/init.h>
+#include <linux/ioport.h>
 #include <linux/watchdog.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
@@ -37,8 +40,18 @@
 #include <asm/mach-types.h>
 #include <asm/uaccess.h>
 
-#define PFX "Wdt977: "
-#define WATCHDOG_MINOR	130
+#define WATCHDOG_VERSION  "0.04"
+#define WATCHDOG_NAME     "Wdt977"
+#define PFX WATCHDOG_NAME ": "
+#define DRIVER_VERSION    WATCHDOG_NAME " driver, v" WATCHDOG_VERSION "\n"
+
+#define IO_INDEX_PORT	0x370		/* on some systems it can be 0x3F0 */
+#define IO_DATA_PORT	(IO_INDEX_PORT+1)
+
+#define UNLOCK_DATA	0x87
+#define LOCK_DATA	0xAA
+#define DEVICE_REGISTER	0x07
+
 
 #define DEFAULT_TIMEOUT	60		/* default timeout in seconds */
 
@@ -47,6 +60,7 @@ static int timeoutM; /* timeout in minutes */
 static unsigned long timer_alive;
 static int testmode;
 static char expect_close;
+static spinlock_t spinlock;
 
 module_param(timeout, int, 0);
 MODULE_PARM_DESC(timeout,"Watchdog timeout in seconds (60..15300), default=" __MODULE_STRING(DEFAULT_TIMEOUT) ")");
@@ -63,9 +77,13 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=CON
 
 static int wdt977_start(void)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&spinlock, flags);
+
 	/* unlock the SuperIO chip */
-	outb(0x87,0x370);
-	outb(0x87,0x370);
+	outb_p(UNLOCK_DATA, IO_INDEX_PORT);
+	outb_p(UNLOCK_DATA, IO_INDEX_PORT);
 
 	/* select device Aux2 (device=8) and set watchdog regs F2, F3 and F4
 	 * F2 has the timeout in minutes
@@ -73,28 +91,29 @@ static int wdt977_start(void)
 	 * at timeout, and to reset timer on kbd/mouse activity (not impl.)
 	 * F4 is used to just clear the TIMEOUT'ed state (bit 0)
 	 */
-	outb(0x07,0x370);
-	outb(0x08,0x371);
-	outb(0xF2,0x370);
-	outb(timeoutM,0x371);
-	outb(0xF3,0x370);
-	outb(0x00,0x371);	/* another setting is 0E for kbd/mouse/LED */
-	outb(0xF4,0x370);
-	outb(0x00,0x371);
+	outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+	outb_p(0x08, IO_DATA_PORT);
+	outb_p(0xF2, IO_INDEX_PORT);
+	outb_p(timeoutM, IO_DATA_PORT);
+	outb_p(0xF3, IO_INDEX_PORT);
+	outb_p(0x00, IO_DATA_PORT);	/* another setting is 0E for kbd/mouse/LED */
+	outb_p(0xF4, IO_INDEX_PORT);
+	outb_p(0x00, IO_DATA_PORT);
 
 	/* at last select device Aux1 (dev=7) and set GP16 as a watchdog output */
 	/* in test mode watch the bit 1 on F4 to indicate "triggered" */
 	if (!testmode)
 	{
-		outb(0x07,0x370);
-		outb(0x07,0x371);
-		outb(0xE6,0x370);
-		outb(0x08,0x371);
+		outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+		outb_p(0x07, IO_DATA_PORT);
+		outb_p(0xE6, IO_INDEX_PORT);
+		outb_p(0x08, IO_DATA_PORT);
 	}
 
 	/* lock the SuperIO chip */
-	outb(0xAA,0x370);
+	outb_p(LOCK_DATA, IO_INDEX_PORT);
 
+	spin_unlock_irqrestore(&spinlock, flags);
 	printk(KERN_INFO PFX "activated.\n");
 
 	return 0;
@@ -106,35 +125,39 @@ static int wdt977_start(void)
 
 static int wdt977_stop(void)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&spinlock, flags);
+
 	/* unlock the SuperIO chip */
-	outb(0x87,0x370);
-	outb(0x87,0x370);
+	outb_p(UNLOCK_DATA, IO_INDEX_PORT);
+	outb_p(UNLOCK_DATA, IO_INDEX_PORT);
 
 	/* select device Aux2 (device=8) and set watchdog regs F2,F3 and F4
 	 * F3 is reset to its default state
 	 * F4 can clear the TIMEOUT'ed state (bit 0) - back to default
 	 * We can not use GP17 as a PowerLed, as we use its usage as a RedLed
 	 */
-	outb(0x07,0x370);
-	outb(0x08,0x371);
-	outb(0xF2,0x370);
-	outb(0xFF,0x371);
-	outb(0xF3,0x370);
-	outb(0x00,0x371);
-	outb(0xF4,0x370);
-	outb(0x00,0x371);
-	outb(0xF2,0x370);
-	outb(0x00,0x371);
+	outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+	outb_p(0x08, IO_DATA_PORT);
+	outb_p(0xF2, IO_INDEX_PORT);
+	outb_p(0xFF, IO_DATA_PORT);
+	outb_p(0xF3, IO_INDEX_PORT);
+	outb_p(0x00, IO_DATA_PORT);
+	outb_p(0xF4, IO_INDEX_PORT);
+	outb_p(0x00, IO_DATA_PORT);
+	outb_p(0xF2, IO_INDEX_PORT);
+	outb_p(0x00, IO_DATA_PORT);
 
 	/* at last select device Aux1 (dev=7) and set GP16 as a watchdog output */
-	outb(0x07,0x370);
-	outb(0x07,0x371);
-	outb(0xE6,0x370);
-	outb(0x08,0x371);
+	outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+	outb_p(0x07, IO_DATA_PORT);
+	outb_p(0xE6, IO_INDEX_PORT);
+	outb_p(0x08, IO_DATA_PORT);
 
 	/* lock the SuperIO chip */
-	outb(0xAA,0x370);
+	outb_p(LOCK_DATA, IO_INDEX_PORT);
 
+	spin_unlock_irqrestore(&spinlock, flags);
 	printk(KERN_INFO PFX "shutdown.\n");
 
 	return 0;
@@ -147,19 +170,23 @@ static int wdt977_stop(void)
 
 static int wdt977_keepalive(void)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&spinlock, flags);
+
 	/* unlock the SuperIO chip */
-	outb(0x87,0x370);
-	outb(0x87,0x370);
+	outb_p(UNLOCK_DATA, IO_INDEX_PORT);
+	outb_p(UNLOCK_DATA, IO_INDEX_PORT);
 
 	/* select device Aux2 (device=8) and kicks watchdog reg F2 */
 	/* F2 has the timeout in minutes */
-	outb(0x07,0x370);
-	outb(0x08,0x371);
-	outb(0xF2,0x370);
-	outb(timeoutM,0x371);
+	outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+	outb_p(0x08, IO_DATA_PORT);
+	outb_p(0xF2, IO_INDEX_PORT);
+	outb_p(timeoutM, IO_DATA_PORT);
 
 	/* lock the SuperIO chip */
-	outb(0xAA,0x370);
+	outb_p(LOCK_DATA, IO_INDEX_PORT);
+	spin_unlock_irqrestore(&spinlock, flags);
 
 	return 0;
 }
@@ -198,22 +225,26 @@ static int wdt977_set_timeout(int t)
 static int wdt977_get_status(int *status)
 {
 	int new_status;
+	unsigned long flags;
 
-	*status=0;
+	spin_lock_irqsave(&spinlock, flags);
 
 	/* unlock the SuperIO chip */
-	outb(0x87,0x370);
-	outb(0x87,0x370);
+	outb_p(UNLOCK_DATA, IO_INDEX_PORT);
+	outb_p(UNLOCK_DATA, IO_INDEX_PORT);
 
 	/* select device Aux2 (device=8) and read watchdog reg F4 */
-	outb(0x07,0x370);
-	outb(0x08,0x371);
-	outb(0xF4,0x370);
-	new_status = inb(0x371);
+	outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+	outb_p(0x08, IO_DATA_PORT);
+	outb_p(0xF4, IO_INDEX_PORT);
+	new_status = inb_p(IO_DATA_PORT);
 
 	/* lock the SuperIO chip */
-	outb(0xAA,0x370);
+	outb_p(LOCK_DATA, IO_INDEX_PORT);
 
+	spin_unlock_irqrestore(&spinlock, flags);
+
+	*status=0;
 	if (new_status & 1)
 		*status |= WDIOF_CARDRESET;
 
@@ -249,8 +280,8 @@ static int wdt977_release(struct inode *inode, struct file *file)
 		wdt977_stop();
 		clear_bit(0,&timer_alive);
 	} else {
-		printk(KERN_CRIT PFX "Unexpected close, not stopping watchdog!\n");
 		wdt977_keepalive();
+		printk(KERN_CRIT PFX "Unexpected close, not stopping watchdog!\n");
 	}
 	expect_close = 0;
 	return 0;
@@ -271,14 +302,17 @@ static int wdt977_release(struct inode *inode, struct file *file)
 static ssize_t wdt977_write(struct file *file, const char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	if (count) {
-		if (!nowayout) {
+	if (count)
+	{
+		if (!nowayout)
+		{
 			size_t i;
 
 			/* In case it was set long ago */
 			expect_close = 0;
 
-			for (i = 0; i != count; i++) {
+			for (i = 0; i != count; i++)
+			{
 				char c;
 				if (get_user(c, buf + i))
 					return -EFAULT;
@@ -287,6 +321,7 @@ static ssize_t wdt977_write(struct file *file, const char __user *buf,
 			}
 		}
 
+		/* someone wrote to us, we should restart timer */
 		wdt977_keepalive();
 	}
 	return count;
@@ -308,7 +343,7 @@ static struct watchdog_info ident = {
 			WDIOF_MAGICCLOSE |
 			WDIOF_KEEPALIVEPING,
 	.firmware_version =	1,
-	.identity =		"Winbond 83977",
+	.identity =		WATCHDOG_NAME,
 };
 
 static int wdt977_ioctl(struct inode *inode, struct file *file,
@@ -405,50 +440,81 @@ static struct notifier_block wdt977_notifier = {
 	.notifier_call = wdt977_notify_sys,
 };
 
-static int __init nwwatchdog_init(void)
+static int __init wd977_init(void)
 {
-	int retval;
-	if (!machine_is_netwinder())
-		return -ENODEV;
+	int rc;
+
+	//if (!machine_is_netwinder())
+	//	return -ENODEV;
+
+	printk(KERN_INFO PFX DRIVER_VERSION);
+
+	spin_lock_init(&spinlock);
 
 	/* Check that the timeout value is within it's range ; if not reset to the default */
-	if (wdt977_set_timeout(timeout)) {
+	if (wdt977_set_timeout(timeout))
+	{
 		wdt977_set_timeout(DEFAULT_TIMEOUT);
 		printk(KERN_INFO PFX "timeout value must be 60<timeout<15300, using %d\n",
 		       DEFAULT_TIMEOUT);
 	}
 
-	retval = register_reboot_notifier(&wdt977_notifier);
-	if (retval) {
-		printk(KERN_ERR PFX "cannot register reboot notifier (err=%d)\n",
-		       retval);
-		return retval;
+	/* on Netwinder the IOports are already reserved by
+	 * arch/arm/mach-footbridge/netwinder-hw.c
+	 */
+	if (!machine_is_netwinder())
+	{
+		if (!request_region(IO_INDEX_PORT, 2, WATCHDOG_NAME))
+		{
+			printk(KERN_ERR PFX "I/O address 0x%04x already in use\n",
+			       IO_INDEX_PORT);
+			rc = -EIO;
+			goto err_out;
+		}
 	}
 
-	retval = misc_register(&wdt977_miscdev);
-	if (retval) {
+	rc = misc_register(&wdt977_miscdev);
+	if (rc)
+	{
 		printk(KERN_ERR PFX "cannot register miscdev on minor=%d (err=%d)\n",
-		       WATCHDOG_MINOR, retval);
-		unregister_reboot_notifier(&wdt977_notifier);
-		return retval;
+		       wdt977_miscdev.minor, rc);
+		goto err_out_region;
+	}
+
+	rc = register_reboot_notifier(&wdt977_notifier);
+	if (rc)
+	{
+		printk(KERN_ERR PFX "cannot register reboot notifier (err=%d)\n",
+		       rc);
+		goto err_out_miscdev;
 	}
 
-	printk(KERN_INFO PFX "initialized. timeout=%d sec (nowayout=%d, testmode = %i)\n",
+	printk(KERN_INFO PFX "initialized. timeout=%d sec (nowayout=%d, testmode=%i)\n",
 	       timeout, nowayout, testmode);
 
 	return 0;
+
+err_out_miscdev:
+	misc_deregister(&wdt977_miscdev);
+err_out_region:
+	if (!machine_is_netwinder())
+		release_region(IO_INDEX_PORT,2);
+err_out:
+	return rc;
 }
 
-static void __exit nwwatchdog_exit(void)
+static void __exit wd977_exit(void)
 {
+	wdt977_stop();
 	misc_deregister(&wdt977_miscdev);
 	unregister_reboot_notifier(&wdt977_notifier);
+	release_region(IO_INDEX_PORT,2);
 }
 
-module_init(nwwatchdog_init);
-module_exit(nwwatchdog_exit);
+module_init(wd977_init);
+module_exit(wd977_exit);
 
-MODULE_AUTHOR("Woody Suwalski <woody@netwinder.org>");
+MODULE_AUTHOR("Woody Suwalski <woodys@xandros.com>");
 MODULE_DESCRIPTION("W83977AF Watchdog driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
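The wdt977 changes replace magic numbers with named constants for the W83977AF's index/data programming model: write a register index to IO_INDEX_PORT, then transfer the value through IO_DATA_PORT, with the whole sequence bracketed by the unlock/lock magic bytes and now serialized by a spinlock. Below is a user-space sketch of the same convention; it is Linux/x86 only, needs root for ioperm(), register 0x20 is only commonly a chip-ID register on Winbond parts, and the whole probe should be treated as an assumption rather than something to run against hardware you care about.

#include <stdio.h>
#include <stdlib.h>
#include <sys/io.h>

#define IO_INDEX_PORT	0x370	/* on some systems it can be 0x3F0 */
#define IO_DATA_PORT	(IO_INDEX_PORT + 1)
#define UNLOCK_DATA	0x87
#define LOCK_DATA	0xAA

static unsigned char superio_read(unsigned char reg)
{
	outb_p(reg, IO_INDEX_PORT);	/* select register */
	return inb_p(IO_DATA_PORT);	/* fetch its value */
}

int main(void)
{
	if (ioperm(IO_INDEX_PORT, 2, 1)) {
		perror("ioperm");
		return EXIT_FAILURE;
	}

	outb_p(UNLOCK_DATA, IO_INDEX_PORT);	/* magic unlock, sent twice */
	outb_p(UNLOCK_DATA, IO_INDEX_PORT);

	printf("register 0x20 = 0x%02x\n", superio_read(0x20));

	outb_p(LOCK_DATA, IO_INDEX_PORT);	/* relock the chip */
	return 0;
}
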
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 969d2b4aaec0..385e52930c02 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -34,14 +34,14 @@
 static atomic_t proc_event_num_listeners = ATOMIC_INIT(0);
 static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
 
-/* proc_counts is used as the sequence number of the netlink message */
+/* proc_event_counts is used as the sequence number of the netlink message */
 static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
 
 static inline void get_seq(__u32 *ts, int *cpu)
 {
 	*ts = get_cpu_var(proc_event_counts)++;
 	*cpu = smp_processor_id();
-	put_cpu_var(proc_counts);
+	put_cpu_var(proc_event_counts);
 }
 
 void proc_fork_connector(struct task_struct *task)
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index e70b3db69edd..1af3dfbb8086 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -494,6 +494,7 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
 {
 	struct tps65010		*tps;
 	int			status;
+	unsigned long		irqflags;
 
 	if (the_tps) {
 		dev_dbg(&bus->dev, "only one %s for now\n", DRIVER_NAME);
@@ -520,13 +521,14 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
 	}
 
 #ifdef CONFIG_ARM
+	irqflags = SA_SAMPLE_RANDOM | SA_TRIGGER_LOW;
 	if (machine_is_omap_h2()) {
 		tps->model = TPS65010;
 		omap_cfg_reg(W4_GPIO58);
 		tps->irq = OMAP_GPIO_IRQ(58);
 		omap_request_gpio(58);
 		omap_set_gpio_direction(58, 1);
-		set_irq_type(tps->irq, IRQT_FALLING);
+		irqflags |= SA_TRIGGER_FALLING;
 	}
 	if (machine_is_omap_osk()) {
 		tps->model = TPS65010;
@@ -534,7 +536,7 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
 		tps->irq = OMAP_GPIO_IRQ(OMAP_MPUIO(1));
 		omap_request_gpio(OMAP_MPUIO(1));
 		omap_set_gpio_direction(OMAP_MPUIO(1), 1);
-		set_irq_type(tps->irq, IRQT_FALLING);
+		irqflags |= SA_TRIGGER_FALLING;
 	}
 	if (machine_is_omap_h3()) {
 		tps->model = TPS65013;
@@ -542,13 +544,12 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
 		// FIXME set up this board's IRQ ...
 	}
 #else
-#define set_irq_type(num,trigger) do{}while(0)
+	irqflags = SA_SAMPLE_RANDOM;
 #endif
 
 	if (tps->irq > 0) {
-		set_irq_type(tps->irq, IRQT_LOW);
 		status = request_irq(tps->irq, tps65010_irq,
-			SA_SAMPLE_RANDOM, DRIVER_NAME, tps);
+			irqflags, DRIVER_NAME, tps);
 		if (status < 0) {
 			dev_dbg(&tps->client.dev, "can't get IRQ %d, err %d\n",
 					tps->irq, status);
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 4b441720b6ba..cab362ea0336 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -1130,6 +1130,17 @@ static int idedisk_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
+	ide_drive_t *drive = idkp->drive;
+
+	geo->heads = drive->bios_head;
+	geo->sectors = drive->bios_sect;
+	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+	return 0;
+}
+
 static int idedisk_ioctl(struct inode *inode, struct file *file,
 			unsigned int cmd, unsigned long arg)
 {
@@ -1164,6 +1175,7 @@ static struct block_device_operations idedisk_ops = {
 	.open		= idedisk_open,
 	.release	= idedisk_release,
 	.ioctl		= idedisk_ioctl,
+	.getgeo		= idedisk_getgeo,
 	.media_changed	= idedisk_media_changed,
 	.revalidate_disk= idedisk_revalidate_disk
 };
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index fba3fffc2d66..5945f551aaaa 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -2031,6 +2031,17 @@ static int idefloppy_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	struct ide_floppy_obj *floppy = ide_floppy_g(bdev->bd_disk);
+	ide_drive_t *drive = floppy->drive;
+
+	geo->heads = drive->bios_head;
+	geo->sectors = drive->bios_sect;
+	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+	return 0;
+}
+
 static int idefloppy_ioctl(struct inode *inode, struct file *file,
 			unsigned int cmd, unsigned long arg)
 {
@@ -2120,6 +2131,7 @@ static struct block_device_operations idefloppy_ops = {
 	.open		= idefloppy_open,
 	.release	= idefloppy_release,
 	.ioctl		= idefloppy_ioctl,
+	.getgeo		= idefloppy_getgeo,
 	.media_changed	= idefloppy_media_changed,
 	.revalidate_disk= idefloppy_revalidate_disk
 };
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 4b524f6b3ecd..b069b13b75a7 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -1278,19 +1278,6 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device
 	up(&ide_setting_sem);
 
 	switch (cmd) {
-		case HDIO_GETGEO:
-		{
-			struct hd_geometry geom;
-			if (!p || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
-			geom.heads = drive->bios_head;
-			geom.sectors = drive->bios_sect;
-			geom.cylinders = (u16)drive->bios_cyl; /* truncate */
-			geom.start = get_start_sect(bdev);
-			if (copy_to_user(p, &geom, sizeof(struct hd_geometry)))
-				return -EFAULT;
-			return 0;
-		}
-
 		case HDIO_OBSOLETE_IDENTITY:
 		case HDIO_GET_IDENTITY:
 			if (bdev != bdev->bd_contains)
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c
index 242029c9c0ca..6439dec66881 100644
--- a/drivers/ide/legacy/hd.c
+++ b/drivers/ide/legacy/hd.c
@@ -658,22 +658,14 @@ static void do_hd_request (request_queue_t * q)
 	enable_irq(HD_IRQ);
 }
 
-static int hd_ioctl(struct inode * inode, struct file * file,
-	unsigned int cmd, unsigned long arg)
+static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct hd_i_struct *disk = inode->i_bdev->bd_disk->private_data;
-	struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
-	struct hd_geometry g;
-
-	if (cmd != HDIO_GETGEO)
-		return -EINVAL;
-	if (!loc)
-		return -EINVAL;
-	g.heads = disk->head;
-	g.sectors = disk->sect;
-	g.cylinders = disk->cyl;
-	g.start = get_start_sect(inode->i_bdev);
-	return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0;
+	struct hd_i_struct *disk = bdev->bd_disk->private_data;
+
+	geo->heads = disk->head;
+	geo->sectors = disk->sect;
+	geo->cylinders = disk->cyl;
+	return 0;
 }
 
 /*
@@ -695,7 +687,7 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 }
 
 static struct block_device_operations hd_fops = {
-	.ioctl =	hd_ioctl,
+	.getgeo =	hd_getgeo,
 };
 
 /*
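The ide and hd conversions above all follow the same shape: the driver keeps only a .getgeo method that reports raw geometry, while common block-layer code owns the HDIO_GETGEO ioctl details, such as filling geo->start from the partition offset and copying the struct out to user space. Here is a simplified user-space model of that split, with stand-in types for the kernel's block_device_operations and hd_geometry:

#include <stdio.h>

struct hd_geometry {
	unsigned char heads;
	unsigned char sectors;
	unsigned short cylinders;
	unsigned long start;
};

struct blkdev {
	unsigned long start_sect;	/* partition offset */
	int (*getgeo)(struct blkdev *, struct hd_geometry *);
};

/* driver side: report geometry only */
static int demo_getgeo(struct blkdev *bdev, struct hd_geometry *geo)
{
	geo->heads = 16;
	geo->sectors = 63;
	geo->cylinders = 1024;
	return 0;
}

/* common code: the one place that knows about 'start' and user copy-out */
static int generic_hdio_getgeo(struct blkdev *bdev, struct hd_geometry *out)
{
	struct hd_geometry geo = { 0 };
	int ret;

	if (!bdev->getgeo)
		return -1;			/* -EINVAL in the kernel */
	ret = bdev->getgeo(bdev, &geo);
	if (ret)
		return ret;
	geo.start = bdev->start_sect;		/* core fills this, not drivers */
	*out = geo;				/* copy_to_user() in the kernel */
	return 0;
}

int main(void)
{
	struct blkdev dev = { .start_sect = 2048, .getgeo = demo_getgeo };
	struct hd_geometry g;

	if (generic_hdio_getgeo(&dev, &g) == 0)
		printf("CHS %u/%u/%u start %lu\n",
		       g.cylinders, g.heads, g.sectors, g.start);
	return 0;
}
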
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index ff2e217a8c84..0d3073f4eab4 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -69,7 +69,7 @@ static int check_in_drive_lists (ide_drive_t *drive, const char **list)
 static u8 svwks_ratemask (ide_drive_t *drive)
 {
 	struct pci_dev *dev	= HWIF(drive)->pci_dev;
-	u8 mode;
+	u8 mode = 0;
 
 	if (!svwks_revision)
 		pci_read_config_byte(dev, PCI_REVISION_ID, &svwks_revision);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 02110e00d145..3a611fe5497e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -308,10 +308,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
 	int ret;
+	static int next_id;
 
 	do {
 		spin_lock_irqsave(&cm.lock, flags);
-		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1,
+		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
 					(__force int *) &cm_id_priv->id.local_id);
 		spin_unlock_irqrestore(&cm.lock, flags);
 	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
@@ -684,6 +685,13 @@ retest:
 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
 		break;
 	case IB_CM_REQ_SENT:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
+			       &cm_id_priv->av.port->cm_dev->ca_guid,
+			       sizeof cm_id_priv->av.port->cm_dev->ca_guid,
+			       NULL, 0);
+		break;
 	case IB_CM_MRA_REQ_RCVD:
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
@@ -694,10 +702,8 @@ retest:
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
-			       &cm_id_priv->av.port->cm_dev->ca_guid,
-			       sizeof cm_id_priv->av.port->cm_dev->ca_guid,
-			       NULL, 0);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+			       NULL, 0, NULL, 0);
 		break;
 	case IB_CM_ESTABLISHED:
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
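The cm_alloc_id() change above swaps a fixed idr start of 1 for a static, ever-incrementing next_id hint, so a local ID freed by one connection is not immediately recycled for the next, and a stale or delayed message is less likely to match a brand-new cm_id. A toy user-space model of the effect follows; the bitmap allocator and alloc_above() are hypothetical stand-ins for the kernel's idr.

#include <stdio.h>

#define MAX_ID 64

static unsigned char used[MAX_ID];

static int alloc_above(int start)
{
	int i;

	for (i = 0; i < MAX_ID; i++) {
		int id = (start + i) % MAX_ID;	/* wrap like an idr would */
		if (!used[id]) {
			used[id] = 1;
			return id;
		}
	}
	return -1;	/* table full */
}

int main(void)
{
	static int next_id;	/* the moving hint */
	int a, b;

	a = alloc_above(next_id++);
	used[a] = 0;				/* connection torn down */
	b = alloc_above(next_id++);		/* hint already moved past a */
	printf("first=%d reused-immediately=%s second=%d\n",
	       a, (a == b) ? "yes" : "no", b);
	return 0;
}
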
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index eb7f52537ccc..c908de8db5a9 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -197,8 +197,8 @@ static void send_handler(struct ib_mad_agent *agent,
 		memcpy(timeout->mad.data, packet->mad.data,
 		       sizeof (struct ib_mad_hdr));
 
-		if (!queue_packet(file, agent, timeout))
-			return;
+		if (queue_packet(file, agent, timeout))
+			kfree(timeout);
 	}
 out:
 	kfree(packet);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a57d021d435a..a02c5a05c984 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -489,6 +489,7 @@ err_idr:
 
 err_unreg:
 	ib_dereg_mr(mr);
+	atomic_dec(&pd->usecnt);
 
 err_up:
 	up(&ib_uverbs_idr_mutex);
@@ -593,13 +594,18 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 	if (cmd.comp_vector >= file->device->num_comp_vectors)
 		return -EINVAL;
 
-	if (cmd.comp_channel >= 0)
-		ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
-
 	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
 	if (!uobj)
 		return -ENOMEM;
 
+	if (cmd.comp_channel >= 0) {
+		ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+		if (!ev_file) {
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+
 	uobj->uobject.user_handle = cmd.user_handle;
 	uobj->uobject.context     = file->ucontext;
 	uobj->uverbs_file         = file;
@@ -663,6 +669,8 @@ err_up:
 	ib_destroy_cq(cq);
 
 err:
+	if (ev_file)
+		ib_uverbs_release_ucq(file, ev_file, uobj);
 	kfree(uobj);
 	return ret;
 }
@@ -935,6 +943,11 @@ err_idr:
 
 err_destroy:
 	ib_destroy_qp(qp);
+	atomic_dec(&pd->usecnt);
+	atomic_dec(&attr.send_cq->usecnt);
+	atomic_dec(&attr.recv_cq->usecnt);
+	if (attr.srq)
+		atomic_dec(&attr.srq->usecnt);
 
 err_up:
 	up(&ib_uverbs_idr_mutex);
@@ -1448,6 +1461,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	attr.sl                = cmd.attr.sl;
 	attr.src_path_bits     = cmd.attr.src_path_bits;
 	attr.static_rate       = cmd.attr.static_rate;
+	attr.ah_flags          = cmd.attr.is_global ? IB_AH_GRH : 0;
 	attr.port_num          = cmd.attr.port_num;
 	attr.grh.flow_label    = cmd.attr.grh.flow_label;
 	attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
@@ -1729,6 +1743,7 @@ err_idr:
 
 err_destroy:
 	ib_destroy_srq(srq);
+	atomic_dec(&pd->usecnt);
 
 err_up:
 	up(&ib_uverbs_idr_mutex);
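The uverbs fixes above share one theme: every reference taken while building an object must be dropped again on the error-unwind ladder, in reverse order of acquisition, otherwise a failed create leaves pd/cq/srq use counts permanently elevated. A compact sketch of the idiom with illustrative names (not the uverbs API):

#include <stdio.h>

struct obj { int refcnt; };

static void get_ref(struct obj *o) { o->refcnt++; }
static void put_ref(struct obj *o) { o->refcnt--; }

static int create_thing(struct obj *pd, struct obj *cq, int fail_step)
{
	get_ref(pd);
	if (fail_step == 1)
		goto err_pd;

	get_ref(cq);
	if (fail_step == 2)
		goto err_cq;

	return 0;		/* success: both references stay held */

err_cq:
	put_ref(cq);		/* undo in reverse order of acquisition */
err_pd:
	put_ref(pd);
	return -1;
}

int main(void)
{
	struct obj pd = { 0 }, cq = { 0 };

	create_thing(&pd, &cq, 2);	/* fail after both refs taken */
	printf("pd=%d cq=%d (both back to 0)\n", pd.refcnt, cq.refcnt);
	return 0;
}
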
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 4c15e112736c..c857361be449 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -107,9 +107,9 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
 
 	if (wc->wc_flags & IB_WC_GRH) {
 		ah_attr.ah_flags = IB_AH_GRH;
-		ah_attr.grh.dgid = grh->dgid;
+		ah_attr.grh.dgid = grh->sgid;
 
-		ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num,
+		ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num,
 					 &gid_index);
 		if (ret)
 			return ERR_PTR(ret);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 9ed34587fc5c..22ac72bc20c3 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -937,10 +937,6 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	if (err)
 		goto out;
 
-	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
-	dev_lim->max_srq_sz = (1 << field) - 1;
-	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
-	dev_lim->max_qp_sz = (1 << field) - 1;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
 	dev_lim->reserved_qps = 1 << (field & 0xf);
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -1056,6 +1052,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
 
 	if (mthca_is_memfree(dev)) {
+		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+		dev_lim->max_srq_sz = 1 << field;
+		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+		dev_lim->max_qp_sz = 1 << field;
 		MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
 		dev_lim->hca.arbel.resize_srq = field & 1;
 		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
@@ -1087,6 +1087,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
1087 mthca_dbg(dev, "Max ICM size %lld MB\n", 1087 mthca_dbg(dev, "Max ICM size %lld MB\n",
1088 (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20); 1088 (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20);
1089 } else { 1089 } else {
1090 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
1091 dev_lim->max_srq_sz = (1 << field) - 1;
1092 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
1093 dev_lim->max_qp_sz = (1 << field) - 1;
1090 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET); 1094 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
1091 dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f); 1095 dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
1092 dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE; 1096 dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE;
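The reshuffled MTHCA_GET() calls above account for a firmware difference between the two HCA families: on mem-free (Arbel-mode) devices the log2 field encodes the true maximum, while Tavor reports one less, so max_srq_sz and max_qp_sz must be decoded per family rather than once for both. A one-line sketch of the semantics assumed from the hunks:

    /* decode a log2 size field; per-family behaviour taken from the diff */
    static int decode_max_sz(unsigned log2_field, int is_memfree)
    {
            return is_memfree ? 1 << log2_field : (1 << log2_field) - 1;
    }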
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 4a8adcef2079..96f1a86bf049 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -128,12 +128,12 @@ struct mthca_err_cqe {
128 __be32 my_qpn; 128 __be32 my_qpn;
129 u32 reserved1[3]; 129 u32 reserved1[3];
130 u8 syndrome; 130 u8 syndrome;
131 u8 reserved2; 131 u8 vendor_err;
132 __be16 db_cnt; 132 __be16 db_cnt;
133 u32 reserved3; 133 u32 reserved2;
134 __be32 wqe; 134 __be32 wqe;
135 u8 opcode; 135 u8 opcode;
136 u8 reserved4[2]; 136 u8 reserved3[2];
137 u8 owner; 137 u8 owner;
138}; 138};
139 139
@@ -253,6 +253,15 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
253 wake_up(&cq->wait); 253 wake_up(&cq->wait);
254} 254}
255 255
256static inline int is_recv_cqe(struct mthca_cqe *cqe)
257{
258 if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
259 MTHCA_ERROR_CQE_OPCODE_MASK)
260 return !(cqe->opcode & 0x01);
261 else
262 return !(cqe->is_send & 0x80);
263}
264
256void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, 265void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
257 struct mthca_srq *srq) 266 struct mthca_srq *srq)
258{ 267{
@@ -296,7 +305,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
296 while ((int) --prod_index - (int) cq->cons_index >= 0) { 305 while ((int) --prod_index - (int) cq->cons_index >= 0) {
297 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); 306 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
298 if (cqe->my_qpn == cpu_to_be32(qpn)) { 307 if (cqe->my_qpn == cpu_to_be32(qpn)) {
299 if (srq) 308 if (srq && is_recv_cqe(cqe))
300 mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); 309 mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
301 ++nfreed; 310 ++nfreed;
302 } else if (nfreed) 311 } else if (nfreed)
@@ -333,8 +342,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
333 } 342 }
334 343
335 /* 344 /*
336 * For completions in error, only work request ID, status (and 345 * For completions in error, only work request ID, status, vendor error
337 * freed resource count for RD) have to be set. 346 * (and freed resource count for RD) have to be set.
338 */ 347 */
339 switch (cqe->syndrome) { 348 switch (cqe->syndrome) {
340 case SYNDROME_LOCAL_LENGTH_ERR: 349 case SYNDROME_LOCAL_LENGTH_ERR:
@@ -396,6 +405,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
396 break; 405 break;
397 } 406 }
398 407
408 entry->vendor_err = cqe->vendor_err;
409
399 /* 410 /*
400 * Mem-free HCAs always generate one CQE per WQE, even in the 411 * Mem-free HCAs always generate one CQE per WQE, even in the
401 * error case, so we don't have to check the doorbell count, etc. 412 * error case, so we don't have to check the doorbell count, etc.
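Two things happen in mthca_cq.c: error CQEs overload the opcode byte, so is_recv_cqe() must read the direction from opcode bit 0 for error completions and from is_send bit 7 otherwise — without that test, mthca_cq_clean() would hand send completions to mthca_free_srq_wqe() and corrupt the SRQ free list — and the byte formerly labeled reserved2 turns out to carry the vendor error code, which is now copied into the work completion. A compilable sketch of the predicate (the mask value is an assumption, matching the usual MTHCA_ERROR_CQE_OPCODE_MASK of 0xfe):

    struct cqe { unsigned char opcode, is_send; };

    #define ERROR_CQE_OPCODE_MASK 0xfe /* assumed value */

    static int is_recv_cqe(const struct cqe *cqe)
    {
            if ((cqe->opcode & ERROR_CQE_OPCODE_MASK) == ERROR_CQE_OPCODE_MASK)
                    return !(cqe->opcode & 0x01); /* error CQE: bit 0 = send */
            return !(cqe->is_send & 0x80);        /* normal CQE: bit 7 = send */
    }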
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 497ff794ef6a..795b379260bf 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -43,6 +43,7 @@
43#include <linux/kernel.h> 43#include <linux/kernel.h>
44#include <linux/pci.h> 44#include <linux/pci.h>
45#include <linux/dma-mapping.h> 45#include <linux/dma-mapping.h>
46#include <linux/timer.h>
46#include <asm/semaphore.h> 47#include <asm/semaphore.h>
47 48
48#include "mthca_provider.h" 49#include "mthca_provider.h"
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 34d68e5a72d8..e8a948f087c0 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -484,8 +484,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
484 u8 intr, 484 u8 intr,
485 struct mthca_eq *eq) 485 struct mthca_eq *eq)
486{ 486{
487 int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / 487 int npages;
488 PAGE_SIZE;
489 u64 *dma_list = NULL; 488 u64 *dma_list = NULL;
490 dma_addr_t t; 489 dma_addr_t t;
491 struct mthca_mailbox *mailbox; 490 struct mthca_mailbox *mailbox;
@@ -496,6 +495,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
496 495
497 eq->dev = dev; 496 eq->dev = dev;
498 eq->nent = roundup_pow_of_two(max(nent, 2)); 497 eq->nent = roundup_pow_of_two(max(nent, 2));
498 npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
499 499
500 eq->page_list = kmalloc(npages * sizeof *eq->page_list, 500 eq->page_list = kmalloc(npages * sizeof *eq->page_list,
501 GFP_KERNEL); 501 GFP_KERNEL);
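The point of moving the npages computation in mthca_create_eq(): eq->nent is first rounded up to a power of two (with a floor of 2), so sizing the page array from the caller's raw nent could under-allocate. A self-contained sketch of the corrected order of operations:

    #include <stdio.h>

    #define PAGE_SIZE 4096u
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    /* minimal stand-in for the kernel's roundup_pow_of_two() */
    static unsigned roundup_pow_of_two(unsigned n)
    {
            unsigned p = 1;
            while (p < n)
                    p <<= 1;
            return p;
    }

    int main(void)
    {
            unsigned nent = 1000, entry_sz = 32; /* example values */
            unsigned rounded = roundup_pow_of_two(nent < 2 ? 2 : nent);
            unsigned npages = ALIGN(rounded * entry_sz, PAGE_SIZE) / PAGE_SIZE;
            printf("%u entries -> %u rounded, %u pages\n",
                   nent, rounded, npages);
            return 0;
    }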
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 6f94b25f3acd..8b00d9a0f6f4 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -261,6 +261,10 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
261 } 261 }
262 262
263 err = mthca_dev_lim(mdev, &dev_lim); 263 err = mthca_dev_lim(mdev, &dev_lim);
264 if (err) {
265 mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
266 goto err_disable;
267 }
264 268
265 profile = default_profile; 269 profile = default_profile;
266 profile.num_uar = dev_lim.uar_size / PAGE_SIZE; 270 profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 2fc449da418d..77bc6c746f43 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -111,7 +111,8 @@ static int find_mgm(struct mthca_dev *dev,
111 goto out; 111 goto out;
112 if (status) { 112 if (status) {
113 mthca_err(dev, "READ_MGM returned status %02x\n", status); 113 mthca_err(dev, "READ_MGM returned status %02x\n", status);
114 return -EINVAL; 114 err = -EINVAL;
115 goto out;
115 } 116 }
116 117
117 if (!memcmp(mgm->gid, zero_gid, 16)) { 118 if (!memcmp(mgm->gid, zero_gid, 16)) {
@@ -126,7 +127,7 @@ static int find_mgm(struct mthca_dev *dev,
126 goto out; 127 goto out;
127 128
128 *prev = *index; 129 *prev = *index;
129 *index = be32_to_cpu(mgm->next_gid_index) >> 5; 130 *index = be32_to_cpu(mgm->next_gid_index) >> 6;
130 } while (*index); 131 } while (*index);
131 132
132 *index = -1; 133 *index = -1;
@@ -153,8 +154,10 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
153 return PTR_ERR(mailbox); 154 return PTR_ERR(mailbox);
154 mgm = mailbox->buf; 155 mgm = mailbox->buf;
155 156
156 if (down_interruptible(&dev->mcg_table.sem)) 157 if (down_interruptible(&dev->mcg_table.sem)) {
157 return -EINTR; 158 err = -EINTR;
159 goto err_sem;
160 }
158 161
159 err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); 162 err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
160 if (err) 163 if (err)
@@ -181,9 +184,8 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
181 err = -EINVAL; 184 err = -EINVAL;
182 goto out; 185 goto out;
183 } 186 }
184 187 memset(mgm, 0, sizeof *mgm);
185 memcpy(mgm->gid, gid->raw, 16); 188 memcpy(mgm->gid, gid->raw, 16);
186 mgm->next_gid_index = 0;
187 } 189 }
188 190
189 for (i = 0; i < MTHCA_QP_PER_MGM; ++i) 191 for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
@@ -209,6 +211,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
209 if (status) { 211 if (status) {
210 mthca_err(dev, "WRITE_MGM returned status %02x\n", status); 212 mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
211 err = -EINVAL; 213 err = -EINVAL;
214 goto out;
212 } 215 }
213 216
214 if (!link) 217 if (!link)
@@ -223,7 +226,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
223 goto out; 226 goto out;
224 } 227 }
225 228
226 mgm->next_gid_index = cpu_to_be32(index << 5); 229 mgm->next_gid_index = cpu_to_be32(index << 6);
227 230
228 err = mthca_WRITE_MGM(dev, prev, mailbox, &status); 231 err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
229 if (err) 232 if (err)
@@ -234,7 +237,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
234 } 237 }
235 238
236 out: 239 out:
240 if (err && link && index != -1) {
241 BUG_ON(index < dev->limits.num_mgms);
242 mthca_free(&dev->mcg_table.alloc, index);
243 }
237 up(&dev->mcg_table.sem); 244 up(&dev->mcg_table.sem);
245 err_sem:
238 mthca_free_mailbox(dev, mailbox); 246 mthca_free_mailbox(dev, mailbox);
239 return err; 247 return err;
240} 248}
@@ -255,8 +263,10 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
255 return PTR_ERR(mailbox); 263 return PTR_ERR(mailbox);
256 mgm = mailbox->buf; 264 mgm = mailbox->buf;
257 265
258 if (down_interruptible(&dev->mcg_table.sem)) 266 if (down_interruptible(&dev->mcg_table.sem)) {
259 return -EINTR; 267 err = -EINTR;
268 goto err_sem;
269 }
260 270
261 err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); 271 err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
262 if (err) 272 if (err)
@@ -305,13 +315,11 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
305 if (i != 1) 315 if (i != 1)
306 goto out; 316 goto out;
307 317
308 goto out;
309
310 if (prev == -1) { 318 if (prev == -1) {
311 /* Remove entry from MGM */ 319 /* Remove entry from MGM */
312 if (be32_to_cpu(mgm->next_gid_index) >> 5) { 320 int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6;
313 err = mthca_READ_MGM(dev, 321 if (amgm_index_to_free) {
314 be32_to_cpu(mgm->next_gid_index) >> 5, 322 err = mthca_READ_MGM(dev, amgm_index_to_free,
315 mailbox, &status); 323 mailbox, &status);
316 if (err) 324 if (err)
317 goto out; 325 goto out;
@@ -332,9 +340,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
332 err = -EINVAL; 340 err = -EINVAL;
333 goto out; 341 goto out;
334 } 342 }
343 if (amgm_index_to_free) {
344 BUG_ON(amgm_index_to_free < dev->limits.num_mgms);
345 mthca_free(&dev->mcg_table.alloc, amgm_index_to_free);
346 }
335 } else { 347 } else {
336 /* Remove entry from AMGM */ 348 /* Remove entry from AMGM */
337 index = be32_to_cpu(mgm->next_gid_index) >> 5; 349 int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
338 err = mthca_READ_MGM(dev, prev, mailbox, &status); 350 err = mthca_READ_MGM(dev, prev, mailbox, &status);
339 if (err) 351 if (err)
340 goto out; 352 goto out;
@@ -344,7 +356,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
344 goto out; 356 goto out;
345 } 357 }
346 358
347 mgm->next_gid_index = cpu_to_be32(index << 5); 359 mgm->next_gid_index = cpu_to_be32(curr_next_index << 6);
348 360
349 err = mthca_WRITE_MGM(dev, prev, mailbox, &status); 361 err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
350 if (err) 362 if (err)
@@ -354,10 +366,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
354 err = -EINVAL; 366 err = -EINVAL;
355 goto out; 367 goto out;
356 } 368 }
369 BUG_ON(index < dev->limits.num_mgms);
370 mthca_free(&dev->mcg_table.alloc, index);
357 } 371 }
358 372
359 out: 373 out:
360 up(&dev->mcg_table.sem); 374 up(&dev->mcg_table.sem);
375 err_sem:
361 mthca_free_mailbox(dev, mailbox); 376 mthca_free_mailbox(dev, mailbox);
362 return err; 377 return err;
363} 378}
@@ -365,11 +380,12 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
365int __devinit mthca_init_mcg_table(struct mthca_dev *dev) 380int __devinit mthca_init_mcg_table(struct mthca_dev *dev)
366{ 381{
367 int err; 382 int err;
383 int table_size = dev->limits.num_mgms + dev->limits.num_amgms;
368 384
369 err = mthca_alloc_init(&dev->mcg_table.alloc, 385 err = mthca_alloc_init(&dev->mcg_table.alloc,
370 dev->limits.num_amgms, 386 table_size,
371 dev->limits.num_amgms - 1, 387 table_size - 1,
372 0); 388 dev->limits.num_mgms);
373 if (err) 389 if (err)
374 return err; 390 return err;
375 391
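Three related fixes in mthca_mcg.c: the error paths now release the semaphore and free a freshly allocated AMGM entry instead of leaking them; the next_gid_index link field is shifted by 6 rather than 5, suggesting the entry index lives in the upper bits of the word (shift value taken from the hunks, not from documentation); and the allocator is initialized over the combined MGM+AMGM range with the first num_mgms entries reserved for the hash table itself, which is why freed indices can be sanity-checked with BUG_ON(index < dev->limits.num_mgms). A sketch of the assumed link-field encoding:

    /* next_gid_index packs the next entry's index in bits 6 and up */
    static unsigned next_index_decode(unsigned field) { return field >> 6; }
    static unsigned next_index_encode(unsigned index) { return index << 6; }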
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index d72fe95cba08..9fb985a016e9 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -233,7 +233,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj)
233 for (i = 0; i < chunk->npages; ++i) { 233 for (i = 0; i < chunk->npages; ++i) {
234 if (chunk->mem[i].length >= offset) { 234 if (chunk->mem[i].length >= offset) {
235 page = chunk->mem[i].page; 235 page = chunk->mem[i].page;
236 break; 236 goto out;
237 } 237 }
238 offset -= chunk->mem[i].length; 238 offset -= chunk->mem[i].length;
239 } 239 }
@@ -485,6 +485,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
485 put_page(db_tab->page[i].mem.page); 485 put_page(db_tab->page[i].mem.page);
486 } 486 }
487 } 487 }
488
489 kfree(db_tab);
488} 490}
489 491
490int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, 492int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
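Two small but real bugs in mthca_memfree.c: the inner break left only the page loop, so the enclosing chunk loop kept iterating and could overwrite the page already found, and mthca_cleanup_user_db_tab() released the pages but never the table itself. A sketch of why goto is the right exit from the nested search (simplified types):

    struct chunk { int npages; long len[8]; void *mem[8]; };

    static void *find_page(struct chunk *chunks, int nchunks, long offset)
    {
            void *page = NULL;
            int c, i;

            for (c = 0; c < nchunks; ++c)
                    for (i = 0; i < chunks[c].npages; ++i) {
                            if (chunks[c].len[i] >= offset) {
                                    page = chunks[c].mem[i];
                                    goto out; /* leave both loops at once */
                            }
                            offset -= chunks[c].len[i];
                    }
    out:
            return page;
    }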
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 7450550db736..564b6d51c394 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -383,12 +383,10 @@ static const struct {
383 [UC] = (IB_QP_CUR_STATE | 383 [UC] = (IB_QP_CUR_STATE |
384 IB_QP_ALT_PATH | 384 IB_QP_ALT_PATH |
385 IB_QP_ACCESS_FLAGS | 385 IB_QP_ACCESS_FLAGS |
386 IB_QP_PKEY_INDEX |
387 IB_QP_PATH_MIG_STATE), 386 IB_QP_PATH_MIG_STATE),
388 [RC] = (IB_QP_CUR_STATE | 387 [RC] = (IB_QP_CUR_STATE |
389 IB_QP_ALT_PATH | 388 IB_QP_ALT_PATH |
390 IB_QP_ACCESS_FLAGS | 389 IB_QP_ACCESS_FLAGS |
391 IB_QP_PKEY_INDEX |
392 IB_QP_MIN_RNR_TIMER | 390 IB_QP_MIN_RNR_TIMER |
393 IB_QP_PATH_MIG_STATE), 391 IB_QP_PATH_MIG_STATE),
394 [MLX] = (IB_QP_CUR_STATE | 392 [MLX] = (IB_QP_CUR_STATE |
@@ -476,9 +474,8 @@ static const struct {
476 .opt_param = { 474 .opt_param = {
477 [UD] = (IB_QP_CUR_STATE | 475 [UD] = (IB_QP_CUR_STATE |
478 IB_QP_QKEY), 476 IB_QP_QKEY),
479 [UC] = IB_QP_CUR_STATE, 477 [UC] = (IB_QP_CUR_STATE |
480 [RC] = (IB_QP_CUR_STATE | 478 IB_QP_ACCESS_FLAGS),
481 IB_QP_MIN_RNR_TIMER),
482 [MLX] = (IB_QP_CUR_STATE | 479 [MLX] = (IB_QP_CUR_STATE |
483 IB_QP_QKEY), 480 IB_QP_QKEY),
484 } 481 }
@@ -522,6 +519,55 @@ static void init_port(struct mthca_dev *dev, int port)
522 mthca_warn(dev, "INIT_IB returned status %02x.\n", status); 519 mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
523} 520}
524 521
522static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
523 int attr_mask)
524{
525 u8 dest_rd_atomic;
526 u32 access_flags;
527 u32 hw_access_flags = 0;
528
529 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
530 dest_rd_atomic = attr->max_dest_rd_atomic;
531 else
532 dest_rd_atomic = qp->resp_depth;
533
534 if (attr_mask & IB_QP_ACCESS_FLAGS)
535 access_flags = attr->qp_access_flags;
536 else
537 access_flags = qp->atomic_rd_en;
538
539 if (!dest_rd_atomic)
540 access_flags &= IB_ACCESS_REMOTE_WRITE;
541
542 if (access_flags & IB_ACCESS_REMOTE_READ)
543 hw_access_flags |= MTHCA_QP_BIT_RRE;
544 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
545 hw_access_flags |= MTHCA_QP_BIT_RAE;
546 if (access_flags & IB_ACCESS_REMOTE_WRITE)
547 hw_access_flags |= MTHCA_QP_BIT_RWE;
548
549 return cpu_to_be32(hw_access_flags);
550}
551
552static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path)
553{
554 path->g_mylmc = ah->src_path_bits & 0x7f;
555 path->rlid = cpu_to_be16(ah->dlid);
556 path->static_rate = !!ah->static_rate;
557
558 if (ah->ah_flags & IB_AH_GRH) {
559 path->g_mylmc |= 1 << 7;
560 path->mgid_index = ah->grh.sgid_index;
561 path->hop_limit = ah->grh.hop_limit;
562 path->sl_tclass_flowlabel =
563 cpu_to_be32((ah->sl << 28) |
564 (ah->grh.traffic_class << 20) |
565 (ah->grh.flow_label));
566 memcpy(path->rgid, ah->grh.dgid.raw, 16);
567 } else
568 path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
569}
570
525int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) 571int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
526{ 572{
527 struct mthca_dev *dev = to_mdev(ibqp->device); 573 struct mthca_dev *dev = to_mdev(ibqp->device);
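get_hw_access_flags() in the hunk above centralizes a subtlety the old open-coded logic handled inconsistently: each of max_dest_rd_atomic and qp_access_flags may arrive in this modify call or be inherited from cached QP state, and remote reads/atomics must be masked off whenever the responder depth is zero. The underlying "take the new value if its mask bit is set, else the cached one" pattern, reduced to a sketch with simplified types:

    /* illustrative only; mask bits and fields simplified */
    static unsigned effective(unsigned attr_mask, unsigned bit,
                              unsigned from_attr, unsigned cached)
    {
            return (attr_mask & bit) ? from_attr : cached;
    }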
@@ -591,6 +637,26 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
591 return -EINVAL; 637 return -EINVAL;
592 } 638 }
593 639
640 if ((attr_mask & IB_QP_PORT) &&
641 (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
642 mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
643 return -EINVAL;
644 }
645
646 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
647 attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
648 mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
649 attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
650 return -EINVAL;
651 }
652
653 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
654 attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
655 mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
656 attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
657 return -EINVAL;
658 }
659
594 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 660 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
595 if (IS_ERR(mailbox)) 661 if (IS_ERR(mailbox))
596 return PTR_ERR(mailbox); 662 return PTR_ERR(mailbox);
@@ -665,28 +731,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
665 } 731 }
666 732
667 if (attr_mask & IB_QP_RNR_RETRY) { 733 if (attr_mask & IB_QP_RNR_RETRY) {
668 qp_context->pri_path.rnr_retry = attr->rnr_retry << 5; 734 qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
669 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY); 735 attr->rnr_retry << 5;
736 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
737 MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
670 } 738 }
671 739
672 if (attr_mask & IB_QP_AV) { 740 if (attr_mask & IB_QP_AV) {
673 qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; 741 mthca_path_set(&attr->ah_attr, &qp_context->pri_path);
674 qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid);
675 qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate;
676 if (attr->ah_attr.ah_flags & IB_AH_GRH) {
677 qp_context->pri_path.g_mylmc |= 1 << 7;
678 qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
679 qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
680 qp_context->pri_path.sl_tclass_flowlabel =
681 cpu_to_be32((attr->ah_attr.sl << 28) |
682 (attr->ah_attr.grh.traffic_class << 20) |
683 (attr->ah_attr.grh.flow_label));
684 memcpy(qp_context->pri_path.rgid,
685 attr->ah_attr.grh.dgid.raw, 16);
686 } else {
687 qp_context->pri_path.sl_tclass_flowlabel =
688 cpu_to_be32(attr->ah_attr.sl << 28);
689 }
690 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); 742 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
691 } 743 }
692 744
@@ -695,7 +747,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
695 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); 747 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
696 } 748 }
697 749
698 /* XXX alt_path */ 750 if (attr_mask & IB_QP_ALT_PATH) {
751 if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
752 mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
753 attr->alt_port_num);
754 return -EINVAL;
755 }
756
757 mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path);
758 qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
759 attr->alt_port_num << 24);
760 qp_context->alt_path.ackto = attr->alt_timeout << 3;
761 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
762 }
699 763
700 /* leave rdd as 0 */ 764 /* leave rdd as 0 */
701 qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num); 765 qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
@@ -703,9 +767,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
703 qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey); 767 qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey);
704 qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) | 768 qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
705 (MTHCA_FLIGHT_LIMIT << 24) | 769 (MTHCA_FLIGHT_LIMIT << 24) |
706 MTHCA_QP_BIT_SRE | 770 MTHCA_QP_BIT_SWE);
707 MTHCA_QP_BIT_SWE |
708 MTHCA_QP_BIT_SAE);
709 if (qp->sq_policy == IB_SIGNAL_ALL_WR) 771 if (qp->sq_policy == IB_SIGNAL_ALL_WR)
710 qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC); 772 qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
711 if (attr_mask & IB_QP_RETRY_CNT) { 773 if (attr_mask & IB_QP_RETRY_CNT) {
@@ -714,9 +776,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
714 } 776 }
715 777
716 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { 778 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
717 qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ? 779 if (attr->max_rd_atomic) {
718 ffs(attr->max_rd_atomic) - 1 : 0, 780 qp_context->params1 |=
719 7) << 21); 781 cpu_to_be32(MTHCA_QP_BIT_SRE |
782 MTHCA_QP_BIT_SAE);
783 qp_context->params1 |=
784 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
785 }
720 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); 786 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
721 } 787 }
722 788
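The old expression derived the field from the lowest set bit (ffs), so a request for 5 outstanding RDMA reads was encoded as 1; fls(x - 1) instead rounds up to the exponent of the next power of two, and the SRE/SAE enable bits are now set only when the limit is non-zero. A quick demonstration of the encoding:

    #include <stdio.h>

    /* stand-in for the kernel's fls(): highest set bit, 1-based, fls(0)=0 */
    static int fls_(unsigned x)
    {
            int r = 0;
            while (x) {
                    ++r;
                    x >>= 1;
            }
            return r;
    }

    int main(void)
    {
            unsigned x;
            for (x = 1; x <= 8; ++x)
                    printf("max_rd_atomic=%u -> field=%d (grants %u)\n",
                           x, fls_(x - 1), 1u << fls_(x - 1));
            return 0;
    }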
@@ -729,71 +795,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
729 qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index); 795 qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index);
730 } 796 }
731 797
732 if (attr_mask & IB_QP_ACCESS_FLAGS) {
733 qp_context->params2 |=
734 cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
735 MTHCA_QP_BIT_RWE : 0);
736
737 /*
738 * Only enable RDMA reads and atomics if we have
739 * responder resources set to a non-zero value.
740 */
741 if (qp->resp_depth) {
742 qp_context->params2 |=
743 cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
744 MTHCA_QP_BIT_RRE : 0);
745 qp_context->params2 |=
746 cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
747 MTHCA_QP_BIT_RAE : 0);
748 }
749
750 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
751 MTHCA_QP_OPTPAR_RRE |
752 MTHCA_QP_OPTPAR_RAE);
753
754 qp->atomic_rd_en = attr->qp_access_flags;
755 }
756
757 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { 798 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
758 u8 rra_max; 799 if (attr->max_dest_rd_atomic)
759
760 if (qp->resp_depth && !attr->max_dest_rd_atomic) {
761 /*
762 * Lowering our responder resources to zero.
763 * Turn off reads RDMA and atomics as responder.
764 * (RRE/RAE in params2 already zero)
765 */
766 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE |
767 MTHCA_QP_OPTPAR_RAE);
768 }
769
770 if (!qp->resp_depth && attr->max_dest_rd_atomic) {
771 /*
772 * Increasing our responder resources from
773 * zero. Turn on RDMA reads and atomics as
774 * appropriate.
775 */
776 qp_context->params2 |= 800 qp_context->params2 |=
777 cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ? 801 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
778 MTHCA_QP_BIT_RRE : 0);
779 qp_context->params2 |=
780 cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
781 MTHCA_QP_BIT_RAE : 0);
782
783 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE |
784 MTHCA_QP_OPTPAR_RAE);
785 }
786 802
787 for (rra_max = 0;
788 1 << rra_max < attr->max_dest_rd_atomic &&
789 rra_max < dev->qp_table.rdb_shift;
790 ++rra_max)
791 ; /* nothing */
792
793 qp_context->params2 |= cpu_to_be32(rra_max << 21);
794 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); 803 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
804 }
795 805
796 qp->resp_depth = attr->max_dest_rd_atomic; 806 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
807 qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask);
808 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
809 MTHCA_QP_OPTPAR_RRE |
810 MTHCA_QP_OPTPAR_RAE);
797 } 811 }
798 812
799 qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); 813 qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
@@ -835,8 +849,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
835 err = -EINVAL; 849 err = -EINVAL;
836 } 850 }
837 851
838 if (!err) 852 if (!err) {
839 qp->state = new_state; 853 qp->state = new_state;
854 if (attr_mask & IB_QP_ACCESS_FLAGS)
855 qp->atomic_rd_en = attr->qp_access_flags;
856 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
857 qp->resp_depth = attr->max_dest_rd_atomic;
858 }
840 859
841 mthca_free_mailbox(dev, mailbox); 860 mthca_free_mailbox(dev, mailbox);
842 861
@@ -885,18 +904,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
885 return err; 904 return err;
886} 905}
887 906
888static void mthca_adjust_qp_caps(struct mthca_dev *dev, 907static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
889 struct mthca_pd *pd,
890 struct mthca_qp *qp)
891{ 908{
892 int max_data_size;
893
894 /* 909 /*
895 * Calculate the maximum size of WQE s/g segments, excluding 910 * Calculate the maximum size of WQE s/g segments, excluding
896 * the next segment and other non-data segments. 911 * the next segment and other non-data segments.
897 */ 912 */
898 max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) - 913 int max_data_size = desc_sz - sizeof (struct mthca_next_seg);
899 sizeof (struct mthca_next_seg);
900 914
901 switch (qp->transport) { 915 switch (qp->transport) {
902 case MLX: 916 case MLX:
@@ -915,11 +929,24 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev,
915 break; 929 break;
916 } 930 }
917 931
932 return max_data_size;
933}
934
935static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
936{
918 /* We don't support inline data for kernel QPs (yet). */ 937 /* We don't support inline data for kernel QPs (yet). */
919 if (!pd->ibpd.uobject) 938 return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
920 qp->max_inline_data = 0; 939}
921 else 940
922 qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE; 941static void mthca_adjust_qp_caps(struct mthca_dev *dev,
942 struct mthca_pd *pd,
943 struct mthca_qp *qp)
944{
945 int max_data_size = mthca_max_data_size(dev, qp,
946 min(dev->limits.max_desc_sz,
947 1 << qp->sq.wqe_shift));
948
949 qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);
923 950
924 qp->sq.max_gs = min_t(int, dev->limits.max_sg, 951 qp->sq.max_gs = min_t(int, dev->limits.max_sg,
925 max_data_size / sizeof (struct mthca_data_seg)); 952 max_data_size / sizeof (struct mthca_data_seg));
@@ -1186,13 +1213,23 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
1186} 1213}
1187 1214
1188static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, 1215static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1189 struct mthca_qp *qp) 1216 struct mthca_pd *pd, struct mthca_qp *qp)
1190{ 1217{
1218 int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);
1219
1191 /* Sanity check QP size before proceeding */ 1220 /* Sanity check QP size before proceeding */
1192 if (cap->max_send_wr > dev->limits.max_wqes || 1221 if (cap->max_send_wr > dev->limits.max_wqes ||
1193 cap->max_recv_wr > dev->limits.max_wqes || 1222 cap->max_recv_wr > dev->limits.max_wqes ||
1194 cap->max_send_sge > dev->limits.max_sg || 1223 cap->max_send_sge > dev->limits.max_sg ||
1195 cap->max_recv_sge > dev->limits.max_sg) 1224 cap->max_recv_sge > dev->limits.max_sg ||
1225 cap->max_inline_data > mthca_max_inline_data(pd, max_data_size))
1226 return -EINVAL;
1227
1228 /*
1229 * For MLX transport we need 2 extra S/G entries:
1230 * one for the header and one for the checksum at the end
1231 */
1232 if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg)
1196 return -EINVAL; 1233 return -EINVAL;
1197 1234
1198 if (mthca_is_memfree(dev)) { 1235 if (mthca_is_memfree(dev)) {
@@ -1211,14 +1248,6 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1211 MTHCA_INLINE_CHUNK_SIZE) / 1248 MTHCA_INLINE_CHUNK_SIZE) /
1212 sizeof (struct mthca_data_seg)); 1249 sizeof (struct mthca_data_seg));
1213 1250
1214 /*
1215 * For MLX transport we need 2 extra S/G entries:
1216 * one for the header and one for the checksum at the end
1217 */
1218 if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) ||
1219 qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg)
1220 return -EINVAL;
1221
1222 return 0; 1251 return 0;
1223} 1252}
1224 1253
@@ -1233,7 +1262,7 @@ int mthca_alloc_qp(struct mthca_dev *dev,
1233{ 1262{
1234 int err; 1263 int err;
1235 1264
1236 err = mthca_set_qp_size(dev, cap, qp); 1265 err = mthca_set_qp_size(dev, cap, pd, qp);
1237 if (err) 1266 if (err)
1238 return err; 1267 return err;
1239 1268
@@ -1276,7 +1305,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
1276 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; 1305 u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1277 int err; 1306 int err;
1278 1307
1279 err = mthca_set_qp_size(dev, cap, &sqp->qp); 1308 err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
1280 if (err) 1309 if (err)
1281 return err; 1310 return err;
1282 1311
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index f7d234295efe..e7e153d9c4c6 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -201,7 +201,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
201 if (mthca_is_memfree(dev)) 201 if (mthca_is_memfree(dev))
202 srq->max = roundup_pow_of_two(srq->max + 1); 202 srq->max = roundup_pow_of_two(srq->max + 1);
203 203
204 ds = min(64UL, 204 ds = max(64UL,
205 roundup_pow_of_two(sizeof (struct mthca_next_seg) + 205 roundup_pow_of_two(sizeof (struct mthca_next_seg) +
206 srq->max_gs * sizeof (struct mthca_data_seg))); 206 srq->max_gs * sizeof (struct mthca_data_seg)));
207 srq->wqe_shift = long_log2(ds); 207 srq->wqe_shift = long_log2(ds);
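The one-character min-to-max fix above matters: ds is the SRQ WQE stride, which must be a power of two no smaller than 64 bytes; min() clamped it to at most 64, truncating any work queue entry whose scatter list needed more. A sketch of the corrected computation:

    /* stride = max(64, roundup_pow_of_two(needed)); illustrative */
    static unsigned long wqe_stride(unsigned long needed)
    {
            unsigned long ds = 1;

            while (ds < needed)
                    ds <<= 1;
            return ds < 64 ? 64 : ds;
    }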
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index ee9fe226ae99..dd488d3cffa9 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -39,6 +39,7 @@
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/parser.h> 40#include <linux/parser.h>
41#include <linux/random.h> 41#include <linux/random.h>
42#include <linux/jiffies.h>
42 43
43#include <asm/atomic.h> 44#include <asm/atomic.h>
44 45
diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c
index 64672d491222..e301ee4ca264 100644
--- a/drivers/input/keyboard/corgikbd.c
+++ b/drivers/input/keyboard/corgikbd.c
@@ -19,7 +19,6 @@
19#include <linux/jiffies.h> 19#include <linux/jiffies.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <asm/irq.h>
23 22
24#include <asm/arch/corgi.h> 23#include <asm/arch/corgi.h>
25#include <asm/arch/hardware.h> 24#include <asm/arch/hardware.h>
@@ -343,10 +342,9 @@ static int __init corgikbd_probe(struct platform_device *pdev)
343 for (i = 0; i < CORGI_KEY_SENSE_NUM; i++) { 342 for (i = 0; i < CORGI_KEY_SENSE_NUM; i++) {
344 pxa_gpio_mode(CORGI_GPIO_KEY_SENSE(i) | GPIO_IN); 343 pxa_gpio_mode(CORGI_GPIO_KEY_SENSE(i) | GPIO_IN);
345 if (request_irq(CORGI_IRQ_GPIO_KEY_SENSE(i), corgikbd_interrupt, 344 if (request_irq(CORGI_IRQ_GPIO_KEY_SENSE(i), corgikbd_interrupt,
346 SA_INTERRUPT, "corgikbd", corgikbd)) 345 SA_INTERRUPT | SA_TRIGGER_RISING,
346 "corgikbd", corgikbd))
347 printk(KERN_WARNING "corgikbd: Can't get IRQ: %d!\n", i); 347 printk(KERN_WARNING "corgikbd: Can't get IRQ: %d!\n", i);
348 else
349 set_irq_type(CORGI_IRQ_GPIO_KEY_SENSE(i),IRQT_RISING);
350 } 348 }
351 349
352 /* Set Strobe lines as outputs - set high */ 350 /* Set Strobe lines as outputs - set high */
diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c
index 6a15fe3bc527..83999d583122 100644
--- a/drivers/input/keyboard/spitzkbd.c
+++ b/drivers/input/keyboard/spitzkbd.c
@@ -19,7 +19,6 @@
19#include <linux/jiffies.h> 19#include <linux/jiffies.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <asm/irq.h>
23 22
24#include <asm/arch/spitz.h> 23#include <asm/arch/spitz.h>
25#include <asm/arch/hardware.h> 24#include <asm/arch/hardware.h>
@@ -407,10 +406,9 @@ static int __init spitzkbd_probe(struct platform_device *dev)
407 for (i = 0; i < SPITZ_KEY_SENSE_NUM; i++) { 406 for (i = 0; i < SPITZ_KEY_SENSE_NUM; i++) {
408 pxa_gpio_mode(spitz_senses[i] | GPIO_IN); 407 pxa_gpio_mode(spitz_senses[i] | GPIO_IN);
409 if (request_irq(IRQ_GPIO(spitz_senses[i]), spitzkbd_interrupt, 408 if (request_irq(IRQ_GPIO(spitz_senses[i]), spitzkbd_interrupt,
410 SA_INTERRUPT, "Spitzkbd Sense", spitzkbd)) 409 SA_INTERRUPT | SA_TRIGGER_RISING,

410 "Spitzkbd Sense", spitzkbd))
411 printk(KERN_WARNING "spitzkbd: Can't get Sense IRQ: %d!\n", i); 411 printk(KERN_WARNING "spitzkbd: Can't get Sense IRQ: %d!\n", i);
412 else
413 set_irq_type(IRQ_GPIO(spitz_senses[i]),IRQT_RISING);
414 } 412 }
415 413
416 /* Set Strobe lines as outputs - set high */ 414 /* Set Strobe lines as outputs - set high */
@@ -422,15 +420,18 @@ static int __init spitzkbd_probe(struct platform_device *dev)
422 pxa_gpio_mode(SPITZ_GPIO_SWA | GPIO_IN); 420 pxa_gpio_mode(SPITZ_GPIO_SWA | GPIO_IN);
423 pxa_gpio_mode(SPITZ_GPIO_SWB | GPIO_IN); 421 pxa_gpio_mode(SPITZ_GPIO_SWB | GPIO_IN);
424 422
425 request_irq(SPITZ_IRQ_GPIO_SYNC, spitzkbd_interrupt, SA_INTERRUPT, "Spitzkbd Sync", spitzkbd); 423 request_irq(SPITZ_IRQ_GPIO_SYNC, spitzkbd_interrupt,
426 request_irq(SPITZ_IRQ_GPIO_ON_KEY, spitzkbd_interrupt, SA_INTERRUPT, "Spitzkbd PwrOn", spitzkbd); 424 SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
427 request_irq(SPITZ_IRQ_GPIO_SWA, spitzkbd_hinge_isr, SA_INTERRUPT, "Spitzkbd SWA", spitzkbd); 425 "Spitzkbd Sync", spitzkbd);
428 request_irq(SPITZ_IRQ_GPIO_SWB, spitzkbd_hinge_isr, SA_INTERRUPT, "Spitzkbd SWB", spitzkbd); 426 request_irq(SPITZ_IRQ_GPIO_ON_KEY, spitzkbd_interrupt,
429 427 SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
430 set_irq_type(SPITZ_IRQ_GPIO_SYNC, IRQT_BOTHEDGE); 428 "Spitzkbd PwrOn", spitzkbd);
431 set_irq_type(SPITZ_IRQ_GPIO_ON_KEY, IRQT_BOTHEDGE); 429 request_irq(SPITZ_IRQ_GPIO_SWA, spitzkbd_hinge_isr,
432 set_irq_type(SPITZ_IRQ_GPIO_SWA, IRQT_BOTHEDGE); 430 SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
433 set_irq_type(SPITZ_IRQ_GPIO_SWB, IRQT_BOTHEDGE); 431 "Spitzkbd SWA", spitzkbd);
432 request_irq(SPITZ_IRQ_GPIO_SWB, spitzkbd_hinge_isr,
433 SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
434 "Spitzkbd SWB", spitzkbd);
434 435
435 printk(KERN_INFO "input: Spitz Keyboard Registered\n"); 436 printk(KERN_INFO "input: Spitz Keyboard Registered\n");
436 437
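Both keyboard drivers previously registered the handler and only then called set_irq_type(), leaving a window where the interrupt could fire with whatever trigger the line last had; folding the trigger into the request_irq() flags makes registration atomic. A kernel-style fragment of the pattern (2.6.15-era SA_* names, which later became IRQF_*):

    if (request_irq(irq, handler,
                    SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
                    "example", dev))
            printk(KERN_WARNING "example: can't get IRQ %d\n", irq);
    /* no separate set_irq_type() call is needed afterwards */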
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index 1cd7657f7e42..1be963961c15 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -60,8 +60,6 @@ static struct fasync_struct *hp_sdc_rtc_async_queue;
60 60
61static DECLARE_WAIT_QUEUE_HEAD(hp_sdc_rtc_wait); 61static DECLARE_WAIT_QUEUE_HEAD(hp_sdc_rtc_wait);
62 62
63static loff_t hp_sdc_rtc_llseek(struct file *file, loff_t offset, int origin);
64
65static ssize_t hp_sdc_rtc_read(struct file *file, char *buf, 63static ssize_t hp_sdc_rtc_read(struct file *file, char *buf,
66 size_t count, loff_t *ppos); 64 size_t count, loff_t *ppos);
67 65
@@ -387,11 +385,6 @@ static int hp_sdc_rtc_set_i8042timer (struct timeval *setto, uint8_t setcmd)
387 return 0; 385 return 0;
388} 386}
389 387
390static loff_t hp_sdc_rtc_llseek(struct file *file, loff_t offset, int origin)
391{
392 return -ESPIPE;
393}
394
395static ssize_t hp_sdc_rtc_read(struct file *file, char *buf, 388static ssize_t hp_sdc_rtc_read(struct file *file, char *buf,
396 size_t count, loff_t *ppos) { 389 size_t count, loff_t *ppos) {
397 ssize_t retval; 390 ssize_t retval;
@@ -679,7 +672,7 @@ static int hp_sdc_rtc_ioctl(struct inode *inode, struct file *file,
679 672
680static struct file_operations hp_sdc_rtc_fops = { 673static struct file_operations hp_sdc_rtc_fops = {
681 .owner = THIS_MODULE, 674 .owner = THIS_MODULE,
682 .llseek = hp_sdc_rtc_llseek, 675 .llseek = no_llseek,
683 .read = hp_sdc_rtc_read, 676 .read = hp_sdc_rtc_read,
684 .poll = hp_sdc_rtc_poll, 677 .poll = hp_sdc_rtc_poll,
685 .ioctl = hp_sdc_rtc_ioctl, 678 .ioctl = hp_sdc_rtc_ioctl,
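Rather than keeping a private llseek stub that returns -ESPIPE, the driver now points .llseek at the kernel's generic no_llseek helper, which does exactly that. The resulting shape, sketched (example_read is a placeholder):

    static struct file_operations example_fops = {
            .owner  = THIS_MODULE,
            .llseek = no_llseek,   /* generic -ESPIPE for unseekable devices */
            .read   = example_read,
    };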
diff --git a/drivers/isdn/act2000/act2000.h b/drivers/isdn/act2000/act2000.h
index b091d1a54125..d4c50512a1ff 100644
--- a/drivers/isdn/act2000/act2000.h
+++ b/drivers/isdn/act2000/act2000.h
@@ -181,17 +181,17 @@ typedef struct act2000_card {
181 char regname[35]; /* Name used for request_region */ 181 char regname[35]; /* Name used for request_region */
182} act2000_card; 182} act2000_card;
183 183
184extern __inline__ void act2000_schedule_tx(act2000_card *card) 184static inline void act2000_schedule_tx(act2000_card *card)
185{ 185{
186 schedule_work(&card->snd_tq); 186 schedule_work(&card->snd_tq);
187} 187}
188 188
189extern __inline__ void act2000_schedule_rx(act2000_card *card) 189static inline void act2000_schedule_rx(act2000_card *card)
190{ 190{
191 schedule_work(&card->rcv_tq); 191 schedule_work(&card->rcv_tq);
192} 192}
193 193
194extern __inline__ void act2000_schedule_poll(act2000_card *card) 194static inline void act2000_schedule_poll(act2000_card *card)
195{ 195{
196 schedule_work(&card->poll_tq); 196 schedule_work(&card->poll_tq);
197} 197}
diff --git a/drivers/isdn/act2000/capi.h b/drivers/isdn/act2000/capi.h
index f6d5f530b86b..49f453c53c64 100644
--- a/drivers/isdn/act2000/capi.h
+++ b/drivers/isdn/act2000/capi.h
@@ -78,29 +78,29 @@ typedef union actcapi_infoel { /* info element */
78typedef struct actcapi_msn { 78typedef struct actcapi_msn {
79 __u8 eaz; 79 __u8 eaz;
80 __u8 len; /* Length of MSN */ 80 __u8 len; /* Length of MSN */
81 __u8 msn[15] __attribute__ ((packed)); 81 __u8 msn[15];
82} actcapi_msn; 82} __attribute__((packed)) actcapi_msn;
83 83
84typedef struct actcapi_dlpd { 84typedef struct actcapi_dlpd {
85 __u8 len; /* Length of structure */ 85 __u8 len; /* Length of structure */
86 __u16 dlen __attribute__ ((packed)); /* Data Length */ 86 __u16 dlen; /* Data Length */
87 __u8 laa __attribute__ ((packed)); /* Link Address A */ 87 __u8 laa; /* Link Address A */
88 __u8 lab; /* Link Address B */ 88 __u8 lab; /* Link Address B */
89 __u8 modulo; /* Modulo Mode */ 89 __u8 modulo; /* Modulo Mode */
90 __u8 win; /* Window size */ 90 __u8 win; /* Window size */
91 __u8 xid[100]; /* XID Information */ 91 __u8 xid[100]; /* XID Information */
92} actcapi_dlpd; 92} __attribute__((packed)) actcapi_dlpd;
93 93
94typedef struct actcapi_ncpd { 94typedef struct actcapi_ncpd {
95 __u8 len; /* Length of structure */ 95 __u8 len; /* Length of structure */
96 __u16 lic __attribute__ ((packed)); 96 __u16 lic;
97 __u16 hic __attribute__ ((packed)); 97 __u16 hic;
98 __u16 ltc __attribute__ ((packed)); 98 __u16 ltc;
99 __u16 htc __attribute__ ((packed)); 99 __u16 htc;
100 __u16 loc __attribute__ ((packed)); 100 __u16 loc;
101 __u16 hoc __attribute__ ((packed)); 101 __u16 hoc;
102 __u8 modulo __attribute__ ((packed)); 102 __u8 modulo;
103} actcapi_ncpd; 103} __attribute__((packed)) actcapi_ncpd;
104#define actcapi_ncpi actcapi_ncpd 104#define actcapi_ncpi actcapi_ncpd
105 105
106/* 106/*
@@ -168,19 +168,19 @@ typedef struct actcapi_msg {
168 __u16 manuf_msg; 168 __u16 manuf_msg;
169 __u16 controller; 169 __u16 controller;
170 actcapi_msn msnmap; 170 actcapi_msn msnmap;
171 } manufacturer_req_msn; 171 } __attribute__ ((packed)) manufacturer_req_msn;
172 /* TODO: TraceInit-req/conf/ind/resp and 172 /* TODO: TraceInit-req/conf/ind/resp and
173 * TraceDump-req/conf/ind/resp 173 * TraceDump-req/conf/ind/resp
174 */ 174 */
175 struct connect_req { 175 struct connect_req {
176 __u8 controller; 176 __u8 controller;
177 __u8 bchan; 177 __u8 bchan;
178 __u32 infomask __attribute__ ((packed)); 178 __u32 infomask;
179 __u8 si1; 179 __u8 si1;
180 __u8 si2; 180 __u8 si2;
181 __u8 eaz; 181 __u8 eaz;
182 actcapi_addr addr; 182 actcapi_addr addr;
183 } connect_req; 183 } __attribute__ ((packed)) connect_req;
184 struct connect_conf { 184 struct connect_conf {
185 __u16 plci; 185 __u16 plci;
186 __u16 info; 186 __u16 info;
@@ -192,7 +192,7 @@ typedef struct actcapi_msg {
192 __u8 si2; 192 __u8 si2;
193 __u8 eaz; 193 __u8 eaz;
194 actcapi_addr addr; 194 actcapi_addr addr;
195 } connect_ind; 195 } __attribute__ ((packed)) connect_ind;
196 struct connect_resp { 196 struct connect_resp {
197 __u16 plci; 197 __u16 plci;
198 __u8 rejectcause; 198 __u8 rejectcause;
@@ -200,14 +200,14 @@ typedef struct actcapi_msg {
200 struct connect_active_ind { 200 struct connect_active_ind {
201 __u16 plci; 201 __u16 plci;
202 actcapi_addr addr; 202 actcapi_addr addr;
203 } connect_active_ind; 203 } __attribute__ ((packed)) connect_active_ind;
204 struct connect_active_resp { 204 struct connect_active_resp {
205 __u16 plci; 205 __u16 plci;
206 } connect_active_resp; 206 } connect_active_resp;
207 struct connect_b3_req { 207 struct connect_b3_req {
208 __u16 plci; 208 __u16 plci;
209 actcapi_ncpi ncpi; 209 actcapi_ncpi ncpi;
210 } connect_b3_req; 210 } __attribute__ ((packed)) connect_b3_req;
211 struct connect_b3_conf { 211 struct connect_b3_conf {
212 __u16 plci; 212 __u16 plci;
213 __u16 ncci; 213 __u16 ncci;
@@ -217,12 +217,12 @@ typedef struct actcapi_msg {
217 __u16 ncci; 217 __u16 ncci;
218 __u16 plci; 218 __u16 plci;
219 actcapi_ncpi ncpi; 219 actcapi_ncpi ncpi;
220 } connect_b3_ind; 220 } __attribute__ ((packed)) connect_b3_ind;
221 struct connect_b3_resp { 221 struct connect_b3_resp {
222 __u16 ncci; 222 __u16 ncci;
223 __u8 rejectcause; 223 __u8 rejectcause;
224 actcapi_ncpi ncpi __attribute__ ((packed)); 224 actcapi_ncpi ncpi;
225 } connect_b3_resp; 225 } __attribute__ ((packed)) connect_b3_resp;
226 struct disconnect_req { 226 struct disconnect_req {
227 __u16 plci; 227 __u16 plci;
228 __u8 cause; 228 __u8 cause;
@@ -241,14 +241,14 @@ typedef struct actcapi_msg {
241 struct connect_b3_active_ind { 241 struct connect_b3_active_ind {
242 __u16 ncci; 242 __u16 ncci;
243 actcapi_ncpi ncpi; 243 actcapi_ncpi ncpi;
244 } connect_b3_active_ind; 244 } __attribute__ ((packed)) connect_b3_active_ind;
245 struct connect_b3_active_resp { 245 struct connect_b3_active_resp {
246 __u16 ncci; 246 __u16 ncci;
247 } connect_b3_active_resp; 247 } connect_b3_active_resp;
248 struct disconnect_b3_req { 248 struct disconnect_b3_req {
249 __u16 ncci; 249 __u16 ncci;
250 actcapi_ncpi ncpi; 250 actcapi_ncpi ncpi;
251 } disconnect_b3_req; 251 } __attribute__ ((packed)) disconnect_b3_req;
252 struct disconnect_b3_conf { 252 struct disconnect_b3_conf {
253 __u16 ncci; 253 __u16 ncci;
254 __u16 info; 254 __u16 info;
@@ -257,7 +257,7 @@ typedef struct actcapi_msg {
257 __u16 ncci; 257 __u16 ncci;
258 __u16 info; 258 __u16 info;
259 actcapi_ncpi ncpi; 259 actcapi_ncpi ncpi;
260 } disconnect_b3_ind; 260 } __attribute__ ((packed)) disconnect_b3_ind;
261 struct disconnect_b3_resp { 261 struct disconnect_b3_resp {
262 __u16 ncci; 262 __u16 ncci;
263 } disconnect_b3_resp; 263 } disconnect_b3_resp;
@@ -265,7 +265,7 @@ typedef struct actcapi_msg {
265 __u16 plci; 265 __u16 plci;
266 actcapi_infonr nr; 266 actcapi_infonr nr;
267 actcapi_infoel el; 267 actcapi_infoel el;
268 } info_ind; 268 } __attribute__ ((packed)) info_ind;
269 struct info_resp { 269 struct info_resp {
270 __u16 plci; 270 __u16 plci;
271 } info_resp; 271 } info_resp;
@@ -279,8 +279,8 @@ typedef struct actcapi_msg {
279 struct select_b2_protocol_req { 279 struct select_b2_protocol_req {
280 __u16 plci; 280 __u16 plci;
281 __u8 protocol; 281 __u8 protocol;
282 actcapi_dlpd dlpd __attribute__ ((packed)); 282 actcapi_dlpd dlpd;
283 } select_b2_protocol_req; 283 } __attribute__ ((packed)) select_b2_protocol_req;
284 struct select_b2_protocol_conf { 284 struct select_b2_protocol_conf {
285 __u16 plci; 285 __u16 plci;
286 __u16 info; 286 __u16 info;
@@ -288,49 +288,49 @@ typedef struct actcapi_msg {
288 struct select_b3_protocol_req { 288 struct select_b3_protocol_req {
289 __u16 plci; 289 __u16 plci;
290 __u8 protocol; 290 __u8 protocol;
291 actcapi_ncpd ncpd __attribute__ ((packed)); 291 actcapi_ncpd ncpd;
292 } select_b3_protocol_req; 292 } __attribute__ ((packed)) select_b3_protocol_req;
293 struct select_b3_protocol_conf { 293 struct select_b3_protocol_conf {
294 __u16 plci; 294 __u16 plci;
295 __u16 info; 295 __u16 info;
296 } select_b3_protocol_conf; 296 } select_b3_protocol_conf;
297 struct listen_req { 297 struct listen_req {
298 __u8 controller; 298 __u8 controller;
299 __u32 infomask __attribute__ ((packed)); 299 __u32 infomask;
300 __u16 eazmask __attribute__ ((packed)); 300 __u16 eazmask;
301 __u16 simask __attribute__ ((packed)); 301 __u16 simask;
302 } listen_req; 302 } __attribute__ ((packed)) listen_req;
303 struct listen_conf { 303 struct listen_conf {
304 __u8 controller; 304 __u8 controller;
305 __u16 info __attribute__ ((packed)); 305 __u16 info;
306 } listen_conf; 306 } __attribute__ ((packed)) listen_conf;
307 struct data_b3_req { 307 struct data_b3_req {
308 __u16 fakencci; 308 __u16 fakencci;
309 __u16 datalen; 309 __u16 datalen;
310 __u32 unused; 310 __u32 unused;
311 __u8 blocknr; 311 __u8 blocknr;
312 __u16 flags __attribute__ ((packed)); 312 __u16 flags;
313 } data_b3_req; 313 } __attribute__ ((packed)) data_b3_req;
314 struct data_b3_ind { 314 struct data_b3_ind {
315 __u16 fakencci; 315 __u16 fakencci;
316 __u16 datalen; 316 __u16 datalen;
317 __u32 unused; 317 __u32 unused;
318 __u8 blocknr; 318 __u8 blocknr;
319 __u16 flags __attribute__ ((packed)); 319 __u16 flags;
320 } data_b3_ind; 320 } __attribute__ ((packed)) data_b3_ind;
321 struct data_b3_resp { 321 struct data_b3_resp {
322 __u16 ncci; 322 __u16 ncci;
323 __u8 blocknr; 323 __u8 blocknr;
324 } data_b3_resp; 324 } __attribute__ ((packed)) data_b3_resp;
325 struct data_b3_conf { 325 struct data_b3_conf {
326 __u16 ncci; 326 __u16 ncci;
327 __u8 blocknr; 327 __u8 blocknr;
328 __u16 info __attribute__ ((packed)); 328 __u16 info;
329 } data_b3_conf; 329 } __attribute__ ((packed)) data_b3_conf;
330 } msg; 330 } msg;
331} actcapi_msg; 331} __attribute__ ((packed)) actcapi_msg;
332 332
333extern __inline__ unsigned short 333static inline unsigned short
334actcapi_nextsmsg(act2000_card *card) 334actcapi_nextsmsg(act2000_card *card)
335{ 335{
336 unsigned long flags; 336 unsigned long flags;
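Moving __attribute__((packed)) from individual members to the closing brace packs the entire on-wire structure: a per-member attribute only drops that member's alignment, so any unannotated field that follows can still pull padding back in. A small program showing the difference on a typical ABI (sizes are illustrative):

    #include <stdio.h>

    struct half_packed {
            unsigned char  a;
            unsigned int   b __attribute__((packed)); /* b itself unaligned */
            unsigned short c;                         /* still 2-aligned    */
    };

    struct all_packed {
            unsigned char  a;
            unsigned int   b;
            unsigned short c;
    } __attribute__((packed));

    int main(void)
    {
            /* typically 8 vs 7: half_packed keeps padding for c's
             * alignment, all_packed is byte-contiguous */
            printf("half=%zu all=%zu\n",
                   sizeof(struct half_packed), sizeof(struct all_packed));
            return 0;
    }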
diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c
index 7b564c0dd996..207cae366256 100644
--- a/drivers/isdn/capi/capifs.c
+++ b/drivers/isdn/capi/capifs.c
@@ -17,6 +17,8 @@
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/sched.h> /* current */ 18#include <linux/sched.h> /* current */
19 19
20#include "capifs.h"
21
20MODULE_DESCRIPTION("CAPI4Linux: /dev/capi/ filesystem"); 22MODULE_DESCRIPTION("CAPI4Linux: /dev/capi/ filesystem");
21MODULE_AUTHOR("Carsten Paeth"); 23MODULE_AUTHOR("Carsten Paeth");
22MODULE_LICENSE("GPL"); 24MODULE_LICENSE("GPL");
diff --git a/drivers/isdn/hardware/eicon/os_4bri.c b/drivers/isdn/hardware/eicon/os_4bri.c
index cccfabc1117d..11e6f937c1e4 100644
--- a/drivers/isdn/hardware/eicon/os_4bri.c
+++ b/drivers/isdn/hardware/eicon/os_4bri.c
@@ -16,6 +16,7 @@
16#include "diva_pci.h" 16#include "diva_pci.h"
17#include "mi_pc.h" 17#include "mi_pc.h"
18#include "dsrv4bri.h" 18#include "dsrv4bri.h"
19#include "helpers.h"
19 20
20static void *diva_xdiLoadFileFile = NULL; 21static void *diva_xdiLoadFileFile = NULL;
21static dword diva_xdiLoadFileLength = 0; 22static dword diva_xdiLoadFileLength = 0;
@@ -815,7 +816,7 @@ diva_4bri_cmd_card_proc(struct _diva_os_xdi_adapter *a,
815 return (ret); 816 return (ret);
816} 817}
817 818
818void *xdiLoadFile(char *FileName, unsigned long *FileLength, 819void *xdiLoadFile(char *FileName, dword *FileLength,
819 unsigned long lim) 820 unsigned long lim)
820{ 821{
821 void *ret = diva_xdiLoadFileFile; 822 void *ret = diva_xdiLoadFileFile;
diff --git a/drivers/isdn/hardware/eicon/os_bri.c b/drivers/isdn/hardware/eicon/os_bri.c
index 4cc44a5dd1db..f31bba5b16ff 100644
--- a/drivers/isdn/hardware/eicon/os_bri.c
+++ b/drivers/isdn/hardware/eicon/os_bri.c
@@ -16,6 +16,7 @@
16#include "diva_pci.h" 16#include "diva_pci.h"
17#include "mi_pc.h" 17#include "mi_pc.h"
18#include "pc_maint.h" 18#include "pc_maint.h"
19#include "dsrv_bri.h"
19 20
20/* 21/*
21** IMPORTS 22** IMPORTS
diff --git a/drivers/isdn/hardware/eicon/os_pri.c b/drivers/isdn/hardware/eicon/os_pri.c
index 8ac207f75e54..a296a846f296 100644
--- a/drivers/isdn/hardware/eicon/os_pri.c
+++ b/drivers/isdn/hardware/eicon/os_pri.c
@@ -18,6 +18,7 @@
18#include "pc_maint.h" 18#include "pc_maint.h"
19#include "dsp_tst.h" 19#include "dsp_tst.h"
20#include "diva_dma.h" 20#include "diva_dma.h"
21#include "dsrv_pri.h"
21 22
22/* -------------------------------------------------------------------------- 23/* --------------------------------------------------------------------------
23 OS Dependent part of XDI driver for DIVA PRI Adapter 24 OS Dependent part of XDI driver for DIVA PRI Adapter
diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig
index c82105920d71..0ef560144be3 100644
--- a/drivers/isdn/hisax/Kconfig
+++ b/drivers/isdn/hisax/Kconfig
@@ -110,7 +110,7 @@ config HISAX_16_3
110 110
111config HISAX_TELESPCI 111config HISAX_TELESPCI
112 bool "Teles PCI" 112 bool "Teles PCI"
113 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) 113 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
114 help 114 help
115 This enables HiSax support for the Teles PCI. 115 This enables HiSax support for the Teles PCI.
116 See <file:Documentation/isdn/README.HiSax> on how to configure it. 116 See <file:Documentation/isdn/README.HiSax> on how to configure it.
@@ -238,7 +238,7 @@ config HISAX_MIC
238 238
239config HISAX_NETJET 239config HISAX_NETJET
240 bool "NETjet card" 240 bool "NETjet card"
241 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) 241 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
242 help 242 help
243 This enables HiSax support for the NetJet from Traverse 243 This enables HiSax support for the NetJet from Traverse
244 Technologies. 244 Technologies.
@@ -249,7 +249,7 @@ config HISAX_NETJET
249 249
250config HISAX_NETJET_U 250config HISAX_NETJET_U
251 bool "NETspider U card" 251 bool "NETspider U card"
252 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) 252 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
253 help 253 help
254 This enables HiSax support for the Netspider U interface ISDN card 254 This enables HiSax support for the Netspider U interface ISDN card
255 from Traverse Technologies. 255 from Traverse Technologies.
@@ -317,7 +317,7 @@ config HISAX_GAZEL
317 317
318config HISAX_HFC_PCI 318config HISAX_HFC_PCI
319 bool "HFC PCI-Bus cards" 319 bool "HFC PCI-Bus cards"
320 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) 320 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
321 help 321 help
322 This enables HiSax support for the HFC-S PCI 2BDS0 based cards. 322 This enables HiSax support for the HFC-S PCI 2BDS0 based cards.
323 323
@@ -344,7 +344,7 @@ config HISAX_HFC_SX
344 344
345config HISAX_ENTERNOW_PCI 345config HISAX_ENTERNOW_PCI
346 bool "Formula-n enter:now PCI card" 346 bool "Formula-n enter:now PCI card"
347 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) 347 depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
348 help 348 help
349 This enables HiSax support for the Formula-n enter:now PCI 349 This enables HiSax support for the Formula-n enter:now PCI
350 ISDN card. 350 ISDN card.
diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h
index 26c545fa223b..1b85ce166af8 100644
--- a/drivers/isdn/hisax/hisax.h
+++ b/drivers/isdn/hisax/hisax.h
@@ -396,17 +396,17 @@ struct isar_hw {
396 396
397struct hdlc_stat_reg { 397struct hdlc_stat_reg {
398#ifdef __BIG_ENDIAN 398#ifdef __BIG_ENDIAN
399 u_char fill __attribute__((packed)); 399 u_char fill;
400 u_char mode __attribute__((packed)); 400 u_char mode;
401 u_char xml __attribute__((packed)); 401 u_char xml;
402 u_char cmd __attribute__((packed)); 402 u_char cmd;
403#else 403#else
404 u_char cmd __attribute__((packed)); 404 u_char cmd;
405 u_char xml __attribute__((packed)); 405 u_char xml;
406 u_char mode __attribute__((packed)); 406 u_char mode;
407 u_char fill __attribute__((packed)); 407 u_char fill;
408#endif 408#endif
409}; 409} __attribute__((packed));
410 410
411struct hdlc_hw { 411struct hdlc_hw {
412 union { 412 union {
diff --git a/drivers/isdn/hisax/hisax_fcpcipnp.h b/drivers/isdn/hisax/hisax_fcpcipnp.h
index bd8a22e4d6a2..21fbcedf3a94 100644
--- a/drivers/isdn/hisax/hisax_fcpcipnp.h
+++ b/drivers/isdn/hisax/hisax_fcpcipnp.h
@@ -12,17 +12,17 @@ enum {
12 12
13struct hdlc_stat_reg { 13struct hdlc_stat_reg {
14#ifdef __BIG_ENDIAN 14#ifdef __BIG_ENDIAN
15 u_char fill __attribute__((packed)); 15 u_char fill;
16 u_char mode __attribute__((packed)); 16 u_char mode;
17 u_char xml __attribute__((packed)); 17 u_char xml;
18 u_char cmd __attribute__((packed)); 18 u_char cmd;
19#else 19#else
20 u_char cmd __attribute__((packed)); 20 u_char cmd;
21 u_char xml __attribute__((packed)); 21 u_char xml;
22 u_char mode __attribute__((packed)); 22 u_char mode;
23 u_char fill __attribute__((packed)); 23 u_char fill;
24#endif 24#endif
25}; 25} __attribute__((packed));
26 26
27struct fritz_bcs { 27struct fritz_bcs {
28 struct hisax_b_if b_if; 28 struct hisax_b_if b_if;
diff --git a/drivers/isdn/sc/command.c b/drivers/isdn/sc/command.c
index 19f2fcf0ae4a..b4b24335f716 100644
--- a/drivers/isdn/sc/command.c
+++ b/drivers/isdn/sc/command.c
@@ -43,7 +43,6 @@ extern int send_and_receive(int, unsigned int, unsigned char, unsigned char,
43 RspMessage *, int); 43 RspMessage *, int);
44extern int sendmessage(int, unsigned int, unsigned int, unsigned int, 44extern int sendmessage(int, unsigned int, unsigned int, unsigned int,
45 unsigned int, unsigned int, unsigned int, unsigned int *); 45 unsigned int, unsigned int, unsigned int, unsigned int *);
46extern inline void pullphone(char *, char *);
47 46
48#ifdef DEBUG 47#ifdef DEBUG
49/* 48/*
diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c
index 2c3158c81ff2..4d811600bdab 100644
--- a/drivers/macintosh/windfarm_smu_controls.c
+++ b/drivers/macintosh/windfarm_smu_controls.c
@@ -14,6 +14,7 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/completion.h>
17#include <asm/prom.h> 18#include <asm/prom.h>
18#include <asm/machdep.h> 19#include <asm/machdep.h>
19#include <asm/io.h> 20#include <asm/io.h>
diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c
index b558cc209d49..1a00d9c75a23 100644
--- a/drivers/macintosh/windfarm_smu_sensors.c
+++ b/drivers/macintosh/windfarm_smu_sensors.c
@@ -14,6 +14,7 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/completion.h>
17#include <asm/prom.h> 18#include <asm/prom.h>
18#include <asm/machdep.h> 19#include <asm/machdep.h>
19#include <asm/io.h> 20#include <asm/io.h>
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1b76fb29fb70..e423a16ba3c9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3598,12 +3598,21 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev)
3598 return 0; 3598 return 0;
3599} 3599}
3600 3600
3601static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
3602{
3603 mddev_t *mddev = bdev->bd_disk->private_data;
3604
3605 geo->heads = 2;
3606 geo->sectors = 4;
3607 geo->cylinders = get_capacity(mddev->gendisk) / 8;
3608 return 0;
3609}
3610
3601static int md_ioctl(struct inode *inode, struct file *file, 3611static int md_ioctl(struct inode *inode, struct file *file,
3602 unsigned int cmd, unsigned long arg) 3612 unsigned int cmd, unsigned long arg)
3603{ 3613{
3604 int err = 0; 3614 int err = 0;
3605 void __user *argp = (void __user *)arg; 3615 void __user *argp = (void __user *)arg;
3606 struct hd_geometry __user *loc = argp;
3607 mddev_t *mddev = NULL; 3616 mddev_t *mddev = NULL;
3608 3617
3609 if (!capable(CAP_SYS_ADMIN)) 3618 if (!capable(CAP_SYS_ADMIN))
@@ -3765,24 +3774,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
3765 * 4 sectors (with a BIG number of cylinders...). This drives 3774 * 4 sectors (with a BIG number of cylinders...). This drives
3766 * dosfs just mad... ;-) 3775 * dosfs just mad... ;-)
3767 */ 3776 */
3768 case HDIO_GETGEO:
3769 if (!loc) {
3770 err = -EINVAL;
3771 goto abort_unlock;
3772 }
3773 err = put_user (2, (char __user *) &loc->heads);
3774 if (err)
3775 goto abort_unlock;
3776 err = put_user (4, (char __user *) &loc->sectors);
3777 if (err)
3778 goto abort_unlock;
3779 err = put_user(get_capacity(mddev->gendisk)/8,
3780 (short __user *) &loc->cylinders);
3781 if (err)
3782 goto abort_unlock;
3783 err = put_user (get_start_sect(inode->i_bdev),
3784 (long __user *) &loc->start);
3785 goto done_unlock;
3786 } 3777 }
3787 3778
3788 /* 3779 /*
@@ -3911,6 +3902,7 @@ static struct block_device_operations md_fops =
3911 .open = md_open, 3902 .open = md_open,
3912 .release = md_release, 3903 .release = md_release,
3913 .ioctl = md_ioctl, 3904 .ioctl = md_ioctl,
3905 .getgeo = md_getgeo,
3914 .media_changed = md_media_changed, 3906 .media_changed = md_media_changed,
3915 .revalidate_disk= md_revalidate, 3907 .revalidate_disk= md_revalidate,
3916}; 3908};
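
This md.c hunk is one instance of a conversion that recurs through the rest of this diff (i2o_block, mmc_block, mtd_blkdevs, dasd, xpram, sd): per-driver HDIO_GETGEO ioctl code is replaced by a getgeo method in struct block_device_operations. The block core now owns the ioctl plumbing: it fills geo->start from get_start_sect() and does the single copy_to_user(), so the driver loses four error-prone put_user() calls and keeps only the CHS assignments. A minimal sketch of the pattern, assuming the 2.6.16-era prototypes visible above (the example_* names are placeholders):

#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/module.h>

/* Report a fake geometry, as md_getgeo() above does.  The block core
 * allocates the hd_geometry, fills geo->start, and copies the result
 * to userspace; the method only supplies the CHS values. */
static int example_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	geo->heads     = 2;
	geo->sectors   = 4;
	geo->cylinders = get_capacity(bdev->bd_disk) / 8;
	return 0;
}

static struct block_device_operations example_fops = {
	.owner	= THIS_MODULE,
	.getgeo	= example_getgeo,
};
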
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index abbca150202b..d03f99cf4b7d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -306,9 +306,6 @@ static int raid0_run (mddev_t *mddev)
306 printk("raid0 : conf->hash_spacing is %llu blocks.\n", 306 printk("raid0 : conf->hash_spacing is %llu blocks.\n",
307 (unsigned long long)conf->hash_spacing); 307 (unsigned long long)conf->hash_spacing);
308 { 308 {
309#if __GNUC__ < 3
310 volatile
311#endif
312 sector_t s = mddev->array_size; 309 sector_t s = mddev->array_size;
313 sector_t space = conf->hash_spacing; 310 sector_t space = conf->hash_spacing;
314 int round; 311 int round;
@@ -439,9 +436,6 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio)
439 436
440 437
441 { 438 {
442#if __GNUC__ < 3
443 volatile
444#endif
445 sector_t x = block >> conf->preshift; 439 sector_t x = block >> conf->preshift;
446 sector_div(x, (u32)conf->hash_spacing); 440 sector_div(x, (u32)conf->hash_spacing);
447 zone = conf->hash_table[x]; 441 zone = conf->hash_table[x];
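
The deleted #if __GNUC__ < 3 ... volatile blocks were a workaround for a gcc 2.x code-generation bug around sector_div(): qualifying the sector_t as volatile forced the value through memory and dodged the miscompile. With gcc 2.95 support retired (the same cleanup removes a __GNUC__ >= 3 guard in the v4l2 hunk below), the qualifier would only pessimize the code. A userspace analogue of what sector_div() computes, purely for orientation:

#include <stdint.h>
#include <stdio.h>

/* Userspace analogue of the kernel's sector_div(): divide a 64-bit
 * sector count in place and return the remainder.  In the kernel this
 * is a per-arch macro; the deleted "volatile" existed only to pin the
 * operand in memory for buggy gcc 2.x. */
static uint32_t sector_div(uint64_t *n, uint32_t base)
{
	uint32_t rem = (uint32_t)(*n % base);

	*n /= base;
	return rem;
}

int main(void)
{
	uint64_t s = 1000003;
	uint32_t rem = sector_div(&s, 64);

	printf("quotient=%llu remainder=%u\n",
	       (unsigned long long)s, rem);
	return 0;
}
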
diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index 597b8db35a13..62a7d636ef11 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -191,9 +191,7 @@ char *v4l2_type_names[] = {
191}; 191};
192 192
193char *v4l2_ioctl_names[256] = { 193char *v4l2_ioctl_names[256] = {
194#if __GNUC__ >= 3
195 [0 ... 255] = "UNKNOWN", 194 [0 ... 255] = "UNKNOWN",
196#endif
197 [_IOC_NR(VIDIOC_QUERYCAP)] = "VIDIOC_QUERYCAP", 195 [_IOC_NR(VIDIOC_QUERYCAP)] = "VIDIOC_QUERYCAP",
198 [_IOC_NR(VIDIOC_RESERVED)] = "VIDIOC_RESERVED", 196 [_IOC_NR(VIDIOC_RESERVED)] = "VIDIOC_RESERVED",
199 [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT", 197 [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT",
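
The guard removed here becomes unnecessary once gcc 2.95 is no longer supported: the [first ... last] range designator is a GNU C extension that gcc 3.x always accepts, and it lets every slot default to "UNKNOWN" before the named entries override individual slots. A tiny standalone illustration:

#include <stdio.h>

/* GNU C range-designated initializer: all eight slots default to
 * "UNKNOWN", then a named entry overrides its slot -- the same shape
 * as the v4l2_ioctl_names table above. */
static const char *names[8] = {
	[0 ... 7] = "UNKNOWN",
	[2]       = "DEMO_ENUM_FMT",
};

int main(void)
{
	printf("%s %s\n", names[0], names[2]);
	return 0;
}
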
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 5b1febed3133..b09fb6307153 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -662,6 +662,13 @@ static int i2o_block_release(struct inode *inode, struct file *file)
662 return 0; 662 return 0;
663} 663}
664 664
665static int i2o_block_getgeo(struct block_device *bdev, struct hd_geometry *geo)
666{
667 i2o_block_biosparam(get_capacity(bdev->bd_disk),
668 &geo->cylinders, &geo->heads, &geo->sectors);
669 return 0;
670}
671
665/** 672/**
666 * i2o_block_ioctl - Issue device specific ioctl calls. 673 * i2o_block_ioctl - Issue device specific ioctl calls.
667 * @cmd: ioctl command 674 * @cmd: ioctl command
@@ -676,7 +683,6 @@ static int i2o_block_ioctl(struct inode *inode, struct file *file,
676{ 683{
677 struct gendisk *disk = inode->i_bdev->bd_disk; 684 struct gendisk *disk = inode->i_bdev->bd_disk;
678 struct i2o_block_device *dev = disk->private_data; 685 struct i2o_block_device *dev = disk->private_data;
679 void __user *argp = (void __user *)arg;
680 686
681 /* Anyone capable of this syscall can do *real bad* things */ 687 /* Anyone capable of this syscall can do *real bad* things */
682 688
@@ -684,15 +690,6 @@ static int i2o_block_ioctl(struct inode *inode, struct file *file,
684 return -EPERM; 690 return -EPERM;
685 691
686 switch (cmd) { 692 switch (cmd) {
687 case HDIO_GETGEO:
688 {
689 struct hd_geometry g;
690 i2o_block_biosparam(get_capacity(disk),
691 &g.cylinders, &g.heads, &g.sectors);
692 g.start = get_start_sect(inode->i_bdev);
693 return copy_to_user(argp, &g, sizeof(g)) ? -EFAULT : 0;
694 }
695
696 case BLKI2OGRSTRAT: 693 case BLKI2OGRSTRAT:
697 return put_user(dev->rcache, (int __user *)arg); 694 return put_user(dev->rcache, (int __user *)arg);
698 case BLKI2OGWSTRAT: 695 case BLKI2OGWSTRAT:
@@ -962,6 +959,7 @@ static struct block_device_operations i2o_block_fops = {
962 .open = i2o_block_open, 959 .open = i2o_block_open,
963 .release = i2o_block_release, 960 .release = i2o_block_release,
964 .ioctl = i2o_block_ioctl, 961 .ioctl = i2o_block_ioctl,
962 .getgeo = i2o_block_getgeo,
965 .media_changed = i2o_block_media_changed 963 .media_changed = i2o_block_media_changed
966}; 964};
967 965
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index e335d54c4659..b42e0fbab59b 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -27,7 +27,6 @@
27 27
28#include <asm/dma.h> 28#include <asm/dma.h>
29#include <asm/hardware.h> 29#include <asm/hardware.h>
30#include <asm/irq.h>
31 30
32#include "ucb1x00.h" 31#include "ucb1x00.h"
33 32
@@ -507,14 +506,14 @@ static int ucb1x00_probe(struct mcp *mcp)
507 goto err_free; 506 goto err_free;
508 } 507 }
509 508
510 ret = request_irq(ucb->irq, ucb1x00_irq, 0, "UCB1x00", ucb); 509 ret = request_irq(ucb->irq, ucb1x00_irq, SA_TRIGGER_RISING,
510 "UCB1x00", ucb);
511 if (ret) { 511 if (ret) {
512 printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n", 512 printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
513 ucb->irq, ret); 513 ucb->irq, ret);
514 goto err_free; 514 goto err_free;
515 } 515 }
516 516
517 set_irq_type(ucb->irq, IRQT_RISING);
518 mcp_set_drvdata(mcp, ucb); 517 mcp_set_drvdata(mcp, ucb);
519 518
520 ret = class_device_register(&ucb->cdev); 519 ret = class_device_register(&ucb->cdev);
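
Here the separate set_irq_type(ucb->irq, IRQT_RISING) call is folded into request_irq() via the then-new SA_TRIGGER_RISING flag; the smc91x hunks below perform the same conversion through their SMC_IRQ_FLAGS macro. Configuring the trigger in the same call removes the window in which the handler is registered but the line still has the wrong edge programmed. A sketch against the 2.6.16-era handler signature (the demo_* names are placeholders):

#include <linux/interrupt.h>
#include <linux/signal.h>	/* SA_TRIGGER_* -- see the ohci hunks below */

static irqreturn_t demo_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	return IRQ_HANDLED;
}

/* One call installs the handler *and* programs the edge: the irq core
 * applies SA_TRIGGER_RISING before the line is unmasked.  (The SA_*
 * flags were later renamed IRQF_*.) */
static int demo_setup_irq(int irq, void *dev_id)
{
	return request_irq(irq, demo_interrupt, SA_TRIGGER_RISING,
			   "demo", dev_id);
}
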
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 551061c2eadf..79fd062ccb34 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -32,7 +32,6 @@
32#include <linux/suspend.h> 32#include <linux/suspend.h>
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <linux/kthread.h> 34#include <linux/kthread.h>
35#include <linux/delay.h>
36 35
37#include <asm/dma.h> 36#include <asm/dma.h>
38#include <asm/semaphore.h> 37#include <asm/semaphore.h>
diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c
index 198561d21710..d5f28981596b 100644
--- a/drivers/mmc/mmc_block.c
+++ b/drivers/mmc/mmc_block.c
@@ -113,31 +113,18 @@ static int mmc_blk_release(struct inode *inode, struct file *filp)
113} 113}
114 114
115static int 115static int
116mmc_blk_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) 116mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
117{ 117{
118 struct block_device *bdev = inode->i_bdev; 118 geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
119 119 geo->heads = 4;
120 if (cmd == HDIO_GETGEO) { 120 geo->sectors = 16;
121 struct hd_geometry geo; 121 return 0;
122
123 memset(&geo, 0, sizeof(struct hd_geometry));
124
125 geo.cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
126 geo.heads = 4;
127 geo.sectors = 16;
128 geo.start = get_start_sect(bdev);
129
130 return copy_to_user((void __user *)arg, &geo, sizeof(geo))
131 ? -EFAULT : 0;
132 }
133
134 return -ENOTTY;
135} 122}
136 123
137static struct block_device_operations mmc_bdops = { 124static struct block_device_operations mmc_bdops = {
138 .open = mmc_blk_open, 125 .open = mmc_blk_open,
139 .release = mmc_blk_release, 126 .release = mmc_blk_release,
140 .ioctl = mmc_blk_ioctl, 127 .getgeo = mmc_blk_getgeo,
141 .owner = THIS_MODULE, 128 .owner = THIS_MODULE,
142}; 129};
143 130
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 339cb1218eaa..7f3ff500b68e 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -194,6 +194,14 @@ static int blktrans_release(struct inode *i, struct file *f)
194 return ret; 194 return ret;
195} 195}
196 196
197static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo)
198{
199 struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data;
200
201 if (dev->tr->getgeo)
202 return dev->tr->getgeo(dev, geo);
203 return -ENOTTY;
204}
197 205
198static int blktrans_ioctl(struct inode *inode, struct file *file, 206static int blktrans_ioctl(struct inode *inode, struct file *file,
199 unsigned int cmd, unsigned long arg) 207 unsigned int cmd, unsigned long arg)
@@ -207,22 +215,6 @@ static int blktrans_ioctl(struct inode *inode, struct file *file,
207 return tr->flush(dev); 215 return tr->flush(dev);
208 /* The core code did the work, we had nothing to do. */ 216 /* The core code did the work, we had nothing to do. */
209 return 0; 217 return 0;
210
211 case HDIO_GETGEO:
212 if (tr->getgeo) {
213 struct hd_geometry g;
214 int ret;
215
216 memset(&g, 0, sizeof(g));
217 ret = tr->getgeo(dev, &g);
218 if (ret)
219 return ret;
220
221 g.start = get_start_sect(inode->i_bdev);
222 if (copy_to_user((void __user *)arg, &g, sizeof(g)))
223 return -EFAULT;
224 return 0;
225 } /* else */
226 default: 218 default:
227 return -ENOTTY; 219 return -ENOTTY;
228 } 220 }
@@ -233,6 +225,7 @@ struct block_device_operations mtd_blktrans_ops = {
233 .open = blktrans_open, 225 .open = blktrans_open,
234 .release = blktrans_release, 226 .release = blktrans_release,
235 .ioctl = blktrans_ioctl, 227 .ioctl = blktrans_ioctl,
228 .getgeo = blktrans_getgeo,
236}; 229};
237 230
238int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) 231int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index 45c077d0f063..af06a80f44de 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/slab.h>
17#include <linux/platform_device.h> 18#include <linux/platform_device.h>
18#include <linux/mtd/mtd.h> 19#include <linux/mtd/mtd.h>
19#include <linux/mtd/onenand.h> 20#include <linux/mtd/onenand.h>
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 20ce212638fc..a3e00a4635a5 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -18,6 +18,7 @@
18#include <linux/mtd/blktrans.h> 18#include <linux/mtd/blktrans.h>
19#include <linux/mtd/mtd.h> 19#include <linux/mtd/mtd.h>
20#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
21#include <linux/slab.h>
21#include <linux/jiffies.h> 22#include <linux/jiffies.h>
22 23
23#include <asm/types.h> 24#include <asm/types.h>
diff --git a/drivers/net/3c527.h b/drivers/net/3c527.h
index c10f009ce9b6..53b5b071df08 100644
--- a/drivers/net/3c527.h
+++ b/drivers/net/3c527.h
@@ -32,43 +32,43 @@
32 32
33struct mc32_mailbox 33struct mc32_mailbox
34{ 34{
35 u16 mbox __attribute((packed)); 35 u16 mbox;
36 u16 data[1] __attribute((packed)); 36 u16 data[1];
37}; 37} __attribute((packed));
38 38
39struct skb_header 39struct skb_header
40{ 40{
41 u8 status __attribute((packed)); 41 u8 status;
42 u8 control __attribute((packed)); 42 u8 control;
43 u16 next __attribute((packed)); /* Do not change! */ 43 u16 next; /* Do not change! */
44 u16 length __attribute((packed)); 44 u16 length;
45 u32 data __attribute((packed)); 45 u32 data;
46}; 46} __attribute((packed));
47 47
48struct mc32_stats 48struct mc32_stats
49{ 49{
50 /* RX Errors */ 50 /* RX Errors */
51 u32 rx_crc_errors __attribute((packed)); 51 u32 rx_crc_errors;
52 u32 rx_alignment_errors __attribute((packed)); 52 u32 rx_alignment_errors;
53 u32 rx_overrun_errors __attribute((packed)); 53 u32 rx_overrun_errors;
54 u32 rx_tooshort_errors __attribute((packed)); 54 u32 rx_tooshort_errors;
55 u32 rx_toolong_errors __attribute((packed)); 55 u32 rx_toolong_errors;
56 u32 rx_outofresource_errors __attribute((packed)); 56 u32 rx_outofresource_errors;
57 57
58 u32 rx_discarded __attribute((packed)); /* via card pattern match filter */ 58 u32 rx_discarded; /* via card pattern match filter */
59 59
60 /* TX Errors */ 60 /* TX Errors */
61 u32 tx_max_collisions __attribute((packed)); 61 u32 tx_max_collisions;
62 u32 tx_carrier_errors __attribute((packed)); 62 u32 tx_carrier_errors;
63 u32 tx_underrun_errors __attribute((packed)); 63 u32 tx_underrun_errors;
64 u32 tx_cts_errors __attribute((packed)); 64 u32 tx_cts_errors;
65 u32 tx_timeout_errors __attribute((packed)) ; 65 u32 tx_timeout_errors;
66 66
67 /* various cruft */ 67 /* various cruft */
68 u32 dataA[6] __attribute((packed)); 68 u32 dataA[6];
69 u16 dataB[5] __attribute((packed)); 69 u16 dataB[5];
70 u32 dataC[14] __attribute((packed)); 70 u32 dataC[14];
71}; 71} __attribute((packed));
72 72
73#define STATUS_MASK 0x0F 73#define STATUS_MASK 0x0F
74#define COMPLETED (1<<7) 74#define COMPLETED (1<<7)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index e2fa29b612cd..1960961bf28e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -1374,7 +1374,7 @@ config FORCEDETH
1374 1374
1375config CS89x0 1375config CS89x0
1376 tristate "CS89x0 support" 1376 tristate "CS89x0 support"
1377 depends on (NET_PCI && (ISA || ARCH_IXDP2X01)) || ARCH_PNX0105 1377 depends on NET_PCI && (ISA || ARCH_IXDP2X01 || ARCH_PNX010X)
1378 ---help--- 1378 ---help---
1379 Support for CS89x0 chipset based Ethernet cards. If you have a 1379 Support for CS89x0 chipset based Ethernet cards. If you have a
1380 network (Ethernet) card of this type, say Y and read the 1380 network (Ethernet) card of this type, say Y and read the
diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c
index a6078ad9b654..907c01009746 100644
--- a/drivers/net/cs89x0.c
+++ b/drivers/net/cs89x0.c
@@ -175,7 +175,7 @@ static unsigned int cs8900_irq_map[] = {1,0,0,0};
175#include <asm/irq.h> 175#include <asm/irq.h>
176static unsigned int netcard_portlist[] __initdata = {IXDP2X01_CS8900_VIRT_BASE, 0}; 176static unsigned int netcard_portlist[] __initdata = {IXDP2X01_CS8900_VIRT_BASE, 0};
177static unsigned int cs8900_irq_map[] = {IRQ_IXDP2X01_CS8900, 0, 0, 0}; 177static unsigned int cs8900_irq_map[] = {IRQ_IXDP2X01_CS8900, 0, 0, 0};
178#elif defined(CONFIG_ARCH_PNX0105) 178#elif defined(CONFIG_ARCH_PNX010X)
179#include <asm/irq.h> 179#include <asm/irq.h>
180#include <asm/arch/gpio.h> 180#include <asm/arch/gpio.h>
181#define CIRRUS_DEFAULT_BASE IO_ADDRESS(EXT_STATIC2_s0_BASE + 0x200000) /* = Physical address 0x48200000 */ 181#define CIRRUS_DEFAULT_BASE IO_ADDRESS(EXT_STATIC2_s0_BASE + 0x200000) /* = Physical address 0x48200000 */
@@ -338,30 +338,86 @@ out:
338} 338}
339#endif 339#endif
340 340
341#if defined(CONFIG_ARCH_IXDP2X01)
341static int 342static int
342readreg(struct net_device *dev, int portno) 343readword(unsigned long base_addr, int portno)
343{ 344{
344 outw(portno, dev->base_addr + ADD_PORT); 345 return (u16)__raw_readl(base_addr + (portno << 1));
345 return inw(dev->base_addr + DATA_PORT);
346} 346}
347 347
348static void 348static void
349writereg(struct net_device *dev, int portno, int value) 349writeword(unsigned long base_addr, int portno, int value)
350{ 350{
351 outw(portno, dev->base_addr + ADD_PORT); 351 __raw_writel((u16)value, base_addr + (portno << 1));
352 outw(value, dev->base_addr + DATA_PORT); 352}
353#else
354#if defined(CONFIG_ARCH_PNX010X)
355static int
356readword(unsigned long base_addr, int portno)
357{
358 return inw(base_addr + (portno << 1));
359}
360
361static void
362writeword(unsigned long base_addr, int portno, int value)
363{
364 outw(value, base_addr + (portno << 1));
365}
366#else
367static int
368readword(unsigned long base_addr, int portno)
369{
370 return inw(base_addr + portno);
371}
372
373static void
374writeword(unsigned long base_addr, int portno, int value)
375{
376 outw(value, base_addr + portno);
377}
378#endif
379#endif
380
381static void
382readwords(unsigned long base_addr, int portno, void *buf, int length)
383{
384 u8 *buf8 = (u8 *)buf;
385
386 do {
387 u32 tmp32;
388
389 tmp32 = readword(base_addr, portno);
390 *buf8++ = (u8)tmp32;
391 *buf8++ = (u8)(tmp32 >> 8);
392 } while (--length);
393}
394
395static void
396writewords(unsigned long base_addr, int portno, void *buf, int length)
397{
398 u8 *buf8 = (u8 *)buf;
399
400 do {
401 u32 tmp32;
402
403 tmp32 = *buf8++;
404 tmp32 |= (*buf8++) << 8;
405 writeword(base_addr, portno, tmp32);
406 } while (--length);
353} 407}
354 408
355static int 409static int
356readword(struct net_device *dev, int portno) 410readreg(struct net_device *dev, int regno)
357{ 411{
358 return inw(dev->base_addr + portno); 412 writeword(dev->base_addr, ADD_PORT, regno);
413 return readword(dev->base_addr, DATA_PORT);
359} 414}
360 415
361static void 416static void
362writeword(struct net_device *dev, int portno, int value) 417writereg(struct net_device *dev, int regno, int value)
363{ 418{
364 outw(value, dev->base_addr + portno); 419 writeword(dev->base_addr, ADD_PORT, regno);
420 writeword(dev->base_addr, DATA_PORT, value);
365} 421}
366 422
367static int __init 423static int __init
@@ -456,7 +512,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
456#endif 512#endif
457 } 513 }
458 514
459#ifdef CONFIG_ARCH_PNX0105 515#ifdef CONFIG_ARCH_PNX010X
460 initialize_ebi(); 516 initialize_ebi();
461 517
462 /* Map GPIO registers for the pins connected to the CS8900a. */ 518 /* Map GPIO registers for the pins connected to the CS8900a. */
@@ -491,8 +547,8 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
491 547
492#ifdef CONFIG_SH_HICOSH4 548#ifdef CONFIG_SH_HICOSH4
 493 /* truly reset the chip */ 549 /* truly reset the chip */
494 outw(0x0114, ioaddr + ADD_PORT); 550 writeword(ioaddr, ADD_PORT, 0x0114);
495 outw(0x0040, ioaddr + DATA_PORT); 551 writeword(ioaddr, DATA_PORT, 0x0040);
496#endif 552#endif
497 553
498 /* if they give us an odd I/O address, then do ONE write to 554 /* if they give us an odd I/O address, then do ONE write to
@@ -503,24 +559,24 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
503 if (net_debug > 1) 559 if (net_debug > 1)
504 printk(KERN_INFO "%s: odd ioaddr 0x%x\n", dev->name, ioaddr); 560 printk(KERN_INFO "%s: odd ioaddr 0x%x\n", dev->name, ioaddr);
505 if ((ioaddr & 2) != 2) 561 if ((ioaddr & 2) != 2)
506 if ((inw((ioaddr & ~3)+ ADD_PORT) & ADD_MASK) != ADD_SIG) { 562 if ((readword(ioaddr & ~3, ADD_PORT) & ADD_MASK) != ADD_SIG) {
507 printk(KERN_ERR "%s: bad signature 0x%x\n", 563 printk(KERN_ERR "%s: bad signature 0x%x\n",
508 dev->name, inw((ioaddr & ~3)+ ADD_PORT)); 564 dev->name, readword(ioaddr & ~3, ADD_PORT));
509 retval = -ENODEV; 565 retval = -ENODEV;
510 goto out2; 566 goto out2;
511 } 567 }
512 } 568 }
513 printk(KERN_DEBUG "PP_addr at %x: 0x%x\n", 569 printk(KERN_DEBUG "PP_addr at %x[%x]: 0x%x\n",
514 ioaddr + ADD_PORT, inw(ioaddr + ADD_PORT)); 570 ioaddr, ADD_PORT, readword(ioaddr, ADD_PORT));
515 571
516 ioaddr &= ~3; 572 ioaddr &= ~3;
517 outw(PP_ChipID, ioaddr + ADD_PORT); 573 writeword(ioaddr, ADD_PORT, PP_ChipID);
518 574
519 tmp = inw(ioaddr + DATA_PORT); 575 tmp = readword(ioaddr, DATA_PORT);
520 if (tmp != CHIP_EISA_ID_SIG) { 576 if (tmp != CHIP_EISA_ID_SIG) {
521 printk(KERN_DEBUG "%s: incorrect signature at %x: 0x%x!=" 577 printk(KERN_DEBUG "%s: incorrect signature at %x[%x]: 0x%x!="
522 CHIP_EISA_ID_SIG_STR "\n", 578 CHIP_EISA_ID_SIG_STR "\n",
523 dev->name, ioaddr + DATA_PORT, tmp); 579 dev->name, ioaddr, DATA_PORT, tmp);
524 retval = -ENODEV; 580 retval = -ENODEV;
525 goto out2; 581 goto out2;
526 } 582 }
@@ -724,7 +780,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
724 } else { 780 } else {
725 i = lp->isa_config & INT_NO_MASK; 781 i = lp->isa_config & INT_NO_MASK;
726 if (lp->chip_type == CS8900) { 782 if (lp->chip_type == CS8900) {
727#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX0105) 783#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX010X)
728 i = cs8900_irq_map[0]; 784 i = cs8900_irq_map[0];
729#else 785#else
730 /* Translate the IRQ using the IRQ mapping table. */ 786 /* Translate the IRQ using the IRQ mapping table. */
@@ -790,7 +846,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
790 goto out3; 846 goto out3;
791 return 0; 847 return 0;
792out3: 848out3:
793 outw(PP_ChipID, dev->base_addr + ADD_PORT); 849 writeword(dev->base_addr, ADD_PORT, PP_ChipID);
794out2: 850out2:
795 release_region(ioaddr & ~3, NETCARD_IO_EXTENT); 851 release_region(ioaddr & ~3, NETCARD_IO_EXTENT);
796out1: 852out1:
@@ -970,11 +1026,11 @@ void __init reset_chip(struct net_device *dev)
970#ifndef CONFIG_ARCH_IXDP2X01 1026#ifndef CONFIG_ARCH_IXDP2X01
971 if (lp->chip_type != CS8900) { 1027 if (lp->chip_type != CS8900) {
972 /* Hardware problem requires PNP registers to be reconfigured after a reset */ 1028 /* Hardware problem requires PNP registers to be reconfigured after a reset */
973 outw(PP_CS8920_ISAINT, ioaddr + ADD_PORT); 1029 writeword(ioaddr, ADD_PORT, PP_CS8920_ISAINT);
974 outb(dev->irq, ioaddr + DATA_PORT); 1030 outb(dev->irq, ioaddr + DATA_PORT);
975 outb(0, ioaddr + DATA_PORT + 1); 1031 outb(0, ioaddr + DATA_PORT + 1);
976 1032
977 outw(PP_CS8920_ISAMemB, ioaddr + ADD_PORT); 1033 writeword(ioaddr, ADD_PORT, PP_CS8920_ISAMemB);
978 outb((dev->mem_start >> 16) & 0xff, ioaddr + DATA_PORT); 1034 outb((dev->mem_start >> 16) & 0xff, ioaddr + DATA_PORT);
979 outb((dev->mem_start >> 8) & 0xff, ioaddr + DATA_PORT + 1); 1035 outb((dev->mem_start >> 8) & 0xff, ioaddr + DATA_PORT + 1);
980 } 1036 }
@@ -1104,8 +1160,8 @@ send_test_pkt(struct net_device *dev)
1104 memcpy(test_packet, dev->dev_addr, ETH_ALEN); 1160 memcpy(test_packet, dev->dev_addr, ETH_ALEN);
1105 memcpy(test_packet+ETH_ALEN, dev->dev_addr, ETH_ALEN); 1161 memcpy(test_packet+ETH_ALEN, dev->dev_addr, ETH_ALEN);
1106 1162
1107 writeword(dev, TX_CMD_PORT, TX_AFTER_ALL); 1163 writeword(dev->base_addr, TX_CMD_PORT, TX_AFTER_ALL);
1108 writeword(dev, TX_LEN_PORT, ETH_ZLEN); 1164 writeword(dev->base_addr, TX_LEN_PORT, ETH_ZLEN);
1109 1165
1110 /* Test to see if the chip has allocated memory for the packet */ 1166 /* Test to see if the chip has allocated memory for the packet */
1111 while (jiffies - timenow < 5) 1167 while (jiffies - timenow < 5)
@@ -1115,7 +1171,7 @@ send_test_pkt(struct net_device *dev)
1115 return 0; /* this shouldn't happen */ 1171 return 0; /* this shouldn't happen */
1116 1172
1117 /* Write the contents of the packet */ 1173 /* Write the contents of the packet */
1118 outsw(dev->base_addr + TX_FRAME_PORT,test_packet,(ETH_ZLEN+1) >>1); 1174 writewords(dev->base_addr, TX_FRAME_PORT,test_packet,(ETH_ZLEN+1) >>1);
1119 1175
1120 if (net_debug > 1) printk("Sending test packet "); 1176 if (net_debug > 1) printk("Sending test packet ");
1121 /* wait a couple of jiffies for packet to be received */ 1177 /* wait a couple of jiffies for packet to be received */
@@ -1200,7 +1256,7 @@ net_open(struct net_device *dev)
1200 int i; 1256 int i;
1201 int ret; 1257 int ret;
1202 1258
1203#if !defined(CONFIG_SH_HICOSH4) && !defined(CONFIG_ARCH_PNX0105) /* uses irq#1, so this won't work */ 1259#if !defined(CONFIG_SH_HICOSH4) && !defined(CONFIG_ARCH_PNX010X) /* uses irq#1, so this won't work */
1204 if (dev->irq < 2) { 1260 if (dev->irq < 2) {
1205 /* Allow interrupts to be generated by the chip */ 1261 /* Allow interrupts to be generated by the chip */
1206/* Cirrus' release had this: */ 1262/* Cirrus' release had this: */
@@ -1231,7 +1287,7 @@ net_open(struct net_device *dev)
1231 else 1287 else
1232#endif 1288#endif
1233 { 1289 {
1234#if !defined(CONFIG_ARCH_IXDP2X01) && !defined(CONFIG_ARCH_PNX0105) 1290#if !defined(CONFIG_ARCH_IXDP2X01) && !defined(CONFIG_ARCH_PNX010X)
1235 if (((1 << dev->irq) & lp->irq_map) == 0) { 1291 if (((1 << dev->irq) & lp->irq_map) == 0) {
1236 printk(KERN_ERR "%s: IRQ %d is not in our map of allowable IRQs, which is %x\n", 1292 printk(KERN_ERR "%s: IRQ %d is not in our map of allowable IRQs, which is %x\n",
1237 dev->name, dev->irq, lp->irq_map); 1293 dev->name, dev->irq, lp->irq_map);
@@ -1316,7 +1372,7 @@ net_open(struct net_device *dev)
1316 case A_CNF_MEDIA_10B_2: result = lp->adapter_cnf & A_CNF_10B_2; break; 1372 case A_CNF_MEDIA_10B_2: result = lp->adapter_cnf & A_CNF_10B_2; break;
1317 default: result = lp->adapter_cnf & (A_CNF_10B_T | A_CNF_AUI | A_CNF_10B_2); 1373 default: result = lp->adapter_cnf & (A_CNF_10B_T | A_CNF_AUI | A_CNF_10B_2);
1318 } 1374 }
1319#ifdef CONFIG_ARCH_PNX0105 1375#ifdef CONFIG_ARCH_PNX010X
1320 result = A_CNF_10B_T; 1376 result = A_CNF_10B_T;
1321#endif 1377#endif
1322 if (!result) { 1378 if (!result) {
@@ -1457,8 +1513,8 @@ static int net_send_packet(struct sk_buff *skb, struct net_device *dev)
1457 netif_stop_queue(dev); 1513 netif_stop_queue(dev);
1458 1514
1459 /* initiate a transmit sequence */ 1515 /* initiate a transmit sequence */
1460 writeword(dev, TX_CMD_PORT, lp->send_cmd); 1516 writeword(dev->base_addr, TX_CMD_PORT, lp->send_cmd);
1461 writeword(dev, TX_LEN_PORT, skb->len); 1517 writeword(dev->base_addr, TX_LEN_PORT, skb->len);
1462 1518
1463 /* Test to see if the chip has allocated memory for the packet */ 1519 /* Test to see if the chip has allocated memory for the packet */
1464 if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) { 1520 if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {
@@ -1472,7 +1528,7 @@ static int net_send_packet(struct sk_buff *skb, struct net_device *dev)
1472 return 1; 1528 return 1;
1473 } 1529 }
1474 /* Write the contents of the packet */ 1530 /* Write the contents of the packet */
1475 outsw(dev->base_addr + TX_FRAME_PORT,skb->data,(skb->len+1) >>1); 1531 writewords(dev->base_addr, TX_FRAME_PORT,skb->data,(skb->len+1) >>1);
1476 spin_unlock_irq(&lp->lock); 1532 spin_unlock_irq(&lp->lock);
1477 lp->stats.tx_bytes += skb->len; 1533 lp->stats.tx_bytes += skb->len;
1478 dev->trans_start = jiffies; 1534 dev->trans_start = jiffies;
@@ -1512,7 +1568,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id, struct pt_regs * regs)
1512 course, if you're on a slow machine, and packets are arriving 1568 course, if you're on a slow machine, and packets are arriving
1513 faster than you can read them off, you're screwed. Hasta la 1569 faster than you can read them off, you're screwed. Hasta la
1514 vista, baby! */ 1570 vista, baby! */
1515 while ((status = readword(dev, ISQ_PORT))) { 1571 while ((status = readword(dev->base_addr, ISQ_PORT))) {
1516 if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status); 1572 if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
1517 handled = 1; 1573 handled = 1;
1518 switch(status & ISQ_EVENT_MASK) { 1574 switch(status & ISQ_EVENT_MASK) {
@@ -1606,8 +1662,8 @@ net_rx(struct net_device *dev)
1606 int status, length; 1662 int status, length;
1607 1663
1608 int ioaddr = dev->base_addr; 1664 int ioaddr = dev->base_addr;
1609 status = inw(ioaddr + RX_FRAME_PORT); 1665 status = readword(ioaddr, RX_FRAME_PORT);
1610 length = inw(ioaddr + RX_FRAME_PORT); 1666 length = readword(ioaddr, RX_FRAME_PORT);
1611 1667
1612 if ((status & RX_OK) == 0) { 1668 if ((status & RX_OK) == 0) {
1613 count_rx_errors(status, lp); 1669 count_rx_errors(status, lp);
@@ -1626,9 +1682,9 @@ net_rx(struct net_device *dev)
1626 skb_reserve(skb, 2); /* longword align L3 header */ 1682 skb_reserve(skb, 2); /* longword align L3 header */
1627 skb->dev = dev; 1683 skb->dev = dev;
1628 1684
1629 insw(ioaddr + RX_FRAME_PORT, skb_put(skb, length), length >> 1); 1685 readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
1630 if (length & 1) 1686 if (length & 1)
1631 skb->data[length-1] = inw(ioaddr + RX_FRAME_PORT); 1687 skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT);
1632 1688
1633 if (net_debug > 3) { 1689 if (net_debug > 3) {
1634 printk( "%s: received %d byte packet of type %x\n", 1690 printk( "%s: received %d byte packet of type %x\n",
@@ -1901,7 +1957,7 @@ void
1901cleanup_module(void) 1957cleanup_module(void)
1902{ 1958{
1903 unregister_netdev(dev_cs89x0); 1959 unregister_netdev(dev_cs89x0);
1904 outw(PP_ChipID, dev_cs89x0->base_addr + ADD_PORT); 1960 writeword(dev_cs89x0->base_addr, ADD_PORT, PP_ChipID);
1905 release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT); 1961 release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT);
1906 free_netdev(dev_cs89x0); 1962 free_netdev(dev_cs89x0);
1907} 1963}
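
The net effect of the cs89x0.c rework is that every inw/outw/insw/outsw in the driver now funnels through one readword()/writeword() pair (plus readwords()/writewords() for frame buffers), selected once at compile time: __raw_readl/__raw_writel with a doubled stride on IXDP2x01, inw/outw with a doubled stride on PNX010x, and plain inw/outw elsewhere. That is also why the next hunk can delete the CS89x0_PORT() offset macro from cs89x0.h. The dispatch, condensed from the hunk above into #elif form for readability:

#include <linux/types.h>
#include <asm/io.h>

/* Compile-time selection of the bus accessors; all register and
 * frame-buffer traffic goes through these instead of raw inw()/outw(),
 * so no per-register offset macro is needed. */
#if defined(CONFIG_ARCH_IXDP2X01)
static int readword(unsigned long base, int port)
{
	return (u16)__raw_readl(base + (port << 1));	/* dword-strided MMIO */
}
#elif defined(CONFIG_ARCH_PNX010X)
static int readword(unsigned long base, int port)
{
	return inw(base + (port << 1));			/* word-strided I/O */
}
#else
static int readword(unsigned long base, int port)
{
	return inw(base + port);			/* classic ISA layout */
}
#endif
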
diff --git a/drivers/net/cs89x0.h b/drivers/net/cs89x0.h
index decea264f121..bd954aaa636f 100644
--- a/drivers/net/cs89x0.h
+++ b/drivers/net/cs89x0.h
@@ -16,13 +16,6 @@
16 16
17#include <linux/config.h> 17#include <linux/config.h>
18 18
19#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX0105)
20/* IXDP2401/IXDP2801 uses dword-aligned register addressing */
21#define CS89x0_PORT(reg) ((reg) * 2)
22#else
23#define CS89x0_PORT(reg) (reg)
24#endif
25
26#define PP_ChipID 0x0000 /* offset 0h -> Corp -ID */ 19#define PP_ChipID 0x0000 /* offset 0h -> Corp -ID */
27 /* offset 2h -> Model/Product Number */ 20 /* offset 2h -> Model/Product Number */
28 /* offset 3h -> Chip Revision Number */ 21 /* offset 3h -> Chip Revision Number */
@@ -332,16 +325,16 @@
 332#define RAM_SIZE 0x1000 /* The card has 4k bytes of RAM */ 325#define RAM_SIZE 0x1000 /* The card has 4k bytes of RAM */
333#define PKT_START PP_TxFrame /* Start of packet RAM */ 326#define PKT_START PP_TxFrame /* Start of packet RAM */
334 327
335#define RX_FRAME_PORT CS89x0_PORT(0x0000) 328#define RX_FRAME_PORT 0x0000
336#define TX_FRAME_PORT RX_FRAME_PORT 329#define TX_FRAME_PORT RX_FRAME_PORT
337#define TX_CMD_PORT CS89x0_PORT(0x0004) 330#define TX_CMD_PORT 0x0004
338#define TX_NOW 0x0000 /* Tx packet after 5 bytes copied */ 331#define TX_NOW 0x0000 /* Tx packet after 5 bytes copied */
339#define TX_AFTER_381 0x0040 /* Tx packet after 381 bytes copied */ 332#define TX_AFTER_381 0x0040 /* Tx packet after 381 bytes copied */
340#define TX_AFTER_ALL 0x00c0 /* Tx packet after all bytes copied */ 333#define TX_AFTER_ALL 0x00c0 /* Tx packet after all bytes copied */
341#define TX_LEN_PORT CS89x0_PORT(0x0006) 334#define TX_LEN_PORT 0x0006
342#define ISQ_PORT CS89x0_PORT(0x0008) 335#define ISQ_PORT 0x0008
343#define ADD_PORT CS89x0_PORT(0x000A) 336#define ADD_PORT 0x000A
344#define DATA_PORT CS89x0_PORT(0x000C) 337#define DATA_PORT 0x000C
345 338
346#define EEPROM_WRITE_EN 0x00F0 339#define EEPROM_WRITE_EN 0x00F0
347#define EEPROM_WRITE_DIS 0x0000 340#define EEPROM_WRITE_DIS 0x0000
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 3e9accf137e7..41b3d83c2ab8 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -524,6 +524,7 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
524 ax->dev->trans_start = jiffies; 524 ax->dev->trans_start = jiffies;
525 ax->xleft = count - actual; 525 ax->xleft = count - actual;
526 ax->xhead = ax->xbuff + actual; 526 ax->xhead = ax->xbuff + actual;
527 spin_unlock_bh(&ax->buflock);
527} 528}
528 529
529/* Encapsulate an AX.25 packet and kick it into a TTY queue. */ 530/* Encapsulate an AX.25 packet and kick it into a TTY queue. */
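
The single added line is a locking fix: ax_encaps() acquires ax->buflock earlier in the function (outside this hunk's context), and this exit path returned without dropping it, leaving the next transmit to deadlock on the stale lock. A sketch of the balanced shape the fix restores; the demo_* types and helpers stand in for mkiss internals not shown here:

#include <linux/spinlock.h>

struct demo_ax {
	spinlock_t	buflock;
	unsigned char	*xbuff, *xhead;
	int		xleft;
};

int kiss_esc_demo(unsigned char *s, unsigned char *d, int len);
int tty_write_demo(struct demo_ax *ax, int count);

static void demo_encaps(struct demo_ax *ax, unsigned char *icp, int len)
{
	int count, actual;

	spin_lock_bh(&ax->buflock);	/* taken at entry, off-hunk */
	count  = kiss_esc_demo(icp, ax->xbuff, len);
	actual = tty_write_demo(ax, count);
	ax->xleft = count - actual;
	ax->xhead = ax->xbuff + actual;
	spin_unlock_bh(&ax->buflock);	/* the line this hunk adds */
}
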
diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/net/irda/vlsi_ir.h
index 741aecc655df..a82a4ba8de4f 100644
--- a/drivers/net/irda/vlsi_ir.h
+++ b/drivers/net/irda/vlsi_ir.h
@@ -577,8 +577,8 @@ struct ring_descr_hw {
577 struct { 577 struct {
578 u8 addr_res[3]; 578 u8 addr_res[3];
579 volatile u8 status; /* descriptor status */ 579 volatile u8 status; /* descriptor status */
580 } rd_s __attribute__((packed)); 580 } __attribute__((packed)) rd_s;
581 } rd_u __attribute((packed)); 581 } __attribute((packed)) rd_u;
582} __attribute__ ((packed)); 582} __attribute__ ((packed));
583 583
584#define rd_addr rd_u.addr 584#define rd_addr rd_u.addr
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 28bf2e69eb5e..7ec08127c9d6 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -88,7 +88,6 @@ static const char version[] =
88#include <linux/skbuff.h> 88#include <linux/skbuff.h>
89 89
90#include <asm/io.h> 90#include <asm/io.h>
91#include <asm/irq.h>
92 91
93#include "smc91x.h" 92#include "smc91x.h"
94 93
@@ -2007,12 +2006,10 @@ static int __init smc_probe(struct net_device *dev, void __iomem *ioaddr)
2007 } 2006 }
2008 2007
2009 /* Grab the IRQ */ 2008 /* Grab the IRQ */
2010 retval = request_irq(dev->irq, &smc_interrupt, 0, dev->name, dev); 2009 retval = request_irq(dev->irq, &smc_interrupt, SMC_IRQ_FLAGS, dev->name, dev);
2011 if (retval) 2010 if (retval)
2012 goto err_out; 2011 goto err_out;
2013 2012
2014 set_irq_type(dev->irq, SMC_IRQ_TRIGGER_TYPE);
2015
2016#ifdef SMC_USE_PXA_DMA 2013#ifdef SMC_USE_PXA_DMA
2017 { 2014 {
2018 int dma = pxa_request_dma(dev->name, DMA_PRIO_LOW, 2015 int dma = pxa_request_dma(dev->name, DMA_PRIO_LOW,
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
index 5c2824be4ee6..e0efd1964e72 100644
--- a/drivers/net/smc91x.h
+++ b/drivers/net/smc91x.h
@@ -90,7 +90,7 @@
90 __l--; \ 90 __l--; \
91 } \ 91 } \
92 } while (0) 92 } while (0)
93#define set_irq_type(irq, type) 93#define SMC_IRQ_FLAGS (0)
94 94
95#elif defined(CONFIG_SA1100_PLEB) 95#elif defined(CONFIG_SA1100_PLEB)
96/* We can only do 16-bit reads and writes in the static memory space. */ 96/* We can only do 16-bit reads and writes in the static memory space. */
@@ -109,7 +109,7 @@
109#define SMC_outw(v, a, r) writew(v, (a) + (r)) 109#define SMC_outw(v, a, r) writew(v, (a) + (r))
110#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) 110#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l)
111 111
112#define set_irq_type(irq, type) do {} while (0) 112#define SMC_IRQ_FLAGS (0)
113 113
114#elif defined(CONFIG_SA1100_ASSABET) 114#elif defined(CONFIG_SA1100_ASSABET)
115 115
@@ -185,11 +185,11 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
185#include <asm/mach-types.h> 185#include <asm/mach-types.h>
186#include <asm/arch/cpu.h> 186#include <asm/arch/cpu.h>
187 187
188#define SMC_IRQ_TRIGGER_TYPE (( \ 188#define SMC_IRQ_FLAGS (( \
189 machine_is_omap_h2() \ 189 machine_is_omap_h2() \
190 || machine_is_omap_h3() \ 190 || machine_is_omap_h3() \
191 || (machine_is_omap_innovator() && !cpu_is_omap1510()) \ 191 || (machine_is_omap_innovator() && !cpu_is_omap1510()) \
192 ) ? IRQT_FALLING : IRQT_RISING) 192 ) ? SA_TRIGGER_FALLING : SA_TRIGGER_RISING)
193 193
194 194
195#elif defined(CONFIG_SH_SH4202_MICRODEV) 195#elif defined(CONFIG_SH_SH4202_MICRODEV)
@@ -209,7 +209,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
209#define SMC_insw(a, r, p, l) insw((a) + (r) - 0xa0000000, p, l) 209#define SMC_insw(a, r, p, l) insw((a) + (r) - 0xa0000000, p, l)
210#define SMC_outsw(a, r, p, l) outsw((a) + (r) - 0xa0000000, p, l) 210#define SMC_outsw(a, r, p, l) outsw((a) + (r) - 0xa0000000, p, l)
211 211
212#define set_irq_type(irq, type) do {} while(0) 212#define SMC_IRQ_FLAGS (0)
213 213
214#elif defined(CONFIG_ISA) 214#elif defined(CONFIG_ISA)
215 215
@@ -237,7 +237,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
237#define SMC_insw(a, r, p, l) insw(((u32)a) + (r), p, l) 237#define SMC_insw(a, r, p, l) insw(((u32)a) + (r), p, l)
238#define SMC_outsw(a, r, p, l) outsw(((u32)a) + (r), p, l) 238#define SMC_outsw(a, r, p, l) outsw(((u32)a) + (r), p, l)
239 239
240#define set_irq_type(irq, type) do {} while(0) 240#define SMC_IRQ_FLAGS (0)
241 241
242#define RPC_LSA_DEFAULT RPC_LED_TX_RX 242#define RPC_LSA_DEFAULT RPC_LED_TX_RX
243#define RPC_LSB_DEFAULT RPC_LED_100_10 243#define RPC_LSB_DEFAULT RPC_LED_100_10
@@ -319,7 +319,7 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l)
319 au_writew(*_p++ , _a); \ 319 au_writew(*_p++ , _a); \
320 } while(0) 320 } while(0)
321 321
322#define set_irq_type(irq, type) do {} while (0) 322#define SMC_IRQ_FLAGS (0)
323 323
324#else 324#else
325 325
@@ -342,8 +342,8 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l)
342 342
343#endif 343#endif
344 344
345#ifndef SMC_IRQ_TRIGGER_TYPE 345#ifndef SMC_IRQ_FLAGS
346#define SMC_IRQ_TRIGGER_TYPE IRQT_RISING 346#define SMC_IRQ_FLAGS SA_TRIGGER_RISING
347#endif 347#endif
348 348
349#ifdef SMC_USE_PXA_DMA 349#ifdef SMC_USE_PXA_DMA
diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
index 036adc4f8ba7..22e794071cf4 100644
--- a/drivers/net/wan/sdla.c
+++ b/drivers/net/wan/sdla.c
@@ -329,9 +329,9 @@ static int sdla_cpuspeed(struct net_device *dev, struct ifreq *ifr)
329 329
330struct _dlci_stat 330struct _dlci_stat
331{ 331{
332 short dlci __attribute__((packed)); 332 short dlci;
333 char flags __attribute__((packed)); 333 char flags;
334}; 334} __attribute__((packed));
335 335
336struct _frad_stat 336struct _frad_stat
337{ 337{
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 531b07313141..b2e8e49c8659 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -43,13 +43,16 @@ static void process_task_mortuary(void);
43 * list for processing. Only after two full buffer syncs 43 * list for processing. Only after two full buffer syncs
44 * does the task eventually get freed, because by then 44 * does the task eventually get freed, because by then
45 * we are sure we will not reference it again. 45 * we are sure we will not reference it again.
46 * Can be invoked from softirq via RCU callback due to
47 * call_rcu() of the task struct, hence the _irqsave.
46 */ 48 */
47static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) 49static int task_free_notify(struct notifier_block * self, unsigned long val, void * data)
48{ 50{
51 unsigned long flags;
49 struct task_struct * task = data; 52 struct task_struct * task = data;
50 spin_lock(&task_mortuary); 53 spin_lock_irqsave(&task_mortuary, flags);
51 list_add(&task->tasks, &dying_tasks); 54 list_add(&task->tasks, &dying_tasks);
52 spin_unlock(&task_mortuary); 55 spin_unlock_irqrestore(&task_mortuary, flags);
53 return NOTIFY_OK; 56 return NOTIFY_OK;
54} 57}
55 58
@@ -431,25 +434,22 @@ static void increment_tail(struct oprofile_cpu_buffer * b)
431 */ 434 */
432static void process_task_mortuary(void) 435static void process_task_mortuary(void)
433{ 436{
434 struct list_head * pos; 437 unsigned long flags;
435 struct list_head * pos2; 438 LIST_HEAD(local_dead_tasks);
436 struct task_struct * task; 439 struct task_struct * task;
440 struct task_struct * ttask;
437 441
438 spin_lock(&task_mortuary); 442 spin_lock_irqsave(&task_mortuary, flags);
439 443
440 list_for_each_safe(pos, pos2, &dead_tasks) { 444 list_splice_init(&dead_tasks, &local_dead_tasks);
441 task = list_entry(pos, struct task_struct, tasks); 445 list_splice_init(&dying_tasks, &dead_tasks);
442 list_del(&task->tasks);
443 free_task(task);
444 }
445 446
446 list_for_each_safe(pos, pos2, &dying_tasks) { 447 spin_unlock_irqrestore(&task_mortuary, flags);
447 task = list_entry(pos, struct task_struct, tasks); 448
449 list_for_each_entry_safe(task, ttask, &local_dead_tasks, tasks) {
448 list_del(&task->tasks); 450 list_del(&task->tasks);
449 list_add_tail(&task->tasks, &dead_tasks); 451 free_task(task);
450 } 452 }
451
452 spin_unlock(&task_mortuary);
453} 453}
454 454
455 455
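
Two related changes land in buffer_sync.c. First, task_free_notify() can now run from softirq context, because the task_struct is released via a call_rcu() callback, so the plain spin_lock() must become spin_lock_irqsave() to avoid deadlocking against that softirq. Second, process_task_mortuary() no longer frees tasks while holding task_mortuary: it splices dead_tasks onto a stack-local list and promotes dying_tasks to dead_tasks inside the lock, then calls free_task() outside it. A self-contained sketch of the splice idiom (the demo_* names are placeholders):

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_mortuary);
static LIST_HEAD(demo_dead);
static LIST_HEAD(demo_dying);

/* Splice under the lock, free outside it: the IRQs-off window stays
 * tiny and free_task() never runs inside the critical section. */
static void demo_reap(void)
{
	unsigned long flags;
	LIST_HEAD(local_dead);
	struct task_struct *task, *ttask;

	spin_lock_irqsave(&demo_mortuary, flags);
	list_splice_init(&demo_dead, &local_dead);	/* reapable now */
	list_splice_init(&demo_dying, &demo_dead);	/* age one sync */
	spin_unlock_irqrestore(&demo_mortuary, flags);

	list_for_each_entry_safe(task, ttask, &local_dead, tasks) {
		list_del(&task->tasks);
		free_task(task);
	}
}
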
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 026f671ea558..78193e4bbdb5 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -52,7 +52,8 @@ int alloc_cpu_buffers(void)
52 for_each_online_cpu(i) { 52 for_each_online_cpu(i) {
53 struct oprofile_cpu_buffer * b = &cpu_buffer[i]; 53 struct oprofile_cpu_buffer * b = &cpu_buffer[i];
54 54
55 b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size); 55 b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
56 cpu_to_node(i));
56 if (!b->buffer) 57 if (!b->buffer)
57 goto fail; 58 goto fail;
58 59
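
The per-CPU sample buffers are now allocated with vmalloc_node() on the node that owns each CPU, so profiler writes land in node-local memory rather than on whichever node the allocating thread happened to run. A one-function sketch of the idiom (the helper name is hypothetical):

#include <linux/vmalloc.h>
#include <linux/topology.h>

/* Hypothetical helper: allocate a CPU's sample buffer from that CPU's
 * own NUMA node; the allocator falls back to other nodes only if the
 * local one is exhausted. */
static void *alloc_cpu_local(int cpu, size_t bytes)
{
	return vmalloc_node(bytes, cpu_to_node(cpu));
}
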
diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig
index b8241561da45..a665951b1586 100644
--- a/drivers/parport/Kconfig
+++ b/drivers/parport/Kconfig
@@ -34,7 +34,7 @@ config PARPORT
34 34
35config PARPORT_PC 35config PARPORT_PC
36 tristate "PC-style hardware" 36 tristate "PC-style hardware"
37 depends on PARPORT && (!SPARC64 || PCI) && !SPARC32 && !M32R 37 depends on PARPORT && (!SPARC64 || PCI) && !SPARC32 && !M32R && !FRV
38 ---help--- 38 ---help---
39 You should say Y here if you have a PC-style parallel port. All 39 You should say Y here if you have a PC-style parallel port. All
40 IBM PC compatible computers and some Alphas have PC-style 40 IBM PC compatible computers and some Alphas have PC-style
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index 18e85ccdae67..9302b8fd7461 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -2371,8 +2371,10 @@ void parport_pc_unregister_port (struct parport *p)
2371 spin_lock(&ports_lock); 2371 spin_lock(&ports_lock);
2372 list_del_init(&priv->list); 2372 list_del_init(&priv->list);
2373 spin_unlock(&ports_lock); 2373 spin_unlock(&ports_lock);
2374#if defined(CONFIG_PARPORT_PC_FIFO) && defined(HAS_DMA)
2374 if (p->dma != PARPORT_DMA_NONE) 2375 if (p->dma != PARPORT_DMA_NONE)
2375 free_dma(p->dma); 2376 free_dma(p->dma);
2377#endif
2376 if (p->irq != PARPORT_IRQ_NONE) 2378 if (p->irq != PARPORT_IRQ_NONE)
2377 free_irq(p->irq, p); 2379 free_irq(p->irq, p);
2378 release_region(p->base, 3); 2380 release_region(p->base, 3);
@@ -2380,14 +2382,12 @@ void parport_pc_unregister_port (struct parport *p)
2380 release_region(p->base + 3, p->size - 3); 2382 release_region(p->base + 3, p->size - 3);
2381 if (p->modes & PARPORT_MODE_ECP) 2383 if (p->modes & PARPORT_MODE_ECP)
2382 release_region(p->base_hi, 3); 2384 release_region(p->base_hi, 3);
2383#ifdef CONFIG_PARPORT_PC_FIFO 2385#if defined(CONFIG_PARPORT_PC_FIFO) && defined(HAS_DMA)
2384#ifdef HAS_DMA
2385 if (priv->dma_buf) 2386 if (priv->dma_buf)
2386 pci_free_consistent(priv->dev, PAGE_SIZE, 2387 pci_free_consistent(priv->dev, PAGE_SIZE,
2387 priv->dma_buf, 2388 priv->dma_buf,
2388 priv->dma_handle); 2389 priv->dma_handle);
2389#endif 2390#endif
2390#endif
2391 kfree (p->private_data); 2391 kfree (p->private_data);
2392 parport_put_port(p); 2392 parport_put_port(p);
2393 kfree (ops); /* hope no-one cached it */ 2393 kfree (ops); /* hope no-one cached it */
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 6a61b9f286e1..0aac6a61337d 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -32,6 +32,7 @@
32#include <linux/types.h> 32#include <linux/types.h>
33#include <linux/pci.h> 33#include <linux/pci.h>
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/sched.h> /* signal_pending() */
35#include <linux/pcieport_if.h> 36#include <linux/pcieport_if.h>
36#include "pci_hotplug.h" 37#include "pci_hotplug.h"
37 38
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 0b8b26beb163..ac1e495c314e 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -30,6 +30,9 @@
30#include <linux/kernel.h> 30#include <linux/kernel.h>
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/types.h> 32#include <linux/types.h>
33#include <linux/signal.h>
34#include <linux/jiffies.h>
35#include <linux/timer.h>
33#include <linux/pci.h> 36#include <linux/pci.h>
34#include <linux/interrupt.h> 37#include <linux/interrupt.h>
35 38
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index 4f7ed4bd3be9..94e30fe4b8f3 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -24,6 +24,8 @@
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/timer.h> 26#include <linux/timer.h>
27#include <linux/jiffies.h>
28#include <linux/slab.h>
27 29
28#include "rio.h" 30#include "rio.h"
29 31
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index 30a11436e241..bef9316e95df 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -15,6 +15,7 @@
15#include <linux/rio.h> 15#include <linux/rio.h>
16#include <linux/rio_drv.h> 16#include <linux/rio_drv.h>
17#include <linux/stat.h> 17#include <linux/stat.h>
18#include <linux/sched.h> /* for capable() */
18 19
19#include "rio.h" 20#include "rio.h"
20 21
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index 3ca1011ceaac..5e382470faa2 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -23,6 +23,7 @@
23#include <linux/rio_regs.h> 23#include <linux/rio_regs.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/slab.h>
26 27
27#include "rio.h" 28#include "rio.h"
28 29
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index f779f674dfa0..2472fa1a1be1 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -18,6 +18,7 @@
18#include <linux/major.h> 18#include <linux/major.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/buffer_head.h> 20#include <linux/buffer_head.h>
21#include <linux/hdreg.h>
21 22
22#include <asm/ccwdev.h> 23#include <asm/ccwdev.h>
23#include <asm/ebcdic.h> 24#include <asm/ebcdic.h>
@@ -1723,12 +1724,34 @@ dasd_release(struct inode *inp, struct file *filp)
1723 return 0; 1724 return 0;
1724} 1725}
1725 1726
1727/*
1728 * Return disk geometry.
1729 */
1730static int
1731dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1732{
1733 struct dasd_device *device;
1734
1735 device = bdev->bd_disk->private_data;
1736 if (!device)
1737 return -ENODEV;
1738
1739 if (!device->discipline ||
1740 !device->discipline->fill_geometry)
1741 return -EINVAL;
1742
1743 device->discipline->fill_geometry(device, geo);
1744 geo->start = get_start_sect(bdev) >> device->s2b_shift;
1745 return 0;
1746}
1747
1726struct block_device_operations 1748struct block_device_operations
1727dasd_device_operations = { 1749dasd_device_operations = {
1728 .owner = THIS_MODULE, 1750 .owner = THIS_MODULE,
1729 .open = dasd_open, 1751 .open = dasd_open,
1730 .release = dasd_release, 1752 .release = dasd_release,
1731 .ioctl = dasd_ioctl, 1753 .ioctl = dasd_ioctl,
1754 .getgeo = dasd_getgeo,
1732}; 1755};
1733 1756
1734 1757
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 044b75371990..8e4dcd58599e 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -486,33 +486,6 @@ dasd_ioctl_set_ro(struct block_device *bdev, int no, long args)
486} 486}
487 487
488/* 488/*
489 * Return disk geometry.
490 */
491static int
492dasd_ioctl_getgeo(struct block_device *bdev, int no, long args)
493{
494 struct hd_geometry geo = { 0, };
495 struct dasd_device *device;
496
497 device = bdev->bd_disk->private_data;
498 if (device == NULL)
499 return -ENODEV;
500
501 if (device == NULL || device->discipline == NULL ||
502 device->discipline->fill_geometry == NULL)
503 return -EINVAL;
504
505 geo = (struct hd_geometry) {};
506 device->discipline->fill_geometry(device, &geo);
507 geo.start = get_start_sect(bdev) >> device->s2b_shift;
508 if (copy_to_user((struct hd_geometry __user *) args, &geo,
509 sizeof (struct hd_geometry)))
510 return -EFAULT;
511
512 return 0;
513}
514
515/*
516 * List of static ioctls. 489 * List of static ioctls.
517 */ 490 */
518static struct { int no; dasd_ioctl_fn_t fn; } dasd_ioctls[] = 491static struct { int no; dasd_ioctl_fn_t fn; } dasd_ioctls[] =
@@ -528,7 +501,6 @@ static struct { int no; dasd_ioctl_fn_t fn; } dasd_ioctls[] =
528 { BIODASDPRRST, dasd_ioctl_reset_profile }, 501 { BIODASDPRRST, dasd_ioctl_reset_profile },
529 { BLKROSET, dasd_ioctl_set_ro }, 502 { BLKROSET, dasd_ioctl_set_ro },
530 { DASDAPIVER, dasd_ioctl_api_version }, 503 { DASDAPIVER, dasd_ioctl_api_version },
531 { HDIO_GETGEO, dasd_ioctl_getgeo },
532 { -1, NULL } 504 { -1, NULL }
533}; 505};
534 506
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index bf3a67c3cc5e..54ecd548c318 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -328,31 +328,27 @@ fail:
328 return 0; 328 return 0;
329} 329}
330 330
331static int xpram_ioctl (struct inode *inode, struct file *filp, 331static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
332 unsigned int cmd, unsigned long arg)
333{ 332{
334 struct hd_geometry __user *geo;
335 unsigned long size; 333 unsigned long size;
336 if (cmd != HDIO_GETGEO) 334
337 return -EINVAL;
338 /* 335 /*
339 * get geometry: we have to fake one... trim the size to a 336 * get geometry: we have to fake one... trim the size to a
340 * multiple of 64 (32k): tell we have 16 sectors, 4 heads, 337 * multiple of 64 (32k): tell we have 16 sectors, 4 heads,
341 * whatever cylinders. Tell also that data starts at sector. 4. 338 * whatever cylinders. Tell also that data starts at sector. 4.
342 */ 339 */
343 geo = (struct hd_geometry __user *) arg;
344 size = (xpram_pages * 8) & ~0x3f; 340 size = (xpram_pages * 8) & ~0x3f;
345 put_user(size >> 6, &geo->cylinders); 341 geo->cylinders = size >> 6;
346 put_user(4, &geo->heads); 342 geo->heads = 4;
347 put_user(16, &geo->sectors); 343 geo->sectors = 16;
348 put_user(4, &geo->start); 344 geo->start = 4;
349 return 0; 345 return 0;
350} 346}
351 347
352static struct block_device_operations xpram_devops = 348static struct block_device_operations xpram_devops =
353{ 349{
354 .owner = THIS_MODULE, 350 .owner = THIS_MODULE,
355 .ioctl = xpram_ioctl, 351 .getgeo = xpram_getgeo,
356}; 352};
357 353
358/* 354/*
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 32d4d8d7b9f3..4c5127ed379c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -527,7 +527,7 @@ static int sd_release(struct inode *inode, struct file *filp)
527 return 0; 527 return 0;
528} 528}
529 529
530static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *loc) 530static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
531{ 531{
532 struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); 532 struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
533 struct scsi_device *sdp = sdkp->device; 533 struct scsi_device *sdp = sdkp->device;
@@ -545,15 +545,9 @@ static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *
545 else 545 else
546 scsicam_bios_param(bdev, sdkp->capacity, diskinfo); 546 scsicam_bios_param(bdev, sdkp->capacity, diskinfo);
547 547
548 if (put_user(diskinfo[0], &loc->heads)) 548 geo->heads = diskinfo[0];
549 return -EFAULT; 549 geo->sectors = diskinfo[1];
550 if (put_user(diskinfo[1], &loc->sectors)) 550 geo->cylinders = diskinfo[2];
551 return -EFAULT;
552 if (put_user(diskinfo[2], &loc->cylinders))
553 return -EFAULT;
554 if (put_user((unsigned)get_start_sect(bdev),
555 (unsigned long __user *)&loc->start))
556 return -EFAULT;
557 return 0; 551 return 0;
558} 552}
559 553
@@ -593,12 +587,6 @@ static int sd_ioctl(struct inode * inode, struct file * filp,
593 if (!scsi_block_when_processing_errors(sdp) || !error) 587 if (!scsi_block_when_processing_errors(sdp) || !error)
594 return error; 588 return error;
595 589
596 if (cmd == HDIO_GETGEO) {
597 if (!arg)
598 return -EINVAL;
599 return sd_hdio_getgeo(bdev, p);
600 }
601
602 /* 590 /*
603 * Send SCSI addressing ioctls directly to mid level, send other 591 * Send SCSI addressing ioctls directly to mid level, send other
604 * ioctls to block level and then onto mid level if they can't be 592 * ioctls to block level and then onto mid level if they can't be
@@ -800,6 +788,7 @@ static struct block_device_operations sd_fops = {
800 .open = sd_open, 788 .open = sd_open,
801 .release = sd_release, 789 .release = sd_release,
802 .ioctl = sd_ioctl, 790 .ioctl = sd_ioctl,
791 .getgeo = sd_getgeo,
803#ifdef CONFIG_COMPAT 792#ifdef CONFIG_COMPAT
804 .compat_ioctl = sd_compat_ioctl, 793 .compat_ioctl = sd_compat_ioctl,
805#endif 794#endif
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index c44bbedec817..4ddc453023a2 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -186,7 +186,7 @@ static void update_bus(struct dentry *bus)
186 186
187 down(&bus->d_inode->i_sem); 187 down(&bus->d_inode->i_sem);
188 188
189 list_for_each_entry(dev, &bus->d_subdirs, d_child) 189 list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child)
190 if (dev->d_inode) 190 if (dev->d_inode)
191 update_dev(dev); 191 update_dev(dev);
192 192
@@ -203,7 +203,7 @@ static void update_sb(struct super_block *sb)
203 203
204 down(&root->d_inode->i_sem); 204 down(&root->d_inode->i_sem);
205 205
206 list_for_each_entry(bus, &root->d_subdirs, d_child) { 206 list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) {
207 if (bus->d_inode) { 207 if (bus->d_inode) {
208 switch (S_IFMT & bus->d_inode->i_mode) { 208 switch (S_IFMT & bus->d_inode->i_mode) {
209 case S_IFDIR: 209 case S_IFDIR:
@@ -319,7 +319,7 @@ static int usbfs_empty (struct dentry *dentry)
319 spin_lock(&dcache_lock); 319 spin_lock(&dcache_lock);
320 320
321 list_for_each(list, &dentry->d_subdirs) { 321 list_for_each(list, &dentry->d_subdirs) {
322 struct dentry *de = list_entry(list, struct dentry, d_child); 322 struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
323 if (usbfs_positive(de)) { 323 if (usbfs_positive(de)) {
324 spin_unlock(&dcache_lock); 324 spin_unlock(&dcache_lock);
325 return 0; 325 return 0;
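
These usbfs hunks are fallout from a VFS change elsewhere in this merge: struct dentry's d_child was folded into the d_u union, so every walk of a parent's d_subdirs list must now name the link as d_u.d_child. A minimal walker mirroring update_bus() above (the callback plumbing is hypothetical, and as in the hunks the caller is responsible for holding i_sem or dcache_lock):

#include <linux/dcache.h>
#include <linux/list.h>

/* Visit each positive child of a dentry; children hang off d_subdirs
 * via the d_u.d_child union member after the VFS change. */
static void demo_for_each_child(struct dentry *parent,
				void (*fn)(struct dentry *))
{
	struct dentry *child;

	list_for_each_entry(child, &parent->d_subdirs, d_u.d_child)
		if (child->d_inode)
			fn(child);
}
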
diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c
index d9cf3b327d96..77cd6ac07e3c 100644
--- a/drivers/usb/host/ohci-au1xxx.c
+++ b/drivers/usb/host/ohci-au1xxx.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/platform_device.h> 21#include <linux/platform_device.h>
22#include <linux/signal.h>
22 23
23#include <asm/mach-au1x00/au1000.h> 24#include <asm/mach-au1x00/au1000.h>
24 25
diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c
index 3959ccc88332..0020ed7a39d0 100644
--- a/drivers/usb/host/ohci-lh7a404.c
+++ b/drivers/usb/host/ohci-lh7a404.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/platform_device.h> 19#include <linux/platform_device.h>
20#include <linux/signal.h>
20 21
21#include <asm/hardware.h> 22#include <asm/hardware.h>
22 23
diff --git a/drivers/usb/host/ohci-ppc-soc.c b/drivers/usb/host/ohci-ppc-soc.c
index 2ec6a78bd65e..b2a8dfa48870 100644
--- a/drivers/usb/host/ohci-ppc-soc.c
+++ b/drivers/usb/host/ohci-ppc-soc.c
@@ -15,6 +15,7 @@
15 */ 15 */
16 16
17#include <linux/platform_device.h> 17#include <linux/platform_device.h>
18#include <linux/signal.h>
18 19
19/* configure so an HC device and id are always provided */ 20/* configure so an HC device and id are always provided */
20/* always called with process context; sleeping is OK */ 21/* always called with process context; sleeping is OK */
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index a5d09e159cd1..6ee449858a5c 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -6,7 +6,7 @@ menu "Console display driver support"
6 6
7config VGA_CONSOLE 7config VGA_CONSOLE
8 bool "VGA text console" if EMBEDDED || !X86 8 bool "VGA text console" if EMBEDDED || !X86
9 depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC && !M68K && !PARISC && !ARCH_VERSATILE 9 depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !ARCH_VERSATILE
10 default y 10 default y
11 help 11 help
12 Saying Y here will allow you to use Linux in text mode through a 12 Saying Y here will allow you to use Linux in text mode through a
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 167de397e4b4..12d9329d1408 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -56,6 +56,8 @@
56static DEFINE_SPINLOCK(vga_lock); 56static DEFINE_SPINLOCK(vga_lock);
57static int cursor_size_lastfrom; 57static int cursor_size_lastfrom;
58static int cursor_size_lastto; 58static int cursor_size_lastto;
59static u32 vgacon_xres;
60static u32 vgacon_yres;
59static struct vgastate state; 61static struct vgastate state;
60 62
61#define BLANK 0x0020 63#define BLANK 0x0020
@@ -69,7 +71,7 @@ static struct vgastate state;
69 * appear. 71 * appear.
70 */ 72 */
71#undef TRIDENT_GLITCH 73#undef TRIDENT_GLITCH
72 74#define VGA_FONTWIDTH 8 /* VGA does not support fontwidths != 8 */
73/* 75/*
74 * Interface used by the world 76 * Interface used by the world
75 */ 77 */
@@ -325,6 +327,10 @@ static const char __init *vgacon_startup(void)
325 vga_scan_lines = 327 vga_scan_lines =
326 vga_video_font_height * vga_video_num_lines; 328 vga_video_font_height * vga_video_num_lines;
327 } 329 }
330
331 vgacon_xres = ORIG_VIDEO_COLS * VGA_FONTWIDTH;
332 vgacon_yres = vga_scan_lines;
333
328 return display_desc; 334 return display_desc;
329} 335}
330 336
@@ -503,10 +509,18 @@ static int vgacon_doresize(struct vc_data *c,
503{ 509{
504 unsigned long flags; 510 unsigned long flags;
505 unsigned int scanlines = height * c->vc_font.height; 511 unsigned int scanlines = height * c->vc_font.height;
506 u8 scanlines_lo, r7, vsync_end, mode; 512 u8 scanlines_lo, r7, vsync_end, mode, max_scan;
507 513
508 spin_lock_irqsave(&vga_lock, flags); 514 spin_lock_irqsave(&vga_lock, flags);
509 515
516 outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg);
517 max_scan = inb_p(vga_video_port_val);
518
519 if (max_scan & 0x80)
520 scanlines <<= 1;
521
522 vgacon_xres = width * VGA_FONTWIDTH;
523 vgacon_yres = height * c->vc_font.height;
510 outb_p(VGA_CRTC_MODE, vga_video_port_reg); 524 outb_p(VGA_CRTC_MODE, vga_video_port_reg);
511 mode = inb_p(vga_video_port_val); 525 mode = inb_p(vga_video_port_val);
512 526
@@ -551,6 +565,10 @@ static int vgacon_doresize(struct vc_data *c,
551 565
552static int vgacon_switch(struct vc_data *c) 566static int vgacon_switch(struct vc_data *c)
553{ 567{
568 int x = c->vc_cols * VGA_FONTWIDTH;
569 int y = c->vc_rows * c->vc_font.height;
570 int rows = ORIG_VIDEO_LINES * vga_default_font_height/
571 c->vc_font.height;
554 /* 572 /*
555 * We need to save screen size here as it's the only way 573 * We need to save screen size here as it's the only way
556 * we can spot the screen has been resized and we need to 574 * we can spot the screen has been resized and we need to
@@ -566,10 +584,11 @@ static int vgacon_switch(struct vc_data *c)
566 scr_memcpyw((u16 *) c->vc_origin, (u16 *) c->vc_screenbuf, 584 scr_memcpyw((u16 *) c->vc_origin, (u16 *) c->vc_screenbuf,
567 c->vc_screenbuf_size > vga_vram_size ? 585 c->vc_screenbuf_size > vga_vram_size ?
568 vga_vram_size : c->vc_screenbuf_size); 586 vga_vram_size : c->vc_screenbuf_size);
569 if (!(vga_video_num_columns % 2) && 587
570 vga_video_num_columns <= ORIG_VIDEO_COLS && 588 if ((vgacon_xres != x || vgacon_yres != y) &&
571 vga_video_num_lines <= (ORIG_VIDEO_LINES * 589 (!(vga_video_num_columns % 2) &&
572 vga_default_font_height) / c->vc_font.height) 590 vga_video_num_columns <= ORIG_VIDEO_COLS &&
591 vga_video_num_lines <= rows))
573 vgacon_doresize(c, c->vc_cols, c->vc_rows); 592 vgacon_doresize(c, c->vc_cols, c->vc_rows);
574 } 593 }
575 594
@@ -993,7 +1012,8 @@ static int vgacon_font_set(struct vc_data *c, struct console_font *font, unsigne
993 if (vga_video_type < VIDEO_TYPE_EGAM) 1012 if (vga_video_type < VIDEO_TYPE_EGAM)
994 return -EINVAL; 1013 return -EINVAL;
995 1014
996 if (font->width != 8 || (charcount != 256 && charcount != 512)) 1015 if (font->width != VGA_FONTWIDTH ||
1016 (charcount != 256 && charcount != 512))
997 return -EINVAL; 1017 return -EINVAL;
998 1018
999 rc = vgacon_do_font_op(&state, font->data, 1, charcount == 512); 1019 rc = vgacon_do_font_op(&state, font->data, 1, charcount == 512);
@@ -1010,7 +1030,7 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font)
1010 if (vga_video_type < VIDEO_TYPE_EGAM) 1030 if (vga_video_type < VIDEO_TYPE_EGAM)
1011 return -EINVAL; 1031 return -EINVAL;
1012 1032
1013 font->width = 8; 1033 font->width = VGA_FONTWIDTH;
1014 font->height = c->vc_font.height; 1034 font->height = c->vc_font.height;
1015 font->charcount = vga_512_chars ? 512 : 256; 1035 font->charcount = vga_512_chars ? 512 : 256;
1016 if (!font->data) 1036 if (!font->data)
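
The vgacon changes cache the active text resolution in vgacon_xres/vgacon_yres so vgacon_switch() can skip the CRTC reprogram when nothing actually changed, and vgacon_doresize() now consults the Max Scan Line register because its top bit makes the hardware count every scanline twice. A compact sketch of that check, using the register names already in this file:

    unsigned int scanlines = height * c->vc_font.height;
    u8 max_scan;

    /* Bit 7 of VGA_CRTC_MAX_SCAN enables scan doubling (200-line modes
     * shown on 400 lines), so vertical totals must be doubled to match. */
    outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg);
    max_scan = inb_p(vga_video_port_val);
    if (max_scan & 0x80)
            scanlines <<= 1;
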
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
index e847f504a47c..1a6d08761f39 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/9p.c
@@ -1,8 +1,9 @@
1/* 1/*
2 * linux/fs/9p/9p.c 2 * linux/fs/9p/9p.c
3 * 3 *
4 * This file contains functions 9P2000 functions 4 * This file contains functions to perform synchronous 9P calls
5 * 5 *
6 * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> 8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 * 9 *
@@ -33,6 +34,7 @@
33#include "debug.h" 34#include "debug.h"
34#include "v9fs.h" 35#include "v9fs.h"
35#include "9p.h" 36#include "9p.h"
37#include "conv.h"
36#include "mux.h" 38#include "mux.h"
37 39
38/** 40/**
@@ -46,16 +48,21 @@
46 48
47int 49int
48v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, 50v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
49 char *version, struct v9fs_fcall **fcall) 51 char *version, struct v9fs_fcall **rcp)
50{ 52{
51 struct v9fs_fcall msg; 53 int ret;
54 struct v9fs_fcall *tc;
52 55
53 dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); 56 dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
54 msg.id = TVERSION; 57 tc = v9fs_create_tversion(msize, version);
55 msg.params.tversion.msize = msize;
56 msg.params.tversion.version = version;
57 58
58 return v9fs_mux_rpc(v9ses, &msg, fcall); 59 if (!IS_ERR(tc)) {
60 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
61 kfree(tc);
62 } else
63 ret = PTR_ERR(tc);
64
65 return ret;
59} 66}
60 67
61/** 68/**
@@ -71,19 +78,45 @@ v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
71 78
72int 79int
73v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, 80v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
74 u32 fid, u32 afid, struct v9fs_fcall **fcall) 81 u32 fid, u32 afid, struct v9fs_fcall **rcp)
75{ 82{
76 struct v9fs_fcall msg; 83 int ret;
84 struct v9fs_fcall* tc;
77 85
78 dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, 86 dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
79 aname, fid, afid); 87 aname, fid, afid);
80 msg.id = TATTACH;
81 msg.params.tattach.fid = fid;
82 msg.params.tattach.afid = afid;
83 msg.params.tattach.uname = uname;
84 msg.params.tattach.aname = aname;
85 88
86 return v9fs_mux_rpc(v9ses, &msg, fcall); 89 tc = v9fs_create_tattach(fid, afid, uname, aname);
90 if (!IS_ERR(tc)) {
91 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
92 kfree(tc);
93 } else
94 ret = PTR_ERR(tc);
95
96 return ret;
97}
98
99static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
100 struct v9fs_fcall *rc, int err)
101{
102 int fid;
103 struct v9fs_session_info *v9ses;
104
105 if (err)
106 return;
107
108 fid = tc->params.tclunk.fid;
109 kfree(tc);
110
111 if (!rc)
112 return;
113
114 dprintk(DEBUG_9P, "tcall id %d rcall id %d\n", tc->id, rc->id);
115 v9ses = a;
116 if (rc->id == RCLUNK)
117 v9fs_put_idpool(fid, &v9ses->fidpool);
118
119 kfree(rc);
87} 120}
88 121
89/** 122/**
@@ -95,16 +128,25 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
95 */ 128 */
96 129
97int 130int
98v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, 131v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
99 struct v9fs_fcall **fcall)
100{ 132{
101 struct v9fs_fcall msg; 133 int ret;
134 struct v9fs_fcall *tc, *rc;
102 135
103 dprintk(DEBUG_9P, "fid %d\n", fid); 136 dprintk(DEBUG_9P, "fid %d\n", fid);
104 msg.id = TCLUNK;
105 msg.params.tclunk.fid = fid;
106 137
107 return v9fs_mux_rpc(v9ses, &msg, fcall); 138 rc = NULL;
139 tc = v9fs_create_tclunk(fid);
140 if (!IS_ERR(tc))
141 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
142 else
143 ret = PTR_ERR(tc);
144
145 if (ret)
146 dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
147
148 v9fs_t_clunk_cb(v9ses, tc, rc, ret);
149 return ret;
108} 150}
109 151
110/** 152/**
@@ -114,14 +156,21 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
114 * 156 *
115 */ 157 */
116 158
117int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag) 159int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
118{ 160{
119 struct v9fs_fcall msg; 161 int ret;
162 struct v9fs_fcall *tc;
163
164 dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
165
166 tc = v9fs_create_tflush(oldtag);
167 if (!IS_ERR(tc)) {
168 ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
169 kfree(tc);
170 } else
171 ret = PTR_ERR(tc);
120 172
121 dprintk(DEBUG_9P, "oldtag %d\n", tag); 173 return ret;
122 msg.id = TFLUSH;
123 msg.params.tflush.oldtag = tag;
124 return v9fs_mux_rpc(v9ses, &msg, NULL);
125} 174}
126 175
127/** 176/**
@@ -133,17 +182,22 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
133 */ 182 */
134 183
135int 184int
136v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall) 185v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
137{ 186{
138 struct v9fs_fcall msg; 187 int ret;
188 struct v9fs_fcall *tc;
139 189
140 dprintk(DEBUG_9P, "fid %d\n", fid); 190 dprintk(DEBUG_9P, "fid %d\n", fid);
141 if (fcall)
142 *fcall = NULL;
143 191
144 msg.id = TSTAT; 192 ret = -ENOMEM;
145 msg.params.tstat.fid = fid; 193 tc = v9fs_create_tstat(fid);
146 return v9fs_mux_rpc(v9ses, &msg, fcall); 194 if (!IS_ERR(tc)) {
195 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
196 kfree(tc);
197 } else
198 ret = PTR_ERR(tc);
199
200 return ret;
147} 201}
148 202
149/** 203/**
@@ -157,16 +211,21 @@ v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
157 211
158int 212int
159v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, 213v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
160 struct v9fs_stat *stat, struct v9fs_fcall **fcall) 214 struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
161{ 215{
162 struct v9fs_fcall msg; 216 int ret;
217 struct v9fs_fcall *tc;
218
219 dprintk(DEBUG_9P, "fid %d\n", fid);
163 220
164 dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length); 221 tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
165 msg.id = TWSTAT; 222 if (!IS_ERR(tc)) {
166 msg.params.twstat.fid = fid; 223 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
167 msg.params.twstat.stat = stat; 224 kfree(tc);
225 } else
226 ret = PTR_ERR(tc);
168 227
169 return v9fs_mux_rpc(v9ses, &msg, fcall); 228 return ret;
170} 229}
171 230
172/** 231/**
@@ -183,23 +242,27 @@ v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
183 242
184int 243int
185v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, 244v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
186 char *name, struct v9fs_fcall **fcall) 245 char *name, struct v9fs_fcall **rcp)
187{ 246{
188 struct v9fs_fcall msg; 247 int ret;
248 struct v9fs_fcall *tc;
249 int nwname;
189 250
190 dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); 251 dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
191 msg.id = TWALK; 252
192 msg.params.twalk.fid = fid; 253 if (name)
193 msg.params.twalk.newfid = newfid; 254 nwname = 1;
194 255 else
195 if (name) { 256 nwname = 0;
196 msg.params.twalk.nwname = 1; 257
197 msg.params.twalk.wnames = &name; 258 tc = v9fs_create_twalk(fid, newfid, nwname, &name);
198 } else { 259 if (!IS_ERR(tc)) {
199 msg.params.twalk.nwname = 0; 260 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
200 } 261 kfree(tc);
201 262 } else
202 return v9fs_mux_rpc(v9ses, &msg, fcall); 263 ret = PTR_ERR(tc);
264
265 return ret;
203} 266}
204 267
205/** 268/**
@@ -214,19 +277,21 @@ v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
214 277
215int 278int
216v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, 279v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
217 struct v9fs_fcall **fcall) 280 struct v9fs_fcall **rcp)
218{ 281{
219 struct v9fs_fcall msg; 282 int ret;
220 long errorno = -1; 283 struct v9fs_fcall *tc;
221 284
222 dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); 285 dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
223 msg.id = TOPEN;
224 msg.params.topen.fid = fid;
225 msg.params.topen.mode = mode;
226 286
227 errorno = v9fs_mux_rpc(v9ses, &msg, fcall); 287 tc = v9fs_create_topen(fid, mode);
288 if (!IS_ERR(tc)) {
289 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
290 kfree(tc);
291 } else
292 ret = PTR_ERR(tc);
228 293
229 return errorno; 294 return ret;
230} 295}
231 296
232/** 297/**
@@ -239,14 +304,21 @@ v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
239 304
240int 305int
241v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, 306v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
242 struct v9fs_fcall **fcall) 307 struct v9fs_fcall **rcp)
243{ 308{
244 struct v9fs_fcall msg; 309 int ret;
310 struct v9fs_fcall *tc;
245 311
246 dprintk(DEBUG_9P, "fid %d\n", fid); 312 dprintk(DEBUG_9P, "fid %d\n", fid);
247 msg.id = TREMOVE; 313
248 msg.params.tremove.fid = fid; 314 tc = v9fs_create_tremove(fid);
249 return v9fs_mux_rpc(v9ses, &msg, fcall); 315 if (!IS_ERR(tc)) {
316 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
317 kfree(tc);
318 } else
319 ret = PTR_ERR(tc);
320
321 return ret;
250} 322}
251 323
252/** 324/**
@@ -262,20 +334,22 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
262 334
263int 335int
264v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, 336v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
265 u32 perm, u8 mode, struct v9fs_fcall **fcall) 337 u32 perm, u8 mode, struct v9fs_fcall **rcp)
266{ 338{
267 struct v9fs_fcall msg; 339 int ret;
340 struct v9fs_fcall *tc;
268 341
269 dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", 342 dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
270 fid, name, perm, mode); 343 fid, name, perm, mode);
271 344
272 msg.id = TCREATE; 345 tc = v9fs_create_tcreate(fid, name, perm, mode);
273 msg.params.tcreate.fid = fid; 346 if (!IS_ERR(tc)) {
274 msg.params.tcreate.name = name; 347 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
275 msg.params.tcreate.perm = perm; 348 kfree(tc);
276 msg.params.tcreate.mode = mode; 349 } else
350 ret = PTR_ERR(tc);
277 351
278 return v9fs_mux_rpc(v9ses, &msg, fcall); 352 return ret;
279} 353}
280 354
281/** 355/**
@@ -290,31 +364,29 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
290 364
291int 365int
292v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, 366v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
293 u32 count, struct v9fs_fcall **fcall) 367 u32 count, struct v9fs_fcall **rcp)
294{ 368{
295 struct v9fs_fcall msg; 369 int ret;
296 struct v9fs_fcall *rc = NULL; 370 struct v9fs_fcall *tc, *rc;
297 long errorno = -1; 371
298 372 dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
299 dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid, 373 (long long unsigned) offset, count);
300 (long unsigned int)offset, count); 374
301 msg.id = TREAD; 375 tc = v9fs_create_tread(fid, offset, count);
302 msg.params.tread.fid = fid; 376 if (!IS_ERR(tc)) {
303 msg.params.tread.offset = offset; 377 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
304 msg.params.tread.count = count; 378 if (!ret)
305 errorno = v9fs_mux_rpc(v9ses, &msg, &rc); 379 ret = rc->params.rread.count;
306 380 if (rcp)
307 if (!errorno) { 381 *rcp = rc;
308 errorno = rc->params.rread.count; 382 else
309 dump_data(rc->params.rread.data, rc->params.rread.count); 383 kfree(rc);
310 } 384
311 385 kfree(tc);
312 if (fcall) 386 } else
313 *fcall = rc; 387 ret = PTR_ERR(tc);
314 else 388
315 kfree(rc); 389 return ret;
316
317 return errorno;
318} 390}
319 391
320/** 392/**
@@ -328,32 +400,30 @@ v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
328 */ 400 */
329 401
330int 402int
331v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, 403v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
332 u64 offset, u32 count, void *data, struct v9fs_fcall **fcall) 404 const char __user *data, struct v9fs_fcall **rcp)
333{ 405{
334 struct v9fs_fcall msg; 406 int ret;
335 struct v9fs_fcall *rc = NULL; 407 struct v9fs_fcall *tc, *rc;
336 long errorno = -1;
337 408
338 dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid, 409 dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
339 (unsigned long long)offset, count); 410 (long long unsigned) offset, count);
340 dump_data(data, count);
341 411
342 msg.id = TWRITE; 412 tc = v9fs_create_twrite(fid, offset, count, data);
343 msg.params.twrite.fid = fid; 413 if (!IS_ERR(tc)) {
344 msg.params.twrite.offset = offset; 414 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
345 msg.params.twrite.count = count;
346 msg.params.twrite.data = data;
347 415
348 errorno = v9fs_mux_rpc(v9ses, &msg, &rc); 416 if (!ret)
417 ret = rc->params.rwrite.count;
418 if (rcp)
419 *rcp = rc;
420 else
421 kfree(rc);
349 422
350 if (!errorno) 423 kfree(tc);
351 errorno = rc->params.rwrite.count; 424 } else
425 ret = PTR_ERR(tc);
352 426
353 if (fcall) 427 return ret;
354 *fcall = rc;
355 else
356 kfree(rc);
357
358 return errorno;
359} 428}
429
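
Every wrapper in the rewritten 9p.c follows one shape: build a fully serialized T-message with a v9fs_create_t*() constructor, run it through v9fs_mux_rpc(), then free the request (and the reply, when the caller passed no rcp). Two things are worth flagging in the committed code: v9fs_t_clunk_cb() frees tc and then dereferences tc->id in its dprintk(), a use-after-free in the debug path, and the "0x%llux" format strings actually print the offset in decimal ("%llx" was presumably intended). A minimal sketch of the shared pattern, modelled on v9fs_t_remove():

    int ret;
    struct v9fs_fcall *tc;

    tc = v9fs_create_tremove(fid);  /* serialized Tremove, or ERR_PTR */
    if (!IS_ERR(tc)) {
            ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
            kfree(tc);              /* request buffer is ours to free */
    } else
            ret = PTR_ERR(tc);

    return ret;
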
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index f55424216be2..0cd374d94717 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -3,6 +3,7 @@
3 * 3 *
4 * 9P protocol definitions. 4 * 9P protocol definitions.
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> 8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 * 9 *
@@ -100,9 +101,18 @@ enum {
100 V9FS_QTFILE = 0x00, 101 V9FS_QTFILE = 0x00,
101}; 102};
102 103
104#define V9FS_NOTAG (u16)(~0)
105#define V9FS_NOFID (u32)(~0)
106#define V9FS_MAXWELEM 16
107
103/* ample room for Twrite/Rread header (iounit) */ 108/* ample room for Twrite/Rread header (iounit) */
104#define V9FS_IOHDRSZ 24 109#define V9FS_IOHDRSZ 24
105 110
111struct v9fs_str {
112 u16 len;
113 char *str;
114};
115
106/* qids are the unique ID for a file (like an inode) */ 116/* qids are the unique ID for a file (like an inode) */
107struct v9fs_qid { 117struct v9fs_qid {
108 u8 type; 118 u8 type;
@@ -120,6 +130,29 @@ struct v9fs_stat {
120 u32 atime; 130 u32 atime;
121 u32 mtime; 131 u32 mtime;
122 u64 length; 132 u64 length;
133 struct v9fs_str name;
134 struct v9fs_str uid;
135 struct v9fs_str gid;
136 struct v9fs_str muid;
137 struct v9fs_str extension; /* 9p2000.u extensions */
138 u32 n_uid; /* 9p2000.u extensions */
139 u32 n_gid; /* 9p2000.u extensions */
140 u32 n_muid; /* 9p2000.u extensions */
141};
142
143/* file metadata (stat) structure used to create Twstat message
144 This is similar to v9fs_stat, but the strings don't point to
145 the same memory block and should be freed separately
146*/
147struct v9fs_wstat {
148 u16 size;
149 u16 type;
150 u32 dev;
151 struct v9fs_qid qid;
152 u32 mode;
153 u32 atime;
154 u32 mtime;
155 u64 length;
123 char *name; 156 char *name;
124 char *uid; 157 char *uid;
125 char *gid; 158 char *gid;
@@ -128,25 +161,24 @@ struct v9fs_stat {
128 u32 n_uid; /* 9p2000.u extensions */ 161 u32 n_uid; /* 9p2000.u extensions */
129 u32 n_gid; /* 9p2000.u extensions */ 162 u32 n_gid; /* 9p2000.u extensions */
130 u32 n_muid; /* 9p2000.u extensions */ 163 u32 n_muid; /* 9p2000.u extensions */
131 char data[0];
132}; 164};
133 165
134/* Structures for Protocol Operations */ 166/* Structures for Protocol Operations */
135 167
136struct Tversion { 168struct Tversion {
137 u32 msize; 169 u32 msize;
138 char *version; 170 struct v9fs_str version;
139}; 171};
140 172
141struct Rversion { 173struct Rversion {
142 u32 msize; 174 u32 msize;
143 char *version; 175 struct v9fs_str version;
144}; 176};
145 177
146struct Tauth { 178struct Tauth {
147 u32 afid; 179 u32 afid;
148 char *uname; 180 struct v9fs_str uname;
149 char *aname; 181 struct v9fs_str aname;
150}; 182};
151 183
152struct Rauth { 184struct Rauth {
@@ -154,12 +186,12 @@ struct Rauth {
154}; 186};
155 187
156struct Rerror { 188struct Rerror {
157 char *error; 189 struct v9fs_str error;
158 u32 errno; /* 9p2000.u extension */ 190 u32 errno; /* 9p2000.u extension */
159}; 191};
160 192
161struct Tflush { 193struct Tflush {
162 u32 oldtag; 194 u16 oldtag;
163}; 195};
164 196
165struct Rflush { 197struct Rflush {
@@ -168,8 +200,8 @@ struct Rflush {
168struct Tattach { 200struct Tattach {
169 u32 fid; 201 u32 fid;
170 u32 afid; 202 u32 afid;
171 char *uname; 203 struct v9fs_str uname;
172 char *aname; 204 struct v9fs_str aname;
173}; 205};
174 206
175struct Rattach { 207struct Rattach {
@@ -179,13 +211,13 @@ struct Rattach {
179struct Twalk { 211struct Twalk {
180 u32 fid; 212 u32 fid;
181 u32 newfid; 213 u32 newfid;
182 u32 nwname; 214 u16 nwname;
183 char **wnames; 215 struct v9fs_str wnames[16];
184}; 216};
185 217
186struct Rwalk { 218struct Rwalk {
187 u32 nwqid; 219 u16 nwqid;
188 struct v9fs_qid *wqids; 220 struct v9fs_qid wqids[16];
189}; 221};
190 222
191struct Topen { 223struct Topen {
@@ -200,7 +232,7 @@ struct Ropen {
200 232
201struct Tcreate { 233struct Tcreate {
202 u32 fid; 234 u32 fid;
203 char *name; 235 struct v9fs_str name;
204 u32 perm; 236 u32 perm;
205 u8 mode; 237 u8 mode;
206}; 238};
@@ -251,12 +283,12 @@ struct Tstat {
251}; 283};
252 284
253struct Rstat { 285struct Rstat {
254 struct v9fs_stat *stat; 286 struct v9fs_stat stat;
255}; 287};
256 288
257struct Twstat { 289struct Twstat {
258 u32 fid; 290 u32 fid;
259 struct v9fs_stat *stat; 291 struct v9fs_stat stat;
260}; 292};
261 293
262struct Rwstat { 294struct Rwstat {
@@ -271,6 +303,7 @@ struct v9fs_fcall {
271 u32 size; 303 u32 size;
272 u8 id; 304 u8 id;
273 u16 tag; 305 u16 tag;
306 void *sdata;
274 307
275 union { 308 union {
276 struct Tversion tversion; 309 struct Tversion tversion;
@@ -303,7 +336,9 @@ struct v9fs_fcall {
303 } params; 336 } params;
304}; 337};
305 338
306#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "") 339#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
340 fcall?fcall->params.rerror.error.len:0, \
341 fcall?fcall->params.rerror.error.str:"");
307 342
308int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, 343int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
309 char *version, struct v9fs_fcall **rcall); 344 char *version, struct v9fs_fcall **rcall);
@@ -311,8 +346,7 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
311int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, 346int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
312 u32 fid, u32 afid, struct v9fs_fcall **rcall); 347 u32 fid, u32 afid, struct v9fs_fcall **rcall);
313 348
314int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, 349int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
315 struct v9fs_fcall **rcall);
316 350
317int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag); 351int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
318 352
@@ -320,7 +354,7 @@ int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
320 struct v9fs_fcall **rcall); 354 struct v9fs_fcall **rcall);
321 355
322int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, 356int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
323 struct v9fs_stat *stat, struct v9fs_fcall **rcall); 357 struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
324 358
325int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, 359int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
326 char *name, struct v9fs_fcall **rcall); 360 char *name, struct v9fs_fcall **rcall);
@@ -338,4 +372,5 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
338 u64 offset, u32 count, struct v9fs_fcall **rcall); 372 u64 offset, u32 count, struct v9fs_fcall **rcall);
339 373
340int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, 374int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
341 u32 count, void *data, struct v9fs_fcall **rcall); 375 u32 count, const char __user * data,
376 struct v9fs_fcall **rcall);
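
struct v9fs_str replaces the NUL-terminated char pointers because 9P strings are length-prefixed on the wire; decoding can now point straight into the receive buffer instead of copying. Code that touches these strings must carry the length explicitly, as in this sketch (match() is hypothetical):

    struct v9fs_str *name = &stat.name; /* aliases the receive buffer */

    printk(KERN_DEBUG "name %.*s\n", name->len, name->str);

    if (name->len == 3 && memcmp(name->str, "foo", 3) == 0)
            match();
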
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index e4e4ffe5a7dc..3d023089707e 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,17 +1,17 @@
1obj-$(CONFIG_9P_FS) := 9p2000.o 1obj-$(CONFIG_9P_FS) := 9p2000.o
2 2
39p2000-objs := \ 39p2000-objs := \
4 trans_fd.o \
5 trans_sock.o \
6 mux.o \
7 9p.o \
8 conv.o \
4 vfs_super.o \ 9 vfs_super.o \
5 vfs_inode.o \ 10 vfs_inode.o \
6 vfs_file.o \ 11 vfs_file.o \
7 vfs_dir.o \ 12 vfs_dir.o \
8 vfs_dentry.o \ 13 vfs_dentry.o \
9 error.o \ 14 error.o \
10 mux.o \
11 trans_fd.o \
12 trans_sock.o \
13 9p.o \
14 conv.o \
15 v9fs.o \ 15 v9fs.o \
16 fid.o 16 fid.o
17 17
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 18121af99d3e..55ccfa10ee9e 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -30,7 +30,7 @@
30#include <linux/errno.h> 30#include <linux/errno.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/idr.h> 32#include <linux/idr.h>
33 33#include <asm/uaccess.h>
34#include "debug.h" 34#include "debug.h"
35#include "v9fs.h" 35#include "v9fs.h"
36#include "9p.h" 36#include "9p.h"
@@ -58,12 +58,15 @@ static inline int buf_check_overflow(struct cbuf *buf)
58 58
59static inline int buf_check_size(struct cbuf *buf, int len) 59static inline int buf_check_size(struct cbuf *buf, int len)
60{ 60{
61 if (buf->p+len > buf->ep) { 61 if (buf->p + len > buf->ep) {
62 if (buf->p < buf->ep) { 62 if (buf->p < buf->ep) {
63 eprintk(KERN_ERR, "buffer overflow\n"); 63 eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
64 len, (int)(buf->ep - buf->p));
65 dump_stack();
64 buf->p = buf->ep + 1; 66 buf->p = buf->ep + 1;
65 return 0;
66 } 67 }
68
69 return 0;
67 } 70 }
68 71
69 return 1; 72 return 1;
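
buf_check_size() keeps its poison-on-overflow behaviour (p is pushed past ep so every later check also fails and buf_check_overflow() reports it once at the end of the message), but it now says how many bytes were wanted versus available and dumps a stack trace, which makes truncated messages attributable. The put/get helpers all lean on it in the same way; a sketch of that shape:

    static inline void buf_put_int32(struct cbuf *buf, u32 val)
    {
            if (buf_check_size(buf, 4)) {   /* poisons buf on failure */
                    *(__le32 *) buf->p = cpu_to_le32(val);
                    buf->p += 4;
            }
    }
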
@@ -127,14 +130,6 @@ static inline void buf_put_string(struct cbuf *buf, const char *s)
127 buf_put_stringn(buf, s, strlen(s)); 130 buf_put_stringn(buf, s, strlen(s));
128} 131}
129 132
130static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
131{
132 if (buf_check_size(buf, datalen)) {
133 memcpy(buf->p, data, datalen);
134 buf->p += datalen;
135 }
136}
137
138static inline u8 buf_get_int8(struct cbuf *buf) 133static inline u8 buf_get_int8(struct cbuf *buf)
139{ 134{
140 u8 ret = 0; 135 u8 ret = 0;
@@ -183,86 +178,37 @@ static inline u64 buf_get_int64(struct cbuf *buf)
183 return ret; 178 return ret;
184} 179}
185 180
186static inline int 181static inline void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
187buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
188{
189 u16 len = 0;
190
191 len = buf_get_int16(buf);
192 if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) {
193 memcpy(data, buf->p, len);
194 data[len] = 0;
195 buf->p += len;
196 len++;
197 }
198
199 return len;
200}
201
202static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
203{
204 char *ret;
205 u16 len;
206
207 ret = NULL;
208 len = buf_get_int16(buf);
209
210 if (!buf_check_overflow(buf) && buf_check_size(buf, len) &&
211 buf_check_size(sbuf, len+1)) {
212
213 memcpy(sbuf->p, buf->p, len);
214 sbuf->p[len] = 0;
215 ret = sbuf->p;
216 buf->p += len;
217 sbuf->p += len + 1;
218 }
219
220 return ret;
221}
222
223static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
224{ 182{
225 int ret = 0; 183 vstr->len = buf_get_int16(buf);
226 184 if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
227 if (buf_check_size(buf, datalen)) { 185 vstr->str = buf->p;
228 memcpy(data, buf->p, datalen); 186 buf->p += vstr->len;
229 buf->p += datalen; 187 } else {
230 ret = datalen; 188 vstr->len = 0;
189 vstr->str = NULL;
231 } 190 }
232
233 return ret;
234} 191}
235 192
236static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, 193static inline void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
237 int datalen)
238{ 194{
239 char *ret = NULL; 195 qid->type = buf_get_int8(bufp);
240 int n = 0; 196 qid->version = buf_get_int32(bufp);
241 197 qid->path = buf_get_int64(bufp);
242 if (buf_check_size(dbuf, datalen)) {
243 n = buf_get_data(buf, dbuf->p, datalen);
244 if (n > 0) {
245 ret = dbuf->p;
246 dbuf->p += n;
247 }
248 }
249
250 return ret;
251} 198}
252 199
253/** 200/**
254 * v9fs_size_stat - calculate the size of a variable length stat struct 201 * v9fs_size_wstat - calculate the size of a variable length stat struct
255 * @v9ses: session information
256 * @stat: metadata (stat) structure 202 * @stat: metadata (stat) structure
203 * @extended: non-zero if 9P2000.u
257 * 204 *
258 */ 205 */
259 206
260static int v9fs_size_stat(struct v9fs_session_info *v9ses, 207static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
261 struct v9fs_stat *stat)
262{ 208{
263 int size = 0; 209 int size = 0;
264 210
265 if (stat == NULL) { 211 if (wstat == NULL) {
266 eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n"); 212 eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
267 return 0; 213 return 0;
268 } 214 }
@@ -279,82 +225,38 @@ static int v9fs_size_stat(struct v9fs_session_info *v9ses,
279 8 + /* length[8] */ 225 8 + /* length[8] */
280 8; /* minimum sum of string lengths */ 226 8; /* minimum sum of string lengths */
281 227
282 if (stat->name) 228 if (wstat->name)
283 size += strlen(stat->name); 229 size += strlen(wstat->name);
284 if (stat->uid) 230 if (wstat->uid)
285 size += strlen(stat->uid); 231 size += strlen(wstat->uid);
286 if (stat->gid) 232 if (wstat->gid)
287 size += strlen(stat->gid); 233 size += strlen(wstat->gid);
288 if (stat->muid) 234 if (wstat->muid)
289 size += strlen(stat->muid); 235 size += strlen(wstat->muid);
290 236
291 if (v9ses->extended) { 237 if (extended) {
292 size += 4 + /* n_uid[4] */ 238 size += 4 + /* n_uid[4] */
293 4 + /* n_gid[4] */ 239 4 + /* n_gid[4] */
294 4 + /* n_muid[4] */ 240 4 + /* n_muid[4] */
295 2; /* string length of extension[4] */ 241 2; /* string length of extension[4] */
296 if (stat->extension) 242 if (wstat->extension)
297 size += strlen(stat->extension); 243 size += strlen(wstat->extension);
298 } 244 }
299 245
300 return size; 246 return size;
301} 247}
302 248
303/** 249/**
304 * serialize_stat - safely format a stat structure for transmission 250 * buf_get_stat - safely decode a received metadata (stat) structure
305 * @v9ses: session info
306 * @stat: metadata (stat) structure
307 * @bufp: buffer to serialize structure into
308 *
309 */
310
311static int
312serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
313 struct cbuf *bufp)
314{
315 buf_put_int16(bufp, stat->size);
316 buf_put_int16(bufp, stat->type);
317 buf_put_int32(bufp, stat->dev);
318 buf_put_int8(bufp, stat->qid.type);
319 buf_put_int32(bufp, stat->qid.version);
320 buf_put_int64(bufp, stat->qid.path);
321 buf_put_int32(bufp, stat->mode);
322 buf_put_int32(bufp, stat->atime);
323 buf_put_int32(bufp, stat->mtime);
324 buf_put_int64(bufp, stat->length);
325
326 buf_put_string(bufp, stat->name);
327 buf_put_string(bufp, stat->uid);
328 buf_put_string(bufp, stat->gid);
329 buf_put_string(bufp, stat->muid);
330
331 if (v9ses->extended) {
332 buf_put_string(bufp, stat->extension);
333 buf_put_int32(bufp, stat->n_uid);
334 buf_put_int32(bufp, stat->n_gid);
335 buf_put_int32(bufp, stat->n_muid);
336 }
337
338 if (buf_check_overflow(bufp))
339 return 0;
340
341 return stat->size;
342}
343
344/**
345 * deserialize_stat - safely decode a recieved metadata (stat) structure
346 * @v9ses: session info
347 * @bufp: buffer to deserialize 251 * @bufp: buffer to deserialize
348 * @stat: metadata (stat) structure 252 * @stat: metadata (stat) structure
349 * @dbufp: buffer to deserialize variable strings into 253 * @extended: non-zero if 9P2000.u
350 * 254 *
351 */ 255 */
352 256
353static inline int 257static inline void
354deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp, 258buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
355 struct v9fs_stat *stat, struct cbuf *dbufp)
356{ 259{
357
358 stat->size = buf_get_int16(bufp); 260 stat->size = buf_get_int16(bufp);
359 stat->type = buf_get_int16(bufp); 261 stat->type = buf_get_int16(bufp);
360 stat->dev = buf_get_int32(bufp); 262 stat->dev = buf_get_int32(bufp);
@@ -365,282 +267,82 @@ deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
365 stat->atime = buf_get_int32(bufp); 267 stat->atime = buf_get_int32(bufp);
366 stat->mtime = buf_get_int32(bufp); 268 stat->mtime = buf_get_int32(bufp);
367 stat->length = buf_get_int64(bufp); 269 stat->length = buf_get_int64(bufp);
368 stat->name = buf_get_stringb(bufp, dbufp); 270 buf_get_str(bufp, &stat->name);
369 stat->uid = buf_get_stringb(bufp, dbufp); 271 buf_get_str(bufp, &stat->uid);
370 stat->gid = buf_get_stringb(bufp, dbufp); 272 buf_get_str(bufp, &stat->gid);
371 stat->muid = buf_get_stringb(bufp, dbufp); 273 buf_get_str(bufp, &stat->muid);
372 274
373 if (v9ses->extended) { 275 if (extended) {
374 stat->extension = buf_get_stringb(bufp, dbufp); 276 buf_get_str(bufp, &stat->extension);
375 stat->n_uid = buf_get_int32(bufp); 277 stat->n_uid = buf_get_int32(bufp);
376 stat->n_gid = buf_get_int32(bufp); 278 stat->n_gid = buf_get_int32(bufp);
377 stat->n_muid = buf_get_int32(bufp); 279 stat->n_muid = buf_get_int32(bufp);
378 } 280 }
379
380 if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
381 return 0;
382
383 return stat->size + 2;
384}
385
386/**
387 * deserialize_statb - wrapper for decoding a received metadata structure
388 * @v9ses: session info
389 * @bufp: buffer to deserialize
390 * @dbufp: buffer to deserialize variable strings into
391 *
392 */
393
394static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
395 *v9ses, struct cbuf *bufp,
396 struct cbuf *dbufp)
397{
398 struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
399
400 if (ret) {
401 int n = deserialize_stat(v9ses, bufp, ret, dbufp);
402 if (n <= 0)
403 return NULL;
404 }
405
406 return ret;
407} 281}
408 282
409/** 283/**
410 * v9fs_deserialize_stat - decode a received metadata structure 284 * v9fs_deserialize_stat - decode a received metadata structure
411 * @v9ses: session info
412 * @buf: buffer to deserialize 285 * @buf: buffer to deserialize
413 * @buflen: length of received buffer 286 * @buflen: length of received buffer
414 * @stat: metadata structure to decode into 287 * @stat: metadata structure to decode into
415 * @statlen: length of destination metadata structure 288 * @extended: non-zero if 9P2000.u
416 * 289 *
290 * Note: stat will point to the buf region.
417 */ 291 */
418 292
419int 293int
420v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf, 294v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
421 u32 buflen, struct v9fs_stat *stat, u32 statlen) 295 int extended)
422{ 296{
423 struct cbuf buffer; 297 struct cbuf buffer;
424 struct cbuf *bufp = &buffer; 298 struct cbuf *bufp = &buffer;
425 struct cbuf dbuffer; 299 unsigned char *p;
426 struct cbuf *dbufp = &dbuffer;
427 300
428 buf_init(bufp, buf, buflen); 301 buf_init(bufp, buf, buflen);
429 buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat), 302 p = bufp->p;
430 statlen - sizeof(struct v9fs_stat)); 303 buf_get_stat(bufp, stat, extended);
431
432 return deserialize_stat(v9ses, bufp, stat, dbufp);
433}
434
435static inline int
436v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
437{
438 int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */
439 int i = 0;
440
441 switch (fcall->id) {
442 default:
443 eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
444 return 0;
445 case TVERSION: /* msize[4] version[s] */
446 size += 4 + 2 + strlen(fcall->params.tversion.version);
447 break;
448 case TAUTH: /* afid[4] uname[s] aname[s] */
449 size += 4 + 2 + strlen(fcall->params.tauth.uname) +
450 2 + strlen(fcall->params.tauth.aname);
451 break;
452 case TFLUSH: /* oldtag[2] */
453 size += 2;
454 break;
455 case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */
456 size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
457 2 + strlen(fcall->params.tattach.aname);
458 break;
459 case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
460 size += 4 + 4 + 2;
461 /* now compute total for the array of names */
462 for (i = 0; i < fcall->params.twalk.nwname; i++)
463 size += 2 + strlen(fcall->params.twalk.wnames[i]);
464 break;
465 case TOPEN: /* fid[4] mode[1] */
466 size += 4 + 1;
467 break;
468 case TCREATE: /* fid[4] name[s] perm[4] mode[1] */
469 size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
470 break;
471 case TREAD: /* fid[4] offset[8] count[4] */
472 size += 4 + 8 + 4;
473 break;
474 case TWRITE: /* fid[4] offset[8] count[4] data[count] */
475 size += 4 + 8 + 4 + fcall->params.twrite.count;
476 break;
477 case TCLUNK: /* fid[4] */
478 size += 4;
479 break;
480 case TREMOVE: /* fid[4] */
481 size += 4;
482 break;
483 case TSTAT: /* fid[4] */
484 size += 4;
485 break;
486 case TWSTAT: /* fid[4] stat[n] */
487 fcall->params.twstat.stat->size =
488 v9fs_size_stat(v9ses, fcall->params.twstat.stat);
489 size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
490 }
491 return size;
492}
493
494/*
495 * v9fs_serialize_fcall - marshall fcall struct into a packet
496 * @v9ses: session information
497 * @fcall: structure to convert
498 * @data: buffer to serialize fcall into
499 * @datalen: length of buffer to serialize fcall into
500 *
501 */
502
503int
504v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
505 void *data, u32 datalen)
506{
507 int i = 0;
508 struct v9fs_stat *stat = NULL;
509 struct cbuf buffer;
510 struct cbuf *bufp = &buffer;
511
512 buf_init(bufp, data, datalen);
513
514 if (!fcall) {
515 eprintk(KERN_ERR, "no fcall\n");
516 return -EINVAL;
517 }
518
519 fcall->size = v9fs_size_fcall(v9ses, fcall);
520
521 buf_put_int32(bufp, fcall->size);
522 buf_put_int8(bufp, fcall->id);
523 buf_put_int16(bufp, fcall->tag);
524
525 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
526 fcall->tag);
527
528 /* now encode it */
529 switch (fcall->id) {
530 default:
531 eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
532 return -EPROTO;
533 case TVERSION:
534 buf_put_int32(bufp, fcall->params.tversion.msize);
535 buf_put_string(bufp, fcall->params.tversion.version);
536 break;
537 case TAUTH:
538 buf_put_int32(bufp, fcall->params.tauth.afid);
539 buf_put_string(bufp, fcall->params.tauth.uname);
540 buf_put_string(bufp, fcall->params.tauth.aname);
541 break;
542 case TFLUSH:
543 buf_put_int16(bufp, fcall->params.tflush.oldtag);
544 break;
545 case TATTACH:
546 buf_put_int32(bufp, fcall->params.tattach.fid);
547 buf_put_int32(bufp, fcall->params.tattach.afid);
548 buf_put_string(bufp, fcall->params.tattach.uname);
549 buf_put_string(bufp, fcall->params.tattach.aname);
550 break;
551 case TWALK:
552 buf_put_int32(bufp, fcall->params.twalk.fid);
553 buf_put_int32(bufp, fcall->params.twalk.newfid);
554 buf_put_int16(bufp, fcall->params.twalk.nwname);
555 for (i = 0; i < fcall->params.twalk.nwname; i++)
556 buf_put_string(bufp, fcall->params.twalk.wnames[i]);
557 break;
558 case TOPEN:
559 buf_put_int32(bufp, fcall->params.topen.fid);
560 buf_put_int8(bufp, fcall->params.topen.mode);
561 break;
562 case TCREATE:
563 buf_put_int32(bufp, fcall->params.tcreate.fid);
564 buf_put_string(bufp, fcall->params.tcreate.name);
565 buf_put_int32(bufp, fcall->params.tcreate.perm);
566 buf_put_int8(bufp, fcall->params.tcreate.mode);
567 break;
568 case TREAD:
569 buf_put_int32(bufp, fcall->params.tread.fid);
570 buf_put_int64(bufp, fcall->params.tread.offset);
571 buf_put_int32(bufp, fcall->params.tread.count);
572 break;
573 case TWRITE:
574 buf_put_int32(bufp, fcall->params.twrite.fid);
575 buf_put_int64(bufp, fcall->params.twrite.offset);
576 buf_put_int32(bufp, fcall->params.twrite.count);
577 buf_put_data(bufp, fcall->params.twrite.data,
578 fcall->params.twrite.count);
579 break;
580 case TCLUNK:
581 buf_put_int32(bufp, fcall->params.tclunk.fid);
582 break;
583 case TREMOVE:
584 buf_put_int32(bufp, fcall->params.tremove.fid);
585 break;
586 case TSTAT:
587 buf_put_int32(bufp, fcall->params.tstat.fid);
588 break;
589 case TWSTAT:
590 buf_put_int32(bufp, fcall->params.twstat.fid);
591 stat = fcall->params.twstat.stat;
592
593 buf_put_int16(bufp, stat->size + 2);
594 serialize_stat(v9ses, stat, bufp);
595 break;
596 }
597 304
598 if (buf_check_overflow(bufp)) 305 if (buf_check_overflow(bufp))
599 return -EIO; 306 return 0;
600 307 else
601 return fcall->size; 308 return bufp->p - p;
602} 309}
603 310
604/** 311/**
605 * deserialize_fcall - unmarshal a response 312 * deserialize_fcall - unmarshal a response
606 * @v9ses: session information
607 * @msgsize: size of rcall message
608 * @buf: received buffer 313 * @buf: received buffer
609 * @buflen: length of received buffer 314 * @buflen: length of received buffer
610 * @rcall: fcall structure to populate 315 * @rcall: fcall structure to populate
611 * @rcalllen: length of fcall structure to populate 316 * @rcalllen: length of fcall structure to populate
317 * @extended: non-zero if 9P2000.u
612 * 318 *
613 */ 319 */
614 320
615int 321int
616v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, 322v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
617 void *buf, u32 buflen, struct v9fs_fcall *rcall, 323 int extended)
618 int rcalllen)
619{ 324{
620 325
621 struct cbuf buffer; 326 struct cbuf buffer;
622 struct cbuf *bufp = &buffer; 327 struct cbuf *bufp = &buffer;
623 struct cbuf dbuffer;
624 struct cbuf *dbufp = &dbuffer;
625 int i = 0; 328 int i = 0;
626 329
627 buf_init(bufp, buf, buflen); 330 buf_init(bufp, buf, buflen);
628 buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
629 rcalllen - sizeof(struct v9fs_fcall));
630 331
631 rcall->size = msgsize; 332 rcall->size = buf_get_int32(bufp);
632 rcall->id = buf_get_int8(bufp); 333 rcall->id = buf_get_int8(bufp);
633 rcall->tag = buf_get_int16(bufp); 334 rcall->tag = buf_get_int16(bufp);
634 335
635 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id, 336 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
636 rcall->tag); 337 rcall->tag);
338
637 switch (rcall->id) { 339 switch (rcall->id) {
638 default: 340 default:
639 eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id); 341 eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
640 return -EPROTO; 342 return -EPROTO;
641 case RVERSION: 343 case RVERSION:
642 rcall->params.rversion.msize = buf_get_int32(bufp); 344 rcall->params.rversion.msize = buf_get_int32(bufp);
643 rcall->params.rversion.version = buf_get_stringb(bufp, dbufp); 345 buf_get_str(bufp, &rcall->params.rversion.version);
644 break; 346 break;
645 case RFLUSH: 347 case RFLUSH:
646 break; 348 break;
@@ -651,34 +353,27 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
651 break; 353 break;
652 case RWALK: 354 case RWALK:
653 rcall->params.rwalk.nwqid = buf_get_int16(bufp); 355 rcall->params.rwalk.nwqid = buf_get_int16(bufp);
654 rcall->params.rwalk.wqids = buf_alloc(dbufp, 356 if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
655 rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid)); 357 eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
656 if (rcall->params.rwalk.wqids) 358 V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
657 for (i = 0; i < rcall->params.rwalk.nwqid; i++) { 359 return -EPROTO;
658 rcall->params.rwalk.wqids[i].type = 360 }
659 buf_get_int8(bufp); 361
660 rcall->params.rwalk.wqids[i].version = 362 for (i = 0; i < rcall->params.rwalk.nwqid; i++)
661 buf_get_int16(bufp); 363 buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
662 rcall->params.rwalk.wqids[i].path =
663 buf_get_int64(bufp);
664 }
665 break; 364 break;
666 case ROPEN: 365 case ROPEN:
667 rcall->params.ropen.qid.type = buf_get_int8(bufp); 366 buf_get_qid(bufp, &rcall->params.ropen.qid);
668 rcall->params.ropen.qid.version = buf_get_int32(bufp);
669 rcall->params.ropen.qid.path = buf_get_int64(bufp);
670 rcall->params.ropen.iounit = buf_get_int32(bufp); 367 rcall->params.ropen.iounit = buf_get_int32(bufp);
671 break; 368 break;
672 case RCREATE: 369 case RCREATE:
673 rcall->params.rcreate.qid.type = buf_get_int8(bufp); 370 buf_get_qid(bufp, &rcall->params.rcreate.qid);
674 rcall->params.rcreate.qid.version = buf_get_int32(bufp);
675 rcall->params.rcreate.qid.path = buf_get_int64(bufp);
676 rcall->params.rcreate.iounit = buf_get_int32(bufp); 371 rcall->params.rcreate.iounit = buf_get_int32(bufp);
677 break; 372 break;
678 case RREAD: 373 case RREAD:
679 rcall->params.rread.count = buf_get_int32(bufp); 374 rcall->params.rread.count = buf_get_int32(bufp);
680 rcall->params.rread.data = buf_get_datab(bufp, dbufp, 375 rcall->params.rread.data = bufp->p;
681 rcall->params.rread.count); 376 buf_check_size(bufp, rcall->params.rread.count);
682 break; 377 break;
683 case RWRITE: 378 case RWRITE:
684 rcall->params.rwrite.count = buf_get_int32(bufp); 379 rcall->params.rwrite.count = buf_get_int32(bufp);
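
Note the zero-copy decode of RREAD above: rread.data is pointed directly at the payload inside the receive buffer, with buf_check_size() merely verifying that count bytes exist. The payload therefore lives and dies with the fcall, so callers must copy it out before freeing, as in this sketch (dest is a hypothetical caller-owned buffer):

    struct v9fs_fcall *rc = NULL;
    int ret;

    ret = v9fs_t_read(v9ses, fid, offset, count, &rc);
    if (ret > 0)
            memcpy(dest, rc->params.rread.data, ret);   /* copy first */
    kfree(rc);                                  /* payload dies here */
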
@@ -689,20 +384,443 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
689 break; 384 break;
690 case RSTAT: 385 case RSTAT:
691 buf_get_int16(bufp); 386 buf_get_int16(bufp);
692 rcall->params.rstat.stat = 387 buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
693 deserialize_statb(v9ses, bufp, dbufp);
694 break; 388 break;
695 case RWSTAT: 389 case RWSTAT:
696 break; 390 break;
697 case RERROR: 391 case RERROR:
698 rcall->params.rerror.error = buf_get_stringb(bufp, dbufp); 392 buf_get_str(bufp, &rcall->params.rerror.error);
699 if (v9ses->extended) 393 if (extended)
700 rcall->params.rerror.errno = buf_get_int16(bufp); 394 rcall->params.rerror.errno = buf_get_int16(bufp);
701 break; 395 break;
702 } 396 }
703 397
704 if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) 398 if (buf_check_overflow(bufp)) {
399 dprintk(DEBUG_ERROR, "buffer overflow\n");
705 return -EIO; 400 return -EIO;
401 }
402
403 return bufp->p - bufp->sp;
404}
405
406static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
407{
408 *p = val;
409 buf_put_int8(bufp, val);
410}
411
412static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
413{
414 *p = val;
415 buf_put_int16(bufp, val);
416}
417
418static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
419{
420 *p = val;
421 buf_put_int32(bufp, val);
422}
423
424static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
425{
426 *p = val;
427 buf_put_int64(bufp, val);
428}
706 429
707 return rcall->size; 430static inline void
431v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
432{
433 if (data) {
434 str->len = strlen(data);
435 str->str = bufp->p;
436 } else {
437 str->len = 0;
438 str->str = NULL;
439 }
440
441 buf_put_stringn(bufp, data, str->len);
442}
443
444static inline int
445v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
446 unsigned char **pdata)
447{
448 *pdata = buf_alloc(bufp, count);
449 return copy_from_user(*pdata, data, count);
450}
451
452static void
453v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
454 struct v9fs_stat *stat, int statsz, int extended)
455{
456 v9fs_put_int16(bufp, statsz, &stat->size);
457 v9fs_put_int16(bufp, wstat->type, &stat->type);
458 v9fs_put_int32(bufp, wstat->dev, &stat->dev);
459 v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
460 v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
461 v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
462 v9fs_put_int32(bufp, wstat->mode, &stat->mode);
463 v9fs_put_int32(bufp, wstat->atime, &stat->atime);
464 v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
465 v9fs_put_int64(bufp, wstat->length, &stat->length);
466
467 v9fs_put_str(bufp, wstat->name, &stat->name);
468 v9fs_put_str(bufp, wstat->uid, &stat->uid);
469 v9fs_put_str(bufp, wstat->gid, &stat->gid);
470 v9fs_put_str(bufp, wstat->muid, &stat->muid);
471
472 if (extended) {
473 v9fs_put_str(bufp, wstat->extension, &stat->extension);
474 v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
475 v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
476 v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
477 }
478}
479
480static struct v9fs_fcall *
481v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
482{
483 struct v9fs_fcall *fc;
484
485 size += 4 + 1 + 2; /* size[4] id[1] tag[2] */
486 fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL);
487 if (!fc)
488 return ERR_PTR(-ENOMEM);
489
490 fc->sdata = (char *)fc + sizeof(*fc);
491
492 buf_init(bufp, (char *)fc->sdata, size);
493 v9fs_put_int32(bufp, size, &fc->size);
494 v9fs_put_int8(bufp, id, &fc->id);
495 v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag);
496
497 return fc;
498}
499
500void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
501{
502 fc->tag = tag;
503 *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
504}
505
506struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
507{
508 int size;
509 struct v9fs_fcall *fc;
510 struct cbuf buffer;
511 struct cbuf *bufp = &buffer;
512
513 size = 4 + 2 + strlen(version); /* msize[4] version[s] */
514 fc = v9fs_create_common(bufp, size, TVERSION);
515 if (IS_ERR(fc))
516 goto error;
517
518 v9fs_put_int32(bufp, msize, &fc->params.tversion.msize);
519 v9fs_put_str(bufp, version, &fc->params.tversion.version);
520
521 if (buf_check_overflow(bufp)) {
522 kfree(fc);
523 fc = ERR_PTR(-ENOMEM);
524 }
525 error:
526 return fc;
527}
528
529struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
530{
531 int size;
532 struct v9fs_fcall *fc;
533 struct cbuf buffer;
534 struct cbuf *bufp = &buffer;
535
536 size = 4 + 2 + strlen(uname) + 2 + strlen(aname); /* afid[4] uname[s] aname[s] */
537 fc = v9fs_create_common(bufp, size, TAUTH);
538 if (IS_ERR(fc))
539 goto error;
540
541 v9fs_put_int32(bufp, afid, &fc->params.tauth.afid);
542 v9fs_put_str(bufp, uname, &fc->params.tauth.uname);
543 v9fs_put_str(bufp, aname, &fc->params.tauth.aname);
544
545 if (buf_check_overflow(bufp)) {
546 kfree(fc);
547 fc = ERR_PTR(-ENOMEM);
548 }
549 error:
550 return fc;
551}
552
553struct v9fs_fcall *
554v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
555{
556 int size;
557 struct v9fs_fcall *fc;
558 struct cbuf buffer;
559 struct cbuf *bufp = &buffer;
560
561 size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); /* fid[4] afid[4] uname[s] aname[s] */
562 fc = v9fs_create_common(bufp, size, TATTACH);
563 if (IS_ERR(fc))
564 goto error;
565
566 v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
567 v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
568 v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
569 v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
570
571 error:
572 return fc;
573}
574
575struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
576{
577 int size;
578 struct v9fs_fcall *fc;
579 struct cbuf buffer;
580 struct cbuf *bufp = &buffer;
581
582 size = 2; /* oldtag[2] */
583 fc = v9fs_create_common(bufp, size, TFLUSH);
584 if (IS_ERR(fc))
585 goto error;
586
587 v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
588
589 if (buf_check_overflow(bufp)) {
590 kfree(fc);
591 fc = ERR_PTR(-ENOMEM);
592 }
593 error:
594 return fc;
595}
596
597struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
598 char **wnames)
599{
600 int i, size;
601 struct v9fs_fcall *fc;
602 struct cbuf buffer;
603 struct cbuf *bufp = &buffer;
604
605 if (nwname > V9FS_MAXWELEM) {
606 dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
607 return NULL;
608 }
609
610 size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */
611 for (i = 0; i < nwname; i++) {
612 size += 2 + strlen(wnames[i]); /* wname[s] */
613 }
614
615 fc = v9fs_create_common(bufp, size, TWALK);
616 if (IS_ERR(fc))
617 goto error;
618
619 v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
620 v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
621 v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
622 for (i = 0; i < nwname; i++) {
623 v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
624 }
625
626 if (buf_check_overflow(bufp)) {
627 kfree(fc);
628 fc = ERR_PTR(-ENOMEM);
629 }
630 error:
631 return fc;
632}
633
634struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
635{
636 int size;
637 struct v9fs_fcall *fc;
638 struct cbuf buffer;
639 struct cbuf *bufp = &buffer;
640
641 size = 4 + 1; /* fid[4] mode[1] */
642 fc = v9fs_create_common(bufp, size, TOPEN);
643 if (IS_ERR(fc))
644 goto error;
645
646 v9fs_put_int32(bufp, fid, &fc->params.topen.fid);
647 v9fs_put_int8(bufp, mode, &fc->params.topen.mode);
648
649 if (buf_check_overflow(bufp)) {
650 kfree(fc);
651 fc = ERR_PTR(-ENOMEM);
652 }
653 error:
654 return fc;
655}
656
657struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
658{
659 int size;
660 struct v9fs_fcall *fc;
661 struct cbuf buffer;
662 struct cbuf *bufp = &buffer;
663
664 size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */
665 fc = v9fs_create_common(bufp, size, TCREATE);
666 if (IS_ERR(fc))
667 goto error;
668
669 v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid);
670 v9fs_put_str(bufp, name, &fc->params.tcreate.name);
671 v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
672 v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
673
674 if (buf_check_overflow(bufp)) {
675 kfree(fc);
676 fc = ERR_PTR(-ENOMEM);
677 }
678 error:
679 return fc;
680}
681
682struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
683{
684 int size;
685 struct v9fs_fcall *fc;
686 struct cbuf buffer;
687 struct cbuf *bufp = &buffer;
688
689 size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */
690 fc = v9fs_create_common(bufp, size, TREAD);
691 if (IS_ERR(fc))
692 goto error;
693
694 v9fs_put_int32(bufp, fid, &fc->params.tread.fid);
695 v9fs_put_int64(bufp, offset, &fc->params.tread.offset);
696 v9fs_put_int32(bufp, count, &fc->params.tread.count);
697
698 if (buf_check_overflow(bufp)) {
699 kfree(fc);
700 fc = ERR_PTR(-ENOMEM);
701 }
702 error:
703 return fc;
704}
705
706struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
707 const char __user * data)
708{
709 int size, err;
710 struct v9fs_fcall *fc;
711 struct cbuf buffer;
712 struct cbuf *bufp = &buffer;
713
714 size = 4 + 8 + 4 + count; /* fid[4] offset[8] count[4] data[count] */
715 fc = v9fs_create_common(bufp, size, TWRITE);
716 if (IS_ERR(fc))
717 goto error;
718
719 v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
720 v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
721 v9fs_put_int32(bufp, count, &fc->params.twrite.count);
722 err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
723 if (err) {
724 kfree(fc);
725 fc = ERR_PTR(err);
726 }
727
728 if (buf_check_overflow(bufp)) {
729 kfree(fc);
730 fc = ERR_PTR(-ENOMEM);
731 }
732 error:
733 return fc;
734}
735
736struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
737{
738 int size;
739 struct v9fs_fcall *fc;
740 struct cbuf buffer;
741 struct cbuf *bufp = &buffer;
742
743 size = 4; /* fid[4] */
744 fc = v9fs_create_common(bufp, size, TCLUNK);
745 if (IS_ERR(fc))
746 goto error;
747
748 v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid);
749
750 if (buf_check_overflow(bufp)) {
751 kfree(fc);
752 fc = ERR_PTR(-ENOMEM);
753 }
754 error:
755 return fc;
756}
757
758struct v9fs_fcall *v9fs_create_tremove(u32 fid)
759{
760 int size;
761 struct v9fs_fcall *fc;
762 struct cbuf buffer;
763 struct cbuf *bufp = &buffer;
764
765 size = 4; /* fid[4] */
766 fc = v9fs_create_common(bufp, size, TREMOVE);
767 if (IS_ERR(fc))
768 goto error;
769
770 v9fs_put_int32(bufp, fid, &fc->params.tremove.fid);
771
772 if (buf_check_overflow(bufp)) {
773 kfree(fc);
774 fc = ERR_PTR(-ENOMEM);
775 }
776 error:
777 return fc;
778}
779
780struct v9fs_fcall *v9fs_create_tstat(u32 fid)
781{
782 int size;
783 struct v9fs_fcall *fc;
784 struct cbuf buffer;
785 struct cbuf *bufp = &buffer;
786
787 size = 4; /* fid[4] */
788 fc = v9fs_create_common(bufp, size, TSTAT);
789 if (IS_ERR(fc))
790 goto error;
791
792 v9fs_put_int32(bufp, fid, &fc->params.tstat.fid);
793
794 if (buf_check_overflow(bufp)) {
795 kfree(fc);
796 fc = ERR_PTR(-ENOMEM);
797 }
798 error:
799 return fc;
800}
801
802struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
803 int extended)
804{
805 int size, statsz;
806 struct v9fs_fcall *fc;
807 struct cbuf buffer;
808 struct cbuf *bufp = &buffer;
809
810 statsz = v9fs_size_wstat(wstat, extended);
811 size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */
812 fc = v9fs_create_common(bufp, size, TWSTAT);
813 if (IS_ERR(fc))
814 goto error;
815
816 v9fs_put_int32(bufp, fid, &fc->params.twstat.fid);
817 buf_put_int16(bufp, statsz + 2);
818 v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended);
819
820 if (buf_check_overflow(bufp)) {
821 kfree(fc);
822 fc = ERR_PTR(-ENOMEM);
823 }
824 error:
825 return fc;
708} 826}
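All of the v9fs_create_t* constructors above follow one pattern: size the buffer from the wire-format comment, allocate through v9fs_create_common(), marshal the fields, and turn any cbuf overflow into ERR_PTR(-ENOMEM). A minimal caller sketch for illustration; demo_read() is hypothetical, while v9fs_create_tread() and v9fs_mux_rpc() are the interfaces added by this patch:

/* Hypothetical caller; not part of the patch. */
static int demo_read(struct v9fs_mux_data *m, u32 fid, u64 offset, u32 count)
{
	struct v9fs_fcall *tc, *rc = NULL;
	int err;

	tc = v9fs_create_tread(fid, offset, count); /* fid[4] offset[8] count[4] */
	if (IS_ERR(tc))
		return PTR_ERR(tc); /* allocation failure or cbuf overflow */

	err = v9fs_mux_rpc(m, tc, &rc); /* send Tread, wait for Rread/Rerror */

	kfree(tc);
	kfree(rc);
	return err;
}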
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
index ee849613c61a..26a736e4a2e7 100644
--- a/fs/9p/conv.h
+++ b/fs/9p/conv.h
@@ -1,8 +1,9 @@
1/* 1/*
2 * linux/fs/9p/conv.h 2 * linux/fs/9p/conv.h
3 * 3 *
4 * 9P protocol conversion definitions 4 * 9P protocol conversion definitions.
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> 8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 * 9 *
@@ -24,13 +25,27 @@
24 * 25 *
25 */ 26 */
26 27
27int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf, 28int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
28 u32 buflen, struct v9fs_stat *stat, u32 statlen); 29 int extended);
29int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall, 30int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
30 void *buf, u32 buflen); 31 int extended);
31int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
32 void *buf, u32 buflen, struct v9fs_fcall *rcall,
33 int rcalllen);
34 32
35/* this one is actually in error.c right now */ 33void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
36int v9fs_errstr2errno(char *errstr); 34
35struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
36struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname);
37struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
38 char *aname);
39struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
40struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
41 char **wnames);
42struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
43struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode);
44struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
45struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
46 const char __user *data);
47struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
48struct v9fs_fcall *v9fs_create_tremove(u32 fid);
49struct v9fs_fcall *v9fs_create_tstat(u32 fid);
50struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
51 int extended);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
index 4445f06919d9..fe551032788b 100644
--- a/fs/9p/debug.h
+++ b/fs/9p/debug.h
@@ -51,16 +51,23 @@ do { \
51#if DEBUG_DUMP_PKT 51#if DEBUG_DUMP_PKT
52static inline void dump_data(const unsigned char *data, unsigned int datalen) 52static inline void dump_data(const unsigned char *data, unsigned int datalen)
53{ 53{
54 int i, j; 54 int i, n;
55 int len = datalen; 55 char buf[5*8];
56 56
57 printk(KERN_DEBUG "data "); 57 n = 0;
58 for (i = 0; i < len; i += 4) { 58 i = 0;
59 for (j = 0; (j < 4) && (i + j < len); j++) 59 while (i < datalen) {
60 printk(KERN_DEBUG "%02x", data[i + j]); 60 n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
61 printk(KERN_DEBUG " "); 61 if (i%4 == 0)
62 n += snprintf(buf+n, sizeof(buf)-n, " ");
63
64 if (i%16 == 0) {
65 dprintk(DEBUG_ERROR, "%s\n", buf);
66 n = 0;
67 }
62 } 68 }
63 printk(KERN_DEBUG "\n"); 69
70 dprintk(DEBUG_ERROR, "%s\n", buf);
64} 71}
65#else /* DEBUG_DUMP_PKT */ 72#else /* DEBUG_DUMP_PKT */
66static inline void dump_data(const unsigned char *data, unsigned int datalen) 73static inline void dump_data(const unsigned char *data, unsigned int datalen)
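The rewritten dump_data() batches output into a 40-byte line buffer instead of issuing one printk per byte: two hex digits per input byte, a space after every fourth byte, one line flushed per 16 bytes. A user-space sketch of the same formatting, assuming only the C library (printf stands in for dprintk, and the final flush is guarded so an exact multiple of 16 bytes is not printed twice):

#include <stdio.h>

static void dump_data_demo(const unsigned char *data, unsigned int datalen)
{
	char buf[5*8];
	int i = 0, n = 0;

	while (i < datalen) {
		n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
		if (i%4 == 0)
			n += snprintf(buf+n, sizeof(buf)-n, " ");
		if (i%16 == 0) { /* full line: flush and restart */
			printf("%s\n", buf);
			n = 0;
		}
	}
	if (n) /* trailing partial line, if any */
		printf("%s\n", buf);
}

For the five input bytes 01 02 03 04 05 this prints a single line: "01020304 05".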
diff --git a/fs/9p/error.c b/fs/9p/error.c
index 834cb179e388..e4b6f8f38b6f 100644
--- a/fs/9p/error.c
+++ b/fs/9p/error.c
@@ -33,7 +33,6 @@
33 33
34#include <linux/list.h> 34#include <linux/list.h>
35#include <linux/jhash.h> 35#include <linux/jhash.h>
36#include <linux/string.h>
37 36
38#include "debug.h" 37#include "debug.h"
39#include "error.h" 38#include "error.h"
@@ -55,7 +54,8 @@ int v9fs_error_init(void)
55 54
56 /* load initial error map into hash table */ 55 /* load initial error map into hash table */
57 for (c = errmap; c->name != NULL; c++) { 56 for (c = errmap; c->name != NULL; c++) {
58 bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ; 57 c->namelen = strlen(c->name);
58 bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
59 INIT_HLIST_NODE(&c->list); 59 INIT_HLIST_NODE(&c->list);
60 hlist_add_head(&c->list, &hash_errmap[bucket]); 60 hlist_add_head(&c->list, &hash_errmap[bucket]);
61 } 61 }
@@ -69,15 +69,15 @@ int v9fs_error_init(void)
69 * 69 *
70 */ 70 */
71 71
72int v9fs_errstr2errno(char *errstr) 72int v9fs_errstr2errno(char *errstr, int len)
73{ 73{
74 int errno = 0; 74 int errno = 0;
75 struct hlist_node *p = NULL; 75 struct hlist_node *p = NULL;
76 struct errormap *c = NULL; 76 struct errormap *c = NULL;
77 int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ; 77 int bucket = jhash(errstr, len, 0) % ERRHASHSZ;
78 78
79 hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { 79 hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
80 if (!strcmp(c->name, errstr)) { 80 if (c->namelen==len && !memcmp(c->name, errstr, len)) {
81 errno = c->val; 81 errno = c->val;
82 break; 82 break;
83 } 83 }
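Caching namelen matters because 9P transmits counted strings with no terminating NUL; with a length parameter, both the jhash and the comparison can run directly on the wire bytes. A sketch of the intended lookup, mirroring what process_request() in fs/9p/mux.c does below; rerror_to_errno() is a hypothetical wrapper:

/* Hypothetical wrapper; the real caller is process_request() in mux.c. */
static int rerror_to_errno(struct v9fs_fcall *rcall)
{
	struct v9fs_str *ename = &rcall->params.rerror.error;
	int err;

	/* ename->str is not NUL-terminated; ename->len bounds the compare */
	err = v9fs_errstr2errno(ename->str, ename->len);
	if (!err) /* no errmap entry matched */
		err = -ESERVERFAULT;

	return err;
}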
diff --git a/fs/9p/error.h b/fs/9p/error.h
index 78f89acf7c9a..a9794e85fe51 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -36,6 +36,7 @@ struct errormap {
36 char *name; 36 char *name;
37 int val; 37 int val;
38 38
39 int namelen;
39 struct hlist_node list; 40 struct hlist_node list;
40}; 41};
41 42
@@ -175,4 +176,3 @@ static struct errormap errmap[] = {
175}; 176};
176 177
177extern int v9fs_error_init(void); 178extern int v9fs_error_init(void);
178extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index d95f8626d170..eda449778fa5 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -31,9 +31,6 @@
31#include "v9fs.h" 31#include "v9fs.h"
32#include "9p.h" 32#include "9p.h"
33#include "v9fs_vfs.h" 33#include "v9fs_vfs.h"
34#include "transport.h"
35#include "mux.h"
36#include "conv.h"
37#include "fid.h" 34#include "fid.h"
38 35
39/** 36/**
@@ -164,7 +161,7 @@ static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry)
164 return v9fs_fid_create(dentry, v9ses, fidnum, 0); 161 return v9fs_fid_create(dentry, v9ses, fidnum, 0);
165 162
166clunk_fid: 163clunk_fid:
167 v9fs_t_clunk(v9ses, fidnum, NULL); 164 v9fs_t_clunk(v9ses, fidnum);
168 return ERR_PTR(err); 165 return ERR_PTR(err);
169} 166}
170 167
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 8835b576f744..945cb368d451 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -4,7 +4,7 @@
4 * Protocol Multiplexer 4 * Protocol Multiplexer
5 * 5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> 7 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -28,448 +28,943 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/fs.h> 30#include <linux/fs.h>
31#include <linux/poll.h>
31#include <linux/kthread.h> 32#include <linux/kthread.h>
32#include <linux/idr.h> 33#include <linux/idr.h>
33 34
34#include "debug.h" 35#include "debug.h"
35#include "v9fs.h" 36#include "v9fs.h"
36#include "9p.h" 37#include "9p.h"
37#include "transport.h"
38#include "conv.h" 38#include "conv.h"
39#include "transport.h"
39#include "mux.h" 40#include "mux.h"
40 41
42#define ERREQFLUSH 1
43#define SCHED_TIMEOUT 10
44#define MAXPOLLWADDR 2
45
46enum {
47 Rworksched = 1, /* read work scheduled or running */
48 Rpending = 2, /* can read */
49 Wworksched = 4, /* write work scheduled or running */
50 Wpending = 8, /* can write */
51};
52
53struct v9fs_mux_poll_task;
54
55struct v9fs_req {
56 int tag;
57 struct v9fs_fcall *tcall;
58 struct v9fs_fcall *rcall;
59 int err;
60 v9fs_mux_req_callback cb;
61 void *cba;
62 struct list_head req_list;
63};
64
65struct v9fs_mux_data {
66 spinlock_t lock;
67 struct list_head mux_list;
68 struct v9fs_mux_poll_task *poll_task;
69 int msize;
70 unsigned char *extended;
71 struct v9fs_transport *trans;
72 struct v9fs_idpool tidpool;
73 int err;
74 wait_queue_head_t equeue;
75 struct list_head req_list;
76 struct list_head unsent_req_list;
77 struct v9fs_fcall *rcall;
78 int rpos;
79 char *rbuf;
80 int wpos;
81 int wsize;
82 char *wbuf;
83 wait_queue_t poll_wait[MAXPOLLWADDR];
84 wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
85 poll_table pt;
86 struct work_struct rq;
87 struct work_struct wq;
88 unsigned long wsched;
89};
90
91struct v9fs_mux_poll_task {
92 struct task_struct *task;
93 struct list_head mux_list;
94 int muxnum;
95};
96
97struct v9fs_mux_rpc {
98 struct v9fs_mux_data *m;
99 struct v9fs_req *req;
100 int err;
101 struct v9fs_fcall *rcall;
102 wait_queue_head_t wqueue;
103};
104
105static int v9fs_poll_proc(void *);
106static void v9fs_read_work(void *);
107static void v9fs_write_work(void *);
108static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
109 poll_table * p);
110static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
111static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
112
113static DECLARE_MUTEX(v9fs_mux_task_lock);
114static struct workqueue_struct *v9fs_mux_wq;
115
116static int v9fs_mux_num;
117static int v9fs_mux_poll_task_num;
118static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];
119
120int v9fs_mux_global_init(void)
121{
122 int i;
123
124 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++)
125 v9fs_mux_poll_tasks[i].task = NULL;
126
127 v9fs_mux_wq = create_workqueue("v9fs");
128 if (!v9fs_mux_wq)
129 return -ENOMEM;
130
131 return 0;
132}
133
134void v9fs_mux_global_exit(void)
135{
136 destroy_workqueue(v9fs_mux_wq);
137}
138
41/** 139/**
42 * dprintcond - print condition of session info 140 * v9fs_mux_calc_poll_procs - calculates the number of polling procs
43 * @v9ses: session info structure 141 * based on the number of mounted v9fs filesystems.
44 * @req: RPC request structure
45 * 142 *
 143 * The current implementation returns the ceiling of mounts per poll task.
46 */ 144 */
145inline int v9fs_mux_calc_poll_procs(int muxnum)
146{
147 int n;
148
149 if (v9fs_mux_poll_task_num)
150 n = muxnum / v9fs_mux_poll_task_num +
151 (muxnum % v9fs_mux_poll_task_num ? 1 : 0);
152 else
153 n = 1;
154
155 if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
156 n = ARRAY_SIZE(v9fs_mux_poll_tasks);
157
158 return n;
159}
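/* Worked example for the calculation above (illustrative, not in the patch):
 * with two poll tasks running, v9fs_mux_calc_poll_procs(5) = 5/2 + (5%2 ? 1 : 0)
 * = 3, i.e. the ceiling of muxes per running poll task, clamped to
 * ARRAY_SIZE(v9fs_mux_poll_tasks); with no tasks running yet it returns 1. */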
47 160
48static inline int 161static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
49dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
50{ 162{
51 dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status, 163 int i, n;
52 req->rcall); 164 struct v9fs_mux_poll_task *vpt, *vptlast;
165 struct task_struct *pproc;
166
167 dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
168 v9fs_mux_poll_task_num);
 169 down(&v9fs_mux_task_lock);
170
171 n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
172 if (n > v9fs_mux_poll_task_num) {
173 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
174 if (v9fs_mux_poll_tasks[i].task == NULL) {
175 vpt = &v9fs_mux_poll_tasks[i];
176 dprintk(DEBUG_MUX, "create proc %p\n", vpt);
177 pproc = kthread_create(v9fs_poll_proc, vpt,
178 "v9fs-poll");
179
180 if (!IS_ERR(pproc)) {
181 vpt->task = pproc;
182 INIT_LIST_HEAD(&vpt->mux_list);
183 vpt->muxnum = 0;
184 v9fs_mux_poll_task_num++;
185 wake_up_process(vpt->task);
186 }
187 break;
188 }
189 }
190
191 if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
192 dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
193 }
194
195 n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
196 ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
197
198 vptlast = NULL;
199 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
200 vpt = &v9fs_mux_poll_tasks[i];
201 if (vpt->task != NULL) {
202 vptlast = vpt;
203 if (vpt->muxnum < n) {
204 dprintk(DEBUG_MUX, "put in proc %d\n", i);
205 list_add(&m->mux_list, &vpt->mux_list);
206 vpt->muxnum++;
207 m->poll_task = vpt;
208 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
209 init_poll_funcptr(&m->pt, v9fs_pollwait);
210 break;
211 }
212 }
213 }
214
215 if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
216 if (vptlast == NULL)
217 return -ENOMEM;
218
219 dprintk(DEBUG_MUX, "put in proc %d\n", i);
220 list_add(&m->mux_list, &vptlast->mux_list);
221 vptlast->muxnum++;
222 m->poll_task = vptlast;
223 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
224 init_poll_funcptr(&m->pt, v9fs_pollwait);
225 }
226
227 v9fs_mux_num++;
 228 up(&v9fs_mux_task_lock);
229
53 return 0; 230 return 0;
54} 231}
55 232
233static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
234{
235 int i;
236 struct v9fs_mux_poll_task *vpt;
237
 238 down(&v9fs_mux_task_lock);
239 vpt = m->poll_task;
240 list_del(&m->mux_list);
241 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
242 if (m->poll_waddr[i] != NULL) {
243 remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
244 m->poll_waddr[i] = NULL;
245 }
246 }
247 vpt->muxnum--;
248 if (!vpt->muxnum) {
249 dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
250 send_sig(SIGKILL, vpt->task, 1);
251 vpt->task = NULL;
252 v9fs_mux_poll_task_num--;
253 }
254 v9fs_mux_num--;
 254 up(&v9fs_mux_task_lock);
256}
257
56/** 258/**
57 * xread - force read of a certain number of bytes 259 * v9fs_mux_init - allocate and initialize the per-session mux data
58 * @v9ses: session info structure 260 * Creates the polling task if this is the first session.
59 * @ptr: pointer to buffer
60 * @sz: number of bytes to read
61 * 261 *
62 * Chuck Cranor CS-533 project1 262 * @trans - transport structure
263 * @msize - maximum message size
264 * @extended - pointer to the extended flag
63 */ 265 */
64 266struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
65static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz) 267 unsigned char *extended)
66{ 268{
67 int rd = 0; 269 int i, n;
68 int ret = 0; 270 struct v9fs_mux_data *m, *mtmp;
69 while (rd < sz) { 271
70 ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd); 272 dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
71 if (ret <= 0) { 273 m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
72 dprintk(DEBUG_ERROR, "xread errno %d\n", ret); 274 if (!m)
73 return ret; 275 return ERR_PTR(-ENOMEM);
276
277 spin_lock_init(&m->lock);
278 INIT_LIST_HEAD(&m->mux_list);
279 m->msize = msize;
280 m->extended = extended;
281 m->trans = trans;
282 idr_init(&m->tidpool.pool);
283 init_MUTEX(&m->tidpool.lock);
284 m->err = 0;
285 init_waitqueue_head(&m->equeue);
286 INIT_LIST_HEAD(&m->req_list);
287 INIT_LIST_HEAD(&m->unsent_req_list);
288 m->rcall = NULL;
289 m->rpos = 0;
290 m->rbuf = NULL;
291 m->wpos = m->wsize = 0;
292 m->wbuf = NULL;
293 INIT_WORK(&m->rq, v9fs_read_work, m);
294 INIT_WORK(&m->wq, v9fs_write_work, m);
295 m->wsched = 0;
296 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
297 m->poll_task = NULL;
298 n = v9fs_mux_poll_start(m);
299 if (n)
300 return ERR_PTR(n);
301
302 n = trans->poll(trans, &m->pt);
303 if (n & POLLIN) {
304 dprintk(DEBUG_MUX, "mux %p can read\n", m);
305 set_bit(Rpending, &m->wsched);
306 }
307
308 if (n & POLLOUT) {
309 dprintk(DEBUG_MUX, "mux %p can write\n", m);
310 set_bit(Wpending, &m->wsched);
311 }
312
313 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
314 if (IS_ERR(m->poll_waddr[i])) {
315 v9fs_mux_poll_stop(m);
 316 mtmp = (void *)m->poll_waddr[i]; /* the stored error code */
317 kfree(m);
318 m = mtmp;
319 break;
74 } 320 }
75 rd += ret;
76 ptr += ret;
77 } 321 }
78 return (rd); 322
323 return m;
79} 324}
80 325
81/** 326/**
82 * read_message - read a full 9P2000 fcall packet 327 * v9fs_mux_destroy - cancels all pending requests and frees mux resources
83 * @v9ses: session info structure
84 * @rcall: fcall structure to read into
85 * @rcalllen: size of fcall buffer
86 *
87 */ 328 */
329void v9fs_mux_destroy(struct v9fs_mux_data *m)
330{
331 dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
332 m->mux_list.prev, m->mux_list.next);
333 v9fs_mux_cancel(m, -ECONNRESET);
334
335 if (!list_empty(&m->req_list)) {
336 /* wait until all processes waiting on this session exit */
337 dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
338 m);
339 wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
340 dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
341 list_empty(&m->req_list));
342 }
343
344 v9fs_mux_poll_stop(m);
345 m->trans = NULL;
346
347 kfree(m);
348}
88 349
89static int 350/**
 90read_message(struct v9fs_session_info *v9ses, 351 * v9fs_pollwait - called by the file's poll operation to add the v9fs-poll
 91 struct v9fs_fcall *rcall, int rcalllen) 352 * task to the file's wait queue
353 */
354static void
355v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
356 poll_table * p)
92{ 357{
93 unsigned char buf[4]; 358 int i;
94 void *data; 359 struct v9fs_mux_data *m;
95 int size = 0; 360
96 int res = 0; 361 m = container_of(p, struct v9fs_mux_data, pt);
97 362 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
98 res = xread(v9ses, buf, sizeof(buf)); 363 if (m->poll_waddr[i] == NULL)
99 if (res < 0) { 364 break;
100 dprintk(DEBUG_ERROR, 365
101 "Reading of count field failed returned: %d\n", res); 366 if (i >= ARRAY_SIZE(m->poll_waddr)) {
102 return res; 367 dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
368 return;
103 } 369 }
104 370
105 if (res < 4) { 371 m->poll_waddr[i] = wait_address;
106 dprintk(DEBUG_ERROR, 372
107 "Reading of count field failed returned: %d\n", res); 373 if (!wait_address) {
108 return -EIO; 374 dprintk(DEBUG_ERROR, "no wait_address\n");
375 m->poll_waddr[i] = ERR_PTR(-EIO);
376 return;
109 } 377 }
110 378
111 size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); 379 init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
112 dprintk(DEBUG_MUX, "got a packet count: %d\n", size); 380 add_wait_queue(wait_address, &m->poll_wait[i]);
381}
382
383/**
384 * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
385 */
386static inline void v9fs_poll_mux(struct v9fs_mux_data *m)
387{
388 int n;
113 389
114 /* adjust for the four bytes of size */ 390 if (m->err < 0)
115 size -= 4; 391 return;
116 392
117 if (size > v9ses->maxdata) { 393 n = m->trans->poll(m->trans, NULL);
118 dprintk(DEBUG_ERROR, "packet too big: %d\n", size); 394 if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
119 return -E2BIG; 395 dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
396 if (n >= 0)
397 n = -ECONNRESET;
398 v9fs_mux_cancel(m, n);
120 } 399 }
121 400
122 data = kmalloc(size, GFP_KERNEL); 401 if (n & POLLIN) {
123 if (!data) { 402 set_bit(Rpending, &m->wsched);
124 eprintk(KERN_WARNING, "out of memory\n"); 403 dprintk(DEBUG_MUX, "mux %p can read\n", m);
125 return -ENOMEM; 404 if (!test_and_set_bit(Rworksched, &m->wsched)) {
405 dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
406 queue_work(v9fs_mux_wq, &m->rq);
407 }
126 } 408 }
127 409
128 res = xread(v9ses, data, size); 410 if (n & POLLOUT) {
129 if (res < size) { 411 set_bit(Wpending, &m->wsched);
130 dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n", 412 dprintk(DEBUG_MUX, "mux %p can write\n", m);
131 res); 413 if ((m->wsize || !list_empty(&m->unsent_req_list))
132 kfree(data); 414 && !test_and_set_bit(Wworksched, &m->wsched)) {
133 return res; 415 dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
416 queue_work(v9fs_mux_wq, &m->wq);
417 }
134 } 418 }
419}
420
421/**
422 * v9fs_poll_proc - polls all v9fs transports for new events and queues
423 * the appropriate work to the work queue
424 */
425static int v9fs_poll_proc(void *a)
426{
427 struct v9fs_mux_data *m, *mtmp;
428 struct v9fs_mux_poll_task *vpt;
135 429
136 /* we now have an in-memory string that is the reply. 430 vpt = a;
137 * deserialize it. There is very little to go wrong at this point 431 dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
138 * save for v9fs_alloc errors. 432 allow_signal(SIGKILL);
139 */ 433 while (!kthread_should_stop()) {
140 res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata, 434 set_current_state(TASK_INTERRUPTIBLE);
141 rcall, rcalllen); 435 if (signal_pending(current))
436 break;
142 437
143 kfree(data); 438 list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
439 v9fs_poll_mux(m);
440 }
144 441
145 if (res < 0) 442 dprintk(DEBUG_MUX, "sleeping...\n");
146 return res; 443 schedule_timeout(SCHED_TIMEOUT * HZ);
444 }
147 445
446 __set_current_state(TASK_RUNNING);
447 dprintk(DEBUG_MUX, "finish\n");
148 return 0; 448 return 0;
149} 449}
150 450
151/** 451/**
152 * v9fs_recv - receive an RPC response for a particular tag 452 * v9fs_write_work - called when a transport can send some data
153 * @v9ses: session info structure
154 * @req: RPC request structure
155 *
156 */ 453 */
157 454static void v9fs_write_work(void *a)
158static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
159{ 455{
160 int ret = 0; 456 int n, err;
457 struct v9fs_mux_data *m;
458 struct v9fs_req *req;
161 459
162 dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag); 460 m = a;
163 ret = wait_event_interruptible(v9ses->read_wait,
164 ((v9ses->transport->status != Connected) ||
165 (req->rcall != 0) || (req->err < 0) ||
166 dprintcond(v9ses, req)));
167 461
168 dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall); 462 if (m->err < 0) {
463 clear_bit(Wworksched, &m->wsched);
464 return;
465 }
169 466
170 spin_lock(&v9ses->muxlock); 467 if (!m->wsize) {
171 list_del(&req->next); 468 if (list_empty(&m->unsent_req_list)) {
172 spin_unlock(&v9ses->muxlock); 469 clear_bit(Wworksched, &m->wsched);
470 return;
471 }
173 472
174 if (req->err < 0) 473 spin_lock(&m->lock);
175 return req->err; 474 req =
475 list_entry(m->unsent_req_list.next, struct v9fs_req,
476 req_list);
477 list_move_tail(&req->req_list, &m->req_list);
478 m->wbuf = req->tcall->sdata;
479 m->wsize = req->tcall->size;
480 m->wpos = 0;
481 dump_data(m->wbuf, m->wsize);
482 spin_unlock(&m->lock);
483 }
176 484
177 if (v9ses->transport->status == Disconnected) 485 dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
178 return -ECONNRESET; 486 clear_bit(Wpending, &m->wsched);
487 err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
488 dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
489 if (err == -EAGAIN) {
490 clear_bit(Wworksched, &m->wsched);
491 return;
492 }
179 493
180 return ret; 494 if (err <= 0)
181} 495 goto error;
182 496
183/** 497 m->wpos += err;
184 * v9fs_send - send a 9P request 498 if (m->wpos == m->wsize)
185 * @v9ses: session info structure 499 m->wpos = m->wsize = 0;
186 * @req: RPC request to send 500
187 * 501 if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
188 */ 502 if (test_and_clear_bit(Wpending, &m->wsched))
503 n = POLLOUT;
504 else
505 n = m->trans->poll(m->trans, NULL);
506
507 if (n & POLLOUT) {
508 dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
509 queue_work(v9fs_mux_wq, &m->wq);
510 } else
511 clear_bit(Wworksched, &m->wsched);
512 } else
513 clear_bit(Wworksched, &m->wsched);
514
515 return;
189 516
190static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) 517 error:
518 v9fs_mux_cancel(m, err);
519 clear_bit(Wworksched, &m->wsched);
520}
521
522static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
191{ 523{
192 int ret = -1; 524 int ecode, tag;
193 void *data = NULL; 525 struct v9fs_str *ename;
194 struct v9fs_fcall *tcall = req->tcall;
195 526
196 data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); 527 tag = req->tag;
197 if (!data) 528 if (req->rcall->id == RERROR && !req->err) {
198 return -ENOMEM; 529 ecode = req->rcall->params.rerror.errno;
530 ename = &req->rcall->params.rerror.error;
199 531
200 tcall->size = 0; /* enforce size recalculation */ 532 dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);
201 ret =
202 v9fs_serialize_fcall(v9ses, tcall, data,
203 v9ses->maxdata + V9FS_IOHDRSZ);
204 if (ret < 0)
205 goto free_data;
206 533
207 spin_lock(&v9ses->muxlock); 534 if (*m->extended)
208 list_add(&req->next, &v9ses->mux_fcalls); 535 req->err = -ecode;
209 spin_unlock(&v9ses->muxlock);
210 536
211 dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag, 537 if (!req->err) {
212 tcall->size); 538 req->err = v9fs_errstr2errno(ename->str, ename->len);
213 ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
214 539
215 if (ret != tcall->size) { 540 if (!req->err) { /* string match failed */
216 spin_lock(&v9ses->muxlock); 541 PRINT_FCALL_ERROR("unknown error", req->rcall);
217 list_del(&req->next); 542 }
218 kfree(req->rcall); 543
544 if (!req->err)
545 req->err = -ESERVERFAULT;
546 }
547 } else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
548 dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
549 req->tcall->id + 1, req->rcall->id);
550 if (!req->err)
551 req->err = -EIO;
552 }
219 553
220 spin_unlock(&v9ses->muxlock); 554 if (req->cb && req->err != ERREQFLUSH) {
221 if (ret >= 0) 555 dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n",
222 ret = -EREMOTEIO; 556 req->tcall, req->rcall);
557
558 (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
559 req->cb = NULL;
223 } else 560 } else
224 ret = 0; 561 kfree(req->rcall);
225 562
226 free_data: 563 v9fs_mux_put_tag(m, tag);
227 kfree(data); 564
228 return ret; 565 wake_up(&m->equeue);
566 kfree(req);
229} 567}
230 568
231/** 569/**
232 * v9fs_mux_rpc - send a request, receive a response 570 * v9fs_read_work - called when there is some data to be read from a transport
233 * @v9ses: session info structure
234 * @tcall: fcall to send
235 * @rcall: buffer to place response into
236 *
237 */ 571 */
238 572static void v9fs_read_work(void *a)
239long
240v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
241 struct v9fs_fcall **rcall)
242{ 573{
243 int tid = -1; 574 int n, err;
244 struct v9fs_fcall *fcall = NULL; 575 struct v9fs_mux_data *m;
245 struct v9fs_rpcreq req; 576 struct v9fs_req *req, *rptr, *rreq;
246 int ret = -1; 577 struct v9fs_fcall *rcall;
247 578 char *rbuf;
248 if (!v9ses) 579
249 return -EINVAL; 580 m = a;
250 581
251 if (!v9ses->transport || v9ses->transport->status != Connected) 582 if (m->err < 0)
252 return -EIO; 583 return;
584
585 rcall = NULL;
586 dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
587
588 if (!m->rcall) {
589 m->rcall =
590 kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
591 if (!m->rcall) {
592 err = -ENOMEM;
593 goto error;
594 }
253 595
254 if (rcall) 596 m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
255 *rcall = NULL; 597 m->rpos = 0;
598 }
256 599
257 if (tcall->id != TVERSION) { 600 clear_bit(Rpending, &m->wsched);
258 tid = v9fs_get_idpool(&v9ses->tidpool); 601 err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
259 if (tid < 0) 602 dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
260 return -ENOMEM; 603 if (err == -EAGAIN) {
604 clear_bit(Rworksched, &m->wsched);
605 return;
261 } 606 }
262 607
263 tcall->tag = tid; 608 if (err <= 0)
609 goto error;
264 610
265 req.tcall = tcall; 611 m->rpos += err;
266 req.err = 0; 612 while (m->rpos > 4) {
267 req.rcall = NULL; 613 n = le32_to_cpu(*(__le32 *) m->rbuf);
614 if (n >= m->msize) {
615 dprintk(DEBUG_ERROR,
616 "requested packet size too big: %d\n", n);
617 err = -EIO;
618 goto error;
619 }
268 620
269 ret = v9fs_send(v9ses, &req); 621 if (m->rpos < n)
622 break;
270 623
271 if (ret < 0) { 624 dump_data(m->rbuf, n);
272 if (tcall->id != TVERSION) 625 err =
273 v9fs_put_idpool(tid, &v9ses->tidpool); 626 v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
274 dprintk(DEBUG_MUX, "error %d\n", ret); 627 if (err < 0) {
275 return ret; 628 goto error;
276 } 629 }
630
631 rcall = m->rcall;
632 rbuf = m->rbuf;
633 if (m->rpos > n) {
634 m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
635 GFP_KERNEL);
636 if (!m->rcall) {
637 err = -ENOMEM;
638 goto error;
639 }
277 640
278 ret = v9fs_recv(v9ses, &req); 641 m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
279 642 memmove(m->rbuf, rbuf + n, m->rpos - n);
280 fcall = req.rcall; 643 m->rpos -= n;
281 644 } else {
282 dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret); 645 m->rcall = NULL;
283 if (ret == -ERESTARTSYS) { 646 m->rbuf = NULL;
284 if (v9ses->transport->status != Disconnected 647 m->rpos = 0;
285 && tcall->id != TFLUSH) {
286 unsigned long flags;
287
288 dprintk(DEBUG_MUX, "flushing the tag: %d\n",
289 tcall->tag);
290 clear_thread_flag(TIF_SIGPENDING);
291 v9fs_t_flush(v9ses, tcall->tag);
292 spin_lock_irqsave(&current->sighand->siglock, flags);
293 recalc_sigpending();
294 spin_unlock_irqrestore(&current->sighand->siglock,
295 flags);
296 dprintk(DEBUG_MUX, "flushing done\n");
297 } 648 }
298 649
299 goto release_req; 650 dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
300 } else if (ret < 0) 651 rcall->tag);
301 goto release_req; 652
302 653 req = NULL;
303 if (!fcall) 654 spin_lock(&m->lock);
304 ret = -EIO; 655 list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
305 else { 656 if (rreq->tag == rcall->tag) {
306 if (fcall->id == RERROR) { 657 req = rreq;
307 ret = v9fs_errstr2errno(fcall->params.rerror.error); 658 req->rcall = rcall;
308 if (ret == 0) { /* string match failed */ 659 list_del(&req->req_list);
309 if (fcall->params.rerror.errno) 660 spin_unlock(&m->lock);
310 ret = -(fcall->params.rerror.errno); 661 process_request(m, req);
311 else 662 break;
312 ret = -ESERVERFAULT;
313 } 663 }
314 } else if (fcall->id != tcall->id + 1) { 664
315 dprintk(DEBUG_ERROR, 665 }
316 "fcall mismatch: expected %d, got %d\n", 666
317 tcall->id + 1, fcall->id); 667 if (!req) {
318 ret = -EIO; 668 spin_unlock(&m->lock);
669 if (err >= 0 && rcall->id != RFLUSH)
670 dprintk(DEBUG_ERROR,
671 "unexpected response mux %p id %d tag %d\n",
672 m, rcall->id, rcall->tag);
673 kfree(rcall);
319 } 674 }
320 } 675 }
321 676
322 release_req: 677 if (!list_empty(&m->req_list)) {
323 if (tcall->id != TVERSION) 678 if (test_and_clear_bit(Rpending, &m->wsched))
324 v9fs_put_idpool(tid, &v9ses->tidpool); 679 n = POLLIN;
325 if (rcall) 680 else
326 *rcall = fcall; 681 n = m->trans->poll(m->trans, NULL);
327 else 682
328 kfree(fcall); 683 if (n & POLLIN) {
684 dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
685 queue_work(v9fs_mux_wq, &m->rq);
686 } else
687 clear_bit(Rworksched, &m->wsched);
688 } else
689 clear_bit(Rworksched, &m->wsched);
690
691 return;
329 692
330 return ret; 693 error:
694 v9fs_mux_cancel(m, err);
695 clear_bit(Rworksched, &m->wsched);
331} 696}
332 697
333/** 698/**
334 * v9fs_mux_cancel_requests - cancels all pending requests 699 * v9fs_send_request - send 9P request
700 * The function can sleep until the request is scheduled for sending.
701 * The function can be interrupted. Return from the function is not
 702 * a guarantee that the request was sent successfully. It can return
 703 * errors, which can be retrieved with the PTR_ERR macro.
335 * 704 *
336 * @v9ses: session info structure 705 * @m: mux data
337 * @err: error code to return to the requests 706 * @tc: request to be sent
707 * @cb: callback function to call when response is received
708 * @cba: parameter to pass to the callback function
338 */ 709 */
339void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err) 710static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
711 struct v9fs_fcall *tc,
712 v9fs_mux_req_callback cb, void *cba)
340{ 713{
341 struct v9fs_rpcreq *rptr; 714 int n;
342 struct v9fs_rpcreq *rreq; 715 struct v9fs_req *req;
343 716
344 dprintk(DEBUG_MUX, " %d\n", err); 717 dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
345 spin_lock(&v9ses->muxlock); 718 tc, tc->id);
346 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { 719 if (m->err < 0)
347 rreq->err = err; 720 return ERR_PTR(m->err);
348 }
349 spin_unlock(&v9ses->muxlock);
350 wake_up_all(&v9ses->read_wait);
351}
352 721
353/** 722 req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
354 * v9fs_recvproc - kproc to handle demultiplexing responses 723 if (!req)
355 * @data: session info structure 724 return ERR_PTR(-ENOMEM);
356 *
357 */
358 725
359static int v9fs_recvproc(void *data) 726 if (tc->id == TVERSION)
360{ 727 n = V9FS_NOTAG;
361 struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data; 728 else
362 struct v9fs_fcall *rcall = NULL; 729 n = v9fs_mux_get_tag(m);
363 struct v9fs_rpcreq *rptr;
364 struct v9fs_rpcreq *req;
365 struct v9fs_rpcreq *rreq;
366 int err = 0;
367 730
368 allow_signal(SIGKILL); 731 if (n < 0)
369 set_current_state(TASK_INTERRUPTIBLE); 732 return ERR_PTR(-ENOMEM);
370 complete(&v9ses->proccmpl);
371 while (!kthread_should_stop() && err >= 0) {
372 req = rptr = rreq = NULL;
373
374 rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
375 if (!rcall) {
376 eprintk(KERN_ERR, "no memory for buffers\n");
377 break;
378 }
379 733
380 err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ); 734 v9fs_set_tag(tc, n);
381 spin_lock(&v9ses->muxlock);
382 if (err < 0) {
383 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
384 rreq->err = err;
385 }
386 if(err != -ERESTARTSYS)
387 eprintk(KERN_ERR,
388 "Transport error while reading message %d\n", err);
389 } else {
390 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
391 if (rreq->tcall->tag == rcall->tag) {
392 req = rreq;
393 req->rcall = rcall;
394 break;
395 }
396 }
397 }
398 735
399 if (req && (req->tcall->id == TFLUSH)) { 736 req->tag = n;
400 struct v9fs_rpcreq *treq = NULL; 737 req->tcall = tc;
401 list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) { 738 req->rcall = NULL;
402 if (treq->tcall->tag == 739 req->err = 0;
403 req->tcall->params.tflush.oldtag) { 740 req->cb = cb;
404 list_del(&rptr->next); 741 req->cba = cba;
405 kfree(treq->rcall); 742
406 break; 743 spin_lock(&m->lock);
407 } 744 list_add_tail(&req->req_list, &m->unsent_req_list);
745 spin_unlock(&m->lock);
746
747 if (test_and_clear_bit(Wpending, &m->wsched))
748 n = POLLOUT;
749 else
750 n = m->trans->poll(m->trans, NULL);
751
752 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
753 queue_work(v9fs_mux_wq, &m->wq);
754
755 return req;
756}
757
758static inline void
759v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc,
760 int err)
761{
762 v9fs_mux_req_callback cb;
763 int tag;
764 struct v9fs_mux_data *m;
765 struct v9fs_req *req, *rptr;
766
767 m = a;
768 dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc,
769 rc, err, tc->params.tflush.oldtag);
770
771 spin_lock(&m->lock);
772 cb = NULL;
773 tag = tc->params.tflush.oldtag;
774 list_for_each_entry_safe(req, rptr, &m->req_list, req_list) {
775 if (req->tag == tag) {
776 list_del(&req->req_list);
777 if (req->cb) {
778 cb = req->cb;
779 req->cb = NULL;
780 spin_unlock(&m->lock);
781 (*cb) (req->cba, req->tcall, req->rcall,
782 req->err);
408 } 783 }
784 kfree(req);
785 wake_up(&m->equeue);
786 break;
409 } 787 }
788 }
410 789
411 spin_unlock(&v9ses->muxlock); 790 if (!cb)
791 spin_unlock(&m->lock);
412 792
413 if (!req) { 793 v9fs_mux_put_tag(m, tag);
414 if (err >= 0) 794 kfree(tc);
415 dprintk(DEBUG_ERROR, 795 kfree(rc);
416 "unexpected response: id %d tag %d\n", 796}
417 rcall->id, rcall->tag);
418 797
419 kfree(rcall); 798static void
420 } 799v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
800{
801 struct v9fs_fcall *fc;
421 802
422 wake_up_all(&v9ses->read_wait); 803 dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
423 set_current_state(TASK_INTERRUPTIBLE); 804
805 fc = v9fs_create_tflush(req->tag);
806 v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
807}
808
809static void
810v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err)
811{
812 struct v9fs_mux_rpc *r;
813
814 if (err == ERREQFLUSH) {
815 dprintk(DEBUG_MUX, "err req flush\n");
816 return;
424 } 817 }
425 818
426 v9ses->transport->close(v9ses->transport); 819 r = a;
820 dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req,
821 tc, rc, err);
822 r->rcall = rc;
823 r->err = err;
824 wake_up(&r->wqueue);
825}
427 826
428 /* Inform all pending processes about the failure */ 827/**
429 wake_up_all(&v9ses->read_wait); 828 * v9fs_mux_rpc - sends 9P request and waits until a response is available.
829 * The function can be interrupted.
830 * @m: mux data
831 * @tc: request to be sent
832 * @rc: pointer where a pointer to the response is stored
833 */
834int
835v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
836 struct v9fs_fcall **rc)
837{
838 int err;
839 unsigned long flags;
840 struct v9fs_req *req;
841 struct v9fs_mux_rpc r;
842
843 r.err = 0;
844 r.rcall = NULL;
845 r.m = m;
846 init_waitqueue_head(&r.wqueue);
847
848 if (rc)
849 *rc = NULL;
850
851 req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
852 if (IS_ERR(req)) {
853 err = PTR_ERR(req);
854 dprintk(DEBUG_MUX, "error %d\n", err);
 855 return err;
856 }
430 857
431 if (signal_pending(current)) 858 r.req = req;
432 complete(&v9ses->proccmpl); 859 dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc,
860 req->tag, &r, req);
861 err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
862 if (r.err < 0)
863 err = r.err;
864
865 if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
866 spin_lock(&m->lock);
867 req->tcall = NULL;
868 req->err = ERREQFLUSH;
869 spin_unlock(&m->lock);
870
871 clear_thread_flag(TIF_SIGPENDING);
872 v9fs_mux_flush_request(m, req);
873 spin_lock_irqsave(&current->sighand->siglock, flags);
874 recalc_sigpending();
875 spin_unlock_irqrestore(&current->sighand->siglock, flags);
876 }
433 877
434 dprintk(DEBUG_MUX, "recvproc: end\n"); 878 if (!err) {
435 v9ses->recvproc = NULL; 879 if (r.rcall)
880 dprintk(DEBUG_MUX, "got response id %d tag %d\n",
881 r.rcall->id, r.rcall->tag);
882
883 if (rc)
884 *rc = r.rcall;
885 else
886 kfree(r.rcall);
887 } else {
888 kfree(r.rcall);
889 dprintk(DEBUG_MUX, "got error %d\n", err);
890 if (err > 0)
891 err = -EIO;
892 }
436 893
437 return err >= 0; 894 return err;
438} 895}
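/* Illustrative timeline of the -ERESTARTSYS branch above (not in the patch):
 *
 *   v9fs_mux_rpc() blocks in wait_event_interruptible() on tag 5
 *   a signal arrives                      -> err = -ERESTARTSYS
 *   the request is marked ERREQFLUSH and a Tflush with oldtag 5 is queued
 *   through v9fs_mux_flush_request(); v9fs_mux_rpc() returns to the caller
 *   when the Rflush (or a late Rread) arrives, v9fs_mux_flush_cb() and
 *   process_request() discard the stale response and recycle tag 5
 */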
439 896
440/** 897/**
441 * v9fs_mux_init - initialize multiplexer (spawn kproc) 898 * v9fs_mux_rpcnb - sends 9P request without waiting for response.
442 * @v9ses: session info structure 899 * @m: mux data
443 * @dev_name: mount device information (to create unique kproc) 900 * @tc: request to be sent
444 * 901 * @cb: callback function to be called when response arrives
 902 * @a: value to pass to the callback function
445 */ 903 */
904int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
905 v9fs_mux_req_callback cb, void *a)
906{
907 int err;
908 struct v9fs_req *req;
909
910 req = v9fs_send_request(m, tc, cb, a);
911 if (IS_ERR(req)) {
912 err = PTR_ERR(req);
913 dprintk(DEBUG_MUX, "error %d\n", err);
 914 return err;
915 }
916
917 dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
918 return 0;
919}
446 920
447int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name) 921/**
922 * v9fs_mux_cancel - cancel all pending requests with error
923 * @m: mux data
924 * @err: error code
925 */
926void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
448{ 927{
449 char procname[60]; 928 struct v9fs_req *req, *rtmp;
450 929 LIST_HEAD(cancel_list);
451 strncpy(procname, dev_name, sizeof(procname)); 930
452 procname[sizeof(procname) - 1] = 0; 931 dprintk(DEBUG_MUX, "mux %p err %d\n", m, err);
453 932 m->err = err;
454 init_waitqueue_head(&v9ses->read_wait); 933 spin_lock(&m->lock);
455 init_completion(&v9ses->fcread); 934 list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
456 init_completion(&v9ses->proccmpl); 935 list_move(&req->req_list, &cancel_list);
457 spin_lock_init(&v9ses->muxlock);
458 INIT_LIST_HEAD(&v9ses->mux_fcalls);
459 v9ses->recvproc = NULL;
460 v9ses->curfcall = NULL;
461
462 v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
463 "v9fs_recvproc %s", procname);
464
465 if (IS_ERR(v9ses->recvproc)) {
466 eprintk(KERN_ERR, "cannot create receiving thread\n");
467 v9fs_session_close(v9ses);
468 return -ECONNABORTED;
469 } 936 }
937 spin_unlock(&m->lock);
470 938
471 wake_up_process(v9ses->recvproc); 939 list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
472 wait_for_completion(&v9ses->proccmpl); 940 list_del(&req->req_list);
941 if (!req->err)
942 req->err = err;
473 943
474 return 0; 944 if (req->cb)
945 (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
946 else
947 kfree(req->rcall);
948
949 kfree(req);
950 }
951
952 wake_up(&m->equeue);
953}
954
955static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
956{
957 int tag;
958
959 tag = v9fs_get_idpool(&m->tidpool);
960 if (tag < 0)
961 return V9FS_NOTAG;
962 else
963 return (u16) tag;
964}
965
966static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
967{
968 if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool))
969 v9fs_put_idpool(tag, &m->tidpool);
475} 970}
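Taken together, a session drives the new mux API roughly as follows. This is a hypothetical sketch: demo_session() is not in the patch and error handling is trimmed, while v9fs_mux_init(), v9fs_mux_rpc() and v9fs_mux_destroy() are as defined above:

/* Hypothetical lifecycle sketch; illustrative only. */
static int demo_session(struct v9fs_transport *trans, int msize,
			unsigned char *extended)
{
	struct v9fs_mux_data *m;
	struct v9fs_fcall *tc, *rc = NULL;
	int err;

	m = v9fs_mux_init(trans, msize, extended); /* joins or creates a poll task */
	if (IS_ERR(m))
		return PTR_ERR(m);

	tc = v9fs_create_tversion(msize, "9P2000");
	if (IS_ERR(tc)) {
		v9fs_mux_destroy(m);
		return PTR_ERR(tc);
	}

	err = v9fs_mux_rpc(m, tc, &rc); /* interruptible; may issue a Tflush */
	kfree(tc);
	kfree(rc);

	v9fs_mux_destroy(m); /* cancels anything still pending */
	return err;
}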
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index 4994cb10badf..9473b84f24b2 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -3,6 +3,7 @@
3 * 3 *
4 * Multiplexer Definitions 4 * Multiplexer Definitions
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
@@ -23,19 +24,35 @@
23 * 24 *
24 */ 25 */
25 26
26/* structure to manage each RPC transaction */ 27struct v9fs_mux_data;
27 28
28struct v9fs_rpcreq { 29/**
29 struct v9fs_fcall *tcall; 30 * v9fs_mux_req_callback - callback function that is called when the
30 struct v9fs_fcall *rcall; 31 * response of a request is received. The callback is called from
31 int err; /* error code if response failed */ 32 * a workqueue and shouldn't block.
33 *
 34 * @a - the pointer that was specified when the request was sent, to be
35 * passed to the callback
36 * @tc - request call
37 * @rc - response call
 38 * @err - error code (non-zero if an error occurred)
39 */
40typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc,
41 struct v9fs_fcall *rc, int err);
42
43int v9fs_mux_global_init(void);
44void v9fs_mux_global_exit(void);
32 45
33 /* XXX - could we put scatter/gather buffers here? */ 46struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
47 unsigned char *extended);
48void v9fs_mux_destroy(struct v9fs_mux_data *);
34 49
35 struct list_head next; 50int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
36}; 51struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
52int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
53int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
54 v9fs_mux_req_callback cb, void *a);
37 55
38int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name); 56void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
39long v9fs_mux_rpc(struct v9fs_session_info *v9ses, 57void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
40 struct v9fs_fcall *tcall, struct v9fs_fcall **rcall); 58int v9fs_errstr2errno(char *errstr, int len);
41void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
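For the non-blocking entry point the caller supplies a v9fs_mux_req_callback. A minimal hypothetical example; the callback runs from the v9fs workqueue, so it must not sleep, and with this patch it is expected to dispose of both fcalls (process_request() frees the response itself only when no callback is set):

/* Hypothetical async clunk; illustrative only. */
static void demo_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc,
		    int err)
{
	if (err)
		dprintk(DEBUG_MUX, "async request failed: %d\n", err);

	kfree(tc);
	kfree(rc);
}

static int demo_clunk_async(struct v9fs_mux_data *m, u32 fid)
{
	struct v9fs_fcall *tc;

	tc = v9fs_create_tclunk(fid);
	if (IS_ERR(tc))
		return PTR_ERR(tc);

	return v9fs_mux_rpcnb(m, tc, demo_cb, NULL);
}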
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
index 63b58ce98ff4..1a28ef97a3d1 100644
--- a/fs/9p/trans_fd.c
+++ b/fs/9p/trans_fd.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * File Descriptor Transport Layer 4 * File Descriptor Transport Layer
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
@@ -106,9 +107,6 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
106 return -ENOPROTOOPT; 107 return -ENOPROTOOPT;
107 } 108 }
108 109
109 sema_init(&trans->writelock, 1);
110 sema_init(&trans->readlock, 1);
111
112 ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL); 110 ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
113 111
114 if (!ts) 112 if (!ts)
@@ -148,12 +146,12 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
148 if (!trans) 146 if (!trans)
149 return; 147 return;
150 148
151 trans->status = Disconnected; 149 ts = xchg(&trans->priv, NULL);
152 ts = trans->priv;
153 150
154 if (!ts) 151 if (!ts)
155 return; 152 return;
156 153
154 trans->status = Disconnected;
157 if (ts->in_file) 155 if (ts->in_file)
158 fput(ts->in_file); 156 fput(ts->in_file);
159 157
@@ -163,10 +161,55 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
163 kfree(ts); 161 kfree(ts);
164} 162}
165 163
164static unsigned int
165v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
166{
167 int ret, n;
168 struct v9fs_trans_fd *ts;
169 mm_segment_t oldfs;
170
171 if (!trans)
172 return -EIO;
173
174 ts = trans->priv;
175 if (trans->status != Connected || !ts)
176 return -EIO;
177
178 oldfs = get_fs();
179 set_fs(get_ds());
180
181 if (!ts->in_file->f_op || !ts->in_file->f_op->poll) {
182 ret = -EIO;
183 goto end;
184 }
185
186 ret = ts->in_file->f_op->poll(ts->in_file, pt);
187
188 if (ts->out_file != ts->in_file) {
189 if (!ts->out_file->f_op || !ts->out_file->f_op->poll) {
190 ret = -EIO;
191 goto end;
192 }
193
194 n = ts->out_file->f_op->poll(ts->out_file, pt);
195
196 ret &= ~POLLOUT;
197 n &= ~POLLIN;
198
199 ret |= n;
200 }
201
202end:
203 set_fs(oldfs);
204 return ret;
205}
206
207
166struct v9fs_transport v9fs_trans_fd = { 208struct v9fs_transport v9fs_trans_fd = {
167 .init = v9fs_fd_init, 209 .init = v9fs_fd_init,
168 .write = v9fs_fd_send, 210 .write = v9fs_fd_send,
169 .read = v9fs_fd_recv, 211 .read = v9fs_fd_recv,
170 .close = v9fs_fd_close, 212 .close = v9fs_fd_close,
213 .poll = v9fs_fd_poll,
171}; 214};
172 215
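v9fs_fd_poll() has to merge readiness from two possibly distinct descriptors: readability is only meaningful on in_file, writability only on out_file. That is what the masking before the OR accomplishes; an illustration, not part of the patch:

/*
 *   ret = poll(in_file)   may report POLLIN | POLLOUT
 *   n   = poll(out_file)  may report POLLIN | POLLOUT
 *
 *   ret &= ~POLLOUT;  writability of the read side is irrelevant
 *   n   &= ~POLLIN;   readability of the write side is irrelevant
 *   ret |= n;         POLLIN from in_file, POLLOUT from out_file
 */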
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
index 6a9a75d40f73..44e830697acb 100644
--- a/fs/9p/trans_sock.c
+++ b/fs/9p/trans_sock.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Socket Transport Layer 4 * Socket Transport Layer
5 * 5 *
6 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> 8 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
8 * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de> 9 * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
@@ -36,6 +37,7 @@
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <linux/inet.h> 38#include <linux/inet.h>
38#include <linux/idr.h> 39#include <linux/idr.h>
40#include <linux/file.h>
39 41
40#include "debug.h" 42#include "debug.h"
41#include "v9fs.h" 43#include "v9fs.h"
@@ -45,6 +47,7 @@
45 47
46struct v9fs_trans_sock { 48struct v9fs_trans_sock {
47 struct socket *s; 49 struct socket *s;
50 struct file *filp;
48}; 51};
49 52
50/** 53/**
@@ -57,41 +60,26 @@ struct v9fs_trans_sock {
57 60
58static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) 61static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
59{ 62{
60 struct msghdr msg; 63 int ret;
61 struct kvec iov; 64 struct v9fs_trans_sock *ts;
62 int result;
63 mm_segment_t oldfs;
64 struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
65 65
66 if (trans->status == Disconnected) 66 if (!trans || trans->status == Disconnected) {
67 dprintk(DEBUG_ERROR, "disconnected ...\n");
67 return -EREMOTEIO; 68 return -EREMOTEIO;
69 }
68 70
69 result = -EINVAL; 71 ts = trans->priv;
70
71 oldfs = get_fs();
72 set_fs(get_ds());
73
74 iov.iov_base = v;
75 iov.iov_len = len;
76 msg.msg_name = NULL;
77 msg.msg_namelen = 0;
78 msg.msg_iovlen = 1;
79 msg.msg_control = NULL;
80 msg.msg_controllen = 0;
81 msg.msg_namelen = 0;
82 msg.msg_flags = MSG_NOSIGNAL;
83 72
84 result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0); 73 if (!(ts->filp->f_flags & O_NONBLOCK))
74 dprintk(DEBUG_ERROR, "blocking read ...\n");
85 75
86 dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state); 76 ret = kernel_read(ts->filp, ts->filp->f_pos, v, len);
87 set_fs(oldfs); 77 if (ret <= 0) {
88 78 if (ret != -ERESTARTSYS && ret != -EAGAIN)
89 if (result <= 0) {
90 if (result != -ERESTARTSYS)
91 trans->status = Disconnected; 79 trans->status = Disconnected;
92 } 80 }
93 81
94 return result; 82 return ret;
95} 83}
96 84
97/** 85/**
@@ -104,40 +92,72 @@ static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
104 92
105static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len) 93static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
106{ 94{
107 struct kvec iov; 95 int ret;
108 struct msghdr msg;
109 int result = -1;
110 mm_segment_t oldfs; 96 mm_segment_t oldfs;
111 struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; 97 struct v9fs_trans_sock *ts;
112 98
113 dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len); 99 if (!trans || trans->status == Disconnected) {
114 dump_data(v, len); 100 dprintk(DEBUG_ERROR, "disconnected ...\n");
101 return -EREMOTEIO;
102 }
103
104 ts = trans->priv;
105 if (!ts) {
106 dprintk(DEBUG_ERROR, "no transport ...\n");
107 return -EREMOTEIO;
108 }
115 109
116 down(&trans->writelock); 110 if (!(ts->filp->f_flags & O_NONBLOCK))
111 dprintk(DEBUG_ERROR, "blocking write ...\n");
117 112
118 oldfs = get_fs(); 113 oldfs = get_fs();
119 set_fs(get_ds()); 114 set_fs(get_ds());
120 iov.iov_base = v; 115 ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos);
121 iov.iov_len = len;
122 msg.msg_name = NULL;
123 msg.msg_namelen = 0;
124 msg.msg_iovlen = 1;
125 msg.msg_control = NULL;
126 msg.msg_controllen = 0;
127 msg.msg_namelen = 0;
128 msg.msg_flags = MSG_NOSIGNAL;
129 result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
130 set_fs(oldfs); 116 set_fs(oldfs);
131 117
132 if (result < 0) { 118 if (ret < 0) {
133 if (result != -ERESTARTSYS) 119 if (ret != -ERESTARTSYS)
134 trans->status = Disconnected; 120 trans->status = Disconnected;
135 } 121 }
136 122
137 up(&trans->writelock); 123 return ret;
138 return result; 124}
125
126static unsigned int v9fs_sock_poll(struct v9fs_transport *trans,
 127 struct poll_table_struct *pt)
 128{
129 int ret;
130 struct v9fs_trans_sock *ts;
131 mm_segment_t oldfs;
132
133 if (!trans) {
134 dprintk(DEBUG_ERROR, "no transport\n");
135 return -EIO;
136 }
137
138 ts = trans->priv;
139 if (trans->status != Connected || !ts) {
140 dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status);
141 return -EIO;
142 }
143
144 oldfs = get_fs();
145 set_fs(get_ds());
146
147 if (!ts->filp->f_op || !ts->filp->f_op->poll) {
148 dprintk(DEBUG_ERROR, "no poll operation\n");
149 ret = -EIO;
150 goto end;
151 }
152
153 ret = ts->filp->f_op->poll(ts->filp, pt);
154
155end:
156 set_fs(oldfs);
157 return ret;
139} 158}
140 159
160
141/** 161/**
142 * v9fs_tcp_init - initialize TCP socket 162 * v9fs_tcp_init - initialize TCP socket
143 * @v9ses: session information 163 * @v9ses: session information
@@ -154,9 +174,9 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
154 int rc = 0; 174 int rc = 0;
155 struct v9fs_trans_sock *ts = NULL; 175 struct v9fs_trans_sock *ts = NULL;
156 struct v9fs_transport *trans = v9ses->transport; 176 struct v9fs_transport *trans = v9ses->transport;
177 int fd;
157 178
158 sema_init(&trans->writelock, 1); 179 trans->status = Disconnected;
159 sema_init(&trans->readlock, 1);
160 180
161 ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); 181 ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
162 182
@@ -165,6 +185,7 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
165 185
166 trans->priv = ts; 186 trans->priv = ts;
167 ts->s = NULL; 187 ts->s = NULL;
188 ts->filp = NULL;
168 189
169 if (!addr) 190 if (!addr)
170 return -EINVAL; 191 return -EINVAL;
@@ -185,7 +206,18 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
185 return rc; 206 return rc;
186 } 207 }
187 csocket->sk->sk_allocation = GFP_NOIO; 208 csocket->sk->sk_allocation = GFP_NOIO;
209
210 fd = sock_map_fd(csocket);
211 if (fd < 0) {
212 sock_release(csocket);
213 kfree(ts);
214 trans->priv = NULL;
215 return fd;
216 }
217
188 ts->s = csocket; 218 ts->s = csocket;
219 ts->filp = fget(fd);
220 ts->filp->f_flags |= O_NONBLOCK;
189 trans->status = Connected; 221 trans->status = Connected;
190 222
191 return 0; 223 return 0;
@@ -203,7 +235,7 @@ static int
203v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, 235v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
204 char *data) 236 char *data)
205{ 237{
206 int rc; 238 int rc, fd;
207 struct socket *csocket; 239 struct socket *csocket;
208 struct sockaddr_un sun_server; 240 struct sockaddr_un sun_server;
209 struct v9fs_transport *trans; 241 struct v9fs_transport *trans;
@@ -213,6 +245,8 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
213 csocket = NULL; 245 csocket = NULL;
214 trans = v9ses->transport; 246 trans = v9ses->transport;
215 247
248 trans->status = Disconnected;
249
216 if (strlen(dev_name) > UNIX_PATH_MAX) { 250 if (strlen(dev_name) > UNIX_PATH_MAX) {
217 eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", 251 eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
218 dev_name); 252 dev_name);
@@ -225,9 +259,7 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
225 259
226 trans->priv = ts; 260 trans->priv = ts;
227 ts->s = NULL; 261 ts->s = NULL;
228 262 ts->filp = NULL;
229 sema_init(&trans->writelock, 1);
230 sema_init(&trans->readlock, 1);
231 263
232 sun_server.sun_family = PF_UNIX; 264 sun_server.sun_family = PF_UNIX;
233 strcpy(sun_server.sun_path, dev_name); 265 strcpy(sun_server.sun_path, dev_name);
@@ -241,7 +273,18 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
241 return rc; 273 return rc;
242 } 274 }
243 csocket->sk->sk_allocation = GFP_NOIO; 275 csocket->sk->sk_allocation = GFP_NOIO;
276
277 fd = sock_map_fd(csocket);
278 if (fd < 0) {
279 sock_release(csocket);
280 kfree(ts);
281 trans->priv = NULL;
282 return fd;
283 }
284
244 ts->s = csocket; 285 ts->s = csocket;
286 ts->filp = fget(fd);
287 ts->filp->f_flags |= O_NONBLOCK;
245 trans->status = Connected; 288 trans->status = Connected;
246 289
247 return 0; 290 return 0;
@@ -262,12 +305,11 @@ static void v9fs_sock_close(struct v9fs_transport *trans)
262 305
263 ts = trans->priv; 306 ts = trans->priv;
264 307
265 if ((ts) && (ts->s)) { 308 if ((ts) && (ts->filp)) {
266 dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s); 309 fput(ts->filp);
267 sock_release(ts->s); 310 ts->filp = NULL;
268 ts->s = NULL; 311 ts->s = NULL;
269 trans->status = Disconnected; 312 trans->status = Disconnected;
270 dprintk(DEBUG_TRANS, "socket closed\n");
271 } 313 }
272 314
273 kfree(ts); 315 kfree(ts);
@@ -280,6 +322,7 @@ struct v9fs_transport v9fs_trans_tcp = {
280 .write = v9fs_sock_send, 322 .write = v9fs_sock_send,
281 .read = v9fs_sock_recv, 323 .read = v9fs_sock_recv,
282 .close = v9fs_sock_close, 324 .close = v9fs_sock_close,
325 .poll = v9fs_sock_poll,
283}; 326};
284 327
285struct v9fs_transport v9fs_trans_unix = { 328struct v9fs_transport v9fs_trans_unix = {
@@ -287,4 +330,5 @@ struct v9fs_transport v9fs_trans_unix = {
287 .write = v9fs_sock_send, 330 .write = v9fs_sock_send,
288 .read = v9fs_sock_recv, 331 .read = v9fs_sock_recv,
289 .close = v9fs_sock_close, 332 .close = v9fs_sock_close,
333 .poll = v9fs_sock_poll,
290}; 334};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
index 9e9cd418efd5..91fcdb94b361 100644
--- a/fs/9p/transport.h
+++ b/fs/9p/transport.h
@@ -3,6 +3,7 @@
3 * 3 *
4 * Transport Definition 4 * Transport Definition
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
@@ -31,14 +32,13 @@ enum v9fs_transport_status {
31 32
32struct v9fs_transport { 33struct v9fs_transport {
33 enum v9fs_transport_status status; 34 enum v9fs_transport_status status;
34 struct semaphore writelock;
35 struct semaphore readlock;
36 void *priv; 35 void *priv;
37 36
38 int (*init) (struct v9fs_session_info *, const char *, char *); 37 int (*init) (struct v9fs_session_info *, const char *, char *);
39 int (*write) (struct v9fs_transport *, void *, int); 38 int (*write) (struct v9fs_transport *, void *, int);
40 int (*read) (struct v9fs_transport *, void *, int); 39 int (*read) (struct v9fs_transport *, void *, int);
41 void (*close) (struct v9fs_transport *); 40 void (*close) (struct v9fs_transport *);
41 unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
42}; 42};
43 43
44extern struct v9fs_transport v9fs_trans_tcp; 44extern struct v9fs_transport v9fs_trans_tcp;
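
The new poll operation lets the mux layer wait for transport readiness without knowing the transport is socket-backed. The body of v9fs_sock_poll is not part of these hunks; one plausible implementation (a sketch only, assuming the private struct, here called v9fs_trans_sock, carries the filp set up earlier) simply delegates to the underlying file's poll method:

    /* hypothetical sketch of the socket transport's poll callback */
    static unsigned int
    v9fs_sock_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
    {
            struct v9fs_trans_sock *ts = trans->priv;

            if (!ts || !ts->filp)
                    return POLLERR;  /* transport already torn down */

            return ts->filp->f_op->poll(ts->filp, pt);
    }
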
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 418c3743fdee..5250c428fc1f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -37,7 +37,6 @@
37#include "v9fs_vfs.h" 37#include "v9fs_vfs.h"
38#include "transport.h" 38#include "transport.h"
39#include "mux.h" 39#include "mux.h"
40#include "conv.h"
41 40
42/* TODO: sysfs or debugfs interface */ 41/* TODO: sysfs or debugfs interface */
43int v9fs_debug_level = 0; /* feature-rific global debug level */ 42int v9fs_debug_level = 0; /* feature-rific global debug level */
@@ -213,7 +212,8 @@ retry:
213 return -1; 212 return -1;
214 } 213 }
215 214
216 error = idr_get_new(&p->pool, NULL, &i); 215 /* no need to store exactly p, we just need something non-null */
216 error = idr_get_new(&p->pool, p, &i);
217 up(&p->lock); 217 up(&p->lock);
218 218
219 if (error == -EAGAIN) 219 if (error == -EAGAIN)
@@ -243,6 +243,16 @@ void v9fs_put_idpool(int id, struct v9fs_idpool *p)
243} 243}
244 244
245/** 245/**
246 * v9fs_check_idpool - check if the specified id is available
247 * @id - id to check
248 * @p - pool
249 */
250int v9fs_check_idpool(int id, struct v9fs_idpool *p)
251{
252 return idr_find(&p->pool, id) != NULL;
253}
254
255/**
246 * v9fs_session_init - initialize session 256 * v9fs_session_init - initialize session
247 * @v9ses: session information structure 257 * @v9ses: session information structure
248 * @dev_name: device being mounted 258 * @dev_name: device being mounted
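
Two idpool details from the hunks above fit together: idr_get_new() now stores a non-NULL cookie (p itself, per the comment) precisely so that the new v9fs_check_idpool() can use idr_find(), which returns the stored pointer, to distinguish an allocated id from a free one; had NULL been stored, every allocated id would look free. Note that despite the kernel-doc wording "available", the helper returns true when the id is in use. A usage sketch:

    int i;

    if (idr_get_new(&p->pool, p, &i) == 0 && v9fs_check_idpool(i, p))
            /* idr_find(&p->pool, i) returned non-NULL: id i is in use */
            dprintk(DEBUG_9P, "id %d allocated\n", i);
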
@@ -259,6 +269,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
259 int n = 0; 269 int n = 0;
260 int newfid = -1; 270 int newfid = -1;
261 int retval = -EINVAL; 271 int retval = -EINVAL;
272 struct v9fs_str *version;
262 273
263 v9ses->name = __getname(); 274 v9ses->name = __getname();
264 if (!v9ses->name) 275 if (!v9ses->name)
@@ -281,9 +292,6 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
281 /* id pools that are session-dependent: FIDs and TIDs */ 292 /* id pools that are session-dependent: FIDs and TIDs */
282 idr_init(&v9ses->fidpool.pool); 293 idr_init(&v9ses->fidpool.pool);
283 init_MUTEX(&v9ses->fidpool.lock); 294 init_MUTEX(&v9ses->fidpool.lock);
284 idr_init(&v9ses->tidpool.pool);
285 init_MUTEX(&v9ses->tidpool.lock);
286
287 295
288 switch (v9ses->proto) { 296 switch (v9ses->proto) {
289 case PROTO_TCP: 297 case PROTO_TCP:
@@ -320,7 +328,12 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
320 v9ses->shutdown = 0; 328 v9ses->shutdown = 0;
321 v9ses->session_hung = 0; 329 v9ses->session_hung = 0;
322 330
323 if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) { 331 v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ,
332 &v9ses->extended);
333
334 if (IS_ERR(v9ses->mux)) {
335 retval = PTR_ERR(v9ses->mux);
336 v9ses->mux = NULL;
324 dprintk(DEBUG_ERROR, "problem initializing mux\n"); 337 dprintk(DEBUG_ERROR, "problem initializing mux\n");
325 goto SessCleanUp; 338 goto SessCleanUp;
326 } 339 }
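
v9fs_mux_init() now hands back a pointer-or-error rather than an int status, so failure is decoded with the standard IS_ERR()/PTR_ERR() pair. The idiom, as a minimal sketch:

    struct v9fs_mux_data *m;

    m = v9fs_mux_init(trans, msize, &extended);  /* args as in the hunk above */
    if (IS_ERR(m)) {
            int err = PTR_ERR(m);    /* recover the negative errno */
            m = NULL;                /* never store or dereference an ERR_PTR */
            return err;
    }
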
@@ -339,13 +352,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
339 goto FreeFcall; 352 goto FreeFcall;
340 } 353 }
341 354
342 /* Really should check for 9P1 and report error */ 355 version = &fcall->params.rversion.version;
343 if (!strcmp(fcall->params.rversion.version, "9P2000.u")) { 356 if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
344 dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n"); 357 dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
345 v9ses->extended = 1; 358 v9ses->extended = 1;
346 } else { 359 } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
347 dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n"); 360 dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
348 v9ses->extended = 0; 361 v9ses->extended = 0;
362 } else {
363 retval = -EREMOTEIO;
364 goto FreeFcall;
349 } 365 }
350 366
351 n = fcall->params.rversion.msize; 367 n = fcall->params.rversion.msize;
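
Version strings now arrive as counted v9fs_str values (a length plus non-NUL-terminated bytes), so the negotiation above compares length and contents with memcmp() rather than strcmp(), and an unrecognized dialect is refused with -EREMOTEIO instead of being silently treated as legacy 9P2000. A sketch of the comparison helper this implies, assuming struct v9fs_str has int len and char *str as used above:

    /* sketch: compare a counted 9P string against a C literal */
    static int v9fs_str_is(struct v9fs_str *s, const char *lit)
    {
            int n = strlen(lit);

            return s->len == n && memcmp(s->str, lit, n) == 0;
    }

Usage mirroring the hunk: v9fs_str_is(version, "9P2000.u") selects extended mode, v9fs_str_is(version, "9P2000") selects legacy mode, and anything else fails the mount.
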
@@ -381,7 +397,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
381 } 397 }
382 398
383 if (v9ses->afid != ~0) { 399 if (v9ses->afid != ~0) {
384 if (v9fs_t_clunk(v9ses, v9ses->afid, NULL)) 400 if (v9fs_t_clunk(v9ses, v9ses->afid))
385 dprintk(DEBUG_ERROR, "clunk failed\n"); 401 dprintk(DEBUG_ERROR, "clunk failed\n");
386 } 402 }
387 403
@@ -403,13 +419,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
403 419
404void v9fs_session_close(struct v9fs_session_info *v9ses) 420void v9fs_session_close(struct v9fs_session_info *v9ses)
405{ 421{
406 if (v9ses->recvproc) { 422 if (v9ses->mux) {
407 send_sig(SIGKILL, v9ses->recvproc, 1); 423 v9fs_mux_destroy(v9ses->mux);
408 wait_for_completion(&v9ses->proccmpl); 424 v9ses->mux = NULL;
409 } 425 }
410 426
411 if (v9ses->transport) 427 if (v9ses->transport) {
412 v9ses->transport->close(v9ses->transport); 428 v9ses->transport->close(v9ses->transport);
429 kfree(v9ses->transport);
430 v9ses->transport = NULL;
431 }
413 432
414 __putname(v9ses->name); 433 __putname(v9ses->name);
415 __putname(v9ses->remotename); 434 __putname(v9ses->remotename);
@@ -420,8 +439,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
420 * and cancel all pending requests. 439 * and cancel all pending requests.
421 */ 440 */
422void v9fs_session_cancel(struct v9fs_session_info *v9ses) { 441void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
442 dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses);
423 v9ses->transport->status = Disconnected; 443 v9ses->transport->status = Disconnected;
424 v9fs_mux_cancel_requests(v9ses, -EIO); 444 v9fs_mux_cancel(v9ses->mux, -EIO);
425} 445}
426 446
427extern int v9fs_error_init(void); 447extern int v9fs_error_init(void);
@@ -433,11 +453,17 @@ extern int v9fs_error_init(void);
433 453
434static int __init init_v9fs(void) 454static int __init init_v9fs(void)
435{ 455{
456 int ret;
457
436 v9fs_error_init(); 458 v9fs_error_init();
437 459
438 printk(KERN_INFO "Installing v9fs 9P2000 file system support\n"); 460 printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
439 461
440 return register_filesystem(&v9fs_fs_type); 462 ret = v9fs_mux_global_init();
463 if (!ret)
464 ret = register_filesystem(&v9fs_fs_type);
465
466 return ret;
441} 467}
442 468
443/** 469/**
@@ -447,6 +473,7 @@ static int __init init_v9fs(void)
447 473
448static void __exit exit_v9fs(void) 474static void __exit exit_v9fs(void)
449{ 475{
476 v9fs_mux_global_exit();
450 unregister_filesystem(&v9fs_fs_type); 477 unregister_filesystem(&v9fs_fs_type);
451} 478}
452 479
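
Module bring-up is now two-staged: whatever global state v9fs_mux_global_init() sets up must exist before the filesystem is registered, and module exit tears the stages down in reverse. A slightly fuller sketch of the same idiom, with an explicit rollback that the hunk above omits when register_filesystem() itself fails:

    static int __init init_v9fs(void)
    {
            int ret;

            ret = v9fs_mux_global_init();             /* stage 1: mux state */
            if (ret)
                    return ret;

            ret = register_filesystem(&v9fs_fs_type); /* stage 2: vfs hookup */
            if (ret)
                    v9fs_mux_global_exit();           /* unwind stage 1 on failure */

            return ret;
    }
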
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 45dcef42bdd6..f337da7a0eec 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -57,24 +57,14 @@ struct v9fs_session_info {
57 57
58 /* book keeping */ 58 /* book keeping */
59 struct v9fs_idpool fidpool; /* The FID pool for file descriptors */ 59 struct v9fs_idpool fidpool; /* The FID pool for file descriptors */
60 struct v9fs_idpool tidpool; /* The TID pool for transactions ids */
61 60
62 /* transport information */
63 struct v9fs_transport *transport; 61 struct v9fs_transport *transport;
62 struct v9fs_mux_data *mux;
64 63
65 int inprogress; /* session in progress => true */ 64 int inprogress; /* session in progress => true */
66 int shutdown; /* session shutting down. no more attaches. */ 65 int shutdown; /* session shutting down. no more attaches. */
67 unsigned char session_hung; 66 unsigned char session_hung;
68 67 struct dentry *debugfs_dir;
69 /* mux private data */
70 struct v9fs_fcall *curfcall;
71 wait_queue_head_t read_wait;
72 struct completion fcread;
73 struct completion proccmpl;
74 struct task_struct *recvproc;
75
76 spinlock_t muxlock;
77 struct list_head mux_fcalls;
78}; 68};
79 69
80/* possible values of ->proto */ 70/* possible values of ->proto */
@@ -84,11 +74,14 @@ enum {
84 PROTO_FD, 74 PROTO_FD,
85}; 75};
86 76
77extern struct dentry *v9fs_debugfs_root;
78
87int v9fs_session_init(struct v9fs_session_info *, const char *, char *); 79int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
88struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); 80struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
89void v9fs_session_close(struct v9fs_session_info *v9ses); 81void v9fs_session_close(struct v9fs_session_info *v9ses);
90int v9fs_get_idpool(struct v9fs_idpool *p); 82int v9fs_get_idpool(struct v9fs_idpool *p);
91void v9fs_put_idpool(int id, struct v9fs_idpool *p); 83void v9fs_put_idpool(int id, struct v9fs_idpool *p);
84int v9fs_check_idpool(int id, struct v9fs_idpool *p);
92void v9fs_session_cancel(struct v9fs_session_info *v9ses); 85void v9fs_session_cancel(struct v9fs_session_info *v9ses);
93 86
94#define V9FS_MAGIC 0x01021997 87#define V9FS_MAGIC 0x01021997
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 2f2cea7ee3e7..c78502ad00ed 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -45,9 +45,8 @@ extern struct dentry_operations v9fs_dentry_operations;
45 45
46struct inode *v9fs_get_inode(struct super_block *sb, int mode); 46struct inode *v9fs_get_inode(struct super_block *sb, int mode);
47ino_t v9fs_qid2ino(struct v9fs_qid *qid); 47ino_t v9fs_qid2ino(struct v9fs_qid *qid);
48void v9fs_mistat2inode(struct v9fs_stat *, struct inode *, 48void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *);
49 struct super_block *);
50int v9fs_dir_release(struct inode *inode, struct file *filp); 49int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file); 50int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat); 51void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat);
53void v9fs_dentry_release(struct dentry *); 52void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index a6aa947de0f9..2dd806dac9f1 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -40,7 +40,6 @@
40#include "v9fs.h" 40#include "v9fs.h"
41#include "9p.h" 41#include "9p.h"
42#include "v9fs_vfs.h" 42#include "v9fs_vfs.h"
43#include "conv.h"
44#include "fid.h" 43#include "fid.h"
45 44
46/** 45/**
@@ -95,24 +94,22 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
95 94
96void v9fs_dentry_release(struct dentry *dentry) 95void v9fs_dentry_release(struct dentry *dentry)
97{ 96{
97 int err;
98
98 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 99 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
99 100
100 if (dentry->d_fsdata != NULL) { 101 if (dentry->d_fsdata != NULL) {
101 struct list_head *fid_list = dentry->d_fsdata; 102 struct list_head *fid_list = dentry->d_fsdata;
102 struct v9fs_fid *temp = NULL; 103 struct v9fs_fid *temp = NULL;
103 struct v9fs_fid *current_fid = NULL; 104 struct v9fs_fid *current_fid = NULL;
104 struct v9fs_fcall *fcall = NULL;
105 105
106 list_for_each_entry_safe(current_fid, temp, fid_list, list) { 106 list_for_each_entry_safe(current_fid, temp, fid_list, list) {
107 if (v9fs_t_clunk 107 err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
108 (current_fid->v9ses, current_fid->fid, &fcall))
109 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
110 FCALL_ERROR(fcall));
111 108
112 v9fs_put_idpool(current_fid->fid, 109 if (err < 0)
113 &current_fid->v9ses->fidpool); 110 dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
111 err, dentry->d_iname);
114 112
115 kfree(fcall);
116 v9fs_fid_destroy(current_fid); 113 v9fs_fid_destroy(current_fid);
117 } 114 }
118 115
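
The release path above iterates with list_for_each_entry_safe() because v9fs_fid_destroy() presumably unlinks and frees the entry being visited; the _safe variant caches the next node in a scratch cursor before the body can invalidate the current one. The general idiom:

    struct v9fs_fid *cur, *tmp;

    /* 'tmp' always holds the next node, so freeing 'cur' is safe */
    list_for_each_entry_safe(cur, tmp, fid_list, list) {
            if (v9fs_t_clunk(cur->v9ses, cur->fid) < 0)
                    dprintk(DEBUG_ERROR, "clunk failed\n");
            v9fs_fid_destroy(cur);   /* removes 'cur' from the list and frees it */
    }
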
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 57a43b8feef5..ae6d032b9b59 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -37,8 +37,8 @@
37#include "debug.h" 37#include "debug.h"
38#include "v9fs.h" 38#include "v9fs.h"
39#include "9p.h" 39#include "9p.h"
40#include "v9fs_vfs.h"
41#include "conv.h" 40#include "conv.h"
41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
44/** 44/**
@@ -74,20 +74,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
74 struct inode *inode = filp->f_dentry->d_inode; 74 struct inode *inode = filp->f_dentry->d_inode;
75 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); 75 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
76 struct v9fs_fid *file = filp->private_data; 76 struct v9fs_fid *file = filp->private_data;
77 unsigned int i, n; 77 unsigned int i, n, s;
78 int fid = -1; 78 int fid = -1;
79 int ret = 0; 79 int ret = 0;
80 struct v9fs_stat *mi = NULL; 80 struct v9fs_stat stat;
81 int over = 0; 81 int over = 0;
82 82
83 dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name); 83 dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
84 84
85 fid = file->fid; 85 fid = file->fid;
86 86
87 mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
88 if (!mi)
89 return -ENOMEM;
90
91 if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { 87 if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
92 kfree(file->rdir_fcall); 88 kfree(file->rdir_fcall);
93 file->rdir_fcall = NULL; 89 file->rdir_fcall = NULL;
@@ -97,20 +93,20 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
97 n = file->rdir_fcall->params.rread.count; 93 n = file->rdir_fcall->params.rread.count;
98 i = file->rdir_fpos; 94 i = file->rdir_fpos;
99 while (i < n) { 95 while (i < n) {
100 int s = v9fs_deserialize_stat(v9ses, 96 s = v9fs_deserialize_stat(
101 file->rdir_fcall->params.rread.data + i, 97 file->rdir_fcall->params.rread.data + i,
102 n - i, mi, v9ses->maxdata); 98 n - i, &stat, v9ses->extended);
103 99
104 if (s == 0) { 100 if (s == 0) {
105 dprintk(DEBUG_ERROR, 101 dprintk(DEBUG_ERROR,
106 "error while deserializing mistat\n"); 102 "error while deserializing stat\n");
107 ret = -EIO; 103 ret = -EIO;
108 goto FreeStructs; 104 goto FreeStructs;
109 } 105 }
110 106
111 over = filldir(dirent, mi->name, strlen(mi->name), 107 over = filldir(dirent, stat.name.str, stat.name.len,
112 filp->f_pos, v9fs_qid2ino(&mi->qid), 108 filp->f_pos, v9fs_qid2ino(&stat.qid),
113 dt_type(mi)); 109 dt_type(&stat));
114 110
115 if (over) { 111 if (over) {
116 file->rdir_fpos = i; 112 file->rdir_fpos = i;
@@ -130,7 +126,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
130 126
131 while (!over) { 127 while (!over) {
132 ret = v9fs_t_read(v9ses, fid, filp->f_pos, 128 ret = v9fs_t_read(v9ses, fid, filp->f_pos,
133 v9ses->maxdata-V9FS_IOHDRSZ, &fcall); 129 v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
134 if (ret < 0) { 130 if (ret < 0) {
135 dprintk(DEBUG_ERROR, "error while reading: %d: %p\n", 131 dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
136 ret, fcall); 132 ret, fcall);
@@ -141,19 +137,18 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
141 n = ret; 137 n = ret;
142 i = 0; 138 i = 0;
143 while (i < n) { 139 while (i < n) {
144 int s = v9fs_deserialize_stat(v9ses, 140 s = v9fs_deserialize_stat(fcall->params.rread.data + i,
145 fcall->params.rread.data + i, n - i, mi, 141 n - i, &stat, v9ses->extended);
146 v9ses->maxdata);
147 142
148 if (s == 0) { 143 if (s == 0) {
149 dprintk(DEBUG_ERROR, 144 dprintk(DEBUG_ERROR,
150 "error while deserializing mistat\n"); 145 "error while deserializing stat\n");
151 return -EIO; 146 return -EIO;
152 } 147 }
153 148
154 over = filldir(dirent, mi->name, strlen(mi->name), 149 over = filldir(dirent, stat.name.str, stat.name.len,
155 filp->f_pos, v9fs_qid2ino(&mi->qid), 150 filp->f_pos, v9fs_qid2ino(&stat.qid),
156 dt_type(mi)); 151 dt_type(&stat));
157 152
158 if (over) { 153 if (over) {
159 file->rdir_fcall = fcall; 154 file->rdir_fcall = fcall;
@@ -172,7 +167,6 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
172 167
173 FreeStructs: 168 FreeStructs:
174 kfree(fcall); 169 kfree(fcall);
175 kfree(mi);
176 return ret; 170 return ret;
177} 171}
178 172
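
Both readdir loops above now decode directory entries straight out of the Rread payload into a stack v9fs_stat, with no per-call kmalloc. v9fs_deserialize_stat() returns the number of bytes it consumed, with 0 flagging a truncated or malformed record, and that return value advances the cursor. A sketch of the consuming loop (emit_entry() is a hypothetical stand-in for the filldir call):

    unsigned int i = 0, n = fcall->params.rread.count, s;
    struct v9fs_stat st;

    while (i < n) {
            s = v9fs_deserialize_stat(fcall->params.rread.data + i,
                                      n - i, &st, extended);
            if (s == 0)
                    return -EIO;     /* short or garbled stat record */

            /* st.name is counted, not NUL-terminated: pass (str, len) */
            emit_entry(st.name.str, st.name.len, v9fs_qid2ino(&st.qid));
            i += s;                  /* advance by the bytes just decoded */
    }
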
@@ -193,18 +187,15 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
193 fid->fid); 187 fid->fid);
194 fidnum = fid->fid; 188 fidnum = fid->fid;
195 189
196 filemap_fdatawrite(inode->i_mapping); 190 filemap_write_and_wait(inode->i_mapping);
197 filemap_fdatawait(inode->i_mapping);
198 191
199 if (fidnum >= 0) { 192 if (fidnum >= 0) {
200 dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen, 193 dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
201 fid->fid); 194 fid->fid);
202 195
203 if (v9fs_t_clunk(v9ses, fidnum, NULL)) 196 if (v9fs_t_clunk(v9ses, fidnum))
204 dprintk(DEBUG_ERROR, "clunk failed\n"); 197 dprintk(DEBUG_ERROR, "clunk failed\n");
205 198
206 v9fs_put_idpool(fid->fid, &v9ses->fidpool);
207
208 kfree(fid->rdir_fcall); 199 kfree(fid->rdir_fcall);
209 kfree(fid); 200 kfree(fid);
210 201
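
Here, and again in the file-locking hunk further down, the back-to-back filemap_fdatawrite()/filemap_fdatawait() pair collapses into filemap_write_and_wait(). Ignoring the real helper's short-circuit when the mapping has no pages, it is essentially (sketch):

    int filemap_write_and_wait(struct address_space *mapping)
    {
            int err = filemap_fdatawrite(mapping);   /* kick off writeback */
            int err2 = filemap_fdatawait(mapping);   /* wait for completion */

            return err ? err : err2;                 /* first error wins */
    }
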
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 89c849da8504..6852f0eb96ed 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -32,6 +32,7 @@
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/smp_lock.h> 33#include <linux/smp_lock.h>
34#include <linux/inet.h> 34#include <linux/inet.h>
35#include <linux/version.h>
35#include <linux/list.h> 36#include <linux/list.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <linux/idr.h> 38#include <linux/idr.h>
@@ -117,9 +118,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
117 118
118 result = v9fs_t_open(v9ses, newfid, open_mode, &fcall); 119 result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
119 if (result < 0) { 120 if (result < 0) {
120 dprintk(DEBUG_ERROR, 121 PRINT_FCALL_ERROR("open failed", fcall);
121 "open failed, open_mode 0x%x: %s\n", open_mode,
122 FCALL_ERROR(fcall));
123 kfree(fcall); 122 kfree(fcall);
124 return result; 123 return result;
125 } 124 }
@@ -165,8 +164,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
165 return -ENOLCK; 164 return -ENOLCK;
166 165
167 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { 166 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
168 filemap_fdatawrite(inode->i_mapping); 167 filemap_write_and_wait(inode->i_mapping);
169 filemap_fdatawait(inode->i_mapping);
170 invalidate_inode_pages(&inode->i_data); 168 invalidate_inode_pages(&inode->i_data);
171 } 169 }
172 170
@@ -257,7 +255,6 @@ v9fs_file_write(struct file *filp, const char __user * data,
257 int result = -EIO; 255 int result = -EIO;
258 int rsize = 0; 256 int rsize = 0;
259 int total = 0; 257 int total = 0;
260 char *buf;
261 258
262 dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, 259 dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
263 (int)*offset); 260 (int)*offset);
@@ -265,28 +262,14 @@ v9fs_file_write(struct file *filp, const char __user * data,
265 if (v9fid->iounit != 0 && rsize > v9fid->iounit) 262 if (v9fid->iounit != 0 && rsize > v9fid->iounit)
266 rsize = v9fid->iounit; 263 rsize = v9fid->iounit;
267 264
268 buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL);
269 if (!buf)
270 return -ENOMEM;
271
272 do { 265 do {
273 if (count < rsize) 266 if (count < rsize)
274 rsize = count; 267 rsize = count;
275 268
276 result = copy_from_user(buf, data, rsize); 269 result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall);
277 if (result) {
278 dprintk(DEBUG_ERROR, "Problem copying from user\n");
279 kfree(buf);
280 return -EFAULT;
281 }
282
283 dump_data(buf, rsize);
284 result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall);
285 if (result < 0) { 270 if (result < 0) {
286 eprintk(KERN_ERR, "error while writing: %s(%d)\n", 271 PRINT_FCALL_ERROR("error while writing", fcall);
287 FCALL_ERROR(fcall), result);
288 kfree(fcall); 272 kfree(fcall);
289 kfree(buf);
290 return result; 273 return result;
291 } else 274 } else
292 *offset += result; 275 *offset += result;
@@ -306,7 +289,6 @@ v9fs_file_write(struct file *filp, const char __user * data,
306 total += result; 289 total += result;
307 } while (count); 290 } while (count);
308 291
309 kfree(buf);
310 return total; 292 return total;
311} 293}
312 294
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 0ea965c3bb7d..d933ef1fbd8a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -40,7 +40,6 @@
40#include "v9fs.h" 40#include "v9fs.h"
41#include "9p.h" 41#include "9p.h"
42#include "v9fs_vfs.h" 42#include "v9fs_vfs.h"
43#include "conv.h"
44#include "fid.h" 43#include "fid.h"
45 44
46static struct inode_operations v9fs_dir_inode_operations; 45static struct inode_operations v9fs_dir_inode_operations;
@@ -127,100 +126,32 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
127} 126}
128 127
129/** 128/**
130 * v9fs_blank_mistat - helper function to setup a 9P stat structure 129 * v9fs_blank_wstat - helper function to setup a 9P stat structure
131 * @v9ses: 9P session info (for determining extended mode) 130 * @v9ses: 9P session info (for determining extended mode)
132 * @mistat: structure to initialize 131 * @wstat: structure to initialize
133 * 132 *
134 */ 133 */
135 134
136static void 135static void
137v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat) 136v9fs_blank_wstat(struct v9fs_wstat *wstat)
138{ 137{
139 mistat->type = ~0; 138 wstat->type = ~0;
140 mistat->dev = ~0; 139 wstat->dev = ~0;
141 mistat->qid.type = ~0; 140 wstat->qid.type = ~0;
142 mistat->qid.version = ~0; 141 wstat->qid.version = ~0;
143 *((long long *)&mistat->qid.path) = ~0; 142 *((long long *)&wstat->qid.path) = ~0;
144 mistat->mode = ~0; 143 wstat->mode = ~0;
145 mistat->atime = ~0; 144 wstat->atime = ~0;
146 mistat->mtime = ~0; 145 wstat->mtime = ~0;
147 mistat->length = ~0; 146 wstat->length = ~0;
148 mistat->name = mistat->data; 147 wstat->name = NULL;
149 mistat->uid = mistat->data; 148 wstat->uid = NULL;
150 mistat->gid = mistat->data; 149 wstat->gid = NULL;
151 mistat->muid = mistat->data; 150 wstat->muid = NULL;
152 if (v9ses->extended) { 151 wstat->n_uid = ~0;
153 mistat->n_uid = ~0; 152 wstat->n_gid = ~0;
154 mistat->n_gid = ~0; 153 wstat->n_muid = ~0;
155 mistat->n_muid = ~0; 154 wstat->extension = NULL;
156 mistat->extension = mistat->data;
157 }
158 *mistat->data = 0;
159}
160
161/**
162 * v9fs_mistat2unix - convert mistat to unix stat
163 * @mistat: Plan 9 metadata (mistat) structure
164 * @buf: unix metadata (stat) structure to populate
165 * @sb: superblock
166 *
167 */
168
169static void
170v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
171 struct super_block *sb)
172{
173 struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
174
175 buf->st_nlink = 1;
176
177 buf->st_atime = mistat->atime;
178 buf->st_mtime = mistat->mtime;
179 buf->st_ctime = mistat->mtime;
180
181 buf->st_uid = (unsigned short)-1;
182 buf->st_gid = (unsigned short)-1;
183
184 if (v9ses && v9ses->extended) {
185 /* TODO: string to uid mapping via user-space daemon */
186 if (mistat->n_uid != -1)
187 sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
188
189 if (mistat->n_gid != -1)
190 sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
191 }
192
193 if (buf->st_uid == (unsigned short)-1)
194 buf->st_uid = v9ses->uid;
195 if (buf->st_gid == (unsigned short)-1)
196 buf->st_gid = v9ses->gid;
197
198 buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
199 if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
200 char type = 0;
201 int major = -1;
202 int minor = -1;
203 sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
204 switch (type) {
205 case 'c':
206 buf->st_mode &= ~S_IFBLK;
207 buf->st_mode |= S_IFCHR;
208 break;
209 case 'b':
210 break;
211 default:
212 dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
213 type, mistat->extension);
214 };
215 buf->st_rdev = MKDEV(major, minor);
216 } else
217 buf->st_rdev = 0;
218
219 buf->st_size = mistat->length;
220
221 buf->st_blksize = sb->s_blocksize;
222 buf->st_blocks =
223 (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
224} 155}
225 156
226/** 157/**
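
v9fs_blank_wstat() encodes the 9P convention that a Twstat leaves fields untouched unless they are set: all-ones for the numeric fields and, now that strings are counted, NULL for the string fields. Callers blank the structure and fill in only what should change; the rename path later in this patch does essentially:

    struct v9fs_wstat wstat;

    v9fs_blank_wstat(&wstat);                /* everything = "don't change" */
    wstat.muid = v9ses->name;                /* record who mutated the file */
    wstat.name = (char *) new_dentry->d_name.name;  /* the one real change */

    retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
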
@@ -312,12 +243,12 @@ v9fs_create(struct inode *dir,
312 struct inode *file_inode = NULL; 243 struct inode *file_inode = NULL;
313 struct v9fs_fcall *fcall = NULL; 244 struct v9fs_fcall *fcall = NULL;
314 struct v9fs_qid qid; 245 struct v9fs_qid qid;
315 struct stat newstat;
316 int dirfidnum = -1; 246 int dirfidnum = -1;
317 long newfid = -1; 247 long newfid = -1;
318 int result = 0; 248 int result = 0;
319 unsigned int iounit = 0; 249 unsigned int iounit = 0;
320 int wfidno = -1; 250 int wfidno = -1;
251 int err;
321 252
322 perm = unixmode2p9mode(v9ses, perm); 253 perm = unixmode2p9mode(v9ses, perm);
323 254
@@ -349,57 +280,64 @@ v9fs_create(struct inode *dir,
349 280
350 result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall); 281 result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
351 if (result < 0) { 282 if (result < 0) {
352 dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); 283 PRINT_FCALL_ERROR("clone error", fcall);
353 v9fs_put_idpool(newfid, &v9ses->fidpool); 284 v9fs_put_idpool(newfid, &v9ses->fidpool);
354 newfid = -1; 285 newfid = -1;
355 goto CleanUpFid; 286 goto CleanUpFid;
356 } 287 }
357 288
358 kfree(fcall); 289 kfree(fcall);
290 fcall = NULL;
359 291
360 result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name, 292 result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
361 perm, open_mode, &fcall); 293 perm, open_mode, &fcall);
362 if (result < 0) { 294 if (result < 0) {
363 dprintk(DEBUG_ERROR, "create fails: %s(%d)\n", 295 PRINT_FCALL_ERROR("create fails", fcall);
364 FCALL_ERROR(fcall), result);
365
366 goto CleanUpFid; 296 goto CleanUpFid;
367 } 297 }
368 298
369 iounit = fcall->params.rcreate.iounit; 299 iounit = fcall->params.rcreate.iounit;
370 qid = fcall->params.rcreate.qid; 300 qid = fcall->params.rcreate.qid;
371 kfree(fcall); 301 kfree(fcall);
302 fcall = NULL;
372 303
373 fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1); 304 if (!(perm&V9FS_DMDIR)) {
374 dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate); 305 fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
375 if (!fid) { 306 dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
376 result = -ENOMEM; 307 if (!fid) {
377 goto CleanUpFid; 308 result = -ENOMEM;
378 } 309 goto CleanUpFid;
310 }
379 311
380 fid->qid = qid; 312 fid->qid = qid;
381 fid->iounit = iounit; 313 fid->iounit = iounit;
314 } else {
315 err = v9fs_t_clunk(v9ses, newfid);
316 newfid = -1;
317 if (err < 0)
318 dprintk(DEBUG_ERROR, "clunk for mkdir failed: %d\n", err);
319 }
382 320
383 /* walk to the newly created file and put the fid in the dentry */ 321 /* walk to the newly created file and put the fid in the dentry */
384 wfidno = v9fs_get_idpool(&v9ses->fidpool); 322 wfidno = v9fs_get_idpool(&v9ses->fidpool);
385 if (newfid < 0) { 323 if (wfidno < 0) {
386 eprintk(KERN_WARNING, "no free fids available\n"); 324 eprintk(KERN_WARNING, "no free fids available\n");
387 return -ENOSPC; 325 return -ENOSPC;
388 } 326 }
389 327
390 result = v9fs_t_walk(v9ses, dirfidnum, wfidno, 328 result = v9fs_t_walk(v9ses, dirfidnum, wfidno,
391 (char *) file_dentry->d_name.name, NULL); 329 (char *) file_dentry->d_name.name, &fcall);
392 if (result < 0) { 330 if (result < 0) {
393 dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); 331 PRINT_FCALL_ERROR("clone error", fcall);
394 v9fs_put_idpool(wfidno, &v9ses->fidpool); 332 v9fs_put_idpool(wfidno, &v9ses->fidpool);
395 wfidno = -1; 333 wfidno = -1;
396 goto CleanUpFid; 334 goto CleanUpFid;
397 } 335 }
336 kfree(fcall);
337 fcall = NULL;
398 338
399 if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) { 339 if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) {
400 if (!v9fs_t_clunk(v9ses, newfid, &fcall)) { 340 v9fs_put_idpool(wfidno, &v9ses->fidpool);
401 v9fs_put_idpool(wfidno, &v9ses->fidpool);
402 }
403 341
404 goto CleanUpFid; 342 goto CleanUpFid;
405 } 343 }
@@ -409,62 +347,43 @@ v9fs_create(struct inode *dir,
409 (perm & V9FS_DMDEVICE)) 347 (perm & V9FS_DMDEVICE))
410 return 0; 348 return 0;
411 349
412 result = v9fs_t_stat(v9ses, newfid, &fcall); 350 result = v9fs_t_stat(v9ses, wfidno, &fcall);
413 if (result < 0) { 351 if (result < 0) {
414 dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall), 352 PRINT_FCALL_ERROR("stat error", fcall);
415 result);
416 goto CleanUpFid; 353 goto CleanUpFid;
417 } 354 }
418 355
419 v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
420 356
421 file_inode = v9fs_get_inode(sb, newstat.st_mode); 357 file_inode = v9fs_get_inode(sb,
358 p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode));
359
422 if ((!file_inode) || IS_ERR(file_inode)) { 360 if ((!file_inode) || IS_ERR(file_inode)) {
423 dprintk(DEBUG_ERROR, "create inode failed\n"); 361 dprintk(DEBUG_ERROR, "create inode failed\n");
424 result = -EBADF; 362 result = -EBADF;
425 goto CleanUpFid; 363 goto CleanUpFid;
426 } 364 }
427 365
428 v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb); 366 v9fs_stat2inode(&fcall->params.rstat.stat, file_inode, sb);
429 kfree(fcall); 367 kfree(fcall);
430 fcall = NULL; 368 fcall = NULL;
431 file_dentry->d_op = &v9fs_dentry_operations; 369 file_dentry->d_op = &v9fs_dentry_operations;
432 d_instantiate(file_dentry, file_inode); 370 d_instantiate(file_dentry, file_inode);
433 371
434 if (perm & V9FS_DMDIR) {
435 if (!v9fs_t_clunk(v9ses, newfid, &fcall))
436 v9fs_put_idpool(newfid, &v9ses->fidpool);
437 else
438 dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
439 FCALL_ERROR(fcall));
440 kfree(fcall);
441 fid->fidopen = 0;
442 fid->fidcreate = 0;
443 d_drop(file_dentry);
444 }
445
446 return 0; 372 return 0;
447 373
448 CleanUpFid: 374 CleanUpFid:
449 kfree(fcall); 375 kfree(fcall);
376 fcall = NULL;
450 377
451 if (newfid >= 0) { 378 if (newfid >= 0) {
452 if (!v9fs_t_clunk(v9ses, newfid, &fcall)) 379 err = v9fs_t_clunk(v9ses, newfid);
453 v9fs_put_idpool(newfid, &v9ses->fidpool); 380 if (err < 0)
454 else 381 dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
455 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
456 FCALL_ERROR(fcall));
457
458 kfree(fcall);
459 } 382 }
460 if (wfidno >= 0) { 383 if (wfidno >= 0) {
461 if (!v9fs_t_clunk(v9ses, wfidno, &fcall)) 384 err = v9fs_t_clunk(v9ses, wfidno);
462 v9fs_put_idpool(wfidno, &v9ses->fidpool); 385 if (err < 0)
463 else 386 dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
464 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
465 FCALL_ERROR(fcall));
466
467 kfree(fcall);
468 } 387 }
469 return result; 388 return result;
470} 389}
@@ -509,10 +428,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
509 } 428 }
510 429
511 result = v9fs_t_remove(v9ses, fid, &fcall); 430 result = v9fs_t_remove(v9ses, fid, &fcall);
512 if (result < 0) 431 if (result < 0) {
513 dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n", 432 PRINT_FCALL_ERROR("remove fails", fcall);
514 FCALL_ERROR(fcall), result); 433 } else {
515 else {
516 v9fs_put_idpool(fid, &v9ses->fidpool); 434 v9fs_put_idpool(fid, &v9ses->fidpool);
517 v9fs_fid_destroy(v9fid); 435 v9fs_fid_destroy(v9fid);
518 } 436 }
@@ -567,7 +485,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
567 struct v9fs_fid *fid; 485 struct v9fs_fid *fid;
568 struct inode *inode; 486 struct inode *inode;
569 struct v9fs_fcall *fcall = NULL; 487 struct v9fs_fcall *fcall = NULL;
570 struct stat newstat;
571 int dirfidnum = -1; 488 int dirfidnum = -1;
572 int newfid = -1; 489 int newfid = -1;
573 int result = 0; 490 int result = 0;
@@ -620,8 +537,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
620 goto FreeFcall; 537 goto FreeFcall;
621 } 538 }
622 539
623 v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); 540 inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
624 inode = v9fs_get_inode(sb, newstat.st_mode); 541 fcall->params.rstat.stat.mode));
625 542
626 if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { 543 if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
627 eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n", 544 eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
@@ -631,7 +548,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
631 goto FreeFcall; 548 goto FreeFcall;
632 } 549 }
633 550
634 inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid); 551 inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid);
635 552
636 fid = v9fs_fid_create(dentry, v9ses, newfid, 0); 553 fid = v9fs_fid_create(dentry, v9ses, newfid, 0);
637 if (fid == NULL) { 554 if (fid == NULL) {
@@ -640,10 +557,10 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
640 goto FreeFcall; 557 goto FreeFcall;
641 } 558 }
642 559
643 fid->qid = fcall->params.rstat.stat->qid; 560 fid->qid = fcall->params.rstat.stat.qid;
644 561
645 dentry->d_op = &v9fs_dentry_operations; 562 dentry->d_op = &v9fs_dentry_operations;
646 v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb); 563 v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
647 564
648 d_add(dentry, inode); 565 d_add(dentry, inode);
649 kfree(fcall); 566 kfree(fcall);
@@ -699,7 +616,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
699 v9fs_fid_lookup(old_dentry->d_parent); 616 v9fs_fid_lookup(old_dentry->d_parent);
700 struct v9fs_fid *newdirfid = 617 struct v9fs_fid *newdirfid =
701 v9fs_fid_lookup(new_dentry->d_parent); 618 v9fs_fid_lookup(new_dentry->d_parent);
702 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); 619 struct v9fs_wstat wstat;
703 struct v9fs_fcall *fcall = NULL; 620 struct v9fs_fcall *fcall = NULL;
704 int fid = -1; 621 int fid = -1;
705 int olddirfidnum = -1; 622 int olddirfidnum = -1;
@@ -708,9 +625,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
708 625
709 dprintk(DEBUG_VFS, "\n"); 626 dprintk(DEBUG_VFS, "\n");
710 627
711 if (!mistat)
712 return -ENOMEM;
713
714 if ((!oldfid) || (!olddirfid) || (!newdirfid)) { 628 if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
715 dprintk(DEBUG_ERROR, "problem with arguments\n"); 629 dprintk(DEBUG_ERROR, "problem with arguments\n");
716 return -EBADF; 630 return -EBADF;
@@ -734,26 +648,15 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
734 goto FreeFcallnBail; 648 goto FreeFcallnBail;
735 } 649 }
736 650
737 v9fs_blank_mistat(v9ses, mistat); 651 v9fs_blank_wstat(&wstat);
652 wstat.muid = v9ses->name;
653 wstat.name = (char *) new_dentry->d_name.name;
738 654
739 strcpy(mistat->data + 1, v9ses->name); 655 retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
740 mistat->name = mistat->data + 1 + strlen(v9ses->name);
741
742 if (new_dentry->d_name.len >
743 (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
744 dprintk(DEBUG_ERROR, "new name too long\n");
745 goto FreeFcallnBail;
746 }
747
748 strcpy(mistat->name, new_dentry->d_name.name);
749 retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
750 656
751 FreeFcallnBail: 657 FreeFcallnBail:
752 kfree(mistat);
753
754 if (retval < 0) 658 if (retval < 0)
755 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", 659 PRINT_FCALL_ERROR("wstat error", fcall);
756 FCALL_ERROR(fcall));
757 660
758 kfree(fcall); 661 kfree(fcall);
759 return retval; 662 return retval;
@@ -788,7 +691,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
788 if (err < 0) 691 if (err < 0)
789 dprintk(DEBUG_ERROR, "stat error\n"); 692 dprintk(DEBUG_ERROR, "stat error\n");
790 else { 693 else {
791 v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode, 694 v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
792 dentry->d_inode->i_sb); 695 dentry->d_inode->i_sb);
793 generic_fillattr(dentry->d_inode, stat); 696 generic_fillattr(dentry->d_inode, stat);
794 } 697 }
@@ -809,57 +712,44 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
809 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 712 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
810 struct v9fs_fid *fid = v9fs_fid_lookup(dentry); 713 struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
811 struct v9fs_fcall *fcall = NULL; 714 struct v9fs_fcall *fcall = NULL;
812 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); 715 struct v9fs_wstat wstat;
813 int res = -EPERM; 716 int res = -EPERM;
814 717
815 dprintk(DEBUG_VFS, "\n"); 718 dprintk(DEBUG_VFS, "\n");
816 719
817 if (!mistat)
818 return -ENOMEM;
819
820 if (!fid) { 720 if (!fid) {
821 dprintk(DEBUG_ERROR, 721 dprintk(DEBUG_ERROR,
822 "Couldn't find fid associated with dentry\n"); 722 "Couldn't find fid associated with dentry\n");
823 return -EBADF; 723 return -EBADF;
824 } 724 }
825 725
826 v9fs_blank_mistat(v9ses, mistat); 726 v9fs_blank_wstat(&wstat);
827 if (iattr->ia_valid & ATTR_MODE) 727 if (iattr->ia_valid & ATTR_MODE)
828 mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode); 728 wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode);
829 729
830 if (iattr->ia_valid & ATTR_MTIME) 730 if (iattr->ia_valid & ATTR_MTIME)
831 mistat->mtime = iattr->ia_mtime.tv_sec; 731 wstat.mtime = iattr->ia_mtime.tv_sec;
832 732
833 if (iattr->ia_valid & ATTR_ATIME) 733 if (iattr->ia_valid & ATTR_ATIME)
834 mistat->atime = iattr->ia_atime.tv_sec; 734 wstat.atime = iattr->ia_atime.tv_sec;
835 735
836 if (iattr->ia_valid & ATTR_SIZE) 736 if (iattr->ia_valid & ATTR_SIZE)
837 mistat->length = iattr->ia_size; 737 wstat.length = iattr->ia_size;
838 738
839 if (v9ses->extended) { 739 if (v9ses->extended) {
840 char *ptr = mistat->data+1; 740 if (iattr->ia_valid & ATTR_UID)
841 741 wstat.n_uid = iattr->ia_uid;
842 if (iattr->ia_valid & ATTR_UID) {
843 mistat->uid = ptr;
844 ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
845 mistat->n_uid = iattr->ia_uid;
846 }
847 742
848 if (iattr->ia_valid & ATTR_GID) { 743 if (iattr->ia_valid & ATTR_GID)
849 mistat->gid = ptr; 744 wstat.n_gid = iattr->ia_gid;
850 ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
851 mistat->n_gid = iattr->ia_gid;
852 }
853 } 745 }
854 746
855 res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall); 747 res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
856 748
857 if (res < 0) 749 if (res < 0)
858 dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall)); 750 PRINT_FCALL_ERROR("wstat error", fcall);
859 751
860 kfree(mistat);
861 kfree(fcall); 752 kfree(fcall);
862
863 if (res >= 0) 753 if (res >= 0)
864 res = inode_setattr(dentry->d_inode, iattr); 754 res = inode_setattr(dentry->d_inode, iattr);
865 755
@@ -867,51 +757,47 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
867} 757}
868 758
869/** 759/**
870 * v9fs_mistat2inode - populate an inode structure with mistat info 760 * v9fs_stat2inode - populate an inode structure with mistat info
871 * @mistat: Plan 9 metadata (mistat) structure 761 * @stat: Plan 9 metadata (mistat) structure
872 * @inode: inode to populate 762 * @inode: inode to populate
873 * @sb: superblock of filesystem 763 * @sb: superblock of filesystem
874 * 764 *
875 */ 765 */
876 766
877void 767void
878v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode, 768v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
879 struct super_block *sb) 769 struct super_block *sb)
880{ 770{
771 int n;
772 char ext[32];
881 struct v9fs_session_info *v9ses = sb->s_fs_info; 773 struct v9fs_session_info *v9ses = sb->s_fs_info;
882 774
883 inode->i_nlink = 1; 775 inode->i_nlink = 1;
884 776
885 inode->i_atime.tv_sec = mistat->atime; 777 inode->i_atime.tv_sec = stat->atime;
886 inode->i_mtime.tv_sec = mistat->mtime; 778 inode->i_mtime.tv_sec = stat->mtime;
887 inode->i_ctime.tv_sec = mistat->mtime; 779 inode->i_ctime.tv_sec = stat->mtime;
888 780
889 inode->i_uid = -1; 781 inode->i_uid = v9ses->uid;
890 inode->i_gid = -1; 782 inode->i_gid = v9ses->gid;
891 783
892 if (v9ses->extended) { 784 if (v9ses->extended) {
893 /* TODO: string to uid mapping via user-space daemon */ 785 inode->i_uid = stat->n_uid;
894 inode->i_uid = mistat->n_uid; 786 inode->i_gid = stat->n_gid;
895 inode->i_gid = mistat->n_gid;
896
897 if (mistat->n_uid == -1)
898 sscanf(mistat->uid, "%x", &inode->i_uid);
899
900 if (mistat->n_gid == -1)
901 sscanf(mistat->gid, "%x", &inode->i_gid);
902 } 787 }
903 788
904 if (inode->i_uid == -1) 789 inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
905 inode->i_uid = v9ses->uid;
906 if (inode->i_gid == -1)
907 inode->i_gid = v9ses->gid;
908
909 inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
910 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { 790 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
911 char type = 0; 791 char type = 0;
912 int major = -1; 792 int major = -1;
913 int minor = -1; 793 int minor = -1;
914 sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); 794
795 n = stat->extension.len;
796 if (n > sizeof(ext)-1)
797 n = sizeof(ext)-1;
798 memmove(ext, stat->extension.str, n);
799 ext[n] = 0;
800 sscanf(ext, "%c %u %u", &type, &major, &minor);
915 switch (type) { 801 switch (type) {
916 case 'c': 802 case 'c':
917 inode->i_mode &= ~S_IFBLK; 803 inode->i_mode &= ~S_IFBLK;
@@ -920,14 +806,14 @@ v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
920 case 'b': 806 case 'b':
921 break; 807 break;
922 default: 808 default:
923 dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", 809 dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n",
924 type, mistat->extension); 810 type, stat->extension.len, stat->extension.str);
925 }; 811 };
926 inode->i_rdev = MKDEV(major, minor); 812 inode->i_rdev = MKDEV(major, minor);
927 } else 813 } else
928 inode->i_rdev = 0; 814 inode->i_rdev = 0;
929 815
930 inode->i_size = mistat->length; 816 inode->i_size = stat->length;
931 817
932 inode->i_blksize = sb->s_blocksize; 818 inode->i_blksize = sb->s_blocksize;
933 inode->i_blocks = 819 inode->i_blocks =
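
Because stat->extension is now a counted string with no NUL terminator, the device-node parser above copies at most 31 bytes into a local buffer and terminates it before letting sscanf() loose on it. The bounded-copy pattern, as used in the hunk:

    char ext[32];
    int n = stat->extension.len;

    if (n > sizeof(ext) - 1)
            n = sizeof(ext) - 1;     /* leave room for the terminator */
    memmove(ext, stat->extension.str, n);
    ext[n] = 0;                      /* sscanf() requires a C string */
    sscanf(ext, "%c %u %u", &type, &major, &minor);
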
@@ -955,71 +841,6 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
955} 841}
956 842
957/** 843/**
958 * v9fs_vfs_symlink - helper function to create symlinks
959 * @dir: directory inode containing symlink
960 * @dentry: dentry for symlink
961 * @symname: symlink data
962 *
963 * See 9P2000.u RFC for more information
964 *
965 */
966
967static int
968v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
969{
970 int retval = -EPERM;
971 struct v9fs_fid *newfid;
972 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
973 struct v9fs_fcall *fcall = NULL;
974 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
975
976 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
977 symname);
978
979 if (!mistat)
980 return -ENOMEM;
981
982 if (!v9ses->extended) {
983 dprintk(DEBUG_ERROR, "not extended\n");
984 goto FreeFcall;
985 }
986
987 /* issue a create */
988 retval = v9fs_create(dir, dentry, S_IFLNK, 0);
989 if (retval != 0)
990 goto FreeFcall;
991
992 newfid = v9fs_fid_lookup(dentry);
993
994 /* issue a twstat */
995 v9fs_blank_mistat(v9ses, mistat);
996 strcpy(mistat->data + 1, symname);
997 mistat->extension = mistat->data + 1;
998 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
999 if (retval < 0) {
1000 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1001 FCALL_ERROR(fcall));
1002 goto FreeFcall;
1003 }
1004
1005 kfree(fcall);
1006
1007 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1008 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1009 FCALL_ERROR(fcall));
1010 goto FreeFcall;
1011 }
1012
1013 d_drop(dentry); /* FID - will this also clunk? */
1014
1015 FreeFcall:
1016 kfree(mistat);
1017 kfree(fcall);
1018
1019 return retval;
1020}
1021
1022/**
1023 * v9fs_readlink - read a symlink's location (internal version) 844 * v9fs_readlink - read a symlink's location (internal version)
1024 * @dentry: dentry for symlink 845 * @dentry: dentry for symlink
1025 * @buffer: buffer to load symlink location into 846 * @buffer: buffer to load symlink location into
@@ -1058,16 +879,17 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1058 if (!fcall) 879 if (!fcall)
1059 return -EIO; 880 return -EIO;
1060 881
1061 if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) { 882 if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) {
1062 retval = -EINVAL; 883 retval = -EINVAL;
1063 goto FreeFcall; 884 goto FreeFcall;
1064 } 885 }
1065 886
1066 /* copy extension buffer into buffer */ 887 /* copy extension buffer into buffer */
1067 if (strlen(fcall->params.rstat.stat->extension) < buflen) 888 if (fcall->params.rstat.stat.extension.len < buflen)
1068 buflen = strlen(fcall->params.rstat.stat->extension); 889 buflen = fcall->params.rstat.stat.extension.len;
1069 890
1070 memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1); 891 memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
892 buffer[buflen-1] = 0;
1071 893
1072 retval = buflen; 894 retval = buflen;
1073 895
@@ -1157,6 +979,77 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
1157 __putname(s); 979 __putname(s);
1158} 980}
1159 981
982static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
983 int mode, const char *extension)
984{
985 int err, retval;
986 struct v9fs_session_info *v9ses;
987 struct v9fs_fcall *fcall;
988 struct v9fs_fid *fid;
989 struct v9fs_wstat wstat;
990
991 v9ses = v9fs_inode2v9ses(dir);
992 retval = -EPERM;
993 fcall = NULL;
994
995 if (!v9ses->extended) {
996 dprintk(DEBUG_ERROR, "not extended\n");
997 goto free_mem;
998 }
999
1000 /* issue a create */
1001 retval = v9fs_create(dir, dentry, mode, 0);
1002 if (retval != 0)
1003 goto free_mem;
1004
1005 fid = v9fs_fid_get_created(dentry);
1006 if (!fid) {
1007 dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
1008 goto free_mem;
1009 }
1010
1011 /* issue a Twstat */
1012 v9fs_blank_wstat(&wstat);
1013 wstat.muid = v9ses->name;
1014 wstat.extension = (char *) extension;
1015 retval = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
1016 if (retval < 0) {
1017 PRINT_FCALL_ERROR("wstat error", fcall);
1018 goto free_mem;
1019 }
1020
1021 err = v9fs_t_clunk(v9ses, fid->fid);
1022 if (err < 0) {
1023 dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
1024 goto free_mem;
1025 }
1026
1027 d_drop(dentry); /* FID - will this also clunk? */
1028
1029free_mem:
1030 kfree(fcall);
1031 return retval;
1032}
1033
1034/**
1035 * v9fs_vfs_symlink - helper function to create symlinks
1036 * @dir: directory inode containing symlink
1037 * @dentry: dentry for symlink
1038 * @symname: symlink data
1039 *
1040 * See 9P2000.u RFC for more information
1041 *
1042 */
1043
1044static int
1045v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1046{
1047 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
1048 symname);
1049
1050 return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
1051}
1052
1160/** 1053/**
1161 * v9fs_vfs_link - create a hardlink 1054 * v9fs_vfs_link - create a hardlink
1162 * @old_dentry: dentry for file to link to 1055 * @old_dentry: dentry for file to link to
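
v9fs_vfs_mkspecial() above folds the formerly duplicated symlink, hardlink, and device-node bodies into one create-then-wstat sequence: create the object, recover the fid the create left on the dentry, write the 9P2000.u extension field via Twstat, then clunk and drop the dentry. The three callers now differ only in mode and extension string, roughly (literal values here are illustrative):

    v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);              /* symlink */
    v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, "hardlink(5)\n");  /* link */
    v9fs_vfs_mkspecial(dir, dentry, mode, "b 8 1");                 /* block dev */
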
@@ -1173,64 +1066,24 @@ static int
1173v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, 1066v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1174 struct dentry *dentry) 1067 struct dentry *dentry)
1175{ 1068{
1176 int retval = -EPERM; 1069 int retval;
1177 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); 1070 struct v9fs_fid *oldfid;
1178 struct v9fs_fcall *fcall = NULL; 1071 char *name;
1179 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
1180 struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
1181 struct v9fs_fid *newfid = NULL;
1182 char *symname = __getname();
1183 1072
1184 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1073 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
1185 old_dentry->d_name.name); 1074 old_dentry->d_name.name);
1186 1075
1187 if (!v9ses->extended) { 1076 oldfid = v9fs_fid_lookup(old_dentry);
1188 dprintk(DEBUG_ERROR, "not extended\n"); 1077 if (!oldfid) {
1189 goto FreeMem; 1078 dprintk(DEBUG_ERROR, "can't find oldfid\n");
1190 } 1079 return -EPERM;
1191
1192 /* get fid of old_dentry */
1193 sprintf(symname, "hardlink(%d)\n", oldfid->fid);
1194
1195 /* issue a create */
1196 retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
1197 if (retval != 0)
1198 goto FreeMem;
1199
1200 newfid = v9fs_fid_lookup(dentry);
1201 if (!newfid) {
1202 dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
1203 goto FreeMem;
1204 }
1205
1206 /* issue a twstat */
1207 v9fs_blank_mistat(v9ses, mistat);
1208 strcpy(mistat->data + 1, symname);
1209 mistat->extension = mistat->data + 1;
1210 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
1211 if (retval < 0) {
1212 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1213 FCALL_ERROR(fcall));
1214 goto FreeMem;
1215 }
1216
1217 kfree(fcall);
1218
1219 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1220 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1221 FCALL_ERROR(fcall));
1222 goto FreeMem;
1223 } 1080 }
1224 1081
1225 d_drop(dentry); /* FID - will this also clunk? */ 1082 name = __getname();
1226 1083 sprintf(name, "hardlink(%d)\n", oldfid->fid);
1227 kfree(fcall); 1084 retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
1228 fcall = NULL; 1085 __putname(name);
1229 1086
1230 FreeMem:
1231 kfree(mistat);
1232 kfree(fcall);
1233 __putname(symname);
1234 return retval; 1087 return retval;
1235} 1088}
1236 1089
@@ -1246,82 +1099,30 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1246static int 1099static int
1247v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 1100v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1248{ 1101{
1249 int retval = -EPERM; 1102 int retval;
1250 struct v9fs_fid *newfid; 1103 char *name;
1251 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
1252 struct v9fs_fcall *fcall = NULL;
1253 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
1254 char *symname = __getname();
1255 1104
1256 dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 1105 dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1257 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); 1106 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
1258 1107
1259 if (!mistat) 1108 if (!new_valid_dev(rdev))
1260 return -ENOMEM; 1109 return -EINVAL;
1261
1262 if (!new_valid_dev(rdev)) {
1263 retval = -EINVAL;
1264 goto FreeMem;
1265 }
1266
1267 if (!v9ses->extended) {
1268 dprintk(DEBUG_ERROR, "not extended\n");
1269 goto FreeMem;
1270 }
1271
1272 /* issue a create */
1273 retval = v9fs_create(dir, dentry, mode, 0);
1274
1275 if (retval != 0)
1276 goto FreeMem;
1277
1278 newfid = v9fs_fid_lookup(dentry);
1279 if (!newfid) {
1280 dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
1281 retval = -EINVAL;
1282 goto FreeMem;
1283 }
1284 1110
1111 name = __getname();
1285 /* build extension */ 1112 /* build extension */
1286 if (S_ISBLK(mode)) 1113 if (S_ISBLK(mode))
1287 sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev)); 1114 sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
1288 else if (S_ISCHR(mode)) 1115 else if (S_ISCHR(mode))
1289 sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev)); 1116 sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
1290 else if (S_ISFIFO(mode)) 1117 else if (S_ISFIFO(mode))
1291 ; /* DO NOTHING */ 1118 *name = 0;
1292 else { 1119 else {
1293 retval = -EINVAL; 1120 __putname(name);
1294 goto FreeMem; 1121 return -EINVAL;
1295 }
1296
1297 if (!S_ISFIFO(mode)) {
1298 /* issue a twstat */
1299 v9fs_blank_mistat(v9ses, mistat);
1300 strcpy(mistat->data + 1, symname);
1301 mistat->extension = mistat->data + 1;
1302 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
1303 if (retval < 0) {
1304 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1305 FCALL_ERROR(fcall));
1306 goto FreeMem;
1307 }
1308 } 1122 }
1309 1123
1310 /* need to update dcache so we show up */ 1124 retval = v9fs_vfs_mkspecial(dir, dentry, mode, name);
1311 kfree(fcall); 1125 __putname(name);
1312
1313 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1314 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1315 FCALL_ERROR(fcall));
1316 goto FreeMem;
1317 }
1318
1319 d_drop(dentry); /* FID - will this also clunk? */
1320
1321 FreeMem:
1322 kfree(mistat);
1323 kfree(fcall);
1324 __putname(symname);
1325 1126
1326 return retval; 1127 return retval;
1327} 1128}
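
The mknod rewrite above thus reduces to building the 9P2000.u extension string that encodes the device identity: "b major minor" for block devices, "c major minor" for character devices, and an empty string for FIFOs, with anything else rejected. The builder fragment, assuming a dev_t rdev and a buffer name (the patch takes it from __getname()):

    if (S_ISBLK(mode))
            sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
    else if (S_ISCHR(mode))
            sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
    else if (S_ISFIFO(mode))
            *name = 0;               /* FIFOs carry no device numbers */
    else
            retval = -EINVAL;        /* no other special type is representable */
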
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 82c5b0084079..ae0f06b3c11a 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -44,7 +44,6 @@
44#include "v9fs.h" 44#include "v9fs.h"
45#include "9p.h" 45#include "9p.h"
46#include "v9fs_vfs.h" 46#include "v9fs_vfs.h"
47#include "conv.h"
48#include "fid.h" 47#include "fid.h"
49 48
50static void v9fs_clear_inode(struct inode *); 49static void v9fs_clear_inode(struct inode *);
@@ -123,12 +122,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
123 122
124 dprintk(DEBUG_VFS, " \n"); 123 dprintk(DEBUG_VFS, " \n");
125 124
126 v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL); 125 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
127 if (!v9ses) 126 if (!v9ses)
128 return ERR_PTR(-ENOMEM); 127 return ERR_PTR(-ENOMEM);
129 128
130 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 129 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
131 dprintk(DEBUG_ERROR, "problem initiating session\n"); 130 dprintk(DEBUG_ERROR, "problem initiating session\n");
131 kfree(v9ses);
132 return ERR_PTR(newfid); 132 return ERR_PTR(newfid);
133 } 133 }
134 134
@@ -157,7 +157,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type
157 stat_result = v9fs_t_stat(v9ses, newfid, &fcall); 157 stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
158 if (stat_result < 0) { 158 if (stat_result < 0) {
159 dprintk(DEBUG_ERROR, "stat error\n"); 159 dprintk(DEBUG_ERROR, "stat error\n");
160 v9fs_t_clunk(v9ses, newfid, NULL); 160 v9fs_t_clunk(v9ses, newfid);
161 v9fs_put_idpool(newfid, &v9ses->fidpool); 161 v9fs_put_idpool(newfid, &v9ses->fidpool);
162 } else { 162 } else {
163 /* Setup the Root Inode */ 163 /* Setup the Root Inode */
@@ -167,10 +167,10 @@ static struct super_block *v9fs_get_sb(struct file_system_type
167 goto put_back_sb; 167 goto put_back_sb;
168 } 168 }
169 169
170 root_fid->qid = fcall->params.rstat.stat->qid; 170 root_fid->qid = fcall->params.rstat.stat.qid;
171 root->d_inode->i_ino = 171 root->d_inode->i_ino =
172 v9fs_qid2ino(&fcall->params.rstat.stat->qid); 172 v9fs_qid2ino(&fcall->params.rstat.stat.qid);
173 v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb); 173 v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
174 } 174 }
175 175
176 kfree(fcall); 176 kfree(fcall);
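
Two small fixes ride along in the superblock hunk: kzalloc(size, flags), the idiomatic zeroing allocator for a single object, replaces the kcalloc(1, size, flags) spelling, and the failed-session path now frees the allocation it previously leaked. In sketch form:

    v9ses = kzalloc(sizeof(*v9ses), GFP_KERNEL);   /* zeroed, no array math */
    if (!v9ses)
            return ERR_PTR(-ENOMEM);

    newfid = v9fs_session_init(v9ses, dev_name, data);
    if (newfid < 0) {
            kfree(v9ses);            /* previously leaked on this error path */
            return ERR_PTR(newfid);
    }
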
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 175b2e8177c1..f3d3d81eb7e9 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,6 +1,6 @@
1config BINFMT_ELF 1config BINFMT_ELF
2 bool "Kernel support for ELF binaries" 2 bool "Kernel support for ELF binaries"
3 depends on MMU 3 depends on MMU && (BROKEN || !FRV)
4 default y 4 default y
5 ---help--- 5 ---help---
6 ELF (Executable and Linkable Format) is a format for libraries and 6 ELF (Executable and Linkable Format) is a format for libraries and
diff --git a/fs/Makefile b/fs/Makefile
index 73676111ebbe..35e9aec608e4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o
+		ioprio.o pnode.o drop_caches.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 6682d6d7f294..5c61c24dab2a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -137,7 +137,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
 #endif
 
 	/* determine how many magic numbers there should be in this page */
-	latter = dir->i_size - (page->index << PAGE_CACHE_SHIFT);
+	latter = dir->i_size - page_offset(page);
 	if (latter >= PAGE_SIZE)
 		qty = PAGE_SIZE;
 	else
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
index 1e691889c4c9..bfdcf19ba3f3 100644
--- a/fs/afs/volume.h
+++ b/fs/afs/volume.h
@@ -18,8 +18,6 @@
 #include "kafsasyncd.h"
 #include "cache.h"
 
-#define __packed __attribute__((packed))
-
 typedef enum {
 	AFS_VLUPD_SLEEP,	/* sleeping waiting for update timer to fire */
 	AFS_VLUPD_PENDING,	/* on pending queue */
@@ -115,7 +113,7 @@ struct afs_volume
 	struct cachefs_cookie	*cache;		/* caching cookie */
 #endif
 	afs_volid_t		vid;		/* volume ID */
-	afs_voltype_t		__packed type;	/* type of volume */
+	afs_voltype_t		type;		/* type of volume */
 	char			type_force;	/* force volume type (suppress R/O -> R/W) */
 	unsigned short		nservers;	/* number of server slots filled */
 	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
diff --git a/fs/aio.c b/fs/aio.c
index 5a28b69ad223..aec2b1916d1b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,7 +29,6 @@
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
-#include <linux/rcuref.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	/* Must be done under the lock to serialise against cancellation.
 	 * Call this aio_fput as it duplicates fput via the fput_work.
 	 */
-	if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
diff --git a/fs/attr.c b/fs/attr.c
index 67bcd9b14ea5..b34732506f1d 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -67,20 +67,12 @@ EXPORT_SYMBOL(inode_change_ok);
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
-	int error = 0;
-
-	if (ia_valid & ATTR_SIZE) {
-		if (attr->ia_size != i_size_read(inode)) {
-			error = vmtruncate(inode, attr->ia_size);
-			if (error || (ia_valid == ATTR_SIZE))
-				goto out;
-		} else {
-			/*
-			 * We skipped the truncate but must still update
-			 * timestamps
-			 */
-			ia_valid |= ATTR_MTIME|ATTR_CTIME;
-		}
+
+	if (ia_valid & ATTR_SIZE &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
 	}
 
 	if (ia_valid & ATTR_UID)
@@ -104,8 +96,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
 		inode->i_mode = mode;
 	}
 	mark_inode_dirty(inode);
-out:
-	return error;
+
+	return 0;
 }
 EXPORT_SYMBOL(inode_setattr);
 
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index fca83e28edcf..385bed09b0d8 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -209,7 +209,7 @@ static inline int simple_empty_nolock(struct dentry *dentry)
 	struct dentry *child;
 	int ret = 0;
 
-	list_for_each_entry(child, &dentry->d_subdirs, d_child)
+	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
 		if (simple_positive(child))
 			goto out;
 	ret = 1;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index feb6ac427d05..dc39589df165 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -105,7 +105,7 @@ repeat:
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if (!simple_positive(dentry)) {
@@ -138,7 +138,7 @@ resume:
 	}
 
 	if (this_parent != top) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -163,7 +163,7 @@ repeat:
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if (!simple_positive(dentry)) {
@@ -199,7 +199,7 @@ cont:
 	}
 
 	if (this_parent != parent) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -238,7 +238,7 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 	/* On exit from the loop expire is set to a dgot dentry
 	 * to expire or it's NULL */
 	while ( next != &root->d_subdirs ) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - give up */
 		if ( !simple_positive(dentry) ) {
@@ -302,7 +302,7 @@ next:
 		expired, (int)expired->d_name.len, expired->d_name.name);
 	spin_lock(&dcache_lock);
 	list_del(&expired->d_parent->d_subdirs);
-	list_add(&expired->d_parent->d_subdirs, &expired->d_child);
+	list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 	spin_unlock(&dcache_lock);
 	return expired;
 }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 818b37be5153..2d3082854a29 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -91,7 +91,7 @@ repeat:
 	next = this_parent->d_subdirs.next;
 resume:
 	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_child);
+		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
 		/* Negative dentry - don`t care */
 		if (!simple_positive(dentry)) {
@@ -117,7 +117,7 @@ resume:
 	if (this_parent != sbi->root) {
 		struct dentry *dentry = this_parent;
 
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		spin_unlock(&dcache_lock);
 		DPRINTK("parent dentry %p %.*s",
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2a771ec66956..2241405ffc41 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -143,7 +143,8 @@ static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t f
 	}
 
 	while(1) {
-		struct dentry *de = list_entry(list, struct dentry, d_child);
+		struct dentry *de = list_entry(list,
+				struct dentry, d_u.d_child);
 
 		if (!d_unhashed(de) && de->d_inode) {
 			spin_unlock(&dcache_lock);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f36f2210204f..80ca932ba0bd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -58,7 +58,7 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
  * If we don't support core dumping, then supply a NULL so we
  * don't even try.
  */
-#ifdef USE_ELF_CORE_DUMP
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
 static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
 #else
 #define elf_core_dump	NULL
@@ -288,11 +288,17 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 		struct elf_phdr *eppnt, int prot, int type)
 {
 	unsigned long map_addr;
+	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
 
 	down_write(&current->mm->mmap_sem);
-	map_addr = do_mmap(filep, ELF_PAGESTART(addr),
-		eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type,
-		eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
+	/* mmap() will return -EINVAL if given a zero size, but a
+	 * segment with zero filesize is perfectly valid */
+	if (eppnt->p_filesz + pageoffset)
+		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
+				   eppnt->p_filesz + pageoffset, prot, type,
+				   eppnt->p_offset - pageoffset);
+	else
+		map_addr = ELF_PAGESTART(addr);
 	up_write(&current->mm->mmap_sem);
 	return(map_addr);
 }
@@ -1107,7 +1113,7 @@ out:
  * Note that some platforms still use traditional core dumps and not
  * the ELF core dump.  Each platform can select it as appropriate.
  */
-#ifdef USE_ELF_CORE_DUMP
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
 
 /*
  * ELF core dumper
diff --git a/fs/buffer.c b/fs/buffer.c
index 5287be18633b..55f0975a9b15 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -153,14 +153,8 @@ int sync_blockdev(struct block_device *bdev)
 {
 	int ret = 0;
 
-	if (bdev) {
-		int err;
-
-		ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
-		err = filemap_fdatawait(bdev->bd_inode->i_mapping);
-		if (!ret)
-			ret = err;
-	}
+	if (bdev)
+		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
 	return ret;
 }
 EXPORT_SYMBOL(sync_blockdev);
@@ -1768,7 +1762,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * handle that here by just cleaning them.
 	 */
 
-	block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 	head = page_buffers(page);
 	bh = head;
 
@@ -2160,11 +2154,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 * truncates.  Uses prepare/commit_write to allow the filesystem to
 * deal with the hole.
 */
-int generic_cont_expand(struct inode *inode, loff_t size)
+static int __generic_cont_expand(struct inode *inode, loff_t size,
+				 pgoff_t index, unsigned int offset)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long index, offset, limit;
+	unsigned long limit;
 	int err;
 
 	err = -EFBIG;
@@ -2176,24 +2171,24 @@ int generic_cont_expand(struct inode *inode, loff_t size)
 	if (size > inode->i_sb->s_maxbytes)
 		goto out;
 
-	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-
-	/* ugh. in prepare/commit_write, if from==to==start of block, we
-	** skip the prepare. make sure we never send an offset for the start
-	** of a block
-	*/
-	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-		offset++;
-	}
-	index = size >> PAGE_CACHE_SHIFT;
 	err = -ENOMEM;
 	page = grab_cache_page(mapping, index);
 	if (!page)
 		goto out;
 	err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
-	if (!err) {
-		err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+	if (err) {
+		/*
+		 * ->prepare_write() may have instantiated a few blocks
+		 * outside i_size.  Trim these off again.
+		 */
+		unlock_page(page);
+		page_cache_release(page);
+		vmtruncate(inode, inode->i_size);
+		goto out;
 	}
+
+	err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+
 	unlock_page(page);
 	page_cache_release(page);
 	if (err > 0)
@@ -2202,6 +2197,36 @@ out:
 	return err;
 }
 
+int generic_cont_expand(struct inode *inode, loff_t size)
+{
+	pgoff_t index;
+	unsigned int offset;
+
+	offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
+
+	/* ugh. in prepare/commit_write, if from==to==start of block, we
+	** skip the prepare. make sure we never send an offset for the start
+	** of a block
+	*/
+	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
+		/* caller must handle this extra byte. */
+		offset++;
+	}
+	index = size >> PAGE_CACHE_SHIFT;
+
+	return __generic_cont_expand(inode, size, index, offset);
+}
+
+int generic_cont_expand_simple(struct inode *inode, loff_t size)
+{
+	loff_t pos = size - 1;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
+
+	/* prepare/commit_write can handle even if from==to==start of block. */
+	return __generic_cont_expand(inode, size, index, offset);
+}
+
 /*
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
@@ -2610,7 +2635,7 @@ int block_truncate_page(struct address_space *mapping,
 	pgoff_t index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
 	unsigned blocksize;
-	pgoff_t iblock;
+	sector_t iblock;
 	unsigned length, pos;
 	struct inode *inode = mapping->host;
 	struct page *page;
@@ -2626,7 +2651,7 @@ int block_truncate_page(struct address_space *mapping,
 		return 0;
 
 	length = blocksize - length;
-	iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 
 	page = grab_cache_page(mapping, index);
 	err = -ENOMEM;
@@ -3145,6 +3170,7 @@ EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_commit_write);
 EXPORT_SYMBOL(generic_cont_expand);
+EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(invalidate_bdev);
 EXPORT_SYMBOL(ll_rw_block);
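
The two (sector_t) casts above are 32-bit overflow fixes: page->index is a 32-bit pgoff_t on such machines, so shifting it left by PAGE_CACHE_SHIFT - i_blkbits truncates the block number on large files unless the value is widened first. A standalone demonstration of the failure mode (userspace, illustrative sizes):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t index = 0x00100000;	/* page index 1M, i.e. a 4GB file offset */
            int shift = 12;			/* e.g. PAGE_CACHE_SHIFT(12) - i_blkbits(0) */

            uint32_t narrow = index << shift;		/* wraps to 0 */
            uint64_t wide = (uint64_t)index << shift;	/* widened first: correct */

            printf("narrow=%u wide=%llu\n", narrow, (unsigned long long)wide);
            return 0;
    }
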
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 14a1c72ced92..5ade53d7bca8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -127,8 +127,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 	if (file->f_dentry->d_inode->i_mapping) {
 	/* BB no need to lock inode until after invalidate
 	   since namei code should already have it locked? */
-		filemap_fdatawrite(file->f_dentry->d_inode->i_mapping);
-		filemap_fdatawait(file->f_dentry->d_inode->i_mapping);
+		filemap_write_and_wait(file->f_dentry->d_inode->i_mapping);
 	}
 	cFYI(1, ("invalidating remote inode since open detected it "
 		 "changed"));
@@ -419,8 +418,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
 	pCifsInode = CIFS_I(inode);
 	if (pCifsInode) {
 		if (can_flush) {
-			filemap_fdatawrite(inode->i_mapping);
-			filemap_fdatawait(inode->i_mapping);
+			filemap_write_and_wait(inode->i_mapping);
 			/* temporarily disable caching while we
 			   go to server to get inode info */
 			pCifsInode->clientCanCacheAll = FALSE;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 411c1f7f84da..9558f51bca55 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1148,8 +1148,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	/* BB check if we need to refresh inode from server now ? BB */
 
 	/* need to flush data before changing file size on server */
-	filemap_fdatawrite(direntry->d_inode->i_mapping);
-	filemap_fdatawait(direntry->d_inode->i_mapping);
+	filemap_write_and_wait(direntry->d_inode->i_mapping);
 
 	if (attrs->ia_valid & ATTR_SIZE) {
 		/* To avoid spurious oplock breaks from server, in the case of
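
All three cifs hunks (and sync_blockdev() in fs/buffer.c above) replace back-to-back filemap_fdatawrite()/filemap_fdatawait() calls with the new filemap_write_and_wait() helper. A sketch of the semantics being folded into the helper, shaped after the open-coded version removed from sync_blockdev() rather than copied from mm/filemap.c:

    /* write back dirty pages, then wait, preserving the first error seen */
    int filemap_write_and_wait(struct address_space *mapping)
    {
            int err = filemap_fdatawrite(mapping);
            int err2 = filemap_fdatawait(mapping);

            if (!err)
                    err = err2;
            return err;
    }
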
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 80072fd9b7fa..c607d923350a 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
 	spin_lock(&dcache_lock);
 	list_for_each(child, &parent->d_subdirs)
 	{
-		de = list_entry(child, struct dentry, d_child);
+		de = list_entry(child, struct dentry, d_u.d_child);
 		/* don't know what to do with negative dentries */
 		if ( ! de->d_inode ) 
 			continue;
diff --git a/fs/compat.c b/fs/compat.c
index 55ac0324aaf1..271b75d1597f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -494,9 +494,21 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 		ret = sys_fcntl(fd, cmd, (unsigned long)&f);
 		set_fs(old_fs);
 		if (cmd == F_GETLK && ret == 0) {
-			if ((f.l_start >= COMPAT_OFF_T_MAX) ||
-			    ((f.l_start + f.l_len) > COMPAT_OFF_T_MAX))
+			/* GETLK was successful and we need to return the data...
+			 * but it needs to fit in the compat structure.
+			 * l_start shouldn't be too big, unless the original
+			 * start + end is greater than COMPAT_OFF_T_MAX, in which
+			 * case the app was asking for trouble, so we return
+			 * -EOVERFLOW in that case.
+			 * l_len could be too big, in which case we just truncate it,
+			 * and only allow the app to see that part of the conflicting
+			 * lock that might make sense to it anyway
+			 */
+
+			if (f.l_start > COMPAT_OFF_T_MAX)
 				ret = -EOVERFLOW;
+			if (f.l_len > COMPAT_OFF_T_MAX)
+				f.l_len = COMPAT_OFF_T_MAX;
 			if (ret == 0)
 				ret = put_compat_flock(&f, compat_ptr(arg));
 		}
@@ -515,9 +527,11 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
 				       (unsigned long)&f);
 		set_fs(old_fs);
 		if (cmd == F_GETLK64 && ret == 0) {
-			if ((f.l_start >= COMPAT_LOFF_T_MAX) ||
-			    ((f.l_start + f.l_len) > COMPAT_LOFF_T_MAX))
+			/* need to return lock information - see above for commentary */
+			if (f.l_start > COMPAT_LOFF_T_MAX)
 				ret = -EOVERFLOW;
+			if (f.l_len > COMPAT_LOFF_T_MAX)
+				f.l_len = COMPAT_LOFF_T_MAX;
 			if (ret == 0)
 				ret = put_compat_flock64(&f, compat_ptr(arg));
 		}
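
The overflow logic above is worth restating: the old code rejected any lock whose start + len exceeded the 32-bit limit, while the new code only errors out when l_start itself cannot be represented and quietly clamps an oversized l_len. A userspace toy model of the clamp, with COMPAT_OFF_T_MAX assumed to be the 32-bit off_t maximum:

    #include <stdint.h>

    #define COMPAT_OFF_T_MAX 0x7fffffff

    /* returns 0 on success, -1 where the kernel would return -EOVERFLOW */
    static int clamp_compat_flock(int64_t start, int64_t *len)
    {
            if (start > COMPAT_OFF_T_MAX)
                    return -1;		/* start itself not representable */
            if (*len > COMPAT_OFF_T_MAX)
                    *len = COMPAT_OFF_T_MAX;	/* show the reachable part */
            return 0;
    }
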
diff --git a/fs/dcache.c b/fs/dcache.c
index 17e439138681..1536f15c4d4c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@ struct dentry_stat_t dentry_stat = {
 
 static void d_callback(struct rcu_head *head)
 {
-	struct dentry * dentry = container_of(head, struct dentry, d_rcu);
+	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
 
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
@@ -86,7 +86,7 @@ static void d_free(struct dentry *dentry)
 {
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
-	call_rcu(&dentry->d_rcu, d_callback);
+	call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
 /*
@@ -193,7 +193,7 @@ kill_it: {
 		list_del(&dentry->d_lru);
 		dentry_stat.nr_unused--;
 	}
-	list_del(&dentry->d_child);
+	list_del(&dentry->d_u.d_child);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
 	/*drops the locks, at that point nobody can reach this dentry */
 	dentry_iput(dentry);
@@ -367,7 +367,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
 	struct dentry * parent;
 
 	__d_drop(dentry);
-	list_del(&dentry->d_child);
+	list_del(&dentry->d_u.d_child);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
 	dentry_iput(dentry);
 	parent = dentry->d_parent;
@@ -518,7 +518,7 @@ repeat:
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 		/* Have we found a mount point ? */
 		if (d_mountpoint(dentry))
@@ -532,7 +532,7 @@ resume:
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 		goto resume;
 	}
@@ -569,7 +569,7 @@ repeat:
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 
 		if (!list_empty(&dentry->d_lru)) {
@@ -610,7 +610,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found);
 	 * All done at this level ... ascend and resume the search.
 	 */
 	if (this_parent != parent) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		this_parent = this_parent->d_parent;
 #ifdef DCACHE_DEBUG
 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
@@ -753,12 +753,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 		dentry->d_parent = dget(parent);
 		dentry->d_sb = parent->d_sb;
 	} else {
-		INIT_LIST_HEAD(&dentry->d_child);
+		INIT_LIST_HEAD(&dentry->d_u.d_child);
 	}
 
 	spin_lock(&dcache_lock);
 	if (parent)
-		list_add(&dentry->d_child, &parent->d_subdirs);
+		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 	dentry_stat.nr_dentry++;
 	spin_unlock(&dcache_lock);
 
@@ -1310,8 +1310,8 @@ already_unhashed:
 	/* Unhash the target: dput() will then get rid of it */
 	__d_drop(target);
 
-	list_del(&dentry->d_child);
-	list_del(&target->d_child);
+	list_del(&dentry->d_u.d_child);
+	list_del(&target->d_u.d_child);
 
 	/* Switch the names.. */
 	switch_names(dentry, target);
@@ -1322,15 +1322,15 @@ already_unhashed:
 	if (IS_ROOT(dentry)) {
 		dentry->d_parent = target->d_parent;
 		target->d_parent = target;
-		INIT_LIST_HEAD(&target->d_child);
+		INIT_LIST_HEAD(&target->d_u.d_child);
 	} else {
 		do_switch(dentry->d_parent, target->d_parent);
 
 		/* And add them back to the (new) parent lists */
-		list_add(&target->d_child, &target->d_parent->d_subdirs);
+		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
 	}
 
-	list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
+	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
 	spin_unlock(&target->d_lock);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
@@ -1568,7 +1568,7 @@ repeat:
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 		if (d_unhashed(dentry)||!dentry->d_inode)
 			continue;
@@ -1579,7 +1579,7 @@ resume:
 		atomic_dec(&dentry->d_count);
 	}
 	if (this_parent != root) {
-		next = this_parent->d_child.next;
+		next = this_parent->d_u.d_child.next;
 		atomic_dec(&this_parent->d_count);
 		this_parent = this_parent->d_parent;
 		goto resume;
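
Every d_child -> d_u.d_child edit in this file (and in autofs4, coda and afs above) tracks a single struct dentry layout change: the sibling link and the RCU callback head move into a union, which is safe because a dentry is unlinked from its parent's d_subdirs before call_rcu() reuses the slot. A sketch of the shape, inferred from the diff rather than quoted from include/linux/dcache.h:

    #include <linux/list.h>
    #include <linux/rcupdate.h>

    struct dentry_like {
            struct list_head d_subdirs;		/* our children */
            union {
                    struct list_head d_child;	/* child of parent list */
                    struct rcu_head d_rcu;	/* valid once unlinked */
            } d_u;
    };
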
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
new file mode 100644
index 000000000000..4e4762389bdc
--- /dev/null
+++ b/fs/drop_caches.c
@@ -0,0 +1,68 @@
+/*
+ * Implement the manual drop-all-pagecache function
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/writeback.h>
+#include <linux/sysctl.h>
+#include <linux/gfp.h>
+
+/* A global variable is a bit ugly, but it keeps the code simple */
+int sysctl_drop_caches;
+
+static void drop_pagecache_sb(struct super_block *sb)
+{
+	struct inode *inode;
+
+	spin_lock(&inode_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		if (inode->i_state & (I_FREEING|I_WILL_FREE))
+			continue;
+		invalidate_inode_pages(inode->i_mapping);
+	}
+	spin_unlock(&inode_lock);
+}
+
+void drop_pagecache(void)
+{
+	struct super_block *sb;
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root)
+			drop_pagecache_sb(sb);
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+}
+
+void drop_slab(void)
+{
+	int nr_objects;
+
+	do {
+		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+	} while (nr_objects > 10);
+}
+
+int drop_caches_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (write) {
+		if (sysctl_drop_caches & 1)
+			drop_pagecache();
+		if (sysctl_drop_caches & 2)
+			drop_slab();
+	}
+	return 0;
+}
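
Together with the fs/Makefile hunk earlier, this new file wires up the vm.drop_caches sysctl: bit 0 drops clean pagecache, bit 1 shrinks the slab caches. Assuming the sysctl table entry added elsewhere in this merge exposes it as /proc/sys/vm/drop_caches, a minimal userspace exerciser looks like:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/proc/sys/vm/drop_caches", O_WRONLY);

            if (fd < 0)
                    return 1;
            write(fd, "3", 1);	/* 1 = pagecache, 2 = slab, 3 = both */
            close(fd);
            return 0;
    }
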
diff --git a/fs/exec.c b/fs/exec.c
index e75a9548da8e..fd02ea4a81e9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@ no_thread_group:
 		spin_lock(&oldsighand->siglock);
 		spin_lock(&newsighand->siglock);
 
-		current->sighand = newsighand;
+		rcu_assign_pointer(current->sighand, newsighand);
 		recalc_sigpending();
 
 		spin_unlock(&newsighand->siglock);
@@ -768,7 +768,7 @@ no_thread_group:
 		write_unlock_irq(&tasklist_lock);
 
 		if (atomic_dec_and_test(&oldsighand->count))
-			kmem_cache_free(sighand_cachep, oldsighand);
+			sighand_free(oldsighand);
 	}
 
 	BUG_ON(!thread_group_leader(current));
@@ -1462,6 +1462,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
 		current->signal->flags = SIGNAL_GROUP_EXIT;
 		current->signal->group_exit_code = exit_code;
+		current->signal->group_stop_count = 0;
 		retval = 0;
 	}
 	spin_unlock_irq(&current->sighand->siglock);
@@ -1477,7 +1478,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 * Clear any false indication of pending signals that might
 	 * be seen by the filesystem code called to write the core file.
 	 */
-	current->signal->group_stop_count = 0;
 	clear_thread_flag(TIF_SIGPENDING);
 
 	if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
@@ -1505,7 +1505,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		goto close_fail;
 	if (!file->f_op->write)
 		goto close_fail;
-	if (do_truncate(file->f_dentry, 0, file) != 0)
+	if (do_truncate(file->f_dentry, 0, 0, file) != 0)
 		goto close_fail;
 
 	retval = binfmt->core_dump(signr, regs, file);
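
The rcu_assign_pointer()/sighand_free() pair above converts sighand switching to RCU: the assignment orders the new structure's initialization before publication, and the free is deferred past a grace period so lock-free readers never see freed memory. A generic sketch of the publish side (not the fs/exec.c code itself):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct cfg { int value; };

    static struct cfg *live_cfg;	/* readers dereference under rcu_read_lock() */

    static int publish_cfg(int value)
    {
            struct cfg *n = kmalloc(sizeof(*n), GFP_KERNEL);

            if (!n)
                    return -ENOMEM;
            n->value = value;			/* initialize fully... */
            rcu_assign_pointer(live_cfg, n);	/* ...then publish */
            /* the object replaced here must be freed via call_rcu() */
            return 0;
    }
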
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9e4a24376210..69078079b19c 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -651,7 +651,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	/* Error cases - e2fsck has already cleaned up for us */
 	if (ino > max_ino) {
 		ext3_warning(sb, __FUNCTION__,
-			     "bad orphan ino %lu! e2fsck was run?\n", ino);
+			     "bad orphan ino %lu! e2fsck was run?", ino);
 		goto out;
 	}
 
@@ -660,7 +660,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	bitmap_bh = read_inode_bitmap(sb, block_group);
 	if (!bitmap_bh) {
 		ext3_warning(sb, __FUNCTION__,
-			     "inode bitmap error for orphan %lu\n", ino);
+			     "inode bitmap error for orphan %lu", ino);
 		goto out;
 	}
 
@@ -672,7 +672,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
 	    !(inode = iget(sb, ino)) || is_bad_inode(inode) ||
 	    NEXT_ORPHAN(inode) > max_ino) {
 		ext3_warning(sb, __FUNCTION__,
-			     "bad orphan inode %lu! e2fsck was run?\n", ino);
+			     "bad orphan inode %lu! e2fsck was run?", ino);
 		printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
 		       bit, (unsigned long long)bitmap_bh->b_blocknr,
 		       ext3_test_bit(bit, bitmap_bh->b_data));
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b3c690a3b54a..af193a304ee5 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1476,7 +1476,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 	if (levels && (dx_get_count(frames->entries) ==
 		       dx_get_limit(frames->entries))) {
 		ext3_warning(sb, __FUNCTION__,
-			     "Directory index full!\n");
+			     "Directory index full!");
 		err = -ENOSPC;
 		goto cleanup;
 	}
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 6104ad310507..1041dab6de2f 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -31,7 +31,7 @@ static int verify_group_input(struct super_block *sb,
 	unsigned start = le32_to_cpu(es->s_blocks_count);
 	unsigned end = start + input->blocks_count;
 	unsigned group = input->group;
-	unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group;
+	unsigned itend = input->inode_table + sbi->s_itb_per_group;
 	unsigned overhead = ext3_bg_has_super(sb, group) ?
 			    (1 + ext3_bg_num_gdb(sb, group) +
 			     le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
@@ -340,7 +340,7 @@ static int verify_reserved_gdb(struct super_block *sb,
 	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
 		if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
 			ext3_warning(sb, __FUNCTION__,
-				     "reserved GDT %ld missing grp %d (%ld)\n",
+				     "reserved GDT %ld missing grp %d (%ld)",
 				     blk, grp,
 				     grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
 			return -EINVAL;
@@ -393,7 +393,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	if (EXT3_SB(sb)->s_sbh->b_blocknr !=
 	    le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
 		ext3_warning(sb, __FUNCTION__,
-			"won't resize using backup superblock at %llu\n",
+			"won't resize using backup superblock at %llu",
 			(unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
 		return -EPERM;
 	}
@@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	data = (__u32 *)dind->b_data;
 	if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
 		ext3_warning(sb, __FUNCTION__,
-			     "new group %u GDT block %lu not reserved\n",
+			     "new group %u GDT block %lu not reserved",
 			     input->group, gdblock);
 		err = -EINVAL;
 		goto exit_dind;
@@ -540,7 +540,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	for (res = 0; res < reserved_gdb; res++, blk++) {
 		if (le32_to_cpu(*data) != blk) {
 			ext3_warning(sb, __FUNCTION__,
-				     "reserved block %lu not at offset %ld\n",
+				     "reserved block %lu not at offset %ld",
 				     blk, (long)(data - (__u32 *)dind->b_data));
 			err = -EINVAL;
 			goto exit_bh;
@@ -683,7 +683,7 @@ exit_err:
 	if (err) {
 		ext3_warning(sb, __FUNCTION__,
 			     "can't update backup for group %d (err %d), "
-			     "forcing fsck on next reboot\n", group, err);
+			     "forcing fsck on next reboot", group, err);
 		sbi->s_mount_state &= ~EXT3_VALID_FS;
 		sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
 		mark_buffer_dirty(sbi->s_sbh);
@@ -722,7 +722,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
 					EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
 		ext3_warning(sb, __FUNCTION__,
-			     "Can't resize non-sparse filesystem further\n");
+			     "Can't resize non-sparse filesystem further");
 		return -EPERM;
 	}
 
@@ -730,13 +730,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 		if (!EXT3_HAS_COMPAT_FEATURE(sb,
 					     EXT3_FEATURE_COMPAT_RESIZE_INODE)){
 			ext3_warning(sb, __FUNCTION__,
-				     "No reserved GDT blocks, can't resize\n");
+				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
 		}
 		inode = iget(sb, EXT3_RESIZE_INO);
 		if (!inode || is_bad_inode(inode)) {
 			ext3_warning(sb, __FUNCTION__,
-				     "Error opening resize inode\n");
+				     "Error opening resize inode");
 			iput(inode);
 			return -ENOENT;
 		}
@@ -764,9 +764,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	}
 
 	lock_super(sb);
-	if (input->group != EXT3_SB(sb)->s_groups_count) {
+	if (input->group != sbi->s_groups_count) {
 		ext3_warning(sb, __FUNCTION__,
-			     "multiple resizers run on filesystem!\n");
+			     "multiple resizers run on filesystem!");
 		err = -EBUSY;
 		goto exit_journal;
 	}
@@ -799,7 +799,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	 * data.  So we need to be careful to set all of the relevant
 	 * group descriptor data etc. *before* we enable the group.
 	 *
-	 * The key field here is EXT3_SB(sb)->s_groups_count: as long as
+	 * The key field here is sbi->s_groups_count: as long as
 	 * that retains its old value, nobody is going to access the new
 	 * group.
 	 *
@@ -859,7 +859,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	smp_wmb();
 
 	/* Update the global fs size fields */
-	EXT3_SB(sb)->s_groups_count++;
+	sbi->s_groups_count++;
 
 	ext3_journal_dirty_metadata(handle, primary);
 
@@ -874,7 +874,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	percpu_counter_mod(&sbi->s_freeinodes_counter,
 			   EXT3_INODES_PER_GROUP(sb));
 
-	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
 	sb->s_dirt = 1;
 
 exit_journal:
@@ -937,7 +937,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 
 	if (last == 0) {
 		ext3_warning(sb, __FUNCTION__,
-			     "need to use ext2online to resize further\n");
+			     "need to use ext2online to resize further");
 		return -EPERM;
 	}
 
@@ -973,7 +973,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 	lock_super(sb);
 	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
 		ext3_warning(sb, __FUNCTION__,
-			     "multiple resizers run on filesystem!\n");
+			     "multiple resizers run on filesystem!");
 		err = -EBUSY;
 		goto exit_put;
 	}
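
The pattern across ialloc.c, namei.c and resize.c above is purely cosmetic: ext3_warning() appends its own newline, so format strings ending in \n were producing blank lines in the log. A sketch of a wrapper with that behaviour (shape assumed; the real one lives in fs/ext3/super.c):

    #include <stdarg.h>
    #include <stdio.h>

    static void fs_warning(const char *func, const char *fmt, ...)
    {
            va_list args;

            va_start(args, fmt);
            printf("EXT3-fs warning (%s): ", func);
            vprintf(fmt, args);
            va_end(args);
            putchar('\n');	/* terminator supplied here, not by callers */
    }
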
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 4e6730622d90..7c45acf94589 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -43,7 +43,8 @@
 #include "acl.h"
 #include "namei.h"
 
-static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
+static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
+			     unsigned long journal_devnum);
 static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
 			       int);
 static void ext3_commit_super (struct super_block * sb,
@@ -628,7 +629,7 @@ enum {
 	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
 	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
 	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
-	Opt_commit, Opt_journal_update, Opt_journal_inum,
+	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
@@ -666,6 +667,7 @@ static match_table_t tokens = {
 	{Opt_commit, "commit=%u"},
 	{Opt_journal_update, "journal=update"},
 	{Opt_journal_inum, "journal=%u"},
+	{Opt_journal_dev, "journal_dev=%u"},
 	{Opt_abort, "abort"},
 	{Opt_data_journal, "data=journal"},
 	{Opt_data_ordered, "data=ordered"},
@@ -705,8 +707,9 @@ static unsigned long get_sb_block(void **data)
 	return sb_block;
 }
 
-static int parse_options (char * options, struct super_block *sb,
-			  unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
+static int parse_options (char *options, struct super_block *sb,
+			  unsigned long *inum, unsigned long *journal_devnum,
+			  unsigned long *n_blocks_count, int is_remount)
 {
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
 	char * p;
@@ -839,6 +842,16 @@ static int parse_options (char * options, struct super_block *sb,
 				return 0;
 			*inum = option;
 			break;
+		case Opt_journal_dev:
+			if (is_remount) {
+				printk(KERN_ERR "EXT3-fs: cannot specify "
+				       "journal on remount\n");
+				return 0;
+			}
+			if (match_int(&args[0], &option))
+				return 0;
+			*journal_devnum = option;
+			break;
 		case Opt_noload:
 			set_opt (sbi->s_mount_opt, NOLOAD);
 			break;
@@ -1331,6 +1344,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	unsigned long logic_sb_block;
 	unsigned long offset = 0;
 	unsigned long journal_inum = 0;
+	unsigned long journal_devnum = 0;
 	unsigned long def_mount_opts;
 	struct inode *root;
 	int blocksize;
@@ -1411,7 +1425,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 
 	set_opt(sbi->s_mount_opt, RESERVATION);
 
-	if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
+	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
+			    NULL, 0))
 		goto failed_mount;
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -1622,7 +1637,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	 */
 	if (!test_opt(sb, NOLOAD) &&
 	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
-		if (ext3_load_journal(sb, es))
+		if (ext3_load_journal(sb, es, journal_devnum))
 			goto failed_mount2;
 	} else if (journal_inum) {
 		if (ext3_create_journal(sb, es, journal_inum))
@@ -1902,15 +1917,24 @@ out_bdev:
 	return NULL;
 }
 
-static int ext3_load_journal(struct super_block * sb,
-			     struct ext3_super_block * es)
+static int ext3_load_journal(struct super_block *sb,
+			     struct ext3_super_block *es,
+			     unsigned long journal_devnum)
 {
 	journal_t *journal;
 	int journal_inum = le32_to_cpu(es->s_journal_inum);
-	dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
+	dev_t journal_dev;
 	int err = 0;
 	int really_read_only;
 
+	if (journal_devnum &&
+	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+		printk(KERN_INFO "EXT3-fs: external journal device major/minor "
+		       "numbers have changed\n");
+		journal_dev = new_decode_dev(journal_devnum);
+	} else
+		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
+
 	really_read_only = bdev_read_only(sb->s_bdev);
 
 	/*
@@ -1969,6 +1993,16 @@ static int ext3_load_journal(struct super_block * sb,
 
 	EXT3_SB(sb)->s_journal = journal;
 	ext3_clear_journal_err(sb, es);
+
+	if (journal_devnum &&
+	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+		es->s_journal_dev = cpu_to_le32(journal_devnum);
+		sb->s_dirt = 1;
+
+		/* Make sure we flush the recovery flag to disk. */
+		ext3_commit_super(sb, es, 1);
+	}
+
 	return 0;
 }
 
@@ -2197,7 +2231,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 	/*
 	 * Allow the "check" option to be passed as a remount option.
 	 */
-	if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) {
+	if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
 		err = -EINVAL;
 		goto restore_opts;
 	}
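
The new journal_dev=%u mount option takes a kernel-encoded device number, letting a mount succeed when an external journal device's major/minor changed since mkfs; ext3_load_journal() then writes the new number back into the superblock. For small numbers, the old-style encoding that new_decode_dev() accepts is simply (major << 8) | minor, so an external journal on, say, major 8 minor 32 would be passed as journal_dev=2080 (device numbers here are illustrative):

    #include <stdio.h>

    /* old-style encoding understood by new_decode_dev()
     * for majors and minors below 256 */
    static unsigned int encode_dev(unsigned int major, unsigned int minor)
    {
            return (major << 8) | minor;
    }

    int main(void)
    {
            printf("mount -o journal_dev=%u ...\n", encode_dev(8, 32));
            return 0;
    }
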
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 77c24fcf712a..1acc941245fb 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -295,7 +295,8 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
 	return dclus;
 }
 
-int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
+int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+	     unsigned long *mapped_blocks)
 {
 	struct super_block *sb = inode->i_sb;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -303,9 +304,12 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
 	int cluster, offset;
 
 	*phys = 0;
+	*mapped_blocks = 0;
 	if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) {
-		if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits))
+		if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) {
 			*phys = sector + sbi->dir_start;
+			*mapped_blocks = 1;
+		}
 		return 0;
 	}
 	last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1))
@@ -318,7 +322,11 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
 	cluster = fat_bmap_cluster(inode, cluster);
 	if (cluster < 0)
 		return cluster;
-	else if (cluster)
+	else if (cluster) {
 		*phys = fat_clus_to_blknr(sbi, cluster) + offset;
+		*mapped_blocks = sbi->sec_per_clus - offset;
+		if (*mapped_blocks > last_block - sector)
+			*mapped_blocks = last_block - sector;
+	}
 	return 0;
 }
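
fat_bmap()'s new mapped_blocks out-parameter reports how many sectors starting at *phys are contiguous within the cluster (clamped to the file's last block), so callers can act on a whole run instead of re-mapping sector by sector. A hedged caller sketch, hypothetical rather than taken from the patch:

    static void dir_readahead_run(struct inode *dir, sector_t iblock)
    {
            sector_t phys;
            unsigned long mapped, i;

            if (fat_bmap(dir, iblock, &phys, &mapped) || !phys)
                    return;
            for (i = 0; i < mapped; i++)	/* one contiguous run, no re-mapping */
                    sb_breadahead(dir->i_sb, phys + i);
    }
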
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ba824964b9bb..eef1b81aa294 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -45,8 +45,8 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
45 if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO)) 45 if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
46 return; 46 return;
47 47
48 bh = sb_getblk(sb, phys); 48 bh = sb_find_get_block(sb, phys);
49 if (bh && !buffer_uptodate(bh)) { 49 if (bh == NULL || !buffer_uptodate(bh)) {
50 for (sec = 0; sec < sbi->sec_per_clus; sec++) 50 for (sec = 0; sec < sbi->sec_per_clus; sec++)
51 sb_breadahead(sb, phys + sec); 51 sb_breadahead(sb, phys + sec);
52 } 52 }
@@ -68,8 +68,8 @@ static int fat__get_entry(struct inode *dir, loff_t *pos,
68{ 68{
69 struct super_block *sb = dir->i_sb; 69 struct super_block *sb = dir->i_sb;
70 sector_t phys, iblock; 70 sector_t phys, iblock;
71 int offset; 71 unsigned long mapped_blocks;
72 int err; 72 int err, offset;
73 73
74next: 74next:
75 if (*bh) 75 if (*bh)
@@ -77,7 +77,7 @@ next:
77 77
78 *bh = NULL; 78 *bh = NULL;
79 iblock = *pos >> sb->s_blocksize_bits; 79 iblock = *pos >> sb->s_blocksize_bits;
80 err = fat_bmap(dir, iblock, &phys); 80 err = fat_bmap(dir, iblock, &phys, &mapped_blocks);
81 if (err || !phys) 81 if (err || !phys)
82 return -1; /* beyond EOF or error */ 82 return -1; /* beyond EOF or error */
83 83
@@ -418,7 +418,7 @@ EODir:
418 return err; 418 return err;
419} 419}
420 420
421EXPORT_SYMBOL(fat_search_long); 421EXPORT_SYMBOL_GPL(fat_search_long);
422 422
423struct fat_ioctl_filldir_callback { 423struct fat_ioctl_filldir_callback {
424 struct dirent __user *dirent; 424 struct dirent __user *dirent;
@@ -780,7 +780,7 @@ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
780 return -ENOENT; 780 return -ENOENT;
781} 781}
782 782
783EXPORT_SYMBOL(fat_get_dotdot_entry); 783EXPORT_SYMBOL_GPL(fat_get_dotdot_entry);
784 784
785/* See if directory is empty */ 785/* See if directory is empty */
786int fat_dir_empty(struct inode *dir) 786int fat_dir_empty(struct inode *dir)
@@ -803,7 +803,7 @@ int fat_dir_empty(struct inode *dir)
803 return result; 803 return result;
804} 804}
805 805
806EXPORT_SYMBOL(fat_dir_empty); 806EXPORT_SYMBOL_GPL(fat_dir_empty);
807 807
808/* 808/*
809 * fat_subdirs counts the number of sub-directories of dir. It can be run 809 * fat_subdirs counts the number of sub-directories of dir. It can be run
@@ -849,7 +849,7 @@ int fat_scan(struct inode *dir, const unsigned char *name,
849 return -ENOENT; 849 return -ENOENT;
850} 850}
851 851
852EXPORT_SYMBOL(fat_scan); 852EXPORT_SYMBOL_GPL(fat_scan);
853 853
854static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) 854static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
855{ 855{
@@ -936,7 +936,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
936 return 0; 936 return 0;
937} 937}
938 938
939EXPORT_SYMBOL(fat_remove_entries); 939EXPORT_SYMBOL_GPL(fat_remove_entries);
940 940
941static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, 941static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
942 struct buffer_head **bhs, int nr_bhs) 942 struct buffer_head **bhs, int nr_bhs)
@@ -1048,7 +1048,7 @@ error:
1048 return err; 1048 return err;
1049} 1049}
1050 1050
1051EXPORT_SYMBOL(fat_alloc_new_dir); 1051EXPORT_SYMBOL_GPL(fat_alloc_new_dir);
1052 1052
1053static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, 1053static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
1054 int *nr_cluster, struct msdos_dir_entry **de, 1054 int *nr_cluster, struct msdos_dir_entry **de,
@@ -1264,4 +1264,4 @@ error_remove:
1264 return err; 1264 return err;
1265} 1265}
1266 1266
1267EXPORT_SYMBOL(fat_add_entries); 1267EXPORT_SYMBOL_GPL(fat_add_entries);
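
Two independent changes in fs/fat/dir.c: the readahead helper switches from sb_getblk() to sb_find_get_block(), which only looks up an already-cached buffer and returns NULL otherwise, so a completely uncached cluster (bh == NULL) now triggers readahead instead of allocating an empty buffer that defeated the check; and the FAT helper exports become GPL-only, a sweep that continues through the fatent.c, file.c, inode.c, and misc.c hunks below. A sketch of the corrected readahead test, assuming the brelse() from the unchanged context; dir_readahead is an illustrative name:

#include <linux/fs.h>
#include <linux/buffer_head.h>

static void dir_readahead(struct super_block *sb, sector_t phys,
			  int sec_per_clus)
{
	struct buffer_head *bh;
	int sec;

	bh = sb_find_get_block(sb, phys);	/* lookup only, may be NULL */
	if (bh == NULL || !buffer_uptodate(bh)) {
		for (sec = 0; sec < sec_per_clus; sec++)
			sb_breadahead(sb, phys + sec);
	}
	brelse(bh);	/* brelse(NULL) is a no-op */
}
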
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 4164cd54c4d1..a1a9e0451217 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -476,6 +476,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
476 sbi->prev_free = entry; 476 sbi->prev_free = entry;
477 if (sbi->free_clusters != -1) 477 if (sbi->free_clusters != -1)
478 sbi->free_clusters--; 478 sbi->free_clusters--;
479 sb->s_dirt = 1;
479 480
480 cluster[idx_clus] = entry; 481 cluster[idx_clus] = entry;
481 idx_clus++; 482 idx_clus++;
@@ -496,6 +497,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
496 497
497 /* Couldn't allocate the free entries */ 498 /* Couldn't allocate the free entries */
498 sbi->free_clusters = 0; 499 sbi->free_clusters = 0;
500 sb->s_dirt = 1;
499 err = -ENOSPC; 501 err = -ENOSPC;
500 502
501out: 503out:
@@ -509,7 +511,6 @@ out:
509 } 511 }
510 for (i = 0; i < nr_bhs; i++) 512 for (i = 0; i < nr_bhs; i++)
511 brelse(bhs[i]); 513 brelse(bhs[i]);
512 fat_clusters_flush(sb);
513 514
514 if (err && idx_clus) 515 if (err && idx_clus)
515 fat_free_clusters(inode, cluster[0]); 516 fat_free_clusters(inode, cluster[0]);
@@ -542,8 +543,10 @@ int fat_free_clusters(struct inode *inode, int cluster)
542 } 543 }
543 544
544 ops->ent_put(&fatent, FAT_ENT_FREE); 545 ops->ent_put(&fatent, FAT_ENT_FREE);
545 if (sbi->free_clusters != -1) 546 if (sbi->free_clusters != -1) {
546 sbi->free_clusters++; 547 sbi->free_clusters++;
548 sb->s_dirt = 1;
549 }
547 550
548 if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { 551 if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) {
549 if (sb->s_flags & MS_SYNCHRONOUS) { 552 if (sb->s_flags & MS_SYNCHRONOUS) {
@@ -578,7 +581,7 @@ error:
578 return err; 581 return err;
579} 582}
580 583
581EXPORT_SYMBOL(fat_free_clusters); 584EXPORT_SYMBOL_GPL(fat_free_clusters);
582 585
583int fat_count_free_clusters(struct super_block *sb) 586int fat_count_free_clusters(struct super_block *sb)
584{ 587{
@@ -605,6 +608,7 @@ int fat_count_free_clusters(struct super_block *sb)
605 } while (fat_ent_next(sbi, &fatent)); 608 } while (fat_ent_next(sbi, &fatent));
606 } 609 }
607 sbi->free_clusters = free; 610 sbi->free_clusters = free;
611 sb->s_dirt = 1;
608 fatent_brelse(&fatent); 612 fatent_brelse(&fatent);
609out: 613out:
610 unlock_fat(sbi); 614 unlock_fat(sbi);
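
Free-cluster accounting no longer flushes FSINFO synchronously from fat_alloc_clusters(); it updates the in-memory counters and sets s_dirt, deferring the on-disk update to the new ->write_super hook added in the inode.c hunk below. A sketch of the pattern; note_cluster_freed is an illustrative name:

#include <linux/msdos_fs.h>

static void note_cluster_freed(struct super_block *sb)
{
	struct msdos_sb_info *sbi = MSDOS_SB(sb);

	if (sbi->free_clusters != -1) {
		sbi->free_clusters++;
		sb->s_dirt = 1;	/* fat_write_super() flushes FSINFO later */
	}
}
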
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7134403d5be2..9b07c328a6fc 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -11,6 +11,7 @@
11#include <linux/msdos_fs.h> 11#include <linux/msdos_fs.h>
12#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/writeback.h>
14 15
15int fat_generic_ioctl(struct inode *inode, struct file *filp, 16int fat_generic_ioctl(struct inode *inode, struct file *filp,
16 unsigned int cmd, unsigned long arg) 17 unsigned int cmd, unsigned long arg)
@@ -124,6 +125,24 @@ struct file_operations fat_file_operations = {
124 .sendfile = generic_file_sendfile, 125 .sendfile = generic_file_sendfile,
125}; 126};
126 127
128static int fat_cont_expand(struct inode *inode, loff_t size)
129{
130 struct address_space *mapping = inode->i_mapping;
131 loff_t start = inode->i_size, count = size - inode->i_size;
132 int err;
133
134 err = generic_cont_expand_simple(inode, size);
135 if (err)
136 goto out;
137
138 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
139 mark_inode_dirty(inode);
140 if (IS_SYNC(inode))
141 err = sync_page_range_nolock(inode, mapping, start, count);
142out:
143 return err;
144}
145
127int fat_notify_change(struct dentry *dentry, struct iattr *attr) 146int fat_notify_change(struct dentry *dentry, struct iattr *attr)
128{ 147{
129 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 148 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -132,11 +151,17 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr)
132 151
133 lock_kernel(); 152 lock_kernel();
134 153
135 /* FAT cannot truncate to a longer file */ 154 /*
 155 * Expand the file. inode_setattr() updates ->i_size before
 156 * calling ->truncate(), so FAT must fill the hole before
 157 * that happens.
158 */
136 if (attr->ia_valid & ATTR_SIZE) { 159 if (attr->ia_valid & ATTR_SIZE) {
137 if (attr->ia_size > inode->i_size) { 160 if (attr->ia_size > inode->i_size) {
138 error = -EPERM; 161 error = fat_cont_expand(inode, attr->ia_size);
139 goto out; 162 if (error || attr->ia_valid == ATTR_SIZE)
163 goto out;
164 attr->ia_valid &= ~ATTR_SIZE;
140 } 165 }
141 } 166 }
142 167
@@ -173,7 +198,7 @@ out:
173 return error; 198 return error;
174} 199}
175 200
176EXPORT_SYMBOL(fat_notify_change); 201EXPORT_SYMBOL_GPL(fat_notify_change);
177 202
178/* Free all clusters after the skip'th cluster. */ 203/* Free all clusters after the skip'th cluster. */
179static int fat_free(struct inode *inode, int skip) 204static int fat_free(struct inode *inode, int skip)
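
fat_notify_change() previously rejected truncate-up with -EPERM; it now expands the file first, because inode_setattr() bumps ->i_size before calling ->truncate() and FAT must zero-fill the hole (via generic_cont_expand_simple() inside fat_cont_expand()) before that happens. A condensed sketch of the new control flow; handle_size is an illustrative wrapper around logic that lives inline in the hunk:

#include <linux/fs.h>

static int handle_size(struct inode *inode, struct iattr *attr)
{
	int error = 0;

	if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) {
		error = fat_cont_expand(inode, attr->ia_size);	/* zero-fill */
		if (error || attr->ia_valid == ATTR_SIZE)
			return error;	/* failed, or size was the only change */
		attr->ia_valid &= ~ATTR_SIZE;	/* size already handled */
	}
	return error;
}
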
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a0f9b9fe1307..e7f4aa7fc686 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -18,10 +18,12 @@
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/msdos_fs.h> 19#include <linux/msdos_fs.h>
20#include <linux/pagemap.h> 20#include <linux/pagemap.h>
21#include <linux/mpage.h>
21#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
22#include <linux/mount.h> 23#include <linux/mount.h>
23#include <linux/vfs.h> 24#include <linux/vfs.h>
24#include <linux/parser.h> 25#include <linux/parser.h>
26#include <linux/uio.h>
25#include <asm/unaligned.h> 27#include <asm/unaligned.h>
26 28
27#ifndef CONFIG_FAT_DEFAULT_IOCHARSET 29#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -48,51 +50,97 @@ static int fat_add_cluster(struct inode *inode)
48 return err; 50 return err;
49} 51}
50 52
51static int fat_get_block(struct inode *inode, sector_t iblock, 53static int __fat_get_blocks(struct inode *inode, sector_t iblock,
52 struct buffer_head *bh_result, int create) 54 unsigned long *max_blocks,
55 struct buffer_head *bh_result, int create)
53{ 56{
54 struct super_block *sb = inode->i_sb; 57 struct super_block *sb = inode->i_sb;
58 struct msdos_sb_info *sbi = MSDOS_SB(sb);
55 sector_t phys; 59 sector_t phys;
56 int err; 60 unsigned long mapped_blocks;
61 int err, offset;
57 62
58 err = fat_bmap(inode, iblock, &phys); 63 err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
59 if (err) 64 if (err)
60 return err; 65 return err;
61 if (phys) { 66 if (phys) {
62 map_bh(bh_result, sb, phys); 67 map_bh(bh_result, sb, phys);
68 *max_blocks = min(mapped_blocks, *max_blocks);
63 return 0; 69 return 0;
64 } 70 }
65 if (!create) 71 if (!create)
66 return 0; 72 return 0;
73
67 if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { 74 if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
68 fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", 75 fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
69 MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); 76 MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
70 return -EIO; 77 return -EIO;
71 } 78 }
72 if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) { 79
80 offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
81 if (!offset) {
82 /* TODO: multiple cluster allocation would be desirable. */
73 err = fat_add_cluster(inode); 83 err = fat_add_cluster(inode);
74 if (err) 84 if (err)
75 return err; 85 return err;
76 } 86 }
77 MSDOS_I(inode)->mmu_private += sb->s_blocksize; 87 /* available blocks on this cluster */
78 err = fat_bmap(inode, iblock, &phys); 88 mapped_blocks = sbi->sec_per_clus - offset;
89
90 *max_blocks = min(mapped_blocks, *max_blocks);
91 MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
92
93 err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
79 if (err) 94 if (err)
80 return err; 95 return err;
81 if (!phys) 96 BUG_ON(!phys);
82 BUG(); 97 BUG_ON(*max_blocks != mapped_blocks);
83 set_buffer_new(bh_result); 98 set_buffer_new(bh_result);
84 map_bh(bh_result, sb, phys); 99 map_bh(bh_result, sb, phys);
85 return 0; 100 return 0;
86} 101}
87 102
103static int fat_get_blocks(struct inode *inode, sector_t iblock,
104 unsigned long max_blocks,
105 struct buffer_head *bh_result, int create)
106{
107 struct super_block *sb = inode->i_sb;
108 int err;
109
110 err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
111 if (err)
112 return err;
113 bh_result->b_size = max_blocks << sb->s_blocksize_bits;
114 return 0;
115}
116
117static int fat_get_block(struct inode *inode, sector_t iblock,
118 struct buffer_head *bh_result, int create)
119{
120 unsigned long max_blocks = 1;
121 return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
122}
123
88static int fat_writepage(struct page *page, struct writeback_control *wbc) 124static int fat_writepage(struct page *page, struct writeback_control *wbc)
89{ 125{
90 return block_write_full_page(page, fat_get_block, wbc); 126 return block_write_full_page(page, fat_get_block, wbc);
91} 127}
92 128
129static int fat_writepages(struct address_space *mapping,
130 struct writeback_control *wbc)
131{
132 return mpage_writepages(mapping, wbc, fat_get_block);
133}
134
93static int fat_readpage(struct file *file, struct page *page) 135static int fat_readpage(struct file *file, struct page *page)
94{ 136{
95 return block_read_full_page(page, fat_get_block); 137 return mpage_readpage(page, fat_get_block);
138}
139
140static int fat_readpages(struct file *file, struct address_space *mapping,
141 struct list_head *pages, unsigned nr_pages)
142{
143 return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
96} 144}
97 145
98static int fat_prepare_write(struct file *file, struct page *page, 146static int fat_prepare_write(struct file *file, struct page *page,
@@ -115,6 +163,34 @@ static int fat_commit_write(struct file *file, struct page *page,
115 return err; 163 return err;
116} 164}
117 165
166static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
167 const struct iovec *iov,
168 loff_t offset, unsigned long nr_segs)
169{
170 struct file *file = iocb->ki_filp;
171 struct inode *inode = file->f_mapping->host;
172
173 if (rw == WRITE) {
174 /*
175 * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(),
176 * so we need to update the ->mmu_private to block boundary.
177 *
178 * But we must fill the remaining area or hole by nul for
179 * updating ->mmu_private.
180 */
181 loff_t size = offset + iov_length(iov, nr_segs);
182 if (MSDOS_I(inode)->mmu_private < size)
183 return -EINVAL;
184 }
185
186 /*
 187 * FAT needs to use DIO_LOCKING to avoid a race between
 188 * fat_get_block() and ->truncate().
189 */
190 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
191 offset, nr_segs, fat_get_blocks, NULL);
192}
193
118static sector_t _fat_bmap(struct address_space *mapping, sector_t block) 194static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
119{ 195{
120 return generic_block_bmap(mapping, block, fat_get_block); 196 return generic_block_bmap(mapping, block, fat_get_block);
@@ -122,10 +198,13 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
122 198
123static struct address_space_operations fat_aops = { 199static struct address_space_operations fat_aops = {
124 .readpage = fat_readpage, 200 .readpage = fat_readpage,
201 .readpages = fat_readpages,
125 .writepage = fat_writepage, 202 .writepage = fat_writepage,
203 .writepages = fat_writepages,
126 .sync_page = block_sync_page, 204 .sync_page = block_sync_page,
127 .prepare_write = fat_prepare_write, 205 .prepare_write = fat_prepare_write,
128 .commit_write = fat_commit_write, 206 .commit_write = fat_commit_write,
207 .direct_IO = fat_direct_IO,
129 .bmap = _fat_bmap 208 .bmap = _fat_bmap
130}; 209};
131 210
@@ -182,7 +261,7 @@ void fat_attach(struct inode *inode, loff_t i_pos)
182 spin_unlock(&sbi->inode_hash_lock); 261 spin_unlock(&sbi->inode_hash_lock);
183} 262}
184 263
185EXPORT_SYMBOL(fat_attach); 264EXPORT_SYMBOL_GPL(fat_attach);
186 265
187void fat_detach(struct inode *inode) 266void fat_detach(struct inode *inode)
188{ 267{
@@ -193,7 +272,7 @@ void fat_detach(struct inode *inode)
193 spin_unlock(&sbi->inode_hash_lock); 272 spin_unlock(&sbi->inode_hash_lock);
194} 273}
195 274
196EXPORT_SYMBOL(fat_detach); 275EXPORT_SYMBOL_GPL(fat_detach);
197 276
198struct inode *fat_iget(struct super_block *sb, loff_t i_pos) 277struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
199{ 278{
@@ -347,7 +426,7 @@ out:
347 return inode; 426 return inode;
348} 427}
349 428
350EXPORT_SYMBOL(fat_build_inode); 429EXPORT_SYMBOL_GPL(fat_build_inode);
351 430
352static void fat_delete_inode(struct inode *inode) 431static void fat_delete_inode(struct inode *inode)
353{ 432{
@@ -374,12 +453,17 @@ static void fat_clear_inode(struct inode *inode)
374 unlock_kernel(); 453 unlock_kernel();
375} 454}
376 455
377static void fat_put_super(struct super_block *sb) 456static void fat_write_super(struct super_block *sb)
378{ 457{
379 struct msdos_sb_info *sbi = MSDOS_SB(sb); 458 sb->s_dirt = 0;
380 459
381 if (!(sb->s_flags & MS_RDONLY)) 460 if (!(sb->s_flags & MS_RDONLY))
382 fat_clusters_flush(sb); 461 fat_clusters_flush(sb);
462}
463
464static void fat_put_super(struct super_block *sb)
465{
466 struct msdos_sb_info *sbi = MSDOS_SB(sb);
383 467
384 if (sbi->nls_disk) { 468 if (sbi->nls_disk) {
385 unload_nls(sbi->nls_disk); 469 unload_nls(sbi->nls_disk);
@@ -537,7 +621,7 @@ int fat_sync_inode(struct inode *inode)
537 return fat_write_inode(inode, 1); 621 return fat_write_inode(inode, 1);
538} 622}
539 623
540EXPORT_SYMBOL(fat_sync_inode); 624EXPORT_SYMBOL_GPL(fat_sync_inode);
541 625
542static int fat_show_options(struct seq_file *m, struct vfsmount *mnt); 626static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
543static struct super_operations fat_sops = { 627static struct super_operations fat_sops = {
@@ -546,6 +630,7 @@ static struct super_operations fat_sops = {
546 .write_inode = fat_write_inode, 630 .write_inode = fat_write_inode,
547 .delete_inode = fat_delete_inode, 631 .delete_inode = fat_delete_inode,
548 .put_super = fat_put_super, 632 .put_super = fat_put_super,
633 .write_super = fat_write_super,
549 .statfs = fat_statfs, 634 .statfs = fat_statfs,
550 .clear_inode = fat_clear_inode, 635 .clear_inode = fat_clear_inode,
551 .remount_fs = fat_remount, 636 .remount_fs = fat_remount,
@@ -1347,7 +1432,7 @@ out_fail:
1347 return error; 1432 return error;
1348} 1433}
1349 1434
1350EXPORT_SYMBOL(fat_fill_super); 1435EXPORT_SYMBOL_GPL(fat_fill_super);
1351 1436
1352int __init fat_cache_init(void); 1437int __init fat_cache_init(void);
1353void fat_cache_destroy(void); 1438void fat_cache_destroy(void);
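
fs/fat/inode.c carries the bulk of the work: a get_blocks-style interface that maps runs of contiguous blocks (scaling bh_result->b_size), multipage I/O via mpage_readpage(s)/mpage_writepages(), a ->direct_IO method, and a ->write_super that flushes the FSINFO counters dirtied by the fatent.c hunk above. The direct-write guard deserves a note: blockdev_direct_IO() bypasses ->prepare_write(), so it cannot zero-fill, and writes are refused unless they stay within the initialized-data boundary FAT tracks in mmu_private. A sketch of that check; dio_write_ok is an illustrative name:

#include <linux/msdos_fs.h>
#include <linux/uio.h>

static int dio_write_ok(struct inode *inode, const struct iovec *iov,
			loff_t offset, unsigned long nr_segs)
{
	loff_t end = offset + iov_length(iov, nr_segs);

	/* only pure overwrites below mmu_private are allowed;
	 * anything else falls back to buffered I/O (-EINVAL) */
	return MSDOS_I(inode)->mmu_private >= end;
}
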
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 2a0df2122f5d..32fb0a3f1da4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -33,7 +33,7 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...)
33 } 33 }
34} 34}
35 35
36EXPORT_SYMBOL(fat_fs_panic); 36EXPORT_SYMBOL_GPL(fat_fs_panic);
37 37
38/* Flushes the number of free clusters on FAT32 */ 38/* Flushes the number of free clusters on FAT32 */
39/* XXX: Need to write one per FSINFO block. Currently only writes 1 */ 39/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
@@ -67,8 +67,6 @@ void fat_clusters_flush(struct super_block *sb)
67 if (sbi->prev_free != -1) 67 if (sbi->prev_free != -1)
68 fsinfo->next_cluster = cpu_to_le32(sbi->prev_free); 68 fsinfo->next_cluster = cpu_to_le32(sbi->prev_free);
69 mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
70 if (sb->s_flags & MS_SYNCHRONOUS)
71 sync_dirty_buffer(bh);
72 } 70 }
73 brelse(bh); 71 brelse(bh);
74} 72}
@@ -194,7 +192,7 @@ void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
194 *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); 192 *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9));
195} 193}
196 194
197EXPORT_SYMBOL(fat_date_unix2dos); 195EXPORT_SYMBOL_GPL(fat_date_unix2dos);
198 196
199int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) 197int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
200{ 198{
@@ -222,4 +220,4 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
222 return err; 220 return err;
223} 221}
224 222
225EXPORT_SYMBOL(fat_sync_bhs); 223EXPORT_SYMBOL_GPL(fat_sync_bhs);
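
With ->write_super now driving FSINFO updates, fat_clusters_flush() no longer forces a synchronous write even on MS_SYNCHRONOUS mounts; it dirties the buffer and lets writeback (or an explicit sync) carry it to disk. A sketch of the relaxed tail of the function; flush_fsinfo_buf is an illustrative name:

#include <linux/buffer_head.h>

static void flush_fsinfo_buf(struct buffer_head *bh)
{
	mark_buffer_dirty(bh);	/* the sync_dirty_buffer() step is gone */
	brelse(bh);
}
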
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 863b46e0d78a..9903bde475f2 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -457,11 +457,11 @@ static void send_sigio_to_task(struct task_struct *p,
457 else 457 else
458 si.si_band = band_table[reason - POLL_IN]; 458 si.si_band = band_table[reason - POLL_IN];
459 si.si_fd = fd; 459 si.si_fd = fd;
460 if (!send_group_sig_info(fown->signum, &si, p)) 460 if (!group_send_sig_info(fown->signum, &si, p))
461 break; 461 break;
462 /* fall-through: fall back on the old plain SIGIO signal */ 462 /* fall-through: fall back on the old plain SIGIO signal */
463 case 0: 463 case 0:
464 send_group_sig_info(SIGIO, SEND_SIG_PRIV, p); 464 group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
465 } 465 }
466} 466}
467 467
@@ -495,7 +495,7 @@ static void send_sigurg_to_task(struct task_struct *p,
495 struct fown_struct *fown) 495 struct fown_struct *fown)
496{ 496{
497 if (sigio_perm(p, fown, SIGURG)) 497 if (sigio_perm(p, fown, SIGURG))
498 send_group_sig_info(SIGURG, SEND_SIG_PRIV, p); 498 group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
499} 499}
500 500
501int send_sigurg(struct fown_struct *fown) 501int send_sigurg(struct fown_struct *fown)
diff --git a/fs/file_table.c b/fs/file_table.c
index c3a5e2fd663b..6142250104a6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
117 117
118void fastcall fput(struct file *file) 118void fastcall fput(struct file *file)
119{ 119{
120 if (rcuref_dec_and_test(&file->f_count)) 120 if (atomic_dec_and_test(&file->f_count))
121 __fput(file); 121 __fput(file);
122} 122}
123 123
@@ -166,7 +166,7 @@ struct file fastcall *fget(unsigned int fd)
166 rcu_read_lock(); 166 rcu_read_lock();
167 file = fcheck_files(files, fd); 167 file = fcheck_files(files, fd);
168 if (file) { 168 if (file) {
169 if (!rcuref_inc_lf(&file->f_count)) { 169 if (!atomic_inc_not_zero(&file->f_count)) {
170 /* File object ref couldn't be taken */ 170 /* File object ref couldn't be taken */
171 rcu_read_unlock(); 171 rcu_read_unlock();
172 return NULL; 172 return NULL;
@@ -198,7 +198,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
198 rcu_read_lock(); 198 rcu_read_lock();
199 file = fcheck_files(files, fd); 199 file = fcheck_files(files, fd);
200 if (file) { 200 if (file) {
201 if (rcuref_inc_lf(&file->f_count)) 201 if (atomic_inc_not_zero(&file->f_count))
202 *fput_needed = 1; 202 *fput_needed = 1;
203 else 203 else
204 /* Didn't get the reference, someone's freed */ 204 /* Didn't get the reference, someone's freed */
@@ -213,7 +213,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
213 213
214void put_filp(struct file *file) 214void put_filp(struct file *file)
215{ 215{
216 if (rcuref_dec_and_test(&file->f_count)) { 216 if (atomic_dec_and_test(&file->f_count)) {
217 security_file_free(file); 217 security_file_free(file);
218 file_kill(file); 218 file_kill(file);
219 file_free(file); 219 file_free(file);
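
struct file's refcount moves from the short-lived rcuref API back to plain atomics. The key primitive is atomic_inc_not_zero(): under rcu_read_lock() the file table entry may be concurrently torn down, so fget() may take a reference only while the count is provably non-zero. A sketch of the pattern; lookup is an illustrative name:

#include <linux/fs.h>
#include <linux/file.h>
#include <linux/rcupdate.h>

static struct file *lookup(struct files_struct *files, unsigned int fd)
{
	struct file *file;

	rcu_read_lock();
	file = fcheck_files(files, fd);
	/* if the count already hit zero, the final fput() won the race */
	if (file && !atomic_inc_not_zero(&file->f_count))
		file = NULL;
	rcu_read_unlock();
	return file;
}
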
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index d0401dc68d41..6f5df1700e95 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -99,8 +99,8 @@ static int
99vxfs_immed_readpage(struct file *fp, struct page *pp) 99vxfs_immed_readpage(struct file *fp, struct page *pp)
100{ 100{
101 struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host); 101 struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host);
102 u_int64_t offset = pp->index << PAGE_CACHE_SHIFT; 102 u_int64_t offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT;
103 caddr_t kaddr; 103 caddr_t kaddr;
104 104
105 kaddr = kmap(pp); 105 kaddr = kmap(pp);
106 memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE); 106 memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE);
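
page->index is an unsigned long, so on 32-bit machines the shift by PAGE_CACHE_SHIFT is done in 32 bits and wraps for file offsets at or beyond 4 GiB; casting the index to a 64-bit type first makes the shift 64-bit. The jffs hunk below reaches the same end with the page_offset() helper, and fs/mpage.c casts to sector_t. A sketch of the truncation, assuming 32-bit unsigned long and 4 KiB pages:

#include <linux/types.h>
#include <linux/pagemap.h>

static void demo(void)
{
	unsigned long index = 0x00180000;	/* the page at file offset 6 GiB */
	u_int64_t bad  = index << PAGE_CACHE_SHIFT;		/* 32-bit shift: wraps */
	u_int64_t good = (u_int64_t)index << PAGE_CACHE_SHIFT;	/* 0x180000000 = 6 GiB */
	(void)bad; (void)good;
}
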
diff --git a/fs/inode.c b/fs/inode.c
index d8d04bd72b59..fd568caf7f74 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -770,7 +770,7 @@ EXPORT_SYMBOL(igrab);
770 * 770 *
771 * Note, @test is called with the inode_lock held, so can't sleep. 771 * Note, @test is called with the inode_lock held, so can't sleep.
772 */ 772 */
773static inline struct inode *ifind(struct super_block *sb, 773static struct inode *ifind(struct super_block *sb,
774 struct hlist_head *head, int (*test)(struct inode *, void *), 774 struct hlist_head *head, int (*test)(struct inode *, void *),
775 void *data, const int wait) 775 void *data, const int wait)
776{ 776{
@@ -804,7 +804,7 @@ static inline struct inode *ifind(struct super_block *sb,
804 * 804 *
805 * Otherwise NULL is returned. 805 * Otherwise NULL is returned.
806 */ 806 */
807static inline struct inode *ifind_fast(struct super_block *sb, 807static struct inode *ifind_fast(struct super_block *sb,
808 struct hlist_head *head, unsigned long ino) 808 struct hlist_head *head, unsigned long ino)
809{ 809{
810 struct inode *inode; 810 struct inode *inode;
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 3dcc6d2162cb..2559ee10beda 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -757,7 +757,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
757 757
758 read_len = 0; 758 read_len = 0;
759 result = 0; 759 result = 0;
760 offset = page->index << PAGE_CACHE_SHIFT; 760 offset = page_offset(page);
761 761
762 kmap(page); 762 kmap(page);
763 buf = page_address(page); 763 buf = page_address(page);
@@ -1545,7 +1545,7 @@ jffs_commit_write(struct file *filp, struct page *page,
1545{ 1545{
1546 void *addr = page_address(page) + from; 1546 void *addr = page_address(page) + from;
1547 /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */ 1547 /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */
1548 loff_t pos = (page->index<<PAGE_CACHE_SHIFT) + from; 1548 loff_t pos = page_offset(page) + from;
1549 1549
1550 return jffs_file_write(filp, addr, to-from, &pos); 1550 return jffs_file_write(filp, addr, to-from, &pos);
1551} /* jffs_commit_write() */ 1551} /* jffs_commit_write() */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 68000a50ceb6..2967b7393415 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -302,8 +302,7 @@ int dbSync(struct inode *ipbmap)
302 /* 302 /*
303 * write out dirty pages of bmap 303 * write out dirty pages of bmap
304 */ 304 */
305 filemap_fdatawrite(ipbmap->i_mapping); 305 filemap_write_and_wait(ipbmap->i_mapping);
306 filemap_fdatawait(ipbmap->i_mapping);
307 306
308 diWriteSpecial(ipbmap, 0); 307 diWriteSpecial(ipbmap, 0);
309 308
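
filemap_write_and_wait() folds the ubiquitous fdatawrite-then-fdatawait pair into a single helper; the same substitution repeats through the jfs, nfs, and super.c hunks below. Roughly what it does, sketched under the assumption that the helper skips empty mappings (write_and_wait is an illustrative name):

#include <linux/fs.h>

static int write_and_wait(struct address_space *mapping)
{
	int err = 0;

	if (mapping->nrpages) {			/* nothing cached: no-op */
		err = filemap_fdatawrite(mapping);	/* start writeback */
		if (err == 0)
			err = filemap_fdatawait(mapping); /* wait for it */
	}
	return err;
}
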
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 28201b194f53..31b4aa13dd4b 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -265,8 +265,7 @@ int diSync(struct inode *ipimap)
265 /* 265 /*
266 * write out dirty pages of imap 266 * write out dirty pages of imap
267 */ 267 */
268 filemap_fdatawrite(ipimap->i_mapping); 268 filemap_write_and_wait(ipimap->i_mapping);
269 filemap_fdatawait(ipimap->i_mapping);
270 269
271 diWriteSpecial(ipimap, 0); 270 diWriteSpecial(ipimap, 0);
272 271
@@ -565,8 +564,7 @@ void diFreeSpecial(struct inode *ip)
565 jfs_err("diFreeSpecial called with NULL ip!"); 564 jfs_err("diFreeSpecial called with NULL ip!");
566 return; 565 return;
567 } 566 }
568 filemap_fdatawrite(ip->i_mapping); 567 filemap_write_and_wait(ip->i_mapping);
569 filemap_fdatawait(ip->i_mapping);
570 truncate_inode_pages(ip->i_mapping, 0); 568 truncate_inode_pages(ip->i_mapping, 0);
571 iput(ip); 569 iput(ip);
572} 570}
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index b660c93c92de..2ddb6b892bcf 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1231,10 +1231,8 @@ int txCommit(tid_t tid, /* transaction identifier */
1231 * when we don't need to worry about it at all. 1231 * when we don't need to worry about it at all.
1232 * 1232 *
1233 * if ((!S_ISDIR(ip->i_mode)) 1233 * if ((!S_ISDIR(ip->i_mode))
1234 * && (tblk->flag & COMMIT_DELETE) == 0) { 1234 * && (tblk->flag & COMMIT_DELETE) == 0)
1235 * filemap_fdatawrite(ip->i_mapping); 1235 * filemap_write_and_wait(ip->i_mapping);
1236 * filemap_fdatawait(ip->i_mapping);
1237 * }
1238 */ 1236 */
1239 1237
1240 /* 1238 /*
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index 5cf91785b541..21eaf7ac0fcb 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -108,8 +108,7 @@ int jfs_umount(struct super_block *sb)
108 * Make sure all metadata makes it to disk before we mark 108 * Make sure all metadata makes it to disk before we mark
109 * the superblock as clean 109 * the superblock as clean
110 */ 110 */
111 filemap_fdatawrite(sbi->direct_inode->i_mapping); 111 filemap_write_and_wait(sbi->direct_inode->i_mapping);
112 filemap_fdatawait(sbi->direct_inode->i_mapping);
113 112
114 /* 113 /*
115 * ensure all file system file pages are propagated to their 114 * ensure all file system file pages are propagated to their
@@ -161,8 +160,7 @@ int jfs_umount_rw(struct super_block *sb)
161 * mark the superblock clean before everything is flushed to 160 * mark the superblock clean before everything is flushed to
162 * disk. 161 * disk.
163 */ 162 */
164 filemap_fdatawrite(sbi->direct_inode->i_mapping); 163 filemap_write_and_wait(sbi->direct_inode->i_mapping);
165 filemap_fdatawait(sbi->direct_inode->i_mapping);
166 164
167 updateSuper(sb, FM_CLEAN); 165 updateSuper(sb, FM_CLEAN);
168 166
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index c6dc254d3253..45180361871c 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -376,8 +376,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
376 * by txCommit(); 376 * by txCommit();
377 */ 377 */
378 filemap_fdatawait(ipbmap->i_mapping); 378 filemap_fdatawait(ipbmap->i_mapping);
379 filemap_fdatawrite(ipbmap->i_mapping); 379 filemap_write_and_wait(ipbmap->i_mapping);
380 filemap_fdatawait(ipbmap->i_mapping);
381 diWriteSpecial(ipbmap, 0); 380 diWriteSpecial(ipbmap, 0);
382 381
383 newPage = nPages; /* first new page number */ 382 newPage = nPages; /* first new page number */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4226af3ea91b..8d31f1336431 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -502,8 +502,7 @@ out_no_rw:
502 jfs_err("jfs_umount failed with return code %d", rc); 502 jfs_err("jfs_umount failed with return code %d", rc);
503 } 503 }
504out_mount_failed: 504out_mount_failed:
505 filemap_fdatawrite(sbi->direct_inode->i_mapping); 505 filemap_write_and_wait(sbi->direct_inode->i_mapping);
506 filemap_fdatawait(sbi->direct_inode->i_mapping);
507 truncate_inode_pages(sbi->direct_inode->i_mapping, 0); 506 truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
508 make_bad_inode(sbi->direct_inode); 507 make_bad_inode(sbi->direct_inode);
509 iput(sbi->direct_inode); 508 iput(sbi->direct_inode);
diff --git a/fs/libfs.c b/fs/libfs.c
index 58101dff2c66..9c50523382e7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -93,16 +93,16 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
93 loff_t n = file->f_pos - 2; 93 loff_t n = file->f_pos - 2;
94 94
95 spin_lock(&dcache_lock); 95 spin_lock(&dcache_lock);
96 list_del(&cursor->d_child); 96 list_del(&cursor->d_u.d_child);
97 p = file->f_dentry->d_subdirs.next; 97 p = file->f_dentry->d_subdirs.next;
98 while (n && p != &file->f_dentry->d_subdirs) { 98 while (n && p != &file->f_dentry->d_subdirs) {
99 struct dentry *next; 99 struct dentry *next;
100 next = list_entry(p, struct dentry, d_child); 100 next = list_entry(p, struct dentry, d_u.d_child);
101 if (!d_unhashed(next) && next->d_inode) 101 if (!d_unhashed(next) && next->d_inode)
102 n--; 102 n--;
103 p = p->next; 103 p = p->next;
104 } 104 }
105 list_add_tail(&cursor->d_child, p); 105 list_add_tail(&cursor->d_u.d_child, p);
106 spin_unlock(&dcache_lock); 106 spin_unlock(&dcache_lock);
107 } 107 }
108 } 108 }
@@ -126,7 +126,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
126{ 126{
127 struct dentry *dentry = filp->f_dentry; 127 struct dentry *dentry = filp->f_dentry;
128 struct dentry *cursor = filp->private_data; 128 struct dentry *cursor = filp->private_data;
129 struct list_head *p, *q = &cursor->d_child; 129 struct list_head *p, *q = &cursor->d_u.d_child;
130 ino_t ino; 130 ino_t ino;
131 int i = filp->f_pos; 131 int i = filp->f_pos;
132 132
@@ -153,7 +153,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
153 } 153 }
154 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 154 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
155 struct dentry *next; 155 struct dentry *next;
156 next = list_entry(p, struct dentry, d_child); 156 next = list_entry(p, struct dentry, d_u.d_child);
157 if (d_unhashed(next) || !next->d_inode) 157 if (d_unhashed(next) || !next->d_inode)
158 continue; 158 continue;
159 159
@@ -261,7 +261,7 @@ int simple_empty(struct dentry *dentry)
261 int ret = 0; 261 int ret = 0;
262 262
263 spin_lock(&dcache_lock); 263 spin_lock(&dcache_lock);
264 list_for_each_entry(child, &dentry->d_subdirs, d_child) 264 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
265 if (simple_positive(child)) 265 if (simple_positive(child))
266 goto out; 266 goto out;
267 ret = 1; 267 ret = 1;
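
The d_child list head moves into a union, d_u, shared with the dentry's RCU head: a dentry is either linked on its parent's child list or queued for RCU freeing, never both, so the two can share storage and every traversal is respelled d_u.d_child. The ncpfs hunks below apply the same rename. A sketch of the shape, with dentry_like as an illustrative stand-in for struct dentry:

#include <linux/list.h>
#include <linux/rcupdate.h>

struct dentry_like {
	union {
		struct list_head d_child;	/* while on the parent's list */
		struct rcu_head d_rcu;		/* only once it is being freed */
	} d_u;
};
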
diff --git a/fs/locks.c b/fs/locks.c
index fb32d6218e21..909eab8fb1d0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -154,7 +154,7 @@ static struct file_lock *locks_alloc_lock(void)
154} 154}
155 155
156/* Free a lock which is not in use. */ 156/* Free a lock which is not in use. */
157static inline void locks_free_lock(struct file_lock *fl) 157static void locks_free_lock(struct file_lock *fl)
158{ 158{
159 if (fl == NULL) { 159 if (fl == NULL) {
160 BUG(); 160 BUG();
@@ -475,8 +475,7 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
475/* 475/*
476 * Check whether two locks have the same owner. 476 * Check whether two locks have the same owner.
477 */ 477 */
478static inline int 478static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
479posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
480{ 479{
481 if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner) 480 if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner)
482 return fl2->fl_lmops == fl1->fl_lmops && 481 return fl2->fl_lmops == fl1->fl_lmops &&
@@ -487,7 +486,7 @@ posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
487/* Remove waiter from blocker's block list. 486/* Remove waiter from blocker's block list.
488 * When blocker ends up pointing to itself then the list is empty. 487 * When blocker ends up pointing to itself then the list is empty.
489 */ 488 */
490static inline void __locks_delete_block(struct file_lock *waiter) 489static void __locks_delete_block(struct file_lock *waiter)
491{ 490{
492 list_del_init(&waiter->fl_block); 491 list_del_init(&waiter->fl_block);
493 list_del_init(&waiter->fl_link); 492 list_del_init(&waiter->fl_link);
diff --git a/fs/mpage.c b/fs/mpage.c
index f1d2d02bd4c8..e431cb3878d6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -184,7 +184,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
184 if (page_has_buffers(page)) 184 if (page_has_buffers(page))
185 goto confused; 185 goto confused;
186 186
187 block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); 187 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
188 last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; 188 last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
189 189
190 bh.b_page = page; 190 bh.b_page = page;
@@ -466,7 +466,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
466 * The page has no buffers: map it to disk 466 * The page has no buffers: map it to disk
467 */ 467 */
468 BUG_ON(!PageUptodate(page)); 468 BUG_ON(!PageUptodate(page));
469 block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); 469 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
470 last_block = (i_size - 1) >> blkbits; 470 last_block = (i_size - 1) >> blkbits;
471 map_bh.b_page = page; 471 map_bh.b_page = page;
472 for (page_block = 0; page_block < blocks_per_page; ) { 472 for (page_block = 0; page_block < blocks_per_page; ) {
diff --git a/fs/namei.c b/fs/namei.c
index 6dbbd42d8b95..300eae088d5f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1491,7 +1491,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1491 if (!error) { 1491 if (!error) {
1492 DQUOT_INIT(inode); 1492 DQUOT_INIT(inode);
1493 1493
1494 error = do_truncate(dentry, 0, NULL); 1494 error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
1495 } 1495 }
1496 put_write_access(inode); 1496 put_write_access(inode);
1497 if (error) 1497 if (error)
diff --git a/fs/namespace.c b/fs/namespace.c
index 2019899f2ab8..3e8fb61ad597 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -451,7 +451,7 @@ EXPORT_SYMBOL(may_umount);
451void release_mounts(struct list_head *head) 451void release_mounts(struct list_head *head)
452{ 452{
453 struct vfsmount *mnt; 453 struct vfsmount *mnt;
454 while(!list_empty(head)) { 454 while (!list_empty(head)) {
455 mnt = list_entry(head->next, struct vfsmount, mnt_hash); 455 mnt = list_entry(head->next, struct vfsmount, mnt_hash);
456 list_del_init(&mnt->mnt_hash); 456 list_del_init(&mnt->mnt_hash);
457 if (mnt->mnt_parent != mnt) { 457 if (mnt->mnt_parent != mnt) {
@@ -1526,6 +1526,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1526 * pointed to by put_old must yield the same directory as new_root. No other 1526 * pointed to by put_old must yield the same directory as new_root. No other
1527 * file system may be mounted on put_old. After all, new_root is a mountpoint. 1527 * file system may be mounted on put_old. After all, new_root is a mountpoint.
1528 * 1528 *
1529 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
1530 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
1531 * in this situation.
1532 *
1529 * Notes: 1533 * Notes:
1530 * - we don't move root/cwd if they are not at the root (reason: if something 1534 * - we don't move root/cwd if they are not at the root (reason: if something
1531 * cared enough to change them, it's probably wrong to force them elsewhere) 1535 * cared enough to change them, it's probably wrong to force them elsewhere)
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index a9f7a8ab1d59..cfd76f431dc0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -365,7 +365,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
365 spin_lock(&dcache_lock); 365 spin_lock(&dcache_lock);
366 next = parent->d_subdirs.next; 366 next = parent->d_subdirs.next;
367 while (next != &parent->d_subdirs) { 367 while (next != &parent->d_subdirs) {
368 dent = list_entry(next, struct dentry, d_child); 368 dent = list_entry(next, struct dentry, d_u.d_child);
369 if ((unsigned long)dent->d_fsdata == fpos) { 369 if ((unsigned long)dent->d_fsdata == fpos) {
370 if (dent->d_inode) 370 if (dent->d_inode)
371 dget_locked(dent); 371 dget_locked(dent);
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 9e4dc30c2435..799e5c2bec55 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -196,7 +196,7 @@ ncp_renew_dentries(struct dentry *parent)
196 spin_lock(&dcache_lock); 196 spin_lock(&dcache_lock);
197 next = parent->d_subdirs.next; 197 next = parent->d_subdirs.next;
198 while (next != &parent->d_subdirs) { 198 while (next != &parent->d_subdirs) {
199 dentry = list_entry(next, struct dentry, d_child); 199 dentry = list_entry(next, struct dentry, d_u.d_child);
200 200
201 if (dentry->d_fsdata == NULL) 201 if (dentry->d_fsdata == NULL)
202 ncp_age_dentry(server, dentry); 202 ncp_age_dentry(server, dentry);
@@ -218,7 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
218 spin_lock(&dcache_lock); 218 spin_lock(&dcache_lock);
219 next = parent->d_subdirs.next; 219 next = parent->d_subdirs.next;
220 while (next != &parent->d_subdirs) { 220 while (next != &parent->d_subdirs) {
221 dentry = list_entry(next, struct dentry, d_child); 221 dentry = list_entry(next, struct dentry, d_u.d_child);
222 dentry->d_fsdata = NULL; 222 dentry->d_fsdata = NULL;
223 ncp_age_dentry(server, dentry); 223 ncp_age_dentry(server, dentry);
224 next = next->next; 224 next = next->next;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e7bd0d92600f..3e4ba9cb7f80 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -644,10 +644,7 @@ int nfs_sync_mapping(struct address_space *mapping)
644 if (mapping->nrpages == 0) 644 if (mapping->nrpages == 0)
645 return 0; 645 return 0;
646 unmap_mapping_range(mapping, 0, 0, 0); 646 unmap_mapping_range(mapping, 0, 0, 0);
647 ret = filemap_fdatawrite(mapping); 647 ret = filemap_write_and_wait(mapping);
648 if (ret != 0)
649 goto out;
650 ret = filemap_fdatawait(mapping);
651 if (ret != 0) 648 if (ret != 0)
652 goto out; 649 goto out;
653 ret = nfs_wb_all(mapping->host); 650 ret = nfs_wb_all(mapping->host);
@@ -864,8 +861,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
864 nfs_begin_data_update(inode); 861 nfs_begin_data_update(inode);
865 /* Write all dirty data if we're changing file permissions or size */ 862 /* Write all dirty data if we're changing file permissions or size */
866 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { 863 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
867 if (filemap_fdatawrite(inode->i_mapping) == 0) 864 filemap_write_and_wait(inode->i_mapping);
868 filemap_fdatawait(inode->i_mapping);
869 nfs_wb_all(inode); 865 nfs_wb_all(inode);
870 } 866 }
871 /* 867 /*
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 985cc53b8dd5..e897e00c2c9d 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -275,7 +275,9 @@ static int __init root_nfs_parse(char *name, char *buf)
275 case Opt_noacl: 275 case Opt_noacl:
276 nfs_data.flags |= NFS_MOUNT_NOACL; 276 nfs_data.flags |= NFS_MOUNT_NOACL;
277 break; 277 break;
278 default : 278 default:
279 printk(KERN_WARNING "Root-NFS: unknown "
280 "option: %s\n", p);
279 return 0; 281 return 0;
280 } 282 }
281 } 283 }
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index f5ef5ea61a05..e8c56a3d9c64 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -212,11 +212,10 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
212 mlog(ML_ENTRY, "ENTRY:\n"); \ 212 mlog(ML_ENTRY, "ENTRY:\n"); \
213} while (0) 213} while (0)
214 214
215/* We disable this for old compilers since they don't have support for 215/*
216 * __builtin_types_compatible_p. 216 * We disable this for sparse.
217 */ 217 */
218#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && \ 218#if !defined(__CHECKER__)
219 !defined(__CHECKER__)
220#define mlog_exit(st) do { \ 219#define mlog_exit(st) do { \
221 if (__builtin_types_compatible_p(typeof(st), unsigned long)) \ 220 if (__builtin_types_compatible_p(typeof(st), unsigned long)) \
222 mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \ 221 mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \
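
The compiler-version check guarding __builtin_types_compatible_p() is dropped (the kernel's minimum GCC already supports it); only sparse, which defines __CHECKER__ and lacks the builtin, keeps the fallback. The builtin folds to a compile-time constant, so the untaken branch is discarded and each return type picks its matching format string. A sketch of the trick; show_st is an illustrative macro:

#include <linux/kernel.h>

#define show_st(st) do {						\
	if (__builtin_types_compatible_p(typeof(st), unsigned long))	\
		printk("%lu\n", (unsigned long)(st));			\
	else								\
		printk("%d\n", (int)(st));				\
} while (0)
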
diff --git a/fs/open.c b/fs/open.c
index f53a5b9ffb7d..75f3329e8a67 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -194,7 +194,8 @@ out:
194 return error; 194 return error;
195} 195}
196 196
197int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) 197int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
198 struct file *filp)
198{ 199{
199 int err; 200 int err;
200 struct iattr newattrs; 201 struct iattr newattrs;
@@ -204,7 +205,7 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
204 return -EINVAL; 205 return -EINVAL;
205 206
206 newattrs.ia_size = length; 207 newattrs.ia_size = length;
207 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 208 newattrs.ia_valid = ATTR_SIZE | time_attrs;
208 if (filp) { 209 if (filp) {
209 newattrs.ia_file = filp; 210 newattrs.ia_file = filp;
210 newattrs.ia_valid |= ATTR_FILE; 211 newattrs.ia_valid |= ATTR_FILE;
@@ -216,7 +217,7 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
216 return err; 217 return err;
217} 218}
218 219
219static inline long do_sys_truncate(const char __user * path, loff_t length) 220static long do_sys_truncate(const char __user * path, loff_t length)
220{ 221{
221 struct nameidata nd; 222 struct nameidata nd;
222 struct inode * inode; 223 struct inode * inode;
@@ -266,7 +267,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length)
266 error = locks_verify_truncate(inode, NULL, length); 267 error = locks_verify_truncate(inode, NULL, length);
267 if (!error) { 268 if (!error) {
268 DQUOT_INIT(inode); 269 DQUOT_INIT(inode);
269 error = do_truncate(nd.dentry, length, NULL); 270 error = do_truncate(nd.dentry, length, 0, NULL);
270 } 271 }
271 put_write_access(inode); 272 put_write_access(inode);
272 273
@@ -282,7 +283,7 @@ asmlinkage long sys_truncate(const char __user * path, unsigned long length)
282 return do_sys_truncate(path, (long)length); 283 return do_sys_truncate(path, (long)length);
283} 284}
284 285
285static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 286static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
286{ 287{
287 struct inode * inode; 288 struct inode * inode;
288 struct dentry *dentry; 289 struct dentry *dentry;
@@ -318,7 +319,7 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
318 319
319 error = locks_verify_truncate(inode, file, length); 320 error = locks_verify_truncate(inode, file, length);
320 if (!error) 321 if (!error)
321 error = do_truncate(dentry, length, file); 322 error = do_truncate(dentry, length, 0, file);
322out_putf: 323out_putf:
323 fput(file); 324 fput(file);
324out: 325out:
@@ -970,7 +971,7 @@ out:
970 971
971EXPORT_SYMBOL(get_unused_fd); 972EXPORT_SYMBOL(get_unused_fd);
972 973
973static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) 974static void __put_unused_fd(struct files_struct *files, unsigned int fd)
974{ 975{
975 struct fdtable *fdt = files_fdtable(files); 976 struct fdtable *fdt = files_fdtable(files);
976 __FD_CLR(fd, fdt->open_fds); 977 __FD_CLR(fd, fdt->open_fds);
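
do_truncate() grows a time_attrs mask that is OR-ed into ia_valid alongside ATTR_SIZE, letting callers choose the timestamp behaviour. The two call sites, taken from this hunk and the fs/namei.c hunk above: open(O_TRUNC) passes ATTR_MTIME|ATTR_CTIME, apparently so the timestamps move even when the file was already empty, while truncate(2)/ftruncate(2) pass 0 and rely on the size change itself:

/* from may_open(): open(O_TRUNC) */
error = do_truncate(dentry, 0, ATTR_MTIME | ATTR_CTIME, NULL);

/* from do_sys_truncate(): truncate(2) */
error = do_truncate(nd.dentry, length, 0, NULL);
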
diff --git a/fs/pnode.c b/fs/pnode.c
index aeeec8ba8dd2..f1871f773f64 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -103,7 +103,7 @@ static struct vfsmount *propagation_next(struct vfsmount *m,
103 struct vfsmount *next; 103 struct vfsmount *next;
104 struct vfsmount *master = m->mnt_master; 104 struct vfsmount *master = m->mnt_master;
105 105
106 if ( master == origin->mnt_master ) { 106 if (master == origin->mnt_master) {
107 next = next_peer(m); 107 next = next_peer(m);
108 return ((next == origin) ? NULL : next); 108 return ((next == origin) ? NULL : next);
109 } else if (m->mnt_slave.next != &master->mnt_slave_list) 109 } else if (m->mnt_slave.next != &master->mnt_slave_list)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 72b431d0a0a4..20e5c4509a43 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -21,6 +21,8 @@
21#include <linux/bitops.h> 21#include <linux/bitops.h>
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23 23
24#include "internal.h"
25
24static ssize_t proc_file_read(struct file *file, char __user *buf, 26static ssize_t proc_file_read(struct file *file, char __user *buf,
25 size_t nbytes, loff_t *ppos); 27 size_t nbytes, loff_t *ppos);
26static ssize_t proc_file_write(struct file *file, const char __user *buffer, 28static ssize_t proc_file_write(struct file *file, const char __user *buffer,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index e6a818a93f3d..6573f31f1fd9 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -19,7 +19,7 @@
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21 21
22extern void free_proc_entry(struct proc_dir_entry *); 22#include "internal.h"
23 23
24static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) 24static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
25{ 25{
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3e55198f9806..95a1cf32b838 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,6 +37,10 @@ extern int proc_tgid_stat(struct task_struct *, char *);
37extern int proc_pid_status(struct task_struct *, char *); 37extern int proc_pid_status(struct task_struct *, char *);
38extern int proc_pid_statm(struct task_struct *, char *); 38extern int proc_pid_statm(struct task_struct *, char *);
39 39
40void free_proc_entry(struct proc_dir_entry *de);
41
42int proc_init_inodecache(void);
43
40static inline struct task_struct *proc_task(struct inode *inode) 44static inline struct task_struct *proc_task(struct inode *inode)
41{ 45{
42 return PROC_I(inode)->task; 46 return PROC_I(inode)->task;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5b6b0b6038a7..63bf6c00fa0c 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -323,6 +323,7 @@ static struct file_operations proc_modules_operations = {
323}; 323};
324#endif 324#endif
325 325
326#ifdef CONFIG_SLAB
326extern struct seq_operations slabinfo_op; 327extern struct seq_operations slabinfo_op;
327extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); 328extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
328static int slabinfo_open(struct inode *inode, struct file *file) 329static int slabinfo_open(struct inode *inode, struct file *file)
@@ -336,6 +337,7 @@ static struct file_operations proc_slabinfo_operations = {
336 .llseek = seq_lseek, 337 .llseek = seq_lseek,
337 .release = seq_release, 338 .release = seq_release,
338}; 339};
340#endif
339 341
340static int show_stat(struct seq_file *p, void *v) 342static int show_stat(struct seq_file *p, void *v)
341{ 343{
@@ -600,7 +602,9 @@ void __init proc_misc_init(void)
600 create_seq_entry("partitions", 0, &proc_partitions_operations); 602 create_seq_entry("partitions", 0, &proc_partitions_operations);
601 create_seq_entry("stat", 0, &proc_stat_operations); 603 create_seq_entry("stat", 0, &proc_stat_operations);
602 create_seq_entry("interrupts", 0, &proc_interrupts_operations); 604 create_seq_entry("interrupts", 0, &proc_interrupts_operations);
605#ifdef CONFIG_SLAB
603 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); 606 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
607#endif
604 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 608 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
605 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 609 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
606 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 610 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index aef148f099a2..68896283c8ae 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,8 @@
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20 20
21#include "internal.h"
22
21struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; 23struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
22 24
23#ifdef CONFIG_SYSCTL 25#ifdef CONFIG_SYSCTL
@@ -36,7 +38,6 @@ static struct file_system_type proc_fs_type = {
36 .kill_sb = kill_anon_super, 38 .kill_sb = kill_anon_super,
37}; 39};
38 40
39extern int __init proc_init_inodecache(void);
40void __init proc_root_init(void) 41void __init proc_root_init(void)
41{ 42{
42 int err = proc_init_inodecache(); 43 int err = proc_init_inodecache();
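
The fs/proc changes replace scattered one-off extern declarations with the directory's private internal.h, which generic.c, inode.c, and root.c now include; prototype mismatches become compile-time errors rather than silently diverging declarations. The pattern, sketched:

/* fs/proc/internal.h: single home for intra-directory prototypes */
void free_proc_entry(struct proc_dir_entry *de);
int proc_init_inodecache(void);

/* every fs/proc/*.c that uses them */
#include "internal.h"
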
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 50bd5a8f0446..0eaad41f4658 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -390,129 +390,12 @@ struct seq_operations proc_pid_smaps_op = {
390}; 390};
391 391
392#ifdef CONFIG_NUMA 392#ifdef CONFIG_NUMA
393 393extern int show_numa_map(struct seq_file *m, void *v);
394struct numa_maps {
395 unsigned long pages;
396 unsigned long anon;
397 unsigned long mapped;
398 unsigned long mapcount_max;
399 unsigned long node[MAX_NUMNODES];
400};
401
402/*
403 * Calculate numa node maps for a vma
404 */
405static struct numa_maps *get_numa_maps(struct vm_area_struct *vma)
406{
407 int i;
408 struct page *page;
409 unsigned long vaddr;
410 struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
411
412 if (!md)
413 return NULL;
414 md->pages = 0;
415 md->anon = 0;
416 md->mapped = 0;
417 md->mapcount_max = 0;
418 for_each_node(i)
419 md->node[i] =0;
420
421 for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
422 page = follow_page(vma, vaddr, 0);
423 if (page) {
424 int count = page_mapcount(page);
425
426 if (count)
427 md->mapped++;
428 if (count > md->mapcount_max)
429 md->mapcount_max = count;
430 md->pages++;
431 if (PageAnon(page))
432 md->anon++;
433 md->node[page_to_nid(page)]++;
434 }
435 cond_resched();
436 }
437 return md;
438}
439
440static int show_numa_map(struct seq_file *m, void *v)
441{
442 struct task_struct *task = m->private;
443 struct vm_area_struct *vma = v;
444 struct mempolicy *pol;
445 struct numa_maps *md;
446 struct zone **z;
447 int n;
448 int first;
449
450 if (!vma->vm_mm)
451 return 0;
452
453 md = get_numa_maps(vma);
454 if (!md)
455 return 0;
456
457 seq_printf(m, "%08lx", vma->vm_start);
458 pol = get_vma_policy(task, vma, vma->vm_start);
459 /* Print policy */
460 switch (pol->policy) {
461 case MPOL_PREFERRED:
462 seq_printf(m, " prefer=%d", pol->v.preferred_node);
463 break;
464 case MPOL_BIND:
465 seq_printf(m, " bind={");
466 first = 1;
467 for (z = pol->v.zonelist->zones; *z; z++) {
468
469 if (!first)
470 seq_putc(m, ',');
471 else
472 first = 0;
473 seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
474 (*z)->name);
475 }
476 seq_putc(m, '}');
477 break;
478 case MPOL_INTERLEAVE:
479 seq_printf(m, " interleave={");
480 first = 1;
481 for_each_node(n) {
482 if (node_isset(n, pol->v.nodes)) {
483 if (!first)
484 seq_putc(m,',');
485 else
486 first = 0;
487 seq_printf(m, "%d",n);
488 }
489 }
490 seq_putc(m, '}');
491 break;
492 default:
493 seq_printf(m," default");
494 break;
495 }
496 seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
497 md->mapcount_max, md->pages, md->mapped);
498 if (md->anon)
499 seq_printf(m," Anon=%lu",md->anon);
500
501 for_each_online_node(n) {
502 if (md->node[n])
503 seq_printf(m, " N%d=%lu", n, md->node[n]);
504 }
505 seq_putc(m, '\n');
506 kfree(md);
507 if (m->count < m->size) /* vma is copied successfully */
508 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
509 return 0;
510}
511 394
512struct seq_operations proc_pid_numa_maps_op = { 395struct seq_operations proc_pid_numa_maps_op = {
513 .start = m_start, 396 .start = m_start,
514 .next = m_next, 397 .next = m_next,
515 .stop = m_stop, 398 .stop = m_stop,
516 .show = show_numa_map 399 .show = show_numa_map
517}; 400};
518#endif 401#endif
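
The entire numa_maps implementation (the per-vma page walk and the policy printer) leaves fs/proc; only an extern for show_numa_map() remains, the code presumably moving into the memory-policy core where it can use mempolicy internals directly instead of procfs reaching into them. What stays behind is just the seq_file wiring, quoted from the hunk:

extern int show_numa_map(struct seq_file *m, void *v);

struct seq_operations proc_pid_numa_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_numa_map
};
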
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
index 84e21ffa5ca8..10187812771e 100644
--- a/fs/relayfs/buffers.c
+++ b/fs/relayfs/buffers.c
@@ -185,5 +185,6 @@ void relay_destroy_buf(struct rchan_buf *buf)
185void relay_remove_buf(struct kref *kref) 185void relay_remove_buf(struct kref *kref)
186{ 186{
187 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); 187 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
188 relayfs_remove(buf->dentry); 188 buf->chan->cb->remove_buf_file(buf->dentry);
189 relay_destroy_buf(buf);
189} 190}
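
Relay buffer teardown no longer calls relayfs_remove() directly: the final kref put dispatches to the channel client's remove_buf_file() callback and then frees the buffer, part of making relay file creation and removal pluggable (see the relayfs_create_file() changes that follow, which take a file_operations pointer and a private data pointer). A sketch of a client callback under that scheme, assuming the callback takes the dentry and returns int; example_remove_buf_file is an illustrative name:

static int example_remove_buf_file(struct dentry *dentry)
{
	return relayfs_remove_file(dentry);
}
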
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
index 0f7f88d067ad..7b7f2cb5f0e1 100644
--- a/fs/relayfs/inode.c
+++ b/fs/relayfs/inode.c
@@ -26,31 +26,22 @@
26 26
27static struct vfsmount * relayfs_mount; 27static struct vfsmount * relayfs_mount;
28static int relayfs_mount_count; 28static int relayfs_mount_count;
29static kmem_cache_t * relayfs_inode_cachep;
30 29
31static struct backing_dev_info relayfs_backing_dev_info = { 30static struct backing_dev_info relayfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */ 31 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 32 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
34}; 33};
35 34
36static struct inode *relayfs_get_inode(struct super_block *sb, int mode, 35static struct inode *relayfs_get_inode(struct super_block *sb,
37 struct rchan *chan) 36 int mode,
37 struct file_operations *fops,
38 void *data)
38{ 39{
39 struct rchan_buf *buf = NULL;
40 struct inode *inode; 40 struct inode *inode;
41 41
42 if (S_ISREG(mode)) {
43 BUG_ON(!chan);
44 buf = relay_create_buf(chan);
45 if (!buf)
46 return NULL;
47 }
48
49 inode = new_inode(sb); 42 inode = new_inode(sb);
50 if (!inode) { 43 if (!inode)
51 relay_destroy_buf(buf);
52 return NULL; 44 return NULL;
53 }
54 45
55 inode->i_mode = mode; 46 inode->i_mode = mode;
56 inode->i_uid = 0; 47 inode->i_uid = 0;
@@ -61,8 +52,9 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
61 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 52 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
62 switch (mode & S_IFMT) { 53 switch (mode & S_IFMT) {
63 case S_IFREG: 54 case S_IFREG:
64 inode->i_fop = &relayfs_file_operations; 55 inode->i_fop = fops;
65 RELAYFS_I(inode)->buf = buf; 56 if (data)
57 inode->u.generic_ip = data;
66 break; 58 break;
67 case S_IFDIR: 59 case S_IFDIR:
68 inode->i_op = &simple_dir_inode_operations; 60 inode->i_op = &simple_dir_inode_operations;
@@ -83,7 +75,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
83 * @name: the name of the file to create 75 * @name: the name of the file to create
84 * @parent: parent directory 76 * @parent: parent directory
85 * @mode: mode 77 * @mode: mode
86 * @chan: relay channel associated with the file 78 * @fops: file operations to use for the file
79 * @data: user-associated data for this file
87 * 80 *
88 * Returns the new dentry, NULL on failure 81 * Returns the new dentry, NULL on failure
89 * 82 *
@@ -92,7 +85,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
92static struct dentry *relayfs_create_entry(const char *name, 85static struct dentry *relayfs_create_entry(const char *name,
93 struct dentry *parent, 86 struct dentry *parent,
94 int mode, 87 int mode,
95 struct rchan *chan) 88 struct file_operations *fops,
89 void *data)
96{ 90{
97 struct dentry *d; 91 struct dentry *d;
98 struct inode *inode; 92 struct inode *inode;
@@ -127,7 +121,7 @@ static struct dentry *relayfs_create_entry(const char *name,
127 goto release_mount; 121 goto release_mount;
128 } 122 }
129 123
130 inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan); 124 inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data);
131 if (!inode) { 125 if (!inode) {
132 d = NULL; 126 d = NULL;
133 goto release_mount; 127 goto release_mount;
@@ -155,20 +149,26 @@ exit:
155 * @name: the name of the file to create 149 * @name: the name of the file to create
156 * @parent: parent directory 150 * @parent: parent directory
157 * @mode: mode, if not specified the default perms are used 151 * @mode: mode, if not specified the default perms are used
158 * @chan: channel associated with the file 152 * @fops: file operations to use for the file
153 * @data: user-associated data for this file
159 * 154 *
160 * Returns file dentry if successful, NULL otherwise. 155 * Returns file dentry if successful, NULL otherwise.
161 * 156 *
162 * The file will be created with user-read permission on behalf of the current user. 157 * The file will be created with user-read permission on behalf of the current user.
163 */ 158 */
164struct dentry *relayfs_create_file(const char *name, struct dentry *parent, 159struct dentry *relayfs_create_file(const char *name,
165 int mode, struct rchan *chan) 160 struct dentry *parent,
161 int mode,
162 struct file_operations *fops,
163 void *data)
166{ 164{
165 BUG_ON(!fops);
166
167 if (!mode) 167 if (!mode)
168 mode = S_IRUSR; 168 mode = S_IRUSR;
169 mode = (mode & S_IALLUGO) | S_IFREG; 169 mode = (mode & S_IALLUGO) | S_IFREG;
170 170
171 return relayfs_create_entry(name, parent, mode, chan); 171 return relayfs_create_entry(name, parent, mode, fops, data);
172} 172}
173 173
174/** 174/**
@@ -183,7 +183,7 @@ struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
183struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) 183struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
184{ 184{
185 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 185 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
186 return relayfs_create_entry(name, parent, mode, NULL); 186 return relayfs_create_entry(name, parent, mode, NULL, NULL);
187} 187}
188 188
189/** 189/**
@@ -225,6 +225,17 @@ int relayfs_remove(struct dentry *dentry)
225} 225}
226 226
227/** 227/**
228 * relayfs_remove_file - remove a file from relay filesystem
229 * @dentry: dentry of the file to remove
230 *
231 * Returns 0 if successful, negative otherwise.
232 */
233int relayfs_remove_file(struct dentry *dentry)
234{
235 return relayfs_remove(dentry);
236}
237
238/**
228 * relayfs_remove_dir - remove a directory in the relay filesystem 239 * relayfs_remove_dir - remove a directory in the relay filesystem
229 * @dentry: directory dentry 240 * @dentry: directory dentry
230 * 241 *
@@ -236,45 +247,45 @@ int relayfs_remove_dir(struct dentry *dentry)
236} 247}
237 248
238/** 249/**
239 * relayfs_open - open file op for relayfs files 250 * relay_file_open - open file op for relay files
240 * @inode: the inode 251 * @inode: the inode
241 * @filp: the file 252 * @filp: the file
242 * 253 *
243 * Increments the channel buffer refcount. 254 * Increments the channel buffer refcount.
244 */ 255 */
245static int relayfs_open(struct inode *inode, struct file *filp) 256static int relay_file_open(struct inode *inode, struct file *filp)
246{ 257{
247 struct rchan_buf *buf = RELAYFS_I(inode)->buf; 258 struct rchan_buf *buf = inode->u.generic_ip;
248 kref_get(&buf->kref); 259 kref_get(&buf->kref);
260 filp->private_data = buf;
249 261
250 return 0; 262 return 0;
251} 263}
252 264
253/** 265/**
254 * relayfs_mmap - mmap file op for relayfs files 266 * relay_file_mmap - mmap file op for relay files
255 * @filp: the file 267 * @filp: the file
256 * @vma: the vma describing what to map 268 * @vma: the vma describing what to map
257 * 269 *
258 * Calls upon relay_mmap_buf to map the file into user space. 270 * Calls upon relay_mmap_buf to map the file into user space.
259 */ 271 */
260static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) 272static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
261{ 273{
262 struct inode *inode = filp->f_dentry->d_inode; 274 struct rchan_buf *buf = filp->private_data;
263 return relay_mmap_buf(RELAYFS_I(inode)->buf, vma); 275 return relay_mmap_buf(buf, vma);
264} 276}
265 277
266/** 278/**
267 * relayfs_poll - poll file op for relayfs files 279 * relay_file_poll - poll file op for relay files
268 * @filp: the file 280 * @filp: the file
269 * @wait: poll table 281 * @wait: poll table
270 * 282 *
271 * Poll implementation. 283 * Poll implementation.
272 */ 284 */
273static unsigned int relayfs_poll(struct file *filp, poll_table *wait) 285static unsigned int relay_file_poll(struct file *filp, poll_table *wait)
274{ 286{
275 unsigned int mask = 0; 287 unsigned int mask = 0;
276 struct inode *inode = filp->f_dentry->d_inode; 288 struct rchan_buf *buf = filp->private_data;
277 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
278 289
279 if (buf->finalized) 290 if (buf->finalized)
280 return POLLERR; 291 return POLLERR;
@@ -289,27 +300,27 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
289} 300}
290 301
291/** 302/**
292 * relayfs_release - release file op for relayfs files 303 * relay_file_release - release file op for relay files
293 * @inode: the inode 304 * @inode: the inode
294 * @filp: the file 305 * @filp: the file
295 * 306 *
296 * Decrements the channel buffer refcount, as the filesystem is 307 * Decrements the channel buffer refcount, as the filesystem is
297 * no longer using it. 308 * no longer using it.
298 */ 309 */
299static int relayfs_release(struct inode *inode, struct file *filp) 310static int relay_file_release(struct inode *inode, struct file *filp)
300{ 311{
301 struct rchan_buf *buf = RELAYFS_I(inode)->buf; 312 struct rchan_buf *buf = filp->private_data;
302 kref_put(&buf->kref, relay_remove_buf); 313 kref_put(&buf->kref, relay_remove_buf);
303 314
304 return 0; 315 return 0;
305} 316}
306 317
307/** 318/**
308 * relayfs_read_consume - update the consumed count for the buffer 319 * relay_file_read_consume - update the consumed count for the buffer
309 */ 320 */
310static void relayfs_read_consume(struct rchan_buf *buf, 321static void relay_file_read_consume(struct rchan_buf *buf,
311 size_t read_pos, 322 size_t read_pos,
312 size_t bytes_consumed) 323 size_t bytes_consumed)
313{ 324{
314 size_t subbuf_size = buf->chan->subbuf_size; 325 size_t subbuf_size = buf->chan->subbuf_size;
315 size_t n_subbufs = buf->chan->n_subbufs; 326 size_t n_subbufs = buf->chan->n_subbufs;
@@ -332,9 +343,9 @@ static void relayfs_read_consume(struct rchan_buf *buf,
332} 343}
333 344
334/** 345/**
335 * relayfs_read_avail - boolean, are there unconsumed bytes available? 346 * relay_file_read_avail - boolean, are there unconsumed bytes available?
336 */ 347 */
337static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) 348static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
338{ 349{
339 size_t bytes_produced, bytes_consumed, write_offset; 350 size_t bytes_produced, bytes_consumed, write_offset;
340 size_t subbuf_size = buf->chan->subbuf_size; 351 size_t subbuf_size = buf->chan->subbuf_size;
@@ -365,16 +376,16 @@ static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
365 if (bytes_produced == bytes_consumed) 376 if (bytes_produced == bytes_consumed)
366 return 0; 377 return 0;
367 378
368 relayfs_read_consume(buf, read_pos, 0); 379 relay_file_read_consume(buf, read_pos, 0);
369 380
370 return 1; 381 return 1;
371} 382}
372 383
373/** 384/**
374 * relayfs_read_subbuf_avail - return bytes available in sub-buffer 385 * relay_file_read_subbuf_avail - return bytes available in sub-buffer
375 */ 386 */
376static size_t relayfs_read_subbuf_avail(size_t read_pos, 387static size_t relay_file_read_subbuf_avail(size_t read_pos,
377 struct rchan_buf *buf) 388 struct rchan_buf *buf)
378{ 389{
379 size_t padding, avail = 0; 390 size_t padding, avail = 0;
380 size_t read_subbuf, read_offset, write_subbuf, write_offset; 391 size_t read_subbuf, read_offset, write_subbuf, write_offset;
@@ -396,14 +407,14 @@ static size_t relayfs_read_subbuf_avail(size_t read_pos,
396} 407}
397 408
398/** 409/**
399 * relayfs_read_start_pos - find the first available byte to read 410 * relay_file_read_start_pos - find the first available byte to read
400 * 411 *
401 * If the read_pos is in the middle of padding, return the 412 * If the read_pos is in the middle of padding, return the
402 * position of the first actually available byte, otherwise 413 * position of the first actually available byte, otherwise
403 * return the original value. 414 * return the original value.
404 */ 415 */
405static size_t relayfs_read_start_pos(size_t read_pos, 416static size_t relay_file_read_start_pos(size_t read_pos,
406 struct rchan_buf *buf) 417 struct rchan_buf *buf)
407{ 418{
408 size_t read_subbuf, padding, padding_start, padding_end; 419 size_t read_subbuf, padding, padding_start, padding_end;
409 size_t subbuf_size = buf->chan->subbuf_size; 420 size_t subbuf_size = buf->chan->subbuf_size;
@@ -422,11 +433,11 @@ static size_t relayfs_read_start_pos(size_t read_pos,
422} 433}
423 434
424/** 435/**
425 * relayfs_read_end_pos - return the new read position 436 * relay_file_read_end_pos - return the new read position
426 */ 437 */
427static size_t relayfs_read_end_pos(struct rchan_buf *buf, 438static size_t relay_file_read_end_pos(struct rchan_buf *buf,
428 size_t read_pos, 439 size_t read_pos,
429 size_t count) 440 size_t count)
430{ 441{
431 size_t read_subbuf, padding, end_pos; 442 size_t read_subbuf, padding, end_pos;
432 size_t subbuf_size = buf->chan->subbuf_size; 443 size_t subbuf_size = buf->chan->subbuf_size;
@@ -445,7 +456,7 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
445} 456}
446 457
447/** 458/**
448 * relayfs_read - read file op for relayfs files 459 * relay_file_read - read file op for relay files
449 * @filp: the file 460 * @filp: the file
450 * @buffer: the userspace buffer 461 * @buffer: the userspace buffer
451 * @count: number of bytes to read 462 * @count: number of bytes to read
@@ -454,23 +465,23 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
454 * Reads count bytes or the number of bytes available in the 465 * Reads count bytes or the number of bytes available in the
455 * current sub-buffer being read, whichever is smaller. 466 * current sub-buffer being read, whichever is smaller.
456 */ 467 */
457static ssize_t relayfs_read(struct file *filp, 468static ssize_t relay_file_read(struct file *filp,
458 char __user *buffer, 469 char __user *buffer,
459 size_t count, 470 size_t count,
460 loff_t *ppos) 471 loff_t *ppos)
461{ 472{
473 struct rchan_buf *buf = filp->private_data;
462 struct inode *inode = filp->f_dentry->d_inode; 474 struct inode *inode = filp->f_dentry->d_inode;
463 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
464 size_t read_start, avail; 475 size_t read_start, avail;
465 ssize_t ret = 0; 476 ssize_t ret = 0;
466 void *from; 477 void *from;
467 478
468 down(&inode->i_sem); 479 down(&inode->i_sem);
469 if(!relayfs_read_avail(buf, *ppos)) 480 if(!relay_file_read_avail(buf, *ppos))
470 goto out; 481 goto out;
471 482
472 read_start = relayfs_read_start_pos(*ppos, buf); 483 read_start = relay_file_read_start_pos(*ppos, buf);
473 avail = relayfs_read_subbuf_avail(read_start, buf); 484 avail = relay_file_read_subbuf_avail(read_start, buf);
474 if (!avail) 485 if (!avail)
475 goto out; 486 goto out;
476 487
@@ -480,58 +491,25 @@ static ssize_t relayfs_read(struct file *filp,
480 ret = -EFAULT; 491 ret = -EFAULT;
481 goto out; 492 goto out;
482 } 493 }
483 relayfs_read_consume(buf, read_start, count); 494 relay_file_read_consume(buf, read_start, count);
484 *ppos = relayfs_read_end_pos(buf, read_start, count); 495 *ppos = relay_file_read_end_pos(buf, read_start, count);
485out: 496out:
486 up(&inode->i_sem); 497 up(&inode->i_sem);
487 return ret; 498 return ret;
488} 499}
489 500
490/** 501struct file_operations relay_file_operations = {
491 * relayfs alloc_inode() implementation 502 .open = relay_file_open,
492 */ 503 .poll = relay_file_poll,
493static struct inode *relayfs_alloc_inode(struct super_block *sb) 504 .mmap = relay_file_mmap,
494{ 505 .read = relay_file_read,
495 struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
496 if (!p)
497 return NULL;
498 p->buf = NULL;
499
500 return &p->vfs_inode;
501}
502
503/**
504 * relayfs destroy_inode() implementation
505 */
506static void relayfs_destroy_inode(struct inode *inode)
507{
508 if (RELAYFS_I(inode)->buf)
509 relay_destroy_buf(RELAYFS_I(inode)->buf);
510
511 kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
512}
513
514static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
515{
516 struct relayfs_inode_info *i = p;
517 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
518 inode_init_once(&i->vfs_inode);
519}
520
521struct file_operations relayfs_file_operations = {
522 .open = relayfs_open,
523 .poll = relayfs_poll,
524 .mmap = relayfs_mmap,
525 .read = relayfs_read,
526 .llseek = no_llseek, 506 .llseek = no_llseek,
527 .release = relayfs_release, 507 .release = relay_file_release,
528}; 508};
529 509
530static struct super_operations relayfs_ops = { 510static struct super_operations relayfs_ops = {
531 .statfs = simple_statfs, 511 .statfs = simple_statfs,
532 .drop_inode = generic_delete_inode, 512 .drop_inode = generic_delete_inode,
533 .alloc_inode = relayfs_alloc_inode,
534 .destroy_inode = relayfs_destroy_inode,
535}; 513};
536 514
537static int relayfs_fill_super(struct super_block * sb, void * data, int silent) 515static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
@@ -544,7 +522,7 @@ static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
544 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 522 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
545 sb->s_magic = RELAYFS_MAGIC; 523 sb->s_magic = RELAYFS_MAGIC;
546 sb->s_op = &relayfs_ops; 524 sb->s_op = &relayfs_ops;
547 inode = relayfs_get_inode(sb, mode, NULL); 525 inode = relayfs_get_inode(sb, mode, NULL, NULL);
548 526
549 if (!inode) 527 if (!inode)
550 return -ENOMEM; 528 return -ENOMEM;
@@ -575,33 +553,27 @@ static struct file_system_type relayfs_fs_type = {
575 553
576static int __init init_relayfs_fs(void) 554static int __init init_relayfs_fs(void)
577{ 555{
578 int err; 556 return register_filesystem(&relayfs_fs_type);
579
580 relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
581 sizeof(struct relayfs_inode_info), 0,
582 0, init_once, NULL);
583 if (!relayfs_inode_cachep)
584 return -ENOMEM;
585
586 err = register_filesystem(&relayfs_fs_type);
587 if (err)
588 kmem_cache_destroy(relayfs_inode_cachep);
589
590 return err;
591} 557}
592 558
593static void __exit exit_relayfs_fs(void) 559static void __exit exit_relayfs_fs(void)
594{ 560{
561
562
563
564
565
595 unregister_filesystem(&relayfs_fs_type); 566 unregister_filesystem(&relayfs_fs_type);
596 kmem_cache_destroy(relayfs_inode_cachep);
597} 567}
598 568
599module_init(init_relayfs_fs) 569module_init(init_relayfs_fs)
600module_exit(exit_relayfs_fs) 570module_exit(exit_relayfs_fs)
601 571
602EXPORT_SYMBOL_GPL(relayfs_file_operations); 572EXPORT_SYMBOL_GPL(relay_file_operations);
603EXPORT_SYMBOL_GPL(relayfs_create_dir); 573EXPORT_SYMBOL_GPL(relayfs_create_dir);
604EXPORT_SYMBOL_GPL(relayfs_remove_dir); 574EXPORT_SYMBOL_GPL(relayfs_remove_dir);
575EXPORT_SYMBOL_GPL(relayfs_create_file);
576EXPORT_SYMBOL_GPL(relayfs_remove_file);
605 577
606MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>"); 578MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
607MODULE_DESCRIPTION("Relay Filesystem"); 579MODULE_DESCRIPTION("Relay Filesystem");
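With inode.c reworked this way, relayfs becomes a generic file host: relayfs_create_file() now takes an arbitrary file_operations plus a private data pointer stashed in inode->u.generic_ip, and the relay buffer files (via relay_file_operations) are just one client of that. A hedged sketch of a non-buffer control file; the my_* names and struct my_state are illustrative, not from the patch:

        /* Sketch: a control file living in relayfs next to the buffer
         * files.  The data pointer handed to relayfs_create_file()
         * comes back via inode->u.generic_ip, exactly as
         * relay_file_open() retrieves its rchan_buf above. */
        static ssize_t my_ctrl_read(struct file *filp, char __user *buffer,
                                    size_t count, loff_t *ppos)
        {
                struct my_state *state = filp->f_dentry->d_inode->u.generic_ip;
                /* ... format something from *state and copy_to_user() it ... */
                return 0;
        }

        static struct file_operations my_ctrl_fops = {
                .read = my_ctrl_read,
        };

        /* dentry = relayfs_create_file("ctrl", parent, S_IRUSR,
         *                              &my_ctrl_fops, state); */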
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
index 2a6f7f12b7f9..abf3ceaace49 100644
--- a/fs/relayfs/relay.c
+++ b/fs/relayfs/relay.c
@@ -80,11 +80,34 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf,
80{ 80{
81} 81}
82 82
83/*
84 * create_buf_file() default callback. Creates a file to represent buf.
85 */
86static struct dentry *create_buf_file_default_callback(const char *filename,
87 struct dentry *parent,
88 int mode,
89 struct rchan_buf *buf,
90 int *is_global)
91{
92 return relayfs_create_file(filename, parent, mode,
93 &relay_file_operations, buf);
94}
95
96/*
97 * remove_buf_file() default callback. Removes file representing relay buffer.
98 */
99static int remove_buf_file_default_callback(struct dentry *dentry)
100{
101 return relayfs_remove(dentry);
102}
103
83/* relay channel default callbacks */ 104/* relay channel default callbacks */
84static struct rchan_callbacks default_channel_callbacks = { 105static struct rchan_callbacks default_channel_callbacks = {
85 .subbuf_start = subbuf_start_default_callback, 106 .subbuf_start = subbuf_start_default_callback,
86 .buf_mapped = buf_mapped_default_callback, 107 .buf_mapped = buf_mapped_default_callback,
87 .buf_unmapped = buf_unmapped_default_callback, 108 .buf_unmapped = buf_unmapped_default_callback,
109 .create_buf_file = create_buf_file_default_callback,
110 .remove_buf_file = remove_buf_file_default_callback,
88}; 111};
89 112
90/** 113/**
@@ -148,14 +171,16 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
148void relay_reset(struct rchan *chan) 171void relay_reset(struct rchan *chan)
149{ 172{
150 unsigned int i; 173 unsigned int i;
174 struct rchan_buf *prev = NULL;
151 175
152 if (!chan) 176 if (!chan)
153 return; 177 return;
154 178
155 for (i = 0; i < NR_CPUS; i++) { 179 for (i = 0; i < NR_CPUS; i++) {
156 if (!chan->buf[i]) 180 if (!chan->buf[i] || chan->buf[i] == prev)
157 continue; 181 break;
158 __relay_reset(chan->buf[i], 0); 182 __relay_reset(chan->buf[i], 0);
183 prev = chan->buf[i];
159 } 184 }
160} 185}
161 186
@@ -166,17 +191,27 @@ void relay_reset(struct rchan *chan)
166 */ 191 */
167static struct rchan_buf *relay_open_buf(struct rchan *chan, 192static struct rchan_buf *relay_open_buf(struct rchan *chan,
168 const char *filename, 193 const char *filename,
169 struct dentry *parent) 194 struct dentry *parent,
195 int *is_global)
170{ 196{
171 struct rchan_buf *buf; 197 struct rchan_buf *buf;
172 struct dentry *dentry; 198 struct dentry *dentry;
173 199
200 if (*is_global)
201 return chan->buf[0];
202
203 buf = relay_create_buf(chan);
204 if (!buf)
205 return NULL;
206
174 /* Create file in fs */ 207 /* Create file in fs */
175 dentry = relayfs_create_file(filename, parent, S_IRUSR, chan); 208 dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR,
176 if (!dentry) 209 buf, is_global);
210 if (!dentry) {
211 relay_destroy_buf(buf);
177 return NULL; 212 return NULL;
213 }
178 214
179 buf = RELAYFS_I(dentry->d_inode)->buf;
180 buf->dentry = dentry; 215 buf->dentry = dentry;
181 __relay_reset(buf, 1); 216 __relay_reset(buf, 1);
182 217
@@ -214,6 +249,10 @@ static inline void setup_callbacks(struct rchan *chan,
214 cb->buf_mapped = buf_mapped_default_callback; 249 cb->buf_mapped = buf_mapped_default_callback;
215 if (!cb->buf_unmapped) 250 if (!cb->buf_unmapped)
216 cb->buf_unmapped = buf_unmapped_default_callback; 251 cb->buf_unmapped = buf_unmapped_default_callback;
252 if (!cb->create_buf_file)
253 cb->create_buf_file = create_buf_file_default_callback;
254 if (!cb->remove_buf_file)
255 cb->remove_buf_file = remove_buf_file_default_callback;
217 chan->cb = cb; 256 chan->cb = cb;
218} 257}
219 258
@@ -241,6 +280,7 @@ struct rchan *relay_open(const char *base_filename,
241 unsigned int i; 280 unsigned int i;
242 struct rchan *chan; 281 struct rchan *chan;
243 char *tmpname; 282 char *tmpname;
283 int is_global = 0;
244 284
245 if (!base_filename) 285 if (!base_filename)
246 return NULL; 286 return NULL;
@@ -265,7 +305,8 @@ struct rchan *relay_open(const char *base_filename,
265 305
266 for_each_online_cpu(i) { 306 for_each_online_cpu(i) {
267 sprintf(tmpname, "%s%d", base_filename, i); 307 sprintf(tmpname, "%s%d", base_filename, i);
268 chan->buf[i] = relay_open_buf(chan, tmpname, parent); 308 chan->buf[i] = relay_open_buf(chan, tmpname, parent,
309 &is_global);
269 chan->buf[i]->cpu = i; 310 chan->buf[i]->cpu = i;
270 if (!chan->buf[i]) 311 if (!chan->buf[i])
271 goto free_bufs; 312 goto free_bufs;
@@ -279,6 +320,8 @@ free_bufs:
279 if (!chan->buf[i]) 320 if (!chan->buf[i])
280 break; 321 break;
281 relay_close_buf(chan->buf[i]); 322 relay_close_buf(chan->buf[i]);
323 if (is_global)
324 break;
282 } 325 }
283 kfree(tmpname); 326 kfree(tmpname);
284 327
@@ -388,14 +431,16 @@ void relay_destroy_channel(struct kref *kref)
388void relay_close(struct rchan *chan) 431void relay_close(struct rchan *chan)
389{ 432{
390 unsigned int i; 433 unsigned int i;
434 struct rchan_buf *prev = NULL;
391 435
392 if (!chan) 436 if (!chan)
393 return; 437 return;
394 438
395 for (i = 0; i < NR_CPUS; i++) { 439 for (i = 0; i < NR_CPUS; i++) {
396 if (!chan->buf[i]) 440 if (!chan->buf[i] || chan->buf[i] == prev)
397 continue; 441 break;
398 relay_close_buf(chan->buf[i]); 442 relay_close_buf(chan->buf[i]);
443 prev = chan->buf[i];
399 } 444 }
400 445
401 if (chan->last_toobig) 446 if (chan->last_toobig)
@@ -415,14 +460,16 @@ void relay_close(struct rchan *chan)
415void relay_flush(struct rchan *chan) 460void relay_flush(struct rchan *chan)
416{ 461{
417 unsigned int i; 462 unsigned int i;
463 struct rchan_buf *prev = NULL;
418 464
419 if (!chan) 465 if (!chan)
420 return; 466 return;
421 467
422 for (i = 0; i < NR_CPUS; i++) { 468 for (i = 0; i < NR_CPUS; i++) {
423 if (!chan->buf[i]) 469 if (!chan->buf[i] || chan->buf[i] == prev)
424 continue; 470 break;
425 relay_switch_subbuf(chan->buf[i], 0); 471 relay_switch_subbuf(chan->buf[i], 0);
472 prev = chan->buf[i];
426 } 473 }
427} 474}
428 475
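Taken together, the callbacks above let a client decide where its buffer files live and whether one buffer is shared machine-wide: a create_buf_file() implementation that sets *is_global makes relay_open_buf() hand every CPU chan->buf[0]. A sketch of a global-buffer callback (my_create_buf_file is an illustrative name):

        /* Sketch: create one machine-wide buffer file instead of one
         * per CPU.  Setting *is_global makes subsequent
         * relay_open_buf() calls reuse chan->buf[0]. */
        static struct dentry *my_create_buf_file(const char *filename,
                                                 struct dentry *parent,
                                                 int mode,
                                                 struct rchan_buf *buf,
                                                 int *is_global)
        {
                *is_global = 1;
                return relayfs_create_file(filename, parent, mode,
                                           &relay_file_operations, buf);
        }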
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
index 703503fa22b6..0993d3e5753b 100644
--- a/fs/relayfs/relay.h
+++ b/fs/relayfs/relay.h
@@ -1,10 +1,6 @@
1#ifndef _RELAY_H 1#ifndef _RELAY_H
2#define _RELAY_H 2#define _RELAY_H
3 3
4struct dentry *relayfs_create_file(const char *name,
5 struct dentry *parent,
6 int mode,
7 struct rchan *chan);
8extern int relayfs_remove(struct dentry *dentry); 4extern int relayfs_remove(struct dentry *dentry);
9extern int relay_buf_empty(struct rchan_buf *buf); 5extern int relay_buf_empty(struct rchan_buf *buf);
10extern void relay_destroy_channel(struct kref *kref); 6extern void relay_destroy_channel(struct kref *kref);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index c74f382dabba..0a13859fd57b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,7 @@ static int
418romfs_readpage(struct file *file, struct page * page) 418romfs_readpage(struct file *file, struct page * page)
419{ 419{
420 struct inode *inode = page->mapping->host; 420 struct inode *inode = page->mapping->host;
421 unsigned long offset, avail, readlen; 421 loff_t offset, avail, readlen;
422 void *buf; 422 void *buf;
423 int result = -EIO; 423 int result = -EIO;
424 424
@@ -429,8 +429,8 @@ romfs_readpage(struct file *file, struct page * page)
429 goto err_out; 429 goto err_out;
430 430
431 /* 32 bit warning -- but not for us :) */ 431 /* 32 bit warning -- but not for us :) */
432 offset = page->index << PAGE_CACHE_SHIFT; 432 offset = page_offset(page);
433 if (offset < inode->i_size) { 433 if (offset < i_size_read(inode)) {
434 avail = inode->i_size-offset; 434 avail = inode->i_size-offset;
435 readlen = min_t(unsigned long, avail, PAGE_SIZE); 435 readlen = min_t(unsigned long, avail, PAGE_SIZE);
436 if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { 436 if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
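The romfs change above fixes a 32-bit truncation: shifting page->index in unsigned long arithmetic wraps for offsets at or beyond 4 GiB, while page_offset() performs the shift in loff_t. The same bug class is fixed in fs/sysv/dir.c further down. A small userspace illustration of the difference (only visible on a build where long is 32 bits):

        #include <stdio.h>

        int main(void)
        {
                unsigned long index = 0x00123456UL;     /* page number past 4 GiB */
                int shift = 12;                         /* PAGE_CACHE_SHIFT, 4K pages */

                unsigned long bad = index << shift;             /* wraps on 32-bit long */
                long long good = (long long)index << shift;     /* what page_offset() does */

                printf("unsigned long: %#lx\nloff_t-style:  %#llx\n", bad, good);
                return 0;
        }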
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index f3e6b81288ab..74b86d9725a6 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -66,7 +66,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
66 spin_lock(&dcache_lock); 66 spin_lock(&dcache_lock);
67 next = parent->d_subdirs.next; 67 next = parent->d_subdirs.next;
68 while (next != &parent->d_subdirs) { 68 while (next != &parent->d_subdirs) {
69 dentry = list_entry(next, struct dentry, d_child); 69 dentry = list_entry(next, struct dentry, d_u.d_child);
70 dentry->d_fsdata = NULL; 70 dentry->d_fsdata = NULL;
71 smb_age_dentry(server, dentry); 71 smb_age_dentry(server, dentry);
72 next = next->next; 72 next = next->next;
@@ -100,7 +100,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
100 spin_lock(&dcache_lock); 100 spin_lock(&dcache_lock);
101 next = parent->d_subdirs.next; 101 next = parent->d_subdirs.next;
102 while (next != &parent->d_subdirs) { 102 while (next != &parent->d_subdirs) {
103 dent = list_entry(next, struct dentry, d_child); 103 dent = list_entry(next, struct dentry, d_u.d_child);
104 if ((unsigned long)dent->d_fsdata == fpos) { 104 if ((unsigned long)dent->d_fsdata == fpos) {
105 if (dent->d_inode) 105 if (dent->d_inode)
106 dget_locked(dent); 106 dget_locked(dent);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index b4fcfa8b55a1..7042e62726a4 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -209,8 +209,8 @@ smb_updatepage(struct file *file, struct page *page, unsigned long offset,
209{ 209{
210 struct dentry *dentry = file->f_dentry; 210 struct dentry *dentry = file->f_dentry;
211 211
212 DEBUG1("(%s/%s %d@%ld)\n", DENTRY_PATH(dentry), 212 DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
213 count, (page->index << PAGE_CACHE_SHIFT)+offset); 213 ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
214 214
215 return smb_writepage_sync(dentry->d_inode, page, offset, count); 215 return smb_writepage_sync(dentry->d_inode, page, offset, count);
216} 216}
@@ -374,8 +374,7 @@ smb_file_release(struct inode *inode, struct file * file)
374 /* We must flush any dirty pages now as we won't be able to 374 /* We must flush any dirty pages now as we won't be able to
375 write anything after close. mmap can trigger this. 375 write anything after close. mmap can trigger this.
376 "openers" should perhaps include mmap'ers ... */ 376 "openers" should perhaps include mmap'ers ... */
377 filemap_fdatawrite(inode->i_mapping); 377 filemap_write_and_wait(inode->i_mapping);
378 filemap_fdatawait(inode->i_mapping);
379 smb_close(inode); 378 smb_close(inode);
380 } 379 }
381 unlock_kernel(); 380 unlock_kernel();
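Several hunks in this merge (here, in smbfs/inode.c, and in xfs below) replace the back-to-back filemap_fdatawrite()/filemap_fdatawait() pair with the then-new filemap_write_and_wait() helper. Approximately what that helper does, as a sketch rather than a copy of mm/filemap.c:

        /* Approximate shape of the consolidated helper: skip the work
         * when nothing is cached, and report the first error hit. */
        int filemap_write_and_wait_sketch(struct address_space *mapping)
        {
                int err = 0;

                if (mapping->nrpages) {
                        err = filemap_fdatawrite(mapping);        /* start writeback */
                        if (err == 0)
                                err = filemap_fdatawait(mapping); /* wait for it */
                }
                return err;
        }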
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 10b994428fef..6ec88bf59b2d 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -697,8 +697,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
697 DENTRY_PATH(dentry), 697 DENTRY_PATH(dentry),
698 (long) inode->i_size, (long) attr->ia_size); 698 (long) inode->i_size, (long) attr->ia_size);
699 699
700 filemap_fdatawrite(inode->i_mapping); 700 filemap_write_and_wait(inode->i_mapping);
701 filemap_fdatawait(inode->i_mapping);
702 701
703 error = smb_open(dentry, O_WRONLY); 702 error = smb_open(dentry, O_WRONLY);
704 if (error) 703 if (error)
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 38ab558835c4..d6baec0f24ad 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -3113,7 +3113,7 @@ smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
3113 LSET(data, 32, SMB_TIME_NO_CHANGE); 3113 LSET(data, 32, SMB_TIME_NO_CHANGE);
3114 LSET(data, 40, SMB_UID_NO_CHANGE); 3114 LSET(data, 40, SMB_UID_NO_CHANGE);
3115 LSET(data, 48, SMB_GID_NO_CHANGE); 3115 LSET(data, 48, SMB_GID_NO_CHANGE);
3116 LSET(data, 56, smb_filetype_from_mode(attr->ia_mode)); 3116 DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
3117 LSET(data, 60, major); 3117 LSET(data, 60, major);
3118 LSET(data, 68, minor); 3118 LSET(data, 68, minor);
3119 LSET(data, 76, 0); 3119 LSET(data, 76, 0);
diff --git a/fs/super.c b/fs/super.c
index 5a347a4f673a..0a30e51692cf 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -700,8 +700,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
700 700
701 s->s_flags = flags; 701 s->s_flags = flags;
702 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 702 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
703 s->s_old_blocksize = block_size(bdev); 703 sb_set_blocksize(s, block_size(bdev));
704 sb_set_blocksize(s, s->s_old_blocksize);
705 error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); 704 error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
706 if (error) { 705 if (error) {
707 up_write(&s->s_umount); 706 up_write(&s->s_umount);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 69a085abad6f..cce8b05cba5a 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -103,7 +103,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
103 offset = (char *)de - kaddr; 103 offset = (char *)de - kaddr;
104 104
105 over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN), 105 over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN),
106 (n<<PAGE_CACHE_SHIFT) | offset, 106 ((loff_t)n<<PAGE_CACHE_SHIFT) | offset,
107 fs16_to_cpu(SYSV_SB(sb), de->inode), 107 fs16_to_cpu(SYSV_SB(sb), de->inode),
108 DT_UNKNOWN); 108 DT_UNKNOWN);
109 if (over) { 109 if (over) {
@@ -115,7 +115,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
115 } 115 }
116 116
117done: 117done:
118 filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; 118 filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset;
119 unlock_kernel(); 119 unlock_kernel();
120 return 0; 120 return 0;
121} 121}
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 6598a5037ac8..4fae57d9d115 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -41,7 +41,7 @@
41#define uint(x) xuint(x) 41#define uint(x) xuint(x)
42#define xuint(x) __le ## x 42#define xuint(x) __le ## x
43 43
44extern inline int find_next_one_bit (void * addr, int size, int offset) 44static inline int find_next_one_bit (void * addr, int size, int offset)
45{ 45{
46 uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG); 46 uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG);
47 int result = offset & ~(BITS_PER_LONG-1); 47 int result = offset & ~(BITS_PER_LONG-1);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4014f17d382e..395e582ee542 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1957,11 +1957,6 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t
1957 printk(KERN_ERR "udf: inode_bmap: block < 0\n"); 1957 printk(KERN_ERR "udf: inode_bmap: block < 0\n");
1958 return -1; 1958 return -1;
1959 } 1959 }
1960 if (!inode)
1961 {
1962 printk(KERN_ERR "udf: inode_bmap: NULL inode\n");
1963 return -1;
1964 }
1965 1960
1966 *extoffset = 0; 1961 *extoffset = 0;
1967 *elen = 0; 1962 *elen = 0;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 54828ebcf1ba..2ba11a9aa995 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1296,8 +1296,10 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
1296 blk++; 1296 blk++;
1297 } 1297 }
1298out: 1298out:
1299 if (len == towrite) 1299 if (len == towrite) {
1300 up(&inode->i_sem);
1300 return err; 1301 return err;
1302 }
1301 if (inode->i_size < off+len-towrite) 1303 if (inode->i_size < off+len-towrite)
1302 i_size_write(inode, off+len-towrite); 1304 i_size_write(inode, off+len-towrite);
1303 inode->i_version++; 1305 inode->i_version++;
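The ufs hunk is a lock-leak fix: the early "nothing written" return left i_sem held. The corrected control flow, reduced to its essentials (a sketch, not the full function; quota_write_shape is an illustrative name):

        static ssize_t quota_write_shape(struct inode *inode, const char *data,
                                         size_t len)
        {
                size_t towrite = len;
                int err = 0;

                down(&inode->i_sem);
                /* ... block-writing loop that decrements towrite ... */
                if (len == towrite) {           /* wrote nothing */
                        up(&inode->i_sem);      /* the release this patch adds */
                        return err;
                }
                /* ... i_size update and inode dirtying ... */
                up(&inode->i_sem);
                return len - towrite;
        }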
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index f89340c61bf2..4fa4b1a5187e 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -79,8 +79,7 @@ fs_flushinval_pages(
79 struct inode *ip = LINVFS_GET_IP(vp); 79 struct inode *ip = LINVFS_GET_IP(vp);
80 80
81 if (VN_CACHED(vp)) { 81 if (VN_CACHED(vp)) {
82 filemap_fdatawrite(ip->i_mapping); 82 filemap_write_and_wait(ip->i_mapping);
83 filemap_fdatawait(ip->i_mapping);
84 83
85 truncate_inode_pages(ip->i_mapping, first); 84 truncate_inode_pages(ip->i_mapping, first);
86 } 85 }
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 158829ca56f6..f40d4391fcfc 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -30,13 +30,7 @@
30 * By comparing each component, we don't have to worry about extra 30 * By comparing each component, we don't have to worry about extra
31 * endian issues in treating two 32 bit numbers as one 64 bit number 31 * endian issues in treating two 32 bit numbers as one 64 bit number
32 */ 32 */
33static 33static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
34#if defined(__GNUC__) && (__GNUC__ == 2) && ( (__GNUC_MINOR__ == 95) || (__GNUC_MINOR__ == 96))
35__attribute__((unused)) /* gcc 2.95, 2.96 miscompile this when inlined */
36#else
37__inline__
38#endif
39xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
40{ 34{
41 if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2)) 35 if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2))
42 return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999; 36 return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999;
diff --git a/include/asm-alpha/cache.h b/include/asm-alpha/cache.h
index e69b29501a5f..e6d4d1695e25 100644
--- a/include/asm-alpha/cache.h
+++ b/include/asm-alpha/cache.h
@@ -20,6 +20,5 @@
20 20
21#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) 21#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
22#define SMP_CACHE_BYTES L1_CACHE_BYTES 22#define SMP_CACHE_BYTES L1_CACHE_BYTES
23#define L1_CACHE_SHIFT_MAX L1_CACHE_SHIFT
24 23
25#endif 24#endif
diff --git a/include/asm-alpha/compiler.h b/include/asm-alpha/compiler.h
index 0a4a8b40dfcd..00c6f57ad9a7 100644
--- a/include/asm-alpha/compiler.h
+++ b/include/asm-alpha/compiler.h
@@ -98,9 +98,7 @@
98#undef inline 98#undef inline
99#undef __inline__ 99#undef __inline__
100#undef __inline 100#undef __inline
101#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3
102#undef __always_inline 101#undef __always_inline
103#define __always_inline inline __attribute__((always_inline)) 102#define __always_inline inline __attribute__((always_inline))
104#endif
105 103
106#endif /* __ALPHA_COMPILER_H */ 104#endif /* __ALPHA_COMPILER_H */
diff --git a/include/asm-alpha/futex.h b/include/asm-alpha/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-alpha/futex.h
+++ b/include/asm-alpha/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
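This futex.h rewrite repeats for every architecture in this merge that lacks a native implementation (arm, arm26, cris, and others below): the identical -ENOSYS stub moves into asm-generic/futex.h and each arch header shrinks to a single include. The stub's operand decoding, lifted from the deleted code and made runnable standalone for illustration (the input value is arbitrary):

        #include <stdio.h>

        int main(void)
        {
                int encoded_op = 0x12345678;            /* arbitrary example */
                int op     = (encoded_op >> 28) & 7;    /* FUTEX_OP_* */
                int cmp    = (encoded_op >> 24) & 15;   /* FUTEX_OP_CMP_* */
                int oparg  = (encoded_op << 8) >> 20;   /* sign-extended 12-bit arg */
                int cmparg = (encoded_op << 20) >> 20;  /* sign-extended 12-bit arg */

                printf("op=%d cmp=%d oparg=%d cmparg=%d\n", op, cmp, oparg, cmparg);
                return 0;
        }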
diff --git a/include/asm-alpha/processor.h b/include/asm-alpha/processor.h
index 059780a7d3d7..bb1a7a3abb8b 100644
--- a/include/asm-alpha/processor.h
+++ b/include/asm-alpha/processor.h
@@ -77,7 +77,6 @@ unsigned long get_wchan(struct task_struct *p);
77#define spin_lock_prefetch(lock) do { } while (0) 77#define spin_lock_prefetch(lock) do { } while (0)
78#endif 78#endif
79 79
80#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
81extern inline void prefetch(const void *ptr) 80extern inline void prefetch(const void *ptr)
82{ 81{
83 __builtin_prefetch(ptr, 0, 3); 82 __builtin_prefetch(ptr, 0, 3);
@@ -95,24 +94,4 @@ extern inline void spin_lock_prefetch(const void *ptr)
95} 94}
96#endif 95#endif
97 96
98#else
99extern inline void prefetch(const void *ptr)
100{
101 __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr));
102}
103
104extern inline void prefetchw(const void *ptr)
105{
106 __asm__ ("ldq $31,%0" : : "m"(*(char *)ptr));
107}
108
109#ifdef CONFIG_SMP
110extern inline void spin_lock_prefetch(const void *ptr)
111{
112 __asm__ ("ldq $31,%0" : : "m"(*(char *)ptr));
113}
114#endif
115
116#endif /* GCC 3.1 */
117
118#endif /* __ASM_ALPHA_PROCESSOR_H */ 97#endif /* __ASM_ALPHA_PROCESSOR_H */
diff --git a/include/asm-arm/cache.h b/include/asm-arm/cache.h
index 8d161f7c87ff..31332c8ac04e 100644
--- a/include/asm-arm/cache.h
+++ b/include/asm-arm/cache.h
@@ -7,9 +7,4 @@
7#define L1_CACHE_SHIFT 5 7#define L1_CACHE_SHIFT 5
8#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 8#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
9 9
10/*
11 * largest L1 which this arch supports
12 */
13#define L1_CACHE_SHIFT_MAX 5
14
15#endif 10#endif
diff --git a/include/asm-arm/futex.h b/include/asm-arm/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-arm/futex.h
+++ b/include/asm-arm/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-arm/irq.h b/include/asm-arm/irq.h
index 59975ee43cf1..7772432d3fd7 100644
--- a/include/asm-arm/irq.h
+++ b/include/asm-arm/irq.h
@@ -25,10 +25,14 @@ extern void disable_irq_nosync(unsigned int);
25extern void disable_irq(unsigned int); 25extern void disable_irq(unsigned int);
26extern void enable_irq(unsigned int); 26extern void enable_irq(unsigned int);
27 27
28#define __IRQT_FALEDGE (1 << 0) 28/*
29#define __IRQT_RISEDGE (1 << 1) 29 * These correspond with the SA_TRIGGER_* defines, and therefore the
30#define __IRQT_LOWLVL (1 << 2) 30 * IRQRESOURCE_IRQ_* defines.
31#define __IRQT_HIGHLVL (1 << 3) 31 */
32#define __IRQT_RISEDGE (1 << 0)
33#define __IRQT_FALEDGE (1 << 1)
34#define __IRQT_HIGHLVL (1 << 2)
35#define __IRQT_LOWLVL (1 << 3)
32 36
33#define IRQT_NOEDGE (0) 37#define IRQT_NOEDGE (0)
34#define IRQT_RISING (__IRQT_RISEDGE) 38#define IRQT_RISING (__IRQT_RISEDGE)
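The reordering above is not cosmetic: it makes the __IRQT_* bits numerically identical to the SA_TRIGGER_* flags (rising = bit 0, falling = bit 1, high = bit 2, low = bit 3 in this kernel), so converting request_irq() trigger flags to an IRQ type becomes a plain mask. A sketch under that assumption (irq_type_from_sa_flags is an illustrative name):

        /* Sketch: with matching bit positions, no translation table is
         * needed between SA_TRIGGER_* flags and __IRQT_* types. */
        static unsigned int irq_type_from_sa_flags(unsigned long sa_flags)
        {
                return sa_flags & (__IRQT_RISEDGE | __IRQT_FALEDGE |
                                   __IRQT_HIGHLVL | __IRQT_LOWLVL);
        }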
diff --git a/include/asm-arm26/futex.h b/include/asm-arm26/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-arm26/futex.h
+++ b/include/asm-arm26/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-cris/arch-v10/cache.h b/include/asm-cris/arch-v10/cache.h
index 1d1d1ba65b1a..aea27184d2d2 100644
--- a/include/asm-cris/arch-v10/cache.h
+++ b/include/asm-cris/arch-v10/cache.h
@@ -4,6 +4,5 @@
4/* Etrax 100LX have 32-byte cache-lines. */ 4/* Etrax 100LX have 32-byte cache-lines. */
5#define L1_CACHE_BYTES 32 5#define L1_CACHE_BYTES 32
6#define L1_CACHE_SHIFT 5 6#define L1_CACHE_SHIFT 5
7#define L1_CACHE_SHIFT_MAX 5
8 7
9#endif /* _ASM_ARCH_CACHE_H */ 8#endif /* _ASM_ARCH_CACHE_H */
diff --git a/include/asm-cris/arch-v32/cache.h b/include/asm-cris/arch-v32/cache.h
index 4fed8d62ccc8..80b236b15319 100644
--- a/include/asm-cris/arch-v32/cache.h
+++ b/include/asm-cris/arch-v32/cache.h
@@ -4,6 +4,5 @@
4/* A cache-line is 32 bytes. */ 4/* A cache-line is 32 bytes. */
5#define L1_CACHE_BYTES 32 5#define L1_CACHE_BYTES 32
6#define L1_CACHE_SHIFT 5 6#define L1_CACHE_SHIFT 5
7#define L1_CACHE_SHIFT_MAX 5
8 7
9#endif /* _ASM_CRIS_ARCH_CACHE_H */ 8#endif /* _ASM_CRIS_ARCH_CACHE_H */
diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h
index 8eff51349ae7..cbf1a98f0129 100644
--- a/include/asm-cris/dma-mapping.h
+++ b/include/asm-cris/dma-mapping.h
@@ -153,7 +153,7 @@ dma_set_mask(struct device *dev, u64 mask)
153static inline int 153static inline int
154dma_get_cache_alignment(void) 154dma_get_cache_alignment(void)
155{ 155{
156 return (1 << L1_CACHE_SHIFT_MAX); 156 return (1 << INTERNODE_CACHE_SHIFT);
157} 157}
158 158
159#define dma_is_consistent(d) (1) 159#define dma_is_consistent(d) (1)
diff --git a/include/asm-cris/futex.h b/include/asm-cris/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-cris/futex.h
+++ b/include/asm-cris/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index 3f54fea2b051..9c9e9499cfd8 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -218,51 +218,12 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
218 __typeof__(*(ptr)) __xg_orig; \ 218 __typeof__(*(ptr)) __xg_orig; \
219 \ 219 \
220 switch (sizeof(__xg_orig)) { \ 220 switch (sizeof(__xg_orig)) { \
221 case 1: \
222 asm volatile( \
223 "0: \n" \
224 " orcc gr0,gr0,gr0,icc3 \n" \
225 " ckeq icc3,cc7 \n" \
226 " ldub.p %M0,%1 \n" \
227 " orcr cc7,cc7,cc3 \n" \
228 " cstb.p %2,%M0 ,cc3,#1 \n" \
229 " corcc gr29,gr29,gr0 ,cc3,#1 \n" \
230 " beq icc3,#0,0b \n" \
231 : "+U"(*__xg_ptr), "=&r"(__xg_orig) \
232 : "r"(x) \
233 : "memory", "cc7", "cc3", "icc3" \
234 ); \
235 break; \
236 \
237 case 2: \
238 asm volatile( \
239 "0: \n" \
240 " orcc gr0,gr0,gr0,icc3 \n" \
241 " ckeq icc3,cc7 \n" \
242 " lduh.p %M0,%1 \n" \
243 " orcr cc7,cc7,cc3 \n" \
244 " csth.p %2,%M0 ,cc3,#1 \n" \
245 " corcc gr29,gr29,gr0 ,cc3,#1 \n" \
246 " beq icc3,#0,0b \n" \
247 : "+U"(*__xg_ptr), "=&r"(__xg_orig) \
248 : "r"(x) \
249 : "memory", "cc7", "cc3", "icc3" \
250 ); \
251 break; \
252 \
253 case 4: \ 221 case 4: \
254 asm volatile( \ 222 asm volatile( \
255 "0: \n" \ 223 "swap%I0 %2,%M0" \
256 " orcc gr0,gr0,gr0,icc3 \n" \ 224 : "+m"(*__xg_ptr), "=&r"(__xg_orig) \
257 " ckeq icc3,cc7 \n" \
258 " ld.p %M0,%1 \n" \
259 " orcr cc7,cc7,cc3 \n" \
260 " cst.p %2,%M0 ,cc3,#1 \n" \
261 " corcc gr29,gr29,gr0 ,cc3,#1 \n" \
262 " beq icc3,#0,0b \n" \
263 : "+U"(*__xg_ptr), "=&r"(__xg_orig) \
264 : "r"(x) \ 225 : "r"(x) \
265 : "memory", "cc7", "cc3", "icc3" \ 226 : "memory" \
266 ); \ 227 ); \
267 break; \ 228 break; \
268 \ 229 \
@@ -277,8 +238,6 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
277 238
278#else 239#else
279 240
280extern uint8_t __xchg_8 (uint8_t i, volatile void *v);
281extern uint16_t __xchg_16(uint16_t i, volatile void *v);
282extern uint32_t __xchg_32(uint32_t i, volatile void *v); 241extern uint32_t __xchg_32(uint32_t i, volatile void *v);
283 242
284#define xchg(ptr, x) \ 243#define xchg(ptr, x) \
@@ -287,8 +246,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
287 __typeof__(*(ptr)) __xg_orig; \ 246 __typeof__(*(ptr)) __xg_orig; \
288 \ 247 \
289 switch (sizeof(__xg_orig)) { \ 248 switch (sizeof(__xg_orig)) { \
290 case 1: __xg_orig = (__typeof__(*(ptr))) __xchg_8 ((uint8_t) x, __xg_ptr); break; \
291 case 2: __xg_orig = (__typeof__(*(ptr))) __xchg_16((uint16_t) x, __xg_ptr); break; \
292 case 4: __xg_orig = (__typeof__(*(ptr))) __xchg_32((uint32_t) x, __xg_ptr); break; \ 249 case 4: __xg_orig = (__typeof__(*(ptr))) __xchg_32((uint32_t) x, __xg_ptr); break; \
293 default: \ 250 default: \
294 __xg_orig = 0; \ 251 __xg_orig = 0; \
@@ -318,46 +275,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
318 __typeof__(*(ptr)) __xg_new = (new); \ 275 __typeof__(*(ptr)) __xg_new = (new); \
319 \ 276 \
320 switch (sizeof(__xg_orig)) { \ 277 switch (sizeof(__xg_orig)) { \
321 case 1: \
322 asm volatile( \
323 "0: \n" \
324 " orcc gr0,gr0,gr0,icc3 \n" \
325 " ckeq icc3,cc7 \n" \
326 " ldub.p %M0,%1 \n" \
327 " orcr cc7,cc7,cc3 \n" \
328 " sub%I4 %1,%4,%2 \n" \
329 " sllcc %2,#24,gr0,icc0 \n" \
330 " bne icc0,#0,1f \n" \
331 " cstb.p %3,%M0 ,cc3,#1 \n" \
332 " corcc gr29,gr29,gr0 ,cc3,#1 \n" \
333 " beq icc3,#0,0b \n" \
334 "1: \n" \
335 : "+U"(*__xg_ptr), "=&r"(__xg_orig), "=&r"(__xg_tmp) \
336 : "r"(__xg_new), "NPr"(__xg_test) \
337 : "memory", "cc7", "cc3", "icc3", "icc0" \
338 ); \
339 break; \
340 \
341 case 2: \
342 asm volatile( \
343 "0: \n" \
344 " orcc gr0,gr0,gr0,icc3 \n" \
345 " ckeq icc3,cc7 \n" \
346 " lduh.p %M0,%1 \n" \
347 " orcr cc7,cc7,cc3 \n" \
348 " sub%I4 %1,%4,%2 \n" \
349 " sllcc %2,#16,gr0,icc0 \n" \
350 " bne icc0,#0,1f \n" \
351 " csth.p %3,%M0 ,cc3,#1 \n" \
352 " corcc gr29,gr29,gr0 ,cc3,#1 \n" \
353 " beq icc3,#0,0b \n" \
354 "1: \n" \
355 : "+U"(*__xg_ptr), "=&r"(__xg_orig), "=&r"(__xg_tmp) \
356 : "r"(__xg_new), "NPr"(__xg_test) \
357 : "memory", "cc7", "cc3", "icc3", "icc0" \
358 ); \
359 break; \
360 \
361 case 4: \ 278 case 4: \
362 asm volatile( \ 279 asm volatile( \
363 "0: \n" \ 280 "0: \n" \
@@ -388,8 +305,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
388 305
389#else 306#else
390 307
391extern uint8_t __cmpxchg_8 (uint8_t *v, uint8_t test, uint8_t new);
392extern uint16_t __cmpxchg_16(uint16_t *v, uint16_t test, uint16_t new);
393extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new); 308extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
394 309
395#define cmpxchg(ptr, test, new) \ 310#define cmpxchg(ptr, test, new) \
@@ -400,8 +315,6 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
400 __typeof__(*(ptr)) __xg_new = (new); \ 315 __typeof__(*(ptr)) __xg_new = (new); \
401 \ 316 \
402 switch (sizeof(__xg_orig)) { \ 317 switch (sizeof(__xg_orig)) { \
403 case 1: __xg_orig = __cmpxchg_8 (__xg_ptr, __xg_test, __xg_new); break; \
404 case 2: __xg_orig = __cmpxchg_16(__xg_ptr, __xg_test, __xg_new); break; \
405 case 4: __xg_orig = __cmpxchg_32(__xg_ptr, __xg_test, __xg_new); break; \ 318 case 4: __xg_orig = __cmpxchg_32(__xg_ptr, __xg_test, __xg_new); break; \
406 default: \ 319 default: \
407 __xg_orig = 0; \ 320 __xg_orig = 0; \
@@ -414,7 +327,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
414 327
415#endif 328#endif
416 329
417#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) 330#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
418 331
419#define atomic_add_unless(v, a, u) \ 332#define atomic_add_unless(v, a, u) \
420({ \ 333({ \
@@ -424,6 +337,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
424 c = old; \ 337 c = old; \
425 c != (u); \ 338 c != (u); \
426}) 339})
340
427#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) 341#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
428 342
429#include <asm-generic/atomic.h> 343#include <asm-generic/atomic.h>
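After this atomic.h trim, FRV's xchg() and cmpxchg() accept only 4-byte operands: the 8- and 16-bit inline paths and their out-of-line helpers (__xchg_8/__xchg_16, __cmpxchg_8/__cmpxchg_16) are gone, and the 32-bit exchange collapses to a single swap instruction. Remaining legal usage, as a sketch (the function names are illustrative):

        /* Sketch: 32-bit operands are now the only supported width. */
        static inline int test_and_set_flag(volatile uint32_t *flag)
        {
                return xchg(flag, 1) != 0;      /* one `swap' instruction on FRV */
        }

        static inline int claim_slot(atomic_t *owner, int me)
        {
                return atomic_cmpxchg(owner, 0, me) == 0;
        }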
diff --git a/include/asm-frv/bug.h b/include/asm-frv/bug.h
index 074c0d5770eb..451712cc3060 100644
--- a/include/asm-frv/bug.h
+++ b/include/asm-frv/bug.h
@@ -12,6 +12,7 @@
12#define _ASM_BUG_H 12#define _ASM_BUG_H
13 13
14#include <linux/config.h> 14#include <linux/config.h>
15#include <linux/linkage.h>
15 16
16#ifdef CONFIG_BUG 17#ifdef CONFIG_BUG
17/* 18/*
diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h
index 5003e017fd1e..e9fc1d47797e 100644
--- a/include/asm-frv/dma-mapping.h
+++ b/include/asm-frv/dma-mapping.h
@@ -23,7 +23,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t
23 * returns, or alternatively stop on the first sg_dma_len(sg) which 23 * returns, or alternatively stop on the first sg_dma_len(sg) which
24 * is 0. 24 * is 0.
25 */ 25 */
26#define sg_dma_address(sg) ((unsigned long) (page_to_phys((sg)->page) + (sg)->offset)) 26#define sg_dma_address(sg) ((sg)->dma_address)
27#define sg_dma_len(sg) ((sg)->length) 27#define sg_dma_len(sg) ((sg)->length)
28 28
29/* 29/*
diff --git a/include/asm-frv/io.h b/include/asm-frv/io.h
index 48829f727242..075369b1a34b 100644
--- a/include/asm-frv/io.h
+++ b/include/asm-frv/io.h
@@ -18,6 +18,7 @@
18#ifdef __KERNEL__ 18#ifdef __KERNEL__
19 19
20#include <linux/config.h> 20#include <linux/config.h>
21#include <linux/types.h>
21#include <asm/virtconvert.h> 22#include <asm/virtconvert.h>
22#include <asm/string.h> 23#include <asm/string.h>
23#include <asm/mb-regs.h> 24#include <asm/mb-regs.h>
@@ -104,6 +105,8 @@ static inline void __insl(unsigned long addr, void *buf, int len, int swap)
104 __insl_sw(addr, buf, len); 105 __insl_sw(addr, buf, len);
105} 106}
106 107
108#define mmiowb() mb()
109
107/* 110/*
108 * make the short names macros so specific devices 111 * make the short names macros so specific devices
109 * can override them as required 112 * can override them as required
@@ -209,6 +212,10 @@ static inline uint32_t readl(const volatile void __iomem *addr)
209 return ret; 212 return ret;
210} 213}
211 214
215#define readb_relaxed readb
216#define readw_relaxed readw
217#define readl_relaxed readl
218
212static inline void writeb(uint8_t datum, volatile void __iomem *addr) 219static inline void writeb(uint8_t datum, volatile void __iomem *addr)
213{ 220{
214 __builtin_write8((volatile uint8_t __force *) addr, datum); 221 __builtin_write8((volatile uint8_t __force *) addr, datum);
@@ -268,11 +275,106 @@ static inline void __iomem *ioremap_fullcache(unsigned long physaddr, unsigned l
268 275
269extern void iounmap(void __iomem *addr); 276extern void iounmap(void __iomem *addr);
270 277
278static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
279{
280 return (void __iomem *) port;
281}
282
283static inline void ioport_unmap(void __iomem *p)
284{
285}
286
271static inline void flush_write_buffers(void) 287static inline void flush_write_buffers(void)
272{ 288{
273 __asm__ __volatile__ ("membar" : : :"memory"); 289 __asm__ __volatile__ ("membar" : : :"memory");
274} 290}
275 291
292/*
293 * do appropriate I/O accesses for token type
294 */
295static inline unsigned int ioread8(void __iomem *p)
296{
297 return __builtin_read8(p);
298}
299
300static inline unsigned int ioread16(void __iomem *p)
301{
302 uint16_t ret = __builtin_read16(p);
303 if (__is_PCI_addr(p))
304 ret = _swapw(ret);
305 return ret;
306}
307
308static inline unsigned int ioread32(void __iomem *p)
309{
310 uint32_t ret = __builtin_read32(p);
311 if (__is_PCI_addr(p))
312 ret = _swapl(ret);
313 return ret;
314}
315
316static inline void iowrite8(u8 val, void __iomem *p)
317{
318 __builtin_write8(p, val);
319 if (__is_PCI_MEM(p))
320 __flush_PCI_writes();
321}
322
323static inline void iowrite16(u16 val, void __iomem *p)
324{
325 if (__is_PCI_addr(p))
326 val = _swapw(val);
327 __builtin_write16(p, val);
328 if (__is_PCI_MEM(p))
329 __flush_PCI_writes();
330}
331
332static inline void iowrite32(u32 val, void __iomem *p)
333{
334 if (__is_PCI_addr(p))
335 val = _swapl(val);
336 __builtin_write32(p, val);
337 if (__is_PCI_MEM(p))
338 __flush_PCI_writes();
339}
340
341static inline void ioread8_rep(void __iomem *p, void *dst, unsigned long count)
342{
343 io_insb((unsigned long) p, dst, count);
344}
345
346static inline void ioread16_rep(void __iomem *p, void *dst, unsigned long count)
347{
348 io_insw((unsigned long) p, dst, count);
349}
350
351static inline void ioread32_rep(void __iomem *p, void *dst, unsigned long count)
352{
353 __insl_ns((unsigned long) p, dst, count);
354}
355
356static inline void iowrite8_rep(void __iomem *p, const void *src, unsigned long count)
357{
358 io_outsb((unsigned long) p, src, count);
359}
360
361static inline void iowrite16_rep(void __iomem *p, const void *src, unsigned long count)
362{
363 io_outsw((unsigned long) p, src, count);
364}
365
366static inline void iowrite32_rep(void __iomem *p, const void *src, unsigned long count)
367{
368 __outsl_ns((unsigned long) p, src, count);
369}
370
371/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
372struct pci_dev;
373extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
374static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
375{
376}
377
276 378
277/* 379/*
278 * Convert a physical pointer to a virtual kernel pointer for /dev/mem 380 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
@@ -285,6 +387,27 @@ static inline void flush_write_buffers(void)
285 */ 387 */
286#define xlate_dev_kmem_ptr(p) p 388#define xlate_dev_kmem_ptr(p) p
287 389
390/*
391 * Check BIOS signature
392 */
393static inline int check_signature(volatile void __iomem *io_addr,
394 const unsigned char *signature, int length)
395{
396 int retval = 0;
397
398 do {
399 if (readb(io_addr) != *signature)
400 goto out;
401 io_addr++;
402 signature++;
403 length--;
404 } while (length);
405
406 retval = 1;
407out:
408 return retval;
409}
410
288#endif /* __KERNEL__ */ 411#endif /* __KERNEL__ */
289 412
290#endif /* _ASM_IO_H */ 413#endif /* _ASM_IO_H */
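
The io.h hunks above give FRV a full ioread/iowrite family: 8-bit accesses pass straight through, 16- and 32-bit accesses are byte-swapped whenever the cookie lies in PCI space (tested with __is_PCI_addr, added to mb-regs.h just below), and writes to PCI memory are flushed afterwards. A minimal sketch of how a driver consumes these accessors together with the new pci_iomap()/pci_iounmap() cookies; the BAR number and register offset are purely illustrative:

    /* Hypothetical fragment: map BAR 0, read a 32-bit status word and
     * write it back as a write-one-to-clear acknowledge.  Any PCI
     * byte-swapping happens inside ioread32()/iowrite32(), so the
     * driver only ever sees CPU-endian values. */
    void __iomem *regs = pci_iomap(pdev, 0, 0);
    u32 status;

    if (!regs)
            return -ENOMEM;
    status = ioread32(regs + 0x10);        /* offset is illustrative */
    iowrite32(status, regs + 0x10);
    pci_iounmap(pdev, regs);
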
diff --git a/include/asm-frv/mb-regs.h b/include/asm-frv/mb-regs.h
index c8f575fc42fa..93fa732fb0cd 100644
--- a/include/asm-frv/mb-regs.h
+++ b/include/asm-frv/mb-regs.h
@@ -68,6 +68,9 @@ do { \
68#define __is_PCI_MEM(addr) \ 68#define __is_PCI_MEM(addr) \
69 ((unsigned long)(addr) - __region_PCI_MEM < 0x08000000UL) 69 ((unsigned long)(addr) - __region_PCI_MEM < 0x08000000UL)
70 70
71#define __is_PCI_addr(addr) \
72 ((unsigned long)(addr) - __region_PCI_IO < 0x0c000000UL)
73
71#define __get_CLKSW() ({ *(volatile unsigned long *)(__region_CS2 + 0x0130000cUL) & 0xffUL; }) 74#define __get_CLKSW() ({ *(volatile unsigned long *)(__region_CS2 + 0x0130000cUL) & 0xffUL; })
72#define __get_CLKIN() (__get_CLKSW() * 125U * 100000U / 24U) 75#define __get_CLKIN() (__get_CLKSW() * 125U * 100000U / 24U)
73 76
@@ -149,6 +152,7 @@ do { \
149 152
150#define __is_PCI_IO(addr) 0 /* no PCI */ 153#define __is_PCI_IO(addr) 0 /* no PCI */
151#define __is_PCI_MEM(addr) 0 154#define __is_PCI_MEM(addr) 0
155#define __is_PCI_addr(addr) 0
152#define __region_PCI_IO 0 156#define __region_PCI_IO 0
153#define __region_PCI_MEM 0 157#define __region_PCI_MEM 0
154#define __flush_PCI_writes() do { } while(0) 158#define __flush_PCI_writes() do { } while(0)
diff --git a/include/asm-frv/mc146818rtc.h b/include/asm-frv/mc146818rtc.h
new file mode 100644
index 000000000000..90dfb7a633d1
--- /dev/null
+++ b/include/asm-frv/mc146818rtc.h
@@ -0,0 +1,16 @@
1/* mc146818rtc.h: RTC defs
2 *
3 * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _ASM_MC146818RTC_H
13#define _ASM_MC146818RTC_H
14
15
16#endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-frv/module.h b/include/asm-frv/module.h
index 3223cfaef743..3d5c6360289a 100644
--- a/include/asm-frv/module.h
+++ b/include/asm-frv/module.h
@@ -11,10 +11,18 @@
11#ifndef _ASM_MODULE_H 11#ifndef _ASM_MODULE_H
12#define _ASM_MODULE_H 12#define _ASM_MODULE_H
13 13
14#define module_map(x) vmalloc(x) 14struct mod_arch_specific
15#define module_unmap(x) vfree(x) 15{
16#define module_arch_init(x) (0) 16};
17#define arch_init_modules(x) do { } while (0) 17
18#define Elf_Shdr Elf32_Shdr
19#define Elf_Sym Elf32_Sym
20#define Elf_Ehdr Elf32_Ehdr
21
22/*
23 * Include the architecture version.
24 */
25#define MODULE_ARCH_VERMAGIC __stringify(PROCESSOR_MODEL_NAME) " "
18 26
19#endif /* _ASM_MODULE_H */ 27#endif /* _ASM_MODULE_H */
20 28
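
This conversion retires the pre-2.6 module_map()/module_unmap() hooks in favour of the current loader interface: an (empty) mod_arch_specific, the ELF type aliases the generic loader expects, and a MODULE_ARCH_VERMAGIC that bakes the processor model into the version magic, so a module built for one FRV model refuses to load on another. A rough illustration of the vermagic expansion, assuming PROCESSOR_MODEL_NAME is a bare token such as fr500 (the real name comes from the processor headers):

    #define __stringify_1(x)  #x
    #define __stringify(x)    __stringify_1(x)

    #define PROCESSOR_MODEL_NAME fr500        /* assumed for the example */
    #define MODULE_ARCH_VERMAGIC __stringify(PROCESSOR_MODEL_NAME) " "

    /* MODULE_ARCH_VERMAGIC expands to the string "fr500 " */
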
diff --git a/include/asm-frv/pci.h b/include/asm-frv/pci.h
index 1168451c275f..598b0c6b695d 100644
--- a/include/asm-frv/pci.h
+++ b/include/asm-frv/pci.h
@@ -57,6 +57,14 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
57 */ 57 */
58#define PCI_DMA_BUS_IS_PHYS (1) 58#define PCI_DMA_BUS_IS_PHYS (1)
59 59
60/* pci_unmap_{page,single} is a nop so... */
61#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
62#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
63#define pci_unmap_addr(PTR, ADDR_NAME) (0)
64#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
65#define pci_unmap_len(PTR, LEN_NAME) (0)
66#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
67
60#ifdef CONFIG_PCI 68#ifdef CONFIG_PCI
61static inline void pci_dma_burst_advice(struct pci_dev *pdev, 69static inline void pci_dma_burst_advice(struct pci_dev *pdev,
62 enum pci_dma_burst_strategy *strat, 70 enum pci_dma_burst_strategy *strat,
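
Because pci_unmap_single()/pci_unmap_page() are no-ops on FRV, the DECLARE_PCI_UNMAP_* block makes the bookkeeping fields vanish as well: a driver that stashes the DMA address and length for a later unmap pays no storage cost on this platform. A sketch of the driver-side pattern these macros exist for (the descriptor layout is invented for the example; note the macros carry their own semicolons on platforms where they expand to real fields):

    struct my_rx_desc {
            struct sk_buff *skb;
            DECLARE_PCI_UNMAP_ADDR(mapping)
            DECLARE_PCI_UNMAP_LEN(len)
    };

    /* at map time */
    pci_unmap_addr_set(desc, mapping, dma_handle);
    pci_unmap_len_set(desc, len, size);

    /* later, at unmap time */
    pci_unmap_single(pdev, pci_unmap_addr(desc, mapping),
                     pci_unmap_len(desc, len), PCI_DMA_FROMDEVICE);
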
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 844666377dcb..d1c3b182c691 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -421,6 +421,11 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
421} 421}
422 422
423/* 423/*
424 * Macro to mark a page protection value as "uncacheable"
425 */
426#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NOCACHE))
427
428/*
424 * Conversion functions: convert a page and protection to a page entry, 429 * Conversion functions: convert a page and protection to a page entry,
425 * and a page entry and page directory to the page they refer to. 430 * and a page entry and page directory to the page they refer to.
426 */ 431 */
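
pgprot_noncached() is the standard hook drivers use to strip cacheability from a mapping before exposing device memory to user space; FRV implements it by OR-ing _PAGE_NOCACHE into the protection bits. The canonical call site, sketched with error handling omitted:

    /* In a driver's mmap() method: make the window uncacheable, then
     * map the physical pages into the user's VMA. */
    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
    return remap_pfn_range(vma, vma->vm_start, pfn,
                           vma->vm_end - vma->vm_start,
                           vma->vm_page_prot);
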
diff --git a/include/asm-frv/types.h b/include/asm-frv/types.h
index 50605df6d8ac..2560f596a75d 100644
--- a/include/asm-frv/types.h
+++ b/include/asm-frv/types.h
@@ -59,7 +59,6 @@ typedef unsigned int u32;
59 59
60typedef signed long long s64; 60typedef signed long long s64;
61typedef unsigned long long u64; 61typedef unsigned long long u64;
62typedef u64 u_quad_t;
63 62
64/* Dma addresses are 32-bits wide. */ 63/* Dma addresses are 32-bits wide. */
65 64
diff --git a/include/asm-frv/uaccess.h b/include/asm-frv/uaccess.h
index 991b50fbba24..b6bcbe01f6ee 100644
--- a/include/asm-frv/uaccess.h
+++ b/include/asm-frv/uaccess.h
@@ -180,16 +180,16 @@ do { \
180 \ 180 \
181 switch (sizeof(*(ptr))) { \ 181 switch (sizeof(*(ptr))) { \
182 case 1: \ 182 case 1: \
183 __get_user_asm(__gu_err, __gu_val, ptr, "ub", "=r"); \ 183 __get_user_asm(__gu_err, *(u8*)&__gu_val, ptr, "ub", "=r"); \
184 break; \ 184 break; \
185 case 2: \ 185 case 2: \
186 __get_user_asm(__gu_err, __gu_val, ptr, "uh", "=r"); \ 186 __get_user_asm(__gu_err, *(u16*)&__gu_val, ptr, "uh", "=r"); \
187 break; \ 187 break; \
188 case 4: \ 188 case 4: \
189 __get_user_asm(__gu_err, __gu_val, ptr, "", "=r"); \ 189 __get_user_asm(__gu_err, *(u32*)&__gu_val, ptr, "", "=r"); \
190 break; \ 190 break; \
191 case 8: \ 191 case 8: \
192 __get_user_asm(__gu_err, __gu_val, ptr, "d", "=e"); \ 192 __get_user_asm(__gu_err, *(u64*)&__gu_val, ptr, "d", "=e"); \
193 break; \ 193 break; \
194 default: \ 194 default: \
195 __gu_err = __get_user_bad(); \ 195 __gu_err = __get_user_bad(); \
diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h
index 5cf989b448d5..cde376a7a857 100644
--- a/include/asm-frv/unistd.h
+++ b/include/asm-frv/unistd.h
@@ -313,7 +313,7 @@ do { \
313 unsigned long __sr2 = (res); \ 313 unsigned long __sr2 = (res); \
314 if (__builtin_expect(__sr2 >= (unsigned long)(-4095), 0)) { \ 314 if (__builtin_expect(__sr2 >= (unsigned long)(-4095), 0)) { \
315 errno = (-__sr2); \ 315 errno = (-__sr2); \
316 __sr2 = ULONG_MAX; \ 316 __sr2 = ~0UL; \
317 } \ 317 } \
318 return (type) __sr2; \ 318 return (type) __sr2; \
319} while (0) 319} while (0)
diff --git a/include/asm-frv/vga.h b/include/asm-frv/vga.h
new file mode 100644
index 000000000000..a702c800a229
--- /dev/null
+++ b/include/asm-frv/vga.h
@@ -0,0 +1,17 @@
1/* vga.h: VGA register stuff
2 *
3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _ASM_VGA_H
13#define _ASM_VGA_H
14
15
16
17#endif /* _ASM_VGA_H */
diff --git a/include/asm-frv/xor.h b/include/asm-frv/xor.h
new file mode 100644
index 000000000000..c82eb12a5b18
--- /dev/null
+++ b/include/asm-frv/xor.h
@@ -0,0 +1 @@
1#include <asm-generic/xor.h>
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index e0a28b925ef0..0fada8f16dc6 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -8,6 +8,7 @@
8 * edit all arch specific atomic.h files. 8 * edit all arch specific atomic.h files.
9 */ 9 */
10 10
11#include <asm/types.h>
11 12
12/* 13/*
13 * Suppport for atomic_long_t 14 * Suppport for atomic_long_t
diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h
index 747d790295f3..1b356207712c 100644
--- a/include/asm-generic/dma-mapping.h
+++ b/include/asm-generic/dma-mapping.h
@@ -274,7 +274,7 @@ dma_get_cache_alignment(void)
274{ 274{
275 /* no easy way to get cache size on all processors, so return 275 /* no easy way to get cache size on all processors, so return
276 * the maximum possible, to be safe */ 276 * the maximum possible, to be safe */
277 return (1 << L1_CACHE_SHIFT_MAX); 277 return (1 << INTERNODE_CACHE_SHIFT);
278} 278}
279 279
280static inline void 280static inline void
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
new file mode 100644
index 000000000000..3ae2c7347549
--- /dev/null
+++ b/include/asm-generic/futex.h
@@ -0,0 +1,53 @@
1#ifndef _ASM_GENERIC_FUTEX_H
2#define _ASM_GENERIC_FUTEX_H
3
4#ifdef __KERNEL__
5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif
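
This generic fallback decodes the futex operation word but refuses every operation with -ENOSYS; the dozen identical per-architecture copies that follow (h8300, ia64, m32r, m68k, m68knommu, parisc, s390, sh, sh64, sparc, sparc64, um, v850) are each collapsed to a one-line include of it. For reference, the field layout the decode shifts assume, shown as a worked encoding (this mirrors the FUTEX_OP() packing in <linux/futex.h>):

    /*
     * bit  31      FUTEX_OP_OPARG_SHIFT flag (oparg becomes 1 << oparg)
     * bits 30..28  op      ((encoded_op >> 28) & 7)
     * bits 27..24  cmp     ((encoded_op >> 24) & 15)
     * bits 23..12  oparg   (12-bit, sign-extended by "<< 8 >> 20")
     * bits 11..0   cmparg  (12-bit, sign-extended by "<< 20 >> 20")
     */
    int encoded_op = (FUTEX_OP_ADD << 28)
                   | (FUTEX_OP_CMP_EQ << 24)
                   | ((1 & 0xfff) << 12)        /* oparg  = 1 */
                   | (0 & 0xfff);               /* cmparg = 0 */
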
diff --git a/include/asm-h8300/futex.h b/include/asm-h8300/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-h8300/futex.h
+++ b/include/asm-h8300/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-i386/cache.h b/include/asm-i386/cache.h
index 849788710feb..615911e5bd24 100644
--- a/include/asm-i386/cache.h
+++ b/include/asm-i386/cache.h
@@ -10,6 +10,4 @@
10#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) 10#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
11#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 11#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
12 12
13#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */
14
15#endif 13#endif
diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h
index e56c335f8ef9..6c37a9ab8d60 100644
--- a/include/asm-i386/dma-mapping.h
+++ b/include/asm-i386/dma-mapping.h
@@ -150,7 +150,7 @@ dma_get_cache_alignment(void)
150{ 150{
151 /* no easy way to get cache size on all x86, so return the 151 /* no easy way to get cache size on all x86, so return the
152 * maximum possible, to be safe */ 152 * maximum possible, to be safe */
153 return (1 << L1_CACHE_SHIFT_MAX); 153 return (1 << INTERNODE_CACHE_SHIFT);
154} 154}
155 155
156#define dma_is_consistent(d) (1) 156#define dma_is_consistent(d) (1)
diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h
index 270f1986b19f..5169d7af456f 100644
--- a/include/asm-i386/irq.h
+++ b/include/asm-i386/irq.h
@@ -21,8 +21,6 @@ static __inline__ int irq_canonicalize(int irq)
21 return ((irq == 2) ? 9 : irq); 21 return ((irq == 2) ? 9 : irq);
22} 22}
23 23
24extern void release_vm86_irqs(struct task_struct *);
25
26#ifdef CONFIG_X86_LOCAL_APIC 24#ifdef CONFIG_X86_LOCAL_APIC
27# define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ 25# define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
28#endif 26#endif
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index 7e0f2945d17d..f324c53b6f9a 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -54,6 +54,9 @@ struct pt_regs {
54#define PTRACE_GET_THREAD_AREA 25 54#define PTRACE_GET_THREAD_AREA 25
55#define PTRACE_SET_THREAD_AREA 26 55#define PTRACE_SET_THREAD_AREA 26
56 56
57#define PTRACE_SYSEMU 31
58#define PTRACE_SYSEMU_SINGLESTEP 32
59
57#ifdef __KERNEL__ 60#ifdef __KERNEL__
58 61
59#include <asm/vm86.h> 62#include <asm/vm86.h>
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index fe38b9a96233..481c3c0ea720 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -299,8 +299,9 @@
299#define __NR_inotify_init 291 299#define __NR_inotify_init 291
300#define __NR_inotify_add_watch 292 300#define __NR_inotify_add_watch 292
301#define __NR_inotify_rm_watch 293 301#define __NR_inotify_rm_watch 293
302#define __NR_migrate_pages 294
302 303
303#define NR_syscalls 294 304#define NR_syscalls 295
304 305
305/* 306/*
306 * user-visible error numbers are in the range -1 - -128: see 307 * user-visible error numbers are in the range -1 - -128: see
diff --git a/include/asm-i386/vm86.h b/include/asm-i386/vm86.h
index 40ec82c6914d..952fd6957380 100644
--- a/include/asm-i386/vm86.h
+++ b/include/asm-i386/vm86.h
@@ -16,7 +16,11 @@
16#define IF_MASK 0x00000200 16#define IF_MASK 0x00000200
17#define IOPL_MASK 0x00003000 17#define IOPL_MASK 0x00003000
18#define NT_MASK 0x00004000 18#define NT_MASK 0x00004000
19#ifdef CONFIG_VM86
19#define VM_MASK 0x00020000 20#define VM_MASK 0x00020000
21#else
22#define VM_MASK 0 /* ignored */
23#endif
20#define AC_MASK 0x00040000 24#define AC_MASK 0x00040000
21#define VIF_MASK 0x00080000 /* virtual interrupt flag */ 25#define VIF_MASK 0x00080000 /* virtual interrupt flag */
22#define VIP_MASK 0x00100000 /* virtual interrupt pending */ 26#define VIP_MASK 0x00100000 /* virtual interrupt pending */
@@ -200,9 +204,25 @@ struct kernel_vm86_struct {
200 */ 204 */
201}; 205};
202 206
207#ifdef CONFIG_VM86
208
203void handle_vm86_fault(struct kernel_vm86_regs *, long); 209void handle_vm86_fault(struct kernel_vm86_regs *, long);
204int handle_vm86_trap(struct kernel_vm86_regs *, long, int); 210int handle_vm86_trap(struct kernel_vm86_regs *, long, int);
205 211
212struct task_struct;
213void release_vm86_irqs(struct task_struct *);
214
215#else
216
217#define handle_vm86_fault(a, b)
218#define release_vm86_irqs(a)
219
220static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) {
221 return 0;
222}
223
224#endif /* CONFIG_VM86 */
225
206#endif /* __KERNEL__ */ 226#endif /* __KERNEL__ */
207 227
208#endif 228#endif
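
The point of the stubbed branch is that callers need no #ifdefs of their own: with CONFIG_VM86=n, VM_MASK is 0, so a test like the one below is provably false and the call to the now-empty handle_vm86_fault() macro disappears at compile time. A sketch of a typical call site, modelled on the i386 trap handlers rather than copied from one:

    if (regs->eflags & VM_MASK) {
            /* compiled out entirely when CONFIG_VM86 is not set */
            handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
            return;
    }
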
diff --git a/include/asm-ia64/bug.h b/include/asm-ia64/bug.h
index 3aa0a0a5474b..823616b5020b 100644
--- a/include/asm-ia64/bug.h
+++ b/include/asm-ia64/bug.h
@@ -2,11 +2,7 @@
2#define _ASM_IA64_BUG_H 2#define _ASM_IA64_BUG_H
3 3
4#ifdef CONFIG_BUG 4#ifdef CONFIG_BUG
5#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) 5#define ia64_abort() __builtin_trap()
6# define ia64_abort() __builtin_trap()
7#else
8# define ia64_abort() (*(volatile int *) 0 = 0)
9#endif
10#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) 6#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
11 7
12/* should this BUG be made generic? */ 8/* should this BUG be made generic? */
diff --git a/include/asm-ia64/cache.h b/include/asm-ia64/cache.h
index 666d8f175cb3..40dd25195d65 100644
--- a/include/asm-ia64/cache.h
+++ b/include/asm-ia64/cache.h
@@ -12,8 +12,6 @@
12#define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT 12#define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT
13#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 13#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
14 14
15#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */
16
17#ifdef CONFIG_SMP 15#ifdef CONFIG_SMP
18# define SMP_CACHE_SHIFT L1_CACHE_SHIFT 16# define SMP_CACHE_SHIFT L1_CACHE_SHIFT
19# define SMP_CACHE_BYTES L1_CACHE_BYTES 17# define SMP_CACHE_BYTES L1_CACHE_BYTES
diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-ia64/futex.h
+++ b/include/asm-ia64/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h
index cf772a67f858..b64fdb985494 100644
--- a/include/asm-ia64/io.h
+++ b/include/asm-ia64/io.h
@@ -89,6 +89,7 @@ phys_to_virt (unsigned long address)
89 89
90#define ARCH_HAS_VALID_PHYS_ADDR_RANGE 90#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
91extern int valid_phys_addr_range (unsigned long addr, size_t *count); /* efi.c */ 91extern int valid_phys_addr_range (unsigned long addr, size_t *count); /* efi.c */
92extern int valid_mmap_phys_addr_range (unsigned long addr, size_t *count);
92 93
93/* 94/*
94 * The following two macros are deprecated and scheduled for removal. 95 * The following two macros are deprecated and scheduled for removal.
diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h
index 0c91a76c5ea3..9e83210dc312 100644
--- a/include/asm-ia64/spinlock.h
+++ b/include/asm-ia64/spinlock.h
@@ -34,7 +34,7 @@ __raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags)
34{ 34{
35 register volatile unsigned int *ptr asm ("r31") = &lock->lock; 35 register volatile unsigned int *ptr asm ("r31") = &lock->lock;
36 36
37#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) 37#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
38# ifdef CONFIG_ITANIUM 38# ifdef CONFIG_ITANIUM
39 /* don't use brl on Itanium... */ 39 /* don't use brl on Itanium... */
40 asm volatile ("{\n\t" 40 asm volatile ("{\n\t"
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index 2bf543493cb8..962f9bd1bdff 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -269,12 +269,13 @@
269#define __NR_inotify_init 1277 269#define __NR_inotify_init 1277
270#define __NR_inotify_add_watch 1278 270#define __NR_inotify_add_watch 1278
271#define __NR_inotify_rm_watch 1279 271#define __NR_inotify_rm_watch 1279
272#define __NR_migrate_pages 1280
272 273
273#ifdef __KERNEL__ 274#ifdef __KERNEL__
274 275
275#include <linux/config.h> 276#include <linux/config.h>
276 277
277#define NR_syscalls 256 /* length of syscall table */ 278#define NR_syscalls 270 /* length of syscall table */
278 279
279#define __ARCH_WANT_SYS_RT_SIGACTION 280#define __ARCH_WANT_SYS_RT_SIGACTION
280 281
diff --git a/include/asm-m32r/cache.h b/include/asm-m32r/cache.h
index 724820596980..9c2b2d9998bc 100644
--- a/include/asm-m32r/cache.h
+++ b/include/asm-m32r/cache.h
@@ -7,6 +7,4 @@
7#define L1_CACHE_SHIFT 4 7#define L1_CACHE_SHIFT 4
8#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 8#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
9 9
10#define L1_CACHE_SHIFT_MAX 4
11
12#endif /* _ASM_M32R_CACHE_H */ 10#endif /* _ASM_M32R_CACHE_H */
diff --git a/include/asm-m32r/futex.h b/include/asm-m32r/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-m32r/futex.h
+++ b/include/asm-m32r/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-m68k/cache.h b/include/asm-m68k/cache.h
index 6161fd3d8600..fed3fd30de7e 100644
--- a/include/asm-m68k/cache.h
+++ b/include/asm-m68k/cache.h
@@ -8,6 +8,4 @@
8#define L1_CACHE_SHIFT 4 8#define L1_CACHE_SHIFT 4
9#define L1_CACHE_BYTES (1<< L1_CACHE_SHIFT) 9#define L1_CACHE_BYTES (1<< L1_CACHE_SHIFT)
10 10
11#define L1_CACHE_SHIFT_MAX 4 /* largest L1 which this arch supports */
12
13#endif 11#endif
diff --git a/include/asm-m68k/futex.h b/include/asm-m68k/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-m68k/futex.h
+++ b/include/asm-m68k/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-m68knommu/futex.h b/include/asm-m68knommu/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-m68knommu/futex.h
+++ b/include/asm-m68knommu/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-mips/cache.h b/include/asm-mips/cache.h
index 1a5d1a669db3..55e19f2ff0e0 100644
--- a/include/asm-mips/cache.h
+++ b/include/asm-mips/cache.h
@@ -15,7 +15,6 @@
15#define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT 15#define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT
16#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 16#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
17 17
18#define L1_CACHE_SHIFT_MAX 6
19#define SMP_CACHE_SHIFT L1_CACHE_SHIFT 18#define SMP_CACHE_SHIFT L1_CACHE_SHIFT
20#define SMP_CACHE_BYTES L1_CACHE_BYTES 19#define SMP_CACHE_BYTES L1_CACHE_BYTES
21 20
diff --git a/include/asm-parisc/cache.h b/include/asm-parisc/cache.h
index 5da72e38bdde..38d201b5652d 100644
--- a/include/asm-parisc/cache.h
+++ b/include/asm-parisc/cache.h
@@ -28,7 +28,6 @@
28#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) 28#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
29 29
30#define SMP_CACHE_BYTES L1_CACHE_BYTES 30#define SMP_CACHE_BYTES L1_CACHE_BYTES
31#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */
32 31
33extern void flush_data_cache_local(void); /* flushes local data-cache only */ 32extern void flush_data_cache_local(void); /* flushes local data-cache only */
34extern void flush_instruction_cache_local(void); /* flushes local code-cache only */ 33extern void flush_instruction_cache_local(void); /* flushes local code-cache only */
diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-parisc/futex.h
+++ b/include/asm-parisc/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-powerpc/cache.h b/include/asm-powerpc/cache.h
index 26ce502e76e8..6379c2df5c40 100644
--- a/include/asm-powerpc/cache.h
+++ b/include/asm-powerpc/cache.h
@@ -19,7 +19,6 @@
19#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 19#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
20 20
21#define SMP_CACHE_BYTES L1_CACHE_BYTES 21#define SMP_CACHE_BYTES L1_CACHE_BYTES
22#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */
23 22
24#if defined(__powerpc64__) && !defined(__ASSEMBLY__) 23#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
25struct ppc64_caches { 24struct ppc64_caches {
diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h
index 59a80163f75f..a96e5742ca32 100644
--- a/include/asm-powerpc/dma-mapping.h
+++ b/include/asm-powerpc/dma-mapping.h
@@ -229,7 +229,7 @@ static inline int dma_get_cache_alignment(void)
229#ifdef CONFIG_PPC64 229#ifdef CONFIG_PPC64
230 /* no easy way to get cache size on all processors, so return 230 /* no easy way to get cache size on all processors, so return
231 * the maximum possible, to be safe */ 231 * the maximum possible, to be safe */
232 return (1 << L1_CACHE_SHIFT_MAX); 232 return (1 << INTERNODE_CACHE_SHIFT);
233#else 233#else
234 /* 234 /*
235 * Each processor family will define its own L1_CACHE_SHIFT, 235 * Each processor family will define its own L1_CACHE_SHIFT,
diff --git a/include/asm-s390/cache.h b/include/asm-s390/cache.h
index 29845378b206..e20cdd9074db 100644
--- a/include/asm-s390/cache.h
+++ b/include/asm-s390/cache.h
@@ -13,7 +13,6 @@
13 13
14#define L1_CACHE_BYTES 256 14#define L1_CACHE_BYTES 256
15#define L1_CACHE_SHIFT 8 15#define L1_CACHE_SHIFT 8
16#define L1_CACHE_SHIFT_MAX 8 /* largest L1 which this arch supports */
17 16
18#define ARCH_KMALLOC_MINALIGN 8 17#define ARCH_KMALLOC_MINALIGN 8
19 18
diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-s390/futex.h
+++ b/include/asm-s390/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-sh/cache.h b/include/asm-sh/cache.h
index 9b4dd6d8212e..656fdfe9e8b4 100644
--- a/include/asm-sh/cache.h
+++ b/include/asm-sh/cache.h
@@ -22,8 +22,6 @@
22 22
23#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) 23#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
24 24
25#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */
26
27struct cache_info { 25struct cache_info {
28 unsigned int ways; 26 unsigned int ways;
29 unsigned int sets; 27 unsigned int sets;
diff --git a/include/asm-sh/futex.h b/include/asm-sh/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-sh/futex.h
+++ b/include/asm-sh/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-sh64/cache.h b/include/asm-sh64/cache.h
index f54e85e8a470..a4f36f0036e1 100644
--- a/include/asm-sh64/cache.h
+++ b/include/asm-sh64/cache.h
@@ -20,8 +20,6 @@
20#define L1_CACHE_ALIGN_MASK (~(L1_CACHE_BYTES - 1)) 20#define L1_CACHE_ALIGN_MASK (~(L1_CACHE_BYTES - 1))
21#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES - 1)) & L1_CACHE_ALIGN_MASK) 21#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES - 1)) & L1_CACHE_ALIGN_MASK)
22#define L1_CACHE_SIZE_BYTES (L1_CACHE_BYTES << 10) 22#define L1_CACHE_SIZE_BYTES (L1_CACHE_BYTES << 10)
23/* Largest L1 which this arch supports */
24#define L1_CACHE_SHIFT_MAX 5
25 23
26#ifdef MODULE 24#ifdef MODULE
27#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES))) 25#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
diff --git a/include/asm-sh64/futex.h b/include/asm-sh64/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-sh64/futex.h
+++ b/include/asm-sh64/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-sparc/cache.h b/include/asm-sparc/cache.h
index a10522cb21b7..cb971e88aea4 100644
--- a/include/asm-sparc/cache.h
+++ b/include/asm-sparc/cache.h
@@ -13,7 +13,6 @@
13#define L1_CACHE_SHIFT 5 13#define L1_CACHE_SHIFT 5
14#define L1_CACHE_BYTES 32 14#define L1_CACHE_BYTES 32
15#define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))) 15#define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)))
16#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */
17 16
18#define SMP_CACHE_BYTES 32 17#define SMP_CACHE_BYTES 32
19 18
diff --git a/include/asm-sparc/futex.h b/include/asm-sparc/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-sparc/futex.h
+++ b/include/asm-sparc/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-sparc64/cache.h b/include/asm-sparc64/cache.h
index ade5ec3bfd5a..f7d35a2ae9b8 100644
--- a/include/asm-sparc64/cache.h
+++ b/include/asm-sparc64/cache.h
@@ -9,7 +9,6 @@
9#define L1_CACHE_BYTES 32 /* Two 16-byte sub-blocks per line. */ 9#define L1_CACHE_BYTES 32 /* Two 16-byte sub-blocks per line. */
10 10
11#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) 11#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
12#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */
13 12
14#define SMP_CACHE_BYTES_SHIFT 6 13#define SMP_CACHE_BYTES_SHIFT 6
15#define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT) /* L2 cache line size. */ 14#define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT) /* L2 cache line size. */
diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-sparc64/futex.h
+++ b/include/asm-sparc64/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index b5417529f6f1..309f1466b6fa 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -193,11 +193,7 @@ do { \
193 * not preserve it's value. Hairy, but it lets us remove 2 loads 193 * not preserve it's value. Hairy, but it lets us remove 2 loads
194 * and 2 stores in this critical code path. -DaveM 194 * and 2 stores in this critical code path. -DaveM
195 */ 195 */
196#if __GNUC__ >= 3
197#define EXTRA_CLOBBER ,"%l1" 196#define EXTRA_CLOBBER ,"%l1"
198#else
199#define EXTRA_CLOBBER
200#endif
201#define switch_to(prev, next, last) \ 197#define switch_to(prev, next, last) \
202do { if (test_thread_flag(TIF_PERFCTR)) { \ 198do { if (test_thread_flag(TIF_PERFCTR)) { \
203 unsigned long __tmp; \ 199 unsigned long __tmp; \
diff --git a/include/asm-um/cache.h b/include/asm-um/cache.h
index a10602a5b2d6..3d0587075521 100644
--- a/include/asm-um/cache.h
+++ b/include/asm-um/cache.h
@@ -13,9 +13,6 @@
13# define L1_CACHE_SHIFT 5 13# define L1_CACHE_SHIFT 5
14#endif 14#endif
15 15
16/* XXX: this is valid for x86 and x86_64. */
17#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */
18
19#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 16#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
20 17
21#endif 18#endif
diff --git a/include/asm-um/futex.h b/include/asm-um/futex.h
index 142ee2d8e0fd..6a332a9f099c 100644
--- a/include/asm-um/futex.h
+++ b/include/asm-um/futex.h
@@ -1,12 +1,6 @@
1#ifndef __UM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define __UM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#include <linux/futex.h> 4#include <asm-generic/futex.h>
5#include <asm/errno.h>
6#include <asm/system.h>
7#include <asm/processor.h>
8#include <asm/uaccess.h>
9
10#include "asm/arch/futex.h"
11 5
12#endif 6#endif
diff --git a/include/asm-um/rwsem.h b/include/asm-um/rwsem.h
index 661c0e54702b..b5fc449dc86b 100644
--- a/include/asm-um/rwsem.h
+++ b/include/asm-um/rwsem.h
@@ -1,10 +1,6 @@
1#ifndef __UM_RWSEM_H__ 1#ifndef __UM_RWSEM_H__
2#define __UM_RWSEM_H__ 2#define __UM_RWSEM_H__
3 3
4#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
5#define __builtin_expect(exp,c) (exp)
6#endif
7
8#include "asm/arch/rwsem.h" 4#include "asm/arch/rwsem.h"
9 5
10#endif 6#endif
diff --git a/include/asm-v850/cache.h b/include/asm-v850/cache.h
index cbf9096e8517..8832c7ea3242 100644
--- a/include/asm-v850/cache.h
+++ b/include/asm-v850/cache.h
@@ -23,6 +23,4 @@
23#define L1_CACHE_SHIFT 4 23#define L1_CACHE_SHIFT 4
24#endif 24#endif
25 25
26#define L1_CACHE_SHIFT_MAX L1_CACHE_SHIFT
27
28#endif /* __V850_CACHE_H__ */ 26#endif /* __V850_CACHE_H__ */
diff --git a/include/asm-v850/futex.h b/include/asm-v850/futex.h
index 9feff4ce1424..6a332a9f099c 100644
--- a/include/asm-v850/futex.h
+++ b/include/asm-v850/futex.h
@@ -1,53 +1,6 @@
1#ifndef _ASM_FUTEX_H 1#ifndef _ASM_FUTEX_H
2#define _ASM_FUTEX_H 2#define _ASM_FUTEX_H
3 3
4#ifdef __KERNEL__ 4#include <asm-generic/futex.h>
5 5
6#include <linux/futex.h>
7#include <asm/errno.h>
8#include <asm/uaccess.h>
9
10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
12{
13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20;
16 int cmparg = (encoded_op << 20) >> 20;
17 int oldval = 0, ret;
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg;
20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
22 return -EFAULT;
23
24 inc_preempt_count();
25
26 switch (op) {
27 case FUTEX_OP_SET:
28 case FUTEX_OP_ADD:
29 case FUTEX_OP_OR:
30 case FUTEX_OP_ANDN:
31 case FUTEX_OP_XOR:
32 default:
33 ret = -ENOSYS;
34 }
35
36 dec_preempt_count();
37
38 if (!ret) {
39 switch (cmp) {
40 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
41 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
42 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
43 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
44 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
45 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
46 default: ret = -ENOSYS;
47 }
48 }
49 return ret;
50}
51
52#endif
53#endif 6#endif
diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h
index 5a86f8e976ec..82460a7bb233 100644
--- a/include/asm-v850/unistd.h
+++ b/include/asm-v850/unistd.h
@@ -241,9 +241,6 @@
241/* User programs sometimes end up including this header file 241/* User programs sometimes end up including this header file
242 (indirectly, via uClibc header files), so I'm a bit nervous just 242 (indirectly, via uClibc header files), so I'm a bit nervous just
243 including <linux/compiler.h>. */ 243 including <linux/compiler.h>. */
244#if !defined(__builtin_expect) && __GNUC__ == 2 && __GNUC_MINOR__ < 96
245#define __builtin_expect(x, expected_value) (x)
246#endif
247 244
248#define __syscall_return(type, res) \ 245#define __syscall_return(type, res) \
249 do { \ 246 do { \
@@ -346,20 +343,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e) \
346 __syscall_return (type, __ret); \ 343 __syscall_return (type, __ret); \
347} 344}
348 345
349#if __GNUC__ < 3
350/* In older versions of gcc, `asm' statements with more than 10
351 input/output arguments produce a fatal error. To work around this
352 problem, we use two versions, one for gcc-3.x and one for earlier
353 versions of gcc (the `earlier gcc' version doesn't work with gcc-3.x
354 because gcc-3.x doesn't allow clobbers to also be input arguments). */
355#define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \
356 __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \
357 : "=r" (ret), "=r" (syscall) \
358 : "1" (syscall), \
359 "r" (a), "r" (b), "r" (c), "r" (d), \
360 "r" (e), "r" (f) \
361 : SYSCALL_CLOBBERS, SYSCALL_ARG4, SYSCALL_ARG5);
362#else /* __GNUC__ >= 3 */
363#define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \ 346#define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \
364 __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ 347 __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \
365 : "=r" (ret), "=r" (syscall), \ 348 : "=r" (ret), "=r" (syscall), \
@@ -368,7 +351,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e) \
368 "r" (a), "r" (b), "r" (c), "r" (d), \ 351 "r" (a), "r" (b), "r" (c), "r" (d), \
369 "2" (e), "3" (f) \ 352 "2" (e), "3" (f) \
370 : SYSCALL_CLOBBERS); 353 : SYSCALL_CLOBBERS);
371#endif
372 354
373#define _syscall6(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e, ftype, f) \ 355#define _syscall6(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e, ftype, f) \
374type name (atype a, btype b, ctype c, dtype d, etype e, ftype f) \ 356type name (atype a, btype b, ctype c, dtype d, etype e, ftype f) \
diff --git a/include/asm-x86_64/cache.h b/include/asm-x86_64/cache.h
index 33e53424128b..b4a2401de77b 100644
--- a/include/asm-x86_64/cache.h
+++ b/include/asm-x86_64/cache.h
@@ -9,6 +9,5 @@
9/* L1 cache line size */ 9/* L1 cache line size */
10#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) 10#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
11#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 11#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
12#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */
13 12
14#endif 13#endif
diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h
index d5166ec3868d..e8843362a6cc 100644
--- a/include/asm-x86_64/ia32_unistd.h
+++ b/include/asm-x86_64/ia32_unistd.h
@@ -299,7 +299,8 @@
299#define __NR_ia32_inotify_init 291 299#define __NR_ia32_inotify_init 291
300#define __NR_ia32_inotify_add_watch 292 300#define __NR_ia32_inotify_add_watch 292
301#define __NR_ia32_inotify_rm_watch 293 301#define __NR_ia32_inotify_rm_watch 293
302#define __NR_ia32_migrate_pages 294
302 303
303#define IA32_NR_syscalls 294 /* must be > than biggest syscall! */ 304#define IA32_NR_syscalls 295 /* must be > than biggest syscall! */
304 305
305#endif /* _ASM_X86_64_IA32_UNISTD_H_ */ 306#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 2c42150bce0c..e6f896161c11 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -571,8 +571,10 @@ __SYSCALL(__NR_inotify_init, sys_inotify_init)
571__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) 571__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
572#define __NR_inotify_rm_watch 255 572#define __NR_inotify_rm_watch 255
573__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) 573__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
574#define __NR_migrate_pages 256
575__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
574 576
575#define __NR_syscall_max __NR_inotify_rm_watch 577#define __NR_syscall_max __NR_migrate_pages
576#ifndef __NO_STUBS 578#ifndef __NO_STUBS
577 579
578/* user-visible error numbers are in the range -1 - -4095 */ 580/* user-visible error numbers are in the range -1 - -4095 */
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 49fd37629ee4..00c8efa95cc3 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -94,26 +94,27 @@ struct kiocb {
94 ssize_t (*ki_retry)(struct kiocb *); 94 ssize_t (*ki_retry)(struct kiocb *);
95 void (*ki_dtor)(struct kiocb *); 95 void (*ki_dtor)(struct kiocb *);
96 96
97 struct list_head ki_list; /* the aio core uses this
98 * for cancellation */
99
100 union { 97 union {
101 void __user *user; 98 void __user *user;
102 struct task_struct *tsk; 99 struct task_struct *tsk;
103 } ki_obj; 100 } ki_obj;
101
104 __u64 ki_user_data; /* user's data for completion */ 102 __u64 ki_user_data; /* user's data for completion */
103 wait_queue_t ki_wait;
105 loff_t ki_pos; 104 loff_t ki_pos;
105
106 void *private;
106 /* State that we remember to be able to restart/retry */ 107 /* State that we remember to be able to restart/retry */
107 unsigned short ki_opcode; 108 unsigned short ki_opcode;
108 size_t ki_nbytes; /* copy of iocb->aio_nbytes */ 109 size_t ki_nbytes; /* copy of iocb->aio_nbytes */
109 char __user *ki_buf; /* remaining iocb->aio_buf */ 110 char __user *ki_buf; /* remaining iocb->aio_buf */
110 size_t ki_left; /* remaining bytes */ 111 size_t ki_left; /* remaining bytes */
111 wait_queue_t ki_wait;
112 long ki_retried; /* just for testing */ 112 long ki_retried; /* just for testing */
113 long ki_kicked; /* just for testing */ 113 long ki_kicked; /* just for testing */
114 long ki_queued; /* just for testing */ 114 long ki_queued; /* just for testing */
115 115
116 void *private; 116 struct list_head ki_list; /* the aio core uses this
117 * for cancellation */
117}; 118};
118 119
119#define is_sync_kiocb(iocb) ((iocb)->ki_key == KIOCB_SYNC_KEY) 120#define is_sync_kiocb(iocb) ((iocb)->ki_key == KIOCB_SYNC_KEY)
@@ -126,6 +127,7 @@ struct kiocb {
126 (x)->ki_filp = (filp); \ 127 (x)->ki_filp = (filp); \
127 (x)->ki_ctx = NULL; \ 128 (x)->ki_ctx = NULL; \
128 (x)->ki_cancel = NULL; \ 129 (x)->ki_cancel = NULL; \
130 (x)->ki_retry = NULL; \
129 (x)->ki_dtor = NULL; \ 131 (x)->ki_dtor = NULL; \
130 (x)->ki_obj.tsk = tsk; \ 132 (x)->ki_obj.tsk = tsk; \
131 (x)->ki_user_data = 0; \ 133 (x)->ki_user_data = 0; \
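
Besides regrouping the hot completion fields (ki_user_data, ki_wait, ki_pos, private) and pushing the cancellation list to the tail of struct kiocb, the init_sync_kiocb() change NULLs ki_retry, so a stack-allocated synchronous iocb can never present a stale retry callback to the aio core. The synchronous path this serves, sketched after the shape of the kernel's do_sync_read():

    struct kiocb kiocb;
    ssize_t ret;

    init_sync_kiocb(&kiocb, filp);        /* ki_retry is now NULL too */
    kiocb.ki_pos = *ppos;
    ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos);
    if (ret == -EIOCBQUEUED)
            ret = wait_on_sync_kiocb(&kiocb);
    *ppos = kiocb.ki_pos;
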
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 911c09cb9bf9..6ba3aa8a81f4 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -155,15 +155,15 @@ struct elapaarp {
155#define AARP_REQUEST 1 155#define AARP_REQUEST 1
156#define AARP_REPLY 2 156#define AARP_REPLY 2
157#define AARP_PROBE 3 157#define AARP_PROBE 3
158 __u8 hw_src[ETH_ALEN] __attribute__ ((packed)); 158 __u8 hw_src[ETH_ALEN];
159 __u8 pa_src_zero __attribute__ ((packed)); 159 __u8 pa_src_zero;
160 __be16 pa_src_net __attribute__ ((packed)); 160 __be16 pa_src_net;
161 __u8 pa_src_node __attribute__ ((packed)); 161 __u8 pa_src_node;
162 __u8 hw_dst[ETH_ALEN] __attribute__ ((packed)); 162 __u8 hw_dst[ETH_ALEN];
163 __u8 pa_dst_zero __attribute__ ((packed)); 163 __u8 pa_dst_zero;
164 __be16 pa_dst_net __attribute__ ((packed)); 164 __be16 pa_dst_net;
165 __u8 pa_dst_node __attribute__ ((packed)); 165 __u8 pa_dst_node;
166}; 166} __attribute__ ((packed));
167 167
168static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) 168static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb)
169{ 169{
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 1db061bb6b08..9f159baf153f 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -197,7 +197,8 @@ int block_read_full_page(struct page*, get_block_t*);
197int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); 197int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
198int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, 198int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
199 loff_t *); 199 loff_t *);
200int generic_cont_expand(struct inode *inode, loff_t size) ; 200int generic_cont_expand(struct inode *inode, loff_t size);
201int generic_cont_expand_simple(struct inode *inode, loff_t size);
201int block_commit_write(struct page *page, unsigned from, unsigned to); 202int block_commit_write(struct page *page, unsigned from, unsigned to);
202int block_sync_page(struct page *); 203int block_sync_page(struct page *);
203sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); 204sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 04bd756efc67..e86e4a938373 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -156,7 +156,7 @@ extern __be32 htonl(__u32);
156extern __u16 ntohs(__be16); 156extern __u16 ntohs(__be16);
157extern __be16 htons(__u16); 157extern __be16 htons(__u16);
158 158
159#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) 159#if defined(__GNUC__) && defined(__OPTIMIZE__)
160 160
161#define ___htonl(x) __cpu_to_be32(x) 161#define ___htonl(x) __cpu_to_be32(x)
162#define ___htons(x) __cpu_to_be16(x) 162#define ___htons(x) __cpu_to_be16(x)
diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h
index 2f1cb775125a..25f7f32883ec 100644
--- a/include/linux/byteorder/swab.h
+++ b/include/linux/byteorder/swab.h
@@ -110,7 +110,7 @@
110/* 110/*
111 * Allow constant folding 111 * Allow constant folding
112 */ 112 */
113#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) 113#if defined(__GNUC__) && defined(__OPTIMIZE__)
114# define __swab16(x) \ 114# define __swab16(x) \
115(__builtin_constant_p((__u16)(x)) ? \ 115(__builtin_constant_p((__u16)(x)) ? \
116 ___swab16((x)) : \ 116 ___swab16((x)) : \
diff --git a/include/linux/byteorder/swabb.h b/include/linux/byteorder/swabb.h
index d5f2a3205109..ae5e5f914bf4 100644
--- a/include/linux/byteorder/swabb.h
+++ b/include/linux/byteorder/swabb.h
@@ -77,7 +77,7 @@
77/* 77/*
78 * Allow constant folding 78 * Allow constant folding
79 */ 79 */
80#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) 80#if defined(__GNUC__) && defined(__OPTIMIZE__)
81# define __swahw32(x) \ 81# define __swahw32(x) \
82(__builtin_constant_p((__u32)(x)) ? \ 82(__builtin_constant_p((__u32)(x)) ? \
83 ___swahw32((x)) : \ 83 ___swahw32((x)) : \
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 0b7ecf3af78a..ffe52210fc4f 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -45,12 +45,21 @@
45#endif /* CONFIG_SMP */ 45#endif /* CONFIG_SMP */
46#endif 46#endif
47 47
48#if !defined(____cacheline_maxaligned_in_smp) 48/*
49 * The maximum alignment needed for some critical structures
50 * These could be inter-node cacheline sizes/L3 cacheline
51 * size etc. Define this in asm/cache.h for your arch
52 */
53#ifndef INTERNODE_CACHE_SHIFT
54#define INTERNODE_CACHE_SHIFT L1_CACHE_SHIFT
55#endif
56
57#if !defined(____cacheline_internodealigned_in_smp)
49#if defined(CONFIG_SMP) 58#if defined(CONFIG_SMP)
50#define ____cacheline_maxaligned_in_smp \ 59#define ____cacheline_internodealigned_in_smp \
51 __attribute__((__aligned__(1 << (L1_CACHE_SHIFT_MAX)))) 60 __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT))))
52#else 61#else
53#define ____cacheline_maxaligned_in_smp 62#define ____cacheline_internodealigned_in_smp
54#endif 63#endif
55#endif 64#endif
56 65
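
The cache.h hunk renames ____cacheline_maxaligned_in_smp to
____cacheline_internodealigned_in_smp and derives it from a new
INTERNODE_CACHE_SHIFT that an architecture may override in asm/cache.h,
falling back to L1_CACHE_SHIFT otherwise. An in-kernel sketch of both sides
of that contract (struct name invented):

#include <linux/cache.h>

/* Consumer: pad hot, mostly-written data out to the internode stride. */
struct node_hot_stats {
        unsigned long hits;
        unsigned long misses;
} ____cacheline_internodealigned_in_smp;

/*
 * Provider: an architecture with, say, 128-byte internode lines would
 * add to its asm/cache.h (picked up before the fallback above):
 *
 *      #define INTERNODE_CACHE_SHIFT 7
 */
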
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 152734055403..2e05e1e6b0e6 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -15,3 +15,12 @@
15 ({ unsigned long __ptr; \ 15 ({ unsigned long __ptr; \
16 __asm__ ("" : "=g"(__ptr) : "0"(ptr)); \ 16 __asm__ ("" : "=g"(__ptr) : "0"(ptr)); \
17 (typeof(ptr)) (__ptr + (off)); }) 17 (typeof(ptr)) (__ptr + (off)); })
18
19
20#define inline inline __attribute__((always_inline))
21#define __inline__ __inline__ __attribute__((always_inline))
22#define __inline __inline __attribute__((always_inline))
23#define __deprecated __attribute__((deprecated))
24#define noinline __attribute__((noinline))
25#define __attribute_pure__ __attribute__((pure))
26#define __attribute_const__ __attribute__((__const__))
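
With GCC 2 gone, the attribute macros that every remaining compiler
understands move unconditionally into compiler-gcc.h, and the per-version
headers (the gcc3/gcc4 hunks below) shrink to the genuinely
version-dependent bits. The practical effect on kernel source, sketched:

/* What the author writes: */
static inline int add_one(int x) { return x + 1; }

/* What GCC sees after the #define inline expansion above: */
static inline __attribute__((always_inline)) int add_one_seen(int x)
{
        return x + 1;
}
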
diff --git a/include/linux/compiler-gcc2.h b/include/linux/compiler-gcc2.h
deleted file mode 100644
index ebed17660c5f..000000000000
--- a/include/linux/compiler-gcc2.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/* Never include this file directly. Include <linux/compiler.h> instead. */
2
3/* These definitions are for GCC v2.x. */
4
5/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented
6 a mechanism by which the user can annotate likely branch directions and
7 expect the blocks to be reordered appropriately. Define __builtin_expect
8 to nothing for earlier compilers. */
9#include <linux/compiler-gcc.h>
10
11#if __GNUC_MINOR__ < 96
12# define __builtin_expect(x, expected_value) (x)
13#endif
14
15#define __attribute_used__ __attribute__((__unused__))
16
17/*
18 * The attribute `pure' is not implemented in GCC versions earlier
19 * than 2.96.
20 */
21#if __GNUC_MINOR__ >= 96
22# define __attribute_pure__ __attribute__((pure))
23# define __attribute_const__ __attribute__((__const__))
24#endif
25
26/* GCC 2.95.x/2.96 recognize __va_copy, but not va_copy. Actually later GCC's
27 * define both va_copy and __va_copy, but the latter may go away, so limit this
28 * to this header */
29#define va_copy __va_copy
diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index a6fa615afab5..4209082ee934 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -3,29 +3,12 @@
3/* These definitions are for GCC v3.x. */ 3/* These definitions are for GCC v3.x. */
4#include <linux/compiler-gcc.h> 4#include <linux/compiler-gcc.h>
5 5
6#if __GNUC_MINOR__ >= 1
7# define inline inline __attribute__((always_inline))
8# define __inline__ __inline__ __attribute__((always_inline))
9# define __inline __inline __attribute__((always_inline))
10#endif
11
12#if __GNUC_MINOR__ > 0
13# define __deprecated __attribute__((deprecated))
14#endif
15
16#if __GNUC_MINOR__ >= 3 6#if __GNUC_MINOR__ >= 3
17# define __attribute_used__ __attribute__((__used__)) 7# define __attribute_used__ __attribute__((__used__))
18#else 8#else
19# define __attribute_used__ __attribute__((__unused__)) 9# define __attribute_used__ __attribute__((__unused__))
20#endif 10#endif
21 11
22#define __attribute_pure__ __attribute__((pure))
23#define __attribute_const__ __attribute__((__const__))
24
25#if __GNUC_MINOR__ >= 1
26#define noinline __attribute__((noinline))
27#endif
28
29#if __GNUC_MINOR__ >= 4 12#if __GNUC_MINOR__ >= 4
30#define __must_check __attribute__((warn_unused_result)) 13#define __must_check __attribute__((warn_unused_result))
31#endif 14#endif
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 53686c037a06..e913e9beaf69 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -3,14 +3,7 @@
3/* These definitions are for GCC v4.x. */ 3/* These definitions are for GCC v4.x. */
4#include <linux/compiler-gcc.h> 4#include <linux/compiler-gcc.h>
5 5
6#define inline inline __attribute__((always_inline))
7#define __inline__ __inline__ __attribute__((always_inline))
8#define __inline __inline __attribute__((always_inline))
9#define __deprecated __attribute__((deprecated))
10#define __attribute_used__ __attribute__((__used__)) 6#define __attribute_used__ __attribute__((__used__))
11#define __attribute_pure__ __attribute__((pure))
12#define __attribute_const__ __attribute__((__const__))
13#define noinline __attribute__((noinline))
14#define __must_check __attribute__((warn_unused_result)) 7#define __must_check __attribute__((warn_unused_result))
15#define __compiler_offsetof(a,b) __builtin_offsetof(a,b) 8#define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
16 9
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d7378215b851..f23d3c6fc2c0 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -42,8 +42,6 @@ extern void __chk_io_ptr(void __iomem *);
42# include <linux/compiler-gcc4.h> 42# include <linux/compiler-gcc4.h>
43#elif __GNUC__ == 3 43#elif __GNUC__ == 3
44# include <linux/compiler-gcc3.h> 44# include <linux/compiler-gcc3.h>
45#elif __GNUC__ == 2
46# include <linux/compiler-gcc2.h>
47#else 45#else
48# error Sorry, your compiler is too old/not recognized. 46# error Sorry, your compiler is too old/not recognized.
49#endif 47#endif
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 6e2deef96b34..c472f972bd6d 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -14,22 +14,43 @@
14 14
15#ifdef CONFIG_CPUSETS 15#ifdef CONFIG_CPUSETS
16 16
17extern int number_of_cpusets; /* How many cpusets are defined in system? */
18
19extern int cpuset_init_early(void);
17extern int cpuset_init(void); 20extern int cpuset_init(void);
18extern void cpuset_init_smp(void); 21extern void cpuset_init_smp(void);
19extern void cpuset_fork(struct task_struct *p); 22extern void cpuset_fork(struct task_struct *p);
20extern void cpuset_exit(struct task_struct *p); 23extern void cpuset_exit(struct task_struct *p);
21extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p); 24extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
25extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
22void cpuset_init_current_mems_allowed(void); 26void cpuset_init_current_mems_allowed(void);
23void cpuset_update_current_mems_allowed(void); 27void cpuset_update_task_memory_state(void);
24void cpuset_restrict_to_mems_allowed(unsigned long *nodes); 28#define cpuset_nodes_subset_current_mems_allowed(nodes) \
29 nodes_subset((nodes), current->mems_allowed)
25int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); 30int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
26extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); 31
32extern int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask);
33static int inline cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
34{
35 return number_of_cpusets <= 1 || __cpuset_zone_allowed(z, gfp_mask);
36}
37
27extern int cpuset_excl_nodes_overlap(const struct task_struct *p); 38extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
39
40#define cpuset_memory_pressure_bump() \
41 do { \
42 if (cpuset_memory_pressure_enabled) \
43 __cpuset_memory_pressure_bump(); \
44 } while (0)
45extern int cpuset_memory_pressure_enabled;
46extern void __cpuset_memory_pressure_bump(void);
47
28extern struct file_operations proc_cpuset_operations; 48extern struct file_operations proc_cpuset_operations;
29extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); 49extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
30 50
31#else /* !CONFIG_CPUSETS */ 51#else /* !CONFIG_CPUSETS */
32 52
53static inline int cpuset_init_early(void) { return 0; }
33static inline int cpuset_init(void) { return 0; } 54static inline int cpuset_init(void) { return 0; }
34static inline void cpuset_init_smp(void) {} 55static inline void cpuset_init_smp(void) {}
35static inline void cpuset_fork(struct task_struct *p) {} 56static inline void cpuset_fork(struct task_struct *p) {}
@@ -40,9 +61,14 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
40 return cpu_possible_map; 61 return cpu_possible_map;
41} 62}
42 63
64static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
65{
66 return node_possible_map;
67}
68
43static inline void cpuset_init_current_mems_allowed(void) {} 69static inline void cpuset_init_current_mems_allowed(void) {}
44static inline void cpuset_update_current_mems_allowed(void) {} 70static inline void cpuset_update_task_memory_state(void) {}
45static inline void cpuset_restrict_to_mems_allowed(unsigned long *nodes) {} 71#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
46 72
47static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) 73static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
48{ 74{
@@ -59,6 +85,8 @@ static inline int cpuset_excl_nodes_overlap(const struct task_struct *p)
59 return 1; 85 return 1;
60} 86}
61 87
88static inline void cpuset_memory_pressure_bump(void) {}
89
62static inline char *cpuset_task_status_allowed(struct task_struct *task, 90static inline char *cpuset_task_status_allowed(struct task_struct *task,
63 char *buffer) 91 char *buffer)
64{ 92{
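
The cpuset.h rework splits cpuset_zone_allowed() into a cheap inline fast
path and an out-of-line __cpuset_zone_allowed(): when at most one cpuset
exists (the common case), the allocator pays a single compare on the new
number_of_cpusets counter. cpuset_memory_pressure_bump() is gated the same
way behind cpuset_memory_pressure_enabled. A minimal in-kernel sketch of the
intended call site (allocation logic elided, names invented):

#include <linux/cpuset.h>
#include <linux/mmzone.h>

static struct page *try_zone(struct zone *z, gfp_t gfp_mask)
{
        /* One compare on trivial configs; the real nodemask check in
         * __cpuset_zone_allowed() runs only with multiple cpusets. */
        if (!cpuset_zone_allowed(z, gfp_mask))
                return NULL;

        /* ... buddy allocation for this zone would go here ... */
        return NULL;
}
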
diff --git a/include/linux/cycx_x25.h b/include/linux/cycx_x25.h
index b10a7f3a8cac..f7a906583463 100644
--- a/include/linux/cycx_x25.h
+++ b/include/linux/cycx_x25.h
@@ -38,11 +38,11 @@ extern unsigned int cycx_debug;
38/* Data Structures */ 38/* Data Structures */
39/* X.25 Command Block. */ 39/* X.25 Command Block. */
40struct cycx_x25_cmd { 40struct cycx_x25_cmd {
41 u16 command PACKED; 41 u16 command;
42 u16 link PACKED; /* values: 0 or 1 */ 42 u16 link; /* values: 0 or 1 */
43 u16 len PACKED; /* values: 0 thru 0x205 (517) */ 43 u16 len; /* values: 0 thru 0x205 (517) */
44 u32 buf PACKED; 44 u32 buf;
45}; 45} PACKED;
46 46
47/* Defines for the 'command' field. */ 47/* Defines for the 'command' field. */
48#define X25_CONNECT_REQUEST 0x4401 48#define X25_CONNECT_REQUEST 0x4401
@@ -92,34 +92,34 @@ struct cycx_x25_cmd {
92 * @flags - see dosx25.doc, in portuguese, for details 92 * @flags - see dosx25.doc, in portuguese, for details
93 */ 93 */
94struct cycx_x25_config { 94struct cycx_x25_config {
95 u8 link PACKED; 95 u8 link;
96 u8 speed PACKED; 96 u8 speed;
97 u8 clock PACKED; 97 u8 clock;
98 u8 n2 PACKED; 98 u8 n2;
99 u8 n2win PACKED; 99 u8 n2win;
100 u8 n3win PACKED; 100 u8 n3win;
101 u8 nvc PACKED; 101 u8 nvc;
102 u8 pktlen PACKED; 102 u8 pktlen;
103 u8 locaddr PACKED; 103 u8 locaddr;
104 u8 remaddr PACKED; 104 u8 remaddr;
105 u16 t1 PACKED; 105 u16 t1;
106 u16 t2 PACKED; 106 u16 t2;
107 u8 t21 PACKED; 107 u8 t21;
108 u8 npvc PACKED; 108 u8 npvc;
109 u8 t23 PACKED; 109 u8 t23;
110 u8 flags PACKED; 110 u8 flags;
111}; 111} PACKED;
112 112
113struct cycx_x25_stats { 113struct cycx_x25_stats {
114 u16 rx_crc_errors PACKED; 114 u16 rx_crc_errors;
115 u16 rx_over_errors PACKED; 115 u16 rx_over_errors;
116 u16 n2_tx_frames PACKED; 116 u16 n2_tx_frames;
117 u16 n2_rx_frames PACKED; 117 u16 n2_rx_frames;
118 u16 tx_timeouts PACKED; 118 u16 tx_timeouts;
119 u16 rx_timeouts PACKED; 119 u16 rx_timeouts;
120 u16 n3_tx_packets PACKED; 120 u16 n3_tx_packets;
121 u16 n3_rx_packets PACKED; 121 u16 n3_rx_packets;
122 u16 tx_aborts PACKED; 122 u16 tx_aborts;
123 u16 rx_aborts PACKED; 123 u16 rx_aborts;
124}; 124} PACKED;
125#endif /* _CYCX_X25_H */ 125#endif /* _CYCX_X25_H */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 46a2ba617595..a3ed5e059d47 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -95,14 +95,19 @@ struct dentry {
95 struct qstr d_name; 95 struct qstr d_name;
96 96
97 struct list_head d_lru; /* LRU list */ 97 struct list_head d_lru; /* LRU list */
98 struct list_head d_child; /* child of parent list */ 98 /*
99 * d_child and d_rcu can share memory
100 */
101 union {
102 struct list_head d_child; /* child of parent list */
103 struct rcu_head d_rcu;
104 } d_u;
99 struct list_head d_subdirs; /* our children */ 105 struct list_head d_subdirs; /* our children */
100 struct list_head d_alias; /* inode alias list */ 106 struct list_head d_alias; /* inode alias list */
101 unsigned long d_time; /* used by d_revalidate */ 107 unsigned long d_time; /* used by d_revalidate */
102 struct dentry_operations *d_op; 108 struct dentry_operations *d_op;
103 struct super_block *d_sb; /* The root of the dentry tree */ 109 struct super_block *d_sb; /* The root of the dentry tree */
104 void *d_fsdata; /* fs-specific data */ 110 void *d_fsdata; /* fs-specific data */
105 struct rcu_head d_rcu;
106 struct dcookie_struct *d_cookie; /* cookie, if any */ 111 struct dcookie_struct *d_cookie; /* cookie, if any */
107 int d_mounted; 112 int d_mounted;
108 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ 113 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
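
The dcache.h hunk lets d_child and d_rcu share storage: a dentry is unlinked
from its parent's d_subdirs list before it is handed to the RCU callback, so
the two members are never live at the same time and the union shaves a
list_head off every dentry. Every d_child user must now spell the member
through the union, as in this sketch:

#include <linux/dcache.h>
#include <linux/list.h>

static void walk_children(struct dentry *parent)
{
        struct dentry *child;

        /* Before this change: ..., d_child); now it lives in d_u. */
        list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) {
                /* ... inspect child ... */
        }
}
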
diff --git a/include/linux/elf.h b/include/linux/elf.h
index ff955dbf510d..d3bfacb24496 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -151,6 +151,8 @@ typedef __s64 Elf64_Sxword;
151#define STT_FUNC 2 151#define STT_FUNC 2
152#define STT_SECTION 3 152#define STT_SECTION 3
153#define STT_FILE 4 153#define STT_FILE 4
154#define STT_COMMON 5
155#define STT_TLS 6
154 156
155#define ELF_ST_BIND(x) ((x) >> 4) 157#define ELF_ST_BIND(x) ((x) >> 4)
156#define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) 158#define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2c9c48d65630..4c82219b0fae 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -9,7 +9,6 @@
9#include <linux/config.h> 9#include <linux/config.h>
10#include <linux/limits.h> 10#include <linux/limits.h>
11#include <linux/ioctl.h> 11#include <linux/ioctl.h>
12#include <linux/rcuref.h>
13 12
14/* 13/*
15 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change 14 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -104,11 +103,11 @@ extern int dir_notify_enable;
104#define MS_MOVE 8192 103#define MS_MOVE 8192
105#define MS_REC 16384 104#define MS_REC 16384
106#define MS_VERBOSE 32768 105#define MS_VERBOSE 32768
106#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
107#define MS_UNBINDABLE (1<<17) /* change to unbindable */ 107#define MS_UNBINDABLE (1<<17) /* change to unbindable */
108#define MS_PRIVATE (1<<18) /* change to private */ 108#define MS_PRIVATE (1<<18) /* change to private */
109#define MS_SLAVE (1<<19) /* change to slave */ 109#define MS_SLAVE (1<<19) /* change to slave */
110#define MS_SHARED (1<<20) /* change to shared */ 110#define MS_SHARED (1<<20) /* change to shared */
111#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
112#define MS_ACTIVE (1<<30) 111#define MS_ACTIVE (1<<30)
113#define MS_NOUSER (1<<31) 112#define MS_NOUSER (1<<31)
114 113
@@ -225,6 +224,7 @@ extern int dir_notify_enable;
225#include <asm/semaphore.h> 224#include <asm/semaphore.h>
226#include <asm/byteorder.h> 225#include <asm/byteorder.h>
227 226
227struct hd_geometry;
228struct iovec; 228struct iovec;
229struct nameidata; 229struct nameidata;
230struct kiocb; 230struct kiocb;
@@ -653,7 +653,7 @@ extern spinlock_t files_lock;
653#define file_list_lock() spin_lock(&files_lock); 653#define file_list_lock() spin_lock(&files_lock);
654#define file_list_unlock() spin_unlock(&files_lock); 654#define file_list_unlock() spin_unlock(&files_lock);
655 655
656#define get_file(x) rcuref_inc(&(x)->f_count) 656#define get_file(x) atomic_inc(&(x)->f_count)
657#define file_count(x) atomic_read(&(x)->f_count) 657#define file_count(x) atomic_read(&(x)->f_count)
658 658
659#define MAX_NON_LFS ((1UL<<31) - 1) 659#define MAX_NON_LFS ((1UL<<31) - 1)
@@ -808,7 +808,6 @@ struct super_block {
808 struct list_head s_list; /* Keep this first */ 808 struct list_head s_list; /* Keep this first */
809 dev_t s_dev; /* search index; _not_ kdev_t */ 809 dev_t s_dev; /* search index; _not_ kdev_t */
810 unsigned long s_blocksize; 810 unsigned long s_blocksize;
811 unsigned long s_old_blocksize;
812 unsigned char s_blocksize_bits; 811 unsigned char s_blocksize_bits;
813 unsigned char s_dirt; 812 unsigned char s_dirt;
814 unsigned long long s_maxbytes; /* Max file size */ 813 unsigned long long s_maxbytes; /* Max file size */
@@ -963,6 +962,7 @@ struct block_device_operations {
963 int (*direct_access) (struct block_device *, sector_t, unsigned long *); 962 int (*direct_access) (struct block_device *, sector_t, unsigned long *);
964 int (*media_changed) (struct gendisk *); 963 int (*media_changed) (struct gendisk *);
965 int (*revalidate_disk) (struct gendisk *); 964 int (*revalidate_disk) (struct gendisk *);
965 int (*getgeo)(struct block_device *, struct hd_geometry *);
966 struct module *owner; 966 struct module *owner;
967}; 967};
968 968
@@ -1345,7 +1345,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
1345 1345
1346/* fs/open.c */ 1346/* fs/open.c */
1347 1347
1348extern int do_truncate(struct dentry *, loff_t start, struct file *filp); 1348extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
1349 struct file *filp);
1349extern long do_sys_open(const char __user *filename, int flags, int mode); 1350extern long do_sys_open(const char __user *filename, int flags, int mode);
1350extern struct file *filp_open(const char *, int, int); 1351extern struct file *filp_open(const char *, int, int);
1351extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); 1352extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
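
The fs.h hunk adds a getgeo method to block_device_operations (with a
forward declaration of struct hd_geometry), so the block core can answer
HDIO_GETGEO itself instead of each driver decoding the ioctl. A driver-side
sketch with invented geometry values:

#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/module.h>

static int mydrv_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
        geo->heads     = 4;
        geo->sectors   = 16;
        geo->cylinders = (unsigned short)
                (get_capacity(bdev->bd_disk) / (4 * 16));
        return 0;
}

static struct block_device_operations mydrv_fops = {
        .owner  = THIS_MODULE,
        .getgeo = mydrv_getgeo,
};
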
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 7b6a6a58e465..4dd6694963c0 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -801,7 +801,7 @@ typedef struct hwif_s {
801 unsigned dma; 801 unsigned dma;
802 802
803 void (*led_act)(void *data, int rw); 803 void (*led_act)(void *data, int rw);
804} ____cacheline_maxaligned_in_smp ide_hwif_t; 804} ____cacheline_internodealigned_in_smp ide_hwif_t;
805 805
806/* 806/*
807 * internal ide interrupt handler type 807 * internal ide interrupt handler type
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h
index 511999c7eeda..395f0aad9cbf 100644
--- a/include/linux/if_frad.h
+++ b/include/linux/if_frad.h
@@ -131,17 +131,17 @@ struct frad_conf
131/* these are the fields of an RFC 1490 header */ 131/* these are the fields of an RFC 1490 header */
132struct frhdr 132struct frhdr
133{ 133{
134 unsigned char control __attribute__((packed)); 134 unsigned char control;
135 135
136 /* for IP packets, this can be the NLPID */ 136 /* for IP packets, this can be the NLPID */
137 unsigned char pad __attribute__((packed)); 137 unsigned char pad;
138 138
139 unsigned char NLPID __attribute__((packed)); 139 unsigned char NLPID;
140 unsigned char OUI[3] __attribute__((packed)); 140 unsigned char OUI[3];
141 unsigned short PID __attribute__((packed)); 141 unsigned short PID;
142 142
143#define IP_NLPID pad 143#define IP_NLPID pad
144}; 144} __attribute__((packed));
145 145
146/* see RFC 1490 for the definition of the following */ 146/* see RFC 1490 for the definition of the following */
147#define FRAD_I_UI 0x03 147#define FRAD_I_UI 0x03
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 41f150a3d2dd..e50a95fbeb11 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -79,7 +79,7 @@ static inline void __deprecated save_flags(unsigned long *x)
79{ 79{
80 local_save_flags(*x); 80 local_save_flags(*x);
81} 81}
82#define save_flags(x) save_flags(&x); 82#define save_flags(x) save_flags(&x)
83static inline void __deprecated restore_flags(unsigned long x) 83static inline void __deprecated restore_flags(unsigned long x)
84{ 84{
85 local_irq_restore(x); 85 local_irq_restore(x);
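
The one-character interrupt.h fix matters more than it looks: a
statement-like macro must not end in a semicolon, or the extra empty
statement detaches any following else. A sketch with invented names:

extern void do_save(unsigned long *);

#define save_bad(x)     do_save(&x);    /* trailing ';' - broken */
#define save_good(x)    do_save(&x)

void demo(int cond, unsigned long flags)
{
        if (cond)
                save_good(flags);       /* expands to one statement */
        else
                flags = 0;
        /*
         * save_bad(flags) above would expand to "do_save(&flags);;",
         * the second ';' would close the if, and the else would no
         * longer parse.
         */
}
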
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 93bbed5c6cf4..9c8f4c9ed429 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -191,6 +191,10 @@ struct inet6_skb_parm {
191 __u16 srcrt; 191 __u16 srcrt;
192 __u16 dst1; 192 __u16 dst1;
193 __u16 lastopt; 193 __u16 lastopt;
194 __u32 nhoff;
195 __u16 flags;
196
197#define IP6SKB_XFRM_TRANSFORMED 1
194}; 198};
195 199
196#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) 200#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h
index 7a4eacd77cb2..04e10f9f14f8 100644
--- a/include/linux/isdnif.h
+++ b/include/linux/isdnif.h
@@ -282,43 +282,43 @@ typedef struct setup_parm {
282 282
283typedef struct T30_s { 283typedef struct T30_s {
284 /* session parameters */ 284 /* session parameters */
285 __u8 resolution __attribute__ ((packed)); 285 __u8 resolution;
286 __u8 rate __attribute__ ((packed)); 286 __u8 rate;
287 __u8 width __attribute__ ((packed)); 287 __u8 width;
288 __u8 length __attribute__ ((packed)); 288 __u8 length;
289 __u8 compression __attribute__ ((packed)); 289 __u8 compression;
290 __u8 ecm __attribute__ ((packed)); 290 __u8 ecm;
291 __u8 binary __attribute__ ((packed)); 291 __u8 binary;
292 __u8 scantime __attribute__ ((packed)); 292 __u8 scantime;
293 __u8 id[FAXIDLEN] __attribute__ ((packed)); 293 __u8 id[FAXIDLEN];
294 /* additional parameters */ 294 /* additional parameters */
295 __u8 phase __attribute__ ((packed)); 295 __u8 phase;
296 __u8 direction __attribute__ ((packed)); 296 __u8 direction;
297 __u8 code __attribute__ ((packed)); 297 __u8 code;
298 __u8 badlin __attribute__ ((packed)); 298 __u8 badlin;
299 __u8 badmul __attribute__ ((packed)); 299 __u8 badmul;
300 __u8 bor __attribute__ ((packed)); 300 __u8 bor;
301 __u8 fet __attribute__ ((packed)); 301 __u8 fet;
302 __u8 pollid[FAXIDLEN] __attribute__ ((packed)); 302 __u8 pollid[FAXIDLEN];
303 __u8 cq __attribute__ ((packed)); 303 __u8 cq;
304 __u8 cr __attribute__ ((packed)); 304 __u8 cr;
305 __u8 ctcrty __attribute__ ((packed)); 305 __u8 ctcrty;
306 __u8 minsp __attribute__ ((packed)); 306 __u8 minsp;
307 __u8 phcto __attribute__ ((packed)); 307 __u8 phcto;
308 __u8 rel __attribute__ ((packed)); 308 __u8 rel;
309 __u8 nbc __attribute__ ((packed)); 309 __u8 nbc;
310 /* remote station parameters */ 310 /* remote station parameters */
311 __u8 r_resolution __attribute__ ((packed)); 311 __u8 r_resolution;
312 __u8 r_rate __attribute__ ((packed)); 312 __u8 r_rate;
313 __u8 r_width __attribute__ ((packed)); 313 __u8 r_width;
314 __u8 r_length __attribute__ ((packed)); 314 __u8 r_length;
315 __u8 r_compression __attribute__ ((packed)); 315 __u8 r_compression;
316 __u8 r_ecm __attribute__ ((packed)); 316 __u8 r_ecm;
317 __u8 r_binary __attribute__ ((packed)); 317 __u8 r_binary;
318 __u8 r_scantime __attribute__ ((packed)); 318 __u8 r_scantime;
319 __u8 r_id[FAXIDLEN] __attribute__ ((packed)); 319 __u8 r_id[FAXIDLEN];
320 __u8 r_code __attribute__ ((packed)); 320 __u8 r_code;
321} T30_s; 321} __attribute__((packed)) T30_s;
322 322
323#define ISDN_TTY_FAX_CONN_IN 0 323#define ISDN_TTY_FAX_CONN_IN 0
324#define ISDN_TTY_FAX_CONN_OUT 1 324#define ISDN_TTY_FAX_CONN_OUT 1
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b1e407a4fbda..ca7ff8fdd090 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -316,8 +316,6 @@ extern int randomize_va_space;
316#endif 316#endif
317 317
318/* Trap pasters of __FUNCTION__ at compile-time */ 318/* Trap pasters of __FUNCTION__ at compile-time */
319#if __GNUC__ > 2 || __GNUC_MINOR__ >= 95
320#define __FUNCTION__ (__func__) 319#define __FUNCTION__ (__func__)
321#endif
322 320
323#endif 321#endif
diff --git a/include/linux/key.h b/include/linux/key.h
index 4d189e51bc6c..cbf464ad9589 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -177,6 +177,8 @@ struct key {
177/* 177/*
178 * kernel managed key type definition 178 * kernel managed key type definition
179 */ 179 */
180typedef int (*request_key_actor_t)(struct key *key, struct key *authkey, const char *op);
181
180struct key_type { 182struct key_type {
181 /* name of the type */ 183 /* name of the type */
182 const char *name; 184 const char *name;
@@ -218,6 +220,16 @@ struct key_type {
218 */ 220 */
219 long (*read)(const struct key *key, char __user *buffer, size_t buflen); 221 long (*read)(const struct key *key, char __user *buffer, size_t buflen);
220 222
223 /* handle request_key() for this type instead of invoking
224 * /sbin/request-key (optional)
225 * - key is the key to instantiate
226 * - authkey is the authority to assume when instantiating this key
227 * - op is the operation to be done, usually "create"
228 * - the call must not return until the instantiation process has run
229 * its course
230 */
231 request_key_actor_t request_key;
232
221 /* internal fields */ 233 /* internal fields */
222 struct list_head link; /* link in types list */ 234 struct list_head link; /* link in types list */
223}; 235};
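
The key.h additions let a key type instantiate keys in kernel space:
request_key() calls the type's request_key actor, when present, instead of
upcalling /sbin/request-key. A sketch of the wiring; the payload handling is
invented, only the hook itself comes from this header:

#include <linux/key.h>

static int mykey_request_key(struct key *key, struct key *authkey,
                             const char *op)
{
        /* Instantiate 'key' with the authority carried by 'authkey';
         * must not return until instantiation has run its course. */
        return key_instantiate_and_link(key, "data", 4, NULL, authkey);
}

static struct key_type key_type_mykey = {
        .name        = "mykey",
        .request_key = mykey_request_key,
};
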
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
index 8d7c59a29e09..3365945640c9 100644
--- a/include/linux/keyctl.h
+++ b/include/linux/keyctl.h
@@ -19,6 +19,7 @@
19#define KEY_SPEC_USER_KEYRING -4 /* - key ID for UID-specific keyring */ 19#define KEY_SPEC_USER_KEYRING -4 /* - key ID for UID-specific keyring */
20#define KEY_SPEC_USER_SESSION_KEYRING -5 /* - key ID for UID-session keyring */ 20#define KEY_SPEC_USER_SESSION_KEYRING -5 /* - key ID for UID-session keyring */
21#define KEY_SPEC_GROUP_KEYRING -6 /* - key ID for GID-specific keyring */ 21#define KEY_SPEC_GROUP_KEYRING -6 /* - key ID for GID-specific keyring */
22#define KEY_SPEC_REQKEY_AUTH_KEY -7 /* - key ID for assumed request_key auth key */
22 23
23/* request-key default keyrings */ 24/* request-key default keyrings */
24#define KEY_REQKEY_DEFL_NO_CHANGE -1 25#define KEY_REQKEY_DEFL_NO_CHANGE -1
@@ -46,5 +47,7 @@
46#define KEYCTL_INSTANTIATE 12 /* instantiate a partially constructed key */ 47#define KEYCTL_INSTANTIATE 12 /* instantiate a partially constructed key */
47#define KEYCTL_NEGATE 13 /* negate a partially constructed key */ 48#define KEYCTL_NEGATE 13 /* negate a partially constructed key */
48#define KEYCTL_SET_REQKEY_KEYRING 14 /* set default request-key keyring */ 49#define KEYCTL_SET_REQKEY_KEYRING 14 /* set default request-key keyring */
50#define KEYCTL_SET_TIMEOUT 15 /* set key timeout */
51#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */
49 52
50#endif /* _LINUX_KEYCTL_H */ 53#endif /* _LINUX_KEYCTL_H */
diff --git a/include/linux/memory.h b/include/linux/memory.h
index dc4081b6f161..e251dc43d0f5 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -70,21 +70,15 @@ static inline void unregister_memory_notifier(struct notifier_block *nb)
70{ 70{
71} 71}
72#else 72#else
73extern int register_memory(struct memory_block *, struct mem_section *section, struct node *);
74extern int register_new_memory(struct mem_section *); 73extern int register_new_memory(struct mem_section *);
75extern int unregister_memory_section(struct mem_section *); 74extern int unregister_memory_section(struct mem_section *);
76extern int memory_dev_init(void); 75extern int memory_dev_init(void);
77extern int register_memory_notifier(struct notifier_block *nb); 76extern int remove_memory_block(unsigned long, struct mem_section *, int);
78extern void unregister_memory_notifier(struct notifier_block *nb);
79 77
80#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) 78#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
81 79
82extern int invalidate_phys_mapping(unsigned long, unsigned long);
83struct notifier_block; 80struct notifier_block;
84 81
85extern int register_memory_notifier(struct notifier_block *nb);
86extern void unregister_memory_notifier(struct notifier_block *nb);
87
88#endif /* CONFIG_MEMORY_HOTPLUG */ 82#endif /* CONFIG_MEMORY_HOTPLUG */
89 83
90#define hotplug_memory_notifier(fn, pri) { \ 84#define hotplug_memory_notifier(fn, pri) { \
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index ed00b278cb93..c7ac77e873b3 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -22,6 +22,9 @@
22 22
23/* Flags for mbind */ 23/* Flags for mbind */
24#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ 24#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
25#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
26#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
27#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */
25 28
26#ifdef __KERNEL__ 29#ifdef __KERNEL__
27 30
@@ -65,6 +68,7 @@ struct mempolicy {
65 nodemask_t nodes; /* interleave */ 68 nodemask_t nodes; /* interleave */
66 /* undefined for default */ 69 /* undefined for default */
67 } v; 70 } v;
71 nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */
68}; 72};
69 73
70/* 74/*
@@ -141,12 +145,21 @@ void mpol_free_shared_policy(struct shared_policy *p);
141struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, 145struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
142 unsigned long idx); 146 unsigned long idx);
143 147
144struct mempolicy *get_vma_policy(struct task_struct *task,
145 struct vm_area_struct *vma, unsigned long addr);
146
147extern void numa_default_policy(void); 148extern void numa_default_policy(void);
148extern void numa_policy_init(void); 149extern void numa_policy_init(void);
149extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); 150extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
151extern void mpol_rebind_task(struct task_struct *tsk,
152 const nodemask_t *new);
153extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
154#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
155
156#ifdef CONFIG_CPUSET
157#define current_cpuset_is_being_rebound() \
158 (cpuset_being_rebound == current->cpuset)
159#else
160#define current_cpuset_is_being_rebound() 0
161#endif
162
150extern struct mempolicy default_policy; 163extern struct mempolicy default_policy;
151extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, 164extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
152 unsigned long addr); 165 unsigned long addr);
@@ -159,6 +172,11 @@ static inline void check_highest_zone(int k)
159 policy_zone = k; 172 policy_zone = k;
160} 173}
161 174
175int do_migrate_pages(struct mm_struct *mm,
176 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
177
178extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */
179
162#else 180#else
163 181
164struct mempolicy {}; 182struct mempolicy {};
@@ -218,17 +236,35 @@ static inline void numa_default_policy(void)
218{ 236{
219} 237}
220 238
221static inline void numa_policy_rebind(const nodemask_t *old, 239static inline void mpol_rebind_policy(struct mempolicy *pol,
222 const nodemask_t *new) 240 const nodemask_t *new)
223{ 241{
224} 242}
225 243
244static inline void mpol_rebind_task(struct task_struct *tsk,
245 const nodemask_t *new)
246{
247}
248
249static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
250{
251}
252
253#define set_cpuset_being_rebound(x) do {} while (0)
254
226static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, 255static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
227 unsigned long addr) 256 unsigned long addr)
228{ 257{
229 return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); 258 return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
230} 259}
231 260
261static inline int do_migrate_pages(struct mm_struct *mm,
262 const nodemask_t *from_nodes,
263 const nodemask_t *to_nodes, int flags)
264{
265 return 0;
266}
267
232static inline void check_highest_zone(int k) 268static inline void check_highest_zone(int k)
233{ 269{
234} 270}
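
The mempolicy.h rework replaces the single numa_policy_rebind() with
mpol_rebind_policy/task/mm so cpusets can retarget every policy a task holds
when its allowed nodes change, and it exposes do_migrate_pages() to
physically move the pages afterwards. The !CONFIG_NUMA stubs keep callers
free of ifdefs. A hedged sketch of the intended cpuset-side use (helper name
invented):

#include <linux/mempolicy.h>
#include <linux/nodemask.h>
#include <linux/sched.h>

static void retarget_task(struct task_struct *tsk, struct mm_struct *mm,
                          nodemask_t *from, nodemask_t *to)
{
        mpol_rebind_task(tsk, to);      /* rewrite task policies */
        mpol_rebind_mm(mm, to);         /* rewrite per-vma policies */

        /* MPOL_MF_MOVE: migrate only pages private to this mm;
         * compiles to a return-0 stub without CONFIG_NUMA. */
        do_migrate_pages(mm, from, to, MPOL_MF_MOVE);
}
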
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc01fff3aa01..df80e63903b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -223,24 +223,27 @@ struct page {
223 * & limit reverse map searches. 223 * & limit reverse map searches.
224 */ 224 */
225 union { 225 union {
226 unsigned long private; /* Mapping-private opaque data: 226 struct {
227 * usually used for buffer_heads 227 unsigned long private; /* Mapping-private opaque data:
228 * if PagePrivate set; used for 228 * usually used for buffer_heads
229 * swp_entry_t if PageSwapCache 229 * if PagePrivate set; used for
230 * When page is free, this indicates 230 * swp_entry_t if PageSwapCache.
231 * order in the buddy system. 231 * When page is free, this
232 */ 232 * indicates order in the buddy
233 * system.
234 */
235 struct address_space *mapping; /* If low bit clear, points to
236 * inode address_space, or NULL.
237 * If page mapped as anonymous
238 * memory, low bit is set, and
239 * it points to anon_vma object:
240 * see PAGE_MAPPING_ANON below.
241 */
242 };
233#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS 243#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
234 spinlock_t ptl; 244 spinlock_t ptl;
235#endif 245#endif
236 } u; 246 };
237 struct address_space *mapping; /* If low bit clear, points to
238 * inode address_space, or NULL.
239 * If page mapped as anonymous
240 * memory, low bit is set, and
241 * it points to anon_vma object:
242 * see PAGE_MAPPING_ANON below.
243 */
244 pgoff_t index; /* Our offset within mapping. */ 247 pgoff_t index; /* Our offset within mapping. */
245 struct list_head lru; /* Pageout list, eg. active_list 248 struct list_head lru; /* Pageout list, eg. active_list
246 * protected by zone->lru_lock ! 249 * protected by zone->lru_lock !
@@ -261,8 +264,8 @@ struct page {
261#endif /* WANT_PAGE_VIRTUAL */ 264#endif /* WANT_PAGE_VIRTUAL */
262}; 265};
263 266
264#define page_private(page) ((page)->u.private) 267#define page_private(page) ((page)->private)
265#define set_page_private(page, v) ((page)->u.private = (v)) 268#define set_page_private(page, v) ((page)->private = (v))
266 269
267/* 270/*
268 * FIXME: take this include out, include page-flags.h in 271 * FIXME: take this include out, include page-flags.h in
@@ -308,7 +311,7 @@ struct page {
308 */ 311 */
309#define get_page_testone(p) atomic_inc_and_test(&(p)->_count) 312#define get_page_testone(p) atomic_inc_and_test(&(p)->_count)
310 313
311#define set_page_count(p,v) atomic_set(&(p)->_count, v - 1) 314#define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1)
312#define __put_page(p) atomic_dec(&(p)->_count) 315#define __put_page(p) atomic_dec(&(p)->_count)
313 316
314extern void FASTCALL(__page_cache_release(struct page *)); 317extern void FASTCALL(__page_cache_release(struct page *));
@@ -815,7 +818,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
815 * overflow into the next struct page (as it might with DEBUG_SPINLOCK). 818 * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
816 * When freeing, reset page->mapping so free_pages_check won't complain. 819 * When freeing, reset page->mapping so free_pages_check won't complain.
817 */ 820 */
818#define __pte_lockptr(page) &((page)->u.ptl) 821#define __pte_lockptr(page) &((page)->ptl)
819#define pte_lock_init(_page) do { \ 822#define pte_lock_init(_page) do { \
820 spin_lock_init(__pte_lockptr(_page)); \ 823 spin_lock_init(__pte_lockptr(_page)); \
821} while (0) 824} while (0)
@@ -1036,5 +1039,12 @@ int in_gate_area_no_task(unsigned long addr);
1036/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ 1039/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
1037#define OOM_DISABLE -17 1040#define OOM_DISABLE -17
1038 1041
1042int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
1043 void __user *, size_t *, loff_t *);
1044int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
1045 unsigned long lru_pages);
1046void drop_pagecache(void);
1047void drop_slab(void);
1048
1039#endif /* __KERNEL__ */ 1049#endif /* __KERNEL__ */
1040#endif /* _LINUX_MM_H */ 1050#endif /* _LINUX_MM_H */
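
Two details in the mm.h hunk are easy to miss. First, the union becomes
anonymous, so page->u.private flattens to page->private; the accessor pair
keeps call sites stable across such layout changes:

#include <linux/mm.h>

struct buffer_head;

static void stash_bh(struct page *page, struct buffer_head *bh)
{
        set_page_private(page, (unsigned long)bh);  /* page->private = .. */
        SetPagePrivate(page);
}

Second, set_page_count() gains parentheses around v: with the old body,
set_page_count(p, cond ? 1 : 2) would have expanded to "cond ? 1 : 2 - 1",
subtracting from the wrong operand; "(v) - 1" makes the expansion safe for
any argument expression.
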
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 47762ca695a5..49cc68af01f8 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -38,3 +38,25 @@ del_page_from_lru(struct zone *zone, struct page *page)
38 zone->nr_inactive--; 38 zone->nr_inactive--;
39 } 39 }
40} 40}
41
42/*
43 * Isolate one page from the LRU lists.
44 *
45 * - zone->lru_lock must be held
46 */
47static inline int __isolate_lru_page(struct page *page)
48{
49 if (unlikely(!TestClearPageLRU(page)))
50 return 0;
51
52 if (get_page_testone(page)) {
53 /*
54 * It is being freed elsewhere
55 */
56 __put_page(page);
57 SetPageLRU(page);
58 return -ENOENT;
59 }
60
61 return 1;
62}
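
__isolate_lru_page() is new infrastructure for page migration: it returns 1
when the caller has claimed the page (LRU flag cleared, reference taken), 0
when the page was not on the LRU, and -ENOENT when it raced with the final
put, in which case the flag and count are restored. zone->lru_lock must be
held throughout. A sketch of a scanner built on it (bookkeeping elided,
names invented):

#include <linux/mm.h>
#include <linux/mm_inline.h>

static int try_take_page(struct page *page, struct list_head *dst)
{
        /* caller holds zone->lru_lock */
        switch (__isolate_lru_page(page)) {
        case 1:                         /* ours: pull it off the LRU */
                list_move(&page->lru, dst);
                return 1;
        case -ENOENT:                   /* being freed elsewhere */
                return 0;               /* leave it for the freer */
        default:                        /* was not PageLRU */
                return 0;
        }
}
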
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c34f4a2c62f8..7e4ae6ab1977 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -38,7 +38,7 @@ struct pglist_data;
38#if defined(CONFIG_SMP) 38#if defined(CONFIG_SMP)
39struct zone_padding { 39struct zone_padding {
40 char x[0]; 40 char x[0];
41} ____cacheline_maxaligned_in_smp; 41} ____cacheline_internodealigned_in_smp;
42#define ZONE_PADDING(name) struct zone_padding name; 42#define ZONE_PADDING(name) struct zone_padding name;
43#else 43#else
44#define ZONE_PADDING(name) 44#define ZONE_PADDING(name)
@@ -233,7 +233,7 @@ struct zone {
233 * rarely used fields: 233 * rarely used fields:
234 */ 234 */
235 char *name; 235 char *name;
236} ____cacheline_maxaligned_in_smp; 236} ____cacheline_internodealigned_in_smp;
237 237
238 238
239/* 239/*
@@ -437,6 +437,8 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
437extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; 437extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
438int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, 438int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
439 void __user *, size_t *, loff_t *); 439 void __user *, size_t *, loff_t *);
440int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
441 void __user *, size_t *, loff_t *);
440 442
441#include <linux/topology.h> 443#include <linux/topology.h>
442/* Returns the number of the current Node. */ 444/* Returns the number of the current Node. */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index dd4e83eba933..b98a709f1794 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -22,7 +22,8 @@
22#define MNT_NOEXEC 0x04 22#define MNT_NOEXEC 0x04
23#define MNT_SHARED 0x10 /* if the vfsmount is a shared mount */ 23#define MNT_SHARED 0x10 /* if the vfsmount is a shared mount */
24#define MNT_UNBINDABLE 0x20 /* if the vfsmount is a unbindable mount */ 24#define MNT_UNBINDABLE 0x20 /* if the vfsmount is a unbindable mount */
25#define MNT_PNODE_MASK 0x30 /* propogation flag mask */ 25
26#define MNT_PNODE_MASK (MNT_SHARED | MNT_UNBINDABLE)
26 27
27struct vfsmount { 28struct vfsmount {
28 struct list_head mnt_hash; 29 struct list_head mnt_hash;
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 941da5c016a0..e933e2a355ad 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -329,7 +329,8 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
329extern void fat_cache_inval_inode(struct inode *inode); 329extern void fat_cache_inval_inode(struct inode *inode);
330extern int fat_get_cluster(struct inode *inode, int cluster, 330extern int fat_get_cluster(struct inode *inode, int cluster,
331 int *fclus, int *dclus); 331 int *fclus, int *dclus);
332extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys); 332extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
333 unsigned long *mapped_blocks);
333 334
334/* fat/dir.c */ 335/* fat/dir.c */
335extern struct file_operations fat_dir_operations; 336extern struct file_operations fat_dir_operations;
diff --git a/include/linux/ncp.h b/include/linux/ncp.h
index 99f77876b716..99f0adeeb3f3 100644
--- a/include/linux/ncp.h
+++ b/include/linux/ncp.h
@@ -20,29 +20,29 @@
20#define NCP_DEALLOC_SLOT_REQUEST (0x5555) 20#define NCP_DEALLOC_SLOT_REQUEST (0x5555)
21 21
22struct ncp_request_header { 22struct ncp_request_header {
23 __u16 type __attribute__((packed)); 23 __u16 type;
24 __u8 sequence __attribute__((packed)); 24 __u8 sequence;
25 __u8 conn_low __attribute__((packed)); 25 __u8 conn_low;
26 __u8 task __attribute__((packed)); 26 __u8 task;
27 __u8 conn_high __attribute__((packed)); 27 __u8 conn_high;
28 __u8 function __attribute__((packed)); 28 __u8 function;
29 __u8 data[0] __attribute__((packed)); 29 __u8 data[0];
30}; 30} __attribute__((packed));
31 31
32#define NCP_REPLY (0x3333) 32#define NCP_REPLY (0x3333)
33#define NCP_WATCHDOG (0x3E3E) 33#define NCP_WATCHDOG (0x3E3E)
34#define NCP_POSITIVE_ACK (0x9999) 34#define NCP_POSITIVE_ACK (0x9999)
35 35
36struct ncp_reply_header { 36struct ncp_reply_header {
37 __u16 type __attribute__((packed)); 37 __u16 type;
38 __u8 sequence __attribute__((packed)); 38 __u8 sequence;
39 __u8 conn_low __attribute__((packed)); 39 __u8 conn_low;
40 __u8 task __attribute__((packed)); 40 __u8 task;
41 __u8 conn_high __attribute__((packed)); 41 __u8 conn_high;
42 __u8 completion_code __attribute__((packed)); 42 __u8 completion_code;
43 __u8 connection_state __attribute__((packed)); 43 __u8 connection_state;
44 __u8 data[0] __attribute__((packed)); 44 __u8 data[0];
45}; 45} __attribute__((packed));
46 46
47#define NCP_VOLNAME_LEN (16) 47#define NCP_VOLNAME_LEN (16)
48#define NCP_NUMBER_OF_VOLUMES (256) 48#define NCP_NUMBER_OF_VOLUMES (256)
@@ -128,37 +128,37 @@ struct nw_nfs_info {
128}; 128};
129 129
130struct nw_info_struct { 130struct nw_info_struct {
131 __u32 spaceAlloc __attribute__((packed)); 131 __u32 spaceAlloc;
132 __le32 attributes __attribute__((packed)); 132 __le32 attributes;
133 __u16 flags __attribute__((packed)); 133 __u16 flags;
134 __le32 dataStreamSize __attribute__((packed)); 134 __le32 dataStreamSize;
135 __le32 totalStreamSize __attribute__((packed)); 135 __le32 totalStreamSize;
136 __u16 numberOfStreams __attribute__((packed)); 136 __u16 numberOfStreams;
137 __le16 creationTime __attribute__((packed)); 137 __le16 creationTime;
138 __le16 creationDate __attribute__((packed)); 138 __le16 creationDate;
139 __u32 creatorID __attribute__((packed)); 139 __u32 creatorID;
140 __le16 modifyTime __attribute__((packed)); 140 __le16 modifyTime;
141 __le16 modifyDate __attribute__((packed)); 141 __le16 modifyDate;
142 __u32 modifierID __attribute__((packed)); 142 __u32 modifierID;
143 __le16 lastAccessDate __attribute__((packed)); 143 __le16 lastAccessDate;
144 __u16 archiveTime __attribute__((packed)); 144 __u16 archiveTime;
145 __u16 archiveDate __attribute__((packed)); 145 __u16 archiveDate;
146 __u32 archiverID __attribute__((packed)); 146 __u32 archiverID;
147 __u16 inheritedRightsMask __attribute__((packed)); 147 __u16 inheritedRightsMask;
148 __le32 dirEntNum __attribute__((packed)); 148 __le32 dirEntNum;
149 __le32 DosDirNum __attribute__((packed)); 149 __le32 DosDirNum;
150 __u32 volNumber __attribute__((packed)); 150 __u32 volNumber;
151 __u32 EADataSize __attribute__((packed)); 151 __u32 EADataSize;
152 __u32 EAKeyCount __attribute__((packed)); 152 __u32 EAKeyCount;
153 __u32 EAKeySize __attribute__((packed)); 153 __u32 EAKeySize;
154 __u32 NSCreator __attribute__((packed)); 154 __u32 NSCreator;
155 __u8 nameLen __attribute__((packed)); 155 __u8 nameLen;
156 __u8 entryName[256] __attribute__((packed)); 156 __u8 entryName[256];
157 /* libncp may depend on there being nothing after entryName */ 157 /* libncp may depend on there being nothing after entryName */
158#ifdef __KERNEL__ 158#ifdef __KERNEL__
159 struct nw_nfs_info nfs; 159 struct nw_nfs_info nfs;
160#endif 160#endif
161}; 161} __attribute__((packed));
162 162
163/* modify mask - use with MODIFY_DOS_INFO structure */ 163/* modify mask - use with MODIFY_DOS_INFO structure */
164#define DM_ATTRIBUTES (cpu_to_le32(0x02)) 164#define DM_ATTRIBUTES (cpu_to_le32(0x02))
@@ -176,26 +176,26 @@ struct nw_info_struct {
176#define DM_MAXIMUM_SPACE (cpu_to_le32(0x2000)) 176#define DM_MAXIMUM_SPACE (cpu_to_le32(0x2000))
177 177
178struct nw_modify_dos_info { 178struct nw_modify_dos_info {
179 __le32 attributes __attribute__((packed)); 179 __le32 attributes;
180 __le16 creationDate __attribute__((packed)); 180 __le16 creationDate;
181 __le16 creationTime __attribute__((packed)); 181 __le16 creationTime;
182 __u32 creatorID __attribute__((packed)); 182 __u32 creatorID;
183 __le16 modifyDate __attribute__((packed)); 183 __le16 modifyDate;
184 __le16 modifyTime __attribute__((packed)); 184 __le16 modifyTime;
185 __u32 modifierID __attribute__((packed)); 185 __u32 modifierID;
186 __u16 archiveDate __attribute__((packed)); 186 __u16 archiveDate;
187 __u16 archiveTime __attribute__((packed)); 187 __u16 archiveTime;
188 __u32 archiverID __attribute__((packed)); 188 __u32 archiverID;
189 __le16 lastAccessDate __attribute__((packed)); 189 __le16 lastAccessDate;
190 __u16 inheritanceGrantMask __attribute__((packed)); 190 __u16 inheritanceGrantMask;
191 __u16 inheritanceRevokeMask __attribute__((packed)); 191 __u16 inheritanceRevokeMask;
192 __u32 maximumSpace __attribute__((packed)); 192 __u32 maximumSpace;
193}; 193} __attribute__((packed));
194 194
195struct nw_search_sequence { 195struct nw_search_sequence {
196 __u8 volNumber __attribute__((packed)); 196 __u8 volNumber;
197 __u32 dirBase __attribute__((packed)); 197 __u32 dirBase;
198 __u32 sequence __attribute__((packed)); 198 __u32 sequence;
199}; 199} __attribute__((packed));
200 200
201#endif /* _LINUX_NCP_H */ 201#endif /* _LINUX_NCP_H */
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index be365e70ee99..4cf6088625c1 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -168,6 +168,37 @@ void nf_log_packet(int pf,
168 const struct net_device *out, 168 const struct net_device *out,
169 struct nf_loginfo *li, 169 struct nf_loginfo *li,
170 const char *fmt, ...); 170 const char *fmt, ...);
171
172int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
173 struct net_device *indev, struct net_device *outdev,
174 int (*okfn)(struct sk_buff *), int thresh);
175
176/**
177 * nf_hook_thresh - call a netfilter hook
178 *
179 * Returns 1 if the hook has allowed the packet to pass. The function
180 * okfn must be invoked by the caller in this case. Any other return
181 * value indicates the packet has been consumed by the hook.
182 */
183static inline int nf_hook_thresh(int pf, unsigned int hook,
184 struct sk_buff **pskb,
185 struct net_device *indev,
186 struct net_device *outdev,
187 int (*okfn)(struct sk_buff *), int thresh)
188{
189#ifndef CONFIG_NETFILTER_DEBUG
190 if (list_empty(&nf_hooks[pf][hook]))
191 return 1;
192#endif
193 return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
194}
195
196static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
197 struct net_device *indev, struct net_device *outdev,
198 int (*okfn)(struct sk_buff *))
199{
200 return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN);
201}
171 202
172/* Activate hook; either okfn or kfree_skb called, unless a hook 203/* Activate hook; either okfn or kfree_skb called, unless a hook
173 returns NF_STOLEN (in which case, it's up to the hook to deal with 204 returns NF_STOLEN (in which case, it's up to the hook to deal with
@@ -188,35 +219,17 @@ void nf_log_packet(int pf,
188 219
189/* This is gross, but inline doesn't cut it for avoiding the function 220/* This is gross, but inline doesn't cut it for avoiding the function
190 call in fast path: gcc doesn't inline (needs value tracking?). --RR */ 221 call in fast path: gcc doesn't inline (needs value tracking?). --RR */
191#ifdef CONFIG_NETFILTER_DEBUG 222
192#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ 223/* HX: It's slightly less gross now. */
193({int __ret; \ 224
194if ((__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \
195 __ret = (okfn)(skb); \
196__ret;})
197#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
198({int __ret; \
199if ((__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1) \
200 __ret = (okfn)(skb); \
201__ret;})
202#else
203#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
204({int __ret; \
205if (list_empty(&nf_hooks[pf][hook]) || \
206 (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \
207 __ret = (okfn)(skb); \
208__ret;})
209#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ 225#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \
210({int __ret; \ 226({int __ret; \
211if (list_empty(&nf_hooks[pf][hook]) || \ 227if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\
212 (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1) \
213 __ret = (okfn)(skb); \ 228 __ret = (okfn)(skb); \
214__ret;}) 229__ret;})
215#endif
216 230
217int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, 231#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
218 struct net_device *indev, struct net_device *outdev, 232 NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
219 int (*okfn)(struct sk_buff *), int thresh);
220 233
221/* Call setsockopt() */ 234/* Call setsockopt() */
222int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, 235int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt,
@@ -261,6 +274,20 @@ struct nf_queue_rerouter {
261extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); 274extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer);
262extern int nf_unregister_queue_rerouter(int pf); 275extern int nf_unregister_queue_rerouter(int pf);
263 276
277#include <net/flow.h>
278extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
279
280static inline void
281nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family)
282{
283#ifdef CONFIG_IP_NF_NAT_NEEDED
284 void (*decodefn)(struct sk_buff *, struct flowi *);
285
286 if (family == AF_INET && (decodefn = ip_nat_decode_session) != NULL)
287 decodefn(skb, fl);
288#endif
289}
290
264#ifdef CONFIG_PROC_FS 291#ifdef CONFIG_PROC_FS
265#include <linux/proc_fs.h> 292#include <linux/proc_fs.h>
266extern struct proc_dir_entry *proc_net_netfilter; 293extern struct proc_dir_entry *proc_net_netfilter;
@@ -268,7 +295,24 @@ extern struct proc_dir_entry *proc_net_netfilter;
268 295
269#else /* !CONFIG_NETFILTER */ 296#else /* !CONFIG_NETFILTER */
270#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) 297#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
298static inline int nf_hook_thresh(int pf, unsigned int hook,
299 struct sk_buff **pskb,
300 struct net_device *indev,
301 struct net_device *outdev,
302 int (*okfn)(struct sk_buff *), int thresh)
303{
304 return okfn(*pskb);
305}
306static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
307 struct net_device *indev, struct net_device *outdev,
308 int (*okfn)(struct sk_buff *))
309{
310 return okfn(*pskb);
311}
271static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} 312static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
313struct flowi;
314static inline void
315nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {}
272#endif /*CONFIG_NETFILTER*/ 316#endif /*CONFIG_NETFILTER*/
273 317
274#endif /*__KERNEL__*/ 318#endif /*__KERNEL__*/
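
The netfilter.h churn collapses the duplicated DEBUG/non-DEBUG NF_HOOK macro
pair: the list_empty() fast path now lives in the nf_hook_thresh() inline
(compiled out under CONFIG_NETFILTER_DEBUG so every hook is exercised),
NF_HOOK_THRESH wraps it, and NF_HOOK is just the INT_MIN threshold case.
Call sites are unchanged, as this sketch of a typical IPv4 output path
shows (function names invented):

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/skbuff.h>

static int my_finish_output(struct sk_buff *skb)
{
        /* ... hand skb to the device layer ... */
        return 0;
}

static int my_output(struct sk_buff *skb, struct net_device *dev)
{
        /* With no hooks registered this reduces to a list_empty()
         * test and a direct call of my_finish_output(). */
        return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
                       my_finish_output);
}
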
diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h
new file mode 100644
index 000000000000..7fd1bec453f1
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_policy.h
@@ -0,0 +1,52 @@
1#ifndef _IPT_POLICY_H
2#define _IPT_POLICY_H
3
4#define IPT_POLICY_MAX_ELEM 4
5
6enum ipt_policy_flags
7{
8 IPT_POLICY_MATCH_IN = 0x1,
9 IPT_POLICY_MATCH_OUT = 0x2,
10 IPT_POLICY_MATCH_NONE = 0x4,
11 IPT_POLICY_MATCH_STRICT = 0x8,
12};
13
14enum ipt_policy_modes
15{
16 IPT_POLICY_MODE_TRANSPORT,
17 IPT_POLICY_MODE_TUNNEL
18};
19
20struct ipt_policy_spec
21{
22 u_int8_t saddr:1,
23 daddr:1,
24 proto:1,
25 mode:1,
26 spi:1,
27 reqid:1;
28};
29
30struct ipt_policy_elem
31{
32 u_int32_t saddr;
33 u_int32_t smask;
34 u_int32_t daddr;
35 u_int32_t dmask;
36 u_int32_t spi;
37 u_int32_t reqid;
38 u_int8_t proto;
39 u_int8_t mode;
40
41 struct ipt_policy_spec match;
42 struct ipt_policy_spec invert;
43};
44
45struct ipt_policy_info
46{
47 struct ipt_policy_elem pol[IPT_POLICY_MAX_ELEM];
48 u_int16_t flags;
49 u_int16_t len;
50};
51
52#endif /* _IPT_POLICY_H */
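
The new ipt_policy match lets iptables rules test which IPsec policy a
packet matched. Userspace fills an ipt_policy_info with up to
IPT_POLICY_MAX_ELEM elements; each ipt_policy_spec bit selects which element
fields must compare (or, via invert, must not). A userspace-style sketch
with invented values:

#include <linux/netfilter_ipv4/ipt_policy.h>
#include <string.h>

static void match_inbound_transport(struct ipt_policy_info *info)
{
        memset(info, 0, sizeof(*info));
        info->flags = IPT_POLICY_MATCH_IN;      /* decapsulation side */
        info->len   = 1;                        /* one element in use */

        info->pol[0].mode        = IPT_POLICY_MODE_TRANSPORT;
        info->pol[0].reqid       = 42;
        info->pol[0].match.mode  = 1;           /* require mode match */
        info->pol[0].match.reqid = 1;           /* require reqid match */
}
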
diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h
new file mode 100644
index 000000000000..5a93afcd2ff1
--- /dev/null
+++ b/include/linux/netfilter_ipv6/ip6t_policy.h
@@ -0,0 +1,52 @@
1#ifndef _IP6T_POLICY_H
2#define _IP6T_POLICY_H
3
4#define IP6T_POLICY_MAX_ELEM 4
5
6enum ip6t_policy_flags
7{
8 IP6T_POLICY_MATCH_IN = 0x1,
9 IP6T_POLICY_MATCH_OUT = 0x2,
10 IP6T_POLICY_MATCH_NONE = 0x4,
11 IP6T_POLICY_MATCH_STRICT = 0x8,
12};
13
14enum ip6t_policy_modes
15{
16 IP6T_POLICY_MODE_TRANSPORT,
17 IP6T_POLICY_MODE_TUNNEL
18};
19
20struct ip6t_policy_spec
21{
22 u_int8_t saddr:1,
23 daddr:1,
24 proto:1,
25 mode:1,
26 spi:1,
27 reqid:1;
28};
29
30struct ip6t_policy_elem
31{
32 struct in6_addr saddr;
33 struct in6_addr smask;
34 struct in6_addr daddr;
35 struct in6_addr dmask;
36 u_int32_t spi;
37 u_int32_t reqid;
38 u_int8_t proto;
39 u_int8_t mode;
40
41 struct ip6t_policy_spec match;
42 struct ip6t_policy_spec invert;
43};
44
45struct ip6t_policy_info
46{
47 struct ip6t_policy_elem pol[IP6T_POLICY_MAX_ELEM];
48 u_int16_t flags;
49 u_int16_t len;
50};
51
52#endif /* _IP6T_POLICY_H */
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index def32c5715be..8eb7fa76c1d0 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -5,6 +5,9 @@
5 * pages. A pagevec is a multipage container which is used for that. 5 * pages. A pagevec is a multipage container which is used for that.
6 */ 6 */
7 7
8#ifndef _LINUX_PAGEVEC_H
9#define _LINUX_PAGEVEC_H
10
8/* 14 pointers + two long's align the pagevec structure to a power of two */ 11/* 14 pointers + two long's align the pagevec structure to a power of two */
9#define PAGEVEC_SIZE 14 12#define PAGEVEC_SIZE 14
10 13
@@ -83,3 +86,5 @@ static inline void pagevec_lru_add(struct pagevec *pvec)
83 if (pagevec_count(pvec)) 86 if (pagevec_count(pvec))
84 __pagevec_lru_add(pvec); 87 __pagevec_lru_add(pvec);
85} 88}
89
90#endif /* _LINUX_PAGEVEC_H */
diff --git a/include/linux/parport.h b/include/linux/parport.h
index f7ff0b0c4031..f67f838a3a1f 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -236,12 +236,14 @@ struct pardevice {
236 236
237/* IEEE1284 information */ 237/* IEEE1284 information */
238 238
239/* IEEE1284 phases */ 239/* IEEE1284 phases. These are exposed to userland through ppdev IOCTL
240 * PP[GS]ETPHASE, so do not change existing values. */
240enum ieee1284_phase { 241enum ieee1284_phase {
241 IEEE1284_PH_FWD_DATA, 242 IEEE1284_PH_FWD_DATA,
242 IEEE1284_PH_FWD_IDLE, 243 IEEE1284_PH_FWD_IDLE,
243 IEEE1284_PH_TERMINATE, 244 IEEE1284_PH_TERMINATE,
244 IEEE1284_PH_NEGOTIATION, 245 IEEE1284_PH_NEGOTIATION,
246 IEEE1284_PH_HBUSY_DNA,
245 IEEE1284_PH_REV_IDLE, 247 IEEE1284_PH_REV_IDLE,
246 IEEE1284_PH_HBUSY_DAVAIL, 248 IEEE1284_PH_HBUSY_DAVAIL,
247 IEEE1284_PH_REV_DATA, 249 IEEE1284_PH_REV_DATA,
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index fb8d2d24e4bb..cb9039a21f2a 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -19,7 +19,6 @@
19 19
20struct percpu_data { 20struct percpu_data {
21 void *ptrs[NR_CPUS]; 21 void *ptrs[NR_CPUS];
22 void *blkp;
23}; 22};
24 23
25/* 24/*
@@ -33,14 +32,14 @@ struct percpu_data {
33 (__typeof__(ptr))__p->ptrs[(cpu)]; \ 32 (__typeof__(ptr))__p->ptrs[(cpu)]; \
34}) 33})
35 34
36extern void *__alloc_percpu(size_t size, size_t align); 35extern void *__alloc_percpu(size_t size);
37extern void free_percpu(const void *); 36extern void free_percpu(const void *);
38 37
39#else /* CONFIG_SMP */ 38#else /* CONFIG_SMP */
40 39
41#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) 40#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
42 41
43static inline void *__alloc_percpu(size_t size, size_t align) 42static inline void *__alloc_percpu(size_t size)
44{ 43{
45 void *ret = kmalloc(size, GFP_KERNEL); 44 void *ret = kmalloc(size, GFP_KERNEL);
46 if (ret) 45 if (ret)
@@ -55,7 +54,6 @@ static inline void free_percpu(const void *ptr)
55#endif /* CONFIG_SMP */ 54#endif /* CONFIG_SMP */
56 55
57/* Simple wrapper for the common case: zeros memory. */ 56/* Simple wrapper for the common case: zeros memory. */
58#define alloc_percpu(type) \ 57#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type))))
59 ((type *)(__alloc_percpu(sizeof(type), __alignof__(type))))
60 58
61#endif /* __LINUX_PERCPU_H */ 59#endif /* __LINUX_PERCPU_H */
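
After this change callers pass only a size; a short sketch of the surviving API (names illustrative):

	static void percpu_hits_example(void)
	{
		long *hits = alloc_percpu(long);	/* zeroed on every CPU */
		int cpu;

		if (!hits)
			return;
		cpu = get_cpu();			/* disable preemption */
		(*per_cpu_ptr(hits, cpu))++;		/* touch this CPU's copy */
		put_cpu();
		free_percpu(hits);
	}
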
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index b2b3dba1298d..9d5cd106b344 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -20,8 +20,6 @@
20#define PTRACE_DETACH 0x11 20#define PTRACE_DETACH 0x11
21 21
22#define PTRACE_SYSCALL 24 22#define PTRACE_SYSCALL 24
23#define PTRACE_SYSEMU 31
24#define PTRACE_SYSEMU_SINGLESTEP 32
25 23
26/* 0x4200-0x4300 are reserved for architecture-independent additions. */ 24/* 0x4200-0x4300 are reserved for architecture-independent additions. */
27#define PTRACE_SETOPTIONS 0x4200 25#define PTRACE_SETOPTIONS 0x4200
@@ -80,6 +78,8 @@
80 78
81 79
82extern long arch_ptrace(struct task_struct *child, long request, long addr, long data); 80extern long arch_ptrace(struct task_struct *child, long request, long addr, long data);
81extern struct task_struct *ptrace_get_task_struct(pid_t pid);
82extern int ptrace_traceme(void);
83extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); 83extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
84extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); 84extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
85extern int ptrace_attach(struct task_struct *tsk); 85extern int ptrace_attach(struct task_struct *tsk);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 36e5d269612f..c57ff2fcb30a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -19,6 +19,7 @@
19#ifndef _LINUX_RADIX_TREE_H 19#ifndef _LINUX_RADIX_TREE_H
20#define _LINUX_RADIX_TREE_H 20#define _LINUX_RADIX_TREE_H
21 21
22#include <linux/sched.h>
22#include <linux/preempt.h> 23#include <linux/preempt.h>
23#include <linux/types.h> 24#include <linux/types.h>
24 25
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a471f3bb713e..51747cd88d1a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -65,7 +65,7 @@ struct rcu_ctrlblk {
65 long cur; /* Current batch number. */ 65 long cur; /* Current batch number. */
66 long completed; /* Number of the last completed batch */ 66 long completed; /* Number of the last completed batch */
67 int next_pending; /* Is the next batch already waiting? */ 67 int next_pending; /* Is the next batch already waiting? */
68} ____cacheline_maxaligned_in_smp; 68} ____cacheline_internodealigned_in_smp;
69 69
70/* Is batch a before batch b ? */ 70/* Is batch a before batch b ? */
71static inline int rcu_batch_before(long a, long b) 71static inline int rcu_batch_before(long a, long b)
diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
deleted file mode 100644
index e1adbba14b67..000000000000
--- a/include/linux/rcuref.h
+++ /dev/null
@@ -1,220 +0,0 @@
1/*
2 * rcuref.h
3 *
4 * Reference counting for elements of lists/arrays protected by
5 * RCU.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 *
21 * Copyright (C) IBM Corporation, 2005
22 *
23 * Author: Dipankar Sarma <dipankar@in.ibm.com>
24 * Ravikiran Thirumalai <kiran_th@gmail.com>
25 *
26 * See Documentation/RCU/rcuref.txt for detailed user guide.
27 *
28 */
29
30#ifndef _RCUREF_H_
31#define _RCUREF_H_
32
33#ifdef __KERNEL__
34
35#include <linux/types.h>
36#include <linux/interrupt.h>
37#include <linux/spinlock.h>
38#include <asm/atomic.h>
39
40/*
41 * These APIs work on traditional atomic_t counters used in the
42 * kernel for reference counting. Under special circumstances
43 * where a lock-free get() operation races with a put() operation
44 * these APIs can be used. See Documentation/RCU/rcuref.txt.
45 */
46
47#ifdef __HAVE_ARCH_CMPXCHG
48
49/**
50 * rcuref_inc - increment refcount for object.
51 * @rcuref: reference counter in the object in question.
52 *
53 * This should be used only for objects where we use RCU and
54 * use the rcuref_inc_lf() api to acquire a reference
55 * in a lock-free reader-side critical section.
56 */
57static inline void rcuref_inc(atomic_t *rcuref)
58{
59 atomic_inc(rcuref);
60}
61
62/**
63 * rcuref_dec - decrement refcount for object.
64 * @rcuref: reference counter in the object in question.
65 *
66 * This should be used only for objects where we use RCU and
67 * use the rcuref_inc_lf() api to acquire a reference
68 * in a lock-free reader-side critical section.
69 */
70static inline void rcuref_dec(atomic_t *rcuref)
71{
72 atomic_dec(rcuref);
73}
74
75/**
76 * rcuref_dec_and_test - decrement refcount for object and test
77 * @rcuref: reference counter in the object.
78 * @release: pointer to the function that will clean up the object
79 * when the last reference to the object is released.
80 * This pointer is required.
81 *
82 * Decrement the refcount, and if 0, return 1. Else return 0.
83 *
84 * This should be used only for objects where we use RCU and
85 * use the rcuref_inc_lf() api to acquire a reference
86 * in a lock-free reader-side critical section.
87 */
88static inline int rcuref_dec_and_test(atomic_t *rcuref)
89{
90 return atomic_dec_and_test(rcuref);
91}
92
93/*
94 * cmpxchg is needed on UP too, if deletions to the list/array can happen
95 * in interrupt context.
96 */
97
98/**
99 * rcuref_inc_lf - Take reference to an object in a read-side
100 * critical section protected by RCU.
101 * @rcuref: reference counter in the object in question.
102 *
103 * Try to increment the refcount by 1. The increment might fail if
104 * the reference counter has been through a 1 to 0 transition and
105 * is no longer part of the lock-free list.
106 * Returns non-zero on successful increment and zero otherwise.
107 */
108static inline int rcuref_inc_lf(atomic_t *rcuref)
109{
110 int c, old;
111 c = atomic_read(rcuref);
112 while (c && (old = cmpxchg(&rcuref->counter, c, c + 1)) != c)
113 c = old;
114 return c;
115}
116
117#else /* !__HAVE_ARCH_CMPXCHG */
118
119extern spinlock_t __rcuref_hash[];
120
121/*
122 * Use a hash table of locks to protect the reference count
123 * since cmpxchg is not available in this arch.
124 */
125#ifdef CONFIG_SMP
126#define RCUREF_HASH_SIZE 4
127#define RCUREF_HASH(k) \
128 (&__rcuref_hash[(((unsigned long)k)>>8) & (RCUREF_HASH_SIZE-1)])
129#else
130#define RCUREF_HASH_SIZE 1
131#define RCUREF_HASH(k) &__rcuref_hash[0]
132#endif /* CONFIG_SMP */
133
134/**
135 * rcuref_inc - increment refcount for object.
136 * @rcuref: reference counter in the object in question.
137 *
138 * This should be used only for objects where we use RCU and
139 * use the rcuref_inc_lf() api to acquire a reference in a lock-free
140 * reader-side critical section.
141 */
142static inline void rcuref_inc(atomic_t *rcuref)
143{
144 unsigned long flags;
145 spin_lock_irqsave(RCUREF_HASH(rcuref), flags);
146 rcuref->counter += 1;
147 spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
148}
149
150/**
151 * rcuref_dec - decrement refcount for object.
152 * @rcuref: reference counter in the object in question.
153 *
154 * This should be used only for objects where we use RCU and
155 * use the rcuref_inc_lf() api to acquire a reference in a lock-free
156 * reader-side critical section.
157 */
158static inline void rcuref_dec(atomic_t *rcuref)
159{
160 unsigned long flags;
161 spin_lock_irqsave(RCUREF_HASH(rcuref), flags);
162 rcuref->counter -= 1;
163 spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
164}
165
166/**
167 * rcuref_dec_and_test - decrement refcount for object and test
168 * @rcuref: reference counter in the object.
169 * @release: pointer to the function that will clean up the object
170 * when the last reference to the object is released.
171 * This pointer is required.
172 *
173 * Decrement the refcount, and if 0, return 1. Else return 0.
174 *
175 * This should be used only for objects where we use RCU and
176 * use the rcuref_inc_lf() api to acquire a reference in a lock-free
177 * reader-side critical section.
178 */
179static inline int rcuref_dec_and_test(atomic_t *rcuref)
180{
181 unsigned long flags;
182 spin_lock_irqsave(RCUREF_HASH(rcuref), flags);
183 rcuref->counter--;
184 if (!rcuref->counter) {
185 spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
186 return 1;
187 } else {
188 spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
189 return 0;
190 }
191}
192
193/**
194 * rcuref_inc_lf - Take reference to an object of a lock-free collection
195 * by traversing a lock-free list/array.
196 * @rcuref: reference counter in the object in question.
197 *
198 * Try to increment the refcount by 1. The increment might fail if
199 * the reference counter has been through a 1 to 0 transition and
200 * the object is no longer part of the lock-free list.
201 * Returns non-zero on successful increment and zero otherwise.
202 */
203static inline int rcuref_inc_lf(atomic_t *rcuref)
204{
205 int ret;
206 unsigned long flags;
207 spin_lock_irqsave(RCUREF_HASH(rcuref), flags);
208 if (rcuref->counter)
209 ret = rcuref->counter++;
210 else
211 ret = 0;
212 spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
213 return ret;
214}
215
216
217#endif /* !__HAVE_ARCH_CMPXCHG */
218
219#endif /* __KERNEL__ */
220#endif /* _RCUREF_H_ */
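
The deleted header documented a lock-free get/put pattern; a sketch of the reader it supported (the element type and function name are illustrative), noted here since the same idea survives in later kernels as atomic_inc_not_zero():

	struct element {
		struct list_head list;
		atomic_t refcnt;
		int key;
	};

	/* Runs under rcu_read_lock(): the element cannot be freed out from
	 * under us, but its count may already have reached zero, in which
	 * case rcuref_inc_lf() fails rather than resurrect a dying object. */
	static struct element *element_get(struct list_head *head, int key)
	{
		struct element *e;

		rcu_read_lock();
		list_for_each_entry_rcu(e, head, list) {
			if (e->key == key && rcuref_inc_lf(&e->refcnt)) {
				rcu_read_unlock();
				return e;
			}
		}
		rcu_read_unlock();
		return NULL;
	}
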
diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h
index fb7e80737325..7342e66247fb 100644
--- a/include/linux/relayfs_fs.h
+++ b/include/linux/relayfs_fs.h
@@ -65,20 +65,6 @@ struct rchan
65}; 65};
66 66
67/* 67/*
68 * Relayfs inode
69 */
70struct relayfs_inode_info
71{
72 struct inode vfs_inode;
73 struct rchan_buf *buf;
74};
75
76static inline struct relayfs_inode_info *RELAYFS_I(struct inode *inode)
77{
78 return container_of(inode, struct relayfs_inode_info, vfs_inode);
79}
80
81/*
82 * Relay channel client callbacks 68 * Relay channel client callbacks
83 */ 69 */
84struct rchan_callbacks 70struct rchan_callbacks
@@ -124,6 +110,46 @@ struct rchan_callbacks
124 */ 110 */
125 void (*buf_unmapped)(struct rchan_buf *buf, 111 void (*buf_unmapped)(struct rchan_buf *buf,
126 struct file *filp); 112 struct file *filp);
113 /*
114 * create_buf_file - create file to represent a relayfs channel buffer
115 * @filename: the name of the file to create
116 * @parent: the parent of the file to create
117 * @mode: the mode of the file to create
118 * @buf: the channel buffer
119 * @is_global: outparam - set non-zero if the buffer should be global
120 *
121 * Called during relay_open(), once for each per-cpu buffer,
122 * to allow the client to create a file to be used to
123 * represent the corresponding channel buffer. If the file is
124 * created outside of relayfs, the parent must also exist in
125 * that filesystem.
126 *
127 * The callback should return the dentry of the file created
128 * to represent the relay buffer.
129 *
130 * Setting the is_global outparam to a non-zero value will
131 * cause relay_open() to create a single global buffer rather
132 * than the default set of per-cpu buffers.
133 *
134 * See Documentation/filesystems/relayfs.txt for more info.
135 */
136 struct dentry *(*create_buf_file)(const char *filename,
137 struct dentry *parent,
138 int mode,
139 struct rchan_buf *buf,
140 int *is_global);
141
142 /*
143 * remove_buf_file - remove file representing a relayfs channel buffer
144 * @dentry: the dentry of the file to remove
145 *
146 * Called during relay_close(), once for each per-cpu buffer,
147 * to allow the client to remove a file used to represent a
148 * channel buffer.
149 *
150 * The callback should return 0 if successful, negative if not.
151 */
152 int (*remove_buf_file)(struct dentry *dentry);
127}; 153};
128 154
129/* 155/*
@@ -148,6 +174,12 @@ extern size_t relay_switch_subbuf(struct rchan_buf *buf,
148extern struct dentry *relayfs_create_dir(const char *name, 174extern struct dentry *relayfs_create_dir(const char *name,
149 struct dentry *parent); 175 struct dentry *parent);
150extern int relayfs_remove_dir(struct dentry *dentry); 176extern int relayfs_remove_dir(struct dentry *dentry);
177extern struct dentry *relayfs_create_file(const char *name,
178 struct dentry *parent,
179 int mode,
180 struct file_operations *fops,
181 void *data);
182extern int relayfs_remove_file(struct dentry *dentry);
151 183
152/** 184/**
153 * relay_write - write data into the channel 185 * relay_write - write data into the channel
@@ -247,10 +279,9 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf,
247} 279}
248 280
249/* 281/*
250 * exported relayfs file operations, fs/relayfs/inode.c 282 * exported relay file operations, fs/relayfs/inode.c
251 */ 283 */
252 284extern struct file_operations relay_file_operations;
253extern struct file_operations relayfs_file_operations;
254 285
255#endif /* _LINUX_RELAYFS_FS_H */ 286#endif /* _LINUX_RELAYFS_FS_H */
256 287
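
A minimal client implementation of the two new callbacks, built only from the declarations added in this file (function names illustrative); returning the dentry from relayfs_create_file() wires the buffer to relay_file_operations:

	static struct dentry *example_create_buf_file(const char *filename,
						      struct dentry *parent,
						      int mode,
						      struct rchan_buf *buf,
						      int *is_global)
	{
		/* Keep the default per-cpu buffers; a client wanting one
		 * global buffer would set *is_global = 1 here instead. */
		return relayfs_create_file(filename, parent, mode,
					   &relay_file_operations, buf);
	}

	static int example_remove_buf_file(struct dentry *dentry)
	{
		return relayfs_remove_file(dentry);
	}

	static struct rchan_callbacks example_relay_cb = {
		.create_buf_file	= example_create_buf_file,
		.remove_buf_file	= example_remove_buf_file,
	};
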
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 3bd7cce19e26..157d7e3236b5 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -21,6 +21,7 @@
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <linux/device.h> 23#include <linux/device.h>
24#include <linux/string.h>
24#include <linux/rio.h> 25#include <linux/rio.h>
25 26
26extern int __rio_local_read_config_32(struct rio_mport *port, u32 offset, 27extern int __rio_local_read_config_32(struct rio_mport *port, u32 offset,
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index e1aaf1fac8e0..0b2ba67ff13c 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -11,6 +11,8 @@
11#ifndef _LINUX_RTC_H_ 11#ifndef _LINUX_RTC_H_
12#define _LINUX_RTC_H_ 12#define _LINUX_RTC_H_
13 13
14#include <linux/interrupt.h>
15
14/* 16/*
15 * The struct used to pass data via the following ioctl. Similar to the 17 * The struct used to pass data via the following ioctl. Similar to the
16 * struct tm in <time.h>, but it needs to be here so that the kernel 18 * struct tm in <time.h>, but it needs to be here so that the kernel
@@ -102,6 +104,7 @@ int rtc_register(rtc_task_t *task);
102int rtc_unregister(rtc_task_t *task); 104int rtc_unregister(rtc_task_t *task);
103int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); 105int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
104void rtc_get_rtc_time(struct rtc_time *rtc_tm); 106void rtc_get_rtc_time(struct rtc_time *rtc_tm);
107irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
105 108
106#endif /* __KERNEL__ */ 109#endif /* __KERNEL__ */
107 110
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7da33619d5d0..78eb92ae4d94 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@
34#include <linux/percpu.h> 34#include <linux/percpu.h>
35#include <linux/topology.h> 35#include <linux/topology.h>
36#include <linux/seccomp.h> 36#include <linux/seccomp.h>
37#include <linux/rcupdate.h>
37 38
38#include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ 39#include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
39 40
@@ -350,8 +351,16 @@ struct sighand_struct {
350 atomic_t count; 351 atomic_t count;
351 struct k_sigaction action[_NSIG]; 352 struct k_sigaction action[_NSIG];
352 spinlock_t siglock; 353 spinlock_t siglock;
354 struct rcu_head rcu;
353}; 355};
354 356
357extern void sighand_free_cb(struct rcu_head *rhp);
358
359static inline void sighand_free(struct sighand_struct *sp)
360{
361 call_rcu(&sp->rcu, sighand_free_cb);
362}
363
355/* 364/*
356 * NOTE! "signal_struct" does not have its own 365
357 * locking, because a shared signal_struct always 366 * locking, because a shared signal_struct always
@@ -762,6 +771,7 @@ struct task_struct {
762 unsigned keep_capabilities:1; 771 unsigned keep_capabilities:1;
763 struct user_struct *user; 772 struct user_struct *user;
764#ifdef CONFIG_KEYS 773#ifdef CONFIG_KEYS
774 struct key *request_key_auth; /* assumed request_key authority */
765 struct key *thread_keyring; /* keyring private to this thread */ 775 struct key *thread_keyring; /* keyring private to this thread */
766 unsigned char jit_keyring; /* default keyring to attach requested keys to */ 776 unsigned char jit_keyring; /* default keyring to attach requested keys to */
767#endif 777#endif
@@ -844,6 +854,7 @@ struct task_struct {
844 int cpuset_mems_generation; 854 int cpuset_mems_generation;
845#endif 855#endif
846 atomic_t fs_excl; /* holding fs exclusive resources */ 856 atomic_t fs_excl; /* holding fs exclusive resources */
857 struct rcu_head rcu;
847}; 858};
848 859
849static inline pid_t process_group(struct task_struct *tsk) 860static inline pid_t process_group(struct task_struct *tsk)
@@ -867,8 +878,14 @@ static inline int pid_alive(struct task_struct *p)
867extern void free_task(struct task_struct *tsk); 878extern void free_task(struct task_struct *tsk);
868extern void __put_task_struct(struct task_struct *tsk); 879extern void __put_task_struct(struct task_struct *tsk);
869#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) 880#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
870#define put_task_struct(tsk) \ 881
871do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) 882extern void __put_task_struct_cb(struct rcu_head *rhp);
883
884static inline void put_task_struct(struct task_struct *t)
885{
886 if (atomic_dec_and_test(&t->usage))
887 call_rcu(&t->rcu, __put_task_struct_cb);
888}
872 889
873/* 890/*
874 * Per process flags 891 * Per process flags
@@ -895,6 +912,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
895#define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ 912#define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */
896#define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ 913#define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */
897#define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ 914#define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */
915#define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */
898 916
899/* 917/*
900 * Only the _current_ task can read/write to tsk->flags, but other 918 * Only the _current_ task can read/write to tsk->flags, but other
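
The point of routing the final free through call_rcu() is that a task_struct reached inside an RCU read-side section stays valid until rcu_read_unlock(), even if the last reference is dropped concurrently. A hedged sketch of the kind of reader this enables, assuming the pid lookup itself is RCU-safe to walk:

	static int task_stopped_example(pid_t pid)
	{
		struct task_struct *p;
		int ret = 0;

		rcu_read_lock();
		p = find_task_by_pid(pid);	/* no reference taken */
		if (p)
			ret = (p->state == TASK_STOPPED);
		rcu_read_unlock();		/* p may be freed after this */
		return ret;
	}
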
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
new file mode 100644
index 000000000000..76850b75b3f6
--- /dev/null
+++ b/include/linux/screen_info.h
@@ -0,0 +1,77 @@
1#ifndef _SCREEN_INFO_H
2#define _SCREEN_INFO_H
3
4#include <linux/types.h>
5
6/*
7 * These are set up by the setup-routine at boot-time:
8 */
9
10struct screen_info {
11 u8 orig_x; /* 0x00 */
12 u8 orig_y; /* 0x01 */
13 u16 dontuse1; /* 0x02 -- EXT_MEM_K sits here */
14 u16 orig_video_page; /* 0x04 */
15 u8 orig_video_mode; /* 0x06 */
16 u8 orig_video_cols; /* 0x07 */
17 u16 unused2; /* 0x08 */
18 u16 orig_video_ega_bx; /* 0x0a */
19 u16 unused3; /* 0x0c */
20 u8 orig_video_lines; /* 0x0e */
21 u8 orig_video_isVGA; /* 0x0f */
22 u16 orig_video_points; /* 0x10 */
23
24 /* VESA graphic mode -- linear frame buffer */
25 u16 lfb_width; /* 0x12 */
26 u16 lfb_height; /* 0x14 */
27 u16 lfb_depth; /* 0x16 */
28 u32 lfb_base; /* 0x18 */
29 u32 lfb_size; /* 0x1c */
30 u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */
31 u16 lfb_linelength; /* 0x24 */
32 u8 red_size; /* 0x26 */
33 u8 red_pos; /* 0x27 */
34 u8 green_size; /* 0x28 */
35 u8 green_pos; /* 0x29 */
36 u8 blue_size; /* 0x2a */
37 u8 blue_pos; /* 0x2b */
38 u8 rsvd_size; /* 0x2c */
39 u8 rsvd_pos; /* 0x2d */
40 u16 vesapm_seg; /* 0x2e */
41 u16 vesapm_off; /* 0x30 */
42 u16 pages; /* 0x32 */
43 u16 vesa_attributes; /* 0x34 */
44 u32 capabilities; /* 0x36 */
45 /* 0x3a -- 0x3f reserved for future expansion */
46};
47
48extern struct screen_info screen_info;
49
50#define ORIG_X (screen_info.orig_x)
51#define ORIG_Y (screen_info.orig_y)
52#define ORIG_VIDEO_MODE (screen_info.orig_video_mode)
53#define ORIG_VIDEO_COLS (screen_info.orig_video_cols)
54#define ORIG_VIDEO_EGA_BX (screen_info.orig_video_ega_bx)
55#define ORIG_VIDEO_LINES (screen_info.orig_video_lines)
56#define ORIG_VIDEO_ISVGA (screen_info.orig_video_isVGA)
57#define ORIG_VIDEO_POINTS (screen_info.orig_video_points)
58
59#define VIDEO_TYPE_MDA 0x10 /* Monochrome Text Display */
60#define VIDEO_TYPE_CGA 0x11 /* CGA Display */
61#define VIDEO_TYPE_EGAM 0x20 /* EGA/VGA in Monochrome Mode */
62#define VIDEO_TYPE_EGAC 0x21 /* EGA in Color Mode */
63#define VIDEO_TYPE_VGAC 0x22 /* VGA+ in Color Mode */
64#define VIDEO_TYPE_VLFB 0x23 /* VESA VGA in graphic mode */
65
66#define VIDEO_TYPE_PICA_S3 0x30 /* ACER PICA-61 local S3 video */
67#define VIDEO_TYPE_MIPS_G364 0x31 /* MIPS Magnum 4000 G364 video */
68#define VIDEO_TYPE_SGI 0x33 /* Various SGI graphics hardware */
69
70#define VIDEO_TYPE_TGAC 0x40 /* DEC TGA */
71
72#define VIDEO_TYPE_SUN 0x50 /* Sun frame buffer. */
73#define VIDEO_TYPE_SUNPCI 0x51 /* Sun PCI based frame buffer. */
74
75#define VIDEO_TYPE_PMAC 0x60 /* PowerMacintosh frame buffer. */
76
77#endif /* _SCREEN_INFO_H */
diff --git a/include/linux/sdla.h b/include/linux/sdla.h
index 3b6afb8caa42..564acd3a71c1 100644
--- a/include/linux/sdla.h
+++ b/include/linux/sdla.h
@@ -293,46 +293,46 @@ void sdla(void *cfg_info, char *dev, struct frad_conf *conf, int quiet);
293#define SDLA_S508_INTEN 0x10 293#define SDLA_S508_INTEN 0x10
294 294
295struct sdla_cmd { 295struct sdla_cmd {
296 char opp_flag __attribute__((packed)); 296 char opp_flag;
297 char cmd __attribute__((packed)); 297 char cmd;
298 short length __attribute__((packed)); 298 short length;
299 char retval __attribute__((packed)); 299 char retval;
300 short dlci __attribute__((packed)); 300 short dlci;
301 char flags __attribute__((packed)); 301 char flags;
302 short rxlost_int __attribute__((packed)); 302 short rxlost_int;
303 long rxlost_app __attribute__((packed)); 303 long rxlost_app;
304 char reserve[2] __attribute__((packed)); 304 char reserve[2];
305 char data[SDLA_MAX_DATA] __attribute__((packed)); /* transfer data buffer */ 305 char data[SDLA_MAX_DATA]; /* transfer data buffer */
306}; 306} __attribute__((packed));
307 307
308struct intr_info { 308struct intr_info {
309 char flags __attribute__((packed)); 309 char flags;
310 short txlen __attribute__((packed)); 310 short txlen;
311 char irq __attribute__((packed)); 311 char irq;
312 char flags2 __attribute__((packed)); 312 char flags2;
313 short timeout __attribute__((packed)); 313 short timeout;
314}; 314} __attribute__((packed));
315 315
316/* found in the 508's control window at RXBUF_INFO */ 316/* found in the 508's control window at RXBUF_INFO */
317struct buf_info { 317struct buf_info {
318 unsigned short rse_num __attribute__((packed)); 318 unsigned short rse_num;
319 unsigned long rse_base __attribute__((packed)); 319 unsigned long rse_base;
320 unsigned long rse_next __attribute__((packed)); 320 unsigned long rse_next;
321 unsigned long buf_base __attribute__((packed)); 321 unsigned long buf_base;
322 unsigned short reserved __attribute__((packed)); 322 unsigned short reserved;
323 unsigned long buf_top __attribute__((packed)); 323 unsigned long buf_top;
324}; 324} __attribute__((packed));
325 325
326/* structure pointed to by rse_base in RXBUF_INFO struct */ 326/* structure pointed to by rse_base in RXBUF_INFO struct */
327struct buf_entry { 327struct buf_entry {
328 char opp_flag __attribute__((packed)); 328 char opp_flag;
329 short length __attribute__((packed)); 329 short length;
330 short dlci __attribute__((packed)); 330 short dlci;
331 char flags __attribute__((packed)); 331 char flags;
332 short timestamp __attribute__((packed)); 332 short timestamp;
333 short reserved[2] __attribute__((packed)); 333 short reserved[2];
334 long buf_addr __attribute__((packed)); 334 long buf_addr;
335}; 335} __attribute__((packed));
336 336
337#endif 337#endif
338 338
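
The conversion pattern in this hunk (repeated below for wavefront.h, dn_dev.h and dn_nsp.h) swaps per-member __attribute__((packed)), which only affects the member it annotates, for one struct-level attribute that removes all inter-member padding. For these byte-exact wire and shared-memory layouts the two forms yield the same layout, but the struct-level form is the documented gcc idiom and avoids warnings on members that are already byte-aligned. A self-contained illustration, not kernel code:

	#include <stdio.h>

	struct unpacked { char a; short b; };			/* typically 4 bytes */
	struct packed_s { char a; short b; } __attribute__((packed));	/* 3 bytes */

	int main(void)
	{
		printf("%zu %zu\n", sizeof(struct unpacked),
		       sizeof(struct packed_s));
		return 0;
	}
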
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index dc89116bb1ca..cd2773b29a64 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -26,11 +26,7 @@ static inline int has_secure_computing(struct thread_info *ti)
26 26
27#else /* CONFIG_SECCOMP */ 27#else /* CONFIG_SECCOMP */
28 28
29#if (__GNUC__ > 2) 29typedef struct { } seccomp_t;
30 typedef struct { } seccomp_t;
31#else
32 typedef struct { int gcc_is_buggy; } seccomp_t;
33#endif
34 30
35#define secure_computing(x) do { } while (0) 31#define secure_computing(x) do { } while (0)
36/* static inline to preserve typechecking */ 32/* static inline to preserve typechecking */
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 5dd5f02c5c5f..b7d093520bb6 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -18,6 +18,19 @@
18#define SA_PROBE SA_ONESHOT 18#define SA_PROBE SA_ONESHOT
19#define SA_SAMPLE_RANDOM SA_RESTART 19#define SA_SAMPLE_RANDOM SA_RESTART
20#define SA_SHIRQ 0x04000000 20#define SA_SHIRQ 0x04000000
21/*
22 * As above, these correspond to the IORESOURCE_IRQ_* defines in
23 * linux/ioport.h to select the interrupt line behaviour. When
24 * requesting an interrupt without specifying a SA_TRIGGER, the
25 * setting should be assumed to be "as already configured", which
26 * may be as per machine or firmware initialisation.
27 */
28#define SA_TRIGGER_LOW 0x00000008
29#define SA_TRIGGER_HIGH 0x00000004
30#define SA_TRIGGER_FALLING 0x00000002
31#define SA_TRIGGER_RISING 0x00000001
32#define SA_TRIGGER_MASK (SA_TRIGGER_HIGH|SA_TRIGGER_LOW|\
33 SA_TRIGGER_RISING|SA_TRIGGER_FALLING)
21 34
22/* 35/*
23 * Real Time signals may be queued. 36 * Real Time signals may be queued.
@@ -81,6 +94,23 @@ static inline int sigfindinword(unsigned long word)
81 94
82#endif /* __HAVE_ARCH_SIG_BITOPS */ 95#endif /* __HAVE_ARCH_SIG_BITOPS */
83 96
97static inline int sigisemptyset(sigset_t *set)
98{
99 extern void _NSIG_WORDS_is_unsupported_size(void);
100 switch (_NSIG_WORDS) {
101 case 4:
102 return (set->sig[3] | set->sig[2] |
103 set->sig[1] | set->sig[0]) == 0;
104 case 2:
105 return (set->sig[1] | set->sig[0]) == 0;
106 case 1:
107 return set->sig[0] == 0;
108 default:
109 _NSIG_WORDS_is_unsupported_size();
110 return 0;
111 }
112}
113
84#define sigmask(sig) (1UL << ((sig) - 1)) 114#define sigmask(sig) (1UL << ((sig) - 1))
85 115
86#ifndef __HAVE_ARCH_SIG_SETOPS 116#ifndef __HAVE_ARCH_SIG_SETOPS
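
The new SA_TRIGGER_* bits travel to the IRQ layer through the flags argument of request_irq(); a driver-side sketch using the era's three-argument handler signature (names illustrative):

	static irqreturn_t example_isr(int irq, void *dev_id,
				       struct pt_regs *regs)
	{
		return IRQ_HANDLED;
	}

	static int example_setup_irq(int irq, void *dev)
	{
		/* Shared, falling-edge triggered; with no SA_TRIGGER_* bit
		 * set, the line keeps its preconfigured behaviour, per the
		 * comment above. */
		return request_irq(irq, example_isr,
				   SA_SHIRQ | SA_TRIGGER_FALLING,
				   "example", dev);
	}
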
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 483cfc47ec34..e5fd66c5650b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -251,7 +251,7 @@ struct sk_buff {
251 * want to keep them across layers you have to do a skb_clone() 251 * want to keep them across layers you have to do a skb_clone()
252 * first. This is owned by whoever has the skb queued ATM. 252 * first. This is owned by whoever has the skb queued ATM.
253 */ 253 */
254 char cb[40]; 254 char cb[48];
255 255
256 unsigned int len, 256 unsigned int len,
257 data_len, 257 data_len,
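
The cb[] growth from 40 to 48 bytes matters because protocol layers overlay private structs on it; the usual accessor is a cast macro such as IPCB() for struct inet_skb_parm (visible in the net/ip.h hunk further down), paired with a compile-time size check. A sketch, assuming BUILD_BUG_ON() from linux/kernel.h:

	#define IPCB(skb)	((struct inet_skb_parm *)((skb)->cb))

	static int __init cb_fit_check_example(void)
	{
		/* Fails the build if the overlay outgrows skb->cb. */
		BUILD_BUG_ON(sizeof(struct inet_skb_parm) >
			     sizeof(((struct sk_buff *)0)->cb));
		return 0;
	}
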
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d1ea4051b996..1fb77a9cc148 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -53,6 +53,8 @@ typedef struct kmem_cache kmem_cache_t;
53#define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ 53#define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
54#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */ 54#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */
55 55
56#ifndef CONFIG_SLOB
57
56/* prototypes */ 58/* prototypes */
57extern void __init kmem_cache_init(void); 59extern void __init kmem_cache_init(void);
58 60
@@ -134,6 +136,39 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
134extern int FASTCALL(kmem_cache_reap(int)); 136extern int FASTCALL(kmem_cache_reap(int));
135extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); 137extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
136 138
139#else /* CONFIG_SLOB */
140
141/* SLOB allocator routines */
142
143void kmem_cache_init(void);
144struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
145struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
146 unsigned long,
147 void (*)(void *, struct kmem_cache *, unsigned long),
148 void (*)(void *, struct kmem_cache *, unsigned long));
149int kmem_cache_destroy(struct kmem_cache *c);
150void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags);
151void kmem_cache_free(struct kmem_cache *c, void *b);
152const char *kmem_cache_name(struct kmem_cache *);
153void *kmalloc(size_t size, gfp_t flags);
154void *kzalloc(size_t size, gfp_t flags);
155void kfree(const void *m);
156unsigned int ksize(const void *m);
157unsigned int kmem_cache_size(struct kmem_cache *c);
158
159static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
160{
161 return kzalloc(n * size, flags);
162}
163
164#define kmem_cache_shrink(d) (0)
165#define kmem_cache_reap(a)
166#define kmem_ptr_validate(a, b) (0)
167#define kmem_cache_alloc_node(c, f, n) kmem_cache_alloc(c, f)
168#define kmalloc_node(s, f, n) kmalloc(s, f)
169
170#endif /* CONFIG_SLOB */
171
137/* System wide caches */ 172/* System wide caches */
138extern kmem_cache_t *vm_area_cachep; 173extern kmem_cache_t *vm_area_cachep;
139extern kmem_cache_t *names_cachep; 174extern kmem_cache_t *names_cachep;
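
Both branches of the new #ifndef CONFIG_SLOB export the same core API, so client code is unchanged whichever allocator is configured; a minimal usage sketch (the payload struct and names are illustrative):

	struct example { int a, b; };		/* hypothetical payload */
	static kmem_cache_t *example_cachep;

	static int __init example_cache_init(void)
	{
		example_cachep = kmem_cache_create("example_cache",
						   sizeof(struct example),
						   0, 0, NULL, NULL);
		return example_cachep ? 0 : -ENOMEM;
	}

	static void example_cache_use(void)
	{
		struct example *obj;

		obj = kmem_cache_alloc(example_cachep, GFP_KERNEL);
		if (obj)
			kmem_cache_free(example_cachep, obj);
	}
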
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index def2d173a8db..04135b0e198e 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -22,30 +22,16 @@ typedef struct {
22 22
23#else 23#else
24 24
25/*
26 * All gcc 2.95 versions and early versions of 2.96 have a nasty bug
27 * with empty initializers.
28 */
29#if (__GNUC__ > 2)
30typedef struct { } raw_spinlock_t; 25typedef struct { } raw_spinlock_t;
31 26
32#define __RAW_SPIN_LOCK_UNLOCKED { } 27#define __RAW_SPIN_LOCK_UNLOCKED { }
33#else
34typedef struct { int gcc_is_buggy; } raw_spinlock_t;
35#define __RAW_SPIN_LOCK_UNLOCKED (raw_spinlock_t) { 0 }
36#endif
37 28
38#endif 29#endif
39 30
40#if (__GNUC__ > 2)
41typedef struct { 31typedef struct {
42 /* no debug version on UP */ 32 /* no debug version on UP */
43} raw_rwlock_t; 33} raw_rwlock_t;
44 34
45#define __RAW_RW_LOCK_UNLOCKED { } 35#define __RAW_RW_LOCK_UNLOCKED { }
46#else
47typedef struct { int gcc_is_buggy; } raw_rwlock_t;
48#define __RAW_RW_LOCK_UNLOCKED (raw_rwlock_t) { 0 }
49#endif
50 36
51#endif /* __LINUX_SPINLOCK_TYPES_UP_H */ 37#endif /* __LINUX_SPINLOCK_TYPES_UP_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 556617bcf7ac..389d1c382e20 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -175,6 +175,13 @@ extern int try_to_free_pages(struct zone **, gfp_t);
175extern int shrink_all_memory(int); 175extern int shrink_all_memory(int);
176extern int vm_swappiness; 176extern int vm_swappiness;
177 177
178#ifdef CONFIG_MIGRATION
179extern int isolate_lru_page(struct page *p);
180extern int putback_lru_pages(struct list_head *l);
181extern int migrate_pages(struct list_head *l, struct list_head *t,
182 struct list_head *moved, struct list_head *failed);
183#endif
184
178#ifdef CONFIG_MMU 185#ifdef CONFIG_MMU
179/* linux/mm/shmem.c */ 186/* linux/mm/shmem.c */
180extern int shmem_unuse(swp_entry_t entry, struct page *page); 187extern int shmem_unuse(swp_entry_t entry, struct page *page);
@@ -192,7 +199,7 @@ extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
192extern struct address_space swapper_space; 199extern struct address_space swapper_space;
193#define total_swapcache_pages swapper_space.nrpages 200#define total_swapcache_pages swapper_space.nrpages
194extern void show_swap_cache_info(void); 201extern void show_swap_cache_info(void);
195extern int add_to_swap(struct page *); 202extern int add_to_swap(struct page *, gfp_t);
196extern void __delete_from_swap_cache(struct page *); 203extern void __delete_from_swap_cache(struct page *);
197extern void delete_from_swap_cache(struct page *); 204extern void delete_from_swap_cache(struct page *);
198extern int move_to_swap_cache(struct page *, swp_entry_t); 205extern int move_to_swap_cache(struct page *, swp_entry_t);
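
A hedged sketch of how the three CONFIG_MIGRATION hooks compose, going only by the declarations above plus the assumption that isolate_lru_page() returns nonzero once the page is safely off the LRU:

	static void migrate_example(struct page *page,
				    struct list_head *new_pages)
	{
		LIST_HEAD(pagelist);
		LIST_HEAD(moved);
		LIST_HEAD(failed);

		if (!isolate_lru_page(page))
			return;
		list_add(&page->lru, &pagelist);

		/* Move what we can onto pages from new_pages ... */
		migrate_pages(&pagelist, new_pages, &moved, &failed);
		/* ... and put anything that could not move back on the LRU. */
		putback_lru_pages(&failed);
	}
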
diff --git a/include/linux/synclink.h b/include/linux/synclink.h
index 763bd290f28d..1b7cd8d1a71b 100644
--- a/include/linux/synclink.h
+++ b/include/linux/synclink.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * SyncLink Multiprotocol Serial Adapter Driver 2 * SyncLink Multiprotocol Serial Adapter Driver
3 * 3 *
4 * $Id: synclink.h,v 3.6 2002/02/20 21:58:20 paulkf Exp $ 4 * $Id: synclink.h,v 3.10 2005/11/08 19:50:54 paulkf Exp $
5 * 5 *
6 * Copyright (C) 1998-2000 by Microgate Corporation 6 * Copyright (C) 1998-2000 by Microgate Corporation
7 * 7 *
@@ -128,10 +128,14 @@
128#define MGSL_BUS_TYPE_EISA 2 128#define MGSL_BUS_TYPE_EISA 2
129#define MGSL_BUS_TYPE_PCI 5 129#define MGSL_BUS_TYPE_PCI 5
130 130
131#define MGSL_INTERFACE_MASK 0xf
131#define MGSL_INTERFACE_DISABLE 0 132#define MGSL_INTERFACE_DISABLE 0
132#define MGSL_INTERFACE_RS232 1 133#define MGSL_INTERFACE_RS232 1
133#define MGSL_INTERFACE_V35 2 134#define MGSL_INTERFACE_V35 2
134#define MGSL_INTERFACE_RS422 3 135#define MGSL_INTERFACE_RS422 3
136#define MGSL_INTERFACE_RTS_EN 0x10
137#define MGSL_INTERFACE_LL 0x20
138#define MGSL_INTERFACE_RL 0x40
135 139
136typedef struct _MGSL_PARAMS 140typedef struct _MGSL_PARAMS
137{ 141{
@@ -163,6 +167,9 @@ typedef struct _MGSL_PARAMS
163#define SYNCLINK_DEVICE_ID 0x0010 167#define SYNCLINK_DEVICE_ID 0x0010
164#define MGSCC_DEVICE_ID 0x0020 168#define MGSCC_DEVICE_ID 0x0020
165#define SYNCLINK_SCA_DEVICE_ID 0x0030 169#define SYNCLINK_SCA_DEVICE_ID 0x0030
170#define SYNCLINK_GT_DEVICE_ID 0x0070
171#define SYNCLINK_GT4_DEVICE_ID 0x0080
172#define SYNCLINK_AC_DEVICE_ID 0x0090
166#define MGSL_MAX_SERIAL_NUMBER 30 173#define MGSL_MAX_SERIAL_NUMBER 30
167 174
168/* 175/*
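
The new MGSL_INTERFACE_* values above 0xf are modifier bits OR'd onto the base interface selection, which is why MGSL_INTERFACE_MASK now exists to extract the low nibble. A userspace sketch, assuming the driver's MGSL_IOCSIF ioctl (defined elsewhere in this header) applies the combined value:

	#include <sys/ioctl.h>
	#include <linux/synclink.h>

	static int set_rs422_with_rts(int fd)
	{
		int if_mode = MGSL_INTERFACE_RS422 | MGSL_INTERFACE_RTS_EN;

		return ioctl(fd, MGSL_IOCSIF, if_mode);
	}
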
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c7007b1db91d..e910d1a481df 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -511,5 +511,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
511asmlinkage long sys_ioprio_get(int which, int who); 511asmlinkage long sys_ioprio_get(int which, int who);
512asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, 512asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
513 unsigned long maxnode); 513 unsigned long maxnode);
514asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
515 const unsigned long __user *from, const unsigned long __user *to);
514 516
515#endif 517#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index a9b80fc7f0f3..7f472127b7b5 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -180,6 +180,8 @@ enum
180 VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ 180 VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
181 VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ 181 VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
182 VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ 182 VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
183 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
184 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
183}; 185};
184 186
185 187
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 1267f88ece6e..57449704a47b 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -23,6 +23,7 @@
23#include <linux/workqueue.h> 23#include <linux/workqueue.h>
24#include <linux/tty_driver.h> 24#include <linux/tty_driver.h>
25#include <linux/tty_ldisc.h> 25#include <linux/tty_ldisc.h>
26#include <linux/screen_info.h>
26 27
27#include <asm/system.h> 28#include <asm/system.h>
28 29
@@ -37,77 +38,6 @@
37#define NR_LDISCS 16 38#define NR_LDISCS 16
38 39
39/* 40/*
40 * These are set up by the setup-routine at boot-time:
41 */
42
43struct screen_info {
44 u8 orig_x; /* 0x00 */
45 u8 orig_y; /* 0x01 */
46 u16 dontuse1; /* 0x02 -- EXT_MEM_K sits here */
47 u16 orig_video_page; /* 0x04 */
48 u8 orig_video_mode; /* 0x06 */
49 u8 orig_video_cols; /* 0x07 */
50 u16 unused2; /* 0x08 */
51 u16 orig_video_ega_bx; /* 0x0a */
52 u16 unused3; /* 0x0c */
53 u8 orig_video_lines; /* 0x0e */
54 u8 orig_video_isVGA; /* 0x0f */
55 u16 orig_video_points; /* 0x10 */
56
57 /* VESA graphic mode -- linear frame buffer */
58 u16 lfb_width; /* 0x12 */
59 u16 lfb_height; /* 0x14 */
60 u16 lfb_depth; /* 0x16 */
61 u32 lfb_base; /* 0x18 */
62 u32 lfb_size; /* 0x1c */
63 u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */
64 u16 lfb_linelength; /* 0x24 */
65 u8 red_size; /* 0x26 */
66 u8 red_pos; /* 0x27 */
67 u8 green_size; /* 0x28 */
68 u8 green_pos; /* 0x29 */
69 u8 blue_size; /* 0x2a */
70 u8 blue_pos; /* 0x2b */
71 u8 rsvd_size; /* 0x2c */
72 u8 rsvd_pos; /* 0x2d */
73 u16 vesapm_seg; /* 0x2e */
74 u16 vesapm_off; /* 0x30 */
75 u16 pages; /* 0x32 */
76 u16 vesa_attributes; /* 0x34 */
77 u32 capabilities; /* 0x36 */
78 /* 0x3a -- 0x3f reserved for future expansion */
79};
80
81extern struct screen_info screen_info;
82
83#define ORIG_X (screen_info.orig_x)
84#define ORIG_Y (screen_info.orig_y)
85#define ORIG_VIDEO_MODE (screen_info.orig_video_mode)
86#define ORIG_VIDEO_COLS (screen_info.orig_video_cols)
87#define ORIG_VIDEO_EGA_BX (screen_info.orig_video_ega_bx)
88#define ORIG_VIDEO_LINES (screen_info.orig_video_lines)
89#define ORIG_VIDEO_ISVGA (screen_info.orig_video_isVGA)
90#define ORIG_VIDEO_POINTS (screen_info.orig_video_points)
91
92#define VIDEO_TYPE_MDA 0x10 /* Monochrome Text Display */
93#define VIDEO_TYPE_CGA 0x11 /* CGA Display */
94#define VIDEO_TYPE_EGAM 0x20 /* EGA/VGA in Monochrome Mode */
95#define VIDEO_TYPE_EGAC 0x21 /* EGA in Color Mode */
96#define VIDEO_TYPE_VGAC 0x22 /* VGA+ in Color Mode */
97#define VIDEO_TYPE_VLFB 0x23 /* VESA VGA in graphic mode */
98
99#define VIDEO_TYPE_PICA_S3 0x30 /* ACER PICA-61 local S3 video */
100#define VIDEO_TYPE_MIPS_G364 0x31 /* MIPS Magnum 4000 G364 video */
101#define VIDEO_TYPE_SGI 0x33 /* Various SGI graphics hardware */
102
103#define VIDEO_TYPE_TGAC 0x40 /* DEC TGA */
104
105#define VIDEO_TYPE_SUN 0x50 /* Sun frame buffer. */
106#define VIDEO_TYPE_SUNPCI 0x51 /* Sun PCI based frame buffer. */
107
108#define VIDEO_TYPE_PMAC 0x60 /* PowerMacintosh frame buffer. */
109
110/*
111 * This character is the same as _POSIX_VDISABLE: it cannot be used as 41 * This character is the same as _POSIX_VDISABLE: it cannot be used as
112 * a c_cc[] character, but indicates that a particular special character 42 * a c_cc[] character, but indicates that a particular special character
113 * isn't in use (eg VINTR has no character etc) 43 * isn't in use (eg VINTR has no character etc)
diff --git a/include/linux/wavefront.h b/include/linux/wavefront.h
index 61bd0fd35240..51ab3c933acd 100644
--- a/include/linux/wavefront.h
+++ b/include/linux/wavefront.h
@@ -434,22 +434,22 @@ typedef struct wf_multisample {
434} wavefront_multisample; 434} wavefront_multisample;
435 435
436typedef struct wf_alias { 436typedef struct wf_alias {
437 INT16 OriginalSample __attribute__ ((packed)); 437 INT16 OriginalSample;
438 438
439 struct wf_sample_offset sampleStartOffset __attribute__ ((packed)); 439 struct wf_sample_offset sampleStartOffset;
440 struct wf_sample_offset loopStartOffset __attribute__ ((packed)); 440 struct wf_sample_offset loopStartOffset;
441 struct wf_sample_offset sampleEndOffset __attribute__ ((packed)); 441 struct wf_sample_offset sampleEndOffset;
442 struct wf_sample_offset loopEndOffset __attribute__ ((packed)); 442 struct wf_sample_offset loopEndOffset;
443 443
444 INT16 FrequencyBias __attribute__ ((packed)); 444 INT16 FrequencyBias;
445 445
446 UCHAR8 SampleResolution:2 __attribute__ ((packed)); 446 UCHAR8 SampleResolution:2;
447 UCHAR8 Unused1:1 __attribute__ ((packed)); 447 UCHAR8 Unused1:1;
448 UCHAR8 Loop:1 __attribute__ ((packed)); 448 UCHAR8 Loop:1;
449 UCHAR8 Bidirectional:1 __attribute__ ((packed)); 449 UCHAR8 Bidirectional:1;
450 UCHAR8 Unused2:1 __attribute__ ((packed)); 450 UCHAR8 Unused2:1;
451 UCHAR8 Reverse:1 __attribute__ ((packed)); 451 UCHAR8 Reverse:1;
452 UCHAR8 Unused3:1 __attribute__ ((packed)); 452 UCHAR8 Unused3:1;
453 453
454 /* This structure is meant to be padded only to 16 bits in the 454
455 original. Of course, whoever wrote their documentation didn't 455 original. Of course, whoever wrote their documentation didn't
@@ -460,8 +460,8 @@ typedef struct wf_alias {
460 standard 16->32 bit issues. 460 standard 16->32 bit issues.
461 */ 461 */
462 462
463 UCHAR8 sixteen_bit_padding __attribute__ ((packed)); 463 UCHAR8 sixteen_bit_padding;
464} wavefront_alias; 464} __attribute__((packed)) wavefront_alias;
465 465
466typedef struct wf_drum { 466typedef struct wf_drum {
467 UCHAR8 PatchNumber; 467 UCHAR8 PatchNumber;
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ac39d04d027c..86b111300231 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -65,6 +65,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work));
65extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay)); 65extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay));
66 66
67extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay); 67extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay);
68extern int schedule_on_each_cpu(void (*func)(void *info), void *info);
68extern void flush_scheduled_work(void); 69extern void flush_scheduled_work(void);
69extern int current_is_keventd(void); 70extern int current_is_keventd(void);
70extern int keventd_up(void); 71extern int keventd_up(void);
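
The new helper appears to queue func once on every CPU's event workqueue and wait for all of them to finish; a sketch (names illustrative):

	static void count_cpu(void *info)
	{
		/* Runs in keventd context on each CPU in turn. */
		atomic_inc((atomic_t *)info);
	}

	static int count_all_cpus_example(void)
	{
		atomic_t n = ATOMIC_INIT(0);

		schedule_on_each_cpu(count_cpu, &n);	/* blocks until done */
		return atomic_read(&n);
	}
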
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b096159086e8..beaef5c7a0ea 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -103,7 +103,9 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping);
103int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); 103int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
104int do_writepages(struct address_space *mapping, struct writeback_control *wbc); 104int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
105int sync_page_range(struct inode *inode, struct address_space *mapping, 105int sync_page_range(struct inode *inode, struct address_space *mapping,
106 loff_t pos, size_t count); 106 loff_t pos, loff_t count);
107int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
108 loff_t pos, loff_t count);
107 109
108/* pdflush.c */ 110/* pdflush.c */
109extern int nr_pdflush_threads; /* Global so it can be exported to sysctl 111extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h
index 86e8e86e624a..5a86e78081bf 100644
--- a/include/net/dn_dev.h
+++ b/include/net/dn_dev.h
@@ -88,8 +88,8 @@ struct dn_dev {
88 struct net_device *dev; 88 struct net_device *dev;
89 struct dn_dev_parms parms; 89 struct dn_dev_parms parms;
90 char use_long; 90 char use_long;
91 struct timer_list timer; 91 struct timer_list timer;
92 unsigned long t3; 92 unsigned long t3;
93 struct neigh_parms *neigh_parms; 93 struct neigh_parms *neigh_parms;
94 unsigned char addr[ETH_ALEN]; 94 unsigned char addr[ETH_ALEN];
95 struct neighbour *router; /* Default router on circuit */ 95 struct neighbour *router; /* Default router on circuit */
@@ -99,57 +99,57 @@ struct dn_dev {
99 99
100struct dn_short_packet 100struct dn_short_packet
101{ 101{
102 unsigned char msgflg __attribute__((packed)); 102 unsigned char msgflg;
103 unsigned short dstnode __attribute__((packed)); 103 unsigned short dstnode;
104 unsigned short srcnode __attribute__((packed)); 104 unsigned short srcnode;
105 unsigned char forward __attribute__((packed)); 105 unsigned char forward;
106}; 106} __attribute__((packed));
107 107
108struct dn_long_packet 108struct dn_long_packet
109{ 109{
110 unsigned char msgflg __attribute__((packed)); 110 unsigned char msgflg;
111 unsigned char d_area __attribute__((packed)); 111 unsigned char d_area;
112 unsigned char d_subarea __attribute__((packed)); 112 unsigned char d_subarea;
113 unsigned char d_id[6] __attribute__((packed)); 113 unsigned char d_id[6];
114 unsigned char s_area __attribute__((packed)); 114 unsigned char s_area;
115 unsigned char s_subarea __attribute__((packed)); 115 unsigned char s_subarea;
116 unsigned char s_id[6] __attribute__((packed)); 116 unsigned char s_id[6];
117 unsigned char nl2 __attribute__((packed)); 117 unsigned char nl2;
118 unsigned char visit_ct __attribute__((packed)); 118 unsigned char visit_ct;
119 unsigned char s_class __attribute__((packed)); 119 unsigned char s_class;
120 unsigned char pt __attribute__((packed)); 120 unsigned char pt;
121}; 121} __attribute__((packed));
122 122
123/*------------------------- DRP - Routing messages ---------------------*/ 123/*------------------------- DRP - Routing messages ---------------------*/
124 124
125struct endnode_hello_message 125struct endnode_hello_message
126{ 126{
127 unsigned char msgflg __attribute__((packed)); 127 unsigned char msgflg;
128 unsigned char tiver[3] __attribute__((packed)); 128 unsigned char tiver[3];
129 unsigned char id[6] __attribute__((packed)); 129 unsigned char id[6];
130 unsigned char iinfo __attribute__((packed)); 130 unsigned char iinfo;
131 unsigned short blksize __attribute__((packed)); 131 unsigned short blksize;
132 unsigned char area __attribute__((packed)); 132 unsigned char area;
133 unsigned char seed[8] __attribute__((packed)); 133 unsigned char seed[8];
134 unsigned char neighbor[6] __attribute__((packed)); 134 unsigned char neighbor[6];
135 unsigned short timer __attribute__((packed)); 135 unsigned short timer;
136 unsigned char mpd __attribute__((packed)); 136 unsigned char mpd;
137 unsigned char datalen __attribute__((packed)); 137 unsigned char datalen;
138 unsigned char data[2] __attribute__((packed)); 138 unsigned char data[2];
139}; 139} __attribute__((packed));
140 140
141struct rtnode_hello_message 141struct rtnode_hello_message
142{ 142{
143 unsigned char msgflg __attribute__((packed)); 143 unsigned char msgflg;
144 unsigned char tiver[3] __attribute__((packed)); 144 unsigned char tiver[3];
145 unsigned char id[6] __attribute__((packed)); 145 unsigned char id[6];
146 unsigned char iinfo __attribute__((packed)); 146 unsigned char iinfo;
147 unsigned short blksize __attribute__((packed)); 147 unsigned short blksize;
148 unsigned char priority __attribute__((packed)); 148 unsigned char priority;
149 unsigned char area __attribute__((packed)); 149 unsigned char area;
150 unsigned short timer __attribute__((packed)); 150 unsigned short timer;
151 unsigned char mpd __attribute__((packed)); 151 unsigned char mpd;
152}; 152} __attribute__((packed));
153 153
154 154
155extern void dn_dev_init(void); 155extern void dn_dev_init(void);
diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
index 1ba03be0af3a..e6182b86262b 100644
--- a/include/net/dn_nsp.h
+++ b/include/net/dn_nsp.h
@@ -72,78 +72,78 @@ extern struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int nobl
72 72
73struct nsp_data_seg_msg 73struct nsp_data_seg_msg
74{ 74{
75 unsigned char msgflg __attribute__((packed)); 75 unsigned char msgflg;
76 unsigned short dstaddr __attribute__((packed)); 76 unsigned short dstaddr;
77 unsigned short srcaddr __attribute__((packed)); 77 unsigned short srcaddr;
78}; 78} __attribute__((packed));
79 79
80struct nsp_data_opt_msg 80struct nsp_data_opt_msg
81{ 81{
82 unsigned short acknum __attribute__((packed)); 82 unsigned short acknum;
83 unsigned short segnum __attribute__((packed)); 83 unsigned short segnum;
84 unsigned short lsflgs __attribute__((packed)); 84 unsigned short lsflgs;
85}; 85} __attribute__((packed));
86 86
87struct nsp_data_opt_msg1 87struct nsp_data_opt_msg1
88{ 88{
89 unsigned short acknum __attribute__((packed)); 89 unsigned short acknum;
90 unsigned short segnum __attribute__((packed)); 90 unsigned short segnum;
91}; 91} __attribute__((packed));
92 92
93 93
94/* Acknowledgment Message (data/other data) */ 94/* Acknowledgment Message (data/other data) */
95struct nsp_data_ack_msg 95struct nsp_data_ack_msg
96{ 96{
97 unsigned char msgflg __attribute__((packed)); 97 unsigned char msgflg;
98 unsigned short dstaddr __attribute__((packed)); 98 unsigned short dstaddr;
99 unsigned short srcaddr __attribute__((packed)); 99 unsigned short srcaddr;
100 unsigned short acknum __attribute__((packed)); 100 unsigned short acknum;
101}; 101} __attribute__((packed));
102 102
103/* Connect Acknowledgment Message */ 103/* Connect Acknowledgment Message */
104struct nsp_conn_ack_msg 104struct nsp_conn_ack_msg
105{ 105{
106 unsigned char msgflg __attribute__((packed)); 106 unsigned char msgflg;
107 unsigned short dstaddr __attribute__((packed)); 107 unsigned short dstaddr;
108}; 108} __attribute__((packed));
109 109
110 110
111/* Connect Initiate/Retransmit Initiate/Connect Confirm */ 111/* Connect Initiate/Retransmit Initiate/Connect Confirm */
112struct nsp_conn_init_msg 112struct nsp_conn_init_msg
113{ 113{
114 unsigned char msgflg __attribute__((packed)); 114 unsigned char msgflg;
115#define NSP_CI 0x18 /* Connect Initiate */ 115#define NSP_CI 0x18 /* Connect Initiate */
116#define NSP_RCI 0x68 /* Retrans. Conn Init */ 116#define NSP_RCI 0x68 /* Retrans. Conn Init */
117 unsigned short dstaddr __attribute__((packed)); 117 unsigned short dstaddr;
118 unsigned short srcaddr __attribute__((packed)); 118 unsigned short srcaddr;
119 unsigned char services __attribute__((packed)); 119 unsigned char services;
120#define NSP_FC_NONE 0x00 /* Flow Control None */ 120#define NSP_FC_NONE 0x00 /* Flow Control None */
121#define NSP_FC_SRC 0x04 /* Seg Req. Count */ 121#define NSP_FC_SRC 0x04 /* Seg Req. Count */
122#define NSP_FC_SCMC 0x08 /* Sess. Control Mess */ 122#define NSP_FC_SCMC 0x08 /* Sess. Control Mess */
123#define NSP_FC_MASK 0x0c /* FC type mask */ 123#define NSP_FC_MASK 0x0c /* FC type mask */
124 unsigned char info __attribute__((packed)); 124 unsigned char info;
125 unsigned short segsize __attribute__((packed)); 125 unsigned short segsize;
126}; 126} __attribute__((packed));
127 127
128/* Disconnect Initiate/Disconnect Confirm */ 128/* Disconnect Initiate/Disconnect Confirm */
129struct nsp_disconn_init_msg 129struct nsp_disconn_init_msg
130{ 130{
131 unsigned char msgflg __attribute__((packed)); 131 unsigned char msgflg;
132 unsigned short dstaddr __attribute__((packed)); 132 unsigned short dstaddr;
133 unsigned short srcaddr __attribute__((packed)); 133 unsigned short srcaddr;
134 unsigned short reason __attribute__((packed)); 134 unsigned short reason;
135}; 135} __attribute__((packed));
136 136
137 137
138 138
139struct srcobj_fmt 139struct srcobj_fmt
140{ 140{
141 char format __attribute__((packed)); 141 char format;
142 unsigned char task __attribute__((packed)); 142 unsigned char task;
143 unsigned short grpcode __attribute__((packed)); 143 unsigned short grpcode;
144 unsigned short usrcode __attribute__((packed)); 144 unsigned short usrcode;
145 char dlen __attribute__((packed)); 145 char dlen;
146}; 146} __attribute__((packed));
147 147
148/* 148/*
149 * A collection of functions for manipulating the sequence 149 * A collection of functions for manipulating the sequence
diff --git a/include/net/dst.h b/include/net/dst.h
index bee8b84d329d..5161e89017f9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -225,16 +225,7 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
225/* Output packet to network from transport. */ 225/* Output packet to network from transport. */
226static inline int dst_output(struct sk_buff *skb) 226static inline int dst_output(struct sk_buff *skb)
227{ 227{
228 int err; 228 return skb->dst->output(skb);
229
230 for (;;) {
231 err = skb->dst->output(skb);
232
233 if (likely(err == 0))
234 return err;
235 if (unlikely(err != NET_XMIT_BYPASS))
236 return err;
237 }
238} 229}
239 230
240/* Input packet from network to transport. */ 231/* Input packet from network to transport. */
diff --git a/include/net/ip.h b/include/net/ip.h
index 7bb5804847f2..8de0697b364c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -37,11 +37,10 @@ struct inet_skb_parm
37 struct ip_options opt; /* Compiled IP options */ 37 struct ip_options opt; /* Compiled IP options */
38 unsigned char flags; 38 unsigned char flags;
39 39
40#define IPSKB_MASQUERADED 1 40#define IPSKB_FORWARDED 1
41#define IPSKB_TRANSLATED 2 41#define IPSKB_XFRM_TUNNEL_SIZE 2
42#define IPSKB_FORWARDED 4 42#define IPSKB_XFRM_TRANSFORMED 4
43#define IPSKB_XFRM_TUNNEL_SIZE 8 43#define IPSKB_FRAG_COMPLETE 8
44#define IPSKB_FRAG_COMPLETE 16
45}; 44};
46 45
47struct ipcm_cookie 46struct ipcm_cookie
@@ -95,7 +94,6 @@ extern int ip_local_deliver(struct sk_buff *skb);
95extern int ip_mr_input(struct sk_buff *skb); 94extern int ip_mr_input(struct sk_buff *skb);
96extern int ip_output(struct sk_buff *skb); 95extern int ip_output(struct sk_buff *skb);
97extern int ip_mc_output(struct sk_buff *skb); 96extern int ip_mc_output(struct sk_buff *skb);
98extern int ip_fragment(struct sk_buff *skb, int (*out)(struct sk_buff*));
99extern int ip_do_nat(struct sk_buff *skb); 97extern int ip_do_nat(struct sk_buff *skb);
100extern void ip_send_check(struct iphdr *ip); 98extern void ip_send_check(struct iphdr *ip);
101extern int ip_queue_xmit(struct sk_buff *skb, int ipfragok); 99extern int ip_queue_xmit(struct sk_buff *skb, int ipfragok);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 860bbac4c4ee..3b1d963d396c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -418,6 +418,8 @@ extern int ipv6_rcv(struct sk_buff *skb,
418 struct packet_type *pt, 418 struct packet_type *pt,
419 struct net_device *orig_dev); 419 struct net_device *orig_dev);
420 420
421extern int ip6_rcv_finish(struct sk_buff *skb);
422
421/* 423/*
422 * upper-layer output functions 424 * upper-layer output functions
423 */ 425 */
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 63f7db99c2a6..6dc5970612d7 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -43,7 +43,7 @@ struct net_protocol {
43#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 43#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
44struct inet6_protocol 44struct inet6_protocol
45{ 45{
46 int (*handler)(struct sk_buff **skb, unsigned int *nhoffp); 46 int (*handler)(struct sk_buff **skb);
47 47
48 void (*err_handler)(struct sk_buff *skb, 48 void (*err_handler)(struct sk_buff *skb,
49 struct inet6_skb_parm *opt, 49 struct inet6_skb_parm *opt,
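
With the nexthdr offset dropped from the handler signature (it appears to move into per-skb state in this series), a protocol handler now receives only the skb. A registration sketch; IPPROTO_EXAMPLE is a stand-in, and the flags field with INET6_PROTO_NOPOLICY is assumed from the full header, which this hunk only excerpts:

	static int example_v6_rcv(struct sk_buff **pskb)
	{
		kfree_skb(*pskb);	/* consume the packet */
		return 0;
	}

	static struct inet6_protocol example_protocol = {
		.handler	= example_v6_rcv,
		.flags		= INET6_PROTO_NOPOLICY,
	};

	/* registered at init time with something like:
	 *	inet6_add_protocol(&example_protocol, IPPROTO_EXAMPLE);
	 */
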
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 07d7b50cdd76..d09ca0e7d139 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -668,7 +668,7 @@ static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *s
668 return xfrm_policy_check(sk, dir, skb, AF_INET6); 668 return xfrm_policy_check(sk, dir, skb, AF_INET6);
669} 669}
670 670
671 671extern int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family);
672extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); 672extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
673 673
674static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) 674static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
@@ -831,7 +831,7 @@ struct xfrm_tunnel {
831}; 831};
832 832
833struct xfrm6_tunnel { 833struct xfrm6_tunnel {
834 int (*handler)(struct sk_buff **pskb, unsigned int *nhoffp); 834 int (*handler)(struct sk_buff **pskb);
835 void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, 835 void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
836 int type, int code, int offset, __u32 info); 836 int type, int code, int offset, __u32 info);
837}; 837};
@@ -866,10 +866,11 @@ extern int xfrm_state_mtu(struct xfrm_state *x, int mtu);
866extern int xfrm_init_state(struct xfrm_state *x); 866extern int xfrm_init_state(struct xfrm_state *x);
867extern int xfrm4_rcv(struct sk_buff *skb); 867extern int xfrm4_rcv(struct sk_buff *skb);
868extern int xfrm4_output(struct sk_buff *skb); 868extern int xfrm4_output(struct sk_buff *skb);
869extern int xfrm4_output_finish(struct sk_buff *skb);
869extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); 870extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
870extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); 871extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
871extern int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi); 872extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi);
872extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); 873extern int xfrm6_rcv(struct sk_buff **pskb);
873extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); 874extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler);
874extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); 875extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler);
875extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); 876extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr);
diff --git a/include/sound/wavefront.h b/include/sound/wavefront.h
index 9e572aed2435..15d82e594b56 100644
--- a/include/sound/wavefront.h
+++ b/include/sound/wavefront.h
@@ -454,22 +454,22 @@ typedef struct wf_multisample {
454} wavefront_multisample; 454} wavefront_multisample;
455 455
456typedef struct wf_alias { 456typedef struct wf_alias {
457 s16 OriginalSample __attribute__ ((packed)); 457 s16 OriginalSample;
458 458
459 struct wf_sample_offset sampleStartOffset __attribute__ ((packed)); 459 struct wf_sample_offset sampleStartOffset;
460 struct wf_sample_offset loopStartOffset __attribute__ ((packed)); 460 struct wf_sample_offset loopStartOffset;
461 struct wf_sample_offset sampleEndOffset __attribute__ ((packed)); 461 struct wf_sample_offset sampleEndOffset;
462 struct wf_sample_offset loopEndOffset __attribute__ ((packed)); 462 struct wf_sample_offset loopEndOffset;
463 463
464 s16 FrequencyBias __attribute__ ((packed)); 464 s16 FrequencyBias;
465 465
466 u8 SampleResolution:2 __attribute__ ((packed)); 466 u8 SampleResolution:2;
467 u8 Unused1:1 __attribute__ ((packed)); 467 u8 Unused1:1;
468 u8 Loop:1 __attribute__ ((packed)); 468 u8 Loop:1;
469 u8 Bidirectional:1 __attribute__ ((packed)); 469 u8 Bidirectional:1;
470 u8 Unused2:1 __attribute__ ((packed)); 470 u8 Unused2:1;
471 u8 Reverse:1 __attribute__ ((packed)); 471 u8 Reverse:1;
472 u8 Unused3:1 __attribute__ ((packed)); 472 u8 Unused3:1;
473 473
474 /* This structure is meant to be padded only to 16 bits on their 474 /* This structure is meant to be padded only to 16 bits on their
475 original. Of course, whoever wrote their documentation didn't 475 original. Of course, whoever wrote their documentation didn't
@@ -480,8 +480,8 @@ typedef struct wf_alias {
480 standard 16->32 bit issues. 480 standard 16->32 bit issues.
481 */ 481 */
482 482
483 u8 sixteen_bit_padding __attribute__ ((packed)); 483 u8 sixteen_bit_padding;
484} wavefront_alias; 484} __attribute__((packed)) wavefront_alias;
485 485
486typedef struct wf_drum { 486typedef struct wf_drum {
487 u8 PatchNumber; 487 u8 PatchNumber;
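Newer gcc warns that __attribute__((packed)) on individual (notably bitfield) members is ignored, so the attribute moves to the struct as a whole, which packs every member and preserves the on-wire layout. A standalone illustration of what struct-level packing does:

	#include <stdio.h>

	struct unpacked {
		short a;
		int   b;
	};

	struct __attribute__((packed)) packed {
		short a;
		int   b;
	};

	int main(void)
	{
		/* Typically prints 8 and 6: packing drops the two padding
		 * bytes the ABI would insert after 'a' to align 'b'. */
		printf("unpacked=%zu packed=%zu\n",
		       sizeof(struct unpacked), sizeof(struct packed));
		return 0;
	}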
diff --git a/init/Kconfig b/init/Kconfig
index ba42f3793a84..f8f6929d8f25 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -228,6 +228,25 @@ config CPUSETS
228 228
229source "usr/Kconfig" 229source "usr/Kconfig"
230 230
231config UID16
232 bool "Enable 16-bit UID system calls" if EMBEDDED
233 depends !ALPHA && !PPC && !PPC64 && !PARISC && !V850 && !ARCH_S390X
234 depends !X86_64 || IA32_EMULATION
235 depends !SPARC64 || SPARC32_COMPAT
236 default y
237 help
238 This enables the legacy 16-bit UID syscall wrappers.
239
240config VM86
241 depends X86
242 default y
243 bool "Enable VM86 support" if EMBEDDED
244 help
245 This option is required by programs like DOSEMU to run 16-bit legacy
246 code on X86 processors. It also may be needed by software like
247 XFree86 to initialize some video cards via BIOS. Disabling this
248 option saves about 6k.
249
231config CC_OPTIMIZE_FOR_SIZE 250config CC_OPTIMIZE_FOR_SIZE
232 bool "Optimize for size (Look out for broken compilers!)" 251 bool "Optimize for size (Look out for broken compilers!)"
233 default y 252 default y
@@ -309,6 +328,21 @@ config BUG
309 option for embedded systems with no facilities for reporting errors. 328 option for embedded systems with no facilities for reporting errors.
310 Just say Y. 329 Just say Y.
311 330
331config DOUBLEFAULT
332 depends X86
333 default y if X86
334 bool "Enable doublefault exception handler" if EMBEDDED
335 help
336 This option allows trapping of rare doublefault exceptions that
337 would otherwise cause a system to silently reboot. Disabling this
338 option saves about 4k.
339
340config ELF_CORE
341 default y
342 bool "Enable ELF core dumps" if EMBEDDED
343 help
344 Enable support for generating core dumps. Disabling saves about 4k.
345
312config BASE_FULL 346config BASE_FULL
313 default y 347 default y
314 bool "Enable full-sized data structures for core" if EMBEDDED 348 bool "Enable full-sized data structures for core" if EMBEDDED
@@ -380,6 +414,15 @@ config CC_ALIGN_JUMPS
380 no dummy operations need be executed. 414 no dummy operations need be executed.
381 Zero means use compiler's default. 415 Zero means use compiler's default.
382 416
417config SLAB
418 default y
419 bool "Use full SLAB allocator" if EMBEDDED
420 help
421 Disabling this replaces the advanced SLAB allocator and
422 kmalloc support with the drastically simpler SLOB allocator.
423 SLOB is more space efficient but does not scale well and is
424 more susceptible to fragmentation.
425
383endmenu # General setup 426endmenu # General setup
384 427
385config TINY_SHMEM 428config TINY_SHMEM
@@ -391,6 +434,10 @@ config BASE_SMALL
391 default 0 if BASE_FULL 434 default 0 if BASE_FULL
392 default 1 if !BASE_FULL 435 default 1 if !BASE_FULL
393 436
437config SLOB
438 default !SLAB
439 bool
440
394menu "Loadable module support" 441menu "Loadable module support"
395 442
396config MODULES 443config MODULES
diff --git a/init/main.c b/init/main.c
index 2ed3638deec7..8342c2890b16 100644
--- a/init/main.c
+++ b/init/main.c
@@ -58,11 +58,6 @@
58 * This is one of the first .c files built. Error out early 58 * This is one of the first .c files built. Error out early
59 * if we have compiler trouble.. 59 * if we have compiler trouble..
60 */ 60 */
61#if __GNUC__ == 2 && __GNUC_MINOR__ == 96
62#ifdef CONFIG_FRAME_POINTER
63#error This compiler cannot compile correctly with frame pointers enabled
64#endif
65#endif
66 61
67#ifdef CONFIG_X86_LOCAL_APIC 62#ifdef CONFIG_X86_LOCAL_APIC
68#include <asm/smp.h> 63#include <asm/smp.h>
@@ -74,7 +69,7 @@
74 * To avoid associated bogus bug reports, we flatly refuse to compile 69 * To avoid associated bogus bug reports, we flatly refuse to compile
75 * with a gcc that is known to be too old from the very beginning. 70 * with a gcc that is known to be too old from the very beginning.
76 */ 71 */
77#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) 72#if (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 2)
78#error Sorry, your GCC is too old. It builds incorrect kernels. 73#error Sorry, your GCC is too old. It builds incorrect kernels.
79#endif 74#endif
80 75
@@ -512,6 +507,7 @@ asmlinkage void __init start_kernel(void)
512 } 507 }
513#endif 508#endif
514 vfs_caches_init_early(); 509 vfs_caches_init_early();
510 cpuset_init_early();
515 mem_init(); 511 mem_init();
516 kmem_cache_init(); 512 kmem_cache_init();
517 setup_per_cpu_pageset(); 513 setup_per_cpu_pageset();
diff --git a/ipc/shm.c b/ipc/shm.c
index 0ef4a1cf3e27..0b92e874fc06 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -34,8 +34,6 @@
34 34
35#include "util.h" 35#include "util.h"
36 36
37#define shm_flags shm_perm.mode
38
39static struct file_operations shm_file_operations; 37static struct file_operations shm_file_operations;
40static struct vm_operations_struct shm_vm_ops; 38static struct vm_operations_struct shm_vm_ops;
41 39
@@ -148,7 +146,7 @@ static void shm_close (struct vm_area_struct *shmd)
148 shp->shm_dtim = get_seconds(); 146 shp->shm_dtim = get_seconds();
149 shp->shm_nattch--; 147 shp->shm_nattch--;
150 if(shp->shm_nattch == 0 && 148 if(shp->shm_nattch == 0 &&
151 shp->shm_flags & SHM_DEST) 149 shp->shm_perm.mode & SHM_DEST)
152 shm_destroy (shp); 150 shm_destroy (shp);
153 else 151 else
154 shm_unlock(shp); 152 shm_unlock(shp);
@@ -205,7 +203,7 @@ static int newseg (key_t key, int shmflg, size_t size)
205 return -ENOMEM; 203 return -ENOMEM;
206 204
207 shp->shm_perm.key = key; 205 shp->shm_perm.key = key;
208 shp->shm_flags = (shmflg & S_IRWXUGO); 206 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
209 shp->mlock_user = NULL; 207 shp->mlock_user = NULL;
210 208
211 shp->shm_perm.security = NULL; 209 shp->shm_perm.security = NULL;
@@ -345,7 +343,7 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __
345 343
346 out->uid = tbuf.shm_perm.uid; 344 out->uid = tbuf.shm_perm.uid;
347 out->gid = tbuf.shm_perm.gid; 345 out->gid = tbuf.shm_perm.gid;
348 out->mode = tbuf.shm_flags; 346 out->mode = tbuf.shm_perm.mode;
349 347
350 return 0; 348 return 0;
351 } 349 }
@@ -358,7 +356,7 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __
358 356
359 out->uid = tbuf_old.shm_perm.uid; 357 out->uid = tbuf_old.shm_perm.uid;
360 out->gid = tbuf_old.shm_perm.gid; 358 out->gid = tbuf_old.shm_perm.gid;
361 out->mode = tbuf_old.shm_flags; 359 out->mode = tbuf_old.shm_perm.mode;
362 360
363 return 0; 361 return 0;
364 } 362 }
@@ -560,13 +558,13 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
560 if (!is_file_hugepages(shp->shm_file)) { 558 if (!is_file_hugepages(shp->shm_file)) {
561 err = shmem_lock(shp->shm_file, 1, user); 559 err = shmem_lock(shp->shm_file, 1, user);
562 if (!err) { 560 if (!err) {
563 shp->shm_flags |= SHM_LOCKED; 561 shp->shm_perm.mode |= SHM_LOCKED;
564 shp->mlock_user = user; 562 shp->mlock_user = user;
565 } 563 }
566 } 564 }
567 } else if (!is_file_hugepages(shp->shm_file)) { 565 } else if (!is_file_hugepages(shp->shm_file)) {
568 shmem_lock(shp->shm_file, 0, shp->mlock_user); 566 shmem_lock(shp->shm_file, 0, shp->mlock_user);
569 shp->shm_flags &= ~SHM_LOCKED; 567 shp->shm_perm.mode &= ~SHM_LOCKED;
570 shp->mlock_user = NULL; 568 shp->mlock_user = NULL;
571 } 569 }
572 shm_unlock(shp); 570 shm_unlock(shp);
@@ -605,7 +603,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
605 goto out_unlock_up; 603 goto out_unlock_up;
606 604
607 if (shp->shm_nattch){ 605 if (shp->shm_nattch){
608 shp->shm_flags |= SHM_DEST; 606 shp->shm_perm.mode |= SHM_DEST;
609 /* Do not find it any more */ 607 /* Do not find it any more */
610 shp->shm_perm.key = IPC_PRIVATE; 608 shp->shm_perm.key = IPC_PRIVATE;
611 shm_unlock(shp); 609 shm_unlock(shp);
@@ -644,7 +642,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
644 642
645 shp->shm_perm.uid = setbuf.uid; 643 shp->shm_perm.uid = setbuf.uid;
646 shp->shm_perm.gid = setbuf.gid; 644 shp->shm_perm.gid = setbuf.gid;
647 shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO) 645 shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
648 | (setbuf.mode & S_IRWXUGO); 646 | (setbuf.mode & S_IRWXUGO);
649 shp->shm_ctim = get_seconds(); 647 shp->shm_ctim = get_seconds();
650 break; 648 break;
@@ -777,7 +775,7 @@ invalid:
777 BUG(); 775 BUG();
778 shp->shm_nattch--; 776 shp->shm_nattch--;
779 if(shp->shm_nattch == 0 && 777 if(shp->shm_nattch == 0 &&
780 shp->shm_flags & SHM_DEST) 778 shp->shm_perm.mode & SHM_DEST)
781 shm_destroy (shp); 779 shm_destroy (shp);
782 else 780 else
783 shm_unlock(shp); 781 shm_unlock(shp);
@@ -902,7 +900,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
902 return seq_printf(s, format, 900 return seq_printf(s, format,
903 shp->shm_perm.key, 901 shp->shm_perm.key,
904 shp->id, 902 shp->id,
905 shp->shm_flags, 903 shp->shm_perm.mode,
906 shp->shm_segsz, 904 shp->shm_segsz,
907 shp->shm_cprid, 905 shp->shm_cprid,
908 shp->shm_lprid, 906 shp->shm_lprid,
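Removing the '#define shm_flags shm_perm.mode' alias and writing shm_perm.mode at every site trades brevity for honesty: the macro made a nested field masquerade as a direct struct member and silently rewrote every occurrence of the token. A small runnable example of the pattern and its hazard:

	#include <stdio.h>

	struct perm { int mode; };
	struct seg  { struct perm shm_perm; };

	#define shm_flags shm_perm.mode  /* textual alias, as in the old code */

	int main(void)
	{
		struct seg s = { { 0644 } };

		printf("%o\n", s.shm_flags);  /* expands to s.shm_perm.mode: 644 */
		/* But any other identifier spelled shm_flags would be rewritten
		 * too: 'int shm_flags;' becomes 'int shm_perm.mode;' and fails
		 * to compile. */
		return 0;
	}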
diff --git a/kernel/audit.c b/kernel/audit.c
index 32fa03ad1984..d13ab7d2d899 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -267,7 +267,7 @@ static int audit_set_failure(int state, uid_t loginuid)
267 return old; 267 return old;
268} 268}
269 269
270int kauditd_thread(void *dummy) 270static int kauditd_thread(void *dummy)
271{ 271{
272 struct sk_buff *skb; 272 struct sk_buff *skb;
273 273
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7430640f9816..eab64e23bcae 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -39,6 +39,7 @@
39#include <linux/namei.h> 39#include <linux/namei.h>
40#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/proc_fs.h> 41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
42#include <linux/sched.h> 43#include <linux/sched.h>
43#include <linux/seq_file.h> 44#include <linux/seq_file.h>
44#include <linux/slab.h> 45#include <linux/slab.h>
@@ -54,7 +55,23 @@
54#include <asm/atomic.h> 55#include <asm/atomic.h>
55#include <asm/semaphore.h> 56#include <asm/semaphore.h>
56 57
57#define CPUSET_SUPER_MAGIC 0x27e0eb 58#define CPUSET_SUPER_MAGIC 0x27e0eb
59
60/*
61 * Tracks how many cpusets are currently defined in system.
62 * When there is only one cpuset (the root cpuset) we can
63 * short circuit some hooks.
64 */
65int number_of_cpusets __read_mostly;
66
67/* See "Frequency meter" comments, below. */
68
69struct fmeter {
70 int cnt; /* unprocessed events count */
71 int val; /* most recent output value */
72 time_t time; /* clock (secs) when val computed */
73 spinlock_t lock; /* guards read or write of above */
74};
58 75
59struct cpuset { 76struct cpuset {
60 unsigned long flags; /* "unsigned long" so bitops work */ 77 unsigned long flags; /* "unsigned long" so bitops work */
@@ -80,13 +97,16 @@ struct cpuset {
80 * Copy of global cpuset_mems_generation as of the most 97 * Copy of global cpuset_mems_generation as of the most
81 * recent time this cpuset changed its mems_allowed. 98 * recent time this cpuset changed its mems_allowed.
82 */ 99 */
83 int mems_generation; 100 int mems_generation;
101
102 struct fmeter fmeter; /* memory_pressure filter */
84}; 103};
85 104
86/* bits in struct cpuset flags field */ 105/* bits in struct cpuset flags field */
87typedef enum { 106typedef enum {
88 CS_CPU_EXCLUSIVE, 107 CS_CPU_EXCLUSIVE,
89 CS_MEM_EXCLUSIVE, 108 CS_MEM_EXCLUSIVE,
109 CS_MEMORY_MIGRATE,
90 CS_REMOVED, 110 CS_REMOVED,
91 CS_NOTIFY_ON_RELEASE 111 CS_NOTIFY_ON_RELEASE
92} cpuset_flagbits_t; 112} cpuset_flagbits_t;
@@ -112,6 +132,11 @@ static inline int notify_on_release(const struct cpuset *cs)
112 return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); 132 return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
113} 133}
114 134
135static inline int is_memory_migrate(const struct cpuset *cs)
136{
137 return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags);
138}
139
115/* 140/*
116 * Increment this atomic integer every time any cpuset changes its 141 * Increment this atomic integer every time any cpuset changes its
117 * mems_allowed value. Users of cpusets can track this generation 142 * mems_allowed value. Users of cpusets can track this generation
@@ -137,13 +162,10 @@ static struct cpuset top_cpuset = {
137 .count = ATOMIC_INIT(0), 162 .count = ATOMIC_INIT(0),
138 .sibling = LIST_HEAD_INIT(top_cpuset.sibling), 163 .sibling = LIST_HEAD_INIT(top_cpuset.sibling),
139 .children = LIST_HEAD_INIT(top_cpuset.children), 164 .children = LIST_HEAD_INIT(top_cpuset.children),
140 .parent = NULL,
141 .dentry = NULL,
142 .mems_generation = 0,
143}; 165};
144 166
145static struct vfsmount *cpuset_mount; 167static struct vfsmount *cpuset_mount;
146static struct super_block *cpuset_sb = NULL; 168static struct super_block *cpuset_sb;
147 169
148/* 170/*
149 * We have two global cpuset semaphores below. They can nest. 171 * We have two global cpuset semaphores below. They can nest.
@@ -227,6 +249,11 @@ static struct super_block *cpuset_sb = NULL;
227 * a tasks cpuset pointer we use task_lock(), which acts on a spinlock 249 * a tasks cpuset pointer we use task_lock(), which acts on a spinlock
228 * (task->alloc_lock) already in the task_struct routinely used for 250 * (task->alloc_lock) already in the task_struct routinely used for
229 * such matters. 251 * such matters.
252 *
253 * P.S. One more locking exception. RCU is used to guard the
254 * update of a tasks cpuset pointer by attach_task() and the
255 * access of task->cpuset->mems_generation via that pointer in
256 * the routine cpuset_update_task_memory_state().
230 */ 257 */
231 258
232static DECLARE_MUTEX(manage_sem); 259static DECLARE_MUTEX(manage_sem);
@@ -304,7 +331,7 @@ static void cpuset_d_remove_dir(struct dentry *dentry)
304 spin_lock(&dcache_lock); 331 spin_lock(&dcache_lock);
305 node = dentry->d_subdirs.next; 332 node = dentry->d_subdirs.next;
306 while (node != &dentry->d_subdirs) { 333 while (node != &dentry->d_subdirs) {
307 struct dentry *d = list_entry(node, struct dentry, d_child); 334 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
308 list_del_init(node); 335 list_del_init(node);
309 if (d->d_inode) { 336 if (d->d_inode) {
310 d = dget_locked(d); 337 d = dget_locked(d);
@@ -316,7 +343,7 @@ static void cpuset_d_remove_dir(struct dentry *dentry)
316 } 343 }
317 node = dentry->d_subdirs.next; 344 node = dentry->d_subdirs.next;
318 } 345 }
319 list_del_init(&dentry->d_child); 346 list_del_init(&dentry->d_u.d_child);
320 spin_unlock(&dcache_lock); 347 spin_unlock(&dcache_lock);
321 remove_dir(dentry); 348 remove_dir(dentry);
322} 349}
@@ -570,20 +597,43 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
570 BUG_ON(!nodes_intersects(*pmask, node_online_map)); 597 BUG_ON(!nodes_intersects(*pmask, node_online_map));
571} 598}
572 599
573/* 600/**
574 * Refresh current tasks mems_allowed and mems_generation from current 601 * cpuset_update_task_memory_state - update task memory placement
575 * tasks cpuset.
576 * 602 *
577 * Call without callback_sem or task_lock() held. May be called with 603 * If the current tasks cpusets mems_allowed changed behind our
578 * or without manage_sem held. Will acquire task_lock() and might 604 * backs, update current->mems_allowed, mems_generation and task NUMA
579 * acquire callback_sem during call. 605 * mempolicy to the new value.
606 *
607 * Task mempolicy is updated by rebinding it relative to the
608 * current->cpuset if a task has its memory placement changed.
609 * Do not call this routine if in_interrupt().
580 * 610 *
581 * The task_lock() is required to dereference current->cpuset safely. 611 * Call without callback_sem or task_lock() held. May be called
582 * Without it, we could pick up the pointer value of current->cpuset 612 * with or without manage_sem held. Doesn't need task_lock to guard
583 * in one instruction, and then attach_task could give us a different 613 * against another task changing a non-NULL cpuset pointer to NULL,
584 * cpuset, and then the cpuset we had could be removed and freed, 614 * as that is only done by a task on itself, and if the current task
585 * and then on our next instruction, we could dereference a no longer 615 * is here, it is not simultaneously in the exit code NULL'ing its
586 * valid cpuset pointer to get its mems_generation field. 616 * cpuset pointer. This routine also might acquire callback_sem and
617 * current->mm->mmap_sem during call.
618 *
619 * Reading current->cpuset->mems_generation doesn't need task_lock
620 * to guard the current->cpuset dereference, because it is guarded
621 * from concurrent freeing of current->cpuset by attach_task(),
622 * using RCU.
623 *
624 * The rcu_dereference() is technically probably not needed,
625 * as I don't actually mind if I see a new cpuset pointer but
626 * an old value of mems_generation. However this really only
627 * matters on alpha systems using cpusets heavily. If I dropped
628 * that rcu_dereference(), it would save them a memory barrier.
629 * For all other arch's, rcu_dereference is a no-op anyway, and for
630 * alpha systems not using cpusets, another planned optimization,
631 * avoiding the rcu critical section for tasks in the root cpuset
632 * which is statically allocated, so can't vanish, will make this
633 * irrelevant. Better to use RCU as intended, than to engage in
634 * some cute trick to save a memory barrier that is impossible to
635 * test, for alpha systems using cpusets heavily, which might not
636 * even exist.
587 * 637 *
588 * This routine is needed to update the per-task mems_allowed data, 638 * This routine is needed to update the per-task mems_allowed data,
589 * within the tasks context, when it is trying to allocate memory 639 * within the tasks context, when it is trying to allocate memory
@@ -591,27 +641,31 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
591 * task has been modifying its cpuset. 641 * task has been modifying its cpuset.
592 */ 642 */
593 643
594static void refresh_mems(void) 644void cpuset_update_task_memory_state()
595{ 645{
596 int my_cpusets_mem_gen; 646 int my_cpusets_mem_gen;
647 struct task_struct *tsk = current;
648 struct cpuset *cs;
597 649
598 task_lock(current); 650 if (tsk->cpuset == &top_cpuset) {
599 my_cpusets_mem_gen = current->cpuset->mems_generation; 651 /* Don't need rcu for top_cpuset. It's never freed. */
600 task_unlock(current); 652 my_cpusets_mem_gen = top_cpuset.mems_generation;
601 653 } else {
602 if (current->cpuset_mems_generation != my_cpusets_mem_gen) { 654 rcu_read_lock();
603 struct cpuset *cs; 655 cs = rcu_dereference(tsk->cpuset);
604 nodemask_t oldmem = current->mems_allowed; 656 my_cpusets_mem_gen = cs->mems_generation;
657 rcu_read_unlock();
658 }
605 659
660 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
606 down(&callback_sem); 661 down(&callback_sem);
607 task_lock(current); 662 task_lock(tsk);
608 cs = current->cpuset; 663 cs = tsk->cpuset; /* Maybe changed when task not locked */
609 guarantee_online_mems(cs, &current->mems_allowed); 664 guarantee_online_mems(cs, &tsk->mems_allowed);
610 current->cpuset_mems_generation = cs->mems_generation; 665 tsk->cpuset_mems_generation = cs->mems_generation;
611 task_unlock(current); 666 task_unlock(tsk);
612 up(&callback_sem); 667 up(&callback_sem);
613 if (!nodes_equal(oldmem, current->mems_allowed)) 668 mpol_rebind_task(tsk, &tsk->mems_allowed);
614 numa_policy_rebind(&oldmem, &current->mems_allowed);
615 } 669 }
616} 670}
617 671
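The rewritten routine reads tsk->cpuset->mems_generation inside an RCU read-side critical section instead of under task_lock(): attach_task() defers freeing a replaced cpuset past a grace period, so the pointer fetched by rcu_dereference() stays valid until rcu_read_unlock(). The read-side pattern, reduced to its essentials (a sketch, not the kernel code verbatim):

	rcu_read_lock();                        /* enter read-side section  */
	cs = rcu_dereference(tsk->cpuset);      /* ordered pointer fetch    */
	gen = cs->mems_generation;              /* cs cannot be freed here  */
	rcu_read_unlock();                      /* grace periods may now complete */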
@@ -766,36 +820,150 @@ static int update_cpumask(struct cpuset *cs, char *buf)
766} 820}
767 821
768/* 822/*
823 * Handle user request to change the 'mems' memory placement
824 * of a cpuset. Needs to validate the request, update the
825 * cpusets mems_allowed and mems_generation, and for each
826 * task in the cpuset, rebind any vma mempolicies and if
827 * the cpuset is marked 'memory_migrate', migrate the tasks
828 * pages to the new memory.
829 *
769 * Call with manage_sem held. May take callback_sem during call. 830 * Call with manage_sem held. May take callback_sem during call.
831 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
832 * lock each such tasks mm->mmap_sem, scan its vma's and rebind
833 * their mempolicies to the cpusets new mems_allowed.
770 */ 834 */
771 835
772static int update_nodemask(struct cpuset *cs, char *buf) 836static int update_nodemask(struct cpuset *cs, char *buf)
773{ 837{
774 struct cpuset trialcs; 838 struct cpuset trialcs;
839 nodemask_t oldmem;
840 struct task_struct *g, *p;
841 struct mm_struct **mmarray;
842 int i, n, ntasks;
843 int migrate;
844 int fudge;
775 int retval; 845 int retval;
776 846
777 trialcs = *cs; 847 trialcs = *cs;
778 retval = nodelist_parse(buf, trialcs.mems_allowed); 848 retval = nodelist_parse(buf, trialcs.mems_allowed);
779 if (retval < 0) 849 if (retval < 0)
780 return retval; 850 goto done;
781 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); 851 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
782 if (nodes_empty(trialcs.mems_allowed)) 852 oldmem = cs->mems_allowed;
783 return -ENOSPC; 853 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
854 retval = 0; /* Too easy - nothing to do */
855 goto done;
856 }
857 if (nodes_empty(trialcs.mems_allowed)) {
858 retval = -ENOSPC;
859 goto done;
860 }
784 retval = validate_change(cs, &trialcs); 861 retval = validate_change(cs, &trialcs);
785 if (retval == 0) { 862 if (retval < 0)
786 down(&callback_sem); 863 goto done;
787 cs->mems_allowed = trialcs.mems_allowed; 864
788 atomic_inc(&cpuset_mems_generation); 865 down(&callback_sem);
789 cs->mems_generation = atomic_read(&cpuset_mems_generation); 866 cs->mems_allowed = trialcs.mems_allowed;
790 up(&callback_sem); 867 atomic_inc(&cpuset_mems_generation);
868 cs->mems_generation = atomic_read(&cpuset_mems_generation);
869 up(&callback_sem);
870
871 set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */
872
873 fudge = 10; /* spare mmarray[] slots */
874 fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
875 retval = -ENOMEM;
876
877 /*
878 * Allocate mmarray[] to hold mm reference for each task
879 * in cpuset cs. Can't kmalloc GFP_KERNEL while holding
880 * tasklist_lock. We could use GFP_ATOMIC, but with a
881 * few more lines of code, we can retry until we get a big
882 * enough mmarray[] w/o using GFP_ATOMIC.
883 */
884 while (1) {
885 ntasks = atomic_read(&cs->count); /* guess */
886 ntasks += fudge;
887 mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
888 if (!mmarray)
889 goto done;
890 write_lock_irq(&tasklist_lock); /* block fork */
891 if (atomic_read(&cs->count) <= ntasks)
892 break; /* got enough */
893 write_unlock_irq(&tasklist_lock); /* try again */
894 kfree(mmarray);
791 } 895 }
896
897 n = 0;
898
899 /* Load up mmarray[] with mm reference for each task in cpuset. */
900 do_each_thread(g, p) {
901 struct mm_struct *mm;
902
903 if (n >= ntasks) {
904 printk(KERN_WARNING
905 "Cpuset mempolicy rebind incomplete.\n");
906 continue;
907 }
908 if (p->cpuset != cs)
909 continue;
910 mm = get_task_mm(p);
911 if (!mm)
912 continue;
913 mmarray[n++] = mm;
914 } while_each_thread(g, p);
915 write_unlock_irq(&tasklist_lock);
916
917 /*
918 * Now that we've dropped the tasklist spinlock, we can
919 * rebind the vma mempolicies of each mm in mmarray[] to their
920 * new cpuset, and release that mm. The mpol_rebind_mm()
921 * call takes mmap_sem, which we couldn't take while holding
922 * tasklist_lock. Forks can happen again now - the mpol_copy()
923 * cpuset_being_rebound check will catch such forks, and rebind
924 * their vma mempolicies too. Because we still hold the global
925 * cpuset manage_sem, we know that no other rebind effort will
926 * be contending for the global variable cpuset_being_rebound.
927 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
928 * is idempotent. Also migrate pages in each mm to new nodes.
929 */
930 migrate = is_memory_migrate(cs);
931 for (i = 0; i < n; i++) {
932 struct mm_struct *mm = mmarray[i];
933
934 mpol_rebind_mm(mm, &cs->mems_allowed);
935 if (migrate) {
936 do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
937 MPOL_MF_MOVE_ALL);
938 }
939 mmput(mm);
940 }
941
942 /* We're done rebinding vma's to this cpusets new mems_allowed. */
943 kfree(mmarray);
944 set_cpuset_being_rebound(NULL);
945 retval = 0;
946done:
792 return retval; 947 return retval;
793} 948}
794 949
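The mmarray sizing loop above is a general technique: kmalloc(GFP_KERNEL) may sleep and so cannot run under tasklist_lock, so the code guesses a size, allocates outside the lock, then takes the lock and checks that the guess still covers the task count, retrying otherwise rather than resorting to GFP_ATOMIC. A generic sketch of the same pattern, with count_items() and 'lock' as stand-ins for atomic_read(&cs->count) and tasklist_lock:

	for (;;) {
		n = count_items() + FUDGE;      /* guess, plus headroom     */
		arr = kmalloc(n * sizeof(*arr), GFP_KERNEL);
		if (!arr)
			return -ENOMEM;
		spin_lock(&lock);
		if (count_items() <= n)
			break;                  /* guess was big enough     */
		spin_unlock(&lock);             /* raced: population grew,  */
		kfree(arr);                     /* drop and try again       */
	}
	/* ... fill arr under the lock, then unlock ... */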
795/* 950/*
951 * Call with manage_sem held.
952 */
953
954static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
955{
956 if (simple_strtoul(buf, NULL, 10) != 0)
957 cpuset_memory_pressure_enabled = 1;
958 else
959 cpuset_memory_pressure_enabled = 0;
960 return 0;
961}
962
963/*
796 * update_flag - read a 0 or a 1 in a file and update associated flag 964 * update_flag - read a 0 or a 1 in a file and update associated flag
797 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, 965 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
798 * CS_NOTIFY_ON_RELEASE) 966 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE)
799 * cs: the cpuset to update 967 * cs: the cpuset to update
800 * buf: the buffer where we read the 0 or 1 968 * buf: the buffer where we read the 0 or 1
801 * 969 *
@@ -834,6 +1002,104 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
834} 1002}
835 1003
836/* 1004/*
1005 * Frequency meter - How fast is some event occurring?
1006 *
1007 * These routines manage a digitally filtered, constant time based,
1008 * event frequency meter. There are four routines:
1009 * fmeter_init() - initialize a frequency meter.
1010 * fmeter_markevent() - called each time the event happens.
1011 * fmeter_getrate() - returns the recent rate of such events.
1012 * fmeter_update() - internal routine used to update fmeter.
1013 *
1014 * A common data structure is passed to each of these routines,
1015 * which is used to keep track of the state required to manage the
1016 * frequency meter and its digital filter.
1017 *
1018 * The filter works on the number of events marked per unit time.
1019 * The filter is single-pole low-pass recursive (IIR). The time unit
1020 * is 1 second. Arithmetic is done using 32-bit integers scaled to
1021 * simulate 3 decimal digits of precision (multiplied by 1000).
1022 *
1023 * With an FM_COEF of 933, and a time base of 1 second, the filter
1024 * has a half-life of 10 seconds, meaning that if the events quit
1025 * happening, then the rate returned from the fmeter_getrate()
1026 * will be cut in half each 10 seconds, until it converges to zero.
1027 *
1028 * It is not worth doing a real infinitely recursive filter. If more
1029 * than FM_MAXTICKS ticks have elapsed since the last filter event,
1030 * just compute FM_MAXTICKS ticks worth, by which point the level
1031 * will be stable.
1032 *
1033 * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
1034 * arithmetic overflow in the fmeter_update() routine.
1035 *
1036 * Given the simple 32 bit integer arithmetic used, this meter works
1037 * best for reporting rates between one per millisecond (msec) and
1038 * one per 32 (approx) seconds. At constant rates faster than one
1039 * per msec it maxes out at values just under 1,000,000. At constant
1040 * rates between one per msec, and one per second it will stabilize
1041 * to a value N*1000, where N is the rate of events per second.
1042 * At constant rates between one per second and one per 32 seconds,
1043 * it will be choppy, moving up on the seconds that have an event,
1044 * and then decaying until the next event. At rates slower than
1045 * about one in 32 seconds, it decays all the way back to zero between
1046 * each event.
1047 */
1048
1049#define FM_COEF 933 /* coefficient for half-life of 10 secs */
1050#define FM_MAXTICKS ((time_t)99) /* useless computing more ticks than this */
1051#define FM_MAXCNT 1000000 /* limit cnt to avoid overflow */
1052#define FM_SCALE 1000 /* faux fixed point scale */
1053
1054/* Initialize a frequency meter */
1055static void fmeter_init(struct fmeter *fmp)
1056{
1057 fmp->cnt = 0;
1058 fmp->val = 0;
1059 fmp->time = 0;
1060 spin_lock_init(&fmp->lock);
1061}
1062
1063/* Internal meter update - process cnt events and update value */
1064static void fmeter_update(struct fmeter *fmp)
1065{
1066 time_t now = get_seconds();
1067 time_t ticks = now - fmp->time;
1068
1069 if (ticks == 0)
1070 return;
1071
1072 ticks = min(FM_MAXTICKS, ticks);
1073 while (ticks-- > 0)
1074 fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
1075 fmp->time = now;
1076
1077 fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
1078 fmp->cnt = 0;
1079}
1080
1081/* Process any previous ticks, then bump cnt by one (times scale). */
1082static void fmeter_markevent(struct fmeter *fmp)
1083{
1084 spin_lock(&fmp->lock);
1085 fmeter_update(fmp);
1086 fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
1087 spin_unlock(&fmp->lock);
1088}
1089
1090/* Process any previous ticks, then return current value. */
1091static int fmeter_getrate(struct fmeter *fmp)
1092{
1093 int val;
1094
1095 spin_lock(&fmp->lock);
1096 fmeter_update(fmp);
1097 val = fmp->val;
1098 spin_unlock(&fmp->lock);
1099 return val;
1100}
1101
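The constants check out: with FM_COEF = 933 the per-tick decay factor is 0.933, and 0.933^10 ≈ 0.50, giving the documented 10-second half-life; FM_SCALE = 1000 keeps three decimal digits in plain integer arithmetic. A standalone userspace check of the decay:

	#include <stdio.h>

	#define FM_COEF  933
	#define FM_SCALE 1000

	int main(void)
	{
		int val = 1000000;      /* level right after a fast event burst */
		int t;

		for (t = 1; t <= 20; t++) {
			val = (FM_COEF * val) / FM_SCALE;  /* one tick of decay */
			if (t % 10 == 0)
				printf("after %2d s: %d\n", t, val);
		}
		/* Prints just under 500000 at 10 s and just under 250000 at
		 * 20 s: the value halves roughly every 10 seconds. */
		return 0;
	}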
1102/*
837 * Attach task specified by pid in 'pidbuf' to cpuset 'cs', possibly 1103 * Attach task specified by pid in 'pidbuf' to cpuset 'cs', possibly
838 * writing the path of the old cpuset in 'ppathbuf' if it needs to be 1104 * writing the path of the old cpuset in 'ppathbuf' if it needs to be
839 * notified on release. 1105 * notified on release.
@@ -848,6 +1114,8 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
848 struct task_struct *tsk; 1114 struct task_struct *tsk;
849 struct cpuset *oldcs; 1115 struct cpuset *oldcs;
850 cpumask_t cpus; 1116 cpumask_t cpus;
1117 nodemask_t from, to;
1118 struct mm_struct *mm;
851 1119
852 if (sscanf(pidbuf, "%d", &pid) != 1) 1120 if (sscanf(pidbuf, "%d", &pid) != 1)
853 return -EIO; 1121 return -EIO;
@@ -887,14 +1155,27 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
887 return -ESRCH; 1155 return -ESRCH;
888 } 1156 }
889 atomic_inc(&cs->count); 1157 atomic_inc(&cs->count);
890 tsk->cpuset = cs; 1158 rcu_assign_pointer(tsk->cpuset, cs);
891 task_unlock(tsk); 1159 task_unlock(tsk);
892 1160
893 guarantee_online_cpus(cs, &cpus); 1161 guarantee_online_cpus(cs, &cpus);
894 set_cpus_allowed(tsk, cpus); 1162 set_cpus_allowed(tsk, cpus);
895 1163
1164 from = oldcs->mems_allowed;
1165 to = cs->mems_allowed;
1166
896 up(&callback_sem); 1167 up(&callback_sem);
1168
1169 mm = get_task_mm(tsk);
1170 if (mm) {
1171 mpol_rebind_mm(mm, &to);
1172 mmput(mm);
1173 }
1174
1175 if (is_memory_migrate(cs))
1176 do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
897 put_task_struct(tsk); 1177 put_task_struct(tsk);
1178 synchronize_rcu();
898 if (atomic_dec_and_test(&oldcs->count)) 1179 if (atomic_dec_and_test(&oldcs->count))
899 check_for_release(oldcs, ppathbuf); 1180 check_for_release(oldcs, ppathbuf);
900 return 0; 1181 return 0;
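attach_task() is the update side of the RCU pattern whose read side appears in cpuset_update_task_memory_state(): the new pointer is published with rcu_assign_pointer(), and synchronize_rcu() runs before the old cpuset's count may drop to the point of release, so no reader can still be dereferencing it. Writer-side sketch, condensed from the hunk above:

	task_lock(tsk);
	rcu_assign_pointer(tsk->cpuset, cs);    /* publish fully-built pointer */
	task_unlock(tsk);
	/* ... rebind mempolicy, migrate pages ... */
	synchronize_rcu();                      /* wait out current readers    */
	if (atomic_dec_and_test(&oldcs->count)) /* old cpuset safe to release  */
		check_for_release(oldcs, ppathbuf);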
@@ -905,11 +1186,14 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
905typedef enum { 1186typedef enum {
906 FILE_ROOT, 1187 FILE_ROOT,
907 FILE_DIR, 1188 FILE_DIR,
1189 FILE_MEMORY_MIGRATE,
908 FILE_CPULIST, 1190 FILE_CPULIST,
909 FILE_MEMLIST, 1191 FILE_MEMLIST,
910 FILE_CPU_EXCLUSIVE, 1192 FILE_CPU_EXCLUSIVE,
911 FILE_MEM_EXCLUSIVE, 1193 FILE_MEM_EXCLUSIVE,
912 FILE_NOTIFY_ON_RELEASE, 1194 FILE_NOTIFY_ON_RELEASE,
1195 FILE_MEMORY_PRESSURE_ENABLED,
1196 FILE_MEMORY_PRESSURE,
913 FILE_TASKLIST, 1197 FILE_TASKLIST,
914} cpuset_filetype_t; 1198} cpuset_filetype_t;
915 1199
@@ -960,6 +1244,15 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
960 case FILE_NOTIFY_ON_RELEASE: 1244 case FILE_NOTIFY_ON_RELEASE:
961 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); 1245 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
962 break; 1246 break;
1247 case FILE_MEMORY_MIGRATE:
1248 retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
1249 break;
1250 case FILE_MEMORY_PRESSURE_ENABLED:
1251 retval = update_memory_pressure_enabled(cs, buffer);
1252 break;
1253 case FILE_MEMORY_PRESSURE:
1254 retval = -EACCES;
1255 break;
963 case FILE_TASKLIST: 1256 case FILE_TASKLIST:
964 retval = attach_task(cs, buffer, &pathbuf); 1257 retval = attach_task(cs, buffer, &pathbuf);
965 break; 1258 break;
@@ -1060,6 +1353,15 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
1060 case FILE_NOTIFY_ON_RELEASE: 1353 case FILE_NOTIFY_ON_RELEASE:
1061 *s++ = notify_on_release(cs) ? '1' : '0'; 1354 *s++ = notify_on_release(cs) ? '1' : '0';
1062 break; 1355 break;
1356 case FILE_MEMORY_MIGRATE:
1357 *s++ = is_memory_migrate(cs) ? '1' : '0';
1358 break;
1359 case FILE_MEMORY_PRESSURE_ENABLED:
1360 *s++ = cpuset_memory_pressure_enabled ? '1' : '0';
1361 break;
1362 case FILE_MEMORY_PRESSURE:
1363 s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter));
1364 break;
1063 default: 1365 default:
1064 retval = -EINVAL; 1366 retval = -EINVAL;
1065 goto out; 1367 goto out;
@@ -1178,7 +1480,7 @@ static int cpuset_create_file(struct dentry *dentry, int mode)
1178 1480
1179/* 1481/*
1180 * cpuset_create_dir - create a directory for an object. 1482 * cpuset_create_dir - create a directory for an object.
1181 * cs: the cpuset we create the directory for. 1483 * cs: the cpuset we create the directory for.
1182 * It must have a valid ->parent field 1484 * It must have a valid ->parent field
1183 * And we are going to fill its ->dentry field. 1485 * And we are going to fill its ->dentry field.
1184 * name: The name to give to the cpuset directory. Will be copied. 1486 * name: The name to give to the cpuset directory. Will be copied.
@@ -1408,6 +1710,21 @@ static struct cftype cft_notify_on_release = {
1408 .private = FILE_NOTIFY_ON_RELEASE, 1710 .private = FILE_NOTIFY_ON_RELEASE,
1409}; 1711};
1410 1712
1713static struct cftype cft_memory_migrate = {
1714 .name = "memory_migrate",
1715 .private = FILE_MEMORY_MIGRATE,
1716};
1717
1718static struct cftype cft_memory_pressure_enabled = {
1719 .name = "memory_pressure_enabled",
1720 .private = FILE_MEMORY_PRESSURE_ENABLED,
1721};
1722
1723static struct cftype cft_memory_pressure = {
1724 .name = "memory_pressure",
1725 .private = FILE_MEMORY_PRESSURE,
1726};
1727
1411static int cpuset_populate_dir(struct dentry *cs_dentry) 1728static int cpuset_populate_dir(struct dentry *cs_dentry)
1412{ 1729{
1413 int err; 1730 int err;
@@ -1422,6 +1739,10 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
1422 return err; 1739 return err;
1423 if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) 1740 if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
1424 return err; 1741 return err;
1742 if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
1743 return err;
1744 if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
1745 return err;
1425 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) 1746 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
1426 return err; 1747 return err;
1427 return 0; 1748 return 0;
@@ -1446,7 +1767,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
1446 return -ENOMEM; 1767 return -ENOMEM;
1447 1768
1448 down(&manage_sem); 1769 down(&manage_sem);
1449 refresh_mems(); 1770 cpuset_update_task_memory_state();
1450 cs->flags = 0; 1771 cs->flags = 0;
1451 if (notify_on_release(parent)) 1772 if (notify_on_release(parent))
1452 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); 1773 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1457,11 +1778,13 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
1457 INIT_LIST_HEAD(&cs->children); 1778 INIT_LIST_HEAD(&cs->children);
1458 atomic_inc(&cpuset_mems_generation); 1779 atomic_inc(&cpuset_mems_generation);
1459 cs->mems_generation = atomic_read(&cpuset_mems_generation); 1780 cs->mems_generation = atomic_read(&cpuset_mems_generation);
1781 fmeter_init(&cs->fmeter);
1460 1782
1461 cs->parent = parent; 1783 cs->parent = parent;
1462 1784
1463 down(&callback_sem); 1785 down(&callback_sem);
1464 list_add(&cs->sibling, &cs->parent->children); 1786 list_add(&cs->sibling, &cs->parent->children);
1787 number_of_cpusets++;
1465 up(&callback_sem); 1788 up(&callback_sem);
1466 1789
1467 err = cpuset_create_dir(cs, name, mode); 1790 err = cpuset_create_dir(cs, name, mode);
@@ -1503,7 +1826,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1503 /* the vfs holds both inode->i_sem already */ 1826 /* the vfs holds both inode->i_sem already */
1504 1827
1505 down(&manage_sem); 1828 down(&manage_sem);
1506 refresh_mems(); 1829 cpuset_update_task_memory_state();
1507 if (atomic_read(&cs->count) > 0) { 1830 if (atomic_read(&cs->count) > 0) {
1508 up(&manage_sem); 1831 up(&manage_sem);
1509 return -EBUSY; 1832 return -EBUSY;
@@ -1524,6 +1847,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1524 spin_unlock(&d->d_lock); 1847 spin_unlock(&d->d_lock);
1525 cpuset_d_remove_dir(d); 1848 cpuset_d_remove_dir(d);
1526 dput(d); 1849 dput(d);
1850 number_of_cpusets--;
1527 up(&callback_sem); 1851 up(&callback_sem);
1528 if (list_empty(&parent->children)) 1852 if (list_empty(&parent->children))
1529 check_for_release(parent, &pathbuf); 1853 check_for_release(parent, &pathbuf);
@@ -1532,6 +1856,21 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1532 return 0; 1856 return 0;
1533} 1857}
1534 1858
1859/*
1860 * cpuset_init_early - just enough so that the calls to
1861 * cpuset_update_task_memory_state() in early init code
1862 * are harmless.
1863 */
1864
1865int __init cpuset_init_early(void)
1866{
1867 struct task_struct *tsk = current;
1868
1869 tsk->cpuset = &top_cpuset;
1870 tsk->cpuset->mems_generation = atomic_read(&cpuset_mems_generation);
1871 return 0;
1872}
1873
1535/** 1874/**
1536 * cpuset_init - initialize cpusets at system boot 1875 * cpuset_init - initialize cpusets at system boot
1537 * 1876 *
@@ -1546,6 +1885,7 @@ int __init cpuset_init(void)
1546 top_cpuset.cpus_allowed = CPU_MASK_ALL; 1885 top_cpuset.cpus_allowed = CPU_MASK_ALL;
1547 top_cpuset.mems_allowed = NODE_MASK_ALL; 1886 top_cpuset.mems_allowed = NODE_MASK_ALL;
1548 1887
1888 fmeter_init(&top_cpuset.fmeter);
1549 atomic_inc(&cpuset_mems_generation); 1889 atomic_inc(&cpuset_mems_generation);
1550 top_cpuset.mems_generation = atomic_read(&cpuset_mems_generation); 1890 top_cpuset.mems_generation = atomic_read(&cpuset_mems_generation);
1551 1891
@@ -1566,7 +1906,11 @@ int __init cpuset_init(void)
1566 root->d_inode->i_nlink++; 1906 root->d_inode->i_nlink++;
1567 top_cpuset.dentry = root; 1907 top_cpuset.dentry = root;
1568 root->d_inode->i_op = &cpuset_dir_inode_operations; 1908 root->d_inode->i_op = &cpuset_dir_inode_operations;
1909 number_of_cpusets = 1;
1569 err = cpuset_populate_dir(root); 1910 err = cpuset_populate_dir(root);
1911 /* memory_pressure_enabled is in root cpuset only */
1912 if (err == 0)
1913 err = cpuset_add_file(root, &cft_memory_pressure_enabled);
1570out: 1914out:
1571 return err; 1915 return err;
1572} 1916}
@@ -1632,15 +1976,13 @@ void cpuset_fork(struct task_struct *child)
1632 * 1976 *
1633 * We don't need to task_lock() this reference to tsk->cpuset, 1977 * We don't need to task_lock() this reference to tsk->cpuset,
1634 * because tsk is already marked PF_EXITING, so attach_task() won't 1978 * because tsk is already marked PF_EXITING, so attach_task() won't
1635 * mess with it. 1979 * mess with it, or task is a failed fork, never visible to attach_task.
1636 **/ 1980 **/
1637 1981
1638void cpuset_exit(struct task_struct *tsk) 1982void cpuset_exit(struct task_struct *tsk)
1639{ 1983{
1640 struct cpuset *cs; 1984 struct cpuset *cs;
1641 1985
1642 BUG_ON(!(tsk->flags & PF_EXITING));
1643
1644 cs = tsk->cpuset; 1986 cs = tsk->cpuset;
1645 tsk->cpuset = NULL; 1987 tsk->cpuset = NULL;
1646 1988
@@ -1667,14 +2009,14 @@ void cpuset_exit(struct task_struct *tsk)
1667 * tasks cpuset. 2009 * tasks cpuset.
1668 **/ 2010 **/
1669 2011
1670cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk) 2012cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
1671{ 2013{
1672 cpumask_t mask; 2014 cpumask_t mask;
1673 2015
1674 down(&callback_sem); 2016 down(&callback_sem);
1675 task_lock((struct task_struct *)tsk); 2017 task_lock(tsk);
1676 guarantee_online_cpus(tsk->cpuset, &mask); 2018 guarantee_online_cpus(tsk->cpuset, &mask);
1677 task_unlock((struct task_struct *)tsk); 2019 task_unlock(tsk);
1678 up(&callback_sem); 2020 up(&callback_sem);
1679 2021
1680 return mask; 2022 return mask;
@@ -1686,43 +2028,26 @@ void cpuset_init_current_mems_allowed(void)
1686} 2028}
1687 2029
1688/** 2030/**
1689 * cpuset_update_current_mems_allowed - update mems parameters to new values 2031 * cpuset_mems_allowed - return mems_allowed mask from a tasks cpuset.
1690 * 2032 * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
1691 * If the current tasks cpusets mems_allowed changed behind our backs,
1692 * update current->mems_allowed and mems_generation to the new value.
1693 * Do not call this routine if in_interrupt().
1694 * 2033 *
1695 * Call without callback_sem or task_lock() held. May be called 2034 * Description: Returns the nodemask_t mems_allowed of the cpuset
1696 * with or without manage_sem held. Unless exiting, it will acquire 2035 * attached to the specified @tsk. Guaranteed to return some non-empty
1697 * task_lock(). Also might acquire callback_sem during call to 2036 * subset of node_online_map, even if this means going outside the
1698 * refresh_mems(). 2037 * tasks cpuset.
1699 */ 2038 **/
1700 2039
1701void cpuset_update_current_mems_allowed(void) 2040nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
1702{ 2041{
1703 struct cpuset *cs; 2042 nodemask_t mask;
1704 int need_to_refresh = 0;
1705 2043
1706 task_lock(current); 2044 down(&callback_sem);
1707 cs = current->cpuset; 2045 task_lock(tsk);
1708 if (!cs) 2046 guarantee_online_mems(tsk->cpuset, &mask);
1709 goto done; 2047 task_unlock(tsk);
1710 if (current->cpuset_mems_generation != cs->mems_generation) 2048 up(&callback_sem);
1711 need_to_refresh = 1;
1712done:
1713 task_unlock(current);
1714 if (need_to_refresh)
1715 refresh_mems();
1716}
1717 2049
1718/** 2050 return mask;
1719 * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed
1720 * @nodes: pointer to a node bitmap that is and-ed with mems_allowed
1721 */
1722void cpuset_restrict_to_mems_allowed(unsigned long *nodes)
1723{
1724 bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed),
1725 MAX_NUMNODES);
1726} 2051}
1727 2052
1728/** 2053/**
@@ -1795,7 +2120,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
1795 * GFP_USER - only nodes in current tasks mems allowed ok. 2120 * GFP_USER - only nodes in current tasks mems allowed ok.
1796 **/ 2121 **/
1797 2122
1798int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) 2123int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
1799{ 2124{
1800 int node; /* node that zone z is on */ 2125 int node; /* node that zone z is on */
1801 const struct cpuset *cs; /* current cpuset ancestors */ 2126 const struct cpuset *cs; /* current cpuset ancestors */
@@ -1867,6 +2192,42 @@ done:
1867} 2192}
1868 2193
1869/* 2194/*
2195 * Collection of memory_pressure is suppressed unless
2196 * this flag is enabled by writing "1" to the special
2197 * cpuset file 'memory_pressure_enabled' in the root cpuset.
2198 */
2199
2200int cpuset_memory_pressure_enabled __read_mostly;
2201
2202/**
2203 * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
2204 *
2205 * Keep a running average of the rate of synchronous (direct)
2206 * page reclaim efforts initiated by tasks in each cpuset.
2207 *
2208 * This represents the rate at which some task in the cpuset
2209 * ran low on memory on all nodes it was allowed to use, and
2210 * had to enter the kernels page reclaim code in an effort to
2211 * create more free memory by tossing clean pages or swapping
2212 * or writing dirty pages.
2213 *
2214 * Display to user space in the per-cpuset read-only file
2215 * "memory_pressure". Value displayed is an integer
2216 * representing the recent rate of entry into the synchronous
2217 * (direct) page reclaim by any task attached to the cpuset.
2218 **/
2219
2220void __cpuset_memory_pressure_bump(void)
2221{
2222 struct cpuset *cs;
2223
2224 task_lock(current);
2225 cs = current->cpuset;
2226 fmeter_markevent(&cs->fmeter);
2227 task_unlock(current);
2228}
2229
2230/*
1870 * proc_cpuset_show() 2231 * proc_cpuset_show()
1871 * - Print tasks cpuset path into seq_file. 2232 * - Print tasks cpuset path into seq_file.
1872 * - Used for /proc/<pid>/cpuset. 2233 * - Used for /proc/<pid>/cpuset.
diff --git a/kernel/exit.c b/kernel/exit.c
index ee515683b92d..caceabf3f230 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,7 +72,6 @@ repeat:
72 __ptrace_unlink(p); 72 __ptrace_unlink(p);
73 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); 73 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
74 __exit_signal(p); 74 __exit_signal(p);
75 __exit_sighand(p);
76 /* 75 /*
77 * Note that the fastpath in sys_times depends on __exit_signal having 76 * Note that the fastpath in sys_times depends on __exit_signal having
78 * updated the counters before a task is removed from the tasklist of 77 * updated the counters before a task is removed from the tasklist of
@@ -258,7 +257,7 @@ static inline void reparent_to_init(void)
258 257
259void __set_special_pids(pid_t session, pid_t pgrp) 258void __set_special_pids(pid_t session, pid_t pgrp)
260{ 259{
261 struct task_struct *curr = current; 260 struct task_struct *curr = current->group_leader;
262 261
263 if (curr->signal->session != session) { 262 if (curr->signal->session != session) {
264 detach_pid(curr, PIDTYPE_SID); 263 detach_pid(curr, PIDTYPE_SID);
@@ -926,7 +925,6 @@ do_group_exit(int exit_code)
926 /* Another thread got here before we took the lock. */ 925 /* Another thread got here before we took the lock. */
927 exit_code = sig->group_exit_code; 926 exit_code = sig->group_exit_code;
928 else { 927 else {
929 sig->flags = SIGNAL_GROUP_EXIT;
930 sig->group_exit_code = exit_code; 928 sig->group_exit_code = exit_code;
931 zap_other_threads(current); 929 zap_other_threads(current);
932 } 930 }
diff --git a/kernel/fork.c b/kernel/fork.c
index fb8572a42297..72e3252c6763 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -743,6 +743,14 @@ int unshare_files(void)
743 743
744EXPORT_SYMBOL(unshare_files); 744EXPORT_SYMBOL(unshare_files);
745 745
746void sighand_free_cb(struct rcu_head *rhp)
747{
748 struct sighand_struct *sp;
749
750 sp = container_of(rhp, struct sighand_struct, rcu);
751 kmem_cache_free(sighand_cachep, sp);
752}
753
746static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) 754static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
747{ 755{
748 struct sighand_struct *sig; 756 struct sighand_struct *sig;
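sighand_free_cb() above shows the non-blocking counterpart to synchronize_rcu(): freeing is deferred with call_rcu(), and container_of() recovers the enclosing sighand_struct from the embedded rcu_head once the grace period ends. A sketch of how such a callback would be queued, assuming struct sighand_struct gains the 'rcu' member this series adds:

	static void sighand_free(struct sighand_struct *sp)
	{
		/* Runs sighand_free_cb() after all current RCU readers finish;
		 * unlike synchronize_rcu(), the caller does not block. */
		call_rcu(&sp->rcu, sighand_free_cb);
	}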
@@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
752 return 0; 760 return 0;
753 } 761 }
754 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); 762 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
755 tsk->sighand = sig; 763 rcu_assign_pointer(tsk->sighand, sig);
756 if (!sig) 764 if (!sig)
757 return -ENOMEM; 765 return -ENOMEM;
758 spin_lock_init(&sig->siglock); 766 spin_lock_init(&sig->siglock);
@@ -803,9 +811,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
803 sig->it_prof_expires = cputime_zero; 811 sig->it_prof_expires = cputime_zero;
804 sig->it_prof_incr = cputime_zero; 812 sig->it_prof_incr = cputime_zero;
805 813
806 sig->tty = current->signal->tty;
807 sig->pgrp = process_group(current);
808 sig->session = current->signal->session;
809 sig->leader = 0; /* session leadership doesn't inherit */ 814 sig->leader = 0; /* session leadership doesn't inherit */
810 sig->tty_old_pgrp = 0; 815 sig->tty_old_pgrp = 0;
811 816
@@ -964,12 +969,13 @@ static task_t *copy_process(unsigned long clone_flags,
964 p->io_context = NULL; 969 p->io_context = NULL;
965 p->io_wait = NULL; 970 p->io_wait = NULL;
966 p->audit_context = NULL; 971 p->audit_context = NULL;
972 cpuset_fork(p);
967#ifdef CONFIG_NUMA 973#ifdef CONFIG_NUMA
968 p->mempolicy = mpol_copy(p->mempolicy); 974 p->mempolicy = mpol_copy(p->mempolicy);
969 if (IS_ERR(p->mempolicy)) { 975 if (IS_ERR(p->mempolicy)) {
970 retval = PTR_ERR(p->mempolicy); 976 retval = PTR_ERR(p->mempolicy);
971 p->mempolicy = NULL; 977 p->mempolicy = NULL;
972 goto bad_fork_cleanup; 978 goto bad_fork_cleanup_cpuset;
973 } 979 }
974#endif 980#endif
975 981
@@ -1127,25 +1133,19 @@ static task_t *copy_process(unsigned long clone_flags,
1127 attach_pid(p, PIDTYPE_PID, p->pid); 1133 attach_pid(p, PIDTYPE_PID, p->pid);
1128 attach_pid(p, PIDTYPE_TGID, p->tgid); 1134 attach_pid(p, PIDTYPE_TGID, p->tgid);
1129 if (thread_group_leader(p)) { 1135 if (thread_group_leader(p)) {
1136 p->signal->tty = current->signal->tty;
1137 p->signal->pgrp = process_group(current);
1138 p->signal->session = current->signal->session;
1130 attach_pid(p, PIDTYPE_PGID, process_group(p)); 1139 attach_pid(p, PIDTYPE_PGID, process_group(p));
1131 attach_pid(p, PIDTYPE_SID, p->signal->session); 1140 attach_pid(p, PIDTYPE_SID, p->signal->session);
1132 if (p->pid) 1141 if (p->pid)
1133 __get_cpu_var(process_counts)++; 1142 __get_cpu_var(process_counts)++;
1134 } 1143 }
1135 1144
1136 if (!current->signal->tty && p->signal->tty)
1137 p->signal->tty = NULL;
1138
1139 nr_threads++; 1145 nr_threads++;
1140 total_forks++; 1146 total_forks++;
1141 write_unlock_irq(&tasklist_lock); 1147 write_unlock_irq(&tasklist_lock);
1142 proc_fork_connector(p); 1148 proc_fork_connector(p);
1143 cpuset_fork(p);
1144 retval = 0;
1145
1146fork_out:
1147 if (retval)
1148 return ERR_PTR(retval);
1149 return p; 1149 return p;
1150 1150
1151bad_fork_cleanup_namespace: 1151bad_fork_cleanup_namespace:
@@ -1172,7 +1172,9 @@ bad_fork_cleanup_security:
1172bad_fork_cleanup_policy: 1172bad_fork_cleanup_policy:
1173#ifdef CONFIG_NUMA 1173#ifdef CONFIG_NUMA
1174 mpol_free(p->mempolicy); 1174 mpol_free(p->mempolicy);
1175bad_fork_cleanup_cpuset:
1175#endif 1176#endif
1177 cpuset_exit(p);
1176bad_fork_cleanup: 1178bad_fork_cleanup:
1177 if (p->binfmt) 1179 if (p->binfmt)
1178 module_put(p->binfmt->module); 1180 module_put(p->binfmt->module);
@@ -1184,7 +1186,8 @@ bad_fork_cleanup_count:
1184 free_uid(p->user); 1186 free_uid(p->user);
1185bad_fork_free: 1187bad_fork_free:
1186 free_task(p); 1188 free_task(p);
1187 goto fork_out; 1189fork_out:
1190 return ERR_PTR(retval);
1188} 1191}
1189 1192
1190struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) 1193struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 8a64a4844cde..d03b5eef8ce0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -10,6 +10,8 @@
10#include <linux/proc_fs.h> 10#include <linux/proc_fs.h>
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12 12
13#include "internals.h"
14
13static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS]; 15static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
14 16
15#ifdef CONFIG_SMP 17#ifdef CONFIG_SMP
diff --git a/kernel/module.c b/kernel/module.c
index 4b06bbad49c2..e4276046a1b6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -496,15 +496,15 @@ static void module_unload_free(struct module *mod)
496} 496}
497 497
498#ifdef CONFIG_MODULE_FORCE_UNLOAD 498#ifdef CONFIG_MODULE_FORCE_UNLOAD
499static inline int try_force(unsigned int flags) 499static inline int try_force_unload(unsigned int flags)
500{ 500{
501 int ret = (flags & O_TRUNC); 501 int ret = (flags & O_TRUNC);
502 if (ret) 502 if (ret)
503 add_taint(TAINT_FORCED_MODULE); 503 add_taint(TAINT_FORCED_RMMOD);
504 return ret; 504 return ret;
505} 505}
506#else 506#else
507static inline int try_force(unsigned int flags) 507static inline int try_force_unload(unsigned int flags)
508{ 508{
509 return 0; 509 return 0;
510} 510}
@@ -524,7 +524,7 @@ static int __try_stop_module(void *_sref)
524 524
525 /* If it's not unused, quit unless we are told to block. */ 525 /* If it's not unused, quit unless we are told to block. */
526 if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { 526 if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) {
527 if (!(*sref->forced = try_force(sref->flags))) 527 if (!(*sref->forced = try_force_unload(sref->flags)))
528 return -EWOULDBLOCK; 528 return -EWOULDBLOCK;
529 } 529 }
530 530
@@ -609,7 +609,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
609 /* If it has an init func, it must have an exit func to unload */ 609 /* If it has an init func, it must have an exit func to unload */
610 if ((mod->init != NULL && mod->exit == NULL) 610 if ((mod->init != NULL && mod->exit == NULL)
611 || mod->unsafe) { 611 || mod->unsafe) {
612 forced = try_force(flags); 612 forced = try_force_unload(flags);
613 if (!forced) { 613 if (!forced) {
614 /* This module can't be removed */ 614 /* This module can't be removed */
615 ret = -EBUSY; 615 ret = -EBUSY;
@@ -958,7 +958,6 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
958 unsigned long ret; 958 unsigned long ret;
959 const unsigned long *crc; 959 const unsigned long *crc;
960 960
961 spin_lock_irq(&modlist_lock);
962 ret = __find_symbol(name, &owner, &crc, mod->license_gplok); 961 ret = __find_symbol(name, &owner, &crc, mod->license_gplok);
963 if (ret) { 962 if (ret) {
964 /* use_module can fail due to OOM, or module unloading */ 963 /* use_module can fail due to OOM, or module unloading */
@@ -966,7 +965,6 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
966 !use_module(mod, owner)) 965 !use_module(mod, owner))
967 ret = 0; 966 ret = 0;
968 } 967 }
969 spin_unlock_irq(&modlist_lock);
970 return ret; 968 return ret;
971} 969}
972 970
@@ -1204,6 +1202,39 @@ void *__symbol_get(const char *symbol)
1204} 1202}
1205EXPORT_SYMBOL_GPL(__symbol_get); 1203EXPORT_SYMBOL_GPL(__symbol_get);
1206 1204
1205/*
1206 * Ensure that an exported symbol [global namespace] does not already exist
1207 * in the kernel or in some other module's exported symbol table.
1208 */
1209static int verify_export_symbols(struct module *mod)
1210{
1211 const char *name = NULL;
1212 unsigned long i, ret = 0;
1213 struct module *owner;
1214 const unsigned long *crc;
1215
1216 for (i = 0; i < mod->num_syms; i++)
1217 if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) {
1218 name = mod->syms[i].name;
1219 ret = -ENOEXEC;
1220 goto dup;
1221 }
1222
1223 for (i = 0; i < mod->num_gpl_syms; i++)
1224 if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) {
1225 name = mod->gpl_syms[i].name;
1226 ret = -ENOEXEC;
1227 goto dup;
1228 }
1229
1230dup:
1231 if (ret)
1232 printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n",
1233 mod->name, name, module_name(owner));
1234
1235 return ret;
1236}
1237
1207/* Change all symbols so that sh_value encodes the pointer directly. */ 1238/* Change all symbols so that sh_value encodes the pointer directly. */
1208static int simplify_symbols(Elf_Shdr *sechdrs, 1239static int simplify_symbols(Elf_Shdr *sechdrs,
1209 unsigned int symindex, 1240 unsigned int symindex,
@@ -1715,6 +1746,11 @@ static struct module *load_module(void __user *umod,
1715 /* Set up license info based on the info section */ 1746 /* Set up license info based on the info section */
1716 set_license(mod, get_modinfo(sechdrs, infoindex, "license")); 1747 set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
1717 1748
1749 if (strcmp(mod->name, "ndiswrapper") == 0)
1750 add_taint(TAINT_PROPRIETARY_MODULE);
1751 if (strcmp(mod->name, "driverloader") == 0)
1752 add_taint(TAINT_PROPRIETARY_MODULE);
1753
1718#ifdef CONFIG_MODULE_UNLOAD 1754#ifdef CONFIG_MODULE_UNLOAD
1719 /* Set up MODINFO_ATTR fields */ 1755 /* Set up MODINFO_ATTR fields */
1720 setup_modinfo(mod, sechdrs, infoindex); 1756 setup_modinfo(mod, sechdrs, infoindex);
@@ -1767,6 +1803,12 @@ static struct module *load_module(void __user *umod,
1767 goto cleanup; 1803 goto cleanup;
1768 } 1804 }
1769 1805
1806 /* Find duplicate symbols */
1807 err = verify_export_symbols(mod);
1808
1809 if (err < 0)
1810 goto cleanup;
1811
1770 /* Set up and sort exception table */ 1812 /* Set up and sort exception table */
1771 mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); 1813 mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
1772 mod->extable = extable = (void *)sechdrs[exindex].sh_addr; 1814 mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
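load_module() now refuses a module whose exports collide with symbols already exported by the kernel or by another loaded module, rather than letting the duplicate shadow the original. Stripped of the module internals, the added check reduces to a loop of this shape (find_existing() is an illustrative stand-in for the internal __find_symbol()):

#include <linux/errno.h>

static int ex_check_duplicate_exports(const char *names[], unsigned int n,
                                      int (*find_existing)(const char *name))
{
        unsigned int i;

        for (i = 0; i < n; i++)
                if (find_existing(names[i]))
                        return -ENOEXEC;        /* same errno the patch uses */
        return 0;
}

The real verify_export_symbols() runs this twice, once over mod->syms and once over mod->gpl_syms, and names the clashing symbol and its owner in the error message.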
diff --git a/kernel/pid.c b/kernel/pid.c
index edba31c681ac..1acc07246991 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
136 struct hlist_node *elem; 136 struct hlist_node *elem;
137 struct pid *pid; 137 struct pid *pid;
138 138
139 hlist_for_each_entry(pid, elem, 139 hlist_for_each_entry_rcu(pid, elem,
140 &pid_hash[type][pid_hashfn(nr)], pid_chain) { 140 &pid_hash[type][pid_hashfn(nr)], pid_chain) {
141 if (pid->nr == nr) 141 if (pid->nr == nr)
142 return pid; 142 return pid;
@@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
150 150
151 task_pid = &task->pids[type]; 151 task_pid = &task->pids[type];
152 pid = find_pid(type, nr); 152 pid = find_pid(type, nr);
153 task_pid->nr = nr;
153 if (pid == NULL) { 154 if (pid == NULL) {
154 hlist_add_head(&task_pid->pid_chain,
155 &pid_hash[type][pid_hashfn(nr)]);
156 INIT_LIST_HEAD(&task_pid->pid_list); 155 INIT_LIST_HEAD(&task_pid->pid_list);
156 hlist_add_head_rcu(&task_pid->pid_chain,
157 &pid_hash[type][pid_hashfn(nr)]);
157 } else { 158 } else {
158 INIT_HLIST_NODE(&task_pid->pid_chain); 159 INIT_HLIST_NODE(&task_pid->pid_chain);
159 list_add_tail(&task_pid->pid_list, &pid->pid_list); 160 list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
160 } 161 }
161 task_pid->nr = nr;
162 162
163 return 0; 163 return 0;
164} 164}
@@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type)
170 170
171 pid = &task->pids[type]; 171 pid = &task->pids[type];
172 if (!hlist_unhashed(&pid->pid_chain)) { 172 if (!hlist_unhashed(&pid->pid_chain)) {
173 hlist_del(&pid->pid_chain);
174 173
175 if (list_empty(&pid->pid_list)) 174 if (list_empty(&pid->pid_list)) {
176 nr = pid->nr; 175 nr = pid->nr;
177 else { 176 hlist_del_rcu(&pid->pid_chain);
177 } else {
178 pid_next = list_entry(pid->pid_list.next, 178 pid_next = list_entry(pid->pid_list.next,
179 struct pid, pid_list); 179 struct pid, pid_list);
180 /* insert next pid from pid_list to hash */ 180 /* insert next pid from pid_list to hash */
181 hlist_add_head(&pid_next->pid_chain, 181 hlist_replace_rcu(&pid->pid_chain,
182 &pid_hash[type][pid_hashfn(pid_next->nr)]); 182 &pid_next->pid_chain);
183 } 183 }
184 } 184 }
185 185
186 list_del(&pid->pid_list); 186 list_del_rcu(&pid->pid_list);
187 pid->nr = 0; 187 pid->nr = 0;
188 188
189 return nr; 189 return nr;
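With insertions and removals switched to the _rcu list primitives above, pid hash lookups may proceed locklessly inside an RCU read-side critical section. A condensed sketch of such a reader; the struct is trimmed to the two fields the walk touches, and holding the result beyond the read section would need a reference, which find_pid()'s callers get from locks they already hold:

#include <linux/list.h>
#include <linux/rcupdate.h>

struct ex_pid {
        int nr;
        struct hlist_node pid_chain;
};

static int ex_pid_exists(struct hlist_head *bucket, int nr)
{
        struct ex_pid *pid;
        struct hlist_node *elem;
        int found = 0;

        rcu_read_lock();
        hlist_for_each_entry_rcu(pid, elem, bucket, pid_chain)
                if (pid->nr == nr) {
                        found = 1;
                        break;
                }
        rcu_read_unlock();
        return found;
}

This is also why attach_pid() now moves INIT_LIST_HEAD() and the ->nr assignment ahead of hlist_add_head_rcu(): a lockless reader must never observe a published node that is only half initialized.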
diff --git a/kernel/printk.c b/kernel/printk.c
index 5287be83e3e7..2251be80cd22 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -569,7 +569,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
569 p[1] <= '7' && p[2] == '>') { 569 p[1] <= '7' && p[2] == '>') {
570 loglev_char = p[1]; 570 loglev_char = p[1];
571 p += 3; 571 p += 3;
572 printed_len += 3; 572 printed_len -= 3;
573 } else { 573 } else {
574 loglev_char = default_message_loglevel 574 loglev_char = default_message_loglevel
575 + '0'; 575 + '0';
@@ -584,7 +584,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
584 584
585 for (tp = tbuf; tp < tbuf + tlen; tp++) 585 for (tp = tbuf; tp < tbuf + tlen; tp++)
586 emit_log_char(*tp); 586 emit_log_char(*tp);
587 printed_len += tlen - 3; 587 printed_len += tlen;
588 } else { 588 } else {
589 if (p[0] != '<' || p[1] < '0' || 589 if (p[0] != '<' || p[1] < '0' ||
590 p[1] > '7' || p[2] != '>') { 590 p[1] > '7' || p[2] != '>') {
@@ -592,8 +592,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
592 emit_log_char(default_message_loglevel 592 emit_log_char(default_message_loglevel
593 + '0'); 593 + '0');
594 emit_log_char('>'); 594 emit_log_char('>');
595 printed_len += 3;
595 } 596 }
596 printed_len += 3;
597 } 597 }
598 log_level_unknown = 0; 598 log_level_unknown = 0;
599 if (!*p) 599 if (!*p)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 656476eedb1b..cceaf09ac413 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -408,54 +408,62 @@ int ptrace_request(struct task_struct *child, long request,
408 return ret; 408 return ret;
409} 409}
410 410
411#ifndef __ARCH_SYS_PTRACE 411/**
412static int ptrace_get_task_struct(long request, long pid, 412 * ptrace_traceme -- helper for PTRACE_TRACEME
413 struct task_struct **childp) 413 *
414 * Performs checks and sets PT_PTRACED.
415 * Should be used by all ptrace implementations for PTRACE_TRACEME.
416 */
417int ptrace_traceme(void)
414{ 418{
415 struct task_struct *child;
416 int ret; 419 int ret;
417 420
418 /* 421 /*
419 * Callers use child == NULL as an indication to exit early even 422 * Are we already being traced?
420 * when the return value is 0, so make sure it is non-NULL here. 423 */
424 if (current->ptrace & PT_PTRACED)
425 return -EPERM;
426 ret = security_ptrace(current->parent, current);
427 if (ret)
428 return -EPERM;
429 /*
430 * Set the ptrace bit in the process ptrace flags.
421 */ 431 */
422 *childp = NULL; 432 current->ptrace |= PT_PTRACED;
433 return 0;
434}
423 435
424 if (request == PTRACE_TRACEME) { 436/**
425 /* 437 * ptrace_get_task_struct -- grab a task struct reference for ptrace
426 * Are we already being traced? 438 * @pid: process id to grab a task_struct reference of
427 */ 439 *
428 if (current->ptrace & PT_PTRACED) 440 * This function is a helper for ptrace implementations. It checks
429 return -EPERM; 441 * permissions and then grabs a task struct for use by the actual
430 ret = security_ptrace(current->parent, current); 442 * ptrace implementation.
431 if (ret) 443 *
432 return -EPERM; 444 * Returns the task_struct for @pid or an ERR_PTR() on failure.
433 /* 445 */
434 * Set the ptrace bit in the process ptrace flags. 446struct task_struct *ptrace_get_task_struct(pid_t pid)
435 */ 447{
436 current->ptrace |= PT_PTRACED; 448 struct task_struct *child;
437 return 0;
438 }
439 449
440 /* 450 /*
441 * You may not mess with init 451 * Tracing init is not allowed.
442 */ 452 */
443 if (pid == 1) 453 if (pid == 1)
444 return -EPERM; 454 return ERR_PTR(-EPERM);
445 455
446 ret = -ESRCH;
447 read_lock(&tasklist_lock); 456 read_lock(&tasklist_lock);
448 child = find_task_by_pid(pid); 457 child = find_task_by_pid(pid);
449 if (child) 458 if (child)
450 get_task_struct(child); 459 get_task_struct(child);
451 read_unlock(&tasklist_lock); 460 read_unlock(&tasklist_lock);
452 if (!child) 461 if (!child)
453 return -ESRCH; 462 return ERR_PTR(-ESRCH);
454 463 return child;
455 *childp = child;
456 return 0;
457} 464}
458 465
466#ifndef __ARCH_SYS_PTRACE
459asmlinkage long sys_ptrace(long request, long pid, long addr, long data) 467asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
460{ 468{
461 struct task_struct *child; 469 struct task_struct *child;
@@ -465,9 +473,16 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
465 * This lock_kernel fixes a subtle race with suid exec 473 * This lock_kernel fixes a subtle race with suid exec
466 */ 474 */
467 lock_kernel(); 475 lock_kernel();
468 ret = ptrace_get_task_struct(request, pid, &child); 476 if (request == PTRACE_TRACEME) {
469 if (!child) 477 ret = ptrace_traceme();
470 goto out; 478 goto out;
479 }
480
481 child = ptrace_get_task_struct(pid);
482 if (IS_ERR(child)) {
483 ret = PTR_ERR(child);
484 goto out;
485 }
471 486
472 if (request == PTRACE_ATTACH) { 487 if (request == PTRACE_ATTACH) {
473 ret = ptrace_attach(child); 488 ret = ptrace_attach(child);
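PTRACE_TRACEME handling and the task lookup are now reusable helpers, so an architecture with its own __ARCH_SYS_PTRACE can share them instead of duplicating the checks. A sketch of the resulting arch prologue (ex_arch_sys_ptrace() is illustrative; a real port would dispatch to its own request handling where ptrace_request() appears here):

#include <linux/err.h>
#include <linux/ptrace.h>
#include <linux/sched.h>

static long ex_arch_sys_ptrace(long request, long pid, long addr, long data)
{
        struct task_struct *child;
        long ret;

        if (request == PTRACE_TRACEME)
                return ptrace_traceme();

        child = ptrace_get_task_struct(pid);
        if (IS_ERR(child))
                return PTR_ERR(child);          /* -EPERM or -ESRCH */

        ret = ptrace_request(child, request, addr, data);
        put_task_struct(child);                 /* drop the lookup's reference */
        return ret;
}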
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 48d3bce465b8..30b0bba03859 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/smp.h> 37#include <linux/smp.h>
38#include <linux/rcupdate.h>
38#include <linux/interrupt.h> 39#include <linux/interrupt.h>
39#include <linux/sched.h> 40#include <linux/sched.h>
40#include <asm/atomic.h> 41#include <asm/atomic.h>
@@ -45,7 +46,6 @@
45#include <linux/percpu.h> 46#include <linux/percpu.h>
46#include <linux/notifier.h> 47#include <linux/notifier.h>
47#include <linux/rcupdate.h> 48#include <linux/rcupdate.h>
48#include <linux/rcuref.h>
49#include <linux/cpu.h> 49#include <linux/cpu.h>
50 50
51/* Definition for rcupdate control block. */ 51/* Definition for rcupdate control block. */
@@ -61,9 +61,9 @@ struct rcu_state {
61 /* for current batch to proceed. */ 61 /* for current batch to proceed. */
62}; 62};
63 63
64static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp = 64static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp =
65 {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; 65 {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
66static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp = 66static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp =
67 {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; 67 {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
68 68
69DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; 69DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
@@ -73,19 +73,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
73static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; 73static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
74static int maxbatch = 10000; 74static int maxbatch = 10000;
75 75
76#ifndef __HAVE_ARCH_CMPXCHG
77/*
78 * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
79 * 32 bit atomic_t implementations, and a hash function similar to that
80 * for our refcounting needs.
81 * Can't help multiprocessors which do not have cmpxchg :(
82 */
83
84spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
85 [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
86};
87#endif
88
89/** 76/**
90 * call_rcu - Queue an RCU callback for invocation after a grace period. 77 * call_rcu - Queue an RCU callback for invocation after a grace period.
91 * @head: structure to be used for queueing the RCU updates. 78 * @head: structure to be used for queueing the RCU updates.
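The block deleted above was the fallback for SMP machines without cmpxchg: rather than updating a reference count atomically, each counter's address hashed onto one spinlock in a small array, and the count was bumped under that lock. A condensed sketch of the scheme (sizes and names are illustrative; the lock and unlock halves lived behind the rcuref API):

#include <linux/spinlock.h>

#define EX_HASH_SIZE 32

static spinlock_t ex_ref_hash[EX_HASH_SIZE] = {
        [0 ... EX_HASH_SIZE - 1] = SPIN_LOCK_UNLOCKED
};

static inline spinlock_t *ex_ref_lock(int *ref)
{
        /* hash the counter's address onto one of the locks */
        return &ex_ref_hash[((unsigned long)ref >> 4) % EX_HASH_SIZE];
}

static inline void ex_ref_get(int *ref)
{
        unsigned long flags;

        spin_lock_irqsave(ex_ref_lock(ref), flags);
        (*ref)++;
        spin_unlock_irqrestore(ex_ref_lock(ref), flags);
}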
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 49fbbeff201c..773219907dd8 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -39,7 +39,6 @@
39#include <linux/moduleparam.h> 39#include <linux/moduleparam.h>
40#include <linux/percpu.h> 40#include <linux/percpu.h>
41#include <linux/notifier.h> 41#include <linux/notifier.h>
42#include <linux/rcuref.h>
43#include <linux/cpu.h> 42#include <linux/cpu.h>
44#include <linux/random.h> 43#include <linux/random.h>
45#include <linux/delay.h> 44#include <linux/delay.h>
@@ -49,9 +48,11 @@
49MODULE_LICENSE("GPL"); 48MODULE_LICENSE("GPL");
50 49
51static int nreaders = -1; /* # reader threads, defaults to 4*ncpus */ 50static int nreaders = -1; /* # reader threads, defaults to 4*ncpus */
52static int stat_interval = 0; /* Interval between stats, in seconds. */ 51static int stat_interval; /* Interval between stats, in seconds. */
53 /* Defaults to "only at end of test". */ 52 /* Defaults to "only at end of test". */
54static int verbose = 0; /* Print more debug info. */ 53static int verbose; /* Print more debug info. */
54static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
55static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
55 56
56MODULE_PARM(nreaders, "i"); 57MODULE_PARM(nreaders, "i");
57MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); 58MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
@@ -59,6 +60,10 @@ MODULE_PARM(stat_interval, "i");
59MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); 60MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
60MODULE_PARM(verbose, "i"); 61MODULE_PARM(verbose, "i");
61MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); 62MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
63MODULE_PARM(test_no_idle_hz, "i");
64MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
65MODULE_PARM(shuffle_interval, "i");
66MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
62#define TORTURE_FLAG "rcutorture: " 67#define TORTURE_FLAG "rcutorture: "
63#define PRINTK_STRING(s) \ 68#define PRINTK_STRING(s) \
64 do { printk(KERN_ALERT TORTURE_FLAG s "\n"); } while (0) 69 do { printk(KERN_ALERT TORTURE_FLAG s "\n"); } while (0)
@@ -73,6 +78,7 @@ static int nrealreaders;
73static struct task_struct *writer_task; 78static struct task_struct *writer_task;
74static struct task_struct **reader_tasks; 79static struct task_struct **reader_tasks;
75static struct task_struct *stats_task; 80static struct task_struct *stats_task;
81static struct task_struct *shuffler_task;
76 82
77#define RCU_TORTURE_PIPE_LEN 10 83#define RCU_TORTURE_PIPE_LEN 10
78 84
@@ -103,7 +109,7 @@ atomic_t n_rcu_torture_error;
103/* 109/*
104 * Allocate an element from the rcu_tortures pool. 110 * Allocate an element from the rcu_tortures pool.
105 */ 111 */
106struct rcu_torture * 112static struct rcu_torture *
107rcu_torture_alloc(void) 113rcu_torture_alloc(void)
108{ 114{
109 struct list_head *p; 115 struct list_head *p;
@@ -376,12 +382,77 @@ rcu_torture_stats(void *arg)
376 return 0; 382 return 0;
377} 383}
378 384
385static int rcu_idle_cpu; /* Force all torture tasks off this CPU */
386
387/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case
388 * is @rcu_idle_cpu = -1, in which case we allow the tasks to run on all CPUs.
389 */
390void rcu_torture_shuffle_tasks(void)
391{
392 cpumask_t tmp_mask = CPU_MASK_ALL;
393 int i;
394
395 lock_cpu_hotplug();
396
397 /* No point in shuffling if there is only one online CPU (ex: UP) */
398 if (num_online_cpus() == 1) {
399 unlock_cpu_hotplug();
400 return;
401 }
402
403 if (rcu_idle_cpu != -1)
404 cpu_clear(rcu_idle_cpu, tmp_mask);
405
406 set_cpus_allowed(current, tmp_mask);
407
408 if (reader_tasks != NULL) {
409 for (i = 0; i < nrealreaders; i++)
410 if (reader_tasks[i])
411 set_cpus_allowed(reader_tasks[i], tmp_mask);
412 }
413
414 if (writer_task)
415 set_cpus_allowed(writer_task, tmp_mask);
416
417 if (stats_task)
418 set_cpus_allowed(stats_task, tmp_mask);
419
420 if (rcu_idle_cpu == -1)
421 rcu_idle_cpu = num_online_cpus() - 1;
422 else
423 rcu_idle_cpu--;
424
425 unlock_cpu_hotplug();
426}
427
428/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
429 * system to become idle, one at a time, and cut off its timer ticks. This is
430 * meant to test RCU's support for tickless idle CPUs.
431 */
432static int
433rcu_torture_shuffle(void *arg)
434{
435 VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started");
436 do {
437 schedule_timeout_interruptible(shuffle_interval * HZ);
438 rcu_torture_shuffle_tasks();
439 } while (!kthread_should_stop());
440 VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
441 return 0;
442}
443
379static void 444static void
380rcu_torture_cleanup(void) 445rcu_torture_cleanup(void)
381{ 446{
382 int i; 447 int i;
383 448
384 fullstop = 1; 449 fullstop = 1;
450 if (shuffler_task != NULL) {
451 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
452 kthread_stop(shuffler_task);
453 }
454 shuffler_task = NULL;
455
385 if (writer_task != NULL) { 456 if (writer_task != NULL) {
386 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); 457 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
387 kthread_stop(writer_task); 458 kthread_stop(writer_task);
@@ -430,9 +501,11 @@ rcu_torture_init(void)
430 nrealreaders = nreaders; 501 nrealreaders = nreaders;
431 else 502 else
432 nrealreaders = 2 * num_online_cpus(); 503 nrealreaders = 2 * num_online_cpus();
433 printk(KERN_ALERT TORTURE_FLAG 504 printk(KERN_ALERT TORTURE_FLAG "--- Start of test: nreaders=%d "
434 "--- Start of test: nreaders=%d stat_interval=%d verbose=%d\n", 505 "stat_interval=%d verbose=%d test_no_idle_hz=%d "
435 nrealreaders, stat_interval, verbose); 506 "shuffle_interval=%d\n",
507 nrealreaders, stat_interval, verbose, test_no_idle_hz,
508 shuffle_interval);
436 fullstop = 0; 509 fullstop = 0;
437 510
438 /* Set up the freelist. */ 511 /* Set up the freelist. */
@@ -502,6 +575,18 @@ rcu_torture_init(void)
502 goto unwind; 575 goto unwind;
503 } 576 }
504 } 577 }
578 if (test_no_idle_hz) {
579 rcu_idle_cpu = num_online_cpus() - 1;
580 /* Create the shuffler thread */
581 shuffler_task = kthread_run(rcu_torture_shuffle, NULL,
582 "rcu_torture_shuffle");
583 if (IS_ERR(shuffler_task)) {
584 firsterr = PTR_ERR(shuffler_task);
585 VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler");
586 shuffler_task = NULL;
587 goto unwind;
588 }
589 }
505 return 0; 590 return 0;
506 591
507unwind: 592unwind:
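The heart of the shuffle above is plain cpumask surgery: clear the CPU that should go idle out of an all-CPUs mask and re-pin each torture task to the remainder. In isolation (ex_vacate_cpu() is illustrative; the real function also takes the hotplug lock and walks every reader, the writer and the stats task):

#include <linux/cpumask.h>
#include <linux/sched.h>

static void ex_vacate_cpu(struct task_struct *t, int idle_cpu)
{
        cpumask_t mask = CPU_MASK_ALL;

        if (idle_cpu != -1)
                cpu_clear(idle_cpu, mask);      /* everywhere but idle_cpu */
        set_cpus_allowed(t, mask);
}

rcu_torture_shuffle_tasks() then rotates rcu_idle_cpu downward each interval so every CPU eventually gets its turn to go tickless.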
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f46c94cc29e..92733091154c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p)
176#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ 176#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
177 < (long long) (sd)->cache_hot_time) 177 < (long long) (sd)->cache_hot_time)
178 178
179void __put_task_struct_cb(struct rcu_head *rhp)
180{
181 __put_task_struct(container_of(rhp, struct task_struct, rcu));
182}
183
184EXPORT_SYMBOL_GPL(__put_task_struct_cb);
185
179/* 186/*
180 * These are the runqueue data structures: 187 * These are the runqueue data structures:
181 */ 188 */
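__put_task_struct_cb() is the standard shape for RCU-deferred destruction: embed an rcu_head in the object, hand it to call_rcu(), and let the callback recover the enclosing structure with container_of(). The pattern in miniature, with ex_obj standing in for task_struct:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct ex_obj {
        int payload;
        struct rcu_head rcu;
};

static void ex_free_rcu(struct rcu_head *rhp)
{
        kfree(container_of(rhp, struct ex_obj, rcu));
}

static void ex_put(struct ex_obj *obj)
{
        /* actually freed only after all current RCU readers finish */
        call_rcu(&obj->rcu, ex_free_rcu);
}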
diff --git a/kernel/signal.c b/kernel/signal.c
index d7611f189ef7..08aa5b263f36 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk)
329 /* Ok, we're done with the signal handlers */ 329 /* Ok, we're done with the signal handlers */
330 tsk->sighand = NULL; 330 tsk->sighand = NULL;
331 if (atomic_dec_and_test(&sighand->count)) 331 if (atomic_dec_and_test(&sighand->count))
332 kmem_cache_free(sighand_cachep, sighand); 332 sighand_free(sighand);
333} 333}
334 334
335void exit_sighand(struct task_struct *tsk) 335void exit_sighand(struct task_struct *tsk)
336{ 336{
337 write_lock_irq(&tasklist_lock); 337 write_lock_irq(&tasklist_lock);
338 __exit_sighand(tsk); 338 rcu_read_lock();
339 if (tsk->sighand != NULL) {
340 struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
341 spin_lock(&sighand->siglock);
342 __exit_sighand(tsk);
343 spin_unlock(&sighand->siglock);
344 }
345 rcu_read_unlock();
339 write_unlock_irq(&tasklist_lock); 346 write_unlock_irq(&tasklist_lock);
340} 347}
341 348
@@ -345,19 +352,20 @@ void exit_sighand(struct task_struct *tsk)
345void __exit_signal(struct task_struct *tsk) 352void __exit_signal(struct task_struct *tsk)
346{ 353{
347 struct signal_struct * sig = tsk->signal; 354 struct signal_struct * sig = tsk->signal;
348 struct sighand_struct * sighand = tsk->sighand; 355 struct sighand_struct * sighand;
349 356
350 if (!sig) 357 if (!sig)
351 BUG(); 358 BUG();
352 if (!atomic_read(&sig->count)) 359 if (!atomic_read(&sig->count))
353 BUG(); 360 BUG();
361 rcu_read_lock();
362 sighand = rcu_dereference(tsk->sighand);
354 spin_lock(&sighand->siglock); 363 spin_lock(&sighand->siglock);
355 posix_cpu_timers_exit(tsk); 364 posix_cpu_timers_exit(tsk);
356 if (atomic_dec_and_test(&sig->count)) { 365 if (atomic_dec_and_test(&sig->count)) {
357 posix_cpu_timers_exit_group(tsk); 366 posix_cpu_timers_exit_group(tsk);
358 if (tsk == sig->curr_target)
359 sig->curr_target = next_thread(tsk);
360 tsk->signal = NULL; 367 tsk->signal = NULL;
368 __exit_sighand(tsk);
361 spin_unlock(&sighand->siglock); 369 spin_unlock(&sighand->siglock);
362 flush_sigqueue(&sig->shared_pending); 370 flush_sigqueue(&sig->shared_pending);
363 } else { 371 } else {
@@ -389,9 +397,11 @@ void __exit_signal(struct task_struct *tsk)
389 sig->nvcsw += tsk->nvcsw; 397 sig->nvcsw += tsk->nvcsw;
390 sig->nivcsw += tsk->nivcsw; 398 sig->nivcsw += tsk->nivcsw;
391 sig->sched_time += tsk->sched_time; 399 sig->sched_time += tsk->sched_time;
400 __exit_sighand(tsk);
392 spin_unlock(&sighand->siglock); 401 spin_unlock(&sighand->siglock);
393 sig = NULL; /* Marker for below. */ 402 sig = NULL; /* Marker for below. */
394 } 403 }
404 rcu_read_unlock();
395 clear_tsk_thread_flag(tsk,TIF_SIGPENDING); 405 clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
396 flush_sigqueue(&tsk->pending); 406 flush_sigqueue(&tsk->pending);
397 if (sig) { 407 if (sig) {
@@ -613,6 +623,33 @@ void signal_wake_up(struct task_struct *t, int resume)
613 * Returns 1 if any signals were found. 623 * Returns 1 if any signals were found.
614 * 624 *
615 * All callers must be holding the siglock. 625 * All callers must be holding the siglock.
626 *
627 * This version takes a sigset mask and looks at all signals,
628 * not just those in the first mask word.
629 */
630static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
631{
632 struct sigqueue *q, *n;
633 sigset_t m;
634
635 sigandsets(&m, mask, &s->signal);
636 if (sigisemptyset(&m))
637 return 0;
638
639 signandsets(&s->signal, &s->signal, mask);
640 list_for_each_entry_safe(q, n, &s->list, list) {
641 if (sigismember(mask, q->info.si_signo)) {
642 list_del_init(&q->list);
643 __sigqueue_free(q);
644 }
645 }
646 return 1;
647}
648/*
649 * Remove signals in mask from the pending set and queue.
650 * Returns 1 if any signals were found.
651 *
652 * All callers must be holding the siglock.
616 */ 653 */
617static int rm_from_queue(unsigned long mask, struct sigpending *s) 654static int rm_from_queue(unsigned long mask, struct sigpending *s)
618{ 655{
@@ -1080,18 +1117,29 @@ void zap_other_threads(struct task_struct *p)
1080} 1117}
1081 1118
1082/* 1119/*
1083 * Must be called with the tasklist_lock held for reading! 1120 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
1084 */ 1121 */
1085int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) 1122int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1086{ 1123{
1087 unsigned long flags; 1124 unsigned long flags;
1125 struct sighand_struct *sp;
1088 int ret; 1126 int ret;
1089 1127
1128retry:
1090 ret = check_kill_permission(sig, info, p); 1129 ret = check_kill_permission(sig, info, p);
1091 if (!ret && sig && p->sighand) { 1130 if (!ret && sig && (sp = rcu_dereference(p->sighand))) {
1092 spin_lock_irqsave(&p->sighand->siglock, flags); 1131 spin_lock_irqsave(&sp->siglock, flags);
1132 if (p->sighand != sp) {
1133 spin_unlock_irqrestore(&sp->siglock, flags);
1134 goto retry;
1135 }
1136 if ((atomic_read(&sp->count) == 0) ||
1137 (atomic_read(&p->usage) == 0)) {
1138 spin_unlock_irqrestore(&sp->siglock, flags);
1139 return -ESRCH;
1140 }
1093 ret = __group_send_sig_info(sig, info, p); 1141 ret = __group_send_sig_info(sig, info, p);
1094 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1142 spin_unlock_irqrestore(&sp->siglock, flags);
1095 } 1143 }
1096 1144
1097 return ret; 1145 return ret;
@@ -1136,14 +1184,21 @@ int
1136kill_proc_info(int sig, struct siginfo *info, pid_t pid) 1184kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1137{ 1185{
1138 int error; 1186 int error;
1187 int acquired_tasklist_lock = 0;
1139 struct task_struct *p; 1188 struct task_struct *p;
1140 1189
1141 read_lock(&tasklist_lock); 1190 rcu_read_lock();
1191 if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) {
1192 read_lock(&tasklist_lock);
1193 acquired_tasklist_lock = 1;
1194 }
1142 p = find_task_by_pid(pid); 1195 p = find_task_by_pid(pid);
1143 error = -ESRCH; 1196 error = -ESRCH;
1144 if (p) 1197 if (p)
1145 error = group_send_sig_info(sig, info, p); 1198 error = group_send_sig_info(sig, info, p);
1146 read_unlock(&tasklist_lock); 1199 if (unlikely(acquired_tasklist_lock))
1200 read_unlock(&tasklist_lock);
1201 rcu_read_unlock();
1147 return error; 1202 return error;
1148} 1203}
1149 1204
@@ -1163,8 +1218,7 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
1163 ret = -ESRCH; 1218 ret = -ESRCH;
1164 goto out_unlock; 1219 goto out_unlock;
1165 } 1220 }
1166 if ((!info || ((unsigned long)info != 1 && 1221 if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
1167 (unsigned long)info != 2 && SI_FROMUSER(info)))
1168 && (euid != p->suid) && (euid != p->uid) 1222 && (euid != p->suid) && (euid != p->uid)
1169 && (uid != p->suid) && (uid != p->uid)) { 1223 && (uid != p->suid) && (uid != p->uid)) {
1170 ret = -EPERM; 1224 ret = -EPERM;
@@ -1355,16 +1409,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1355{ 1409{
1356 unsigned long flags; 1410 unsigned long flags;
1357 int ret = 0; 1411 int ret = 0;
1412 struct sighand_struct *sh;
1358 1413
1359 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); 1414 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1360 read_lock(&tasklist_lock); 1415
1416 /*
1417 * The RCU-based delayed sighand destruction makes it possible to
1418 * run this without the tasklist lock held. The task struct itself
1419 * cannot go away as create_timer did get_task_struct().
1420 *
1421 * We return -1 when the task is marked exiting, so
1422 * posix_timer_event() can redirect it to the group leader.
1423 */
1424 rcu_read_lock();
1361 1425
1362 if (unlikely(p->flags & PF_EXITING)) { 1426 if (unlikely(p->flags & PF_EXITING)) {
1363 ret = -1; 1427 ret = -1;
1364 goto out_err; 1428 goto out_err;
1365 } 1429 }
1366 1430
1367 spin_lock_irqsave(&p->sighand->siglock, flags); 1431retry:
1432 sh = rcu_dereference(p->sighand);
1433
1434 spin_lock_irqsave(&sh->siglock, flags);
1435 if (p->sighand != sh) {
1436 /* We raced with exec() in a multithreaded process... */
1437 spin_unlock_irqrestore(&sh->siglock, flags);
1438 goto retry;
1439 }
1440
1441 /*
1442 * We do the check here again to handle the following scenario:
1443 *
1444 * CPU 0 CPU 1
1445 * send_sigqueue
1446 * check PF_EXITING
1447 * interrupt exit code running
1448 * __exit_signal
1449 * lock sighand->siglock
1450 * unlock sighand->siglock
1451 * lock sh->siglock
1452 * add(tsk->pending) flush_sigqueue(tsk->pending)
1453 *
1454 */
1455
1456 if (unlikely(p->flags & PF_EXITING)) {
1457 ret = -1;
1458 goto out;
1459 }
1368 1460
1369 if (unlikely(!list_empty(&q->list))) { 1461 if (unlikely(!list_empty(&q->list))) {
1370 /* 1462 /*
@@ -1388,9 +1480,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1388 signal_wake_up(p, sig == SIGKILL); 1480 signal_wake_up(p, sig == SIGKILL);
1389 1481
1390out: 1482out:
1391 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1483 spin_unlock_irqrestore(&sh->siglock, flags);
1392out_err: 1484out_err:
1393 read_unlock(&tasklist_lock); 1485 rcu_read_unlock();
1394 1486
1395 return ret; 1487 return ret;
1396} 1488}
@@ -1402,7 +1494,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1402 int ret = 0; 1494 int ret = 0;
1403 1495
1404 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); 1496 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1497
1405 read_lock(&tasklist_lock); 1498 read_lock(&tasklist_lock);
1499 /* Since it_lock is held, p->sighand cannot be NULL. */
1406 spin_lock_irqsave(&p->sighand->siglock, flags); 1500 spin_lock_irqsave(&p->sighand->siglock, flags);
1407 handle_stop_signal(sig, p); 1501 handle_stop_signal(sig, p);
1408 1502
@@ -1436,7 +1530,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1436out: 1530out:
1437 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1531 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1438 read_unlock(&tasklist_lock); 1532 read_unlock(&tasklist_lock);
1439 return(ret); 1533 return ret;
1440} 1534}
1441 1535
1442/* 1536/*
@@ -2338,6 +2432,7 @@ int
2338do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) 2432do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2339{ 2433{
2340 struct k_sigaction *k; 2434 struct k_sigaction *k;
2435 sigset_t mask;
2341 2436
2342 if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) 2437 if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
2343 return -EINVAL; 2438 return -EINVAL;
@@ -2385,9 +2480,11 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2385 *k = *act; 2480 *k = *act;
2386 sigdelsetmask(&k->sa.sa_mask, 2481 sigdelsetmask(&k->sa.sa_mask,
2387 sigmask(SIGKILL) | sigmask(SIGSTOP)); 2482 sigmask(SIGKILL) | sigmask(SIGSTOP));
2388 rm_from_queue(sigmask(sig), &t->signal->shared_pending); 2483 sigemptyset(&mask);
2484 sigaddset(&mask, sig);
2485 rm_from_queue_full(&mask, &t->signal->shared_pending);
2389 do { 2486 do {
2390 rm_from_queue(sigmask(sig), &t->pending); 2487 rm_from_queue_full(&mask, &t->pending);
2391 recalc_sigpending_tsk(t); 2488 recalc_sigpending_tsk(t);
2392 t = next_thread(t); 2489 t = next_thread(t);
2393 } while (t != current); 2490 } while (t != current);
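group_send_sig_info() and send_sigqueue() above both lean on the same lock-and-revalidate dance: fetch the RCU-protected sighand pointer, take its lock, then confirm the pointer was not swapped underneath (exec() in a multithreaded process can replace it) before trusting it. Pulled out into a hypothetical helper:

#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

static struct sighand_struct *ex_lock_sighand(struct task_struct *p,
                                              unsigned long *flags)
{
        struct sighand_struct *sh;

        rcu_read_lock();
retry:
        sh = rcu_dereference(p->sighand);
        if (!sh) {
                rcu_read_unlock();
                return NULL;
        }
        spin_lock_irqsave(&sh->siglock, *flags);
        if (p->sighand != sh) {
                /* lost a race with exec(); start over */
                spin_unlock_irqrestore(&sh->siglock, *flags);
                goto retry;
        }
        rcu_read_unlock();      /* siglock now pins sh */
        return sh;
}

The caller releases with spin_unlock_irqrestore(&sh->siglock, *flags); the RCU-delayed sighand_free() above is what keeps the window between rcu_dereference() and spin_lock safe.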
diff --git a/kernel/sys.c b/kernel/sys.c
index eecf84526afe..b6941e06d5d5 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -489,6 +489,12 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
489 magic2 != LINUX_REBOOT_MAGIC2C)) 489 magic2 != LINUX_REBOOT_MAGIC2C))
490 return -EINVAL; 490 return -EINVAL;
491 491
492 /* Instead of trying to make the power_off code look like
493 * halt when pm_power_off is not set, do it the easy way.
494 */
495 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
496 cmd = LINUX_REBOOT_CMD_HALT;
497
492 lock_kernel(); 498 lock_kernel();
493 switch (cmd) { 499 switch (cmd) {
494 case LINUX_REBOOT_CMD_RESTART: 500 case LINUX_REBOOT_CMD_RESTART:
@@ -1084,10 +1090,11 @@ asmlinkage long sys_times(struct tms __user * tbuf)
1084asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) 1090asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1085{ 1091{
1086 struct task_struct *p; 1092 struct task_struct *p;
1093 struct task_struct *group_leader = current->group_leader;
1087 int err = -EINVAL; 1094 int err = -EINVAL;
1088 1095
1089 if (!pid) 1096 if (!pid)
1090 pid = current->pid; 1097 pid = group_leader->pid;
1091 if (!pgid) 1098 if (!pgid)
1092 pgid = pid; 1099 pgid = pid;
1093 if (pgid < 0) 1100 if (pgid < 0)
@@ -1107,16 +1114,16 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1107 if (!thread_group_leader(p)) 1114 if (!thread_group_leader(p))
1108 goto out; 1115 goto out;
1109 1116
1110 if (p->parent == current || p->real_parent == current) { 1117 if (p->real_parent == group_leader) {
1111 err = -EPERM; 1118 err = -EPERM;
1112 if (p->signal->session != current->signal->session) 1119 if (p->signal->session != group_leader->signal->session)
1113 goto out; 1120 goto out;
1114 err = -EACCES; 1121 err = -EACCES;
1115 if (p->did_exec) 1122 if (p->did_exec)
1116 goto out; 1123 goto out;
1117 } else { 1124 } else {
1118 err = -ESRCH; 1125 err = -ESRCH;
1119 if (p != current) 1126 if (p != group_leader)
1120 goto out; 1127 goto out;
1121 } 1128 }
1122 1129
@@ -1128,7 +1135,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1128 struct task_struct *p; 1135 struct task_struct *p;
1129 1136
1130 do_each_task_pid(pgid, PIDTYPE_PGID, p) { 1137 do_each_task_pid(pgid, PIDTYPE_PGID, p) {
1131 if (p->signal->session == current->signal->session) 1138 if (p->signal->session == group_leader->signal->session)
1132 goto ok_pgid; 1139 goto ok_pgid;
1133 } while_each_task_pid(pgid, PIDTYPE_PGID, p); 1140 } while_each_task_pid(pgid, PIDTYPE_PGID, p);
1134 goto out; 1141 goto out;
@@ -1208,24 +1215,22 @@ asmlinkage long sys_getsid(pid_t pid)
1208 1215
1209asmlinkage long sys_setsid(void) 1216asmlinkage long sys_setsid(void)
1210{ 1217{
1218 struct task_struct *group_leader = current->group_leader;
1211 struct pid *pid; 1219 struct pid *pid;
1212 int err = -EPERM; 1220 int err = -EPERM;
1213 1221
1214 if (!thread_group_leader(current))
1215 return -EINVAL;
1216
1217 down(&tty_sem); 1222 down(&tty_sem);
1218 write_lock_irq(&tasklist_lock); 1223 write_lock_irq(&tasklist_lock);
1219 1224
1220 pid = find_pid(PIDTYPE_PGID, current->pid); 1225 pid = find_pid(PIDTYPE_PGID, group_leader->pid);
1221 if (pid) 1226 if (pid)
1222 goto out; 1227 goto out;
1223 1228
1224 current->signal->leader = 1; 1229 group_leader->signal->leader = 1;
1225 __set_special_pids(current->pid, current->pid); 1230 __set_special_pids(group_leader->pid, group_leader->pid);
1226 current->signal->tty = NULL; 1231 group_leader->signal->tty = NULL;
1227 current->signal->tty_old_pgrp = 0; 1232 group_leader->signal->tty_old_pgrp = 0;
1228 err = process_group(current); 1233 err = process_group(group_leader);
1229out: 1234out:
1230 write_unlock_irq(&tasklist_lock); 1235 write_unlock_irq(&tasklist_lock);
1231 up(&tty_sem); 1236 up(&tty_sem);
@@ -1687,7 +1692,10 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1687 if (unlikely(!p->signal)) 1692 if (unlikely(!p->signal))
1688 return; 1693 return;
1689 1694
1695 utime = stime = cputime_zero;
1696
1690 switch (who) { 1697 switch (who) {
1698 case RUSAGE_BOTH:
1691 case RUSAGE_CHILDREN: 1699 case RUSAGE_CHILDREN:
1692 spin_lock_irqsave(&p->sighand->siglock, flags); 1700 spin_lock_irqsave(&p->sighand->siglock, flags);
1693 utime = p->signal->cutime; 1701 utime = p->signal->cutime;
@@ -1697,22 +1705,11 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1697 r->ru_minflt = p->signal->cmin_flt; 1705 r->ru_minflt = p->signal->cmin_flt;
1698 r->ru_majflt = p->signal->cmaj_flt; 1706 r->ru_majflt = p->signal->cmaj_flt;
1699 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1707 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1700 cputime_to_timeval(utime, &r->ru_utime); 1708
1701 cputime_to_timeval(stime, &r->ru_stime); 1709 if (who == RUSAGE_CHILDREN)
1702 break; 1710 break;
1711
1703 case RUSAGE_SELF: 1712 case RUSAGE_SELF:
1704 spin_lock_irqsave(&p->sighand->siglock, flags);
1705 utime = stime = cputime_zero;
1706 goto sum_group;
1707 case RUSAGE_BOTH:
1708 spin_lock_irqsave(&p->sighand->siglock, flags);
1709 utime = p->signal->cutime;
1710 stime = p->signal->cstime;
1711 r->ru_nvcsw = p->signal->cnvcsw;
1712 r->ru_nivcsw = p->signal->cnivcsw;
1713 r->ru_minflt = p->signal->cmin_flt;
1714 r->ru_majflt = p->signal->cmaj_flt;
1715 sum_group:
1716 utime = cputime_add(utime, p->signal->utime); 1713 utime = cputime_add(utime, p->signal->utime);
1717 stime = cputime_add(stime, p->signal->stime); 1714 stime = cputime_add(stime, p->signal->stime);
1718 r->ru_nvcsw += p->signal->nvcsw; 1715 r->ru_nvcsw += p->signal->nvcsw;
@@ -1729,13 +1726,14 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1729 r->ru_majflt += t->maj_flt; 1726 r->ru_majflt += t->maj_flt;
1730 t = next_thread(t); 1727 t = next_thread(t);
1731 } while (t != p); 1728 } while (t != p);
1732 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1733 cputime_to_timeval(utime, &r->ru_utime);
1734 cputime_to_timeval(stime, &r->ru_stime);
1735 break; 1729 break;
1730
1736 default: 1731 default:
1737 BUG(); 1732 BUG();
1738 } 1733 }
1734
1735 cputime_to_timeval(utime, &r->ru_utime);
1736 cputime_to_timeval(stime, &r->ru_stime);
1739} 1737}
1740 1738
1741int getrusage(struct task_struct *p, int who, struct rusage __user *ru) 1739int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
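The k_getrusage() rewrite above trades the old sum_group goto for a deliberate switch fallthrough: RUSAGE_BOTH shares the reaped-children totals with RUSAGE_CHILDREN, only RUSAGE_CHILDREN breaks out early, and RUSAGE_BOTH then drops into the RUSAGE_SELF accounting. The control flow, reduced to plain integers:

#include <linux/resource.h>     /* RUSAGE_SELF, RUSAGE_CHILDREN, RUSAGE_BOTH */

static int ex_usage_total(int who, int children_time, int self_time)
{
        int total = 0;          /* mirrors utime = stime = cputime_zero */

        switch (who) {
        case RUSAGE_BOTH:
        case RUSAGE_CHILDREN:
                total += children_time;
                if (who == RUSAGE_CHILDREN)
                        break;
                /* RUSAGE_BOTH falls through to add self time as well */
        case RUSAGE_SELF:
                total += self_time;
                break;
        }
        return total;
}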
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 1ab2370e2efa..bd3b9bfcfcec 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -82,6 +82,28 @@ cond_syscall(compat_sys_socketcall);
82cond_syscall(sys_inotify_init); 82cond_syscall(sys_inotify_init);
83cond_syscall(sys_inotify_add_watch); 83cond_syscall(sys_inotify_add_watch);
84cond_syscall(sys_inotify_rm_watch); 84cond_syscall(sys_inotify_rm_watch);
85cond_syscall(sys_migrate_pages);
86cond_syscall(sys_chown16);
87cond_syscall(sys_fchown16);
88cond_syscall(sys_getegid16);
89cond_syscall(sys_geteuid16);
90cond_syscall(sys_getgid16);
91cond_syscall(sys_getgroups16);
92cond_syscall(sys_getresgid16);
93cond_syscall(sys_getresuid16);
94cond_syscall(sys_getuid16);
95cond_syscall(sys_lchown16);
96cond_syscall(sys_setfsgid16);
97cond_syscall(sys_setfsuid16);
98cond_syscall(sys_setgid16);
99cond_syscall(sys_setgroups16);
100cond_syscall(sys_setregid16);
101cond_syscall(sys_setresgid16);
102cond_syscall(sys_setresuid16);
103cond_syscall(sys_setreuid16);
104cond_syscall(sys_setuid16);
105cond_syscall(sys_vm86old);
106cond_syscall(sys_vm86);
85 107
86/* arch-specific weak syscall entries */ 108/* arch-specific weak syscall entries */
87cond_syscall(sys_pciconfig_read); 109cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a85047bb5739..03b0598f2369 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -68,6 +68,8 @@ extern int min_free_kbytes;
68extern int printk_ratelimit_jiffies; 68extern int printk_ratelimit_jiffies;
69extern int printk_ratelimit_burst; 69extern int printk_ratelimit_burst;
70extern int pid_max_min, pid_max_max; 70extern int pid_max_min, pid_max_max;
71extern int sysctl_drop_caches;
72extern int percpu_pagelist_fraction;
71 73
72#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 74#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
73int unknown_nmi_panic; 75int unknown_nmi_panic;
@@ -78,6 +80,7 @@ extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
78/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 80/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
79static int maxolduid = 65535; 81static int maxolduid = 65535;
80static int minolduid; 82static int minolduid;
83static int min_percpu_pagelist_fract = 8;
81 84
82static int ngroups_max = NGROUPS_MAX; 85static int ngroups_max = NGROUPS_MAX;
83 86
@@ -775,6 +778,15 @@ static ctl_table vm_table[] = {
775 .strategy = &sysctl_intvec, 778 .strategy = &sysctl_intvec,
776 }, 779 },
777 { 780 {
781 .ctl_name = VM_DROP_PAGECACHE,
782 .procname = "drop_caches",
783 .data = &sysctl_drop_caches,
784 .maxlen = sizeof(int),
785 .mode = 0644,
786 .proc_handler = drop_caches_sysctl_handler,
787 .strategy = &sysctl_intvec,
788 },
789 {
778 .ctl_name = VM_MIN_FREE_KBYTES, 790 .ctl_name = VM_MIN_FREE_KBYTES,
779 .procname = "min_free_kbytes", 791 .procname = "min_free_kbytes",
780 .data = &min_free_kbytes, 792 .data = &min_free_kbytes,
@@ -784,6 +796,16 @@ static ctl_table vm_table[] = {
784 .strategy = &sysctl_intvec, 796 .strategy = &sysctl_intvec,
785 .extra1 = &zero, 797 .extra1 = &zero,
786 }, 798 },
799 {
800 .ctl_name = VM_PERCPU_PAGELIST_FRACTION,
801 .procname = "percpu_pagelist_fraction",
802 .data = &percpu_pagelist_fraction,
803 .maxlen = sizeof(percpu_pagelist_fraction),
804 .mode = 0644,
805 .proc_handler = &percpu_pagelist_fraction_sysctl_handler,
806 .strategy = &sysctl_intvec,
807 .extra1 = &min_percpu_pagelist_fract,
808 },
787#ifdef CONFIG_MMU 809#ifdef CONFIG_MMU
788 { 810 {
789 .ctl_name = VM_MAX_MAP_COUNT, 811 .ctl_name = VM_MAX_MAP_COUNT,
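Both new vm entries above follow the same ctl_table recipe. Spelled out on a hypothetical knob (the ctl_name value and bounds are illustrative):

#include <linux/sysctl.h>

static int ex_knob = 8;
static int ex_knob_min = 1;

static ctl_table ex_vm_table[] = {
        {
                .ctl_name       = 99,                   /* illustrative id */
                .procname       = "ex_knob",            /* /proc/sys name */
                .data           = &ex_knob,             /* backing variable */
                .maxlen         = sizeof(int),
                .mode           = 0644,                 /* root may write */
                .proc_handler   = &proc_dointvec_minmax,
                .strategy       = &sysctl_intvec,
                .extra1         = &ex_knob_min,         /* lower bound */
        },
        { .ctl_name = 0 }                               /* terminator */
};

drop_caches supplies a custom handler so a write triggers the flush itself instead of merely storing the value; percpu_pagelist_fraction does the same to resize the per-cpu page lists when the fraction changes.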
diff --git a/kernel/timer.c b/kernel/timer.c
index fd74268d8663..074b4bd5cfd8 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -33,6 +33,7 @@
33#include <linux/posix-timers.h> 33#include <linux/posix-timers.h>
34#include <linux/cpu.h> 34#include <linux/cpu.h>
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/delay.h>
36 37
37#include <asm/uaccess.h> 38#include <asm/uaccess.h>
38#include <asm/unistd.h> 39#include <asm/unistd.h>
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2bd5aee1c736..82c4fa70595c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -29,7 +29,8 @@
29#include <linux/kthread.h> 29#include <linux/kthread.h>
30 30
31/* 31/*
32 * The per-CPU workqueue (if single thread, we always use cpu 0's). 32 * The per-CPU workqueue (if single thread, we always use the first
33 * possible cpu).
33 * 34 *
34 * The sequence counters are for flush_scheduled_work(). It wants to wait 35 * The sequence counters are for flush_scheduled_work(). It wants to wait
35 * until all currently-scheduled works are completed, but it doesn't 36 * until all currently-scheduled works are completed, but it doesn't
@@ -69,6 +70,8 @@ struct workqueue_struct {
69static DEFINE_SPINLOCK(workqueue_lock); 70static DEFINE_SPINLOCK(workqueue_lock);
70static LIST_HEAD(workqueues); 71static LIST_HEAD(workqueues);
71 72
73static int singlethread_cpu;
74
72/* If it's single threaded, it isn't in the list of workqueues. */ 75/* If it's single threaded, it isn't in the list of workqueues. */
73static inline int is_single_threaded(struct workqueue_struct *wq) 76static inline int is_single_threaded(struct workqueue_struct *wq)
74{ 77{
@@ -102,7 +105,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
102 105
103 if (!test_and_set_bit(0, &work->pending)) { 106 if (!test_and_set_bit(0, &work->pending)) {
104 if (unlikely(is_single_threaded(wq))) 107 if (unlikely(is_single_threaded(wq)))
105 cpu = any_online_cpu(cpu_online_map); 108 cpu = singlethread_cpu;
106 BUG_ON(!list_empty(&work->entry)); 109 BUG_ON(!list_empty(&work->entry));
107 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); 110 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
108 ret = 1; 111 ret = 1;
@@ -118,7 +121,7 @@ static void delayed_work_timer_fn(unsigned long __data)
118 int cpu = smp_processor_id(); 121 int cpu = smp_processor_id();
119 122
120 if (unlikely(is_single_threaded(wq))) 123 if (unlikely(is_single_threaded(wq)))
121 cpu = any_online_cpu(cpu_online_map); 124 cpu = singlethread_cpu;
122 125
123 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); 126 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
124} 127}
@@ -267,7 +270,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
267 270
268 if (is_single_threaded(wq)) { 271 if (is_single_threaded(wq)) {
269 /* Always use first cpu's area. */ 272 /* Always use first cpu's area. */
270 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, any_online_cpu(cpu_online_map))); 273 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu));
271 } else { 274 } else {
272 int cpu; 275 int cpu;
273 276
@@ -315,12 +318,17 @@ struct workqueue_struct *__create_workqueue(const char *name,
315 return NULL; 318 return NULL;
316 319
317 wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct); 320 wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
321 if (!wq->cpu_wq) {
322 kfree(wq);
323 return NULL;
324 }
325
318 wq->name = name; 326 wq->name = name;
319 /* We don't need the distraction of CPUs appearing and vanishing. */ 327 /* We don't need the distraction of CPUs appearing and vanishing. */
320 lock_cpu_hotplug(); 328 lock_cpu_hotplug();
321 if (singlethread) { 329 if (singlethread) {
322 INIT_LIST_HEAD(&wq->list); 330 INIT_LIST_HEAD(&wq->list);
323 p = create_workqueue_thread(wq, any_online_cpu(cpu_online_map)); 331 p = create_workqueue_thread(wq, singlethread_cpu);
324 if (!p) 332 if (!p)
325 destroy = 1; 333 destroy = 1;
326 else 334 else
@@ -374,7 +382,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
374 /* We don't need the distraction of CPUs appearing and vanishing. */ 382 /* We don't need the distraction of CPUs appearing and vanishing. */
375 lock_cpu_hotplug(); 383 lock_cpu_hotplug();
376 if (is_single_threaded(wq)) 384 if (is_single_threaded(wq))
377 cleanup_workqueue_thread(wq, any_online_cpu(cpu_online_map)); 385 cleanup_workqueue_thread(wq, singlethread_cpu);
378 else { 386 else {
379 for_each_online_cpu(cpu) 387 for_each_online_cpu(cpu)
380 cleanup_workqueue_thread(wq, cpu); 388 cleanup_workqueue_thread(wq, cpu);
@@ -419,6 +427,25 @@ int schedule_delayed_work_on(int cpu,
419 return ret; 427 return ret;
420} 428}
421 429
430int schedule_on_each_cpu(void (*func) (void *info), void *info)
431{
432 int cpu;
433 struct work_struct *work;
434
435 work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL);
436
437 if (!work)
438 return -ENOMEM;
439 for_each_online_cpu(cpu) {
440 INIT_WORK(work + cpu, func, info);
441 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
442 work + cpu);
443 }
444 flush_workqueue(keventd_wq);
445 kfree(work);
446 return 0;
447}
448
422void flush_scheduled_work(void) 449void flush_scheduled_work(void)
423{ 450{
424 flush_workqueue(keventd_wq); 451 flush_workqueue(keventd_wq);
@@ -543,6 +570,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
543 570
544void init_workqueues(void) 571void init_workqueues(void)
545{ 572{
573 singlethread_cpu = first_cpu(cpu_possible_map);
546 hotcpu_notifier(workqueue_cpu_callback, 0); 574 hotcpu_notifier(workqueue_cpu_callback, 0);
547 keventd_wq = create_workqueue("events"); 575 keventd_wq = create_workqueue("events");
548 BUG_ON(!keventd_wq); 576 BUG_ON(!keventd_wq);
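A sketch of how the new schedule_on_each_cpu() above is meant to be used: queue one work item per online CPU into keventd and return only after the flush has seen them all complete (the per-CPU counter here is illustrative):

#include <linux/percpu.h>
#include <linux/workqueue.h>

static DEFINE_PER_CPU(int, ex_hits);

static void ex_bump(void *info)
{
        __get_cpu_var(ex_hits)++;       /* runs in keventd on each CPU */
}

static int ex_run_everywhere(void)
{
        return schedule_on_each_cpu(ex_bump, NULL);     /* 0 or -ENOMEM */
}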
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 80598cfd728c..c48260fb8fd9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -79,7 +79,7 @@ config SCHEDSTATS
79 79
80config DEBUG_SLAB 80config DEBUG_SLAB
81 bool "Debug memory allocations" 81 bool "Debug memory allocations"
82 depends on DEBUG_KERNEL 82 depends on DEBUG_KERNEL && SLAB
83 help 83 help
84 Say Y here to have the kernel do limited verification on memory 84 Say Y here to have the kernel do limited verification on memory
85 allocation as well as poisoning memory on free to catch use of freed 85 allocation as well as poisoning memory on free to catch use of freed
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 23d3b1147fe9..48e708381d44 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -519,7 +519,7 @@ EXPORT_SYMBOL(bitmap_parselist);
519 * 519 *
520 * Map the bit at position @pos in @buf (of length @bits) to the 520 * Map the bit at position @pos in @buf (of length @bits) to the
521 * ordinal of which set bit it is. If it is not set or if @pos 521 * ordinal of which set bit it is. If it is not set or if @pos
522 * is not a valid bit position, map to zero (0). 522 * is not a valid bit position, map to -1.
523 * 523 *
524 * If for example, just bits 4 through 7 are set in @buf, then @pos 524 * If for example, just bits 4 through 7 are set in @buf, then @pos
525 * values 4 through 7 will get mapped to 0 through 3, respectively, 525 * values 4 through 7 will get mapped to 0 through 3, respectively,
@@ -531,18 +531,19 @@ EXPORT_SYMBOL(bitmap_parselist);
531 */ 531 */
532static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) 532static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
533{ 533{
534 int ord = 0; 534 int i, ord;
535 535
536 if (pos >= 0 && pos < bits) { 536 if (pos < 0 || pos >= bits || !test_bit(pos, buf))
537 int i; 537 return -1;
538 538
539 for (i = find_first_bit(buf, bits); 539 i = find_first_bit(buf, bits);
540 i < pos; 540 ord = 0;
541 i = find_next_bit(buf, bits, i + 1)) 541 while (i < pos) {
542 ord++; 542 i = find_next_bit(buf, bits, i + 1);
543 if (i > pos) 543 ord++;
544 ord = 0;
545 } 544 }
545 BUG_ON(i != pos);
546
546 return ord; 547 return ord;
547} 548}
548 549
@@ -553,11 +554,12 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
553 * @bits: number of valid bit positions in @buf 554 * @bits: number of valid bit positions in @buf
554 * 555 *
555 * Map the ordinal offset of bit @ord in @buf to its position in @buf. 556 * Map the ordinal offset of bit @ord in @buf to its position in @buf.
556 * If @ord is not the ordinal offset of a set bit in @buf, map to zero (0). 557 * Value of @ord should be in range 0 <= @ord < weight(buf), else
558 * results are undefined.
557 * 559 *
558 * If for example, just bits 4 through 7 are set in @buf, then @ord 560 * If for example, just bits 4 through 7 are set in @buf, then @ord
559 * values 0 through 3 will get mapped to 4 through 7, respectively, 561 * values 0 through 3 will get mapped to 4 through 7, respectively,
560 * and all other @ord values will get mapped to 0. When @ord value 3 562 * and all other @ord values return undefined values. When @ord value 3
561 * gets mapped to (returns) @pos value 7 in this example, that means 563 * gets mapped to (returns) @pos value 7 in this example, that means
562 * that the 3rd set bit (starting with 0th) is at position 7 in @buf. 564 * that the 3rd set bit (starting with 0th) is at position 7 in @buf.
563 * 565 *
@@ -583,8 +585,8 @@ static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
583 585
584/** 586/**
585 * bitmap_remap - Apply map defined by a pair of bitmaps to another bitmap 587 * bitmap_remap - Apply map defined by a pair of bitmaps to another bitmap
586 * @src: subset to be remapped
587 * @dst: remapped result 588 * @dst: remapped result
589 * @src: subset to be remapped
588 * @old: defines domain of map 590 * @old: defines domain of map
589 * @new: defines range of map 591 * @new: defines range of map
590 * @bits: number of bits in each of these bitmaps 592 * @bits: number of bits in each of these bitmaps
@@ -596,49 +598,42 @@ static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
596 * weight of @old, map the position of the n-th set bit in @old to 598 * weight of @old, map the position of the n-th set bit in @old to
597 * the position of the m-th set bit in @new, where m == n % w. 599 * the position of the m-th set bit in @new, where m == n % w.
598 * 600 *
599 * If either of the @old and @new bitmaps are empty, or if@src and @dst 601 * If either of the @old and @new bitmaps are empty, or if @src and
600 * point to the same location, then this routine does nothing. 602 * @dst point to the same location, then this routine copies @src
603 * to @dst.
601 * 604 *
602 * The positions of unset bits in @old are mapped to the position of 605 * The positions of unset bits in @old are mapped to themselves
603 * the first set bit in @new. 606 * (the identity map).
604 * 607 *
605 * Apply the above specified mapping to @src, placing the result in 608 * Apply the above specified mapping to @src, placing the result in
606 * @dst, clearing any bits previously set in @dst. 609 * @dst, clearing any bits previously set in @dst.
607 * 610 *
608 * The resulting value of @dst will have either the same weight as
609 * @src, or less weight in the general case that the mapping wasn't
610 * injective due to the weight of @new being less than that of @old.
611 * The resulting value of @dst will never have greater weight than
612 * that of @src, except perhaps in the case that one of the above
613 * conditions was not met and this routine just returned.
614 *
615 * For example, let's say that @old has bits 4 through 7 set, and 611 * For example, let's say that @old has bits 4 through 7 set, and
616 * @new has bits 12 through 15 set. This defines the mapping of bit 612 * @new has bits 12 through 15 set. This defines the mapping of bit
617 * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other 613 * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other
618 * bit positions to 12 (the first set bit in @new. So if say @src 614 * bit positions unchanged. So if say @src comes into this routine
619 * comes into this routine with bits 1, 5 and 7 set, then @dst should 615 * with bits 1, 5 and 7 set, then @dst should leave with bits 1,
620 * leave with bits 12, 13 and 15 set. 616 * 13 and 15 set.
621 */ 617 */
622void bitmap_remap(unsigned long *dst, const unsigned long *src, 618void bitmap_remap(unsigned long *dst, const unsigned long *src,
623 const unsigned long *old, const unsigned long *new, 619 const unsigned long *old, const unsigned long *new,
624 int bits) 620 int bits)
625{ 621{
626 int s; 622 int oldbit, w;
627 623
628 if (bitmap_weight(old, bits) == 0)
629 return;
630 if (bitmap_weight(new, bits) == 0)
631 return;
632 if (dst == src) /* following doesn't handle inplace remaps */ 624 if (dst == src) /* following doesn't handle inplace remaps */
633 return; 625 return;
634
635 bitmap_zero(dst, bits); 626 bitmap_zero(dst, bits);
636 for (s = find_first_bit(src, bits); 627
637 s < bits; 628 w = bitmap_weight(new, bits);
638 s = find_next_bit(src, bits, s + 1)) { 629 for (oldbit = find_first_bit(src, bits);
639 int x = bitmap_pos_to_ord(old, s, bits); 630 oldbit < bits;
640 int y = bitmap_ord_to_pos(new, x, bits); 631 oldbit = find_next_bit(src, bits, oldbit + 1)) {
641 set_bit(y, dst); 632 int n = bitmap_pos_to_ord(old, oldbit, bits);
633 if (n < 0 || w == 0)
634 set_bit(oldbit, dst); /* identity map */
635 else
636 set_bit(bitmap_ord_to_pos(new, n % w, bits), dst);
642 } 637 }
643} 638}
644EXPORT_SYMBOL(bitmap_remap); 639EXPORT_SYMBOL(bitmap_remap);
@@ -657,8 +652,8 @@ EXPORT_SYMBOL(bitmap_remap);
657 * weight of @old, map the position of the n-th set bit in @old to 652 * weight of @old, map the position of the n-th set bit in @old to
658 * the position of the m-th set bit in @new, where m == n % w. 653 * the position of the m-th set bit in @new, where m == n % w.
659 * 654 *
660 * The positions of unset bits in @old are mapped to the position of 655 * The positions of unset bits in @old are mapped to themselves
661 * the first set bit in @new. 656 * (the identity map).
662 * 657 *
663 * Apply the above specified mapping to bit position @oldbit, returning 658 * Apply the above specified mapping to bit position @oldbit, returning
664 * the new bit position. 659 * the new bit position.
@@ -666,14 +661,18 @@ EXPORT_SYMBOL(bitmap_remap);
666 * For example, let's say that @old has bits 4 through 7 set, and 661 * For example, let's say that @old has bits 4 through 7 set, and
667 * @new has bits 12 through 15 set. This defines the mapping of bit 662 * @new has bits 12 through 15 set. This defines the mapping of bit
668 * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other 663 * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other
669 * bit positions to 12 (the first set bit in @new). So if, say, @oldbit 664 * bit positions unchanged. So if, say, @oldbit is 5, then this routine
670 * is 5, then this routine returns 13. 665 * returns 13.
671 */ 666 */
672int bitmap_bitremap(int oldbit, const unsigned long *old, 667int bitmap_bitremap(int oldbit, const unsigned long *old,
673 const unsigned long *new, int bits) 668 const unsigned long *new, int bits)
674{ 669{
675 int x = bitmap_pos_to_ord(old, oldbit, bits); 670 int w = bitmap_weight(new, bits);
676 return bitmap_ord_to_pos(new, x, bits); 671 int n = bitmap_pos_to_ord(old, oldbit, bits);
672 if (n < 0 || w == 0)
673 return oldbit;
674 else
675 return bitmap_ord_to_pos(new, n % w, bits);
677} 676}
678EXPORT_SYMBOL(bitmap_bitremap); 677EXPORT_SYMBOL(bitmap_bitremap);
679 678
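To make the new identity-map semantics concrete, here is a minimal usage sketch (a hypothetical test fragment, not part of this patch; it relies only on the lib/bitmap and bitops APIs already used above):

#include <linux/bitmap.h>
#include <linux/bitops.h>

/* Illustrative only: @old = bits 4..7, @new = bits 12..15, as in the
 * kernel-doc example above. */
static void bitmap_remap_example(void)
{
	DECLARE_BITMAP(old, 32);
	DECLARE_BITMAP(new, 32);
	DECLARE_BITMAP(src, 32);
	DECLARE_BITMAP(dst, 32);
	int i;

	bitmap_zero(old, 32);
	bitmap_zero(new, 32);
	bitmap_zero(src, 32);
	for (i = 4; i <= 7; i++)
		__set_bit(i, old);
	for (i = 12; i <= 15; i++)
		__set_bit(i, new);
	__set_bit(1, src);
	__set_bit(5, src);
	__set_bit(7, src);

	bitmap_remap(dst, src, old, new, 32);
	/* dst now holds bits 1 (identity), 13 and 15 */

	/* Likewise, bitmap_bitremap(5, old, new, 32) == 13 and
	 * bitmap_bitremap(1, old, new, 32) == 1 (identity map). */
}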
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index 305a9663aee3..a65c31455541 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -1,47 +1,11 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/spinlock.h> 2#include <linux/spinlock.h>
3#include <asm/atomic.h> 3#include <asm/atomic.h>
4#include <asm/system.h>
5 4
6#ifdef __HAVE_ARCH_CMPXCHG
7/* 5/*
8 * This is an implementation of the notion of "decrement a 6 * This is an implementation of the notion of "decrement a
9 * reference count, and return locked if it decremented to zero". 7 * reference count, and return locked if it decremented to zero".
10 * 8 *
11 * This implementation can be used on any architecture that
12 * has a cmpxchg, and where atomic->value is an int holding
13 * the value of the atomic (i.e. the high bits aren't used
14 * for a lock or anything like that).
15 */
16int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
17{
18 int counter;
19 int newcount;
20
21 for (;;) {
22 counter = atomic_read(atomic);
23 newcount = counter - 1;
24 if (!newcount)
25 break; /* do it the slow way */
26
27 newcount = cmpxchg(&atomic->counter, counter, newcount);
28 if (newcount == counter)
29 return 0;
30 }
31
32 spin_lock(lock);
33 if (atomic_dec_and_test(atomic))
34 return 1;
35 spin_unlock(lock);
36 return 0;
37}
38#else
39/*
40 * This is an architecture-neutral, but slow,
41 * implementation of the notion of "decrement
42 * a reference count, and return locked if it
43 * decremented to zero".
44 *
45 * NOTE NOTE NOTE! This is _not_ equivalent to 9 * NOTE NOTE NOTE! This is _not_ equivalent to
46 * 10 *
47 * if (atomic_dec_and_test(&atomic)) { 11 * if (atomic_dec_and_test(&atomic)) {
@@ -52,21 +16,20 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
52 * 16 *
53 * because the spin-lock and the decrement must be 17 * because the spin-lock and the decrement must be
54 * "atomic". 18 * "atomic".
55 *
56 * This slow version gets the spinlock unconditionally,
57 * and releases it if it isn't needed. Architectures
58 * are encouraged to come up with better approaches,
59 * this is trivially done efficiently using a load-locked
60 * store-conditional approach, for example.
61 */ 19 */
62int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) 20int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
63{ 21{
22#ifdef CONFIG_SMP
23 /* Subtract 1 from counter unless that drops it to 0 (i.e. it was 1) */
24 if (atomic_add_unless(atomic, -1, 1))
25 return 0;
26#endif
27 /* Otherwise do it the slow way */
64 spin_lock(lock); 28 spin_lock(lock);
65 if (atomic_dec_and_test(atomic)) 29 if (atomic_dec_and_test(atomic))
66 return 1; 30 return 1;
67 spin_unlock(lock); 31 spin_unlock(lock);
68 return 0; 32 return 0;
69} 33}
70#endif
71 34
72EXPORT_SYMBOL(_atomic_dec_and_lock); 35EXPORT_SYMBOL(_atomic_dec_and_lock);
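The simplified primitive keeps its usual contract: return 1 with the lock held only when the count reaches zero. A sketch of the canonical caller (hypothetical object and names, not from this patch):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>

struct obj {
	atomic_t refcnt;
	struct list_head node;
};

static DEFINE_SPINLOCK(obj_lock);

static void obj_put(struct obj *o)
{
	/* The lock is taken only on the final put; the common path is
	 * the lock-free atomic_add_unless() fast path added above. */
	if (atomic_dec_and_lock(&o->refcnt, &obj_lock)) {
		list_del(&o->node);
		spin_unlock(&obj_lock);
		kfree(o);
	}
}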
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index d08302d2a42c..c05b4b19cf6c 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -10,6 +10,7 @@
10 */ 10 */
11 11
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include <linux/module.h>
13 14
14int find_next_bit(const unsigned long *addr, int size, int offset) 15int find_next_bit(const unsigned long *addr, int size, int offset)
15{ 16{
@@ -53,3 +54,5 @@ int find_next_bit(const unsigned long *addr, int size, int offset)
53 54
54 return offset; 55 return offset;
55} 56}
57
58EXPORT_SYMBOL(find_next_bit);
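The export matters because modular code iterates bitmaps with exactly this call; the canonical loop (an illustrative sketch, mirroring the loop in bitmap_remap() above) looks like:

#include <linux/bitops.h>
#include <linux/kernel.h>

/* Visit every set bit in @map of @bits bits. */
static void visit_set_bits(const unsigned long *map, int bits)
{
	int bit;

	for (bit = find_first_bit(map, bits);
	     bit < bits;
	     bit = find_next_bit(map, bits, bit + 1))
		printk(KERN_DEBUG "bit %d is set\n", bit);
}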
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 88511c3805ad..c0bd4a914803 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -137,18 +137,31 @@ out:
137 137
138static inline void tag_set(struct radix_tree_node *node, int tag, int offset) 138static inline void tag_set(struct radix_tree_node *node, int tag, int offset)
139{ 139{
140 if (!test_bit(offset, &node->tags[tag][0])) 140 __set_bit(offset, node->tags[tag]);
141 __set_bit(offset, &node->tags[tag][0]);
142} 141}
143 142
144static inline void tag_clear(struct radix_tree_node *node, int tag, int offset) 143static inline void tag_clear(struct radix_tree_node *node, int tag, int offset)
145{ 144{
146 __clear_bit(offset, &node->tags[tag][0]); 145 __clear_bit(offset, node->tags[tag]);
147} 146}
148 147
149static inline int tag_get(struct radix_tree_node *node, int tag, int offset) 148static inline int tag_get(struct radix_tree_node *node, int tag, int offset)
150{ 149{
151 return test_bit(offset, &node->tags[tag][0]); 150 return test_bit(offset, node->tags[tag]);
151}
152
153/*
154 * Returns 1 if any slot in the node has this tag set.
155 * Otherwise returns 0.
156 */
157static inline int any_tag_set(struct radix_tree_node *node, int tag)
158{
159 int idx;
160 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
161 if (node->tags[tag][idx])
162 return 1;
163 }
164 return 0;
152} 165}
153 166
154/* 167/*
@@ -185,15 +198,9 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
185 * into the newly-pushed top-level node(s) 198 * into the newly-pushed top-level node(s)
186 */ 199 */
187 for (tag = 0; tag < RADIX_TREE_TAGS; tag++) { 200 for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
188 int idx;
189
190 tags[tag] = 0; 201 tags[tag] = 0;
191 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 202 if (any_tag_set(root->rnode, tag))
192 if (root->rnode->tags[tag][idx]) { 203 tags[tag] = 1;
193 tags[tag] = 1;
194 break;
195 }
196 }
197 } 204 }
198 205
199 do { 206 do {
@@ -246,7 +253,7 @@ int radix_tree_insert(struct radix_tree_root *root,
246 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 253 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
247 254
248 offset = 0; /* uninitialised var warning */ 255 offset = 0; /* uninitialised var warning */
249 while (height > 0) { 256 do {
250 if (slot == NULL) { 257 if (slot == NULL) {
251 /* Have to add a child node. */ 258 /* Have to add a child node. */
252 if (!(slot = radix_tree_node_alloc(root))) 259 if (!(slot = radix_tree_node_alloc(root)))
@@ -264,18 +271,16 @@ int radix_tree_insert(struct radix_tree_root *root,
264 slot = node->slots[offset]; 271 slot = node->slots[offset];
265 shift -= RADIX_TREE_MAP_SHIFT; 272 shift -= RADIX_TREE_MAP_SHIFT;
266 height--; 273 height--;
267 } 274 } while (height > 0);
268 275
269 if (slot != NULL) 276 if (slot != NULL)
270 return -EEXIST; 277 return -EEXIST;
271 278
272 if (node) { 279 BUG_ON(!node);
273 node->count++; 280 node->count++;
274 node->slots[offset] = item; 281 node->slots[offset] = item;
275 BUG_ON(tag_get(node, 0, offset)); 282 BUG_ON(tag_get(node, 0, offset));
276 BUG_ON(tag_get(node, 1, offset)); 283 BUG_ON(tag_get(node, 1, offset));
277 } else
278 root->rnode = item;
279 284
280 return 0; 285 return 0;
281} 286}
@@ -367,7 +372,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
367 int offset; 372 int offset;
368 373
369 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 374 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
370 tag_set(slot, tag, offset); 375 if (!tag_get(slot, tag, offset))
376 tag_set(slot, tag, offset);
371 slot = slot->slots[offset]; 377 slot = slot->slots[offset];
372 BUG_ON(slot == NULL); 378 BUG_ON(slot == NULL);
373 shift -= RADIX_TREE_MAP_SHIFT; 379 shift -= RADIX_TREE_MAP_SHIFT;
@@ -427,13 +433,11 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
427 goto out; 433 goto out;
428 434
429 do { 435 do {
430 int idx; 436 if (!tag_get(pathp->node, tag, pathp->offset))
431 437 goto out;
432 tag_clear(pathp->node, tag, pathp->offset); 438 tag_clear(pathp->node, tag, pathp->offset);
433 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 439 if (any_tag_set(pathp->node, tag))
434 if (pathp->node->tags[tag][idx]) 440 goto out;
435 goto out;
436 }
437 pathp--; 441 pathp--;
438 } while (pathp->node); 442 } while (pathp->node);
439out: 443out:
@@ -674,6 +678,29 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
674EXPORT_SYMBOL(radix_tree_gang_lookup_tag); 678EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
675 679
676/** 680/**
681 * radix_tree_shrink - shrink the height of a radix tree to the minimum
682 * @root: radix tree root
683 */
684static inline void radix_tree_shrink(struct radix_tree_root *root)
685{
686 /* try to shrink tree height */
687 while (root->height > 1 &&
688 root->rnode->count == 1 &&
689 root->rnode->slots[0]) {
690 struct radix_tree_node *to_free = root->rnode;
691
692 root->rnode = to_free->slots[0];
693 root->height--;
694 /* must only free zeroed nodes into the slab */
695 tag_clear(to_free, 0, 0);
696 tag_clear(to_free, 1, 0);
697 to_free->slots[0] = NULL;
698 to_free->count = 0;
699 radix_tree_node_free(to_free);
700 }
701}
702
703/**
677 * radix_tree_delete - delete an item from a radix tree 704 * radix_tree_delete - delete an item from a radix tree
678 * @root: radix tree root 705 * @root: radix tree root
679 * @index: index key 706 * @index: index key
@@ -691,6 +718,8 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
691 void *ret = NULL; 718 void *ret = NULL;
692 char tags[RADIX_TREE_TAGS]; 719 char tags[RADIX_TREE_TAGS];
693 int nr_cleared_tags; 720 int nr_cleared_tags;
721 int tag;
722 int offset;
694 723
695 height = root->height; 724 height = root->height;
696 if (index > radix_tree_maxindex(height)) 725 if (index > radix_tree_maxindex(height))
@@ -701,16 +730,14 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
701 slot = root->rnode; 730 slot = root->rnode;
702 731
703 for ( ; height > 0; height--) { 732 for ( ; height > 0; height--) {
704 int offset;
705
706 if (slot == NULL) 733 if (slot == NULL)
707 goto out; 734 goto out;
708 735
736 pathp++;
709 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 737 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
710 pathp[1].offset = offset; 738 pathp->offset = offset;
711 pathp[1].node = slot; 739 pathp->node = slot;
712 slot = slot->slots[offset]; 740 slot = slot->slots[offset];
713 pathp++;
714 shift -= RADIX_TREE_MAP_SHIFT; 741 shift -= RADIX_TREE_MAP_SHIFT;
715 } 742 }
716 743
@@ -723,35 +750,39 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
723 /* 750 /*
724 * Clear all tags associated with the just-deleted item 751 * Clear all tags associated with the just-deleted item
725 */ 752 */
726 memset(tags, 0, sizeof(tags)); 753 nr_cleared_tags = 0;
727 do { 754 for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
728 int tag; 755 if (tag_get(pathp->node, tag, pathp->offset)) {
756 tag_clear(pathp->node, tag, pathp->offset);
757 tags[tag] = 0;
758 nr_cleared_tags++;
759 } else
760 tags[tag] = 1;
761 }
729 762
730 nr_cleared_tags = RADIX_TREE_TAGS; 763 for (pathp--; nr_cleared_tags && pathp->node; pathp--) {
731 for (tag = 0; tag < RADIX_TREE_TAGS; tag++) { 764 for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
732 int idx;
733
734 if (tags[tag]) 765 if (tags[tag])
735 continue; 766 continue;
736 767
737 tag_clear(pathp->node, tag, pathp->offset); 768 tag_clear(pathp->node, tag, pathp->offset);
738 769 if (any_tag_set(pathp->node, tag)) {
739 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 770 tags[tag] = 1;
740 if (pathp->node->tags[tag][idx]) { 771 nr_cleared_tags--;
741 tags[tag] = 1;
742 nr_cleared_tags--;
743 break;
744 }
745 } 772 }
746 } 773 }
747 pathp--; 774 }
748 } while (pathp->node && nr_cleared_tags);
749 775
750 /* Now free the nodes we do not need anymore */ 776 /* Now free the nodes we do not need anymore */
751 for (pathp = orig_pathp; pathp->node; pathp--) { 777 for (pathp = orig_pathp; pathp->node; pathp--) {
752 pathp->node->slots[pathp->offset] = NULL; 778 pathp->node->slots[pathp->offset] = NULL;
753 if (--pathp->node->count) 779 pathp->node->count--;
780
781 if (pathp->node->count) {
782 if (pathp->node == root->rnode)
783 radix_tree_shrink(root);
754 goto out; 784 goto out;
785 }
755 786
756 /* Node with zero slots in use so free it */ 787 /* Node with zero slots in use so free it */
757 radix_tree_node_free(pathp->node); 788 radix_tree_node_free(pathp->node);
@@ -770,15 +801,11 @@ EXPORT_SYMBOL(radix_tree_delete);
770 */ 801 */
771int radix_tree_tagged(struct radix_tree_root *root, int tag) 802int radix_tree_tagged(struct radix_tree_root *root, int tag)
772{ 803{
773 int idx; 804 struct radix_tree_node *rnode;
774 805 rnode = root->rnode;
775 if (!root->rnode) 806 if (!rnode)
776 return 0; 807 return 0;
777 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 808 return any_tag_set(rnode, tag);
778 if (root->rnode->tags[tag][idx])
779 return 1;
780 }
781 return 0;
782} 809}
783EXPORT_SYMBOL(radix_tree_tagged); 810EXPORT_SYMBOL(radix_tree_tagged);
784 811
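For context, the tag fast path that any_tag_set() and radix_tree_tagged() serve looks like this from the caller's side (a hypothetical sketch against the existing radix-tree API in include/linux/radix-tree.h):

#include <linux/radix-tree.h>
#include <linux/gfp.h>
#include <linux/kernel.h>

static RADIX_TREE(my_tree, GFP_ATOMIC);

static void tag_example(void *item)
{
	if (radix_tree_insert(&my_tree, 42, item))
		return;				/* -ENOMEM or -EEXIST */
	radix_tree_tag_set(&my_tree, 42, 0);	/* tag 0: e.g. "dirty" */

	/* radix_tree_tagged() is now a single any_tag_set() test on the
	 * root node instead of an open-coded scan of tags[tag][]. */
	if (radix_tree_tagged(&my_tree, 0))
		printk(KERN_DEBUG "tree has tag-0 entries\n");
}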
diff --git a/mm/Kconfig b/mm/Kconfig
index b3db11f137e0..a9cb80ae6409 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -132,3 +132,10 @@ config SPLIT_PTLOCK_CPUS
132 default "4096" if ARM && !CPU_CACHE_VIPT 132 default "4096" if ARM && !CPU_CACHE_VIPT
133 default "4096" if PARISC && !PA20 133 default "4096" if PARISC && !PA20
134 default "4" 134 default "4"
135
136#
137# support for page migration
138#
139config MIGRATION
140 def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM
141 depends on SWAP
diff --git a/mm/Makefile b/mm/Makefile
index 2fa6d2ca9f28..9aa03fa1dcc3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -9,8 +9,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
9 9
10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ 10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
11 page_alloc.o page-writeback.o pdflush.o \ 11 page_alloc.o page-writeback.o pdflush.o \
12 readahead.o slab.o swap.o truncate.o vmscan.o \ 12 readahead.o swap.o truncate.o vmscan.o \
13 prio_tree.o $(mmu-y) 13 prio_tree.o util.o $(mmu-y)
14 14
15obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 15obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
16obj-$(CONFIG_HUGETLBFS) += hugetlb.o 16obj-$(CONFIG_HUGETLBFS) += hugetlb.o
@@ -18,5 +18,7 @@ obj-$(CONFIG_NUMA) += mempolicy.o
18obj-$(CONFIG_SPARSEMEM) += sparse.o 18obj-$(CONFIG_SPARSEMEM) += sparse.o
19obj-$(CONFIG_SHMEM) += shmem.o 19obj-$(CONFIG_SHMEM) += shmem.o
20obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o 20obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
21obj-$(CONFIG_SLOB) += slob.o
22obj-$(CONFIG_SLAB) += slab.o
21obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o 23obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
22obj-$(CONFIG_FS_XIP) += filemap_xip.o 24obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 5f19e87bc5af..d257c89e7704 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -37,6 +37,11 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
37 if (!file) 37 if (!file)
38 return -EBADF; 38 return -EBADF;
39 39
40 if (S_ISFIFO(file->f_dentry->d_inode->i_mode)) {
41 ret = -ESPIPE;
42 goto out;
43 }
44
40 mapping = file->f_mapping; 45 mapping = file->f_mapping;
41 if (!mapping || len < 0) { 46 if (!mapping || len < 0) {
42 ret = -EINVAL; 47 ret = -EINVAL;
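The new check is visible from userspace: posix_fadvise() on a pipe now fails with ESPIPE instead of being silently accepted. A minimal (hypothetical) demonstration; note that posix_fadvise() returns the error number directly rather than setting errno:

#define _XOPEN_SOURCE 600
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fds[2], err;

	if (pipe(fds))
		return 1;
	err = posix_fadvise(fds[0], 0, 0, POSIX_FADV_WILLNEED);
	if (err)	/* expected: ESPIPE after this change */
		printf("posix_fadvise: %s\n", strerror(err));
	close(fds[0]);
	close(fds[1]);
	return 0;
}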
diff --git a/mm/filemap.c b/mm/filemap.c
index 4ef24a397684..478f4c74cc31 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -280,7 +280,7 @@ static int wait_on_page_writeback_range(struct address_space *mapping,
280 * it is otherwise livelockable. 280 * it is otherwise livelockable.
281 */ 281 */
282int sync_page_range(struct inode *inode, struct address_space *mapping, 282int sync_page_range(struct inode *inode, struct address_space *mapping,
283 loff_t pos, size_t count) 283 loff_t pos, loff_t count)
284{ 284{
285 pgoff_t start = pos >> PAGE_CACHE_SHIFT; 285 pgoff_t start = pos >> PAGE_CACHE_SHIFT;
286 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; 286 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
@@ -305,9 +305,8 @@ EXPORT_SYMBOL(sync_page_range);
305 * as it forces O_SYNC writers to different parts of the same file 305 * as it forces O_SYNC writers to different parts of the same file
306 * to be serialised right until io completion. 306 * to be serialised right until io completion.
307 */ 307 */
308static int sync_page_range_nolock(struct inode *inode, 308int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
309 struct address_space *mapping, 309 loff_t pos, loff_t count)
310 loff_t pos, size_t count)
311{ 310{
312 pgoff_t start = pos >> PAGE_CACHE_SHIFT; 311 pgoff_t start = pos >> PAGE_CACHE_SHIFT;
313 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; 312 pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
@@ -322,6 +321,7 @@ static int sync_page_range_nolock(struct inode *inode,
322 ret = wait_on_page_writeback_range(mapping, start, end); 321 ret = wait_on_page_writeback_range(mapping, start, end);
323 return ret; 322 return ret;
324} 323}
324EXPORT_SYMBOL(sync_page_range_nolock);
325 325
326/** 326/**
327 * filemap_fdatawait - walk the list of under-writeback pages of the given 327 * filemap_fdatawait - walk the list of under-writeback pages of the given
@@ -343,30 +343,44 @@ EXPORT_SYMBOL(filemap_fdatawait);
343 343
344int filemap_write_and_wait(struct address_space *mapping) 344int filemap_write_and_wait(struct address_space *mapping)
345{ 345{
346 int retval = 0; 346 int err = 0;
347 347
348 if (mapping->nrpages) { 348 if (mapping->nrpages) {
349 retval = filemap_fdatawrite(mapping); 349 err = filemap_fdatawrite(mapping);
350 if (retval == 0) 350 /*
351 retval = filemap_fdatawait(mapping); 351 * Even if the above returned an error, the pages may be
352 * partially written (e.g. -ENOSPC), so we wait for them.
353 * But -EIO is a special case: it may indicate that the worst
354 * (e.g. a bug) has happened, so we avoid waiting for it.
355 */
356 if (err != -EIO) {
357 int err2 = filemap_fdatawait(mapping);
358 if (!err)
359 err = err2;
360 }
352 } 361 }
353 return retval; 362 return err;
354} 363}
364EXPORT_SYMBOL(filemap_write_and_wait);
355 365
356int filemap_write_and_wait_range(struct address_space *mapping, 366int filemap_write_and_wait_range(struct address_space *mapping,
357 loff_t lstart, loff_t lend) 367 loff_t lstart, loff_t lend)
358{ 368{
359 int retval = 0; 369 int err = 0;
360 370
361 if (mapping->nrpages) { 371 if (mapping->nrpages) {
362 retval = __filemap_fdatawrite_range(mapping, lstart, lend, 372 err = __filemap_fdatawrite_range(mapping, lstart, lend,
363 WB_SYNC_ALL); 373 WB_SYNC_ALL);
364 if (retval == 0) 374 /* See comment of filemap_write_and_wait() */
365 retval = wait_on_page_writeback_range(mapping, 375 if (err != -EIO) {
366 lstart >> PAGE_CACHE_SHIFT, 376 int err2 = wait_on_page_writeback_range(mapping,
367 lend >> PAGE_CACHE_SHIFT); 377 lstart >> PAGE_CACHE_SHIFT,
378 lend >> PAGE_CACHE_SHIFT);
379 if (!err)
380 err = err2;
381 }
368 } 382 }
369 return retval; 383 return err;
370} 384}
371 385
372/* 386/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f4c43d7980ba..b21d78c941b5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -12,6 +12,7 @@
12#include <linux/nodemask.h> 12#include <linux/nodemask.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/mempolicy.h> 14#include <linux/mempolicy.h>
15#include <linux/cpuset.h>
15 16
16#include <asm/page.h> 17#include <asm/page.h>
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
@@ -48,7 +49,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
48 49
49 for (z = zonelist->zones; *z; z++) { 50 for (z = zonelist->zones; *z; z++) {
50 nid = (*z)->zone_pgdat->node_id; 51 nid = (*z)->zone_pgdat->node_id;
51 if (!list_empty(&hugepage_freelists[nid])) 52 if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
53 !list_empty(&hugepage_freelists[nid]))
52 break; 54 break;
53 } 55 }
54 56
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0f1d2b8a952b..1850d0aef4ac 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -83,9 +83,18 @@
83#include <linux/init.h> 83#include <linux/init.h>
84#include <linux/compat.h> 84#include <linux/compat.h>
85#include <linux/mempolicy.h> 85#include <linux/mempolicy.h>
86#include <linux/swap.h>
87#include <linux/seq_file.h>
88#include <linux/proc_fs.h>
89
86#include <asm/tlbflush.h> 90#include <asm/tlbflush.h>
87#include <asm/uaccess.h> 91#include <asm/uaccess.h>
88 92
93/* Internal flags */
94#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */
95#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
96#define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */
97
89static kmem_cache_t *policy_cache; 98static kmem_cache_t *policy_cache;
90static kmem_cache_t *sn_cache; 99static kmem_cache_t *sn_cache;
91 100
@@ -171,12 +180,19 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
171 break; 180 break;
172 } 181 }
173 policy->policy = mode; 182 policy->policy = mode;
183 policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
174 return policy; 184 return policy;
175} 185}
176 186
177/* Ensure all existing pages follow the policy. */ 187static void gather_stats(struct page *, void *);
188static void migrate_page_add(struct vm_area_struct *vma,
189 struct page *page, struct list_head *pagelist, unsigned long flags);
190
191/* Scan through pages checking if pages follow certain conditions. */
178static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, 192static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
179 unsigned long addr, unsigned long end, nodemask_t *nodes) 193 unsigned long addr, unsigned long end,
194 const nodemask_t *nodes, unsigned long flags,
195 void *private)
180{ 196{
181 pte_t *orig_pte; 197 pte_t *orig_pte;
182 pte_t *pte; 198 pte_t *pte;
@@ -193,7 +209,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
193 if (!page) 209 if (!page)
194 continue; 210 continue;
195 nid = page_to_nid(page); 211 nid = page_to_nid(page);
196 if (!node_isset(nid, *nodes)) 212 if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
213 continue;
214
215 if (flags & MPOL_MF_STATS)
216 gather_stats(page, private);
217 else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
218 spin_unlock(ptl);
219 migrate_page_add(vma, page, private, flags);
220 spin_lock(ptl);
221 }
222 else
197 break; 223 break;
198 } while (pte++, addr += PAGE_SIZE, addr != end); 224 } while (pte++, addr += PAGE_SIZE, addr != end);
199 pte_unmap_unlock(orig_pte, ptl); 225 pte_unmap_unlock(orig_pte, ptl);
@@ -201,7 +227,9 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
201} 227}
202 228
203static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, 229static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
204 unsigned long addr, unsigned long end, nodemask_t *nodes) 230 unsigned long addr, unsigned long end,
231 const nodemask_t *nodes, unsigned long flags,
232 void *private)
205{ 233{
206 pmd_t *pmd; 234 pmd_t *pmd;
207 unsigned long next; 235 unsigned long next;
@@ -211,14 +239,17 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
211 next = pmd_addr_end(addr, end); 239 next = pmd_addr_end(addr, end);
212 if (pmd_none_or_clear_bad(pmd)) 240 if (pmd_none_or_clear_bad(pmd))
213 continue; 241 continue;
214 if (check_pte_range(vma, pmd, addr, next, nodes)) 242 if (check_pte_range(vma, pmd, addr, next, nodes,
243 flags, private))
215 return -EIO; 244 return -EIO;
216 } while (pmd++, addr = next, addr != end); 245 } while (pmd++, addr = next, addr != end);
217 return 0; 246 return 0;
218} 247}
219 248
220static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, 249static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
221 unsigned long addr, unsigned long end, nodemask_t *nodes) 250 unsigned long addr, unsigned long end,
251 const nodemask_t *nodes, unsigned long flags,
252 void *private)
222{ 253{
223 pud_t *pud; 254 pud_t *pud;
224 unsigned long next; 255 unsigned long next;
@@ -228,14 +259,17 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
228 next = pud_addr_end(addr, end); 259 next = pud_addr_end(addr, end);
229 if (pud_none_or_clear_bad(pud)) 260 if (pud_none_or_clear_bad(pud))
230 continue; 261 continue;
231 if (check_pmd_range(vma, pud, addr, next, nodes)) 262 if (check_pmd_range(vma, pud, addr, next, nodes,
263 flags, private))
232 return -EIO; 264 return -EIO;
233 } while (pud++, addr = next, addr != end); 265 } while (pud++, addr = next, addr != end);
234 return 0; 266 return 0;
235} 267}
236 268
237static inline int check_pgd_range(struct vm_area_struct *vma, 269static inline int check_pgd_range(struct vm_area_struct *vma,
238 unsigned long addr, unsigned long end, nodemask_t *nodes) 270 unsigned long addr, unsigned long end,
271 const nodemask_t *nodes, unsigned long flags,
272 void *private)
239{ 273{
240 pgd_t *pgd; 274 pgd_t *pgd;
241 unsigned long next; 275 unsigned long next;
@@ -245,16 +279,30 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
245 next = pgd_addr_end(addr, end); 279 next = pgd_addr_end(addr, end);
246 if (pgd_none_or_clear_bad(pgd)) 280 if (pgd_none_or_clear_bad(pgd))
247 continue; 281 continue;
248 if (check_pud_range(vma, pgd, addr, next, nodes)) 282 if (check_pud_range(vma, pgd, addr, next, nodes,
283 flags, private))
249 return -EIO; 284 return -EIO;
250 } while (pgd++, addr = next, addr != end); 285 } while (pgd++, addr = next, addr != end);
251 return 0; 286 return 0;
252} 287}
253 288
254/* Step 1: check the range */ 289/* Check if a vma is migratable */
290static inline int vma_migratable(struct vm_area_struct *vma)
291{
292 if (vma->vm_flags & (
293 VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP))
294 return 0;
295 return 1;
296}
297
298/*
299 * Check if all pages in a range are on a set of nodes.
300 * If pagelist != NULL then isolate pages from the LRU and
301 * put them on the pagelist.
302 */
255static struct vm_area_struct * 303static struct vm_area_struct *
256check_range(struct mm_struct *mm, unsigned long start, unsigned long end, 304check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
257 nodemask_t *nodes, unsigned long flags) 305 const nodemask_t *nodes, unsigned long flags, void *private)
258{ 306{
259 int err; 307 int err;
260 struct vm_area_struct *first, *vma, *prev; 308 struct vm_area_struct *first, *vma, *prev;
@@ -264,17 +312,24 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
264 return ERR_PTR(-EFAULT); 312 return ERR_PTR(-EFAULT);
265 prev = NULL; 313 prev = NULL;
266 for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { 314 for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
267 if (!vma->vm_next && vma->vm_end < end) 315 if (!(flags & MPOL_MF_DISCONTIG_OK)) {
268 return ERR_PTR(-EFAULT); 316 if (!vma->vm_next && vma->vm_end < end)
269 if (prev && prev->vm_end < vma->vm_start) 317 return ERR_PTR(-EFAULT);
270 return ERR_PTR(-EFAULT); 318 if (prev && prev->vm_end < vma->vm_start)
271 if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) { 319 return ERR_PTR(-EFAULT);
320 }
321 if (!is_vm_hugetlb_page(vma) &&
322 ((flags & MPOL_MF_STRICT) ||
323 ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
324 vma_migratable(vma)))) {
272 unsigned long endvma = vma->vm_end; 325 unsigned long endvma = vma->vm_end;
326
273 if (endvma > end) 327 if (endvma > end)
274 endvma = end; 328 endvma = end;
275 if (vma->vm_start > start) 329 if (vma->vm_start > start)
276 start = vma->vm_start; 330 start = vma->vm_start;
277 err = check_pgd_range(vma, start, endvma, nodes); 331 err = check_pgd_range(vma, start, endvma, nodes,
332 flags, private);
278 if (err) { 333 if (err) {
279 first = ERR_PTR(err); 334 first = ERR_PTR(err);
280 break; 335 break;
@@ -333,51 +388,10 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
333 if (!nodes) 388 if (!nodes)
334 return 0; 389 return 0;
335 390
336 /* Update current mems_allowed */ 391 cpuset_update_task_memory_state();
337 cpuset_update_current_mems_allowed(); 392 if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
338 /* Ignore nodes not set in current->mems_allowed */
339 cpuset_restrict_to_mems_allowed(nodes->bits);
340 return mpol_check_policy(mode, nodes);
341}
342
343long do_mbind(unsigned long start, unsigned long len,
344 unsigned long mode, nodemask_t *nmask, unsigned long flags)
345{
346 struct vm_area_struct *vma;
347 struct mm_struct *mm = current->mm;
348 struct mempolicy *new;
349 unsigned long end;
350 int err;
351
352 if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX)
353 return -EINVAL;
354 if (start & ~PAGE_MASK)
355 return -EINVAL;
356 if (mode == MPOL_DEFAULT)
357 flags &= ~MPOL_MF_STRICT;
358 len = (len + PAGE_SIZE - 1) & PAGE_MASK;
359 end = start + len;
360 if (end < start)
361 return -EINVAL; 393 return -EINVAL;
362 if (end == start) 394 return mpol_check_policy(mode, nodes);
363 return 0;
364 if (mpol_check_policy(mode, nmask))
365 return -EINVAL;
366 new = mpol_new(mode, nmask);
367 if (IS_ERR(new))
368 return PTR_ERR(new);
369
370 PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
371 mode,nodes_addr(nodes)[0]);
372
373 down_write(&mm->mmap_sem);
374 vma = check_range(mm, start, end, nmask, flags);
375 err = PTR_ERR(vma);
376 if (!IS_ERR(vma))
377 err = mbind_range(vma, start, end, new);
378 up_write(&mm->mmap_sem);
379 mpol_free(new);
380 return err;
381} 395}
382 396
383/* Set the process memory policy */ 397/* Set the process memory policy */
@@ -448,7 +462,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
448 struct vm_area_struct *vma = NULL; 462 struct vm_area_struct *vma = NULL;
449 struct mempolicy *pol = current->mempolicy; 463 struct mempolicy *pol = current->mempolicy;
450 464
451 cpuset_update_current_mems_allowed(); 465 cpuset_update_task_memory_state();
452 if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) 466 if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
453 return -EINVAL; 467 return -EINVAL;
454 if (flags & MPOL_F_ADDR) { 468 if (flags & MPOL_F_ADDR) {
@@ -500,11 +514,177 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
500} 514}
501 515
502/* 516/*
517 * page migration
518 */
519
520/* Check if we are the only process mapping the page in question */
521static inline int single_mm_mapping(struct mm_struct *mm,
522 struct address_space *mapping)
523{
524 struct vm_area_struct *vma;
525 struct prio_tree_iter iter;
526 int rc = 1;
527
528 spin_lock(&mapping->i_mmap_lock);
529 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
530 if (mm != vma->vm_mm) {
531 rc = 0;
532 goto out;
533 }
534 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
535 if (mm != vma->vm_mm) {
536 rc = 0;
537 goto out;
538 }
539out:
540 spin_unlock(&mapping->i_mmap_lock);
541 return rc;
542}
543
544/*
545 * Add a page to be migrated to the pagelist
546 */
547static void migrate_page_add(struct vm_area_struct *vma,
548 struct page *page, struct list_head *pagelist, unsigned long flags)
549{
550 /*
551 * Avoid migrating a page that is shared by others and not writable.
552 */
553 if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
554 mapping_writably_mapped(page->mapping) ||
555 single_mm_mapping(vma->vm_mm, page->mapping)) {
556 int rc = isolate_lru_page(page);
557
558 if (rc == 1)
559 list_add(&page->lru, pagelist);
560 /*
561 * If the isolate attempt was not successful then we just
562 * encountered an unswappable page. Something must be wrong.
563 */
564 WARN_ON(rc == 0);
565 }
566}
567
568static int swap_pages(struct list_head *pagelist)
569{
570 LIST_HEAD(moved);
571 LIST_HEAD(failed);
572 int n;
573
574 n = migrate_pages(pagelist, NULL, &moved, &failed);
575 putback_lru_pages(&failed);
576 putback_lru_pages(&moved);
577
578 return n;
579}
580
581/*
582 * For now migrate_pages simply swaps out the pages from nodes that are in
583 * the source set but not in the target set. In the future, we would
584 * want a function that moves pages between the two nodesets in such
585 * a way as to preserve the physical layout as much as possible.
586 *
587 * Returns the number of pages that could not be moved.
588 */
589int do_migrate_pages(struct mm_struct *mm,
590 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
591{
592 LIST_HEAD(pagelist);
593 int count = 0;
594 nodemask_t nodes;
595
596 nodes_andnot(nodes, *from_nodes, *to_nodes);
597
598 down_read(&mm->mmap_sem);
599 check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
600 flags | MPOL_MF_DISCONTIG_OK, &pagelist);
601
602 if (!list_empty(&pagelist)) {
603 count = swap_pages(&pagelist);
604 putback_lru_pages(&pagelist);
605 }
606
607 up_read(&mm->mmap_sem);
608 return count;
609}
610
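As a worked example: with @from_nodes = {0,1} and @to_nodes = {1,2}, nodes_andnot() leaves nodes = {0}, so only pages resident on node 0 are isolated and swapped out; pages already on node 1 are in both sets and stay where they are.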
611long do_mbind(unsigned long start, unsigned long len,
612 unsigned long mode, nodemask_t *nmask, unsigned long flags)
613{
614 struct vm_area_struct *vma;
615 struct mm_struct *mm = current->mm;
616 struct mempolicy *new;
617 unsigned long end;
618 int err;
619 LIST_HEAD(pagelist);
620
621 if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
622 MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
623 || mode > MPOL_MAX)
624 return -EINVAL;
625 if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
626 return -EPERM;
627
628 if (start & ~PAGE_MASK)
629 return -EINVAL;
630
631 if (mode == MPOL_DEFAULT)
632 flags &= ~MPOL_MF_STRICT;
633
634 len = (len + PAGE_SIZE - 1) & PAGE_MASK;
635 end = start + len;
636
637 if (end < start)
638 return -EINVAL;
639 if (end == start)
640 return 0;
641
642 if (mpol_check_policy(mode, nmask))
643 return -EINVAL;
644
645 new = mpol_new(mode, nmask);
646 if (IS_ERR(new))
647 return PTR_ERR(new);
648
649 /*
650 * If we are using the default policy then operation
651 * on discontinuous address spaces is okay after all
652 */
653 if (!new)
654 flags |= MPOL_MF_DISCONTIG_OK;
655
656 PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
657 mode, nodes_addr(*nmask)[0]);
658
659 down_write(&mm->mmap_sem);
660 vma = check_range(mm, start, end, nmask,
661 flags | MPOL_MF_INVERT, &pagelist);
662
663 err = PTR_ERR(vma);
664 if (!IS_ERR(vma)) {
665 int nr_failed = 0;
666
667 err = mbind_range(vma, start, end, new);
668 if (!list_empty(&pagelist))
669 nr_failed = swap_pages(&pagelist);
670
671 if (!err && nr_failed && (flags & MPOL_MF_STRICT))
672 err = -EIO;
673 }
674 if (!list_empty(&pagelist))
675 putback_lru_pages(&pagelist);
676
677 up_write(&mm->mmap_sem);
678 mpol_free(new);
679 return err;
680}
681
682/*
503 * User space interface with variable sized bitmaps for nodelists. 683 * User space interface with variable sized bitmaps for nodelists.
504 */ 684 */
505 685
506/* Copy a node mask from user space. */ 686/* Copy a node mask from user space. */
507static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask, 687static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
508 unsigned long maxnode) 688 unsigned long maxnode)
509{ 689{
510 unsigned long k; 690 unsigned long k;
@@ -593,6 +773,65 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
593 return do_set_mempolicy(mode, &nodes); 773 return do_set_mempolicy(mode, &nodes);
594} 774}
595 775
776asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
777 const unsigned long __user *old_nodes,
778 const unsigned long __user *new_nodes)
779{
780 struct mm_struct *mm;
781 struct task_struct *task;
782 nodemask_t old;
783 nodemask_t new;
784 nodemask_t task_nodes;
785 int err;
786
787 err = get_nodes(&old, old_nodes, maxnode);
788 if (err)
789 return err;
790
791 err = get_nodes(&new, new_nodes, maxnode);
792 if (err)
793 return err;
794
795 /* Find the mm_struct */
796 read_lock(&tasklist_lock);
797 task = pid ? find_task_by_pid(pid) : current;
798 if (!task) {
799 read_unlock(&tasklist_lock);
800 return -ESRCH;
801 }
802 mm = get_task_mm(task);
803 read_unlock(&tasklist_lock);
804
805 if (!mm)
806 return -EINVAL;
807
808 /*
809 * Check if this process has the right to modify the specified
810 * process. The right exists if the process has administrative
811 * capabilities, superuser privileges or the same
812 * userid as the target process.
813 */
814 if ((current->euid != task->suid) && (current->euid != task->uid) &&
815 (current->uid != task->suid) && (current->uid != task->uid) &&
816 !capable(CAP_SYS_ADMIN)) {
817 err = -EPERM;
818 goto out;
819 }
820
821 task_nodes = cpuset_mems_allowed(task);
822 /* Is the user allowed to access the target nodes? */
823 if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) {
824 err = -EPERM;
825 goto out;
826 }
827
828 err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE);
829out:
830 mmput(mm);
831 return err;
832}
833
834
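From userspace the new syscall takes two node bitmaps plus their size in bits; a minimal sketch (hypothetical: it assumes __NR_migrate_pages is wired up for the target architecture, which this hunk does not show):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	unsigned long old_nodes = 1UL << 0;	/* from: node 0 */
	unsigned long new_nodes = 1UL << 1;	/* to:   node 1 */
	long rc = syscall(__NR_migrate_pages, 0 /* pid 0: self */,
			  sizeof(old_nodes) * 8, &old_nodes, &new_nodes);

	if (rc < 0)
		perror("migrate_pages");
	else
		printf("%ld pages could not be moved\n", rc);
	return 0;
}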
596/* Retrieve NUMA policy */ 835/* Retrieve NUMA policy */
597asmlinkage long sys_get_mempolicy(int __user *policy, 836asmlinkage long sys_get_mempolicy(int __user *policy,
598 unsigned long __user *nmask, 837 unsigned long __user *nmask,
@@ -699,8 +938,8 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
699#endif 938#endif
700 939
701/* Return effective policy for a VMA */ 940/* Return effective policy for a VMA */
702struct mempolicy * 941static struct mempolicy * get_vma_policy(struct task_struct *task,
703get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) 942 struct vm_area_struct *vma, unsigned long addr)
704{ 943{
705 struct mempolicy *pol = task->mempolicy; 944 struct mempolicy *pol = task->mempolicy;
706 945
@@ -848,7 +1087,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
848{ 1087{
849 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1088 struct mempolicy *pol = get_vma_policy(current, vma, addr);
850 1089
851 cpuset_update_current_mems_allowed(); 1090 cpuset_update_task_memory_state();
852 1091
853 if (unlikely(pol->policy == MPOL_INTERLEAVE)) { 1092 if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
854 unsigned nid; 1093 unsigned nid;
@@ -874,7 +1113,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
874 * interrupt context and apply the current process NUMA policy. 1113 * interrupt context and apply the current process NUMA policy.
875 * Returns NULL when no page can be allocated. 1114 * Returns NULL when no page can be allocated.
876 * 1115 *
877 * Don't call cpuset_update_current_mems_allowed() unless 1116 * Don't call cpuset_update_task_memory_state() unless
878 * 1) it's ok to take cpuset_sem (can WAIT), and 1117 * 1) it's ok to take cpuset_sem (can WAIT), and
879 * 2) allocating for current task (not interrupt). 1118 * 2) allocating for current task (not interrupt).
880 */ 1119 */
@@ -883,7 +1122,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
883 struct mempolicy *pol = current->mempolicy; 1122 struct mempolicy *pol = current->mempolicy;
884 1123
885 if ((gfp & __GFP_WAIT) && !in_interrupt()) 1124 if ((gfp & __GFP_WAIT) && !in_interrupt())
886 cpuset_update_current_mems_allowed(); 1125 cpuset_update_task_memory_state();
887 if (!pol || in_interrupt()) 1126 if (!pol || in_interrupt())
888 pol = &default_policy; 1127 pol = &default_policy;
889 if (pol->policy == MPOL_INTERLEAVE) 1128 if (pol->policy == MPOL_INTERLEAVE)
@@ -892,6 +1131,15 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
892} 1131}
893EXPORT_SYMBOL(alloc_pages_current); 1132EXPORT_SYMBOL(alloc_pages_current);
894 1133
1134/*
1135 * If mpol_copy() sees current->cpuset == cpuset_being_rebound, then it
1136 * rebinds the mempolicy it is copying by calling mpol_rebind_policy()
1137 * with the mems_allowed returned by cpuset_mems_allowed(). This
1138 * keeps mempolicies cpuset-relative after the task's cpuset moves. See
1139 * further kernel/cpuset.c update_nodemask().
1140 */
1141void *cpuset_being_rebound;
1142
895/* Slow path of a mempolicy copy */ 1143/* Slow path of a mempolicy copy */
896struct mempolicy *__mpol_copy(struct mempolicy *old) 1144struct mempolicy *__mpol_copy(struct mempolicy *old)
897{ 1145{
@@ -899,6 +1147,10 @@ struct mempolicy *__mpol_copy(struct mempolicy *old)
899 1147
900 if (!new) 1148 if (!new)
901 return ERR_PTR(-ENOMEM); 1149 return ERR_PTR(-ENOMEM);
1150 if (current_cpuset_is_being_rebound()) {
1151 nodemask_t mems = cpuset_mems_allowed(current);
1152 mpol_rebind_policy(old, &mems);
1153 }
902 *new = *old; 1154 *new = *old;
903 atomic_set(&new->refcnt, 1); 1155 atomic_set(&new->refcnt, 1);
904 if (new->policy == MPOL_BIND) { 1156 if (new->policy == MPOL_BIND) {
@@ -1173,25 +1425,31 @@ void numa_default_policy(void)
1173} 1425}
1174 1426
1175/* Migrate a policy to a different set of nodes */ 1427/* Migrate a policy to a different set of nodes */
1176static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, 1428void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
1177 const nodemask_t *new)
1178{ 1429{
1430 nodemask_t *mpolmask;
1179 nodemask_t tmp; 1431 nodemask_t tmp;
1180 1432
1181 if (!pol) 1433 if (!pol)
1182 return; 1434 return;
1435 mpolmask = &pol->cpuset_mems_allowed;
1436 if (nodes_equal(*mpolmask, *newmask))
1437 return;
1183 1438
1184 switch (pol->policy) { 1439 switch (pol->policy) {
1185 case MPOL_DEFAULT: 1440 case MPOL_DEFAULT:
1186 break; 1441 break;
1187 case MPOL_INTERLEAVE: 1442 case MPOL_INTERLEAVE:
1188 nodes_remap(tmp, pol->v.nodes, *old, *new); 1443 nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
1189 pol->v.nodes = tmp; 1444 pol->v.nodes = tmp;
1190 current->il_next = node_remap(current->il_next, *old, *new); 1445 *mpolmask = *newmask;
1446 current->il_next = node_remap(current->il_next,
1447 *mpolmask, *newmask);
1191 break; 1448 break;
1192 case MPOL_PREFERRED: 1449 case MPOL_PREFERRED:
1193 pol->v.preferred_node = node_remap(pol->v.preferred_node, 1450 pol->v.preferred_node = node_remap(pol->v.preferred_node,
1194 *old, *new); 1451 *mpolmask, *newmask);
1452 *mpolmask = *newmask;
1195 break; 1453 break;
1196 case MPOL_BIND: { 1454 case MPOL_BIND: {
1197 nodemask_t nodes; 1455 nodemask_t nodes;
@@ -1201,7 +1459,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
1201 nodes_clear(nodes); 1459 nodes_clear(nodes);
1202 for (z = pol->v.zonelist->zones; *z; z++) 1460 for (z = pol->v.zonelist->zones; *z; z++)
1203 node_set((*z)->zone_pgdat->node_id, nodes); 1461 node_set((*z)->zone_pgdat->node_id, nodes);
1204 nodes_remap(tmp, nodes, *old, *new); 1462 nodes_remap(tmp, nodes, *mpolmask, *newmask);
1205 nodes = tmp; 1463 nodes = tmp;
1206 1464
1207 zonelist = bind_zonelist(&nodes); 1465 zonelist = bind_zonelist(&nodes);
@@ -1216,6 +1474,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
1216 kfree(pol->v.zonelist); 1474 kfree(pol->v.zonelist);
1217 pol->v.zonelist = zonelist; 1475 pol->v.zonelist = zonelist;
1218 } 1476 }
1477 *mpolmask = *newmask;
1219 break; 1478 break;
1220 } 1479 }
1221 default: 1480 default:
@@ -1225,12 +1484,156 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
1225} 1484}
1226 1485
1227/* 1486/*
1228 * Someone moved this task to different nodes. Fixup mempolicies. 1487 * Wrapper for mpol_rebind_policy() that just requires task
1488 * pointer, and updates task mempolicy.
1489 */
1490
1491void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
1492{
1493 mpol_rebind_policy(tsk->mempolicy, new);
1494}
1495
1496/*
1497 * Rebind each vma in mm to new nodemask.
1229 * 1498 *
1230 * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well, 1499 * Call holding a reference to mm. Takes mm->mmap_sem during call.
1231 * once we have a cpuset mechanism to mark which cpuset subtree is migrating.
1232 */ 1500 */
1233void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new) 1501
1502void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
1234{ 1503{
1235 rebind_policy(current->mempolicy, old, new); 1504 struct vm_area_struct *vma;
1505
1506 down_write(&mm->mmap_sem);
1507 for (vma = mm->mmap; vma; vma = vma->vm_next)
1508 mpol_rebind_policy(vma->vm_policy, new);
1509 up_write(&mm->mmap_sem);
1236} 1510}
1511
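Worked example of the remap step: a task interleaving over nodes {0,1} whose cpuset's mems_allowed changes from {0,1} to {2,3} has nodes_remap() map node 0 to node 2 and node 1 to node 3 (by ordinal position), so pol->v.nodes becomes {2,3} and pol->cpuset_mems_allowed is updated to the new mask, making a second rebind with the same mask a no-op.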
1512/*
1513 * Display pages allocated per node and memory policy via /proc.
1514 */
1515
1516static const char *policy_types[] = { "default", "prefer", "bind",
1517 "interleave" };
1518
1519/*
1520 * Convert a mempolicy into a string.
1521 * Returns the number of characters in buffer (if positive)
1522 * or an error (negative)
1523 */
1524static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
1525{
1526 char *p = buffer;
1527 int l;
1528 nodemask_t nodes;
1529 int mode = pol ? pol->policy : MPOL_DEFAULT;
1530
1531 switch (mode) {
1532 case MPOL_DEFAULT:
1533 nodes_clear(nodes);
1534 break;
1535
1536 case MPOL_PREFERRED:
1537 nodes_clear(nodes);
1538 node_set(pol->v.preferred_node, nodes);
1539 break;
1540
1541 case MPOL_BIND:
1542 get_zonemask(pol, &nodes);
1543 break;
1544
1545 case MPOL_INTERLEAVE:
1546 nodes = pol->v.nodes;
1547 break;
1548
1549 default:
1550 BUG();
1551 return -EFAULT;
1552 }
1553
1554 l = strlen(policy_types[mode]);
1555 if (buffer + maxlen < p + l + 1)
1556 return -ENOSPC;
1557
1558 strcpy(p, policy_types[mode]);
1559 p += l;
1560
1561 if (!nodes_empty(nodes)) {
1562 if (buffer + maxlen < p + 2)
1563 return -ENOSPC;
1564 *p++ = '=';
1565 p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
1566 }
1567 return p - buffer;
1568}
1569
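The strings mpol_to_str() produces combine the policy_types[] name with an optional nodelist; for instance (illustrative values): "default", "prefer=2", "bind=0-1", "interleave=0-3".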
1570struct numa_maps {
1571 unsigned long pages;
1572 unsigned long anon;
1573 unsigned long mapped;
1574 unsigned long mapcount_max;
1575 unsigned long node[MAX_NUMNODES];
1576};
1577
1578static void gather_stats(struct page *page, void *private)
1579{
1580 struct numa_maps *md = private;
1581 int count = page_mapcount(page);
1582
1583 if (count)
1584 md->mapped++;
1585
1586 if (count > md->mapcount_max)
1587 md->mapcount_max = count;
1588
1589 md->pages++;
1590
1591 if (PageAnon(page))
1592 md->anon++;
1593
1594 md->node[page_to_nid(page)]++;
1595 cond_resched();
1596}
1597
1598int show_numa_map(struct seq_file *m, void *v)
1599{
1600 struct task_struct *task = m->private;
1601 struct vm_area_struct *vma = v;
1602 struct numa_maps *md;
1603 int n;
1604 char buffer[50];
1605
1606 if (!vma->vm_mm)
1607 return 0;
1608
1609 md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
1610 if (!md)
1611 return 0;
1612
1613 check_pgd_range(vma, vma->vm_start, vma->vm_end,
1614 &node_online_map, MPOL_MF_STATS, md);
1615
1616 if (md->pages) {
1617 mpol_to_str(buffer, sizeof(buffer),
1618 get_vma_policy(task, vma, vma->vm_start));
1619
1620 seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu",
1621 vma->vm_start, buffer, md->pages,
1622 md->mapped, md->mapcount_max);
1623
1624 if (md->anon)
1625 seq_printf(m," anon=%lu",md->anon);
1626
1627 for_each_online_node(n)
1628 if (md->node[n])
1629 seq_printf(m, " N%d=%lu", n, md->node[n]);
1630
1631 seq_putc(m, '\n');
1632 }
1633 kfree(md);
1634
1635 if (m->count < m->size)
1636 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
1637 return 0;
1638}
1639
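With the format strings above, a typical show_numa_map() line reads (hypothetical values):

	08048000 interleave=0-1 pages=12 mapped=12 maxref=1 anon=4 N0=6 N1=6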
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d348b9035955..4748b906aff2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -298,7 +298,8 @@ retry:
298 298
299 /* 299 /*
300 * Give "p" a good chance of killing itself before we 300 * Give "p" a good chance of killing itself before we
301 * retry to allocate memory. 301 * retry to allocate memory, unless "p" is current.
302 */ 302 */
303 schedule_timeout_interruptible(1); 303 if (!test_thread_flag(TIF_MEMDIE))
304 schedule_timeout_interruptible(1);
304} 305}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fd47494cb989..e0e84924171b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly;
53unsigned long totalram_pages __read_mostly; 53unsigned long totalram_pages __read_mostly;
54unsigned long totalhigh_pages __read_mostly; 54unsigned long totalhigh_pages __read_mostly;
55long nr_swap_pages; 55long nr_swap_pages;
56int percpu_pagelist_fraction;
56 57
57static void fastcall free_hot_cold_page(struct page *page, int cold); 58static void fastcall free_hot_cold_page(struct page *page, int cold);
58 59
@@ -307,7 +308,7 @@ static inline int page_is_buddy(struct page *page, int order)
307 * -- wli 308 * -- wli
308 */ 309 */
309 310
310static inline void __free_pages_bulk (struct page *page, 311static inline void __free_one_page(struct page *page,
311 struct zone *zone, unsigned int order) 312 struct zone *zone, unsigned int order)
312{ 313{
313 unsigned long page_idx; 314 unsigned long page_idx;
@@ -382,40 +383,42 @@ static inline int free_pages_check(struct page *page)
382 * And clear the zone's pages_scanned counter, to hold off the "all pages are 383 * And clear the zone's pages_scanned counter, to hold off the "all pages are
383 * pinned" detection logic. 384 * pinned" detection logic.
384 */ 385 */
385static int 386static void free_pages_bulk(struct zone *zone, int count,
386free_pages_bulk(struct zone *zone, int count, 387 struct list_head *list, int order)
387 struct list_head *list, unsigned int order)
388{ 388{
389 struct page *page = NULL;
390 int ret = 0;
391
392 spin_lock(&zone->lock); 389 spin_lock(&zone->lock);
393 zone->all_unreclaimable = 0; 390 zone->all_unreclaimable = 0;
394 zone->pages_scanned = 0; 391 zone->pages_scanned = 0;
395 while (!list_empty(list) && count--) { 392 while (count--) {
393 struct page *page;
394
395 BUG_ON(list_empty(list));
396 page = list_entry(list->prev, struct page, lru); 396 page = list_entry(list->prev, struct page, lru);
397 /* have to delete it as __free_pages_bulk list manipulates */ 397 /* have to delete it as __free_one_page list manipulates */
398 list_del(&page->lru); 398 list_del(&page->lru);
399 __free_pages_bulk(page, zone, order); 399 __free_one_page(page, zone, order);
400 ret++;
401 } 400 }
402 spin_unlock(&zone->lock); 401 spin_unlock(&zone->lock);
403 return ret;
404} 402}
405 403
406void __free_pages_ok(struct page *page, unsigned int order) 404static void free_one_page(struct zone *zone, struct page *page, int order)
407{ 405{
408 unsigned long flags;
409 LIST_HEAD(list); 406 LIST_HEAD(list);
407 list_add(&page->lru, &list);
408 free_pages_bulk(zone, 1, &list, order);
409}
410
411static void __free_pages_ok(struct page *page, unsigned int order)
412{
413 unsigned long flags;
410 int i; 414 int i;
411 int reserved = 0; 415 int reserved = 0;
412 416
413 arch_free_page(page, order); 417 arch_free_page(page, order);
414 418
415#ifndef CONFIG_MMU 419#ifndef CONFIG_MMU
416 if (order > 0) 420 for (i = 1 ; i < (1 << order) ; ++i)
417 for (i = 1 ; i < (1 << order) ; ++i) 421 __put_page(page + i);
418 __put_page(page + i);
419#endif 422#endif
420 423
421 for (i = 0 ; i < (1 << order) ; ++i) 424 for (i = 0 ; i < (1 << order) ; ++i)
@@ -423,11 +426,10 @@ void __free_pages_ok(struct page *page, unsigned int order)
423 if (reserved) 426 if (reserved)
424 return; 427 return;
425 428
426 list_add(&page->lru, &list); 429 kernel_map_pages(page, 1 << order, 0);
427 kernel_map_pages(page, 1<<order, 0);
428 local_irq_save(flags); 430 local_irq_save(flags);
429 __mod_page_state(pgfree, 1 << order); 431 __mod_page_state(pgfree, 1 << order);
430 free_pages_bulk(page_zone(page), 1, &list, order); 432 free_one_page(page_zone(page), page, order);
431 local_irq_restore(flags); 433 local_irq_restore(flags);
432} 434}
433 435
@@ -596,14 +598,13 @@ void drain_remote_pages(void)
596 if (zone->zone_pgdat->node_id == numa_node_id()) 598 if (zone->zone_pgdat->node_id == numa_node_id())
597 continue; 599 continue;
598 600
599 pset = zone->pageset[smp_processor_id()]; 601 pset = zone_pcp(zone, smp_processor_id());
600 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { 602 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
601 struct per_cpu_pages *pcp; 603 struct per_cpu_pages *pcp;
602 604
603 pcp = &pset->pcp[i]; 605 pcp = &pset->pcp[i];
604 if (pcp->count) 606 free_pages_bulk(zone, pcp->count, &pcp->list, 0);
605 pcp->count -= free_pages_bulk(zone, pcp->count, 607 pcp->count = 0;
606 &pcp->list, 0);
607 } 608 }
608 } 609 }
609 local_irq_restore(flags); 610 local_irq_restore(flags);
@@ -626,8 +627,8 @@ static void __drain_pages(unsigned int cpu)
626 627
627 pcp = &pset->pcp[i]; 628 pcp = &pset->pcp[i];
628 local_irq_save(flags); 629 local_irq_save(flags);
629 pcp->count -= free_pages_bulk(zone, pcp->count, 630 free_pages_bulk(zone, pcp->count, &pcp->list, 0);
630 &pcp->list, 0); 631 pcp->count = 0;
631 local_irq_restore(flags); 632 local_irq_restore(flags);
632 } 633 }
633 } 634 }
@@ -718,8 +719,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
718 __inc_page_state(pgfree); 719 __inc_page_state(pgfree);
719 list_add(&page->lru, &pcp->list); 720 list_add(&page->lru, &pcp->list);
720 pcp->count++; 721 pcp->count++;
721 if (pcp->count >= pcp->high) 722 if (pcp->count >= pcp->high) {
722 pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); 723 free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
724 pcp->count -= pcp->batch;
725 }
723 local_irq_restore(flags); 726 local_irq_restore(flags);
724 put_cpu(); 727 put_cpu();
725} 728}
@@ -758,7 +761,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
758 761
759again: 762again:
760 cpu = get_cpu(); 763 cpu = get_cpu();
761 if (order == 0) { 764 if (likely(order == 0)) {
762 struct per_cpu_pages *pcp; 765 struct per_cpu_pages *pcp;
763 766
764 pcp = &zone_pcp(zone, cpu)->pcp[cold]; 767 pcp = &zone_pcp(zone, cpu)->pcp[cold];
@@ -973,6 +976,7 @@ rebalance:
973 cond_resched(); 976 cond_resched();
974 977
975 /* We now go into synchronous reclaim */ 978 /* We now go into synchronous reclaim */
979 cpuset_memory_pressure_bump();
976 p->flags |= PF_MEMALLOC; 980 p->flags |= PF_MEMALLOC;
977 reclaim_state.reclaimed_slab = 0; 981 reclaim_state.reclaimed_slab = 0;
978 p->reclaim_state = &reclaim_state; 982 p->reclaim_state = &reclaim_state;
@@ -1204,6 +1208,7 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
1204 int cpu = 0; 1208 int cpu = 0;
1205 1209
1206 memset(ret, 0, sizeof(*ret)); 1210 memset(ret, 0, sizeof(*ret));
1211 cpus_and(*cpumask, *cpumask, cpu_online_map);
1207 1212
1208 cpu = first_cpu(*cpumask); 1213 cpu = first_cpu(*cpumask);
1209 while (cpu < NR_CPUS) { 1214 while (cpu < NR_CPUS) {
@@ -1256,7 +1261,7 @@ unsigned long read_page_state_offset(unsigned long offset)
1256 unsigned long ret = 0; 1261 unsigned long ret = 0;
1257 int cpu; 1262 int cpu;
1258 1263
1259 for_each_cpu(cpu) { 1264 for_each_online_cpu(cpu) {
1260 unsigned long in; 1265 unsigned long in;
1261 1266
1262 in = (unsigned long)&per_cpu(page_states, cpu) + offset; 1267 in = (unsigned long)&per_cpu(page_states, cpu) + offset;
@@ -1830,6 +1835,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
1830 INIT_LIST_HEAD(&pcp->list); 1835 INIT_LIST_HEAD(&pcp->list);
1831} 1836}
1832 1837
1838/*
1839 * setup_pagelist_highmark() sets the high water mark of the hot per-cpu
1840 * pagelist to the value @high for the pageset @p.
1841 */
1842
1843static void setup_pagelist_highmark(struct per_cpu_pageset *p,
1844 unsigned long high)
1845{
1846 struct per_cpu_pages *pcp;
1847
1848 pcp = &p->pcp[0]; /* hot list */
1849 pcp->high = high;
1850 pcp->batch = max(1UL, high/4);
1851 if ((high/4) > (PAGE_SHIFT * 8))
1852 pcp->batch = PAGE_SHIFT * 8;
1853}
1854
1855
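Worked numbers (assuming 4 KiB pages, so PAGE_SHIFT * 8 == 96): for high == 200 the batch becomes max(1, 200/4) == 50; for high == 1024, 1024/4 == 256 exceeds 96, so the batch is clamped to 96.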
1833#ifdef CONFIG_NUMA 1856#ifdef CONFIG_NUMA
1834/* 1857/*
1835 * Boot pageset table. One per cpu which is going to be used for all 1858 * Boot pageset table. One per cpu which is going to be used for all
@@ -1861,12 +1884,16 @@ static int __devinit process_zones(int cpu)
1861 1884
1862 for_each_zone(zone) { 1885 for_each_zone(zone) {
1863 1886
1864 zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset), 1887 zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
1865 GFP_KERNEL, cpu_to_node(cpu)); 1888 GFP_KERNEL, cpu_to_node(cpu));
1866 if (!zone->pageset[cpu]) 1889 if (!zone_pcp(zone, cpu))
1867 goto bad; 1890 goto bad;
1868 1891
1869 setup_pageset(zone->pageset[cpu], zone_batchsize(zone)); 1892 setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
1893
1894 if (percpu_pagelist_fraction)
1895 setup_pagelist_highmark(zone_pcp(zone, cpu),
1896 (zone->present_pages / percpu_pagelist_fraction));
1870 } 1897 }
1871 1898
1872 return 0; 1899 return 0;
@@ -1874,15 +1901,14 @@ bad:
1874 for_each_zone(dzone) { 1901 for_each_zone(dzone) {
1875 if (dzone == zone) 1902 if (dzone == zone)
1876 break; 1903 break;
1877 kfree(dzone->pageset[cpu]); 1904 kfree(zone_pcp(dzone, cpu));
1878 dzone->pageset[cpu] = NULL; 1905 zone_pcp(dzone, cpu) = NULL;
1879 } 1906 }
1880 return -ENOMEM; 1907 return -ENOMEM;
1881} 1908}
1882 1909
1883static inline void free_zone_pagesets(int cpu) 1910static inline void free_zone_pagesets(int cpu)
1884{ 1911{
1885#ifdef CONFIG_NUMA
1886 struct zone *zone; 1912 struct zone *zone;
1887 1913
1888 for_each_zone(zone) { 1914 for_each_zone(zone) {
@@ -1891,7 +1917,6 @@ static inline void free_zone_pagesets(int cpu)
1891 zone_pcp(zone, cpu) = NULL; 1917 zone_pcp(zone, cpu) = NULL;
1892 kfree(pset); 1918 kfree(pset);
1893 } 1919 }
1894#endif
1895} 1920}
1896 1921
1897static int __devinit pageset_cpuup_callback(struct notifier_block *nfb, 1922static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
@@ -1962,7 +1987,7 @@ static __devinit void zone_pcp_init(struct zone *zone)
1962 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1987 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1963#ifdef CONFIG_NUMA 1988#ifdef CONFIG_NUMA
1964 /* Early boot. Slab allocator not functional yet */ 1989 /* Early boot. Slab allocator not functional yet */
1965 zone->pageset[cpu] = &boot_pageset[cpu]; 1990 zone_pcp(zone, cpu) = &boot_pageset[cpu];
1966 setup_pageset(&boot_pageset[cpu],0); 1991 setup_pageset(&boot_pageset[cpu],0);
1967#else 1992#else
1968 setup_pageset(zone_pcp(zone,cpu), batch); 1993 setup_pageset(zone_pcp(zone,cpu), batch);
@@ -2205,7 +2230,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
2205 seq_printf(m, 2230 seq_printf(m,
2206 ")" 2231 ")"
2207 "\n pagesets"); 2232 "\n pagesets");
2208 for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) { 2233 for_each_online_cpu(i) {
2209 struct per_cpu_pageset *pageset; 2234 struct per_cpu_pageset *pageset;
2210 int j; 2235 int j;
2211 2236
@@ -2568,6 +2593,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
2568 return 0; 2593 return 0;
2569} 2594}
2570 2595
2596/*
2597 * percpu_pagelist_fraction - changes the pcp->high for each zone on each
2598 * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist
2599 * can have before it gets flushed back to buddy allocator.
2600 */
2601
2602int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
2603 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2604{
2605 struct zone *zone;
2606 unsigned int cpu;
2607 int ret;
2608
2609 ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2610 if (!write || (ret == -EINVAL))
2611 return ret;
2612 for_each_zone(zone) {
2613 for_each_online_cpu(cpu) {
2614 unsigned long high;
2615 high = zone->present_pages / percpu_pagelist_fraction;
2616 setup_pagelist_highmark(zone_pcp(zone, cpu), high);
2617 }
2618 }
2619 return 0;
2620}
2621
2571__initdata int hashdist = HASHDIST_DEFAULT; 2622__initdata int hashdist = HASHDIST_DEFAULT;
2572 2623
2573#ifdef CONFIG_NUMA 2624#ifdef CONFIG_NUMA
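The arithmetic behind the new per-cpu pagelist tuning above is compact enough to check by hand. Below is a minimal user-space sketch of the same policy; PAGE_SHIFT, the zone size, and the fraction are illustrative assumptions, not kernel state.

#include <stdio.h>

/* Illustrative only: mirrors the policy of setup_pagelist_highmark() above.
 * The constants are made-up example values, not kernel state. */
#define PAGE_SHIFT 12

static unsigned long pcp_batch_for_high(unsigned long high)
{
	unsigned long batch = high / 4;

	if (batch < 1)
		batch = 1;			/* max(1UL, high/4) */
	if ((high / 4) > (PAGE_SHIFT * 8))
		batch = PAGE_SHIFT * 8;		/* clamp, as in the patch */
	return batch;
}

int main(void)
{
	unsigned long present_pages = 262144;	/* example: 1GB zone, 4K pages */
	int fraction = 8;	/* example percpu_pagelist_fraction setting */
	unsigned long high = present_pages / fraction;

	printf("high=%lu batch=%lu\n", high, pcp_batch_for_high(high));
	return 0;
}

With these example numbers the hot list is allowed to hold 32768 pages and is drained in batches capped at 96 pages.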
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 52822c98c489..c4b6d0afd736 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -90,7 +90,7 @@ struct pdflush_work {
 
 static int __pdflush(struct pdflush_work *my_work)
 {
-	current->flags |= PF_FLUSHER;
+	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
 	my_work->fn = NULL;
 	my_work->who = current;
 	INIT_LIST_HEAD(&my_work->list);
diff --git a/mm/rmap.c b/mm/rmap.c
index 6f3f7db27128..66ec43053a4d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -514,6 +514,13 @@ void page_add_file_rmap(struct page *page)
 void page_remove_rmap(struct page *page)
 {
 	if (atomic_add_negative(-1, &page->_mapcount)) {
+		if (page_mapcount(page) < 0) {
+			printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
+			printk (KERN_EMERG "  page->flags = %lx\n", page->flags);
+			printk (KERN_EMERG "  page->count = %x\n", page_count(page));
+			printk (KERN_EMERG "  page->mapping = %p\n", page->mapping);
+		}
+
 		BUG_ON(page_mapcount(page) < 0);
 		/*
 		 * It would be tidy to reset the PageAnon mapping here,
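The diagnostics added above fire when a page's mapping count is decremented below zero. As a rough user-space sketch of the underlying last-unmap test, with a plain int standing in for the kernel's atomic type:

#include <stdio.h>

/* Stand-in for the kernel's atomic_add_negative(): adds i to *v and
 * reports whether the result went negative. Not the real atomic op. */
static int add_negative(int i, int *v)
{
	*v += i;
	return *v < 0;
}

int main(void)
{
	int mapcount = 0;	/* _mapcount convention: -1 means unmapped */

	/* Dropping the only mapping takes _mapcount from 0 to -1. */
	if (add_negative(-1, &mapcount))
		printf("last mapping gone, mapcount now %d\n", mapcount + 1);

	/* A second, bogus unmap drives the visible mapcount (mapcount + 1)
	 * negative, which is the case the Eeek! printks above report. */
	if (add_negative(-1, &mapcount) && (mapcount + 1) < 0)
		printf("Eeek! mapcount went negative: %d\n", mapcount + 1);
	return 0;
}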
diff --git a/mm/slab.c b/mm/slab.c
index e5ec26e0c460..1c46c6383552 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -130,7 +130,6 @@
 #define	FORCED_DEBUG	0
 #endif
 
-
 /* Shouldn't this be in a header file somewhere? */
 #define	BYTES_PER_WORD		sizeof(void *)
 
@@ -217,12 +216,12 @@ static unsigned long offslab_limit;
  * Slabs are chained into three list: fully used, partial, fully free slabs.
  */
 struct slab {
 	struct list_head list;
 	unsigned long colouroff;
 	void *s_mem;		/* including colour offset */
 	unsigned int inuse;	/* num of objs active in slab */
 	kmem_bufctl_t free;
 	unsigned short nodeid;
 };
 
@@ -242,9 +241,9 @@ struct slab {
  * We assume struct slab_rcu can overlay struct slab when destroying.
 */
 struct slab_rcu {
 	struct rcu_head head;
 	kmem_cache_t *cachep;
 	void *addr;
 };
 
 /*
@@ -279,23 +278,23 @@ struct array_cache {
 #define BOOT_CPUCACHE_ENTRIES	1
 struct arraycache_init {
 	struct array_cache cache;
-	void * entries[BOOT_CPUCACHE_ENTRIES];
+	void *entries[BOOT_CPUCACHE_ENTRIES];
 };
 
 /*
  * The slab lists for all objects.
 */
 struct kmem_list3 {
 	struct list_head slabs_partial;	/* partial list first, better asm code */
 	struct list_head slabs_full;
 	struct list_head slabs_free;
 	unsigned long free_objects;
 	unsigned long next_reap;
 	int free_touched;
 	unsigned int free_limit;
 	spinlock_t list_lock;
 	struct array_cache *shared;	/* shared per node */
 	struct array_cache **alien;	/* on other nodes */
 };
 
 /*
@@ -367,63 +366,63 @@ static inline void kmem_list3_init(struct kmem_list3 *parent)
 *
 * manages a cache.
 */
 
 struct kmem_cache {
 /* 1) per-cpu data, touched during every alloc/free */
 	struct array_cache *array[NR_CPUS];
 	unsigned int batchcount;
 	unsigned int limit;
 	unsigned int shared;
 	unsigned int objsize;
 /* 2) touched by every alloc & free from the backend */
 	struct kmem_list3 *nodelists[MAX_NUMNODES];
 	unsigned int flags;	/* constant flags */
 	unsigned int num;	/* # of objs per slab */
 	spinlock_t spinlock;
 
 /* 3) cache_grow/shrink */
 	/* order of pgs per slab (2^n) */
 	unsigned int gfporder;
 
 	/* force GFP flags, e.g. GFP_DMA */
 	gfp_t gfpflags;
 
 	size_t colour;		/* cache colouring range */
 	unsigned int colour_off;	/* colour offset */
 	unsigned int colour_next;	/* cache colouring */
 	kmem_cache_t *slabp_cache;
 	unsigned int slab_size;
 	unsigned int dflags;	/* dynamic flags */
 
 	/* constructor func */
-	void (*ctor)(void *, kmem_cache_t *, unsigned long);
+	void (*ctor) (void *, kmem_cache_t *, unsigned long);
 
 	/* de-constructor func */
-	void (*dtor)(void *, kmem_cache_t *, unsigned long);
+	void (*dtor) (void *, kmem_cache_t *, unsigned long);
 
 /* 4) cache creation/removal */
 	const char *name;
 	struct list_head next;
 
 /* 5) statistics */
 #if STATS
 	unsigned long num_active;
 	unsigned long num_allocations;
 	unsigned long high_mark;
 	unsigned long grown;
 	unsigned long reaped;
 	unsigned long errors;
 	unsigned long max_freeable;
 	unsigned long node_allocs;
 	unsigned long node_frees;
 	atomic_t allochit;
 	atomic_t allocmiss;
 	atomic_t freehit;
 	atomic_t freemiss;
 #endif
 #if DEBUG
 	int dbghead;
 	int reallen;
 #endif
 };
 
@@ -523,14 +522,15 @@ static unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp)
 {
 	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
 	if (cachep->flags & SLAB_STORE_USER)
-		return (unsigned long*) (objp+cachep->objsize-2*BYTES_PER_WORD);
-	return (unsigned long*) (objp+cachep->objsize-BYTES_PER_WORD);
+		return (unsigned long *)(objp + cachep->objsize -
+					 2 * BYTES_PER_WORD);
+	return (unsigned long *)(objp + cachep->objsize - BYTES_PER_WORD);
 }
 
 static void **dbg_userword(kmem_cache_t *cachep, void *objp)
 {
 	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
-	return (void**)(objp+cachep->objsize-BYTES_PER_WORD);
+	return (void **)(objp + cachep->objsize - BYTES_PER_WORD);
 }
 
 #else
@@ -607,31 +607,31 @@ struct cache_names {
 static struct cache_names __initdata cache_names[] = {
 #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
 #include <linux/kmalloc_sizes.h>
-	{ NULL, }
+	{NULL,}
 #undef CACHE
 };
 
 static struct arraycache_init initarray_cache __initdata =
-	{ { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
+    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 static struct arraycache_init initarray_generic =
-	{ { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
+    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
 static kmem_cache_t cache_cache = {
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
 	.objsize = sizeof(kmem_cache_t),
 	.flags = SLAB_NO_REAP,
 	.spinlock = SPIN_LOCK_UNLOCKED,
 	.name = "kmem_cache",
 #if DEBUG
 	.reallen = sizeof(kmem_cache_t),
 #endif
 };
 
 /* Guard access to the cache-chain. */
 static struct semaphore cache_chain_sem;
 static struct list_head cache_chain;
 
 /*
@@ -655,9 +655,9 @@ static enum {
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node);
-static void enable_cpucache (kmem_cache_t *cachep);
-static void cache_reap (void *unused);
+static void free_block(kmem_cache_t *cachep, void **objpp, int len, int node);
+static void enable_cpucache(kmem_cache_t *cachep);
+static void cache_reap(void *unused);
 static int __node_shrink(kmem_cache_t *cachep, int node);
 
 static inline struct array_cache *ac_data(kmem_cache_t *cachep)
@@ -671,9 +671,9 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags)
 
 #if DEBUG
 	/* This happens if someone tries to call
 	 * kmem_cache_create(), or __kmalloc(), before
 	 * the generic caches are initialized.
 	 */
 	BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
 #endif
 	while (size > csizep->cs_size)
@@ -697,10 +697,10 @@ EXPORT_SYMBOL(kmem_find_general_cachep);
 
 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
 static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
			   int flags, size_t *left_over, unsigned int *num)
 {
 	int i;
-	size_t wastage = PAGE_SIZE<<gfporder;
+	size_t wastage = PAGE_SIZE << gfporder;
 	size_t extra = 0;
 	size_t base = 0;
 
@@ -709,7 +709,7 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
 		extra = sizeof(kmem_bufctl_t);
 	}
 	i = 0;
-	while (i*size + ALIGN(base+i*extra, align) <= wastage)
+	while (i * size + ALIGN(base + i * extra, align) <= wastage)
 		i++;
 	if (i > 0)
 		i--;
@@ -718,8 +718,8 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
 		i = SLAB_LIMIT;
 
 	*num = i;
-	wastage -= i*size;
-	wastage -= ALIGN(base+i*extra, align);
+	wastage -= i * size;
+	wastage -= ALIGN(base + i * extra, align);
 	*left_over = wastage;
 }
 
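cache_estimate() above can be exercised stand-alone. The sketch below restates the same wastage computation in user space, for the on-slab case where each object also costs one bufctl entry; page size, bufctl size, and header size are assumed example values.

#include <stdio.h>

#define PAGE_SIZE 4096UL			/* illustrative, not kernel state */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

/* Illustrative restatement of cache_estimate(): find the largest object
 * count i such that i objects plus the aligned header still fit. */
static void estimate(unsigned long gfporder, unsigned long size,
		     unsigned long align, unsigned long extra,
		     unsigned long base, unsigned long *left_over,
		     unsigned int *num)
{
	unsigned long wastage = PAGE_SIZE << gfporder;
	unsigned long i = 0;

	while (i * size + ALIGN_UP(base + i * extra, align) <= wastage)
		i++;
	if (i > 0)
		i--;
	*num = i;
	*left_over = wastage - i * size - ALIGN_UP(base + i * extra, align);
}

int main(void)
{
	unsigned long left;
	unsigned int num;

	/* e.g. 256-byte objects, 4-byte bufctls, 60-byte slab header */
	estimate(0, 256, 32, 4, 60, &left, &num);
	printf("order-0: %u objects, %lu bytes left over\n", num, left);
	return 0;
}

For these example values an order-0 slab holds 15 objects with 128 bytes left over, which is the "colour" space the allocator later spreads across cache lines.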
@@ -728,7 +728,7 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
 static void __slab_error(const char *function, kmem_cache_t *cachep, char *msg)
 {
 	printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
	       function, cachep->name, msg);
 	dump_stack();
 }
 
@@ -755,9 +755,9 @@ static void __devinit start_cpu_timer(int cpu)
 }
 
 static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount)
 {
-	int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
+	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
 	struct array_cache *nc = NULL;
 
 	nc = kmalloc_node(memsize, GFP_KERNEL, node);
@@ -775,7 +775,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 static inline struct array_cache **alloc_alien_cache(int node, int limit)
 {
 	struct array_cache **ac_ptr;
-	int memsize = sizeof(void*)*MAX_NUMNODES;
+	int memsize = sizeof(void *) * MAX_NUMNODES;
 	int i;
 
 	if (limit > 1)
@@ -789,7 +789,7 @@ static inline struct array_cache **alloc_alien_cache(int node, int limit)
 		}
 		ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
 		if (!ac_ptr[i]) {
-			for (i--; i <=0; i--)
+			for (i--; i <= 0; i--)
 				kfree(ac_ptr[i]);
 			kfree(ac_ptr);
 			return NULL;
@@ -807,12 +807,13 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
 		return;
 
 	for_each_node(i)
		kfree(ac_ptr[i]);
 
 	kfree(ac_ptr);
 }
 
-static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache *ac, int node)
+static inline void __drain_alien_cache(kmem_cache_t *cachep,
+				       struct array_cache *ac, int node)
 {
 	struct kmem_list3 *rl3 = cachep->nodelists[node];
 
@@ -826,7 +827,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache
 
 static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3)
 {
-	int i=0;
+	int i = 0;
 	struct array_cache *ac;
 	unsigned long flags;
 
@@ -846,14 +847,13 @@ static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3)
 #endif
 
 static int __devinit cpuup_callback(struct notifier_block *nfb,
				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
-	kmem_cache_t* cachep;
+	kmem_cache_t *cachep;
 	struct kmem_list3 *l3 = NULL;
 	int node = cpu_to_node(cpu);
 	int memsize = sizeof(struct kmem_list3);
-	struct array_cache *nc = NULL;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
@@ -871,27 +871,29 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			 */
 			if (!cachep->nodelists[node]) {
 				if (!(l3 = kmalloc_node(memsize,
							GFP_KERNEL, node)))
 					goto bad;
 				kmem_list3_init(l3);
 				l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
-					((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+				    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
 				cachep->nodelists[node] = l3;
 			}
 
 			spin_lock_irq(&cachep->nodelists[node]->list_lock);
 			cachep->nodelists[node]->free_limit =
			    (1 + nr_cpus_node(node)) *
			    cachep->batchcount + cachep->num;
 			spin_unlock_irq(&cachep->nodelists[node]->list_lock);
 		}
 
 		/* Now we can go ahead with allocating the shared array's
		   & array cache's */
 		list_for_each_entry(cachep, &cache_chain, next) {
+			struct array_cache *nc;
+
 			nc = alloc_arraycache(node, cachep->limit,
					      cachep->batchcount);
 			if (!nc)
 				goto bad;
 			cachep->array[cpu] = nc;
@@ -900,12 +902,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			BUG_ON(!l3);
 			if (!l3->shared) {
 				if (!(nc = alloc_arraycache(node,
-						cachep->shared*cachep->batchcount,
-						0xbaadf00d)))
-					goto bad;
+							    cachep->shared *
+							    cachep->batchcount,
+							    0xbaadf00d)))
+					goto bad;
 
 				/* we are serialised from CPU_DEAD or
				   CPU_UP_CANCELLED by the cpucontrol lock */
 				l3->shared = nc;
 			}
 		}
@@ -942,13 +945,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			free_block(cachep, nc->entry, nc->avail, node);
 
 			if (!cpus_empty(mask)) {
 				spin_unlock(&l3->list_lock);
 				goto unlock_cache;
 			}
 
 			if (l3->shared) {
 				free_block(cachep, l3->shared->entry,
					   l3->shared->avail, node);
 				kfree(l3->shared);
 				l3->shared = NULL;
 			}
@@ -966,7 +969,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			} else {
 				spin_unlock(&l3->list_lock);
 			}
-unlock_cache:
+	      unlock_cache:
 			spin_unlock_irq(&cachep->spinlock);
 			kfree(nc);
 		}
@@ -975,7 +978,7 @@ unlock_cache:
 #endif
 	}
 	return NOTIFY_OK;
-bad:
+      bad:
 	up(&cache_chain_sem);
 	return NOTIFY_BAD;
 }
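cpuup_callback() above follows the standard 2.6-era CPU notifier shape. A minimal sketch of that pattern follows; the callback body and names are hypothetical, only the notifier plumbing is real.

#include <linux/notifier.h>
#include <linux/cpu.h>

static int example_cpu_callback(struct notifier_block *nfb,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
		/* allocate per-cpu state for `cpu` before it comes online */
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
		/* drain and free whatever CPU_UP_PREPARE set up */
		break;
#endif
	}
	return NOTIFY_OK;
}

static struct notifier_block example_cpu_notifier = {
	&example_cpu_callback, NULL, 0
};

/* registration, typically from an __init function:
 *	register_cpu_notifier(&example_cpu_notifier);
 */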
@@ -985,8 +988,7 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
 /*
 * swap the static kmem_list3 with kmalloced memory
 */
-static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list,
-		int nodeid)
+static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list, int nodeid)
 {
 	struct kmem_list3 *ptr;
 
@@ -1055,14 +1057,14 @@ void __init kmem_cache_init(void)
 	cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size());
 
 	cache_estimate(0, cache_cache.objsize, cache_line_size(), 0,
		       &left_over, &cache_cache.num);
 	if (!cache_cache.num)
 		BUG();
 
-	cache_cache.colour = left_over/cache_cache.colour_off;
+	cache_cache.colour = left_over / cache_cache.colour_off;
 	cache_cache.colour_next = 0;
-	cache_cache.slab_size = ALIGN(cache_cache.num*sizeof(kmem_bufctl_t) +
+	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
				      sizeof(struct slab), cache_line_size());
 
 	/* 2+3) create the kmalloc caches */
 	sizes = malloc_sizes;
@@ -1074,14 +1076,18 @@ void __init kmem_cache_init(void)
 	 */
 
 	sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
-		sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN,
-		(ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
+						      sizes[INDEX_AC].cs_size,
+						      ARCH_KMALLOC_MINALIGN,
+						      (ARCH_KMALLOC_FLAGS |
+						       SLAB_PANIC), NULL, NULL);
 
 	if (INDEX_AC != INDEX_L3)
 		sizes[INDEX_L3].cs_cachep =
		    kmem_cache_create(names[INDEX_L3].name,
-			sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN,
-			(ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
+				      sizes[INDEX_L3].cs_size,
+				      ARCH_KMALLOC_MINALIGN,
+				      (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL,
+				      NULL);
 
 	while (sizes->cs_size != ULONG_MAX) {
 		/*
@@ -1091,35 +1097,41 @@ void __init kmem_cache_init(void)
 		 * Note for systems short on memory removing the alignment will
 		 * allow tighter packing of the smaller caches.
 		 */
-		if(!sizes->cs_cachep)
+		if (!sizes->cs_cachep)
 			sizes->cs_cachep = kmem_cache_create(names->name,
-				sizes->cs_size, ARCH_KMALLOC_MINALIGN,
-				(ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
+							     sizes->cs_size,
+							     ARCH_KMALLOC_MINALIGN,
+							     (ARCH_KMALLOC_FLAGS
+							      | SLAB_PANIC),
+							     NULL, NULL);
 
 		/* Inc off-slab bufctl limit until the ceiling is hit. */
 		if (!(OFF_SLAB(sizes->cs_cachep))) {
-			offslab_limit = sizes->cs_size-sizeof(struct slab);
+			offslab_limit = sizes->cs_size - sizeof(struct slab);
 			offslab_limit /= sizeof(kmem_bufctl_t);
 		}
 
 		sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
-			sizes->cs_size, ARCH_KMALLOC_MINALIGN,
-			(ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC),
-			NULL, NULL);
+							sizes->cs_size,
+							ARCH_KMALLOC_MINALIGN,
+							(ARCH_KMALLOC_FLAGS |
+							 SLAB_CACHE_DMA |
+							 SLAB_PANIC), NULL,
+							NULL);
 
 		sizes++;
 		names++;
 	}
 	/* 4) Replace the bootstrap head arrays */
 	{
-		void * ptr;
+		void *ptr;
 
 		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
 
 		local_irq_disable();
 		BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache);
 		memcpy(ptr, ac_data(&cache_cache),
		       sizeof(struct arraycache_init));
 		cache_cache.array[smp_processor_id()] = ptr;
 		local_irq_enable();
 
@@ -1127,11 +1139,11 @@ void __init kmem_cache_init(void)
 
 		local_irq_disable();
 		BUG_ON(ac_data(malloc_sizes[INDEX_AC].cs_cachep)
		       != &initarray_generic.cache);
 		memcpy(ptr, ac_data(malloc_sizes[INDEX_AC].cs_cachep),
		       sizeof(struct arraycache_init));
 		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
		    ptr;
 		local_irq_enable();
 	}
 	/* 5) Replace the bootstrap kmem_list3's */
@@ -1139,16 +1151,16 @@ void __init kmem_cache_init(void)
 		int node;
 		/* Replace the static kmem_list3 structures for the boot cpu */
 		init_list(&cache_cache, &initkmem_list3[CACHE_CACHE],
			  numa_node_id());
 
 		for_each_online_node(node) {
 			init_list(malloc_sizes[INDEX_AC].cs_cachep,
-				&initkmem_list3[SIZE_AC+node], node);
+				  &initkmem_list3[SIZE_AC + node], node);
 
 			if (INDEX_AC != INDEX_L3) {
 				init_list(malloc_sizes[INDEX_L3].cs_cachep,
-					&initkmem_list3[SIZE_L3+node],
+					  &initkmem_list3[SIZE_L3 + node],
					  node);
 			}
 		}
 	}
@@ -1158,7 +1170,7 @@ void __init kmem_cache_init(void)
 		kmem_cache_t *cachep;
 		down(&cache_chain_sem);
 		list_for_each_entry(cachep, &cache_chain, next)
		    enable_cpucache(cachep);
 		up(&cache_chain_sem);
 	}
 
@@ -1184,7 +1196,7 @@ static int __init cpucache_init(void)
 	 * pages to gfp.
 	 */
 	for_each_online_cpu(cpu)
		start_cpu_timer(cpu);
 
 	return 0;
 }
@@ -1226,7 +1238,7 @@ static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
 */
 static void kmem_freepages(kmem_cache_t *cachep, void *addr)
 {
-	unsigned long i = (1<<cachep->gfporder);
+	unsigned long i = (1 << cachep->gfporder);
 	struct page *page = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
@@ -1239,13 +1251,13 @@ static void kmem_freepages(kmem_cache_t *cachep, void *addr)
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
+		atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
 {
-	struct slab_rcu *slab_rcu = (struct slab_rcu *) head;
+	struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
 	kmem_cache_t *cachep = slab_rcu->cachep;
 
 	kmem_freepages(cachep, slab_rcu->addr);
@@ -1257,19 +1269,19 @@ static void kmem_rcu_free(struct rcu_head *head)
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr,
			    unsigned long caller)
 {
 	int size = obj_reallen(cachep);
 
-	addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)];
+	addr = (unsigned long *)&((char *)addr)[obj_dbghead(cachep)];
 
-	if (size < 5*sizeof(unsigned long))
+	if (size < 5 * sizeof(unsigned long))
 		return;
 
-	*addr++=0x12345678;
-	*addr++=caller;
-	*addr++=smp_processor_id();
-	size -= 3*sizeof(unsigned long);
+	*addr++ = 0x12345678;
+	*addr++ = caller;
+	*addr++ = smp_processor_id();
+	size -= 3 * sizeof(unsigned long);
 	{
 		unsigned long *sptr = &caller;
 		unsigned long svalue;
@@ -1277,7 +1289,7 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr,
 		while (!kstack_end(sptr)) {
 			svalue = *sptr++;
 			if (kernel_text_address(svalue)) {
-				*addr++=svalue;
+				*addr++ = svalue;
 				size -= sizeof(unsigned long);
 				if (size <= sizeof(unsigned long))
 					break;
@@ -1285,25 +1297,25 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr,
 		}
 
 	}
-	*addr++=0x87654321;
+	*addr++ = 0x87654321;
 }
 #endif
 
 static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
 {
 	int size = obj_reallen(cachep);
-	addr = &((char*)addr)[obj_dbghead(cachep)];
+	addr = &((char *)addr)[obj_dbghead(cachep)];
 
 	memset(addr, val, size);
-	*(unsigned char *)(addr+size-1) = POISON_END;
+	*(unsigned char *)(addr + size - 1) = POISON_END;
 }
 
 static void dump_line(char *data, int offset, int limit)
 {
 	int i;
 	printk(KERN_ERR "%03x:", offset);
-	for (i=0;i<limit;i++) {
-		printk(" %02x", (unsigned char)data[offset+i]);
+	for (i = 0; i < limit; i++) {
+		printk(" %02x", (unsigned char)data[offset + i]);
 	}
 	printk("\n");
 }
@@ -1318,24 +1330,24 @@ static void print_objinfo(kmem_cache_t *cachep, void *objp, int lines)
 
 	if (cachep->flags & SLAB_RED_ZONE) {
 		printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n",
		       *dbg_redzone1(cachep, objp),
		       *dbg_redzone2(cachep, objp));
 	}
 
 	if (cachep->flags & SLAB_STORE_USER) {
 		printk(KERN_ERR "Last user: [<%p>]",
		       *dbg_userword(cachep, objp));
 		print_symbol("(%s)",
			     (unsigned long)*dbg_userword(cachep, objp));
 		printk("\n");
 	}
-	realobj = (char*)objp+obj_dbghead(cachep);
+	realobj = (char *)objp + obj_dbghead(cachep);
 	size = obj_reallen(cachep);
-	for (i=0; i<size && lines;i+=16, lines--) {
+	for (i = 0; i < size && lines; i += 16, lines--) {
 		int limit;
 		limit = 16;
-		if (i+limit > size)
-			limit = size-i;
+		if (i + limit > size)
+			limit = size - i;
 		dump_line(realobj, i, limit);
 	}
 }
@@ -1346,27 +1358,28 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp)
 	int size, i;
 	int lines = 0;
 
-	realobj = (char*)objp+obj_dbghead(cachep);
+	realobj = (char *)objp + obj_dbghead(cachep);
 	size = obj_reallen(cachep);
 
-	for (i=0;i<size;i++) {
+	for (i = 0; i < size; i++) {
 		char exp = POISON_FREE;
-		if (i == size-1)
+		if (i == size - 1)
 			exp = POISON_END;
 		if (realobj[i] != exp) {
 			int limit;
 			/* Mismatch ! */
 			/* Print header */
 			if (lines == 0) {
-				printk(KERN_ERR "Slab corruption: start=%p, len=%d\n",
-						realobj, size);
+				printk(KERN_ERR
				       "Slab corruption: start=%p, len=%d\n",
+				       realobj, size);
 				print_objinfo(cachep, objp, 0);
 			}
 			/* Hexdump the affected line */
-			i = (i/16)*16;
+			i = (i / 16) * 16;
 			limit = 16;
-			if (i+limit > size)
-				limit = size-i;
+			if (i + limit > size)
+				limit = size - i;
 			dump_line(realobj, i, limit);
 			i += 16;
 			lines++;
@@ -1382,19 +1395,19 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp)
 		struct slab *slabp = page_get_slab(virt_to_page(objp));
 		int objnr;
 
-		objnr = (objp-slabp->s_mem)/cachep->objsize;
+		objnr = (objp - slabp->s_mem) / cachep->objsize;
 		if (objnr) {
-			objp = slabp->s_mem+(objnr-1)*cachep->objsize;
-			realobj = (char*)objp+obj_dbghead(cachep);
+			objp = slabp->s_mem + (objnr - 1) * cachep->objsize;
+			realobj = (char *)objp + obj_dbghead(cachep);
 			printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
			       realobj, size);
 			print_objinfo(cachep, objp, 2);
 		}
-		if (objnr+1 < cachep->num) {
-			objp = slabp->s_mem+(objnr+1)*cachep->objsize;
-			realobj = (char*)objp+obj_dbghead(cachep);
+		if (objnr + 1 < cachep->num) {
+			objp = slabp->s_mem + (objnr + 1) * cachep->objsize;
+			realobj = (char *)objp + obj_dbghead(cachep);
 			printk(KERN_ERR "Next obj: start=%p, len=%d\n",
			       realobj, size);
 			print_objinfo(cachep, objp, 2);
 		}
 	}
@@ -1405,7 +1418,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp)
 * Before calling the slab must have been unlinked from the cache.
 * The cache-lock is not held/needed.
 */
-static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
+static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp)
 {
 	void *addr = slabp->s_mem - slabp->colouroff;
 
@@ -1416,8 +1429,11 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
 
 	if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
-		if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
-			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1);
+		if ((cachep->objsize % PAGE_SIZE) == 0
+		    && OFF_SLAB(cachep))
+			kernel_map_pages(virt_to_page(objp),
					 cachep->objsize / PAGE_SIZE,
					 1);
 		else
 			check_poison_obj(cachep, objp);
 #else
@@ -1427,20 +1443,20 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
 	if (cachep->flags & SLAB_RED_ZONE) {
 		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
 			slab_error(cachep, "start of a freed object "
				   "was overwritten");
 		if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
 			slab_error(cachep, "end of a freed object "
				   "was overwritten");
 	}
 	if (cachep->dtor && !(cachep->flags & SLAB_POISON))
-		(cachep->dtor)(objp+obj_dbghead(cachep), cachep, 0);
+		(cachep->dtor) (objp + obj_dbghead(cachep), cachep, 0);
 	}
 #else
 	if (cachep->dtor) {
 		int i;
 		for (i = 0; i < cachep->num; i++) {
-			void* objp = slabp->s_mem+cachep->objsize*i;
-			(cachep->dtor)(objp, cachep, 0);
+			void *objp = slabp->s_mem + cachep->objsize * i;
+			(cachep->dtor) (objp, cachep, 0);
 		}
 	}
 #endif
@@ -1448,7 +1464,7 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
 	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
 		struct slab_rcu *slab_rcu;
 
-		slab_rcu = (struct slab_rcu *) slabp;
+		slab_rcu = (struct slab_rcu *)slabp;
 		slab_rcu->cachep = cachep;
 		slab_rcu->addr = addr;
 		call_rcu(&slab_rcu->head, kmem_rcu_free);
@@ -1466,11 +1482,58 @@ static inline void set_up_list3s(kmem_cache_t *cachep, int index)
 	int node;
 
 	for_each_online_node(node) {
-		cachep->nodelists[node] = &initkmem_list3[index+node];
+		cachep->nodelists[node] = &initkmem_list3[index + node];
 		cachep->nodelists[node]->next_reap = jiffies +
		    REAPTIMEOUT_LIST3 +
-			((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+		    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+	}
+}
+
+/**
+ * calculate_slab_order - calculate size (page order) of slabs and the number
+ * of objects per slab.
+ *
+ * This could be made much more intelligent.  For now, try to avoid using
+ * high order pages for slabs.  When the gfp() functions are more friendly
+ * towards high-order requests, this should be changed.
+ */
+static inline size_t calculate_slab_order(kmem_cache_t *cachep, size_t size,
+					  size_t align, gfp_t flags)
+{
+	size_t left_over = 0;
+
+	for (;; cachep->gfporder++) {
+		unsigned int num;
+		size_t remainder;
+
+		if (cachep->gfporder > MAX_GFP_ORDER) {
+			cachep->num = 0;
+			break;
+		}
+
+		cache_estimate(cachep->gfporder, size, align, flags,
+			       &remainder, &num);
+		if (!num)
+			continue;
+		/* More than offslab_limit objects will cause problems */
+		if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit)
+			break;
+
+		cachep->num = num;
+		left_over = remainder;
+
+		/*
+		 * Large number of objects is good, but very large slabs are
+		 * currently bad for the gfp()s.
+		 */
+		if (cachep->gfporder >= slab_break_gfp_order)
+			break;
+
+		if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder))
+			/* Acceptable internal fragmentation */
+			break;
 	}
+	return left_over;
 }
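The new calculate_slab_order() above replaces the open-coded cal_wastage goto loop that a later hunk removes from kmem_cache_create(). A toy stand-alone version of its order-selection criterion, with illustrative constants in place of the kernel's limits:

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* illustrative */
#define MAX_ORDER 5		/* illustrative cap, not MAX_GFP_ORDER */

/* Toy version of the loop above: grow the page order until internal
 * fragmentation falls to at most 1/8th of the slab, the same
 * `left_over * 8 <= PAGE_SIZE << order` test used in the patch. */
static int pick_order(unsigned long obj_size)
{
	int order;

	for (order = 0; order <= MAX_ORDER; order++) {
		unsigned long slab = PAGE_SIZE << order;
		unsigned long left_over = slab % obj_size;

		if (slab / obj_size == 0)
			continue;	/* object does not fit yet */
		if (left_over * 8 <= slab)
			return order;	/* acceptable fragmentation */
	}
	return MAX_ORDER;
}

int main(void)
{
	/* A 3000-byte object wastes 1096 of 4096 bytes at order 0 (over
	 * 1/8th), but only 1384 of 16384 bytes at order 2, so 2 is chosen. */
	printf("order for 3000-byte objects: %d\n", pick_order(3000));
	return 0;
}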
1475 1538
1476/** 1539/**
@@ -1519,14 +1582,13 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1519 * Sanity checks... these are all serious usage bugs. 1582 * Sanity checks... these are all serious usage bugs.
1520 */ 1583 */
1521 if ((!name) || 1584 if ((!name) ||
1522 in_interrupt() || 1585 in_interrupt() ||
1523 (size < BYTES_PER_WORD) || 1586 (size < BYTES_PER_WORD) ||
1524 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) || 1587 (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) {
1525 (dtor && !ctor)) { 1588 printk(KERN_ERR "%s: Early error in slab %s\n",
1526 printk(KERN_ERR "%s: Early error in slab %s\n", 1589 __FUNCTION__, name);
1527 __FUNCTION__, name); 1590 BUG();
1528 BUG(); 1591 }
1529 }
1530 1592
1531 down(&cache_chain_sem); 1593 down(&cache_chain_sem);
1532 1594
@@ -1546,11 +1608,11 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1546 set_fs(old_fs); 1608 set_fs(old_fs);
1547 if (res) { 1609 if (res) {
1548 printk("SLAB: cache with size %d has lost its name\n", 1610 printk("SLAB: cache with size %d has lost its name\n",
1549 pc->objsize); 1611 pc->objsize);
1550 continue; 1612 continue;
1551 } 1613 }
1552 1614
1553 if (!strcmp(pc->name,name)) { 1615 if (!strcmp(pc->name, name)) {
1554 printk("kmem_cache_create: duplicate cache %s\n", name); 1616 printk("kmem_cache_create: duplicate cache %s\n", name);
1555 dump_stack(); 1617 dump_stack();
1556 goto oops; 1618 goto oops;
@@ -1562,10 +1624,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1562 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { 1624 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
1563 /* No constructor, but inital state check requested */ 1625 /* No constructor, but inital state check requested */
1564 printk(KERN_ERR "%s: No con, but init state check " 1626 printk(KERN_ERR "%s: No con, but init state check "
1565 "requested - %s\n", __FUNCTION__, name); 1627 "requested - %s\n", __FUNCTION__, name);
1566 flags &= ~SLAB_DEBUG_INITIAL; 1628 flags &= ~SLAB_DEBUG_INITIAL;
1567 } 1629 }
1568
1569#if FORCED_DEBUG 1630#if FORCED_DEBUG
1570 /* 1631 /*
1571 * Enable redzoning and last user accounting, except for caches with 1632 * Enable redzoning and last user accounting, except for caches with
@@ -1573,8 +1634,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1573 * above the next power of two: caches with object sizes just above a 1634 * above the next power of two: caches with object sizes just above a
1574 * power of two have a significant amount of internal fragmentation. 1635 * power of two have a significant amount of internal fragmentation.
1575 */ 1636 */
1576 if ((size < 4096 || fls(size-1) == fls(size-1+3*BYTES_PER_WORD))) 1637 if ((size < 4096
1577 flags |= SLAB_RED_ZONE|SLAB_STORE_USER; 1638 || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD)))
1639 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
1578 if (!(flags & SLAB_DESTROY_BY_RCU)) 1640 if (!(flags & SLAB_DESTROY_BY_RCU))
1579 flags |= SLAB_POISON; 1641 flags |= SLAB_POISON;
1580#endif 1642#endif
@@ -1595,9 +1657,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1595 * unaligned accesses for some archs when redzoning is used, and makes 1657 * unaligned accesses for some archs when redzoning is used, and makes
1596 * sure any on-slab bufctl's are also correctly aligned. 1658 * sure any on-slab bufctl's are also correctly aligned.
1597 */ 1659 */
1598 if (size & (BYTES_PER_WORD-1)) { 1660 if (size & (BYTES_PER_WORD - 1)) {
1599 size += (BYTES_PER_WORD-1); 1661 size += (BYTES_PER_WORD - 1);
1600 size &= ~(BYTES_PER_WORD-1); 1662 size &= ~(BYTES_PER_WORD - 1);
1601 } 1663 }
1602 1664
1603 /* calculate out the final buffer alignment: */ 1665 /* calculate out the final buffer alignment: */
@@ -1608,7 +1670,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1608 * objects into one cacheline. 1670 * objects into one cacheline.
1609 */ 1671 */
1610 ralign = cache_line_size(); 1672 ralign = cache_line_size();
1611 while (size <= ralign/2) 1673 while (size <= ralign / 2)
1612 ralign /= 2; 1674 ralign /= 2;
1613 } else { 1675 } else {
1614 ralign = BYTES_PER_WORD; 1676 ralign = BYTES_PER_WORD;
@@ -1617,13 +1679,13 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1617 if (ralign < ARCH_SLAB_MINALIGN) { 1679 if (ralign < ARCH_SLAB_MINALIGN) {
1618 ralign = ARCH_SLAB_MINALIGN; 1680 ralign = ARCH_SLAB_MINALIGN;
1619 if (ralign > BYTES_PER_WORD) 1681 if (ralign > BYTES_PER_WORD)
1620 flags &= ~(SLAB_RED_ZONE|SLAB_STORE_USER); 1682 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
1621 } 1683 }
1622 /* 3) caller mandated alignment: disables debug if necessary */ 1684 /* 3) caller mandated alignment: disables debug if necessary */
1623 if (ralign < align) { 1685 if (ralign < align) {
1624 ralign = align; 1686 ralign = align;
1625 if (ralign > BYTES_PER_WORD) 1687 if (ralign > BYTES_PER_WORD)
1626 flags &= ~(SLAB_RED_ZONE|SLAB_STORE_USER); 1688 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
1627 } 1689 }
1628 /* 4) Store it. Note that the debug code below can reduce 1690 /* 4) Store it. Note that the debug code below can reduce
1629 * the alignment to BYTES_PER_WORD. 1691 * the alignment to BYTES_PER_WORD.
@@ -1645,7 +1707,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1645 1707
1646 /* add space for red zone words */ 1708 /* add space for red zone words */
1647 cachep->dbghead += BYTES_PER_WORD; 1709 cachep->dbghead += BYTES_PER_WORD;
1648 size += 2*BYTES_PER_WORD; 1710 size += 2 * BYTES_PER_WORD;
1649 } 1711 }
1650 if (flags & SLAB_STORE_USER) { 1712 if (flags & SLAB_STORE_USER) {
1651 /* user store requires word alignment and 1713 /* user store requires word alignment and
@@ -1656,7 +1718,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1656 size += BYTES_PER_WORD; 1718 size += BYTES_PER_WORD;
1657 } 1719 }
1658#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) 1720#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
1659 if (size >= malloc_sizes[INDEX_L3+1].cs_size && cachep->reallen > cache_line_size() && size < PAGE_SIZE) { 1721 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
1722 && cachep->reallen > cache_line_size() && size < PAGE_SIZE) {
1660 cachep->dbghead += PAGE_SIZE - size; 1723 cachep->dbghead += PAGE_SIZE - size;
1661 size = PAGE_SIZE; 1724 size = PAGE_SIZE;
1662 } 1725 }
@@ -1664,7 +1727,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1664#endif 1727#endif
1665 1728
1666 /* Determine if the slab management is 'on' or 'off' slab. */ 1729 /* Determine if the slab management is 'on' or 'off' slab. */
1667 if (size >= (PAGE_SIZE>>3)) 1730 if (size >= (PAGE_SIZE >> 3))
1668 /* 1731 /*
1669 * Size is large, assume best to place the slab management obj 1732 * Size is large, assume best to place the slab management obj
1670 * off-slab (should allow better packing of objs). 1733 * off-slab (should allow better packing of objs).
@@ -1681,47 +1744,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1681 */ 1744 */
1682 cachep->gfporder = 0; 1745 cachep->gfporder = 0;
1683 cache_estimate(cachep->gfporder, size, align, flags, 1746 cache_estimate(cachep->gfporder, size, align, flags,
1684 &left_over, &cachep->num); 1747 &left_over, &cachep->num);
1685 } else { 1748 } else
1686 /* 1749 left_over = calculate_slab_order(cachep, size, align, flags);
1687 * Calculate size (in pages) of slabs, and the num of objs per
1688 * slab. This could be made much more intelligent. For now,
1689 * try to avoid using high page-orders for slabs. When the
1690 * gfp() funcs are more friendly towards high-order requests,
1691 * this should be changed.
1692 */
1693 do {
1694 unsigned int break_flag = 0;
1695cal_wastage:
1696 cache_estimate(cachep->gfporder, size, align, flags,
1697 &left_over, &cachep->num);
1698 if (break_flag)
1699 break;
1700 if (cachep->gfporder >= MAX_GFP_ORDER)
1701 break;
1702 if (!cachep->num)
1703 goto next;
1704 if (flags & CFLGS_OFF_SLAB &&
1705 cachep->num > offslab_limit) {
1706 /* This num of objs will cause problems. */
1707 cachep->gfporder--;
1708 break_flag++;
1709 goto cal_wastage;
1710 }
1711
1712 /*
1713 * Large num of objs is good, but v. large slabs are
1714 * currently bad for the gfp()s.
1715 */
1716 if (cachep->gfporder >= slab_break_gfp_order)
1717 break;
1718
1719 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
1720 break; /* Acceptable internal fragmentation. */
1721next:
1722 cachep->gfporder++;
1723 } while (1);
1724 }
1725 1750
1726 if (!cachep->num) { 1751 if (!cachep->num) {
1727 printk("kmem_cache_create: couldn't create cache %s.\n", name); 1752 printk("kmem_cache_create: couldn't create cache %s.\n", name);
@@ -1729,8 +1754,8 @@ next:
1729 cachep = NULL; 1754 cachep = NULL;
1730 goto oops; 1755 goto oops;
1731 } 1756 }
1732 slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t) 1757 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
1733 + sizeof(struct slab), align); 1758 + sizeof(struct slab), align);
1734 1759
1735 /* 1760 /*
1736 * If the slab has been placed off-slab, and we have enough space then 1761 * If the slab has been placed off-slab, and we have enough space then
@@ -1743,14 +1768,15 @@ next:
1743 1768
1744 if (flags & CFLGS_OFF_SLAB) { 1769 if (flags & CFLGS_OFF_SLAB) {
1745 /* really off slab. No need for manual alignment */ 1770 /* really off slab. No need for manual alignment */
1746 slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab); 1771 slab_size =
1772 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
1747 } 1773 }
1748 1774
1749 cachep->colour_off = cache_line_size(); 1775 cachep->colour_off = cache_line_size();
1750 /* Offset must be a multiple of the alignment. */ 1776 /* Offset must be a multiple of the alignment. */
1751 if (cachep->colour_off < align) 1777 if (cachep->colour_off < align)
1752 cachep->colour_off = align; 1778 cachep->colour_off = align;
1753 cachep->colour = left_over/cachep->colour_off; 1779 cachep->colour = left_over / cachep->colour_off;
1754 cachep->slab_size = slab_size; 1780 cachep->slab_size = slab_size;
1755 cachep->flags = flags; 1781 cachep->flags = flags;
1756 cachep->gfpflags = 0; 1782 cachep->gfpflags = 0;
@@ -1777,7 +1803,7 @@ next:
1777 * the creation of further caches will BUG(). 1803 * the creation of further caches will BUG().
1778 */ 1804 */
1779 cachep->array[smp_processor_id()] = 1805 cachep->array[smp_processor_id()] =
1780 &initarray_generic.cache; 1806 &initarray_generic.cache;
1781 1807
1782 /* If the cache that's used by 1808 /* If the cache that's used by
1783 * kmalloc(sizeof(kmem_list3)) is the first cache, 1809 * kmalloc(sizeof(kmem_list3)) is the first cache,
@@ -1791,8 +1817,7 @@ next:
1791 g_cpucache_up = PARTIAL_AC; 1817 g_cpucache_up = PARTIAL_AC;
1792 } else { 1818 } else {
1793 cachep->array[smp_processor_id()] = 1819 cachep->array[smp_processor_id()] =
1794 kmalloc(sizeof(struct arraycache_init), 1820 kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1795 GFP_KERNEL);
1796 1821
1797 if (g_cpucache_up == PARTIAL_AC) { 1822 if (g_cpucache_up == PARTIAL_AC) {
1798 set_up_list3s(cachep, SIZE_L3); 1823 set_up_list3s(cachep, SIZE_L3);
@@ -1802,16 +1827,18 @@ next:
1802 for_each_online_node(node) { 1827 for_each_online_node(node) {
1803 1828
1804 cachep->nodelists[node] = 1829 cachep->nodelists[node] =
1805 kmalloc_node(sizeof(struct kmem_list3), 1830 kmalloc_node(sizeof
1806 GFP_KERNEL, node); 1831 (struct kmem_list3),
1832 GFP_KERNEL, node);
1807 BUG_ON(!cachep->nodelists[node]); 1833 BUG_ON(!cachep->nodelists[node]);
1808 kmem_list3_init(cachep->nodelists[node]); 1834 kmem_list3_init(cachep->
1835 nodelists[node]);
1809 } 1836 }
1810 } 1837 }
1811 } 1838 }
1812 cachep->nodelists[numa_node_id()]->next_reap = 1839 cachep->nodelists[numa_node_id()]->next_reap =
1813 jiffies + REAPTIMEOUT_LIST3 + 1840 jiffies + REAPTIMEOUT_LIST3 +
1814 ((unsigned long)cachep)%REAPTIMEOUT_LIST3; 1841 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1815 1842
1816 BUG_ON(!ac_data(cachep)); 1843 BUG_ON(!ac_data(cachep));
1817 ac_data(cachep)->avail = 0; 1844 ac_data(cachep)->avail = 0;
@@ -1820,15 +1847,15 @@ next:
1820 ac_data(cachep)->touched = 0; 1847 ac_data(cachep)->touched = 0;
1821 cachep->batchcount = 1; 1848 cachep->batchcount = 1;
1822 cachep->limit = BOOT_CPUCACHE_ENTRIES; 1849 cachep->limit = BOOT_CPUCACHE_ENTRIES;
1823 } 1850 }
1824 1851
1825 /* cache setup completed, link it into the list */ 1852 /* cache setup completed, link it into the list */
1826 list_add(&cachep->next, &cache_chain); 1853 list_add(&cachep->next, &cache_chain);
1827 unlock_cpu_hotplug(); 1854 unlock_cpu_hotplug();
1828oops: 1855 oops:
1829 if (!cachep && (flags & SLAB_PANIC)) 1856 if (!cachep && (flags & SLAB_PANIC))
1830 panic("kmem_cache_create(): failed to create slab `%s'\n", 1857 panic("kmem_cache_create(): failed to create slab `%s'\n",
1831 name); 1858 name);
1832 up(&cache_chain_sem); 1859 up(&cache_chain_sem);
1833 return cachep; 1860 return cachep;
1834} 1861}
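
[Editor's note: the colour fields set up in the hunk above implement slab colouring. colour_off is the cache line size, raised to align when necessary, and colour = left_over / colour_off counts how many distinct starting offsets the leftover bytes can fund. cache_grow(), outside this hunk, advances a per-cache cursor so consecutive slabs begin their objects at 0, colour_off, 2*colour_off and so on, spreading equivalent objects across cache sets instead of stacking them on the same lines. A small sketch of the cycle, reusing the 416 leftover bytes from the previous example and assuming a 64-byte line:

#include <stdio.h>

int main(void)
{
        unsigned long left_over = 416;   /* leftover bytes from sizing */
        unsigned colour_off = 64;        /* cache_line_size() stand-in */
        unsigned colour = left_over / colour_off;  /* 6 distinct offsets */
        unsigned next = 0, i;

        /* the grow path bumps the cursor per new slab, wrapping at colour */
        for (i = 0; i < 10; i++) {
                printf("slab %u starts objects at offset %u\n",
                       i, next * colour_off);
                if (++next >= colour)
                        next = 0;
        }
        return 0;
}]
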
@@ -1871,7 +1898,7 @@ static inline void check_spinlock_acquired_node(kmem_cache_t *cachep, int node)
1871/* 1898/*
1872 * Waits for all CPUs to execute func(). 1899 * Waits for all CPUs to execute func().
1873 */ 1900 */
1874static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg) 1901static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg)
1875{ 1902{
1876 check_irq_on(); 1903 check_irq_on();
1877 preempt_disable(); 1904 preempt_disable();
@@ -1886,12 +1913,12 @@ static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
1886 preempt_enable(); 1913 preempt_enable();
1887} 1914}
1888 1915
1889static void drain_array_locked(kmem_cache_t* cachep, 1916static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac,
1890 struct array_cache *ac, int force, int node); 1917 int force, int node);
1891 1918
1892static void do_drain(void *arg) 1919static void do_drain(void *arg)
1893{ 1920{
1894 kmem_cache_t *cachep = (kmem_cache_t*)arg; 1921 kmem_cache_t *cachep = (kmem_cache_t *) arg;
1895 struct array_cache *ac; 1922 struct array_cache *ac;
1896 int node = numa_node_id(); 1923 int node = numa_node_id();
1897 1924
@@ -1911,7 +1938,7 @@ static void drain_cpu_caches(kmem_cache_t *cachep)
1911 smp_call_function_all_cpus(do_drain, cachep); 1938 smp_call_function_all_cpus(do_drain, cachep);
1912 check_irq_on(); 1939 check_irq_on();
1913 spin_lock_irq(&cachep->spinlock); 1940 spin_lock_irq(&cachep->spinlock);
1914 for_each_online_node(node) { 1941 for_each_online_node(node) {
1915 l3 = cachep->nodelists[node]; 1942 l3 = cachep->nodelists[node];
1916 if (l3) { 1943 if (l3) {
1917 spin_lock(&l3->list_lock); 1944 spin_lock(&l3->list_lock);
@@ -1949,8 +1976,7 @@ static int __node_shrink(kmem_cache_t *cachep, int node)
1949 slab_destroy(cachep, slabp); 1976 slab_destroy(cachep, slabp);
1950 spin_lock_irq(&l3->list_lock); 1977 spin_lock_irq(&l3->list_lock);
1951 } 1978 }
1952 ret = !list_empty(&l3->slabs_full) || 1979 ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial);
1953 !list_empty(&l3->slabs_partial);
1954 return ret; 1980 return ret;
1955} 1981}
1956 1982
@@ -2006,7 +2032,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
2006 * The caller must guarantee that noone will allocate memory from the cache 2032 * The caller must guarantee that noone will allocate memory from the cache
2007 * during the kmem_cache_destroy(). 2033 * during the kmem_cache_destroy().
2008 */ 2034 */
2009int kmem_cache_destroy(kmem_cache_t * cachep) 2035int kmem_cache_destroy(kmem_cache_t *cachep)
2010{ 2036{
2011 int i; 2037 int i;
2012 struct kmem_list3 *l3; 2038 struct kmem_list3 *l3;
@@ -2028,7 +2054,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
2028 if (__cache_shrink(cachep)) { 2054 if (__cache_shrink(cachep)) {
2029 slab_error(cachep, "Can't free all objects"); 2055 slab_error(cachep, "Can't free all objects");
2030 down(&cache_chain_sem); 2056 down(&cache_chain_sem);
2031 list_add(&cachep->next,&cache_chain); 2057 list_add(&cachep->next, &cache_chain);
2032 up(&cache_chain_sem); 2058 up(&cache_chain_sem);
2033 unlock_cpu_hotplug(); 2059 unlock_cpu_hotplug();
2034 return 1; 2060 return 1;
@@ -2038,7 +2064,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
2038 synchronize_rcu(); 2064 synchronize_rcu();
2039 2065
2040 for_each_online_cpu(i) 2066 for_each_online_cpu(i)
2041 kfree(cachep->array[i]); 2067 kfree(cachep->array[i]);
2042 2068
2043 /* NUMA: free the list3 structures */ 2069 /* NUMA: free the list3 structures */
2044 for_each_online_node(i) { 2070 for_each_online_node(i) {
@@ -2057,39 +2083,39 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
2057EXPORT_SYMBOL(kmem_cache_destroy); 2083EXPORT_SYMBOL(kmem_cache_destroy);
2058 2084
2059/* Get the memory for a slab management obj. */ 2085/* Get the memory for a slab management obj. */
2060static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp, 2086static struct slab *alloc_slabmgmt(kmem_cache_t *cachep, void *objp,
2061 int colour_off, gfp_t local_flags) 2087 int colour_off, gfp_t local_flags)
2062{ 2088{
2063 struct slab *slabp; 2089 struct slab *slabp;
2064 2090
2065 if (OFF_SLAB(cachep)) { 2091 if (OFF_SLAB(cachep)) {
2066 /* Slab management obj is off-slab. */ 2092 /* Slab management obj is off-slab. */
2067 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); 2093 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
2068 if (!slabp) 2094 if (!slabp)
2069 return NULL; 2095 return NULL;
2070 } else { 2096 } else {
2071 slabp = objp+colour_off; 2097 slabp = objp + colour_off;
2072 colour_off += cachep->slab_size; 2098 colour_off += cachep->slab_size;
2073 } 2099 }
2074 slabp->inuse = 0; 2100 slabp->inuse = 0;
2075 slabp->colouroff = colour_off; 2101 slabp->colouroff = colour_off;
2076 slabp->s_mem = objp+colour_off; 2102 slabp->s_mem = objp + colour_off;
2077 2103
2078 return slabp; 2104 return slabp;
2079} 2105}
2080 2106
2081static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) 2107static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2082{ 2108{
2083 return (kmem_bufctl_t *)(slabp+1); 2109 return (kmem_bufctl_t *) (slabp + 1);
2084} 2110}
2085 2111
2086static void cache_init_objs(kmem_cache_t *cachep, 2112static void cache_init_objs(kmem_cache_t *cachep,
2087 struct slab *slabp, unsigned long ctor_flags) 2113 struct slab *slabp, unsigned long ctor_flags)
2088{ 2114{
2089 int i; 2115 int i;
2090 2116
2091 for (i = 0; i < cachep->num; i++) { 2117 for (i = 0; i < cachep->num; i++) {
2092 void *objp = slabp->s_mem+cachep->objsize*i; 2118 void *objp = slabp->s_mem + cachep->objsize * i;
2093#if DEBUG 2119#if DEBUG
2094 /* need to poison the objs? */ 2120 /* need to poison the objs? */
2095 if (cachep->flags & SLAB_POISON) 2121 if (cachep->flags & SLAB_POISON)
@@ -2107,25 +2133,28 @@ static void cache_init_objs(kmem_cache_t *cachep,
2107 * Otherwise, deadlock. They must also be threaded. 2133 * Otherwise, deadlock. They must also be threaded.
2108 */ 2134 */
2109 if (cachep->ctor && !(cachep->flags & SLAB_POISON)) 2135 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2110 cachep->ctor(objp+obj_dbghead(cachep), cachep, ctor_flags); 2136 cachep->ctor(objp + obj_dbghead(cachep), cachep,
2137 ctor_flags);
2111 2138
2112 if (cachep->flags & SLAB_RED_ZONE) { 2139 if (cachep->flags & SLAB_RED_ZONE) {
2113 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) 2140 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2114 slab_error(cachep, "constructor overwrote the" 2141 slab_error(cachep, "constructor overwrote the"
2115 " end of an object"); 2142 " end of an object");
2116 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) 2143 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2117 slab_error(cachep, "constructor overwrote the" 2144 slab_error(cachep, "constructor overwrote the"
2118 " start of an object"); 2145 " start of an object");
2119 } 2146 }
2120 if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) 2147 if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)
2121 kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); 2148 && cachep->flags & SLAB_POISON)
2149 kernel_map_pages(virt_to_page(objp),
2150 cachep->objsize / PAGE_SIZE, 0);
2122#else 2151#else
2123 if (cachep->ctor) 2152 if (cachep->ctor)
2124 cachep->ctor(objp, cachep, ctor_flags); 2153 cachep->ctor(objp, cachep, ctor_flags);
2125#endif 2154#endif
2126 slab_bufctl(slabp)[i] = i+1; 2155 slab_bufctl(slabp)[i] = i + 1;
2127 } 2156 }
2128 slab_bufctl(slabp)[i-1] = BUFCTL_END; 2157 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2129 slabp->free = 0; 2158 slabp->free = 0;
2130} 2159}
2131 2160
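
[Editor's note: cache_init_objs() finishes by threading the bufctl array into an index-linked free list: entry i holds the index of the next free object, the final entry holds the BUFCTL_END sentinel, and slabp->free points at the head. Allocation pops the head index and freeing pushes one back, so the list is a LIFO stack of object numbers rather than pointers. A self-contained model; a 16-bit index type keeps it small, the kernel's kmem_bufctl_t is wider:

#include <stdio.h>

#define NUM        8
#define BUFCTL_END 0xffffU

static unsigned short bufctl[NUM];
static unsigned free_head;

static void init_objs(void)
{
        unsigned i;
        for (i = 0; i < NUM; i++)
                bufctl[i] = i + 1;      /* slab_bufctl(slabp)[i] = i+1 */
        bufctl[NUM - 1] = BUFCTL_END;   /* terminator */
        free_head = 0;                  /* slabp->free = 0 */
}

static int alloc_idx(void)              /* pop, as in the refill path */
{
        unsigned idx = free_head;
        if (idx == BUFCTL_END)
                return -1;
        free_head = bufctl[idx];
        return (int)idx;
}

static void free_idx(unsigned idx)      /* push, as in free_block() */
{
        bufctl[idx] = free_head;
        free_head = idx;
}

int main(void)
{
        init_objs();
        int a = alloc_idx(), b = alloc_idx();
        printf("allocated %d then %d\n", a, b);   /* 0 then 1 */
        free_idx(a);
        printf("next alloc: %d\n", alloc_idx());  /* 0 again (LIFO) */
        return 0;
}]
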
@@ -2161,17 +2190,17 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp)
2161 */ 2190 */
2162static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) 2191static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
2163{ 2192{
2164 struct slab *slabp; 2193 struct slab *slabp;
2165 void *objp; 2194 void *objp;
2166 size_t offset; 2195 size_t offset;
2167 gfp_t local_flags; 2196 gfp_t local_flags;
2168 unsigned long ctor_flags; 2197 unsigned long ctor_flags;
2169 struct kmem_list3 *l3; 2198 struct kmem_list3 *l3;
2170 2199
2171 /* Be lazy and only check for valid flags here, 2200 /* Be lazy and only check for valid flags here,
2172 * keeping it out of the critical path in kmem_cache_alloc(). 2201 * keeping it out of the critical path in kmem_cache_alloc().
2173 */ 2202 */
2174 if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) 2203 if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
2175 BUG(); 2204 BUG();
2176 if (flags & SLAB_NO_GROW) 2205 if (flags & SLAB_NO_GROW)
2177 return 0; 2206 return 0;
@@ -2237,9 +2266,9 @@ static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
2237 l3->free_objects += cachep->num; 2266 l3->free_objects += cachep->num;
2238 spin_unlock(&l3->list_lock); 2267 spin_unlock(&l3->list_lock);
2239 return 1; 2268 return 1;
2240opps1: 2269 opps1:
2241 kmem_freepages(cachep, objp); 2270 kmem_freepages(cachep, objp);
2242failed: 2271 failed:
2243 if (local_flags & __GFP_WAIT) 2272 if (local_flags & __GFP_WAIT)
2244 local_irq_disable(); 2273 local_irq_disable();
2245 return 0; 2274 return 0;
@@ -2259,18 +2288,19 @@ static void kfree_debugcheck(const void *objp)
2259 2288
2260 if (!virt_addr_valid(objp)) { 2289 if (!virt_addr_valid(objp)) {
2261 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", 2290 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2262 (unsigned long)objp); 2291 (unsigned long)objp);
2263 BUG(); 2292 BUG();
2264 } 2293 }
2265 page = virt_to_page(objp); 2294 page = virt_to_page(objp);
2266 if (!PageSlab(page)) { 2295 if (!PageSlab(page)) {
2267 printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", (unsigned long)objp); 2296 printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n",
2297 (unsigned long)objp);
2268 BUG(); 2298 BUG();
2269 } 2299 }
2270} 2300}
2271 2301
2272static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, 2302static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp,
2273 void *caller) 2303 void *caller)
2274{ 2304{
2275 struct page *page; 2305 struct page *page;
2276 unsigned int objnr; 2306 unsigned int objnr;
@@ -2281,20 +2311,26 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp,
2281 page = virt_to_page(objp); 2311 page = virt_to_page(objp);
2282 2312
2283 if (page_get_cache(page) != cachep) { 2313 if (page_get_cache(page) != cachep) {
2284 printk(KERN_ERR "mismatch in kmem_cache_free: expected cache %p, got %p\n", 2314 printk(KERN_ERR
2285 page_get_cache(page),cachep); 2315 "mismatch in kmem_cache_free: expected cache %p, got %p\n",
2316 page_get_cache(page), cachep);
2286 printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); 2317 printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
2287 printk(KERN_ERR "%p is %s.\n", page_get_cache(page), page_get_cache(page)->name); 2318 printk(KERN_ERR "%p is %s.\n", page_get_cache(page),
2319 page_get_cache(page)->name);
2288 WARN_ON(1); 2320 WARN_ON(1);
2289 } 2321 }
2290 slabp = page_get_slab(page); 2322 slabp = page_get_slab(page);
2291 2323
2292 if (cachep->flags & SLAB_RED_ZONE) { 2324 if (cachep->flags & SLAB_RED_ZONE) {
2293 if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { 2325 if (*dbg_redzone1(cachep, objp) != RED_ACTIVE
2294 slab_error(cachep, "double free, or memory outside" 2326 || *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
2295 " object was overwritten"); 2327 slab_error(cachep,
2296 printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", 2328 "double free, or memory outside"
2297 objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp)); 2329 " object was overwritten");
2330 printk(KERN_ERR
2331 "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
2332 objp, *dbg_redzone1(cachep, objp),
2333 *dbg_redzone2(cachep, objp));
2298 } 2334 }
2299 *dbg_redzone1(cachep, objp) = RED_INACTIVE; 2335 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2300 *dbg_redzone2(cachep, objp) = RED_INACTIVE; 2336 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
@@ -2302,30 +2338,31 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp,
2302 if (cachep->flags & SLAB_STORE_USER) 2338 if (cachep->flags & SLAB_STORE_USER)
2303 *dbg_userword(cachep, objp) = caller; 2339 *dbg_userword(cachep, objp) = caller;
2304 2340
2305 objnr = (objp-slabp->s_mem)/cachep->objsize; 2341 objnr = (objp - slabp->s_mem) / cachep->objsize;
2306 2342
2307 BUG_ON(objnr >= cachep->num); 2343 BUG_ON(objnr >= cachep->num);
2308 BUG_ON(objp != slabp->s_mem + objnr*cachep->objsize); 2344 BUG_ON(objp != slabp->s_mem + objnr * cachep->objsize);
2309 2345
2310 if (cachep->flags & SLAB_DEBUG_INITIAL) { 2346 if (cachep->flags & SLAB_DEBUG_INITIAL) {
2311 /* Need to call the slab's constructor so the 2347 /* Need to call the slab's constructor so the
2312 * caller can perform a verify of its state (debugging). 2348 * caller can perform a verify of its state (debugging).
2313 * Called without the cache-lock held. 2349 * Called without the cache-lock held.
2314 */ 2350 */
2315 cachep->ctor(objp+obj_dbghead(cachep), 2351 cachep->ctor(objp + obj_dbghead(cachep),
2316 cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); 2352 cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY);
2317 } 2353 }
2318 if (cachep->flags & SLAB_POISON && cachep->dtor) { 2354 if (cachep->flags & SLAB_POISON && cachep->dtor) {
2319 /* we want to cache poison the object, 2355 /* we want to cache poison the object,
2320 * call the destruction callback 2356 * call the destruction callback
2321 */ 2357 */
2322 cachep->dtor(objp+obj_dbghead(cachep), cachep, 0); 2358 cachep->dtor(objp + obj_dbghead(cachep), cachep, 0);
2323 } 2359 }
2324 if (cachep->flags & SLAB_POISON) { 2360 if (cachep->flags & SLAB_POISON) {
2325#ifdef CONFIG_DEBUG_PAGEALLOC 2361#ifdef CONFIG_DEBUG_PAGEALLOC
2326 if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { 2362 if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
2327 store_stackinfo(cachep, objp, (unsigned long)caller); 2363 store_stackinfo(cachep, objp, (unsigned long)caller);
2328 kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); 2364 kernel_map_pages(virt_to_page(objp),
2365 cachep->objsize / PAGE_SIZE, 0);
2329 } else { 2366 } else {
2330 poison_obj(cachep, objp, POISON_FREE); 2367 poison_obj(cachep, objp, POISON_FREE);
2331 } 2368 }
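
[Editor's note: the BUG_ON() pair in the hunk above is a round-trip check on the pointer being freed. The object number falls out of pointer arithmetic against s_mem, and converting that number back must reproduce the original address exactly, which catches interior pointers and pointers into the wrong slab. In miniature:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        char slab_mem[8 * 128];          /* s_mem for 8 objects of 128 bytes */
        size_t objsize = 128;
        char *objp = slab_mem + 3 * objsize;        /* a valid object */

        size_t objnr = (size_t)(objp - slab_mem) / objsize;
        assert(objnr < 8);                          /* BUG_ON(objnr >= num) */
        assert(objp == slab_mem + objnr * objsize); /* exact alignment */

        printf("objp is object #%zu\n", objnr);
        return 0;
}]
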
@@ -2340,7 +2377,7 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
2340{ 2377{
2341 kmem_bufctl_t i; 2378 kmem_bufctl_t i;
2342 int entries = 0; 2379 int entries = 0;
2343 2380
2344 /* Check slab's freelist to see if this obj is there. */ 2381 /* Check slab's freelist to see if this obj is there. */
2345 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { 2382 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2346 entries++; 2383 entries++;
@@ -2348,13 +2385,16 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
2348 goto bad; 2385 goto bad;
2349 } 2386 }
2350 if (entries != cachep->num - slabp->inuse) { 2387 if (entries != cachep->num - slabp->inuse) {
2351bad: 2388 bad:
2352 printk(KERN_ERR "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", 2389 printk(KERN_ERR
2353 cachep->name, cachep->num, slabp, slabp->inuse); 2390 "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2354 for (i=0;i<sizeof(slabp)+cachep->num*sizeof(kmem_bufctl_t);i++) { 2391 cachep->name, cachep->num, slabp, slabp->inuse);
2355 if ((i%16)==0) 2392 for (i = 0;
2393 i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t);
2394 i++) {
2395 if ((i % 16) == 0)
2356 printk("\n%03x:", i); 2396 printk("\n%03x:", i);
2357 printk(" %02x", ((unsigned char*)slabp)[i]); 2397 printk(" %02x", ((unsigned char *)slabp)[i]);
2358 } 2398 }
2359 printk("\n"); 2399 printk("\n");
2360 BUG(); 2400 BUG();
@@ -2374,7 +2414,7 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags)
2374 2414
2375 check_irq_off(); 2415 check_irq_off();
2376 ac = ac_data(cachep); 2416 ac = ac_data(cachep);
2377retry: 2417 retry:
2378 batchcount = ac->batchcount; 2418 batchcount = ac->batchcount;
2379 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { 2419 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2380 /* if there was little recent activity on this 2420 /* if there was little recent activity on this
@@ -2396,8 +2436,8 @@ retry:
2396 shared_array->avail -= batchcount; 2436 shared_array->avail -= batchcount;
2397 ac->avail = batchcount; 2437 ac->avail = batchcount;
2398 memcpy(ac->entry, 2438 memcpy(ac->entry,
2399 &(shared_array->entry[shared_array->avail]), 2439 &(shared_array->entry[shared_array->avail]),
2400 sizeof(void*)*batchcount); 2440 sizeof(void *) * batchcount);
2401 shared_array->touched = 1; 2441 shared_array->touched = 1;
2402 goto alloc_done; 2442 goto alloc_done;
2403 } 2443 }
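
[Editor's note: an array_cache is a stack of object pointers with avail as its depth, and the refill above moves batchcount entries from the top of the per-node shared array into the CPU-local array with a single memcpy. A simplified model of that transfer, with locking and the surrounding slab lists omitted:

#include <stdio.h>
#include <string.h>

struct array_cache {
        unsigned avail, limit;
        void *entry[64];
};

/* Move up to 'batchcount' pointers from shared to local, as in
 * cache_alloc_refill() when the shared array holds objects. */
static unsigned refill(struct array_cache *local,
                       struct array_cache *shared, unsigned batchcount)
{
        if (batchcount > shared->avail)
                batchcount = shared->avail;
        if (!batchcount)
                return 0;
        shared->avail -= batchcount;
        memcpy(local->entry, &shared->entry[shared->avail],
               sizeof(void *) * batchcount);
        local->avail = batchcount;
        return batchcount;
}

int main(void)
{
        static struct array_cache shared = { .avail = 10, .limit = 64 };
        static struct array_cache local = { .limit = 16 };
        unsigned got = refill(&local, &shared, 6);

        printf("moved %u objs, shared now holds %u\n", got, shared.avail);
        return 0;
}]
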
@@ -2425,7 +2465,7 @@ retry:
2425 2465
2426 /* get obj pointer */ 2466 /* get obj pointer */
2427 ac->entry[ac->avail++] = slabp->s_mem + 2467 ac->entry[ac->avail++] = slabp->s_mem +
2428 slabp->free*cachep->objsize; 2468 slabp->free * cachep->objsize;
2429 2469
2430 slabp->inuse++; 2470 slabp->inuse++;
2431 next = slab_bufctl(slabp)[slabp->free]; 2471 next = slab_bufctl(slabp)[slabp->free];
@@ -2433,7 +2473,7 @@ retry:
2433 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; 2473 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2434 WARN_ON(numa_node_id() != slabp->nodeid); 2474 WARN_ON(numa_node_id() != slabp->nodeid);
2435#endif 2475#endif
2436 slabp->free = next; 2476 slabp->free = next;
2437 } 2477 }
2438 check_slabp(cachep, slabp); 2478 check_slabp(cachep, slabp);
2439 2479
@@ -2445,9 +2485,9 @@ retry:
2445 list_add(&slabp->list, &l3->slabs_partial); 2485 list_add(&slabp->list, &l3->slabs_partial);
2446 } 2486 }
2447 2487
2448must_grow: 2488 must_grow:
2449 l3->free_objects -= ac->avail; 2489 l3->free_objects -= ac->avail;
2450alloc_done: 2490 alloc_done:
2451 spin_unlock(&l3->list_lock); 2491 spin_unlock(&l3->list_lock);
2452 2492
2453 if (unlikely(!ac->avail)) { 2493 if (unlikely(!ac->avail)) {
@@ -2459,7 +2499,7 @@ alloc_done:
2459 if (!x && ac->avail == 0) // no objects in sight? abort 2499 if (!x && ac->avail == 0) // no objects in sight? abort
2460 return NULL; 2500 return NULL;
2461 2501
2462 if (!ac->avail) // objects refilled by interrupt? 2502 if (!ac->avail) // objects refilled by interrupt?
2463 goto retry; 2503 goto retry;
2464 } 2504 }
2465 ac->touched = 1; 2505 ac->touched = 1;
@@ -2476,16 +2516,16 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags)
2476} 2516}
2477 2517
2478#if DEBUG 2518#if DEBUG
2479static void * 2519static void *cache_alloc_debugcheck_after(kmem_cache_t *cachep, gfp_t flags,
2480cache_alloc_debugcheck_after(kmem_cache_t *cachep, 2520 void *objp, void *caller)
2481 gfp_t flags, void *objp, void *caller)
2482{ 2521{
2483 if (!objp) 2522 if (!objp)
2484 return objp; 2523 return objp;
2485 if (cachep->flags & SLAB_POISON) { 2524 if (cachep->flags & SLAB_POISON) {
2486#ifdef CONFIG_DEBUG_PAGEALLOC 2525#ifdef CONFIG_DEBUG_PAGEALLOC
2487 if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) 2526 if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
2488 kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1); 2527 kernel_map_pages(virt_to_page(objp),
2528 cachep->objsize / PAGE_SIZE, 1);
2489 else 2529 else
2490 check_poison_obj(cachep, objp); 2530 check_poison_obj(cachep, objp);
2491#else 2531#else
@@ -2497,24 +2537,28 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
2497 *dbg_userword(cachep, objp) = caller; 2537 *dbg_userword(cachep, objp) = caller;
2498 2538
2499 if (cachep->flags & SLAB_RED_ZONE) { 2539 if (cachep->flags & SLAB_RED_ZONE) {
2500 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { 2540 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE
2501 slab_error(cachep, "double free, or memory outside" 2541 || *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
2502 " object was overwritten"); 2542 slab_error(cachep,
2503 printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", 2543 "double free, or memory outside"
2504 objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp)); 2544 " object was overwritten");
2545 printk(KERN_ERR
2546 "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
2547 objp, *dbg_redzone1(cachep, objp),
2548 *dbg_redzone2(cachep, objp));
2505 } 2549 }
2506 *dbg_redzone1(cachep, objp) = RED_ACTIVE; 2550 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
2507 *dbg_redzone2(cachep, objp) = RED_ACTIVE; 2551 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
2508 } 2552 }
2509 objp += obj_dbghead(cachep); 2553 objp += obj_dbghead(cachep);
2510 if (cachep->ctor && cachep->flags & SLAB_POISON) { 2554 if (cachep->ctor && cachep->flags & SLAB_POISON) {
2511 unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; 2555 unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR;
2512 2556
2513 if (!(flags & __GFP_WAIT)) 2557 if (!(flags & __GFP_WAIT))
2514 ctor_flags |= SLAB_CTOR_ATOMIC; 2558 ctor_flags |= SLAB_CTOR_ATOMIC;
2515 2559
2516 cachep->ctor(objp, cachep, ctor_flags); 2560 cachep->ctor(objp, cachep, ctor_flags);
2517 } 2561 }
2518 return objp; 2562 return objp;
2519} 2563}
2520#else 2564#else
@@ -2523,7 +2567,7 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
2523 2567
2524static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) 2568static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags)
2525{ 2569{
2526 void* objp; 2570 void *objp;
2527 struct array_cache *ac; 2571 struct array_cache *ac;
2528 2572
2529 check_irq_off(); 2573 check_irq_off();
@@ -2542,7 +2586,7 @@ static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags)
2542static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) 2586static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
2543{ 2587{
2544 unsigned long save_flags; 2588 unsigned long save_flags;
2545 void* objp; 2589 void *objp;
2546 2590
2547 cache_alloc_debugcheck_before(cachep, flags); 2591 cache_alloc_debugcheck_before(cachep, flags);
2548 2592
@@ -2550,7 +2594,7 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
2550 objp = ____cache_alloc(cachep, flags); 2594 objp = ____cache_alloc(cachep, flags);
2551 local_irq_restore(save_flags); 2595 local_irq_restore(save_flags);
2552 objp = cache_alloc_debugcheck_after(cachep, flags, objp, 2596 objp = cache_alloc_debugcheck_after(cachep, flags, objp,
2553 __builtin_return_address(0)); 2597 __builtin_return_address(0));
2554 prefetchw(objp); 2598 prefetchw(objp);
2555 return objp; 2599 return objp;
2556} 2600}
@@ -2562,74 +2606,75 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
2562static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) 2606static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
2563{ 2607{
2564 struct list_head *entry; 2608 struct list_head *entry;
2565 struct slab *slabp; 2609 struct slab *slabp;
2566 struct kmem_list3 *l3; 2610 struct kmem_list3 *l3;
2567 void *obj; 2611 void *obj;
2568 kmem_bufctl_t next; 2612 kmem_bufctl_t next;
2569 int x; 2613 int x;
2570 2614
2571 l3 = cachep->nodelists[nodeid]; 2615 l3 = cachep->nodelists[nodeid];
2572 BUG_ON(!l3); 2616 BUG_ON(!l3);
2573 2617
2574retry: 2618 retry:
2575 spin_lock(&l3->list_lock); 2619 spin_lock(&l3->list_lock);
2576 entry = l3->slabs_partial.next; 2620 entry = l3->slabs_partial.next;
2577 if (entry == &l3->slabs_partial) { 2621 if (entry == &l3->slabs_partial) {
2578 l3->free_touched = 1; 2622 l3->free_touched = 1;
2579 entry = l3->slabs_free.next; 2623 entry = l3->slabs_free.next;
2580 if (entry == &l3->slabs_free) 2624 if (entry == &l3->slabs_free)
2581 goto must_grow; 2625 goto must_grow;
2582 } 2626 }
2583 2627
2584 slabp = list_entry(entry, struct slab, list); 2628 slabp = list_entry(entry, struct slab, list);
2585 check_spinlock_acquired_node(cachep, nodeid); 2629 check_spinlock_acquired_node(cachep, nodeid);
2586 check_slabp(cachep, slabp); 2630 check_slabp(cachep, slabp);
2587 2631
2588 STATS_INC_NODEALLOCS(cachep); 2632 STATS_INC_NODEALLOCS(cachep);
2589 STATS_INC_ACTIVE(cachep); 2633 STATS_INC_ACTIVE(cachep);
2590 STATS_SET_HIGH(cachep); 2634 STATS_SET_HIGH(cachep);
2591 2635
2592 BUG_ON(slabp->inuse == cachep->num); 2636 BUG_ON(slabp->inuse == cachep->num);
2593 2637
2594 /* get obj pointer */ 2638 /* get obj pointer */
2595 obj = slabp->s_mem + slabp->free*cachep->objsize; 2639 obj = slabp->s_mem + slabp->free * cachep->objsize;
2596 slabp->inuse++; 2640 slabp->inuse++;
2597 next = slab_bufctl(slabp)[slabp->free]; 2641 next = slab_bufctl(slabp)[slabp->free];
2598#if DEBUG 2642#if DEBUG
2599 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; 2643 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2600#endif 2644#endif
2601 slabp->free = next; 2645 slabp->free = next;
2602 check_slabp(cachep, slabp); 2646 check_slabp(cachep, slabp);
2603 l3->free_objects--; 2647 l3->free_objects--;
2604 /* move slabp to correct slabp list: */ 2648 /* move slabp to correct slabp list: */
2605 list_del(&slabp->list); 2649 list_del(&slabp->list);
2606 2650
2607 if (slabp->free == BUFCTL_END) { 2651 if (slabp->free == BUFCTL_END) {
2608 list_add(&slabp->list, &l3->slabs_full); 2652 list_add(&slabp->list, &l3->slabs_full);
2609 } else { 2653 } else {
2610 list_add(&slabp->list, &l3->slabs_partial); 2654 list_add(&slabp->list, &l3->slabs_partial);
2611 } 2655 }
2612 2656
2613 spin_unlock(&l3->list_lock); 2657 spin_unlock(&l3->list_lock);
2614 goto done; 2658 goto done;
2615 2659
2616must_grow: 2660 must_grow:
2617 spin_unlock(&l3->list_lock); 2661 spin_unlock(&l3->list_lock);
2618 x = cache_grow(cachep, flags, nodeid); 2662 x = cache_grow(cachep, flags, nodeid);
2619 2663
2620 if (!x) 2664 if (!x)
2621 return NULL; 2665 return NULL;
2622 2666
2623 goto retry; 2667 goto retry;
2624done: 2668 done:
2625 return obj; 2669 return obj;
2626} 2670}
2627#endif 2671#endif
2628 2672
2629/* 2673/*
2630 * Caller needs to acquire correct kmem_list's list_lock 2674 * Caller needs to acquire correct kmem_list's list_lock
2631 */ 2675 */
2632static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node) 2676static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects,
2677 int node)
2633{ 2678{
2634 int i; 2679 int i;
2635 struct kmem_list3 *l3; 2680 struct kmem_list3 *l3;
@@ -2652,7 +2697,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int n
2652 2697
2653 if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { 2698 if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
2654 printk(KERN_ERR "slab: double free detected in cache " 2699 printk(KERN_ERR "slab: double free detected in cache "
2655 "'%s', objp %p\n", cachep->name, objp); 2700 "'%s', objp %p\n", cachep->name, objp);
2656 BUG(); 2701 BUG();
2657 } 2702 }
2658#endif 2703#endif
@@ -2696,20 +2741,19 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
2696 spin_lock(&l3->list_lock); 2741 spin_lock(&l3->list_lock);
2697 if (l3->shared) { 2742 if (l3->shared) {
2698 struct array_cache *shared_array = l3->shared; 2743 struct array_cache *shared_array = l3->shared;
2699 int max = shared_array->limit-shared_array->avail; 2744 int max = shared_array->limit - shared_array->avail;
2700 if (max) { 2745 if (max) {
2701 if (batchcount > max) 2746 if (batchcount > max)
2702 batchcount = max; 2747 batchcount = max;
2703 memcpy(&(shared_array->entry[shared_array->avail]), 2748 memcpy(&(shared_array->entry[shared_array->avail]),
2704 ac->entry, 2749 ac->entry, sizeof(void *) * batchcount);
2705 sizeof(void*)*batchcount);
2706 shared_array->avail += batchcount; 2750 shared_array->avail += batchcount;
2707 goto free_done; 2751 goto free_done;
2708 } 2752 }
2709 } 2753 }
2710 2754
2711 free_block(cachep, ac->entry, batchcount, node); 2755 free_block(cachep, ac->entry, batchcount, node);
2712free_done: 2756 free_done:
2713#if STATS 2757#if STATS
2714 { 2758 {
2715 int i = 0; 2759 int i = 0;
@@ -2731,10 +2775,9 @@ free_done:
2731 spin_unlock(&l3->list_lock); 2775 spin_unlock(&l3->list_lock);
2732 ac->avail -= batchcount; 2776 ac->avail -= batchcount;
2733 memmove(ac->entry, &(ac->entry[batchcount]), 2777 memmove(ac->entry, &(ac->entry[batchcount]),
2734 sizeof(void*)*ac->avail); 2778 sizeof(void *) * ac->avail);
2735} 2779}
2736 2780
2737
2738/* 2781/*
2739 * __cache_free 2782 * __cache_free
2740 * Release an obj back to its cache. If the obj has a constructed 2783 * Release an obj back to its cache. If the obj has a constructed
@@ -2759,7 +2802,8 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
2759 if (unlikely(slabp->nodeid != numa_node_id())) { 2802 if (unlikely(slabp->nodeid != numa_node_id())) {
2760 struct array_cache *alien = NULL; 2803 struct array_cache *alien = NULL;
2761 int nodeid = slabp->nodeid; 2804 int nodeid = slabp->nodeid;
2762 struct kmem_list3 *l3 = cachep->nodelists[numa_node_id()]; 2805 struct kmem_list3 *l3 =
2806 cachep->nodelists[numa_node_id()];
2763 2807
2764 STATS_INC_NODEFREES(cachep); 2808 STATS_INC_NODEFREES(cachep);
2765 if (l3->alien && l3->alien[nodeid]) { 2809 if (l3->alien && l3->alien[nodeid]) {
@@ -2767,15 +2811,15 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
2767 spin_lock(&alien->lock); 2811 spin_lock(&alien->lock);
2768 if (unlikely(alien->avail == alien->limit)) 2812 if (unlikely(alien->avail == alien->limit))
2769 __drain_alien_cache(cachep, 2813 __drain_alien_cache(cachep,
2770 alien, nodeid); 2814 alien, nodeid);
2771 alien->entry[alien->avail++] = objp; 2815 alien->entry[alien->avail++] = objp;
2772 spin_unlock(&alien->lock); 2816 spin_unlock(&alien->lock);
2773 } else { 2817 } else {
2774 spin_lock(&(cachep->nodelists[nodeid])-> 2818 spin_lock(&(cachep->nodelists[nodeid])->
2775 list_lock); 2819 list_lock);
2776 free_block(cachep, &objp, 1, nodeid); 2820 free_block(cachep, &objp, 1, nodeid);
2777 spin_unlock(&(cachep->nodelists[nodeid])-> 2821 spin_unlock(&(cachep->nodelists[nodeid])->
2778 list_lock); 2822 list_lock);
2779 } 2823 }
2780 return; 2824 return;
2781 } 2825 }
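
[Editor's note: the NUMA branch above never frees a remote object onto the local lists. If a per-node alien array exists the pointer is parked there, and only when that array fills is it drained to the owning node under that node's list_lock, batching the expensive cross-node locking; with no alien array the object is freed to the remote node immediately. A lock-free toy version of the decision:

#include <stdio.h>

/* Simplified model of the __cache_free() NUMA branch: no locks,
 * plain arrays instead of kmem_list3 structures. */
struct alien_cache { unsigned avail, limit; void *entry[4]; };

static void flush_to_node(void **objs, unsigned n, int node)
{
        printf("freeing %u objs directly to node %d\n", n, node);
}

static void free_remote(void *objp, int obj_node, struct alien_cache *alien)
{
        if (alien) {
                if (alien->avail == alien->limit) {   /* __drain_alien_cache */
                        flush_to_node(alien->entry, alien->avail, obj_node);
                        alien->avail = 0;
                }
                alien->entry[alien->avail++] = objp;  /* park, batch later */
        } else {
                flush_to_node(&objp, 1, obj_node);    /* lock remote node now */
        }
}

int main(void)
{
        static struct alien_cache alien = { .limit = 4 };
        int i, dummy[6];

        for (i = 0; i < 6; i++)
                free_remote(&dummy[i], 1, &alien);    /* home node is 1 */
        printf("%u objs still parked in the alien array\n", alien.avail);
        return 0;
}]
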
@@ -2822,9 +2866,9 @@ EXPORT_SYMBOL(kmem_cache_alloc);
2822 */ 2866 */
2823int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) 2867int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)
2824{ 2868{
2825 unsigned long addr = (unsigned long) ptr; 2869 unsigned long addr = (unsigned long)ptr;
2826 unsigned long min_addr = PAGE_OFFSET; 2870 unsigned long min_addr = PAGE_OFFSET;
2827 unsigned long align_mask = BYTES_PER_WORD-1; 2871 unsigned long align_mask = BYTES_PER_WORD - 1;
2828 unsigned long size = cachep->objsize; 2872 unsigned long size = cachep->objsize;
2829 struct page *page; 2873 struct page *page;
2830 2874
@@ -2844,7 +2888,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)
2844 if (unlikely(page_get_cache(page) != cachep)) 2888 if (unlikely(page_get_cache(page) != cachep))
2845 goto out; 2889 goto out;
2846 return 1; 2890 return 1;
2847out: 2891 out:
2848 return 0; 2892 return 0;
2849} 2893}
2850 2894
@@ -2871,8 +2915,10 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
2871 2915
2872 if (unlikely(!cachep->nodelists[nodeid])) { 2916 if (unlikely(!cachep->nodelists[nodeid])) {
2873 /* Fall back to __cache_alloc if we run into trouble */ 2917 /* Fall back to __cache_alloc if we run into trouble */
2874 printk(KERN_WARNING "slab: not allocating in inactive node %d for cache %s\n", nodeid, cachep->name); 2918 printk(KERN_WARNING
2875 return __cache_alloc(cachep,flags); 2919 "slab: not allocating in inactive node %d for cache %s\n",
2920 nodeid, cachep->name);
2921 return __cache_alloc(cachep, flags);
2876 } 2922 }
2877 2923
2878 cache_alloc_debugcheck_before(cachep, flags); 2924 cache_alloc_debugcheck_before(cachep, flags);
@@ -2882,7 +2928,9 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
2882 else 2928 else
2883 ptr = __cache_alloc_node(cachep, flags, nodeid); 2929 ptr = __cache_alloc_node(cachep, flags, nodeid);
2884 local_irq_restore(save_flags); 2930 local_irq_restore(save_flags);
2885 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0)); 2931 ptr =
2932 cache_alloc_debugcheck_after(cachep, flags, ptr,
2933 __builtin_return_address(0));
2886 2934
2887 return ptr; 2935 return ptr;
2888} 2936}
@@ -2944,12 +2992,11 @@ EXPORT_SYMBOL(__kmalloc);
2944 * Objects should be dereferenced using the per_cpu_ptr macro only. 2992 * Objects should be dereferenced using the per_cpu_ptr macro only.
2945 * 2993 *
2946 * @size: how many bytes of memory are required. 2994 * @size: how many bytes of memory are required.
2947 * @align: the alignment, which can't be greater than SMP_CACHE_BYTES.
2948 */ 2995 */
2949void *__alloc_percpu(size_t size, size_t align) 2996void *__alloc_percpu(size_t size)
2950{ 2997{
2951 int i; 2998 int i;
2952 struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); 2999 struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
2953 3000
2954 if (!pdata) 3001 if (!pdata)
2955 return NULL; 3002 return NULL;
@@ -2973,9 +3020,9 @@ void *__alloc_percpu(size_t size, size_t align)
2973 } 3020 }
2974 3021
2975 /* Catch derefs w/o wrappers */ 3022 /* Catch derefs w/o wrappers */
2976 return (void *) (~(unsigned long) pdata); 3023 return (void *)(~(unsigned long)pdata);
2977 3024
2978unwind_oom: 3025 unwind_oom:
2979 while (--i >= 0) { 3026 while (--i >= 0) {
2980 if (!cpu_possible(i)) 3027 if (!cpu_possible(i))
2981 continue; 3028 continue;
@@ -3006,20 +3053,6 @@ void kmem_cache_free(kmem_cache_t *cachep, void *objp)
3006EXPORT_SYMBOL(kmem_cache_free); 3053EXPORT_SYMBOL(kmem_cache_free);
3007 3054
3008/** 3055/**
3009 * kzalloc - allocate memory. The memory is set to zero.
3010 * @size: how many bytes of memory are required.
3011 * @flags: the type of memory to allocate.
3012 */
3013void *kzalloc(size_t size, gfp_t flags)
3014{
3015 void *ret = kmalloc(size, flags);
3016 if (ret)
3017 memset(ret, 0, size);
3018 return ret;
3019}
3020EXPORT_SYMBOL(kzalloc);
3021
3022/**
3023 * kfree - free previously allocated memory 3056 * kfree - free previously allocated memory
3024 * @objp: pointer returned by kmalloc. 3057 * @objp: pointer returned by kmalloc.
3025 * 3058 *
@@ -3038,7 +3071,7 @@ void kfree(const void *objp)
3038 local_irq_save(flags); 3071 local_irq_save(flags);
3039 kfree_debugcheck(objp); 3072 kfree_debugcheck(objp);
3040 c = page_get_cache(virt_to_page(objp)); 3073 c = page_get_cache(virt_to_page(objp));
3041 __cache_free(c, (void*)objp); 3074 __cache_free(c, (void *)objp);
3042 local_irq_restore(flags); 3075 local_irq_restore(flags);
3043} 3076}
3044EXPORT_SYMBOL(kfree); 3077EXPORT_SYMBOL(kfree);
@@ -3051,17 +3084,16 @@ EXPORT_SYMBOL(kfree);
3051 * Don't free memory not originally allocated by alloc_percpu() 3084 * Don't free memory not originally allocated by alloc_percpu()
3052 * The complemented objp is to check for that. 3085 * The complemented objp is to check for that.
3053 */ 3086 */
3054void 3087void free_percpu(const void *objp)
3055free_percpu(const void *objp)
3056{ 3088{
3057 int i; 3089 int i;
3058 struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp); 3090 struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
3059 3091
3060 /* 3092 /*
3061 * We allocate for all cpus so we cannot use for online cpu here. 3093 * We allocate for all cpus so we cannot use for online cpu here.
3062 */ 3094 */
3063 for_each_cpu(i) 3095 for_each_cpu(i)
3064 kfree(p->ptrs[i]); 3096 kfree(p->ptrs[i]);
3065 kfree(p); 3097 kfree(p);
3066} 3098}
3067EXPORT_SYMBOL(free_percpu); 3099EXPORT_SYMBOL(free_percpu);
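
[Editor's note: with the align parameter gone, __alloc_percpu() is normally reached through wrapper macros, and the comment above is explicit that the result may only be dereferenced through per_cpu_ptr(). A hedged usage sketch in the style of this kernel generation; alloc_percpu(), per_cpu_ptr(), get_cpu() and put_cpu() are assumed from the 2.6 headers of the time, and my_stats is invented for illustration:

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/errno.h>

struct my_stats {
        unsigned long hits;
        unsigned long misses;
};

static struct my_stats *stats;

static int stats_init(void)
{
        stats = alloc_percpu(struct my_stats);  /* zeroed, one copy per cpu */
        if (!stats)
                return -ENOMEM;
        return 0;
}

static void stats_hit(void)
{
        /* dereference only through the per_cpu_ptr() wrapper */
        per_cpu_ptr(stats, get_cpu())->hits++;
        put_cpu();
}

static void stats_exit(void)
{
        free_percpu(stats);
}]
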
@@ -3095,44 +3127,44 @@ static int alloc_kmemlist(kmem_cache_t *cachep)
3095 if (!(new_alien = alloc_alien_cache(node, cachep->limit))) 3127 if (!(new_alien = alloc_alien_cache(node, cachep->limit)))
3096 goto fail; 3128 goto fail;
3097#endif 3129#endif
3098 if (!(new = alloc_arraycache(node, (cachep->shared* 3130 if (!(new = alloc_arraycache(node, (cachep->shared *
3099 cachep->batchcount), 0xbaadf00d))) 3131 cachep->batchcount),
3132 0xbaadf00d)))
3100 goto fail; 3133 goto fail;
3101 if ((l3 = cachep->nodelists[node])) { 3134 if ((l3 = cachep->nodelists[node])) {
3102 3135
3103 spin_lock_irq(&l3->list_lock); 3136 spin_lock_irq(&l3->list_lock);
3104 3137
3105 if ((nc = cachep->nodelists[node]->shared)) 3138 if ((nc = cachep->nodelists[node]->shared))
3106 free_block(cachep, nc->entry, 3139 free_block(cachep, nc->entry, nc->avail, node);
3107 nc->avail, node);
3108 3140
3109 l3->shared = new; 3141 l3->shared = new;
3110 if (!cachep->nodelists[node]->alien) { 3142 if (!cachep->nodelists[node]->alien) {
3111 l3->alien = new_alien; 3143 l3->alien = new_alien;
3112 new_alien = NULL; 3144 new_alien = NULL;
3113 } 3145 }
3114 l3->free_limit = (1 + nr_cpus_node(node))* 3146 l3->free_limit = (1 + nr_cpus_node(node)) *
3115 cachep->batchcount + cachep->num; 3147 cachep->batchcount + cachep->num;
3116 spin_unlock_irq(&l3->list_lock); 3148 spin_unlock_irq(&l3->list_lock);
3117 kfree(nc); 3149 kfree(nc);
3118 free_alien_cache(new_alien); 3150 free_alien_cache(new_alien);
3119 continue; 3151 continue;
3120 } 3152 }
3121 if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), 3153 if (!(l3 = kmalloc_node(sizeof(struct kmem_list3),
3122 GFP_KERNEL, node))) 3154 GFP_KERNEL, node)))
3123 goto fail; 3155 goto fail;
3124 3156
3125 kmem_list3_init(l3); 3157 kmem_list3_init(l3);
3126 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 3158 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3127 ((unsigned long)cachep)%REAPTIMEOUT_LIST3; 3159 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3128 l3->shared = new; 3160 l3->shared = new;
3129 l3->alien = new_alien; 3161 l3->alien = new_alien;
3130 l3->free_limit = (1 + nr_cpus_node(node))* 3162 l3->free_limit = (1 + nr_cpus_node(node)) *
3131 cachep->batchcount + cachep->num; 3163 cachep->batchcount + cachep->num;
3132 cachep->nodelists[node] = l3; 3164 cachep->nodelists[node] = l3;
3133 } 3165 }
3134 return err; 3166 return err;
3135fail: 3167 fail:
3136 err = -ENOMEM; 3168 err = -ENOMEM;
3137 return err; 3169 return err;
3138} 3170}
@@ -3154,18 +3186,19 @@ static void do_ccupdate_local(void *info)
3154 new->new[smp_processor_id()] = old; 3186 new->new[smp_processor_id()] = old;
3155} 3187}
3156 3188
3157
3158static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, 3189static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
3159 int shared) 3190 int shared)
3160{ 3191{
3161 struct ccupdate_struct new; 3192 struct ccupdate_struct new;
3162 int i, err; 3193 int i, err;
3163 3194
3164 memset(&new.new,0,sizeof(new.new)); 3195 memset(&new.new, 0, sizeof(new.new));
3165 for_each_online_cpu(i) { 3196 for_each_online_cpu(i) {
3166 new.new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount); 3197 new.new[i] =
3198 alloc_arraycache(cpu_to_node(i), limit, batchcount);
3167 if (!new.new[i]) { 3199 if (!new.new[i]) {
3168 for (i--; i >= 0; i--) kfree(new.new[i]); 3200 for (i--; i >= 0; i--)
3201 kfree(new.new[i]);
3169 return -ENOMEM; 3202 return -ENOMEM;
3170 } 3203 }
3171 } 3204 }
@@ -3193,13 +3226,12 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
3193 err = alloc_kmemlist(cachep); 3226 err = alloc_kmemlist(cachep);
3194 if (err) { 3227 if (err) {
3195 printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n", 3228 printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n",
3196 cachep->name, -err); 3229 cachep->name, -err);
3197 BUG(); 3230 BUG();
3198 } 3231 }
3199 return 0; 3232 return 0;
3200} 3233}
3201 3234
3202
3203static void enable_cpucache(kmem_cache_t *cachep) 3235static void enable_cpucache(kmem_cache_t *cachep)
3204{ 3236{
3205 int err; 3237 int err;
@@ -3246,14 +3278,14 @@ static void enable_cpucache(kmem_cache_t *cachep)
3246 if (limit > 32) 3278 if (limit > 32)
3247 limit = 32; 3279 limit = 32;
3248#endif 3280#endif
3249 err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared); 3281 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
3250 if (err) 3282 if (err)
3251 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", 3283 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
3252 cachep->name, -err); 3284 cachep->name, -err);
3253} 3285}
3254 3286
3255static void drain_array_locked(kmem_cache_t *cachep, 3287static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac,
3256 struct array_cache *ac, int force, int node) 3288 int force, int node)
3257{ 3289{
3258 int tofree; 3290 int tofree;
3259 3291
@@ -3261,14 +3293,14 @@ static void drain_array_locked(kmem_cache_t *cachep,
3261 if (ac->touched && !force) { 3293 if (ac->touched && !force) {
3262 ac->touched = 0; 3294 ac->touched = 0;
3263 } else if (ac->avail) { 3295 } else if (ac->avail) {
3264 tofree = force ? ac->avail : (ac->limit+4)/5; 3296 tofree = force ? ac->avail : (ac->limit + 4) / 5;
3265 if (tofree > ac->avail) { 3297 if (tofree > ac->avail) {
3266 tofree = (ac->avail+1)/2; 3298 tofree = (ac->avail + 1) / 2;
3267 } 3299 }
3268 free_block(cachep, ac->entry, tofree, node); 3300 free_block(cachep, ac->entry, tofree, node);
3269 ac->avail -= tofree; 3301 ac->avail -= tofree;
3270 memmove(ac->entry, &(ac->entry[tofree]), 3302 memmove(ac->entry, &(ac->entry[tofree]),
3271 sizeof(void*)*ac->avail); 3303 sizeof(void *) * ac->avail);
3272 } 3304 }
3273} 3305}
3274 3306
@@ -3291,13 +3323,14 @@ static void cache_reap(void *unused)
3291 3323
3292 if (down_trylock(&cache_chain_sem)) { 3324 if (down_trylock(&cache_chain_sem)) {
3293 /* Give up. Setup the next iteration. */ 3325 /* Give up. Setup the next iteration. */
3294 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); 3326 schedule_delayed_work(&__get_cpu_var(reap_work),
3327 REAPTIMEOUT_CPUC);
3295 return; 3328 return;
3296 } 3329 }
3297 3330
3298 list_for_each(walk, &cache_chain) { 3331 list_for_each(walk, &cache_chain) {
3299 kmem_cache_t *searchp; 3332 kmem_cache_t *searchp;
3300 struct list_head* p; 3333 struct list_head *p;
3301 int tofree; 3334 int tofree;
3302 struct slab *slabp; 3335 struct slab *slabp;
3303 3336
@@ -3314,7 +3347,7 @@ static void cache_reap(void *unused)
3314 spin_lock_irq(&l3->list_lock); 3347 spin_lock_irq(&l3->list_lock);
3315 3348
3316 drain_array_locked(searchp, ac_data(searchp), 0, 3349 drain_array_locked(searchp, ac_data(searchp), 0,
3317 numa_node_id()); 3350 numa_node_id());
3318 3351
3319 if (time_after(l3->next_reap, jiffies)) 3352 if (time_after(l3->next_reap, jiffies))
3320 goto next_unlock; 3353 goto next_unlock;
@@ -3323,14 +3356,16 @@ static void cache_reap(void *unused)
3323 3356
3324 if (l3->shared) 3357 if (l3->shared)
3325 drain_array_locked(searchp, l3->shared, 0, 3358 drain_array_locked(searchp, l3->shared, 0,
3326 numa_node_id()); 3359 numa_node_id());
3327 3360
3328 if (l3->free_touched) { 3361 if (l3->free_touched) {
3329 l3->free_touched = 0; 3362 l3->free_touched = 0;
3330 goto next_unlock; 3363 goto next_unlock;
3331 } 3364 }
3332 3365
3333 tofree = (l3->free_limit+5*searchp->num-1)/(5*searchp->num); 3366 tofree =
3367 (l3->free_limit + 5 * searchp->num -
3368 1) / (5 * searchp->num);
3334 do { 3369 do {
3335 p = l3->slabs_free.next; 3370 p = l3->slabs_free.next;
3336 if (p == &(l3->slabs_free)) 3371 if (p == &(l3->slabs_free))
@@ -3350,10 +3385,10 @@ static void cache_reap(void *unused)
3350 spin_unlock_irq(&l3->list_lock); 3385 spin_unlock_irq(&l3->list_lock);
3351 slab_destroy(searchp, slabp); 3386 slab_destroy(searchp, slabp);
3352 spin_lock_irq(&l3->list_lock); 3387 spin_lock_irq(&l3->list_lock);
3353 } while(--tofree > 0); 3388 } while (--tofree > 0);
3354next_unlock: 3389 next_unlock:
3355 spin_unlock_irq(&l3->list_lock); 3390 spin_unlock_irq(&l3->list_lock);
3356next: 3391 next:
3357 cond_resched(); 3392 cond_resched();
3358 } 3393 }
3359 check_irq_on(); 3394 check_irq_on();
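
[Editor's note: the tofree expression above, (l3->free_limit + 5*searchp->num - 1) / (5*searchp->num), is plain ceiling division: each reap pass frees at most a fifth of free_limit worth of completely free slabs, rounded up so at least one slab goes whenever the limit is nonzero. For example:

#include <stdio.h>

static unsigned long ceil_div(unsigned long a, unsigned long b)
{
        return (a + b - 1) / b;   /* same idiom as the tofree computation */
}

int main(void)
{
        unsigned long free_limit = 120, num = 17;   /* objs per slab */
        printf("tofree = %lu slabs per reap\n",
               ceil_div(free_limit, 5 * num));      /* ceil(120/85) = 2 */
        return 0;
}]
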
@@ -3365,32 +3400,37 @@ next:
3365 3400
3366#ifdef CONFIG_PROC_FS 3401#ifdef CONFIG_PROC_FS
3367 3402
3368static void *s_start(struct seq_file *m, loff_t *pos) 3403static void print_slabinfo_header(struct seq_file *m)
3369{ 3404{
3370 loff_t n = *pos; 3405 /*
3371 struct list_head *p; 3406 * Output format version, so at least we can change it
3372 3407 * without _too_ many complaints.
3373 down(&cache_chain_sem); 3408 */
3374 if (!n) {
3375 /*
3376 * Output format version, so at least we can change it
3377 * without _too_ many complaints.
3378 */
3379#if STATS 3409#if STATS
3380 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); 3410 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
3381#else 3411#else
3382 seq_puts(m, "slabinfo - version: 2.1\n"); 3412 seq_puts(m, "slabinfo - version: 2.1\n");
3383#endif 3413#endif
3384 seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>"); 3414 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
3385 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 3415 "<objperslab> <pagesperslab>");
3386 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 3416 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
3417 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
3387#if STATS 3418#if STATS
3388 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped>" 3419 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
3389 " <error> <maxfreeable> <nodeallocs> <remotefrees>"); 3420 "<error> <maxfreeable> <nodeallocs> <remotefrees>");
3390 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); 3421 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
3391#endif 3422#endif
3392 seq_putc(m, '\n'); 3423 seq_putc(m, '\n');
3393 } 3424}
3425
3426static void *s_start(struct seq_file *m, loff_t *pos)
3427{
3428 loff_t n = *pos;
3429 struct list_head *p;
3430
3431 down(&cache_chain_sem);
3432 if (!n)
3433 print_slabinfo_header(m);
3394 p = cache_chain.next; 3434 p = cache_chain.next;
3395 while (n--) { 3435 while (n--) {
3396 p = p->next; 3436 p = p->next;
@@ -3405,7 +3445,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
3405 kmem_cache_t *cachep = p; 3445 kmem_cache_t *cachep = p;
3406 ++*pos; 3446 ++*pos;
3407 return cachep->next.next == &cache_chain ? NULL 3447 return cachep->next.next == &cache_chain ? NULL
3408 : list_entry(cachep->next.next, kmem_cache_t, next); 3448 : list_entry(cachep->next.next, kmem_cache_t, next);
3409} 3449}
3410 3450
3411static void s_stop(struct seq_file *m, void *p) 3451static void s_stop(struct seq_file *m, void *p)
@@ -3417,11 +3457,11 @@ static int s_show(struct seq_file *m, void *p)
3417{ 3457{
3418 kmem_cache_t *cachep = p; 3458 kmem_cache_t *cachep = p;
3419 struct list_head *q; 3459 struct list_head *q;
3420 struct slab *slabp; 3460 struct slab *slabp;
3421 unsigned long active_objs; 3461 unsigned long active_objs;
3422 unsigned long num_objs; 3462 unsigned long num_objs;
3423 unsigned long active_slabs = 0; 3463 unsigned long active_slabs = 0;
3424 unsigned long num_slabs, free_objects = 0, shared_avail = 0; 3464 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
3425 const char *name; 3465 const char *name;
3426 char *error = NULL; 3466 char *error = NULL;
3427 int node; 3467 int node;
@@ -3438,14 +3478,14 @@ static int s_show(struct seq_file *m, void *p)
3438 3478
3439 spin_lock(&l3->list_lock); 3479 spin_lock(&l3->list_lock);
3440 3480
3441 list_for_each(q,&l3->slabs_full) { 3481 list_for_each(q, &l3->slabs_full) {
3442 slabp = list_entry(q, struct slab, list); 3482 slabp = list_entry(q, struct slab, list);
3443 if (slabp->inuse != cachep->num && !error) 3483 if (slabp->inuse != cachep->num && !error)
3444 error = "slabs_full accounting error"; 3484 error = "slabs_full accounting error";
3445 active_objs += cachep->num; 3485 active_objs += cachep->num;
3446 active_slabs++; 3486 active_slabs++;
3447 } 3487 }
3448 list_for_each(q,&l3->slabs_partial) { 3488 list_for_each(q, &l3->slabs_partial) {
3449 slabp = list_entry(q, struct slab, list); 3489 slabp = list_entry(q, struct slab, list);
3450 if (slabp->inuse == cachep->num && !error) 3490 if (slabp->inuse == cachep->num && !error)
3451 error = "slabs_partial inuse accounting error"; 3491 error = "slabs_partial inuse accounting error";
@@ -3454,7 +3494,7 @@ static int s_show(struct seq_file *m, void *p)
3454 active_objs += slabp->inuse; 3494 active_objs += slabp->inuse;
3455 active_slabs++; 3495 active_slabs++;
3456 } 3496 }
3457 list_for_each(q,&l3->slabs_free) { 3497 list_for_each(q, &l3->slabs_free) {
3458 slabp = list_entry(q, struct slab, list); 3498 slabp = list_entry(q, struct slab, list);
3459 if (slabp->inuse && !error) 3499 if (slabp->inuse && !error)
3460 error = "slabs_free/inuse accounting error"; 3500 error = "slabs_free/inuse accounting error";
@@ -3465,25 +3505,24 @@ static int s_show(struct seq_file *m, void *p)
3465 3505
3466 spin_unlock(&l3->list_lock); 3506 spin_unlock(&l3->list_lock);
3467 } 3507 }
3468 num_slabs+=active_slabs; 3508 num_slabs += active_slabs;
3469 num_objs = num_slabs*cachep->num; 3509 num_objs = num_slabs * cachep->num;
3470 if (num_objs - active_objs != free_objects && !error) 3510 if (num_objs - active_objs != free_objects && !error)
3471 error = "free_objects accounting error"; 3511 error = "free_objects accounting error";
3472 3512
3473 name = cachep->name; 3513 name = cachep->name;
3474 if (error) 3514 if (error)
3475 printk(KERN_ERR "slab: cache %s error: %s\n", name, error); 3515 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
3476 3516
3477 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 3517 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
3478 name, active_objs, num_objs, cachep->objsize, 3518 name, active_objs, num_objs, cachep->objsize,
3479 cachep->num, (1<<cachep->gfporder)); 3519 cachep->num, (1 << cachep->gfporder));
3480 seq_printf(m, " : tunables %4u %4u %4u", 3520 seq_printf(m, " : tunables %4u %4u %4u",
3481 cachep->limit, cachep->batchcount, 3521 cachep->limit, cachep->batchcount, cachep->shared);
3482 cachep->shared);
3483 seq_printf(m, " : slabdata %6lu %6lu %6lu", 3522 seq_printf(m, " : slabdata %6lu %6lu %6lu",
3484 active_slabs, num_slabs, shared_avail); 3523 active_slabs, num_slabs, shared_avail);
3485#if STATS 3524#if STATS
3486 { /* list3 stats */ 3525 { /* list3 stats */
3487 unsigned long high = cachep->high_mark; 3526 unsigned long high = cachep->high_mark;
3488 unsigned long allocs = cachep->num_allocations; 3527 unsigned long allocs = cachep->num_allocations;
3489 unsigned long grown = cachep->grown; 3528 unsigned long grown = cachep->grown;
@@ -3494,9 +3533,7 @@ static int s_show(struct seq_file *m, void *p)
3494 unsigned long node_frees = cachep->node_frees; 3533 unsigned long node_frees = cachep->node_frees;
3495 3534
3496 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ 3535 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
3497 %4lu %4lu %4lu %4lu", 3536 %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees);
3498 allocs, high, grown, reaped, errors,
3499 max_freeable, node_allocs, node_frees);
3500 } 3537 }
3501 /* cpu stats */ 3538 /* cpu stats */
3502 { 3539 {
@@ -3506,7 +3543,7 @@ static int s_show(struct seq_file *m, void *p)
3506 unsigned long freemiss = atomic_read(&cachep->freemiss); 3543 unsigned long freemiss = atomic_read(&cachep->freemiss);
3507 3544
3508 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", 3545 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
3509 allochit, allocmiss, freehit, freemiss); 3546 allochit, allocmiss, freehit, freemiss);
3510 } 3547 }
3511#endif 3548#endif
3512 seq_putc(m, '\n'); 3549 seq_putc(m, '\n');
@@ -3529,10 +3566,10 @@ static int s_show(struct seq_file *m, void *p)
3529 */ 3566 */
3530 3567
3531struct seq_operations slabinfo_op = { 3568struct seq_operations slabinfo_op = {
3532 .start = s_start, 3569 .start = s_start,
3533 .next = s_next, 3570 .next = s_next,
3534 .stop = s_stop, 3571 .stop = s_stop,
3535 .show = s_show, 3572 .show = s_show,
3536}; 3573};
3537 3574
3538#define MAX_SLABINFO_WRITE 128 3575#define MAX_SLABINFO_WRITE 128
@@ -3543,18 +3580,18 @@ struct seq_operations slabinfo_op = {
3543 * @count: data length 3580 * @count: data length
3544 * @ppos: unused 3581 * @ppos: unused
3545 */ 3582 */
3546ssize_t slabinfo_write(struct file *file, const char __user *buffer, 3583ssize_t slabinfo_write(struct file *file, const char __user * buffer,
3547 size_t count, loff_t *ppos) 3584 size_t count, loff_t *ppos)
3548{ 3585{
3549 char kbuf[MAX_SLABINFO_WRITE+1], *tmp; 3586 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
3550 int limit, batchcount, shared, res; 3587 int limit, batchcount, shared, res;
3551 struct list_head *p; 3588 struct list_head *p;
3552 3589
3553 if (count > MAX_SLABINFO_WRITE) 3590 if (count > MAX_SLABINFO_WRITE)
3554 return -EINVAL; 3591 return -EINVAL;
3555 if (copy_from_user(&kbuf, buffer, count)) 3592 if (copy_from_user(&kbuf, buffer, count))
3556 return -EFAULT; 3593 return -EFAULT;
3557 kbuf[MAX_SLABINFO_WRITE] = '\0'; 3594 kbuf[MAX_SLABINFO_WRITE] = '\0';
3558 3595
3559 tmp = strchr(kbuf, ' '); 3596 tmp = strchr(kbuf, ' ');
3560 if (!tmp) 3597 if (!tmp)
@@ -3567,18 +3604,17 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
3567 /* Find the cache in the chain of caches. */ 3604 /* Find the cache in the chain of caches. */
3568 down(&cache_chain_sem); 3605 down(&cache_chain_sem);
3569 res = -EINVAL; 3606 res = -EINVAL;
3570 list_for_each(p,&cache_chain) { 3607 list_for_each(p, &cache_chain) {
3571 kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next); 3608 kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
3572 3609
3573 if (!strcmp(cachep->name, kbuf)) { 3610 if (!strcmp(cachep->name, kbuf)) {
3574 if (limit < 1 || 3611 if (limit < 1 ||
3575 batchcount < 1 || 3612 batchcount < 1 ||
3576 batchcount > limit || 3613 batchcount > limit || shared < 0) {
3577 shared < 0) {
3578 res = 0; 3614 res = 0;
3579 } else { 3615 } else {
3580 res = do_tune_cpucache(cachep, limit, 3616 res = do_tune_cpucache(cachep, limit,
3581 batchcount, shared); 3617 batchcount, shared);
3582 } 3618 }
3583 break; 3619 break;
3584 } 3620 }
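
[Editor's note: slabinfo_write() accepts a single line of the form "cache-name limit batchcount shared", mirroring the tunables columns of the header, and rejects limit < 1, batchcount < 1, batchcount > limit or shared < 0; writes longer than MAX_SLABINFO_WRITE are refused outright. So a cache can be retuned from userspace along these lines; the cache name below is illustrative and should be taken from the first column of /proc/slabinfo on the running system:

#include <stdio.h>

int main(void)
{
        /* Retune an existing cache: "name limit batchcount shared".
         * Needs root; the whole line must fit in one short write. */
        FILE *f = fopen("/proc/slabinfo", "w");

        if (!f)
                return 1;
        fprintf(f, "dentry_cache 256 128 8\n");
        return fclose(f) ? 1 : 0;
}]
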
@@ -3609,26 +3645,3 @@ unsigned int ksize(const void *objp)
3609 3645
3610 return obj_reallen(page_get_cache(virt_to_page(objp))); 3646 return obj_reallen(page_get_cache(virt_to_page(objp)));
3611} 3647}
3612
3613
3614/*
3615 * kstrdup - allocate space for and copy an existing string
3616 *
3617 * @s: the string to duplicate
3618 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
3619 */
3620char *kstrdup(const char *s, gfp_t gfp)
3621{
3622 size_t len;
3623 char *buf;
3624
3625 if (!s)
3626 return NULL;
3627
3628 len = strlen(s) + 1;
3629 buf = kmalloc(len, gfp);
3630 if (buf)
3631 memcpy(buf, s, len);
3632 return buf;
3633}
3634EXPORT_SYMBOL(kstrdup);
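
The slabinfo_write() hunk above parses a line of the form "name limit batchcount shared" and, after checking limit >= 1, batchcount >= 1, batchcount <= limit and shared >= 0, applies it via do_tune_cpucache(). A minimal userspace sketch of driving that interface, assuming root and a cache name that exists on the running kernel (dentry_cache here is only an example):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/slabinfo", "w");

	if (!f) {
		perror("fopen /proc/slabinfo");
		return 1;
	}
	/* "name limit batchcount shared", as parsed by slabinfo_write() */
	if (fprintf(f, "dentry_cache 120 60 8\n") < 0)
		perror("fprintf");
	return fclose(f) ? 1 : 0;
}
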
diff --git a/mm/slob.c b/mm/slob.c
new file mode 100644
index 000000000000..1c240c4b71d9
--- /dev/null
+++ b/mm/slob.c
@@ -0,0 +1,385 @@
1/*
2 * SLOB Allocator: Simple List Of Blocks
3 *
4 * Matt Mackall <mpm@selenic.com> 12/30/03
5 *
6 * How SLOB works:
7 *
8 * The core of SLOB is a traditional K&R style heap allocator, with
9 * support for returning aligned objects. The granularity of this
10 * allocator is 8 bytes on x86, though it's perhaps possible to reduce
11 * this to 4 if it's deemed worth the effort. The slob heap is a
12 * singly-linked list of pages from __get_free_page, grown on demand,
13 * and allocation from the heap is currently first-fit.
14 *
15 * Above this is an implementation of kmalloc/kfree. Blocks returned
16 * from kmalloc are 8-byte aligned and prepended with an 8-byte header.
17 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
18 * __get_free_pages directly so that it can return page-aligned blocks
19 * and keeps a linked list of such pages and their orders. These
20 * objects are detected in kfree() by their page alignment.
21 *
22 * SLAB is emulated on top of SLOB by simply calling constructors and
23 * destructors for every SLAB allocation. Objects are returned with
24 * the 8-byte alignment unless the SLAB_MUST_HWCACHE_ALIGN flag is
25 * set, in which case the low-level allocator will fragment blocks to
26 * create the proper alignment. Again, objects of page-size or greater
27 * are allocated by calling __get_free_pages. As SLAB objects know
28 * their size, no separate size bookkeeping is necessary and there is
29 * essentially no allocation space overhead.
30 */
31
32#include <linux/config.h>
33#include <linux/slab.h>
34#include <linux/mm.h>
35#include <linux/cache.h>
36#include <linux/init.h>
37#include <linux/module.h>
38#include <linux/timer.h>
39
40struct slob_block {
41 int units;
42 struct slob_block *next;
43};
44typedef struct slob_block slob_t;
45
46#define SLOB_UNIT sizeof(slob_t)
47#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
48#define SLOB_ALIGN L1_CACHE_BYTES
49
50struct bigblock {
51 int order;
52 void *pages;
53 struct bigblock *next;
54};
55typedef struct bigblock bigblock_t;
56
57static slob_t arena = { .next = &arena, .units = 1 };
58static slob_t *slobfree = &arena;
59static bigblock_t *bigblocks;
60static DEFINE_SPINLOCK(slob_lock);
61static DEFINE_SPINLOCK(block_lock);
62
63static void slob_free(void *b, int size);
64
65static void *slob_alloc(size_t size, gfp_t gfp, int align)
66{
67 slob_t *prev, *cur, *aligned = 0;
68 int delta = 0, units = SLOB_UNITS(size);
69 unsigned long flags;
70
71 spin_lock_irqsave(&slob_lock, flags);
72 prev = slobfree;
73 for (cur = prev->next; ; prev = cur, cur = cur->next) {
74 if (align) {
75 aligned = (slob_t *)ALIGN((unsigned long)cur, align);
76 delta = aligned - cur;
77 }
78 if (cur->units >= units + delta) { /* room enough? */
79 if (delta) { /* need to fragment head to align? */
80 aligned->units = cur->units - delta;
81 aligned->next = cur->next;
82 cur->next = aligned;
83 cur->units = delta;
84 prev = cur;
85 cur = aligned;
86 }
87
88 if (cur->units == units) /* exact fit? */
89 prev->next = cur->next; /* unlink */
90 else { /* fragment */
91 prev->next = cur + units;
92 prev->next->units = cur->units - units;
93 prev->next->next = cur->next;
94 cur->units = units;
95 }
96
97 slobfree = prev;
98 spin_unlock_irqrestore(&slob_lock, flags);
99 return cur;
100 }
101 if (cur == slobfree) {
102 spin_unlock_irqrestore(&slob_lock, flags);
103
104 if (size == PAGE_SIZE) /* trying to shrink arena? */
105 return 0;
106
107 cur = (slob_t *)__get_free_page(gfp);
108 if (!cur)
109 return 0;
110
111 slob_free(cur, PAGE_SIZE);
112 spin_lock_irqsave(&slob_lock, flags);
113 cur = slobfree;
114 }
115 }
116}
117
118static void slob_free(void *block, int size)
119{
120 slob_t *cur, *b = (slob_t *)block;
121 unsigned long flags;
122
123 if (!block)
124 return;
125
126 if (size)
127 b->units = SLOB_UNITS(size);
128
129 /* Find reinsertion point */
130 spin_lock_irqsave(&slob_lock, flags);
131 for (cur = slobfree; !(b > cur && b < cur->next); cur = cur->next)
132 if (cur >= cur->next && (b > cur || b < cur->next))
133 break;
134
135 if (b + b->units == cur->next) {
136 b->units += cur->next->units;
137 b->next = cur->next->next;
138 } else
139 b->next = cur->next;
140
141 if (cur + cur->units == b) {
142 cur->units += b->units;
143 cur->next = b->next;
144 } else
145 cur->next = b;
146
147 slobfree = cur;
148
149 spin_unlock_irqrestore(&slob_lock, flags);
150}
151
152static int FASTCALL(find_order(int size));
153static int fastcall find_order(int size)
154{
155 int order = 0;
156 for ( ; size > 4096 ; size >>=1)
157 order++;
158 return order;
159}
160
161void *kmalloc(size_t size, gfp_t gfp)
162{
163 slob_t *m;
164 bigblock_t *bb;
165 unsigned long flags;
166
167 if (size < PAGE_SIZE - SLOB_UNIT) {
168 m = slob_alloc(size + SLOB_UNIT, gfp, 0);
169 return m ? (void *)(m + 1) : 0;
170 }
171
172 bb = slob_alloc(sizeof(bigblock_t), gfp, 0);
173 if (!bb)
174 return 0;
175
176 bb->order = find_order(size);
177 bb->pages = (void *)__get_free_pages(gfp, bb->order);
178
179 if (bb->pages) {
180 spin_lock_irqsave(&block_lock, flags);
181 bb->next = bigblocks;
182 bigblocks = bb;
183 spin_unlock_irqrestore(&block_lock, flags);
184 return bb->pages;
185 }
186
187 slob_free(bb, sizeof(bigblock_t));
188 return 0;
189}
190
191EXPORT_SYMBOL(kmalloc);
192
193void kfree(const void *block)
194{
195 bigblock_t *bb, **last = &bigblocks;
196 unsigned long flags;
197
198 if (!block)
199 return;
200
201 if (!((unsigned long)block & (PAGE_SIZE-1))) {
202 /* might be on the big block list */
203 spin_lock_irqsave(&block_lock, flags);
204 for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) {
205 if (bb->pages == block) {
206 *last = bb->next;
207 spin_unlock_irqrestore(&block_lock, flags);
208 free_pages((unsigned long)block, bb->order);
209 slob_free(bb, sizeof(bigblock_t));
210 return;
211 }
212 }
213 spin_unlock_irqrestore(&block_lock, flags);
214 }
215
216 slob_free((slob_t *)block - 1, 0);
217 return;
218}
219
220EXPORT_SYMBOL(kfree);
221
222unsigned int ksize(const void *block)
223{
224 bigblock_t *bb;
225 unsigned long flags;
226
227 if (!block)
228 return 0;
229
230 if (!((unsigned long)block & (PAGE_SIZE-1))) {
231 spin_lock_irqsave(&block_lock, flags);
232 for (bb = bigblocks; bb; bb = bb->next)
233 if (bb->pages == block) {
234 spin_unlock_irqrestore(&block_lock, flags);
235 return PAGE_SIZE << bb->order;
236 }
237 spin_unlock_irqrestore(&block_lock, flags);
238 }
239
240 return ((slob_t *)block - 1)->units * SLOB_UNIT;
241}
242
243struct kmem_cache {
244 unsigned int size, align;
245 const char *name;
246 void (*ctor)(void *, struct kmem_cache *, unsigned long);
247 void (*dtor)(void *, struct kmem_cache *, unsigned long);
248};
249
250struct kmem_cache *kmem_cache_create(const char *name, size_t size,
251 size_t align, unsigned long flags,
252 void (*ctor)(void*, struct kmem_cache *, unsigned long),
253 void (*dtor)(void*, struct kmem_cache *, unsigned long))
254{
255 struct kmem_cache *c;
256
257 c = slob_alloc(sizeof(struct kmem_cache), flags, 0);
258
259 if (c) {
260 c->name = name;
261 c->size = size;
262 c->ctor = ctor;
263 c->dtor = dtor;
264 /* ignore alignment unless it's forced */
265 c->align = (flags & SLAB_MUST_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
266 if (c->align < align)
267 c->align = align;
268 }
269
270 return c;
271}
272EXPORT_SYMBOL(kmem_cache_create);
273
274int kmem_cache_destroy(struct kmem_cache *c)
275{
276 slob_free(c, sizeof(struct kmem_cache));
277 return 0;
278}
279EXPORT_SYMBOL(kmem_cache_destroy);
280
281void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
282{
283 void *b;
284
285 if (c->size < PAGE_SIZE)
286 b = slob_alloc(c->size, flags, c->align);
287 else
288 b = (void *)__get_free_pages(flags, find_order(c->size));
289
290 if (c->ctor)
291 c->ctor(b, c, SLAB_CTOR_CONSTRUCTOR);
292
293 return b;
294}
295EXPORT_SYMBOL(kmem_cache_alloc);
296
297void kmem_cache_free(struct kmem_cache *c, void *b)
298{
299 if (c->dtor)
300 c->dtor(b, c, 0);
301
302 if (c->size < PAGE_SIZE)
303 slob_free(b, c->size);
304 else
305 free_pages((unsigned long)b, find_order(c->size));
306}
307EXPORT_SYMBOL(kmem_cache_free);
308
309unsigned int kmem_cache_size(struct kmem_cache *c)
310{
311 return c->size;
312}
313EXPORT_SYMBOL(kmem_cache_size);
314
315const char *kmem_cache_name(struct kmem_cache *c)
316{
317 return c->name;
318}
319EXPORT_SYMBOL(kmem_cache_name);
320
321static struct timer_list slob_timer = TIMER_INITIALIZER(
322 (void (*)(unsigned long))kmem_cache_init, 0, 0);
323
324void kmem_cache_init(void)
325{
326 void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1);
327
328 if (p)
329 free_page((unsigned long)p);
330
331 mod_timer(&slob_timer, jiffies + HZ);
332}
333
334atomic_t slab_reclaim_pages = ATOMIC_INIT(0);
335EXPORT_SYMBOL(slab_reclaim_pages);
336
337#ifdef CONFIG_SMP
338
339void *__alloc_percpu(size_t size, size_t align)
340{
341 int i;
342 struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
343
344 if (!pdata)
345 return NULL;
346
347 for (i = 0; i < NR_CPUS; i++) {
348 if (!cpu_possible(i))
349 continue;
350 pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
351 if (!pdata->ptrs[i])
352 goto unwind_oom;
353 memset(pdata->ptrs[i], 0, size);
354 }
355
356 /* Catch derefs w/o wrappers */
357 return (void *) (~(unsigned long) pdata);
358
359unwind_oom:
360 while (--i >= 0) {
361 if (!cpu_possible(i))
362 continue;
363 kfree(pdata->ptrs[i]);
364 }
365 kfree(pdata);
366 return NULL;
367}
368EXPORT_SYMBOL(__alloc_percpu);
369
370void
371free_percpu(const void *objp)
372{
373 int i;
374 struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
375
376 for (i = 0; i < NR_CPUS; i++) {
377 if (!cpu_possible(i))
378 continue;
379 kfree(p->ptrs[i]);
380 }
381 kfree(p);
382}
383EXPORT_SYMBOL(free_percpu);
384
385#endif
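
The slob_alloc() above is the K&R-style first-fit walk the header comment describes: a rover (slobfree) scans a circular free list measured in SLOB_UNIT-sized units, unlinking exact fits and splitting larger blocks. A self-contained userspace sketch of just that walk, under the simplifying assumptions of a fixed static heap and no locking, alignment handling or on-demand growth (unit_t, heap, arena, freep and alloc_units are illustrative names, not from the patch):

#include <stdio.h>

typedef struct unit {
	int units;
	struct unit *next;
} unit_t;

static unit_t heap[1024];		/* fixed pool; slob instead grows via __get_free_page() */
static unit_t arena = { 0, heap };	/* dummy list head, like slob's arena */
static unit_t *freep = &arena;		/* rover: where the last operation left off */

static void *alloc_units(int units)
{
	unit_t *prev = freep, *cur;

	for (cur = prev->next; ; prev = cur, cur = cur->next) {
		if (cur->units >= units) {
			if (cur->units == units) {	/* exact fit: unlink */
				prev->next = cur->next;
			} else {			/* split: keep the tail free */
				prev->next = cur + units;
				prev->next->units = cur->units - units;
				prev->next->next = cur->next;
				cur->units = units;
			}
			freep = prev;
			return cur;
		}
		if (cur == freep)			/* wrapped around: nothing fits */
			return NULL;
	}
}

int main(void)
{
	heap[0].units = 1024;			/* one big free block ... */
	heap[0].next = &arena;			/* ... on a circular list */
	printf("%p %p\n", alloc_units(4), alloc_units(1000));
	return 0;
}
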
diff --git a/mm/sparse.c b/mm/sparse.c
index 72079b538e2d..0a51f36ba3a1 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -18,10 +18,10 @@
18 */ 18 */
19#ifdef CONFIG_SPARSEMEM_EXTREME 19#ifdef CONFIG_SPARSEMEM_EXTREME
20struct mem_section *mem_section[NR_SECTION_ROOTS] 20struct mem_section *mem_section[NR_SECTION_ROOTS]
21 ____cacheline_maxaligned_in_smp; 21 ____cacheline_internodealigned_in_smp;
22#else 22#else
23struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] 23struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
24 ____cacheline_maxaligned_in_smp; 24 ____cacheline_internodealigned_in_smp;
25#endif 25#endif
26EXPORT_SYMBOL(mem_section); 26EXPORT_SYMBOL(mem_section);
27 27
diff --git a/mm/swap_state.c b/mm/swap_state.c
index fc2aecb70a95..7b09ac503fec 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -141,7 +141,7 @@ void __delete_from_swap_cache(struct page *page)
141 * Allocate swap space for the page and add the page to the 141 * Allocate swap space for the page and add the page to the
142 * swap cache. Caller needs to hold the page lock. 142 * swap cache. Caller needs to hold the page lock.
143 */ 143 */
144int add_to_swap(struct page * page) 144int add_to_swap(struct page * page, gfp_t gfp_mask)
145{ 145{
146 swp_entry_t entry; 146 swp_entry_t entry;
147 int err; 147 int err;
@@ -166,7 +166,7 @@ int add_to_swap(struct page * page)
166 * Add it to the swap cache and mark it dirty 166 * Add it to the swap cache and mark it dirty
167 */ 167 */
168 err = __add_to_swap_cache(page, entry, 168 err = __add_to_swap_cache(page, entry,
169 GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN); 169 gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
170 170
171 switch (err) { 171 switch (err) {
172 case 0: /* Success */ 172 case 0: /* Success */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6da4b28b896b..80f948a2028b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1493,7 +1493,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1493 goto bad_swap; 1493 goto bad_swap;
1494 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) 1494 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
1495 goto bad_swap; 1495 goto bad_swap;
1496 1496
1497 /* OK, set up the swap map and apply the bad block list */ 1497 /* OK, set up the swap map and apply the bad block list */
1498 if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) { 1498 if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
1499 error = -ENOMEM; 1499 error = -ENOMEM;
@@ -1502,17 +1502,17 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
1502 1502
1503 error = 0; 1503 error = 0;
1504 memset(p->swap_map, 0, maxpages * sizeof(short)); 1504 memset(p->swap_map, 0, maxpages * sizeof(short));
1505 for (i=0; i<swap_header->info.nr_badpages; i++) { 1505 for (i = 0; i < swap_header->info.nr_badpages; i++) {
1506 int page = swap_header->info.badpages[i]; 1506 int page_nr = swap_header->info.badpages[i];
1507 if (page <= 0 || page >= swap_header->info.last_page) 1507 if (page_nr <= 0 || page_nr >= swap_header->info.last_page)
1508 error = -EINVAL; 1508 error = -EINVAL;
1509 else 1509 else
1510 p->swap_map[page] = SWAP_MAP_BAD; 1510 p->swap_map[page_nr] = SWAP_MAP_BAD;
1511 } 1511 }
1512 nr_good_pages = swap_header->info.last_page - 1512 nr_good_pages = swap_header->info.last_page -
1513 swap_header->info.nr_badpages - 1513 swap_header->info.nr_badpages -
1514 1 /* header page */; 1514 1 /* header page */;
1515 if (error) 1515 if (error)
1516 goto bad_swap; 1516 goto bad_swap;
1517 } 1517 }
1518 1518
diff --git a/mm/truncate.c b/mm/truncate.c
index 7dee32745901..b1a463d0fe71 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -249,7 +249,6 @@ unlock:
249 break; 249 break;
250 } 250 }
251 pagevec_release(&pvec); 251 pagevec_release(&pvec);
252 cond_resched();
253 } 252 }
254 return ret; 253 return ret;
255} 254}
diff --git a/mm/util.c b/mm/util.c
new file mode 100644
index 000000000000..5f4bb59da63c
--- /dev/null
+++ b/mm/util.c
@@ -0,0 +1,39 @@
1#include <linux/slab.h>
2#include <linux/string.h>
3#include <linux/module.h>
4
5/**
6 * kzalloc - allocate memory. The memory is set to zero.
7 * @size: how many bytes of memory are required.
8 * @flags: the type of memory to allocate.
9 */
10void *kzalloc(size_t size, gfp_t flags)
11{
12 void *ret = kmalloc(size, flags);
13 if (ret)
14 memset(ret, 0, size);
15 return ret;
16}
17EXPORT_SYMBOL(kzalloc);
18
19/*
20 * kstrdup - allocate space for and copy an existing string
21 *
22 * @s: the string to duplicate
23 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
24 */
25char *kstrdup(const char *s, gfp_t gfp)
26{
27 size_t len;
28 char *buf;
29
30 if (!s)
31 return NULL;
32
33 len = strlen(s) + 1;
34 buf = kmalloc(len, gfp);
35 if (buf)
36 memcpy(buf, s, len);
37 return buf;
38}
39EXPORT_SYMBOL(kstrdup);
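
kstrdup() above is short, but its contract is worth pinning down: NULL in gives NULL out, and the copy includes the terminating NUL, so the allocation is strlen(s) + 1 bytes. A userspace analogue for quick testing, with malloc() standing in for kmalloc() and my_strdup() as an illustrative name:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *my_strdup(const char *s)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;		/* same NULL-tolerant contract */

	len = strlen(s) + 1;		/* include the terminating NUL */
	buf = malloc(len);
	if (buf)
		memcpy(buf, s, len);
	return buf;
}

int main(void)
{
	char *copy = my_strdup("mm/util.c");

	if (copy) {
		puts(copy);
		free(copy);
	}
	return 0;
}
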
diff --git a/mm/vmscan.c b/mm/vmscan.c
index be8235fb1939..bf903b2d198f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker);
180 * 180 *
181 * Returns the number of slab objects which we shrunk. 181 * Returns the number of slab objects which we shrunk.
182 */ 182 */
183static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, 183int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
184 unsigned long lru_pages)
185{ 184{
186 struct shrinker *shrinker; 185 struct shrinker *shrinker;
187 int ret = 0; 186 int ret = 0;
@@ -269,9 +268,7 @@ static inline int is_page_cache_freeable(struct page *page)
269 268
270static int may_write_to_queue(struct backing_dev_info *bdi) 269static int may_write_to_queue(struct backing_dev_info *bdi)
271{ 270{
272 if (current_is_kswapd()) 271 if (current->flags & PF_SWAPWRITE)
273 return 1;
274 if (current_is_pdflush()) /* This is unlikely, but why not... */
275 return 1; 272 return 1;
276 if (!bdi_write_congested(bdi)) 273 if (!bdi_write_congested(bdi))
277 return 1; 274 return 1;
@@ -376,6 +373,43 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
376 return PAGE_CLEAN; 373 return PAGE_CLEAN;
377} 374}
378 375
376static int remove_mapping(struct address_space *mapping, struct page *page)
377{
378 if (!mapping)
379 return 0; /* truncate got there first */
380
381 write_lock_irq(&mapping->tree_lock);
382
383 /*
384 * The non-racy check for busy page. It is critical to check
385 * PageDirty _after_ making sure that the page is freeable and
386 * not in use by anybody. (pagecache + us == 2)
387 */
388 if (unlikely(page_count(page) != 2))
389 goto cannot_free;
390 smp_rmb();
391 if (unlikely(PageDirty(page)))
392 goto cannot_free;
393
394 if (PageSwapCache(page)) {
395 swp_entry_t swap = { .val = page_private(page) };
396 __delete_from_swap_cache(page);
397 write_unlock_irq(&mapping->tree_lock);
398 swap_free(swap);
399 __put_page(page); /* The pagecache ref */
400 return 1;
401 }
402
403 __remove_from_page_cache(page);
404 write_unlock_irq(&mapping->tree_lock);
405 __put_page(page);
406 return 1;
407
408cannot_free:
409 write_unlock_irq(&mapping->tree_lock);
410 return 0;
411}
412
379/* 413/*
380 * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed 414 * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
381 */ 415 */
@@ -424,7 +458,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
424 * Try to allocate it some swap space here. 458 * Try to allocate it some swap space here.
425 */ 459 */
426 if (PageAnon(page) && !PageSwapCache(page)) { 460 if (PageAnon(page) && !PageSwapCache(page)) {
427 if (!add_to_swap(page)) 461 if (!add_to_swap(page, GFP_ATOMIC))
428 goto activate_locked; 462 goto activate_locked;
429 } 463 }
430#endif /* CONFIG_SWAP */ 464#endif /* CONFIG_SWAP */
@@ -507,36 +541,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
507 goto free_it; 541 goto free_it;
508 } 542 }
509 543
510 if (!mapping) 544 if (!remove_mapping(mapping, page))
511 goto keep_locked; /* truncate got there first */ 545 goto keep_locked;
512
513 write_lock_irq(&mapping->tree_lock);
514
515 /*
516 * The non-racy check for busy page. It is critical to check
517 * PageDirty _after_ making sure that the page is freeable and
518 * not in use by anybody. (pagecache + us == 2)
519 */
520 if (unlikely(page_count(page) != 2))
521 goto cannot_free;
522 smp_rmb();
523 if (unlikely(PageDirty(page)))
524 goto cannot_free;
525
526#ifdef CONFIG_SWAP
527 if (PageSwapCache(page)) {
528 swp_entry_t swap = { .val = page_private(page) };
529 __delete_from_swap_cache(page);
530 write_unlock_irq(&mapping->tree_lock);
531 swap_free(swap);
532 __put_page(page); /* The pagecache ref */
533 goto free_it;
534 }
535#endif /* CONFIG_SWAP */
536
537 __remove_from_page_cache(page);
538 write_unlock_irq(&mapping->tree_lock);
539 __put_page(page);
540 546
541free_it: 547free_it:
542 unlock_page(page); 548 unlock_page(page);
@@ -545,10 +551,6 @@ free_it:
545 __pagevec_release_nonlru(&freed_pvec); 551 __pagevec_release_nonlru(&freed_pvec);
546 continue; 552 continue;
547 553
548cannot_free:
549 write_unlock_irq(&mapping->tree_lock);
550 goto keep_locked;
551
552activate_locked: 554activate_locked:
553 SetPageActive(page); 555 SetPageActive(page);
554 pgactivate++; 556 pgactivate++;
@@ -566,6 +568,241 @@ keep:
566 return reclaimed; 568 return reclaimed;
567} 569}
568 570
571#ifdef CONFIG_MIGRATION
572static inline void move_to_lru(struct page *page)
573{
574 list_del(&page->lru);
575 if (PageActive(page)) {
576 /*
577 * lru_cache_add_active checks that
578 * the PG_active bit is off.
579 */
580 ClearPageActive(page);
581 lru_cache_add_active(page);
582 } else {
583 lru_cache_add(page);
584 }
585 put_page(page);
586}
587
588/*
589 * Add isolated pages on the list back to the LRU
590 *
591 * returns the number of pages put back.
592 */
593int putback_lru_pages(struct list_head *l)
594{
595 struct page *page;
596 struct page *page2;
597 int count = 0;
598
599 list_for_each_entry_safe(page, page2, l, lru) {
600 move_to_lru(page);
601 count++;
602 }
603 return count;
604}
605
606/*
607 * swapout a single page
608 * page is locked upon entry, unlocked on exit
609 */
610static int swap_page(struct page *page)
611{
612 struct address_space *mapping = page_mapping(page);
613
614 if (page_mapped(page) && mapping)
615 if (try_to_unmap(page) != SWAP_SUCCESS)
616 goto unlock_retry;
617
618 if (PageDirty(page)) {
619 /* Page is dirty, try to write it out here */
620 switch(pageout(page, mapping)) {
621 case PAGE_KEEP:
622 case PAGE_ACTIVATE:
623 goto unlock_retry;
624
625 case PAGE_SUCCESS:
626 goto retry;
627
628 case PAGE_CLEAN:
629 ; /* try to free the page below */
630 }
631 }
632
633 if (PagePrivate(page)) {
634 if (!try_to_release_page(page, GFP_KERNEL) ||
635 (!mapping && page_count(page) == 1))
636 goto unlock_retry;
637 }
638
639 if (remove_mapping(mapping, page)) {
640 /* Success */
641 unlock_page(page);
642 return 0;
643 }
644
645unlock_retry:
646 unlock_page(page);
647
648retry:
649 return -EAGAIN;
650}
651/*
652 * migrate_pages
653 *
654 * Two lists are passed to this function. The first list
655 * contains the pages isolated from the LRU to be migrated.
656 * The second list contains new pages that the pages isolated
657 * can be moved to. If the second list is NULL then all
658 * pages are swapped out.
659 *
660 * The function returns after 10 attempts or if no pages
661 * are movable anymore because the "to" list has become empty
662 * or no retryable pages exist anymore.
663 *
664 * SIMPLIFIED VERSION: This implementation of migrate_pages
665 * only swaps pages out and never touches the second list.
666 * The direct migration patchset extends this function to
667 * avoid the use of swap.
668 *
669 * Return: Number of pages not migrated when "to" ran empty.
670 */
671int migrate_pages(struct list_head *from, struct list_head *to,
672 struct list_head *moved, struct list_head *failed)
673{
674 int retry;
675 int nr_failed = 0;
676 int pass = 0;
677 struct page *page;
678 struct page *page2;
679 int swapwrite = current->flags & PF_SWAPWRITE;
680 int rc;
681
682 if (!swapwrite)
683 current->flags |= PF_SWAPWRITE;
684
685redo:
686 retry = 0;
687
688 list_for_each_entry_safe(page, page2, from, lru) {
689 cond_resched();
690
691 rc = 0;
692 if (page_count(page) == 1)
693 /* page was freed from under us. So we are done. */
694 goto next;
695
696 /*
697 * Skip locked pages during the first three passes to give the
698 * functions holding the lock time to release the page. Later we
699 * use lock_page() to have a higher chance of acquiring the
700 * lock.
701 */
702 rc = -EAGAIN;
703 if (pass > 2)
704 lock_page(page);
705 else
706 if (TestSetPageLocked(page))
707 goto next;
708
709 /*
710 * Only wait on writeback if we have already done a pass where
711 * we may have triggered writeouts for lots of pages.
712 */
713 if (pass > 0) {
714 wait_on_page_writeback(page);
715 } else {
716 if (PageWriteback(page))
717 goto unlock_page;
718 }
719
720 /*
721 * Anonymous pages must have swap cache references otherwise
722 * the information contained in the page maps cannot be
723 * preserved.
724 */
725 if (PageAnon(page) && !PageSwapCache(page)) {
726 if (!add_to_swap(page, GFP_KERNEL)) {
727 rc = -ENOMEM;
728 goto unlock_page;
729 }
730 }
731
732 /*
733 * Page is properly locked and writeback is complete.
734 * Try to migrate the page.
735 */
736 rc = swap_page(page);
737 goto next;
738
739unlock_page:
740 unlock_page(page);
741
742next:
743 if (rc == -EAGAIN) {
744 retry++;
745 } else if (rc) {
746 /* Permanent failure */
747 list_move(&page->lru, failed);
748 nr_failed++;
749 } else {
750 /* Success */
751 list_move(&page->lru, moved);
752 }
753 }
754 if (retry && pass++ < 10)
755 goto redo;
756
757 if (!swapwrite)
758 current->flags &= ~PF_SWAPWRITE;
759
760 return nr_failed + retry;
761}
762
763static void lru_add_drain_per_cpu(void *dummy)
764{
765 lru_add_drain();
766}
767
768/*
769 * Isolate one page from the LRU lists and put it on the
770 * indicated list. Do necessary cache draining if the
771 * page is not on the LRU lists yet.
772 *
773 * Result:
774 * 0 = page not on LRU list
775 * 1 = page removed from LRU list and added to the specified list.
776 * -ENOENT = page is being freed elsewhere.
777 */
778int isolate_lru_page(struct page *page)
779{
780 int rc = 0;
781 struct zone *zone = page_zone(page);
782
783redo:
784 spin_lock_irq(&zone->lru_lock);
785 rc = __isolate_lru_page(page);
786 if (rc == 1) {
787 if (PageActive(page))
788 del_page_from_active_list(zone, page);
789 else
790 del_page_from_inactive_list(zone, page);
791 }
792 spin_unlock_irq(&zone->lru_lock);
793 if (rc == 0) {
794 /*
795 * Maybe this page is still waiting for a cpu to drain it
796 * from one of the lru lists?
797 */
798 rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
799 if (rc == 0 && PageLRU(page))
800 goto redo;
801 }
802 return rc;
803}
804#endif
805
569/* 806/*
570 * zone->lru_lock is heavily contended. Some of the functions that 807 * zone->lru_lock is heavily contended. Some of the functions that
571 * shrink the lists perform better by taking out a batch of pages 808 * shrink the lists perform better by taking out a batch of pages
@@ -594,20 +831,18 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
594 page = lru_to_page(src); 831 page = lru_to_page(src);
595 prefetchw_prev_lru_page(page, src, flags); 832 prefetchw_prev_lru_page(page, src, flags);
596 833
597 if (!TestClearPageLRU(page)) 834 switch (__isolate_lru_page(page)) {
598 BUG(); 835 case 1:
599 list_del(&page->lru); 836 /* Succeeded to isolate page */
600 if (get_page_testone(page)) { 837 list_move(&page->lru, dst);
601 /*
602 * It is being freed elsewhere
603 */
604 __put_page(page);
605 SetPageLRU(page);
606 list_add(&page->lru, src);
607 continue;
608 } else {
609 list_add(&page->lru, dst);
610 nr_taken++; 838 nr_taken++;
839 break;
840 case -ENOENT:
841 /* Not possible to isolate */
842 list_move(&page->lru, src);
843 break;
844 default:
845 BUG();
611 } 846 }
612 } 847 }
613 848
@@ -1226,7 +1461,7 @@ static int kswapd(void *p)
1226 * us from recursively trying to free more memory as we're 1461 * us from recursively trying to free more memory as we're
1227 * trying to free the first piece of memory in the first place). 1462 * trying to free the first piece of memory in the first place).
1228 */ 1463 */
1229 tsk->flags |= PF_MEMALLOC|PF_KSWAPD; 1464 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
1230 1465
1231 order = 0; 1466 order = 0;
1232 for ( ; ; ) { 1467 for ( ; ; ) {
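
The control flow of migrate_pages() above is a multi-pass retry loop: each pass walks the pages still on the "from" list, -EAGAIN marks a transient failure that is retried on a later pass (up to 10 passes), any other error moves the page to the failed list, and success moves it to moved. A self-contained userspace sketch of just that loop, with try_one() as an illustrative stand-in for swap_page() and a canned schedule of transient failures:

#include <errno.h>
#include <stdio.h>

#define NITEMS 5

static int transient[NITEMS] = { 0, 2, 1, 4, 0 };	/* passes each item still fails */

static int try_one(int i)
{
	if (transient[i] > 0) {
		transient[i]--;
		return -EAGAIN;		/* transient: try again next pass */
	}
	return 0;			/* success: would go on the moved list */
}

int main(void)
{
	int done[NITEMS] = { 0 };
	int pass = 0, retry;

	do {
		retry = 0;
		for (int i = 0; i < NITEMS; i++) {
			if (done[i])
				continue;
			if (try_one(i) == -EAGAIN)
				retry++;	/* leave it for the next pass */
			else
				done[i] = 1;
		}
	} while (retry && pass++ < 10);

	printf("still pending after the final pass: %d\n", retry);
	return 0;
}
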
diff --git a/net/802/Makefile b/net/802/Makefile
index 01861929591a..977704a54f68 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -2,8 +2,6 @@
2# Makefile for the Linux 802.x protocol layers. 2# Makefile for the Linux 802.x protocol layers.
3# 3#
4 4
5obj-y := p8023.o
6
7# Check the p8022 selections against net/core/Makefile. 5# Check the p8022 selections against net/core/Makefile.
8obj-$(CONFIG_SYSCTL) += sysctl_net_802.o 6obj-$(CONFIG_SYSCTL) += sysctl_net_802.o
9obj-$(CONFIG_LLC) += p8022.o psnap.o 7obj-$(CONFIG_LLC) += p8022.o psnap.o
@@ -11,5 +9,5 @@ obj-$(CONFIG_TR) += p8022.o psnap.o tr.o sysctl_net_802.o
11obj-$(CONFIG_NET_FC) += fc.o 9obj-$(CONFIG_NET_FC) += fc.o
12obj-$(CONFIG_FDDI) += fddi.o 10obj-$(CONFIG_FDDI) += fddi.o
13obj-$(CONFIG_HIPPI) += hippi.o 11obj-$(CONFIG_HIPPI) += hippi.o
14obj-$(CONFIG_IPX) += p8022.o psnap.o 12obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o
15obj-$(CONFIG_ATALK) += p8022.o psnap.o 13obj-$(CONFIG_ATALK) += p8022.o psnap.o
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3f244670764a..00f983226672 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -986,6 +986,7 @@ int dccp_v4_rcv(struct sk_buff *skb)
986 986
987 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 987 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
988 goto discard_and_relse; 988 goto discard_and_relse;
989 nf_reset(skb);
989 990
990 return sk_receive_skb(sk, skb); 991 return sk_receive_skb(sk, skb);
991 992
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c609dc78f487..df074259f9c3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -27,6 +27,7 @@
27#include <net/ipv6.h> 27#include <net/ipv6.h>
28#include <net/protocol.h> 28#include <net/protocol.h>
29#include <net/transp_v6.h> 29#include <net/transp_v6.h>
30#include <net/ip6_checksum.h>
30#include <net/xfrm.h> 31#include <net/xfrm.h>
31 32
32#include "dccp.h" 33#include "dccp.h"
@@ -1028,7 +1029,7 @@ discard:
1028 return 0; 1029 return 0;
1029} 1030}
1030 1031
1031static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 1032static int dccp_v6_rcv(struct sk_buff **pskb)
1032{ 1033{
1033 const struct dccp_hdr *dh; 1034 const struct dccp_hdr *dh;
1034 struct sk_buff *skb = *pskb; 1035 struct sk_buff *skb = *pskb;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 912c42f57c79..de16e944777f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -832,6 +832,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
832 skb->h.raw = skb->nh.raw; 832 skb->h.raw = skb->nh.raw;
833 skb->nh.raw = skb_push(skb, gre_hlen); 833 skb->nh.raw = skb_push(skb, gre_hlen);
834 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 834 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
835 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
835 dst_release(skb->dst); 836 dst_release(skb->dst);
836 skb->dst = &rt->u.dst; 837 skb->dst = &rt->u.dst;
837 838
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e45846ae570b..18d7fad474d7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -185,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb)
185 raw_rcv(last, skb2); 185 raw_rcv(last, skb2);
186 } 186 }
187 last = sk; 187 last = sk;
188 nf_reset(skb);
189 } 188 }
190 } 189 }
191 190
@@ -204,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
204 203
205 __skb_pull(skb, ihl); 204 __skb_pull(skb, ihl);
206 205
207 /* Free reference early: we don't need it any more, and it may
208 hold ip_conntrack module loaded indefinitely. */
209 nf_reset(skb);
210
211 /* Point into the IP datagram, just past the header. */ 206 /* Point into the IP datagram, just past the header. */
212 skb->h.raw = skb->data; 207 skb->h.raw = skb->data;
213 208
@@ -232,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
232 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { 227 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
233 int ret; 228 int ret;
234 229
235 if (!ipprot->no_policy && 230 if (!ipprot->no_policy) {
236 !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 231 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
237 kfree_skb(skb); 232 kfree_skb(skb);
238 goto out; 233 goto out;
234 }
235 nf_reset(skb);
239 } 236 }
240 ret = ipprot->handler(skb); 237 ret = ipprot->handler(skb);
241 if (ret < 0) { 238 if (ret < 0) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8b1c9bd0091e..c2169b47ddfd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -85,6 +85,8 @@
85 85
86int sysctl_ip_default_ttl = IPDEFTTL; 86int sysctl_ip_default_ttl = IPDEFTTL;
87 87
88static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
89
88/* Generate a checksum for an outgoing IP datagram. */ 90/* Generate a checksum for an outgoing IP datagram. */
89__inline__ void ip_send_check(struct iphdr *iph) 91__inline__ void ip_send_check(struct iphdr *iph)
90{ 92{
@@ -202,6 +204,11 @@ static inline int ip_finish_output2(struct sk_buff *skb)
202 204
203static inline int ip_finish_output(struct sk_buff *skb) 205static inline int ip_finish_output(struct sk_buff *skb)
204{ 206{
207#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
208 /* Policy lookup after SNAT yielded a new policy */
209 if (skb->dst->xfrm != NULL)
210 return xfrm4_output_finish(skb);
211#endif
205 if (skb->len > dst_mtu(skb->dst) && 212 if (skb->len > dst_mtu(skb->dst) &&
206 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) 213 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
207 return ip_fragment(skb, ip_finish_output2); 214 return ip_fragment(skb, ip_finish_output2);
@@ -409,7 +416,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
409 * single device frame, and queue such a frame for sending. 416 * single device frame, and queue such a frame for sending.
410 */ 417 */
411 418
412int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) 419static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
413{ 420{
414 struct iphdr *iph; 421 struct iphdr *iph;
415 int raw = 0; 422 int raw = 0;
@@ -1391,7 +1398,6 @@ void __init ip_init(void)
1391#endif 1398#endif
1392} 1399}
1393 1400
1394EXPORT_SYMBOL(ip_fragment);
1395EXPORT_SYMBOL(ip_generic_getfrag); 1401EXPORT_SYMBOL(ip_generic_getfrag);
1396EXPORT_SYMBOL(ip_queue_xmit); 1402EXPORT_SYMBOL(ip_queue_xmit);
1397EXPORT_SYMBOL(ip_send_check); 1403EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 35571cff81c6..bbd85f5ec985 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -621,6 +621,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
621 skb->h.raw = skb->nh.raw; 621 skb->h.raw = skb->nh.raw;
622 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 622 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
623 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 623 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
624 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
624 dst_release(skb->dst); 625 dst_release(skb->dst);
625 skb->dst = &rt->u.dst; 626 skb->dst = &rt->u.dst;
626 627
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ae0779d82c5d..3321092b0914 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -7,11 +7,13 @@
7#include <linux/netfilter.h> 7#include <linux/netfilter.h>
8#include <linux/netfilter_ipv4.h> 8#include <linux/netfilter_ipv4.h>
9 9
10#include <linux/ip.h>
10#include <linux/tcp.h> 11#include <linux/tcp.h>
11#include <linux/udp.h> 12#include <linux/udp.h>
12#include <linux/icmp.h> 13#include <linux/icmp.h>
13#include <net/route.h> 14#include <net/route.h>
14#include <linux/ip.h> 15#include <net/xfrm.h>
16#include <net/ip.h>
15 17
16/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ 18/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
17int ip_route_me_harder(struct sk_buff **pskb) 19int ip_route_me_harder(struct sk_buff **pskb)
@@ -33,7 +35,6 @@ int ip_route_me_harder(struct sk_buff **pskb)
33#ifdef CONFIG_IP_ROUTE_FWMARK 35#ifdef CONFIG_IP_ROUTE_FWMARK
34 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; 36 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
35#endif 37#endif
36 fl.proto = iph->protocol;
37 if (ip_route_output_key(&rt, &fl) != 0) 38 if (ip_route_output_key(&rt, &fl) != 0)
38 return -1; 39 return -1;
39 40
@@ -60,6 +61,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
60 if ((*pskb)->dst->error) 61 if ((*pskb)->dst->error)
61 return -1; 62 return -1;
62 63
64#ifdef CONFIG_XFRM
65 if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
66 xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
67 if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
68 return -1;
69#endif
70
63 /* Change in oif may mean change in hh_len. */ 71 /* Change in oif may mean change in hh_len. */
64 hh_len = (*pskb)->dst->dev->hard_header_len; 72 hh_len = (*pskb)->dst->dev->hard_header_len;
65 if (skb_headroom(*pskb) < hh_len) { 73 if (skb_headroom(*pskb) < hh_len) {
@@ -78,6 +86,9 @@ int ip_route_me_harder(struct sk_buff **pskb)
78} 86}
79EXPORT_SYMBOL(ip_route_me_harder); 87EXPORT_SYMBOL(ip_route_me_harder);
80 88
89void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
90EXPORT_SYMBOL(ip_nat_decode_session);
91
81/* 92/*
82 * Extra routing may needed on local out, as the QUEUE target never 93 * Extra routing may needed on local out, as the QUEUE target never
83 * returns control to the table. 94 * returns control to the table.
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 88a60650e6b8..a9893ec03e02 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -487,6 +487,16 @@ config IP_NF_MATCH_STRING
487 487
488 To compile it as a module, choose M here. If unsure, say N. 488 To compile it as a module, choose M here. If unsure, say N.
489 489
490config IP_NF_MATCH_POLICY
491 tristate "IPsec policy match support"
492 depends on IP_NF_IPTABLES && XFRM
493 help
494 Policy matching allows you to match packets based on the
495 IPsec policy that was used during decapsulation/will
496 be used during encapsulation.
497
498 To compile it as a module, choose M here. If unsure, say N.
499
490# `filter', generic and specific targets 500# `filter', generic and specific targets
491config IP_NF_FILTER 501config IP_NF_FILTER
492 tristate "Packet filtering" 502 tristate "Packet filtering"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index d0a447e520a2..549b01a648b3 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
72obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o 72obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
73obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o 73obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
74obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o 74obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
75obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
75obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o 76obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
76obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o 77obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
77 78
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 977fb59d4563..0b25050981a1 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -16,6 +16,7 @@
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/timer.h> 18#include <linux/timer.h>
19#include <linux/interrupt.h>
19#include <linux/netfilter.h> 20#include <linux/netfilter.h>
20#include <linux/module.h> 21#include <linux/module.h>
21#include <linux/in.h> 22#include <linux/in.h>
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index f04111f74e09..8b8a1f00bbf4 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -55,6 +55,44 @@
55 : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ 55 : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
56 : "*ERROR*"))) 56 : "*ERROR*")))
57 57
58#ifdef CONFIG_XFRM
59static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
60{
61 struct ip_conntrack *ct;
62 struct ip_conntrack_tuple *t;
63 enum ip_conntrack_info ctinfo;
64 enum ip_conntrack_dir dir;
65 unsigned long statusbit;
66
67 ct = ip_conntrack_get(skb, &ctinfo);
68 if (ct == NULL)
69 return;
70 dir = CTINFO2DIR(ctinfo);
71 t = &ct->tuplehash[dir].tuple;
72
73 if (dir == IP_CT_DIR_ORIGINAL)
74 statusbit = IPS_DST_NAT;
75 else
76 statusbit = IPS_SRC_NAT;
77
78 if (ct->status & statusbit) {
79 fl->fl4_dst = t->dst.ip;
80 if (t->dst.protonum == IPPROTO_TCP ||
81 t->dst.protonum == IPPROTO_UDP)
82 fl->fl_ip_dport = t->dst.u.tcp.port;
83 }
84
85 statusbit ^= IPS_NAT_MASK;
86
87 if (ct->status & statusbit) {
88 fl->fl4_src = t->src.ip;
89 if (t->dst.protonum == IPPROTO_TCP ||
90 t->dst.protonum == IPPROTO_UDP)
91 fl->fl_ip_sport = t->src.u.tcp.port;
92 }
93}
94#endif
95
58static unsigned int 96static unsigned int
59ip_nat_fn(unsigned int hooknum, 97ip_nat_fn(unsigned int hooknum,
60 struct sk_buff **pskb, 98 struct sk_buff **pskb,
@@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum,
162 const struct net_device *out, 200 const struct net_device *out,
163 int (*okfn)(struct sk_buff *)) 201 int (*okfn)(struct sk_buff *))
164{ 202{
165 u_int32_t saddr, daddr; 203 struct ip_conntrack *ct;
204 enum ip_conntrack_info ctinfo;
166 unsigned int ret; 205 unsigned int ret;
167 206
168 saddr = (*pskb)->nh.iph->saddr;
169 daddr = (*pskb)->nh.iph->daddr;
170
171 ret = ip_nat_fn(hooknum, pskb, in, out, okfn); 207 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
172 if (ret != NF_DROP && ret != NF_STOLEN 208 if (ret != NF_DROP && ret != NF_STOLEN
173 && ((*pskb)->nh.iph->saddr != saddr 209 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
174 || (*pskb)->nh.iph->daddr != daddr)) { 210 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
175 dst_release((*pskb)->dst); 211
176 (*pskb)->dst = NULL; 212 if (ct->tuplehash[dir].tuple.src.ip !=
213 ct->tuplehash[!dir].tuple.dst.ip) {
214 dst_release((*pskb)->dst);
215 (*pskb)->dst = NULL;
216 }
177 } 217 }
178 return ret; 218 return ret;
179} 219}
@@ -185,12 +225,30 @@ ip_nat_out(unsigned int hooknum,
185 const struct net_device *out, 225 const struct net_device *out,
186 int (*okfn)(struct sk_buff *)) 226 int (*okfn)(struct sk_buff *))
187{ 227{
228 struct ip_conntrack *ct;
229 enum ip_conntrack_info ctinfo;
230 unsigned int ret;
231
188 /* root is playing with raw sockets. */ 232 /* root is playing with raw sockets. */
189 if ((*pskb)->len < sizeof(struct iphdr) 233 if ((*pskb)->len < sizeof(struct iphdr)
190 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) 234 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
191 return NF_ACCEPT; 235 return NF_ACCEPT;
192 236
193 return ip_nat_fn(hooknum, pskb, in, out, okfn); 237 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
238 if (ret != NF_DROP && ret != NF_STOLEN
239 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
240 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
241
242 if (ct->tuplehash[dir].tuple.src.ip !=
243 ct->tuplehash[!dir].tuple.dst.ip
244#ifdef CONFIG_XFRM
245 || ct->tuplehash[dir].tuple.src.u.all !=
246 ct->tuplehash[!dir].tuple.dst.u.all
247#endif
248 )
249 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
250 }
251 return ret;
194} 252}
195 253
196static unsigned int 254static unsigned int
@@ -200,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum,
200 const struct net_device *out, 258 const struct net_device *out,
201 int (*okfn)(struct sk_buff *)) 259 int (*okfn)(struct sk_buff *))
202{ 260{
203 u_int32_t saddr, daddr; 261 struct ip_conntrack *ct;
262 enum ip_conntrack_info ctinfo;
204 unsigned int ret; 263 unsigned int ret;
205 264
206 /* root is playing with raw sockets. */ 265 /* root is playing with raw sockets. */
@@ -208,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum,
208 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) 267 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
209 return NF_ACCEPT; 268 return NF_ACCEPT;
210 269
211 saddr = (*pskb)->nh.iph->saddr;
212 daddr = (*pskb)->nh.iph->daddr;
213
214 ret = ip_nat_fn(hooknum, pskb, in, out, okfn); 270 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
215 if (ret != NF_DROP && ret != NF_STOLEN 271 if (ret != NF_DROP && ret != NF_STOLEN
216 && ((*pskb)->nh.iph->saddr != saddr 272 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
217 || (*pskb)->nh.iph->daddr != daddr)) 273 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
218 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; 274
275 if (ct->tuplehash[dir].tuple.dst.ip !=
276 ct->tuplehash[!dir].tuple.src.ip
277#ifdef CONFIG_XFRM
278 || ct->tuplehash[dir].tuple.dst.u.all !=
279 ct->tuplehash[!dir].tuple.src.u.all
280#endif
281 )
282 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
283 }
219 return ret; 284 return ret;
220} 285}
221 286
@@ -303,10 +368,14 @@ static int init_or_cleanup(int init)
303 368
304 if (!init) goto cleanup; 369 if (!init) goto cleanup;
305 370
371#ifdef CONFIG_XFRM
372 BUG_ON(ip_nat_decode_session != NULL);
373 ip_nat_decode_session = nat_decode_session;
374#endif
306 ret = ip_nat_rule_init(); 375 ret = ip_nat_rule_init();
307 if (ret < 0) { 376 if (ret < 0) {
308 printk("ip_nat_init: can't setup rules.\n"); 377 printk("ip_nat_init: can't setup rules.\n");
309 goto cleanup_nothing; 378 goto cleanup_decode_session;
310 } 379 }
311 ret = nf_register_hook(&ip_nat_in_ops); 380 ret = nf_register_hook(&ip_nat_in_ops);
312 if (ret < 0) { 381 if (ret < 0) {
@@ -354,7 +423,11 @@ static int init_or_cleanup(int init)
354 nf_unregister_hook(&ip_nat_in_ops); 423 nf_unregister_hook(&ip_nat_in_ops);
355 cleanup_rule_init: 424 cleanup_rule_init:
356 ip_nat_rule_cleanup(); 425 ip_nat_rule_cleanup();
357 cleanup_nothing: 426 cleanup_decode_session:
427#ifdef CONFIG_XFRM
428 ip_nat_decode_session = NULL;
429 synchronize_net();
430#endif
358 return ret; 431 return ret;
359} 432}
360 433
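
In nat_decode_session() above, the statusbit ^= IPS_NAT_MASK step flips between the two NAT status bits, so one code path checks the destination rewrite and the other the source rewrite for the current direction. A self-contained sketch of just that bit trick (the bit values match ip_conntrack's IPS_SRC_NAT/IPS_DST_NAT pair; the rest is illustrative):

#include <stdio.h>

#define IPS_SRC_NAT	(1 << 4)
#define IPS_DST_NAT	(1 << 5)
#define IPS_NAT_MASK	(IPS_DST_NAT | IPS_SRC_NAT)

int main(void)
{
	unsigned long statusbit = IPS_DST_NAT;	/* as chosen for IP_CT_DIR_ORIGINAL */

	printf("first check uses bit %#lx\n", statusbit);
	statusbit ^= IPS_NAT_MASK;		/* now the other NAT bit */
	printf("second check uses bit %#lx\n", statusbit);
	return 0;
}
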
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
new file mode 100644
index 000000000000..709debcc69c9
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_policy.c
@@ -0,0 +1,170 @@
1/* IP tables module for matching IPsec policy
2 *
3 * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/config.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/init.h>
15#include <net/xfrm.h>
16
17#include <linux/netfilter_ipv4.h>
18#include <linux/netfilter_ipv4/ip_tables.h>
19#include <linux/netfilter_ipv4/ipt_policy.h>
20
21MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
22MODULE_DESCRIPTION("IPtables IPsec policy matching module");
23MODULE_LICENSE("GPL");
24
25
26static inline int
27match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
28{
29#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
30
31 return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
32 MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
33 MATCH(proto, x->id.proto) &&
34 MATCH(mode, x->props.mode) &&
35 MATCH(spi, x->id.spi) &&
36 MATCH(reqid, x->props.reqid);
37}
38
39static int
40match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
41{
42 const struct ipt_policy_elem *e;
43 struct sec_path *sp = skb->sp;
44 int strict = info->flags & IPT_POLICY_MATCH_STRICT;
45 int i, pos;
46
47 if (sp == NULL)
48 return -1;
49 if (strict && info->len != sp->len)
50 return 0;
51
52 for (i = sp->len - 1; i >= 0; i--) {
53 pos = strict ? i - sp->len + 1 : 0;
54 if (pos >= info->len)
55 return 0;
56 e = &info->pol[pos];
57
58 if (match_xfrm_state(sp->x[i].xvec, e)) {
59 if (!strict)
60 return 1;
61 } else if (strict)
62 return 0;
63 }
64
65 return strict ? 1 : 0;
66}
67
68static int
69match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
70{
71 const struct ipt_policy_elem *e;
72 struct dst_entry *dst = skb->dst;
73 int strict = info->flags & IPT_POLICY_MATCH_STRICT;
74 int i, pos;
75
76 if (dst->xfrm == NULL)
77 return -1;
78
79 for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
80 pos = strict ? i : 0;
81 if (pos >= info->len)
82 return 0;
83 e = &info->pol[pos];
84
85 if (match_xfrm_state(dst->xfrm, e)) {
86 if (!strict)
87 return 1;
88 } else if (strict)
89 return 0;
90 }
91
92 return strict ? 1 : 0;
93}
94
95static int match(const struct sk_buff *skb,
96 const struct net_device *in,
97 const struct net_device *out,
98 const void *matchinfo, int offset, int *hotdrop)
99{
100 const struct ipt_policy_info *info = matchinfo;
101 int ret;
102
103 if (info->flags & IPT_POLICY_MATCH_IN)
104 ret = match_policy_in(skb, info);
105 else
106 ret = match_policy_out(skb, info);
107
108 if (ret < 0)
109 ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0;
110 else if (info->flags & IPT_POLICY_MATCH_NONE)
111 ret = 0;
112
113 return ret;
114}
115
116static int checkentry(const char *tablename, const struct ipt_ip *ip,
117 void *matchinfo, unsigned int matchsize,
118 unsigned int hook_mask)
119{
120 struct ipt_policy_info *info = matchinfo;
121
122 if (matchsize != IPT_ALIGN(sizeof(*info))) {
123 printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
124 matchsize, IPT_ALIGN(sizeof(*info)));
125 return 0;
126 }
127 if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) {
128 printk(KERN_ERR "ipt_policy: neither incoming nor "
129 "outgoing policy selected\n");
130 return 0;
131 }
132 if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
133 && info->flags & IPT_POLICY_MATCH_OUT) {
134 printk(KERN_ERR "ipt_policy: output policy not valid in "
135 "PRE_ROUTING and INPUT\n");
136 return 0;
137 }
138 if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
139 && info->flags & IPT_POLICY_MATCH_IN) {
140 printk(KERN_ERR "ipt_policy: input policy not valid in "
141 "POST_ROUTING and OUTPUT\n");
142 return 0;
143 }
144 if (info->len > IPT_POLICY_MAX_ELEM) {
145 printk(KERN_ERR "ipt_policy: too many policy elements\n");
146 return 0;
147 }
148
149 return 1;
150}
151
152static struct ipt_match policy_match = {
153 .name = "policy",
154 .match = match,
155 .checkentry = checkentry,
156 .me = THIS_MODULE,
157};
158
159static int __init init(void)
160{
161 return ipt_register_match(&policy_match);
162}
163
164static void __exit fini(void)
165{
166 ipt_unregister_match(&policy_match);
167}
168
169module_init(init);
170module_exit(fini);
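
The MATCH() macro in match_xfrm_state() above encodes a three-way test per field: if the field's match bit is clear the field is ignored entirely, otherwise the equality result is XORed with the invert bit so the same expression handles both "equals" and "not equals" rules. A self-contained sketch of that idiom for a single field (struct rule, rule_matches() and the protocol numbers are illustrative, not from the patch):

#include <stdio.h>

struct rule {
	unsigned int proto;
	unsigned int match_proto:1;	/* does this field participate? */
	unsigned int invert_proto:1;	/* negate the comparison? */
};

static int rule_matches(const struct rule *e, unsigned int proto)
{
	return !e->match_proto || ((e->proto == proto) ^ e->invert_proto);
}

int main(void)
{
	struct rule any      = {  0, 0, 0 };	/* field ignored           */
	struct rule only_esp = { 50, 1, 0 };	/* match proto == 50 (ESP) */
	struct rule not_esp  = { 50, 1, 1 };	/* match proto != 50       */

	printf("%d %d %d\n",
	       rule_matches(&any, 6),		/* 1: don't care           */
	       rule_matches(&only_esp, 50),	/* 1: equal, not inverted  */
	       rule_matches(&not_esp, 50));	/* 0: equal, but inverted  */
	return 0;
}
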
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4b0d7e4d6269..165a4d81efa4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
255 kfree_skb(skb); 255 kfree_skb(skb);
256 return NET_RX_DROP; 256 return NET_RX_DROP;
257 } 257 }
258 nf_reset(skb);
258 259
259 skb_push(skb, skb->data - skb->nh.raw); 260 skb_push(skb, skb->data - skb->nh.raw);
260 261
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9f83e5b28ce..6ea353907af5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1080,6 +1080,7 @@ process:
1080 1080
1081 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1081 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1082 goto discard_and_relse; 1082 goto discard_and_relse;
1083 nf_reset(skb);
1083 1084
1084 if (sk_filter(sk, skb, 0)) 1085 if (sk_filter(sk, skb, 0))
1085 goto discard_and_relse; 1086 goto discard_and_relse;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 223abaa72bc5..00840474a449 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -989,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
989 kfree_skb(skb); 989 kfree_skb(skb);
990 return -1; 990 return -1;
991 } 991 }
992 nf_reset(skb);
992 993
993 if (up->encap_type) { 994 if (up->encap_type) {
994 /* 995 /*
@@ -1149,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb)
1149 1150
1150 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1151 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1151 goto drop; 1152 goto drop;
1153 nf_reset(skb);
1152 1154
1153 /* No socket. Drop packet silently, if checksum is wrong */ 1155 /* No socket. Drop packet silently, if checksum is wrong */
1154 if (udp_checksum_complete(skb)) 1156 if (udp_checksum_complete(skb))
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 2d3849c38a0f..850d919591d1 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -11,6 +11,8 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv4.h>
14#include <net/inet_ecn.h> 16#include <net/inet_ecn.h>
15#include <net/ip.h> 17#include <net/ip.h>
16#include <net/xfrm.h> 18#include <net/xfrm.h>
@@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
45 return xfrm_parse_spi(skb, nexthdr, spi, seq); 47 return xfrm_parse_spi(skb, nexthdr, spi, seq);
46} 48}
47 49
50#ifdef CONFIG_NETFILTER
51static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
52{
53 struct iphdr *iph = skb->nh.iph;
54
55 if (skb->dst == NULL) {
56 if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
57 skb->dev))
58 goto drop;
59 }
60 return dst_input(skb);
61drop:
62 kfree_skb(skb);
63 return NET_RX_DROP;
64}
65#endif
66
48int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) 67int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
49{ 68{
50 int err; 69 int err;
@@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
137 memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state)); 156 memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
138 skb->sp->len += xfrm_nr; 157 skb->sp->len += xfrm_nr;
139 158
159 nf_reset(skb);
160
140 if (decaps) { 161 if (decaps) {
141 if (!(skb->dev->flags&IFF_LOOPBACK)) { 162 if (!(skb->dev->flags&IFF_LOOPBACK)) {
142 dst_release(skb->dst); 163 dst_release(skb->dst);
@@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
145 netif_rx(skb); 166 netif_rx(skb);
146 return 0; 167 return 0;
147 } else { 168 } else {
169#ifdef CONFIG_NETFILTER
170 __skb_push(skb, skb->data - skb->nh.raw);
171 skb->nh.iph->tot_len = htons(skb->len);
172 ip_send_check(skb->nh.iph);
173
174 NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
175 xfrm4_rcv_encap_finish);
176 return 0;
177#else
148 return -skb->nh.iph->protocol; 178 return -skb->nh.iph->protocol;
179#endif
149 } 180 }
150 181
151drop_unlock: 182drop_unlock:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 66620a95942a..d4df0ddd424b 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,8 +8,10 @@
8 * 2 of the License, or (at your option) any later version. 8 * 2 of the License, or (at your option) any later version.
9 */ 9 */
10 10
11#include <linux/compiler.h>
11#include <linux/skbuff.h> 12#include <linux/skbuff.h>
12#include <linux/spinlock.h> 13#include <linux/spinlock.h>
14#include <linux/netfilter_ipv4.h>
13#include <net/inet_ecn.h> 15#include <net/inet_ecn.h>
14#include <net/ip.h> 16#include <net/ip.h>
15#include <net/xfrm.h> 17#include <net/xfrm.h>
@@ -95,7 +97,7 @@ out:
95 return ret; 97 return ret;
96} 98}
97 99
98int xfrm4_output(struct sk_buff *skb) 100static int xfrm4_output_one(struct sk_buff *skb)
99{ 101{
100 struct dst_entry *dst = skb->dst; 102 struct dst_entry *dst = skb->dst;
101 struct xfrm_state *x = dst->xfrm; 103 struct xfrm_state *x = dst->xfrm;
@@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb)
113 goto error_nolock; 115 goto error_nolock;
114 } 116 }
115 117
116 spin_lock_bh(&x->lock); 118 do {
117 err = xfrm_state_check(x, skb); 119 spin_lock_bh(&x->lock);
118 if (err) 120 err = xfrm_state_check(x, skb);
119 goto error; 121 if (err)
122 goto error;
120 123
121 xfrm4_encap(skb); 124 xfrm4_encap(skb);
122 125
123 err = x->type->output(x, skb); 126 err = x->type->output(x, skb);
124 if (err) 127 if (err)
125 goto error; 128 goto error;
126 129
127 x->curlft.bytes += skb->len; 130 x->curlft.bytes += skb->len;
128 x->curlft.packets++; 131 x->curlft.packets++;
129 132
130 spin_unlock_bh(&x->lock); 133 spin_unlock_bh(&x->lock);
131 134
132 if (!(skb->dst = dst_pop(dst))) { 135 if (!(skb->dst = dst_pop(dst))) {
133 err = -EHOSTUNREACH; 136 err = -EHOSTUNREACH;
134 goto error_nolock; 137 goto error_nolock;
135 } 138 }
136 err = NET_XMIT_BYPASS; 139 dst = skb->dst;
140 x = dst->xfrm;
141 } while (x && !x->props.mode);
142
143 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
144 err = 0;
137 145
138out_exit: 146out_exit:
139 return err; 147 return err;
@@ -143,3 +151,33 @@ error_nolock:
143 kfree_skb(skb); 151 kfree_skb(skb);
144 goto out_exit; 152 goto out_exit;
145} 153}
154
155int xfrm4_output_finish(struct sk_buff *skb)
156{
157 int err;
158
159 while (likely((err = xfrm4_output_one(skb)) == 0)) {
160 nf_reset(skb);
161
162 err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
163 skb->dst->dev, dst_output);
164 if (unlikely(err != 1))
165 break;
166
167 if (!skb->dst->xfrm)
168 return dst_output(skb);
169
170 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
171 skb->dst->dev, xfrm4_output_finish);
172 if (unlikely(err != 1))
173 break;
174 }
175
176 return err;
177}
178
179int xfrm4_output(struct sk_buff *skb)
180{
181 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
182 xfrm4_output_finish);
183}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 704fb73e6c5f..e53e421eeee9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1228,7 +1228,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
1228 1228
1229/* Gets referenced address, destroys ifaddr */ 1229/* Gets referenced address, destroys ifaddr */
1230 1230
1231void addrconf_dad_stop(struct inet6_ifaddr *ifp) 1231static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
1232{ 1232{
1233 if (ifp->flags&IFA_F_PERMANENT) { 1233 if (ifp->flags&IFA_F_PERMANENT) {
1234 spin_lock_bh(&ifp->lock); 1234 spin_lock_bh(&ifp->lock);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 68afc53be662..25c3fe5005d9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -689,11 +689,11 @@ snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
689 if (ptr == NULL) 689 if (ptr == NULL)
690 return -EINVAL; 690 return -EINVAL;
691 691
692 ptr[0] = __alloc_percpu(mibsize, mibalign); 692 ptr[0] = __alloc_percpu(mibsize);
693 if (!ptr[0]) 693 if (!ptr[0])
694 goto err0; 694 goto err0;
695 695
696 ptr[1] = __alloc_percpu(mibsize, mibalign); 696 ptr[1] = __alloc_percpu(mibsize);
697 if (!ptr[1]) 697 if (!ptr[1])
698 goto err1; 698 goto err1;
699 699
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 113374dc342c..2a1e7e45b890 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -152,7 +152,7 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = {
152 {-1, NULL} 152 {-1, NULL}
153}; 153};
154 154
155static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) 155static int ipv6_destopt_rcv(struct sk_buff **skbp)
156{ 156{
157 struct sk_buff *skb = *skbp; 157 struct sk_buff *skb = *skbp;
158 struct inet6_skb_parm *opt = IP6CB(skb); 158 struct inet6_skb_parm *opt = IP6CB(skb);
@@ -169,7 +169,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
169 169
170 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { 170 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
171 skb->h.raw += ((skb->h.raw[1]+1)<<3); 171 skb->h.raw += ((skb->h.raw[1]+1)<<3);
172 *nhoffp = opt->dst1; 172 opt->nhoff = opt->dst1;
173 return 1; 173 return 1;
174 } 174 }
175 175
@@ -192,7 +192,7 @@ void __init ipv6_destopt_init(void)
192 NONE header. No data in packet. 192 NONE header. No data in packet.
193 ********************************/ 193 ********************************/
194 194
195static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp) 195static int ipv6_nodata_rcv(struct sk_buff **skbp)
196{ 196{
197 struct sk_buff *skb = *skbp; 197 struct sk_buff *skb = *skbp;
198 198
@@ -215,7 +215,7 @@ void __init ipv6_nodata_init(void)
215 Routing header. 215 Routing header.
216 ********************************/ 216 ********************************/
217 217
218static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) 218static int ipv6_rthdr_rcv(struct sk_buff **skbp)
219{ 219{
220 struct sk_buff *skb = *skbp; 220 struct sk_buff *skb = *skbp;
221 struct inet6_skb_parm *opt = IP6CB(skb); 221 struct inet6_skb_parm *opt = IP6CB(skb);
@@ -249,7 +249,7 @@ looped_back:
249 skb->h.raw += (hdr->hdrlen + 1) << 3; 249 skb->h.raw += (hdr->hdrlen + 1) << 3;
250 opt->dst0 = opt->dst1; 250 opt->dst0 = opt->dst1;
251 opt->dst1 = 0; 251 opt->dst1 = 0;
252 *nhoffp = (&hdr->nexthdr) - skb->nh.raw; 252 opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
253 return 1; 253 return 1;
254 } 254 }
255 255
@@ -487,9 +487,14 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
487 487
488int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff) 488int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
489{ 489{
490 IP6CB(skb)->hop = sizeof(struct ipv6hdr); 490 struct inet6_skb_parm *opt = IP6CB(skb);
491 if (ip6_parse_tlv(tlvprochopopt_lst, skb)) 491
492 opt->hop = sizeof(struct ipv6hdr);
493 if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
494 skb->h.raw += (skb->h.raw[1]+1)<<3;
495 opt->nhoff = sizeof(struct ipv6hdr);
492 return sizeof(struct ipv6hdr); 496 return sizeof(struct ipv6hdr);
497 }
493 return -1; 498 return -1;
494} 499}
495 500
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ec6a2b549bb..53c81fcd20ba 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -79,7 +79,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
79static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; 79static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
80#define icmpv6_socket __get_cpu_var(__icmpv6_socket) 80#define icmpv6_socket __get_cpu_var(__icmpv6_socket)
81 81
82static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); 82static int icmpv6_rcv(struct sk_buff **pskb);
83 83
84static struct inet6_protocol icmpv6_protocol = { 84static struct inet6_protocol icmpv6_protocol = {
85 .handler = icmpv6_rcv, 85 .handler = icmpv6_rcv,
@@ -581,7 +581,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
581 * Handle icmp messages 581 * Handle icmp messages
582 */ 582 */
583 583
584static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 584static int icmpv6_rcv(struct sk_buff **pskb)
585{ 585{
586 struct sk_buff *skb = *pskb; 586 struct sk_buff *skb = *pskb;
587 struct net_device *dev = skb->dev; 587 struct net_device *dev = skb->dev;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 792f90f0f9ec..f8f3a37a1494 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -25,6 +25,7 @@
25#include <net/inet_hashtables.h> 25#include <net/inet_hashtables.h>
26#include <net/ip6_route.h> 26#include <net/ip6_route.h>
27#include <net/sock.h> 27#include <net/sock.h>
28#include <net/inet6_connection_sock.h>
28 29
29int inet6_csk_bind_conflict(const struct sock *sk, 30int inet6_csk_bind_conflict(const struct sock *sk,
30 const struct inet_bind_bucket *tb) 31 const struct inet_bind_bucket *tb)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a6026d2787d2..29f73592e68e 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -48,7 +48,7 @@
48 48
49 49
50 50
51static inline int ip6_rcv_finish( struct sk_buff *skb) 51inline int ip6_rcv_finish( struct sk_buff *skb)
52{ 52{
53 if (skb->dst == NULL) 53 if (skb->dst == NULL)
54 ip6_route_input(skb); 54 ip6_route_input(skb);
@@ -97,6 +97,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
97 if (hdr->version != 6) 97 if (hdr->version != 6)
98 goto err; 98 goto err;
99 99
100 skb->h.raw = (u8 *)(hdr + 1);
101 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
102
100 pkt_len = ntohs(hdr->payload_len); 103 pkt_len = ntohs(hdr->payload_len);
101 104
102 /* pkt_len may be zero if Jumbo payload option is present */ 105 /* pkt_len may be zero if Jumbo payload option is present */
@@ -111,8 +114,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
111 } 114 }
112 115
113 if (hdr->nexthdr == NEXTHDR_HOP) { 116 if (hdr->nexthdr == NEXTHDR_HOP) {
114 skb->h.raw = (u8*)(hdr+1); 117 if (ipv6_parse_hopopts(skb, IP6CB(skb)->nhoff) < 0) {
115 if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) {
116 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); 118 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
117 return 0; 119 return 0;
118 } 120 }
@@ -143,26 +145,15 @@ static inline int ip6_input_finish(struct sk_buff *skb)
143 int nexthdr; 145 int nexthdr;
144 u8 hash; 146 u8 hash;
145 147
146 skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
147
148 /* 148 /*
149 * Parse extension headers 149 * Parse extension headers
150 */ 150 */
151 151
152 nexthdr = skb->nh.ipv6h->nexthdr;
153 nhoff = offsetof(struct ipv6hdr, nexthdr);
154
155 /* Skip hop-by-hop options, they are already parsed. */
156 if (nexthdr == NEXTHDR_HOP) {
157 nhoff = sizeof(struct ipv6hdr);
158 nexthdr = skb->h.raw[0];
159 skb->h.raw += (skb->h.raw[1]+1)<<3;
160 }
161
162 rcu_read_lock(); 152 rcu_read_lock();
163resubmit: 153resubmit:
164 if (!pskb_pull(skb, skb->h.raw - skb->data)) 154 if (!pskb_pull(skb, skb->h.raw - skb->data))
165 goto discard; 155 goto discard;
156 nhoff = IP6CB(skb)->nhoff;
166 nexthdr = skb->nh.raw[nhoff]; 157 nexthdr = skb->nh.raw[nhoff];
167 158
168 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); 159 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
@@ -194,7 +185,7 @@ resubmit:
194 !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 185 !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
195 goto discard; 186 goto discard;
196 187
197 ret = ipprot->handler(&skb, &nhoff); 188 ret = ipprot->handler(&skb);
198 if (ret > 0) 189 if (ret > 0)
199 goto resubmit; 190 goto resubmit;
200 else if (ret == 0) 191 else if (ret == 0)
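
Together with the ipv6_rcv() hunk above, this changes the inet6 protocol-handler contract: the next-header offset no longer arrives as an unsigned int *nhoffp parameter but travels in the skb control block, seeded in ipv6_rcv() and updated by each extension-header parser before resubmission. A sketch of a handler under the new convention (hypothetical "example" header; validation elided):

	static int example_rcv(struct sk_buff **pskb)
	{
		struct sk_buff *skb = *pskb;
		struct inet6_skb_parm *opt = IP6CB(skb);
		u8 *hdr = skb->h.raw;	/* start of this extension header */

		/* opt->nhoff currently locates the nexthdr byte that
		 * selected this handler */
		skb->h.raw += (hdr[1] + 1) << 3;	/* step over the header */
		opt->nhoff = hdr - skb->nh.raw;	/* its nexthdr is byte 0 */
		return 1;	/* >0 tells ip6_input_finish() to resubmit */
	}
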
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e315d0f80af1..f079621c8b67 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -510,7 +510,7 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
510 **/ 510 **/
511 511
512static int 512static int
513ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 513ip6ip6_rcv(struct sk_buff **pskb)
514{ 514{
515 struct sk_buff *skb = *pskb; 515 struct sk_buff *skb = *pskb;
516 struct ipv6hdr *ipv6h; 516 struct ipv6hdr *ipv6h;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index f8626ebf90fd..b63678328a3b 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -10,6 +10,7 @@
10#include <net/dst.h> 10#include <net/dst.h>
11#include <net/ipv6.h> 11#include <net/ipv6.h>
12#include <net/ip6_route.h> 12#include <net/ip6_route.h>
13#include <net/xfrm.h>
13 14
14int ip6_route_me_harder(struct sk_buff *skb) 15int ip6_route_me_harder(struct sk_buff *skb)
15{ 16{
@@ -21,11 +22,17 @@ int ip6_route_me_harder(struct sk_buff *skb)
21 { .ip6_u = 22 { .ip6_u =
22 { .daddr = iph->daddr, 23 { .daddr = iph->daddr,
23 .saddr = iph->saddr, } }, 24 .saddr = iph->saddr, } },
24 .proto = iph->nexthdr,
25 }; 25 };
26 26
27 dst = ip6_route_output(skb->sk, &fl); 27 dst = ip6_route_output(skb->sk, &fl);
28 28
29#ifdef CONFIG_XFRM
30 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
31 xfrm_decode_session(skb, &fl, AF_INET6) == 0)
32 if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
33 return -1;
34#endif
35
29 if (dst->error) { 36 if (dst->error) {
30 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 37 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
31 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); 38 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 04912f9b35c3..105dd69ee9fb 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -179,6 +179,16 @@ config IP6_NF_MATCH_PHYSDEV
179 179
180 To compile it as a module, choose M here. If unsure, say N. 180 To compile it as a module, choose M here. If unsure, say N.
181 181
182config IP6_NF_MATCH_POLICY
183 tristate "IPsec policy match support"
184 depends on IP6_NF_IPTABLES && XFRM
185 help
186 Policy matching allows you to match packets based on the
187 IPsec policy that was used during decapsulation/will
188 be used during encapsulation.
189
190 To compile it as a module, choose M here. If unsure, say N.
191
182# The targets 192# The targets
183config IP6_NF_FILTER 193config IP6_NF_FILTER
184 tristate "Packet filtering" 194 tristate "Packet filtering"
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 9ab5b2ca1f59..c0c809b426e8 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
13obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o 13obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
14obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o 14obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
15obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o 15obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o
16obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o
16obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o 17obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
17obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o 18obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o
18obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o 19obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c
new file mode 100644
index 000000000000..13fedad48c1d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_policy.c
@@ -0,0 +1,175 @@
1/* IP tables module for matching IPsec policy
2 *
3 * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/config.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/init.h>
15#include <net/xfrm.h>
16
17#include <linux/netfilter_ipv6.h>
18#include <linux/netfilter_ipv6/ip6_tables.h>
19#include <linux/netfilter_ipv6/ip6t_policy.h>
20
21MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
22MODULE_DESCRIPTION("IPtables IPsec policy matching module");
23MODULE_LICENSE("GPL");
24
25
26static inline int
27match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e)
28{
29#define MATCH_ADDR(x,y,z) (!e->match.x || \
30 ((ip6_masked_addrcmp((z), &e->x, &e->y)) == 0) ^ e->invert.x)
31#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
32
33 return MATCH_ADDR(saddr, smask, (struct in6_addr *)&x->props.saddr.a6) &&
34 MATCH_ADDR(daddr, dmask, (struct in6_addr *)&x->id.daddr.a6) &&
35 MATCH(proto, x->id.proto) &&
36 MATCH(mode, x->props.mode) &&
37 MATCH(spi, x->id.spi) &&
38 MATCH(reqid, x->props.reqid);
39}
40
41static int
42match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info)
43{
44 const struct ip6t_policy_elem *e;
45 struct sec_path *sp = skb->sp;
46 int strict = info->flags & IP6T_POLICY_MATCH_STRICT;
47 int i, pos;
48
49 if (sp == NULL)
50 return -1;
51 if (strict && info->len != sp->len)
52 return 0;
53
54 for (i = sp->len - 1; i >= 0; i--) {
55 pos = strict ? i - sp->len + 1 : 0;
56 if (pos >= info->len)
57 return 0;
58 e = &info->pol[pos];
59
60 if (match_xfrm_state(sp->x[i].xvec, e)) {
61 if (!strict)
62 return 1;
63 } else if (strict)
64 return 0;
65 }
66
67 return strict ? 1 : 0;
68}
69
70static int
71match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info)
72{
73 const struct ip6t_policy_elem *e;
74 struct dst_entry *dst = skb->dst;
75 int strict = info->flags & IP6T_POLICY_MATCH_STRICT;
76 int i, pos;
77
78 if (dst->xfrm == NULL)
79 return -1;
80
81 for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
82 pos = strict ? i : 0;
83 if (pos >= info->len)
84 return 0;
85 e = &info->pol[pos];
86
87 if (match_xfrm_state(dst->xfrm, e)) {
88 if (!strict)
89 return 1;
90 } else if (strict)
91 return 0;
92 }
93
94 return strict ? 1 : 0;
95}
96
97static int match(const struct sk_buff *skb,
98 const struct net_device *in,
99 const struct net_device *out,
100 const void *matchinfo,
101 int offset,
102 unsigned int protoff,
103 int *hotdrop)
104{
105 const struct ip6t_policy_info *info = matchinfo;
106 int ret;
107
108 if (info->flags & IP6T_POLICY_MATCH_IN)
109 ret = match_policy_in(skb, info);
110 else
111 ret = match_policy_out(skb, info);
112
113 if (ret < 0)
114 ret = info->flags & IP6T_POLICY_MATCH_NONE ? 1 : 0;
115 else if (info->flags & IP6T_POLICY_MATCH_NONE)
116 ret = 0;
117
118 return ret;
119}
120
121static int checkentry(const char *tablename, const struct ip6t_ip6 *ip,
122 void *matchinfo, unsigned int matchsize,
123 unsigned int hook_mask)
124{
125 struct ip6t_policy_info *info = matchinfo;
126
127 if (matchsize != IP6T_ALIGN(sizeof(*info))) {
128 printk(KERN_ERR "ip6t_policy: matchsize %u != %zu\n",
129 matchsize, IP6T_ALIGN(sizeof(*info)));
130 return 0;
131 }
132 if (!(info->flags & (IP6T_POLICY_MATCH_IN|IP6T_POLICY_MATCH_OUT))) {
133 printk(KERN_ERR "ip6t_policy: neither incoming nor "
134 "outgoing policy selected\n");
135 return 0;
136 }
137 if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN)
138 && info->flags & IP6T_POLICY_MATCH_OUT) {
139 printk(KERN_ERR "ip6t_policy: output policy not valid in "
140 "PRE_ROUTING and INPUT\n");
141 return 0;
142 }
143 if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT)
144 && info->flags & IP6T_POLICY_MATCH_IN) {
145 printk(KERN_ERR "ip6t_policy: input policy not valid in "
146 "POST_ROUTING and OUTPUT\n");
147 return 0;
148 }
149 if (info->len > IP6T_POLICY_MAX_ELEM) {
150 printk(KERN_ERR "ip6t_policy: too many policy elements\n");
151 return 0;
152 }
153
154 return 1;
155}
156
157static struct ip6t_match policy_match = {
158 .name = "policy",
159 .match = match,
160 .checkentry = checkentry,
161 .me = THIS_MODULE,
162};
163
164static int __init init(void)
165{
166 return ip6t_register_match(&policy_match);
167}
168
169static void __exit fini(void)
170{
171 ip6t_unregister_match(&policy_match);
172}
173
174module_init(init);
175module_exit(fini);
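
The MATCH()/MATCH_ADDR() macros in the new module make every element field three-state: not selected (wildcard), required equal, or required unequal. An equivalent open-coded form for the proto field (illustration only, not additional patch code):

	static inline int match_proto(const struct ip6t_policy_elem *e,
				      const struct xfrm_state *x)
	{
		if (!e->match.proto)
			return 1;	/* field not selected: wildcard */
		return (e->proto == x->id.proto) ^ e->invert.proto;
	}

In strict mode (IP6T_POLICY_MATCH_STRICT) the configured elements must match the policy's transforms one-to-one and in order; otherwise a single matching element anywhere in the sec_path (input) or dst chain (output) suffices.
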
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5d316cb72ec9..15e1456b3f18 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -581,7 +581,6 @@ err:
581 * the last and the first frames arrived and all the bits are here. 581 * the last and the first frames arrived and all the bits are here.
582 */ 582 */
583static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, 583static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
584 unsigned int *nhoffp,
585 struct net_device *dev) 584 struct net_device *dev)
586{ 585{
587 struct sk_buff *fp, *head = fq->fragments; 586 struct sk_buff *fp, *head = fq->fragments;
@@ -654,6 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
654 head->dev = dev; 653 head->dev = dev;
655 skb_set_timestamp(head, &fq->stamp); 654 skb_set_timestamp(head, &fq->stamp);
656 head->nh.ipv6h->payload_len = htons(payload_len); 655 head->nh.ipv6h->payload_len = htons(payload_len);
656 IP6CB(head)->nhoff = nhoff;
657 657
658 *skb_in = head; 658 *skb_in = head;
659 659
@@ -663,7 +663,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
663 663
664 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); 664 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
665 fq->fragments = NULL; 665 fq->fragments = NULL;
666 *nhoffp = nhoff;
667 return 1; 666 return 1;
668 667
669out_oversize: 668out_oversize:
@@ -678,7 +677,7 @@ out_fail:
678 return -1; 677 return -1;
679} 678}
680 679
681static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) 680static int ipv6_frag_rcv(struct sk_buff **skbp)
682{ 681{
683 struct sk_buff *skb = *skbp; 682 struct sk_buff *skb = *skbp;
684 struct net_device *dev = skb->dev; 683 struct net_device *dev = skb->dev;
@@ -710,7 +709,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
710 skb->h.raw += sizeof(struct frag_hdr); 709 skb->h.raw += sizeof(struct frag_hdr);
711 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); 710 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
712 711
713 *nhoffp = (u8*)fhdr - skb->nh.raw; 712 IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
714 return 1; 713 return 1;
715 } 714 }
716 715
@@ -722,11 +721,11 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
722 721
723 spin_lock(&fq->lock); 722 spin_lock(&fq->lock);
724 723
725 ip6_frag_queue(fq, skb, fhdr, *nhoffp); 724 ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
726 725
727 if (fq->last_in == (FIRST_IN|LAST_IN) && 726 if (fq->last_in == (FIRST_IN|LAST_IN) &&
728 fq->meat == fq->len) 727 fq->meat == fq->len)
729 ret = ip6_frag_reasm(fq, skbp, nhoffp, dev); 728 ret = ip6_frag_reasm(fq, skbp, dev);
730 729
731 spin_unlock(&fq->lock); 730 spin_unlock(&fq->lock);
732 fq_put(fq, NULL); 731 fq_put(fq, NULL);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 577d49732b0f..02872ae8a439 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -381,6 +381,7 @@ static int ipip6_rcv(struct sk_buff *skb)
381 skb->mac.raw = skb->nh.raw; 381 skb->mac.raw = skb->nh.raw;
382 skb->nh.raw = skb->data; 382 skb->nh.raw = skb->data;
383 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 383 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
384 IPCB(skb)->flags = 0;
384 skb->protocol = htons(ETH_P_IPV6); 385 skb->protocol = htons(ETH_P_IPV6);
385 skb->pkt_type = PACKET_HOST; 386 skb->pkt_type = PACKET_HOST;
386 tunnel->stat.rx_packets++; 387 tunnel->stat.rx_packets++;
@@ -552,6 +553,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
552 skb->h.raw = skb->nh.raw; 553 skb->h.raw = skb->nh.raw;
553 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 554 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
554 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 555 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
556 IPCB(skb)->flags = 0;
555 dst_release(skb->dst); 557 dst_release(skb->dst);
556 skb->dst = &rt->u.dst; 558 skb->dst = &rt->u.dst;
557 559
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2947bc56d8a0..a25f4e8a8ada 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1153,7 +1153,7 @@ ipv6_pktoptions:
1153 return 0; 1153 return 0;
1154} 1154}
1155 1155
1156static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 1156static int tcp_v6_rcv(struct sk_buff **pskb)
1157{ 1157{
1158 struct sk_buff *skb = *pskb; 1158 struct sk_buff *skb = *pskb;
1159 struct tcphdr *th; 1159 struct tcphdr *th;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d8538dcea813..c47648892c04 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -435,7 +435,7 @@ out:
435 read_unlock(&udp_hash_lock); 435 read_unlock(&udp_hash_lock);
436} 436}
437 437
438static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 438static int udpv6_rcv(struct sk_buff **pskb)
439{ 439{
440 struct sk_buff *skb = *pskb; 440 struct sk_buff *skb = *pskb;
441 struct sock *sk; 441 struct sock *sk;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 28c29d78338e..1ca2da68ef69 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -11,6 +11,8 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv6.h>
14#include <net/dsfield.h> 16#include <net/dsfield.h>
15#include <net/inet_ecn.h> 17#include <net/inet_ecn.h>
16#include <net/ip.h> 18#include <net/ip.h>
@@ -26,7 +28,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
26 IP6_ECN_set_ce(inner_iph); 28 IP6_ECN_set_ce(inner_iph);
27} 29}
28 30
29int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) 31int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi)
30{ 32{
31 struct sk_buff *skb = *pskb; 33 struct sk_buff *skb = *pskb;
32 int err; 34 int err;
@@ -38,7 +40,7 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
38 int nexthdr; 40 int nexthdr;
39 unsigned int nhoff; 41 unsigned int nhoff;
40 42
41 nhoff = *nhoffp; 43 nhoff = IP6CB(skb)->nhoff;
42 nexthdr = skb->nh.raw[nhoff]; 44 nexthdr = skb->nh.raw[nhoff];
43 45
44 seq = 0; 46 seq = 0;
@@ -121,6 +123,8 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
121 skb->sp->len += xfrm_nr; 123 skb->sp->len += xfrm_nr;
122 skb->ip_summed = CHECKSUM_NONE; 124 skb->ip_summed = CHECKSUM_NONE;
123 125
126 nf_reset(skb);
127
124 if (decaps) { 128 if (decaps) {
125 if (!(skb->dev->flags&IFF_LOOPBACK)) { 129 if (!(skb->dev->flags&IFF_LOOPBACK)) {
126 dst_release(skb->dst); 130 dst_release(skb->dst);
@@ -129,7 +133,16 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
129 netif_rx(skb); 133 netif_rx(skb);
130 return -1; 134 return -1;
131 } else { 135 } else {
136#ifdef CONFIG_NETFILTER
137 skb->nh.ipv6h->payload_len = htons(skb->len);
138 __skb_push(skb, skb->data - skb->nh.raw);
139
140 NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
141 ip6_rcv_finish);
142 return -1;
143#else
132 return 1; 144 return 1;
145#endif
133 } 146 }
134 147
135drop_unlock: 148drop_unlock:
@@ -144,7 +157,7 @@ drop:
144 157
145EXPORT_SYMBOL(xfrm6_rcv_spi); 158EXPORT_SYMBOL(xfrm6_rcv_spi);
146 159
147int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 160int xfrm6_rcv(struct sk_buff **pskb)
148{ 161{
149 return xfrm6_rcv_spi(pskb, nhoffp, 0); 162 return xfrm6_rcv_spi(pskb, 0);
150} 163}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6b9867717d11..80242172a5df 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -9,9 +9,11 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/compiler.h>
12#include <linux/skbuff.h> 13#include <linux/skbuff.h>
13#include <linux/spinlock.h> 14#include <linux/spinlock.h>
14#include <linux/icmpv6.h> 15#include <linux/icmpv6.h>
16#include <linux/netfilter_ipv6.h>
15#include <net/dsfield.h> 17#include <net/dsfield.h>
16#include <net/inet_ecn.h> 18#include <net/inet_ecn.h>
17#include <net/ipv6.h> 19#include <net/ipv6.h>
@@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
92 return ret; 94 return ret;
93} 95}
94 96
95int xfrm6_output(struct sk_buff *skb) 97static int xfrm6_output_one(struct sk_buff *skb)
96{ 98{
97 struct dst_entry *dst = skb->dst; 99 struct dst_entry *dst = skb->dst;
98 struct xfrm_state *x = dst->xfrm; 100 struct xfrm_state *x = dst->xfrm;
@@ -110,29 +112,35 @@ int xfrm6_output(struct sk_buff *skb)
110 goto error_nolock; 112 goto error_nolock;
111 } 113 }
112 114
113 spin_lock_bh(&x->lock); 115 do {
114 err = xfrm_state_check(x, skb); 116 spin_lock_bh(&x->lock);
115 if (err) 117 err = xfrm_state_check(x, skb);
116 goto error; 118 if (err)
119 goto error;
117 120
118 xfrm6_encap(skb); 121 xfrm6_encap(skb);
119 122
120 err = x->type->output(x, skb); 123 err = x->type->output(x, skb);
121 if (err) 124 if (err)
122 goto error; 125 goto error;
123 126
124 x->curlft.bytes += skb->len; 127 x->curlft.bytes += skb->len;
125 x->curlft.packets++; 128 x->curlft.packets++;
126 129
127 spin_unlock_bh(&x->lock); 130 spin_unlock_bh(&x->lock);
128 131
129 skb->nh.raw = skb->data; 132 skb->nh.raw = skb->data;
130 133
131 if (!(skb->dst = dst_pop(dst))) { 134 if (!(skb->dst = dst_pop(dst))) {
132 err = -EHOSTUNREACH; 135 err = -EHOSTUNREACH;
133 goto error_nolock; 136 goto error_nolock;
134 } 137 }
135 err = NET_XMIT_BYPASS; 138 dst = skb->dst;
139 x = dst->xfrm;
140 } while (x && !x->props.mode);
141
142 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
143 err = 0;
136 144
137out_exit: 145out_exit:
138 return err; 146 return err;
@@ -142,3 +150,33 @@ error_nolock:
142 kfree_skb(skb); 150 kfree_skb(skb);
143 goto out_exit; 151 goto out_exit;
144} 152}
153
154static int xfrm6_output_finish(struct sk_buff *skb)
155{
156 int err;
157
158 while (likely((err = xfrm6_output_one(skb)) == 0)) {
159 nf_reset(skb);
160
161 err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
162 skb->dst->dev, dst_output);
163 if (unlikely(err != 1))
164 break;
165
166 if (!skb->dst->xfrm)
167 return dst_output(skb);
168
169 err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
170 skb->dst->dev, xfrm6_output_finish);
171 if (unlikely(err != 1))
172 break;
173 }
174
175 return err;
176}
177
178int xfrm6_output(struct sk_buff *skb)
179{
180 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
181 xfrm6_output_finish);
182}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index fbef7826a74f..da09ff258648 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -397,7 +397,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler)
397 397
398EXPORT_SYMBOL(xfrm6_tunnel_deregister); 398EXPORT_SYMBOL(xfrm6_tunnel_deregister);
399 399
400static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 400static int xfrm6_tunnel_rcv(struct sk_buff **pskb)
401{ 401{
402 struct sk_buff *skb = *pskb; 402 struct sk_buff *skb = *pskb;
403 struct xfrm6_tunnel *handler = xfrm6_tunnel_handler; 403 struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
@@ -405,11 +405,11 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
405 u32 spi; 405 u32 spi;
406 406
407 /* device-like_ip6ip6_handler() */ 407 /* device-like_ip6ip6_handler() */
408 if (handler && handler->handler(pskb, nhoffp) == 0) 408 if (handler && handler->handler(pskb) == 0)
409 return 0; 409 return 0;
410 410
411 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); 411 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
412 return xfrm6_rcv_spi(pskb, nhoffp, spi); 412 return xfrm6_rcv_spi(pskb, spi);
413} 413}
414 414
415static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 415static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 238f1bffa684..4aa6fc60357c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -225,6 +225,7 @@ int sctp_rcv(struct sk_buff *skb)
225 225
226 if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) 226 if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
227 goto discard_release; 227 goto discard_release;
228 nf_reset(skb);
228 229
229 ret = sk_filter(sk, skb, 1); 230 ret = sk_filter(sk, skb, 1);
230 if (ret) 231 if (ret)
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 15c05165c905..04c7fab4edc4 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -905,7 +905,7 @@ static struct inet_protosw sctpv6_stream_protosw = {
905 .flags = SCTP_PROTOSW_FLAG, 905 .flags = SCTP_PROTOSW_FLAG,
906}; 906};
907 907
908static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 908static int sctp6_rcv(struct sk_buff **pskb)
909{ 909{
910 return sctp_rcv(*pskb) ? -1 : 0; 910 return sctp_rcv(*pskb) ? -1 : 0;
911} 911}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 24cc23af9b95..e14c1cae7460 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -495,7 +495,7 @@ rpc_depopulate(struct dentry *parent)
495repeat: 495repeat:
496 spin_lock(&dcache_lock); 496 spin_lock(&dcache_lock);
497 list_for_each_safe(pos, next, &parent->d_subdirs) { 497 list_for_each_safe(pos, next, &parent->d_subdirs) {
498 dentry = list_entry(pos, struct dentry, d_child); 498 dentry = list_entry(pos, struct dentry, d_u.d_child);
499 spin_lock(&dentry->d_lock); 499 spin_lock(&dentry->d_lock);
500 if (!d_unhashed(dentry)) { 500 if (!d_unhashed(dentry)) {
501 dget_locked(dentry); 501 dget_locked(dentry);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 64a447375fdb..59614a994b4e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -22,6 +22,7 @@
22#include <linux/workqueue.h> 22#include <linux/workqueue.h>
23#include <linux/notifier.h> 23#include <linux/notifier.h>
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
25#include <linux/netfilter.h>
25#include <linux/module.h> 26#include <linux/module.h>
26#include <net/xfrm.h> 27#include <net/xfrm.h>
27#include <net/ip.h> 28#include <net/ip.h>
@@ -951,8 +952,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
951 return start; 952 return start;
952} 953}
953 954
954static int 955int
955_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) 956xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
956{ 957{
957 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 958 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
958 959
@@ -963,6 +964,7 @@ _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
963 xfrm_policy_put_afinfo(afinfo); 964 xfrm_policy_put_afinfo(afinfo);
964 return 0; 965 return 0;
965} 966}
967EXPORT_SYMBOL(xfrm_decode_session);
966 968
967static inline int secpath_has_tunnel(struct sec_path *sp, int k) 969static inline int secpath_has_tunnel(struct sec_path *sp, int k)
968{ 970{
@@ -982,8 +984,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
982 u8 fl_dir = policy_to_flow_dir(dir); 984 u8 fl_dir = policy_to_flow_dir(dir);
983 u32 sk_sid; 985 u32 sk_sid;
984 986
985 if (_decode_session(skb, &fl, family) < 0) 987 if (xfrm_decode_session(skb, &fl, family) < 0)
986 return 0; 988 return 0;
989 nf_nat_decode_session(skb, &fl, family);
987 990
988 sk_sid = security_sk_sid(sk, &fl, fl_dir); 991 sk_sid = security_sk_sid(sk, &fl, fl_dir);
989 992
@@ -1055,7 +1058,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1055{ 1058{
1056 struct flowi fl; 1059 struct flowi fl;
1057 1060
1058 if (_decode_session(skb, &fl, family) < 0) 1061 if (xfrm_decode_session(skb, &fl, family) < 0)
1059 return 0; 1062 return 0;
1060 1063
1061 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; 1064 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter
new file mode 100644
index 000000000000..75f21d843c1d
--- /dev/null
+++ b/scripts/bloat-o-meter
@@ -0,0 +1,58 @@
1#!/usr/bin/python
2#
3# Copyright 2004 Matt Mackall <mpm@selenic.com>
4#
5# inspired by perl Bloat-O-Meter (c) 1997 by Andi Kleen
6#
7# This software may be used and distributed according to the terms
8# of the GNU General Public License, incorporated herein by reference.
9
10import sys, os, re
11
12if len(sys.argv) != 3:
13 sys.stderr.write("usage: %s file1 file2\n" % sys.argv[0])
14 sys.exit(-1)
15
16def getsizes(file):
17 sym = {}
18 for l in os.popen("nm --size-sort " + file).readlines():
19 size, type, name = l[:-1].split()
20 if type in "tTdDbB":
21 sym[name] = int(size, 16)
22 return sym
23
24old = getsizes(sys.argv[1])
25new = getsizes(sys.argv[2])
26grow, shrink, add, remove, up, down = 0, 0, 0, 0, 0, 0
27delta, common = [], {}
28
29for a in old:
30 if a in new:
31 common[a] = 1
32
33for name in old:
34 if name not in common:
35 remove += 1
36 down += old[name]
37 delta.append((-old[name], name))
38
39for name in new:
40 if name not in common:
41 add += 1
42 up += new[name]
43 delta.append((new[name], name))
44
45for name in common:
46 d = new.get(name, 0) - old.get(name, 0)
47 if d>0: grow, up = grow+1, up+d
48 if d<0: shrink, down = shrink+1, down-d
49 delta.append((d, name))
50
51delta.sort()
52delta.reverse()
53
54print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \
55 (add, remove, grow, shrink, up, -down, up-down)
56print "%-40s %7s %7s %+7s" % ("function", "old", "new", "delta")
57for d, n in delta:
58 if d: print "%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)
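
The script diffs "nm --size-sort" output from two binaries, typically the vmlinux of a before/after build; an example invocation (file names are illustrative):

	scripts/bloat-o-meter vmlinux.old vmlinux.new

The output lists added/removed and grown/shrunk symbol counts, the net size delta, and a per-symbol table sorted by delta, largest first.
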
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index 8ba5d29d3d42..10eeae53d827 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -63,6 +63,20 @@ static void check_stdin(void)
63 } 63 }
64} 64}
65 65
66static char *fgets_check_stream(char *s, int size, FILE *stream)
67{
68 char *ret = fgets(s, size, stream);
69
70 if (ret == NULL && feof(stream)) {
71 printf(_("aborted!\n\n"));
72 printf(_("Console input is closed. "));
73 printf(_("Run 'make oldconfig' to update configuration.\n\n"));
74 exit(1);
75 }
76
77 return ret;
78}
79
66static void conf_askvalue(struct symbol *sym, const char *def) 80static void conf_askvalue(struct symbol *sym, const char *def)
67{ 81{
68 enum symbol_type type = sym_get_type(sym); 82 enum symbol_type type = sym_get_type(sym);
@@ -100,7 +114,7 @@ static void conf_askvalue(struct symbol *sym, const char *def)
100 check_stdin(); 114 check_stdin();
101 case ask_all: 115 case ask_all:
102 fflush(stdout); 116 fflush(stdout);
103 fgets(line, 128, stdin); 117 fgets_check_stream(line, 128, stdin);
104 return; 118 return;
105 case set_default: 119 case set_default:
106 printf("%s\n", def); 120 printf("%s\n", def);
@@ -356,7 +370,7 @@ static int conf_choice(struct menu *menu)
356 check_stdin(); 370 check_stdin();
357 case ask_all: 371 case ask_all:
358 fflush(stdout); 372 fflush(stdout);
359 fgets(line, 128, stdin); 373 fgets_check_stream(line, 128, stdin);
360 strip(line); 374 strip(line);
361 if (line[0] == '?') { 375 if (line[0] == '?') {
362 printf("\n%s\n", menu->sym->help ? 376 printf("\n%s\n", menu->sym->help ?
diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h
index 7c03927d4c7c..e52f3e90bf0c 100644
--- a/scripts/kconfig/qconf.h
+++ b/scripts/kconfig/qconf.h
@@ -22,8 +22,8 @@ public:
22 22
23#if QT_VERSION >= 300 23#if QT_VERSION >= 300
24 void readListSettings(); 24 void readListSettings();
25 QValueList<int> ConfigSettings::readSizes(const QString& key, bool *ok); 25 QValueList<int> readSizes(const QString& key, bool *ok);
26 bool ConfigSettings::writeSizes(const QString& key, const QValueList<int>& value); 26 bool writeSizes(const QString& key, const QValueList<int>& value);
27#endif 27#endif
28 28
29 bool showAll; 29 bool showAll;
@@ -124,7 +124,7 @@ public:
124 void setParentMenu(void); 124 void setParentMenu(void);
125 125
126 template <class P> 126 template <class P>
127 void ConfigList::updateMenuList(P*, struct menu*); 127 void updateMenuList(P*, struct menu*);
128 128
129 bool updateAll; 129 bool updateAll;
130 130
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 3303673c636e..bcdb28533733 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -74,6 +74,12 @@ asmlinkage long compat_sys_keyctl(u32 option,
74 case KEYCTL_SET_REQKEY_KEYRING: 74 case KEYCTL_SET_REQKEY_KEYRING:
75 return keyctl_set_reqkey_keyring(arg2); 75 return keyctl_set_reqkey_keyring(arg2);
76 76
77 case KEYCTL_SET_TIMEOUT:
78 return keyctl_set_timeout(arg2, arg3);
79
80 case KEYCTL_ASSUME_AUTHORITY:
81 return keyctl_assume_authority(arg2);
82
77 default: 83 default:
78 return -EOPNOTSUPP; 84 return -EOPNOTSUPP;
79 } 85 }
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 39cba97c5eb9..e066e6057955 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -107,12 +107,13 @@ extern struct key *request_key_and_link(struct key_type *type,
107struct request_key_auth { 107struct request_key_auth {
108 struct key *target_key; 108 struct key *target_key;
109 struct task_struct *context; 109 struct task_struct *context;
110 const char *callout_info;
110 pid_t pid; 111 pid_t pid;
111}; 112};
112 113
113extern struct key_type key_type_request_key_auth; 114extern struct key_type key_type_request_key_auth;
114extern struct key *request_key_auth_new(struct key *target, 115extern struct key *request_key_auth_new(struct key *target,
115 struct key **_rkakey); 116 const char *callout_info);
116 117
117extern struct key *key_get_instantiation_authkey(key_serial_t target_id); 118extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
118 119
@@ -136,6 +137,8 @@ extern long keyctl_instantiate_key(key_serial_t, const void __user *,
136 size_t, key_serial_t); 137 size_t, key_serial_t);
137extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t); 138extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t);
138extern long keyctl_set_reqkey_keyring(int); 139extern long keyctl_set_reqkey_keyring(int);
140extern long keyctl_set_timeout(key_serial_t, unsigned);
141extern long keyctl_assume_authority(key_serial_t);
139 142
140 143
141/* 144/*
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index b7a468fabdf9..3d2ebae029c1 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -834,6 +834,17 @@ long keyctl_instantiate_key(key_serial_t id,
834 if (plen > 32767) 834 if (plen > 32767)
835 goto error; 835 goto error;
836 836
837 /* the appropriate instantiation authorisation key must have been
838 * assumed before calling this */
839 ret = -EPERM;
840 instkey = current->request_key_auth;
841 if (!instkey)
842 goto error;
843
844 rka = instkey->payload.data;
845 if (rka->target_key->serial != id)
846 goto error;
847
837 /* pull the payload in if one was supplied */ 848 /* pull the payload in if one was supplied */
838 payload = NULL; 849 payload = NULL;
839 850
@@ -848,15 +859,6 @@ long keyctl_instantiate_key(key_serial_t id,
848 goto error2; 859 goto error2;
849 } 860 }
850 861
851 /* find the instantiation authorisation key */
852 instkey = key_get_instantiation_authkey(id);
853 if (IS_ERR(instkey)) {
854 ret = PTR_ERR(instkey);
855 goto error2;
856 }
857
858 rka = instkey->payload.data;
859
860 /* find the destination keyring amongst those belonging to the 862 /* find the destination keyring amongst those belonging to the
861 * requesting task */ 863 * requesting task */
862 keyring_ref = NULL; 864 keyring_ref = NULL;
@@ -865,7 +867,7 @@ long keyctl_instantiate_key(key_serial_t id,
865 KEY_WRITE); 867 KEY_WRITE);
866 if (IS_ERR(keyring_ref)) { 868 if (IS_ERR(keyring_ref)) {
867 ret = PTR_ERR(keyring_ref); 869 ret = PTR_ERR(keyring_ref);
868 goto error3; 870 goto error2;
869 } 871 }
870 } 872 }
871 873
@@ -874,11 +876,17 @@ long keyctl_instantiate_key(key_serial_t id,
874 key_ref_to_ptr(keyring_ref), instkey); 876 key_ref_to_ptr(keyring_ref), instkey);
875 877
876 key_ref_put(keyring_ref); 878 key_ref_put(keyring_ref);
877 error3: 879
878 key_put(instkey); 880 /* discard the assumed authority if it's just been disabled by
879 error2: 881 * instantiation of the key */
882 if (ret == 0) {
883 key_put(current->request_key_auth);
884 current->request_key_auth = NULL;
885 }
886
887error2:
880 kfree(payload); 888 kfree(payload);
881 error: 889error:
882 return ret; 890 return ret;
883 891
884} /* end keyctl_instantiate_key() */ 892} /* end keyctl_instantiate_key() */
@@ -895,14 +903,16 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
895 key_ref_t keyring_ref; 903 key_ref_t keyring_ref;
896 long ret; 904 long ret;
897 905
898 /* find the instantiation authorisation key */ 906 /* the appropriate instantiation authorisation key must have been
899 instkey = key_get_instantiation_authkey(id); 907 * assumed before calling this */
900 if (IS_ERR(instkey)) { 908 ret = -EPERM;
901 ret = PTR_ERR(instkey); 909 instkey = current->request_key_auth;
910 if (!instkey)
902 goto error; 911 goto error;
903 }
904 912
905 rka = instkey->payload.data; 913 rka = instkey->payload.data;
914 if (rka->target_key->serial != id)
915 goto error;
906 916
907 /* find the destination keyring if present (which must also be 917 /* find the destination keyring if present (which must also be
908 * writable) */ 918 * writable) */
@@ -911,7 +921,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
911 keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); 921 keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE);
912 if (IS_ERR(keyring_ref)) { 922 if (IS_ERR(keyring_ref)) {
913 ret = PTR_ERR(keyring_ref); 923 ret = PTR_ERR(keyring_ref);
914 goto error2; 924 goto error;
915 } 925 }
916 } 926 }
917 927
@@ -920,9 +930,15 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
920 key_ref_to_ptr(keyring_ref), instkey); 930 key_ref_to_ptr(keyring_ref), instkey);
921 931
922 key_ref_put(keyring_ref); 932 key_ref_put(keyring_ref);
923 error2: 933
924 key_put(instkey); 934 /* discard the assumed authority if it's just been disabled by
925 error: 935 * instantiation of the key */
936 if (ret == 0) {
937 key_put(current->request_key_auth);
938 current->request_key_auth = NULL;
939 }
940
941error:
926 return ret; 942 return ret;
927 943
928} /* end keyctl_negate_key() */ 944} /* end keyctl_negate_key() */
@@ -967,6 +983,88 @@ long keyctl_set_reqkey_keyring(int reqkey_defl)
967 983
968/*****************************************************************************/ 984/*****************************************************************************/
969/* 985/*
986 * set or clear the timeout for a key
987 */
988long keyctl_set_timeout(key_serial_t id, unsigned timeout)
989{
990 struct timespec now;
991 struct key *key;
992 key_ref_t key_ref;
993 time_t expiry;
994 long ret;
995
996 key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR);
997 if (IS_ERR(key_ref)) {
998 ret = PTR_ERR(key_ref);
999 goto error;
1000 }
1001
1002 key = key_ref_to_ptr(key_ref);
1003
1004 /* make the changes with the locks held to prevent races */
1005 down_write(&key->sem);
1006
1007 expiry = 0;
1008 if (timeout > 0) {
1009 now = current_kernel_time();
1010 expiry = now.tv_sec + timeout;
1011 }
1012
1013 key->expiry = expiry;
1014
1015 up_write(&key->sem);
1016 key_put(key);
1017
1018 ret = 0;
1019error:
1020 return ret;
1021
1022} /* end keyctl_set_timeout() */
1023
1024/*****************************************************************************/
1025/*
1026 * assume the authority to instantiate the specified key
1027 */
1028long keyctl_assume_authority(key_serial_t id)
1029{
1030 struct key *authkey;
1031 long ret;
1032
1033 /* special key IDs aren't permitted */
1034 ret = -EINVAL;
1035 if (id < 0)
1036 goto error;
1037
1038 /* we divest ourselves of authority if given an ID of 0 */
1039 if (id == 0) {
1040 key_put(current->request_key_auth);
1041 current->request_key_auth = NULL;
1042 ret = 0;
1043 goto error;
1044 }
1045
1046 /* attempt to assume the authority temporarily granted to us whilst we
1047 * instantiate the specified key
1048 * - the authorisation key must be in the current task's keyrings
1049 * somewhere
1050 */
1051 authkey = key_get_instantiation_authkey(id);
1052 if (IS_ERR(authkey)) {
1053 ret = PTR_ERR(authkey);
1054 goto error;
1055 }
1056
1057 key_put(current->request_key_auth);
1058 current->request_key_auth = authkey;
1059 ret = authkey->serial;
1060
1061error:
1062 return ret;
1063
1064} /* end keyctl_assume_authority() */
1065
1066/*****************************************************************************/
1067/*
970 * the key control system call 1068 * the key control system call
971 */ 1069 */
972asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3, 1070asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3,
@@ -1038,6 +1136,13 @@ asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3,
1038 case KEYCTL_SET_REQKEY_KEYRING: 1136 case KEYCTL_SET_REQKEY_KEYRING:
1039 return keyctl_set_reqkey_keyring(arg2); 1137 return keyctl_set_reqkey_keyring(arg2);
1040 1138
1139 case KEYCTL_SET_TIMEOUT:
1140 return keyctl_set_timeout((key_serial_t) arg2,
1141 (unsigned) arg3);
1142
1143 case KEYCTL_ASSUME_AUTHORITY:
1144 return keyctl_assume_authority((key_serial_t) arg2);
1145
1041 default: 1146 default:
1042 return -EOPNOTSUPP; 1147 return -EOPNOTSUPP;
1043 } 1148 }
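
Both new commands are reachable through the keyctl() multiplexor (and the 32-bit compat path earlier in this series). A hedged userspace sketch, using a raw syscall on the assumption that the keyutils library does not yet wrap these options:

	#include <linux/keyctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* from a /sbin/request-key handler: take over the authority to
	 * instantiate key 'id' */
	static long assume_authority(long id)
	{
		return syscall(__NR_keyctl, KEYCTL_ASSUME_AUTHORITY, id);
	}

	/* give any key a 60 second lifetime (needs SETATTR permission) */
	static long set_timeout(long id)
	{
		return syscall(__NR_keyctl, KEYCTL_SET_TIMEOUT, id, 60);
	}
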
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 5d22c0388b32..d65a180f888d 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -481,51 +481,6 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
481 481
482/*****************************************************************************/ 482/*****************************************************************************/
483/* 483/*
484 * search for an instantiation authorisation key matching a target key
485 * - the RCU read lock must be held by the caller
486 * - a target_id of zero specifies any valid token
487 */
488struct key *keyring_search_instkey(struct key *keyring,
489 key_serial_t target_id)
490{
491 struct request_key_auth *rka;
492 struct keyring_list *klist;
493 struct key *instkey;
494 int loop;
495
496 klist = rcu_dereference(keyring->payload.subscriptions);
497 if (klist) {
498 for (loop = 0; loop < klist->nkeys; loop++) {
499 instkey = klist->keys[loop];
500
501 if (instkey->type != &key_type_request_key_auth)
502 continue;
503
504 rka = instkey->payload.data;
505 if (target_id && rka->target_key->serial != target_id)
506 continue;
507
508 /* the auth key is revoked during instantiation */
509 if (!test_bit(KEY_FLAG_REVOKED, &instkey->flags))
510 goto found;
511
512 instkey = ERR_PTR(-EKEYREVOKED);
513 goto error;
514 }
515 }
516
517 instkey = ERR_PTR(-EACCES);
518 goto error;
519
520found:
521 atomic_inc(&instkey->usage);
522error:
523 return instkey;
524
525} /* end keyring_search_instkey() */
526
527/*****************************************************************************/
528/*
529 * find a keyring with the specified name 484 * find a keyring with the specified name
530 * - all named keyrings are searched 485 * - all named keyrings are searched
531 * - only find keyrings with search permission for the process 486 * - only find keyrings with search permission for the process
@@ -684,15 +639,31 @@ static void keyring_link_rcu_disposal(struct rcu_head *rcu)
684 639
685/*****************************************************************************/ 640/*****************************************************************************/
686/* 641/*
642 * dispose of a keyring list after the RCU grace period, freeing the unlinked
643 * key
644 */
645static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
646{
647 struct keyring_list *klist =
648 container_of(rcu, struct keyring_list, rcu);
649
650 key_put(klist->keys[klist->delkey]);
651 kfree(klist);
652
653} /* end keyring_unlink_rcu_disposal() */
654
655/*****************************************************************************/
656/*
 687 * link a key into a keyring 657 * link a key into a keyring
688 * - must be called with the keyring's semaphore write-locked 658 * - must be called with the keyring's semaphore write-locked
659 * - discard already extant link to matching key if there is one
689 */ 660 */
690int __key_link(struct key *keyring, struct key *key) 661int __key_link(struct key *keyring, struct key *key)
691{ 662{
692 struct keyring_list *klist, *nklist; 663 struct keyring_list *klist, *nklist;
693 unsigned max; 664 unsigned max;
694 size_t size; 665 size_t size;
695 int ret; 666 int loop, ret;
696 667
697 ret = -EKEYREVOKED; 668 ret = -EKEYREVOKED;
698 if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) 669 if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
@@ -714,6 +685,48 @@ int __key_link(struct key *keyring, struct key *key)
714 goto error2; 685 goto error2;
715 } 686 }
716 687
688 /* see if there's a matching key we can displace */
689 klist = keyring->payload.subscriptions;
690
691 if (klist && klist->nkeys > 0) {
692 struct key_type *type = key->type;
693
694 for (loop = klist->nkeys - 1; loop >= 0; loop--) {
695 if (klist->keys[loop]->type == type &&
696 strcmp(klist->keys[loop]->description,
697 key->description) == 0
698 ) {
699 /* found a match - replace with new key */
700 size = sizeof(struct key *) * klist->maxkeys;
701 size += sizeof(*klist);
702 BUG_ON(size > PAGE_SIZE);
703
704 ret = -ENOMEM;
705 nklist = kmalloc(size, GFP_KERNEL);
706 if (!nklist)
707 goto error2;
708
709 memcpy(nklist, klist, size);
710
711 /* replace matched key */
712 atomic_inc(&key->usage);
713 nklist->keys[loop] = key;
714
715 rcu_assign_pointer(
716 keyring->payload.subscriptions,
717 nklist);
718
719 /* dispose of the old keyring list and the
720 * displaced key */
721 klist->delkey = loop;
722 call_rcu(&klist->rcu,
723 keyring_unlink_rcu_disposal);
724
725 goto done;
726 }
727 }
728 }
729
717 /* check that we aren't going to overrun the user's quota */ 730 /* check that we aren't going to overrun the user's quota */
718 ret = key_payload_reserve(keyring, 731 ret = key_payload_reserve(keyring,
719 keyring->datalen + KEYQUOTA_LINK_BYTES); 732 keyring->datalen + KEYQUOTA_LINK_BYTES);
@@ -730,8 +743,6 @@ int __key_link(struct key *keyring, struct key *key)
730 smp_wmb(); 743 smp_wmb();
731 klist->nkeys++; 744 klist->nkeys++;
732 smp_wmb(); 745 smp_wmb();
733
734 ret = 0;
735 } 746 }
736 else { 747 else {
737 /* grow the key list */ 748 /* grow the key list */
@@ -769,16 +780,16 @@ int __key_link(struct key *keyring, struct key *key)
769 /* dispose of the old keyring list */ 780 /* dispose of the old keyring list */
770 if (klist) 781 if (klist)
771 call_rcu(&klist->rcu, keyring_link_rcu_disposal); 782 call_rcu(&klist->rcu, keyring_link_rcu_disposal);
772
773 ret = 0;
774 } 783 }
775 784
776 error2: 785done:
786 ret = 0;
787error2:
777 up_write(&keyring_serialise_link_sem); 788 up_write(&keyring_serialise_link_sem);
778 error: 789error:
779 return ret; 790 return ret;
780 791
781 error3: 792error3:
782 /* undo the quota changes */ 793 /* undo the quota changes */
783 key_payload_reserve(keyring, 794 key_payload_reserve(keyring,
784 keyring->datalen - KEYQUOTA_LINK_BYTES); 795 keyring->datalen - KEYQUOTA_LINK_BYTES);
@@ -809,21 +820,6 @@ EXPORT_SYMBOL(key_link);
809 820
810/*****************************************************************************/ 821/*****************************************************************************/
811/* 822/*
812 * dispose of a keyring list after the RCU grace period, freeing the unlinked
813 * key
814 */
815static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
816{
817 struct keyring_list *klist =
818 container_of(rcu, struct keyring_list, rcu);
819
820 key_put(klist->keys[klist->delkey]);
821 kfree(klist);
822
823} /* end keyring_unlink_rcu_disposal() */
824
825/*****************************************************************************/
826/*
827 * unlink the first link to a key from a keyring 823 * unlink the first link to a key from a keyring
828 */ 824 */
829int key_unlink(struct key *keyring, struct key *key) 825int key_unlink(struct key *keyring, struct key *key)
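
The displacement path added to __key_link() above follows the usual RCU update discipline rather than editing the list in place: the whole keyring_list is duplicated, the matching slot in the copy is overwritten with the new key, the copy is published with rcu_assign_pointer(), and klist->delkey tells keyring_unlink_rcu_disposal() (moved up in the file, ahead of its new caller) which displaced key to put once the grace period expires. Readers walking the old list under rcu_read_lock() therefore never observe a half-edited array.
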
diff --git a/security/keys/permission.c b/security/keys/permission.c
index e7f579c0eaf5..3b41f9b52537 100644
--- a/security/keys/permission.c
+++ b/security/keys/permission.c
@@ -73,3 +73,35 @@ use_these_perms:
73} /* end key_task_permission() */ 73} /* end key_task_permission() */
74 74
75EXPORT_SYMBOL(key_task_permission); 75EXPORT_SYMBOL(key_task_permission);
76
77/*****************************************************************************/
78/*
79 * validate a key
80 */
81int key_validate(struct key *key)
82{
83 struct timespec now;
84 int ret = 0;
85
86 if (key) {
87 /* check it's still accessible */
88 ret = -EKEYREVOKED;
89 if (test_bit(KEY_FLAG_REVOKED, &key->flags) ||
90 test_bit(KEY_FLAG_DEAD, &key->flags))
91 goto error;
92
93 /* check it hasn't expired */
94 ret = 0;
95 if (key->expiry) {
96 now = current_kernel_time();
97 if (now.tv_sec >= key->expiry)
98 ret = -EKEYEXPIRED;
99 }
100 }
101
102 error:
103 return ret;
104
105} /* end key_validate() */
106
107EXPORT_SYMBOL(key_validate);
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 566b1cc0118a..74cb79eb917e 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -270,9 +270,14 @@ int copy_thread_group_keys(struct task_struct *tsk)
 int copy_keys(unsigned long clone_flags, struct task_struct *tsk)
 {
 	key_check(tsk->thread_keyring);
+	key_check(tsk->request_key_auth);
 
 	/* no thread keyring yet */
 	tsk->thread_keyring = NULL;
+
+	/* copy the request_key() authorisation for this thread */
+	key_get(tsk->request_key_auth);
+
 	return 0;
 
 } /* end copy_keys() */
@@ -290,11 +295,12 @@ void exit_thread_group_keys(struct signal_struct *tg)
 
 /*****************************************************************************/
 /*
- * dispose of keys upon thread exit
+ * dispose of per-thread keys upon thread exit
  */
 void exit_keys(struct task_struct *tsk)
 {
 	key_put(tsk->thread_keyring);
+	key_put(tsk->request_key_auth);
 
 } /* end exit_keys() */
 
@@ -382,7 +388,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 				  struct task_struct *context)
 {
 	struct request_key_auth *rka;
-	key_ref_t key_ref, ret, err, instkey_ref;
+	key_ref_t key_ref, ret, err;
 
 	/* we want to return -EAGAIN or -ENOKEY if any of the keyrings were
 	 * searchable, but we failed to find a key or we found a negative key;
@@ -461,30 +467,12 @@ key_ref_t search_process_keyrings(struct key_type *type,
 			err = key_ref;
 			break;
 		}
-
-		/* if this process has a session keyring and that has an
-		 * instantiation authorisation key in the bottom level, then we
-		 * also search the keyrings of the process mentioned there */
-		if (context != current)
-			goto no_key;
-
-		rcu_read_lock();
-		instkey_ref = __keyring_search_one(
-			make_key_ref(rcu_dereference(
-					     context->signal->session_keyring),
-				     1),
-			&key_type_request_key_auth, NULL, 0);
-		rcu_read_unlock();
-
-		if (IS_ERR(instkey_ref))
-			goto no_key;
-
-		rka = key_ref_to_ptr(instkey_ref)->payload.data;
-
-		key_ref = search_process_keyrings(type, description, match,
-						  rka->context);
-		key_ref_put(instkey_ref);
-
+	}
+	/* or search the user-session keyring */
+	else {
+		key_ref = keyring_search_aux(
+			make_key_ref(context->user->session_keyring, 1),
+			context, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
 
@@ -500,11 +488,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
 			break;
 		}
 	}
-	/* or search the user-session keyring */
-	else {
-		key_ref = keyring_search_aux(
-			make_key_ref(context->user->session_keyring, 1),
-			context, type, description, match);
+
+	/* if this process has an instantiation authorisation key, then we also
+	 * search the keyrings of the process mentioned there
+	 * - we don't permit access to request_key auth keys via this method
+	 */
+	if (context->request_key_auth &&
+	    context == current &&
+	    type != &key_type_request_key_auth &&
+	    key_validate(context->request_key_auth) == 0
+	    ) {
+		rka = context->request_key_auth->payload.data;
+
+		key_ref = search_process_keyrings(type, description, match,
+						  rka->context);
+
 		if (!IS_ERR(key_ref))
 			goto found;
 
@@ -521,8 +519,6 @@ key_ref_t search_process_keyrings(struct key_type *type,
 		}
 	}
 
-
-no_key:
 	/* no key - decide on the error we're going to go for */
 	key_ref = ret ? ret : err;
 
@@ -628,6 +624,15 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id,
 		key = ERR_PTR(-EINVAL);
 		goto error;
 
+	case KEY_SPEC_REQKEY_AUTH_KEY:
+		key = context->request_key_auth;
+		if (!key)
+			goto error;
+
+		atomic_inc(&key->usage);
+		key_ref = make_key_ref(key, 1);
+		break;
+
 	default:
 		key_ref = ERR_PTR(-EINVAL);
 		if (id < 1)
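
The new KEY_SPEC_REQKEY_AUTH_KEY case lets the process servicing an upcall name its authorisation key directly instead of digging it out of a session keyring. A hedged userspace sketch, assuming a keyutils build that defines KEY_SPEC_REQKEY_AUTH_KEY and provides keyctl_read_alloc():

    /* Sketch: an /sbin/request-key helper reading its own callout data. */
    #include <keyutils.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
    	void *info;
    	long n = keyctl_read_alloc(KEY_SPEC_REQKEY_AUTH_KEY, &info);
    	if (n < 0) {
    		perror("keyctl_read_alloc");
    		return 1;
    	}
    	printf("callout info: %.*s\n", (int) n, (char *) info);
    	free(info);
    	return 0;
    }
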
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 5cc4bba70db6..f030a0ccbb93 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -29,28 +29,36 @@ DECLARE_WAIT_QUEUE_HEAD(request_key_conswq);
 /*****************************************************************************/
 /*
  * request userspace finish the construction of a key
- * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring> <info>"
+ * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring>"
  */
-static int call_request_key(struct key *key,
-			    const char *op,
-			    const char *callout_info)
+static int call_sbin_request_key(struct key *key,
+				 struct key *authkey,
+				 const char *op)
 {
 	struct task_struct *tsk = current;
 	key_serial_t prkey, sskey;
-	struct key *session_keyring, *rkakey;
-	char *argv[10], *envp[3], uid_str[12], gid_str[12];
+	struct key *keyring;
+	char *argv[9], *envp[3], uid_str[12], gid_str[12];
 	char key_str[12], keyring_str[3][12];
+	char desc[20];
 	int ret, i;
 
-	kenter("{%d},%s,%s", key->serial, op, callout_info);
+	kenter("{%d},{%d},%s", key->serial, authkey->serial, op);
 
-	/* generate a new session keyring with an auth key in it */
-	session_keyring = request_key_auth_new(key, &rkakey);
-	if (IS_ERR(session_keyring)) {
-		ret = PTR_ERR(session_keyring);
-		goto error;
+	/* allocate a new session keyring */
+	sprintf(desc, "_req.%u", key->serial);
+
+	keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 1, NULL);
+	if (IS_ERR(keyring)) {
+		ret = PTR_ERR(keyring);
+		goto error_alloc;
 	}
 
+	/* attach the auth key to the session keyring */
+	ret = __key_link(keyring, authkey);
+	if (ret < 0)
+		goto error_link;
+
 	/* record the UID and GID */
 	sprintf(uid_str, "%d", current->fsuid);
 	sprintf(gid_str, "%d", current->fsgid);
@@ -95,22 +103,19 @@ static int call_request_key(struct key *key,
 	argv[i++] = keyring_str[0];
 	argv[i++] = keyring_str[1];
 	argv[i++] = keyring_str[2];
-	argv[i++] = (char *) callout_info;
 	argv[i] = NULL;
 
 	/* do it */
-	ret = call_usermodehelper_keys(argv[0], argv, envp, session_keyring, 1);
+	ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1);
 
-	/* dispose of the special keys */
-	key_revoke(rkakey);
-	key_put(rkakey);
-	key_put(session_keyring);
+error_link:
+	key_put(keyring);
 
- error:
+error_alloc:
 	kleave(" = %d", ret);
 	return ret;
 
-} /* end call_request_key() */
+} /* end call_sbin_request_key() */
 
 /*****************************************************************************/
 /*
@@ -122,9 +127,10 @@ static struct key *__request_key_construction(struct key_type *type,
 					      const char *description,
 					      const char *callout_info)
 {
+	request_key_actor_t actor;
 	struct key_construction cons;
 	struct timespec now;
-	struct key *key;
+	struct key *key, *authkey;
 	int ret, negated;
 
 	kenter("%s,%s,%s", type->name, description, callout_info);
@@ -143,8 +149,19 @@ static struct key *__request_key_construction(struct key_type *type,
 	/* we drop the construction sem here on behalf of the caller */
 	up_write(&key_construction_sem);
 
+	/* allocate an authorisation key */
+	authkey = request_key_auth_new(key, callout_info);
+	if (IS_ERR(authkey)) {
+		ret = PTR_ERR(authkey);
+		authkey = NULL;
+		goto alloc_authkey_failed;
+	}
+
 	/* make the call */
-	ret = call_request_key(key, "create", callout_info);
+	actor = call_sbin_request_key;
+	if (type->request_key)
+		actor = type->request_key;
+	ret = actor(key, authkey, "create");
 	if (ret < 0)
 		goto request_failed;
 
@@ -153,22 +170,29 @@ static struct key *__request_key_construction(struct key_type *type,
 	if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
 		goto request_failed;
 
+	key_revoke(authkey);
+	key_put(authkey);
+
 	down_write(&key_construction_sem);
 	list_del(&cons.link);
 	up_write(&key_construction_sem);
 
 	/* also give an error if the key was negatively instantiated */
- check_not_negative:
+check_not_negative:
 	if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
 		key_put(key);
 		key = ERR_PTR(-ENOKEY);
 	}
 
- out:
+out:
 	kleave(" = %p", key);
 	return key;
 
- request_failed:
+request_failed:
+	key_revoke(authkey);
+	key_put(authkey);
+
+alloc_authkey_failed:
 	/* it wasn't instantiated
 	 * - remove from construction queue
 	 * - mark the key as dead
@@ -217,7 +241,7 @@ static struct key *__request_key_construction(struct key_type *type,
 	key = ERR_PTR(ret);
 	goto out;
 
- alloc_failed:
+alloc_failed:
 	up_write(&key_construction_sem);
 	goto out;
 
@@ -464,35 +488,3 @@ struct key *request_key(struct key_type *type,
 } /* end request_key() */
 
 EXPORT_SYMBOL(request_key);
-
-/*****************************************************************************/
-/*
- * validate a key
- */
-int key_validate(struct key *key)
-{
-	struct timespec now;
-	int ret = 0;
-
-	if (key) {
-		/* check it's still accessible */
-		ret = -EKEYREVOKED;
-		if (test_bit(KEY_FLAG_REVOKED, &key->flags) ||
-		    test_bit(KEY_FLAG_DEAD, &key->flags))
-			goto error;
-
-		/* check it hasn't expired */
-		ret = 0;
-		if (key->expiry) {
-			now = current_kernel_time();
-			if (now.tv_sec >= key->expiry)
-				ret = -EKEYEXPIRED;
-		}
-	}
-
- error:
-	return ret;
-
-} /* end key_validate() */
-
-EXPORT_SYMBOL(key_validate);
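
__request_key_construction() now allocates the authorisation key itself and dispatches through a request_key_actor_t, so a key type can supply its own upcall in place of /sbin/request-key. A sketch of such a type (every name below except the ->request_key field and the two kernel APIs is hypothetical):

    /* Sketch: complete construction in-kernel rather than execing a
     * userspace helper; authkey authorises the instantiation. */
    static int example_request_key(struct key *key, struct key *authkey,
    			       const char *op)
    {
    	static const char payload[] = "example";

    	/* ... derive the payload by some in-kernel means ... */
    	return key_instantiate_and_link(key, payload, sizeof(payload) - 1,
    					NULL, authkey);
    }

    static struct key_type key_type_example = {
    	.name		= "example",
    	.instantiate	= example_instantiate,	/* hypothetical */
    	.request_key	= example_request_key,
    };
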
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index a8e4069d48cb..cce6ba6b0323 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -15,11 +15,13 @@
 #include <linux/sched.h>
 #include <linux/err.h>
 #include <linux/seq_file.h>
+#include <asm/uaccess.h>
 #include "internal.h"
 
 static int request_key_auth_instantiate(struct key *, const void *, size_t);
 static void request_key_auth_describe(const struct key *, struct seq_file *);
 static void request_key_auth_destroy(struct key *);
+static long request_key_auth_read(const struct key *, char __user *, size_t);
 
 /*
  * the request-key authorisation key type definition
@@ -30,51 +32,25 @@ struct key_type key_type_request_key_auth = {
 	.instantiate	= request_key_auth_instantiate,
 	.describe	= request_key_auth_describe,
 	.destroy	= request_key_auth_destroy,
+	.read		= request_key_auth_read,
 };
 
 /*****************************************************************************/
 /*
- * instantiate a request-key authorisation record
+ * instantiate a request-key authorisation key
  */
 static int request_key_auth_instantiate(struct key *key,
 					const void *data,
 					size_t datalen)
 {
-	struct request_key_auth *rka, *irka;
-	struct key *instkey;
-	int ret;
-
-	ret = -ENOMEM;
-	rka = kmalloc(sizeof(*rka), GFP_KERNEL);
-	if (rka) {
-		/* see if the calling process is already servicing the key
-		 * request of another process */
-		instkey = key_get_instantiation_authkey(0);
-		if (!IS_ERR(instkey)) {
-			/* it is - use that instantiation context here too */
-			irka = instkey->payload.data;
-			rka->context = irka->context;
-			rka->pid = irka->pid;
-			key_put(instkey);
-		}
-		else {
-			/* it isn't - use this process as the context */
-			rka->context = current;
-			rka->pid = current->pid;
-		}
-
-		rka->target_key = key_get((struct key *) data);
-		key->payload.data = rka;
-		ret = 0;
-	}
-
-	return ret;
+	key->payload.data = (struct request_key_auth *) data;
+	return 0;
 
 } /* end request_key_auth_instantiate() */
 
 /*****************************************************************************/
 /*
- *
+ * reading a request-key authorisation key retrieves the callout information
  */
 static void request_key_auth_describe(const struct key *key,
 				      struct seq_file *m)
@@ -83,12 +59,40 @@ static void request_key_auth_describe(const struct key *key,
 
 	seq_puts(m, "key:");
 	seq_puts(m, key->description);
-	seq_printf(m, " pid:%d", rka->pid);
+	seq_printf(m, " pid:%d ci:%zu", rka->pid, strlen(rka->callout_info));
 
 } /* end request_key_auth_describe() */
 
 /*****************************************************************************/
 /*
+ * read the callout_info data
+ * - the key's semaphore is read-locked
+ */
+static long request_key_auth_read(const struct key *key,
+				  char __user *buffer, size_t buflen)
+{
+	struct request_key_auth *rka = key->payload.data;
+	size_t datalen;
+	long ret;
+
+	datalen = strlen(rka->callout_info);
+	ret = datalen;
+
+	/* we can return the data as is */
+	if (buffer && buflen > 0) {
+		if (buflen > datalen)
+			buflen = datalen;
+
+		if (copy_to_user(buffer, rka->callout_info, buflen) != 0)
+			ret = -EFAULT;
+	}
+
+	return ret;
+
+} /* end request_key_auth_read() */
+
+/*****************************************************************************/
+/*
  * destroy an instantiation authorisation token key
  */
 static void request_key_auth_destroy(struct key *key)
@@ -104,56 +108,89 @@ static void request_key_auth_destroy(struct key *key)
 
 /*****************************************************************************/
 /*
- * create a session keyring to be for the invokation of /sbin/request-key and
- * stick an authorisation token in it
+ * create an authorisation token for /sbin/request-key or whoever to gain
+ * access to the caller's security data
  */
-struct key *request_key_auth_new(struct key *target, struct key **_rkakey)
+struct key *request_key_auth_new(struct key *target, const char *callout_info)
 {
-	struct key *keyring, *rkakey = NULL;
+	struct request_key_auth *rka, *irka;
+	struct key *authkey = NULL;
 	char desc[20];
 	int ret;
 
 	kenter("%d,", target->serial);
 
-	/* allocate a new session keyring */
-	sprintf(desc, "_req.%u", target->serial);
+	/* allocate a auth record */
+	rka = kmalloc(sizeof(*rka), GFP_KERNEL);
+	if (!rka) {
+		kleave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
 
-	keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 1, NULL);
-	if (IS_ERR(keyring)) {
-		kleave("= %ld", PTR_ERR(keyring));
-		return keyring;
+	/* see if the calling process is already servicing the key request of
+	 * another process */
+	if (current->request_key_auth) {
+		/* it is - use that instantiation context here too */
+		irka = current->request_key_auth->payload.data;
+		rka->context = irka->context;
+		rka->pid = irka->pid;
 	}
+	else {
+		/* it isn't - use this process as the context */
+		rka->context = current;
+		rka->pid = current->pid;
+	}
+
+	rka->target_key = key_get(target);
+	rka->callout_info = callout_info;
 
 	/* allocate the auth key */
 	sprintf(desc, "%x", target->serial);
 
-	rkakey = key_alloc(&key_type_request_key_auth, desc,
-			   current->fsuid, current->fsgid,
-			   KEY_POS_VIEW | KEY_USR_VIEW, 1);
-	if (IS_ERR(rkakey)) {
-		key_put(keyring);
-		kleave("= %ld", PTR_ERR(rkakey));
-		return rkakey;
+	authkey = key_alloc(&key_type_request_key_auth, desc,
+			    current->fsuid, current->fsgid,
+			    KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
			    KEY_USR_VIEW, 1);
+	if (IS_ERR(authkey)) {
+		ret = PTR_ERR(authkey);
+		goto error_alloc;
 	}
 
 	/* construct and attach to the keyring */
-	ret = key_instantiate_and_link(rkakey, target, 0, keyring, NULL);
-	if (ret < 0) {
-		key_revoke(rkakey);
-		key_put(rkakey);
-		key_put(keyring);
-		kleave("= %d", ret);
-		return ERR_PTR(ret);
-	}
+	ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL);
+	if (ret < 0)
+		goto error_inst;
 
-	*_rkakey = rkakey;
-	kleave(" = {%d} ({%d})", keyring->serial, rkakey->serial);
-	return keyring;
+	kleave(" = {%d})", authkey->serial);
+	return authkey;
+
+error_inst:
+	key_revoke(authkey);
+	key_put(authkey);
+error_alloc:
+	key_put(rka->target_key);
+	kfree(rka);
+	kleave("= %d", ret);
+	return ERR_PTR(ret);
 
 } /* end request_key_auth_new() */
 
 /*****************************************************************************/
 /*
+ * see if an authorisation key is associated with a particular key
+ */
+static int key_get_instantiation_authkey_match(const struct key *key,
+					       const void *_id)
+{
+	struct request_key_auth *rka = key->payload.data;
+	key_serial_t id = (key_serial_t)(unsigned long) _id;
+
+	return rka->target_key->serial == id;
+
+} /* end key_get_instantiation_authkey_match() */
+
+/*****************************************************************************/
+/*
  * get the authorisation key for instantiation of a specific key if attached to
  * the current process's keyrings
  * - this key is inserted into a keyring and that is set as /sbin/request-key's
@@ -162,22 +199,27 @@ struct key *request_key_auth_new(struct key *target, struct key **_rkakey)
  */
 struct key *key_get_instantiation_authkey(key_serial_t target_id)
 {
-	struct task_struct *tsk = current;
-	struct key *instkey;
+	struct key *authkey;
+	key_ref_t authkey_ref;
 
-	/* we must have our own personal session keyring */
-	if (!tsk->signal->session_keyring)
-		return ERR_PTR(-EACCES);
-
-	/* and it must contain a suitable request authorisation key
-	 * - lock RCU against session keyring changing
-	 */
-	rcu_read_lock();
+	authkey_ref = search_process_keyrings(
+		&key_type_request_key_auth,
+		(void *) (unsigned long) target_id,
+		key_get_instantiation_authkey_match,
+		current);
+
+	if (IS_ERR(authkey_ref)) {
+		authkey = ERR_PTR(PTR_ERR(authkey_ref));
+		goto error;
+	}
 
-	instkey = keyring_search_instkey(
-		rcu_dereference(tsk->signal->session_keyring), target_id);
+	authkey = key_ref_to_ptr(authkey_ref);
+	if (test_bit(KEY_FLAG_REVOKED, &authkey->flags)) {
+		key_put(authkey);
+		authkey = ERR_PTR(-EKEYREVOKED);
+	}
 
-	rcu_read_unlock();
-	return instkey;
+error:
+	return authkey;
 
 } /* end key_get_instantiation_authkey() */
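
key_get_instantiation_authkey() now finds the auth key by walking the caller's own keyrings with a match callback keyed on the target's serial number, rather than probing the session keyring under RCU. The same search-by-payload pattern generalises to other key types; a sketch, with the match function and wrapper hypothetical (key_type_user and the search API are real):

    /* Sketch: locate one of the caller's keys by a payload property via
     * search_process_keyrings() and a custom match function. */
    static int example_match(const struct key *key, const void *_id)
    {
    	return key->serial == (key_serial_t)(unsigned long) _id;
    }

    static struct key *example_find(key_serial_t id)
    {
    	key_ref_t ref = search_process_keyrings(
    		&key_type_user, (void *)(unsigned long) id,
    		example_match, current);

    	if (IS_ERR(ref))
    		return ERR_PTR(PTR_ERR(ref));
    	return key_ref_to_ptr(ref);
    }
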
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3d496eae1b47..6647204e4636 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1663,7 +1663,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
 			continue;
 		}
 		if (devnull) {
-			rcuref_inc(&devnull->f_count);
+			get_file(devnull);
 		} else {
 			devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR);
 			if (!devnull) {
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index e59da6398d44..b5fa02d17b1e 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -889,7 +889,7 @@ static void sel_remove_bools(struct dentry *de)
 	spin_lock(&dcache_lock);
 	node = de->d_subdirs.next;
 	while (node != &de->d_subdirs) {
-		struct dentry *d = list_entry(node, struct dentry, d_child);
+		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
 		list_del_init(node);
 
 		if (d->d_inode) {
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 5b7776504e4c..b2af7ca496c1 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -146,7 +146,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_us
 	return rc;
 
 out:
-	*ctxp = 0;
+	*ctxp = NULL;
 	kfree(ctx);
 	return rc;
 }
diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c
index 679d0ae97e4f..ed81eec6e732 100644
--- a/sound/isa/wavefront/wavefront_synth.c
+++ b/sound/isa/wavefront/wavefront_synth.c
@@ -115,18 +115,11 @@ MODULE_PARM_DESC(osrun_time, "how many seconds to wait for the ICS2115 OS");
 
 #ifdef WF_DEBUG
 
-#if defined(NEW_MACRO_VARARGS) || __GNUC__ >= 3
 #define DPRINT(cond, ...) \
        if ((dev->debug & (cond)) == (cond)) { \
             snd_printk (__VA_ARGS__); \
       }
 #else
-#define DPRINT(cond, args...) \
-       if ((dev->debug & (cond)) == (cond)) { \
-            snd_printk (args); \
-      }
-#endif
-#else
 #define DPRINT(cond, args...)
 #endif /* WF_DEBUG */
 
diff --git a/sound/oss/i810_audio.c b/sound/oss/i810_audio.c
index b9a640fe48b1..4600cd6742ce 100644
--- a/sound/oss/i810_audio.c
+++ b/sound/oss/i810_audio.c
@@ -3359,12 +3359,6 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device
 		goto out_region2;
 	}
 
-	if (request_irq(card->irq, &i810_interrupt, SA_SHIRQ,
-			card_names[pci_id->driver_data], card)) {
-		printk(KERN_ERR "i810_audio: unable to allocate irq %d\n", card->irq);
-		goto out_pio;
-	}
-
 	if (card->use_mmio) {
 		if (request_mem_region(card->ac97base_mmio_phys, 512, "ich_audio MMBAR")) {
 			if ((card->ac97base_mmio = ioremap(card->ac97base_mmio_phys, 512))) { /*@FIXME can ioremap fail? don't know (jsaw) */
@@ -3395,10 +3389,8 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device
 	}
 
 	/* initialize AC97 codec and register /dev/mixer */
-	if (i810_ac97_init(card) <= 0) {
-		free_irq(card->irq, card);
+	if (i810_ac97_init(card) <= 0)
 		goto out_iospace;
-	}
 	pci_set_drvdata(pci_dev, card);
 
 	if(clocking == 0) {
@@ -3410,7 +3402,6 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device
 	if ((card->dev_audio = register_sound_dsp(&i810_audio_fops, -1)) < 0) {
 		int i;
 		printk(KERN_ERR "i810_audio: couldn't register DSP device!\n");
-		free_irq(card->irq, card);
 		for (i = 0; i < NR_AC97; i++)
 			if (card->ac97_codec[i] != NULL) {
 				unregister_sound_mixer(card->ac97_codec[i]->dev_mixer);
@@ -3419,6 +3410,13 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device
 		goto out_iospace;
 	}
 
+	if (request_irq(card->irq, &i810_interrupt, SA_SHIRQ,
+			card_names[pci_id->driver_data], card)) {
+		printk(KERN_ERR "i810_audio: unable to allocate irq %d\n", card->irq);
+		goto out_iospace;
+	}
+
+
 	card->initializing = 0;
 	return 0;
 
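
The i810 change moves request_irq() from early in probe to the very end: with SA_SHIRQ a shared interrupt can invoke the handler as soon as it is registered, so everything the handler touches must already be initialised, and the earlier failure paths no longer need free_irq() in their unwind. A condensed sketch of the pattern (struct and labels abbreviated from the driver):

    /* Sketch: install the shared IRQ handler only after all state it may
     * inspect exists; earlier failures then need no free_irq() to unwind. */
    static int example_probe(struct pci_dev *pci_dev, struct i810_card *card)
    {
    	/* 1. map I/O regions
    	 * 2. initialise the AC97 codec, register /dev/mixer and /dev/dsp
    	 * 3. only now attach the (shared) interrupt handler */
    	if (request_irq(card->irq, &i810_interrupt, SA_SHIRQ,
    			"i810_audio", card)) {
    		printk(KERN_ERR "i810_audio: unable to allocate irq %d\n",
    		       card->irq);
    		goto out_iospace;	/* nothing IRQ-related to undo */
    	}
    	return 0;

    out_iospace:
    	/* release mappings and registered devices in reverse order */
    	return -ENODEV;
    }
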