-rw-r--r--  Documentation/DocBook/mtdnand.tmpl | 2
-rw-r--r--  Documentation/DocBook/scsi.tmpl | 2
-rw-r--r--  Documentation/SubmittingPatches | 2
-rw-r--r--  Documentation/filesystems/nfs41-server.txt | 54
-rw-r--r--  Documentation/filesystems/nfsroot.txt | 2
-rw-r--r--  Documentation/filesystems/proc.txt | 22
-rw-r--r--  Documentation/gcov.txt | 2
-rw-r--r--  Documentation/hwmon/hpfall.c | 115
-rw-r--r--  Documentation/hwmon/pc87427 | 2
-rw-r--r--  Documentation/kernel-parameters.txt | 2
-rw-r--r--  Documentation/kmemcheck.txt | 21
-rw-r--r--  Documentation/memory.txt | 31
-rw-r--r--  Documentation/networking/regulatory.txt | 2
-rw-r--r--  Documentation/numastat.txt | 8
-rw-r--r--  Documentation/powerpc/dts-bindings/marvell.txt | 2
-rw-r--r--  Documentation/scsi/ChangeLog.megaraid | 2
-rw-r--r--  Documentation/scsi/scsi_fc_transport.txt | 2
-rw-r--r--  Documentation/sound/alsa/HD-Audio-Models.txt | 2
-rw-r--r--  Documentation/sysctl/kernel.txt | 30
-rw-r--r--  Documentation/sysctl/vm.txt | 4
-rw-r--r--  Documentation/trace/events-kmem.txt | 107
-rw-r--r--  Documentation/trace/events.txt | 2
-rw-r--r--  Documentation/trace/ftrace.txt | 2
-rw-r--r--  Documentation/trace/postprocess/trace-pagealloc-postprocess.pl | 418
-rw-r--r--  Documentation/trace/tracepoint-analysis.txt | 327
-rw-r--r--  Documentation/vm/00-INDEX | 4
-rw-r--r--  Documentation/vm/hugetlbpage.txt | 147
-rw-r--r--  Documentation/vm/ksm.txt | 89
-rw-r--r--  Documentation/vm/map_hugetlb.c | 77
-rw-r--r--  MAINTAINERS | 137
-rw-r--r--  arch/alpha/Kconfig | 8
-rw-r--r--  arch/alpha/boot/tools/objstrip.c | 2
-rw-r--r--  arch/alpha/include/asm/hardirq.h | 14
-rw-r--r--  arch/alpha/include/asm/mman.h | 5
-rw-r--r--  arch/alpha/kernel/pci_iommu.c | 6
-rw-r--r--  arch/alpha/kernel/time.c | 79
-rw-r--r--  arch/alpha/mm/init.c | 2
-rw-r--r--  arch/alpha/mm/numa.c | 2
-rw-r--r--  arch/arm/Makefile | 2
-rw-r--r--  arch/arm/include/asm/mman.h | 18
-rw-r--r--  arch/arm/mach-at91/board-afeb-9260v1.c | 2
-rw-r--r--  arch/arm/mach-at91/board-cam60.c | 2
-rw-r--r--  arch/arm/mach-at91/board-neocore926.c | 2
-rw-r--r--  arch/arm/mach-at91/board-qil-a9260.c | 2
-rw-r--r--  arch/arm/mach-at91/board-sam9260ek.c | 2
-rw-r--r--  arch/arm/mach-at91/board-sam9261ek.c | 2
-rw-r--r--  arch/arm/mach-at91/board-sam9263ek.c | 2
-rw-r--r--  arch/arm/mach-at91/board-sam9g20ek.c | 2
-rw-r--r--  arch/arm/mach-at91/board-sam9rlek.c | 2
-rw-r--r--  arch/arm/mach-at91/board-usb-a9260.c | 2
-rw-r--r--  arch/arm/mach-at91/board-usb-a9263.c | 2
-rw-r--r--  arch/arm/mm/init.c | 4
-rw-r--r--  arch/avr32/include/asm/mman.h | 18
-rw-r--r--  arch/avr32/mm/init.c | 2
-rw-r--r--  arch/blackfin/mach-bf538/include/mach/defBF539.h | 2
-rw-r--r--  arch/cris/include/asm/mman.h | 20
-rw-r--r--  arch/cris/mm/init.c | 2
-rw-r--r--  arch/frv/include/asm/mman.h | 19
-rw-r--r--  arch/frv/lib/cache.S | 2
-rw-r--r--  arch/h8300/include/asm/hardirq.h | 15
-rw-r--r--  arch/h8300/include/asm/mman.h | 18
-rw-r--r--  arch/h8300/kernel/irq.c | 5
-rw-r--r--  arch/h8300/kernel/timer/tpu.c | 1
-rw-r--r--  arch/ia64/ia32/sys_ia32.c | 2
-rw-r--r--  arch/ia64/include/asm/mman.h | 14
-rw-r--r--  arch/ia64/mm/init.c | 2
-rw-r--r--  arch/m32r/Kconfig | 6
-rw-r--r--  arch/m32r/include/asm/hardirq.h | 15
-rw-r--r--  arch/m32r/include/asm/mman.h | 18
-rw-r--r--  arch/m32r/kernel/ptrace.c | 5
-rw-r--r--  arch/m32r/kernel/smpboot.c | 2
-rw-r--r--  arch/m32r/kernel/time.c | 74
-rw-r--r--  arch/m32r/mm/init.c | 2
-rw-r--r--  arch/m68k/Kconfig | 6
-rw-r--r--  arch/m68k/include/asm/hardirq_mm.h | 12
-rw-r--r--  arch/m68k/include/asm/mman.h | 18
-rw-r--r--  arch/m68k/kernel/time.c | 70
-rw-r--r--  arch/m68k/mm/init.c | 2
-rw-r--r--  arch/microblaze/mm/init.c | 2
-rw-r--r--  arch/mips/include/asm/mman.h | 5
-rw-r--r--  arch/mips/include/asm/pgtable.h | 10
-rw-r--r--  arch/mips/mm/init.c | 2
-rw-r--r--  arch/mn10300/include/asm/cacheflush.h | 4
-rw-r--r--  arch/mn10300/include/asm/mman.h | 29
-rw-r--r--  arch/mn10300/mm/init.c | 2
-rw-r--r--  arch/parisc/include/asm/mman.h | 5
-rw-r--r--  arch/parisc/mm/init.c | 2
-rw-r--r--  arch/powerpc/include/asm/mman.h | 2
-rw-r--r--  arch/powerpc/kernel/udbg_16550.c | 2
-rw-r--r--  arch/powerpc/mm/mem.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 4
-rw-r--r--  arch/powerpc/platforms/powermac/udbg_scc.c | 2
-rw-r--r--  arch/powerpc/sysdev/axonram.c | 2
-rw-r--r--  arch/s390/hypfs/inode.c | 6
-rw-r--r--  arch/s390/include/asm/mman.h | 13
-rw-r--r--  arch/s390/kvm/interrupt.c | 2
-rw-r--r--  arch/s390/mm/init.c | 2
-rw-r--r--  arch/sh/mm/init.c | 2
-rw-r--r--  arch/sparc/include/asm/mman.h | 2
-rw-r--r--  arch/sparc/kernel/irq_64.c | 2
-rw-r--r--  arch/sparc/mm/init_32.c | 2
-rw-r--r--  arch/um/drivers/net_kern.c | 2
-rw-r--r--  arch/um/drivers/ubd_kern.c | 2
-rw-r--r--  arch/um/include/asm/hardirq.h | 26
-rw-r--r--  arch/um/include/shared/ptrace_user.h | 2
-rw-r--r--  arch/um/kernel/mem.c | 2
-rw-r--r--  arch/um/kernel/skas/mmu.c | 4
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 2
-rw-r--r--  arch/x86/kernel/e820.c | 2
-rw-r--r--  arch/x86/kernel/microcode_core.c | 4
-rw-r--r--  arch/x86/mm/init_32.c | 6
-rw-r--r--  arch/x86/mm/init_64.c | 2
-rw-r--r--  arch/x86/mm/kmemcheck/kmemcheck.c | 3
-rw-r--r--  arch/xtensa/include/asm/mman.h | 5
-rw-r--r--  arch/xtensa/mm/init.c | 2
-rw-r--r--  drivers/ata/pata_hpt37x.c | 2
-rw-r--r--  drivers/base/node.c | 5
-rw-r--r--  drivers/block/DAC960.c | 10
-rw-r--r--  drivers/block/amiflop.c | 2
-rw-r--r--  drivers/block/aoe/aoeblk.c | 2
-rw-r--r--  drivers/block/ataflop.c | 2
-rw-r--r--  drivers/block/brd.c | 2
-rw-r--r--  drivers/block/cciss.c | 2
-rw-r--r--  drivers/block/cpqarray.c | 2
-rw-r--r--  drivers/block/floppy.c | 2
-rw-r--r--  drivers/block/hd.c | 2
-rw-r--r--  drivers/block/loop.c | 2
-rw-r--r--  drivers/block/mg_disk.c | 2
-rw-r--r--  drivers/block/nbd.c | 2
-rw-r--r--  drivers/block/osdblk.c | 2
-rw-r--r--  drivers/block/paride/pcd.c | 2
-rw-r--r--  drivers/block/paride/pd.c | 2
-rw-r--r--  drivers/block/paride/pf.c | 2
-rw-r--r--  drivers/block/pktcdvd.c | 2
-rw-r--r--  drivers/block/ps3disk.c | 2
-rw-r--r--  drivers/block/ps3vram.c | 2
-rw-r--r--  drivers/block/sunvdc.c | 2
-rw-r--r--  drivers/block/swim.c | 2
-rw-r--r--  drivers/block/swim3.c | 4
-rw-r--r--  drivers/block/sx8.c | 2
-rw-r--r--  drivers/block/ub.c | 2
-rw-r--r--  drivers/block/umem.c | 3
-rw-r--r--  drivers/block/viodasd.c | 2
-rw-r--r--  drivers/block/virtio_blk.c | 2
-rw-r--r--  drivers/block/xd.c | 2
-rw-r--r--  drivers/block/xen-blkfront.c | 4
-rw-r--r--  drivers/block/xsysace.c | 2
-rw-r--r--  drivers/block/z2ram.c | 3
-rw-r--r--  drivers/cdrom/gdrom.c | 2
-rw-r--r--  drivers/cdrom/viocd.c | 2
-rw-r--r--  drivers/char/agp/backend.c | 4
-rw-r--r--  drivers/char/agp/uninorth-agp.c | 2
-rw-r--r--  drivers/char/epca.c | 2
-rw-r--r--  drivers/char/ipmi/ipmi_poweroff.c | 4
-rw-r--r--  drivers/char/pcmcia/cm4000_cs.c | 2
-rw-r--r--  drivers/cpuidle/governors/menu.c | 271
-rw-r--r--  drivers/edac/edac_core.h | 2
-rw-r--r--  drivers/firmware/memmap.c | 2
-rw-r--r--  drivers/gpio/bt8xxgpio.c | 4
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c | 2
-rw-r--r--  drivers/gpu/drm/mga/mga_state.c | 4
-rw-r--r--  drivers/hid/Kconfig | 7
-rw-r--r--  drivers/hid/hid-core.c | 14
-rw-r--r--  drivers/hid/usbhid/hid-core.c | 16
-rw-r--r--  drivers/hwmon/adm1021.c | 79
-rw-r--r--  drivers/hwmon/applesmc.c | 38
-rw-r--r--  drivers/hwmon/coretemp.c | 4
-rw-r--r--  drivers/hwmon/lis3lv02d.c | 9
-rw-r--r--  drivers/hwmon/lis3lv02d.h | 24
-rw-r--r--  drivers/hwmon/lis3lv02d_spi.c | 45
-rw-r--r--  drivers/hwmon/sht15.c | 6
-rw-r--r--  drivers/ide/ide-cd.c | 2
-rw-r--r--  drivers/ide/ide-gd.c | 2
-rw-r--r--  drivers/ide/ide-probe.c | 2
-rw-r--r--  drivers/ide/ide-tape.c | 2
-rw-r--r--  drivers/ide/umc8672.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6110.c | 2
-rw-r--r--  drivers/input/keyboard/atkbd.c | 2
-rw-r--r--  drivers/input/misc/Kconfig | 16
-rw-r--r--  drivers/input/misc/Makefile | 1
-rw-r--r--  drivers/input/misc/winbond-cir.c | 1614
-rw-r--r--  drivers/isdn/capi/capifs.c | 2
-rw-r--r--  drivers/isdn/capi/capiutil.c | 2
-rw-r--r--  drivers/isdn/i4l/isdn_common.c | 4
-rw-r--r--  drivers/lguest/page_tables.c | 2
-rw-r--r--  drivers/macintosh/rack-meter.c | 2
-rw-r--r--  drivers/md/dm.c | 4
-rw-r--r--  drivers/md/md.c | 4
-rw-r--r--  drivers/md/md.h | 2
-rw-r--r--  drivers/md/multipath.c | 3
-rw-r--r--  drivers/media/dvb/pt1/pt1.c | 1
-rw-r--r--  drivers/media/dvb/siano/smscoreapi.c | 2
-rw-r--r--  drivers/media/dvb/siano/smscoreapi.h | 4
-rw-r--r--  drivers/media/radio/radio-mr800.c | 2
-rw-r--r--  drivers/media/video/cx88/cx88-blackbird.c | 4
-rw-r--r--  drivers/media/video/cx88/cx88-dvb.c | 4
-rw-r--r--  drivers/media/video/cx88/cx88-mpeg.c | 4
-rw-r--r--  drivers/media/video/cx88/cx88-video.c | 4
-rw-r--r--  drivers/media/video/gspca/m5602/m5602_core.c | 2
-rw-r--r--  drivers/memstick/core/mspro_block.c | 2
-rw-r--r--  drivers/message/fusion/mptbase.c | 4
-rw-r--r--  drivers/message/i2o/i2o_block.c | 2
-rw-r--r--  drivers/mfd/ab3100-core.c | 2
-rw-r--r--  drivers/misc/ibmasm/ibmasmfs.c | 2
-rw-r--r--  drivers/mmc/card/block.c | 2
-rw-r--r--  drivers/mmc/host/mxcmmc.c | 2
-rw-r--r--  drivers/mtd/devices/m25p80.c | 4
-rw-r--r--  drivers/mtd/devices/slram.c | 2
-rw-r--r--  drivers/mtd/ftl.c | 2
-rw-r--r--  drivers/mtd/maps/ixp2000.c | 2
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 2
-rw-r--r--  drivers/mtd/nand/cafe_nand.c | 4
-rw-r--r--  drivers/mtd/nand/cmx270_nand.c | 4
-rw-r--r--  drivers/mtd/ubi/eba.c | 2
-rw-r--r--  drivers/mtd/ubi/ubi.h | 2
-rw-r--r--  drivers/net/arcnet/arc-rawmode.c | 1
-rw-r--r--  drivers/net/arcnet/capmode.c | 1
-rw-r--r--  drivers/net/bnx2x_reg.h | 2
-rw-r--r--  drivers/net/bonding/bond_3ad.c | 2
-rw-r--r--  drivers/net/e1000/e1000_hw.c | 2
-rw-r--r--  drivers/net/gianfar_ethtool.c | 2
-rw-r--r--  drivers/net/ibm_newemac/core.c | 8
-rw-r--r--  drivers/net/igb/igb_main.c | 2
-rw-r--r--  drivers/net/ll_temac_main.c | 2
-rw-r--r--  drivers/net/macb.c | 2
-rw-r--r--  drivers/net/ni52.c | 4
-rw-r--r--  drivers/net/qlge/qlge_main.c | 4
-rw-r--r--  drivers/net/rionet.c | 2
-rw-r--r--  drivers/net/skfp/pcmplc.c | 2
-rw-r--r--  drivers/net/skfp/pmf.c | 8
-rw-r--r--  drivers/net/skge.c | 2
-rw-r--r--  drivers/net/sky2.c | 2
-rw-r--r--  drivers/net/vxge/vxge-config.h | 2
-rw-r--r--  drivers/net/vxge/vxge-main.c | 2
-rw-r--r--  drivers/net/wireless/ath/ath5k/reg.h | 2
-rw-r--r--  drivers/net/wireless/atmel.c | 2
-rw-r--r--  drivers/net/wireless/zd1211rw/zd_chip.c | 2
-rw-r--r--  drivers/oprofile/oprofilefs.c | 2
-rw-r--r--  drivers/parisc/ccio-dma.c | 4
-rw-r--r--  drivers/parisc/sba_iommu.c | 4
-rw-r--r--  drivers/pcmcia/pcmcia_ioctl.c | 36
-rw-r--r--  drivers/pcmcia/sa1100_jornada720.c | 156
-rw-r--r--  drivers/pcmcia/yenta_socket.c | 2
-rw-r--r--  drivers/pnp/driver.c | 10
-rw-r--r--  drivers/rtc/rtc-omap.c | 2
-rw-r--r--  drivers/s390/block/dasd.c | 2
-rw-r--r--  drivers/s390/block/dasd_eckd.c | 2
-rw-r--r--  drivers/s390/block/dasd_int.h | 2
-rw-r--r--  drivers/s390/block/dcssblk.c | 2
-rw-r--r--  drivers/s390/block/xpram.c | 2
-rw-r--r--  drivers/s390/char/tape_block.c | 2
-rw-r--r--  drivers/s390/net/netiucv.c | 2
-rw-r--r--  drivers/s390/scsi/zfcp_scsi.c | 2
-rw-r--r--  drivers/sbus/char/jsflash.c | 2
-rw-r--r--  drivers/scsi/aic7xxx/aic7xxx_core.c | 2
-rw-r--r--  drivers/scsi/bnx2i/bnx2i_hwi.c | 2
-rw-r--r--  drivers/scsi/lpfc/lpfc_ct.c | 2
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas.c | 2
-rw-r--r--  drivers/scsi/qla4xxx/ql4_os.c | 4
-rw-r--r--  drivers/scsi/sd.c | 2
-rw-r--r--  drivers/scsi/sr.c | 2
-rw-r--r--  drivers/spi/omap_uwire.c | 2
-rw-r--r--  drivers/spi/spi_s3c24xx.c | 2
-rw-r--r--  drivers/staging/rt2860/rtmp.h | 2
-rw-r--r--  drivers/usb/class/cdc-wdm.c | 2
-rw-r--r--  drivers/usb/core/inode.c | 3
-rw-r--r--  drivers/usb/gadget/inode.c | 2
-rw-r--r--  drivers/usb/host/ehci-pci.c | 2
-rw-r--r--  drivers/usb/host/ehci.h | 2
-rw-r--r--  drivers/usb/host/ohci-q.c | 2
-rw-r--r--  drivers/usb/host/xhci.h | 2
-rw-r--r--  drivers/usb/serial/cypress_m8.h | 2
-rw-r--r--  drivers/usb/serial/io_edgeport.c | 2
-rw-r--r--  drivers/usb/serial/kl5kusb105.c | 2
-rw-r--r--  drivers/usb/serial/spcp8x5.c | 2
-rw-r--r--  drivers/usb/wusbcore/wa-hc.h | 2
-rw-r--r--  drivers/uwb/i1480/i1480u-wlp/netdev.c | 2
-rw-r--r--  drivers/video/cfbcopyarea.c | 2
-rw-r--r--  drivers/video/imxfb.c | 2
-rw-r--r--  drivers/video/omap/lcd_h3.c | 4
-rw-r--r--  drivers/video/omap/lcd_h4.c | 4
-rw-r--r--  drivers/video/omap/lcd_inn1510.c | 4
-rw-r--r--  drivers/video/omap/lcd_inn1610.c | 4
-rw-r--r--  drivers/video/omap/lcd_osk.c | 4
-rw-r--r--  drivers/video/omap/lcd_palmte.c | 4
-rw-r--r--  drivers/video/omap/lcd_palmtt.c | 4
-rw-r--r--  drivers/video/omap/lcd_palmz71.c | 4
-rw-r--r--  drivers/video/s3c2410fb.c | 2
-rw-r--r--  drivers/xen/balloon.c | 6
-rw-r--r--  fs/Kconfig | 1
-rw-r--r--  fs/afs/flock.c | 2
-rw-r--r--  fs/aio.c | 47
-rw-r--r--  fs/autofs/dirhash.c | 2
-rw-r--r--  fs/befs/linuxvfs.c | 2
-rw-r--r--  fs/binfmt_elf.c | 44
-rw-r--r--  fs/binfmt_elf_fdpic.c | 56
-rw-r--r--  fs/block_dev.c | 2
-rw-r--r--  fs/btrfs/disk-io.c | 2
-rw-r--r--  fs/btrfs/inode.c | 28
-rw-r--r--  fs/btrfs/super.c | 4
-rw-r--r--  fs/btrfs/tree-log.c | 2
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 4
-rw-r--r--  fs/cifs/cifsfs.c | 4
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h | 2
-rw-r--r--  fs/ecryptfs/mmap.c | 2
-rw-r--r--  fs/ext2/xip.c | 2
-rw-r--r--  fs/ext3/super.c | 4
-rw-r--r--  fs/ext4/inode.c | 2
-rw-r--r--  fs/ext4/super.c | 4
-rw-r--r--  fs/gfs2/rgrp.c | 2
-rw-r--r--  fs/hugetlbfs/inode.c | 21
-rw-r--r--  fs/inode.c | 16
-rw-r--r--  fs/jffs2/super.c | 2
-rw-r--r--  fs/lockd/clntlock.c | 2
-rw-r--r--  fs/lockd/clntproc.c | 2
-rw-r--r--  fs/lockd/host.c | 4
-rw-r--r--  fs/lockd/mon.c | 2
-rw-r--r--  fs/lockd/svclock.c | 2
-rw-r--r--  fs/lockd/svcsubs.c | 2
-rw-r--r--  fs/locks.c | 2
-rw-r--r--  fs/nfs/callback_xdr.c | 2
-rw-r--r--  fs/nfs/nfs4state.c | 2
-rw-r--r--  fs/nfsd/export.c | 2
-rw-r--r--  fs/nfsd/nfs3xdr.c | 75
-rw-r--r--  fs/nfsd/nfs4acl.c | 4
-rw-r--r--  fs/nfsd/nfs4callback.c | 263
-rw-r--r--  fs/nfsd/nfs4proc.c | 89
-rw-r--r--  fs/nfsd/nfs4state.c | 685
-rw-r--r--  fs/nfsd/nfs4xdr.c | 42
-rw-r--r--  fs/nfsd/nfsctl.c | 8
-rw-r--r--  fs/nfsd/nfsfh.c | 158
-rw-r--r--  fs/nfsd/nfssvc.c | 54
-rw-r--r--  fs/nfsd/vfs.c | 9
-rw-r--r--  fs/nilfs2/btnode.c | 2
-rw-r--r--  fs/nilfs2/file.c | 2
-rw-r--r--  fs/nilfs2/gcinode.c | 2
-rw-r--r--  fs/nilfs2/inode.c | 2
-rw-r--r--  fs/nilfs2/mdt.c | 4
-rw-r--r--  fs/nilfs2/namei.c | 6
-rw-r--r--  fs/nilfs2/nilfs.h | 10
-rw-r--r--  fs/nilfs2/super.c | 4
-rw-r--r--  fs/ntfs/layout.h | 2
-rw-r--r--  fs/ntfs/malloc.h | 2
-rw-r--r--  fs/ocfs2/quota.h | 2
-rw-r--r--  fs/ocfs2/quota_global.c | 4
-rw-r--r--  fs/ocfs2/super.c | 2
-rw-r--r--  fs/omfs/dir.c | 2
-rw-r--r--  fs/omfs/file.c | 4
-rw-r--r--  fs/omfs/inode.c | 2
-rw-r--r--  fs/omfs/omfs.h | 6
-rw-r--r--  fs/partitions/check.c | 2
-rw-r--r--  fs/proc/base.c | 44
-rw-r--r--  fs/proc/kcore.c | 35
-rw-r--r--  fs/proc/meminfo.c | 4
-rw-r--r--  fs/proc/page.c | 5
-rw-r--r--  fs/proc/task_mmu.c | 28
-rw-r--r--  fs/quota/dquot.c | 4
-rw-r--r--  fs/reiserfs/super.c | 4
-rw-r--r--  fs/romfs/super.c | 2
-rw-r--r--  fs/squashfs/super.c | 4
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/ubifs/xattr.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_quotaops.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.h | 2
-rw-r--r--  fs/xfs/xfs_fs.h | 2
-rw-r--r--  include/acpi/actypes.h | 2
-rw-r--r--  include/acpi/platform/acgcc.h | 2
-rw-r--r--  include/asm-generic/mman-common.h | 3
-rw-r--r--  include/asm-generic/mman.h | 1
-rw-r--r--  include/linux/bootmem.h | 5
-rw-r--r--  include/linux/capability.h | 2
-rw-r--r--  include/linux/flex_array.h | 32
-rw-r--r--  include/linux/fs.h | 8
-rw-r--r--  include/linux/genhd.h | 2
-rw-r--r--  include/linux/gfp.h | 13
-rw-r--r--  include/linux/hid.h | 3
-rw-r--r--  include/linux/hugetlb.h | 26
-rw-r--r--  include/linux/kmemcheck.h | 5
-rw-r--r--  include/linux/ksm.h | 79
-rw-r--r--  include/linux/lis3lv02d.h | 11
-rw-r--r--  include/linux/lockd/lockd.h | 45
-rw-r--r--  include/linux/mISDNif.h | 2
-rw-r--r--  include/linux/mempool.h | 10
-rw-r--r--  include/linux/mm.h | 19
-rw-r--r--  include/linux/mm_inline.h | 31
-rw-r--r--  include/linux/mmu_context.h | 9
-rw-r--r--  include/linux/mmu_notifier.h | 34
-rw-r--r--  include/linux/mmzone.h | 17
-rw-r--r--  include/linux/namei.h | 2
-rw-r--r--  include/linux/nfs4.h | 2
-rw-r--r--  include/linux/nfsd/nfsd.h | 7
-rw-r--r--  include/linux/nfsd/state.h | 77
-rw-r--r--  include/linux/nfsd/xdr4.h | 19
-rw-r--r--  include/linux/oom.h | 11
-rw-r--r--  include/linux/page-flags.h | 25
-rw-r--r--  include/linux/pnp.h | 1
-rw-r--r--  include/linux/poison.h | 3
-rw-r--r--  include/linux/quotaops.h | 4
-rw-r--r--  include/linux/rmap.h | 6
-rw-r--r--  include/linux/sched.h | 16
-rw-r--r--  include/linux/sunrpc/auth.h | 4
-rw-r--r--  include/linux/sunrpc/clnt.h | 114
-rw-r--r--  include/linux/sunrpc/svc.h | 2
-rw-r--r--  include/linux/sunrpc/svc_xprt.h | 1
-rw-r--r--  include/linux/sunrpc/svcsock.h | 1
-rw-r--r--  include/linux/sunrpc/xprt.h | 19
-rw-r--r--  include/linux/sunrpc/xprtrdma.h | 5
-rw-r--r--  include/linux/sunrpc/xprtsock.h | 11
-rw-r--r--  include/linux/swap.h | 20
-rw-r--r--  include/linux/usb.h | 2
-rw-r--r--  include/linux/vmstat.h | 16
-rw-r--r--  include/linux/workqueue.h | 2
-rw-r--r--  include/rdma/ib_cm.h | 2
-rw-r--r--  include/scsi/fc/fc_fc2.h | 3
-rw-r--r--  include/trace/events/kmem.h | 163
-rw-r--r--  init/main.c | 4
-rw-r--r--  ipc/mqueue.c | 4
-rw-r--r--  ipc/shm.c | 2
-rw-r--r--  kernel/cgroup.c | 6
-rw-r--r--  kernel/fork.c | 21
-rw-r--r--  kernel/module.c | 5
-rw-r--r--  kernel/panic.c | 2
-rw-r--r--  kernel/pid.c | 15
-rw-r--r--  kernel/power/process.c | 1
-rw-r--r--  kernel/power/snapshot.c | 2
-rw-r--r--  kernel/sched.c | 13
-rw-r--r--  kernel/trace/Kconfig | 2
-rw-r--r--  kernel/trace/trace_hw_branches.c | 2
-rw-r--r--  kernel/tracepoint.c | 2
-rw-r--r--  lib/Kconfig.kmemcheck | 3
-rw-r--r--  lib/flex_array.c | 121
-rw-r--r--  lib/vsprintf.c | 14
-rw-r--r--  lib/zlib_deflate/deflate.c | 4
-rw-r--r--  mm/Kconfig | 12
-rw-r--r--  mm/Kconfig.debug | 12
-rw-r--r--  mm/Makefile | 3
-rw-r--r--  mm/filemap.c | 4
-rw-r--r--  mm/hugetlb.c | 251
-rw-r--r--  mm/internal.h | 10
-rw-r--r--  mm/ksm.c | 1703
-rw-r--r--  mm/madvise.c | 53
-rw-r--r--  mm/memcontrol.c | 2
-rw-r--r--  mm/memory.c | 212
-rw-r--r--  mm/memory_hotplug.c | 7
-rw-r--r--  mm/mempool.c | 7
-rw-r--r--  mm/migrate.c | 24
-rw-r--r--  mm/mlock.c | 128
-rw-r--r--  mm/mmap.c | 51
-rw-r--r--  mm/mmu_context.c | 58
-rw-r--r--  mm/mmu_notifier.c | 20
-rw-r--r--  mm/mremap.c | 14
-rw-r--r--  mm/nommu.c | 45
-rw-r--r--  mm/oom_kill.c | 86
-rw-r--r--  mm/page-writeback.c | 5
-rw-r--r--  mm/page_alloc.c | 284
-rw-r--r--  mm/page_cgroup.c | 12
-rw-r--r--  mm/rmap.c | 78
-rw-r--r--  mm/shmem.c | 15
-rw-r--r--  mm/slab.c | 2
-rw-r--r--  mm/slub.c | 3
-rw-r--r--  mm/sparse-vmemmap.c | 8
-rw-r--r--  mm/sparse.c | 9
-rw-r--r--  mm/swap.c | 8
-rw-r--r--  mm/swap_state.c | 143
-rw-r--r--  mm/swapfile.c | 4
-rw-r--r--  mm/vmalloc.c | 221
-rw-r--r--  mm/vmscan.c | 213
-rw-r--r--  mm/vmstat.c | 5
-rw-r--r--  net/bluetooth/hidp/core.c | 7
-rw-r--r--  net/core/sock.c | 4
-rw-r--r--  net/dccp/proto.c | 6
-rw-r--r--  net/decnet/dn_route.c | 2
-rw-r--r--  net/ipv4/route.c | 2
-rw-r--r--  net/ipv4/tcp.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 4
-rw-r--r--  net/netfilter/x_tables.c | 2
-rw-r--r--  net/netfilter/xt_hashlimit.c | 8
-rw-r--r--  net/netlink/af_netlink.c | 6
-rw-r--r--  net/rxrpc/ar-call.c | 2
-rw-r--r--  net/sched/sch_hfsc.c | 2
-rw-r--r--  net/sctp/protocol.c | 6
-rw-r--r--  net/socket.c | 2
-rw-r--r--  net/sunrpc/auth.c | 20
-rw-r--r--  net/sunrpc/auth_generic.c | 4
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c | 6
-rw-r--r--  net/sunrpc/cache.c | 109
-rw-r--r--  net/sunrpc/clnt.c | 1
-rw-r--r--  net/sunrpc/rpc_pipe.c | 2
-rw-r--r--  net/sunrpc/sched.c | 7
-rw-r--r--  net/sunrpc/sunrpc.h | 14
-rw-r--r--  net/sunrpc/svc_xprt.c | 25
-rw-r--r--  net/sunrpc/svcauth_unix.c | 1
-rw-r--r--  net/sunrpc/svcsock.c | 335
-rw-r--r--  net/sunrpc/xprt.c | 15
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 4
-rw-r--r--  net/sunrpc/xprtsock.c | 242
-rw-r--r--  net/wireless/wext-compat.c | 2
-rw-r--r--  scripts/basic/fixdep.c | 4
-rwxr-xr-x  scripts/checkpatch.pl | 106
-rwxr-xr-x  scripts/get_maintainer.pl | 421
-rw-r--r--  scripts/mod/sumversion.c | 2
-rw-r--r--  sound/oss/swarm_cs4297a.c | 3
-rw-r--r--  sound/oss/sys_timer.c | 3
-rw-r--r--  sound/soc/codecs/wm9081.c | 2
-rw-r--r--  sound/soc/pxa/pxa-ssp.c | 2
-rw-r--r--  sound/soc/s3c24xx/s3c24xx_uda134x.c | 2
508 files changed, 9801 insertions, 3351 deletions
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index 8e145857fc9..df0d089d0fb 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -568,7 +568,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 <para>
 The blocks in which the tables are stored are procteted against
 accidental access by marking them bad in the memory bad block
-table. The bad block table managment functions are allowed
+table. The bad block table management functions are allowed
 to circumvernt this protection.
 </para>
 <para>
diff --git a/Documentation/DocBook/scsi.tmpl b/Documentation/DocBook/scsi.tmpl
index 10a150ae2a7..d87f4569e76 100644
--- a/Documentation/DocBook/scsi.tmpl
+++ b/Documentation/DocBook/scsi.tmpl
@@ -317,7 +317,7 @@
 <para>
 The SAS transport class contains common code to deal with SAS HBAs,
 an aproximated representation of SAS topologies in the driver model,
-and various sysfs attributes to expose these topologies and managment
+and various sysfs attributes to expose these topologies and management
 interfaces to userspace.
 </para>
 <para>
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 5c555a8b39e..b7f9d3b4bbf 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -183,7 +183,7 @@ the MAN-PAGES maintainer (as listed in the MAINTAINERS file)
 a man-pages patch, or at least a notification of the change,
 so that some information makes its way into the manual pages.
 
-Even if the maintainer did not respond in step #4, make sure to ALWAYS
+Even if the maintainer did not respond in step #5, make sure to ALWAYS
 copy the maintainer when you change their code.
 
 For small patches you may want to CC the Trivial Patch Monkey
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
index 05d81cbcb2e..5920fe26e6f 100644
--- a/Documentation/filesystems/nfs41-server.txt
+++ b/Documentation/filesystems/nfs41-server.txt
@@ -11,6 +11,11 @@ the /proc/fs/nfsd/versions control file. Note that to write this
 control file, the nfsd service must be taken down. Use your user-mode
 nfs-utils to set this up; see rpc.nfsd(8)
 
+(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
+"-4", respectively.  Therefore, code meant to work on both new and old
+kernels must turn 4.1 on or off *before* turning support for version 4
+on or off; rpc.nfsd does this correctly.)
+
 The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
 on the latest NFSv4.1 Internet Draft:
 http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
@@ -25,6 +30,49 @@ are still under development out of tree.
 See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
 for more information.
 
+The current implementation is intended for developers only: while it
+does support ordinary file operations on clients we have tested against
+(including the linux client), it is incomplete in ways which may limit
+features unexpectedly, cause known bugs in rare cases, or cause
+interoperability problems with future clients.  Known issues:
+
+	- gss support is questionable: currently mounts with kerberos
+	  from a linux client are possible, but we aren't really
+	  conformant with the spec (for example, we don't use kerberos
+	  on the backchannel correctly).
+	- no trunking support: no clients currently take advantage of
+	  trunking, but this is a mandatory failure, and its use is
+	  recommended to clients in a number of places.  (E.g. to ensure
+	  timely renewal in case an existing connection's retry timeouts
+	  have gotten too long; see section 8.3 of the draft.)
+	  Therefore, lack of this feature may cause future clients to
+	  fail.
+	- Incomplete backchannel support: incomplete backchannel gss
+	  support and no support for BACKCHANNEL_CTL mean that
+	  callbacks (hence delegations and layouts) may not be
+	  available and clients confused by the incomplete
+	  implementation may fail.
+	- Server reboot recovery is unsupported; if the server reboots,
+	  clients may fail.
+	- We do not support SSV, which provides security for shared
+	  client-server state (thus preventing unauthorized tampering
+	  with locks and opens, for example).  It is mandatory for
+	  servers to support this, though no clients use it yet.
+	- Mandatory operations which we do not support, such as
+	  DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
+	  TEST_STATEID, are not currently used by clients, but will be
+	  (and the spec recommends their uses in common cases), and
+	  clients should not be expected to know how to recover from the
+	  case where they are not supported.  This will eventually cause
+	  interoperability failures.
+
+In addition, some limitations are inherited from the current NFSv4
+implementation:
+
+	- Incomplete delegation enforcement: if a file is renamed or
+	  unlinked, a client holding a delegation may continue to
+	  indefinitely allow opens of the file under the old name.
+
 The table below, taken from the NFSv4.1 document, lists
 the operations that are mandatory to implement (REQ), optional
 (OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -142,6 +190,12 @@ NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
 
 Implementation notes:
 
+DELEGPURGE:
+* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
+  CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
+  persist across client reboots).  Thus we need not implement this for
+  now.
+
 EXCHANGE_ID:
 * only SP4_NONE state protection supported
 * implementation ids are ignored
diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfsroot.txt
index 68baddf3c3e..3ba0b945aaf 100644
--- a/Documentation/filesystems/nfsroot.txt
+++ b/Documentation/filesystems/nfsroot.txt
@@ -105,7 +105,7 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
 		the client address and this parameter is NOT empty only
 		replies from the specified server are accepted.
 
-		Only required for for NFS root. That is autoconfiguration
+		Only required for NFS root. That is autoconfiguration
 		will not be triggered if it is missing and NFS root is not
 		in operation.
 
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ffead13f944..75988ba26a5 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -375,6 +375,19 @@ of memory currently marked as referenced or accessed.
 This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.
 
+The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
+bits on both physical and virtual pages associated with a process.
+To clear the bits for all the pages associated with the process
+	> echo 1 > /proc/PID/clear_refs
+
+To clear the bits for the anonymous pages associated with the process
+	> echo 2 > /proc/PID/clear_refs
+
+To clear the bits for the file mapped pages associated with the process
+	> echo 3 > /proc/PID/clear_refs
+Any other value written to /proc/PID/clear_refs will have no effect.
+
+
 1.2 Kernel data
 ---------------
 
@@ -1032,9 +1045,9 @@ Various pieces of information about kernel activity are available in the
 since the system first booted.  For a quick look, simply cat the file:
 
   > cat /proc/stat
-  cpu  2255 34 2290 22625563 6290 127 456 0
-  cpu0 1132 34 1441 11311718 3675 127 438 0
-  cpu1 1123 0 849 11313845 2614 0 18 0
+  cpu  2255 34 2290 22625563 6290 127 456 0 0
+  cpu0 1132 34 1441 11311718 3675 127 438 0 0
+  cpu1 1123 0 849 11313845 2614 0 18 0 0
   intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...]
   ctxt 1990473
   btime 1062191376
@@ -1056,6 +1069,7 @@ second). The meanings of the columns are as follows, from left to right:
 - irq: servicing interrupts
 - softirq: servicing softirqs
 - steal: involuntary wait
+- guest: running a guest
 
 The "intr" line gives counts of interrupts serviced since boot time, for each
 of the possible system interrupts. The first column is the total of all
@@ -1191,7 +1205,7 @@ The following heuristics are then applied:
  * if the task was reniced, its score doubles
  * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE
    or CAP_SYS_RAWIO) have their score divided by 4
- * if oom condition happened in one cpuset and checked task does not belong
+ * if oom condition happened in one cpuset and checked process does not belong
    to it, its score is divided by 8
  * the resulting score is multiplied by two to the power of oom_adj, i.e.
    points <<= oom_adj when it is positive and
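
Note: the clear_refs interface added in the first proc.txt hunk above is driven
entirely by writes to a procfs file, so it is easy to exercise from a small
program. A minimal C sketch (the helper name is illustrative; the path and
values come straight from the documented text):

	#include <stdio.h>
	#include <stdlib.h>

	/* Reset the referenced bits for every page mapped by the given pid.
	 * "1" is the "all pages" case documented above; "2" (anonymous only)
	 * and "3" (file mapped only) are written the same way. */
	int clear_refs(int pid)
	{
		char path[64];
		FILE *f;

		snprintf(path, sizeof(path), "/proc/%d/clear_refs", pid);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fputs("1", f);
		return fclose(f);
	}

	int main(int argc, char **argv)
	{
		return (argc == 2 && clear_refs(atoi(argv[1])) == 0) ? 0 : 1;
	}

After a reset, re-reading /proc/PID/smaps and summing the Referenced: fields
shows how much of each mapping the task has touched since.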
diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt
index 40ec6335276..e7ca6478cd9 100644
--- a/Documentation/gcov.txt
+++ b/Documentation/gcov.txt
@@ -47,7 +47,7 @@ Possible uses:
 
 Configure the kernel with:
 
-        CONFIG_DEBUGFS=y
+        CONFIG_DEBUG_FS=y
         CONFIG_GCOV_KERNEL=y
 
 and to get coverage data for the entire kernel:
diff --git a/Documentation/hwmon/hpfall.c b/Documentation/hwmon/hpfall.c
index bbea1ccfd46..681ec22b9d0 100644
--- a/Documentation/hwmon/hpfall.c
+++ b/Documentation/hwmon/hpfall.c
@@ -16,6 +16,34 @@
 #include <stdint.h>
 #include <errno.h>
 #include <signal.h>
+#include <sys/mman.h>
+#include <sched.h>
+
+char unload_heads_path[64];
+
+int set_unload_heads_path(char *device)
+{
+	char devname[64];
+
+	if (strlen(device) <= 5 || strncmp(device, "/dev/", 5) != 0)
+		return -EINVAL;
+	strncpy(devname, device + 5, sizeof(devname));
+
+	snprintf(unload_heads_path, sizeof(unload_heads_path),
+		 "/sys/block/%s/device/unload_heads", devname);
+	return 0;
+}
+int valid_disk(void)
+{
+	int fd = open(unload_heads_path, O_RDONLY);
+	if (fd < 0) {
+		perror(unload_heads_path);
+		return 0;
+	}
+
+	close(fd);
+	return 1;
+}
 
 void write_int(char *path, int i)
 {
@@ -40,7 +68,7 @@ void set_led(int on)
 
 void protect(int seconds)
 {
-	write_int("/sys/block/sda/device/unload_heads", seconds*1000);
+	write_int(unload_heads_path, seconds*1000);
 }
 
 int on_ac(void)
@@ -57,45 +85,62 @@ void ignore_me(void)
 {
 	protect(0);
 	set_led(0);
-
 }
 
-int main(int argc, char* argv[])
+int main(int argc, char **argv)
 {
 	int fd, ret;
+	struct sched_param param;
+
+	if (argc == 1)
+		ret = set_unload_heads_path("/dev/sda");
+	else if (argc == 2)
+		ret = set_unload_heads_path(argv[1]);
+	else
+		ret = -EINVAL;
+
+	if (ret || !valid_disk()) {
+		fprintf(stderr, "usage: %s <device> (default: /dev/sda)\n",
+			argv[0]);
+		exit(1);
+	}
+
+	fd = open("/dev/freefall", O_RDONLY);
+	if (fd < 0) {
+		perror("/dev/freefall");
+		return EXIT_FAILURE;
+	}
 
-	fd = open("/dev/freefall", O_RDONLY);
-	if (fd < 0) {
-		perror("open");
-		return EXIT_FAILURE;
-	}
+	daemon(0, 0);
+	param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+	sched_setscheduler(0, SCHED_FIFO, &param);
+	mlockall(MCL_CURRENT|MCL_FUTURE);
 
 	signal(SIGALRM, ignore_me);
 
 	for (;;) {
 		unsigned char count;
 
 		ret = read(fd, &count, sizeof(count));
 		alarm(0);
 		if ((ret == -1) && (errno == EINTR)) {
 			/* Alarm expired, time to unpark the heads */
 			continue;
 		}
 
 		if (ret != sizeof(count)) {
 			perror("read");
 			break;
 		}
 
 		protect(21);
 		set_led(1);
-		if (1 || on_ac() || lid_open()) {
+		if (1 || on_ac() || lid_open())
 			alarm(2);
-		} else {
+		else
 			alarm(20);
-		}
 	}
 
 	close(fd);
 	return EXIT_SUCCESS;
 }
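
One caveat worth knowing if the set_unload_heads_path() code above is reused:
strncpy() does not NUL-terminate the destination when the source fills the
buffer, so a stripped device name of 64 or more characters would leave devname
unterminated before the snprintf(). A defensive variant (illustrative only,
not part of the patch):

	#include <string.h>

	/* NUL-safe version of the copy in set_unload_heads_path(): reserve
	 * one byte for the terminator strncpy() alone does not guarantee. */
	void copy_devname(char devname[64], const char *device)
	{
		strncpy(devname, device + 5, 63);
		devname[63] = '\0';
	}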
diff --git a/Documentation/hwmon/pc87427 b/Documentation/hwmon/pc87427
index d1ebbe510f3..db5cc1227a8 100644
--- a/Documentation/hwmon/pc87427
+++ b/Documentation/hwmon/pc87427
@@ -34,5 +34,5 @@ Fan rotation speeds are reported as 14-bit values from a gated clock
 signal. Speeds down to 83 RPM can be measured.
 
 An alarm is triggered if the rotation speed drops below a programmable
-limit. Another alarm is triggered if the speed is too low to to be measured
+limit. Another alarm is triggered if the speed is too low to be measured
 (including stalled or missing fan).
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0f17d16dc10..c363840cdce 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -933,7 +933,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			1 -- enable informational integrity auditing messages.
 
 	ima_hash=	[IMA]
-			Formt: { "sha1" | "md5" }
+			Format: { "sha1" | "md5" }
 			default: "sha1"
 
 	ima_tcb		[IMA]
diff --git a/Documentation/kmemcheck.txt b/Documentation/kmemcheck.txt
index 363044609da..c28f82895d6 100644
--- a/Documentation/kmemcheck.txt
+++ b/Documentation/kmemcheck.txt
@@ -43,26 +43,7 @@ feature.
 1. Downloading
 ==============
 
-kmemcheck can only be downloaded using git. If you want to write patches
-against the current code, you should use the kmemcheck development branch of
-the tip tree. It is also possible to use the linux-next tree, which also
-includes the latest version of kmemcheck.
-
-Assuming that you've already cloned the linux-2.6.git repository, all you
-have to do is add the -tip tree as a remote, like this:
-
-	$ git remote add tip git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git
-
-To actually download the tree, fetch the remote:
-
-	$ git fetch tip
-
-And to check out a new local branch with the kmemcheck code:
-
-	$ git checkout -b kmemcheck tip/kmemcheck
-
-General instructions for the -tip tree can be found here:
-http://people.redhat.com/mingo/tip.git/readme.txt
+As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel.
 
 
 2. Configuring and compiling
diff --git a/Documentation/memory.txt b/Documentation/memory.txt
index 2b3dedd3953..802efe58647 100644
--- a/Documentation/memory.txt
+++ b/Documentation/memory.txt
@@ -1,18 +1,7 @@
 There are several classic problems related to memory on Linux
 systems.
 
-	1) There are some buggy motherboards which cannot properly
-	   deal with the memory above 16MB.  Consider exchanging
-	   your motherboard.
-
-	2) You cannot do DMA on the ISA bus to addresses above
-	   16M.  Most device drivers under Linux allow the use
-	   of bounce buffers which work around this problem.  Drivers
-	   that don't use bounce buffers will be unstable with
-	   more than 16M installed.  Drivers that use bounce buffers
-	   will be OK, but may have slightly higher overhead.
-
-	3) There are some motherboards that will not cache above
+	1) There are some motherboards that will not cache above
 	   a certain quantity of memory.  If you have one of these
 	   motherboards, your system will be SLOWER, not faster
 	   as you add more memory.  Consider exchanging your
@@ -24,7 +13,7 @@ It can also tell Linux to use less memory than is actually installed.
 If you use "mem=" on a machine with PCI, consider using "memmap=" to avoid
 physical address space collisions.
 
-See the documentation of your boot loader (LILO, loadlin, etc.) about
+See the documentation of your boot loader (LILO, grub, loadlin, etc.) about
 how to pass options to the kernel.
 
 There are other memory problems which Linux cannot deal with.  Random
@@ -42,19 +31,3 @@ Try:
    with the vendor. Consider testing it with memtest86 yourself.
 
  * Exchanging your CPU, cache, or motherboard for one that works.
-
- * Disabling the cache from the BIOS.
-
- * Try passing the "mem=4M" option to the kernel to limit
-   Linux to using a very small amount of memory. Use "memmap="-option
-   together with "mem=" on systems with PCI to avoid physical address
-   space collisions.
-
-
-Other tricks:
-
- * Try passing the "no-387" option to the kernel to ignore
-   a buggy FPU.
-
- * Try passing the "no-hlt" option to disable the potentially
-   buggy HLT instruction in your CPU.
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index eaa1a25946c..ee31369e9e5 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -96,7 +96,7 @@ Example code - drivers hinting an alpha2:
 
 This example comes from the zd1211rw device driver. You can start
 by having a mapping of your device's EEPROM country/regulatory
-domain value to to a specific alpha2 as follows:
+domain value to a specific alpha2 as follows:
 
 static struct zd_reg_alpha2_map reg_alpha2_map[] = {
 	{ ZD_REGDOMAIN_FCC, "US" },
diff --git a/Documentation/numastat.txt b/Documentation/numastat.txt
index 80133ace1eb..9fcc9a608dc 100644
--- a/Documentation/numastat.txt
+++ b/Documentation/numastat.txt
@@ -7,10 +7,10 @@ All units are pages. Hugepages have separate counters.
 
 numa_hit	A process wanted to allocate memory from this node,
 		and succeeded.
-numa_miss	A process wanted to allocate memory from this node,
-		but ended up with memory from another.
-numa_foreign	A process wanted to allocate on another node,
-		but ended up with memory from this one.
+numa_miss	A process wanted to allocate memory from another node,
+		but ended up with memory from this node.
+numa_foreign	A process wanted to allocate on this node,
+		but ended up with memory from another one.
 local_node	A process ran on this node and got memory from it.
 other_node	A process ran on this node and got memory from another node.
 interleave_hit	Interleaving wanted to allocate from this node
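
As the corrected definitions above make explicit, numa_miss and numa_foreign
are two views of the same event, so summed over all nodes the two totals must
match. The per-node counters are exported through sysfs by drivers/base/node.c;
a small C sketch that dumps them (the node path below is believed stable, but
verify it against your kernel version):

	#include <stdio.h>

	/* Print the numastat counters for one node; each line of the sysfs
	 * file is "<counter> <pages>", matching the fields described above. */
	void dump_numastat(int node)
	{
		char path[64], name[32];
		unsigned long long pages;
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/node/node%d/numastat", node);
		f = fopen(path, "r");
		if (!f)
			return;
		while (fscanf(f, "%31s %llu", name, &pages) == 2)
			printf("node%d %s %llu\n", node, name, pages);
		fclose(f);
	}

	int main(void)
	{
		dump_numastat(0);
		return 0;
	}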
diff --git a/Documentation/powerpc/dts-bindings/marvell.txt b/Documentation/powerpc/dts-bindings/marvell.txt
index 3708a2fd474..f1533d91953 100644
--- a/Documentation/powerpc/dts-bindings/marvell.txt
+++ b/Documentation/powerpc/dts-bindings/marvell.txt
@@ -32,7 +32,7 @@ prefixed with the string "marvell,", for Marvell Technology Group Ltd.
 	devices.  This field represents the number of cells needed to
 	represent the address of the memory-mapped registers of devices
 	within the system controller chip.
-   - #size-cells : Size representation for for the memory-mapped
+   - #size-cells : Size representation for the memory-mapped
 	registers within the system controller chip.
    - #interrupt-cells : Defines the width of cells used to represent
 	interrupts.
diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
index eaa4801f2ce..38e9e7cadc9 100644
--- a/Documentation/scsi/ChangeLog.megaraid
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -514,7 +514,7 @@ iv. Remove yield() while mailbox handshake in synchronous commands
 
 v.	Remove redundant __megaraid_busywait_mbox routine
 
-vi.	Fix bug in the managment module, which causes a system lockup when the
+vi.	Fix bug in the management module, which causes a system lockup when the
 	IO module is loaded and then unloaded, followed by executing any
 	management utility. The current version of management module does not
 	handle the adapter unregister properly.
diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt
index d7f181701dc..aec6549ab09 100644
--- a/Documentation/scsi/scsi_fc_transport.txt
+++ b/Documentation/scsi/scsi_fc_transport.txt
@@ -378,7 +378,7 @@ Vport Disable/Enable:
     int vport_disable(struct fc_vport *vport, bool disable)
 
 where:
-    vport:   Is vport to to be enabled or disabled
+    vport:   Is vport to be enabled or disabled
     disable: If "true", the vport is to be disabled.
              If "false", the vport is to be enabled.
 
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 97eebd63bed..f1708b79f96 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -387,7 +387,7 @@ STAC92HD73*
 STAC92HD83*
 ===========
   ref		Reference board
-  mic-ref	Reference board with power managment for ports
+  mic-ref	Reference board with power management for ports
   dell-s14	Dell laptop
   auto		BIOS setup (default)
 
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 2dbff53369d..3e5b63ebb82 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -319,25 +319,29 @@ This option can be used to select the type of process address
 space randomization that is used in the system, for architectures
 that support this feature.
 
-0 - Turn the process address space randomization off by default.
+0 - Turn the process address space randomization off.  This is the
+    default for architectures that do not support this feature anyways,
+    and kernels that are booted with the "norandmaps" parameter.
 
 1 - Make the addresses of mmap base, stack and VDSO page randomized.
     This, among other things, implies that shared libraries will be
-    loaded to random addresses. Also for PIE-linked binaries, the location
-    of code start is randomized.
+    loaded to random addresses.  Also for PIE-linked binaries, the
+    location of code start is randomized.  This is the default if the
+    CONFIG_COMPAT_BRK option is enabled.
 
-    With heap randomization, the situation is a little bit more
-    complicated.
-    There a few legacy applications out there (such as some ancient
+2 - Additionally enable heap randomization.  This is the default if
+    CONFIG_COMPAT_BRK is disabled.
+
+    There are a few legacy applications out there (such as some ancient
     versions of libc.so.5 from 1996) that assume that brk area starts
     just after the end of the code+bss.  These applications break when
     start of the brk area is randomized.  There are however no known
     non-legacy applications that would be broken this way, so for most
-    systems it is safe to choose full randomization. However there is
-    a CONFIG_COMPAT_BRK option for systems with ancient and/or broken
-    binaries, that makes heap non-randomized, but keeps all other
-    parts of process address space randomized if randomize_va_space
-    sysctl is turned on.
+    systems it is safe to choose full randomization.
+
+    Systems with ancient and/or broken binaries should be configured
+    with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
+    address space randomization.
 
 ==============================================================
 
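
The randomize_va_space sysctl above is system-wide. A single process tree can
opt out instead via personality(2) with ADDR_NO_RANDOMIZE, the mechanism behind
`setarch -R`. A minimal sketch, with error handling mostly elided:

	#include <sys/personality.h>
	#include <unistd.h>

	/* Re-exec a program with address space randomization disabled for
	 * this process only, leaving randomize_va_space itself untouched. */
	int main(int argc, char **argv)
	{
		if (argc < 2)
			return 1;
		personality(personality(0xffffffff) | ADDR_NO_RANDOMIZE);
		execvp(argv[1], &argv[1]);
		return 1; /* execvp() returns only on failure */
	}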
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index c4de6359d44..e6fb1ec2744 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -585,7 +585,9 @@ caching of directory and inode objects.
 At the default value of vfs_cache_pressure=100 the kernel will attempt to
 reclaim dentries and inodes at a "fair" rate with respect to pagecache and
 swapcache reclaim.  Decreasing vfs_cache_pressure causes the kernel to prefer
-to retain dentry and inode caches.  Increasing vfs_cache_pressure beyond 100
+to retain dentry and inode caches.  When vfs_cache_pressure=0, the kernel will
+never reclaim dentries and inodes due to memory pressure and this can easily
+lead to out-of-memory conditions.  Increasing vfs_cache_pressure beyond 100
 causes the kernel to prefer to reclaim dentries and inodes.
 
 ==============================================================
diff --git a/Documentation/trace/events-kmem.txt b/Documentation/trace/events-kmem.txt
new file mode 100644
index 00000000000..6ef2a8652e1
--- /dev/null
+++ b/Documentation/trace/events-kmem.txt
@@ -0,0 +1,107 @@
+			Subsystem Trace Points: kmem
+
+The tracing system kmem captures events related to object and page allocation
+within the kernel. Broadly speaking there are five major subheadings.
+
+  o Slab allocation of small objects of unknown type (kmalloc)
+  o Slab allocation of small objects of known type
+  o Page allocation
+  o Per-CPU Allocator Activity
+  o External Fragmentation
+
+This document will describe what each of the tracepoints is and why they
+might be useful.
+
+1. Slab allocation of small objects of unknown type
+===================================================
+kmalloc		call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s
+kmalloc_node	call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d
+kfree		call_site=%lx ptr=%p
+
+Heavy activity for these events may indicate that a specific cache is
+justified, particularly if kmalloc slab pages are becoming significantly
+internally fragmented as a result of the allocation pattern. By correlating
+kmalloc with kfree, it may be possible to identify memory leaks and where
+the allocation sites were.
+
+
+2. Slab allocation of small objects of known type
+=================================================
+kmem_cache_alloc	call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s
+kmem_cache_alloc_node	call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d
+kmem_cache_free		call_site=%lx ptr=%p
+
+These events are similar in usage to the kmalloc-related events except that
+it is likely easier to pin the event down to a specific cache. At the time
+of writing, no information is available on what slab is being allocated from,
+but the call_site can usually be used to extrapolate that information.
+
+3. Page allocation
+==================
+mm_page_alloc		page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s
+mm_page_alloc_zone_locked	page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d
+mm_page_free_direct	page=%p pfn=%lu order=%d
+mm_pagevec_free		page=%p pfn=%lu order=%d cold=%d
+
+These four events deal with page allocation and freeing. mm_page_alloc is
+a simple indicator of page allocator activity. Pages may be allocated from
+the per-CPU allocator (high performance) or the buddy allocator.
+
+If pages are allocated directly from the buddy allocator, the
+mm_page_alloc_zone_locked event is triggered. This event is important as high
+amounts of activity imply high activity on the zone->lock. Taking this lock
+impairs performance by disabling interrupts, dirtying cache lines between
+CPUs and serialising many CPUs.
+
+When a page is freed directly by the caller, the mm_page_free_direct event
+is triggered. Significant amounts of activity here could indicate that the
+callers should be batching their activities.
+
+When pages are freed using a pagevec, the mm_pagevec_free is
+triggered. Broadly speaking, pages are taken off the LRU lock in bulk and
+freed in batch with a pagevec. Significant amounts of activity here could
+indicate that the system is under memory pressure and can also indicate
+contention on the zone->lru_lock.
+
+4. Per-CPU Allocator Activity
+=============================
+mm_page_alloc_zone_locked	page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d
+mm_page_pcpu_drain		page=%p pfn=%lu order=%d cpu=%d migratetype=%d
+
+In front of the page allocator is a per-cpu page allocator. It exists only
+for order-0 pages, reduces contention on the zone->lock and reduces the
+amount of writing on struct page.
+
+When a per-CPU list is empty or pages of the wrong type are allocated,
+the zone->lock will be taken once and the per-CPU list refilled. The event
+triggered is mm_page_alloc_zone_locked for each page allocated with the
+event indicating whether it is for a percpu_refill or not.
+
+When the per-CPU list is too full, a number of pages are freed, each one
+of which triggers a mm_page_pcpu_drain event.
+
+The individual nature of the events is so that pages can be tracked
+between allocation and freeing. A number of drain or refill pages that occur
+consecutively imply the zone->lock being taken once. Large amounts of PCP
+refills and drains could imply an imbalance between CPUs where too much work
+is being concentrated in one place. It could also indicate that the per-CPU
+lists should be a larger size. Finally, large amounts of refills on one CPU
+and drains on another could be a factor in causing large amounts of cache
+line bounces due to writes between CPUs and worth investigating if pages
+can be allocated and freed on the same CPU through some algorithm change.
+
+5. External Fragmentation
+=========================
+mm_page_alloc_extfrag	page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d
+
+External fragmentation affects whether a high-order allocation will be
+successful or not. For some types of hardware, this is important although
+it is avoided where possible. If the system is using huge pages and needs
+to be able to resize the pool over the lifetime of the system, this value
+is important.
+
+Large numbers of this event imply that memory is fragmenting and
+high-order allocations will start failing at some time in the future. One
+means of reducing the occurrence of this event is to increase the size of
+min_free_kbytes in increments of 3*pageblock_size*nr_online_nodes where
+pageblock_size is usually the size of the default hugepage size.
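
The kmem events above are enabled through the same debugfs control files shown
in Documentation/trace/events.txt. For completeness, a C sketch that switches
on mm_page_alloc and streams the raw event lines (assumes debugfs is mounted
at /sys/kernel/debug):

	#include <stdio.h>

	#define TRACING "/sys/kernel/debug/tracing"

	/* Enable the mm_page_alloc tracepoint, then echo raw event lines;
	 * each one follows the "mm_page_alloc page=%p pfn=%lu ..." format
	 * documented above. */
	int main(void)
	{
		char line[512];
		FILE *ctl, *trace;

		ctl = fopen(TRACING "/events/kmem/mm_page_alloc/enable", "w");
		if (!ctl)
			return 1;
		fputs("1", ctl);
		fclose(ctl);

		trace = fopen(TRACING "/trace_pipe", "r");
		if (!trace)
			return 1;
		while (fgets(line, sizeof(line), trace))
			fputs(line, stdout);
		fclose(trace);
		return 0;
	}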
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index 78c45a87be5..02ac6ed38b2 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -72,7 +72,7 @@ To enable all events in sched subsystem:
 
 	# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
 
-To eanble all events:
+To enable all events:
 
 	# echo 1 > /sys/kernel/debug/tracing/events/enable
 
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 1b6292bbdd6..957b22fde2d 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -133,7 +133,7 @@ of ftrace. Here is a list of some of the key files:
 	than requested, the rest of the page will be used,
 	making the actual allocation bigger than requested.
 	( Note, the size may not be a multiple of the page size
-	  due to buffer managment overhead. )
+	  due to buffer management overhead. )
 
 	This can only be updated when the current_tracer
 	is set to "nop".
diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
new file mode 100644
index 00000000000..7df50e8cf4d
--- /dev/null
+++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
@@ -0,0 +1,418 @@
1#!/usr/bin/perl
2# This is a POC (proof of concept or piece of crap, take your pick) for reading the
3# text representation of trace output related to page allocation. It makes an attempt
4# to extract some high-level information on what is going on. The accuracy of the parser
5# may vary considerably
6#
7# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
8# other options
9# --prepend-parent Report on the parent proc and PID
10# --read-procstat If the trace lacks process info, get it from /proc
11# --ignore-pid Aggregate processes of the same name together
12#
13# Copyright (c) IBM Corporation 2009
14# Author: Mel Gorman <mel@csn.ul.ie>
15use strict;
16use Getopt::Long;
17
18# Tracepoint events
19use constant MM_PAGE_ALLOC => 1;
20use constant MM_PAGE_FREE_DIRECT => 2;
21use constant MM_PAGEVEC_FREE => 3;
22use constant MM_PAGE_PCPU_DRAIN => 4;
23use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5;
24use constant MM_PAGE_ALLOC_EXTFRAG => 6;
25use constant EVENT_UNKNOWN => 7;
26
27# Constants used to track state
28use constant STATE_PCPU_PAGES_DRAINED => 8;
29use constant STATE_PCPU_PAGES_REFILLED => 9;
30
31# High-level events extrapolated from tracepoints
32use constant HIGH_PCPU_DRAINS => 10;
33use constant HIGH_PCPU_REFILLS => 11;
34use constant HIGH_EXT_FRAGMENT => 12;
35use constant HIGH_EXT_FRAGMENT_SEVERE => 13;
36use constant HIGH_EXT_FRAGMENT_MODERATE => 14;
37use constant HIGH_EXT_FRAGMENT_CHANGED => 15;
38
39my %perprocesspid;
40my %perprocess;
41my $opt_ignorepid;
42my $opt_read_procstat;
43my $opt_prepend_parent;
44
45# Catch sigint and exit on request
46my $sigint_report = 0;
47my $sigint_exit = 0;
48my $sigint_pending = 0;
49my $sigint_received = 0;
50sub sigint_handler {
51 my $current_time = time;
52 if ($current_time - 2 > $sigint_received) {
53 print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
54 $sigint_report = 1;
55 } else {
56 if (!$sigint_exit) {
57 print "Second SIGINT received quickly, exiting\n";
58 }
59 $sigint_exit++;
60 }
61
62 if ($sigint_exit > 3) {
63 print "Many SIGINTs received, exiting now without report\n";
64 exit;
65 }
66
67 $sigint_received = $current_time;
68 $sigint_pending = 1;
69}
70$SIG{INT} = "sigint_handler";
71
72# Parse command line options
73GetOptions(
74 'ignore-pid' => \$opt_ignorepid,
75 'read-procstat' => \$opt_read_procstat,
76 'prepend-parent' => \$opt_prepend_parent,
77);
78
79# Defaults for dynamically discovered regexes
80my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';
81
82# Dynamically discovered regex
83my $regex_fragdetails;
84
85# Static regex used. Specified like this for readability and for use with /o
86# (process_pid) (cpus ) ( time ) (tpoint ) (details)
87my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
88my $regex_statname = '[-0-9]*\s\((.*)\).*';
89my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
90
91sub generate_traceevent_regex {
92 my $event = shift;
93 my $default = shift;
94 my $regex;
95
96 # Read the event format or use the default
97 if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) {
98 $regex = $default;
99 } else {
100 my $line;
101 while (!eof(FORMAT)) {
102 $line = <FORMAT>;
103 if ($line =~ /^print fmt:\s"(.*)",.*/) {
104 $regex = $1;
105 $regex =~ s/%p/\([0-9a-f]*\)/g;
106 $regex =~ s/%d/\([-0-9]*\)/g;
107 $regex =~ s/%lu/\([0-9]*\)/g;
108 }
109 }
110 }
111
112 # Verify fields are in the right order
113 my $tuple;
114 foreach $tuple (split /\s/, $regex) {
115 my ($key, $value) = split(/=/, $tuple);
116 my $expected = shift;
117 if ($key ne $expected) {
118			print("WARNING: Format not as expected '$key' != '$expected'\n");
119 $regex =~ s/$key=\((.*)\)/$key=$1/;
120 }
121 }
122
123 if (defined shift) {
124 die("Fewer fields than expected in format");
125 }
126
127 return $regex;
128}
129$regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag",
130 $regex_fragdetails_default,
131 "page", "pfn",
132 "alloc_order", "fallback_order", "pageblock_order",
133 "alloc_migratetype", "fallback_migratetype",
134 "fragmenting", "change_ownership");
135
136sub read_statline($) {
137 my $pid = $_[0];
138 my $statline;
139
140 if (open(STAT, "/proc/$pid/stat")) {
141 $statline = <STAT>;
142 close(STAT);
143 }
144
145 if ($statline eq '') {
146 $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
147 }
148
149 return $statline;
150}
151
152sub guess_process_pid($$) {
153 my $pid = $_[0];
154 my $statline = $_[1];
155
156 if ($pid == 0) {
157 return "swapper-0";
158 }
159
160 if ($statline !~ /$regex_statname/o) {
161		die("Failed to match stat line for process name :: $statline");
162 }
163 return "$1-$pid";
164}
165
166sub parent_info($$) {
167 my $pid = $_[0];
168 my $statline = $_[1];
169 my $ppid;
170
171 if ($pid == 0) {
172 return "NOPARENT-0";
173 }
174
175 if ($statline !~ /$regex_statppid/o) {
176		die("Failed to match stat line for process ppid :: $statline");
177 }
178
179 # Read the ppid stat line
180 $ppid = $1;
181 return guess_process_pid($ppid, read_statline($ppid));
182}
183
184sub process_events {
185 my $traceevent;
186 my $process_pid;
187 my $cpus;
188 my $timestamp;
189 my $tracepoint;
190 my $details;
191 my $statline;
192
193 # Read each line of the event log
194EVENT_PROCESS:
195 while ($traceevent = <STDIN>) {
196 if ($traceevent =~ /$regex_traceevent/o) {
197 $process_pid = $1;
198 $tracepoint = $4;
199
200 if ($opt_read_procstat || $opt_prepend_parent) {
201 $process_pid =~ /(.*)-([0-9]*)$/;
202 my $process = $1;
203 my $pid = $2;
204
205 $statline = read_statline($pid);
206
207 if ($opt_read_procstat && $process eq '') {
208 $process_pid = guess_process_pid($pid, $statline);
209 }
210
211 if ($opt_prepend_parent) {
212 $process_pid = parent_info($pid, $statline) . " :: $process_pid";
213 }
214 }
215
216 # Unnecessary in this script. Uncomment if required
217 # $cpus = $2;
218 # $timestamp = $3;
219 } else {
220 next;
221 }
222
223 # Perl Switch() sucks majorly
224 if ($tracepoint eq "mm_page_alloc") {
225 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
226 } elsif ($tracepoint eq "mm_page_free_direct") {
227 $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}++;
228 } elsif ($tracepoint eq "mm_pagevec_free") {
229 $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}++;
230 } elsif ($tracepoint eq "mm_page_pcpu_drain") {
231 $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
232 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
233 } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
234 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
235 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
236 } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
237
238 # Extract the details of the event now
239 $details = $5;
240
241 my ($page, $pfn);
242 my ($alloc_order, $fallback_order, $pageblock_order);
243 my ($alloc_migratetype, $fallback_migratetype);
244 my ($fragmenting, $change_ownership);
245
246 if ($details !~ /$regex_fragdetails/o) {
247 print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
248 next;
249 }
250
251 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;
252 $page = $1;
253 $pfn = $2;
254 $alloc_order = $3;
255 $fallback_order = $4;
256 $pageblock_order = $5;
257 $alloc_migratetype = $6;
258 $fallback_migratetype = $7;
259 $fragmenting = $8;
260 $change_ownership = $9;
261
262 if ($fragmenting) {
263 $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
264 if ($fallback_order <= 3) {
265 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
266 } else {
267 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
268 }
269 }
270 if ($change_ownership) {
271 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
272 }
273 } else {
274 $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
275 }
276
277 # Catch a full pcpu drain event
278 if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
279 $tracepoint ne "mm_page_pcpu_drain") {
280
281 $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
282 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
283 }
284
285 # Catch a full pcpu refill event
286 if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
287 $tracepoint ne "mm_page_alloc_zone_locked") {
288 $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
289 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
290 }
291
292 if ($sigint_pending) {
293 last EVENT_PROCESS;
294 }
295 }
296}
297
298sub dump_stats {
299 my $hashref = shift;
300 my %stats = %$hashref;
301
302 # Dump per-process stats
303 my $process_pid;
304 my $max_strlen = 0;
305
306	# Get the length of the longest process name
307 foreach $process_pid (keys %perprocesspid) {
308 my $len = length($process_pid);
309 if ($len > $max_strlen) {
310 $max_strlen = $len;
311 }
312 }
313 $max_strlen += 2;
314
315 printf("\n");
316 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
317 "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
318 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
319 "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", "");
320
321 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
322 "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");
323
324 foreach $process_pid (keys %stats) {
325 # Dump final aggregates
326 if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) {
327 $stats{$process_pid}->{HIGH_PCPU_DRAINS}++;
328 $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
329 }
330 if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) {
331 $stats{$process_pid}->{HIGH_PCPU_REFILLS}++;
332 $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
333 }
334
335 printf("%-" . $max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n",
336 $process_pid,
337 $stats{$process_pid}->{MM_PAGE_ALLOC},
338 $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED},
339 $stats{$process_pid}->{MM_PAGE_FREE_DIRECT},
340 $stats{$process_pid}->{MM_PAGEVEC_FREE},
341 $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN},
342 $stats{$process_pid}->{HIGH_PCPU_DRAINS},
343 $stats{$process_pid}->{HIGH_PCPU_REFILLS},
344 $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG},
345 $stats{$process_pid}->{HIGH_EXT_FRAG},
346 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED},
347 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE},
348 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE},
349 $stats{$process_pid}->{EVENT_UNKNOWN});
350 }
351}
352
353sub aggregate_perprocesspid() {
354 my $process_pid;
355 my $process;
356 undef %perprocess;
357
358 foreach $process_pid (keys %perprocesspid) {
359 $process = $process_pid;
360 $process =~ s/-([0-9])*$//;
361 if ($process eq '') {
362 $process = "NO_PROCESS_NAME";
363 }
364
365 $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC};
366 $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED};
367 $perprocess{$process}->{MM_PAGE_FREE_DIRECT} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT};
368 $perprocess{$process}->{MM_PAGEVEC_FREE} += $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE};
369 $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN};
370 $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS};
371 $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS};
372 $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG};
373 $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG};
374 $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED};
375 $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE};
376 $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE};
377 $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN};
378 }
379}
380
381sub report() {
382 if (!$opt_ignorepid) {
383 dump_stats(\%perprocesspid);
384 } else {
385 aggregate_perprocesspid();
386 dump_stats(\%perprocess);
387 }
388}
389
390# Process events or signals until neither is available
391sub signal_loop() {
392 my $sigint_processed;
393 do {
394 $sigint_processed = 0;
395 process_events();
396
397 # Handle pending signals if any
398 if ($sigint_pending) {
399 my $current_time = time;
400
401 if ($sigint_exit) {
402 print "Received exit signal\n";
403 $sigint_pending = 0;
404 }
405 if ($sigint_report) {
406 if ($current_time >= $sigint_received + 2) {
407 report();
408 $sigint_report = 0;
409 $sigint_pending = 0;
410 $sigint_processed = 1;
411 }
412 }
413 }
414 } while ($sigint_pending || $sigint_processed);
415}
416
417signal_loop();
418report();
diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.txt
new file mode 100644
index 00000000000..5eb4e487e66
--- /dev/null
+++ b/Documentation/trace/tracepoint-analysis.txt
@@ -0,0 +1,327 @@
1 Notes on Analysing Behaviour Using Events and Tracepoints
2
3 Documentation written by Mel Gorman
4 PCL information heavily based on email from Ingo Molnar
5
61. Introduction
7===============
8
9Tracepoints (see Documentation/trace/tracepoints.txt) can be used, via the
10event tracing infrastructure, without creating custom kernel modules to
11register probe functions.
12
13Simplistically, a tracepoint represents an important event that can be
14taken in conjunction with other tracepoints to build a "Big Picture" of
15what is going on within the system. There are a large number of methods for
16gathering and interpreting these events. Lacking any current Best Practises,
17this document describes some of the methods that can be used.
18
19This document assumes that debugfs is mounted on /sys/kernel/debug and that
20the appropriate tracing options have been configured into the kernel. It is
21assumed that the PCL tool tools/perf has been installed and is in your path.
22
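If debugfs is not already mounted, a typical invocation (exact options may
vary by distribution) is:

 $ mount -t debugfs none /sys/kernel/debug
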
232. Listing Available Events
24===========================
25
262.1 Standard Utilities
27----------------------
28
29All possible events are visible from /sys/kernel/debug/tracing/events. Simply
30calling
31
32 $ find /sys/kernel/debug/tracing/events -type d
33
34will give a fair indication of the number of events available.
35
362.2 PCL
37-------
38
39Discovery and enumeration of all counters and events, including tracepoints,
40are available with the perf tool. Getting a list of available events is a
41simple case of
42
43 $ perf list 2>&1 | grep Tracepoint
44 ext4:ext4_free_inode [Tracepoint event]
45 ext4:ext4_request_inode [Tracepoint event]
46 ext4:ext4_allocate_inode [Tracepoint event]
47 ext4:ext4_write_begin [Tracepoint event]
48 ext4:ext4_ordered_write_end [Tracepoint event]
49 [ .... remaining output snipped .... ]
50
51
523. Enabling Events
53==================
54
553.1 System-Wide Event Enabling
56------------------------------
57
58See Documentation/trace/events.txt for a proper description of how events
59can be enabled system-wide. A short example of enabling all events related
60to page allocation would look something like
61
62 $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done
63
643.2 System-Wide Event Enabling with SystemTap
65---------------------------------------------
66
67In SystemTap, tracepoints are accessible using the kernel.trace() function
68call. The following is an example that reports every 5 seconds which
69processes were allocating pages.
70
71 global page_allocs
72
73 probe kernel.trace("mm_page_alloc") {
74 page_allocs[execname()]++
75 }
76
77 function print_count() {
78 printf ("%-25s %-s\n", "#Pages Allocated", "Process Name")
79 foreach (proc in page_allocs-)
80 printf("%-25d %s\n", page_allocs[proc], proc)
81 printf ("\n")
82 delete page_allocs
83 }
84
85 probe timer.s(5) {
86 print_count()
87 }
88
893.3 System-Wide Event Enabling with PCL
90---------------------------------------
91
92By specifying the -a switch and monitoring the sleep command, system-wide
93events can be examined over a fixed duration of time.
94
95 $ perf stat -a \
96 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
97 -e kmem:mm_pagevec_free \
98 sleep 10
99 Performance counter stats for 'sleep 10':
100
101 9630 kmem:mm_page_alloc
102 2143 kmem:mm_page_free_direct
103 7424 kmem:mm_pagevec_free
104
105 10.002577764 seconds time elapsed
106
107Similarly, one could execute a shell and exit it as desired to get a report
108at that point.
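
For example (bash here is just an illustrative choice of command to run;
exiting the shell prints the counters gathered while it ran):

 $ perf stat -a \
	 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
	 -e kmem:mm_pagevec_free \
	 bash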
109
1103.4 Local Event Enabling
111------------------------
112
113Documentation/trace/ftrace.txt describes how to enable events on a per-thread
114basis using set_ftrace_pid.
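
For example, to restrict tracing to an already-running process (PID 1234 is
purely illustrative):

 $ echo 1234 > /sys/kernel/debug/tracing/set_ftrace_pid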
115
1163.5 Local Event Enabling with PCL
117---------------------------------
118
119Events can be activated and tracked for the duration of a process on a local
120basis using PCL as follows.
121
122 $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
123 -e kmem:mm_pagevec_free ./hackbench 10
124 Time: 0.909
125
126 Performance counter stats for './hackbench 10':
127
128 17803 kmem:mm_page_alloc
129 12398 kmem:mm_page_free_direct
130 4827 kmem:mm_pagevec_free
131
132 0.973913387 seconds time elapsed
133
1344. Event Filtering
135==================
136
137Documentation/trace/ftrace.txt covers in depth how to filter events in
138ftrace. Obviously, using grep and awk on trace_pipe is an option, as is
139any script reading trace_pipe.
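
For example, a crude filter with standard tools might look like this sketch:

 $ cat /sys/kernel/debug/tracing/trace_pipe | grep mm_page_alloc

Events also support in-kernel filter expressions written to each event's
"filter" file; assuming mm_page_alloc exposes an "order" field, high-order
allocations alone could be traced with:

 $ echo 'order > 0' > /sys/kernel/debug/tracing/events/kmem/mm_page_alloc/filter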
140
1415. Analysing Event Variances with PCL
142=====================================
143
144Any workload can exhibit variances between runs and it can be important
145to know what the standard deviation is. By and large, this is left to the
146performance analyst to do by hand. In the event that the discrete event
147occurrences are useful to the performance analyst, perf can be used.
148
149 $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct
150 -e kmem:mm_pagevec_free ./hackbench 10
151 Time: 0.890
152 Time: 0.895
153 Time: 0.915
154 Time: 1.001
155 Time: 0.899
156
157 Performance counter stats for './hackbench 10' (5 runs):
158
159 16630 kmem:mm_page_alloc ( +- 3.542% )
160 11486 kmem:mm_page_free_direct ( +- 4.771% )
161 4730 kmem:mm_pagevec_free ( +- 2.325% )
162
163 0.982653002 seconds time elapsed ( +- 1.448% )
164
165In the event that some higher-level event is required that depends on some
166aggregation of discrete events, a script would need to be developed.
167
168Using --repeat, it is also possible to view how events are fluctuating over
169time on a system-wide basis using -a and sleep.
170
171 $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
172 -e kmem:mm_pagevec_free \
173 -a --repeat 10 \
174 sleep 1
175 Performance counter stats for 'sleep 1' (10 runs):
176
177 1066 kmem:mm_page_alloc ( +- 26.148% )
178 182 kmem:mm_page_free_direct ( +- 5.464% )
179 890 kmem:mm_pagevec_free ( +- 30.079% )
180
181 1.002251757 seconds time elapsed ( +- 0.005% )
182
1836. Higher-Level Analysis with Helper Scripts
184============================================
185
186When events are enabled, the events that are triggering can be read from
187/sys/kernel/debug/tracing/trace_pipe in a human-readable format although binary
188options exist as well. By post-processing the output, further information can
189be gathered on-line as appropriate. Examples of post-processing might include:
190
191 o Reading information from /proc for the PID that triggered the event
192 o Deriving a higher-level event from a series of lower-level events
193 o Calculating latencies between two events
194
195Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example
196script that can read trace_pipe from STDIN or a copy of a trace. When used
197on-line, it can be interrupted once to generate a report without exiting
198and twice to exit.
199
200Simplistically, the script just reads STDIN and counts up events, but it
201can also do more, such as:
202
203 o Derive high-level events from many low-level events. If a number of pages
204 are freed to the main allocator from the per-CPU lists, it recognises
205 that as one per-CPU drain even though there is no specific tracepoint
206 for that event
207 o It can aggregate based on the process name or on individual PIDs
208 o In the event memory is getting externally fragmented, it reports
209 on whether the fragmentation event was severe or moderate.
210 o When receiving an event about a PID, it can record who the parent was so
211 that if large numbers of events are coming from very short-lived
212 processes, the parent process responsible for creating all the helpers
213 can be identified
214
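A sample invocation of the script, reading either trace_pipe directly or a
snapshot captured earlier (the snapshot filename here is illustrative):

 $ perl trace-pagealloc-postprocess.pl --ignore-pid < trace-snapshot.log
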
2157. Lower-Level Analysis with PCL
216================================
217
218There may also be a requirement to identify what functions within a program
219were generating events within the kernel. To begin this sort of analysis, the
220data must be recorded. At the time of writing, this required root:
221
222 $ perf record -c 1 \
223 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
224 -e kmem:mm_pagevec_free \
225 ./hackbench 10
226 Time: 0.894
227 [ perf record: Captured and wrote 0.733 MB perf.data (~32010 samples) ]
228
229Note the use of '-c 1' to sample on every single event. The default sample
230period is quite high to minimise overhead but the information collected can be
231very coarse as a result.
232
233This record step produced a file called perf.data which can be analysed
234using perf report.
235
236 $ perf report
237 # Samples: 30922
238 #
239 # Overhead Command Shared Object
240 # ........ ......... ................................
241 #
242 87.27% hackbench [vdso]
243 6.85% hackbench /lib/i686/cmov/libc-2.9.so
244 2.62% hackbench /lib/ld-2.9.so
245 1.52% perf [vdso]
246 1.22% hackbench ./hackbench
247 0.48% hackbench [kernel]
248 0.02% perf /lib/i686/cmov/libc-2.9.so
249 0.01% perf /usr/bin/perf
250 0.01% perf /lib/ld-2.9.so
251 0.00% hackbench /lib/i686/cmov/libpthread-2.9.so
252 #
253 # (For more details, try: perf report --sort comm,dso,symbol)
254 #
255
256According to this, the vast majority of events were triggered within
257the VDSO. With simple binaries, this will often be the case, so let's
258take a slightly different example. In the course of writing this, it was
259noticed that X was generating an insane number of page allocations, so
260let's look at it:
261
262 $ perf record -c 1 -f \
263 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
264 -e kmem:mm_pagevec_free \
265 -p `pidof X`
266
267This was interrupted after a few seconds, and a report was generated:
268
269 $ perf report
270 # Samples: 27666
271 #
272 # Overhead Command Shared Object
273 # ........ ....... .......................................
274 #
275 51.95% Xorg [vdso]
276 47.95% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1
277 0.09% Xorg /lib/i686/cmov/libc-2.9.so
278 0.01% Xorg [kernel]
279 #
280 # (For more details, try: perf report --sort comm,dso,symbol)
281 #
282
283So, almost half of the events are occurring in a library. To get an idea
284of which symbol:
285
286 $ perf report --sort comm,dso,symbol
287 # Samples: 27666
288 #
289 # Overhead Command Shared Object Symbol
290 # ........ ....... ....................................... ......
291 #
292 51.95% Xorg [vdso] [.] 0x000000ffffe424
293 47.93% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] pixmanFillsse2
294 0.09% Xorg /lib/i686/cmov/libc-2.9.so [.] _int_malloc
295 0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] pixman_region32_copy_f
296 0.01% Xorg [kernel] [k] read_hpet
297 0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] get_fast_path
298 0.00% Xorg [kernel] [k] ftrace_trace_userstack
299
300To see where within the function pixmanFillsse2 things are going wrong:
301
302 $ perf annotate pixmanFillsse2
303 [ ... ]
304 0.00 : 34eeb: 0f 18 08 prefetcht0 (%eax)
305 : }
306 :
307 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, _
308 : _mm_store_si128 (__m128i *__P, __m128i __B) : {
309 : *__P = __B;
310 12.40 : 34eee: 66 0f 7f 80 40 ff ff movdqa %xmm0,-0xc0(%eax)
311 0.00 : 34ef5: ff
312 12.40 : 34ef6: 66 0f 7f 80 50 ff ff movdqa %xmm0,-0xb0(%eax)
313 0.00 : 34efd: ff
314 12.39 : 34efe: 66 0f 7f 80 60 ff ff movdqa %xmm0,-0xa0(%eax)
315 0.00 : 34f05: ff
316 12.67 : 34f06: 66 0f 7f 80 70 ff ff movdqa %xmm0,-0x90(%eax)
317 0.00 : 34f0d: ff
318 12.58 : 34f0e: 66 0f 7f 40 80 movdqa %xmm0,-0x80(%eax)
319 12.31 : 34f13: 66 0f 7f 40 90 movdqa %xmm0,-0x70(%eax)
320 12.40 : 34f18: 66 0f 7f 40 a0 movdqa %xmm0,-0x60(%eax)
321 12.31 : 34f1d: 66 0f 7f 40 b0 movdqa %xmm0,-0x50(%eax)
322
323At a glance, it looks like the time is being spent copying pixmaps to
324the card. Further investigation would be needed to determine why pixmaps
325are being copied around so much, but a starting point would be to take the
326ancient build of libpixman out of the library path where it had been
327totally forgotten about for months!
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 2f77ced35df..e57d6a9dd32 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -6,6 +6,8 @@ balance
6 - various information on memory balancing. 6 - various information on memory balancing.
7hugetlbpage.txt 7hugetlbpage.txt
8 - a brief summary of hugetlbpage support in the Linux kernel. 8 - a brief summary of hugetlbpage support in the Linux kernel.
9ksm.txt
10 - how to use the Kernel Samepage Merging feature.
9locking 11locking
10 - info on how locking and synchronization is done in the Linux vm code. 12 - info on how locking and synchronization is done in the Linux vm code.
11numa 13numa
@@ -20,3 +22,5 @@ slabinfo.c
20 - source code for a tool to get reports about slabs. 22 - source code for a tool to get reports about slabs.
21slub.txt 23slub.txt
22 - a short users guide for SLUB. 24 - a short users guide for SLUB.
25map_hugetlb.c
26 - an example program that uses the MAP_HUGETLB mmap flag.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index ea8714fcc3a..82a7bd1800b 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -18,13 +18,13 @@ First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
18automatically when CONFIG_HUGETLBFS is selected) configuration 18automatically when CONFIG_HUGETLBFS is selected) configuration
19options. 19options.
20 20
21The kernel built with hugepage support should show the number of configured 21The kernel built with huge page support should show the number of configured
22hugepages in the system by running the "cat /proc/meminfo" command. 22huge pages in the system by running the "cat /proc/meminfo" command.
23 23
24/proc/meminfo also provides information about the total number of hugetlb 24/proc/meminfo also provides information about the total number of hugetlb
25pages configured in the kernel. It also displays information about the 25pages configured in the kernel. It also displays information about the
26number of free hugetlb pages at any time. It also displays information about 26number of free hugetlb pages at any time. It also displays information about
27the configured hugepage size - this is needed for generating the proper 27the configured huge page size - this is needed for generating the proper
28alignment and size of the arguments to the above system calls. 28alignment and size of the arguments to the above system calls.
29 29
30The output of "cat /proc/meminfo" will have lines like: 30The output of "cat /proc/meminfo" will have lines like:
@@ -37,25 +37,27 @@ HugePages_Surp: yyy
37Hugepagesize: zzz kB 37Hugepagesize: zzz kB
38 38
39where: 39where:
40HugePages_Total is the size of the pool of hugepages. 40HugePages_Total is the size of the pool of huge pages.
41HugePages_Free is the number of hugepages in the pool that are not yet 41HugePages_Free is the number of huge pages in the pool that are not yet
42allocated. 42 allocated.
43HugePages_Rsvd is short for "reserved," and is the number of hugepages 43HugePages_Rsvd is short for "reserved," and is the number of huge pages for
44for which a commitment to allocate from the pool has been made, but no 44 which a commitment to allocate from the pool has been made,
45allocation has yet been made. It's vaguely analogous to overcommit. 45 but no allocation has yet been made. Reserved huge pages
46HugePages_Surp is short for "surplus," and is the number of hugepages in 46 guarantee that an application will be able to allocate a
47the pool above the value in /proc/sys/vm/nr_hugepages. The maximum 47 huge page from the pool of huge pages at fault time.
48number of surplus hugepages is controlled by 48HugePages_Surp is short for "surplus," and is the number of huge pages in
49/proc/sys/vm/nr_overcommit_hugepages. 49 the pool above the value in /proc/sys/vm/nr_hugepages. The
50 maximum number of surplus huge pages is controlled by
51 /proc/sys/vm/nr_overcommit_hugepages.
50 52
51/proc/filesystems should also show a filesystem of type "hugetlbfs" configured 53/proc/filesystems should also show a filesystem of type "hugetlbfs" configured
52in the kernel. 54in the kernel.
53 55
54/proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb 56/proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb
55pages in the kernel. Super user can dynamically request more (or free some 57pages in the kernel. Super user can dynamically request more (or free some
56pre-configured) hugepages. 58pre-configured) huge pages.
57The allocation (or deallocation) of hugetlb pages is possible only if there are 59The allocation (or deallocation) of hugetlb pages is possible only if there are
58enough physically contiguous free pages in system (freeing of hugepages is 60enough physically contiguous free pages in system (freeing of huge pages is
59possible only if there are enough hugetlb pages free that can be transferred 61possible only if there are enough hugetlb pages free that can be transferred
60back to regular memory pool). 62back to regular memory pool).
61 63
@@ -67,43 +69,82 @@ use either the mmap system call or shared memory system calls to start using
67the huge pages. It is required that the system administrator preallocate 69the huge pages. It is required that the system administrator preallocate
68enough memory for huge page purposes. 70enough memory for huge page purposes.
69 71
70Use the following command to dynamically allocate/deallocate hugepages: 72The administrator can preallocate huge pages on the kernel boot command line by
73specifying the "hugepages=N" parameter, where 'N' = the number of huge pages
74requested. This is the most reliable method for preallocating huge pages as
75memory has not yet become fragmented.
76
77Some platforms support multiple huge page sizes. To preallocate huge pages
78of a specific size, one must precede the huge pages boot command parameters
79with a huge page size selection parameter "hugepagesz=<size>". <size> must
80be specified in bytes with optional scale suffix [kKmMgG]. The default huge
81page size may be selected with the "default_hugepagesz=<size>" boot parameter.
82
83/proc/sys/vm/nr_hugepages indicates the current number of configured [default
84size] hugetlb pages in the kernel. Super user can dynamically request more
85(or free some pre-configured) huge pages.
86
87Use the following command to dynamically allocate/deallocate default sized
88huge pages:
71 89
72 echo 20 > /proc/sys/vm/nr_hugepages 90 echo 20 > /proc/sys/vm/nr_hugepages
73 91
74This command will try to configure 20 hugepages in the system. The success 92This command will try to configure 20 default sized huge pages in the system.
75or failure of allocation depends on the amount of physically contiguous 93On a NUMA platform, the kernel will attempt to distribute the huge page pool
76memory that is preset in system at this time. System administrators may want 94over all the on-line nodes. These huge pages, allocated when nr_hugepages
77to put this command in one of the local rc init files. This will enable the 95is increased, are called "persistent huge pages".
78kernel to request huge pages early in the boot process (when the possibility 96
79of getting physical contiguous pages is still very high). In either 97The success or failure of huge page allocation depends on the amount of
80case, administrators will want to verify the number of hugepages actually 98physically contiguous memory that is present in system at the time of the
81allocated by checking the sysctl or meminfo. 99allocation attempt. If the kernel is unable to allocate huge pages from
82 100some nodes in a NUMA system, it will attempt to make up the difference by
83/proc/sys/vm/nr_overcommit_hugepages indicates how large the pool of 101allocating extra pages on other nodes with sufficient available contiguous
84hugepages can grow, if more hugepages than /proc/sys/vm/nr_hugepages are 102memory, if any.
85requested by applications. echo'ing any non-zero value into this file 103
86indicates that the hugetlb subsystem is allowed to try to obtain 104System administrators may want to put this command in one of the local rc init
87hugepages from the buddy allocator, if the normal pool is exhausted. As 105files. This will enable the kernel to request huge pages early in the boot
88these surplus hugepages go out of use, they are freed back to the buddy 106process when the possibility of getting physical contiguous pages is still
107very high. Administrators can verify the number of huge pages actually
108allocated by checking the sysctl or meminfo. To check the per node
109distribution of huge pages in a NUMA system, use:
110
111 cat /sys/devices/system/node/node*/meminfo | fgrep Huge
112
113/proc/sys/vm/nr_overcommit_hugepages specifies how large the pool of
114huge pages can grow, if more huge pages than /proc/sys/vm/nr_hugepages are
115requested by applications. Writing any non-zero value into this file
116indicates that the hugetlb subsystem is allowed to try to obtain "surplus"
117huge pages from the buddy allocator, when the normal pool is exhausted. As
118these surplus huge pages go out of use, they are freed back to the buddy
89allocator. 119allocator.
90 120
121When increasing the huge page pool size via nr_hugepages, any surplus
122pages will first be promoted to persistent huge pages. Then, additional
123huge pages will be allocated, if necessary and if possible, to fulfill
124the new huge page pool size.
125
126The administrator may shrink the pool of preallocated huge pages for
127the default huge page size by setting the nr_hugepages sysctl to a
128smaller value. The kernel will attempt to balance the freeing of huge pages
129across all on-line nodes. Any free huge pages on the selected nodes will
130be freed back to the buddy allocator.
131
91Caveat: Shrinking the pool via nr_hugepages such that it becomes less 132Caveat: Shrinking the pool via nr_hugepages such that it becomes less
92than the number of hugepages in use will convert the balance to surplus 133than the number of huge pages in use will convert the balance to surplus
93huge pages even if it would exceed the overcommit value. As long as 134huge pages even if it would exceed the overcommit value. As long as
94this condition holds, however, no more surplus huge pages will be 135this condition holds, however, no more surplus huge pages will be
95allowed on the system until one of the two sysctls are increased 136allowed on the system until one of the two sysctls are increased
96sufficiently, or the surplus huge pages go out of use and are freed. 137sufficiently, or the surplus huge pages go out of use and are freed.
97 138
98With support for multiple hugepage pools at run-time available, much of 139With support for multiple huge page pools at run-time available, much of
99the hugepage userspace interface has been duplicated in sysfs. The above 140the huge page userspace interface has been duplicated in sysfs. The above
100information applies to the default hugepage size (which will be 141information applies to the default huge page size which will be
101controlled by the proc interfaces for backwards compatibility). The root 142controlled by the /proc interfaces for backwards compatibility. The root
102hugepage control directory is 143huge page control directory in sysfs is:
103 144
104 /sys/kernel/mm/hugepages 145 /sys/kernel/mm/hugepages
105 146
106For each hugepage size supported by the running kernel, a subdirectory 147For each huge page size supported by the running kernel, a subdirectory
107will exist, of the form 148will exist, of the form
108 149
109 hugepages-${size}kB 150 hugepages-${size}kB
@@ -116,9 +157,9 @@ Inside each of these directories, the same set of files will exist:
116 resv_hugepages 157 resv_hugepages
117 surplus_hugepages 158 surplus_hugepages
118 159
119which function as described above for the default hugepage-sized case. 160which function as described above for the default huge page-sized case.
120 161
121If the user applications are going to request hugepages using mmap system 162If the user applications are going to request huge pages using mmap system
122call, then it is required that system administrator mount a file system of 163call, then it is required that system administrator mount a file system of
123type hugetlbfs: 164type hugetlbfs:
124 165
@@ -127,7 +168,7 @@ type hugetlbfs:
127 none /mnt/huge 168 none /mnt/huge
128 169
129This command mounts a (pseudo) filesystem of type hugetlbfs on the directory 170This command mounts a (pseudo) filesystem of type hugetlbfs on the directory
130/mnt/huge. Any files created on /mnt/huge uses hugepages. The uid and gid 171/mnt/huge. Any files created on /mnt/huge uses huge pages. The uid and gid
131options sets the owner and group of the root of the file system. By default 172options sets the owner and group of the root of the file system. By default
132the uid and gid of the current process are taken. The mode option sets the 173the uid and gid of the current process are taken. The mode option sets the
133mode of root of file system to value & 0777. This value is given in octal. 174mode of root of file system to value & 0777. This value is given in octal.
@@ -146,24 +187,26 @@ Regular chown, chgrp, and chmod commands (with right permissions) could be
146used to change the file attributes on hugetlbfs. 187used to change the file attributes on hugetlbfs.
147 188
148Also, it is important to note that no such mount command is required if the 189Also, it is important to note that no such mount command is required if the
149applications are going to use only shmat/shmget system calls. Users who 190applications are going to use only shmat/shmget system calls or mmap with
150wish to use hugetlb page via shared memory segment should be a member of 191MAP_HUGETLB. Users who wish to use hugetlb page via shared memory segment
151a supplementary group and system admin needs to configure that gid into 192should be a member of a supplementary group and system admin needs to
152/proc/sys/vm/hugetlb_shm_group. It is possible for same or different 193configure that gid into /proc/sys/vm/hugetlb_shm_group. It is possible for
153applications to use any combination of mmaps and shm* calls, though the 194same or different applications to use any combination of mmaps and shm*
154mount of filesystem will be required for using mmap calls. 195calls, though the mount of filesystem will be required for using mmap calls
196without MAP_HUGETLB. For an example of how to use mmap with MAP_HUGETLB see
197map_hugetlb.c.
155 198
156******************************************************************* 199*******************************************************************
157 200
158/* 201/*
159 * Example of using hugepage memory in a user application using Sys V shared 202 * Example of using huge page memory in a user application using Sys V shared
160 * memory system calls. In this example the app is requesting 256MB of 203 * memory system calls. In this example the app is requesting 256MB of
161 * memory that is backed by huge pages. The application uses the flag 204 * memory that is backed by huge pages. The application uses the flag
162 * SHM_HUGETLB in the shmget system call to inform the kernel that it is 205 * SHM_HUGETLB in the shmget system call to inform the kernel that it is
163 * requesting hugepages. 206 * requesting huge pages.
164 * 207 *
165 * For the ia64 architecture, the Linux kernel reserves Region number 4 for 208 * For the ia64 architecture, the Linux kernel reserves Region number 4 for
166 * hugepages. That means the addresses starting with 0x800000... will need 209 * huge pages. That means the addresses starting with 0x800000... will need
167 * to be specified. Specifying a fixed address is not required on ppc64, 210 * to be specified. Specifying a fixed address is not required on ppc64,
168 * i386 or x86_64. 211 * i386 or x86_64.
169 * 212 *
@@ -252,14 +295,14 @@ int main(void)
252******************************************************************* 295*******************************************************************
253 296
254/* 297/*
255 * Example of using hugepage memory in a user application using the mmap 298 * Example of using huge page memory in a user application using the mmap
256 * system call. Before running this application, make sure that the 299 * system call. Before running this application, make sure that the
257 * administrator has mounted the hugetlbfs filesystem (on some directory 300 * administrator has mounted the hugetlbfs filesystem (on some directory
258 * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this 301 * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
259 * example, the app is requesting memory of size 256MB that is backed by 302 * example, the app is requesting memory of size 256MB that is backed by
260 * huge pages. 303 * huge pages.
261 * 304 *
262 * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. 305 * For ia64 architecture, Linux kernel reserves Region number 4 for huge pages.
263 * That means the addresses starting with 0x800000... will need to be 306 * That means the addresses starting with 0x800000... will need to be
264 * specified. Specifying a fixed address is not required on ppc64, i386 307 * specified. Specifying a fixed address is not required on ppc64, i386
265 * or x86_64. 308 * or x86_64.
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
new file mode 100644
index 00000000000..72a22f65960
--- /dev/null
+++ b/Documentation/vm/ksm.txt
@@ -0,0 +1,89 @@
1How to use the Kernel Samepage Merging feature
2----------------------------------------------
3
4KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
5added to the Linux kernel in 2.6.32. See mm/ksm.c for its implementation,
6and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
7
8The KSM daemon ksmd periodically scans those areas of user memory which
9have been registered with it, looking for pages of identical content which
10can be replaced by a single write-protected page (which is automatically
11copied if a process later wants to update its content).
12
13KSM was originally developed for use with KVM (where it was known as
14Kernel Shared Memory), to fit more virtual machines into physical memory,
15by sharing the data common between them. But it can be useful to any
16application which generates many instances of the same data.
17
18KSM only merges anonymous (private) pages, never pagecache (file) pages.
19KSM's merged pages are at present locked into kernel memory for as long
20as they are shared: so they cannot be swapped out like the user pages they
21replace (but swapping KSM pages should follow soon in a later release).
22
23KSM only operates on those areas of address space which an application
24has advised to be likely candidates for merging, by using the madvise(2)
25system call: int madvise(addr, length, MADV_MERGEABLE).
26
27The app may call int madvise(addr, length, MADV_UNMERGEABLE) to cancel
28that advice and restore unshared pages: whereupon KSM unmerges whatever
29it merged in that range. Note: this unmerging call may suddenly require
30more memory than is available - possibly failing with EAGAIN, but more
31probably arousing the Out-Of-Memory killer.
32
33If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
34and MADV_UNMERGEABLE simply fail with EINVAL. If the running kernel was
35built with CONFIG_KSM=y, those calls will normally succeed: even if the
36KSM daemon is not currently running, MADV_MERGEABLE still registers
37the range for whenever the KSM daemon is started; even if the range
38cannot contain any pages which KSM could actually merge; even if
39MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
40
41Like other madvise calls, they are intended for use on mapped areas of
42the user address space: they will report ENOMEM if the specified range
43includes unmapped gaps (though working on the intervening mapped areas),
44and might fail with EAGAIN if there is not enough memory for internal structures.
45
46Applications should be considerate in their use of MADV_MERGEABLE,
47restricting its use to areas likely to benefit. KSM's scans may use
48a lot of processing power, and its kernel-resident pages are a limited
49resource. Some installations will disable KSM for these reasons.
50
51The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
52readable by all but writable only by root:
53
54max_kernel_pages - set to maximum number of kernel pages that KSM may use
55 e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages"
56 Value 0 imposes no limit on the kernel pages KSM may use;
57 but note that any process using MADV_MERGEABLE can cause
58 KSM to allocate these pages, unswappable until it exits.
59 Default: 2000 (chosen for demonstration purposes)
60
61pages_to_scan - how many present pages to scan before ksmd goes to sleep
62 e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan"
63 Default: 200 (chosen for demonstration purposes)
64
65sleep_millisecs - how many milliseconds ksmd should sleep before next scan
66 e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
67 Default: 20 (chosen for demonstration purposes)
68
69run - set 0 to stop ksmd from running but keep merged pages,
70 set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
71 set 2 to stop ksmd and unmerge all pages currently merged,
72 but leave mergeable areas registered for next run
73 Default: 1 (for immediate use by apps which register)
74
75The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
76
77pages_shared - how many shared unswappable kernel pages KSM is using
78pages_sharing - how many more sites are sharing them i.e. how much saved
79pages_unshared - how many pages unique but repeatedly checked for merging
80pages_volatile - how many pages changing too fast to be placed in a tree
81full_scans - how many times all mergeable areas have been scanned
82
83A high ratio of pages_sharing to pages_shared indicates good sharing, but
84a high ratio of pages_unshared to pages_sharing indicates wasted effort.
85pages_volatile embraces several different kinds of activity, but a high
86proportion there would also indicate poor use of madvise MADV_MERGEABLE.
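
For example, a quick shell sketch to compute the sharing ratio (assuming
some pages have already been merged, so pages_shared is non-zero, and that
bc is installed):

 $ cd /sys/kernel/mm/ksm
 $ echo "scale=2; $(cat pages_sharing) / $(cat pages_shared)" | bc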
87
88Izik Eidus,
89Hugh Dickins, 30 July 2009
diff --git a/Documentation/vm/map_hugetlb.c b/Documentation/vm/map_hugetlb.c
new file mode 100644
index 00000000000..e2bdae37f49
--- /dev/null
+++ b/Documentation/vm/map_hugetlb.c
@@ -0,0 +1,77 @@
1/*
2 * Example of using hugepage memory in a user application using the mmap
3 * system call with MAP_HUGETLB flag. Before running this program make
4 * sure the administrator has allocated enough default sized huge pages
5 * to cover the 256 MB allocation.
6 *
7 * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
8 * That means the addresses starting with 0x800000... will need to be
9 * specified. Specifying a fixed address is not required on ppc64, i386
10 * or x86_64.
11 */
12#include <stdlib.h>
13#include <stdio.h>
14#include <unistd.h>
15#include <sys/mman.h>
16#include <fcntl.h>
17
18#define LENGTH (256UL*1024*1024)
19#define PROTECTION (PROT_READ | PROT_WRITE)
20
21#ifndef MAP_HUGETLB
22#define MAP_HUGETLB 0x40
23#endif
24
25/* Only ia64 requires this */
26#ifdef __ia64__
27#define ADDR (void *)(0x8000000000000000UL)
28#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
29#else
30#define ADDR (void *)(0x0UL)
31#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
32#endif
33
34void check_bytes(char *addr)
35{
36 printf("First hex is %x\n", *((unsigned int *)addr));
37}
38
39void write_bytes(char *addr)
40{
41 unsigned long i;
42
43 for (i = 0; i < LENGTH; i++)
44 *(addr + i) = (char)i;
45}
46
47void read_bytes(char *addr)
48{
49 unsigned long i;
50
51 check_bytes(addr);
52 for (i = 0; i < LENGTH; i++)
53 if (*(addr + i) != (char)i) {
54 printf("Mismatch at %lu\n", i);
55 break;
56 }
57}
58
59int main(void)
60{
61 void *addr;
62
63 addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
64 if (addr == MAP_FAILED) {
65 perror("mmap");
66 exit(1);
67 }
68
69 printf("Returned address is %p\n", addr);
70 check_bytes(addr);
71 write_bytes(addr);
72 read_bytes(addr);
73
74 munmap(addr, LENGTH);
75
76 return 0;
77}
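
To try the program, something like the following should work. This is a
sketch: it assumes a 2MB default huge page size (so 128 huge pages cover
the 256MB mapping) and gcc as the compiler.

 $ echo 128 > /proc/sys/vm/nr_hugepages
 $ gcc -Wall map_hugetlb.c -o map_hugetlb
 $ ./map_hugetlb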
diff --git a/MAINTAINERS b/MAINTAINERS
index 751a307dc44..d24c8823a8c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -233,6 +233,7 @@ S: Supported
233F: drivers/acpi/ 233F: drivers/acpi/
234F: drivers/pnp/pnpacpi/ 234F: drivers/pnp/pnpacpi/
235F: include/linux/acpi.h 235F: include/linux/acpi.h
236F: include/acpi/
236 237
237ACPI BATTERY DRIVERS 238ACPI BATTERY DRIVERS
238M: Alexey Starikovskiy <astarikovskiy@suse.de> 239M: Alexey Starikovskiy <astarikovskiy@suse.de>
@@ -497,7 +498,7 @@ F: arch/arm/include/asm/floppy.h
497 498
498ARM PORT 499ARM PORT
499M: Russell King <linux@arm.linux.org.uk> 500M: Russell King <linux@arm.linux.org.uk>
500L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 501L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
501W: http://www.arm.linux.org.uk/ 502W: http://www.arm.linux.org.uk/
502S: Maintained 503S: Maintained
503F: arch/arm/ 504F: arch/arm/
@@ -508,36 +509,36 @@ F: drivers/mmc/host/mmci.*
508 509
509ARM/ADI ROADRUNNER MACHINE SUPPORT 510ARM/ADI ROADRUNNER MACHINE SUPPORT
510M: Lennert Buytenhek <kernel@wantstofly.org> 511M: Lennert Buytenhek <kernel@wantstofly.org>
511L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 512L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
512S: Maintained 513S: Maintained
513F: arch/arm/mach-ixp23xx/ 514F: arch/arm/mach-ixp23xx/
514F: arch/arm/mach-ixp23xx/include/mach/ 515F: arch/arm/mach-ixp23xx/include/mach/
515 516
516ARM/ADS SPHERE MACHINE SUPPORT 517ARM/ADS SPHERE MACHINE SUPPORT
517M: Lennert Buytenhek <kernel@wantstofly.org> 518M: Lennert Buytenhek <kernel@wantstofly.org>
518L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 519L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
519S: Maintained 520S: Maintained
520 521
521ARM/AFEB9260 MACHINE SUPPORT 522ARM/AFEB9260 MACHINE SUPPORT
522M: Sergey Lapin <slapin@ossfans.org> 523M: Sergey Lapin <slapin@ossfans.org>
523L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 524L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
524S: Maintained 525S: Maintained
525 526
526ARM/AJECO 1ARM MACHINE SUPPORT 527ARM/AJECO 1ARM MACHINE SUPPORT
527M: Lennert Buytenhek <kernel@wantstofly.org> 528M: Lennert Buytenhek <kernel@wantstofly.org>
528L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 529L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
529S: Maintained 530S: Maintained
530 531
531ARM/ATMEL AT91RM9200 ARM ARCHITECTURE 532ARM/ATMEL AT91RM9200 ARM ARCHITECTURE
532M: Andrew Victor <linux@maxim.org.za> 533M: Andrew Victor <linux@maxim.org.za>
533L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 534L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
534W: http://maxim.org.za/at91_26.html 535W: http://maxim.org.za/at91_26.html
535S: Maintained 536S: Maintained
536 537
537ARM/BCMRING ARM ARCHITECTURE 538ARM/BCMRING ARM ARCHITECTURE
538M: Leo Chen <leochen@broadcom.com> 539M: Leo Chen <leochen@broadcom.com>
539M: Scott Branden <sbranden@broadcom.com> 540M: Scott Branden <sbranden@broadcom.com>
540L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 541L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
541S: Maintained 542S: Maintained
542F: arch/arm/mach-bcmring 543F: arch/arm/mach-bcmring
543 544
@@ -554,25 +555,25 @@ F: drivers/mtd/nand/nand_bcm_umi.h
554ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE 555ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
555M: Hartley Sweeten <hsweeten@visionengravers.com> 556M: Hartley Sweeten <hsweeten@visionengravers.com>
556M: Ryan Mallon <ryan@bluewatersys.com> 557M: Ryan Mallon <ryan@bluewatersys.com>
557L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 558L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
558S: Maintained 559S: Maintained
559F: arch/arm/mach-ep93xx/ 560F: arch/arm/mach-ep93xx/
560F: arch/arm/mach-ep93xx/include/mach/ 561F: arch/arm/mach-ep93xx/include/mach/
561 562
562ARM/CIRRUS LOGIC EDB9315A MACHINE SUPPORT 563ARM/CIRRUS LOGIC EDB9315A MACHINE SUPPORT
563M: Lennert Buytenhek <kernel@wantstofly.org> 564M: Lennert Buytenhek <kernel@wantstofly.org>
564L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) 565L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
565S: Maintained 566S: Maintained
566 567
 ARM/CLKDEV SUPPORT
 M: Russell King <linux@arm.linux.org.uk>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 F: arch/arm/common/clkdev.c
 F: arch/arm/include/asm/clkdev.h
 
 ARM/COMPULAB CM-X270/EM-X270 and CM-X300 MACHINE SUPPORT
 M: Mike Rapoport <mike@compulab.co.il>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/CORGI MACHINE SUPPORT
@@ -581,14 +582,14 @@ S: Maintained
 
 ARM/CORTINA SYSTEMS GEMINI ARM ARCHITECTURE
 M: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T: git git://gitorious.org/linux-gemini/mainline.git
 S: Maintained
 F: arch/arm/mach-gemini/
 
 ARM/EBSA110 MACHINE SUPPORT
 M: Russell King <linux@arm.linux.org.uk>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.arm.linux.org.uk/
 S: Maintained
 F: arch/arm/mach-ebsa110/
@@ -606,13 +607,13 @@ F: arch/arm/mach-pxa/ezx.c
 
 ARM/FARADAY FA526 PORT
 M: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 F: arch/arm/mm/*-fa*
 
 ARM/FOOTBRIDGE ARCHITECTURE
 M: Russell King <linux@arm.linux.org.uk>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.arm.linux.org.uk/
 S: Maintained
 F: arch/arm/include/asm/hardware/dec21285.h
@@ -620,17 +621,17 @@ F: arch/arm/mach-footbridge/
 
 ARM/FREESCALE IMX / MXC ARM ARCHITECTURE
 M: Sascha Hauer <kernel@pengutronix.de>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/GLOMATION GESBC9312SX MACHINE SUPPORT
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/GUMSTIX MACHINE SUPPORT
 M: Steve Sakoman <sakoman@gmail.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/H4700 (HP IPAQ HX4700) MACHINE SUPPORT
@@ -650,55 +651,55 @@ F: arch/arm/mach-sa1100/include/mach/jornada720.h
 ARM/INTEL IOP32X ARM ARCHITECTURE
 M: Lennert Buytenhek <kernel@wantstofly.org>
 M: Dan Williams <dan.j.williams@intel.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Supported
 
 ARM/INTEL IOP33X ARM ARCHITECTURE
 M: Dan Williams <dan.j.williams@intel.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Supported
 
 ARM/INTEL IOP13XX ARM ARCHITECTURE
 M: Lennert Buytenhek <kernel@wantstofly.org>
 M: Dan Williams <dan.j.williams@intel.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Supported
 
 ARM/INTEL IQ81342EX MACHINE SUPPORT
 M: Lennert Buytenhek <kernel@wantstofly.org>
 M: Dan Williams <dan.j.williams@intel.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Supported
 
 ARM/INTEL IXP2000 ARM ARCHITECTURE
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/INTEL IXDP2850 MACHINE SUPPORT
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/INTEL IXP23XX ARM ARCHITECTURE
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/INTEL XSC3 (MANZANO) ARM CORE
 M: Lennert Buytenhek <kernel@wantstofly.org>
 M: Dan Williams <dan.j.williams@intel.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Supported
 
 ARM/IP FABRICS DOUBLE ESPRESSO MACHINE SUPPORT
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/LOGICPD PXA270 MACHINE SUPPORT
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/MAGICIAN MACHINE SUPPORT
@@ -708,7 +709,7 @@ S: Maintained
 ARM/Marvell Loki/Kirkwood/MV78xx0/Orion SOC support
 M: Lennert Buytenhek <buytenh@marvell.com>
 M: Nicolas Pitre <nico@marvell.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T: git git://git.marvell.com/orion
 S: Maintained
 F: arch/arm/mach-loki/
@@ -719,7 +720,7 @@ F: arch/arm/plat-orion/
 
 ARM/MIOA701 MACHINE SUPPORT
 M: Robert Jarzmik <robert.jarzmik@free.fr>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 F: arch/arm/mach-pxa/mioa701.c
 S: Maintained
 
@@ -760,18 +761,18 @@ S: Maintained
 
 ARM/PT DIGITAL BOARD PORT
 M: Stefan Eletzhofer <stefan.eletzhofer@eletztrick.de>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.arm.linux.org.uk/
 S: Maintained
 
 ARM/RADISYS ENP2611 MACHINE SUPPORT
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/RISCPC ARCHITECTURE
 M: Russell King <linux@arm.linux.org.uk>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.arm.linux.org.uk/
 S: Maintained
 F: arch/arm/common/time-acorn.c
@@ -790,7 +791,7 @@ S: Maintained
 
 ARM/SAMSUNG ARM ARCHITECTURES
 M: Ben Dooks <ben-linux@fluff.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.fluff.org/ben/linux/
 S: Maintained
 F: arch/arm/plat-s3c/
@@ -798,65 +799,65 @@ F: arch/arm/plat-s3c24xx/
 
 ARM/S3C2410 ARM ARCHITECTURE
 M: Ben Dooks <ben-linux@fluff.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.fluff.org/ben/linux/
 S: Maintained
 F: arch/arm/mach-s3c2410/
 
 ARM/S3C2440 ARM ARCHITECTURE
 M: Ben Dooks <ben-linux@fluff.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.fluff.org/ben/linux/
 S: Maintained
 F: arch/arm/mach-s3c2440/
 
 ARM/S3C2442 ARM ARCHITECTURE
 M: Ben Dooks <ben-linux@fluff.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.fluff.org/ben/linux/
 S: Maintained
 F: arch/arm/mach-s3c2442/
 
 ARM/S3C2443 ARM ARCHITECTURE
 M: Ben Dooks <ben-linux@fluff.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.fluff.org/ben/linux/
 S: Maintained
 F: arch/arm/mach-s3c2443/
 
 ARM/S3C6400 ARM ARCHITECTURE
 M: Ben Dooks <ben-linux@fluff.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.fluff.org/ben/linux/
 S: Maintained
 F: arch/arm/mach-s3c6400/
 
 ARM/S3C6410 ARM ARCHITECTURE
 M: Ben Dooks <ben-linux@fluff.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.fluff.org/ben/linux/
 S: Maintained
 F: arch/arm/mach-s3c6410/
 
 ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/THECUS N2100 MACHINE SUPPORT
 M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 
 ARM/NUVOTON W90X900 ARM ARCHITECTURE
 M: Wan ZongShun <mcuos.com@gmail.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.mcuos.com
 S: Maintained
 
 ARM/VFP SUPPORT
 M: Russell King <linux@arm.linux.org.uk>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.arm.linux.org.uk/
 S: Maintained
 F: arch/arm/vfp/
@@ -963,7 +964,7 @@ F: include/linux/atm*
 
 ATMEL AT91 MCI DRIVER
 M: Nicolas Ferre <nicolas.ferre@atmel.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.atmel.com/products/AT91/
 W: http://www.at91.com/
 S: Maintained
@@ -1541,7 +1542,7 @@ F: drivers/infiniband/hw/cxgb3/
 
 CYBERPRO FB DRIVER
 M: Russell King <linux@arm.linux.org.uk>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W: http://www.arm.linux.org.uk/
 S: Maintained
 F: drivers/video/cyber2000fb.*
@@ -2085,7 +2086,7 @@ F: drivers/i2c/busses/i2c-cpm.c
 FREESCALE IMX / MXC FRAMEBUFFER DRIVER
 M: Sascha Hauer <kernel@pengutronix.de>
 L: linux-fbdev-devel@lists.sourceforge.net (moderated for non-subscribers)
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 F: arch/arm/plat-mxc/include/mach/imxfb.h
 F: drivers/video/imxfb.c
@@ -2803,6 +2804,8 @@ L: netdev@vger.kernel.org
 L: lvs-devel@vger.kernel.org
 S: Maintained
 F: Documentation/networking/ipvs-sysctl.txt
+F: include/net/ip_vs.h
+F: include/linux/ip_vs.h
 F: net/netfilter/ipvs/
 
 IPWIRELESS DRIVER
@@ -2955,7 +2958,7 @@ F: scripts/Makefile.*
 KERNEL JANITORS
 L: kernel-janitors@vger.kernel.org
 W: http://www.kerneljanitors.org/
-S: Odd fixes
+S: Maintained
 
 KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
 M: "J. Bruce Fields" <bfields@fieldses.org>
@@ -3449,7 +3452,7 @@ F: include/linux/meye.h
 
 MOTOROLA IMX MMC/SD HOST CONTROLLER INTERFACE DRIVER
 M: Pavel Pisa <ppisa@pikron.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 F: drivers/mmc/host/imxmmc.*
 
@@ -3734,7 +3737,7 @@ W: http://www.muru.com/linux/omap/
 W: http://linux.omap.com/
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap-2.6.git
 S: Maintained
-F: arch/arm/*omap*
+F: arch/arm/*omap*/
 
 OMAP CLOCK FRAMEWORK SUPPORT
 M: Paul Walmsley <paul@pwsan.com>
@@ -4025,8 +4028,7 @@ F: drivers/block/pktcdvd.c
 F: include/linux/pktcdvd.h
 
 PMC SIERRA MaxRAID DRIVER
-P: Anil Ravindranath
-M: anil_ravindranath@pmc-sierra.com
+M: Anil Ravindranath <anil_ravindranath@pmc-sierra.com>
 L: linux-scsi@vger.kernel.org
 W: http://www.pmc-sierra.com/
 S: Supported
@@ -4168,7 +4170,7 @@ F: drivers/media/video/pvrusb2/
 PXA2xx/PXA3xx SUPPORT
 M: Eric Miao <eric.y.miao@gmail.com>
 M: Russell King <linux@arm.linux.org.uk>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 F: arch/arm/mach-pxa/
 F: drivers/pcmcia/pxa2xx*
@@ -4181,13 +4183,13 @@ F: sound/soc/pxa
 PXA168 SUPPORT
 M: Eric Miao <eric.y.miao@gmail.com>
 M: Jason Chagas <jason.chagas@marvell.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
 S: Maintained
 
 PXA910 SUPPORT
 M: Eric Miao <eric.y.miao@gmail.com>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
 S: Maintained
 
@@ -4428,7 +4430,7 @@ F: net/iucv/
 
 S3C24XX SD/MMC Driver
 M: Ben Dooks <ben-linux@fluff.org>
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Supported
 F: drivers/mmc/host/s3cmci.*
 
@@ -4533,20 +4535,20 @@ S: Maintained
 F: drivers/mmc/host/sdricoh_cs.c
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
-M: Pierre Ossman <pierre@ossman.eu>
-L: sdhci-devel@lists.ossman.eu
-S: Maintained
+S: Orphan
+L: linux-mmc@vger.kernel.org
+F: drivers/mmc/host/sdhci.*
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
 M: Anton Vorontsov <avorontsov@ru.mvista.com>
 L: linuxppc-dev@ozlabs.org
-L: sdhci-devel@lists.ossman.eu
+L: linux-mmc@vger.kernel.org
 S: Maintained
-F: drivers/mmc/host/sdhci.*
+F: drivers/mmc/host/sdhci-of.*
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) SAMSUNG DRIVER
 M: Ben Dooks <ben-linux@fluff.org>
-L: sdhci-devel@lists.ossman.eu
+L: linux-mmc@vger.kernel.org
 S: Maintained
 F: drivers/mmc/host/sdhci-s3c.c
 
@@ -4632,7 +4634,7 @@ F: drivers/misc/sgi-xp/
 SHARP LH SUPPORT (LH7952X & LH7A40X)
 M: Marc Singer <elf@buici.com>
 W: http://projects.buici.com/arm
-L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only)
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S: Maintained
 F: Documentation/arm/Sharp-LH/ADC-LH7-Touchscreen
 F: arch/arm/mach-lh7a40x/
@@ -5638,6 +5640,12 @@ L: linux-scsi@vger.kernel.org
 S: Maintained
 F: drivers/scsi/wd7000.c
 
+WINBOND CIR DRIVER
+P: David Härdeman
+M: david@hardeman.nu
+S: Maintained
+F: drivers/input/misc/winbond-cir.c
+
 WIMAX STACK
 M: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
 M: linux-wimax@intel.com
@@ -5657,8 +5665,7 @@ S: Maintained
 F: drivers/input/misc/wistron_btns.c
 
 WL1251 WIRELESS DRIVER
-P: Kalle Valo
-M: kalle.valo@nokia.com
+M: Kalle Valo <kalle.valo@nokia.com>
 L: linux-wireless@vger.kernel.org
 W: http://wireless.kernel.org
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9fb8aae5c39..443448154f3 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -45,6 +45,14 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
+config GENERIC_TIME
+	bool
+	default y
+
+config ARCH_USES_GETTIMEOFFSET
+	bool
+	default y
+
 config ZONE_DMA
 	bool
 	default y
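
The two symbols added above switch the architecture to the generic timekeeping core while keeping a tick-plus-offset clock. The contract, sketched here as plain self-contained userspace C (only arch_gettimeoffset() is real; every other name is a hypothetical stand-in), is one hook returning nanoseconds since the last timer tick, which the core adds to the wall time recorded at that tick:

	/* sketch only -- not kernel code */
	#include <stdio.h>

	typedef unsigned int u32;

	static u32 ns_since_tick;	/* stand-in for a cycle-counter read */

	u32 arch_gettimeoffset(void)	/* the hook the patches below supply */
	{
		return ns_since_tick;
	}

	int main(void)
	{
		unsigned long long wall_ns_at_tick = 1000000000ULL; /* xtime snapshot */
		ns_since_tick = 250000;	/* pretend 250 us have elapsed */
		printf("now = %llu ns\n", wall_ns_at_tick + arch_gettimeoffset());
		return 0;
	}
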
diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c
index ef183823029..9d0727d18ae 100644
--- a/arch/alpha/boot/tools/objstrip.c
+++ b/arch/alpha/boot/tools/objstrip.c
@@ -93,7 +93,7 @@ main (int argc, char *argv[])
 	ofd = 1;
 	if (i < argc) {
 		ofd = open(argv[i++], O_WRONLY | O_CREAT | O_TRUNC, 0666);
-		if (fd == -1) {
+		if (ofd == -1) {
 			perror("open");
 			exit(1);
 		}
diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h
index 88971460fa6..242c09ba98c 100644
--- a/arch/alpha/include/asm/hardirq.h
+++ b/arch/alpha/include/asm/hardirq.h
@@ -1,17 +1,9 @@
 #ifndef _ALPHA_HARDIRQ_H
 #define _ALPHA_HARDIRQ_H
 
-#include <linux/threads.h>
-#include <linux/cache.h>
-
-
-/* entry.S is sensitive to the offsets of these fields */
-typedef struct {
-	unsigned long __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
 void ack_bad_irq(unsigned int irq);
+#define ack_bad_irq ack_bad_irq
+
+#include <asm-generic/hardirq.h>
 
 #endif /* _ALPHA_HARDIRQ_H */
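
The "#define ack_bad_irq ack_bad_irq" idiom above is how an architecture tells <asm-generic/hardirq.h> that it supplies its own handler, so the generic default is compiled out. A self-contained userspace sketch of the same convention (the #ifndef guard is what the generic header is assumed to do):

	#include <stdio.h>

	#define ack_bad_irq ack_bad_irq		/* arch opts out of the default */
	void ack_bad_irq(unsigned int irq)	/* arch-specific version */
	{
		fprintf(stderr, "unexpected IRQ %u\n", irq);
	}

	#ifndef ack_bad_irq			/* generic fallback, skipped here */
	static inline void ack_bad_irq(unsigned int irq) { }
	#endif

	int main(void) { ack_bad_irq(7); return 0; }
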
diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h
index 90d7c35d286..99c56d47879 100644
--- a/arch/alpha/include/asm/mman.h
+++ b/arch/alpha/include/asm/mman.h
@@ -28,6 +28,8 @@
 #define MAP_NORESERVE	0x10000		/* don't check for reservations */
 #define MAP_POPULATE	0x20000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x40000		/* do not block on IO */
+#define MAP_STACK	0x80000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x100000	/* create a huge page mapping */
 
 #define MS_ASYNC	1		/* sync memory asynchronously */
 #define MS_SYNC		2		/* synchronous memory sync */
@@ -48,6 +50,9 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_MERGEABLE	12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
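
MADV_MERGEABLE/MADV_UNMERGEABLE are the userspace switch for KSM: a program opts a range of anonymous memory into same-page merging. A minimal sketch, assuming a Linux kernel with KSM available (the fallback #define mirrors the alpha value added above):

	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>

	#ifndef MADV_MERGEABLE
	#define MADV_MERGEABLE 12
	#endif

	int main(void)
	{
		size_t len = 2 * 1024 * 1024;
		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (buf == MAP_FAILED)
			return 1;
		memset(buf, 0, len);		/* identical pages are merge candidates */
		if (madvise(buf, len, MADV_MERGEABLE))
			perror("madvise");	/* e.g. CONFIG_KSM not set */
		munmap(buf, len);
		return 0;
	}
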
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index bfb880af959..d15aedfe606 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -268,11 +268,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
 	   assume it doesn't support sg mapping, and, since we tried to
 	   use direct_map above, it now must be considered an error. */
 	if (! alpha_mv.mv_pci_tbi) {
-		static int been_here = 0; /* Only print the message once. */
-		if (!been_here) {
-			printk(KERN_WARNING "pci_map_single: no HW sg\n");
-			been_here = 1;
-		}
+		printk_once(KERN_WARNING "pci_map_single: no HW sg\n");
 		return 0;
 	}
 
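
printk_once() collapses the open-coded "warn only once" latch removed above. Roughly (a userspace approximation, not the kernel macro), it hides a static flag inside the expansion:

	#include <stdio.h>

	#define pr_once(...)					\
		do {						\
			static int warned;			\
			if (!warned) {				\
				warned = 1;			\
				fprintf(stderr, __VA_ARGS__);	\
			}					\
		} while (0)

	int main(void)
	{
		for (int i = 0; i < 3; i++)
			pr_once("pci_map_single: no HW sg\n");	/* prints once */
		return 0;
	}
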
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index b04e2cbf23a..5d0826654c6 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -408,28 +408,17 @@ time_init(void)
  * part. So we can't do the "find absolute time in terms of cycles" thing
  * that the other ports do.
  */
-void
-do_gettimeofday(struct timeval *tv)
+u32 arch_gettimeoffset(void)
 {
-	unsigned long flags;
-	unsigned long sec, usec, seq;
-	unsigned long delta_cycles, delta_usec, partial_tick;
-
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-
-		delta_cycles = rpcc() - state.last_time;
-		sec = xtime.tv_sec;
-		usec = (xtime.tv_nsec / 1000);
-		partial_tick = state.partial_tick;
-
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
 #ifdef CONFIG_SMP
 	/* Until and unless we figure out how to get cpu cycle counters
 	   in sync and keep them there, we can't use the rpcc tricks. */
-	delta_usec = 0;
+	return 0;
 #else
+	unsigned long delta_cycles, delta_usec, partial_tick;
+
+	delta_cycles = rpcc() - state.last_time;
+	partial_tick = state.partial_tick;
 	/*
 	 * usec = cycles * ticks_per_cycle * 2**48 * 1e6 / (2**48 * ticks)
 	 *	= cycles * (s_t_p_c) * 1e6 / (2**48 * ticks)
@@ -446,64 +435,10 @@ do_gettimeofday(struct timeval *tv)
 	delta_usec = (delta_cycles * state.scaled_ticks_per_cycle
 		      + partial_tick) * 15625;
 	delta_usec = ((delta_usec / ((1UL << (FIX_SHIFT-6-1)) * HZ)) + 1) / 2;
+	return delta_usec * 1000;
 #endif
-
-	usec += delta_usec;
-	if (usec >= 1000000) {
-		sec += 1;
-		usec -= 1000000;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
 }
 
-EXPORT_SYMBOL(do_gettimeofday);
-
-int
-do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-	unsigned long delta_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-
-	/* The offset that is added into time in do_gettimeofday above
-	   must be subtracted out here to keep a coherent view of the
-	   time. Without this, a full-tick error is possible. */
-
-#ifdef CONFIG_SMP
-	delta_nsec = 0;
-#else
-	delta_nsec = rpcc() - state.last_time;
-	delta_nsec = (delta_nsec * state.scaled_ticks_per_cycle
-		      + state.partial_tick) * 15625;
-	delta_nsec = ((delta_nsec / ((1UL << (FIX_SHIFT-6-1)) * HZ)) + 1) / 2;
-	delta_nsec *= 1000;
-#endif
-
-	nsec -= delta_nsec;
-
-	wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-	return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
-
 /*
  * In order to set the CMOS clock precisely, set_rtc_mmss has to be
  * called 500 ms after the second nowtime has started, because when
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index af71d38c8e4..a0902c20d67 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -299,7 +299,7 @@ printk_memory_info(void)
 	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, %luk data, %luk init)\n",
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       max_mapnr << (PAGE_SHIFT-10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 0eab5574942..10b403554b6 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -349,7 +349,7 @@ void __init mem_init(void)
 
 	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, "
 	       "%luk data, %luk init)\n",
-	       (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       num_physpages << (PAGE_SHIFT-10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 7350557a81e..54661125a8b 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -25,7 +25,7 @@ KBUILD_CFLAGS +=$(call cc-option,-marm,)
 # Select a platform tht is kept up-to-date
 KBUILD_DEFCONFIG := versatile_defconfig
 
-# defines filename extension depending memory manement type.
+# defines filename extension depending memory management type.
 ifeq ($(CONFIG_MMU),)
 MMUEXT := -nommu
 endif
diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h
index fc26976d8e3..8eebf89f5ab 100644
--- a/arch/arm/include/asm/mman.h
+++ b/arch/arm/include/asm/mman.h
@@ -1,17 +1 @@
-#ifndef __ARM_MMAN_H__
-#define __ARM_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __ARM_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c
index 61e52b66bc7..50667bed7cc 100644
--- a/arch/arm/mach-at91/board-afeb-9260v1.c
+++ b/arch/arm/mach-at91/board-afeb-9260v1.c
@@ -53,7 +53,7 @@ static void __init afeb9260_map_io(void)
 	/* Initialize processor: 18.432 MHz crystal */
 	at91sam9260_initialize(18432000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
diff --git a/arch/arm/mach-at91/board-cam60.c b/arch/arm/mach-at91/board-cam60.c
index d3ba29c5d8c..02138af631e 100644
--- a/arch/arm/mach-at91/board-cam60.c
+++ b/arch/arm/mach-at91/board-cam60.c
@@ -50,7 +50,7 @@ static void __init cam60_map_io(void)
 	/* Initialize processor: 10 MHz crystal */
 	at91sam9260_initialize(10000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* set serial console to ttyS0 (ie, DBGU) */
diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c
index 9ba7ba2cc3b..8c0b71c95be 100644
--- a/arch/arm/mach-at91/board-neocore926.c
+++ b/arch/arm/mach-at91/board-neocore926.c
@@ -56,7 +56,7 @@ static void __init neocore926_map_io(void)
 	/* Initialize processor: 20 MHz crystal */
 	at91sam9263_initialize(20000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, RTS, CTS) */
diff --git a/arch/arm/mach-at91/board-qil-a9260.c b/arch/arm/mach-at91/board-qil-a9260.c
index 4cff9a7e61d..664938e8f66 100644
--- a/arch/arm/mach-at91/board-qil-a9260.c
+++ b/arch/arm/mach-at91/board-qil-a9260.c
@@ -53,7 +53,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 12.000 MHz crystal */
 	at91sam9260_initialize(12000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
diff --git a/arch/arm/mach-at91/board-sam9260ek.c b/arch/arm/mach-at91/board-sam9260ek.c
index 93a0f8b100e..ba9d501b5c5 100644
--- a/arch/arm/mach-at91/board-sam9260ek.c
+++ b/arch/arm/mach-at91/board-sam9260ek.c
@@ -54,7 +54,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 18.432 MHz crystal */
 	at91sam9260_initialize(18432000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
diff --git a/arch/arm/mach-at91/board-sam9261ek.c b/arch/arm/mach-at91/board-sam9261ek.c
index f9b19993a7a..c4c8865d52d 100644
--- a/arch/arm/mach-at91/board-sam9261ek.c
+++ b/arch/arm/mach-at91/board-sam9261ek.c
@@ -61,7 +61,7 @@ static void __init ek_map_io(void)
 	/* Setup the LEDs */
 	at91_init_leds(AT91_PIN_PA13, AT91_PIN_PA14);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* set serial console to ttyS0 (ie, DBGU) */
diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c
index 1bf7bd4cbe1..26f1aa6049a 100644
--- a/arch/arm/mach-at91/board-sam9263ek.c
+++ b/arch/arm/mach-at91/board-sam9263ek.c
@@ -57,7 +57,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 16.367 MHz crystal */
 	at91sam9263_initialize(16367660);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, RTS, CTS) */
diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c
index ca470d504ea..29cf8317748 100644
--- a/arch/arm/mach-at91/board-sam9g20ek.c
+++ b/arch/arm/mach-at91/board-sam9g20ek.c
@@ -50,7 +50,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 18.432 MHz crystal */
 	at91sam9260_initialize(18432000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */
diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c
index 9d07679efce..94ffb5c103b 100644
--- a/arch/arm/mach-at91/board-sam9rlek.c
+++ b/arch/arm/mach-at91/board-sam9rlek.c
@@ -43,7 +43,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 12.000 MHz crystal */
 	at91sam9rl_initialize(12000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* USART0 on ttyS1. (Rx, Tx, CTS, RTS) */
diff --git a/arch/arm/mach-at91/board-usb-a9260.c b/arch/arm/mach-at91/board-usb-a9260.c
index d13304c0bc4..905d6ef7680 100644
--- a/arch/arm/mach-at91/board-usb-a9260.c
+++ b/arch/arm/mach-at91/board-usb-a9260.c
@@ -53,7 +53,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 12.000 MHz crystal */
 	at91sam9260_initialize(12000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* set serial console to ttyS0 (ie, DBGU) */
diff --git a/arch/arm/mach-at91/board-usb-a9263.c b/arch/arm/mach-at91/board-usb-a9263.c
index d96405b7d57..b6a3480383e 100644
--- a/arch/arm/mach-at91/board-usb-a9263.c
+++ b/arch/arm/mach-at91/board-usb-a9263.c
@@ -52,7 +52,7 @@ static void __init ek_map_io(void)
 	/* Initialize processor: 12.00 MHz crystal */
 	at91sam9263_initialize(12000000);
 
-	/* DGBU on ttyS0. (Rx & Tx only) */
+	/* DBGU on ttyS0. (Rx & Tx only) */
 	at91_register_uart(0, 0, 0);
 
 	/* set serial console to ttyS0 (ie, DBGU) */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index ea36186f32c..f982606d7bf 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -596,8 +596,8 @@ void __init mem_init(void)
 
 	printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
 		"%dK data, %dK init, %luK highmem)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
-		codesize >> 10, datasize >> 10, initsize >> 10,
+		nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10,
+		datasize >> 10, initsize >> 10,
 		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
 
 	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
diff --git a/arch/avr32/include/asm/mman.h b/arch/avr32/include/asm/mman.h
index 9a92b15f6a6..8eebf89f5ab 100644
--- a/arch/avr32/include/asm/mman.h
+++ b/arch/avr32/include/asm/mman.h
@@ -1,17 +1 @@
-#ifndef __ASM_AVR32_MMAN_H__
-#define __ASM_AVR32_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) page tables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __ASM_AVR32_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index e819fa69a90..376f18c4a6c 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -141,7 +141,7 @@ void __init mem_init(void)
 
 	printk ("Memory: %luk/%luk available (%dk kernel code, "
 		"%dk reserved, %dk data, %dk init)\n",
-		(unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
+		nr_free_pages() << (PAGE_SHIFT - 10),
 		totalram_pages << (PAGE_SHIFT - 10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT - 10),
diff --git a/arch/blackfin/mach-bf538/include/mach/defBF539.h b/arch/blackfin/mach-bf538/include/mach/defBF539.h
index bdc330cd0e1..1c58914a874 100644
--- a/arch/blackfin/mach-bf538/include/mach/defBF539.h
+++ b/arch/blackfin/mach-bf538/include/mach/defBF539.h
@@ -2325,7 +2325,7 @@
 #define AMBEN_B0_B1	0x0004	/* Enable Asynchronous Memory Banks 0 & 1 only */
 #define AMBEN_B0_B1_B2	0x0006	/* Enable Asynchronous Memory Banks 0, 1, and 2 */
 #define AMBEN_ALL	0x0008	/* Enable Asynchronous Memory Banks (all) 0, 1, 2, and 3 */
-#define CDPRIO	0x0100	/* DMA has priority over core for for external accesses */
+#define CDPRIO	0x0100	/* DMA has priority over core for external accesses */
 
 /* EBIU_AMGCTL Bit Positions */
 #define AMCKEN_P	0x0000	/* Enable CLKOUT */
diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/asm/mman.h
index b7f0afba3ce..8eebf89f5ab 100644
--- a/arch/cris/include/asm/mman.h
+++ b/arch/cris/include/asm/mman.h
@@ -1,19 +1 @@
-#ifndef __CRIS_MMAN_H__
-#define __CRIS_MMAN_H__
-
-/* verbatim copy of asm-i386/ version */
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __CRIS_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index 514f46a4b23..ff68b9f516a 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -54,7 +54,7 @@ mem_init(void)
 	printk(KERN_INFO
 	       "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, "
 	       "%dk init)\n" ,
-	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       nr_free_pages() << (PAGE_SHIFT-10),
 	       max_mapnr << (PAGE_SHIFT-10),
 	       codesize >> 10,
 	       reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/frv/include/asm/mman.h b/arch/frv/include/asm/mman.h
index 58c1d11e2ac..8eebf89f5ab 100644
--- a/arch/frv/include/asm/mman.h
+++ b/arch/frv/include/asm/mman.h
@@ -1,18 +1 @@
-#ifndef __ASM_MMAN_H__
-#define __ASM_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __ASM_MMAN_H__ */
-
+#include <asm-generic/mman.h>
diff --git a/arch/frv/lib/cache.S b/arch/frv/lib/cache.S
index 0e10ad8dc46..0c4fb204911 100644
--- a/arch/frv/lib/cache.S
+++ b/arch/frv/lib/cache.S
@@ -1,4 +1,4 @@
-/* cache.S: cache managment routines
+/* cache.S: cache management routines
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
diff --git a/arch/h8300/include/asm/hardirq.h b/arch/h8300/include/asm/hardirq.h
index 9d7f7a7462b..c2e1aa0f0d1 100644
--- a/arch/h8300/include/asm/hardirq.h
+++ b/arch/h8300/include/asm/hardirq.h
@@ -1,18 +1,7 @@
 #ifndef __H8300_HARDIRQ_H
 #define __H8300_HARDIRQ_H
 
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
-
-extern void ack_bad_irq(unsigned int irq);
+#include <asm/irq.h>
 
 #define HARDIRQ_BITS	8
 
@@ -25,4 +14,6 @@ extern void ack_bad_irq(unsigned int irq);
 # error HARDIRQ_BITS is too low!
 #endif
 
+#include <asm-generic/hardirq.h>
+
 #endif
diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/asm/mman.h
index cf35f0a6f12..8eebf89f5ab 100644
--- a/arch/h8300/include/asm/mman.h
+++ b/arch/h8300/include/asm/mman.h
@@ -1,17 +1 @@
-#ifndef __H8300_MMAN_H__
-#define __H8300_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __H8300_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c
index 74f8dd7b34d..5c913d47211 100644
--- a/arch/h8300/kernel/irq.c
+++ b/arch/h8300/kernel/irq.c
@@ -81,11 +81,6 @@ struct irq_chip h8300irq_chip = {
 	.end = h8300_end_irq,
 };
 
-void ack_bad_irq(unsigned int irq)
-{
-	printk("unexpected IRQ trap at vector %02x\n", irq);
-}
-
 #if defined(CONFIG_RAMKERNEL)
 static unsigned long __init *get_vector_address(void)
 {
diff --git a/arch/h8300/kernel/timer/tpu.c b/arch/h8300/kernel/timer/tpu.c
index e7c6e614a75..2193a2e2859 100644
--- a/arch/h8300/kernel/timer/tpu.c
+++ b/arch/h8300/kernel/timer/tpu.c
@@ -7,7 +7,6 @@
  *
  */
 
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 16ef61a91d9..625ed8f76fc 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1270,7 +1270,7 @@ putreg (struct task_struct *child, int regno, unsigned int value)
 	      case PT_CS:
 		if (value != __USER_CS)
 			printk(KERN_ERR
-			       "ia32.putreg: attempt to to set invalid segment register %d = %x\n",
+			       "ia32.putreg: attempt to set invalid segment register %d = %x\n",
 			       regno, value);
 		break;
 	      default:
diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
index 48cf8b98a0b..4459028e5aa 100644
--- a/arch/ia64/include/asm/mman.h
+++ b/arch/ia64/include/asm/mman.h
@@ -8,19 +8,9 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
  */
 
-#include <asm-generic/mman-common.h>
+#include <asm-generic/mman.h>
 
-#define MAP_GROWSDOWN	0x00100	/* stack-like segment */
-#define MAP_GROWSUP	0x00200	/* register stack-like segment */
-#define MAP_DENYWRITE	0x00800	/* ETXTBSY */
-#define MAP_EXECUTABLE	0x01000	/* mark it as an executable */
-#define MAP_LOCKED	0x02000	/* pages are locked */
-#define MAP_NORESERVE	0x04000	/* don't check for reservations */
-#define MAP_POPULATE	0x08000	/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000	/* do not block on IO */
-
-#define MCL_CURRENT	1	/* lock all current mappings */
-#define MCL_FUTURE	2	/* lock all future mappings */
+#define MAP_GROWSUP	0x0200	/* register stack-like segment */
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index b115b3bbf04..1d286244a56 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -655,7 +655,7 @@ mem_init (void)
 	initsize = (unsigned long) __init_end - (unsigned long) __init_begin;
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, "
-	       "%luk data, %luk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
+	       "%luk data, %luk init)\n", nr_free_pages() << (PAGE_SHIFT - 10),
 	       num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
 	       reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
 
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index cabba332cc4..c41234f1b82 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -41,6 +41,12 @@ config HZ
 	int
 	default 100
 
+config GENERIC_TIME
+	def_bool y
+
+config ARCH_USES_GETTIMEOFFSET
+	def_bool y
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/m32r/include/asm/hardirq.h b/arch/m32r/include/asm/hardirq.h
index cb8aa762f23..4c31c0ae215 100644
--- a/arch/m32r/include/asm/hardirq.h
+++ b/arch/m32r/include/asm/hardirq.h
@@ -2,14 +2,7 @@
 #ifndef __ASM_HARDIRQ_H
 #define __ASM_HARDIRQ_H
 
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+#include <asm/irq.h>
 
 #if NR_IRQS > 256
 #define HARDIRQ_BITS	9
@@ -26,11 +19,7 @@ typedef struct {
 # error HARDIRQ_BITS is too low!
 #endif
 
-static inline void ack_bad_irq(int irq)
-{
-	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
-	BUG();
-}
+#include <asm-generic/hardirq.h>
 
 #endif /* __ASM_HARDIRQ_H */
 #endif /* __KERNEL__ */
diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/asm/mman.h
index 04a5f40aa40..8eebf89f5ab 100644
--- a/arch/m32r/include/asm/mman.h
+++ b/arch/m32r/include/asm/mman.h
@@ -1,17 +1 @@
-#ifndef __M32R_MMAN_H__
-#define __M32R_MMAN_H__
-
-#include <asm-generic/mman-common.h>
-
-#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
-#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
-#define MAP_LOCKED	0x2000		/* pages are locked */
-#define MAP_NORESERVE	0x4000		/* don't check for reservations */
-#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
-#define MAP_NONBLOCK	0x10000		/* do not block on IO */
-
-#define MCL_CURRENT	1		/* lock all current mappings */
-#define MCL_FUTURE	2		/* lock all future mappings */
-
-#endif /* __M32R_MMAN_H__ */
+#include <asm-generic/mman.h>
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 98b8feb12ed..98682bba0ed 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -77,7 +77,7 @@ static int ptrace_read_user(struct task_struct *tsk, unsigned long off,
 	struct user * dummy = NULL;
 #endif
 
-	if ((off & 3) || (off < 0) || (off > sizeof(struct user) - 3))
+	if ((off & 3) || off > sizeof(struct user) - 3)
 		return -EIO;
 
 	off >>= 2;
@@ -139,8 +139,7 @@ static int ptrace_write_user(struct task_struct *tsk, unsigned long off,
 	struct user * dummy = NULL;
 #endif
 
-	if ((off & 3) || off < 0 ||
-	    off > sizeof(struct user) - 3)
+	if ((off & 3) || off > sizeof(struct user) - 3)
 		return -EIO;
 
 	off >>= 2;
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 2547d6c4a82..655ea1c47a0 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -213,7 +213,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		if (!physid_isset(phys_id, phys_cpu_present_map))
 			continue;
 
-		if ((max_cpus >= 0) && (max_cpus <= cpucount + 1))
+		if (max_cpus <= cpucount + 1)
 			continue;
 
 		do_boot_cpu(phys_id);
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index cada3ba4b99..ba61c4c7320 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -48,7 +48,7 @@ extern void smp_local_timer_interrupt(void);
 
 static unsigned long latch;
 
-static unsigned long do_gettimeoffset(void)
+u32 arch_gettimeoffset(void)
 {
 	unsigned long elapsed_time = 0;	/* [us] */
 
@@ -93,79 +93,10 @@ static unsigned long do_gettimeoffset(void)
 #error no chip configuration
 #endif
 
-	return elapsed_time;
+	return elapsed_time * 1000;
 }
 
 /*
- * This version of gettimeofday has near microsecond resolution.
- */
-void do_gettimeofday(struct timeval *tv)
-{
-	unsigned long seq;
-	unsigned long usec, sec;
-	unsigned long max_ntp_tick = tick_usec - tickadj;
-
-	do {
-		seq = read_seqbegin(&xtime_lock);
-
-		usec = do_gettimeoffset();
-
-		/*
-		 * If time_adjust is negative then NTP is slowing the clock
-		 * so make sure not to go into next possible interval.
-		 * Better to lose some accuracy than have time go backwards..
-		 */
-		if (unlikely(time_adjust < 0))
-			usec = min(usec, max_ntp_tick);
-
-		sec = xtime.tv_sec;
-		usec += (xtime.tv_nsec / 1000);
-	} while (read_seqretry(&xtime_lock, seq));
-
-	while (usec >= 1000000) {
-		usec -= 1000000;
-		sec++;
-	}
-
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec, nsec = tv->tv_nsec;
-
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
-
-	write_seqlock_irq(&xtime_lock);
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time. Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= do_gettimeoffset() * NSEC_PER_USEC;
-
-	wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	ntp_clear();
-	write_sequnlock_irq(&xtime_lock);
-	clock_was_set();
-
-	return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
-/*
  * In order to set the CMOS clock precisely, set_rtc_mmss has to be
  * called 500 ms after the second nowtime has started, because when
  * nowtime is written into the registers of the CMOS clock, it will
@@ -192,6 +123,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
192#ifndef CONFIG_SMP 123#ifndef CONFIG_SMP
193 profile_tick(CPU_PROFILING); 124 profile_tick(CPU_PROFILING);
194#endif 125#endif
126 /* XXX FIXME. Uh, the xtime_lock should be held here, no? */
195 do_timer(1); 127 do_timer(1);
196 128
197#ifndef CONFIG_SMP 129#ifndef CONFIG_SMP
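[Note: this file converts m32r to the generic timekeeping code. The architecture now supplies only arch_gettimeoffset(), which reports nanoseconds elapsed since the last tick (hence the "* 1000" on a microsecond count), and the generic core provides do_gettimeofday()/do_settimeofday(), so both can be deleted wholesale; m68k below follows the same pattern via mach_gettimeoffset(). A standalone sketch of the contract, where read_timer_elapsed_us() is a made-up stand-in for the hardware register access in the hunk:]

#include <stdint.h>
#include <stdio.h>

/* Hypothetical tick-timer read-out, standing in for the m32r/m68k
 * platform timer access. */
static unsigned long read_timer_elapsed_us(void)
{
        return 250;     /* pretend 250 us have passed since the last tick */
}

/* The ARCH_USES_GETTIMEOFFSET contract: return nanoseconds since the
 * last timer tick.  The generic timekeeping core adds this to xtime
 * to interpolate between ticks. */
static uint32_t arch_gettimeoffset_demo(void)
{
        return read_timer_elapsed_us() * 1000;
}

int main(void)
{
        printf("offset: %u ns\n", arch_gettimeoffset_demo());
        return 0;
}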
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 24d429f9358..9f581df3952 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -171,7 +171,7 @@ void __init mem_init(void)
171 171
172 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " 172 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
173 "%dk reserved, %dk data, %dk init)\n", 173 "%dk reserved, %dk data, %dk init)\n",
174 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 174 nr_free_pages() << (PAGE_SHIFT-10),
175 num_physpages << (PAGE_SHIFT-10), 175 num_physpages << (PAGE_SHIFT-10),
176 codesize >> 10, 176 codesize >> 10,
177 reservedpages << (PAGE_SHIFT-10), 177 reservedpages << (PAGE_SHIFT-10),
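[Note: this printk hunk repeats across nearly every architecture in the patch. nr_free_pages() already returns an unsigned long (it reads the NR_FREE_PAGES vmstat counter), so the (unsigned long) cast is a leftover and the value feeds the %lu conversion directly, as a kernel fragment:]

        printk(KERN_INFO "Memory: %luk available\n",
               nr_free_pages() << (PAGE_SHIFT - 10));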
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index fb87c08c6b5..29dd8489ffe 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -58,6 +58,12 @@ config HZ
58 int 58 int
59 default 100 59 default 100
60 60
61config GENERIC_TIME
62 def_bool y
63
64config ARCH_USES_GETTIMEOFFSET
65 def_bool y
66
61mainmenu "Linux/68k Kernel Configuration" 67mainmenu "Linux/68k Kernel Configuration"
62 68
63source "init/Kconfig" 69source "init/Kconfig"
diff --git a/arch/m68k/include/asm/hardirq_mm.h b/arch/m68k/include/asm/hardirq_mm.h
index 394ee946015..554f65b6cd3 100644
--- a/arch/m68k/include/asm/hardirq_mm.h
+++ b/arch/m68k/include/asm/hardirq_mm.h
@@ -1,16 +1,8 @@
1#ifndef __M68K_HARDIRQ_H 1#ifndef __M68K_HARDIRQ_H
2#define __M68K_HARDIRQ_H 2#define __M68K_HARDIRQ_H
3 3
4#include <linux/threads.h>
5#include <linux/cache.h>
6
7/* entry.S is sensitive to the offsets of these fields */
8typedef struct {
9 unsigned int __softirq_pending;
10} ____cacheline_aligned irq_cpustat_t;
11
12#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
13
14#define HARDIRQ_BITS 8 4#define HARDIRQ_BITS 8
15 5
6#include <asm-generic/hardirq.h>
7
16#endif 8#endif
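[Note: m68k (and UML further down) now pull irq_cpustat_t from asm-generic/hardirq.h instead of defining an identical structure per architecture. Roughly — a paraphrase, not the verbatim header — the generic file supplies:]

/* Approximate shape of <asm-generic/hardirq.h> at this point; see the
 * real header for the authoritative version. */
typedef struct {
        unsigned int __softirq_pending;
} ____cacheline_aligned irq_cpustat_t;

#include <linux/irq_cpustat.h>  /* standard per-CPU accessors */

#ifndef ack_bad_irq
static inline void ack_bad_irq(unsigned int irq)
{
        printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
}
#endif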
diff --git a/arch/m68k/include/asm/mman.h b/arch/m68k/include/asm/mman.h
index 9f5c4c4b3c7..8eebf89f5ab 100644
--- a/arch/m68k/include/asm/mman.h
+++ b/arch/m68k/include/asm/mman.h
@@ -1,17 +1 @@
1#ifndef __M68K_MMAN_H__ #include <asm-generic/mman.h>
2#define __M68K_MMAN_H__
3
4#include <asm-generic/mman-common.h>
5
6#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
7#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
8#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
9#define MAP_LOCKED 0x2000 /* pages are locked */
10#define MAP_NORESERVE 0x4000 /* don't check for reservations */
11#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
12#define MAP_NONBLOCK 0x10000 /* do not block on IO */
13
14#define MCL_CURRENT 1 /* lock all current mappings */
15#define MCL_FUTURE 2 /* lock all future mappings */
16
17#endif /* __M68K_MMAN_H__ */
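[Note: since the arch-specific values match the generic defaults, the whole header collapses to one include. For reference, the replacement <asm-generic/mman.h> supplies approximately the block below; MAP_STACK and MAP_HUGETLB are the flags this series introduces, and architectures with non-default numbering (MIPS, PARISC, SPARC, PowerPC, Xtensa later in the patch) add them by hand with their own values:]

/* Approximate content of <asm-generic/mman.h>: */
#include <asm-generic/mman-common.h>

#define MAP_GROWSDOWN   0x0100          /* stack-like segment */
#define MAP_DENYWRITE   0x0800          /* ETXTBSY */
#define MAP_EXECUTABLE  0x1000          /* mark it as an executable */
#define MAP_LOCKED      0x2000          /* pages are locked */
#define MAP_NORESERVE   0x4000          /* don't check for reservations */
#define MAP_POPULATE    0x8000          /* populate (prefault) pagetables */
#define MAP_NONBLOCK    0x10000         /* do not block on IO */
#define MAP_STACK       0x20000         /* suited for process/thread stacks */
#define MAP_HUGETLB     0x40000         /* create a huge page mapping */

#define MCL_CURRENT     1               /* lock all current mappings */
#define MCL_FUTURE      2               /* lock all future mappings */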
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 54d980795fc..17dc2a31a7c 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -91,77 +91,11 @@ void __init time_init(void)
91 mach_sched_init(timer_interrupt); 91 mach_sched_init(timer_interrupt);
92} 92}
93 93
94/* 94u32 arch_gettimeoffset(void)
95 * This version of gettimeofday has near microsecond resolution.
96 */
97void do_gettimeofday(struct timeval *tv)
98{ 95{
99 unsigned long flags; 96 return mach_gettimeoffset() * 1000;
100 unsigned long seq;
101 unsigned long usec, sec;
102 unsigned long max_ntp_tick = tick_usec - tickadj;
103
104 do {
105 seq = read_seqbegin_irqsave(&xtime_lock, flags);
106
107 usec = mach_gettimeoffset();
108
109 /*
110 * If time_adjust is negative then NTP is slowing the clock
111 * so make sure not to go into next possible interval.
112 * Better to lose some accuracy than have time go backwards..
113 */
114 if (unlikely(time_adjust < 0))
115 usec = min(usec, max_ntp_tick);
116
117 sec = xtime.tv_sec;
118 usec += xtime.tv_nsec/1000;
119 } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
120
121
122 while (usec >= 1000000) {
123 usec -= 1000000;
124 sec++;
125 }
126
127 tv->tv_sec = sec;
128 tv->tv_usec = usec;
129}
130
131EXPORT_SYMBOL(do_gettimeofday);
132
133int do_settimeofday(struct timespec *tv)
134{
135 time_t wtm_sec, sec = tv->tv_sec;
136 long wtm_nsec, nsec = tv->tv_nsec;
137
138 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
139 return -EINVAL;
140
141 write_seqlock_irq(&xtime_lock);
142 /* This is revolting. We need to set the xtime.tv_nsec
143 * correctly. However, the value in this location is
144 * is value at the last tick.
145 * Discover what correction gettimeofday
146 * would have done, and then undo it!
147 */
148 nsec -= 1000 * mach_gettimeoffset();
149
150 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
151 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
152
153 set_normalized_timespec(&xtime, sec, nsec);
154 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
155
156 ntp_clear();
157 write_sequnlock_irq(&xtime_lock);
158 clock_was_set();
159 return 0;
160} 97}
161 98
162EXPORT_SYMBOL(do_settimeofday);
163
164
165static int __init rtc_init(void) 99static int __init rtc_init(void)
166{ 100{
167 struct platform_device *pdev; 101 struct platform_device *pdev;
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 0007b2adf3a..774549accd2 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -126,7 +126,7 @@ void __init mem_init(void)
126#endif 126#endif
127 127
128 printk("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n", 128 printk("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n",
129 (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), 129 nr_free_pages() << (PAGE_SHIFT-10),
130 totalram_pages << (PAGE_SHIFT-10), 130 totalram_pages << (PAGE_SHIFT-10),
131 codepages << (PAGE_SHIFT-10), 131 codepages << (PAGE_SHIFT-10),
132 datapages << (PAGE_SHIFT-10), 132 datapages << (PAGE_SHIFT-10),
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index f207f1a94db..1110784eb3f 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -204,7 +204,7 @@ void __init mem_init(void)
204 totalram_pages += free_all_bootmem(); 204 totalram_pages += free_all_bootmem();
205 205
206 printk(KERN_INFO "Memory: %luk/%luk available\n", 206 printk(KERN_INFO "Memory: %luk/%luk available\n",
207 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 207 nr_free_pages() << (PAGE_SHIFT-10),
208 num_physpages << (PAGE_SHIFT-10)); 208 num_physpages << (PAGE_SHIFT-10));
209#ifdef CONFIG_MMU 209#ifdef CONFIG_MMU
210 mem_init_done = 1; 210 mem_init_done = 1;
diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h
index e4d6f1fb1cf..a2250f390a2 100644
--- a/arch/mips/include/asm/mman.h
+++ b/arch/mips/include/asm/mman.h
@@ -46,6 +46,8 @@
46#define MAP_LOCKED 0x8000 /* pages are locked */ 46#define MAP_LOCKED 0x8000 /* pages are locked */
47#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ 47#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */
48#define MAP_NONBLOCK 0x20000 /* do not block on IO */ 48#define MAP_NONBLOCK 0x20000 /* do not block on IO */
49#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
50#define MAP_HUGETLB 0x80000 /* create a huge page mapping */
49 51
50/* 52/*
51 * Flags for msync 53 * Flags for msync
@@ -71,6 +73,9 @@
71#define MADV_DONTFORK 10 /* don't inherit across fork */ 73#define MADV_DONTFORK 10 /* don't inherit across fork */
72#define MADV_DOFORK 11 /* do inherit across fork */ 74#define MADV_DOFORK 11 /* do inherit across fork */
73 75
76#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
77#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
78
74/* compatibility flags */ 79/* compatibility flags */
75#define MAP_FILE 0 80#define MAP_FILE 0
76 81
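[Note: MIPS keeps its own flag numbering, so the new MAP_HUGETLB mapping flag and the KSM advice values are added here explicitly rather than inherited from asm-generic. A userspace sketch of what the two features do — the fallback #defines use the generic values and are illustration only, and MAP_HUGETLB fails with ENOMEM unless huge pages have been reserved by the administrator:]

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB     0x40000 /* generic value; arch values differ, as above */
#endif
#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE  12      /* generic value; 65 on PARISC, see below */
#endif

int main(void)
{
        size_t huge = 2UL * 1024 * 1024;        /* one x86 huge page */
        size_t len  = 64UL * 4096;

        /* Anonymous mapping backed by the hugetlb pool. */
        void *h = mmap(NULL, huge, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
        if (h == MAP_FAILED)
                perror("mmap(MAP_HUGETLB)");
        else
                munmap(h, huge);

        /* Ordinary anonymous memory offered to KSM for merging. */
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p != MAP_FAILED) {
                memset(p, 0x5a, len);
                if (madvise(p, len, MADV_MERGEABLE))
                        perror("madvise(MADV_MERGEABLE)");
                munmap(p, len);
        }
        return 0;
}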
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 1a9f9b25755..d6eb6134abe 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -76,6 +76,16 @@ extern unsigned long zero_page_mask;
76#define ZERO_PAGE(vaddr) \ 76#define ZERO_PAGE(vaddr) \
77 (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))) 77 (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
78 78
79#define is_zero_pfn is_zero_pfn
80static inline int is_zero_pfn(unsigned long pfn)
81{
82 extern unsigned long zero_pfn;
83 unsigned long offset_from_zero_pfn = pfn - zero_pfn;
84 return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
85}
86
87#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr))
88
79extern void paging_init(void); 89extern void paging_init(void);
80 90
81/* 91/*
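[Note: MIPS provides several zero pages at consecutive pfns (colored to avoid virtual cache aliasing), so it overrides is_zero_pfn() to accept any of them. The single unsigned comparison also covers pfn < zero_pfn, because the subtraction wraps. A standalone demonstration with made-up numbers:]

#include <stdio.h>

/* Stand-ins for the kernel symbols used above: assume 4 colored zero
 * pages starting at pfn 1000. */
static unsigned long zero_pfn = 1000;
static unsigned long zero_page_mask_pfns = 3;   /* zero_page_mask >> PAGE_SHIFT */

static int is_zero_pfn(unsigned long pfn)
{
        /* If pfn < zero_pfn the subtraction wraps to a huge unsigned
         * value, so the one <= test also rejects pfns below the range:
         * no explicit lower-bound check is needed. */
        unsigned long offset_from_zero_pfn = pfn - zero_pfn;
        return offset_from_zero_pfn <= zero_page_mask_pfns;
}

int main(void)
{
        printf("%d %d %d\n",
               is_zero_pfn(999), is_zero_pfn(1002), is_zero_pfn(1004));
        /* prints: 0 1 0 */
        return 0;
}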
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 38c79c55b06..1f4ee4797a6 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -417,7 +417,7 @@ void __init mem_init(void)
417 417
418 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " 418 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
419 "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", 419 "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n",
420 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 420 nr_free_pages() << (PAGE_SHIFT-10),
421 ram << (PAGE_SHIFT-10), 421 ram << (PAGE_SHIFT-10),
422 codesize >> 10, 422 codesize >> 10,
423 reservedpages << (PAGE_SHIFT-10), 423 reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/mn10300/include/asm/cacheflush.h b/arch/mn10300/include/asm/cacheflush.h
index 2db746a251f..1a55d61f0d0 100644
--- a/arch/mn10300/include/asm/cacheflush.h
+++ b/arch/mn10300/include/asm/cacheflush.h
@@ -17,7 +17,7 @@
17#include <linux/mm.h> 17#include <linux/mm.h>
18 18
19/* 19/*
20 * virtually-indexed cache managment (our cache is physically indexed) 20 * virtually-indexed cache management (our cache is physically indexed)
21 */ 21 */
22#define flush_cache_all() do {} while (0) 22#define flush_cache_all() do {} while (0)
23#define flush_cache_mm(mm) do {} while (0) 23#define flush_cache_mm(mm) do {} while (0)
@@ -31,7 +31,7 @@
31#define flush_dcache_mmap_unlock(mapping) do {} while (0) 31#define flush_dcache_mmap_unlock(mapping) do {} while (0)
32 32
33/* 33/*
34 * physically-indexed cache managment 34 * physically-indexed cache management
35 */ 35 */
36#ifndef CONFIG_MN10300_CACHE_DISABLED 36#ifndef CONFIG_MN10300_CACHE_DISABLED
37 37
diff --git a/arch/mn10300/include/asm/mman.h b/arch/mn10300/include/asm/mman.h
index d04fac1da5a..8eebf89f5ab 100644
--- a/arch/mn10300/include/asm/mman.h
+++ b/arch/mn10300/include/asm/mman.h
@@ -1,28 +1 @@
1/* MN10300 Constants for mmap and co. #include <asm-generic/mman.h>
2 *
3 * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
5 * - Derived from asm-x86/mman.h
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public Licence
9 * as published by the Free Software Foundation; either version
10 * 2 of the Licence, or (at your option) any later version.
11 */
12#ifndef _ASM_MMAN_H
13#define _ASM_MMAN_H
14
15#include <asm-generic/mman-common.h>
16
17#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
18#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
19#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
20#define MAP_LOCKED 0x2000 /* pages are locked */
21#define MAP_NORESERVE 0x4000 /* don't check for reservations */
22#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
23#define MAP_NONBLOCK 0x10000 /* do not block on IO */
24
25#define MCL_CURRENT 1 /* lock all current mappings */
26#define MCL_FUTURE 2 /* lock all future mappings */
27
28#endif /* _ASM_MMAN_H */
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index 8cee387a24f..ec1420562dc 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -112,7 +112,7 @@ void __init mem_init(void)
112 "Memory: %luk/%luk available" 112 "Memory: %luk/%luk available"
113 " (%dk kernel code, %dk reserved, %dk data, %dk init," 113 " (%dk kernel code, %dk reserved, %dk data, %dk init,"
114 " %ldk highmem)\n", 114 " %ldk highmem)\n",
115 (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10), 115 nr_free_pages() << (PAGE_SHIFT - 10),
116 max_mapnr << (PAGE_SHIFT - 10), 116 max_mapnr << (PAGE_SHIFT - 10),
117 codesize >> 10, 117 codesize >> 10,
118 reservedpages << (PAGE_SHIFT - 10), 118 reservedpages << (PAGE_SHIFT - 10),
diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h
index defe752cc99..9749c8afe83 100644
--- a/arch/parisc/include/asm/mman.h
+++ b/arch/parisc/include/asm/mman.h
@@ -22,6 +22,8 @@
22#define MAP_GROWSDOWN 0x8000 /* stack-like segment */ 22#define MAP_GROWSDOWN 0x8000 /* stack-like segment */
23#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ 23#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */
24#define MAP_NONBLOCK 0x20000 /* do not block on IO */ 24#define MAP_NONBLOCK 0x20000 /* do not block on IO */
25#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
26#define MAP_HUGETLB 0x80000 /* create a huge page mapping */
25 27
26#define MS_SYNC 1 /* synchronous memory sync */ 28#define MS_SYNC 1 /* synchronous memory sync */
27#define MS_ASYNC 2 /* sync memory asynchronously */ 29#define MS_ASYNC 2 /* sync memory asynchronously */
@@ -54,6 +56,9 @@
54#define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ 56#define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */
55#define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ 57#define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */
56 58
59#define MADV_MERGEABLE 65 /* KSM may merge identical pages */
60#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */
61
57/* compatibility flags */ 62/* compatibility flags */
58#define MAP_FILE 0 63#define MAP_FILE 0
59#define MAP_VARIABLE 0 64#define MAP_VARIABLE 0
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b0831d9e35c..d5aca31fddb 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -506,7 +506,7 @@ void __init mem_init(void)
506#endif 506#endif
507 507
508 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", 508 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
509 (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), 509 nr_free_pages() << (PAGE_SHIFT-10),
510 num_physpages << (PAGE_SHIFT-10), 510 num_physpages << (PAGE_SHIFT-10),
511 codesize >> 10, 511 codesize >> 10,
512 reservedpages << (PAGE_SHIFT-10), 512 reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 7b1c49811a2..d4a7f645c5d 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -25,6 +25,8 @@
25 25
26#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ 26#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
27#define MAP_NONBLOCK 0x10000 /* do not block on IO */ 27#define MAP_NONBLOCK 0x10000 /* do not block on IO */
28#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
29#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
28 30
29#ifdef __KERNEL__ 31#ifdef __KERNEL__
30#ifdef CONFIG_PPC64 32#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index acb74a17bbb..b4b167b3364 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * udbg for for NS16550 compatable serial ports 2 * udbg for NS16550 compatable serial ports
3 * 3 *
4 * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp 4 * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
5 * 5 *
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 579382c163a..0e5c59b995e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -372,7 +372,7 @@ void __init mem_init(void)
372 372
373 printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " 373 printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
374 "%luk reserved, %luk data, %luk bss, %luk init)\n", 374 "%luk reserved, %luk data, %luk bss, %luk init)\n",
375 (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), 375 nr_free_pages() << (PAGE_SHIFT-10),
376 num_physpages << (PAGE_SHIFT-10), 376 num_physpages << (PAGE_SHIFT-10),
377 codesize >> 10, 377 codesize >> 10,
378 reservedpages << (PAGE_SHIFT-10), 378 reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 24b30b6909c..fc1b1c42b1d 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -119,7 +119,7 @@ spufs_new_file(struct super_block *sb, struct dentry *dentry,
119 const struct file_operations *fops, int mode, 119 const struct file_operations *fops, int mode,
120 size_t size, struct spu_context *ctx) 120 size_t size, struct spu_context *ctx)
121{ 121{
122 static struct inode_operations spufs_file_iops = { 122 static const struct inode_operations spufs_file_iops = {
123 .setattr = spufs_setattr, 123 .setattr = spufs_setattr,
124 }; 124 };
125 struct inode *inode; 125 struct inode *inode;
@@ -773,7 +773,7 @@ static int
773spufs_fill_super(struct super_block *sb, void *data, int silent) 773spufs_fill_super(struct super_block *sb, void *data, int silent)
774{ 774{
775 struct spufs_sb_info *info; 775 struct spufs_sb_info *info;
776 static struct super_operations s_ops = { 776 static const struct super_operations s_ops = {
777 .alloc_inode = spufs_alloc_inode, 777 .alloc_inode = spufs_alloc_inode,
778 .destroy_inode = spufs_destroy_inode, 778 .destroy_inode = spufs_destroy_inode,
779 .statfs = simple_statfs, 779 .statfs = simple_statfs,
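[Note: the spufs hunks are part of a tree-wide constification — nearly every block_device_operations below gets the same treatment. An ops table is filled in once at compile time and only ever read, so declaring it const moves it into .rodata and turns stray writes into build errors. The pattern in miniature:]

#include <stdio.h>

struct ops {
        int  (*open)(void);
        void (*release)(void);
};

static int  demo_open(void)    { puts("open");    return 0; }
static void demo_release(void) { puts("release"); }

/* "const" places the table in read-only data and documents that it is
 * never modified after initialization. */
static const struct ops demo_ops = {
        .open    = demo_open,
        .release = demo_release,
};

int main(void)
{
        demo_ops.open();
        demo_ops.release();
        /* demo_ops.open = NULL;  <- would not compile */
        return 0;
}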
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index 572771fd846..9490157da62 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * udbg for for zilog scc ports as found on Apple PowerMacs 2 * udbg for zilog scc ports as found on Apple PowerMacs
3 * 3 *
4 * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp 4 * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
5 * 5 *
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index a4779912a5c..88f4ae78783 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -165,7 +165,7 @@ axon_ram_direct_access(struct block_device *device, sector_t sector,
165 return 0; 165 return 0;
166} 166}
167 167
168static struct block_device_operations axon_ram_devops = { 168static const struct block_device_operations axon_ram_devops = {
169 .owner = THIS_MODULE, 169 .owner = THIS_MODULE,
170 .direct_access = axon_ram_direct_access 170 .direct_access = axon_ram_direct_access
171}; 171};
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index bd9914b8948..341aff2687a 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -41,7 +41,7 @@ struct hypfs_sb_info {
41 41
42static const struct file_operations hypfs_file_ops; 42static const struct file_operations hypfs_file_ops;
43static struct file_system_type hypfs_type; 43static struct file_system_type hypfs_type;
44static struct super_operations hypfs_s_ops; 44static const struct super_operations hypfs_s_ops;
45 45
46/* start of list of all dentries, which have to be deleted on update */ 46/* start of list of all dentries, which have to be deleted on update */
47static struct dentry *hypfs_last_dentry; 47static struct dentry *hypfs_last_dentry;
@@ -472,7 +472,7 @@ static struct file_system_type hypfs_type = {
472 .kill_sb = hypfs_kill_super 472 .kill_sb = hypfs_kill_super
473}; 473};
474 474
475static struct super_operations hypfs_s_ops = { 475static const struct super_operations hypfs_s_ops = {
476 .statfs = simple_statfs, 476 .statfs = simple_statfs,
477 .drop_inode = hypfs_drop_inode, 477 .drop_inode = hypfs_drop_inode,
478 .show_options = hypfs_show_options, 478 .show_options = hypfs_show_options,
@@ -496,7 +496,7 @@ static int __init hypfs_init(void)
496 } 496 }
497 s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); 497 s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
498 if (!s390_kobj) { 498 if (!s390_kobj) {
499 rc = -ENOMEM;; 499 rc = -ENOMEM;
500 goto fail_sysfs; 500 goto fail_sysfs;
501 } 501 }
502 rc = register_filesystem(&hypfs_type); 502 rc = register_filesystem(&hypfs_type);
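[Note: this hunk and several below (kvm interrupt.c, sparc irq_64.c, UML net_kern.c, DAC960, swim3) drop doubled semicolons. The extra ';' parses as an empty statement and is harmless in these spots, but the same slip after an if silently detaches the body, which is why it is worth cleaning up:]

#include <stdio.h>

static void do_cleanup(void) { puts("cleanup"); }

int main(void)
{
        int rc;

        rc = -12;;      /* doubled ';': a harmless empty statement */

        /* After a condition it is not harmless: the 'if' now guards
         * an empty statement, and the call below runs unconditionally. */
        if (rc >= 0);
                do_cleanup();
        return 0;
}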
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index f63fe7b431e..4e9c8ae0a63 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -9,18 +9,7 @@
9#ifndef __S390_MMAN_H__ 9#ifndef __S390_MMAN_H__
10#define __S390_MMAN_H__ 10#define __S390_MMAN_H__
11 11
12#include <asm-generic/mman-common.h> 12#include <asm-generic/mman.h>
13
14#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
15#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
16#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
17#define MAP_LOCKED 0x2000 /* pages are locked */
18#define MAP_NORESERVE 0x4000 /* don't check for reservations */
19#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
20#define MAP_NONBLOCK 0x10000 /* do not block on IO */
21
22#define MCL_CURRENT 1 /* lock all current mappings */
23#define MCL_FUTURE 2 /* lock all future mappings */
24 13
25#if defined(__KERNEL__) && !defined(__ASSEMBLY__) && defined(CONFIG_64BIT) 14#if defined(__KERNEL__) && !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
26int s390_mmap_check(unsigned long addr, unsigned long len); 15int s390_mmap_check(unsigned long addr, unsigned long len);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 2c2f9835341..43486c2408e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -478,7 +478,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
478 if (!inti) 478 if (!inti)
479 return -ENOMEM; 479 return -ENOMEM;
480 480
481 inti->type = KVM_S390_PROGRAM_INT;; 481 inti->type = KVM_S390_PROGRAM_INT;
482 inti->pgm.code = code; 482 inti->pgm.code = code;
483 483
484 VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); 484 VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c634dfbe92e..76564795222 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -105,7 +105,7 @@ void __init mem_init(void)
105 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 105 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
106 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 106 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
107 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", 107 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
108 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 108 nr_free_pages() << (PAGE_SHIFT-10),
109 max_mapnr << (PAGE_SHIFT-10), 109 max_mapnr << (PAGE_SHIFT-10),
110 codesize >> 10, 110 codesize >> 10,
111 reservedpages << (PAGE_SHIFT-10), 111 reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index edc842ff61e..fabb7c6f48d 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -232,7 +232,7 @@ void __init mem_init(void)
232 232
233 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " 233 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
234 "%dk data, %dk init)\n", 234 "%dk data, %dk init)\n",
235 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 235 nr_free_pages() << (PAGE_SHIFT-10),
236 num_physpages << (PAGE_SHIFT-10), 236 num_physpages << (PAGE_SHIFT-10),
237 codesize >> 10, 237 codesize >> 10,
238 datasize >> 10, 238 datasize >> 10,
diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h
index 988192e8e95..c3029ad6619 100644
--- a/arch/sparc/include/asm/mman.h
+++ b/arch/sparc/include/asm/mman.h
@@ -20,6 +20,8 @@
20 20
21#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ 21#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
22#define MAP_NONBLOCK 0x10000 /* do not block on IO */ 22#define MAP_NONBLOCK 0x10000 /* do not block on IO */
23#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
24#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
23 25
24#ifdef __KERNEL__ 26#ifdef __KERNEL__
25#ifndef __ASSEMBLY__ 27#ifndef __ASSEMBLY__
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 8daab33fc17..8ab1d4728a4 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -229,7 +229,7 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
229 tid = ((a << IMAP_AID_SHIFT) | 229 tid = ((a << IMAP_AID_SHIFT) |
230 (n << IMAP_NID_SHIFT)); 230 (n << IMAP_NID_SHIFT));
231 tid &= (IMAP_AID_SAFARI | 231 tid &= (IMAP_AID_SAFARI |
232 IMAP_NID_SAFARI);; 232 IMAP_NID_SAFARI);
233 } 233 }
234 } else { 234 } else {
235 tid = cpuid << IMAP_TID_SHIFT; 235 tid = cpuid << IMAP_TID_SHIFT;
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 54114ad0bde..dc7c3b17a15 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -472,7 +472,7 @@ void __init mem_init(void)
472 reservedpages++; 472 reservedpages++;
473 473
474 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", 474 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
475 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 475 nr_free_pages() << (PAGE_SHIFT-10),
476 num_physpages << (PAGE_SHIFT - 10), 476 num_physpages << (PAGE_SHIFT - 10),
477 codepages << (PAGE_SHIFT-10), 477 codepages << (PAGE_SHIFT-10),
478 reservedpages << (PAGE_SHIFT - 10), 478 reservedpages << (PAGE_SHIFT - 10),
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index f114813ae25..a74245ae3a8 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -533,7 +533,7 @@ static int eth_parse(char *str, int *index_out, char **str_out,
533 char **error_out) 533 char **error_out)
534{ 534{
535 char *end; 535 char *end;
536 int n, err = -EINVAL;; 536 int n, err = -EINVAL;
537 537
538 n = simple_strtoul(str, &end, 0); 538 n = simple_strtoul(str, &end, 0);
539 if (end == str) { 539 if (end == str) {
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 8f05d4d9da1..635d16d90a8 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -106,7 +106,7 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
106 106
107#define MAX_DEV (16) 107#define MAX_DEV (16)
108 108
109static struct block_device_operations ubd_blops = { 109static const struct block_device_operations ubd_blops = {
110 .owner = THIS_MODULE, 110 .owner = THIS_MODULE,
111 .open = ubd_open, 111 .open = ubd_open,
112 .release = ubd_release, 112 .release = ubd_release,
diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
index 313ebb8a256..fb3c05a0cbb 100644
--- a/arch/um/include/asm/hardirq.h
+++ b/arch/um/include/asm/hardirq.h
@@ -1,25 +1 @@
1/* (c) 2004 cw@f00f.org, GPLv2 blah blah */ #include <asm-generic/hardirq.h>
2
3#ifndef __ASM_UM_HARDIRQ_H
4#define __ASM_UM_HARDIRQ_H
5
6#include <linux/threads.h>
7#include <linux/irq.h>
8
9/* NOTE: When SMP works again we might want to make this
10 * ____cacheline_aligned or maybe use per_cpu state? --cw */
11typedef struct {
12 unsigned int __softirq_pending;
13} irq_cpustat_t;
14
15#include <linux/irq_cpustat.h>
16
17/* As this would be very strange for UML to get we BUG() after the
18 * printk. */
19static inline void ack_bad_irq(unsigned int irq)
20{
21 printk(KERN_ERR "unexpected IRQ %02x\n", irq);
22 BUG();
23}
24
25#endif /* __ASM_UM_HARDIRQ_H */
diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h
index 4bce6e01288..7fd8539bc19 100644
--- a/arch/um/include/shared/ptrace_user.h
+++ b/arch/um/include/shared/ptrace_user.h
@@ -29,7 +29,7 @@ extern int ptrace_setregs(long pid, unsigned long *regs_in);
29 * recompilation. So, we use PTRACE_OLDSETOPTIONS in UML. 29 * recompilation. So, we use PTRACE_OLDSETOPTIONS in UML.
30 * We also want to be able to build the kernel on 2.4, which doesn't 30 * We also want to be able to build the kernel on 2.4, which doesn't
31 * have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare 31 * have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare
32 * PTRACE_OLDSETOPTIONS to to be the same as PTRACE_SETOPTIONS. 32 * PTRACE_OLDSETOPTIONS to be the same as PTRACE_SETOPTIONS.
33 * 33 *
34 * On architectures, that start to support PTRACE_O_TRACESYSGOOD on 34 * On architectures, that start to support PTRACE_O_TRACESYSGOOD on
35 * linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't 35 * linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 61d7e6138ff..a5d5e70cf6f 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -77,7 +77,7 @@ void __init mem_init(void)
77 num_physpages = totalram_pages; 77 num_physpages = totalram_pages;
78 max_pfn = totalram_pages; 78 max_pfn = totalram_pages;
79 printk(KERN_INFO "Memory: %luk available\n", 79 printk(KERN_INFO "Memory: %luk available\n",
80 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); 80 nr_free_pages() << (PAGE_SHIFT-10));
81 kmalloc_ok = 1; 81 kmalloc_ok = 1;
82 82
83#ifdef CONFIG_HIGHMEM 83#ifdef CONFIG_HIGHMEM
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 0cd9a7a05e7..8bfd1e90581 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -38,10 +38,10 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
38 *pte = pte_mkread(*pte); 38 *pte = pte_mkread(*pte);
39 return 0; 39 return 0;
40 40
41 out_pmd:
42 pud_free(mm, pud);
43 out_pte: 41 out_pte:
44 pmd_free(mm, pmd); 42 pmd_free(mm, pmd);
43 out_pmd:
44 pud_free(mm, pud);
45 out: 45 out:
46 return -ENOMEM; 46 return -ENOMEM;
47} 47}
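[Note: the label swap fixes the unwind order in init_stub_pte(). Cleanup labels must run in reverse allocation order and fall through, so that a pte allocation failure frees the pmd and then the pud, while a pmd failure frees only the pud; the old ordering freed a pmd that was never allocated and leaked the pud. The general pattern, standalone:]

#include <stdlib.h>

/* Failure at step N must release exactly what steps 1..N-1 acquired:
 * labels in reverse allocation order, falling through. */
static int setup(void)
{
        char *a, *b, *c;

        a = malloc(16);
        if (!a)
                goto out;
        b = malloc(16);
        if (!b)
                goto out_a;
        c = malloc(16);
        if (!c)
                goto out_b;

        free(c); free(b); free(a);      /* success path (demo only) */
        return 0;

 out_b:
        free(b);        /* falls through: b, then a */
 out_a:
        free(a);
 out:
        return -1;
}

int main(void) { return setup() ? 1 : 0; }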
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f32fa71ccf9..c910a716a71 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -184,7 +184,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
184 * approved Athlon 184 * approved Athlon
185 */ 185 */
186 WARN_ONCE(1, "WARNING: This combination of AMD" 186 WARN_ONCE(1, "WARNING: This combination of AMD"
187 "processors is not suitable for SMP.\n"); 187 " processors is not suitable for SMP.\n");
188 if (!test_taint(TAINT_UNSAFE_SMP)) 188 if (!test_taint(TAINT_UNSAFE_SMP))
189 add_taint(TAINT_UNSAFE_SMP); 189 add_taint(TAINT_UNSAFE_SMP);
190 190
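[Note: a classic adjacent-string-literal slip — C concatenates the two literals with no separator, so the old warning printed "AMDprocessors". Standalone:]

#include <stdio.h>

int main(void)
{
        /* The split point needs an explicit space: */
        printf("WARNING: This combination of AMD"
               "processors is not suitable for SMP.\n");        /* wrong */
        printf("WARNING: This combination of AMD"
               " processors is not suitable for SMP.\n");       /* fixed */
        return 0;
}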
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a3210ce1ecc..85419bb7d4a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1331,7 +1331,7 @@ void __init e820_reserve_resources(void)
1331 struct resource *res; 1331 struct resource *res;
1332 u64 end; 1332 u64 end;
1333 1333
1334 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); 1334 res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
1335 e820_res = res; 1335 e820_res = res;
1336 for (i = 0; i < e820.nr_map; i++) { 1336 for (i = 0; i < e820.nr_map; i++) {
1337 end = e820.map[i].addr + e820.map[i].size - 1; 1337 end = e820.map[i].addr + e820.map[i].size - 1;
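[Note: roughly, alloc_bootmem_low() aims the allocation at the bottom of physical memory (goal 0), while plain alloc_bootmem() prefers memory above the DMA zone. The e820 resource array has no low-memory requirement, so the change relieves pressure on scarce low memory; the x86 init_32.c hunks below make the same switch for early page tables. As a kernel fragment:]

        /* No DMA constraint here, so ordinary bootmem suffices: */
        res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);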
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 0db7969b0dd..378e9a8f1bf 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -210,8 +210,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
210{ 210{
211 ssize_t ret = -EINVAL; 211 ssize_t ret = -EINVAL;
212 212
213 if ((len >> PAGE_SHIFT) > num_physpages) { 213 if ((len >> PAGE_SHIFT) > totalram_pages) {
214 pr_err("microcode: too much data (max %ld pages)\n", num_physpages); 214 pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
215 return ret; 215 return ret;
216 } 216 }
217 217
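[Note: num_physpages counts physical pages including holes and firmware-reserved ranges, whereas totalram_pages is what the page allocator actually manages; as a sanity bound on user-supplied data, only the latter is meaningful. The check after the change, as a kernel fragment:]

        /* Bound the user write by pages the allocator really has: */
        if ((len >> PAGE_SHIFT) > totalram_pages) {
                pr_err("microcode: too much data (max %ld pages)\n",
                       totalram_pages);
                return -EINVAL;
        }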
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3cd7711bb94..b49b4f67453 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
84#ifdef CONFIG_X86_PAE 84#ifdef CONFIG_X86_PAE
85 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 85 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
86 if (after_bootmem) 86 if (after_bootmem)
87 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 87 pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
88 else 88 else
89 pmd_table = (pmd_t *)alloc_low_page(); 89 pmd_table = (pmd_t *)alloc_low_page();
90 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 90 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
116#endif 116#endif
117 if (!page_table) 117 if (!page_table)
118 page_table = 118 page_table =
119 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 119 (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
120 } else 120 } else
121 page_table = (pte_t *)alloc_low_page(); 121 page_table = (pte_t *)alloc_low_page();
122 122
@@ -892,7 +892,7 @@ void __init mem_init(void)
892 892
893 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " 893 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
894 "%dk reserved, %dk data, %dk init, %ldk highmem)\n", 894 "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
895 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 895 nr_free_pages() << (PAGE_SHIFT-10),
896 num_physpages << (PAGE_SHIFT-10), 896 num_physpages << (PAGE_SHIFT-10),
897 codesize >> 10, 897 codesize >> 10,
898 reservedpages << (PAGE_SHIFT-10), 898 reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ea56b8cbb6a..810bd31e7f5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -687,7 +687,7 @@ void __init mem_init(void)
687 687
688 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " 688 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
689 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", 689 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
690 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 690 nr_free_pages() << (PAGE_SHIFT-10),
691 max_pfn << (PAGE_SHIFT-10), 691 max_pfn << (PAGE_SHIFT-10),
692 codesize >> 10, 692 codesize >> 10,
693 absent_pages << (PAGE_SHIFT-10), 693 absent_pages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 528bf954eb7..8cc18334414 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -225,9 +225,6 @@ void kmemcheck_hide(struct pt_regs *regs)
225 225
226 BUG_ON(!irqs_disabled()); 226 BUG_ON(!irqs_disabled());
227 227
228 if (data->balance == 0)
229 return;
230
231 if (unlikely(data->balance != 1)) { 228 if (unlikely(data->balance != 1)) {
232 kmemcheck_show_all(); 229 kmemcheck_show_all();
233 kmemcheck_error_save_bug(regs); 230 kmemcheck_error_save_bug(regs);
diff --git a/arch/xtensa/include/asm/mman.h b/arch/xtensa/include/asm/mman.h
index 9b92620c8a1..fca4db425f6 100644
--- a/arch/xtensa/include/asm/mman.h
+++ b/arch/xtensa/include/asm/mman.h
@@ -53,6 +53,8 @@
53#define MAP_LOCKED 0x8000 /* pages are locked */ 53#define MAP_LOCKED 0x8000 /* pages are locked */
54#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ 54#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */
55#define MAP_NONBLOCK 0x20000 /* do not block on IO */ 55#define MAP_NONBLOCK 0x20000 /* do not block on IO */
56#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
57#define MAP_HUGETLB 0x80000 /* create a huge page mapping */
56 58
57/* 59/*
58 * Flags for msync 60 * Flags for msync
@@ -78,6 +80,9 @@
78#define MADV_DONTFORK 10 /* don't inherit across fork */ 80#define MADV_DONTFORK 10 /* don't inherit across fork */
79#define MADV_DOFORK 11 /* do inherit across fork */ 81#define MADV_DOFORK 11 /* do inherit across fork */
80 82
83#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
84#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
85
81/* compatibility flags */ 86/* compatibility flags */
82#define MAP_FILE 0 87#define MAP_FILE 0
83 88
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 427e14fa43c..cdbc27ca966 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -203,7 +203,7 @@ void __init mem_init(void)
203 203
204 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, " 204 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, "
205 "%ldk data, %ldk init %ldk highmem)\n", 205 "%ldk data, %ldk init %ldk highmem)\n",
206 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 206 nr_free_pages() << (PAGE_SHIFT-10),
207 ram << (PAGE_SHIFT-10), 207 ram << (PAGE_SHIFT-10),
208 codesize >> 10, 208 codesize >> 10,
209 reservedpages << (PAGE_SHIFT-10), 209 reservedpages << (PAGE_SHIFT-10),
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 122c786449a..d0a7df2e5ca 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -624,7 +624,7 @@ static struct ata_port_operations hpt374_fn1_port_ops = {
624}; 624};
625 625
626/** 626/**
627 * htp37x_clock_slot - Turn timing to PC clock entry 627 * hpt37x_clock_slot - Turn timing to PC clock entry
628 * @freq: Reported frequency timing 628 * @freq: Reported frequency timing
629 * @base: Base timing 629 * @base: Base timing
630 * 630 *
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 91d4087b403..1fe5536d404 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -85,6 +85,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
85 "Node %d FilePages: %8lu kB\n" 85 "Node %d FilePages: %8lu kB\n"
86 "Node %d Mapped: %8lu kB\n" 86 "Node %d Mapped: %8lu kB\n"
87 "Node %d AnonPages: %8lu kB\n" 87 "Node %d AnonPages: %8lu kB\n"
88 "Node %d Shmem: %8lu kB\n"
89 "Node %d KernelStack: %8lu kB\n"
88 "Node %d PageTables: %8lu kB\n" 90 "Node %d PageTables: %8lu kB\n"
89 "Node %d NFS_Unstable: %8lu kB\n" 91 "Node %d NFS_Unstable: %8lu kB\n"
90 "Node %d Bounce: %8lu kB\n" 92 "Node %d Bounce: %8lu kB\n"
@@ -116,6 +118,9 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
116 nid, K(node_page_state(nid, NR_FILE_PAGES)), 118 nid, K(node_page_state(nid, NR_FILE_PAGES)),
117 nid, K(node_page_state(nid, NR_FILE_MAPPED)), 119 nid, K(node_page_state(nid, NR_FILE_MAPPED)),
118 nid, K(node_page_state(nid, NR_ANON_PAGES)), 120 nid, K(node_page_state(nid, NR_ANON_PAGES)),
121 nid, K(node_page_state(nid, NR_SHMEM)),
122 nid, node_page_state(nid, NR_KERNEL_STACK) *
123 THREAD_SIZE / 1024,
119 nid, K(node_page_state(nid, NR_PAGETABLE)), 124 nid, K(node_page_state(nid, NR_PAGETABLE)),
120 nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), 125 nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
121 nid, K(node_page_state(nid, NR_BOUNCE)), 126 nid, K(node_page_state(nid, NR_BOUNCE)),
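[Note: two counters join the per-node meminfo output. Shmem is page-based and goes through the usual K() pages-to-kB conversion, but NR_KERNEL_STACK counts stacks rather than pages, so it is scaled by THREAD_SIZE/1024 instead. As a kernel fragment:]

#define K(x) ((x) << (PAGE_SHIFT - 10))         /* pages -> kB */

        unsigned long shmem_kb = K(node_page_state(nid, NR_SHMEM));
        unsigned long stack_kb = node_page_state(nid, NR_KERNEL_STACK)
                                        * THREAD_SIZE / 1024;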
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 1e6b7c14f69..c77b6f3c28e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -152,7 +152,7 @@ static int DAC960_revalidate_disk(struct gendisk *disk)
152 return 0; 152 return 0;
153} 153}
154 154
155static struct block_device_operations DAC960_BlockDeviceOperations = { 155static const struct block_device_operations DAC960_BlockDeviceOperations = {
156 .owner = THIS_MODULE, 156 .owner = THIS_MODULE,
157 .open = DAC960_open, 157 .open = DAC960_open,
158 .getgeo = DAC960_getgeo, 158 .getgeo = DAC960_getgeo,
@@ -6653,7 +6653,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
6653 else ErrorCode = get_user(ControllerNumber, 6653 else ErrorCode = get_user(ControllerNumber,
6654 &UserSpaceControllerInfo->ControllerNumber); 6654 &UserSpaceControllerInfo->ControllerNumber);
6655 if (ErrorCode != 0) 6655 if (ErrorCode != 0)
6656 break;; 6656 break;
6657 ErrorCode = -ENXIO; 6657 ErrorCode = -ENXIO;
6658 if (ControllerNumber < 0 || 6658 if (ControllerNumber < 0 ||
6659 ControllerNumber > DAC960_ControllerCount - 1) { 6659 ControllerNumber > DAC960_ControllerCount - 1) {
@@ -6661,7 +6661,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
6661 } 6661 }
6662 Controller = DAC960_Controllers[ControllerNumber]; 6662 Controller = DAC960_Controllers[ControllerNumber];
6663 if (Controller == NULL) 6663 if (Controller == NULL)
6664 break;; 6664 break;
6665 memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T)); 6665 memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T));
6666 ControllerInfo.ControllerNumber = ControllerNumber; 6666 ControllerInfo.ControllerNumber = ControllerNumber;
6667 ControllerInfo.FirmwareType = Controller->FirmwareType; 6667 ControllerInfo.FirmwareType = Controller->FirmwareType;
@@ -7210,7 +7210,7 @@ static struct pci_driver DAC960_pci_driver = {
7210 .remove = DAC960_Remove, 7210 .remove = DAC960_Remove,
7211}; 7211};
7212 7212
7213static int DAC960_init_module(void) 7213static int __init DAC960_init_module(void)
7214{ 7214{
7215 int ret; 7215 int ret;
7216 7216
@@ -7222,7 +7222,7 @@ static int DAC960_init_module(void)
7222 return ret; 7222 return ret;
7223} 7223}
7224 7224
7225static void DAC960_cleanup_module(void) 7225static void __exit DAC960_cleanup_module(void)
7226{ 7226{
7227 int i; 7227 int i;
7228 7228
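[Note: tagging the module entry points __init/__exit lets the kernel discard them when they can no longer run — .init.text is freed once initialization finishes, and __exit code is dropped entirely from built-in objects. The canonical pattern:]

#include <linux/init.h>
#include <linux/module.h>

/* Minimal sketch of the annotation pattern applied to DAC960 above. */
static int __init demo_init(void)
{
        pr_info("demo loaded\n");
        return 0;       /* freed after boot / module load */
}

static void __exit demo_exit(void)
{
        pr_info("demo unloaded\n");     /* discarded when built in */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");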
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 2f07b7c99a9..05522583902 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1632,7 +1632,7 @@ static int amiga_floppy_change(struct gendisk *disk)
1632 return 0; 1632 return 0;
1633} 1633}
1634 1634
1635static struct block_device_operations floppy_fops = { 1635static const struct block_device_operations floppy_fops = {
1636 .owner = THIS_MODULE, 1636 .owner = THIS_MODULE,
1637 .open = floppy_open, 1637 .open = floppy_open,
1638 .release = floppy_release, 1638 .release = floppy_release,
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index b6cd571adbf..3af97d4da2d 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -237,7 +237,7 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
237 return 0; 237 return 0;
238} 238}
239 239
240static struct block_device_operations aoe_bdops = { 240static const struct block_device_operations aoe_bdops = {
241 .open = aoeblk_open, 241 .open = aoeblk_open,
242 .release = aoeblk_release, 242 .release = aoeblk_release,
243 .getgeo = aoeblk_getgeo, 243 .getgeo = aoeblk_getgeo,
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 3ff02941b3d..847a9e57570 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1856,7 +1856,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
1856 return 0; 1856 return 0;
1857} 1857}
1858 1858
1859static struct block_device_operations floppy_fops = { 1859static const struct block_device_operations floppy_fops = {
1860 .owner = THIS_MODULE, 1860 .owner = THIS_MODULE,
1861 .open = floppy_open, 1861 .open = floppy_open,
1862 .release = floppy_release, 1862 .release = floppy_release,
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 4bf8705b3ac..4f688434daf 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -375,7 +375,7 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode,
375 return error; 375 return error;
376} 376}
377 377
378static struct block_device_operations brd_fops = { 378static const struct block_device_operations brd_fops = {
379 .owner = THIS_MODULE, 379 .owner = THIS_MODULE,
380 .locked_ioctl = brd_ioctl, 380 .locked_ioctl = brd_ioctl,
381#ifdef CONFIG_BLK_DEV_XIP 381#ifdef CONFIG_BLK_DEV_XIP
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d8372b43282..4f19105f755 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -205,7 +205,7 @@ static int cciss_compat_ioctl(struct block_device *, fmode_t,
205 unsigned, unsigned long); 205 unsigned, unsigned long);
206#endif 206#endif
207 207
208static struct block_device_operations cciss_fops = { 208static const struct block_device_operations cciss_fops = {
209 .owner = THIS_MODULE, 209 .owner = THIS_MODULE,
210 .open = cciss_open, 210 .open = cciss_open,
211 .release = cciss_release, 211 .release = cciss_release,
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 44fa2018f6b..b82d438e260 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -193,7 +193,7 @@ static inline ctlr_info_t *get_host(struct gendisk *disk)
193} 193}
194 194
195 195
196static struct block_device_operations ida_fops = { 196static const struct block_device_operations ida_fops = {
197 .owner = THIS_MODULE, 197 .owner = THIS_MODULE,
198 .open = ida_open, 198 .open = ida_open,
199 .release = ida_release, 199 .release = ida_release,
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 2b387c2260d..5c01f747571 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3907,7 +3907,7 @@ static int floppy_revalidate(struct gendisk *disk)
3907 return res; 3907 return res;
3908} 3908}
3909 3909
3910static struct block_device_operations floppy_fops = { 3910static const struct block_device_operations floppy_fops = {
3911 .owner = THIS_MODULE, 3911 .owner = THIS_MODULE,
3912 .open = floppy_open, 3912 .open = floppy_open,
3913 .release = floppy_release, 3913 .release = floppy_release,
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index f9d01608cbe..d5cdce08ffd 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -692,7 +692,7 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id)
692 return IRQ_HANDLED; 692 return IRQ_HANDLED;
693} 693}
694 694
695static struct block_device_operations hd_fops = { 695static const struct block_device_operations hd_fops = {
696 .getgeo = hd_getgeo, 696 .getgeo = hd_getgeo,
697}; 697};
698 698
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bbb79441d89..edda9ea7c62 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1438,7 +1438,7 @@ out_unlocked:
1438 return 0; 1438 return 0;
1439} 1439}
1440 1440
1441static struct block_device_operations lo_fops = { 1441static const struct block_device_operations lo_fops = {
1442 .owner = THIS_MODULE, 1442 .owner = THIS_MODULE,
1443 .open = lo_open, 1443 .open = lo_open,
1444 .release = lo_release, 1444 .release = lo_release,
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 6d7fbaa9224..e0339aaa181 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -775,7 +775,7 @@ static int mg_getgeo(struct block_device *bdev, struct hd_geometry *geo)
775 return 0; 775 return 0;
776} 776}
777 777
778static struct block_device_operations mg_disk_ops = { 778static const struct block_device_operations mg_disk_ops = {
779 .getgeo = mg_getgeo 779 .getgeo = mg_getgeo
780}; 780};
781 781
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5d23ffad7c7..cc923a5b430 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -722,7 +722,7 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
722 return error; 722 return error;
723} 723}
724 724
725static struct block_device_operations nbd_fops = 725static const struct block_device_operations nbd_fops =
726{ 726{
727 .owner = THIS_MODULE, 727 .owner = THIS_MODULE,
728 .locked_ioctl = nbd_ioctl, 728 .locked_ioctl = nbd_ioctl,
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 13c1aee6aa3..a808b1530b3 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -125,7 +125,7 @@ static struct class *class_osdblk; /* /sys/class/osdblk */
125static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 125static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
126static LIST_HEAD(osdblkdev_list); 126static LIST_HEAD(osdblkdev_list);
127 127
128static struct block_device_operations osdblk_bd_ops = { 128static const struct block_device_operations osdblk_bd_ops = {
129 .owner = THIS_MODULE, 129 .owner = THIS_MODULE,
130}; 130};
131 131
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 9f3518c515a..8866ca369d5 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -247,7 +247,7 @@ static int pcd_block_media_changed(struct gendisk *disk)
247 return cdrom_media_changed(&cd->info); 247 return cdrom_media_changed(&cd->info);
248} 248}
249 249
250static struct block_device_operations pcd_bdops = { 250static const struct block_device_operations pcd_bdops = {
251 .owner = THIS_MODULE, 251 .owner = THIS_MODULE,
252 .open = pcd_block_open, 252 .open = pcd_block_open,
253 .release = pcd_block_release, 253 .release = pcd_block_release,
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index bf5955b3d87..569e39e8f11 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -807,7 +807,7 @@ static int pd_revalidate(struct gendisk *p)
807 return 0; 807 return 0;
808} 808}
809 809
810static struct block_device_operations pd_fops = { 810static const struct block_device_operations pd_fops = {
811 .owner = THIS_MODULE, 811 .owner = THIS_MODULE,
812 .open = pd_open, 812 .open = pd_open,
813 .release = pd_release, 813 .release = pd_release,
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 68a90834e99..ea54ea39355 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -262,7 +262,7 @@ static char *pf_buf; /* buffer for request in progress */
262 262
263/* kernel glue structures */ 263/* kernel glue structures */
264 264
265static struct block_device_operations pf_fops = { 265static const struct block_device_operations pf_fops = {
266 .owner = THIS_MODULE, 266 .owner = THIS_MODULE,
267 .open = pf_open, 267 .open = pf_open,
268 .release = pf_release, 268 .release = pf_release,
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index fd5bb8ad59a..2ddf03ae034 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2849,7 +2849,7 @@ static int pkt_media_changed(struct gendisk *disk)
2849 return attached_disk->fops->media_changed(attached_disk); 2849 return attached_disk->fops->media_changed(attached_disk);
2850} 2850}
2851 2851
2852static struct block_device_operations pktcdvd_ops = { 2852static const struct block_device_operations pktcdvd_ops = {
2853 .owner = THIS_MODULE, 2853 .owner = THIS_MODULE,
2854 .open = pkt_open, 2854 .open = pkt_open,
2855 .release = pkt_close, 2855 .release = pkt_close,
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 34cbb7f3efa..03a130dca8a 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -82,7 +82,7 @@ enum lv1_ata_in_out {
82static int ps3disk_major; 82static int ps3disk_major;
83 83
84 84
85static struct block_device_operations ps3disk_fops = { 85static const struct block_device_operations ps3disk_fops = {
86 .owner = THIS_MODULE, 86 .owner = THIS_MODULE,
87}; 87};
88 88
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index c8753a9ed29..3bb7c47c869 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -88,7 +88,7 @@ struct ps3vram_priv {
88static int ps3vram_major; 88static int ps3vram_major;
89 89
90 90
91static struct block_device_operations ps3vram_fops = { 91static const struct block_device_operations ps3vram_fops = {
92 .owner = THIS_MODULE, 92 .owner = THIS_MODULE,
93}; 93};
94 94
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index cbfd9c0aef0..411f064760b 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -103,7 +103,7 @@ static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
103 return 0; 103 return 0;
104} 104}
105 105
106static struct block_device_operations vdc_fops = { 106static const struct block_device_operations vdc_fops = {
107 .owner = THIS_MODULE, 107 .owner = THIS_MODULE,
108 .getgeo = vdc_getgeo, 108 .getgeo = vdc_getgeo,
109}; 109};
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index cf7877fb8a7..8f569e3df89 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -748,7 +748,7 @@ static int floppy_revalidate(struct gendisk *disk)
748 return !fs->disk_in; 748 return !fs->disk_in;
749} 749}
750 750
751static struct block_device_operations floppy_fops = { 751static const struct block_device_operations floppy_fops = {
752 .owner = THIS_MODULE, 752 .owner = THIS_MODULE,
753 .open = floppy_open, 753 .open = floppy_open,
754 .release = floppy_release, 754 .release = floppy_release,
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 80df93e3cdd..6380ad8d91b 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -998,7 +998,7 @@ static int floppy_revalidate(struct gendisk *disk)
998 return ret; 998 return ret;
999} 999}
1000 1000
1001static struct block_device_operations floppy_fops = { 1001static const struct block_device_operations floppy_fops = {
1002 .open = floppy_open, 1002 .open = floppy_open,
1003 .release = floppy_release, 1003 .release = floppy_release,
1004 .locked_ioctl = floppy_ioctl, 1004 .locked_ioctl = floppy_ioctl,
@@ -1062,7 +1062,7 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
1062 goto out_release; 1062 goto out_release;
1063 } 1063 }
1064 fs->swim3_intr = macio_irq(mdev, 0); 1064 fs->swim3_intr = macio_irq(mdev, 0);
1065 fs->dma_intr = macio_irq(mdev, 1);; 1065 fs->dma_intr = macio_irq(mdev, 1);
1066 fs->cur_cyl = -1; 1066 fs->cur_cyl = -1;
1067 fs->cur_sector = -1; 1067 fs->cur_sector = -1;
1068 fs->secpercyl = 36; 1068 fs->secpercyl = 36;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index f5cd2e83ebc..a7c4184f4a6 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -423,7 +423,7 @@ static struct pci_driver carm_driver = {
423 .remove = carm_remove_one, 423 .remove = carm_remove_one,
424}; 424};
425 425
426static struct block_device_operations carm_bd_ops = { 426static const struct block_device_operations carm_bd_ops = {
427 .owner = THIS_MODULE, 427 .owner = THIS_MODULE,
428 .getgeo = carm_bdev_getgeo, 428 .getgeo = carm_bdev_getgeo,
429}; 429};
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index cc54473b8e7..c739b203fe9 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -1789,7 +1789,7 @@ static int ub_bd_media_changed(struct gendisk *disk)
1789 return lun->changed; 1789 return lun->changed;
1790} 1790}
1791 1791
1792static struct block_device_operations ub_bd_fops = { 1792static const struct block_device_operations ub_bd_fops = {
1793 .owner = THIS_MODULE, 1793 .owner = THIS_MODULE,
1794 .open = ub_bd_open, 1794 .open = ub_bd_open,
1795 .release = ub_bd_release, 1795 .release = ub_bd_release,
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 858c34dd032..ad1ba393801 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -140,7 +140,6 @@ struct cardinfo {
140}; 140};
141 141
142static struct cardinfo cards[MM_MAXCARDS]; 142static struct cardinfo cards[MM_MAXCARDS];
143static struct block_device_operations mm_fops;
144static struct timer_list battery_timer; 143static struct timer_list battery_timer;
145 144
146static int num_cards; 145static int num_cards;
@@ -789,7 +788,7 @@ static int mm_check_change(struct gendisk *disk)
789 return 0; 788 return 0;
790} 789}
791 790
792static struct block_device_operations mm_fops = { 791static const struct block_device_operations mm_fops = {
793 .owner = THIS_MODULE, 792 .owner = THIS_MODULE,
794 .getgeo = mm_getgeo, 793 .getgeo = mm_getgeo,
795 .revalidate_disk = mm_revalidate, 794 .revalidate_disk = mm_revalidate,
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index b441ce3832e..a8c8b56b275 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -219,7 +219,7 @@ static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
219/* 219/*
220 * Our file operations table 220 * Our file operations table
221 */ 221 */
222static struct block_device_operations viodasd_fops = { 222static const struct block_device_operations viodasd_fops = {
223 .owner = THIS_MODULE, 223 .owner = THIS_MODULE,
224 .open = viodasd_open, 224 .open = viodasd_open,
225 .release = viodasd_release, 225 .release = viodasd_release,
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index aa1a3d5a3e2..aa89fe45237 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -243,7 +243,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
243 return 0; 243 return 0;
244} 244}
245 245
246static struct block_device_operations virtblk_fops = { 246static const struct block_device_operations virtblk_fops = {
247 .locked_ioctl = virtblk_ioctl, 247 .locked_ioctl = virtblk_ioctl,
248 .owner = THIS_MODULE, 248 .owner = THIS_MODULE,
249 .getgeo = virtblk_getgeo, 249 .getgeo = virtblk_getgeo,
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index ce242921992..0877d3628fd 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -130,7 +130,7 @@ static struct gendisk *xd_gendisk[2];
130 130
131static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); 131static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
132 132
133static struct block_device_operations xd_fops = { 133static const struct block_device_operations xd_fops = {
134 .owner = THIS_MODULE, 134 .owner = THIS_MODULE,
135 .locked_ioctl = xd_ioctl, 135 .locked_ioctl = xd_ioctl,
136 .getgeo = xd_getgeo, 136 .getgeo = xd_getgeo,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index e53284767f7..b8578bb3f4c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -65,7 +65,7 @@ struct blk_shadow {
65 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 65 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
66}; 66};
67 67
68static struct block_device_operations xlvbd_block_fops; 68static const struct block_device_operations xlvbd_block_fops;
69 69
70#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) 70#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
71 71
@@ -1039,7 +1039,7 @@ static int blkif_release(struct gendisk *disk, fmode_t mode)
1039 return 0; 1039 return 0;
1040} 1040}
1041 1041
1042static struct block_device_operations xlvbd_block_fops = 1042static const struct block_device_operations xlvbd_block_fops =
1043{ 1043{
1044 .owner = THIS_MODULE, 1044 .owner = THIS_MODULE,
1045 .open = blkif_open, 1045 .open = blkif_open,
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index b20abe102a2..e5c5415eb45 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -941,7 +941,7 @@ static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
941 return 0; 941 return 0;
942} 942}
943 943
944static struct block_device_operations ace_fops = { 944static const struct block_device_operations ace_fops = {
945 .owner = THIS_MODULE, 945 .owner = THIS_MODULE,
946 .open = ace_open, 946 .open = ace_open,
947 .release = ace_release, 947 .release = ace_release,
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index b2590409f25..64f941e0f14 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -64,7 +64,6 @@ static int current_device = -1;
64 64
65static DEFINE_SPINLOCK(z2ram_lock); 65static DEFINE_SPINLOCK(z2ram_lock);
66 66
67static struct block_device_operations z2_fops;
68static struct gendisk *z2ram_gendisk; 67static struct gendisk *z2ram_gendisk;
69 68
70static void do_z2_request(struct request_queue *q) 69static void do_z2_request(struct request_queue *q)
@@ -315,7 +314,7 @@ z2_release(struct gendisk *disk, fmode_t mode)
315 return 0; 314 return 0;
316} 315}
317 316
318static struct block_device_operations z2_fops = 317static const struct block_device_operations z2_fops =
319{ 318{
320 .owner = THIS_MODULE, 319 .owner = THIS_MODULE,
321 .open = z2_open, 320 .open = z2_open,
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index b5621f27c4b..a762283d2a2 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -512,7 +512,7 @@ static int gdrom_bdops_ioctl(struct block_device *bdev, fmode_t mode,
512 return cdrom_ioctl(gd.cd_info, bdev, mode, cmd, arg); 512 return cdrom_ioctl(gd.cd_info, bdev, mode, cmd, arg);
513} 513}
514 514
515static struct block_device_operations gdrom_bdops = { 515static const struct block_device_operations gdrom_bdops = {
516 .owner = THIS_MODULE, 516 .owner = THIS_MODULE,
517 .open = gdrom_bdops_open, 517 .open = gdrom_bdops_open,
518 .release = gdrom_bdops_release, 518 .release = gdrom_bdops_release,
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 0fff646cc2f..57ca69e0ac5 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -177,7 +177,7 @@ static int viocd_blk_media_changed(struct gendisk *disk)
177 return cdrom_media_changed(&di->viocd_info); 177 return cdrom_media_changed(&di->viocd_info);
178} 178}
179 179
180struct block_device_operations viocd_fops = { 180static const struct block_device_operations viocd_fops = {
181 .owner = THIS_MODULE, 181 .owner = THIS_MODULE,
182 .open = viocd_blk_open, 182 .open = viocd_blk_open,
183 .release = viocd_blk_release, 183 .release = viocd_blk_release,
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index ad87753f6de..a56ca080e10 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -114,9 +114,9 @@ static int agp_find_max(void)
114 long memory, index, result; 114 long memory, index, result;
115 115
116#if PAGE_SHIFT < 20 116#if PAGE_SHIFT < 20
117 memory = num_physpages >> (20 - PAGE_SHIFT); 117 memory = totalram_pages >> (20 - PAGE_SHIFT);
118#else 118#else
119 memory = num_physpages << (PAGE_SHIFT - 20); 119 memory = totalram_pages << (PAGE_SHIFT - 20);
120#endif 120#endif
121 index = 1; 121 index = 1;
122 122
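For illustration, a minimal user-space sketch (editorial; the numbers are made up) of the pages-to-megabytes conversion that agp_find_max() performs above, assuming 4 KiB pages so that PAGE_SHIFT == 12:

	#include <stdio.h>

	int main(void)
	{
		unsigned long page_shift = 12;            /* assumed 4 KiB pages */
		unsigned long totalram_pages = 1UL << 20; /* assumed 4 GiB of RAM */
		/* pages -> MiB: shift away the 2^(20 - 12) = 256 pages per MiB */
		unsigned long mb = totalram_pages >> (20 - page_shift);

		printf("%lu MiB\n", mb); /* prints 4096 */
		return 0;
	}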
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index 20ef1bf5e72..703959eba45 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -270,7 +270,7 @@ static void uninorth_agp_enable(struct agp_bridge_data *bridge, u32 mode)
270 270
271 if ((uninorth_rev >= 0x30) && (uninorth_rev <= 0x33)) { 271 if ((uninorth_rev >= 0x30) && (uninorth_rev <= 0x33)) {
272 /* 272 /*
273 * We need to to set REQ_DEPTH to 7 for U3 versions 1.0, 2.1, 273 * We need to set REQ_DEPTH to 7 for U3 versions 1.0, 2.1,
 274 * 2.2 and 2.3, Darwin does so. 274 * 2.2 and 2.3, Darwin does so.

275 */ 275 */
276 if ((command >> AGPSTAT_RQ_DEPTH_SHIFT) > 7) 276 if ((command >> AGPSTAT_RQ_DEPTH_SHIFT) > 7)
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index ff647ca1c48..9d589e3144d 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -2239,7 +2239,7 @@ static void do_softint(struct work_struct *work)
2239 struct channel *ch = container_of(work, struct channel, tqueue); 2239 struct channel *ch = container_of(work, struct channel, tqueue);
2240 /* Called in response to a modem change event */ 2240 /* Called in response to a modem change event */
2241 if (ch && ch->magic == EPCA_MAGIC) { 2241 if (ch && ch->magic == EPCA_MAGIC) {
2242 struct tty_struct *tty = tty_port_tty_get(&ch->port);; 2242 struct tty_struct *tty = tty_port_tty_get(&ch->port);
2243 2243
2244 if (tty && tty->driver_data) { 2244 if (tty && tty->driver_data) {
2245 if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) { 2245 if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c
index a261bd735df..2e66b5f773d 100644
--- a/drivers/char/ipmi/ipmi_poweroff.c
+++ b/drivers/char/ipmi/ipmi_poweroff.c
@@ -691,7 +691,7 @@ static struct ctl_table_header *ipmi_table_header;
691/* 691/*
692 * Startup and shutdown functions. 692 * Startup and shutdown functions.
693 */ 693 */
694static int ipmi_poweroff_init(void) 694static int __init ipmi_poweroff_init(void)
695{ 695{
696 int rv; 696 int rv;
697 697
@@ -725,7 +725,7 @@ static int ipmi_poweroff_init(void)
725} 725}
726 726
727#ifdef MODULE 727#ifdef MODULE
728static __exit void ipmi_poweroff_cleanup(void) 728static void __exit ipmi_poweroff_cleanup(void)
729{ 729{
730 int rv; 730 int rv;
731 731
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 881934c068c..c250a31efa5 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1017,7 +1017,7 @@ static ssize_t cmm_read(struct file *filp, __user char *buf, size_t count,
1017 } 1017 }
1018 } 1018 }
1019 1019
1020 if (dev->proto == 0 && count > dev->rlen - dev->rpos) { 1020 if (dev->proto == 0 && count > dev->rlen - dev->rpos && i) {
1021 DEBUGP(4, dev, "T=0 and count > buffer\n"); 1021 DEBUGP(4, dev, "T=0 and count > buffer\n");
1022 dev->rbuf[i] = dev->rbuf[i - 1]; 1022 dev->rbuf[i] = dev->rbuf[i - 1];
1023 dev->rbuf[i - 1] = dev->procbyte; 1023 dev->rbuf[i - 1] = dev->procbyte;
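/*
 * Editorial note on the change above: the added "&& i" guard keeps the
 * subsequent dev->rbuf[i - 1] access from indexing one byte before the
 * buffer when this T=0 path is entered with i == 0.
 */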
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index f1df59f59a3..68104434ebb 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -2,8 +2,12 @@
2 * menu.c - the menu idle governor 2 * menu.c - the menu idle governor
3 * 3 *
4 * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com> 4 * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
5 * Copyright (C) 2009 Intel Corporation
6 * Author:
7 * Arjan van de Ven <arjan@linux.intel.com>
5 * 8 *
 6 * This code is licenced under the GPL. 9 * This code is licensed under the GPL version 2 as described
 10 * in the COPYING file that accompanies the Linux Kernel.
7 */ 11 */
8 12
9#include <linux/kernel.h> 13#include <linux/kernel.h>
@@ -13,22 +17,158 @@
13#include <linux/ktime.h> 17#include <linux/ktime.h>
14#include <linux/hrtimer.h> 18#include <linux/hrtimer.h>
15#include <linux/tick.h> 19#include <linux/tick.h>
20#include <linux/sched.h>
16 21
17#define BREAK_FUZZ 4 /* 4 us */ 22#define BUCKETS 12
18#define PRED_HISTORY_PCT 50 23#define RESOLUTION 1024
24#define DECAY 4
25#define MAX_INTERESTING 50000
26
27/*
28 * Concepts and ideas behind the menu governor
29 *
30 * For the menu governor, there are 3 decision factors for picking a C
31 * state:
32 * 1) Energy break even point
33 * 2) Performance impact
34 * 3) Latency tolerance (from pmqos infrastructure)
 35 * These three factors are treated independently.
36 *
37 * Energy break even point
38 * -----------------------
39 * C state entry and exit have an energy cost, and a certain amount of time in
40 * the C state is required to actually break even on this cost. CPUIDLE
 41 * provides us with this duration in the "target_residency" field. So all that we
42 * need is a good prediction of how long we'll be idle. Like the traditional
43 * menu governor, we start with the actual known "next timer event" time.
44 *
 45 * Since there are other sources of wakeups (interrupts for example) than
46 * the next timer event, this estimation is rather optimistic. To get a
 47 * more realistic estimate, a correction factor based on historic behavior
 48 * is applied to the estimate. For example, if in the past the actual
49 * duration always was 50% of the next timer tick, the correction factor will
50 * be 0.5.
51 *
 52 * menu uses a running average for this correction factor; however, it uses a
53 * set of factors, not just a single factor. This stems from the realization
54 * that the ratio is dependent on the order of magnitude of the expected
55 * duration; if we expect 500 milliseconds of idle time the likelihood of
 56 * getting an interrupt very early is much higher than if we expect 50
 57 * microseconds of idle time. A second independent input that has a big
 58 * impact on the actual factor is whether there is (disk) IO outstanding.
59 * (as a special twist, we consider every sleep longer than 50 milliseconds
60 * as perfect; there are no power gains for sleeping longer than this)
61 *
 62 * For these two reasons we keep an array of 12 independent factors that gets
63 * indexed based on the magnitude of the expected duration as well as the
64 * "is IO outstanding" property.
65 *
66 * Limiting Performance Impact
67 * ---------------------------
68 * C states, especially those with large exit latencies, can have a real
 69 * noticeable impact on workloads, which is not acceptable for most sysadmins,
70 * and in addition, less performance has a power price of its own.
71 *
72 * As a general rule of thumb, menu assumes that the following heuristic
73 * holds:
 74 * The busier the system, the less impact from C states is acceptable
75 *
76 * This rule-of-thumb is implemented using a performance-multiplier:
77 * If the exit latency times the performance multiplier is longer than
78 * the predicted duration, the C state is not considered a candidate
79 * for selection due to a too high performance impact. So the higher
80 * this multiplier is, the longer we need to be idle to pick a deep C
81 * state, and thus the less likely a busy CPU will hit such a deep
82 * C state.
83 *
 84 * Two factors are used in determining this multiplier:
85 * a value of 10 is added for each point of "per cpu load average" we have.
86 * a value of 5 points is added for each process that is waiting for
87 * IO on this CPU.
88 * (these values are experimentally determined)
89 *
90 * The load average factor gives a longer term (few seconds) input to the
 91 * decision, while the iowait value gives a cpu local instantaneous input.
92 * The iowait factor may look low, but realize that this is also already
93 * represented in the system load average.
94 *
95 */
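/*
 * Illustrative example (editorial sketch; the numbers are made up): the
 * correction factor is stored scaled by RESOLUTION * DECAY = 4096, so a
 * stored value of 2048 means "we typically sleep half as long as the
 * next timer event suggests". An expected 1000 us of idle then becomes:
 *
 *   predicted_us = 1000 * 2048 / (1024 * 4) = 500 us
 */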
19 96
20struct menu_device { 97struct menu_device {
21 int last_state_idx; 98 int last_state_idx;
99 int needs_update;
22 100
23 unsigned int expected_us; 101 unsigned int expected_us;
24 unsigned int predicted_us; 102 u64 predicted_us;
25 unsigned int current_predicted_us; 103 unsigned int measured_us;
26 unsigned int last_measured_us; 104 unsigned int exit_us;
27 unsigned int elapsed_us; 105 unsigned int bucket;
106 u64 correction_factor[BUCKETS];
28}; 107};
29 108
109
110#define LOAD_INT(x) ((x) >> FSHIFT)
111#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
112
113static int get_loadavg(void)
114{
115 unsigned long this = this_cpu_load();
116
117
118 return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10;
119}
120
121static inline int which_bucket(unsigned int duration)
122{
123 int bucket = 0;
124
125 /*
 126 * We keep two groups of stats; one with
127 * IO pending, one without.
128 * This allows us to calculate
129 * E(duration)|iowait
130 */
131 if (nr_iowait_cpu())
132 bucket = BUCKETS/2;
133
134 if (duration < 10)
135 return bucket;
136 if (duration < 100)
137 return bucket + 1;
138 if (duration < 1000)
139 return bucket + 2;
140 if (duration < 10000)
141 return bucket + 3;
142 if (duration < 100000)
143 return bucket + 4;
144 return bucket + 5;
145}
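/*
 * Illustrative example (editorial): an expected duration of 850 us with
 * IO outstanding lands in bucket BUCKETS/2 + 2 = 8, so it shares a
 * correction factor with other sub-millisecond, IO-bound sleeps.
 */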
146
147/*
148 * Return a multiplier for the exit latency that is intended
149 * to take performance requirements into account.
150 * The more performance critical we estimate the system
151 * to be, the higher this multiplier, and thus the higher
152 * the barrier to go to an expensive C state.
153 */
154static inline int performance_multiplier(void)
155{
156 int mult = 1;
157
158 /* for higher loadavg, we are more reluctant */
159
160 mult += 2 * get_loadavg();
161
162 /* for IO wait tasks (per cpu!) we add 5x each */
163 mult += 10 * nr_iowait_cpu();
164
165 return mult;
166}
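/*
 * Illustrative example (editorial; the numbers are made up): a per-cpu
 * load average of 1.00 makes get_loadavg() return 10, so with two tasks
 * in iowait on this cpu the multiplier is 1 + 2 * 10 + 10 * 2 = 41; a C
 * state is then only considered if its exit latency is less than 1/41st
 * of the predicted idle time.
 */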
167
30static DEFINE_PER_CPU(struct menu_device, menu_devices); 168static DEFINE_PER_CPU(struct menu_device, menu_devices);
31 169
170static void menu_update(struct cpuidle_device *dev);
171
32/** 172/**
33 * menu_select - selects the next idle state to enter 173 * menu_select - selects the next idle state to enter
34 * @dev: the CPU 174 * @dev: the CPU
@@ -38,41 +178,68 @@ static int menu_select(struct cpuidle_device *dev)
38 struct menu_device *data = &__get_cpu_var(menu_devices); 178 struct menu_device *data = &__get_cpu_var(menu_devices);
39 int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY); 179 int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY);
40 int i; 180 int i;
181 int multiplier;
182
183 data->last_state_idx = 0;
184 data->exit_us = 0;
185
186 if (data->needs_update) {
187 menu_update(dev);
188 data->needs_update = 0;
189 }
41 190
42 /* Special case when user has set very strict latency requirement */ 191 /* Special case when user has set very strict latency requirement */
43 if (unlikely(latency_req == 0)) { 192 if (unlikely(latency_req == 0))
44 data->last_state_idx = 0;
45 return 0; 193 return 0;
46 }
47 194
48 /* determine the expected residency time */ 195 /* determine the expected residency time, round up */
49 data->expected_us = 196 data->expected_us =
50 (u32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000; 197 DIV_ROUND_UP((u32)ktime_to_ns(tick_nohz_get_sleep_length()), 1000);
198
199
200 data->bucket = which_bucket(data->expected_us);
201
202 multiplier = performance_multiplier();
203
204 /*
 205 * if the correction factor is 0 (e.g. first time init or cpu hotplug
206 * etc), we actually want to start out with a unity factor.
207 */
208 if (data->correction_factor[data->bucket] == 0)
209 data->correction_factor[data->bucket] = RESOLUTION * DECAY;
210
211 /* Make sure to round up for half microseconds */
212 data->predicted_us = DIV_ROUND_CLOSEST(
213 data->expected_us * data->correction_factor[data->bucket],
214 RESOLUTION * DECAY);
215
216 /*
217 * We want to default to C1 (hlt), not to busy polling
218 * unless the timer is happening really really soon.
219 */
220 if (data->expected_us > 5)
221 data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
51 222
52 /* Recalculate predicted_us based on prediction_history_pct */
53 data->predicted_us *= PRED_HISTORY_PCT;
54 data->predicted_us += (100 - PRED_HISTORY_PCT) *
55 data->current_predicted_us;
56 data->predicted_us /= 100;
57 223
58 /* find the deepest idle state that satisfies our constraints */ 224 /* find the deepest idle state that satisfies our constraints */
59 for (i = CPUIDLE_DRIVER_STATE_START + 1; i < dev->state_count; i++) { 225 for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++) {
60 struct cpuidle_state *s = &dev->states[i]; 226 struct cpuidle_state *s = &dev->states[i];
61 227
62 if (s->target_residency > data->expected_us)
63 break;
64 if (s->target_residency > data->predicted_us) 228 if (s->target_residency > data->predicted_us)
65 break; 229 break;
66 if (s->exit_latency > latency_req) 230 if (s->exit_latency > latency_req)
67 break; 231 break;
232 if (s->exit_latency * multiplier > data->predicted_us)
233 break;
234 data->exit_us = s->exit_latency;
235 data->last_state_idx = i;
68 } 236 }
69 237
70 data->last_state_idx = i - 1; 238 return data->last_state_idx;
71 return i - 1;
72} 239}
73 240
74/** 241/**
 75 * menu_reflect - attempts to guess what happened after entry 242 * menu_reflect - records that data structures need updating
76 * @dev: the CPU 243 * @dev: the CPU
77 * 244 *
78 * NOTE: it's important to be fast here because this operation will add to 245 * NOTE: it's important to be fast here because this operation will add to
@@ -81,39 +248,63 @@ static int menu_select(struct cpuidle_device *dev)
81static void menu_reflect(struct cpuidle_device *dev) 248static void menu_reflect(struct cpuidle_device *dev)
82{ 249{
83 struct menu_device *data = &__get_cpu_var(menu_devices); 250 struct menu_device *data = &__get_cpu_var(menu_devices);
251 data->needs_update = 1;
252}
253
254/**
255 * menu_update - attempts to guess what happened after entry
256 * @dev: the CPU
257 */
258static void menu_update(struct cpuidle_device *dev)
259{
260 struct menu_device *data = &__get_cpu_var(menu_devices);
84 int last_idx = data->last_state_idx; 261 int last_idx = data->last_state_idx;
85 unsigned int last_idle_us = cpuidle_get_last_residency(dev); 262 unsigned int last_idle_us = cpuidle_get_last_residency(dev);
86 struct cpuidle_state *target = &dev->states[last_idx]; 263 struct cpuidle_state *target = &dev->states[last_idx];
87 unsigned int measured_us; 264 unsigned int measured_us;
265 u64 new_factor;
88 266
89 /* 267 /*
90 * Ugh, this idle state doesn't support residency measurements, so we 268 * Ugh, this idle state doesn't support residency measurements, so we
91 * are basically lost in the dark. As a compromise, assume we slept 269 * are basically lost in the dark. As a compromise, assume we slept
92 * for one full standard timer tick. However, be aware that this 270 * for the whole expected time.
93 * could potentially result in a suboptimal state transition.
94 */ 271 */
95 if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID))) 272 if (unlikely(!(target->flags & CPUIDLE_FLAG_TIME_VALID)))
96 last_idle_us = USEC_PER_SEC / HZ; 273 last_idle_us = data->expected_us;
274
275
276 measured_us = last_idle_us;
97 277
98 /* 278 /*
99 * measured_us and elapsed_us are the cumulative idle time, since the 279 * We correct for the exit latency; we are assuming here that the
100 * last time we were woken out of idle by an interrupt. 280 * exit latency happens after the event that we're interested in.
101 */ 281 */
102 if (data->elapsed_us <= data->elapsed_us + last_idle_us) 282 if (measured_us > data->exit_us)
103 measured_us = data->elapsed_us + last_idle_us; 283 measured_us -= data->exit_us;
284
285
286 /* update our correction ratio */
287
288 new_factor = data->correction_factor[data->bucket]
289 * (DECAY - 1) / DECAY;
290
291 if (data->expected_us > 0 && data->measured_us < MAX_INTERESTING)
292 new_factor += RESOLUTION * measured_us / data->expected_us;
104 else 293 else
105 measured_us = -1; 294 /*
295 * we were idle so long that we count it as a perfect
296 * prediction
297 */
298 new_factor += RESOLUTION;
106 299
107 /* Predict time until next break event */ 300 /*
108 data->current_predicted_us = max(measured_us, data->last_measured_us); 301 * We don't want 0 as factor; we always want at least
302 * a tiny bit of estimated time.
303 */
304 if (new_factor == 0)
305 new_factor = 1;
109 306
110 if (last_idle_us + BREAK_FUZZ < 307 data->correction_factor[data->bucket] = new_factor;
111 data->expected_us - target->exit_latency) {
112 data->last_measured_us = measured_us;
113 data->elapsed_us = 0;
114 } else {
115 data->elapsed_us = measured_us;
116 }
117} 308}
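/*
 * Illustrative example (editorial; the numbers are made up): DECAY = 4
 * makes this an exponentially weighted moving average. Starting from a
 * unity factor (1024 * 4 = 4096), a sleep that measures half of what
 * was expected updates the factor to
 *
 *   4096 * 3 / 4 + 1024 * 1 / 2 = 3072 + 512 = 3584
 *
 * i.e. the estimated ratio moves a quarter of the way from 1.0 toward
 * the observed 0.5 (3584 / 4096 ~= 0.875).
 */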
118 309
119/** 310/**
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 871c13b4c14..12f355cafdb 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -286,7 +286,7 @@ enum scrub_type {
286 * is irrespective of the memory devices being mounted 286 * is irrespective of the memory devices being mounted
287 * on both sides of the memory stick. 287 * on both sides of the memory stick.
288 * 288 *
289 * Socket set: All of the memory sticks that are required for for 289 * Socket set: All of the memory sticks that are required for
290 * a single memory access or all of the memory sticks 290 * a single memory access or all of the memory sticks
291 * spanned by a chip-select row. A single socket set 291 * spanned by a chip-select row. A single socket set
292 * has two chip-select rows and if double-sided sticks 292 * has two chip-select rows and if double-sided sticks
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index d5ea8a68d33..56f9234781f 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -164,7 +164,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
164{ 164{
165 struct firmware_map_entry *entry; 165 struct firmware_map_entry *entry;
166 166
167 entry = alloc_bootmem_low(sizeof(struct firmware_map_entry)); 167 entry = alloc_bootmem(sizeof(struct firmware_map_entry));
168 if (WARN_ON(!entry)) 168 if (WARN_ON(!entry))
169 return -ENOMEM; 169 return -ENOMEM;
170 170
diff --git a/drivers/gpio/bt8xxgpio.c b/drivers/gpio/bt8xxgpio.c
index 984b587f0f9..55904140213 100644
--- a/drivers/gpio/bt8xxgpio.c
+++ b/drivers/gpio/bt8xxgpio.c
@@ -331,13 +331,13 @@ static struct pci_driver bt8xxgpio_pci_driver = {
331 .resume = bt8xxgpio_resume, 331 .resume = bt8xxgpio_resume,
332}; 332};
333 333
334static int bt8xxgpio_init(void) 334static int __init bt8xxgpio_init(void)
335{ 335{
336 return pci_register_driver(&bt8xxgpio_pci_driver); 336 return pci_register_driver(&bt8xxgpio_pci_driver);
337} 337}
338module_init(bt8xxgpio_init) 338module_init(bt8xxgpio_init)
339 339
340static void bt8xxgpio_exit(void) 340static void __exit bt8xxgpio_exit(void)
341{ 341{
342 pci_unregister_driver(&bt8xxgpio_pci_driver); 342 pci_unregister_driver(&bt8xxgpio_pci_driver);
343} 343}
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 2b914d73207..f4856a51047 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -232,7 +232,7 @@ intel_dp_aux_ch(struct intel_output *intel_output,
232 for (try = 0; try < 5; try++) { 232 for (try = 0; try < 5; try++) {
233 /* Load the send data into the aux channel data registers */ 233 /* Load the send data into the aux channel data registers */
234 for (i = 0; i < send_bytes; i += 4) { 234 for (i = 0; i < send_bytes; i += 4) {
235 uint32_t d = pack_aux(send + i, send_bytes - i);; 235 uint32_t d = pack_aux(send + i, send_bytes - i);
236 236
237 I915_WRITE(ch_data + i, d); 237 I915_WRITE(ch_data + i, d);
238 } 238 }
diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c
index b710fab21cb..a53b848e0f1 100644
--- a/drivers/gpu/drm/mga/mga_state.c
+++ b/drivers/gpu/drm/mga/mga_state.c
@@ -239,7 +239,7 @@ static __inline__ void mga_g200_emit_pipe(drm_mga_private_t * dev_priv)
239 MGA_WR34, 0x00000000, 239 MGA_WR34, 0x00000000,
240 MGA_WR42, 0x0000ffff, MGA_WR60, 0x0000ffff); 240 MGA_WR42, 0x0000ffff, MGA_WR60, 0x0000ffff);
241 241
242 /* Padding required to to hardware bug. 242 /* Padding required due to hardware bug.
243 */ 243 */
244 DMA_BLOCK(MGA_DMAPAD, 0xffffffff, 244 DMA_BLOCK(MGA_DMAPAD, 0xffffffff,
245 MGA_DMAPAD, 0xffffffff, 245 MGA_DMAPAD, 0xffffffff,
@@ -317,7 +317,7 @@ static __inline__ void mga_g400_emit_pipe(drm_mga_private_t * dev_priv)
317 MGA_WR52, MGA_G400_WR_MAGIC, /* tex1 width */ 317 MGA_WR52, MGA_G400_WR_MAGIC, /* tex1 width */
318 MGA_WR60, MGA_G400_WR_MAGIC); /* tex1 height */ 318 MGA_WR60, MGA_G400_WR_MAGIC); /* tex1 height */
319 319
320 /* Padding required to to hardware bug */ 320 /* Padding required due to hardware bug */
321 DMA_BLOCK(MGA_DMAPAD, 0xffffffff, 321 DMA_BLOCK(MGA_DMAPAD, 0xffffffff,
322 MGA_DMAPAD, 0xffffffff, 322 MGA_DMAPAD, 0xffffffff,
323 MGA_DMAPAD, 0xffffffff, 323 MGA_DMAPAD, 0xffffffff,
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 111afbe8de0..24d90ea246c 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -205,13 +205,6 @@ config HID_NTRIG
205 Support for N-Trig touch screen. 205 Support for N-Trig touch screen.
206 206
207config HID_PANTHERLORD 207config HID_PANTHERLORD
208 tristate "Pantherlord devices support" if EMBEDDED
209 depends on USB_HID
210 default !EMBEDDED
211 ---help---
212 Support for PantherLord/GreenAsia based device support.
213
214config HID_PANTHERLORD
215 tristate "Pantherlord support" if EMBEDDED 208 tristate "Pantherlord support" if EMBEDDED
216 depends on USB_HID 209 depends on USB_HID
217 default !EMBEDDED 210 default !EMBEDDED
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 342b7d36d7b..be34d32906b 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1089,8 +1089,7 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i
1089 return -1; 1089 return -1;
1090 } 1090 }
1091 1091
1092 buf = kmalloc(sizeof(char) * HID_DEBUG_BUFSIZE, 1092 buf = kmalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_ATOMIC);
1093 interrupt ? GFP_ATOMIC : GFP_KERNEL);
1094 1093
1095 if (!buf) { 1094 if (!buf) {
1096 report = hid_get_report(report_enum, data); 1095 report = hid_get_report(report_enum, data);
@@ -1238,6 +1237,17 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
1238} 1237}
1239EXPORT_SYMBOL_GPL(hid_connect); 1238EXPORT_SYMBOL_GPL(hid_connect);
1240 1239
1240void hid_disconnect(struct hid_device *hdev)
1241{
1242 if (hdev->claimed & HID_CLAIMED_INPUT)
1243 hidinput_disconnect(hdev);
1244 if (hdev->claimed & HID_CLAIMED_HIDDEV)
1245 hdev->hiddev_disconnect(hdev);
1246 if (hdev->claimed & HID_CLAIMED_HIDRAW)
1247 hidraw_disconnect(hdev);
1248}
1249EXPORT_SYMBOL_GPL(hid_disconnect);
1250
1241/* a list of devices for which there is a specialized driver on HID bus */ 1251/* a list of devices for which there is a specialized driver on HID bus */
1242static const struct hid_device_id hid_blacklist[] = { 1252static const struct hid_device_id hid_blacklist[] = {
1243 { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) }, 1253 { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 1b0e07a67d6..03bd703255a 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -1041,13 +1041,6 @@ static void usbhid_stop(struct hid_device *hid)
1041 1041
1042 hid_cancel_delayed_stuff(usbhid); 1042 hid_cancel_delayed_stuff(usbhid);
1043 1043
1044 if (hid->claimed & HID_CLAIMED_INPUT)
1045 hidinput_disconnect(hid);
1046 if (hid->claimed & HID_CLAIMED_HIDDEV)
1047 hiddev_disconnect(hid);
1048 if (hid->claimed & HID_CLAIMED_HIDRAW)
1049 hidraw_disconnect(hid);
1050
1051 hid->claimed = 0; 1044 hid->claimed = 0;
1052 1045
1053 usb_free_urb(usbhid->urbin); 1046 usb_free_urb(usbhid->urbin);
@@ -1085,7 +1078,7 @@ static struct hid_ll_driver usb_hid_driver = {
1085 .hidinput_input_event = usb_hidinput_input_event, 1078 .hidinput_input_event = usb_hidinput_input_event,
1086}; 1079};
1087 1080
1088static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id) 1081static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *id)
1089{ 1082{
1090 struct usb_host_interface *interface = intf->cur_altsetting; 1083 struct usb_host_interface *interface = intf->cur_altsetting;
1091 struct usb_device *dev = interface_to_usbdev(intf); 1084 struct usb_device *dev = interface_to_usbdev(intf);
@@ -1117,6 +1110,7 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id)
1117 hid->ff_init = hid_pidff_init; 1110 hid->ff_init = hid_pidff_init;
1118#ifdef CONFIG_USB_HIDDEV 1111#ifdef CONFIG_USB_HIDDEV
1119 hid->hiddev_connect = hiddev_connect; 1112 hid->hiddev_connect = hiddev_connect;
1113 hid->hiddev_disconnect = hiddev_disconnect;
1120 hid->hiddev_hid_event = hiddev_hid_event; 1114 hid->hiddev_hid_event = hiddev_hid_event;
1121 hid->hiddev_report_event = hiddev_report_event; 1115 hid->hiddev_report_event = hiddev_report_event;
1122#endif 1116#endif
@@ -1177,7 +1171,7 @@ err:
1177 return ret; 1171 return ret;
1178} 1172}
1179 1173
1180static void hid_disconnect(struct usb_interface *intf) 1174static void usbhid_disconnect(struct usb_interface *intf)
1181{ 1175{
1182 struct hid_device *hid = usb_get_intfdata(intf); 1176 struct hid_device *hid = usb_get_intfdata(intf);
1183 struct usbhid_device *usbhid; 1177 struct usbhid_device *usbhid;
@@ -1359,8 +1353,8 @@ MODULE_DEVICE_TABLE (usb, hid_usb_ids);
1359 1353
1360static struct usb_driver hid_driver = { 1354static struct usb_driver hid_driver = {
1361 .name = "usbhid", 1355 .name = "usbhid",
1362 .probe = hid_probe, 1356 .probe = usbhid_probe,
1363 .disconnect = hid_disconnect, 1357 .disconnect = usbhid_disconnect,
1364#ifdef CONFIG_PM 1358#ifdef CONFIG_PM
1365 .suspend = hid_suspend, 1359 .suspend = hid_suspend,
1366 .resume = hid_resume, 1360 .resume = hid_resume,
diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index b11e06f644b..afc59431812 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -83,16 +83,14 @@ struct adm1021_data {
83 83
84 struct mutex update_lock; 84 struct mutex update_lock;
85 char valid; /* !=0 if following fields are valid */ 85 char valid; /* !=0 if following fields are valid */
86 char low_power; /* !=0 if device in low power mode */
86 unsigned long last_updated; /* In jiffies */ 87 unsigned long last_updated; /* In jiffies */
87 88
88 s8 temp_max[2]; /* Register values */ 89 int temp_max[2]; /* Register values */
89 s8 temp_min[2]; 90 int temp_min[2];
90 s8 temp[2]; 91 int temp[2];
91 u8 alarms; 92 u8 alarms;
92 /* Special values for ADM1023 only */ 93 /* Special values for ADM1023 only */
93 u8 remote_temp_prec;
94 u8 remote_temp_os_prec;
95 u8 remote_temp_hyst_prec;
96 u8 remote_temp_offset; 94 u8 remote_temp_offset;
97 u8 remote_temp_offset_prec; 95 u8 remote_temp_offset_prec;
98}; 96};
@@ -141,7 +139,7 @@ static ssize_t show_temp(struct device *dev,
141 int index = to_sensor_dev_attr(devattr)->index; 139 int index = to_sensor_dev_attr(devattr)->index;
142 struct adm1021_data *data = adm1021_update_device(dev); 140 struct adm1021_data *data = adm1021_update_device(dev);
143 141
144 return sprintf(buf, "%d\n", 1000 * data->temp[index]); 142 return sprintf(buf, "%d\n", data->temp[index]);
145} 143}
146 144
147static ssize_t show_temp_max(struct device *dev, 145static ssize_t show_temp_max(struct device *dev,
@@ -150,7 +148,7 @@ static ssize_t show_temp_max(struct device *dev,
150 int index = to_sensor_dev_attr(devattr)->index; 148 int index = to_sensor_dev_attr(devattr)->index;
151 struct adm1021_data *data = adm1021_update_device(dev); 149 struct adm1021_data *data = adm1021_update_device(dev);
152 150
153 return sprintf(buf, "%d\n", 1000 * data->temp_max[index]); 151 return sprintf(buf, "%d\n", data->temp_max[index]);
154} 152}
155 153
156static ssize_t show_temp_min(struct device *dev, 154static ssize_t show_temp_min(struct device *dev,
@@ -159,7 +157,7 @@ static ssize_t show_temp_min(struct device *dev,
159 int index = to_sensor_dev_attr(devattr)->index; 157 int index = to_sensor_dev_attr(devattr)->index;
160 struct adm1021_data *data = adm1021_update_device(dev); 158 struct adm1021_data *data = adm1021_update_device(dev);
161 159
162 return sprintf(buf, "%d\n", 1000 * data->temp_min[index]); 160 return sprintf(buf, "%d\n", data->temp_min[index]);
163} 161}
164 162
165static ssize_t show_alarm(struct device *dev, struct device_attribute *attr, 163static ssize_t show_alarm(struct device *dev, struct device_attribute *attr,
@@ -216,6 +214,35 @@ static ssize_t set_temp_min(struct device *dev,
216 return count; 214 return count;
217} 215}
218 216
217static ssize_t show_low_power(struct device *dev,
218 struct device_attribute *devattr, char *buf)
219{
220 struct adm1021_data *data = adm1021_update_device(dev);
221 return sprintf(buf, "%d\n", data->low_power);
222}
223
224static ssize_t set_low_power(struct device *dev,
225 struct device_attribute *devattr,
226 const char *buf, size_t count)
227{
228 struct i2c_client *client = to_i2c_client(dev);
229 struct adm1021_data *data = i2c_get_clientdata(client);
230 int low_power = simple_strtol(buf, NULL, 10) != 0;
231
232 mutex_lock(&data->update_lock);
233 if (low_power != data->low_power) {
234 int config = i2c_smbus_read_byte_data(
235 client, ADM1021_REG_CONFIG_R);
236 data->low_power = low_power;
237 i2c_smbus_write_byte_data(client, ADM1021_REG_CONFIG_W,
238 (config & 0xBF) | (low_power << 6));
239 }
240 mutex_unlock(&data->update_lock);
241
242 return count;
243}
244
245
219static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, 0); 246static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, 0);
220static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp_max, 247static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp_max,
221 set_temp_max, 0); 248 set_temp_max, 0);
@@ -233,6 +260,7 @@ static SENSOR_DEVICE_ATTR(temp2_min_alarm, S_IRUGO, show_alarm, NULL, 3);
233static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 2); 260static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 2);
234 261
235static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL); 262static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL);
263static DEVICE_ATTR(low_power, S_IWUSR | S_IRUGO, show_low_power, set_low_power);
236 264
237static struct attribute *adm1021_attributes[] = { 265static struct attribute *adm1021_attributes[] = {
238 &sensor_dev_attr_temp1_max.dev_attr.attr, 266 &sensor_dev_attr_temp1_max.dev_attr.attr,
@@ -247,6 +275,7 @@ static struct attribute *adm1021_attributes[] = {
247 &sensor_dev_attr_temp2_min_alarm.dev_attr.attr, 275 &sensor_dev_attr_temp2_min_alarm.dev_attr.attr,
248 &sensor_dev_attr_temp2_fault.dev_attr.attr, 276 &sensor_dev_attr_temp2_fault.dev_attr.attr,
249 &dev_attr_alarms.attr, 277 &dev_attr_alarms.attr,
278 &dev_attr_low_power.attr,
250 NULL 279 NULL
251}; 280};
252 281
@@ -412,25 +441,27 @@ static struct adm1021_data *adm1021_update_device(struct device *dev)
412 dev_dbg(&client->dev, "Starting adm1021 update\n"); 441 dev_dbg(&client->dev, "Starting adm1021 update\n");
413 442
414 for (i = 0; i < 2; i++) { 443 for (i = 0; i < 2; i++) {
415 data->temp[i] = i2c_smbus_read_byte_data(client, 444 data->temp[i] = 1000 *
416 ADM1021_REG_TEMP(i)); 445 (s8) i2c_smbus_read_byte_data(
417 data->temp_max[i] = i2c_smbus_read_byte_data(client, 446 client, ADM1021_REG_TEMP(i));
418 ADM1021_REG_TOS_R(i)); 447 data->temp_max[i] = 1000 *
419 data->temp_min[i] = i2c_smbus_read_byte_data(client, 448 (s8) i2c_smbus_read_byte_data(
420 ADM1021_REG_THYST_R(i)); 449 client, ADM1021_REG_TOS_R(i));
450 data->temp_min[i] = 1000 *
451 (s8) i2c_smbus_read_byte_data(
452 client, ADM1021_REG_THYST_R(i));
421 } 453 }
422 data->alarms = i2c_smbus_read_byte_data(client, 454 data->alarms = i2c_smbus_read_byte_data(client,
423 ADM1021_REG_STATUS) & 0x7c; 455 ADM1021_REG_STATUS) & 0x7c;
424 if (data->type == adm1023) { 456 if (data->type == adm1023) {
425 data->remote_temp_prec = 457 /* The ADM1023 provides 3 extra bits of precision for
426 i2c_smbus_read_byte_data(client, 458 * the remote sensor in extra registers. */
427 ADM1023_REG_REM_TEMP_PREC); 459 data->temp[1] += 125 * (i2c_smbus_read_byte_data(
428 data->remote_temp_os_prec = 460 client, ADM1023_REG_REM_TEMP_PREC) >> 5);
429 i2c_smbus_read_byte_data(client, 461 data->temp_max[1] += 125 * (i2c_smbus_read_byte_data(
430 ADM1023_REG_REM_TOS_PREC); 462 client, ADM1023_REG_REM_TOS_PREC) >> 5);
431 data->remote_temp_hyst_prec = 463 data->temp_min[1] += 125 * (i2c_smbus_read_byte_data(
432 i2c_smbus_read_byte_data(client, 464 client, ADM1023_REG_REM_THYST_PREC) >> 5);
433 ADM1023_REG_REM_THYST_PREC);
434 data->remote_temp_offset = 465 data->remote_temp_offset =
435 i2c_smbus_read_byte_data(client, 466 i2c_smbus_read_byte_data(client,
436 ADM1023_REG_REM_OFFSET); 467 ADM1023_REG_REM_OFFSET);
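For illustration (editorial note): the remote channel's three extra precision bits sit in the top of each *_PREC register, so the (value >> 5) above yields 0..7 additional eighths of a degree. The base readings are already scaled to millidegrees (1000 * s8), and one eighth of a degree is 1000 / 8 = 125 millidegrees, which is where the "+= 125 *" terms come from.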
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 753b34885f9..7ea6a8f6605 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -178,6 +178,8 @@ static const int debug;
178static struct platform_device *pdev; 178static struct platform_device *pdev;
179static s16 rest_x; 179static s16 rest_x;
180static s16 rest_y; 180static s16 rest_y;
181static u8 backlight_state[2];
182
181static struct device *hwmon_dev; 183static struct device *hwmon_dev;
182static struct input_polled_dev *applesmc_idev; 184static struct input_polled_dev *applesmc_idev;
183 185
@@ -497,17 +499,36 @@ static int applesmc_probe(struct platform_device *dev)
497 return 0; 499 return 0;
498} 500}
499 501
500static int applesmc_resume(struct platform_device *dev) 502/* Synchronize device with memorized backlight state */
503static int applesmc_pm_resume(struct device *dev)
501{ 504{
502 return applesmc_device_init(); 505 mutex_lock(&applesmc_lock);
506 if (applesmc_light)
507 applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
508 mutex_unlock(&applesmc_lock);
509 return 0;
503} 510}
504 511
512/* Reinitialize device on resume from hibernation */
513static int applesmc_pm_restore(struct device *dev)
514{
515 int ret = applesmc_device_init();
516 if (ret)
517 return ret;
518 return applesmc_pm_resume(dev);
519}
520
521static struct dev_pm_ops applesmc_pm_ops = {
522 .resume = applesmc_pm_resume,
523 .restore = applesmc_pm_restore,
524};
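/*
 * Editorial note: with dev_pm_ops, .resume runs after suspend-to-RAM,
 * where (judging by the code) the SMC state survives and only the
 * memorized backlight value needs to be written back, while .restore
 * runs after hibernation, where applesmc_device_init() must bring the
 * device up again before the same sync is applied.
 */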
525
505static struct platform_driver applesmc_driver = { 526static struct platform_driver applesmc_driver = {
506 .probe = applesmc_probe, 527 .probe = applesmc_probe,
507 .resume = applesmc_resume,
508 .driver = { 528 .driver = {
509 .name = "applesmc", 529 .name = "applesmc",
510 .owner = THIS_MODULE, 530 .owner = THIS_MODULE,
531 .pm = &applesmc_pm_ops,
511 }, 532 },
512}; 533};
513 534
@@ -804,17 +825,10 @@ static ssize_t applesmc_calibrate_store(struct device *dev,
804 return count; 825 return count;
805} 826}
806 827
807/* Store the next backlight value to be written by the work */
808static unsigned int backlight_value;
809
810static void applesmc_backlight_set(struct work_struct *work) 828static void applesmc_backlight_set(struct work_struct *work)
811{ 829{
812 u8 buffer[2];
813
814 mutex_lock(&applesmc_lock); 830 mutex_lock(&applesmc_lock);
815 buffer[0] = backlight_value; 831 applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2);
816 buffer[1] = 0x00;
817 applesmc_write_key(BACKLIGHT_KEY, buffer, 2);
818 mutex_unlock(&applesmc_lock); 832 mutex_unlock(&applesmc_lock);
819} 833}
820static DECLARE_WORK(backlight_work, &applesmc_backlight_set); 834static DECLARE_WORK(backlight_work, &applesmc_backlight_set);
@@ -824,7 +838,7 @@ static void applesmc_brightness_set(struct led_classdev *led_cdev,
824{ 838{
825 int ret; 839 int ret;
826 840
827 backlight_value = value; 841 backlight_state[0] = value;
828 ret = queue_work(applesmc_led_wq, &backlight_work); 842 ret = queue_work(applesmc_led_wq, &backlight_work);
829 843
830 if (debug && (!ret)) 844 if (debug && (!ret))
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 93c17223b52..972cf4ba963 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -185,7 +185,7 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *
185 } 185 }
186 } 186 }
187 187
188 if (ismobile) { 188 if (ismobile || c->x86_model == 0x1c) {
189 189
190 err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx); 190 err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx);
191 if (err) { 191 if (err) {
@@ -417,7 +417,7 @@ static int __init coretemp_init(void)
417 if ((c->cpuid_level < 0) || (c->x86 != 0x6) || 417 if ((c->cpuid_level < 0) || (c->x86 != 0x6) ||
418 !((c->x86_model == 0xe) || (c->x86_model == 0xf) || 418 !((c->x86_model == 0xe) || (c->x86_model == 0xf) ||
419 (c->x86_model == 0x16) || (c->x86_model == 0x17) || 419 (c->x86_model == 0x16) || (c->x86_model == 0x17) ||
420 (c->x86_model == 0x1A))) { 420 (c->x86_model == 0x1A) || (c->x86_model == 0x1c))) {
421 421
422 /* supported CPU not found, but report the unknown 422 /* supported CPU not found, but report the unknown
423 family 6 CPU */ 423 family 6 CPU */
diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c
index 271338bdb6b..cf5afb9a10a 100644
--- a/drivers/hwmon/lis3lv02d.c
+++ b/drivers/hwmon/lis3lv02d.c
@@ -454,6 +454,15 @@ int lis3lv02d_init_device(struct lis3lv02d *dev)
454 (p->click_thresh_y << 4)); 454 (p->click_thresh_y << 4));
455 } 455 }
456 456
457 if (p->wakeup_flags && (dev->whoami == LIS_SINGLE_ID)) {
458 dev->write(dev, FF_WU_CFG_1, p->wakeup_flags);
459 dev->write(dev, FF_WU_THS_1, p->wakeup_thresh & 0x7f);
460 /* default to 2.5ms for now */
461 dev->write(dev, FF_WU_DURATION_1, 1);
462 /* enable high pass filter for both free-fall units */
463 dev->write(dev, CTRL_REG2, HP_FF_WU1 | HP_FF_WU2);
464 }
465
457 if (p->irq_cfg) 466 if (p->irq_cfg)
458 dev->write(dev, CTRL_REG3, p->irq_cfg); 467 dev->write(dev, CTRL_REG3, p->irq_cfg);
459 } 468 }
diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h
index e320e2f511f..3e1ff46f72d 100644
--- a/drivers/hwmon/lis3lv02d.h
+++ b/drivers/hwmon/lis3lv02d.h
@@ -58,15 +58,17 @@ enum lis3_reg {
58 OUTZ_L = 0x2C, 58 OUTZ_L = 0x2C,
59 OUTZ_H = 0x2D, 59 OUTZ_H = 0x2D,
60 OUTZ = 0x2D, 60 OUTZ = 0x2D,
61 FF_WU_CFG = 0x30,
62 FF_WU_SRC = 0x31,
63 FF_WU_ACK = 0x32,
64 FF_WU_THS_L = 0x34,
65 FF_WU_THS_H = 0x35,
66 FF_WU_DURATION = 0x36,
67}; 61};
68 62
69enum lis302d_reg { 63enum lis302d_reg {
64 FF_WU_CFG_1 = 0x30,
65 FF_WU_SRC_1 = 0x31,
66 FF_WU_THS_1 = 0x32,
67 FF_WU_DURATION_1 = 0x33,
68 FF_WU_CFG_2 = 0x34,
69 FF_WU_SRC_2 = 0x35,
70 FF_WU_THS_2 = 0x36,
71 FF_WU_DURATION_2 = 0x37,
70 CLICK_CFG = 0x38, 72 CLICK_CFG = 0x38,
71 CLICK_SRC = 0x39, 73 CLICK_SRC = 0x39,
72 CLICK_THSY_X = 0x3B, 74 CLICK_THSY_X = 0x3B,
@@ -77,6 +79,12 @@ enum lis302d_reg {
77}; 79};
78 80
79enum lis3lv02d_reg { 81enum lis3lv02d_reg {
82 FF_WU_CFG = 0x30,
83 FF_WU_SRC = 0x31,
84 FF_WU_ACK = 0x32,
85 FF_WU_THS_L = 0x34,
86 FF_WU_THS_H = 0x35,
87 FF_WU_DURATION = 0x36,
80 DD_CFG = 0x38, 88 DD_CFG = 0x38,
81 DD_SRC = 0x39, 89 DD_SRC = 0x39,
82 DD_ACK = 0x3A, 90 DD_ACK = 0x3A,
@@ -107,6 +115,10 @@ enum lis3lv02d_ctrl2 {
107 CTRL2_FS = 0x80, /* Full Scale selection */ 115 CTRL2_FS = 0x80, /* Full Scale selection */
108}; 116};
109 117
118enum lis302d_ctrl2 {
119 HP_FF_WU2 = 0x08,
120 HP_FF_WU1 = 0x04,
121};
110 122
111enum lis3lv02d_ctrl3 { 123enum lis3lv02d_ctrl3 {
112 CTRL3_CFS0 = 0x01, 124 CTRL3_CFS0 = 0x01,
diff --git a/drivers/hwmon/lis3lv02d_spi.c b/drivers/hwmon/lis3lv02d_spi.c
index 3827ff04485..82ebca5a699 100644
--- a/drivers/hwmon/lis3lv02d_spi.c
+++ b/drivers/hwmon/lis3lv02d_spi.c
@@ -66,17 +66,16 @@ static int __devinit lis302dl_spi_probe(struct spi_device *spi)
66 if (ret < 0) 66 if (ret < 0)
67 return ret; 67 return ret;
68 68
69 lis3_dev.bus_priv = spi; 69 lis3_dev.bus_priv = spi;
70 lis3_dev.init = lis3_spi_init; 70 lis3_dev.init = lis3_spi_init;
71 lis3_dev.read = lis3_spi_read; 71 lis3_dev.read = lis3_spi_read;
72 lis3_dev.write = lis3_spi_write; 72 lis3_dev.write = lis3_spi_write;
73 lis3_dev.irq = spi->irq; 73 lis3_dev.irq = spi->irq;
74 lis3_dev.ac = lis3lv02d_axis_normal; 74 lis3_dev.ac = lis3lv02d_axis_normal;
75 lis3_dev.pdata = spi->dev.platform_data; 75 lis3_dev.pdata = spi->dev.platform_data;
76 spi_set_drvdata(spi, &lis3_dev); 76 spi_set_drvdata(spi, &lis3_dev);
77 77
78 ret = lis3lv02d_init_device(&lis3_dev); 78 return lis3lv02d_init_device(&lis3_dev);
79 return ret;
80} 79}
81 80
82static int __devexit lis302dl_spi_remove(struct spi_device *spi) 81static int __devexit lis302dl_spi_remove(struct spi_device *spi)
@@ -87,6 +86,32 @@ static int __devexit lis302dl_spi_remove(struct spi_device *spi)
87 return 0; 86 return 0;
88} 87}
89 88
89#ifdef CONFIG_PM
90static int lis3lv02d_spi_suspend(struct spi_device *spi, pm_message_t mesg)
91{
92 struct lis3lv02d *lis3 = spi_get_drvdata(spi);
93
94 if (!lis3->pdata->wakeup_flags)
95 lis3lv02d_poweroff(&lis3_dev);
96
97 return 0;
98}
99
100static int lis3lv02d_spi_resume(struct spi_device *spi)
101{
102 struct lis3lv02d *lis3 = spi_get_drvdata(spi);
103
104 if (!lis3->pdata->wakeup_flags)
105 lis3lv02d_poweron(lis3);
106
107 return 0;
108}
109
110#else
111#define lis3lv02d_spi_suspend NULL
112#define lis3lv02d_spi_resume NULL
113#endif
114
90static struct spi_driver lis302dl_spi_driver = { 115static struct spi_driver lis302dl_spi_driver = {
91 .driver = { 116 .driver = {
92 .name = DRV_NAME, 117 .name = DRV_NAME,
@@ -94,6 +119,8 @@ static struct spi_driver lis302dl_spi_driver = {
94 }, 119 },
95 .probe = lis302dl_spi_probe, 120 .probe = lis302dl_spi_probe,
96 .remove = __devexit_p(lis302dl_spi_remove), 121 .remove = __devexit_p(lis302dl_spi_remove),
122 .suspend = lis3lv02d_spi_suspend,
123 .resume = lis3lv02d_spi_resume,
97}; 124};
98 125
99static int __init lis302dl_init(void) 126static int __init lis302dl_init(void)
diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c
index 6290a259456..303c02694c3 100644
--- a/drivers/hwmon/sht15.c
+++ b/drivers/hwmon/sht15.c
@@ -562,7 +562,7 @@ static int __devinit sht15_probe(struct platform_device *pdev)
562 ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); 562 ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group);
563 if (ret) { 563 if (ret) {
564 dev_err(&pdev->dev, "sysfs create failed"); 564 dev_err(&pdev->dev, "sysfs create failed");
565 goto err_free_data; 565 goto err_release_gpio_data;
566 } 566 }
567 567
568 ret = request_irq(gpio_to_irq(data->pdata->gpio_data), 568 ret = request_irq(gpio_to_irq(data->pdata->gpio_data),
@@ -581,10 +581,12 @@ static int __devinit sht15_probe(struct platform_device *pdev)
581 data->hwmon_dev = hwmon_device_register(data->dev); 581 data->hwmon_dev = hwmon_device_register(data->dev);
582 if (IS_ERR(data->hwmon_dev)) { 582 if (IS_ERR(data->hwmon_dev)) {
583 ret = PTR_ERR(data->hwmon_dev); 583 ret = PTR_ERR(data->hwmon_dev);
584 goto err_release_gpio_data; 584 goto err_release_irq;
585 } 585 }
586 return 0; 586 return 0;
587 587
588err_release_irq:
589 free_irq(gpio_to_irq(data->pdata->gpio_data), data);
588err_release_gpio_data: 590err_release_gpio_data:
589 gpio_free(data->pdata->gpio_data); 591 gpio_free(data->pdata->gpio_data);
590err_release_gpio_sck: 592err_release_gpio_sck:
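/*
 * Editorial note on the reordered error path above: once request_irq()
 * has succeeded, a failing hwmon_device_register() now unwinds through
 * the new err_release_irq label, so the interrupt is freed before the
 * GPIOs are released; previously the IRQ leaked on this path.
 */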
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index b79ca419d8d..64207df8da8 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1686,7 +1686,7 @@ static int idecd_revalidate_disk(struct gendisk *disk)
1686 return 0; 1686 return 0;
1687} 1687}
1688 1688
1689static struct block_device_operations idecd_ops = { 1689static const struct block_device_operations idecd_ops = {
1690 .owner = THIS_MODULE, 1690 .owner = THIS_MODULE,
1691 .open = idecd_open, 1691 .open = idecd_open,
1692 .release = idecd_release, 1692 .release = idecd_release,
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 214119026b3..753241429c2 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -321,7 +321,7 @@ static int ide_gd_ioctl(struct block_device *bdev, fmode_t mode,
321 return drive->disk_ops->ioctl(drive, bdev, mode, cmd, arg); 321 return drive->disk_ops->ioctl(drive, bdev, mode, cmd, arg);
322} 322}
323 323
324static struct block_device_operations ide_gd_ops = { 324static const struct block_device_operations ide_gd_ops = {
325 .owner = THIS_MODULE, 325 .owner = THIS_MODULE,
326 .open = ide_gd_open, 326 .open = ide_gd_open,
327 .release = ide_gd_release, 327 .release = ide_gd_release,
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 8de442cbee9..63c53d65e87 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1212,7 +1212,7 @@ static int ide_find_port_slot(const struct ide_port_info *d)
1212{ 1212{
1213 int idx = -ENOENT; 1213 int idx = -ENOENT;
1214 u8 bootable = (d && (d->host_flags & IDE_HFLAG_NON_BOOTABLE)) ? 0 : 1; 1214 u8 bootable = (d && (d->host_flags & IDE_HFLAG_NON_BOOTABLE)) ? 0 : 1;
1215 u8 i = (d && (d->host_flags & IDE_HFLAG_QD_2ND_PORT)) ? 1 : 0;; 1215 u8 i = (d && (d->host_flags & IDE_HFLAG_QD_2ND_PORT)) ? 1 : 0;
1216 1216
1217 /* 1217 /*
1218 * Claim an unassigned slot. 1218 * Claim an unassigned slot.
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 9d6f62baac2..58fc920d5c3 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -1913,7 +1913,7 @@ static int idetape_ioctl(struct block_device *bdev, fmode_t mode,
1913 return err; 1913 return err;
1914} 1914}
1915 1915
1916static struct block_device_operations idetape_block_ops = { 1916static const struct block_device_operations idetape_block_ops = {
1917 .owner = THIS_MODULE, 1917 .owner = THIS_MODULE,
1918 .open = idetape_open, 1918 .open = idetape_open,
1919 .release = idetape_release, 1919 .release = idetape_release,
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index 0608d41fb6d..60f936e2319 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -170,9 +170,9 @@ static int __init umc8672_init(void)
170 goto out; 170 goto out;
171 171
172 if (umc8672_probe() == 0) 172 if (umc8672_probe() == 0)
173 return 0;; 173 return 0;
174out: 174out:
175 return -ENODEV;; 175 return -ENODEV;
176} 176}
177 177
178module_init(umc8672_init); 178module_init(umc8672_init);
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 02831ad070b..4bd39c8af80 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -809,7 +809,7 @@ static int ipath_setup_ht_reset(struct ipath_devdata *dd)
809 * errors. We only bother to do this at load time, because it's OK if 809 * errors. We only bother to do this at load time, because it's OK if
810 * it happened before we were loaded (first time after boot/reset), 810 * it happened before we were loaded (first time after boot/reset),
811 * but any time after that, it's fatal anyway. Also need to not check 811 * but any time after that, it's fatal anyway. Also need to not check
812 * for for upper byte errors if we are in 8 bit mode, so figure out 812 * for upper byte errors if we are in 8 bit mode, so figure out
813 * our width. For now, at least, also complain if it's 8 bit. 813 * our width. For now, at least, also complain if it's 8 bit.
814 */ 814 */
815static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev, 815static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index c9523e48c6a..adb09e2ba39 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -229,7 +229,7 @@ struct atkbd {
229}; 229};
230 230
231/* 231/*
232 * System-specific ketymap fixup routine 232 * System-specific keymap fixup routine
233 */ 233 */
234static void (*atkbd_platform_fixup)(struct atkbd *, const void *data); 234static void (*atkbd_platform_fixup)(struct atkbd *, const void *data);
235static void *atkbd_platform_fixup_data; 235static void *atkbd_platform_fixup_data;
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 1a50be379cb..76d6751f89a 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -222,6 +222,22 @@ config INPUT_SGI_BTNS
222 To compile this driver as a module, choose M here: the 222 To compile this driver as a module, choose M here: the
223 module will be called sgi_btns. 223 module will be called sgi_btns.
224 224
225config INPUT_WINBOND_CIR
226 tristate "Winbond IR remote control"
227 depends on X86 && PNP
228 select LEDS_CLASS
229 select BITREVERSE
230 help
231 Say Y here if you want to use the IR remote functionality found
232 in some Winbond SuperI/O chips. Currently only the WPCD376I
233 chip is supported (included in some Intel Media series motherboards).
234
 235 IR Receive and wake-on-IR from suspend and power-off are currently
236 supported.
237
238 To compile this driver as a module, choose M here: the module will be
239 called winbond_cir.
240
225config HP_SDC_RTC 241config HP_SDC_RTC
226 tristate "HP SDC Real Time Clock" 242 tristate "HP SDC Real Time Clock"
227 depends on (GSC || HP300) && SERIO 243 depends on (GSC || HP300) && SERIO
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index bf4db626c31..a8b84854fb7 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_INPUT_SGI_BTNS) += sgi_btns.o
26obj-$(CONFIG_INPUT_SPARCSPKR) += sparcspkr.o 26obj-$(CONFIG_INPUT_SPARCSPKR) += sparcspkr.o
27obj-$(CONFIG_INPUT_TWL4030_PWRBUTTON) += twl4030-pwrbutton.o 27obj-$(CONFIG_INPUT_TWL4030_PWRBUTTON) += twl4030-pwrbutton.o
28obj-$(CONFIG_INPUT_UINPUT) += uinput.o 28obj-$(CONFIG_INPUT_UINPUT) += uinput.o
29obj-$(CONFIG_INPUT_WINBOND_CIR) += winbond-cir.o
29obj-$(CONFIG_INPUT_WISTRON_BTNS) += wistron_btns.o 30obj-$(CONFIG_INPUT_WISTRON_BTNS) += wistron_btns.o
30obj-$(CONFIG_INPUT_WM831X_ON) += wm831x-on.o 31obj-$(CONFIG_INPUT_WM831X_ON) += wm831x-on.o
31obj-$(CONFIG_INPUT_YEALINK) += yealink.o 32obj-$(CONFIG_INPUT_YEALINK) += yealink.o
diff --git a/drivers/input/misc/winbond-cir.c b/drivers/input/misc/winbond-cir.c
new file mode 100644
index 00000000000..33309fe44e2
--- /dev/null
+++ b/drivers/input/misc/winbond-cir.c
@@ -0,0 +1,1614 @@
1/*
2 * winbond-cir.c - Driver for the Consumer IR functionality of Winbond
3 * SuperI/O chips.
4 *
5 * Currently supports the Winbond WPCD376i chip (PNP id WEC1022), but
6 * could probably support others (Winbond WEC102X, NatSemi, etc)
7 * with minor modifications.
8 *
9 * Original Author: David Härdeman <david@hardeman.nu>
10 * Copyright (C) 2009 David Härdeman <david@hardeman.nu>
11 *
12 * Dedicated to Matilda, my newborn daughter, without whose loving attention
13 * this driver would have been finished in half the time and with a fraction
14 * of the bugs.
15 *
16 * Written using:
17 * o Winbond WPCD376I datasheet helpfully provided by Jesse Barnes at Intel
18 * o NatSemi PC87338/PC97338 datasheet (for the serial port stuff)
19 * o DSDT dumps
20 *
21 * Supported features:
22 * o RC6
23 * o Wake-On-CIR functionality
24 *
25 * To do:
26 * o Test NEC and RC5
27 *
28 * Left as an exercise for the reader:
29 * o Learning (I have neither the hardware, nor the need)
30 * o IR Transmit (ibid)
31 *
32 * This program is free software; you can redistribute it and/or modify
33 * it under the terms of the GNU General Public License as published by
34 * the Free Software Foundation; either version 2 of the License, or
35 * (at your option) any later version.
36 *
37 * This program is distributed in the hope that it will be useful,
38 * but WITHOUT ANY WARRANTY; without even the implied warranty of
39 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40 * GNU General Public License for more details.
41 *
42 * You should have received a copy of the GNU General Public License
43 * along with this program; if not, write to the Free Software
44 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
45 */
46
47#include <linux/module.h>
48#include <linux/pnp.h>
49#include <linux/interrupt.h>
50#include <linux/timer.h>
51#include <linux/input.h>
52#include <linux/leds.h>
53#include <linux/list.h>
54#include <linux/spinlock.h>
55#include <linux/pci_ids.h>
56#include <linux/io.h>
57#include <linux/bitrev.h>
58#include <linux/bitops.h>
59
60#define DRVNAME "winbond-cir"
61
62/* CEIR Wake-Up Registers, relative to data->wbase */
63#define WBCIR_REG_WCEIR_CTL 0x03 /* CEIR Receiver Control */
64#define WBCIR_REG_WCEIR_STS 0x04 /* CEIR Receiver Status */
65#define WBCIR_REG_WCEIR_EV_EN 0x05 /* CEIR Receiver Event Enable */
66#define WBCIR_REG_WCEIR_CNTL 0x06 /* CEIR Receiver Counter Low */
67#define WBCIR_REG_WCEIR_CNTH 0x07 /* CEIR Receiver Counter High */
68#define WBCIR_REG_WCEIR_INDEX 0x08 /* CEIR Receiver Index */
69#define WBCIR_REG_WCEIR_DATA 0x09 /* CEIR Receiver Data */
70#define WBCIR_REG_WCEIR_CSL 0x0A /* CEIR Re. Compare Strlen */
71#define WBCIR_REG_WCEIR_CFG1 0x0B /* CEIR Re. Configuration 1 */
72#define WBCIR_REG_WCEIR_CFG2 0x0C /* CEIR Re. Configuration 2 */
73
74/* CEIR Enhanced Functionality Registers, relative to data->ebase */
75#define WBCIR_REG_ECEIR_CTS 0x00 /* Enhanced IR Control Status */
76#define WBCIR_REG_ECEIR_CCTL 0x01 /* Infrared Counter Control */
77#define WBCIR_REG_ECEIR_CNT_LO 0x02 /* Infrared Counter LSB */
78#define WBCIR_REG_ECEIR_CNT_HI 0x03 /* Infrared Counter MSB */
79#define WBCIR_REG_ECEIR_IREM 0x04 /* Infrared Emitter Status */
80
81/* SP3 Banked Registers, relative to data->sbase */
82#define WBCIR_REG_SP3_BSR 0x03 /* Bank Select, all banks */
83 /* Bank 0 */
84#define WBCIR_REG_SP3_RXDATA 0x00 /* FIFO RX data (r) */
85#define WBCIR_REG_SP3_TXDATA 0x00 /* FIFO TX data (w) */
86#define WBCIR_REG_SP3_IER 0x01 /* Interrupt Enable */
87#define WBCIR_REG_SP3_EIR 0x02 /* Event Identification (r) */
88#define WBCIR_REG_SP3_FCR 0x02 /* FIFO Control (w) */
89#define WBCIR_REG_SP3_MCR 0x04 /* Mode Control */
90#define WBCIR_REG_SP3_LSR 0x05 /* Link Status */
91#define WBCIR_REG_SP3_MSR 0x06 /* Modem Status */
92#define WBCIR_REG_SP3_ASCR 0x07 /* Aux Status and Control */
93 /* Bank 2 */
94#define WBCIR_REG_SP3_BGDL 0x00 /* Baud Divisor LSB */
95#define WBCIR_REG_SP3_BGDH 0x01 /* Baud Divisor MSB */
96#define WBCIR_REG_SP3_EXCR1 0x02 /* Extended Control 1 */
97#define WBCIR_REG_SP3_EXCR2 0x04 /* Extended Control 2 */
98#define WBCIR_REG_SP3_TXFLV 0x06 /* TX FIFO Level */
99#define WBCIR_REG_SP3_RXFLV 0x07 /* RX FIFO Level */
100 /* Bank 3 */
101#define WBCIR_REG_SP3_MRID 0x00 /* Module Identification */
102#define WBCIR_REG_SP3_SH_LCR 0x01 /* LCR Shadow */
103#define WBCIR_REG_SP3_SH_FCR 0x02 /* FCR Shadow */
104 /* Bank 4 */
105#define WBCIR_REG_SP3_IRCR1 0x02 /* Infrared Control 1 */
106 /* Bank 5 */
107#define WBCIR_REG_SP3_IRCR2 0x04 /* Infrared Control 2 */
108 /* Bank 6 */
109#define WBCIR_REG_SP3_IRCR3 0x00 /* Infrared Control 3 */
110#define WBCIR_REG_SP3_SIR_PW 0x02 /* SIR Pulse Width */
111 /* Bank 7 */
112#define WBCIR_REG_SP3_IRRXDC 0x00 /* IR RX Demod Control */
113#define WBCIR_REG_SP3_IRTXMC 0x01 /* IR TX Mod Control */
114#define WBCIR_REG_SP3_RCCFG 0x02 /* CEIR Config */
115#define WBCIR_REG_SP3_IRCFG1 0x04 /* Infrared Config 1 */
116#define WBCIR_REG_SP3_IRCFG4 0x07 /* Infrared Config 4 */
117
118/*
119 * Magic values follow
120 */
121
122/* No interrupts for WBCIR_REG_SP3_IER and WBCIR_REG_SP3_EIR */
123#define WBCIR_IRQ_NONE 0x00
124/* RX data bit for WBCIR_REG_SP3_IER and WBCIR_REG_SP3_EIR */
125#define WBCIR_IRQ_RX 0x01
126/* Over/Under-flow bit for WBCIR_REG_SP3_IER and WBCIR_REG_SP3_EIR */
127#define WBCIR_IRQ_ERR 0x04
128/* Led enable/disable bit for WBCIR_REG_ECEIR_CTS */
129#define WBCIR_LED_ENABLE 0x80
130/* RX data available bit for WBCIR_REG_SP3_LSR */
131#define WBCIR_RX_AVAIL 0x01
132/* RX disable bit for WBCIR_REG_SP3_ASCR */
133#define WBCIR_RX_DISABLE 0x20
134/* Extended mode enable bit for WBCIR_REG_SP3_EXCR1 */
135#define WBCIR_EXT_ENABLE 0x01
136/* Select compare register in WBCIR_REG_WCEIR_INDEX (bits 5 & 6) */
137#define WBCIR_REGSEL_COMPARE 0x10
138/* Select mask register in WBCIR_REG_WCEIR_INDEX (bits 5 & 6) */
139#define WBCIR_REGSEL_MASK 0x20
140/* Starting address of selected register in WBCIR_REG_WCEIR_INDEX */
141#define WBCIR_REG_ADDR0 0x00
142
143/* Valid banks for the SP3 UART */
144enum wbcir_bank {
145 WBCIR_BANK_0 = 0x00,
146 WBCIR_BANK_1 = 0x80,
147 WBCIR_BANK_2 = 0xE0,
148 WBCIR_BANK_3 = 0xE4,
149 WBCIR_BANK_4 = 0xE8,
150 WBCIR_BANK_5 = 0xEC,
151 WBCIR_BANK_6 = 0xF0,
152 WBCIR_BANK_7 = 0xF4,
153};
154
155/* Supported IR Protocols */
156enum wbcir_protocol {
157 IR_PROTOCOL_RC5 = 0x0,
158 IR_PROTOCOL_NEC = 0x1,
159 IR_PROTOCOL_RC6 = 0x2,
160};
161
162/* Misc */
163#define WBCIR_NAME "Winbond CIR"
164#define WBCIR_ID_FAMILY 0xF1 /* Family ID for the WPCD376I */
165#define WBCIR_ID_CHIP 0x04 /* Chip ID for the WPCD376I */
166#define IR_KEYPRESS_TIMEOUT	250 /* ms; FIXME: should be per-protocol? */
167#define INVALID_SCANCODE 0x7FFFFFFF /* Invalid with all protos */
168#define WAKEUP_IOMEM_LEN 0x10 /* Wake-Up I/O Reg Len */
169#define EHFUNC_IOMEM_LEN 0x10 /* Enhanced Func I/O Reg Len */
170#define SP_IOMEM_LEN 0x08 /* Serial Port 3 (IR) Reg Len */
171#define WBCIR_MAX_IDLE_BYTES 10
172
173static DEFINE_SPINLOCK(wbcir_lock);
174static DEFINE_RWLOCK(keytable_lock);
175
176struct wbcir_key {
177 u32 scancode;
178 unsigned int keycode;
179};
180
181struct wbcir_keyentry {
182 struct wbcir_key key;
183 struct list_head list;
184};
185
186static struct wbcir_key rc6_def_keymap[] = {
187 { 0x800F0400, KEY_NUMERIC_0 },
188 { 0x800F0401, KEY_NUMERIC_1 },
189 { 0x800F0402, KEY_NUMERIC_2 },
190 { 0x800F0403, KEY_NUMERIC_3 },
191 { 0x800F0404, KEY_NUMERIC_4 },
192 { 0x800F0405, KEY_NUMERIC_5 },
193 { 0x800F0406, KEY_NUMERIC_6 },
194 { 0x800F0407, KEY_NUMERIC_7 },
195 { 0x800F0408, KEY_NUMERIC_8 },
196 { 0x800F0409, KEY_NUMERIC_9 },
197 { 0x800F041D, KEY_NUMERIC_STAR },
198 { 0x800F041C, KEY_NUMERIC_POUND },
199 { 0x800F0410, KEY_VOLUMEUP },
200 { 0x800F0411, KEY_VOLUMEDOWN },
201 { 0x800F0412, KEY_CHANNELUP },
202 { 0x800F0413, KEY_CHANNELDOWN },
203 { 0x800F040E, KEY_MUTE },
204 { 0x800F040D, KEY_VENDOR }, /* Vista Logo Key */
205 { 0x800F041E, KEY_UP },
206 { 0x800F041F, KEY_DOWN },
207 { 0x800F0420, KEY_LEFT },
208 { 0x800F0421, KEY_RIGHT },
209 { 0x800F0422, KEY_OK },
210 { 0x800F0423, KEY_ESC },
211 { 0x800F040F, KEY_INFO },
212 { 0x800F040A, KEY_CLEAR },
213 { 0x800F040B, KEY_ENTER },
214 { 0x800F045B, KEY_RED },
215 { 0x800F045C, KEY_GREEN },
216 { 0x800F045D, KEY_YELLOW },
217 { 0x800F045E, KEY_BLUE },
218 { 0x800F045A, KEY_TEXT },
219 { 0x800F0427, KEY_SWITCHVIDEOMODE },
220 { 0x800F040C, KEY_POWER },
221 { 0x800F0450, KEY_RADIO },
222 { 0x800F0448, KEY_PVR },
223 { 0x800F0447, KEY_AUDIO },
224 { 0x800F0426, KEY_EPG },
225 { 0x800F0449, KEY_CAMERA },
226 { 0x800F0425, KEY_TV },
227 { 0x800F044A, KEY_VIDEO },
228 { 0x800F0424, KEY_DVD },
229 { 0x800F0416, KEY_PLAY },
230 { 0x800F0418, KEY_PAUSE },
231 { 0x800F0419, KEY_STOP },
232 { 0x800F0414, KEY_FASTFORWARD },
233 { 0x800F041A, KEY_NEXT },
234 { 0x800F041B, KEY_PREVIOUS },
235 { 0x800F0415, KEY_REWIND },
236 { 0x800F0417, KEY_RECORD },
237};
238
239/* Registers and other state is protected by wbcir_lock */
240struct wbcir_data {
241 unsigned long wbase; /* Wake-Up Baseaddr */
242 unsigned long ebase; /* Enhanced Func. Baseaddr */
243 unsigned long sbase; /* Serial Port Baseaddr */
244 unsigned int irq; /* Serial Port IRQ */
245
246 struct input_dev *input_dev;
247 struct timer_list timer_keyup;
248 struct led_trigger *rxtrigger;
249 struct led_trigger *txtrigger;
250 struct led_classdev led;
251
252 u32 last_scancode;
253 unsigned int last_keycode;
254 u8 last_toggle;
255 u8 keypressed;
256 unsigned long keyup_jiffies;
257 unsigned int idle_count;
258
259 /* RX irdata and parsing state */
260 unsigned long irdata[30];
261 unsigned int irdata_count;
262 unsigned int irdata_idle;
263 unsigned int irdata_off;
264 unsigned int irdata_error;
265
266 /* Protected by keytable_lock */
267 struct list_head keytable;
268};
269
270static enum wbcir_protocol protocol = IR_PROTOCOL_RC6;
271module_param(protocol, uint, 0444);
272MODULE_PARM_DESC(protocol, "IR protocol to use "
273 "(0 = RC5, 1 = NEC, 2 = RC6A, default)");
274
275static int invert; /* default = 0 */
276module_param(invert, bool, 0444);
277MODULE_PARM_DESC(invert, "Invert the signal from the IR receiver");
278
279static unsigned int wake_sc = 0x800F040C;
280module_param(wake_sc, uint, 0644);
281MODULE_PARM_DESC(wake_sc, "Scancode of the power-on IR command");
282
283static unsigned int wake_rc6mode = 6;
284module_param(wake_rc6mode, uint, 0644);
285MODULE_PARM_DESC(wake_rc6mode, "RC6 mode for the power-on command "
286 "(0 = 0, 6 = 6A, default)");
287
288
289
290/*****************************************************************************
291 *
292 * UTILITY FUNCTIONS
293 *
294 *****************************************************************************/
295
296/* Caller needs to hold wbcir_lock */
297static void
298wbcir_set_bits(unsigned long addr, u8 bits, u8 mask)
299{
300 u8 val;
301
302 val = inb(addr);
303 val = ((val & ~mask) | (bits & mask));
304 outb(val, addr);
305}
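
In other words, a masked read-modify-write on an I/O port: bits outside mask keep their current value, bits inside mask take the corresponding value from bits. For instance (hypothetical register address addr), wbcir_set_bits(addr, 0x01, 0x05) sets bit 0, clears bit 2, and leaves every other bit untouched; this is the idiom the WCEIR register setup below leans on.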
306
307/* Selects the register bank for the serial port */
308static inline void
309wbcir_select_bank(struct wbcir_data *data, enum wbcir_bank bank)
310{
311 outb(bank, data->sbase + WBCIR_REG_SP3_BSR);
312}
313
314static enum led_brightness
315wbcir_led_brightness_get(struct led_classdev *led_cdev)
316{
317 struct wbcir_data *data = container_of(led_cdev,
318 struct wbcir_data,
319 led);
320
321 if (inb(data->ebase + WBCIR_REG_ECEIR_CTS) & WBCIR_LED_ENABLE)
322 return LED_FULL;
323 else
324 return LED_OFF;
325}
326
327static void
328wbcir_led_brightness_set(struct led_classdev *led_cdev,
329 enum led_brightness brightness)
330{
331 struct wbcir_data *data = container_of(led_cdev,
332 struct wbcir_data,
333 led);
334
335 wbcir_set_bits(data->ebase + WBCIR_REG_ECEIR_CTS,
336 brightness == LED_OFF ? 0x00 : WBCIR_LED_ENABLE,
337 WBCIR_LED_ENABLE);
338}
339
340/* Manchester encodes bits to RC6 message cells (see wbcir_parse_rc6) */
341static u8
342wbcir_to_rc6cells(u8 val)
343{
344 u8 coded = 0x00;
345 int i;
346
347 val &= 0x0F;
348 for (i = 0; i < 4; i++) {
349 if (val & 0x01)
350 coded |= 0x02 << (i * 2);
351 else
352 coded |= 0x01 << (i * 2);
353 val >>= 1;
354 }
355
356 return coded;
357}
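
Each input bit becomes one two-cell pair, LSB first: a 0 bit encodes as the pair 01 (contributing 0x01 << (i * 2)) and a 1 bit as 10 (contributing 0x02 << (i * 2)). A worked example for the nibble 0xC (binary 1100):

	wbcir_to_rc6cells(0xC) == 0x01 | 0x04 | 0x20 | 0x80 == 0xA5

Bits 0 and 1 (both 0) yield 01-pairs, bits 2 and 3 (both 1) yield 10-pairs; wbcir_shutdown() below uses exactly this to build the wake-up match bytes.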
358
359
360
361/*****************************************************************************
362 *
363 * INPUT FUNCTIONS
364 *
365 *****************************************************************************/
366
367static unsigned int
368wbcir_do_getkeycode(struct wbcir_data *data, u32 scancode)
369{
370 struct wbcir_keyentry *keyentry;
371 unsigned int keycode = KEY_RESERVED;
372 unsigned long flags;
373
374 read_lock_irqsave(&keytable_lock, flags);
375
376 list_for_each_entry(keyentry, &data->keytable, list) {
377 if (keyentry->key.scancode == scancode) {
378 keycode = keyentry->key.keycode;
379 break;
380 }
381 }
382
383 read_unlock_irqrestore(&keytable_lock, flags);
384 return keycode;
385}
386
387static int
388wbcir_getkeycode(struct input_dev *dev, int scancode, int *keycode)
389{
390 struct wbcir_data *data = input_get_drvdata(dev);
391
392 *keycode = (int)wbcir_do_getkeycode(data, (u32)scancode);
393 return 0;
394}
395
396static int
397wbcir_setkeycode(struct input_dev *dev, int sscancode, int keycode)
398{
399 struct wbcir_data *data = input_get_drvdata(dev);
400 struct wbcir_keyentry *keyentry;
401 struct wbcir_keyentry *new_keyentry;
402 unsigned long flags;
403 unsigned int old_keycode = KEY_RESERVED;
404 u32 scancode = (u32)sscancode;
405
406 if (keycode < 0 || keycode > KEY_MAX)
407 return -EINVAL;
408
409 new_keyentry = kmalloc(sizeof(*new_keyentry), GFP_KERNEL);
410 if (!new_keyentry)
411 return -ENOMEM;
412
413 write_lock_irqsave(&keytable_lock, flags);
414
415 list_for_each_entry(keyentry, &data->keytable, list) {
416 if (keyentry->key.scancode != scancode)
417 continue;
418
419 old_keycode = keyentry->key.keycode;
420 keyentry->key.keycode = keycode;
421
422 if (keyentry->key.keycode == KEY_RESERVED) {
423 list_del(&keyentry->list);
424 kfree(keyentry);
425 }
426
427 break;
428 }
429
430 set_bit(keycode, dev->keybit);
431
432 if (old_keycode == KEY_RESERVED) {
433 new_keyentry->key.scancode = scancode;
434 new_keyentry->key.keycode = keycode;
435 list_add(&new_keyentry->list, &data->keytable);
436 } else {
437 kfree(new_keyentry);
438 clear_bit(old_keycode, dev->keybit);
439 list_for_each_entry(keyentry, &data->keytable, list) {
440 if (keyentry->key.keycode == old_keycode) {
441 set_bit(old_keycode, dev->keybit);
442 break;
443 }
444 }
445 }
446
447 write_unlock_irqrestore(&keytable_lock, flags);
448 return 0;
449}
450
451/*
452 * Timer function to report keyup event some time after keydown is
453 * reported by the ISR.
454 */
455static void
456wbcir_keyup(unsigned long cookie)
457{
458 struct wbcir_data *data = (struct wbcir_data *)cookie;
459 unsigned long flags;
460
461 /*
462 * data->keyup_jiffies is used to prevent a race condition if a
463 * hardware interrupt occurs at this point and the keyup timer
464 * event is moved further into the future as a result.
465 *
466 * The timer will then be reactivated and this function called
467 * again in the future. We need to exit gracefully in that case
468 * to allow the input subsystem to do its auto-repeat magic or
469 * a keyup event might follow immediately after the keydown.
470 */
471
472 spin_lock_irqsave(&wbcir_lock, flags);
473
474 if (time_is_after_eq_jiffies(data->keyup_jiffies) && data->keypressed) {
475 data->keypressed = 0;
476 led_trigger_event(data->rxtrigger, LED_OFF);
477 input_report_key(data->input_dev, data->last_keycode, 0);
478 input_sync(data->input_dev);
479 }
480
481 spin_unlock_irqrestore(&wbcir_lock, flags);
482}
483
484static void
485wbcir_keydown(struct wbcir_data *data, u32 scancode, u8 toggle)
486{
487 unsigned int keycode;
488
489 /* Repeat? */
490 if (data->last_scancode == scancode &&
491 data->last_toggle == toggle &&
492 data->keypressed)
493 goto set_timer;
494 data->last_scancode = scancode;
495
496 /* Do we need to release an old keypress? */
497 if (data->keypressed) {
498 input_report_key(data->input_dev, data->last_keycode, 0);
499 input_sync(data->input_dev);
500 data->keypressed = 0;
501 }
502
503 /* Report scancode */
504 input_event(data->input_dev, EV_MSC, MSC_SCAN, (int)scancode);
505
506 /* Do we know this scancode? */
507 keycode = wbcir_do_getkeycode(data, scancode);
508 if (keycode == KEY_RESERVED)
509 goto set_timer;
510
511 /* Register a keypress */
512 input_report_key(data->input_dev, keycode, 1);
513 data->keypressed = 1;
514 data->last_keycode = keycode;
515 data->last_toggle = toggle;
516
517set_timer:
518 input_sync(data->input_dev);
519 led_trigger_event(data->rxtrigger,
520 data->keypressed ? LED_FULL : LED_OFF);
521 data->keyup_jiffies = jiffies + msecs_to_jiffies(IR_KEYPRESS_TIMEOUT);
522 mod_timer(&data->timer_keyup, data->keyup_jiffies);
523}
524
525
526
527/*****************************************************************************
528 *
529 * IR PARSING FUNCTIONS
530 *
531 *****************************************************************************/
532
533/* Resets all irdata */
534static void
535wbcir_reset_irdata(struct wbcir_data *data)
536{
537 memset(data->irdata, 0, sizeof(data->irdata));
538 data->irdata_count = 0;
539 data->irdata_off = 0;
540 data->irdata_error = 0;
541}
542
543/* Adds one bit of irdata */
544static void
545add_irdata_bit(struct wbcir_data *data, int set)
546{
547 if (data->irdata_count >= sizeof(data->irdata) * 8) {
548 data->irdata_error = 1;
549 return;
550 }
551
552 if (set)
553 __set_bit(data->irdata_count, data->irdata);
554 data->irdata_count++;
555}
556
557/* Gets count bits of irdata */
558static u16
559get_bits(struct wbcir_data *data, int count)
560{
561 u16 val = 0x0;
562
563 if (data->irdata_count - data->irdata_off < count) {
564 data->irdata_error = 1;
565 return 0x0;
566 }
567
568 while (count > 0) {
569 val <<= 1;
570 if (test_bit(data->irdata_off, data->irdata))
571 val |= 0x1;
572 count--;
573 data->irdata_off++;
574 }
575
576 return val;
577}
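
Note the consumption order: get_bits() returns bits MSB-first relative to arrival, so if irdata holds the bits 1, 0, 1 (in arrival order), get_bits(data, 3) returns 0x5 (binary 101) and advances irdata_off by 3. Requesting more bits than remain sets irdata_error and returns 0, which is what lets the parsers below run to completion and defer all error handling to a single irdata_error check at the end.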
578
579/* Reads 16 cells and converts them to a byte */
580static u8
581wbcir_rc6cells_to_byte(struct wbcir_data *data)
582{
583 u16 raw = get_bits(data, 16);
584 u8 val = 0x00;
585 int bit;
586
587 for (bit = 0; bit < 8; bit++) {
588 switch (raw & 0x03) {
589 case 0x01:
590 break;
591 case 0x02:
592 val |= (0x01 << bit);
593 break;
594 default:
595 data->irdata_error = 1;
596 break;
597 }
598 raw >>= 2;
599 }
600
601 return val;
602}
603
604/* Decodes a number of bits from raw RC5 data */
605static u8
606wbcir_get_rc5bits(struct wbcir_data *data, unsigned int count)
607{
608 u16 raw = get_bits(data, count * 2);
609 u8 val = 0x00;
610 int bit;
611
612 for (bit = 0; bit < count; bit++) {
613 switch (raw & 0x03) {
614 case 0x01:
615 val |= (0x01 << bit);
616 break;
617 case 0x02:
618 break;
619 default:
620 data->irdata_error = 1;
621 break;
622 }
623 raw >>= 2;
624 }
625
626 return val;
627}
628
629static void
630wbcir_parse_rc6(struct device *dev, struct wbcir_data *data)
631{
632 /*
633 * Normal bits are manchester coded as follows:
634 * cell0 + cell1 = logic "0"
635 * cell1 + cell0 = logic "1"
636 *
637 * The IR pulse has the following components:
638 *
639 * Leader - 6 * cell1 - discarded
640 * Gap - 2 * cell0 - discarded
641 * Start bit - Normal Coding - always "1"
642 * Mode Bit 2 - 0 - Normal Coding
643 * Toggle bit - Normal Coding with double bit time,
644 * e.g. cell0 + cell0 + cell1 + cell1
645 * means logic "0".
646 *
647 * The rest depends on the mode, the following modes are known:
648 *
649 * MODE 0:
650 * Address Bit 7 - 0 - Normal Coding
651 * Command Bit 7 - 0 - Normal Coding
652 *
653 * MODE 6:
654 * The above Toggle Bit is used as a submode bit, 0 = A, 1 = B.
655 * Submode B is for pointing devices, only remotes using submode A
656 * are supported.
657 *
658 * Customer range bit - 0 => Customer = 7 bits, 0...127
659 * 1 => Customer = 15 bits, 32768...65535
660 * Customer Bits - Normal Coding
661 *
662 * Customer codes are allocated by Philips. The rest of the bits
663 * are customer dependent. The following is commonly used (and the
664 * only supported config):
665 *
666 * Toggle Bit - Normal Coding
667 * Address Bit 6 - 0 - Normal Coding
668 * Command Bit 7 - 0 - Normal Coding
669 *
670 * All modes are followed by at least 6 * cell0.
671 *
672 * MODE 0 msglen:
673 * 1 * 2 (start bit) + 3 * 2 (mode) + 2 * 2 (toggle) +
674 * 8 * 2 (address) + 8 * 2 (command) =
675 * 44 cells
676 *
677 * MODE 6A msglen:
678 * 1 * 2 (start bit) + 3 * 2 (mode) + 2 * 2 (submode) +
679 * 1 * 2 (customer range bit) + 7/15 * 2 (customer bits) +
680 * 1 * 2 (toggle bit) + 7 * 2 (address) + 8 * 2 (command) =
681 * 60 - 76 cells
682 */
683 u8 mode;
684 u8 toggle;
685 u16 customer = 0x0;
686 u8 address;
687 u8 command;
688 u32 scancode;
689
690 /* Leader mark */
691 while (get_bits(data, 1) && !data->irdata_error)
692 /* Do nothing */;
693
694 /* Leader space */
695 if (get_bits(data, 1)) {
696 dev_dbg(dev, "RC6 - Invalid leader space\n");
697 return;
698 }
699
700 /* Start bit */
701 if (get_bits(data, 2) != 0x02) {
702 dev_dbg(dev, "RC6 - Invalid start bit\n");
703 return;
704 }
705
706 /* Mode */
707 mode = get_bits(data, 6);
708 switch (mode) {
709 case 0x15: /* 010101 = b000 */
710 mode = 0;
711 break;
712 case 0x29: /* 101001 = b110 */
713 mode = 6;
714 break;
715 default:
716 dev_dbg(dev, "RC6 - Invalid mode\n");
717 return;
718 }
719
720 /* Toggle bit / Submode bit */
721 toggle = get_bits(data, 4);
722 switch (toggle) {
723 case 0x03:
724 toggle = 0;
725 break;
726 case 0x0C:
727 toggle = 1;
728 break;
729 default:
730 dev_dbg(dev, "RC6 - Toggle bit error\n");
731 break;
732 }
733
734 /* Customer */
735 if (mode == 6) {
736 if (toggle != 0) {
737 dev_dbg(dev, "RC6B - Not Supported\n");
738 return;
739 }
740
741 customer = wbcir_rc6cells_to_byte(data);
742
743 if (customer & 0x80) {
744 /* 15 bit customer value */
745 customer <<= 8;
746 customer |= wbcir_rc6cells_to_byte(data);
747 }
748 }
749
750 /* Address */
751 address = wbcir_rc6cells_to_byte(data);
752 if (mode == 6) {
753 toggle = address >> 7;
754 address &= 0x7F;
755 }
756
757 /* Command */
758 command = wbcir_rc6cells_to_byte(data);
759
760 /* Create scancode */
761 scancode = command;
762 scancode |= address << 8;
763 scancode |= customer << 16;
764
765 /* Last sanity check */
766 if (data->irdata_error) {
767 dev_dbg(dev, "RC6 - Cell error(s)\n");
768 return;
769 }
770
771 dev_info(dev, "IR-RC6 ad 0x%02X cm 0x%02X cu 0x%04X "
772 "toggle %u mode %u scan 0x%08X\n",
773 address,
774 command,
775 customer,
776 (unsigned int)toggle,
777 (unsigned int)mode,
778 scancode);
779
780 wbcir_keydown(data, scancode, toggle);
781}
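
Putting the mode 6A pieces together: the scancode packs as customer << 16 | address << 8 | command, so the keymap entries above read directly as (customer, address, command) triples. A short sketch using the KEY_POWER entry from the default keymap (values taken from the table above, not newly derived):

	u32 scancode;

	/* Customer 0x800F (first byte 0x80 has the range bit set, so
	 * 15-bit customer), address 0x04, command 0x0C */
	scancode = (u32)0x800F << 16 | 0x04 << 8 | 0x0C; /* == 0x800F040C */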
782
783static void
784wbcir_parse_rc5(struct device *dev, struct wbcir_data *data)
785{
786 /*
787 * Bits are manchester coded as follows:
788 * cell1 + cell0 = logic "0"
789 * cell0 + cell1 = logic "1"
790 * (i.e. the reverse of RC6)
791 *
792 * Start bit 1 - "1" - discarded
793 * Start bit 2 - Must be inverted to get command bit 6
794 * Toggle bit
795 * Address Bit 4 - 0
796 * Command Bit 5 - 0
797 */
798 u8 toggle;
799 u8 address;
800 u8 command;
801 u32 scancode;
802
803 /* Start bit 1 */
804 if (!get_bits(data, 1)) {
805 dev_dbg(dev, "RC5 - Invalid start bit\n");
806 return;
807 }
808
809 /* Start bit 2 */
810 if (!wbcir_get_rc5bits(data, 1))
811 command = 0x40;
812 else
813 command = 0x00;
814
815 toggle = wbcir_get_rc5bits(data, 1);
816 address = wbcir_get_rc5bits(data, 5);
817 command |= wbcir_get_rc5bits(data, 6);
818 scancode = address << 7 | command;
819
820 /* Last sanity check */
821 if (data->irdata_error) {
822 dev_dbg(dev, "RC5 - Invalid message\n");
823 return;
824 }
825
826 dev_dbg(dev, "IR-RC5 ad %u cm %u t %u s %u\n",
827 (unsigned int)address,
828 (unsigned int)command,
829 (unsigned int)toggle,
830 (unsigned int)scancode);
831
832 wbcir_keydown(data, scancode, toggle);
833}
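
To make the packing concrete, here is a minimal, self-contained userspace sketch of the scancode assembly above, using a hypothetical frame (address 5, 6-bit command field 0x35, second start bit 0):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical RC5 frame: 2nd start bit 0, address 5, command 0x35 */
	uint8_t s2 = 0, address = 0x05, cmd6 = 0x35;
	uint8_t command = (s2 ? 0x00 : 0x40) | cmd6; /* inverted S2 is cmd bit 6 */
	uint32_t scancode = (uint32_t)address << 7 | command;

	printf("scancode = 0x%03X\n", (unsigned int)scancode); /* 0x2F5 */
	return 0;
}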
834
835static void
836wbcir_parse_nec(struct device *dev, struct wbcir_data *data)
837{
838 /*
839 * Each bit represents 560 us.
840 *
841 * Leader - 9 ms burst
842 * Gap - 4.5 ms silence
843 * Address1 bit 0 - 7 - Address 1
844 * Address2 bit 0 - 7 - Address 2
845 * Command1 bit 0 - 7 - Command 1
846 * Command2 bit 0 - 7 - Command 2
847 *
848 * Note the bit order!
849 *
850 * With the old NEC protocol, Address2 was the inverse of Address1
851	 * and Command2 was the inverse of Command1; both served as
852 * an error check.
853 *
854 * With NEC extended, Address1 is the LSB of the Address and
855 * Address2 is the MSB, Command parsing remains unchanged.
856 *
857 * A repeat message is coded as:
858 * Leader - 9 ms burst
859 * Gap - 2.25 ms silence
860 * Repeat - 560 us active
861 */
862 u8 address1;
863 u8 address2;
864 u8 command1;
865 u8 command2;
866 u16 address;
867 u32 scancode;
868
869 /* Leader mark */
870 while (get_bits(data, 1) && !data->irdata_error)
871 /* Do nothing */;
872
873 /* Leader space */
874 if (get_bits(data, 4)) {
875 dev_dbg(dev, "NEC - Invalid leader space\n");
876 return;
877 }
878
879 /* Repeat? */
880 if (get_bits(data, 1)) {
881 if (!data->keypressed) {
882 dev_dbg(dev, "NEC - Stray repeat message\n");
883 return;
884 }
885
886 dev_dbg(dev, "IR-NEC repeat s %u\n",
887 (unsigned int)data->last_scancode);
888
889 wbcir_keydown(data, data->last_scancode, data->last_toggle);
890 return;
891 }
892
893 /* Remaining leader space */
894 if (get_bits(data, 3)) {
895 dev_dbg(dev, "NEC - Invalid leader space\n");
896 return;
897 }
898
899 address1 = bitrev8(get_bits(data, 8));
900 address2 = bitrev8(get_bits(data, 8));
901 command1 = bitrev8(get_bits(data, 8));
902 command2 = bitrev8(get_bits(data, 8));
903
904 /* Sanity check */
905 if (data->irdata_error) {
906 dev_dbg(dev, "NEC - Invalid message\n");
907 return;
908 }
909
910 /* Check command validity */
911	if (command1 != (u8)~command2) {
912 dev_dbg(dev, "NEC - Command bytes mismatch\n");
913 return;
914 }
915
916 /* Check for extended NEC protocol */
917 address = address1;
918	if (address1 != (u8)~address2)
919 address |= address2 << 8;
920
921 scancode = address << 8 | command1;
922
923 dev_dbg(dev, "IR-NEC ad %u cm %u s %u\n",
924 (unsigned int)address,
925 (unsigned int)command1,
926 (unsigned int)scancode);
927
928 wbcir_keydown(data, scancode, !data->last_toggle);
929}
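
Two details are easy to get wrong here. First, the inverse checks must compare within 8 bits (hence the (u8) casts above): plain ~command2 promotes to a negative int that a u8 value can never equal, so without the cast every frame would be rejected. Second, extended NEC is detected simply by the address bytes failing the inverse check. A self-contained sketch with hypothetical frame bytes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical classic-NEC frame: address and command each
	 * followed by their bitwise inverses */
	uint8_t address1 = 0x04, address2 = 0xFB;
	uint8_t command1 = 0x08, command2 = 0xF7;
	uint16_t address;
	uint32_t scancode;

	/* The cast matters: ~command2 alone promotes to a negative int */
	if (command1 != (uint8_t)~command2)
		return 1; /* reject frame */

	address = address1;
	if (address1 != (uint8_t)~address2)
		address |= (uint16_t)address2 << 8; /* extended NEC */

	scancode = (uint32_t)address << 8 | command1;
	printf("scancode = 0x%04X\n", (unsigned int)scancode); /* 0x0408 */
	return 0;
}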
930
931
932
933/*****************************************************************************
934 *
935 * INTERRUPT FUNCTIONS
936 *
937 *****************************************************************************/
938
939static irqreturn_t
940wbcir_irq_handler(int irqno, void *cookie)
941{
942 struct pnp_dev *device = cookie;
943 struct wbcir_data *data = pnp_get_drvdata(device);
944 struct device *dev = &device->dev;
945 u8 status;
946 unsigned long flags;
947 u8 irdata[8];
948 int i;
949 unsigned int hw;
950
951 spin_lock_irqsave(&wbcir_lock, flags);
952
953 wbcir_select_bank(data, WBCIR_BANK_0);
954
955 status = inb(data->sbase + WBCIR_REG_SP3_EIR);
956
957 if (!(status & (WBCIR_IRQ_RX | WBCIR_IRQ_ERR))) {
958 spin_unlock_irqrestore(&wbcir_lock, flags);
959 return IRQ_NONE;
960 }
961
962 if (status & WBCIR_IRQ_ERR)
963 data->irdata_error = 1;
964
965 if (!(status & WBCIR_IRQ_RX))
966 goto out;
967
968 /* Since RXHDLEV is set, at least 8 bytes are in the FIFO */
969 insb(data->sbase + WBCIR_REG_SP3_RXDATA, &irdata[0], 8);
970
971 for (i = 0; i < sizeof(irdata); i++) {
972 hw = hweight8(irdata[i]);
973 if (hw > 4)
974 add_irdata_bit(data, 0);
975 else
976 add_irdata_bit(data, 1);
977
978 if (hw == 8)
979 data->idle_count++;
980 else
981 data->idle_count = 0;
982 }
983
984 if (data->idle_count > WBCIR_MAX_IDLE_BYTES) {
985 /* Set RXINACTIVE... */
986 outb(WBCIR_RX_DISABLE, data->sbase + WBCIR_REG_SP3_ASCR);
987
988 /* ...and drain the FIFO */
989 while (inb(data->sbase + WBCIR_REG_SP3_LSR) & WBCIR_RX_AVAIL)
990 inb(data->sbase + WBCIR_REG_SP3_RXDATA);
991
992 dev_dbg(dev, "IRDATA:\n");
993 for (i = 0; i < data->irdata_count; i += BITS_PER_LONG)
994 dev_dbg(dev, "0x%08lX\n", data->irdata[i/BITS_PER_LONG]);
995
996 switch (protocol) {
997 case IR_PROTOCOL_RC5:
998 wbcir_parse_rc5(dev, data);
999 break;
1000 case IR_PROTOCOL_RC6:
1001 wbcir_parse_rc6(dev, data);
1002 break;
1003 case IR_PROTOCOL_NEC:
1004 wbcir_parse_nec(dev, data);
1005 break;
1006 }
1007
1008 wbcir_reset_irdata(data);
1009 data->idle_count = 0;
1010 }
1011
1012out:
1013 spin_unlock_irqrestore(&wbcir_lock, flags);
1014 return IRQ_HANDLED;
1015}
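
The sampling scheme deserves a note: each FIFO byte is eight consecutive line samples covering one bit/cell, and since the receiver idles high, a byte of mostly ones is space/idle while a byte of mostly zeroes is a mark. The handler majority-votes each byte into a single irdata bit via hweight8() (population count) and counts all-ones bytes toward the idle threshold. A minimal userspace sketch of the vote (hweight8 reimplemented here for illustration; in the kernel it comes from linux/bitops.h):

#include <stdint.h>
#include <stdio.h>

/* Population count, mirroring the kernel's hweight8() */
static unsigned int hweight8(uint8_t v)
{
	unsigned int n = 0;

	while (v) {
		n += v & 1;
		v >>= 1;
	}
	return n;
}

int main(void)
{
	/* One byte = 8 line samples; the line idles high (0xFF) */
	uint8_t samples[] = { 0xFF, 0x00, 0x1F, 0xE0 };
	unsigned int i;

	for (i = 0; i < sizeof(samples); i++) {
		unsigned int hw = hweight8(samples[i]);
		/* more than 4 high samples: space (bit 0), else mark (bit 1) */
		printf("0x%02X -> %u\n", samples[i], hw > 4 ? 0 : 1);
	}
	return 0;
}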
1016
1017
1018
1019/*****************************************************************************
1020 *
1021 * SUSPEND/RESUME FUNCTIONS
1022 *
1023 *****************************************************************************/
1024
1025static void
1026wbcir_shutdown(struct pnp_dev *device)
1027{
1028 struct device *dev = &device->dev;
1029 struct wbcir_data *data = pnp_get_drvdata(device);
1030 int do_wake = 1;
1031 u8 match[11];
1032 u8 mask[11];
1033 u8 rc6_csl = 0;
1034 int i;
1035
1036 memset(match, 0, sizeof(match));
1037 memset(mask, 0, sizeof(mask));
1038
1039 if (wake_sc == INVALID_SCANCODE || !device_may_wakeup(dev)) {
1040 do_wake = 0;
1041 goto finish;
1042 }
1043
1044 switch (protocol) {
1045 case IR_PROTOCOL_RC5:
1046 if (wake_sc > 0xFFF) {
1047 do_wake = 0;
1048 dev_err(dev, "RC5 - Invalid wake scancode\n");
1049 break;
1050 }
1051
1052 /* Mask = 13 bits, ex toggle */
1053 mask[0] = 0xFF;
1054 mask[1] = 0x17;
1055
1056 match[0] = (wake_sc & 0x003F); /* 6 command bits */
1057 match[0] |= (wake_sc & 0x0180) >> 1; /* 2 address bits */
1058 match[1] = (wake_sc & 0x0E00) >> 9; /* 3 address bits */
1059 if (!(wake_sc & 0x0040)) /* 2nd start bit */
1060 match[1] |= 0x10;
1061
1062 break;
1063
1064 case IR_PROTOCOL_NEC:
1065 if (wake_sc > 0xFFFFFF) {
1066 do_wake = 0;
1067 dev_err(dev, "NEC - Invalid wake scancode\n");
1068 break;
1069 }
1070
1071 mask[0] = mask[1] = mask[2] = mask[3] = 0xFF;
1072
1073 match[1] = bitrev8((wake_sc & 0xFF));
1074 match[0] = ~match[1];
1075
1076 match[3] = bitrev8((wake_sc & 0xFF00) >> 8);
1077 if (wake_sc > 0xFFFF)
1078 match[2] = bitrev8((wake_sc & 0xFF0000) >> 16);
1079 else
1080 match[2] = ~match[3];
1081
1082 break;
1083
1084 case IR_PROTOCOL_RC6:
1085
1086 if (wake_rc6mode == 0) {
1087 if (wake_sc > 0xFFFF) {
1088 do_wake = 0;
1089 dev_err(dev, "RC6 - Invalid wake scancode\n");
1090 break;
1091 }
1092
1093 /* Command */
1094 match[0] = wbcir_to_rc6cells(wake_sc >> 0);
1095 mask[0] = 0xFF;
1096 match[1] = wbcir_to_rc6cells(wake_sc >> 4);
1097 mask[1] = 0xFF;
1098
1099 /* Address */
1100 match[2] = wbcir_to_rc6cells(wake_sc >> 8);
1101 mask[2] = 0xFF;
1102 match[3] = wbcir_to_rc6cells(wake_sc >> 12);
1103 mask[3] = 0xFF;
1104
1105 /* Header */
1106 match[4] = 0x50; /* mode1 = mode0 = 0, ignore toggle */
1107 mask[4] = 0xF0;
1108 match[5] = 0x09; /* start bit = 1, mode2 = 0 */
1109 mask[5] = 0x0F;
1110
1111 rc6_csl = 44;
1112
1113 } else if (wake_rc6mode == 6) {
1114 i = 0;
1115
1116 /* Command */
1117 match[i] = wbcir_to_rc6cells(wake_sc >> 0);
1118 mask[i++] = 0xFF;
1119 match[i] = wbcir_to_rc6cells(wake_sc >> 4);
1120 mask[i++] = 0xFF;
1121
1122 /* Address + Toggle */
1123 match[i] = wbcir_to_rc6cells(wake_sc >> 8);
1124 mask[i++] = 0xFF;
1125 match[i] = wbcir_to_rc6cells(wake_sc >> 12);
1126 mask[i++] = 0x3F;
1127
1128 /* Customer bits 7 - 0 */
1129 match[i] = wbcir_to_rc6cells(wake_sc >> 16);
1130 mask[i++] = 0xFF;
1131 match[i] = wbcir_to_rc6cells(wake_sc >> 20);
1132 mask[i++] = 0xFF;
1133
1134 if (wake_sc & 0x80000000) {
1135 /* Customer range bit and bits 15 - 8 */
1136 match[i] = wbcir_to_rc6cells(wake_sc >> 24);
1137 mask[i++] = 0xFF;
1138 match[i] = wbcir_to_rc6cells(wake_sc >> 28);
1139 mask[i++] = 0xFF;
1140 rc6_csl = 76;
1141 } else if (wake_sc <= 0x007FFFFF) {
1142 rc6_csl = 60;
1143 } else {
1144 do_wake = 0;
1145 dev_err(dev, "RC6 - Invalid wake scancode\n");
1146 break;
1147 }
1148
1149 /* Header */
1150 match[i] = 0x93; /* mode1 = mode0 = 1, submode = 0 */
1151 mask[i++] = 0xFF;
1152 match[i] = 0x0A; /* start bit = 1, mode2 = 1 */
1153 mask[i++] = 0x0F;
1154
1155 } else {
1156 do_wake = 0;
1157 dev_err(dev, "RC6 - Invalid wake mode\n");
1158 }
1159
1160 break;
1161
1162 default:
1163 do_wake = 0;
1164 break;
1165 }
1166
1167finish:
1168 if (do_wake) {
1169 /* Set compare and compare mask */
1170 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_INDEX,
1171 WBCIR_REGSEL_COMPARE | WBCIR_REG_ADDR0,
1172 0x3F);
1173 outsb(data->wbase + WBCIR_REG_WCEIR_DATA, match, 11);
1174 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_INDEX,
1175 WBCIR_REGSEL_MASK | WBCIR_REG_ADDR0,
1176 0x3F);
1177 outsb(data->wbase + WBCIR_REG_WCEIR_DATA, mask, 11);
1178
1179 /* RC6 Compare String Len */
1180 outb(rc6_csl, data->wbase + WBCIR_REG_WCEIR_CSL);
1181
1182 /* Clear status bits NEC_REP, BUFF, MSG_END, MATCH */
1183 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_STS, 0x17, 0x17);
1184
1185 /* Clear BUFF_EN, Clear END_EN, Set MATCH_EN */
1186 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x01, 0x07);
1187
1188 /* Set CEIR_EN */
1189 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CTL, 0x01, 0x01);
1190
1191 } else {
1192 /* Clear BUFF_EN, Clear END_EN, Clear MATCH_EN */
1193 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x00, 0x07);
1194
1195 /* Clear CEIR_EN */
1196 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CTL, 0x00, 0x01);
1197 }
1198
1199 /* Disable interrupts */
1200 outb(WBCIR_IRQ_NONE, data->sbase + WBCIR_REG_SP3_IER);
1201}
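
A worked RC5 example of the match assembly (hypothetical wake_sc = 0x2F5, i.e. address 5, command 0x75, matching the RC5 sketch earlier): match[0] = (0x2F5 & 0x3F) | ((0x2F5 & 0x180) >> 1) = 0x35 | 0x40 = 0x75, and match[1] = (0x2F5 & 0xE00) >> 9 = 0x01; since bit 6 of wake_sc is set, the second-start-bit flag (0x10) stays clear, giving match = {0x75, 0x01} under mask = {0xFF, 0x17}.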
1202
1203static int
1204wbcir_suspend(struct pnp_dev *device, pm_message_t state)
1205{
1206 wbcir_shutdown(device);
1207 return 0;
1208}
1209
1210static int
1211wbcir_resume(struct pnp_dev *device)
1212{
1213 struct wbcir_data *data = pnp_get_drvdata(device);
1214
1215 /* Clear BUFF_EN, Clear END_EN, Clear MATCH_EN */
1216 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x00, 0x07);
1217
1218 /* Clear CEIR_EN */
1219 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CTL, 0x00, 0x01);
1220
1221 /* Enable interrupts */
1222 wbcir_reset_irdata(data);
1223 outb(WBCIR_IRQ_RX | WBCIR_IRQ_ERR, data->sbase + WBCIR_REG_SP3_IER);
1224
1225 return 0;
1226}
1227
1228
1229
1230/*****************************************************************************
1231 *
1232 * SETUP/INIT FUNCTIONS
1233 *
1234 *****************************************************************************/
1235
1236static void
1237wbcir_cfg_ceir(struct wbcir_data *data)
1238{
1239 u8 tmp;
1240
1241 /* Set PROT_SEL, RX_INV, Clear CEIR_EN (needed for the led) */
1242 tmp = protocol << 4;
1243 if (invert)
1244 tmp |= 0x08;
1245 outb(tmp, data->wbase + WBCIR_REG_WCEIR_CTL);
1246
1247 /* Clear status bits NEC_REP, BUFF, MSG_END, MATCH */
1248 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_STS, 0x17, 0x17);
1249
1250 /* Clear BUFF_EN, Clear END_EN, Clear MATCH_EN */
1251 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x00, 0x07);
1252
1253 /* Set RC5 cell time to correspond to 36 kHz */
1254 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CFG1, 0x4A, 0x7F);
1255
1256 /* Set IRTX_INV */
1257 if (invert)
1258 outb(0x04, data->ebase + WBCIR_REG_ECEIR_CCTL);
1259 else
1260 outb(0x00, data->ebase + WBCIR_REG_ECEIR_CCTL);
1261
1262 /*
1263	 * Clear IR LED, set SP3 clock to 24 MHz,
1264	 * set SP3_IRRX_SW to binary 01 (helpfully not documented)
1265 */
1266 outb(0x10, data->ebase + WBCIR_REG_ECEIR_CTS);
1267}
1268
1269static int __devinit
1270wbcir_probe(struct pnp_dev *device, const struct pnp_device_id *dev_id)
1271{
1272 struct device *dev = &device->dev;
1273 struct wbcir_data *data;
1274 int err;
1275
1276 if (!(pnp_port_len(device, 0) == EHFUNC_IOMEM_LEN &&
1277 pnp_port_len(device, 1) == WAKEUP_IOMEM_LEN &&
1278 pnp_port_len(device, 2) == SP_IOMEM_LEN)) {
1279 dev_err(dev, "Invalid resources\n");
1280 return -ENODEV;
1281 }
1282
1283 data = kzalloc(sizeof(*data), GFP_KERNEL);
1284 if (!data) {
1285 err = -ENOMEM;
1286 goto exit;
1287 }
1288
1289 pnp_set_drvdata(device, data);
1290
1291 data->ebase = pnp_port_start(device, 0);
1292 data->wbase = pnp_port_start(device, 1);
1293 data->sbase = pnp_port_start(device, 2);
1294 data->irq = pnp_irq(device, 0);
1295
1296 if (data->wbase == 0 || data->ebase == 0 ||
1297 data->sbase == 0 || data->irq == 0) {
1298 err = -ENODEV;
1299 dev_err(dev, "Invalid resources\n");
1300 goto exit_free_data;
1301 }
1302
1303 dev_dbg(&device->dev, "Found device "
1304 "(w: 0x%lX, e: 0x%lX, s: 0x%lX, i: %u)\n",
1305 data->wbase, data->ebase, data->sbase, data->irq);
1306
1307 if (!request_region(data->wbase, WAKEUP_IOMEM_LEN, DRVNAME)) {
1308 dev_err(dev, "Region 0x%lx-0x%lx already in use!\n",
1309 data->wbase, data->wbase + WAKEUP_IOMEM_LEN - 1);
1310 err = -EBUSY;
1311 goto exit_free_data;
1312 }
1313
1314 if (!request_region(data->ebase, EHFUNC_IOMEM_LEN, DRVNAME)) {
1315 dev_err(dev, "Region 0x%lx-0x%lx already in use!\n",
1316 data->ebase, data->ebase + EHFUNC_IOMEM_LEN - 1);
1317 err = -EBUSY;
1318 goto exit_release_wbase;
1319 }
1320
1321 if (!request_region(data->sbase, SP_IOMEM_LEN, DRVNAME)) {
1322 dev_err(dev, "Region 0x%lx-0x%lx already in use!\n",
1323 data->sbase, data->sbase + SP_IOMEM_LEN - 1);
1324 err = -EBUSY;
1325 goto exit_release_ebase;
1326 }
1327
1328 err = request_irq(data->irq, wbcir_irq_handler,
1329 IRQF_DISABLED, DRVNAME, device);
1330 if (err) {
1331 dev_err(dev, "Failed to claim IRQ %u\n", data->irq);
1332 err = -EBUSY;
1333 goto exit_release_sbase;
1334 }
1335
1336 led_trigger_register_simple("cir-tx", &data->txtrigger);
1337 if (!data->txtrigger) {
1338 err = -ENOMEM;
1339 goto exit_free_irq;
1340 }
1341
1342 led_trigger_register_simple("cir-rx", &data->rxtrigger);
1343 if (!data->rxtrigger) {
1344 err = -ENOMEM;
1345 goto exit_unregister_txtrigger;
1346 }
1347
1348 data->led.name = "cir::activity";
1349 data->led.default_trigger = "cir-rx";
1350 data->led.brightness_set = wbcir_led_brightness_set;
1351 data->led.brightness_get = wbcir_led_brightness_get;
1352 err = led_classdev_register(&device->dev, &data->led);
1353 if (err)
1354 goto exit_unregister_rxtrigger;
1355
1356 data->input_dev = input_allocate_device();
1357 if (!data->input_dev) {
1358 err = -ENOMEM;
1359 goto exit_unregister_led;
1360 }
1361
1362 data->input_dev->evbit[0] = BIT(EV_KEY);
1363 data->input_dev->name = WBCIR_NAME;
1364 data->input_dev->phys = "wbcir/cir0";
1365 data->input_dev->id.bustype = BUS_HOST;
1366 data->input_dev->id.vendor = PCI_VENDOR_ID_WINBOND;
1367 data->input_dev->id.product = WBCIR_ID_FAMILY;
1368 data->input_dev->id.version = WBCIR_ID_CHIP;
1369 data->input_dev->getkeycode = wbcir_getkeycode;
1370 data->input_dev->setkeycode = wbcir_setkeycode;
1371 input_set_capability(data->input_dev, EV_MSC, MSC_SCAN);
1372 input_set_drvdata(data->input_dev, data);
1373
1374 err = input_register_device(data->input_dev);
1375 if (err)
1376 goto exit_free_input;
1377
1378 data->last_scancode = INVALID_SCANCODE;
1379 INIT_LIST_HEAD(&data->keytable);
1380 setup_timer(&data->timer_keyup, wbcir_keyup, (unsigned long)data);
1381
1382 /* Load default keymaps */
1383 if (protocol == IR_PROTOCOL_RC6) {
1384 int i;
1385 for (i = 0; i < ARRAY_SIZE(rc6_def_keymap); i++) {
1386 err = wbcir_setkeycode(data->input_dev,
1387 (int)rc6_def_keymap[i].scancode,
1388 (int)rc6_def_keymap[i].keycode);
1389 if (err)
1390 goto exit_unregister_keys;
1391 }
1392 }
1393
1394 device_init_wakeup(&device->dev, 1);
1395
1396 wbcir_cfg_ceir(data);
1397
1398 /* Disable interrupts */
1399 wbcir_select_bank(data, WBCIR_BANK_0);
1400 outb(WBCIR_IRQ_NONE, data->sbase + WBCIR_REG_SP3_IER);
1401
1402 /* Enable extended mode */
1403 wbcir_select_bank(data, WBCIR_BANK_2);
1404 outb(WBCIR_EXT_ENABLE, data->sbase + WBCIR_REG_SP3_EXCR1);
1405
1406 /*
1407	 * Configure the baud generator; IR data will be sampled at
1408	 * a bitrate of: (24 MHz * prescaler) / (divisor * 16).
1409	 *
1410	 * The ECIR registers include a flag to change the
1411	 * 24 MHz clock freq to 48 MHz.
1412	 *
1413	 * It's not documented in the specs, but FIFO levels
1414	 * other than 16 seem to be unsupported.
1415 */
1416
1417 /* prescaler 1.0, tx/rx fifo lvl 16 */
1418 outb(0x30, data->sbase + WBCIR_REG_SP3_EXCR2);
1419
1420 /* Set baud divisor to generate one byte per bit/cell */
1421 switch (protocol) {
1422 case IR_PROTOCOL_RC5:
1423 outb(0xA7, data->sbase + WBCIR_REG_SP3_BGDL);
1424 break;
1425 case IR_PROTOCOL_RC6:
1426 outb(0x53, data->sbase + WBCIR_REG_SP3_BGDL);
1427 break;
1428 case IR_PROTOCOL_NEC:
1429 outb(0x69, data->sbase + WBCIR_REG_SP3_BGDL);
1430 break;
1431 }
1432 outb(0x00, data->sbase + WBCIR_REG_SP3_BGDH);
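
As a sanity check of those divisors against nominal protocol timings: the sample rate is 24 MHz / (divisor * 16) and one FIFO byte holds eight samples. RC5's 0xA7 (167) gives roughly 8982 Hz, one byte per ~890 us, close to the 889 us RC5 half-bit; RC6's 0x53 (83) gives roughly 18072 Hz, one byte per ~443 us, close to the 444 us RC6 cell; NEC's 0x69 (105) gives 14285.7 Hz, exactly one byte per 560 us NEC unit. This matches the "one byte per bit/cell" comment above.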
1433
1434 /* Set CEIR mode */
1435 wbcir_select_bank(data, WBCIR_BANK_0);
1436 outb(0xC0, data->sbase + WBCIR_REG_SP3_MCR);
1437 inb(data->sbase + WBCIR_REG_SP3_LSR); /* Clear LSR */
1438 inb(data->sbase + WBCIR_REG_SP3_MSR); /* Clear MSR */
1439
1440 /* Disable RX demod, run-length encoding/decoding, set freq span */
1441 wbcir_select_bank(data, WBCIR_BANK_7);
1442 outb(0x10, data->sbase + WBCIR_REG_SP3_RCCFG);
1443
1444 /* Disable timer */
1445 wbcir_select_bank(data, WBCIR_BANK_4);
1446 outb(0x00, data->sbase + WBCIR_REG_SP3_IRCR1);
1447
1448 /* Enable MSR interrupt, Clear AUX_IRX */
1449 wbcir_select_bank(data, WBCIR_BANK_5);
1450 outb(0x00, data->sbase + WBCIR_REG_SP3_IRCR2);
1451
1452 /* Disable CRC */
1453 wbcir_select_bank(data, WBCIR_BANK_6);
1454 outb(0x20, data->sbase + WBCIR_REG_SP3_IRCR3);
1455
1456 /* Set RX/TX (de)modulation freq, not really used */
1457 wbcir_select_bank(data, WBCIR_BANK_7);
1458 outb(0xF2, data->sbase + WBCIR_REG_SP3_IRRXDC);
1459 outb(0x69, data->sbase + WBCIR_REG_SP3_IRTXMC);
1460
1461 /* Set invert and pin direction */
1462 if (invert)
1463 outb(0x10, data->sbase + WBCIR_REG_SP3_IRCFG4);
1464 else
1465 outb(0x00, data->sbase + WBCIR_REG_SP3_IRCFG4);
1466
1467 /* Set FIFO thresholds (RX = 8, TX = 3), reset RX/TX */
1468 wbcir_select_bank(data, WBCIR_BANK_0);
1469 outb(0x97, data->sbase + WBCIR_REG_SP3_FCR);
1470
1471 /* Clear AUX status bits */
1472 outb(0xE0, data->sbase + WBCIR_REG_SP3_ASCR);
1473
1474 /* Enable interrupts */
1475 outb(WBCIR_IRQ_RX | WBCIR_IRQ_ERR, data->sbase + WBCIR_REG_SP3_IER);
1476
1477 return 0;
1478
1479exit_unregister_keys:
1480 if (!list_empty(&data->keytable)) {
1481 struct wbcir_keyentry *key;
1482 struct wbcir_keyentry *keytmp;
1483
1484 list_for_each_entry_safe(key, keytmp, &data->keytable, list) {
1485 list_del(&key->list);
1486 kfree(key);
1487 }
1488 }
1489 input_unregister_device(data->input_dev);
1490 /* Can't call input_free_device on an unregistered device */
1491 data->input_dev = NULL;
1492exit_free_input:
1493 input_free_device(data->input_dev);
1494exit_unregister_led:
1495 led_classdev_unregister(&data->led);
1496exit_unregister_rxtrigger:
1497 led_trigger_unregister_simple(data->rxtrigger);
1498exit_unregister_txtrigger:
1499 led_trigger_unregister_simple(data->txtrigger);
1500exit_free_irq:
1501 free_irq(data->irq, device);
1502exit_release_sbase:
1503 release_region(data->sbase, SP_IOMEM_LEN);
1504exit_release_ebase:
1505 release_region(data->ebase, EHFUNC_IOMEM_LEN);
1506exit_release_wbase:
1507 release_region(data->wbase, WAKEUP_IOMEM_LEN);
1508exit_free_data:
1509 kfree(data);
1510 pnp_set_drvdata(device, NULL);
1511exit:
1512 return err;
1513}
1514
1515static void __devexit
1516wbcir_remove(struct pnp_dev *device)
1517{
1518 struct wbcir_data *data = pnp_get_drvdata(device);
1519 struct wbcir_keyentry *key;
1520 struct wbcir_keyentry *keytmp;
1521
1522 /* Disable interrupts */
1523 wbcir_select_bank(data, WBCIR_BANK_0);
1524 outb(WBCIR_IRQ_NONE, data->sbase + WBCIR_REG_SP3_IER);
1525
1526 del_timer_sync(&data->timer_keyup);
1527
1528 free_irq(data->irq, device);
1529
1530 /* Clear status bits NEC_REP, BUFF, MSG_END, MATCH */
1531 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_STS, 0x17, 0x17);
1532
1533 /* Clear CEIR_EN */
1534 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_CTL, 0x00, 0x01);
1535
1536 /* Clear BUFF_EN, END_EN, MATCH_EN */
1537 wbcir_set_bits(data->wbase + WBCIR_REG_WCEIR_EV_EN, 0x00, 0x07);
1538
1539 /* This will generate a keyup event if necessary */
1540 input_unregister_device(data->input_dev);
1541
1542 led_trigger_unregister_simple(data->rxtrigger);
1543 led_trigger_unregister_simple(data->txtrigger);
1544 led_classdev_unregister(&data->led);
1545
1546 /* This is ok since &data->led isn't actually used */
1547 wbcir_led_brightness_set(&data->led, LED_OFF);
1548
1549 release_region(data->wbase, WAKEUP_IOMEM_LEN);
1550 release_region(data->ebase, EHFUNC_IOMEM_LEN);
1551 release_region(data->sbase, SP_IOMEM_LEN);
1552
1553 list_for_each_entry_safe(key, keytmp, &data->keytable, list) {
1554 list_del(&key->list);
1555 kfree(key);
1556 }
1557
1558 kfree(data);
1559
1560 pnp_set_drvdata(device, NULL);
1561}
1562
1563static const struct pnp_device_id wbcir_ids[] = {
1564 { "WEC1022", 0 },
1565 { "", 0 }
1566};
1567MODULE_DEVICE_TABLE(pnp, wbcir_ids);
1568
1569static struct pnp_driver wbcir_driver = {
1570 .name = WBCIR_NAME,
1571 .id_table = wbcir_ids,
1572 .probe = wbcir_probe,
1573 .remove = __devexit_p(wbcir_remove),
1574 .suspend = wbcir_suspend,
1575 .resume = wbcir_resume,
1576 .shutdown = wbcir_shutdown
1577};
1578
1579static int __init
1580wbcir_init(void)
1581{
1582 int ret;
1583
1584 switch (protocol) {
1585 case IR_PROTOCOL_RC5:
1586 case IR_PROTOCOL_NEC:
1587 case IR_PROTOCOL_RC6:
1588 break;
1589 default:
1590 printk(KERN_ERR DRVNAME ": Invalid protocol argument\n");
1591 return -EINVAL;
1592 }
1593
1594 ret = pnp_register_driver(&wbcir_driver);
1595 if (ret)
1596 printk(KERN_ERR DRVNAME ": Unable to register driver\n");
1597
1598 return ret;
1599}
1600
1601static void __exit
1602wbcir_exit(void)
1603{
1604 pnp_unregister_driver(&wbcir_driver);
1605}
1606
1607MODULE_AUTHOR("David Härdeman <david@hardeman.nu>");
1608MODULE_DESCRIPTION("Winbond SuperI/O Consumer IR Driver");
1609MODULE_LICENSE("GPL");
1610
1611module_init(wbcir_init);
1612module_exit(wbcir_exit);
1613
1614
diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c
index bff72d81f26..9f8f67b6c07 100644
--- a/drivers/isdn/capi/capifs.c
+++ b/drivers/isdn/capi/capifs.c
@@ -89,7 +89,7 @@ static int capifs_remount(struct super_block *s, int *flags, char *data)
89 return 0; 89 return 0;
90} 90}
91 91
92static struct super_operations capifs_sops = 92static const struct super_operations capifs_sops =
93{ 93{
94 .statfs = simple_statfs, 94 .statfs = simple_statfs,
95 .remount_fs = capifs_remount, 95 .remount_fs = capifs_remount,
diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c
index 16f2e465e5f..26626eead82 100644
--- a/drivers/isdn/capi/capiutil.c
+++ b/drivers/isdn/capi/capiutil.c
@@ -1019,7 +1019,7 @@ int __init cdebug_init(void)
1019 if (!g_debbuf->buf) { 1019 if (!g_debbuf->buf) {
1020 kfree(g_cmsg); 1020 kfree(g_cmsg);
1021 kfree(g_debbuf); 1021 kfree(g_debbuf);
1022 return -ENOMEM;; 1022 return -ENOMEM;
1023 } 1023 }
1024 g_debbuf->size = CDEBUG_GSIZE; 1024 g_debbuf->size = CDEBUG_GSIZE;
1025 g_debbuf->buf[0] = 0; 1025 g_debbuf->buf[0] = 0;
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 7188c59a76f..adb1e8c36b4 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -761,7 +761,7 @@ isdn_getnum(char **p)
761 * Be aware that this is not an atomic operation when sleep != 0, even though 761 * Be aware that this is not an atomic operation when sleep != 0, even though
762 * interrupts are turned off! Well, like that we are currently only called 762 * interrupts are turned off! Well, like that we are currently only called
763 * on behalf of a read system call on raw device files (which are documented 763 * on behalf of a read system call on raw device files (which are documented
764 * to be dangerous and for for debugging purpose only). The inode semaphore 764 * to be dangerous and for debugging purpose only). The inode semaphore
765 * takes care that this is not called for the same minor device number while 765 * takes care that this is not called for the same minor device number while
766 * we are sleeping, but access is not serialized against simultaneous read() 766 * we are sleeping, but access is not serialized against simultaneous read()
767 * from the corresponding ttyI device. Can other ugly events, like changes 767 * from the corresponding ttyI device. Can other ugly events, like changes
@@ -873,7 +873,7 @@ isdn_readbchan(int di, int channel, u_char * buf, u_char * fp, int len, wait_que
873 * Be aware that this is not an atomic operation when sleep != 0, even though 873 * Be aware that this is not an atomic operation when sleep != 0, even though
874 * interrupts are turned off! Well, like that we are currently only called 874 * interrupts are turned off! Well, like that we are currently only called
875 * on behalf of a read system call on raw device files (which are documented 875 * on behalf of a read system call on raw device files (which are documented
876 * to be dangerous and for for debugging purpose only). The inode semaphore 876 * to be dangerous and for debugging purpose only). The inode semaphore
877 * takes care that this is not called for the same minor device number while 877 * takes care that this is not called for the same minor device number while
878 * we are sleeping, but access is not serialized against simultaneous read() 878 * we are sleeping, but access is not serialized against simultaneous read()
879 * from the corresponding ttyI device. Can other ugly events, like changes 879 * from the corresponding ttyI device. Can other ugly events, like changes
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a8d0aee3bc0..8aaad65c3bb 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -894,7 +894,7 @@ void guest_set_pte(struct lg_cpu *cpu,
894 * tells us they've changed. When the Guest tries to use the new entry it will 894 * tells us they've changed. When the Guest tries to use the new entry it will
895 * fault and demand_page() will fix it up. 895 * fault and demand_page() will fix it up.
896 * 896 *
897 * So with that in mind here's our code to to update a (top-level) PGD entry: 897 * So with that in mind here's our code to update a (top-level) PGD entry:
898 */ 898 */
899void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx) 899void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
900{ 900{
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index a98ab72adf9..93fb32038b1 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -274,7 +274,7 @@ static void __devinit rackmeter_init_cpu_sniffer(struct rackmeter *rm)
274 274
275 if (cpu > 1) 275 if (cpu > 1)
276 continue; 276 continue;
277 rcpu = &rm->cpu[cpu];; 277 rcpu = &rm->cpu[cpu];
278 rcpu->prev_idle = get_cpu_idle_time(cpu); 278 rcpu->prev_idle = get_cpu_idle_time(cpu);
279 rcpu->prev_wall = jiffies64_to_cputime64(get_jiffies_64()); 279 rcpu->prev_wall = jiffies64_to_cputime64(get_jiffies_64());
280 schedule_delayed_work_on(cpu, &rm->cpu[cpu].sniffer, 280 schedule_delayed_work_on(cpu, &rm->cpu[cpu].sniffer,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eee28fac210..376f1ab48a2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1716,7 +1716,7 @@ out:
1716 return r; 1716 return r;
1717} 1717}
1718 1718
1719static struct block_device_operations dm_blk_dops; 1719static const struct block_device_operations dm_blk_dops;
1720 1720
1721static void dm_wq_work(struct work_struct *work); 1721static void dm_wq_work(struct work_struct *work);
1722 1722
@@ -2663,7 +2663,7 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
2663 kfree(pools); 2663 kfree(pools);
2664} 2664}
2665 2665
2666static struct block_device_operations dm_blk_dops = { 2666static const struct block_device_operations dm_blk_dops = {
2667 .open = dm_blk_open, 2667 .open = dm_blk_open,
2668 .release = dm_blk_close, 2668 .release = dm_blk_close,
2669 .ioctl = dm_blk_ioctl, 2669 .ioctl = dm_blk_ioctl,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9dd872000ce..6aa497e4baf 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -138,7 +138,7 @@ static ctl_table raid_root_table[] = {
138 { .ctl_name = 0 } 138 { .ctl_name = 0 }
139}; 139};
140 140
141static struct block_device_operations md_fops; 141static const struct block_device_operations md_fops;
142 142
143static int start_readonly; 143static int start_readonly;
144 144
@@ -5556,7 +5556,7 @@ static int md_revalidate(struct gendisk *disk)
5556 mddev->changed = 0; 5556 mddev->changed = 0;
5557 return 0; 5557 return 0;
5558} 5558}
5559static struct block_device_operations md_fops = 5559static const struct block_device_operations md_fops =
5560{ 5560{
5561 .owner = THIS_MODULE, 5561 .owner = THIS_MODULE,
5562 .open = md_open, 5562 .open = md_open,
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f8fc188bc76..f55d2ff9513 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -201,7 +201,7 @@ struct mddev_s
201 * INTR: resync needs to be aborted for some reason 201 * INTR: resync needs to be aborted for some reason
202 * DONE: thread is done and is waiting to be reaped 202 * DONE: thread is done and is waiting to be reaped
203 * REQUEST: user-space has requested a sync (used with SYNC) 203 * REQUEST: user-space has requested a sync (used with SYNC)
204 * CHECK: user-space request for for check-only, no repair 204 * CHECK: user-space request for check-only, no repair
205 * RESHAPE: A reshape is happening 205 * RESHAPE: A reshape is happening
206 * 206 *
207 * If neither SYNC or RESHAPE are set, then it is a recovery. 207 * If neither SYNC or RESHAPE are set, then it is a recovery.
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 89e76819f61..d2d3fd54cc6 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -150,6 +150,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
150 } 150 }
151 151
152 mp_bh = mempool_alloc(conf->pool, GFP_NOIO); 152 mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
153 memset(mp_bh, 0, sizeof(*mp_bh));
153 154
154 mp_bh->master_bio = bio; 155 mp_bh->master_bio = bio;
155 mp_bh->mddev = mddev; 156 mp_bh->mddev = mddev;
@@ -493,7 +494,7 @@ static int multipath_run (mddev_t *mddev)
493 } 494 }
494 mddev->degraded = conf->raid_disks - conf->working_disks; 495 mddev->degraded = conf->raid_disks - conf->working_disks;
495 496
496 conf->pool = mempool_create_kzalloc_pool(NR_RESERVED_BUFS, 497 conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
497 sizeof(struct multipath_bh)); 498 sizeof(struct multipath_bh));
498 if (conf->pool == NULL) { 499 if (conf->pool == NULL) {
499 printk(KERN_ERR 500 printk(KERN_ERR
diff --git a/drivers/media/dvb/pt1/pt1.c b/drivers/media/dvb/pt1/pt1.c
index 8ffbcecad93..81e623a90f0 100644
--- a/drivers/media/dvb/pt1/pt1.c
+++ b/drivers/media/dvb/pt1/pt1.c
@@ -23,6 +23,7 @@
23 23
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/vmalloc.h>
26#include <linux/pci.h> 27#include <linux/pci.h>
27#include <linux/kthread.h> 28#include <linux/kthread.h>
28#include <linux/freezer.h> 29#include <linux/freezer.h>
diff --git a/drivers/media/dvb/siano/smscoreapi.c b/drivers/media/dvb/siano/smscoreapi.c
index bd9ab9d0d12..fa6a62369a7 100644
--- a/drivers/media/dvb/siano/smscoreapi.c
+++ b/drivers/media/dvb/siano/smscoreapi.c
@@ -1367,7 +1367,7 @@ int smscore_set_gpio(struct smscore_device_t *coredev, u32 pin, int level)
1367 &msg, sizeof(msg)); 1367 &msg, sizeof(msg));
1368} 1368}
1369 1369
1370/* new GPIO managment implementation */ 1370/* new GPIO management implementation */
1371static int GetGpioPinParams(u32 PinNum, u32 *pTranslatedPinNum, 1371static int GetGpioPinParams(u32 PinNum, u32 *pTranslatedPinNum,
1372 u32 *pGroupNum, u32 *pGroupCfg) { 1372 u32 *pGroupNum, u32 *pGroupCfg) {
1373 1373
diff --git a/drivers/media/dvb/siano/smscoreapi.h b/drivers/media/dvb/siano/smscoreapi.h
index f1108c64e89..eec18aaf551 100644
--- a/drivers/media/dvb/siano/smscoreapi.h
+++ b/drivers/media/dvb/siano/smscoreapi.h
@@ -657,12 +657,12 @@ struct smscore_buffer_t *smscore_getbuffer(struct smscore_device_t *coredev);
657extern void smscore_putbuffer(struct smscore_device_t *coredev, 657extern void smscore_putbuffer(struct smscore_device_t *coredev,
658 struct smscore_buffer_t *cb); 658 struct smscore_buffer_t *cb);
659 659
660/* old GPIO managment */ 660/* old GPIO management */
661int smscore_configure_gpio(struct smscore_device_t *coredev, u32 pin, 661int smscore_configure_gpio(struct smscore_device_t *coredev, u32 pin,
662 struct smscore_config_gpio *pinconfig); 662 struct smscore_config_gpio *pinconfig);
663int smscore_set_gpio(struct smscore_device_t *coredev, u32 pin, int level); 663int smscore_set_gpio(struct smscore_device_t *coredev, u32 pin, int level);
664 664
665/* new GPIO managment */ 665/* new GPIO management */
666extern int smscore_gpio_configure(struct smscore_device_t *coredev, u8 PinNum, 666extern int smscore_gpio_configure(struct smscore_device_t *coredev, u8 PinNum,
667 struct smscore_gpio_config *pGpioConfig); 667 struct smscore_gpio_config *pGpioConfig);
668extern int smscore_gpio_set_level(struct smscore_device_t *coredev, u8 PinNum, 668extern int smscore_gpio_set_level(struct smscore_device_t *coredev, u8 PinNum,
diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c
index 575bf9d8941..a1239083472 100644
--- a/drivers/media/radio/radio-mr800.c
+++ b/drivers/media/radio/radio-mr800.c
@@ -46,7 +46,7 @@
46 * Version 0.11: Converted to v4l2_device. 46 * Version 0.11: Converted to v4l2_device.
47 * 47 *
48 * Many things to do: 48 * Many things to do:
49 * - Correct power managment of device (suspend & resume) 49 * - Correct power management of device (suspend & resume)
50 * - Add code for scanning and smooth tuning 50 * - Add code for scanning and smooth tuning
51 * - Add code for sensitivity value 51 * - Add code for sensitivity value
52 * - Correct mistakes 52 * - Correct mistakes
diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c
index 356d6896da3..fbdc1cde56a 100644
--- a/drivers/media/video/cx88/cx88-blackbird.c
+++ b/drivers/media/video/cx88/cx88-blackbird.c
@@ -1371,7 +1371,7 @@ static struct cx8802_driver cx8802_blackbird_driver = {
1371 .advise_release = cx8802_blackbird_advise_release, 1371 .advise_release = cx8802_blackbird_advise_release,
1372}; 1372};
1373 1373
1374static int blackbird_init(void) 1374static int __init blackbird_init(void)
1375{ 1375{
1376 printk(KERN_INFO "cx2388x blackbird driver version %d.%d.%d loaded\n", 1376 printk(KERN_INFO "cx2388x blackbird driver version %d.%d.%d loaded\n",
1377 (CX88_VERSION_CODE >> 16) & 0xff, 1377 (CX88_VERSION_CODE >> 16) & 0xff,
@@ -1384,7 +1384,7 @@ static int blackbird_init(void)
1384 return cx8802_register_driver(&cx8802_blackbird_driver); 1384 return cx8802_register_driver(&cx8802_blackbird_driver);
1385} 1385}
1386 1386
1387static void blackbird_fini(void) 1387static void __exit blackbird_fini(void)
1388{ 1388{
1389 cx8802_unregister_driver(&cx8802_blackbird_driver); 1389 cx8802_unregister_driver(&cx8802_blackbird_driver);
1390} 1390}
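
This hunk and the cx88-dvb, cx88-mpeg, cx88-video, m25p80, slram, ftl, cafe_nand and cmx270 hunks below all apply the same annotation: module entry points gain __init/__exit so that initialization code can be freed after boot and exit code can be dropped entirely from built-in (non-modular) kernels. The shape of the pattern, as a hypothetical demo driver:

    #include <linux/init.h>
    #include <linux/module.h>

    static int __init demo_init(void)
    {
            /* placed in .init.text; the kernel frees it once boot-time
             * initcalls have run */
            return 0;
    }

    static void __exit demo_exit(void)
    {
            /* discarded at link time when the driver is built in, since
             * built-in code can never be unloaded */
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
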
diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c
index 6e5d142b5b0..518bcfe18bc 100644
--- a/drivers/media/video/cx88/cx88-dvb.c
+++ b/drivers/media/video/cx88/cx88-dvb.c
@@ -1350,7 +1350,7 @@ static struct cx8802_driver cx8802_dvb_driver = {
1350 .advise_release = cx8802_dvb_advise_release, 1350 .advise_release = cx8802_dvb_advise_release,
1351}; 1351};
1352 1352
1353static int dvb_init(void) 1353static int __init dvb_init(void)
1354{ 1354{
1355 printk(KERN_INFO "cx88/2: cx2388x dvb driver version %d.%d.%d loaded\n", 1355 printk(KERN_INFO "cx88/2: cx2388x dvb driver version %d.%d.%d loaded\n",
1356 (CX88_VERSION_CODE >> 16) & 0xff, 1356 (CX88_VERSION_CODE >> 16) & 0xff,
@@ -1363,7 +1363,7 @@ static int dvb_init(void)
1363 return cx8802_register_driver(&cx8802_dvb_driver); 1363 return cx8802_register_driver(&cx8802_dvb_driver);
1364} 1364}
1365 1365
1366static void dvb_fini(void) 1366static void __exit dvb_fini(void)
1367{ 1367{
1368 cx8802_unregister_driver(&cx8802_dvb_driver); 1368 cx8802_unregister_driver(&cx8802_dvb_driver);
1369} 1369}
diff --git a/drivers/media/video/cx88/cx88-mpeg.c b/drivers/media/video/cx88/cx88-mpeg.c
index 7172dcf2a4f..de9ff0fc741 100644
--- a/drivers/media/video/cx88/cx88-mpeg.c
+++ b/drivers/media/video/cx88/cx88-mpeg.c
@@ -870,7 +870,7 @@ static struct pci_driver cx8802_pci_driver = {
870 .remove = __devexit_p(cx8802_remove), 870 .remove = __devexit_p(cx8802_remove),
871}; 871};
872 872
873static int cx8802_init(void) 873static int __init cx8802_init(void)
874{ 874{
875 printk(KERN_INFO "cx88/2: cx2388x MPEG-TS Driver Manager version %d.%d.%d loaded\n", 875 printk(KERN_INFO "cx88/2: cx2388x MPEG-TS Driver Manager version %d.%d.%d loaded\n",
876 (CX88_VERSION_CODE >> 16) & 0xff, 876 (CX88_VERSION_CODE >> 16) & 0xff,
@@ -883,7 +883,7 @@ static int cx8802_init(void)
883 return pci_register_driver(&cx8802_pci_driver); 883 return pci_register_driver(&cx8802_pci_driver);
884} 884}
885 885
886static void cx8802_fini(void) 886static void __exit cx8802_fini(void)
887{ 887{
888 pci_unregister_driver(&cx8802_pci_driver); 888 pci_unregister_driver(&cx8802_pci_driver);
889} 889}
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index 81d2b5dea18..57e6b124109 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -2113,7 +2113,7 @@ static struct pci_driver cx8800_pci_driver = {
2113#endif 2113#endif
2114}; 2114};
2115 2115
2116static int cx8800_init(void) 2116static int __init cx8800_init(void)
2117{ 2117{
2118 printk(KERN_INFO "cx88/0: cx2388x v4l2 driver version %d.%d.%d loaded\n", 2118 printk(KERN_INFO "cx88/0: cx2388x v4l2 driver version %d.%d.%d loaded\n",
2119 (CX88_VERSION_CODE >> 16) & 0xff, 2119 (CX88_VERSION_CODE >> 16) & 0xff,
@@ -2126,7 +2126,7 @@ static int cx8800_init(void)
2126 return pci_register_driver(&cx8800_pci_driver); 2126 return pci_register_driver(&cx8800_pci_driver);
2127} 2127}
2128 2128
2129static void cx8800_fini(void) 2129static void __exit cx8800_fini(void)
2130{ 2130{
2131 pci_unregister_driver(&cx8800_pci_driver); 2131 pci_unregister_driver(&cx8800_pci_driver);
2132} 2132}
diff --git a/drivers/media/video/gspca/m5602/m5602_core.c b/drivers/media/video/gspca/m5602/m5602_core.c
index 8a5bba16ff3..7f1e5415850 100644
--- a/drivers/media/video/gspca/m5602/m5602_core.c
+++ b/drivers/media/video/gspca/m5602/m5602_core.c
@@ -56,7 +56,7 @@ int m5602_read_bridge(struct sd *sd, const u8 address, u8 *i2c_data)
56 return (err < 0) ? err : 0; 56 return (err < 0) ? err : 0;
57} 57}
58 58
59/* Writes a byte to to the m5602 */ 59/* Writes a byte to the m5602 */
60int m5602_write_bridge(struct sd *sd, const u8 address, const u8 i2c_data) 60int m5602_write_bridge(struct sd *sd, const u8 address, const u8 i2c_data)
61{ 61{
62 int err; 62 int err;
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 7847bbc1440..bd83fa0a497 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -235,7 +235,7 @@ static int mspro_block_bd_getgeo(struct block_device *bdev,
235 return 0; 235 return 0;
236} 236}
237 237
238static struct block_device_operations ms_block_bdops = { 238static const struct block_device_operations ms_block_bdops = {
239 .open = mspro_block_bd_open, 239 .open = mspro_block_bd_open,
240 .release = mspro_block_bd_release, 240 .release = mspro_block_bd_release,
241 .getgeo = mspro_block_bd_getgeo, 241 .getgeo = mspro_block_bd_getgeo,
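
Here, and in the i2o_block, mmc, mtd_blkdevs, dasd, dcssblk, xpram, tape_block and jsflash hunks below, the block_device_operations tables become const: the block layer only ever reads these method tables, so marking them const lets the compiler place them in read-only data. A sketch with a hypothetical demo device:

    #include <linux/blkdev.h>
    #include <linux/module.h>

    static int demo_open(struct block_device *bdev, fmode_t mode)
    {
            return 0;
    }

    /* const puts the table in .rodata, where a stray write faults
     * instead of silently corrupting the method pointers */
    static const struct block_device_operations demo_bdops = {
            .owner = THIS_MODULE,
            .open  = demo_open,
    };
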
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 76fa2ee0b57..610e914abe6 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -6821,7 +6821,7 @@ mpt_print_ioc_summary(MPT_ADAPTER *ioc, char *buffer, int *size, int len, int sh
6821 *size = y; 6821 *size = y;
6822} 6822}
6823/** 6823/**
6824 * mpt_set_taskmgmt_in_progress_flag - set flags associated with task managment 6824 * mpt_set_taskmgmt_in_progress_flag - set flags associated with task management
6825 * @ioc: Pointer to MPT_ADAPTER structure 6825 * @ioc: Pointer to MPT_ADAPTER structure
6826 * 6826 *
6827 * Returns 0 for SUCCESS or -1 if FAILED. 6827 * Returns 0 for SUCCESS or -1 if FAILED.
@@ -6854,7 +6854,7 @@ mpt_set_taskmgmt_in_progress_flag(MPT_ADAPTER *ioc)
6854EXPORT_SYMBOL(mpt_set_taskmgmt_in_progress_flag); 6854EXPORT_SYMBOL(mpt_set_taskmgmt_in_progress_flag);
6855 6855
6856/** 6856/**
6857 * mpt_clear_taskmgmt_in_progress_flag - clear flags associated with task managment 6857 * mpt_clear_taskmgmt_in_progress_flag - clear flags associated with task management
6858 * @ioc: Pointer to MPT_ADAPTER structure 6858 * @ioc: Pointer to MPT_ADAPTER structure
6859 * 6859 *
6860 **/ 6860 **/
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 335d4c78a77..d505b68cd37 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -925,7 +925,7 @@ static void i2o_block_request_fn(struct request_queue *q)
925}; 925};
926 926
927/* I2O Block device operations definition */ 927/* I2O Block device operations definition */
928static struct block_device_operations i2o_block_fops = { 928static const struct block_device_operations i2o_block_fops = {
929 .owner = THIS_MODULE, 929 .owner = THIS_MODULE,
930 .open = i2o_block_open, 930 .open = i2o_block_open,
931 .release = i2o_block_release, 931 .release = i2o_block_release,
diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c
index c533f86ff5e..5447da16a17 100644
--- a/drivers/mfd/ab3100-core.c
+++ b/drivers/mfd/ab3100-core.c
@@ -647,7 +647,7 @@ struct ab3100_init_setting {
647 u8 setting; 647 u8 setting;
648}; 648};
649 649
650static const struct ab3100_init_setting __initdata 650static const struct ab3100_init_setting __initconst
651ab3100_init_settings[] = { 651ab3100_init_settings[] = {
652 { 652 {
653 .abreg = AB3100_MCA, 653 .abreg = AB3100_MCA,
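
A subtle one in ab3100-core.c: the settings array is already const, and const init data must be tagged __initconst rather than __initdata. Mixing const with __initdata asks the compiler to put a read-only object into a writable init section, which newer toolchains reject as a section type conflict. Sketched with hypothetical names:

    #include <linux/init.h>
    #include <linux/types.h>

    struct demo_setting {
            u8 reg;
            u8 val;
    };

    /* const init data lives in .init.rodata and, like __initdata, is
     * freed once boot-time initialization is complete */
    static const struct demo_setting demo_settings[] __initconst = {
            { .reg = 0x01, .val = 0x02 },
            { .reg = 0x02, .val = 0x00 },
    };
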
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index de966a6fb7e..aecf40ecb3a 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -97,7 +97,7 @@ static int ibmasmfs_get_super(struct file_system_type *fst,
97 return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt); 97 return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt);
98} 98}
99 99
100static struct super_operations ibmasmfs_s_ops = { 100static const struct super_operations ibmasmfs_s_ops = {
101 .statfs = simple_statfs, 101 .statfs = simple_statfs,
102 .drop_inode = generic_delete_inode, 102 .drop_inode = generic_delete_inode,
103}; 103};
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index adc205c49fb..85f0e8cd875 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -130,7 +130,7 @@ mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
130 return 0; 130 return 0;
131} 131}
132 132
133static struct block_device_operations mmc_bdops = { 133static const struct block_device_operations mmc_bdops = {
134 .open = mmc_blk_open, 134 .open = mmc_blk_open,
135 .release = mmc_blk_release, 135 .release = mmc_blk_release,
136 .getgeo = mmc_blk_getgeo, 136 .getgeo = mmc_blk_getgeo,
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index bc14bb1b057..88671529c45 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -512,7 +512,7 @@ static void mxcmci_cmd_done(struct mxcmci_host *host, unsigned int stat)
512 } 512 }
513 513
514 /* For the DMA case the DMA engine handles the data transfer 514 /* For the DMA case the DMA engine handles the data transfer
515 * automatically. For non DMA we have to to it ourselves. 515 * automatically. For non DMA we have to do it ourselves.
516 * Don't do it in interrupt context though. 516 * Don't do it in interrupt context though.
517 */ 517 */
518 if (!mxcmci_use_dma(host) && host->data) 518 if (!mxcmci_use_dma(host) && host->data)
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 10ed195c0c1..eb495d83064 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -776,13 +776,13 @@ static struct spi_driver m25p80_driver = {
776}; 776};
777 777
778 778
779static int m25p80_init(void) 779static int __init m25p80_init(void)
780{ 780{
781 return spi_register_driver(&m25p80_driver); 781 return spi_register_driver(&m25p80_driver);
782} 782}
783 783
784 784
785static void m25p80_exit(void) 785static void __exit m25p80_exit(void)
786{ 786{
787 spi_unregister_driver(&m25p80_driver); 787 spi_unregister_driver(&m25p80_driver);
788} 788}
diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c
index 00248e81ecd..7d846e9173d 100644
--- a/drivers/mtd/devices/slram.c
+++ b/drivers/mtd/devices/slram.c
@@ -303,7 +303,7 @@ __setup("slram=", mtd_slram_setup);
303 303
304#endif 304#endif
305 305
306static int init_slram(void) 306static int __init init_slram(void)
307{ 307{
308 char *devname; 308 char *devname;
309 int i; 309 int i;
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index a790c062af1..e56d6b42f02 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1099,7 +1099,7 @@ static struct mtd_blktrans_ops ftl_tr = {
1099 .owner = THIS_MODULE, 1099 .owner = THIS_MODULE,
1100}; 1100};
1101 1101
1102static int init_ftl(void) 1102static int __init init_ftl(void)
1103{ 1103{
1104 return register_mtd_blktrans(&ftl_tr); 1104 return register_mtd_blktrans(&ftl_tr);
1105} 1105}
diff --git a/drivers/mtd/maps/ixp2000.c b/drivers/mtd/maps/ixp2000.c
index d4fb9a3ab4d..1bdf0ee6d0b 100644
--- a/drivers/mtd/maps/ixp2000.c
+++ b/drivers/mtd/maps/ixp2000.c
@@ -184,7 +184,7 @@ static int ixp2000_flash_probe(struct platform_device *dev)
184 info->map.bankwidth = 1; 184 info->map.bankwidth = 1;
185 185
186 /* 186 /*
187 * map_priv_2 is used to store a ptr to to the bank_setup routine 187 * map_priv_2 is used to store a ptr to the bank_setup routine
188 */ 188 */
189 info->map.map_priv_2 = (unsigned long) ixp_data->bank_setup; 189 info->map.map_priv_2 = (unsigned long) ixp_data->bank_setup;
190 190
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 7baba40c1ed..0acbf4f5be5 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -210,7 +210,7 @@ static int blktrans_ioctl(struct block_device *bdev, fmode_t mode,
210 } 210 }
211} 211}
212 212
213static struct block_device_operations mtd_blktrans_ops = { 213static const struct block_device_operations mtd_blktrans_ops = {
214 .owner = THIS_MODULE, 214 .owner = THIS_MODULE,
215 .open = blktrans_open, 215 .open = blktrans_open,
216 .release = blktrans_release, 216 .release = blktrans_release,
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 29acd06b1c3..1b4690bdfdb 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -903,12 +903,12 @@ static struct pci_driver cafe_nand_pci_driver = {
903 .resume = cafe_nand_resume, 903 .resume = cafe_nand_resume,
904}; 904};
905 905
906static int cafe_nand_init(void) 906static int __init cafe_nand_init(void)
907{ 907{
908 return pci_register_driver(&cafe_nand_pci_driver); 908 return pci_register_driver(&cafe_nand_pci_driver);
909} 909}
910 910
911static void cafe_nand_exit(void) 911static void __exit cafe_nand_exit(void)
912{ 912{
913 pci_unregister_driver(&cafe_nand_pci_driver); 913 pci_unregister_driver(&cafe_nand_pci_driver);
914} 914}
diff --git a/drivers/mtd/nand/cmx270_nand.c b/drivers/mtd/nand/cmx270_nand.c
index 10081e656a6..826cacffcef 100644
--- a/drivers/mtd/nand/cmx270_nand.c
+++ b/drivers/mtd/nand/cmx270_nand.c
@@ -147,7 +147,7 @@ static int cmx270_device_ready(struct mtd_info *mtd)
147/* 147/*
148 * Main initialization routine 148 * Main initialization routine
149 */ 149 */
150static int cmx270_init(void) 150static int __init cmx270_init(void)
151{ 151{
152 struct nand_chip *this; 152 struct nand_chip *this;
153 const char *part_type; 153 const char *part_type;
@@ -261,7 +261,7 @@ module_init(cmx270_init);
261/* 261/*
262 * Clean up routine 262 * Clean up routine
263 */ 263 */
264static void cmx270_cleanup(void) 264static void __exit cmx270_cleanup(void)
265{ 265{
266 /* Release resources, unregister device */ 266 /* Release resources, unregister device */
267 nand_release(cmx270_nand_mtd); 267 nand_release(cmx270_nand_mtd);
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index e4d9ef0c965..9f87c99189a 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1065,7 +1065,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
1065 } 1065 }
1066 1066
1067 /* 1067 /*
1068 * Now we have got to calculate how much data we have to to copy. In 1068 * Now we have got to calculate how much data we have to copy. In
1069 * case of a static volume it is fairly easy - the VID header contains 1069 * case of a static volume it is fairly easy - the VID header contains
1070 * the data size. In case of a dynamic volume it is more difficult - we 1070 * the data size. In case of a dynamic volume it is more difficult - we
1071 * have to read the contents, cut 0xFF bytes from the end and copy only 1071 * have to read the contents, cut 0xFF bytes from the end and copy only
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index c290f51dd17..1af08178def 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -570,7 +570,7 @@ void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
570 570
571/* 571/*
572 * ubi_rb_for_each_entry - walk an RB-tree. 572 * ubi_rb_for_each_entry - walk an RB-tree.
573 * @rb: a pointer to type 'struct rb_node' to to use as a loop counter 573 * @rb: a pointer to type 'struct rb_node' to use as a loop counter
574 * @pos: a pointer to RB-tree entry type to use as a loop counter 574 * @pos: a pointer to RB-tree entry type to use as a loop counter
575 * @root: RB-tree's root 575 * @root: RB-tree's root
576 * @member: the name of the 'struct rb_node' within the RB-tree entry 576 * @member: the name of the 'struct rb_node' within the RB-tree entry
diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c
index 646dfc5f50c..8ea9c7545c1 100644
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -123,7 +123,6 @@ static void rx(struct net_device *dev, int bufnum,
123 BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx"); 123 BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx");
124 124
125 skb->protocol = cpu_to_be16(ETH_P_ARCNET); 125 skb->protocol = cpu_to_be16(ETH_P_ARCNET);
126;
127 netif_rx(skb); 126 netif_rx(skb);
128} 127}
129 128
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 083e21094b2..66bcbbb6bab 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -149,7 +149,6 @@ static void rx(struct net_device *dev, int bufnum,
149 BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx"); 149 BUGLVL(D_SKB) arcnet_dump_skb(dev, skb, "rx");
150 150
151 skb->protocol = cpu_to_be16(ETH_P_ARCNET); 151 skb->protocol = cpu_to_be16(ETH_P_ARCNET);
152;
153 netif_rx(skb); 152 netif_rx(skb);
154} 153}
155 154
diff --git a/drivers/net/bnx2x_reg.h b/drivers/net/bnx2x_reg.h
index 0695be14cf9..aa76cbada5e 100644
--- a/drivers/net/bnx2x_reg.h
+++ b/drivers/net/bnx2x_reg.h
@@ -3122,7 +3122,7 @@
3122 The fields are:[4:0] - tail pointer; [10:5] - Link List size; 15:11] - 3122 The fields are:[4:0] - tail pointer; [10:5] - Link List size; 15:11] -
3123 header pointer. */ 3123 header pointer. */
3124#define TCM_REG_XX_TABLE 0x50240 3124#define TCM_REG_XX_TABLE 0x50240
3125/* [RW 4] Load value for for cfc ac credit cnt. */ 3125/* [RW 4] Load value for cfc ac credit cnt. */
3126#define TM_REG_CFC_AC_CRDCNT_VAL 0x164208 3126#define TM_REG_CFC_AC_CRDCNT_VAL 0x164208
3127/* [RW 4] Load value for cfc cld credit cnt. */ 3127/* [RW 4] Load value for cfc cld credit cnt. */
3128#define TM_REG_CFC_CLD_CRDCNT_VAL 0x164210 3128#define TM_REG_CFC_CLD_CRDCNT_VAL 0x164210
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index cea5cfe23b7..c3fa31c9f2a 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1987,7 +1987,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
1987 // find new aggregator for the related port(s) 1987 // find new aggregator for the related port(s)
1988 new_aggregator = __get_first_agg(port); 1988 new_aggregator = __get_first_agg(port);
1989 for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) { 1989 for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) {
1990 // if the new aggregator is empty, or it connected to to our port only 1990 // if the new aggregator is empty, or it is connected to our port only
1991 if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) { 1991 if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) {
1992 break; 1992 break;
1993 } 1993 }
diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c
index cda6b397550..45ac225a7aa 100644
--- a/drivers/net/e1000/e1000_hw.c
+++ b/drivers/net/e1000/e1000_hw.c
@@ -3035,7 +3035,7 @@ s32 e1000_check_for_link(struct e1000_hw *hw)
3035 /* If TBI compatibility is was previously off, turn it on. For 3035 /* If TBI compatibility is was previously off, turn it on. For
3036 * compatibility with a TBI link partner, we will store bad 3036 * compatibility with a TBI link partner, we will store bad
3037 * packets. Some frames have an additional byte on the end and 3037 * packets. Some frames have an additional byte on the end and
3038 * will look like CRC errors to to the hardware. 3038 * will look like CRC errors to the hardware.
3039 */ 3039 */
3040 if (!hw->tbi_compatibility_on) { 3040 if (!hw->tbi_compatibility_on) {
3041 hw->tbi_compatibility_on = true; 3041 hw->tbi_compatibility_on = true;
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c
index 2234118eedb..6c144b525b4 100644
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -293,7 +293,7 @@ static int gfar_gcoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
293 rxtime = get_ictt_value(priv->rxic); 293 rxtime = get_ictt_value(priv->rxic);
294 rxcount = get_icft_value(priv->rxic); 294 rxcount = get_icft_value(priv->rxic);
295 txtime = get_ictt_value(priv->txic); 295 txtime = get_ictt_value(priv->txic);
296 txcount = get_icft_value(priv->txic);; 296 txcount = get_icft_value(priv->txic);
297 cvals->rx_coalesce_usecs = gfar_ticks2usecs(priv, rxtime); 297 cvals->rx_coalesce_usecs = gfar_ticks2usecs(priv, rxtime);
298 cvals->rx_max_coalesced_frames = rxcount; 298 cvals->rx_max_coalesced_frames = rxcount;
299 299
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index 1d7d7fef414..89c82c5e63e 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -2556,13 +2556,13 @@ static int __devinit emac_init_config(struct emac_instance *dev)
2556 if (emac_read_uint_prop(np, "mdio-device", &dev->mdio_ph, 0)) 2556 if (emac_read_uint_prop(np, "mdio-device", &dev->mdio_ph, 0))
2557 dev->mdio_ph = 0; 2557 dev->mdio_ph = 0;
2558 if (emac_read_uint_prop(np, "zmii-device", &dev->zmii_ph, 0)) 2558 if (emac_read_uint_prop(np, "zmii-device", &dev->zmii_ph, 0))
2559 dev->zmii_ph = 0;; 2559 dev->zmii_ph = 0;
2560 if (emac_read_uint_prop(np, "zmii-channel", &dev->zmii_port, 0)) 2560 if (emac_read_uint_prop(np, "zmii-channel", &dev->zmii_port, 0))
2561 dev->zmii_port = 0xffffffff;; 2561 dev->zmii_port = 0xffffffff;
2562 if (emac_read_uint_prop(np, "rgmii-device", &dev->rgmii_ph, 0)) 2562 if (emac_read_uint_prop(np, "rgmii-device", &dev->rgmii_ph, 0))
2563 dev->rgmii_ph = 0;; 2563 dev->rgmii_ph = 0;
2564 if (emac_read_uint_prop(np, "rgmii-channel", &dev->rgmii_port, 0)) 2564 if (emac_read_uint_prop(np, "rgmii-channel", &dev->rgmii_port, 0))
2565 dev->rgmii_port = 0xffffffff;; 2565 dev->rgmii_port = 0xffffffff;
2566 if (emac_read_uint_prop(np, "fifo-entry-size", &dev->fifo_entry_size, 0)) 2566 if (emac_read_uint_prop(np, "fifo-entry-size", &dev->fifo_entry_size, 0))
2567 dev->fifo_entry_size = 16; 2567 dev->fifo_entry_size = 16;
2568 if (emac_read_uint_prop(np, "mal-burst-size", &dev->mal_burst_size, 0)) 2568 if (emac_read_uint_prop(np, "mal-burst-size", &dev->mal_burst_size, 0))
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index d2639c4a086..5d6c1530a8c 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -3966,7 +3966,7 @@ static int igb_set_vf_multicasts(struct igb_adapter *adapter,
3966 /* VFs are limited to using the MTA hash table for their multicast 3966 /* VFs are limited to using the MTA hash table for their multicast
3967 * addresses */ 3967 * addresses */
3968 for (i = 0; i < n; i++) 3968 for (i = 0; i < n; i++)
3969 vf_data->vf_mc_hashes[i] = hash_list[i];; 3969 vf_data->vf_mc_hashes[i] = hash_list[i];
3970 3970
3971 /* Flush and reset the mta with the new values */ 3971 /* Flush and reset the mta with the new values */
3972 igb_set_rx_mode(adapter->netdev); 3972 igb_set_rx_mode(adapter->netdev);
diff --git a/drivers/net/ll_temac_main.c b/drivers/net/ll_temac_main.c
index da8d0a0ca94..f2a197fd47a 100644
--- a/drivers/net/ll_temac_main.c
+++ b/drivers/net/ll_temac_main.c
@@ -865,7 +865,7 @@ temac_of_probe(struct of_device *op, const struct of_device_id *match)
865 dcrs = dcr_resource_start(np, 0); 865 dcrs = dcr_resource_start(np, 0);
866 if (dcrs == 0) { 866 if (dcrs == 0) {
867 dev_err(&op->dev, "could not get DMA register address\n"); 867 dev_err(&op->dev, "could not get DMA register address\n");
868 goto nodev;; 868 goto nodev;
869 } 869 }
870 lp->sdma_dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0)); 870 lp->sdma_dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0));
871 dev_dbg(&op->dev, "DCR base: %x\n", dcrs); 871 dev_dbg(&op->dev, "DCR base: %x\n", dcrs);
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index fb65b427c69..1d0d4d9ab62 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -241,7 +241,7 @@ static int macb_mii_init(struct macb *bp)
241 struct eth_platform_data *pdata; 241 struct eth_platform_data *pdata;
242 int err = -ENXIO, i; 242 int err = -ENXIO, i;
243 243
244 /* Enable managment port */ 244 /* Enable management port */
245 macb_writel(bp, NCR, MACB_BIT(MPE)); 245 macb_writel(bp, NCR, MACB_BIT(MPE));
246 246
247 bp->mii_bus = mdiobus_alloc(); 247 bp->mii_bus = mdiobus_alloc();
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index bd0ac690d12..aad3b370c56 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -615,10 +615,10 @@ static int init586(struct net_device *dev)
615 /* addr_len |!src_insert |pre-len |loopback */ 615 /* addr_len |!src_insert |pre-len |loopback */
616 writeb(0x2e, &cfg_cmd->adr_len); 616 writeb(0x2e, &cfg_cmd->adr_len);
617 writeb(0x00, &cfg_cmd->priority); 617 writeb(0x00, &cfg_cmd->priority);
618 writeb(0x60, &cfg_cmd->ifs);; 618 writeb(0x60, &cfg_cmd->ifs);
619 writeb(0x00, &cfg_cmd->time_low); 619 writeb(0x00, &cfg_cmd->time_low);
620 writeb(0xf2, &cfg_cmd->time_high); 620 writeb(0xf2, &cfg_cmd->time_high);
621 writeb(0x00, &cfg_cmd->promisc);; 621 writeb(0x00, &cfg_cmd->promisc);
622 if (dev->flags & IFF_ALLMULTI) { 622 if (dev->flags & IFF_ALLMULTI) {
623 int len = ((char __iomem *)p->iscp - (char __iomem *)ptr - 8) / 6; 623 int len = ((char __iomem *)p->iscp - (char __iomem *)ptr - 8) / 6;
624 if (num_addrs > len) { 624 if (num_addrs > len) {
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 22052925782..7783c5db81d 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -2630,7 +2630,7 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
2630 FLAGS_LI; /* Load irq delay values */ 2630 FLAGS_LI; /* Load irq delay values */
2631 if (rx_ring->lbq_len) { 2631 if (rx_ring->lbq_len) {
2632 cqicb->flags |= FLAGS_LL; /* Load lbq values */ 2632 cqicb->flags |= FLAGS_LL; /* Load lbq values */
2633 tmp = (u64)rx_ring->lbq_base_dma;; 2633 tmp = (u64)rx_ring->lbq_base_dma;
2634 base_indirect_ptr = (__le64 *) rx_ring->lbq_base_indirect; 2634 base_indirect_ptr = (__le64 *) rx_ring->lbq_base_indirect;
2635 page_entries = 0; 2635 page_entries = 0;
2636 do { 2636 do {
@@ -2654,7 +2654,7 @@ static int ql_start_rx_ring(struct ql_adapter *qdev, struct rx_ring *rx_ring)
2654 } 2654 }
2655 if (rx_ring->sbq_len) { 2655 if (rx_ring->sbq_len) {
2656 cqicb->flags |= FLAGS_LS; /* Load sbq values */ 2656 cqicb->flags |= FLAGS_LS; /* Load sbq values */
2657 tmp = (u64)rx_ring->sbq_base_dma;; 2657 tmp = (u64)rx_ring->sbq_base_dma;
2658 base_indirect_ptr = (__le64 *) rx_ring->sbq_base_indirect; 2658 base_indirect_ptr = (__le64 *) rx_ring->sbq_base_indirect;
2659 page_entries = 0; 2659 page_entries = 0;
2660 do { 2660 do {
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index bc98e7f69ee..ede937ee50c 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -72,7 +72,7 @@ static int rionet_check = 0;
72static int rionet_capable = 1; 72static int rionet_capable = 1;
73 73
74/* 74/*
75 * This is a fast lookup table for for translating TX 75 * This is a fast lookup table for translating TX
76 * Ethernet packets into a destination RIO device. It 76 * Ethernet packets into a destination RIO device. It
77 * could be made into a hash table to save memory depending 77 * could be made into a hash table to save memory depending
78 * on system trade-offs. 78 * on system trade-offs.
diff --git a/drivers/net/skfp/pcmplc.c b/drivers/net/skfp/pcmplc.c
index f1df2ec8ad4..e6b33ee05ed 100644
--- a/drivers/net/skfp/pcmplc.c
+++ b/drivers/net/skfp/pcmplc.c
@@ -960,7 +960,7 @@ static void pcm_fsm(struct s_smc *smc, struct s_phy *phy, int cmd)
960 /*PC88b*/ 960 /*PC88b*/
961 if (!phy->cf_join) { 961 if (!phy->cf_join) {
962 phy->cf_join = TRUE ; 962 phy->cf_join = TRUE ;
963 queue_event(smc,EVENT_CFM,CF_JOIN+np) ; ; 963 queue_event(smc,EVENT_CFM,CF_JOIN+np) ;
964 } 964 }
965 if (cmd == PC_JOIN) 965 if (cmd == PC_JOIN)
966 GO_STATE(PC8_ACTIVE) ; 966 GO_STATE(PC8_ACTIVE) ;
diff --git a/drivers/net/skfp/pmf.c b/drivers/net/skfp/pmf.c
index 79e665e0853..a320fdb3727 100644
--- a/drivers/net/skfp/pmf.c
+++ b/drivers/net/skfp/pmf.c
@@ -807,9 +807,9 @@ void smt_add_para(struct s_smc *smc, struct s_pcon *pcon, u_short para,
807 mib_p->fddiPORTLerFlag ; 807 mib_p->fddiPORTLerFlag ;
808 sp->p4050_pad = 0 ; 808 sp->p4050_pad = 0 ;
809 sp->p4050_cutoff = 809 sp->p4050_cutoff =
810 mib_p->fddiPORTLer_Cutoff ; ; 810 mib_p->fddiPORTLer_Cutoff ;
811 sp->p4050_alarm = 811 sp->p4050_alarm =
812 mib_p->fddiPORTLer_Alarm ; ; 812 mib_p->fddiPORTLer_Alarm ;
813 sp->p4050_estimate = 813 sp->p4050_estimate =
814 mib_p->fddiPORTLer_Estimate ; 814 mib_p->fddiPORTLer_Estimate ;
815 sp->p4050_reject_ct = 815 sp->p4050_reject_ct =
@@ -829,7 +829,7 @@ void smt_add_para(struct s_smc *smc, struct s_pcon *pcon, u_short para,
829 sp->p4051_porttype = 829 sp->p4051_porttype =
830 mib_p->fddiPORTMy_Type ; 830 mib_p->fddiPORTMy_Type ;
831 sp->p4051_connectstate = 831 sp->p4051_connectstate =
832 mib_p->fddiPORTConnectState ; ; 832 mib_p->fddiPORTConnectState ;
833 sp->p4051_pc_neighbor = 833 sp->p4051_pc_neighbor =
834 mib_p->fddiPORTNeighborType ; 834 mib_p->fddiPORTNeighborType ;
835 sp->p4051_pc_withhold = 835 sp->p4051_pc_withhold =
@@ -853,7 +853,7 @@ void smt_add_para(struct s_smc *smc, struct s_pcon *pcon, u_short para,
853 struct smt_p_4053 *sp ; 853 struct smt_p_4053 *sp ;
854 sp = (struct smt_p_4053 *) to ; 854 sp = (struct smt_p_4053 *) to ;
855 sp->p4053_multiple = 855 sp->p4053_multiple =
856 mib_p->fddiPORTMultiple_P ; ; 856 mib_p->fddiPORTMultiple_P ;
857 sp->p4053_availablepaths = 857 sp->p4053_availablepaths =
858 mib_p->fddiPORTAvailablePaths ; 858 mib_p->fddiPORTAvailablePaths ;
859 sp->p4053_currentpath = 859 sp->p4053_currentpath =
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 62e852e21ab..55bad408196 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -215,7 +215,7 @@ static void skge_wol_init(struct skge_port *skge)
215 if (skge->wol & WAKE_MAGIC) 215 if (skge->wol & WAKE_MAGIC)
216 ctrl |= WOL_CTL_ENA_PME_ON_MAGIC_PKT|WOL_CTL_ENA_MAGIC_PKT_UNIT; 216 ctrl |= WOL_CTL_ENA_PME_ON_MAGIC_PKT|WOL_CTL_ENA_MAGIC_PKT_UNIT;
217 else 217 else
218 ctrl |= WOL_CTL_DIS_PME_ON_MAGIC_PKT|WOL_CTL_DIS_MAGIC_PKT_UNIT;; 218 ctrl |= WOL_CTL_DIS_PME_ON_MAGIC_PKT|WOL_CTL_DIS_MAGIC_PKT_UNIT;
219 219
220 ctrl |= WOL_CTL_DIS_PME_ON_PATTERN|WOL_CTL_DIS_PATTERN_UNIT; 220 ctrl |= WOL_CTL_DIS_PME_ON_PATTERN|WOL_CTL_DIS_PATTERN_UNIT;
221 skge_write16(hw, WOL_REGS(port, WOL_CTRL_STAT), ctrl); 221 skge_write16(hw, WOL_REGS(port, WOL_CTRL_STAT), ctrl);
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 4bb52e9cd37..15140f9f2e9 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -765,7 +765,7 @@ static void sky2_wol_init(struct sky2_port *sky2)
765 if (sky2->wol & WAKE_MAGIC) 765 if (sky2->wol & WAKE_MAGIC)
766 ctrl |= WOL_CTL_ENA_PME_ON_MAGIC_PKT|WOL_CTL_ENA_MAGIC_PKT_UNIT; 766 ctrl |= WOL_CTL_ENA_PME_ON_MAGIC_PKT|WOL_CTL_ENA_MAGIC_PKT_UNIT;
767 else 767 else
768 ctrl |= WOL_CTL_DIS_PME_ON_MAGIC_PKT|WOL_CTL_DIS_MAGIC_PKT_UNIT;; 768 ctrl |= WOL_CTL_DIS_PME_ON_MAGIC_PKT|WOL_CTL_DIS_MAGIC_PKT_UNIT;
769 769
770 ctrl |= WOL_CTL_DIS_PME_ON_PATTERN|WOL_CTL_DIS_PATTERN_UNIT; 770 ctrl |= WOL_CTL_DIS_PME_ON_PATTERN|WOL_CTL_DIS_PATTERN_UNIT;
771 sky2_write16(hw, WOL_REGS(port, WOL_CTRL_STAT), ctrl); 771 sky2_write16(hw, WOL_REGS(port, WOL_CTRL_STAT), ctrl);
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h
index 62779a520ca..3e94f0ce090 100644
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -1541,7 +1541,7 @@ void vxge_hw_ring_rxd_1b_info_get(
1541 rxd_info->l4_cksum_valid = 1541 rxd_info->l4_cksum_valid =
1542 (u32)VXGE_HW_RING_RXD_L4_CKSUM_CORRECT_GET(rxdp->control_0); 1542 (u32)VXGE_HW_RING_RXD_L4_CKSUM_CORRECT_GET(rxdp->control_0);
1543 rxd_info->l4_cksum = 1543 rxd_info->l4_cksum =
1544 (u32)VXGE_HW_RING_RXD_L4_CKSUM_GET(rxdp->control_0);; 1544 (u32)VXGE_HW_RING_RXD_L4_CKSUM_GET(rxdp->control_0);
1545 rxd_info->frame = 1545 rxd_info->frame =
1546 (u32)VXGE_HW_RING_RXD_ETHER_ENCAP_GET(rxdp->control_0); 1546 (u32)VXGE_HW_RING_RXD_ETHER_ENCAP_GET(rxdp->control_0);
1547 rxd_info->proto = 1547 rxd_info->proto =
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index b378037a29b..068d7a9d3e3 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -2350,7 +2350,7 @@ static int vxge_enable_msix(struct vxgedev *vdev)
2350 enum vxge_hw_status status; 2350 enum vxge_hw_status status;
2351 /* 0 - Tx, 1 - Rx */ 2351 /* 0 - Tx, 1 - Rx */
2352 int tim_msix_id[4]; 2352 int tim_msix_id[4];
2353 int alarm_msix_id = 0, msix_intr_vect = 0;; 2353 int alarm_msix_id = 0, msix_intr_vect = 0;
2354 vdev->intr_cnt = 0; 2354 vdev->intr_cnt = 0;
2355 2355
2356 /* allocate msix vectors */ 2356 /* allocate msix vectors */
diff --git a/drivers/net/wireless/ath/ath5k/reg.h b/drivers/net/wireless/ath/ath5k/reg.h
index debad07d990..c63ea6afd96 100644
--- a/drivers/net/wireless/ath/ath5k/reg.h
+++ b/drivers/net/wireless/ath/ath5k/reg.h
@@ -982,7 +982,7 @@
982#define AR5K_5414_CBCFG_BUF_DIS 0x10 /* Disable buffer */ 982#define AR5K_5414_CBCFG_BUF_DIS 0x10 /* Disable buffer */
983 983
984/* 984/*
985 * PCI-E Power managment configuration 985 * PCI-E Power management configuration
986 * and status register [5424+] 986 * and status register [5424+]
987 */ 987 */
988#define AR5K_PCIE_PM_CTL 0x4068 /* Register address */ 988#define AR5K_PCIE_PM_CTL 0x4068 /* Register address */
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index a3b36b3a9d6..cce188837d1 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -3330,7 +3330,7 @@ static void atmel_smooth_qual(struct atmel_private *priv)
3330 priv->wstats.qual.updated &= ~IW_QUAL_QUAL_INVALID; 3330 priv->wstats.qual.updated &= ~IW_QUAL_QUAL_INVALID;
3331} 3331}
3332 3332
3333/* deals with incoming managment frames. */ 3333/* deals with incoming management frames. */
3334static void atmel_management_frame(struct atmel_private *priv, 3334static void atmel_management_frame(struct atmel_private *priv,
3335 struct ieee80211_hdr *header, 3335 struct ieee80211_hdr *header,
3336 u16 frame_len, u8 rssi) 3336 u16 frame_len, u8 rssi)
diff --git a/drivers/net/wireless/zd1211rw/zd_chip.c b/drivers/net/wireless/zd1211rw/zd_chip.c
index 5e110a2328a..4e79a980013 100644
--- a/drivers/net/wireless/zd1211rw/zd_chip.c
+++ b/drivers/net/wireless/zd1211rw/zd_chip.c
@@ -368,7 +368,7 @@ error:
368 return r; 368 return r;
369} 369}
370 370
371/* MAC address: if custom mac addresses are to to be used CR_MAC_ADDR_P1 and 371/* MAC address: if custom mac addresses are to be used CR_MAC_ADDR_P1 and
372 * CR_MAC_ADDR_P2 must be overwritten 372 * CR_MAC_ADDR_P2 must be overwritten
373 */ 373 */
374int zd_write_mac_addr(struct zd_chip *chip, const u8 *mac_addr) 374int zd_write_mac_addr(struct zd_chip *chip, const u8 *mac_addr)
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index b7e4cee2426..2766a6d3c2e 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -35,7 +35,7 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode)
35} 35}
36 36
37 37
38static struct super_operations s_ops = { 38static const struct super_operations s_ops = {
39 .statfs = simple_statfs, 39 .statfs = simple_statfs,
40 .drop_inode = generic_delete_inode, 40 .drop_inode = generic_delete_inode,
41}; 41};
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index a45b0c0d574..a6b4a5a53d4 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1266,7 +1266,7 @@ ccio_ioc_init(struct ioc *ioc)
1266 ** Hot-Plug/Removal of PCI cards. (aka PCI OLARD). 1266 ** Hot-Plug/Removal of PCI cards. (aka PCI OLARD).
1267 */ 1267 */
1268 1268
1269 iova_space_size = (u32) (num_physpages / count_parisc_driver(&ccio_driver)); 1269 iova_space_size = (u32) (totalram_pages / count_parisc_driver(&ccio_driver));
1270 1270
1271 /* limit IOVA space size to 1MB-1GB */ 1271 /* limit IOVA space size to 1MB-1GB */
1272 1272
@@ -1305,7 +1305,7 @@ ccio_ioc_init(struct ioc *ioc)
1305 1305
1306 DBG_INIT("%s() hpa 0x%p mem %luMB IOV %dMB (%d bits)\n", 1306 DBG_INIT("%s() hpa 0x%p mem %luMB IOV %dMB (%d bits)\n",
1307 __func__, ioc->ioc_regs, 1307 __func__, ioc->ioc_regs,
1308 (unsigned long) num_physpages >> (20 - PAGE_SHIFT), 1308 (unsigned long) totalram_pages >> (20 - PAGE_SHIFT),
1309 iova_space_size>>20, 1309 iova_space_size>>20,
1310 iov_order + PAGE_SHIFT); 1310 iov_order + PAGE_SHIFT);
1311 1311
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 123d8fe3427..57a6d19eba4 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1390,7 +1390,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
1390 ** for DMA hints - ergo only 30 bits max. 1390 ** for DMA hints - ergo only 30 bits max.
1391 */ 1391 */
1392 1392
1393 iova_space_size = (u32) (num_physpages/global_ioc_cnt); 1393 iova_space_size = (u32) (totalram_pages/global_ioc_cnt);
1394 1394
1395 /* limit IOVA space size to 1MB-1GB */ 1395 /* limit IOVA space size to 1MB-1GB */
1396 if (iova_space_size < (1 << (20 - PAGE_SHIFT))) { 1396 if (iova_space_size < (1 << (20 - PAGE_SHIFT))) {
@@ -1415,7 +1415,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num)
1415 DBG_INIT("%s() hpa 0x%lx mem %ldMB IOV %dMB (%d bits)\n", 1415 DBG_INIT("%s() hpa 0x%lx mem %ldMB IOV %dMB (%d bits)\n",
1416 __func__, 1416 __func__,
1417 ioc->ioc_hpa, 1417 ioc->ioc_hpa,
1418 (unsigned long) num_physpages >> (20 - PAGE_SHIFT), 1418 (unsigned long) totalram_pages >> (20 - PAGE_SHIFT),
1419 iova_space_size>>20, 1419 iova_space_size>>20,
1420 iov_order + PAGE_SHIFT); 1420 iov_order + PAGE_SHIFT);
1421 1421
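
Both PA-RISC IOMMU drivers stop sizing their IOVA window from num_physpages, which counts page frames up to the highest physical address (holes and reserved regions included), and switch to totalram_pages, the pages the allocator actually manages. A sketch of the sizing logic under that assumption, with a hypothetical helper name:

    #include <linux/mm.h>

    static u32 demo_iova_space_size(unsigned int num_iocs)
    {
            /* split the RAM the allocator manages across the IOCs ... */
            u32 size = (u32)(totalram_pages / num_iocs);

            /* ... then clamp to the drivers' 1MB-1GB window */
            if (size < (1 << (20 - PAGE_SHIFT)))
                    size = 1 << (20 - PAGE_SHIFT);
            else if (size > (1 << (30 - PAGE_SHIFT)))
                    size = 1 << (30 - PAGE_SHIFT);
            return size;
    }
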
diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c
index 7b424e0b044..32c44040c1e 100644
--- a/drivers/pcmcia/pcmcia_ioctl.c
+++ b/drivers/pcmcia/pcmcia_ioctl.c
@@ -27,6 +27,7 @@
27#include <linux/proc_fs.h> 27#include <linux/proc_fs.h>
28#include <linux/poll.h> 28#include <linux/poll.h>
29#include <linux/pci.h> 29#include <linux/pci.h>
30#include <linux/seq_file.h>
30#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
31#include <linux/workqueue.h> 32#include <linux/workqueue.h>
32 33
@@ -105,37 +106,40 @@ static struct pcmcia_driver *get_pcmcia_driver(dev_info_t *dev_info)
105#ifdef CONFIG_PROC_FS 106#ifdef CONFIG_PROC_FS
106static struct proc_dir_entry *proc_pccard = NULL; 107static struct proc_dir_entry *proc_pccard = NULL;
107 108
108static int proc_read_drivers_callback(struct device_driver *driver, void *d) 109static int proc_read_drivers_callback(struct device_driver *driver, void *_m)
109{ 110{
110 char **p = d; 111 struct seq_file *m = _m;
111 struct pcmcia_driver *p_drv = container_of(driver, 112 struct pcmcia_driver *p_drv = container_of(driver,
112 struct pcmcia_driver, drv); 113 struct pcmcia_driver, drv);
113 114
114 *p += sprintf(*p, "%-24.24s 1 %d\n", p_drv->drv.name, 115 seq_printf(m, "%-24.24s 1 %d\n", p_drv->drv.name,
115#ifdef CONFIG_MODULE_UNLOAD 116#ifdef CONFIG_MODULE_UNLOAD
116 (p_drv->owner) ? module_refcount(p_drv->owner) : 1 117 (p_drv->owner) ? module_refcount(p_drv->owner) : 1
117#else 118#else
118 1 119 1
119#endif 120#endif
120 ); 121 );
121 d = (void *) p;
122
123 return 0; 122 return 0;
124} 123}
125 124
126static int proc_read_drivers(char *buf, char **start, off_t pos, 125static int pccard_drivers_proc_show(struct seq_file *m, void *v)
127 int count, int *eof, void *data)
128{ 126{
129 char *p = buf; 127 return bus_for_each_drv(&pcmcia_bus_type, NULL,
130 int rc; 128 m, proc_read_drivers_callback);
131 129}
132 rc = bus_for_each_drv(&pcmcia_bus_type, NULL,
133 (void *) &p, proc_read_drivers_callback);
134 if (rc < 0)
135 return rc;
136 130
137 return (p - buf); 131static int pccard_drivers_proc_open(struct inode *inode, struct file *file)
132{
133 return single_open(file, pccard_drivers_proc_show, NULL);
138} 134}
135
136static const struct file_operations pccard_drivers_proc_fops = {
137 .owner = THIS_MODULE,
138 .open = pccard_drivers_proc_open,
139 .read = seq_read,
140 .llseek = seq_lseek,
141 .release = single_release,
142};
139#endif 143#endif
140 144
141 145
@@ -1011,7 +1015,7 @@ void __init pcmcia_setup_ioctl(void) {
1011#ifdef CONFIG_PROC_FS 1015#ifdef CONFIG_PROC_FS
1012 proc_pccard = proc_mkdir("bus/pccard", NULL); 1016 proc_pccard = proc_mkdir("bus/pccard", NULL);
1013 if (proc_pccard) 1017 if (proc_pccard)
1014 create_proc_read_entry("drivers",0,proc_pccard,proc_read_drivers,NULL); 1018 proc_create("drivers", 0, proc_pccard, &pccard_drivers_proc_fops);
1015#endif 1019#endif
1016} 1020}
1017 1021
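
The pcmcia_ioctl.c hunk is a textbook conversion from the legacy read_proc interface to seq_file: the hand-rolled char-pointer arithmetic disappears, single_open() binds a show() routine, and seq_printf() takes over buffering, offsets and short reads. The complete skeleton of that idiom, reduced to a hypothetical "demo" entry:

    #include <linux/module.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_proc_show(struct seq_file *m, void *v)
    {
            /* seq_printf() grows the buffer and handles partial reads;
             * the old callbacks had to track start/off/count by hand */
            seq_printf(m, "demo 1 0\n");
            return 0;
    }

    static int demo_proc_open(struct inode *inode, struct file *file)
    {
            return single_open(file, demo_proc_show, NULL);
    }

    static const struct file_operations demo_proc_fops = {
            .owner   = THIS_MODULE,
            .open    = demo_proc_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

    /* registered with: proc_create("demo", 0, NULL, &demo_proc_fops); */
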
diff --git a/drivers/pcmcia/sa1100_jornada720.c b/drivers/pcmcia/sa1100_jornada720.c
index 57ca085473d..7eedb42f800 100644
--- a/drivers/pcmcia/sa1100_jornada720.c
+++ b/drivers/pcmcia/sa1100_jornada720.c
@@ -16,89 +16,103 @@
16 16
17#include "sa1111_generic.h" 17#include "sa1111_generic.h"
18 18
19#define SOCKET0_POWER GPIO_GPIO0 19/* Does SOCKET1_3V actually do anything? */
20#define SOCKET0_3V GPIO_GPIO2 20#define SOCKET0_POWER GPIO_GPIO0
21#define SOCKET1_POWER (GPIO_GPIO1 | GPIO_GPIO3) 21#define SOCKET0_3V GPIO_GPIO2
22#warning *** Does SOCKET1_3V actually do anything? 22#define SOCKET1_POWER (GPIO_GPIO1 | GPIO_GPIO3)
23#define SOCKET1_3V GPIO_GPIO3 23#define SOCKET1_3V GPIO_GPIO3
24 24
25static int jornada720_pcmcia_hw_init(struct soc_pcmcia_socket *skt) 25static int jornada720_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
26{ 26{
27 /* 27 unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3;
28 * What is all this crap for? 28
29 */ 29 /*
30 GRER |= 0x00000002; 30 * What is all this crap for?
31 /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ 31 */
32 sa1111_set_io_dir(SA1111_DEV(skt->dev), GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); 32 GRER |= 0x00000002;
33 sa1111_set_io(SA1111_DEV(skt->dev), GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); 33 /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */
34 sa1111_set_sleep_io(SA1111_DEV(skt->dev), GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); 34 sa1111_set_io_dir(SA1111_DEV(skt->dev), pin, 0, 0);
35 35 sa1111_set_io(SA1111_DEV(skt->dev), pin, 0);
36 return sa1111_pcmcia_hw_init(skt); 36 sa1111_set_sleep_io(SA1111_DEV(skt->dev), pin, 0);
37
38 return sa1111_pcmcia_hw_init(skt);
37} 39}
38 40
39static int 41static int
40jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) 42jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state)
41{ 43{
42 unsigned int pa_dwr_mask, pa_dwr_set; 44 unsigned int pa_dwr_mask, pa_dwr_set;
43 int ret; 45 int ret;
44 46
45printk("%s(): config socket %d vcc %d vpp %d\n", __func__, 47 printk(KERN_INFO "%s(): config socket %d vcc %d vpp %d\n", __func__,
46 skt->nr, state->Vcc, state->Vpp); 48 skt->nr, state->Vcc, state->Vpp);
47 49
48 switch (skt->nr) { 50 switch (skt->nr) {
49 case 0: 51 case 0:
50 pa_dwr_mask = SOCKET0_POWER | SOCKET0_3V; 52 pa_dwr_mask = SOCKET0_POWER | SOCKET0_3V;
51 53
52 switch (state->Vcc) { 54 switch (state->Vcc) {
53 default: 55 default:
54 case 0: pa_dwr_set = 0; break; 56 case 0:
55 case 33: pa_dwr_set = SOCKET0_POWER | SOCKET0_3V; break; 57 pa_dwr_set = 0;
56 case 50: pa_dwr_set = SOCKET0_POWER; break; 58 break;
57 } 59 case 33:
58 break; 60 pa_dwr_set = SOCKET0_POWER | SOCKET0_3V;
59 61 break;
60 case 1: 62 case 50:
61 pa_dwr_mask = SOCKET1_POWER; 63 pa_dwr_set = SOCKET0_POWER;
62 64 break;
63 switch (state->Vcc) { 65 }
64 default: 66 break;
65 case 0: pa_dwr_set = 0; break; 67
66 case 33: pa_dwr_set = SOCKET1_POWER; break; 68 case 1:
67 case 50: pa_dwr_set = SOCKET1_POWER; break; 69 pa_dwr_mask = SOCKET1_POWER;
68 } 70
69 break; 71 switch (state->Vcc) {
70 72 default:
71 default: 73 case 0:
72 return -1; 74 pa_dwr_set = 0;
73 } 75 break;
74 76 case 33:
75 if (state->Vpp != state->Vcc && state->Vpp != 0) { 77 pa_dwr_set = SOCKET1_POWER;
76 printk(KERN_ERR "%s(): slot cannot support VPP %u\n", 78 break;
77 __func__, state->Vpp); 79 case 50:
78 return -1; 80 pa_dwr_set = SOCKET1_POWER;
79 } 81 break;
80 82 }
81 ret = sa1111_pcmcia_configure_socket(skt, state); 83 break;
82 if (ret == 0) { 84
83 unsigned long flags; 85 default:
84 86 return -1;
85 local_irq_save(flags); 87 }
86 sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set); 88
87 local_irq_restore(flags); 89 if (state->Vpp != state->Vcc && state->Vpp != 0) {
88 } 90 printk(KERN_ERR "%s(): slot cannot support VPP %u\n",
89 91 __func__, state->Vpp);
90 return ret; 92 return -EPERM;
93 }
94
95 ret = sa1111_pcmcia_configure_socket(skt, state);
96 if (ret == 0) {
97 unsigned long flags;
98
99 local_irq_save(flags);
100 sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set);
101 local_irq_restore(flags);
102 }
103
104 return ret;
91} 105}
92 106
93static struct pcmcia_low_level jornada720_pcmcia_ops = { 107static struct pcmcia_low_level jornada720_pcmcia_ops = {
94 .owner = THIS_MODULE, 108 .owner = THIS_MODULE,
95 .hw_init = jornada720_pcmcia_hw_init, 109 .hw_init = jornada720_pcmcia_hw_init,
96 .hw_shutdown = sa1111_pcmcia_hw_shutdown, 110 .hw_shutdown = sa1111_pcmcia_hw_shutdown,
97 .socket_state = sa1111_pcmcia_socket_state, 111 .socket_state = sa1111_pcmcia_socket_state,
98 .configure_socket = jornada720_pcmcia_configure_socket, 112 .configure_socket = jornada720_pcmcia_configure_socket,
99 113
100 .socket_init = sa1111_pcmcia_socket_init, 114 .socket_init = sa1111_pcmcia_socket_init,
101 .socket_suspend = sa1111_pcmcia_socket_suspend, 115 .socket_suspend = sa1111_pcmcia_socket_suspend,
102}; 116};
103 117
104int __devinit pcmcia_jornada720_init(struct device *dev) 118int __devinit pcmcia_jornada720_init(struct device *dev)
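
Beyond reflowing the switch statements into kernel style, the jornada720 hunk makes three small changes: the build-time #warning becomes an ordinary comment, the bare printk() gains a KERN_INFO level, and the unsupported-VPP path returns -EPERM instead of a bare -1 (numerically identical, but self-describing). The errno point, sketched with a hypothetical checker:

    #include <linux/errno.h>

    static int demo_check_vpp(unsigned int vcc, unsigned int vpp)
    {
            /* -EPERM == -1, so callers see the same value, but the
             * symbolic name records why the request was refused */
            if (vpp != vcc && vpp != 0)
                    return -EPERM;
            return 0;
    }
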
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index 737fe5d87c4..b459e87a30a 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -717,7 +717,7 @@ static void yenta_free_resources(struct yenta_socket *socket)
717/* 717/*
718 * Close it down - release our resources and go home.. 718 * Close it down - release our resources and go home..
719 */ 719 */
720static void yenta_close(struct pci_dev *dev) 720static void __devexit yenta_close(struct pci_dev *dev)
721{ 721{
722 struct yenta_socket *sock = pci_get_drvdata(dev); 722 struct yenta_socket *sock = pci_get_drvdata(dev);
723 723
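
__devexit marks code that is only reachable when devices can actually be removed; when hotplug support is configured out, the section is discarded and the matching __devexit_p() wrapper turns the function reference into NULL so nothing dangles. A sketch of the pairing, with a hypothetical PCI driver:

    #include <linux/init.h>
    #include <linux/pci.h>

    static void __devexit demo_remove(struct pci_dev *pdev)
    {
            /* lives in .devexit.text; dropped when removal cannot happen */
    }

    static struct pci_driver demo_pci_driver = {
            .name   = "demo",
            /* __devexit_p() evaluates to NULL when the section is
             * discarded, so no dangling reference survives */
            .remove = __devexit_p(demo_remove),
    };
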
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index 527ee764c93..cd11b113494 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -135,6 +135,15 @@ static int pnp_device_remove(struct device *dev)
135 return 0; 135 return 0;
136} 136}
137 137
138static void pnp_device_shutdown(struct device *dev)
139{
140 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
141 struct pnp_driver *drv = pnp_dev->driver;
142
143 if (drv && drv->shutdown)
144 drv->shutdown(pnp_dev);
145}
146
138static int pnp_bus_match(struct device *dev, struct device_driver *drv) 147static int pnp_bus_match(struct device *dev, struct device_driver *drv)
139{ 148{
140 struct pnp_dev *pnp_dev = to_pnp_dev(dev); 149 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
@@ -203,6 +212,7 @@ struct bus_type pnp_bus_type = {
203 .match = pnp_bus_match, 212 .match = pnp_bus_match,
204 .probe = pnp_device_probe, 213 .probe = pnp_device_probe,
205 .remove = pnp_device_remove, 214 .remove = pnp_device_remove,
215 .shutdown = pnp_device_shutdown,
206 .suspend = pnp_bus_suspend, 216 .suspend = pnp_bus_suspend,
207 .resume = pnp_bus_resume, 217 .resume = pnp_bus_resume,
208 .dev_attrs = pnp_interface_attrs, 218 .dev_attrs = pnp_interface_attrs,
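
With pnp_bus_type gaining a .shutdown method, the bus core now forwards reboot and power-off notifications to each bound driver's own shutdown hook, so a PnP driver's shutdown routine is actually invoked. From the driver's side, the hook looks like this (hypothetical demo driver; probe/remove elided):

    #include <linux/pnp.h>

    static void demo_pnp_shutdown(struct pnp_dev *dev)
    {
            /* mask interrupts and stop DMA so the device stays quiet
             * across the reboot or power-off */
    }

    static struct pnp_driver demo_pnp_driver = {
            .name     = "demo",
            /* .id_table, .probe and .remove omitted in this sketch */
            .shutdown = demo_pnp_shutdown,
    };
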
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index bd1ce8e2bc1..0587d53987f 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -430,7 +430,7 @@ fail:
430 430
431static int __exit omap_rtc_remove(struct platform_device *pdev) 431static int __exit omap_rtc_remove(struct platform_device *pdev)
432{ 432{
433 struct rtc_device *rtc = platform_get_drvdata(pdev);; 433 struct rtc_device *rtc = platform_get_drvdata(pdev);
434 434
435 device_init_wakeup(&pdev->dev, 0); 435 device_init_wakeup(&pdev->dev, 0);
436 436
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index e109da4583a..dad0449475b 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2146,7 +2146,7 @@ static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
2146 return 0; 2146 return 0;
2147} 2147}
2148 2148
2149struct block_device_operations 2149const struct block_device_operations
2150dasd_device_operations = { 2150dasd_device_operations = {
2151 .owner = THIS_MODULE, 2151 .owner = THIS_MODULE,
2152 .open = dasd_open, 2152 .open = dasd_open,
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index a1ce573648a..bd9fe2e36dc 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -706,7 +706,7 @@ static int dasd_eckd_generate_uid(struct dasd_device *device,
706 sizeof(uid->serial) - 1); 706 sizeof(uid->serial) - 1);
707 EBCASC(uid->serial, sizeof(uid->serial) - 1); 707 EBCASC(uid->serial, sizeof(uid->serial) - 1);
708 uid->ssid = private->gneq->subsystemID; 708 uid->ssid = private->gneq->subsystemID;
709 uid->real_unit_addr = private->ned->unit_addr;; 709 uid->real_unit_addr = private->ned->unit_addr;
710 if (private->sneq) { 710 if (private->sneq) {
711 uid->type = private->sneq->sua_flags; 711 uid->type = private->sneq->sua_flags;
712 if (uid->type == UA_BASE_PAV_ALIAS) 712 if (uid->type == UA_BASE_PAV_ALIAS)
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 5e47a1ee52b..8afd9fa0087 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -540,7 +540,7 @@ dasd_check_blocksize(int bsize)
540extern debug_info_t *dasd_debug_area; 540extern debug_info_t *dasd_debug_area;
541extern struct dasd_profile_info_t dasd_global_profile; 541extern struct dasd_profile_info_t dasd_global_profile;
542extern unsigned int dasd_profile_level; 542extern unsigned int dasd_profile_level;
543extern struct block_device_operations dasd_device_operations; 543extern const struct block_device_operations dasd_device_operations;
544 544
545extern struct kmem_cache *dasd_page_cache; 545extern struct kmem_cache *dasd_page_cache;
546 546
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index d34617682a6..f76f4bd82b9 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -34,7 +34,7 @@ static int dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
34static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; 34static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
35 35
36static int dcssblk_major; 36static int dcssblk_major;
37static struct block_device_operations dcssblk_devops = { 37static const struct block_device_operations dcssblk_devops = {
38 .owner = THIS_MODULE, 38 .owner = THIS_MODULE,
39 .open = dcssblk_open, 39 .open = dcssblk_open,
40 .release = dcssblk_release, 40 .release = dcssblk_release,
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index ee604e92a5f..116d1b3eeb1 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -244,7 +244,7 @@ static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
244 return 0; 244 return 0;
245} 245}
246 246
247static struct block_device_operations xpram_devops = 247static const struct block_device_operations xpram_devops =
248{ 248{
249 .owner = THIS_MODULE, 249 .owner = THIS_MODULE,
250 .getgeo = xpram_getgeo, 250 .getgeo = xpram_getgeo,
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 4cb9e70507a..64f57ef2763 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -50,7 +50,7 @@ static int tapeblock_ioctl(struct block_device *, fmode_t, unsigned int,
50static int tapeblock_medium_changed(struct gendisk *); 50static int tapeblock_medium_changed(struct gendisk *);
51static int tapeblock_revalidate_disk(struct gendisk *); 51static int tapeblock_revalidate_disk(struct gendisk *);
52 52
53static struct block_device_operations tapeblock_fops = { 53static const struct block_device_operations tapeblock_fops = {
54 .owner = THIS_MODULE, 54 .owner = THIS_MODULE,
55 .open = tapeblock_open, 55 .open = tapeblock_open,
56 .release = tapeblock_release, 56 .release = tapeblock_release,
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index a4b2c576144..c84eadd3602 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -2113,7 +2113,7 @@ static ssize_t remove_write (struct device_driver *drv,
2113 IUCV_DBF_TEXT(trace, 3, __func__); 2113 IUCV_DBF_TEXT(trace, 3, __func__);
2114 2114
2115 if (count >= IFNAMSIZ) 2115 if (count >= IFNAMSIZ)
2116 count = IFNAMSIZ - 1;; 2116 count = IFNAMSIZ - 1;
2117 2117
2118 for (i = 0, p = buf; i < count && *p; i++, p++) { 2118 for (i = 0, p = buf; i < count && *p; i++, p++) {
2119 if (*p == '\n' || *p == ' ') 2119 if (*p == '\n' || *p == ' ')
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 3ff726afafc..0e1a34627a2 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -102,7 +102,7 @@ static int zfcp_scsi_queuecommand(struct scsi_cmnd *scpnt,
102 if (unlikely((status & ZFCP_STATUS_COMMON_ERP_FAILED) || 102 if (unlikely((status & ZFCP_STATUS_COMMON_ERP_FAILED) ||
103 !(status & ZFCP_STATUS_COMMON_RUNNING))) { 103 !(status & ZFCP_STATUS_COMMON_RUNNING))) {
104 zfcp_scsi_command_fail(scpnt, DID_ERROR); 104 zfcp_scsi_command_fail(scpnt, DID_ERROR);
105 return 0;; 105 return 0;
106 } 106 }
107 107
108 ret = zfcp_fsf_send_fcp_command_task(unit, scpnt); 108 ret = zfcp_fsf_send_fcp_command_task(unit, scpnt);
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 6d465168468..869a30b49ed 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -452,7 +452,7 @@ static const struct file_operations jsf_fops = {
452 452
453static struct miscdevice jsf_dev = { JSF_MINOR, "jsflash", &jsf_fops }; 453static struct miscdevice jsf_dev = { JSF_MINOR, "jsflash", &jsf_fops };
454 454
455static struct block_device_operations jsfd_fops = { 455static const struct block_device_operations jsfd_fops = {
456 .owner = THIS_MODULE, 456 .owner = THIS_MODULE,
457}; 457};
458 458
diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c
index e6f2bb7365e..8dfb59d5899 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_core.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_core.c
@@ -5223,7 +5223,7 @@ ahc_chip_init(struct ahc_softc *ahc)
 
 	/*
 	 * Setup the allowed SCSI Sequences based on operational mode.
-	 * If we are a target, we'll enalbe select in operations once
+	 * If we are a target, we'll enable select in operations once
 	 * we've had a lun enabled.
 	 */
 	scsiseq_template = ENSELO|ENAUTOATNO|ENAUTOATNP;
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 906cef5cda8..41e1b0e7e2e 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -1340,7 +1340,7 @@ static int bnx2i_process_login_resp(struct iscsi_session *session,
 	resp_hdr->opcode = login->op_code;
 	resp_hdr->flags = login->response_flags;
 	resp_hdr->max_version = login->version_max;
-	resp_hdr->active_version = login->version_active;;
+	resp_hdr->active_version = login->version_active;
 	resp_hdr->hlength = 0;
 
 	hton24(resp_hdr->dlength, login->data_length);
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 9df7ed38e1b..9a1bd9534d7 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -1207,7 +1207,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
 			vport->ct_flags &= ~FC_CT_RFF_ID;
 			CtReq->CommandResponse.bits.CmdRsp =
 				be16_to_cpu(SLI_CTNS_RFF_ID);
-			CtReq->un.rff.PortId = cpu_to_be32(vport->fc_myDID);;
+			CtReq->un.rff.PortId = cpu_to_be32(vport->fc_myDID);
 			CtReq->un.rff.fbits = FC4_FEATURE_INIT;
 			CtReq->un.rff.type_code = FC_FCP_DATA;
 			cmpl = lpfc_cmpl_ct_cmd_rff_id;
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 7dc3d1894b1..a39addc3a59 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -718,7 +718,7 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp,
  * megasas_build_ldio -	Prepares IOs to logical devices
  * @instance:		Adapter soft state
  * @scp:		SCSI command
- * @cmd:		Command to to be prepared
+ * @cmd:		Command to be prepared
  *
  * Frames (and accompanying SGLs) for regular SCSI IOs use this function.
  */
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 40e3cafb3a9..83c8b5e4fc8 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -1422,7 +1422,7 @@ static void qla4xxx_slave_destroy(struct scsi_device *sdev)
 /**
  * qla4xxx_del_from_active_array - returns an active srb
  * @ha: Pointer to host adapter structure.
- * @index: index into to the active_array
+ * @index: index into the active_array
  *
  * This routine removes and returns the srb at the specified index
  **/
@@ -1500,7 +1500,7 @@ static int qla4xxx_wait_for_hba_online(struct scsi_qla_host *ha)
 
 /**
  * qla4xxx_eh_wait_for_commands - wait for active cmds to finish.
- * @ha: pointer to to HBA
+ * @ha: pointer to HBA
  * @t: target id
  * @l: lun id
  *
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a89c421dab5..8dd96dcd716 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -956,7 +956,7 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
 }
 #endif
 
-static struct block_device_operations sd_fops = {
+static const struct block_device_operations sd_fops = {
 	.owner			= THIS_MODULE,
 	.open			= sd_open,
 	.release		= sd_release,
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index cce0fe4c8a3..eb61f7a70e1 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -525,7 +525,7 @@ static int sr_block_media_changed(struct gendisk *disk)
 	return cdrom_media_changed(&cd->cdi);
 }
 
-static struct block_device_operations sr_bdops =
+static const struct block_device_operations sr_bdops =
 {
 	.owner		= THIS_MODULE,
 	.open		= sr_block_open,
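
The constification pattern running through xpram, tape_block, jsflash, sd and sr above is worth one standalone illustration: a const ops table is emitted into read-only data, so its function pointers cannot be overwritten at runtime, and it matches the new const-taking fops fields. A minimal sketch only; the foo_* names and the made-up geometry are invented, not from any of the patches above:

    #include <linux/blkdev.h>
    #include <linux/genhd.h>
    #include <linux/hdreg.h>

    static int foo_open(struct block_device *bdev, fmode_t mode)
    {
            return 0;               /* nothing to set up in this sketch */
    }

    static int foo_getgeo(struct block_device *bdev, struct hd_geometry *geo)
    {
            geo->heads = 4;         /* invented geometry for illustration */
            geo->sectors = 16;
            geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
            return 0;
    }

    /* const: the table lands in .rodata instead of writable data */
    static const struct block_device_operations foo_fops = {
            .owner  = THIS_MODULE,
            .open   = foo_open,
            .getgeo = foo_getgeo,
    };

    /* later, at probe time: disk->fops = &foo_fops; */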
diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c
index 8980a5640bd..e75ba9b2889 100644
--- a/drivers/spi/omap_uwire.c
+++ b/drivers/spi/omap_uwire.c
@@ -213,7 +213,7 @@ static int uwire_txrx(struct spi_device *spi, struct spi_transfer *t)
 	unsigned	bits = ust->bits_per_word;
 	unsigned	bytes;
 	u16		val, w;
-	int		status = 0;;
+	int		status = 0;
 
 	if (!t->tx_buf && !t->rx_buf)
 		return 0;
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 3f3119d760d..6ba8aece90b 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -388,7 +388,7 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
 
  err_no_iores:
  err_no_pdata:
-	spi_master_put(hw->master);;
+	spi_master_put(hw->master);
 
  err_nomem:
 	return err;
diff --git a/drivers/staging/rt2860/rtmp.h b/drivers/staging/rt2860/rtmp.h
index 3f498f6f3ff..90fd40f2473 100644
--- a/drivers/staging/rt2860/rtmp.h
+++ b/drivers/staging/rt2860/rtmp.h
@@ -2060,7 +2060,7 @@ typedef struct _STA_ADMIN_CONFIG {
 	BOOLEAN		AdhocBGJoined;		// Indicate Adhoc B/G Join.
 	BOOLEAN		Adhoc20NJoined;		// Indicate Adhoc 20MHz N Join.
 #endif
-	// New for WPA, windows want us to to keep association information and
+	// New for WPA, windows want us to keep association information and
 	// Fixed IEs from last association response
 	NDIS_802_11_ASSOCIATION_INFORMATION	AssocInfo;
 	USHORT		ReqVarIELen;	// Length of next VIE include EID & Length
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index ba589d4ca8b..8c64c018b67 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -506,8 +506,6 @@ static int wdm_open(struct inode *inode, struct file *file)
 	desc = usb_get_intfdata(intf);
 	if (test_bit(WDM_DISCONNECTING, &desc->flags))
 		goto out;
-
-	;
 	file->private_data = desc;
 
 	rv = usb_autopm_get_interface(desc->intf);
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index ffe75e83787..97b40ce133f 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -48,7 +48,6 @@
 #define USBFS_DEFAULT_BUSMODE		(S_IXUGO | S_IRUGO)
 #define USBFS_DEFAULT_LISTMODE		S_IRUGO
 
-static struct super_operations usbfs_ops;
 static const struct file_operations default_file_operations;
 static struct vfsmount *usbfs_mount;
 static int usbfs_mount_count;	/* = 0 */
@@ -449,7 +448,7 @@ static const struct file_operations default_file_operations = {
 	.llseek =	default_file_lseek,
 };
 
-static struct super_operations usbfs_ops = {
+static const struct super_operations usbfs_ops = {
 	.statfs =	simple_statfs,
 	.drop_inode =	generic_delete_inode,
 	.remount_fs =	remount,
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 7d33f50b587..c44367fea18 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -2033,7 +2033,7 @@ gadgetfs_create_file (struct super_block *sb, char const *name,
 	return inode;
 }
 
-static struct super_operations gadget_fs_operations = {
+static const struct super_operations gadget_fs_operations = {
 	.statfs =	simple_statfs,
 	.drop_inode =	generic_delete_inode,
 };
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index c2f1b7df918..b5b83c43898 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -242,7 +242,7 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
 	 * System suspend currently expects to be able to suspend the entire
 	 * device tree, device-at-a-time.  If we failed selective suspend
 	 * reports, system suspend would fail; so the root hub code must claim
-	 * success.  That's lying to usbcore, and it matters for for runtime
+	 * success.  That's lying to usbcore, and it matters for runtime
 	 * PM scenarios with selective suspend and remote wakeup...
 	 */
 	if (ehci->no_selective_suspend && device_can_wakeup(&pdev->dev))
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 2bfff30f470..48b9e889a18 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -37,7 +37,7 @@ typedef __u16 __bitwise __hc16;
 #define __hc16	__le16
 #endif
 
-/* statistics can be kept for for tuning/monitoring */
+/* statistics can be kept for tuning/monitoring */
 struct ehci_stats {
 	/* irq usage */
 	unsigned long		normal;
diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c
index c2d80f80448..16fecb8ecc3 100644
--- a/drivers/usb/host/ohci-q.c
+++ b/drivers/usb/host/ohci-q.c
@@ -418,7 +418,7 @@ static struct ed *ed_get (
 		is_out = !(ep->desc.bEndpointAddress & USB_DIR_IN);
 
 		/* FIXME usbcore changes dev->devnum before SET_ADDRESS
-		 * suceeds ... otherwise we wouldn't need "pipe".
+		 * succeeds ... otherwise we wouldn't need "pipe".
 		 */
 		info = usb_pipedevice (pipe);
 		ed->type = usb_pipetype(pipe);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index d31d32206ba..ffe1625d4e1 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1150,7 +1150,7 @@ void xhci_dbg_cmd_ptrs(struct xhci_hcd *xhci);
 void xhci_dbg_ring_ptrs(struct xhci_hcd *xhci, struct xhci_ring *ring);
 void xhci_dbg_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx, unsigned int last_ep);
 
-/* xHCI memory managment */
+/* xHCI memory management */
 void xhci_mem_cleanup(struct xhci_hcd *xhci);
 int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags);
 void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id);
diff --git a/drivers/usb/serial/cypress_m8.h b/drivers/usb/serial/cypress_m8.h
index e772b01ac3a..1fd360e0406 100644
--- a/drivers/usb/serial/cypress_m8.h
+++ b/drivers/usb/serial/cypress_m8.h
@@ -57,7 +57,7 @@
 #define UART_RI		0x10	/* ring indicator - modem - device to host */
 #define UART_CD		0x40	/* carrier detect - modem - device to host */
 #define CYP_ERROR	0x08	/* received from input report - device to host */
-/* Note - the below has nothing to to with the "feature report" reset */
+/* Note - the below has nothing to do with the "feature report" reset */
 #define CONTROL_RESET	0x08	/* sent with output report - host to device */
 
 /* End of RS-232 protocol definitions */
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index dc0f832657e..b97960ac92f 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -2540,7 +2540,7 @@ static int calc_baud_rate_divisor(int baudrate, int *divisor)
 
 /*****************************************************************************
  * send_cmd_write_uart_register
- *	this function builds up a uart register message and sends to to the device.
+ *	this function builds up a uart register message and sends to the device.
 *****************************************************************************/
 static int send_cmd_write_uart_register(struct edgeport_port *edge_port,
 						__u8 regNum, __u8 regValue)
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index a61673133d7..f7373371b13 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -38,7 +38,7 @@
  * 0.3a - implemented pools of write URBs
  * 0.3  - alpha version for public testing
  * 0.2  - TIOCMGET works, so autopilot(1) can be used!
- * 0.1  - can be used to to pilot-xfer -p /dev/ttyUSB0 -l
+ * 0.1  - can be used to do pilot-xfer -p /dev/ttyUSB0 -l
 *
 * The driver skeleton is mainly based on mct_u232.c and various other
 * pieces of code shamelessly copied from the drivers/usb/serial/ directory.
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 61e7c40b94f..1e58220403d 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -544,7 +544,7 @@ static void spcp8x5_set_termios(struct tty_struct *tty,
 	}
 
 	/* Set Baud Rate */
-	baud = tty_get_baud_rate(tty);;
+	baud = tty_get_baud_rate(tty);
 	switch (baud) {
 	case 300:	buf[0] = 0x00;	break;
 	case 600:	buf[0] = 0x01;	break;
diff --git a/drivers/usb/wusbcore/wa-hc.h b/drivers/usb/wusbcore/wa-hc.h
index 586d350cdb4..d6bea3e0b54 100644
--- a/drivers/usb/wusbcore/wa-hc.h
+++ b/drivers/usb/wusbcore/wa-hc.h
@@ -47,7 +47,7 @@
  *     to an endpoint on a WUSB device that is connected to a
  *     HWA RC.
  *
- *   xfer   Transfer managment -- this is all the code that gets a
+ *   xfer   Transfer management -- this is all the code that gets a
  *          buffer and pushes it to a device (or viceversa). *
 *
 * Some day a lot of this code will be shared between this driver and
diff --git a/drivers/uwb/i1480/i1480u-wlp/netdev.c b/drivers/uwb/i1480/i1480u-wlp/netdev.c
index 73055530e60..b236e696994 100644
--- a/drivers/uwb/i1480/i1480u-wlp/netdev.c
+++ b/drivers/uwb/i1480/i1480u-wlp/netdev.c
@@ -214,7 +214,7 @@ int i1480u_open(struct net_device *net_dev)
 
 	netif_wake_queue(net_dev);
 #ifdef i1480u_FLOW_CONTROL
-	result = usb_submit_urb(i1480u->notif_urb, GFP_KERNEL);;
+	result = usb_submit_urb(i1480u->notif_urb, GFP_KERNEL);
 	if (result < 0) {
 		dev_err(dev, "Can't submit notification URB: %d\n", result);
 		goto error_notif_urb_submit;
diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c
index df03f3776dc..79e5f40e648 100644
--- a/drivers/video/cfbcopyarea.c
+++ b/drivers/video/cfbcopyarea.c
@@ -114,7 +114,7 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 				d0 >>= right;
 			} else if (src_idx+n <= bits) {
 				// Single source word
-				d0 <<= left;;
+				d0 <<= left;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src + 1);
diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c
index 30ae3022f63..66358fa825f 100644
--- a/drivers/video/imxfb.c
+++ b/drivers/video/imxfb.c
@@ -710,7 +710,7 @@ static int __init imxfb_probe(struct platform_device *pdev)
 
 	fbi->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(fbi->clk)) {
-		ret = PTR_ERR(fbi->clk);;
+		ret = PTR_ERR(fbi->clk);
 		dev_err(&pdev->dev, "unable to get clock: %d\n", ret);
 		goto failed_getclock;
 	}
diff --git a/drivers/video/omap/lcd_h3.c b/drivers/video/omap/lcd_h3.c
index 2486237ebba..417ae5efa8b 100644
--- a/drivers/video/omap/lcd_h3.c
+++ b/drivers/video/omap/lcd_h3.c
@@ -124,12 +124,12 @@ struct platform_driver h3_panel_driver = {
 	},
 };
 
-static int h3_panel_drv_init(void)
+static int __init h3_panel_drv_init(void)
 {
 	return platform_driver_register(&h3_panel_driver);
 }
 
-static void h3_panel_drv_cleanup(void)
+static void __exit h3_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&h3_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_h4.c b/drivers/video/omap/lcd_h4.c
index 6ff56430341..0c398bda760 100644
--- a/drivers/video/omap/lcd_h4.c
+++ b/drivers/video/omap/lcd_h4.c
@@ -102,12 +102,12 @@ static struct platform_driver h4_panel_driver = {
 	},
 };
 
-static int h4_panel_drv_init(void)
+static int __init h4_panel_drv_init(void)
 {
 	return platform_driver_register(&h4_panel_driver);
 }
 
-static void h4_panel_drv_cleanup(void)
+static void __exit h4_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&h4_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_inn1510.c b/drivers/video/omap/lcd_inn1510.c
index 6953ed4b582..cdbd8bb607b 100644
--- a/drivers/video/omap/lcd_inn1510.c
+++ b/drivers/video/omap/lcd_inn1510.c
@@ -109,12 +109,12 @@ struct platform_driver innovator1510_panel_driver = {
 	},
 };
 
-static int innovator1510_panel_drv_init(void)
+static int __init innovator1510_panel_drv_init(void)
 {
 	return platform_driver_register(&innovator1510_panel_driver);
 }
 
-static void innovator1510_panel_drv_cleanup(void)
+static void __exit innovator1510_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&innovator1510_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_inn1610.c b/drivers/video/omap/lcd_inn1610.c
index 4c4f7ee6d73..268f7f808a4 100644
--- a/drivers/video/omap/lcd_inn1610.c
+++ b/drivers/video/omap/lcd_inn1610.c
@@ -133,12 +133,12 @@ struct platform_driver innovator1610_panel_driver = {
 	},
 };
 
-static int innovator1610_panel_drv_init(void)
+static int __init innovator1610_panel_drv_init(void)
 {
 	return platform_driver_register(&innovator1610_panel_driver);
 }
 
-static void innovator1610_panel_drv_cleanup(void)
+static void __exit innovator1610_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&innovator1610_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_osk.c b/drivers/video/omap/lcd_osk.c
index 379c96d36da..b3fa88bc626 100644
--- a/drivers/video/omap/lcd_osk.c
+++ b/drivers/video/omap/lcd_osk.c
@@ -127,12 +127,12 @@ struct platform_driver osk_panel_driver = {
 	},
 };
 
-static int osk_panel_drv_init(void)
+static int __init osk_panel_drv_init(void)
 {
 	return platform_driver_register(&osk_panel_driver);
 }
 
-static void osk_panel_drv_cleanup(void)
+static void __exit osk_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&osk_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_palmte.c b/drivers/video/omap/lcd_palmte.c
index 218317366e6..4bf3c79f3cc 100644
--- a/drivers/video/omap/lcd_palmte.c
+++ b/drivers/video/omap/lcd_palmte.c
@@ -108,12 +108,12 @@ struct platform_driver palmte_panel_driver = {
 	},
 };
 
-static int palmte_panel_drv_init(void)
+static int __init palmte_panel_drv_init(void)
 {
 	return platform_driver_register(&palmte_panel_driver);
 }
 
-static void palmte_panel_drv_cleanup(void)
+static void __exit palmte_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&palmte_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_palmtt.c b/drivers/video/omap/lcd_palmtt.c
index 57b0f6cf6a5..48ea1f9f2cb 100644
--- a/drivers/video/omap/lcd_palmtt.c
+++ b/drivers/video/omap/lcd_palmtt.c
@@ -113,12 +113,12 @@ struct platform_driver palmtt_panel_driver = {
 	},
 };
 
-static int palmtt_panel_drv_init(void)
+static int __init palmtt_panel_drv_init(void)
 {
 	return platform_driver_register(&palmtt_panel_driver);
 }
 
-static void palmtt_panel_drv_cleanup(void)
+static void __exit palmtt_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&palmtt_panel_driver);
 }
diff --git a/drivers/video/omap/lcd_palmz71.c b/drivers/video/omap/lcd_palmz71.c
index d33d78b1172..0697d29b4d3 100644
--- a/drivers/video/omap/lcd_palmz71.c
+++ b/drivers/video/omap/lcd_palmz71.c
@@ -109,12 +109,12 @@ struct platform_driver palmz71_panel_driver = {
 	},
 };
 
-static int palmz71_panel_drv_init(void)
+static int __init palmz71_panel_drv_init(void)
 {
 	return platform_driver_register(&palmz71_panel_driver);
 }
 
-static void palmz71_panel_drv_cleanup(void)
+static void __exit palmz71_panel_drv_cleanup(void)
 {
 	platform_driver_unregister(&palmz71_panel_driver);
 }
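
All eight OMAP panel drivers above receive the same annotation, so one hedged sketch covers them: __init code is freed once initialisation completes, and __exit code is dropped entirely when the driver is built in. The example_panel names below are invented for illustration:

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/platform_device.h>

    static struct platform_driver example_panel_driver = {
            .driver = {
                    .name = "example_panel",        /* invented name */
            },
    };

    static int __init example_panel_drv_init(void)
    {
            /* runs once at boot/modprobe, then the text can be discarded */
            return platform_driver_register(&example_panel_driver);
    }

    static void __exit example_panel_drv_cleanup(void)
    {
            /* omitted from the image when the driver cannot be unloaded */
            platform_driver_unregister(&example_panel_driver);
    }

    module_init(example_panel_drv_init);
    module_exit(example_panel_drv_cleanup);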
diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c
index 7da0027e240..5ffca2adc6a 100644
--- a/drivers/video/s3c2410fb.c
+++ b/drivers/video/s3c2410fb.c
@@ -1119,7 +1119,7 @@ int __init s3c2410fb_init(void)
 	int ret = platform_driver_register(&s3c2410fb_driver);
 
 	if (ret == 0)
-		ret = platform_driver_register(&s3c2412fb_driver);;
+		ret = platform_driver_register(&s3c2412fb_driver);
 
 	return ret;
 }
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index f5bbd9e8341..d31505b6f7a 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -96,11 +96,7 @@ static struct balloon_stats balloon_stats;
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
-/* VM /proc information for memory */
-extern unsigned long totalram_pages;
-
 #ifdef CONFIG_HIGHMEM
-extern unsigned long totalhigh_pages;
 #define inc_totalhigh_pages() (totalhigh_pages++)
 #define dec_totalhigh_pages() (totalhigh_pages--)
 #else
@@ -214,7 +210,7 @@ static int increase_reservation(unsigned long nr_pages)
 	page = balloon_first_page();
 	for (i = 0; i < nr_pages; i++) {
 		BUG_ON(page == NULL);
-		frame_list[i] = page_to_pfn(page);;
+		frame_list[i] = page_to_pfn(page);
 		page = balloon_next_page(page);
 	}
 
diff --git a/fs/Kconfig b/fs/Kconfig
index 455aa207e67..d4bf8caad8d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -109,6 +109,7 @@ source "fs/sysfs/Kconfig"
 
 config TMPFS
 	bool "Virtual memory file system support (former shm fs)"
+	depends on SHMEM
 	help
 	  Tmpfs is a file system which keeps all files in virtual memory.
 
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 3ff8bdd18fb..0931bc1325e 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -21,7 +21,7 @@ static void afs_fl_release_private(struct file_lock *fl);
 static struct workqueue_struct *afs_lock_manager;
 static DEFINE_MUTEX(afs_lock_manager_mutex);
 
-static struct file_lock_operations afs_lock_ops = {
+static const struct file_lock_operations afs_lock_ops = {
 	.fl_copy_lock =		afs_fl_copy_lock,
 	.fl_release_private =	afs_fl_release_private,
 };
diff --git a/fs/aio.c b/fs/aio.c
index d065b2c3273..fc21c23b238 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -24,6 +24,7 @@
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/mmu_context.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/aio.h>
@@ -34,7 +35,6 @@
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
-#include <asm/mmu_context.h>
 
 #if DEBUG > 1
 #define dprintk		printk
@@ -595,51 +595,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 }
 
 /*
- * use_mm
- *	Makes the calling kernel thread take on the specified
- *	mm context.
- *	Called by the retry thread execute retries within the
- *	iocb issuer's mm context, so that copy_from/to_user
- *	operations work seamlessly for aio.
- *	(Note: this routine is intended to be called only
- *	from a kernel thread context)
- */
-static void use_mm(struct mm_struct *mm)
-{
-	struct mm_struct *active_mm;
-	struct task_struct *tsk = current;
-
-	task_lock(tsk);
-	active_mm = tsk->active_mm;
-	atomic_inc(&mm->mm_count);
-	tsk->mm = mm;
-	tsk->active_mm = mm;
-	switch_mm(active_mm, mm, tsk);
-	task_unlock(tsk);
-
-	mmdrop(active_mm);
-}
-
-/*
- * unuse_mm
- *	Reverses the effect of use_mm, i.e. releases the
- *	specified mm context which was earlier taken on
- *	by the calling kernel thread
- *	(Note: this routine is intended to be called only
- *	from a kernel thread context)
- */
-static void unuse_mm(struct mm_struct *mm)
-{
-	struct task_struct *tsk = current;
-
-	task_lock(tsk);
-	tsk->mm = NULL;
-	/* active_mm is still 'mm' */
-	enter_lazy_tlb(mm, tsk);
-	task_unlock(tsk);
-}
-
-/*
  * Queue up a kiocb to be retried. Assumes that the kiocb
  * has already been marked as kicked, and places it on
  * the retry run list for the corresponding ioctx, if it
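
The hunk above deletes aio's private copies of use_mm()/unuse_mm() in favour of the shared implementations now reached through <linux/mmu_context.h>. As a rough sketch of how a caller uses the pair (the helper name and error handling below are invented, not part of the patch), a kernel thread borrows a user mm like this:

    #include <linux/mmu_context.h>
    #include <linux/uaccess.h>

    /* Hypothetical helper: run copy_from_user() against 'mm' from a kthread. */
    static int kthread_copy_from_mm(struct mm_struct *mm, void *dst,
                                    const void __user *src, size_t len)
    {
            int ret = 0;

            use_mm(mm);             /* adopt the target address space */
            if (copy_from_user(dst, src, len))
                    ret = -EFAULT;
            unuse_mm(mm);           /* back to lazy-TLB kernel context */
            return ret;
    }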
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 2316e944a10..e947915109e 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -90,7 +90,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
 			DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
 			continue;
 		}
-		while (d_mountpoint(path.dentry) && follow_down(&path));
+		while (d_mountpoint(path.dentry) && follow_down(&path))
 			;
 		umount_ok = may_umount(path.mnt);
 		path_put(&path);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 615d5496fe0..dd376c124e7 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -842,7 +842,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_magic = BEFS_SUPER_MAGIC;
 	/* Set real blocksize of fs */
 	sb_set_blocksize(sb, (ulong) befs_sb->block_size);
-	sb->s_op = (struct super_operations *) &befs_sops;
+	sb->s_op = &befs_sops;
 	root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir)));
 	if (IS_ERR(root)) {
 		ret = PTR_ERR(root);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7c1e65d5487..442d94fe255 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1280,9 +1280,6 @@ static int writenote(struct memelfnote *men, struct file *file,
 #define DUMP_WRITE(addr, nr)	\
 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
 		goto end_coredump;
-#define DUMP_SEEK(off)	\
-	if (!dump_seek(file, (off))) \
-		goto end_coredump;
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
 			    u16 machine, u32 flags, u8 osabi)
@@ -2016,7 +2013,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 		goto end_coredump;
 
 	/* Align to page */
-	DUMP_SEEK(dataoff - foffset);
+	if (!dump_seek(file, dataoff - foffset))
+		goto end_coredump;
 
 	for (vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
@@ -2027,33 +2025,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 
 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
 			struct page *page;
-			struct vm_area_struct *tmp_vma;
+			int stop;
 
-			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
-						&page, &tmp_vma) <= 0) {
-				DUMP_SEEK(PAGE_SIZE);
-			} else {
-				if (page == ZERO_PAGE(0)) {
-					if (!dump_seek(file, PAGE_SIZE)) {
-						page_cache_release(page);
-						goto end_coredump;
-					}
-				} else {
-					void *kaddr;
-					flush_cache_page(tmp_vma, addr,
-							 page_to_pfn(page));
-					kaddr = kmap(page);
-					if ((size += PAGE_SIZE) > limit ||
-					    !dump_write(file, kaddr,
-							PAGE_SIZE)) {
-						kunmap(page);
-						page_cache_release(page);
-						goto end_coredump;
-					}
-					kunmap(page);
-				}
+			page = get_dump_page(addr);
+			if (page) {
+				void *kaddr = kmap(page);
+				stop = ((size += PAGE_SIZE) > limit) ||
+					!dump_write(file, kaddr, PAGE_SIZE);
+				kunmap(page);
 				page_cache_release(page);
-			}
+			} else
+				stop = !dump_seek(file, PAGE_SIZE);
+			if (stop)
+				goto end_coredump;
 		}
 	}
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 20fbeced472..76285471073 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1325,9 +1325,6 @@ static int writenote(struct memelfnote *men, struct file *file)
 #define DUMP_WRITE(addr, nr)	\
 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
 		goto end_coredump;
-#define DUMP_SEEK(off)	\
-	if (!dump_seek(file, (off))) \
-		goto end_coredump;
 
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
 {
@@ -1518,6 +1515,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
 				   unsigned long *limit, unsigned long mm_flags)
 {
 	struct vm_area_struct *vma;
+	int err = 0;
 
 	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
 		unsigned long addr;
@@ -1525,43 +1523,26 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
 		if (!maydump(vma, mm_flags))
 			continue;
 
-		for (addr = vma->vm_start;
-		     addr < vma->vm_end;
-		     addr += PAGE_SIZE
-		     ) {
-			struct vm_area_struct *vma;
-			struct page *page;
-
-			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
-					   &page, &vma) <= 0) {
-				DUMP_SEEK(file->f_pos + PAGE_SIZE);
-			}
-			else if (page == ZERO_PAGE(0)) {
-				page_cache_release(page);
-				DUMP_SEEK(file->f_pos + PAGE_SIZE);
-			}
-			else {
-				void *kaddr;
-
-				flush_cache_page(vma, addr, page_to_pfn(page));
-				kaddr = kmap(page);
-				if ((*size += PAGE_SIZE) > *limit ||
-				    !dump_write(file, kaddr, PAGE_SIZE)
-				    ) {
-					kunmap(page);
-					page_cache_release(page);
-					return -EIO;
-				}
+		for (addr = vma->vm_start; addr < vma->vm_end;
+		     addr += PAGE_SIZE) {
+			struct page *page = get_dump_page(addr);
+			if (page) {
+				void *kaddr = kmap(page);
+				*size += PAGE_SIZE;
+				if (*size > *limit)
+					err = -EFBIG;
+				else if (!dump_write(file, kaddr, PAGE_SIZE))
+					err = -EIO;
 				kunmap(page);
 				page_cache_release(page);
-			}
+			} else if (!dump_seek(file, file->f_pos + PAGE_SIZE))
+				err = -EFBIG;
+			if (err)
+				goto out;
 		}
 	}
-
-	return 0;
-
-end_coredump:
-	return -EFBIG;
+out:
+	return err;
 }
 #endif
 
@@ -1802,7 +1783,8 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 		goto end_coredump;
 	}
 
-	DUMP_SEEK(dataoff);
+	if (!dump_seek(file, dataoff))
+		goto end_coredump;
 
 	if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
 		goto end_coredump;
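
Both core-dump rewrites above converge on the same loop shape around the new get_dump_page() helper, which replaces the open-coded get_user_pages()/ZERO_PAGE handling. A condensed sketch of that shape follows; dump_one/skip_one are invented stand-ins for the dump_write()/dump_seek() callbacks, so treat this as an outline rather than either file's actual code:

    #include <linux/mm.h>
    #include <linux/highmem.h>
    #include <linux/pagemap.h>

    static int dump_range(unsigned long start, unsigned long end,
                          int (*dump_one)(void *kaddr),
                          int (*skip_one)(void))
    {
            unsigned long addr;

            for (addr = start; addr < end; addr += PAGE_SIZE) {
                    struct page *page = get_dump_page(addr);
                    int err = 0;

                    if (page) {
                            /* real data: map it and push it out */
                            void *kaddr = kmap(page);
                            if (!dump_one(kaddr))
                                    err = -EIO;
                            kunmap(page);
                            page_cache_release(page);
                    } else if (!skip_one()) {
                            /* hole or zero page: just seek forward */
                            err = -EFBIG;
                    }
                    if (err)
                            return err;
            }
            return 0;
    }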
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 71e7e03ac34..5d1ed50bd46 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1114,7 +1114,7 @@ EXPORT_SYMBOL(revalidate_disk);
 int check_disk_change(struct block_device *bdev)
 {
 	struct gendisk *disk = bdev->bd_disk;
-	struct block_device_operations * bdops = disk->fops;
+	const struct block_device_operations *bdops = disk->fops;
 
 	if (!bdops->media_changed)
 		return 0;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8b819279001..6c4173146bb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -772,7 +772,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
 	}
 }
 
-static struct address_space_operations btree_aops = {
+static const struct address_space_operations btree_aops = {
 	.readpage	= btree_readpage,
 	.writepage	= btree_writepage,
 	.writepages	= btree_writepages,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 59cba180fe8..9096fd0ca3c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -55,13 +55,13 @@ struct btrfs_iget_args {
 	struct btrfs_root *root;
 };
 
-static struct inode_operations btrfs_dir_inode_operations;
-static struct inode_operations btrfs_symlink_inode_operations;
-static struct inode_operations btrfs_dir_ro_inode_operations;
-static struct inode_operations btrfs_special_inode_operations;
-static struct inode_operations btrfs_file_inode_operations;
-static struct address_space_operations btrfs_aops;
-static struct address_space_operations btrfs_symlink_aops;
+static const struct inode_operations btrfs_dir_inode_operations;
+static const struct inode_operations btrfs_symlink_inode_operations;
+static const struct inode_operations btrfs_dir_ro_inode_operations;
+static const struct inode_operations btrfs_special_inode_operations;
+static const struct inode_operations btrfs_file_inode_operations;
+static const struct address_space_operations btrfs_aops;
+static const struct address_space_operations btrfs_symlink_aops;
 static struct file_operations btrfs_dir_file_operations;
 static struct extent_io_ops btrfs_extent_io_ops;
 
@@ -5201,7 +5201,7 @@ static int btrfs_permission(struct inode *inode, int mask)
 	return generic_permission(inode, mask, btrfs_check_acl);
 }
 
-static struct inode_operations btrfs_dir_inode_operations = {
+static const struct inode_operations btrfs_dir_inode_operations = {
 	.getattr	= btrfs_getattr,
 	.lookup		= btrfs_lookup,
 	.create		= btrfs_create,
@@ -5219,7 +5219,7 @@ static struct inode_operations btrfs_dir_inode_operations = {
 	.removexattr	= btrfs_removexattr,
 	.permission	= btrfs_permission,
 };
-static struct inode_operations btrfs_dir_ro_inode_operations = {
+static const struct inode_operations btrfs_dir_ro_inode_operations = {
 	.lookup		= btrfs_lookup,
 	.permission	= btrfs_permission,
 };
@@ -5259,7 +5259,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
  *
  * For now we're avoiding this by dropping bmap.
  */
-static struct address_space_operations btrfs_aops = {
+static const struct address_space_operations btrfs_aops = {
 	.readpage	= btrfs_readpage,
 	.writepage	= btrfs_writepage,
 	.writepages	= btrfs_writepages,
@@ -5271,14 +5271,14 @@ static struct address_space_operations btrfs_aops = {
 	.set_page_dirty	= btrfs_set_page_dirty,
 };
 
-static struct address_space_operations btrfs_symlink_aops = {
+static const struct address_space_operations btrfs_symlink_aops = {
 	.readpage	= btrfs_readpage,
 	.writepage	= btrfs_writepage,
 	.invalidatepage	= btrfs_invalidatepage,
 	.releasepage	= btrfs_releasepage,
 };
 
-static struct inode_operations btrfs_file_inode_operations = {
+static const struct inode_operations btrfs_file_inode_operations = {
 	.truncate	= btrfs_truncate,
 	.getattr	= btrfs_getattr,
 	.setattr	= btrfs_setattr,
@@ -5290,7 +5290,7 @@ static struct inode_operations btrfs_file_inode_operations = {
 	.fallocate	= btrfs_fallocate,
 	.fiemap		= btrfs_fiemap,
 };
-static struct inode_operations btrfs_special_inode_operations = {
+static const struct inode_operations btrfs_special_inode_operations = {
 	.getattr	= btrfs_getattr,
 	.setattr	= btrfs_setattr,
 	.permission	= btrfs_permission,
@@ -5299,7 +5299,7 @@ static struct inode_operations btrfs_special_inode_operations = {
 	.listxattr	= btrfs_listxattr,
 	.removexattr	= btrfs_removexattr,
 };
-static struct inode_operations btrfs_symlink_inode_operations = {
+static const struct inode_operations btrfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6d6d06cb6df..2db17cd66fc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -51,7 +51,7 @@
 #include "export.h"
 #include "compression.h"
 
-static struct super_operations btrfs_super_ops;
+static const struct super_operations btrfs_super_ops;
 
 static void btrfs_put_super(struct super_block *sb)
 {
@@ -675,7 +675,7 @@ static int btrfs_unfreeze(struct super_block *sb)
 	return 0;
 }
 
-static struct super_operations btrfs_super_ops = {
+static const struct super_operations btrfs_super_ops = {
 	.delete_inode	= btrfs_delete_inode,
 	.put_super	= btrfs_put_super,
 	.sync_fs	= btrfs_sync_fs,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d91b0de7c50..30c0d45c1b5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2605,7 +2605,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 						extent);
 			cs = btrfs_file_extent_offset(src, extent);
 			cl = btrfs_file_extent_num_bytes(src,
-						extent);;
+						extent);
 			if (btrfs_file_extent_compression(src,
 						extent)) {
 				cs = 0;
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 606912d8f2a..fea9e898c4b 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -142,7 +142,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
 	rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
 	if (rc != 0) {
 		cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d",
-			   __func__, *devname, rc));;
+			   __func__, *devname, rc));
 		goto compose_mount_options_err;
 	}
 	/* md_len = strlen(...) + 12 for 'sep+prefixpath='
@@ -385,7 +385,7 @@ out_err:
 	goto out;
 }
 
-struct inode_operations cifs_dfs_referral_inode_operations = {
+const struct inode_operations cifs_dfs_referral_inode_operations = {
 	.follow_link = cifs_dfs_follow_mountpoint,
 };
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3610e9958b4..d79ce2e95c2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -50,7 +50,7 @@
 #define CIFS_MAGIC_NUMBER 0xFF534D42	/* the first four bytes of SMB PDUs */
 
 #ifdef CONFIG_CIFS_QUOTA
-static struct quotactl_ops cifs_quotactl_ops;
+static const struct quotactl_ops cifs_quotactl_ops;
 #endif /* QUOTA */
 
 int cifsFYI = 0;
@@ -517,7 +517,7 @@ int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats)
 	return rc;
 }
 
-static struct quotactl_ops cifs_quotactl_ops = {
+static const struct quotactl_ops cifs_quotactl_ops = {
 	.set_xquota	= cifs_xquota_set,
 	.get_xquota	= cifs_xquota_get,
 	.set_xstate	= cifs_xstate_set,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 094325e3f71..ac2b24c192f 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -67,7 +67,7 @@ extern int cifs_setattr(struct dentry *, struct iattr *);
 
 extern const struct inode_operations cifs_file_inode_ops;
 extern const struct inode_operations cifs_symlink_inode_ops;
-extern struct inode_operations cifs_dfs_referral_inode_operations;
+extern const struct inode_operations cifs_dfs_referral_inode_operations;
 
 
 /* Functions related to files and directories */
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 00b30a2d546..542f625312f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -582,7 +582,7 @@ extern const struct inode_operations ecryptfs_dir_iops;
 extern const struct inode_operations ecryptfs_symlink_iops;
 extern const struct super_operations ecryptfs_sops;
 extern const struct dentry_operations ecryptfs_dops;
-extern struct address_space_operations ecryptfs_aops;
+extern const struct address_space_operations ecryptfs_aops;
 extern int ecryptfs_verbosity;
 extern unsigned int ecryptfs_message_buf_len;
 extern signed long ecryptfs_message_wait_timeout;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 5c6bab9786e..05772aeaa8f 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -545,7 +545,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
 	return rc;
 }
 
-struct address_space_operations ecryptfs_aops = {
+const struct address_space_operations ecryptfs_aops = {
 	.writepage = ecryptfs_writepage,
 	.readpage = ecryptfs_readpage,
 	.write_begin = ecryptfs_write_begin,
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index b72b8588422..c18fbf3e406 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -20,7 +20,7 @@ __inode_direct_access(struct inode *inode, sector_t block,
 		      void **kaddr, unsigned long *pfn)
 {
 	struct block_device *bdev = inode->i_sb->s_bdev;
-	struct block_device_operations *ops = bdev->bd_disk->fops;
+	const struct block_device_operations *ops = bdev->bd_disk->fops;
 	sector_t sector;
 
 	sector = block * (PAGE_SIZE / 512);	/* ext2 block to bdev sector */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index a8d80a7f110..72743d36050 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -720,7 +720,7 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
 static ssize_t ext3_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
 
-static struct dquot_operations ext3_quota_operations = {
+static const struct dquot_operations ext3_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
@@ -737,7 +737,7 @@ static struct dquot_operations ext3_quota_operations = {
 	.destroy_dquot	= dquot_destroy,
 };
 
-static struct quotactl_ops ext3_qctl_operations = {
+static const struct quotactl_ops ext3_qctl_operations = {
 	.quota_on	= ext3_quota_on,
 	.quota_off	= vfs_quota_off,
 	.quota_sync	= vfs_quota_sync,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4abd683b963..3a798737e30 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2337,7 +2337,7 @@ static int __mpage_da_writepage(struct page *page,
 			/*
 			 * Rest of the page in the page_vec
 			 * redirty then and skip then. We will
-			 * try to to write them again after
+			 * try to write them again after
 			 * starting a new transaction
 			 */
 			redirty_page_for_writepage(wbc, page);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a6b1ab73472..df539ba2777 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -964,7 +964,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 static ssize_t ext4_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
 
-static struct dquot_operations ext4_quota_operations = {
+static const struct dquot_operations ext4_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
@@ -985,7 +985,7 @@ static struct dquot_operations ext4_quota_operations = {
 	.destroy_dquot	= dquot_destroy,
 };
 
-static struct quotactl_ops ext4_qctl_operations = {
+static const struct quotactl_ops ext4_qctl_operations = {
 	.quota_on	= ext4_quota_on,
 	.quota_off	= vfs_quota_off,
 	.quota_sync	= vfs_quota_sync,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 28c590b7c9d..8f1cfb02a6c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -179,7 +179,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
  * always aligned to a 64 bit boundary.
  *
  * The size of the buffer is in bytes, but is it assumed that it is
- * always ok to to read a complete multiple of 64 bits at the end
+ * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is no aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a93b885311d..06b7c2623f9 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -507,6 +507,13 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
507 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 507 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
508 INIT_LIST_HEAD(&inode->i_mapping->private_list); 508 INIT_LIST_HEAD(&inode->i_mapping->private_list);
509 info = HUGETLBFS_I(inode); 509 info = HUGETLBFS_I(inode);
510 /*
511 * The policy is initialized here even if we are creating a
512 * private inode because initialization simply creates an
513 * an empty rb tree and calls spin_lock_init(), later when we
514 * call mpol_free_shared_policy() it will just return because
515 * the rb tree will still be empty.
516 */
510 mpol_shared_policy_init(&info->policy, NULL); 517 mpol_shared_policy_init(&info->policy, NULL);
511 switch (mode & S_IFMT) { 518 switch (mode & S_IFMT) {
512 default: 519 default:
@@ -931,13 +938,19 @@ static struct file_system_type hugetlbfs_fs_type = {
931 938
932static struct vfsmount *hugetlbfs_vfsmount; 939static struct vfsmount *hugetlbfs_vfsmount;
933 940
934static int can_do_hugetlb_shm(void) 941static int can_do_hugetlb_shm(int creat_flags)
935{ 942{
936 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 943 if (creat_flags != HUGETLB_SHMFS_INODE)
944 return 0;
945 if (capable(CAP_IPC_LOCK))
946 return 1;
947 if (in_group_p(sysctl_hugetlb_shm_group))
948 return 1;
949 return 0;
937} 950}
938 951
939struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, 952struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
940 struct user_struct **user) 953 struct user_struct **user, int creat_flags)
941{ 954{
942 int error = -ENOMEM; 955 int error = -ENOMEM;
943 struct file *file; 956 struct file *file;
@@ -949,7 +962,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
949 if (!hugetlbfs_vfsmount) 962 if (!hugetlbfs_vfsmount)
950 return ERR_PTR(-ENOENT); 963 return ERR_PTR(-ENOENT);
951 964
952 if (!can_do_hugetlb_shm()) { 965 if (!can_do_hugetlb_shm(creat_flags)) {
953 *user = current_user(); 966 *user = current_user();
954 if (user_shm_lock(size, *user)) { 967 if (user_shm_lock(size, *user)) {
955 WARN_ONCE(1, 968 WARN_ONCE(1,
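
The new creat_flags parameter lets hugetlb_file_setup() apply the CAP_IPC_LOCK / hugetlb_shm_group check only to SysV-shm-style inodes. A hedged sketch of a call site, loosely modeled on the SysV shm path; the name string and surrounding code are assumptions, not part of this patch:

	struct user_struct *user = NULL;
	struct file *file;

	/* Hypothetical caller: back a shm segment with hugetlbfs; the
	 * permission checks above run because of HUGETLB_SHMFS_INODE. */
	file = hugetlb_file_setup("SYSV00000000", size, acctflag,
				  &user, HUGETLB_SHMFS_INODE);
	if (IS_ERR(file))
		return PTR_ERR(file);
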
diff --git a/fs/inode.c b/fs/inode.c
index b2ba83d2c4e..f5ff71cb3e2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -123,7 +123,7 @@ static void wake_up_inode(struct inode *inode)
123int inode_init_always(struct super_block *sb, struct inode *inode) 123int inode_init_always(struct super_block *sb, struct inode *inode)
124{ 124{
125 static const struct address_space_operations empty_aops; 125 static const struct address_space_operations empty_aops;
126 static struct inode_operations empty_iops; 126 static const struct inode_operations empty_iops;
127 static const struct file_operations empty_fops; 127 static const struct file_operations empty_fops;
128 struct address_space *const mapping = &inode->i_data; 128 struct address_space *const mapping = &inode->i_data;
129 129
@@ -695,13 +695,15 @@ void unlock_new_inode(struct inode *inode)
695 } 695 }
696#endif 696#endif
697 /* 697 /*
698 * This is special! We do not need the spinlock 698 * This is special! We do not need the spinlock when clearing I_LOCK,
699 * when clearing I_LOCK, because we're guaranteed 699 * because we're guaranteed that nobody else tries to do anything about
700 * that nobody else tries to do anything about the 700 * the state of the inode when it is locked, as we just created it (so
701 * state of the inode when it is locked, as we 701 * there can be no old holders that haven't tested I_LOCK).
702 * just created it (so there can be no old holders 702 * However we must emit the memory barrier so that other CPUs reliably
703 * that haven't tested I_LOCK). 703 * see the clearing of I_LOCK after the other inode initialisation has
704 * completed.
704 */ 705 */
706 smp_mb();
705 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 707 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
706 inode->i_state &= ~(I_LOCK|I_NEW); 708 inode->i_state &= ~(I_LOCK|I_NEW);
707 wake_up_inode(inode); 709 wake_up_inode(inode);
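
The added smp_mb() ensures every CPU sees the inode fully initialised before it sees I_LOCK clear. The general shape of the pattern, as a hedged generic sketch rather than the inode code itself, pairs a full barrier on the publishing side with a read barrier on the consuming side:

	#define FLAG_LOCKED 1UL

	struct obj {
		unsigned long state;
		int data;
	};

	extern int compute(void);
	extern int use(int data);

	/* Publisher: all initialisation stores are ordered before the
	 * store that clears the flag. */
	static void publish(struct obj *o)
	{
		o->data = compute();
		smp_mb();
		o->state &= ~FLAG_LOCKED;
	}

	/* Consumer: the flag load is ordered before the data loads;
	 * smp_rmb() pairs with the publisher's smp_mb(). */
	static int consume(struct obj *o)
	{
		if (o->state & FLAG_LOCKED)
			return -1;
		smp_rmb();
		return use(o->data);
	}
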
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 0035c021395..9a80e8e595d 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -123,7 +123,7 @@ static struct dentry *jffs2_get_parent(struct dentry *child)
123 return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino)); 123 return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino));
124} 124}
125 125
126static struct export_operations jffs2_export_ops = { 126static const struct export_operations jffs2_export_ops = {
127 .get_parent = jffs2_get_parent, 127 .get_parent = jffs2_get_parent,
128 .fh_to_dentry = jffs2_fh_to_dentry, 128 .fh_to_dentry = jffs2_fh_to_dentry,
129 .fh_to_parent = jffs2_fh_to_parent, 129 .fh_to_parent = jffs2_fh_to_parent,
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 1f3b0fc0d35..fc9032dc886 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
166 */ 166 */
167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) 167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
168 continue; 168 continue;
169 if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) 169 if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
170 continue; 170 continue;
171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
172 continue; 172 continue;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 4336adba952..c81249fef11 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -458,7 +458,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl)
458 nlm_put_lockowner(fl->fl_u.nfs_fl.owner); 458 nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
459} 459}
460 460
461static struct file_lock_operations nlmclnt_lock_ops = { 461static const struct file_lock_operations nlmclnt_lock_ops = {
462 .fl_copy_lock = nlmclnt_locks_copy_lock, 462 .fl_copy_lock = nlmclnt_locks_copy_lock,
463 .fl_release_private = nlmclnt_locks_release_private, 463 .fl_release_private = nlmclnt_locks_release_private,
464}; 464};
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 7cb076ac6b4..4600c2037b8 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
111 */ 111 */
112 chain = &nlm_hosts[nlm_hash_address(ni->sap)]; 112 chain = &nlm_hosts[nlm_hash_address(ni->sap)];
113 hlist_for_each_entry(host, pos, chain, h_hash) { 113 hlist_for_each_entry(host, pos, chain, h_hash) {
114 if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) 114 if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
115 continue; 115 continue;
116 116
117 /* See if we have an NSM handle for this client */ 117 /* See if we have an NSM handle for this client */
@@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
125 if (host->h_server != ni->server) 125 if (host->h_server != ni->server)
126 continue; 126 continue;
127 if (ni->server && 127 if (ni->server &&
128 !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) 128 !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
129 continue; 129 continue;
130 130
131 /* Move to head of hash chain. */ 131 /* Move to head of hash chain. */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 30c933188dd..f956651d0f6 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap)
209 struct nsm_handle *nsm; 209 struct nsm_handle *nsm;
210 210
211 list_for_each_entry(nsm, &nsm_handles, sm_link) 211 list_for_each_entry(nsm, &nsm_handles, sm_link)
212 if (nlm_cmp_addr(nsm_addr(nsm), sap)) 212 if (rpc_cmp_addr(nsm_addr(nsm), sap))
213 return nsm; 213 return nsm;
214 return NULL; 214 return NULL;
215} 215}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e577a78d7ba..d1001790fa9 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -705,7 +705,7 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
705 return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; 705 return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
706} 706}
707 707
708struct lock_manager_operations nlmsvc_lock_operations = { 708const struct lock_manager_operations nlmsvc_lock_operations = {
709 .fl_compare_owner = nlmsvc_same_owner, 709 .fl_compare_owner = nlmsvc_same_owner,
710 .fl_notify = nlmsvc_notify_blocked, 710 .fl_notify = nlmsvc_notify_blocked,
711 .fl_grant = nlmsvc_grant_deferred, 711 .fl_grant = nlmsvc_grant_deferred,
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 9e4d6aab611..ad478da7ca6 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
417static int 417static int
418nlmsvc_match_ip(void *datap, struct nlm_host *host) 418nlmsvc_match_ip(void *datap, struct nlm_host *host)
419{ 419{
420 return nlm_cmp_addr(nlm_srcaddr(host), datap); 420 return rpc_cmp_addr(nlm_srcaddr(host), datap);
421} 421}
422 422
423/** 423/**
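
The lockd hunks replace the private nlm_cmp_addr() with the shared sunrpc helper rpc_cmp_addr(), which compares two socket addresses by family and address bytes while ignoring the port. A hedged userspace sketch of that comparison (the kernel helper additionally handles IPv6 scope ids):

	#include <stdbool.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>

	static bool addr_equal(const struct sockaddr *a, const struct sockaddr *b)
	{
		if (a->sa_family != b->sa_family)
			return false;
		switch (a->sa_family) {
		case AF_INET:
			return ((const struct sockaddr_in *)a)->sin_addr.s_addr ==
			       ((const struct sockaddr_in *)b)->sin_addr.s_addr;
		case AF_INET6:
			return memcmp(&((const struct sockaddr_in6 *)a)->sin6_addr,
				      &((const struct sockaddr_in6 *)b)->sin6_addr,
				      sizeof(struct in6_addr)) == 0;
		}
		return false;
	}
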
diff --git a/fs/locks.c b/fs/locks.c
index 19ee18a6829..a8794f233bc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -434,7 +434,7 @@ static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try)
434 return fl->fl_file == try->fl_file; 434 return fl->fl_file == try->fl_file;
435} 435}
436 436
437static struct lock_manager_operations lease_manager_ops = { 437static const struct lock_manager_operations lease_manager_ops = {
438 .fl_break = lease_break_callback, 438 .fl_break = lease_break_callback,
439 .fl_release_private = lease_release_private_callback, 439 .fl_release_private = lease_release_private_callback,
440 .fl_mylease = lease_mylease_callback, 440 .fl_mylease = lease_mylease_callback,
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index e5a2dac5f71..76b0aa0f73b 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -222,7 +222,7 @@ static unsigned decode_sessionid(struct xdr_stream *xdr,
222 222
223 p = read_buf(xdr, len); 223 p = read_buf(xdr, len);
224 if (unlikely(p == NULL)) 224 if (unlikely(p == NULL))
225 return htonl(NFS4ERR_RESOURCE);; 225 return htonl(NFS4ERR_RESOURCE);
226 226
227 memcpy(sid->data, p, len); 227 memcpy(sid->data, p, len);
228 return 0; 228 return 0;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1434080aefe..2ef4fecf398 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -638,7 +638,7 @@ static void nfs4_fl_release_lock(struct file_lock *fl)
638 nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); 638 nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
639} 639}
640 640
641static struct file_lock_operations nfs4_fl_lock_ops = { 641static const struct file_lock_operations nfs4_fl_lock_ops = {
642 .fl_copy_lock = nfs4_fl_copy_lock, 642 .fl_copy_lock = nfs4_fl_copy_lock,
643 .fl_release_private = nfs4_fl_release_lock, 643 .fl_release_private = nfs4_fl_release_lock,
644}; 644};
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index d9462643155..984a5ebcc1d 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1341,6 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1341 if (rv) 1341 if (rv)
1342 goto out; 1342 goto out;
1343 rv = check_nfsd_access(exp, rqstp); 1343 rv = check_nfsd_access(exp, rqstp);
1344 if (rv)
1345 fh_put(fhp);
1344out: 1346out:
1345 exp_put(exp); 1347 exp_put(exp);
1346 return rv; 1348 return rv;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 01d4ec1c88e..edf926e1062 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -814,17 +814,6 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
814 return p; 814 return p;
815} 815}
816 816
817static __be32 *
818encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
819 struct svc_fh *fhp)
820{
821 p = encode_post_op_attr(cd->rqstp, p, fhp);
822 *p++ = xdr_one; /* yes, a file handle follows */
823 p = encode_fh(p, fhp);
824 fh_put(fhp);
825 return p;
826}
827
828static int 817static int
829compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, 818compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
830 const char *name, int namlen) 819 const char *name, int namlen)
@@ -836,29 +825,54 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
836 dparent = cd->fh.fh_dentry; 825 dparent = cd->fh.fh_dentry;
837 exp = cd->fh.fh_export; 826 exp = cd->fh.fh_export;
838 827
839 fh_init(fhp, NFS3_FHSIZE);
840 if (isdotent(name, namlen)) { 828 if (isdotent(name, namlen)) {
841 if (namlen == 2) { 829 if (namlen == 2) {
842 dchild = dget_parent(dparent); 830 dchild = dget_parent(dparent);
843 if (dchild == dparent) { 831 if (dchild == dparent) {
844 /* filesystem root - cannot return filehandle for ".." */ 832 /* filesystem root - cannot return filehandle for ".." */
845 dput(dchild); 833 dput(dchild);
846 return 1; 834 return -ENOENT;
847 } 835 }
848 } else 836 } else
849 dchild = dget(dparent); 837 dchild = dget(dparent);
850 } else 838 } else
851 dchild = lookup_one_len(name, dparent, namlen); 839 dchild = lookup_one_len(name, dparent, namlen);
852 if (IS_ERR(dchild)) 840 if (IS_ERR(dchild))
853 return 1; 841 return -ENOENT;
854 if (d_mountpoint(dchild) || 842 rv = -ENOENT;
855 fh_compose(fhp, exp, dchild, &cd->fh) != 0 || 843 if (d_mountpoint(dchild))
856 !dchild->d_inode) 844 goto out;
857 rv = 1; 845 rv = fh_compose(fhp, exp, dchild, &cd->fh);
846 if (rv)
847 goto out;
848 if (!dchild->d_inode)
849 goto out;
850 rv = 0;
851out:
858 dput(dchild); 852 dput(dchild);
859 return rv; 853 return rv;
860} 854}
861 855
 856static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
857{
858 struct svc_fh fh;
859 int err;
860
861 fh_init(&fh, NFS3_FHSIZE);
862 err = compose_entry_fh(cd, &fh, name, namlen);
863 if (err) {
864 *p++ = 0;
865 *p++ = 0;
866 goto out;
867 }
868 p = encode_post_op_attr(cd->rqstp, p, &fh);
869 *p++ = xdr_one; /* yes, a file handle follows */
870 p = encode_fh(p, &fh);
871out:
872 fh_put(&fh);
873 return p;
874}
875
862/* 876/*
863 * Encode a directory entry. This one works for both normal readdir 877 * Encode a directory entry. This one works for both normal readdir
864 * and readdirplus. 878 * and readdirplus.
@@ -929,16 +943,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
929 943
930 p = encode_entry_baggage(cd, p, name, namlen, ino); 944 p = encode_entry_baggage(cd, p, name, namlen, ino);
931 945
932 /* throw in readdirplus baggage */ 946 if (plus)
933 if (plus) { 947 p = encode_entryplus_baggage(cd, p, name, namlen);
934 struct svc_fh fh;
935
936 if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
937 *p++ = 0;
938 *p++ = 0;
939 } else
940 p = encode_entryplus_baggage(cd, p, &fh);
941 }
942 num_entry_words = p - cd->buffer; 948 num_entry_words = p - cd->buffer;
943 } else if (cd->rqstp->rq_respages[pn+1] != NULL) { 949 } else if (cd->rqstp->rq_respages[pn+1] != NULL) {
944 /* temporarily encode entry into next page, then move back to 950 /* temporarily encode entry into next page, then move back to
@@ -951,17 +957,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
951 957
952 p1 = encode_entry_baggage(cd, p1, name, namlen, ino); 958 p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
953 959
954 /* throw in readdirplus baggage */ 960 if (plus)
955 if (plus) { 961 p1 = encode_entryplus_baggage(cd, p1, name, namlen);
956 struct svc_fh fh;
957
958 if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
959 /* zero out the filehandle */
960 *p1++ = 0;
961 *p1++ = 0;
962 } else
963 p1 = encode_entryplus_baggage(cd, p1, &fh);
964 }
965 962
966 /* determine entry word length and lengths to go in pages */ 963 /* determine entry word length and lengths to go in pages */
967 num_entry_words = p1 - tmp; 964 num_entry_words = p1 - tmp;
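
The readdirplus rework concentrates the filehandle lifetime in one place: encode_entryplus_baggage() now owns the svc_fh (fh_init through fh_put on every path), and compose_entry_fh() funnels all failures through a single exit that drops the child dentry, so no path can leak a reference. A hedged sketch of the acquire/goto-out/release idiom the patch adopts, with invented names:

	/* Illustrative only: acquire a reference, funnel failures through
	 * one label, release exactly once. */
	static int get_child(struct dentry *parent, const char *name, int len,
			     struct dentry **out)
	{
		struct dentry *d = lookup_one_len(name, parent, len);
		int rv = -ENOENT;

		if (IS_ERR(d))
			return rv;	/* nothing to release yet */
		if (!d->d_inode)
			goto out;	/* negative dentry: no inode */
		*out = dget(d);
		rv = 0;
	out:
		dput(d);
		return rv;
	}
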
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 54b8b4140c8..725d02f210e 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -321,7 +321,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
321 deny = ~pas.group & pas.other; 321 deny = ~pas.group & pas.other;
322 if (deny) { 322 if (deny) {
323 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; 323 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
324 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 324 ace->flag = eflag;
325 ace->access_mask = deny_mask_from_posix(deny, flags); 325 ace->access_mask = deny_mask_from_posix(deny, flags);
326 ace->whotype = NFS4_ACL_WHO_GROUP; 326 ace->whotype = NFS4_ACL_WHO_GROUP;
327 ace++; 327 ace++;
@@ -335,7 +335,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
335 if (deny) { 335 if (deny) {
336 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; 336 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
337 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 337 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
338 ace->access_mask = mask_from_posix(deny, flags); 338 ace->access_mask = deny_mask_from_posix(deny, flags);
339 ace->whotype = NFS4_ACL_WHO_NAMED; 339 ace->whotype = NFS4_ACL_WHO_NAMED;
340 ace->who = pa->e_id; 340 ace->who = pa->e_id;
341 ace++; 341 ace++;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3fd23f7acec..24e8d78f8dd 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -43,25 +43,30 @@
43#include <linux/sunrpc/xdr.h> 43#include <linux/sunrpc/xdr.h>
44#include <linux/sunrpc/svc.h> 44#include <linux/sunrpc/svc.h>
45#include <linux/sunrpc/clnt.h> 45#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/svcsock.h>
46#include <linux/nfsd/nfsd.h> 47#include <linux/nfsd/nfsd.h>
47#include <linux/nfsd/state.h> 48#include <linux/nfsd/state.h>
48#include <linux/sunrpc/sched.h> 49#include <linux/sunrpc/sched.h>
49#include <linux/nfs4.h> 50#include <linux/nfs4.h>
51#include <linux/sunrpc/xprtsock.h>
50 52
51#define NFSDDBG_FACILITY NFSDDBG_PROC 53#define NFSDDBG_FACILITY NFSDDBG_PROC
52 54
53#define NFSPROC4_CB_NULL 0 55#define NFSPROC4_CB_NULL 0
54#define NFSPROC4_CB_COMPOUND 1 56#define NFSPROC4_CB_COMPOUND 1
57#define NFS4_STATEID_SIZE 16
55 58
56/* Index of predefined Linux callback client operations */ 59/* Index of predefined Linux callback client operations */
57 60
58enum { 61enum {
59 NFSPROC4_CLNT_CB_NULL = 0, 62 NFSPROC4_CLNT_CB_NULL = 0,
60 NFSPROC4_CLNT_CB_RECALL, 63 NFSPROC4_CLNT_CB_RECALL,
64 NFSPROC4_CLNT_CB_SEQUENCE,
61}; 65};
62 66
63enum nfs_cb_opnum4 { 67enum nfs_cb_opnum4 {
64 OP_CB_RECALL = 4, 68 OP_CB_RECALL = 4,
69 OP_CB_SEQUENCE = 11,
65}; 70};
66 71
67#define NFS4_MAXTAGLEN 20 72#define NFS4_MAXTAGLEN 20
@@ -70,17 +75,29 @@ enum nfs_cb_opnum4 {
70#define NFS4_dec_cb_null_sz 0 75#define NFS4_dec_cb_null_sz 0
71#define cb_compound_enc_hdr_sz 4 76#define cb_compound_enc_hdr_sz 4
72#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) 77#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
78#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2)
79#define cb_sequence_enc_sz (sessionid_sz + 4 + \
80 1 /* no referring calls list yet */)
81#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4)
82
73#define op_enc_sz 1 83#define op_enc_sz 1
74#define op_dec_sz 2 84#define op_dec_sz 2
75#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) 85#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2))
76#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) 86#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2)
77#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ 87#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \
88 cb_sequence_enc_sz + \
78 1 + enc_stateid_sz + \ 89 1 + enc_stateid_sz + \
79 enc_nfs4_fh_sz) 90 enc_nfs4_fh_sz)
80 91
81#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ 92#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \
93 cb_sequence_dec_sz + \
82 op_dec_sz) 94 op_dec_sz)
83 95
96struct nfs4_rpc_args {
97 void *args_op;
98 struct nfsd4_cb_sequence args_seq;
99};
100
84/* 101/*
85* Generic encode routines from fs/nfs/nfs4xdr.c 102* Generic encode routines from fs/nfs/nfs4xdr.c
86*/ 103*/
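
The new size macros count 4-byte XDR words. With NFS4_MAX_SESSIONID_LEN equal to 16, the CB_SEQUENCE sizes work out as:

	/*
	 * sessionid_sz       = 16 >> 2               = 4 words
	 * cb_sequence_enc_sz = sessionid_sz + 4 + 1  = 9 words (36 bytes)
	 *	4 words of session id, 4 words for sequenceid / slotid /
	 *	highest slotid / cachethis, 1 word for the empty
	 *	referring-call list.
	 * cb_sequence_dec_sz = op_dec_sz + sessionid_sz + 4
	 *                    = 2 + 4 + 4             = 10 words
	 */
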
@@ -137,11 +154,13 @@ xdr_error: \
137} while (0) 154} while (0)
138 155
139struct nfs4_cb_compound_hdr { 156struct nfs4_cb_compound_hdr {
140 int status; 157 /* args */
141 u32 ident; 158 u32 ident; /* minorversion 0 only */
142 u32 nops; 159 u32 nops;
143 __be32 *nops_p; 160 __be32 *nops_p;
144 u32 minorversion; 161 u32 minorversion;
162 /* res */
163 int status;
145 u32 taglen; 164 u32 taglen;
146 char *tag; 165 char *tag;
147}; 166};
@@ -238,6 +257,27 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
238 hdr->nops++; 257 hdr->nops++;
239} 258}
240 259
260static void
261encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
262 struct nfs4_cb_compound_hdr *hdr)
263{
264 __be32 *p;
265
266 if (hdr->minorversion == 0)
267 return;
268
269 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
270
271 WRITE32(OP_CB_SEQUENCE);
272 WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN);
273 WRITE32(args->cbs_clp->cl_cb_seq_nr);
274 WRITE32(0); /* slotid, always 0 */
275 WRITE32(0); /* highest slotid always 0 */
276 WRITE32(0); /* cachethis always 0 */
277 WRITE32(0); /* FIXME: support referring_call_lists */
278 hdr->nops++;
279}
280
241static int 281static int
242nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) 282nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
243{ 283{
@@ -249,15 +289,19 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
249} 289}
250 290
251static int 291static int
252nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) 292nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
293 struct nfs4_rpc_args *rpc_args)
253{ 294{
254 struct xdr_stream xdr; 295 struct xdr_stream xdr;
296 struct nfs4_delegation *args = rpc_args->args_op;
255 struct nfs4_cb_compound_hdr hdr = { 297 struct nfs4_cb_compound_hdr hdr = {
256 .ident = args->dl_ident, 298 .ident = args->dl_ident,
299 .minorversion = rpc_args->args_seq.cbs_minorversion,
257 }; 300 };
258 301
259 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 302 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
260 encode_cb_compound_hdr(&xdr, &hdr); 303 encode_cb_compound_hdr(&xdr, &hdr);
304 encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
261 encode_cb_recall(&xdr, args, &hdr); 305 encode_cb_recall(&xdr, args, &hdr);
262 encode_cb_nops(&hdr); 306 encode_cb_nops(&hdr);
263 return 0; 307 return 0;
@@ -299,6 +343,57 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
299 return 0; 343 return 0;
300} 344}
301 345
346/*
 347 * Our current backchannel implementation supports a single backchannel
348 * with a single slot.
349 */
350static int
351decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
352 struct rpc_rqst *rqstp)
353{
354 struct nfs4_sessionid id;
355 int status;
356 u32 dummy;
357 __be32 *p;
358
359 if (res->cbs_minorversion == 0)
360 return 0;
361
362 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
363 if (status)
364 return status;
365
366 /*
367 * If the server returns different values for sessionID, slotID or
368 * sequence number, the server is looney tunes.
369 */
370 status = -ESERVERFAULT;
371
372 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
373 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
374 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
375 if (memcmp(id.data, res->cbs_clp->cl_sessionid.data,
376 NFS4_MAX_SESSIONID_LEN)) {
377 dprintk("%s Invalid session id\n", __func__);
378 goto out;
379 }
380 READ32(dummy);
381 if (dummy != res->cbs_clp->cl_cb_seq_nr) {
382 dprintk("%s Invalid sequence number\n", __func__);
383 goto out;
384 }
385 READ32(dummy); /* slotid must be 0 */
386 if (dummy != 0) {
387 dprintk("%s Invalid slotid\n", __func__);
388 goto out;
389 }
390 /* FIXME: process highest slotid and target highest slotid */
391 status = 0;
392out:
393 return status;
394}
395
396
302static int 397static int
303nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) 398nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
304{ 399{
@@ -306,7 +401,8 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
306} 401}
307 402
308static int 403static int
309nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) 404nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
405 struct nfsd4_cb_sequence *seq)
310{ 406{
311 struct xdr_stream xdr; 407 struct xdr_stream xdr;
312 struct nfs4_cb_compound_hdr hdr; 408 struct nfs4_cb_compound_hdr hdr;
@@ -316,6 +412,11 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p)
316 status = decode_cb_compound_hdr(&xdr, &hdr); 412 status = decode_cb_compound_hdr(&xdr, &hdr);
317 if (status) 413 if (status)
318 goto out; 414 goto out;
415 if (seq) {
416 status = decode_cb_sequence(&xdr, seq, rqstp);
417 if (status)
418 goto out;
419 }
319 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); 420 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
320out: 421out:
321 return status; 422 return status;
@@ -377,16 +478,15 @@ static int max_cb_time(void)
377 478
378int setup_callback_client(struct nfs4_client *clp) 479int setup_callback_client(struct nfs4_client *clp)
379{ 480{
380 struct sockaddr_in addr;
381 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 481 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
382 struct rpc_timeout timeparms = { 482 struct rpc_timeout timeparms = {
383 .to_initval = max_cb_time(), 483 .to_initval = max_cb_time(),
384 .to_retries = 0, 484 .to_retries = 0,
385 }; 485 };
386 struct rpc_create_args args = { 486 struct rpc_create_args args = {
387 .protocol = IPPROTO_TCP, 487 .protocol = XPRT_TRANSPORT_TCP,
388 .address = (struct sockaddr *)&addr, 488 .address = (struct sockaddr *) &cb->cb_addr,
389 .addrsize = sizeof(addr), 489 .addrsize = cb->cb_addrlen,
390 .timeout = &timeparms, 490 .timeout = &timeparms,
391 .program = &cb_program, 491 .program = &cb_program,
392 .prognumber = cb->cb_prog, 492 .prognumber = cb->cb_prog,
@@ -399,13 +499,10 @@ int setup_callback_client(struct nfs4_client *clp)
399 499
400 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 500 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
401 return -EINVAL; 501 return -EINVAL;
402 502 if (cb->cb_minorversion) {
403 /* Initialize address */ 503 args.bc_xprt = clp->cl_cb_xprt;
404 memset(&addr, 0, sizeof(addr)); 504 args.protocol = XPRT_TRANSPORT_BC_TCP;
405 addr.sin_family = AF_INET; 505 }
406 addr.sin_port = htons(cb->cb_port);
407 addr.sin_addr.s_addr = htonl(cb->cb_addr);
408
409 /* Create RPC client */ 506 /* Create RPC client */
410 client = rpc_create(&args); 507 client = rpc_create(&args);
411 if (IS_ERR(client)) { 508 if (IS_ERR(client)) {
@@ -439,42 +536,29 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
439 .rpc_call_done = nfsd4_cb_probe_done, 536 .rpc_call_done = nfsd4_cb_probe_done,
440}; 537};
441 538
442static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) 539static struct rpc_cred *callback_cred;
443{
444 struct auth_cred acred = {
445 .machine_cred = 1
446 };
447 540
448 /* 541int set_callback_cred(void)
449 * Note in the gss case this doesn't actually have to wait for a 542{
450 * gss upcall (or any calls to the client); this just creates a 543 callback_cred = rpc_lookup_machine_cred();
451 * non-uptodate cred which the rpc state machine will fill in with 544 if (!callback_cred)
452 * a refresh_upcall later. 545 return -ENOMEM;
453 */ 546 return 0;
454 return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
455 RPCAUTH_LOOKUP_NEW);
456} 547}
457 548
549
458void do_probe_callback(struct nfs4_client *clp) 550void do_probe_callback(struct nfs4_client *clp)
459{ 551{
460 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 552 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
461 struct rpc_message msg = { 553 struct rpc_message msg = {
462 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 554 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
463 .rpc_argp = clp, 555 .rpc_argp = clp,
556 .rpc_cred = callback_cred
464 }; 557 };
465 struct rpc_cred *cred;
466 int status; 558 int status;
467 559
468 cred = lookup_cb_cred(cb);
469 if (IS_ERR(cred)) {
470 status = PTR_ERR(cred);
471 goto out;
472 }
473 cb->cb_cred = cred;
474 msg.rpc_cred = cb->cb_cred;
475 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, 560 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT,
476 &nfsd4_cb_probe_ops, (void *)clp); 561 &nfsd4_cb_probe_ops, (void *)clp);
477out:
478 if (status) { 562 if (status) {
479 warn_no_callback_path(clp, status); 563 warn_no_callback_path(clp, status);
480 put_nfs4_client(clp); 564 put_nfs4_client(clp);
@@ -503,11 +587,95 @@ nfsd4_probe_callback(struct nfs4_client *clp)
503 do_probe_callback(clp); 587 do_probe_callback(clp);
504} 588}
505 589
590/*
591 * There's currently a single callback channel slot.
 592 * If the slot is available, then mark it busy. Otherwise, put the
 593 * thread to sleep on the callback RPC wait queue.
594 */
595static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
596 struct rpc_task *task)
597{
598 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
599 u32 *ptr = (u32 *)clp->cl_sessionid.data;
600 int status = 0;
601
602 dprintk("%s: %u:%u:%u:%u\n", __func__,
603 ptr[0], ptr[1], ptr[2], ptr[3]);
604
605 if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
606 rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
607 dprintk("%s slot is busy\n", __func__);
608 status = -EAGAIN;
609 goto out;
610 }
611
612 /*
613 * We'll need the clp during XDR encoding and decoding,
614 * and the sequence during decoding to verify the reply
615 */
616 args->args_seq.cbs_clp = clp;
617 task->tk_msg.rpc_resp = &args->args_seq;
618
619out:
620 dprintk("%s status=%d\n", __func__, status);
621 return status;
622}
623
624/*
625 * TODO: cb_sequence should support referring call lists, cachethis, multiple
626 * slots, and mark callback channel down on communication errors.
627 */
628static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
629{
630 struct nfs4_delegation *dp = calldata;
631 struct nfs4_client *clp = dp->dl_client;
632 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
633 u32 minorversion = clp->cl_cb_conn.cb_minorversion;
634 int status = 0;
635
636 args->args_seq.cbs_minorversion = minorversion;
637 if (minorversion) {
638 status = nfsd41_cb_setup_sequence(clp, task);
639 if (status) {
640 if (status != -EAGAIN) {
641 /* terminate rpc task */
642 task->tk_status = status;
643 task->tk_action = NULL;
644 }
645 return;
646 }
647 }
648 rpc_call_start(task);
649}
650
651static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
652{
653 struct nfs4_delegation *dp = calldata;
654 struct nfs4_client *clp = dp->dl_client;
655
656 dprintk("%s: minorversion=%d\n", __func__,
657 clp->cl_cb_conn.cb_minorversion);
658
659 if (clp->cl_cb_conn.cb_minorversion) {
660 /* No need for lock, access serialized in nfsd4_cb_prepare */
661 ++clp->cl_cb_seq_nr;
662 clear_bit(0, &clp->cl_cb_slot_busy);
663 rpc_wake_up_next(&clp->cl_cb_waitq);
664 dprintk("%s: freed slot, new seqid=%d\n", __func__,
665 clp->cl_cb_seq_nr);
666
667 /* We're done looking into the sequence information */
668 task->tk_msg.rpc_resp = NULL;
669 }
670}
671
506static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 672static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
507{ 673{
508 struct nfs4_delegation *dp = calldata; 674 struct nfs4_delegation *dp = calldata;
509 struct nfs4_client *clp = dp->dl_client; 675 struct nfs4_client *clp = dp->dl_client;
510 676
677 nfsd4_cb_done(task, calldata);
678
511 switch (task->tk_status) { 679 switch (task->tk_status) {
512 case -EIO: 680 case -EIO:
513 /* Network partition? */ 681 /* Network partition? */
@@ -520,16 +688,19 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
520 break; 688 break;
521 default: 689 default:
522 /* success, or error we can't handle */ 690 /* success, or error we can't handle */
523 return; 691 goto done;
524 } 692 }
525 if (dp->dl_retries--) { 693 if (dp->dl_retries--) {
526 rpc_delay(task, 2*HZ); 694 rpc_delay(task, 2*HZ);
527 task->tk_status = 0; 695 task->tk_status = 0;
528 rpc_restart_call(task); 696 rpc_restart_call(task);
697 return;
529 } else { 698 } else {
530 atomic_set(&clp->cl_cb_conn.cb_set, 0); 699 atomic_set(&clp->cl_cb_conn.cb_set, 0);
531 warn_no_callback_path(clp, task->tk_status); 700 warn_no_callback_path(clp, task->tk_status);
532 } 701 }
702done:
703 kfree(task->tk_msg.rpc_argp);
533} 704}
534 705
535static void nfsd4_cb_recall_release(void *calldata) 706static void nfsd4_cb_recall_release(void *calldata)
@@ -542,6 +713,7 @@ static void nfsd4_cb_recall_release(void *calldata)
542} 713}
543 714
544static const struct rpc_call_ops nfsd4_cb_recall_ops = { 715static const struct rpc_call_ops nfsd4_cb_recall_ops = {
716 .rpc_call_prepare = nfsd4_cb_prepare,
545 .rpc_call_done = nfsd4_cb_recall_done, 717 .rpc_call_done = nfsd4_cb_recall_done,
546 .rpc_release = nfsd4_cb_recall_release, 718 .rpc_release = nfsd4_cb_recall_release,
547}; 719};
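
With ->rpc_call_prepare wired into the recall ops, the RPC state machine drives the backchannel slot as a tiny try-lock: prepare claims bit 0 or parks the task on the wait queue, and the done path releases the bit and wakes the next waiter. A hedged sketch of the gate in isolation, with the error handling trimmed:

	static void slot_prepare(struct rpc_task *task, struct nfs4_client *clp)
	{
		if (test_and_set_bit(0, &clp->cl_cb_slot_busy)) {
			/* lost the race: sleep; we are re-run when woken */
			rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
			return;
		}
		rpc_call_start(task);
	}

	static void slot_done(struct nfs4_client *clp)
	{
		++clp->cl_cb_seq_nr;	/* safe: the single slot serializes us */
		clear_bit(0, &clp->cl_cb_slot_busy);
		rpc_wake_up_next(&clp->cl_cb_waitq);
	}
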
@@ -554,17 +726,24 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
554{ 726{
555 struct nfs4_client *clp = dp->dl_client; 727 struct nfs4_client *clp = dp->dl_client;
556 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; 728 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
729 struct nfs4_rpc_args *args;
557 struct rpc_message msg = { 730 struct rpc_message msg = {
558 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], 731 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
559 .rpc_argp = dp, 732 .rpc_cred = callback_cred
560 .rpc_cred = clp->cl_cb_conn.cb_cred
561 }; 733 };
562 int status; 734 int status = -ENOMEM;
563 735
736 args = kzalloc(sizeof(*args), GFP_KERNEL);
737 if (!args)
738 goto out;
739 args->args_op = dp;
740 msg.rpc_argp = args;
564 dp->dl_retries = 1; 741 dp->dl_retries = 1;
565 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, 742 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
566 &nfsd4_cb_recall_ops, dp); 743 &nfsd4_cb_recall_ops, dp);
744out:
567 if (status) { 745 if (status) {
746 kfree(args);
568 put_nfs4_client(clp); 747 put_nfs4_client(clp);
569 nfs4_put_delegation(dp); 748 nfs4_put_delegation(dp);
570 } 749 }
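
The argument block is heap-allocated because it must outlive the caller's stack for the duration of the asynchronous RPC. The resulting ownership rule, restated:

	/*
	 * Ownership of msg.rpc_argp after this patch:
	 *   - rpc_call_async() succeeds -> nfsd4_cb_recall_done() kfree()s
	 *     the args when the call completes;
	 *   - rpc_call_async() fails    -> the caller kfree()s them itself.
	 * Exactly one of the two paths runs, so the block is freed once.
	 */
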
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7c8801769a3..bebc0c2e1b0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -68,7 +68,6 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
68 u32 *bmval, u32 *writable) 68 u32 *bmval, u32 *writable)
69{ 69{
70 struct dentry *dentry = cstate->current_fh.fh_dentry; 70 struct dentry *dentry = cstate->current_fh.fh_dentry;
71 struct svc_export *exp = cstate->current_fh.fh_export;
72 71
73 /* 72 /*
74 * Check whether the attributes are supported by the NFSv4 server or not. 73 * Check whether the attributes are supported by the NFSv4 server or not.
@@ -80,17 +79,13 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
80 return nfserr_attrnotsupp; 79 return nfserr_attrnotsupp;
81 80
82 /* 81 /*
83 * Check whether FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported 82 * Check whether FATTR4_WORD0_ACL can be supported
84 * in the current environment or not. 83 * in the current environment or not.
85 */ 84 */
86 if (bmval[0] & FATTR4_WORD0_ACL) { 85 if (bmval[0] & FATTR4_WORD0_ACL) {
87 if (!IS_POSIXACL(dentry->d_inode)) 86 if (!IS_POSIXACL(dentry->d_inode))
88 return nfserr_attrnotsupp; 87 return nfserr_attrnotsupp;
89 } 88 }
90 if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) {
91 if (exp->ex_fslocs.locations == NULL)
92 return nfserr_attrnotsupp;
93 }
94 89
95 /* 90 /*
96 * According to spec, read-only attributes return ERR_INVAL. 91 * According to spec, read-only attributes return ERR_INVAL.
@@ -123,6 +118,35 @@ nfsd4_check_open_attributes(struct svc_rqst *rqstp,
123 return status; 118 return status;
124} 119}
125 120
121static int
122is_create_with_attrs(struct nfsd4_open *open)
123{
124 return open->op_create == NFS4_OPEN_CREATE
125 && (open->op_createmode == NFS4_CREATE_UNCHECKED
126 || open->op_createmode == NFS4_CREATE_GUARDED
127 || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
128}
129
130/*
131 * if error occurs when setting the acl, just clear the acl bit
132 * in the returned attr bitmap.
133 */
134static void
135do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
136 struct nfs4_acl *acl, u32 *bmval)
137{
138 __be32 status;
139
140 status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
141 if (status)
142 /*
143 * We should probably fail the whole open at this point,
144 * but we've already created the file, so it's too late;
 145 * so this seems the least of evils:
146 */
147 bmval[0] &= ~FATTR4_WORD0_ACL;
148}
149
126static inline void 150static inline void
127fh_dup2(struct svc_fh *dst, struct svc_fh *src) 151fh_dup2(struct svc_fh *dst, struct svc_fh *src)
128{ 152{
@@ -206,6 +230,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
206 if (status) 230 if (status)
207 goto out; 231 goto out;
208 232
233 if (is_create_with_attrs(open) && open->op_acl != NULL)
234 do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval);
235
209 set_change_info(&open->op_cinfo, current_fh); 236 set_change_info(&open->op_cinfo, current_fh);
210 fh_dup2(current_fh, &resfh); 237 fh_dup2(current_fh, &resfh);
211 238
@@ -536,12 +563,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
536 status = nfserr_badtype; 563 status = nfserr_badtype;
537 } 564 }
538 565
539 if (!status) { 566 if (status)
540 fh_unlock(&cstate->current_fh); 567 goto out;
541 set_change_info(&create->cr_cinfo, &cstate->current_fh); 568
542 fh_dup2(&cstate->current_fh, &resfh); 569 if (create->cr_acl != NULL)
543 } 570 do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
571 create->cr_bmval);
544 572
573 fh_unlock(&cstate->current_fh);
574 set_change_info(&create->cr_cinfo, &cstate->current_fh);
575 fh_dup2(&cstate->current_fh, &resfh);
576out:
545 fh_put(&resfh); 577 fh_put(&resfh);
546 return status; 578 return status;
547} 579}
@@ -947,34 +979,6 @@ static struct nfsd4_operation nfsd4_ops[];
947static const char *nfsd4_op_name(unsigned opnum); 979static const char *nfsd4_op_name(unsigned opnum);
948 980
949/* 981/*
950 * This is a replay of a compound for which no cache entry pages
951 * were used. Encode the sequence operation, and if cachethis is FALSE
952 * encode the uncache rep error on the next operation.
953 */
954static __be32
955nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args,
956 struct nfsd4_compoundres *resp)
957{
958 struct nfsd4_op *op;
959
960 dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__,
961 resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis);
962
963 /* Encode the replayed sequence operation */
964 BUG_ON(resp->opcnt != 1);
965 op = &args->ops[resp->opcnt - 1];
966 nfsd4_encode_operation(resp, op);
967
968 /*return nfserr_retry_uncached_rep in next operation. */
969 if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) {
970 op = &args->ops[resp->opcnt++];
971 op->status = nfserr_retry_uncached_rep;
972 nfsd4_encode_operation(resp, op);
973 }
974 return op->status;
975}
976
977/*
978 * Enforce NFSv4.1 COMPOUND ordering rules. 982 * Enforce NFSv4.1 COMPOUND ordering rules.
979 * 983 *
980 * TODO: 984 * TODO:
@@ -1083,13 +1087,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1083 BUG_ON(op->status == nfs_ok); 1087 BUG_ON(op->status == nfs_ok);
1084 1088
1085encode_op: 1089encode_op:
1086 /* Only from SEQUENCE or CREATE_SESSION */ 1090 /* Only from SEQUENCE */
1087 if (resp->cstate.status == nfserr_replay_cache) { 1091 if (resp->cstate.status == nfserr_replay_cache) {
1088 dprintk("%s NFS4.1 replay from cache\n", __func__); 1092 dprintk("%s NFS4.1 replay from cache\n", __func__);
1089 if (nfsd4_not_cached(resp)) 1093 status = op->status;
1090 status = nfsd4_enc_uncached_replay(args, resp);
1091 else
1092 status = op->status;
1093 goto out; 1094 goto out;
1094 } 1095 }
1095 if (op->status == nfserr_replay_me) { 1096 if (op->status == nfserr_replay_me) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 980a216a48c..2153f9bdbeb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -55,6 +55,7 @@
55#include <linux/lockd/bind.h> 55#include <linux/lockd/bind.h>
56#include <linux/module.h> 56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h> 57#include <linux/sunrpc/svcauth_gss.h>
58#include <linux/sunrpc/clnt.h>
58 59
59#define NFSDDBG_FACILITY NFSDDBG_PROC 60#define NFSDDBG_FACILITY NFSDDBG_PROC
60 61
@@ -413,36 +414,65 @@ gen_sessionid(struct nfsd4_session *ses)
413} 414}
414 415
415/* 416/*
416 * Give the client the number of slots it requests bound by 417 * The protocol defines ca_maxresponsesize_cached to include the size of
417 * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. 418 * the rpc header, but all we need to cache is the data starting after
419 * the end of the initial SEQUENCE operation--the rest we regenerate
 420 * each time. Therefore we can advertise a ca_maxresponsesize_cached
421 * value that is the number of bytes in our cache plus a few additional
422 * bytes. In order to stay on the safe side, and not promise more than
423 * we can cache, those additional bytes must be the minimum possible: 24
424 * bytes of rpc header (xid through accept state, with AUTH_NULL
425 * verifier), 12 for the compound header (with zero-length tag), and 44
426 * for the SEQUENCE op response:
427 */
428#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
429
430/*
431 * Give the client the number of ca_maxresponsesize_cached slots it
432 * requests, of size bounded by NFSD_SLOT_CACHE_SIZE,
433 * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more
434 * than NFSD_MAX_SLOTS_PER_SESSION.
418 * 435 *
419 * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we 436 * If we run out of reserved DRC memory we should (up to a point)
420 * should (up to a point) re-negotiate active sessions and reduce their 437 * re-negotiate active sessions and reduce their slot usage to make
421 slot usage to make room for new connections. For now we just fail the 438 room for new connections. For now we just fail the create session.
422 * create session.
423 */ 439 */
424static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) 440static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
425{ 441{
426 int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; 442 int mem, size = fchan->maxresp_cached;
427 443
428 if (fchan->maxreqs < 1) 444 if (fchan->maxreqs < 1)
429 return nfserr_inval; 445 return nfserr_inval;
430 else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
431 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
432 446
433 spin_lock(&nfsd_serv->sv_lock); 447 if (size < NFSD_MIN_HDR_SEQ_SZ)
434 if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) 448 size = NFSD_MIN_HDR_SEQ_SZ;
435 np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; 449 size -= NFSD_MIN_HDR_SEQ_SZ;
436 nfsd_serv->sv_drc_pages_used += np; 450 if (size > NFSD_SLOT_CACHE_SIZE)
437 spin_unlock(&nfsd_serv->sv_lock); 451 size = NFSD_SLOT_CACHE_SIZE;
452
453 /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
454 mem = fchan->maxreqs * size;
455 if (mem > NFSD_MAX_MEM_PER_SESSION) {
456 fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
457 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
458 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
459 mem = fchan->maxreqs * size;
460 }
438 461
439 if (np <= 0) { 462 spin_lock(&nfsd_drc_lock);
440 status = nfserr_resource; 463 /* bound the total session drc memory ussage */
441 fchan->maxreqs = 0; 464 if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
442 } else 465 fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
443 fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; 466 mem = fchan->maxreqs * size;
467 }
468 nfsd_drc_mem_used += mem;
469 spin_unlock(&nfsd_drc_lock);
444 470
445 return status; 471 if (fchan->maxreqs == 0)
472 return nfserr_serverfault;
473
474 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ;
475 return 0;
446} 476}
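
The advertised ca_maxresponsesize_cached is recomputed from what the server is actually willing to cache. A worked example under this patch's constants (the client's numbers are hypothetical):

	/*
	 * NFSD_MIN_HDR_SEQ_SZ = 24 + 12 + 44 = 80 bytes.
	 * Client asks for maxresp_cached = 4096 and maxreqs = 32:
	 *   size = 4096 - 80 = 4016, then capped at NFSD_SLOT_CACHE_SIZE;
	 *   mem  = maxreqs * size, capped by NFSD_MAX_MEM_PER_SESSION and
	 *          by what remains of the global nfsd_drc_max_mem pool;
	 *   the reply advertises size + 80, never more than the server
	 *   is prepared to cache.
	 */
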
447 477
448/* 478/*
@@ -466,36 +496,41 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
466 fchan->maxresp_sz = maxcount; 496 fchan->maxresp_sz = maxcount;
467 session_fchan->maxresp_sz = fchan->maxresp_sz; 497 session_fchan->maxresp_sz = fchan->maxresp_sz;
468 498
469 /* Set the max response cached size our default which is
470 * a multiple of PAGE_SIZE and small */
471 session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
472 fchan->maxresp_cached = session_fchan->maxresp_cached;
473
474 /* Use the client's maxops if possible */ 499 /* Use the client's maxops if possible */
475 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 500 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
476 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 501 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
477 session_fchan->maxops = fchan->maxops; 502 session_fchan->maxops = fchan->maxops;
478 503
479 /* try to use the client requested number of slots */
480 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
481 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
482
483 /* FIXME: Error means no more DRC pages so the server should 504 /* FIXME: Error means no more DRC pages so the server should
484 * recover pages from existing sessions. For now fail session 505 * recover pages from existing sessions. For now fail session
485 * creation. 506 * creation.
486 */ 507 */
487 status = set_forechannel_maxreqs(fchan); 508 status = set_forechannel_drc_size(fchan);
488 509
510 session_fchan->maxresp_cached = fchan->maxresp_cached;
489 session_fchan->maxreqs = fchan->maxreqs; 511 session_fchan->maxreqs = fchan->maxreqs;
512
513 dprintk("%s status %d\n", __func__, status);
490 return status; 514 return status;
491} 515}
492 516
517static void
518free_session_slots(struct nfsd4_session *ses)
519{
520 int i;
521
522 for (i = 0; i < ses->se_fchannel.maxreqs; i++)
523 kfree(ses->se_slots[i]);
524}
525
493static int 526static int
494alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, 527alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
495 struct nfsd4_create_session *cses) 528 struct nfsd4_create_session *cses)
496{ 529{
497 struct nfsd4_session *new, tmp; 530 struct nfsd4_session *new, tmp;
498 int idx, status = nfserr_resource, slotsize; 531 struct nfsd4_slot *sp;
532 int idx, slotsize, cachesize, i;
533 int status;
499 534
500 memset(&tmp, 0, sizeof(tmp)); 535 memset(&tmp, 0, sizeof(tmp));
501 536
@@ -506,14 +541,27 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
506 if (status) 541 if (status)
507 goto out; 542 goto out;
508 543
509 /* allocate struct nfsd4_session and slot table in one piece */ 544 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot)
510 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); 545 + sizeof(struct nfsd4_session) > PAGE_SIZE);
546
547 status = nfserr_serverfault;
548 /* allocate struct nfsd4_session and slot table pointers in one piece */
549 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
511 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); 550 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
512 if (!new) 551 if (!new)
513 goto out; 552 goto out;
514 553
515 memcpy(new, &tmp, sizeof(*new)); 554 memcpy(new, &tmp, sizeof(*new));
516 555
556 /* allocate each struct nfsd4_slot and data cache in one piece */
557 cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
558 for (i = 0; i < new->se_fchannel.maxreqs; i++) {
559 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
560 if (!sp)
561 goto out_free;
562 new->se_slots[i] = sp;
563 }
564
517 new->se_client = clp; 565 new->se_client = clp;
518 gen_sessionid(new); 566 gen_sessionid(new);
519 idx = hash_sessionid(&new->se_sessionid); 567 idx = hash_sessionid(&new->se_sessionid);
@@ -530,6 +578,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
530 status = nfs_ok; 578 status = nfs_ok;
531out: 579out:
532 return status; 580 return status;
581out_free:
582 free_session_slots(new);
583 kfree(new);
584 goto out;
533} 585}
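
The session now ends in a flexible array of slot pointers and each slot ends in its own data cache, so the BUILD_BUG_ON can bound the base allocation to a page while the per-slot cache size varies per session. A hedged userspace sketch of the same two-level layout:

	#include <stdlib.h>

	struct slot {
		unsigned int datalen;
		char data[];		/* per-slot reply cache follows */
	};

	struct session {
		unsigned int nslots;
		struct slot *slots[];	/* nslots pointers follow */
	};

	static struct session *session_alloc(unsigned int nslots, size_t cachesize)
	{
		struct session *s;
		unsigned int i;

		s = calloc(1, sizeof(*s) + nslots * sizeof(struct slot *));
		if (!s)
			return NULL;
		s->nslots = nslots;
		for (i = 0; i < nslots; i++) {
			s->slots[i] = calloc(1, sizeof(struct slot) + cachesize);
			if (!s->slots[i])
				goto fail;
		}
		return s;
	fail:
		while (i--)
			free(s->slots[i]);
		free(s);
		return NULL;
	}
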
534 586
535/* caller must hold sessionid_lock */ 587/* caller must hold sessionid_lock */
@@ -572,19 +624,16 @@ release_session(struct nfsd4_session *ses)
572 nfsd4_put_session(ses); 624 nfsd4_put_session(ses);
573} 625}
574 626
575static void nfsd4_release_respages(struct page **respages, short resused);
576
577void 627void
578free_session(struct kref *kref) 628free_session(struct kref *kref)
579{ 629{
580 struct nfsd4_session *ses; 630 struct nfsd4_session *ses;
581 int i;
582 631
583 ses = container_of(kref, struct nfsd4_session, se_ref); 632 ses = container_of(kref, struct nfsd4_session, se_ref);
584 for (i = 0; i < ses->se_fchannel.maxreqs; i++) { 633 spin_lock(&nfsd_drc_lock);
585 struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; 634 nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE;
586 nfsd4_release_respages(e->ce_respages, e->ce_resused); 635 spin_unlock(&nfsd_drc_lock);
587 } 636 free_session_slots(ses);
588 kfree(ses); 637 kfree(ses);
589} 638}
590 639
@@ -647,18 +696,14 @@ shutdown_callback_client(struct nfs4_client *clp)
647 clp->cl_cb_conn.cb_client = NULL; 696 clp->cl_cb_conn.cb_client = NULL;
648 rpc_shutdown_client(clnt); 697 rpc_shutdown_client(clnt);
649 } 698 }
650 if (clp->cl_cb_conn.cb_cred) {
651 put_rpccred(clp->cl_cb_conn.cb_cred);
652 clp->cl_cb_conn.cb_cred = NULL;
653 }
654} 699}
655 700
656static inline void 701static inline void
657free_client(struct nfs4_client *clp) 702free_client(struct nfs4_client *clp)
658{ 703{
659 shutdown_callback_client(clp); 704 shutdown_callback_client(clp);
660 nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, 705 if (clp->cl_cb_xprt)
661 clp->cl_slot.sl_cache_entry.ce_resused); 706 svc_xprt_put(clp->cl_cb_xprt);
662 if (clp->cl_cred.cr_group_info) 707 if (clp->cl_cred.cr_group_info)
663 put_group_info(clp->cl_cred.cr_group_info); 708 put_group_info(clp->cl_cred.cr_group_info);
664 kfree(clp->cl_principal); 709 kfree(clp->cl_principal);
@@ -714,25 +759,6 @@ expire_client(struct nfs4_client *clp)
714 put_nfs4_client(clp); 759 put_nfs4_client(clp);
715} 760}
716 761
717static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
718{
719 struct nfs4_client *clp;
720
721 clp = alloc_client(name);
722 if (clp == NULL)
723 return NULL;
724 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
725 atomic_set(&clp->cl_count, 1);
726 atomic_set(&clp->cl_cb_conn.cb_set, 0);
727 INIT_LIST_HEAD(&clp->cl_idhash);
728 INIT_LIST_HEAD(&clp->cl_strhash);
729 INIT_LIST_HEAD(&clp->cl_openowners);
730 INIT_LIST_HEAD(&clp->cl_delegations);
731 INIT_LIST_HEAD(&clp->cl_sessions);
732 INIT_LIST_HEAD(&clp->cl_lru);
733 return clp;
734}
735
736static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) 762static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
737{ 763{
738 memcpy(target->cl_verifier.data, source->data, 764 memcpy(target->cl_verifier.data, source->data,
@@ -795,6 +821,46 @@ static void gen_confirm(struct nfs4_client *clp)
795 *p++ = i++; 821 *p++ = i++;
796} 822}
797 823
824static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
825 struct svc_rqst *rqstp, nfs4_verifier *verf)
826{
827 struct nfs4_client *clp;
828 struct sockaddr *sa = svc_addr(rqstp);
829 char *princ;
830
831 clp = alloc_client(name);
832 if (clp == NULL)
833 return NULL;
834
835 princ = svc_gss_principal(rqstp);
836 if (princ) {
837 clp->cl_principal = kstrdup(princ, GFP_KERNEL);
838 if (clp->cl_principal == NULL) {
839 free_client(clp);
840 return NULL;
841 }
842 }
843
844 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
845 atomic_set(&clp->cl_count, 1);
846 atomic_set(&clp->cl_cb_conn.cb_set, 0);
847 INIT_LIST_HEAD(&clp->cl_idhash);
848 INIT_LIST_HEAD(&clp->cl_strhash);
849 INIT_LIST_HEAD(&clp->cl_openowners);
850 INIT_LIST_HEAD(&clp->cl_delegations);
851 INIT_LIST_HEAD(&clp->cl_sessions);
852 INIT_LIST_HEAD(&clp->cl_lru);
853 clear_bit(0, &clp->cl_cb_slot_busy);
854 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
855 copy_verf(clp, verf);
856 rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
857 clp->cl_flavor = rqstp->rq_flavor;
858 copy_cred(&clp->cl_cred, &rqstp->rq_cred);
859 gen_confirm(clp);
860
861 return clp;
862}
863
798static int check_name(struct xdr_netobj name) 864static int check_name(struct xdr_netobj name)
799{ 865{
800 if (name.len == 0) 866 if (name.len == 0)
@@ -902,93 +968,40 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
902 return NULL; 968 return NULL;
903} 969}
904 970
905/* a helper function for parse_callback */
906static int
907parse_octet(unsigned int *lenp, char **addrp)
908{
909 unsigned int len = *lenp;
910 char *p = *addrp;
911 int n = -1;
912 char c;
913
914 for (;;) {
915 if (!len)
916 break;
917 len--;
918 c = *p++;
919 if (c == '.')
920 break;
921 if ((c < '0') || (c > '9')) {
922 n = -1;
923 break;
924 }
925 if (n < 0)
926 n = 0;
927 n = (n * 10) + (c - '0');
928 if (n > 255) {
929 n = -1;
930 break;
931 }
932 }
933 *lenp = len;
934 *addrp = p;
935 return n;
936}
937
938/* parse and set the setclientid ipv4 callback address */
939static int
940parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
941{
942 int temp = 0;
943 u32 cbaddr = 0;
944 u16 cbport = 0;
945 u32 addrlen = addr_len;
946 char *addr = addr_val;
947 int i, shift;
948
949 /* ipaddress */
950 shift = 24;
951 for(i = 4; i > 0 ; i--) {
952 if ((temp = parse_octet(&addrlen, &addr)) < 0) {
953 return 0;
954 }
955 cbaddr |= (temp << shift);
956 if (shift > 0)
957 shift -= 8;
958 }
959 *cbaddrp = cbaddr;
960
961 /* port */
962 shift = 8;
963 for(i = 2; i > 0 ; i--) {
964 if ((temp = parse_octet(&addrlen, &addr)) < 0) {
965 return 0;
966 }
967 cbport |= (temp << shift);
968 if (shift > 0)
969 shift -= 8;
970 }
971 *cbportp = cbport;
972 return 1;
973}
974
975static void 971static void
976gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) 972gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
977{ 973{
978 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 974 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
979 975 unsigned short expected_family;
980 /* Currently, we only support tcp for the callback channel */ 976
981 if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) 977 /* Currently, we only support tcp and tcp6 for the callback channel */
978 if (se->se_callback_netid_len == 3 &&
979 !memcmp(se->se_callback_netid_val, "tcp", 3))
980 expected_family = AF_INET;
981 else if (se->se_callback_netid_len == 4 &&
982 !memcmp(se->se_callback_netid_val, "tcp6", 4))
983 expected_family = AF_INET6;
984 else
982 goto out_err; 985 goto out_err;
983 986
984 if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, 987 cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
985 &cb->cb_addr, &cb->cb_port))) 988 se->se_callback_addr_len,
989 (struct sockaddr *) &cb->cb_addr,
990 sizeof(cb->cb_addr));
991
992 if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family)
986 goto out_err; 993 goto out_err;
994
995 if (cb->cb_addr.ss_family == AF_INET6)
996 ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid;
997
987 cb->cb_minorversion = 0; 998 cb->cb_minorversion = 0;
988 cb->cb_prog = se->se_callback_prog; 999 cb->cb_prog = se->se_callback_prog;
989 cb->cb_ident = se->se_callback_ident; 1000 cb->cb_ident = se->se_callback_ident;
990 return; 1001 return;
991out_err: 1002out_err:
1003 cb->cb_addr.ss_family = AF_UNSPEC;
1004 cb->cb_addrlen = 0;
992 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 1005 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
993 "will not receive delegations\n", 1006 "will not receive delegations\n",
994 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 1007 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
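
rpc_uaddr2sockaddr() replaces the hand-rolled dotted-quad parser and understands the RFC 5665 universal-address form for both netids, so the callback address may now be IPv4 or IPv6. Illustrative values, not taken from the patch:

	/*
	 * netid "tcp",  uaddr "192.0.2.53.8.1" -> 192.0.2.53,
	 *	port 8 * 256 + 1 = 2049
	 * netid "tcp6", uaddr "fe80::1.8.1"    -> fe80::1 (the scope id is
	 *	filled in separately, as the code above does), port 2049
	 * The last two dotted decimal fields always encode the port,
	 * high-order byte first.
	 */
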
@@ -996,175 +1009,87 @@ out_err:
996 return; 1009 return;
997} 1010}
998 1011
999void
1000nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
1001{
1002 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1003
1004 resp->cstate.statp = statp;
1005}
1006
1007/* 1012/*
1008 * Dereference the result pages. 1013 * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size.
1009 */ 1014 */
1010static void 1015void
1011nfsd4_release_respages(struct page **respages, short resused) 1016nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
1012{ 1017{
1013 int i; 1018 struct nfsd4_slot *slot = resp->cstate.slot;
1019 unsigned int base;
1014 1020
1015 dprintk("--> %s\n", __func__); 1021 dprintk("--> %s slot %p\n", __func__, slot);
1016 for (i = 0; i < resused; i++) {
1017 if (!respages[i])
1018 continue;
1019 put_page(respages[i]);
1020 respages[i] = NULL;
1021 }
1022}
1023 1022
1024static void 1023 slot->sl_opcnt = resp->opcnt;
1025nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) 1024 slot->sl_status = resp->cstate.status;
1026{
1027 int i;
1028 1025
1029 for (i = 0; i < count; i++) { 1026 if (nfsd4_not_cached(resp)) {
1030 topages[i] = frompages[i]; 1027 slot->sl_datalen = 0;
1031 if (!topages[i]) 1028 return;
1032 continue;
1033 get_page(topages[i]);
1034 } 1029 }
1030 slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap;
1031 base = (char *)resp->cstate.datap -
1032 (char *)resp->xbuf->head[0].iov_base;
1033 if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
1034 slot->sl_datalen))
1035 WARN("%s: sessions DRC could not cache compound\n", __func__);
1036 return;
1035} 1037}
1036 1038
1037/* 1039/*
1038 * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous 1040 * Encode the replay sequence operation from the slot values.
1039 * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total 1041 * If cachethis is FALSE encode the uncached rep error on the next
1040 * length of the XDR response is less than se_fmaxresp_cached 1042 * operation which sets resp->p and increments resp->opcnt for
1041 * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a 1043 * nfs4svc_encode_compoundres.
1042 * of the reply (e.g. readdir).
1043 * 1044 *
1044 * Store the base and length of the rq_req.head[0] page
1045 * of the NFSv4.1 data, just past the rpc header.
1046 */ 1045 */
1047void 1046static __be32
1048nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) 1047nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
1048 struct nfsd4_compoundres *resp)
1049{ 1049{
1050 struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; 1050 struct nfsd4_op *op;
1051 struct svc_rqst *rqstp = resp->rqstp; 1051 struct nfsd4_slot *slot = resp->cstate.slot;
1052 struct nfsd4_compoundargs *args = rqstp->rq_argp;
1053 struct nfsd4_op *op = &args->ops[resp->opcnt];
1054 struct kvec *resv = &rqstp->rq_res.head[0];
1055
1056 dprintk("--> %s entry %p\n", __func__, entry);
1057
1058 /* Don't cache a failed OP_SEQUENCE. */
1059 if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status)
1060 return;
1061 1052
1062 nfsd4_release_respages(entry->ce_respages, entry->ce_resused); 1053 dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__,
1063 entry->ce_opcnt = resp->opcnt; 1054 resp->opcnt, resp->cstate.slot->sl_cachethis);
1064 entry->ce_status = resp->cstate.status;
1065 1055
1066 /* 1056 /* Encode the replayed sequence operation */
1067 * Don't need a page to cache just the sequence operation - the slot 1057 op = &args->ops[resp->opcnt - 1];
1068 * does this for us! 1058 nfsd4_encode_operation(resp, op);
1069 */
1070 1059
1071 if (nfsd4_not_cached(resp)) { 1060 /* Return nfserr_retry_uncached_rep in next operation. */
1072 entry->ce_resused = 0; 1061 if (args->opcnt > 1 && slot->sl_cachethis == 0) {
1073 entry->ce_rpchdrlen = 0; 1062 op = &args->ops[resp->opcnt++];
1074 dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, 1063 op->status = nfserr_retry_uncached_rep;
1075 resp->cstate.slot->sl_cache_entry.ce_cachethis); 1064 nfsd4_encode_operation(resp, op);
1076 return;
1077 }
1078 entry->ce_resused = rqstp->rq_resused;
1079 if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1)
1080 entry->ce_resused = NFSD_PAGES_PER_SLOT + 1;
1081 nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages,
1082 entry->ce_resused);
1083 entry->ce_datav.iov_base = resp->cstate.statp;
1084 entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp -
1085 (char *)page_address(rqstp->rq_respages[0]));
1086 /* Current request rpc header length*/
1087 entry->ce_rpchdrlen = (char *)resp->cstate.statp -
1088 (char *)page_address(rqstp->rq_respages[0]);
1089}
1090
1091/*
1092 * We keep the rpc header, but take the nfs reply from the replycache.
1093 */
1094static int
1095nfsd41_copy_replay_data(struct nfsd4_compoundres *resp,
1096 struct nfsd4_cache_entry *entry)
1097{
1098 struct svc_rqst *rqstp = resp->rqstp;
1099 struct kvec *resv = &resp->rqstp->rq_res.head[0];
1100 int len;
1101
1102 /* Current request rpc header length*/
1103 len = (char *)resp->cstate.statp -
1104 (char *)page_address(rqstp->rq_respages[0]);
1105 if (entry->ce_datav.iov_len + len > PAGE_SIZE) {
1106 dprintk("%s v41 cached reply too large (%Zd).\n", __func__,
1107 entry->ce_datav.iov_len);
1108 return 0;
1109 } 1065 }
1110 /* copy the cached reply nfsd data past the current rpc header */ 1066 return op->status;
1111 memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base,
1112 entry->ce_datav.iov_len);
1113 resv->iov_len = len + entry->ce_datav.iov_len;
1114 return 1;
1115} 1067}
1116 1068
1117/* 1069/*
1118 * Keep the first page of the replay. Copy the NFSv4.1 data from the first 1070 * The sequence operation is not cached because we can use the slot and
1119 * cached page. Replace any futher replay pages from the cache. 1071 * session values.
1120 */ 1072 */
1121__be32 1073__be32
1122nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, 1074nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1123 struct nfsd4_sequence *seq) 1075 struct nfsd4_sequence *seq)
1124{ 1076{
1125 struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; 1077 struct nfsd4_slot *slot = resp->cstate.slot;
1126 __be32 status; 1078 __be32 status;
1127 1079
1128 dprintk("--> %s entry %p\n", __func__, entry); 1080 dprintk("--> %s slot %p\n", __func__, slot);
1129
1130 /*
1131 * If this is just the sequence operation, we did not keep
1132 * a page in the cache entry because we can just use the
1133 * slot info stored in struct nfsd4_sequence that was checked
1134 * against the slot in nfsd4_sequence().
1135 *
1136 * This occurs when seq->cachethis is FALSE, or when the client
1137 * session inactivity timer fires and a solo sequence operation
1138 * is sent (lease renewal).
1139 */
1140 if (seq && nfsd4_not_cached(resp)) {
1141 seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
1142 return nfs_ok;
1143 }
1144
1145 if (!nfsd41_copy_replay_data(resp, entry)) {
1146 /*
1147 * Not enough room to use the replay rpc header, send the
1148 * cached header. Release all the allocated result pages.
1149 */
1150 svc_free_res_pages(resp->rqstp);
1151 nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages,
1152 entry->ce_resused);
1153 } else {
1154 /* Release all but the first allocated result page */
1155 1081
1156 resp->rqstp->rq_resused--; 1082 /* Either returns 0 or nfserr_retry_uncached */
1157 svc_free_res_pages(resp->rqstp); 1083 status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
1084 if (status == nfserr_retry_uncached_rep)
1085 return status;
1158 1086
1159 nfsd4_copy_pages(&resp->rqstp->rq_respages[1], 1087 /* The sequence operation has been encoded, cstate->datap set. */
1160 &entry->ce_respages[1], 1088 memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen);
1161 entry->ce_resused - 1);
1162 }
1163 1089
1164 resp->rqstp->rq_resused = entry->ce_resused; 1090 resp->opcnt = slot->sl_opcnt;
1165 resp->opcnt = entry->ce_opcnt; 1091 resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen);
1166 resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; 1092 status = slot->sl_status;
1167 status = entry->ce_status;
1168 1093
1169 return status; 1094 return status;
1170} 1095}
@@ -1194,13 +1119,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1194 int status; 1119 int status;
1195 unsigned int strhashval; 1120 unsigned int strhashval;
1196 char dname[HEXDIR_LEN]; 1121 char dname[HEXDIR_LEN];
1122 char addr_str[INET6_ADDRSTRLEN];
1197 nfs4_verifier verf = exid->verifier; 1123 nfs4_verifier verf = exid->verifier;
1198 u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; 1124 struct sockaddr *sa = svc_addr(rqstp);
1199 1125
1126 rpc_ntop(sa, addr_str, sizeof(addr_str));
1200 dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " 1127 dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
1201 " ip_addr=%u flags %x, spa_how %d\n", 1128 "ip_addr=%s flags %x, spa_how %d\n",
1202 __func__, rqstp, exid, exid->clname.len, exid->clname.data, 1129 __func__, rqstp, exid, exid->clname.len, exid->clname.data,
1203 ip_addr, exid->flags, exid->spa_how); 1130 addr_str, exid->flags, exid->spa_how);
1204 1131
1205 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) 1132 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A))
1206 return nfserr_inval; 1133 return nfserr_inval;
@@ -1281,28 +1208,23 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1281 1208
1282out_new: 1209out_new:
1283 /* Normal case */ 1210 /* Normal case */
1284 new = create_client(exid->clname, dname); 1211 new = create_client(exid->clname, dname, rqstp, &verf);
1285 if (new == NULL) { 1212 if (new == NULL) {
1286 status = nfserr_resource; 1213 status = nfserr_serverfault;
1287 goto out; 1214 goto out;
1288 } 1215 }
1289 1216
1290 copy_verf(new, &verf);
1291 copy_cred(&new->cl_cred, &rqstp->rq_cred);
1292 new->cl_addr = ip_addr;
1293 gen_clid(new); 1217 gen_clid(new);
1294 gen_confirm(new);
1295 add_to_unconfirmed(new, strhashval); 1218 add_to_unconfirmed(new, strhashval);
1296out_copy: 1219out_copy:
1297 exid->clientid.cl_boot = new->cl_clientid.cl_boot; 1220 exid->clientid.cl_boot = new->cl_clientid.cl_boot;
1298 exid->clientid.cl_id = new->cl_clientid.cl_id; 1221 exid->clientid.cl_id = new->cl_clientid.cl_id;
1299 1222
1300 new->cl_slot.sl_seqid = 0;
1301 exid->seqid = 1; 1223 exid->seqid = 1;
1302 nfsd4_set_ex_flags(new, exid); 1224 nfsd4_set_ex_flags(new, exid);
1303 1225
1304 dprintk("nfsd4_exchange_id seqid %d flags %x\n", 1226 dprintk("nfsd4_exchange_id seqid %d flags %x\n",
1305 new->cl_slot.sl_seqid, new->cl_exchange_flags); 1227 new->cl_cs_slot.sl_seqid, new->cl_exchange_flags);
1306 status = nfs_ok; 1228 status = nfs_ok;
1307 1229
1308out: 1230out:
@@ -1313,40 +1235,60 @@ error:
1313} 1235}
1314 1236
1315static int 1237static int
1316check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) 1238check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
1317{ 1239{
1318 dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, 1240 dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
1319 slot->sl_seqid); 1241 slot_seqid);
1320 1242
1321 /* The slot is in use, and no response has been sent. */ 1243 /* The slot is in use, and no response has been sent. */
1322 if (slot->sl_inuse) { 1244 if (slot_inuse) {
1323 if (seqid == slot->sl_seqid) 1245 if (seqid == slot_seqid)
1324 return nfserr_jukebox; 1246 return nfserr_jukebox;
1325 else 1247 else
1326 return nfserr_seq_misordered; 1248 return nfserr_seq_misordered;
1327 } 1249 }
1328 /* Normal */ 1250 /* Normal */
1329 if (likely(seqid == slot->sl_seqid + 1)) 1251 if (likely(seqid == slot_seqid + 1))
1330 return nfs_ok; 1252 return nfs_ok;
1331 /* Replay */ 1253 /* Replay */
1332 if (seqid == slot->sl_seqid) 1254 if (seqid == slot_seqid)
1333 return nfserr_replay_cache; 1255 return nfserr_replay_cache;
1334 /* Wraparound */ 1256 /* Wraparound */
1335 if (seqid == 1 && (slot->sl_seqid + 1) == 0) 1257 if (seqid == 1 && (slot_seqid + 1) == 0)
1336 return nfs_ok; 1258 return nfs_ok;
1337 /* Misordered replay or misordered new request */ 1259 /* Misordered replay or misordered new request */
1338 return nfserr_seq_misordered; 1260 return nfserr_seq_misordered;
1339} 1261}
1340 1262
1263/*
1264 * Cache the create session result into the create session single DRC
1265 * slot cache by saving the xdr structure. sl_seqid has been set.
1266 * Do this for solo or embedded create session operations.
1267 */
1268static void
1269nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
1270 struct nfsd4_clid_slot *slot, int nfserr)
1271{
1272 slot->sl_status = nfserr;
1273 memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
1274}
1275
1276static __be32
1277nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
1278 struct nfsd4_clid_slot *slot)
1279{
1280 memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
1281 return slot->sl_status;
1282}
1283
1341__be32 1284__be32
1342nfsd4_create_session(struct svc_rqst *rqstp, 1285nfsd4_create_session(struct svc_rqst *rqstp,
1343 struct nfsd4_compound_state *cstate, 1286 struct nfsd4_compound_state *cstate,
1344 struct nfsd4_create_session *cr_ses) 1287 struct nfsd4_create_session *cr_ses)
1345{ 1288{
1346 u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; 1289 struct sockaddr *sa = svc_addr(rqstp);
1347 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1348 struct nfs4_client *conf, *unconf; 1290 struct nfs4_client *conf, *unconf;
1349 struct nfsd4_slot *slot = NULL; 1291 struct nfsd4_clid_slot *cs_slot = NULL;
1350 int status = 0; 1292 int status = 0;
1351 1293
1352 nfs4_lock_state(); 1294 nfs4_lock_state();
@@ -1354,40 +1296,38 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1354 conf = find_confirmed_client(&cr_ses->clientid); 1296 conf = find_confirmed_client(&cr_ses->clientid);
1355 1297
1356 if (conf) { 1298 if (conf) {
1357 slot = &conf->cl_slot; 1299 cs_slot = &conf->cl_cs_slot;
1358 status = check_slot_seqid(cr_ses->seqid, slot); 1300 status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
1359 if (status == nfserr_replay_cache) { 1301 if (status == nfserr_replay_cache) {
1360 dprintk("Got a create_session replay! seqid= %d\n", 1302 dprintk("Got a create_session replay! seqid= %d\n",
1361 slot->sl_seqid); 1303 cs_slot->sl_seqid);
1362 cstate->slot = slot;
1363 cstate->status = status;
1364 /* Return the cached reply status */ 1304 /* Return the cached reply status */
1365 status = nfsd4_replay_cache_entry(resp, NULL); 1305 status = nfsd4_replay_create_session(cr_ses, cs_slot);
1366 goto out; 1306 goto out;
1367 } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { 1307 } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
1368 status = nfserr_seq_misordered; 1308 status = nfserr_seq_misordered;
1369 dprintk("Sequence misordered!\n"); 1309 dprintk("Sequence misordered!\n");
1370 dprintk("Expected seqid= %d but got seqid= %d\n", 1310 dprintk("Expected seqid= %d but got seqid= %d\n",
1371 slot->sl_seqid, cr_ses->seqid); 1311 cs_slot->sl_seqid, cr_ses->seqid);
1372 goto out; 1312 goto out;
1373 } 1313 }
1374 conf->cl_slot.sl_seqid++; 1314 cs_slot->sl_seqid++;
1375 } else if (unconf) { 1315 } else if (unconf) {
1376 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 1316 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1377 (ip_addr != unconf->cl_addr)) { 1317 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
1378 status = nfserr_clid_inuse; 1318 status = nfserr_clid_inuse;
1379 goto out; 1319 goto out;
1380 } 1320 }
1381 1321
1382 slot = &unconf->cl_slot; 1322 cs_slot = &unconf->cl_cs_slot;
1383 status = check_slot_seqid(cr_ses->seqid, slot); 1323 status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
1384 if (status) { 1324 if (status) {
1385 /* an unconfirmed replay returns misordered */ 1325 /* an unconfirmed replay returns misordered */
1386 status = nfserr_seq_misordered; 1326 status = nfserr_seq_misordered;
1387 goto out; 1327 goto out_cache;
1388 } 1328 }
1389 1329
1390 slot->sl_seqid++; /* from 0 to 1 */ 1330 cs_slot->sl_seqid++; /* from 0 to 1 */
1391 move_to_confirmed(unconf); 1331 move_to_confirmed(unconf);
1392 1332
1393 /* 1333 /*
@@ -1396,6 +1336,19 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1396 cr_ses->flags &= ~SESSION4_PERSIST; 1336 cr_ses->flags &= ~SESSION4_PERSIST;
1397 cr_ses->flags &= ~SESSION4_RDMA; 1337 cr_ses->flags &= ~SESSION4_RDMA;
1398 1338
1339 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1340 unconf->cl_cb_xprt = rqstp->rq_xprt;
1341 svc_xprt_get(unconf->cl_cb_xprt);
1342 rpc_copy_addr(
1343 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1344 sa);
1345 unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1346 unconf->cl_cb_conn.cb_minorversion =
1347 cstate->minorversion;
1348 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1349 unconf->cl_cb_seq_nr = 1;
1350 nfsd4_probe_callback(unconf);
1351 }
1399 conf = unconf; 1352 conf = unconf;
1400 } else { 1353 } else {
1401 status = nfserr_stale_clientid; 1354 status = nfserr_stale_clientid;
@@ -1408,12 +1361,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1408 1361
1409 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, 1362 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
1410 NFS4_MAX_SESSIONID_LEN); 1363 NFS4_MAX_SESSIONID_LEN);
1411 cr_ses->seqid = slot->sl_seqid; 1364 cr_ses->seqid = cs_slot->sl_seqid;
1412 1365
1413 slot->sl_inuse = true; 1366out_cache:
1414 cstate->slot = slot; 1367 /* cache solo and embedded create sessions under the state lock */
1415 /* Ensure a page is used for the cache */ 1368 nfsd4_cache_create_session(cr_ses, cs_slot, status);
1416 slot->sl_cache_entry.ce_cachethis = 1;
1417out: 1369out:
1418 nfs4_unlock_state(); 1370 nfs4_unlock_state();
1419 dprintk("%s returns %d\n", __func__, ntohl(status)); 1371 dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1478,18 +1430,23 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1478 if (seq->slotid >= session->se_fchannel.maxreqs) 1430 if (seq->slotid >= session->se_fchannel.maxreqs)
1479 goto out; 1431 goto out;
1480 1432
1481 slot = &session->se_slots[seq->slotid]; 1433 slot = session->se_slots[seq->slotid];
1482 dprintk("%s: slotid %d\n", __func__, seq->slotid); 1434 dprintk("%s: slotid %d\n", __func__, seq->slotid);
1483 1435
1484 status = check_slot_seqid(seq->seqid, slot); 1436 /* We do not negotiate the number of slots yet, so set the
1437 * maxslots to the session maxreqs which is used to encode
1438 * sr_highest_slotid and the sr_target_slot id to maxslots */
1439 seq->maxslots = session->se_fchannel.maxreqs;
1440
1441 status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse);
1485 if (status == nfserr_replay_cache) { 1442 if (status == nfserr_replay_cache) {
1486 cstate->slot = slot; 1443 cstate->slot = slot;
1487 cstate->session = session; 1444 cstate->session = session;
1488 /* Return the cached reply status and set cstate->status 1445 /* Return the cached reply status and set cstate->status
1489 * for nfsd4_svc_encode_compoundres processing */ 1446 * for nfsd4_proc_compound processing */
1490 status = nfsd4_replay_cache_entry(resp, seq); 1447 status = nfsd4_replay_cache_entry(resp, seq);
1491 cstate->status = nfserr_replay_cache; 1448 cstate->status = nfserr_replay_cache;
1492 goto replay_cache; 1449 goto out;
1493 } 1450 }
1494 if (status) 1451 if (status)
1495 goto out; 1452 goto out;
@@ -1497,23 +1454,23 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1497 /* Success! bump slot seqid */ 1454 /* Success! bump slot seqid */
1498 slot->sl_inuse = true; 1455 slot->sl_inuse = true;
1499 slot->sl_seqid = seq->seqid; 1456 slot->sl_seqid = seq->seqid;
1500 slot->sl_cache_entry.ce_cachethis = seq->cachethis; 1457 slot->sl_cachethis = seq->cachethis;
1501 /* Always set the cache entry cachethis for solo sequence */
1502 if (nfsd4_is_solo_sequence(resp))
1503 slot->sl_cache_entry.ce_cachethis = 1;
1504 1458
1505 cstate->slot = slot; 1459 cstate->slot = slot;
1506 cstate->session = session; 1460 cstate->session = session;
1507 1461
1508replay_cache: 1462 /* Hold a session reference until done processing the compound:
1509 /* Renew the clientid on success and on replay.
1510 * Hold a session reference until done processing the compound:
1511 * nfsd4_put_session called only if the cstate slot is set. 1463 * nfsd4_put_session called only if the cstate slot is set.
1512 */ 1464 */
1513 renew_client(session->se_client);
1514 nfsd4_get_session(session); 1465 nfsd4_get_session(session);
1515out: 1466out:
1516 spin_unlock(&sessionid_lock); 1467 spin_unlock(&sessionid_lock);
1468 /* Renew the clientid on success and on replay */
1469 if (cstate->session) {
1470 nfs4_lock_state();
1471 renew_client(session->se_client);
1472 nfs4_unlock_state();
1473 }
1517 dprintk("%s: return %d\n", __func__, ntohl(status)); 1474 dprintk("%s: return %d\n", __func__, ntohl(status));
1518 return status; 1475 return status;
1519} 1476}
@@ -1522,7 +1479,7 @@ __be32
1522nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1479nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1523 struct nfsd4_setclientid *setclid) 1480 struct nfsd4_setclientid *setclid)
1524{ 1481{
1525 struct sockaddr_in *sin = svc_addr_in(rqstp); 1482 struct sockaddr *sa = svc_addr(rqstp);
1526 struct xdr_netobj clname = { 1483 struct xdr_netobj clname = {
1527 .len = setclid->se_namelen, 1484 .len = setclid->se_namelen,
1528 .data = setclid->se_name, 1485 .data = setclid->se_name,
@@ -1531,7 +1488,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1531 unsigned int strhashval; 1488 unsigned int strhashval;
1532 struct nfs4_client *conf, *unconf, *new; 1489 struct nfs4_client *conf, *unconf, *new;
1533 __be32 status; 1490 __be32 status;
1534 char *princ;
1535 char dname[HEXDIR_LEN]; 1491 char dname[HEXDIR_LEN];
1536 1492
1537 if (!check_name(clname)) 1493 if (!check_name(clname))
@@ -1554,8 +1510,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1554 /* RFC 3530 14.2.33 CASE 0: */ 1510 /* RFC 3530 14.2.33 CASE 0: */
1555 status = nfserr_clid_inuse; 1511 status = nfserr_clid_inuse;
1556 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { 1512 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
1557 dprintk("NFSD: setclientid: string in use by client" 1513 char addr_str[INET6_ADDRSTRLEN];
1558 " at %pI4\n", &conf->cl_addr); 1514 rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
1515 sizeof(addr_str));
1516 dprintk("NFSD: setclientid: string in use by client "
1517 "at %s\n", addr_str);
1559 goto out; 1518 goto out;
1560 } 1519 }
1561 } 1520 }
@@ -1573,7 +1532,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1573 */ 1532 */
1574 if (unconf) 1533 if (unconf)
1575 expire_client(unconf); 1534 expire_client(unconf);
1576 new = create_client(clname, dname); 1535 new = create_client(clname, dname, rqstp, &clverifier);
1577 if (new == NULL) 1536 if (new == NULL)
1578 goto out; 1537 goto out;
1579 gen_clid(new); 1538 gen_clid(new);
@@ -1590,7 +1549,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1590 */ 1549 */
1591 expire_client(unconf); 1550 expire_client(unconf);
1592 } 1551 }
1593 new = create_client(clname, dname); 1552 new = create_client(clname, dname, rqstp, &clverifier);
1594 if (new == NULL) 1553 if (new == NULL)
1595 goto out; 1554 goto out;
1596 copy_clid(new, conf); 1555 copy_clid(new, conf);
@@ -1600,7 +1559,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1600 * probable client reboot; state will be removed if 1559 * probable client reboot; state will be removed if
1601 * confirmed. 1560 * confirmed.
1602 */ 1561 */
1603 new = create_client(clname, dname); 1562 new = create_client(clname, dname, rqstp, &clverifier);
1604 if (new == NULL) 1563 if (new == NULL)
1605 goto out; 1564 goto out;
1606 gen_clid(new); 1565 gen_clid(new);
@@ -1611,25 +1570,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1611 * confirmed. 1570 * confirmed.
1612 */ 1571 */
1613 expire_client(unconf); 1572 expire_client(unconf);
1614 new = create_client(clname, dname); 1573 new = create_client(clname, dname, rqstp, &clverifier);
1615 if (new == NULL) 1574 if (new == NULL)
1616 goto out; 1575 goto out;
1617 gen_clid(new); 1576 gen_clid(new);
1618 } 1577 }
1619 copy_verf(new, &clverifier); 1578 gen_callback(new, setclid, rpc_get_scope_id(sa));
1620 new->cl_addr = sin->sin_addr.s_addr;
1621 new->cl_flavor = rqstp->rq_flavor;
1622 princ = svc_gss_principal(rqstp);
1623 if (princ) {
1624 new->cl_principal = kstrdup(princ, GFP_KERNEL);
1625 if (new->cl_principal == NULL) {
1626 free_client(new);
1627 goto out;
1628 }
1629 }
1630 copy_cred(&new->cl_cred, &rqstp->rq_cred);
1631 gen_confirm(new);
1632 gen_callback(new, setclid);
1633 add_to_unconfirmed(new, strhashval); 1579 add_to_unconfirmed(new, strhashval);
1634 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 1580 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
1635 setclid->se_clientid.cl_id = new->cl_clientid.cl_id; 1581 setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
@@ -1651,7 +1597,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1651 struct nfsd4_compound_state *cstate, 1597 struct nfsd4_compound_state *cstate,
1652 struct nfsd4_setclientid_confirm *setclientid_confirm) 1598 struct nfsd4_setclientid_confirm *setclientid_confirm)
1653{ 1599{
1654 struct sockaddr_in *sin = svc_addr_in(rqstp); 1600 struct sockaddr *sa = svc_addr(rqstp);
1655 struct nfs4_client *conf, *unconf; 1601 struct nfs4_client *conf, *unconf;
1656 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 1602 nfs4_verifier confirm = setclientid_confirm->sc_confirm;
1657 clientid_t * clid = &setclientid_confirm->sc_clientid; 1603 clientid_t * clid = &setclientid_confirm->sc_clientid;
@@ -1670,9 +1616,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1670 unconf = find_unconfirmed_client(clid); 1616 unconf = find_unconfirmed_client(clid);
1671 1617
1672 status = nfserr_clid_inuse; 1618 status = nfserr_clid_inuse;
1673 if (conf && conf->cl_addr != sin->sin_addr.s_addr) 1619 if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa))
1674 goto out; 1620 goto out;
1675 if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) 1621 if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa))
1676 goto out; 1622 goto out;
1677 1623
1678 /* 1624 /*
@@ -2163,7 +2109,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
2163 return -EAGAIN; 2109 return -EAGAIN;
2164} 2110}
2165 2111
2166static struct lock_manager_operations nfsd_lease_mng_ops = { 2112static const struct lock_manager_operations nfsd_lease_mng_ops = {
2167 .fl_break = nfsd_break_deleg_cb, 2113 .fl_break = nfsd_break_deleg_cb,
2168 .fl_release_private = nfsd_release_deleg_cb, 2114 .fl_release_private = nfsd_release_deleg_cb,
2169 .fl_copy_lock = nfsd_copy_lock_deleg_cb, 2115 .fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -3368,7 +3314,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
3368 3314
3369/* Hack!: For now, we're defining this just so we can use a pointer to it 3315/* Hack!: For now, we're defining this just so we can use a pointer to it
3370 * as a unique cookie to identify our (NFSv4's) posix locks. */ 3316 * as a unique cookie to identify our (NFSv4's) posix locks. */
3371static struct lock_manager_operations nfsd_posix_mng_ops = { 3317static const struct lock_manager_operations nfsd_posix_mng_ops = {
3372}; 3318};
3373 3319
3374static inline void 3320static inline void
@@ -4072,7 +4018,7 @@ set_max_delegations(void)
4072 4018
4073/* initialization to perform when the nfsd service is started: */ 4019/* initialization to perform when the nfsd service is started: */
4074 4020
4075static void 4021static int
4076__nfs4_state_start(void) 4022__nfs4_state_start(void)
4077{ 4023{
4078 unsigned long grace_time; 4024 unsigned long grace_time;
@@ -4084,19 +4030,26 @@ __nfs4_state_start(void)
4084 printk(KERN_INFO "NFSD: starting %ld-second grace period\n", 4030 printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
4085 grace_time/HZ); 4031 grace_time/HZ);
4086 laundry_wq = create_singlethread_workqueue("nfsd4"); 4032 laundry_wq = create_singlethread_workqueue("nfsd4");
4033 if (laundry_wq == NULL)
4034 return -ENOMEM;
4087 queue_delayed_work(laundry_wq, &laundromat_work, grace_time); 4035 queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
4088 set_max_delegations(); 4036 set_max_delegations();
4037 return set_callback_cred();
4089} 4038}
4090 4039
4091void 4040int
4092nfs4_state_start(void) 4041nfs4_state_start(void)
4093{ 4042{
4043 int ret;
4044
4094 if (nfs4_init) 4045 if (nfs4_init)
4095 return; 4046 return 0;
4096 nfsd4_load_reboot_recovery_data(); 4047 nfsd4_load_reboot_recovery_data();
4097 __nfs4_state_start(); 4048 ret = __nfs4_state_start();
4049 if (ret)
4050 return ret;
4098 nfs4_init = 1; 4051 nfs4_init = 1;
4099 return; 4052 return 0;
4100} 4053}
4101 4054
4102time_t 4055time_t
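
The reworked check_slot_seqid() above reduces the NFSv4.1 slot decision to pure arithmetic on two u32 values plus an in-use flag, which makes the state machine easy to exercise in isolation. A minimal standalone sketch of the same decision logic; the enum names are illustrative stand-ins for the kernel's nfserr_* codes:

#include <stdbool.h>
#include <stdint.h>

enum slot_check { SC_OK, SC_IN_PROGRESS, SC_REPLAY, SC_MISORDERED };

/* Mirrors the patched check_slot_seqid(), on bare integers. */
static enum slot_check slot_check_seqid(uint32_t seqid, uint32_t slot_seqid,
					bool slot_inuse)
{
	if (slot_inuse)			/* request still being processed */
		return seqid == slot_seqid ? SC_IN_PROGRESS : SC_MISORDERED;
	if (seqid == slot_seqid + 1)	/* normal: next request on this slot */
		return SC_OK;
	if (seqid == slot_seqid)	/* retransmission: serve from the DRC */
		return SC_REPLAY;
	if (seqid == 1 && slot_seqid + 1 == 0)	/* u32 wraparound */
		return SC_OK;
	return SC_MISORDERED;		/* misordered replay or new request */
}
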
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2dcc7feaa6f..0fbd50cee1f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1599,7 +1599,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
 static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat)
 {
 	struct svc_fh tmp_fh;
-	char *path, *rootpath;
+	char *path = NULL, *rootpath;
+	size_t rootlen;
 
 	fh_init(&tmp_fh, NFS4_FHSIZE);
 	*stat = exp_pseudoroot(rqstp, &tmp_fh);
@@ -1609,14 +1610,18 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
 
 	path = exp->ex_pathname;
 
-	if (strncmp(path, rootpath, strlen(rootpath))) {
+	rootlen = strlen(rootpath);
+	if (strncmp(path, rootpath, rootlen)) {
 		dprintk("nfsd: fs_locations failed;"
 			"%s is not contained in %s\n", path, rootpath);
 		*stat = nfserr_notsupp;
-		return NULL;
+		path = NULL;
+		goto out;
 	}
-
-	return path + strlen(rootpath);
+	path += rootlen;
+out:
+	fh_put(&tmp_fh);
+	return path;
 }
 
 /*
@@ -1793,11 +1798,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			goto out_nfserr;
 		}
 	}
-	if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
-		if (exp->ex_fslocs.locations == NULL) {
-			bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
-		}
-	}
 	if ((buflen -= 16) < 0)
 		goto out_resource;
 
@@ -1825,8 +1825,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 		goto out_resource;
 	if (!aclsupport)
 		word0 &= ~FATTR4_WORD0_ACL;
-	if (!exp->ex_fslocs.locations)
-		word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
 	if (!word2) {
 		WRITE32(2);
 		WRITE32(word0);
@@ -3064,6 +3062,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
 	WRITE32(0);
 
 	ADJUST_ARGS();
+	resp->cstate.datap = p; /* DRC cache data pointer */
 	return 0;
 }
 
@@ -3166,7 +3165,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
 		return status;
 
 	session = resp->cstate.session;
-	if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0)
+	if (session == NULL || slot->sl_cachethis == 0)
 		return status;
 
 	if (resp->opcnt >= args->opcnt)
@@ -3291,6 +3290,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 	/*
 	 * All that remains is to write the tag and operation count...
 	 */
+	struct nfsd4_compound_state *cs = &resp->cstate;
 	struct kvec *iov;
 	p = resp->tagp;
 	*p++ = htonl(resp->taglen);
@@ -3304,17 +3304,11 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 	iov = &rqstp->rq_res.head[0];
 	iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
 	BUG_ON(iov->iov_len > PAGE_SIZE);
-	if (nfsd4_has_session(&resp->cstate)) {
-		if (resp->cstate.status == nfserr_replay_cache &&
-		    !nfsd4_not_cached(resp)) {
-			iov->iov_len = resp->cstate.iovlen;
-		} else {
-			nfsd4_store_cache_entry(resp);
-			dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
-			resp->cstate.slot->sl_inuse = 0;
-		}
-		if (resp->cstate.session)
-			nfsd4_put_session(resp->cstate.session);
+	if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
+		nfsd4_store_cache_entry(resp);
+		dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
+		resp->cstate.slot->sl_inuse = false;
+		nfsd4_put_session(resp->cstate.session);
 	}
 	return 1;
 }
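
Taken together, the nfs4state.c and nfs4xdr.c hunks replace the page-based DRC with a flat per-slot byte buffer: nfsd4_store_cache_entry() copies the encoded reply that follows the SEQUENCE op into sl_data, and nfsd4_replay_cache_entry() re-encodes SEQUENCE from slot state and copies those bytes back. A toy user-space sketch of that store/replay cycle; the buffer size and all names here are invented for illustration, with nfsd4_check_drc_limit() playing the role of the length guard:

#include <stdio.h>
#include <string.h>

#define SLOT_DATA_MAX 1024	/* illustrative bound; the kernel sizes
				 * sl_data from the negotiated channel attrs */

struct toy_slot {
	unsigned int datalen;
	unsigned char data[SLOT_DATA_MAX];
};

/* Store the reply bytes that follow the SEQUENCE op. */
static int toy_store(struct toy_slot *slot, const void *buf, unsigned int len)
{
	if (len > SLOT_DATA_MAX)
		return -1;	/* the kernel's DRC limit check prevents this */
	memcpy(slot->data, buf, len);
	slot->datalen = len;
	return 0;
}

/* On replay, SEQUENCE is re-encoded from slot state, then the cached
 * bytes are spliced back in after it. */
static unsigned int toy_replay(const struct toy_slot *slot, void *buf)
{
	memcpy(buf, slot->data, slot->datalen);
	return slot->datalen;
}

int main(void)
{
	struct toy_slot slot = { 0 };
	unsigned char reply[64] = "encoded compound reply";
	unsigned char replayed[64];

	toy_store(&slot, reply, sizeof(reply));
	printf("replayed %u bytes\n", toy_replay(&slot, replayed));
	return 0;
}
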
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7e906c5b767..00388d2a3c9 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -174,12 +174,13 @@ static const struct file_operations exports_operations = {
 };
 
 extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
+extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
 
 static struct file_operations pool_stats_operations = {
 	.open		= nfsd_pool_stats_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release,
+	.release	= nfsd_pool_stats_release,
 	.owner		= THIS_MODULE,
 };
 
@@ -776,10 +777,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 		size -= len;
 		mesg += len;
 	}
-
-	mutex_unlock(&nfsd_mutex);
-	return (mesg-buf);
-
+	rv = mesg - buf;
 out_free:
 	kfree(nthreads);
 	mutex_unlock(&nfsd_mutex);
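
The write_pool_threads() fix above routes the success path through the same out_free label as the error paths, so nfsd_mutex is dropped exactly once. A generic user-space sketch of that single-exit shape under a lock; every name here is hypothetical, not nfsd code:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* All paths leave through "out", so the unlock happens exactly once. */
static int update_counts(int *counts, int n)
{
	int rv = 0;
	int *tmp;
	int i;

	pthread_mutex_lock(&lock);
	tmp = malloc(n * sizeof(*tmp));
	if (tmp == NULL) {
		rv = -1;
		goto out;
	}
	for (i = 0; i < n; i++)
		tmp[i] = counts[i] + 1;
	for (i = 0; i < n; i++)
		counts[i] = tmp[i];
	free(tmp);
out:
	pthread_mutex_unlock(&lock);
	return rv;
}
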
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8847f3fbfc1..01965b2f3a7 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -397,44 +397,51 @@ static inline void _fh_update_old(struct dentry *dentry,
 	fh->ofh_dirino = 0;
 }
 
-__be32
-fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
-	   struct svc_fh *ref_fh)
+static bool is_root_export(struct svc_export *exp)
 {
-	/* ref_fh is a reference file handle.
-	 * if it is non-null and for the same filesystem, then we should compose
-	 * a filehandle which is of the same version, where possible.
-	 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
-	 * Then create a 32byte filehandle using nfs_fhbase_old
-	 *
-	 */
+	return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
+}
 
-	u8 version;
-	u8 fsid_type = 0;
-	struct inode * inode = dentry->d_inode;
-	struct dentry *parent = dentry->d_parent;
-	__u32 *datap;
-	dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev;
-	int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root);
+static struct super_block *exp_sb(struct svc_export *exp)
+{
+	return exp->ex_path.dentry->d_inode->i_sb;
+}
 
-	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
-		MAJOR(ex_dev), MINOR(ex_dev),
-		(long) exp->ex_path.dentry->d_inode->i_ino,
-		parent->d_name.name, dentry->d_name.name,
-		(inode ? inode->i_ino : 0));
+static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
+{
+	switch (fsid_type) {
+	case FSID_DEV:
+		if (!old_valid_dev(exp_sb(exp)->s_dev))
+			return 0;
+		/* FALL THROUGH */
+	case FSID_MAJOR_MINOR:
+	case FSID_ENCODE_DEV:
+		return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
+	case FSID_NUM:
+		return exp->ex_flags & NFSEXP_FSID;
+	case FSID_UUID8:
+	case FSID_UUID16:
+		if (!is_root_export(exp))
+			return 0;
+		/* fall through */
+	case FSID_UUID4_INUM:
+	case FSID_UUID16_INUM:
+		return exp->ex_uuid != NULL;
+	}
+	return 1;
+}
 
-	/* Choose filehandle version and fsid type based on
-	 * the reference filehandle (if it is in the same export)
-	 * or the export options.
-	 */
- retry:
+
+static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh)
+{
+	u8 version;
+	u8 fsid_type;
+retry:
 	version = 1;
 	if (ref_fh && ref_fh->fh_export == exp) {
 		version = ref_fh->fh_handle.fh_version;
 		fsid_type = ref_fh->fh_handle.fh_fsid_type;
 
-		if (ref_fh == fhp)
-			fh_put(ref_fh);
 		ref_fh = NULL;
 
 		switch (version) {
@@ -447,58 +454,66 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 			goto retry;
 		}
 
-		/* Need to check that this type works for this
-		 * export point. As the fsid -> filesystem mapping
-		 * was guided by user-space, there is no guarantee
-		 * that the filesystem actually supports that fsid
-		 * type. If it doesn't we loop around again without
-		 * ref_fh set.
+		/*
+		 * As the fsid -> filesystem mapping was guided by
+		 * user-space, there is no guarantee that the filesystem
+		 * actually supports that fsid type. If it doesn't we
+		 * loop around again without ref_fh set.
 		 */
-		switch(fsid_type) {
-		case FSID_DEV:
-			if (!old_valid_dev(ex_dev))
-				goto retry;
-			/* FALL THROUGH */
-		case FSID_MAJOR_MINOR:
-		case FSID_ENCODE_DEV:
-			if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
-			      & FS_REQUIRES_DEV))
-				goto retry;
-			break;
-		case FSID_NUM:
-			if (! (exp->ex_flags & NFSEXP_FSID))
-				goto retry;
-			break;
-		case FSID_UUID8:
-		case FSID_UUID16:
-			if (!root_export)
-				goto retry;
-			/* fall through */
-		case FSID_UUID4_INUM:
-		case FSID_UUID16_INUM:
-			if (exp->ex_uuid == NULL)
-				goto retry;
-			break;
-		}
+		if (!fsid_type_ok_for_exp(fsid_type, exp))
+			goto retry;
 	} else if (exp->ex_flags & NFSEXP_FSID) {
 		fsid_type = FSID_NUM;
 	} else if (exp->ex_uuid) {
 		if (fhp->fh_maxsize >= 64) {
-			if (root_export)
+			if (is_root_export(exp))
 				fsid_type = FSID_UUID16;
 			else
 				fsid_type = FSID_UUID16_INUM;
 		} else {
-			if (root_export)
+			if (is_root_export(exp))
 				fsid_type = FSID_UUID8;
 			else
 				fsid_type = FSID_UUID4_INUM;
 		}
-	} else if (!old_valid_dev(ex_dev))
+	} else if (!old_valid_dev(exp_sb(exp)->s_dev))
 		/* for newer device numbers, we must use a newer fsid format */
 		fsid_type = FSID_ENCODE_DEV;
 	else
 		fsid_type = FSID_DEV;
+	fhp->fh_handle.fh_version = version;
+	if (version)
+		fhp->fh_handle.fh_fsid_type = fsid_type;
+}
+
+__be32
+fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
+	   struct svc_fh *ref_fh)
+{
+	/* ref_fh is a reference file handle.
+	 * if it is non-null and for the same filesystem, then we should compose
+	 * a filehandle which is of the same version, where possible.
+	 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
+	 * Then create a 32byte filehandle using nfs_fhbase_old
+	 *
+	 */
+
+	struct inode * inode = dentry->d_inode;
+	struct dentry *parent = dentry->d_parent;
+	__u32 *datap;
+	dev_t ex_dev = exp_sb(exp)->s_dev;
+
+	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
+		MAJOR(ex_dev), MINOR(ex_dev),
+		(long) exp->ex_path.dentry->d_inode->i_ino,
+		parent->d_name.name, dentry->d_name.name,
+		(inode ? inode->i_ino : 0));
+
+	/* Choose filehandle version and fsid type based on
+	 * the reference filehandle (if it is in the same export)
+	 * or the export options.
+	 */
+	set_version_and_fsid_type(fhp, exp, ref_fh);
 
 	if (ref_fh == fhp)
 		fh_put(ref_fh);
@@ -516,7 +531,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	fhp->fh_export = exp;
 	cache_get(&exp->h);
 
-	if (version == 0xca) {
+	if (fhp->fh_handle.fh_version == 0xca) {
 		/* old style filehandle please */
 		memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
 		fhp->fh_handle.fh_size = NFS_FHSIZE;
@@ -530,22 +545,22 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 			_fh_update_old(dentry, exp, &fhp->fh_handle);
 	} else {
 		int len;
-		fhp->fh_handle.fh_version = 1;
 		fhp->fh_handle.fh_auth_type = 0;
 		datap = fhp->fh_handle.fh_auth+0;
-		fhp->fh_handle.fh_fsid_type = fsid_type;
-		mk_fsid(fsid_type, datap, ex_dev,
+		mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
 			exp->ex_path.dentry->d_inode->i_ino,
 			exp->ex_fsid, exp->ex_uuid);
 
-		len = key_len(fsid_type);
+		len = key_len(fhp->fh_handle.fh_fsid_type);
 		datap += len/4;
 		fhp->fh_handle.fh_size = 4 + len;
 
 		if (inode)
 			_fh_update(fhp, exp, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255)
+		if (fhp->fh_handle.fh_fileid_type == 255) {
+			fh_put(fhp);
 			return nfserr_opnotsupp;
+		}
 	}
 
 	return 0;
@@ -639,8 +654,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
 	case FSID_DEV:
 	case FSID_ENCODE_DEV:
 	case FSID_MAJOR_MINOR:
-		if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags
-		    & FS_REQUIRES_DEV)
+		if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV)
 			return FSIDSOURCE_DEV;
 		break;
 	case FSID_NUM:
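
The fh_compose() split above moves the version and fsid-type choice into set_version_and_fsid_type() and collapses the inline switch into the fsid_type_ok_for_exp() predicate. A compact sketch of the same predicate shape, with the export reduced to a few booleans; this is a simplification of the superblock and export-flag checks visible in the hunk:

#include <stdbool.h>

enum fsid_type { FSID_DEV, FSID_NUM, FSID_UUID8, FSID_UUID4_INUM };

#define EXP_FSID 0x1

struct export {			/* stand-in for struct svc_export */
	unsigned int flags;
	bool has_uuid;
	bool is_root;
	bool fs_requires_dev;
};

/* One predicate answers "may this export keep using the reference
 * handle's fsid type?", replacing the switch inlined in fh_compose(). */
static bool fsid_type_ok(enum fsid_type t, const struct export *exp)
{
	switch (t) {
	case FSID_DEV:
		return exp->fs_requires_dev;
	case FSID_NUM:
		return exp->flags & EXP_FSID;
	case FSID_UUID8:
		if (!exp->is_root)
			return false;
		/* fall through */
	case FSID_UUID4_INUM:
		return exp->has_uuid;
	}
	return true;
}
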
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 24d58adfe5f..67ea83eedd4 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -34,6 +34,7 @@
 #include <linux/nfsd/syscall.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfsacl.h>
+#include <linux/seq_file.h>
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
@@ -66,6 +67,16 @@ struct timeval nfssvc_boot;
 DEFINE_MUTEX(nfsd_mutex);
 struct svc_serv 		*nfsd_serv;
 
+/*
+ * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
+ * nfsd_drc_max_pages limits the total amount of memory available for
+ * version 4.1 DRC caches.
+ * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
+ */
+spinlock_t	nfsd_drc_lock;
+unsigned int	nfsd_drc_max_mem;
+unsigned int	nfsd_drc_mem_used;
+
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
 static struct svc_version *	nfsd_acl_version[] = {
@@ -235,13 +246,12 @@ void nfsd_reset_versions(void)
  */
 static void set_max_drc(void)
 {
-	/* The percent of nr_free_buffer_pages used by the V4.1 server DRC */
-	#define NFSD_DRC_SIZE_SHIFT	7
-	nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages()
-					>> NFSD_DRC_SIZE_SHIFT;
-	nfsd_serv->sv_drc_pages_used = 0;
-	dprintk("%s svc_drc_max_pages %u\n", __func__,
-			nfsd_serv->sv_drc_max_pages);
+	#define NFSD_DRC_SIZE_SHIFT	10
+	nfsd_drc_max_mem = (nr_free_buffer_pages()
+					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
+	nfsd_drc_mem_used = 0;
+	spin_lock_init(&nfsd_drc_lock);
+	dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
 }
 
 int nfsd_create_serv(void)
@@ -401,7 +411,9 @@ nfsd_svc(unsigned short port, int nrservs)
 	error = nfsd_racache_init(2*nrservs);
 	if (error<0)
 		goto out;
-	nfs4_state_start();
+	error = nfs4_state_start();
+	if (error)
+		goto out;
 
 	nfsd_reset_versions();
 
@@ -569,10 +581,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 		+ rqstp->rq_res.head[0].iov_len;
 	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
 
-	/* NFSv4.1 DRC requires statp */
-	if (rqstp->rq_vers == 4)
-		nfsd4_set_statp(rqstp, statp);
-
 	/* Now call the procedure handler, and encode NFS status. */
 	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
@@ -607,7 +615,25 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 
 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 {
-	if (nfsd_serv == NULL)
+	int ret;
+	mutex_lock(&nfsd_mutex);
+	if (nfsd_serv == NULL) {
+		mutex_unlock(&nfsd_mutex);
 		return -ENODEV;
-	return svc_pool_stats_open(nfsd_serv, file);
+	}
+	/* bump up the psudo refcount while traversing */
+	svc_get(nfsd_serv);
+	ret = svc_pool_stats_open(nfsd_serv, file);
+	mutex_unlock(&nfsd_mutex);
+	return ret;
+}
+
+int nfsd_pool_stats_release(struct inode *inode, struct file *file)
+{
+	int ret = seq_release(inode, file);
+	mutex_lock(&nfsd_mutex);
+	/* this function really, really should have been called svc_put() */
+	svc_destroy(nfsd_serv);
+	mutex_unlock(&nfsd_mutex);
+	return ret;
 }
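
set_max_drc() now expresses the NFSv4.1 DRC limit in bytes (nfsd_drc_max_mem) and pairs it with nfsd_drc_lock to guard the shared usage counter. A user-space sketch of the reserve/release accounting this enables; the budget value is illustrative, where the kernel derives it from nr_free_buffer_pages():

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t drc_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int drc_max_mem = 1024 * 1024;	/* illustrative budget */
static unsigned int drc_mem_used;

/* Reserve session cache memory; fail the request rather than overrun. */
static bool drc_reserve(unsigned int bytes)
{
	bool ok = false;

	pthread_mutex_lock(&drc_lock);
	if (drc_mem_used + bytes <= drc_max_mem) {
		drc_mem_used += bytes;
		ok = true;
	}
	pthread_mutex_unlock(&drc_lock);
	return ok;
}

/* Return the reservation when the session's slots are freed. */
static void drc_release(unsigned int bytes)
{
	pthread_mutex_lock(&drc_lock);
	drc_mem_used -= bytes;
	pthread_mutex_unlock(&drc_lock);
}
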
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8fa09bfbcba..a293f027326 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -89,6 +89,12 @@ struct raparm_hbucket {
 #define RAPARM_HASH_MASK	(RAPARM_HASH_SIZE-1)
 static struct raparm_hbucket	raparm_hash[RAPARM_HASH_SIZE];
 
+static inline int
+nfsd_v4client(struct svc_rqst *rq)
+{
+	return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
+}
+
 /*
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed
  * a mount point.
@@ -115,7 +121,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 			path_put(&path);
 		goto out;
 	}
-	if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
+	if (nfsd_v4client(rqstp) ||
+	    (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
 		/* successfully crossed mount point */
 		/*
 		 * This is subtle: path.dentry is *not* on path.mnt
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index c668bca579c..6a2711f4c32 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -46,7 +46,7 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc)
 	INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
 }
 
-static struct address_space_operations def_btnode_aops = {
+static const struct address_space_operations def_btnode_aops = {
 	.sync_page		= block_sync_page,
 };
 
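
This hunk and the nilfs2, ocfs2, and omfs hunks that follow all make the same change: operation tables that are only ever read after initialization are declared const, so the linker can place them in read-only data. A minimal sketch of the pattern:

#include <stdio.h>

struct ops {
	int (*read)(void);
	int (*write)(void);
};

static int do_read(void)  { return 0; }
static int do_write(void) { return 1; }

/* Declaring the table const moves it into .rodata, so a stray write
 * faults instead of silently retargeting the function pointers. */
static const struct ops file_ops = {
	.read	= do_read,
	.write	= do_write,
};

int main(void)
{
	printf("%d %d\n", file_ops.read(), file_ops.write());
	return 0;
}
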
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 6bd84a0d823..fc8278c77cd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -151,7 +151,7 @@ struct file_operations nilfs_file_operations = {
 	.splice_read	= generic_file_splice_read,
 };
 
-struct inode_operations nilfs_file_inode_operations = {
+const struct inode_operations nilfs_file_inode_operations = {
 	.truncate	= nilfs_truncate,
 	.setattr	= nilfs_setattr,
 	.permission	= nilfs_permission,
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1b3c2bb20da..e6de0a27ab5 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -52,7 +52,7 @@
 #include "dat.h"
 #include "ifile.h"
 
-static struct address_space_operations def_gcinode_aops = {
+static const struct address_space_operations def_gcinode_aops = {
 	.sync_page		= block_sync_page,
 };
 
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 807e584b163..2d2c501deb5 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -238,7 +238,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	return size;
 }
 
-struct address_space_operations nilfs_aops = {
+const struct address_space_operations nilfs_aops = {
 	.writepage		= nilfs_writepage,
 	.readpage		= nilfs_readpage,
 	.sync_page		= block_sync_page,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 156bf6091a9..b18c4998f8d 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -427,12 +427,12 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
 }
 
 
-static struct address_space_operations def_mdt_aops = {
+static const struct address_space_operations def_mdt_aops = {
 	.writepage		= nilfs_mdt_write_page,
 	.sync_page		= block_sync_page,
 };
 
-static struct inode_operations def_mdt_iops;
+static const struct inode_operations def_mdt_iops;
 static struct file_operations def_mdt_fops;
 
 /*
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index df70dadb336..ed02e886fa7 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -448,7 +448,7 @@ out:
 	return err;
 }
 
-struct inode_operations nilfs_dir_inode_operations = {
+const struct inode_operations nilfs_dir_inode_operations = {
 	.create		= nilfs_create,
 	.lookup		= nilfs_lookup,
 	.link		= nilfs_link,
@@ -462,12 +462,12 @@ struct inode_operations nilfs_dir_inode_operations = {
 	.permission	= nilfs_permission,
 };
 
-struct inode_operations nilfs_special_inode_operations = {
+const struct inode_operations nilfs_special_inode_operations = {
 	.setattr	= nilfs_setattr,
 	.permission	= nilfs_permission,
 };
 
-struct inode_operations nilfs_symlink_inode_operations = {
+const struct inode_operations nilfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 724c63766e8..bad7368782d 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -295,12 +295,12 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *);
  * Inodes and files operations
  */
 extern struct file_operations nilfs_dir_operations;
-extern struct inode_operations nilfs_file_inode_operations;
+extern const struct inode_operations nilfs_file_inode_operations;
 extern struct file_operations nilfs_file_operations;
-extern struct address_space_operations nilfs_aops;
-extern struct inode_operations nilfs_dir_inode_operations;
-extern struct inode_operations nilfs_special_inode_operations;
-extern struct inode_operations nilfs_symlink_inode_operations;
+extern const struct address_space_operations nilfs_aops;
+extern const struct inode_operations nilfs_dir_inode_operations;
+extern const struct inode_operations nilfs_special_inode_operations;
+extern const struct inode_operations nilfs_symlink_inode_operations;
 
 /*
  * filesystem type
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 55f3d6b6073..644e66727dd 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -504,7 +504,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	return 0;
 }
 
-static struct super_operations nilfs_sops = {
+static const struct super_operations nilfs_sops = {
 	.alloc_inode    = nilfs_alloc_inode,
 	.destroy_inode  = nilfs_destroy_inode,
 	.dirty_inode    = nilfs_dirty_inode,
@@ -560,7 +560,7 @@ nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len,
 			       nilfs_nfs_get_inode);
 }
 
-static struct export_operations nilfs_export_ops = {
+static const struct export_operations nilfs_export_ops = {
 	.fh_to_dentry = nilfs_fh_to_dentry,
 	.fh_to_parent = nilfs_fh_to_parent,
 	.get_parent = nilfs_get_parent,
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 50931b1ce4b..8b2549f672b 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -829,7 +829,7 @@ enum {
 	/* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
 	   F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
 	   F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask
-	   is used to to obtain all flags that are valid for setting. */
+	   is used to obtain all flags that are valid for setting. */
 	/*
 	 * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all
 	 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index cd0be3f5c3c..a44b14cbcee 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
 		return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
 		/* return (void *)__get_free_page(gfp_mask); */
 	}
-	if (likely(size >> PAGE_SHIFT < num_physpages))
+	if (likely((size >> PAGE_SHIFT) < totalram_pages))
 		return __vmalloc(size, gfp_mask, PAGE_KERNEL);
 	return NULL;
 }
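
The __ntfs_malloc() hunk above swaps the stale num_physpages bound for totalram_pages when sanity-checking a vmalloc-backed allocation. A rough user-space analogue of that guard, using sysconf() in place of the kernel's global; the helper name is invented:

#include <stdlib.h>
#include <unistd.h>

/* Refuse allocations whose page count exceeds total RAM pages,
 * before even attempting them. */
static void *bounded_alloc(size_t size)
{
	long page_size = sysconf(_SC_PAGESIZE);
	long total_pages = sysconf(_SC_PHYS_PAGES);

	if (page_size <= 0 || total_pages <= 0)
		return NULL;
	if (size / (size_t)page_size >= (size_t)total_pages)
		return NULL;	/* cannot possibly be satisfied */
	return malloc(size);
}
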
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 3fb96fcd4c8..e5df9d170b0 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -109,7 +109,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
 int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
 			   struct buffer_head **bh);
 
-extern struct dquot_operations ocfs2_quota_operations;
+extern const struct dquot_operations ocfs2_quota_operations;
 extern struct quota_format_type ocfs2_quota_format;
 
 int ocfs2_quota_setup(void);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 44f2a5e1d04..3cf0ec0acdd 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -154,7 +154,7 @@ static int ocfs2_get_quota_block(struct inode *inode, int block,
 		err = -EIO;
 		mlog_errno(err);
 	}
-	return err;;
+	return err;
 }
 
 /* Read data from global quotafile - avoid pagecache and such because we cannot
@@ -849,7 +849,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 	kmem_cache_free(ocfs2_dquot_cachep, dquot);
 }
 
-struct dquot_operations ocfs2_quota_operations = {
+const struct dquot_operations ocfs2_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a3f8871d21f..faca4720aa4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -965,7 +965,7 @@ static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
 	return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
 }
 
-static struct quotactl_ops ocfs2_quotactl_ops = {
+static const struct quotactl_ops ocfs2_quotactl_ops = {
 	.quota_on	= ocfs2_quota_on,
 	.quota_off	= ocfs2_quota_off,
 	.quota_sync	= vfs_quota_sync,
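
Most of the changes in this series follow one pattern: method tables that are only ever read get declared const, so the linker can place them in read-only memory and the compiler rejects stray writes. A minimal stand-alone illustration of the pattern (hypothetical names, not kernel code):

    #include <stdio.h>

    struct ops {
            int (*quota_on)(void);
            int (*quota_off)(void);
    };

    static int demo_on(void)  { return puts("quota on"); }
    static int demo_off(void) { return puts("quota off"); }

    /* const: the table lands in .rodata; "demo_ops.quota_on = NULL;"
     * is now a compile-time error instead of a latent runtime bug. */
    static const struct ops demo_ops = {
            .quota_on  = demo_on,
            .quota_off = demo_off,
    };

    int main(void)
    {
            demo_ops.quota_on();
            demo_ops.quota_off();
            return 0;
    }
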
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index c7275cfbdcf..3680bae335b 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -489,7 +489,7 @@ out:
 	return ret;
 }
 
-struct inode_operations omfs_dir_inops = {
+const struct inode_operations omfs_dir_inops = {
 	.lookup = omfs_lookup,
 	.mkdir = omfs_mkdir,
 	.rename = omfs_rename,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index d17e774eaf4..4845fbb18e6 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -333,11 +333,11 @@ struct file_operations omfs_file_operations = {
 	.splice_read = generic_file_splice_read,
 };
 
-struct inode_operations omfs_file_inops = {
+const struct inode_operations omfs_file_inops = {
 	.truncate = omfs_truncate
 };
 
-struct address_space_operations omfs_aops = {
+const struct address_space_operations omfs_aops = {
 	.readpage = omfs_readpage,
 	.readpages = omfs_readpages,
 	.writepage = omfs_writepage,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 379ae5fb441..f3b7c1541f3 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -278,7 +278,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static struct super_operations omfs_sops = {
+static const struct super_operations omfs_sops = {
 	.write_inode	= omfs_write_inode,
 	.delete_inode	= omfs_delete_inode,
 	.put_super	= omfs_put_super,
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 2bc0f067040..df71039945a 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -45,15 +45,15 @@ extern int omfs_clear_range(struct super_block *sb, u64 block, int count);
 
 /* dir.c */
 extern struct file_operations omfs_dir_operations;
-extern struct inode_operations omfs_dir_inops;
+extern const struct inode_operations omfs_dir_inops;
 extern int omfs_make_empty(struct inode *inode, struct super_block *sb);
 extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
 			u64 fsblock);
 
 /* file.c */
 extern struct file_operations omfs_file_operations;
-extern struct inode_operations omfs_file_inops;
-extern struct address_space_operations omfs_aops;
+extern const struct inode_operations omfs_file_inops;
+extern const struct address_space_operations omfs_aops;
 extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
 extern int omfs_shrink_inode(struct inode *inode);
 
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index fbeaddf595d..7b685e10cba 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -581,7 +581,7 @@ try_scan:
 	}
 
 	if (from + size > get_capacity(disk)) {
-		struct block_device_operations *bdops = disk->fops;
+		const struct block_device_operations *bdops = disk->fops;
 		unsigned long long capacity;
 
 		printk(KERN_WARNING
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6658a..55c4c805a75 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	read_lock(&tasklist_lock);
-	points = badness(task, uptime.tv_sec);
+	points = badness(task->group_leader, uptime.tv_sec);
 	read_unlock(&tasklist_lock);
 	return sprintf(buffer, "%lu\n", points);
 }
@@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	char buffer[PROC_NUMBUF];
 	size_t len;
-	int oom_adjust;
+	int oom_adjust = OOM_DISABLE;
+	unsigned long flags;
 
 	if (!task)
 		return -ESRCH;
-	oom_adjust = task->oomkilladj;
+
+	if (lock_task_sighand(task, &flags)) {
+		oom_adjust = task->signal->oom_adj;
+		unlock_task_sighand(task, &flags);
+	}
+
 	put_task_struct(task);
 
 	len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1015,32 +1021,44 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
-	int oom_adjust;
+	char buffer[PROC_NUMBUF];
+	long oom_adjust;
+	unsigned long flags;
+	int err;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	oom_adjust = simple_strtol(buffer, &end, 0);
+
+	err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
+	if (err)
+		return -EINVAL;
 	if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
 	     oom_adjust != OOM_DISABLE)
 		return -EINVAL;
-	if (*end == '\n')
-		end++;
+
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
-	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
+	if (!lock_task_sighand(task, &flags)) {
+		put_task_struct(task);
+		return -ESRCH;
+	}
+
+	if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
+		unlock_task_sighand(task, &flags);
 		put_task_struct(task);
 		return -EACCES;
 	}
-	task->oomkilladj = oom_adjust;
+
+	task->signal->oom_adj = oom_adjust;
+
+	unlock_task_sighand(task, &flags);
 	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
+
+	return count;
 }
 
 static const struct file_operations proc_oom_adjust_operations = {
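
The user-visible interface is unchanged: /proc/<pid>/oom_adj still takes an integer, it is just stored per-process (in signal_struct) instead of per-thread. A small sketch of exercising it from user space, assuming a kernel of this vintage where the file exists:

    #include <stdio.h>

    /* Write an OOM-killer adjustment for the current process and read
     * it back; /proc/self/oom_adj is the file touched by this patch. */
    int main(void)
    {
            FILE *f = fopen("/proc/self/oom_adj", "w");
            char buf[16];

            if (!f || fprintf(f, "10\n") < 0)
                    return 1;
            fclose(f);

            f = fopen("/proc/self/oom_adj", "r");
            if (!f || !fgets(buf, sizeof(buf), f))
                    return 1;
            printf("oom_adj is now %s", buf);
            fclose(f);
            return 0;
    }
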
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 59b43a06887..f06f45b4218 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -328,43 +328,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 			return -EFAULT;
 	} else if (is_vmalloc_addr((void *)start)) {
 		char * elf_buf;
-		struct vm_struct *m;
-		unsigned long curstart = start;
-		unsigned long cursize = tsz;
 
 		elf_buf = kzalloc(tsz, GFP_KERNEL);
 		if (!elf_buf)
 			return -ENOMEM;
-
-		read_lock(&vmlist_lock);
-		for (m=vmlist; m && cursize; m=m->next) {
-			unsigned long vmstart;
-			unsigned long vmsize;
-			unsigned long msize = m->size - PAGE_SIZE;
-
-			if (((unsigned long)m->addr + msize) <
-				curstart)
-				continue;
-			if ((unsigned long)m->addr > (curstart +
-				cursize))
-				break;
-			vmstart = (curstart < (unsigned long)m->addr ?
-				(unsigned long)m->addr : curstart);
-			if (((unsigned long)m->addr + msize) >
-				(curstart + cursize))
-				vmsize = curstart + cursize - vmstart;
-			else
-				vmsize = (unsigned long)m->addr +
-					msize - vmstart;
-			curstart = vmstart + vmsize;
-			cursize -= vmsize;
-			/* don't dump ioremap'd stuff! (TA) */
-			if (m->flags & VM_IOREMAP)
-				continue;
-			memcpy(elf_buf + (vmstart - start),
-				(char *)vmstart, vmsize);
-		}
-		read_unlock(&vmlist_lock);
+		vread(elf_buf, (char *)start, tsz);
+		/* we have to zero-fill user buffer even if no read */
 		if (copy_to_user(buffer, elf_buf, tsz)) {
 			kfree(elf_buf);
 			return -EFAULT;
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index d5c410d47fa..171e052c07b 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -81,9 +81,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
81 "Writeback: %8lu kB\n" 81 "Writeback: %8lu kB\n"
82 "AnonPages: %8lu kB\n" 82 "AnonPages: %8lu kB\n"
83 "Mapped: %8lu kB\n" 83 "Mapped: %8lu kB\n"
84 "Shmem: %8lu kB\n"
84 "Slab: %8lu kB\n" 85 "Slab: %8lu kB\n"
85 "SReclaimable: %8lu kB\n" 86 "SReclaimable: %8lu kB\n"
86 "SUnreclaim: %8lu kB\n" 87 "SUnreclaim: %8lu kB\n"
88 "KernelStack: %8lu kB\n"
87 "PageTables: %8lu kB\n" 89 "PageTables: %8lu kB\n"
88#ifdef CONFIG_QUICKLIST 90#ifdef CONFIG_QUICKLIST
89 "Quicklists: %8lu kB\n" 91 "Quicklists: %8lu kB\n"
@@ -124,10 +126,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
124 K(global_page_state(NR_WRITEBACK)), 126 K(global_page_state(NR_WRITEBACK)),
125 K(global_page_state(NR_ANON_PAGES)), 127 K(global_page_state(NR_ANON_PAGES)),
126 K(global_page_state(NR_FILE_MAPPED)), 128 K(global_page_state(NR_FILE_MAPPED)),
129 K(global_page_state(NR_SHMEM)),
127 K(global_page_state(NR_SLAB_RECLAIMABLE) + 130 K(global_page_state(NR_SLAB_RECLAIMABLE) +
128 global_page_state(NR_SLAB_UNRECLAIMABLE)), 131 global_page_state(NR_SLAB_UNRECLAIMABLE)),
129 K(global_page_state(NR_SLAB_RECLAIMABLE)), 132 K(global_page_state(NR_SLAB_RECLAIMABLE)),
130 K(global_page_state(NR_SLAB_UNRECLAIMABLE)), 133 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
134 global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
131 K(global_page_state(NR_PAGETABLE)), 135 K(global_page_state(NR_PAGETABLE)),
132#ifdef CONFIG_QUICKLIST 136#ifdef CONFIG_QUICKLIST
133 K(quicklist_total_size()), 137 K(quicklist_total_size()),
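
With these two additions, /proc/meminfo gains Shmem: and KernelStack: lines. A quick sketch for pulling a single field out of the file from user space:

    #include <stdio.h>
    #include <string.h>

    /* Return one /proc/meminfo field in kB, e.g. "Shmem", or -1. */
    static long meminfo_kb(const char *field)
    {
            char line[128];
            size_t len = strlen(field);
            long kb = -1;
            FILE *f = fopen("/proc/meminfo", "r");

            if (!f)
                    return -1;
            while (fgets(line, sizeof(line), f))
                    if (!strncmp(line, field, len) && line[len] == ':') {
                            sscanf(line + len + 1, "%ld", &kb);
                            break;
                    }
            fclose(f);
            return kb;
    }

    int main(void)
    {
            printf("Shmem:       %ld kB\n", meminfo_kb("Shmem"));
            printf("KernelStack: %ld kB\n", meminfo_kb("KernelStack"));
            return 0;
    }
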
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2707c6c7a20..2281c2cbfe2 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -2,6 +2,7 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <linux/ksm.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
@@ -95,6 +96,8 @@ static const struct file_operations proc_kpagecount_operations = {
 #define KPF_UNEVICTABLE		18
 #define KPF_NOPAGE		20
 
+#define KPF_KSM			21
+
 /* kernel hacking assistances
  * WARNING: subject to change, never rely on them!
  */
@@ -137,6 +140,8 @@ static u64 get_uflags(struct page *page)
 		u |= 1 << KPF_MMAP;
 	if (PageAnon(page))
 		u |= 1 << KPF_ANON;
+	if (PageKsm(page))
+		u |= 1 << KPF_KSM;
 
 	/*
 	 * compound pages: export both head/tail info
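
Each 64-bit entry in /proc/kpageflags describes one physical page frame, and bit 21 now reports KSM-merged pages. A sketch of checking it from user space (the file is root-only):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define KPF_KSM 21      /* bit added by this patch */

    /* Count KSM pages among the first physical page frames. */
    int main(void)
    {
            int fd = open("/proc/kpageflags", O_RDONLY);
            uint64_t flags;
            long pfn, ksm = 0;

            if (fd < 0)
                    return 1;
            for (pfn = 0; pfn < 100000; pfn++) {
                    if (pread(fd, &flags, 8, pfn * 8) != 8)
                            break;
                    if (flags & (1ULL << KPF_KSM))
                            ksm++;
            }
            printf("%ld KSM pages in first %ld pfns\n", ksm, pfn);
            close(fd);
            return 0;
    }
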
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9bd8be1d235..59e98fea34a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -465,6 +465,10 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
+#define CLEAR_REFS_ALL 1
+#define CLEAR_REFS_ANON 2
+#define CLEAR_REFS_MAPPED 3
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -472,13 +476,15 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 	char buffer[PROC_NUMBUF], *end;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
+	int type;
 
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	if (!simple_strtol(buffer, &end, 0))
+	type = simple_strtol(buffer, &end, 0);
+	if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
 		return -EINVAL;
 	if (*end == '\n')
 		end++;
@@ -494,9 +500,23 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		down_read(&mm->mmap_sem);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			clear_refs_walk.private = vma;
-			if (!is_vm_hugetlb_page(vma))
-				walk_page_range(vma->vm_start, vma->vm_end,
-						&clear_refs_walk);
+			if (is_vm_hugetlb_page(vma))
+				continue;
+			/*
+			 * Writing 1 to /proc/pid/clear_refs affects all pages.
+			 *
+			 * Writing 2 to /proc/pid/clear_refs only affects
+			 * Anonymous pages.
+			 *
+			 * Writing 3 to /proc/pid/clear_refs only affects file
+			 * mapped pages.
+			 */
+			if (type == CLEAR_REFS_ANON && vma->vm_file)
+				continue;
+			if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
+				continue;
+			walk_page_range(vma->vm_start, vma->vm_end,
+					&clear_refs_walk);
 		}
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
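
In practice this is used for working-set estimation: clear the referenced bits, let the workload run, then read the Referenced: counts back out of /proc/<pid>/smaps. A sketch of driving the new selective modes from user space:

    #include <stdio.h>
    #include <unistd.h>

    /* 1 = all pages, 2 = anonymous only (new), 3 = file-mapped only (new). */
    static int clear_refs(int pid, int type)
    {
            char path[64];
            FILE *f;

            snprintf(path, sizeof(path), "/proc/%d/clear_refs", pid);
            f = fopen(path, "w");
            if (!f)
                    return -1;
            fprintf(f, "%d\n", type);
            return fclose(f);
    }

    int main(void)
    {
            return clear_refs(getpid(), 2) ? 1 : 0;
    }
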
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 38f7bd559f3..39b49c42a7e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1839,7 +1839,7 @@ EXPORT_SYMBOL(dquot_commit_info);
 /*
  * Definitions of diskquota operations.
  */
-struct dquot_operations dquot_operations = {
+const struct dquot_operations dquot_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
@@ -2461,7 +2461,7 @@ out:
 }
 EXPORT_SYMBOL(vfs_set_dqinfo);
 
-struct quotactl_ops vfs_quotactl_ops = {
+const struct quotactl_ops vfs_quotactl_ops = {
 	.quota_on	= vfs_quota_on,
 	.quota_off	= vfs_quota_off,
 	.quota_sync	= vfs_quota_sync,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7adea74d6a8..f0ad05f3802 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -612,7 +612,7 @@ static int reiserfs_mark_dquot_dirty(struct dquot *);
 static int reiserfs_write_info(struct super_block *, int);
 static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
 
-static struct dquot_operations reiserfs_quota_operations = {
+static const struct dquot_operations reiserfs_quota_operations = {
 	.initialize = dquot_initialize,
 	.drop = dquot_drop,
 	.alloc_space = dquot_alloc_space,
@@ -629,7 +629,7 @@ static struct dquot_operations reiserfs_quota_operations = {
 	.destroy_dquot = dquot_destroy,
 };
 
-static struct quotactl_ops reiserfs_qctl_operations = {
+static const struct quotactl_ops reiserfs_qctl_operations = {
 	.quota_on = reiserfs_quota_on,
 	.quota_off = vfs_quota_off,
 	.quota_sync = vfs_quota_sync,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 4ab3c03d8f9..47f132df0c3 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -284,7 +284,7 @@ static const struct file_operations romfs_dir_operations = {
 	.readdir	= romfs_readdir,
 };
 
-static struct inode_operations romfs_dir_inode_operations = {
+static const struct inode_operations romfs_dir_inode_operations = {
 	.lookup		= romfs_lookup,
 };
 
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index cb5fc57e370..6c197ef53ad 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -44,7 +44,7 @@
44#include "squashfs.h" 44#include "squashfs.h"
45 45
46static struct file_system_type squashfs_fs_type; 46static struct file_system_type squashfs_fs_type;
47static struct super_operations squashfs_super_ops; 47static const struct super_operations squashfs_super_ops;
48 48
49static int supported_squashfs_filesystem(short major, short minor, short comp) 49static int supported_squashfs_filesystem(short major, short minor, short comp)
50{ 50{
@@ -444,7 +444,7 @@ static struct file_system_type squashfs_fs_type = {
444 .fs_flags = FS_REQUIRES_DEV 444 .fs_flags = FS_REQUIRES_DEV
445}; 445};
446 446
447static struct super_operations squashfs_super_ops = { 447static const struct super_operations squashfs_super_ops = {
448 .alloc_inode = squashfs_alloc_inode, 448 .alloc_inode = squashfs_alloc_inode,
449 .destroy_inode = squashfs_destroy_inode, 449 .destroy_inode = squashfs_destroy_inode,
450 .statfs = squashfs_statfs, 450 .statfs = squashfs_statfs,
diff --git a/fs/super.c b/fs/super.c
index b03fea8fbfb..0e7207b9815 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,7 @@ DEFINE_SPINLOCK(sb_lock);
 static struct super_block *alloc_super(struct file_system_type *type)
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
-	static struct super_operations default_op;
+	static const struct super_operations default_op;
 
 	if (s) {
 		if (security_sb_alloc(s)) {
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 7998cc37825..195830f4756 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -79,7 +79,7 @@ enum {
 };
 
 static const struct inode_operations none_inode_operations;
-static struct address_space_operations none_address_operations;
+static const struct address_space_operations none_address_operations;
 static const struct file_operations none_file_operations;
 
 /**
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index cb6e2cca214..9e41f91aa26 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -150,7 +150,7 @@ xfs_fs_set_xquota(
 	return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
 }
 
-struct quotactl_ops xfs_quotactl_operations = {
+const struct quotactl_ops xfs_quotactl_operations = {
 	.quota_sync		= xfs_fs_quota_sync,
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 5d7c60ac77b..bdd41c8c342 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -67,7 +67,7 @@
 #include <linux/freezer.h>
 #include <linux/parser.h>
 
-static struct super_operations xfs_super_operations;
+static const struct super_operations xfs_super_operations;
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
 
@@ -1536,7 +1536,7 @@ xfs_fs_get_sb(
 			   mnt);
 }
 
-static struct super_operations xfs_super_operations = {
+static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
 	.write_inode		= xfs_fs_write_inode,
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 5a2ea3a2178..18175ebd58e 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -93,7 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
 extern const struct export_operations xfs_export_operations;
 extern struct xattr_handler *xfs_xattr_handlers[];
-extern struct quotactl_ops xfs_quotactl_operations;
+extern const struct quotactl_ops xfs_quotactl_operations;
 
 #define XFS_M(sb)	((struct xfs_mount *)((sb)->s_fs_info))
 
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c4ea51b55dc..f52ac276277 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -117,7 +117,7 @@ struct getbmapx {
 #define BMV_IF_VALID	\
 	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
 
-/*	bmv_oflags values - returned for for each non-header segment */
+/*	bmv_oflags values - returned for each non-header segment */
 #define BMV_OF_PREALLOC		0x1	/* segment = unwritten pre-allocation */
 #define BMV_OF_DELALLOC		0x2	/* segment = delayed allocation */
 #define BMV_OF_LAST		0x4	/* segment is the last in the file */
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 37ba576d06e..8052236d1a3 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -288,7 +288,7 @@ typedef u32 acpi_physical_address;
 /*
  * Some compilers complain about unused variables. Sometimes we don't want to
  * use all the variables (for example, _acpi_module_name). This allows us
- * to to tell the compiler in a per-variable manner that a variable
+ * to tell the compiler in a per-variable manner that a variable
  * is unused
  */
 #ifndef ACPI_UNUSED_VAR
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index 935c5d7fc86..6aadbf84ae7 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -57,7 +57,7 @@
 /*
  * Some compilers complain about unused variables. Sometimes we don't want to
  * use all the variables (for example, _acpi_module_name). This allows us
- * to to tell the compiler warning in a per-variable manner that a variable
+ * to tell the compiler warning in a per-variable manner that a variable
  * is unused.
  */
 #define ACPI_UNUSED_VAR __attribute__ ((unused))
diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
index 3b69ad34189..dd63bd38864 100644
--- a/include/asm-generic/mman-common.h
+++ b/include/asm-generic/mman-common.h
@@ -35,6 +35,9 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_MERGEABLE   12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
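
These advice values are the user-space entry point to KSM: a process opts pages in with madvise(), and the ksmd daemon (controlled through /sys/kernel/mm/ksm/) later scans and merges them. A minimal sketch, defining the constant locally since libc headers of the day did not yet know it:

    #include <string.h>
    #include <sys/mman.h>

    #ifndef MADV_MERGEABLE
    #define MADV_MERGEABLE 12       /* value from mman-common.h above */
    #endif

    int main(void)
    {
            size_t len = 16 * 4096;
            char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (buf == MAP_FAILED)
                    return 1;
            memset(buf, 0x5a, len); /* identical content -> mergeable */

            /* Opt the region in; merging happens later, if ksmd runs. */
            return madvise(buf, len, MADV_MERGEABLE) ? 1 : 0;
    }
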
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
index 7cab4de2bca..32c8bd6a196 100644
--- a/include/asm-generic/mman.h
+++ b/include/asm-generic/mman.h
@@ -11,6 +11,7 @@
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
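
MAP_HUGETLB lets anonymous mappings come straight from the hugepage pool without mounting hugetlbfs first (the kernel uses an internal mount, per the HUGETLB_ANONHUGE_INODE plumbing further down). A sketch, again defining the flag locally; it assumes 2 MB hugepages and a reserved pool (vm.nr_hugepages > 0):

    #include <stdio.h>
    #include <sys/mman.h>

    #ifndef MAP_HUGETLB
    #define MAP_HUGETLB 0x40000     /* value from asm-generic/mman.h above */
    #endif

    int main(void)
    {
            size_t len = 2 * 1024 * 1024;   /* one 2 MB hugepage */
            char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

            if (p == MAP_FAILED) {  /* fails unless hugepages are reserved */
                    perror("mmap");
                    return 1;
            }
            p[0] = 1;       /* touch the huge page */
            return munmap(p, len);
    }
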
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index bc3ab707369..dd97fb8408a 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -132,9 +132,6 @@ static inline void *alloc_remap(int nid, unsigned long size)
 }
 #endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
 
-extern unsigned long __meminitdata nr_kernel_pages;
-extern unsigned long __meminitdata nr_all_pages;
-
 extern void *alloc_large_system_hash(const char *tablename,
 				     unsigned long bucketsize,
 				     unsigned long numentries,
@@ -145,6 +142,8 @@ extern void *alloc_large_system_hash(const char *tablename,
 				     unsigned long limit);
 
 #define HASH_EARLY	0x00000001	/* Allocating during early boot? */
+#define HASH_SMALL	0x00000002	/* sub-page allocation allowed, min
+					 * shift passed via *_hash_shift */
 
 /* Only NUMA needs hash distribution. 64bit NUMA architectures have
  * sufficient vmalloc space.
diff --git a/include/linux/capability.h b/include/linux/capability.h
index c3021105edc..c8f2a5f70ed 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -7,7 +7,7 @@
  *
  * See here for the libcap library ("POSIX draft" compliance):
  *
- * ftp://linux.kernel.org/pub/linux/libs/security/linux-privs/kernel-2.6/
+ * ftp://www.kernel.org/pub/linux/libs/security/linux-privs/kernel-2.6/
  */
 
 #ifndef _LINUX_CAPABILITY_H
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 45ff1849151..1d747f72298 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -31,10 +31,32 @@ struct flex_array {
 	};
 };
 
-#define FLEX_ARRAY_INIT(size, total) { { {\
-	.element_size = (size),		\
-	.total_nr_elements = (total),	\
-} } }
+/* Number of bytes left in base struct flex_array, excluding metadata */
+#define FLEX_ARRAY_BASE_BYTES_LEFT					\
+	(FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts))
+
+/* Number of pointers in base to struct flex_array_part pages */
+#define FLEX_ARRAY_NR_BASE_PTRS						\
+	(FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *))
+
+/* Number of elements of size that fit in struct flex_array_part */
+#define FLEX_ARRAY_ELEMENTS_PER_PART(size)				\
+	(FLEX_ARRAY_PART_SIZE / size)
+
+/*
+ * Defines a statically allocated flex array and ensures its parameters are
+ * valid.
+ */
+#define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total)	\
+	struct flex_array __arrayname = { { {				\
+		.element_size = (__element_size),			\
+		.total_nr_elements = (__total),				\
+	} } };								\
+	static inline void __arrayname##_invalid_parameter(void)	\
+	{								\
+		BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS *	\
+			FLEX_ARRAY_ELEMENTS_PER_PART(__element_size));	\
+	}
 
 struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 		gfp_t flags);
@@ -44,6 +66,8 @@ void flex_array_free(struct flex_array *fa);
 void flex_array_free_parts(struct flex_array *fa);
 int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
 		gfp_t flags);
+int flex_array_clear(struct flex_array *fa, unsigned int element_nr);
 void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
+int flex_array_shrink(struct flex_array *fa);
 
 #endif /* _FLEX_ARRAY_H */
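
A flex_array stores fixed-size elements in page-sized parts, so it can grow without needing a large physically contiguous allocation. A hedged kernel-style sketch of the API exactly as declared above (flex_demo is a hypothetical caller):

    #include <linux/flex_array.h>
    #include <linux/kernel.h>

    /* Sketch: stash ints in a flex_array and read one back. */
    static int flex_demo(void)
    {
            struct flex_array *fa;
            int value = 42, *out;

            fa = flex_array_alloc(sizeof(int), 1000, GFP_KERNEL);
            if (!fa)
                    return -ENOMEM;

            /* flex_array_put() copies *src into slot element_nr */
            if (flex_array_put(fa, 7, &value, GFP_KERNEL)) {
                    flex_array_free(fa);
                    return -ENOSPC;
            }

            out = flex_array_get(fa, 7);  /* NULL if never stored */
            pr_info("slot 7 holds %d\n", out ? *out : -1);

            flex_array_free(fa);
            return 0;
    }
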
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 90162fb3bf0..51803528b09 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1066,8 +1066,8 @@ struct file_lock {
 	struct fasync_struct *	fl_fasync; /* for lease break notifications */
 	unsigned long fl_break_time;	/* for nonblocking lease breaks */
 
-	struct file_lock_operations *fl_ops;	/* Callbacks for filesystems */
-	struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
+	const struct file_lock_operations *fl_ops;	/* Callbacks for filesystems */
+	const struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
 	union {
 		struct nfs_lock_info	nfs_fl;
 		struct nfs4_lock_info	nfs4_fl;
@@ -1318,8 +1318,8 @@ struct super_block {
 	unsigned long long	s_maxbytes;	/* Max file size */
 	struct file_system_type	*s_type;
 	const struct super_operations	*s_op;
-	struct dquot_operations	*dq_op;
-	struct quotactl_ops	*s_qcop;
+	const struct dquot_operations	*dq_op;
+	const struct quotactl_ops	*s_qcop;
 	const struct export_operations *s_export_op;
 	unsigned long		s_flags;
 	unsigned long		s_magic;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 109d179adb9..297df45ffd0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -151,7 +151,7 @@ struct gendisk {
 	struct disk_part_tbl *part_tbl;
 	struct hd_struct part0;
 
-	struct block_device_operations *fops;
+	const struct block_device_operations *fops;
 	struct request_queue *queue;
 	void *private_data;
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7c777a0da17..f53e9b868c2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -326,7 +326,6 @@ void free_pages_exact(void *virt, size_t size);
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_page(struct page *page);
-extern void free_cold_page(struct page *page);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr),0)
@@ -336,18 +335,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(void);
 void drain_local_pages(void *dummy);
 
-extern bool oom_killer_disabled;
-
-static inline void oom_killer_disable(void)
-{
-	oom_killer_disabled = true;
-}
-
-static inline void oom_killer_enable(void)
-{
-	oom_killer_disabled = false;
-}
-
 extern gfp_t gfp_allowed_mask;
 
 static inline void set_gfp_allowed_mask(gfp_t mask)
diff --git a/include/linux/hid.h b/include/linux/hid.h
index a0ebdace7ba..10f62841674 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -494,6 +494,7 @@ struct hid_device { /* device report descriptor */
 
 	/* hiddev event handler */
 	int (*hiddev_connect)(struct hid_device *, unsigned int);
+	void (*hiddev_disconnect)(struct hid_device *);
 	void (*hiddev_hid_event) (struct hid_device *, struct hid_field *field,
 				  struct hid_usage *, __s32);
 	void (*hiddev_report_event) (struct hid_device *, struct hid_report *);
@@ -691,6 +692,7 @@ struct hid_device *hid_allocate_device(void);
 int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
 int hid_check_keys_pressed(struct hid_device *hid);
 int hid_connect(struct hid_device *hid, unsigned int connect_mask);
+void hid_disconnect(struct hid_device *hid);
 
 /**
  * hid_map_usage - map usage input bits
@@ -800,6 +802,7 @@ static inline int __must_check hid_hw_start(struct hid_device *hdev,
  */
 static inline void hid_hw_stop(struct hid_device *hdev)
 {
+	hid_disconnect(hdev);
 	hdev->ll_driver->stop(hdev);
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5cbc620bdfe..176e7ee73ef 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
 int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int);
+int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
+			struct page **, struct vm_area_struct **,
+			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
 			unsigned long, unsigned long, struct page *);
 void __unmap_hugepage_range(struct vm_area_struct *,
@@ -110,6 +112,21 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 
 #endif /* !CONFIG_HUGETLB_PAGE */
 
+#define HUGETLB_ANON_FILE "anon_hugepage"
+
+enum {
+	/*
+	 * The file will be used as an shm file so shmfs accounting rules
+	 * apply
+	 */
+	HUGETLB_SHMFS_INODE = 1,
+	/*
+	 * The file is being created on the internal vfs mount and shmfs
+	 * accounting rules do not apply
+	 */
+	HUGETLB_ANONHUGE_INODE = 2,
+};
+
 #ifdef CONFIG_HUGETLBFS
 struct hugetlbfs_config {
 	uid_t	uid;
@@ -148,7 +165,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
 extern const struct file_operations hugetlbfs_file_operations;
 extern struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, int acct,
-						struct user_struct **user);
+				struct user_struct **user, int creat_flags);
 int hugetlb_get_quota(struct address_space *mapping, long delta);
 void hugetlb_put_quota(struct address_space *mapping, long delta);
 
@@ -170,7 +187,7 @@ static inline void set_file_hugepages(struct file *file)
 
 #define is_file_hugepages(file)		0
 #define set_file_hugepages(file)	BUG()
-#define hugetlb_file_setup(name,size,acct,user)	ERR_PTR(-ENOSYS)
+#define hugetlb_file_setup(name,size,acct,user,creat)	ERR_PTR(-ENOSYS)
 
 #endif /* !CONFIG_HUGETLBFS */
 
@@ -185,7 +202,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 #define HSTATE_NAME_LEN 32
 /* Defines one hugetlb page size */
 struct hstate {
-	int hugetlb_next_nid;
+	int next_nid_to_alloc;
+	int next_nid_to_free;
 	unsigned int order;
 	unsigned long mask;
 	unsigned long max_huge_pages;
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index dc2fd545db0..c8006607f94 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -144,7 +144,10 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
 	int name##_end[0];
 
 #define kmemcheck_annotate_bitfield(ptr, name)				\
-	do if (ptr) {							\
+	do {								\
+		if (!ptr)						\
+			break;						\
+									\
 		int _n = (long) &((ptr)->name##_end)			\
 			- (long) &((ptr)->name##_begin);		\
 		BUILD_BUG_ON(_n < 0);					\
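
The rewrite keeps the macro a single do { ... } while (0) statement and replaces the unusual "do if (ptr)" form with an early break, which reads more naturally and behaves predictably inside if/else chains. The classic pattern, demonstrated stand-alone:

    #include <stdio.h>

    /* do { ... } while (0) makes the macro act like one statement,
     * and an early break stands in for a guard clause. */
    #define CHECKED_PRINT(p)                \
            do {                            \
                    if (!(p))               \
                            break;          \
                    printf("%s\n", (p));    \
            } while (0)

    int main(void)
    {
            const char *msg = "hello";

            if (msg)
                    CHECKED_PRINT(msg);     /* safe in if/else */
            else
                    CHECKED_PRINT("never");
            return 0;
    }
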
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
new file mode 100644
index 00000000000..a485c14ecd5
--- /dev/null
+++ b/include/linux/ksm.h
@@ -0,0 +1,79 @@
+#ifndef __LINUX_KSM_H
+#define __LINUX_KSM_H
+/*
+ * Memory merging support.
+ *
+ * This code enables dynamic sharing of identical pages found in different
+ * memory areas, even if they are not shared by fork().
+ */
+
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/vmstat.h>
+
+#ifdef CONFIG_KSM
+int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, int advice, unsigned long *vm_flags);
+int __ksm_enter(struct mm_struct *mm);
+void __ksm_exit(struct mm_struct *mm);
+
+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
+		return __ksm_enter(mm);
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+		__ksm_exit(mm);
+}
+
+/*
+ * A KSM page is one of those write-protected "shared pages" or "merged pages"
+ * which KSM maps into multiple mms, wherever identical anonymous page content
+ * is found in VM_MERGEABLE vmas.  It's a PageAnon page, with NULL anon_vma.
+ */
+static inline int PageKsm(struct page *page)
+{
+	return ((unsigned long)page->mapping == PAGE_MAPPING_ANON);
+}
+
+/*
+ * But we have to avoid the checking which page_add_anon_rmap() performs.
+ */
+static inline void page_add_ksm_rmap(struct page *page)
+{
+	if (atomic_inc_and_test(&page->_mapcount)) {
+		page->mapping = (void *) PAGE_MAPPING_ANON;
+		__inc_zone_page_state(page, NR_ANON_PAGES);
+	}
+}
+#else  /* !CONFIG_KSM */
+
+static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end, int advice, unsigned long *vm_flags)
+{
+	return 0;
+}
+
+static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+}
+
+static inline int PageKsm(struct page *page)
+{
+	return 0;
+}
+
+/* No stub required for page_add_ksm_rmap(page) */
+#endif /* !CONFIG_KSM */
+
+#endif
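
PageKsm() tells a KSM-merged page apart from an ordinary anonymous page by its sentinel mapping value: PageAnon with a NULL anon_vma underneath. A hedged kernel-style sketch of classifying a page with the helpers above (page_backing is a hypothetical debug helper, not part of this patch):

    #include <linux/ksm.h>
    #include <linux/mm.h>

    /* Hypothetical helper: name the backing of a page for a debug dump. */
    static const char *page_backing(struct page *page)
    {
            if (PageKsm(page))      /* merged, write-protected KSM copy */
                    return "ksm";
            if (PageAnon(page))     /* ordinary anonymous page */
                    return "anon";
            return "file";          /* page cache or other mapping */
    }
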
diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h
index ad651f4e45a..3cc2f2c53e4 100644
--- a/include/linux/lis3lv02d.h
+++ b/include/linux/lis3lv02d.h
@@ -32,8 +32,17 @@ struct lis3lv02d_platform_data {
 #define LIS3_IRQ2_DATA_READY	(4 << 3)
 #define LIS3_IRQ2_CLICK		(7 << 3)
 #define LIS3_IRQ_OPEN_DRAIN	(1 << 6)
-#define LIS3_IRQ_ACTIVE_HIGH	(1 << 7)
+#define LIS3_IRQ_ACTIVE_LOW	(1 << 7)
 	unsigned char irq_cfg;
+
+#define LIS3_WAKEUP_X_LO	(1 << 0)
+#define LIS3_WAKEUP_X_HI	(1 << 1)
+#define LIS3_WAKEUP_Y_LO	(1 << 2)
+#define LIS3_WAKEUP_Y_HI	(1 << 3)
+#define LIS3_WAKEUP_Z_LO	(1 << 4)
+#define LIS3_WAKEUP_Z_HI	(1 << 5)
+	unsigned char wakeup_flags;
+	unsigned char wakeup_thresh;
 };
 
 #endif /* __LIS3LV02D_H_ */
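
Board code hands these settings to the driver through platform data. A hedged sketch of a board file filling in the new wakeup fields (hypothetical board; the threshold is an arbitrary device-unit value, and only macros visible above are used):

    #include <linux/lis3lv02d.h>

    /* Hypothetical board: wake on strong motion along X or Y. */
    static struct lis3lv02d_platform_data demo_lis3_pdata = {
            .irq_cfg       = LIS3_IRQ2_DATA_READY | LIS3_IRQ_OPEN_DRAIN,
            .wakeup_flags  = LIS3_WAKEUP_X_HI | LIS3_WAKEUP_Y_HI,
            .wakeup_thresh = 10,    /* board-specific tuning */
    };
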
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c325b187966..a34dea46b62 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
 	}
 }
 
-static inline int __nlm_cmp_addr4(const struct sockaddr *sap1,
-				  const struct sockaddr *sap2)
-{
-	const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
-	const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
-	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
-				  const struct sockaddr *sap2)
-{
-	const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
-	const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
-	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
-}
-#else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
-static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
-				  const struct sockaddr *sap2)
-{
-	return 0;
-}
-#endif	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
-
-/*
- * Compare two host addresses
- *
- * Return TRUE if the addresses are the same; otherwise FALSE.
- */
-static inline int nlm_cmp_addr(const struct sockaddr *sap1,
-			       const struct sockaddr *sap2)
-{
-	if (sap1->sa_family == sap2->sa_family) {
-		switch (sap1->sa_family) {
-		case AF_INET:
-			return __nlm_cmp_addr4(sap1, sap2);
-		case AF_INET6:
-			return __nlm_cmp_addr6(sap1, sap2);
-		}
-	}
-	return 0;
-}
-
 /*
  * Compare two NLM locks.
  * When the second lock is of type F_UNLCK, this acts like a wildcard.
@@ -395,7 +352,7 @@ static inline int nlm_compare_locks(const struct file_lock *fl1,
 	     &&(fl1->fl_type  == fl2->fl_type || fl2->fl_type == F_UNLCK);
 }
 
-extern struct lock_manager_operations nlmsvc_lock_operations;
+extern const struct lock_manager_operations nlmsvc_lock_operations;
 
 #endif /* __KERNEL__ */
 
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 536ca12442c..78c3bed1c3f 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -104,7 +104,7 @@
 #define DL_UNITDATA_IND		0x3108
 #define DL_INFORMATION_IND	0x0008
 
-/* intern layer 2 managment */
+/* intern layer 2 management */
 #define MDL_ASSIGN_REQ		0x1804
 #define MDL_ASSIGN_IND		0x1904
 #define MDL_REMOVE_REQ		0x1A04
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 9be484d1128..7c08052e332 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -47,22 +47,16 @@ mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
 }
 
 /*
- * 2 mempool_alloc_t's and a mempool_free_t to kmalloc/kzalloc and kfree
- * the amount of memory specified by pool_data
+ * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the
+ * amount of memory specified by pool_data
  */
 void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data);
-void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data);
 void mempool_kfree(void *element, void *pool_data);
 static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
 {
 	return mempool_create(min_nr, mempool_kmalloc, mempool_kfree,
 			      (void *) size);
 }
-static inline mempool_t *mempool_create_kzalloc_pool(int min_nr, size_t size)
-{
-	return mempool_create(min_nr, mempool_kzalloc, mempool_kfree,
-			      (void *) size);
-}
 
 /*
  * A mempool_alloc_t and mempool_free_t for a simple page allocator that
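
With the kzalloc variant gone, callers that need zeroed elements simply memset after mempool_alloc(). A hedged kernel-style sketch of the surviving kmalloc-backed pool API (mempool_demo is a hypothetical caller):

    #include <linux/mempool.h>
    #include <linux/string.h>

    /* Sketch: a pool guaranteeing at least 4 in-flight 256-byte buffers. */
    static int mempool_demo(void)
    {
            mempool_t *pool = mempool_create_kmalloc_pool(4, 256);
            void *buf;

            if (!pool)
                    return -ENOMEM;

            buf = mempool_alloc(pool, GFP_KERNEL);
            if (buf) {
                    memset(buf, 0, 256);    /* callers zero by hand now */
                    mempool_free(buf, pool);
            }
            mempool_destroy(pool);
            return 0;
    }
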
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9a72cc78e6b..5946e2ff9fe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -25,6 +25,7 @@ extern unsigned long max_mapnr;
 #endif
 
 extern unsigned long num_physpages;
+extern unsigned long totalram_pages;
 extern void * high_memory;
 extern int page_cluster;
 
@@ -103,6 +104,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
 #define VM_PFN_AT_MMAP	0x40000000	/* PFNMAP vma that is fully mapped at mmap time */
+#define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -700,17 +702,8 @@ extern void pagefault_out_of_memory(void);
 
 extern void show_free_areas(void);
 
-#ifdef CONFIG_SHMEM
-extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
-#else
-static inline int shmem_lock(struct file *file, int lock,
-			    struct user_struct *user)
-{
-	return 0;
-}
-#endif
+int shmem_lock(struct file *file, int lock, struct user_struct *user);
 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
-
 int shmem_zero_setup(struct vm_area_struct *);
 
 #ifndef CONFIG_MMU
@@ -815,6 +808,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
+struct page *get_dump_page(unsigned long addr);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned long offset);
@@ -1058,6 +1052,8 @@ extern void setup_per_cpu_pageset(void);
 static inline void setup_per_cpu_pageset(void) {}
 #endif
 
+extern void zone_pcp_update(struct zone *zone);
+
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 
@@ -1226,7 +1222,8 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_WRITE	0x01	/* check pte is writable */
 #define FOLL_TOUCH	0x02	/* mark page accessed */
 #define FOLL_GET	0x04	/* do get_page on page */
-#define FOLL_ANON	0x08	/* give ZERO_PAGE if no pgtable */
+#define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
+#define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 7fbb9726755..8835b877b8d 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -5,7 +5,7 @@
  * page_is_file_cache - should the page be on a file LRU or anon LRU?
  * @page: the page to test
  *
- * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
+ * Returns 1 if @page is page cache page backed by a regular filesystem,
  * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
  * Used by functions that manipulate the LRU lists, to sort a page
  * onto the right LRU list.
@@ -16,11 +16,7 @@
  */
 static inline int page_is_file_cache(struct page *page)
 {
-	if (PageSwapBacked(page))
-		return 0;
-
-	/* The page is page cache backed by a normal filesystem. */
-	return LRU_FILE;
+	return !PageSwapBacked(page);
 }
 
 static inline void
@@ -39,21 +35,36 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 	mem_cgroup_del_lru_list(page, l);
 }
 
+/**
+ * page_lru_base_type - which LRU list type should a page be on?
+ * @page: the page to test
+ *
+ * Used for LRU list index arithmetic.
+ *
+ * Returns the base LRU type - file or anon - @page should be on.
+ */
+static inline enum lru_list page_lru_base_type(struct page *page)
+{
+	if (page_is_file_cache(page))
+		return LRU_INACTIVE_FILE;
+	return LRU_INACTIVE_ANON;
+}
+
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
-	enum lru_list l = LRU_BASE;
+	enum lru_list l;
 
 	list_del(&page->lru);
 	if (PageUnevictable(page)) {
 		__ClearPageUnevictable(page);
 		l = LRU_UNEVICTABLE;
 	} else {
+		l = page_lru_base_type(page);
 		if (PageActive(page)) {
 			__ClearPageActive(page);
 			l += LRU_ACTIVE;
 		}
-		l += page_is_file_cache(page);
 	}
 	__dec_zone_state(zone, NR_LRU_BASE + l);
 	mem_cgroup_del_lru_list(page, l);
@@ -68,14 +79,14 @@ del_page_from_lru(struct zone *zone, struct page *page)
  */
 static inline enum lru_list page_lru(struct page *page)
 {
-	enum lru_list lru = LRU_BASE;
+	enum lru_list lru;
 
 	if (PageUnevictable(page))
 		lru = LRU_UNEVICTABLE;
 	else {
+		lru = page_lru_base_type(page);
 		if (PageActive(page))
 			lru += LRU_ACTIVE;
-		lru += page_is_file_cache(page);
 	}
 
 	return lru;
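
The index arithmetic relies on the LRU enum laying out inactive/active pairs back to back, so base type plus LRU_ACTIVE selects the exact list. A user-space mock of that arithmetic (enum values mirror the kernel's ordering at the time):

    #include <stdio.h>

    /* Mirrors the kernel's LRU list ordering circa this release. */
    enum lru_list {
            LRU_INACTIVE_ANON,      /* 0 */
            LRU_ACTIVE_ANON,        /* 1 = LRU_INACTIVE_ANON + LRU_ACTIVE */
            LRU_INACTIVE_FILE,      /* 2 */
            LRU_ACTIVE_FILE,        /* 3 = LRU_INACTIVE_FILE + LRU_ACTIVE */
            LRU_UNEVICTABLE,        /* 4 */
    };
    #define LRU_ACTIVE 1

    static enum lru_list pick_lru(int file_backed, int active, int unevictable)
    {
            enum lru_list lru;

            if (unevictable)
                    return LRU_UNEVICTABLE;
            lru = file_backed ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
            if (active)
                    lru += LRU_ACTIVE;      /* same trick as page_lru() */
            return lru;
    }

    int main(void)
    {
            printf("active file page -> list %d\n", pick_lru(1, 1, 0));
            printf("inactive anon page -> list %d\n", pick_lru(0, 0, 0));
            return 0;
    }
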
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
new file mode 100644
index 00000000000..70fffeba749
--- /dev/null
+++ b/include/linux/mmu_context.h
@@ -0,0 +1,9 @@
1#ifndef _LINUX_MMU_CONTEXT_H
2#define _LINUX_MMU_CONTEXT_H
3
4struct mm_struct;
5
6void use_mm(struct mm_struct *mm);
7void unuse_mm(struct mm_struct *mm);
8
9#endif
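
The new header only exposes use_mm()/unuse_mm() outside mm/. A hedged sketch of the classic calling pattern they support, a kernel thread temporarily adopting a user address space (the helper name and error handling here are illustrative, not from this patch):

    #include <linux/mmu_context.h>
    #include <linux/uaccess.h>
    #include <linux/errno.h>

    /*
     * Illustrative only: the caller must already hold a reference on 'mm'
     * before switching onto it from a kernel thread.
     */
    static int sketch_copy_to_user_mm(struct mm_struct *mm, void __user *uptr,
                                      const void *kbuf, size_t len)
    {
            int ret;

            use_mm(mm);                     /* adopt mm on this kthread */
            ret = copy_to_user(uptr, kbuf, len) ? -EFAULT : 0;
            unuse_mm(mm);                   /* restore the lazy-mm state */
            return ret;
    }
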
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b77486d152c..4e02ee2b071 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -62,6 +62,15 @@ struct mmu_notifier_ops {
62 unsigned long address); 62 unsigned long address);
63 63
64 /* 64 /*
65 * change_pte is called in cases that pte mapping to page is changed:
66 * for example, when ksm remaps pte to point to a new shared page.
67 */
68 void (*change_pte)(struct mmu_notifier *mn,
69 struct mm_struct *mm,
70 unsigned long address,
71 pte_t pte);
72
73 /*
65 * Before this is invoked any secondary MMU is still ok to 74 * Before this is invoked any secondary MMU is still ok to
66 * read/write to the page previously pointed to by the Linux 75 * read/write to the page previously pointed to by the Linux
67 * pte because the page hasn't been freed yet and it won't be 76 * pte because the page hasn't been freed yet and it won't be
@@ -154,6 +163,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
154extern void __mmu_notifier_release(struct mm_struct *mm); 163extern void __mmu_notifier_release(struct mm_struct *mm);
155extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, 164extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
156 unsigned long address); 165 unsigned long address);
166extern void __mmu_notifier_change_pte(struct mm_struct *mm,
167 unsigned long address, pte_t pte);
157extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, 168extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
158 unsigned long address); 169 unsigned long address);
159extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, 170extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
@@ -175,6 +186,13 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
175 return 0; 186 return 0;
176} 187}
177 188
189static inline void mmu_notifier_change_pte(struct mm_struct *mm,
190 unsigned long address, pte_t pte)
191{
192 if (mm_has_notifiers(mm))
193 __mmu_notifier_change_pte(mm, address, pte);
194}
195
178static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, 196static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
179 unsigned long address) 197 unsigned long address)
180{ 198{
@@ -236,6 +254,16 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
236 __young; \ 254 __young; \
237}) 255})
238 256
257#define set_pte_at_notify(__mm, __address, __ptep, __pte) \
258({ \
259 struct mm_struct *___mm = __mm; \
260 unsigned long ___address = __address; \
261 pte_t ___pte = __pte; \
262 \
263 set_pte_at(___mm, ___address, __ptep, ___pte); \
264 mmu_notifier_change_pte(___mm, ___address, ___pte); \
265})
266
239#else /* CONFIG_MMU_NOTIFIER */ 267#else /* CONFIG_MMU_NOTIFIER */
240 268
241static inline void mmu_notifier_release(struct mm_struct *mm) 269static inline void mmu_notifier_release(struct mm_struct *mm)
@@ -248,6 +276,11 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
248 return 0; 276 return 0;
249} 277}
250 278
279static inline void mmu_notifier_change_pte(struct mm_struct *mm,
280 unsigned long address, pte_t pte)
281{
282}
283
251static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, 284static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
252 unsigned long address) 285 unsigned long address)
253{ 286{
@@ -273,6 +306,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
273 306
274#define ptep_clear_flush_young_notify ptep_clear_flush_young 307#define ptep_clear_flush_young_notify ptep_clear_flush_young
275#define ptep_clear_flush_notify ptep_clear_flush 308#define ptep_clear_flush_notify ptep_clear_flush
309#define set_pte_at_notify set_pte_at
276 310
277#endif /* CONFIG_MMU_NOTIFIER */ 311#endif /* CONFIG_MMU_NOTIFIER */
278 312
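
The new change_pte callback lets a secondary MMU update a mapping in place when KSM substitutes a shared page, instead of taking the full invalidate path; set_pte_at_notify() above is the write side that fires it. A hedged sketch of the consumer side (the driver here is hypothetical):

    #include <linux/mmu_notifier.h>

    /*
     * Hypothetical secondary-MMU driver: re-point the shadow mapping at
     * the translation carried by 'pte' rather than just dropping it.
     */
    static void sketch_change_pte(struct mmu_notifier *mn,
                                  struct mm_struct *mm,
                                  unsigned long address, pte_t pte)
    {
            /* update shadow page tables for 'address' from pte_pfn(pte) */
    }

    static const struct mmu_notifier_ops sketch_notifier_ops = {
            .change_pte     = sketch_change_pte,
            /* .invalidate_page and friends omitted from this sketch */
    };
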
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 88959853737..652ef01be58 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -38,6 +38,7 @@
38#define MIGRATE_UNMOVABLE 0 38#define MIGRATE_UNMOVABLE 0
39#define MIGRATE_RECLAIMABLE 1 39#define MIGRATE_RECLAIMABLE 1
40#define MIGRATE_MOVABLE 2 40#define MIGRATE_MOVABLE 2
41#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */
41#define MIGRATE_RESERVE 3 42#define MIGRATE_RESERVE 3
42#define MIGRATE_ISOLATE 4 /* can't allocate from here */ 43#define MIGRATE_ISOLATE 4 /* can't allocate from here */
43#define MIGRATE_TYPES 5 44#define MIGRATE_TYPES 5
@@ -94,11 +95,15 @@ enum zone_stat_item {
94 NR_SLAB_RECLAIMABLE, 95 NR_SLAB_RECLAIMABLE,
95 NR_SLAB_UNRECLAIMABLE, 96 NR_SLAB_UNRECLAIMABLE,
96 NR_PAGETABLE, /* used for pagetables */ 97 NR_PAGETABLE, /* used for pagetables */
98 NR_KERNEL_STACK,
99 /* Second 128 byte cacheline */
97 NR_UNSTABLE_NFS, /* NFS unstable pages */ 100 NR_UNSTABLE_NFS, /* NFS unstable pages */
98 NR_BOUNCE, 101 NR_BOUNCE,
99 NR_VMSCAN_WRITE, 102 NR_VMSCAN_WRITE,
100 /* Second 128 byte cacheline */
101 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ 103 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
104 NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
105 NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
106 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
102#ifdef CONFIG_NUMA 107#ifdef CONFIG_NUMA
103 NUMA_HIT, /* allocated in intended node */ 108 NUMA_HIT, /* allocated in intended node */
104 NUMA_MISS, /* allocated in non intended node */ 109 NUMA_MISS, /* allocated in non intended node */
@@ -165,7 +170,9 @@ struct per_cpu_pages {
165 int count; /* number of pages in the list */ 170 int count; /* number of pages in the list */
166 int high; /* high watermark, emptying needed */ 171 int high; /* high watermark, emptying needed */
167 int batch; /* chunk size for buddy add/remove */ 172 int batch; /* chunk size for buddy add/remove */
168 struct list_head list; /* the list of pages */ 173
174 /* Lists of pages, one per migrate type stored on the pcp-lists */
175 struct list_head lists[MIGRATE_PCPTYPES];
169}; 176};
170 177
171struct per_cpu_pageset { 178struct per_cpu_pageset {
@@ -269,6 +276,11 @@ struct zone_reclaim_stat {
269 */ 276 */
270 unsigned long recent_rotated[2]; 277 unsigned long recent_rotated[2];
271 unsigned long recent_scanned[2]; 278 unsigned long recent_scanned[2];
279
280 /*
281 * accumulated for batching
282 */
283 unsigned long nr_saved_scan[NR_LRU_LISTS];
272}; 284};
273 285
274struct zone { 286struct zone {
@@ -323,7 +335,6 @@ struct zone {
323 spinlock_t lru_lock; 335 spinlock_t lru_lock;
324 struct zone_lru { 336 struct zone_lru {
325 struct list_head list; 337 struct list_head list;
326 unsigned long nr_saved_scan; /* accumulated for batching */
327 } lru[NR_LRU_LISTS]; 338 } lru[NR_LRU_LISTS];
328 339
329 struct zone_reclaim_stat reclaim_stat; 340 struct zone_reclaim_stat reclaim_stat;
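
With the pcp free lists split per migratetype, consumers index lists[] instead of walking a single list. A sketch of a drain-style traversal over the new layout (pcp locking/irq rules and the actual free-to-buddy call are elided):

    #include <linux/mmzone.h>
    #include <linux/mm.h>
    #include <linux/list.h>

    /*
     * Sketch: visit every page held on a CPU's pcp lists. Pages are
     * linked through page->lru, one list per migratetype.
     */
    static void sketch_walk_pcp(struct per_cpu_pages *pcp)
    {
            struct page *page, *next;
            int mt;

            for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
                    list_for_each_entry_safe(page, next, &pcp->lists[mt], lru) {
                            list_del(&page->lru);
                            pcp->count--;
                            /* return 'page' to the buddy allocator here */
                    }
            }
    }
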
diff --git a/include/linux/namei.h b/include/linux/namei.h
index d870ae2faed..ec0f607b364 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -40,7 +40,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
40 * - follow links at the end 40 * - follow links at the end
41 * - require a directory 41 * - require a directory
42 * - ending slashes ok even for nonexistent files 42 * - ending slashes ok even for nonexistent files
43 * - internal "there are more path compnents" flag 43 * - internal "there are more path components" flag
44 * - locked when lookup done with dcache_lock held 44 * - locked when lookup done with dcache_lock held
45 * - dentry cache is untrusted; force a real lookup 45 * - dentry cache is untrusted; force a real lookup
46 */ 46 */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 33b283601f6..c4c06020810 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -234,7 +234,7 @@ enum nfs_opnum4 {
234Needs to be updated if more operations are defined in future.*/ 234Needs to be updated if more operations are defined in future.*/
235 235
236#define FIRST_NFS4_OP OP_ACCESS 236#define FIRST_NFS4_OP OP_ACCESS
237#define LAST_NFS4_OP OP_RELEASE_LOCKOWNER 237#define LAST_NFS4_OP OP_RECLAIM_COMPLETE
238 238
239enum nfsstat4 { 239enum nfsstat4 {
240 NFS4_OK = 0, 240 NFS4_OK = 0,
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 2b49d676d0c..03bbe903910 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -56,6 +56,9 @@ extern struct svc_version nfsd_version2, nfsd_version3,
56extern u32 nfsd_supported_minorversion; 56extern u32 nfsd_supported_minorversion;
57extern struct mutex nfsd_mutex; 57extern struct mutex nfsd_mutex;
58extern struct svc_serv *nfsd_serv; 58extern struct svc_serv *nfsd_serv;
59extern spinlock_t nfsd_drc_lock;
60extern unsigned int nfsd_drc_max_mem;
61extern unsigned int nfsd_drc_mem_used;
59 62
60extern struct seq_operations nfs_exports_op; 63extern struct seq_operations nfs_exports_op;
61 64
@@ -163,7 +166,7 @@ extern int nfsd_max_blksize;
163extern unsigned int max_delegations; 166extern unsigned int max_delegations;
164int nfs4_state_init(void); 167int nfs4_state_init(void);
165void nfsd4_free_slabs(void); 168void nfsd4_free_slabs(void);
166void nfs4_state_start(void); 169int nfs4_state_start(void);
167void nfs4_state_shutdown(void); 170void nfs4_state_shutdown(void);
168time_t nfs4_lease_time(void); 171time_t nfs4_lease_time(void);
169void nfs4_reset_lease(time_t leasetime); 172void nfs4_reset_lease(time_t leasetime);
@@ -171,7 +174,7 @@ int nfs4_reset_recoverydir(char *recdir);
171#else 174#else
172static inline int nfs4_state_init(void) { return 0; } 175static inline int nfs4_state_init(void) { return 0; }
173static inline void nfsd4_free_slabs(void) { } 176static inline void nfsd4_free_slabs(void) { }
174static inline void nfs4_state_start(void) { } 177static inline int nfs4_state_start(void) { return 0; }
175static inline void nfs4_state_shutdown(void) { } 178static inline void nfs4_state_shutdown(void) { }
176static inline time_t nfs4_lease_time(void) { return 0; } 179static inline time_t nfs4_lease_time(void) { return 0; }
177static inline void nfs4_reset_lease(time_t leasetime) { } 180static inline void nfs4_reset_lease(time_t leasetime) { }
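
Since nfs4_state_start() can now fail, server bring-up must propagate the error instead of assuming success. A hedged sketch of the expected call shape (the ordering relative to nfs4_state_init() is an assumption; the real call site lives in fs/nfsd and is not shown by this patch):

    /* Assumed calling pattern only. */
    static int sketch_nfsd_bringup(void)
    {
            int ret;

            ret = nfs4_state_init();
            if (ret)
                    return ret;
            return nfs4_state_start();      /* now checked, no longer void */
    }
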
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 57ab2ed0845..b38d1132418 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -60,6 +60,12 @@ typedef struct {
60#define si_stateownerid si_opaque.so_stateownerid 60#define si_stateownerid si_opaque.so_stateownerid
61#define si_fileid si_opaque.so_fileid 61#define si_fileid si_opaque.so_fileid
62 62
63struct nfsd4_cb_sequence {
64 /* args/res */
65 u32 cbs_minorversion;
66 struct nfs4_client *cbs_clp;
67};
68
63struct nfs4_delegation { 69struct nfs4_delegation {
64 struct list_head dl_perfile; 70 struct list_head dl_perfile;
65 struct list_head dl_perclnt; 71 struct list_head dl_perclnt;
@@ -81,38 +87,35 @@ struct nfs4_delegation {
81/* client delegation callback info */ 87/* client delegation callback info */
82struct nfs4_cb_conn { 88struct nfs4_cb_conn {
83 /* SETCLIENTID info */ 89 /* SETCLIENTID info */
84 u32 cb_addr; 90 struct sockaddr_storage cb_addr;
85 unsigned short cb_port; 91 size_t cb_addrlen;
86 u32 cb_prog; 92 u32 cb_prog;
87 u32 cb_minorversion; 93 u32 cb_minorversion;
88 u32 cb_ident; /* minorversion 0 only */ 94 u32 cb_ident; /* minorversion 0 only */
89 /* RPC client info */ 95 /* RPC client info */
90 atomic_t cb_set; /* successful CB_NULL call */ 96 atomic_t cb_set; /* successful CB_NULL call */
91 struct rpc_clnt * cb_client; 97 struct rpc_clnt * cb_client;
92 struct rpc_cred * cb_cred;
93}; 98};
94 99
95/* Maximum number of slots per session. 128 is useful for long haul TCP */ 100/* Maximum number of slots per session. 160 is useful for long haul TCP */
96#define NFSD_MAX_SLOTS_PER_SESSION 128 101#define NFSD_MAX_SLOTS_PER_SESSION 160
97/* Maximum number of pages per slot cache entry */
98#define NFSD_PAGES_PER_SLOT 1
99/* Maximum number of operations per session compound */ 102/* Maximum number of operations per session compound */
100#define NFSD_MAX_OPS_PER_COMPOUND 16 103#define NFSD_MAX_OPS_PER_COMPOUND 16
101 104/* Maximum session per slot cache size */
102struct nfsd4_cache_entry { 105#define NFSD_SLOT_CACHE_SIZE 1024
103 __be32 ce_status; 106/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
104 struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */ 107#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
105 struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1]; 108#define NFSD_MAX_MEM_PER_SESSION \
106 int ce_cachethis; 109 (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
107 short ce_resused;
108 int ce_opcnt;
109 int ce_rpchdrlen;
110};
111 110
112struct nfsd4_slot { 111struct nfsd4_slot {
113 bool sl_inuse; 112 bool sl_inuse;
114 u32 sl_seqid; 113 bool sl_cachethis;
115 struct nfsd4_cache_entry sl_cache_entry; 114 u16 sl_opcnt;
115 u32 sl_seqid;
116 __be32 sl_status;
117 u32 sl_datalen;
118 char sl_data[];
116}; 119};
117 120
118struct nfsd4_channel_attrs { 121struct nfsd4_channel_attrs {
@@ -126,6 +129,25 @@ struct nfsd4_channel_attrs {
126 u32 rdma_attrs; 129 u32 rdma_attrs;
127}; 130};
128 131
132struct nfsd4_create_session {
133 clientid_t clientid;
134 struct nfs4_sessionid sessionid;
135 u32 seqid;
136 u32 flags;
137 struct nfsd4_channel_attrs fore_channel;
138 struct nfsd4_channel_attrs back_channel;
139 u32 callback_prog;
140 u32 uid;
141 u32 gid;
142};
143
144/* The single slot clientid cache structure */
145struct nfsd4_clid_slot {
146 u32 sl_seqid;
147 __be32 sl_status;
148 struct nfsd4_create_session sl_cr_ses;
149};
150
129struct nfsd4_session { 151struct nfsd4_session {
130 struct kref se_ref; 152 struct kref se_ref;
131 struct list_head se_hash; /* hash by sessionid */ 153 struct list_head se_hash; /* hash by sessionid */
@@ -135,7 +157,7 @@ struct nfsd4_session {
135 struct nfs4_sessionid se_sessionid; 157 struct nfs4_sessionid se_sessionid;
136 struct nfsd4_channel_attrs se_fchannel; 158 struct nfsd4_channel_attrs se_fchannel;
137 struct nfsd4_channel_attrs se_bchannel; 159 struct nfsd4_channel_attrs se_bchannel;
138 struct nfsd4_slot se_slots[]; /* forward channel slots */ 160 struct nfsd4_slot *se_slots[]; /* forward channel slots */
139}; 161};
140 162
141static inline void 163static inline void
@@ -180,7 +202,7 @@ struct nfs4_client {
180 char cl_recdir[HEXDIR_LEN]; /* recovery dir */ 202 char cl_recdir[HEXDIR_LEN]; /* recovery dir */
181 nfs4_verifier cl_verifier; /* generated by client */ 203 nfs4_verifier cl_verifier; /* generated by client */
182 time_t cl_time; /* time of last lease renewal */ 204 time_t cl_time; /* time of last lease renewal */
183 __be32 cl_addr; /* client ipaddress */ 205 struct sockaddr_storage cl_addr; /* client ipaddress */
184 u32 cl_flavor; /* setclientid pseudoflavor */ 206 u32 cl_flavor; /* setclientid pseudoflavor */
185 char *cl_principal; /* setclientid principal name */ 207 char *cl_principal; /* setclientid principal name */
186 struct svc_cred cl_cred; /* setclientid principal */ 208 struct svc_cred cl_cred; /* setclientid principal */
@@ -192,9 +214,17 @@ struct nfs4_client {
192 214
193 /* for nfs41 */ 215 /* for nfs41 */
194 struct list_head cl_sessions; 216 struct list_head cl_sessions;
195 struct nfsd4_slot cl_slot; /* create_session slot */ 217 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
196 u32 cl_exchange_flags; 218 u32 cl_exchange_flags;
197 struct nfs4_sessionid cl_sessionid; 219 struct nfs4_sessionid cl_sessionid;
220
221 /* for nfs41 callbacks */
222 /* We currently support a single back channel with a single slot */
223 unsigned long cl_cb_slot_busy;
224 u32 cl_cb_seq_nr;
225 struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
226 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
227 /* wait here for slots */
198}; 228};
199 229
200/* struct nfs4_client_reset 230/* struct nfs4_client_reset
@@ -345,6 +375,7 @@ extern int nfs4_in_grace(void);
345extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 375extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
346extern void put_nfs4_client(struct nfs4_client *clp); 376extern void put_nfs4_client(struct nfs4_client *clp);
347extern void nfs4_free_stateowner(struct kref *kref); 377extern void nfs4_free_stateowner(struct kref *kref);
378extern int set_callback_cred(void);
348extern void nfsd4_probe_callback(struct nfs4_client *clp); 379extern void nfsd4_probe_callback(struct nfs4_client *clp);
349extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 380extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
350extern void nfs4_put_delegation(struct nfs4_delegation *dp); 381extern void nfs4_put_delegation(struct nfs4_delegation *dp);
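
The page-based nfsd4_cache_entry is replaced by a byte cache embedded in the slot via the flexible sl_data[] tail, and se_slots[] becomes an array of pointers so each slot can be allocated separately. With a 1024-byte cache per slot and at most 32 cacheable slots, NFSD_MAX_MEM_PER_SESSION works out to 32 KB per session. A sketch of the per-slot allocation this implies (the real code is in fs/nfsd/nfs4state.c and may size sl_data from the negotiated channel attributes):

    #include <linux/slab.h>

    /* Sketch: one slot plus its inline DRC buffer in a single allocation. */
    static struct nfsd4_slot *sketch_alloc_slot(void)
    {
            return kzalloc(sizeof(struct nfsd4_slot) + NFSD_SLOT_CACHE_SIZE,
                           GFP_KERNEL);
    }
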
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 2bacf753506..73164c2b3d2 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -51,7 +51,7 @@ struct nfsd4_compound_state {
51 /* For sessions DRC */ 51 /* For sessions DRC */
52 struct nfsd4_session *session; 52 struct nfsd4_session *session;
53 struct nfsd4_slot *slot; 53 struct nfsd4_slot *slot;
54 __be32 *statp; 54 __be32 *datap;
55 size_t iovlen; 55 size_t iovlen;
56 u32 minorversion; 56 u32 minorversion;
57 u32 status; 57 u32 status;
@@ -366,18 +366,6 @@ struct nfsd4_exchange_id {
366 int spa_how; 366 int spa_how;
367}; 367};
368 368
369struct nfsd4_create_session {
370 clientid_t clientid;
371 struct nfs4_sessionid sessionid;
372 u32 seqid;
373 u32 flags;
374 struct nfsd4_channel_attrs fore_channel;
375 struct nfsd4_channel_attrs back_channel;
376 u32 callback_prog;
377 u32 uid;
378 u32 gid;
379};
380
381struct nfsd4_sequence { 369struct nfsd4_sequence {
382 struct nfs4_sessionid sessionid; /* request/response */ 370 struct nfs4_sessionid sessionid; /* request/response */
383 u32 seqid; /* request/response */ 371 u32 seqid; /* request/response */
@@ -479,13 +467,12 @@ struct nfsd4_compoundres {
479static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) 467static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
480{ 468{
481 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; 469 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
482 return args->opcnt == 1; 470 return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
483} 471}
484 472
485static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) 473static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
486{ 474{
487 return !resp->cstate.slot->sl_cache_entry.ce_cachethis || 475 return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
488 nfsd4_is_solo_sequence(resp);
489} 476}
490 477
491#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) 478#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
diff --git a/include/linux/oom.h b/include/linux/oom.h
index a7979baf1e3..6aac5fe4f6f 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -30,5 +30,16 @@ extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
30extern int register_oom_notifier(struct notifier_block *nb); 30extern int register_oom_notifier(struct notifier_block *nb);
31extern int unregister_oom_notifier(struct notifier_block *nb); 31extern int unregister_oom_notifier(struct notifier_block *nb);
32 32
33extern bool oom_killer_disabled;
34
35static inline void oom_killer_disable(void)
36{
37 oom_killer_disabled = true;
38}
39
40static inline void oom_killer_enable(void)
41{
42 oom_killer_disabled = false;
43}
33#endif /* __KERNEL__*/ 44#endif /* __KERNEL__*/
34#endif /* _INCLUDE_LINUX_OOM_H */ 45#endif /* _INCLUDE_LINUX_OOM_H */
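
The disable/enable pair is a plain global toggle, so it only helps callers that can guarantee no concurrent untracked allocators, e.g. the hibernation path with userspace frozen. A sketch of that usage, assuming the allocator checks oom_killer_disabled and fails allocations instead of OOM-killing:

    #include <linux/oom.h>

    /* Sketch: suppress OOM kills across a freeze-critical section. */
    static void sketch_frozen_section(void)
    {
            oom_killer_disable();
            /* ... allocate for the hibernation image; failures are
             *     expected to return NULL rather than kill tasks ... */
            oom_killer_enable();
    }
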
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 2b87acfc5f8..13de789f0a5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -158,6 +158,9 @@ static inline int TestSetPage##uname(struct page *page) \
158static inline int TestClearPage##uname(struct page *page) \ 158static inline int TestClearPage##uname(struct page *page) \
159 { return test_and_clear_bit(PG_##lname, &page->flags); } 159 { return test_and_clear_bit(PG_##lname, &page->flags); }
160 160
161#define __TESTCLEARFLAG(uname, lname) \
162static inline int __TestClearPage##uname(struct page *page) \
163 { return __test_and_clear_bit(PG_##lname, &page->flags); }
161 164
162#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ 165#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
163 SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname) 166 SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
@@ -184,6 +187,9 @@ static inline void __ClearPage##uname(struct page *page) { }
184#define TESTCLEARFLAG_FALSE(uname) \ 187#define TESTCLEARFLAG_FALSE(uname) \
185static inline int TestClearPage##uname(struct page *page) { return 0; } 188static inline int TestClearPage##uname(struct page *page) { return 0; }
186 189
190#define __TESTCLEARFLAG_FALSE(uname) \
191static inline int __TestClearPage##uname(struct page *page) { return 0; }
192
187struct page; /* forward declaration */ 193struct page; /* forward declaration */
188 194
189TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked) 195TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked)
@@ -250,11 +256,11 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
250#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT 256#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
251#define MLOCK_PAGES 1 257#define MLOCK_PAGES 1
252PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) 258PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
253 TESTSCFLAG(Mlocked, mlocked) 259 TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
254#else 260#else
255#define MLOCK_PAGES 0 261#define MLOCK_PAGES 0
256PAGEFLAG_FALSE(Mlocked) 262PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
257 SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) 263 TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
258#endif 264#endif
259 265
260#ifdef CONFIG_ARCH_USES_PG_UNCACHED 266#ifdef CONFIG_ARCH_USES_PG_UNCACHED
@@ -396,8 +402,8 @@ static inline void __ClearPageTail(struct page *page)
396 */ 402 */
397#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) 403#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1)
398 404
399#endif /* !__GENERATING_BOUNDS_H */ 405#define PAGE_FLAGS_PRIVATE \
400 406 (1 << PG_private | 1 << PG_private_2)
401/** 407/**
402 * page_has_private - Determine if page has private stuff 408 * page_has_private - Determine if page has private stuff
403 * @page: The page to be checked 409 * @page: The page to be checked
@@ -405,8 +411,11 @@ static inline void __ClearPageTail(struct page *page)
405 * Determine if a page has private stuff, indicating that release routines 411 * Determine if a page has private stuff, indicating that release routines
406 * should be invoked upon it. 412 * should be invoked upon it.
407 */ 413 */
408#define page_has_private(page) \ 414static inline int page_has_private(struct page *page)
409 ((page)->flags & ((1 << PG_private) | \ 415{
410 (1 << PG_private_2))) 416 return !!(page->flags & PAGE_FLAGS_PRIVATE);
417}
418
419#endif /* !__GENERATING_BOUNDS_H */
411 420
412#endif /* PAGE_FLAGS_H */ 421#endif /* PAGE_FLAGS_H */
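
__TESTCLEARFLAG generates the non-atomic __TestClearPage* variant, which is only safe where the caller has exclusive ownership of page->flags. A sketch of the guarded use the Mlocked wiring above enables:

    #include <linux/page-flags.h>

    /*
     * Sketch: a non-atomic clear is fine in the page-free path, where no
     * other CPU can touch this page's flags concurrently.
     */
    static int sketch_free_clears_mlocked(struct page *page)
    {
            if (MLOCK_PAGES)
                    return __TestClearPageMlocked(page);
            return 0;       /* __TESTCLEARFLAG_FALSE stub when !MLOCK_PAGES */
    }
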
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index b063c7328ba..fddfafaed02 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -360,6 +360,7 @@ struct pnp_driver {
360 unsigned int flags; 360 unsigned int flags;
361 int (*probe) (struct pnp_dev *dev, const struct pnp_device_id *dev_id); 361 int (*probe) (struct pnp_dev *dev, const struct pnp_device_id *dev_id);
362 void (*remove) (struct pnp_dev *dev); 362 void (*remove) (struct pnp_dev *dev);
363 void (*shutdown) (struct pnp_dev *dev);
363 int (*suspend) (struct pnp_dev *dev, pm_message_t state); 364 int (*suspend) (struct pnp_dev *dev, pm_message_t state);
364 int (*resume) (struct pnp_dev *dev); 365 int (*resume) (struct pnp_dev *dev);
365 struct device_driver driver; 366 struct device_driver driver;
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 6729f7dcd60..7fc194aef8c 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -65,6 +65,9 @@
65#define MUTEX_DEBUG_INIT 0x11 65#define MUTEX_DEBUG_INIT 0x11
66#define MUTEX_DEBUG_FREE 0x22 66#define MUTEX_DEBUG_FREE 0x22
67 67
68/********** lib/flex_array.c **********/
69#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */
70
68/********** security/ **********/ 71/********** security/ **********/
69#define KEY_DESTROY 0xbd 72#define KEY_DESTROY 0xbd
70 73
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 26361c4c037..3ebb2315364 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -135,8 +135,8 @@ static inline int sb_any_quota_active(struct super_block *sb)
135/* 135/*
136 * Operations supported for diskquotas. 136 * Operations supported for diskquotas.
137 */ 137 */
138extern struct dquot_operations dquot_operations; 138extern const struct dquot_operations dquot_operations;
139extern struct quotactl_ops vfs_quotactl_ops; 139extern const struct quotactl_ops vfs_quotactl_ops;
140 140
141#define sb_dquot_ops (&dquot_operations) 141#define sb_dquot_ops (&dquot_operations)
142#define sb_quotactl_ops (&vfs_quotactl_ops) 142#define sb_quotactl_ops (&vfs_quotactl_ops)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf116d0dbf2..477841d29fc 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -71,14 +71,10 @@ void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned lon
71void page_add_file_rmap(struct page *); 71void page_add_file_rmap(struct page *);
72void page_remove_rmap(struct page *); 72void page_remove_rmap(struct page *);
73 73
74#ifdef CONFIG_DEBUG_VM 74static inline void page_dup_rmap(struct page *page)
75void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address);
76#else
77static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
78{ 75{
79 atomic_inc(&page->_mapcount); 76 atomic_inc(&page->_mapcount);
80} 77}
81#endif
82 78
83/* 79/*
84 * Called from mm/vmscan.c to handle paging out 80 * Called from mm/vmscan.c to handle paging out
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fe351c3914..97b10da0a3e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -140,6 +140,10 @@ extern int nr_processes(void);
140extern unsigned long nr_running(void); 140extern unsigned long nr_running(void);
141extern unsigned long nr_uninterruptible(void); 141extern unsigned long nr_uninterruptible(void);
142extern unsigned long nr_iowait(void); 142extern unsigned long nr_iowait(void);
143extern unsigned long nr_iowait_cpu(void);
144extern unsigned long this_cpu_load(void);
145
146
143extern void calc_global_load(void); 147extern void calc_global_load(void);
144extern u64 cpu_nr_migrations(int cpu); 148extern u64 cpu_nr_migrations(int cpu);
145 149
@@ -434,7 +438,9 @@ extern int get_dumpable(struct mm_struct *mm);
434/* dumpable bits */ 438/* dumpable bits */
435#define MMF_DUMPABLE 0 /* core dump is permitted */ 439#define MMF_DUMPABLE 0 /* core dump is permitted */
436#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ 440#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */
441
437#define MMF_DUMPABLE_BITS 2 442#define MMF_DUMPABLE_BITS 2
443#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
438 444
439/* coredump filter bits */ 445/* coredump filter bits */
440#define MMF_DUMP_ANON_PRIVATE 2 446#define MMF_DUMP_ANON_PRIVATE 2
@@ -444,6 +450,7 @@ extern int get_dumpable(struct mm_struct *mm);
444#define MMF_DUMP_ELF_HEADERS 6 450#define MMF_DUMP_ELF_HEADERS 6
445#define MMF_DUMP_HUGETLB_PRIVATE 7 451#define MMF_DUMP_HUGETLB_PRIVATE 7
446#define MMF_DUMP_HUGETLB_SHARED 8 452#define MMF_DUMP_HUGETLB_SHARED 8
453
447#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS 454#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
448#define MMF_DUMP_FILTER_BITS 7 455#define MMF_DUMP_FILTER_BITS 7
449#define MMF_DUMP_FILTER_MASK \ 456#define MMF_DUMP_FILTER_MASK \
@@ -457,6 +464,10 @@ extern int get_dumpable(struct mm_struct *mm);
457#else 464#else
458# define MMF_DUMP_MASK_DEFAULT_ELF 0 465# define MMF_DUMP_MASK_DEFAULT_ELF 0
459#endif 466#endif
467 /* leave room for more dump flags */
468#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
469
470#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
460 471
461struct sighand_struct { 472struct sighand_struct {
462 atomic_t count; 473 atomic_t count;
@@ -632,6 +643,8 @@ struct signal_struct {
632 unsigned audit_tty; 643 unsigned audit_tty;
633 struct tty_audit_buf *tty_audit_buf; 644 struct tty_audit_buf *tty_audit_buf;
634#endif 645#endif
646
647 int oom_adj; /* OOM kill score adjustment (bit shift) */
635}; 648};
636 649
637/* Context switch must be unlocked if interrupts are to be enabled */ 650/* Context switch must be unlocked if interrupts are to be enabled */
@@ -1214,7 +1227,6 @@ struct task_struct {
1214 * a short time 1227 * a short time
1215 */ 1228 */
1216 unsigned char fpu_counter; 1229 unsigned char fpu_counter;
1217 s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
1218#ifdef CONFIG_BLK_DEV_IO_TRACE 1230#ifdef CONFIG_BLK_DEV_IO_TRACE
1219 unsigned int btrace_seq; 1231 unsigned int btrace_seq;
1220#endif 1232#endif
@@ -1713,7 +1725,7 @@ extern cputime_t task_gtime(struct task_struct *p);
1713#define PF_FROZEN 0x00010000 /* frozen for system suspend */ 1725#define PF_FROZEN 0x00010000 /* frozen for system suspend */
1714#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ 1726#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
1715#define PF_KSWAPD 0x00040000 /* I am kswapd */ 1727#define PF_KSWAPD 0x00040000 /* I am kswapd */
1716#define PF_SWAPOFF 0x00080000 /* I am in swapoff */ 1728#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */
1717#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ 1729#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
1718#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ 1730#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
1719#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ 1731#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 3f632182d8e..996df4dac7d 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -111,7 +111,7 @@ struct rpc_credops {
111 void (*crdestroy)(struct rpc_cred *); 111 void (*crdestroy)(struct rpc_cred *);
112 112
113 int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); 113 int (*crmatch)(struct auth_cred *, struct rpc_cred *, int);
114 void (*crbind)(struct rpc_task *, struct rpc_cred *); 114 void (*crbind)(struct rpc_task *, struct rpc_cred *, int);
115 __be32 * (*crmarshal)(struct rpc_task *, __be32 *); 115 __be32 * (*crmarshal)(struct rpc_task *, __be32 *);
116 int (*crrefresh)(struct rpc_task *); 116 int (*crrefresh)(struct rpc_task *);
117 __be32 * (*crvalidate)(struct rpc_task *, __be32 *); 117 __be32 * (*crvalidate)(struct rpc_task *, __be32 *);
@@ -140,7 +140,7 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *
140void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); 140void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
141struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); 141struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
142void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int); 142void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int);
143void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *); 143void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int);
144void put_rpccred(struct rpc_cred *); 144void put_rpccred(struct rpc_cred *);
145void rpcauth_unbindcred(struct rpc_task *); 145void rpcauth_unbindcred(struct rpc_task *);
146__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); 146__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index ab3f6e90caa..8ed9642a5a7 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -22,6 +22,7 @@
22#include <linux/sunrpc/timer.h> 22#include <linux/sunrpc/timer.h>
23#include <asm/signal.h> 23#include <asm/signal.h>
24#include <linux/path.h> 24#include <linux/path.h>
25#include <net/ipv6.h>
25 26
26struct rpc_inode; 27struct rpc_inode;
27 28
@@ -113,6 +114,7 @@ struct rpc_create_args {
113 rpc_authflavor_t authflavor; 114 rpc_authflavor_t authflavor;
114 unsigned long flags; 115 unsigned long flags;
115 char *client_name; 116 char *client_name;
117 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
116}; 118};
117 119
118/* Values for "flags" field */ 120/* Values for "flags" field */
@@ -188,5 +190,117 @@ static inline void rpc_set_port(struct sockaddr *sap,
188#define IPV6_SCOPE_DELIMITER '%' 190#define IPV6_SCOPE_DELIMITER '%'
189#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") 191#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn")
190 192
193static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
194 const struct sockaddr *sap2)
195{
196 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
197 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
198
199 return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
200}
201
202static inline bool __rpc_copy_addr4(struct sockaddr *dst,
203 const struct sockaddr *src)
204{
205 const struct sockaddr_in *ssin = (struct sockaddr_in *) src;
206 struct sockaddr_in *dsin = (struct sockaddr_in *) dst;
207
208 dsin->sin_family = ssin->sin_family;
209 dsin->sin_addr.s_addr = ssin->sin_addr.s_addr;
210 return true;
211}
212
213#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
214static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
215 const struct sockaddr *sap2)
216{
217 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
218 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
219 return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
220}
221
222static inline bool __rpc_copy_addr6(struct sockaddr *dst,
223 const struct sockaddr *src)
224{
225 const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src;
226 struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
227
228 dsin6->sin6_family = ssin6->sin6_family;
229 ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr);
230 return true;
231}
232#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
233static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
234 const struct sockaddr *sap2)
235{
236 return false;
237}
238
239static inline bool __rpc_copy_addr6(struct sockaddr *dst,
240 const struct sockaddr *src)
241{
242 return false;
243}
244#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
245
246/**
247 * rpc_cmp_addr - compare the address portion of two sockaddrs.
248 * @sap1: first sockaddr
249 * @sap2: second sockaddr
250 *
251 * Just compares the family and address portion. Ignores port, scope, etc.
252 * Returns true if the addrs are equal, false if they aren't.
253 */
254static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
255 const struct sockaddr *sap2)
256{
257 if (sap1->sa_family == sap2->sa_family) {
258 switch (sap1->sa_family) {
259 case AF_INET:
260 return __rpc_cmp_addr4(sap1, sap2);
261 case AF_INET6:
262 return __rpc_cmp_addr6(sap1, sap2);
263 }
264 }
265 return false;
266}
267
268/**
269 * rpc_copy_addr - copy the address portion of one sockaddr to another
270 * @dst: destination sockaddr
271 * @src: source sockaddr
272 *
273 * Just copies the address portion and family. Ignores port, scope, etc.
274 * Caller is responsible for making certain that dst is large enough to hold
275 * the address in src. Returns true if address family is supported. Returns
276 * false otherwise.
277 */
278static inline bool rpc_copy_addr(struct sockaddr *dst,
279 const struct sockaddr *src)
280{
281 switch (src->sa_family) {
282 case AF_INET:
283 return __rpc_copy_addr4(dst, src);
284 case AF_INET6:
285 return __rpc_copy_addr6(dst, src);
286 }
287 return false;
288}
289
290/**
291 * rpc_get_scope_id - return scopeid for a given sockaddr
292 * @sa: sockaddr to get scopeid from
293 *
294 * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if
295 * not an AF_INET6 address.
296 */
297static inline u32 rpc_get_scope_id(const struct sockaddr *sa)
298{
299 if (sa->sa_family != AF_INET6)
300 return 0;
301
302 return ((struct sockaddr_in6 *) sa)->sin6_scope_id;
303}
304
191#endif /* __KERNEL__ */ 305#endif /* __KERNEL__ */
192#endif /* _LINUX_SUNRPC_CLNT_H */ 306#endif /* _LINUX_SUNRPC_CLNT_H */
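
rpc_cmp_addr() and rpc_copy_addr() deliberately ignore port and scope, which is exactly what address-only client matching needs. A short usage sketch:

    #include <linux/sunrpc/clnt.h>

    /* Sketch: match an incoming connection to a known client by address
     * only; ports differ per connection and are intentionally ignored. */
    static bool sketch_addrs_match(const struct sockaddr *known,
                                   const struct sockaddr *incoming)
    {
            return rpc_cmp_addr(known, incoming);
    }

    static bool sketch_record_addr(struct sockaddr_storage *dst,
                                   const struct sockaddr *src)
    {
            /* sockaddr_storage is large enough for any supported family */
            return rpc_copy_addr((struct sockaddr *)dst, src);
    }
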
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index ea8009695c6..52e8cb0a756 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -94,8 +94,6 @@ struct svc_serv {
94 struct module * sv_module; /* optional module to count when 94 struct module * sv_module; /* optional module to count when
95 * adding threads */ 95 * adding threads */
96 svc_thread_fn sv_function; /* main function for threads */ 96 svc_thread_fn sv_function; /* main function for threads */
97 unsigned int sv_drc_max_pages; /* Total pages for DRC */
98 unsigned int sv_drc_pages_used;/* DRC pages used */
99#if defined(CONFIG_NFS_V4_1) 97#if defined(CONFIG_NFS_V4_1)
100 struct list_head sv_cb_list; /* queue for callback requests 98 struct list_head sv_cb_list; /* queue for callback requests
101 * that arrive over the same 99 * that arrive over the same
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 2223ae0b5ed..5f4e18b3ce7 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -65,6 +65,7 @@ struct svc_xprt {
65 size_t xpt_locallen; /* length of address */ 65 size_t xpt_locallen; /* length of address */
66 struct sockaddr_storage xpt_remote; /* remote peer's address */ 66 struct sockaddr_storage xpt_remote; /* remote peer's address */
67 size_t xpt_remotelen; /* length of address */ 67 size_t xpt_remotelen; /* length of address */
68 struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */
68}; 69};
69 70
70int svc_reg_xprt_class(struct svc_xprt_class *); 71int svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 04dba23c59f..1b353a76c30 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -28,6 +28,7 @@ struct svc_sock {
28 /* private TCP part */ 28 /* private TCP part */
29 u32 sk_reclen; /* length of record */ 29 u32 sk_reclen; /* length of record */
30 u32 sk_tcplen; /* current read length */ 30 u32 sk_tcplen; /* current read length */
31 struct rpc_xprt *sk_bc_xprt; /* NFSv4.1 backchannel xprt */
31}; 32};
32 33
33/* 34/*
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index c090df44257..6f9457a75b8 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -124,6 +124,23 @@ struct rpc_xprt_ops {
124 void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); 124 void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
125}; 125};
126 126
127/*
128 * RPC transport identifiers
129 *
130 * To preserve compatibility with the historical use of raw IP protocol
131 * id's for transport selection, UDP and TCP identifiers are specified
132 * with the previous values. No such restriction exists for new transports,
133 * except that they may not collide with these values (17 and 6,
134 * respectively).
135 */
136#define XPRT_TRANSPORT_BC (1 << 31)
137enum xprt_transports {
138 XPRT_TRANSPORT_UDP = IPPROTO_UDP,
139 XPRT_TRANSPORT_TCP = IPPROTO_TCP,
140 XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC,
141 XPRT_TRANSPORT_RDMA = 256
142};
143
127struct rpc_xprt { 144struct rpc_xprt {
128 struct kref kref; /* Reference count */ 145 struct kref kref; /* Reference count */
129 struct rpc_xprt_ops * ops; /* transport methods */ 146 struct rpc_xprt_ops * ops; /* transport methods */
@@ -179,6 +196,7 @@ struct rpc_xprt {
179 spinlock_t reserve_lock; /* lock slot table */ 196 spinlock_t reserve_lock; /* lock slot table */
180 u32 xid; /* Next XID value to use */ 197 u32 xid; /* Next XID value to use */
181 struct rpc_task * snd_task; /* Task blocked in send */ 198 struct rpc_task * snd_task; /* Task blocked in send */
199 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
182#if defined(CONFIG_NFS_V4_1) 200#if defined(CONFIG_NFS_V4_1)
183 struct svc_serv *bc_serv; /* The RPC service which will */ 201 struct svc_serv *bc_serv; /* The RPC service which will */
184 /* process the callback */ 202 /* process the callback */
@@ -231,6 +249,7 @@ struct xprt_create {
231 struct sockaddr * srcaddr; /* optional local address */ 249 struct sockaddr * srcaddr; /* optional local address */
232 struct sockaddr * dstaddr; /* remote peer address */ 250 struct sockaddr * dstaddr; /* remote peer address */
233 size_t addrlen; 251 size_t addrlen;
252 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
234}; 253};
235 254
236struct xprt_class { 255struct xprt_class {
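
The transport identifiers previously scattered across xprtsock.h and xprtrdma.h (the hunks below remove the old copies) are consolidated here, with XPRT_TRANSPORT_BC acting as a flag bit OR'd onto the base protocol number. A sketch of identifier selection:

    /* Sketch: the backchannel TCP ident is just TCP plus the BC flag. */
    static int sketch_xprt_ident(bool backchannel)
    {
            if (backchannel)
                    return XPRT_TRANSPORT_BC_TCP;   /* IPPROTO_TCP | (1 << 31) */
            return XPRT_TRANSPORT_TCP;              /* == IPPROTO_TCP == 6 */
    }
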
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 54a379c9e8e..c2f04e1ae15 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -41,11 +41,6 @@
41#define _LINUX_SUNRPC_XPRTRDMA_H 41#define _LINUX_SUNRPC_XPRTRDMA_H
42 42
43/* 43/*
44 * RPC transport identifier for RDMA
45 */
46#define XPRT_TRANSPORT_RDMA 256
47
48/*
49 * rpcbind (v3+) RDMA netid. 44 * rpcbind (v3+) RDMA netid.
50 */ 45 */
51#define RPCBIND_NETID_RDMA "rdma" 46#define RPCBIND_NETID_RDMA "rdma"
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index c2a46c45c8f..3f14a02e9cc 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -13,17 +13,6 @@ int init_socket_xprt(void);
13void cleanup_socket_xprt(void); 13void cleanup_socket_xprt(void);
14 14
15/* 15/*
16 * RPC transport identifiers for UDP, TCP
17 *
18 * To preserve compatibility with the historical use of raw IP protocol
19 * id's for transport selection, these are specified with the previous
20 * values. No such restriction exists for new transports, except that
21 * they may not collide with these values (17 and 6, respectively).
22 */
23#define XPRT_TRANSPORT_UDP IPPROTO_UDP
24#define XPRT_TRANSPORT_TCP IPPROTO_TCP
25
26/*
27 * RPC slot table sizes for UDP, TCP transports 16 * RPC slot table sizes for UDP, TCP transports
28 */ 17 */
29extern unsigned int xprt_udp_slot_table_entries; 18extern unsigned int xprt_udp_slot_table_entries;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7c15334f3ff..6c990e658f4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -419,10 +419,22 @@ static inline swp_entry_t get_swap_page(void)
419} 419}
420 420
421/* linux/mm/thrash.c */ 421/* linux/mm/thrash.c */
422#define put_swap_token(mm) do { } while (0) 422static inline void put_swap_token(struct mm_struct *mm)
423#define grab_swap_token(mm) do { } while (0) 423{
424#define has_swap_token(mm) 0 424}
425#define disable_swap_token() do { } while (0) 425
426static inline void grab_swap_token(struct mm_struct *mm)
427{
428}
429
430static inline int has_swap_token(struct mm_struct *mm)
431{
432 return 0;
433}
434
435static inline void disable_swap_token(void)
436{
437}
426 438
427static inline void 439static inline void
428mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) 440mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
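
Converting the !CONFIG_SWAP stubs from do { } while (0) macros to static inlines costs nothing at runtime but restores type checking of the arguments. A sketch of what the compiler now verifies:

    /* Sketch: 'mm' is type-checked even though both calls compile away
     * entirely in the !CONFIG_SWAP configuration. */
    static void sketch_swap_token_noop(struct mm_struct *mm)
    {
            grab_swap_token(mm);
            put_swap_token(mm);
    }
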
diff --git a/include/linux/usb.h b/include/linux/usb.h
index a8fe05f224e..19fabc487be 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1071,7 +1071,7 @@ typedef void (*usb_complete_t)(struct urb *);
1071 * @start_frame: Returns the initial frame for isochronous transfers. 1071 * @start_frame: Returns the initial frame for isochronous transfers.
1072 * @number_of_packets: Lists the number of ISO transfer buffers. 1072 * @number_of_packets: Lists the number of ISO transfer buffers.
1073 * @interval: Specifies the polling interval for interrupt or isochronous 1073 * @interval: Specifies the polling interval for interrupt or isochronous
1074 * transfers. The units are frames (milliseconds) for for full and low 1074 * transfers. The units are frames (milliseconds) for full and low
1075 * speed devices, and microframes (1/8 millisecond) for highspeed ones. 1075 * speed devices, and microframes (1/8 millisecond) for highspeed ones.
1076 * @error_count: Returns the number of ISO transfers that reported errors. 1076 * @error_count: Returns the number of ISO transfers that reported errors.
1077 * @context: For use in completion functions. This normally points to 1077 * @context: For use in completion functions. This normally points to
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 81a97cf8f0a..2d0f222388a 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -166,15 +166,8 @@ static inline unsigned long zone_page_state(struct zone *zone,
166 return x; 166 return x;
167} 167}
168 168
169extern unsigned long global_lru_pages(void); 169extern unsigned long global_reclaimable_pages(void);
170 170extern unsigned long zone_reclaimable_pages(struct zone *zone);
171static inline unsigned long zone_lru_pages(struct zone *zone)
172{
173 return (zone_page_state(zone, NR_ACTIVE_ANON)
174 + zone_page_state(zone, NR_ACTIVE_FILE)
175 + zone_page_state(zone, NR_INACTIVE_ANON)
176 + zone_page_state(zone, NR_INACTIVE_FILE));
177}
178 171
179#ifdef CONFIG_NUMA 172#ifdef CONFIG_NUMA
180/* 173/*
@@ -210,11 +203,6 @@ extern void zone_statistics(struct zone *, struct zone *);
210 203
211#endif /* CONFIG_NUMA */ 204#endif /* CONFIG_NUMA */
212 205
213#define __add_zone_page_state(__z, __i, __d) \
214 __mod_zone_page_state(__z, __i, __d)
215#define __sub_zone_page_state(__z, __i, __d) \
216 __mod_zone_page_state(__z, __i,-(__d))
217
218#define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d) 206#define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
219#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d)) 207#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
220 208
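
zone_lru_pages() unconditionally summed all four anon/file LRU counters; its replacement moves out of line. A sketch of the presumed definition, labelled as an assumption: the real body is in mm/vmscan.c, and the key difference is that anon pages should only count as reclaimable when swap is available:

    #include <linux/vmstat.h>
    #include <linux/swap.h>

    /* Assumption-labelled sketch of zone_reclaimable_pages(). */
    static unsigned long sketch_zone_reclaimable_pages(struct zone *zone)
    {
            unsigned long nr;

            nr = zone_page_state(zone, NR_ACTIVE_FILE) +
                 zone_page_state(zone, NR_INACTIVE_FILE);

            if (nr_swap_pages > 0)  /* anon is only reclaimable with swap */
                    nr += zone_page_state(zone, NR_ACTIVE_ANON) +
                          zone_page_state(zone, NR_INACTIVE_ANON);

            return nr;
    }
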
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 6273fa97b52..7ef0c7b94f3 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -94,7 +94,7 @@ struct execute_work {
94/* 94/*
95 * initialize all of a work item in one go 95 * initialize all of a work item in one go
96 * 96 *
97 * NOTE! No point in using "atomic_long_set()": useing a direct 97 * NOTE! No point in using "atomic_long_set()": using a direct
98 * assignment of the work data initializer allows the compiler 98 * assignment of the work data initializer allows the compiler
99 * to generate better code. 99 * to generate better code.
100 */ 100 */
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 93885830430..c8f94e8db69 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -482,7 +482,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
482 * message. 482 * message.
483 * @cm_id: Connection identifier associated with the connection message. 483 * @cm_id: Connection identifier associated with the connection message.
484 * @service_timeout: The lower 5-bits specify the maximum time required for 484 * @service_timeout: The lower 5-bits specify the maximum time required for
485 * the sender to reply to to the connection message. The upper 3-bits 485 * the sender to reply to the connection message. The upper 3-bits
486 * specify additional control flags. 486 * specify additional control flags.
487 * @private_data: Optional user-defined private data sent with the 487 * @private_data: Optional user-defined private data sent with the
488 * message receipt acknowledgement. 488 * message receipt acknowledgement.
diff --git a/include/scsi/fc/fc_fc2.h b/include/scsi/fc/fc_fc2.h
index cff8a8c22f5..f87777d0d5b 100644
--- a/include/scsi/fc/fc_fc2.h
+++ b/include/scsi/fc/fc_fc2.h
@@ -92,8 +92,7 @@ struct fc_esb {
92 __u8 _esb_resvd[4]; 92 __u8 _esb_resvd[4];
93 __u8 esb_service_params[112]; /* TBD */ 93 __u8 esb_service_params[112]; /* TBD */
94 __u8 esb_seq_status[8]; /* sequence statuses, 8 bytes each */ 94 __u8 esb_seq_status[8]; /* sequence statuses, 8 bytes each */
95} __attribute__((packed));; 95} __attribute__((packed));
96
97 96
98/* 97/*
99 * Define expected size for ASSERTs. 98 * Define expected size for ASSERTs.
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 1493c541f9c..eaf46bdd18a 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -225,6 +225,169 @@ TRACE_EVENT(kmem_cache_free,
225 225
226 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) 226 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
227); 227);
228
229TRACE_EVENT(mm_page_free_direct,
230
231 TP_PROTO(struct page *page, unsigned int order),
232
233 TP_ARGS(page, order),
234
235 TP_STRUCT__entry(
236 __field( struct page *, page )
237 __field( unsigned int, order )
238 ),
239
240 TP_fast_assign(
241 __entry->page = page;
242 __entry->order = order;
243 ),
244
245 TP_printk("page=%p pfn=%lu order=%d",
246 __entry->page,
247 page_to_pfn(__entry->page),
248 __entry->order)
249);
250
251TRACE_EVENT(mm_pagevec_free,
252
253 TP_PROTO(struct page *page, int cold),
254
255 TP_ARGS(page, cold),
256
257 TP_STRUCT__entry(
258 __field( struct page *, page )
259 __field( int, cold )
260 ),
261
262 TP_fast_assign(
263 __entry->page = page;
264 __entry->cold = cold;
265 ),
266
267 TP_printk("page=%p pfn=%lu order=0 cold=%d",
268 __entry->page,
269 page_to_pfn(__entry->page),
270 __entry->cold)
271);
272
273TRACE_EVENT(mm_page_alloc,
274
275 TP_PROTO(struct page *page, unsigned int order,
276 gfp_t gfp_flags, int migratetype),
277
278 TP_ARGS(page, order, gfp_flags, migratetype),
279
280 TP_STRUCT__entry(
281 __field( struct page *, page )
282 __field( unsigned int, order )
283 __field( gfp_t, gfp_flags )
284 __field( int, migratetype )
285 ),
286
287 TP_fast_assign(
288 __entry->page = page;
289 __entry->order = order;
290 __entry->gfp_flags = gfp_flags;
291 __entry->migratetype = migratetype;
292 ),
293
294 TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s",
295 __entry->page,
296 page_to_pfn(__entry->page),
297 __entry->order,
298 __entry->migratetype,
299 show_gfp_flags(__entry->gfp_flags))
300);
301
302TRACE_EVENT(mm_page_alloc_zone_locked,
303
304 TP_PROTO(struct page *page, unsigned int order, int migratetype),
305
306 TP_ARGS(page, order, migratetype),
307
308 TP_STRUCT__entry(
309 __field( struct page *, page )
310 __field( unsigned int, order )
311 __field( int, migratetype )
312 ),
313
314 TP_fast_assign(
315 __entry->page = page;
316 __entry->order = order;
317 __entry->migratetype = migratetype;
318 ),
319
320 TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d",
321 __entry->page,
322 page_to_pfn(__entry->page),
323 __entry->order,
324 __entry->migratetype,
325 __entry->order == 0)
326);
327
328TRACE_EVENT(mm_page_pcpu_drain,
329
330 TP_PROTO(struct page *page, int order, int migratetype),
331
332 TP_ARGS(page, order, migratetype),
333
334 TP_STRUCT__entry(
335 __field( struct page *, page )
336 __field( int, order )
337 __field( int, migratetype )
338 ),
339
340 TP_fast_assign(
341 __entry->page = page;
342 __entry->order = order;
343 __entry->migratetype = migratetype;
344 ),
345
346 TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
347 __entry->page,
348 page_to_pfn(__entry->page),
349 __entry->order,
350 __entry->migratetype)
351);
352
353TRACE_EVENT(mm_page_alloc_extfrag,
354
355 TP_PROTO(struct page *page,
356 int alloc_order, int fallback_order,
357 int alloc_migratetype, int fallback_migratetype),
358
359 TP_ARGS(page,
360 alloc_order, fallback_order,
361 alloc_migratetype, fallback_migratetype),
362
363 TP_STRUCT__entry(
364 __field( struct page *, page )
365 __field( int, alloc_order )
366 __field( int, fallback_order )
367 __field( int, alloc_migratetype )
368 __field( int, fallback_migratetype )
369 ),
370
371 TP_fast_assign(
372 __entry->page = page;
373 __entry->alloc_order = alloc_order;
374 __entry->fallback_order = fallback_order;
375 __entry->alloc_migratetype = alloc_migratetype;
376 __entry->fallback_migratetype = fallback_migratetype;
377 ),
378
379 TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
380 __entry->page,
381 page_to_pfn(__entry->page),
382 __entry->alloc_order,
383 __entry->fallback_order,
384 pageblock_order,
385 __entry->alloc_migratetype,
386 __entry->fallback_migratetype,
387 __entry->fallback_order < pageblock_order,
388 __entry->alloc_migratetype == __entry->fallback_migratetype)
389);
390
228#endif /* _TRACE_KMEM_H */ 391#endif /* _TRACE_KMEM_H */
229 392
230/* This part must be outside protection */ 393/* This part must be outside protection */
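
Each TRACE_EVENT above generates a trace_<name>() inline for the producer side. A sketch of how the allocator would emit the allocation event (call-site placement is the allocator's choice; one compilation unit must define CREATE_TRACE_POINTS before this include to instantiate the tracepoints):

    #include <trace/events/kmem.h>

    /* Sketch: fire the tracepoint once the page is obtained, so its pfn
     * and migratetype are final. */
    static void sketch_emit_alloc_event(struct page *page, unsigned int order,
                                        gfp_t gfp_flags, int migratetype)
    {
            trace_mm_page_alloc(page, order, gfp_flags, migratetype);
    }
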
diff --git a/init/main.c b/init/main.c
index 34971becbd3..2c48c315316 100644
--- a/init/main.c
+++ b/init/main.c
@@ -668,12 +668,12 @@ asmlinkage void __init start_kernel(void)
668#endif 668#endif
669 thread_info_cache_init(); 669 thread_info_cache_init();
670 cred_init(); 670 cred_init();
671 fork_init(num_physpages); 671 fork_init(totalram_pages);
672 proc_caches_init(); 672 proc_caches_init();
673 buffer_init(); 673 buffer_init();
674 key_init(); 674 key_init();
675 security_init(); 675 security_init();
676 vfs_caches_init(num_physpages); 676 vfs_caches_init(totalram_pages);
677 radix_tree_init(); 677 radix_tree_init();
678 signals_init(); 678 signals_init();
679 /* rootfs populating might need page-writeback */ 679 /* rootfs populating might need page-writeback */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c5e68adc673..ee9d69707c0 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -77,7 +77,7 @@ struct mqueue_inode_info {
77 77
78static const struct inode_operations mqueue_dir_inode_operations; 78static const struct inode_operations mqueue_dir_inode_operations;
79static const struct file_operations mqueue_file_operations; 79static const struct file_operations mqueue_file_operations;
80static struct super_operations mqueue_super_ops; 80static const struct super_operations mqueue_super_ops;
81static void remove_notification(struct mqueue_inode_info *info); 81static void remove_notification(struct mqueue_inode_info *info);
82 82
83static struct kmem_cache *mqueue_inode_cachep; 83static struct kmem_cache *mqueue_inode_cachep;
@@ -1224,7 +1224,7 @@ static const struct file_operations mqueue_file_operations = {
1224 .read = mqueue_read_file, 1224 .read = mqueue_read_file,
1225}; 1225};
1226 1226
1227static struct super_operations mqueue_super_ops = { 1227static const struct super_operations mqueue_super_ops = {
1228 .alloc_inode = mqueue_alloc_inode, 1228 .alloc_inode = mqueue_alloc_inode,
1229 .destroy_inode = mqueue_destroy_inode, 1229 .destroy_inode = mqueue_destroy_inode,
1230 .statfs = simple_statfs, 1230 .statfs = simple_statfs,
diff --git a/ipc/shm.c b/ipc/shm.c
index 30162a59621..9eb1488b543 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -370,7 +370,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
370 if (shmflg & SHM_NORESERVE) 370 if (shmflg & SHM_NORESERVE)
371 acctflag = VM_NORESERVE; 371 acctflag = VM_NORESERVE;
372 file = hugetlb_file_setup(name, size, acctflag, 372 file = hugetlb_file_setup(name, size, acctflag,
373 &shp->mlock_user); 373 &shp->mlock_user, HUGETLB_SHMFS_INODE);
374 } else { 374 } else {
375 /* 375 /*
376 * Do not allow no accounting for OVERCOMMIT_NEVER, even 376 * Do not allow no accounting for OVERCOMMIT_NEVER, even
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c7ece8f027f..213b7f92fcd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -596,7 +596,7 @@ void cgroup_unlock(void)
596static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); 596static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
597static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); 597static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
598static int cgroup_populate_dir(struct cgroup *cgrp); 598static int cgroup_populate_dir(struct cgroup *cgrp);
599static struct inode_operations cgroup_dir_inode_operations; 599static const struct inode_operations cgroup_dir_inode_operations;
600static struct file_operations proc_cgroupstats_operations; 600static struct file_operations proc_cgroupstats_operations;
601 601
602static struct backing_dev_info cgroup_backing_dev_info = { 602static struct backing_dev_info cgroup_backing_dev_info = {
@@ -961,7 +961,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
961 return ret; 961 return ret;
962} 962}
963 963
964static struct super_operations cgroup_ops = { 964static const struct super_operations cgroup_ops = {
965 .statfs = simple_statfs, 965 .statfs = simple_statfs,
966 .drop_inode = generic_delete_inode, 966 .drop_inode = generic_delete_inode,
967 .show_options = cgroup_show_options, 967 .show_options = cgroup_show_options,
@@ -1711,7 +1711,7 @@ static struct file_operations cgroup_file_operations = {
1711 .release = cgroup_file_release, 1711 .release = cgroup_file_release,
1712}; 1712};
1713 1713
1714static struct inode_operations cgroup_dir_inode_operations = { 1714static const struct inode_operations cgroup_dir_inode_operations = {
1715 .lookup = simple_lookup, 1715 .lookup = simple_lookup,
1716 .mkdir = cgroup_mkdir, 1716 .mkdir = cgroup_mkdir,
1717 .rmdir = cgroup_rmdir, 1717 .rmdir = cgroup_rmdir,
diff --git a/kernel/fork.c b/kernel/fork.c
index 2cebfb23b0b..1020977b57c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
49#include <linux/ftrace.h> 49#include <linux/ftrace.h>
50#include <linux/profile.h> 50#include <linux/profile.h>
51#include <linux/rmap.h> 51#include <linux/rmap.h>
52#include <linux/ksm.h>
52#include <linux/acct.h> 53#include <linux/acct.h>
53#include <linux/tsacct_kern.h> 54#include <linux/tsacct_kern.h>
54#include <linux/cn_proc.h> 55#include <linux/cn_proc.h>
@@ -136,9 +137,17 @@ struct kmem_cache *vm_area_cachep;
136/* SLAB cache for mm_struct structures (tsk->mm) */ 137/* SLAB cache for mm_struct structures (tsk->mm) */
137static struct kmem_cache *mm_cachep; 138static struct kmem_cache *mm_cachep;
138 139
140static void account_kernel_stack(struct thread_info *ti, int account)
141{
142 struct zone *zone = page_zone(virt_to_page(ti));
143
144 mod_zone_page_state(zone, NR_KERNEL_STACK, account);
145}
146
139void free_task(struct task_struct *tsk) 147void free_task(struct task_struct *tsk)
140{ 148{
141 prop_local_destroy_single(&tsk->dirties); 149 prop_local_destroy_single(&tsk->dirties);
150 account_kernel_stack(tsk->stack, -1);
142 free_thread_info(tsk->stack); 151 free_thread_info(tsk->stack);
143 rt_mutex_debug_task_free(tsk); 152 rt_mutex_debug_task_free(tsk);
144 ftrace_graph_exit_task(tsk); 153 ftrace_graph_exit_task(tsk);
@@ -253,6 +262,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
253 tsk->btrace_seq = 0; 262 tsk->btrace_seq = 0;
254#endif 263#endif
255 tsk->splice_pipe = NULL; 264 tsk->splice_pipe = NULL;
265
266 account_kernel_stack(ti, 1);
267
256 return tsk; 268 return tsk;
257 269
258out: 270out:
@@ -288,6 +300,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
288 rb_link = &mm->mm_rb.rb_node; 300 rb_link = &mm->mm_rb.rb_node;
289 rb_parent = NULL; 301 rb_parent = NULL;
290 pprev = &mm->mmap; 302 pprev = &mm->mmap;
303 retval = ksm_fork(mm, oldmm);
304 if (retval)
305 goto out;
291 306
292 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 307 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
293 struct file *file; 308 struct file *file;
@@ -424,7 +439,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
424 atomic_set(&mm->mm_count, 1); 439 atomic_set(&mm->mm_count, 1);
425 init_rwsem(&mm->mmap_sem); 440 init_rwsem(&mm->mmap_sem);
426 INIT_LIST_HEAD(&mm->mmlist); 441 INIT_LIST_HEAD(&mm->mmlist);
427 mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; 442 mm->flags = (current->mm) ?
443 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
428 mm->core_state = NULL; 444 mm->core_state = NULL;
429 mm->nr_ptes = 0; 445 mm->nr_ptes = 0;
430 set_mm_counter(mm, file_rss, 0); 446 set_mm_counter(mm, file_rss, 0);
@@ -485,6 +501,7 @@ void mmput(struct mm_struct *mm)
485 501
486 if (atomic_dec_and_test(&mm->mm_users)) { 502 if (atomic_dec_and_test(&mm->mm_users)) {
487 exit_aio(mm); 503 exit_aio(mm);
504 ksm_exit(mm);
488 exit_mmap(mm); 505 exit_mmap(mm);
489 set_mm_exe_file(mm, NULL); 506 set_mm_exe_file(mm, NULL);
490 if (!list_empty(&mm->mmlist)) { 507 if (!list_empty(&mm->mmlist)) {
@@ -863,6 +880,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
863 880
864 tty_audit_fork(sig); 881 tty_audit_fork(sig);
865 882
883 sig->oom_adj = current->signal->oom_adj;
884
866 return 0; 885 return 0;
867} 886}
868 887
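
Three independent changes land in kernel/fork.c: kernel stacks are accounted per-zone under NR_KERNEL_STACK, KSM is wired into the mm lifetime via ksm_fork()/ksm_exit(), and copy_signal() now inherits oom_adj (with mm->flags masked to MMF_INIT_MASK in mm_init()). The accounting calls must stay balanced; a sketch of the pairing, with alloc_thread_info() assumed as the arch's stack allocator:

	struct thread_info *ti = alloc_thread_info(tsk);	/* assumed helper */

	account_kernel_stack(ti, 1);	/* NR_KERNEL_STACK++ in ti's zone */
	/* ... task runs ... */
	account_kernel_stack(ti, -1);	/* balance before the stack goes away */
	free_thread_info(ti);
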
diff --git a/kernel/module.c b/kernel/module.c
index b6ee424245d..e6bc4b28aa6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -47,6 +47,7 @@
47#include <linux/rculist.h> 47#include <linux/rculist.h>
48#include <asm/uaccess.h> 48#include <asm/uaccess.h>
49#include <asm/cacheflush.h> 49#include <asm/cacheflush.h>
50#include <asm/mmu_context.h>
50#include <linux/license.h> 51#include <linux/license.h>
51#include <asm/sections.h> 52#include <asm/sections.h>
52#include <linux/tracepoint.h> 53#include <linux/tracepoint.h>
@@ -1535,6 +1536,10 @@ static void free_module(struct module *mod)
1535 1536
1536 /* Finally, free the core (containing the module structure) */ 1537 /* Finally, free the core (containing the module structure) */
1537 module_free(mod, mod->module_core); 1538 module_free(mod, mod->module_core);
1539
1540#ifdef CONFIG_MPU
1541 update_protections(current->mm);
1542#endif
1538} 1543}
1539 1544
1540void *__symbol_get(const char *symbol) 1545void *__symbol_get(const char *symbol)
diff --git a/kernel/panic.c b/kernel/panic.c
index 512ab73b0ca..bcdef26e333 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -177,7 +177,7 @@ static const struct tnt tnts[] = {
177 * 'W' - Taint on warning. 177 * 'W' - Taint on warning.
178 * 'C' - modules from drivers/staging are loaded. 178 * 'C' - modules from drivers/staging are loaded.
179 * 179 *
180 * The string is overwritten by the next call to print_taint(). 180 * The string is overwritten by the next call to print_tainted().
181 */ 181 */
182const char *print_tainted(void) 182const char *print_tainted(void)
183{ 183{
diff --git a/kernel/pid.c b/kernel/pid.c
index 31310b5d3f5..d3f722d20f9 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -40,7 +40,7 @@
40#define pid_hashfn(nr, ns) \ 40#define pid_hashfn(nr, ns) \
41 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) 41 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
42static struct hlist_head *pid_hash; 42static struct hlist_head *pid_hash;
43static int pidhash_shift; 43static unsigned int pidhash_shift = 4;
44struct pid init_struct_pid = INIT_STRUCT_PID; 44struct pid init_struct_pid = INIT_STRUCT_PID;
45 45
46int pid_max = PID_MAX_DEFAULT; 46int pid_max = PID_MAX_DEFAULT;
@@ -499,19 +499,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
499void __init pidhash_init(void) 499void __init pidhash_init(void)
500{ 500{
501 int i, pidhash_size; 501 int i, pidhash_size;
502 unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
503 502
504 pidhash_shift = max(4, fls(megabytes * 4)); 503 pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
505 pidhash_shift = min(12, pidhash_shift); 504 HASH_EARLY | HASH_SMALL,
505 &pidhash_shift, NULL, 4096);
506 pidhash_size = 1 << pidhash_shift; 506 pidhash_size = 1 << pidhash_shift;
507 507
508 printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
509 pidhash_size, pidhash_shift,
510 pidhash_size * sizeof(struct hlist_head));
511
512 pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
513 if (!pid_hash)
514 panic("Could not alloc pidhash!\n");
515 for (i = 0; i < pidhash_size; i++) 508 for (i = 0; i < pidhash_size; i++)
516 INIT_HLIST_HEAD(&pid_hash[i]); 509 INIT_HLIST_HEAD(&pid_hash[i]);
517} 510}
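
pidhash_init() now delegates sizing, bootmem allocation and the boot banner to alloc_large_system_hash(), with HASH_SMALL letting the initial pidhash_shift of 4 act as the minimum size. A hypothetical call with the parameters annotated; the scale and limit values here are illustrative only:

	static struct hlist_head *example_hash;
	static unsigned int example_shift = 4;	/* log2 of the minimum size */

	example_hash = alloc_large_system_hash("example",
			sizeof(*example_hash),		/* bucket size */
			0,				/* 0: derive entries from memory */
			18,				/* scale: fewer buckets per byte */
			HASH_EARLY | HASH_SMALL,	/* bootmem alloc, small floor */
			&example_shift,			/* in: minimum, out: actual shift */
			NULL,				/* no mask wanted back */
			4096);				/* upper limit on entries */
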
diff --git a/kernel/power/process.c b/kernel/power/process.c
index da2072d7381..cc2e55373b6 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -9,6 +9,7 @@
9#undef DEBUG 9#undef DEBUG
10 10
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12#include <linux/oom.h>
12#include <linux/suspend.h> 13#include <linux/suspend.h>
13#include <linux/module.h> 14#include <linux/module.h>
14#include <linux/syscalls.h> 15#include <linux/syscalls.h>
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 97955b0e44f..36cb168e433 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -619,7 +619,7 @@ __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
619 BUG_ON(!region); 619 BUG_ON(!region);
620 } else 620 } else
621 /* This allocation cannot fail */ 621 /* This allocation cannot fail */
622 region = alloc_bootmem_low(sizeof(struct nosave_region)); 622 region = alloc_bootmem(sizeof(struct nosave_region));
623 region->start_pfn = start_pfn; 623 region->start_pfn = start_pfn;
624 region->end_pfn = end_pfn; 624 region->end_pfn = end_pfn;
625 list_add_tail(&region->list, &nosave_regions); 625 list_add_tail(&region->list, &nosave_regions);
diff --git a/kernel/sched.c b/kernel/sched.c
index 91843ba7f23..0ac9053c21d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2904,6 +2904,19 @@ unsigned long nr_iowait(void)
2904 return sum; 2904 return sum;
2905} 2905}
2906 2906
2907unsigned long nr_iowait_cpu(void)
2908{
2909 struct rq *this = this_rq();
2910 return atomic_read(&this->nr_iowait);
2911}
2912
2913unsigned long this_cpu_load(void)
2914{
2915 struct rq *this = this_rq();
2916 return this->cpu_load[0];
2917}
2918
2919
2907/* Variables and functions for calc_load */ 2920/* Variables and functions for calc_load */
2908static atomic_long_t calc_load_tasks; 2921static atomic_long_t calc_load_tasks;
2909static unsigned long calc_load_update; 2922static unsigned long calc_load_update;
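
nr_iowait_cpu() and this_cpu_load() expose two per-runqueue values that were previously private to the scheduler, so code elsewhere can sample them without reaching into struct rq. A hypothetical reader:

	/* Hypothetical caller: sample the local CPU's runqueue state. */
	static void example_sample(void)
	{
		unsigned long iowaiters = nr_iowait_cpu();	/* tasks in iowait here */
		unsigned long load = this_cpu_load();		/* rq->cpu_load[0] */

		pr_debug("iowait=%lu load=%lu\n", iowaiters, load);
	}
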
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e7163460440..b416512ad17 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP
83# This allows those options to appear when no other tracer is selected. But the 83# This allows those options to appear when no other tracer is selected. But the
84# options do not appear when something else selects it. We need the two options 84# options do not appear when something else selects it. We need the two options
85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the 85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
 86# hidding of the automatic options options. 86# hiding of the automatic options.
87 87
88config TRACING 88config TRACING
89 bool 89 bool
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index ca7d7c4d0c2..23b63859130 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -155,7 +155,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
155 seq_print_ip_sym(seq, it->from, symflags) && 155 seq_print_ip_sym(seq, it->from, symflags) &&
156 trace_seq_printf(seq, "\n")) 156 trace_seq_printf(seq, "\n"))
157 return TRACE_TYPE_HANDLED; 157 return TRACE_TYPE_HANDLED;
158 return TRACE_TYPE_PARTIAL_LINE;; 158 return TRACE_TYPE_PARTIAL_LINE;
159 } 159 }
160 return TRACE_TYPE_UNHANDLED; 160 return TRACE_TYPE_UNHANDLED;
161} 161}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 9489a0a9b1b..cc89be5bc0f 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -48,7 +48,7 @@ static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
48 48
49/* 49/*
50 * Note about RCU : 50 * Note about RCU :
51 * It is used to to delay the free of multiple probes array until a quiescent 51 * It is used to delay the free of multiple probes array until a quiescent
52 * state is reached. 52 * state is reached.
53 * Tracepoint entries modifications are protected by the tracepoints_mutex. 53 * Tracepoint entries modifications are protected by the tracepoints_mutex.
54 */ 54 */
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck
index 603c81b6654..846e039a86b 100644
--- a/lib/Kconfig.kmemcheck
+++ b/lib/Kconfig.kmemcheck
@@ -1,6 +1,8 @@
1config HAVE_ARCH_KMEMCHECK 1config HAVE_ARCH_KMEMCHECK
2 bool 2 bool
3 3
4if HAVE_ARCH_KMEMCHECK
5
4menuconfig KMEMCHECK 6menuconfig KMEMCHECK
5 bool "kmemcheck: trap use of uninitialized memory" 7 bool "kmemcheck: trap use of uninitialized memory"
6 depends on DEBUG_KERNEL 8 depends on DEBUG_KERNEL
@@ -89,3 +91,4 @@ config KMEMCHECK_BITOPS_OK
89 accesses where not all the bits are initialized at the same time. 91 accesses where not all the bits are initialized at the same time.
90 This may also hide some real bugs. 92 This may also hide some real bugs.
91 93
94endif
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 7baed2fc3bc..66eef2e4483 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -28,23 +28,6 @@ struct flex_array_part {
28 char elements[FLEX_ARRAY_PART_SIZE]; 28 char elements[FLEX_ARRAY_PART_SIZE];
29}; 29};
30 30
31static inline int __elements_per_part(int element_size)
32{
33 return FLEX_ARRAY_PART_SIZE / element_size;
34}
35
36static inline int bytes_left_in_base(void)
37{
38 int element_offset = offsetof(struct flex_array, parts);
39 int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset;
40 return bytes_left;
41}
42
43static inline int nr_base_part_ptrs(void)
44{
45 return bytes_left_in_base() / sizeof(struct flex_array_part *);
46}
47
48/* 31/*
49 * If a user requests an allocation which is small 32 * If a user requests an allocation which is small
50 * enough, we may simply use the space in the 33 * enough, we may simply use the space in the
@@ -54,7 +37,7 @@ static inline int nr_base_part_ptrs(void)
54static inline int elements_fit_in_base(struct flex_array *fa) 37static inline int elements_fit_in_base(struct flex_array *fa)
55{ 38{
56 int data_size = fa->element_size * fa->total_nr_elements; 39 int data_size = fa->element_size * fa->total_nr_elements;
57 if (data_size <= bytes_left_in_base()) 40 if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT)
58 return 1; 41 return 1;
59 return 0; 42 return 0;
60} 43}
@@ -63,6 +46,7 @@ static inline int elements_fit_in_base(struct flex_array *fa)
63 * flex_array_alloc - allocate a new flexible array 46 * flex_array_alloc - allocate a new flexible array
64 * @element_size: the size of individual elements in the array 47 * @element_size: the size of individual elements in the array
65 * @total: total number of elements that this should hold 48 * @total: total number of elements that this should hold
49 * @flags: page allocation flags to use for base array
66 * 50 *
67 * Note: all locking must be provided by the caller. 51 * Note: all locking must be provided by the caller.
68 * 52 *
@@ -103,7 +87,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
103 gfp_t flags) 87 gfp_t flags)
104{ 88{
105 struct flex_array *ret; 89 struct flex_array *ret;
106 int max_size = nr_base_part_ptrs() * __elements_per_part(element_size); 90 int max_size = FLEX_ARRAY_NR_BASE_PTRS *
91 FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
107 92
108 /* max_size will end up 0 if element_size > PAGE_SIZE */ 93 /* max_size will end up 0 if element_size > PAGE_SIZE */
109 if (total > max_size) 94 if (total > max_size)
@@ -113,17 +98,21 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
113 return NULL; 98 return NULL;
114 ret->element_size = element_size; 99 ret->element_size = element_size;
115 ret->total_nr_elements = total; 100 ret->total_nr_elements = total;
101 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
102 memset(ret->parts[0], FLEX_ARRAY_FREE,
103 FLEX_ARRAY_BASE_BYTES_LEFT);
116 return ret; 104 return ret;
117} 105}
118 106
119static int fa_element_to_part_nr(struct flex_array *fa, 107static int fa_element_to_part_nr(struct flex_array *fa,
120 unsigned int element_nr) 108 unsigned int element_nr)
121{ 109{
122 return element_nr / __elements_per_part(fa->element_size); 110 return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
123} 111}
124 112
125/** 113/**
126 * flex_array_free_parts - just free the second-level pages 114 * flex_array_free_parts - just free the second-level pages
115 * @fa: the flex array from which to free parts
127 * 116 *
128 * This is to be used in cases where the base 'struct flex_array' 117 * This is to be used in cases where the base 'struct flex_array'
129 * has been statically allocated and should not be free. 118 * has been statically allocated and should not be free.
@@ -131,11 +120,10 @@ static int fa_element_to_part_nr(struct flex_array *fa,
131void flex_array_free_parts(struct flex_array *fa) 120void flex_array_free_parts(struct flex_array *fa)
132{ 121{
133 int part_nr; 122 int part_nr;
134 int max_part = nr_base_part_ptrs();
135 123
136 if (elements_fit_in_base(fa)) 124 if (elements_fit_in_base(fa))
137 return; 125 return;
138 for (part_nr = 0; part_nr < max_part; part_nr++) 126 for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
139 kfree(fa->parts[part_nr]); 127 kfree(fa->parts[part_nr]);
140} 128}
141 129
@@ -150,7 +138,8 @@ static unsigned int index_inside_part(struct flex_array *fa,
150{ 138{
151 unsigned int part_offset; 139 unsigned int part_offset;
152 140
153 part_offset = element_nr % __elements_per_part(fa->element_size); 141 part_offset = element_nr %
142 FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
154 return part_offset * fa->element_size; 143 return part_offset * fa->element_size;
155} 144}
156 145
@@ -159,15 +148,12 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
159{ 148{
160 struct flex_array_part *part = fa->parts[part_nr]; 149 struct flex_array_part *part = fa->parts[part_nr];
161 if (!part) { 150 if (!part) {
162 /* 151 part = kmalloc(sizeof(struct flex_array_part), flags);
163 * This leaves the part pages uninitialized
164 * and with potentially random data, just
165 * as if the user had kmalloc()'d the whole.
166 * __GFP_ZERO can be used to zero it.
167 */
168 part = kmalloc(FLEX_ARRAY_PART_SIZE, flags);
169 if (!part) 152 if (!part)
170 return NULL; 153 return NULL;
154 if (!(flags & __GFP_ZERO))
155 memset(part, FLEX_ARRAY_FREE,
156 sizeof(struct flex_array_part));
171 fa->parts[part_nr] = part; 157 fa->parts[part_nr] = part;
172 } 158 }
173 return part; 159 return part;
@@ -175,9 +161,12 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
175 161
176/** 162/**
177 * flex_array_put - copy data into the array at @element_nr 163 * flex_array_put - copy data into the array at @element_nr
178 * @src: address of data to copy into the array 164 * @fa: the flex array to copy data into
179 * @element_nr: index of the position in which to insert 165 * @element_nr: index of the position in which to insert
180 * the new element. 166 * the new element.
167 * @src: address of data to copy into the array
168 * @flags: page allocation flags to use for array expansion
169 *
181 * 170 *
182 * Note that this *copies* the contents of @src into 171 * Note that this *copies* the contents of @src into
183 * the array. If you are trying to store an array of 172 * the array. If you are trying to store an array of
@@ -207,9 +196,38 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
207} 196}
208 197
209/** 198/**
199 * flex_array_clear - clear element in array at @element_nr
200 * @fa: the flex array of the element.
201 * @element_nr: index of the position to clear.
202 *
203 * Locking must be provided by the caller.
204 */
205int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
206{
207 int part_nr = fa_element_to_part_nr(fa, element_nr);
208 struct flex_array_part *part;
209 void *dst;
210
211 if (element_nr >= fa->total_nr_elements)
212 return -ENOSPC;
213 if (elements_fit_in_base(fa))
214 part = (struct flex_array_part *)&fa->parts[0];
215 else {
216 part = fa->parts[part_nr];
217 if (!part)
218 return -EINVAL;
219 }
220 dst = &part->elements[index_inside_part(fa, element_nr)];
221 memset(dst, FLEX_ARRAY_FREE, fa->element_size);
222 return 0;
223}
224
225/**
210 * flex_array_prealloc - guarantee that array space exists 226 * flex_array_prealloc - guarantee that array space exists
227 * @fa: the flex array for which to preallocate parts
211 * @start: index of first array element for which space is allocated 228 * @start: index of first array element for which space is allocated
212 * @end: index of last (inclusive) element for which space is allocated 229 * @end: index of last (inclusive) element for which space is allocated
230 * @flags: page allocation flags
213 * 231 *
214 * This will guarantee that no future calls to flex_array_put() 232 * This will guarantee that no future calls to flex_array_put()
215 * will allocate memory. It can be used if you are expecting to 233 * will allocate memory. It can be used if you are expecting to
@@ -242,6 +260,7 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,
242 260
243/** 261/**
244 * flex_array_get - pull data back out of the array 262 * flex_array_get - pull data back out of the array
263 * @fa: the flex array from which to extract data
245 * @element_nr: index of the element to fetch from the array 264 * @element_nr: index of the element to fetch from the array
246 * 265 *
247 * Returns a pointer to the data at index @element_nr. Note 266 * Returns a pointer to the data at index @element_nr. Note
@@ -266,3 +285,43 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
266 } 285 }
267 return &part->elements[index_inside_part(fa, element_nr)]; 286 return &part->elements[index_inside_part(fa, element_nr)];
268} 287}
288
289static int part_is_free(struct flex_array_part *part)
290{
291 int i;
292
293 for (i = 0; i < sizeof(struct flex_array_part); i++)
294 if (part->elements[i] != FLEX_ARRAY_FREE)
295 return 0;
296 return 1;
297}
298
299/**
300 * flex_array_shrink - free unused second-level pages
301 * @fa: the flex array to shrink
302 *
303 * Frees all second-level pages that consist solely of unused
304 * elements. Returns the number of pages freed.
305 *
306 * Locking must be provided by the caller.
307 */
308int flex_array_shrink(struct flex_array *fa)
309{
310 struct flex_array_part *part;
311 int part_nr;
312 int ret = 0;
313
314 if (elements_fit_in_base(fa))
315 return ret;
316 for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
317 part = fa->parts[part_nr];
318 if (!part)
319 continue;
320 if (part_is_free(part)) {
321 fa->parts[part_nr] = NULL;
322 kfree(part);
323 ret++;
324 }
325 }
326 return ret;
327}
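
With FLEX_ARRAY_FREE poisoning of unused slots, a flex_array can now behave as a sparse table: flex_array_clear() repoisons a slot, and flex_array_shrink() frees any part page whose bytes are all FLEX_ARRAY_FREE. A usage sketch built only from the functions in this file (error handling abbreviated):

	struct foo { int a, b; };
	struct foo v = { 1, 2 }, *p;
	struct flex_array *fa;

	fa = flex_array_alloc(sizeof(struct foo), 128, GFP_KERNEL);
	if (!fa)
		return -ENOMEM;
	if (flex_array_put(fa, 10, &v, GFP_KERNEL))	/* copies v into slot 10 */
		goto err;
	p = flex_array_get(fa, 10);	/* pointer into the part page */
	flex_array_clear(fa, 10);	/* repoison the slot */
	flex_array_shrink(fa);		/* returns how many parts were freed */
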
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d320c1816a7..73a14b8c6d1 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1092,13 +1092,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1092 1092
1093 /* Reject out-of-range values early. Large positive sizes are 1093 /* Reject out-of-range values early. Large positive sizes are
1094 used for unknown buffer sizes. */ 1094 used for unknown buffer sizes. */
1095 if (unlikely((int) size < 0)) { 1095 if (WARN_ON_ONCE((int) size < 0))
1096 /* There can be only one.. */
1097 static char warn = 1;
1098 WARN_ON(warn);
1099 warn = 0;
1100 return 0; 1096 return 0;
1101 }
1102 1097
1103 str = buf; 1098 str = buf;
1104 end = buf + size; 1099 end = buf + size;
@@ -1544,13 +1539,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1544 1539
1545 struct printf_spec spec = {0}; 1540 struct printf_spec spec = {0};
1546 1541
1547 if (unlikely((int) size < 0)) { 1542 if (WARN_ON_ONCE((int) size < 0))
1548 /* There can be only one.. */
1549 static char warn = 1;
1550 WARN_ON(warn);
1551 warn = 0;
1552 return 0; 1543 return 0;
1553 }
1554 1544
1555 str = buf; 1545 str = buf;
1556 end = buf + size; 1546 end = buf + size;
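
Both vsnprintf() and bstr_printf() replace the hand-rolled warn-once pattern with WARN_ON_ONCE(), which warns at most once and evaluates to the condition itself, so it can sit directly in the if. The shape of the change:

	/* Before: a static flag guards the warning by hand. */
	if (unlikely((int) size < 0)) {
		static char warn = 1;
		WARN_ON(warn);
		warn = 0;
		return 0;
	}

	/* After: one line, same single warning, same early return. */
	if (WARN_ON_ONCE((int) size < 0))
		return 0;
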
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index c3e4a2baf83..46a31e5f49c 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -135,7 +135,7 @@ static const config configuration_table[10] = {
135 135
136/* =========================================================================== 136/* ===========================================================================
137 * Update a hash value with the given input byte 137 * Update a hash value with the given input byte
138 * IN assertion: all calls to to UPDATE_HASH are made with consecutive 138 * IN assertion: all calls to UPDATE_HASH are made with consecutive
139 * input characters, so that a running hash key can be computed from the 139 * input characters, so that a running hash key can be computed from the
140 * previous key instead of complete recalculation each time. 140 * previous key instead of complete recalculation each time.
141 */ 141 */
@@ -146,7 +146,7 @@ static const config configuration_table[10] = {
146 * Insert string str in the dictionary and set match_head to the previous head 146 * Insert string str in the dictionary and set match_head to the previous head
147 * of the hash chain (the most recent string with same hash key). Return 147 * of the hash chain (the most recent string with same hash key). Return
148 * the previous length of the hash chain. 148 * the previous length of the hash chain.
149 * IN assertion: all calls to to INSERT_STRING are made with consecutive 149 * IN assertion: all calls to INSERT_STRING are made with consecutive
150 * input characters and the first MIN_MATCH bytes of str are valid 150 * input characters and the first MIN_MATCH bytes of str are valid
151 * (except for the last MIN_MATCH-1 bytes of the input file). 151 * (except for the last MIN_MATCH-1 bytes of the input file).
152 */ 152 */
diff --git a/mm/Kconfig b/mm/Kconfig
index 3aa519f52e1..71eb0b4cce8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -214,6 +214,18 @@ config HAVE_MLOCKED_PAGE_BIT
214config MMU_NOTIFIER 214config MMU_NOTIFIER
215 bool 215 bool
216 216
217config KSM
218 bool "Enable KSM for page merging"
219 depends on MMU
220 help
221 Enable Kernel Samepage Merging: KSM periodically scans those areas
222 of an application's address space that an app has advised may be
223 mergeable. When it finds pages of identical content, it replaces
224 the many instances by a single resident page with that content, so
225 saving memory until one or another app needs to modify the content.
226 Recommended for use with KVM, or with other duplicative applications.
227 See Documentation/vm/ksm.txt for more information.
228
217config DEFAULT_MMAP_MIN_ADDR 229config DEFAULT_MMAP_MIN_ADDR
218 int "Low address space to protect from user allocation" 230 int "Low address space to protect from user allocation"
219 default 4096 231 default 4096
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index aa99fd1f710..af7cfb43d2f 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -6,7 +6,7 @@ config DEBUG_PAGEALLOC
6 ---help--- 6 ---help---
7 Unmap pages from the kernel linear mapping after free_pages(). 7 Unmap pages from the kernel linear mapping after free_pages().
8 This results in a large slowdown, but helps to find certain types 8 This results in a large slowdown, but helps to find certain types
9 of memory corruptions. 9 of memory corruption.
10 10
11config WANT_PAGE_DEBUG_FLAGS 11config WANT_PAGE_DEBUG_FLAGS
12 bool 12 bool
@@ -17,11 +17,11 @@ config PAGE_POISONING
17 depends on !HIBERNATION 17 depends on !HIBERNATION
18 select DEBUG_PAGEALLOC 18 select DEBUG_PAGEALLOC
19 select WANT_PAGE_DEBUG_FLAGS 19 select WANT_PAGE_DEBUG_FLAGS
20 help 20 ---help---
21 Fill the pages with poison patterns after free_pages() and verify 21 Fill the pages with poison patterns after free_pages() and verify
22 the patterns before alloc_pages(). This results in a large slowdown, 22 the patterns before alloc_pages(). This results in a large slowdown,
23 but helps to find certain types of memory corruptions. 23 but helps to find certain types of memory corruption.
24 24
25 This option cannot enalbe with hibernation. Otherwise, it will get 25 This option cannot be enabled in combination with hibernation as
26 wrong messages for memory corruption because the free pages are not 26 that would result in incorrect warnings of memory corruption after
27 saved to the suspend image. 27 a resume because free pages are not saved to the suspend image.
diff --git a/mm/Makefile b/mm/Makefile
index ea4b18bd396..728a9fde49d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
11 maccess.o page_alloc.o page-writeback.o \ 11 maccess.o page_alloc.o page-writeback.o \
12 readahead.o swap.o truncate.o vmscan.o shmem.o \ 12 readahead.o swap.o truncate.o vmscan.o shmem.o \
13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
14 page_isolation.o mm_init.o $(mmu-y) 14 page_isolation.o mm_init.o mmu_context.o $(mmu-y)
15obj-y += init-mm.o 15obj-y += init-mm.o
16 16
17obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o 17obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
@@ -25,6 +25,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o 25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
26obj-$(CONFIG_SLOB) += slob.o 26obj-$(CONFIG_SLOB) += slob.o
27obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o 27obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
28obj-$(CONFIG_KSM) += ksm.o
28obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o 29obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
29obj-$(CONFIG_SLAB) += slab.o 30obj-$(CONFIG_SLAB) += slab.o
30obj-$(CONFIG_SLUB) += slub.o 31obj-$(CONFIG_SLUB) += slub.o
diff --git a/mm/filemap.c b/mm/filemap.c
index dd51c68e2b8..bcc7372aebb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -119,6 +119,8 @@ void __remove_from_page_cache(struct page *page)
119 page->mapping = NULL; 119 page->mapping = NULL;
120 mapping->nrpages--; 120 mapping->nrpages--;
121 __dec_zone_page_state(page, NR_FILE_PAGES); 121 __dec_zone_page_state(page, NR_FILE_PAGES);
122 if (PageSwapBacked(page))
123 __dec_zone_page_state(page, NR_SHMEM);
122 BUG_ON(page_mapped(page)); 124 BUG_ON(page_mapped(page));
123 125
124 /* 126 /*
@@ -431,6 +433,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
431 if (likely(!error)) { 433 if (likely(!error)) {
432 mapping->nrpages++; 434 mapping->nrpages++;
433 __inc_zone_page_state(page, NR_FILE_PAGES); 435 __inc_zone_page_state(page, NR_FILE_PAGES);
436 if (PageSwapBacked(page))
437 __inc_zone_page_state(page, NR_SHMEM);
434 spin_unlock_irq(&mapping->tree_lock); 438 spin_unlock_irq(&mapping->tree_lock);
435 } else { 439 } else {
436 page->mapping = NULL; 440 page->mapping = NULL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b16d6363477..815dbd4a6dc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -456,24 +456,6 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
456 h->free_huge_pages_node[nid]++; 456 h->free_huge_pages_node[nid]++;
457} 457}
458 458
459static struct page *dequeue_huge_page(struct hstate *h)
460{
461 int nid;
462 struct page *page = NULL;
463
464 for (nid = 0; nid < MAX_NUMNODES; ++nid) {
465 if (!list_empty(&h->hugepage_freelists[nid])) {
466 page = list_entry(h->hugepage_freelists[nid].next,
467 struct page, lru);
468 list_del(&page->lru);
469 h->free_huge_pages--;
470 h->free_huge_pages_node[nid]--;
471 break;
472 }
473 }
474 return page;
475}
476
477static struct page *dequeue_huge_page_vma(struct hstate *h, 459static struct page *dequeue_huge_page_vma(struct hstate *h,
478 struct vm_area_struct *vma, 460 struct vm_area_struct *vma,
479 unsigned long address, int avoid_reserve) 461 unsigned long address, int avoid_reserve)
@@ -641,7 +623,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
641 623
642/* 624/*
643 * Use a helper variable to find the next node and then 625 * Use a helper variable to find the next node and then
644 * copy it back to hugetlb_next_nid afterwards: 626 * copy it back to next_nid_to_alloc afterwards:
645 * otherwise there's a window in which a racer might 627 * otherwise there's a window in which a racer might
646 * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node. 628 * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
647 * But we don't need to use a spin_lock here: it really 629 * But we don't need to use a spin_lock here: it really
@@ -650,13 +632,13 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
650 * if we just successfully allocated a hugepage so that 632 * if we just successfully allocated a hugepage so that
651 * the next caller gets hugepages on the next node. 633 * the next caller gets hugepages on the next node.
652 */ 634 */
653static int hstate_next_node(struct hstate *h) 635static int hstate_next_node_to_alloc(struct hstate *h)
654{ 636{
655 int next_nid; 637 int next_nid;
656 next_nid = next_node(h->hugetlb_next_nid, node_online_map); 638 next_nid = next_node(h->next_nid_to_alloc, node_online_map);
657 if (next_nid == MAX_NUMNODES) 639 if (next_nid == MAX_NUMNODES)
658 next_nid = first_node(node_online_map); 640 next_nid = first_node(node_online_map);
659 h->hugetlb_next_nid = next_nid; 641 h->next_nid_to_alloc = next_nid;
660 return next_nid; 642 return next_nid;
661} 643}
662 644
@@ -667,14 +649,15 @@ static int alloc_fresh_huge_page(struct hstate *h)
667 int next_nid; 649 int next_nid;
668 int ret = 0; 650 int ret = 0;
669 651
670 start_nid = h->hugetlb_next_nid; 652 start_nid = h->next_nid_to_alloc;
653 next_nid = start_nid;
671 654
672 do { 655 do {
673 page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid); 656 page = alloc_fresh_huge_page_node(h, next_nid);
674 if (page) 657 if (page)
675 ret = 1; 658 ret = 1;
676 next_nid = hstate_next_node(h); 659 next_nid = hstate_next_node_to_alloc(h);
677 } while (!page && h->hugetlb_next_nid != start_nid); 660 } while (!page && next_nid != start_nid);
678 661
679 if (ret) 662 if (ret)
680 count_vm_event(HTLB_BUDDY_PGALLOC); 663 count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -684,6 +667,61 @@ static int alloc_fresh_huge_page(struct hstate *h)
684 return ret; 667 return ret;
685} 668}
686 669
670/*
671 * helper for free_pool_huge_page() - find next node
672 * from which to free a huge page
673 */
674static int hstate_next_node_to_free(struct hstate *h)
675{
676 int next_nid;
677 next_nid = next_node(h->next_nid_to_free, node_online_map);
678 if (next_nid == MAX_NUMNODES)
679 next_nid = first_node(node_online_map);
680 h->next_nid_to_free = next_nid;
681 return next_nid;
682}
683
684/*
685 * Free huge page from pool from next node to free.
686 * Attempt to keep persistent huge pages more or less
687 * balanced over allowed nodes.
688 * Called with hugetlb_lock locked.
689 */
690static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
691{
692 int start_nid;
693 int next_nid;
694 int ret = 0;
695
696 start_nid = h->next_nid_to_free;
697 next_nid = start_nid;
698
699 do {
700 /*
701 * If we're returning unused surplus pages, only examine
702 * nodes with surplus pages.
703 */
704 if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) &&
705 !list_empty(&h->hugepage_freelists[next_nid])) {
706 struct page *page =
707 list_entry(h->hugepage_freelists[next_nid].next,
708 struct page, lru);
709 list_del(&page->lru);
710 h->free_huge_pages--;
711 h->free_huge_pages_node[next_nid]--;
712 if (acct_surplus) {
713 h->surplus_huge_pages--;
714 h->surplus_huge_pages_node[next_nid]--;
715 }
716 update_and_free_page(h, page);
717 ret = 1;
718 }
719 next_nid = hstate_next_node_to_free(h);
720 } while (!ret && next_nid != start_nid);
721
722 return ret;
723}
724
687static struct page *alloc_buddy_huge_page(struct hstate *h, 725static struct page *alloc_buddy_huge_page(struct hstate *h,
688 struct vm_area_struct *vma, unsigned long address) 726 struct vm_area_struct *vma, unsigned long address)
689{ 727{
@@ -855,22 +893,13 @@ free:
855 * When releasing a hugetlb pool reservation, any surplus pages that were 893 * When releasing a hugetlb pool reservation, any surplus pages that were
856 * allocated to satisfy the reservation must be explicitly freed if they were 894 * allocated to satisfy the reservation must be explicitly freed if they were
857 * never used. 895 * never used.
896 * Called with hugetlb_lock held.
858 */ 897 */
859static void return_unused_surplus_pages(struct hstate *h, 898static void return_unused_surplus_pages(struct hstate *h,
860 unsigned long unused_resv_pages) 899 unsigned long unused_resv_pages)
861{ 900{
862 static int nid = -1;
863 struct page *page;
864 unsigned long nr_pages; 901 unsigned long nr_pages;
865 902
866 /*
867 * We want to release as many surplus pages as possible, spread
868 * evenly across all nodes. Iterate across all nodes until we
869 * can no longer free unreserved surplus pages. This occurs when
870 * the nodes with surplus pages have no free pages.
871 */
872 unsigned long remaining_iterations = nr_online_nodes;
873
874 /* Uncommit the reservation */ 903 /* Uncommit the reservation */
875 h->resv_huge_pages -= unused_resv_pages; 904 h->resv_huge_pages -= unused_resv_pages;
876 905
@@ -880,26 +909,17 @@ static void return_unused_surplus_pages(struct hstate *h,
880 909
881 nr_pages = min(unused_resv_pages, h->surplus_huge_pages); 910 nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
882 911
883 while (remaining_iterations-- && nr_pages) { 912 /*
884 nid = next_node(nid, node_online_map); 913 * We want to release as many surplus pages as possible, spread
885 if (nid == MAX_NUMNODES) 914 * evenly across all nodes. Iterate across all nodes until we
886 nid = first_node(node_online_map); 915 * can no longer free unreserved surplus pages. This occurs when
887 916 * the nodes with surplus pages have no free pages.
 888 if (!h->surplus_huge_pages_node[nid]) 917 * free_pool_huge_page() will balance the frees across the
889 continue; 918 * on-line nodes for us and will handle the hstate accounting.
890 919 */
891 if (!list_empty(&h->hugepage_freelists[nid])) { 920 while (nr_pages--) {
892 page = list_entry(h->hugepage_freelists[nid].next, 921 if (!free_pool_huge_page(h, 1))
893 struct page, lru); 922 break;
894 list_del(&page->lru);
895 update_and_free_page(h, page);
896 h->free_huge_pages--;
897 h->free_huge_pages_node[nid]--;
898 h->surplus_huge_pages--;
899 h->surplus_huge_pages_node[nid]--;
900 nr_pages--;
901 remaining_iterations = nr_online_nodes;
902 }
903 } 923 }
904} 924}
905 925
@@ -1008,9 +1028,10 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
1008 void *addr; 1028 void *addr;
1009 1029
1010 addr = __alloc_bootmem_node_nopanic( 1030 addr = __alloc_bootmem_node_nopanic(
1011 NODE_DATA(h->hugetlb_next_nid), 1031 NODE_DATA(h->next_nid_to_alloc),
1012 huge_page_size(h), huge_page_size(h), 0); 1032 huge_page_size(h), huge_page_size(h), 0);
1013 1033
1034 hstate_next_node_to_alloc(h);
1014 if (addr) { 1035 if (addr) {
1015 /* 1036 /*
1016 * Use the beginning of the huge page to store the 1037 * Use the beginning of the huge page to store the
@@ -1020,7 +1041,6 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
1020 m = addr; 1041 m = addr;
1021 goto found; 1042 goto found;
1022 } 1043 }
1023 hstate_next_node(h);
1024 nr_nodes--; 1044 nr_nodes--;
1025 } 1045 }
1026 return 0; 1046 return 0;
@@ -1141,31 +1161,43 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
1141 */ 1161 */
1142static int adjust_pool_surplus(struct hstate *h, int delta) 1162static int adjust_pool_surplus(struct hstate *h, int delta)
1143{ 1163{
1144 static int prev_nid; 1164 int start_nid, next_nid;
1145 int nid = prev_nid;
1146 int ret = 0; 1165 int ret = 0;
1147 1166
1148 VM_BUG_ON(delta != -1 && delta != 1); 1167 VM_BUG_ON(delta != -1 && delta != 1);
1149 do {
1150 nid = next_node(nid, node_online_map);
1151 if (nid == MAX_NUMNODES)
1152 nid = first_node(node_online_map);
1153 1168
1154 /* To shrink on this node, there must be a surplus page */ 1169 if (delta < 0)
1155 if (delta < 0 && !h->surplus_huge_pages_node[nid]) 1170 start_nid = h->next_nid_to_alloc;
1156 continue; 1171 else
1157 /* Surplus cannot exceed the total number of pages */ 1172 start_nid = h->next_nid_to_free;
1158 if (delta > 0 && h->surplus_huge_pages_node[nid] >= 1173 next_nid = start_nid;
1174
1175 do {
1176 int nid = next_nid;
1177 if (delta < 0) {
1178 next_nid = hstate_next_node_to_alloc(h);
1179 /*
1180 * To shrink on this node, there must be a surplus page
1181 */
1182 if (!h->surplus_huge_pages_node[nid])
1183 continue;
1184 }
1185 if (delta > 0) {
1186 next_nid = hstate_next_node_to_free(h);
1187 /*
1188 * Surplus cannot exceed the total number of pages
1189 */
1190 if (h->surplus_huge_pages_node[nid] >=
1159 h->nr_huge_pages_node[nid]) 1191 h->nr_huge_pages_node[nid])
1160 continue; 1192 continue;
1193 }
1161 1194
1162 h->surplus_huge_pages += delta; 1195 h->surplus_huge_pages += delta;
1163 h->surplus_huge_pages_node[nid] += delta; 1196 h->surplus_huge_pages_node[nid] += delta;
1164 ret = 1; 1197 ret = 1;
1165 break; 1198 break;
1166 } while (nid != prev_nid); 1199 } while (next_nid != start_nid);
1167 1200
1168 prev_nid = nid;
1169 return ret; 1201 return ret;
1170} 1202}
1171 1203
@@ -1227,10 +1259,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
1227 min_count = max(count, min_count); 1259 min_count = max(count, min_count);
1228 try_to_free_low(h, min_count); 1260 try_to_free_low(h, min_count);
1229 while (min_count < persistent_huge_pages(h)) { 1261 while (min_count < persistent_huge_pages(h)) {
1230 struct page *page = dequeue_huge_page(h); 1262 if (!free_pool_huge_page(h, 0))
1231 if (!page)
1232 break; 1263 break;
1233 update_and_free_page(h, page);
1234 } 1264 }
1235 while (count < persistent_huge_pages(h)) { 1265 while (count < persistent_huge_pages(h)) {
1236 if (!adjust_pool_surplus(h, 1)) 1266 if (!adjust_pool_surplus(h, 1))
@@ -1442,7 +1472,8 @@ void __init hugetlb_add_hstate(unsigned order)
1442 h->free_huge_pages = 0; 1472 h->free_huge_pages = 0;
1443 for (i = 0; i < MAX_NUMNODES; ++i) 1473 for (i = 0; i < MAX_NUMNODES; ++i)
1444 INIT_LIST_HEAD(&h->hugepage_freelists[i]); 1474 INIT_LIST_HEAD(&h->hugepage_freelists[i]);
1445 h->hugetlb_next_nid = first_node(node_online_map); 1475 h->next_nid_to_alloc = first_node(node_online_map);
1476 h->next_nid_to_free = first_node(node_online_map);
1446 snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", 1477 snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
1447 huge_page_size(h)/1024); 1478 huge_page_size(h)/1024);
1448 1479
@@ -1985,6 +2016,26 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
1985 return find_lock_page(mapping, idx); 2016 return find_lock_page(mapping, idx);
1986} 2017}
1987 2018
2019/*
2020 * Return whether there is a pagecache page to back given address within VMA.
2021 * Caller follow_hugetlb_page() holds page_table_lock so we cannot lock_page.
2022 */
2023static bool hugetlbfs_pagecache_present(struct hstate *h,
2024 struct vm_area_struct *vma, unsigned long address)
2025{
2026 struct address_space *mapping;
2027 pgoff_t idx;
2028 struct page *page;
2029
2030 mapping = vma->vm_file->f_mapping;
2031 idx = vma_hugecache_offset(h, vma, address);
2032
2033 page = find_get_page(mapping, idx);
2034 if (page)
2035 put_page(page);
2036 return page != NULL;
2037}
2038
1988static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, 2039static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
1989 unsigned long address, pte_t *ptep, unsigned int flags) 2040 unsigned long address, pte_t *ptep, unsigned int flags)
1990{ 2041{
@@ -2180,54 +2231,55 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
2180 return NULL; 2231 return NULL;
2181} 2232}
2182 2233
2183static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
2184{
2185 if (!ptep || write || shared)
2186 return 0;
2187 else
2188 return huge_pte_none(huge_ptep_get(ptep));
2189}
2190
2191int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, 2234int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
2192 struct page **pages, struct vm_area_struct **vmas, 2235 struct page **pages, struct vm_area_struct **vmas,
2193 unsigned long *position, int *length, int i, 2236 unsigned long *position, int *length, int i,
2194 int write) 2237 unsigned int flags)
2195{ 2238{
2196 unsigned long pfn_offset; 2239 unsigned long pfn_offset;
2197 unsigned long vaddr = *position; 2240 unsigned long vaddr = *position;
2198 int remainder = *length; 2241 int remainder = *length;
2199 struct hstate *h = hstate_vma(vma); 2242 struct hstate *h = hstate_vma(vma);
2200 int zeropage_ok = 0;
2201 int shared = vma->vm_flags & VM_SHARED;
2202 2243
2203 spin_lock(&mm->page_table_lock); 2244 spin_lock(&mm->page_table_lock);
2204 while (vaddr < vma->vm_end && remainder) { 2245 while (vaddr < vma->vm_end && remainder) {
2205 pte_t *pte; 2246 pte_t *pte;
2247 int absent;
2206 struct page *page; 2248 struct page *page;
2207 2249
2208 /* 2250 /*
2209 * Some archs (sparc64, sh*) have multiple pte_ts to 2251 * Some archs (sparc64, sh*) have multiple pte_ts to
2210 * each hugepage. We have to make * sure we get the 2252 * each hugepage. We have to make sure we get the
2211 * first, for the page indexing below to work. 2253 * first, for the page indexing below to work.
2212 */ 2254 */
2213 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); 2255 pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
2214 if (huge_zeropage_ok(pte, write, shared)) 2256 absent = !pte || huge_pte_none(huge_ptep_get(pte));
2215 zeropage_ok = 1; 2257
2258 /*
2259 * When coredumping, it suits get_dump_page if we just return
2260 * an error where there's an empty slot with no huge pagecache
2261 * to back it. This way, we avoid allocating a hugepage, and
2262 * the sparse dumpfile avoids allocating disk blocks, but its
2263 * huge holes still show up with zeroes where they need to be.
2264 */
2265 if (absent && (flags & FOLL_DUMP) &&
2266 !hugetlbfs_pagecache_present(h, vma, vaddr)) {
2267 remainder = 0;
2268 break;
2269 }
2216 2270
2217 if (!pte || 2271 if (absent ||
2218 (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) || 2272 ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
2219 (write && !pte_write(huge_ptep_get(pte)))) {
2220 int ret; 2273 int ret;
2221 2274
2222 spin_unlock(&mm->page_table_lock); 2275 spin_unlock(&mm->page_table_lock);
2223 ret = hugetlb_fault(mm, vma, vaddr, write); 2276 ret = hugetlb_fault(mm, vma, vaddr,
2277 (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
2224 spin_lock(&mm->page_table_lock); 2278 spin_lock(&mm->page_table_lock);
2225 if (!(ret & VM_FAULT_ERROR)) 2279 if (!(ret & VM_FAULT_ERROR))
2226 continue; 2280 continue;
2227 2281
2228 remainder = 0; 2282 remainder = 0;
2229 if (!i)
2230 i = -EFAULT;
2231 break; 2283 break;
2232 } 2284 }
2233 2285
@@ -2235,10 +2287,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
2235 page = pte_page(huge_ptep_get(pte)); 2287 page = pte_page(huge_ptep_get(pte));
2236same_page: 2288same_page:
2237 if (pages) { 2289 if (pages) {
2238 if (zeropage_ok) 2290 pages[i] = mem_map_offset(page, pfn_offset);
2239 pages[i] = ZERO_PAGE(0);
2240 else
2241 pages[i] = mem_map_offset(page, pfn_offset);
2242 get_page(pages[i]); 2291 get_page(pages[i]);
2243 } 2292 }
2244 2293
@@ -2262,7 +2311,7 @@ same_page:
2262 *length = remainder; 2311 *length = remainder;
2263 *position = vaddr; 2312 *position = vaddr;
2264 2313
2265 return i; 2314 return i ? i : -EFAULT;
2266} 2315}
2267 2316
2268void hugetlb_change_protection(struct vm_area_struct *vma, 2317void hugetlb_change_protection(struct vm_area_struct *vma,
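
The hugetlb rework replaces three ad-hoc node loops (the static nid in return_unused_surplus_pages(), the static prev_nid in adjust_pool_surplus(), and dequeue_huge_page()) with two persistent per-hstate cursors, next_nid_to_alloc and next_nid_to_free. Every caller now shares one iteration shape, sketched here with a hypothetical done flag:

	/* Sketch: one full round-robin lap over the online nodes. */
	int start_nid = h->next_nid_to_free;	/* or h->next_nid_to_alloc */
	int next_nid = start_nid;
	int done = 0;				/* hypothetical success flag */

	do {
		/* ... try to free (or allocate) one huge page on next_nid ... */
		next_nid = hstate_next_node_to_free(h);	/* wraps via first_node() */
	} while (!done && next_nid != start_nid);	/* stop after one lap */
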
diff --git a/mm/internal.h b/mm/internal.h
index f290c4db528..22ec8d2b0fb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -37,6 +37,8 @@ static inline void __put_page(struct page *page)
37 atomic_dec(&page->_count); 37 atomic_dec(&page->_count);
38} 38}
39 39
40extern unsigned long highest_memmap_pfn;
41
40/* 42/*
41 * in mm/vmscan.c: 43 * in mm/vmscan.c:
42 */ 44 */
@@ -46,7 +48,6 @@ extern void putback_lru_page(struct page *page);
46/* 48/*
47 * in mm/page_alloc.c 49 * in mm/page_alloc.c
48 */ 50 */
49extern unsigned long highest_memmap_pfn;
50extern void __free_pages_bootmem(struct page *page, unsigned int order); 51extern void __free_pages_bootmem(struct page *page, unsigned int order);
51extern void prep_compound_page(struct page *page, unsigned long order); 52extern void prep_compound_page(struct page *page, unsigned long order);
52 53
@@ -250,13 +251,8 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
250} 251}
251#endif /* CONFIG_SPARSEMEM */ 252#endif /* CONFIG_SPARSEMEM */
252 253
253#define GUP_FLAGS_WRITE 0x1
254#define GUP_FLAGS_FORCE 0x2
255#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
256#define GUP_FLAGS_IGNORE_SIGKILL 0x8
257
258int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 254int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
259 unsigned long start, int len, int flags, 255 unsigned long start, int len, unsigned int foll_flags,
260 struct page **pages, struct vm_area_struct **vmas); 256 struct page **pages, struct vm_area_struct **vmas);
261 257
262#define ZONE_RECLAIM_NOSCAN -2 258#define ZONE_RECLAIM_NOSCAN -2
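
The private GUP_FLAGS_* bits disappear in favour of the public FOLL_* flags, so __get_user_pages() now takes the same flag space used by follow_page() (FOLL_WRITE and the new FOLL_DUMP already appear in the hugetlb hunk above). A hypothetical call, assuming the caller holds mmap_sem:

	/* Hypothetical: pin one writable page at addr for the current task. */
	struct page *page;
	int ret;

	ret = __get_user_pages(current, current->mm, addr, 1,
			       FOLL_WRITE | FOLL_GET,	/* FOLL_*, not GUP_FLAGS_* */
			       &page, NULL);
	if (ret == 1)
		put_page(page);		/* drop the pin when done */
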
diff --git a/mm/ksm.c b/mm/ksm.c
new file mode 100644
index 00000000000..37cc3732509
--- /dev/null
+++ b/mm/ksm.c
@@ -0,0 +1,1703 @@
1/*
2 * Memory merging support.
3 *
4 * This code enables dynamic sharing of identical pages found in different
5 * memory areas, even if they are not shared by fork()
6 *
7 * Copyright (C) 2008-2009 Red Hat, Inc.
8 * Authors:
9 * Izik Eidus
10 * Andrea Arcangeli
11 * Chris Wright
12 * Hugh Dickins
13 *
14 * This work is licensed under the terms of the GNU GPL, version 2.
15 */
16
17#include <linux/errno.h>
18#include <linux/mm.h>
19#include <linux/fs.h>
20#include <linux/mman.h>
21#include <linux/sched.h>
22#include <linux/rwsem.h>
23#include <linux/pagemap.h>
24#include <linux/rmap.h>
25#include <linux/spinlock.h>
26#include <linux/jhash.h>
27#include <linux/delay.h>
28#include <linux/kthread.h>
29#include <linux/wait.h>
30#include <linux/slab.h>
31#include <linux/rbtree.h>
32#include <linux/mmu_notifier.h>
33#include <linux/ksm.h>
34
35#include <asm/tlbflush.h>
36
37/*
38 * A few notes about the KSM scanning process,
39 * to make it easier to understand the data structures below:
40 *
41 * In order to reduce excessive scanning, KSM sorts the memory pages by their
42 * contents into a data structure that holds pointers to the pages' locations.
43 *
44 * Since the contents of the pages may change at any moment, KSM cannot just
45 * insert the pages into a normal sorted tree and expect it to find anything.
46 * Therefore KSM uses two data structures - the stable and the unstable tree.
47 *
48 * The stable tree holds pointers to all the merged pages (ksm pages), sorted
49 * by their contents. Because each such page is write-protected, searching on
50 * this tree is fully assured to be working (except when pages are unmapped),
51 * and therefore this tree is called the stable tree.
52 *
53 * In addition to the stable tree, KSM uses a second data structure called the
54 * unstable tree: this tree holds pointers to pages which have been found to
55 * be "unchanged for a period of time". The unstable tree sorts these pages
56 * by their contents, but since they are not write-protected, KSM cannot rely
57 * upon the unstable tree to work correctly - the unstable tree is liable to
58 * be corrupted as its contents are modified, and so it is called unstable.
59 *
60 * KSM solves this problem by several techniques:
61 *
62 * 1) The unstable tree is flushed every time KSM completes scanning all
63 * memory areas, and then the tree is rebuilt again from the beginning.
 64 * 2) KSM will only insert into the unstable tree pages whose hash value
65 * has not changed since the previous scan of all memory areas.
66 * 3) The unstable tree is a RedBlack Tree - so its balancing is based on the
67 * colors of the nodes and not on their contents, assuring that even when
68 * the tree gets "corrupted" it won't get out of balance, so scanning time
69 * remains the same (also, searching and inserting nodes in an rbtree uses
70 * the same algorithm, so we have no overhead when we flush and rebuild).
71 * 4) KSM never flushes the stable tree, which means that even if it were to
72 * take 10 attempts to find a page in the unstable tree, once it is found,
73 * it is secured in the stable tree. (When we scan a new page, we first
74 * compare it against the stable tree, and then against the unstable tree.)
75 */
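
	/*
	 * A minimal sketch of the content-keyed walk both trees rely on:
	 * ordering is by full page content via memcmp(). The real searches
	 * later in this file also insert and unlink nodes; item_page() is
	 * an assumed helper resolving an rmap_item to its current page.
	 */
	static struct rmap_item *sketch_tree_search(struct page *page)
	{
		struct rb_node *node = root_stable_tree.rb_node;

		while (node) {
			struct rmap_item *item =
				rb_entry(node, struct rmap_item, node);
			int ret = memcmp_pages(page, item_page(item));

			if (ret < 0)
				node = node->rb_left;
			else if (ret > 0)
				node = node->rb_right;
			else
				return item;	/* identical content found */
		}
		return NULL;
	}
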
76
77/**
78 * struct mm_slot - ksm information per mm that is being scanned
79 * @link: link to the mm_slots hash list
80 * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
81 * @rmap_list: head for this mm_slot's list of rmap_items
82 * @mm: the mm that this information is valid for
83 */
84struct mm_slot {
85 struct hlist_node link;
86 struct list_head mm_list;
87 struct list_head rmap_list;
88 struct mm_struct *mm;
89};
90
91/**
92 * struct ksm_scan - cursor for scanning
93 * @mm_slot: the current mm_slot we are scanning
94 * @address: the next address inside that to be scanned
95 * @rmap_item: the current rmap that we are scanning inside the rmap_list
96 * @seqnr: count of completed full scans (needed when removing unstable node)
97 *
98 * There is only the one ksm_scan instance of this cursor structure.
99 */
100struct ksm_scan {
101 struct mm_slot *mm_slot;
102 unsigned long address;
103 struct rmap_item *rmap_item;
104 unsigned long seqnr;
105};
106
107/**
108 * struct rmap_item - reverse mapping item for virtual addresses
109 * @link: link into mm_slot's rmap_list (rmap_list is per mm)
110 * @mm: the memory structure this rmap_item is pointing into
111 * @address: the virtual address this rmap_item tracks (+ flags in low bits)
112 * @oldchecksum: previous checksum of the page at that virtual address
113 * @node: rb_node of this rmap_item in either unstable or stable tree
114 * @next: next rmap_item hanging off the same node of the stable tree
115 * @prev: previous rmap_item hanging off the same node of the stable tree
116 */
117struct rmap_item {
118 struct list_head link;
119 struct mm_struct *mm;
120 unsigned long address; /* + low bits used for flags below */
121 union {
122 unsigned int oldchecksum; /* when unstable */
123 struct rmap_item *next; /* when stable */
124 };
125 union {
126 struct rb_node node; /* when tree node */
127 struct rmap_item *prev; /* in stable list */
128 };
129};
130
131#define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */
132#define NODE_FLAG 0x100 /* is a node of unstable or stable tree */
133#define STABLE_FLAG 0x200 /* is a node or list item of stable tree */
134
135/* The stable and unstable tree heads */
136static struct rb_root root_stable_tree = RB_ROOT;
137static struct rb_root root_unstable_tree = RB_ROOT;
138
139#define MM_SLOTS_HASH_HEADS 1024
140static struct hlist_head *mm_slots_hash;
141
142static struct mm_slot ksm_mm_head = {
143 .mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
144};
145static struct ksm_scan ksm_scan = {
146 .mm_slot = &ksm_mm_head,
147};
148
149static struct kmem_cache *rmap_item_cache;
150static struct kmem_cache *mm_slot_cache;
151
152/* The number of nodes in the stable tree */
153static unsigned long ksm_pages_shared;
154
155/* The number of page slots additionally sharing those nodes */
156static unsigned long ksm_pages_sharing;
157
158/* The number of nodes in the unstable tree */
159static unsigned long ksm_pages_unshared;
160
161/* The number of rmap_items in use: to calculate pages_volatile */
162static unsigned long ksm_rmap_items;
163
164/* Limit on the number of unswappable pages used */
165static unsigned long ksm_max_kernel_pages = 2000;
166
167/* Number of pages ksmd should scan in one batch */
168static unsigned int ksm_thread_pages_to_scan = 200;
169
170/* Milliseconds ksmd should sleep between batches */
171static unsigned int ksm_thread_sleep_millisecs = 20;
172
173#define KSM_RUN_STOP 0
174#define KSM_RUN_MERGE 1
175#define KSM_RUN_UNMERGE 2
176static unsigned int ksm_run = KSM_RUN_MERGE;
177
178static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
179static DEFINE_MUTEX(ksm_thread_mutex);
180static DEFINE_SPINLOCK(ksm_mmlist_lock);
181
182#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
183 sizeof(struct __struct), __alignof__(struct __struct),\
184 (__flags), NULL)
185
186static int __init ksm_slab_init(void)
187{
188 rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
189 if (!rmap_item_cache)
190 goto out;
191
192 mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
193 if (!mm_slot_cache)
194 goto out_free;
195
196 return 0;
197
198out_free:
199 kmem_cache_destroy(rmap_item_cache);
200out:
201 return -ENOMEM;
202}
203
204static void __init ksm_slab_free(void)
205{
206 kmem_cache_destroy(mm_slot_cache);
207 kmem_cache_destroy(rmap_item_cache);
208 mm_slot_cache = NULL;
209}
210
211static inline struct rmap_item *alloc_rmap_item(void)
212{
213 struct rmap_item *rmap_item;
214
215 rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
216 if (rmap_item)
217 ksm_rmap_items++;
218 return rmap_item;
219}
220
221static inline void free_rmap_item(struct rmap_item *rmap_item)
222{
223 ksm_rmap_items--;
224 rmap_item->mm = NULL; /* debug safety */
225 kmem_cache_free(rmap_item_cache, rmap_item);
226}
227
228static inline struct mm_slot *alloc_mm_slot(void)
229{
230 if (!mm_slot_cache) /* initialization failed */
231 return NULL;
232 return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
233}
234
235static inline void free_mm_slot(struct mm_slot *mm_slot)
236{
237 kmem_cache_free(mm_slot_cache, mm_slot);
238}
239
240static int __init mm_slots_hash_init(void)
241{
242 mm_slots_hash = kzalloc(MM_SLOTS_HASH_HEADS * sizeof(struct hlist_head),
243 GFP_KERNEL);
244 if (!mm_slots_hash)
245 return -ENOMEM;
246 return 0;
247}
248
249static void __init mm_slots_hash_free(void)
250{
251 kfree(mm_slots_hash);
252}
253
254static struct mm_slot *get_mm_slot(struct mm_struct *mm)
255{
256 struct mm_slot *mm_slot;
257 struct hlist_head *bucket;
258 struct hlist_node *node;
259
260 bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
261 % MM_SLOTS_HASH_HEADS];
262 hlist_for_each_entry(mm_slot, node, bucket, link) {
263 if (mm == mm_slot->mm)
264 return mm_slot;
265 }
266 return NULL;
267}
268
269static void insert_to_mm_slots_hash(struct mm_struct *mm,
270 struct mm_slot *mm_slot)
271{
272 struct hlist_head *bucket;
273
274 bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
275 % MM_SLOTS_HASH_HEADS];
276 mm_slot->mm = mm;
277 INIT_LIST_HEAD(&mm_slot->rmap_list);
278 hlist_add_head(&mm_slot->link, bucket);
279}
280
281static inline int in_stable_tree(struct rmap_item *rmap_item)
282{
283 return rmap_item->address & STABLE_FLAG;
284}
285
286/*
287 * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
288 * page tables after it has passed through ksm_exit() - which, if necessary,
289 * takes mmap_sem briefly to serialize against them. ksm_exit() does not set
290 * a special flag: they can just back out as soon as mm_users goes to zero.
291 * ksm_test_exit() is used throughout to make this test for exit: in some
292 * places for correctness, in some places just to avoid unnecessary work.
293 */
294static inline bool ksm_test_exit(struct mm_struct *mm)
295{
296 return atomic_read(&mm->mm_users) == 0;
297}
298
299/*
300 * We use break_ksm to break COW on a ksm page: it's a stripped down
301 *
302 * if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
303 * put_page(page);
304 *
305 * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
306 * in case the application has unmapped and remapped mm,addr meanwhile.
307 * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP
308 * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
309 */
310static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
311{
312 struct page *page;
313 int ret = 0;
314
315 do {
316 cond_resched();
317 page = follow_page(vma, addr, FOLL_GET);
318 if (!page)
319 break;
320 if (PageKsm(page))
321 ret = handle_mm_fault(vma->vm_mm, vma, addr,
322 FAULT_FLAG_WRITE);
323 else
324 ret = VM_FAULT_WRITE;
325 put_page(page);
326 } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
327 /*
328 * We must loop because handle_mm_fault() may back out if there's
329 * any difficulty e.g. if pte accessed bit gets updated concurrently.
330 *
331 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
332 * COW has been broken, even if the vma does not permit VM_WRITE;
333 * but note that a concurrent fault might break PageKsm for us.
334 *
335 * VM_FAULT_SIGBUS could occur if we race with truncation of the
336 * backing file, which also invalidates anonymous pages: that's
337 * okay, that truncation will have unmapped the PageKsm for us.
338 *
339 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
340 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
341 * current task has TIF_MEMDIE set, and will be OOM killed on return
342 * to user; and ksmd, having no mm, would never be chosen for that.
343 *
344 * But if the mm is in a limited mem_cgroup, then the fault may fail
345 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
346 * even ksmd can fail in this way - though it's usually breaking ksm
347 * just to undo a merge it made a moment before, so unlikely to oom.
348 *
349 * That's a pity: we might therefore have more kernel pages allocated
350 * than we're counting as nodes in the stable tree; but ksm_do_scan
351 * will retry to break_cow on each pass, so should recover the page
352 * in due course. The important thing is to not let VM_MERGEABLE
353 * be cleared while any such pages might remain in the area.
354 */
355 return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
356}
357
358static void break_cow(struct mm_struct *mm, unsigned long addr)
359{
360 struct vm_area_struct *vma;
361
362 down_read(&mm->mmap_sem);
363 if (ksm_test_exit(mm))
364 goto out;
365 vma = find_vma(mm, addr);
366 if (!vma || vma->vm_start > addr)
367 goto out;
368 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
369 goto out;
370 break_ksm(vma, addr);
371out:
372 up_read(&mm->mmap_sem);
373}
374
375static struct page *get_mergeable_page(struct rmap_item *rmap_item)
376{
377 struct mm_struct *mm = rmap_item->mm;
378 unsigned long addr = rmap_item->address;
379 struct vm_area_struct *vma;
380 struct page *page;
381
382 down_read(&mm->mmap_sem);
383 if (ksm_test_exit(mm))
384 goto out;
385 vma = find_vma(mm, addr);
386 if (!vma || vma->vm_start > addr)
387 goto out;
388 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
389 goto out;
390
391 page = follow_page(vma, addr, FOLL_GET);
392 if (!page)
393 goto out;
394 if (PageAnon(page)) {
395 flush_anon_page(vma, page, addr);
396 flush_dcache_page(page);
397 } else {
398 put_page(page);
399out: page = NULL;
400 }
401 up_read(&mm->mmap_sem);
402 return page;
403}
404
405/*
406 * get_ksm_page: checks if the page at the virtual address in rmap_item
407 * is still PageKsm, in which case we can trust the content of the page,
408 * and it returns the page it got; or NULL if the page has been zapped.
409 */
410static struct page *get_ksm_page(struct rmap_item *rmap_item)
411{
412 struct page *page;
413
414 page = get_mergeable_page(rmap_item);
415 if (page && !PageKsm(page)) {
416 put_page(page);
417 page = NULL;
418 }
419 return page;
420}
421
422/*
423 * Remove rmap_item from the stable or unstable tree.
424 * This function cleans the item's information out of whichever tree holds it.
425 */
426static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
427{
428 if (in_stable_tree(rmap_item)) {
429 struct rmap_item *next_item = rmap_item->next;
430
431 if (rmap_item->address & NODE_FLAG) {
432 if (next_item) {
433 rb_replace_node(&rmap_item->node,
434 &next_item->node,
435 &root_stable_tree);
436 next_item->address |= NODE_FLAG;
437 ksm_pages_sharing--;
438 } else {
439 rb_erase(&rmap_item->node, &root_stable_tree);
440 ksm_pages_shared--;
441 }
442 } else {
443 struct rmap_item *prev_item = rmap_item->prev;
444
445 BUG_ON(prev_item->next != rmap_item);
446 prev_item->next = next_item;
447 if (next_item) {
448 BUG_ON(next_item->prev != rmap_item);
449 next_item->prev = rmap_item->prev;
450 }
451 ksm_pages_sharing--;
452 }
453
454 rmap_item->next = NULL;
455
456 } else if (rmap_item->address & NODE_FLAG) {
457 unsigned char age;
458 /*
459 * Usually ksmd can and must skip the rb_erase, because
460 * root_unstable_tree was already reset to RB_ROOT.
461 * But be careful when an mm is exiting: do the rb_erase
462 * if this rmap_item was inserted by this scan, rather
463 * than left over from before.
464 */
465 age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
466 BUG_ON(age > 1);
467 if (!age)
468 rb_erase(&rmap_item->node, &root_unstable_tree);
469 ksm_pages_unshared--;
470 }
471
472 rmap_item->address &= PAGE_MASK;
473
474 cond_resched(); /* we're called from many long loops */
475}
476
477static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
478 struct list_head *cur)
479{
480 struct rmap_item *rmap_item;
481
482 while (cur != &mm_slot->rmap_list) {
483 rmap_item = list_entry(cur, struct rmap_item, link);
484 cur = cur->next;
485 remove_rmap_item_from_tree(rmap_item);
486 list_del(&rmap_item->link);
487 free_rmap_item(rmap_item);
488 }
489}
490
491/*
492 * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather
493 * than check every pte of a given vma, the locking doesn't quite work for
494 * that - an rmap_item is assigned to the stable tree after inserting ksm
495 * page and upping mmap_sem. Nor does it fit with the way we skip dup'ing
496 * rmap_items from parent to child at fork time (so as not to waste time
497 * if exit comes before the next scan reaches it).
498 *
499 * Similarly, although we'd like to remove rmap_items (so updating counts
500 * and freeing memory) when unmerging an area, it's easier to leave that
501 * to the next pass of ksmd - consider, for example, how ksmd might be
502 * in cmp_and_merge_page on one of the rmap_items we would be removing.
503 */
504static int unmerge_ksm_pages(struct vm_area_struct *vma,
505 unsigned long start, unsigned long end)
506{
507 unsigned long addr;
508 int err = 0;
509
510 for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
511 if (ksm_test_exit(vma->vm_mm))
512 break;
513 if (signal_pending(current))
514 err = -ERESTARTSYS;
515 else
516 err = break_ksm(vma, addr);
517 }
518 return err;
519}
520
521#ifdef CONFIG_SYSFS
522/*
523 * Only called through the sysfs control interface:
524 */
525static int unmerge_and_remove_all_rmap_items(void)
526{
527 struct mm_slot *mm_slot;
528 struct mm_struct *mm;
529 struct vm_area_struct *vma;
530 int err = 0;
531
532 spin_lock(&ksm_mmlist_lock);
533 ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
534 struct mm_slot, mm_list);
535 spin_unlock(&ksm_mmlist_lock);
536
537 for (mm_slot = ksm_scan.mm_slot;
538 mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
539 mm = mm_slot->mm;
540 down_read(&mm->mmap_sem);
541 for (vma = mm->mmap; vma; vma = vma->vm_next) {
542 if (ksm_test_exit(mm))
543 break;
544 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
545 continue;
546 err = unmerge_ksm_pages(vma,
547 vma->vm_start, vma->vm_end);
548 if (err)
549 goto error;
550 }
551
552 remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
553
554 spin_lock(&ksm_mmlist_lock);
555 ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
556 struct mm_slot, mm_list);
557 if (ksm_test_exit(mm)) {
558 hlist_del(&mm_slot->link);
559 list_del(&mm_slot->mm_list);
560 spin_unlock(&ksm_mmlist_lock);
561
562 free_mm_slot(mm_slot);
563 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
564 up_read(&mm->mmap_sem);
565 mmdrop(mm);
566 } else {
567 spin_unlock(&ksm_mmlist_lock);
568 up_read(&mm->mmap_sem);
569 }
570 }
571
572 ksm_scan.seqnr = 0;
573 return 0;
574
575error:
576 up_read(&mm->mmap_sem);
577 spin_lock(&ksm_mmlist_lock);
578 ksm_scan.mm_slot = &ksm_mm_head;
579 spin_unlock(&ksm_mmlist_lock);
580 return err;
581}
582#endif /* CONFIG_SYSFS */
583
584static u32 calc_checksum(struct page *page)
585{
586 u32 checksum;
587 void *addr = kmap_atomic(page, KM_USER0);
588 checksum = jhash2(addr, PAGE_SIZE / 4, 17);
589 kunmap_atomic(addr, KM_USER0);
590 return checksum;
591}
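
/*
 * calc_checksum() above hashes the page as PAGE_SIZE/4 32-bit words;
 * 17 is just an arbitrary jhash2() seed. The checksum is only a cheap
 * volatility filter for cmp_and_merge_page(): a page whose checksum
 * still differs from oldchecksum is skipped, while the actual merging
 * decision always relies on the full memcmp_pages() below.
 */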
592
593static int memcmp_pages(struct page *page1, struct page *page2)
594{
595 char *addr1, *addr2;
596 int ret;
597
598 addr1 = kmap_atomic(page1, KM_USER0);
599 addr2 = kmap_atomic(page2, KM_USER1);
600 ret = memcmp(addr1, addr2, PAGE_SIZE);
601 kunmap_atomic(addr2, KM_USER1);
602 kunmap_atomic(addr1, KM_USER0);
603 return ret;
604}
605
606static inline int pages_identical(struct page *page1, struct page *page2)
607{
608 return !memcmp_pages(page1, page2);
609}
610
611static int write_protect_page(struct vm_area_struct *vma, struct page *page,
612 pte_t *orig_pte)
613{
614 struct mm_struct *mm = vma->vm_mm;
615 unsigned long addr;
616 pte_t *ptep;
617 spinlock_t *ptl;
618 int swapped;
619 int err = -EFAULT;
620
621 addr = page_address_in_vma(page, vma);
622 if (addr == -EFAULT)
623 goto out;
624
625 ptep = page_check_address(page, mm, addr, &ptl, 0);
626 if (!ptep)
627 goto out;
628
629 if (pte_write(*ptep)) {
630 pte_t entry;
631
632 swapped = PageSwapCache(page);
633 flush_cache_page(vma, addr, page_to_pfn(page));
634 /*
635 * Ok, this is tricky: when get_user_pages_fast() runs, it doesn't
636 * take any lock, so the check we are about to make, of the page
637 * count against the map count, is racy, and O_DIRECT can happen
638 * right after the check.
639 * So we clear the pte and flush the tlb before the check;
640 * this assures us that no O_DIRECT can happen after the check
641 * or in the middle of the check.
642 */
643 entry = ptep_clear_flush(vma, addr, ptep);
644 /*
645 * Check that no O_DIRECT or similar I/O is in progress on the
646 * page
647 */
648 if ((page_mapcount(page) + 2 + swapped) != page_count(page)) {
649 set_pte_at_notify(mm, addr, ptep, entry);
650 goto out_unlock;
651 }
652 entry = pte_wrprotect(entry);
653 set_pte_at_notify(mm, addr, ptep, entry);
654 }
655 *orig_pte = *ptep;
656 err = 0;
657
658out_unlock:
659 pte_unmap_unlock(ptep, ptl);
660out:
661 return err;
662}
663
664/**
665 * replace_page - replace page in vma by new ksm page
666 * @vma: vma that holds the pte pointing to oldpage
667 * @oldpage: the page we are replacing by newpage
668 * @newpage: the ksm page we replace oldpage by
669 * @orig_pte: the original value of the pte
670 *
671 * Returns 0 on success, -EFAULT on failure.
672 */
673static int replace_page(struct vm_area_struct *vma, struct page *oldpage,
674 struct page *newpage, pte_t orig_pte)
675{
676 struct mm_struct *mm = vma->vm_mm;
677 pgd_t *pgd;
678 pud_t *pud;
679 pmd_t *pmd;
680 pte_t *ptep;
681 spinlock_t *ptl;
682 unsigned long addr;
683 pgprot_t prot;
684 int err = -EFAULT;
685
686 prot = vm_get_page_prot(vma->vm_flags & ~VM_WRITE);
687
688 addr = page_address_in_vma(oldpage, vma);
689 if (addr == -EFAULT)
690 goto out;
691
692 pgd = pgd_offset(mm, addr);
693 if (!pgd_present(*pgd))
694 goto out;
695
696 pud = pud_offset(pgd, addr);
697 if (!pud_present(*pud))
698 goto out;
699
700 pmd = pmd_offset(pud, addr);
701 if (!pmd_present(*pmd))
702 goto out;
703
704 ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
705 if (!pte_same(*ptep, orig_pte)) {
706 pte_unmap_unlock(ptep, ptl);
707 goto out;
708 }
709
710 get_page(newpage);
711 page_add_ksm_rmap(newpage);
712
713 flush_cache_page(vma, addr, pte_pfn(*ptep));
714 ptep_clear_flush(vma, addr, ptep);
715 set_pte_at_notify(mm, addr, ptep, mk_pte(newpage, prot));
716
717 page_remove_rmap(oldpage);
718 put_page(oldpage);
719
720 pte_unmap_unlock(ptep, ptl);
721 err = 0;
722out:
723 return err;
724}
725
726/*
727 * try_to_merge_one_page - take two pages and merge them into one
728 * @vma: the vma that holds the pte pointing into oldpage
729 * @oldpage: the page that we want to replace with newpage
730 * @newpage: the page that we want to map instead of oldpage
731 *
732 * Note:
733 * oldpage should be a PageAnon page, while newpage should be a PageKsm page,
734 * or a newly allocated kernel page which page_add_ksm_rmap will make PageKsm.
735 *
736 * This function returns 0 if the pages were merged, -EFAULT otherwise.
737 */
738static int try_to_merge_one_page(struct vm_area_struct *vma,
739 struct page *oldpage,
740 struct page *newpage)
741{
742 pte_t orig_pte = __pte(0);
743 int err = -EFAULT;
744
745 if (!(vma->vm_flags & VM_MERGEABLE))
746 goto out;
747
748 if (!PageAnon(oldpage))
749 goto out;
750
751 get_page(newpage);
752 get_page(oldpage);
753
754 /*
755 * We need the page lock to read a stable PageSwapCache in
756 * write_protect_page(). We use trylock_page() instead of
757 * lock_page() because we don't want to wait here - we
758 * prefer to continue scanning and merging different pages,
759 * then come back to this page when it is unlocked.
760 */
761 if (!trylock_page(oldpage))
762 goto out_putpage;
763 /*
764 * If this anonymous page is mapped only here, its pte may need
765 * to be write-protected. If it's mapped elsewhere, all of its
766 * ptes are necessarily already write-protected. But in either
767 * case, we need to lock and check page_count is not raised.
768 */
769 if (write_protect_page(vma, oldpage, &orig_pte)) {
770 unlock_page(oldpage);
771 goto out_putpage;
772 }
773 unlock_page(oldpage);
774
775 if (pages_identical(oldpage, newpage))
776 err = replace_page(vma, oldpage, newpage, orig_pte);
777
778out_putpage:
779 put_page(oldpage);
780 put_page(newpage);
781out:
782 return err;
783}
784
785/*
786 * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
787 * but no new kernel page is allocated: kpage must already be a ksm page.
788 */
789static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
790 unsigned long addr1,
791 struct page *page1,
792 struct page *kpage)
793{
794 struct vm_area_struct *vma;
795 int err = -EFAULT;
796
797 down_read(&mm1->mmap_sem);
798 if (ksm_test_exit(mm1))
799 goto out;
800
801 vma = find_vma(mm1, addr1);
802 if (!vma || vma->vm_start > addr1)
803 goto out;
804
805 err = try_to_merge_one_page(vma, page1, kpage);
806out:
807 up_read(&mm1->mmap_sem);
808 return err;
809}
810
811/*
812 * try_to_merge_two_pages - take two identical pages and prepare them
813 * to be merged into one page.
814 *
815 * This function returns 0 if we successfully mapped two identical pages
816 * into one page, -EFAULT otherwise.
817 *
818 * Note that this function allocates a new kernel page: if one of the pages
819 * is already a ksm page, try_to_merge_with_ksm_page should be used.
820 */
821static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
822 struct page *page1, struct mm_struct *mm2,
823 unsigned long addr2, struct page *page2)
824{
825 struct vm_area_struct *vma;
826 struct page *kpage;
827 int err = -EFAULT;
828
829 /*
830 * The number of nodes in the stable tree
831 * is the number of kernel pages that we hold.
832 */
833 if (ksm_max_kernel_pages &&
834 ksm_max_kernel_pages <= ksm_pages_shared)
835 return err;
836
837 kpage = alloc_page(GFP_HIGHUSER);
838 if (!kpage)
839 return err;
840
841 down_read(&mm1->mmap_sem);
842 if (ksm_test_exit(mm1)) {
843 up_read(&mm1->mmap_sem);
844 goto out;
845 }
846 vma = find_vma(mm1, addr1);
847 if (!vma || vma->vm_start > addr1) {
848 up_read(&mm1->mmap_sem);
849 goto out;
850 }
851
852 copy_user_highpage(kpage, page1, addr1, vma);
853 err = try_to_merge_one_page(vma, page1, kpage);
854 up_read(&mm1->mmap_sem);
855
856 if (!err) {
857 err = try_to_merge_with_ksm_page(mm2, addr2, page2, kpage);
858 /*
859 * If that fails, we have a ksm page with only one pte
860 * pointing to it: so break it.
861 */
862 if (err)
863 break_cow(mm1, addr1);
864 }
865out:
866 put_page(kpage);
867 return err;
868}
869
870/*
871 * stable_tree_search - search page inside the stable tree
872 * @page: the page that we are searching for identical pages to.
873 * @page2: pointer to the identical page that we have found and are
874 * holding inside the stable tree.
875 * @rmap_item: the reverse mapping item
876 *
877 * This function checks if there is a page inside the stable tree
878 * with identical content to the page that we are scanning right now.
879 *
880 * This function returns a pointer to the identical page's rmap_item if found,
881 * NULL otherwise.
882 */
883static struct rmap_item *stable_tree_search(struct page *page,
884 struct page **page2,
885 struct rmap_item *rmap_item)
886{
887 struct rb_node *node = root_stable_tree.rb_node;
888
889 while (node) {
890 struct rmap_item *tree_rmap_item, *next_rmap_item;
891 int ret;
892
893 tree_rmap_item = rb_entry(node, struct rmap_item, node);
894 while (tree_rmap_item) {
895 BUG_ON(!in_stable_tree(tree_rmap_item));
896 cond_resched();
897 page2[0] = get_ksm_page(tree_rmap_item);
898 if (page2[0])
899 break;
900 next_rmap_item = tree_rmap_item->next;
901 remove_rmap_item_from_tree(tree_rmap_item);
902 tree_rmap_item = next_rmap_item;
903 }
904 if (!tree_rmap_item)
905 return NULL;
906
907 ret = memcmp_pages(page, page2[0]);
908
909 if (ret < 0) {
910 put_page(page2[0]);
911 node = node->rb_left;
912 } else if (ret > 0) {
913 put_page(page2[0]);
914 node = node->rb_right;
915 } else {
916 return tree_rmap_item;
917 }
918 }
919
920 return NULL;
921}
922
923/*
924 * stable_tree_insert - insert rmap_item pointing to new ksm page
925 * into the stable tree.
926 *
927 * @page: the page that we are searching for an identical page to inside
928 * the stable tree.
929 * @rmap_item: pointer to the reverse mapping item.
930 *
931 * This function returns rmap_item on success, NULL otherwise.
932 */
933static struct rmap_item *stable_tree_insert(struct page *page,
934 struct rmap_item *rmap_item)
935{
936 struct rb_node **new = &root_stable_tree.rb_node;
937 struct rb_node *parent = NULL;
938
939 while (*new) {
940 struct rmap_item *tree_rmap_item, *next_rmap_item;
941 struct page *tree_page;
942 int ret;
943
944 tree_rmap_item = rb_entry(*new, struct rmap_item, node);
945 while (tree_rmap_item) {
946 BUG_ON(!in_stable_tree(tree_rmap_item));
947 cond_resched();
948 tree_page = get_ksm_page(tree_rmap_item);
949 if (tree_page)
950 break;
951 next_rmap_item = tree_rmap_item->next;
952 remove_rmap_item_from_tree(tree_rmap_item);
953 tree_rmap_item = next_rmap_item;
954 }
955 if (!tree_rmap_item)
956 return NULL;
957
958 ret = memcmp_pages(page, tree_page);
959 put_page(tree_page);
960
961 parent = *new;
962 if (ret < 0)
963 new = &parent->rb_left;
964 else if (ret > 0)
965 new = &parent->rb_right;
966 else {
967 /*
968 * It is not a bug that stable_tree_search() didn't
969 * find this node: because at that time our page was
970 * not yet write-protected, so may have changed since.
971 */
972 return NULL;
973 }
974 }
975
976 rmap_item->address |= NODE_FLAG | STABLE_FLAG;
977 rmap_item->next = NULL;
978 rb_link_node(&rmap_item->node, parent, new);
979 rb_insert_color(&rmap_item->node, &root_stable_tree);
980
981 ksm_pages_shared++;
982 return rmap_item;
983}
984
985/*
986 * unstable_tree_search_insert - search and insert items into the unstable tree.
987 *
988 * @page: the page that we are going to search for an identical page to,
989 * or to insert into the unstable tree
990 * @page2: pointer into identical page that was found inside the unstable tree
991 * @rmap_item: the reverse mapping item of page
992 *
993 * This function searches for a page in the unstable tree identical to the
994 * page currently being scanned; and if no identical page is found in the
995 * tree, we insert rmap_item as a new object into the unstable tree.
996 *
997 * This function returns a pointer to the rmap_item found to be identical
998 * to the currently scanned page, NULL otherwise.
999 *
1000 * This function does both searching and inserting, because they share
1001 * the same walking algorithm in an rbtree.
1002 */
1003static struct rmap_item *unstable_tree_search_insert(struct page *page,
1004 struct page **page2,
1005 struct rmap_item *rmap_item)
1006{
1007 struct rb_node **new = &root_unstable_tree.rb_node;
1008 struct rb_node *parent = NULL;
1009
1010 while (*new) {
1011 struct rmap_item *tree_rmap_item;
1012 int ret;
1013
1014 tree_rmap_item = rb_entry(*new, struct rmap_item, node);
1015 page2[0] = get_mergeable_page(tree_rmap_item);
1016 if (!page2[0])
1017 return NULL;
1018
1019 /*
1020 * Don't substitute an unswappable ksm page
1021 * just for one good swappable forked page.
1022 */
1023 if (page == page2[0]) {
1024 put_page(page2[0]);
1025 return NULL;
1026 }
1027
1028 ret = memcmp_pages(page, page2[0]);
1029
1030 parent = *new;
1031 if (ret < 0) {
1032 put_page(page2[0]);
1033 new = &parent->rb_left;
1034 } else if (ret > 0) {
1035 put_page(page2[0]);
1036 new = &parent->rb_right;
1037 } else {
1038 return tree_rmap_item;
1039 }
1040 }
1041
1042 rmap_item->address |= NODE_FLAG;
1043 rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
1044 rb_link_node(&rmap_item->node, parent, new);
1045 rb_insert_color(&rmap_item->node, &root_unstable_tree);
1046
1047 ksm_pages_unshared++;
1048 return NULL;
1049}
1050
1051/*
1052 * stable_tree_append - add another rmap_item to the linked list of
1053 * rmap_items hanging off a given node of the stable tree, all sharing
1054 * the same ksm page.
1055 */
1056static void stable_tree_append(struct rmap_item *rmap_item,
1057 struct rmap_item *tree_rmap_item)
1058{
1059 rmap_item->next = tree_rmap_item->next;
1060 rmap_item->prev = tree_rmap_item;
1061
1062 if (tree_rmap_item->next)
1063 tree_rmap_item->next->prev = rmap_item;
1064
1065 tree_rmap_item->next = rmap_item;
1066 rmap_item->address |= STABLE_FLAG;
1067
1068 ksm_pages_sharing++;
1069}
1070
1071/*
1072 * cmp_and_merge_page - first see if page can be merged into the stable tree;
1073 * if not, compare checksum to previous and if it's the same, see if page can
1074 * be inserted into the unstable tree, or merged with a page already there and
1075 * both transferred to the stable tree.
1076 *
1077 * @page: the page that we are searching for an identical page to.
1078 * @rmap_item: the reverse mapping into the virtual address of this page
1079 */
1080static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
1081{
1082 struct page *page2[1];
1083 struct rmap_item *tree_rmap_item;
1084 unsigned int checksum;
1085 int err;
1086
1087 if (in_stable_tree(rmap_item))
1088 remove_rmap_item_from_tree(rmap_item);
1089
1090 /* We first start with searching the page inside the stable tree */
1091 tree_rmap_item = stable_tree_search(page, page2, rmap_item);
1092 if (tree_rmap_item) {
1093 if (page == page2[0]) /* forked */
1094 err = 0;
1095 else
1096 err = try_to_merge_with_ksm_page(rmap_item->mm,
1097 rmap_item->address,
1098 page, page2[0]);
1099 put_page(page2[0]);
1100
1101 if (!err) {
1102 /*
1103 * The page was successfully merged:
1104 * add its rmap_item to the stable tree.
1105 */
1106 stable_tree_append(rmap_item, tree_rmap_item);
1107 }
1108 return;
1109 }
1110
1111 /*
1112 * A ksm page might have got here by fork, but its other
1113 * references have already been removed from the stable tree.
1114 * Or it might be left over from a break_ksm which failed
1115 * when the mem_cgroup had reached its limit: try again now.
1116 */
1117 if (PageKsm(page))
1118 break_cow(rmap_item->mm, rmap_item->address);
1119
1120 /*
1121 * If the checksum of the page has changed since the last time we
1122 * calculated it, this page is being changed frequently: therefore we
1123 * don't want to insert it into the unstable tree, and we don't want
1124 * to waste our time searching for something identical to it there.
1125 */
1126 checksum = calc_checksum(page);
1127 if (rmap_item->oldchecksum != checksum) {
1128 rmap_item->oldchecksum = checksum;
1129 return;
1130 }
1131
1132 tree_rmap_item = unstable_tree_search_insert(page, page2, rmap_item);
1133 if (tree_rmap_item) {
1134 err = try_to_merge_two_pages(rmap_item->mm,
1135 rmap_item->address, page,
1136 tree_rmap_item->mm,
1137 tree_rmap_item->address, page2[0]);
1138 /*
1139 * As soon as we merge this page, we want to remove the
1140 * rmap_item of the page we have merged with from the unstable
1141 * tree, and insert it instead as a new node in the stable tree.
1142 */
1143 if (!err) {
1144 rb_erase(&tree_rmap_item->node, &root_unstable_tree);
1145 tree_rmap_item->address &= ~NODE_FLAG;
1146 ksm_pages_unshared--;
1147
1148 /*
1149 * If we fail to insert the page into the stable tree,
1150 * we will have 2 virtual addresses that are pointing
1151 * to a ksm page left outside the stable tree,
1152 * in which case we need to break_cow on both.
1153 */
1154 if (stable_tree_insert(page2[0], tree_rmap_item))
1155 stable_tree_append(rmap_item, tree_rmap_item);
1156 else {
1157 break_cow(tree_rmap_item->mm,
1158 tree_rmap_item->address);
1159 break_cow(rmap_item->mm, rmap_item->address);
1160 }
1161 }
1162
1163 put_page(page2[0]);
1164 }
1165}
1166
1167static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
1168 struct list_head *cur,
1169 unsigned long addr)
1170{
1171 struct rmap_item *rmap_item;
1172
1173 while (cur != &mm_slot->rmap_list) {
1174 rmap_item = list_entry(cur, struct rmap_item, link);
1175 if ((rmap_item->address & PAGE_MASK) == addr) {
1176 if (!in_stable_tree(rmap_item))
1177 remove_rmap_item_from_tree(rmap_item);
1178 return rmap_item;
1179 }
1180 if (rmap_item->address > addr)
1181 break;
1182 cur = cur->next;
1183 remove_rmap_item_from_tree(rmap_item);
1184 list_del(&rmap_item->link);
1185 free_rmap_item(rmap_item);
1186 }
1187
1188 rmap_item = alloc_rmap_item();
1189 if (rmap_item) {
1190 /* It has already been zeroed */
1191 rmap_item->mm = mm_slot->mm;
1192 rmap_item->address = addr;
1193 list_add_tail(&rmap_item->link, cur);
1194 }
1195 return rmap_item;
1196}
1197
1198static struct rmap_item *scan_get_next_rmap_item(struct page **page)
1199{
1200 struct mm_struct *mm;
1201 struct mm_slot *slot;
1202 struct vm_area_struct *vma;
1203 struct rmap_item *rmap_item;
1204
1205 if (list_empty(&ksm_mm_head.mm_list))
1206 return NULL;
1207
1208 slot = ksm_scan.mm_slot;
1209 if (slot == &ksm_mm_head) {
1210 root_unstable_tree = RB_ROOT;
1211
1212 spin_lock(&ksm_mmlist_lock);
1213 slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
1214 ksm_scan.mm_slot = slot;
1215 spin_unlock(&ksm_mmlist_lock);
1216next_mm:
1217 ksm_scan.address = 0;
1218 ksm_scan.rmap_item = list_entry(&slot->rmap_list,
1219 struct rmap_item, link);
1220 }
1221
1222 mm = slot->mm;
1223 down_read(&mm->mmap_sem);
1224 if (ksm_test_exit(mm))
1225 vma = NULL;
1226 else
1227 vma = find_vma(mm, ksm_scan.address);
1228
1229 for (; vma; vma = vma->vm_next) {
1230 if (!(vma->vm_flags & VM_MERGEABLE))
1231 continue;
1232 if (ksm_scan.address < vma->vm_start)
1233 ksm_scan.address = vma->vm_start;
1234 if (!vma->anon_vma)
1235 ksm_scan.address = vma->vm_end;
1236
1237 while (ksm_scan.address < vma->vm_end) {
1238 if (ksm_test_exit(mm))
1239 break;
1240 *page = follow_page(vma, ksm_scan.address, FOLL_GET);
1241 if (*page && PageAnon(*page)) {
1242 flush_anon_page(vma, *page, ksm_scan.address);
1243 flush_dcache_page(*page);
1244 rmap_item = get_next_rmap_item(slot,
1245 ksm_scan.rmap_item->link.next,
1246 ksm_scan.address);
1247 if (rmap_item) {
1248 ksm_scan.rmap_item = rmap_item;
1249 ksm_scan.address += PAGE_SIZE;
1250 } else
1251 put_page(*page);
1252 up_read(&mm->mmap_sem);
1253 return rmap_item;
1254 }
1255 if (*page)
1256 put_page(*page);
1257 ksm_scan.address += PAGE_SIZE;
1258 cond_resched();
1259 }
1260 }
1261
1262 if (ksm_test_exit(mm)) {
1263 ksm_scan.address = 0;
1264 ksm_scan.rmap_item = list_entry(&slot->rmap_list,
1265 struct rmap_item, link);
1266 }
1267 /*
1268 * Nuke all the rmap_items that are above this current rmap:
1269 * because there were no VM_MERGEABLE vmas with such addresses.
1270 */
1271 remove_trailing_rmap_items(slot, ksm_scan.rmap_item->link.next);
1272
1273 spin_lock(&ksm_mmlist_lock);
1274 ksm_scan.mm_slot = list_entry(slot->mm_list.next,
1275 struct mm_slot, mm_list);
1276 if (ksm_scan.address == 0) {
1277 /*
1278 * We've completed a full scan of all vmas, holding mmap_sem
1279 * throughout, and found no VM_MERGEABLE: so do the same as
1280 * __ksm_exit does to remove this mm from all our lists now.
1281 * This applies either when cleaning up after __ksm_exit
1282 * (but beware: we can reach here even before __ksm_exit),
1283 * or when all VM_MERGEABLE areas have been unmapped (and
1284 * mmap_sem then protects against race with MADV_MERGEABLE).
1285 */
1286 hlist_del(&slot->link);
1287 list_del(&slot->mm_list);
1288 spin_unlock(&ksm_mmlist_lock);
1289
1290 free_mm_slot(slot);
1291 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
1292 up_read(&mm->mmap_sem);
1293 mmdrop(mm);
1294 } else {
1295 spin_unlock(&ksm_mmlist_lock);
1296 up_read(&mm->mmap_sem);
1297 }
1298
1299 /* Repeat until we've completed scanning the whole list */
1300 slot = ksm_scan.mm_slot;
1301 if (slot != &ksm_mm_head)
1302 goto next_mm;
1303
1304 ksm_scan.seqnr++;
1305 return NULL;
1306}
1307
1308/**
1309 * ksm_do_scan - the ksm scanner main worker function.
1310 * @scan_npages: number of pages we want to scan before we return.
1311 */
1312static void ksm_do_scan(unsigned int scan_npages)
1313{
1314 struct rmap_item *rmap_item;
1315 struct page *page;
1316
1317 while (scan_npages--) {
1318 cond_resched();
1319 rmap_item = scan_get_next_rmap_item(&page);
1320 if (!rmap_item)
1321 return;
1322 if (!PageKsm(page) || !in_stable_tree(rmap_item))
1323 cmp_and_merge_page(page, rmap_item);
1324 else if (page_mapcount(page) == 1) {
1325 /*
1326 * Replace now-unshared ksm page by ordinary page.
1327 */
1328 break_cow(rmap_item->mm, rmap_item->address);
1329 remove_rmap_item_from_tree(rmap_item);
1330 rmap_item->oldchecksum = calc_checksum(page);
1331 }
1332 put_page(page);
1333 }
1334}
1335
1336static int ksmd_should_run(void)
1337{
1338 return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
1339}
1340
1341static int ksm_scan_thread(void *nothing)
1342{
1343 set_user_nice(current, 5);
1344
1345 while (!kthread_should_stop()) {
1346 mutex_lock(&ksm_thread_mutex);
1347 if (ksmd_should_run())
1348 ksm_do_scan(ksm_thread_pages_to_scan);
1349 mutex_unlock(&ksm_thread_mutex);
1350
1351 if (ksmd_should_run()) {
1352 schedule_timeout_interruptible(
1353 msecs_to_jiffies(ksm_thread_sleep_millisecs));
1354 } else {
1355 wait_event_interruptible(ksm_thread_wait,
1356 ksmd_should_run() || kthread_should_stop());
1357 }
1358 }
1359 return 0;
1360}
1361
1362int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
1363 unsigned long end, int advice, unsigned long *vm_flags)
1364{
1365 struct mm_struct *mm = vma->vm_mm;
1366 int err;
1367
1368 switch (advice) {
1369 case MADV_MERGEABLE:
1370 /*
1371 * Be somewhat over-protective for now!
1372 */
1373 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
1374 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1375 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
1376 VM_MIXEDMAP | VM_SAO))
1377 return 0; /* just ignore the advice */
1378
1379 if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
1380 err = __ksm_enter(mm);
1381 if (err)
1382 return err;
1383 }
1384
1385 *vm_flags |= VM_MERGEABLE;
1386 break;
1387
1388 case MADV_UNMERGEABLE:
1389 if (!(*vm_flags & VM_MERGEABLE))
1390 return 0; /* just ignore the advice */
1391
1392 if (vma->anon_vma) {
1393 err = unmerge_ksm_pages(vma, start, end);
1394 if (err)
1395 return err;
1396 }
1397
1398 *vm_flags &= ~VM_MERGEABLE;
1399 break;
1400 }
1401
1402 return 0;
1403}
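
/*
 * A minimal userspace sketch (not part of this file) of how a program
 * opts an area into KSM with the advice values handled above. The
 * MADV_MERGEABLE fallback definition below is an assumption for
 * illustration, for building against headers that predate this patch:
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/mman.h>
 *
 *	#ifndef MADV_MERGEABLE
 *	#define MADV_MERGEABLE 12	// assumed value; check your headers
 *	#endif
 *
 *	int main(void)
 *	{
 *		size_t len = 64 * 4096;
 *		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *		if (buf == MAP_FAILED) {
 *			perror("mmap");
 *			return 1;
 *		}
 *		memset(buf, 0x5a, len);		// identical, mergeable content
 *		if (madvise(buf, len, MADV_MERGEABLE))
 *			perror("madvise");	// fails without CONFIG_KSM
 *		pause();			// give ksmd time to scan and merge
 *		return 0;
 *	}
 *
 * Note that madvise() returns 0 here even when the vma flags make
 * ksm_madvise() ignore the advice; only an unrecognized behavior
 * value fails outright.
 */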
1404
1405int __ksm_enter(struct mm_struct *mm)
1406{
1407 struct mm_slot *mm_slot;
1408 int needs_wakeup;
1409
1410 mm_slot = alloc_mm_slot();
1411 if (!mm_slot)
1412 return -ENOMEM;
1413
1414 /* Check ksm_run too? Would need tighter locking */
1415 needs_wakeup = list_empty(&ksm_mm_head.mm_list);
1416
1417 spin_lock(&ksm_mmlist_lock);
1418 insert_to_mm_slots_hash(mm, mm_slot);
1419 /*
1420 * Insert just behind the scanning cursor, to let the area settle
1421 * down a little; when fork is followed by immediate exec, we don't
1422 * want ksmd to waste time setting up and tearing down an rmap_list.
1423 */
1424 list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
1425 spin_unlock(&ksm_mmlist_lock);
1426
1427 set_bit(MMF_VM_MERGEABLE, &mm->flags);
1428 atomic_inc(&mm->mm_count);
1429
1430 if (needs_wakeup)
1431 wake_up_interruptible(&ksm_thread_wait);
1432
1433 return 0;
1434}
1435
1436void __ksm_exit(struct mm_struct *mm)
1437{
1438 struct mm_slot *mm_slot;
1439 int easy_to_free = 0;
1440
1441 /*
1442 * This process is exiting: if it's straightforward (as is the
1443 * case when ksmd was never running), free mm_slot immediately.
1444 * But if it's at the cursor or has rmap_items linked to it, use
1445 * mmap_sem to synchronize with any break_cows before pagetables
1446 * are freed, and leave the mm_slot on the list for ksmd to free.
1447 * Beware: ksm may already have noticed it exiting and freed the slot.
1448 */
1449
1450 spin_lock(&ksm_mmlist_lock);
1451 mm_slot = get_mm_slot(mm);
1452 if (mm_slot && ksm_scan.mm_slot != mm_slot) {
1453 if (list_empty(&mm_slot->rmap_list)) {
1454 hlist_del(&mm_slot->link);
1455 list_del(&mm_slot->mm_list);
1456 easy_to_free = 1;
1457 } else {
1458 list_move(&mm_slot->mm_list,
1459 &ksm_scan.mm_slot->mm_list);
1460 }
1461 }
1462 spin_unlock(&ksm_mmlist_lock);
1463
1464 if (easy_to_free) {
1465 free_mm_slot(mm_slot);
1466 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
1467 mmdrop(mm);
1468 } else if (mm_slot) {
1469 down_write(&mm->mmap_sem);
1470 up_write(&mm->mmap_sem);
1471 }
1472}
1473
1474#ifdef CONFIG_SYSFS
1475/*
1476 * This all compiles without CONFIG_SYSFS, but is a waste of space.
1477 */
1478
1479#define KSM_ATTR_RO(_name) \
1480 static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
1481#define KSM_ATTR(_name) \
1482 static struct kobj_attribute _name##_attr = \
1483 __ATTR(_name, 0644, _name##_show, _name##_store)
1484
1485static ssize_t sleep_millisecs_show(struct kobject *kobj,
1486 struct kobj_attribute *attr, char *buf)
1487{
1488 return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
1489}
1490
1491static ssize_t sleep_millisecs_store(struct kobject *kobj,
1492 struct kobj_attribute *attr,
1493 const char *buf, size_t count)
1494{
1495 unsigned long msecs;
1496 int err;
1497
1498 err = strict_strtoul(buf, 10, &msecs);
1499 if (err || msecs > UINT_MAX)
1500 return -EINVAL;
1501
1502 ksm_thread_sleep_millisecs = msecs;
1503
1504 return count;
1505}
1506KSM_ATTR(sleep_millisecs);
1507
1508static ssize_t pages_to_scan_show(struct kobject *kobj,
1509 struct kobj_attribute *attr, char *buf)
1510{
1511 return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
1512}
1513
1514static ssize_t pages_to_scan_store(struct kobject *kobj,
1515 struct kobj_attribute *attr,
1516 const char *buf, size_t count)
1517{
1518 int err;
1519 unsigned long nr_pages;
1520
1521 err = strict_strtoul(buf, 10, &nr_pages);
1522 if (err || nr_pages > UINT_MAX)
1523 return -EINVAL;
1524
1525 ksm_thread_pages_to_scan = nr_pages;
1526
1527 return count;
1528}
1529KSM_ATTR(pages_to_scan);
1530
1531static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
1532 char *buf)
1533{
1534 return sprintf(buf, "%u\n", ksm_run);
1535}
1536
1537static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
1538 const char *buf, size_t count)
1539{
1540 int err;
1541 unsigned long flags;
1542
1543 err = strict_strtoul(buf, 10, &flags);
1544 if (err || flags > UINT_MAX)
1545 return -EINVAL;
1546 if (flags > KSM_RUN_UNMERGE)
1547 return -EINVAL;
1548
1549 /*
1550 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
1551 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
1552 * breaking COW to free the unswappable pages_shared (but leaves
1553 * mm_slots on the list for when ksmd may be set running again).
1554 */
1555
1556 mutex_lock(&ksm_thread_mutex);
1557 if (ksm_run != flags) {
1558 ksm_run = flags;
1559 if (flags & KSM_RUN_UNMERGE) {
1560 current->flags |= PF_OOM_ORIGIN;
1561 err = unmerge_and_remove_all_rmap_items();
1562 current->flags &= ~PF_OOM_ORIGIN;
1563 if (err) {
1564 ksm_run = KSM_RUN_STOP;
1565 count = err;
1566 }
1567 }
1568 }
1569 mutex_unlock(&ksm_thread_mutex);
1570
1571 if (flags & KSM_RUN_MERGE)
1572 wake_up_interruptible(&ksm_thread_wait);
1573
1574 return count;
1575}
1576KSM_ATTR(run);
1577
1578static ssize_t max_kernel_pages_store(struct kobject *kobj,
1579 struct kobj_attribute *attr,
1580 const char *buf, size_t count)
1581{
1582 int err;
1583 unsigned long nr_pages;
1584
1585 err = strict_strtoul(buf, 10, &nr_pages);
1586 if (err)
1587 return -EINVAL;
1588
1589 ksm_max_kernel_pages = nr_pages;
1590
1591 return count;
1592}
1593
1594static ssize_t max_kernel_pages_show(struct kobject *kobj,
1595 struct kobj_attribute *attr, char *buf)
1596{
1597 return sprintf(buf, "%lu\n", ksm_max_kernel_pages);
1598}
1599KSM_ATTR(max_kernel_pages);
1600
1601static ssize_t pages_shared_show(struct kobject *kobj,
1602 struct kobj_attribute *attr, char *buf)
1603{
1604 return sprintf(buf, "%lu\n", ksm_pages_shared);
1605}
1606KSM_ATTR_RO(pages_shared);
1607
1608static ssize_t pages_sharing_show(struct kobject *kobj,
1609 struct kobj_attribute *attr, char *buf)
1610{
1611 return sprintf(buf, "%lu\n", ksm_pages_sharing);
1612}
1613KSM_ATTR_RO(pages_sharing);
1614
1615static ssize_t pages_unshared_show(struct kobject *kobj,
1616 struct kobj_attribute *attr, char *buf)
1617{
1618 return sprintf(buf, "%lu\n", ksm_pages_unshared);
1619}
1620KSM_ATTR_RO(pages_unshared);
1621
1622static ssize_t pages_volatile_show(struct kobject *kobj,
1623 struct kobj_attribute *attr, char *buf)
1624{
1625 long ksm_pages_volatile;
1626
1627 ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
1628 - ksm_pages_sharing - ksm_pages_unshared;
1629 /*
1630 * It was not worth any locking to calculate that statistic,
1631 * but it might therefore sometimes be negative: conceal that.
1632 */
1633 if (ksm_pages_volatile < 0)
1634 ksm_pages_volatile = 0;
1635 return sprintf(buf, "%ld\n", ksm_pages_volatile);
1636}
1637KSM_ATTR_RO(pages_volatile);
1638
1639static ssize_t full_scans_show(struct kobject *kobj,
1640 struct kobj_attribute *attr, char *buf)
1641{
1642 return sprintf(buf, "%lu\n", ksm_scan.seqnr);
1643}
1644KSM_ATTR_RO(full_scans);
1645
1646static struct attribute *ksm_attrs[] = {
1647 &sleep_millisecs_attr.attr,
1648 &pages_to_scan_attr.attr,
1649 &run_attr.attr,
1650 &max_kernel_pages_attr.attr,
1651 &pages_shared_attr.attr,
1652 &pages_sharing_attr.attr,
1653 &pages_unshared_attr.attr,
1654 &pages_volatile_attr.attr,
1655 &full_scans_attr.attr,
1656 NULL,
1657};
1658
1659static struct attribute_group ksm_attr_group = {
1660 .attrs = ksm_attrs,
1661 .name = "ksm",
1662};
1663#endif /* CONFIG_SYSFS */
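
/*
 * With CONFIG_SYSFS, the attribute group above appears under
 * /sys/kernel/mm/ksm/ (mm_kobj is /sys/kernel/mm). A typical session,
 * sketched from the handlers above, looks like:
 *
 *	echo 200 > /sys/kernel/mm/ksm/pages_to_scan
 *	echo 20 > /sys/kernel/mm/ksm/sleep_millisecs
 *	echo 1 > /sys/kernel/mm/ksm/run		(start ksmd)
 *	cat /sys/kernel/mm/ksm/pages_sharing	(pages deduplicated)
 *	echo 2 > /sys/kernel/mm/ksm/run		(stop and unmerge everything)
 */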
1664
1665static int __init ksm_init(void)
1666{
1667 struct task_struct *ksm_thread;
1668 int err;
1669
1670 err = ksm_slab_init();
1671 if (err)
1672 goto out;
1673
1674 err = mm_slots_hash_init();
1675 if (err)
1676 goto out_free1;
1677
1678 ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
1679 if (IS_ERR(ksm_thread)) {
1680 printk(KERN_ERR "ksm: creating kthread failed\n");
1681 err = PTR_ERR(ksm_thread);
1682 goto out_free2;
1683 }
1684
1685#ifdef CONFIG_SYSFS
1686 err = sysfs_create_group(mm_kobj, &ksm_attr_group);
1687 if (err) {
1688 printk(KERN_ERR "ksm: register sysfs failed\n");
1689 kthread_stop(ksm_thread);
1690 goto out_free2;
1691 }
1692#endif /* CONFIG_SYSFS */
1693
1694 return 0;
1695
1696out_free2:
1697 mm_slots_hash_free();
1698out_free1:
1699 ksm_slab_free();
1700out:
1701 return err;
1702}
1703module_init(ksm_init)
diff --git a/mm/madvise.c b/mm/madvise.c
index 76eb4193acd..d9ae2067952 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -11,6 +11,7 @@
 #include <linux/mempolicy.h>
 #include <linux/hugetlb.h>
 #include <linux/sched.h>
+#include <linux/ksm.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -41,7 +42,7 @@ static long madvise_behavior(struct vm_area_struct * vma,
 	struct mm_struct * mm = vma->vm_mm;
 	int error = 0;
 	pgoff_t pgoff;
-	int new_flags = vma->vm_flags;
+	unsigned long new_flags = vma->vm_flags;
 
 	switch (behavior) {
 	case MADV_NORMAL:
@@ -57,8 +58,18 @@ static long madvise_behavior(struct vm_area_struct * vma,
 		new_flags |= VM_DONTCOPY;
 		break;
 	case MADV_DOFORK:
+		if (vma->vm_flags & VM_IO) {
+			error = -EINVAL;
+			goto out;
+		}
 		new_flags &= ~VM_DONTCOPY;
 		break;
+	case MADV_MERGEABLE:
+	case MADV_UNMERGEABLE:
+		error = ksm_madvise(vma, start, end, behavior, &new_flags);
+		if (error)
+			goto out;
+		break;
 	}
 
 	if (new_flags == vma->vm_flags) {
@@ -211,37 +222,16 @@ static long
 madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	    unsigned long start, unsigned long end, int behavior)
 {
-	long error;
-
 	switch (behavior) {
-	case MADV_DOFORK:
-		if (vma->vm_flags & VM_IO) {
-			error = -EINVAL;
-			break;
-		}
-	case MADV_DONTFORK:
-	case MADV_NORMAL:
-	case MADV_SEQUENTIAL:
-	case MADV_RANDOM:
-		error = madvise_behavior(vma, prev, start, end, behavior);
-		break;
 	case MADV_REMOVE:
-		error = madvise_remove(vma, prev, start, end);
-		break;
-
+		return madvise_remove(vma, prev, start, end);
 	case MADV_WILLNEED:
-		error = madvise_willneed(vma, prev, start, end);
-		break;
-
+		return madvise_willneed(vma, prev, start, end);
 	case MADV_DONTNEED:
-		error = madvise_dontneed(vma, prev, start, end);
-		break;
-
+		return madvise_dontneed(vma, prev, start, end);
 	default:
-		BUG();
-		break;
+		return madvise_behavior(vma, prev, start, end, behavior);
 	}
-	return error;
 }
 
 static int
@@ -256,12 +246,17 @@ madvise_behavior_valid(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+#ifdef CONFIG_KSM
+	case MADV_MERGEABLE:
+	case MADV_UNMERGEABLE:
+#endif
 		return 1;
 
 	default:
 		return 0;
 	}
 }
+
 /*
  * The madvise(2) system call.
  *
@@ -286,6 +281,12 @@ madvise_behavior_valid(int behavior)
  *		so the kernel can free resources associated with it.
  *  MADV_REMOVE - the application wants to free up the given range of
  *		pages and associated backing store.
+ *  MADV_DONTFORK - omit this area from child's address space when forking:
+ *		typically, to avoid COWing pages pinned by get_user_pages().
+ *  MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
+ *  MADV_MERGEABLE - the application recommends that KSM try to merge pages in
+ *		this area with pages of identical content from other such areas.
+ *  MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others.
 *
 * return values:
 *  zero    - success
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fd4529d86de..9b10d875378 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -648,7 +648,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
648 int nid = z->zone_pgdat->node_id; 648 int nid = z->zone_pgdat->node_id;
649 int zid = zone_idx(z); 649 int zid = zone_idx(z);
650 struct mem_cgroup_per_zone *mz; 650 struct mem_cgroup_per_zone *mz;
651 int lru = LRU_FILE * !!file + !!active; 651 int lru = LRU_FILE * file + active;
652 int ret; 652 int ret;
653 653
654 BUG_ON(!mem_cont); 654 BUG_ON(!mem_cont);
diff --git a/mm/memory.c b/mm/memory.c
index e8f63d9961e..b1443ac07c0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -45,6 +45,7 @@
45#include <linux/swap.h> 45#include <linux/swap.h>
46#include <linux/highmem.h> 46#include <linux/highmem.h>
47#include <linux/pagemap.h> 47#include <linux/pagemap.h>
48#include <linux/ksm.h>
48#include <linux/rmap.h> 49#include <linux/rmap.h>
49#include <linux/module.h> 50#include <linux/module.h>
50#include <linux/delayacct.h> 51#include <linux/delayacct.h>
@@ -107,6 +108,18 @@ static int __init disable_randmaps(char *s)
107} 108}
108__setup("norandmaps", disable_randmaps); 109__setup("norandmaps", disable_randmaps);
109 110
111unsigned long zero_pfn __read_mostly;
112unsigned long highest_memmap_pfn __read_mostly;
113
114/*
115 * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
116 */
117static int __init init_zero_pfn(void)
118{
119 zero_pfn = page_to_pfn(ZERO_PAGE(0));
120 return 0;
121}
122core_initcall(init_zero_pfn);
110 123
111/* 124/*
112 * If a p?d_bad entry is found while walking page tables, report 125 * If a p?d_bad entry is found while walking page tables, report
@@ -443,6 +456,20 @@ static inline int is_cow_mapping(unsigned int flags)
443 return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; 456 return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
444} 457}
445 458
459#ifndef is_zero_pfn
460static inline int is_zero_pfn(unsigned long pfn)
461{
462 return pfn == zero_pfn;
463}
464#endif
465
466#ifndef my_zero_pfn
467static inline unsigned long my_zero_pfn(unsigned long addr)
468{
469 return zero_pfn;
470}
471#endif
472
446/* 473/*
447 * vm_normal_page -- This function gets the "struct page" associated with a pte. 474 * vm_normal_page -- This function gets the "struct page" associated with a pte.
448 * 475 *
@@ -498,7 +525,9 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
498 if (HAVE_PTE_SPECIAL) { 525 if (HAVE_PTE_SPECIAL) {
499 if (likely(!pte_special(pte))) 526 if (likely(!pte_special(pte)))
500 goto check_pfn; 527 goto check_pfn;
501 if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) 528 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
529 return NULL;
530 if (!is_zero_pfn(pfn))
502 print_bad_pte(vma, addr, pte, NULL); 531 print_bad_pte(vma, addr, pte, NULL);
503 return NULL; 532 return NULL;
504 } 533 }
@@ -520,6 +549,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
520 } 549 }
521 } 550 }
522 551
552 if (is_zero_pfn(pfn))
553 return NULL;
523check_pfn: 554check_pfn:
524 if (unlikely(pfn > highest_memmap_pfn)) { 555 if (unlikely(pfn > highest_memmap_pfn)) {
525 print_bad_pte(vma, addr, pte, NULL); 556 print_bad_pte(vma, addr, pte, NULL);
@@ -597,8 +628,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
597 page = vm_normal_page(vma, addr, pte); 628 page = vm_normal_page(vma, addr, pte);
598 if (page) { 629 if (page) {
599 get_page(page); 630 get_page(page);
600 page_dup_rmap(page, vma, addr); 631 page_dup_rmap(page);
601 rss[!!PageAnon(page)]++; 632 rss[PageAnon(page)]++;
602 } 633 }
603 634
604out_set_pte: 635out_set_pte:
@@ -1143,9 +1174,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
1143 goto no_page; 1174 goto no_page;
1144 if ((flags & FOLL_WRITE) && !pte_write(pte)) 1175 if ((flags & FOLL_WRITE) && !pte_write(pte))
1145 goto unlock; 1176 goto unlock;
1177
1146 page = vm_normal_page(vma, address, pte); 1178 page = vm_normal_page(vma, address, pte);
1147 if (unlikely(!page)) 1179 if (unlikely(!page)) {
1148 goto bad_page; 1180 if ((flags & FOLL_DUMP) ||
1181 !is_zero_pfn(pte_pfn(pte)))
1182 goto bad_page;
1183 page = pte_page(pte);
1184 }
1149 1185
1150 if (flags & FOLL_GET) 1186 if (flags & FOLL_GET)
1151 get_page(page); 1187 get_page(page);
@@ -1173,65 +1209,46 @@ no_page:
1173 pte_unmap_unlock(ptep, ptl); 1209 pte_unmap_unlock(ptep, ptl);
1174 if (!pte_none(pte)) 1210 if (!pte_none(pte))
1175 return page; 1211 return page;
1176 /* Fall through to ZERO_PAGE handling */ 1212
1177no_page_table: 1213no_page_table:
1178 /* 1214 /*
1179 * When core dumping an enormous anonymous area that nobody 1215 * When core dumping an enormous anonymous area that nobody
1180 * has touched so far, we don't want to allocate page tables. 1216 * has touched so far, we don't want to allocate unnecessary pages or
1217 * page tables. Return error instead of NULL to skip handle_mm_fault,
1218 * then get_dump_page() will return NULL to leave a hole in the dump.
1219 * But we can only make this optimization where a hole would surely
1220 * be zero-filled if handle_mm_fault() actually did handle it.
1181 */ 1221 */
1182 if (flags & FOLL_ANON) { 1222 if ((flags & FOLL_DUMP) &&
1183 page = ZERO_PAGE(0); 1223 (!vma->vm_ops || !vma->vm_ops->fault))
1184 if (flags & FOLL_GET) 1224 return ERR_PTR(-EFAULT);
1185 get_page(page);
1186 BUG_ON(flags & FOLL_WRITE);
1187 }
1188 return page; 1225 return page;
1189} 1226}
1190 1227
1191/* Can we do the FOLL_ANON optimization? */
1192static inline int use_zero_page(struct vm_area_struct *vma)
1193{
1194 /*
1195 * We don't want to optimize FOLL_ANON for make_pages_present()
1196 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
1197 * we want to get the page from the page tables to make sure
1198 * that we serialize and update with any other user of that
1199 * mapping.
1200 */
1201 if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
1202 return 0;
1203 /*
1204 * And if we have a fault routine, it's not an anonymous region.
1205 */
1206 return !vma->vm_ops || !vma->vm_ops->fault;
1207}
1208
1209
1210
1211int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1228int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1212 unsigned long start, int nr_pages, int flags, 1229 unsigned long start, int nr_pages, unsigned int gup_flags,
1213 struct page **pages, struct vm_area_struct **vmas) 1230 struct page **pages, struct vm_area_struct **vmas)
1214{ 1231{
1215 int i; 1232 int i;
1216 unsigned int vm_flags = 0; 1233 unsigned long vm_flags;
1217 int write = !!(flags & GUP_FLAGS_WRITE);
1218 int force = !!(flags & GUP_FLAGS_FORCE);
1219 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
1220 int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
1221 1234
1222 if (nr_pages <= 0) 1235 if (nr_pages <= 0)
1223 return 0; 1236 return 0;
1237
1238 VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
1239
1224 /* 1240 /*
1225 * Require read or write permissions. 1241 * Require read or write permissions.
1226 * If 'force' is set, we only require the "MAY" flags. 1242 * If FOLL_FORCE is set, we only require the "MAY" flags.
1227 */ 1243 */
1228 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); 1244 vm_flags = (gup_flags & FOLL_WRITE) ?
1229 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); 1245 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
1246 vm_flags &= (gup_flags & FOLL_FORCE) ?
1247 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
1230 i = 0; 1248 i = 0;
1231 1249
1232 do { 1250 do {
1233 struct vm_area_struct *vma; 1251 struct vm_area_struct *vma;
1234 unsigned int foll_flags;
1235 1252
1236 vma = find_extend_vma(mm, start); 1253 vma = find_extend_vma(mm, start);
1237 if (!vma && in_gate_area(tsk, start)) { 1254 if (!vma && in_gate_area(tsk, start)) {
@@ -1243,7 +1260,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1243 pte_t *pte; 1260 pte_t *pte;
1244 1261
1245 /* user gate pages are read-only */ 1262 /* user gate pages are read-only */
1246 if (!ignore && write) 1263 if (gup_flags & FOLL_WRITE)
1247 return i ? : -EFAULT; 1264 return i ? : -EFAULT;
1248 if (pg > TASK_SIZE) 1265 if (pg > TASK_SIZE)
1249 pgd = pgd_offset_k(pg); 1266 pgd = pgd_offset_k(pg);
@@ -1277,38 +1294,26 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1277 1294
1278 if (!vma || 1295 if (!vma ||
1279 (vma->vm_flags & (VM_IO | VM_PFNMAP)) || 1296 (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
1280 (!ignore && !(vm_flags & vma->vm_flags))) 1297 !(vm_flags & vma->vm_flags))
1281 return i ? : -EFAULT; 1298 return i ? : -EFAULT;
1282 1299
1283 if (is_vm_hugetlb_page(vma)) { 1300 if (is_vm_hugetlb_page(vma)) {
1284 i = follow_hugetlb_page(mm, vma, pages, vmas, 1301 i = follow_hugetlb_page(mm, vma, pages, vmas,
1285 &start, &nr_pages, i, write); 1302 &start, &nr_pages, i, gup_flags);
1286 continue; 1303 continue;
1287 } 1304 }
1288 1305
1289 foll_flags = FOLL_TOUCH;
1290 if (pages)
1291 foll_flags |= FOLL_GET;
1292 if (!write && use_zero_page(vma))
1293 foll_flags |= FOLL_ANON;
1294
1295 do { 1306 do {
1296 struct page *page; 1307 struct page *page;
1308 unsigned int foll_flags = gup_flags;
1297 1309
1298 /* 1310 /*
1299 * If we have a pending SIGKILL, don't keep faulting 1311 * If we have a pending SIGKILL, don't keep faulting
1300 * pages and potentially allocating memory, unless 1312 * pages and potentially allocating memory.
1301 * current is handling munlock--e.g., on exit. In
1302 * that case, we are not allocating memory. Rather,
1303 * we're only unlocking already resident/mapped pages.
1304 */ 1313 */
1305 if (unlikely(!ignore_sigkill && 1314 if (unlikely(fatal_signal_pending(current)))
1306 fatal_signal_pending(current)))
1307 return i ? i : -ERESTARTSYS; 1315 return i ? i : -ERESTARTSYS;
1308 1316
1309 if (write)
1310 foll_flags |= FOLL_WRITE;
1311
1312 cond_resched(); 1317 cond_resched();
1313 while (!(page = follow_page(vma, start, foll_flags))) { 1318 while (!(page = follow_page(vma, start, foll_flags))) {
1314 int ret; 1319 int ret;
@@ -1419,18 +1424,47 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1419 unsigned long start, int nr_pages, int write, int force, 1424 unsigned long start, int nr_pages, int write, int force,
1420 struct page **pages, struct vm_area_struct **vmas) 1425 struct page **pages, struct vm_area_struct **vmas)
1421{ 1426{
1422 int flags = 0; 1427 int flags = FOLL_TOUCH;
1423 1428
1429 if (pages)
1430 flags |= FOLL_GET;
1424 if (write) 1431 if (write)
1425 flags |= GUP_FLAGS_WRITE; 1432 flags |= FOLL_WRITE;
1426 if (force) 1433 if (force)
1427 flags |= GUP_FLAGS_FORCE; 1434 flags |= FOLL_FORCE;
1428 1435
1429 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); 1436 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
1430} 1437}
1431
1432EXPORT_SYMBOL(get_user_pages); 1438EXPORT_SYMBOL(get_user_pages);
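The hunk above shows the whole new calling convention: exported callers still pass write/force ints, and get_user_pages() now translates them into FOLL_WRITE/FOLL_FORCE (plus FOLL_TOUCH, and FOLL_GET when a pages array is supplied) before handing off to __get_user_pages(). A minimal caller-side sketch, assuming the eight-argument signature of this era; my_pin_user_byte() is an illustrative name, not kernel API:

#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>

static int my_pin_user_byte(unsigned long uaddr, unsigned char *out)
{
	struct page *page;
	void *kaddr;
	int ret;

	down_read(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK,
			     1, 0 /* write */, 0 /* force */, &page, NULL);
	up_read(&current->mm->mmap_sem);
	if (ret < 1)
		return ret < 0 ? ret : -EFAULT;

	kaddr = kmap(page);
	*out = *((unsigned char *)kaddr + (uaddr & ~PAGE_MASK));
	kunmap(page);
	put_page(page);		/* drop the reference FOLL_GET took */
	return 0;
}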
1433 1439
1440/**
1441 * get_dump_page() - pin user page in memory while writing it to core dump
1442 * @addr: user address
1443 *
1444 * Returns struct page pointer of user page pinned for dump,
1445 * to be freed afterwards by page_cache_release() or put_page().
1446 *
1447 * Returns NULL on any kind of failure - a hole must then be inserted into
1448 * the corefile, to preserve alignment with its headers; and also returns
1449 * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
1450 * allowing a hole to be left in the corefile to save disk space.
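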
1451 *
1452 * Called without mmap_sem, but after all other threads have been killed.
1453 */
1454#ifdef CONFIG_ELF_CORE
1455struct page *get_dump_page(unsigned long addr)
1456{
1457 struct vm_area_struct *vma;
1458 struct page *page;
1459
1460 if (__get_user_pages(current, current->mm, addr, 1,
1461 FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
1462 return NULL;
1463 flush_cache_page(vma, addr, page_to_pfn(page));
1464 return page;
1465}
1466#endif /* CONFIG_ELF_CORE */
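get_dump_page() has exactly one intended caller, the ELF core dumper. A hedged sketch of that usage; dump_write() and dump_seek() are assumed stand-ins for the dumper's output helpers in fs/binfmt_elf.c:

static int my_dump_vma(struct vm_area_struct *vma, struct file *file)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		struct page *page = get_dump_page(addr);
		int ok;

		if (page) {
			void *kaddr = kmap(page);

			ok = dump_write(file, kaddr, PAGE_SIZE);
			kunmap(page);
			page_cache_release(page);	/* drop the pin */
		} else {
			/* zero page, pte_none or failure: leave a hole */
			ok = dump_seek(file, PAGE_SIZE);
		}
		if (!ok)
			return 0;
	}
	return 1;
}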
1467
1434pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, 1468pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
1435 spinlock_t **ptl) 1469 spinlock_t **ptl)
1436{ 1470{
@@ -1608,7 +1642,8 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
1608 * If we don't have pte special, then we have to use the pfn_valid() 1642 * If we don't have pte special, then we have to use the pfn_valid()
1609 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* 1643 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
1610 * refcount the page if pfn_valid is true (hence insert_page rather 1644 * refcount the page if pfn_valid is true (hence insert_page rather
1611 * than insert_pfn). 1645 * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
1646 * without pte special, it would there be refcounted as a normal page.
1612 */ 1647 */
1613 if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) { 1648 if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
1614 struct page *page; 1649 struct page *page;
@@ -1974,7 +2009,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1974 * Take out anonymous pages first, anonymous shared vmas are 2009 * Take out anonymous pages first, anonymous shared vmas are
1975 * not dirty accountable. 2010 * not dirty accountable.
1976 */ 2011 */
1977 if (PageAnon(old_page)) { 2012 if (PageAnon(old_page) && !PageKsm(old_page)) {
1978 if (!trylock_page(old_page)) { 2013 if (!trylock_page(old_page)) {
1979 page_cache_get(old_page); 2014 page_cache_get(old_page);
1980 pte_unmap_unlock(page_table, ptl); 2015 pte_unmap_unlock(page_table, ptl);
@@ -2075,10 +2110,19 @@ gotten:
2075 2110
2076 if (unlikely(anon_vma_prepare(vma))) 2111 if (unlikely(anon_vma_prepare(vma)))
2077 goto oom; 2112 goto oom;
2078 VM_BUG_ON(old_page == ZERO_PAGE(0)); 2113
2079 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 2114 if (is_zero_pfn(pte_pfn(orig_pte))) {
2080 if (!new_page) 2115 new_page = alloc_zeroed_user_highpage_movable(vma, address);
2081 goto oom; 2116 if (!new_page)
2117 goto oom;
2118 } else {
2119 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
2120 if (!new_page)
2121 goto oom;
2122 cow_user_page(new_page, old_page, address, vma);
2123 }
2124 __SetPageUptodate(new_page);
2125
2082 /* 2126 /*
2083 * Don't let another task, with possibly unlocked vma, 2127 * Don't let another task, with possibly unlocked vma,
2084 * keep the mlocked page. 2128 * keep the mlocked page.
@@ -2088,8 +2132,6 @@ gotten:
2088 clear_page_mlock(old_page); 2132 clear_page_mlock(old_page);
2089 unlock_page(old_page); 2133 unlock_page(old_page);
2090 } 2134 }
2091 cow_user_page(new_page, old_page, address, vma);
2092 __SetPageUptodate(new_page);
2093 2135
2094 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) 2136 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
2095 goto oom_free_new; 2137 goto oom_free_new;
@@ -2115,9 +2157,14 @@ gotten:
2115 * seen in the presence of one thread doing SMC and another 2157 * seen in the presence of one thread doing SMC and another
2116 * thread doing COW. 2158 * thread doing COW.
2117 */ 2159 */
2118 ptep_clear_flush_notify(vma, address, page_table); 2160 ptep_clear_flush(vma, address, page_table);
2119 page_add_new_anon_rmap(new_page, vma, address); 2161 page_add_new_anon_rmap(new_page, vma, address);
2120 set_pte_at(mm, address, page_table, entry); 2162 /*
2163 * We call the notify macro here because, when using secondary
2164 * mmu page tables (such as kvm shadow page tables), we want the
2165 * new page to be mapped directly into the secondary page table.
2166 */
2167 set_pte_at_notify(mm, address, page_table, entry);
2121 update_mmu_cache(vma, address, entry); 2168 update_mmu_cache(vma, address, entry);
2122 if (old_page) { 2169 if (old_page) {
2123 /* 2170 /*
@@ -2625,6 +2672,16 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2625 spinlock_t *ptl; 2672 spinlock_t *ptl;
2626 pte_t entry; 2673 pte_t entry;
2627 2674
2675 if (!(flags & FAULT_FLAG_WRITE)) {
2676 entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
2677 vma->vm_page_prot));
2678 ptl = pte_lockptr(mm, pmd);
2679 spin_lock(ptl);
2680 if (!pte_none(*page_table))
2681 goto unlock;
2682 goto setpte;
2683 }
2684
2628 /* Allocate our own private page. */ 2685 /* Allocate our own private page. */
2629 pte_unmap(page_table); 2686 pte_unmap(page_table);
2630 2687
@@ -2639,13 +2696,16 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2639 goto oom_free_page; 2696 goto oom_free_page;
2640 2697
2641 entry = mk_pte(page, vma->vm_page_prot); 2698 entry = mk_pte(page, vma->vm_page_prot);
2642 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2699 if (vma->vm_flags & VM_WRITE)
2700 entry = pte_mkwrite(pte_mkdirty(entry));
2643 2701
2644 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2702 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2645 if (!pte_none(*page_table)) 2703 if (!pte_none(*page_table))
2646 goto release; 2704 goto release;
2705
2647 inc_mm_counter(mm, anon_rss); 2706 inc_mm_counter(mm, anon_rss);
2648 page_add_new_anon_rmap(page, vma, address); 2707 page_add_new_anon_rmap(page, vma, address);
2708setpte:
2649 set_pte_at(mm, address, page_table, entry); 2709 set_pte_at(mm, address, page_table, entry);
2650 2710
2651 /* No need to invalidate - it was non-present before */ 2711 /* No need to invalidate - it was non-present before */
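The early-return path added to do_anonymous_page() here is what reinstates the ZERO_PAGE for anonymous read faults: the fault installs a special zero-pfn pte without allocating anything, and only a later write fault (the is_zero_pfn() branch added to do_wp_page above) allocates a real page. A hedged userspace demonstration of the effect; rss_pages() is an illustrative helper:

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

static long rss_pages(void)
{
	long size = 0, resident = 0;
	FILE *f = fopen("/proc/self/statm", "r");

	if (!f || fscanf(f, "%ld %ld", &size, &resident) != 2)
		exit(1);
	fclose(f);
	return resident;
}

int main(void)
{
	size_t len = 64 << 20, i;
	volatile char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	long sum = 0;

	if (p == MAP_FAILED)
		return 1;
	printf("before faults: %ld resident pages\n", rss_pages());
	for (i = 0; i < len; i += 4096)
		sum += p[i];	/* read faults: zero page, no allocation */
	printf("after reads:   %ld resident pages (sum=%ld)\n",
	       rss_pages(), sum);
	for (i = 0; i < len; i += 4096)
		p[i] = 1;	/* write faults: COW allocates real pages */
	printf("after writes:  %ld resident pages\n", rss_pages());
	return 0;
}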
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e4412a676c8..efe3e0ec2e6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -339,8 +339,11 @@ EXPORT_SYMBOL_GPL(__remove_pages);
339 339
340void online_page(struct page *page) 340void online_page(struct page *page)
341{ 341{
342 unsigned long pfn = page_to_pfn(page);
343
342 totalram_pages++; 344 totalram_pages++;
343 num_physpages++; 345 if (pfn >= num_physpages)
346 num_physpages = pfn + 1;
344 347
345#ifdef CONFIG_HIGHMEM 348#ifdef CONFIG_HIGHMEM
346 if (PageHighMem(page)) 349 if (PageHighMem(page))
@@ -422,6 +425,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
422 zone->present_pages += onlined_pages; 425 zone->present_pages += onlined_pages;
423 zone->zone_pgdat->node_present_pages += onlined_pages; 426 zone->zone_pgdat->node_present_pages += onlined_pages;
424 427
428 zone_pcp_update(zone);
425 setup_per_zone_wmarks(); 429 setup_per_zone_wmarks();
426 calculate_zone_inactive_ratio(zone); 430 calculate_zone_inactive_ratio(zone);
427 if (onlined_pages) { 431 if (onlined_pages) {
@@ -831,7 +835,6 @@ repeat:
831 zone->present_pages -= offlined_pages; 835 zone->present_pages -= offlined_pages;
832 zone->zone_pgdat->node_present_pages -= offlined_pages; 836 zone->zone_pgdat->node_present_pages -= offlined_pages;
833 totalram_pages -= offlined_pages; 837 totalram_pages -= offlined_pages;
834 num_physpages -= offlined_pages;
835 838
836 setup_per_zone_wmarks(); 839 setup_per_zone_wmarks();
837 calculate_zone_inactive_ratio(zone); 840 calculate_zone_inactive_ratio(zone);
diff --git a/mm/mempool.c b/mm/mempool.c
index 32e75d40050..1a3bc3d4d55 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -308,13 +308,6 @@ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
308} 308}
309EXPORT_SYMBOL(mempool_kmalloc); 309EXPORT_SYMBOL(mempool_kmalloc);
310 310
311void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
312{
313 size_t size = (size_t)pool_data;
314 return kzalloc(size, gfp_mask);
315}
316EXPORT_SYMBOL(mempool_kzalloc);
317
318void mempool_kfree(void *element, void *pool_data) 311void mempool_kfree(void *element, void *pool_data)
319{ 312{
320 kfree(element); 313 kfree(element);
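mempool_kzalloc() goes away because it has no remaining users; the surviving kmalloc-backed pair covers the pattern. A hedged sketch of that pattern, with my_pool and the 256-byte element size as illustrative choices:

#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/string.h>

static mempool_t *my_pool;

static int __init my_pool_init(void)
{
	/* 16 preallocated 256-byte elements guarantee forward progress */
	my_pool = mempool_create(16, mempool_kmalloc, mempool_kfree,
				 (void *)(unsigned long)256);
	return my_pool ? 0 : -ENOMEM;
}

static void my_pool_use(void)
{
	void *elem = mempool_alloc(my_pool, GFP_KERNEL);

	if (elem) {
		memset(elem, 0, 256);	/* what mempool_kzalloc used to do */
		mempool_free(elem, my_pool);
	}
}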
diff --git a/mm/migrate.c b/mm/migrate.c
index 939888f9dda..16052e80aaa 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -67,6 +67,8 @@ int putback_lru_pages(struct list_head *l)
67 67
68 list_for_each_entry_safe(page, page2, l, lru) { 68 list_for_each_entry_safe(page, page2, l, lru) {
69 list_del(&page->lru); 69 list_del(&page->lru);
70 dec_zone_page_state(page, NR_ISOLATED_ANON +
71 page_is_file_cache(page));
70 putback_lru_page(page); 72 putback_lru_page(page);
71 count++; 73 count++;
72 } 74 }
@@ -147,7 +149,7 @@ out:
147static void remove_file_migration_ptes(struct page *old, struct page *new) 149static void remove_file_migration_ptes(struct page *old, struct page *new)
148{ 150{
149 struct vm_area_struct *vma; 151 struct vm_area_struct *vma;
150 struct address_space *mapping = page_mapping(new); 152 struct address_space *mapping = new->mapping;
151 struct prio_tree_iter iter; 153 struct prio_tree_iter iter;
152 pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 154 pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
153 155
@@ -270,7 +272,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
270 pslot = radix_tree_lookup_slot(&mapping->page_tree, 272 pslot = radix_tree_lookup_slot(&mapping->page_tree,
271 page_index(page)); 273 page_index(page));
272 274
273 expected_count = 2 + !!page_has_private(page); 275 expected_count = 2 + page_has_private(page);
274 if (page_count(page) != expected_count || 276 if (page_count(page) != expected_count ||
275 (struct page *)radix_tree_deref_slot(pslot) != page) { 277 (struct page *)radix_tree_deref_slot(pslot) != page) {
276 spin_unlock_irq(&mapping->tree_lock); 278 spin_unlock_irq(&mapping->tree_lock);
@@ -312,7 +314,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
312 */ 314 */
313 __dec_zone_page_state(page, NR_FILE_PAGES); 315 __dec_zone_page_state(page, NR_FILE_PAGES);
314 __inc_zone_page_state(newpage, NR_FILE_PAGES); 316 __inc_zone_page_state(newpage, NR_FILE_PAGES);
315 317 if (PageSwapBacked(page)) {
318 __dec_zone_page_state(page, NR_SHMEM);
319 __inc_zone_page_state(newpage, NR_SHMEM);
320 }
316 spin_unlock_irq(&mapping->tree_lock); 321 spin_unlock_irq(&mapping->tree_lock);
317 322
318 return 0; 323 return 0;
@@ -664,13 +669,15 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
664 * needs to be effective. 669 * needs to be effective.
665 */ 670 */
666 try_to_free_buffers(page); 671 try_to_free_buffers(page);
672 goto rcu_unlock;
667 } 673 }
668 goto rcu_unlock; 674 goto skip_unmap;
669 } 675 }
670 676
671 /* Establish migration ptes or remove ptes */ 677 /* Establish migration ptes or remove ptes */
672 try_to_unmap(page, 1); 678 try_to_unmap(page, 1);
673 679
680skip_unmap:
674 if (!page_mapped(page)) 681 if (!page_mapped(page))
675 rc = move_to_new_page(newpage, page); 682 rc = move_to_new_page(newpage, page);
676 683
@@ -693,6 +700,8 @@ unlock:
693 * restored. 700 * restored.
694 */ 701 */
695 list_del(&page->lru); 702 list_del(&page->lru);
703 dec_zone_page_state(page, NR_ISOLATED_ANON +
704 page_is_file_cache(page));
696 putback_lru_page(page); 705 putback_lru_page(page);
697 } 706 }
698 707
@@ -737,6 +746,13 @@ int migrate_pages(struct list_head *from,
737 struct page *page2; 746 struct page *page2;
738 int swapwrite = current->flags & PF_SWAPWRITE; 747 int swapwrite = current->flags & PF_SWAPWRITE;
739 int rc; 748 int rc;
749 unsigned long flags;
750
751 local_irq_save(flags);
752 list_for_each_entry(page, from, lru)
753 __inc_zone_page_state(page, NR_ISOLATED_ANON +
754 page_is_file_cache(page));
755 local_irq_restore(flags);
740 756
741 if (!swapwrite) 757 if (!swapwrite)
742 current->flags |= PF_SWAPWRITE; 758 current->flags |= PF_SWAPWRITE;
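The recurring NR_ISOLATED_ANON + page_is_file_cache(page) expression works because page_is_file_cache() returns 0 or 1 in this tree, indexing the adjacent NR_ISOLATED_ANON/NR_ISOLATED_FILE counters. A hedged helper sketch of the idiom; my_account_isolated() is an illustrative name:

#include <linux/mm_inline.h>
#include <linux/vmstat.h>

static void my_account_isolated(struct page *page, int isolated)
{
	enum zone_stat_item item =
			NR_ISOLATED_ANON + page_is_file_cache(page);

	if (isolated)
		inc_zone_page_state(page, item);
	else
		dec_zone_page_state(page, item);
}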
diff --git a/mm/mlock.c b/mm/mlock.c
index 45eb650b965..bd6f0e466f6 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -139,49 +139,36 @@ static void munlock_vma_page(struct page *page)
139} 139}
140 140
141/** 141/**
142 * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma. 142 * __mlock_vma_pages_range() - mlock a range of pages in the vma.
143 * @vma: target vma 143 * @vma: target vma
144 * @start: start address 144 * @start: start address
145 * @end: end address 145 * @end: end address
146 * @mlock: 0 indicate munlock, otherwise mlock.
147 * 146 *
148 * If @mlock == 0, unlock an mlocked range; 147 * This takes care of making the pages present too.
149 * else mlock the range of pages. This takes care of making the pages present ,
150 * too.
151 * 148 *
152 * return 0 on success, negative error code on error. 149 * return 0 on success, negative error code on error.
153 * 150 *
154 * vma->vm_mm->mmap_sem must be held for at least read. 151 * vma->vm_mm->mmap_sem must be held for at least read.
155 */ 152 */
156static long __mlock_vma_pages_range(struct vm_area_struct *vma, 153static long __mlock_vma_pages_range(struct vm_area_struct *vma,
157 unsigned long start, unsigned long end, 154 unsigned long start, unsigned long end)
158 int mlock)
159{ 155{
160 struct mm_struct *mm = vma->vm_mm; 156 struct mm_struct *mm = vma->vm_mm;
161 unsigned long addr = start; 157 unsigned long addr = start;
162 struct page *pages[16]; /* 16 gives a reasonable batch */ 158 struct page *pages[16]; /* 16 gives a reasonable batch */
163 int nr_pages = (end - start) / PAGE_SIZE; 159 int nr_pages = (end - start) / PAGE_SIZE;
164 int ret = 0; 160 int ret = 0;
165 int gup_flags = 0; 161 int gup_flags;
166 162
167 VM_BUG_ON(start & ~PAGE_MASK); 163 VM_BUG_ON(start & ~PAGE_MASK);
168 VM_BUG_ON(end & ~PAGE_MASK); 164 VM_BUG_ON(end & ~PAGE_MASK);
169 VM_BUG_ON(start < vma->vm_start); 165 VM_BUG_ON(start < vma->vm_start);
170 VM_BUG_ON(end > vma->vm_end); 166 VM_BUG_ON(end > vma->vm_end);
171 VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) && 167 VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
172 (atomic_read(&mm->mm_users) != 0));
173
174 /*
175 * mlock: don't page populate if vma has PROT_NONE permission.
176 * munlock: always do munlock although the vma has PROT_NONE
177 * permission, or SIGKILL is pending.
178 */
179 if (!mlock)
180 gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
181 GUP_FLAGS_IGNORE_SIGKILL;
182 168
169 gup_flags = FOLL_TOUCH | FOLL_GET;
183 if (vma->vm_flags & VM_WRITE) 170 if (vma->vm_flags & VM_WRITE)
184 gup_flags |= GUP_FLAGS_WRITE; 171 gup_flags |= FOLL_WRITE;
185 172
186 while (nr_pages > 0) { 173 while (nr_pages > 0) {
187 int i; 174 int i;
@@ -201,51 +188,45 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
201 * This can happen for, e.g., VM_NONLINEAR regions before 188 * This can happen for, e.g., VM_NONLINEAR regions before
202 * a page has been allocated and mapped at a given offset, 189 * a page has been allocated and mapped at a given offset,
203 * or for addresses that map beyond end of a file. 190 * or for addresses that map beyond end of a file.
204 * We'll mlock the the pages if/when they get faulted in. 191 * We'll mlock the pages if/when they get faulted in.
205 */ 192 */
206 if (ret < 0) 193 if (ret < 0)
207 break; 194 break;
208 if (ret == 0) {
209 /*
210 * We know the vma is there, so the only time
211 * we cannot get a single page should be an
212 * error (ret < 0) case.
213 */
214 WARN_ON(1);
215 break;
216 }
217 195
218 lru_add_drain(); /* push cached pages to LRU */ 196 lru_add_drain(); /* push cached pages to LRU */
219 197
220 for (i = 0; i < ret; i++) { 198 for (i = 0; i < ret; i++) {
221 struct page *page = pages[i]; 199 struct page *page = pages[i];
222 200
223 lock_page(page);
224 /*
225 * Because we lock page here and migration is blocked
226 * by the elevated reference, we need only check for
227 * page truncation (file-cache only).
228 */
229 if (page->mapping) { 201 if (page->mapping) {
230 if (mlock) 202 /*
203 * That preliminary check is mainly to avoid
204 * the pointless overhead of lock_page on the
205 * ZERO_PAGE: which might bounce very badly if
206 * there is contention. However, we're still
207 * dirtying its cacheline with get/put_page:
208 * we'll add another __get_user_pages flag to
209 * avoid it if that case turns out to matter.
210 */
211 lock_page(page);
212 /*
213 * Because we lock page here and migration is
214 * blocked by the elevated reference, we need
215 * only check for file-cache page truncation.
216 */
217 if (page->mapping)
231 mlock_vma_page(page); 218 mlock_vma_page(page);
232 else 219 unlock_page(page);
233 munlock_vma_page(page);
234 } 220 }
235 unlock_page(page); 221 put_page(page); /* ref from get_user_pages() */
236 put_page(page); /* ref from get_user_pages() */
237
238 /*
239 * here we assume that get_user_pages() has given us
240 * a list of virtually contiguous pages.
241 */
242 addr += PAGE_SIZE; /* for next get_user_pages() */
243 nr_pages--;
244 } 222 }
223
224 addr += ret * PAGE_SIZE;
225 nr_pages -= ret;
245 ret = 0; 226 ret = 0;
246 } 227 }
247 228
248 return ret; /* count entire vma as locked_vm */ 229 return ret; /* 0 or negative error code */
249} 230}
250 231
251/* 232/*
@@ -289,7 +270,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
289 is_vm_hugetlb_page(vma) || 270 is_vm_hugetlb_page(vma) ||
290 vma == get_gate_vma(current))) { 271 vma == get_gate_vma(current))) {
291 272
292 __mlock_vma_pages_range(vma, start, end, 1); 273 __mlock_vma_pages_range(vma, start, end);
293 274
294 /* Hide errors from mmap() and other callers */ 275 /* Hide errors from mmap() and other callers */
295 return 0; 276 return 0;
@@ -310,7 +291,6 @@ no_mlock:
310 return nr_pages; /* error or pages NOT mlocked */ 291 return nr_pages; /* error or pages NOT mlocked */
311} 292}
312 293
313
314/* 294/*
315 * munlock_vma_pages_range() - munlock all pages in the vma range.' 295 * munlock_vma_pages_range() - munlock all pages in the vma range.'
316 * @vma - vma containing range to be munlock()ed. 296 * @vma - vma containing range to be munlock()ed.
@@ -330,10 +310,38 @@ no_mlock:
330 * free them. This will result in freeing mlocked pages. 310 * free them. This will result in freeing mlocked pages.
331 */ 311 */
332void munlock_vma_pages_range(struct vm_area_struct *vma, 312void munlock_vma_pages_range(struct vm_area_struct *vma,
333 unsigned long start, unsigned long end) 313 unsigned long start, unsigned long end)
334{ 314{
315 unsigned long addr;
316
317 lru_add_drain();
335 vma->vm_flags &= ~VM_LOCKED; 318 vma->vm_flags &= ~VM_LOCKED;
336 __mlock_vma_pages_range(vma, start, end, 0); 319
320 for (addr = start; addr < end; addr += PAGE_SIZE) {
321 struct page *page;
322 /*
323 * Although FOLL_DUMP is intended for get_dump_page(),
324 * it just so happens that its special treatment of the
325 * ZERO_PAGE (returning an error instead of doing get_page)
326 * suits munlock very well (and if somehow an abnormal page
327 * has sneaked into the range, we won't oops here: great).
328 */
329 page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
330 if (page && !IS_ERR(page)) {
331 lock_page(page);
332 /*
333 * Like in __mlock_vma_pages_range(),
334 * because we lock page here and migration is
335 * blocked by the elevated reference, we need
336 * only check for file-cache page truncation.
337 */
338 if (page->mapping)
339 munlock_vma_page(page);
340 unlock_page(page);
341 put_page(page);
342 }
343 cond_resched();
344 }
337} 345}
338 346
339/* 347/*
@@ -400,18 +408,14 @@ success:
400 * It's okay if try_to_unmap_one unmaps a page just after we 408 * It's okay if try_to_unmap_one unmaps a page just after we
401 * set VM_LOCKED, __mlock_vma_pages_range will bring it back. 409 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
402 */ 410 */
403 vma->vm_flags = newflags;
404 411
405 if (lock) { 412 if (lock) {
406 ret = __mlock_vma_pages_range(vma, start, end, 1); 413 vma->vm_flags = newflags;
407 414 ret = __mlock_vma_pages_range(vma, start, end);
408 if (ret > 0) { 415 if (ret < 0)
409 mm->locked_vm -= ret; 416 ret = __mlock_posix_error_return(ret);
410 ret = 0;
411 } else
412 ret = __mlock_posix_error_return(ret); /* translate if needed */
413 } else { 417 } else {
414 __mlock_vma_pages_range(vma, start, end, 0); 418 munlock_vma_pages_range(vma, start, end);
415 } 419 }
416 420
417out: 421out:
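For reference, the userspace pair these population and teardown paths serve. A minimal sketch:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 1 << 20;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	if (mlock(p, len))	/* faults in and pins every page */
		perror("mlock");
	/* ... use the locked buffer ... */
	if (munlock(p, len))	/* back to the evictable LRU */
		perror("munlock");
	munmap(p, len);
	return 0;
}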
diff --git a/mm/mmap.c b/mm/mmap.c
index 376492ed08f..21d4029a07b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -570,9 +570,9 @@ again: remove_next = 1 + (end > next->vm_end);
570 570
571 /* 571 /*
572 * When changing only vma->vm_end, we don't really need 572 * When changing only vma->vm_end, we don't really need
573 * anon_vma lock: but is that case worth optimizing out? 573 * anon_vma lock.
574 */ 574 */
575 if (vma->anon_vma) 575 if (vma->anon_vma && (insert || importer || start != vma->vm_start))
576 anon_vma = vma->anon_vma; 576 anon_vma = vma->anon_vma;
577 if (anon_vma) { 577 if (anon_vma) {
578 spin_lock(&anon_vma->lock); 578 spin_lock(&anon_vma->lock);
@@ -656,9 +656,6 @@ again: remove_next = 1 + (end > next->vm_end);
656 validate_mm(mm); 656 validate_mm(mm);
657} 657}
658 658
659/* Flags that can be inherited from an existing mapping when merging */
660#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR)
661
662/* 659/*
663 * If the vma has a ->close operation then the driver probably needs to release 660 * If the vma has a ->close operation then the driver probably needs to release
664 * per-vma resources, so we don't attempt to merge those. 661 * per-vma resources, so we don't attempt to merge those.
@@ -666,7 +663,8 @@ again: remove_next = 1 + (end > next->vm_end);
666static inline int is_mergeable_vma(struct vm_area_struct *vma, 663static inline int is_mergeable_vma(struct vm_area_struct *vma,
667 struct file *file, unsigned long vm_flags) 664 struct file *file, unsigned long vm_flags)
668{ 665{
669 if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS) 666 /* VM_CAN_NONLINEAR may get set later by f_op->mmap() */
667 if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR)
670 return 0; 668 return 0;
671 if (vma->vm_file != file) 669 if (vma->vm_file != file)
672 return 0; 670 return 0;
@@ -951,6 +949,24 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
951 if (mm->map_count > sysctl_max_map_count) 949 if (mm->map_count > sysctl_max_map_count)
952 return -ENOMEM; 950 return -ENOMEM;
953 951
952 if (flags & MAP_HUGETLB) {
953 struct user_struct *user = NULL;
954 if (file)
955 return -EINVAL;
956
957 /*
958 * VM_NORESERVE is used because the reservations will be
 959 * taken when vm_ops->mmap() is called.
 960 * A dummy user value is used because we are not locking
 961 * memory, so no accounting is necessary.
962 */
963 len = ALIGN(len, huge_page_size(&default_hstate));
964 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
965 &user, HUGETLB_ANONHUGE_INODE);
966 if (IS_ERR(file))
967 return PTR_ERR(file);
968 }
969
954 /* Obtain the address to map to. we verify (or select) it and ensure 970 /* Obtain the address to map to. we verify (or select) it and ensure
955 * that it represents a valid section of the address space. 971 * that it represents a valid section of the address space.
956 */ 972 */
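With the MAP_HUGETLB block above in place, anonymous huge pages need no hugetlbfs mount or file descriptor. A hedged userspace sketch; the MAP_HUGETLB fallback value is the x86 one, an assumption for older headers, and huge pages must be reserved via /proc/sys/vm/nr_hugepages first:

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* x86 value; assumption for old headers */
#endif

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* one 2MB huge page (arch-dependent) */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");	/* e.g. no huge pages reserved */
		return 1;
	}
	((char *)p)[0] = 1;
	munmap(p, len);
	return 0;
}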
@@ -965,11 +981,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
965 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | 981 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
966 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 982 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
967 983
968 if (flags & MAP_LOCKED) { 984 if (flags & MAP_LOCKED)
969 if (!can_do_mlock()) 985 if (!can_do_mlock())
970 return -EPERM; 986 return -EPERM;
971 vm_flags |= VM_LOCKED;
972 }
973 987
974 /* mlock MCL_FUTURE? */ 988 /* mlock MCL_FUTURE? */
975 if (vm_flags & VM_LOCKED) { 989 if (vm_flags & VM_LOCKED) {
@@ -1195,21 +1209,21 @@ munmap_back:
1195 goto unmap_and_free_vma; 1209 goto unmap_and_free_vma;
1196 if (vm_flags & VM_EXECUTABLE) 1210 if (vm_flags & VM_EXECUTABLE)
1197 added_exe_file_vma(mm); 1211 added_exe_file_vma(mm);
1212
1213 /* Can addr have changed??
1214 *
1215 * Answer: Yes, several device drivers can do it in their
1216 * f_op->mmap method. -DaveM
1217 */
1218 addr = vma->vm_start;
1219 pgoff = vma->vm_pgoff;
1220 vm_flags = vma->vm_flags;
1198 } else if (vm_flags & VM_SHARED) { 1221 } else if (vm_flags & VM_SHARED) {
1199 error = shmem_zero_setup(vma); 1222 error = shmem_zero_setup(vma);
1200 if (error) 1223 if (error)
1201 goto free_vma; 1224 goto free_vma;
1202 } 1225 }
1203 1226
1204 /* Can addr have changed??
1205 *
1206 * Answer: Yes, several device drivers can do it in their
1207 * f_op->mmap method. -DaveM
1208 */
1209 addr = vma->vm_start;
1210 pgoff = vma->vm_pgoff;
1211 vm_flags = vma->vm_flags;
1212
1213 if (vma_wants_writenotify(vma)) 1227 if (vma_wants_writenotify(vma))
1214 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED); 1228 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1215 1229
@@ -2111,6 +2125,7 @@ void exit_mmap(struct mm_struct *mm)
2111 /* Use -1 here to ensure all VMAs in the mm are unmapped */ 2125 /* Use -1 here to ensure all VMAs in the mm are unmapped */
2112 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); 2126 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
2113 vm_unacct_memory(nr_accounted); 2127 vm_unacct_memory(nr_accounted);
2128
2114 free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); 2129 free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
2115 tlb_finish_mmu(tlb, 0, end); 2130 tlb_finish_mmu(tlb, 0, end);
2116 2131
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
new file mode 100644
index 00000000000..ded9081f402
--- /dev/null
+++ b/mm/mmu_context.c
@@ -0,0 +1,58 @@
1/* Copyright (C) 2009 Red Hat, Inc.
2 *
3 * See ../COPYING for licensing terms.
4 */
5
6#include <linux/mm.h>
7#include <linux/mmu_context.h>
8#include <linux/sched.h>
9
10#include <asm/mmu_context.h>
11
12/*
13 * use_mm
14 * Makes the calling kernel thread take on the specified
15 * mm context.
 16 * Called by the retry thread to execute retries within the
17 * iocb issuer's mm context, so that copy_from/to_user
18 * operations work seamlessly for aio.
19 * (Note: this routine is intended to be called only
20 * from a kernel thread context)
21 */
22void use_mm(struct mm_struct *mm)
23{
24 struct mm_struct *active_mm;
25 struct task_struct *tsk = current;
26
27 task_lock(tsk);
28 active_mm = tsk->active_mm;
29 if (active_mm != mm) {
30 atomic_inc(&mm->mm_count);
31 tsk->active_mm = mm;
32 }
33 tsk->mm = mm;
34 switch_mm(active_mm, mm, tsk);
35 task_unlock(tsk);
36
37 if (active_mm != mm)
38 mmdrop(active_mm);
39}
40
41/*
42 * unuse_mm
43 * Reverses the effect of use_mm, i.e. releases the
44 * specified mm context which was earlier taken on
45 * by the calling kernel thread
46 * (Note: this routine is intended to be called only
47 * from a kernel thread context)
48 */
49void unuse_mm(struct mm_struct *mm)
50{
51 struct task_struct *tsk = current;
52
53 task_lock(tsk);
54 tsk->mm = NULL;
55 /* active_mm is still 'mm' */
56 enter_lazy_tlb(mm, tsk);
57 task_unlock(tsk);
58}
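A hedged sketch of a kernel-thread caller, modelled on the aio retry path these helpers were lifted from; struct my_work and my_worker() are illustrative names, and the caller is assumed to hold an mm_users reference (e.g. from get_task_mm()):

#include <linux/mmu_context.h>
#include <linux/sched.h>
#include <linux/uaccess.h>

struct my_work {
	struct mm_struct *mm;	/* caller holds an mm_users reference */
	char __user *ubuf;
};

static int my_worker(void *data)
{
	struct my_work *w = data;
	char c = 0;

	use_mm(w->mm);		/* adopt the user address space */
	if (copy_from_user(&c, w->ubuf, 1))
		c = 0;		/* page not accessible from this mm */
	unuse_mm(w->mm);	/* revert to lazy-TLB kernel mm */
	mmput(w->mm);
	return c;
}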
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 5f4ef0250be..7e33f2cb3c7 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -99,6 +99,26 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
99 return young; 99 return young;
100} 100}
101 101
102void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
103 pte_t pte)
104{
105 struct mmu_notifier *mn;
106 struct hlist_node *n;
107
108 rcu_read_lock();
109 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
110 if (mn->ops->change_pte)
111 mn->ops->change_pte(mn, mm, address, pte);
112 /*
113 * Some drivers don't have change_pte,
114 * so we must call invalidate_page in that case.
115 */
116 else if (mn->ops->invalidate_page)
117 mn->ops->invalidate_page(mn, mm, address);
118 }
119 rcu_read_unlock();
120}
121
102void __mmu_notifier_invalidate_page(struct mm_struct *mm, 122void __mmu_notifier_invalidate_page(struct mm_struct *mm,
103 unsigned long address) 123 unsigned long address)
104{ 124{
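A hedged sketch of a driver-side consumer: a secondary MMU that implements the new change_pte hook, so COW replacements (the set_pte_at_notify() call added to do_wp_page above) update its mapping in place instead of merely invalidating it. The ops and callbacks below are illustrative, not from this patch:

#include <linux/mmu_notifier.h>

static void my_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
			  unsigned long address, pte_t pte)
{
	/* remap 'address' in the shadow page table to pte_pfn(pte) */
}

static void my_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
			       unsigned long address)
{
	/* drop the shadow mapping; the next access refaults */
}

static const struct mmu_notifier_ops my_mn_ops = {
	.change_pte	 = my_change_pte,
	.invalidate_page = my_invalidate_page,
};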
diff --git a/mm/mremap.c b/mm/mremap.c
index a39b7b91be4..20a07dba6be 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -11,6 +11,7 @@
11#include <linux/hugetlb.h> 11#include <linux/hugetlb.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/shm.h> 13#include <linux/shm.h>
14#include <linux/ksm.h>
14#include <linux/mman.h> 15#include <linux/mman.h>
15#include <linux/swap.h> 16#include <linux/swap.h>
16#include <linux/capability.h> 17#include <linux/capability.h>
@@ -174,6 +175,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
174 unsigned long excess = 0; 175 unsigned long excess = 0;
175 unsigned long hiwater_vm; 176 unsigned long hiwater_vm;
176 int split = 0; 177 int split = 0;
178 int err;
177 179
178 /* 180 /*
179 * We'd prefer to avoid failure later on in do_munmap: 181 * We'd prefer to avoid failure later on in do_munmap:
@@ -182,6 +184,18 @@ static unsigned long move_vma(struct vm_area_struct *vma,
182 if (mm->map_count >= sysctl_max_map_count - 3) 184 if (mm->map_count >= sysctl_max_map_count - 3)
183 return -ENOMEM; 185 return -ENOMEM;
184 186
187 /*
188 * Advise KSM to break any KSM pages in the area to be moved:
189 * it would be confusing if they were to turn up at the new
190 * location, where they happen to coincide with different KSM
191 * pages recently unmapped. But leave vma->vm_flags as it was,
192 * so KSM can come around to merge on vma and new_vma afterwards.
193 */
194 err = ksm_madvise(vma, old_addr, old_addr + old_len,
195 MADV_UNMERGEABLE, &vm_flags);
196 if (err)
197 return err;
198
185 new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); 199 new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
186 new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff); 200 new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
187 if (!new_vma) 201 if (!new_vma)
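The KSM pages that move_vma() must now break up are created from userspace with madvise(MADV_MERGEABLE); a hedged sketch, with fallback constants as assumptions for pre-2.6.32 headers:

#include <string.h>
#include <sys/mman.h>

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE 12
#endif
#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE 13
#endif

int main(void)
{
	size_t len = 32 * 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	memset(p, 0x5a, len);		 /* identical content: KSM candidates */
	madvise(p, len, MADV_MERGEABLE); /* ksmd may now merge these pages */
	/* ... an mremap() of this area would trigger the unmerge above ... */
	madvise(p, len, MADV_UNMERGEABLE);
	munmap(p, len);
	return 0;
}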
diff --git a/mm/nommu.c b/mm/nommu.c
index 66e81e7e9fe..1a4473faac4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -33,6 +33,7 @@
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
34#include <asm/tlb.h> 34#include <asm/tlb.h>
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include <asm/mmu_context.h>
36#include "internal.h" 37#include "internal.h"
37 38
38static inline __attribute__((format(printf, 1, 2))) 39static inline __attribute__((format(printf, 1, 2)))
@@ -56,8 +57,6 @@ void no_printk(const char *fmt, ...)
56 no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__) 57 no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
57#endif 58#endif
58 59
59#include "internal.h"
60
61void *high_memory; 60void *high_memory;
62struct page *mem_map; 61struct page *mem_map;
63unsigned long max_mapnr; 62unsigned long max_mapnr;
@@ -170,21 +169,20 @@ unsigned int kobjsize(const void *objp)
170} 169}
171 170
172int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 171int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
173 unsigned long start, int nr_pages, int flags, 172 unsigned long start, int nr_pages, int foll_flags,
174 struct page **pages, struct vm_area_struct **vmas) 173 struct page **pages, struct vm_area_struct **vmas)
175{ 174{
176 struct vm_area_struct *vma; 175 struct vm_area_struct *vma;
177 unsigned long vm_flags; 176 unsigned long vm_flags;
178 int i; 177 int i;
179 int write = !!(flags & GUP_FLAGS_WRITE);
180 int force = !!(flags & GUP_FLAGS_FORCE);
181 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
182 178
183 /* calculate required read or write permissions. 179 /* calculate required read or write permissions.
184 * - if 'force' is set, we only require the "MAY" flags. 180 * If FOLL_FORCE is set, we only require the "MAY" flags.
185 */ 181 */
186 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); 182 vm_flags = (foll_flags & FOLL_WRITE) ?
187 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); 183 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
184 vm_flags &= (foll_flags & FOLL_FORCE) ?
185 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
188 186
189 for (i = 0; i < nr_pages; i++) { 187 for (i = 0; i < nr_pages; i++) {
190 vma = find_vma(mm, start); 188 vma = find_vma(mm, start);
@@ -192,8 +190,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
192 goto finish_or_fault; 190 goto finish_or_fault;
193 191
194 /* protect what we can, including chardevs */ 192 /* protect what we can, including chardevs */
195 if (vma->vm_flags & (VM_IO | VM_PFNMAP) || 193 if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
196 (!ignore && !(vm_flags & vma->vm_flags))) 194 !(vm_flags & vma->vm_flags))
197 goto finish_or_fault; 195 goto finish_or_fault;
198 196
199 if (pages) { 197 if (pages) {
@@ -212,7 +210,6 @@ finish_or_fault:
212 return i ? : -EFAULT; 210 return i ? : -EFAULT;
213} 211}
214 212
215
216/* 213/*
217 * get a list of pages in an address range belonging to the specified process 214 * get a list of pages in an address range belonging to the specified process
218 * and indicate the VMA that covers each page 215 * and indicate the VMA that covers each page
@@ -227,9 +224,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
227 int flags = 0; 224 int flags = 0;
228 225
229 if (write) 226 if (write)
230 flags |= GUP_FLAGS_WRITE; 227 flags |= FOLL_WRITE;
231 if (force) 228 if (force)
232 flags |= GUP_FLAGS_FORCE; 229 flags |= FOLL_FORCE;
233 230
234 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); 231 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
235} 232}
@@ -627,6 +624,22 @@ static void put_nommu_region(struct vm_region *region)
627} 624}
628 625
629/* 626/*
627 * update protection on a vma
628 */
629static void protect_vma(struct vm_area_struct *vma, unsigned long flags)
630{
631#ifdef CONFIG_MPU
632 struct mm_struct *mm = vma->vm_mm;
633 long start = vma->vm_start & PAGE_MASK;
634 while (start < vma->vm_end) {
635 protect_page(mm, start, flags);
636 start += PAGE_SIZE;
637 }
638 update_protections(mm);
639#endif
640}
641
642/*
630 * add a VMA into a process's mm_struct in the appropriate place in the list 643 * add a VMA into a process's mm_struct in the appropriate place in the list
631 * and tree and add to the address space's page tree also if not an anonymous 644 * and tree and add to the address space's page tree also if not an anonymous
632 * page 645 * page
@@ -645,6 +658,8 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
645 mm->map_count++; 658 mm->map_count++;
646 vma->vm_mm = mm; 659 vma->vm_mm = mm;
647 660
661 protect_vma(vma, vma->vm_flags);
662
648 /* add the VMA to the mapping */ 663 /* add the VMA to the mapping */
649 if (vma->vm_file) { 664 if (vma->vm_file) {
650 mapping = vma->vm_file->f_mapping; 665 mapping = vma->vm_file->f_mapping;
@@ -707,6 +722,8 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
707 722
708 kenter("%p", vma); 723 kenter("%p", vma);
709 724
725 protect_vma(vma, 0);
726
710 mm->map_count--; 727 mm->map_count--;
711 if (mm->mmap_cache == vma) 728 if (mm->mmap_cache == vma)
712 mm->mmap_cache = NULL; 729 mm->mmap_cache = NULL;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index a7b2460e922..ea2147dabba 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -34,6 +34,23 @@ int sysctl_oom_dump_tasks;
34static DEFINE_SPINLOCK(zone_scan_lock); 34static DEFINE_SPINLOCK(zone_scan_lock);
35/* #define DEBUG */ 35/* #define DEBUG */
36 36
37/*
 38 * Do the cpuset mems_allowed of any thread in the target process overlap ours?
39 */
40static int has_intersects_mems_allowed(struct task_struct *tsk)
41{
42 struct task_struct *t;
43
44 t = tsk;
45 do {
46 if (cpuset_mems_allowed_intersects(current, t))
47 return 1;
48 t = next_thread(t);
49 } while (t != tsk);
50
51 return 0;
52}
53
37/** 54/**
38 * badness - calculate a numeric value for how bad this task has been 55 * badness - calculate a numeric value for how bad this task has been
39 * @p: task struct of which task we should calculate 56 * @p: task struct of which task we should calculate
@@ -58,6 +75,13 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
58 unsigned long points, cpu_time, run_time; 75 unsigned long points, cpu_time, run_time;
59 struct mm_struct *mm; 76 struct mm_struct *mm;
60 struct task_struct *child; 77 struct task_struct *child;
78 int oom_adj = p->signal->oom_adj;
79 struct task_cputime task_time;
80 unsigned long utime;
81 unsigned long stime;
82
83 if (oom_adj == OOM_DISABLE)
84 return 0;
61 85
62 task_lock(p); 86 task_lock(p);
63 mm = p->mm; 87 mm = p->mm;
@@ -79,7 +103,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
79 /* 103 /*
80 * swapoff can easily use up all memory, so kill those first. 104 * swapoff can easily use up all memory, so kill those first.
81 */ 105 */
82 if (p->flags & PF_SWAPOFF) 106 if (p->flags & PF_OOM_ORIGIN)
83 return ULONG_MAX; 107 return ULONG_MAX;
84 108
85 /* 109 /*
@@ -102,8 +126,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
102 * of seconds. There is no particular reason for this other than 126 * of seconds. There is no particular reason for this other than
103 * that it turned out to work very well in practice. 127 * that it turned out to work very well in practice.
104 */ 128 */
105 cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime)) 129 thread_group_cputime(p, &task_time);
106 >> (SHIFT_HZ + 3); 130 utime = cputime_to_jiffies(task_time.utime);
131 stime = cputime_to_jiffies(task_time.stime);
132 cpu_time = (utime + stime) >> (SHIFT_HZ + 3);
133
107 134
108 if (uptime >= p->start_time.tv_sec) 135 if (uptime >= p->start_time.tv_sec)
109 run_time = (uptime - p->start_time.tv_sec) >> 10; 136 run_time = (uptime - p->start_time.tv_sec) >> 10;
@@ -144,19 +171,19 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
144 * because p may have allocated or otherwise mapped memory on 171 * because p may have allocated or otherwise mapped memory on
145 * this node before. However it will be less likely. 172 * this node before. However it will be less likely.
146 */ 173 */
147 if (!cpuset_mems_allowed_intersects(current, p)) 174 if (!has_intersects_mems_allowed(p))
148 points /= 8; 175 points /= 8;
149 176
150 /* 177 /*
151 * Adjust the score by oomkilladj. 178 * Adjust the score by oom_adj.
152 */ 179 */
153 if (p->oomkilladj) { 180 if (oom_adj) {
154 if (p->oomkilladj > 0) { 181 if (oom_adj > 0) {
155 if (!points) 182 if (!points)
156 points = 1; 183 points = 1;
157 points <<= p->oomkilladj; 184 points <<= oom_adj;
158 } else 185 } else
159 points >>= -(p->oomkilladj); 186 points >>= -(oom_adj);
160 } 187 }
161 188
162#ifdef DEBUG 189#ifdef DEBUG
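The adjustment arithmetic above is worth seeing in isolation: each positive oom_adj step doubles the badness score and each negative step halves it, with OOM_DISABLE short-circuiting earlier in the function. A hedged standalone restatement; my_adjust_points() is an illustrative name:

static unsigned long my_adjust_points(unsigned long points, int oom_adj)
{
	if (oom_adj > 0) {
		if (!points)
			points = 1;	/* make the shift visible */
		points <<= oom_adj;	/* oom_adj = 3 => 8x the score */
	} else if (oom_adj < 0) {
		points >>= -oom_adj;	/* oom_adj = -3 => score / 8 */
	}
	return points;
}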
@@ -200,13 +227,13 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
200static struct task_struct *select_bad_process(unsigned long *ppoints, 227static struct task_struct *select_bad_process(unsigned long *ppoints,
201 struct mem_cgroup *mem) 228 struct mem_cgroup *mem)
202{ 229{
203 struct task_struct *g, *p; 230 struct task_struct *p;
204 struct task_struct *chosen = NULL; 231 struct task_struct *chosen = NULL;
205 struct timespec uptime; 232 struct timespec uptime;
206 *ppoints = 0; 233 *ppoints = 0;
207 234
208 do_posix_clock_monotonic_gettime(&uptime); 235 do_posix_clock_monotonic_gettime(&uptime);
209 do_each_thread(g, p) { 236 for_each_process(p) {
210 unsigned long points; 237 unsigned long points;
211 238
212 /* 239 /*
@@ -251,7 +278,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
251 *ppoints = ULONG_MAX; 278 *ppoints = ULONG_MAX;
252 } 279 }
253 280
254 if (p->oomkilladj == OOM_DISABLE) 281 if (p->signal->oom_adj == OOM_DISABLE)
255 continue; 282 continue;
256 283
257 points = badness(p, uptime.tv_sec); 284 points = badness(p, uptime.tv_sec);
@@ -259,7 +286,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
259 chosen = p; 286 chosen = p;
260 *ppoints = points; 287 *ppoints = points;
261 } 288 }
262 } while_each_thread(g, p); 289 }
263 290
264 return chosen; 291 return chosen;
265} 292}
@@ -304,7 +331,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
304 } 331 }
305 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", 332 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
306 p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm, 333 p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
307 get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj, 334 get_mm_rss(mm), (int)task_cpu(p), p->signal->oom_adj,
308 p->comm); 335 p->comm);
309 task_unlock(p); 336 task_unlock(p);
310 } while_each_thread(g, p); 337 } while_each_thread(g, p);
@@ -346,11 +373,6 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
346 373
347static int oom_kill_task(struct task_struct *p) 374static int oom_kill_task(struct task_struct *p)
348{ 375{
349 struct mm_struct *mm;
350 struct task_struct *g, *q;
351
352 mm = p->mm;
353
354 /* WARNING: mm may not be dereferenced since we did not obtain its 376 /* WARNING: mm may not be dereferenced since we did not obtain its
355 * value from get_task_mm(p). This is OK since all we need to do is 377 * value from get_task_mm(p). This is OK since all we need to do is
356 * compare mm to q->mm below. 378 * compare mm to q->mm below.
@@ -359,30 +381,11 @@ static int oom_kill_task(struct task_struct *p)
359 * change to NULL at any time since we do not hold task_lock(p). 381 * change to NULL at any time since we do not hold task_lock(p).
360 * However, this is of no concern to us. 382 * However, this is of no concern to us.
361 */ 383 */
362 384 if (!p->mm || p->signal->oom_adj == OOM_DISABLE)
363 if (mm == NULL)
364 return 1; 385 return 1;
365 386
366 /*
367 * Don't kill the process if any threads are set to OOM_DISABLE
368 */
369 do_each_thread(g, q) {
370 if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
371 return 1;
372 } while_each_thread(g, q);
373
374 __oom_kill_task(p, 1); 387 __oom_kill_task(p, 1);
375 388
376 /*
377 * kill all processes that share the ->mm (i.e. all threads),
378 * but are in a different thread group. Don't let them have access
379 * to memory reserves though, otherwise we might deplete all memory.
380 */
381 do_each_thread(g, q) {
382 if (q->mm == mm && !same_thread_group(q, p))
383 force_sig(SIGKILL, q);
384 } while_each_thread(g, q);
385
386 return 0; 389 return 0;
387} 390}
388 391
@@ -394,8 +397,9 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
394 397
395 if (printk_ratelimit()) { 398 if (printk_ratelimit()) {
396 printk(KERN_WARNING "%s invoked oom-killer: " 399 printk(KERN_WARNING "%s invoked oom-killer: "
397 "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", 400 "gfp_mask=0x%x, order=%d, oom_adj=%d\n",
398 current->comm, gfp_mask, order, current->oomkilladj); 401 current->comm, gfp_mask, order,
402 current->signal->oom_adj);
399 task_lock(current); 403 task_lock(current);
400 cpuset_print_task_mems_allowed(current); 404 cpuset_print_task_mems_allowed(current);
401 task_unlock(current); 405 task_unlock(current);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d1ba4644105..5f378dd5880 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -380,7 +380,8 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
380 struct zone *z = 380 struct zone *z =
381 &NODE_DATA(node)->node_zones[ZONE_HIGHMEM]; 381 &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
382 382
383 x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z); 383 x += zone_page_state(z, NR_FREE_PAGES) +
384 zone_reclaimable_pages(z);
384 } 385 }
385 /* 386 /*
386 * Make sure that the number of highmem pages is never larger 387 * Make sure that the number of highmem pages is never larger
@@ -404,7 +405,7 @@ unsigned long determine_dirtyable_memory(void)
404{ 405{
405 unsigned long x; 406 unsigned long x;
406 407
407 x = global_page_state(NR_FREE_PAGES) + global_lru_pages(); 408 x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
408 409
409 if (!vm_highmem_is_dirtyable) 410 if (!vm_highmem_is_dirtyable)
410 x -= highmem_dirtyable_memory(x); 411 x -= highmem_dirtyable_memory(x);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0de15f4698..5717f27a070 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
48#include <linux/page_cgroup.h> 48#include <linux/page_cgroup.h>
49#include <linux/debugobjects.h> 49#include <linux/debugobjects.h>
50#include <linux/kmemleak.h> 50#include <linux/kmemleak.h>
51#include <trace/events/kmem.h>
51 52
52#include <asm/tlbflush.h> 53#include <asm/tlbflush.h>
53#include <asm/div64.h> 54#include <asm/div64.h>
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(node_states);
71 72
72unsigned long totalram_pages __read_mostly; 73unsigned long totalram_pages __read_mostly;
73unsigned long totalreserve_pages __read_mostly; 74unsigned long totalreserve_pages __read_mostly;
74unsigned long highest_memmap_pfn __read_mostly;
75int percpu_pagelist_fraction; 75int percpu_pagelist_fraction;
76gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; 76gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
77 77
@@ -123,8 +123,8 @@ static char * const zone_names[MAX_NR_ZONES] = {
123 123
124int min_free_kbytes = 1024; 124int min_free_kbytes = 1024;
125 125
126unsigned long __meminitdata nr_kernel_pages; 126static unsigned long __meminitdata nr_kernel_pages;
127unsigned long __meminitdata nr_all_pages; 127static unsigned long __meminitdata nr_all_pages;
128static unsigned long __meminitdata dma_reserve; 128static unsigned long __meminitdata dma_reserve;
129 129
130#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 130#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
@@ -510,7 +510,7 @@ static inline int free_pages_check(struct page *page)
510} 510}
511 511
512/* 512/*
513 * Frees a list of pages. 513 * Frees a number of pages from the PCP lists
514 * Assumes all pages on list are in same zone, and of same order. 514 * Assumes all pages on list are in same zone, and of same order.
515 * count is the number of pages to free. 515 * count is the number of pages to free.
516 * 516 *
@@ -520,22 +520,42 @@ static inline int free_pages_check(struct page *page)
520 * And clear the zone's pages_scanned counter, to hold off the "all pages are 520 * And clear the zone's pages_scanned counter, to hold off the "all pages are
521 * pinned" detection logic. 521 * pinned" detection logic.
522 */ 522 */
523static void free_pages_bulk(struct zone *zone, int count, 523static void free_pcppages_bulk(struct zone *zone, int count,
524 struct list_head *list, int order) 524 struct per_cpu_pages *pcp)
525{ 525{
526 int migratetype = 0;
527 int batch_free = 0;
528
526 spin_lock(&zone->lock); 529 spin_lock(&zone->lock);
527 zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); 530 zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
528 zone->pages_scanned = 0; 531 zone->pages_scanned = 0;
529 532
530 __mod_zone_page_state(zone, NR_FREE_PAGES, count << order); 533 __mod_zone_page_state(zone, NR_FREE_PAGES, count);
531 while (count--) { 534 while (count) {
532 struct page *page; 535 struct page *page;
536 struct list_head *list;
533 537
534 VM_BUG_ON(list_empty(list)); 538 /*
535 page = list_entry(list->prev, struct page, lru); 539 * Remove pages from lists in a round-robin fashion. A
536 /* have to delete it as __free_one_page list manipulates */ 540 * batch_free count is maintained that is incremented when an
537 list_del(&page->lru); 541 * empty list is encountered. This is so more pages are freed
538 __free_one_page(page, zone, order, page_private(page)); 542 * off fuller lists instead of spinning excessively around empty
543 * lists
544 */
545 do {
546 batch_free++;
547 if (++migratetype == MIGRATE_PCPTYPES)
548 migratetype = 0;
549 list = &pcp->lists[migratetype];
550 } while (list_empty(list));
551
552 do {
553 page = list_entry(list->prev, struct page, lru);
554 /* must delete as __free_one_page list manipulates */
555 list_del(&page->lru);
556 __free_one_page(page, zone, 0, migratetype);
557 trace_mm_page_pcpu_drain(page, 0, migratetype);
558 } while (--count && --batch_free && !list_empty(list));
539 } 559 }
540 spin_unlock(&zone->lock); 560 spin_unlock(&zone->lock);
541} 561}
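The round-robin drain is easier to see outside the allocator. A hedged userspace simulation of the loop above, with three arrays standing in for the per-migratetype pcp lists; batch_free grows past each empty list, so fuller lists give up more pages per visit:

#include <stdio.h>

#define MIGRATE_PCPTYPES 3

int main(void)
{
	int count_per_list[MIGRATE_PCPTYPES] = { 8, 0, 3 };
	int count = 11, migratetype = 0, batch_free = 0;

	while (count) {
		/* skip empty lists, raising batch_free for each one seen */
		do {
			batch_free++;
			if (++migratetype == MIGRATE_PCPTYPES)
				migratetype = 0;
		} while (!count_per_list[migratetype]);

		/* free up to batch_free pages from the chosen list */
		do {
			count_per_list[migratetype]--;
			printf("freed one page from list %d\n", migratetype);
		} while (--count && --batch_free &&
			 count_per_list[migratetype]);
	}
	return 0;
}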
@@ -557,7 +577,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
557 unsigned long flags; 577 unsigned long flags;
558 int i; 578 int i;
559 int bad = 0; 579 int bad = 0;
560 int wasMlocked = TestClearPageMlocked(page); 580 int wasMlocked = __TestClearPageMlocked(page);
561 581
562 kmemcheck_free_shadow(page, order); 582 kmemcheck_free_shadow(page, order);
563 583
@@ -783,6 +803,17 @@ static int move_freepages_block(struct zone *zone, struct page *page,
783 return move_freepages(zone, start_page, end_page, migratetype); 803 return move_freepages(zone, start_page, end_page, migratetype);
784} 804}
785 805
806static void change_pageblock_range(struct page *pageblock_page,
807 int start_order, int migratetype)
808{
809 int nr_pageblocks = 1 << (start_order - pageblock_order);
810
811 while (nr_pageblocks--) {
812 set_pageblock_migratetype(pageblock_page, migratetype);
813 pageblock_page += pageblock_nr_pages;
814 }
815}
816
786/* Remove an element from the buddy allocator from the fallback list */ 817/* Remove an element from the buddy allocator from the fallback list */
787static inline struct page * 818static inline struct page *
788__rmqueue_fallback(struct zone *zone, int order, int start_migratetype) 819__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
@@ -836,11 +867,16 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
836 list_del(&page->lru); 867 list_del(&page->lru);
837 rmv_page_order(page); 868 rmv_page_order(page);
838 869
839 if (current_order == pageblock_order) 870 /* Take ownership for orders >= pageblock_order */
840 set_pageblock_migratetype(page, 871 if (current_order >= pageblock_order)
872 change_pageblock_range(page, current_order,
841 start_migratetype); 873 start_migratetype);
842 874
843 expand(zone, page, order, current_order, area, migratetype); 875 expand(zone, page, order, current_order, area, migratetype);
876
877 trace_mm_page_alloc_extfrag(page, order, current_order,
878 start_migratetype, migratetype);
879
844 return page; 880 return page;
845 } 881 }
846 } 882 }
@@ -874,6 +910,7 @@ retry_reserve:
874 } 910 }
875 } 911 }
876 912
913 trace_mm_page_alloc_zone_locked(page, order, migratetype);
877 return page; 914 return page;
878} 915}
879 916
@@ -934,7 +971,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
934 to_drain = pcp->batch; 971 to_drain = pcp->batch;
935 else 972 else
936 to_drain = pcp->count; 973 to_drain = pcp->count;
937 free_pages_bulk(zone, to_drain, &pcp->list, 0); 974 free_pcppages_bulk(zone, to_drain, pcp);
938 pcp->count -= to_drain; 975 pcp->count -= to_drain;
939 local_irq_restore(flags); 976 local_irq_restore(flags);
940} 977}
@@ -960,7 +997,7 @@ static void drain_pages(unsigned int cpu)
960 997
961 pcp = &pset->pcp; 998 pcp = &pset->pcp;
962 local_irq_save(flags); 999 local_irq_save(flags);
963 free_pages_bulk(zone, pcp->count, &pcp->list, 0); 1000 free_pcppages_bulk(zone, pcp->count, pcp);
964 pcp->count = 0; 1001 pcp->count = 0;
965 local_irq_restore(flags); 1002 local_irq_restore(flags);
966 } 1003 }
@@ -1026,7 +1063,8 @@ static void free_hot_cold_page(struct page *page, int cold)
1026 struct zone *zone = page_zone(page); 1063 struct zone *zone = page_zone(page);
1027 struct per_cpu_pages *pcp; 1064 struct per_cpu_pages *pcp;
1028 unsigned long flags; 1065 unsigned long flags;
1029 int wasMlocked = TestClearPageMlocked(page); 1066 int migratetype;
1067 int wasMlocked = __TestClearPageMlocked(page);
1030 1068
1031 kmemcheck_free_shadow(page, 0); 1069 kmemcheck_free_shadow(page, 0);
1032 1070
@@ -1043,35 +1081,49 @@ static void free_hot_cold_page(struct page *page, int cold)
1043 kernel_map_pages(page, 1, 0); 1081 kernel_map_pages(page, 1, 0);
1044 1082
1045 pcp = &zone_pcp(zone, get_cpu())->pcp; 1083 pcp = &zone_pcp(zone, get_cpu())->pcp;
1046 set_page_private(page, get_pageblock_migratetype(page)); 1084 migratetype = get_pageblock_migratetype(page);
1085 set_page_private(page, migratetype);
1047 local_irq_save(flags); 1086 local_irq_save(flags);
1048 if (unlikely(wasMlocked)) 1087 if (unlikely(wasMlocked))
1049 free_page_mlock(page); 1088 free_page_mlock(page);
1050 __count_vm_event(PGFREE); 1089 __count_vm_event(PGFREE);
1051 1090
1091 /*
1092 * We only track unmovable, reclaimable and movable on pcp lists.
1093 * Free ISOLATE pages back to the allocator because they are being
1094 * offlined but treat RESERVE as movable pages so we can get those
1095 * areas back if necessary. Otherwise, we may have to free
1096 * excessively into the page allocator
1097 */
1098 if (migratetype >= MIGRATE_PCPTYPES) {
1099 if (unlikely(migratetype == MIGRATE_ISOLATE)) {
1100 free_one_page(zone, page, 0, migratetype);
1101 goto out;
1102 }
1103 migratetype = MIGRATE_MOVABLE;
1104 }
1105
1052 if (cold) 1106 if (cold)
1053 list_add_tail(&page->lru, &pcp->list); 1107 list_add_tail(&page->lru, &pcp->lists[migratetype]);
1054 else 1108 else
1055 list_add(&page->lru, &pcp->list); 1109 list_add(&page->lru, &pcp->lists[migratetype]);
1056 pcp->count++; 1110 pcp->count++;
1057 if (pcp->count >= pcp->high) { 1111 if (pcp->count >= pcp->high) {
1058 free_pages_bulk(zone, pcp->batch, &pcp->list, 0); 1112 free_pcppages_bulk(zone, pcp->batch, pcp);
1059 pcp->count -= pcp->batch; 1113 pcp->count -= pcp->batch;
1060 } 1114 }
1115
1116out:
1061 local_irq_restore(flags); 1117 local_irq_restore(flags);
1062 put_cpu(); 1118 put_cpu();
1063} 1119}
1064 1120
1065void free_hot_page(struct page *page) 1121void free_hot_page(struct page *page)
1066{ 1122{
1123 trace_mm_page_free_direct(page, 0);
1067 free_hot_cold_page(page, 0); 1124 free_hot_cold_page(page, 0);
1068} 1125}
1069 1126
1070void free_cold_page(struct page *page)
1071{
1072 free_hot_cold_page(page, 1);
1073}
1074
1075/* 1127/*
1076 * split_page takes a non-compound higher-order page, and splits it into 1128 * split_page takes a non-compound higher-order page, and splits it into
1077 * n (1<<order) sub-pages: page[0..n] 1129 * n (1<<order) sub-pages: page[0..n]
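
A note on the data structure these page_alloc.c hunks revolve around: the single pcp->list becomes one list per migratetype, so frees file a page by type in O(1) and allocations stop scanning one mixed list. A minimal sketch of the reworked structure, assuming the three MIGRATE_PCPTYPES are unmovable, reclaimable and movable (as the comment above states):

	#define MIGRATE_PCPTYPES	3	/* unmovable, reclaimable, movable */

	struct per_cpu_pages {
		int count;	/* number of pages in the lists */
		int high;	/* high watermark, emptying needed */
		int batch;	/* chunk size for buddy add/remove */
		/* one queue per migrate type kept on the pcp lists */
		struct list_head lists[MIGRATE_PCPTYPES];
	};
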
@@ -1119,35 +1171,23 @@ again:
1119 cpu = get_cpu(); 1171 cpu = get_cpu();
1120 if (likely(order == 0)) { 1172 if (likely(order == 0)) {
1121 struct per_cpu_pages *pcp; 1173 struct per_cpu_pages *pcp;
1174 struct list_head *list;
1122 1175
1123 pcp = &zone_pcp(zone, cpu)->pcp; 1176 pcp = &zone_pcp(zone, cpu)->pcp;
1177 list = &pcp->lists[migratetype];
1124 local_irq_save(flags); 1178 local_irq_save(flags);
1125 if (!pcp->count) { 1179 if (list_empty(list)) {
1126 pcp->count = rmqueue_bulk(zone, 0, 1180 pcp->count += rmqueue_bulk(zone, 0,
1127 pcp->batch, &pcp->list, 1181 pcp->batch, list,
1128 migratetype, cold); 1182 migratetype, cold);
1129 if (unlikely(!pcp->count)) 1183 if (unlikely(list_empty(list)))
1130 goto failed; 1184 goto failed;
1131 } 1185 }
1132 1186
1133 /* Find a page of the appropriate migrate type */ 1187 if (cold)
1134 if (cold) { 1188 page = list_entry(list->prev, struct page, lru);
1135 list_for_each_entry_reverse(page, &pcp->list, lru) 1189 else
1136 if (page_private(page) == migratetype) 1190 page = list_entry(list->next, struct page, lru);
1137 break;
1138 } else {
1139 list_for_each_entry(page, &pcp->list, lru)
1140 if (page_private(page) == migratetype)
1141 break;
1142 }
1143
1144 /* Allocate more to the pcp list if necessary */
1145 if (unlikely(&page->lru == &pcp->list)) {
1146 pcp->count += rmqueue_bulk(zone, 0,
1147 pcp->batch, &pcp->list,
1148 migratetype, cold);
1149 page = list_entry(pcp->list.next, struct page, lru);
1150 }
1151 1191
1152 list_del(&page->lru); 1192 list_del(&page->lru);
1153 pcp->count--; 1193 pcp->count--;
@@ -1627,10 +1667,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
1627 1667
1628 /* We now go into synchronous reclaim */ 1668 /* We now go into synchronous reclaim */
1629 cpuset_memory_pressure_bump(); 1669 cpuset_memory_pressure_bump();
1630
1631 /*
1632 * The task's cpuset might have expanded its set of allowable nodes
1633 */
1634 p->flags |= PF_MEMALLOC; 1670 p->flags |= PF_MEMALLOC;
1635 lockdep_set_current_reclaim_state(gfp_mask); 1671 lockdep_set_current_reclaim_state(gfp_mask);
1636 reclaim_state.reclaimed_slab = 0; 1672 reclaim_state.reclaimed_slab = 0;
@@ -1765,6 +1801,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
1765 1801
1766 wake_all_kswapd(order, zonelist, high_zoneidx); 1802 wake_all_kswapd(order, zonelist, high_zoneidx);
1767 1803
1804restart:
1768 /* 1805 /*
1769 * OK, we're below the kswapd watermark and have kicked background 1806 * OK, we're below the kswapd watermark and have kicked background
1770 * reclaim. Now things get more complex, so set up alloc_flags according 1807 * reclaim. Now things get more complex, so set up alloc_flags according
@@ -1772,7 +1809,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
1772 */ 1809 */
1773 alloc_flags = gfp_to_alloc_flags(gfp_mask); 1810 alloc_flags = gfp_to_alloc_flags(gfp_mask);
1774 1811
1775restart:
1776 /* This is the last chance, in general, before the goto nopage. */ 1812 /* This is the last chance, in general, before the goto nopage. */
1777 page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, 1813 page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
1778 high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, 1814 high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
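
Hoisting the restart label above gfp_to_alloc_flags() means alloc_flags are re-derived on every pass through the slowpath, presumably so that state the task picked up while reclaiming (for example, being chosen by the OOM killer) is reflected in the next attempt. A simplified sketch of the resulting control flow; try_allocation(), should_give_up() and reclaim_some_memory() are hypothetical stand-ins for the surrounding code:

	for (;;) {
		/* re-evaluated each iteration, unlike before this hunk */
		alloc_flags = gfp_to_alloc_flags(gfp_mask);
		page = try_allocation(alloc_flags & ~ALLOC_NO_WATERMARKS);
		if (page || should_give_up())
			break;
		reclaim_some_memory();	/* may change the task's state */
	}
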
@@ -1907,6 +1943,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
1907 zonelist, high_zoneidx, nodemask, 1943 zonelist, high_zoneidx, nodemask,
1908 preferred_zone, migratetype); 1944 preferred_zone, migratetype);
1909 1945
1946 trace_mm_page_alloc(page, order, gfp_mask, migratetype);
1910 return page; 1947 return page;
1911} 1948}
1912EXPORT_SYMBOL(__alloc_pages_nodemask); 1949EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -1916,44 +1953,41 @@ EXPORT_SYMBOL(__alloc_pages_nodemask);
1916 */ 1953 */
1917unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) 1954unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
1918{ 1955{
1919 struct page * page; 1956 struct page *page;
1957
1958 /*
1959 * __get_free_pages() returns a 32-bit address, which cannot represent
1960 * a highmem page
1961 */
1962 VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
1963
1920 page = alloc_pages(gfp_mask, order); 1964 page = alloc_pages(gfp_mask, order);
1921 if (!page) 1965 if (!page)
1922 return 0; 1966 return 0;
1923 return (unsigned long) page_address(page); 1967 return (unsigned long) page_address(page);
1924} 1968}
1925
1926EXPORT_SYMBOL(__get_free_pages); 1969EXPORT_SYMBOL(__get_free_pages);
1927 1970
1928unsigned long get_zeroed_page(gfp_t gfp_mask) 1971unsigned long get_zeroed_page(gfp_t gfp_mask)
1929{ 1972{
1930 struct page * page; 1973 return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
1931
1932 /*
1933 * get_zeroed_page() returns a 32-bit address, which cannot represent
1934 * a highmem page
1935 */
1936 VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
1937
1938 page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
1939 if (page)
1940 return (unsigned long) page_address(page);
1941 return 0;
1942} 1974}
1943
1944EXPORT_SYMBOL(get_zeroed_page); 1975EXPORT_SYMBOL(get_zeroed_page);
1945 1976
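
get_zeroed_page() collapsing into a one-line wrapper works because the __GFP_HIGHMEM check it used to duplicate now lives in __get_free_pages(): both helpers return a kernel virtual address, which a highmem page (having no permanent kernel mapping) cannot provide. A usage sketch with the real API:

	unsigned long addr = get_zeroed_page(GFP_KERNEL);	/* never __GFP_HIGHMEM here */
	if (!addr)
		return -ENOMEM;
	/* ... use the zeroed page ... */
	free_page(addr);
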
1946void __pagevec_free(struct pagevec *pvec) 1977void __pagevec_free(struct pagevec *pvec)
1947{ 1978{
1948 int i = pagevec_count(pvec); 1979 int i = pagevec_count(pvec);
1949 1980
1950 while (--i >= 0) 1981 while (--i >= 0) {
1982 trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
1951 free_hot_cold_page(pvec->pages[i], pvec->cold); 1983 free_hot_cold_page(pvec->pages[i], pvec->cold);
1984 }
1952} 1985}
1953 1986
1954void __free_pages(struct page *page, unsigned int order) 1987void __free_pages(struct page *page, unsigned int order)
1955{ 1988{
1956 if (put_page_testzero(page)) { 1989 if (put_page_testzero(page)) {
1990 trace_mm_page_free_direct(page, order);
1957 if (order == 0) 1991 if (order == 0)
1958 free_hot_page(page); 1992 free_hot_page(page);
1959 else 1993 else
@@ -2128,23 +2162,28 @@ void show_free_areas(void)
2128 } 2162 }
2129 } 2163 }
2130 2164
2131 printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n" 2165 printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
2132 " inactive_file:%lu" 2166 " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
2133 " unevictable:%lu" 2167 " unevictable:%lu"
2134 " dirty:%lu writeback:%lu unstable:%lu\n" 2168 " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
2135 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", 2169 " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
2170 " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
2136 global_page_state(NR_ACTIVE_ANON), 2171 global_page_state(NR_ACTIVE_ANON),
2137 global_page_state(NR_ACTIVE_FILE),
2138 global_page_state(NR_INACTIVE_ANON), 2172 global_page_state(NR_INACTIVE_ANON),
2173 global_page_state(NR_ISOLATED_ANON),
2174 global_page_state(NR_ACTIVE_FILE),
2139 global_page_state(NR_INACTIVE_FILE), 2175 global_page_state(NR_INACTIVE_FILE),
2176 global_page_state(NR_ISOLATED_FILE),
2140 global_page_state(NR_UNEVICTABLE), 2177 global_page_state(NR_UNEVICTABLE),
2141 global_page_state(NR_FILE_DIRTY), 2178 global_page_state(NR_FILE_DIRTY),
2142 global_page_state(NR_WRITEBACK), 2179 global_page_state(NR_WRITEBACK),
2143 global_page_state(NR_UNSTABLE_NFS), 2180 global_page_state(NR_UNSTABLE_NFS),
2181 nr_blockdev_pages(),
2144 global_page_state(NR_FREE_PAGES), 2182 global_page_state(NR_FREE_PAGES),
2145 global_page_state(NR_SLAB_RECLAIMABLE) + 2183 global_page_state(NR_SLAB_RECLAIMABLE),
2146 global_page_state(NR_SLAB_UNRECLAIMABLE), 2184 global_page_state(NR_SLAB_UNRECLAIMABLE),
2147 global_page_state(NR_FILE_MAPPED), 2185 global_page_state(NR_FILE_MAPPED),
2186 global_page_state(NR_SHMEM),
2148 global_page_state(NR_PAGETABLE), 2187 global_page_state(NR_PAGETABLE),
2149 global_page_state(NR_BOUNCE)); 2188 global_page_state(NR_BOUNCE));
2150 2189
@@ -2162,7 +2201,21 @@ void show_free_areas(void)
2162 " active_file:%lukB" 2201 " active_file:%lukB"
2163 " inactive_file:%lukB" 2202 " inactive_file:%lukB"
2164 " unevictable:%lukB" 2203 " unevictable:%lukB"
2204 " isolated(anon):%lukB"
2205 " isolated(file):%lukB"
2165 " present:%lukB" 2206 " present:%lukB"
2207 " mlocked:%lukB"
2208 " dirty:%lukB"
2209 " writeback:%lukB"
2210 " mapped:%lukB"
2211 " shmem:%lukB"
2212 " slab_reclaimable:%lukB"
2213 " slab_unreclaimable:%lukB"
2214 " kernel_stack:%lukB"
2215 " pagetables:%lukB"
2216 " unstable:%lukB"
2217 " bounce:%lukB"
2218 " writeback_tmp:%lukB"
2166 " pages_scanned:%lu" 2219 " pages_scanned:%lu"
2167 " all_unreclaimable? %s" 2220 " all_unreclaimable? %s"
2168 "\n", 2221 "\n",
@@ -2176,7 +2229,22 @@ void show_free_areas(void)
2176 K(zone_page_state(zone, NR_ACTIVE_FILE)), 2229 K(zone_page_state(zone, NR_ACTIVE_FILE)),
2177 K(zone_page_state(zone, NR_INACTIVE_FILE)), 2230 K(zone_page_state(zone, NR_INACTIVE_FILE)),
2178 K(zone_page_state(zone, NR_UNEVICTABLE)), 2231 K(zone_page_state(zone, NR_UNEVICTABLE)),
2232 K(zone_page_state(zone, NR_ISOLATED_ANON)),
2233 K(zone_page_state(zone, NR_ISOLATED_FILE)),
2179 K(zone->present_pages), 2234 K(zone->present_pages),
2235 K(zone_page_state(zone, NR_MLOCK)),
2236 K(zone_page_state(zone, NR_FILE_DIRTY)),
2237 K(zone_page_state(zone, NR_WRITEBACK)),
2238 K(zone_page_state(zone, NR_FILE_MAPPED)),
2239 K(zone_page_state(zone, NR_SHMEM)),
2240 K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
2241 K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
2242 zone_page_state(zone, NR_KERNEL_STACK) *
2243 THREAD_SIZE / 1024,
2244 K(zone_page_state(zone, NR_PAGETABLE)),
2245 K(zone_page_state(zone, NR_UNSTABLE_NFS)),
2246 K(zone_page_state(zone, NR_BOUNCE)),
2247 K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
2180 zone->pages_scanned, 2248 zone->pages_scanned,
2181 (zone_is_all_unreclaimable(zone) ? "yes" : "no") 2249 (zone_is_all_unreclaimable(zone) ? "yes" : "no")
2182 ); 2250 );
@@ -2783,7 +2851,8 @@ static void setup_zone_migrate_reserve(struct zone *zone)
2783{ 2851{
2784 unsigned long start_pfn, pfn, end_pfn; 2852 unsigned long start_pfn, pfn, end_pfn;
2785 struct page *page; 2853 struct page *page;
2786 unsigned long reserve, block_migratetype; 2854 unsigned long block_migratetype;
2855 int reserve;
2787 2856
2788 /* Get the start pfn, end pfn and the number of blocks to reserve */ 2857 /* Get the start pfn, end pfn and the number of blocks to reserve */
2789 start_pfn = zone->zone_start_pfn; 2858 start_pfn = zone->zone_start_pfn;
@@ -2791,6 +2860,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
2791 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> 2860 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
2792 pageblock_order; 2861 pageblock_order;
2793 2862
2863 /*
2864 * Reserve blocks are generally in place to help high-order atomic
2865 * allocations that are short-lived. A min_free_kbytes value that
2866 * would result in more than 2 reserve blocks for atomic allocations
2867 * is assumed to be in place to help anti-fragmentation for the
2868 * future allocation of hugepages at runtime.
2869 */
2870 reserve = min(2, reserve);
2871
2794 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { 2872 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
2795 if (!pfn_valid(pfn)) 2873 if (!pfn_valid(pfn))
2796 continue; 2874 continue;
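
A worked example of the new clamp, with hypothetical but typical numbers (4K pages, pageblock_order == 9, so pageblock_nr_pages == 512):

	/*
	 * min_wmark_pages(zone) == 5673
	 *   => reserve = roundup(5673, 512) >> 9 == 6144 >> 9 == 12 blocks
	 * min(2, reserve) caps this at 2 MIGRATE_RESERVE blocks; a
	 * min_free_kbytes large enough to ask for more is assumed to be
	 * tuned for hugepage anti-fragmentation, not atomic allocations.
	 */
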
@@ -2961,6 +3039,7 @@ static int zone_batchsize(struct zone *zone)
2961static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) 3039static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
2962{ 3040{
2963 struct per_cpu_pages *pcp; 3041 struct per_cpu_pages *pcp;
3042 int migratetype;
2964 3043
2965 memset(p, 0, sizeof(*p)); 3044 memset(p, 0, sizeof(*p));
2966 3045
@@ -2968,7 +3047,8 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
2968 pcp->count = 0; 3047 pcp->count = 0;
2969 pcp->high = 6 * batch; 3048 pcp->high = 6 * batch;
2970 pcp->batch = max(1UL, 1 * batch); 3049 pcp->batch = max(1UL, 1 * batch);
2971 INIT_LIST_HEAD(&pcp->list); 3050 for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
3051 INIT_LIST_HEAD(&pcp->lists[migratetype]);
2972} 3052}
2973 3053
2974/* 3054/*
@@ -3146,6 +3226,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
3146 return 0; 3226 return 0;
3147} 3227}
3148 3228
3229static int __zone_pcp_update(void *data)
3230{
3231 struct zone *zone = data;
3232 int cpu;
3233 unsigned long batch = zone_batchsize(zone), flags;
3234
3235 for (cpu = 0; cpu < NR_CPUS; cpu++) {
3236 struct per_cpu_pageset *pset;
3237 struct per_cpu_pages *pcp;
3238
3239 pset = zone_pcp(zone, cpu);
3240 pcp = &pset->pcp;
3241
3242 local_irq_save(flags);
3243 free_pcppages_bulk(zone, pcp->count, pcp);
3244 setup_pageset(pset, batch);
3245 local_irq_restore(flags);
3246 }
3247 return 0;
3248}
3249
3250void zone_pcp_update(struct zone *zone)
3251{
3252 stop_machine(__zone_pcp_update, zone, NULL);
3253}
3254
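
zone_pcp_update() leans on the stop_machine() contract: the callback runs while every other online CPU is parked, so the per-cpu pagesets can be drained and re-initialised without racing concurrent allocators. A minimal usage sketch of that primitive (callback name hypothetical):

	static int rebuild_percpu_state(void *data)
	{
		struct zone *zone = data;	/* all other CPUs are held off here */
		/* ... drain and resize per-cpu state safely ... */
		return 0;
	}

	/* NULL cpumask: run the callback on any one CPU */
	stop_machine(rebuild_percpu_state, zone, NULL);
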
3149static __meminit void zone_pcp_init(struct zone *zone) 3255static __meminit void zone_pcp_init(struct zone *zone)
3150{ 3256{
3151 int cpu; 3257 int cpu;
@@ -3720,7 +3826,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3720 zone_pcp_init(zone); 3826 zone_pcp_init(zone);
3721 for_each_lru(l) { 3827 for_each_lru(l) {
3722 INIT_LIST_HEAD(&zone->lru[l].list); 3828 INIT_LIST_HEAD(&zone->lru[l].list);
3723 zone->lru[l].nr_saved_scan = 0; 3829 zone->reclaim_stat.nr_saved_scan[l] = 0;
3724 } 3830 }
3725 zone->reclaim_stat.recent_rotated[0] = 0; 3831 zone->reclaim_stat.recent_rotated[0] = 0;
3726 zone->reclaim_stat.recent_rotated[1] = 0; 3832 zone->reclaim_stat.recent_rotated[1] = 0;
@@ -4509,7 +4615,7 @@ void setup_per_zone_wmarks(void)
4509 calculate_totalreserve_pages(); 4615 calculate_totalreserve_pages();
4510} 4616}
4511 4617
4512/** 4618/*
4513 * The inactive anon list should be small enough that the VM never has to 4619 * The inactive anon list should be small enough that the VM never has to
4514 * do too much work, but large enough that each inactive page has a chance 4620 * do too much work, but large enough that each inactive page has a chance
4515 * to be referenced again before it is swapped out. 4621 * to be referenced again before it is swapped out.
@@ -4732,7 +4838,14 @@ void *__init alloc_large_system_hash(const char *tablename,
4732 numentries <<= (PAGE_SHIFT - scale); 4838 numentries <<= (PAGE_SHIFT - scale);
4733 4839
4734 /* Make sure we've got at least a 0-order allocation.. */ 4840 /* Make sure we've got at least a 0-order allocation.. */
4735 if (unlikely((numentries * bucketsize) < PAGE_SIZE)) 4841 if (unlikely(flags & HASH_SMALL)) {
4842 /* Makes no sense without HASH_EARLY */
4843 WARN_ON(!(flags & HASH_EARLY));
4844 if (!(numentries >> *_hash_shift)) {
4845 numentries = 1UL << *_hash_shift;
4846 BUG_ON(!numentries);
4847 }
4848 } else if (unlikely((numentries * bucketsize) < PAGE_SIZE))
4736 numentries = PAGE_SIZE / bucketsize; 4849 numentries = PAGE_SIZE / bucketsize;
4737 } 4850 }
4738 numentries = roundup_pow_of_two(numentries); 4851 numentries = roundup_pow_of_two(numentries);
@@ -4874,13 +4987,16 @@ int set_migratetype_isolate(struct page *page)
4874 struct zone *zone; 4987 struct zone *zone;
4875 unsigned long flags; 4988 unsigned long flags;
4876 int ret = -EBUSY; 4989 int ret = -EBUSY;
4990 int zone_idx;
4877 4991
4878 zone = page_zone(page); 4992 zone = page_zone(page);
4993 zone_idx = zone_idx(zone);
4879 spin_lock_irqsave(&zone->lock, flags); 4994 spin_lock_irqsave(&zone->lock, flags);
4880 /* 4995 /*
4881 * In future, more migrate types will be able to be isolation target. 4996 * In future, more migrate types will be able to be isolation target.
4882 */ 4997 */
4883 if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE) 4998 if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
4999 zone_idx != ZONE_MOVABLE)
4884 goto out; 5000 goto out;
4885 set_pageblock_migratetype(page, MIGRATE_ISOLATE); 5001 set_pageblock_migratetype(page, MIGRATE_ISOLATE);
4886 move_freepages_block(zone, page, MIGRATE_ISOLATE); 5002 move_freepages_block(zone, page, MIGRATE_ISOLATE);
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index f22b4ebbd8d..3d535d59482 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -116,10 +116,16 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
116 nid = page_to_nid(pfn_to_page(pfn)); 116 nid = page_to_nid(pfn_to_page(pfn));
117 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 117 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
118 VM_BUG_ON(!slab_is_available()); 118 VM_BUG_ON(!slab_is_available());
119 base = kmalloc_node(table_size, 119 if (node_state(nid, N_HIGH_MEMORY)) {
120 base = kmalloc_node(table_size,
120 GFP_KERNEL | __GFP_NOWARN, nid); 121 GFP_KERNEL | __GFP_NOWARN, nid);
121 if (!base) 122 if (!base)
122 base = vmalloc_node(table_size, nid); 123 base = vmalloc_node(table_size, nid);
124 } else {
125 base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
126 if (!base)
127 base = vmalloc(table_size);
128 }
123 } else { 129 } else {
124 /* 130 /*
125 * We don't have to allocate page_cgroup again, but 131 * We don't have to allocate page_cgroup again, but
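
This page_cgroup.c hunk, and the mm/sparse-vmemmap.c and mm/sparse.c hunks below, all add the same guard for memoryless nodes: only request node-local memory when node_state(nid, N_HIGH_MEMORY) says the node has any, otherwise let the allocator choose. A distilled sketch of the pattern (helper name hypothetical):

	static void *table_alloc(size_t size, int nid)
	{
		void *base;

		if (node_state(nid, N_HIGH_MEMORY)) {
			/* the node has memory: prefer node-local */
			base = kmalloc_node(size, GFP_KERNEL | __GFP_NOWARN, nid);
			if (!base)
				base = vmalloc_node(size, nid);
		} else {
			/* memoryless node: fall back to any node */
			base = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
			if (!base)
				base = vmalloc(size);
		}
		return base;
	}
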
diff --git a/mm/rmap.c b/mm/rmap.c
index 0895b5c7cbf..720fc03a7bc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -710,27 +710,6 @@ void page_add_file_rmap(struct page *page)
710 } 710 }
711} 711}
712 712
713#ifdef CONFIG_DEBUG_VM
714/**
715 * page_dup_rmap - duplicate pte mapping to a page
716 * @page: the page to add the mapping to
717 * @vma: the vm area being duplicated
718 * @address: the user virtual address mapped
719 *
720 * For copy_page_range only: minimal extract from page_add_file_rmap /
721 * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's
722 * quicker.
723 *
724 * The caller needs to hold the pte lock.
725 */
726void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
727{
728 if (PageAnon(page))
729 __page_check_anon_rmap(page, vma, address);
730 atomic_inc(&page->_mapcount);
731}
732#endif
733
734/** 713/**
735 * page_remove_rmap - take down pte mapping from a page 714 * page_remove_rmap - take down pte mapping from a page
736 * @page: page to remove mapping from 715 * @page: page to remove mapping from
@@ -739,34 +718,37 @@ void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long
739 */ 718 */
740void page_remove_rmap(struct page *page) 719void page_remove_rmap(struct page *page)
741{ 720{
742 if (atomic_add_negative(-1, &page->_mapcount)) { 721 /* page still mapped by someone else? */
743 /* 722 if (!atomic_add_negative(-1, &page->_mapcount))
744 * Now that the last pte has gone, s390 must transfer dirty 723 return;
745 * flag from storage key to struct page. We can usually skip 724
746 * this if the page is anon, so about to be freed; but perhaps 725 /*
747 * not if it's in swapcache - there might be another pte slot 726 * Now that the last pte has gone, s390 must transfer dirty
748 * containing the swap entry, but page not yet written to swap. 727 * flag from storage key to struct page. We can usually skip
749 */ 728 * this if the page is anon, so about to be freed; but perhaps
750 if ((!PageAnon(page) || PageSwapCache(page)) && 729 * not if it's in swapcache - there might be another pte slot
751 page_test_dirty(page)) { 730 * containing the swap entry, but page not yet written to swap.
752 page_clear_dirty(page); 731 */
753 set_page_dirty(page); 732 if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) {
754 } 733 page_clear_dirty(page);
755 if (PageAnon(page)) 734 set_page_dirty(page);
756 mem_cgroup_uncharge_page(page);
757 __dec_zone_page_state(page,
758 PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
759 mem_cgroup_update_mapped_file_stat(page, -1);
760 /*
761 * It would be tidy to reset the PageAnon mapping here,
762 * but that might overwrite a racing page_add_anon_rmap
763 * which increments mapcount after us but sets mapping
764 * before us: so leave the reset to free_hot_cold_page,
765 * and remember that it's only reliable while mapped.
766 * Leaving it set also helps swapoff to reinstate ptes
767 * faster for those pages still in swapcache.
768 */
769 } 735 }
736 if (PageAnon(page)) {
737 mem_cgroup_uncharge_page(page);
738 __dec_zone_page_state(page, NR_ANON_PAGES);
739 } else {
740 __dec_zone_page_state(page, NR_FILE_MAPPED);
741 }
742 mem_cgroup_update_mapped_file_stat(page, -1);
743 /*
744 * It would be tidy to reset the PageAnon mapping here,
745 * but that might overwrite a racing page_add_anon_rmap
746 * which increments mapcount after us but sets mapping
747 * before us: so leave the reset to free_hot_cold_page,
748 * and remember that it's only reliable while mapped.
749 * Leaving it set also helps swapoff to reinstate ptes
750 * faster for those pages still in swapcache.
751 */
770} 752}
771 753
772/* 754/*
diff --git a/mm/shmem.c b/mm/shmem.c
index bd20f8bb02a..b206a7a32e2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -49,7 +49,6 @@ static struct vfsmount *shm_mnt;
49#include <linux/backing-dev.h> 49#include <linux/backing-dev.h>
50#include <linux/shmem_fs.h> 50#include <linux/shmem_fs.h>
51#include <linux/writeback.h> 51#include <linux/writeback.h>
52#include <linux/vfs.h>
53#include <linux/blkdev.h> 52#include <linux/blkdev.h>
54#include <linux/security.h> 53#include <linux/security.h>
55#include <linux/swapops.h> 54#include <linux/swapops.h>
@@ -1097,6 +1096,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1097 shmem_swp_unmap(entry); 1096 shmem_swp_unmap(entry);
1098unlock: 1097unlock:
1099 spin_unlock(&info->lock); 1098 spin_unlock(&info->lock);
1099 /*
1100 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
1101 * clear SWAP_HAS_CACHE flag.
1102 */
1100 swapcache_free(swap, NULL); 1103 swapcache_free(swap, NULL);
1101redirty: 1104redirty:
1102 set_page_dirty(page); 1105 set_page_dirty(page);
@@ -2306,17 +2309,14 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
2306 int err = -ENOMEM; 2309 int err = -ENOMEM;
2307 2310
2308 /* Round up to L1_CACHE_BYTES to resist false sharing */ 2311 /* Round up to L1_CACHE_BYTES to resist false sharing */
2309 sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info), 2312 sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
2310 L1_CACHE_BYTES), GFP_KERNEL); 2313 L1_CACHE_BYTES), GFP_KERNEL);
2311 if (!sbinfo) 2314 if (!sbinfo)
2312 return -ENOMEM; 2315 return -ENOMEM;
2313 2316
2314 sbinfo->max_blocks = 0;
2315 sbinfo->max_inodes = 0;
2316 sbinfo->mode = S_IRWXUGO | S_ISVTX; 2317 sbinfo->mode = S_IRWXUGO | S_ISVTX;
2317 sbinfo->uid = current_fsuid(); 2318 sbinfo->uid = current_fsuid();
2318 sbinfo->gid = current_fsgid(); 2319 sbinfo->gid = current_fsgid();
2319 sbinfo->mpol = NULL;
2320 sb->s_fs_info = sbinfo; 2320 sb->s_fs_info = sbinfo;
2321 2321
2322#ifdef CONFIG_TMPFS 2322#ifdef CONFIG_TMPFS
@@ -2590,6 +2590,11 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
2590 return 0; 2590 return 0;
2591} 2591}
2592 2592
2593int shmem_lock(struct file *file, int lock, struct user_struct *user)
2594{
2595 return 0;
2596}
2597
2593#define shmem_vm_ops generic_file_vm_ops 2598#define shmem_vm_ops generic_file_vm_ops
2594#define shmem_file_operations ramfs_file_operations 2599#define shmem_file_operations ramfs_file_operations
2595#define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev) 2600#define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev)
diff --git a/mm/slab.c b/mm/slab.c
index 7b5d4deacfc..7dfa481c96b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1384,7 +1384,7 @@ void __init kmem_cache_init(void)
1384 * Fragmentation resistance on low memory - only use bigger 1384 * Fragmentation resistance on low memory - only use bigger
1385 * page orders on machines with more than 32MB of memory. 1385 * page orders on machines with more than 32MB of memory.
1386 */ 1386 */
1387 if (num_physpages > (32 << 20) >> PAGE_SHIFT) 1387 if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
1388 slab_break_gfp_order = BREAK_GFP_ORDER_HI; 1388 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1389 1389
1390 /* Bootstrap is tricky, because several objects are allocated 1390 /* Bootstrap is tricky, because several objects are allocated
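
The num_physpages -> totalram_pages substitutions here, in mm/swap.c and in mm/vmalloc.c share one rationale: num_physpages can include pages that are not usable RAM (memory holes, firmware reservations), while totalram_pages counts pages actually handed to the allocator. The threshold itself is unchanged:

	/* (32 << 20) >> PAGE_SHIFT == 33554432 >> 12 == 8192 pages (32MB with 4K pages) */
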
diff --git a/mm/slub.c b/mm/slub.c
index 0a216aae227..4996fc71955 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3345,6 +3345,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3345{ 3345{
3346 struct kmem_cache *s; 3346 struct kmem_cache *s;
3347 3347
3348 if (WARN_ON(!name))
3349 return NULL;
3350
3348 down_write(&slub_lock); 3351 down_write(&slub_lock);
3349 s = find_mergeable(size, align, flags, name, ctor); 3352 s = find_mergeable(size, align, flags, name, ctor);
3350 if (s) { 3353 if (s) {
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a13ea6401ae..d9714bdcb4a 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -48,8 +48,14 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
48{ 48{
49 /* If the main allocator is up use that, fallback to bootmem. */ 49 /* If the main allocator is up use that, fallback to bootmem. */
50 if (slab_is_available()) { 50 if (slab_is_available()) {
51 struct page *page = alloc_pages_node(node, 51 struct page *page;
52
53 if (node_state(node, N_HIGH_MEMORY))
54 page = alloc_pages_node(node,
52 GFP_KERNEL | __GFP_ZERO, get_order(size)); 55 GFP_KERNEL | __GFP_ZERO, get_order(size));
56 else
57 page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
58 get_order(size));
53 if (page) 59 if (page)
54 return page_address(page); 60 return page_address(page);
55 return NULL; 61 return NULL;
diff --git a/mm/sparse.c b/mm/sparse.c
index da432d9f0ae..6ce4aab69e9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -62,9 +62,12 @@ static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
62 unsigned long array_size = SECTIONS_PER_ROOT * 62 unsigned long array_size = SECTIONS_PER_ROOT *
63 sizeof(struct mem_section); 63 sizeof(struct mem_section);
64 64
65 if (slab_is_available()) 65 if (slab_is_available()) {
66 section = kmalloc_node(array_size, GFP_KERNEL, nid); 66 if (node_state(nid, N_HIGH_MEMORY))
67 else 67 section = kmalloc_node(array_size, GFP_KERNEL, nid);
68 else
69 section = kmalloc(array_size, GFP_KERNEL);
70 } else
68 section = alloc_bootmem_node(NODE_DATA(nid), array_size); 71 section = alloc_bootmem_node(NODE_DATA(nid), array_size);
69 72
70 if (section) 73 if (section)
diff --git a/mm/swap.c b/mm/swap.c
index cb29ae5d33a..308e57d8d7e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -118,7 +118,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
118 spin_lock(&zone->lru_lock); 118 spin_lock(&zone->lru_lock);
119 } 119 }
120 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 120 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
121 int lru = page_is_file_cache(page); 121 int lru = page_lru_base_type(page);
122 list_move_tail(&page->lru, &zone->lru[lru].list); 122 list_move_tail(&page->lru, &zone->lru[lru].list);
123 pgmoved++; 123 pgmoved++;
124 } 124 }
@@ -181,7 +181,7 @@ void activate_page(struct page *page)
181 spin_lock_irq(&zone->lru_lock); 181 spin_lock_irq(&zone->lru_lock);
182 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { 182 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
183 int file = page_is_file_cache(page); 183 int file = page_is_file_cache(page);
184 int lru = LRU_BASE + file; 184 int lru = page_lru_base_type(page);
185 del_page_from_lru_list(zone, page, lru); 185 del_page_from_lru_list(zone, page, lru);
186 186
187 SetPageActive(page); 187 SetPageActive(page);
@@ -189,7 +189,7 @@ void activate_page(struct page *page)
189 add_page_to_lru_list(zone, page, lru); 189 add_page_to_lru_list(zone, page, lru);
190 __count_vm_event(PGACTIVATE); 190 __count_vm_event(PGACTIVATE);
191 191
192 update_page_reclaim_stat(zone, page, !!file, 1); 192 update_page_reclaim_stat(zone, page, file, 1);
193 } 193 }
194 spin_unlock_irq(&zone->lru_lock); 194 spin_unlock_irq(&zone->lru_lock);
195} 195}
@@ -496,7 +496,7 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
496 */ 496 */
497void __init swap_setup(void) 497void __init swap_setup(void)
498{ 498{
499 unsigned long megs = num_physpages >> (20 - PAGE_SHIFT); 499 unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
500 500
501#ifdef CONFIG_SWAP 501#ifdef CONFIG_SWAP
502 bdi_init(swapper_space.backing_dev_info); 502 bdi_init(swapper_space.backing_dev_info);
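
The page_is_file_cache() -> page_lru_base_type() conversions in this file and in mm/vmscan.c replace open-coded "LRU_BASE + flag" arithmetic with a typed helper. Its definition is introduced elsewhere in this series (not shown in this diff); presumably something equivalent to:

	static inline enum lru_list page_lru_base_type(struct page *page)
	{
		if (page_is_file_cache(page))
			return LRU_INACTIVE_FILE;
		return LRU_INACTIVE_ANON;
	}
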
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5ae6b8b78c8..6d1daeb1cb4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -67,10 +67,10 @@ void show_swap_cache_info(void)
67} 67}
68 68
69/* 69/*
70 * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, 70 * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
71 * but sets SwapCache flag and private instead of mapping and index. 71 * but sets SwapCache flag and private instead of mapping and index.
72 */ 72 */
73int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) 73static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
74{ 74{
75 int error; 75 int error;
76 76
@@ -78,28 +78,43 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
78 VM_BUG_ON(PageSwapCache(page)); 78 VM_BUG_ON(PageSwapCache(page));
79 VM_BUG_ON(!PageSwapBacked(page)); 79 VM_BUG_ON(!PageSwapBacked(page));
80 80
81 page_cache_get(page);
82 SetPageSwapCache(page);
83 set_page_private(page, entry.val);
84
85 spin_lock_irq(&swapper_space.tree_lock);
86 error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
87 if (likely(!error)) {
88 total_swapcache_pages++;
89 __inc_zone_page_state(page, NR_FILE_PAGES);
90 INC_CACHE_INFO(add_total);
91 }
92 spin_unlock_irq(&swapper_space.tree_lock);
93
94 if (unlikely(error)) {
95 /*
96 * Only the context which have set SWAP_HAS_CACHE flag
97 * would call add_to_swap_cache().
98 * So add_to_swap_cache() doesn't returns -EEXIST.
99 */
100 VM_BUG_ON(error == -EEXIST);
101 set_page_private(page, 0UL);
102 ClearPageSwapCache(page);
103 page_cache_release(page);
104 }
105
106 return error;
107}
108
109
110int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
111{
112 int error;
113
81 error = radix_tree_preload(gfp_mask); 114 error = radix_tree_preload(gfp_mask);
82 if (!error) { 115 if (!error) {
83 page_cache_get(page); 116 error = __add_to_swap_cache(page, entry);
84 SetPageSwapCache(page);
85 set_page_private(page, entry.val);
86
87 spin_lock_irq(&swapper_space.tree_lock);
88 error = radix_tree_insert(&swapper_space.page_tree,
89 entry.val, page);
90 if (likely(!error)) {
91 total_swapcache_pages++;
92 __inc_zone_page_state(page, NR_FILE_PAGES);
93 INC_CACHE_INFO(add_total);
94 }
95 spin_unlock_irq(&swapper_space.tree_lock);
96 radix_tree_preload_end(); 117 radix_tree_preload_end();
97
98 if (unlikely(error)) {
99 set_page_private(page, 0UL);
100 ClearPageSwapCache(page);
101 page_cache_release(page);
102 }
103 } 118 }
104 return error; 119 return error;
105} 120}
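
The refactor above splits the locked radix-tree insertion out of add_to_swap_cache() so that callers can run radix_tree_preload() themselves, outside any spinlock. The general bracket pattern, with hypothetical tree and lock names:

	error = radix_tree_preload(GFP_KERNEL);	/* may sleep; preemption off on success */
	if (!error) {
		spin_lock_irq(&cache_lock);
		error = radix_tree_insert(&cache_tree, index, item);
		spin_unlock_irq(&cache_lock);
		radix_tree_preload_end();	/* re-enables preemption */
	}
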
@@ -137,38 +152,34 @@ int add_to_swap(struct page *page)
137 VM_BUG_ON(!PageLocked(page)); 152 VM_BUG_ON(!PageLocked(page));
138 VM_BUG_ON(!PageUptodate(page)); 153 VM_BUG_ON(!PageUptodate(page));
139 154
140 for (;;) { 155 entry = get_swap_page();
141 entry = get_swap_page(); 156 if (!entry.val)
142 if (!entry.val) 157 return 0;
143 return 0;
144 158
159 /*
160 * Radix-tree node allocations from PF_MEMALLOC contexts could
161 * completely exhaust the page allocator. __GFP_NOMEMALLOC
162 * stops emergency reserves from being allocated.
163 *
164 * TODO: this could cause a theoretical memory reclaim
165 * deadlock in the swap out path.
166 */
167 /*
168 * Add it to the swap cache and mark it dirty
169 */
170 err = add_to_swap_cache(page, entry,
171 __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
172
173 if (!err) { /* Success */
174 SetPageDirty(page);
175 return 1;
176 } else { /* -ENOMEM radix-tree allocation failure */
145 /* 177 /*
146 * Radix-tree node allocations from PF_MEMALLOC contexts could 178 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
147 * completely exhaust the page allocator. __GFP_NOMEMALLOC 179 * clear SWAP_HAS_CACHE flag.
148 * stops emergency reserves from being allocated.
149 *
150 * TODO: this could cause a theoretical memory reclaim
151 * deadlock in the swap out path.
152 */
153 /*
154 * Add it to the swap cache and mark it dirty
155 */ 180 */
156 err = add_to_swap_cache(page, entry, 181 swapcache_free(entry, NULL);
157 __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN); 182 return 0;
158
159 switch (err) {
160 case 0: /* Success */
161 SetPageDirty(page);
162 return 1;
163 case -EEXIST:
164 /* Raced with "speculative" read_swap_cache_async */
165 swapcache_free(entry, NULL);
166 continue;
167 default:
168 /* -ENOMEM radix-tree allocation failure */
169 swapcache_free(entry, NULL);
170 return 0;
171 }
172 } 183 }
173} 184}
174 185
@@ -290,26 +301,31 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
290 } 301 }
291 302
292 /* 303 /*
304 * call radix_tree_preload() while we can wait.
305 */
306 err = radix_tree_preload(gfp_mask & GFP_KERNEL);
307 if (err)
308 break;
309
310 /*
293 * Swap entry may have been freed since our caller observed it. 311 * Swap entry may have been freed since our caller observed it.
294 */ 312 */
295 err = swapcache_prepare(entry); 313 err = swapcache_prepare(entry);
296 if (err == -EEXIST) /* seems racy */ 314 if (err == -EEXIST) { /* seems racy */
315 radix_tree_preload_end();
297 continue; 316 continue;
298 if (err) /* swp entry is obsolete ? */ 317 }
318 if (err) { /* swp entry is obsolete ? */
319 radix_tree_preload_end();
299 break; 320 break;
321 }
300 322
301 /* 323 /* May fail (-ENOMEM) if radix-tree node allocation failed. */
302 * Associate the page with swap entry in the swap cache.
303 * May fail (-EEXIST) if there is already a page associated
304 * with this entry in the swap cache: added by a racing
305 * read_swap_cache_async, or add_to_swap or shmem_writepage
306 * re-using the just freed swap entry for an existing page.
307 * May fail (-ENOMEM) if radix-tree node allocation failed.
308 */
309 __set_page_locked(new_page); 324 __set_page_locked(new_page);
310 SetPageSwapBacked(new_page); 325 SetPageSwapBacked(new_page);
311 err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL); 326 err = __add_to_swap_cache(new_page, entry);
312 if (likely(!err)) { 327 if (likely(!err)) {
328 radix_tree_preload_end();
313 /* 329 /*
314 * Initiate read into locked page and return. 330 * Initiate read into locked page and return.
315 */ 331 */
@@ -317,8 +333,13 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
317 swap_readpage(new_page); 333 swap_readpage(new_page);
318 return new_page; 334 return new_page;
319 } 335 }
336 radix_tree_preload_end();
320 ClearPageSwapBacked(new_page); 337 ClearPageSwapBacked(new_page);
321 __clear_page_locked(new_page); 338 __clear_page_locked(new_page);
339 /*
340 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
341 * clear SWAP_HAS_CACHE flag.
342 */
322 swapcache_free(entry, NULL); 343 swapcache_free(entry, NULL);
323 } while (err != -ENOMEM); 344 } while (err != -ENOMEM);
324 345
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 74f1102e874..f1bf19daadc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1575,9 +1575,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1575 p->flags &= ~SWP_WRITEOK; 1575 p->flags &= ~SWP_WRITEOK;
1576 spin_unlock(&swap_lock); 1576 spin_unlock(&swap_lock);
1577 1577
1578 current->flags |= PF_SWAPOFF; 1578 current->flags |= PF_OOM_ORIGIN;
1579 err = try_to_unuse(type); 1579 err = try_to_unuse(type);
1580 current->flags &= ~PF_SWAPOFF; 1580 current->flags &= ~PF_OOM_ORIGIN;
1581 1581
1582 if (err) { 1582 if (err) {
1583 /* re-insert swap space back into swap_list */ 1583 /* re-insert swap space back into swap_list */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 204b8243d8a..5535da1d696 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -25,7 +25,7 @@
25#include <linux/rcupdate.h> 25#include <linux/rcupdate.h>
26#include <linux/pfn.h> 26#include <linux/pfn.h>
27#include <linux/kmemleak.h> 27#include <linux/kmemleak.h>
28 28#include <linux/highmem.h>
29#include <asm/atomic.h> 29#include <asm/atomic.h>
30#include <asm/uaccess.h> 30#include <asm/uaccess.h>
31#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
@@ -168,11 +168,9 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
168 next = pgd_addr_end(addr, end); 168 next = pgd_addr_end(addr, end);
169 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); 169 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
170 if (err) 170 if (err)
171 break; 171 return err;
172 } while (pgd++, addr = next, addr != end); 172 } while (pgd++, addr = next, addr != end);
173 173
174 if (unlikely(err))
175 return err;
176 return nr; 174 return nr;
177} 175}
178 176
@@ -1272,17 +1270,21 @@ struct vm_struct *remove_vm_area(const void *addr)
1272 if (va && va->flags & VM_VM_AREA) { 1270 if (va && va->flags & VM_VM_AREA) {
1273 struct vm_struct *vm = va->private; 1271 struct vm_struct *vm = va->private;
1274 struct vm_struct *tmp, **p; 1272 struct vm_struct *tmp, **p;
1275 1273 /*
1276 vmap_debug_free_range(va->va_start, va->va_end); 1274 * remove from list and disallow access to this vm_struct
1277 free_unmap_vmap_area(va); 1275 * before unmap. (address range confliction is maintained by
1278 vm->size -= PAGE_SIZE; 1276 * vmap.)
1279 1277 */
1280 write_lock(&vmlist_lock); 1278 write_lock(&vmlist_lock);
1281 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) 1279 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1282 ; 1280 ;
1283 *p = tmp->next; 1281 *p = tmp->next;
1284 write_unlock(&vmlist_lock); 1282 write_unlock(&vmlist_lock);
1285 1283
1284 vmap_debug_free_range(va->va_start, va->va_end);
1285 free_unmap_vmap_area(va);
1286 vm->size -= PAGE_SIZE;
1287
1286 return vm; 1288 return vm;
1287 } 1289 }
1288 return NULL; 1290 return NULL;
@@ -1384,7 +1386,7 @@ void *vmap(struct page **pages, unsigned int count,
1384 1386
1385 might_sleep(); 1387 might_sleep();
1386 1388
1387 if (count > num_physpages) 1389 if (count > totalram_pages)
1388 return NULL; 1390 return NULL;
1389 1391
1390 area = get_vm_area_caller((count << PAGE_SHIFT), flags, 1392 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
@@ -1491,7 +1493,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
1491 unsigned long real_size = size; 1493 unsigned long real_size = size;
1492 1494
1493 size = PAGE_ALIGN(size); 1495 size = PAGE_ALIGN(size);
1494 if (!size || (size >> PAGE_SHIFT) > num_physpages) 1496 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1495 return NULL; 1497 return NULL;
1496 1498
1497 area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END, 1499 area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
@@ -1641,10 +1643,120 @@ void *vmalloc_32_user(unsigned long size)
1641} 1643}
1642EXPORT_SYMBOL(vmalloc_32_user); 1644EXPORT_SYMBOL(vmalloc_32_user);
1643 1645
1646/*
 1648 * Small helper routine: copy contents to buf from addr.
 1649 * If the page is not present, fill with zeroes.
1649 */
1650
1651static int aligned_vread(char *buf, char *addr, unsigned long count)
1652{
1653 struct page *p;
1654 int copied = 0;
1655
1656 while (count) {
1657 unsigned long offset, length;
1658
1659 offset = (unsigned long)addr & ~PAGE_MASK;
1660 length = PAGE_SIZE - offset;
1661 if (length > count)
1662 length = count;
1663 p = vmalloc_to_page(addr);
1664 /*
 1665	 * To do safe access to this _mapped_ area, we need a
 1666	 * lock. But taking a lock here would mean adding overhead
 1667	 * to vmalloc()/vfree() calls just for this _debug_
 1668	 * interface, which is rarely used. Instead, we'll use
 1669	 * kmap() and accept a small overhead in this access function.
1670 */
1671 if (p) {
1672 /*
 1673	 * we can expect KM_USER0 is not in use (see vread/vwrite's
 1674	 * function description)
1675 */
1676 void *map = kmap_atomic(p, KM_USER0);
1677 memcpy(buf, map + offset, length);
1678 kunmap_atomic(map, KM_USER0);
1679 } else
1680 memset(buf, 0, length);
1681
1682 addr += length;
1683 buf += length;
1684 copied += length;
1685 count -= length;
1686 }
1687 return copied;
1688}
1689
1690static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1691{
1692 struct page *p;
1693 int copied = 0;
1694
1695 while (count) {
1696 unsigned long offset, length;
1697
1698 offset = (unsigned long)addr & ~PAGE_MASK;
1699 length = PAGE_SIZE - offset;
1700 if (length > count)
1701 length = count;
1702 p = vmalloc_to_page(addr);
1703 /*
 1704	 * To do safe access to this _mapped_ area, we need a
 1705	 * lock. But taking a lock here would mean adding overhead
 1706	 * to vmalloc()/vfree() calls just for this _debug_
 1707	 * interface, which is rarely used. Instead, we'll use
 1708	 * kmap() and accept a small overhead in this access function.
1709 */
1710 if (p) {
1711 /*
 1712	 * we can expect KM_USER0 is not in use (see vread/vwrite's
 1713	 * function description)
1714 */
1715 void *map = kmap_atomic(p, KM_USER0);
1716 memcpy(map + offset, buf, length);
1717 kunmap_atomic(map, KM_USER0);
1718 }
1719 addr += length;
1720 buf += length;
1721 copied += length;
1722 count -= length;
1723 }
1724 return copied;
1725}
1726
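
Both helpers above walk the range one page at a time. A worked example of the chunking arithmetic with a hypothetical address, addr == 0xf8001ff0 and count == 64:

	/*
	 * offset = addr & ~PAGE_MASK   == 0xff0 (4080)
	 * length = PAGE_SIZE - offset  == 16
	 * The first pass copies 16 bytes up to the page boundary; the
	 * second copies the remaining 48 from the next page, or
	 * zero-fills/skips them if vmalloc_to_page() finds no page
	 * mapped there.
	 */
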
1727/**
1728 * vread() - read vmalloc area in a safe way.
1729 * @buf: buffer for reading data
1730 * @addr: vm address.
1731 * @count: number of bytes to be read.
1732 *
 1733 * Returns # of bytes by which addr and buf should be increased
 1734 * (same number as @count). Returns 0 if [addr...addr+count) doesn't
 1735 * include any intersection with a live vmalloc area.
 1736 *
 1737 * This function checks that addr is a valid vmalloc'ed area, and
 1738 * copies data from that area to the given buffer. If the given memory range
 1739 * of [addr...addr+count) includes some valid address, data is copied to
 1740 * the proper area of @buf. If there are memory holes, they'll be zero-filled.
 1741 * An IOREMAP area is treated as a memory hole and no copy is done.
 1742 *
 1743 * If [addr...addr+count) doesn't include any intersection with a live
 1744 * vm_struct area, returns 0.
 1745 * @buf should be a kernel buffer. Because this function uses KM_USER0,
 1746 * the caller should guarantee KM_USER0 is not in use.
 1747 *
 1748 * Note: In usual operation, vread() is never necessary because the caller
 1749 * should know the vmalloc() area is valid and can use memcpy().
 1750 * This is for routines which have to access the vmalloc area without
 1751 * any information, such as /dev/kmem.
1752 *
1753 */
1754
1644long vread(char *buf, char *addr, unsigned long count) 1755long vread(char *buf, char *addr, unsigned long count)
1645{ 1756{
1646 struct vm_struct *tmp; 1757 struct vm_struct *tmp;
1647 char *vaddr, *buf_start = buf; 1758 char *vaddr, *buf_start = buf;
1759 unsigned long buflen = count;
1648 unsigned long n; 1760 unsigned long n;
1649 1761
1650 /* Don't allow overflow */ 1762 /* Don't allow overflow */
@@ -1652,7 +1764,7 @@ long vread(char *buf, char *addr, unsigned long count)
1652 count = -(unsigned long) addr; 1764 count = -(unsigned long) addr;
1653 1765
1654 read_lock(&vmlist_lock); 1766 read_lock(&vmlist_lock);
1655 for (tmp = vmlist; tmp; tmp = tmp->next) { 1767 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1656 vaddr = (char *) tmp->addr; 1768 vaddr = (char *) tmp->addr;
1657 if (addr >= vaddr + tmp->size - PAGE_SIZE) 1769 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1658 continue; 1770 continue;
@@ -1665,32 +1777,72 @@ long vread(char *buf, char *addr, unsigned long count)
1665 count--; 1777 count--;
1666 } 1778 }
1667 n = vaddr + tmp->size - PAGE_SIZE - addr; 1779 n = vaddr + tmp->size - PAGE_SIZE - addr;
1668 do { 1780 if (n > count)
1669 if (count == 0) 1781 n = count;
1670 goto finished; 1782 if (!(tmp->flags & VM_IOREMAP))
1671 *buf = *addr; 1783 aligned_vread(buf, addr, n);
1672 buf++; 1784 else /* IOREMAP area is treated as memory hole */
1673 addr++; 1785 memset(buf, 0, n);
1674 count--; 1786 buf += n;
1675 } while (--n > 0); 1787 addr += n;
1788 count -= n;
1676 } 1789 }
1677finished: 1790finished:
1678 read_unlock(&vmlist_lock); 1791 read_unlock(&vmlist_lock);
1679 return buf - buf_start; 1792
1793 if (buf == buf_start)
1794 return 0;
1795 /* zero-fill memory holes */
1796 if (buf != buf_start + buflen)
1797 memset(buf, 0, buflen - (buf - buf_start));
1798
1799 return buflen;
1680} 1800}
1681 1801
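
The rewritten vread() changes the return contract: any overlap with a live vmalloc area now yields @count (holes and IOREMAP ranges read back as zeroes), and only a range with no overlap at all yields 0. A usage sketch, assuming addr points into a vmalloc'ed region:

	char buf[64];
	long n = vread(buf, addr, sizeof(buf));

	if (n == 0) {
		/* [addr, addr + 64) overlaps no live vm_struct */
	} else {
		/* n == 64; unreadable parts of buf were zero-filled */
	}
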
1802/**
1803 * vwrite() - write vmalloc area in a safe way.
1804 * @buf: buffer for source data
1805 * @addr: vm address.
 1806 * @count: number of bytes to be written.
 1807 *
 1808 * Returns # of bytes by which addr and buf should be increased
 1809 * (same number as @count).
 1810 * If [addr...addr+count) doesn't include any intersection with a valid
 1811 * vmalloc area, returns 0.
 1812 *
 1813 * This function checks that addr is a valid vmalloc'ed area, and
 1814 * copies data from the buffer to the given addr. If the specified range
 1815 * of [addr...addr+count) includes some valid address, data is copied from
 1816 * the proper area of @buf. If there are memory holes, no copy is done to them.
 1817 * An IOREMAP area is treated as a memory hole and no copy is done.
 1818 *
 1819 * If [addr...addr+count) doesn't include any intersection with a live
 1820 * vm_struct area, returns 0.
 1821 * @buf should be a kernel buffer. Because this function uses KM_USER0,
 1822 * the caller should guarantee KM_USER0 is not in use.
 1823 *
 1824 * Note: In usual operation, vwrite() is never necessary because the caller
 1825 * should know the vmalloc() area is valid and can use memcpy().
 1826 * This is for routines which have to access the vmalloc area without
 1827 * any information, such as /dev/kmem.
1828 *
1829 * The caller should guarantee KM_USER1 is not used.
1830 */
1831
1682long vwrite(char *buf, char *addr, unsigned long count) 1832long vwrite(char *buf, char *addr, unsigned long count)
1683{ 1833{
1684 struct vm_struct *tmp; 1834 struct vm_struct *tmp;
1685 char *vaddr, *buf_start = buf; 1835 char *vaddr;
1686 unsigned long n; 1836 unsigned long n, buflen;
1837 int copied = 0;
1687 1838
1688 /* Don't allow overflow */ 1839 /* Don't allow overflow */
1689 if ((unsigned long) addr + count < count) 1840 if ((unsigned long) addr + count < count)
1690 count = -(unsigned long) addr; 1841 count = -(unsigned long) addr;
1842 buflen = count;
1691 1843
1692 read_lock(&vmlist_lock); 1844 read_lock(&vmlist_lock);
1693 for (tmp = vmlist; tmp; tmp = tmp->next) { 1845 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1694 vaddr = (char *) tmp->addr; 1846 vaddr = (char *) tmp->addr;
1695 if (addr >= vaddr + tmp->size - PAGE_SIZE) 1847 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1696 continue; 1848 continue;
@@ -1702,18 +1854,21 @@ long vwrite(char *buf, char *addr, unsigned long count)
1702 count--; 1854 count--;
1703 } 1855 }
1704 n = vaddr + tmp->size - PAGE_SIZE - addr; 1856 n = vaddr + tmp->size - PAGE_SIZE - addr;
1705 do { 1857 if (n > count)
1706 if (count == 0) 1858 n = count;
1707 goto finished; 1859 if (!(tmp->flags & VM_IOREMAP)) {
1708 *addr = *buf; 1860 aligned_vwrite(buf, addr, n);
1709 buf++; 1861 copied++;
1710 addr++; 1862 }
1711 count--; 1863 buf += n;
1712 } while (--n > 0); 1864 addr += n;
1865 count -= n;
1713 } 1866 }
1714finished: 1867finished:
1715 read_unlock(&vmlist_lock); 1868 read_unlock(&vmlist_lock);
1716 return buf - buf_start; 1869 if (!copied)
1870 return 0;
1871 return buflen;
1717} 1872}
1718 1873
1719/** 1874/**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ba8228e0a80..613e89f471d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -148,8 +148,8 @@ static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
148 return &zone->reclaim_stat; 148 return &zone->reclaim_stat;
149} 149}
150 150
151static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc, 151static unsigned long zone_nr_lru_pages(struct zone *zone,
152 enum lru_list lru) 152 struct scan_control *sc, enum lru_list lru)
153{ 153{
154 if (!scanning_global_lru(sc)) 154 if (!scanning_global_lru(sc))
155 return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru); 155 return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
@@ -286,7 +286,12 @@ static inline int page_mapping_inuse(struct page *page)
286 286
287static inline int is_page_cache_freeable(struct page *page) 287static inline int is_page_cache_freeable(struct page *page)
288{ 288{
289 return page_count(page) - !!page_has_private(page) == 2; 289 /*
290 * A freeable page cache page is referenced only by the caller
291 * that isolated the page, the page cache radix tree and
292 * optional buffer heads at page->private.
293 */
294 return page_count(page) - page_has_private(page) == 2;
290} 295}
291 296
292static int may_write_to_queue(struct backing_dev_info *bdi) 297static int may_write_to_queue(struct backing_dev_info *bdi)
@@ -361,7 +366,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
361 * block, for some throttling. This happens by accident, because 366 * block, for some throttling. This happens by accident, because
362 * swap_backing_dev_info is bust: it doesn't reflect the 367 * swap_backing_dev_info is bust: it doesn't reflect the
363 * congestion state of the swapdevs. Easy to fix, if needed. 368 * congestion state of the swapdevs. Easy to fix, if needed.
364 * See swapfile.c:page_queue_congested().
365 */ 369 */
366 if (!is_page_cache_freeable(page)) 370 if (!is_page_cache_freeable(page))
367 return PAGE_KEEP; 371 return PAGE_KEEP;
@@ -531,7 +535,7 @@ redo:
531 * unevictable page on [in]active list. 535 * unevictable page on [in]active list.
532 * We know how to handle that. 536 * We know how to handle that.
533 */ 537 */
534 lru = active + page_is_file_cache(page); 538 lru = active + page_lru_base_type(page);
535 lru_cache_add_lru(page, lru); 539 lru_cache_add_lru(page, lru);
536 } else { 540 } else {
537 /* 541 /*
@@ -821,7 +825,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
821 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) 825 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
822 return ret; 826 return ret;
823 827
824 if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file)) 828 if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
825 return ret; 829 return ret;
826 830
827 /* 831 /*
@@ -935,6 +939,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
935 /* Check that we have not crossed a zone boundary. */ 939 /* Check that we have not crossed a zone boundary. */
936 if (unlikely(page_zone_id(cursor_page) != zone_id)) 940 if (unlikely(page_zone_id(cursor_page) != zone_id))
937 continue; 941 continue;
942
943 /*
 944 * If we don't have enough swap space, reclaiming
 945 * anon pages which don't already have a swap slot is
 946 * pointless.
947 */
948 if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
949 !PageSwapCache(cursor_page))
950 continue;
951
938 if (__isolate_lru_page(cursor_page, mode, file) == 0) { 952 if (__isolate_lru_page(cursor_page, mode, file) == 0) {
939 list_move(&cursor_page->lru, dst); 953 list_move(&cursor_page->lru, dst);
940 mem_cgroup_del_lru(cursor_page); 954 mem_cgroup_del_lru(cursor_page);
@@ -961,7 +975,7 @@ static unsigned long isolate_pages_global(unsigned long nr,
961 if (file) 975 if (file)
962 lru += LRU_FILE; 976 lru += LRU_FILE;
963 return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order, 977 return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
964 mode, !!file); 978 mode, file);
965} 979}
966 980
967/* 981/*
@@ -976,7 +990,7 @@ static unsigned long clear_active_flags(struct list_head *page_list,
976 struct page *page; 990 struct page *page;
977 991
978 list_for_each_entry(page, page_list, lru) { 992 list_for_each_entry(page, page_list, lru) {
979 lru = page_is_file_cache(page); 993 lru = page_lru_base_type(page);
980 if (PageActive(page)) { 994 if (PageActive(page)) {
981 lru += LRU_ACTIVE; 995 lru += LRU_ACTIVE;
982 ClearPageActive(page); 996 ClearPageActive(page);
@@ -1034,6 +1048,31 @@ int isolate_lru_page(struct page *page)
1034} 1048}
1035 1049
1036/* 1050/*
1051 * Are there way too many processes in the direct reclaim path already?
1052 */
1053static int too_many_isolated(struct zone *zone, int file,
1054 struct scan_control *sc)
1055{
1056 unsigned long inactive, isolated;
1057
1058 if (current_is_kswapd())
1059 return 0;
1060
1061 if (!scanning_global_lru(sc))
1062 return 0;
1063
1064 if (file) {
1065 inactive = zone_page_state(zone, NR_INACTIVE_FILE);
1066 isolated = zone_page_state(zone, NR_ISOLATED_FILE);
1067 } else {
1068 inactive = zone_page_state(zone, NR_INACTIVE_ANON);
1069 isolated = zone_page_state(zone, NR_ISOLATED_ANON);
1070 }
1071
1072 return isolated > inactive;
1073}
1074
1075/*
1037 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number 1076 * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
1038 * of reclaimed pages 1077 * of reclaimed pages
1039 */ 1078 */
@@ -1048,6 +1087,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1048 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); 1087 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1049 int lumpy_reclaim = 0; 1088 int lumpy_reclaim = 0;
1050 1089
1090 while (unlikely(too_many_isolated(zone, file, sc))) {
1091 congestion_wait(WRITE, HZ/10);
1092
1093 /* We are about to die and free our memory. Return now. */
1094 if (fatal_signal_pending(current))
1095 return SWAP_CLUSTER_MAX;
1096 }
1097
1051 /* 1098 /*
1052 * If we need a large contiguous chunk of memory, or have 1099 * If we need a large contiguous chunk of memory, or have
1053 * trouble getting a small set of contiguous pages, we 1100 * trouble getting a small set of contiguous pages, we
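
The throttle added above pairs with too_many_isolated() from the previous hunk: a direct reclaimer waits once more pages sit isolated off an inactive list than remain on it. A worked example with hypothetical counter values:

	/*
	 * Snapshot while several tasks reclaim the same zone:
	 *   NR_INACTIVE_FILE == 50, NR_ISOLATED_FILE == 68
	 * isolated > inactive, so a new direct reclaimer loops in
	 * congestion_wait(WRITE, HZ/10) until putback shrinks the
	 * isolated count; kswapd and memcg reclaim skip the check, and
	 * a fatally-signalled task returns SWAP_CLUSTER_MAX so it can
	 * exit promptly instead of sleeping here.
	 */
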
@@ -1072,10 +1119,26 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1072 unsigned long nr_active; 1119 unsigned long nr_active;
1073 unsigned int count[NR_LRU_LISTS] = { 0, }; 1120 unsigned int count[NR_LRU_LISTS] = { 0, };
1074 int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE; 1121 int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
1122 unsigned long nr_anon;
1123 unsigned long nr_file;
1075 1124
1076 nr_taken = sc->isolate_pages(sc->swap_cluster_max, 1125 nr_taken = sc->isolate_pages(sc->swap_cluster_max,
1077 &page_list, &nr_scan, sc->order, mode, 1126 &page_list, &nr_scan, sc->order, mode,
1078 zone, sc->mem_cgroup, 0, file); 1127 zone, sc->mem_cgroup, 0, file);
1128
1129 if (scanning_global_lru(sc)) {
1130 zone->pages_scanned += nr_scan;
1131 if (current_is_kswapd())
1132 __count_zone_vm_events(PGSCAN_KSWAPD, zone,
1133 nr_scan);
1134 else
1135 __count_zone_vm_events(PGSCAN_DIRECT, zone,
1136 nr_scan);
1137 }
1138
1139 if (nr_taken == 0)
1140 goto done;
1141
1079 nr_active = clear_active_flags(&page_list, count); 1142 nr_active = clear_active_flags(&page_list, count);
1080 __count_vm_events(PGDEACTIVATE, nr_active); 1143 __count_vm_events(PGDEACTIVATE, nr_active);
1081 1144
@@ -1088,8 +1151,10 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1088 __mod_zone_page_state(zone, NR_INACTIVE_ANON, 1151 __mod_zone_page_state(zone, NR_INACTIVE_ANON,
1089 -count[LRU_INACTIVE_ANON]); 1152 -count[LRU_INACTIVE_ANON]);
1090 1153
1091 if (scanning_global_lru(sc)) 1154 nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
1092 zone->pages_scanned += nr_scan; 1155 nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
1156 __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
1157 __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
1093 1158
1094 reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON]; 1159 reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
1095 reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON]; 1160 reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
@@ -1123,18 +1188,12 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1123 } 1188 }
1124 1189
1125 nr_reclaimed += nr_freed; 1190 nr_reclaimed += nr_freed;
1191
1126 local_irq_disable(); 1192 local_irq_disable();
1127 if (current_is_kswapd()) { 1193 if (current_is_kswapd())
1128 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
1129 __count_vm_events(KSWAPD_STEAL, nr_freed); 1194 __count_vm_events(KSWAPD_STEAL, nr_freed);
1130 } else if (scanning_global_lru(sc))
1131 __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
1132
1133 __count_zone_vm_events(PGSTEAL, zone, nr_freed); 1195 __count_zone_vm_events(PGSTEAL, zone, nr_freed);
1134 1196
1135 if (nr_taken == 0)
1136 goto done;
1137
1138 spin_lock(&zone->lru_lock); 1197 spin_lock(&zone->lru_lock);
1139 /* 1198 /*
1140 * Put back any unfreeable pages. 1199 * Put back any unfreeable pages.
@@ -1153,8 +1212,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1153 SetPageLRU(page); 1212 SetPageLRU(page);
1154 lru = page_lru(page); 1213 lru = page_lru(page);
1155 add_page_to_lru_list(zone, page, lru); 1214 add_page_to_lru_list(zone, page, lru);
1156 if (PageActive(page)) { 1215 if (is_active_lru(lru)) {
1157 int file = !!page_is_file_cache(page); 1216 int file = is_file_lru(lru);
1158 reclaim_stat->recent_rotated[file]++; 1217 reclaim_stat->recent_rotated[file]++;
1159 } 1218 }
1160 if (!pagevec_add(&pvec, page)) { 1219 if (!pagevec_add(&pvec, page)) {
@@ -1163,10 +1222,13 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
1163 spin_lock_irq(&zone->lru_lock); 1222 spin_lock_irq(&zone->lru_lock);
1164 } 1223 }
1165 } 1224 }
1225 __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
1226 __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
1227
1166 } while (nr_scanned < max_scan); 1228 } while (nr_scanned < max_scan);
1167 spin_unlock(&zone->lru_lock); 1229
1168done: 1230done:
1169 local_irq_enable(); 1231 spin_unlock_irq(&zone->lru_lock);
1170 pagevec_release(&pvec); 1232 pagevec_release(&pvec);
1171 return nr_reclaimed; 1233 return nr_reclaimed;
1172} 1234}
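
The throttling loop added at the top of shrink_inactive_list() relies on too_many_isolated(), which this patch defines earlier in vmscan.c, outside the hunks shown here. A plausible shape for that predicate, inferred from the NR_ISOLATED_* counters these hunks maintain (a sketch, not the committed code):

	static int too_many_isolated(struct zone *zone, int file,
				     struct scan_control *sc)
	{
		unsigned long inactive, isolated;

		/* kswapd and memcg reclaim are exempt from the throttle */
		if (current_is_kswapd())
			return 0;
		if (!scanning_global_lru(sc))
			return 0;

		if (file) {
			inactive = zone_page_state(zone, NR_INACTIVE_FILE);
			isolated = zone_page_state(zone, NR_ISOLATED_FILE);
		} else {
			inactive = zone_page_state(zone, NR_INACTIVE_ANON);
			isolated = zone_page_state(zone, NR_ISOLATED_ANON);
		}

		return isolated > inactive;
	}

A direct reclaimer that trips this check backs off in congestion_wait() instead of isolating still more pages, bounding how much memory can sit off the LRU lists at once.
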
@@ -1215,15 +1277,10 @@ static void move_active_pages_to_lru(struct zone *zone,
1215 1277
1216 while (!list_empty(list)) { 1278 while (!list_empty(list)) {
1217 page = lru_to_page(list); 1279 page = lru_to_page(list);
1218 prefetchw_prev_lru_page(page, list, flags);
1219 1280
1220 VM_BUG_ON(PageLRU(page)); 1281 VM_BUG_ON(PageLRU(page));
1221 SetPageLRU(page); 1282 SetPageLRU(page);
1222 1283
1223 VM_BUG_ON(!PageActive(page));
1224 if (!is_active_lru(lru))
1225 ClearPageActive(page); /* we are de-activating */
1226
1227 list_move(&page->lru, &zone->lru[lru].list); 1284 list_move(&page->lru, &zone->lru[lru].list);
1228 mem_cgroup_add_lru_list(page, lru); 1285 mem_cgroup_add_lru_list(page, lru);
1229 pgmoved++; 1286 pgmoved++;
@@ -1244,7 +1301,7 @@ static void move_active_pages_to_lru(struct zone *zone,
1244static void shrink_active_list(unsigned long nr_pages, struct zone *zone, 1301static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1245 struct scan_control *sc, int priority, int file) 1302 struct scan_control *sc, int priority, int file)
1246{ 1303{
1247 unsigned long pgmoved; 1304 unsigned long nr_taken;
1248 unsigned long pgscanned; 1305 unsigned long pgscanned;
1249 unsigned long vm_flags; 1306 unsigned long vm_flags;
1250 LIST_HEAD(l_hold); /* The pages which were snipped off */ 1307 LIST_HEAD(l_hold); /* The pages which were snipped off */
@@ -1252,10 +1309,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1252 LIST_HEAD(l_inactive); 1309 LIST_HEAD(l_inactive);
1253 struct page *page; 1310 struct page *page;
1254 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); 1311 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1312 unsigned long nr_rotated = 0;
1255 1313
1256 lru_add_drain(); 1314 lru_add_drain();
1257 spin_lock_irq(&zone->lru_lock); 1315 spin_lock_irq(&zone->lru_lock);
1258 pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, 1316 nr_taken = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
1259 ISOLATE_ACTIVE, zone, 1317 ISOLATE_ACTIVE, zone,
1260 sc->mem_cgroup, 1, file); 1318 sc->mem_cgroup, 1, file);
1261 /* 1319 /*
@@ -1265,16 +1323,16 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1265 if (scanning_global_lru(sc)) { 1323 if (scanning_global_lru(sc)) {
1266 zone->pages_scanned += pgscanned; 1324 zone->pages_scanned += pgscanned;
1267 } 1325 }
1268 reclaim_stat->recent_scanned[!!file] += pgmoved; 1326 reclaim_stat->recent_scanned[file] += nr_taken;
1269 1327
1270 __count_zone_vm_events(PGREFILL, zone, pgscanned); 1328 __count_zone_vm_events(PGREFILL, zone, pgscanned);
1271 if (file) 1329 if (file)
1272 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved); 1330 __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
1273 else 1331 else
1274 __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved); 1332 __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
1333 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
1275 spin_unlock_irq(&zone->lru_lock); 1334 spin_unlock_irq(&zone->lru_lock);
1276 1335
1277 pgmoved = 0; /* count referenced (mapping) mapped pages */
1278 while (!list_empty(&l_hold)) { 1336 while (!list_empty(&l_hold)) {
1279 cond_resched(); 1337 cond_resched();
1280 page = lru_to_page(&l_hold); 1338 page = lru_to_page(&l_hold);
@@ -1288,7 +1346,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1288 /* page_referenced clears PageReferenced */ 1346 /* page_referenced clears PageReferenced */
1289 if (page_mapping_inuse(page) && 1347 if (page_mapping_inuse(page) &&
1290 page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) { 1348 page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
1291 pgmoved++; 1349 nr_rotated++;
1292 /* 1350 /*
1293 * Identify referenced, file-backed active pages and 1351 * Identify referenced, file-backed active pages and
1294 * give them one more trip around the active list. So 1352 * give them one more trip around the active list. So
@@ -1304,6 +1362,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1304 } 1362 }
1305 } 1363 }
1306 1364
1365 ClearPageActive(page); /* we are de-activating */
1307 list_add(&page->lru, &l_inactive); 1366 list_add(&page->lru, &l_inactive);
1308 } 1367 }
1309 1368
@@ -1317,13 +1376,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1317 * helps balance scan pressure between file and anonymous pages in 1376 * helps balance scan pressure between file and anonymous pages in
1318 * get_scan_ratio. 1377 * get_scan_ratio.
1319 */ 1378 */
1320 reclaim_stat->recent_rotated[!!file] += pgmoved; 1379 reclaim_stat->recent_rotated[file] += nr_rotated;
1321 1380
1322 move_active_pages_to_lru(zone, &l_active, 1381 move_active_pages_to_lru(zone, &l_active,
1323 LRU_ACTIVE + file * LRU_FILE); 1382 LRU_ACTIVE + file * LRU_FILE);
1324 move_active_pages_to_lru(zone, &l_inactive, 1383 move_active_pages_to_lru(zone, &l_inactive,
1325 LRU_BASE + file * LRU_FILE); 1384 LRU_BASE + file * LRU_FILE);
1326 1385 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
1327 spin_unlock_irq(&zone->lru_lock); 1386 spin_unlock_irq(&zone->lru_lock);
1328} 1387}
1329 1388
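
Taken together, shrink_inactive_list() and shrink_active_list() now bracket their work with symmetric NR_ISOLATED accounting; condensed, with names from the hunks above:

	/* isolation: pages leave the LRU, the counter goes up */
	nr_taken = sc->isolate_pages(...);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
	spin_unlock_irq(&zone->lru_lock);

	/* scan, rotate and reclaim outside the lock */

	spin_lock_irq(&zone->lru_lock);
	/* putback: survivors return to an LRU, the counter goes down */
	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);

The NR_ISOLATED_ANON + file arithmetic assumes NR_ISOLATED_FILE immediately follows NR_ISOLATED_ANON in enum zone_stat_item, mirroring the existing LRU_BASE + file * LRU_FILE idiom.
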
@@ -1429,10 +1488,10 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1429 unsigned long ap, fp; 1488 unsigned long ap, fp;
1430 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); 1489 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1431 1490
1432 anon = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) + 1491 anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
1433 zone_nr_pages(zone, sc, LRU_INACTIVE_ANON); 1492 zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
1434 file = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) + 1493 file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
1435 zone_nr_pages(zone, sc, LRU_INACTIVE_FILE); 1494 zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
1436 1495
1437 if (scanning_global_lru(sc)) { 1496 if (scanning_global_lru(sc)) {
1438 free = zone_page_state(zone, NR_FREE_PAGES); 1497 free = zone_page_state(zone, NR_FREE_PAGES);
@@ -1526,6 +1585,7 @@ static void shrink_zone(int priority, struct zone *zone,
1526 enum lru_list l; 1585 enum lru_list l;
1527 unsigned long nr_reclaimed = sc->nr_reclaimed; 1586 unsigned long nr_reclaimed = sc->nr_reclaimed;
1528 unsigned long swap_cluster_max = sc->swap_cluster_max; 1587 unsigned long swap_cluster_max = sc->swap_cluster_max;
1588 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1529 int noswap = 0; 1589 int noswap = 0;
1530 1590
1531 /* If we have no swap space, do not bother scanning anon pages. */ 1591 /* If we have no swap space, do not bother scanning anon pages. */
@@ -1540,17 +1600,14 @@ static void shrink_zone(int priority, struct zone *zone,
1540 int file = is_file_lru(l); 1600 int file = is_file_lru(l);
1541 unsigned long scan; 1601 unsigned long scan;
1542 1602
1543 scan = zone_nr_pages(zone, sc, l); 1603 scan = zone_nr_lru_pages(zone, sc, l);
1544 if (priority || noswap) { 1604 if (priority || noswap) {
1545 scan >>= priority; 1605 scan >>= priority;
1546 scan = (scan * percent[file]) / 100; 1606 scan = (scan * percent[file]) / 100;
1547 } 1607 }
1548 if (scanning_global_lru(sc)) 1608 nr[l] = nr_scan_try_batch(scan,
1549 nr[l] = nr_scan_try_batch(scan, 1609 &reclaim_stat->nr_saved_scan[l],
1550 &zone->lru[l].nr_saved_scan, 1610 swap_cluster_max);
1551 swap_cluster_max);
1552 else
1553 nr[l] = scan;
1554 } 1611 }
1555 1612
1556 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || 1613 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
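
With the else branch gone, memcg reclaim batches its scan counts through reclaim_stat->nr_saved_scan exactly as global reclaim does. nr_scan_try_batch() itself sits outside these hunks; given how it is called here, its likely shape is (a sketch under that assumption):

	static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
					       unsigned long *nr_saved_scan,
					       unsigned long swap_cluster_max)
	{
		unsigned long nr = *nr_saved_scan + nr_to_scan;

		if (nr >= swap_cluster_max) {
			*nr_saved_scan = 0;	/* large enough: flush the batch */
			return nr;
		}
		*nr_saved_scan = nr;		/* too small: save it for next time */
		return 0;
	}
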
@@ -1685,7 +1742,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1685 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1742 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
1686 continue; 1743 continue;
1687 1744
1688 lru_pages += zone_lru_pages(zone); 1745 lru_pages += zone_reclaimable_pages(zone);
1689 } 1746 }
1690 } 1747 }
1691 1748
@@ -1902,7 +1959,7 @@ loop_again:
1902 for (i = 0; i <= end_zone; i++) { 1959 for (i = 0; i <= end_zone; i++) {
1903 struct zone *zone = pgdat->node_zones + i; 1960 struct zone *zone = pgdat->node_zones + i;
1904 1961
1905 lru_pages += zone_lru_pages(zone); 1962 lru_pages += zone_reclaimable_pages(zone);
1906 } 1963 }
1907 1964
1908 /* 1965 /*
@@ -1946,7 +2003,7 @@ loop_again:
1946 if (zone_is_all_unreclaimable(zone)) 2003 if (zone_is_all_unreclaimable(zone))
1947 continue; 2004 continue;
1948 if (nr_slab == 0 && zone->pages_scanned >= 2005 if (nr_slab == 0 && zone->pages_scanned >=
1949 (zone_lru_pages(zone) * 6)) 2006 (zone_reclaimable_pages(zone) * 6))
1950 zone_set_flag(zone, 2007 zone_set_flag(zone,
1951 ZONE_ALL_UNRECLAIMABLE); 2008 ZONE_ALL_UNRECLAIMABLE);
1952 /* 2009 /*
@@ -2113,12 +2170,39 @@ void wakeup_kswapd(struct zone *zone, int order)
2113 wake_up_interruptible(&pgdat->kswapd_wait); 2170 wake_up_interruptible(&pgdat->kswapd_wait);
2114} 2171}
2115 2172
2116unsigned long global_lru_pages(void) 2173/*
2174 * The reclaimable count would be mostly accurate.
2175 * The less reclaimable pages may be
2176 * - mlocked pages, which will be moved to unevictable list when encountered
2177 * - mapped pages, which may require several travels to be reclaimed
2178 * - dirty pages, which is not "instantly" reclaimable
2179 */
2180unsigned long global_reclaimable_pages(void)
2181{
2182 int nr;
2183
2184 nr = global_page_state(NR_ACTIVE_FILE) +
2185 global_page_state(NR_INACTIVE_FILE);
2186
2187 if (nr_swap_pages > 0)
2188 nr += global_page_state(NR_ACTIVE_ANON) +
2189 global_page_state(NR_INACTIVE_ANON);
2190
2191 return nr;
2192}
2193
2194unsigned long zone_reclaimable_pages(struct zone *zone)
2117{ 2195{
2118 return global_page_state(NR_ACTIVE_ANON) 2196 int nr;
2119 + global_page_state(NR_ACTIVE_FILE) 2197
2120 + global_page_state(NR_INACTIVE_ANON) 2198 nr = zone_page_state(zone, NR_ACTIVE_FILE) +
2121 + global_page_state(NR_INACTIVE_FILE); 2199 zone_page_state(zone, NR_INACTIVE_FILE);
2200
2201 if (nr_swap_pages > 0)
2202 nr += zone_page_state(zone, NR_ACTIVE_ANON) +
2203 zone_page_state(zone, NR_INACTIVE_ANON);
2204
2205 return nr;
2122} 2206}
2123 2207
2124#ifdef CONFIG_HIBERNATION 2208#ifdef CONFIG_HIBERNATION
@@ -2133,6 +2217,7 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
2133{ 2217{
2134 struct zone *zone; 2218 struct zone *zone;
2135 unsigned long nr_reclaimed = 0; 2219 unsigned long nr_reclaimed = 0;
2220 struct zone_reclaim_stat *reclaim_stat;
2136 2221
2137 for_each_populated_zone(zone) { 2222 for_each_populated_zone(zone) {
2138 enum lru_list l; 2223 enum lru_list l;
@@ -2149,11 +2234,14 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
2149 l == LRU_ACTIVE_FILE)) 2234 l == LRU_ACTIVE_FILE))
2150 continue; 2235 continue;
2151 2236
2152 zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1; 2237 reclaim_stat = get_reclaim_stat(zone, sc);
2153 if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) { 2238 reclaim_stat->nr_saved_scan[l] +=
2239 (lru_pages >> prio) + 1;
2240 if (reclaim_stat->nr_saved_scan[l]
2241 >= nr_pages || pass > 3) {
2154 unsigned long nr_to_scan; 2242 unsigned long nr_to_scan;
2155 2243
2156 zone->lru[l].nr_saved_scan = 0; 2244 reclaim_stat->nr_saved_scan[l] = 0;
2157 nr_to_scan = min(nr_pages, lru_pages); 2245 nr_to_scan = min(nr_pages, lru_pages);
2158 nr_reclaimed += shrink_list(l, nr_to_scan, zone, 2246 nr_reclaimed += shrink_list(l, nr_to_scan, zone,
2159 sc, prio); 2247 sc, prio);
@@ -2190,7 +2278,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
2190 2278
2191 current->reclaim_state = &reclaim_state; 2279 current->reclaim_state = &reclaim_state;
2192 2280
2193 lru_pages = global_lru_pages(); 2281 lru_pages = global_reclaimable_pages();
2194 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); 2282 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
2195 /* If slab caches are huge, it's better to hit them first */ 2283 /* If slab caches are huge, it's better to hit them first */
2196 while (nr_slab >= lru_pages) { 2284 while (nr_slab >= lru_pages) {
@@ -2232,7 +2320,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
2232 2320
2233 reclaim_state.reclaimed_slab = 0; 2321 reclaim_state.reclaimed_slab = 0;
2234 shrink_slab(sc.nr_scanned, sc.gfp_mask, 2322 shrink_slab(sc.nr_scanned, sc.gfp_mask,
2235 global_lru_pages()); 2323 global_reclaimable_pages());
2236 sc.nr_reclaimed += reclaim_state.reclaimed_slab; 2324 sc.nr_reclaimed += reclaim_state.reclaimed_slab;
2237 if (sc.nr_reclaimed >= nr_pages) 2325 if (sc.nr_reclaimed >= nr_pages)
2238 goto out; 2326 goto out;
@@ -2249,7 +2337,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
2249 if (!sc.nr_reclaimed) { 2337 if (!sc.nr_reclaimed) {
2250 do { 2338 do {
2251 reclaim_state.reclaimed_slab = 0; 2339 reclaim_state.reclaimed_slab = 0;
2252 shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages()); 2340 shrink_slab(nr_pages, sc.gfp_mask,
2341 global_reclaimable_pages());
2253 sc.nr_reclaimed += reclaim_state.reclaimed_slab; 2342 sc.nr_reclaimed += reclaim_state.reclaimed_slab;
2254 } while (sc.nr_reclaimed < nr_pages && 2343 } while (sc.nr_reclaimed < nr_pages &&
2255 reclaim_state.reclaimed_slab > 0); 2344 reclaim_state.reclaimed_slab > 0);
@@ -2569,7 +2658,7 @@ static void check_move_unevictable_page(struct page *page, struct zone *zone)
2569retry: 2658retry:
2570 ClearPageUnevictable(page); 2659 ClearPageUnevictable(page);
2571 if (page_evictable(page, NULL)) { 2660 if (page_evictable(page, NULL)) {
2572 enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); 2661 enum lru_list l = page_lru_base_type(page);
2573 2662
2574 __dec_zone_state(zone, NR_UNEVICTABLE); 2663 __dec_zone_state(zone, NR_UNEVICTABLE);
2575 list_move(&page->lru, &zone->lru[l].list); 2664 list_move(&page->lru, &zone->lru[l].list);
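
Note that global_reclaimable_pages() and zone_reclaimable_pages() count anonymous pages only while swap is available; with nowhere to write them, anon pages cannot be reclaimed at all. An illustrative case (numbers invented for the example):

	/* zone with 1000 file LRU pages and 5000 anon LRU pages:
	 *	nr_swap_pages > 0	-> zone_reclaimable_pages() == 6000
	 *	nr_swap_pages == 0	-> zone_reclaimable_pages() == 1000
	 *
	 * so the "pages_scanned >= reclaimable * 6" give-up test in
	 * balance_pgdat() no longer holds unswappable anon pages
	 * against the zone.
	 */
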
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 138bed53706..c81321f9fee 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -639,11 +639,14 @@ static const char * const vmstat_text[] = {
639 "nr_slab_reclaimable", 639 "nr_slab_reclaimable",
640 "nr_slab_unreclaimable", 640 "nr_slab_unreclaimable",
641 "nr_page_table_pages", 641 "nr_page_table_pages",
642 "nr_kernel_stack",
642 "nr_unstable", 643 "nr_unstable",
643 "nr_bounce", 644 "nr_bounce",
644 "nr_vmscan_write", 645 "nr_vmscan_write",
645 "nr_writeback_temp", 646 "nr_writeback_temp",
646 647 "nr_isolated_anon",
648 "nr_isolated_file",
649 "nr_shmem",
647#ifdef CONFIG_NUMA 650#ifdef CONFIG_NUMA
648 "numa_hit", 651 "numa_hit",
649 "numa_miss", 652 "numa_miss",
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 09bedeb5579..49d8495d69b 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -577,11 +577,6 @@ static int hidp_session(void *arg)
577 } 577 }
578 578
579 if (session->hid) { 579 if (session->hid) {
580 if (session->hid->claimed & HID_CLAIMED_INPUT)
581 hidinput_disconnect(session->hid);
582 if (session->hid->claimed & HID_CLAIMED_HIDRAW)
583 hidraw_disconnect(session->hid);
584
585 hid_destroy_device(session->hid); 580 hid_destroy_device(session->hid);
586 session->hid = NULL; 581 session->hid = NULL;
587 } 582 }
@@ -747,8 +742,6 @@ static void hidp_stop(struct hid_device *hid)
747 skb_queue_purge(&session->ctrl_transmit); 742 skb_queue_purge(&session->ctrl_transmit);
748 skb_queue_purge(&session->intr_transmit); 743 skb_queue_purge(&session->intr_transmit);
749 744
750 if (hid->claimed & HID_CLAIMED_INPUT)
751 hidinput_disconnect(hid);
752 hid->claimed = 0; 745 hid->claimed = 0;
753} 746}
754 747
diff --git a/net/core/sock.c b/net/core/sock.c
index 30d5446512f..524712a7b15 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1206,12 +1206,12 @@ EXPORT_SYMBOL_GPL(sk_setup_caps);
1206 1206
1207void __init sk_init(void) 1207void __init sk_init(void)
1208{ 1208{
1209 if (num_physpages <= 4096) { 1209 if (totalram_pages <= 4096) {
1210 sysctl_wmem_max = 32767; 1210 sysctl_wmem_max = 32767;
1211 sysctl_rmem_max = 32767; 1211 sysctl_rmem_max = 32767;
1212 sysctl_wmem_default = 32767; 1212 sysctl_wmem_default = 32767;
1213 sysctl_rmem_default = 32767; 1213 sysctl_rmem_default = 32767;
1214 } else if (num_physpages >= 131072) { 1214 } else if (totalram_pages >= 131072) {
1215 sysctl_wmem_max = 131071; 1215 sysctl_wmem_max = 131071;
1216 sysctl_rmem_max = 131071; 1216 sysctl_rmem_max = 131071;
1217 } 1217 }
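
The num_physpages -> totalram_pages conversion, repeated across the networking initcalls below, changes the basis of these heuristics from physical pages present to pages the kernel actually manages, which differs on machines with large memory holes or big reserved regions. With 4 KiB pages the sk_init() thresholds work out as follows (illustrative arithmetic, not from the patch):

	/* totalram_pages <= 4096	->  <= 16 MiB of usable RAM:
	 *	clamp the socket buffer limits to 32767 bytes
	 * totalram_pages >= 131072	->  >= 512 MiB of usable RAM:
	 *	raise the wmem/rmem maxima to 131071 bytes
	 */
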
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 923db06c7e5..bc4467082a0 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1049,10 +1049,10 @@ static int __init dccp_init(void)
1049 * 1049 *
1050 * The methodology is similar to that of the buffer cache. 1050 * The methodology is similar to that of the buffer cache.
1051 */ 1051 */
1052 if (num_physpages >= (128 * 1024)) 1052 if (totalram_pages >= (128 * 1024))
1053 goal = num_physpages >> (21 - PAGE_SHIFT); 1053 goal = totalram_pages >> (21 - PAGE_SHIFT);
1054 else 1054 else
1055 goal = num_physpages >> (23 - PAGE_SHIFT); 1055 goal = totalram_pages >> (23 - PAGE_SHIFT);
1056 1056
1057 if (thash_entries) 1057 if (thash_entries)
1058 goal = (thash_entries * 1058 goal = (thash_entries *
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 9383d3e5a1a..57662cabaf9 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1750,7 +1750,7 @@ void __init dn_route_init(void)
1750 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; 1750 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
1751 add_timer(&dn_route_timer); 1751 add_timer(&dn_route_timer);
1752 1752
1753 goal = num_physpages >> (26 - PAGE_SHIFT); 1753 goal = totalram_pages >> (26 - PAGE_SHIFT);
1754 1754
1755 for(order = 0; (1UL << order) < goal; order++) 1755 for(order = 0; (1UL << order) < goal; order++)
1756 /* NOTHING */; 1756 /* NOTHING */;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 91867d3e632..df934731453 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3414,7 +3414,7 @@ int __init ip_rt_init(void)
3414 alloc_large_system_hash("IP route cache", 3414 alloc_large_system_hash("IP route cache",
3415 sizeof(struct rt_hash_bucket), 3415 sizeof(struct rt_hash_bucket),
3416 rhash_entries, 3416 rhash_entries,
3417 (num_physpages >= 128 * 1024) ? 3417 (totalram_pages >= 128 * 1024) ?
3418 15 : 17, 3418 15 : 17,
3419 0, 3419 0,
3420 &rt_hash_log, 3420 &rt_hash_log,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 19a0612b8a2..21387ebabf0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2862,7 +2862,7 @@ void __init tcp_init(void)
2862 alloc_large_system_hash("TCP established", 2862 alloc_large_system_hash("TCP established",
2863 sizeof(struct inet_ehash_bucket), 2863 sizeof(struct inet_ehash_bucket),
2864 thash_entries, 2864 thash_entries,
2865 (num_physpages >= 128 * 1024) ? 2865 (totalram_pages >= 128 * 1024) ?
2866 13 : 15, 2866 13 : 15,
2867 0, 2867 0,
2868 &tcp_hashinfo.ehash_size, 2868 &tcp_hashinfo.ehash_size,
@@ -2879,7 +2879,7 @@ void __init tcp_init(void)
2879 alloc_large_system_hash("TCP bind", 2879 alloc_large_system_hash("TCP bind",
2880 sizeof(struct inet_bind_hashbucket), 2880 sizeof(struct inet_bind_hashbucket),
2881 tcp_hashinfo.ehash_size, 2881 tcp_hashinfo.ehash_size,
2882 (num_physpages >= 128 * 1024) ? 2882 (totalram_pages >= 128 * 1024) ?
2883 13 : 15, 2883 13 : 15,
2884 0, 2884 0,
2885 &tcp_hashinfo.bhash_size, 2885 &tcp_hashinfo.bhash_size,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b37109817a9..7c9ec3dee96 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1245,9 +1245,9 @@ static int nf_conntrack_init_init_net(void)
1245 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ 1245 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
1246 if (!nf_conntrack_htable_size) { 1246 if (!nf_conntrack_htable_size) {
1247 nf_conntrack_htable_size 1247 nf_conntrack_htable_size
1248 = (((num_physpages << PAGE_SHIFT) / 16384) 1248 = (((totalram_pages << PAGE_SHIFT) / 16384)
1249 / sizeof(struct hlist_head)); 1249 / sizeof(struct hlist_head));
1250 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) 1250 if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
1251 nf_conntrack_htable_size = 16384; 1251 nf_conntrack_htable_size = 16384;
1252 if (nf_conntrack_htable_size < 32) 1252 if (nf_conntrack_htable_size < 32)
1253 nf_conntrack_htable_size = 32; 1253 nf_conntrack_htable_size = 32;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a6ac83a9334..f01955cce31 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -617,7 +617,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
617 int cpu; 617 int cpu;
618 618
619 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ 619 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
620 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages) 620 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
621 return NULL; 621 return NULL;
622 622
623 newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL); 623 newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 219dcdbe388..dd16e404424 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -194,9 +194,9 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
194 if (minfo->cfg.size) 194 if (minfo->cfg.size)
195 size = minfo->cfg.size; 195 size = minfo->cfg.size;
196 else { 196 else {
197 size = ((num_physpages << PAGE_SHIFT) / 16384) / 197 size = ((totalram_pages << PAGE_SHIFT) / 16384) /
198 sizeof(struct list_head); 198 sizeof(struct list_head);
199 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) 199 if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
200 size = 8192; 200 size = 8192;
201 if (size < 16) 201 if (size < 16)
202 size = 16; 202 size = 16;
@@ -266,9 +266,9 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
266 if (minfo->cfg.size) { 266 if (minfo->cfg.size) {
267 size = minfo->cfg.size; 267 size = minfo->cfg.size;
268 } else { 268 } else {
269 size = (num_physpages << PAGE_SHIFT) / 16384 / 269 size = (totalram_pages << PAGE_SHIFT) / 16384 /
270 sizeof(struct list_head); 270 sizeof(struct list_head);
271 if (num_physpages > 1024 * 1024 * 1024 / PAGE_SIZE) 271 if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
272 size = 8192; 272 size = 8192;
273 if (size < 16) 273 if (size < 16)
274 size = 16; 274 size = 16;
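
Both the conntrack and hashlimit tables are sized at roughly one bucket per 16 KiB of RAM, with a floor and a cap. Worked through for a 1 GiB, 64-bit machine with 4 KiB pages (illustrative only; sizeof(struct list_head) is 16 bytes there):

	size = (totalram_pages << PAGE_SHIFT) / 16384 /
			sizeof(struct list_head);
	/*   = (262144 << 12) / 16384 / 16
	 *   = 1 GiB / 16 KiB / 16
	 *   = 4096 buckets
	 *
	 * 1 GiB is not greater than 1 GiB, so the 8192 cap does not
	 * apply; the "size < 16" floor only matters on tiny systems.
	 */
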
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c5aab6a368c..55180b99562 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2091,10 +2091,10 @@ static int __init netlink_proto_init(void)
2091 if (!nl_table) 2091 if (!nl_table)
2092 goto panic; 2092 goto panic;
2093 2093
2094 if (num_physpages >= (128 * 1024)) 2094 if (totalram_pages >= (128 * 1024))
2095 limit = num_physpages >> (21 - PAGE_SHIFT); 2095 limit = totalram_pages >> (21 - PAGE_SHIFT);
2096 else 2096 else
2097 limit = num_physpages >> (23 - PAGE_SHIFT); 2097 limit = totalram_pages >> (23 - PAGE_SHIFT);
2098 2098
2099 order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; 2099 order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2100 limit = (1UL << order) / sizeof(struct hlist_head); 2100 limit = (1UL << order) / sizeof(struct hlist_head);
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index d9231245a79..bc0019f704f 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -96,7 +96,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
96} 96}
97 97
98/* 98/*
99 * allocate a new client call and attempt to to get a connection slot for it 99 * allocate a new client call and attempt to get a connection slot for it
100 */ 100 */
101static struct rxrpc_call *rxrpc_alloc_client_call( 101static struct rxrpc_call *rxrpc_alloc_client_call(
102 struct rxrpc_sock *rx, 102 struct rxrpc_sock *rx,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 375d64cb1a3..2c5c76be18f 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -77,7 +77,7 @@
77 * The service curve parameters are converted to the internal 77 * The service curve parameters are converted to the internal
78 * representation. The slope values are scaled to avoid overflow. 78 * representation. The slope values are scaled to avoid overflow.
79 * the inverse slope values as well as the y-projection of the 1st 79 * the inverse slope values as well as the y-projection of the 1st
80 * segment are kept in order to to avoid 64-bit divide operations 80 * segment are kept in order to avoid 64-bit divide operations
81 * that are expensive on 32-bit architectures. 81 * that are expensive on 32-bit architectures.
82 */ 82 */
83 83
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index c557f1fb1c6..612dc878e05 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1184,10 +1184,10 @@ SCTP_STATIC __init int sctp_init(void)
1184 /* Size and allocate the association hash table. 1184 /* Size and allocate the association hash table.
1185 * The methodology is similar to that of the tcp hash tables. 1185 * The methodology is similar to that of the tcp hash tables.
1186 */ 1186 */
1187 if (num_physpages >= (128 * 1024)) 1187 if (totalram_pages >= (128 * 1024))
1188 goal = num_physpages >> (22 - PAGE_SHIFT); 1188 goal = totalram_pages >> (22 - PAGE_SHIFT);
1189 else 1189 else
1190 goal = num_physpages >> (24 - PAGE_SHIFT); 1190 goal = totalram_pages >> (24 - PAGE_SHIFT);
1191 1191
1192 for (order = 0; (1UL << order) < goal; order++) 1192 for (order = 0; (1UL << order) < goal; order++)
1193 ; 1193 ;
diff --git a/net/socket.c b/net/socket.c
index 2a022c00d85..0ad02ae61a9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -285,7 +285,7 @@ static int init_inodecache(void)
285 return 0; 285 return 0;
286} 286}
287 287
288static struct super_operations sockfs_ops = { 288static const struct super_operations sockfs_ops = {
289 .alloc_inode = sock_alloc_inode, 289 .alloc_inode = sock_alloc_inode,
290 .destroy_inode =sock_destroy_inode, 290 .destroy_inode =sock_destroy_inode,
291 .statfs = simple_statfs, 291 .statfs = simple_statfs,
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 0c431c277af..54a4e042f10 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -385,7 +385,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
385EXPORT_SYMBOL_GPL(rpcauth_init_cred); 385EXPORT_SYMBOL_GPL(rpcauth_init_cred);
386 386
387void 387void
388rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) 388rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
389{ 389{
390 task->tk_msg.rpc_cred = get_rpccred(cred); 390 task->tk_msg.rpc_cred = get_rpccred(cred);
391 dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, 391 dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
@@ -394,7 +394,7 @@ rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
394EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); 394EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
395 395
396static void 396static void
397rpcauth_bind_root_cred(struct rpc_task *task) 397rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
398{ 398{
399 struct rpc_auth *auth = task->tk_client->cl_auth; 399 struct rpc_auth *auth = task->tk_client->cl_auth;
400 struct auth_cred acred = { 400 struct auth_cred acred = {
@@ -405,7 +405,7 @@ rpcauth_bind_root_cred(struct rpc_task *task)
405 405
406 dprintk("RPC: %5u looking up %s cred\n", 406 dprintk("RPC: %5u looking up %s cred\n",
407 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); 407 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
408 ret = auth->au_ops->lookup_cred(auth, &acred, 0); 408 ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
409 if (!IS_ERR(ret)) 409 if (!IS_ERR(ret))
410 task->tk_msg.rpc_cred = ret; 410 task->tk_msg.rpc_cred = ret;
411 else 411 else
@@ -413,14 +413,14 @@ rpcauth_bind_root_cred(struct rpc_task *task)
413} 413}
414 414
415static void 415static void
416rpcauth_bind_new_cred(struct rpc_task *task) 416rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
417{ 417{
418 struct rpc_auth *auth = task->tk_client->cl_auth; 418 struct rpc_auth *auth = task->tk_client->cl_auth;
419 struct rpc_cred *ret; 419 struct rpc_cred *ret;
420 420
421 dprintk("RPC: %5u looking up %s cred\n", 421 dprintk("RPC: %5u looking up %s cred\n",
422 task->tk_pid, auth->au_ops->au_name); 422 task->tk_pid, auth->au_ops->au_name);
423 ret = rpcauth_lookupcred(auth, 0); 423 ret = rpcauth_lookupcred(auth, lookupflags);
424 if (!IS_ERR(ret)) 424 if (!IS_ERR(ret))
425 task->tk_msg.rpc_cred = ret; 425 task->tk_msg.rpc_cred = ret;
426 else 426 else
@@ -430,12 +430,16 @@ rpcauth_bind_new_cred(struct rpc_task *task)
430void 430void
431rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) 431rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
432{ 432{
433 int lookupflags = 0;
434
435 if (flags & RPC_TASK_ASYNC)
436 lookupflags |= RPCAUTH_LOOKUP_NEW;
433 if (cred != NULL) 437 if (cred != NULL)
434 cred->cr_ops->crbind(task, cred); 438 cred->cr_ops->crbind(task, cred, lookupflags);
435 else if (flags & RPC_TASK_ROOTCREDS) 439 else if (flags & RPC_TASK_ROOTCREDS)
436 rpcauth_bind_root_cred(task); 440 rpcauth_bind_root_cred(task, lookupflags);
437 else 441 else
438 rpcauth_bind_new_cred(task); 442 rpcauth_bind_new_cred(task, lookupflags);
439} 443}
440 444
441void 445void
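
The point of threading lookupflags through every crbind path: an asynchronous RPC task runs from rpciod and must not sleep waiting on a credential upcall, so RPC_TASK_ASYNC is translated once at the top and every downstream lookup sees it. A condensed view of the flow (names from the hunks above; RPCAUTH_LOOKUP_NEW is defined elsewhere in sunrpc):

	rpcauth_bindcred(task, cred, flags);
		/* flags & RPC_TASK_ASYNC -> lookupflags |= RPCAUTH_LOOKUP_NEW */
		cred->cr_ops->crbind(task, cred, lookupflags);
			/* e.g. generic_bind_cred(), below */
			auth->au_ops->lookup_cred(auth, acred, lookupflags);
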
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 4028502f052..bf88bf8e936 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -55,13 +55,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void)
55EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); 55EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
56 56
57static void 57static void
58generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) 58generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
59{ 59{
60 struct rpc_auth *auth = task->tk_client->cl_auth; 60 struct rpc_auth *auth = task->tk_client->cl_auth;
61 struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; 61 struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
62 struct rpc_cred *ret; 62 struct rpc_cred *ret;
63 63
64 ret = auth->au_ops->lookup_cred(auth, acred, 0); 64 ret = auth->au_ops->lookup_cred(auth, acred, lookupflags);
65 if (!IS_ERR(ret)) 65 if (!IS_ERR(ret))
66 task->tk_msg.rpc_cred = ret; 66 task->tk_msg.rpc_cred = ret;
67 else 67 else
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 2e6a148d277..f6c51e562a0 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1374,8 +1374,10 @@ svcauth_gss_release(struct svc_rqst *rqstp)
1374 if (stat) 1374 if (stat)
1375 goto out_err; 1375 goto out_err;
1376 break; 1376 break;
1377 default: 1377 /*
1378 goto out_err; 1378 * For any other gc_svc value, svcauth_gss_accept() already set
1379 * the auth_error appropriately; just fall through:
1380 */
1379 } 1381 }
1380 1382
1381out: 1383out:
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 45cdaff9b36..d6eee291a0e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -103,23 +103,21 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
103EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); 103EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
104 104
105 105
106static void queue_loose(struct cache_detail *detail, struct cache_head *ch); 106static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
107 107
108static int cache_fresh_locked(struct cache_head *head, time_t expiry) 108static void cache_fresh_locked(struct cache_head *head, time_t expiry)
109{ 109{
110 head->expiry_time = expiry; 110 head->expiry_time = expiry;
111 head->last_refresh = get_seconds(); 111 head->last_refresh = get_seconds();
112 return !test_and_set_bit(CACHE_VALID, &head->flags); 112 set_bit(CACHE_VALID, &head->flags);
113} 113}
114 114
115static void cache_fresh_unlocked(struct cache_head *head, 115static void cache_fresh_unlocked(struct cache_head *head,
116 struct cache_detail *detail, int new) 116 struct cache_detail *detail)
117{ 117{
118 if (new)
119 cache_revisit_request(head);
120 if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { 118 if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
121 cache_revisit_request(head); 119 cache_revisit_request(head);
122 queue_loose(detail, head); 120 cache_dequeue(detail, head);
123 } 121 }
124} 122}
125 123
@@ -132,7 +130,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
132 */ 130 */
133 struct cache_head **head; 131 struct cache_head **head;
134 struct cache_head *tmp; 132 struct cache_head *tmp;
135 int is_new;
136 133
137 if (!test_bit(CACHE_VALID, &old->flags)) { 134 if (!test_bit(CACHE_VALID, &old->flags)) {
138 write_lock(&detail->hash_lock); 135 write_lock(&detail->hash_lock);
@@ -141,9 +138,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
141 set_bit(CACHE_NEGATIVE, &old->flags); 138 set_bit(CACHE_NEGATIVE, &old->flags);
142 else 139 else
143 detail->update(old, new); 140 detail->update(old, new);
144 is_new = cache_fresh_locked(old, new->expiry_time); 141 cache_fresh_locked(old, new->expiry_time);
145 write_unlock(&detail->hash_lock); 142 write_unlock(&detail->hash_lock);
146 cache_fresh_unlocked(old, detail, is_new); 143 cache_fresh_unlocked(old, detail);
147 return old; 144 return old;
148 } 145 }
149 write_unlock(&detail->hash_lock); 146 write_unlock(&detail->hash_lock);
@@ -167,11 +164,11 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
167 *head = tmp; 164 *head = tmp;
168 detail->entries++; 165 detail->entries++;
169 cache_get(tmp); 166 cache_get(tmp);
170 is_new = cache_fresh_locked(tmp, new->expiry_time); 167 cache_fresh_locked(tmp, new->expiry_time);
171 cache_fresh_locked(old, 0); 168 cache_fresh_locked(old, 0);
172 write_unlock(&detail->hash_lock); 169 write_unlock(&detail->hash_lock);
173 cache_fresh_unlocked(tmp, detail, is_new); 170 cache_fresh_unlocked(tmp, detail);
174 cache_fresh_unlocked(old, detail, 0); 171 cache_fresh_unlocked(old, detail);
175 cache_put(old, detail); 172 cache_put(old, detail);
176 return tmp; 173 return tmp;
177} 174}
@@ -184,6 +181,22 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
184 return cd->cache_upcall(cd, h); 181 return cd->cache_upcall(cd, h);
185} 182}
186 183
184static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
185{
186 if (!test_bit(CACHE_VALID, &h->flags) ||
187 h->expiry_time < get_seconds())
188 return -EAGAIN;
189 else if (detail->flush_time > h->last_refresh)
190 return -EAGAIN;
191 else {
192 /* entry is valid */
193 if (test_bit(CACHE_NEGATIVE, &h->flags))
194 return -ENOENT;
195 else
196 return 0;
197 }
198}
199
187/* 200/*
188 * This is the generic cache management routine for all 201 * This is the generic cache management routine for all
189 * the authentication caches. 202 * the authentication caches.
@@ -192,8 +205,10 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
192 * 205 *
193 * 206 *
194 * Returns 0 if the cache_head can be used, or cache_puts it and returns 207 * Returns 0 if the cache_head can be used, or cache_puts it and returns
195 * -EAGAIN if upcall is pending, 208 * -EAGAIN if upcall is pending and request has been queued
196 * -ETIMEDOUT if upcall failed and should be retried, 209 * -ETIMEDOUT if upcall failed or request could not be queued, or
210 * upcall completed but item is still invalid (implying that
211 * the cache item has been replaced with a newer one).
197 * -ENOENT if cache entry was negative 212 * -ENOENT if cache entry was negative
198 */ 213 */
199int cache_check(struct cache_detail *detail, 214int cache_check(struct cache_detail *detail,
@@ -203,17 +218,7 @@ int cache_check(struct cache_detail *detail,
203 long refresh_age, age; 218 long refresh_age, age;
204 219
205 /* First decide return status as best we can */ 220 /* First decide return status as best we can */
206 if (!test_bit(CACHE_VALID, &h->flags) || 221 rv = cache_is_valid(detail, h);
207 h->expiry_time < get_seconds())
208 rv = -EAGAIN;
209 else if (detail->flush_time > h->last_refresh)
210 rv = -EAGAIN;
211 else {
212 /* entry is valid */
213 if (test_bit(CACHE_NEGATIVE, &h->flags))
214 rv = -ENOENT;
215 else rv = 0;
216 }
217 222
218 /* now see if we want to start an upcall */ 223 /* now see if we want to start an upcall */
219 refresh_age = (h->expiry_time - h->last_refresh); 224 refresh_age = (h->expiry_time - h->last_refresh);
@@ -229,10 +234,11 @@ int cache_check(struct cache_detail *detail,
229 switch (cache_make_upcall(detail, h)) { 234 switch (cache_make_upcall(detail, h)) {
230 case -EINVAL: 235 case -EINVAL:
231 clear_bit(CACHE_PENDING, &h->flags); 236 clear_bit(CACHE_PENDING, &h->flags);
237 cache_revisit_request(h);
232 if (rv == -EAGAIN) { 238 if (rv == -EAGAIN) {
233 set_bit(CACHE_NEGATIVE, &h->flags); 239 set_bit(CACHE_NEGATIVE, &h->flags);
234 cache_fresh_unlocked(h, detail, 240 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY);
235 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY)); 241 cache_fresh_unlocked(h, detail);
236 rv = -ENOENT; 242 rv = -ENOENT;
237 } 243 }
238 break; 244 break;
@@ -245,10 +251,14 @@ int cache_check(struct cache_detail *detail,
245 } 251 }
246 } 252 }
247 253
248 if (rv == -EAGAIN) 254 if (rv == -EAGAIN) {
249 if (cache_defer_req(rqstp, h) != 0) 255 if (cache_defer_req(rqstp, h) < 0) {
250 rv = -ETIMEDOUT; 256 /* Request is not deferred */
251 257 rv = cache_is_valid(detail, h);
258 if (rv == -EAGAIN)
259 rv = -ETIMEDOUT;
260 }
261 }
252 if (rv) 262 if (rv)
253 cache_put(h, detail); 263 cache_put(h, detail);
254 return rv; 264 return rv;
@@ -396,7 +406,7 @@ static int cache_clean(void)
396 ) 406 )
397 continue; 407 continue;
398 if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) 408 if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
399 queue_loose(current_detail, ch); 409 cache_dequeue(current_detail, ch);
400 410
401 if (atomic_read(&ch->ref.refcount) == 1) 411 if (atomic_read(&ch->ref.refcount) == 1)
402 break; 412 break;
@@ -412,8 +422,10 @@ static int cache_clean(void)
412 if (!ch) 422 if (!ch)
413 current_index ++; 423 current_index ++;
414 spin_unlock(&cache_list_lock); 424 spin_unlock(&cache_list_lock);
415 if (ch) 425 if (ch) {
426 cache_revisit_request(ch);
416 cache_put(ch, d); 427 cache_put(ch, d);
428 }
417 } else 429 } else
418 spin_unlock(&cache_list_lock); 430 spin_unlock(&cache_list_lock);
419 431
@@ -488,7 +500,7 @@ static int cache_defer_cnt;
488 500
489static int cache_defer_req(struct cache_req *req, struct cache_head *item) 501static int cache_defer_req(struct cache_req *req, struct cache_head *item)
490{ 502{
491 struct cache_deferred_req *dreq; 503 struct cache_deferred_req *dreq, *discard;
492 int hash = DFR_HASH(item); 504 int hash = DFR_HASH(item);
493 505
494 if (cache_defer_cnt >= DFR_MAX) { 506 if (cache_defer_cnt >= DFR_MAX) {
@@ -496,11 +508,11 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
496 * or continue and drop the oldest below 508 * or continue and drop the oldest below
497 */ 509 */
498 if (net_random()&1) 510 if (net_random()&1)
499 return -ETIMEDOUT; 511 return -ENOMEM;
500 } 512 }
501 dreq = req->defer(req); 513 dreq = req->defer(req);
502 if (dreq == NULL) 514 if (dreq == NULL)
503 return -ETIMEDOUT; 515 return -ENOMEM;
504 516
505 dreq->item = item; 517 dreq->item = item;
506 518
@@ -513,23 +525,24 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
513 list_add(&dreq->hash, &cache_defer_hash[hash]); 525 list_add(&dreq->hash, &cache_defer_hash[hash]);
514 526
515 /* it is in, now maybe clean up */ 527 /* it is in, now maybe clean up */
516 dreq = NULL; 528 discard = NULL;
517 if (++cache_defer_cnt > DFR_MAX) { 529 if (++cache_defer_cnt > DFR_MAX) {
518 dreq = list_entry(cache_defer_list.prev, 530 discard = list_entry(cache_defer_list.prev,
519 struct cache_deferred_req, recent); 531 struct cache_deferred_req, recent);
520 list_del(&dreq->recent); 532 list_del_init(&discard->recent);
521 list_del(&dreq->hash); 533 list_del_init(&discard->hash);
522 cache_defer_cnt--; 534 cache_defer_cnt--;
523 } 535 }
524 spin_unlock(&cache_defer_lock); 536 spin_unlock(&cache_defer_lock);
525 537
526 if (dreq) { 538 if (discard)
527 /* there was one too many */ 539 /* there was one too many */
528 dreq->revisit(dreq, 1); 540 discard->revisit(discard, 1);
529 } 541
530 if (!test_bit(CACHE_PENDING, &item->flags)) { 542 if (!test_bit(CACHE_PENDING, &item->flags)) {
531 /* must have just been validated... */ 543 /* must have just been validated... */
532 cache_revisit_request(item); 544 cache_revisit_request(item);
545 return -EAGAIN;
533 } 546 }
534 return 0; 547 return 0;
535} 548}
@@ -551,7 +564,7 @@ static void cache_revisit_request(struct cache_head *item)
551 dreq = list_entry(lp, struct cache_deferred_req, hash); 564 dreq = list_entry(lp, struct cache_deferred_req, hash);
552 lp = lp->next; 565 lp = lp->next;
553 if (dreq->item == item) { 566 if (dreq->item == item) {
554 list_del(&dreq->hash); 567 list_del_init(&dreq->hash);
555 list_move(&dreq->recent, &pending); 568 list_move(&dreq->recent, &pending);
556 cache_defer_cnt--; 569 cache_defer_cnt--;
557 } 570 }
@@ -577,7 +590,7 @@ void cache_clean_deferred(void *owner)
577 590
578 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { 591 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
579 if (dreq->owner == owner) { 592 if (dreq->owner == owner) {
580 list_del(&dreq->hash); 593 list_del_init(&dreq->hash);
581 list_move(&dreq->recent, &pending); 594 list_move(&dreq->recent, &pending);
582 cache_defer_cnt--; 595 cache_defer_cnt--;
583 } 596 }
@@ -887,7 +900,7 @@ static int cache_release(struct inode *inode, struct file *filp,
887 900
888 901
889 902
890static void queue_loose(struct cache_detail *detail, struct cache_head *ch) 903static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
891{ 904{
892 struct cache_queue *cq; 905 struct cache_queue *cq;
893 spin_lock(&queue_lock); 906 spin_lock(&queue_lock);
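
Factoring the validity test out into cache_is_valid() is what makes the new deferral path safe: when cache_defer_req() cannot defer the request, the upcall may already have completed, so cache_check() looks again before giving up. Condensed from the hunks above:

	rv = cache_is_valid(detail, h);		/* first verdict */

	/* ... possibly start an upcall ... */

	if (rv == -EAGAIN) {
		if (cache_defer_req(rqstp, h) < 0) {
			/* not deferred: the upcall may have finished
			 * while we were trying, so re-test */
			rv = cache_is_valid(detail, h);
			if (rv == -EAGAIN)
				rv = -ETIMEDOUT;
		}
	}
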
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fac0ca93f06..a417d5ab5dd 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -288,6 +288,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
288 .srcaddr = args->saddress, 288 .srcaddr = args->saddress,
289 .dstaddr = args->address, 289 .dstaddr = args->address,
290 .addrlen = args->addrsize, 290 .addrlen = args->addrsize,
291 .bc_xprt = args->bc_xprt,
291 }; 292 };
292 char servername[48]; 293 char servername[48];
293 294
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7f676bdf70d..858a443f418 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -930,7 +930,7 @@ void rpc_remove_cache_dir(struct dentry *dentry)
930/* 930/*
931 * populate the filesystem 931 * populate the filesystem
932 */ 932 */
933static struct super_operations s_ops = { 933static const struct super_operations s_ops = {
934 .alloc_inode = rpc_alloc_inode, 934 .alloc_inode = rpc_alloc_inode,
935 .destroy_inode = rpc_destroy_inode, 935 .destroy_inode = rpc_destroy_inode,
936 .statfs = simple_statfs, 936 .statfs = simple_statfs,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 8f459abe97c..cef74ba0666 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -21,6 +21,8 @@
21 21
22#include <linux/sunrpc/clnt.h> 22#include <linux/sunrpc/clnt.h>
23 23
24#include "sunrpc.h"
25
24#ifdef RPC_DEBUG 26#ifdef RPC_DEBUG
25#define RPCDBG_FACILITY RPCDBG_SCHED 27#define RPCDBG_FACILITY RPCDBG_SCHED
26#define RPC_TASK_MAGIC_ID 0xf00baa 28#define RPC_TASK_MAGIC_ID 0xf00baa
@@ -711,11 +713,6 @@ static void rpc_async_schedule(struct work_struct *work)
711 __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); 713 __rpc_execute(container_of(work, struct rpc_task, u.tk_work));
712} 714}
713 715
714struct rpc_buffer {
715 size_t len;
716 char data[];
717};
718
719/** 716/**
720 * rpc_malloc - allocate an RPC buffer 717 * rpc_malloc - allocate an RPC buffer
721 * @task: RPC task that will use this buffer 718 * @task: RPC task that will use this buffer
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 5d9dd742264..90c292e2738 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -27,11 +27,25 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27#ifndef _NET_SUNRPC_SUNRPC_H 27#ifndef _NET_SUNRPC_SUNRPC_H
28#define _NET_SUNRPC_SUNRPC_H 28#define _NET_SUNRPC_SUNRPC_H
29 29
30#include <linux/net.h>
31
32/*
33 * Header for dynamically allocated rpc buffers.
34 */
35struct rpc_buffer {
36 size_t len;
37 char data[];
38};
39
30static inline int rpc_reply_expected(struct rpc_task *task) 40static inline int rpc_reply_expected(struct rpc_task *task)
31{ 41{
32 return (task->tk_msg.rpc_proc != NULL) && 42 return (task->tk_msg.rpc_proc != NULL) &&
33 (task->tk_msg.rpc_proc->p_decode != NULL); 43 (task->tk_msg.rpc_proc->p_decode != NULL);
34} 44}
35 45
46int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
47 struct page *headpage, unsigned long headoffset,
48 struct page *tailpage, unsigned long tailoffset);
49
36#endif /* _NET_SUNRPC_SUNRPC_H */ 50#endif /* _NET_SUNRPC_SUNRPC_H */
37 51
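
Moving struct rpc_buffer out of sched.c lets other sunrpc code (notably the backchannel paths) see the header that rpc_malloc() prepends to every buffer. rpc_malloc() itself is not in these hunks; a sketch of how the flexible array member is used, under that assumption:

	struct rpc_buffer *buf;

	size += sizeof(struct rpc_buffer);
	buf = kmalloc(size, gfp);
	if (!buf)
		return NULL;
	buf->len = size;		/* remembered so rpc_free() can
					 * recover the original pointer */
	return &buf->data;		/* the caller sees only the payload */
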
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 27d44332f01..df124f78ee4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -160,6 +160,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
160 mutex_init(&xprt->xpt_mutex); 160 mutex_init(&xprt->xpt_mutex);
161 spin_lock_init(&xprt->xpt_lock); 161 spin_lock_init(&xprt->xpt_lock);
162 set_bit(XPT_BUSY, &xprt->xpt_flags); 162 set_bit(XPT_BUSY, &xprt->xpt_flags);
163 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
163} 164}
164EXPORT_SYMBOL_GPL(svc_xprt_init); 165EXPORT_SYMBOL_GPL(svc_xprt_init);
165 166
@@ -710,10 +711,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
710 spin_unlock_bh(&pool->sp_lock); 711 spin_unlock_bh(&pool->sp_lock);
711 712
712 len = 0; 713 len = 0;
713 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { 714 if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
714 dprintk("svc_recv: found XPT_CLOSE\n");
715 svc_delete_xprt(xprt);
716 } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
717 struct svc_xprt *newxpt; 715 struct svc_xprt *newxpt;
718 newxpt = xprt->xpt_ops->xpo_accept(xprt); 716 newxpt = xprt->xpt_ops->xpo_accept(xprt);
719 if (newxpt) { 717 if (newxpt) {
@@ -739,7 +737,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
739 svc_xprt_received(newxpt); 737 svc_xprt_received(newxpt);
740 } 738 }
741 svc_xprt_received(xprt); 739 svc_xprt_received(xprt);
742 } else { 740 } else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
743 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", 741 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
744 rqstp, pool->sp_id, xprt, 742 rqstp, pool->sp_id, xprt,
745 atomic_read(&xprt->xpt_ref.refcount)); 743 atomic_read(&xprt->xpt_ref.refcount));
@@ -752,6 +750,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
752 dprintk("svc: got len=%d\n", len); 750 dprintk("svc: got len=%d\n", len);
753 } 751 }
754 752
753 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
754 dprintk("svc_recv: found XPT_CLOSE\n");
755 svc_delete_xprt(xprt);
756 }
757
755 /* No data, incomplete (TCP) read, or accept() */ 758 /* No data, incomplete (TCP) read, or accept() */
756 if (len == 0 || len == -EAGAIN) { 759 if (len == 0 || len == -EAGAIN) {
757 rqstp->rq_res.len = 0; 760 rqstp->rq_res.len = 0;
@@ -808,6 +811,7 @@ int svc_send(struct svc_rqst *rqstp)
808 else 811 else
809 len = xprt->xpt_ops->xpo_sendto(rqstp); 812 len = xprt->xpt_ops->xpo_sendto(rqstp);
810 mutex_unlock(&xprt->xpt_mutex); 813 mutex_unlock(&xprt->xpt_mutex);
814 rpc_wake_up(&xprt->xpt_bc_pending);
811 svc_xprt_release(rqstp); 815 svc_xprt_release(rqstp);
812 816
813 if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) 817 if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
@@ -1166,11 +1170,6 @@ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
1166 1170
1167 dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); 1171 dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
1168 1172
1169 lock_kernel();
1170 /* bump up the pseudo refcount while traversing */
1171 svc_get(serv);
1172 unlock_kernel();
1173
1174 if (!pidx) 1173 if (!pidx)
1175 return SEQ_START_TOKEN; 1174 return SEQ_START_TOKEN;
1176 return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]); 1175 return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
@@ -1198,12 +1197,6 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
1198 1197
1199static void svc_pool_stats_stop(struct seq_file *m, void *p) 1198static void svc_pool_stats_stop(struct seq_file *m, void *p)
1200{ 1199{
1201 struct svc_serv *serv = m->private;
1202
1203 lock_kernel();
1204 /* this function really, really should have been called svc_put() */
1205 svc_destroy(serv);
1206 unlock_kernel();
1207} 1200}
1208 1201
1209static int svc_pool_stats_show(struct seq_file *m, void *p) 1202static int svc_pool_stats_show(struct seq_file *m, void *p)
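
xpt_bc_pending gives backchannel senders something to block on while the forechannel holds xpt_mutex; svc_send() wakes the queue once the mutex is released. A sketch of the assumed consumer on the backchannel side (not part of these hunks):

	if (!mutex_trylock(&xprt->xpt_mutex)) {
		/* forechannel is mid-send; park this task until
		 * svc_send() runs rpc_wake_up(&xprt->xpt_bc_pending) */
		rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
		return -EAGAIN;
	}

Relocating the XPT_CLOSE check below the receive path has a related effect: a transport marked closed while being read is now torn down in the same svc_recv() pass instead of waiting for the next one.
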
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 6caffa34ac0..117f68a8aa4 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -668,6 +668,7 @@ static int unix_gid_find(uid_t uid, struct group_info **gip,
668 case 0: 668 case 0:
669 *gip = ug->gi; 669 *gip = ug->gi;
670 get_group_info(*gip); 670 get_group_info(*gip);
671 cache_put(&ug->h, &unix_gid_cache);
671 return 0; 672 return 0;
672 default: 673 default:
673 return -EAGAIN; 674 return -EAGAIN;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 23128ee191a..ccc5e83cae5 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -49,6 +49,7 @@
49#include <linux/sunrpc/msg_prot.h> 49#include <linux/sunrpc/msg_prot.h>
50#include <linux/sunrpc/svcsock.h> 50#include <linux/sunrpc/svcsock.h>
51#include <linux/sunrpc/stats.h> 51#include <linux/sunrpc/stats.h>
52#include <linux/sunrpc/xprt.h>
52 53
53#define RPCDBG_FACILITY RPCDBG_SVCXPRT 54#define RPCDBG_FACILITY RPCDBG_SVCXPRT
54 55
@@ -153,49 +154,27 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
153} 154}
154 155
155/* 156/*
156 * Generic sendto routine 157 * send routine intended to be shared by the fore- and back-channel
157 */ 158 */
158static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) 159int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
160 struct page *headpage, unsigned long headoffset,
161 struct page *tailpage, unsigned long tailoffset)
159{ 162{
160 struct svc_sock *svsk =
161 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
162 struct socket *sock = svsk->sk_sock;
163 int slen;
164 union {
165 struct cmsghdr hdr;
166 long all[SVC_PKTINFO_SPACE / sizeof(long)];
167 } buffer;
168 struct cmsghdr *cmh = &buffer.hdr;
169 int len = 0;
170 int result; 163 int result;
171 int size; 164 int size;
172 struct page **ppage = xdr->pages; 165 struct page **ppage = xdr->pages;
173 size_t base = xdr->page_base; 166 size_t base = xdr->page_base;
174 unsigned int pglen = xdr->page_len; 167 unsigned int pglen = xdr->page_len;
175 unsigned int flags = MSG_MORE; 168 unsigned int flags = MSG_MORE;
176 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 169 int slen;
170 int len = 0;
177 171
178 slen = xdr->len; 172 slen = xdr->len;
179 173
180 if (rqstp->rq_prot == IPPROTO_UDP) {
181 struct msghdr msg = {
182 .msg_name = &rqstp->rq_addr,
183 .msg_namelen = rqstp->rq_addrlen,
184 .msg_control = cmh,
185 .msg_controllen = sizeof(buffer),
186 .msg_flags = MSG_MORE,
187 };
188
189 svc_set_cmsg_data(rqstp, cmh);
190
191 if (sock_sendmsg(sock, &msg, 0) < 0)
192 goto out;
193 }
194
195 /* send head */ 174 /* send head */
196 if (slen == xdr->head[0].iov_len) 175 if (slen == xdr->head[0].iov_len)
197 flags = 0; 176 flags = 0;
198 len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, 177 len = kernel_sendpage(sock, headpage, headoffset,
199 xdr->head[0].iov_len, flags); 178 xdr->head[0].iov_len, flags);
200 if (len != xdr->head[0].iov_len) 179 if (len != xdr->head[0].iov_len)
201 goto out; 180 goto out;
@@ -219,16 +198,58 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
219 base = 0; 198 base = 0;
220 ppage++; 199 ppage++;
221 } 200 }
201
222 /* send tail */ 202 /* send tail */
223 if (xdr->tail[0].iov_len) { 203 if (xdr->tail[0].iov_len) {
224 result = kernel_sendpage(sock, rqstp->rq_respages[0], 204 result = kernel_sendpage(sock, tailpage, tailoffset,
225 ((unsigned long)xdr->tail[0].iov_base) 205 xdr->tail[0].iov_len, 0);
226 & (PAGE_SIZE-1),
227 xdr->tail[0].iov_len, 0);
228
229 if (result > 0) 206 if (result > 0)
230 len += result; 207 len += result;
231 } 208 }
209
210out:
211 return len;
212}
213
214
215/*
216 * Generic sendto routine
217 */
218static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
219{
220 struct svc_sock *svsk =
221 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
222 struct socket *sock = svsk->sk_sock;
223 union {
224 struct cmsghdr hdr;
225 long all[SVC_PKTINFO_SPACE / sizeof(long)];
226 } buffer;
227 struct cmsghdr *cmh = &buffer.hdr;
228 int len = 0;
229 unsigned long tailoff;
230 unsigned long headoff;
231 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
232
233 if (rqstp->rq_prot == IPPROTO_UDP) {
234 struct msghdr msg = {
235 .msg_name = &rqstp->rq_addr,
236 .msg_namelen = rqstp->rq_addrlen,
237 .msg_control = cmh,
238 .msg_controllen = sizeof(buffer),
239 .msg_flags = MSG_MORE,
240 };
241
242 svc_set_cmsg_data(rqstp, cmh);
243
244 if (sock_sendmsg(sock, &msg, 0) < 0)
245 goto out;
246 }
247
248 tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
249 headoff = 0;
250 len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
251 rqstp->rq_respages[0], tailoff);
252
232out: 253out:
233 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", 254 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
234 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, 255 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
@@ -432,29 +453,49 @@ static void svc_tcp_write_space(struct sock *sk)
432} 453}
433 454
434/* 455/*
456 * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
457 */
458static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
459 struct cmsghdr *cmh)
460{
461 struct in_pktinfo *pki = CMSG_DATA(cmh);
462 if (cmh->cmsg_type != IP_PKTINFO)
463 return 0;
464 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
465 return 1;
466}
467
468/*
469 * See net/ipv6/datagram.c : datagram_recv_ctl
470 */
471static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
472 struct cmsghdr *cmh)
473{
474 struct in6_pktinfo *pki = CMSG_DATA(cmh);
475 if (cmh->cmsg_type != IPV6_PKTINFO)
476 return 0;
477 ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
478 return 1;
479}
480
481/*
435 * Copy the UDP datagram's destination address to the rqstp structure. 482 * Copy the UDP datagram's destination address to the rqstp structure.
436 * The 'destination' address in this case is the address to which the 483 * The 'destination' address in this case is the address to which the
437 * peer sent the datagram, i.e. our local address. For multihomed 484 * peer sent the datagram, i.e. our local address. For multihomed
438 * hosts, this can change from msg to msg. Note that only the IP 485 * hosts, this can change from msg to msg. Note that only the IP
439 * address changes, the port number should remain the same. 486 * address changes, the port number should remain the same.
440 */ 487 */
441static void svc_udp_get_dest_address(struct svc_rqst *rqstp, 488static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
442 struct cmsghdr *cmh) 489 struct cmsghdr *cmh)
443{ 490{
444 struct svc_sock *svsk = 491 switch (cmh->cmsg_level) {
445 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 492 case SOL_IP:
446 switch (svsk->sk_sk->sk_family) { 493 return svc_udp_get_dest_address4(rqstp, cmh);
447 case AF_INET: { 494 case SOL_IPV6:
448 struct in_pktinfo *pki = CMSG_DATA(cmh); 495 return svc_udp_get_dest_address6(rqstp, cmh);
449 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
450 break;
451 }
452 case AF_INET6: {
453 struct in6_pktinfo *pki = CMSG_DATA(cmh);
454 ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
455 break;
456 }
457 } 496 }
497
498 return 0;
458} 499}
459 500
460/* 501/*
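The two new helpers split destination-address recovery by protocol, so each checks exactly one cmsg_level/cmsg_type pair. A minimal user-space sketch of the same IP_PKTINFO parsing, using only standard socket calls (the port number and buffer sizes are arbitrary choices for the demo):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0), one = 1;
	struct sockaddr_in sin = { .sin_family = AF_INET,
				   .sin_port = htons(5555) };
	char data[1500], cbuf[CMSG_SPACE(sizeof(struct in_pktinfo))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
			      .msg_control = cbuf,
			      .msg_controllen = sizeof(cbuf) };
	struct cmsghdr *cmh;

	/* ask for the datagram's local destination, as svc_udp_init() does */
	setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
	    recvmsg(fd, &msg, 0) < 0)
		return 1;
	for (cmh = CMSG_FIRSTHDR(&msg); cmh; cmh = CMSG_NXTHDR(&msg, cmh))
		if (cmh->cmsg_level == IPPROTO_IP &&
		    cmh->cmsg_type == IP_PKTINFO) {
			struct in_pktinfo *pki = (void *)CMSG_DATA(cmh);

			/* same field svc_udp_get_dest_address4() copies */
			printf("sent to %s\n", inet_ntoa(pki->ipi_spec_dst));
		}
	return 0;
}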
@@ -531,16 +572,15 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
531 572
532 rqstp->rq_prot = IPPROTO_UDP; 573 rqstp->rq_prot = IPPROTO_UDP;
533 574
534 if (cmh->cmsg_level != IPPROTO_IP || 575 if (!svc_udp_get_dest_address(rqstp, cmh)) {
535 cmh->cmsg_type != IP_PKTINFO) {
536 if (net_ratelimit()) 576 if (net_ratelimit())
537 printk("rpcsvc: received unknown control message:" 577 printk(KERN_WARNING
538 "%d/%d\n", 578 "svc: received unknown control message %d/%d; "
539 cmh->cmsg_level, cmh->cmsg_type); 579 "dropping RPC reply datagram\n",
580 cmh->cmsg_level, cmh->cmsg_type);
540 skb_free_datagram(svsk->sk_sk, skb); 581 skb_free_datagram(svsk->sk_sk, skb);
541 return 0; 582 return 0;
542 } 583 }
543 svc_udp_get_dest_address(rqstp, cmh);
544 584
545 if (skb_is_nonlinear(skb)) { 585 if (skb_is_nonlinear(skb)) {
546 /* we have to copy */ 586 /* we have to copy */
@@ -651,8 +691,7 @@ static struct svc_xprt_class svc_udp_class = {
651 691
652static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) 692static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
653{ 693{
654 int one = 1; 694 int err, level, optname, one = 1;
655 mm_segment_t oldfs;
656 695
657 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); 696 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
658 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); 697 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
@@ -671,12 +710,22 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
671 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 710 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
672 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 711 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
673 712
674 oldfs = get_fs();
675 set_fs(KERNEL_DS);
676 /* make sure we get destination address info */ 713 /* make sure we get destination address info */
677 svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO, 714 switch (svsk->sk_sk->sk_family) {
678 (char __user *)&one, sizeof(one)); 715 case AF_INET:
679 set_fs(oldfs); 716 level = SOL_IP;
717 optname = IP_PKTINFO;
718 break;
719 case AF_INET6:
720 level = SOL_IPV6;
721 optname = IPV6_RECVPKTINFO;
722 break;
723 default:
724 BUG();
725 }
726 err = kernel_setsockopt(svsk->sk_sock, level, optname,
727 (char *)&one, sizeof(one));
728 dprintk("svc: kernel_setsockopt returned %d\n", err);
680} 729}
681 730
682/* 731/*
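kernel_setsockopt() replaces the old set_fs(KERNEL_DS) dance, and the level/optname pair is now chosen per address family so IPv6 sockets get IPV6_RECVPKTINFO. A user-space equivalent of that selection, with setsockopt() standing in for kernel_setsockopt() and error handling trimmed:

#include <netinet/in.h>
#include <sys/socket.h>

/* Pick the pktinfo option for a socket family, mirroring the switch
 * in svc_udp_init() above. */
static int enable_pktinfo(int fd, int family)
{
	int one = 1;

	switch (family) {
	case AF_INET:
		return setsockopt(fd, SOL_IP, IP_PKTINFO,
				  &one, sizeof(one));
	case AF_INET6:
		return setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
				  &one, sizeof(one));
	default:
		return -1;	/* the kernel code BUG()s here */
	}
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	return enable_pktinfo(fd, AF_INET) ? 1 : 0;
}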
@@ -826,21 +875,15 @@ failed:
826} 875}
827 876
828/* 877/*
829 * Receive data from a TCP socket. 878 * Receive data.
879 * If we haven't gotten the record length yet, get the next four bytes.
880 * Otherwise try to gobble up as much as possible up to the complete
881 * record length.
830 */ 882 */
831static int svc_tcp_recvfrom(struct svc_rqst *rqstp) 883static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
832{ 884{
833 struct svc_sock *svsk =
834 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
835 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 885 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
836 int len; 886 int len;
837 struct kvec *vec;
838 int pnum, vlen;
839
840 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
841 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
842 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
843 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
844 887
845 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) 888 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
846 /* sndbuf needs to have room for one request 889 /* sndbuf needs to have room for one request
@@ -861,10 +904,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
861 904
862 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 905 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
863 906
864 /* Receive data. If we haven't got the record length yet, get
865 * the next four bytes. Otherwise try to gobble up as much as
866 * possible up to the complete record length.
867 */
868 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 907 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
869 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; 908 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
870 struct kvec iov; 909 struct kvec iov;
@@ -879,7 +918,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
879 dprintk("svc: short recvfrom while reading record " 918 dprintk("svc: short recvfrom while reading record "
880 "length (%d of %d)\n", len, want); 919 "length (%d of %d)\n", len, want);
881 svc_xprt_received(&svsk->sk_xprt); 920 svc_xprt_received(&svsk->sk_xprt);
882 return -EAGAIN; /* record header not complete */ 921 goto err_again; /* record header not complete */
883 } 922 }
884 923
885 svsk->sk_reclen = ntohl(svsk->sk_reclen); 924 svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -894,6 +933,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
894 "per record not supported\n"); 933 "per record not supported\n");
895 goto err_delete; 934 goto err_delete;
896 } 935 }
936
897 svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; 937 svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
898 dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); 938 dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
899 if (svsk->sk_reclen > serv->sv_max_mesg) { 939 if (svsk->sk_reclen > serv->sv_max_mesg) {
@@ -914,17 +954,121 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
914 dprintk("svc: incomplete TCP record (%d of %d)\n", 954 dprintk("svc: incomplete TCP record (%d of %d)\n",
915 len, svsk->sk_reclen); 955 len, svsk->sk_reclen);
916 svc_xprt_received(&svsk->sk_xprt); 956 svc_xprt_received(&svsk->sk_xprt);
917 return -EAGAIN; /* record not complete */ 957 goto err_again; /* record not complete */
918 } 958 }
919 len = svsk->sk_reclen; 959 len = svsk->sk_reclen;
920 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 960 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
921 961
962 return len;
963 error:
964 if (len == -EAGAIN) {
965 dprintk("RPC: TCP recv_record got EAGAIN\n");
966 svc_xprt_received(&svsk->sk_xprt);
967 }
968 return len;
969 err_delete:
970 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
971 err_again:
972 return -EAGAIN;
973}
974
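svc_tcp_recv_record() now isolates the framing work: read the four-byte RPC-over-TCP record marker, then the payload. A stand-alone decoder for that marker, assuming the standard RFC 1831 framing constants sunrpc uses (last-fragment flag in the top bit, 31-bit fragment length below it):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define RPC_LAST_STREAM_FRAGMENT 0x80000000u
#define RPC_FRAGMENT_SIZE_MASK	 0x7fffffffu

/* Decode a record marker as read off the wire (network byte order). */
static void decode_marker(uint32_t wire)
{
	uint32_t host = ntohl(wire);

	printf("fragment: %u bytes, %s\n",
	       host & RPC_FRAGMENT_SIZE_MASK,
	       (host & RPC_LAST_STREAM_FRAGMENT) ? "last" : "more follow");
}

int main(void)
{
	decode_marker(htonl(0x80000164));	/* last fragment, 356 bytes */
	return 0;
}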
975static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
976 struct rpc_rqst **reqpp, struct kvec *vec)
977{
978 struct rpc_rqst *req = NULL;
979 u32 *p;
980 u32 xid;
981 u32 calldir;
982 int len;
983
984 len = svc_recvfrom(rqstp, vec, 1, 8);
985 if (len < 0)
986 goto error;
987
988 p = (u32 *)rqstp->rq_arg.head[0].iov_base;
989 xid = *p++;
990 calldir = *p;
991
992 if (calldir == 0) {
993 /* REQUEST is the most common case */
994 vec[0] = rqstp->rq_arg.head[0];
995 } else {
996 /* REPLY */
997 if (svsk->sk_bc_xprt)
998 req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid);
999
1000 if (!req) {
1001 printk(KERN_NOTICE
1002 "%s: Got unrecognized reply: "
1003 "calldir 0x%x sk_bc_xprt %p xid %08x\n",
1004 __func__, ntohl(calldir),
1005 svsk->sk_bc_xprt, ntohl(xid));
1006 vec[0] = rqstp->rq_arg.head[0];
1007 goto out;
1008 }
1009
1010 memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
1011 sizeof(struct xdr_buf));
1012 /* copy the xid and call direction */
1013 memcpy(req->rq_private_buf.head[0].iov_base,
1014 rqstp->rq_arg.head[0].iov_base, 8);
1015 vec[0] = req->rq_private_buf.head[0];
1016 }
1017 out:
1018 vec[0].iov_base += 8;
1019 vec[0].iov_len -= 8;
1020 len = svsk->sk_reclen - 8;
1021 error:
1022 *reqpp = req;
1023 return len;
1024}
1025
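svc_process_calldir() peeks at the first two 32-bit words of the record, the XID and the call direction, to tell forward calls (0) from replies to backchannel callbacks (1). A self-contained sketch of that peek; the sample bytes are made up for the demo:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Read the xid and call direction from the first 8 bytes of an RPC
 * record, as svc_process_calldir() does via svc_recvfrom(). */
static void peek_calldir(const unsigned char *rec)
{
	uint32_t xid, dir;

	memcpy(&xid, rec, 4);
	memcpy(&dir, rec + 4, 4);
	printf("xid %08x: %s\n", ntohl(xid),
	       ntohl(dir) == 0 ? "CALL" : "REPLY");
}

int main(void)
{
	const unsigned char rec[8] = { 0xde, 0xad, 0xbe, 0xef, 0, 0, 0, 1 };

	peek_calldir(rec);	/* xid deadbeef: REPLY */
	return 0;
}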
1026/*
1027 * Receive data from a TCP socket.
1028 */
1029static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1030{
1031 struct svc_sock *svsk =
1032 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
1033 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1034 int len;
1035 struct kvec *vec;
1036 int pnum, vlen;
1037 struct rpc_rqst *req = NULL;
1038
1039 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1040 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
1041 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
1042 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
1043
1044 len = svc_tcp_recv_record(svsk, rqstp);
1045 if (len < 0)
1046 goto error;
1047
922 vec = rqstp->rq_vec; 1048 vec = rqstp->rq_vec;
923 vec[0] = rqstp->rq_arg.head[0]; 1049 vec[0] = rqstp->rq_arg.head[0];
924 vlen = PAGE_SIZE; 1050 vlen = PAGE_SIZE;
1051
1052 /*
1053 * We have enough data for the whole tcp record. Let's try and read the
1054 * first 8 bytes to get the xid and the call direction. We can use this
1055 * to figure out if this is a call or a reply to a callback. If
1056 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
1057 * In that case, don't bother with the calldir and just read the data.
1058 * It will be rejected in svc_process.
1059 */
1060 if (len >= 8) {
1061 len = svc_process_calldir(svsk, rqstp, &req, vec);
1062 if (len < 0)
1063 goto err_again;
1064 vlen -= 8;
1065 }
1066
925 pnum = 1; 1067 pnum = 1;
926 while (vlen < len) { 1068 while (vlen < len) {
927 vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); 1069 vec[pnum].iov_base = (req) ?
1070 page_address(req->rq_private_buf.pages[pnum - 1]) :
1071 page_address(rqstp->rq_pages[pnum]);
928 vec[pnum].iov_len = PAGE_SIZE; 1072 vec[pnum].iov_len = PAGE_SIZE;
929 pnum++; 1073 pnum++;
930 vlen += PAGE_SIZE; 1074 vlen += PAGE_SIZE;
@@ -934,8 +1078,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
934 /* Now receive data */ 1078 /* Now receive data */
935 len = svc_recvfrom(rqstp, vec, pnum, len); 1079 len = svc_recvfrom(rqstp, vec, pnum, len);
936 if (len < 0) 1080 if (len < 0)
937 goto error; 1081 goto err_again;
938 1082
1083 /*
1084 * Account for the 8 bytes we read earlier
1085 */
1086 len += 8;
1087
1088 if (req) {
1089 xprt_complete_rqst(req->rq_task, len);
1090 len = 0;
1091 goto out;
1092 }
939 dprintk("svc: TCP complete record (%d bytes)\n", len); 1093 dprintk("svc: TCP complete record (%d bytes)\n", len);
940 rqstp->rq_arg.len = len; 1094 rqstp->rq_arg.len = len;
941 rqstp->rq_arg.page_base = 0; 1095 rqstp->rq_arg.page_base = 0;
@@ -949,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
949 rqstp->rq_xprt_ctxt = NULL; 1103 rqstp->rq_xprt_ctxt = NULL;
950 rqstp->rq_prot = IPPROTO_TCP; 1104 rqstp->rq_prot = IPPROTO_TCP;
951 1105
1106out:
952 /* Reset TCP read info */ 1107 /* Reset TCP read info */
953 svsk->sk_reclen = 0; 1108 svsk->sk_reclen = 0;
954 svsk->sk_tcplen = 0; 1109 svsk->sk_tcplen = 0;
@@ -960,21 +1115,19 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
960 1115
961 return len; 1116 return len;
962 1117
963 err_delete: 1118err_again:
964 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
965 return -EAGAIN;
966
967 error:
968 if (len == -EAGAIN) { 1119 if (len == -EAGAIN) {
969 dprintk("RPC: TCP recvfrom got EAGAIN\n"); 1120 dprintk("RPC: TCP recvfrom got EAGAIN\n");
970 svc_xprt_received(&svsk->sk_xprt); 1121 svc_xprt_received(&svsk->sk_xprt);
971 } else { 1122 return len;
1123 }
1124error:
1125 if (len != -EAGAIN) {
972 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1126 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
973 svsk->sk_xprt.xpt_server->sv_name, -len); 1127 svsk->sk_xprt.xpt_server->sv_name, -len);
974 goto err_delete; 1128 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
975 } 1129 }
976 1130 return -EAGAIN;
977 return len;
978} 1131}
979 1132
980/* 1133/*
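Before the bulk receive, svc_tcp_recvfrom() sizes rq_vec so that vec[0] covers the head page and each further entry covers one more page, until the vector spans the whole record. The arithmetic, isolated into a sketch with an assumed 4 KiB page size:

#include <stdio.h>

#define PAGE_SIZE_LIKE 4096

/* Count the kvec entries needed for a record of record_len bytes,
 * mirroring the pnum/vlen loop in svc_tcp_recvfrom(). */
static int vec_entries(int record_len)
{
	int vlen = PAGE_SIZE_LIKE;	/* vec[0]: the head page */
	int pnum = 1;

	while (vlen < record_len) {
		vlen += PAGE_SIZE_LIKE;
		pnum++;
	}
	return pnum;
}

int main(void)
{
	printf("entries for a 10000-byte record: %d\n", vec_entries(10000));
	return 0;
}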
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f412a852bc7..fd46d42afa8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -832,6 +832,11 @@ static void xprt_timer(struct rpc_task *task)
832 spin_unlock_bh(&xprt->transport_lock); 832 spin_unlock_bh(&xprt->transport_lock);
833} 833}
834 834
835static inline int xprt_has_timer(struct rpc_xprt *xprt)
836{
837 return xprt->idle_timeout != 0;
838}
839
835/** 840/**
836 * xprt_prepare_transmit - reserve the transport before sending a request 841 * xprt_prepare_transmit - reserve the transport before sending a request
837 * @task: RPC task about to send a request 842 * @task: RPC task about to send a request
@@ -1013,7 +1018,7 @@ void xprt_release(struct rpc_task *task)
1013 if (!list_empty(&req->rq_list)) 1018 if (!list_empty(&req->rq_list))
1014 list_del(&req->rq_list); 1019 list_del(&req->rq_list);
1015 xprt->last_used = jiffies; 1020 xprt->last_used = jiffies;
1016 if (list_empty(&xprt->recv)) 1021 if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
1017 mod_timer(&xprt->timer, 1022 mod_timer(&xprt->timer,
1018 xprt->last_used + xprt->idle_timeout); 1023 xprt->last_used + xprt->idle_timeout);
1019 spin_unlock_bh(&xprt->transport_lock); 1024 spin_unlock_bh(&xprt->transport_lock);
@@ -1082,8 +1087,11 @@ found:
1082#endif /* CONFIG_NFS_V4_1 */ 1087#endif /* CONFIG_NFS_V4_1 */
1083 1088
1084 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1089 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1085 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1090 if (xprt_has_timer(xprt))
1086 (unsigned long)xprt); 1091 setup_timer(&xprt->timer, xprt_init_autodisconnect,
1092 (unsigned long)xprt);
1093 else
1094 init_timer(&xprt->timer);
1087 xprt->last_used = jiffies; 1095 xprt->last_used = jiffies;
1088 xprt->cwnd = RPC_INITCWND; 1096 xprt->cwnd = RPC_INITCWND;
1089 xprt->bind_index = 0; 1097 xprt->bind_index = 0;
@@ -1102,7 +1110,6 @@ found:
1102 1110
1103 dprintk("RPC: created transport %p with %u slots\n", xprt, 1111 dprintk("RPC: created transport %p with %u slots\n", xprt,
1104 xprt->max_reqs); 1112 xprt->max_reqs);
1105
1106 return xprt; 1113 return xprt;
1107} 1114}
1108 1115
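xprt_has_timer() exists because the backchannel transport created later in this patch sets idle_timeout to zero: such transports must never arm the autodisconnect timer, neither at setup nor in xprt_release(). The guard, reduced to a stand-alone illustration (the struct and field are stand-ins for struct rpc_xprt):

#include <stdio.h>

struct xprt_like {
	unsigned long idle_timeout;	/* 0 means "never autodisconnect" */
};

/* Mirror of xprt_has_timer(). */
static int has_timer(const struct xprt_like *x)
{
	return x->idle_timeout != 0;
}

int main(void)
{
	struct xprt_like fore = { .idle_timeout = 300 };
	struct xprt_like back = { .idle_timeout = 0 };

	printf("forechannel arms timer: %d\n", has_timer(&fore));
	printf("backchannel arms timer: %d\n", has_timer(&back));
	return 0;
}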
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5151f9f6c57..0cf5e8c27a1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -730,12 +730,12 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
730 goto err; 730 goto err;
731 731
732 mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); 732 mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
733 if (!mr) 733 if (IS_ERR(mr))
734 goto err_free_frmr; 734 goto err_free_frmr;
735 735
736 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, 736 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
737 RPCSVC_MAXPAGES); 737 RPCSVC_MAXPAGES);
738 if (!pl) 738 if (IS_ERR(pl))
739 goto err_free_mr; 739 goto err_free_mr;
740 740
741 frmr->mr = mr; 741 frmr->mr = mr;
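ib_alloc_fast_reg_mr() and ib_alloc_fast_reg_page_list() report failure as an ERR_PTR-encoded pointer, never NULL, so the old "if (!mr)" checks could never fire. A user-space rendition of the ERR_PTR convention showing why the NULL test misses the error (the macros below are simplified re-creations, not the kernel's exact definitions):

#include <stdio.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

/* Pretend allocator: encodes -ENOMEM (-12) in the returned pointer. */
static void *alloc_mr(int fail)
{
	return fail ? ERR_PTR(-12) : (void *)0x1000;
}

int main(void)
{
	void *mr = alloc_mr(1);

	if (!mr)
		printf("NULL check: never reached\n");
	if (IS_ERR(mr))
		printf("IS_ERR check: failed with %ld\n", PTR_ERR(mr));
	return 0;
}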
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 62438f3a914..bee41546575 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -32,6 +32,7 @@
32#include <linux/tcp.h> 32#include <linux/tcp.h>
33#include <linux/sunrpc/clnt.h> 33#include <linux/sunrpc/clnt.h>
34#include <linux/sunrpc/sched.h> 34#include <linux/sunrpc/sched.h>
35#include <linux/sunrpc/svcsock.h>
35#include <linux/sunrpc/xprtsock.h> 36#include <linux/sunrpc/xprtsock.h>
36#include <linux/file.h> 37#include <linux/file.h>
37#ifdef CONFIG_NFS_V4_1 38#ifdef CONFIG_NFS_V4_1
@@ -43,6 +44,7 @@
43#include <net/udp.h> 44#include <net/udp.h>
44#include <net/tcp.h> 45#include <net/tcp.h>
45 46
47#include "sunrpc.h"
46/* 48/*
47 * xprtsock tunables 49 * xprtsock tunables
48 */ 50 */
@@ -2098,6 +2100,134 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
2098 xprt->stat.bklog_u); 2100 xprt->stat.bklog_u);
2099} 2101}
2100 2102
2103/*
2104 * Allocate a bunch of pages for a scratch buffer for the rpc code. The
2105 * reason we allocate pages instead of doing a kmalloc like rpc_malloc is
2106 * that we want to use the server-side send routines.
2107 */
2108void *bc_malloc(struct rpc_task *task, size_t size)
2109{
2110 struct page *page;
2111 struct rpc_buffer *buf;
2112
2113 BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer));
2114 page = alloc_page(GFP_KERNEL);
2115
2116 if (!page)
2117 return NULL;
2118
2119 buf = page_address(page);
2120 buf->len = PAGE_SIZE;
2121
2122 return buf->data;
2123}
2124
2125/*
2126 * Free the space allocated in the bc_alloc routine
2127 */
2128void bc_free(void *buffer)
2129{
2130 struct rpc_buffer *buf;
2131
2132 if (!buffer)
2133 return;
2134
2135 buf = container_of(buffer, struct rpc_buffer, data);
2136 free_page((unsigned long)buf);
2137}
2138
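bc_malloc() returns buf->data from a page whose first bytes hold the struct rpc_buffer header, and bc_free() walks back to the start of the allocation with container_of(). The same header-before-data pattern in plain C (struct name and sizes are illustrative; the kernel allocates exactly one page):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct rpc_buffer_like {
	size_t len;
	char data[];		/* what the caller actually sees */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void *buf_alloc(size_t size)
{
	struct rpc_buffer_like *buf = malloc(sizeof(*buf) + size);

	if (!buf)
		return NULL;
	buf->len = size;
	return buf->data;
}

static void buf_free(void *data)
{
	if (data)
		free(container_of(data, struct rpc_buffer_like, data));
}

int main(void)
{
	void *p = buf_alloc(64);

	printf("got %p\n", p);
	buf_free(p);
	return 0;
}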
2139/*
2140 * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
2141 * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
2142 */
2143static int bc_sendto(struct rpc_rqst *req)
2144{
2145 int len;
2146 struct xdr_buf *xbufp = &req->rq_snd_buf;
2147 struct rpc_xprt *xprt = req->rq_xprt;
2148 struct sock_xprt *transport =
2149 container_of(xprt, struct sock_xprt, xprt);
2150 struct socket *sock = transport->sock;
2151 unsigned long headoff;
2152 unsigned long tailoff;
2153
2154 /*
2155 * Set up the RPC header and the TCP record marker.
2156 */
2157 xs_encode_tcp_record_marker(xbufp);
2158
2159 tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
2160 headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
2161 len = svc_send_common(sock, xbufp,
2162 virt_to_page(xbufp->head[0].iov_base), headoff,
2163 virt_to_page(xbufp->tail[0].iov_base), tailoff);
2164
2165 if (len != xbufp->len) {
2166 printk(KERN_NOTICE "Error sending entire callback!\n");
2167 len = -EAGAIN;
2168 }
2169
2170 return len;
2171}
2172
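bc_sendto() first stamps the TCP record marker into the space reserved at the head of the send buffer, then hands the whole xdr_buf to svc_send_common(). Assuming the usual single-fragment encoding (last-fragment bit ORed with the payload length), the marker write sketched stand-alone:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Write a one-fragment record marker into the four reserved bytes, in
 * the spirit of xs_encode_tcp_record_marker(). */
static void encode_marker(unsigned char *hdr, uint32_t payload_len)
{
	uint32_t marker = htonl(0x80000000u | payload_len);

	memcpy(hdr, &marker, sizeof(marker));
}

int main(void)
{
	unsigned char hdr[4];

	encode_marker(hdr, 356);
	printf("%02x %02x %02x %02x\n", hdr[0], hdr[1], hdr[2], hdr[3]);
	return 0;
}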
2173/*
2174 * The send routine. Borrows from svc_send
2175 */
2176static int bc_send_request(struct rpc_task *task)
2177{
2178 struct rpc_rqst *req = task->tk_rqstp;
2179 struct svc_xprt *xprt;
2180 struct svc_sock *svsk;
2181 int len;
2182
2183 dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
2184 /*
2185 * Get the server socket associated with this callback xprt
2186 */
2187 xprt = req->rq_xprt->bc_xprt;
2188 svsk = container_of(xprt, struct svc_sock, sk_xprt);
2189
2190 /*
2191 * Grab the mutex to serialize data as the connection is shared
2192 * with the fore channel
2193 */
2194 if (!mutex_trylock(&xprt->xpt_mutex)) {
2195 rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
2196 if (!mutex_trylock(&xprt->xpt_mutex))
2197 return -EAGAIN;
2198 rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
2199 }
2200 if (test_bit(XPT_DEAD, &xprt->xpt_flags))
2201 len = -ENOTCONN;
2202 else
2203 len = bc_sendto(req);
2204 mutex_unlock(&xprt->xpt_mutex);
2205
2206 if (len > 0)
2207 len = 0;
2208
2209 return len;
2210}
2211
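bc_send_request() serializes with the forechannel by try-locking the shared mutex and queueing itself on xpt_bc_pending rather than blocking, since an RPC task must not sleep holding the thread. The shape of that dance in POSIX threads -- a loose analogue only; the RPC wait queue has no direct pthread equivalent:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t xpt_mutex = PTHREAD_MUTEX_INITIALIZER;

static int try_send(void)
{
	if (pthread_mutex_trylock(&xpt_mutex) != 0) {
		/* kernel: rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL) */
		if (pthread_mutex_trylock(&xpt_mutex) != 0)
			return -1;	/* -EAGAIN: try again later */
		/* kernel: rpc_wake_up_queued_task() undoes the sleep */
	}
	/* ... bc_sendto() would run here, under the mutex ... */
	pthread_mutex_unlock(&xpt_mutex);
	return 0;
}

int main(void)
{
	printf("send %s\n", try_send() == 0 ? "ok" : "busy");
	return 0;
}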
2212/*
2213 * The close routine. Since this connection is client-initiated, we do nothing.
2214 */
2215
2216static void bc_close(struct rpc_xprt *xprt)
2217{
2218 return;
2219}
2220
2221/*
2222 * The xprt destroy routine. Again, because this connection is
2223 * client-initiated, we do nothing.
2224 */
2225
2226static void bc_destroy(struct rpc_xprt *xprt)
2227{
2228 return;
2229}
2230
2101static struct rpc_xprt_ops xs_udp_ops = { 2231static struct rpc_xprt_ops xs_udp_ops = {
2102 .set_buffer_size = xs_udp_set_buffer_size, 2232 .set_buffer_size = xs_udp_set_buffer_size,
2103 .reserve_xprt = xprt_reserve_xprt_cong, 2233 .reserve_xprt = xprt_reserve_xprt_cong,
@@ -2134,6 +2264,22 @@ static struct rpc_xprt_ops xs_tcp_ops = {
2134 .print_stats = xs_tcp_print_stats, 2264 .print_stats = xs_tcp_print_stats,
2135}; 2265};
2136 2266
2267/*
2268 * The rpc_xprt_ops for the server backchannel
2269 */
2270
2271static struct rpc_xprt_ops bc_tcp_ops = {
2272 .reserve_xprt = xprt_reserve_xprt,
2273 .release_xprt = xprt_release_xprt,
2274 .buf_alloc = bc_malloc,
2275 .buf_free = bc_free,
2276 .send_request = bc_send_request,
2277 .set_retrans_timeout = xprt_set_retrans_timeout_def,
2278 .close = bc_close,
2279 .destroy = bc_destroy,
2280 .print_stats = xs_tcp_print_stats,
2281};
2282
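bc_tcp_ops mixes generic xprt_* helpers with backchannel-specific stubs: a transport is just a table of function pointers, and the backchannel plugs in no-ops where a client-driven connection has nothing to do. The pattern in miniature (all names invented for the sketch):

#include <stdio.h>

struct xprt_ops_like {
	int  (*send_request)(const char *buf);
	void (*close)(void);
};

static int bc_send(const char *buf)
{
	return printf("sending: %s\n", buf) < 0 ? -1 : 0;
}

static void bc_noop_close(void)
{
	/* connection is client-initiated: nothing to tear down */
}

static const struct xprt_ops_like bc_ops = {
	.send_request	= bc_send,
	.close		= bc_noop_close,
};

int main(void)
{
	bc_ops.send_request("callback reply");
	bc_ops.close();
	return 0;
}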
2137static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, 2283static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2138 unsigned int slot_table_size) 2284 unsigned int slot_table_size)
2139{ 2285{
@@ -2322,11 +2468,93 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2322 return ERR_PTR(-EINVAL); 2468 return ERR_PTR(-EINVAL);
2323} 2469}
2324 2470
2471/**
2472 * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
2473 * @args: rpc transport creation arguments
2474 *
2475 */
2476static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2477{
2478 struct sockaddr *addr = args->dstaddr;
2479 struct rpc_xprt *xprt;
2480 struct sock_xprt *transport;
2481 struct svc_sock *bc_sock;
2482
2483 if (!args->bc_xprt)
2484 return ERR_PTR(-EINVAL);
2485
2486 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2487 if (IS_ERR(xprt))
2488 return xprt;
2489 transport = container_of(xprt, struct sock_xprt, xprt);
2490
2491 xprt->prot = IPPROTO_TCP;
2492 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
2493 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
2494 xprt->timeout = &xs_tcp_default_timeout;
2495
2496 /* backchannel */
2497 xprt_set_bound(xprt);
2498 xprt->bind_timeout = 0;
2499 xprt->connect_timeout = 0;
2500 xprt->reestablish_timeout = 0;
2501 xprt->idle_timeout = 0;
2502
2503 /*
2504 * The backchannel uses the same socket connection as the
2505 * forechannel
2506 */
2507 xprt->bc_xprt = args->bc_xprt;
2508 bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
2509 bc_sock->sk_bc_xprt = xprt;
2510 transport->sock = bc_sock->sk_sock;
2511 transport->inet = bc_sock->sk_sk;
2512
2513 xprt->ops = &bc_tcp_ops;
2514
2515 switch (addr->sa_family) {
2516 case AF_INET:
2517 xs_format_peer_addresses(xprt, "tcp",
2518 RPCBIND_NETID_TCP);
2519 break;
2520 case AF_INET6:
2521 xs_format_peer_addresses(xprt, "tcp",
2522 RPCBIND_NETID_TCP6);
2523 break;
2524 default:
2525 kfree(xprt);
2526 return ERR_PTR(-EAFNOSUPPORT);
2527 }
2528
2529 if (xprt_bound(xprt))
2530 dprintk("RPC: set up xprt to %s (port %s) via %s\n",
2531 xprt->address_strings[RPC_DISPLAY_ADDR],
2532 xprt->address_strings[RPC_DISPLAY_PORT],
2533 xprt->address_strings[RPC_DISPLAY_PROTO]);
2534 else
2535 dprintk("RPC: set up xprt to %s (autobind) via %s\n",
2536 xprt->address_strings[RPC_DISPLAY_ADDR],
2537 xprt->address_strings[RPC_DISPLAY_PROTO]);
2538
2539 /*
2540 * Since we don't want connections for the backchannel, we set
2541 * the xprt status to connected
2542 */
2543 xprt_set_connected(xprt);
2544
2545
2546 if (try_module_get(THIS_MODULE))
2547 return xprt;
2548 kfree(xprt->slot);
2549 kfree(xprt);
2550 return ERR_PTR(-EINVAL);
2551}
2552
2325static struct xprt_class xs_udp_transport = { 2553static struct xprt_class xs_udp_transport = {
2326 .list = LIST_HEAD_INIT(xs_udp_transport.list), 2554 .list = LIST_HEAD_INIT(xs_udp_transport.list),
2327 .name = "udp", 2555 .name = "udp",
2328 .owner = THIS_MODULE, 2556 .owner = THIS_MODULE,
2329 .ident = IPPROTO_UDP, 2557 .ident = XPRT_TRANSPORT_UDP,
2330 .setup = xs_setup_udp, 2558 .setup = xs_setup_udp,
2331}; 2559};
2332 2560
@@ -2334,10 +2562,18 @@ static struct xprt_class xs_tcp_transport = {
2334 .list = LIST_HEAD_INIT(xs_tcp_transport.list), 2562 .list = LIST_HEAD_INIT(xs_tcp_transport.list),
2335 .name = "tcp", 2563 .name = "tcp",
2336 .owner = THIS_MODULE, 2564 .owner = THIS_MODULE,
2337 .ident = IPPROTO_TCP, 2565 .ident = XPRT_TRANSPORT_TCP,
2338 .setup = xs_setup_tcp, 2566 .setup = xs_setup_tcp,
2339}; 2567};
2340 2568
2569static struct xprt_class xs_bc_tcp_transport = {
2570 .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list),
2571 .name = "tcp NFSv4.1 backchannel",
2572 .owner = THIS_MODULE,
2573 .ident = XPRT_TRANSPORT_BC_TCP,
2574 .setup = xs_setup_bc_tcp,
2575};
2576
2341/** 2577/**
2342 * init_socket_xprt - set up xprtsock's sysctls, register with RPC client 2578 * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
2343 * 2579 *
@@ -2351,6 +2587,7 @@ int init_socket_xprt(void)
2351 2587
2352 xprt_register_transport(&xs_udp_transport); 2588 xprt_register_transport(&xs_udp_transport);
2353 xprt_register_transport(&xs_tcp_transport); 2589 xprt_register_transport(&xs_tcp_transport);
2590 xprt_register_transport(&xs_bc_tcp_transport);
2354 2591
2355 return 0; 2592 return 0;
2356} 2593}
@@ -2370,6 +2607,7 @@ void cleanup_socket_xprt(void)
2370 2607
2371 xprt_unregister_transport(&xs_udp_transport); 2608 xprt_unregister_transport(&xs_udp_transport);
2372 xprt_unregister_transport(&xs_tcp_transport); 2609 xprt_unregister_transport(&xs_tcp_transport);
2610 xprt_unregister_transport(&xs_bc_tcp_transport);
2373} 2611}
2374 2612
2375static int param_set_uint_minmax(const char *val, struct kernel_param *kp, 2613static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 429dd06a4ec..561a45cf2a6 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -834,7 +834,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
834 return 0; 834 return 0;
835 } 835 }
836 836
837 return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);; 837 return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);
838} 838}
839EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower); 839EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower);
840 840
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 72c15205bb2..8ab44861168 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -16,8 +16,7 @@
16 * tells make when to remake a file. 16 * tells make when to remake a file.
17 * 17 *
18 * To use this list as-is however has the drawback that virtually 18 * To use this list as-is however has the drawback that virtually
19 * every file in the kernel includes <linux/config.h> which then again 19 * every file in the kernel includes <linux/autoconf.h>.
20 * includes <linux/autoconf.h>
21 * 20 *
22 * If the user re-runs make *config, linux/autoconf.h will be 21 * If the user re-runs make *config, linux/autoconf.h will be
23 * regenerated. make notices that and will rebuild every file which 22 * regenerated. make notices that and will rebuild every file which
@@ -126,7 +125,6 @@ char *depfile;
126char *cmdline; 125char *cmdline;
127 126
128void usage(void) 127void usage(void)
129
130{ 128{
131 fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n"); 129 fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n");
132 exit(1); 130 exit(1);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 2d5ece798c4..87bbb8bce9b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -10,7 +10,7 @@ use strict;
10my $P = $0; 10my $P = $0;
11$P =~ s@.*/@@g; 11$P =~ s@.*/@@g;
12 12
13my $V = '0.28'; 13my $V = '0.29';
14 14
15use Getopt::Long qw(:config no_auto_abbrev); 15use Getopt::Long qw(:config no_auto_abbrev);
16 16
@@ -28,6 +28,41 @@ my $mailback = 0;
28my $summary_file = 0; 28my $summary_file = 0;
29my $root; 29my $root;
30my %debug; 30my %debug;
31my $help = 0;
32
33sub help {
34 my ($exitcode) = @_;
35
36 print << "EOM";
37Usage: $P [OPTION]... [FILE]...
38Version: $V
39
40Options:
41 -q, --quiet quiet
42 --no-tree run without a kernel tree
43 --no-signoff do not check for 'Signed-off-by' line
44 --patch treat FILE as patchfile (default)
45 --emacs emacs compile window format
46 --terse one line per report
47 -f, --file treat FILE as regular source file
48 --subjective, --strict enable more subjective tests
49 --root=PATH PATH to the kernel tree root
50 --no-summary suppress the per-file summary
51 --mailback only produce a report in case of warnings/errors
52 --summary-file include the filename in summary
53 --debug KEY=[0|1] turn on/off debugging of KEY, where KEY is one of
54 'values', 'possible', 'type', and 'attr' (default
55 is all off)
56 --test-only=WORD report only warnings/errors containing WORD
57 literally
58 -h, --help, --version display this help and exit
59
60When FILE is - read standard input.
61EOM
62
63 exit($exitcode);
64}
65
31GetOptions( 66GetOptions(
32 'q|quiet+' => \$quiet, 67 'q|quiet+' => \$quiet,
33 'tree!' => \$tree, 68 'tree!' => \$tree,
@@ -35,7 +70,7 @@ GetOptions(
35 'patch!' => \$chk_patch, 70 'patch!' => \$chk_patch,
36 'emacs!' => \$emacs, 71 'emacs!' => \$emacs,
37 'terse!' => \$terse, 72 'terse!' => \$terse,
38 'file!' => \$file, 73 'f|file!' => \$file,
39 'subjective!' => \$check, 74 'subjective!' => \$check,
40 'strict!' => \$check, 75 'strict!' => \$check,
41 'root=s' => \$root, 76 'root=s' => \$root,
@@ -45,22 +80,16 @@ GetOptions(
45 80
46 'debug=s' => \%debug, 81 'debug=s' => \%debug,
47 'test-only=s' => \$tst_only, 82 'test-only=s' => \$tst_only,
48) or exit; 83 'h|help' => \$help,
84 'version' => \$help
85) or help(1);
86
87help(0) if ($help);
49 88
50my $exit = 0; 89my $exit = 0;
51 90
52if ($#ARGV < 0) { 91if ($#ARGV < 0) {
53 print "usage: $P [options] patchfile\n"; 92 print "$P: no input files\n";
54 print "version: $V\n";
55 print "options: -q => quiet\n";
56 print " --no-tree => run without a kernel tree\n";
57 print " --terse => one line per report\n";
58 print " --emacs => emacs compile window format\n";
59 print " --file => check a source file\n";
60 print " --strict => enable more subjective tests\n";
61 print " --root => path to the kernel tree root\n";
62 print " --no-summary => suppress the per-file summary\n";
63 print " --summary-file => include the filename in summary\n";
64 exit(1); 93 exit(1);
65} 94}
66 95
@@ -153,7 +182,7 @@ our $UTF8 = qr {
153}x; 182}x;
154 183
155our $typeTypedefs = qr{(?x: 184our $typeTypedefs = qr{(?x:
156 (?:__)?(?:u|s|be|le)(?:\d|\d\d)| 185 (?:__)?(?:u|s|be|le)(?:8|16|32|64)|
157 atomic_t 186 atomic_t
158)}; 187)};
159 188
@@ -356,6 +385,13 @@ sub sanitise_line {
356 $off++; 385 $off++;
357 next; 386 next;
358 } 387 }
388 if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') {
389 $sanitise_quote = '//';
390
391 substr($res, $off, 2, $sanitise_quote);
392 $off++;
393 next;
394 }
359 395
360 # A \ in a string means ignore the next character. 396 # A \ in a string means ignore the next character.
361 if (($sanitise_quote eq "'" || $sanitise_quote eq '"') && 397 if (($sanitise_quote eq "'" || $sanitise_quote eq '"') &&
@@ -379,6 +415,8 @@ sub sanitise_line {
379 #print "c<$c> SQ<$sanitise_quote>\n"; 415 #print "c<$c> SQ<$sanitise_quote>\n";
380 if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") { 416 if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
381 substr($res, $off, 1, $;); 417 substr($res, $off, 1, $;);
418 } elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") {
419 substr($res, $off, 1, $;);
382 } elsif ($off != 0 && $sanitise_quote && $c ne "\t") { 420 } elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
383 substr($res, $off, 1, 'X'); 421 substr($res, $off, 1, 'X');
384 } else { 422 } else {
@@ -386,6 +424,10 @@ sub sanitise_line {
386 } 424 }
387 } 425 }
388 426
427 if ($sanitise_quote eq '//') {
428 $sanitise_quote = '';
429 }
430
389 # The pathname on a #include may be surrounded by '<' and '>'. 431 # The pathname on a #include may be surrounded by '<' and '>'.
390 if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) { 432 if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) {
391 my $clean = 'X' x length($1); 433 my $clean = 'X' x length($1);
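The new '//' handling extends checkpatch's sanitiser, which blanks out comment and string bodies so later regexes cannot trip over code-like text inside them. The same single-pass, quote-aware scan in C (a simplified stand-in for sanitise_line(); it only handles '//' and double quotes):

#include <stdio.h>
#include <string.h>

/* Overwrite the body of a trailing // comment with 'X', unless the
 * "//" appears inside a double-quoted string. */
static void sanitise(char *line)
{
	int in_str = 0;
	size_t i, n = strlen(line);

	for (i = 0; i < n; i++) {
		if (line[i] == '"' && (i == 0 || line[i - 1] != '\\'))
			in_str = !in_str;
		else if (!in_str && line[i] == '/' && line[i + 1] == '/') {
			memset(line + i + 2, 'X', n - i - 2);
			return;
		}
	}
}

int main(void)
{
	char line[] = "a = b; // url: http://example.com";

	sanitise(line);
	printf("%s\n", line);	/* the comment body is blanked out */
	return 0;
}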
@@ -1336,6 +1378,18 @@ sub process {
1336 WARN("adding a line without newline at end of file\n" . $herecurr); 1378 WARN("adding a line without newline at end of file\n" . $herecurr);
1337 } 1379 }
1338 1380
1381# Blackfin: use hi/lo macros
1382 if ($realfile =~ m@arch/blackfin/.*\.S$@) {
1383 if ($line =~ /\.[lL][[:space:]]*=.*&[[:space:]]*0x[fF][fF][fF][fF]/) {
1384 my $herevet = "$here\n" . cat_vet($line) . "\n";
1385 ERROR("use the LO() macro, not (... & 0xFFFF)\n" . $herevet);
1386 }
1387 if ($line =~ /\.[hH][[:space:]]*=.*>>[[:space:]]*16/) {
1388 my $herevet = "$here\n" . cat_vet($line) . "\n";
1389 ERROR("use the HI() macro, not (... >> 16)\n" . $herevet);
1390 }
1391 }
1392
1339# check we are in a valid source file C or perl if not then ignore this hunk 1393# check we are in a valid source file C or perl if not then ignore this hunk
1340 next if ($realfile !~ /\.(h|c|pl)$/); 1394 next if ($realfile !~ /\.(h|c|pl)$/);
1341 1395
@@ -1355,6 +1409,16 @@ sub process {
1355 WARN("CVS style keyword markers, these will _not_ be updated\n". $herecurr); 1409 WARN("CVS style keyword markers, these will _not_ be updated\n". $herecurr);
1356 } 1410 }
1357 1411
1412# Blackfin: don't use __builtin_bfin_[cs]sync
1413 if ($line =~ /__builtin_bfin_csync/) {
1414 my $herevet = "$here\n" . cat_vet($line) . "\n";
1415 ERROR("use the CSYNC() macro in asm/blackfin.h\n" . $herevet);
1416 }
1417 if ($line =~ /__builtin_bfin_ssync/) {
1418 my $herevet = "$here\n" . cat_vet($line) . "\n";
1419 ERROR("use the SSYNC() macro in asm/blackfin.h\n" . $herevet);
1420 }
1421
1358# Check for potential 'bare' types 1422# Check for potential 'bare' types
1359 my ($stat, $cond, $line_nr_next, $remain_next, $off_next); 1423 my ($stat, $cond, $line_nr_next, $remain_next, $off_next);
1360 if ($realcnt && $line =~ /.\s*\S/) { 1424 if ($realcnt && $line =~ /.\s*\S/) {
@@ -1372,6 +1436,8 @@ sub process {
1372 # Ignore functions being called 1436 # Ignore functions being called
1373 } elsif ($s =~ /^.\s*$Ident\s*\(/s) { 1437 } elsif ($s =~ /^.\s*$Ident\s*\(/s) {
1374 1438
1439 } elsif ($s =~ /^.\s*else\b/s) {
1440
1375 # declarations always start with types 1441 # declarations always start with types
1376 } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) { 1442 } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) {
1377 my $type = $1; 1443 my $type = $1;
@@ -1532,8 +1598,9 @@ sub process {
1532 $s =~ /^\s*#\s*?/ || 1598 $s =~ /^\s*#\s*?/ ||
1533 $s =~ /^\s*$Ident\s*:/) { 1599 $s =~ /^\s*$Ident\s*:/) {
1534 $continuation = ($s =~ /^.*?\\\n/) ? 1 : 0; 1600 $continuation = ($s =~ /^.*?\\\n/) ? 1 : 0;
1535 $s =~ s/^.*?\n//; 1601 if ($s =~ s/^.*?\n//) {
1536 $cond_lines++; 1602 $cond_lines++;
1603 }
1537 } 1604 }
1538 } 1605 }
1539 1606
@@ -1891,7 +1958,7 @@ sub process {
1891 # A unary '*' may be const 1958 # A unary '*' may be const
1892 1959
1893 } elsif ($ctx =~ /.xW/) { 1960 } elsif ($ctx =~ /.xW/) {
1894 ERROR("Aspace prohibited after that '$op' $at\n" . $hereptr); 1961 ERROR("space prohibited after that '$op' $at\n" . $hereptr);
1895 } 1962 }
1896 1963
1897 # unary ++ and unary -- are allowed no space on one side. 1964 # unary ++ and unary -- are allowed no space on one side.
@@ -2243,7 +2310,8 @@ sub process {
2243 DECLARE_PER_CPU| 2310 DECLARE_PER_CPU|
2244 DEFINE_PER_CPU| 2311 DEFINE_PER_CPU|
2245 __typeof__\(| 2312 __typeof__\(|
2246 \.$Ident\s*=\s* 2313 \.$Ident\s*=\s*|
2314 ^\"|\"$
2247 }x; 2315 }x;
2248 #print "REST<$rest> dstat<$dstat>\n"; 2316 #print "REST<$rest> dstat<$dstat>\n";
2249 if ($rest ne '') { 2317 if ($rest ne '') {
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 278a45bd45a..cdb44b63342 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -13,7 +13,7 @@
13use strict; 13use strict;
14 14
15my $P = $0; 15my $P = $0;
16my $V = '0.17'; 16my $V = '0.20';
17 17
18use Getopt::Long qw(:config no_auto_abbrev); 18use Getopt::Long qw(:config no_auto_abbrev);
19 19
@@ -29,6 +29,8 @@ my $email_git_min_signatures = 1;
29my $email_git_max_maintainers = 5; 29my $email_git_max_maintainers = 5;
30my $email_git_min_percent = 5; 30my $email_git_min_percent = 5;
31my $email_git_since = "1-year-ago"; 31my $email_git_since = "1-year-ago";
32my $email_git_blame = 0;
33my $email_remove_duplicates = 1;
32my $output_multiline = 1; 34my $output_multiline = 1;
33my $output_separator = ", "; 35my $output_separator = ", ";
34my $scm = 0; 36my $scm = 0;
@@ -36,6 +38,7 @@ my $web = 0;
36my $subsystem = 0; 38my $subsystem = 0;
37my $status = 0; 39my $status = 0;
38my $from_filename = 0; 40my $from_filename = 0;
41my $pattern_depth = 0;
39my $version = 0; 42my $version = 0;
40my $help = 0; 43my $help = 0;
41 44
@@ -68,6 +71,8 @@ if (!GetOptions(
68 'git-max-maintainers=i' => \$email_git_max_maintainers, 71 'git-max-maintainers=i' => \$email_git_max_maintainers,
69 'git-min-percent=i' => \$email_git_min_percent, 72 'git-min-percent=i' => \$email_git_min_percent,
70 'git-since=s' => \$email_git_since, 73 'git-since=s' => \$email_git_since,
74 'git-blame!' => \$email_git_blame,
75 'remove-duplicates!' => \$email_remove_duplicates,
71 'm!' => \$email_maintainer, 76 'm!' => \$email_maintainer,
72 'n!' => \$email_usename, 77 'n!' => \$email_usename,
73 'l!' => \$email_list, 78 'l!' => \$email_list,
@@ -78,6 +83,7 @@ if (!GetOptions(
78 'status!' => \$status, 83 'status!' => \$status,
79 'scm!' => \$scm, 84 'scm!' => \$scm,
80 'web!' => \$web, 85 'web!' => \$web,
86 'pattern-depth=i' => \$pattern_depth,
81 'f|file' => \$from_filename, 87 'f|file' => \$from_filename,
82 'v|version' => \$version, 88 'v|version' => \$version,
83 'h|help' => \$help, 89 'h|help' => \$help,
@@ -101,14 +107,19 @@ if ($#ARGV < 0) {
101 die "$P: argument missing: patchfile or -f file please\n"; 107 die "$P: argument missing: patchfile or -f file please\n";
102} 108}
103 109
110if ($output_separator ne ", ") {
111 $output_multiline = 0;
112}
113
104my $selections = $email + $scm + $status + $subsystem + $web; 114my $selections = $email + $scm + $status + $subsystem + $web;
105if ($selections == 0) { 115if ($selections == 0) {
106 usage(); 116 usage();
107 die "$P: Missing required option: email, scm, status, subsystem or web\n"; 117 die "$P: Missing required option: email, scm, status, subsystem or web\n";
108} 118}
109 119
110if ($email && ($email_maintainer + $email_list + $email_subscriber_list 120if ($email &&
111 + $email_git + $email_git_penguin_chiefs) == 0) { 121 ($email_maintainer + $email_list + $email_subscriber_list +
122 $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
112 usage(); 123 usage();
113 die "$P: Please select at least 1 email option\n"; 124 die "$P: Please select at least 1 email option\n";
114} 125}
@@ -147,9 +158,36 @@ while (<MAINT>) {
147} 158}
148close(MAINT); 159close(MAINT);
149 160
161my %mailmap;
162
163if ($email_remove_duplicates) {
164 open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n";
165 while (<MAILMAP>) {
166 my $line = $_;
167
168 next if ($line =~ m/^\s*#/);
169 next if ($line =~ m/^\s*$/);
170
171 my ($name, $address) = parse_email($line);
172 $line = format_email($name, $address);
173
174 next if ($line =~ m/^\s*$/);
175
176 if (exists($mailmap{$name})) {
177 my $obj = $mailmap{$name};
178 push(@$obj, $address);
179 } else {
180 my @arr = ($address);
181 $mailmap{$name} = \@arr;
182 }
183 }
184 close(MAILMAP);
185}
186
150## use the filenames on the command line or find the filenames in the patchfiles 187## use the filenames on the command line or find the filenames in the patchfiles
151 188
152my @files = (); 189my @files = ();
190my @range = ();
153 191
154foreach my $file (@ARGV) { 192foreach my $file (@ARGV) {
155 ##if $file is a directory and it lacks a trailing slash, add one 193 ##if $file is a directory and it lacks a trailing slash, add one
@@ -162,13 +200,19 @@ foreach my $file (@ARGV) {
162 push(@files, $file); 200 push(@files, $file);
163 } else { 201 } else {
164 my $file_cnt = @files; 202 my $file_cnt = @files;
203 my $lastfile;
165 open(PATCH, "<$file") or die "$P: Can't open ${file}\n"; 204 open(PATCH, "<$file") or die "$P: Can't open ${file}\n";
166 while (<PATCH>) { 205 while (<PATCH>) {
167 if (m/^\+\+\+\s+(\S+)/) { 206 if (m/^\+\+\+\s+(\S+)/) {
168 my $filename = $1; 207 my $filename = $1;
169 $filename =~ s@^[^/]*/@@; 208 $filename =~ s@^[^/]*/@@;
170 $filename =~ s@\n@@; 209 $filename =~ s@\n@@;
210 $lastfile = $filename;
171 push(@files, $filename); 211 push(@files, $filename);
212 } elsif (m/^\@\@ -(\d+),(\d+)/) {
213 if ($email_git_blame) {
214 push(@range, "$lastfile:$1:$2");
215 }
172 } 216 }
173 } 217 }
174 close(PATCH); 218 close(PATCH);
@@ -201,6 +245,7 @@ foreach my $file (@files) {
201 if ($type eq 'X') { 245 if ($type eq 'X') {
202 if (file_match_pattern($file, $value)) { 246 if (file_match_pattern($file, $value)) {
203 $exclude = 1; 247 $exclude = 1;
248 last;
204 } 249 }
205 } 250 }
206 } 251 }
@@ -208,35 +253,45 @@ foreach my $file (@files) {
208 253
209 if (!$exclude) { 254 if (!$exclude) {
210 my $tvi = 0; 255 my $tvi = 0;
256 my %hash;
211 foreach my $line (@typevalue) { 257 foreach my $line (@typevalue) {
212 if ($line =~ m/^(\C):\s*(.*)/) { 258 if ($line =~ m/^(\C):\s*(.*)/) {
213 my $type = $1; 259 my $type = $1;
214 my $value = $2; 260 my $value = $2;
215 if ($type eq 'F') { 261 if ($type eq 'F') {
216 if (file_match_pattern($file, $value)) { 262 if (file_match_pattern($file, $value)) {
217 add_categories($tvi); 263 my $value_pd = ($value =~ tr@/@@);
264 my $file_pd = ($file =~ tr@/@@);
265 $value_pd++ if (substr($value,-1,1) ne "/");
266 if ($pattern_depth == 0 ||
267 (($file_pd - $value_pd) < $pattern_depth)) {
268 $hash{$tvi} = $value_pd;
269 }
218 } 270 }
219 } 271 }
220 } 272 }
221 $tvi++; 273 $tvi++;
222 } 274 }
275 foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
276 add_categories($line);
277 }
223 } 278 }
224 279
225 if ($email && $email_git) { 280 if ($email && $email_git) {
226 recent_git_signoffs($file); 281 recent_git_signoffs($file);
227 } 282 }
228 283
284 if ($email && $email_git_blame) {
285 git_assign_blame($file);
286 }
229} 287}
230 288
231if ($email) { 289if ($email) {
232 foreach my $chief (@penguin_chief) { 290 foreach my $chief (@penguin_chief) {
233 if ($chief =~ m/^(.*):(.*)/) { 291 if ($chief =~ m/^(.*):(.*)/) {
234 my $email_address; 292 my $email_address;
235 if ($email_usename) { 293
236 $email_address = format_email($1, $2); 294 $email_address = format_email($1, $2);
237 } else {
238 $email_address = $2;
239 }
240 if ($email_git_penguin_chiefs) { 295 if ($email_git_penguin_chiefs) {
241 push(@email_to, $email_address); 296 push(@email_to, $email_address);
242 } else { 297 } else {
@@ -258,22 +313,22 @@ if ($email || $email_list) {
258} 313}
259 314
260if ($scm) { 315if ($scm) {
261 @scm = sort_and_uniq(@scm); 316 @scm = uniq(@scm);
262 output(@scm); 317 output(@scm);
263} 318}
264 319
265if ($status) { 320if ($status) {
266 @status = sort_and_uniq(@status); 321 @status = uniq(@status);
267 output(@status); 322 output(@status);
268} 323}
269 324
270if ($subsystem) { 325if ($subsystem) {
271 @subsystem = sort_and_uniq(@subsystem); 326 @subsystem = uniq(@subsystem);
272 output(@subsystem); 327 output(@subsystem);
273} 328}
274 329
275if ($web) { 330if ($web) {
276 @web = sort_and_uniq(@web); 331 @web = uniq(@web);
277 output(@web); 332 output(@web);
278} 333}
279 334
@@ -311,10 +366,12 @@ MAINTAINER field selection options:
311 --git-max-maintainers => maximum maintainers to add (default: 5) 366 --git-max-maintainers => maximum maintainers to add (default: 5)
312 --git-min-percent => minimum percentage of commits required (default: 5) 367 --git-min-percent => minimum percentage of commits required (default: 5)
313 --git-since => git history to use (default: 1-year-ago) 368 --git-since => git history to use (default: 1-year-ago)
369 --git-blame => use git blame to find modified commits for patch or file
314 --m => include maintainer(s) if any 370 --m => include maintainer(s) if any
315 --n => include name 'Full Name <addr\@domain.tld>' 371 --n => include name 'Full Name <addr\@domain.tld>'
316 --l => include list(s) if any 372 --l => include list(s) if any
317 --s => include subscriber only list(s) if any 373 --s => include subscriber only list(s) if any
374 --remove-duplicates => minimize duplicate email names/addresses
318 --scm => print SCM tree(s) if any 375 --scm => print SCM tree(s) if any
319 --status => print status if any 376 --status => print status if any
320 --subsystem => print subsystem name if any 377 --subsystem => print subsystem name if any
@@ -322,24 +379,28 @@ MAINTAINER field selection options:
322 379
323Output type options: 380Output type options:
324 --separator [, ] => separator for multiple entries on 1 line 381 --separator [, ] => separator for multiple entries on 1 line
382 using --separator also sets --nomultiline if --separator is not [, ]
325 --multiline => print 1 entry per line 383 --multiline => print 1 entry per line
326 384
327Default options:
328 [--email --git --m --n --l --multiline]
329
330Other options: 385Other options:
386 --pattern-depth => Number of pattern directory traversals (default: 0 (all))
331 --version => show version 387 --version => show version
332 --help => show this help information 388 --help => show this help information
333 389
390Default options:
391 [--email --git --m --n --l --multiline --pattern-depth=0 --remove-duplicates]
392
334Notes: 393Notes:
335 Using "-f directory" may give unexpected results: 394 Using "-f directory" may give unexpected results:
336 395 Used with "--git", git signators for _all_ files in and below
337 Used with "--git", git signators for _all_ files in and below 396 directory are examined as git recurses directories.
338 directory are examined as git recurses directories. 397 Any specified X: (exclude) pattern matches are _not_ ignored.
339 Any specified X: (exclude) pattern matches are _not_ ignored. 398 Used with "--nogit", directory is used as a pattern match,
340 Used with "--nogit", directory is used as a pattern match, 399 no individual file within the directory or subdirectory
341 no individual file within the directory or subdirectory 400 is matched.
342 is matched. 401 Used with "--git-blame", does not iterate all files in directory
402 Using "--git-blame" is slow and may add old committers and authors
403 that are no longer active maintainers to the output.
343EOT 404EOT
344} 405}
345 406
@@ -370,30 +431,100 @@ sub top_of_kernel_tree {
370 return 0; 431 return 0;
371} 432}
372 433
373sub format_email { 434sub parse_email {
374 my ($name, $email) = @_; 435 my ($formatted_email) = @_;
436
437 my $name = "";
438 my $address = "";
439
440 if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
441 $name = $1;
442 $address = $2;
443 } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
444 $address = $1;
445 } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
446 $address = $1;
447 }
375 448
376 $name =~ s/^\s+|\s+$//g; 449 $name =~ s/^\s+|\s+$//g;
377 $name =~ s/^\"|\"$//g; 450 $name =~ s/^\"|\"$//g;
378 $email =~ s/^\s+|\s+$//g; 451 $address =~ s/^\s+|\s+$//g;
452
453 if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars
454 $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
455 $name = "\"$name\"";
456 }
457
458 return ($name, $address);
459}
460
461sub format_email {
462 my ($name, $address) = @_;
379 463
380 my $formatted_email = ""; 464 my $formatted_email;
465
466 $name =~ s/^\s+|\s+$//g;
467 $name =~ s/^\"|\"$//g;
468 $address =~ s/^\s+|\s+$//g;
381 469
382 if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars 470 if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars
383 $name =~ s/(?<!\\)"/\\"/g; ##escape quotes 471 $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
384 $formatted_email = "\"${name}\"\ \<${email}\>"; 472 $name = "\"$name\"";
473 }
474
475 if ($email_usename) {
476 if ("$name" eq "") {
477 $formatted_email = "$address";
478 } else {
479 $formatted_email = "$name <${address}>";
480 }
385 } else { 481 } else {
386 $formatted_email = "${name} \<${email}\>"; 482 $formatted_email = $address;
387 } 483 }
484
388 return $formatted_email; 485 return $formatted_email;
389} 486}
390 487
391sub add_categories { 488sub find_starting_index {
489
392 my ($index) = @_; 490 my ($index) = @_;
393 491
394 $index = $index - 1; 492 while ($index > 0) {
395 while ($index >= 0) {
396 my $tv = $typevalue[$index]; 493 my $tv = $typevalue[$index];
494 if (!($tv =~ m/^(\C):\s*(.*)/)) {
495 last;
496 }
497 $index--;
498 }
499
500 return $index;
501}
502
503sub find_ending_index {
504 my ($index) = @_;
505
506 while ($index < @typevalue) {
507 my $tv = $typevalue[$index];
508 if (!($tv =~ m/^(\C):\s*(.*)/)) {
509 last;
510 }
511 $index++;
512 }
513
514 return $index;
515}
516
517sub add_categories {
518 my ($index) = @_;
519
520 my $i;
521 my $start = find_starting_index($index);
522 my $end = find_ending_index($index);
523
524 push(@subsystem, $typevalue[$start]);
525
526 for ($i = $start + 1; $i < $end; $i++) {
527 my $tv = $typevalue[$i];
397 if ($tv =~ m/^(\C):\s*(.*)/) { 528 if ($tv =~ m/^(\C):\s*(.*)/) {
398 my $ptype = $1; 529 my $ptype = $1;
399 my $pvalue = $2; 530 my $pvalue = $2;
@@ -414,19 +545,19 @@ sub add_categories {
414 } 545 }
415 } 546 }
416 } elsif ($ptype eq "M") { 547 } elsif ($ptype eq "M") {
417 my $p_used = 0; 548 my ($name, $address) = parse_email($pvalue);
418 if ($index >= 0) { 549 if ($name eq "") {
419 my $tv = $typevalue[$index - 1]; 550 if ($i > 0) {
420 if ($tv =~ m/^(\C):\s*(.*)/) { 551 my $tv = $typevalue[$i - 1];
421 if ($1 eq "P") { 552 if ($tv =~ m/^(\C):\s*(.*)/) {
422 if ($email_usename) { 553 if ($1 eq "P") {
423 push_email_address(format_email($2, $pvalue)); 554 $name = $2;
424 $p_used = 1; 555 $pvalue = format_email($name, $address);
425 } 556 }
426 } 557 }
427 } 558 }
428 } 559 }
429 if (!$p_used) { 560 if ($email_maintainer) {
430 push_email_addresses($pvalue); 561 push_email_addresses($pvalue);
431 } 562 }
432 } elsif ($ptype eq "T") { 563 } elsif ($ptype eq "T") {
@@ -436,31 +567,41 @@ sub add_categories {
436 } elsif ($ptype eq "S") { 567 } elsif ($ptype eq "S") {
437 push(@status, $pvalue); 568 push(@status, $pvalue);
438 } 569 }
439
440 $index--;
441 } else {
442 push(@subsystem,$tv);
443 $index = -1;
444 } 570 }
445 } 571 }
446} 572}
447 573
574my %email_hash_name;
575my %email_hash_address;
576
577sub email_inuse {
578 my ($name, $address) = @_;
579
580 return 1 if (($name eq "") && ($address eq ""));
581 return 1 if (($name ne "") && exists($email_hash_name{$name}));
582 return 1 if (($address ne "") && exists($email_hash_address{$address}));
583
584 return 0;
585}
586
448sub push_email_address { 587sub push_email_address {
449 my ($email_address) = @_; 588 my ($line) = @_;
589
590 my ($name, $address) = parse_email($line);
450 591
451 my $email_name = ""; 592 if ($address eq "") {
452 if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) { 593 return 0;
453 $email_name = $1;
454 $email_address = $2;
455 } 594 }
456 595
457 if ($email_maintainer) { 596 if (!$email_remove_duplicates) {
458 if ($email_usename && $email_name) { 597 push(@email_to, format_email($name, $address));
459 push(@email_to, format_email($email_name, $email_address)); 598 } elsif (!email_inuse($name, $address)) {
460 } else { 599 push(@email_to, format_email($name, $address));
461 push(@email_to, $email_address); 600 $email_hash_name{$name}++;
462 } 601 $email_hash_address{$address}++;
463 } 602 }
603
604 return 1;
464} 605}
465 606
466sub push_email_addresses { 607sub push_email_addresses {
@@ -476,7 +617,9 @@ sub push_email_addresses {
476 push_email_address($entry); 617 push_email_address($entry);
477 } 618 }
478 } else { 619 } else {
479 warn("Invalid MAINTAINERS address: '" . $address . "'\n"); 620 if (!push_email_address($address)) {
621 warn("Invalid MAINTAINERS address: '" . $address . "'\n");
622 }
480 } 623 }
481} 624}
482 625
@@ -492,6 +635,32 @@ sub which {
492 return ""; 635 return "";
493} 636}
494 637
638sub mailmap {
639 my @lines = @_;
640 my %hash;
641
642 foreach my $line (@lines) {
643 my ($name, $address) = parse_email($line);
644 if (!exists($hash{$name})) {
645 $hash{$name} = $address;
646 } elsif ($address ne $hash{$name}) {
647 $address = $hash{$name};
648 $line = format_email($name, $address);
649 }
650 if (exists($mailmap{$name})) {
651 my $obj = $mailmap{$name};
652 foreach my $map_address (@$obj) {
653 if (($map_address eq $address) &&
654 ($map_address ne $hash{$name})) {
655 $line = format_email($name, $hash{$name});
656 }
657 }
658 }
659 }
660
661 return @lines;
662}
663
495sub recent_git_signoffs { 664sub recent_git_signoffs {
496 my ($file) = @_; 665 my ($file) = @_;
497 666
@@ -500,6 +669,7 @@ sub recent_git_signoffs {
500 my $output = ""; 669 my $output = "";
501 my $count = 0; 670 my $count = 0;
502 my @lines = (); 671 my @lines = ();
672 my %hash;
503 my $total_sign_offs; 673 my $total_sign_offs;
504 674
 	if (which("git") eq "") {
@@ -513,52 +683,119 @@ sub recent_git_signoffs {
     }
 
     $cmd = "git log --since=${email_git_since} -- ${file}";
-    $cmd .= " | grep -Ei \"^[-_ a-z]+by:.*\\\@.*\$\"";
-    if (!$email_git_penguin_chiefs) {
-        $cmd .= " | grep -Ev \"${penguin_chiefs}\"";
-    }
-    $cmd .= " | cut -f2- -d\":\"";
-    $cmd .= " | sort | uniq -c | sort -rn";
 
     $output = `${cmd}`;
     $output =~ s/^\s*//gm;
 
     @lines = split("\n", $output);
 
-    $total_sign_offs = 0;
+    @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines);
+    if (!$email_git_penguin_chiefs) {
+        @lines = grep(!/${penguin_chiefs}/i, @lines);
+    }
+    # cut -f2- -d":"
+    s/.*:\s*(.+)\s*/$1/ for (@lines);
+
+    $total_sign_offs = @lines;
+
+    if ($email_remove_duplicates) {
+        @lines = mailmap(@lines);
+    }
+
+    @lines = sort(@lines);
+
+    # uniq -c
+    $hash{$_}++ for @lines;
+
+    # sort -rn
+    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+        my $sign_offs = $hash{$line};
+        $count++;
+        last if ($sign_offs < $email_git_min_signatures ||
+                 $count > $email_git_max_maintainers ||
+                 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
+        push_email_address($line);
+    }
+}
+
+sub save_commits {
+    my ($cmd, @commits) = @_;
+    my $output;
+    my @lines = ();
+
+    $output = `${cmd}`;
+
+    @lines = split("\n", $output);
     foreach my $line (@lines) {
-        if ($line =~ m/([0-9]+)\s+(.*)/) {
-            $total_sign_offs += $1;
-        } else {
-            die("$P: Unexpected git output: ${line}\n");
+        if ($line =~ m/^(\w+) /) {
+            push (@commits, $1);
         }
     }
+    return @commits;
+}
 
-    foreach my $line (@lines) {
-        if ($line =~ m/([0-9]+)\s+(.*)/) {
-            my $sign_offs = $1;
-            $line = $2;
-            $count++;
-            if ($sign_offs < $email_git_min_signatures ||
-                $count > $email_git_max_maintainers ||
-                $sign_offs * 100 / $total_sign_offs < $email_git_min_percent) {
-                last;
-            }
+sub git_assign_blame {
+    my ($file) = @_;
+
+    my @lines = ();
+    my @commits = ();
+    my $cmd;
+    my $output;
+    my %hash;
+    my $total_sign_offs;
+    my $count;
+
+    if (@range) {
+        foreach my $file_range_diff (@range) {
+            next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
+            my $diff_file = $1;
+            my $diff_start = $2;
+            my $diff_length = $3;
+            next if (!("$file" eq "$diff_file"));
+            $cmd = "git blame -l -L $diff_start,+$diff_length $file";
+            @commits = save_commits($cmd, @commits);
         }
-        if ($line =~ m/(.+)<(.+)>/) {
-            my $git_name = $1;
-            my $git_addr = $2;
-            if ($email_usename) {
-                push(@email_to, format_email($git_name, $git_addr));
-            } else {
-                push(@email_to, $git_addr);
-            }
-        } elsif ($line =~ m/<(.+)>/) {
-            my $git_addr = $1;
-            push(@email_to, $git_addr);
-        } else {
-            push(@email_to, $line);
+    } else {
+        if (-f $file) {
+            $cmd = "git blame -l $file";
+            @commits = save_commits($cmd, @commits);
+        }
+    }
+
+    $total_sign_offs = 0;
+    @commits = uniq(@commits);
+    foreach my $commit (@commits) {
+        $cmd = "git log -1 ${commit}";
+
+        $output = `${cmd}`;
+        $output =~ s/^\s*//gm;
+        @lines = split("\n", $output);
+
+        @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines);
+        if (!$email_git_penguin_chiefs) {
+            @lines = grep(!/${penguin_chiefs}/i, @lines);
+        }
+
+        # cut -f2- -d":"
+        s/.*:\s*(.+)\s*/$1/ for (@lines);
+
+        $total_sign_offs += @lines;
+
+        if ($email_remove_duplicates) {
+            @lines = mailmap(@lines);
         }
+
+        $hash{$_}++ for @lines;
     }
+
+    $count = 0;
+    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+        my $sign_offs = $hash{$line};
+        $count++;
+        last if ($sign_offs < $email_git_min_signatures ||
+                 $count > $email_git_max_maintainers ||
+                 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
+        push_email_address($line);
+    }
 }
 
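The rewritten recent_git_signoffs() above folds the old shell pipeline (grep | cut | sort | uniq -c | sort -rn) into Perl itself: tally identical sign-off lines in a hash, then walk the keys in descending-count order. A minimal standalone sketch of that counting idiom, using made-up sign-off lines rather than real git output:

    #!/usr/bin/perl -w
    use strict;

    # Hypothetical sign-off lines, as they would look after the
    # grep and cut -f2- -d":" stages of the rewritten function.
    my @lines = ("Dev One <one\@example.org>",
                 "Dev Two <two\@example.org>",
                 "Dev One <one\@example.org>");

    # uniq -c: tally duplicate lines in a hash.
    my %hash;
    $hash{$_}++ for @lines;

    # sort -rn: iterate the keys by descending count.
    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
        printf "%d\t%s\n", $hash{$line}, $line;
    }

Run standalone, this prints the "Dev One" entry (count 2) before "Dev Two" (count 1), which is the ordering the cutoff tests ($email_git_min_signatures, $email_git_max_maintainers, $email_git_min_percent) are then applied to.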
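Likewise, git_assign_blame() expects each @range entry in file:start:length form and turns a matching entry into a line-limited blame command (git blame -l -L start,+length). A small sketch of just that parsing step; the file name and numbers here are illustrative, not from the patch:

    #!/usr/bin/perl -w
    use strict;

    # Hypothetical @range entry of the form file:start:length.
    my $file_range_diff = "drivers/net/dummy.c:120:15";

    if ($file_range_diff =~ m/(.+):(.+):(.+)/) {
        my ($diff_file, $diff_start, $diff_length) = ($1, $2, $3);
        # -l asks blame for long commit hashes, which is what
        # save_commits() captures with /^(\w+) /; -L restricts
        # blame to the given line range.
        print "git blame -l -L $diff_start,+$diff_length $diff_file\n";
    }

Each resulting blame line begins with a commit hash; save_commits() collects those, and git_assign_blame() de-duplicates them with uniq() before counting sign-offs per commit.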
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index aadc5223dcd..ecf9c7dc182 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -334,8 +334,6 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
   deps_drivers/net/dummy.o := \
     drivers/net/dummy.c \
       $(wildcard include/config/net/fastroute.h) \
-      include/linux/config.h \
-      $(wildcard include/config/h.h) \
     include/linux/module.h \
 
   Sum all files in the same dir or subdirs.
diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c
index 1edab7b4ea8..3136c88eacd 100644
--- a/sound/oss/swarm_cs4297a.c
+++ b/sound/oss/swarm_cs4297a.c
@@ -110,9 +110,6 @@ static void start_adc(struct cs4297a_state *s);
 // rather than 64k as some of the games work more responsively.
 // log base 2( buff sz = 32k).
 
-//static unsigned long defaultorder = 3;
-//MODULE_PARM(defaultorder, "i");
-
 //
 // Turn on/off debugging compilation by commenting out "#define CSDEBUG"
 //
diff --git a/sound/oss/sys_timer.c b/sound/oss/sys_timer.c
index 107534477a2..8db6aefe15e 100644
--- a/sound/oss/sys_timer.c
+++ b/sound/oss/sys_timer.c
@@ -100,9 +100,6 @@ def_tmr_open(int dev, int mode)
 	curr_tempo = 60;
 	curr_timebase = 100;
 	opened = 1;
-
-	;
-
 	{
 		def_tmr.expires = (1) + jiffies;
 		add_timer(&def_tmr);
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index c64e55aa63b..686e5aa9720 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c
@@ -1027,7 +1027,7 @@ static int wm9081_hw_params(struct snd_pcm_substream *substream,
 			       - wm9081->fs);
 	for (i = 1; i < ARRAY_SIZE(clk_sys_rates); i++) {
 		cur_val = abs((wm9081->sysclk_rate /
-			       clk_sys_rates[i].ratio) - wm9081->fs);;
+			       clk_sys_rates[i].ratio) - wm9081->fs);
 		if (cur_val < best_val) {
 			best = i;
 			best_val = cur_val;
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 5b9ed646478..d11a6d7e384 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -351,7 +351,7 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai,
 	do_div(tmp, freq_out);
 	val = tmp;
 
-	val = (val << 16) | 64;;
+	val = (val << 16) | 64;
 	ssp_write_reg(ssp, SSACDD, val);
 
 	ssacd |= (0x6 << 4);
diff --git a/sound/soc/s3c24xx/s3c24xx_uda134x.c b/sound/soc/s3c24xx/s3c24xx_uda134x.c
index 8e79a416db5..c215d32d632 100644
--- a/sound/soc/s3c24xx/s3c24xx_uda134x.c
+++ b/sound/soc/s3c24xx/s3c24xx_uda134x.c
@@ -67,7 +67,7 @@ static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream)
 {
 	int ret = 0;
 #ifdef ENFORCE_RATES
-	struct snd_pcm_runtime *runtime = substream->runtime;;
+	struct snd_pcm_runtime *runtime = substream->runtime;
 #endif
 
 	mutex_lock(&clk_lock);