aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/DocBook/kernel-api.tmpl2
-rw-r--r--Documentation/accounting/cgroupstats.txt27
-rw-r--r--Documentation/cachetlb.txt27
-rw-r--r--Documentation/cgroups.txt545
-rw-r--r--Documentation/cpu-hotplug.txt4
-rw-r--r--Documentation/cpusets.txt226
-rw-r--r--Documentation/input/input-programming.txt15
-rw-r--r--Documentation/kdump/kdump.txt26
-rw-r--r--Documentation/kernel-parameters.txt7
-rw-r--r--Documentation/markers.txt81
-rw-r--r--Makefile3
-rw-r--r--arch/alpha/Kconfig2
-rw-r--r--arch/alpha/kernel/semaphore.c16
-rw-r--r--arch/alpha/kernel/traps.c6
-rw-r--r--arch/alpha/lib/fls.c2
-rw-r--r--arch/alpha/mm/fault.c4
-rw-r--r--arch/alpha/oprofile/Kconfig23
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/kernel/process.c2
-rw-r--r--arch/arm/kernel/ptrace.c8
-rw-r--r--arch/arm/kernel/traps.c10
-rw-r--r--arch/arm/mm/alignment.c2
-rw-r--r--arch/arm/mm/fault.c2
-rw-r--r--arch/arm/oprofile/Kconfig42
-rw-r--r--arch/avr32/kernel/traps.c2
-rw-r--r--arch/avr32/mm/fault.c6
-rw-r--r--arch/blackfin/Kconfig2
-rw-r--r--arch/blackfin/oprofile/Kconfig29
-rw-r--r--arch/cris/Kconfig2
-rw-r--r--arch/frv/Kconfig2
-rw-r--r--arch/frv/kernel/irq-mb93091.c2
-rw-r--r--arch/frv/kernel/irq-mb93093.c2
-rw-r--r--arch/frv/kernel/irq-mb93493.c2
-rw-r--r--arch/frv/kernel/irq.c2
-rw-r--r--arch/h8300/Kconfig2
-rw-r--r--arch/i386/Kconfig25
-rw-r--r--arch/ia64/Kconfig15
-rw-r--r--arch/ia64/configs/sn2_defconfig1
-rw-r--r--arch/ia64/ia32/sys_ia32.c6
-rw-r--r--arch/ia64/kernel/efi.c4
-rw-r--r--arch/ia64/kernel/perfmon.c161
-rw-r--r--arch/ia64/kernel/perfmon_default_smpl.c8
-rw-r--r--arch/ia64/kernel/process.c3
-rw-r--r--arch/ia64/kernel/setup.c88
-rw-r--r--arch/ia64/kernel/signal.c4
-rw-r--r--arch/ia64/kernel/traps.c6
-rw-r--r--arch/ia64/kernel/unaligned.c5
-rw-r--r--arch/ia64/mm/fault.c2
-rw-r--r--arch/ia64/mm/init.c2
-rw-r--r--arch/ia64/oprofile/Kconfig20
-rw-r--r--arch/m32r/Kconfig2
-rw-r--r--arch/m32r/kernel/traps.c2
-rw-r--r--arch/m32r/mm/fault.c2
-rw-r--r--arch/m32r/oprofile/Kconfig23
-rw-r--r--arch/m68k/Kconfig2
-rw-r--r--arch/m68k/kernel/traps.c4
-rw-r--r--arch/m68k/mm/fault.c2
-rw-r--r--arch/m68knommu/Kconfig2
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/au1000/pb1200/irqmap.c2
-rw-r--r--arch/mips/basler/excite/excite_irq.c2
-rw-r--r--arch/mips/configs/ip27_defconfig1
-rw-r--r--arch/mips/configs/sb1250-swarm_defconfig1
-rw-r--r--arch/mips/kernel/irixelf.c4
-rw-r--r--arch/mips/kernel/irixsig.c2
-rw-r--r--arch/mips/kernel/sysirix.c4
-rw-r--r--arch/mips/kernel/traps.c2
-rw-r--r--arch/mips/mm/fault.c2
-rw-r--r--arch/mips/oprofile/Kconfig23
-rw-r--r--arch/mips/tx4938/common/setup.c2
-rw-r--r--arch/parisc/Kconfig2
-rw-r--r--arch/parisc/kernel/signal.c2
-rw-r--r--arch/parisc/kernel/traps.c10
-rw-r--r--arch/parisc/kernel/unaligned.c2
-rw-r--r--arch/parisc/mm/fault.c2
-rw-r--r--arch/parisc/oprofile/Kconfig23
-rw-r--r--arch/powerpc/Kconfig15
-rw-r--r--arch/powerpc/configs/cell_defconfig1
-rw-r--r--arch/powerpc/configs/ppc64_defconfig1
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--arch/powerpc/kernel/machine_kexec.c54
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/traps.c4
-rw-r--r--arch/powerpc/mm/fault.c2
-rw-r--r--arch/powerpc/oprofile/Kconfig24
-rw-r--r--arch/powerpc/platforms/maple/setup.c2
-rw-r--r--arch/powerpc/platforms/pseries/ras.c2
-rw-r--r--arch/ppc/Kconfig2
-rw-r--r--arch/ppc/kernel/traps.c2
-rw-r--r--arch/ppc/mm/fault.c2
-rw-r--r--arch/ppc/platforms/chestnut.c1
-rw-r--r--arch/s390/Kconfig16
-rw-r--r--arch/s390/kernel/process.c2
-rw-r--r--arch/s390/lib/uaccess_pt.c2
-rw-r--r--arch/s390/mm/fault.c2
-rw-r--r--arch/s390/oprofile/Kconfig22
-rw-r--r--arch/sh/Kconfig2
-rw-r--r--arch/sh/kernel/machine_kexec.c21
-rw-r--r--arch/sh/kernel/process.c2
-rw-r--r--arch/sh/kernel/setup.c38
-rw-r--r--arch/sh/kernel/signal.c4
-rw-r--r--arch/sh/kernel/traps.c7
-rw-r--r--arch/sh/mm/fault.c2
-rw-r--r--arch/sh/oprofile/Kconfig23
-rw-r--r--arch/sh64/Kconfig2
-rw-r--r--arch/sh64/kernel/traps.c4
-rw-r--r--arch/sh64/mm/fault.c10
-rw-r--r--arch/sh64/oprofile/Kconfig23
-rw-r--r--arch/sparc/Kconfig6
-rw-r--r--arch/sparc/kernel/ptrace.c4
-rw-r--r--arch/sparc/kernel/sys_sparc.c2
-rw-r--r--arch/sparc/kernel/sys_sunos.c2
-rw-r--r--arch/sparc/kernel/traps.c4
-rw-r--r--arch/sparc/oprofile/Kconfig17
-rw-r--r--arch/sparc64/Kconfig15
-rw-r--r--arch/sparc64/kernel/sys_sunos32.c2
-rw-r--r--arch/sparc64/kernel/traps.c2
-rw-r--r--arch/sparc64/oprofile/Kconfig17
-rw-r--r--arch/sparc64/solaris/misc.c4
-rw-r--r--arch/um/Kconfig2
-rw-r--r--arch/um/kernel/trap.c2
-rw-r--r--arch/um/sys-x86_64/sysrq.c4
-rw-r--r--arch/v850/Kconfig2
-rw-r--r--arch/x86/kernel/crash_dump_32.c1
-rw-r--r--arch/x86/kernel/e820_32.c3
-rw-r--r--arch/x86/kernel/e820_64.c3
-rw-r--r--arch/x86/kernel/machine_kexec_32.c22
-rw-r--r--arch/x86/kernel/machine_kexec_64.c27
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/setup_32.c49
-rw-r--r--arch/x86/kernel/setup_64.c39
-rw-r--r--arch/x86/kernel/signal_32.c4
-rw-r--r--arch/x86/kernel/traps_32.c4
-rw-r--r--arch/x86/lib/usercopy_32.c2
-rw-r--r--arch/x86/mm/fault_32.c6
-rw-r--r--arch/x86/mm/fault_64.c2
-rw-r--r--arch/x86/mm/pageattr_64.c9
-rw-r--r--arch/x86_64/Kconfig15
-rw-r--r--arch/xtensa/Kconfig2
-rw-r--r--arch/xtensa/kernel/traps.c6
-rw-r--r--arch/xtensa/mm/fault.c2
-rw-r--r--block/ll_rw_blk.c4
-rw-r--r--drivers/acpi/button.c6
-rw-r--r--drivers/block/nbd.c2
-rw-r--r--drivers/cdrom/cdrom.c2
-rw-r--r--drivers/char/drm/drm_bufs.c2
-rw-r--r--drivers/char/drm/drm_drv.c2
-rw-r--r--drivers/char/drm/drm_fops.c7
-rw-r--r--drivers/char/drm/drm_lock.c6
-rw-r--r--drivers/char/drm/drm_os_linux.h2
-rw-r--r--drivers/char/drm/i810_dma.c2
-rw-r--r--drivers/char/drm/i830_dma.c2
-rw-r--r--drivers/char/esp.c2
-rw-r--r--drivers/char/keyboard.c48
-rw-r--r--drivers/char/mxser.c2
-rw-r--r--drivers/char/mxser_new.c2
-rw-r--r--drivers/char/sonypi.c8
-rw-r--r--drivers/char/sx.c2
-rw-r--r--drivers/char/sysrq.c2
-rw-r--r--drivers/char/tty_io.c17
-rw-r--r--drivers/char/vt.c45
-rw-r--r--drivers/edac/edac_core.h2
-rw-r--r--drivers/edac/pasemi_edac.c1
-rw-r--r--drivers/firmware/dcdbas.h1
-rw-r--r--drivers/hid/hidraw.c4
-rw-r--r--drivers/hid/usbhid/usbkbd.c7
-rw-r--r--drivers/hid/usbhid/usbmouse.c12
-rw-r--r--drivers/hwmon/applesmc.c2
-rw-r--r--drivers/hwmon/hdaps.c2
-rw-r--r--drivers/i2c/busses/i2c-pxa.c54
-rw-r--r--drivers/ide/ide-io.c2
-rw-r--r--drivers/infiniband/core/cma.c5
-rw-r--r--drivers/input/evdev.c12
-rw-r--r--drivers/input/input.c10
-rw-r--r--drivers/input/joydev.c16
-rw-r--r--drivers/input/joystick/a3d.c28
-rw-r--r--drivers/input/joystick/adi.c2
-rw-r--r--drivers/input/joystick/amijoy.c7
-rw-r--r--drivers/input/joystick/analog.c2
-rw-r--r--drivers/input/joystick/cobra.c2
-rw-r--r--drivers/input/joystick/db9.c2
-rw-r--r--drivers/input/joystick/gamecon.c4
-rw-r--r--drivers/input/joystick/gf2k.c2
-rw-r--r--drivers/input/joystick/grip.c2
-rw-r--r--drivers/input/joystick/grip_mp.c2
-rw-r--r--drivers/input/joystick/guillemot.c2
-rw-r--r--drivers/input/joystick/iforce/iforce-main.c3
-rw-r--r--drivers/input/joystick/iforce/iforce.h4
-rw-r--r--drivers/input/joystick/interact.c2
-rw-r--r--drivers/input/joystick/magellan.c2
-rw-r--r--drivers/input/joystick/sidewinder.c2
-rw-r--r--drivers/input/joystick/spaceball.c17
-rw-r--r--drivers/input/joystick/spaceorb.c2
-rw-r--r--drivers/input/joystick/stinger.c9
-rw-r--r--drivers/input/joystick/tmdc.c2
-rw-r--r--drivers/input/joystick/turbografx.c2
-rw-r--r--drivers/input/joystick/twidjoy.c2
-rw-r--r--drivers/input/joystick/warrior.c8
-rw-r--r--drivers/input/joystick/xpad.c2
-rw-r--r--drivers/input/keyboard/aaed2000_kbd.c2
-rw-r--r--drivers/input/keyboard/amikbd.c2
-rw-r--r--drivers/input/keyboard/atakbd.c2
-rw-r--r--drivers/input/keyboard/atkbd.c21
-rw-r--r--drivers/input/keyboard/corgikbd.c3
-rw-r--r--drivers/input/keyboard/gpio_keys.c2
-rw-r--r--drivers/input/keyboard/hil_kbd.c5
-rw-r--r--drivers/input/keyboard/hilkbd.c5
-rw-r--r--drivers/input/keyboard/locomokbd.c2
-rw-r--r--drivers/input/keyboard/newtonkbd.c2
-rw-r--r--drivers/input/keyboard/pxa27x_keyboard.c5
-rw-r--r--drivers/input/keyboard/spitzkbd.c3
-rw-r--r--drivers/input/keyboard/stowaway.c2
-rw-r--r--drivers/input/keyboard/sunkbd.c8
-rw-r--r--drivers/input/keyboard/xtkbd.c2
-rw-r--r--drivers/input/misc/ati_remote.c8
-rw-r--r--drivers/input/misc/ati_remote2.c7
-rw-r--r--drivers/input/misc/atlas_btns.c2
-rw-r--r--drivers/input/misc/cobalt_btns.c2
-rw-r--r--drivers/input/misc/ixp4xx-beeper.c4
-rw-r--r--drivers/input/misc/keyspan_remote.c2
-rw-r--r--drivers/input/misc/m68kspkr.c4
-rw-r--r--drivers/input/misc/pcspkr.c4
-rw-r--r--drivers/input/misc/powermate.c9
-rw-r--r--drivers/input/misc/sparcspkr.c4
-rw-r--r--drivers/input/misc/yealink.c2
-rw-r--r--drivers/input/mouse/alps.c26
-rw-r--r--drivers/input/mouse/amimouse.c7
-rw-r--r--drivers/input/mouse/atarimouse.c7
-rw-r--r--drivers/input/mouse/hil_ptr.c6
-rw-r--r--drivers/input/mouse/inport.c7
-rw-r--r--drivers/input/mouse/lifebook.c11
-rw-r--r--drivers/input/mouse/logibm.c7
-rw-r--r--drivers/input/mouse/pc110pad.c6
-rw-r--r--drivers/input/mouse/psmouse-base.c7
-rw-r--r--drivers/input/mouse/rpcmouse.c7
-rw-r--r--drivers/input/mouse/sermouse.c7
-rw-r--r--drivers/input/mouse/touchkit_ps2.c2
-rw-r--r--drivers/input/mousedev.c26
-rw-r--r--drivers/input/tablet/acecad.c12
-rw-r--r--drivers/input/tablet/gtco.c6
-rw-r--r--drivers/input/tablet/kbtab.c11
-rw-r--r--drivers/input/tablet/wacom_sys.c58
-rw-r--r--drivers/input/touchscreen/ads7846.c4
-rw-r--r--drivers/input/touchscreen/corgi_ts.c4
-rw-r--r--drivers/input/touchscreen/elo.c4
-rw-r--r--drivers/input/touchscreen/fujitsu_ts.c4
-rw-r--r--drivers/input/touchscreen/gunze.c4
-rw-r--r--drivers/input/touchscreen/h3600_ts_input.c5
-rw-r--r--drivers/input/touchscreen/hp680_ts_input.c4
-rw-r--r--drivers/input/touchscreen/mk712.c4
-rw-r--r--drivers/input/touchscreen/mtouch.c4
-rw-r--r--drivers/input/touchscreen/penmount.c4
-rw-r--r--drivers/input/touchscreen/touchright.c4
-rw-r--r--drivers/input/touchscreen/touchwin.c4
-rw-r--r--drivers/input/touchscreen/ucb1400_ts.c2
-rw-r--r--drivers/input/touchscreen/usbtouchscreen.c4
-rw-r--r--drivers/isdn/hardware/avm/b1dma.c28
-rw-r--r--drivers/isdn/hardware/avm/c4.c2
-rw-r--r--drivers/isdn/hardware/avm/t1isa.c28
-rw-r--r--drivers/isdn/sc/debug.h19
-rw-r--r--drivers/isdn/sc/includes.h1
-rw-r--r--drivers/isdn/sc/init.c2
-rw-r--r--drivers/macintosh/adbhid.c19
-rw-r--r--drivers/macintosh/mac_hid.c7
-rw-r--r--drivers/md/md.c2
-rw-r--r--drivers/media/dvb/cinergyT2/cinergyT2.c2
-rw-r--r--drivers/media/dvb/dvb-usb/dvb-usb-remote.c2
-rw-r--r--drivers/media/dvb/ttpci/av7110_ir.c2
-rw-r--r--drivers/media/dvb/ttusb-dec/ttusb_dec.c2
-rw-r--r--drivers/media/video/usbvideo/konicawc.c4
-rw-r--r--drivers/media/video/usbvideo/quickcam_messenger.c4
-rw-r--r--drivers/media/video/zoran_driver.c4
-rw-r--r--drivers/misc/ibmasm/remote.c8
-rw-r--r--drivers/misc/phantom.c97
-rw-r--r--drivers/misc/sony-laptop.c6
-rw-r--r--drivers/mtd/ubi/wl.c2
-rw-r--r--drivers/net/bnx2.c2
-rw-r--r--drivers/net/bonding/bond_3ad.c32
-rw-r--r--drivers/net/bonding/bond_3ad.h12
-rw-r--r--drivers/net/cris/eth_v10.c2
-rw-r--r--drivers/net/cxgb3/adapter.h2
-rw-r--r--drivers/net/eth16i.c1
-rw-r--r--drivers/net/hamradio/dmascc.c2
-rw-r--r--drivers/net/mac89x0.c2
-rw-r--r--drivers/net/meth.h3
-rw-r--r--drivers/net/s2io-regs.h632
-rw-r--r--drivers/net/s2io.c16
-rw-r--r--drivers/net/s2io.h84
-rw-r--r--drivers/net/spider_net.c2
-rw-r--r--drivers/net/tulip/uli526x.c2
-rw-r--r--drivers/net/wireless/bcm43xx/bcm43xx_leds.c2
-rw-r--r--drivers/net/wireless/hostap/hostap_common.h3
-rw-r--r--drivers/net/wireless/hostap/hostap_ioctl.c2
-rw-r--r--drivers/pcmcia/m32r_pcc.c2
-rw-r--r--drivers/pcmcia/m8xx_pcmcia.c2
-rw-r--r--drivers/ps3/ps3av.c6
-rw-r--r--drivers/ps3/vuart.c2
-rw-r--r--drivers/rtc/rtc-pl031.c2
-rw-r--r--drivers/rtc/rtc-sa1100.c2
-rw-r--r--drivers/s390/cio/idset.c2
-rw-r--r--drivers/s390/net/claw.c2
-rw-r--r--drivers/scsi/FlashPoint.c83
-rw-r--r--drivers/scsi/Kconfig2
-rw-r--r--drivers/scsi/ide-scsi.c2
-rw-r--r--drivers/scsi/libsas/sas_discover.c8
-rw-r--r--drivers/scsi/nsp32.h5
-rw-r--r--drivers/scsi/pcmcia/nsp_cs.h1
-rw-r--r--drivers/scsi/qla4xxx/ql4_fw.h2
-rw-r--r--drivers/scsi/qla4xxx/ql4_iocb.c2
-rw-r--r--drivers/serial/amba-pl011.c26
-rw-r--r--drivers/serial/crisv10.c2
-rw-r--r--drivers/usb/core/devio.c2
-rw-r--r--drivers/usb/gadget/file_storage.c2
-rw-r--r--drivers/video/cyber2000fb.c236
-rw-r--r--drivers/video/pnx4008/sdum.h3
-rw-r--r--drivers/watchdog/at91rm9200_wdt.c2
-rw-r--r--drivers/watchdog/ks8695_wdt.c2
-rw-r--r--drivers/watchdog/omap_wdt.c2
-rw-r--r--drivers/watchdog/sa1100_wdt.c2
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/autofs/inode.c2
-rw-r--r--fs/autofs/root.c6
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/inode.c4
-rw-r--r--fs/autofs4/root.c4
-rw-r--r--fs/binfmt_elf.c16
-rw-r--r--fs/binfmt_elf_fdpic.c16
-rw-r--r--fs/cifs/CHANGES21
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/asn1.c10
-rw-r--r--fs/cifs/cifs_debug.c28
-rw-r--r--fs/cifs/cifsacl.c333
-rw-r--r--fs/cifs/cifsacl.h64
-rw-r--r--fs/cifs/cifsencrypt.c5
-rw-r--r--fs/cifs/cifsfs.c38
-rw-r--r--fs/cifs/cifsfs.h11
-rw-r--r--fs/cifs/cifsglob.h31
-rw-r--r--fs/cifs/cifspdu.h20
-rw-r--r--fs/cifs/cifsproto.h8
-rw-r--r--fs/cifs/cifssmb.c158
-rw-r--r--fs/cifs/connect.c264
-rw-r--r--fs/cifs/dir.c9
-rw-r--r--fs/cifs/export.c1
-rw-r--r--fs/cifs/file.c86
-rw-r--r--fs/cifs/inode.c52
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/misc.c1
-rw-r--r--fs/cifs/netmisc.c33
-rw-r--r--fs/cifs/readdir.c8
-rw-r--r--fs/cifs/sess.c103
-rw-r--r--fs/cifs/smberr.h5
-rw-r--r--fs/cifs/transport.c34
-rw-r--r--fs/cifs/xattr.c19
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/dlm/user.c2
-rw-r--r--fs/eventpoll.c5
-rw-r--r--fs/exec.c16
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/xattr.c3
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/file_table.c5
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/ioprio.c9
-rw-r--r--fs/jbd/commit.c8
-rw-r--r--fs/jbd/journal.c67
-rw-r--r--fs/jbd/recovery.c6
-rw-r--r--fs/jbd/transaction.c3
-rw-r--r--fs/jffs2/debug.h8
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfsd/vfs.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/cluster/masklog.h2
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c10
-rw-r--r--fs/proc/array.c30
-rw-r--r--fs/proc/base.c202
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/proc_misc.c3
-rw-r--r--fs/proc/root.c83
-rw-r--r--fs/reiserfs/bitmap.c111
-rw-r--r--fs/reiserfs/inode.c8
-rw-r--r--fs/reiserfs/journal.c24
-rw-r--r--fs/reiserfs/prints.c10
-rw-r--r--fs/reiserfs/resize.c15
-rw-r--r--fs/reiserfs/stree.c6
-rw-r--r--fs/reiserfs/super.c15
-rw-r--r--fs/reiserfs/xattr.c5
-rw-r--r--fs/select.c5
-rw-r--r--fs/super.c32
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_export.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c24
-rw-r--r--fs/xfs/xfs_dmops.c21
-rw-r--r--fs/xfs/xfs_fs.h29
-rw-r--r--fs/xfs/xfs_qmops.c22
-rw-r--r--fs/xfs/xfs_vfsops.c9
-rw-r--r--fs/xfs/xfs_vfsops.h4
-rw-r--r--fs/xfs/xfs_vnodeops.c13
-rw-r--r--fs/xfs/xfs_vnodeops.h2
-rw-r--r--include/asm-alpha/bitops.h4
-rw-r--r--include/asm-alpha/tlbflush.h11
-rw-r--r--include/asm-arm/arch-ixp4xx/io.h3
-rw-r--r--include/asm-arm/bitops.h4
-rw-r--r--include/asm-arm/tlbflush.h5
-rw-r--r--include/asm-avr32/bitops.h4
-rw-r--r--include/asm-avr32/tlbflush.h7
-rw-r--r--include/asm-blackfin/bitops.h4
-rw-r--r--include/asm-blackfin/tlbflush.h6
-rw-r--r--include/asm-cris/bitops.h4
-rw-r--r--include/asm-cris/posix_types.h2
-rw-r--r--include/asm-cris/tlbflush.h7
-rw-r--r--include/asm-frv/bitops.h4
-rw-r--r--include/asm-frv/tlbflush.h2
-rw-r--r--include/asm-generic/bitops.h4
-rw-r--r--include/asm-generic/bitops/atomic.h27
-rw-r--r--include/asm-generic/bitops/non-atomic.h29
-rw-r--r--include/asm-generic/vmlinux.lds.h7
-rw-r--r--include/asm-h8300/bitops.h5
-rw-r--r--include/asm-h8300/tlbflush.h6
-rw-r--r--include/asm-ia64/bitops.h4
-rw-r--r--include/asm-ia64/cacheflush.h2
-rw-r--r--include/asm-ia64/meminit.h2
-rw-r--r--include/asm-ia64/pgtable.h2
-rw-r--r--include/asm-ia64/smp.h2
-rw-r--r--include/asm-ia64/spinlock.h2
-rw-r--r--include/asm-ia64/tlbflush.h13
-rw-r--r--include/asm-m32r/bitops.h4
-rw-r--r--include/asm-m32r/pgtable.h2
-rw-r--r--include/asm-m32r/tlbflush.h3
-rw-r--r--include/asm-m68k/bitops.h4
-rw-r--r--include/asm-m68k/tlbflush.h10
-rw-r--r--include/asm-m68knommu/bitops.h4
-rw-r--r--include/asm-m68knommu/tlbflush.h6
-rw-r--r--include/asm-mips/bitops.h4
-rw-r--r--include/asm-mips/fpu.h2
-rw-r--r--include/asm-mips/ip32/crime.h3
-rw-r--r--include/asm-mips/ip32/mace.h3
-rw-r--r--include/asm-mips/tlbflush.h7
-rw-r--r--include/asm-parisc/bitops.h4
-rw-r--r--include/asm-parisc/pgtable.h2
-rw-r--r--include/asm-parisc/tlbflush.h4
-rw-r--r--include/asm-powerpc/bitops.h4
-rw-r--r--include/asm-powerpc/iommu.h2
-rw-r--r--include/asm-powerpc/mmu_context.h2
-rw-r--r--include/asm-powerpc/tlbflush.h11
-rw-r--r--include/asm-ppc/mmu_context.h3
-rw-r--r--include/asm-s390/bitops.h4
-rw-r--r--include/asm-s390/tlbflush.h7
-rw-r--r--include/asm-sh/bitops.h5
-rw-r--r--include/asm-sh/tlbflush.h6
-rw-r--r--include/asm-sh64/bitops.h5
-rw-r--r--include/asm-sh64/tlbflush.h4
-rw-r--r--include/asm-sparc/bitops.h4
-rw-r--r--include/asm-sparc/tlbflush.h6
-rw-r--r--include/asm-sparc64/bitops.h4
-rw-r--r--include/asm-sparc64/smp.h2
-rw-r--r--include/asm-sparc64/tlbflush.h7
-rw-r--r--include/asm-um/bitops.h4
-rw-r--r--include/asm-um/tlbflush.h6
-rw-r--r--include/asm-v850/bitops.h3
-rw-r--r--include/asm-v850/tlbflush.h6
-rw-r--r--include/asm-x86/bitops_32.h4
-rw-r--r--include/asm-x86/bitops_64.h4
-rw-r--r--include/asm-x86/pgtable_32.h5
-rw-r--r--include/asm-x86/pgtable_64.h2
-rw-r--r--include/asm-x86/smp_32.h2
-rw-r--r--include/asm-x86/tlbflush_32.h7
-rw-r--r--include/asm-x86/tlbflush_64.h9
-rw-r--r--include/asm-x86/topology_64.h2
-rw-r--r--include/asm-xtensa/bitops.h4
-rw-r--r--include/asm-xtensa/tlbflush.h11
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/bitmap.h1
-rw-r--r--include/linux/bitops.h8
-rw-r--r--include/linux/cgroup.h327
-rw-r--r--include/linux/cgroup_subsys.h38
-rw-r--r--include/linux/cgroupstats.h70
-rw-r--r--include/linux/cpu_acct.h14
-rw-r--r--include/linux/cpuset.h17
-rw-r--r--include/linux/delayacct.h13
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/hid.h4
-rw-r--r--include/linux/init_task.h12
-rw-r--r--include/linux/input.h47
-rw-r--r--include/linux/ipc.h3
-rw-r--r--include/linux/jbd.h18
-rw-r--r--include/linux/kexec.h2
-rw-r--r--include/linux/keyboard.h11
-rw-r--r--include/linux/list.h3
-rw-r--r--include/linux/lockdep.h8
-rw-r--r--include/linux/magic.h1
-rw-r--r--include/linux/marker.h129
-rw-r--r--include/linux/mempolicy.h12
-rw-r--r--include/linux/module.h12
-rw-r--r--include/linux/msg.h1
-rw-r--r--include/linux/notifier.h17
-rw-r--r--include/linux/nsproxy.h50
-rw-r--r--include/linux/of.h2
-rw-r--r--include/linux/phantom.h6
-rw-r--r--include/linux/pid.h61
-rw-r--r--include/linux/pid_namespace.h26
-rw-r--r--include/linux/prio_heap.h58
-rw-r--r--include/linux/proc_fs.h19
-rw-r--r--include/linux/reiserfs_fs.h28
-rw-r--r--include/linux/reiserfs_fs_sb.h4
-rw-r--r--include/linux/sched.h171
-rw-r--r--include/linux/sem.h1
-rw-r--r--include/linux/shm.h1
-rw-r--r--include/linux/types.h3
-rw-r--r--include/linux/uinput.h4
-rw-r--r--include/linux/vt.h12
-rw-r--r--include/linux/workqueue.h49
-rw-r--r--include/net/9p/9p.h4
-rw-r--r--include/net/scm.h4
-rw-r--r--include/video/sstfb.h1
-rw-r--r--include/video/tdfx.h2
-rw-r--r--init/Kconfig51
-rw-r--r--init/main.c3
-rw-r--r--ipc/mqueue.c7
-rw-r--r--ipc/msg.c253
-rw-r--r--ipc/sem.c308
-rw-r--r--ipc/shm.c316
-rw-r--r--ipc/util.c506
-rw-r--r--ipc/util.h168
-rw-r--r--kernel/Kconfig.instrumentation49
-rw-r--r--kernel/Makefile9
-rw-r--r--kernel/capability.c15
-rw-r--r--kernel/cgroup.c2805
-rw-r--r--kernel/cgroup_debug.c97
-rw-r--r--kernel/cpu.c12
-rw-r--r--kernel/cpu_acct.c186
-rw-r--r--kernel/cpuset.c1601
-rw-r--r--kernel/die_notifier.c38
-rw-r--r--kernel/exit.c157
-rw-r--r--kernel/fork.c133
-rw-r--r--kernel/futex.c26
-rw-r--r--kernel/futex_compat.c3
-rw-r--r--kernel/kexec.c168
-rw-r--r--kernel/lockdep.c24
-rw-r--r--kernel/marker.c525
-rw-r--r--kernel/module.c30
-rw-r--r--kernel/notifier.c539
-rw-r--r--kernel/ns_cgroup.c100
-rw-r--r--kernel/nsproxy.c62
-rw-r--r--kernel/pid.c353
-rw-r--r--kernel/posix-cpu-timers.c12
-rw-r--r--kernel/posix-timers.c4
-rw-r--r--kernel/ptrace.c5
-rw-r--r--kernel/rtmutex-debug.c15
-rw-r--r--kernel/rtmutex.c2
-rw-r--r--kernel/sched.c269
-rw-r--r--kernel/signal.c66
-rw-r--r--kernel/softlockup.c2
-rw-r--r--kernel/sys.c586
-rw-r--r--kernel/sysctl.c4
-rw-r--r--kernel/taskstats.c67
-rw-r--r--kernel/time/clocksource.c22
-rw-r--r--kernel/timer.c7
-rw-r--r--kernel/workqueue.c38
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--lib/Makefile2
-rw-r--r--lib/hweight.c2
-rw-r--r--lib/percpu_counter.c5
-rw-r--r--lib/prio_heap.c70
-rw-r--r--lib/spinlock_debug.c8
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/memory.c3
-rw-r--r--mm/mempolicy.c5
-rw-r--r--mm/migrate.c3
-rw-r--r--mm/mmap.c11
-rw-r--r--mm/mprotect.c6
-rw-r--r--mm/oom_kill.c15
-rw-r--r--net/bluetooth/hidp/core.c12
-rw-r--r--net/core/filter.c3
-rw-r--r--net/core/gen_estimator.c2
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/rtnetlink.c8
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/sock.c2
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c6
-rw-r--r--net/ipv4/sysctl_net_ipv4.c4
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/udp.c5
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/ieee80211_sta.c55
-rw-r--r--net/netfilter/xt_connbytes.c2
-rw-r--r--net/packet/af_packet.c5
-rw-r--r--net/rfkill/rfkill-input.c12
-rw-r--r--net/sched/Kconfig10
-rw-r--r--net/sched/sch_generic.c26
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/unix/af_unix.c6
-rw-r--r--samples/Kconfig16
-rw-r--r--samples/Makefile3
-rw-r--r--samples/markers/Makefile4
-rw-r--r--samples/markers/marker-example.c54
-rw-r--r--samples/markers/probe-example.c98
-rwxr-xr-xscripts/checkstack.pl2
-rw-r--r--security/commoncap.c3
-rw-r--r--sound/ppc/beep.c4
-rw-r--r--sound/usb/caiaq/caiaq-input.c9
605 files changed, 12816 insertions, 5683 deletions
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index d3290c46af51..aa38cc5692a0 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -46,7 +46,7 @@
46 46
47 <sect1><title>Atomic and pointer manipulation</title> 47 <sect1><title>Atomic and pointer manipulation</title>
48!Iinclude/asm-x86/atomic_32.h 48!Iinclude/asm-x86/atomic_32.h
49!Iinclude/asm-x86/unaligned_32.h 49!Iinclude/asm-x86/unaligned.h
50 </sect1> 50 </sect1>
51 51
52 <sect1><title>Delaying, scheduling, and timer routines</title> 52 <sect1><title>Delaying, scheduling, and timer routines</title>
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
new file mode 100644
index 000000000000..eda40fd39cad
--- /dev/null
+++ b/Documentation/accounting/cgroupstats.txt
@@ -0,0 +1,27 @@
1Control Groupstats is inspired by the discussion at
2http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
3suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
4
5Per cgroup statistics infrastructure re-uses code from the taskstats
6interface. A new set of cgroup operations are registered with commands
7and attributes specific to cgroups. It should be very easy to
8extend per cgroup statistics, by adding members to the cgroupstats
9structure.
10
11The current model for cgroupstats is a pull, a push model (to post
12statistics on interesting events), should be very easy to add. Currently
13user space requests for statistics by passing the cgroup path.
14Statistics about the state of all the tasks in the cgroup is returned to
15user space.
16
17NOTE: We currently rely on delay accounting for extracting information
18about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
19information will not be available.
20
21To extract cgroup statistics a utility very similar to getdelays.c
22has been developed, the sample output of the utility is shown below
23
24~/balbir/cgroupstats # ./getdelays -C "/cgroup/a"
25sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
26~/balbir/cgroupstats # ./getdelays -C "/cgroup"
27sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 552cabac0608..da42ab414c48 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -87,30 +87,7 @@ changes occur:
87 87
88 This is used primarily during fault processing. 88 This is used primarily during fault processing.
89 89
905) void flush_tlb_pgtables(struct mm_struct *mm, 905) void update_mmu_cache(struct vm_area_struct *vma,
91 unsigned long start, unsigned long end)
92
93 The software page tables for address space 'mm' for virtual
94 addresses in the range 'start' to 'end-1' are being torn down.
95
96 Some platforms cache the lowest level of the software page tables
97 in a linear virtually mapped array, to make TLB miss processing
98 more efficient. On such platforms, since the TLB is caching the
99 software page table structure, it needs to be flushed when parts
100 of the software page table tree are unlinked/freed.
101
102 Sparc64 is one example of a platform which does this.
103
104 Usually, when munmap()'ing an area of user virtual address
105 space, the kernel leaves the page table parts around and just
106 marks the individual pte's as invalid. However, if very large
107 portions of the address space are unmapped, the kernel frees up
108 those portions of the software page tables to prevent potential
109 excessive kernel memory usage caused by erratic mmap/mmunmap
110 sequences. It is at these times that flush_tlb_pgtables will
111 be invoked.
112
1136) void update_mmu_cache(struct vm_area_struct *vma,
114 unsigned long address, pte_t pte) 91 unsigned long address, pte_t pte)
115 92
116 At the end of every page fault, this routine is invoked to 93 At the end of every page fault, this routine is invoked to
@@ -123,7 +100,7 @@ changes occur:
123 translations for software managed TLB configurations. 100 translations for software managed TLB configurations.
124 The sparc64 port currently does this. 101 The sparc64 port currently does this.
125 102
1267) void tlb_migrate_finish(struct mm_struct *mm) 1036) void tlb_migrate_finish(struct mm_struct *mm)
127 104
128 This interface is called at the end of an explicit 105 This interface is called at the end of an explicit
129 process migration. This interface provides a hook 106 process migration. This interface provides a hook
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt
new file mode 100644
index 000000000000..98a26f81fa75
--- /dev/null
+++ b/Documentation/cgroups.txt
@@ -0,0 +1,545 @@
1 CGROUPS
2 -------
3
4Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
5
6Original copyright statements from cpusets.txt:
7Portions Copyright (C) 2004 BULL SA.
8Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
9Modified by Paul Jackson <pj@sgi.com>
10Modified by Christoph Lameter <clameter@sgi.com>
11
12CONTENTS:
13=========
14
151. Control Groups
16 1.1 What are cgroups ?
17 1.2 Why are cgroups needed ?
18 1.3 How are cgroups implemented ?
19 1.4 What does notify_on_release do ?
20 1.5 How do I use cgroups ?
212. Usage Examples and Syntax
22 2.1 Basic Usage
23 2.2 Attaching processes
243. Kernel API
25 3.1 Overview
26 3.2 Synchronization
27 3.3 Subsystem API
284. Questions
29
301. Control Groups
31==========
32
331.1 What are cgroups ?
34----------------------
35
36Control Groups provide a mechanism for aggregating/partitioning sets of
37tasks, and all their future children, into hierarchical groups with
38specialized behaviour.
39
40Definitions:
41
42A *cgroup* associates a set of tasks with a set of parameters for one
43or more subsystems.
44
45A *subsystem* is a module that makes use of the task grouping
46facilities provided by cgroups to treat groups of tasks in
47particular ways. A subsystem is typically a "resource controller" that
48schedules a resource or applies per-cgroup limits, but it may be
49anything that wants to act on a group of processes, e.g. a
50virtualization subsystem.
51
52A *hierarchy* is a set of cgroups arranged in a tree, such that
53every task in the system is in exactly one of the cgroups in the
54hierarchy, and a set of subsystems; each subsystem has system-specific
55state attached to each cgroup in the hierarchy. Each hierarchy has
56an instance of the cgroup virtual filesystem associated with it.
57
58At any one time there may be multiple active hierarchies of task
59cgroups. Each hierarchy is a partition of all tasks in the system.
60
61User level code may create and destroy cgroups by name in an
62instance of the cgroup virtual file system, specify and query to
63which cgroup a task is assigned, and list the task pids assigned to
64a cgroup. Those creations and assignments only affect the hierarchy
65associated with that instance of the cgroup file system.
66
67On their own, the only use for cgroups is for simple job
68tracking. The intention is that other subsystems hook into the generic
69cgroup support to provide new attributes for cgroups, such as
70accounting/limiting the resources which processes in a cgroup can
71access. For example, cpusets (see Documentation/cpusets.txt) allows
72you to associate a set of CPUs and a set of memory nodes with the
73tasks in each cgroup.
74
751.2 Why are cgroups needed ?
76----------------------------
77
78There are multiple efforts to provide process aggregations in the
79Linux kernel, mainly for resource tracking purposes. Such efforts
80include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
81namespaces. These all require the basic notion of a
82grouping/partitioning of processes, with newly forked processes ending
83in the same group (cgroup) as their parent process.
84
85The kernel cgroup patch provides the minimum essential kernel
86mechanisms required to efficiently implement such groups. It has
87minimal impact on the system fast paths, and provides hooks for
88specific subsystems such as cpusets to provide additional behaviour as
89desired.
90
91Multiple hierarchy support is provided to allow for situations where
92the division of tasks into cgroups is distinctly different for
93different subsystems - having parallel hierarchies allows each
94hierarchy to be a natural division of tasks, without having to handle
95complex combinations of tasks that would be present if several
96unrelated subsystems needed to be forced into the same tree of
97cgroups.
98
99At one extreme, each resource controller or subsystem could be in a
100separate hierarchy; at the other extreme, all subsystems
101would be attached to the same hierarchy.
102
103As an example of a scenario (originally proposed by vatsa@in.ibm.com)
104that can benefit from multiple hierarchies, consider a large
105university server with various users - students, professors, system
106tasks etc. The resource planning for this server could be along the
107following lines:
108
109 CPU : Top cpuset
110 / \
111 CPUSet1 CPUSet2
112 | |
113 (Profs) (Students)
114
115 In addition (system tasks) are attached to topcpuset (so
116 that they can run anywhere) with a limit of 20%
117
118 Memory : Professors (50%), students (30%), system (20%)
119
120 Disk : Prof (50%), students (30%), system (20%)
121
122 Network : WWW browsing (20%), Network File System (60%), others (20%)
123 / \
124 Prof (15%) students (5%)
125
126Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go
127into NFS network class.
128
129At the same time firefox/lynx will share an appropriate CPU/Memory class
130depending on who launched it (prof/student).
131
132With the ability to classify tasks differently for different resources
133(by putting those resource subsystems in different hierarchies) then
134the admin can easily set up a script which receives exec notifications
135and depending on who is launching the browser he can
136
137 # echo browser_pid > /mnt/<restype>/<userclass>/tasks
138
139With only a single hierarchy, he now would potentially have to create
140a separate cgroup for every browser launched and associate it with
141appropriate network and other resource class. This may lead to
142proliferation of such cgroups.
143
144Also let's say that the administrator would like to give enhanced network
145access temporarily to a student's browser (since it is night and the user
146wants to do online gaming :) OR give one of the students simulation
147apps enhanced CPU power,
148
149With the ability to write pids directly to resource classes, it's just a
150matter of :
151
152 # echo pid > /mnt/network/<new_class>/tasks
153 (after some time)
154 # echo pid > /mnt/network/<orig_class>/tasks
155
156Without this ability, he would have to split the cgroup into
157multiple separate ones and then associate the new cgroups with the
158new resource classes.
159
160
161
1621.3 How are cgroups implemented ?
163---------------------------------
164
165Control Groups extend the kernel as follows:
166
167 - Each task in the system has a reference-counted pointer to a
168 css_set.
169
170 - A css_set contains a set of reference-counted pointers to
171 cgroup_subsys_state objects, one for each cgroup subsystem
172 registered in the system. There is no direct link from a task to
173 the cgroup of which it's a member in each hierarchy, but this
174 can be determined by following pointers through the
175 cgroup_subsys_state objects. This is because accessing the
176 subsystem state is something that's expected to happen frequently
177 and in performance-critical code, whereas operations that require a
178 task's actual cgroup assignments (in particular, moving between
179 cgroups) are less common. A linked list runs through the cg_list
180 field of each task_struct using the css_set, anchored at
181 css_set->tasks.
182
183 - A cgroup hierarchy filesystem can be mounted for browsing and
184 manipulation from user space.
185
186 - You can list all the tasks (by pid) attached to any cgroup.
187
188The implementation of cgroups requires a few, simple hooks
189into the rest of the kernel, none in performance critical paths:
190
191 - in init/main.c, to initialize the root cgroups and initial
192 css_set at system boot.
193
194 - in fork and exit, to attach and detach a task from its css_set.
195
196In addition a new file system, of type "cgroup" may be mounted, to
197enable browsing and modifying the cgroups presently known to the
198kernel. When mounting a cgroup hierarchy, you may specify a
199comma-separated list of subsystems to mount as the filesystem mount
200options. By default, mounting the cgroup filesystem attempts to
201mount a hierarchy containing all registered subsystems.
202
203If an active hierarchy with exactly the same set of subsystems already
204exists, it will be reused for the new mount. If no existing hierarchy
205matches, and any of the requested subsystems are in use in an existing
206hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
207is activated, associated with the requested subsystems.
208
209It's not currently possible to bind a new subsystem to an active
210cgroup hierarchy, or to unbind a subsystem from an active cgroup
211hierarchy. This may be possible in future, but is fraught with nasty
212error-recovery issues.
213
214When a cgroup filesystem is unmounted, if there are any
215child cgroups created below the top-level cgroup, that hierarchy
216will remain active even though unmounted; if there are no
217child cgroups then the hierarchy will be deactivated.
218
219No new system calls are added for cgroups - all support for
220querying and modifying cgroups is via this cgroup file system.
221
222Each task under /proc has an added file named 'cgroup' displaying,
223for each active hierarchy, the subsystem names and the cgroup name
224as the path relative to the root of the cgroup file system.
225
226Each cgroup is represented by a directory in the cgroup file system
227containing the following files describing that cgroup:
228
229 - tasks: list of tasks (by pid) attached to that cgroup
230 - notify_on_release flag: run /sbin/cgroup_release_agent on exit?
231
232Other subsystems such as cpusets may add additional files in each
233cgroup dir
234
235New cgroups are created using the mkdir system call or shell
236command. The properties of a cgroup, such as its flags, are
237modified by writing to the appropriate file in that cgroups
238directory, as listed above.
239
240The named hierarchical structure of nested cgroups allows partitioning
241a large system into nested, dynamically changeable, "soft-partitions".
242
243The attachment of each task, automatically inherited at fork by any
244children of that task, to a cgroup allows organizing the work load
245on a system into related sets of tasks. A task may be re-attached to
246any other cgroup, if allowed by the permissions on the necessary
247cgroup file system directories.
248
249When a task is moved from one cgroup to another, it gets a new
250css_set pointer - if there's an already existing css_set with the
251desired collection of cgroups then that group is reused, else a new
252css_set is allocated. Note that the current implementation uses a
253linear search to locate an appropriate existing css_set, so isn't
254very efficient. A future version will use a hash table for better
255performance.
256
257To allow access from a cgroup to the css_sets (and hence tasks)
258that comprise it, a set of cg_cgroup_link objects form a lattice;
259each cg_cgroup_link is linked into a list of cg_cgroup_links for
260a single cgroup on its cont_link_list field, and a list of
261cg_cgroup_links for a single css_set on its cg_link_list.
262
263Thus the set of tasks in a cgroup can be listed by iterating over
264each css_set that references the cgroup, and sub-iterating over
265each css_set's task set.
266
267The use of a Linux virtual file system (vfs) to represent the
268cgroup hierarchy provides for a familiar permission and name space
269for cgroups, with a minimum of additional kernel code.
270
2711.4 What does notify_on_release do ?
272------------------------------------
273
274*** notify_on_release is disabled in the current patch set. It will be
275*** reactivated in a future patch in a less-intrusive manner
276
277If the notify_on_release flag is enabled (1) in a cgroup, then
278whenever the last task in the cgroup leaves (exits or attaches to
279some other cgroup) and the last child cgroup of that cgroup
280is removed, then the kernel runs the command specified by the contents
281of the "release_agent" file in that hierarchy's root directory,
282supplying the pathname (relative to the mount point of the cgroup
283file system) of the abandoned cgroup. This enables automatic
284removal of abandoned cgroups. The default value of
285notify_on_release in the root cgroup at system boot is disabled
286(0). The default value of other cgroups at creation is the current
287value of their parent's notify_on_release setting. The default value of
288a cgroup hierarchy's release_agent path is empty.
289
2901.5 How do I use cgroups ?
291--------------------------
292
293To start a new job that is to be contained within a cgroup, using
294the "cpuset" cgroup subsystem, the steps are something like:
295
296 1) mkdir /dev/cgroup
297 2) mount -t cgroup -ocpuset cpuset /dev/cgroup
298 3) Create the new cgroup by doing mkdir's and write's (or echo's) in
299 the /dev/cgroup virtual file system.
300 4) Start a task that will be the "founding father" of the new job.
301 5) Attach that task to the new cgroup by writing its pid to the
302 /dev/cgroup tasks file for that cgroup.
303 6) fork, exec or clone the job tasks from this founding father task.
304
305For example, the following sequence of commands will setup a cgroup
306named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
307and then start a subshell 'sh' in that cgroup:
308
309 mount -t cgroup cpuset -ocpuset /dev/cgroup
310 cd /dev/cgroup
311 mkdir Charlie
312 cd Charlie
313 /bin/echo 2-3 > cpus
314 /bin/echo 1 > mems
315 /bin/echo $$ > tasks
316 sh
317 # The subshell 'sh' is now running in cgroup Charlie
318 # The next line should display '/Charlie'
319 cat /proc/self/cgroup
320
3212. Usage Examples and Syntax
322============================
323
3242.1 Basic Usage
325---------------
326
327Creating, modifying, using the cgroups can be done through the cgroup
328virtual filesystem.
329
330To mount a cgroup hierarchy with all available subsystems, type:
331# mount -t cgroup xxx /dev/cgroup
332
333The "xxx" is not interpreted by the cgroup code, but will appear in
334/proc/mounts so may be any useful identifying string that you like.
335
336To mount a cgroup hierarchy with just the cpuset and numtasks
337subsystems, type:
338# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup
339
340To change the set of subsystems bound to a mounted hierarchy, just
341remount with different options:
342
343# mount -o remount,cpuset,ns /dev/cgroup
344
345Note that changing the set of subsystems is currently only supported
346when the hierarchy consists of a single (root) cgroup. Supporting
347the ability to arbitrarily bind/unbind subsystems from an existing
348cgroup hierarchy is intended to be implemented in the future.
349
350Then under /dev/cgroup you can find a tree that corresponds to the
351tree of the cgroups in the system. For instance, /dev/cgroup
352is the cgroup that holds the whole system.
353
354If you want to create a new cgroup under /dev/cgroup:
355# cd /dev/cgroup
356# mkdir my_cgroup
357
358Now you want to do something with this cgroup.
359# cd my_cgroup
360
361In this directory you can find several files:
362# ls
363notify_on_release release_agent tasks
364(plus whatever files are added by the attached subsystems)
365
366Now attach your shell to this cgroup:
367# /bin/echo $$ > tasks
368
369You can also create cgroups inside your cgroup by using mkdir in this
370directory.
371# mkdir my_sub_cs
372
373To remove a cgroup, just use rmdir:
374# rmdir my_sub_cs
375
376This will fail if the cgroup is in use (has cgroups inside, or
377has processes attached, or is held alive by other subsystem-specific
378reference).
379
3802.2 Attaching processes
381-----------------------
382
383# /bin/echo PID > tasks
384
385Note that it is PID, not PIDs. You can only attach ONE task at a time.
386If you have several tasks to attach, you have to do it one after another:
387
388# /bin/echo PID1 > tasks
389# /bin/echo PID2 > tasks
390 ...
391# /bin/echo PIDn > tasks
392
3933. Kernel API
394=============
395
3963.1 Overview
397------------
398
399Each kernel subsystem that wants to hook into the generic cgroup
400system needs to create a cgroup_subsys object. This contains
401various methods, which are callbacks from the cgroup system, along
402with a subsystem id which will be assigned by the cgroup system.
403
404Other fields in the cgroup_subsys object include:
405
406- subsys_id: a unique array index for the subsystem, indicating which
407 entry in cgroup->subsys[] this subsystem should be
408 managing. Initialized by cgroup_register_subsys(); prior to this
409 it should be initialized to -1
410
411- hierarchy: an index indicating which hierarchy, if any, this
412 subsystem is currently attached to. If this is -1, then the
413 subsystem is not attached to any hierarchy, and all tasks should be
414 considered to be members of the subsystem's top_cgroup. It should
415 be initialized to -1.
416
417- name: should be initialized to a unique subsystem name prior to
418 calling cgroup_register_subsystem. Should be no longer than
419 MAX_CGROUP_TYPE_NAMELEN
420
421Each cgroup object created by the system has an array of pointers,
422indexed by subsystem id; this pointer is entirely managed by the
423subsystem; the generic cgroup code will never touch this pointer.
424
4253.2 Synchronization
426-------------------
427
428There is a global mutex, cgroup_mutex, used by the cgroup
429system. This should be taken by anything that wants to modify a
430cgroup. It may also be taken to prevent cgroups from being
431modified, but more specific locks may be more appropriate in that
432situation.
433
434See kernel/cgroup.c for more details.
435
436Subsystems can take/release the cgroup_mutex via the functions
437cgroup_lock()/cgroup_unlock(), and can
438take/release the callback_mutex via the functions
439cgroup_lock()/cgroup_unlock().
440
441Accessing a task's cgroup pointer may be done in the following ways:
442- while holding cgroup_mutex
443- while holding the task's alloc_lock (via task_lock())
444- inside an rcu_read_lock() section via rcu_dereference()
445
4463.3 Subsystem API
447--------------------------
448
449Each subsystem should:
450
451- add an entry in linux/cgroup_subsys.h
452- define a cgroup_subsys object called <name>_subsys
453
454Each subsystem may export the following methods. The only mandatory
455methods are create/destroy. Any others that are null are presumed to
456be successful no-ops.
457
458struct cgroup_subsys_state *create(struct cgroup *cont)
459LL=cgroup_mutex
460
461Called to create a subsystem state object for a cgroup. The
462subsystem should allocate its subsystem state object for the passed
463cgroup, returning a pointer to the new object on success or a
464negative error code. On success, the subsystem pointer should point to
465a structure of type cgroup_subsys_state (typically embedded in a
466larger subsystem-specific object), which will be initialized by the
467cgroup system. Note that this will be called at initialization to
468create the root subsystem state for this subsystem; this case can be
469identified by the passed cgroup object having a NULL parent (since
470it's the root of the hierarchy) and may be an appropriate place for
471initialization code.
472
473void destroy(struct cgroup *cont)
474LL=cgroup_mutex
475
476The cgroup system is about to destroy the passed cgroup; the
477subsystem should do any necessary cleanup
478
479int can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
480 struct task_struct *task)
481LL=cgroup_mutex
482
483Called prior to moving a task into a cgroup; if the subsystem
484returns an error, this will abort the attach operation. If a NULL
485task is passed, then a successful result indicates that *any*
486unspecified task can be moved into the cgroup. Note that this isn't
487called on a fork. If this method returns 0 (success) then this should
488remain valid while the caller holds cgroup_mutex.
489
490void attach(struct cgroup_subsys *ss, struct cgroup *cont,
491 struct cgroup *old_cont, struct task_struct *task)
492LL=cgroup_mutex
493
494
495Called after the task has been attached to the cgroup, to allow any
496post-attachment activity that requires memory allocations or blocking.
497
498void fork(struct cgroup_subsys *ss, struct task_struct *task)
499LL=callback_mutex, maybe read_lock(tasklist_lock)
500
501Called when a task is forked into a cgroup. Also called during
502registration for all existing tasks.
503
504void exit(struct cgroup_subsys *ss, struct task_struct *task)
505LL=callback_mutex
506
507Called during task exit
508
509int populate(struct cgroup_subsys *ss, struct cgroup *cont)
510LL=none
511
512Called after creation of a cgroup to allow a subsystem to populate
513the cgroup directory with file entries. The subsystem should make
514calls to cgroup_add_file() with objects of type cftype (see
515include/linux/cgroup.h for details). Note that although this
516method can return an error code, the error code is currently not
517always handled well.
518
519void post_clone(struct cgroup_subsys *ss, struct cgroup *cont)
520
521Called at the end of cgroup_clone() to do any parameter
522initialization which might be required before a task could attach. For
523example in cpusets, no task may attach before 'cpus' and 'mems' are set
524up.
525
526void bind(struct cgroup_subsys *ss, struct cgroup *root)
527LL=callback_mutex
528
529Called when a cgroup subsystem is rebound to a different hierarchy
530and root cgroup. Currently this will only involve movement between
531the default hierarchy (which never has sub-cgroups) and a hierarchy
532that is being created/destroyed (and hence has no sub-cgroups).
533
5344. Questions
535============
536
537Q: what's up with this '/bin/echo' ?
538A: bash's builtin 'echo' command does not check calls to write() against
539 errors. If you use it in the cgroup file system, you won't be
540 able to tell whether a command succeeded or failed.
541
542Q: When I attach processes, only the first of the line gets really attached !
543A: We can only return one error code per call to write(). So you should also
544 put only ONE pid.
545
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index b6d24c22274b..a741f658a3c9 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -220,7 +220,9 @@ A: The following happen, listed in no particular order :-)
220 CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the 220 CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
221 CPU is being offlined while tasks are frozen due to a suspend operation in 221 CPU is being offlined while tasks are frozen due to a suspend operation in
222 progress 222 progress
223- All process is migrated away from this outgoing CPU to a new CPU 223- All processes are migrated away from this outgoing CPU to new CPUs.
224 The new CPU is chosen from each process' current cpuset, which may be
225 a subset of all online CPUs.
224- All interrupts targeted to this CPU is migrated to a new CPU 226- All interrupts targeted to this CPU is migrated to a new CPU
225- timers/bottom half/task lets are also migrated to a new CPU 227- timers/bottom half/task lets are also migrated to a new CPU
226- Once all services are migrated, kernel calls an arch specific routine 228- Once all services are migrated, kernel calls an arch specific routine
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index ec9de6917f01..141bef1c8599 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -7,6 +7,7 @@ Written by Simon.Derr@bull.net
7Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. 7Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
8Modified by Paul Jackson <pj@sgi.com> 8Modified by Paul Jackson <pj@sgi.com>
9Modified by Christoph Lameter <clameter@sgi.com> 9Modified by Christoph Lameter <clameter@sgi.com>
10Modified by Paul Menage <menage@google.com>
10 11
11CONTENTS: 12CONTENTS:
12========= 13=========
@@ -16,9 +17,9 @@ CONTENTS:
16 1.2 Why are cpusets needed ? 17 1.2 Why are cpusets needed ?
17 1.3 How are cpusets implemented ? 18 1.3 How are cpusets implemented ?
18 1.4 What are exclusive cpusets ? 19 1.4 What are exclusive cpusets ?
19 1.5 What does notify_on_release do ? 20 1.5 What is memory_pressure ?
20 1.6 What is memory_pressure ? 21 1.6 What is memory spread ?
21 1.7 What is memory spread ? 22 1.7 What is sched_load_balance ?
22 1.8 How do I use cpusets ? 23 1.8 How do I use cpusets ?
232. Usage Examples and Syntax 242. Usage Examples and Syntax
24 2.1 Basic Usage 25 2.1 Basic Usage
@@ -44,18 +45,19 @@ hierarchy visible in a virtual file system. These are the essential
44hooks, beyond what is already present, required to manage dynamic 45hooks, beyond what is already present, required to manage dynamic
45job placement on large systems. 46job placement on large systems.
46 47
47Each task has a pointer to a cpuset. Multiple tasks may reference 48Cpusets use the generic cgroup subsystem described in
48the same cpuset. Requests by a task, using the sched_setaffinity(2) 49Documentation/cgroup.txt.
49system call to include CPUs in its CPU affinity mask, and using the 50
50mbind(2) and set_mempolicy(2) system calls to include Memory Nodes 51Requests by a task, using the sched_setaffinity(2) system call to
51in its memory policy, are both filtered through that tasks cpuset, 52include CPUs in its CPU affinity mask, and using the mbind(2) and
52filtering out any CPUs or Memory Nodes not in that cpuset. The 53set_mempolicy(2) system calls to include Memory Nodes in its memory
53scheduler will not schedule a task on a CPU that is not allowed in 54policy, are both filtered through that tasks cpuset, filtering out any
54its cpus_allowed vector, and the kernel page allocator will not 55CPUs or Memory Nodes not in that cpuset. The scheduler will not
55allocate a page on a node that is not allowed in the requesting tasks 56schedule a task on a CPU that is not allowed in its cpus_allowed
56mems_allowed vector. 57vector, and the kernel page allocator will not allocate a page on a
57 58node that is not allowed in the requesting tasks mems_allowed vector.
58User level code may create and destroy cpusets by name in the cpuset 59
60User level code may create and destroy cpusets by name in the cgroup
59virtual file system, manage the attributes and permissions of these 61virtual file system, manage the attributes and permissions of these
60cpusets and which CPUs and Memory Nodes are assigned to each cpuset, 62cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
61specify and query to which cpuset a task is assigned, and list the 63specify and query to which cpuset a task is assigned, and list the
@@ -115,7 +117,7 @@ Cpusets extends these two mechanisms as follows:
115 - Cpusets are sets of allowed CPUs and Memory Nodes, known to the 117 - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
116 kernel. 118 kernel.
117 - Each task in the system is attached to a cpuset, via a pointer 119 - Each task in the system is attached to a cpuset, via a pointer
118 in the task structure to a reference counted cpuset structure. 120 in the task structure to a reference counted cgroup structure.
119 - Calls to sched_setaffinity are filtered to just those CPUs 121 - Calls to sched_setaffinity are filtered to just those CPUs
120 allowed in that tasks cpuset. 122 allowed in that tasks cpuset.
121 - Calls to mbind and set_mempolicy are filtered to just 123 - Calls to mbind and set_mempolicy are filtered to just
@@ -145,15 +147,10 @@ into the rest of the kernel, none in performance critical paths:
145 - in page_alloc.c, to restrict memory to allowed nodes. 147 - in page_alloc.c, to restrict memory to allowed nodes.
146 - in vmscan.c, to restrict page recovery to the current cpuset. 148 - in vmscan.c, to restrict page recovery to the current cpuset.
147 149
148In addition a new file system, of type "cpuset" may be mounted, 150You should mount the "cgroup" filesystem type in order to enable
149typically at /dev/cpuset, to enable browsing and modifying the cpusets 151browsing and modifying the cpusets presently known to the kernel. No
150presently known to the kernel. No new system calls are added for 152new system calls are added for cpusets - all support for querying and
151cpusets - all support for querying and modifying cpusets is via 153modifying cpusets is via this cpuset file system.
152this cpuset file system.
153
154Each task under /proc has an added file named 'cpuset', displaying
155the cpuset name, as the path relative to the root of the cpuset file
156system.
157 154
158The /proc/<pid>/status file for each task has two added lines, 155The /proc/<pid>/status file for each task has two added lines,
159displaying the tasks cpus_allowed (on which CPUs it may be scheduled) 156displaying the tasks cpus_allowed (on which CPUs it may be scheduled)
@@ -163,16 +160,15 @@ in the format seen in the following example:
163 Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff 160 Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff
164 Mems_allowed: ffffffff,ffffffff 161 Mems_allowed: ffffffff,ffffffff
165 162
166Each cpuset is represented by a directory in the cpuset file system 163Each cpuset is represented by a directory in the cgroup file system
167containing the following files describing that cpuset: 164containing (on top of the standard cgroup files) the following
165files describing that cpuset:
168 166
169 - cpus: list of CPUs in that cpuset 167 - cpus: list of CPUs in that cpuset
170 - mems: list of Memory Nodes in that cpuset 168 - mems: list of Memory Nodes in that cpuset
171 - memory_migrate flag: if set, move pages to cpusets nodes 169 - memory_migrate flag: if set, move pages to cpusets nodes
172 - cpu_exclusive flag: is cpu placement exclusive? 170 - cpu_exclusive flag: is cpu placement exclusive?
173 - mem_exclusive flag: is memory placement exclusive? 171 - mem_exclusive flag: is memory placement exclusive?
174 - tasks: list of tasks (by pid) attached to that cpuset
175 - notify_on_release flag: run /sbin/cpuset_release_agent on exit?
176 - memory_pressure: measure of how much paging pressure in cpuset 172 - memory_pressure: measure of how much paging pressure in cpuset
177 173
178In addition, the root cpuset only has the following file: 174In addition, the root cpuset only has the following file:
@@ -237,21 +233,7 @@ such as requests from interrupt handlers, is allowed to be taken
237outside even a mem_exclusive cpuset. 233outside even a mem_exclusive cpuset.
238 234
239 235
2401.5 What does notify_on_release do ? 2361.5 What is memory_pressure ?
241------------------------------------
242
243If the notify_on_release flag is enabled (1) in a cpuset, then whenever
244the last task in the cpuset leaves (exits or attaches to some other
245cpuset) and the last child cpuset of that cpuset is removed, then
246the kernel runs the command /sbin/cpuset_release_agent, supplying the
247pathname (relative to the mount point of the cpuset file system) of the
248abandoned cpuset. This enables automatic removal of abandoned cpusets.
249The default value of notify_on_release in the root cpuset at system
250boot is disabled (0). The default value of other cpusets at creation
251is the current value of their parents notify_on_release setting.
252
253
2541.6 What is memory_pressure ?
255----------------------------- 237-----------------------------
256The memory_pressure of a cpuset provides a simple per-cpuset metric 238The memory_pressure of a cpuset provides a simple per-cpuset metric
257of the rate that the tasks in a cpuset are attempting to free up in 239of the rate that the tasks in a cpuset are attempting to free up in
@@ -308,7 +290,7 @@ the tasks in the cpuset, in units of reclaims attempted per second,
308times 1000. 290times 1000.
309 291
310 292
3111.7 What is memory spread ? 2931.6 What is memory spread ?
312--------------------------- 294---------------------------
313There are two boolean flag files per cpuset that control where the 295There are two boolean flag files per cpuset that control where the
314kernel allocates pages for the file system buffers and related in 296kernel allocates pages for the file system buffers and related in
@@ -378,6 +360,142 @@ policy, especially for jobs that might have one thread reading in the
378data set, the memory allocation across the nodes in the jobs cpuset 360data set, the memory allocation across the nodes in the jobs cpuset
379can become very uneven. 361can become very uneven.
380 362
3631.7 What is sched_load_balance ?
364--------------------------------
365
366The kernel scheduler (kernel/sched.c) automatically load balances
367tasks. If one CPU is underutilized, kernel code running on that
368CPU will look for tasks on other more overloaded CPUs and move those
369tasks to itself, within the constraints of such placement mechanisms
370as cpusets and sched_setaffinity.
371
372The algorithmic cost of load balancing and its impact on key shared
373kernel data structures such as the task list increases more than
374linearly with the number of CPUs being balanced. So the scheduler
375has support to partition the systems CPUs into a number of sched
376domains such that it only load balances within each sched domain.
377Each sched domain covers some subset of the CPUs in the system;
378no two sched domains overlap; some CPUs might not be in any sched
379domain and hence won't be load balanced.
380
381Put simply, it costs less to balance between two smaller sched domains
382than one big one, but doing so means that overloads in one of the
383two domains won't be load balanced to the other one.
384
385By default, there is one sched domain covering all CPUs, except those
386marked isolated using the kernel boot time "isolcpus=" argument.
387
388This default load balancing across all CPUs is not well suited for
389the following two situations:
390 1) On large systems, load balancing across many CPUs is expensive.
391 If the system is managed using cpusets to place independent jobs
392 on separate sets of CPUs, full load balancing is unnecessary.
393 2) Systems supporting realtime on some CPUs need to minimize
394 system overhead on those CPUs, including avoiding task load
395 balancing if that is not needed.
396
397When the per-cpuset flag "sched_load_balance" is enabled (the default
398setting), it requests that all the CPUs in that cpusets allowed 'cpus'
399be contained in a single sched domain, ensuring that load balancing
 400can move a task (not otherwise pinned, as by sched_setaffinity)
401from any CPU in that cpuset to any other.
402
403When the per-cpuset flag "sched_load_balance" is disabled, then the
404scheduler will avoid load balancing across the CPUs in that cpuset,
405--except-- in so far as is necessary because some overlapping cpuset
406has "sched_load_balance" enabled.
407
408So, for example, if the top cpuset has the flag "sched_load_balance"
409enabled, then the scheduler will have one sched domain covering all
410CPUs, and the setting of the "sched_load_balance" flag in any other
411cpusets won't matter, as we're already fully load balancing.
412
413Therefore in the above two situations, the top cpuset flag
414"sched_load_balance" should be disabled, and only some of the smaller,
415child cpusets have this flag enabled.
416
417When doing this, you don't usually want to leave any unpinned tasks in
418the top cpuset that might use non-trivial amounts of CPU, as such tasks
419may be artificially constrained to some subset of CPUs, depending on
 420the particulars of this flag setting in descendant cpusets. Even if
421such a task could use spare CPU cycles in some other CPUs, the kernel
422scheduler might not consider the possibility of load balancing that
423task to that underused CPU.
424
425Of course, tasks pinned to a particular CPU can be left in a cpuset
426that disables "sched_load_balance" as those tasks aren't going anywhere
427else anyway.
428
429There is an impedance mismatch here, between cpusets and sched domains.
430Cpusets are hierarchical and nest. Sched domains are flat; they don't
431overlap and each CPU is in at most one sched domain.
432
433It is necessary for sched domains to be flat because load balancing
434across partially overlapping sets of CPUs would risk unstable dynamics
435that would be beyond our understanding. So if each of two partially
436overlapping cpusets enables the flag 'sched_load_balance', then we
437form a single sched domain that is a superset of both. We won't move
 438a task to a CPU outside its cpuset, but the scheduler load balancing
439code might waste some compute cycles considering that possibility.
440
441This mismatch is why there is not a simple one-to-one relation
442between which cpusets have the flag "sched_load_balance" enabled,
443and the sched domain configuration. If a cpuset enables the flag, it
444will get balancing across all its CPUs, but if it disables the flag,
445it will only be assured of no load balancing if no other overlapping
446cpuset enables the flag.
447
448If two cpusets have partially overlapping 'cpus' allowed, and only
449one of them has this flag enabled, then the other may find its
450tasks only partially load balanced, just on the overlapping CPUs.
451This is just the general case of the top_cpuset example given a few
452paragraphs above. In the general case, as in the top cpuset case,
453don't leave tasks that might use non-trivial amounts of CPU in
454such partially load balanced cpusets, as they may be artificially
455constrained to some subset of the CPUs allowed to them, for lack of
456load balancing to the other CPUs.
457
4581.7.1 sched_load_balance implementation details.
459------------------------------------------------
460
461The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary
462to most cpuset flags.) When enabled for a cpuset, the kernel will
463ensure that it can load balance across all the CPUs in that cpuset
464(makes sure that all the CPUs in the cpus_allowed of that cpuset are
465in the same sched domain.)
466
467If two overlapping cpusets both have 'sched_load_balance' enabled,
468then they will be (must be) both in the same sched domain.
469
470If, as is the default, the top cpuset has 'sched_load_balance' enabled,
471then by the above that means there is a single sched domain covering
472the whole system, regardless of any other cpuset settings.
473
474The kernel commits to user space that it will avoid load balancing
475where it can. It will pick as fine a granularity partition of sched
476domains as it can while still providing load balancing for any set
477of CPUs allowed to a cpuset having 'sched_load_balance' enabled.
478
479The internal kernel cpuset to scheduler interface passes from the
480cpuset code to the scheduler code a partition of the load balanced
481CPUs in the system. This partition is a set of subsets (represented
482as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all
483the CPUs that must be load balanced.
484
485Whenever the 'sched_load_balance' flag changes, or CPUs come or go
486from a cpuset with this flag enabled, or a cpuset with this flag
487enabled is removed, the cpuset code builds a new such partition and
488passes it to the scheduler sched domain setup code, to have the sched
489domains rebuilt as necessary.
490
491This partition exactly defines what sched domains the scheduler should
492setup - one sched domain for each element (cpumask_t) in the partition.
493
494The scheduler remembers the currently active sched domain partitions.
495When the scheduler routine partition_sched_domains() is invoked from
496the cpuset code to update these sched domains, it compares the new
497partition requested with the current, and updates its sched domains,
498removing the old and adding the new, for each change.
381 499
3821.8 How do I use cpusets ? 5001.8 How do I use cpusets ?
383-------------------------- 501--------------------------
@@ -469,7 +587,7 @@ than stress the kernel.
469To start a new job that is to be contained within a cpuset, the steps are: 587To start a new job that is to be contained within a cpuset, the steps are:
470 588
471 1) mkdir /dev/cpuset 589 1) mkdir /dev/cpuset
472 2) mount -t cpuset none /dev/cpuset 590 2) mount -t cgroup -ocpuset cpuset /dev/cpuset
473 3) Create the new cpuset by doing mkdir's and write's (or echo's) in 591 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
474 the /dev/cpuset virtual file system. 592 the /dev/cpuset virtual file system.
475 4) Start a task that will be the "founding father" of the new job. 593 4) Start a task that will be the "founding father" of the new job.
@@ -481,7 +599,7 @@ For example, the following sequence of commands will setup a cpuset
481named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, 599named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
482and then start a subshell 'sh' in that cpuset: 600and then start a subshell 'sh' in that cpuset:
483 601
484 mount -t cpuset none /dev/cpuset 602 mount -t cgroup -ocpuset cpuset /dev/cpuset
485 cd /dev/cpuset 603 cd /dev/cpuset
486 mkdir Charlie 604 mkdir Charlie
487 cd Charlie 605 cd Charlie
@@ -513,7 +631,7 @@ Creating, modifying, using the cpusets can be done through the cpuset
513virtual filesystem. 631virtual filesystem.
514 632
515To mount it, type: 633To mount it, type:
516# mount -t cpuset none /dev/cpuset 634# mount -t cgroup -o cpuset cpuset /dev/cpuset
517 635
518Then under /dev/cpuset you can find a tree that corresponds to the 636Then under /dev/cpuset you can find a tree that corresponds to the
519tree of the cpusets in the system. For instance, /dev/cpuset 637tree of the cpusets in the system. For instance, /dev/cpuset
@@ -556,6 +674,18 @@ To remove a cpuset, just use rmdir:
556This will fail if the cpuset is in use (has cpusets inside, or has 674This will fail if the cpuset is in use (has cpusets inside, or has
557processes attached). 675processes attached).
558 676
677Note that for legacy reasons, the "cpuset" filesystem exists as a
678wrapper around the cgroup filesystem.
679
680The command
681
682mount -t cpuset X /dev/cpuset
683
684is equivalent to
685
686mount -t cgroup -ocpuset X /dev/cpuset
687echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
688
5592.2 Adding/removing cpus 6892.2 Adding/removing cpus
560------------------------ 690------------------------
561 691
diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt
index d9d523099bb7..4d932dc66098 100644
--- a/Documentation/input/input-programming.txt
+++ b/Documentation/input/input-programming.txt
@@ -42,8 +42,8 @@ static int __init button_init(void)
42 goto err_free_irq; 42 goto err_free_irq;
43 } 43 }
44 44
45 button_dev->evbit[0] = BIT(EV_KEY); 45 button_dev->evbit[0] = BIT_MASK(EV_KEY);
46 button_dev->keybit[LONG(BTN_0)] = BIT(BTN_0); 46 button_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
47 47
48 error = input_register_device(button_dev); 48 error = input_register_device(button_dev);
49 if (error) { 49 if (error) {
@@ -217,14 +217,15 @@ If you don't need absfuzz and absflat, you can set them to zero, which mean
217that the thing is precise and always returns to exactly the center position 217that the thing is precise and always returns to exactly the center position
218(if it has any). 218(if it has any).
219 219
2201.4 NBITS(), LONG(), BIT() 2201.4 BITS_TO_LONGS(), BIT_WORD(), BIT_MASK()
221~~~~~~~~~~~~~~~~~~~~~~~~~~ 221~~~~~~~~~~~~~~~~~~~~~~~~~~
222 222
223These three macros from input.h help some bitfield computations: 223These three macros from bitops.h help some bitfield computations:
224 224
225 NBITS(x) - returns the length of a bitfield array in longs for x bits 225 BITS_TO_LONGS(x) - returns the length of a bitfield array in longs for
226 LONG(x) - returns the index in the array in longs for bit x 226 x bits
227 BIT(x) - returns the index in a long for bit x 227 BIT_WORD(x) - returns the index in the array in longs for bit x
 228 BIT_MASK(x) - returns the mask (a long with bit x set) for bit x
228 229
2291.5 The id* and name fields 2301.5 The id* and name fields
230~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 231~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 1b37b28cc234..d0ac72cc19ff 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -231,6 +231,32 @@ Dump-capture kernel config options (Arch Dependent, ia64)
231 any space below the alignment point will be wasted. 231 any space below the alignment point will be wasted.
232 232
233 233
234Extended crashkernel syntax
235===========================
236
237While the "crashkernel=size[@offset]" syntax is sufficient for most
238configurations, sometimes it's handy to have the reserved memory dependent
239on the value of System RAM -- that's mostly for distributors that pre-setup
 240the kernel command line to avoid an unbootable system after some memory has
241been removed from the machine.
242
243The syntax is:
244
245 crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
246 range=start-[end]
247
248For example:
249
250 crashkernel=512M-2G:64M,2G-:128M
251
252This would mean:
253
254 1) if the RAM is smaller than 512M, then don't reserve anything
255 (this is the "rescue" case)
256 2) if the RAM size is between 512M and 2G, then reserve 64M
257 3) if the RAM size is larger than 2G, then reserve 128M
258
259
234Boot into System Kernel 260Boot into System Kernel
235======================= 261=======================
236 262
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 189df0bcab99..0a3fed445249 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -497,6 +497,13 @@ and is between 256 and 4096 characters. It is defined in the file
497 [KNL] Reserve a chunk of physical memory to 497 [KNL] Reserve a chunk of physical memory to
498 hold a kernel to switch to with kexec on panic. 498 hold a kernel to switch to with kexec on panic.
499 499
500 crashkernel=range1:size1[,range2:size2,...][@offset]
501 [KNL] Same as above, but depends on the memory
502 in the running system. The syntax of range is
503 start-[end] where start and end are both
504 a memory unit (amount[KMG]). See also
 505 Documentation/kdump/kdump.txt for an example.
506
500 cs4232= [HW,OSS] 507 cs4232= [HW,OSS]
501 Format: <io>,<irq>,<dma>,<dma2>,<mpuio>,<mpuirq> 508 Format: <io>,<irq>,<dma>,<dma2>,<mpuio>,<mpuirq>
502 509
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
new file mode 100644
index 000000000000..295a71bc301e
--- /dev/null
+++ b/Documentation/markers.txt
@@ -0,0 +1,81 @@
1 Using the Linux Kernel Markers
2
3 Mathieu Desnoyers
4
5
6This document introduces Linux Kernel Markers and their use. It provides
7examples of how to insert markers in the kernel and connect probe functions to
8them and provides some examples of probe functions.
9
10
11* Purpose of markers
12
13A marker placed in code provides a hook to call a function (probe) that you can
14provide at runtime. A marker can be "on" (a probe is connected to it) or "off"
15(no probe is attached). When a marker is "off" it has no effect, except for
16adding a tiny time penalty (checking a condition for a branch) and space
17penalty (adding a few bytes for the function call at the end of the
 18instrumented function and adding a data structure in a separate section). When a
19marker is "on", the function you provide is called each time the marker is
20executed, in the execution context of the caller. When the function provided
21ends its execution, it returns to the caller (continuing from the marker site).
22
23You can put markers at important locations in the code. Markers are
24lightweight hooks that can pass an arbitrary number of parameters,
25described in a printk-like format string, to the attached probe function.
26
27They can be used for tracing and performance accounting.
28
29
30* Usage
31
32In order to use the macro trace_mark, you should include linux/marker.h.
33
34#include <linux/marker.h>
35
36And,
37
38trace_mark(subsystem_event, "%d %s", someint, somestring);
39Where :
40- subsystem_event is an identifier unique to your event
41 - subsystem is the name of your subsystem.
42 - event is the name of the event to mark.
43- "%d %s" is the formatted string for the serializer.
44- someint is an integer.
45- somestring is a char pointer.
46
47Connecting a function (probe) to a marker is done by providing a probe (function
48to call) for the specific marker through marker_probe_register() and can be
49activated by calling marker_arm(). Marker deactivation can be done by calling
50marker_disarm() as many times as marker_arm() has been called. Removing a probe
51is done through marker_probe_unregister(); it will disarm the probe and make
52sure there is no caller left using the probe when it returns. Probe removal is
53preempt-safe because preemption is disabled around the probe call. See the
54"Probe example" section below for a sample probe module.
55
56The marker mechanism supports inserting multiple instances of the same marker.
57Markers can be put in inline functions, inlined static functions, and
58unrolled loops as well as regular functions.
59
60The naming scheme "subsystem_event" is suggested here as a convention intended
61to limit collisions. Marker names are global to the kernel: they are considered
62as being the same whether they are in the core kernel image or in modules.
 63Conflicting format strings for markers with the same name will cause those
 64markers to be detected as having a different format string, prevent them from
 65being armed, and will output a printk warning which identifies the inconsistency:
66
67"Format mismatch for probe probe_name (format), marker (format)"
68
69
70* Probe / marker example
71
72See the example provided in samples/markers/src
73
74Compile them with your kernel.
75
76Run, as root :
77modprobe marker-example (insmod order is not important)
78modprobe probe-example
79cat /proc/marker-example (returns an expected error)
80rmmod marker-example probe-example
81dmesg
diff --git a/Makefile b/Makefile
index 529b9048d97e..68ef80b30340 100644
--- a/Makefile
+++ b/Makefile
@@ -774,6 +774,9 @@ vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) vmlinux.o
774ifdef CONFIG_HEADERS_CHECK 774ifdef CONFIG_HEADERS_CHECK
775 $(Q)$(MAKE) -f $(srctree)/Makefile headers_check 775 $(Q)$(MAKE) -f $(srctree)/Makefile headers_check
776endif 776endif
777ifdef CONFIG_SAMPLES
778 $(Q)$(MAKE) $(build)=samples
779endif
777 $(call vmlinux-modpost) 780 $(call vmlinux-modpost)
778 $(call if_changed_rule,vmlinux__) 781 $(call if_changed_rule,vmlinux__)
779 $(Q)rm -f .old_version 782 $(Q)rm -f .old_version
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 2a85dc33907c..4c002ba37e50 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -654,7 +654,7 @@ source "drivers/Kconfig"
654 654
655source "fs/Kconfig" 655source "fs/Kconfig"
656 656
657source "arch/alpha/oprofile/Kconfig" 657source "kernel/Kconfig.instrumentation"
658 658
659source "arch/alpha/Kconfig.debug" 659source "arch/alpha/Kconfig.debug"
660 660
diff --git a/arch/alpha/kernel/semaphore.c b/arch/alpha/kernel/semaphore.c
index 8c8aaa205eae..8d2982aa1b8d 100644
--- a/arch/alpha/kernel/semaphore.c
+++ b/arch/alpha/kernel/semaphore.c
@@ -69,7 +69,7 @@ __down_failed(struct semaphore *sem)
69 69
70#ifdef CONFIG_DEBUG_SEMAPHORE 70#ifdef CONFIG_DEBUG_SEMAPHORE
71 printk("%s(%d): down failed(%p)\n", 71 printk("%s(%d): down failed(%p)\n",
72 tsk->comm, tsk->pid, sem); 72 tsk->comm, task_pid_nr(tsk), sem);
73#endif 73#endif
74 74
75 tsk->state = TASK_UNINTERRUPTIBLE; 75 tsk->state = TASK_UNINTERRUPTIBLE;
@@ -98,7 +98,7 @@ __down_failed(struct semaphore *sem)
98 98
99#ifdef CONFIG_DEBUG_SEMAPHORE 99#ifdef CONFIG_DEBUG_SEMAPHORE
100 printk("%s(%d): down acquired(%p)\n", 100 printk("%s(%d): down acquired(%p)\n",
101 tsk->comm, tsk->pid, sem); 101 tsk->comm, task_pid_nr(tsk), sem);
102#endif 102#endif
103} 103}
104 104
@@ -111,7 +111,7 @@ __down_failed_interruptible(struct semaphore *sem)
111 111
112#ifdef CONFIG_DEBUG_SEMAPHORE 112#ifdef CONFIG_DEBUG_SEMAPHORE
113 printk("%s(%d): down failed(%p)\n", 113 printk("%s(%d): down failed(%p)\n",
114 tsk->comm, tsk->pid, sem); 114 tsk->comm, task_pid_nr(tsk), sem);
115#endif 115#endif
116 116
117 tsk->state = TASK_INTERRUPTIBLE; 117 tsk->state = TASK_INTERRUPTIBLE;
@@ -139,7 +139,7 @@ __down_failed_interruptible(struct semaphore *sem)
139 139
140#ifdef CONFIG_DEBUG_SEMAPHORE 140#ifdef CONFIG_DEBUG_SEMAPHORE
141 printk("%s(%d): down %s(%p)\n", 141 printk("%s(%d): down %s(%p)\n",
142 current->comm, current->pid, 142 current->comm, task_pid_nr(current),
143 (ret < 0 ? "interrupted" : "acquired"), sem); 143 (ret < 0 ? "interrupted" : "acquired"), sem);
144#endif 144#endif
145 return ret; 145 return ret;
@@ -168,7 +168,7 @@ down(struct semaphore *sem)
168#endif 168#endif
169#ifdef CONFIG_DEBUG_SEMAPHORE 169#ifdef CONFIG_DEBUG_SEMAPHORE
170 printk("%s(%d): down(%p) <count=%d> from %p\n", 170 printk("%s(%d): down(%p) <count=%d> from %p\n",
171 current->comm, current->pid, sem, 171 current->comm, task_pid_nr(current), sem,
172 atomic_read(&sem->count), __builtin_return_address(0)); 172 atomic_read(&sem->count), __builtin_return_address(0));
173#endif 173#endif
174 __down(sem); 174 __down(sem);
@@ -182,7 +182,7 @@ down_interruptible(struct semaphore *sem)
182#endif 182#endif
183#ifdef CONFIG_DEBUG_SEMAPHORE 183#ifdef CONFIG_DEBUG_SEMAPHORE
184 printk("%s(%d): down(%p) <count=%d> from %p\n", 184 printk("%s(%d): down(%p) <count=%d> from %p\n",
185 current->comm, current->pid, sem, 185 current->comm, task_pid_nr(current), sem,
186 atomic_read(&sem->count), __builtin_return_address(0)); 186 atomic_read(&sem->count), __builtin_return_address(0));
187#endif 187#endif
188 return __down_interruptible(sem); 188 return __down_interruptible(sem);
@@ -201,7 +201,7 @@ down_trylock(struct semaphore *sem)
201 201
202#ifdef CONFIG_DEBUG_SEMAPHORE 202#ifdef CONFIG_DEBUG_SEMAPHORE
203 printk("%s(%d): down_trylock %s from %p\n", 203 printk("%s(%d): down_trylock %s from %p\n",
204 current->comm, current->pid, 204 current->comm, task_pid_nr(current),
205 ret ? "failed" : "acquired", 205 ret ? "failed" : "acquired",
206 __builtin_return_address(0)); 206 __builtin_return_address(0));
207#endif 207#endif
@@ -217,7 +217,7 @@ up(struct semaphore *sem)
217#endif 217#endif
218#ifdef CONFIG_DEBUG_SEMAPHORE 218#ifdef CONFIG_DEBUG_SEMAPHORE
219 printk("%s(%d): up(%p) <count=%d> from %p\n", 219 printk("%s(%d): up(%p) <count=%d> from %p\n",
220 current->comm, current->pid, sem, 220 current->comm, task_pid_nr(current), sem,
221 atomic_read(&sem->count), __builtin_return_address(0)); 221 atomic_read(&sem->count), __builtin_return_address(0));
222#endif 222#endif
223 __up(sem); 223 __up(sem);
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index ec0f05e0d8ff..2dc7f9fed213 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -182,7 +182,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
182#ifdef CONFIG_SMP 182#ifdef CONFIG_SMP
183 printk("CPU %d ", hard_smp_processor_id()); 183 printk("CPU %d ", hard_smp_processor_id());
184#endif 184#endif
185 printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err); 185 printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
186 dik_show_regs(regs, r9_15); 186 dik_show_regs(regs, r9_15);
187 add_taint(TAINT_DIE); 187 add_taint(TAINT_DIE);
188 dik_show_trace((unsigned long *)(regs+1)); 188 dik_show_trace((unsigned long *)(regs+1));
@@ -646,7 +646,7 @@ got_exception:
646 lock_kernel(); 646 lock_kernel();
647 647
648 printk("%s(%d): unhandled unaligned exception\n", 648 printk("%s(%d): unhandled unaligned exception\n",
649 current->comm, current->pid); 649 current->comm, task_pid_nr(current));
650 650
651 printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx\n", 651 printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx\n",
652 pc, una_reg(26), regs->ps); 652 pc, una_reg(26), regs->ps);
@@ -786,7 +786,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
786 } 786 }
787 if (++cnt < 5) { 787 if (++cnt < 5) {
788 printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n", 788 printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n",
789 current->comm, current->pid, 789 current->comm, task_pid_nr(current),
790 regs->pc - 4, va, opcode, reg); 790 regs->pc - 4, va, opcode, reg);
791 } 791 }
792 last_time = jiffies; 792 last_time = jiffies;
diff --git a/arch/alpha/lib/fls.c b/arch/alpha/lib/fls.c
index 7ad84ea0acf8..32afaa3fa686 100644
--- a/arch/alpha/lib/fls.c
+++ b/arch/alpha/lib/fls.c
@@ -3,7 +3,7 @@
3 */ 3 */
4 4
5#include <linux/module.h> 5#include <linux/module.h>
6#include <asm/bitops.h> 6#include <linux/bitops.h>
7 7
8/* This is fls(x)-1, except zero is held to zero. This allows most 8/* This is fls(x)-1, except zero is held to zero. This allows most
9 efficent input into extbl, plus it allows easy handling of fls(0)=0. */ 9 efficent input into extbl, plus it allows easy handling of fls(0)=0. */
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 25154df3055a..4829f96585b1 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -188,13 +188,13 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
188 /* We ran out of memory, or some other thing happened to us that 188 /* We ran out of memory, or some other thing happened to us that
189 made us unable to handle the page fault gracefully. */ 189 made us unable to handle the page fault gracefully. */
190 out_of_memory: 190 out_of_memory:
191 if (is_init(current)) { 191 if (is_global_init(current)) {
192 yield(); 192 yield();
193 down_read(&mm->mmap_sem); 193 down_read(&mm->mmap_sem);
194 goto survive; 194 goto survive;
195 } 195 }
196 printk(KERN_ALERT "VM: killing process %s(%d)\n", 196 printk(KERN_ALERT "VM: killing process %s(%d)\n",
197 current->comm, current->pid); 197 current->comm, task_pid_nr(current));
198 if (!user_mode(regs)) 198 if (!user_mode(regs))
199 goto no_context; 199 goto no_context;
200 do_group_exit(SIGKILL); 200 do_group_exit(SIGKILL);
diff --git a/arch/alpha/oprofile/Kconfig b/arch/alpha/oprofile/Kconfig
deleted file mode 100644
index 5ade19801b97..000000000000
--- a/arch/alpha/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING
15 help
16 OProfile is a profiling system capable of profiling the
17 whole system, include the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22endmenu
23
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0a0c88d0039c..4cee938df01e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1068,7 +1068,7 @@ endmenu
1068 1068
1069source "fs/Kconfig" 1069source "fs/Kconfig"
1070 1070
1071source "arch/arm/oprofile/Kconfig" 1071source "kernel/Kconfig.instrumentation"
1072 1072
1073source "arch/arm/Kconfig.debug" 1073source "arch/arm/Kconfig.debug"
1074 1074
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 93b7f8e22dcc..4f1a03124a74 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -265,7 +265,7 @@ void __show_regs(struct pt_regs *regs)
265void show_regs(struct pt_regs * regs) 265void show_regs(struct pt_regs * regs)
266{ 266{
267 printk("\n"); 267 printk("\n");
268 printk("Pid: %d, comm: %20s\n", current->pid, current->comm); 268 printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
269 __show_regs(regs); 269 __show_regs(regs);
270 __backtrace(); 270 __backtrace();
271} 271}
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 5feee722ea98..4b05dc5c1023 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -382,16 +382,16 @@ static void clear_breakpoint(struct task_struct *task, struct debug_entry *bp)
382 382
383 if (ret != 2 || old_insn.thumb != BREAKINST_THUMB) 383 if (ret != 2 || old_insn.thumb != BREAKINST_THUMB)
384 printk(KERN_ERR "%s:%d: corrupted Thumb breakpoint at " 384 printk(KERN_ERR "%s:%d: corrupted Thumb breakpoint at "
385 "0x%08lx (0x%04x)\n", task->comm, task->pid, 385 "0x%08lx (0x%04x)\n", task->comm,
386 addr, old_insn.thumb); 386 task_pid_nr(task), addr, old_insn.thumb);
387 } else { 387 } else {
388 ret = swap_insn(task, addr & ~3, &old_insn.arm, 388 ret = swap_insn(task, addr & ~3, &old_insn.arm,
389 &bp->insn.arm, 4); 389 &bp->insn.arm, 4);
390 390
391 if (ret != 4 || old_insn.arm != BREAKINST_ARM) 391 if (ret != 4 || old_insn.arm != BREAKINST_ARM)
392 printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at " 392 printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at "
393 "0x%08lx (0x%08x)\n", task->comm, task->pid, 393 "0x%08lx (0x%08x)\n", task->comm,
394 addr, old_insn.arm); 394 task_pid_nr(task), addr, old_insn.arm);
395 } 395 }
396} 396}
397 397
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 8ad47619c079..4764bd9ccee8 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -223,7 +223,7 @@ static void __die(const char *str, int err, struct thread_info *thread, struct p
223 print_modules(); 223 print_modules();
224 __show_regs(regs); 224 __show_regs(regs);
225 printk("Process %s (pid: %d, stack limit = 0x%p)\n", 225 printk("Process %s (pid: %d, stack limit = 0x%p)\n",
226 tsk->comm, tsk->pid, thread + 1); 226 tsk->comm, task_pid_nr(tsk), thread + 1);
227 227
228 if (!user_mode(regs) || in_interrupt()) { 228 if (!user_mode(regs) || in_interrupt()) {
229 dump_mem("Stack: ", regs->ARM_sp, 229 dump_mem("Stack: ", regs->ARM_sp,
@@ -337,7 +337,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
337#ifdef CONFIG_DEBUG_USER 337#ifdef CONFIG_DEBUG_USER
338 if (user_debug & UDBG_UNDEFINED) { 338 if (user_debug & UDBG_UNDEFINED) {
339 printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n", 339 printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
340 current->comm, current->pid, pc); 340 current->comm, task_pid_nr(current), pc);
341 dump_instr(regs); 341 dump_instr(regs);
342 } 342 }
343#endif 343#endif
@@ -388,7 +388,7 @@ static int bad_syscall(int n, struct pt_regs *regs)
388#ifdef CONFIG_DEBUG_USER 388#ifdef CONFIG_DEBUG_USER
389 if (user_debug & UDBG_SYSCALL) { 389 if (user_debug & UDBG_SYSCALL) {
390 printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n", 390 printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
391 current->pid, current->comm, n); 391 task_pid_nr(current), current->comm, n);
392 dump_instr(regs); 392 dump_instr(regs);
393 } 393 }
394#endif 394#endif
@@ -565,7 +565,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
565 */ 565 */
566 if (user_debug & UDBG_SYSCALL) { 566 if (user_debug & UDBG_SYSCALL) {
567 printk("[%d] %s: arm syscall %d\n", 567 printk("[%d] %s: arm syscall %d\n",
568 current->pid, current->comm, no); 568 task_pid_nr(current), current->comm, no);
569 dump_instr(regs); 569 dump_instr(regs);
570 if (user_mode(regs)) { 570 if (user_mode(regs)) {
571 __show_regs(regs); 571 __show_regs(regs);
@@ -642,7 +642,7 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
642#ifdef CONFIG_DEBUG_USER 642#ifdef CONFIG_DEBUG_USER
643 if (user_debug & UDBG_BADABORT) { 643 if (user_debug & UDBG_BADABORT) {
644 printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n", 644 printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
645 current->pid, current->comm, code, instr); 645 task_pid_nr(current), current->comm, code, instr);
646 dump_instr(regs); 646 dump_instr(regs);
647 show_pte(current->mm, addr); 647 show_pte(current->mm, addr);
648 } 648 }
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 074b7cb07743..e162cca5917f 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -757,7 +757,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
757 if (ai_usermode & 1) 757 if (ai_usermode & 1)
758 printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx " 758 printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
759 "Address=0x%08lx FSR 0x%03x\n", current->comm, 759 "Address=0x%08lx FSR 0x%03x\n", current->comm,
760 current->pid, instrptr, 760 task_pid_nr(current), instrptr,
761 thumb_mode(regs) ? 4 : 8, 761 thumb_mode(regs) ? 4 : 8,
762 thumb_mode(regs) ? tinstr : instr, 762 thumb_mode(regs) ? tinstr : instr,
763 addr, fsr); 763 addr, fsr);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 59ed1d05b71b..a8a7dab757eb 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -197,7 +197,7 @@ survive:
197 return fault; 197 return fault;
198 198
199out_of_memory: 199out_of_memory:
200 if (!is_init(tsk)) 200 if (!is_global_init(tsk))
201 goto out; 201 goto out;
202 202
203 /* 203 /*
diff --git a/arch/arm/oprofile/Kconfig b/arch/arm/oprofile/Kconfig
deleted file mode 100644
index afd93ad02feb..000000000000
--- a/arch/arm/oprofile/Kconfig
+++ /dev/null
@@ -1,42 +0,0 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING
15 help
16 OProfile is a profiling system capable of profiling the
17 whole system, include the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22if OPROFILE
23
24config OPROFILE_ARMV6
25 bool
26 depends on CPU_V6 && !SMP
27 default y
28 select OPROFILE_ARM11_CORE
29
30config OPROFILE_MPCORE
31 bool
32 depends on CPU_V6 && SMP
33 default y
34 select OPROFILE_ARM11_CORE
35
36config OPROFILE_ARM11_CORE
37 bool
38
39endif
40
41endmenu
42
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 9a73ce7eb50f..8a7caf8e7b45 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -89,7 +89,7 @@ void _exception(long signr, struct pt_regs *regs, int code,
89 * generate the same exception over and over again and we get 89 * generate the same exception over and over again and we get
90 * nowhere. Better to kill it and let the kernel panic. 90 * nowhere. Better to kill it and let the kernel panic.
91 */ 91 */
92 if (is_init(current)) { 92 if (is_global_init(current)) {
93 __sighandler_t handler; 93 __sighandler_t handler;
94 94
95 spin_lock_irq(&current->sighand->siglock); 95 spin_lock_irq(&current->sighand->siglock);
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index 11472f8701bd..6560cb18b4e3 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -160,7 +160,7 @@ bad_area:
160 if (exception_trace && printk_ratelimit()) 160 if (exception_trace && printk_ratelimit())
161 printk("%s%s[%d]: segfault at %08lx pc %08lx " 161 printk("%s%s[%d]: segfault at %08lx pc %08lx "
162 "sp %08lx ecr %lu\n", 162 "sp %08lx ecr %lu\n",
163 is_init(tsk) ? KERN_EMERG : KERN_INFO, 163 is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
164 tsk->comm, tsk->pid, address, regs->pc, 164 tsk->comm, tsk->pid, address, regs->pc,
165 regs->sp, ecr); 165 regs->sp, ecr);
166 _exception(SIGSEGV, regs, code, address); 166 _exception(SIGSEGV, regs, code, address);
@@ -209,7 +209,7 @@ no_context:
209 */ 209 */
210out_of_memory: 210out_of_memory:
211 up_read(&mm->mmap_sem); 211 up_read(&mm->mmap_sem);
212 if (is_init(current)) { 212 if (is_global_init(current)) {
213 yield(); 213 yield();
214 down_read(&mm->mmap_sem); 214 down_read(&mm->mmap_sem);
215 goto survive; 215 goto survive;
@@ -231,7 +231,7 @@ do_sigbus:
231 if (exception_trace) 231 if (exception_trace)
232 printk("%s%s[%d]: bus error at %08lx pc %08lx " 232 printk("%s%s[%d]: bus error at %08lx pc %08lx "
233 "sp %08lx ecr %lu\n", 233 "sp %08lx ecr %lu\n",
234 is_init(tsk) ? KERN_EMERG : KERN_INFO, 234 is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
235 tsk->comm, tsk->pid, address, regs->pc, 235 tsk->comm, tsk->pid, address, regs->pc,
236 regs->sp, ecr); 236 regs->sp, ecr);
237 237
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index aa9db3073312..4c5ca9d5e40f 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -1012,7 +1012,7 @@ source "drivers/Kconfig"
1012 1012
1013source "fs/Kconfig" 1013source "fs/Kconfig"
1014 1014
1015source "arch/blackfin/oprofile/Kconfig" 1015source "kernel/Kconfig.instrumentation"
1016 1016
1017menu "Kernel hacking" 1017menu "Kernel hacking"
1018 1018
diff --git a/arch/blackfin/oprofile/Kconfig b/arch/blackfin/oprofile/Kconfig
deleted file mode 100644
index 0a2fd999c941..000000000000
--- a/arch/blackfin/oprofile/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
1menu "Profiling support"
2depends on EXPERIMENTAL
3
4config PROFILING
5 bool "Profiling support (EXPERIMENTAL)"
6 help
7 Say Y here to enable the extended profiling support mechanisms used
8 by profilers such as OProfile.
9
10config OPROFILE
11 tristate "OProfile system profiling (EXPERIMENTAL)"
12 depends on PROFILING
13 help
14 OProfile is a profiling system capable of profiling the
15 whole system, include the kernel, kernel modules, libraries,
16 and applications.
17
18 If unsure, say N.
19
20config HARDWARE_PM
21 tristate "Hardware Performance Monitor Profiling"
22 depends on PROFILING
23 help
24 take use of hardware performance monitor to profiling the kernel
25 and application.
26
27 If unsure, say N.
28
29endmenu
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 6b4d026a00a1..21900a9378bb 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -196,6 +196,8 @@ source "sound/Kconfig"
196 196
197source "drivers/usb/Kconfig" 197source "drivers/usb/Kconfig"
198 198
199source "kernel/Kconfig.instrumentation"
200
199source "arch/cris/Kconfig.debug" 201source "arch/cris/Kconfig.debug"
200 202
201source "security/Kconfig" 203source "security/Kconfig"
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 74eef7111f2b..43153e767bb1 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -375,6 +375,8 @@ source "drivers/Kconfig"
375 375
376source "fs/Kconfig" 376source "fs/Kconfig"
377 377
378source "kernel/Kconfig.instrumentation"
379
378source "arch/frv/Kconfig.debug" 380source "arch/frv/Kconfig.debug"
379 381
380source "security/Kconfig" 382source "security/Kconfig"
diff --git a/arch/frv/kernel/irq-mb93091.c b/arch/frv/kernel/irq-mb93091.c
index ad753c1e9b8f..9e38f99bbab8 100644
--- a/arch/frv/kernel/irq-mb93091.c
+++ b/arch/frv/kernel/irq-mb93091.c
@@ -17,10 +17,10 @@
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/irq.h> 19#include <linux/irq.h>
20#include <linux/bitops.h>
20 21
21#include <asm/io.h> 22#include <asm/io.h>
22#include <asm/system.h> 23#include <asm/system.h>
23#include <asm/bitops.h>
24#include <asm/delay.h> 24#include <asm/delay.h>
25#include <asm/irq.h> 25#include <asm/irq.h>
26#include <asm/irc-regs.h> 26#include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq-mb93093.c b/arch/frv/kernel/irq-mb93093.c
index e0983f6926ed..3c2752ca9775 100644
--- a/arch/frv/kernel/irq-mb93093.c
+++ b/arch/frv/kernel/irq-mb93093.c
@@ -17,10 +17,10 @@
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/irq.h> 19#include <linux/irq.h>
20#include <linux/bitops.h>
20 21
21#include <asm/io.h> 22#include <asm/io.h>
22#include <asm/system.h> 23#include <asm/system.h>
23#include <asm/bitops.h>
24#include <asm/delay.h> 24#include <asm/delay.h>
25#include <asm/irq.h> 25#include <asm/irq.h>
26#include <asm/irc-regs.h> 26#include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq-mb93493.c b/arch/frv/kernel/irq-mb93493.c
index c157eeff871d..7754c7338e4b 100644
--- a/arch/frv/kernel/irq-mb93493.c
+++ b/arch/frv/kernel/irq-mb93493.c
@@ -17,10 +17,10 @@
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/irq.h> 19#include <linux/irq.h>
20#include <linux/bitops.h>
20 21
21#include <asm/io.h> 22#include <asm/io.h>
22#include <asm/system.h> 23#include <asm/system.h>
23#include <asm/bitops.h>
24#include <asm/delay.h> 24#include <asm/delay.h>
25#include <asm/irq.h> 25#include <asm/irq.h>
26#include <asm/irc-regs.h> 26#include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index c7e59dcadee4..7ddb69089ed4 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -24,12 +24,12 @@
24#include <linux/proc_fs.h> 24#include <linux/proc_fs.h>
25#include <linux/seq_file.h> 25#include <linux/seq_file.h>
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/bitops.h>
27 28
28#include <asm/atomic.h> 29#include <asm/atomic.h>
29#include <asm/io.h> 30#include <asm/io.h>
30#include <asm/smp.h> 31#include <asm/smp.h>
31#include <asm/system.h> 32#include <asm/system.h>
32#include <asm/bitops.h>
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
34#include <asm/pgalloc.h> 34#include <asm/pgalloc.h>
35#include <asm/delay.h> 35#include <asm/delay.h>
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index e35f74e6e505..e2e9f57abe2e 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -223,6 +223,8 @@ endmenu
223 223
224source "fs/Kconfig" 224source "fs/Kconfig"
225 225
226source "kernel/Kconfig.instrumentation"
227
226source "arch/h8300/Kconfig.debug" 228source "arch/h8300/Kconfig.debug"
227 229
228source "security/Kconfig" 230source "security/Kconfig"
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index b84d5050e92e..04be7a7d090f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1256,31 +1256,6 @@ source "drivers/Kconfig"
1256 1256
1257source "fs/Kconfig" 1257source "fs/Kconfig"
1258 1258
1259menuconfig INSTRUMENTATION
1260 bool "Instrumentation Support"
1261 default y
1262 ---help---
1263 Say Y here to get to see options related to performance measurement,
1264 debugging, and testing. This option alone does not add any kernel code.
1265
1266 If you say N, all options in this submenu will be skipped and disabled.
1267
1268if INSTRUMENTATION
1269
1270source "arch/x86/oprofile/Kconfig"
1271
1272config KPROBES
1273 bool "Kprobes"
1274 depends on KALLSYMS && MODULES
1275 help
1276 Kprobes allows you to trap at almost any kernel address and
1277 execute a callback function. register_kprobe() establishes
1278 a probepoint and specifies the callback. Kprobes is useful
1279 for kernel debugging, non-intrusive instrumentation and testing.
1280 If in doubt, say "N".
1281
1282endif # INSTRUMENTATION
1283
1284source "arch/i386/Kconfig.debug" 1259source "arch/i386/Kconfig.debug"
1285 1260
1286source "security/Kconfig" 1261source "security/Kconfig"
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c60532d93c54..c89108e9770d 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -592,20 +592,7 @@ config IRQ_PER_CPU
592 592
593source "arch/ia64/hp/sim/Kconfig" 593source "arch/ia64/hp/sim/Kconfig"
594 594
595menu "Instrumentation Support" 595source "kernel/Kconfig.instrumentation"
596
597source "arch/ia64/oprofile/Kconfig"
598
599config KPROBES
600 bool "Kprobes"
601 depends on KALLSYMS && MODULES
602 help
603 Kprobes allows you to trap at almost any kernel address and
604 execute a callback function. register_kprobe() establishes
605 a probepoint and specifies the callback. Kprobes is useful
606 for kernel debugging, non-intrusive instrumentation and testing.
607 If in doubt, say "N".
608endmenu
609 596
610source "arch/ia64/Kconfig.debug" 597source "arch/ia64/Kconfig.debug"
611 598
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index 449d3e75bfc2..75fd90dc76a3 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -26,6 +26,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
26# CONFIG_AUDIT is not set 26# CONFIG_AUDIT is not set
27# CONFIG_IKCONFIG is not set 27# CONFIG_IKCONFIG is not set
28CONFIG_LOG_BUF_SHIFT=20 28CONFIG_LOG_BUF_SHIFT=20
29CONFIG_CGROUPS=y
29CONFIG_CPUSETS=y 30CONFIG_CPUSETS=y
30CONFIG_SYSFS_DEPRECATED=y 31CONFIG_SYSFS_DEPRECATED=y
31CONFIG_RELAY=y 32CONFIG_RELAY=y
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index a3405b3c1eef..d025a22eb225 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -773,7 +773,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
773 if (flags & MAP_SHARED) 773 if (flags & MAP_SHARED)
774 printk(KERN_INFO 774 printk(KERN_INFO
775 "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n", 775 "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
776 current->comm, current->pid, start); 776 current->comm, task_pid_nr(current), start);
777 ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags, 777 ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
778 off); 778 off);
779 if (IS_ERR((void *) ret)) 779 if (IS_ERR((void *) ret))
@@ -786,7 +786,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
786 if (flags & MAP_SHARED) 786 if (flags & MAP_SHARED)
787 printk(KERN_INFO 787 printk(KERN_INFO
788 "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n", 788 "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
789 current->comm, current->pid, end); 789 current->comm, task_pid_nr(current), end);
790 ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags, 790 ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
791 (off + len) - offset_in_page(end)); 791 (off + len) - offset_in_page(end));
792 if (IS_ERR((void *) ret)) 792 if (IS_ERR((void *) ret))
@@ -816,7 +816,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
816 816
817 if ((flags & MAP_SHARED) && !is_congruent) 817 if ((flags & MAP_SHARED) && !is_congruent)
818 printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap " 818 printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
819 "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off); 819 "(addr=0x%lx,off=0x%llx)\n", current->comm, task_pid_nr(current), start, off);
820 820
821 DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend, 821 DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
822 is_congruent ? "congruent" : "not congruent", poff); 822 is_congruent ? "congruent" : "not congruent", poff);
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 73ca86d03810..8e4894b205e2 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -967,7 +967,7 @@ find_memmap_space (void)
967 * to use. We can allocate partial granules only if the unavailable 967 * to use. We can allocate partial granules only if the unavailable
968 * parts exist, and are WB. 968 * parts exist, and are WB.
969 */ 969 */
970void 970unsigned long
971efi_memmap_init(unsigned long *s, unsigned long *e) 971efi_memmap_init(unsigned long *s, unsigned long *e)
972{ 972{
973 struct kern_memdesc *k, *prev = NULL; 973 struct kern_memdesc *k, *prev = NULL;
@@ -1084,6 +1084,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
1084 /* reserve the memory we are using for kern_memmap */ 1084 /* reserve the memory we are using for kern_memmap */
1085 *s = (u64)kern_memmap; 1085 *s = (u64)kern_memmap;
1086 *e = (u64)++k; 1086 *e = (u64)++k;
1087
1088 return total_mem;
1087} 1089}
1088 1090
1089void 1091void
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f55fa07849c4..59169bf7145f 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -158,14 +158,14 @@
158 */ 158 */
159#define PROTECT_CTX(c, f) \ 159#define PROTECT_CTX(c, f) \
160 do { \ 160 do { \
161 DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \ 161 DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
162 spin_lock_irqsave(&(c)->ctx_lock, f); \ 162 spin_lock_irqsave(&(c)->ctx_lock, f); \
163 DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \ 163 DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \
164 } while(0) 164 } while(0)
165 165
166#define UNPROTECT_CTX(c, f) \ 166#define UNPROTECT_CTX(c, f) \
167 do { \ 167 do { \
168 DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \ 168 DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
169 spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 169 spin_unlock_irqrestore(&(c)->ctx_lock, f); \
170 } while(0) 170 } while(0)
171 171
@@ -227,12 +227,12 @@
227#ifdef PFM_DEBUGGING 227#ifdef PFM_DEBUGGING
228#define DPRINT(a) \ 228#define DPRINT(a) \
229 do { \ 229 do { \
230 if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ 230 if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
231 } while (0) 231 } while (0)
232 232
233#define DPRINT_ovfl(a) \ 233#define DPRINT_ovfl(a) \
234 do { \ 234 do { \
235 if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ 235 if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
236 } while (0) 236 } while (0)
237#endif 237#endif
238 238
@@ -913,7 +913,7 @@ pfm_mask_monitoring(struct task_struct *task)
913 unsigned long mask, val, ovfl_mask; 913 unsigned long mask, val, ovfl_mask;
914 int i; 914 int i;
915 915
916 DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid)); 916 DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));
917 917
918 ovfl_mask = pmu_conf->ovfl_val; 918 ovfl_mask = pmu_conf->ovfl_val;
919 /* 919 /*
@@ -992,12 +992,12 @@ pfm_restore_monitoring(struct task_struct *task)
992 ovfl_mask = pmu_conf->ovfl_val; 992 ovfl_mask = pmu_conf->ovfl_val;
993 993
994 if (task != current) { 994 if (task != current) {
995 printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid); 995 printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
996 return; 996 return;
997 } 997 }
998 if (ctx->ctx_state != PFM_CTX_MASKED) { 998 if (ctx->ctx_state != PFM_CTX_MASKED) {
999 printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__, 999 printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
1000 task->pid, current->pid, ctx->ctx_state); 1000 task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
1001 return; 1001 return;
1002 } 1002 }
1003 psr = pfm_get_psr(); 1003 psr = pfm_get_psr();
@@ -1051,7 +1051,8 @@ pfm_restore_monitoring(struct task_struct *task)
1051 if ((mask & 0x1) == 0UL) continue; 1051 if ((mask & 0x1) == 0UL) continue;
1052 ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1052 ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
1053 ia64_set_pmc(i, ctx->th_pmcs[i]); 1053 ia64_set_pmc(i, ctx->th_pmcs[i]);
1054 DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i])); 1054 DPRINT(("[%d] pmc[%d]=0x%lx\n",
1055 task_pid_nr(task), i, ctx->th_pmcs[i]));
1055 } 1056 }
1056 ia64_srlz_d(); 1057 ia64_srlz_d();
1057 1058
@@ -1370,7 +1371,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
1370 1371
1371error_conflict: 1372error_conflict:
1372 DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", 1373 DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
1373 pfm_sessions.pfs_sys_session[cpu]->pid, 1374 task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
1374 cpu)); 1375 cpu));
1375abort: 1376abort:
1376 UNLOCK_PFS(flags); 1377 UNLOCK_PFS(flags);
@@ -1442,7 +1443,7 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz
1442 1443
1443 /* sanity checks */ 1444 /* sanity checks */
1444 if (task->mm == NULL || size == 0UL || vaddr == NULL) { 1445 if (task->mm == NULL || size == 0UL || vaddr == NULL) {
1445 printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm); 1446 printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
1446 return -EINVAL; 1447 return -EINVAL;
1447 } 1448 }
1448 1449
@@ -1459,7 +1460,7 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz
1459 1460
1460 up_write(&task->mm->mmap_sem); 1461 up_write(&task->mm->mmap_sem);
1461 if (r !=0) { 1462 if (r !=0) {
1462 printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size); 1463 printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
1463 } 1464 }
1464 1465
1465 DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); 1466 DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
@@ -1501,7 +1502,7 @@ pfm_free_smpl_buffer(pfm_context_t *ctx)
1501 return 0; 1502 return 0;
1502 1503
1503invalid_free: 1504invalid_free:
1504 printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid); 1505 printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current));
1505 return -EINVAL; 1506 return -EINVAL;
1506} 1507}
1507#endif 1508#endif
@@ -1547,13 +1548,13 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
1547 unsigned long flags; 1548 unsigned long flags;
1548 DECLARE_WAITQUEUE(wait, current); 1549 DECLARE_WAITQUEUE(wait, current);
1549 if (PFM_IS_FILE(filp) == 0) { 1550 if (PFM_IS_FILE(filp) == 0) {
1550 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); 1551 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
1551 return -EINVAL; 1552 return -EINVAL;
1552 } 1553 }
1553 1554
1554 ctx = (pfm_context_t *)filp->private_data; 1555 ctx = (pfm_context_t *)filp->private_data;
1555 if (ctx == NULL) { 1556 if (ctx == NULL) {
1556 printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid); 1557 printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
1557 return -EINVAL; 1558 return -EINVAL;
1558 } 1559 }
1559 1560
@@ -1607,7 +1608,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
1607 1608
1608 PROTECT_CTX(ctx, flags); 1609 PROTECT_CTX(ctx, flags);
1609 } 1610 }
1610 DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret)); 1611 DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
1611 set_current_state(TASK_RUNNING); 1612 set_current_state(TASK_RUNNING);
1612 remove_wait_queue(&ctx->ctx_msgq_wait, &wait); 1613 remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
1613 1614
@@ -1616,7 +1617,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
1616 ret = -EINVAL; 1617 ret = -EINVAL;
1617 msg = pfm_get_next_msg(ctx); 1618 msg = pfm_get_next_msg(ctx);
1618 if (msg == NULL) { 1619 if (msg == NULL) {
1619 printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid); 1620 printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
1620 goto abort_locked; 1621 goto abort_locked;
1621 } 1622 }
1622 1623
@@ -1647,13 +1648,13 @@ pfm_poll(struct file *filp, poll_table * wait)
1647 unsigned int mask = 0; 1648 unsigned int mask = 0;
1648 1649
1649 if (PFM_IS_FILE(filp) == 0) { 1650 if (PFM_IS_FILE(filp) == 0) {
1650 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); 1651 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
1651 return 0; 1652 return 0;
1652 } 1653 }
1653 1654
1654 ctx = (pfm_context_t *)filp->private_data; 1655 ctx = (pfm_context_t *)filp->private_data;
1655 if (ctx == NULL) { 1656 if (ctx == NULL) {
1656 printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid); 1657 printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
1657 return 0; 1658 return 0;
1658 } 1659 }
1659 1660
@@ -1692,7 +1693,7 @@ pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
1692 ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); 1693 ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
1693 1694
1694 DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1695 DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
1695 current->pid, 1696 task_pid_nr(current),
1696 fd, 1697 fd,
1697 on, 1698 on,
1698 ctx->ctx_async_queue, ret)); 1699 ctx->ctx_async_queue, ret));
@@ -1707,13 +1708,13 @@ pfm_fasync(int fd, struct file *filp, int on)
1707 int ret; 1708 int ret;
1708 1709
1709 if (PFM_IS_FILE(filp) == 0) { 1710 if (PFM_IS_FILE(filp) == 0) {
1710 printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid); 1711 printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
1711 return -EBADF; 1712 return -EBADF;
1712 } 1713 }
1713 1714
1714 ctx = (pfm_context_t *)filp->private_data; 1715 ctx = (pfm_context_t *)filp->private_data;
1715 if (ctx == NULL) { 1716 if (ctx == NULL) {
1716 printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid); 1717 printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
1717 return -EBADF; 1718 return -EBADF;
1718 } 1719 }
1719 /* 1720 /*
@@ -1759,7 +1760,7 @@ pfm_syswide_force_stop(void *info)
1759 if (owner != ctx->ctx_task) { 1760 if (owner != ctx->ctx_task) {
1760 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", 1761 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
1761 smp_processor_id(), 1762 smp_processor_id(),
1762 owner->pid, ctx->ctx_task->pid); 1763 task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
1763 return; 1764 return;
1764 } 1765 }
1765 if (GET_PMU_CTX() != ctx) { 1766 if (GET_PMU_CTX() != ctx) {
@@ -1769,7 +1770,7 @@ pfm_syswide_force_stop(void *info)
1769 return; 1770 return;
1770 } 1771 }
1771 1772
1772 DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid)); 1773 DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
1773 /* 1774 /*
1774 * the context is already protected in pfm_close(), we simply 1775 * the context is already protected in pfm_close(), we simply
1775 * need to mask interrupts to avoid a PMU interrupt race on 1776 * need to mask interrupts to avoid a PMU interrupt race on
@@ -1821,7 +1822,7 @@ pfm_flush(struct file *filp, fl_owner_t id)
1821 1822
1822 ctx = (pfm_context_t *)filp->private_data; 1823 ctx = (pfm_context_t *)filp->private_data;
1823 if (ctx == NULL) { 1824 if (ctx == NULL) {
1824 printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid); 1825 printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
1825 return -EBADF; 1826 return -EBADF;
1826 } 1827 }
1827 1828
@@ -1969,7 +1970,7 @@ pfm_close(struct inode *inode, struct file *filp)
1969 1970
1970 ctx = (pfm_context_t *)filp->private_data; 1971 ctx = (pfm_context_t *)filp->private_data;
1971 if (ctx == NULL) { 1972 if (ctx == NULL) {
1972 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid); 1973 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
1973 return -EBADF; 1974 return -EBADF;
1974 } 1975 }
1975 1976
@@ -2066,7 +2067,7 @@ pfm_close(struct inode *inode, struct file *filp)
2066 */ 2067 */
2067 ctx->ctx_state = PFM_CTX_ZOMBIE; 2068 ctx->ctx_state = PFM_CTX_ZOMBIE;
2068 2069
2069 DPRINT(("zombie ctx for [%d]\n", task->pid)); 2070 DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
2070 /* 2071 /*
2071 * cannot free the context on the spot. deferred until 2072 * cannot free the context on the spot. deferred until
2072 * the task notices the ZOMBIE state 2073 * the task notices the ZOMBIE state
@@ -2472,7 +2473,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
2472 /* invoke and lock buffer format, if found */ 2473 /* invoke and lock buffer format, if found */
2473 fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); 2474 fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
2474 if (fmt == NULL) { 2475 if (fmt == NULL) {
2475 DPRINT(("[%d] cannot find buffer format\n", task->pid)); 2476 DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
2476 return -EINVAL; 2477 return -EINVAL;
2477 } 2478 }
2478 2479
@@ -2483,7 +2484,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
2483 2484
2484 ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); 2485 ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
2485 2486
2486 DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret)); 2487 DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));
2487 2488
2488 if (ret) goto error; 2489 if (ret) goto error;
2489 2490
@@ -2605,23 +2606,23 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
2605 * no kernel task or task not owner by caller 2606 * no kernel task or task not owner by caller
2606 */ 2607 */
2607 if (task->mm == NULL) { 2608 if (task->mm == NULL) {
2608 DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid)); 2609 DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task)));
2609 return -EPERM; 2610 return -EPERM;
2610 } 2611 }
2611 if (pfm_bad_permissions(task)) { 2612 if (pfm_bad_permissions(task)) {
2612 DPRINT(("no permission to attach to [%d]\n", task->pid)); 2613 DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task)));
2613 return -EPERM; 2614 return -EPERM;
2614 } 2615 }
2615 /* 2616 /*
2616 * cannot block in self-monitoring mode 2617 * cannot block in self-monitoring mode
2617 */ 2618 */
2618 if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { 2619 if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
2619 DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid)); 2620 DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
2620 return -EINVAL; 2621 return -EINVAL;
2621 } 2622 }
2622 2623
2623 if (task->exit_state == EXIT_ZOMBIE) { 2624 if (task->exit_state == EXIT_ZOMBIE) {
2624 DPRINT(("cannot attach to zombie task [%d]\n", task->pid)); 2625 DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task)));
2625 return -EBUSY; 2626 return -EBUSY;
2626 } 2627 }
2627 2628
@@ -2631,7 +2632,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
2631 if (task == current) return 0; 2632 if (task == current) return 0;
2632 2633
2633 if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { 2634 if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
2634 DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state)); 2635 DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
2635 return -EBUSY; 2636 return -EBUSY;
2636 } 2637 }
2637 /* 2638 /*
@@ -3512,7 +3513,7 @@ pfm_use_debug_registers(struct task_struct *task)
3512 3513
3513 if (pmu_conf->use_rr_dbregs == 0) return 0; 3514 if (pmu_conf->use_rr_dbregs == 0) return 0;
3514 3515
3515 DPRINT(("called for [%d]\n", task->pid)); 3516 DPRINT(("called for [%d]\n", task_pid_nr(task)));
3516 3517
3517 /* 3518 /*
3518 * do it only once 3519 * do it only once
@@ -3543,7 +3544,7 @@ pfm_use_debug_registers(struct task_struct *task)
3543 DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", 3544 DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n",
3544 pfm_sessions.pfs_ptrace_use_dbregs, 3545 pfm_sessions.pfs_ptrace_use_dbregs,
3545 pfm_sessions.pfs_sys_use_dbregs, 3546 pfm_sessions.pfs_sys_use_dbregs,
3546 task->pid, ret)); 3547 task_pid_nr(task), ret));
3547 3548
3548 UNLOCK_PFS(flags); 3549 UNLOCK_PFS(flags);
3549 3550
@@ -3568,7 +3569,7 @@ pfm_release_debug_registers(struct task_struct *task)
3568 3569
3569 LOCK_PFS(flags); 3570 LOCK_PFS(flags);
3570 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { 3571 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
3571 printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid); 3572 printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task));
3572 ret = -1; 3573 ret = -1;
3573 } else { 3574 } else {
3574 pfm_sessions.pfs_ptrace_use_dbregs--; 3575 pfm_sessions.pfs_ptrace_use_dbregs--;
@@ -3620,7 +3621,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3620 3621
3621 /* sanity check */ 3622 /* sanity check */
3622 if (unlikely(task == NULL)) { 3623 if (unlikely(task == NULL)) {
3623 printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid); 3624 printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current));
3624 return -EINVAL; 3625 return -EINVAL;
3625 } 3626 }
3626 3627
@@ -3629,7 +3630,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3629 fmt = ctx->ctx_buf_fmt; 3630 fmt = ctx->ctx_buf_fmt;
3630 3631
3631 DPRINT(("restarting self %d ovfl=0x%lx\n", 3632 DPRINT(("restarting self %d ovfl=0x%lx\n",
3632 task->pid, 3633 task_pid_nr(task),
3633 ctx->ctx_ovfl_regs[0])); 3634 ctx->ctx_ovfl_regs[0]));
3634 3635
3635 if (CTX_HAS_SMPL(ctx)) { 3636 if (CTX_HAS_SMPL(ctx)) {
@@ -3653,11 +3654,11 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3653 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); 3654 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
3654 3655
3655 if (rst_ctrl.bits.mask_monitoring == 0) { 3656 if (rst_ctrl.bits.mask_monitoring == 0) {
3656 DPRINT(("resuming monitoring for [%d]\n", task->pid)); 3657 DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task)));
3657 3658
3658 if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); 3659 if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
3659 } else { 3660 } else {
3660 DPRINT(("keeping monitoring stopped for [%d]\n", task->pid)); 3661 DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task)));
3661 3662
3662 // cannot use pfm_stop_monitoring(task, regs); 3663 // cannot use pfm_stop_monitoring(task, regs);
3663 } 3664 }
@@ -3714,10 +3715,10 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3714 * "self-monitoring". 3715 * "self-monitoring".
3715 */ 3716 */
3716 if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { 3717 if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
3717 DPRINT(("unblocking [%d] \n", task->pid)); 3718 DPRINT(("unblocking [%d] \n", task_pid_nr(task)));
3718 complete(&ctx->ctx_restart_done); 3719 complete(&ctx->ctx_restart_done);
3719 } else { 3720 } else {
3720 DPRINT(("[%d] armed exit trap\n", task->pid)); 3721 DPRINT(("[%d] armed exit trap\n", task_pid_nr(task)));
3721 3722
3722 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; 3723 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
3723 3724
@@ -3805,7 +3806,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
3805 * don't bother if we are loaded and task is being debugged 3806 * don't bother if we are loaded and task is being debugged
3806 */ 3807 */
3807 if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { 3808 if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
3808 DPRINT(("debug registers already in use for [%d]\n", task->pid)); 3809 DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task)));
3809 return -EBUSY; 3810 return -EBUSY;
3810 } 3811 }
3811 3812
@@ -3846,7 +3847,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
3846 * is shared by all processes running on it 3847 * is shared by all processes running on it
3847 */ 3848 */
3848 if (first_time && can_access_pmu) { 3849 if (first_time && can_access_pmu) {
3849 DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid)); 3850 DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task)));
3850 for (i=0; i < pmu_conf->num_ibrs; i++) { 3851 for (i=0; i < pmu_conf->num_ibrs; i++) {
3851 ia64_set_ibr(i, 0UL); 3852 ia64_set_ibr(i, 0UL);
3852 ia64_dv_serialize_instruction(); 3853 ia64_dv_serialize_instruction();
@@ -4035,7 +4036,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4035 return -EBUSY; 4036 return -EBUSY;
4036 } 4037 }
4037 DPRINT(("task [%d] ctx_state=%d is_system=%d\n", 4038 DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
4038 PFM_CTX_TASK(ctx)->pid, 4039 task_pid_nr(PFM_CTX_TASK(ctx)),
4039 state, 4040 state,
4040 is_system)); 4041 is_system));
4041 /* 4042 /*
@@ -4093,7 +4094,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4093 * monitoring disabled in kernel at next reschedule 4094 * monitoring disabled in kernel at next reschedule
4094 */ 4095 */
4095 ctx->ctx_saved_psr_up = 0; 4096 ctx->ctx_saved_psr_up = 0;
4096 DPRINT(("task=[%d]\n", task->pid)); 4097 DPRINT(("task=[%d]\n", task_pid_nr(task)));
4097 } 4098 }
4098 return 0; 4099 return 0;
4099} 4100}
@@ -4298,11 +4299,12 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4298 4299
4299 if (is_system) { 4300 if (is_system) {
4300 if (pfm_sessions.pfs_ptrace_use_dbregs) { 4301 if (pfm_sessions.pfs_ptrace_use_dbregs) {
4301 DPRINT(("cannot load [%d] dbregs in use\n", task->pid)); 4302 DPRINT(("cannot load [%d] dbregs in use\n",
4303 task_pid_nr(task)));
4302 ret = -EBUSY; 4304 ret = -EBUSY;
4303 } else { 4305 } else {
4304 pfm_sessions.pfs_sys_use_dbregs++; 4306 pfm_sessions.pfs_sys_use_dbregs++;
4305 DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs)); 4307 DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
4306 set_dbregs = 1; 4308 set_dbregs = 1;
4307 } 4309 }
4308 } 4310 }
@@ -4394,7 +4396,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4394 4396
4395 /* allow user level control */ 4397 /* allow user level control */
4396 ia64_psr(regs)->sp = 0; 4398 ia64_psr(regs)->sp = 0;
4397 DPRINT(("clearing psr.sp for [%d]\n", task->pid)); 4399 DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task)));
4398 4400
4399 SET_LAST_CPU(ctx, smp_processor_id()); 4401 SET_LAST_CPU(ctx, smp_processor_id());
4400 INC_ACTIVATION(); 4402 INC_ACTIVATION();
@@ -4429,7 +4431,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4429 */ 4431 */
4430 SET_PMU_OWNER(task, ctx); 4432 SET_PMU_OWNER(task, ctx);
4431 4433
4432 DPRINT(("context loaded on PMU for [%d]\n", task->pid)); 4434 DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task)));
4433 } else { 4435 } else {
4434 /* 4436 /*
4435 * when not current, task MUST be stopped, so this is safe 4437 * when not current, task MUST be stopped, so this is safe
@@ -4493,7 +4495,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
4493 int prev_state, is_system; 4495 int prev_state, is_system;
4494 int ret; 4496 int ret;
4495 4497
4496 DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1)); 4498 DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1));
4497 4499
4498 prev_state = ctx->ctx_state; 4500 prev_state = ctx->ctx_state;
4499 is_system = ctx->ctx_fl_system; 4501 is_system = ctx->ctx_fl_system;
@@ -4568,7 +4570,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
4568 */ 4570 */
4569 ia64_psr(regs)->sp = 1; 4571 ia64_psr(regs)->sp = 1;
4570 4572
4571 DPRINT(("setting psr.sp for [%d]\n", task->pid)); 4573 DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task)));
4572 } 4574 }
4573 /* 4575 /*
4574 * save PMDs to context 4576 * save PMDs to context
@@ -4608,7 +4610,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
4608 ctx->ctx_fl_can_restart = 0; 4610 ctx->ctx_fl_can_restart = 0;
4609 ctx->ctx_fl_going_zombie = 0; 4611 ctx->ctx_fl_going_zombie = 0;
4610 4612
4611 DPRINT(("disconnected [%d] from context\n", task->pid)); 4613 DPRINT(("disconnected [%d] from context\n", task_pid_nr(task)));
4612 4614
4613 return 0; 4615 return 0;
4614} 4616}
@@ -4631,7 +4633,7 @@ pfm_exit_thread(struct task_struct *task)
4631 4633
4632 PROTECT_CTX(ctx, flags); 4634 PROTECT_CTX(ctx, flags);
4633 4635
4634 DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid)); 4636 DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task)));
4635 4637
4636 state = ctx->ctx_state; 4638 state = ctx->ctx_state;
4637 switch(state) { 4639 switch(state) {
@@ -4640,13 +4642,13 @@ pfm_exit_thread(struct task_struct *task)
4640 * only comes to this function if pfm_context is not NULL, i.e., cannot 4642 * only comes to this function if pfm_context is not NULL, i.e., cannot
4641 * be in unloaded state 4643 * be in unloaded state
4642 */ 4644 */
4643 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid); 4645 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task));
4644 break; 4646 break;
4645 case PFM_CTX_LOADED: 4647 case PFM_CTX_LOADED:
4646 case PFM_CTX_MASKED: 4648 case PFM_CTX_MASKED:
4647 ret = pfm_context_unload(ctx, NULL, 0, regs); 4649 ret = pfm_context_unload(ctx, NULL, 0, regs);
4648 if (ret) { 4650 if (ret) {
4649 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); 4651 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
4650 } 4652 }
4651 DPRINT(("ctx unloaded for current state was %d\n", state)); 4653 DPRINT(("ctx unloaded for current state was %d\n", state));
4652 4654
@@ -4655,12 +4657,12 @@ pfm_exit_thread(struct task_struct *task)
4655 case PFM_CTX_ZOMBIE: 4657 case PFM_CTX_ZOMBIE:
4656 ret = pfm_context_unload(ctx, NULL, 0, regs); 4658 ret = pfm_context_unload(ctx, NULL, 0, regs);
4657 if (ret) { 4659 if (ret) {
4658 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); 4660 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
4659 } 4661 }
4660 free_ok = 1; 4662 free_ok = 1;
4661 break; 4663 break;
4662 default: 4664 default:
4663 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state); 4665 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state);
4664 break; 4666 break;
4665 } 4667 }
4666 UNPROTECT_CTX(ctx, flags); 4668 UNPROTECT_CTX(ctx, flags);
@@ -4744,7 +4746,7 @@ recheck:
4744 DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", 4746 DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
4745 ctx->ctx_fd, 4747 ctx->ctx_fd,
4746 state, 4748 state,
4747 task->pid, 4749 task_pid_nr(task),
4748 task->state, PFM_CMD_STOPPED(cmd))); 4750 task->state, PFM_CMD_STOPPED(cmd)));
4749 4751
4750 /* 4752 /*
@@ -4791,7 +4793,7 @@ recheck:
4791 */ 4793 */
4792 if (PFM_CMD_STOPPED(cmd)) { 4794 if (PFM_CMD_STOPPED(cmd)) {
4793 if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { 4795 if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
4794 DPRINT(("[%d] task not in stopped state\n", task->pid)); 4796 DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
4795 return -EBUSY; 4797 return -EBUSY;
4796 } 4798 }
4797 /* 4799 /*
@@ -4884,7 +4886,7 @@ restart_args:
4884 * limit abuse to min page size 4886 * limit abuse to min page size
4885 */ 4887 */
4886 if (unlikely(sz > PFM_MAX_ARGSIZE)) { 4888 if (unlikely(sz > PFM_MAX_ARGSIZE)) {
4887 printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz); 4889 printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz);
4888 return -E2BIG; 4890 return -E2BIG;
4889 } 4891 }
4890 4892
@@ -5031,11 +5033,11 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
5031{ 5033{
5032 int ret; 5034 int ret;
5033 5035
5034 DPRINT(("entering for [%d]\n", current->pid)); 5036 DPRINT(("entering for [%d]\n", task_pid_nr(current)));
5035 5037
5036 ret = pfm_context_unload(ctx, NULL, 0, regs); 5038 ret = pfm_context_unload(ctx, NULL, 0, regs);
5037 if (ret) { 5039 if (ret) {
5038 printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret); 5040 printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret);
5039 } 5041 }
5040 5042
5041 /* 5043 /*
@@ -5072,7 +5074,7 @@ pfm_handle_work(void)
5072 5074
5073 ctx = PFM_GET_CTX(current); 5075 ctx = PFM_GET_CTX(current);
5074 if (ctx == NULL) { 5076 if (ctx == NULL) {
5075 printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid); 5077 printk(KERN_ERR "perfmon: [%d] has no PFM context\n", task_pid_nr(current));
5076 return; 5078 return;
5077 } 5079 }
5078 5080
@@ -5269,7 +5271,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
5269 DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " 5271 DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
5270 "used_pmds=0x%lx\n", 5272 "used_pmds=0x%lx\n",
5271 pmc0, 5273 pmc0,
5272 task ? task->pid: -1, 5274 task ? task_pid_nr(task): -1,
5273 (regs ? regs->cr_iip : 0), 5275 (regs ? regs->cr_iip : 0),
5274 CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", 5276 CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
5275 ctx->ctx_used_pmds[0])); 5277 ctx->ctx_used_pmds[0]));
@@ -5458,7 +5460,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
5458 } 5460 }
5459 5461
5460 DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", 5462 DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
5461 GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1, 5463 GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1,
5462 PFM_GET_WORK_PENDING(task), 5464 PFM_GET_WORK_PENDING(task),
5463 ctx->ctx_fl_trap_reason, 5465 ctx->ctx_fl_trap_reason,
5464 ovfl_pmds, 5466 ovfl_pmds,
@@ -5483,7 +5485,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
5483sanity_check: 5485sanity_check:
5484 printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", 5486 printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
5485 smp_processor_id(), 5487 smp_processor_id(),
5486 task ? task->pid : -1, 5488 task ? task_pid_nr(task) : -1,
5487 pmc0); 5489 pmc0);
5488 return; 5490 return;
5489 5491
@@ -5516,7 +5518,7 @@ stop_monitoring:
5516 * 5518 *
5517 * Overall pretty hairy stuff.... 5519 * Overall pretty hairy stuff....
5518 */ 5520 */
5519 DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1)); 5521 DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1));
5520 pfm_clear_psr_up(); 5522 pfm_clear_psr_up();
5521 ia64_psr(regs)->up = 0; 5523 ia64_psr(regs)->up = 0;
5522 ia64_psr(regs)->sp = 1; 5524 ia64_psr(regs)->sp = 1;
@@ -5577,13 +5579,13 @@ pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
5577 5579
5578report_spurious1: 5580report_spurious1:
5579 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", 5581 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
5580 this_cpu, task->pid); 5582 this_cpu, task_pid_nr(task));
5581 pfm_unfreeze_pmu(); 5583 pfm_unfreeze_pmu();
5582 return -1; 5584 return -1;
5583report_spurious2: 5585report_spurious2:
5584 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 5586 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n",
5585 this_cpu, 5587 this_cpu,
5586 task->pid); 5588 task_pid_nr(task));
5587 pfm_unfreeze_pmu(); 5589 pfm_unfreeze_pmu();
5588 return -1; 5590 return -1;
5589} 5591}
@@ -5870,7 +5872,8 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
5870 ia64_psr(regs)->sp = 1; 5872 ia64_psr(regs)->sp = 1;
5871 5873
5872 if (GET_PMU_OWNER() == task) { 5874 if (GET_PMU_OWNER() == task) {
5873 DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid)); 5875 DPRINT(("cleared ownership for [%d]\n",
5876 task_pid_nr(ctx->ctx_task)));
5874 SET_PMU_OWNER(NULL, NULL); 5877 SET_PMU_OWNER(NULL, NULL);
5875 } 5878 }
5876 5879
@@ -5882,7 +5885,7 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
5882 task->thread.pfm_context = NULL; 5885 task->thread.pfm_context = NULL;
5883 task->thread.flags &= ~IA64_THREAD_PM_VALID; 5886 task->thread.flags &= ~IA64_THREAD_PM_VALID;
5884 5887
5885 DPRINT(("force cleanup for [%d]\n", task->pid)); 5888 DPRINT(("force cleanup for [%d]\n", task_pid_nr(task)));
5886} 5889}
5887 5890
5888 5891
@@ -6426,7 +6429,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
6426 6429
6427 if (PMD_IS_COUNTING(i)) { 6430 if (PMD_IS_COUNTING(i)) {
6428 DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", 6431 DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
6429 task->pid, 6432 task_pid_nr(task),
6430 i, 6433 i,
6431 ctx->ctx_pmds[i].val, 6434 ctx->ctx_pmds[i].val,
6432 val & ovfl_val)); 6435 val & ovfl_val));
@@ -6448,11 +6451,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
6448 */ 6451 */
6449 if (pmc0 & (1UL << i)) { 6452 if (pmc0 & (1UL << i)) {
6450 val += 1 + ovfl_val; 6453 val += 1 + ovfl_val;
6451 DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i)); 6454 DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i));
6452 } 6455 }
6453 } 6456 }
6454 6457
6455 DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); 6458 DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val));
6456 6459
6457 if (is_self) ctx->th_pmds[i] = pmd_val; 6460 if (is_self) ctx->th_pmds[i] = pmd_val;
6458 6461
@@ -6793,14 +6796,14 @@ dump_pmu_state(const char *from)
6793 printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 6796 printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
6794 this_cpu, 6797 this_cpu,
6795 from, 6798 from,
6796 current->pid, 6799 task_pid_nr(current),
6797 regs->cr_iip, 6800 regs->cr_iip,
6798 current->comm); 6801 current->comm);
6799 6802
6800 task = GET_PMU_OWNER(); 6803 task = GET_PMU_OWNER();
6801 ctx = GET_PMU_CTX(); 6804 ctx = GET_PMU_CTX();
6802 6805
6803 printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx); 6806 printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);
6804 6807
6805 psr = pfm_get_psr(); 6808 psr = pfm_get_psr();
6806 6809
@@ -6848,7 +6851,7 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs)
6848{ 6851{
6849 struct thread_struct *thread; 6852 struct thread_struct *thread;
6850 6853
6851 DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid)); 6854 DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));
6852 6855
6853 thread = &task->thread; 6856 thread = &task->thread;
6854 6857
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
index ff80eab83b38..a7af1cb419f9 100644
--- a/arch/ia64/kernel/perfmon_default_smpl.c
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -44,11 +44,11 @@ default_validate(struct task_struct *task, unsigned int flags, int cpu, void *da
44 int ret = 0; 44 int ret = 0;
45 45
46 if (data == NULL) { 46 if (data == NULL) {
47 DPRINT(("[%d] no argument passed\n", task->pid)); 47 DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
48 return -EINVAL; 48 return -EINVAL;
49 } 49 }
50 50
51 DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu)); 51 DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
52 52
53 /* 53 /*
54 * must hold at least the buffer header + one minimally sized entry 54 * must hold at least the buffer header + one minimally sized entry
@@ -88,7 +88,7 @@ default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, v
88 hdr->hdr_count = 0UL; 88 hdr->hdr_count = 0UL;
89 89
90 DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n", 90 DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
91 task->pid, 91 task_pid_nr(task),
92 buf, 92 buf,
93 hdr->hdr_buf_size, 93 hdr->hdr_buf_size,
94 sizeof(*hdr), 94 sizeof(*hdr),
@@ -245,7 +245,7 @@ default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, stru
245static int 245static int
246default_exit(struct task_struct *task, void *buf, struct pt_regs *regs) 246default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
247{ 247{
248 DPRINT(("[%d] exit(%p)\n", task->pid, buf)); 248 DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
249 return 0; 249 return 0;
250} 250}
251 251
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index c613fc0e91cc..2418289ee5ca 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -105,7 +105,8 @@ show_regs (struct pt_regs *regs)
105 unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; 105 unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
106 106
107 print_modules(); 107 print_modules();
108 printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm); 108 printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current),
109 smp_processor_id(), current->comm);
109 printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n", 110 printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n",
110 regs->cr_ipsr, regs->cr_ifs, ip, print_tainted()); 111 regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
111 print_symbol("ip is at %s\n", ip); 112 print_symbol("ip is at %s\n", ip);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c5cfcfa4c87c..cbf67f1aa291 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -208,6 +208,48 @@ static int __init register_memory(void)
208 208
209__initcall(register_memory); 209__initcall(register_memory);
210 210
211
212#ifdef CONFIG_KEXEC
213static void __init setup_crashkernel(unsigned long total, int *n)
214{
215 unsigned long long base = 0, size = 0;
216 int ret;
217
218 ret = parse_crashkernel(boot_command_line, total,
219 &size, &base);
220 if (ret == 0 && size > 0) {
221 if (!base) {
222 sort_regions(rsvd_region, *n);
223 base = kdump_find_rsvd_region(size,
224 rsvd_region, *n);
225 }
226 if (base != ~0UL) {
227 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
228 "for crashkernel (System RAM: %ldMB)\n",
229 (unsigned long)(size >> 20),
230 (unsigned long)(base >> 20),
231 (unsigned long)(total >> 20));
232 rsvd_region[*n].start =
233 (unsigned long)__va(base);
234 rsvd_region[*n].end =
235 (unsigned long)__va(base + size);
236 (*n)++;
237 crashk_res.start = base;
238 crashk_res.end = base + size - 1;
239 }
240 }
241 efi_memmap_res.start = ia64_boot_param->efi_memmap;
242 efi_memmap_res.end = efi_memmap_res.start +
243 ia64_boot_param->efi_memmap_size;
244 boot_param_res.start = __pa(ia64_boot_param);
245 boot_param_res.end = boot_param_res.start +
246 sizeof(*ia64_boot_param);
247}
248#else
249static inline void __init setup_crashkernel(unsigned long total, int *n)
250{}
251#endif
252
211/** 253/**
212 * reserve_memory - setup reserved memory areas 254 * reserve_memory - setup reserved memory areas
213 * 255 *
@@ -219,6 +261,7 @@ void __init
219reserve_memory (void) 261reserve_memory (void)
220{ 262{
221 int n = 0; 263 int n = 0;
264 unsigned long total_memory;
222 265
223 /* 266 /*
224 * none of the entries in this table overlap 267 * none of the entries in this table overlap
@@ -254,50 +297,11 @@ reserve_memory (void)
254 n++; 297 n++;
255#endif 298#endif
256 299
257 efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); 300 total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
258 n++; 301 n++;
259 302
260#ifdef CONFIG_KEXEC 303 setup_crashkernel(total_memory, &n);
261 /* crashkernel=size@offset specifies the size to reserve for a crash 304
262 * kernel. If offset is 0, then it is determined automatically.
263 * By reserving this memory we guarantee that linux never set's it
264 * up as a DMA target.Useful for holding code to do something
265 * appropriate after a kernel panic.
266 */
267 {
268 char *from = strstr(boot_command_line, "crashkernel=");
269 unsigned long base, size;
270 if (from) {
271 size = memparse(from + 12, &from);
272 if (*from == '@')
273 base = memparse(from+1, &from);
274 else
275 base = 0;
276 if (size) {
277 if (!base) {
278 sort_regions(rsvd_region, n);
279 base = kdump_find_rsvd_region(size,
280 rsvd_region, n);
281 }
282 if (base != ~0UL) {
283 rsvd_region[n].start =
284 (unsigned long)__va(base);
285 rsvd_region[n].end =
286 (unsigned long)__va(base + size);
287 n++;
288 crashk_res.start = base;
289 crashk_res.end = base + size - 1;
290 }
291 }
292 }
293 efi_memmap_res.start = ia64_boot_param->efi_memmap;
294 efi_memmap_res.end = efi_memmap_res.start +
295 ia64_boot_param->efi_memmap_size;
296 boot_param_res.start = __pa(ia64_boot_param);
297 boot_param_res.end = boot_param_res.start +
298 sizeof(*ia64_boot_param);
299 }
300#endif
301 /* end of memory marker */ 305 /* end of memory marker */
302 rsvd_region[n].start = ~0UL; 306 rsvd_region[n].start = ~0UL;
303 rsvd_region[n].end = ~0UL; 307 rsvd_region[n].end = ~0UL;
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index aeec8184e862..cdb64cc4d9c8 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -227,7 +227,7 @@ ia64_rt_sigreturn (struct sigscratch *scr)
227 si.si_signo = SIGSEGV; 227 si.si_signo = SIGSEGV;
228 si.si_errno = 0; 228 si.si_errno = 0;
229 si.si_code = SI_KERNEL; 229 si.si_code = SI_KERNEL;
230 si.si_pid = current->pid; 230 si.si_pid = task_pid_vnr(current);
231 si.si_uid = current->uid; 231 si.si_uid = current->uid;
232 si.si_addr = sc; 232 si.si_addr = sc;
233 force_sig_info(SIGSEGV, &si, current); 233 force_sig_info(SIGSEGV, &si, current);
@@ -332,7 +332,7 @@ force_sigsegv_info (int sig, void __user *addr)
332 si.si_signo = SIGSEGV; 332 si.si_signo = SIGSEGV;
333 si.si_errno = 0; 333 si.si_errno = 0;
334 si.si_code = SI_KERNEL; 334 si.si_code = SI_KERNEL;
335 si.si_pid = current->pid; 335 si.si_pid = task_pid_vnr(current);
336 si.si_uid = current->uid; 336 si.si_uid = current->uid;
337 si.si_addr = addr; 337 si.si_addr = addr;
338 force_sig_info(SIGSEGV, &si, current); 338 force_sig_info(SIGSEGV, &si, current);
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 3aeaf15e468b..78d65cb947d2 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -61,7 +61,7 @@ die (const char *str, struct pt_regs *regs, long err)
61 61
62 if (++die.lock_owner_depth < 3) { 62 if (++die.lock_owner_depth < 3) {
63 printk("%s[%d]: %s %ld [%d]\n", 63 printk("%s[%d]: %s %ld [%d]\n",
64 current->comm, current->pid, str, err, ++die_counter); 64 current->comm, task_pid_nr(current), str, err, ++die_counter);
65 (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); 65 (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
66 show_regs(regs); 66 show_regs(regs);
67 } else 67 } else
@@ -315,7 +315,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
315 last.time = current_jiffies + 5 * HZ; 315 last.time = current_jiffies + 5 * HZ;
316 printk(KERN_WARNING 316 printk(KERN_WARNING
317 "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", 317 "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
318 current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); 318 current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
319 } 319 }
320 } 320 }
321 } 321 }
@@ -453,7 +453,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
453 if (code == 8) { 453 if (code == 8) {
454# ifdef CONFIG_IA64_PRINT_HAZARDS 454# ifdef CONFIG_IA64_PRINT_HAZARDS
455 printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", 455 printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
456 current->comm, current->pid, 456 current->comm, task_pid_nr(current),
457 regs.cr_iip + ia64_psr(&regs)->ri, regs.pr); 457 regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
458# endif 458# endif
459 return; 459 return;
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index fe6aa5a9f8fa..2173de9fe917 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1340,7 +1340,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1340 size_t len; 1340 size_t len;
1341 1341
1342 len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, " 1342 len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1343 "ip=0x%016lx\n\r", current->comm, current->pid, 1343 "ip=0x%016lx\n\r", current->comm,
1344 task_pid_nr(current),
1344 ifa, regs->cr_iip + ipsr->ri); 1345 ifa, regs->cr_iip + ipsr->ri);
1345 /* 1346 /*
1346 * Don't call tty_write_message() if we're in the kernel; we might 1347 * Don't call tty_write_message() if we're in the kernel; we might
@@ -1363,7 +1364,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1363 "administrator\n" 1364 "administrator\n"
1364 "echo 0 > /proc/sys/kernel/ignore-" 1365 "echo 0 > /proc/sys/kernel/ignore-"
1365 "unaligned-usertrap to re-enable\n", 1366 "unaligned-usertrap to re-enable\n",
1366 current->comm, current->pid); 1367 current->comm, task_pid_nr(current));
1367 } 1368 }
1368 } 1369 }
1369 } else { 1370 } else {
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 32f26253c4e8..7571076a16a1 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -274,7 +274,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
274 274
275 out_of_memory: 275 out_of_memory:
276 up_read(&mm->mmap_sem); 276 up_read(&mm->mmap_sem);
277 if (is_init(current)) { 277 if (is_global_init(current)) {
278 yield(); 278 yield();
279 down_read(&mm->mmap_sem); 279 down_read(&mm->mmap_sem);
280 goto survive; 280 goto survive;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 3e10152abbf0..c6c19bf11bec 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -127,8 +127,8 @@ ia64_init_addr_space (void)
127 vma->vm_mm = current->mm; 127 vma->vm_mm = current->mm;
128 vma->vm_start = current->thread.rbs_bot & PAGE_MASK; 128 vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
129 vma->vm_end = vma->vm_start + PAGE_SIZE; 129 vma->vm_end = vma->vm_start + PAGE_SIZE;
130 vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
131 vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; 130 vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
131 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
132 down_write(&current->mm->mmap_sem); 132 down_write(&current->mm->mmap_sem);
133 if (insert_vm_struct(current->mm, vma)) { 133 if (insert_vm_struct(current->mm, vma)) {
134 up_write(&current->mm->mmap_sem); 134 up_write(&current->mm->mmap_sem);
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
deleted file mode 100644
index 97271ab484dc..000000000000
--- a/arch/ia64/oprofile/Kconfig
+++ /dev/null
@@ -1,20 +0,0 @@
1config PROFILING
2 bool "Profiling support (EXPERIMENTAL)"
3 help
4 Say Y here to enable the extended profiling support mechanisms used
5 by profilers such as OProfile.
6
7config OPROFILE
8 tristate "OProfile system profiling (EXPERIMENTAL)"
9 depends on PROFILING
10 help
11 OProfile is a profiling system capable of profiling the
12 whole system, include the kernel, kernel modules, libraries,
13 and applications.
14
15 Due to firmware bugs, you may need to use the "nohalt" boot
16 option if you're using OProfile with the hardware performance
17 counters.
18
19 If unsure, say N.
20
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index bd5fe76401f1..ab9a264cb194 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -426,7 +426,7 @@ source "drivers/Kconfig"
426 426
427source "fs/Kconfig" 427source "fs/Kconfig"
428 428
429source "arch/m32r/oprofile/Kconfig" 429source "kernel/Kconfig.instrumentation"
430 430
431source "arch/m32r/Kconfig.debug" 431source "arch/m32r/Kconfig.debug"
432 432
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index 97e0b1c0830e..89ba4a0b5d51 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -196,7 +196,7 @@ static void show_registers(struct pt_regs *regs)
196 printk("SPI: %08lx\n", sp); 196 printk("SPI: %08lx\n", sp);
197 } 197 }
198 printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)", 198 printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
199 current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current); 199 current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
200 200
201 /* 201 /*
202 * When in-kernel, we also print out the stack and code at the 202 * When in-kernel, we also print out the stack and code at the
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index 70a766aad3e0..4a71df4c1b30 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -271,7 +271,7 @@ no_context:
271 */ 271 */
272out_of_memory: 272out_of_memory:
273 up_read(&mm->mmap_sem); 273 up_read(&mm->mmap_sem);
274 if (is_init(tsk)) { 274 if (is_global_init(tsk)) {
275 yield(); 275 yield();
276 down_read(&mm->mmap_sem); 276 down_read(&mm->mmap_sem);
277 goto survive; 277 goto survive;
diff --git a/arch/m32r/oprofile/Kconfig b/arch/m32r/oprofile/Kconfig
deleted file mode 100644
index 19d37730b664..000000000000
--- a/arch/m32r/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING
15 help
16 OProfile is a profiling system capable of profiling the
17 whole system, include the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22endmenu
23
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 20a9c08e59c3..01dee84f840a 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -683,6 +683,8 @@ endmenu
683 683
684source "fs/Kconfig" 684source "fs/Kconfig"
685 685
686source "kernel/Kconfig.instrumentation"
687
686source "arch/m68k/Kconfig.debug" 688source "arch/m68k/Kconfig.debug"
687 689
688source "security/Kconfig" 690source "security/Kconfig"
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 4e2752a0e89b..97f556fa4932 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -900,7 +900,7 @@ void show_registers(struct pt_regs *regs)
900 regs->d4, regs->d5, regs->a0, regs->a1); 900 regs->d4, regs->d5, regs->a0, regs->a1);
901 901
902 printk("Process %s (pid: %d, task=%p)\n", 902 printk("Process %s (pid: %d, task=%p)\n",
903 current->comm, current->pid, current); 903 current->comm, task_pid_nr(current), current);
904 addr = (unsigned long)&fp->un; 904 addr = (unsigned long)&fp->un;
905 printk("Frame format=%X ", regs->format); 905 printk("Frame format=%X ", regs->format);
906 switch (regs->format) { 906 switch (regs->format) {
@@ -1038,7 +1038,7 @@ void bad_super_trap (struct frame *fp)
1038 fp->un.fmtb.daddr, space_names[ssw & DFC], 1038 fp->un.fmtb.daddr, space_names[ssw & DFC],
1039 fp->ptregs.pc); 1039 fp->ptregs.pc);
1040 } 1040 }
1041 printk ("Current process id is %d\n", current->pid); 1041 printk ("Current process id is %d\n", task_pid_nr(current));
1042 die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0); 1042 die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
1043} 1043}
1044 1044
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index eaa618681159..f493f03231d5 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -180,7 +180,7 @@ good_area:
180 */ 180 */
181out_of_memory: 181out_of_memory:
182 up_read(&mm->mmap_sem); 182 up_read(&mm->mmap_sem);
183 if (is_init(current)) { 183 if (is_global_init(current)) {
184 yield(); 184 yield();
185 down_read(&mm->mmap_sem); 185 down_read(&mm->mmap_sem);
186 goto survive; 186 goto survive;
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 185906b54cb0..f52c627bdadd 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -696,6 +696,8 @@ source "drivers/Kconfig"
696 696
697source "fs/Kconfig" 697source "fs/Kconfig"
698 698
699source "kernel/Kconfig.instrumentation"
700
699source "arch/m68knommu/Kconfig.debug" 701source "arch/m68knommu/Kconfig.debug"
700 702
701source "security/Kconfig" 703source "security/Kconfig"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index cb027580cd1d..4dc142d394a3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2005,7 +2005,7 @@ source "drivers/Kconfig"
2005 2005
2006source "fs/Kconfig" 2006source "fs/Kconfig"
2007 2007
2008source "arch/mips/oprofile/Kconfig" 2008source "kernel/Kconfig.instrumentation"
2009 2009
2010source "arch/mips/Kconfig.debug" 2010source "arch/mips/Kconfig.debug"
2011 2011
diff --git a/arch/mips/au1000/pb1200/irqmap.c b/arch/mips/au1000/pb1200/irqmap.c
index 5f48b0603796..bdf00e2a35e4 100644
--- a/arch/mips/au1000/pb1200/irqmap.c
+++ b/arch/mips/au1000/pb1200/irqmap.c
@@ -36,8 +36,8 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/random.h> 37#include <linux/random.h>
38#include <linux/delay.h> 38#include <linux/delay.h>
39#include <linux/bitops.h>
39 40
40#include <asm/bitops.h>
41#include <asm/bootinfo.h> 41#include <asm/bootinfo.h>
42#include <asm/io.h> 42#include <asm/io.h>
43#include <asm/mipsregs.h> 43#include <asm/mipsregs.h>
diff --git a/arch/mips/basler/excite/excite_irq.c b/arch/mips/basler/excite/excite_irq.c
index 1ecab6350421..4903e067916b 100644
--- a/arch/mips/basler/excite/excite_irq.c
+++ b/arch/mips/basler/excite/excite_irq.c
@@ -29,7 +29,7 @@
29#include <linux/timex.h> 29#include <linux/timex.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/random.h> 31#include <linux/random.h>
32#include <asm/bitops.h> 32#include <linux/bitops.h>
33#include <asm/bootinfo.h> 33#include <asm/bootinfo.h>
34#include <asm/io.h> 34#include <asm/io.h>
35#include <asm/irq.h> 35#include <asm/irq.h>
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index 49bcc58929ba..892d4c38fd0d 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -175,6 +175,7 @@ CONFIG_POSIX_MQUEUE=y
175CONFIG_IKCONFIG=y 175CONFIG_IKCONFIG=y
176CONFIG_IKCONFIG_PROC=y 176CONFIG_IKCONFIG_PROC=y
177CONFIG_LOG_BUF_SHIFT=15 177CONFIG_LOG_BUF_SHIFT=15
178CONFIG_CGROUPS=y
178CONFIG_CPUSETS=y 179CONFIG_CPUSETS=y
179CONFIG_SYSFS_DEPRECATED=y 180CONFIG_SYSFS_DEPRECATED=y
180CONFIG_RELAY=y 181CONFIG_RELAY=y
diff --git a/arch/mips/configs/sb1250-swarm_defconfig b/arch/mips/configs/sb1250-swarm_defconfig
index 3ed991ae0ebe..49dfcef2518c 100644
--- a/arch/mips/configs/sb1250-swarm_defconfig
+++ b/arch/mips/configs/sb1250-swarm_defconfig
@@ -196,6 +196,7 @@ CONFIG_SYSVIPC_SYSCTL=y
196# CONFIG_UTS_NS is not set 196# CONFIG_UTS_NS is not set
197# CONFIG_AUDIT is not set 197# CONFIG_AUDIT is not set
198# CONFIG_IKCONFIG is not set 198# CONFIG_IKCONFIG is not set
199CONFIG_CGROUPS=y
199CONFIG_CPUSETS=y 200CONFIG_CPUSETS=y
200CONFIG_SYSFS_DEPRECATED=y 201CONFIG_SYSFS_DEPRECATED=y
201CONFIG_RELAY=y 202CONFIG_RELAY=y
diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c
index b997af713eb3..7852c7cdf29e 100644
--- a/arch/mips/kernel/irixelf.c
+++ b/arch/mips/kernel/irixelf.c
@@ -1172,8 +1172,8 @@ static int irix_core_dump(long signr, struct pt_regs *regs, struct file *file, u
1172 prstatus.pr_sighold = current->blocked.sig[0]; 1172 prstatus.pr_sighold = current->blocked.sig[0];
1173 psinfo.pr_pid = prstatus.pr_pid = current->pid; 1173 psinfo.pr_pid = prstatus.pr_pid = current->pid;
1174 psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid; 1174 psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
1175 psinfo.pr_pgrp = prstatus.pr_pgrp = process_group(current); 1175 psinfo.pr_pgrp = prstatus.pr_pgrp = task_pgrp_nr(current);
1176 psinfo.pr_sid = prstatus.pr_sid = process_session(current); 1176 psinfo.pr_sid = prstatus.pr_sid = task_session_nr(current);
1177 if (current->pid == current->tgid) { 1177 if (current->pid == current->tgid) {
1178 /* 1178 /*
1179 * This is the record for the group leader. Add in the 1179 * This is the record for the group leader. Add in the
diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c
index 85c2e389edd6..a0a91056fda7 100644
--- a/arch/mips/kernel/irixsig.c
+++ b/arch/mips/kernel/irixsig.c
@@ -609,7 +609,7 @@ repeat:
609 p = list_entry(_p, struct task_struct, sibling); 609 p = list_entry(_p, struct task_struct, sibling);
610 if ((type == IRIX_P_PID) && p->pid != pid) 610 if ((type == IRIX_P_PID) && p->pid != pid)
611 continue; 611 continue;
612 if ((type == IRIX_P_PGID) && process_group(p) != pid) 612 if ((type == IRIX_P_PGID) && task_pgrp_nr(p) != pid)
613 continue; 613 continue;
614 if ((p->exit_signal != SIGCHLD)) 614 if ((p->exit_signal != SIGCHLD))
615 continue; 615 continue;
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index ee7790d9debe..4c477c7ff74a 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -763,11 +763,11 @@ asmlinkage int irix_setpgrp(int flags)
763 printk("[%s:%d] setpgrp(%d) ", current->comm, current->pid, flags); 763 printk("[%s:%d] setpgrp(%d) ", current->comm, current->pid, flags);
764#endif 764#endif
765 if(!flags) 765 if(!flags)
766 error = process_group(current); 766 error = task_pgrp_nr(current);
767 else 767 else
768 error = sys_setsid(); 768 error = sys_setsid();
769#ifdef DEBUG_PROCGRPS 769#ifdef DEBUG_PROCGRPS
770 printk("returning %d\n", process_group(current)); 770 printk("returning %d\n", task_pgrp_nr(current));
771#endif 771#endif
772 772
773 return error; 773 return error;
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 7b78d137259f..fa500787152d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -314,7 +314,7 @@ void show_registers(const struct pt_regs *regs)
314 __show_regs(regs); 314 __show_regs(regs);
315 print_modules(); 315 print_modules();
316 printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n", 316 printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
317 current->comm, current->pid, current_thread_info(), current); 317 current->comm, task_pid_nr(current), current_thread_info(), current);
318 show_stacktrace(current, regs); 318 show_stacktrace(current, regs);
319 show_code((unsigned int __user *) regs->cp0_epc); 319 show_code((unsigned int __user *) regs->cp0_epc);
320 printk("\n"); 320 printk("\n");
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 5699c7713e2f..fa636fc6b7b9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -173,7 +173,7 @@ no_context:
173 */ 173 */
174out_of_memory: 174out_of_memory:
175 up_read(&mm->mmap_sem); 175 up_read(&mm->mmap_sem);
176 if (is_init(tsk)) { 176 if (is_global_init(tsk)) {
177 yield(); 177 yield();
178 down_read(&mm->mmap_sem); 178 down_read(&mm->mmap_sem);
179 goto survive; 179 goto survive;
diff --git a/arch/mips/oprofile/Kconfig b/arch/mips/oprofile/Kconfig
deleted file mode 100644
index fb6f235348b0..000000000000
--- a/arch/mips/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING && !MIPS_MT_SMTC && EXPERIMENTAL
15 help
16 OProfile is a profiling system capable of profiling the
17 whole system, include the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22endmenu
23
diff --git a/arch/mips/tx4938/common/setup.c b/arch/mips/tx4938/common/setup.c
index be3b88dd4c1f..3ba4101d141e 100644
--- a/arch/mips/tx4938/common/setup.c
+++ b/arch/mips/tx4938/common/setup.c
@@ -24,7 +24,7 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/random.h> 25#include <linux/random.h>
26#include <linux/irq.h> 26#include <linux/irq.h>
27#include <asm/bitops.h> 27#include <linux/bitops.h>
28#include <asm/bootinfo.h> 28#include <asm/bootinfo.h>
29#include <asm/io.h> 29#include <asm/io.h>
30#include <asm/irq.h> 30#include <asm/irq.h>
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 3d73545e8c48..b8ef1787a191 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -267,7 +267,7 @@ source "drivers/Kconfig"
267 267
268source "fs/Kconfig" 268source "fs/Kconfig"
269 269
270source "arch/parisc/oprofile/Kconfig" 270source "kernel/Kconfig.instrumentation"
271 271
272source "arch/parisc/Kconfig.debug" 272source "arch/parisc/Kconfig.debug"
273 273
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index fb35ebc0c4da..2ce3806f02e1 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -181,7 +181,7 @@ give_sigsegv:
181 si.si_signo = SIGSEGV; 181 si.si_signo = SIGSEGV;
182 si.si_errno = 0; 182 si.si_errno = 0;
183 si.si_code = SI_KERNEL; 183 si.si_code = SI_KERNEL;
184 si.si_pid = current->pid; 184 si.si_pid = task_pid_vnr(current);
185 si.si_uid = current->uid; 185 si.si_uid = current->uid;
186 si.si_addr = &frame->uc; 186 si.si_addr = &frame->uc;
187 force_sig_info(SIGSEGV, &si, current); 187 force_sig_info(SIGSEGV, &si, current);
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index bbf029a184ac..99fd56939afa 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -219,7 +219,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
219 return; /* STFU */ 219 return; /* STFU */
220 220
221 printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n", 221 printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
222 current->comm, current->pid, str, err, regs->iaoq[0]); 222 current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
223#ifdef PRINT_USER_FAULTS 223#ifdef PRINT_USER_FAULTS
224 /* XXX for debugging only */ 224 /* XXX for debugging only */
225 show_regs(regs); 225 show_regs(regs);
@@ -252,7 +252,7 @@ KERN_CRIT " || ||\n");
252 252
253 if (err) 253 if (err)
254 printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n", 254 printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
255 current->comm, current->pid, str, err); 255 current->comm, task_pid_nr(current), str, err);
256 256
257 /* Wot's wrong wif bein' racy? */ 257 /* Wot's wrong wif bein' racy? */
258 if (current->thread.flags & PARISC_KERNEL_DEATH) { 258 if (current->thread.flags & PARISC_KERNEL_DEATH) {
@@ -317,7 +317,7 @@ static void handle_break(struct pt_regs *regs)
317 if (unlikely(iir != GDB_BREAK_INSN)) { 317 if (unlikely(iir != GDB_BREAK_INSN)) {
318 printk(KERN_DEBUG "break %d,%d: pid=%d command='%s'\n", 318 printk(KERN_DEBUG "break %d,%d: pid=%d command='%s'\n",
319 iir & 31, (iir>>13) & ((1<<13)-1), 319 iir & 31, (iir>>13) & ((1<<13)-1),
320 current->pid, current->comm); 320 task_pid_nr(current), current->comm);
321 show_regs(regs); 321 show_regs(regs);
322 } 322 }
323#endif 323#endif
@@ -747,7 +747,7 @@ void handle_interruption(int code, struct pt_regs *regs)
747 if (user_mode(regs)) { 747 if (user_mode(regs)) {
748#ifdef PRINT_USER_FAULTS 748#ifdef PRINT_USER_FAULTS
749 printk(KERN_DEBUG "\nhandle_interruption() pid=%d command='%s'\n", 749 printk(KERN_DEBUG "\nhandle_interruption() pid=%d command='%s'\n",
750 current->pid, current->comm); 750 task_pid_nr(current), current->comm);
751 show_regs(regs); 751 show_regs(regs);
752#endif 752#endif
753 /* SIGBUS, for lack of a better one. */ 753 /* SIGBUS, for lack of a better one. */
@@ -772,7 +772,7 @@ void handle_interruption(int code, struct pt_regs *regs)
772 else 772 else
773 printk(KERN_DEBUG "User Fault (long pointer) (fault %d) ", 773 printk(KERN_DEBUG "User Fault (long pointer) (fault %d) ",
774 code); 774 code);
775 printk("pid=%d command='%s'\n", current->pid, current->comm); 775 printk("pid=%d command='%s'\n", task_pid_nr(current), current->comm);
776 show_regs(regs); 776 show_regs(regs);
777#endif 777#endif
778 si.si_signo = SIGSEGV; 778 si.si_signo = SIGSEGV;
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 347bb922e6d0..aebf3c168871 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -469,7 +469,7 @@ void handle_unaligned(struct pt_regs *regs)
469 && ++unaligned_count < 5) { 469 && ++unaligned_count < 5) {
470 char buf[256]; 470 char buf[256];
471 sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n", 471 sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n",
472 current->comm, current->pid, regs->ior, regs->iaoq[0]); 472 current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]);
473 printk(KERN_WARNING "%s", buf); 473 printk(KERN_WARNING "%s", buf);
474#ifdef DEBUG_UNALIGNED 474#ifdef DEBUG_UNALIGNED
475 show_regs(regs); 475 show_regs(regs);
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 1c091b415cd9..b2e3e9a8cece 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -211,7 +211,7 @@ bad_area:
211#ifdef PRINT_USER_FAULTS 211#ifdef PRINT_USER_FAULTS
212 printk(KERN_DEBUG "\n"); 212 printk(KERN_DEBUG "\n");
213 printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n", 213 printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n",
214 tsk->pid, tsk->comm, code, address); 214 task_pid_nr(tsk), tsk->comm, code, address);
215 if (vma) { 215 if (vma) {
216 printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n", 216 printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n",
217 vma->vm_start, vma->vm_end); 217 vma->vm_start, vma->vm_end);
diff --git a/arch/parisc/oprofile/Kconfig b/arch/parisc/oprofile/Kconfig
deleted file mode 100644
index 5ade19801b97..000000000000
--- a/arch/parisc/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING
15 help
16 OProfile is a profiling system capable of profiling the
17 whole system, include the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22endmenu
23
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3763f681ce4c..18f397ca05ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -669,20 +669,7 @@ source "arch/powerpc/sysdev/qe_lib/Kconfig"
669 669
670source "lib/Kconfig" 670source "lib/Kconfig"
671 671
672menu "Instrumentation Support" 672source "kernel/Kconfig.instrumentation"
673
674source "arch/powerpc/oprofile/Kconfig"
675
676config KPROBES
677 bool "Kprobes"
678 depends on !BOOKE && !4xx && KALLSYMS && MODULES
679 help
680 Kprobes allows you to trap at almost any kernel address and
681 execute a callback function. register_kprobe() establishes
682 a probepoint and specifies the callback. Kprobes is useful
683 for kernel debugging, non-intrusive instrumentation and testing.
684 If in doubt, say "N".
685endmenu
686 673
687source "arch/powerpc/Kconfig.debug" 674source "arch/powerpc/Kconfig.debug"
688 675
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 8b47c846421c..dcd7c02727c2 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -68,6 +68,7 @@ CONFIG_SYSVIPC_SYSCTL=y
68CONFIG_IKCONFIG=y 68CONFIG_IKCONFIG=y
69CONFIG_IKCONFIG_PROC=y 69CONFIG_IKCONFIG_PROC=y
70CONFIG_LOG_BUF_SHIFT=15 70CONFIG_LOG_BUF_SHIFT=15
71CONFIG_CGROUPS=y
71CONFIG_CPUSETS=y 72CONFIG_CPUSETS=y
72CONFIG_SYSFS_DEPRECATED=y 73CONFIG_SYSFS_DEPRECATED=y
73# CONFIG_RELAY is not set 74# CONFIG_RELAY is not set
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index bb8d4e46f0c5..05582af50c5b 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -71,6 +71,7 @@ CONFIG_TASK_DELAY_ACCT=y
71CONFIG_IKCONFIG=y 71CONFIG_IKCONFIG=y
72CONFIG_IKCONFIG_PROC=y 72CONFIG_IKCONFIG_PROC=y
73CONFIG_LOG_BUF_SHIFT=17 73CONFIG_LOG_BUF_SHIFT=17
74CONFIG_CGROUPS=y
74CONFIG_CPUSETS=y 75CONFIG_CPUSETS=y
75CONFIG_SYSFS_DEPRECATED=y 76CONFIG_SYSFS_DEPRECATED=y
76CONFIG_RELAY=y 77CONFIG_RELAY=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index c09eb8cfbe71..62a38406b62f 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -71,6 +71,7 @@ CONFIG_AUDITSYSCALL=y
71CONFIG_IKCONFIG=y 71CONFIG_IKCONFIG=y
72CONFIG_IKCONFIG_PROC=y 72CONFIG_IKCONFIG_PROC=y
73CONFIG_LOG_BUF_SHIFT=17 73CONFIG_LOG_BUF_SHIFT=17
74CONFIG_CGROUPS=y
74CONFIG_CPUSETS=y 75CONFIG_CPUSETS=y
75CONFIG_SYSFS_DEPRECATED=y 76CONFIG_SYSFS_DEPRECATED=y
76# CONFIG_RELAY is not set 77# CONFIG_RELAY is not set
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index e60a0c544d63..c0c8e8c3ced9 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -61,45 +61,39 @@ NORET_TYPE void machine_kexec(struct kimage *image)
61 for(;;); 61 for(;;);
62} 62}
63 63
64static int __init early_parse_crashk(char *p)
65{
66 unsigned long size;
67
68 if (!p)
69 return 1;
70
71 size = memparse(p, &p);
72
73 if (*p == '@')
74 crashk_res.start = memparse(p + 1, &p);
75 else
76 crashk_res.start = KDUMP_KERNELBASE;
77
78 crashk_res.end = crashk_res.start + size - 1;
79
80 return 0;
81}
82early_param("crashkernel", early_parse_crashk);
83
84void __init reserve_crashkernel(void) 64void __init reserve_crashkernel(void)
85{ 65{
86 unsigned long size; 66 unsigned long long crash_size, crash_base;
67 int ret;
68
69 /* this is necessary because of lmb_phys_mem_size() */
70 lmb_analyze();
71
72 /* use common parsing */
73 ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(),
74 &crash_size, &crash_base);
75 if (ret == 0 && crash_size > 0) {
76 if (crash_base == 0)
77 crash_base = KDUMP_KERNELBASE;
78 crashk_res.start = crash_base;
79 } else {
80 /* handle the device tree */
81 crash_size = crashk_res.end - crashk_res.start + 1;
82 }
87 83
88 if (crashk_res.start == 0) 84 if (crash_size == 0)
89 return; 85 return;
90 86
91 /* We might have got these values via the command line or the 87 /* We might have got these values via the command line or the
92 * device tree, either way sanitise them now. */ 88 * device tree, either way sanitise them now. */
93 89
94 size = crashk_res.end - crashk_res.start + 1;
95
96 if (crashk_res.start != KDUMP_KERNELBASE) 90 if (crashk_res.start != KDUMP_KERNELBASE)
97 printk("Crash kernel location must be 0x%x\n", 91 printk("Crash kernel location must be 0x%x\n",
98 KDUMP_KERNELBASE); 92 KDUMP_KERNELBASE);
99 93
100 crashk_res.start = KDUMP_KERNELBASE; 94 crashk_res.start = KDUMP_KERNELBASE;
101 size = PAGE_ALIGN(size); 95 crash_size = PAGE_ALIGN(crash_size);
102 crashk_res.end = crashk_res.start + size - 1; 96 crashk_res.end = crashk_res.start + crash_size - 1;
103 97
104 /* Crash kernel trumps memory limit */ 98 /* Crash kernel trumps memory limit */
105 if (memory_limit && memory_limit <= crashk_res.end) { 99 if (memory_limit && memory_limit <= crashk_res.end) {
@@ -108,7 +102,13 @@ void __init reserve_crashkernel(void)
108 memory_limit); 102 memory_limit);
109 } 103 }
110 104
111 lmb_reserve(crashk_res.start, size); 105 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
106 "for crashkernel (System RAM: %ldMB)\n",
107 (unsigned long)(crash_size >> 20),
108 (unsigned long)(crashk_res.start >> 20),
109 (unsigned long)(lmb_phys_mem_size() >> 20));
110
111 lmb_reserve(crashk_res.start, crash_size);
112} 112}
113 113
114int overlaps_crashkernel(unsigned long start, unsigned long size) 114int overlaps_crashkernel(unsigned long start, unsigned long size)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ea6ad7a2a7e3..b9d88374f14f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -459,7 +459,7 @@ void show_regs(struct pt_regs * regs)
459 printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); 459 printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
460#endif 460#endif
461 printk("TASK = %p[%d] '%s' THREAD: %p", 461 printk("TASK = %p[%d] '%s' THREAD: %p",
462 current, current->pid, current->comm, task_thread_info(current)); 462 current, task_pid_nr(current), current->comm, task_thread_info(current));
463 463
464#ifdef CONFIG_SMP 464#ifdef CONFIG_SMP
465 printk(" CPU: %d", smp_processor_id()); 465 printk(" CPU: %d", smp_processor_id());
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bf9e39c6e296..59c464e26f38 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -201,7 +201,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
201 * generate the same exception over and over again and we get 201 * generate the same exception over and over again and we get
202 * nowhere. Better to kill it and let the kernel panic. 202 * nowhere. Better to kill it and let the kernel panic.
203 */ 203 */
204 if (is_init(current)) { 204 if (is_global_init(current)) {
205 __sighandler_t handler; 205 __sighandler_t handler;
206 206
207 spin_lock_irq(&current->sighand->siglock); 207 spin_lock_irq(&current->sighand->siglock);
@@ -881,7 +881,7 @@ void nonrecoverable_exception(struct pt_regs *regs)
881void trace_syscall(struct pt_regs *regs) 881void trace_syscall(struct pt_regs *regs)
882{ 882{
883 printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", 883 printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
884 current, current->pid, regs->nip, regs->link, regs->gpr[0], 884 current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
885 regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); 885 regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
886} 886}
887 887
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ab3546c5ac3a..a18fda361cc0 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -375,7 +375,7 @@ bad_area_nosemaphore:
375 */ 375 */
376out_of_memory: 376out_of_memory:
377 up_read(&mm->mmap_sem); 377 up_read(&mm->mmap_sem);
378 if (is_init(current)) { 378 if (is_global_init(current)) {
379 yield(); 379 yield();
380 down_read(&mm->mmap_sem); 380 down_read(&mm->mmap_sem);
381 goto survive; 381 goto survive;
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
deleted file mode 100644
index 7089e79689b9..000000000000
--- a/arch/powerpc/oprofile/Kconfig
+++ /dev/null
@@ -1,24 +0,0 @@
1config PROFILING
2 bool "Profiling support (EXPERIMENTAL)"
3 help
4 Say Y here to enable the extended profiling support mechanisms used
5 by profilers such as OProfile.
6
7
8config OPROFILE
9 tristate "OProfile system profiling (EXPERIMENTAL)"
10 depends on PROFILING
11 help
12 OProfile is a profiling system capable of profiling the
13 whole system, include the kernel, kernel modules, libraries,
14 and applications.
15
16 If unsure, say N.
17
18config OPROFILE_CELL
19 bool "OProfile for Cell Broadband Engine"
20 depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
21 default y
22 help
23 Profiling of Cell BE SPUs requires special support enabled
24 by this option.
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 354c05861629..144177d77cf1 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -41,13 +41,13 @@
41#include <linux/root_dev.h> 41#include <linux/root_dev.h>
42#include <linux/serial.h> 42#include <linux/serial.h>
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/bitops.h>
44 45
45#include <asm/processor.h> 46#include <asm/processor.h>
46#include <asm/sections.h> 47#include <asm/sections.h>
47#include <asm/prom.h> 48#include <asm/prom.h>
48#include <asm/system.h> 49#include <asm/system.h>
49#include <asm/pgtable.h> 50#include <asm/pgtable.h>
50#include <asm/bitops.h>
51#include <asm/io.h> 51#include <asm/io.h>
52#include <asm/kexec.h> 52#include <asm/kexec.h>
53#include <asm/pci-bridge.h> 53#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 3a393c7f390e..a1ab25c7082f 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -332,7 +332,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
332 err->disposition == RTAS_DISP_NOT_RECOVERED && 332 err->disposition == RTAS_DISP_NOT_RECOVERED &&
333 err->target == RTAS_TARGET_MEMORY && 333 err->target == RTAS_TARGET_MEMORY &&
334 err->type == RTAS_TYPE_ECC_UNCORR && 334 err->type == RTAS_TYPE_ECC_UNCORR &&
335 !(current->pid == 0 || is_init(current))) { 335 !(current->pid == 0 || is_global_init(current))) {
336 /* Kill off a user process with an ECC error */ 336 /* Kill off a user process with an ECC error */
337 printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", 337 printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
338 current->pid); 338 current->pid);
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index 607925c8a99e..6473fa7cb4b9 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -1317,7 +1317,7 @@ endmenu
1317 1317
1318source "lib/Kconfig" 1318source "lib/Kconfig"
1319 1319
1320source "arch/powerpc/oprofile/Kconfig" 1320source "kernel/Kconfig.instrumentation"
1321 1321
1322source "arch/ppc/Kconfig.debug" 1322source "arch/ppc/Kconfig.debug"
1323 1323
diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c
index 3f3b292eb773..c78568905c3b 100644
--- a/arch/ppc/kernel/traps.c
+++ b/arch/ppc/kernel/traps.c
@@ -121,7 +121,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
121 * generate the same exception over and over again and we get 121 * generate the same exception over and over again and we get
122 * nowhere. Better to kill it and let the kernel panic. 122 * nowhere. Better to kill it and let the kernel panic.
123 */ 123 */
124 if (is_init(current)) { 124 if (is_global_init(current)) {
125 __sighandler_t handler; 125 __sighandler_t handler;
126 126
127 spin_lock_irq(&current->sighand->siglock); 127 spin_lock_irq(&current->sighand->siglock);
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
index 94913ddcf76e..254c23b755e6 100644
--- a/arch/ppc/mm/fault.c
+++ b/arch/ppc/mm/fault.c
@@ -290,7 +290,7 @@ bad_area:
290 */ 290 */
291out_of_memory: 291out_of_memory:
292 up_read(&mm->mmap_sem); 292 up_read(&mm->mmap_sem);
293 if (is_init(current)) { 293 if (is_global_init(current)) {
294 yield(); 294 yield();
295 down_read(&mm->mmap_sem); 295 down_read(&mm->mmap_sem);
296 goto survive; 296 goto survive;
diff --git a/arch/ppc/platforms/chestnut.c b/arch/ppc/platforms/chestnut.c
index 248684f50dd9..dcd6070b85eb 100644
--- a/arch/ppc/platforms/chestnut.c
+++ b/arch/ppc/platforms/chestnut.c
@@ -49,7 +49,6 @@ extern void gen550_progress(char *, unsigned short);
49extern void gen550_init(int, struct uart_port *); 49extern void gen550_init(int, struct uart_port *);
50extern void mv64360_pcibios_fixup(mv64x60_handle_t *bh); 50extern void mv64360_pcibios_fixup(mv64x60_handle_t *bh);
51 51
52#define BIT(x) (1<<x)
53#define CHESTNUT_PRESERVE_MASK (BIT(MV64x60_CPU2DEV_0_WIN) | \ 52#define CHESTNUT_PRESERVE_MASK (BIT(MV64x60_CPU2DEV_0_WIN) | \
54 BIT(MV64x60_CPU2DEV_1_WIN) | \ 53 BIT(MV64x60_CPU2DEV_1_WIN) | \
55 BIT(MV64x60_CPU2DEV_2_WIN) | \ 54 BIT(MV64x60_CPU2DEV_2_WIN) | \
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b71132166f60..4ec716d8c1a6 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -529,21 +529,7 @@ source "drivers/Kconfig"
529 529
530source "fs/Kconfig" 530source "fs/Kconfig"
531 531
532menu "Instrumentation Support" 532source "kernel/Kconfig.instrumentation"
533
534source "arch/s390/oprofile/Kconfig"
535
536config KPROBES
537 bool "Kprobes (EXPERIMENTAL)"
538 depends on EXPERIMENTAL && MODULES
539 help
540 Kprobes allows you to trap at almost any kernel address and
541 execute a callback function. register_kprobe() establishes
542 a probepoint and specifies the callback. Kprobes is useful
543 for kernel debugging, non-intrusive instrumentation and testing.
544 If in doubt, say "N".
545
546endmenu
547 533
548source "arch/s390/Kconfig.debug" 534source "arch/s390/Kconfig.debug"
549 535
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index abb447a3e472..70c57378f426 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -166,7 +166,7 @@ void show_regs(struct pt_regs *regs)
166 166
167 printk("CPU: %d %s\n", task_thread_info(tsk)->cpu, print_tainted()); 167 printk("CPU: %d %s\n", task_thread_info(tsk)->cpu, print_tainted());
168 printk("Process %s (pid: %d, task: %p, ksp: %p)\n", 168 printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
169 current->comm, current->pid, (void *) tsk, 169 current->comm, task_pid_nr(current), (void *) tsk,
170 (void *) tsk->thread.ksp); 170 (void *) tsk->thread.ksp);
171 171
172 show_registers(regs); 172 show_registers(regs);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 60604b2819b2..b159a9d65680 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -64,7 +64,7 @@ out:
64 64
65out_of_memory: 65out_of_memory:
66 up_read(&mm->mmap_sem); 66 up_read(&mm->mmap_sem);
67 if (is_init(current)) { 67 if (is_global_init(current)) {
68 yield(); 68 yield();
69 down_read(&mm->mmap_sem); 69 down_read(&mm->mmap_sem);
70 goto survive; 70 goto survive;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 14c241ccdd4d..2456b52ed068 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -211,7 +211,7 @@ static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
211 struct mm_struct *mm = tsk->mm; 211 struct mm_struct *mm = tsk->mm;
212 212
213 up_read(&mm->mmap_sem); 213 up_read(&mm->mmap_sem);
214 if (is_init(tsk)) { 214 if (is_global_init(tsk)) {
215 yield(); 215 yield();
216 down_read(&mm->mmap_sem); 216 down_read(&mm->mmap_sem);
217 return 1; 217 return 1;
diff --git a/arch/s390/oprofile/Kconfig b/arch/s390/oprofile/Kconfig
deleted file mode 100644
index 208220a5f23f..000000000000
--- a/arch/s390/oprofile/Kconfig
+++ /dev/null
@@ -1,22 +0,0 @@
1
2menu "Profiling support"
3
4config PROFILING
5 bool "Profiling support"
6 help
7 Say Y here to enable profiling support mechanisms used by
8 profilers such as readprofile or OProfile.
9
10
11config OPROFILE
12 tristate "OProfile system profiling"
13 depends on PROFILING
14 help
15 OProfile is a profiling system capable of profiling the
16 whole system, include the kernel, kernel modules, libraries,
17 and applications.
18
19 If unsure, say N.
20
21endmenu
22
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 44982c1dfa23..247f8a65e733 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -758,7 +758,7 @@ source "drivers/Kconfig"
758 758
759source "fs/Kconfig" 759source "fs/Kconfig"
760 760
761source "arch/sh/oprofile/Kconfig" 761source "kernel/Kconfig.instrumentation"
762 762
763source "arch/sh/Kconfig.debug" 763source "arch/sh/Kconfig.debug"
764 764
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 790ed69b8666..5c17de51987e 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -104,24 +104,3 @@ NORET_TYPE void machine_kexec(struct kimage *image)
104 (*rnk)(page_list, reboot_code_buffer, image->start, vbr_reg); 104 (*rnk)(page_list, reboot_code_buffer, image->start, vbr_reg);
105} 105}
106 106
107/* crashkernel=size@addr specifies the location to reserve for
108 * a crash kernel. By reserving this memory we guarantee
109 * that linux never sets it up as a DMA target.
110 * Useful for holding code to do something appropriate
111 * after a kernel panic.
112 */
113static int __init parse_crashkernel(char *arg)
114{
115 unsigned long size, base;
116 size = memparse(arg, &arg);
117 if (*arg == '@') {
118 base = memparse(arg+1, &arg);
119 /* FIXME: Do I want a sanity check
120 * to validate the memory range?
121 */
122 crashk_res.start = base;
123 crashk_res.end = base + size - 1;
124 }
125 return 0;
126}
127early_param("crashkernel", parse_crashkernel);
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c
index b4469992d6b2..6d7f2b07e491 100644
--- a/arch/sh/kernel/process.c
+++ b/arch/sh/kernel/process.c
@@ -121,7 +121,7 @@ void machine_power_off(void)
121void show_regs(struct pt_regs * regs) 121void show_regs(struct pt_regs * regs)
122{ 122{
123 printk("\n"); 123 printk("\n");
124 printk("Pid : %d, Comm: %20s\n", current->pid, current->comm); 124 printk("Pid : %d, Comm: %20s\n", task_pid_nr(current), current->comm);
125 print_symbol("PC is at %s\n", instruction_pointer(regs)); 125 print_symbol("PC is at %s\n", instruction_pointer(regs));
126 printk("PC : %08lx SP : %08lx SR : %08lx ", 126 printk("PC : %08lx SP : %08lx SR : %08lx ",
127 regs->pc, regs->regs[15], regs->sr); 127 regs->pc, regs->regs[15], regs->sr);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index b3027a6775b9..b749403f6b38 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -128,6 +128,37 @@ static void __init register_bootmem_low_pages(void)
128 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(pages)); 128 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(pages));
129} 129}
130 130
131#ifdef CONFIG_KEXEC
132static void __init reserve_crashkernel(void)
133{
134 unsigned long long free_mem;
135 unsigned long long crash_size, crash_base;
136 int ret;
137
138 free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
139
140 ret = parse_crashkernel(boot_command_line, free_mem,
141 &crash_size, &crash_base);
142 if (ret == 0 && crash_size) {
143 if (crash_base > 0) {
144 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
145 "for crashkernel (System RAM: %ldMB)\n",
146 (unsigned long)(crash_size >> 20),
147 (unsigned long)(crash_base >> 20),
148 (unsigned long)(free_mem >> 20));
149 crashk_res.start = crash_base;
150 crashk_res.end = crash_base + crash_size - 1;
151 reserve_bootmem(crash_base, crash_size);
152 } else
153 printk(KERN_INFO "crashkernel reservation failed - "
154 "you have to specify a base address\n");
155 }
156}
157#else
158static inline void __init reserve_crashkernel(void)
159{}
160#endif
161
131void __init setup_bootmem_allocator(unsigned long free_pfn) 162void __init setup_bootmem_allocator(unsigned long free_pfn)
132{ 163{
133 unsigned long bootmap_size; 164 unsigned long bootmap_size;
@@ -189,11 +220,8 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
189 } 220 }
190 } 221 }
191#endif 222#endif
192#ifdef CONFIG_KEXEC 223
193 if (crashk_res.start != crashk_res.end) 224 reserve_crashkernel();
194 reserve_bootmem(crashk_res.start,
195 crashk_res.end - crashk_res.start + 1);
196#endif
197} 225}
198 226
199#ifndef CONFIG_NEED_MULTIPLE_NODES 227#ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c
index 2f42442cf164..ca754fd42437 100644
--- a/arch/sh/kernel/signal.c
+++ b/arch/sh/kernel/signal.c
@@ -382,7 +382,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
382 set_fs(USER_DS); 382 set_fs(USER_DS);
383 383
384 pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n", 384 pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
385 current->comm, current->pid, frame, regs->pc, regs->pr); 385 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
386 386
387 flush_cache_sigtramp(regs->pr); 387 flush_cache_sigtramp(regs->pr);
388 388
@@ -462,7 +462,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
462 set_fs(USER_DS); 462 set_fs(USER_DS);
463 463
464 pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n", 464 pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
465 current->comm, current->pid, frame, regs->pc, regs->pr); 465 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
466 466
467 flush_cache_sigtramp(regs->pr); 467 flush_cache_sigtramp(regs->pr);
468 468
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index dcb46e71da1c..cf99111cb33f 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -95,8 +95,8 @@ void die(const char * str, struct pt_regs * regs, long err)
95 print_modules(); 95 print_modules();
96 show_regs(regs); 96 show_regs(regs);
97 97
98 printk("Process: %s (pid: %d, stack limit = %p)\n", 98 printk("Process: %s (pid: %d, stack limit = %p)\n", current->comm,
99 current->comm, current->pid, task_stack_page(current) + 1); 99 task_pid_nr(current), task_stack_page(current) + 1);
100 100
101 if (!user_mode(regs) || in_interrupt()) 101 if (!user_mode(regs) || in_interrupt())
102 dump_mem("Stack: ", regs->regs[15], THREAD_SIZE + 102 dump_mem("Stack: ", regs->regs[15], THREAD_SIZE +
@@ -386,7 +386,8 @@ static int handle_unaligned_access(u16 instruction, struct pt_regs *regs)
386 386
387 printk(KERN_NOTICE "Fixing up unaligned userspace access " 387 printk(KERN_NOTICE "Fixing up unaligned userspace access "
388 "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n", 388 "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
389 current->comm,current->pid,(u16*)regs->pc,instruction); 389 current->comm, task_pid_nr(current),
390 (u16 *)regs->pc, instruction);
390 } 391 }
391 392
392 ret = -EFAULT; 393 ret = -EFAULT;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 4729668ce5bf..f33cedb353fc 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -207,7 +207,7 @@ no_context:
207 */ 207 */
208out_of_memory: 208out_of_memory:
209 up_read(&mm->mmap_sem); 209 up_read(&mm->mmap_sem);
210 if (is_init(current)) { 210 if (is_global_init(current)) {
211 yield(); 211 yield();
212 down_read(&mm->mmap_sem); 212 down_read(&mm->mmap_sem);
213 goto survive; 213 goto survive;
diff --git a/arch/sh/oprofile/Kconfig b/arch/sh/oprofile/Kconfig
deleted file mode 100644
index 5ade19801b97..000000000000
--- a/arch/sh/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING
15 help
16 OProfile is a profiling system capable of profiling the
17 whole system, include the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22endmenu
23
diff --git a/arch/sh64/Kconfig b/arch/sh64/Kconfig
index b3327ce8e82f..ba204bac49df 100644
--- a/arch/sh64/Kconfig
+++ b/arch/sh64/Kconfig
@@ -284,7 +284,7 @@ source "drivers/Kconfig"
284 284
285source "fs/Kconfig" 285source "fs/Kconfig"
286 286
287source "arch/sh64/oprofile/Kconfig" 287source "kernel/Kconfig.instrumentation"
288 288
289source "arch/sh64/Kconfig.debug" 289source "arch/sh64/Kconfig.debug"
290 290
diff --git a/arch/sh64/kernel/traps.c b/arch/sh64/kernel/traps.c
index 9d0d58fb29fa..c03101fab467 100644
--- a/arch/sh64/kernel/traps.c
+++ b/arch/sh64/kernel/traps.c
@@ -764,7 +764,7 @@ static int misaligned_fixup(struct pt_regs *regs)
764 --user_mode_unaligned_fixup_count; 764 --user_mode_unaligned_fixup_count;
765 /* Only do 'count' worth of these reports, to remove a potential DoS against syslog */ 765 /* Only do 'count' worth of these reports, to remove a potential DoS against syslog */
766 printk("Fixing up unaligned userspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n", 766 printk("Fixing up unaligned userspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
767 current->comm, current->pid, (__u32)regs->pc, opcode); 767 current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
768 } else 768 } else
769#endif 769#endif
770 if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) { 770 if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
@@ -774,7 +774,7 @@ static int misaligned_fixup(struct pt_regs *regs)
774 (__u32)regs->pc, opcode); 774 (__u32)regs->pc, opcode);
775 } else { 775 } else {
776 printk("Fixing up unaligned kernelspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n", 776 printk("Fixing up unaligned kernelspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
777 current->comm, current->pid, (__u32)regs->pc, opcode); 777 current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
778 } 778 }
779 } 779 }
780 780
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c
index dd81c669c79b..7c79a1ba8059 100644
--- a/arch/sh64/mm/fault.c
+++ b/arch/sh64/mm/fault.c
@@ -81,7 +81,7 @@ static inline void print_vma(struct vm_area_struct *vma)
81 81
82static inline void print_task(struct task_struct *tsk) 82static inline void print_task(struct task_struct *tsk)
83{ 83{
84 printk("Task pid %d\n", tsk->pid); 84 printk("Task pid %d\n", task_pid_nr(tsk));
85} 85}
86 86
87static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address) 87static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
@@ -272,13 +272,13 @@ bad_area:
272 * usermode, so only need a few */ 272 * usermode, so only need a few */
273 count++; 273 count++;
274 printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n", 274 printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
275 address, current->pid, current->comm, 275 address, task_pid_nr(current), current->comm,
276 (unsigned long) regs->pc); 276 (unsigned long) regs->pc);
277#if 0 277#if 0
278 show_regs(regs); 278 show_regs(regs);
279#endif 279#endif
280 } 280 }
281 if (is_init(tsk)) { 281 if (is_global_init(tsk)) {
282 panic("INIT had user mode bad_area\n"); 282 panic("INIT had user mode bad_area\n");
283 } 283 }
284 tsk->thread.address = address; 284 tsk->thread.address = address;
@@ -320,14 +320,14 @@ no_context:
320 * us unable to handle the page fault gracefully. 320 * us unable to handle the page fault gracefully.
321 */ 321 */
322out_of_memory: 322out_of_memory:
323 if (is_init(current)) { 323 if (is_global_init(current)) {
324 panic("INIT out of memory\n"); 324 panic("INIT out of memory\n");
325 yield(); 325 yield();
326 goto survive; 326 goto survive;
327 } 327 }
328 printk("fault:Out of memory\n"); 328 printk("fault:Out of memory\n");
329 up_read(&mm->mmap_sem); 329 up_read(&mm->mmap_sem);
330 if (is_init(current)) { 330 if (is_global_init(current)) {
331 yield(); 331 yield();
332 down_read(&mm->mmap_sem); 332 down_read(&mm->mmap_sem);
333 goto survive; 333 goto survive;
diff --git a/arch/sh64/oprofile/Kconfig b/arch/sh64/oprofile/Kconfig
deleted file mode 100644
index 19d37730b664..000000000000
--- a/arch/sh64/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING
15 help
16 OProfile is a profiling system capable of profiling the
17 whole system, include the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22endmenu
23
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index c0f4ba109daa..527adc808ad6 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -320,11 +320,7 @@ endmenu
320 320
321source "fs/Kconfig" 321source "fs/Kconfig"
322 322
323menu "Instrumentation Support" 323source "kernel/Kconfig.instrumentation"
324
325source "arch/sparc/oprofile/Kconfig"
326
327endmenu
328 324
329source "arch/sparc/Kconfig.debug" 325source "arch/sparc/Kconfig.debug"
330 326
diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c
index 003f8eed32f4..fe562db475e9 100644
--- a/arch/sparc/kernel/ptrace.c
+++ b/arch/sparc/kernel/ptrace.c
@@ -155,7 +155,7 @@ static inline void read_sunos_user(struct pt_regs *regs, unsigned long offset,
155 /* Rest of them are completely unsupported. */ 155 /* Rest of them are completely unsupported. */
156 default: 156 default:
157 printk("%s [%d]: Wants to read user offset %ld\n", 157 printk("%s [%d]: Wants to read user offset %ld\n",
158 current->comm, current->pid, offset); 158 current->comm, task_pid_nr(current), offset);
159 pt_error_return(regs, EIO); 159 pt_error_return(regs, EIO);
160 return; 160 return;
161 } 161 }
@@ -222,7 +222,7 @@ static inline void write_sunos_user(struct pt_regs *regs, unsigned long offset,
222 /* Rest of them are completely unsupported or "no-touch". */ 222 /* Rest of them are completely unsupported or "no-touch". */
223 default: 223 default:
224 printk("%s [%d]: Wants to write user offset %ld\n", 224 printk("%s [%d]: Wants to write user offset %ld\n",
225 current->comm, current->pid, offset); 225 current->comm, task_pid_nr(current), offset);
226 goto failure; 226 goto failure;
227 } 227 }
228success: 228success:
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 6c0221e9a9f5..42bf09db9a81 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -357,7 +357,7 @@ c_sys_nis_syscall (struct pt_regs *regs)
357 if (count++ > 5) 357 if (count++ > 5)
358 return -ENOSYS; 358 return -ENOSYS;
359 printk ("%s[%d]: Unimplemented SPARC system call %d\n", 359 printk ("%s[%d]: Unimplemented SPARC system call %d\n",
360 current->comm, current->pid, (int)regs->u_regs[1]); 360 current->comm, task_pid_nr(current), (int)regs->u_regs[1]);
361#ifdef DEBUG_UNIMP_SYSCALL 361#ifdef DEBUG_UNIMP_SYSCALL
362 show_regs (regs); 362 show_regs (regs);
363#endif 363#endif
diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c
index f807172cab0e..28c187c5d9fd 100644
--- a/arch/sparc/kernel/sys_sunos.c
+++ b/arch/sparc/kernel/sys_sunos.c
@@ -866,7 +866,7 @@ asmlinkage int sunos_killpg(int pgrp, int sig)
866 rcu_read_lock(); 866 rcu_read_lock();
867 ret = -EINVAL; 867 ret = -EINVAL;
868 if (pgrp > 0) 868 if (pgrp > 0)
869 ret = kill_pgrp(find_pid(pgrp), sig, 0); 869 ret = kill_pgrp(find_vpid(pgrp), sig, 0);
870 rcu_read_unlock(); 870 rcu_read_unlock();
871 871
872 return ret; 872 return ret;
diff --git a/arch/sparc/kernel/traps.c b/arch/sparc/kernel/traps.c
index 3bc3bff51e08..d404e7994527 100644
--- a/arch/sparc/kernel/traps.c
+++ b/arch/sparc/kernel/traps.c
@@ -38,7 +38,7 @@ struct trap_trace_entry trapbuf[1024];
38 38
39void syscall_trace_entry(struct pt_regs *regs) 39void syscall_trace_entry(struct pt_regs *regs)
40{ 40{
41 printk("%s[%d]: ", current->comm, current->pid); 41 printk("%s[%d]: ", current->comm, task_pid_nr(current));
42 printk("scall<%d> (could be %d)\n", (int) regs->u_regs[UREG_G1], 42 printk("scall<%d> (could be %d)\n", (int) regs->u_regs[UREG_G1],
43 (int) regs->u_regs[UREG_I0]); 43 (int) regs->u_regs[UREG_I0]);
44} 44}
@@ -99,7 +99,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
99" /_| \\__/ |_\\\n" 99" /_| \\__/ |_\\\n"
100" \\__U_/\n"); 100" \\__U_/\n");
101 101
102 printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter); 102 printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
103 show_regs(regs); 103 show_regs(regs);
104 add_taint(TAINT_DIE); 104 add_taint(TAINT_DIE);
105 105
diff --git a/arch/sparc/oprofile/Kconfig b/arch/sparc/oprofile/Kconfig
deleted file mode 100644
index d8a84088471a..000000000000
--- a/arch/sparc/oprofile/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
1config PROFILING
2 bool "Profiling support (EXPERIMENTAL)"
3 help
4 Say Y here to enable the extended profiling support mechanisms used
5 by profilers such as OProfile.
6
7
8config OPROFILE
9 tristate "OProfile system profiling (EXPERIMENTAL)"
10 depends on PROFILING
11 help
12 OProfile is a profiling system capable of profiling the
13 whole system, include the kernel, kernel modules, libraries,
14 and applications.
15
16 If unsure, say N.
17
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 59c4d752d286..c7a74e376985 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -460,20 +460,7 @@ source "drivers/fc4/Kconfig"
460 460
461source "fs/Kconfig" 461source "fs/Kconfig"
462 462
463menu "Instrumentation Support" 463source "kernel/Kconfig.instrumentation"
464
465source "arch/sparc64/oprofile/Kconfig"
466
467config KPROBES
468 bool "Kprobes (EXPERIMENTAL)"
469 depends on KALLSYMS && EXPERIMENTAL && MODULES
470 help
471 Kprobes allows you to trap at almost any kernel address and
472 execute a callback function. register_kprobe() establishes
473 a probepoint and specifies the callback. Kprobes is useful
474 for kernel debugging, non-intrusive instrumentation and testing.
475 If in doubt, say "N".
476endmenu
477 464
478source "arch/sparc64/Kconfig.debug" 465source "arch/sparc64/Kconfig.debug"
479 466
diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c
index 8f7a06e2c7e7..170d6ca8de6f 100644
--- a/arch/sparc64/kernel/sys_sunos32.c
+++ b/arch/sparc64/kernel/sys_sunos32.c
@@ -831,7 +831,7 @@ asmlinkage int sunos_killpg(int pgrp, int sig)
831 rcu_read_lock(); 831 rcu_read_lock();
832 ret = -EINVAL; 832 ret = -EINVAL;
833 if (pgrp > 0) 833 if (pgrp > 0)
834 ret = kill_pgrp(find_pid(pgrp), sig, 0); 834 ret = kill_pgrp(find_vpid(pgrp), sig, 0);
835 rcu_read_unlock(); 835 rcu_read_unlock();
836 836
837 return ret; 837 return ret;
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 34573a55b6e5..e9c7e4f07abf 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2225,7 +2225,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
2225" /_| \\__/ |_\\\n" 2225" /_| \\__/ |_\\\n"
2226" \\__U_/\n"); 2226" \\__U_/\n");
2227 2227
2228 printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter); 2228 printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
2229 notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV); 2229 notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
2230 __asm__ __volatile__("flushw"); 2230 __asm__ __volatile__("flushw");
2231 __show_regs(regs); 2231 __show_regs(regs);
diff --git a/arch/sparc64/oprofile/Kconfig b/arch/sparc64/oprofile/Kconfig
deleted file mode 100644
index d8a84088471a..000000000000
--- a/arch/sparc64/oprofile/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
1config PROFILING
2 bool "Profiling support (EXPERIMENTAL)"
3 help
4 Say Y here to enable the extended profiling support mechanisms used
5 by profilers such as OProfile.
6
7
8config OPROFILE
9 tristate "OProfile system profiling (EXPERIMENTAL)"
10 depends on PROFILING
11 help
12 OProfile is a profiling system capable of profiling the
13 whole system, include the kernel, kernel modules, libraries,
14 and applications.
15
16 If unsure, say N.
17
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 3b67de7455f1..c86cb3091a8e 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -415,7 +415,7 @@ asmlinkage int solaris_procids(int cmd, s32 pid, s32 pgid)
415 415
416 switch (cmd) { 416 switch (cmd) {
417 case 0: /* getpgrp */ 417 case 0: /* getpgrp */
418 return process_group(current); 418 return task_pgrp_nr(current);
419 case 1: /* setpgrp */ 419 case 1: /* setpgrp */
420 { 420 {
421 int (*sys_setpgid)(pid_t,pid_t) = 421 int (*sys_setpgid)(pid_t,pid_t) =
@@ -426,7 +426,7 @@ asmlinkage int solaris_procids(int cmd, s32 pid, s32 pgid)
426 ret = sys_setpgid(0, 0); 426 ret = sys_setpgid(0, 0);
427 if (ret) return ret; 427 if (ret) return ret;
428 proc_clear_tty(current); 428 proc_clear_tty(current);
429 return process_group(current); 429 return task_pgrp_nr(current);
430 } 430 }
431 case 2: /* getsid */ 431 case 2: /* getsid */
432 { 432 {
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 740d8a922e48..d8925d285573 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -289,4 +289,6 @@ config INPUT
289 bool 289 bool
290 default n 290 default n
291 291
292source "kernel/Kconfig.instrumentation"
293
292source "arch/um/Kconfig.debug" 294source "arch/um/Kconfig.debug"
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index bd060551e619..cb3321f8e0a9 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -108,7 +108,7 @@ out_nosemaphore:
108 * us unable to handle the page fault gracefully. 108 * us unable to handle the page fault gracefully.
109 */ 109 */
110out_of_memory: 110out_of_memory:
111 if (is_init(current)) { 111 if (is_global_init(current)) {
112 up_read(&mm->mmap_sem); 112 up_read(&mm->mmap_sem);
113 yield(); 113 yield();
114 down_read(&mm->mmap_sem); 114 down_read(&mm->mmap_sem);
diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c
index ce3e07fcf283..765444031819 100644
--- a/arch/um/sys-x86_64/sysrq.c
+++ b/arch/um/sys-x86_64/sysrq.c
@@ -15,8 +15,8 @@ void __show_regs(struct pt_regs * regs)
15{ 15{
16 printk("\n"); 16 printk("\n");
17 print_modules(); 17 print_modules();
18 printk("Pid: %d, comm: %.20s %s %s\n", 18 printk("Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
19 current->pid, current->comm, print_tainted(), init_utsname()->release); 19 current->comm, print_tainted(), init_utsname()->release);
20 printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff, 20 printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff,
21 PT_REGS_RIP(regs)); 21 PT_REGS_RIP(regs));
22 printk("\nRSP: %016lx EFLAGS: %08lx\n", PT_REGS_RSP(regs), 22 printk("\nRSP: %016lx EFLAGS: %08lx\n", PT_REGS_RSP(regs),
diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig
index ace479ab273f..b6a50b8b38de 100644
--- a/arch/v850/Kconfig
+++ b/arch/v850/Kconfig
@@ -331,6 +331,8 @@ source "sound/Kconfig"
331 331
332source "drivers/usb/Kconfig" 332source "drivers/usb/Kconfig"
333 333
334source "kernel/Kconfig.instrumentation"
335
334source "arch/v850/Kconfig.debug" 336source "arch/v850/Kconfig.debug"
335 337
336source "security/Kconfig" 338source "security/Kconfig"
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 32e75d0731a9..72d0c56c1b48 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -47,6 +47,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
47 if (!kdump_buf_page) { 47 if (!kdump_buf_page) {
48 printk(KERN_WARNING "Kdump: Kdump buffer page not" 48 printk(KERN_WARNING "Kdump: Kdump buffer page not"
49 " allocated\n"); 49 " allocated\n");
50 kunmap_atomic(vaddr, KM_PTE0);
50 return -EFAULT; 51 return -EFAULT;
51 } 52 }
52 copy_page(kdump_buf_page, vaddr); 53 copy_page(kdump_buf_page, vaddr);
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 3c86b979a40a..d58039e8de74 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -288,7 +288,8 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
288 request_resource(res, code_resource); 288 request_resource(res, code_resource);
289 request_resource(res, data_resource); 289 request_resource(res, data_resource);
290#ifdef CONFIG_KEXEC 290#ifdef CONFIG_KEXEC
291 request_resource(res, &crashk_res); 291 if (crashk_res.start != crashk_res.end)
292 request_resource(res, &crashk_res);
292#endif 293#endif
293 } 294 }
294 } 295 }
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index e422b8159f69..57616865d8a0 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -226,7 +226,8 @@ void __init e820_reserve_resources(void)
226 request_resource(res, &code_resource); 226 request_resource(res, &code_resource);
227 request_resource(res, &data_resource); 227 request_resource(res, &data_resource);
228#ifdef CONFIG_KEXEC 228#ifdef CONFIG_KEXEC
229 request_resource(res, &crashk_res); 229 if (crashk_res.start != crashk_res.end)
230 request_resource(res, &crashk_res);
230#endif 231#endif
231 } 232 }
232 } 233 }
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8459ca64bc2f..11b935f4f886 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -149,28 +149,6 @@ NORET_TYPE void machine_kexec(struct kimage *image)
149 image->start, cpu_has_pae); 149 image->start, cpu_has_pae);
150} 150}
151 151
152/* crashkernel=size@addr specifies the location to reserve for
153 * a crash kernel. By reserving this memory we guarantee
154 * that linux never sets it up as a DMA target.
155 * Useful for holding code to do something appropriate
156 * after a kernel panic.
157 */
158static int __init parse_crashkernel(char *arg)
159{
160 unsigned long size, base;
161 size = memparse(arg, &arg);
162 if (*arg == '@') {
163 base = memparse(arg+1, &arg);
164 /* FIXME: Do I want a sanity check
165 * to validate the memory range?
166 */
167 crashk_res.start = base;
168 crashk_res.end = base + size - 1;
169 }
170 return 0;
171}
172early_param("crashkernel", parse_crashkernel);
173
174void arch_crash_save_vmcoreinfo(void) 152void arch_crash_save_vmcoreinfo(void)
175{ 153{
176#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE 154#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7450b69710b5..0d8577f05422 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -231,33 +231,6 @@ NORET_TYPE void machine_kexec(struct kimage *image)
231 image->start); 231 image->start);
232} 232}
233 233
234/* crashkernel=size@addr specifies the location to reserve for
235 * a crash kernel. By reserving this memory we guarantee
236 * that linux never set's it up as a DMA target.
237 * Useful for holding code to do something appropriate
238 * after a kernel panic.
239 */
240static int __init setup_crashkernel(char *arg)
241{
242 unsigned long size, base;
243 char *p;
244 if (!arg)
245 return -EINVAL;
246 size = memparse(arg, &p);
247 if (arg == p)
248 return -EINVAL;
249 if (*p == '@') {
250 base = memparse(p+1, &p);
251 /* FIXME: Do I want a sanity check to validate the
252 * memory range? Yes you do, but it's too early for
253 * e820 -AK */
254 crashk_res.start = base;
255 crashk_res.end = base + size - 1;
256 }
257 return 0;
258}
259early_param("crashkernel", setup_crashkernel);
260
261void arch_crash_save_vmcoreinfo(void) 234void arch_crash_save_vmcoreinfo(void)
262{ 235{
263#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE 236#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 097aeafce5ff..044a47745a5c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -301,7 +301,7 @@ void show_regs(struct pt_regs * regs)
301 unsigned long d0, d1, d2, d3, d6, d7; 301 unsigned long d0, d1, d2, d3, d6, d7;
302 302
303 printk("\n"); 303 printk("\n");
304 printk("Pid: %d, comm: %20s\n", current->pid, current->comm); 304 printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
305 printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); 305 printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
306 print_symbol("EIP is at %s\n", regs->eip); 306 print_symbol("EIP is at %s\n", regs->eip);
307 307
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index b87a6fd5ba48..978dc0196a0f 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -378,6 +378,49 @@ extern unsigned long __init setup_memory(void);
378extern void zone_sizes_init(void); 378extern void zone_sizes_init(void);
379#endif /* !CONFIG_NEED_MULTIPLE_NODES */ 379#endif /* !CONFIG_NEED_MULTIPLE_NODES */
380 380
381static inline unsigned long long get_total_mem(void)
382{
383 unsigned long long total;
384
385 total = max_low_pfn - min_low_pfn;
386#ifdef CONFIG_HIGHMEM
387 total += highend_pfn - highstart_pfn;
388#endif
389
390 return total << PAGE_SHIFT;
391}
392
393#ifdef CONFIG_KEXEC
394static void __init reserve_crashkernel(void)
395{
396 unsigned long long total_mem;
397 unsigned long long crash_size, crash_base;
398 int ret;
399
400 total_mem = get_total_mem();
401
402 ret = parse_crashkernel(boot_command_line, total_mem,
403 &crash_size, &crash_base);
404 if (ret == 0 && crash_size > 0) {
405 if (crash_base > 0) {
406 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
407 "for crashkernel (System RAM: %ldMB)\n",
408 (unsigned long)(crash_size >> 20),
409 (unsigned long)(crash_base >> 20),
410 (unsigned long)(total_mem >> 20));
411 crashk_res.start = crash_base;
412 crashk_res.end = crash_base + crash_size - 1;
413 reserve_bootmem(crash_base, crash_size);
414 } else
415 printk(KERN_INFO "crashkernel reservation failed - "
416 "you have to specify a base address\n");
417 }
418}
419#else
420static inline void __init reserve_crashkernel(void)
421{}
422#endif
423
381void __init setup_bootmem_allocator(void) 424void __init setup_bootmem_allocator(void)
382{ 425{
383 unsigned long bootmap_size; 426 unsigned long bootmap_size;
@@ -453,11 +496,7 @@ void __init setup_bootmem_allocator(void)
453 } 496 }
454 } 497 }
455#endif 498#endif
456#ifdef CONFIG_KEXEC 499 reserve_crashkernel();
457 if (crashk_res.start != crashk_res.end)
458 reserve_bootmem(crashk_res.start,
459 crashk_res.end - crashk_res.start + 1);
460#endif
461} 500}
462 501
463/* 502/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 5a19f0cc5b67..cdcba6975226 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -191,6 +191,37 @@ static inline void copy_edd(void)
191} 191}
192#endif 192#endif
193 193
194#ifdef CONFIG_KEXEC
195static void __init reserve_crashkernel(void)
196{
197 unsigned long long free_mem;
198 unsigned long long crash_size, crash_base;
199 int ret;
200
201 free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
202
203 ret = parse_crashkernel(boot_command_line, free_mem,
204 &crash_size, &crash_base);
205 if (ret == 0 && crash_size) {
206 if (crash_base > 0) {
207 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
208 "for crashkernel (System RAM: %ldMB)\n",
209 (unsigned long)(crash_size >> 20),
210 (unsigned long)(crash_base >> 20),
211 (unsigned long)(free_mem >> 20));
212 crashk_res.start = crash_base;
213 crashk_res.end = crash_base + crash_size - 1;
214 reserve_bootmem(crash_base, crash_size);
215 } else
216 printk(KERN_INFO "crashkernel reservation failed - "
217 "you have to specify a base address\n");
218 }
219}
220#else
221static inline void __init reserve_crashkernel(void)
222{}
223#endif
224
194#define EBDA_ADDR_POINTER 0x40E 225#define EBDA_ADDR_POINTER 0x40E
195 226
196unsigned __initdata ebda_addr; 227unsigned __initdata ebda_addr;
@@ -357,13 +388,7 @@ void __init setup_arch(char **cmdline_p)
357 } 388 }
358 } 389 }
359#endif 390#endif
360#ifdef CONFIG_KEXEC 391 reserve_crashkernel();
361 if (crashk_res.start != crashk_res.end) {
362 reserve_bootmem_generic(crashk_res.start,
363 crashk_res.end - crashk_res.start + 1);
364 }
365#endif
366
367 paging_init(); 392 paging_init();
368 393
369#ifdef CONFIG_PCI 394#ifdef CONFIG_PCI
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 0d79df3c5631..6dc394b87255 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -200,8 +200,8 @@ badframe:
200 if (show_unhandled_signals && printk_ratelimit()) 200 if (show_unhandled_signals && printk_ratelimit())
201 printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx" 201 printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
202 " esp:%lx oeax:%lx\n", 202 " esp:%lx oeax:%lx\n",
203 current->pid > 1 ? KERN_INFO : KERN_EMERG, 203 task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
204 current->comm, current->pid, frame, regs->eip, 204 current->comm, task_pid_nr(current), frame, regs->eip,
205 regs->esp, regs->orig_eax); 205 regs->esp, regs->orig_eax);
206 206
207 force_sig(SIGSEGV, current); 207 force_sig(SIGSEGV, current);
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index b132d3957dfc..1e9d57256eb1 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -316,7 +316,7 @@ void show_registers(struct pt_regs *regs)
316 printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", 316 printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
317 regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); 317 regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
318 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", 318 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
319 TASK_COMM_LEN, current->comm, current->pid, 319 TASK_COMM_LEN, current->comm, task_pid_nr(current),
320 current_thread_info(), current, task_thread_info(current)); 320 current_thread_info(), current, task_thread_info(current));
321 /* 321 /*
322 * When in-kernel, we also print out the stack and code at the 322 * When in-kernel, we also print out the stack and code at the
@@ -622,7 +622,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
622 printk_ratelimit()) 622 printk_ratelimit())
623 printk(KERN_INFO 623 printk(KERN_INFO
624 "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", 624 "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
625 current->comm, current->pid, 625 current->comm, task_pid_nr(current),
626 regs->eip, regs->esp, error_code); 626 regs->eip, regs->esp, error_code);
627 627
628 force_sig(SIGSEGV, current); 628 force_sig(SIGSEGV, current);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9f38b12b4af1..8bab2b2efaff 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -748,7 +748,7 @@ survive:
748 retval = get_user_pages(current, current->mm, 748 retval = get_user_pages(current, current->mm,
749 (unsigned long )to, 1, 1, 0, &pg, NULL); 749 (unsigned long )to, 1, 1, 0, &pg, NULL);
750 750
751 if (retval == -ENOMEM && is_init(current)) { 751 if (retval == -ENOMEM && is_global_init(current)) {
752 up_read(&current->mm->mmap_sem); 752 up_read(&current->mm->mmap_sem);
753 congestion_wait(WRITE, HZ/50); 753 congestion_wait(WRITE, HZ/50);
754 goto survive; 754 goto survive;
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 6555c3d14371..4d3e538c57ab 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -471,8 +471,8 @@ bad_area_nosemaphore:
471 printk_ratelimit()) { 471 printk_ratelimit()) {
472 printk("%s%s[%d]: segfault at %08lx eip %08lx " 472 printk("%s%s[%d]: segfault at %08lx eip %08lx "
473 "esp %08lx error %lx\n", 473 "esp %08lx error %lx\n",
474 tsk->pid > 1 ? KERN_INFO : KERN_EMERG, 474 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
475 tsk->comm, tsk->pid, address, regs->eip, 475 tsk->comm, task_pid_nr(tsk), address, regs->eip,
476 regs->esp, error_code); 476 regs->esp, error_code);
477 } 477 }
478 tsk->thread.cr2 = address; 478 tsk->thread.cr2 = address;
@@ -587,7 +587,7 @@ no_context:
587 */ 587 */
588out_of_memory: 588out_of_memory:
589 up_read(&mm->mmap_sem); 589 up_read(&mm->mmap_sem);
590 if (is_init(tsk)) { 590 if (is_global_init(tsk)) {
591 yield(); 591 yield();
592 down_read(&mm->mmap_sem); 592 down_read(&mm->mmap_sem);
593 goto survive; 593 goto survive;
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 5e0e54906c48..5149ac136a5d 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -554,7 +554,7 @@ no_context:
554 */ 554 */
555out_of_memory: 555out_of_memory:
556 up_read(&mm->mmap_sem); 556 up_read(&mm->mmap_sem);
557 if (is_init(current)) { 557 if (is_global_init(current)) {
558 yield(); 558 yield();
559 goto again; 559 goto again;
560 } 560 }
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 8a4f65bf956e..c7b7dfe1d405 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -230,9 +230,14 @@ void global_flush_tlb(void)
230 struct page *pg, *next; 230 struct page *pg, *next;
231 struct list_head l; 231 struct list_head l;
232 232
233 down_read(&init_mm.mmap_sem); 233 /*
234 * Write-protect the semaphore, to exclude two contexts
235 * doing a list_replace_init() call in parallel and to
236 * exclude new additions to the deferred_pages list:
237 */
238 down_write(&init_mm.mmap_sem);
234 list_replace_init(&deferred_pages, &l); 239 list_replace_init(&deferred_pages, &l);
235 up_read(&init_mm.mmap_sem); 240 up_write(&init_mm.mmap_sem);
236 241
237 flush_map(&l); 242 flush_map(&l);
238 243
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 78cb68f2ebbd..d2521942e5bd 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -799,21 +799,6 @@ source "drivers/firmware/Kconfig"
799 799
800source fs/Kconfig 800source fs/Kconfig
801 801
802menu "Instrumentation Support"
803
804source "arch/x86/oprofile/Kconfig"
805
806config KPROBES
807 bool "Kprobes"
808 depends on KALLSYMS && MODULES
809 help
810 Kprobes allows you to trap at almost any kernel address and
811 execute a callback function. register_kprobe() establishes
812 a probepoint and specifies the callback. Kprobes is useful
813 for kernel debugging, non-intrusive instrumentation and testing.
814 If in doubt, say "N".
815endmenu
816
817source "arch/x86_64/Kconfig.debug" 802source "arch/x86_64/Kconfig.debug"
818 803
819source "security/Kconfig" 804source "security/Kconfig"
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 7fbb44bea37f..85ffbb491490 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -251,6 +251,8 @@ config EMBEDDED_RAMDISK_IMAGE
251 provide one yourself. 251 provide one yourself.
252endmenu 252endmenu
253 253
254source "kernel/Kconfig.instrumentation"
255
254source "arch/xtensa/Kconfig.debug" 256source "arch/xtensa/Kconfig.debug"
255 257
256source "security/Kconfig" 258source "security/Kconfig"
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 8be99c777d9d..397bcd6ad08d 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -176,7 +176,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
176 printk("Caught unhandled exception in '%s' " 176 printk("Caught unhandled exception in '%s' "
177 "(pid = %d, pc = %#010lx) - should not happen\n" 177 "(pid = %d, pc = %#010lx) - should not happen\n"
178 "\tEXCCAUSE is %ld\n", 178 "\tEXCCAUSE is %ld\n",
179 current->comm, current->pid, regs->pc, exccause); 179 current->comm, task_pid_nr(current), regs->pc, exccause);
180 force_sig(SIGILL, current); 180 force_sig(SIGILL, current);
181} 181}
182 182
@@ -228,7 +228,7 @@ do_illegal_instruction(struct pt_regs *regs)
228 /* If in user mode, send SIGILL signal to current process. */ 228 /* If in user mode, send SIGILL signal to current process. */
229 229
230 printk("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n", 230 printk("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n",
231 current->comm, current->pid, regs->pc); 231 current->comm, task_pid_nr(current), regs->pc);
232 force_sig(SIGILL, current); 232 force_sig(SIGILL, current);
233} 233}
234 234
@@ -254,7 +254,7 @@ do_unaligned_user (struct pt_regs *regs)
254 current->thread.error_code = -3; 254 current->thread.error_code = -3;
255 printk("Unaligned memory access to %08lx in '%s' " 255 printk("Unaligned memory access to %08lx in '%s' "
256 "(pid = %d, pc = %#010lx)\n", 256 "(pid = %d, pc = %#010lx)\n",
257 regs->excvaddr, current->comm, current->pid, regs->pc); 257 regs->excvaddr, current->comm, task_pid_nr(current), regs->pc);
258 info.si_signo = SIGBUS; 258 info.si_signo = SIGBUS;
259 info.si_errno = 0; 259 info.si_errno = 0;
260 info.si_code = BUS_ADRALN; 260 info.si_code = BUS_ADRALN;
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 2f842859948f..33f366be323f 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -145,7 +145,7 @@ bad_area:
145 */ 145 */
146out_of_memory: 146out_of_memory:
147 up_read(&mm->mmap_sem); 147 up_read(&mm->mmap_sem);
148 if (is_init(current)) { 148 if (is_global_init(current)) {
149 yield(); 149 yield();
150 down_read(&mm->mmap_sem); 150 down_read(&mm->mmap_sem);
151 goto survive; 151 goto survive;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 3935469e3662..8025d646ab30 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3367,7 +3367,7 @@ void submit_bio(int rw, struct bio *bio)
3367 if (unlikely(block_dump)) { 3367 if (unlikely(block_dump)) {
3368 char b[BDEVNAME_SIZE]; 3368 char b[BDEVNAME_SIZE];
3369 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 3369 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
3370 current->comm, current->pid, 3370 current->comm, task_pid_nr(current),
3371 (rw & WRITE) ? "WRITE" : "READ", 3371 (rw & WRITE) ? "WRITE" : "READ",
3372 (unsigned long long)bio->bi_sector, 3372 (unsigned long long)bio->bi_sector,
3373 bdevname(bio->bi_bdev,b)); 3373 bdevname(bio->bi_bdev,b));
@@ -3739,7 +3739,7 @@ EXPORT_SYMBOL(end_dequeued_request);
3739 3739
3740/** 3740/**
3741 * end_request - end I/O on the current segment of the request 3741 * end_request - end I/O on the current segment of the request
3742 * @rq: the request being processed 3742 * @req: the request being processed
3743 * @uptodate: error value or 0/1 uptodate flag 3743 * @uptodate: error value or 0/1 uptodate flag
3744 * 3744 *
3745 * Description: 3745 * Description:
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 2e79a3395ecf..301e832e6961 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -434,18 +434,18 @@ static int acpi_button_add(struct acpi_device *device)
434 switch (button->type) { 434 switch (button->type) {
435 case ACPI_BUTTON_TYPE_POWER: 435 case ACPI_BUTTON_TYPE_POWER:
436 case ACPI_BUTTON_TYPE_POWERF: 436 case ACPI_BUTTON_TYPE_POWERF:
437 input->evbit[0] = BIT(EV_KEY); 437 input->evbit[0] = BIT_MASK(EV_KEY);
438 set_bit(KEY_POWER, input->keybit); 438 set_bit(KEY_POWER, input->keybit);
439 break; 439 break;
440 440
441 case ACPI_BUTTON_TYPE_SLEEP: 441 case ACPI_BUTTON_TYPE_SLEEP:
442 case ACPI_BUTTON_TYPE_SLEEPF: 442 case ACPI_BUTTON_TYPE_SLEEPF:
443 input->evbit[0] = BIT(EV_KEY); 443 input->evbit[0] = BIT_MASK(EV_KEY);
444 set_bit(KEY_SLEEP, input->keybit); 444 set_bit(KEY_SLEEP, input->keybit);
445 break; 445 break;
446 446
447 case ACPI_BUTTON_TYPE_LID: 447 case ACPI_BUTTON_TYPE_LID:
448 input->evbit[0] = BIT(EV_SW); 448 input->evbit[0] = BIT_MASK(EV_SW);
449 set_bit(SW_LID, input->swbit); 449 set_bit(SW_LID, input->swbit);
450 break; 450 break;
451 } 451 }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index cb136a919f2a..ac4a0cb217ab 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -188,7 +188,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
188 if (signal_pending(current)) { 188 if (signal_pending(current)) {
189 siginfo_t info; 189 siginfo_t info;
190 printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n", 190 printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
191 current->pid, current->comm, 191 task_pid_nr(current), current->comm,
192 dequeue_signal_lock(current, &current->blocked, &info)); 192 dequeue_signal_lock(current, &current->blocked, &info));
193 result = -EINTR; 193 result = -EINTR;
194 sock_shutdown(lo, !send); 194 sock_shutdown(lo, !send);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index d70745c84250..af0561053167 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1107,7 +1107,7 @@ int open_for_data(struct cdrom_device_info * cdi)
1107 is the default case! */ 1107 is the default case! */
1108 cdinfo(CD_OPEN, "bummer. wrong media type.\n"); 1108 cdinfo(CD_OPEN, "bummer. wrong media type.\n");
1109 cdinfo(CD_WARNING, "pid %d must open device O_NONBLOCK!\n", 1109 cdinfo(CD_WARNING, "pid %d must open device O_NONBLOCK!\n",
1110 (unsigned int)current->pid); 1110 (unsigned int)task_pid_nr(current));
1111 ret=-EMEDIUMTYPE; 1111 ret=-EMEDIUMTYPE;
1112 goto clean_up_and_return; 1112 goto clean_up_and_return;
1113 } 1113 }
diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c
index 856774fbe025..d24a6c2c2c24 100644
--- a/drivers/char/drm/drm_bufs.c
+++ b/drivers/char/drm/drm_bufs.c
@@ -1456,7 +1456,7 @@ int drm_freebufs(struct drm_device *dev, void *data,
1456 buf = dma->buflist[idx]; 1456 buf = dma->buflist[idx];
1457 if (buf->file_priv != file_priv) { 1457 if (buf->file_priv != file_priv) {
1458 DRM_ERROR("Process %d freeing buffer not owned\n", 1458 DRM_ERROR("Process %d freeing buffer not owned\n",
1459 current->pid); 1459 task_pid_nr(current));
1460 return -EINVAL; 1460 return -EINVAL;
1461 } 1461 }
1462 drm_free_buffer(dev, buf); 1462 drm_free_buffer(dev, buf);
diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c
index 72668b15e5ce..44a46268b02b 100644
--- a/drivers/char/drm/drm_drv.c
+++ b/drivers/char/drm/drm_drv.c
@@ -463,7 +463,7 @@ int drm_ioctl(struct inode *inode, struct file *filp,
463 ++file_priv->ioctl_count; 463 ++file_priv->ioctl_count;
464 464
465 DRM_DEBUG("pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%lx, auth=%d\n", 465 DRM_DEBUG("pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%lx, auth=%d\n",
466 current->pid, cmd, nr, 466 task_pid_nr(current), cmd, nr,
467 (long)old_encode_dev(file_priv->head->device), 467 (long)old_encode_dev(file_priv->head->device),
468 file_priv->authenticated); 468 file_priv->authenticated);
469 469
diff --git a/drivers/char/drm/drm_fops.c b/drivers/char/drm/drm_fops.c
index f383fc37190c..3992f73299cc 100644
--- a/drivers/char/drm/drm_fops.c
+++ b/drivers/char/drm/drm_fops.c
@@ -234,7 +234,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
234 if (!drm_cpu_valid()) 234 if (!drm_cpu_valid())
235 return -EINVAL; 235 return -EINVAL;
236 236
237 DRM_DEBUG("pid = %d, minor = %d\n", current->pid, minor); 237 DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor);
238 238
239 priv = drm_alloc(sizeof(*priv), DRM_MEM_FILES); 239 priv = drm_alloc(sizeof(*priv), DRM_MEM_FILES);
240 if (!priv) 240 if (!priv)
@@ -244,7 +244,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
244 filp->private_data = priv; 244 filp->private_data = priv;
245 priv->filp = filp; 245 priv->filp = filp;
246 priv->uid = current->euid; 246 priv->uid = current->euid;
247 priv->pid = current->pid; 247 priv->pid = task_pid_nr(current);
248 priv->minor = minor; 248 priv->minor = minor;
249 priv->head = drm_heads[minor]; 249 priv->head = drm_heads[minor];
250 priv->ioctl_count = 0; 250 priv->ioctl_count = 0;
@@ -339,7 +339,8 @@ int drm_release(struct inode *inode, struct file *filp)
339 */ 339 */
340 340
341 DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n", 341 DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n",
342 current->pid, (long)old_encode_dev(file_priv->head->device), 342 task_pid_nr(current),
343 (long)old_encode_dev(file_priv->head->device),
343 dev->open_count); 344 dev->open_count);
344 345
345 if (dev->driver->reclaim_buffers_locked && dev->lock.hw_lock) { 346 if (dev->driver->reclaim_buffers_locked && dev->lock.hw_lock) {
diff --git a/drivers/char/drm/drm_lock.c b/drivers/char/drm/drm_lock.c
index c6b73e744d67..bea2a7d5b2b2 100644
--- a/drivers/char/drm/drm_lock.c
+++ b/drivers/char/drm/drm_lock.c
@@ -58,12 +58,12 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
58 58
59 if (lock->context == DRM_KERNEL_CONTEXT) { 59 if (lock->context == DRM_KERNEL_CONTEXT) {
60 DRM_ERROR("Process %d using kernel context %d\n", 60 DRM_ERROR("Process %d using kernel context %d\n",
61 current->pid, lock->context); 61 task_pid_nr(current), lock->context);
62 return -EINVAL; 62 return -EINVAL;
63 } 63 }
64 64
65 DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n", 65 DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n",
66 lock->context, current->pid, 66 lock->context, task_pid_nr(current),
67 dev->lock.hw_lock->lock, lock->flags); 67 dev->lock.hw_lock->lock, lock->flags);
68 68
69 if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE)) 69 if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE))
@@ -153,7 +153,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
153 153
154 if (lock->context == DRM_KERNEL_CONTEXT) { 154 if (lock->context == DRM_KERNEL_CONTEXT) {
155 DRM_ERROR("Process %d using kernel context %d\n", 155 DRM_ERROR("Process %d using kernel context %d\n",
156 current->pid, lock->context); 156 task_pid_nr(current), lock->context);
157 return -EINVAL; 157 return -EINVAL;
158 } 158 }
159 159
diff --git a/drivers/char/drm/drm_os_linux.h b/drivers/char/drm/drm_os_linux.h
index 114e54e0f61b..76e44ac94fb5 100644
--- a/drivers/char/drm/drm_os_linux.h
+++ b/drivers/char/drm/drm_os_linux.h
@@ -7,7 +7,7 @@
7#include <linux/delay.h> 7#include <linux/delay.h>
8 8
9/** Current process ID */ 9/** Current process ID */
10#define DRM_CURRENTPID current->pid 10#define DRM_CURRENTPID task_pid_nr(current)
11#define DRM_SUSER(p) capable(CAP_SYS_ADMIN) 11#define DRM_SUSER(p) capable(CAP_SYS_ADMIN)
12#define DRM_UDELAY(d) udelay(d) 12#define DRM_UDELAY(d) udelay(d)
13/** Read a byte from a MMIO region */ 13/** Read a byte from a MMIO region */
diff --git a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c
index 8e841bdee6dc..eb381a7c5bee 100644
--- a/drivers/char/drm/i810_dma.c
+++ b/drivers/char/drm/i810_dma.c
@@ -1024,7 +1024,7 @@ static int i810_getbuf(struct drm_device *dev, void *data,
1024 retcode = i810_dma_get_buffer(dev, d, file_priv); 1024 retcode = i810_dma_get_buffer(dev, d, file_priv);
1025 1025
1026 DRM_DEBUG("i810_dma: %d returning %d, granted = %d\n", 1026 DRM_DEBUG("i810_dma: %d returning %d, granted = %d\n",
1027 current->pid, retcode, d->granted); 1027 task_pid_nr(current), retcode, d->granted);
1028 1028
1029 sarea_priv->last_dispatch = (int)hw_status[5]; 1029 sarea_priv->last_dispatch = (int)hw_status[5];
1030 1030
diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c
index 43a1f78712d6..69a363edb0d2 100644
--- a/drivers/char/drm/i830_dma.c
+++ b/drivers/char/drm/i830_dma.c
@@ -1409,7 +1409,7 @@ static int i830_getbuf(struct drm_device *dev, void *data,
1409 retcode = i830_dma_get_buffer(dev, d, file_priv); 1409 retcode = i830_dma_get_buffer(dev, d, file_priv);
1410 1410
1411 DRM_DEBUG("i830_dma: %d returning %d, granted = %d\n", 1411 DRM_DEBUG("i830_dma: %d returning %d, granted = %d\n",
1412 current->pid, retcode, d->granted); 1412 task_pid_nr(current), retcode, d->granted);
1413 1413
1414 sarea_priv->last_dispatch = (int)hw_status[5]; 1414 sarea_priv->last_dispatch = (int)hw_status[5];
1415 1415
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 2e7ae42a5503..0f8fb135da53 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -58,10 +58,10 @@
58#include <linux/mm.h> 58#include <linux/mm.h>
59#include <linux/init.h> 59#include <linux/init.h>
60#include <linux/delay.h> 60#include <linux/delay.h>
61#include <linux/bitops.h>
61 62
62#include <asm/system.h> 63#include <asm/system.h>
63#include <asm/io.h> 64#include <asm/io.h>
64#include <asm/bitops.h>
65 65
66#include <asm/dma.h> 66#include <asm/dma.h>
67#include <linux/slab.h> 67#include <linux/slab.h>
diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index 212276affa1f..fc54d234507a 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -42,6 +42,7 @@
42#include <linux/sysrq.h> 42#include <linux/sysrq.h>
43#include <linux/input.h> 43#include <linux/input.h>
44#include <linux/reboot.h> 44#include <linux/reboot.h>
45#include <linux/notifier.h>
45 46
46extern void ctrl_alt_del(void); 47extern void ctrl_alt_del(void);
47 48
@@ -81,7 +82,8 @@ void compute_shiftstate(void);
81typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value, 82typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value,
82 char up_flag); 83 char up_flag);
83static k_handler_fn K_HANDLERS; 84static k_handler_fn K_HANDLERS;
84static k_handler_fn *k_handler[16] = { K_HANDLERS }; 85k_handler_fn *k_handler[16] = { K_HANDLERS };
86EXPORT_SYMBOL_GPL(k_handler);
85 87
86#define FN_HANDLERS\ 88#define FN_HANDLERS\
87 fn_null, fn_enter, fn_show_ptregs, fn_show_mem,\ 89 fn_null, fn_enter, fn_show_ptregs, fn_show_mem,\
@@ -127,7 +129,7 @@ int shift_state = 0;
127 */ 129 */
128 130
129static struct input_handler kbd_handler; 131static struct input_handler kbd_handler;
130static unsigned long key_down[NBITS(KEY_MAX)]; /* keyboard key bitmap */ 132static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)]; /* keyboard key bitmap */
131static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. */ 133static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. */
132static int dead_key_next; 134static int dead_key_next;
133static int npadch = -1; /* -1 or number assembled on pad */ 135static int npadch = -1; /* -1 or number assembled on pad */
@@ -160,6 +162,23 @@ static int sysrq_alt_use;
160static int sysrq_alt; 162static int sysrq_alt;
161 163
162/* 164/*
165 * Notifier list for console keyboard events
166 */
167static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list);
168
169int register_keyboard_notifier(struct notifier_block *nb)
170{
171 return atomic_notifier_chain_register(&keyboard_notifier_list, nb);
172}
173EXPORT_SYMBOL_GPL(register_keyboard_notifier);
174
175int unregister_keyboard_notifier(struct notifier_block *nb)
176{
177 return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb);
178}
179EXPORT_SYMBOL_GPL(unregister_keyboard_notifier);
180
181/*
163 * Translation of scancodes to keycodes. We set them on only the first 182 * Translation of scancodes to keycodes. We set them on only the first
164 * keyboard in the list that accepts the scancode and keycode. 183 * keyboard in the list that accepts the scancode and keycode.
165 * Explanation for not choosing the first attached keyboard anymore: 184 * Explanation for not choosing the first attached keyboard anymore:
@@ -1130,6 +1149,7 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
1130 unsigned char type, raw_mode; 1149 unsigned char type, raw_mode;
1131 struct tty_struct *tty; 1150 struct tty_struct *tty;
1132 int shift_final; 1151 int shift_final;
1152 struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down };
1133 1153
1134 tty = vc->vc_tty; 1154 tty = vc->vc_tty;
1135 1155
@@ -1217,10 +1237,11 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
1217 return; 1237 return;
1218 } 1238 }
1219 1239
1220 shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate; 1240 param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
1221 key_map = key_maps[shift_final]; 1241 key_map = key_maps[shift_final];
1222 1242
1223 if (!key_map) { 1243 if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYCODE, &param) == NOTIFY_STOP || !key_map) {
1244 atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNBOUND_KEYCODE, &param);
1224 compute_shiftstate(); 1245 compute_shiftstate();
1225 kbd->slockstate = 0; 1246 kbd->slockstate = 0;
1226 return; 1247 return;
@@ -1237,6 +1258,9 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
1237 type = KTYP(keysym); 1258 type = KTYP(keysym);
1238 1259
1239 if (type < 0xf0) { 1260 if (type < 0xf0) {
1261 param.value = keysym;
1262 if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNICODE, &param) == NOTIFY_STOP)
1263 return;
1240 if (down && !raw_mode) 1264 if (down && !raw_mode)
1241 to_utf8(vc, keysym); 1265 to_utf8(vc, keysym);
1242 return; 1266 return;
@@ -1244,9 +1268,6 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
1244 1268
1245 type -= 0xf0; 1269 type -= 0xf0;
1246 1270
1247 if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
1248 return;
1249
1250 if (type == KT_LETTER) { 1271 if (type == KT_LETTER) {
1251 type = KT_LATIN; 1272 type = KT_LATIN;
1252 if (vc_kbd_led(kbd, VC_CAPSLOCK)) { 1273 if (vc_kbd_led(kbd, VC_CAPSLOCK)) {
@@ -1255,9 +1276,18 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
1255 keysym = key_map[keycode]; 1276 keysym = key_map[keycode];
1256 } 1277 }
1257 } 1278 }
1279 param.value = keysym;
1280
1281 if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYSYM, &param) == NOTIFY_STOP)
1282 return;
1283
1284 if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
1285 return;
1258 1286
1259 (*k_handler[type])(vc, keysym & 0xff, !down); 1287 (*k_handler[type])(vc, keysym & 0xff, !down);
1260 1288
1289 atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, &param);
1290
1261 if (type != KT_SLOCK) 1291 if (type != KT_SLOCK)
1262 kbd->slockstate = 0; 1292 kbd->slockstate = 0;
1263} 1293}
@@ -1347,12 +1377,12 @@ static void kbd_start(struct input_handle *handle)
1347static const struct input_device_id kbd_ids[] = { 1377static const struct input_device_id kbd_ids[] = {
1348 { 1378 {
1349 .flags = INPUT_DEVICE_ID_MATCH_EVBIT, 1379 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
1350 .evbit = { BIT(EV_KEY) }, 1380 .evbit = { BIT_MASK(EV_KEY) },
1351 }, 1381 },
1352 1382
1353 { 1383 {
1354 .flags = INPUT_DEVICE_ID_MATCH_EVBIT, 1384 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
1355 .evbit = { BIT(EV_SND) }, 1385 .evbit = { BIT_MASK(EV_SND) },
1356 }, 1386 },
1357 1387
1358 { }, /* Terminating entry */ 1388 { }, /* Terminating entry */
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 661aca0e155d..fd0abef7ee08 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -56,11 +56,11 @@
56#include <linux/mm.h> 56#include <linux/mm.h>
57#include <linux/delay.h> 57#include <linux/delay.h>
58#include <linux/pci.h> 58#include <linux/pci.h>
59#include <linux/bitops.h>
59 60
60#include <asm/system.h> 61#include <asm/system.h>
61#include <asm/io.h> 62#include <asm/io.h>
62#include <asm/irq.h> 63#include <asm/irq.h>
63#include <asm/bitops.h>
64#include <asm/uaccess.h> 64#include <asm/uaccess.h>
65 65
66#include "mxser.h" 66#include "mxser.h"
diff --git a/drivers/char/mxser_new.c b/drivers/char/mxser_new.c
index 854dbf59eb68..081c84c7b548 100644
--- a/drivers/char/mxser_new.c
+++ b/drivers/char/mxser_new.c
@@ -39,11 +39,11 @@
39#include <linux/mm.h> 39#include <linux/mm.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41#include <linux/pci.h> 41#include <linux/pci.h>
42#include <linux/bitops.h>
42 43
43#include <asm/system.h> 44#include <asm/system.h>
44#include <asm/io.h> 45#include <asm/io.h>
45#include <asm/irq.h> 46#include <asm/irq.h>
46#include <asm/bitops.h>
47#include <asm/uaccess.h> 47#include <asm/uaccess.h>
48 48
49#include "mxser_new.h" 49#include "mxser_new.h"
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 859858561ab6..9782cb4d30dc 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -1178,9 +1178,9 @@ static int __devinit sonypi_create_input_devices(void)
1178 jog_dev->id.bustype = BUS_ISA; 1178 jog_dev->id.bustype = BUS_ISA;
1179 jog_dev->id.vendor = PCI_VENDOR_ID_SONY; 1179 jog_dev->id.vendor = PCI_VENDOR_ID_SONY;
1180 1180
1181 jog_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 1181 jog_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
1182 jog_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_MIDDLE); 1182 jog_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_MIDDLE);
1183 jog_dev->relbit[0] = BIT(REL_WHEEL); 1183 jog_dev->relbit[0] = BIT_MASK(REL_WHEEL);
1184 1184
1185 sonypi_device.input_key_dev = key_dev = input_allocate_device(); 1185 sonypi_device.input_key_dev = key_dev = input_allocate_device();
1186 if (!key_dev) { 1186 if (!key_dev) {
@@ -1193,7 +1193,7 @@ static int __devinit sonypi_create_input_devices(void)
1193 key_dev->id.vendor = PCI_VENDOR_ID_SONY; 1193 key_dev->id.vendor = PCI_VENDOR_ID_SONY;
1194 1194
1195 /* Initialize the Input Drivers: special keys */ 1195 /* Initialize the Input Drivers: special keys */
1196 key_dev->evbit[0] = BIT(EV_KEY); 1196 key_dev->evbit[0] = BIT_MASK(EV_KEY);
1197 for (i = 0; sonypi_inputkeys[i].sonypiev; i++) 1197 for (i = 0; sonypi_inputkeys[i].sonypiev; i++)
1198 if (sonypi_inputkeys[i].inputev) 1198 if (sonypi_inputkeys[i].inputev)
1199 set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit); 1199 set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit);
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 85a23283dff5..a6e1c9ba1217 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -1467,7 +1467,7 @@ static int sx_open(struct tty_struct *tty, struct file *filp)
1467 1467
1468 line = tty->index; 1468 line = tty->index;
1469 sx_dprintk(SX_DEBUG_OPEN, "%d: opening line %d. tty=%p ctty=%p, " 1469 sx_dprintk(SX_DEBUG_OPEN, "%d: opening line %d. tty=%p ctty=%p, "
1470 "np=%d)\n", current->pid, line, tty, 1470 "np=%d)\n", task_pid_nr(current), line, tty,
1471 current->signal->tty, sx_nports); 1471 current->signal->tty, sx_nports);
1472 1472
1473 if ((line < 0) || (line >= SX_NPORTS) || (line >= sx_nports)) 1473 if ((line < 0) || (line >= SX_NPORTS) || (line >= sx_nports))
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 78d14935f2b8..de60e1ea4fb3 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -251,7 +251,7 @@ static void send_sig_all(int sig)
251 struct task_struct *p; 251 struct task_struct *p;
252 252
253 for_each_process(p) { 253 for_each_process(p) {
254 if (p->mm && !is_init(p)) 254 if (p->mm && !is_global_init(p))
255 /* Not swapper, init nor kernel thread */ 255 /* Not swapper, init nor kernel thread */
256 force_sig(sig, p); 256 force_sig(sig, p);
257 } 257 }
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 9c867cf6de64..13a53575a016 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -103,6 +103,7 @@
103#include <linux/selection.h> 103#include <linux/selection.h>
104 104
105#include <linux/kmod.h> 105#include <linux/kmod.h>
106#include <linux/nsproxy.h>
106 107
107#undef TTY_DEBUG_HANGUP 108#undef TTY_DEBUG_HANGUP
108 109
@@ -3107,7 +3108,7 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
3107 */ 3108 */
3108 if (tty == real_tty && current->signal->tty != real_tty) 3109 if (tty == real_tty && current->signal->tty != real_tty)
3109 return -ENOTTY; 3110 return -ENOTTY;
3110 return put_user(pid_nr(real_tty->pgrp), p); 3111 return put_user(pid_vnr(real_tty->pgrp), p);
3111} 3112}
3112 3113
3113/** 3114/**
@@ -3141,7 +3142,7 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
3141 if (pgrp_nr < 0) 3142 if (pgrp_nr < 0)
3142 return -EINVAL; 3143 return -EINVAL;
3143 rcu_read_lock(); 3144 rcu_read_lock();
3144 pgrp = find_pid(pgrp_nr); 3145 pgrp = find_vpid(pgrp_nr);
3145 retval = -ESRCH; 3146 retval = -ESRCH;
3146 if (!pgrp) 3147 if (!pgrp)
3147 goto out_unlock; 3148 goto out_unlock;
@@ -3178,7 +3179,7 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _
3178 return -ENOTTY; 3179 return -ENOTTY;
3179 if (!real_tty->session) 3180 if (!real_tty->session)
3180 return -ENOTTY; 3181 return -ENOTTY;
3181 return put_user(pid_nr(real_tty->session), p); 3182 return put_user(pid_vnr(real_tty->session), p);
3182} 3183}
3183 3184
3184/** 3185/**
@@ -3528,8 +3529,8 @@ void __do_SAK(struct tty_struct *tty)
3528 /* Kill the entire session */ 3529 /* Kill the entire session */
3529 do_each_pid_task(session, PIDTYPE_SID, p) { 3530 do_each_pid_task(session, PIDTYPE_SID, p) {
3530 printk(KERN_NOTICE "SAK: killed process %d" 3531 printk(KERN_NOTICE "SAK: killed process %d"
3531 " (%s): process_session(p)==tty->session\n", 3532 " (%s): task_session_nr(p)==tty->session\n",
3532 p->pid, p->comm); 3533 task_pid_nr(p), p->comm);
3533 send_sig(SIGKILL, p, 1); 3534 send_sig(SIGKILL, p, 1);
3534 } while_each_pid_task(session, PIDTYPE_SID, p); 3535 } while_each_pid_task(session, PIDTYPE_SID, p);
3535 /* Now kill any processes that happen to have the 3536 /* Now kill any processes that happen to have the
@@ -3538,8 +3539,8 @@ void __do_SAK(struct tty_struct *tty)
3538 do_each_thread(g, p) { 3539 do_each_thread(g, p) {
3539 if (p->signal->tty == tty) { 3540 if (p->signal->tty == tty) {
3540 printk(KERN_NOTICE "SAK: killed process %d" 3541 printk(KERN_NOTICE "SAK: killed process %d"
3541 " (%s): process_session(p)==tty->session\n", 3542 " (%s): task_session_nr(p)==tty->session\n",
3542 p->pid, p->comm); 3543 task_pid_nr(p), p->comm);
3543 send_sig(SIGKILL, p, 1); 3544 send_sig(SIGKILL, p, 1);
3544 continue; 3545 continue;
3545 } 3546 }
@@ -3559,7 +3560,7 @@ void __do_SAK(struct tty_struct *tty)
3559 filp->private_data == tty) { 3560 filp->private_data == tty) {
3560 printk(KERN_NOTICE "SAK: killed process %d" 3561 printk(KERN_NOTICE "SAK: killed process %d"
3561 " (%s): fd#%d opened to the tty\n", 3562 " (%s): fd#%d opened to the tty\n",
3562 p->pid, p->comm, i); 3563 task_pid_nr(p), p->comm, i);
3563 force_sig(SIGKILL, p); 3564 force_sig(SIGKILL, p);
3564 break; 3565 break;
3565 } 3566 }
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 1764c67b585f..7a5badfb7d84 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -99,6 +99,7 @@
99#include <linux/pm.h> 99#include <linux/pm.h>
100#include <linux/font.h> 100#include <linux/font.h>
101#include <linux/bitops.h> 101#include <linux/bitops.h>
102#include <linux/notifier.h>
102 103
103#include <asm/io.h> 104#include <asm/io.h>
104#include <asm/system.h> 105#include <asm/system.h>
@@ -223,6 +224,35 @@ enum {
223}; 224};
224 225
225/* 226/*
227 * Notifier list for console events.
228 */
229static ATOMIC_NOTIFIER_HEAD(vt_notifier_list);
230
231int register_vt_notifier(struct notifier_block *nb)
232{
233 return atomic_notifier_chain_register(&vt_notifier_list, nb);
234}
235EXPORT_SYMBOL_GPL(register_vt_notifier);
236
237int unregister_vt_notifier(struct notifier_block *nb)
238{
239 return atomic_notifier_chain_unregister(&vt_notifier_list, nb);
240}
241EXPORT_SYMBOL_GPL(unregister_vt_notifier);
242
243static void notify_write(struct vc_data *vc, unsigned int unicode)
244{
245 struct vt_notifier_param param = { .vc = vc, unicode = unicode };
246 atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param);
247}
248
249static void notify_update(struct vc_data *vc)
250{
251 struct vt_notifier_param param = { .vc = vc };
252 atomic_notifier_call_chain(&vt_notifier_list, VT_UPDATE, &param);
253}
254
255/*
226 * Low-Level Functions 256 * Low-Level Functions
227 */ 257 */
228 258
@@ -718,6 +748,7 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */
718 return -ENXIO; 748 return -ENXIO;
719 if (!vc_cons[currcons].d) { 749 if (!vc_cons[currcons].d) {
720 struct vc_data *vc; 750 struct vc_data *vc;
751 struct vt_notifier_param param;
721 752
722 /* prevent users from taking too much memory */ 753 /* prevent users from taking too much memory */
723 if (currcons >= MAX_NR_USER_CONSOLES && !capable(CAP_SYS_RESOURCE)) 754 if (currcons >= MAX_NR_USER_CONSOLES && !capable(CAP_SYS_RESOURCE))
@@ -729,7 +760,7 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */
729 /* although the numbers above are not valid since long ago, the 760 /* although the numbers above are not valid since long ago, the
730 point is still up-to-date and the comment still has its value 761 point is still up-to-date and the comment still has its value
731 even if only as a historical artifact. --mj, July 1998 */ 762 even if only as a historical artifact. --mj, July 1998 */
732 vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL); 763 param.vc = vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
733 if (!vc) 764 if (!vc)
734 return -ENOMEM; 765 return -ENOMEM;
735 vc_cons[currcons].d = vc; 766 vc_cons[currcons].d = vc;
@@ -746,6 +777,7 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */
746 } 777 }
747 vc->vc_kmalloced = 1; 778 vc->vc_kmalloced = 1;
748 vc_init(vc, vc->vc_rows, vc->vc_cols, 1); 779 vc_init(vc, vc->vc_rows, vc->vc_cols, 1);
780 atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, &param);
749 } 781 }
750 return 0; 782 return 0;
751} 783}
@@ -907,6 +939,8 @@ void vc_deallocate(unsigned int currcons)
907 939
908 if (vc_cons_allocated(currcons)) { 940 if (vc_cons_allocated(currcons)) {
909 struct vc_data *vc = vc_cons[currcons].d; 941 struct vc_data *vc = vc_cons[currcons].d;
942 struct vt_notifier_param param = { .vc = vc };
943 atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param);
910 vc->vc_sw->con_deinit(vc); 944 vc->vc_sw->con_deinit(vc);
911 put_pid(vc->vt_pid); 945 put_pid(vc->vt_pid);
912 module_put(vc->vc_sw->owner); 946 module_put(vc->vc_sw->owner);
@@ -1019,6 +1053,7 @@ static void lf(struct vc_data *vc)
1019 vc->vc_pos += vc->vc_size_row; 1053 vc->vc_pos += vc->vc_size_row;
1020 } 1054 }
1021 vc->vc_need_wrap = 0; 1055 vc->vc_need_wrap = 0;
1056 notify_write(vc, '\n');
1022} 1057}
1023 1058
1024static void ri(struct vc_data *vc) 1059static void ri(struct vc_data *vc)
@@ -1039,6 +1074,7 @@ static inline void cr(struct vc_data *vc)
1039{ 1074{
1040 vc->vc_pos -= vc->vc_x << 1; 1075 vc->vc_pos -= vc->vc_x << 1;
1041 vc->vc_need_wrap = vc->vc_x = 0; 1076 vc->vc_need_wrap = vc->vc_x = 0;
1077 notify_write(vc, '\r');
1042} 1078}
1043 1079
1044static inline void bs(struct vc_data *vc) 1080static inline void bs(struct vc_data *vc)
@@ -1047,6 +1083,7 @@ static inline void bs(struct vc_data *vc)
1047 vc->vc_pos -= 2; 1083 vc->vc_pos -= 2;
1048 vc->vc_x--; 1084 vc->vc_x--;
1049 vc->vc_need_wrap = 0; 1085 vc->vc_need_wrap = 0;
1086 notify_write(vc, '\b');
1050 } 1087 }
1051} 1088}
1052 1089
@@ -1593,6 +1630,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
1593 break; 1630 break;
1594 } 1631 }
1595 vc->vc_pos += (vc->vc_x << 1); 1632 vc->vc_pos += (vc->vc_x << 1);
1633 notify_write(vc, '\t');
1596 return; 1634 return;
1597 case 10: case 11: case 12: 1635 case 10: case 11: case 12:
1598 lf(vc); 1636 lf(vc);
@@ -2252,6 +2290,7 @@ rescan_last_byte:
2252 tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */ 2290 tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */
2253 if (tc < 0) tc = ' '; 2291 if (tc < 0) tc = ' ';
2254 } 2292 }
2293 notify_write(vc, c);
2255 2294
2256 if (inverse) { 2295 if (inverse) {
2257 FLUSH 2296 FLUSH
@@ -2274,6 +2313,7 @@ rescan_last_byte:
2274 release_console_sem(); 2313 release_console_sem();
2275 2314
2276out: 2315out:
2316 notify_update(vc);
2277 return n; 2317 return n;
2278#undef FLUSH 2318#undef FLUSH
2279} 2319}
@@ -2317,6 +2357,7 @@ static void console_callback(struct work_struct *ignored)
2317 do_blank_screen(0); 2357 do_blank_screen(0);
2318 blank_timer_expired = 0; 2358 blank_timer_expired = 0;
2319 } 2359 }
2360 notify_update(vc_cons[fg_console].d);
2320 2361
2321 release_console_sem(); 2362 release_console_sem();
2322} 2363}
@@ -2418,6 +2459,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
2418 continue; 2459 continue;
2419 } 2460 }
2420 scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos); 2461 scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos);
2462 notify_write(vc, c);
2421 cnt++; 2463 cnt++;
2422 if (myx == vc->vc_cols - 1) { 2464 if (myx == vc->vc_cols - 1) {
2423 vc->vc_need_wrap = 1; 2465 vc->vc_need_wrap = 1;
@@ -2436,6 +2478,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
2436 } 2478 }
2437 } 2479 }
2438 set_cursor(vc); 2480 set_cursor(vc);
2481 notify_update(vc);
2439 2482
2440quit: 2483quit:
2441 clear_bit(0, &printing); 2484 clear_bit(0, &printing);
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index e80af67664cc..2d23e304f5ec 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -94,8 +94,6 @@ extern int edac_debug_level;
94 94
95#endif /* !CONFIG_EDAC_DEBUG */ 95#endif /* !CONFIG_EDAC_DEBUG */
96 96
97#define BIT(x) (1 << (x))
98
99#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \ 97#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
100 PCI_DEVICE_ID_ ## vend ## _ ## dev 98 PCI_DEVICE_ID_ ## vend ## _ ## dev
101 99
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index e66cdd42a392..9007d0677220 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -270,6 +270,7 @@ static void __devexit pasemi_edac_remove(struct pci_dev *pdev)
270 270
271static const struct pci_device_id pasemi_edac_pci_tbl[] = { 271static const struct pci_device_id pasemi_edac_pci_tbl[] = {
272 { PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) }, 272 { PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) },
273 { }
273}; 274};
274 275
275MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl); 276MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl);
diff --git a/drivers/firmware/dcdbas.h b/drivers/firmware/dcdbas.h
index dcdba0f1b32c..87bc3417de27 100644
--- a/drivers/firmware/dcdbas.h
+++ b/drivers/firmware/dcdbas.h
@@ -17,7 +17,6 @@
17#define _DCDBAS_H_ 17#define _DCDBAS_H_
18 18
19#include <linux/device.h> 19#include <linux/device.h>
20#include <linux/input.h>
21#include <linux/sysfs.h> 20#include <linux/sysfs.h>
22#include <linux/types.h> 21#include <linux/types.h>
23 22
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index a702e2f6da7d..1ca6f4635eeb 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -113,13 +113,13 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t
113 113
114 if (count > HID_MIN_BUFFER_SIZE) { 114 if (count > HID_MIN_BUFFER_SIZE) {
115 printk(KERN_WARNING "hidraw: pid %d passed too large report\n", 115 printk(KERN_WARNING "hidraw: pid %d passed too large report\n",
116 current->pid); 116 task_pid_nr(current));
117 return -EINVAL; 117 return -EINVAL;
118 } 118 }
119 119
120 if (count < 2) { 120 if (count < 2) {
121 printk(KERN_WARNING "hidraw: pid %d passed too short report\n", 121 printk(KERN_WARNING "hidraw: pid %d passed too short report\n",
122 current->pid); 122 task_pid_nr(current));
123 return -EINVAL; 123 return -EINVAL;
124 } 124 }
125 125
diff --git a/drivers/hid/usbhid/usbkbd.c b/drivers/hid/usbhid/usbkbd.c
index b76b02f7b52d..775a1ef28a29 100644
--- a/drivers/hid/usbhid/usbkbd.c
+++ b/drivers/hid/usbhid/usbkbd.c
@@ -274,8 +274,11 @@ static int usb_kbd_probe(struct usb_interface *iface,
274 274
275 input_set_drvdata(input_dev, kbd); 275 input_set_drvdata(input_dev, kbd);
276 276
277 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_REP); 277 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
278 input_dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL) | BIT(LED_COMPOSE) | BIT(LED_KANA); 278 BIT_MASK(EV_REP);
279 input_dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
280 BIT_MASK(LED_SCROLLL) | BIT_MASK(LED_COMPOSE) |
281 BIT_MASK(LED_KANA);
279 282
280 for (i = 0; i < 255; i++) 283 for (i = 0; i < 255; i++)
281 set_bit(usb_kbd_keycode[i], input_dev->keybit); 284 set_bit(usb_kbd_keycode[i], input_dev->keybit);
diff --git a/drivers/hid/usbhid/usbmouse.c b/drivers/hid/usbhid/usbmouse.c
index 5345c73bcf62..f8ad6910d3d9 100644
--- a/drivers/hid/usbhid/usbmouse.c
+++ b/drivers/hid/usbhid/usbmouse.c
@@ -173,11 +173,13 @@ static int usb_mouse_probe(struct usb_interface *intf, const struct usb_device_i
173 usb_to_input_id(dev, &input_dev->id); 173 usb_to_input_id(dev, &input_dev->id);
174 input_dev->dev.parent = &intf->dev; 174 input_dev->dev.parent = &intf->dev;
175 175
176 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 176 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
177 input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE); 177 input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
178 input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 178 BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
179 input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA); 179 input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
180 input_dev->relbit[0] |= BIT(REL_WHEEL); 180 input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) |
181 BIT_MASK(BTN_EXTRA);
182 input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
181 183
182 input_set_drvdata(input_dev, mouse); 184 input_set_drvdata(input_dev, mouse);
183 185
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 4879125b4cdc..1001d2e122a2 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -1099,7 +1099,7 @@ static int applesmc_create_accelerometer(void)
1099 idev->name = "applesmc"; 1099 idev->name = "applesmc";
1100 idev->id.bustype = BUS_HOST; 1100 idev->id.bustype = BUS_HOST;
1101 idev->dev.parent = &pdev->dev; 1101 idev->dev.parent = &pdev->dev;
1102 idev->evbit[0] = BIT(EV_ABS); 1102 idev->evbit[0] = BIT_MASK(EV_ABS);
1103 input_set_abs_params(idev, ABS_X, 1103 input_set_abs_params(idev, ABS_X,
1104 -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); 1104 -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
1105 input_set_abs_params(idev, ABS_Y, 1105 input_set_abs_params(idev, ABS_Y,
diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c
index 8a7ae03aeee4..bab5fd2e4dfd 100644
--- a/drivers/hwmon/hdaps.c
+++ b/drivers/hwmon/hdaps.c
@@ -574,7 +574,7 @@ static int __init hdaps_init(void)
574 idev = hdaps_idev->input; 574 idev = hdaps_idev->input;
575 idev->name = "hdaps"; 575 idev->name = "hdaps";
576 idev->dev.parent = &pdev->dev; 576 idev->dev.parent = &pdev->dev;
577 idev->evbit[0] = BIT(EV_ABS); 577 idev->evbit[0] = BIT_MASK(EV_ABS);
578 input_set_abs_params(idev, ABS_X, 578 input_set_abs_params(idev, ABS_X,
579 -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT); 579 -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
580 input_set_abs_params(idev, ABS_Y, 580 input_set_abs_params(idev, ABS_Y,
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 00fad11733ad..6426a61f8d4d 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -85,7 +85,7 @@ struct bits {
85 const char *set; 85 const char *set;
86 const char *unset; 86 const char *unset;
87}; 87};
88#define BIT(m, s, u) { .mask = m, .set = s, .unset = u } 88#define PXA_BIT(m, s, u) { .mask = m, .set = s, .unset = u }
89 89
90static inline void 90static inline void
91decode_bits(const char *prefix, const struct bits *bits, int num, u32 val) 91decode_bits(const char *prefix, const struct bits *bits, int num, u32 val)
@@ -100,17 +100,17 @@ decode_bits(const char *prefix, const struct bits *bits, int num, u32 val)
100} 100}
101 101
102static const struct bits isr_bits[] = { 102static const struct bits isr_bits[] = {
103 BIT(ISR_RWM, "RX", "TX"), 103 PXA_BIT(ISR_RWM, "RX", "TX"),
104 BIT(ISR_ACKNAK, "NAK", "ACK"), 104 PXA_BIT(ISR_ACKNAK, "NAK", "ACK"),
105 BIT(ISR_UB, "Bsy", "Rdy"), 105 PXA_BIT(ISR_UB, "Bsy", "Rdy"),
106 BIT(ISR_IBB, "BusBsy", "BusRdy"), 106 PXA_BIT(ISR_IBB, "BusBsy", "BusRdy"),
107 BIT(ISR_SSD, "SlaveStop", NULL), 107 PXA_BIT(ISR_SSD, "SlaveStop", NULL),
108 BIT(ISR_ALD, "ALD", NULL), 108 PXA_BIT(ISR_ALD, "ALD", NULL),
109 BIT(ISR_ITE, "TxEmpty", NULL), 109 PXA_BIT(ISR_ITE, "TxEmpty", NULL),
110 BIT(ISR_IRF, "RxFull", NULL), 110 PXA_BIT(ISR_IRF, "RxFull", NULL),
111 BIT(ISR_GCAD, "GenCall", NULL), 111 PXA_BIT(ISR_GCAD, "GenCall", NULL),
112 BIT(ISR_SAD, "SlaveAddr", NULL), 112 PXA_BIT(ISR_SAD, "SlaveAddr", NULL),
113 BIT(ISR_BED, "BusErr", NULL), 113 PXA_BIT(ISR_BED, "BusErr", NULL),
114}; 114};
115 115
116static void decode_ISR(unsigned int val) 116static void decode_ISR(unsigned int val)
@@ -120,21 +120,21 @@ static void decode_ISR(unsigned int val)
120} 120}
121 121
122static const struct bits icr_bits[] = { 122static const struct bits icr_bits[] = {
123 BIT(ICR_START, "START", NULL), 123 PXA_BIT(ICR_START, "START", NULL),
124 BIT(ICR_STOP, "STOP", NULL), 124 PXA_BIT(ICR_STOP, "STOP", NULL),
125 BIT(ICR_ACKNAK, "ACKNAK", NULL), 125 PXA_BIT(ICR_ACKNAK, "ACKNAK", NULL),
126 BIT(ICR_TB, "TB", NULL), 126 PXA_BIT(ICR_TB, "TB", NULL),
127 BIT(ICR_MA, "MA", NULL), 127 PXA_BIT(ICR_MA, "MA", NULL),
128 BIT(ICR_SCLE, "SCLE", "scle"), 128 PXA_BIT(ICR_SCLE, "SCLE", "scle"),
129 BIT(ICR_IUE, "IUE", "iue"), 129 PXA_BIT(ICR_IUE, "IUE", "iue"),
130 BIT(ICR_GCD, "GCD", NULL), 130 PXA_BIT(ICR_GCD, "GCD", NULL),
131 BIT(ICR_ITEIE, "ITEIE", NULL), 131 PXA_BIT(ICR_ITEIE, "ITEIE", NULL),
132 BIT(ICR_IRFIE, "IRFIE", NULL), 132 PXA_BIT(ICR_IRFIE, "IRFIE", NULL),
133 BIT(ICR_BEIE, "BEIE", NULL), 133 PXA_BIT(ICR_BEIE, "BEIE", NULL),
134 BIT(ICR_SSDIE, "SSDIE", NULL), 134 PXA_BIT(ICR_SSDIE, "SSDIE", NULL),
135 BIT(ICR_ALDIE, "ALDIE", NULL), 135 PXA_BIT(ICR_ALDIE, "ALDIE", NULL),
136 BIT(ICR_SADIE, "SADIE", NULL), 136 PXA_BIT(ICR_SADIE, "SADIE", NULL),
137 BIT(ICR_UR, "UR", "ur"), 137 PXA_BIT(ICR_UR, "UR", "ur"),
138}; 138};
139 139
140static void decode_ICR(unsigned int val) 140static void decode_ICR(unsigned int val)
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 5c8b008676fb..32eaa3f80515 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -47,12 +47,12 @@
47#include <linux/device.h> 47#include <linux/device.h>
48#include <linux/kmod.h> 48#include <linux/kmod.h>
49#include <linux/scatterlist.h> 49#include <linux/scatterlist.h>
50#include <linux/bitops.h>
50 51
51#include <asm/byteorder.h> 52#include <asm/byteorder.h>
52#include <asm/irq.h> 53#include <asm/irq.h>
53#include <asm/uaccess.h> 54#include <asm/uaccess.h>
54#include <asm/io.h> 55#include <asm/io.h>
55#include <asm/bitops.h>
56 56
57static int __ide_end_request(ide_drive_t *drive, struct request *rq, 57static int __ide_end_request(ide_drive_t *drive, struct request *rq,
58 int uptodate, unsigned int nr_bytes) 58 int uptodate, unsigned int nr_bytes)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93644f82592c..d08fb30768bc 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2797,11 +2797,12 @@ static void cma_remove_one(struct ib_device *device)
2797 2797
2798static int cma_init(void) 2798static int cma_init(void)
2799{ 2799{
2800 int ret, low, high; 2800 int ret, low, high, remaining;
2801 2801
2802 get_random_bytes(&next_port, sizeof next_port); 2802 get_random_bytes(&next_port, sizeof next_port);
2803 inet_get_local_port_range(&low, &high); 2803 inet_get_local_port_range(&low, &high);
2804 next_port = ((unsigned int) next_port % (high - low)) + low; 2804 remaining = (high - low) + 1;
2805 next_port = ((unsigned int) next_port % remaining) + low;
2805 2806
2806 cma_wq = create_singlethread_workqueue("rdma_cm"); 2807 cma_wq = create_singlethread_workqueue("rdma_cm");
2807 if (!cma_wq) 2808 if (!cma_wq)
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 1d62c8b88e12..e5b4e9bfbdc5 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -495,7 +495,7 @@ static unsigned int evdev_poll(struct file *file, poll_table *wait)
495#ifdef CONFIG_COMPAT 495#ifdef CONFIG_COMPAT
496 496
497#define BITS_PER_LONG_COMPAT (sizeof(compat_long_t) * 8) 497#define BITS_PER_LONG_COMPAT (sizeof(compat_long_t) * 8)
498#define NBITS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1) 498#define BITS_TO_LONGS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1)
499 499
500#ifdef __BIG_ENDIAN 500#ifdef __BIG_ENDIAN
501static int bits_to_user(unsigned long *bits, unsigned int maxbit, 501static int bits_to_user(unsigned long *bits, unsigned int maxbit,
@@ -504,7 +504,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
504 int len, i; 504 int len, i;
505 505
506 if (compat) { 506 if (compat) {
507 len = NBITS_COMPAT(maxbit) * sizeof(compat_long_t); 507 len = BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t);
508 if (len > maxlen) 508 if (len > maxlen)
509 len = maxlen; 509 len = maxlen;
510 510
@@ -515,7 +515,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
515 sizeof(compat_long_t))) 515 sizeof(compat_long_t)))
516 return -EFAULT; 516 return -EFAULT;
517 } else { 517 } else {
518 len = NBITS(maxbit) * sizeof(long); 518 len = BITS_TO_LONGS(maxbit) * sizeof(long);
519 if (len > maxlen) 519 if (len > maxlen)
520 len = maxlen; 520 len = maxlen;
521 521
@@ -530,8 +530,8 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
530 unsigned int maxlen, void __user *p, int compat) 530 unsigned int maxlen, void __user *p, int compat)
531{ 531{
532 int len = compat ? 532 int len = compat ?
533 NBITS_COMPAT(maxbit) * sizeof(compat_long_t) : 533 BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t) :
534 NBITS(maxbit) * sizeof(long); 534 BITS_TO_LONGS(maxbit) * sizeof(long);
535 535
536 if (len > maxlen) 536 if (len > maxlen)
537 len = maxlen; 537 len = maxlen;
@@ -545,7 +545,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
545static int bits_to_user(unsigned long *bits, unsigned int maxbit, 545static int bits_to_user(unsigned long *bits, unsigned int maxbit,
546 unsigned int maxlen, void __user *p, int compat) 546 unsigned int maxlen, void __user *p, int compat)
547{ 547{
548 int len = NBITS(maxbit) * sizeof(long); 548 int len = BITS_TO_LONGS(maxbit) * sizeof(long);
549 549
550 if (len > maxlen) 550 if (len > maxlen)
551 len = maxlen; 551 len = maxlen;
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 2f2b020cd629..307c7b5c2b33 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -584,10 +584,10 @@ static int input_default_setkeycode(struct input_dev *dev,
584 584
585 585
586#define MATCH_BIT(bit, max) \ 586#define MATCH_BIT(bit, max) \
587 for (i = 0; i < NBITS(max); i++) \ 587 for (i = 0; i < BITS_TO_LONGS(max); i++) \
588 if ((id->bit[i] & dev->bit[i]) != id->bit[i]) \ 588 if ((id->bit[i] & dev->bit[i]) != id->bit[i]) \
589 break; \ 589 break; \
590 if (i != NBITS(max)) \ 590 if (i != BITS_TO_LONGS(max)) \
591 continue; 591 continue;
592 592
593static const struct input_device_id *input_match_device(const struct input_device_id *id, 593static const struct input_device_id *input_match_device(const struct input_device_id *id,
@@ -698,7 +698,7 @@ static void input_seq_print_bitmap(struct seq_file *seq, const char *name,
698{ 698{
699 int i; 699 int i;
700 700
701 for (i = NBITS(max) - 1; i > 0; i--) 701 for (i = BITS_TO_LONGS(max) - 1; i > 0; i--)
702 if (bitmap[i]) 702 if (bitmap[i])
703 break; 703 break;
704 704
@@ -892,7 +892,7 @@ static int input_print_modalias_bits(char *buf, int size,
892 892
893 len += snprintf(buf, max(size, 0), "%c", name); 893 len += snprintf(buf, max(size, 0), "%c", name);
894 for (i = min_bit; i < max_bit; i++) 894 for (i = min_bit; i < max_bit; i++)
895 if (bm[LONG(i)] & BIT(i)) 895 if (bm[BIT_WORD(i)] & BIT_MASK(i))
896 len += snprintf(buf + len, max(size - len, 0), "%X,", i); 896 len += snprintf(buf + len, max(size - len, 0), "%X,", i);
897 return len; 897 return len;
898} 898}
@@ -991,7 +991,7 @@ static int input_print_bitmap(char *buf, int buf_size, unsigned long *bitmap,
991 int i; 991 int i;
992 int len = 0; 992 int len = 0;
993 993
994 for (i = NBITS(max) - 1; i > 0; i--) 994 for (i = BITS_TO_LONGS(max) - 1; i > 0; i--)
995 if (bitmap[i]) 995 if (bitmap[i])
996 break; 996 break;
997 997
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index 2b201f9aa024..22b2789ef58a 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -844,8 +844,8 @@ static const struct input_device_id joydev_blacklist[] = {
844 { 844 {
845 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | 845 .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
846 INPUT_DEVICE_ID_MATCH_KEYBIT, 846 INPUT_DEVICE_ID_MATCH_KEYBIT,
847 .evbit = { BIT(EV_KEY) }, 847 .evbit = { BIT_MASK(EV_KEY) },
848 .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) }, 848 .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
849 }, /* Avoid itouchpads, touchscreens and tablets */ 849 }, /* Avoid itouchpads, touchscreens and tablets */
850 { } /* Terminating entry */ 850 { } /* Terminating entry */
851}; 851};
@@ -854,20 +854,20 @@ static const struct input_device_id joydev_ids[] = {
854 { 854 {
855 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | 855 .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
856 INPUT_DEVICE_ID_MATCH_ABSBIT, 856 INPUT_DEVICE_ID_MATCH_ABSBIT,
857 .evbit = { BIT(EV_ABS) }, 857 .evbit = { BIT_MASK(EV_ABS) },
858 .absbit = { BIT(ABS_X) }, 858 .absbit = { BIT_MASK(ABS_X) },
859 }, 859 },
860 { 860 {
861 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | 861 .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
862 INPUT_DEVICE_ID_MATCH_ABSBIT, 862 INPUT_DEVICE_ID_MATCH_ABSBIT,
863 .evbit = { BIT(EV_ABS) }, 863 .evbit = { BIT_MASK(EV_ABS) },
864 .absbit = { BIT(ABS_WHEEL) }, 864 .absbit = { BIT_MASK(ABS_WHEEL) },
865 }, 865 },
866 { 866 {
867 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | 867 .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
868 INPUT_DEVICE_ID_MATCH_ABSBIT, 868 INPUT_DEVICE_ID_MATCH_ABSBIT,
869 .evbit = { BIT(EV_ABS) }, 869 .evbit = { BIT_MASK(EV_ABS) },
870 .absbit = { BIT(ABS_THROTTLE) }, 870 .absbit = { BIT_MASK(ABS_THROTTLE) },
871 }, 871 },
872 { } /* Terminating entry */ 872 { } /* Terminating entry */
873}; 873};
diff --git a/drivers/input/joystick/a3d.c b/drivers/input/joystick/a3d.c
index ff701ab10d74..52ba16f487c7 100644
--- a/drivers/input/joystick/a3d.c
+++ b/drivers/input/joystick/a3d.c
@@ -326,14 +326,19 @@ static int a3d_connect(struct gameport *gameport, struct gameport_driver *drv)
326 326
327 a3d->length = 33; 327 a3d->length = 33;
328 328
329 input_dev->evbit[0] |= BIT(EV_ABS) | BIT(EV_KEY) | BIT(EV_REL); 329 input_dev->evbit[0] |= BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY) |
330 input_dev->relbit[0] |= BIT(REL_X) | BIT(REL_Y); 330 BIT_MASK(EV_REL);
331 input_dev->absbit[0] |= BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_THROTTLE) | BIT(ABS_RUDDER) 331 input_dev->relbit[0] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
332 | BIT(ABS_HAT0X) | BIT(ABS_HAT0Y) | BIT(ABS_HAT1X) | BIT(ABS_HAT1Y); 332 input_dev->absbit[0] |= BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
333 input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_RIGHT) | BIT(BTN_LEFT) | BIT(BTN_MIDDLE) 333 BIT_MASK(ABS_THROTTLE) | BIT_MASK(ABS_RUDDER) |
334 | BIT(BTN_SIDE) | BIT(BTN_EXTRA); 334 BIT_MASK(ABS_HAT0X) | BIT_MASK(ABS_HAT0Y) |
335 input_dev->keybit[LONG(BTN_JOYSTICK)] |= BIT(BTN_TRIGGER) | BIT(BTN_THUMB) | BIT(BTN_TOP) 335 BIT_MASK(ABS_HAT1X) | BIT_MASK(ABS_HAT1Y);
336 | BIT(BTN_PINKIE); 336 input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_RIGHT) |
337 BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE) |
338 BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
339 input_dev->keybit[BIT_WORD(BTN_JOYSTICK)] |=
340 BIT_MASK(BTN_TRIGGER) | BIT_MASK(BTN_THUMB) |
341 BIT_MASK(BTN_TOP) | BIT_MASK(BTN_PINKIE);
337 342
338 a3d_read(a3d, data); 343 a3d_read(a3d, data);
339 344
@@ -348,9 +353,10 @@ static int a3d_connect(struct gameport *gameport, struct gameport_driver *drv)
348 } else { 353 } else {
349 a3d->length = 29; 354 a3d->length = 29;
350 355
351 input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_REL); 356 input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
352 input_dev->relbit[0] |= BIT(REL_X) | BIT(REL_Y); 357 input_dev->relbit[0] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
353 input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_RIGHT) | BIT(BTN_LEFT) | BIT(BTN_MIDDLE); 358 input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_RIGHT) |
359 BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE);
354 360
355 a3d_read(a3d, data); 361 a3d_read(a3d, data);
356 362
diff --git a/drivers/input/joystick/adi.c b/drivers/input/joystick/adi.c
index 28140c4a110d..d1ca8a14950f 100644
--- a/drivers/input/joystick/adi.c
+++ b/drivers/input/joystick/adi.c
@@ -431,7 +431,7 @@ static int adi_init_input(struct adi *adi, struct adi_port *port, int half)
431 input_dev->open = adi_open; 431 input_dev->open = adi_open;
432 input_dev->close = adi_close; 432 input_dev->close = adi_close;
433 433
434 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 434 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
435 435
436 for (i = 0; i < adi->axes10 + adi->axes8 + (adi->hats + (adi->pad != -1)) * 2; i++) 436 for (i = 0; i < adi->axes10 + adi->axes8 + (adi->hats + (adi->pad != -1)) * 2; i++)
437 set_bit(adi->abs[i], input_dev->absbit); 437 set_bit(adi->abs[i], input_dev->absbit);
diff --git a/drivers/input/joystick/amijoy.c b/drivers/input/joystick/amijoy.c
index b0f5541ec3e6..5cf9f3610e67 100644
--- a/drivers/input/joystick/amijoy.c
+++ b/drivers/input/joystick/amijoy.c
@@ -137,9 +137,10 @@ static int __init amijoy_init(void)
137 amijoy_dev[i]->open = amijoy_open; 137 amijoy_dev[i]->open = amijoy_open;
138 amijoy_dev[i]->close = amijoy_close; 138 amijoy_dev[i]->close = amijoy_close;
139 139
140 amijoy_dev[i]->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 140 amijoy_dev[i]->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
141 amijoy_dev[i]->absbit[0] = BIT(ABS_X) | BIT(ABS_Y); 141 amijoy_dev[i]->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y);
142 amijoy_dev[i]->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 142 amijoy_dev[i]->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
143 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
143 for (j = 0; j < 2; j++) { 144 for (j = 0; j < 2; j++) {
144 amijoy_dev[i]->absmin[ABS_X + j] = -1; 145 amijoy_dev[i]->absmin[ABS_X + j] = -1;
145 amijoy_dev[i]->absmax[ABS_X + j] = 1; 146 amijoy_dev[i]->absmax[ABS_X + j] = 1;
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index bdd157c1ebf8..15739880afc6 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -456,7 +456,7 @@ static int analog_init_device(struct analog_port *port, struct analog *analog, i
456 input_dev->open = analog_open; 456 input_dev->open = analog_open;
457 input_dev->close = analog_close; 457 input_dev->close = analog_close;
458 458
459 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 459 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
460 460
461 for (i = j = 0; i < 4; i++) 461 for (i = j = 0; i < 4; i++)
462 if (analog->mask & (1 << i)) { 462 if (analog->mask & (1 << i)) {
diff --git a/drivers/input/joystick/cobra.c b/drivers/input/joystick/cobra.c
index d3352a849b85..55646a6d89f5 100644
--- a/drivers/input/joystick/cobra.c
+++ b/drivers/input/joystick/cobra.c
@@ -218,7 +218,7 @@ static int cobra_connect(struct gameport *gameport, struct gameport_driver *drv)
218 input_dev->open = cobra_open; 218 input_dev->open = cobra_open;
219 input_dev->close = cobra_close; 219 input_dev->close = cobra_close;
220 220
221 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 221 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
222 input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0); 222 input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0);
223 input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0); 223 input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0);
224 for (j = 0; cobra_btn[j]; j++) 224 for (j = 0; cobra_btn[j]; j++)
diff --git a/drivers/input/joystick/db9.c b/drivers/input/joystick/db9.c
index b069ee18e353..27fc475bd3a1 100644
--- a/drivers/input/joystick/db9.c
+++ b/drivers/input/joystick/db9.c
@@ -631,7 +631,7 @@ static struct db9 __init *db9_probe(int parport, int mode)
631 input_dev->open = db9_open; 631 input_dev->open = db9_open;
632 input_dev->close = db9_close; 632 input_dev->close = db9_close;
633 633
634 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 634 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
635 for (j = 0; j < db9_mode->n_buttons; j++) 635 for (j = 0; j < db9_mode->n_buttons; j++)
636 set_bit(db9_mode->buttons[j], input_dev->keybit); 636 set_bit(db9_mode->buttons[j], input_dev->keybit);
637 for (j = 0; j < db9_mode->n_axis; j++) { 637 for (j = 0; j < db9_mode->n_axis; j++) {
diff --git a/drivers/input/joystick/gamecon.c b/drivers/input/joystick/gamecon.c
index 1a452e0e5f25..df2a9d02ca6c 100644
--- a/drivers/input/joystick/gamecon.c
+++ b/drivers/input/joystick/gamecon.c
@@ -653,12 +653,12 @@ static int __init gc_setup_pad(struct gc *gc, int idx, int pad_type)
653 input_dev->close = gc_close; 653 input_dev->close = gc_close;
654 654
655 if (pad_type != GC_SNESMOUSE) { 655 if (pad_type != GC_SNESMOUSE) {
656 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 656 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
657 657
658 for (i = 0; i < 2; i++) 658 for (i = 0; i < 2; i++)
659 input_set_abs_params(input_dev, ABS_X + i, -1, 1, 0, 0); 659 input_set_abs_params(input_dev, ABS_X + i, -1, 1, 0, 0);
660 } else 660 } else
661 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 661 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
662 662
663 gc->pads[0] |= gc_status_bit[idx]; 663 gc->pads[0] |= gc_status_bit[idx];
664 gc->pads[pad_type] |= gc_status_bit[idx]; 664 gc->pads[pad_type] |= gc_status_bit[idx];
diff --git a/drivers/input/joystick/gf2k.c b/drivers/input/joystick/gf2k.c
index d514aebf7554..1f6302c0eb3f 100644
--- a/drivers/input/joystick/gf2k.c
+++ b/drivers/input/joystick/gf2k.c
@@ -315,7 +315,7 @@ static int gf2k_connect(struct gameport *gameport, struct gameport_driver *drv)
315 input_dev->open = gf2k_open; 315 input_dev->open = gf2k_open;
316 input_dev->close = gf2k_close; 316 input_dev->close = gf2k_close;
317 317
318 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 318 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
319 319
320 for (i = 0; i < gf2k_axes[gf2k->id]; i++) 320 for (i = 0; i < gf2k_axes[gf2k->id]; i++)
321 set_bit(gf2k_abs[i], input_dev->absbit); 321 set_bit(gf2k_abs[i], input_dev->absbit);
diff --git a/drivers/input/joystick/grip.c b/drivers/input/joystick/grip.c
index 73eb5ab6f140..fd3853ab1aad 100644
--- a/drivers/input/joystick/grip.c
+++ b/drivers/input/joystick/grip.c
@@ -370,7 +370,7 @@ static int grip_connect(struct gameport *gameport, struct gameport_driver *drv)
370 input_dev->open = grip_open; 370 input_dev->open = grip_open;
371 input_dev->close = grip_close; 371 input_dev->close = grip_close;
372 372
373 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 373 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
374 374
375 for (j = 0; (t = grip_abs[grip->mode[i]][j]) >= 0; j++) { 375 for (j = 0; (t = grip_abs[grip->mode[i]][j]) >= 0; j++) {
376 376
diff --git a/drivers/input/joystick/grip_mp.c b/drivers/input/joystick/grip_mp.c
index 4ed3a3eadf19..c57e21d68c00 100644
--- a/drivers/input/joystick/grip_mp.c
+++ b/drivers/input/joystick/grip_mp.c
@@ -606,7 +606,7 @@ static int register_slot(int slot, struct grip_mp *grip)
606 input_dev->open = grip_open; 606 input_dev->open = grip_open;
607 input_dev->close = grip_close; 607 input_dev->close = grip_close;
608 608
609 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 609 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
610 610
611 for (j = 0; (t = grip_abs[port->mode][j]) >= 0; j++) 611 for (j = 0; (t = grip_abs[port->mode][j]) >= 0; j++)
612 input_set_abs_params(input_dev, t, -1, 1, 0, 0); 612 input_set_abs_params(input_dev, t, -1, 1, 0, 0);
diff --git a/drivers/input/joystick/guillemot.c b/drivers/input/joystick/guillemot.c
index d4e8073caf27..aa6bfb3fb8cd 100644
--- a/drivers/input/joystick/guillemot.c
+++ b/drivers/input/joystick/guillemot.c
@@ -238,7 +238,7 @@ static int guillemot_connect(struct gameport *gameport, struct gameport_driver *
238 input_dev->open = guillemot_open; 238 input_dev->open = guillemot_open;
239 input_dev->close = guillemot_close; 239 input_dev->close = guillemot_close;
240 240
241 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 241 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
242 242
243 for (i = 0; (t = guillemot->type->abs[i]) >= 0; i++) 243 for (i = 0; (t = guillemot->type->abs[i]) >= 0; i++)
244 input_set_abs_params(input_dev, t, 0, 255, 0, 0); 244 input_set_abs_params(input_dev, t, 0, 255, 0, 0);
diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
index 682244b1c042..6f826b37d9aa 100644
--- a/drivers/input/joystick/iforce/iforce-main.c
+++ b/drivers/input/joystick/iforce/iforce-main.c
@@ -389,7 +389,8 @@ int iforce_init_device(struct iforce *iforce)
389 * Set input device bitfields and ranges. 389 * Set input device bitfields and ranges.
390 */ 390 */
391 391
392 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_FF_STATUS); 392 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
393 BIT_MASK(EV_FF_STATUS);
393 394
394 for (i = 0; iforce->type->btn[i] >= 0; i++) 395 for (i = 0; iforce->type->btn[i] >= 0; i++)
395 set_bit(iforce->type->btn[i], input_dev->keybit); 396 set_bit(iforce->type->btn[i], input_dev->keybit);
diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h
index 40a853ac21c7..a964a7cfd210 100644
--- a/drivers/input/joystick/iforce/iforce.h
+++ b/drivers/input/joystick/iforce/iforce.h
@@ -62,13 +62,13 @@
62#define FF_CORE_IS_PLAYED 3 /* Effect is currently being played */ 62#define FF_CORE_IS_PLAYED 3 /* Effect is currently being played */
63#define FF_CORE_SHOULD_PLAY 4 /* User wants the effect to be played */ 63#define FF_CORE_SHOULD_PLAY 4 /* User wants the effect to be played */
64#define FF_CORE_UPDATE 5 /* Effect is being updated */ 64#define FF_CORE_UPDATE 5 /* Effect is being updated */
65#define FF_MODCORE_MAX 5 65#define FF_MODCORE_CNT 6
66 66
67struct iforce_core_effect { 67struct iforce_core_effect {
68 /* Information about where modifiers are stored in the device's memory */ 68 /* Information about where modifiers are stored in the device's memory */
69 struct resource mod1_chunk; 69 struct resource mod1_chunk;
70 struct resource mod2_chunk; 70 struct resource mod2_chunk;
71 unsigned long flags[NBITS(FF_MODCORE_MAX)]; 71 unsigned long flags[BITS_TO_LONGS(FF_MODCORE_CNT)];
72}; 72};
73 73
74#define FF_CMD_EFFECT 0x010e 74#define FF_CMD_EFFECT 0x010e
diff --git a/drivers/input/joystick/interact.c b/drivers/input/joystick/interact.c
index 1aec1e9d7c59..bc8ea95dfd0e 100644
--- a/drivers/input/joystick/interact.c
+++ b/drivers/input/joystick/interact.c
@@ -269,7 +269,7 @@ static int interact_connect(struct gameport *gameport, struct gameport_driver *d
269 input_dev->open = interact_open; 269 input_dev->open = interact_open;
270 input_dev->close = interact_close; 270 input_dev->close = interact_close;
271 271
272 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 272 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
273 273
274 for (i = 0; (t = interact_type[interact->type].abs[i]) >= 0; i++) { 274 for (i = 0; (t = interact_type[interact->type].abs[i]) >= 0; i++) {
275 set_bit(t, input_dev->absbit); 275 set_bit(t, input_dev->absbit);
diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c
index b35604ee43ae..54e676948ebb 100644
--- a/drivers/input/joystick/magellan.c
+++ b/drivers/input/joystick/magellan.c
@@ -170,7 +170,7 @@ static int magellan_connect(struct serio *serio, struct serio_driver *drv)
170 input_dev->id.version = 0x0100; 170 input_dev->id.version = 0x0100;
171 input_dev->dev.parent = &serio->dev; 171 input_dev->dev.parent = &serio->dev;
172 172
173 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 173 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
174 174
175 for (i = 0; i < 9; i++) 175 for (i = 0; i < 9; i++)
176 set_bit(magellan_buttons[i], input_dev->keybit); 176 set_bit(magellan_buttons[i], input_dev->keybit);
diff --git a/drivers/input/joystick/sidewinder.c b/drivers/input/joystick/sidewinder.c
index 2adf73f63c94..7b4865fdee54 100644
--- a/drivers/input/joystick/sidewinder.c
+++ b/drivers/input/joystick/sidewinder.c
@@ -758,7 +758,7 @@ static int sw_connect(struct gameport *gameport, struct gameport_driver *drv)
758 input_dev->open = sw_open; 758 input_dev->open = sw_open;
759 input_dev->close = sw_close; 759 input_dev->close = sw_close;
760 760
761 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 761 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
762 762
763 for (j = 0; (bits = sw_bit[sw->type][j]); j++) { 763 for (j = 0; (bits = sw_bit[sw->type][j]); j++) {
764 code = sw_abs[sw->type][j]; 764 code = sw_abs[sw->type][j];
diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
index abb7c4cf54ad..d4087fd49656 100644
--- a/drivers/input/joystick/spaceball.c
+++ b/drivers/input/joystick/spaceball.c
@@ -228,18 +228,23 @@ static int spaceball_connect(struct serio *serio, struct serio_driver *drv)
228 input_dev->id.version = 0x0100; 228 input_dev->id.version = 0x0100;
229 input_dev->dev.parent = &serio->dev; 229 input_dev->dev.parent = &serio->dev;
230 230
231 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 231 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
232 232
233 switch (id) { 233 switch (id) {
234 case SPACEBALL_4000FLX: 234 case SPACEBALL_4000FLX:
235 case SPACEBALL_4000FLX_L: 235 case SPACEBALL_4000FLX_L:
236 input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_9); 236 input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_9);
237 input_dev->keybit[LONG(BTN_A)] |= BIT(BTN_A) | BIT(BTN_B) | BIT(BTN_C) | BIT(BTN_MODE); 237 input_dev->keybit[BIT_WORD(BTN_A)] |= BIT_MASK(BTN_A) |
238 BIT_MASK(BTN_B) | BIT_MASK(BTN_C) |
239 BIT_MASK(BTN_MODE);
238 default: 240 default:
239 input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_2) | BIT(BTN_3) | BIT(BTN_4) 241 input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_2) |
240 | BIT(BTN_5) | BIT(BTN_6) | BIT(BTN_7) | BIT(BTN_8); 242 BIT_MASK(BTN_3) | BIT_MASK(BTN_4) |
243 BIT_MASK(BTN_5) | BIT_MASK(BTN_6) |
244 BIT_MASK(BTN_7) | BIT_MASK(BTN_8);
241 case SPACEBALL_3003C: 245 case SPACEBALL_3003C:
242 input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_1) | BIT(BTN_8); 246 input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_1) |
247 BIT_MASK(BTN_8);
243 } 248 }
244 249
245 for (i = 0; i < 3; i++) { 250 for (i = 0; i < 3; i++) {
diff --git a/drivers/input/joystick/spaceorb.c b/drivers/input/joystick/spaceorb.c
index c4937f1e837c..f7ce4004f4ba 100644
--- a/drivers/input/joystick/spaceorb.c
+++ b/drivers/input/joystick/spaceorb.c
@@ -185,7 +185,7 @@ static int spaceorb_connect(struct serio *serio, struct serio_driver *drv)
185 input_dev->id.version = 0x0100; 185 input_dev->id.version = 0x0100;
186 input_dev->dev.parent = &serio->dev; 186 input_dev->dev.parent = &serio->dev;
187 187
188 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 188 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
189 189
190 for (i = 0; i < 6; i++) 190 for (i = 0; i < 6; i++)
191 set_bit(spaceorb_buttons[i], input_dev->keybit); 191 set_bit(spaceorb_buttons[i], input_dev->keybit);
diff --git a/drivers/input/joystick/stinger.c b/drivers/input/joystick/stinger.c
index 8581ee991d4e..baa10b2f7ba1 100644
--- a/drivers/input/joystick/stinger.c
+++ b/drivers/input/joystick/stinger.c
@@ -156,10 +156,11 @@ static int stinger_connect(struct serio *serio, struct serio_driver *drv)
156 input_dev->id.version = 0x0100; 156 input_dev->id.version = 0x0100;
157 input_dev->dev.parent = &serio->dev; 157 input_dev->dev.parent = &serio->dev;
158 158
159 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 159 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
160 input_dev->keybit[LONG(BTN_A)] = BIT(BTN_A) | BIT(BTN_B) | BIT(BTN_C) | BIT(BTN_X) | 160 input_dev->keybit[BIT_WORD(BTN_A)] = BIT_MASK(BTN_A) | BIT_MASK(BTN_B) |
161 BIT(BTN_Y) | BIT(BTN_Z) | BIT(BTN_TL) | BIT(BTN_TR) | 161 BIT_MASK(BTN_C) | BIT_MASK(BTN_X) | BIT_MASK(BTN_Y) |
162 BIT(BTN_START) | BIT(BTN_SELECT); 162 BIT_MASK(BTN_Z) | BIT_MASK(BTN_TL) | BIT_MASK(BTN_TR) |
163 BIT_MASK(BTN_START) | BIT_MASK(BTN_SELECT);
163 input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 4); 164 input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 4);
164 input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 4); 165 input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 4);
165 166
diff --git a/drivers/input/joystick/tmdc.c b/drivers/input/joystick/tmdc.c
index 3b36ee04f726..0feeb8acb532 100644
--- a/drivers/input/joystick/tmdc.c
+++ b/drivers/input/joystick/tmdc.c
@@ -333,7 +333,7 @@ static int tmdc_setup_port(struct tmdc *tmdc, int idx, unsigned char *data)
333 input_dev->open = tmdc_open; 333 input_dev->open = tmdc_open;
334 input_dev->close = tmdc_close; 334 input_dev->close = tmdc_close;
335 335
336 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 336 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
337 337
338 for (i = 0; i < port->absc && i < TMDC_ABS; i++) 338 for (i = 0; i < port->absc && i < TMDC_ABS; i++)
339 if (port->abs[i] >= 0) 339 if (port->abs[i] >= 0)
diff --git a/drivers/input/joystick/turbografx.c b/drivers/input/joystick/turbografx.c
index 8381c6f14373..bbebd4e2ad7f 100644
--- a/drivers/input/joystick/turbografx.c
+++ b/drivers/input/joystick/turbografx.c
@@ -229,7 +229,7 @@ static struct tgfx __init *tgfx_probe(int parport, int *n_buttons, int n_devs)
229 input_dev->open = tgfx_open; 229 input_dev->open = tgfx_open;
230 input_dev->close = tgfx_close; 230 input_dev->close = tgfx_close;
231 231
232 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 232 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
233 input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0); 233 input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0);
234 input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0); 234 input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0);
235 235
diff --git a/drivers/input/joystick/twidjoy.c b/drivers/input/joystick/twidjoy.c
index c91504ec38eb..1085c841fec4 100644
--- a/drivers/input/joystick/twidjoy.c
+++ b/drivers/input/joystick/twidjoy.c
@@ -207,7 +207,7 @@ static int twidjoy_connect(struct serio *serio, struct serio_driver *drv)
207 input_dev->id.version = 0x0100; 207 input_dev->id.version = 0x0100;
208 input_dev->dev.parent = &serio->dev; 208 input_dev->dev.parent = &serio->dev;
209 209
210 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 210 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
211 input_set_abs_params(input_dev, ABS_X, -50, 50, 4, 4); 211 input_set_abs_params(input_dev, ABS_X, -50, 50, 4, 4);
212 input_set_abs_params(input_dev, ABS_Y, -50, 50, 4, 4); 212 input_set_abs_params(input_dev, ABS_Y, -50, 50, 4, 4);
213 213
diff --git a/drivers/input/joystick/warrior.c b/drivers/input/joystick/warrior.c
index 4e85f72eefd7..e928b6e3724a 100644
--- a/drivers/input/joystick/warrior.c
+++ b/drivers/input/joystick/warrior.c
@@ -162,9 +162,11 @@ static int warrior_connect(struct serio *serio, struct serio_driver *drv)
162 input_dev->id.version = 0x0100; 162 input_dev->id.version = 0x0100;
163 input_dev->dev.parent = &serio->dev; 163 input_dev->dev.parent = &serio->dev;
164 164
165 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); 165 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) |
166 input_dev->keybit[LONG(BTN_TRIGGER)] = BIT(BTN_TRIGGER) | BIT(BTN_THUMB) | BIT(BTN_TOP) | BIT(BTN_TOP2); 166 BIT_MASK(EV_ABS);
167 input_dev->relbit[0] = BIT(REL_DIAL); 167 input_dev->keybit[BIT_WORD(BTN_TRIGGER)] = BIT_MASK(BTN_TRIGGER) |
168 BIT_MASK(BTN_THUMB) | BIT_MASK(BTN_TOP) | BIT_MASK(BTN_TOP2);
169 input_dev->relbit[0] = BIT_MASK(REL_DIAL);
168 input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 8); 170 input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 8);
169 input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 8); 171 input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 8);
170 input_set_abs_params(input_dev, ABS_THROTTLE, -112, 112, 0, 0); 172 input_set_abs_params(input_dev, ABS_THROTTLE, -112, 112, 0, 0);
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 623629a69b03..6dd375825a14 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -658,7 +658,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
658 input_dev->open = xpad_open; 658 input_dev->open = xpad_open;
659 input_dev->close = xpad_close; 659 input_dev->close = xpad_close;
660 660
661 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 661 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
662 662
663 /* set up buttons */ 663 /* set up buttons */
664 for (i = 0; xpad_btn[i] >= 0; i++) 664 for (i = 0; xpad_btn[i] >= 0; i++)
diff --git a/drivers/input/keyboard/aaed2000_kbd.c b/drivers/input/keyboard/aaed2000_kbd.c
index 63d6ead6b877..72abc196ce66 100644
--- a/drivers/input/keyboard/aaed2000_kbd.c
+++ b/drivers/input/keyboard/aaed2000_kbd.c
@@ -125,7 +125,7 @@ static int __devinit aaedkbd_probe(struct platform_device *pdev)
125 input_dev->id.version = 0x0100; 125 input_dev->id.version = 0x0100;
126 input_dev->dev.parent = &pdev->dev; 126 input_dev->dev.parent = &pdev->dev;
127 127
128 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 128 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
129 input_dev->keycode = aaedkbd->keycode; 129 input_dev->keycode = aaedkbd->keycode;
130 input_dev->keycodesize = sizeof(unsigned char); 130 input_dev->keycodesize = sizeof(unsigned char);
131 input_dev->keycodemax = ARRAY_SIZE(aaedkbd_keycode); 131 input_dev->keycodemax = ARRAY_SIZE(aaedkbd_keycode);
diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c
index c67e84ec2d6a..81bf7562aca0 100644
--- a/drivers/input/keyboard/amikbd.c
+++ b/drivers/input/keyboard/amikbd.c
@@ -209,7 +209,7 @@ static int __init amikbd_init(void)
209 amikbd_dev->id.product = 0x0001; 209 amikbd_dev->id.product = 0x0001;
210 amikbd_dev->id.version = 0x0100; 210 amikbd_dev->id.version = 0x0100;
211 211
212 amikbd_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 212 amikbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
213 213
214 for (i = 0; i < 0x78; i++) 214 for (i = 0; i < 0x78; i++)
215 set_bit(i, amikbd_dev->keybit); 215 set_bit(i, amikbd_dev->keybit);
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index a1800151b6ce..4e92100c56a8 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -237,7 +237,7 @@ static int __init atakbd_init(void)
237 atakbd_dev->id.product = 0x0001; 237 atakbd_dev->id.product = 0x0001;
238 atakbd_dev->id.version = 0x0100; 238 atakbd_dev->id.version = 0x0100;
239 239
240 atakbd_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 240 atakbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
241 atakbd_dev->keycode = atakbd_keycode; 241 atakbd_dev->keycode = atakbd_keycode;
242 atakbd_dev->keycodesize = sizeof(unsigned char); 242 atakbd_dev->keycodesize = sizeof(unsigned char);
243 atakbd_dev->keycodemax = ARRAY_SIZE(atakbd_keycode); 243 atakbd_dev->keycodemax = ARRAY_SIZE(atakbd_keycode);
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 41fc3d03b6eb..b39c5b31e620 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -900,27 +900,32 @@ static void atkbd_set_device_attrs(struct atkbd *atkbd)
900 900
901 input_set_drvdata(input_dev, atkbd); 901 input_set_drvdata(input_dev, atkbd);
902 902
903 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_MSC); 903 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
904 BIT_MASK(EV_MSC);
904 905
905 if (atkbd->write) { 906 if (atkbd->write) {
906 input_dev->evbit[0] |= BIT(EV_LED); 907 input_dev->evbit[0] |= BIT_MASK(EV_LED);
907 input_dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL); 908 input_dev->ledbit[0] = BIT_MASK(LED_NUML) |
909 BIT_MASK(LED_CAPSL) | BIT_MASK(LED_SCROLLL);
908 } 910 }
909 911
910 if (atkbd->extra) 912 if (atkbd->extra)
911 input_dev->ledbit[0] |= BIT(LED_COMPOSE) | BIT(LED_SUSPEND) | 913 input_dev->ledbit[0] |= BIT_MASK(LED_COMPOSE) |
912 BIT(LED_SLEEP) | BIT(LED_MUTE) | BIT(LED_MISC); 914 BIT_MASK(LED_SUSPEND) | BIT_MASK(LED_SLEEP) |
915 BIT_MASK(LED_MUTE) | BIT_MASK(LED_MISC);
913 916
914 if (!atkbd->softrepeat) { 917 if (!atkbd->softrepeat) {
915 input_dev->rep[REP_DELAY] = 250; 918 input_dev->rep[REP_DELAY] = 250;
916 input_dev->rep[REP_PERIOD] = 33; 919 input_dev->rep[REP_PERIOD] = 33;
917 } 920 }
918 921
919 input_dev->mscbit[0] = atkbd->softraw ? BIT(MSC_SCAN) : BIT(MSC_RAW) | BIT(MSC_SCAN); 922 input_dev->mscbit[0] = atkbd->softraw ? BIT_MASK(MSC_SCAN) :
923 BIT_MASK(MSC_RAW) | BIT_MASK(MSC_SCAN);
920 924
921 if (atkbd->scroll) { 925 if (atkbd->scroll) {
922 input_dev->evbit[0] |= BIT(EV_REL); 926 input_dev->evbit[0] |= BIT_MASK(EV_REL);
923 input_dev->relbit[0] = BIT(REL_WHEEL) | BIT(REL_HWHEEL); 927 input_dev->relbit[0] = BIT_MASK(REL_WHEEL) |
928 BIT_MASK(REL_HWHEEL);
924 set_bit(BTN_MIDDLE, input_dev->keybit); 929 set_bit(BTN_MIDDLE, input_dev->keybit);
925 } 930 }
926 931
diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c
index 6578bfff644b..790fed368aae 100644
--- a/drivers/input/keyboard/corgikbd.c
+++ b/drivers/input/keyboard/corgikbd.c
@@ -325,7 +325,8 @@ static int __init corgikbd_probe(struct platform_device *pdev)
325 input_dev->id.version = 0x0100; 325 input_dev->id.version = 0x0100;
326 input_dev->dev.parent = &pdev->dev; 326 input_dev->dev.parent = &pdev->dev;
327 327
328 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW); 328 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
329 BIT_MASK(EV_PWR) | BIT_MASK(EV_SW);
329 input_dev->keycode = corgikbd->keycode; 330 input_dev->keycode = corgikbd->keycode;
330 input_dev->keycodesize = sizeof(unsigned char); 331 input_dev->keycodesize = sizeof(unsigned char);
331 input_dev->keycodemax = ARRAY_SIZE(corgikbd_keycode); 332 input_dev->keycodemax = ARRAY_SIZE(corgikbd_keycode);
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index e2a3293bc67e..3eddf52a0bba 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -62,7 +62,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
62 62
63 platform_set_drvdata(pdev, input); 63 platform_set_drvdata(pdev, input);
64 64
65 input->evbit[0] = BIT(EV_KEY); 65 input->evbit[0] = BIT_MASK(EV_KEY);
66 66
67 input->name = pdev->name; 67 input->name = pdev->name;
68 input->phys = "gpio-keys/input0"; 68 input->phys = "gpio-keys/input0";
diff --git a/drivers/input/keyboard/hil_kbd.c b/drivers/input/keyboard/hil_kbd.c
index cdd254f2e6c7..adbf29f0169d 100644
--- a/drivers/input/keyboard/hil_kbd.c
+++ b/drivers/input/keyboard/hil_kbd.c
@@ -323,8 +323,9 @@ static int hil_kbd_connect(struct serio *serio, struct serio_driver *drv)
323 goto bail2; 323 goto bail2;
324 } 324 }
325 325
326 kbd->dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 326 kbd->dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
327 kbd->dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL); 327 kbd->dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
328 BIT_MASK(LED_SCROLLL);
328 kbd->dev->keycodemax = HIL_KEYCODES_SET1_TBLSIZE; 329 kbd->dev->keycodemax = HIL_KEYCODES_SET1_TBLSIZE;
329 kbd->dev->keycodesize = sizeof(hil_kbd_set1[0]); 330 kbd->dev->keycodesize = sizeof(hil_kbd_set1[0]);
330 kbd->dev->keycode = hil_kbd_set1; 331 kbd->dev->keycode = hil_kbd_set1;
diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c
index 499b6974457f..50d80ecf0b80 100644
--- a/drivers/input/keyboard/hilkbd.c
+++ b/drivers/input/keyboard/hilkbd.c
@@ -266,8 +266,9 @@ hil_keyb_init(void)
266 if (hphilkeyb_keycode[i] != KEY_RESERVED) 266 if (hphilkeyb_keycode[i] != KEY_RESERVED)
267 set_bit(hphilkeyb_keycode[i], hil_dev.dev->keybit); 267 set_bit(hphilkeyb_keycode[i], hil_dev.dev->keybit);
268 268
269 hil_dev.dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 269 hil_dev.dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
270 hil_dev.dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL); 270 hil_dev.dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
271 BIT_MASK(LED_SCROLLL);
271 hil_dev.dev->keycodemax = HIL_KEYCODES_SET1_TBLSIZE; 272 hil_dev.dev->keycodemax = HIL_KEYCODES_SET1_TBLSIZE;
272 hil_dev.dev->keycodesize= sizeof(hphilkeyb_keycode[0]); 273 hil_dev.dev->keycodesize= sizeof(hphilkeyb_keycode[0]);
273 hil_dev.dev->keycode = hphilkeyb_keycode; 274 hil_dev.dev->keycode = hphilkeyb_keycode;
diff --git a/drivers/input/keyboard/locomokbd.c b/drivers/input/keyboard/locomokbd.c
index 7a41b271f222..5a0ca18d6755 100644
--- a/drivers/input/keyboard/locomokbd.c
+++ b/drivers/input/keyboard/locomokbd.c
@@ -233,7 +233,7 @@ static int locomokbd_probe(struct locomo_dev *dev)
233 input_dev->id.version = 0x0100; 233 input_dev->id.version = 0x0100;
234 input_dev->dev.parent = &dev->dev; 234 input_dev->dev.parent = &dev->dev;
235 235
236 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 236 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
237 input_dev->keycode = locomokbd->keycode; 237 input_dev->keycode = locomokbd->keycode;
238 input_dev->keycodesize = sizeof(unsigned char); 238 input_dev->keycodesize = sizeof(unsigned char);
239 input_dev->keycodemax = ARRAY_SIZE(locomokbd_keycode); 239 input_dev->keycodemax = ARRAY_SIZE(locomokbd_keycode);
diff --git a/drivers/input/keyboard/newtonkbd.c b/drivers/input/keyboard/newtonkbd.c
index b97a41e3ee56..48d1cab0aa1c 100644
--- a/drivers/input/keyboard/newtonkbd.c
+++ b/drivers/input/keyboard/newtonkbd.c
@@ -106,7 +106,7 @@ static int nkbd_connect(struct serio *serio, struct serio_driver *drv)
106 input_dev->id.version = 0x0100; 106 input_dev->id.version = 0x0100;
107 input_dev->dev.parent = &serio->dev; 107 input_dev->dev.parent = &serio->dev;
108 108
109 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 109 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
110 input_dev->keycode = nkbd->keycode; 110 input_dev->keycode = nkbd->keycode;
111 input_dev->keycodesize = sizeof(unsigned char); 111 input_dev->keycodesize = sizeof(unsigned char);
112 input_dev->keycodemax = ARRAY_SIZE(nkbd_keycode); 112 input_dev->keycodemax = ARRAY_SIZE(nkbd_keycode);
diff --git a/drivers/input/keyboard/pxa27x_keyboard.c b/drivers/input/keyboard/pxa27x_keyboard.c
index b7061aa38816..bdd64ee4c5c8 100644
--- a/drivers/input/keyboard/pxa27x_keyboard.c
+++ b/drivers/input/keyboard/pxa27x_keyboard.c
@@ -183,8 +183,9 @@ static int __devinit pxakbd_probe(struct platform_device *pdev)
183 input_dev->close = pxakbd_close; 183 input_dev->close = pxakbd_close;
184 input_dev->dev.parent = &pdev->dev; 184 input_dev->dev.parent = &pdev->dev;
185 185
186 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_REL); 186 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
187 input_dev->relbit[LONG(REL_WHEEL)] = BIT(REL_WHEEL); 187 BIT_MASK(EV_REL);
188 input_dev->relbit[BIT_WORD(REL_WHEEL)] = BIT_MASK(REL_WHEEL);
188 for (row = 0; row < pdata->nr_rows; row++) { 189 for (row = 0; row < pdata->nr_rows; row++) {
189 for (col = 0; col < pdata->nr_cols; col++) { 190 for (col = 0; col < pdata->nr_cols; col++) {
190 int code = pdata->keycodes[row][col]; 191 int code = pdata->keycodes[row][col];
diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c
index 41b80385476c..410d78a774d0 100644
--- a/drivers/input/keyboard/spitzkbd.c
+++ b/drivers/input/keyboard/spitzkbd.c
@@ -381,7 +381,8 @@ static int __init spitzkbd_probe(struct platform_device *dev)
381 input_dev->id.product = 0x0001; 381 input_dev->id.product = 0x0001;
382 input_dev->id.version = 0x0100; 382 input_dev->id.version = 0x0100;
383 383
384 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW); 384 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
385 BIT_MASK(EV_PWR) | BIT_MASK(EV_SW);
385 input_dev->keycode = spitzkbd->keycode; 386 input_dev->keycode = spitzkbd->keycode;
386 input_dev->keycodesize = sizeof(unsigned char); 387 input_dev->keycodesize = sizeof(unsigned char);
387 input_dev->keycodemax = ARRAY_SIZE(spitzkbd_keycode); 388 input_dev->keycodemax = ARRAY_SIZE(spitzkbd_keycode);
diff --git a/drivers/input/keyboard/stowaway.c b/drivers/input/keyboard/stowaway.c
index b44b0684d543..7437219370b1 100644
--- a/drivers/input/keyboard/stowaway.c
+++ b/drivers/input/keyboard/stowaway.c
@@ -110,7 +110,7 @@ static int skbd_connect(struct serio *serio, struct serio_driver *drv)
110 input_dev->id.version = 0x0100; 110 input_dev->id.version = 0x0100;
111 input_dev->dev.parent = &serio->dev; 111 input_dev->dev.parent = &serio->dev;
112 112
113 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 113 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
114 input_dev->keycode = skbd->keycode; 114 input_dev->keycode = skbd->keycode;
115 input_dev->keycodesize = sizeof(unsigned char); 115 input_dev->keycodesize = sizeof(unsigned char);
116 input_dev->keycodemax = ARRAY_SIZE(skbd_keycode); 116 input_dev->keycodemax = ARRAY_SIZE(skbd_keycode);
diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c
index 1d4e39624cfe..be0f5d19d023 100644
--- a/drivers/input/keyboard/sunkbd.c
+++ b/drivers/input/keyboard/sunkbd.c
@@ -277,9 +277,11 @@ static int sunkbd_connect(struct serio *serio, struct serio_driver *drv)
277 277
278 input_dev->event = sunkbd_event; 278 input_dev->event = sunkbd_event;
279 279
280 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_SND) | BIT(EV_REP); 280 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
281 input_dev->ledbit[0] = BIT(LED_CAPSL) | BIT(LED_COMPOSE) | BIT(LED_SCROLLL) | BIT(LED_NUML); 281 BIT_MASK(EV_SND) | BIT_MASK(EV_REP);
282 input_dev->sndbit[0] = BIT(SND_CLICK) | BIT(SND_BELL); 282 input_dev->ledbit[0] = BIT_MASK(LED_CAPSL) | BIT_MASK(LED_COMPOSE) |
283 BIT_MASK(LED_SCROLLL) | BIT_MASK(LED_NUML);
284 input_dev->sndbit[0] = BIT_MASK(SND_CLICK) | BIT_MASK(SND_BELL);
283 285
284 input_dev->keycode = sunkbd->keycode; 286 input_dev->keycode = sunkbd->keycode;
285 input_dev->keycodesize = sizeof(unsigned char); 287 input_dev->keycodesize = sizeof(unsigned char);
diff --git a/drivers/input/keyboard/xtkbd.c b/drivers/input/keyboard/xtkbd.c
index f3a56eb58ed1..152a2c070508 100644
--- a/drivers/input/keyboard/xtkbd.c
+++ b/drivers/input/keyboard/xtkbd.c
@@ -110,7 +110,7 @@ static int xtkbd_connect(struct serio *serio, struct serio_driver *drv)
110 input_dev->id.version = 0x0100; 110 input_dev->id.version = 0x0100;
111 input_dev->dev.parent = &serio->dev; 111 input_dev->dev.parent = &serio->dev;
112 112
113 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 113 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
114 input_dev->keycode = xtkbd->keycode; 114 input_dev->keycode = xtkbd->keycode;
115 input_dev->keycodesize = sizeof(unsigned char); 115 input_dev->keycodesize = sizeof(unsigned char);
116 input_dev->keycodemax = ARRAY_SIZE(xtkbd_keycode); 116 input_dev->keycodemax = ARRAY_SIZE(xtkbd_keycode);
diff --git a/drivers/input/misc/ati_remote.c b/drivers/input/misc/ati_remote.c
index 471aab206443..3a7937481ad8 100644
--- a/drivers/input/misc/ati_remote.c
+++ b/drivers/input/misc/ati_remote.c
@@ -662,10 +662,10 @@ static void ati_remote_input_init(struct ati_remote *ati_remote)
662 struct input_dev *idev = ati_remote->idev; 662 struct input_dev *idev = ati_remote->idev;
663 int i; 663 int i;
664 664
665 idev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 665 idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
666 idev->keybit[LONG(BTN_MOUSE)] = ( BIT(BTN_LEFT) | BIT(BTN_RIGHT) | 666 idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
667 BIT(BTN_SIDE) | BIT(BTN_EXTRA) ); 667 BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
668 idev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 668 idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
669 for (i = 0; ati_remote_tbl[i].kind != KIND_END; i++) 669 for (i = 0; ati_remote_tbl[i].kind != KIND_END; i++)
670 if (ati_remote_tbl[i].type == EV_KEY) 670 if (ati_remote_tbl[i].type == EV_KEY)
671 set_bit(ati_remote_tbl[i].code, idev->keybit); 671 set_bit(ati_remote_tbl[i].code, idev->keybit);
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index 1031543e5c3f..f2709b82485c 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -346,9 +346,10 @@ static int ati_remote2_input_init(struct ati_remote2 *ar2)
346 ar2->idev = idev; 346 ar2->idev = idev;
347 input_set_drvdata(idev, ar2); 347 input_set_drvdata(idev, ar2);
348 348
349 idev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_REL); 349 idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_REL);
350 idev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT); 350 idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
351 idev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 351 BIT_MASK(BTN_RIGHT);
352 idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
352 for (i = 0; ati_remote2_key_table[i].key_code != KEY_RESERVED; i++) 353 for (i = 0; ati_remote2_key_table[i].key_code != KEY_RESERVED; i++)
353 set_bit(ati_remote2_key_table[i].key_code, idev->keybit); 354 set_bit(ati_remote2_key_table[i].key_code, idev->keybit);
354 355
diff --git a/drivers/input/misc/atlas_btns.c b/drivers/input/misc/atlas_btns.c
index e43e92fd9e23..4e3ad657ed80 100644
--- a/drivers/input/misc/atlas_btns.c
+++ b/drivers/input/misc/atlas_btns.c
@@ -81,7 +81,7 @@ static int atlas_acpi_button_add(struct acpi_device *device)
81 input_dev->name = "Atlas ACPI button driver"; 81 input_dev->name = "Atlas ACPI button driver";
82 input_dev->phys = "ASIM0000/atlas/input0"; 82 input_dev->phys = "ASIM0000/atlas/input0";
83 input_dev->id.bustype = BUS_HOST; 83 input_dev->id.bustype = BUS_HOST;
84 input_dev->evbit[LONG(EV_KEY)] = BIT(EV_KEY); 84 input_dev->evbit[BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY);
85 85
86 set_bit(KEY_F1, input_dev->keybit); 86 set_bit(KEY_F1, input_dev->keybit);
87 set_bit(KEY_F2, input_dev->keybit); 87 set_bit(KEY_F2, input_dev->keybit);
diff --git a/drivers/input/misc/cobalt_btns.c b/drivers/input/misc/cobalt_btns.c
index 064b07936019..1aef97ed5e84 100644
--- a/drivers/input/misc/cobalt_btns.c
+++ b/drivers/input/misc/cobalt_btns.c
@@ -104,7 +104,7 @@ static int __devinit cobalt_buttons_probe(struct platform_device *pdev)
104 input->id.bustype = BUS_HOST; 104 input->id.bustype = BUS_HOST;
105 input->cdev.dev = &pdev->dev; 105 input->cdev.dev = &pdev->dev;
106 106
107 input->evbit[0] = BIT(EV_KEY); 107 input->evbit[0] = BIT_MASK(EV_KEY);
108 for (i = 0; i < ARRAY_SIZE(buttons_map); i++) { 108 for (i = 0; i < ARRAY_SIZE(buttons_map); i++) {
109 set_bit(buttons_map[i].keycode, input->keybit); 109 set_bit(buttons_map[i].keycode, input->keybit);
110 buttons_map[i].count = 0; 110 buttons_map[i].count = 0;
diff --git a/drivers/input/misc/ixp4xx-beeper.c b/drivers/input/misc/ixp4xx-beeper.c
index e759944041ab..d2ade7443b7d 100644
--- a/drivers/input/misc/ixp4xx-beeper.c
+++ b/drivers/input/misc/ixp4xx-beeper.c
@@ -109,8 +109,8 @@ static int __devinit ixp4xx_spkr_probe(struct platform_device *dev)
109 input_dev->id.version = 0x0100; 109 input_dev->id.version = 0x0100;
110 input_dev->dev.parent = &dev->dev; 110 input_dev->dev.parent = &dev->dev;
111 111
112 input_dev->evbit[0] = BIT(EV_SND); 112 input_dev->evbit[0] = BIT_MASK(EV_SND);
113 input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE); 113 input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
114 input_dev->event = ixp4xx_spkr_event; 114 input_dev->event = ixp4xx_spkr_event;
115 115
116 err = request_irq(IRQ_IXP4XX_TIMER2, &ixp4xx_spkr_interrupt, 116 err = request_irq(IRQ_IXP4XX_TIMER2, &ixp4xx_spkr_interrupt,
diff --git a/drivers/input/misc/keyspan_remote.c b/drivers/input/misc/keyspan_remote.c
index 1bffc9fa98c2..fd74347047dd 100644
--- a/drivers/input/misc/keyspan_remote.c
+++ b/drivers/input/misc/keyspan_remote.c
@@ -497,7 +497,7 @@ static int keyspan_probe(struct usb_interface *interface, const struct usb_devic
497 usb_to_input_id(udev, &input_dev->id); 497 usb_to_input_id(udev, &input_dev->id);
498 input_dev->dev.parent = &interface->dev; 498 input_dev->dev.parent = &interface->dev;
499 499
500 input_dev->evbit[0] = BIT(EV_KEY); /* We will only report KEY events. */ 500 input_dev->evbit[0] = BIT_MASK(EV_KEY); /* We will only report KEY events. */
501 for (i = 0; i < ARRAY_SIZE(keyspan_key_table); i++) 501 for (i = 0; i < ARRAY_SIZE(keyspan_key_table); i++)
502 if (keyspan_key_table[i] != KEY_RESERVED) 502 if (keyspan_key_table[i] != KEY_RESERVED)
503 set_bit(keyspan_key_table[i], input_dev->keybit); 503 set_bit(keyspan_key_table[i], input_dev->keybit);
diff --git a/drivers/input/misc/m68kspkr.c b/drivers/input/misc/m68kspkr.c
index e9f26e766b4d..0c64d9bb718e 100644
--- a/drivers/input/misc/m68kspkr.c
+++ b/drivers/input/misc/m68kspkr.c
@@ -65,8 +65,8 @@ static int __devinit m68kspkr_probe(struct platform_device *dev)
65 input_dev->id.version = 0x0100; 65 input_dev->id.version = 0x0100;
66 input_dev->dev.parent = &dev->dev; 66 input_dev->dev.parent = &dev->dev;
67 67
68 input_dev->evbit[0] = BIT(EV_SND); 68 input_dev->evbit[0] = BIT_MASK(EV_SND);
69 input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE); 69 input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
70 input_dev->event = m68kspkr_event; 70 input_dev->event = m68kspkr_event;
71 71
72 err = input_register_device(input_dev); 72 err = input_register_device(input_dev);
diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index c19f77fbaf2a..4941a9e61e90 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -86,8 +86,8 @@ static int __devinit pcspkr_probe(struct platform_device *dev)
86 pcspkr_dev->id.version = 0x0100; 86 pcspkr_dev->id.version = 0x0100;
87 pcspkr_dev->dev.parent = &dev->dev; 87 pcspkr_dev->dev.parent = &dev->dev;
88 88
89 pcspkr_dev->evbit[0] = BIT(EV_SND); 89 pcspkr_dev->evbit[0] = BIT_MASK(EV_SND);
90 pcspkr_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE); 90 pcspkr_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
91 pcspkr_dev->event = pcspkr_event; 91 pcspkr_dev->event = pcspkr_event;
92 92
93 err = input_register_device(pcspkr_dev); 93 err = input_register_device(pcspkr_dev);
diff --git a/drivers/input/misc/powermate.c b/drivers/input/misc/powermate.c
index 448a470d28f2..7a7b8c7b9633 100644
--- a/drivers/input/misc/powermate.c
+++ b/drivers/input/misc/powermate.c
@@ -363,10 +363,11 @@ static int powermate_probe(struct usb_interface *intf, const struct usb_device_i
363 363
364 input_dev->event = powermate_input_event; 364 input_dev->event = powermate_input_event;
365 365
366 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_MSC); 366 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) |
367 input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0); 367 BIT_MASK(EV_MSC);
368 input_dev->relbit[LONG(REL_DIAL)] = BIT(REL_DIAL); 368 input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
369 input_dev->mscbit[LONG(MSC_PULSELED)] = BIT(MSC_PULSELED); 369 input_dev->relbit[BIT_WORD(REL_DIAL)] = BIT_MASK(REL_DIAL);
370 input_dev->mscbit[BIT_WORD(MSC_PULSELED)] = BIT_MASK(MSC_PULSELED);
370 371
371 /* get a handle to the interrupt data pipe */ 372 /* get a handle to the interrupt data pipe */
372 pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress); 373 pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress);
diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
index e36ec1d92be8..a3637d870880 100644
--- a/drivers/input/misc/sparcspkr.c
+++ b/drivers/input/misc/sparcspkr.c
@@ -115,8 +115,8 @@ static int __devinit sparcspkr_probe(struct device *dev)
115 input_dev->id.version = 0x0100; 115 input_dev->id.version = 0x0100;
116 input_dev->dev.parent = dev; 116 input_dev->dev.parent = dev;
117 117
118 input_dev->evbit[0] = BIT(EV_SND); 118 input_dev->evbit[0] = BIT_MASK(EV_SND);
119 input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE); 119 input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
120 120
121 input_dev->event = state->event; 121 input_dev->event = state->event;
122 122
diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c
index ab15880fd566..46279ef2b649 100644
--- a/drivers/input/misc/yealink.c
+++ b/drivers/input/misc/yealink.c
@@ -945,7 +945,7 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
945 /* input_dev->event = input_ev; TODO */ 945 /* input_dev->event = input_ev; TODO */
946 946
947 /* register available key events */ 947 /* register available key events */
948 input_dev->evbit[0] = BIT(EV_KEY); 948 input_dev->evbit[0] = BIT_MASK(EV_KEY);
949 for (i = 0; i < 256; i++) { 949 for (i = 0; i < 256; i++) {
950 int k = map_p1k_to_key(i); 950 int k = map_p1k_to_key(i);
951 if (k >= 0) { 951 if (k >= 0) {
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 64d70a9b714c..2b5ed119c9a9 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -455,24 +455,25 @@ int alps_init(struct psmouse *psmouse)
455 if (alps_hw_init(psmouse, &version)) 455 if (alps_hw_init(psmouse, &version))
456 goto init_fail; 456 goto init_fail;
457 457
458 dev1->evbit[LONG(EV_KEY)] |= BIT(EV_KEY); 458 dev1->evbit[BIT_WORD(EV_KEY)] |= BIT_MASK(EV_KEY);
459 dev1->keybit[LONG(BTN_TOUCH)] |= BIT(BTN_TOUCH); 459 dev1->keybit[BIT_WORD(BTN_TOUCH)] |= BIT_MASK(BTN_TOUCH);
460 dev1->keybit[LONG(BTN_TOOL_FINGER)] |= BIT(BTN_TOOL_FINGER); 460 dev1->keybit[BIT_WORD(BTN_TOOL_FINGER)] |= BIT_MASK(BTN_TOOL_FINGER);
461 dev1->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 461 dev1->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
462 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
462 463
463 dev1->evbit[LONG(EV_ABS)] |= BIT(EV_ABS); 464 dev1->evbit[BIT_WORD(EV_ABS)] |= BIT_MASK(EV_ABS);
464 input_set_abs_params(dev1, ABS_X, 0, 1023, 0, 0); 465 input_set_abs_params(dev1, ABS_X, 0, 1023, 0, 0);
465 input_set_abs_params(dev1, ABS_Y, 0, 767, 0, 0); 466 input_set_abs_params(dev1, ABS_Y, 0, 767, 0, 0);
466 input_set_abs_params(dev1, ABS_PRESSURE, 0, 127, 0, 0); 467 input_set_abs_params(dev1, ABS_PRESSURE, 0, 127, 0, 0);
467 468
468 if (priv->i->flags & ALPS_WHEEL) { 469 if (priv->i->flags & ALPS_WHEEL) {
469 dev1->evbit[LONG(EV_REL)] |= BIT(EV_REL); 470 dev1->evbit[BIT_WORD(EV_REL)] |= BIT_MASK(EV_REL);
470 dev1->relbit[LONG(REL_WHEEL)] |= BIT(REL_WHEEL); 471 dev1->relbit[BIT_WORD(REL_WHEEL)] |= BIT_MASK(REL_WHEEL);
471 } 472 }
472 473
473 if (priv->i->flags & (ALPS_FW_BK_1 | ALPS_FW_BK_2)) { 474 if (priv->i->flags & (ALPS_FW_BK_1 | ALPS_FW_BK_2)) {
474 dev1->keybit[LONG(BTN_FORWARD)] |= BIT(BTN_FORWARD); 475 dev1->keybit[BIT_WORD(BTN_FORWARD)] |= BIT_MASK(BTN_FORWARD);
475 dev1->keybit[LONG(BTN_BACK)] |= BIT(BTN_BACK); 476 dev1->keybit[BIT_WORD(BTN_BACK)] |= BIT_MASK(BTN_BACK);
476 } 477 }
477 478
478 snprintf(priv->phys, sizeof(priv->phys), "%s/input1", psmouse->ps2dev.serio->phys); 479 snprintf(priv->phys, sizeof(priv->phys), "%s/input1", psmouse->ps2dev.serio->phys);
@@ -483,9 +484,10 @@ int alps_init(struct psmouse *psmouse)
483 dev2->id.product = PSMOUSE_ALPS; 484 dev2->id.product = PSMOUSE_ALPS;
484 dev2->id.version = 0x0000; 485 dev2->id.version = 0x0000;
485 486
486 dev2->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 487 dev2->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
487 dev2->relbit[LONG(REL_X)] |= BIT(REL_X) | BIT(REL_Y); 488 dev2->relbit[BIT_WORD(REL_X)] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
488 dev2->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 489 dev2->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
490 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
489 491
490 if (input_register_device(priv->dev2)) 492 if (input_register_device(priv->dev2))
491 goto init_fail; 493 goto init_fail;
diff --git a/drivers/input/mouse/amimouse.c b/drivers/input/mouse/amimouse.c
index 239a0e16d91a..a185ac78a42c 100644
--- a/drivers/input/mouse/amimouse.c
+++ b/drivers/input/mouse/amimouse.c
@@ -111,9 +111,10 @@ static int __init amimouse_init(void)
111 amimouse_dev->id.product = 0x0002; 111 amimouse_dev->id.product = 0x0002;
112 amimouse_dev->id.version = 0x0100; 112 amimouse_dev->id.version = 0x0100;
113 113
114 amimouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 114 amimouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
115 amimouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 115 amimouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
116 amimouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 116 amimouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
117 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
117 amimouse_dev->open = amimouse_open; 118 amimouse_dev->open = amimouse_open;
118 amimouse_dev->close = amimouse_close; 119 amimouse_dev->close = amimouse_close;
119 120
diff --git a/drivers/input/mouse/atarimouse.c b/drivers/input/mouse/atarimouse.c
index c8c7244b48a1..98a3561d4b05 100644
--- a/drivers/input/mouse/atarimouse.c
+++ b/drivers/input/mouse/atarimouse.c
@@ -137,9 +137,10 @@ static int __init atamouse_init(void)
137 atamouse_dev->id.product = 0x0002; 137 atamouse_dev->id.product = 0x0002;
138 atamouse_dev->id.version = 0x0100; 138 atamouse_dev->id.version = 0x0100;
139 139
140 atamouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 140 atamouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
141 atamouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 141 atamouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
142 atamouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 142 atamouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
143 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
143 atamouse_dev->open = atamouse_open; 144 atamouse_dev->open = atamouse_open;
144 atamouse_dev->close = atamouse_close; 145 atamouse_dev->close = atamouse_close;
145 146
diff --git a/drivers/input/mouse/hil_ptr.c b/drivers/input/mouse/hil_ptr.c
index 449bf4dcbbcc..27f88fbb7136 100644
--- a/drivers/input/mouse/hil_ptr.c
+++ b/drivers/input/mouse/hil_ptr.c
@@ -298,12 +298,12 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver)
298 idd = ptr->idd + 1; 298 idd = ptr->idd + 1;
299 txt = "unknown"; 299 txt = "unknown";
300 if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_REL) { 300 if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_REL) {
301 ptr->dev->evbit[0] = BIT(EV_REL); 301 ptr->dev->evbit[0] = BIT_MASK(EV_REL);
302 txt = "relative"; 302 txt = "relative";
303 } 303 }
304 304
305 if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_ABS) { 305 if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_ABS) {
306 ptr->dev->evbit[0] = BIT(EV_ABS); 306 ptr->dev->evbit[0] = BIT_MASK(EV_ABS);
307 txt = "absolute"; 307 txt = "absolute";
308 } 308 }
309 if (!ptr->dev->evbit[0]) 309 if (!ptr->dev->evbit[0])
@@ -311,7 +311,7 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver)
311 311
312 ptr->nbtn = HIL_IDD_NUM_BUTTONS(idd); 312 ptr->nbtn = HIL_IDD_NUM_BUTTONS(idd);
313 if (ptr->nbtn) 313 if (ptr->nbtn)
314 ptr->dev->evbit[0] |= BIT(EV_KEY); 314 ptr->dev->evbit[0] |= BIT_MASK(EV_KEY);
315 315
316 naxsets = HIL_IDD_NUM_AXSETS(*idd); 316 naxsets = HIL_IDD_NUM_AXSETS(*idd);
317 ptr->naxes = HIL_IDD_NUM_AXES_PER_SET(*idd); 317 ptr->naxes = HIL_IDD_NUM_AXES_PER_SET(*idd);
diff --git a/drivers/input/mouse/inport.c b/drivers/input/mouse/inport.c
index 79b624fe8994..655a39217432 100644
--- a/drivers/input/mouse/inport.c
+++ b/drivers/input/mouse/inport.c
@@ -163,9 +163,10 @@ static int __init inport_init(void)
163 inport_dev->id.product = 0x0001; 163 inport_dev->id.product = 0x0001;
164 inport_dev->id.version = 0x0100; 164 inport_dev->id.version = 0x0100;
165 165
166 inport_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 166 inport_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
167 inport_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 167 inport_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
168 inport_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 168 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
169 inport_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
169 170
170 inport_dev->open = inport_open; 171 inport_dev->open = inport_open;
171 inport_dev->close = inport_close; 172 inport_dev->close = inport_close;
diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c
index d7de4c53b3d8..9ec57d80186e 100644
--- a/drivers/input/mouse/lifebook.c
+++ b/drivers/input/mouse/lifebook.c
@@ -270,9 +270,10 @@ static int lifebook_create_relative_device(struct psmouse *psmouse)
270 dev2->id.version = 0x0000; 270 dev2->id.version = 0x0000;
271 dev2->dev.parent = &psmouse->ps2dev.serio->dev; 271 dev2->dev.parent = &psmouse->ps2dev.serio->dev;
272 272
273 dev2->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 273 dev2->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
274 dev2->relbit[LONG(REL_X)] = BIT(REL_X) | BIT(REL_Y); 274 dev2->relbit[BIT_WORD(REL_X)] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
275 dev2->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT); 275 dev2->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
276 BIT_MASK(BTN_RIGHT);
276 277
277 error = input_register_device(priv->dev2); 278 error = input_register_device(priv->dev2);
278 if (error) 279 if (error)
@@ -295,9 +296,9 @@ int lifebook_init(struct psmouse *psmouse)
295 if (lifebook_absolute_mode(psmouse)) 296 if (lifebook_absolute_mode(psmouse))
296 return -1; 297 return -1;
297 298
298 dev1->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY); 299 dev1->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY);
299 dev1->relbit[0] = 0; 300 dev1->relbit[0] = 0;
300 dev1->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 301 dev1->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
301 input_set_abs_params(dev1, ABS_X, 0, max_coord, 0, 0); 302 input_set_abs_params(dev1, ABS_X, 0, max_coord, 0, 0);
302 input_set_abs_params(dev1, ABS_Y, 0, max_coord, 0, 0); 303 input_set_abs_params(dev1, ABS_Y, 0, max_coord, 0, 0);
303 304
diff --git a/drivers/input/mouse/logibm.c b/drivers/input/mouse/logibm.c
index 26c3b2e2ca94..b23a4f3ea5cd 100644
--- a/drivers/input/mouse/logibm.c
+++ b/drivers/input/mouse/logibm.c
@@ -156,9 +156,10 @@ static int __init logibm_init(void)
156 logibm_dev->id.product = 0x0001; 156 logibm_dev->id.product = 0x0001;
157 logibm_dev->id.version = 0x0100; 157 logibm_dev->id.version = 0x0100;
158 158
159 logibm_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 159 logibm_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
160 logibm_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 160 logibm_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
161 logibm_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 161 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
162 logibm_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
162 163
163 logibm_dev->open = logibm_open; 164 logibm_dev->open = logibm_open;
164 logibm_dev->close = logibm_close; 165 logibm_dev->close = logibm_close;
diff --git a/drivers/input/mouse/pc110pad.c b/drivers/input/mouse/pc110pad.c
index 05d992e514f0..8991ab0b4fe3 100644
--- a/drivers/input/mouse/pc110pad.c
+++ b/drivers/input/mouse/pc110pad.c
@@ -144,9 +144,9 @@ static int __init pc110pad_init(void)
144 pc110pad_dev->id.product = 0x0001; 144 pc110pad_dev->id.product = 0x0001;
145 pc110pad_dev->id.version = 0x0100; 145 pc110pad_dev->id.version = 0x0100;
146 146
147 pc110pad_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 147 pc110pad_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
148 pc110pad_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y); 148 pc110pad_dev->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y);
149 pc110pad_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 149 pc110pad_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
150 150
151 pc110pad_dev->absmax[ABS_X] = 0x1ff; 151 pc110pad_dev->absmax[ABS_X] = 0x1ff;
152 pc110pad_dev->absmax[ABS_Y] = 0x0ff; 152 pc110pad_dev->absmax[ABS_Y] = 0x0ff;
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 073525756532..da316d13d7f5 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1115,9 +1115,10 @@ static int psmouse_switch_protocol(struct psmouse *psmouse, const struct psmouse
1115 1115
1116 input_dev->dev.parent = &psmouse->ps2dev.serio->dev; 1116 input_dev->dev.parent = &psmouse->ps2dev.serio->dev;
1117 1117
1118 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 1118 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
1119 input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 1119 input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
1120 input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 1120 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
1121 input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
1121 1122
1122 psmouse->set_rate = psmouse_set_rate; 1123 psmouse->set_rate = psmouse_set_rate;
1123 psmouse->set_resolution = psmouse_set_resolution; 1124 psmouse->set_resolution = psmouse_set_resolution;
diff --git a/drivers/input/mouse/rpcmouse.c b/drivers/input/mouse/rpcmouse.c
index 355efd0423e7..18a48636ba4a 100644
--- a/drivers/input/mouse/rpcmouse.c
+++ b/drivers/input/mouse/rpcmouse.c
@@ -78,9 +78,10 @@ static int __init rpcmouse_init(void)
78 rpcmouse_dev->id.product = 0x0001; 78 rpcmouse_dev->id.product = 0x0001;
79 rpcmouse_dev->id.version = 0x0100; 79 rpcmouse_dev->id.version = 0x0100;
80 80
81 rpcmouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 81 rpcmouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
82 rpcmouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 82 rpcmouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
83 rpcmouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 83 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
84 rpcmouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
84 85
85 rpcmouse_lastx = (short) iomd_readl(IOMD_MOUSEX); 86 rpcmouse_lastx = (short) iomd_readl(IOMD_MOUSEX);
86 rpcmouse_lasty = (short) iomd_readl(IOMD_MOUSEY); 87 rpcmouse_lasty = (short) iomd_readl(IOMD_MOUSEY);
diff --git a/drivers/input/mouse/sermouse.c b/drivers/input/mouse/sermouse.c
index 77b8ee2b9651..ed917bfd086a 100644
--- a/drivers/input/mouse/sermouse.c
+++ b/drivers/input/mouse/sermouse.c
@@ -268,9 +268,10 @@ static int sermouse_connect(struct serio *serio, struct serio_driver *drv)
268 input_dev->id.version = 0x0100; 268 input_dev->id.version = 0x0100;
269 input_dev->dev.parent = &serio->dev; 269 input_dev->dev.parent = &serio->dev;
270 270
271 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 271 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
272 input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT); 272 input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
273 input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 273 BIT_MASK(BTN_RIGHT);
274 input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
274 275
275 if (c & 0x01) set_bit(BTN_MIDDLE, input_dev->keybit); 276 if (c & 0x01) set_bit(BTN_MIDDLE, input_dev->keybit);
276 if (c & 0x02) set_bit(BTN_SIDE, input_dev->keybit); 277 if (c & 0x02) set_bit(BTN_SIDE, input_dev->keybit);
diff --git a/drivers/input/mouse/touchkit_ps2.c b/drivers/input/mouse/touchkit_ps2.c
index 7b977fd23571..3fadb2accac0 100644
--- a/drivers/input/mouse/touchkit_ps2.c
+++ b/drivers/input/mouse/touchkit_ps2.c
@@ -85,7 +85,7 @@ int touchkit_ps2_detect(struct psmouse *psmouse, int set_properties)
85 return -ENODEV; 85 return -ENODEV;
86 86
87 if (set_properties) { 87 if (set_properties) {
88 dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 88 dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
89 set_bit(BTN_TOUCH, dev->keybit); 89 set_bit(BTN_TOUCH, dev->keybit);
90 input_set_abs_params(dev, ABS_X, 0, TOUCHKIT_MAX_XC, 0, 0); 90 input_set_abs_params(dev, ABS_X, 0, TOUCHKIT_MAX_XC, 0, 0);
91 input_set_abs_params(dev, ABS_Y, 0, TOUCHKIT_MAX_YC, 0, 0); 91 input_set_abs_params(dev, ABS_Y, 0, TOUCHKIT_MAX_YC, 0, 0);
diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c
index 79146d6ed2ab..78c3ea75da2a 100644
--- a/drivers/input/mousedev.c
+++ b/drivers/input/mousedev.c
@@ -998,34 +998,36 @@ static const struct input_device_id mousedev_ids[] = {
998 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | 998 .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
999 INPUT_DEVICE_ID_MATCH_KEYBIT | 999 INPUT_DEVICE_ID_MATCH_KEYBIT |
1000 INPUT_DEVICE_ID_MATCH_RELBIT, 1000 INPUT_DEVICE_ID_MATCH_RELBIT,
1001 .evbit = { BIT(EV_KEY) | BIT(EV_REL) }, 1001 .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) },
1002 .keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) }, 1002 .keybit = { [BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) },
1003 .relbit = { BIT(REL_X) | BIT(REL_Y) }, 1003 .relbit = { BIT_MASK(REL_X) | BIT_MASK(REL_Y) },
1004 }, /* A mouse like device, at least one button, 1004 }, /* A mouse like device, at least one button,
1005 two relative axes */ 1005 two relative axes */
1006 { 1006 {
1007 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | 1007 .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
1008 INPUT_DEVICE_ID_MATCH_RELBIT, 1008 INPUT_DEVICE_ID_MATCH_RELBIT,
1009 .evbit = { BIT(EV_KEY) | BIT(EV_REL) }, 1009 .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) },
1010 .relbit = { BIT(REL_WHEEL) }, 1010 .relbit = { BIT_MASK(REL_WHEEL) },
1011 }, /* A separate scrollwheel */ 1011 }, /* A separate scrollwheel */
1012 { 1012 {
1013 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | 1013 .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
1014 INPUT_DEVICE_ID_MATCH_KEYBIT | 1014 INPUT_DEVICE_ID_MATCH_KEYBIT |
1015 INPUT_DEVICE_ID_MATCH_ABSBIT, 1015 INPUT_DEVICE_ID_MATCH_ABSBIT,
1016 .evbit = { BIT(EV_KEY) | BIT(EV_ABS) }, 1016 .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) },
1017 .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) }, 1017 .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
1018 .absbit = { BIT(ABS_X) | BIT(ABS_Y) }, 1018 .absbit = { BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) },
1019 }, /* A tablet like device, at least touch detection, 1019 }, /* A tablet like device, at least touch detection,
1020 two absolute axes */ 1020 two absolute axes */
1021 { 1021 {
1022 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | 1022 .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
1023 INPUT_DEVICE_ID_MATCH_KEYBIT | 1023 INPUT_DEVICE_ID_MATCH_KEYBIT |
1024 INPUT_DEVICE_ID_MATCH_ABSBIT, 1024 INPUT_DEVICE_ID_MATCH_ABSBIT,
1025 .evbit = { BIT(EV_KEY) | BIT(EV_ABS) }, 1025 .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) },
1026 .keybit = { [LONG(BTN_TOOL_FINGER)] = BIT(BTN_TOOL_FINGER) }, 1026 .keybit = { [BIT_WORD(BTN_TOOL_FINGER)] =
1027 .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) | 1027 BIT_MASK(BTN_TOOL_FINGER) },
1028 BIT(ABS_TOOL_WIDTH) }, 1028 .absbit = { BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
1029 BIT_MASK(ABS_PRESSURE) |
1030 BIT_MASK(ABS_TOOL_WIDTH) },
1029 }, /* A touchpad */ 1031 }, /* A touchpad */
1030 1032
1031 { }, /* Terminating entry */ 1033 { }, /* Terminating entry */
diff --git a/drivers/input/tablet/acecad.c b/drivers/input/tablet/acecad.c
index dd2310458c46..b973d0ef6d16 100644
--- a/drivers/input/tablet/acecad.c
+++ b/drivers/input/tablet/acecad.c
@@ -192,10 +192,14 @@ static int usb_acecad_probe(struct usb_interface *intf, const struct usb_device_
192 input_dev->open = usb_acecad_open; 192 input_dev->open = usb_acecad_open;
193 input_dev->close = usb_acecad_close; 193 input_dev->close = usb_acecad_close;
194 194
195 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 195 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
196 input_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE); 196 input_dev->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
197 input_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE); 197 BIT_MASK(ABS_PRESSURE);
198 input_dev->keybit[LONG(BTN_DIGI)] = BIT(BTN_TOOL_PEN) |BIT(BTN_TOUCH) | BIT(BTN_STYLUS) | BIT(BTN_STYLUS2); 198 input_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
199 BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
200 input_dev->keybit[BIT_WORD(BTN_DIGI)] = BIT_MASK(BTN_TOOL_PEN) |
201 BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS) |
202 BIT_MASK(BTN_STYLUS2);
199 203
200 switch (id->driver_info) { 204 switch (id->driver_info) {
201 case 0: 205 case 0:
diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c
index b2ca10f2fe0e..d2c6da264722 100644
--- a/drivers/input/tablet/gtco.c
+++ b/drivers/input/tablet/gtco.c
@@ -573,10 +573,12 @@ static void gtco_setup_caps(struct input_dev *inputdev)
573 struct gtco *device = input_get_drvdata(inputdev); 573 struct gtco *device = input_get_drvdata(inputdev);
574 574
575 /* Which events */ 575 /* Which events */
576 inputdev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_MSC); 576 inputdev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
577 BIT_MASK(EV_MSC);
577 578
578 /* Misc event menu block */ 579 /* Misc event menu block */
579 inputdev->mscbit[0] = BIT(MSC_SCAN)|BIT(MSC_SERIAL)|BIT(MSC_RAW) ; 580 inputdev->mscbit[0] = BIT_MASK(MSC_SCAN) | BIT_MASK(MSC_SERIAL) |
581 BIT_MASK(MSC_RAW);
580 582
581 /* Absolute values based on HID report info */ 583 /* Absolute values based on HID report info */
582 input_set_abs_params(inputdev, ABS_X, device->min_X, device->max_X, 584 input_set_abs_params(inputdev, ABS_X, device->min_X, device->max_X,
diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c
index 91e6d00d4a43..1182fc133167 100644
--- a/drivers/input/tablet/kbtab.c
+++ b/drivers/input/tablet/kbtab.c
@@ -153,10 +153,13 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i
153 input_dev->open = kbtab_open; 153 input_dev->open = kbtab_open;
154 input_dev->close = kbtab_close; 154 input_dev->close = kbtab_close;
155 155
156 input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_MSC); 156 input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
157 input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE); 157 BIT_MASK(EV_MSC);
158 input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_PEN) | BIT(BTN_TOUCH); 158 input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
159 input_dev->mscbit[0] |= BIT(MSC_SERIAL); 159 BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
160 input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_PEN) |
161 BIT_MASK(BTN_TOUCH);
162 input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
160 input_set_abs_params(input_dev, ABS_X, 0, 0x2000, 4, 0); 163 input_set_abs_params(input_dev, ABS_X, 0, 0x2000, 4, 0);
161 input_set_abs_params(input_dev, ABS_Y, 0, 0x1750, 4, 0); 164 input_set_abs_params(input_dev, ABS_Y, 0, 0x1750, 4, 0);
162 input_set_abs_params(input_dev, ABS_PRESSURE, 0, 0xff, 0, 0); 165 input_set_abs_params(input_dev, ABS_PRESSURE, 0, 0xff, 0, 0);
diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index 064e123c9b76..d64b1ea136b3 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -140,48 +140,58 @@ static void wacom_close(struct input_dev *dev)
140 140
141void input_dev_mo(struct input_dev *input_dev, struct wacom_wac *wacom_wac) 141void input_dev_mo(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
142{ 142{
143 input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_1) | BIT(BTN_5); 143 input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_1) |
144 BIT_MASK(BTN_5);
144 input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0); 145 input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0);
145} 146}
146 147
147void input_dev_g4(struct input_dev *input_dev, struct wacom_wac *wacom_wac) 148void input_dev_g4(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
148{ 149{
149 input_dev->evbit[0] |= BIT(EV_MSC); 150 input_dev->evbit[0] |= BIT_MASK(EV_MSC);
150 input_dev->mscbit[0] |= BIT(MSC_SERIAL); 151 input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
151 input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_FINGER); 152 input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_FINGER);
152 input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_0) | BIT(BTN_4); 153 input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_0) |
154 BIT_MASK(BTN_4);
153} 155}
154 156
155void input_dev_g(struct input_dev *input_dev, struct wacom_wac *wacom_wac) 157void input_dev_g(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
156{ 158{
157 input_dev->evbit[0] |= BIT(EV_REL); 159 input_dev->evbit[0] |= BIT_MASK(EV_REL);
158 input_dev->relbit[0] |= BIT(REL_WHEEL); 160 input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
159 input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE); 161 input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
160 input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER) | BIT(BTN_TOOL_MOUSE) | BIT(BTN_STYLUS2); 162 BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
163 input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER) |
164 BIT_MASK(BTN_TOOL_MOUSE) | BIT_MASK(BTN_STYLUS2);
161 input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0); 165 input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0);
162} 166}
163 167
164void input_dev_i3s(struct input_dev *input_dev, struct wacom_wac *wacom_wac) 168void input_dev_i3s(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
165{ 169{
166 input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_FINGER); 170 input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_FINGER);
167 input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_0) | BIT(BTN_1) | BIT(BTN_2) | BIT(BTN_3); 171 input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_0) |
172 BIT_MASK(BTN_1) | BIT_MASK(BTN_2) | BIT_MASK(BTN_3);
168 input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); 173 input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
169} 174}
170 175
171void input_dev_i3(struct input_dev *input_dev, struct wacom_wac *wacom_wac) 176void input_dev_i3(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
172{ 177{
173 input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_4) | BIT(BTN_5) | BIT(BTN_6) | BIT(BTN_7); 178 input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_4) |
179 BIT_MASK(BTN_5) | BIT_MASK(BTN_6) | BIT_MASK(BTN_7);
174 input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0); 180 input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
175} 181}
176 182
177void input_dev_i(struct input_dev *input_dev, struct wacom_wac *wacom_wac) 183void input_dev_i(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
178{ 184{
179 input_dev->evbit[0] |= BIT(EV_MSC) | BIT(EV_REL); 185 input_dev->evbit[0] |= BIT_MASK(EV_MSC) | BIT_MASK(EV_REL);
180 input_dev->mscbit[0] |= BIT(MSC_SERIAL); 186 input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
181 input_dev->relbit[0] |= BIT(REL_WHEEL); 187 input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
182 input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE) | BIT(BTN_SIDE) | BIT(BTN_EXTRA); 188 input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
183 input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER) | BIT(BTN_TOOL_MOUSE) | BIT(BTN_TOOL_BRUSH) 189 BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE) |
184 | BIT(BTN_TOOL_PENCIL) | BIT(BTN_TOOL_AIRBRUSH) | BIT(BTN_TOOL_LENS) | BIT(BTN_STYLUS2); 190 BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
191 input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER) |
192 BIT_MASK(BTN_TOOL_MOUSE) | BIT_MASK(BTN_TOOL_BRUSH) |
193 BIT_MASK(BTN_TOOL_PENCIL) | BIT_MASK(BTN_TOOL_AIRBRUSH) |
194 BIT_MASK(BTN_TOOL_LENS) | BIT_MASK(BTN_STYLUS2);
185 input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0); 195 input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0);
186 input_set_abs_params(input_dev, ABS_WHEEL, 0, 1023, 0, 0); 196 input_set_abs_params(input_dev, ABS_WHEEL, 0, 1023, 0, 0);
187 input_set_abs_params(input_dev, ABS_TILT_X, 0, 127, 0, 0); 197 input_set_abs_params(input_dev, ABS_TILT_X, 0, 127, 0, 0);
@@ -192,12 +202,13 @@ void input_dev_i(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
192 202
193void input_dev_pl(struct input_dev *input_dev, struct wacom_wac *wacom_wac) 203void input_dev_pl(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
194{ 204{
195 input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_STYLUS2) | BIT(BTN_TOOL_RUBBER); 205 input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_STYLUS2) |
206 BIT_MASK(BTN_TOOL_RUBBER);
196} 207}
197 208
198void input_dev_pt(struct input_dev *input_dev, struct wacom_wac *wacom_wac) 209void input_dev_pt(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
199{ 210{
200 input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER); 211 input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER);
201} 212}
202 213
203static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *id) 214static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *id)
@@ -243,12 +254,13 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i
243 input_dev->open = wacom_open; 254 input_dev->open = wacom_open;
244 input_dev->close = wacom_close; 255 input_dev->close = wacom_close;
245 256
246 input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS); 257 input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
247 input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_PEN) | BIT(BTN_TOUCH) | BIT(BTN_STYLUS); 258 input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_PEN) |
259 BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS);
248 input_set_abs_params(input_dev, ABS_X, 0, wacom_wac->features->x_max, 4, 0); 260 input_set_abs_params(input_dev, ABS_X, 0, wacom_wac->features->x_max, 4, 0);
249 input_set_abs_params(input_dev, ABS_Y, 0, wacom_wac->features->y_max, 4, 0); 261 input_set_abs_params(input_dev, ABS_Y, 0, wacom_wac->features->y_max, 4, 0);
250 input_set_abs_params(input_dev, ABS_PRESSURE, 0, wacom_wac->features->pressure_max, 0, 0); 262 input_set_abs_params(input_dev, ABS_PRESSURE, 0, wacom_wac->features->pressure_max, 0, 0);
251 input_dev->absbit[LONG(ABS_MISC)] |= BIT(ABS_MISC); 263 input_dev->absbit[BIT_WORD(ABS_MISC)] |= BIT_MASK(ABS_MISC);
252 264
253 wacom_init_input_dev(input_dev, wacom_wac); 265 wacom_init_input_dev(input_dev, wacom_wac);
254 266
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 51ae4fb7d123..f59aecf5ec15 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -917,8 +917,8 @@ static int __devinit ads7846_probe(struct spi_device *spi)
917 input_dev->phys = ts->phys; 917 input_dev->phys = ts->phys;
918 input_dev->dev.parent = &spi->dev; 918 input_dev->dev.parent = &spi->dev;
919 919
920 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 920 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
921 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 921 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
922 input_set_abs_params(input_dev, ABS_X, 922 input_set_abs_params(input_dev, ABS_X,
923 pdata->x_min ? : 0, 923 pdata->x_min ? : 0,
924 pdata->x_max ? : MAX_12BIT, 924 pdata->x_max ? : MAX_12BIT,
diff --git a/drivers/input/touchscreen/corgi_ts.c b/drivers/input/touchscreen/corgi_ts.c
index e6a31d118786..b1b2e07bf080 100644
--- a/drivers/input/touchscreen/corgi_ts.c
+++ b/drivers/input/touchscreen/corgi_ts.c
@@ -302,8 +302,8 @@ static int __init corgits_probe(struct platform_device *pdev)
302 input_dev->id.version = 0x0100; 302 input_dev->id.version = 0x0100;
303 input_dev->dev.parent = &pdev->dev; 303 input_dev->dev.parent = &pdev->dev;
304 304
305 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 305 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
306 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 306 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
307 input_set_abs_params(input_dev, ABS_X, X_AXIS_MIN, X_AXIS_MAX, 0, 0); 307 input_set_abs_params(input_dev, ABS_X, X_AXIS_MIN, X_AXIS_MAX, 0, 0);
308 input_set_abs_params(input_dev, ABS_Y, Y_AXIS_MIN, Y_AXIS_MAX, 0, 0); 308 input_set_abs_params(input_dev, ABS_Y, Y_AXIS_MIN, Y_AXIS_MAX, 0, 0);
309 input_set_abs_params(input_dev, ABS_PRESSURE, PRESSURE_MIN, PRESSURE_MAX, 0, 0); 309 input_set_abs_params(input_dev, ABS_PRESSURE, PRESSURE_MIN, PRESSURE_MAX, 0, 0);
diff --git a/drivers/input/touchscreen/elo.c b/drivers/input/touchscreen/elo.c
index 557d781719f1..d20689cdbd5d 100644
--- a/drivers/input/touchscreen/elo.c
+++ b/drivers/input/touchscreen/elo.c
@@ -320,8 +320,8 @@ static int elo_connect(struct serio *serio, struct serio_driver *drv)
320 input_dev->id.version = 0x0100; 320 input_dev->id.version = 0x0100;
321 input_dev->dev.parent = &serio->dev; 321 input_dev->dev.parent = &serio->dev;
322 322
323 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 323 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
324 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 324 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
325 325
326 serio_set_drvdata(serio, elo); 326 serio_set_drvdata(serio, elo);
327 err = serio_open(serio, drv); 327 err = serio_open(serio, drv);
diff --git a/drivers/input/touchscreen/fujitsu_ts.c b/drivers/input/touchscreen/fujitsu_ts.c
index daf7a4afc935..80b21800355f 100644
--- a/drivers/input/touchscreen/fujitsu_ts.c
+++ b/drivers/input/touchscreen/fujitsu_ts.c
@@ -122,8 +122,8 @@ static int fujitsu_connect(struct serio *serio, struct serio_driver *drv)
122 input_dev->id.vendor = SERIO_FUJITSU; 122 input_dev->id.vendor = SERIO_FUJITSU;
123 input_dev->id.product = 0; 123 input_dev->id.product = 0;
124 input_dev->id.version = 0x0100; 124 input_dev->id.version = 0x0100;
125 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 125 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
126 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 126 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
127 127
128 input_set_abs_params(input_dev, ABS_X, 0, 4096, 0, 0); 128 input_set_abs_params(input_dev, ABS_X, 0, 4096, 0, 0);
129 input_set_abs_params(input_dev, ABS_Y, 0, 4096, 0, 0); 129 input_set_abs_params(input_dev, ABS_Y, 0, 4096, 0, 0);
diff --git a/drivers/input/touchscreen/gunze.c b/drivers/input/touchscreen/gunze.c
index 39d602600d7c..a48a15868c4a 100644
--- a/drivers/input/touchscreen/gunze.c
+++ b/drivers/input/touchscreen/gunze.c
@@ -137,8 +137,8 @@ static int gunze_connect(struct serio *serio, struct serio_driver *drv)
137 input_dev->id.product = 0x0051; 137 input_dev->id.product = 0x0051;
138 input_dev->id.version = 0x0100; 138 input_dev->id.version = 0x0100;
139 input_dev->dev.parent = &serio->dev; 139 input_dev->dev.parent = &serio->dev;
140 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 140 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
141 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 141 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
142 input_set_abs_params(input_dev, ABS_X, 24, 1000, 0, 0); 142 input_set_abs_params(input_dev, ABS_X, 24, 1000, 0, 0);
143 input_set_abs_params(input_dev, ABS_Y, 24, 1000, 0, 0); 143 input_set_abs_params(input_dev, ABS_Y, 24, 1000, 0, 0);
144 144
diff --git a/drivers/input/touchscreen/h3600_ts_input.c b/drivers/input/touchscreen/h3600_ts_input.c
index 09ed7803cb8f..2ae6c6016a86 100644
--- a/drivers/input/touchscreen/h3600_ts_input.c
+++ b/drivers/input/touchscreen/h3600_ts_input.c
@@ -373,8 +373,9 @@ static int h3600ts_connect(struct serio *serio, struct serio_driver *drv)
373 373
374 input_dev->event = h3600ts_event; 374 input_dev->event = h3600ts_event;
375 375
376 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_LED) | BIT(EV_PWR); 376 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
377 input_dev->ledbit[0] = BIT(LED_SLEEP); 377 BIT_MASK(EV_LED) | BIT_MASK(EV_PWR);
378 input_dev->ledbit[0] = BIT_MASK(LED_SLEEP);
378 input_set_abs_params(input_dev, ABS_X, 60, 985, 0, 0); 379 input_set_abs_params(input_dev, ABS_X, 60, 985, 0, 0);
379 input_set_abs_params(input_dev, ABS_Y, 35, 1024, 0, 0); 380 input_set_abs_params(input_dev, ABS_Y, 35, 1024, 0, 0);
380 381
diff --git a/drivers/input/touchscreen/hp680_ts_input.c b/drivers/input/touchscreen/hp680_ts_input.c
index 1a15475aedfc..c38d4e0f95c6 100644
--- a/drivers/input/touchscreen/hp680_ts_input.c
+++ b/drivers/input/touchscreen/hp680_ts_input.c
@@ -81,8 +81,8 @@ static int __init hp680_ts_init(void)
81 if (!hp680_ts_dev) 81 if (!hp680_ts_dev)
82 return -ENOMEM; 82 return -ENOMEM;
83 83
84 hp680_ts_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY); 84 hp680_ts_dev->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY);
85 hp680_ts_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 85 hp680_ts_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
86 86
87 input_set_abs_params(hp680_ts_dev, ABS_X, 87 input_set_abs_params(hp680_ts_dev, ABS_X,
88 HP680_TS_ABS_X_MIN, HP680_TS_ABS_X_MAX, 0, 0); 88 HP680_TS_ABS_X_MIN, HP680_TS_ABS_X_MAX, 0, 0);
diff --git a/drivers/input/touchscreen/mk712.c b/drivers/input/touchscreen/mk712.c
index 44140feeffc5..80a658868706 100644
--- a/drivers/input/touchscreen/mk712.c
+++ b/drivers/input/touchscreen/mk712.c
@@ -186,8 +186,8 @@ static int __init mk712_init(void)
186 mk712_dev->open = mk712_open; 186 mk712_dev->open = mk712_open;
187 mk712_dev->close = mk712_close; 187 mk712_dev->close = mk712_close;
188 188
189 mk712_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 189 mk712_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
190 mk712_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 190 mk712_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
191 input_set_abs_params(mk712_dev, ABS_X, 0, 0xfff, 88, 0); 191 input_set_abs_params(mk712_dev, ABS_X, 0, 0xfff, 88, 0);
192 input_set_abs_params(mk712_dev, ABS_Y, 0, 0xfff, 88, 0); 192 input_set_abs_params(mk712_dev, ABS_Y, 0, 0xfff, 88, 0);
193 193
diff --git a/drivers/input/touchscreen/mtouch.c b/drivers/input/touchscreen/mtouch.c
index 4ec3b1f940c8..9077228418b7 100644
--- a/drivers/input/touchscreen/mtouch.c
+++ b/drivers/input/touchscreen/mtouch.c
@@ -151,8 +151,8 @@ static int mtouch_connect(struct serio *serio, struct serio_driver *drv)
151 input_dev->id.product = 0; 151 input_dev->id.product = 0;
152 input_dev->id.version = 0x0100; 152 input_dev->id.version = 0x0100;
153 input_dev->dev.parent = &serio->dev; 153 input_dev->dev.parent = &serio->dev;
154 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 154 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
155 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 155 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
156 input_set_abs_params(mtouch->dev, ABS_X, MTOUCH_MIN_XC, MTOUCH_MAX_XC, 0, 0); 156 input_set_abs_params(mtouch->dev, ABS_X, MTOUCH_MIN_XC, MTOUCH_MAX_XC, 0, 0);
157 input_set_abs_params(mtouch->dev, ABS_Y, MTOUCH_MIN_YC, MTOUCH_MAX_YC, 0, 0); 157 input_set_abs_params(mtouch->dev, ABS_Y, MTOUCH_MIN_YC, MTOUCH_MAX_YC, 0, 0);
158 158
diff --git a/drivers/input/touchscreen/penmount.c b/drivers/input/touchscreen/penmount.c
index f2c0d3c7149c..c7f9cebebbb6 100644
--- a/drivers/input/touchscreen/penmount.c
+++ b/drivers/input/touchscreen/penmount.c
@@ -113,8 +113,8 @@ static int pm_connect(struct serio *serio, struct serio_driver *drv)
113 input_dev->id.version = 0x0100; 113 input_dev->id.version = 0x0100;
114 input_dev->dev.parent = &serio->dev; 114 input_dev->dev.parent = &serio->dev;
115 115
116 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 116 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
117 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 117 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
118 input_set_abs_params(pm->dev, ABS_X, 0, 0x3ff, 0, 0); 118 input_set_abs_params(pm->dev, ABS_X, 0, 0x3ff, 0, 0);
119 input_set_abs_params(pm->dev, ABS_Y, 0, 0x3ff, 0, 0); 119 input_set_abs_params(pm->dev, ABS_Y, 0, 0x3ff, 0, 0);
120 120
diff --git a/drivers/input/touchscreen/touchright.c b/drivers/input/touchscreen/touchright.c
index 3def7bb1df44..3a5c142c2a78 100644
--- a/drivers/input/touchscreen/touchright.c
+++ b/drivers/input/touchscreen/touchright.c
@@ -125,8 +125,8 @@ static int tr_connect(struct serio *serio, struct serio_driver *drv)
125 input_dev->id.product = 0; 125 input_dev->id.product = 0;
126 input_dev->id.version = 0x0100; 126 input_dev->id.version = 0x0100;
127 input_dev->dev.parent = &serio->dev; 127 input_dev->dev.parent = &serio->dev;
128 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 128 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
129 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 129 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
130 input_set_abs_params(tr->dev, ABS_X, TR_MIN_XC, TR_MAX_XC, 0, 0); 130 input_set_abs_params(tr->dev, ABS_X, TR_MIN_XC, TR_MAX_XC, 0, 0);
131 input_set_abs_params(tr->dev, ABS_Y, TR_MIN_YC, TR_MAX_YC, 0, 0); 131 input_set_abs_params(tr->dev, ABS_Y, TR_MIN_YC, TR_MAX_YC, 0, 0);
132 132
diff --git a/drivers/input/touchscreen/touchwin.c b/drivers/input/touchscreen/touchwin.c
index ac4bdcf18666..763a656a59f8 100644
--- a/drivers/input/touchscreen/touchwin.c
+++ b/drivers/input/touchscreen/touchwin.c
@@ -132,8 +132,8 @@ static int tw_connect(struct serio *serio, struct serio_driver *drv)
132 input_dev->id.product = 0; 132 input_dev->id.product = 0;
133 input_dev->id.version = 0x0100; 133 input_dev->id.version = 0x0100;
134 input_dev->dev.parent = &serio->dev; 134 input_dev->dev.parent = &serio->dev;
135 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 135 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
136 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 136 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
137 input_set_abs_params(tw->dev, ABS_X, TW_MIN_XC, TW_MAX_XC, 0, 0); 137 input_set_abs_params(tw->dev, ABS_X, TW_MIN_XC, TW_MAX_XC, 0, 0);
138 input_set_abs_params(tw->dev, ABS_Y, TW_MIN_YC, TW_MAX_YC, 0, 0); 138 input_set_abs_params(tw->dev, ABS_Y, TW_MIN_YC, TW_MAX_YC, 0, 0);
139 139
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 89373b01d8f5..7549939b9535 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -517,7 +517,7 @@ static int ucb1400_ts_probe(struct device *dev)
517 idev->id.product = id; 517 idev->id.product = id;
518 idev->open = ucb1400_ts_open; 518 idev->open = ucb1400_ts_open;
519 idev->close = ucb1400_ts_close; 519 idev->close = ucb1400_ts_close;
520 idev->evbit[0] = BIT(EV_ABS); 520 idev->evbit[0] = BIT_MASK(EV_ABS);
521 521
522 ucb1400_adc_enable(ucb); 522 ucb1400_adc_enable(ucb);
523 x_res = ucb1400_ts_read_xres(ucb); 523 x_res = ucb1400_ts_read_xres(ucb);
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 9fb3d5c30999..5f34b78d5ddb 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -868,8 +868,8 @@ static int usbtouch_probe(struct usb_interface *intf,
868 input_dev->open = usbtouch_open; 868 input_dev->open = usbtouch_open;
869 input_dev->close = usbtouch_close; 869 input_dev->close = usbtouch_close;
870 870
871 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 871 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
872 input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); 872 input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
873 input_set_abs_params(input_dev, ABS_X, type->min_xc, type->max_xc, 0, 0); 873 input_set_abs_params(input_dev, ABS_X, type->min_xc, type->max_xc, 0, 0);
874 input_set_abs_params(input_dev, ABS_Y, type->min_yc, type->max_yc, 0, 0); 874 input_set_abs_params(input_dev, ABS_Y, type->min_yc, type->max_yc, 0, 0);
875 if (type->max_press) 875 if (type->max_press)
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 428872b653e9..669f6f67449c 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -486,11 +486,13 @@ static void b1dma_handle_rx(avmcard *card)
486 card->name); 486 card->name);
487 } else { 487 } else {
488 memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen); 488 memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
489 if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF) 489 if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF) {
490 spin_lock(&card->lock);
490 capilib_data_b3_conf(&cinfo->ncci_head, ApplId, 491 capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
491 CAPIMSG_NCCI(skb->data), 492 CAPIMSG_NCCI(skb->data),
492 CAPIMSG_MSGID(skb->data)); 493 CAPIMSG_MSGID(skb->data));
493 494 spin_unlock(&card->lock);
495 }
494 capi_ctr_handle_message(ctrl, ApplId, skb); 496 capi_ctr_handle_message(ctrl, ApplId, skb);
495 } 497 }
496 break; 498 break;
@@ -500,9 +502,9 @@ static void b1dma_handle_rx(avmcard *card)
500 ApplId = _get_word(&p); 502 ApplId = _get_word(&p);
501 NCCI = _get_word(&p); 503 NCCI = _get_word(&p);
502 WindowSize = _get_word(&p); 504 WindowSize = _get_word(&p);
503 505 spin_lock(&card->lock);
504 capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize); 506 capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize);
505 507 spin_unlock(&card->lock);
506 break; 508 break;
507 509
508 case RECEIVE_FREE_NCCI: 510 case RECEIVE_FREE_NCCI:
@@ -510,9 +512,11 @@ static void b1dma_handle_rx(avmcard *card)
510 ApplId = _get_word(&p); 512 ApplId = _get_word(&p);
511 NCCI = _get_word(&p); 513 NCCI = _get_word(&p);
512 514
513 if (NCCI != 0xffffffff) 515 if (NCCI != 0xffffffff) {
516 spin_lock(&card->lock);
514 capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI); 517 capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI);
515 518 spin_unlock(&card->lock);
519 }
516 break; 520 break;
517 521
518 case RECEIVE_START: 522 case RECEIVE_START:
@@ -751,10 +755,10 @@ void b1dma_reset_ctr(struct capi_ctr *ctrl)
751 755
752 spin_lock_irqsave(&card->lock, flags); 756 spin_lock_irqsave(&card->lock, flags);
753 b1dma_reset(card); 757 b1dma_reset(card);
754 spin_unlock_irqrestore(&card->lock, flags);
755 758
756 memset(cinfo->version, 0, sizeof(cinfo->version)); 759 memset(cinfo->version, 0, sizeof(cinfo->version));
757 capilib_release(&cinfo->ncci_head); 760 capilib_release(&cinfo->ncci_head);
761 spin_unlock_irqrestore(&card->lock, flags);
758 capi_ctr_reseted(ctrl); 762 capi_ctr_reseted(ctrl);
759} 763}
760 764
@@ -803,8 +807,11 @@ void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl)
803 avmcard *card = cinfo->card; 807 avmcard *card = cinfo->card;
804 struct sk_buff *skb; 808 struct sk_buff *skb;
805 void *p; 809 void *p;
810 unsigned long flags;
806 811
812 spin_lock_irqsave(&card->lock, flags);
807 capilib_release_appl(&cinfo->ncci_head, appl); 813 capilib_release_appl(&cinfo->ncci_head, appl);
814 spin_unlock_irqrestore(&card->lock, flags);
808 815
809 skb = alloc_skb(7, GFP_ATOMIC); 816 skb = alloc_skb(7, GFP_ATOMIC);
810 if (!skb) { 817 if (!skb) {
@@ -832,10 +839,13 @@ u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
832 u16 retval = CAPI_NOERROR; 839 u16 retval = CAPI_NOERROR;
833 840
834 if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) { 841 if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) {
842 unsigned long flags;
843 spin_lock_irqsave(&card->lock, flags);
835 retval = capilib_data_b3_req(&cinfo->ncci_head, 844 retval = capilib_data_b3_req(&cinfo->ncci_head,
836 CAPIMSG_APPID(skb->data), 845 CAPIMSG_APPID(skb->data),
837 CAPIMSG_NCCI(skb->data), 846 CAPIMSG_NCCI(skb->data),
838 CAPIMSG_MSGID(skb->data)); 847 CAPIMSG_MSGID(skb->data));
848 spin_unlock_irqrestore(&card->lock, flags);
839 } 849 }
840 if (retval == CAPI_NOERROR) 850 if (retval == CAPI_NOERROR)
841 b1dma_queue_tx(card, skb); 851 b1dma_queue_tx(card, skb);
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 8710cf6214d9..4bbbbe688077 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -678,7 +678,9 @@ static irqreturn_t c4_handle_interrupt(avmcard *card)
678 for (i=0; i < card->nr_controllers; i++) { 678 for (i=0; i < card->nr_controllers; i++) {
679 avmctrl_info *cinfo = &card->ctrlinfo[i]; 679 avmctrl_info *cinfo = &card->ctrlinfo[i];
680 memset(cinfo->version, 0, sizeof(cinfo->version)); 680 memset(cinfo->version, 0, sizeof(cinfo->version));
681 spin_lock_irqsave(&card->lock, flags);
681 capilib_release(&cinfo->ncci_head); 682 capilib_release(&cinfo->ncci_head);
683 spin_unlock_irqrestore(&card->lock, flags);
682 capi_ctr_reseted(&cinfo->capi_ctrl); 684 capi_ctr_reseted(&cinfo->capi_ctrl);
683 } 685 }
684 card->nlogcontr = 0; 686 card->nlogcontr = 0;
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index c925020fe9b7..6130724e46e7 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -180,8 +180,8 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
180 180
181 ApplId = (unsigned) b1_get_word(card->port); 181 ApplId = (unsigned) b1_get_word(card->port);
182 MsgLen = t1_get_slice(card->port, card->msgbuf); 182 MsgLen = t1_get_slice(card->port, card->msgbuf);
183 spin_unlock_irqrestore(&card->lock, flags);
184 if (!(skb = alloc_skb(MsgLen, GFP_ATOMIC))) { 183 if (!(skb = alloc_skb(MsgLen, GFP_ATOMIC))) {
184 spin_unlock_irqrestore(&card->lock, flags);
185 printk(KERN_ERR "%s: incoming packet dropped\n", 185 printk(KERN_ERR "%s: incoming packet dropped\n",
186 card->name); 186 card->name);
187 } else { 187 } else {
@@ -190,7 +190,7 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
190 capilib_data_b3_conf(&cinfo->ncci_head, ApplId, 190 capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
191 CAPIMSG_NCCI(skb->data), 191 CAPIMSG_NCCI(skb->data),
192 CAPIMSG_MSGID(skb->data)); 192 CAPIMSG_MSGID(skb->data));
193 193 spin_unlock_irqrestore(&card->lock, flags);
194 capi_ctr_handle_message(ctrl, ApplId, skb); 194 capi_ctr_handle_message(ctrl, ApplId, skb);
195 } 195 }
196 break; 196 break;
@@ -200,21 +200,17 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
200 ApplId = b1_get_word(card->port); 200 ApplId = b1_get_word(card->port);
201 NCCI = b1_get_word(card->port); 201 NCCI = b1_get_word(card->port);
202 WindowSize = b1_get_word(card->port); 202 WindowSize = b1_get_word(card->port);
203 spin_unlock_irqrestore(&card->lock, flags);
204
205 capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize); 203 capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize);
206 204 spin_unlock_irqrestore(&card->lock, flags);
207 break; 205 break;
208 206
209 case RECEIVE_FREE_NCCI: 207 case RECEIVE_FREE_NCCI:
210 208
211 ApplId = b1_get_word(card->port); 209 ApplId = b1_get_word(card->port);
212 NCCI = b1_get_word(card->port); 210 NCCI = b1_get_word(card->port);
213 spin_unlock_irqrestore(&card->lock, flags);
214
215 if (NCCI != 0xffffffff) 211 if (NCCI != 0xffffffff)
216 capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI); 212 capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI);
217 213 spin_unlock_irqrestore(&card->lock, flags);
218 break; 214 break;
219 215
220 case RECEIVE_START: 216 case RECEIVE_START:
@@ -333,13 +329,16 @@ static void t1isa_reset_ctr(struct capi_ctr *ctrl)
333 avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); 329 avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
334 avmcard *card = cinfo->card; 330 avmcard *card = cinfo->card;
335 unsigned int port = card->port; 331 unsigned int port = card->port;
332 unsigned long flags;
336 333
337 t1_disable_irq(port); 334 t1_disable_irq(port);
338 b1_reset(port); 335 b1_reset(port);
339 b1_reset(port); 336 b1_reset(port);
340 337
341 memset(cinfo->version, 0, sizeof(cinfo->version)); 338 memset(cinfo->version, 0, sizeof(cinfo->version));
339 spin_lock_irqsave(&card->lock, flags);
342 capilib_release(&cinfo->ncci_head); 340 capilib_release(&cinfo->ncci_head);
341 spin_unlock_irqrestore(&card->lock, flags);
343 capi_ctr_reseted(ctrl); 342 capi_ctr_reseted(ctrl);
344} 343}
345 344
@@ -466,29 +465,26 @@ static u16 t1isa_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
466 u8 subcmd = CAPIMSG_SUBCOMMAND(skb->data); 465 u8 subcmd = CAPIMSG_SUBCOMMAND(skb->data);
467 u16 dlen, retval; 466 u16 dlen, retval;
468 467
468 spin_lock_irqsave(&card->lock, flags);
469 if (CAPICMD(cmd, subcmd) == CAPI_DATA_B3_REQ) { 469 if (CAPICMD(cmd, subcmd) == CAPI_DATA_B3_REQ) {
470 retval = capilib_data_b3_req(&cinfo->ncci_head, 470 retval = capilib_data_b3_req(&cinfo->ncci_head,
471 CAPIMSG_APPID(skb->data), 471 CAPIMSG_APPID(skb->data),
472 CAPIMSG_NCCI(skb->data), 472 CAPIMSG_NCCI(skb->data),
473 CAPIMSG_MSGID(skb->data)); 473 CAPIMSG_MSGID(skb->data));
474 if (retval != CAPI_NOERROR) 474 if (retval != CAPI_NOERROR) {
475 spin_unlock_irqrestore(&card->lock, flags);
475 return retval; 476 return retval;
476 477 }
477 dlen = CAPIMSG_DATALEN(skb->data); 478 dlen = CAPIMSG_DATALEN(skb->data);
478 479
479 spin_lock_irqsave(&card->lock, flags);
480 b1_put_byte(port, SEND_DATA_B3_REQ); 480 b1_put_byte(port, SEND_DATA_B3_REQ);
481 t1_put_slice(port, skb->data, len); 481 t1_put_slice(port, skb->data, len);
482 t1_put_slice(port, skb->data + len, dlen); 482 t1_put_slice(port, skb->data + len, dlen);
483 spin_unlock_irqrestore(&card->lock, flags);
484 } else { 483 } else {
485
486 spin_lock_irqsave(&card->lock, flags);
487 b1_put_byte(port, SEND_MESSAGE); 484 b1_put_byte(port, SEND_MESSAGE);
488 t1_put_slice(port, skb->data, len); 485 t1_put_slice(port, skb->data, len);
489 spin_unlock_irqrestore(&card->lock, flags);
490 } 486 }
491 487 spin_unlock_irqrestore(&card->lock, flags);
492 dev_kfree_skb_any(skb); 488 dev_kfree_skb_any(skb);
493 return CAPI_NOERROR; 489 return CAPI_NOERROR;
494} 490}
diff --git a/drivers/isdn/sc/debug.h b/drivers/isdn/sc/debug.h
deleted file mode 100644
index e9db96ede4b2..000000000000
--- a/drivers/isdn/sc/debug.h
+++ /dev/null
@@ -1,19 +0,0 @@
1/* $Id: debug.h,v 1.2.8.1 2001/09/23 22:24:59 kai Exp $
2 *
3 * Copyright (C) 1996 SpellCaster Telecommunications Inc.
4 *
5 * This software may be used and distributed according to the terms
6 * of the GNU General Public License, incorporated herein by reference.
7 *
8 * For more information, please contact gpl-info@spellcast.com or write:
9 *
10 * SpellCaster Telecommunications Inc.
11 * 5621 Finch Avenue East, Unit #3
12 * Scarborough, Ontario Canada
13 * M1B 2T9
14 * +1 (416) 297-8565
15 * +1 (416) 297-6433 Facsimile
16 */
17
18#define REQUEST_IRQ(a,b,c,d,e) request_irq(a,b,c,d,e)
19#define FREE_IRQ(a,b) free_irq(a,b)
diff --git a/drivers/isdn/sc/includes.h b/drivers/isdn/sc/includes.h
index 5286e0c810a9..4766e5b77378 100644
--- a/drivers/isdn/sc/includes.h
+++ b/drivers/isdn/sc/includes.h
@@ -14,4 +14,3 @@
14#include <linux/timer.h> 14#include <linux/timer.h>
15#include <linux/wait.h> 15#include <linux/wait.h>
16#include <linux/isdnif.h> 16#include <linux/isdnif.h>
17#include "debug.h"
diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c
index 0bf76344a0d5..d09c854cfac7 100644
--- a/drivers/isdn/sc/init.c
+++ b/drivers/isdn/sc/init.c
@@ -404,7 +404,7 @@ static void __exit sc_exit(void)
404 /* 404 /*
405 * Release the IRQ 405 * Release the IRQ
406 */ 406 */
407 FREE_IRQ(sc_adapter[i]->interrupt, NULL); 407 free_irq(sc_adapter[i]->interrupt, NULL);
408 408
409 /* 409 /*
410 * Reset for a clean start 410 * Reset for a clean start
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index 2766e4fc4ea8..883da72b5368 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -791,8 +791,10 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
791 if (hid->keycode[i]) 791 if (hid->keycode[i])
792 set_bit(hid->keycode[i], input_dev->keybit); 792 set_bit(hid->keycode[i], input_dev->keybit);
793 793
794 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_REP); 794 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
795 input_dev->ledbit[0] = BIT(LED_SCROLLL) | BIT(LED_CAPSL) | BIT(LED_NUML); 795 BIT_MASK(EV_REP);
796 input_dev->ledbit[0] = BIT_MASK(LED_SCROLLL) |
797 BIT_MASK(LED_CAPSL) | BIT_MASK(LED_NUML);
796 input_dev->event = adbhid_kbd_event; 798 input_dev->event = adbhid_kbd_event;
797 input_dev->keycodemax = KEY_FN; 799 input_dev->keycodemax = KEY_FN;
798 input_dev->keycodesize = sizeof(hid->keycode[0]); 800 input_dev->keycodesize = sizeof(hid->keycode[0]);
@@ -801,16 +803,18 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
801 case ADB_MOUSE: 803 case ADB_MOUSE:
802 sprintf(hid->name, "ADB mouse"); 804 sprintf(hid->name, "ADB mouse");
803 805
804 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 806 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
805 input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 807 input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
806 input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y); 808 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
809 input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
807 break; 810 break;
808 811
809 case ADB_MISC: 812 case ADB_MISC:
810 switch (original_handler_id) { 813 switch (original_handler_id) {
811 case 0x02: /* Adjustable keyboard button device */ 814 case 0x02: /* Adjustable keyboard button device */
812 sprintf(hid->name, "ADB adjustable keyboard buttons"); 815 sprintf(hid->name, "ADB adjustable keyboard buttons");
813 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 816 input_dev->evbit[0] = BIT_MASK(EV_KEY) |
817 BIT_MASK(EV_REP);
814 set_bit(KEY_SOUND, input_dev->keybit); 818 set_bit(KEY_SOUND, input_dev->keybit);
815 set_bit(KEY_MUTE, input_dev->keybit); 819 set_bit(KEY_MUTE, input_dev->keybit);
816 set_bit(KEY_VOLUMEUP, input_dev->keybit); 820 set_bit(KEY_VOLUMEUP, input_dev->keybit);
@@ -818,7 +822,8 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
818 break; 822 break;
819 case 0x1f: /* Powerbook button device */ 823 case 0x1f: /* Powerbook button device */
820 sprintf(hid->name, "ADB Powerbook buttons"); 824 sprintf(hid->name, "ADB Powerbook buttons");
821 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 825 input_dev->evbit[0] = BIT_MASK(EV_KEY) |
826 BIT_MASK(EV_REP);
822 set_bit(KEY_MUTE, input_dev->keybit); 827 set_bit(KEY_MUTE, input_dev->keybit);
823 set_bit(KEY_VOLUMEUP, input_dev->keybit); 828 set_bit(KEY_VOLUMEUP, input_dev->keybit);
824 set_bit(KEY_VOLUMEDOWN, input_dev->keybit); 829 set_bit(KEY_VOLUMEDOWN, input_dev->keybit);
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index 33dee3a773ed..89302309da92 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -117,9 +117,10 @@ static int emumousebtn_input_register(void)
117 emumousebtn->id.product = 0x0001; 117 emumousebtn->id.product = 0x0001;
118 emumousebtn->id.version = 0x0100; 118 emumousebtn->id.version = 0x0100;
119 119
120 emumousebtn->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 120 emumousebtn->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
121 emumousebtn->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT); 121 emumousebtn->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
122 emumousebtn->relbit[0] = BIT(REL_X) | BIT(REL_Y); 122 BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
123 emumousebtn->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
123 124
124 ret = input_register_device(emumousebtn); 125 ret = input_register_device(emumousebtn);
125 if (ret) 126 if (ret)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c059ae6f37e5..808cd9549456 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4717,7 +4717,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
4717 4717
4718void md_unregister_thread(mdk_thread_t *thread) 4718void md_unregister_thread(mdk_thread_t *thread)
4719{ 4719{
4720 dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid); 4720 dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
4721 4721
4722 kthread_stop(thread->tsk); 4722 kthread_stop(thread->tsk);
4723 kfree(thread); 4723 kfree(thread);
diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index 5a12b5679556..154a7ce7cb82 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -820,7 +820,7 @@ static int cinergyt2_register_rc(struct cinergyt2 *cinergyt2)
820 820
821 input_dev->name = DRIVER_NAME " remote control"; 821 input_dev->name = DRIVER_NAME " remote control";
822 input_dev->phys = cinergyt2->phys; 822 input_dev->phys = cinergyt2->phys;
823 input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); 823 input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
824 for (i = 0; i < ARRAY_SIZE(rc_keys); i += 3) 824 for (i = 0; i < ARRAY_SIZE(rc_keys); i += 3)
825 set_bit(rc_keys[i + 2], input_dev->keybit); 825 set_bit(rc_keys[i + 2], input_dev->keybit);
826 input_dev->keycodesize = 0; 826 input_dev->keycodesize = 0;
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
index 7b9f35bfb4f0..c0c2c22ddd83 100644
--- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
+++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
@@ -106,7 +106,7 @@ int dvb_usb_remote_init(struct dvb_usb_device *d)
106 if (!input_dev) 106 if (!input_dev)
107 return -ENOMEM; 107 return -ENOMEM;
108 108
109 input_dev->evbit[0] = BIT(EV_KEY); 109 input_dev->evbit[0] = BIT_MASK(EV_KEY);
110 input_dev->name = "IR-receiver inside an USB DVB receiver"; 110 input_dev->name = "IR-receiver inside an USB DVB receiver";
111 input_dev->phys = d->rc_phys; 111 input_dev->phys = d->rc_phys;
112 usb_to_input_id(d->udev, &input_dev->id); 112 usb_to_input_id(d->udev, &input_dev->id);
diff --git a/drivers/media/dvb/ttpci/av7110_ir.c b/drivers/media/dvb/ttpci/av7110_ir.c
index 5d19c402dad1..a283e1de83fa 100644
--- a/drivers/media/dvb/ttpci/av7110_ir.c
+++ b/drivers/media/dvb/ttpci/av7110_ir.c
@@ -27,7 +27,7 @@
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
29#include <linux/kernel.h> 29#include <linux/kernel.h>
30#include <asm/bitops.h> 30#include <linux/bitops.h>
31 31
32#include "av7110.h" 32#include "av7110.h"
33#include "av7110_hw.h" 33#include "av7110_hw.h"
diff --git a/drivers/media/dvb/ttusb-dec/ttusb_dec.c b/drivers/media/dvb/ttusb-dec/ttusb_dec.c
index 5e691fd79904..1ec981d98b91 100644
--- a/drivers/media/dvb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/dvb/ttusb-dec/ttusb_dec.c
@@ -1198,7 +1198,7 @@ static int ttusb_init_rc( struct ttusb_dec *dec)
1198 1198
1199 input_dev->name = "ttusb_dec remote control"; 1199 input_dev->name = "ttusb_dec remote control";
1200 input_dev->phys = dec->rc_phys; 1200 input_dev->phys = dec->rc_phys;
1201 input_dev->evbit[0] = BIT(EV_KEY); 1201 input_dev->evbit[0] = BIT_MASK(EV_KEY);
1202 input_dev->keycodesize = sizeof(u16); 1202 input_dev->keycodesize = sizeof(u16);
1203 input_dev->keycodemax = 0x1a; 1203 input_dev->keycodemax = 0x1a;
1204 input_dev->keycode = rc_keys; 1204 input_dev->keycode = rc_keys;
diff --git a/drivers/media/video/usbvideo/konicawc.c b/drivers/media/video/usbvideo/konicawc.c
index 491505d6fdee..3e93f8058770 100644
--- a/drivers/media/video/usbvideo/konicawc.c
+++ b/drivers/media/video/usbvideo/konicawc.c
@@ -238,8 +238,8 @@ static void konicawc_register_input(struct konicawc *cam, struct usb_device *dev
238 usb_to_input_id(dev, &input_dev->id); 238 usb_to_input_id(dev, &input_dev->id);
239 input_dev->dev.parent = &dev->dev; 239 input_dev->dev.parent = &dev->dev;
240 240
241 input_dev->evbit[0] = BIT(EV_KEY); 241 input_dev->evbit[0] = BIT_MASK(EV_KEY);
242 input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0); 242 input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
243 243
244 input_dev->private = cam; 244 input_dev->private = cam;
245 245
diff --git a/drivers/media/video/usbvideo/quickcam_messenger.c b/drivers/media/video/usbvideo/quickcam_messenger.c
index dd1a6d6bbc9e..d847273eeba0 100644
--- a/drivers/media/video/usbvideo/quickcam_messenger.c
+++ b/drivers/media/video/usbvideo/quickcam_messenger.c
@@ -102,8 +102,8 @@ static void qcm_register_input(struct qcm *cam, struct usb_device *dev)
102 usb_to_input_id(dev, &input_dev->id); 102 usb_to_input_id(dev, &input_dev->id);
103 input_dev->dev.parent = &dev->dev; 103 input_dev->dev.parent = &dev->dev;
104 104
105 input_dev->evbit[0] = BIT(EV_KEY); 105 input_dev->evbit[0] = BIT_MASK(EV_KEY);
106 input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0); 106 input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
107 107
108 input_dev->private = cam; 108 input_dev->private = cam;
109 109
diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c
index 1c14fa2bd411..419e5af78533 100644
--- a/drivers/media/video/zoran_driver.c
+++ b/drivers/media/video/zoran_driver.c
@@ -1285,7 +1285,7 @@ zoran_open (struct inode *inode,
1285 } 1285 }
1286 1286
1287 dprintk(1, KERN_INFO "%s: zoran_open(%s, pid=[%d]), users(-)=%d\n", 1287 dprintk(1, KERN_INFO "%s: zoran_open(%s, pid=[%d]), users(-)=%d\n",
1288 ZR_DEVNAME(zr), current->comm, current->pid, zr->user); 1288 ZR_DEVNAME(zr), current->comm, task_pid_nr(current), zr->user);
1289 1289
1290 /* now, create the open()-specific file_ops struct */ 1290 /* now, create the open()-specific file_ops struct */
1291 fh = kzalloc(sizeof(struct zoran_fh), GFP_KERNEL); 1291 fh = kzalloc(sizeof(struct zoran_fh), GFP_KERNEL);
@@ -1358,7 +1358,7 @@ zoran_close (struct inode *inode,
1358 struct zoran *zr = fh->zr; 1358 struct zoran *zr = fh->zr;
1359 1359
1360 dprintk(1, KERN_INFO "%s: zoran_close(%s, pid=[%d]), users(+)=%d\n", 1360 dprintk(1, KERN_INFO "%s: zoran_close(%s, pid=[%d]), users(+)=%d\n",
1361 ZR_DEVNAME(zr), current->comm, current->pid, zr->user); 1361 ZR_DEVNAME(zr), current->comm, task_pid_nr(current), zr->user);
1362 1362
1363 /* kernel locks (fs/device.c), so don't do that ourselves 1363 /* kernel locks (fs/device.c), so don't do that ourselves
1364 * (prevents deadlocks) */ 1364 * (prevents deadlocks) */
diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c
index 0550ce075fc4..1d9defb1a10c 100644
--- a/drivers/misc/ibmasm/remote.c
+++ b/drivers/misc/ibmasm/remote.c
@@ -226,9 +226,9 @@ int ibmasm_init_remote_input_dev(struct service_processor *sp)
226 mouse_dev->id.product = pdev->device; 226 mouse_dev->id.product = pdev->device;
227 mouse_dev->id.version = 1; 227 mouse_dev->id.version = 1;
228 mouse_dev->dev.parent = sp->dev; 228 mouse_dev->dev.parent = sp->dev;
229 mouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 229 mouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
230 mouse_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | 230 mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
231 BIT(BTN_RIGHT) | BIT(BTN_MIDDLE); 231 BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
232 set_bit(BTN_TOUCH, mouse_dev->keybit); 232 set_bit(BTN_TOUCH, mouse_dev->keybit);
233 mouse_dev->name = "ibmasm RSA I remote mouse"; 233 mouse_dev->name = "ibmasm RSA I remote mouse";
234 input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0); 234 input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0);
@@ -239,7 +239,7 @@ int ibmasm_init_remote_input_dev(struct service_processor *sp)
239 keybd_dev->id.product = pdev->device; 239 keybd_dev->id.product = pdev->device;
240 keybd_dev->id.version = 2; 240 keybd_dev->id.version = 2;
241 keybd_dev->dev.parent = sp->dev; 241 keybd_dev->dev.parent = sp->dev;
242 keybd_dev->evbit[0] = BIT(EV_KEY); 242 keybd_dev->evbit[0] = BIT_MASK(EV_KEY);
243 keybd_dev->name = "ibmasm RSA I remote keyboard"; 243 keybd_dev->name = "ibmasm RSA I remote keyboard";
244 244
245 for (i = 0; i < XLATE_SIZE; i++) { 245 for (i = 0; i < XLATE_SIZE; i++) {
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 5108b7c576df..cd221fd0fb94 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -9,6 +9,7 @@
9 * You need an userspace library to cooperate with this driver. It (and other 9 * You need an userspace library to cooperate with this driver. It (and other
10 * info) may be obtained here: 10 * info) may be obtained here:
11 * http://www.fi.muni.cz/~xslaby/phantom.html 11 * http://www.fi.muni.cz/~xslaby/phantom.html
12 * or alternatively, you might use OpenHaptics provided by Sensable.
12 */ 13 */
13 14
14#include <linux/kernel.h> 15#include <linux/kernel.h>
@@ -24,13 +25,14 @@
24#include <asm/atomic.h> 25#include <asm/atomic.h>
25#include <asm/io.h> 26#include <asm/io.h>
26 27
27#define PHANTOM_VERSION "n0.9.5" 28#define PHANTOM_VERSION "n0.9.7"
28 29
29#define PHANTOM_MAX_MINORS 8 30#define PHANTOM_MAX_MINORS 8
30 31
31#define PHN_IRQCTL 0x4c /* irq control in caddr space */ 32#define PHN_IRQCTL 0x4c /* irq control in caddr space */
32 33
33#define PHB_RUNNING 1 34#define PHB_RUNNING 1
35#define PHB_NOT_OH 2
34 36
35static struct class *phantom_class; 37static struct class *phantom_class;
36static int phantom_major; 38static int phantom_major;
@@ -47,7 +49,11 @@ struct phantom_device {
47 struct cdev cdev; 49 struct cdev cdev;
48 50
49 struct mutex open_lock; 51 struct mutex open_lock;
50 spinlock_t ioctl_lock; 52 spinlock_t regs_lock;
53
54 /* used in NOT_OH mode */
55 struct phm_regs oregs;
56 u32 ctl_reg;
51}; 57};
52 58
53static unsigned char phantom_devices[PHANTOM_MAX_MINORS]; 59static unsigned char phantom_devices[PHANTOM_MAX_MINORS];
@@ -82,6 +88,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
82 struct phm_regs rs; 88 struct phm_regs rs;
83 struct phm_reg r; 89 struct phm_reg r;
84 void __user *argp = (void __user *)arg; 90 void __user *argp = (void __user *)arg;
91 unsigned long flags;
85 unsigned int i; 92 unsigned int i;
86 93
87 if (_IOC_TYPE(cmd) != PH_IOC_MAGIC || 94 if (_IOC_TYPE(cmd) != PH_IOC_MAGIC ||
@@ -96,32 +103,45 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
96 if (r.reg > 7) 103 if (r.reg > 7)
97 return -EINVAL; 104 return -EINVAL;
98 105
99 spin_lock(&dev->ioctl_lock); 106 spin_lock_irqsave(&dev->regs_lock, flags);
100 if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) && 107 if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) &&
101 phantom_status(dev, dev->status | PHB_RUNNING)){ 108 phantom_status(dev, dev->status | PHB_RUNNING)){
102 spin_unlock(&dev->ioctl_lock); 109 spin_unlock_irqrestore(&dev->regs_lock, flags);
103 return -ENODEV; 110 return -ENODEV;
104 } 111 }
105 112
106 pr_debug("phantom: writing %x to %u\n", r.value, r.reg); 113 pr_debug("phantom: writing %x to %u\n", r.value, r.reg);
114
115 /* preserve amp bit (don't allow to change it when in NOT_OH) */
116 if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) {
117 r.value &= ~PHN_CTL_AMP;
118 r.value |= dev->ctl_reg & PHN_CTL_AMP;
119 dev->ctl_reg = r.value;
120 }
121
107 iowrite32(r.value, dev->iaddr + r.reg); 122 iowrite32(r.value, dev->iaddr + r.reg);
108 ioread32(dev->iaddr); /* PCI posting */ 123 ioread32(dev->iaddr); /* PCI posting */
109 124
110 if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ)) 125 if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ))
111 phantom_status(dev, dev->status & ~PHB_RUNNING); 126 phantom_status(dev, dev->status & ~PHB_RUNNING);
112 spin_unlock(&dev->ioctl_lock); 127 spin_unlock_irqrestore(&dev->regs_lock, flags);
113 break; 128 break;
114 case PHN_SET_REGS: 129 case PHN_SET_REGS:
115 if (copy_from_user(&rs, argp, sizeof(rs))) 130 if (copy_from_user(&rs, argp, sizeof(rs)))
116 return -EFAULT; 131 return -EFAULT;
117 132
118 pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask); 133 pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask);
119 spin_lock(&dev->ioctl_lock); 134 spin_lock_irqsave(&dev->regs_lock, flags);
120 for (i = 0; i < min(rs.count, 8U); i++) 135 if (dev->status & PHB_NOT_OH)
121 if ((1 << i) & rs.mask) 136 memcpy(&dev->oregs, &rs, sizeof(rs));
122 iowrite32(rs.values[i], dev->oaddr + i); 137 else {
123 ioread32(dev->iaddr); /* PCI posting */ 138 u32 m = min(rs.count, 8U);
124 spin_unlock(&dev->ioctl_lock); 139 for (i = 0; i < m; i++)
140 if (rs.mask & BIT(i))
141 iowrite32(rs.values[i], dev->oaddr + i);
142 ioread32(dev->iaddr); /* PCI posting */
143 }
144 spin_unlock_irqrestore(&dev->regs_lock, flags);
125 break; 145 break;
126 case PHN_GET_REG: 146 case PHN_GET_REG:
127 if (copy_from_user(&r, argp, sizeof(r))) 147 if (copy_from_user(&r, argp, sizeof(r)))
@@ -135,20 +155,35 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
135 if (copy_to_user(argp, &r, sizeof(r))) 155 if (copy_to_user(argp, &r, sizeof(r)))
136 return -EFAULT; 156 return -EFAULT;
137 break; 157 break;
138 case PHN_GET_REGS: 158 case PHN_GET_REGS: {
159 u32 m;
160
139 if (copy_from_user(&rs, argp, sizeof(rs))) 161 if (copy_from_user(&rs, argp, sizeof(rs)))
140 return -EFAULT; 162 return -EFAULT;
141 163
164 m = min(rs.count, 8U);
165
142 pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask); 166 pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask);
143 spin_lock(&dev->ioctl_lock); 167 spin_lock_irqsave(&dev->regs_lock, flags);
144 for (i = 0; i < min(rs.count, 8U); i++) 168 for (i = 0; i < m; i++)
145 if ((1 << i) & rs.mask) 169 if (rs.mask & BIT(i))
146 rs.values[i] = ioread32(dev->iaddr + i); 170 rs.values[i] = ioread32(dev->iaddr + i);
147 spin_unlock(&dev->ioctl_lock); 171 spin_unlock_irqrestore(&dev->regs_lock, flags);
148 172
149 if (copy_to_user(argp, &rs, sizeof(rs))) 173 if (copy_to_user(argp, &rs, sizeof(rs)))
150 return -EFAULT; 174 return -EFAULT;
151 break; 175 break;
176 } case PHN_NOT_OH:
177 spin_lock_irqsave(&dev->regs_lock, flags);
178 if (dev->status & PHB_RUNNING) {
179 printk(KERN_ERR "phantom: you need to set NOT_OH "
180 "before you start the device!\n");
181 spin_unlock_irqrestore(&dev->regs_lock, flags);
182 return -EINVAL;
183 }
184 dev->status |= PHB_NOT_OH;
185 spin_unlock_irqrestore(&dev->regs_lock, flags);
186 break;
152 default: 187 default:
153 return -ENOTTY; 188 return -ENOTTY;
154 } 189 }
@@ -171,8 +206,11 @@ static int phantom_open(struct inode *inode, struct file *file)
171 return -EINVAL; 206 return -EINVAL;
172 } 207 }
173 208
209 WARN_ON(dev->status & PHB_NOT_OH);
210
174 file->private_data = dev; 211 file->private_data = dev;
175 212
213 atomic_set(&dev->counter, 0);
176 dev->opened++; 214 dev->opened++;
177 mutex_unlock(&dev->open_lock); 215 mutex_unlock(&dev->open_lock);
178 216
@@ -187,6 +225,7 @@ static int phantom_release(struct inode *inode, struct file *file)
187 225
188 dev->opened = 0; 226 dev->opened = 0;
189 phantom_status(dev, dev->status & ~PHB_RUNNING); 227 phantom_status(dev, dev->status & ~PHB_RUNNING);
228 dev->status &= ~PHB_NOT_OH;
190 229
191 mutex_unlock(&dev->open_lock); 230 mutex_unlock(&dev->open_lock);
192 231
@@ -220,12 +259,32 @@ static struct file_operations phantom_file_ops = {
220static irqreturn_t phantom_isr(int irq, void *data) 259static irqreturn_t phantom_isr(int irq, void *data)
221{ 260{
222 struct phantom_device *dev = data; 261 struct phantom_device *dev = data;
262 unsigned int i;
263 u32 ctl;
223 264
224 if (!(ioread32(dev->iaddr + PHN_CONTROL) & PHN_CTL_IRQ)) 265 spin_lock(&dev->regs_lock);
266 ctl = ioread32(dev->iaddr + PHN_CONTROL);
267 if (!(ctl & PHN_CTL_IRQ)) {
268 spin_unlock(&dev->regs_lock);
225 return IRQ_NONE; 269 return IRQ_NONE;
270 }
226 271
227 iowrite32(0, dev->iaddr); 272 iowrite32(0, dev->iaddr);
228 iowrite32(0xc0, dev->iaddr); 273 iowrite32(0xc0, dev->iaddr);
274
275 if (dev->status & PHB_NOT_OH) {
276 struct phm_regs *r = &dev->oregs;
277 u32 m = min(r->count, 8U);
278
279 for (i = 0; i < m; i++)
280 if (r->mask & BIT(i))
281 iowrite32(r->values[i], dev->oaddr + i);
282
283 dev->ctl_reg ^= PHN_CTL_AMP;
284 iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL);
285 }
286 spin_unlock(&dev->regs_lock);
287
229 ioread32(dev->iaddr); /* PCI posting */ 288 ioread32(dev->iaddr); /* PCI posting */
230 289
231 atomic_inc(&dev->counter); 290 atomic_inc(&dev->counter);
@@ -297,7 +356,7 @@ static int __devinit phantom_probe(struct pci_dev *pdev,
297 } 356 }
298 357
299 mutex_init(&pht->open_lock); 358 mutex_init(&pht->open_lock);
300 spin_lock_init(&pht->ioctl_lock); 359 spin_lock_init(&pht->regs_lock);
301 init_waitqueue_head(&pht->wait); 360 init_waitqueue_head(&pht->wait);
302 cdev_init(&pht->cdev, &phantom_file_ops); 361 cdev_init(&pht->cdev, &phantom_file_ops);
303 pht->cdev.owner = THIS_MODULE; 362 pht->cdev.owner = THIS_MODULE;
@@ -378,6 +437,8 @@ static int phantom_suspend(struct pci_dev *pdev, pm_message_t state)
378 iowrite32(0, dev->caddr + PHN_IRQCTL); 437 iowrite32(0, dev->caddr + PHN_IRQCTL);
379 ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ 438 ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
380 439
440 synchronize_irq(pdev->irq);
441
381 return 0; 442 return 0;
382} 443}
383 444
diff --git a/drivers/misc/sony-laptop.c b/drivers/misc/sony-laptop.c
index e73a71f04bb4..86da96becd28 100644
--- a/drivers/misc/sony-laptop.c
+++ b/drivers/misc/sony-laptop.c
@@ -411,9 +411,9 @@ static int sony_laptop_setup_input(void)
411 jog_dev->id.bustype = BUS_ISA; 411 jog_dev->id.bustype = BUS_ISA;
412 jog_dev->id.vendor = PCI_VENDOR_ID_SONY; 412 jog_dev->id.vendor = PCI_VENDOR_ID_SONY;
413 413
414 jog_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 414 jog_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
415 jog_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_MIDDLE); 415 jog_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_MIDDLE);
416 jog_dev->relbit[0] = BIT(REL_WHEEL); 416 jog_dev->relbit[0] = BIT_MASK(REL_WHEEL);
417 417
418 error = input_register_device(jog_dev); 418 error = input_register_device(jog_dev);
419 if (error) 419 if (error)
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index a4f1bf33164a..6330c8cc72b5 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1309,7 +1309,7 @@ static int ubi_thread(void *u)
1309 struct ubi_device *ubi = u; 1309 struct ubi_device *ubi = u;
1310 1310
1311 ubi_msg("background thread \"%s\" started, PID %d", 1311 ubi_msg("background thread \"%s\" started, PID %d",
1312 ubi->bgt_name, current->pid); 1312 ubi->bgt_name, task_pid_nr(current));
1313 1313
1314 set_freezable(); 1314 set_freezable();
1315 for (;;) { 1315 for (;;) {
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 96cee4badd28..da767d3d5af5 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -26,7 +26,7 @@
26#include <linux/etherdevice.h> 26#include <linux/etherdevice.h>
27#include <linux/skbuff.h> 27#include <linux/skbuff.h>
28#include <linux/dma-mapping.h> 28#include <linux/dma-mapping.h>
29#include <asm/bitops.h> 29#include <linux/bitops.h>
30#include <asm/io.h> 30#include <asm/io.h>
31#include <asm/irq.h> 31#include <asm/irq.h>
32#include <linux/delay.h> 32#include <linux/delay.h>
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 7a045a37056e..084f0292ea6e 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -126,7 +126,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator);
126 126
127// ================= main 802.3ad protocol functions ================== 127// ================= main 802.3ad protocol functions ==================
128static int ad_lacpdu_send(struct port *port); 128static int ad_lacpdu_send(struct port *port);
129static int ad_marker_send(struct port *port, struct marker *marker); 129static int ad_marker_send(struct port *port, struct bond_marker *marker);
130static void ad_mux_machine(struct port *port); 130static void ad_mux_machine(struct port *port);
131static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); 131static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
132static void ad_tx_machine(struct port *port); 132static void ad_tx_machine(struct port *port);
@@ -139,8 +139,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast);
139static void ad_initialize_lacpdu(struct lacpdu *Lacpdu); 139static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
140static void ad_enable_collecting_distributing(struct port *port); 140static void ad_enable_collecting_distributing(struct port *port);
141static void ad_disable_collecting_distributing(struct port *port); 141static void ad_disable_collecting_distributing(struct port *port);
142static void ad_marker_info_received(struct marker *marker_info, struct port *port); 142static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
143static void ad_marker_response_received(struct marker *marker, struct port *port); 143static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
144 144
145 145
146///////////////////////////////////////////////////////////////////////////////// 146/////////////////////////////////////////////////////////////////////////////////
@@ -889,12 +889,12 @@ static int ad_lacpdu_send(struct port *port)
889 * Returns: 0 on success 889 * Returns: 0 on success
890 * < 0 on error 890 * < 0 on error
891 */ 891 */
892static int ad_marker_send(struct port *port, struct marker *marker) 892static int ad_marker_send(struct port *port, struct bond_marker *marker)
893{ 893{
894 struct slave *slave = port->slave; 894 struct slave *slave = port->slave;
895 struct sk_buff *skb; 895 struct sk_buff *skb;
896 struct marker_header *marker_header; 896 struct bond_marker_header *marker_header;
897 int length = sizeof(struct marker_header); 897 int length = sizeof(struct bond_marker_header);
898 struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR; 898 struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
899 899
900 skb = dev_alloc_skb(length + 16); 900 skb = dev_alloc_skb(length + 16);
@@ -909,7 +909,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
909 skb->network_header = skb->mac_header + ETH_HLEN; 909 skb->network_header = skb->mac_header + ETH_HLEN;
910 skb->protocol = PKT_TYPE_LACPDU; 910 skb->protocol = PKT_TYPE_LACPDU;
911 911
912 marker_header = (struct marker_header *)skb_put(skb, length); 912 marker_header = (struct bond_marker_header *)skb_put(skb, length);
913 913
914 marker_header->ad_header.destination_address = lacpdu_multicast_address; 914 marker_header->ad_header.destination_address = lacpdu_multicast_address;
915 /* Note: source addres is set to be the member's PERMANENT address, because we use it 915 /* Note: source addres is set to be the member's PERMANENT address, because we use it
@@ -1709,7 +1709,7 @@ static void ad_disable_collecting_distributing(struct port *port)
1709 */ 1709 */
1710static void ad_marker_info_send(struct port *port) 1710static void ad_marker_info_send(struct port *port)
1711{ 1711{
1712 struct marker marker; 1712 struct bond_marker marker;
1713 u16 index; 1713 u16 index;
1714 1714
1715 // fill the marker PDU with the appropriate values 1715 // fill the marker PDU with the appropriate values
@@ -1742,13 +1742,14 @@ static void ad_marker_info_send(struct port *port)
1742 * @port: the port we're looking at 1742 * @port: the port we're looking at
1743 * 1743 *
1744 */ 1744 */
1745static void ad_marker_info_received(struct marker *marker_info,struct port *port) 1745static void ad_marker_info_received(struct bond_marker *marker_info,
1746 struct port *port)
1746{ 1747{
1747 struct marker marker; 1748 struct bond_marker marker;
1748 1749
1749 // copy the received marker data to the response marker 1750 // copy the received marker data to the response marker
1750 //marker = *marker_info; 1751 //marker = *marker_info;
1751 memcpy(&marker, marker_info, sizeof(struct marker)); 1752 memcpy(&marker, marker_info, sizeof(struct bond_marker));
1752 // change the marker subtype to marker response 1753 // change the marker subtype to marker response
1753 marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE; 1754 marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
1754 // send the marker response 1755 // send the marker response
@@ -1767,7 +1768,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port
1767 * response for marker PDU's, in this stage, but only to respond to marker 1768 * response for marker PDU's, in this stage, but only to respond to marker
1768 * information. 1769 * information.
1769 */ 1770 */
1770static void ad_marker_response_received(struct marker *marker, struct port *port) 1771static void ad_marker_response_received(struct bond_marker *marker,
1772 struct port *port)
1771{ 1773{
1772 marker=NULL; // just to satisfy the compiler 1774 marker=NULL; // just to satisfy the compiler
1773 port=NULL; // just to satisfy the compiler 1775 port=NULL; // just to satisfy the compiler
@@ -2164,15 +2166,15 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
2164 case AD_TYPE_MARKER: 2166 case AD_TYPE_MARKER:
2165 // No need to convert fields to Little Endian since we don't use the marker's fields. 2167 // No need to convert fields to Little Endian since we don't use the marker's fields.
2166 2168
2167 switch (((struct marker *)lacpdu)->tlv_type) { 2169 switch (((struct bond_marker *)lacpdu)->tlv_type) {
2168 case AD_MARKER_INFORMATION_SUBTYPE: 2170 case AD_MARKER_INFORMATION_SUBTYPE:
2169 dprintk("Received Marker Information on port %d\n", port->actor_port_number); 2171 dprintk("Received Marker Information on port %d\n", port->actor_port_number);
2170 ad_marker_info_received((struct marker *)lacpdu, port); 2172 ad_marker_info_received((struct bond_marker *)lacpdu, port);
2171 break; 2173 break;
2172 2174
2173 case AD_MARKER_RESPONSE_SUBTYPE: 2175 case AD_MARKER_RESPONSE_SUBTYPE:
2174 dprintk("Received Marker Response on port %d\n", port->actor_port_number); 2176 dprintk("Received Marker Response on port %d\n", port->actor_port_number);
2175 ad_marker_response_received((struct marker *)lacpdu, port); 2177 ad_marker_response_received((struct bond_marker *)lacpdu, port);
2176 break; 2178 break;
2177 2179
2178 default: 2180 default:
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index 862952fa6fd9..f16557264944 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -92,7 +92,7 @@ typedef enum {
92typedef enum { 92typedef enum {
93 AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype 93 AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
94 AD_MARKER_RESPONSE_SUBTYPE // marker response subtype 94 AD_MARKER_RESPONSE_SUBTYPE // marker response subtype
95} marker_subtype_t; 95} bond_marker_subtype_t;
96 96
97// timers types(43.4.9 in the 802.3ad standard) 97// timers types(43.4.9 in the 802.3ad standard)
98typedef enum { 98typedef enum {
@@ -148,7 +148,7 @@ typedef struct lacpdu_header {
148} lacpdu_header_t; 148} lacpdu_header_t;
149 149
150// Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) 150// Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
151typedef struct marker { 151typedef struct bond_marker {
152 u8 subtype; // = 0x02 (marker PDU) 152 u8 subtype; // = 0x02 (marker PDU)
153 u8 version_number; // = 0x01 153 u8 version_number; // = 0x01
154 u8 tlv_type; // = 0x01 (marker information) 154 u8 tlv_type; // = 0x01 (marker information)
@@ -161,12 +161,12 @@ typedef struct marker {
161 u8 tlv_type_terminator; // = 0x00 161 u8 tlv_type_terminator; // = 0x00
162 u8 terminator_length; // = 0x00 162 u8 terminator_length; // = 0x00
163 u8 reserved_90[90]; // = 0 163 u8 reserved_90[90]; // = 0
164} marker_t; 164} bond_marker_t;
165 165
166typedef struct marker_header { 166typedef struct bond_marker_header {
167 struct ad_header ad_header; 167 struct ad_header ad_header;
168 struct marker marker; 168 struct bond_marker marker;
169} marker_header_t; 169} bond_marker_header_t;
170 170
171#pragma pack() 171#pragma pack()
172 172
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 314b2f68f78f..edd6828f0a78 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -234,6 +234,7 @@
234#include <linux/spinlock.h> 234#include <linux/spinlock.h>
235#include <linux/errno.h> 235#include <linux/errno.h>
236#include <linux/init.h> 236#include <linux/init.h>
237#include <linux/bitops.h>
237 238
238#include <linux/if.h> 239#include <linux/if.h>
239#include <linux/mii.h> 240#include <linux/mii.h>
@@ -247,7 +248,6 @@
247#include <asm/irq.h> 248#include <asm/irq.h>
248#include <asm/dma.h> 249#include <asm/dma.h>
249#include <asm/system.h> 250#include <asm/system.h>
250#include <asm/bitops.h>
251#include <asm/ethernet.h> 251#include <asm/ethernet.h>
252#include <asm/cache.h> 252#include <asm/cache.h>
253 253
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 044261703381..2a3df145850d 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -41,9 +41,9 @@
41#include <linux/timer.h> 41#include <linux/timer.h>
42#include <linux/cache.h> 42#include <linux/cache.h>
43#include <linux/mutex.h> 43#include <linux/mutex.h>
44#include <linux/bitops.h>
44#include "t3cdev.h" 45#include "t3cdev.h"
45#include <asm/semaphore.h> 46#include <asm/semaphore.h>
46#include <asm/bitops.h>
47#include <asm/io.h> 47#include <asm/io.h>
48 48
49typedef irqreturn_t(*intr_handler_t) (int, void *); 49typedef irqreturn_t(*intr_handler_t) (int, void *);
diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c
index 243fc6b354b5..e3dd8b136908 100644
--- a/drivers/net/eth16i.c
+++ b/drivers/net/eth16i.c
@@ -170,7 +170,6 @@ static char *version =
170 170
171 171
172/* Few macros */ 172/* Few macros */
173#define BIT(a) ( (1 << (a)) )
174#define BITSET(ioaddr, bnum) ((outb(((inb(ioaddr)) | (bnum)), ioaddr))) 173#define BITSET(ioaddr, bnum) ((outb(((inb(ioaddr)) | (bnum)), ioaddr)))
175#define BITCLR(ioaddr, bnum) ((outb(((inb(ioaddr)) & (~(bnum))), ioaddr))) 174#define BITCLR(ioaddr, bnum) ((outb(((inb(ioaddr)) & (~(bnum))), ioaddr)))
176 175
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index bc02e4694804..11b83dae00ac 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -21,6 +21,7 @@
21 21
22 22
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/bitops.h>
24#include <linux/delay.h> 25#include <linux/delay.h>
25#include <linux/errno.h> 26#include <linux/errno.h>
26#include <linux/if_arp.h> 27#include <linux/if_arp.h>
@@ -35,7 +36,6 @@
35#include <linux/sockios.h> 36#include <linux/sockios.h>
36#include <linux/workqueue.h> 37#include <linux/workqueue.h>
37#include <asm/atomic.h> 38#include <asm/atomic.h>
38#include <asm/bitops.h>
39#include <asm/dma.h> 39#include <asm/dma.h>
40#include <asm/io.h> 40#include <asm/io.h>
41#include <asm/irq.h> 41#include <asm/irq.h>
diff --git a/drivers/net/mac89x0.c b/drivers/net/mac89x0.c
index 30854f094965..a19b5958cee9 100644
--- a/drivers/net/mac89x0.c
+++ b/drivers/net/mac89x0.c
@@ -99,9 +99,9 @@ static char *version =
99#include <linux/etherdevice.h> 99#include <linux/etherdevice.h>
100#include <linux/skbuff.h> 100#include <linux/skbuff.h>
101#include <linux/delay.h> 101#include <linux/delay.h>
102#include <linux/bitops.h>
102 103
103#include <asm/system.h> 104#include <asm/system.h>
104#include <asm/bitops.h>
105#include <asm/io.h> 105#include <asm/io.h>
106#include <asm/hwtest.h> 106#include <asm/hwtest.h>
107#include <asm/macints.h> 107#include <asm/macints.h>
diff --git a/drivers/net/meth.h b/drivers/net/meth.h
index ea3b8fc86d1e..a78dc1ca8c29 100644
--- a/drivers/net/meth.h
+++ b/drivers/net/meth.h
@@ -28,9 +28,6 @@
28#define RX_BUFFER_OFFSET (sizeof(rx_status_vector)+2) /* staus vector + 2 bytes of padding */ 28#define RX_BUFFER_OFFSET (sizeof(rx_status_vector)+2) /* staus vector + 2 bytes of padding */
29#define RX_BUCKET_SIZE 256 29#define RX_BUCKET_SIZE 256
30 30
31#undef BIT
32#define BIT(x) (1UL << (x))
33
34/* For more detailed explanations of what each field menas, 31/* For more detailed explanations of what each field menas,
35 see Nick's great comments to #defines below (or docs, if 32 see Nick's great comments to #defines below (or docs, if
36 you are lucky enough toget hold of them :)*/ 33 you are lucky enough toget hold of them :)*/
diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h
index aef66e2d98d2..01f08d726ace 100644
--- a/drivers/net/s2io-regs.h
+++ b/drivers/net/s2io-regs.h
@@ -20,17 +20,17 @@ struct XENA_dev_config {
20 20
21/* General Control-Status Registers */ 21/* General Control-Status Registers */
22 u64 general_int_status; 22 u64 general_int_status;
23#define GEN_INTR_TXPIC BIT(0) 23#define GEN_INTR_TXPIC s2BIT(0)
24#define GEN_INTR_TXDMA BIT(1) 24#define GEN_INTR_TXDMA s2BIT(1)
25#define GEN_INTR_TXMAC BIT(2) 25#define GEN_INTR_TXMAC s2BIT(2)
26#define GEN_INTR_TXXGXS BIT(3) 26#define GEN_INTR_TXXGXS s2BIT(3)
27#define GEN_INTR_TXTRAFFIC BIT(8) 27#define GEN_INTR_TXTRAFFIC s2BIT(8)
28#define GEN_INTR_RXPIC BIT(32) 28#define GEN_INTR_RXPIC s2BIT(32)
29#define GEN_INTR_RXDMA BIT(33) 29#define GEN_INTR_RXDMA s2BIT(33)
30#define GEN_INTR_RXMAC BIT(34) 30#define GEN_INTR_RXMAC s2BIT(34)
31#define GEN_INTR_MC BIT(35) 31#define GEN_INTR_MC s2BIT(35)
32#define GEN_INTR_RXXGXS BIT(36) 32#define GEN_INTR_RXXGXS s2BIT(36)
33#define GEN_INTR_RXTRAFFIC BIT(40) 33#define GEN_INTR_RXTRAFFIC s2BIT(40)
34#define GEN_ERROR_INTR GEN_INTR_TXPIC | GEN_INTR_RXPIC | \ 34#define GEN_ERROR_INTR GEN_INTR_TXPIC | GEN_INTR_RXPIC | \
35 GEN_INTR_TXDMA | GEN_INTR_RXDMA | \ 35 GEN_INTR_TXDMA | GEN_INTR_RXDMA | \
36 GEN_INTR_TXMAC | GEN_INTR_RXMAC | \ 36 GEN_INTR_TXMAC | GEN_INTR_RXMAC | \
@@ -54,36 +54,36 @@ struct XENA_dev_config {
54 54
55 55
56 u64 adapter_status; 56 u64 adapter_status;
57#define ADAPTER_STATUS_TDMA_READY BIT(0) 57#define ADAPTER_STATUS_TDMA_READY s2BIT(0)
58#define ADAPTER_STATUS_RDMA_READY BIT(1) 58#define ADAPTER_STATUS_RDMA_READY s2BIT(1)
59#define ADAPTER_STATUS_PFC_READY BIT(2) 59#define ADAPTER_STATUS_PFC_READY s2BIT(2)
60#define ADAPTER_STATUS_TMAC_BUF_EMPTY BIT(3) 60#define ADAPTER_STATUS_TMAC_BUF_EMPTY s2BIT(3)
61#define ADAPTER_STATUS_PIC_QUIESCENT BIT(5) 61#define ADAPTER_STATUS_PIC_QUIESCENT s2BIT(5)
62#define ADAPTER_STATUS_RMAC_REMOTE_FAULT BIT(6) 62#define ADAPTER_STATUS_RMAC_REMOTE_FAULT s2BIT(6)
63#define ADAPTER_STATUS_RMAC_LOCAL_FAULT BIT(7) 63#define ADAPTER_STATUS_RMAC_LOCAL_FAULT s2BIT(7)
64#define ADAPTER_STATUS_RMAC_PCC_IDLE vBIT(0xFF,8,8) 64#define ADAPTER_STATUS_RMAC_PCC_IDLE vBIT(0xFF,8,8)
65#define ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE vBIT(0x0F,8,8) 65#define ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE vBIT(0x0F,8,8)
66#define ADAPTER_STATUS_RC_PRC_QUIESCENT vBIT(0xFF,16,8) 66#define ADAPTER_STATUS_RC_PRC_QUIESCENT vBIT(0xFF,16,8)
67#define ADAPTER_STATUS_MC_DRAM_READY BIT(24) 67#define ADAPTER_STATUS_MC_DRAM_READY s2BIT(24)
68#define ADAPTER_STATUS_MC_QUEUES_READY BIT(25) 68#define ADAPTER_STATUS_MC_QUEUES_READY s2BIT(25)
69#define ADAPTER_STATUS_M_PLL_LOCK BIT(30) 69#define ADAPTER_STATUS_M_PLL_LOCK s2BIT(30)
70#define ADAPTER_STATUS_P_PLL_LOCK BIT(31) 70#define ADAPTER_STATUS_P_PLL_LOCK s2BIT(31)
71 71
72 u64 adapter_control; 72 u64 adapter_control;
73#define ADAPTER_CNTL_EN BIT(7) 73#define ADAPTER_CNTL_EN s2BIT(7)
74#define ADAPTER_EOI_TX_ON BIT(15) 74#define ADAPTER_EOI_TX_ON s2BIT(15)
75#define ADAPTER_LED_ON BIT(23) 75#define ADAPTER_LED_ON s2BIT(23)
76#define ADAPTER_UDPI(val) vBIT(val,36,4) 76#define ADAPTER_UDPI(val) vBIT(val,36,4)
77#define ADAPTER_WAIT_INT BIT(48) 77#define ADAPTER_WAIT_INT s2BIT(48)
78#define ADAPTER_ECC_EN BIT(55) 78#define ADAPTER_ECC_EN s2BIT(55)
79 79
80 u64 serr_source; 80 u64 serr_source;
81#define SERR_SOURCE_PIC BIT(0) 81#define SERR_SOURCE_PIC s2BIT(0)
82#define SERR_SOURCE_TXDMA BIT(1) 82#define SERR_SOURCE_TXDMA s2BIT(1)
83#define SERR_SOURCE_RXDMA BIT(2) 83#define SERR_SOURCE_RXDMA s2BIT(2)
84#define SERR_SOURCE_MAC BIT(3) 84#define SERR_SOURCE_MAC s2BIT(3)
85#define SERR_SOURCE_MC BIT(4) 85#define SERR_SOURCE_MC s2BIT(4)
86#define SERR_SOURCE_XGXS BIT(5) 86#define SERR_SOURCE_XGXS s2BIT(5)
87#define SERR_SOURCE_ANY (SERR_SOURCE_PIC | \ 87#define SERR_SOURCE_ANY (SERR_SOURCE_PIC | \
88 SERR_SOURCE_TXDMA | \ 88 SERR_SOURCE_TXDMA | \
89 SERR_SOURCE_RXDMA | \ 89 SERR_SOURCE_RXDMA | \
@@ -101,41 +101,41 @@ struct XENA_dev_config {
101#define PCI_MODE_PCIX_M2_66 0x5 101#define PCI_MODE_PCIX_M2_66 0x5
102#define PCI_MODE_PCIX_M2_100 0x6 102#define PCI_MODE_PCIX_M2_100 0x6
103#define PCI_MODE_PCIX_M2_133 0x7 103#define PCI_MODE_PCIX_M2_133 0x7
104#define PCI_MODE_UNSUPPORTED BIT(0) 104#define PCI_MODE_UNSUPPORTED s2BIT(0)
105#define PCI_MODE_32_BITS BIT(8) 105#define PCI_MODE_32_BITS s2BIT(8)
106#define PCI_MODE_UNKNOWN_MODE BIT(9) 106#define PCI_MODE_UNKNOWN_MODE s2BIT(9)
107 107
108 u8 unused_0[0x800 - 0x128]; 108 u8 unused_0[0x800 - 0x128];
109 109
110/* PCI-X Controller registers */ 110/* PCI-X Controller registers */
111 u64 pic_int_status; 111 u64 pic_int_status;
112 u64 pic_int_mask; 112 u64 pic_int_mask;
113#define PIC_INT_TX BIT(0) 113#define PIC_INT_TX s2BIT(0)
114#define PIC_INT_FLSH BIT(1) 114#define PIC_INT_FLSH s2BIT(1)
115#define PIC_INT_MDIO BIT(2) 115#define PIC_INT_MDIO s2BIT(2)
116#define PIC_INT_IIC BIT(3) 116#define PIC_INT_IIC s2BIT(3)
117#define PIC_INT_GPIO BIT(4) 117#define PIC_INT_GPIO s2BIT(4)
118#define PIC_INT_RX BIT(32) 118#define PIC_INT_RX s2BIT(32)
119 119
120 u64 txpic_int_reg; 120 u64 txpic_int_reg;
121 u64 txpic_int_mask; 121 u64 txpic_int_mask;
122#define PCIX_INT_REG_ECC_SG_ERR BIT(0) 122#define PCIX_INT_REG_ECC_SG_ERR s2BIT(0)
123#define PCIX_INT_REG_ECC_DB_ERR BIT(1) 123#define PCIX_INT_REG_ECC_DB_ERR s2BIT(1)
124#define PCIX_INT_REG_FLASHR_R_FSM_ERR BIT(8) 124#define PCIX_INT_REG_FLASHR_R_FSM_ERR s2BIT(8)
125#define PCIX_INT_REG_FLASHR_W_FSM_ERR BIT(9) 125#define PCIX_INT_REG_FLASHR_W_FSM_ERR s2BIT(9)
126#define PCIX_INT_REG_INI_TX_FSM_SERR BIT(10) 126#define PCIX_INT_REG_INI_TX_FSM_SERR s2BIT(10)
127#define PCIX_INT_REG_INI_TXO_FSM_ERR BIT(11) 127#define PCIX_INT_REG_INI_TXO_FSM_ERR s2BIT(11)
128#define PCIX_INT_REG_TRT_FSM_SERR BIT(13) 128#define PCIX_INT_REG_TRT_FSM_SERR s2BIT(13)
129#define PCIX_INT_REG_SRT_FSM_SERR BIT(14) 129#define PCIX_INT_REG_SRT_FSM_SERR s2BIT(14)
130#define PCIX_INT_REG_PIFR_FSM_SERR BIT(15) 130#define PCIX_INT_REG_PIFR_FSM_SERR s2BIT(15)
131#define PCIX_INT_REG_WRC_TX_SEND_FSM_SERR BIT(21) 131#define PCIX_INT_REG_WRC_TX_SEND_FSM_SERR s2BIT(21)
132#define PCIX_INT_REG_RRC_TX_REQ_FSM_SERR BIT(23) 132#define PCIX_INT_REG_RRC_TX_REQ_FSM_SERR s2BIT(23)
133#define PCIX_INT_REG_INI_RX_FSM_SERR BIT(48) 133#define PCIX_INT_REG_INI_RX_FSM_SERR s2BIT(48)
134#define PCIX_INT_REG_RA_RX_FSM_SERR BIT(50) 134#define PCIX_INT_REG_RA_RX_FSM_SERR s2BIT(50)
135/* 135/*
136#define PCIX_INT_REG_WRC_RX_SEND_FSM_SERR BIT(52) 136#define PCIX_INT_REG_WRC_RX_SEND_FSM_SERR s2BIT(52)
137#define PCIX_INT_REG_RRC_RX_REQ_FSM_SERR BIT(54) 137#define PCIX_INT_REG_RRC_RX_REQ_FSM_SERR s2BIT(54)
138#define PCIX_INT_REG_RRC_RX_SPLIT_FSM_SERR BIT(58) 138#define PCIX_INT_REG_RRC_RX_SPLIT_FSM_SERR s2BIT(58)
139*/ 139*/
140 u64 txpic_alarms; 140 u64 txpic_alarms;
141 u64 rxpic_int_reg; 141 u64 rxpic_int_reg;
@@ -144,92 +144,92 @@ struct XENA_dev_config {
144 144
145 u64 flsh_int_reg; 145 u64 flsh_int_reg;
146 u64 flsh_int_mask; 146 u64 flsh_int_mask;
147#define PIC_FLSH_INT_REG_CYCLE_FSM_ERR BIT(63) 147#define PIC_FLSH_INT_REG_CYCLE_FSM_ERR s2BIT(63)
148#define PIC_FLSH_INT_REG_ERR BIT(62) 148#define PIC_FLSH_INT_REG_ERR s2BIT(62)
149 u64 flash_alarms; 149 u64 flash_alarms;
150 150
151 u64 mdio_int_reg; 151 u64 mdio_int_reg;
152 u64 mdio_int_mask; 152 u64 mdio_int_mask;
153#define MDIO_INT_REG_MDIO_BUS_ERR BIT(0) 153#define MDIO_INT_REG_MDIO_BUS_ERR s2BIT(0)
154#define MDIO_INT_REG_DTX_BUS_ERR BIT(8) 154#define MDIO_INT_REG_DTX_BUS_ERR s2BIT(8)
155#define MDIO_INT_REG_LASI BIT(39) 155#define MDIO_INT_REG_LASI s2BIT(39)
156 u64 mdio_alarms; 156 u64 mdio_alarms;
157 157
158 u64 iic_int_reg; 158 u64 iic_int_reg;
159 u64 iic_int_mask; 159 u64 iic_int_mask;
160#define IIC_INT_REG_BUS_FSM_ERR BIT(4) 160#define IIC_INT_REG_BUS_FSM_ERR s2BIT(4)
161#define IIC_INT_REG_BIT_FSM_ERR BIT(5) 161#define IIC_INT_REG_BIT_FSM_ERR s2BIT(5)
162#define IIC_INT_REG_CYCLE_FSM_ERR BIT(6) 162#define IIC_INT_REG_CYCLE_FSM_ERR s2BIT(6)
163#define IIC_INT_REG_REQ_FSM_ERR BIT(7) 163#define IIC_INT_REG_REQ_FSM_ERR s2BIT(7)
164#define IIC_INT_REG_ACK_ERR BIT(8) 164#define IIC_INT_REG_ACK_ERR s2BIT(8)
165 u64 iic_alarms; 165 u64 iic_alarms;
166 166
167 u8 unused4[0x08]; 167 u8 unused4[0x08];
168 168
169 u64 gpio_int_reg; 169 u64 gpio_int_reg;
170#define GPIO_INT_REG_DP_ERR_INT BIT(0) 170#define GPIO_INT_REG_DP_ERR_INT s2BIT(0)
171#define GPIO_INT_REG_LINK_DOWN BIT(1) 171#define GPIO_INT_REG_LINK_DOWN s2BIT(1)
172#define GPIO_INT_REG_LINK_UP BIT(2) 172#define GPIO_INT_REG_LINK_UP s2BIT(2)
173 u64 gpio_int_mask; 173 u64 gpio_int_mask;
174#define GPIO_INT_MASK_LINK_DOWN BIT(1) 174#define GPIO_INT_MASK_LINK_DOWN s2BIT(1)
175#define GPIO_INT_MASK_LINK_UP BIT(2) 175#define GPIO_INT_MASK_LINK_UP s2BIT(2)
176 u64 gpio_alarms; 176 u64 gpio_alarms;
177 177
178 u8 unused5[0x38]; 178 u8 unused5[0x38];
179 179
180 u64 tx_traffic_int; 180 u64 tx_traffic_int;
181#define TX_TRAFFIC_INT_n(n) BIT(n) 181#define TX_TRAFFIC_INT_n(n) s2BIT(n)
182 u64 tx_traffic_mask; 182 u64 tx_traffic_mask;
183 183
184 u64 rx_traffic_int; 184 u64 rx_traffic_int;
185#define RX_TRAFFIC_INT_n(n) BIT(n) 185#define RX_TRAFFIC_INT_n(n) s2BIT(n)
186 u64 rx_traffic_mask; 186 u64 rx_traffic_mask;
187 187
188/* PIC Control registers */ 188/* PIC Control registers */
189 u64 pic_control; 189 u64 pic_control;
190#define PIC_CNTL_RX_ALARM_MAP_1 BIT(0) 190#define PIC_CNTL_RX_ALARM_MAP_1 s2BIT(0)
191#define PIC_CNTL_SHARED_SPLITS(n) vBIT(n,11,5) 191#define PIC_CNTL_SHARED_SPLITS(n) vBIT(n,11,5)
192 192
193 u64 swapper_ctrl; 193 u64 swapper_ctrl;
194#define SWAPPER_CTRL_PIF_R_FE BIT(0) 194#define SWAPPER_CTRL_PIF_R_FE s2BIT(0)
195#define SWAPPER_CTRL_PIF_R_SE BIT(1) 195#define SWAPPER_CTRL_PIF_R_SE s2BIT(1)
196#define SWAPPER_CTRL_PIF_W_FE BIT(8) 196#define SWAPPER_CTRL_PIF_W_FE s2BIT(8)
197#define SWAPPER_CTRL_PIF_W_SE BIT(9) 197#define SWAPPER_CTRL_PIF_W_SE s2BIT(9)
198#define SWAPPER_CTRL_TXP_FE BIT(16) 198#define SWAPPER_CTRL_TXP_FE s2BIT(16)
199#define SWAPPER_CTRL_TXP_SE BIT(17) 199#define SWAPPER_CTRL_TXP_SE s2BIT(17)
200#define SWAPPER_CTRL_TXD_R_FE BIT(18) 200#define SWAPPER_CTRL_TXD_R_FE s2BIT(18)
201#define SWAPPER_CTRL_TXD_R_SE BIT(19) 201#define SWAPPER_CTRL_TXD_R_SE s2BIT(19)
202#define SWAPPER_CTRL_TXD_W_FE BIT(20) 202#define SWAPPER_CTRL_TXD_W_FE s2BIT(20)
203#define SWAPPER_CTRL_TXD_W_SE BIT(21) 203#define SWAPPER_CTRL_TXD_W_SE s2BIT(21)
204#define SWAPPER_CTRL_TXF_R_FE BIT(22) 204#define SWAPPER_CTRL_TXF_R_FE s2BIT(22)
205#define SWAPPER_CTRL_TXF_R_SE BIT(23) 205#define SWAPPER_CTRL_TXF_R_SE s2BIT(23)
206#define SWAPPER_CTRL_RXD_R_FE BIT(32) 206#define SWAPPER_CTRL_RXD_R_FE s2BIT(32)
207#define SWAPPER_CTRL_RXD_R_SE BIT(33) 207#define SWAPPER_CTRL_RXD_R_SE s2BIT(33)
208#define SWAPPER_CTRL_RXD_W_FE BIT(34) 208#define SWAPPER_CTRL_RXD_W_FE s2BIT(34)
209#define SWAPPER_CTRL_RXD_W_SE BIT(35) 209#define SWAPPER_CTRL_RXD_W_SE s2BIT(35)
210#define SWAPPER_CTRL_RXF_W_FE BIT(36) 210#define SWAPPER_CTRL_RXF_W_FE s2BIT(36)
211#define SWAPPER_CTRL_RXF_W_SE BIT(37) 211#define SWAPPER_CTRL_RXF_W_SE s2BIT(37)
212#define SWAPPER_CTRL_XMSI_FE BIT(40) 212#define SWAPPER_CTRL_XMSI_FE s2BIT(40)
213#define SWAPPER_CTRL_XMSI_SE BIT(41) 213#define SWAPPER_CTRL_XMSI_SE s2BIT(41)
214#define SWAPPER_CTRL_STATS_FE BIT(48) 214#define SWAPPER_CTRL_STATS_FE s2BIT(48)
215#define SWAPPER_CTRL_STATS_SE BIT(49) 215#define SWAPPER_CTRL_STATS_SE s2BIT(49)
216 216
217 u64 pif_rd_swapper_fb; 217 u64 pif_rd_swapper_fb;
218#define IF_RD_SWAPPER_FB 0x0123456789ABCDEF 218#define IF_RD_SWAPPER_FB 0x0123456789ABCDEF
219 219
220 u64 scheduled_int_ctrl; 220 u64 scheduled_int_ctrl;
221#define SCHED_INT_CTRL_TIMER_EN BIT(0) 221#define SCHED_INT_CTRL_TIMER_EN s2BIT(0)
222#define SCHED_INT_CTRL_ONE_SHOT BIT(1) 222#define SCHED_INT_CTRL_ONE_SHOT s2BIT(1)
223#define SCHED_INT_CTRL_INT2MSI(val) vBIT(val,10,6) 223#define SCHED_INT_CTRL_INT2MSI(val) vBIT(val,10,6)
224#define SCHED_INT_PERIOD TBD 224#define SCHED_INT_PERIOD TBD
225 225
226 u64 txreqtimeout; 226 u64 txreqtimeout;
227#define TXREQTO_VAL(val) vBIT(val,0,32) 227#define TXREQTO_VAL(val) vBIT(val,0,32)
228#define TXREQTO_EN BIT(63) 228#define TXREQTO_EN s2BIT(63)
229 229
230 u64 statsreqtimeout; 230 u64 statsreqtimeout;
231#define STATREQTO_VAL(n) TBD 231#define STATREQTO_VAL(n) TBD
232#define STATREQTO_EN BIT(63) 232#define STATREQTO_EN s2BIT(63)
233 233
234 u64 read_retry_delay; 234 u64 read_retry_delay;
235 u64 read_retry_acceleration; 235 u64 read_retry_acceleration;
@@ -255,10 +255,10 @@ struct XENA_dev_config {
255 255
256 /* Automated statistics collection */ 256 /* Automated statistics collection */
257 u64 stat_cfg; 257 u64 stat_cfg;
258#define STAT_CFG_STAT_EN BIT(0) 258#define STAT_CFG_STAT_EN s2BIT(0)
259#define STAT_CFG_ONE_SHOT_EN BIT(1) 259#define STAT_CFG_ONE_SHOT_EN s2BIT(1)
260#define STAT_CFG_STAT_NS_EN BIT(8) 260#define STAT_CFG_STAT_NS_EN s2BIT(8)
261#define STAT_CFG_STAT_RO BIT(9) 261#define STAT_CFG_STAT_RO s2BIT(9)
262#define STAT_TRSF_PER(n) TBD 262#define STAT_TRSF_PER(n) TBD
263#define PER_SEC 0x208d5 263#define PER_SEC 0x208d5
264#define SET_UPDT_PERIOD(n) vBIT((PER_SEC*n),32,32) 264#define SET_UPDT_PERIOD(n) vBIT((PER_SEC*n),32,32)
@@ -290,18 +290,18 @@ struct XENA_dev_config {
290#define I2C_CONTROL_DEV_ID(id) vBIT(id,1,3) 290#define I2C_CONTROL_DEV_ID(id) vBIT(id,1,3)
291#define I2C_CONTROL_ADDR(addr) vBIT(addr,5,11) 291#define I2C_CONTROL_ADDR(addr) vBIT(addr,5,11)
292#define I2C_CONTROL_BYTE_CNT(cnt) vBIT(cnt,22,2) 292#define I2C_CONTROL_BYTE_CNT(cnt) vBIT(cnt,22,2)
293#define I2C_CONTROL_READ BIT(24) 293#define I2C_CONTROL_READ s2BIT(24)
294#define I2C_CONTROL_NACK BIT(25) 294#define I2C_CONTROL_NACK s2BIT(25)
295#define I2C_CONTROL_CNTL_START vBIT(0xE,28,4) 295#define I2C_CONTROL_CNTL_START vBIT(0xE,28,4)
296#define I2C_CONTROL_CNTL_END(val) (val & vBIT(0x1,28,4)) 296#define I2C_CONTROL_CNTL_END(val) (val & vBIT(0x1,28,4))
297#define I2C_CONTROL_GET_DATA(val) (u32)(val & 0xFFFFFFFF) 297#define I2C_CONTROL_GET_DATA(val) (u32)(val & 0xFFFFFFFF)
298#define I2C_CONTROL_SET_DATA(val) vBIT(val,32,32) 298#define I2C_CONTROL_SET_DATA(val) vBIT(val,32,32)
299 299
300 u64 gpio_control; 300 u64 gpio_control;
301#define GPIO_CTRL_GPIO_0 BIT(8) 301#define GPIO_CTRL_GPIO_0 s2BIT(8)
302 u64 misc_control; 302 u64 misc_control;
303#define FAULT_BEHAVIOUR BIT(0) 303#define FAULT_BEHAVIOUR s2BIT(0)
304#define EXT_REQ_EN BIT(1) 304#define EXT_REQ_EN s2BIT(1)
305#define MISC_LINK_STABILITY_PRD(val) vBIT(val,29,3) 305#define MISC_LINK_STABILITY_PRD(val) vBIT(val,29,3)
306 306
307 u8 unused7_1[0x230 - 0x208]; 307 u8 unused7_1[0x230 - 0x208];
@@ -317,29 +317,29 @@ struct XENA_dev_config {
317/* TxDMA registers */ 317/* TxDMA registers */
318 u64 txdma_int_status; 318 u64 txdma_int_status;
319 u64 txdma_int_mask; 319 u64 txdma_int_mask;
320#define TXDMA_PFC_INT BIT(0) 320#define TXDMA_PFC_INT s2BIT(0)
321#define TXDMA_TDA_INT BIT(1) 321#define TXDMA_TDA_INT s2BIT(1)
322#define TXDMA_PCC_INT BIT(2) 322#define TXDMA_PCC_INT s2BIT(2)
323#define TXDMA_TTI_INT BIT(3) 323#define TXDMA_TTI_INT s2BIT(3)
324#define TXDMA_LSO_INT BIT(4) 324#define TXDMA_LSO_INT s2BIT(4)
325#define TXDMA_TPA_INT BIT(5) 325#define TXDMA_TPA_INT s2BIT(5)
326#define TXDMA_SM_INT BIT(6) 326#define TXDMA_SM_INT s2BIT(6)
327 u64 pfc_err_reg; 327 u64 pfc_err_reg;
328#define PFC_ECC_SG_ERR BIT(7) 328#define PFC_ECC_SG_ERR s2BIT(7)
329#define PFC_ECC_DB_ERR BIT(15) 329#define PFC_ECC_DB_ERR s2BIT(15)
330#define PFC_SM_ERR_ALARM BIT(23) 330#define PFC_SM_ERR_ALARM s2BIT(23)
331#define PFC_MISC_0_ERR BIT(31) 331#define PFC_MISC_0_ERR s2BIT(31)
332#define PFC_MISC_1_ERR BIT(32) 332#define PFC_MISC_1_ERR s2BIT(32)
333#define PFC_PCIX_ERR BIT(39) 333#define PFC_PCIX_ERR s2BIT(39)
334 u64 pfc_err_mask; 334 u64 pfc_err_mask;
335 u64 pfc_err_alarm; 335 u64 pfc_err_alarm;
336 336
337 u64 tda_err_reg; 337 u64 tda_err_reg;
338#define TDA_Fn_ECC_SG_ERR vBIT(0xff,0,8) 338#define TDA_Fn_ECC_SG_ERR vBIT(0xff,0,8)
339#define TDA_Fn_ECC_DB_ERR vBIT(0xff,8,8) 339#define TDA_Fn_ECC_DB_ERR vBIT(0xff,8,8)
340#define TDA_SM0_ERR_ALARM BIT(22) 340#define TDA_SM0_ERR_ALARM s2BIT(22)
341#define TDA_SM1_ERR_ALARM BIT(23) 341#define TDA_SM1_ERR_ALARM s2BIT(23)
342#define TDA_PCIX_ERR BIT(39) 342#define TDA_PCIX_ERR s2BIT(39)
343 u64 tda_err_mask; 343 u64 tda_err_mask;
344 u64 tda_err_alarm; 344 u64 tda_err_alarm;
345 345
@@ -351,40 +351,40 @@ struct XENA_dev_config {
351#define PCC_SM_ERR_ALARM vBIT(0xff,32,8) 351#define PCC_SM_ERR_ALARM vBIT(0xff,32,8)
352#define PCC_WR_ERR_ALARM vBIT(0xff,40,8) 352#define PCC_WR_ERR_ALARM vBIT(0xff,40,8)
353#define PCC_N_SERR vBIT(0xff,48,8) 353#define PCC_N_SERR vBIT(0xff,48,8)
354#define PCC_6_COF_OV_ERR BIT(56) 354#define PCC_6_COF_OV_ERR s2BIT(56)
355#define PCC_7_COF_OV_ERR BIT(57) 355#define PCC_7_COF_OV_ERR s2BIT(57)
356#define PCC_6_LSO_OV_ERR BIT(58) 356#define PCC_6_LSO_OV_ERR s2BIT(58)
357#define PCC_7_LSO_OV_ERR BIT(59) 357#define PCC_7_LSO_OV_ERR s2BIT(59)
358#define PCC_ENABLE_FOUR vBIT(0x0F,0,8) 358#define PCC_ENABLE_FOUR vBIT(0x0F,0,8)
359 u64 pcc_err_mask; 359 u64 pcc_err_mask;
360 u64 pcc_err_alarm; 360 u64 pcc_err_alarm;
361 361
362 u64 tti_err_reg; 362 u64 tti_err_reg;
363#define TTI_ECC_SG_ERR BIT(7) 363#define TTI_ECC_SG_ERR s2BIT(7)
364#define TTI_ECC_DB_ERR BIT(15) 364#define TTI_ECC_DB_ERR s2BIT(15)
365#define TTI_SM_ERR_ALARM BIT(23) 365#define TTI_SM_ERR_ALARM s2BIT(23)
366 u64 tti_err_mask; 366 u64 tti_err_mask;
367 u64 tti_err_alarm; 367 u64 tti_err_alarm;
368 368
369 u64 lso_err_reg; 369 u64 lso_err_reg;
370#define LSO6_SEND_OFLOW BIT(12) 370#define LSO6_SEND_OFLOW s2BIT(12)
371#define LSO7_SEND_OFLOW BIT(13) 371#define LSO7_SEND_OFLOW s2BIT(13)
372#define LSO6_ABORT BIT(14) 372#define LSO6_ABORT s2BIT(14)
373#define LSO7_ABORT BIT(15) 373#define LSO7_ABORT s2BIT(15)
374#define LSO6_SM_ERR_ALARM BIT(22) 374#define LSO6_SM_ERR_ALARM s2BIT(22)
375#define LSO7_SM_ERR_ALARM BIT(23) 375#define LSO7_SM_ERR_ALARM s2BIT(23)
376 u64 lso_err_mask; 376 u64 lso_err_mask;
377 u64 lso_err_alarm; 377 u64 lso_err_alarm;
378 378
379 u64 tpa_err_reg; 379 u64 tpa_err_reg;
380#define TPA_TX_FRM_DROP BIT(7) 380#define TPA_TX_FRM_DROP s2BIT(7)
381#define TPA_SM_ERR_ALARM BIT(23) 381#define TPA_SM_ERR_ALARM s2BIT(23)
382 382
383 u64 tpa_err_mask; 383 u64 tpa_err_mask;
384 u64 tpa_err_alarm; 384 u64 tpa_err_alarm;
385 385
386 u64 sm_err_reg; 386 u64 sm_err_reg;
387#define SM_SM_ERR_ALARM BIT(15) 387#define SM_SM_ERR_ALARM s2BIT(15)
388 u64 sm_err_mask; 388 u64 sm_err_mask;
389 u64 sm_err_alarm; 389 u64 sm_err_alarm;
390 390
@@ -397,7 +397,7 @@ struct XENA_dev_config {
397#define X_MAX_FIFOS 8 397#define X_MAX_FIFOS 8
398#define X_FIFO_MAX_LEN 0x1FFF /*8191 */ 398#define X_FIFO_MAX_LEN 0x1FFF /*8191 */
399 u64 tx_fifo_partition_0; 399 u64 tx_fifo_partition_0;
400#define TX_FIFO_PARTITION_EN BIT(0) 400#define TX_FIFO_PARTITION_EN s2BIT(0)
401#define TX_FIFO_PARTITION_0_PRI(val) vBIT(val,5,3) 401#define TX_FIFO_PARTITION_0_PRI(val) vBIT(val,5,3)
402#define TX_FIFO_PARTITION_0_LEN(val) vBIT(val,19,13) 402#define TX_FIFO_PARTITION_0_LEN(val) vBIT(val,19,13)
403#define TX_FIFO_PARTITION_1_PRI(val) vBIT(val,37,3) 403#define TX_FIFO_PARTITION_1_PRI(val) vBIT(val,37,3)
@@ -437,16 +437,16 @@ struct XENA_dev_config {
437 u64 tx_w_round_robin_4; 437 u64 tx_w_round_robin_4;
438 438
439 u64 tti_command_mem; 439 u64 tti_command_mem;
440#define TTI_CMD_MEM_WE BIT(7) 440#define TTI_CMD_MEM_WE s2BIT(7)
441#define TTI_CMD_MEM_STROBE_NEW_CMD BIT(15) 441#define TTI_CMD_MEM_STROBE_NEW_CMD s2BIT(15)
442#define TTI_CMD_MEM_STROBE_BEING_EXECUTED BIT(15) 442#define TTI_CMD_MEM_STROBE_BEING_EXECUTED s2BIT(15)
443#define TTI_CMD_MEM_OFFSET(n) vBIT(n,26,6) 443#define TTI_CMD_MEM_OFFSET(n) vBIT(n,26,6)
444 444
445 u64 tti_data1_mem; 445 u64 tti_data1_mem;
446#define TTI_DATA1_MEM_TX_TIMER_VAL(n) vBIT(n,6,26) 446#define TTI_DATA1_MEM_TX_TIMER_VAL(n) vBIT(n,6,26)
447#define TTI_DATA1_MEM_TX_TIMER_AC_CI(n) vBIT(n,38,2) 447#define TTI_DATA1_MEM_TX_TIMER_AC_CI(n) vBIT(n,38,2)
448#define TTI_DATA1_MEM_TX_TIMER_AC_EN BIT(38) 448#define TTI_DATA1_MEM_TX_TIMER_AC_EN s2BIT(38)
449#define TTI_DATA1_MEM_TX_TIMER_CI_EN BIT(39) 449#define TTI_DATA1_MEM_TX_TIMER_CI_EN s2BIT(39)
450#define TTI_DATA1_MEM_TX_URNG_A(n) vBIT(n,41,7) 450#define TTI_DATA1_MEM_TX_URNG_A(n) vBIT(n,41,7)
451#define TTI_DATA1_MEM_TX_URNG_B(n) vBIT(n,49,7) 451#define TTI_DATA1_MEM_TX_URNG_B(n) vBIT(n,49,7)
452#define TTI_DATA1_MEM_TX_URNG_C(n) vBIT(n,57,7) 452#define TTI_DATA1_MEM_TX_URNG_C(n) vBIT(n,57,7)
@@ -459,11 +459,11 @@ struct XENA_dev_config {
459 459
460/* Tx Protocol assist */ 460/* Tx Protocol assist */
461 u64 tx_pa_cfg; 461 u64 tx_pa_cfg;
462#define TX_PA_CFG_IGNORE_FRM_ERR BIT(1) 462#define TX_PA_CFG_IGNORE_FRM_ERR s2BIT(1)
463#define TX_PA_CFG_IGNORE_SNAP_OUI BIT(2) 463#define TX_PA_CFG_IGNORE_SNAP_OUI s2BIT(2)
464#define TX_PA_CFG_IGNORE_LLC_CTRL BIT(3) 464#define TX_PA_CFG_IGNORE_LLC_CTRL s2BIT(3)
465#define TX_PA_CFG_IGNORE_L2_ERR BIT(6) 465#define TX_PA_CFG_IGNORE_L2_ERR s2BIT(6)
466#define RX_PA_CFG_STRIP_VLAN_TAG BIT(15) 466#define RX_PA_CFG_STRIP_VLAN_TAG s2BIT(15)
467 467
468/* Recent add, used only debug purposes. */ 468/* Recent add, used only debug purposes. */
469 u64 pcc_enable; 469 u64 pcc_enable;
@@ -477,31 +477,31 @@ struct XENA_dev_config {
477/* RxDMA Registers */ 477/* RxDMA Registers */
478 u64 rxdma_int_status; 478 u64 rxdma_int_status;
479 u64 rxdma_int_mask; 479 u64 rxdma_int_mask;
480#define RXDMA_INT_RC_INT_M BIT(0) 480#define RXDMA_INT_RC_INT_M s2BIT(0)
481#define RXDMA_INT_RPA_INT_M BIT(1) 481#define RXDMA_INT_RPA_INT_M s2BIT(1)
482#define RXDMA_INT_RDA_INT_M BIT(2) 482#define RXDMA_INT_RDA_INT_M s2BIT(2)
483#define RXDMA_INT_RTI_INT_M BIT(3) 483#define RXDMA_INT_RTI_INT_M s2BIT(3)
484 484
485 u64 rda_err_reg; 485 u64 rda_err_reg;
486#define RDA_RXDn_ECC_SG_ERR vBIT(0xFF,0,8) 486#define RDA_RXDn_ECC_SG_ERR vBIT(0xFF,0,8)
487#define RDA_RXDn_ECC_DB_ERR vBIT(0xFF,8,8) 487#define RDA_RXDn_ECC_DB_ERR vBIT(0xFF,8,8)
488#define RDA_FRM_ECC_SG_ERR BIT(23) 488#define RDA_FRM_ECC_SG_ERR s2BIT(23)
489#define RDA_FRM_ECC_DB_N_AERR BIT(31) 489#define RDA_FRM_ECC_DB_N_AERR s2BIT(31)
490#define RDA_SM1_ERR_ALARM BIT(38) 490#define RDA_SM1_ERR_ALARM s2BIT(38)
491#define RDA_SM0_ERR_ALARM BIT(39) 491#define RDA_SM0_ERR_ALARM s2BIT(39)
492#define RDA_MISC_ERR BIT(47) 492#define RDA_MISC_ERR s2BIT(47)
493#define RDA_PCIX_ERR BIT(55) 493#define RDA_PCIX_ERR s2BIT(55)
494#define RDA_RXD_ECC_DB_SERR BIT(63) 494#define RDA_RXD_ECC_DB_SERR s2BIT(63)
495 u64 rda_err_mask; 495 u64 rda_err_mask;
496 u64 rda_err_alarm; 496 u64 rda_err_alarm;
497 497
498 u64 rc_err_reg; 498 u64 rc_err_reg;
499#define RC_PRCn_ECC_SG_ERR vBIT(0xFF,0,8) 499#define RC_PRCn_ECC_SG_ERR vBIT(0xFF,0,8)
500#define RC_PRCn_ECC_DB_ERR vBIT(0xFF,8,8) 500#define RC_PRCn_ECC_DB_ERR vBIT(0xFF,8,8)
501#define RC_FTC_ECC_SG_ERR BIT(23) 501#define RC_FTC_ECC_SG_ERR s2BIT(23)
502#define RC_FTC_ECC_DB_ERR BIT(31) 502#define RC_FTC_ECC_DB_ERR s2BIT(31)
503#define RC_PRCn_SM_ERR_ALARM vBIT(0xFF,32,8) 503#define RC_PRCn_SM_ERR_ALARM vBIT(0xFF,32,8)
504#define RC_FTC_SM_ERR_ALARM BIT(47) 504#define RC_FTC_SM_ERR_ALARM s2BIT(47)
505#define RC_RDA_FAIL_WR_Rn vBIT(0xFF,48,8) 505#define RC_RDA_FAIL_WR_Rn vBIT(0xFF,48,8)
506 u64 rc_err_mask; 506 u64 rc_err_mask;
507 u64 rc_err_alarm; 507 u64 rc_err_alarm;
@@ -517,18 +517,18 @@ struct XENA_dev_config {
517 u64 prc_pcix_err_alarm; 517 u64 prc_pcix_err_alarm;
518 518
519 u64 rpa_err_reg; 519 u64 rpa_err_reg;
520#define RPA_ECC_SG_ERR BIT(7) 520#define RPA_ECC_SG_ERR s2BIT(7)
521#define RPA_ECC_DB_ERR BIT(15) 521#define RPA_ECC_DB_ERR s2BIT(15)
522#define RPA_FLUSH_REQUEST BIT(22) 522#define RPA_FLUSH_REQUEST s2BIT(22)
523#define RPA_SM_ERR_ALARM BIT(23) 523#define RPA_SM_ERR_ALARM s2BIT(23)
524#define RPA_CREDIT_ERR BIT(31) 524#define RPA_CREDIT_ERR s2BIT(31)
525 u64 rpa_err_mask; 525 u64 rpa_err_mask;
526 u64 rpa_err_alarm; 526 u64 rpa_err_alarm;
527 527
528 u64 rti_err_reg; 528 u64 rti_err_reg;
529#define RTI_ECC_SG_ERR BIT(7) 529#define RTI_ECC_SG_ERR s2BIT(7)
530#define RTI_ECC_DB_ERR BIT(15) 530#define RTI_ECC_DB_ERR s2BIT(15)
531#define RTI_SM_ERR_ALARM BIT(23) 531#define RTI_SM_ERR_ALARM s2BIT(23)
532 u64 rti_err_mask; 532 u64 rti_err_mask;
533 u64 rti_err_alarm; 533 u64 rti_err_alarm;
534 534
@@ -568,49 +568,49 @@ struct XENA_dev_config {
568#endif 568#endif
569 u64 prc_rxd0_n[RX_MAX_RINGS]; 569 u64 prc_rxd0_n[RX_MAX_RINGS];
570 u64 prc_ctrl_n[RX_MAX_RINGS]; 570 u64 prc_ctrl_n[RX_MAX_RINGS];
571#define PRC_CTRL_RC_ENABLED BIT(7) 571#define PRC_CTRL_RC_ENABLED s2BIT(7)
572#define PRC_CTRL_RING_MODE (BIT(14)|BIT(15)) 572#define PRC_CTRL_RING_MODE (s2BIT(14)|s2BIT(15))
573#define PRC_CTRL_RING_MODE_1 vBIT(0,14,2) 573#define PRC_CTRL_RING_MODE_1 vBIT(0,14,2)
574#define PRC_CTRL_RING_MODE_3 vBIT(1,14,2) 574#define PRC_CTRL_RING_MODE_3 vBIT(1,14,2)
575#define PRC_CTRL_RING_MODE_5 vBIT(2,14,2) 575#define PRC_CTRL_RING_MODE_5 vBIT(2,14,2)
576#define PRC_CTRL_RING_MODE_x vBIT(3,14,2) 576#define PRC_CTRL_RING_MODE_x vBIT(3,14,2)
577#define PRC_CTRL_NO_SNOOP (BIT(22)|BIT(23)) 577#define PRC_CTRL_NO_SNOOP (s2BIT(22)|s2BIT(23))
578#define PRC_CTRL_NO_SNOOP_DESC BIT(22) 578#define PRC_CTRL_NO_SNOOP_DESC s2BIT(22)
579#define PRC_CTRL_NO_SNOOP_BUFF BIT(23) 579#define PRC_CTRL_NO_SNOOP_BUFF s2BIT(23)
580#define PRC_CTRL_BIMODAL_INTERRUPT BIT(37) 580#define PRC_CTRL_BIMODAL_INTERRUPT s2BIT(37)
581#define PRC_CTRL_GROUP_READS BIT(38) 581#define PRC_CTRL_GROUP_READS s2BIT(38)
582#define PRC_CTRL_RXD_BACKOFF_INTERVAL(val) vBIT(val,40,24) 582#define PRC_CTRL_RXD_BACKOFF_INTERVAL(val) vBIT(val,40,24)
583 583
584 u64 prc_alarm_action; 584 u64 prc_alarm_action;
585#define PRC_ALARM_ACTION_RR_R0_STOP BIT(3) 585#define PRC_ALARM_ACTION_RR_R0_STOP s2BIT(3)
586#define PRC_ALARM_ACTION_RW_R0_STOP BIT(7) 586#define PRC_ALARM_ACTION_RW_R0_STOP s2BIT(7)
587#define PRC_ALARM_ACTION_RR_R1_STOP BIT(11) 587#define PRC_ALARM_ACTION_RR_R1_STOP s2BIT(11)
588#define PRC_ALARM_ACTION_RW_R1_STOP BIT(15) 588#define PRC_ALARM_ACTION_RW_R1_STOP s2BIT(15)
589#define PRC_ALARM_ACTION_RR_R2_STOP BIT(19) 589#define PRC_ALARM_ACTION_RR_R2_STOP s2BIT(19)
590#define PRC_ALARM_ACTION_RW_R2_STOP BIT(23) 590#define PRC_ALARM_ACTION_RW_R2_STOP s2BIT(23)
591#define PRC_ALARM_ACTION_RR_R3_STOP BIT(27) 591#define PRC_ALARM_ACTION_RR_R3_STOP s2BIT(27)
592#define PRC_ALARM_ACTION_RW_R3_STOP BIT(31) 592#define PRC_ALARM_ACTION_RW_R3_STOP s2BIT(31)
593#define PRC_ALARM_ACTION_RR_R4_STOP BIT(35) 593#define PRC_ALARM_ACTION_RR_R4_STOP s2BIT(35)
594#define PRC_ALARM_ACTION_RW_R4_STOP BIT(39) 594#define PRC_ALARM_ACTION_RW_R4_STOP s2BIT(39)
595#define PRC_ALARM_ACTION_RR_R5_STOP BIT(43) 595#define PRC_ALARM_ACTION_RR_R5_STOP s2BIT(43)
596#define PRC_ALARM_ACTION_RW_R5_STOP BIT(47) 596#define PRC_ALARM_ACTION_RW_R5_STOP s2BIT(47)
597#define PRC_ALARM_ACTION_RR_R6_STOP BIT(51) 597#define PRC_ALARM_ACTION_RR_R6_STOP s2BIT(51)
598#define PRC_ALARM_ACTION_RW_R6_STOP BIT(55) 598#define PRC_ALARM_ACTION_RW_R6_STOP s2BIT(55)
599#define PRC_ALARM_ACTION_RR_R7_STOP BIT(59) 599#define PRC_ALARM_ACTION_RR_R7_STOP s2BIT(59)
600#define PRC_ALARM_ACTION_RW_R7_STOP BIT(63) 600#define PRC_ALARM_ACTION_RW_R7_STOP s2BIT(63)
601 601
602/* Receive traffic interrupts */ 602/* Receive traffic interrupts */
603 u64 rti_command_mem; 603 u64 rti_command_mem;
604#define RTI_CMD_MEM_WE BIT(7) 604#define RTI_CMD_MEM_WE s2BIT(7)
605#define RTI_CMD_MEM_STROBE BIT(15) 605#define RTI_CMD_MEM_STROBE s2BIT(15)
606#define RTI_CMD_MEM_STROBE_NEW_CMD BIT(15) 606#define RTI_CMD_MEM_STROBE_NEW_CMD s2BIT(15)
607#define RTI_CMD_MEM_STROBE_CMD_BEING_EXECUTED BIT(15) 607#define RTI_CMD_MEM_STROBE_CMD_BEING_EXECUTED s2BIT(15)
608#define RTI_CMD_MEM_OFFSET(n) vBIT(n,29,3) 608#define RTI_CMD_MEM_OFFSET(n) vBIT(n,29,3)
609 609
610 u64 rti_data1_mem; 610 u64 rti_data1_mem;
611#define RTI_DATA1_MEM_RX_TIMER_VAL(n) vBIT(n,3,29) 611#define RTI_DATA1_MEM_RX_TIMER_VAL(n) vBIT(n,3,29)
612#define RTI_DATA1_MEM_RX_TIMER_AC_EN BIT(38) 612#define RTI_DATA1_MEM_RX_TIMER_AC_EN s2BIT(38)
613#define RTI_DATA1_MEM_RX_TIMER_CI_EN BIT(39) 613#define RTI_DATA1_MEM_RX_TIMER_CI_EN s2BIT(39)
614#define RTI_DATA1_MEM_RX_URNG_A(n) vBIT(n,41,7) 614#define RTI_DATA1_MEM_RX_URNG_A(n) vBIT(n,41,7)
615#define RTI_DATA1_MEM_RX_URNG_B(n) vBIT(n,49,7) 615#define RTI_DATA1_MEM_RX_URNG_B(n) vBIT(n,49,7)
616#define RTI_DATA1_MEM_RX_URNG_C(n) vBIT(n,57,7) 616#define RTI_DATA1_MEM_RX_URNG_C(n) vBIT(n,57,7)
@@ -622,10 +622,10 @@ struct XENA_dev_config {
622#define RTI_DATA2_MEM_RX_UFC_D(n) vBIT(n,48,16) 622#define RTI_DATA2_MEM_RX_UFC_D(n) vBIT(n,48,16)
623 623
624 u64 rx_pa_cfg; 624 u64 rx_pa_cfg;
625#define RX_PA_CFG_IGNORE_FRM_ERR BIT(1) 625#define RX_PA_CFG_IGNORE_FRM_ERR s2BIT(1)
626#define RX_PA_CFG_IGNORE_SNAP_OUI BIT(2) 626#define RX_PA_CFG_IGNORE_SNAP_OUI s2BIT(2)
627#define RX_PA_CFG_IGNORE_LLC_CTRL BIT(3) 627#define RX_PA_CFG_IGNORE_LLC_CTRL s2BIT(3)
628#define RX_PA_CFG_IGNORE_L2_ERR BIT(6) 628#define RX_PA_CFG_IGNORE_L2_ERR s2BIT(6)
629 629
630 u64 unused_11_1; 630 u64 unused_11_1;
631 631
@@ -641,64 +641,64 @@ struct XENA_dev_config {
641/* Media Access Controller Register */ 641/* Media Access Controller Register */
642 u64 mac_int_status; 642 u64 mac_int_status;
643 u64 mac_int_mask; 643 u64 mac_int_mask;
644#define MAC_INT_STATUS_TMAC_INT BIT(0) 644#define MAC_INT_STATUS_TMAC_INT s2BIT(0)
645#define MAC_INT_STATUS_RMAC_INT BIT(1) 645#define MAC_INT_STATUS_RMAC_INT s2BIT(1)
646 646
647 u64 mac_tmac_err_reg; 647 u64 mac_tmac_err_reg;
648#define TMAC_ECC_SG_ERR BIT(7) 648#define TMAC_ECC_SG_ERR s2BIT(7)
649#define TMAC_ECC_DB_ERR BIT(15) 649#define TMAC_ECC_DB_ERR s2BIT(15)
650#define TMAC_TX_BUF_OVRN BIT(23) 650#define TMAC_TX_BUF_OVRN s2BIT(23)
651#define TMAC_TX_CRI_ERR BIT(31) 651#define TMAC_TX_CRI_ERR s2BIT(31)
652#define TMAC_TX_SM_ERR BIT(39) 652#define TMAC_TX_SM_ERR s2BIT(39)
653#define TMAC_DESC_ECC_SG_ERR BIT(47) 653#define TMAC_DESC_ECC_SG_ERR s2BIT(47)
654#define TMAC_DESC_ECC_DB_ERR BIT(55) 654#define TMAC_DESC_ECC_DB_ERR s2BIT(55)
655 655
656 u64 mac_tmac_err_mask; 656 u64 mac_tmac_err_mask;
657 u64 mac_tmac_err_alarm; 657 u64 mac_tmac_err_alarm;
658 658
659 u64 mac_rmac_err_reg; 659 u64 mac_rmac_err_reg;
660#define RMAC_RX_BUFF_OVRN BIT(0) 660#define RMAC_RX_BUFF_OVRN s2BIT(0)
661#define RMAC_FRM_RCVD_INT BIT(1) 661#define RMAC_FRM_RCVD_INT s2BIT(1)
662#define RMAC_UNUSED_INT BIT(2) 662#define RMAC_UNUSED_INT s2BIT(2)
663#define RMAC_RTS_PNUM_ECC_SG_ERR BIT(5) 663#define RMAC_RTS_PNUM_ECC_SG_ERR s2BIT(5)
664#define RMAC_RTS_DS_ECC_SG_ERR BIT(6) 664#define RMAC_RTS_DS_ECC_SG_ERR s2BIT(6)
665#define RMAC_RD_BUF_ECC_SG_ERR BIT(7) 665#define RMAC_RD_BUF_ECC_SG_ERR s2BIT(7)
666#define RMAC_RTH_MAP_ECC_SG_ERR BIT(8) 666#define RMAC_RTH_MAP_ECC_SG_ERR s2BIT(8)
667#define RMAC_RTH_SPDM_ECC_SG_ERR BIT(9) 667#define RMAC_RTH_SPDM_ECC_SG_ERR s2BIT(9)
668#define RMAC_RTS_VID_ECC_SG_ERR BIT(10) 668#define RMAC_RTS_VID_ECC_SG_ERR s2BIT(10)
669#define RMAC_DA_SHADOW_ECC_SG_ERR BIT(11) 669#define RMAC_DA_SHADOW_ECC_SG_ERR s2BIT(11)
670#define RMAC_RTS_PNUM_ECC_DB_ERR BIT(13) 670#define RMAC_RTS_PNUM_ECC_DB_ERR s2BIT(13)
671#define RMAC_RTS_DS_ECC_DB_ERR BIT(14) 671#define RMAC_RTS_DS_ECC_DB_ERR s2BIT(14)
672#define RMAC_RD_BUF_ECC_DB_ERR BIT(15) 672#define RMAC_RD_BUF_ECC_DB_ERR s2BIT(15)
673#define RMAC_RTH_MAP_ECC_DB_ERR BIT(16) 673#define RMAC_RTH_MAP_ECC_DB_ERR s2BIT(16)
674#define RMAC_RTH_SPDM_ECC_DB_ERR BIT(17) 674#define RMAC_RTH_SPDM_ECC_DB_ERR s2BIT(17)
675#define RMAC_RTS_VID_ECC_DB_ERR BIT(18) 675#define RMAC_RTS_VID_ECC_DB_ERR s2BIT(18)
676#define RMAC_DA_SHADOW_ECC_DB_ERR BIT(19) 676#define RMAC_DA_SHADOW_ECC_DB_ERR s2BIT(19)
677#define RMAC_LINK_STATE_CHANGE_INT BIT(31) 677#define RMAC_LINK_STATE_CHANGE_INT s2BIT(31)
678#define RMAC_RX_SM_ERR BIT(39) 678#define RMAC_RX_SM_ERR s2BIT(39)
679#define RMAC_SINGLE_ECC_ERR (BIT(5) | BIT(6) | BIT(7) |\ 679#define RMAC_SINGLE_ECC_ERR (s2BIT(5) | s2BIT(6) | s2BIT(7) |\
680 BIT(8) | BIT(9) | BIT(10)|\ 680 s2BIT(8) | s2BIT(9) | s2BIT(10)|\
681 BIT(11)) 681 s2BIT(11))
682#define RMAC_DOUBLE_ECC_ERR (BIT(13) | BIT(14) | BIT(15) |\ 682#define RMAC_DOUBLE_ECC_ERR (s2BIT(13) | s2BIT(14) | s2BIT(15) |\
683 BIT(16) | BIT(17) | BIT(18)|\ 683 s2BIT(16) | s2BIT(17) | s2BIT(18)|\
684 BIT(19)) 684 s2BIT(19))
685 u64 mac_rmac_err_mask; 685 u64 mac_rmac_err_mask;
686 u64 mac_rmac_err_alarm; 686 u64 mac_rmac_err_alarm;
687 687
688 u8 unused14[0x100 - 0x40]; 688 u8 unused14[0x100 - 0x40];
689 689
690 u64 mac_cfg; 690 u64 mac_cfg;
691#define MAC_CFG_TMAC_ENABLE BIT(0) 691#define MAC_CFG_TMAC_ENABLE s2BIT(0)
692#define MAC_CFG_RMAC_ENABLE BIT(1) 692#define MAC_CFG_RMAC_ENABLE s2BIT(1)
693#define MAC_CFG_LAN_NOT_WAN BIT(2) 693#define MAC_CFG_LAN_NOT_WAN s2BIT(2)
694#define MAC_CFG_TMAC_LOOPBACK BIT(3) 694#define MAC_CFG_TMAC_LOOPBACK s2BIT(3)
695#define MAC_CFG_TMAC_APPEND_PAD BIT(4) 695#define MAC_CFG_TMAC_APPEND_PAD s2BIT(4)
696#define MAC_CFG_RMAC_STRIP_FCS BIT(5) 696#define MAC_CFG_RMAC_STRIP_FCS s2BIT(5)
697#define MAC_CFG_RMAC_STRIP_PAD BIT(6) 697#define MAC_CFG_RMAC_STRIP_PAD s2BIT(6)
698#define MAC_CFG_RMAC_PROM_ENABLE BIT(7) 698#define MAC_CFG_RMAC_PROM_ENABLE s2BIT(7)
699#define MAC_RMAC_DISCARD_PFRM BIT(8) 699#define MAC_RMAC_DISCARD_PFRM s2BIT(8)
700#define MAC_RMAC_BCAST_ENABLE BIT(9) 700#define MAC_RMAC_BCAST_ENABLE s2BIT(9)
701#define MAC_RMAC_ALL_ADDR_ENABLE BIT(10) 701#define MAC_RMAC_ALL_ADDR_ENABLE s2BIT(10)
702#define MAC_RMAC_INVLD_IPG_THR(val) vBIT(val,16,8) 702#define MAC_RMAC_INVLD_IPG_THR(val) vBIT(val,16,8)
703 703
704 u64 tmac_avg_ipg; 704 u64 tmac_avg_ipg;
@@ -710,14 +710,14 @@ struct XENA_dev_config {
710#define RMAC_MAX_PYLD_LEN_JUMBO_DEF vBIT(9600,2,14) 710#define RMAC_MAX_PYLD_LEN_JUMBO_DEF vBIT(9600,2,14)
711 711
712 u64 rmac_err_cfg; 712 u64 rmac_err_cfg;
713#define RMAC_ERR_FCS BIT(0) 713#define RMAC_ERR_FCS s2BIT(0)
714#define RMAC_ERR_FCS_ACCEPT BIT(1) 714#define RMAC_ERR_FCS_ACCEPT s2BIT(1)
715#define RMAC_ERR_TOO_LONG BIT(1) 715#define RMAC_ERR_TOO_LONG s2BIT(1)
716#define RMAC_ERR_TOO_LONG_ACCEPT BIT(1) 716#define RMAC_ERR_TOO_LONG_ACCEPT s2BIT(1)
717#define RMAC_ERR_RUNT BIT(2) 717#define RMAC_ERR_RUNT s2BIT(2)
718#define RMAC_ERR_RUNT_ACCEPT BIT(2) 718#define RMAC_ERR_RUNT_ACCEPT s2BIT(2)
719#define RMAC_ERR_LEN_MISMATCH BIT(3) 719#define RMAC_ERR_LEN_MISMATCH s2BIT(3)
720#define RMAC_ERR_LEN_MISMATCH_ACCEPT BIT(3) 720#define RMAC_ERR_LEN_MISMATCH_ACCEPT s2BIT(3)
721 721
722 u64 rmac_cfg_key; 722 u64 rmac_cfg_key;
723#define RMAC_CFG_KEY(val) vBIT(val,0,16) 723#define RMAC_CFG_KEY(val) vBIT(val,0,16)
@@ -728,15 +728,15 @@ struct XENA_dev_config {
728#define MAC_MC_ADDR_START_OFFSET 16 728#define MAC_MC_ADDR_START_OFFSET 16
729#define MAC_MC_ALL_MC_ADDR_OFFSET 63 /* enables all multicast pkts */ 729#define MAC_MC_ALL_MC_ADDR_OFFSET 63 /* enables all multicast pkts */
730 u64 rmac_addr_cmd_mem; 730 u64 rmac_addr_cmd_mem;
731#define RMAC_ADDR_CMD_MEM_WE BIT(7) 731#define RMAC_ADDR_CMD_MEM_WE s2BIT(7)
732#define RMAC_ADDR_CMD_MEM_RD 0 732#define RMAC_ADDR_CMD_MEM_RD 0
733#define RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD BIT(15) 733#define RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD s2BIT(15)
734#define RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING BIT(15) 734#define RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING s2BIT(15)
735#define RMAC_ADDR_CMD_MEM_OFFSET(n) vBIT(n,26,6) 735#define RMAC_ADDR_CMD_MEM_OFFSET(n) vBIT(n,26,6)
736 736
737 u64 rmac_addr_data0_mem; 737 u64 rmac_addr_data0_mem;
738#define RMAC_ADDR_DATA0_MEM_ADDR(n) vBIT(n,0,48) 738#define RMAC_ADDR_DATA0_MEM_ADDR(n) vBIT(n,0,48)
739#define RMAC_ADDR_DATA0_MEM_USER BIT(48) 739#define RMAC_ADDR_DATA0_MEM_USER s2BIT(48)
740 740
741 u64 rmac_addr_data1_mem; 741 u64 rmac_addr_data1_mem;
742#define RMAC_ADDR_DATA1_MEM_MASK(n) vBIT(n,0,48) 742#define RMAC_ADDR_DATA1_MEM_MASK(n) vBIT(n,0,48)
@@ -753,10 +753,10 @@ struct XENA_dev_config {
753 u64 tmac_ipg_cfg; 753 u64 tmac_ipg_cfg;
754 754
755 u64 rmac_pause_cfg; 755 u64 rmac_pause_cfg;
756#define RMAC_PAUSE_GEN BIT(0) 756#define RMAC_PAUSE_GEN s2BIT(0)
757#define RMAC_PAUSE_GEN_ENABLE BIT(0) 757#define RMAC_PAUSE_GEN_ENABLE s2BIT(0)
758#define RMAC_PAUSE_RX BIT(1) 758#define RMAC_PAUSE_RX s2BIT(1)
759#define RMAC_PAUSE_RX_ENABLE BIT(1) 759#define RMAC_PAUSE_RX_ENABLE s2BIT(1)
760#define RMAC_PAUSE_HG_PTIME_DEF vBIT(0xFFFF,16,16) 760#define RMAC_PAUSE_HG_PTIME_DEF vBIT(0xFFFF,16,16)
761#define RMAC_PAUSE_HG_PTIME(val) vBIT(val,16,16) 761#define RMAC_PAUSE_HG_PTIME(val) vBIT(val,16,16)
762 762
@@ -787,29 +787,29 @@ struct XENA_dev_config {
787#define MAX_DIX_MAP 4 787#define MAX_DIX_MAP 4
788 u64 rts_dix_map_n[MAX_DIX_MAP]; 788 u64 rts_dix_map_n[MAX_DIX_MAP];
789#define RTS_DIX_MAP_ETYPE(val) vBIT(val,0,16) 789#define RTS_DIX_MAP_ETYPE(val) vBIT(val,0,16)
790#define RTS_DIX_MAP_SCW(val) BIT(val,21) 790#define RTS_DIX_MAP_SCW(val) s2BIT(val,21)
791 791
792 u64 rts_q_alternates; 792 u64 rts_q_alternates;
793 u64 rts_default_q; 793 u64 rts_default_q;
794 794
795 u64 rts_ctrl; 795 u64 rts_ctrl;
796#define RTS_CTRL_IGNORE_SNAP_OUI BIT(2) 796#define RTS_CTRL_IGNORE_SNAP_OUI s2BIT(2)
797#define RTS_CTRL_IGNORE_LLC_CTRL BIT(3) 797#define RTS_CTRL_IGNORE_LLC_CTRL s2BIT(3)
798 798
799 u64 rts_pn_cam_ctrl; 799 u64 rts_pn_cam_ctrl;
800#define RTS_PN_CAM_CTRL_WE BIT(7) 800#define RTS_PN_CAM_CTRL_WE s2BIT(7)
801#define RTS_PN_CAM_CTRL_STROBE_NEW_CMD BIT(15) 801#define RTS_PN_CAM_CTRL_STROBE_NEW_CMD s2BIT(15)
802#define RTS_PN_CAM_CTRL_STROBE_BEING_EXECUTED BIT(15) 802#define RTS_PN_CAM_CTRL_STROBE_BEING_EXECUTED s2BIT(15)
803#define RTS_PN_CAM_CTRL_OFFSET(n) vBIT(n,24,8) 803#define RTS_PN_CAM_CTRL_OFFSET(n) vBIT(n,24,8)
804 u64 rts_pn_cam_data; 804 u64 rts_pn_cam_data;
805#define RTS_PN_CAM_DATA_TCP_SELECT BIT(7) 805#define RTS_PN_CAM_DATA_TCP_SELECT s2BIT(7)
806#define RTS_PN_CAM_DATA_PORT(val) vBIT(val,8,16) 806#define RTS_PN_CAM_DATA_PORT(val) vBIT(val,8,16)
807#define RTS_PN_CAM_DATA_SCW(val) vBIT(val,24,8) 807#define RTS_PN_CAM_DATA_SCW(val) vBIT(val,24,8)
808 808
809 u64 rts_ds_mem_ctrl; 809 u64 rts_ds_mem_ctrl;
810#define RTS_DS_MEM_CTRL_WE BIT(7) 810#define RTS_DS_MEM_CTRL_WE s2BIT(7)
811#define RTS_DS_MEM_CTRL_STROBE_NEW_CMD BIT(15) 811#define RTS_DS_MEM_CTRL_STROBE_NEW_CMD s2BIT(15)
812#define RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED BIT(15) 812#define RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED s2BIT(15)
813#define RTS_DS_MEM_CTRL_OFFSET(n) vBIT(n,26,6) 813#define RTS_DS_MEM_CTRL_OFFSET(n) vBIT(n,26,6)
814 u64 rts_ds_mem_data; 814 u64 rts_ds_mem_data;
815#define RTS_DS_MEM_DATA(n) vBIT(n,0,8) 815#define RTS_DS_MEM_DATA(n) vBIT(n,0,8)
@@ -823,23 +823,23 @@ struct XENA_dev_config {
823 823
824/* memory controller registers */ 824/* memory controller registers */
825 u64 mc_int_status; 825 u64 mc_int_status;
826#define MC_INT_STATUS_MC_INT BIT(0) 826#define MC_INT_STATUS_MC_INT s2BIT(0)
827 u64 mc_int_mask; 827 u64 mc_int_mask;
828#define MC_INT_MASK_MC_INT BIT(0) 828#define MC_INT_MASK_MC_INT s2BIT(0)
829 829
830 u64 mc_err_reg; 830 u64 mc_err_reg;
831#define MC_ERR_REG_ECC_DB_ERR_L BIT(14) 831#define MC_ERR_REG_ECC_DB_ERR_L s2BIT(14)
832#define MC_ERR_REG_ECC_DB_ERR_U BIT(15) 832#define MC_ERR_REG_ECC_DB_ERR_U s2BIT(15)
833#define MC_ERR_REG_MIRI_ECC_DB_ERR_0 BIT(18) 833#define MC_ERR_REG_MIRI_ECC_DB_ERR_0 s2BIT(18)
834#define MC_ERR_REG_MIRI_ECC_DB_ERR_1 BIT(20) 834#define MC_ERR_REG_MIRI_ECC_DB_ERR_1 s2BIT(20)
835#define MC_ERR_REG_MIRI_CRI_ERR_0 BIT(22) 835#define MC_ERR_REG_MIRI_CRI_ERR_0 s2BIT(22)
836#define MC_ERR_REG_MIRI_CRI_ERR_1 BIT(23) 836#define MC_ERR_REG_MIRI_CRI_ERR_1 s2BIT(23)
837#define MC_ERR_REG_SM_ERR BIT(31) 837#define MC_ERR_REG_SM_ERR s2BIT(31)
838#define MC_ERR_REG_ECC_ALL_SNG (BIT(2) | BIT(3) | BIT(4) | BIT(5) |\ 838#define MC_ERR_REG_ECC_ALL_SNG (s2BIT(2) | s2BIT(3) | s2BIT(4) | s2BIT(5) |\
839 BIT(17) | BIT(19)) 839 s2BIT(17) | s2BIT(19))
840#define MC_ERR_REG_ECC_ALL_DBL (BIT(10) | BIT(11) | BIT(12) |\ 840#define MC_ERR_REG_ECC_ALL_DBL (s2BIT(10) | s2BIT(11) | s2BIT(12) |\
841 BIT(13) | BIT(18) | BIT(20)) 841 s2BIT(13) | s2BIT(18) | s2BIT(20))
842#define PLL_LOCK_N BIT(39) 842#define PLL_LOCK_N s2BIT(39)
843 u64 mc_err_mask; 843 u64 mc_err_mask;
844 u64 mc_err_alarm; 844 u64 mc_err_alarm;
845 845
@@ -857,8 +857,8 @@ struct XENA_dev_config {
857#define RX_QUEUE_CFG_Q7_SZ(n) vBIT(n,56,8) 857#define RX_QUEUE_CFG_Q7_SZ(n) vBIT(n,56,8)
858 858
859 u64 mc_rldram_mrs; 859 u64 mc_rldram_mrs;
860#define MC_RLDRAM_QUEUE_SIZE_ENABLE BIT(39) 860#define MC_RLDRAM_QUEUE_SIZE_ENABLE s2BIT(39)
861#define MC_RLDRAM_MRS_ENABLE BIT(47) 861#define MC_RLDRAM_MRS_ENABLE s2BIT(47)
862 862
863 u64 mc_rldram_interleave; 863 u64 mc_rldram_interleave;
864 864
@@ -871,11 +871,11 @@ struct XENA_dev_config {
871 u64 mc_rldram_ref_per; 871 u64 mc_rldram_ref_per;
872 u8 unused20[0x220 - 0x208]; 872 u8 unused20[0x220 - 0x208];
873 u64 mc_rldram_test_ctrl; 873 u64 mc_rldram_test_ctrl;
874#define MC_RLDRAM_TEST_MODE BIT(47) 874#define MC_RLDRAM_TEST_MODE s2BIT(47)
875#define MC_RLDRAM_TEST_WRITE BIT(7) 875#define MC_RLDRAM_TEST_WRITE s2BIT(7)
876#define MC_RLDRAM_TEST_GO BIT(15) 876#define MC_RLDRAM_TEST_GO s2BIT(15)
877#define MC_RLDRAM_TEST_DONE BIT(23) 877#define MC_RLDRAM_TEST_DONE s2BIT(23)
878#define MC_RLDRAM_TEST_PASS BIT(31) 878#define MC_RLDRAM_TEST_PASS s2BIT(31)
879 879
880 u8 unused21[0x240 - 0x228]; 880 u8 unused21[0x240 - 0x228];
881 u64 mc_rldram_test_add; 881 u64 mc_rldram_test_add;
@@ -888,7 +888,7 @@ struct XENA_dev_config {
888 888
889 u8 unused24_1[0x360 - 0x308]; 889 u8 unused24_1[0x360 - 0x308];
890 u64 mc_rldram_ctrl; 890 u64 mc_rldram_ctrl;
891#define MC_RLDRAM_ENABLE_ODT BIT(7) 891#define MC_RLDRAM_ENABLE_ODT s2BIT(7)
892 892
893 u8 unused24_2[0x640 - 0x368]; 893 u8 unused24_2[0x640 - 0x368];
894 u64 mc_rldram_ref_per_herc; 894 u64 mc_rldram_ref_per_herc;
@@ -906,24 +906,24 @@ struct XENA_dev_config {
906 /* XGXS control registers */ 906 /* XGXS control registers */
907 907
908 u64 xgxs_int_status; 908 u64 xgxs_int_status;
909#define XGXS_INT_STATUS_TXGXS BIT(0) 909#define XGXS_INT_STATUS_TXGXS s2BIT(0)
910#define XGXS_INT_STATUS_RXGXS BIT(1) 910#define XGXS_INT_STATUS_RXGXS s2BIT(1)
911 u64 xgxs_int_mask; 911 u64 xgxs_int_mask;
912#define XGXS_INT_MASK_TXGXS BIT(0) 912#define XGXS_INT_MASK_TXGXS s2BIT(0)
913#define XGXS_INT_MASK_RXGXS BIT(1) 913#define XGXS_INT_MASK_RXGXS s2BIT(1)
914 914
915 u64 xgxs_txgxs_err_reg; 915 u64 xgxs_txgxs_err_reg;
916#define TXGXS_ECC_SG_ERR BIT(7) 916#define TXGXS_ECC_SG_ERR s2BIT(7)
917#define TXGXS_ECC_DB_ERR BIT(15) 917#define TXGXS_ECC_DB_ERR s2BIT(15)
918#define TXGXS_ESTORE_UFLOW BIT(31) 918#define TXGXS_ESTORE_UFLOW s2BIT(31)
919#define TXGXS_TX_SM_ERR BIT(39) 919#define TXGXS_TX_SM_ERR s2BIT(39)
920 920
921 u64 xgxs_txgxs_err_mask; 921 u64 xgxs_txgxs_err_mask;
922 u64 xgxs_txgxs_err_alarm; 922 u64 xgxs_txgxs_err_alarm;
923 923
924 u64 xgxs_rxgxs_err_reg; 924 u64 xgxs_rxgxs_err_reg;
925#define RXGXS_ESTORE_OFLOW BIT(7) 925#define RXGXS_ESTORE_OFLOW s2BIT(7)
926#define RXGXS_RX_SM_ERR BIT(39) 926#define RXGXS_RX_SM_ERR s2BIT(39)
927 u64 xgxs_rxgxs_err_mask; 927 u64 xgxs_rxgxs_err_mask;
928 u64 xgxs_rxgxs_err_alarm; 928 u64 xgxs_rxgxs_err_alarm;
929 929
@@ -942,10 +942,10 @@ struct XENA_dev_config {
942#define SPI_CONTROL_BYTECNT(cnt) vBIT(cnt,29,3) 942#define SPI_CONTROL_BYTECNT(cnt) vBIT(cnt,29,3)
943#define SPI_CONTROL_CMD(cmd) vBIT(cmd,32,8) 943#define SPI_CONTROL_CMD(cmd) vBIT(cmd,32,8)
944#define SPI_CONTROL_ADDR(addr) vBIT(addr,40,24) 944#define SPI_CONTROL_ADDR(addr) vBIT(addr,40,24)
945#define SPI_CONTROL_SEL1 BIT(4) 945#define SPI_CONTROL_SEL1 s2BIT(4)
946#define SPI_CONTROL_REQ BIT(7) 946#define SPI_CONTROL_REQ s2BIT(7)
947#define SPI_CONTROL_NACK BIT(5) 947#define SPI_CONTROL_NACK s2BIT(5)
948#define SPI_CONTROL_DONE BIT(6) 948#define SPI_CONTROL_DONE s2BIT(6)
949 u64 spi_data; 949 u64 spi_data;
950#define SPI_DATA_WRITE(data,len) vBIT(data,0,len) 950#define SPI_DATA_WRITE(data,len) vBIT(data,0,len)
951}; 951};
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 22e4054d4fcb..b8c0e7b4ca1c 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -1716,7 +1716,7 @@ static int init_nic(struct s2io_nic *nic)
1716 MISC_LINK_STABILITY_PRD(3); 1716 MISC_LINK_STABILITY_PRD(3);
1717 writeq(val64, &bar0->misc_control); 1717 writeq(val64, &bar0->misc_control);
1718 val64 = readq(&bar0->pic_control2); 1718 val64 = readq(&bar0->pic_control2);
1719 val64 &= ~(BIT(13)|BIT(14)|BIT(15)); 1719 val64 &= ~(s2BIT(13)|s2BIT(14)|s2BIT(15));
1720 writeq(val64, &bar0->pic_control2); 1720 writeq(val64, &bar0->pic_control2);
1721 } 1721 }
1722 if (strstr(nic->product_name, "CX4")) { 1722 if (strstr(nic->product_name, "CX4")) {
@@ -2427,7 +2427,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
2427 } 2427 }
2428 if ((rxdp->Control_1 & RXD_OWN_XENA) && 2428 if ((rxdp->Control_1 & RXD_OWN_XENA) &&
2429 ((nic->rxd_mode == RXD_MODE_3B) && 2429 ((nic->rxd_mode == RXD_MODE_3B) &&
2430 (rxdp->Control_2 & BIT(0)))) { 2430 (rxdp->Control_2 & s2BIT(0)))) {
2431 mac_control->rings[ring_no].rx_curr_put_info. 2431 mac_control->rings[ring_no].rx_curr_put_info.
2432 offset = off; 2432 offset = off;
2433 goto end; 2433 goto end;
@@ -2540,7 +2540,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
2540 rxdp->Control_2 |= SET_BUFFER2_SIZE_3 2540 rxdp->Control_2 |= SET_BUFFER2_SIZE_3
2541 (dev->mtu + 4); 2541 (dev->mtu + 4);
2542 } 2542 }
2543 rxdp->Control_2 |= BIT(0); 2543 rxdp->Control_2 |= s2BIT(0);
2544 } 2544 }
2545 rxdp->Host_Control = (unsigned long) (skb); 2545 rxdp->Host_Control = (unsigned long) (skb);
2546 if (alloc_tab & ((1 << rxsync_frequency) - 1)) 2546 if (alloc_tab & ((1 << rxsync_frequency) - 1))
@@ -3377,7 +3377,7 @@ static void s2io_reset(struct s2io_nic * sp)
3377 pci_write_config_dword(sp->pdev, 0x68, 0x7C); 3377 pci_write_config_dword(sp->pdev, 0x68, 0x7C);
3378 3378
3379 /* Clearing PCI_STATUS error reflected here */ 3379 /* Clearing PCI_STATUS error reflected here */
3380 writeq(BIT(62), &bar0->txpic_int_reg); 3380 writeq(s2BIT(62), &bar0->txpic_int_reg);
3381 } 3381 }
3382 3382
3383 /* Reset device statistics maintained by OS */ 3383 /* Reset device statistics maintained by OS */
@@ -3575,7 +3575,7 @@ static int wait_for_msix_trans(struct s2io_nic *nic, int i)
3575 3575
3576 do { 3576 do {
3577 val64 = readq(&bar0->xmsi_access); 3577 val64 = readq(&bar0->xmsi_access);
3578 if (!(val64 & BIT(15))) 3578 if (!(val64 & s2BIT(15)))
3579 break; 3579 break;
3580 mdelay(1); 3580 mdelay(1);
3581 cnt++; 3581 cnt++;
@@ -3597,7 +3597,7 @@ static void restore_xmsi_data(struct s2io_nic *nic)
3597 for (i=0; i < MAX_REQUESTED_MSI_X; i++) { 3597 for (i=0; i < MAX_REQUESTED_MSI_X; i++) {
3598 writeq(nic->msix_info[i].addr, &bar0->xmsi_address); 3598 writeq(nic->msix_info[i].addr, &bar0->xmsi_address);
3599 writeq(nic->msix_info[i].data, &bar0->xmsi_data); 3599 writeq(nic->msix_info[i].data, &bar0->xmsi_data);
3600 val64 = (BIT(7) | BIT(15) | vBIT(i, 26, 6)); 3600 val64 = (s2BIT(7) | s2BIT(15) | vBIT(i, 26, 6));
3601 writeq(val64, &bar0->xmsi_access); 3601 writeq(val64, &bar0->xmsi_access);
3602 if (wait_for_msix_trans(nic, i)) { 3602 if (wait_for_msix_trans(nic, i)) {
3603 DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__); 3603 DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__);
@@ -3614,7 +3614,7 @@ static void store_xmsi_data(struct s2io_nic *nic)
3614 3614
3615 /* Store and display */ 3615 /* Store and display */
3616 for (i=0; i < MAX_REQUESTED_MSI_X; i++) { 3616 for (i=0; i < MAX_REQUESTED_MSI_X; i++) {
3617 val64 = (BIT(15) | vBIT(i, 26, 6)); 3617 val64 = (s2BIT(15) | vBIT(i, 26, 6));
3618 writeq(val64, &bar0->xmsi_access); 3618 writeq(val64, &bar0->xmsi_access);
3619 if (wait_for_msix_trans(nic, i)) { 3619 if (wait_for_msix_trans(nic, i)) {
3620 DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__); 3620 DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__);
@@ -4634,7 +4634,7 @@ static void s2io_updt_stats(struct s2io_nic *sp)
4634 do { 4634 do {
4635 udelay(100); 4635 udelay(100);
4636 val64 = readq(&bar0->stat_cfg); 4636 val64 = readq(&bar0->stat_cfg);
4637 if (!(val64 & BIT(0))) 4637 if (!(val64 & s2BIT(0)))
4638 break; 4638 break;
4639 cnt++; 4639 cnt++;
4640 if (cnt == 5) 4640 if (cnt == 5)
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h
index f6b45565304f..cc1797a071aa 100644
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -14,7 +14,7 @@
14#define _S2IO_H 14#define _S2IO_H
15 15
16#define TBD 0 16#define TBD 0
17#define BIT(loc) (0x8000000000000000ULL >> (loc)) 17#define s2BIT(loc) (0x8000000000000000ULL >> (loc))
18#define vBIT(val, loc, sz) (((u64)val) << (64-loc-sz)) 18#define vBIT(val, loc, sz) (((u64)val) << (64-loc-sz))
19#define INV(d) ((d&0xff)<<24) | (((d>>8)&0xff)<<16) | (((d>>16)&0xff)<<8)| ((d>>24)&0xff) 19#define INV(d) ((d&0xff)<<24) | (((d>>8)&0xff)<<16) | (((d>>16)&0xff)<<8)| ((d>>24)&0xff)
20 20
@@ -473,42 +473,42 @@ struct TxFIFO_element {
473 473
474 u64 List_Control; 474 u64 List_Control;
475#define TX_FIFO_LAST_TXD_NUM( val) vBIT(val,0,8) 475#define TX_FIFO_LAST_TXD_NUM( val) vBIT(val,0,8)
476#define TX_FIFO_FIRST_LIST BIT(14) 476#define TX_FIFO_FIRST_LIST s2BIT(14)
477#define TX_FIFO_LAST_LIST BIT(15) 477#define TX_FIFO_LAST_LIST s2BIT(15)
478#define TX_FIFO_FIRSTNLAST_LIST vBIT(3,14,2) 478#define TX_FIFO_FIRSTNLAST_LIST vBIT(3,14,2)
479#define TX_FIFO_SPECIAL_FUNC BIT(23) 479#define TX_FIFO_SPECIAL_FUNC s2BIT(23)
480#define TX_FIFO_DS_NO_SNOOP BIT(31) 480#define TX_FIFO_DS_NO_SNOOP s2BIT(31)
481#define TX_FIFO_BUFF_NO_SNOOP BIT(30) 481#define TX_FIFO_BUFF_NO_SNOOP s2BIT(30)
482}; 482};
483 483
484/* Tx descriptor structure */ 484/* Tx descriptor structure */
485struct TxD { 485struct TxD {
486 u64 Control_1; 486 u64 Control_1;
487/* bit mask */ 487/* bit mask */
488#define TXD_LIST_OWN_XENA BIT(7) 488#define TXD_LIST_OWN_XENA s2BIT(7)
489#define TXD_T_CODE (BIT(12)|BIT(13)|BIT(14)|BIT(15)) 489#define TXD_T_CODE (s2BIT(12)|s2BIT(13)|s2BIT(14)|s2BIT(15))
490#define TXD_T_CODE_OK(val) (|(val & TXD_T_CODE)) 490#define TXD_T_CODE_OK(val) (|(val & TXD_T_CODE))
491#define GET_TXD_T_CODE(val) ((val & TXD_T_CODE)<<12) 491#define GET_TXD_T_CODE(val) ((val & TXD_T_CODE)<<12)
492#define TXD_GATHER_CODE (BIT(22) | BIT(23)) 492#define TXD_GATHER_CODE (s2BIT(22) | s2BIT(23))
493#define TXD_GATHER_CODE_FIRST BIT(22) 493#define TXD_GATHER_CODE_FIRST s2BIT(22)
494#define TXD_GATHER_CODE_LAST BIT(23) 494#define TXD_GATHER_CODE_LAST s2BIT(23)
495#define TXD_TCP_LSO_EN BIT(30) 495#define TXD_TCP_LSO_EN s2BIT(30)
496#define TXD_UDP_COF_EN BIT(31) 496#define TXD_UDP_COF_EN s2BIT(31)
497#define TXD_UFO_EN BIT(31) | BIT(30) 497#define TXD_UFO_EN s2BIT(31) | s2BIT(30)
498#define TXD_TCP_LSO_MSS(val) vBIT(val,34,14) 498#define TXD_TCP_LSO_MSS(val) vBIT(val,34,14)
499#define TXD_UFO_MSS(val) vBIT(val,34,14) 499#define TXD_UFO_MSS(val) vBIT(val,34,14)
500#define TXD_BUFFER0_SIZE(val) vBIT(val,48,16) 500#define TXD_BUFFER0_SIZE(val) vBIT(val,48,16)
501 501
502 u64 Control_2; 502 u64 Control_2;
503#define TXD_TX_CKO_CONTROL (BIT(5)|BIT(6)|BIT(7)) 503#define TXD_TX_CKO_CONTROL (s2BIT(5)|s2BIT(6)|s2BIT(7))
504#define TXD_TX_CKO_IPV4_EN BIT(5) 504#define TXD_TX_CKO_IPV4_EN s2BIT(5)
505#define TXD_TX_CKO_TCP_EN BIT(6) 505#define TXD_TX_CKO_TCP_EN s2BIT(6)
506#define TXD_TX_CKO_UDP_EN BIT(7) 506#define TXD_TX_CKO_UDP_EN s2BIT(7)
507#define TXD_VLAN_ENABLE BIT(15) 507#define TXD_VLAN_ENABLE s2BIT(15)
508#define TXD_VLAN_TAG(val) vBIT(val,16,16) 508#define TXD_VLAN_TAG(val) vBIT(val,16,16)
509#define TXD_INT_NUMBER(val) vBIT(val,34,6) 509#define TXD_INT_NUMBER(val) vBIT(val,34,6)
510#define TXD_INT_TYPE_PER_LIST BIT(47) 510#define TXD_INT_TYPE_PER_LIST s2BIT(47)
511#define TXD_INT_TYPE_UTILZ BIT(46) 511#define TXD_INT_TYPE_UTILZ s2BIT(46)
512#define TXD_SET_MARKER vBIT(0x6,0,4) 512#define TXD_SET_MARKER vBIT(0x6,0,4)
513 513
514 u64 Buffer_Pointer; 514 u64 Buffer_Pointer;
@@ -525,14 +525,14 @@ struct list_info_hold {
525struct RxD_t { 525struct RxD_t {
526 u64 Host_Control; /* reserved for host */ 526 u64 Host_Control; /* reserved for host */
527 u64 Control_1; 527 u64 Control_1;
528#define RXD_OWN_XENA BIT(7) 528#define RXD_OWN_XENA s2BIT(7)
529#define RXD_T_CODE (BIT(12)|BIT(13)|BIT(14)|BIT(15)) 529#define RXD_T_CODE (s2BIT(12)|s2BIT(13)|s2BIT(14)|s2BIT(15))
530#define RXD_FRAME_PROTO vBIT(0xFFFF,24,8) 530#define RXD_FRAME_PROTO vBIT(0xFFFF,24,8)
531#define RXD_FRAME_PROTO_IPV4 BIT(27) 531#define RXD_FRAME_PROTO_IPV4 s2BIT(27)
532#define RXD_FRAME_PROTO_IPV6 BIT(28) 532#define RXD_FRAME_PROTO_IPV6 s2BIT(28)
533#define RXD_FRAME_IP_FRAG BIT(29) 533#define RXD_FRAME_IP_FRAG s2BIT(29)
534#define RXD_FRAME_PROTO_TCP BIT(30) 534#define RXD_FRAME_PROTO_TCP s2BIT(30)
535#define RXD_FRAME_PROTO_UDP BIT(31) 535#define RXD_FRAME_PROTO_UDP s2BIT(31)
536#define TCP_OR_UDP_FRAME (RXD_FRAME_PROTO_TCP | RXD_FRAME_PROTO_UDP) 536#define TCP_OR_UDP_FRAME (RXD_FRAME_PROTO_TCP | RXD_FRAME_PROTO_UDP)
537#define RXD_GET_L3_CKSUM(val) ((u16)(val>> 16) & 0xFFFF) 537#define RXD_GET_L3_CKSUM(val) ((u16)(val>> 16) & 0xFFFF)
538#define RXD_GET_L4_CKSUM(val) ((u16)(val) & 0xFFFF) 538#define RXD_GET_L4_CKSUM(val) ((u16)(val) & 0xFFFF)
@@ -998,26 +998,26 @@ static inline void SPECIAL_REG_WRITE(u64 val, void __iomem *addr, int order)
998/* Interrupt masks for the general interrupt mask register */ 998/* Interrupt masks for the general interrupt mask register */
999#define DISABLE_ALL_INTRS 0xFFFFFFFFFFFFFFFFULL 999#define DISABLE_ALL_INTRS 0xFFFFFFFFFFFFFFFFULL
1000 1000
1001#define TXPIC_INT_M BIT(0) 1001#define TXPIC_INT_M s2BIT(0)
1002#define TXDMA_INT_M BIT(1) 1002#define TXDMA_INT_M s2BIT(1)
1003#define TXMAC_INT_M BIT(2) 1003#define TXMAC_INT_M s2BIT(2)
1004#define TXXGXS_INT_M BIT(3) 1004#define TXXGXS_INT_M s2BIT(3)
1005#define TXTRAFFIC_INT_M BIT(8) 1005#define TXTRAFFIC_INT_M s2BIT(8)
1006#define PIC_RX_INT_M BIT(32) 1006#define PIC_RX_INT_M s2BIT(32)
1007#define RXDMA_INT_M BIT(33) 1007#define RXDMA_INT_M s2BIT(33)
1008#define RXMAC_INT_M BIT(34) 1008#define RXMAC_INT_M s2BIT(34)
1009#define MC_INT_M BIT(35) 1009#define MC_INT_M s2BIT(35)
1010#define RXXGXS_INT_M BIT(36) 1010#define RXXGXS_INT_M s2BIT(36)
1011#define RXTRAFFIC_INT_M BIT(40) 1011#define RXTRAFFIC_INT_M s2BIT(40)
1012 1012
1013/* PIC level Interrupts TODO*/ 1013/* PIC level Interrupts TODO*/
1014 1014
1015/* DMA level Inressupts */ 1015/* DMA level Inressupts */
1016#define TXDMA_PFC_INT_M BIT(0) 1016#define TXDMA_PFC_INT_M s2BIT(0)
1017#define TXDMA_PCC_INT_M BIT(2) 1017#define TXDMA_PCC_INT_M s2BIT(2)
1018 1018
1019/* PFC block interrupts */ 1019/* PFC block interrupts */
1020#define PFC_MISC_ERR_1 BIT(0) /* Interrupt to indicate FIFO full */ 1020#define PFC_MISC_ERR_1 s2BIT(0) /* Interrupt to indicate FIFO full */
1021 1021
1022/* PCC block interrupts. */ 1022/* PCC block interrupts. */
1023#define PCC_FB_ECC_ERR vBIT(0xff, 16, 8) /* Interrupt to indicate 1023#define PCC_FB_ECC_ERR vBIT(0xff, 16, 8) /* Interrupt to indicate
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index fab055ffcc90..571060a3c91e 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -46,7 +46,7 @@
46#include <linux/vmalloc.h> 46#include <linux/vmalloc.h>
47#include <linux/wait.h> 47#include <linux/wait.h>
48#include <linux/workqueue.h> 48#include <linux/workqueue.h>
49#include <asm/bitops.h> 49#include <linux/bitops.h>
50#include <asm/pci-bridge.h> 50#include <asm/pci-bridge.h>
51#include <net/checksum.h> 51#include <net/checksum.h>
52 52
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 76e55612430b..a7afeea156bd 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -34,9 +34,9 @@
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/spinlock.h> 35#include <linux/spinlock.h>
36#include <linux/dma-mapping.h> 36#include <linux/dma-mapping.h>
37#include <linux/bitops.h>
37 38
38#include <asm/processor.h> 39#include <asm/processor.h>
39#include <asm/bitops.h>
40#include <asm/io.h> 40#include <asm/io.h>
41#include <asm/dma.h> 41#include <asm/dma.h>
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
index 8f198befba39..cb51dc51cce6 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
@@ -29,7 +29,7 @@
29#include "bcm43xx_radio.h" 29#include "bcm43xx_radio.h"
30#include "bcm43xx.h" 30#include "bcm43xx.h"
31 31
32#include <asm/bitops.h> 32#include <linux/bitops.h>
33 33
34 34
35static void bcm43xx_led_changestate(struct bcm43xx_led *led) 35static void bcm43xx_led_changestate(struct bcm43xx_led *led)
diff --git a/drivers/net/wireless/hostap/hostap_common.h b/drivers/net/wireless/hostap/hostap_common.h
index ceb7f1e5e9e0..517f89845144 100644
--- a/drivers/net/wireless/hostap/hostap_common.h
+++ b/drivers/net/wireless/hostap/hostap_common.h
@@ -4,9 +4,6 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/if_ether.h> 5#include <linux/if_ether.h>
6 6
7#define BIT(x) (1 << (x))
8
9
10/* IEEE 802.11 defines */ 7/* IEEE 802.11 defines */
11 8
12/* Information Element IDs */ 9/* Information Element IDs */
diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c
index 40f516d42c5e..d8f5efcfcab9 100644
--- a/drivers/net/wireless/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/hostap/hostap_ioctl.c
@@ -2920,7 +2920,7 @@ static int prism2_ioctl_priv_monitor(struct net_device *dev, int *i)
2920 2920
2921 printk(KERN_DEBUG "%s: process %d (%s) used deprecated iwpriv monitor " 2921 printk(KERN_DEBUG "%s: process %d (%s) used deprecated iwpriv monitor "
2922 "- update software to use iwconfig mode monitor\n", 2922 "- update software to use iwconfig mode monitor\n",
2923 dev->name, current->pid, current->comm); 2923 dev->name, task_pid_nr(current), current->comm);
2924 2924
2925 /* Backward compatibility code - this can be removed at some point */ 2925 /* Backward compatibility code - this can be removed at some point */
2926 2926
diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c
index 67d28ee80f22..c5e0d89c3ece 100644
--- a/drivers/pcmcia/m32r_pcc.c
+++ b/drivers/pcmcia/m32r_pcc.c
@@ -22,9 +22,9 @@
22#include <linux/workqueue.h> 22#include <linux/workqueue.h>
23#include <linux/interrupt.h> 23#include <linux/interrupt.h>
24#include <linux/platform_device.h> 24#include <linux/platform_device.h>
25#include <linux/bitops.h>
25#include <asm/irq.h> 26#include <asm/irq.h>
26#include <asm/io.h> 27#include <asm/io.h>
27#include <asm/bitops.h>
28#include <asm/system.h> 28#include <asm/system.h>
29#include <asm/addrspace.h> 29#include <asm/addrspace.h>
30 30
diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c
index b01985498460..d182760f035b 100644
--- a/drivers/pcmcia/m8xx_pcmcia.c
+++ b/drivers/pcmcia/m8xx_pcmcia.c
@@ -48,9 +48,9 @@
48#include <linux/delay.h> 48#include <linux/delay.h>
49#include <linux/interrupt.h> 49#include <linux/interrupt.h>
50#include <linux/fsl_devices.h> 50#include <linux/fsl_devices.h>
51#include <linux/bitops.h>
51 52
52#include <asm/io.h> 53#include <asm/io.h>
53#include <asm/bitops.h>
54#include <asm/system.h> 54#include <asm/system.h>
55#include <asm/time.h> 55#include <asm/time.h>
56#include <asm/mpc8xx.h> 56#include <asm/mpc8xx.h>
diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c
index 397f4ce849dc..87b3493d88e5 100644
--- a/drivers/ps3/ps3av.c
+++ b/drivers/ps3/ps3av.c
@@ -729,7 +729,7 @@ static void ps3av_monitor_info_dump(const struct ps3av_pkt_av_get_monitor_info *
729 729
730static const struct ps3av_monitor_quirk { 730static const struct ps3av_monitor_quirk {
731 const char *monitor_name; 731 const char *monitor_name;
732 u32 clear_60, clear_50, clear_vesa; 732 u32 clear_60;
733} ps3av_monitor_quirks[] = { 733} ps3av_monitor_quirks[] = {
734 { 734 {
735 .monitor_name = "DELL 2007WFP", 735 .monitor_name = "DELL 2007WFP",
@@ -757,10 +757,6 @@ static void ps3av_fixup_monitor_info(struct ps3av_info_monitor *info)
757 quirk->monitor_name); 757 quirk->monitor_name);
758 info->res_60.res_bits &= ~quirk->clear_60; 758 info->res_60.res_bits &= ~quirk->clear_60;
759 info->res_60.native &= ~quirk->clear_60; 759 info->res_60.native &= ~quirk->clear_60;
760 info->res_50.res_bits &= ~quirk->clear_50;
761 info->res_50.native &= ~quirk->clear_50;
762 info->res_vesa.res_bits &= ~quirk->clear_vesa;
763 info->res_vesa.native &= ~quirk->clear_vesa;
764 break; 760 break;
765 } 761 }
766 } 762 }
diff --git a/drivers/ps3/vuart.c b/drivers/ps3/vuart.c
index bea25a1391ee..9dea585ef806 100644
--- a/drivers/ps3/vuart.c
+++ b/drivers/ps3/vuart.c
@@ -22,11 +22,11 @@
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/interrupt.h> 23#include <linux/interrupt.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/bitops.h>
25#include <asm/ps3.h> 26#include <asm/ps3.h>
26 27
27#include <asm/firmware.h> 28#include <asm/firmware.h>
28#include <asm/lv1call.h> 29#include <asm/lv1call.h>
29#include <asm/bitops.h>
30 30
31#include "vuart.h" 31#include "vuart.h"
32 32
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index e4bf68ca96f7..2fd49edcc712 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -21,11 +21,11 @@
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/string.h> 22#include <linux/string.h>
23#include <linux/pm.h> 23#include <linux/pm.h>
24#include <linux/bitops.h>
24 25
25#include <linux/amba/bus.h> 26#include <linux/amba/bus.h>
26 27
27#include <asm/io.h> 28#include <asm/io.h>
28#include <asm/bitops.h>
29#include <asm/hardware.h> 29#include <asm/hardware.h>
30#include <asm/irq.h> 30#include <asm/irq.h>
31#include <asm/rtc.h> 31#include <asm/rtc.h>
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 0918b787c4dd..6f1e9a9804bc 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -29,8 +29,8 @@
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
30#include <linux/string.h> 30#include <linux/string.h>
31#include <linux/pm.h> 31#include <linux/pm.h>
32#include <linux/bitops.h>
32 33
33#include <asm/bitops.h>
34#include <asm/hardware.h> 34#include <asm/hardware.h>
35#include <asm/irq.h> 35#include <asm/irq.h>
36#include <asm/rtc.h> 36#include <asm/rtc.h>
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
index 16ea828e99f7..ef7bc0a125ef 100644
--- a/drivers/s390/cio/idset.c
+++ b/drivers/s390/cio/idset.c
@@ -6,7 +6,7 @@
6 */ 6 */
7 7
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <asm/bitops.h> 9#include <linux/bitops.h>
10#include "idset.h" 10#include "idset.h"
11#include "css.h" 11#include "css.h"
12 12
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 399695f7b1af..3561982749e3 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -59,13 +59,13 @@
59 * 1.15 Changed for 2.6 Kernel No longer compiles on 2.4 or lower 59 * 1.15 Changed for 2.6 Kernel No longer compiles on 2.4 or lower
60 * 1.25 Added Packing support 60 * 1.25 Added Packing support
61 */ 61 */
62#include <asm/bitops.h>
63#include <asm/ccwdev.h> 62#include <asm/ccwdev.h>
64#include <asm/ccwgroup.h> 63#include <asm/ccwgroup.h>
65#include <asm/debug.h> 64#include <asm/debug.h>
66#include <asm/idals.h> 65#include <asm/idals.h>
67#include <asm/io.h> 66#include <asm/io.h>
68 67
68#include <linux/bitops.h>
69#include <linux/ctype.h> 69#include <linux/ctype.h>
70#include <linux/delay.h> 70#include <linux/delay.h>
71#include <linux/errno.h> 71#include <linux/errno.h>
diff --git a/drivers/scsi/FlashPoint.c b/drivers/scsi/FlashPoint.c
index a7f916c0c9cd..1c9078191d9e 100644
--- a/drivers/scsi/FlashPoint.c
+++ b/drivers/scsi/FlashPoint.c
@@ -25,9 +25,6 @@
25 25
26#define FAILURE 0xFFFFFFFFL 26#define FAILURE 0xFFFFFFFFL
27 27
28#define BIT(x) ((unsigned char)(1<<(x))) /* single-bit mask in bit position x */
29#define BITW(x) ((unsigned short)(1<<(x))) /* single-bit mask in bit position x */
30
31struct sccb; 28struct sccb;
32typedef void (*CALL_BK_FN) (struct sccb *); 29typedef void (*CALL_BK_FN) (struct sccb *);
33 30
@@ -374,9 +371,9 @@ typedef struct SCCBscam_info {
374#define SCAM_ENABLED BIT(2) 371#define SCAM_ENABLED BIT(2)
375#define SCAM_LEVEL2 BIT(3) 372#define SCAM_LEVEL2 BIT(3)
376 373
377#define RENEGO_ENA BITW(10) 374#define RENEGO_ENA BIT(10)
378#define CONNIO_ENA BITW(11) 375#define CONNIO_ENA BIT(11)
379#define GREEN_PC_ENA BITW(12) 376#define GREEN_PC_ENA BIT(12)
380 377
381#define AUTO_RATE_00 00 378#define AUTO_RATE_00 00
382#define AUTO_RATE_05 01 379#define AUTO_RATE_05 01
@@ -511,23 +508,23 @@ typedef struct SCCBscam_info {
511 508
512#define hp_intena 0x40 509#define hp_intena 0x40
513 510
514#define RESET BITW(7) 511#define RESET BIT(7)
515#define PROG_HLT BITW(6) 512#define PROG_HLT BIT(6)
516#define PARITY BITW(5) 513#define PARITY BIT(5)
517#define FIFO BITW(4) 514#define FIFO BIT(4)
518#define SEL BITW(3) 515#define SEL BIT(3)
519#define SCAM_SEL BITW(2) 516#define SCAM_SEL BIT(2)
520#define RSEL BITW(1) 517#define RSEL BIT(1)
521#define TIMEOUT BITW(0) 518#define TIMEOUT BIT(0)
522#define BUS_FREE BITW(15) 519#define BUS_FREE BIT(15)
523#define XFER_CNT_0 BITW(14) 520#define XFER_CNT_0 BIT(14)
524#define PHASE BITW(13) 521#define PHASE BIT(13)
525#define IUNKWN BITW(12) 522#define IUNKWN BIT(12)
526#define ICMD_COMP BITW(11) 523#define ICMD_COMP BIT(11)
527#define ITICKLE BITW(10) 524#define ITICKLE BIT(10)
528#define IDO_STRT BITW(9) 525#define IDO_STRT BIT(9)
529#define ITAR_DISC BITW(8) 526#define ITAR_DISC BIT(8)
530#define AUTO_INT (BITW(12)+BITW(11)+BITW(10)+BITW(9)+BITW(8)) 527#define AUTO_INT (BIT(12)+BIT(11)+BIT(10)+BIT(9)+BIT(8))
531#define CLR_ALL_INT 0xFFFF 528#define CLR_ALL_INT 0xFFFF
532#define CLR_ALL_INT_1 0xFF00 529#define CLR_ALL_INT_1 0xFF00
533 530
@@ -674,37 +671,37 @@ typedef struct SCCBscam_info {
674#define BIOS_DATA_OFFSET 0x60 671#define BIOS_DATA_OFFSET 0x60
675#define BIOS_RELATIVE_CARD 0x64 672#define BIOS_RELATIVE_CARD 0x64
676 673
677#define AR3 (BITW(9) + BITW(8)) 674#define AR3 (BIT(9) + BIT(8))
678#define SDATA BITW(10) 675#define SDATA BIT(10)
679 676
680#define CRD_OP BITW(11) /* Cmp Reg. w/ Data */ 677#define CRD_OP BIT(11) /* Cmp Reg. w/ Data */
681 678
682#define CRR_OP BITW(12) /* Cmp Reg. w. Reg. */ 679#define CRR_OP BIT(12) /* Cmp Reg. w. Reg. */
683 680
684#define CPE_OP (BITW(14)+BITW(11)) /* Cmp SCSI phs & Branch EQ */ 681#define CPE_OP (BIT(14)+BIT(11)) /* Cmp SCSI phs & Branch EQ */
685 682
686#define CPN_OP (BITW(14)+BITW(12)) /* Cmp SCSI phs & Branch NOT EQ */ 683#define CPN_OP (BIT(14)+BIT(12)) /* Cmp SCSI phs & Branch NOT EQ */
687 684
688#define ADATA_OUT 0x00 685#define ADATA_OUT 0x00
689#define ADATA_IN BITW(8) 686#define ADATA_IN BIT(8)
690#define ACOMMAND BITW(10) 687#define ACOMMAND BIT(10)
691#define ASTATUS (BITW(10)+BITW(8)) 688#define ASTATUS (BIT(10)+BIT(8))
692#define AMSG_OUT (BITW(10)+BITW(9)) 689#define AMSG_OUT (BIT(10)+BIT(9))
693#define AMSG_IN (BITW(10)+BITW(9)+BITW(8)) 690#define AMSG_IN (BIT(10)+BIT(9)+BIT(8))
694 691
695#define BRH_OP BITW(13) /* Branch */ 692#define BRH_OP BIT(13) /* Branch */
696 693
697#define ALWAYS 0x00 694#define ALWAYS 0x00
698#define EQUAL BITW(8) 695#define EQUAL BIT(8)
699#define NOT_EQ BITW(9) 696#define NOT_EQ BIT(9)
700 697
701#define TCB_OP (BITW(13)+BITW(11)) /* Test condition & branch */ 698#define TCB_OP (BIT(13)+BIT(11)) /* Test condition & branch */
702 699
703#define FIFO_0 BITW(10) 700#define FIFO_0 BIT(10)
704 701
705#define MPM_OP BITW(15) /* Match phase and move data */ 702#define MPM_OP BIT(15) /* Match phase and move data */
706 703
707#define MRR_OP BITW(14) /* Move DReg. to Reg. */ 704#define MRR_OP BIT(14) /* Move DReg. to Reg. */
708 705
709#define S_IDREG (BIT(2)+BIT(1)+BIT(0)) 706#define S_IDREG (BIT(2)+BIT(1)+BIT(0))
710 707
@@ -712,9 +709,9 @@ typedef struct SCCBscam_info {
712#define D_AR1 BIT(0) 709#define D_AR1 BIT(0)
713#define D_BUCKET (BIT(2) + BIT(1) + BIT(0)) 710#define D_BUCKET (BIT(2) + BIT(1) + BIT(0))
714 711
715#define RAT_OP (BITW(14)+BITW(13)+BITW(11)) 712#define RAT_OP (BIT(14)+BIT(13)+BIT(11))
716 713
717#define SSI_OP (BITW(15)+BITW(11)) 714#define SSI_OP (BIT(15)+BIT(11))
718 715
719#define SSI_ITAR_DISC (ITAR_DISC >> 8) 716#define SSI_ITAR_DISC (ITAR_DISC >> 8)
720#define SSI_IDO_STRT (IDO_STRT >> 8) 717#define SSI_IDO_STRT (IDO_STRT >> 8)
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 30905cebefbb..a5763c6e9362 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -521,7 +521,7 @@ config SCSI_DPT_I2O
521 521
522config SCSI_ADVANSYS 522config SCSI_ADVANSYS
523 tristate "AdvanSys SCSI support" 523 tristate "AdvanSys SCSI support"
524 depends on SCSI 524 depends on SCSI && VIRT_TO_BUS
525 depends on ISA || EISA || PCI 525 depends on ISA || EISA || PCI
526 help 526 help
527 This is a driver for all SCSI host adapters manufactured by 527 This is a driver for all SCSI host adapters manufactured by
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index fa7ba64483fb..252d1806467f 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -47,9 +47,9 @@
47#include <linux/scatterlist.h> 47#include <linux/scatterlist.h>
48#include <linux/delay.h> 48#include <linux/delay.h>
49#include <linux/mutex.h> 49#include <linux/mutex.h>
50#include <linux/bitops.h>
50 51
51#include <asm/io.h> 52#include <asm/io.h>
52#include <asm/bitops.h>
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54 54
55#include <scsi/scsi.h> 55#include <scsi/scsi.h>
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 7ef0afc3cd68..5f3a0d7b18de 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -285,7 +285,7 @@ static void sas_discover_domain(struct work_struct *work)
285 dev = port->port_dev; 285 dev = port->port_dev;
286 286
287 SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id, 287 SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id,
288 current->pid); 288 task_pid_nr(current));
289 289
290 switch (dev->dev_type) { 290 switch (dev->dev_type) {
291 case SAS_END_DEV: 291 case SAS_END_DEV:
@@ -320,7 +320,7 @@ static void sas_discover_domain(struct work_struct *work)
320 } 320 }
321 321
322 SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id, 322 SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id,
323 current->pid, error); 323 task_pid_nr(current), error);
324} 324}
325 325
326static void sas_revalidate_domain(struct work_struct *work) 326static void sas_revalidate_domain(struct work_struct *work)
@@ -334,12 +334,12 @@ static void sas_revalidate_domain(struct work_struct *work)
334 &port->disc.pending); 334 &port->disc.pending);
335 335
336 SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id, 336 SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
337 current->pid); 337 task_pid_nr(current));
338 if (port->port_dev) 338 if (port->port_dev)
339 res = sas_ex_revalidate_domain(port->port_dev); 339 res = sas_ex_revalidate_domain(port->port_dev);
340 340
341 SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n", 341 SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
342 port->id, current->pid, res); 342 port->id, task_pid_nr(current), res);
343} 343}
344 344
345/* ---------- Events ---------- */ 345/* ---------- Events ---------- */
diff --git a/drivers/scsi/nsp32.h b/drivers/scsi/nsp32.h
index a976e8193d16..6715ecb3bfca 100644
--- a/drivers/scsi/nsp32.h
+++ b/drivers/scsi/nsp32.h
@@ -69,11 +69,6 @@ typedef u32 u32_le;
69typedef u16 u16_le; 69typedef u16 u16_le;
70 70
71/* 71/*
72 * MACRO
73 */
74#define BIT(x) (1UL << (x))
75
76/*
77 * BASIC Definitions 72 * BASIC Definitions
78 */ 73 */
79#ifndef TRUE 74#ifndef TRUE
diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h
index b7f0fa246413..98397559c53b 100644
--- a/drivers/scsi/pcmcia/nsp_cs.h
+++ b/drivers/scsi/pcmcia/nsp_cs.h
@@ -24,7 +24,6 @@
24/************************************ 24/************************************
25 * Some useful macros... 25 * Some useful macros...
26 */ 26 */
27#define BIT(x) (1L << (x))
28 27
29/* SCSI initiator must be ID 7 */ 28/* SCSI initiator must be ID 7 */
30#define NSP_INITIATOR_ID 7 29#define NSP_INITIATOR_ID 7
diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
index 9bb3d1d2a925..fe415ec85655 100644
--- a/drivers/scsi/qla4xxx/ql4_fw.h
+++ b/drivers/scsi/qla4xxx/ql4_fw.h
@@ -671,7 +671,7 @@ struct continuation_t1_entry {
671#define ET_CONTINUE ET_CONT_T1 671#define ET_CONTINUE ET_CONT_T1
672 672
673/* Marker entry structure*/ 673/* Marker entry structure*/
674struct marker_entry { 674struct qla4_marker_entry {
675 struct qla4_header hdr; /* 00-03 */ 675 struct qla4_header hdr; /* 00-03 */
676 676
677 uint32_t system_defined; /* 04-07 */ 677 uint32_t system_defined; /* 04-07 */
diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c
index 5006ecb3ef5e..e4461b5d767a 100644
--- a/drivers/scsi/qla4xxx/ql4_iocb.c
+++ b/drivers/scsi/qla4xxx/ql4_iocb.c
@@ -69,7 +69,7 @@ static int qla4xxx_get_req_pkt(struct scsi_qla_host *ha,
69static int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha, 69static int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha,
70 struct ddb_entry *ddb_entry, int lun) 70 struct ddb_entry *ddb_entry, int lun)
71{ 71{
72 struct marker_entry *marker_entry; 72 struct qla4_marker_entry *marker_entry;
73 unsigned long flags = 0; 73 unsigned long flags = 0;
74 uint8_t status = QLA_SUCCESS; 74 uint8_t status = QLA_SUCCESS;
75 75
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 72229df9dc11..40604a092921 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -263,15 +263,15 @@ static unsigned int pl01x_get_mctrl(struct uart_port *port)
263 unsigned int result = 0; 263 unsigned int result = 0;
264 unsigned int status = readw(uap->port.membase + UART01x_FR); 264 unsigned int status = readw(uap->port.membase + UART01x_FR);
265 265
266#define BIT(uartbit, tiocmbit) \ 266#define TIOCMBIT(uartbit, tiocmbit) \
267 if (status & uartbit) \ 267 if (status & uartbit) \
268 result |= tiocmbit 268 result |= tiocmbit
269 269
270 BIT(UART01x_FR_DCD, TIOCM_CAR); 270 TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR);
271 BIT(UART01x_FR_DSR, TIOCM_DSR); 271 TIOCMBIT(UART01x_FR_DSR, TIOCM_DSR);
272 BIT(UART01x_FR_CTS, TIOCM_CTS); 272 TIOCMBIT(UART01x_FR_CTS, TIOCM_CTS);
273 BIT(UART011_FR_RI, TIOCM_RNG); 273 TIOCMBIT(UART011_FR_RI, TIOCM_RNG);
274#undef BIT 274#undef TIOCMBIT
275 return result; 275 return result;
276} 276}
277 277
@@ -282,18 +282,18 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl)
282 282
283 cr = readw(uap->port.membase + UART011_CR); 283 cr = readw(uap->port.membase + UART011_CR);
284 284
285#define BIT(tiocmbit, uartbit) \ 285#define TIOCMBIT(tiocmbit, uartbit) \
286 if (mctrl & tiocmbit) \ 286 if (mctrl & tiocmbit) \
287 cr |= uartbit; \ 287 cr |= uartbit; \
288 else \ 288 else \
289 cr &= ~uartbit 289 cr &= ~uartbit
290 290
291 BIT(TIOCM_RTS, UART011_CR_RTS); 291 TIOCMBIT(TIOCM_RTS, UART011_CR_RTS);
292 BIT(TIOCM_DTR, UART011_CR_DTR); 292 TIOCMBIT(TIOCM_DTR, UART011_CR_DTR);
293 BIT(TIOCM_OUT1, UART011_CR_OUT1); 293 TIOCMBIT(TIOCM_OUT1, UART011_CR_OUT1);
294 BIT(TIOCM_OUT2, UART011_CR_OUT2); 294 TIOCMBIT(TIOCM_OUT2, UART011_CR_OUT2);
295 BIT(TIOCM_LOOP, UART011_CR_LBE); 295 TIOCMBIT(TIOCM_LOOP, UART011_CR_LBE);
296#undef BIT 296#undef TIOCMBIT
297 297
298 writew(cr, uap->port.membase + UART011_CR); 298 writew(cr, uap->port.membase + UART011_CR);
299} 299}
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index 7e8724d3571f..f523cdf4b02b 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -442,11 +442,11 @@ static char *serial_version = "$Revision: 1.25 $";
442#include <asm/uaccess.h> 442#include <asm/uaccess.h>
443#include <linux/kernel.h> 443#include <linux/kernel.h>
444#include <linux/mutex.h> 444#include <linux/mutex.h>
445#include <linux/bitops.h>
445 446
446#include <asm/io.h> 447#include <asm/io.h>
447#include <asm/irq.h> 448#include <asm/irq.h>
448#include <asm/system.h> 449#include <asm/system.h>
449#include <asm/bitops.h>
450#include <linux/delay.h> 450#include <linux/delay.h>
451 451
452#include <asm/arch/svinto.h> 452#include <asm/arch/svinto.h>
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index f013b4012c9a..1f4f6d02fe25 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -460,7 +460,7 @@ static int checkintf(struct dev_state *ps, unsigned int ifnum)
460 return 0; 460 return 0;
461 /* if not yet claimed, claim it for the driver */ 461 /* if not yet claimed, claim it for the driver */
462 dev_warn(&ps->dev->dev, "usbfs: process %d (%s) did not claim interface %u before use\n", 462 dev_warn(&ps->dev->dev, "usbfs: process %d (%s) did not claim interface %u before use\n",
463 current->pid, current->comm, ifnum); 463 task_pid_nr(current), current->comm, ifnum);
464 return claimintf(ps, ifnum); 464 return claimintf(ps, ifnum);
465} 465}
466 466
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 73726c570a6e..1d174dcb3ac9 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -4006,7 +4006,7 @@ static int __init fsg_bind(struct usb_gadget *gadget)
4006 DBG(fsg, "removable=%d, stall=%d, buflen=%u\n", 4006 DBG(fsg, "removable=%d, stall=%d, buflen=%u\n",
4007 mod_data.removable, mod_data.can_stall, 4007 mod_data.removable, mod_data.can_stall,
4008 mod_data.buflen); 4008 mod_data.buflen);
4009 DBG(fsg, "I/O thread pid: %d\n", fsg->thread_task->pid); 4009 DBG(fsg, "I/O thread pid: %d\n", task_pid_nr(fsg->thread_task));
4010 4010
4011 set_bit(REGISTERED, &fsg->atomic_bitflags); 4011 set_bit(REGISTERED, &fsg->atomic_bitflags);
4012 4012
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 9bb2cbfe4a3d..5fb8675e0d6b 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -62,7 +62,7 @@ struct cfb_info {
62 struct display_switch *dispsw; 62 struct display_switch *dispsw;
63 struct display *display; 63 struct display *display;
64 struct pci_dev *dev; 64 struct pci_dev *dev;
65 unsigned char __iomem *region; 65 unsigned char __iomem *region;
66 unsigned char __iomem *regs; 66 unsigned char __iomem *regs;
67 u_int id; 67 u_int id;
68 int func_use_count; 68 int func_use_count;
@@ -97,11 +97,11 @@ MODULE_PARM_DESC(default_font, "Default font name");
97/* 97/*
98 * Our access methods. 98 * Our access methods.
99 */ 99 */
100#define cyber2000fb_writel(val,reg,cfb) writel(val, (cfb)->regs + (reg)) 100#define cyber2000fb_writel(val, reg, cfb) writel(val, (cfb)->regs + (reg))
101#define cyber2000fb_writew(val,reg,cfb) writew(val, (cfb)->regs + (reg)) 101#define cyber2000fb_writew(val, reg, cfb) writew(val, (cfb)->regs + (reg))
102#define cyber2000fb_writeb(val,reg,cfb) writeb(val, (cfb)->regs + (reg)) 102#define cyber2000fb_writeb(val, reg, cfb) writeb(val, (cfb)->regs + (reg))
103 103
104#define cyber2000fb_readb(reg,cfb) readb((cfb)->regs + (reg)) 104#define cyber2000fb_readb(reg, cfb) readb((cfb)->regs + (reg))
105 105
106static inline void 106static inline void
107cyber2000_crtcw(unsigned int reg, unsigned int val, struct cfb_info *cfb) 107cyber2000_crtcw(unsigned int reg, unsigned int val, struct cfb_info *cfb)
@@ -221,12 +221,8 @@ cyber2000fb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
221static void 221static void
222cyber2000fb_imageblit(struct fb_info *info, const struct fb_image *image) 222cyber2000fb_imageblit(struct fb_info *info, const struct fb_image *image)
223{ 223{
224// struct cfb_info *cfb = (struct cfb_info *)info; 224 cfb_imageblit(info, image);
225 225 return;
226// if (!(cfb->fb.var.accel_flags & FB_ACCELF_TEXT)) {
227 cfb_imageblit(info, image);
228 return;
229// }
230} 226}
231 227
232static int cyber2000fb_sync(struct fb_info *info) 228static int cyber2000fb_sync(struct fb_info *info)
@@ -277,12 +273,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
277 273
278 /* 274 /*
279 * Pseudocolour: 275 * Pseudocolour:
280 * 8 8 276 * 8 8
281 * pixel --/--+--/--> red lut --> red dac 277 * pixel --/--+--/--> red lut --> red dac
282 * | 8 278 * | 8
283 * +--/--> green lut --> green dac 279 * +--/--> green lut --> green dac
284 * | 8 280 * | 8
285 * +--/--> blue lut --> blue dac 281 * +--/--> blue lut --> blue dac
286 */ 282 */
287 case FB_VISUAL_PSEUDOCOLOR: 283 case FB_VISUAL_PSEUDOCOLOR:
288 if (regno >= NR_PALETTE) 284 if (regno >= NR_PALETTE)
@@ -292,9 +288,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
292 green >>= 8; 288 green >>= 8;
293 blue >>= 8; 289 blue >>= 8;
294 290
295 cfb->palette[regno].red = red; 291 cfb->palette[regno].red = red;
296 cfb->palette[regno].green = green; 292 cfb->palette[regno].green = green;
297 cfb->palette[regno].blue = blue; 293 cfb->palette[regno].blue = blue;
298 294
299 cyber2000fb_writeb(regno, 0x3c8, cfb); 295 cyber2000fb_writeb(regno, 0x3c8, cfb);
300 cyber2000fb_writeb(red, 0x3c9, cfb); 296 cyber2000fb_writeb(red, 0x3c9, cfb);
@@ -304,12 +300,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
304 300
305 /* 301 /*
306 * Direct colour: 302 * Direct colour:
307 * n rl 303 * n rl
308 * pixel --/--+--/--> red lut --> red dac 304 * pixel --/--+--/--> red lut --> red dac
309 * | gl 305 * | gl
310 * +--/--> green lut --> green dac 306 * +--/--> green lut --> green dac
311 * | bl 307 * | bl
312 * +--/--> blue lut --> blue dac 308 * +--/--> blue lut --> blue dac
313 * n = bpp, rl = red length, gl = green length, bl = blue length 309 * n = bpp, rl = red length, gl = green length, bl = blue length
314 */ 310 */
315 case FB_VISUAL_DIRECTCOLOR: 311 case FB_VISUAL_DIRECTCOLOR:
@@ -325,9 +321,11 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
325 * to the high 6 bits of the LUT. 321 * to the high 6 bits of the LUT.
326 */ 322 */
327 cyber2000fb_writeb(regno << 2, 0x3c8, cfb); 323 cyber2000fb_writeb(regno << 2, 0x3c8, cfb);
328 cyber2000fb_writeb(cfb->palette[regno >> 1].red, 0x3c9, cfb); 324 cyber2000fb_writeb(cfb->palette[regno >> 1].red,
325 0x3c9, cfb);
329 cyber2000fb_writeb(green, 0x3c9, cfb); 326 cyber2000fb_writeb(green, 0x3c9, cfb);
330 cyber2000fb_writeb(cfb->palette[regno >> 1].blue, 0x3c9, cfb); 327 cyber2000fb_writeb(cfb->palette[regno >> 1].blue,
328 0x3c9, cfb);
331 329
332 green = cfb->palette[regno << 3].green; 330 green = cfb->palette[regno << 3].green;
333 331
@@ -335,9 +333,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
335 } 333 }
336 334
337 if (var->green.length >= 5 && regno < 32) { 335 if (var->green.length >= 5 && regno < 32) {
338 cfb->palette[regno << 3].red = red; 336 cfb->palette[regno << 3].red = red;
339 cfb->palette[regno << 3].green = green; 337 cfb->palette[regno << 3].green = green;
340 cfb->palette[regno << 3].blue = blue; 338 cfb->palette[regno << 3].blue = blue;
341 339
342 /* 340 /*
343 * The 5 bits of each colour component are 341 * The 5 bits of each colour component are
@@ -351,9 +349,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
351 } 349 }
352 350
353 if (var->green.length == 4 && regno < 16) { 351 if (var->green.length == 4 && regno < 16) {
354 cfb->palette[regno << 4].red = red; 352 cfb->palette[regno << 4].red = red;
355 cfb->palette[regno << 4].green = green; 353 cfb->palette[regno << 4].green = green;
356 cfb->palette[regno << 4].blue = blue; 354 cfb->palette[regno << 4].blue = blue;
357 355
358 /* 356 /*
359 * The 5 bits of each colour component are 357 * The 5 bits of each colour component are
@@ -377,12 +375,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
377 375
378 /* 376 /*
379 * True colour: 377 * True colour:
380 * n rl 378 * n rl
381 * pixel --/--+--/--> red dac 379 * pixel --/--+--/--> red dac
382 * | gl 380 * | gl
383 * +--/--> green dac 381 * +--/--> green dac
384 * | bl 382 * | bl
385 * +--/--> blue dac 383 * +--/--> blue dac
386 * n = bpp, rl = red length, gl = green length, bl = blue length 384 * n = bpp, rl = red length, gl = green length, bl = blue length
387 */ 385 */
388 case FB_VISUAL_TRUECOLOR: 386 case FB_VISUAL_TRUECOLOR:
@@ -494,9 +492,9 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)
494 492
495 /* PLL registers */ 493 /* PLL registers */
496 cyber2000_grphw(EXT_DCLK_MULT, hw->clock_mult, cfb); 494 cyber2000_grphw(EXT_DCLK_MULT, hw->clock_mult, cfb);
497 cyber2000_grphw(EXT_DCLK_DIV, hw->clock_div, cfb); 495 cyber2000_grphw(EXT_DCLK_DIV, hw->clock_div, cfb);
498 cyber2000_grphw(EXT_MCLK_MULT, cfb->mclk_mult, cfb); 496 cyber2000_grphw(EXT_MCLK_MULT, cfb->mclk_mult, cfb);
499 cyber2000_grphw(EXT_MCLK_DIV, cfb->mclk_div, cfb); 497 cyber2000_grphw(EXT_MCLK_DIV, cfb->mclk_div, cfb);
500 cyber2000_grphw(0x90, 0x01, cfb); 498 cyber2000_grphw(0x90, 0x01, cfb);
501 cyber2000_grphw(0xb9, 0x80, cfb); 499 cyber2000_grphw(0xb9, 0x80, cfb);
502 cyber2000_grphw(0xb9, 0x00, cfb); 500 cyber2000_grphw(0xb9, 0x00, cfb);
@@ -515,8 +513,8 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)
515 /* 513 /*
516 * Set up accelerator registers 514 * Set up accelerator registers
517 */ 515 */
518 cyber2000fb_writew(hw->width, CO_REG_SRC_WIDTH, cfb); 516 cyber2000fb_writew(hw->width, CO_REG_SRC_WIDTH, cfb);
519 cyber2000fb_writew(hw->width, CO_REG_DEST_WIDTH, cfb); 517 cyber2000fb_writew(hw->width, CO_REG_DEST_WIDTH, cfb);
520 cyber2000fb_writeb(hw->co_pixfmt, CO_REG_PIXFMT, cfb); 518 cyber2000fb_writeb(hw->co_pixfmt, CO_REG_PIXFMT, cfb);
521} 519}
522 520
@@ -549,15 +547,15 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
549{ 547{
550 u_int Htotal, Hblankend, Hsyncend; 548 u_int Htotal, Hblankend, Hsyncend;
551 u_int Vtotal, Vdispend, Vblankstart, Vblankend, Vsyncstart, Vsyncend; 549 u_int Vtotal, Vdispend, Vblankstart, Vblankend, Vsyncstart, Vsyncend;
552#define BIT(v,b1,m,b2) (((v >> b1) & m) << b2) 550#define ENCODE_BIT(v, b1, m, b2) ((((v) >> (b1)) & (m)) << (b2))
553 551
554 hw->crtc[13] = hw->pitch; 552 hw->crtc[13] = hw->pitch;
555 hw->crtc[17] = 0xe3; 553 hw->crtc[17] = 0xe3;
556 hw->crtc[14] = 0; 554 hw->crtc[14] = 0;
557 hw->crtc[8] = 0; 555 hw->crtc[8] = 0;
558 556
559 Htotal = var->xres + var->right_margin + 557 Htotal = var->xres + var->right_margin +
560 var->hsync_len + var->left_margin; 558 var->hsync_len + var->left_margin;
561 559
562 if (Htotal > 2080) 560 if (Htotal > 2080)
563 return -EINVAL; 561 return -EINVAL;
@@ -567,15 +565,15 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
567 hw->crtc[2] = var->xres >> 3; 565 hw->crtc[2] = var->xres >> 3;
568 hw->crtc[4] = (var->xres + var->right_margin) >> 3; 566 hw->crtc[4] = (var->xres + var->right_margin) >> 3;
569 567
570 Hblankend = (Htotal - 4*8) >> 3; 568 Hblankend = (Htotal - 4 * 8) >> 3;
571 569
572 hw->crtc[3] = BIT(Hblankend, 0, 0x1f, 0) | 570 hw->crtc[3] = ENCODE_BIT(Hblankend, 0, 0x1f, 0) |
573 BIT(1, 0, 0x01, 7); 571 ENCODE_BIT(1, 0, 0x01, 7);
574 572
575 Hsyncend = (var->xres + var->right_margin + var->hsync_len) >> 3; 573 Hsyncend = (var->xres + var->right_margin + var->hsync_len) >> 3;
576 574
577 hw->crtc[5] = BIT(Hsyncend, 0, 0x1f, 0) | 575 hw->crtc[5] = ENCODE_BIT(Hsyncend, 0, 0x1f, 0) |
578 BIT(Hblankend, 5, 0x01, 7); 576 ENCODE_BIT(Hblankend, 5, 0x01, 7);
579 577
580 Vdispend = var->yres - 1; 578 Vdispend = var->yres - 1;
581 Vsyncstart = var->yres + var->lower_margin; 579 Vsyncstart = var->yres + var->lower_margin;
@@ -590,20 +588,20 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
590 Vblankend = Vtotal - 10; 588 Vblankend = Vtotal - 10;
591 589
592 hw->crtc[6] = Vtotal; 590 hw->crtc[6] = Vtotal;
593 hw->crtc[7] = BIT(Vtotal, 8, 0x01, 0) | 591 hw->crtc[7] = ENCODE_BIT(Vtotal, 8, 0x01, 0) |
594 BIT(Vdispend, 8, 0x01, 1) | 592 ENCODE_BIT(Vdispend, 8, 0x01, 1) |
595 BIT(Vsyncstart, 8, 0x01, 2) | 593 ENCODE_BIT(Vsyncstart, 8, 0x01, 2) |
596 BIT(Vblankstart,8, 0x01, 3) | 594 ENCODE_BIT(Vblankstart, 8, 0x01, 3) |
597 BIT(1, 0, 0x01, 4) | 595 ENCODE_BIT(1, 0, 0x01, 4) |
598 BIT(Vtotal, 9, 0x01, 5) | 596 ENCODE_BIT(Vtotal, 9, 0x01, 5) |
599 BIT(Vdispend, 9, 0x01, 6) | 597 ENCODE_BIT(Vdispend, 9, 0x01, 6) |
600 BIT(Vsyncstart, 9, 0x01, 7); 598 ENCODE_BIT(Vsyncstart, 9, 0x01, 7);
601 hw->crtc[9] = BIT(0, 0, 0x1f, 0) | 599 hw->crtc[9] = ENCODE_BIT(0, 0, 0x1f, 0) |
602 BIT(Vblankstart,9, 0x01, 5) | 600 ENCODE_BIT(Vblankstart, 9, 0x01, 5) |
603 BIT(1, 0, 0x01, 6); 601 ENCODE_BIT(1, 0, 0x01, 6);
604 hw->crtc[10] = Vsyncstart; 602 hw->crtc[10] = Vsyncstart;
605 hw->crtc[11] = BIT(Vsyncend, 0, 0x0f, 0) | 603 hw->crtc[11] = ENCODE_BIT(Vsyncend, 0, 0x0f, 0) |
606 BIT(1, 0, 0x01, 7); 604 ENCODE_BIT(1, 0, 0x01, 7);
607 hw->crtc[12] = Vdispend; 605 hw->crtc[12] = Vdispend;
608 hw->crtc[15] = Vblankstart; 606 hw->crtc[15] = Vblankstart;
609 hw->crtc[16] = Vblankend; 607 hw->crtc[16] = Vblankend;
@@ -615,10 +613,10 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
615 * 4=LINECOMP:10 5-IVIDEO 6=FIXCNT 613 * 4=LINECOMP:10 5-IVIDEO 6=FIXCNT
616 */ 614 */
617 hw->crtc_ofl = 615 hw->crtc_ofl =
618 BIT(Vtotal, 10, 0x01, 0) | 616 ENCODE_BIT(Vtotal, 10, 0x01, 0) |
619 BIT(Vdispend, 10, 0x01, 1) | 617 ENCODE_BIT(Vdispend, 10, 0x01, 1) |
620 BIT(Vsyncstart, 10, 0x01, 2) | 618 ENCODE_BIT(Vsyncstart, 10, 0x01, 2) |
621 BIT(Vblankstart,10, 0x01, 3) | 619 ENCODE_BIT(Vblankstart, 10, 0x01, 3) |
622 EXT_CRT_VRTOFL_LINECOMP10; 620 EXT_CRT_VRTOFL_LINECOMP10;
623 621
624 /* woody: set the interlaced bit... */ 622 /* woody: set the interlaced bit... */
@@ -750,11 +748,11 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
750 var->red.msb_right = 0; 748 var->red.msb_right = 0;
751 var->green.msb_right = 0; 749 var->green.msb_right = 0;
752 var->blue.msb_right = 0; 750 var->blue.msb_right = 0;
751 var->transp.offset = 0;
752 var->transp.length = 0;
753 753
754 switch (var->bits_per_pixel) { 754 switch (var->bits_per_pixel) {
755 case 8: /* PSEUDOCOLOUR, 256 */ 755 case 8: /* PSEUDOCOLOUR, 256 */
756 var->transp.offset = 0;
757 var->transp.length = 0;
758 var->red.offset = 0; 756 var->red.offset = 0;
759 var->red.length = 8; 757 var->red.length = 8;
760 var->green.offset = 0; 758 var->green.offset = 0;
@@ -766,8 +764,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
766 case 16:/* DIRECTCOLOUR, 64k or 32k */ 764 case 16:/* DIRECTCOLOUR, 64k or 32k */
767 switch (var->green.length) { 765 switch (var->green.length) {
768 case 6: /* RGB565, 64k */ 766 case 6: /* RGB565, 64k */
769 var->transp.offset = 0;
770 var->transp.length = 0;
771 var->red.offset = 11; 767 var->red.offset = 11;
772 var->red.length = 5; 768 var->red.length = 5;
773 var->green.offset = 5; 769 var->green.offset = 5;
@@ -778,8 +774,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
778 774
779 default: 775 default:
780 case 5: /* RGB555, 32k */ 776 case 5: /* RGB555, 32k */
781 var->transp.offset = 0;
782 var->transp.length = 0;
783 var->red.offset = 10; 777 var->red.offset = 10;
784 var->red.length = 5; 778 var->red.length = 5;
785 var->green.offset = 5; 779 var->green.offset = 5;
@@ -802,8 +796,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
802 break; 796 break;
803 797
804 case 24:/* TRUECOLOUR, 16m */ 798 case 24:/* TRUECOLOUR, 16m */
805 var->transp.offset = 0;
806 var->transp.length = 0;
807 var->red.offset = 16; 799 var->red.offset = 16;
808 var->red.length = 8; 800 var->red.length = 8;
809 var->green.offset = 8; 801 var->green.offset = 8;
@@ -830,7 +822,7 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
830 mem = var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8); 822 mem = var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8);
831 if (mem > cfb->fb.fix.smem_len) 823 if (mem > cfb->fb.fix.smem_len)
832 var->yres_virtual = cfb->fb.fix.smem_len * 8 / 824 var->yres_virtual = cfb->fb.fix.smem_len * 8 /
833 (var->bits_per_pixel * var->xres_virtual); 825 (var->bits_per_pixel * var->xres_virtual);
834 826
835 if (var->yres > var->yres_virtual) 827 if (var->yres > var->yres_virtual)
836 var->yres = var->yres_virtual; 828 var->yres = var->yres_virtual;
@@ -921,7 +913,7 @@ static int cyber2000fb_set_par(struct fb_info *info)
921 hw.fetch <<= 1; 913 hw.fetch <<= 1;
922 hw.fetch += 1; 914 hw.fetch += 1;
923 915
924 cfb->fb.fix.line_length = var->xres_virtual * var->bits_per_pixel / 8; 916 cfb->fb.fix.line_length = var->xres_virtual * var->bits_per_pixel / 8;
925 917
926 /* 918 /*
927 * Same here - if the size of the video mode exceeds the 919 * Same here - if the size of the video mode exceeds the
@@ -952,7 +944,6 @@ static int cyber2000fb_set_par(struct fb_info *info)
952 return 0; 944 return 0;
953} 945}
954 946
955
956/* 947/*
957 * Pan or Wrap the Display 948 * Pan or Wrap the Display
958 */ 949 */
@@ -1002,15 +993,15 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
1002 switch (blank) { 993 switch (blank) {
1003 case FB_BLANK_POWERDOWN: /* powerdown - both sync lines down */ 994 case FB_BLANK_POWERDOWN: /* powerdown - both sync lines down */
1004 sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_0; 995 sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_0;
1005 break; 996 break;
1006 case FB_BLANK_HSYNC_SUSPEND: /* hsync off */ 997 case FB_BLANK_HSYNC_SUSPEND: /* hsync off */
1007 sync = EXT_SYNC_CTL_VS_NORMAL | EXT_SYNC_CTL_HS_0; 998 sync = EXT_SYNC_CTL_VS_NORMAL | EXT_SYNC_CTL_HS_0;
1008 break; 999 break;
1009 case FB_BLANK_VSYNC_SUSPEND: /* vsync off */ 1000 case FB_BLANK_VSYNC_SUSPEND: /* vsync off */
1010 sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_NORMAL; 1001 sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_NORMAL;
1011 break; 1002 break;
1012 case FB_BLANK_NORMAL: /* soft blank */ 1003 case FB_BLANK_NORMAL: /* soft blank */
1013 default: /* unblank */ 1004 default: /* unblank */
1014 break; 1005 break;
1015 } 1006 }
1016 1007
@@ -1018,7 +1009,8 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
1018 1009
1019 if (blank <= 1) { 1010 if (blank <= 1) {
1020 /* turn on ramdacs */ 1011 /* turn on ramdacs */
1021 cfb->ramdac_powerdown &= ~(RAMDAC_DACPWRDN | RAMDAC_BYPASS | RAMDAC_RAMPWRDN); 1012 cfb->ramdac_powerdown &= ~(RAMDAC_DACPWRDN | RAMDAC_BYPASS |
1013 RAMDAC_RAMPWRDN);
1022 cyber2000fb_write_ramdac_ctrl(cfb); 1014 cyber2000fb_write_ramdac_ctrl(cfb);
1023 } 1015 }
1024 1016
@@ -1043,7 +1035,8 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
1043 1035
1044 if (blank >= 2) { 1036 if (blank >= 2) {
1045 /* turn off ramdacs */ 1037 /* turn off ramdacs */
1046 cfb->ramdac_powerdown |= RAMDAC_DACPWRDN | RAMDAC_BYPASS | RAMDAC_RAMPWRDN; 1038 cfb->ramdac_powerdown |= RAMDAC_DACPWRDN | RAMDAC_BYPASS |
1039 RAMDAC_RAMPWRDN;
1047 cyber2000fb_write_ramdac_ctrl(cfb); 1040 cyber2000fb_write_ramdac_ctrl(cfb);
1048 } 1041 }
1049 1042
@@ -1068,7 +1061,7 @@ static struct fb_ops cyber2000fb_ops = {
1068 * of this driver. It is here solely at the moment to support the other 1061 * of this driver. It is here solely at the moment to support the other
1069 * CyberPro modules external to this driver. 1062 * CyberPro modules external to this driver.
1070 */ 1063 */
1071static struct cfb_info *int_cfb_info; 1064static struct cfb_info *int_cfb_info;
1072 1065
1073/* 1066/*
1074 * Enable access to the extended registers 1067 * Enable access to the extended registers
@@ -1085,6 +1078,7 @@ void cyber2000fb_enable_extregs(struct cfb_info *cfb)
1085 cyber2000_grphw(EXT_FUNC_CTL, old, cfb); 1078 cyber2000_grphw(EXT_FUNC_CTL, old, cfb);
1086 } 1079 }
1087} 1080}
1081EXPORT_SYMBOL(cyber2000fb_enable_extregs);
1088 1082
1089/* 1083/*
1090 * Disable access to the extended registers 1084 * Disable access to the extended registers
@@ -1104,11 +1098,13 @@ void cyber2000fb_disable_extregs(struct cfb_info *cfb)
1104 else 1098 else
1105 cfb->func_use_count -= 1; 1099 cfb->func_use_count -= 1;
1106} 1100}
1101EXPORT_SYMBOL(cyber2000fb_disable_extregs);
1107 1102
1108void cyber2000fb_get_fb_var(struct cfb_info *cfb, struct fb_var_screeninfo *var) 1103void cyber2000fb_get_fb_var(struct cfb_info *cfb, struct fb_var_screeninfo *var)
1109{ 1104{
1110 memcpy(var, &cfb->fb.var, sizeof(struct fb_var_screeninfo)); 1105 memcpy(var, &cfb->fb.var, sizeof(struct fb_var_screeninfo));
1111} 1106}
1107EXPORT_SYMBOL(cyber2000fb_get_fb_var);
1112 1108
1113/* 1109/*
1114 * Attach a capture/tv driver to the core CyberX0X0 driver. 1110 * Attach a capture/tv driver to the core CyberX0X0 driver.
@@ -1122,13 +1118,15 @@ int cyber2000fb_attach(struct cyberpro_info *info, int idx)
1122 info->fb_size = int_cfb_info->fb.fix.smem_len; 1118 info->fb_size = int_cfb_info->fb.fix.smem_len;
1123 info->enable_extregs = cyber2000fb_enable_extregs; 1119 info->enable_extregs = cyber2000fb_enable_extregs;
1124 info->disable_extregs = cyber2000fb_disable_extregs; 1120 info->disable_extregs = cyber2000fb_disable_extregs;
1125 info->info = int_cfb_info; 1121 info->info = int_cfb_info;
1126 1122
1127 strlcpy(info->dev_name, int_cfb_info->fb.fix.id, sizeof(info->dev_name)); 1123 strlcpy(info->dev_name, int_cfb_info->fb.fix.id,
1124 sizeof(info->dev_name));
1128 } 1125 }
1129 1126
1130 return int_cfb_info != NULL; 1127 return int_cfb_info != NULL;
1131} 1128}
1129EXPORT_SYMBOL(cyber2000fb_attach);
1132 1130
1133/* 1131/*
1134 * Detach a capture/tv driver from the core CyberX0X0 driver. 1132 * Detach a capture/tv driver from the core CyberX0X0 driver.
@@ -1136,12 +1134,7 @@ int cyber2000fb_attach(struct cyberpro_info *info, int idx)
1136void cyber2000fb_detach(int idx) 1134void cyber2000fb_detach(int idx)
1137{ 1135{
1138} 1136}
1139
1140EXPORT_SYMBOL(cyber2000fb_attach);
1141EXPORT_SYMBOL(cyber2000fb_detach); 1137EXPORT_SYMBOL(cyber2000fb_detach);
1142EXPORT_SYMBOL(cyber2000fb_enable_extregs);
1143EXPORT_SYMBOL(cyber2000fb_disable_extregs);
1144EXPORT_SYMBOL(cyber2000fb_get_fb_var);
1145 1138
1146/* 1139/*
1147 * These parameters give 1140 * These parameters give
@@ -1205,7 +1198,7 @@ static void cyberpro_init_hw(struct cfb_info *cfb)
1205 int i; 1198 int i;
1206 1199
1207 for (i = 0; i < sizeof(igs_regs); i += 2) 1200 for (i = 0; i < sizeof(igs_regs); i += 2)
1208 cyber2000_grphw(igs_regs[i], igs_regs[i+1], cfb); 1201 cyber2000_grphw(igs_regs[i], igs_regs[i + 1], cfb);
1209 1202
1210 if (cfb->id == ID_CYBERPRO_5000) { 1203 if (cfb->id == ID_CYBERPRO_5000) {
1211 unsigned char val; 1204 unsigned char val;
@@ -1215,8 +1208,8 @@ static void cyberpro_init_hw(struct cfb_info *cfb)
1215 } 1208 }
1216} 1209}
1217 1210
1218static struct cfb_info * __devinit 1211static struct cfb_info __devinit *cyberpro_alloc_fb_info(unsigned int id,
1219cyberpro_alloc_fb_info(unsigned int id, char *name) 1212 char *name)
1220{ 1213{
1221 struct cfb_info *cfb; 1214 struct cfb_info *cfb;
1222 1215
@@ -1228,9 +1221,9 @@ cyberpro_alloc_fb_info(unsigned int id, char *name)
1228 cfb->id = id; 1221 cfb->id = id;
1229 1222
1230 if (id == ID_CYBERPRO_5000) 1223 if (id == ID_CYBERPRO_5000)
1231 cfb->ref_ps = 40690; // 24.576 MHz 1224 cfb->ref_ps = 40690; /* 24.576 MHz */
1232 else 1225 else
1233 cfb->ref_ps = 69842; // 14.31818 MHz (69841?) 1226 cfb->ref_ps = 69842; /* 14.31818 MHz (69841?) */
1234 1227
1235 cfb->divisors[0] = 1; 1228 cfb->divisors[0] = 1;
1236 cfb->divisors[1] = 2; 1229 cfb->divisors[1] = 2;
@@ -1282,8 +1275,7 @@ cyberpro_alloc_fb_info(unsigned int id, char *name)
1282 return cfb; 1275 return cfb;
1283} 1276}
1284 1277
1285static void 1278static void cyberpro_free_fb_info(struct cfb_info *cfb)
1286cyberpro_free_fb_info(struct cfb_info *cfb)
1287{ 1279{
1288 if (cfb) { 1280 if (cfb) {
1289 /* 1281 /*
@@ -1300,8 +1292,7 @@ cyberpro_free_fb_info(struct cfb_info *cfb)
1300 * video=cyber2000:font:fontname 1292 * video=cyber2000:font:fontname
1301 */ 1293 */
1302#ifndef MODULE 1294#ifndef MODULE
1303static int 1295static int cyber2000fb_setup(char *options)
1304cyber2000fb_setup(char *options)
1305{ 1296{
1306 char *opt; 1297 char *opt;
1307 1298
@@ -1315,7 +1306,8 @@ cyber2000fb_setup(char *options)
1315 if (strncmp(opt, "font:", 5) == 0) { 1306 if (strncmp(opt, "font:", 5) == 0) {
1316 static char default_font_storage[40]; 1307 static char default_font_storage[40];
1317 1308
1318 strlcpy(default_font_storage, opt + 5, sizeof(default_font_storage)); 1309 strlcpy(default_font_storage, opt + 5,
1310 sizeof(default_font_storage));
1319 default_font = default_font_storage; 1311 default_font = default_font_storage;
1320 continue; 1312 continue;
1321 } 1313 }
@@ -1354,10 +1346,18 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
1354 * Determine the size of the memory. 1346 * Determine the size of the memory.
1355 */ 1347 */
1356 switch (cfb->mem_ctl2 & MEM_CTL2_SIZE_MASK) { 1348 switch (cfb->mem_ctl2 & MEM_CTL2_SIZE_MASK) {
1357 case MEM_CTL2_SIZE_4MB: smem_size = 0x00400000; break; 1349 case MEM_CTL2_SIZE_4MB:
1358 case MEM_CTL2_SIZE_2MB: smem_size = 0x00200000; break; 1350 smem_size = 0x00400000;
1359 case MEM_CTL2_SIZE_1MB: smem_size = 0x00100000; break; 1351 break;
1360 default: smem_size = 0x00100000; break; 1352 case MEM_CTL2_SIZE_2MB:
1353 smem_size = 0x00200000;
1354 break;
1355 case MEM_CTL2_SIZE_1MB:
1356 smem_size = 0x00100000;
1357 break;
1358 default:
1359 smem_size = 0x00100000;
1360 break;
1361 } 1361 }
1362 1362
1363 cfb->fb.fix.smem_len = smem_size; 1363 cfb->fb.fix.smem_len = smem_size;
@@ -1366,8 +1366,8 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
1366 1366
1367 err = -EINVAL; 1367 err = -EINVAL;
1368 if (!fb_find_mode(&cfb->fb.var, &cfb->fb, NULL, NULL, 0, 1368 if (!fb_find_mode(&cfb->fb.var, &cfb->fb, NULL, NULL, 0,
1369 &cyber2000fb_default_mode, 8)) { 1369 &cyber2000fb_default_mode, 8)) {
1370 printk("%s: no valid mode found\n", cfb->fb.fix.id); 1370 printk(KERN_ERR "%s: no valid mode found\n", cfb->fb.fix.id);
1371 goto failed; 1371 goto failed;
1372 } 1372 }
1373 1373
@@ -1377,7 +1377,7 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
1377 if (cfb->fb.var.yres_virtual < cfb->fb.var.yres) 1377 if (cfb->fb.var.yres_virtual < cfb->fb.var.yres)
1378 cfb->fb.var.yres_virtual = cfb->fb.var.yres; 1378 cfb->fb.var.yres_virtual = cfb->fb.var.yres;
1379 1379
1380// fb_set_var(&cfb->fb.var, -1, &cfb->fb); 1380/* fb_set_var(&cfb->fb.var, -1, &cfb->fb); */
1381 1381
1382 /* 1382 /*
1383 * Calculate the hsync and vsync frequencies. Note that 1383 * Calculate the hsync and vsync frequencies. Note that
@@ -1425,20 +1425,20 @@ static void cyberpro_common_resume(struct cfb_info *cfb)
1425 1425
1426#include <asm/arch/hardware.h> 1426#include <asm/arch/hardware.h>
1427 1427
1428static int __devinit 1428static int __devinit cyberpro_vl_probe(void)
1429cyberpro_vl_probe(void)
1430{ 1429{
1431 struct cfb_info *cfb; 1430 struct cfb_info *cfb;
1432 int err = -ENOMEM; 1431 int err = -ENOMEM;
1433 1432
1434 if (!request_mem_region(FB_START,FB_SIZE,"CyberPro2010")) return err; 1433 if (!request_mem_region(FB_START, FB_SIZE, "CyberPro2010"))
1434 return err;
1435 1435
1436 cfb = cyberpro_alloc_fb_info(ID_CYBERPRO_2010, "CyberPro2010"); 1436 cfb = cyberpro_alloc_fb_info(ID_CYBERPRO_2010, "CyberPro2010");
1437 if (!cfb) 1437 if (!cfb)
1438 goto failed_release; 1438 goto failed_release;
1439 1439
1440 cfb->dev = NULL; 1440 cfb->dev = NULL;
1441 cfb->region = ioremap(FB_START,FB_SIZE); 1441 cfb->region = ioremap(FB_START, FB_SIZE);
1442 if (!cfb->region) 1442 if (!cfb->region)
1443 goto failed_ioremap; 1443 goto failed_ioremap;
1444 1444
@@ -1475,7 +1475,7 @@ failed:
1475failed_ioremap: 1475failed_ioremap:
1476 cyberpro_free_fb_info(cfb); 1476 cyberpro_free_fb_info(cfb);
1477failed_release: 1477failed_release:
1478 release_mem_region(FB_START,FB_SIZE); 1478 release_mem_region(FB_START, FB_SIZE);
1479 1479
1480 return err; 1480 return err;
1481} 1481}
@@ -1538,7 +1538,8 @@ static int cyberpro_pci_enable_mmio(struct cfb_info *cfb)
1538 * Allow the CyberPro to accept PCI burst accesses 1538 * Allow the CyberPro to accept PCI burst accesses
1539 */ 1539 */
1540 if (cfb->id == ID_CYBERPRO_2010) { 1540 if (cfb->id == ID_CYBERPRO_2010) {
1541 printk(KERN_INFO "%s: NOT enabling PCI bursts\n", cfb->fb.fix.id); 1541 printk(KERN_INFO "%s: NOT enabling PCI bursts\n",
1542 cfb->fb.fix.id);
1542 } else { 1543 } else {
1543 val = cyber2000_grphr(EXT_BUS_CTL, cfb); 1544 val = cyber2000_grphr(EXT_BUS_CTL, cfb);
1544 if (!(val & EXT_BUS_CTL_PCIBURST_WRITE)) { 1545 if (!(val & EXT_BUS_CTL_PCIBURST_WRITE)) {
@@ -1688,9 +1689,10 @@ static int cyberpro_pci_resume(struct pci_dev *dev)
1688} 1689}
1689 1690
1690static struct pci_device_id cyberpro_pci_table[] = { 1691static struct pci_device_id cyberpro_pci_table[] = {
1691// Not yet 1692/* Not yet
1692// { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_1682, 1693 * { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_1682,
1693// PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_IGA_1682 }, 1694 * PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_IGA_1682 },
1695 */
1694 { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2000, 1696 { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2000,
1695 PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_CYBERPRO_2000 }, 1697 PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_CYBERPRO_2000 },
1696 { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2010, 1698 { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2010,
@@ -1700,7 +1702,7 @@ static struct pci_device_id cyberpro_pci_table[] = {
1700 { 0, } 1702 { 0, }
1701}; 1703};
1702 1704
1703MODULE_DEVICE_TABLE(pci,cyberpro_pci_table); 1705MODULE_DEVICE_TABLE(pci, cyberpro_pci_table);
1704 1706
1705static struct pci_driver cyberpro_driver = { 1707static struct pci_driver cyberpro_driver = {
1706 .name = "CyberPro", 1708 .name = "CyberPro",
diff --git a/drivers/video/pnx4008/sdum.h b/drivers/video/pnx4008/sdum.h
index e8c5dcdd8813..189c3d641383 100644
--- a/drivers/video/pnx4008/sdum.h
+++ b/drivers/video/pnx4008/sdum.h
@@ -77,9 +77,6 @@
77#define CONF_DIRTYDETECTION_OFF (0x600) 77#define CONF_DIRTYDETECTION_OFF (0x600)
78#define CONF_DIRTYDETECTION_ON (0x601) 78#define CONF_DIRTYDETECTION_ON (0x601)
79 79
80/* Set the corresponding bit. */
81#define BIT(n) (0x1U << (n))
82
83struct dumchannel_uf { 80struct dumchannel_uf {
84 int channelnr; 81 int channelnr;
85 u32 *dirty; 82 u32 *dirty;
diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c
index 38bd37372599..a684b1e87372 100644
--- a/drivers/watchdog/at91rm9200_wdt.c
+++ b/drivers/watchdog/at91rm9200_wdt.c
@@ -9,6 +9,7 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/bitops.h>
12#include <linux/errno.h> 13#include <linux/errno.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
14#include <linux/init.h> 15#include <linux/init.h>
@@ -19,7 +20,6 @@
19#include <linux/platform_device.h> 20#include <linux/platform_device.h>
20#include <linux/types.h> 21#include <linux/types.h>
21#include <linux/watchdog.h> 22#include <linux/watchdog.h>
22#include <asm/bitops.h>
23#include <asm/uaccess.h> 23#include <asm/uaccess.h>
24#include <asm/arch/at91_st.h> 24#include <asm/arch/at91_st.h>
25 25
diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c
index 7150fb945eaf..e3a29c302309 100644
--- a/drivers/watchdog/ks8695_wdt.c
+++ b/drivers/watchdog/ks8695_wdt.c
@@ -8,6 +8,7 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10
11#include <linux/bitops.h>
11#include <linux/errno.h> 12#include <linux/errno.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13#include <linux/init.h> 14#include <linux/init.h>
@@ -18,7 +19,6 @@
18#include <linux/platform_device.h> 19#include <linux/platform_device.h>
19#include <linux/types.h> 20#include <linux/types.h>
20#include <linux/watchdog.h> 21#include <linux/watchdog.h>
21#include <asm/bitops.h>
22#include <asm/io.h> 22#include <asm/io.h>
23#include <asm/uaccess.h> 23#include <asm/uaccess.h>
24#include <asm/arch/regs-timer.h> 24#include <asm/arch/regs-timer.h>
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index 719b066f73c4..635ca454f56b 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -39,11 +39,11 @@
39#include <linux/platform_device.h> 39#include <linux/platform_device.h>
40#include <linux/moduleparam.h> 40#include <linux/moduleparam.h>
41#include <linux/clk.h> 41#include <linux/clk.h>
42#include <linux/bitops.h>
42 43
43#include <asm/io.h> 44#include <asm/io.h>
44#include <asm/uaccess.h> 45#include <asm/uaccess.h>
45#include <asm/hardware.h> 46#include <asm/hardware.h>
46#include <asm/bitops.h>
47 47
48#include <asm/arch/prcm.h> 48#include <asm/arch/prcm.h>
49 49
diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index 3475f47aaa45..34a2b3b81800 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -25,13 +25,13 @@
25#include <linux/miscdevice.h> 25#include <linux/miscdevice.h>
26#include <linux/watchdog.h> 26#include <linux/watchdog.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/bitops.h>
28 29
29#ifdef CONFIG_ARCH_PXA 30#ifdef CONFIG_ARCH_PXA
30#include <asm/arch/pxa-regs.h> 31#include <asm/arch/pxa-regs.h>
31#endif 32#endif
32 33
33#include <asm/hardware.h> 34#include <asm/hardware.h>
34#include <asm/bitops.h>
35#include <asm/uaccess.h> 35#include <asm/uaccess.h>
36 36
37#define OSCR_FREQ CLOCK_TICK_RATE 37#define OSCR_FREQ CLOCK_TICK_RATE
diff --git a/fs/Kconfig b/fs/Kconfig
index e31f3691b151..cc28a69246a7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -220,7 +220,7 @@ config JBD
220 220
221config JBD_DEBUG 221config JBD_DEBUG
222 bool "JBD (ext3) debugging support" 222 bool "JBD (ext3) debugging support"
223 depends on JBD 223 depends on JBD && DEBUG_FS
224 help 224 help
225 If you are using the ext3 journaled file system (or potentially any 225 If you are using the ext3 journaled file system (or potentially any
226 other file system/device using JBD), this option allows you to 226 other file system/device using JBD), this option allows you to
@@ -229,10 +229,10 @@ config JBD_DEBUG
229 debugging output will be turned off. 229 debugging output will be turned off.
230 230
231 If you select Y here, then you will be able to turn on debugging 231 If you select Y here, then you will be able to turn on debugging
232 with "echo N > /proc/sys/fs/jbd-debug", where N is a number between 232 with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
233 1 and 5, the higher the number, the more debugging output is 233 number between 1 and 5, the higher the number, the more debugging
234 generated. To turn debugging off again, do 234 output is generated. To turn debugging off again, do
235 "echo 0 > /proc/sys/fs/jbd-debug". 235 "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
236 236
237config JBD2 237config JBD2
238 tristate 238 tristate
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index e7204d71acc9..45f5992a0957 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -80,7 +80,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
80 80
81 *uid = current->uid; 81 *uid = current->uid;
82 *gid = current->gid; 82 *gid = current->gid;
83 *pgrp = process_group(current); 83 *pgrp = task_pgrp_nr(current);
84 84
85 *minproto = *maxproto = AUTOFS_PROTO_VERSION; 85 *minproto = *maxproto = AUTOFS_PROTO_VERSION;
86 86
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index c1489533277a..5efff3c0d886 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -214,8 +214,8 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
214 214
215 oz_mode = autofs_oz_mode(sbi); 215 oz_mode = autofs_oz_mode(sbi);
216 DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, " 216 DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
217 "oz_mode = %d\n", pid_nr(task_pid(current)), 217 "oz_mode = %d\n", task_pid_nr(current),
218 process_group(current), sbi->catatonic, 218 task_pgrp_nr(current), sbi->catatonic,
219 oz_mode)); 219 oz_mode));
220 220
221 /* 221 /*
@@ -536,7 +536,7 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
536 struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); 536 struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
537 void __user *argp = (void __user *)arg; 537 void __user *argp = (void __user *)arg;
538 538
539 DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,process_group(current))); 539 DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));
540 540
541 if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || 541 if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
542 _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) 542 _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d85f42fa9206..2d4ae40718d9 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -131,7 +131,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
131 filesystem without "magic".) */ 131 filesystem without "magic".) */
132 132
133static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { 133static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
134 return sbi->catatonic || process_group(current) == sbi->oz_pgrp; 134 return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
135} 135}
136 136
137/* Does a dentry have some pending activity? */ 137/* Does a dentry have some pending activity? */
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index cd81f0836671..7f05d6ccdb13 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -226,7 +226,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
226 226
227 *uid = current->uid; 227 *uid = current->uid;
228 *gid = current->gid; 228 *gid = current->gid;
229 *pgrp = process_group(current); 229 *pgrp = task_pgrp_nr(current);
230 230
231 *minproto = AUTOFS_MIN_PROTO_VERSION; 231 *minproto = AUTOFS_MIN_PROTO_VERSION;
232 *maxproto = AUTOFS_MAX_PROTO_VERSION; 232 *maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -323,7 +323,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
323 sbi->pipe = NULL; 323 sbi->pipe = NULL;
324 sbi->catatonic = 1; 324 sbi->catatonic = 1;
325 sbi->exp_timeout = 0; 325 sbi->exp_timeout = 0;
326 sbi->oz_pgrp = process_group(current); 326 sbi->oz_pgrp = task_pgrp_nr(current);
327 sbi->sb = s; 327 sbi->sb = s;
328 sbi->version = 0; 328 sbi->version = 0;
329 sbi->sub_version = 0; 329 sbi->sub_version = 0;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 45ff3d63b758..2bbcc8151dc3 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -582,7 +582,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
582 oz_mode = autofs4_oz_mode(sbi); 582 oz_mode = autofs4_oz_mode(sbi);
583 583
584 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 584 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
585 current->pid, process_group(current), sbi->catatonic, oz_mode); 585 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
586 586
587 unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name); 587 unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
588 if (!unhashed) { 588 if (!unhashed) {
@@ -976,7 +976,7 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
976 void __user *p = (void __user *)arg; 976 void __user *p = (void __user *)arg;
977 977
978 DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u", 978 DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u",
979 cmd,arg,sbi,process_group(current)); 979 cmd,arg,sbi,task_pgrp_nr(current));
980 980
981 if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || 981 if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
982 _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) 982 _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6e2f3b8dde7f..ba8de7ca260b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1383,10 +1383,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1383 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; 1383 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1384 prstatus->pr_sigpend = p->pending.signal.sig[0]; 1384 prstatus->pr_sigpend = p->pending.signal.sig[0];
1385 prstatus->pr_sighold = p->blocked.sig[0]; 1385 prstatus->pr_sighold = p->blocked.sig[0];
1386 prstatus->pr_pid = p->pid; 1386 prstatus->pr_pid = task_pid_vnr(p);
1387 prstatus->pr_ppid = p->parent->pid; 1387 prstatus->pr_ppid = task_pid_vnr(p->parent);
1388 prstatus->pr_pgrp = process_group(p); 1388 prstatus->pr_pgrp = task_pgrp_vnr(p);
1389 prstatus->pr_sid = process_session(p); 1389 prstatus->pr_sid = task_session_vnr(p);
1390 if (thread_group_leader(p)) { 1390 if (thread_group_leader(p)) {
1391 /* 1391 /*
1392 * This is the record for the group leader. Add in the 1392 * This is the record for the group leader. Add in the
@@ -1429,10 +1429,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1429 psinfo->pr_psargs[i] = ' '; 1429 psinfo->pr_psargs[i] = ' ';
1430 psinfo->pr_psargs[len] = 0; 1430 psinfo->pr_psargs[len] = 0;
1431 1431
1432 psinfo->pr_pid = p->pid; 1432 psinfo->pr_pid = task_pid_vnr(p);
1433 psinfo->pr_ppid = p->parent->pid; 1433 psinfo->pr_ppid = task_pid_vnr(p->parent);
1434 psinfo->pr_pgrp = process_group(p); 1434 psinfo->pr_pgrp = task_pgrp_vnr(p);
1435 psinfo->pr_sid = process_session(p); 1435 psinfo->pr_sid = task_session_vnr(p);
1436 1436
1437 i = p->state ? ffz(~p->state) + 1 : 0; 1437 i = p->state ? ffz(~p->state) + 1 : 0;
1438 psinfo->pr_state = i; 1438 psinfo->pr_state = i;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 033861c6b8f1..32649f2a1654 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1342,10 +1342,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1342 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; 1342 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1343 prstatus->pr_sigpend = p->pending.signal.sig[0]; 1343 prstatus->pr_sigpend = p->pending.signal.sig[0];
1344 prstatus->pr_sighold = p->blocked.sig[0]; 1344 prstatus->pr_sighold = p->blocked.sig[0];
1345 prstatus->pr_pid = p->pid; 1345 prstatus->pr_pid = task_pid_vnr(p);
1346 prstatus->pr_ppid = p->parent->pid; 1346 prstatus->pr_ppid = task_pid_vnr(p->parent);
1347 prstatus->pr_pgrp = process_group(p); 1347 prstatus->pr_pgrp = task_pgrp_vnr(p);
1348 prstatus->pr_sid = process_session(p); 1348 prstatus->pr_sid = task_session_vnr(p);
1349 if (thread_group_leader(p)) { 1349 if (thread_group_leader(p)) {
1350 /* 1350 /*
1351 * This is the record for the group leader. Add in the 1351 * This is the record for the group leader. Add in the
@@ -1391,10 +1391,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1391 psinfo->pr_psargs[i] = ' '; 1391 psinfo->pr_psargs[i] = ' ';
1392 psinfo->pr_psargs[len] = 0; 1392 psinfo->pr_psargs[len] = 0;
1393 1393
1394 psinfo->pr_pid = p->pid; 1394 psinfo->pr_pid = task_pid_vnr(p);
1395 psinfo->pr_ppid = p->parent->pid; 1395 psinfo->pr_ppid = task_pid_vnr(p->parent);
1396 psinfo->pr_pgrp = process_group(p); 1396 psinfo->pr_pgrp = task_pgrp_vnr(p);
1397 psinfo->pr_sid = process_session(p); 1397 psinfo->pr_sid = task_session_vnr(p);
1398 1398
1399 i = p->state ? ffz(~p->state) + 1 : 0; 1399 i = p->state ? ffz(~p->state) + 1 : 0;
1400 psinfo->pr_state = i; 1400 psinfo->pr_state = i;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index bed6215c0794..3d419163c3d3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,19 @@
1Version 1.51
2------------
3Fix memory leak in statfs when mounted to very old servers (e.g.
4Windows 9x). Add new feature "POSIX open" which allows servers
5which support the current POSIX Extensions to provide better semantics
6(e.g. delete for open files opened with posix open). Take into
7account umask on posix mkdir not just older style mkdir. Add
8ability to mount to IPC$ share (which allows CIFS named pipes to be
9opened, read and written as if they were files). When 1st tree
10connect fails (e.g. due to signing negotiation failure) fix
11leak that causes cifsd not to stop and rmmod to fail to cleanup
12cifs_request_buffers pool. Fix problem with POSIX Open/Mkdir on
13bigendian architectures. Fix possible memory corruption when
14EAGAIN returned on kern_recvmsg. Return better error if server
15requires packet signing but client has disabled it.
16
1Version 1.50 17Version 1.50
2------------ 18------------
3Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is 19Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
@@ -6,7 +22,10 @@ done with "serverino" mount option). Add support for POSIX Unlink
6Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix" 22Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
7mount option to allow disabling the CIFS Unix Extensions for just 23mount option to allow disabling the CIFS Unix Extensions for just
8that mount. Fix hang on spinlock in find_writable_file (race when 24that mount. Fix hang on spinlock in find_writable_file (race when
9reopening file after session crash). 25reopening file after session crash). Byte range unlock request to
26windows server could unlock more bytes (on server copy of file)
27than intended if start of unlock request is well before start of
28a previous byte range lock that we issued.
10 29
11Version 1.49 30Version 1.49
12------------ 31------------
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 6ecd9d6ba3f3..ff6ba8d823f0 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -3,4 +3,4 @@
3# 3#
4obj-$(CONFIG_CIFS) += cifs.o 4obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o export.o 6cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o export.o cifsacl.o
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index f50a88d58f78..2a01f3ef96a0 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -385,10 +385,9 @@ asn1_oid_decode(struct asn1_ctx *ctx,
385 unsigned long *optr; 385 unsigned long *optr;
386 386
387 size = eoc - ctx->pointer + 1; 387 size = eoc - ctx->pointer + 1;
388 *oid = kmalloc(size * sizeof (unsigned long), GFP_ATOMIC); 388 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
389 if (*oid == NULL) { 389 if (*oid == NULL)
390 return 0; 390 return 0;
391 }
392 391
393 optr = *oid; 392 optr = *oid;
394 393
@@ -581,9 +580,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
581 return 0; 580 return 0;
582 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 581 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
583 || (tag != ASN1_SEQ)) { 582 || (tag != ASN1_SEQ)) {
584 cFYI(1, 583 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
585 ("Exit 6 cls = %d con = %d tag = %d end = %p (%d)", 584 cls, con, tag, end, *end));
586 cls, con, tag, end, *end));
587 } 585 }
588 586
589 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 587 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 1bf8cf522ad6..73c4c419663c 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -209,13 +209,16 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
209 i++; 209 i++;
210 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 210 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
211 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); 211 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
212 length = 212 length = sprintf(buf, "\n%d) %s Uses: %d ", i,
213 sprintf(buf, 213 tcon->treeName, atomic_read(&tcon->useCount));
214 "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x " 214 buf += length;
215 "Attributes: 0x%x\nPathComponentMax: %d Status: %d", 215 if (tcon->nativeFileSystem) {
216 i, tcon->treeName, 216 length = sprintf(buf, "Type: %s ",
217 atomic_read(&tcon->useCount), 217 tcon->nativeFileSystem);
218 tcon->nativeFileSystem, 218 buf += length;
219 }
220 length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x"
221 "\nPathComponentMax: %d Status: %d",
219 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), 222 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
220 le32_to_cpu(tcon->fsAttrInfo.Attributes), 223 le32_to_cpu(tcon->fsAttrInfo.Attributes),
221 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), 224 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
@@ -876,11 +879,16 @@ security_flags_write(struct file *file, const char __user *buffer,
876 if (count < 3) { 879 if (count < 3) {
877 /* single char or single char followed by null */ 880 /* single char or single char followed by null */
878 c = flags_string[0]; 881 c = flags_string[0];
879 if (c == '0' || c == 'n' || c == 'N') 882 if (c == '0' || c == 'n' || c == 'N') {
880 extended_security = CIFSSEC_DEF; /* default */ 883 extended_security = CIFSSEC_DEF; /* default */
881 else if (c == '1' || c == 'y' || c == 'Y') 884 return count;
885 } else if (c == '1' || c == 'y' || c == 'Y') {
882 extended_security = CIFSSEC_MAX; 886 extended_security = CIFSSEC_MAX;
883 return count; 887 return count;
888 } else if (!isdigit(c)) {
889 cERROR(1, ("invalid flag %c", c));
890 return -EINVAL;
891 }
884 } 892 }
885 /* else we have a number */ 893 /* else we have a number */
886 894
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
new file mode 100644
index 000000000000..e8e56353f5a1
--- /dev/null
+++ b/fs/cifs/cifsacl.c
@@ -0,0 +1,333 @@
1/*
2 * fs/cifs/cifsacl.c
3 *
4 * Copyright (C) International Business Machines Corp., 2007
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 *
7 * Contains the routines for mapping CIFS/NTFS ACLs
8 *
9 * This library is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as published
11 * by the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include <linux/fs.h>
25#include "cifspdu.h"
26#include "cifsglob.h"
27#include "cifsacl.h"
28#include "cifsproto.h"
29#include "cifs_debug.h"
30
31
32#ifdef CONFIG_CIFS_EXPERIMENTAL
33
34static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
35 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
36 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
37 {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
38 {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
39 {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"},
40 {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"},
41 {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"}
42};
43
44
45/* security id for everyone */
46static const struct cifs_sid sid_everyone =
47 {1, 1, {0, 0, 0, 0, 0, 0}, {} };
48/* group users */
49static const struct cifs_sid sid_user =
50 {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
51
52
53int match_sid(struct cifs_sid *ctsid)
54{
55 int i, j;
56 int num_subauth, num_sat, num_saw;
57 struct cifs_sid *cwsid;
58
59 if (!ctsid)
60 return (-1);
61
62 for (i = 0; i < NUM_WK_SIDS; ++i) {
63 cwsid = &(wksidarr[i].cifssid);
64
65 /* compare the revision */
66 if (ctsid->revision != cwsid->revision)
67 continue;
68
69 /* compare all of the six auth values */
70 for (j = 0; j < 6; ++j) {
71 if (ctsid->authority[j] != cwsid->authority[j])
72 break;
73 }
74 if (j < 6)
75 continue; /* all of the auth values did not match */
76
77 /* compare all of the subauth values if any */
78 num_sat = ctsid->num_subauth;
79 num_saw = cwsid->num_subauth;
80 num_subauth = num_sat < num_saw ? num_sat : num_saw;
81 if (num_subauth) {
82 for (j = 0; j < num_subauth; ++j) {
83 if (ctsid->sub_auth[j] != cwsid->sub_auth[j])
84 break;
85 }
86 if (j < num_subauth)
87 continue; /* all sub_auth values do not match */
88 }
89
90 cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname));
91 return (0); /* sids compare/match */
92 }
93
94 cFYI(1, ("No matching sid"));
95 return (-1);
96}
97
98/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
99 the same returns 1, if they do not match returns 0 */
100int compare_sids(struct cifs_sid *ctsid, struct cifs_sid *cwsid)
101{
102 int i;
103 int num_subauth, num_sat, num_saw;
104
105 if ((!ctsid) || (!cwsid))
106 return (0);
107
108 /* compare the revision */
109 if (ctsid->revision != cwsid->revision)
110 return (0);
111
112 /* compare all of the six auth values */
113 for (i = 0; i < 6; ++i) {
114 if (ctsid->authority[i] != cwsid->authority[i])
115 return (0);
116 }
117
118 /* compare all of the subauth values if any */
119 num_sat = ctsid->num_subauth;
120 num_saw = cwsid->num_subauth;
121 num_subauth = num_sat < num_saw ? num_sat : num_saw;
122 if (num_subauth) {
123 for (i = 0; i < num_subauth; ++i) {
124 if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
125 return (0);
126 }
127 }
128
129 return (1); /* sids compare/match */
130}
131
132
133static void parse_ace(struct cifs_ace *pace, char *end_of_acl)
134{
135 int num_subauth;
136
137 /* validate that we do not go past end of acl */
138
139 /* XXX this if statement can be removed
140 if (end_of_acl < (char *)pace + sizeof(struct cifs_ace)) {
141 cERROR(1, ("ACL too small to parse ACE"));
142 return;
143 } */
144
145 num_subauth = pace->num_subauth;
146 if (num_subauth) {
147#ifdef CONFIG_CIFS_DEBUG2
148 int i;
149 cFYI(1, ("ACE revision %d num_subauth %d",
150 pace->revision, pace->num_subauth));
151 for (i = 0; i < num_subauth; ++i) {
152 cFYI(1, ("ACE sub_auth[%d]: 0x%x", i,
153 le32_to_cpu(pace->sub_auth[i])));
154 }
155
156 /* BB add length check to make sure that we do not have huge
157 num auths and therefore go off the end */
158
159 cFYI(1, ("RID %d", le32_to_cpu(pace->sub_auth[num_subauth-1])));
160#endif
161 }
162
163 return;
164}
165
166static void parse_ntace(struct cifs_ntace *pntace, char *end_of_acl)
167{
168 /* validate that we do not go past end of acl */
169 if (end_of_acl < (char *)pntace + sizeof(struct cifs_ntace)) {
170 cERROR(1, ("ACL too small to parse NT ACE"));
171 return;
172 }
173
174#ifdef CONFIG_CIFS_DEBUG2
175 cFYI(1, ("NTACE type %d flags 0x%x size %d, access Req 0x%x",
176 pntace->type, pntace->flags, pntace->size,
177 pntace->access_req));
178#endif
179 return;
180}
181
182
183
184static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
185 struct cifs_sid *pownersid, struct cifs_sid *pgrpsid)
186{
187 int i;
188 int num_aces = 0;
189 int acl_size;
190 char *acl_base;
191 struct cifs_ntace **ppntace;
192 struct cifs_ace **ppace;
193
194 /* BB need to add parm so we can store the SID BB */
195
196 /* validate that we do not go past end of acl */
197 if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
198 cERROR(1, ("ACL too small to parse DACL"));
199 return;
200 }
201
202#ifdef CONFIG_CIFS_DEBUG2
203 cFYI(1, ("DACL revision %d size %d num aces %d",
204 le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
205 le32_to_cpu(pdacl->num_aces)));
206#endif
207
208 acl_base = (char *)pdacl;
209 acl_size = sizeof(struct cifs_acl);
210
211 num_aces = le32_to_cpu(pdacl->num_aces);
212 if (num_aces > 0) {
213 ppntace = kmalloc(num_aces * sizeof(struct cifs_ntace *),
214 GFP_KERNEL);
215 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
216 GFP_KERNEL);
217
218/* cifscred->cecount = pdacl->num_aces;
219 cifscred->ntaces = kmalloc(num_aces *
220 sizeof(struct cifs_ntace *), GFP_KERNEL);
221 cifscred->aces = kmalloc(num_aces *
222 sizeof(struct cifs_ace *), GFP_KERNEL);*/
223
224 for (i = 0; i < num_aces; ++i) {
225 ppntace[i] = (struct cifs_ntace *)
226 (acl_base + acl_size);
227 ppace[i] = (struct cifs_ace *) ((char *)ppntace[i] +
228 sizeof(struct cifs_ntace));
229
230 parse_ntace(ppntace[i], end_of_acl);
231 if (end_of_acl < ((char *)ppace[i] +
232 (le16_to_cpu(ppntace[i]->size) -
233 sizeof(struct cifs_ntace)))) {
234 cERROR(1, ("ACL too small to parse ACE"));
235 break;
236 } else
237 parse_ace(ppace[i], end_of_acl);
238
239/* memcpy((void *)(&(cifscred->ntaces[i])),
240 (void *)ppntace[i],
241 sizeof(struct cifs_ntace));
242 memcpy((void *)(&(cifscred->aces[i])),
243 (void *)ppace[i],
244 sizeof(struct cifs_ace)); */
245
246 acl_base = (char *)ppntace[i];
247 acl_size = le16_to_cpu(ppntace[i]->size);
248 }
249
250 kfree(ppace);
251 kfree(ppntace);
252 }
253
254 return;
255}
256
257
258static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
259{
260
261 /* BB need to add parm so we can store the SID BB */
262
263 /* validate that we do not go past end of acl */
264 if (end_of_acl < (char *)psid + sizeof(struct cifs_sid)) {
265 cERROR(1, ("ACL too small to parse SID"));
266 return -EINVAL;
267 }
268
269 if (psid->num_subauth) {
270#ifdef CONFIG_CIFS_DEBUG2
271 int i;
272 cFYI(1, ("SID revision %d num_auth %d First subauth 0x%x",
273 psid->revision, psid->num_subauth, psid->sub_auth[0]));
274
275 for (i = 0; i < psid->num_subauth; i++) {
276 cFYI(1, ("SID sub_auth[%d]: 0x%x ", i,
277 le32_to_cpu(psid->sub_auth[i])));
278 }
279
280 /* BB add length check to make sure that we do not have huge
281 num auths and therefore go off the end */
282 cFYI(1, ("RID 0x%x",
283 le32_to_cpu(psid->sub_auth[psid->num_subauth-1])));
284#endif
285 }
286
287 return 0;
288}
289
290
291/* Convert CIFS ACL to POSIX form */
292int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len)
293{
294 int rc;
295 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
296 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
297 char *end_of_acl = ((char *)pntsd) + acl_len;
298
299 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
300 le32_to_cpu(pntsd->osidoffset));
301 group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
302 le32_to_cpu(pntsd->gsidoffset));
303 dacl_ptr = (struct cifs_acl *)((char *)pntsd +
304 le32_to_cpu(pntsd->dacloffset));
305#ifdef CONFIG_CIFS_DEBUG2
306 cFYI(1, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x "
307 "sacloffset 0x%x dacloffset 0x%x",
308 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
309 le32_to_cpu(pntsd->gsidoffset),
310 le32_to_cpu(pntsd->sacloffset),
311 le32_to_cpu(pntsd->dacloffset)));
312#endif
313 rc = parse_sid(owner_sid_ptr, end_of_acl);
314 if (rc)
315 return rc;
316
317 rc = parse_sid(group_sid_ptr, end_of_acl);
318 if (rc)
319 return rc;
320
321 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr);
322
323/* cifscred->uid = owner_sid_ptr->rid;
324 cifscred->gid = group_sid_ptr->rid;
325 memcpy((void *)(&(cifscred->osid)), (void *)owner_sid_ptr,
326 sizeof (struct cifs_sid));
327 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
328 sizeof (struct cifs_sid)); */
329
330
331 return (0);
332}
333#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 5eff35d6e564..420f87813647 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifsacl.h 2 * fs/cifs/cifsacl.h
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2005 4 * Copyright (c) International Business Machines Corp., 2007
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -22,17 +22,65 @@
22#ifndef _CIFSACL_H 22#ifndef _CIFSACL_H
23#define _CIFSACL_H 23#define _CIFSACL_H
24 24
25
26#define NUM_AUTHS 6 /* number of authority fields */
27#define NUM_SUBAUTHS 5 /* number of sub authority fields */
28#define NUM_WK_SIDS 7 /* number of well known sids */
29#define SIDNAMELENGTH 20 /* long enough for the ones we care about */
30
31#define READ_BIT 0x4
32#define WRITE_BIT 0x2
33#define EXEC_BIT 0x1
34
35#define UBITSHIFT 6
36#define GBITSHIFT 3
37
38struct cifs_ntsd {
39 __le16 revision; /* revision level */
40 __le16 type;
41 __le32 osidoffset;
42 __le32 gsidoffset;
43 __le32 sacloffset;
44 __le32 dacloffset;
45} __attribute__((packed));
46
25struct cifs_sid { 47struct cifs_sid {
26 __u8 revision; /* revision level */ 48 __u8 revision; /* revision level */
27 __u8 num_subauths; 49 __u8 num_subauth;
50 __u8 authority[6];
51 __le32 sub_auth[5]; /* sub_auth[num_subauth] */ /* BB FIXME endianness BB */
52} __attribute__((packed));
53
54struct cifs_acl {
55 __le16 revision; /* revision level */
56 __le16 size;
57 __le32 num_aces;
58} __attribute__((packed));
59
60struct cifs_ntace { /* first part of ACE which contains perms */
61 __u8 type;
62 __u8 flags;
63 __le16 size;
64 __le32 access_req;
65} __attribute__((packed));
66
67struct cifs_ace { /* last part of ACE which includes user info */
68 __u8 revision; /* revision level */
69 __u8 num_subauth;
28 __u8 authority[6]; 70 __u8 authority[6];
29 __u32 sub_auth[4]; 71 __le32 sub_auth[5];
30 /* next sub_auth if any ... */
31} __attribute__((packed)); 72} __attribute__((packed));
32 73
33/* everyone */ 74struct cifs_wksid {
34/* extern const struct cifs_sid sid_everyone;*/ 75 struct cifs_sid cifssid;
35/* group users */ 76 char sidname[SIDNAMELENGTH];
36/* extern const struct cifs_sid sid_user;*/ 77} __attribute__((packed));
78
79#ifdef CONFIG_CIFS_EXPERIMENTAL
80
81extern int match_sid(struct cifs_sid *);
82extern int compare_sids(struct cifs_sid *, struct cifs_sid *);
83
84#endif /* CONFIG_CIFS_EXPERIMENTAL */
37 85
38#endif /* _CIFSACL_H */ 86#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 36272293027d..632070b4275d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -345,7 +345,7 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
345 user = kmalloc(2 + (len * 2), GFP_KERNEL); 345 user = kmalloc(2 + (len * 2), GFP_KERNEL);
346 if (user == NULL) 346 if (user == NULL)
347 goto calc_exit_2; 347 goto calc_exit_2;
348 len = cifs_strtoUCS(user, ses->userName, len, nls_cp); 348 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
349 UniStrupr(user); 349 UniStrupr(user);
350 hmac_md5_update((char *)user, 2*len, pctxt); 350 hmac_md5_update((char *)user, 2*len, pctxt);
351 351
@@ -356,7 +356,8 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
356 domain = kmalloc(2 + (len * 2), GFP_KERNEL); 356 domain = kmalloc(2 + (len * 2), GFP_KERNEL);
357 if (domain == NULL) 357 if (domain == NULL)
358 goto calc_exit_1; 358 goto calc_exit_1;
359 len = cifs_strtoUCS(domain, ses->domainName, len, nls_cp); 359 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
360 nls_cp);
360 /* the following line was removed since it didn't work well 361 /* the following line was removed since it didn't work well
361 with lower cased domain name that passed as an option. 362 with lower cased domain name that passed as an option.
362 Maybe converting the domain name earlier makes sense */ 363 Maybe converting the domain name earlier makes sense */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ba8f7868cb23..a6fbea57c4b1 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -49,10 +49,6 @@
49static struct quotactl_ops cifs_quotactl_ops; 49static struct quotactl_ops cifs_quotactl_ops;
50#endif /* QUOTA */ 50#endif /* QUOTA */
51 51
52#ifdef CONFIG_CIFS_EXPERIMENTAL
53extern struct export_operations cifs_export_ops;
54#endif /* EXPERIMENTAL */
55
56int cifsFYI = 0; 52int cifsFYI = 0;
57int cifsERROR = 1; 53int cifsERROR = 1;
58int traceSMB = 0; 54int traceSMB = 0;
@@ -240,9 +236,9 @@ static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
240 236
241 cifs_sb = CIFS_SB(inode->i_sb); 237 cifs_sb = CIFS_SB(inode->i_sb);
242 238
243 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { 239 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
244 return 0; 240 return 0;
245 } else /* file mode might have been restricted at mount time 241 else /* file mode might have been restricted at mount time
246 on the client (above and beyond ACL on servers) for 242 on the client (above and beyond ACL on servers) for
247 servers which do not support setting and viewing mode bits, 243 servers which do not support setting and viewing mode bits,
248 so allowing client to check permissions is useful */ 244 so allowing client to check permissions is useful */
@@ -312,15 +308,15 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
312 seq_printf(s, ",domain=%s", 308 seq_printf(s, ",domain=%s",
313 cifs_sb->tcon->ses->domainName); 309 cifs_sb->tcon->ses->domainName);
314 } 310 }
311 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
312 !(cifs_sb->tcon->unix_ext))
313 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
314 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
315 !(cifs_sb->tcon->unix_ext))
316 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
315 } 317 }
316 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) 318 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
317 seq_printf(s, ",posixpaths"); 319 seq_printf(s, ",posixpaths");
318 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
319 !(cifs_sb->tcon->unix_ext))
320 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
321 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
322 !(cifs_sb->tcon->unix_ext))
323 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
324 seq_printf(s, ",rsize=%d", cifs_sb->rsize); 320 seq_printf(s, ",rsize=%d", cifs_sb->rsize);
325 seq_printf(s, ",wsize=%d", cifs_sb->wsize); 321 seq_printf(s, ",wsize=%d", cifs_sb->wsize);
326 } 322 }
@@ -346,7 +342,7 @@ int cifs_xquota_set(struct super_block *sb, int quota_type, qid_t qid,
346 if (pTcon) { 342 if (pTcon) {
347 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid)); 343 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
348 } else { 344 } else {
349 return -EIO; 345 rc = -EIO;
350 } 346 }
351 347
352 FreeXid(xid); 348 FreeXid(xid);
@@ -716,7 +712,7 @@ static int
716cifs_init_inodecache(void) 712cifs_init_inodecache(void)
717{ 713{
718 cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", 714 cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
719 sizeof (struct cifsInodeInfo), 715 sizeof(struct cifsInodeInfo),
720 0, (SLAB_RECLAIM_ACCOUNT| 716 0, (SLAB_RECLAIM_ACCOUNT|
721 SLAB_MEM_SPREAD), 717 SLAB_MEM_SPREAD),
722 cifs_init_once); 718 cifs_init_once);
@@ -816,8 +812,8 @@ static int
816cifs_init_mids(void) 812cifs_init_mids(void)
817{ 813{
818 cifs_mid_cachep = kmem_cache_create("cifs_mpx_ids", 814 cifs_mid_cachep = kmem_cache_create("cifs_mpx_ids",
819 sizeof (struct mid_q_entry), 0, 815 sizeof(struct mid_q_entry), 0,
820 SLAB_HWCACHE_ALIGN, NULL); 816 SLAB_HWCACHE_ALIGN, NULL);
821 if (cifs_mid_cachep == NULL) 817 if (cifs_mid_cachep == NULL)
822 return -ENOMEM; 818 return -ENOMEM;
823 819
@@ -829,8 +825,8 @@ cifs_init_mids(void)
829 } 825 }
830 826
831 cifs_oplock_cachep = kmem_cache_create("cifs_oplock_structs", 827 cifs_oplock_cachep = kmem_cache_create("cifs_oplock_structs",
832 sizeof (struct oplock_q_entry), 0, 828 sizeof(struct oplock_q_entry), 0,
833 SLAB_HWCACHE_ALIGN, NULL); 829 SLAB_HWCACHE_ALIGN, NULL);
834 if (cifs_oplock_cachep == NULL) { 830 if (cifs_oplock_cachep == NULL) {
835 mempool_destroy(cifs_mid_poolp); 831 mempool_destroy(cifs_mid_poolp);
836 kmem_cache_destroy(cifs_mid_cachep); 832 kmem_cache_destroy(cifs_mid_cachep);
@@ -882,7 +878,8 @@ static int cifs_oplock_thread(void *dummyarg)
882 the call */ 878 the call */
883 /* mutex_lock(&inode->i_mutex);*/ 879 /* mutex_lock(&inode->i_mutex);*/
884 if (S_ISREG(inode->i_mode)) { 880 if (S_ISREG(inode->i_mode)) {
885 rc = filemap_fdatawrite(inode->i_mapping); 881 rc =
882 filemap_fdatawrite(inode->i_mapping);
886 if (CIFS_I(inode)->clientCanCacheRead 883 if (CIFS_I(inode)->clientCanCacheRead
887 == 0) { 884 == 0) {
888 filemap_fdatawait(inode->i_mapping); 885 filemap_fdatawait(inode->i_mapping);
@@ -907,8 +904,7 @@ static int cifs_oplock_thread(void *dummyarg)
907 0 /* len */ , 0 /* offset */, 0, 904 0 /* len */ , 0 /* offset */, 0,
908 0, LOCKING_ANDX_OPLOCK_RELEASE, 905 0, LOCKING_ANDX_OPLOCK_RELEASE,
909 0 /* wait flag */); 906 0 /* wait flag */);
910 cFYI(1, 907 cFYI(1, ("Oplock release rc = %d", rc));
911 ("Oplock release rc = %d ", rc));
912 } 908 }
913 } else 909 } else
914 spin_unlock(&GlobalMid_Lock); 910 spin_unlock(&GlobalMid_Lock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a20de77a3856..0a3ee5a322b0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifsfs.h 2 * fs/cifs/cifsfs.h
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2002, 2005 4 * Copyright (c) International Business Machines Corp., 2002, 2007
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -99,7 +99,12 @@ extern int cifs_setxattr(struct dentry *, const char *, const void *,
99 size_t, int); 99 size_t, int);
100extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); 100extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
101extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 101extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
102extern int cifs_ioctl (struct inode *inode, struct file *filep, 102extern int cifs_ioctl(struct inode *inode, struct file *filep,
103 unsigned int command, unsigned long arg); 103 unsigned int command, unsigned long arg);
104#define CIFS_VERSION "1.50" 104
105#ifdef CONFIG_CIFS_EXPERIMENTAL
106extern struct export_operations cifs_export_ops;
107#endif /* EXPERIMENTAL */
108
109#define CIFS_VERSION "1.51"
105#endif /* _CIFSFS_H */ 110#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b98742fc3b5a..87f51f23276f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -19,6 +19,7 @@
19#include <linux/in.h> 19#include <linux/in.h>
20#include <linux/in6.h> 20#include <linux/in6.h>
21#include "cifs_fs_sb.h" 21#include "cifs_fs_sb.h"
22#include "cifsacl.h"
22/* 23/*
23 * The sizes of various internal tables and strings 24 * The sizes of various internal tables and strings
24 */ 25 */
@@ -89,7 +90,8 @@ enum statusEnum {
89}; 90};
90 91
91enum securityEnum { 92enum securityEnum {
92 LANMAN = 0, /* Legacy LANMAN auth */ 93 PLAINTXT = 0, /* Legacy with Plaintext passwords */
94 LANMAN, /* Legacy LANMAN auth */
93 NTLM, /* Legacy NTLM012 auth with NTLM hash */ 95 NTLM, /* Legacy NTLM012 auth with NTLM hash */
94 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 96 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
95 RawNTLMSSP, /* NTLMSSP without SPNEGO */ 97 RawNTLMSSP, /* NTLMSSP without SPNEGO */
@@ -115,6 +117,17 @@ struct mac_key {
115 } data; 117 } data;
116}; 118};
117 119
120struct cifs_cred {
121 int uid;
122 int gid;
123 int mode;
124 int cecount;
125 struct cifs_sid osid;
126 struct cifs_sid gsid;
127 struct cifs_ntace *ntaces;
128 struct cifs_ace *aces;
129};
130
118/* 131/*
119 ***************************************************************** 132 *****************************************************************
120 * Except the CIFS PDUs themselves all the 133 * Except the CIFS PDUs themselves all the
@@ -279,6 +292,7 @@ struct cifsTconInfo {
279 FILE_SYSTEM_DEVICE_INFO fsDevInfo; 292 FILE_SYSTEM_DEVICE_INFO fsDevInfo;
280 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ 293 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
281 FILE_SYSTEM_UNIX_INFO fsUnixInfo; 294 FILE_SYSTEM_UNIX_INFO fsUnixInfo;
295 unsigned ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */
282 unsigned retry:1; 296 unsigned retry:1;
283 unsigned nocase:1; 297 unsigned nocase:1;
284 unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol 298 unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol
@@ -329,6 +343,7 @@ struct cifsFileInfo {
329 struct list_head llist; /* list of byte range locks we have. */ 343 struct list_head llist; /* list of byte range locks we have. */
330 unsigned closePend:1; /* file is marked to close */ 344 unsigned closePend:1; /* file is marked to close */
331 unsigned invalidHandle:1; /* file closed via session abend */ 345 unsigned invalidHandle:1; /* file closed via session abend */
346 unsigned messageMode:1; /* for pipes: message vs byte mode */
332 atomic_t wrtPending; /* handle in use - defer close */ 347 atomic_t wrtPending; /* handle in use - defer close */
333 struct semaphore fh_sem; /* prevents reopen race after dead ses*/ 348 struct semaphore fh_sem; /* prevents reopen race after dead ses*/
334 char *search_resume_name; /* BB removeme BB */ 349 char *search_resume_name; /* BB removeme BB */
@@ -464,6 +479,9 @@ struct dir_notify_req {
464#ifdef CONFIG_CIFS_WEAK_PW_HASH 479#ifdef CONFIG_CIFS_WEAK_PW_HASH
465#define CIFSSEC_MAY_LANMAN 0x00010 480#define CIFSSEC_MAY_LANMAN 0x00010
466#define CIFSSEC_MAY_PLNTXT 0x00020 481#define CIFSSEC_MAY_PLNTXT 0x00020
482#else
483#define CIFSSEC_MAY_LANMAN 0
484#define CIFSSEC_MAY_PLNTXT 0
467#endif /* weak passwords */ 485#endif /* weak passwords */
468#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ 486#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
469 487
@@ -477,14 +495,23 @@ require use of the stronger protocol */
477#ifdef CONFIG_CIFS_WEAK_PW_HASH 495#ifdef CONFIG_CIFS_WEAK_PW_HASH
478#define CIFSSEC_MUST_LANMAN 0x10010 496#define CIFSSEC_MUST_LANMAN 0x10010
479#define CIFSSEC_MUST_PLNTXT 0x20020 497#define CIFSSEC_MUST_PLNTXT 0x20020
498#ifdef CONFIG_CIFS_UPCALL
499#define CIFSSEC_MASK 0x3F03F /* allows weak security but also krb5 */
500#else
480#define CIFSSEC_MASK 0x37037 /* current flags supported if weak */ 501#define CIFSSEC_MASK 0x37037 /* current flags supported if weak */
502#endif /* UPCALL */
503#else /* do not allow weak pw hash */
504#ifdef CONFIG_CIFS_UPCALL
505#define CIFSSEC_MASK 0x0F00F /* flags supported if no weak allowed */
481#else 506#else
482#define CIFSSEC_MASK 0x07007 /* flags supported if no weak config */ 507#define CIFSSEC_MASK 0x07007 /* flags supported if no weak allowed */
508#endif /* UPCALL */
483#endif /* WEAK_PW_HASH */ 509#endif /* WEAK_PW_HASH */
484#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ 510#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
485 511
486#define CIFSSEC_DEF CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 512#define CIFSSEC_DEF CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2
487#define CIFSSEC_MAX CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2 513#define CIFSSEC_MAX CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2
514#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5)
488/* 515/*
489 ***************************************************************** 516 *****************************************************************
490 * All constants go here 517 * All constants go here
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 6a2056e58ceb..c41ff74e9128 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -215,6 +215,12 @@
215 /* file_execute, file_read_attributes*/ 215 /* file_execute, file_read_attributes*/
216 /* write_dac, and delete. */ 216 /* write_dac, and delete. */
217 217
218#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
219#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
220 | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES)
221#define FILE_EXEC_RIGHTS (FILE_EXECUTE)
222
223
218/* 224/*
219 * Invalid readdir handle 225 * Invalid readdir handle
220 */ 226 */
@@ -360,10 +366,10 @@ struct smb_hdr {
360 __u8 WordCount; 366 __u8 WordCount;
361} __attribute__((packed)); 367} __attribute__((packed));
362/* given a pointer to an smb_hdr retrieve the value of byte count */ 368/* given a pointer to an smb_hdr retrieve the value of byte count */
363#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) ) 369#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
364#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) ) 370#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
365/* given a pointer to an smb_hdr retrieve the pointer to the byte area */ 371/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
366#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) + 2 ) 372#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
367 373
368/* 374/*
369 * Computer Name Length (since Netbios name was length 16 with last byte 0x20) 375 * Computer Name Length (since Netbios name was length 16 with last byte 0x20)
@@ -716,6 +722,14 @@ typedef struct smb_com_findclose_req {
716#define REQ_OPENDIRONLY 0x00000008 722#define REQ_OPENDIRONLY 0x00000008
717#define REQ_EXTENDED_INFO 0x00000010 723#define REQ_EXTENDED_INFO 0x00000010
718 724
725/* File type */
726#define DISK_TYPE 0x0000
727#define BYTE_PIPE_TYPE 0x0001
728#define MESSAGE_PIPE_TYPE 0x0002
729#define PRINTER_TYPE 0x0003
730#define COMM_DEV_TYPE 0x0004
731#define UNKNOWN_TYPE 0xFFFF
732
719typedef struct smb_com_open_req { /* also handles create */ 733typedef struct smb_com_open_req { /* also handles create */
720 struct smb_hdr hdr; /* wct = 24 */ 734 struct smb_hdr hdr; /* wct = 24 */
721 __u8 AndXCommand; 735 __u8 AndXCommand;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 04a69dafedba..1a883663b22d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -50,7 +50,8 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
50 int * /* bytes returned */ , const int long_op); 50 int * /* bytes returned */ , const int long_op);
51extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, 51extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
52 struct kvec *, int /* nvec to send */, 52 struct kvec *, int /* nvec to send */,
53 int * /* type of buf returned */ , const int long_op); 53 int * /* type of buf returned */ , const int long_op,
54 const int logError /* whether to log status code*/ );
54extern int SendReceiveBlockingLock(const unsigned int /* xid */ , 55extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
55 struct cifsTconInfo *, 56 struct cifsTconInfo *,
56 struct smb_hdr * /* input */ , 57 struct smb_hdr * /* input */ ,
@@ -65,7 +66,7 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
65extern int decode_negTokenInit(unsigned char *security_blob, int length, 66extern int decode_negTokenInit(unsigned char *security_blob, int length,
66 enum securityEnum *secType); 67 enum securityEnum *secType);
67extern int cifs_inet_pton(int, char *source, void *dst); 68extern int cifs_inet_pton(int, char *source, void *dst);
68extern int map_smb_to_linux_error(struct smb_hdr *smb); 69extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
69extern void header_assemble(struct smb_hdr *, char /* command */ , 70extern void header_assemble(struct smb_hdr *, char /* command */ ,
70 const struct cifsTconInfo *, int /* length of 71 const struct cifsTconInfo *, int /* length of
71 fixed section (word count) in two byte units */); 72 fixed section (word count) in two byte units */);
@@ -304,12 +305,13 @@ extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
304 const char *pass); 305 const char *pass);
305extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, 306extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
306 const struct nls_table *); 307 const struct nls_table *);
307extern void CalcNTLMv2_response(const struct cifsSesInfo *, char * ); 308extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
308extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, 309extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
309 const struct nls_table *); 310 const struct nls_table *);
310#ifdef CONFIG_CIFS_WEAK_PW_HASH 311#ifdef CONFIG_CIFS_WEAK_PW_HASH
311extern void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key); 312extern void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key);
312#endif /* CIFS_WEAK_PW_HASH */ 313#endif /* CIFS_WEAK_PW_HASH */
314extern int parse_sec_desc(struct cifs_ntsd *, int);
313extern int CIFSSMBCopy(int xid, 315extern int CIFSSMBCopy(int xid,
314 struct cifsTconInfo *source_tcon, 316 struct cifsTconInfo *source_tcon,
315 const char *fromName, 317 const char *fromName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 8eb102f940d4..f0d9a485d095 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -34,10 +34,10 @@
34#include <asm/uaccess.h> 34#include <asm/uaccess.h>
35#include "cifspdu.h" 35#include "cifspdu.h"
36#include "cifsglob.h" 36#include "cifsglob.h"
37#include "cifsacl.h"
37#include "cifsproto.h" 38#include "cifsproto.h"
38#include "cifs_unicode.h" 39#include "cifs_unicode.h"
39#include "cifs_debug.h" 40#include "cifs_debug.h"
40#include "cifsacl.h"
41 41
42#ifdef CONFIG_CIFS_POSIX 42#ifdef CONFIG_CIFS_POSIX
43static struct { 43static struct {
@@ -94,9 +94,8 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
94 write_lock(&GlobalSMBSeslock); 94 write_lock(&GlobalSMBSeslock);
95 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { 95 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
96 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 96 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
97 if (open_file) { 97 if (open_file)
98 open_file->invalidHandle = TRUE; 98 open_file->invalidHandle = TRUE;
99 }
100 } 99 }
101 write_unlock(&GlobalSMBSeslock); 100 write_unlock(&GlobalSMBSeslock);
102 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted 101 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -439,8 +438,13 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
439 438
440 pSMB->hdr.Mid = GetNextMid(server); 439 pSMB->hdr.Mid = GetNextMid(server);
441 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); 440 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
441
442 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) 442 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
443 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 443 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
444 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
445 cFYI(1, ("Kerberos only mechanism, enable extended security"));
446 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
447 }
444 448
445 count = 0; 449 count = 0;
446 for (i = 0; i < CIFS_NUM_PROT; i++) { 450 for (i = 0; i < CIFS_NUM_PROT; i++) {
@@ -513,7 +517,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
513 (int)ts.tv_sec, (int)utc.tv_sec, 517 (int)ts.tv_sec, (int)utc.tv_sec,
514 (int)(utc.tv_sec - ts.tv_sec))); 518 (int)(utc.tv_sec - ts.tv_sec)));
515 val = (int)(utc.tv_sec - ts.tv_sec); 519 val = (int)(utc.tv_sec - ts.tv_sec);
516 seconds = val < 0 ? -val : val; 520 seconds = abs(val);
517 result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; 521 result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
518 remain = seconds % MIN_TZ_ADJ; 522 remain = seconds % MIN_TZ_ADJ;
519 if (remain >= (MIN_TZ_ADJ / 2)) 523 if (remain >= (MIN_TZ_ADJ / 2))
@@ -574,7 +578,20 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
574 server->secType = NTLM; 578 server->secType = NTLM;
575 else if (secFlags & CIFSSEC_MAY_NTLMV2) 579 else if (secFlags & CIFSSEC_MAY_NTLMV2)
576 server->secType = NTLMv2; 580 server->secType = NTLMv2;
577 /* else krb5 ... any others ... */ 581 else if (secFlags & CIFSSEC_MAY_KRB5)
582 server->secType = Kerberos;
583 else if (secFlags & CIFSSEC_MAY_LANMAN)
584 server->secType = LANMAN;
585/* #ifdef CONFIG_CIFS_EXPERIMENTAL
586 else if (secFlags & CIFSSEC_MAY_PLNTXT)
587 server->secType = ??
588#endif */
589 else {
590 rc = -EOPNOTSUPP;
591 cERROR(1, ("Invalid security type"));
592 goto neg_err_exit;
593 }
594 /* else ... any others ...? */
578 595
579 /* one byte, so no need to convert this or EncryptionKeyLen from 596 /* one byte, so no need to convert this or EncryptionKeyLen from
580 little endian */ 597 little endian */
@@ -604,22 +621,26 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
604 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 621 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
605 (server->capabilities & CAP_EXTENDED_SECURITY)) { 622 (server->capabilities & CAP_EXTENDED_SECURITY)) {
606 count = pSMBr->ByteCount; 623 count = pSMBr->ByteCount;
607 if (count < 16) 624 if (count < 16) {
608 rc = -EIO; 625 rc = -EIO;
609 else if (count == 16) { 626 goto neg_err_exit;
610 server->secType = RawNTLMSSP; 627 }
611 if (server->socketUseCount.counter > 1) { 628
612 if (memcmp(server->server_GUID, 629 if (server->socketUseCount.counter > 1) {
613 pSMBr->u.extended_response. 630 if (memcmp(server->server_GUID,
614 GUID, 16) != 0) { 631 pSMBr->u.extended_response.
615 cFYI(1, ("server UID changed")); 632 GUID, 16) != 0) {
616 memcpy(server->server_GUID, 633 cFYI(1, ("server UID changed"));
617 pSMBr->u.extended_response.GUID,
618 16);
619 }
620 } else
621 memcpy(server->server_GUID, 634 memcpy(server->server_GUID,
622 pSMBr->u.extended_response.GUID, 16); 635 pSMBr->u.extended_response.GUID,
636 16);
637 }
638 } else
639 memcpy(server->server_GUID,
640 pSMBr->u.extended_response.GUID, 16);
641
642 if (count == 16) {
643 server->secType = RawNTLMSSP;
623 } else { 644 } else {
624 rc = decode_negTokenInit(pSMBr->u.extended_response. 645 rc = decode_negTokenInit(pSMBr->u.extended_response.
625 SecurityBlob, 646 SecurityBlob,
@@ -642,10 +663,12 @@ signing_check:
642 /* MUST_SIGN already includes the MAY_SIGN FLAG 663 /* MUST_SIGN already includes the MAY_SIGN FLAG
643 so if this is zero it means that signing is disabled */ 664 so if this is zero it means that signing is disabled */
644 cFYI(1, ("Signing disabled")); 665 cFYI(1, ("Signing disabled"));
645 if (server->secMode & SECMODE_SIGN_REQUIRED) 666 if (server->secMode & SECMODE_SIGN_REQUIRED) {
646 cERROR(1, ("Server requires " 667 cERROR(1, ("Server requires "
647 "/proc/fs/cifs/PacketSigningEnabled " 668 "packet signing to be enabled in "
648 "to be on")); 669 "/proc/fs/cifs/SecurityFlags."));
670 rc = -EOPNOTSUPP;
671 }
649 server->secMode &= 672 server->secMode &=
650 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); 673 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
651 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { 674 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
@@ -1052,7 +1075,7 @@ PsxCreat:
1052 InformationLevel) - 4; 1075 InformationLevel) - 4;
1053 offset = param_offset + params; 1076 offset = param_offset + params;
1054 pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset); 1077 pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
1055 pdata->Level = SMB_QUERY_FILE_UNIX_BASIC; 1078 pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
1056 pdata->Permissions = cpu_to_le64(mode); 1079 pdata->Permissions = cpu_to_le64(mode);
1057 pdata->PosixOpenFlags = cpu_to_le32(posix_flags); 1080 pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
1058 pdata->OpenFlags = cpu_to_le32(*pOplock); 1081 pdata->OpenFlags = cpu_to_le32(*pOplock);
@@ -1098,8 +1121,8 @@ PsxCreat:
1098 if (cpu_to_le32(FILE_CREATE) == psx_rsp->CreateAction) 1121 if (cpu_to_le32(FILE_CREATE) == psx_rsp->CreateAction)
1099 *pOplock |= CIFS_CREATE_ACTION; 1122 *pOplock |= CIFS_CREATE_ACTION;
1100 /* check to make sure response data is there */ 1123 /* check to make sure response data is there */
1101 if (psx_rsp->ReturnedLevel != SMB_QUERY_FILE_UNIX_BASIC) { 1124 if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
1102 pRetData->Type = -1; /* unknown */ 1125 pRetData->Type = cpu_to_le32(-1); /* unknown */
1103#ifdef CONFIG_CIFS_DEBUG2 1126#ifdef CONFIG_CIFS_DEBUG2
1104 cFYI(1, ("unknown type")); 1127 cFYI(1, ("unknown type"));
1105#endif 1128#endif
@@ -1107,12 +1130,12 @@ PsxCreat:
1107 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) 1130 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
1108 + sizeof(FILE_UNIX_BASIC_INFO)) { 1131 + sizeof(FILE_UNIX_BASIC_INFO)) {
1109 cERROR(1, ("Open response data too small")); 1132 cERROR(1, ("Open response data too small"));
1110 pRetData->Type = -1; 1133 pRetData->Type = cpu_to_le32(-1);
1111 goto psx_create_err; 1134 goto psx_create_err;
1112 } 1135 }
1113 memcpy((char *) pRetData, 1136 memcpy((char *) pRetData,
1114 (char *)psx_rsp + sizeof(OPEN_PSX_RSP), 1137 (char *)psx_rsp + sizeof(OPEN_PSX_RSP),
1115 sizeof (FILE_UNIX_BASIC_INFO)); 1138 sizeof(FILE_UNIX_BASIC_INFO));
1116 } 1139 }
1117 1140
1118psx_create_err: 1141psx_create_err:
@@ -1193,9 +1216,9 @@ OldOpenRetry:
1193 } 1216 }
1194 if (*pOplock & REQ_OPLOCK) 1217 if (*pOplock & REQ_OPLOCK)
1195 pSMB->OpenFlags = cpu_to_le16(REQ_OPLOCK); 1218 pSMB->OpenFlags = cpu_to_le16(REQ_OPLOCK);
1196 else if (*pOplock & REQ_BATCHOPLOCK) { 1219 else if (*pOplock & REQ_BATCHOPLOCK)
1197 pSMB->OpenFlags = cpu_to_le16(REQ_BATCHOPLOCK); 1220 pSMB->OpenFlags = cpu_to_le16(REQ_BATCHOPLOCK);
1198 } 1221
1199 pSMB->OpenFlags |= cpu_to_le16(REQ_MORE_INFO); 1222 pSMB->OpenFlags |= cpu_to_le16(REQ_MORE_INFO);
1200 /* BB fixme add conversion for access_flags to bits 0 - 2 of mode */ 1223 /* BB fixme add conversion for access_flags to bits 0 - 2 of mode */
1201 /* 0 = read 1224 /* 0 = read
@@ -1310,9 +1333,8 @@ openRetry:
1310 } 1333 }
1311 if (*pOplock & REQ_OPLOCK) 1334 if (*pOplock & REQ_OPLOCK)
1312 pSMB->OpenFlags = cpu_to_le32(REQ_OPLOCK); 1335 pSMB->OpenFlags = cpu_to_le32(REQ_OPLOCK);
1313 else if (*pOplock & REQ_BATCHOPLOCK) { 1336 else if (*pOplock & REQ_BATCHOPLOCK)
1314 pSMB->OpenFlags = cpu_to_le32(REQ_BATCHOPLOCK); 1337 pSMB->OpenFlags = cpu_to_le32(REQ_BATCHOPLOCK);
1315 }
1316 pSMB->DesiredAccess = cpu_to_le32(access_flags); 1338 pSMB->DesiredAccess = cpu_to_le32(access_flags);
1317 pSMB->AllocationSize = 0; 1339 pSMB->AllocationSize = 0;
1318 /* set file as system file if special file such 1340 /* set file as system file if special file such
@@ -1424,9 +1446,8 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1424 1446
1425 iov[0].iov_base = (char *)pSMB; 1447 iov[0].iov_base = (char *)pSMB;
1426 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1448 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
1427 rc = SendReceive2(xid, tcon->ses, iov, 1449 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1428 1 /* num iovecs */, 1450 &resp_buf_type, 0 /* not long op */, 1 /* log err */ );
1429 &resp_buf_type, 0);
1430 cifs_stats_inc(&tcon->num_reads); 1451 cifs_stats_inc(&tcon->num_reads);
1431 pSMBr = (READ_RSP *)iov[0].iov_base; 1452 pSMBr = (READ_RSP *)iov[0].iov_base;
1432 if (rc) { 1453 if (rc) {
@@ -1446,11 +1467,11 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1446 *nbytes = 0; 1467 *nbytes = 0;
1447 } else { 1468 } else {
1448 pReadData = (char *) (&pSMBr->hdr.Protocol) + 1469 pReadData = (char *) (&pSMBr->hdr.Protocol) +
1449 le16_to_cpu(pSMBr->DataOffset); 1470 le16_to_cpu(pSMBr->DataOffset);
1450/* if (rc = copy_to_user(buf, pReadData, data_length)) { 1471/* if (rc = copy_to_user(buf, pReadData, data_length)) {
1451 cERROR(1,("Faulting on read rc = %d",rc)); 1472 cERROR(1,("Faulting on read rc = %d",rc));
1452 rc = -EFAULT; 1473 rc = -EFAULT;
1453 }*/ /* can not use copy_to_user when using page cache*/ 1474 }*/ /* can not use copy_to_user when using page cache*/
1454 if (*buf) 1475 if (*buf)
1455 memcpy(*buf, pReadData, data_length); 1476 memcpy(*buf, pReadData, data_length);
1456 } 1477 }
@@ -1645,7 +1666,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1645 1666
1646 1667
1647 rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 1668 rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type,
1648 long_op); 1669 long_op, 0 /* do not log STATUS code */ );
1649 cifs_stats_inc(&tcon->num_writes); 1670 cifs_stats_inc(&tcon->num_writes);
1650 if (rc) { 1671 if (rc) {
1651 cFYI(1, ("Send error Write2 = %d", rc)); 1672 cFYI(1, ("Send error Write2 = %d", rc));
@@ -2538,7 +2559,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
2538 cFYI(1, ("data starts after end of smb")); 2559 cFYI(1, ("data starts after end of smb"));
2539 return -EINVAL; 2560 return -EINVAL;
2540 } else if (data_count + *ppdata > end_of_smb) { 2561 } else if (data_count + *ppdata > end_of_smb) {
2541 cFYI(1,("data %p + count %d (%p) ends after end of smb %p start %p", 2562 cFYI(1, ("data %p + count %d (%p) ends after end of smb %p start %p",
2542 *ppdata, data_count, (data_count + *ppdata), 2563 *ppdata, data_count, (data_count + *ppdata),
2543 end_of_smb, pSMBr)); 2564 end_of_smb, pSMBr));
2544 return -EINVAL; 2565 return -EINVAL;
@@ -2615,7 +2636,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2615 reparse_buf->TargetNameOffset + 2636 reparse_buf->TargetNameOffset +
2616 reparse_buf->TargetNameLen) > 2637 reparse_buf->TargetNameLen) >
2617 end_of_smb) { 2638 end_of_smb) {
2618 cFYI(1,("reparse buf goes beyond SMB")); 2639 cFYI(1, ("reparse buf beyond SMB"));
2619 rc = -EIO; 2640 rc = -EIO;
2620 goto qreparse_out; 2641 goto qreparse_out;
2621 } 2642 }
@@ -3042,25 +3063,12 @@ GetExtAttrOut:
3042 3063
3043#endif /* CONFIG_POSIX */ 3064#endif /* CONFIG_POSIX */
3044 3065
3045 3066#ifdef CONFIG_CIFS_EXPERIMENTAL
3046/* security id for everyone */
3047static const struct cifs_sid sid_everyone =
3048 {1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
3049/* group users */
3050static const struct cifs_sid sid_user =
3051 {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
3052
3053/* Convert CIFS ACL to POSIX form */
3054static int parse_sec_desc(struct cifs_sid *psec_desc, int acl_len)
3055{
3056 return 0;
3057}
3058
3059/* Get Security Descriptor (by handle) from remote server for a file or dir */ 3067/* Get Security Descriptor (by handle) from remote server for a file or dir */
3060int 3068int
3061CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, 3069CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3062 /* BB fix up return info */ char *acl_inf, const int buflen, 3070 /* BB fix up return info */ char *acl_inf, const int buflen,
3063 const int acl_type /* ACCESS/DEFAULT not sure implication */) 3071 const int acl_type)
3064{ 3072{
3065 int rc = 0; 3073 int rc = 0;
3066 int buf_type = 0; 3074 int buf_type = 0;
@@ -3085,12 +3093,13 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3085 iov[0].iov_base = (char *)pSMB; 3093 iov[0].iov_base = (char *)pSMB;
3086 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 3094 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
3087 3095
3088 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0); 3096 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
3097 0 /* not long op */, 0 /* do not log STATUS codes */ );
3089 cifs_stats_inc(&tcon->num_acl_get); 3098 cifs_stats_inc(&tcon->num_acl_get);
3090 if (rc) { 3099 if (rc) {
3091 cFYI(1, ("Send error in QuerySecDesc = %d", rc)); 3100 cFYI(1, ("Send error in QuerySecDesc = %d", rc));
3092 } else { /* decode response */ 3101 } else { /* decode response */
3093 struct cifs_sid *psec_desc; 3102 struct cifs_ntsd *psec_desc;
3094 __le32 * parm; 3103 __le32 * parm;
3095 int parm_len; 3104 int parm_len;
3096 int data_len; 3105 int data_len;
@@ -3105,8 +3114,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3105 goto qsec_out; 3114 goto qsec_out;
3106 pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base; 3115 pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
3107 3116
3108 cERROR(1, ("smb %p parm %p data %p", 3117 cFYI(1, ("smb %p parm %p data %p", pSMBr, parm, psec_desc));
3109 pSMBr, parm, psec_desc)); /* BB removeme BB */
3110 3118
3111 if (le32_to_cpu(pSMBr->ParameterCount) != 4) { 3119 if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
3112 rc = -EIO; /* bad smb */ 3120 rc = -EIO; /* bad smb */
@@ -3115,7 +3123,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3115 3123
3116/* BB check that data area is minimum length and as big as acl_len */ 3124/* BB check that data area is minimum length and as big as acl_len */
3117 3125
3118 acl_len = le32_to_cpu(*(__le32 *)parm); 3126 acl_len = le32_to_cpu(*parm);
3119 /* BB check if (acl_len > bufsize) */ 3127 /* BB check if (acl_len > bufsize) */
3120 3128
3121 parse_sec_desc(psec_desc, acl_len); 3129 parse_sec_desc(psec_desc, acl_len);
@@ -3128,6 +3136,7 @@ qsec_out:
3128/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ 3136/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
3129 return rc; 3137 return rc;
3130} 3138}
3139#endif /* CONFIG_CIFS_EXPERIMENTAL */
3131 3140
3132/* Legacy Query Path Information call for lookup to old servers such 3141/* Legacy Query Path Information call for lookup to old servers such
3133 as Win9x/WinME */ 3142 as Win9x/WinME */
@@ -3363,6 +3372,9 @@ UnixQPathInfoRetry:
3363 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3372 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3364 3373
3365 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3374 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
3375 cERROR(1, ("Malformed FILE_UNIX_BASIC_INFO response.\n"
3376 "Unix Extensions can be disabled on mount "
3377 "by specifying the nosfu mount option."));
3366 rc = -EIO; /* bad smb */ 3378 rc = -EIO; /* bad smb */
3367 } else { 3379 } else {
3368 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3380 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3883,12 +3895,10 @@ getDFSRetry:
3883 pSMB->hdr.Mid = GetNextMid(ses->server); 3895 pSMB->hdr.Mid = GetNextMid(ses->server);
3884 pSMB->hdr.Tid = ses->ipc_tid; 3896 pSMB->hdr.Tid = ses->ipc_tid;
3885 pSMB->hdr.Uid = ses->Suid; 3897 pSMB->hdr.Uid = ses->Suid;
3886 if (ses->capabilities & CAP_STATUS32) { 3898 if (ses->capabilities & CAP_STATUS32)
3887 pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS; 3899 pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS;
3888 } 3900 if (ses->capabilities & CAP_DFS)
3889 if (ses->capabilities & CAP_DFS) {
3890 pSMB->hdr.Flags2 |= SMBFLG2_DFS; 3901 pSMB->hdr.Flags2 |= SMBFLG2_DFS;
3891 }
3892 3902
3893 if (ses->capabilities & CAP_UNICODE) { 3903 if (ses->capabilities & CAP_UNICODE) {
3894 pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; 3904 pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
@@ -4060,10 +4070,6 @@ oldQFSInfoRetry:
4060 (void **) &pSMBr); 4070 (void **) &pSMBr);
4061 if (rc) 4071 if (rc)
4062 return rc; 4072 return rc;
4063 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4064 (void **) &pSMBr);
4065 if (rc)
4066 return rc;
4067 4073
4068 params = 2; /* level */ 4074 params = 2; /* level */
4069 pSMB->TotalDataCount = 0; 4075 pSMB->TotalDataCount = 0;
@@ -4265,7 +4271,7 @@ QFSAttributeRetry:
4265 *) (((char *) &pSMBr->hdr.Protocol) + 4271 *) (((char *) &pSMBr->hdr.Protocol) +
4266 data_offset); 4272 data_offset);
4267 memcpy(&tcon->fsAttrInfo, response_data, 4273 memcpy(&tcon->fsAttrInfo, response_data,
4268 sizeof (FILE_SYSTEM_ATTRIBUTE_INFO)); 4274 sizeof(FILE_SYSTEM_ATTRIBUTE_INFO));
4269 } 4275 }
4270 } 4276 }
4271 cifs_buf_release(pSMB); 4277 cifs_buf_release(pSMB);
@@ -4334,7 +4340,7 @@ QFSDeviceRetry:
4334 (((char *) &pSMBr->hdr.Protocol) + 4340 (((char *) &pSMBr->hdr.Protocol) +
4335 data_offset); 4341 data_offset);
4336 memcpy(&tcon->fsDevInfo, response_data, 4342 memcpy(&tcon->fsDevInfo, response_data,
4337 sizeof (FILE_SYSTEM_DEVICE_INFO)); 4343 sizeof(FILE_SYSTEM_DEVICE_INFO));
4338 } 4344 }
4339 } 4345 }
4340 cifs_buf_release(pSMB); 4346 cifs_buf_release(pSMB);
@@ -4402,7 +4408,7 @@ QFSUnixRetry:
4402 *) (((char *) &pSMBr->hdr.Protocol) + 4408 *) (((char *) &pSMBr->hdr.Protocol) +
4403 data_offset); 4409 data_offset);
4404 memcpy(&tcon->fsUnixInfo, response_data, 4410 memcpy(&tcon->fsUnixInfo, response_data,
4405 sizeof (FILE_SYSTEM_UNIX_INFO)); 4411 sizeof(FILE_SYSTEM_UNIX_INFO));
4406 } 4412 }
4407 } 4413 }
4408 cifs_buf_release(pSMB); 4414 cifs_buf_release(pSMB);
@@ -4612,7 +4618,7 @@ SetEOFRetry:
4612 strncpy(pSMB->FileName, fileName, name_len); 4618 strncpy(pSMB->FileName, fileName, name_len);
4613 } 4619 }
4614 params = 6 + name_len; 4620 params = 6 + name_len;
4615 data_count = sizeof (struct file_end_of_file_info); 4621 data_count = sizeof(struct file_end_of_file_info);
4616 pSMB->MaxParameterCount = cpu_to_le16(2); 4622 pSMB->MaxParameterCount = cpu_to_le16(2);
4617 pSMB->MaxDataCount = cpu_to_le16(4100); 4623 pSMB->MaxDataCount = cpu_to_le16(4100);
4618 pSMB->MaxSetupCount = 0; 4624 pSMB->MaxSetupCount = 0;
@@ -4800,7 +4806,7 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
4800 4806
4801 data_offset = (char *) (&pSMB->hdr.Protocol) + offset; 4807 data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
4802 4808
4803 count = sizeof (FILE_BASIC_INFO); 4809 count = sizeof(FILE_BASIC_INFO);
4804 pSMB->MaxParameterCount = cpu_to_le16(2); 4810 pSMB->MaxParameterCount = cpu_to_le16(2);
4805 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */ 4811 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */
4806 pSMB->SetupCount = 1; 4812 pSMB->SetupCount = 1;
@@ -4871,7 +4877,7 @@ SetTimesRetry:
4871 } 4877 }
4872 4878
4873 params = 6 + name_len; 4879 params = 6 + name_len;
4874 count = sizeof (FILE_BASIC_INFO); 4880 count = sizeof(FILE_BASIC_INFO);
4875 pSMB->MaxParameterCount = cpu_to_le16(2); 4881 pSMB->MaxParameterCount = cpu_to_le16(2);
4876 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */ 4882 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */
4877 pSMB->MaxSetupCount = 0; 4883 pSMB->MaxSetupCount = 0;
@@ -4900,7 +4906,7 @@ SetTimesRetry:
4900 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 4906 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
4901 pSMB->Reserved4 = 0; 4907 pSMB->Reserved4 = 0;
4902 pSMB->hdr.smb_buf_length += byte_count; 4908 pSMB->hdr.smb_buf_length += byte_count;
4903 memcpy(data_offset, data, sizeof (FILE_BASIC_INFO)); 4909 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
4904 pSMB->ByteCount = cpu_to_le16(byte_count); 4910 pSMB->ByteCount = cpu_to_le16(byte_count);
4905 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4911 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4906 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4912 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5003,7 +5009,7 @@ setPermsRetry:
5003 } 5009 }
5004 5010
5005 params = 6 + name_len; 5011 params = 6 + name_len;
5006 count = sizeof (FILE_UNIX_BASIC_INFO); 5012 count = sizeof(FILE_UNIX_BASIC_INFO);
5007 pSMB->MaxParameterCount = cpu_to_le16(2); 5013 pSMB->MaxParameterCount = cpu_to_le16(2);
5008 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */ 5014 pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */
5009 pSMB->MaxSetupCount = 0; 5015 pSMB->MaxSetupCount = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4af3588c1a96..19ee11f7f35a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -124,7 +124,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
124 struct mid_q_entry *mid_entry; 124 struct mid_q_entry *mid_entry;
125 125
126 spin_lock(&GlobalMid_Lock); 126 spin_lock(&GlobalMid_Lock);
127 if ( kthread_should_stop() ) { 127 if (kthread_should_stop()) {
128 /* the demux thread will exit normally 128 /* the demux thread will exit normally
129 next time through the loop */ 129 next time through the loop */
130 spin_unlock(&GlobalMid_Lock); 130 spin_unlock(&GlobalMid_Lock);
@@ -151,9 +151,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
151 } 151 }
152 list_for_each(tmp, &GlobalTreeConnectionList) { 152 list_for_each(tmp, &GlobalTreeConnectionList) {
153 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 153 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
154 if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) { 154 if ((tcon) && (tcon->ses) && (tcon->ses->server == server))
155 tcon->tidStatus = CifsNeedReconnect; 155 tcon->tidStatus = CifsNeedReconnect;
156 }
157 } 156 }
158 read_unlock(&GlobalSMBSeslock); 157 read_unlock(&GlobalSMBSeslock);
159 /* do not want to be sending data on a socket we are freeing */ 158 /* do not want to be sending data on a socket we are freeing */
@@ -187,7 +186,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
187 spin_unlock(&GlobalMid_Lock); 186 spin_unlock(&GlobalMid_Lock);
188 up(&server->tcpSem); 187 up(&server->tcpSem);
189 188
190 while ( (!kthread_should_stop()) && (server->tcpStatus != CifsGood)) { 189 while ((!kthread_should_stop()) && (server->tcpStatus != CifsGood)) {
191 try_to_freeze(); 190 try_to_freeze();
192 if (server->protocolType == IPV6) { 191 if (server->protocolType == IPV6) {
193 rc = ipv6_connect(&server->addr.sockAddr6, 192 rc = ipv6_connect(&server->addr.sockAddr6,
@@ -204,7 +203,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
204 } else { 203 } else {
205 atomic_inc(&tcpSesReconnectCount); 204 atomic_inc(&tcpSesReconnectCount);
206 spin_lock(&GlobalMid_Lock); 205 spin_lock(&GlobalMid_Lock);
207 if ( !kthread_should_stop() ) 206 if (!kthread_should_stop())
208 server->tcpStatus = CifsGood; 207 server->tcpStatus = CifsGood;
209 server->sequence_number = 0; 208 server->sequence_number = 0;
210 spin_unlock(&GlobalMid_Lock); 209 spin_unlock(&GlobalMid_Lock);
@@ -352,17 +351,15 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
352 351
353 current->flags |= PF_MEMALLOC; 352 current->flags |= PF_MEMALLOC;
354 server->tsk = current; /* save process info to wake at shutdown */ 353 server->tsk = current; /* save process info to wake at shutdown */
355 cFYI(1, ("Demultiplex PID: %d", current->pid)); 354 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
356 write_lock(&GlobalSMBSeslock); 355 write_lock(&GlobalSMBSeslock);
357 atomic_inc(&tcpSesAllocCount); 356 atomic_inc(&tcpSesAllocCount);
358 length = tcpSesAllocCount.counter; 357 length = tcpSesAllocCount.counter;
359 write_unlock(&GlobalSMBSeslock); 358 write_unlock(&GlobalSMBSeslock);
360 complete(&cifsd_complete); 359 complete(&cifsd_complete);
361 if (length > 1) { 360 if (length > 1)
362 mempool_resize(cifs_req_poolp, 361 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
363 length + cifs_min_rcv, 362 GFP_KERNEL);
364 GFP_KERNEL);
365 }
366 363
367 set_freezable(); 364 set_freezable();
368 while (!kthread_should_stop()) { 365 while (!kthread_should_stop()) {
@@ -378,7 +375,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
378 } 375 }
379 } else if (isLargeBuf) { 376 } else if (isLargeBuf) {
380 /* we are reusing a dirty large buf, clear its start */ 377 /* we are reusing a dirty large buf, clear its start */
381 memset(bigbuf, 0, sizeof (struct smb_hdr)); 378 memset(bigbuf, 0, sizeof(struct smb_hdr));
382 } 379 }
383 380
384 if (smallbuf == NULL) { 381 if (smallbuf == NULL) {
@@ -391,7 +388,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
391 } 388 }
392 /* beginning of smb buffer is cleared in our buf_get */ 389 /* beginning of smb buffer is cleared in our buf_get */
393 } else /* if existing small buf clear beginning */ 390 } else /* if existing small buf clear beginning */
394 memset(smallbuf, 0, sizeof (struct smb_hdr)); 391 memset(smallbuf, 0, sizeof(struct smb_hdr));
395 392
396 isLargeBuf = FALSE; 393 isLargeBuf = FALSE;
397 isMultiRsp = FALSE; 394 isMultiRsp = FALSE;
@@ -400,11 +397,13 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
400 iov.iov_len = 4; 397 iov.iov_len = 4;
401 smb_msg.msg_control = NULL; 398 smb_msg.msg_control = NULL;
402 smb_msg.msg_controllen = 0; 399 smb_msg.msg_controllen = 0;
400 pdu_length = 4; /* enough to get RFC1001 header */
401incomplete_rcv:
403 length = 402 length =
404 kernel_recvmsg(csocket, &smb_msg, 403 kernel_recvmsg(csocket, &smb_msg,
405 &iov, 1, 4, 0 /* BB see socket.h flags */); 404 &iov, 1, pdu_length, 0 /* BB other flags? */);
406 405
407 if ( kthread_should_stop() ) { 406 if (kthread_should_stop()) {
408 break; 407 break;
409 } else if (server->tcpStatus == CifsNeedReconnect) { 408 } else if (server->tcpStatus == CifsNeedReconnect) {
410 cFYI(1, ("Reconnect after server stopped responding")); 409 cFYI(1, ("Reconnect after server stopped responding"));
@@ -416,7 +415,10 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
416 msleep(1); /* minimum sleep to prevent looping 415 msleep(1); /* minimum sleep to prevent looping
417 allowing socket to clear and app threads to set 416 allowing socket to clear and app threads to set
418 tcpStatus CifsNeedReconnect if server hung */ 417 tcpStatus CifsNeedReconnect if server hung */
419 continue; 418 if (pdu_length < 4)
419 goto incomplete_rcv;
420 else
421 continue;
420 } else if (length <= 0) { 422 } else if (length <= 0) {
421 if (server->tcpStatus == CifsNew) { 423 if (server->tcpStatus == CifsNew) {
422 cFYI(1, ("tcp session abend after SMBnegprot")); 424 cFYI(1, ("tcp session abend after SMBnegprot"));
@@ -437,13 +439,11 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
437 wake_up(&server->response_q); 439 wake_up(&server->response_q);
438 continue; 440 continue;
439 } else if (length < 4) { 441 } else if (length < 4) {
440 cFYI(1, 442 cFYI(1, ("less than four bytes received (%d bytes)",
441 ("Frame under four bytes received (%d bytes long)",
442 length)); 443 length));
443 cifs_reconnect(server); 444 pdu_length -= length;
444 csocket = server->ssocket; 445 msleep(1);
445 wake_up(&server->response_q); 446 goto incomplete_rcv;
446 continue;
447 } 447 }
448 448
449 /* The right amount was read from socket - 4 bytes */ 449 /* The right amount was read from socket - 4 bytes */
@@ -504,7 +504,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
504 504
505 /* else we have an SMB response */ 505 /* else we have an SMB response */
506 if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || 506 if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) ||
507 (pdu_length < sizeof (struct smb_hdr) - 1 - 4)) { 507 (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) {
508 cERROR(1, ("Invalid size SMB length %d pdu_length %d", 508 cERROR(1, ("Invalid size SMB length %d pdu_length %d",
509 length, pdu_length+4)); 509 length, pdu_length+4));
510 cifs_reconnect(server); 510 cifs_reconnect(server);
@@ -528,7 +528,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
528 total_read += length) { 528 total_read += length) {
529 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, 529 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
530 pdu_length - total_read, 0); 530 pdu_length - total_read, 0);
531 if ( kthread_should_stop() || 531 if (kthread_should_stop() ||
532 (length == -EINTR)) { 532 (length == -EINTR)) {
533 /* then will exit */ 533 /* then will exit */
534 reconnect = 2; 534 reconnect = 2;
@@ -546,6 +546,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
546 allowing socket to clear and app 546 allowing socket to clear and app
547 threads to set tcpStatus 547 threads to set tcpStatus
548 CifsNeedReconnect if server hung*/ 548 CifsNeedReconnect if server hung*/
549 length = 0;
549 continue; 550 continue;
550 } else if (length <= 0) { 551 } else if (length <= 0) {
551 cERROR(1, ("Received no data, expecting %d", 552 cERROR(1, ("Received no data, expecting %d",
@@ -631,9 +632,9 @@ multi_t2_fnd:
631 /* Was previous buf put in mpx struct for multi-rsp? */ 632 /* Was previous buf put in mpx struct for multi-rsp? */
632 if (!isMultiRsp) { 633 if (!isMultiRsp) {
633 /* smb buffer will be freed by user thread */ 634 /* smb buffer will be freed by user thread */
634 if (isLargeBuf) { 635 if (isLargeBuf)
635 bigbuf = NULL; 636 bigbuf = NULL;
636 } else 637 else
637 smallbuf = NULL; 638 smallbuf = NULL;
638 } 639 }
639 wake_up_process(task_to_wake); 640 wake_up_process(task_to_wake);
@@ -676,9 +677,8 @@ multi_t2_fnd:
676 server->ssocket = NULL; 677 server->ssocket = NULL;
677 } 678 }
678 /* buffer usuallly freed in free_mid - need to free it here on exit */ 679 /* buffer usuallly freed in free_mid - need to free it here on exit */
679 if (bigbuf != NULL) 680 cifs_buf_release(bigbuf);
680 cifs_buf_release(bigbuf); 681 if (smallbuf) /* no sense logging a debug message if NULL */
681 if (smallbuf != NULL)
682 cifs_small_buf_release(smallbuf); 682 cifs_small_buf_release(smallbuf);
683 683
684 read_lock(&GlobalSMBSeslock); 684 read_lock(&GlobalSMBSeslock);
@@ -702,9 +702,8 @@ multi_t2_fnd:
702 list_for_each(tmp, &GlobalSMBSessionList) { 702 list_for_each(tmp, &GlobalSMBSessionList) {
703 ses = list_entry(tmp, struct cifsSesInfo, 703 ses = list_entry(tmp, struct cifsSesInfo,
704 cifsSessionList); 704 cifsSessionList);
705 if (ses->server == server) { 705 if (ses->server == server)
706 ses->status = CifsExiting; 706 ses->status = CifsExiting;
707 }
708 } 707 }
709 708
710 spin_lock(&GlobalMid_Lock); 709 spin_lock(&GlobalMid_Lock);
@@ -714,9 +713,8 @@ multi_t2_fnd:
714 cFYI(1, ("Clearing Mid 0x%x - waking up ", 713 cFYI(1, ("Clearing Mid 0x%x - waking up ",
715 mid_entry->mid)); 714 mid_entry->mid));
716 task_to_wake = mid_entry->tsk; 715 task_to_wake = mid_entry->tsk;
717 if (task_to_wake) { 716 if (task_to_wake)
718 wake_up_process(task_to_wake); 717 wake_up_process(task_to_wake);
719 }
720 } 718 }
721 } 719 }
722 spin_unlock(&GlobalMid_Lock); 720 spin_unlock(&GlobalMid_Lock);
@@ -749,18 +747,15 @@ multi_t2_fnd:
749 list_for_each(tmp, &GlobalSMBSessionList) { 747 list_for_each(tmp, &GlobalSMBSessionList) {
750 ses = list_entry(tmp, struct cifsSesInfo, 748 ses = list_entry(tmp, struct cifsSesInfo,
751 cifsSessionList); 749 cifsSessionList);
752 if (ses->server == server) { 750 if (ses->server == server)
753 ses->server = NULL; 751 ses->server = NULL;
754 }
755 } 752 }
756 write_unlock(&GlobalSMBSeslock); 753 write_unlock(&GlobalSMBSeslock);
757 754
758 kfree(server); 755 kfree(server);
759 if (length > 0) { 756 if (length > 0)
760 mempool_resize(cifs_req_poolp, 757 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
761 length + cifs_min_rcv, 758 GFP_KERNEL);
762 GFP_KERNEL);
763 }
764 759
765 return 0; 760 return 0;
766} 761}
@@ -1477,7 +1472,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1477 if (psin_server->sin_port) { /* user overrode default port */ 1472 if (psin_server->sin_port) { /* user overrode default port */
1478 rc = (*csocket)->ops->connect(*csocket, 1473 rc = (*csocket)->ops->connect(*csocket,
1479 (struct sockaddr *) psin_server, 1474 (struct sockaddr *) psin_server,
1480 sizeof (struct sockaddr_in), 0); 1475 sizeof(struct sockaddr_in), 0);
1481 if (rc >= 0) 1476 if (rc >= 0)
1482 connected = 1; 1477 connected = 1;
1483 } 1478 }
@@ -1493,7 +1488,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1493 1488
1494 rc = (*csocket)->ops->connect(*csocket, 1489 rc = (*csocket)->ops->connect(*csocket,
1495 (struct sockaddr *) psin_server, 1490 (struct sockaddr *) psin_server,
1496 sizeof (struct sockaddr_in), 0); 1491 sizeof(struct sockaddr_in), 0);
1497 if (rc >= 0) 1492 if (rc >= 0)
1498 connected = 1; 1493 connected = 1;
1499 } 1494 }
@@ -1502,7 +1497,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1502 psin_server->sin_port = htons(RFC1001_PORT); 1497 psin_server->sin_port = htons(RFC1001_PORT);
1503 rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) 1498 rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *)
1504 psin_server, 1499 psin_server,
1505 sizeof (struct sockaddr_in), 0); 1500 sizeof(struct sockaddr_in), 0);
1506 if (rc >= 0) 1501 if (rc >= 0)
1507 connected = 1; 1502 connected = 1;
1508 } 1503 }
@@ -1610,7 +1605,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1610 if (psin_server->sin6_port) { /* user overrode default port */ 1605 if (psin_server->sin6_port) { /* user overrode default port */
1611 rc = (*csocket)->ops->connect(*csocket, 1606 rc = (*csocket)->ops->connect(*csocket,
1612 (struct sockaddr *) psin_server, 1607 (struct sockaddr *) psin_server,
1613 sizeof (struct sockaddr_in6), 0); 1608 sizeof(struct sockaddr_in6), 0);
1614 if (rc >= 0) 1609 if (rc >= 0)
1615 connected = 1; 1610 connected = 1;
1616 } 1611 }
@@ -1626,7 +1621,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1626 1621
1627 rc = (*csocket)->ops->connect(*csocket, 1622 rc = (*csocket)->ops->connect(*csocket,
1628 (struct sockaddr *) psin_server, 1623 (struct sockaddr *) psin_server,
1629 sizeof (struct sockaddr_in6), 0); 1624 sizeof(struct sockaddr_in6), 0);
1630 if (rc >= 0) 1625 if (rc >= 0)
1631 connected = 1; 1626 connected = 1;
1632 } 1627 }
@@ -1634,7 +1629,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1634 if (!connected) { 1629 if (!connected) {
1635 psin_server->sin6_port = htons(RFC1001_PORT); 1630 psin_server->sin6_port = htons(RFC1001_PORT);
1636 rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) 1631 rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *)
1637 psin_server, sizeof (struct sockaddr_in6), 0); 1632 psin_server, sizeof(struct sockaddr_in6), 0);
1638 if (rc >= 0) 1633 if (rc >= 0)
1639 connected = 1; 1634 connected = 1;
1640 } 1635 }
@@ -1750,7 +1745,16 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
1750 cFYI(1, ("very large write cap")); 1745 cFYI(1, ("very large write cap"));
1751#endif /* CIFS_DEBUG2 */ 1746#endif /* CIFS_DEBUG2 */
1752 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { 1747 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
1753 cFYI(1, ("setting capabilities failed")); 1748 if (vol_info == NULL) {
1749 cFYI(1, ("resetting capabilities failed"));
1750 } else
1751 cERROR(1, ("Negotiating Unix capabilities "
1752 "with the server failed. Consider "
1753 "mounting with the Unix Extensions\n"
1754 "disabled, if problems are found, "
1755 "by specifying the nounix mount "
1756 "option."));
1757
1754 } 1758 }
1755 } 1759 }
1756} 1760}
@@ -1909,8 +1913,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1909 return rc; 1913 return rc;
1910 } 1914 }
1911 1915
1912 srvTcp = kmalloc(sizeof (struct TCP_Server_Info), GFP_KERNEL); 1916 srvTcp = kzalloc(sizeof(struct TCP_Server_Info), GFP_KERNEL);
1913 if (srvTcp == NULL) { 1917 if (!srvTcp) {
1914 rc = -ENOMEM; 1918 rc = -ENOMEM;
1915 sock_release(csocket); 1919 sock_release(csocket);
1916 kfree(volume_info.UNC); 1920 kfree(volume_info.UNC);
@@ -1919,9 +1923,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1919 FreeXid(xid); 1923 FreeXid(xid);
1920 return rc; 1924 return rc;
1921 } else { 1925 } else {
1922 memset(srvTcp, 0, sizeof (struct TCP_Server_Info));
1923 memcpy(&srvTcp->addr.sockAddr, &sin_server, 1926 memcpy(&srvTcp->addr.sockAddr, &sin_server,
1924 sizeof (struct sockaddr_in)); 1927 sizeof(struct sockaddr_in));
1925 atomic_set(&srvTcp->inFlight, 0); 1928 atomic_set(&srvTcp->inFlight, 0);
1926 /* BB Add code for ipv6 case too */ 1929 /* BB Add code for ipv6 case too */
1927 srvTcp->ssocket = csocket; 1930 srvTcp->ssocket = csocket;
@@ -2173,8 +2176,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2173 if (tsk) 2176 if (tsk)
2174 kthread_stop(tsk); 2177 kthread_stop(tsk);
2175 } 2178 }
2176 } else 2179 } else {
2177 cFYI(1, ("No session or bad tcon")); 2180 cFYI(1, ("No session or bad tcon"));
2181 if ((pSesInfo->server) &&
2182 (pSesInfo->server->tsk)) {
2183 struct task_struct *tsk;
2184 force_sig(SIGKILL,
2185 pSesInfo->server->tsk);
2186 tsk = pSesInfo->server->tsk;
2187 if (tsk)
2188 kthread_stop(tsk);
2189 }
2190 }
2178 sesInfoFree(pSesInfo); 2191 sesInfoFree(pSesInfo);
2179 /* pSesInfo = NULL; */ 2192 /* pSesInfo = NULL; */
2180 } 2193 }
@@ -2185,8 +2198,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2185 tcon->ses = pSesInfo; 2198 tcon->ses = pSesInfo;
2186 2199
2187 /* do not care if following two calls succeed - informational */ 2200 /* do not care if following two calls succeed - informational */
2188 CIFSSMBQFSDeviceInfo(xid, tcon); 2201 if (!tcon->ipc) {
2189 CIFSSMBQFSAttributeInfo(xid, tcon); 2202 CIFSSMBQFSDeviceInfo(xid, tcon);
2203 CIFSSMBQFSAttributeInfo(xid, tcon);
2204 }
2190 2205
2191 /* tell server which Unix caps we support */ 2206 /* tell server which Unix caps we support */
2192 if (tcon->ses->capabilities & CAP_UNIX) 2207 if (tcon->ses->capabilities & CAP_UNIX)
@@ -2526,8 +2541,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2526sesssetup_nomem: /* do not return an error on nomem for the info strings, 2541sesssetup_nomem: /* do not return an error on nomem for the info strings,
2527 since that could make reconnection harder, and 2542 since that could make reconnection harder, and
2528 reconnection might be needed to free memory */ 2543 reconnection might be needed to free memory */
2529 if (smb_buffer) 2544 cifs_buf_release(smb_buffer);
2530 cifs_buf_release(smb_buffer);
2531 2545
2532 return rc; 2546 return rc;
2533} 2547}
@@ -2547,7 +2561,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2547 int remaining_words = 0; 2561 int remaining_words = 0;
2548 int bytes_returned = 0; 2562 int bytes_returned = 0;
2549 int len; 2563 int len;
2550 int SecurityBlobLength = sizeof (NEGOTIATE_MESSAGE); 2564 int SecurityBlobLength = sizeof(NEGOTIATE_MESSAGE);
2551 PNEGOTIATE_MESSAGE SecurityBlob; 2565 PNEGOTIATE_MESSAGE SecurityBlob;
2552 PCHALLENGE_MESSAGE SecurityBlob2; 2566 PCHALLENGE_MESSAGE SecurityBlob2;
2553 __u32 negotiate_flags, capabilities; 2567 __u32 negotiate_flags, capabilities;
@@ -2865,15 +2879,14 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2865 rc = -EIO; 2879 rc = -EIO;
2866 } 2880 }
2867 2881
2868 if (smb_buffer) 2882 cifs_buf_release(smb_buffer);
2869 cifs_buf_release(smb_buffer);
2870 2883
2871 return rc; 2884 return rc;
2872} 2885}
2873static int 2886static int
2874CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, 2887CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2875 char *ntlm_session_key, int ntlmv2_flag, 2888 char *ntlm_session_key, int ntlmv2_flag,
2876 const struct nls_table *nls_codepage) 2889 const struct nls_table *nls_codepage)
2877{ 2890{
2878 struct smb_hdr *smb_buffer; 2891 struct smb_hdr *smb_buffer;
2879 struct smb_hdr *smb_buffer_response; 2892 struct smb_hdr *smb_buffer_response;
@@ -2886,7 +2899,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2886 int remaining_words = 0; 2899 int remaining_words = 0;
2887 int bytes_returned = 0; 2900 int bytes_returned = 0;
2888 int len; 2901 int len;
2889 int SecurityBlobLength = sizeof (AUTHENTICATE_MESSAGE); 2902 int SecurityBlobLength = sizeof(AUTHENTICATE_MESSAGE);
2890 PAUTHENTICATE_MESSAGE SecurityBlob; 2903 PAUTHENTICATE_MESSAGE SecurityBlob;
2891 __u32 negotiate_flags, capabilities; 2904 __u32 negotiate_flags, capabilities;
2892 __u16 count; 2905 __u16 count;
@@ -2901,8 +2914,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2901 return -ENOMEM; 2914 return -ENOMEM;
2902 } 2915 }
2903 smb_buffer_response = smb_buffer; 2916 smb_buffer_response = smb_buffer;
2904 pSMB = (SESSION_SETUP_ANDX *) smb_buffer; 2917 pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
2905 pSMBr = (SESSION_SETUP_ANDX *) smb_buffer_response; 2918 pSMBr = (SESSION_SETUP_ANDX *)smb_buffer_response;
2906 2919
2907 /* send SMBsessionSetup here */ 2920 /* send SMBsessionSetup here */
2908 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX, 2921 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
@@ -2921,7 +2934,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2921 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 2934 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
2922 2935
2923 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | 2936 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
2924 CAP_EXTENDED_SECURITY; 2937 CAP_EXTENDED_SECURITY;
2925 if (ses->capabilities & CAP_UNICODE) { 2938 if (ses->capabilities & CAP_UNICODE) {
2926 smb_buffer->Flags2 |= SMBFLG2_UNICODE; 2939 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
2927 capabilities |= CAP_UNICODE; 2940 capabilities |= CAP_UNICODE;
@@ -2936,15 +2949,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2936 } 2949 }
2937 pSMB->req.Capabilities = cpu_to_le32(capabilities); 2950 pSMB->req.Capabilities = cpu_to_le32(capabilities);
2938 2951
2939 bcc_ptr = (char *) &pSMB->req.SecurityBlob; 2952 bcc_ptr = (char *)&pSMB->req.SecurityBlob;
2940 SecurityBlob = (PAUTHENTICATE_MESSAGE) bcc_ptr; 2953 SecurityBlob = (PAUTHENTICATE_MESSAGE)bcc_ptr;
2941 strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8); 2954 strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8);
2942 SecurityBlob->MessageType = NtLmAuthenticate; 2955 SecurityBlob->MessageType = NtLmAuthenticate;
2943 bcc_ptr += SecurityBlobLength; 2956 bcc_ptr += SecurityBlobLength;
2944 negotiate_flags = 2957 negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
2945 NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET | 2958 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
2946 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO | 2959 0x80000000 | NTLMSSP_NEGOTIATE_128;
2947 0x80000000 | NTLMSSP_NEGOTIATE_128;
2948 if (sign_CIFS_PDUs) 2960 if (sign_CIFS_PDUs)
2949 negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN; 2961 negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN;
2950 if (ntlmv2_flag) 2962 if (ntlmv2_flag)
@@ -2979,36 +2991,32 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2979 SecurityBlob->DomainName.Length = 0; 2991 SecurityBlob->DomainName.Length = 0;
2980 SecurityBlob->DomainName.MaximumLength = 0; 2992 SecurityBlob->DomainName.MaximumLength = 0;
2981 } else { 2993 } else {
2982 __u16 len = 2994 __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
2983 cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
2984 nls_codepage); 2995 nls_codepage);
2985 len *= 2; 2996 ln *= 2;
2986 SecurityBlob->DomainName.MaximumLength = 2997 SecurityBlob->DomainName.MaximumLength =
2987 cpu_to_le16(len); 2998 cpu_to_le16(ln);
2988 SecurityBlob->DomainName.Buffer = 2999 SecurityBlob->DomainName.Buffer =
2989 cpu_to_le32(SecurityBlobLength); 3000 cpu_to_le32(SecurityBlobLength);
2990 bcc_ptr += len; 3001 bcc_ptr += ln;
2991 SecurityBlobLength += len; 3002 SecurityBlobLength += ln;
2992 SecurityBlob->DomainName.Length = 3003 SecurityBlob->DomainName.Length = cpu_to_le16(ln);
2993 cpu_to_le16(len);
2994 } 3004 }
2995 if (user == NULL) { 3005 if (user == NULL) {
2996 SecurityBlob->UserName.Buffer = 0; 3006 SecurityBlob->UserName.Buffer = 0;
2997 SecurityBlob->UserName.Length = 0; 3007 SecurityBlob->UserName.Length = 0;
2998 SecurityBlob->UserName.MaximumLength = 0; 3008 SecurityBlob->UserName.MaximumLength = 0;
2999 } else { 3009 } else {
3000 __u16 len = 3010 __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
3001 cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
3002 nls_codepage); 3011 nls_codepage);
3003 len *= 2; 3012 ln *= 2;
3004 SecurityBlob->UserName.MaximumLength = 3013 SecurityBlob->UserName.MaximumLength =
3005 cpu_to_le16(len); 3014 cpu_to_le16(ln);
3006 SecurityBlob->UserName.Buffer = 3015 SecurityBlob->UserName.Buffer =
3007 cpu_to_le32(SecurityBlobLength); 3016 cpu_to_le32(SecurityBlobLength);
3008 bcc_ptr += len; 3017 bcc_ptr += ln;
3009 SecurityBlobLength += len; 3018 SecurityBlobLength += ln;
3010 SecurityBlob->UserName.Length = 3019 SecurityBlob->UserName.Length = cpu_to_le16(ln);
3011 cpu_to_le16(len);
3012 } 3020 }
3013 3021
3014 /* SecurityBlob->WorkstationName.Length = 3022 /* SecurityBlob->WorkstationName.Length =
@@ -3052,33 +3060,32 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3052 SecurityBlob->DomainName.Length = 0; 3060 SecurityBlob->DomainName.Length = 0;
3053 SecurityBlob->DomainName.MaximumLength = 0; 3061 SecurityBlob->DomainName.MaximumLength = 0;
3054 } else { 3062 } else {
3055 __u16 len; 3063 __u16 ln;
3056 negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED; 3064 negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED;
3057 strncpy(bcc_ptr, domain, 63); 3065 strncpy(bcc_ptr, domain, 63);
3058 len = strnlen(domain, 64); 3066 ln = strnlen(domain, 64);
3059 SecurityBlob->DomainName.MaximumLength = 3067 SecurityBlob->DomainName.MaximumLength =
3060 cpu_to_le16(len); 3068 cpu_to_le16(ln);
3061 SecurityBlob->DomainName.Buffer = 3069 SecurityBlob->DomainName.Buffer =
3062 cpu_to_le32(SecurityBlobLength); 3070 cpu_to_le32(SecurityBlobLength);
3063 bcc_ptr += len; 3071 bcc_ptr += ln;
3064 SecurityBlobLength += len; 3072 SecurityBlobLength += ln;
3065 SecurityBlob->DomainName.Length = cpu_to_le16(len); 3073 SecurityBlob->DomainName.Length = cpu_to_le16(ln);
3066 } 3074 }
3067 if (user == NULL) { 3075 if (user == NULL) {
3068 SecurityBlob->UserName.Buffer = 0; 3076 SecurityBlob->UserName.Buffer = 0;
3069 SecurityBlob->UserName.Length = 0; 3077 SecurityBlob->UserName.Length = 0;
3070 SecurityBlob->UserName.MaximumLength = 0; 3078 SecurityBlob->UserName.MaximumLength = 0;
3071 } else { 3079 } else {
3072 __u16 len; 3080 __u16 ln;
3073 strncpy(bcc_ptr, user, 63); 3081 strncpy(bcc_ptr, user, 63);
3074 len = strnlen(user, 64); 3082 ln = strnlen(user, 64);
3075 SecurityBlob->UserName.MaximumLength = 3083 SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln);
3076 cpu_to_le16(len);
3077 SecurityBlob->UserName.Buffer = 3084 SecurityBlob->UserName.Buffer =
3078 cpu_to_le32(SecurityBlobLength); 3085 cpu_to_le32(SecurityBlobLength);
3079 bcc_ptr += len; 3086 bcc_ptr += ln;
3080 SecurityBlobLength += len; 3087 SecurityBlobLength += ln;
3081 SecurityBlob->UserName.Length = cpu_to_le16(len); 3088 SecurityBlob->UserName.Length = cpu_to_le16(ln);
3082 } 3089 }
3083 /* BB fill in our workstation name if known BB */ 3090 /* BB fill in our workstation name if known BB */
3084 3091
@@ -3100,12 +3107,11 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3100 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, 3107 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
3101 &bytes_returned, 1); 3108 &bytes_returned, 1);
3102 if (rc) { 3109 if (rc) {
3103/* rc = map_smb_to_linux_error(smb_buffer_response); *//* done in SendReceive now */ 3110/* rc = map_smb_to_linux_error(smb_buffer_response) done in SendReceive now */
3104 } else if ((smb_buffer_response->WordCount == 3) 3111 } else if ((smb_buffer_response->WordCount == 3) ||
3105 || (smb_buffer_response->WordCount == 4)) { 3112 (smb_buffer_response->WordCount == 4)) {
3106 __u16 action = le16_to_cpu(pSMBr->resp.Action); 3113 __u16 action = le16_to_cpu(pSMBr->resp.Action);
3107 __u16 blob_len = 3114 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
3108 le16_to_cpu(pSMBr->resp.SecurityBlobLength);
3109 if (action & GUEST_LOGIN) 3115 if (action & GUEST_LOGIN)
3110 cFYI(1, (" Guest login")); /* BB Should we set anything 3116 cFYI(1, (" Guest login")); /* BB Should we set anything
3111 in SesInfo struct ? */ 3117 in SesInfo struct ? */
@@ -3145,8 +3151,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3145 } else { 3151 } else {
3146 remaining_words = BCC(smb_buffer_response) / 2; 3152 remaining_words = BCC(smb_buffer_response) / 2;
3147 } 3153 }
3148 len = 3154 len = UniStrnlen((wchar_t *) bcc_ptr,
3149 UniStrnlen((wchar_t *) bcc_ptr,remaining_words - 1); 3155 remaining_words - 1);
3150/* We look for obvious messed up bcc or strings in response so we do not go off 3156/* We look for obvious messed up bcc or strings in response so we do not go off
3151 the end since (at least) WIN2K and Windows XP have a major bug in not null 3157 the end since (at least) WIN2K and Windows XP have a major bug in not null
3152 terminating last Unicode string in response */ 3158 terminating last Unicode string in response */
@@ -3230,7 +3236,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3230 <= BCC(smb_buffer_response)) { 3236 <= BCC(smb_buffer_response)) {
3231 if (ses->serverOS) 3237 if (ses->serverOS)
3232 kfree(ses->serverOS); 3238 kfree(ses->serverOS);
3233 ses->serverOS = kzalloc(len + 1,GFP_KERNEL); 3239 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
3234 strncpy(ses->serverOS,bcc_ptr, len); 3240 strncpy(ses->serverOS,bcc_ptr, len);
3235 3241
3236 bcc_ptr += len; 3242 bcc_ptr += len;
@@ -3259,28 +3265,24 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3259 bcc_ptr[0] = 0; 3265 bcc_ptr[0] = 0;
3260 bcc_ptr++; 3266 bcc_ptr++;
3261 } else 3267 } else
3262 cFYI(1, 3268 cFYI(1, ("field of length %d "
3263 ("field of length %d "
3264 "extends beyond end of smb ", 3269 "extends beyond end of smb ",
3265 len)); 3270 len));
3266 } 3271 }
3267 } else { 3272 } else {
3268 cERROR(1, 3273 cERROR(1, ("Security Blob extends beyond end "
3269 (" Security Blob extends beyond end "
3270 "of SMB")); 3274 "of SMB"));
3271 } 3275 }
3272 } else { 3276 } else {
3273 cERROR(1, ("No session structure passed in.")); 3277 cERROR(1, ("No session structure passed in."));
3274 } 3278 }
3275 } else { 3279 } else {
3276 cERROR(1, 3280 cERROR(1, ("Invalid Word count %d: ",
3277 (" Invalid Word count %d: ",
3278 smb_buffer_response->WordCount)); 3281 smb_buffer_response->WordCount));
3279 rc = -EIO; 3282 rc = -EIO;
3280 } 3283 }
3281 3284
3282 if (smb_buffer) 3285 cifs_buf_release(smb_buffer);
3283 cifs_buf_release(smb_buffer);
3284 3286
3285 return rc; 3287 return rc;
3286} 3288}
@@ -3389,6 +3391,18 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3389 bcc_ptr = pByteArea(smb_buffer_response); 3391 bcc_ptr = pByteArea(smb_buffer_response);
3390 length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2); 3392 length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2);
3391 /* skip service field (NB: this field is always ASCII) */ 3393 /* skip service field (NB: this field is always ASCII) */
3394 if (length == 3) {
3395 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
3396 (bcc_ptr[2] == 'C')) {
3397 cFYI(1, ("IPC connection"));
3398 tcon->ipc = 1;
3399 }
3400 } else if (length == 2) {
3401 if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
3402 /* the most common case */
3403 cFYI(1, ("disk share connection"));
3404 }
3405 }
3392 bcc_ptr += length + 1; 3406 bcc_ptr += length + 1;
3393 strncpy(tcon->treeName, tree, MAX_TREE_SIZE); 3407 strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
3394 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { 3408 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
@@ -3399,9 +3413,11 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3399 kfree(tcon->nativeFileSystem); 3413 kfree(tcon->nativeFileSystem);
3400 tcon->nativeFileSystem = 3414 tcon->nativeFileSystem =
3401 kzalloc(length + 2, GFP_KERNEL); 3415 kzalloc(length + 2, GFP_KERNEL);
3402 cifs_strfromUCS_le(tcon->nativeFileSystem, 3416 if (tcon->nativeFileSystem)
3403 (__le16 *) bcc_ptr, 3417 cifs_strfromUCS_le(
3404 length, nls_codepage); 3418 tcon->nativeFileSystem,
3419 (__le16 *) bcc_ptr,
3420 length, nls_codepage);
3405 bcc_ptr += 2 * length; 3421 bcc_ptr += 2 * length;
3406 bcc_ptr[0] = 0; /* null terminate the string */ 3422 bcc_ptr[0] = 0; /* null terminate the string */
3407 bcc_ptr[1] = 0; 3423 bcc_ptr[1] = 0;
@@ -3416,8 +3432,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3416 kfree(tcon->nativeFileSystem); 3432 kfree(tcon->nativeFileSystem);
3417 tcon->nativeFileSystem = 3433 tcon->nativeFileSystem =
3418 kzalloc(length + 1, GFP_KERNEL); 3434 kzalloc(length + 1, GFP_KERNEL);
3419 strncpy(tcon->nativeFileSystem, bcc_ptr, 3435 if (tcon->nativeFileSystem)
3420 length); 3436 strncpy(tcon->nativeFileSystem, bcc_ptr,
3437 length);
3421 } 3438 }
3422 /* else do not bother copying these information fields*/ 3439 /* else do not bother copying these information fields*/
3423 } 3440 }
@@ -3433,8 +3450,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3433 ses->ipc_tid = smb_buffer_response->Tid; 3450 ses->ipc_tid = smb_buffer_response->Tid;
3434 } 3451 }
3435 3452
3436 if (smb_buffer) 3453 cifs_buf_release(smb_buffer);
3437 cifs_buf_release(smb_buffer);
3438 return rc; 3454 return rc;
3439} 3455}
3440 3456
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 4830acc86d74..793404b10925 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * vfs operations that deal with dentries 4 * vfs operations that deal with dentries
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2002,2005 6 * Copyright (C) International Business Machines Corp., 2002,2007
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
9 * This library is free software; you can redistribute it and/or modify 9 * This library is free software; you can redistribute it and/or modify
@@ -269,7 +269,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
269 CIFSSMBClose(xid, pTcon, fileHandle); 269 CIFSSMBClose(xid, pTcon, fileHandle);
270 } else if (newinode) { 270 } else if (newinode) {
271 pCifsFile = 271 pCifsFile =
272 kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL); 272 kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
273 273
274 if (pCifsFile == NULL) 274 if (pCifsFile == NULL)
275 goto cifs_create_out; 275 goto cifs_create_out;
@@ -397,7 +397,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
397 /* BB Do not bother to decode buf since no 397 /* BB Do not bother to decode buf since no
398 local inode yet to put timestamps in, 398 local inode yet to put timestamps in,
399 but we can reuse it safely */ 399 but we can reuse it safely */
400 int bytes_written; 400 unsigned int bytes_written;
401 struct win_dev *pdev; 401 struct win_dev *pdev;
402 pdev = (struct win_dev *)buf; 402 pdev = (struct win_dev *)buf;
403 if (S_ISCHR(mode)) { 403 if (S_ISCHR(mode)) {
@@ -450,8 +450,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
450 450
451 xid = GetXid(); 451 xid = GetXid();
452 452
453 cFYI(1, 453 cFYI(1, (" parent inode = 0x%p name is: %s and dentry = 0x%p",
454 (" parent inode = 0x%p name is: %s and dentry = 0x%p",
455 parent_dir_inode, direntry->d_name.name, direntry)); 454 parent_dir_inode, direntry->d_name.name, direntry));
456 455
457 /* check whether path exists */ 456 /* check whether path exists */
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 893fd0aebff8..d614b91caeca 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -43,6 +43,7 @@
43#include <linux/exportfs.h> 43#include <linux/exportfs.h>
44#include "cifsglob.h" 44#include "cifsglob.h"
45#include "cifs_debug.h" 45#include "cifs_debug.h"
46#include "cifsfs.h"
46 47
47#ifdef CONFIG_CIFS_EXPERIMENTAL 48#ifdef CONFIG_CIFS_EXPERIMENTAL
48static struct dentry *cifs_get_parent(struct dentry *dentry) 49static struct dentry *cifs_get_parent(struct dentry *dentry)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 894b1f7b299d..1e7e4c06d9e3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -467,7 +467,7 @@ reopen_error_exit:
467int cifs_close(struct inode *inode, struct file *file) 467int cifs_close(struct inode *inode, struct file *file)
468{ 468{
469 int rc = 0; 469 int rc = 0;
470 int xid; 470 int xid, timeout;
471 struct cifs_sb_info *cifs_sb; 471 struct cifs_sb_info *cifs_sb;
472 struct cifsTconInfo *pTcon; 472 struct cifsTconInfo *pTcon;
473 struct cifsFileInfo *pSMBFile = 473 struct cifsFileInfo *pSMBFile =
@@ -485,9 +485,9 @@ int cifs_close(struct inode *inode, struct file *file)
485 /* no sense reconnecting to close a file that is 485 /* no sense reconnecting to close a file that is
486 already closed */ 486 already closed */
487 if (pTcon->tidStatus != CifsNeedReconnect) { 487 if (pTcon->tidStatus != CifsNeedReconnect) {
488 int timeout = 2; 488 timeout = 2;
489 while ((atomic_read(&pSMBFile->wrtPending) != 0) 489 while ((atomic_read(&pSMBFile->wrtPending) != 0)
490 && (timeout < 1000) ) { 490 && (timeout <= 2048)) {
491 /* Give write a better chance to get to 491 /* Give write a better chance to get to
492 server ahead of the close. We do not 492 server ahead of the close. We do not
493 want to add a wait_q here as it would 493 want to add a wait_q here as it would
@@ -522,12 +522,30 @@ int cifs_close(struct inode *inode, struct file *file)
522 list_del(&pSMBFile->flist); 522 list_del(&pSMBFile->flist);
523 list_del(&pSMBFile->tlist); 523 list_del(&pSMBFile->tlist);
524 write_unlock(&GlobalSMBSeslock); 524 write_unlock(&GlobalSMBSeslock);
525 timeout = 10;
526 /* We waited above to give the SMBWrite a chance to issue
527 on the wire (so we do not get SMBWrite returning EBADF
528 if writepages is racing with close. Note that writepages
529 does not specify a file handle, so it is possible for a file
530 to be opened twice, and the application close the "wrong"
531 file handle - in these cases we delay long enough to allow
532 the SMBWrite to get on the wire before the SMB Close.
533 We allow total wait here over 45 seconds, more than
534 oplock break time, and more than enough to allow any write
535 to complete on the server, or to time out on the client */
536 while ((atomic_read(&pSMBFile->wrtPending) != 0)
537 && (timeout <= 50000)) {
538 cERROR(1, ("writes pending, delay free of handle"));
539 msleep(timeout);
540 timeout *= 8;
541 }
525 kfree(pSMBFile->search_resume_name); 542 kfree(pSMBFile->search_resume_name);
526 kfree(file->private_data); 543 kfree(file->private_data);
527 file->private_data = NULL; 544 file->private_data = NULL;
528 } else 545 } else
529 rc = -EBADF; 546 rc = -EBADF;
530 547
548 read_lock(&GlobalSMBSeslock);
531 if (list_empty(&(CIFS_I(inode)->openFileList))) { 549 if (list_empty(&(CIFS_I(inode)->openFileList))) {
532 cFYI(1, ("closing last open instance for inode %p", inode)); 550 cFYI(1, ("closing last open instance for inode %p", inode));
533 /* if the file is not open we do not know if we can cache info 551 /* if the file is not open we do not know if we can cache info
@@ -535,6 +553,7 @@ int cifs_close(struct inode *inode, struct file *file)
535 CIFS_I(inode)->clientCanCacheRead = FALSE; 553 CIFS_I(inode)->clientCanCacheRead = FALSE;
536 CIFS_I(inode)->clientCanCacheAll = FALSE; 554 CIFS_I(inode)->clientCanCacheAll = FALSE;
537 } 555 }
556 read_unlock(&GlobalSMBSeslock);
538 if ((rc == 0) && CIFS_I(inode)->write_behind_rc) 557 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
539 rc = CIFS_I(inode)->write_behind_rc; 558 rc = CIFS_I(inode)->write_behind_rc;
540 FreeXid(xid); 559 FreeXid(xid);
@@ -767,7 +786,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
767 mutex_lock(&fid->lock_mutex); 786 mutex_lock(&fid->lock_mutex);
768 list_for_each_entry_safe(li, tmp, &fid->llist, llist) { 787 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
769 if (pfLock->fl_start <= li->offset && 788 if (pfLock->fl_start <= li->offset &&
770 length >= li->length) { 789 (pfLock->fl_start + length) >=
790 (li->offset + li->length)) {
771 stored_rc = CIFSSMBLock(xid, pTcon, 791 stored_rc = CIFSSMBLock(xid, pTcon,
772 netfid, 792 netfid,
773 li->length, li->offset, 793 li->length, li->offset,
@@ -1022,6 +1042,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1022 } 1042 }
1023 1043
1024 read_lock(&GlobalSMBSeslock); 1044 read_lock(&GlobalSMBSeslock);
1045refind_writable:
1025 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 1046 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1026 if (open_file->closePend) 1047 if (open_file->closePend)
1027 continue; 1048 continue;
@@ -1029,24 +1050,49 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1029 ((open_file->pfile->f_flags & O_RDWR) || 1050 ((open_file->pfile->f_flags & O_RDWR) ||
1030 (open_file->pfile->f_flags & O_WRONLY))) { 1051 (open_file->pfile->f_flags & O_WRONLY))) {
1031 atomic_inc(&open_file->wrtPending); 1052 atomic_inc(&open_file->wrtPending);
1053
1054 if (!open_file->invalidHandle) {
1055 /* found a good writable file */
1056 read_unlock(&GlobalSMBSeslock);
1057 return open_file;
1058 }
1059
1032 read_unlock(&GlobalSMBSeslock); 1060 read_unlock(&GlobalSMBSeslock);
1033 if ((open_file->invalidHandle) && 1061 /* Had to unlock since following call can block */
1034 (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) { 1062 rc = cifs_reopen_file(open_file->pfile, FALSE);
1035 rc = cifs_reopen_file(open_file->pfile, FALSE); 1063 if (!rc) {
1036 /* if it fails, try another handle - might be */ 1064 if (!open_file->closePend)
1037 /* dangerous to hold up writepages with retry */ 1065 return open_file;
1038 if (rc) { 1066 else { /* start over in case this was deleted */
1039 cFYI(1, 1067 /* since the list could be modified */
1040 ("failed on reopen file in wp"));
1041 read_lock(&GlobalSMBSeslock); 1068 read_lock(&GlobalSMBSeslock);
1042 /* can not use this handle, no write 1069 atomic_dec(&open_file->wrtPending);
1043 pending on this one after all */ 1070 goto refind_writable;
1044 atomic_dec
1045 (&open_file->wrtPending);
1046 continue;
1047 } 1071 }
1048 } 1072 }
1049 return open_file; 1073
1074 /* if it fails, try another handle if possible -
1075 (we can not do this if closePending since
1076 loop could be modified - in which case we
1077 have to start at the beginning of the list
1078 again. Note that it would be bad
1079 to hold up writepages here (rather than
1080 in caller) with continuous retries */
1081 cFYI(1, ("wp failed on reopen file"));
1082 read_lock(&GlobalSMBSeslock);
1083 /* can not use this handle, no write
1084 pending on this one after all */
1085 atomic_dec(&open_file->wrtPending);
1086
1087 if (open_file->closePend) /* list could have changed */
1088 goto refind_writable;
1089 /* else we simply continue to the next entry. Thus
1090 we do not loop on reopen errors. If we
1091 can not reopen the file, for example if we
1092 reconnected to a server with another client
1093 racing to delete or lock the file we would not
1094 make progress if we restarted before the beginning
1095 of the loop here. */
1050 } 1096 }
1051 } 1097 }
1052 read_unlock(&GlobalSMBSeslock); 1098 read_unlock(&GlobalSMBSeslock);
@@ -1709,7 +1755,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1709 struct page *page; 1755 struct page *page;
1710 struct cifs_sb_info *cifs_sb; 1756 struct cifs_sb_info *cifs_sb;
1711 struct cifsTconInfo *pTcon; 1757 struct cifsTconInfo *pTcon;
1712 int bytes_read = 0; 1758 unsigned int bytes_read = 0;
1713 unsigned int read_size, i; 1759 unsigned int read_size, i;
1714 char *smb_read_data = NULL; 1760 char *smb_read_data = NULL;
1715 struct smb_com_read_rsp *pSMBr; 1761 struct smb_com_read_rsp *pSMBr;
@@ -1803,7 +1849,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1803 1849
1804 i += bytes_read >> PAGE_CACHE_SHIFT; 1850 i += bytes_read >> PAGE_CACHE_SHIFT;
1805 cifs_stats_bytes_read(pTcon, bytes_read); 1851 cifs_stats_bytes_read(pTcon, bytes_read);
1806 if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) { 1852 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
1807 i++; /* account for partial page */ 1853 i++; /* account for partial page */
1808 1854
1809 /* server copy of file can have smaller size 1855 /* server copy of file can have smaller size
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 279f3c5e0ce3..5e8b388be3b6 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -115,7 +115,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
115 inode->i_mode = le64_to_cpu(findData.Permissions); 115 inode->i_mode = le64_to_cpu(findData.Permissions);
116 /* since we set the inode type below we need to mask off 116 /* since we set the inode type below we need to mask off
117 to avoid strange results if bits set above */ 117 to avoid strange results if bits set above */
118 inode->i_mode &= ~S_IFMT; 118 inode->i_mode &= ~S_IFMT;
119 if (type == UNIX_FILE) { 119 if (type == UNIX_FILE) {
120 inode->i_mode |= S_IFREG; 120 inode->i_mode |= S_IFREG;
121 } else if (type == UNIX_SYMLINK) { 121 } else if (type == UNIX_SYMLINK) {
@@ -575,19 +575,33 @@ int cifs_get_inode_info(struct inode **pinode,
575 return rc; 575 return rc;
576} 576}
577 577
578static const struct inode_operations cifs_ipc_inode_ops = {
579 .lookup = cifs_lookup,
580};
581
578/* gets root inode */ 582/* gets root inode */
579void cifs_read_inode(struct inode *inode) 583void cifs_read_inode(struct inode *inode)
580{ 584{
581 int xid; 585 int xid, rc;
582 struct cifs_sb_info *cifs_sb; 586 struct cifs_sb_info *cifs_sb;
583 587
584 cifs_sb = CIFS_SB(inode->i_sb); 588 cifs_sb = CIFS_SB(inode->i_sb);
585 xid = GetXid(); 589 xid = GetXid();
586 590
587 if (cifs_sb->tcon->unix_ext) 591 if (cifs_sb->tcon->unix_ext)
588 cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid); 592 rc = cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid);
589 else 593 else
590 cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid); 594 rc = cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid);
595 if (rc && cifs_sb->tcon->ipc) {
596 cFYI(1, ("ipc connection - fake read inode"));
597 inode->i_mode |= S_IFDIR;
598 inode->i_nlink = 2;
599 inode->i_op = &cifs_ipc_inode_ops;
600 inode->i_fop = &simple_dir_operations;
601 inode->i_uid = cifs_sb->mnt_uid;
602 inode->i_gid = cifs_sb->mnt_gid;
603 }
604
591 /* can not call macro FreeXid here since in a void func */ 605 /* can not call macro FreeXid here since in a void func */
592 _FreeXid(xid); 606 _FreeXid(xid);
593} 607}
@@ -919,18 +933,25 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
919 goto mkdir_out; 933 goto mkdir_out;
920 } 934 }
921 935
936 mode &= ~current->fs->umask;
922 rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT, 937 rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT,
923 mode, NULL /* netfid */, pInfo, &oplock, 938 mode, NULL /* netfid */, pInfo, &oplock,
924 full_path, cifs_sb->local_nls, 939 full_path, cifs_sb->local_nls,
925 cifs_sb->mnt_cifs_flags & 940 cifs_sb->mnt_cifs_flags &
926 CIFS_MOUNT_MAP_SPECIAL_CHR); 941 CIFS_MOUNT_MAP_SPECIAL_CHR);
927 if (rc) { 942 if (rc == -EOPNOTSUPP) {
943 kfree(pInfo);
944 goto mkdir_retry_old;
945 } else if (rc) {
928 cFYI(1, ("posix mkdir returned 0x%x", rc)); 946 cFYI(1, ("posix mkdir returned 0x%x", rc));
929 d_drop(direntry); 947 d_drop(direntry);
930 } else { 948 } else {
931 int obj_type; 949 int obj_type;
932 if (pInfo->Type == -1) /* no return info - go query */ 950 if (pInfo->Type == cpu_to_le32(-1)) {
951 /* no return info, go query for it */
952 kfree(pInfo);
933 goto mkdir_get_info; 953 goto mkdir_get_info;
954 }
934/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need 955/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
935 to set uid/gid */ 956 to set uid/gid */
936 inc_nlink(inode); 957 inc_nlink(inode);
@@ -940,8 +961,10 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
940 direntry->d_op = &cifs_dentry_ops; 961 direntry->d_op = &cifs_dentry_ops;
941 962
942 newinode = new_inode(inode->i_sb); 963 newinode = new_inode(inode->i_sb);
943 if (newinode == NULL) 964 if (newinode == NULL) {
965 kfree(pInfo);
944 goto mkdir_get_info; 966 goto mkdir_get_info;
967 }
945 /* Is an i_ino of zero legal? */ 968 /* Is an i_ino of zero legal? */
946 /* Are there sanity checks we can use to ensure that 969 /* Are there sanity checks we can use to ensure that
947 the server is really filling in that field? */ 970 the server is really filling in that field? */
@@ -972,7 +995,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
972 kfree(pInfo); 995 kfree(pInfo);
973 goto mkdir_out; 996 goto mkdir_out;
974 } 997 }
975 998mkdir_retry_old:
976 /* BB add setting the equivalent of mode via CreateX w/ACLs */ 999 /* BB add setting the equivalent of mode via CreateX w/ACLs */
977 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls, 1000 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
978 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 1001 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1377,8 +1400,17 @@ static int cifs_vmtruncate(struct inode *inode, loff_t offset)
1377 } 1400 }
1378 i_size_write(inode, offset); 1401 i_size_write(inode, offset);
1379 spin_unlock(&inode->i_lock); 1402 spin_unlock(&inode->i_lock);
1403 /*
1404 * unmap_mapping_range is called twice, first simply for efficiency
1405 * so that truncate_inode_pages does fewer single-page unmaps. However
1406 * after this first call, and before truncate_inode_pages finishes,
1407 * it is possible for private pages to be COWed, which remain after
1408 * truncate_inode_pages finishes, hence the second unmap_mapping_range
1409 * call must be made for correctness.
1410 */
1380 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 1411 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
1381 truncate_inode_pages(mapping, offset); 1412 truncate_inode_pages(mapping, offset);
1413 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
1382 goto out_truncate; 1414 goto out_truncate;
1383 1415
1384do_expand: 1416do_expand:
@@ -1469,7 +1501,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1469 atomic_dec(&open_file->wrtPending); 1501 atomic_dec(&open_file->wrtPending);
1470 cFYI(1, ("SetFSize for attrs rc = %d", rc)); 1502 cFYI(1, ("SetFSize for attrs rc = %d", rc));
1471 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1503 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1472 int bytes_written; 1504 unsigned int bytes_written;
1473 rc = CIFSSMBWrite(xid, pTcon, 1505 rc = CIFSSMBWrite(xid, pTcon,
1474 nfid, 0, attrs->ia_size, 1506 nfid, 0, attrs->ia_size,
1475 &bytes_written, NULL, NULL, 1507 &bytes_written, NULL, NULL,
@@ -1502,7 +1534,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1502 cifs_sb->mnt_cifs_flags & 1534 cifs_sb->mnt_cifs_flags &
1503 CIFS_MOUNT_MAP_SPECIAL_CHR); 1535 CIFS_MOUNT_MAP_SPECIAL_CHR);
1504 if (rc == 0) { 1536 if (rc == 0) {
1505 int bytes_written; 1537 unsigned int bytes_written;
1506 rc = CIFSSMBWrite(xid, pTcon, 1538 rc = CIFSSMBWrite(xid, pTcon,
1507 netfid, 0, 1539 netfid, 0,
1508 attrs->ia_size, 1540 attrs->ia_size,
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 6a85ef7b8797..11f265726db7 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -237,7 +237,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
237 char *tmp_path = NULL; 237 char *tmp_path = NULL;
238 char *tmpbuffer; 238 char *tmpbuffer;
239 unsigned char *referrals = NULL; 239 unsigned char *referrals = NULL;
240 int num_referrals = 0; 240 unsigned int num_referrals = 0;
241 int len; 241 int len;
242 __u16 fid; 242 __u16 fid;
243 243
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0bcec0844bee..51ec681fe74a 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -169,7 +169,6 @@ cifs_buf_get(void)
169void 169void
170cifs_buf_release(void *buf_to_free) 170cifs_buf_release(void *buf_to_free)
171{ 171{
172
173 if (buf_to_free == NULL) { 172 if (buf_to_free == NULL) {
174 /* cFYI(1, ("Null buffer passed to cifs_buf_release"));*/ 173 /* cFYI(1, ("Null buffer passed to cifs_buf_release"));*/
175 return; 174 return;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 2bfed3f45d0f..f06359cb22ee 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -114,10 +114,16 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
114 {ERRusempx, -EIO}, 114 {ERRusempx, -EIO},
115 {ERRusestd, -EIO}, 115 {ERRusestd, -EIO},
116 {ERR_NOTIFY_ENUM_DIR, -ENOBUFS}, 116 {ERR_NOTIFY_ENUM_DIR, -ENOBUFS},
117 {ERRaccountexpired, -EACCES}, 117 {ERRnoSuchUser, -EACCES},
118/* {ERRaccountexpired, -EACCES},
118 {ERRbadclient, -EACCES}, 119 {ERRbadclient, -EACCES},
119 {ERRbadLogonTime, -EACCES}, 120 {ERRbadLogonTime, -EACCES},
120 {ERRpasswordExpired, -EACCES}, 121 {ERRpasswordExpired, -EACCES},*/
122 {ERRaccountexpired, -EKEYEXPIRED},
123 {ERRbadclient, -EACCES},
124 {ERRbadLogonTime, -EACCES},
125 {ERRpasswordExpired, -EKEYEXPIRED},
126
121 {ERRnosupport, -EINVAL}, 127 {ERRnosupport, -EINVAL},
122 {0, 0} 128 {0, 0}
123}; 129};
@@ -270,7 +276,7 @@ static const struct {
270 from NT_STATUS_NO_SUCH_USER to NT_STATUS_LOGON_FAILURE 276 from NT_STATUS_NO_SUCH_USER to NT_STATUS_LOGON_FAILURE
271 during the session setup } */ 277 during the session setup } */
272 { 278 {
273 ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, { 279 ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, { /* could map to 2238 */
274 ERRHRD, ERRgeneral, NT_STATUS_GROUP_EXISTS}, { 280 ERRHRD, ERRgeneral, NT_STATUS_GROUP_EXISTS}, {
275 ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_GROUP}, { 281 ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_GROUP}, {
276 ERRHRD, ERRgeneral, NT_STATUS_MEMBER_IN_GROUP}, { 282 ERRHRD, ERRgeneral, NT_STATUS_MEMBER_IN_GROUP}, {
@@ -285,10 +291,10 @@ static const struct {
285 ERRHRD, ERRgeneral, NT_STATUS_PASSWORD_RESTRICTION}, { 291 ERRHRD, ERRgeneral, NT_STATUS_PASSWORD_RESTRICTION}, {
286 ERRDOS, ERRnoaccess, NT_STATUS_LOGON_FAILURE}, { 292 ERRDOS, ERRnoaccess, NT_STATUS_LOGON_FAILURE}, {
287 ERRHRD, ERRgeneral, NT_STATUS_ACCOUNT_RESTRICTION}, { 293 ERRHRD, ERRgeneral, NT_STATUS_ACCOUNT_RESTRICTION}, {
288 ERRSRV, 2241, NT_STATUS_INVALID_LOGON_HOURS}, { 294 ERRSRV, ERRbadLogonTime, NT_STATUS_INVALID_LOGON_HOURS}, {
289 ERRSRV, 2240, NT_STATUS_INVALID_WORKSTATION}, { 295 ERRSRV, ERRbadclient, NT_STATUS_INVALID_WORKSTATION}, {
290 ERRSRV, ERRpasswordExpired, NT_STATUS_PASSWORD_EXPIRED}, { 296 ERRSRV, ERRpasswordExpired, NT_STATUS_PASSWORD_EXPIRED}, {
291 ERRSRV, 2239, NT_STATUS_ACCOUNT_DISABLED}, { 297 ERRSRV, ERRaccountexpired, NT_STATUS_ACCOUNT_DISABLED}, {
292 ERRHRD, ERRgeneral, NT_STATUS_NONE_MAPPED}, { 298 ERRHRD, ERRgeneral, NT_STATUS_NONE_MAPPED}, {
293 ERRHRD, ERRgeneral, NT_STATUS_TOO_MANY_LUIDS_REQUESTED}, { 299 ERRHRD, ERRgeneral, NT_STATUS_TOO_MANY_LUIDS_REQUESTED}, {
294 ERRHRD, ERRgeneral, NT_STATUS_LUIDS_EXHAUSTED}, { 300 ERRHRD, ERRgeneral, NT_STATUS_LUIDS_EXHAUSTED}, {
@@ -585,7 +591,7 @@ static const struct {
585 ERRDOS, ERRnoaccess, NT_STATUS_TRUST_FAILURE}, { 591 ERRDOS, ERRnoaccess, NT_STATUS_TRUST_FAILURE}, {
586 ERRHRD, ERRgeneral, NT_STATUS_MUTANT_LIMIT_EXCEEDED}, { 592 ERRHRD, ERRgeneral, NT_STATUS_MUTANT_LIMIT_EXCEEDED}, {
587 ERRDOS, ERRnetlogonNotStarted, NT_STATUS_NETLOGON_NOT_STARTED}, { 593 ERRDOS, ERRnetlogonNotStarted, NT_STATUS_NETLOGON_NOT_STARTED}, {
588 ERRSRV, 2239, NT_STATUS_ACCOUNT_EXPIRED}, { 594 ERRSRV, ERRaccountexpired, NT_STATUS_ACCOUNT_EXPIRED}, {
589 ERRHRD, ERRgeneral, NT_STATUS_POSSIBLE_DEADLOCK}, { 595 ERRHRD, ERRgeneral, NT_STATUS_POSSIBLE_DEADLOCK}, {
590 ERRHRD, ERRgeneral, NT_STATUS_NETWORK_CREDENTIAL_CONFLICT}, { 596 ERRHRD, ERRgeneral, NT_STATUS_NETWORK_CREDENTIAL_CONFLICT}, {
591 ERRHRD, ERRgeneral, NT_STATUS_REMOTE_SESSION_LIMIT}, { 597 ERRHRD, ERRgeneral, NT_STATUS_REMOTE_SESSION_LIMIT}, {
@@ -754,7 +760,7 @@ ntstatus_to_dos(__u32 ntstatus, __u8 * eclass, __u16 * ecode)
754} 760}
755 761
756int 762int
757map_smb_to_linux_error(struct smb_hdr *smb) 763map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
758{ 764{
759 unsigned int i; 765 unsigned int i;
760 int rc = -EIO; /* if transport error smb error may not be set */ 766 int rc = -EIO; /* if transport error smb error may not be set */
@@ -771,7 +777,9 @@ map_smb_to_linux_error(struct smb_hdr *smb)
771 /* translate the newer STATUS codes to old style SMB errors 777 /* translate the newer STATUS codes to old style SMB errors
772 * and then to POSIX errors */ 778 * and then to POSIX errors */
773 __u32 err = le32_to_cpu(smb->Status.CifsError); 779 __u32 err = le32_to_cpu(smb->Status.CifsError);
774 if (cifsFYI & CIFS_RC) 780 if (logErr && (err != (NT_STATUS_MORE_PROCESSING_REQUIRED)))
781 cifs_print_status(err);
782 else if (cifsFYI & CIFS_RC)
775 cifs_print_status(err); 783 cifs_print_status(err);
776 ntstatus_to_dos(err, &smberrclass, &smberrcode); 784 ntstatus_to_dos(err, &smberrclass, &smberrcode);
777 } else { 785 } else {
@@ -813,7 +821,7 @@ map_smb_to_linux_error(struct smb_hdr *smb)
813 } 821 }
814 /* else ERRHRD class errors or junk - return EIO */ 822 /* else ERRHRD class errors or junk - return EIO */
815 823
816 cFYI(1, (" !!Mapping smb error code %d to POSIX err %d !!", 824 cFYI(1, ("Mapping smb error code %d to POSIX err %d",
817 smberrcode, rc)); 825 smberrcode, rc));
818 826
819 /* generic corrective action e.g. reconnect SMB session on 827 /* generic corrective action e.g. reconnect SMB session on
@@ -899,8 +907,11 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
899 cERROR(1, ("illegal hours %d", st->Hours)); 907 cERROR(1, ("illegal hours %d", st->Hours));
900 days = sd->Day; 908 days = sd->Day;
901 month = sd->Month; 909 month = sd->Month;
902 if ((days > 31) || (month > 12)) 910 if ((days > 31) || (month > 12)) {
903 cERROR(1, ("illegal date, month %d day: %d", month, days)); 911 cERROR(1, ("illegal date, month %d day: %d", month, days));
912 if (month > 12)
913 month = 12;
914 }
904 month -= 1; 915 month -= 1;
905 days += total_days_of_prev_months[month]; 916 days += total_days_of_prev_months[month];
906 days += 3652; /* account for difference in days between 1980 and 1970 */ 917 days += 3652; /* account for difference in days between 1980 and 1970 */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 916df9431336..3746580e9701 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -121,7 +121,7 @@ static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode)
121 121
122 122
123static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, 123static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
124 char *buf, int *pobject_type, int isNewInode) 124 char *buf, unsigned int *pobject_type, int isNewInode)
125{ 125{
126 loff_t local_size; 126 loff_t local_size;
127 struct timespec local_mtime; 127 struct timespec local_mtime;
@@ -294,7 +294,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
294} 294}
295 295
296static void unix_fill_in_inode(struct inode *tmp_inode, 296static void unix_fill_in_inode(struct inode *tmp_inode,
297 FILE_UNIX_INFO *pfindData, int *pobject_type, int isNewInode) 297 FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode)
298{ 298{
299 loff_t local_size; 299 loff_t local_size;
300 struct timespec local_mtime; 300 struct timespec local_mtime;
@@ -826,7 +826,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
826 int rc = 0; 826 int rc = 0;
827 struct qstr qstring; 827 struct qstr qstring;
828 struct cifsFileInfo *pCifsF; 828 struct cifsFileInfo *pCifsF;
829 unsigned obj_type; 829 unsigned int obj_type;
830 ino_t inum; 830 ino_t inum;
831 struct cifs_sb_info *cifs_sb; 831 struct cifs_sb_info *cifs_sb;
832 struct inode *tmp_inode; 832 struct inode *tmp_inode;
@@ -1067,7 +1067,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
1067 for (i = 0; (i < num_to_fill) && (rc == 0); i++) { 1067 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
1068 if (current_entry == NULL) { 1068 if (current_entry == NULL) {
1069 /* evaluate whether this case is an error */ 1069 /* evaluate whether this case is an error */
1070 cERROR(1,("past end of SMB num to fill %d i %d", 1070 cERROR(1, ("past SMB end, num to fill %d i %d",
1071 num_to_fill, i)); 1071 num_to_fill, i));
1072 break; 1072 break;
1073 } 1073 }
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 892be9b4d1f3..899dc6078d9a 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -67,14 +67,59 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
67 pSMB->req.hdr.Flags2 |= SMBFLG2_DFS; 67 pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
68 capabilities |= CAP_DFS; 68 capabilities |= CAP_DFS;
69 } 69 }
70 if (ses->capabilities & CAP_UNIX) { 70 if (ses->capabilities & CAP_UNIX)
71 capabilities |= CAP_UNIX; 71 capabilities |= CAP_UNIX;
72 }
73 72
74 /* BB check whether to init vcnum BB */ 73 /* BB check whether to init vcnum BB */
75 return capabilities; 74 return capabilities;
76} 75}
77 76
77static void
78unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
79{
80 char *bcc_ptr = *pbcc_area;
81 int bytes_ret = 0;
82
83 /* Copy OS version */
84 bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
85 nls_cp);
86 bcc_ptr += 2 * bytes_ret;
87 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
88 32, nls_cp);
89 bcc_ptr += 2 * bytes_ret;
90 bcc_ptr += 2; /* trailing null */
91
92 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
93 32, nls_cp);
94 bcc_ptr += 2 * bytes_ret;
95 bcc_ptr += 2; /* trailing null */
96
97 *pbcc_area = bcc_ptr;
98}
99
100static void unicode_domain_string(char **pbcc_area, struct cifsSesInfo *ses,
101 const struct nls_table *nls_cp)
102{
103 char *bcc_ptr = *pbcc_area;
104 int bytes_ret = 0;
105
106 /* copy domain */
107 if (ses->domainName == NULL) {
108 /* Sending null domain better than using a bogus domain name (as
109 we did briefly in 2.6.18) since server will use its default */
110 *bcc_ptr = 0;
111 *(bcc_ptr+1) = 0;
112 bytes_ret = 0;
113 } else
114 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
115 256, nls_cp);
116 bcc_ptr += 2 * bytes_ret;
117 bcc_ptr += 2; /* account for null terminator */
118
119 *pbcc_area = bcc_ptr;
120}
121
122
78static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, 123static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
79 const struct nls_table *nls_cp) 124 const struct nls_table *nls_cp)
80{ 125{
@@ -100,32 +145,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
100 } 145 }
101 bcc_ptr += 2 * bytes_ret; 146 bcc_ptr += 2 * bytes_ret;
102 bcc_ptr += 2; /* account for null termination */ 147 bcc_ptr += 2; /* account for null termination */
103 /* copy domain */
104 if (ses->domainName == NULL) {
105 /* Sending null domain better than using a bogus domain name (as
106 we did briefly in 2.6.18) since server will use its default */
107 *bcc_ptr = 0;
108 *(bcc_ptr+1) = 0;
109 bytes_ret = 0;
110 } else
111 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
112 256, nls_cp);
113 bcc_ptr += 2 * bytes_ret;
114 bcc_ptr += 2; /* account for null terminator */
115
116 /* Copy OS version */
117 bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
118 nls_cp);
119 bcc_ptr += 2 * bytes_ret;
120 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
121 32, nls_cp);
122 bcc_ptr += 2 * bytes_ret;
123 bcc_ptr += 2; /* trailing null */
124 148
125 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, 149 unicode_domain_string(&bcc_ptr, ses, nls_cp);
126 32, nls_cp); 150 unicode_oslm_strings(&bcc_ptr, nls_cp);
127 bcc_ptr += 2 * bytes_ret;
128 bcc_ptr += 2; /* trailing null */
129 151
130 *pbcc_area = bcc_ptr; 152 *pbcc_area = bcc_ptr;
131} 153}
@@ -203,14 +225,11 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
203 if (len >= words_left) 225 if (len >= words_left)
204 return rc; 226 return rc;
205 227
206 if (ses->serverOS) 228 kfree(ses->serverOS);
207 kfree(ses->serverOS);
208 /* UTF-8 string will not grow more than four times as big as UCS-16 */ 229 /* UTF-8 string will not grow more than four times as big as UCS-16 */
209 ses->serverOS = kzalloc(4 * len, GFP_KERNEL); 230 ses->serverOS = kzalloc(4 * len, GFP_KERNEL);
210 if (ses->serverOS != NULL) { 231 if (ses->serverOS != NULL)
211 cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, 232 cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp);
212 nls_cp);
213 }
214 data += 2 * (len + 1); 233 data += 2 * (len + 1);
215 words_left -= len + 1; 234 words_left -= len + 1;
216 235
@@ -220,8 +239,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
220 if (len >= words_left) 239 if (len >= words_left)
221 return rc; 240 return rc;
222 241
223 if (ses->serverNOS) 242 kfree(ses->serverNOS);
224 kfree(ses->serverNOS);
225 ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */ 243 ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */
226 if (ses->serverNOS != NULL) { 244 if (ses->serverNOS != NULL) {
227 cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len, 245 cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
@@ -240,8 +258,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
240 if (len > words_left) 258 if (len > words_left)
241 return rc; 259 return rc;
242 260
243 if (ses->serverDomain) 261 kfree(ses->serverDomain);
244 kfree(ses->serverDomain);
245 ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */ 262 ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
246 if (ses->serverDomain != NULL) { 263 if (ses->serverDomain != NULL) {
247 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len, 264 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
@@ -271,8 +288,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
271 if (len >= bleft) 288 if (len >= bleft)
272 return rc; 289 return rc;
273 290
274 if (ses->serverOS) 291 kfree(ses->serverOS);
275 kfree(ses->serverOS);
276 292
277 ses->serverOS = kzalloc(len + 1, GFP_KERNEL); 293 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
278 if (ses->serverOS) 294 if (ses->serverOS)
@@ -289,8 +305,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
289 if (len >= bleft) 305 if (len >= bleft)
290 return rc; 306 return rc;
291 307
292 if (ses->serverNOS) 308 kfree(ses->serverNOS);
293 kfree(ses->serverNOS);
294 309
295 ses->serverNOS = kzalloc(len + 1, GFP_KERNEL); 310 ses->serverNOS = kzalloc(len + 1, GFP_KERNEL);
296 if (ses->serverNOS) 311 if (ses->serverNOS)
@@ -479,7 +494,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
479 if (ses->capabilities & CAP_UNICODE) { 494 if (ses->capabilities & CAP_UNICODE) {
480 if (iov[0].iov_len % 2) { 495 if (iov[0].iov_len % 2) {
481 *bcc_ptr = 0; 496 *bcc_ptr = 0;
482 } bcc_ptr++; 497 bcc_ptr++;
498 }
483 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); 499 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
484 } else 500 } else
485 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 501 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
@@ -497,7 +513,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
497 513
498 iov[1].iov_base = str_area; 514 iov[1].iov_base = str_area;
499 iov[1].iov_len = count; 515 iov[1].iov_len = count;
500 rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0); 516 rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type,
517 0 /* not long op */, 1 /* log NT STATUS if any */ );
501 /* SMB request buf freed in SendReceive2 */ 518 /* SMB request buf freed in SendReceive2 */
502 519
503 cFYI(1, ("ssetup rc from sendrecv2 is %d", rc)); 520 cFYI(1, ("ssetup rc from sendrecv2 is %d", rc));
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index 2ef0be288820..7f50e8577c1c 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -173,9 +173,10 @@
173#define ERRusestd 251 /* temporarily unable to use either raw 173#define ERRusestd 251 /* temporarily unable to use either raw
174 or mpx */ 174 or mpx */
175#define ERR_NOTIFY_ENUM_DIR 1024 175#define ERR_NOTIFY_ENUM_DIR 1024
176#define ERRnoSuchUser 2238 /* user account does not exist */
176#define ERRaccountexpired 2239 177#define ERRaccountexpired 2239
177#define ERRbadclient 2240 178#define ERRbadclient 2240 /* can not logon from this client */
178#define ERRbadLogonTime 2241 179#define ERRbadLogonTime 2241 /* logon hours do not allow this */
179#define ERRpasswordExpired 2242 180#define ERRpasswordExpired 2242
180#define ERRnetlogonNotStarted 2455 181#define ERRnetlogonNotStarted 2455
181#define ERRnosupport 0xFFFF 182#define ERRnosupport 0xFFFF
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 746bc9405db1..7ed32b3cb781 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -55,7 +55,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
55 if (temp == NULL) 55 if (temp == NULL)
56 return temp; 56 return temp;
57 else { 57 else {
58 memset(temp, 0, sizeof (struct mid_q_entry)); 58 memset(temp, 0, sizeof(struct mid_q_entry));
59 temp->mid = smb_buffer->Mid; /* always LE */ 59 temp->mid = smb_buffer->Mid; /* always LE */
60 temp->pid = current->pid; 60 temp->pid = current->pid;
61 temp->command = smb_buffer->Command; 61 temp->command = smb_buffer->Command;
@@ -158,7 +158,7 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
158 iov.iov_len = len; 158 iov.iov_len = len;
159 159
160 smb_msg.msg_name = sin; 160 smb_msg.msg_name = sin;
161 smb_msg.msg_namelen = sizeof (struct sockaddr); 161 smb_msg.msg_namelen = sizeof(struct sockaddr);
162 smb_msg.msg_control = NULL; 162 smb_msg.msg_control = NULL;
163 smb_msg.msg_controllen = 0; 163 smb_msg.msg_controllen = 0;
164 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 164 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/
@@ -228,7 +228,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
228 return -ENOTSOCK; /* BB eventually add reconnect code here */ 228 return -ENOTSOCK; /* BB eventually add reconnect code here */
229 229
230 smb_msg.msg_name = sin; 230 smb_msg.msg_name = sin;
231 smb_msg.msg_namelen = sizeof (struct sockaddr); 231 smb_msg.msg_namelen = sizeof(struct sockaddr);
232 smb_msg.msg_control = NULL; 232 smb_msg.msg_control = NULL;
233 smb_msg.msg_controllen = 0; 233 smb_msg.msg_controllen = 0;
234 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 234 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/
@@ -363,9 +363,8 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
363 } /* else ok - we are setting up session */ 363 } /* else ok - we are setting up session */
364 } 364 }
365 *ppmidQ = AllocMidQEntry(in_buf, ses); 365 *ppmidQ = AllocMidQEntry(in_buf, ses);
366 if (*ppmidQ == NULL) { 366 if (*ppmidQ == NULL)
367 return -ENOMEM; 367 return -ENOMEM;
368 }
369 return 0; 368 return 0;
370} 369}
371 370
@@ -419,7 +418,7 @@ static int wait_for_response(struct cifsSesInfo *ses,
419int 418int
420SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, 419SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
421 struct kvec *iov, int n_vec, int *pRespBufType /* ret */, 420 struct kvec *iov, int n_vec, int *pRespBufType /* ret */,
422 const int long_op) 421 const int long_op, const int logError)
423{ 422{
424 int rc = 0; 423 int rc = 0;
425 unsigned int receive_len; 424 unsigned int receive_len;
@@ -465,7 +464,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
465 wake_up(&ses->server->request_q); 464 wake_up(&ses->server->request_q);
466 return rc; 465 return rc;
467 } 466 }
468
469 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); 467 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
470 468
471 midQ->midState = MID_REQUEST_SUBMITTED; 469 midQ->midState = MID_REQUEST_SUBMITTED;
@@ -568,13 +566,11 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
568 } 566 }
569 567
570 /* BB special case reconnect tid and uid here? */ 568 /* BB special case reconnect tid and uid here? */
571 /* BB special case Errbadpassword and pwdexpired here */ 569 rc = map_smb_to_linux_error(midQ->resp_buf, logError);
572 rc = map_smb_to_linux_error(midQ->resp_buf);
573 570
574 /* convert ByteCount if necessary */ 571 /* convert ByteCount if necessary */
575 if (receive_len >= 572 if (receive_len >= sizeof(struct smb_hdr) - 4
576 sizeof (struct smb_hdr) - 573 /* do not count RFC1001 header */ +
577 4 /* do not count RFC1001 header */ +
578 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ ) 574 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
579 BCC(midQ->resp_buf) = 575 BCC(midQ->resp_buf) =
580 le16_to_cpu(BCC_LE(midQ->resp_buf)); 576 le16_to_cpu(BCC_LE(midQ->resp_buf));
@@ -749,12 +745,11 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
749 *pbytes_returned = out_buf->smb_buf_length; 745 *pbytes_returned = out_buf->smb_buf_length;
750 746
751 /* BB special case reconnect tid and uid here? */ 747 /* BB special case reconnect tid and uid here? */
752 rc = map_smb_to_linux_error(out_buf); 748 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
753 749
754 /* convert ByteCount if necessary */ 750 /* convert ByteCount if necessary */
755 if (receive_len >= 751 if (receive_len >= sizeof(struct smb_hdr) - 4
756 sizeof (struct smb_hdr) - 752 /* do not count RFC1001 header */ +
757 4 /* do not count RFC1001 header */ +
758 (2 * out_buf->WordCount) + 2 /* bcc */ ) 753 (2 * out_buf->WordCount) + 2 /* bcc */ )
759 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); 754 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
760 } else { 755 } else {
@@ -993,12 +988,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
993 *pbytes_returned = out_buf->smb_buf_length; 988 *pbytes_returned = out_buf->smb_buf_length;
994 989
995 /* BB special case reconnect tid and uid here? */ 990 /* BB special case reconnect tid and uid here? */
996 rc = map_smb_to_linux_error(out_buf); 991 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
997 992
998 /* convert ByteCount if necessary */ 993 /* convert ByteCount if necessary */
999 if (receive_len >= 994 if (receive_len >= sizeof(struct smb_hdr) - 4
1000 sizeof (struct smb_hdr) - 995 /* do not count RFC1001 header */ +
1001 4 /* do not count RFC1001 header */ +
1002 (2 * out_buf->WordCount) + 2 /* bcc */ ) 996 (2 * out_buf->WordCount) + 2 /* bcc */ )
1003 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); 997 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
1004 } else { 998 } else {
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index f61e433d281c..369e838bebd3 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -261,21 +261,26 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
261 cifs_sb->local_nls, 261 cifs_sb->local_nls,
262 cifs_sb->mnt_cifs_flags & 262 cifs_sb->mnt_cifs_flags &
263 CIFS_MOUNT_MAP_SPECIAL_CHR); 263 CIFS_MOUNT_MAP_SPECIAL_CHR);
264/* else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 264#ifdef CONFIG_CIFS_EXPERIMENTAL
265 else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
265 __u16 fid; 266 __u16 fid;
266 int oplock = FALSE; 267 int oplock = FALSE;
267 rc = CIFSSMBOpen(xid, pTcon, full_path, 268 if (experimEnabled)
268 FILE_OPEN, GENERIC_READ, 0, &fid, 269 rc = CIFSSMBOpen(xid, pTcon, full_path,
269 &oplock, NULL, cifs_sb->local_nls, 270 FILE_OPEN, GENERIC_READ, 0, &fid,
270 cifs_sb->mnt_cifs_flags & 271 &oplock, NULL, cifs_sb->local_nls,
271 CIFS_MOUNT_MAP_SPECIAL_CHR); 272 cifs_sb->mnt_cifs_flags &
273 CIFS_MOUNT_MAP_SPECIAL_CHR);
274 /* else rc is EOPNOTSUPP from above */
275
272 if(rc == 0) { 276 if(rc == 0) {
273 rc = CIFSSMBGetCIFSACL(xid, pTcon, fid, 277 rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
274 ea_value, buf_size, 278 ea_value, buf_size,
275 ACL_TYPE_ACCESS); 279 ACL_TYPE_ACCESS);
276 CIFSSMBClose(xid, pTcon, fid); 280 CIFSSMBClose(xid, pTcon, fid);
277 } 281 }
278 } */ /* BB enable after fixing up return data */ 282 }
283#endif /* EXPERIMENTAL */
279#else 284#else
280 cFYI(1, ("query POSIX ACL not supported yet")); 285 cFYI(1, ("query POSIX ACL not supported yet"));
281#endif /* CONFIG_CIFS_POSIX */ 286#endif /* CONFIG_CIFS_POSIX */
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index cdb4c07a7870..359e531094dd 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -51,7 +51,7 @@ static void *alloc_upcall(int opcode, int size)
51 51
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = current->pid;
54 inp->ih.pgid = process_group(current); 54 inp->ih.pgid = task_pgrp_nr(current);
55#ifdef CONFIG_CODA_FS_OLD_API 55#ifdef CONFIG_CODA_FS_OLD_API
56 memset(&inp->ih.cred, 0, sizeof(struct coda_cred)); 56 memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
57 inp->ih.cred.cr_fsuid = current->fsuid; 57 inp->ih.cred.cr_fsuid = current->fsuid;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 6438941ab1f8..4f741546f4bb 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -456,7 +456,7 @@ static int check_version(struct dlm_write_request *req)
456 printk(KERN_DEBUG "dlm: process %s (%d) version mismatch " 456 printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
457 "user (%d.%d.%d) kernel (%d.%d.%d)\n", 457 "user (%d.%d.%d) kernel (%d.%d.%d)\n",
458 current->comm, 458 current->comm,
459 current->pid, 459 task_pid_nr(current),
460 req->version[0], 460 req->version[0],
461 req->version[1], 461 req->version[1],
462 req->version[2], 462 req->version[2],
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index de6189291954..34f68f3a069a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -325,15 +325,14 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
325 int wake_nests = 0; 325 int wake_nests = 0;
326 unsigned long flags; 326 unsigned long flags;
327 struct task_struct *this_task = current; 327 struct task_struct *this_task = current;
328 struct list_head *lsthead = &psw->wake_task_list, *lnk; 328 struct list_head *lsthead = &psw->wake_task_list;
329 struct wake_task_node *tncur; 329 struct wake_task_node *tncur;
330 struct wake_task_node tnode; 330 struct wake_task_node tnode;
331 331
332 spin_lock_irqsave(&psw->lock, flags); 332 spin_lock_irqsave(&psw->lock, flags);
333 333
334 /* Try to see if the current task is already inside this wakeup call */ 334 /* Try to see if the current task is already inside this wakeup call */
335 list_for_each(lnk, lsthead) { 335 list_for_each_entry(tncur, lsthead, llink) {
336 tncur = list_entry(lnk, struct wake_task_node, llink);
337 336
338 if (tncur->wq == wq || 337 if (tncur->wq == wq ||
339 (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) { 338 (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) {
diff --git a/fs/exec.c b/fs/exec.c
index 070ddf13cb71..2c942e2d14ea 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -234,7 +234,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
234 vma->vm_start = vma->vm_end - PAGE_SIZE; 234 vma->vm_start = vma->vm_end - PAGE_SIZE;
235 235
236 vma->vm_flags = VM_STACK_FLAGS; 236 vma->vm_flags = VM_STACK_FLAGS;
237 vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; 237 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
238 err = insert_vm_struct(mm, vma); 238 err = insert_vm_struct(mm, vma);
239 if (err) { 239 if (err) {
240 up_write(&mm->mmap_sem); 240 up_write(&mm->mmap_sem);
@@ -775,8 +775,8 @@ static int de_thread(struct task_struct *tsk)
775 * Reparenting needs write_lock on tasklist_lock, 775 * Reparenting needs write_lock on tasklist_lock,
776 * so it is safe to do it under read_lock. 776 * so it is safe to do it under read_lock.
777 */ 777 */
778 if (unlikely(tsk->group_leader == child_reaper(tsk))) 778 if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
779 tsk->nsproxy->pid_ns->child_reaper = tsk; 779 task_active_pid_ns(tsk)->child_reaper = tsk;
780 780
781 zap_other_threads(tsk); 781 zap_other_threads(tsk);
782 read_unlock(&tasklist_lock); 782 read_unlock(&tasklist_lock);
@@ -841,8 +841,8 @@ static int de_thread(struct task_struct *tsk)
841 */ 841 */
842 tsk->start_time = leader->start_time; 842 tsk->start_time = leader->start_time;
843 843
844 BUG_ON(leader->tgid != tsk->tgid); 844 BUG_ON(!same_thread_group(leader, tsk));
845 BUG_ON(tsk->pid == tsk->tgid); 845 BUG_ON(has_group_leader_pid(tsk));
846 /* 846 /*
847 * An exec() starts a new thread group with the 847 * An exec() starts a new thread group with the
848 * TGID of the previous thread group. Rehash the 848 * TGID of the previous thread group. Rehash the
@@ -857,7 +857,7 @@ static int de_thread(struct task_struct *tsk)
857 */ 857 */
858 detach_pid(tsk, PIDTYPE_PID); 858 detach_pid(tsk, PIDTYPE_PID);
859 tsk->pid = leader->pid; 859 tsk->pid = leader->pid;
860 attach_pid(tsk, PIDTYPE_PID, find_pid(tsk->pid)); 860 attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
861 transfer_pid(leader, tsk, PIDTYPE_PGID); 861 transfer_pid(leader, tsk, PIDTYPE_PGID);
862 transfer_pid(leader, tsk, PIDTYPE_SID); 862 transfer_pid(leader, tsk, PIDTYPE_SID);
863 list_replace_rcu(&leader->tasks, &tsk->tasks); 863 list_replace_rcu(&leader->tasks, &tsk->tasks);
@@ -1433,7 +1433,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
1433 case 'p': 1433 case 'p':
1434 pid_in_pattern = 1; 1434 pid_in_pattern = 1;
1435 rc = snprintf(out_ptr, out_end - out_ptr, 1435 rc = snprintf(out_ptr, out_end - out_ptr,
1436 "%d", current->tgid); 1436 "%d", task_tgid_vnr(current));
1437 if (rc > out_end - out_ptr) 1437 if (rc > out_end - out_ptr)
1438 goto out; 1438 goto out;
1439 out_ptr += rc; 1439 out_ptr += rc;
@@ -1513,7 +1513,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
1513 if (!ispipe && !pid_in_pattern 1513 if (!ispipe && !pid_in_pattern
1514 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) { 1514 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
1515 rc = snprintf(out_ptr, out_end - out_ptr, 1515 rc = snprintf(out_ptr, out_end - out_ptr,
1516 ".%d", current->tgid); 1516 ".%d", task_tgid_vnr(current));
1517 if (rc > out_end - out_ptr) 1517 if (rc > out_end - out_ptr)
1518 goto out; 1518 goto out;
1519 out_ptr += rc; 1519 out_ptr += rc;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3dec003b773e..9b162cd6c16c 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2954,7 +2954,7 @@ int ext3_write_inode(struct inode *inode, int wait)
2954 return 0; 2954 return 0;
2955 2955
2956 if (ext3_journal_current_handle()) { 2956 if (ext3_journal_current_handle()) {
2957 jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); 2957 jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
2958 dump_stack(); 2958 dump_stack();
2959 return -EIO; 2959 return -EIO;
2960 } 2960 }
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index f58cbb26323e..408373819e34 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -741,12 +741,11 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
741 } 741 }
742 } else { 742 } else {
743 /* Allocate a buffer where we construct the new block. */ 743 /* Allocate a buffer where we construct the new block. */
744 s->base = kmalloc(sb->s_blocksize, GFP_KERNEL); 744 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
745 /* assert(header == s->base) */ 745 /* assert(header == s->base) */
746 error = -ENOMEM; 746 error = -ENOMEM;
747 if (s->base == NULL) 747 if (s->base == NULL)
748 goto cleanup; 748 goto cleanup;
749 memset(s->base, 0, sb->s_blocksize);
750 header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); 749 header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
751 header(s->base)->h_blocks = cpu_to_le32(1); 750 header(s->base)->h_blocks = cpu_to_le32(1);
752 header(s->base)->h_refcount = cpu_to_le32(1); 751 header(s->base)->h_refcount = cpu_to_le32(1);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c9db73fc5e3d..8685263ccc4a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -18,6 +18,7 @@
18#include <linux/ptrace.h> 18#include <linux/ptrace.h>
19#include <linux/signal.h> 19#include <linux/signal.h>
20#include <linux/rcupdate.h> 20#include <linux/rcupdate.h>
21#include <linux/pid_namespace.h>
21 22
22#include <asm/poll.h> 23#include <asm/poll.h>
23#include <asm/siginfo.h> 24#include <asm/siginfo.h>
@@ -292,7 +293,7 @@ int f_setown(struct file *filp, unsigned long arg, int force)
292 who = -who; 293 who = -who;
293 } 294 }
294 rcu_read_lock(); 295 rcu_read_lock();
295 pid = find_pid(who); 296 pid = find_vpid(who);
296 result = __f_setown(filp, pid, type, force); 297 result = __f_setown(filp, pid, type, force);
297 rcu_read_unlock(); 298 rcu_read_unlock();
298 return result; 299 return result;
@@ -308,7 +309,7 @@ pid_t f_getown(struct file *filp)
308{ 309{
309 pid_t pid; 310 pid_t pid;
310 read_lock(&filp->f_owner.lock); 311 read_lock(&filp->f_owner.lock);
311 pid = pid_nr(filp->f_owner.pid); 312 pid = pid_nr_ns(filp->f_owner.pid, current->nsproxy->pid_ns);
312 if (filp->f_owner.pid_type == PIDTYPE_PGID) 313 if (filp->f_owner.pid_type == PIDTYPE_PGID)
313 pid = -pid; 314 pid = -pid;
314 read_unlock(&filp->f_owner.lock); 315 read_unlock(&filp->f_owner.lock);
diff --git a/fs/file_table.c b/fs/file_table.c
index 3176fefc92e1..664e3f2309b8 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -323,12 +323,11 @@ void file_kill(struct file *file)
323 323
324int fs_may_remount_ro(struct super_block *sb) 324int fs_may_remount_ro(struct super_block *sb)
325{ 325{
326 struct list_head *p; 326 struct file *file;
327 327
328 /* Check that no files are currently opened for writing. */ 328 /* Check that no files are currently opened for writing. */
329 file_list_lock(); 329 file_list_lock();
330 list_for_each(p, &sb->s_files) { 330 list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
331 struct file *file = list_entry(p, struct file, f_u.fu_list);
332 struct inode *inode = file->f_path.dentry->d_inode; 331 struct inode *inode = file->f_path.dentry->d_inode;
333 332
334 /* File with pending delete? */ 333 /* File with pending delete? */
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 686734ff973d..0fca82021d76 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -89,7 +89,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
89 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) 89 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
90 printk(KERN_DEBUG 90 printk(KERN_DEBUG
91 "%s(%d): dirtied inode %lu (%s) on %s\n", 91 "%s(%d): dirtied inode %lu (%s) on %s\n",
92 current->comm, current->pid, inode->i_ino, 92 current->comm, task_pid_nr(current), inode->i_ino,
93 name, inode->i_sb->s_id); 93 name, inode->i_sb->s_id);
94 } 94 }
95 95
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 10d2c211d18b..d6ff77e8e7ec 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -25,6 +25,7 @@
25#include <linux/capability.h> 25#include <linux/capability.h>
26#include <linux/syscalls.h> 26#include <linux/syscalls.h>
27#include <linux/security.h> 27#include <linux/security.h>
28#include <linux/pid_namespace.h>
28 29
29static int set_task_ioprio(struct task_struct *task, int ioprio) 30static int set_task_ioprio(struct task_struct *task, int ioprio)
30{ 31{
@@ -93,7 +94,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
93 if (!who) 94 if (!who)
94 p = current; 95 p = current;
95 else 96 else
96 p = find_task_by_pid(who); 97 p = find_task_by_vpid(who);
97 if (p) 98 if (p)
98 ret = set_task_ioprio(p, ioprio); 99 ret = set_task_ioprio(p, ioprio);
99 break; 100 break;
@@ -101,7 +102,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
101 if (!who) 102 if (!who)
102 pgrp = task_pgrp(current); 103 pgrp = task_pgrp(current);
103 else 104 else
104 pgrp = find_pid(who); 105 pgrp = find_vpid(who);
105 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 106 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
106 ret = set_task_ioprio(p, ioprio); 107 ret = set_task_ioprio(p, ioprio);
107 if (ret) 108 if (ret)
@@ -180,7 +181,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
180 if (!who) 181 if (!who)
181 p = current; 182 p = current;
182 else 183 else
183 p = find_task_by_pid(who); 184 p = find_task_by_vpid(who);
184 if (p) 185 if (p)
185 ret = get_task_ioprio(p); 186 ret = get_task_ioprio(p);
186 break; 187 break;
@@ -188,7 +189,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
188 if (!who) 189 if (!who)
189 pgrp = task_pgrp(current); 190 pgrp = task_pgrp(current);
190 else 191 else
191 pgrp = find_pid(who); 192 pgrp = find_vpid(who);
192 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 193 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
193 tmpio = get_task_ioprio(p); 194 tmpio = get_task_ioprio(p);
194 if (tmpio < 0) 195 if (tmpio < 0)
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a263d82761df..8f1f2aa5fb39 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -466,7 +466,7 @@ void journal_commit_transaction(journal_t *journal)
466 spin_unlock(&journal->j_list_lock); 466 spin_unlock(&journal->j_list_lock);
467 467
468 if (err) 468 if (err)
469 __journal_abort_hard(journal); 469 journal_abort(journal, err);
470 470
471 journal_write_revoke_records(journal, commit_transaction); 471 journal_write_revoke_records(journal, commit_transaction);
472 472
@@ -524,7 +524,7 @@ void journal_commit_transaction(journal_t *journal)
524 524
525 descriptor = journal_get_descriptor_buffer(journal); 525 descriptor = journal_get_descriptor_buffer(journal);
526 if (!descriptor) { 526 if (!descriptor) {
527 __journal_abort_hard(journal); 527 journal_abort(journal, -EIO);
528 continue; 528 continue;
529 } 529 }
530 530
@@ -557,7 +557,7 @@ void journal_commit_transaction(journal_t *journal)
557 and repeat this loop: we'll fall into the 557 and repeat this loop: we'll fall into the
558 refile-on-abort condition above. */ 558 refile-on-abort condition above. */
559 if (err) { 559 if (err) {
560 __journal_abort_hard(journal); 560 journal_abort(journal, err);
561 continue; 561 continue;
562 } 562 }
563 563
@@ -748,7 +748,7 @@ wait_for_iobuf:
748 err = -EIO; 748 err = -EIO;
749 749
750 if (err) 750 if (err)
751 __journal_abort_hard(journal); 751 journal_abort(journal, err);
752 752
753 /* End of a transaction! Finally, we can do checkpoint 753 /* End of a transaction! Finally, we can do checkpoint
754 processing: any buffers committed as a result of this 754 processing: any buffers committed as a result of this
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 5d9fec0b7ebd..5d14243499d4 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -35,6 +35,7 @@
35#include <linux/kthread.h> 35#include <linux/kthread.h>
36#include <linux/poison.h> 36#include <linux/poison.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/debugfs.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40#include <asm/page.h> 41#include <asm/page.h>
@@ -654,10 +655,9 @@ static journal_t * journal_init_common (void)
654 journal_t *journal; 655 journal_t *journal;
655 int err; 656 int err;
656 657
657 journal = kmalloc(sizeof(*journal), GFP_KERNEL); 658 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
658 if (!journal) 659 if (!journal)
659 goto fail; 660 goto fail;
660 memset(journal, 0, sizeof(*journal));
661 661
662 init_waitqueue_head(&journal->j_wait_transaction_locked); 662 init_waitqueue_head(&journal->j_wait_transaction_locked);
663 init_waitqueue_head(&journal->j_wait_logspace); 663 init_waitqueue_head(&journal->j_wait_logspace);
@@ -1852,64 +1852,41 @@ void journal_put_journal_head(struct journal_head *jh)
1852} 1852}
1853 1853
1854/* 1854/*
1855 * /proc tunables 1855 * debugfs tunables
1856 */ 1856 */
1857#if defined(CONFIG_JBD_DEBUG) 1857#ifdef CONFIG_JBD_DEBUG
1858int journal_enable_debug;
1859EXPORT_SYMBOL(journal_enable_debug);
1860#endif
1861 1858
1862#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) 1859u8 journal_enable_debug __read_mostly;
1860EXPORT_SYMBOL(journal_enable_debug);
1863 1861
1864static struct proc_dir_entry *proc_jbd_debug; 1862static struct dentry *jbd_debugfs_dir;
1863static struct dentry *jbd_debug;
1865 1864
1866static int read_jbd_debug(char *page, char **start, off_t off, 1865static void __init jbd_create_debugfs_entry(void)
1867 int count, int *eof, void *data)
1868{ 1866{
1869 int ret; 1867 jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
1870 1868 if (jbd_debugfs_dir)
1871 ret = sprintf(page + off, "%d\n", journal_enable_debug); 1869 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
1872 *eof = 1; 1870 jbd_debugfs_dir,
1873 return ret; 1871 &journal_enable_debug);
1874} 1872}
1875 1873
1876static int write_jbd_debug(struct file *file, const char __user *buffer, 1874static void __exit jbd_remove_debugfs_entry(void)
1877 unsigned long count, void *data)
1878{ 1875{
1879 char buf[32]; 1876 debugfs_remove(jbd_debug);
1880 1877 debugfs_remove(jbd_debugfs_dir);
1881 if (count > ARRAY_SIZE(buf) - 1)
1882 count = ARRAY_SIZE(buf) - 1;
1883 if (copy_from_user(buf, buffer, count))
1884 return -EFAULT;
1885 buf[ARRAY_SIZE(buf) - 1] = '\0';
1886 journal_enable_debug = simple_strtoul(buf, NULL, 10);
1887 return count;
1888} 1878}
1889 1879
1890#define JBD_PROC_NAME "sys/fs/jbd-debug" 1880#else
1891 1881
1892static void __init create_jbd_proc_entry(void) 1882static inline void jbd_create_debugfs_entry(void)
1893{ 1883{
1894 proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
1895 if (proc_jbd_debug) {
1896 /* Why is this so hard? */
1897 proc_jbd_debug->read_proc = read_jbd_debug;
1898 proc_jbd_debug->write_proc = write_jbd_debug;
1899 }
1900} 1884}
1901 1885
1902static void __exit remove_jbd_proc_entry(void) 1886static inline void jbd_remove_debugfs_entry(void)
1903{ 1887{
1904 if (proc_jbd_debug)
1905 remove_proc_entry(JBD_PROC_NAME, NULL);
1906} 1888}
1907 1889
1908#else
1909
1910#define create_jbd_proc_entry() do {} while (0)
1911#define remove_jbd_proc_entry() do {} while (0)
1912
1913#endif 1890#endif
1914 1891
1915struct kmem_cache *jbd_handle_cache; 1892struct kmem_cache *jbd_handle_cache;
@@ -1966,7 +1943,7 @@ static int __init journal_init(void)
1966 ret = journal_init_caches(); 1943 ret = journal_init_caches();
1967 if (ret != 0) 1944 if (ret != 0)
1968 journal_destroy_caches(); 1945 journal_destroy_caches();
1969 create_jbd_proc_entry(); 1946 jbd_create_debugfs_entry();
1970 return ret; 1947 return ret;
1971} 1948}
1972 1949
@@ -1977,7 +1954,7 @@ static void __exit journal_exit(void)
1977 if (n) 1954 if (n)
1978 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); 1955 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
1979#endif 1956#endif
1980 remove_jbd_proc_entry(); 1957 jbd_remove_debugfs_entry();
1981 journal_destroy_caches(); 1958 journal_destroy_caches();
1982} 1959}
1983 1960
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 2a5f4b833e35..c5d9694b6a2f 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -250,10 +250,10 @@ int journal_recover(journal_t *journal)
250 if (!err) 250 if (!err)
251 err = do_one_pass(journal, &info, PASS_REPLAY); 251 err = do_one_pass(journal, &info, PASS_REPLAY);
252 252
253 jbd_debug(0, "JBD: recovery, exit status %d, " 253 jbd_debug(1, "JBD: recovery, exit status %d, "
254 "recovered transactions %u to %u\n", 254 "recovered transactions %u to %u\n",
255 err, info.start_transaction, info.end_transaction); 255 err, info.start_transaction, info.end_transaction);
256 jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", 256 jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
257 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 257 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
258 258
259 /* Restart the log at the next transaction ID, thus invalidating 259 /* Restart the log at the next transaction ID, thus invalidating
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
297#ifdef CONFIG_JBD_DEBUG 297#ifdef CONFIG_JBD_DEBUG
298 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); 298 int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
299#endif 299#endif
300 jbd_debug(0, 300 jbd_debug(1,
301 "JBD: ignoring %d transaction%s from the journal.\n", 301 "JBD: ignoring %d transaction%s from the journal.\n",
302 dropped, (dropped == 1) ? "" : "s"); 302 dropped, (dropped == 1) ? "" : "s");
303 journal->j_transaction_sequence = ++info.end_transaction; 303 journal->j_transaction_sequence = ++info.end_transaction;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 9841b1e5af03..08ff6c7028cc 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
96 96
97alloc_transaction: 97alloc_transaction:
98 if (!journal->j_running_transaction) { 98 if (!journal->j_running_transaction) {
99 new_transaction = kmalloc(sizeof(*new_transaction), 99 new_transaction = kzalloc(sizeof(*new_transaction),
100 GFP_NOFS|__GFP_NOFAIL); 100 GFP_NOFS|__GFP_NOFAIL);
101 if (!new_transaction) { 101 if (!new_transaction) {
102 ret = -ENOMEM; 102 ret = -ENOMEM;
103 goto out; 103 goto out;
104 } 104 }
105 memset(new_transaction, 0, sizeof(*new_transaction));
106 } 105 }
107 106
108 jbd_debug(3, "New handle %p going live.\n", handle); 107 jbd_debug(3, "New handle %p going live.\n", handle);
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 2a49f2c51a9f..4130adabd76e 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -80,28 +80,28 @@
80#define JFFS2_ERROR(fmt, ...) \ 80#define JFFS2_ERROR(fmt, ...) \
81 do { \ 81 do { \
82 printk(JFFS2_ERR_MSG_PREFIX \ 82 printk(JFFS2_ERR_MSG_PREFIX \
83 " (%d) %s: " fmt, current->pid, \ 83 " (%d) %s: " fmt, task_pid_nr(current), \
84 __FUNCTION__ , ##__VA_ARGS__); \ 84 __FUNCTION__ , ##__VA_ARGS__); \
85 } while(0) 85 } while(0)
86 86
87#define JFFS2_WARNING(fmt, ...) \ 87#define JFFS2_WARNING(fmt, ...) \
88 do { \ 88 do { \
89 printk(JFFS2_WARN_MSG_PREFIX \ 89 printk(JFFS2_WARN_MSG_PREFIX \
90 " (%d) %s: " fmt, current->pid, \ 90 " (%d) %s: " fmt, task_pid_nr(current), \
91 __FUNCTION__ , ##__VA_ARGS__); \ 91 __FUNCTION__ , ##__VA_ARGS__); \
92 } while(0) 92 } while(0)
93 93
94#define JFFS2_NOTICE(fmt, ...) \ 94#define JFFS2_NOTICE(fmt, ...) \
95 do { \ 95 do { \
96 printk(JFFS2_NOTICE_MSG_PREFIX \ 96 printk(JFFS2_NOTICE_MSG_PREFIX \
97 " (%d) %s: " fmt, current->pid, \ 97 " (%d) %s: " fmt, task_pid_nr(current), \
98 __FUNCTION__ , ##__VA_ARGS__); \ 98 __FUNCTION__ , ##__VA_ARGS__); \
99 } while(0) 99 } while(0)
100 100
101#define JFFS2_DEBUG(fmt, ...) \ 101#define JFFS2_DEBUG(fmt, ...) \
102 do { \ 102 do { \
103 printk(JFFS2_DBG_MSG_PREFIX \ 103 printk(JFFS2_DBG_MSG_PREFIX \
104 " (%d) %s: " fmt, current->pid, \ 104 " (%d) %s: " fmt, task_pid_nr(current), \
105 __FUNCTION__ , ##__VA_ARGS__); \ 105 __FUNCTION__ , ##__VA_ARGS__); \
106 } while(0) 106 } while(0)
107 107
diff --git a/fs/namespace.c b/fs/namespace.c
index 07daa7972591..860752998fb3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1411,7 +1411,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1411 mnt_flags |= MNT_RELATIME; 1411 mnt_flags |= MNT_RELATIME;
1412 1412
1413 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 1413 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1414 MS_NOATIME | MS_NODIRATIME | MS_RELATIME); 1414 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
1415 1415
1416 /* ... and get the mountpoint */ 1416 /* ... and get the mountpoint */
1417 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); 1417 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 46934c97f8f7..d0199189924c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1029,13 +1029,13 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1029 if (EX_WGATHER(exp)) { 1029 if (EX_WGATHER(exp)) {
1030 if (atomic_read(&inode->i_writecount) > 1 1030 if (atomic_read(&inode->i_writecount) > 1
1031 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { 1031 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
1032 dprintk("nfsd: write defer %d\n", current->pid); 1032 dprintk("nfsd: write defer %d\n", task_pid_nr(current));
1033 msleep(10); 1033 msleep(10);
1034 dprintk("nfsd: write resume %d\n", current->pid); 1034 dprintk("nfsd: write resume %d\n", task_pid_nr(current));
1035 } 1035 }
1036 1036
1037 if (inode->i_state & I_DIRTY) { 1037 if (inode->i_state & I_DIRTY) {
1038 dprintk("nfsd: write sync %d\n", current->pid); 1038 dprintk("nfsd: write sync %d\n", task_pid_nr(current));
1039 host_err=nfsd_sync(file); 1039 host_err=nfsd_sync(file);
1040 } 1040 }
1041#if 0 1041#if 0
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f14b541fab95..9cc7c0418b70 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1372,7 +1372,7 @@ static ssize_t o2hb_region_pid_read(struct o2hb_region *reg,
1372 1372
1373 spin_lock(&o2hb_live_lock); 1373 spin_lock(&o2hb_live_lock);
1374 if (reg->hr_task) 1374 if (reg->hr_task)
1375 pid = reg->hr_task->pid; 1375 pid = task_pid_nr(reg->hr_task);
1376 spin_unlock(&o2hb_live_lock); 1376 spin_unlock(&o2hb_live_lock);
1377 1377
1378 if (!pid) 1378 if (!pid)
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 75cd877f6d42..cd046060114e 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -192,7 +192,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
192 * previous token if args expands to nothing. 192 * previous token if args expands to nothing.
193 */ 193 */
194#define __mlog_printk(level, fmt, args...) \ 194#define __mlog_printk(level, fmt, args...) \
195 printk(level "(%u,%lu):%s:%d " fmt, current->pid, \ 195 printk(level "(%u,%lu):%s:%d " fmt, task_pid_nr(current), \
196 __mlog_cpu_guess, __PRETTY_FUNCTION__, __LINE__ , \ 196 __mlog_cpu_guess, __PRETTY_FUNCTION__, __LINE__ , \
197 ##args) 197 ##args)
198 198
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index a2c33160bfd6..2fde7bf91434 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -259,7 +259,7 @@ static void dlm_print_reco_node_status(struct dlm_ctxt *dlm)
259 struct dlm_lock_resource *res; 259 struct dlm_lock_resource *res;
260 260
261 mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n", 261 mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
262 dlm->name, dlm->dlm_reco_thread_task->pid, 262 dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
263 dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive", 263 dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
264 dlm->reco.dead_node, dlm->reco.new_master); 264 dlm->reco.dead_node, dlm->reco.new_master);
265 265
@@ -420,7 +420,7 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
420 if (dlm_in_recovery(dlm)) { 420 if (dlm_in_recovery(dlm)) {
421 mlog(0, "%s: reco thread %d in recovery: " 421 mlog(0, "%s: reco thread %d in recovery: "
422 "state=%d, master=%u, dead=%u\n", 422 "state=%d, master=%u, dead=%u\n",
423 dlm->name, dlm->dlm_reco_thread_task->pid, 423 dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
424 dlm->reco.state, dlm->reco.new_master, 424 dlm->reco.state, dlm->reco.new_master,
425 dlm->reco.dead_node); 425 dlm->reco.dead_node);
426 } 426 }
@@ -483,7 +483,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
483 return 0; 483 return 0;
484 } 484 }
485 mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n", 485 mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
486 dlm->name, dlm->dlm_reco_thread_task->pid, 486 dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
487 dlm->reco.dead_node); 487 dlm->reco.dead_node);
488 spin_unlock(&dlm->spinlock); 488 spin_unlock(&dlm->spinlock);
489 489
@@ -507,7 +507,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
507 mlog(0, "another node will master this recovery session.\n"); 507 mlog(0, "another node will master this recovery session.\n");
508 } 508 }
509 mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n", 509 mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n",
510 dlm->name, dlm->dlm_reco_thread_task->pid, dlm->reco.new_master, 510 dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master,
511 dlm->node_num, dlm->reco.dead_node); 511 dlm->node_num, dlm->reco.dead_node);
512 512
513 /* it is safe to start everything back up here 513 /* it is safe to start everything back up here
@@ -520,7 +520,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
520 520
521master_here: 521master_here:
522 mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n", 522 mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
523 dlm->dlm_reco_thread_task->pid, 523 task_pid_nr(dlm->dlm_reco_thread_task),
524 dlm->name, dlm->reco.dead_node, dlm->node_num); 524 dlm->name, dlm->reco.dead_node, dlm->node_num);
525 525
526 status = dlm_remaster_locks(dlm, dlm->reco.dead_node); 526 status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 27b59f5f3bd1..7a34571203bc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -77,6 +77,7 @@
77#include <linux/cpuset.h> 77#include <linux/cpuset.h>
78#include <linux/rcupdate.h> 78#include <linux/rcupdate.h>
79#include <linux/delayacct.h> 79#include <linux/delayacct.h>
80#include <linux/pid_namespace.h>
80 81
81#include <asm/pgtable.h> 82#include <asm/pgtable.h>
82#include <asm/processor.h> 83#include <asm/processor.h>
@@ -145,8 +146,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
145 TASK_UNINTERRUPTIBLE | 146 TASK_UNINTERRUPTIBLE |
146 TASK_STOPPED | 147 TASK_STOPPED |
147 TASK_TRACED)) | 148 TASK_TRACED)) |
148 (tsk->exit_state & (EXIT_ZOMBIE | 149 tsk->exit_state;
149 EXIT_DEAD));
150 const char **p = &task_state_array[0]; 150 const char **p = &task_state_array[0];
151 151
152 while (state) { 152 while (state) {
@@ -161,8 +161,15 @@ static inline char *task_state(struct task_struct *p, char *buffer)
161 struct group_info *group_info; 161 struct group_info *group_info;
162 int g; 162 int g;
163 struct fdtable *fdt = NULL; 163 struct fdtable *fdt = NULL;
164 struct pid_namespace *ns;
165 pid_t ppid, tpid;
164 166
167 ns = current->nsproxy->pid_ns;
165 rcu_read_lock(); 168 rcu_read_lock();
169 ppid = pid_alive(p) ?
170 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
171 tpid = pid_alive(p) && p->ptrace ?
172 task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0;
166 buffer += sprintf(buffer, 173 buffer += sprintf(buffer,
167 "State:\t%s\n" 174 "State:\t%s\n"
168 "Tgid:\t%d\n" 175 "Tgid:\t%d\n"
@@ -172,9 +179,9 @@ static inline char *task_state(struct task_struct *p, char *buffer)
172 "Uid:\t%d\t%d\t%d\t%d\n" 179 "Uid:\t%d\t%d\t%d\t%d\n"
173 "Gid:\t%d\t%d\t%d\t%d\n", 180 "Gid:\t%d\t%d\t%d\t%d\n",
174 get_task_state(p), 181 get_task_state(p),
175 p->tgid, p->pid, 182 task_tgid_nr_ns(p, ns),
176 pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, 183 task_pid_nr_ns(p, ns),
177 pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, 184 ppid, tpid,
178 p->uid, p->euid, p->suid, p->fsuid, 185 p->uid, p->euid, p->suid, p->fsuid,
179 p->gid, p->egid, p->sgid, p->fsgid); 186 p->gid, p->egid, p->sgid, p->fsgid);
180 187
@@ -394,6 +401,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
394 unsigned long rsslim = 0; 401 unsigned long rsslim = 0;
395 char tcomm[sizeof(task->comm)]; 402 char tcomm[sizeof(task->comm)];
396 unsigned long flags; 403 unsigned long flags;
404 struct pid_namespace *ns;
405
406 ns = current->nsproxy->pid_ns;
397 407
398 state = *get_task_state(task); 408 state = *get_task_state(task);
399 vsize = eip = esp = 0; 409 vsize = eip = esp = 0;
@@ -416,7 +426,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
416 struct signal_struct *sig = task->signal; 426 struct signal_struct *sig = task->signal;
417 427
418 if (sig->tty) { 428 if (sig->tty) {
419 tty_pgrp = pid_nr(sig->tty->pgrp); 429 tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns);
420 tty_nr = new_encode_dev(tty_devnum(sig->tty)); 430 tty_nr = new_encode_dev(tty_devnum(sig->tty));
421 } 431 }
422 432
@@ -449,9 +459,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
449 gtime += cputime_add(gtime, sig->gtime); 459 gtime += cputime_add(gtime, sig->gtime);
450 } 460 }
451 461
452 sid = signal_session(sig); 462 sid = task_session_nr_ns(task, ns);
453 pgid = process_group(task); 463 pgid = task_pgrp_nr_ns(task, ns);
454 ppid = rcu_dereference(task->real_parent)->tgid; 464 ppid = task_ppid_nr_ns(task, ns);
455 465
456 unlock_task_sighand(task, &flags); 466 unlock_task_sighand(task, &flags);
457 } 467 }
@@ -483,7 +493,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
483 res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ 493 res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
484%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 494%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
485%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", 495%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
486 task->pid, 496 task_pid_nr_ns(task, ns),
487 tcomm, 497 tcomm,
488 state, 498 state,
489 ppid, 499 ppid,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4fe74d156416..39a3d7c969c5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -63,16 +63,19 @@
63#include <linux/mm.h> 63#include <linux/mm.h>
64#include <linux/rcupdate.h> 64#include <linux/rcupdate.h>
65#include <linux/kallsyms.h> 65#include <linux/kallsyms.h>
66#include <linux/resource.h>
66#include <linux/module.h> 67#include <linux/module.h>
67#include <linux/mount.h> 68#include <linux/mount.h>
68#include <linux/security.h> 69#include <linux/security.h>
69#include <linux/ptrace.h> 70#include <linux/ptrace.h>
71#include <linux/cgroup.h>
70#include <linux/cpuset.h> 72#include <linux/cpuset.h>
71#include <linux/audit.h> 73#include <linux/audit.h>
72#include <linux/poll.h> 74#include <linux/poll.h>
73#include <linux/nsproxy.h> 75#include <linux/nsproxy.h>
74#include <linux/oom.h> 76#include <linux/oom.h>
75#include <linux/elf.h> 77#include <linux/elf.h>
78#include <linux/pid_namespace.h>
76#include "internal.h" 79#include "internal.h"
77 80
78/* NOTE: 81/* NOTE:
@@ -301,6 +304,78 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
301 return sprintf(buffer, "%lu\n", points); 304 return sprintf(buffer, "%lu\n", points);
302} 305}
303 306
307struct limit_names {
308 char *name;
309 char *unit;
310};
311
312static const struct limit_names lnames[RLIM_NLIMITS] = {
313 [RLIMIT_CPU] = {"Max cpu time", "ms"},
314 [RLIMIT_FSIZE] = {"Max file size", "bytes"},
315 [RLIMIT_DATA] = {"Max data size", "bytes"},
316 [RLIMIT_STACK] = {"Max stack size", "bytes"},
317 [RLIMIT_CORE] = {"Max core file size", "bytes"},
318 [RLIMIT_RSS] = {"Max resident set", "bytes"},
319 [RLIMIT_NPROC] = {"Max processes", "processes"},
320 [RLIMIT_NOFILE] = {"Max open files", "files"},
321 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
322 [RLIMIT_AS] = {"Max address space", "bytes"},
323 [RLIMIT_LOCKS] = {"Max file locks", "locks"},
324 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
325 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
326 [RLIMIT_NICE] = {"Max nice priority", NULL},
327 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
328};
329
330/* Display limits for a process */
331static int proc_pid_limits(struct task_struct *task, char *buffer)
332{
333 unsigned int i;
334 int count = 0;
335 unsigned long flags;
336 char *bufptr = buffer;
337
338 struct rlimit rlim[RLIM_NLIMITS];
339
340 rcu_read_lock();
341 if (!lock_task_sighand(task,&flags)) {
342 rcu_read_unlock();
343 return 0;
344 }
345 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
346 unlock_task_sighand(task, &flags);
347 rcu_read_unlock();
348
349 /*
350 * print the file header
351 */
352 count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
353 "Limit", "Soft Limit", "Hard Limit", "Units");
354
355 for (i = 0; i < RLIM_NLIMITS; i++) {
356 if (rlim[i].rlim_cur == RLIM_INFINITY)
357 count += sprintf(&bufptr[count], "%-25s %-20s ",
358 lnames[i].name, "unlimited");
359 else
360 count += sprintf(&bufptr[count], "%-25s %-20lu ",
361 lnames[i].name, rlim[i].rlim_cur);
362
363 if (rlim[i].rlim_max == RLIM_INFINITY)
364 count += sprintf(&bufptr[count], "%-20s ", "unlimited");
365 else
366 count += sprintf(&bufptr[count], "%-20lu ",
367 rlim[i].rlim_max);
368
369 if (lnames[i].unit)
370 count += sprintf(&bufptr[count], "%-10s\n",
371 lnames[i].unit);
372 else
373 count += sprintf(&bufptr[count], "\n");
374 }
375
376 return count;
377}
378
304/************************************************************************/ 379/************************************************************************/
305/* Here the fs part begins */ 380/* Here the fs part begins */
306/************************************************************************/ 381/************************************************************************/
@@ -349,18 +424,21 @@ struct proc_mounts {
349static int mounts_open(struct inode *inode, struct file *file) 424static int mounts_open(struct inode *inode, struct file *file)
350{ 425{
351 struct task_struct *task = get_proc_task(inode); 426 struct task_struct *task = get_proc_task(inode);
427 struct nsproxy *nsp;
352 struct mnt_namespace *ns = NULL; 428 struct mnt_namespace *ns = NULL;
353 struct proc_mounts *p; 429 struct proc_mounts *p;
354 int ret = -EINVAL; 430 int ret = -EINVAL;
355 431
356 if (task) { 432 if (task) {
357 task_lock(task); 433 rcu_read_lock();
358 if (task->nsproxy) { 434 nsp = task_nsproxy(task);
359 ns = task->nsproxy->mnt_ns; 435 if (nsp) {
436 ns = nsp->mnt_ns;
360 if (ns) 437 if (ns)
361 get_mnt_ns(ns); 438 get_mnt_ns(ns);
362 } 439 }
363 task_unlock(task); 440 rcu_read_unlock();
441
364 put_task_struct(task); 442 put_task_struct(task);
365 } 443 }
366 444
@@ -423,16 +501,20 @@ static int mountstats_open(struct inode *inode, struct file *file)
423 501
424 if (!ret) { 502 if (!ret) {
425 struct seq_file *m = file->private_data; 503 struct seq_file *m = file->private_data;
504 struct nsproxy *nsp;
426 struct mnt_namespace *mnt_ns = NULL; 505 struct mnt_namespace *mnt_ns = NULL;
427 struct task_struct *task = get_proc_task(inode); 506 struct task_struct *task = get_proc_task(inode);
428 507
429 if (task) { 508 if (task) {
430 task_lock(task); 509 rcu_read_lock();
431 if (task->nsproxy) 510 nsp = task_nsproxy(task);
432 mnt_ns = task->nsproxy->mnt_ns; 511 if (nsp) {
433 if (mnt_ns) 512 mnt_ns = nsp->mnt_ns;
434 get_mnt_ns(mnt_ns); 513 if (mnt_ns)
435 task_unlock(task); 514 get_mnt_ns(mnt_ns);
515 }
516 rcu_read_unlock();
517
436 put_task_struct(task); 518 put_task_struct(task);
437 } 519 }
438 520
@@ -1437,7 +1519,7 @@ static int proc_readfd_common(struct file * filp, void * dirent,
1437 struct dentry *dentry = filp->f_path.dentry; 1519 struct dentry *dentry = filp->f_path.dentry;
1438 struct inode *inode = dentry->d_inode; 1520 struct inode *inode = dentry->d_inode;
1439 struct task_struct *p = get_proc_task(inode); 1521 struct task_struct *p = get_proc_task(inode);
1440 unsigned int fd, tid, ino; 1522 unsigned int fd, ino;
1441 int retval; 1523 int retval;
1442 struct files_struct * files; 1524 struct files_struct * files;
1443 struct fdtable *fdt; 1525 struct fdtable *fdt;
@@ -1446,7 +1528,6 @@ static int proc_readfd_common(struct file * filp, void * dirent,
1446 if (!p) 1528 if (!p)
1447 goto out_no_task; 1529 goto out_no_task;
1448 retval = 0; 1530 retval = 0;
1449 tid = p->pid;
1450 1531
1451 fd = filp->f_pos; 1532 fd = filp->f_pos;
1452 switch (fd) { 1533 switch (fd) {
@@ -1681,7 +1762,6 @@ static int proc_pident_readdir(struct file *filp,
1681 const struct pid_entry *ents, unsigned int nents) 1762 const struct pid_entry *ents, unsigned int nents)
1682{ 1763{
1683 int i; 1764 int i;
1684 int pid;
1685 struct dentry *dentry = filp->f_path.dentry; 1765 struct dentry *dentry = filp->f_path.dentry;
1686 struct inode *inode = dentry->d_inode; 1766 struct inode *inode = dentry->d_inode;
1687 struct task_struct *task = get_proc_task(inode); 1767 struct task_struct *task = get_proc_task(inode);
@@ -1694,7 +1774,6 @@ static int proc_pident_readdir(struct file *filp,
1694 goto out_no_task; 1774 goto out_no_task;
1695 1775
1696 ret = 0; 1776 ret = 0;
1697 pid = task->pid;
1698 i = filp->f_pos; 1777 i = filp->f_pos;
1699 switch (i) { 1778 switch (i) {
1700 case 0: 1779 case 0:
@@ -1928,14 +2007,14 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1928 int buflen) 2007 int buflen)
1929{ 2008{
1930 char tmp[PROC_NUMBUF]; 2009 char tmp[PROC_NUMBUF];
1931 sprintf(tmp, "%d", current->tgid); 2010 sprintf(tmp, "%d", task_tgid_vnr(current));
1932 return vfs_readlink(dentry,buffer,buflen,tmp); 2011 return vfs_readlink(dentry,buffer,buflen,tmp);
1933} 2012}
1934 2013
1935static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 2014static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1936{ 2015{
1937 char tmp[PROC_NUMBUF]; 2016 char tmp[PROC_NUMBUF];
1938 sprintf(tmp, "%d", current->tgid); 2017 sprintf(tmp, "%d", task_tgid_vnr(current));
1939 return ERR_PTR(vfs_follow_link(nd,tmp)); 2018 return ERR_PTR(vfs_follow_link(nd,tmp));
1940} 2019}
1941 2020
@@ -2101,6 +2180,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2101 REG("environ", S_IRUSR, environ), 2180 REG("environ", S_IRUSR, environ),
2102 INF("auxv", S_IRUSR, pid_auxv), 2181 INF("auxv", S_IRUSR, pid_auxv),
2103 INF("status", S_IRUGO, pid_status), 2182 INF("status", S_IRUGO, pid_status),
2183 INF("limits", S_IRUSR, pid_limits),
2104#ifdef CONFIG_SCHED_DEBUG 2184#ifdef CONFIG_SCHED_DEBUG
2105 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2185 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2106#endif 2186#endif
@@ -2130,9 +2210,12 @@ static const struct pid_entry tgid_base_stuff[] = {
2130#ifdef CONFIG_SCHEDSTATS 2210#ifdef CONFIG_SCHEDSTATS
2131 INF("schedstat", S_IRUGO, pid_schedstat), 2211 INF("schedstat", S_IRUGO, pid_schedstat),
2132#endif 2212#endif
2133#ifdef CONFIG_CPUSETS 2213#ifdef CONFIG_PROC_PID_CPUSET
2134 REG("cpuset", S_IRUGO, cpuset), 2214 REG("cpuset", S_IRUGO, cpuset),
2135#endif 2215#endif
2216#ifdef CONFIG_CGROUPS
2217 REG("cgroup", S_IRUGO, cgroup),
2218#endif
2136 INF("oom_score", S_IRUGO, oom_score), 2219 INF("oom_score", S_IRUGO, oom_score),
2137 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), 2220 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
2138#ifdef CONFIG_AUDITSYSCALL 2221#ifdef CONFIG_AUDITSYSCALL
@@ -2193,27 +2276,27 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
2193 * that no dcache entries will exist at process exit time it 2276 * that no dcache entries will exist at process exit time it
2194 * just makes it very unlikely that any will persist. 2277 * just makes it very unlikely that any will persist.
2195 */ 2278 */
2196void proc_flush_task(struct task_struct *task) 2279static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
2197{ 2280{
2198 struct dentry *dentry, *leader, *dir; 2281 struct dentry *dentry, *leader, *dir;
2199 char buf[PROC_NUMBUF]; 2282 char buf[PROC_NUMBUF];
2200 struct qstr name; 2283 struct qstr name;
2201 2284
2202 name.name = buf; 2285 name.name = buf;
2203 name.len = snprintf(buf, sizeof(buf), "%d", task->pid); 2286 name.len = snprintf(buf, sizeof(buf), "%d", pid);
2204 dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); 2287 dentry = d_hash_and_lookup(mnt->mnt_root, &name);
2205 if (dentry) { 2288 if (dentry) {
2206 shrink_dcache_parent(dentry); 2289 shrink_dcache_parent(dentry);
2207 d_drop(dentry); 2290 d_drop(dentry);
2208 dput(dentry); 2291 dput(dentry);
2209 } 2292 }
2210 2293
2211 if (thread_group_leader(task)) 2294 if (tgid == 0)
2212 goto out; 2295 goto out;
2213 2296
2214 name.name = buf; 2297 name.name = buf;
2215 name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); 2298 name.len = snprintf(buf, sizeof(buf), "%d", tgid);
2216 leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); 2299 leader = d_hash_and_lookup(mnt->mnt_root, &name);
2217 if (!leader) 2300 if (!leader)
2218 goto out; 2301 goto out;
2219 2302
@@ -2224,7 +2307,7 @@ void proc_flush_task(struct task_struct *task)
2224 goto out_put_leader; 2307 goto out_put_leader;
2225 2308
2226 name.name = buf; 2309 name.name = buf;
2227 name.len = snprintf(buf, sizeof(buf), "%d", task->pid); 2310 name.len = snprintf(buf, sizeof(buf), "%d", pid);
2228 dentry = d_hash_and_lookup(dir, &name); 2311 dentry = d_hash_and_lookup(dir, &name);
2229 if (dentry) { 2312 if (dentry) {
2230 shrink_dcache_parent(dentry); 2313 shrink_dcache_parent(dentry);
@@ -2239,6 +2322,36 @@ out:
2239 return; 2322 return;
2240} 2323}
2241 2324
2325/*
2326 * when flushing dentries from proc one need to flush them from global
2327 * proc (proc_mnt) and from all the namespaces' procs this task was seen
2328 * in. this call is supposed to make all this job.
2329 */
2330
2331void proc_flush_task(struct task_struct *task)
2332{
2333 int i, leader;
2334 struct pid *pid, *tgid;
2335 struct upid *upid;
2336
2337 leader = thread_group_leader(task);
2338 proc_flush_task_mnt(proc_mnt, task->pid, leader ? task->tgid : 0);
2339 pid = task_pid(task);
2340 if (pid->level == 0)
2341 return;
2342
2343 tgid = task_tgid(task);
2344 for (i = 1; i <= pid->level; i++) {
2345 upid = &pid->numbers[i];
2346 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2347 leader ? 0 : tgid->numbers[i].nr);
2348 }
2349
2350 upid = &pid->numbers[pid->level];
2351 if (upid->nr == 1)
2352 pid_ns_release_proc(upid->ns);
2353}
2354
2242static struct dentry *proc_pid_instantiate(struct inode *dir, 2355static struct dentry *proc_pid_instantiate(struct inode *dir,
2243 struct dentry * dentry, 2356 struct dentry * dentry,
2244 struct task_struct *task, const void *ptr) 2357 struct task_struct *task, const void *ptr)
@@ -2274,6 +2387,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2274 struct dentry *result = ERR_PTR(-ENOENT); 2387 struct dentry *result = ERR_PTR(-ENOENT);
2275 struct task_struct *task; 2388 struct task_struct *task;
2276 unsigned tgid; 2389 unsigned tgid;
2390 struct pid_namespace *ns;
2277 2391
2278 result = proc_base_lookup(dir, dentry); 2392 result = proc_base_lookup(dir, dentry);
2279 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) 2393 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
@@ -2283,8 +2397,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
2283 if (tgid == ~0U) 2397 if (tgid == ~0U)
2284 goto out; 2398 goto out;
2285 2399
2400 ns = dentry->d_sb->s_fs_info;
2286 rcu_read_lock(); 2401 rcu_read_lock();
2287 task = find_task_by_pid(tgid); 2402 task = find_task_by_pid_ns(tgid, ns);
2288 if (task) 2403 if (task)
2289 get_task_struct(task); 2404 get_task_struct(task);
2290 rcu_read_unlock(); 2405 rcu_read_unlock();
@@ -2301,7 +2416,8 @@ out:
2301 * Find the first task with tgid >= tgid 2416 * Find the first task with tgid >= tgid
2302 * 2417 *
2303 */ 2418 */
2304static struct task_struct *next_tgid(unsigned int tgid) 2419static struct task_struct *next_tgid(unsigned int tgid,
2420 struct pid_namespace *ns)
2305{ 2421{
2306 struct task_struct *task; 2422 struct task_struct *task;
2307 struct pid *pid; 2423 struct pid *pid;
@@ -2309,9 +2425,9 @@ static struct task_struct *next_tgid(unsigned int tgid)
2309 rcu_read_lock(); 2425 rcu_read_lock();
2310retry: 2426retry:
2311 task = NULL; 2427 task = NULL;
2312 pid = find_ge_pid(tgid); 2428 pid = find_ge_pid(tgid, ns);
2313 if (pid) { 2429 if (pid) {
2314 tgid = pid->nr + 1; 2430 tgid = pid_nr_ns(pid, ns) + 1;
2315 task = pid_task(pid, PIDTYPE_PID); 2431 task = pid_task(pid, PIDTYPE_PID);
2316 /* What we to know is if the pid we have find is the 2432 /* What we to know is if the pid we have find is the
2317 * pid of a thread_group_leader. Testing for task 2433 * pid of a thread_group_leader. Testing for task
@@ -2351,6 +2467,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2351 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); 2467 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
2352 struct task_struct *task; 2468 struct task_struct *task;
2353 int tgid; 2469 int tgid;
2470 struct pid_namespace *ns;
2354 2471
2355 if (!reaper) 2472 if (!reaper)
2356 goto out_no_task; 2473 goto out_no_task;
@@ -2361,11 +2478,12 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2361 goto out; 2478 goto out;
2362 } 2479 }
2363 2480
2481 ns = filp->f_dentry->d_sb->s_fs_info;
2364 tgid = filp->f_pos - TGID_OFFSET; 2482 tgid = filp->f_pos - TGID_OFFSET;
2365 for (task = next_tgid(tgid); 2483 for (task = next_tgid(tgid, ns);
2366 task; 2484 task;
2367 put_task_struct(task), task = next_tgid(tgid + 1)) { 2485 put_task_struct(task), task = next_tgid(tgid + 1, ns)) {
2368 tgid = task->pid; 2486 tgid = task_pid_nr_ns(task, ns);
2369 filp->f_pos = tgid + TGID_OFFSET; 2487 filp->f_pos = tgid + TGID_OFFSET;
2370 if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) { 2488 if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
2371 put_task_struct(task); 2489 put_task_struct(task);
@@ -2388,6 +2506,7 @@ static const struct pid_entry tid_base_stuff[] = {
2388 REG("environ", S_IRUSR, environ), 2506 REG("environ", S_IRUSR, environ),
2389 INF("auxv", S_IRUSR, pid_auxv), 2507 INF("auxv", S_IRUSR, pid_auxv),
2390 INF("status", S_IRUGO, pid_status), 2508 INF("status", S_IRUGO, pid_status),
2509 INF("limits", S_IRUSR, pid_limits),
2391#ifdef CONFIG_SCHED_DEBUG 2510#ifdef CONFIG_SCHED_DEBUG
2392 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2511 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2393#endif 2512#endif
@@ -2416,9 +2535,12 @@ static const struct pid_entry tid_base_stuff[] = {
2416#ifdef CONFIG_SCHEDSTATS 2535#ifdef CONFIG_SCHEDSTATS
2417 INF("schedstat", S_IRUGO, pid_schedstat), 2536 INF("schedstat", S_IRUGO, pid_schedstat),
2418#endif 2537#endif
2419#ifdef CONFIG_CPUSETS 2538#ifdef CONFIG_PROC_PID_CPUSET
2420 REG("cpuset", S_IRUGO, cpuset), 2539 REG("cpuset", S_IRUGO, cpuset),
2421#endif 2540#endif
2541#ifdef CONFIG_CGROUPS
2542 REG("cgroup", S_IRUGO, cgroup),
2543#endif
2422 INF("oom_score", S_IRUGO, oom_score), 2544 INF("oom_score", S_IRUGO, oom_score),
2423 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), 2545 REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
2424#ifdef CONFIG_AUDITSYSCALL 2546#ifdef CONFIG_AUDITSYSCALL
@@ -2486,6 +2608,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
2486 struct task_struct *task; 2608 struct task_struct *task;
2487 struct task_struct *leader = get_proc_task(dir); 2609 struct task_struct *leader = get_proc_task(dir);
2488 unsigned tid; 2610 unsigned tid;
2611 struct pid_namespace *ns;
2489 2612
2490 if (!leader) 2613 if (!leader)
2491 goto out_no_task; 2614 goto out_no_task;
@@ -2494,14 +2617,15 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
2494 if (tid == ~0U) 2617 if (tid == ~0U)
2495 goto out; 2618 goto out;
2496 2619
2620 ns = dentry->d_sb->s_fs_info;
2497 rcu_read_lock(); 2621 rcu_read_lock();
2498 task = find_task_by_pid(tid); 2622 task = find_task_by_pid_ns(tid, ns);
2499 if (task) 2623 if (task)
2500 get_task_struct(task); 2624 get_task_struct(task);
2501 rcu_read_unlock(); 2625 rcu_read_unlock();
2502 if (!task) 2626 if (!task)
2503 goto out; 2627 goto out;
2504 if (leader->tgid != task->tgid) 2628 if (!same_thread_group(leader, task))
2505 goto out_drop_task; 2629 goto out_drop_task;
2506 2630
2507 result = proc_task_instantiate(dir, dentry, task, NULL); 2631 result = proc_task_instantiate(dir, dentry, task, NULL);
@@ -2526,14 +2650,14 @@ out_no_task:
2526 * threads past it. 2650 * threads past it.
2527 */ 2651 */
2528static struct task_struct *first_tid(struct task_struct *leader, 2652static struct task_struct *first_tid(struct task_struct *leader,
2529 int tid, int nr) 2653 int tid, int nr, struct pid_namespace *ns)
2530{ 2654{
2531 struct task_struct *pos; 2655 struct task_struct *pos;
2532 2656
2533 rcu_read_lock(); 2657 rcu_read_lock();
2534 /* Attempt to start with the pid of a thread */ 2658 /* Attempt to start with the pid of a thread */
2535 if (tid && (nr > 0)) { 2659 if (tid && (nr > 0)) {
2536 pos = find_task_by_pid(tid); 2660 pos = find_task_by_pid_ns(tid, ns);
2537 if (pos && (pos->group_leader == leader)) 2661 if (pos && (pos->group_leader == leader))
2538 goto found; 2662 goto found;
2539 } 2663 }
@@ -2602,6 +2726,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2602 ino_t ino; 2726 ino_t ino;
2603 int tid; 2727 int tid;
2604 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2728 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
2729 struct pid_namespace *ns;
2605 2730
2606 task = get_proc_task(inode); 2731 task = get_proc_task(inode);
2607 if (!task) 2732 if (!task)
@@ -2635,12 +2760,13 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2635 /* f_version caches the tgid value that the last readdir call couldn't 2760 /* f_version caches the tgid value that the last readdir call couldn't
2636 * return. lseek aka telldir automagically resets f_version to 0. 2761 * return. lseek aka telldir automagically resets f_version to 0.
2637 */ 2762 */
2763 ns = filp->f_dentry->d_sb->s_fs_info;
2638 tid = (int)filp->f_version; 2764 tid = (int)filp->f_version;
2639 filp->f_version = 0; 2765 filp->f_version = 0;
2640 for (task = first_tid(leader, tid, pos - 2); 2766 for (task = first_tid(leader, tid, pos - 2, ns);
2641 task; 2767 task;
2642 task = next_tid(task), pos++) { 2768 task = next_tid(task), pos++) {
2643 tid = task->pid; 2769 tid = task_pid_nr_ns(task, ns);
2644 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { 2770 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
2645 /* returning this tgid failed, save it as the first 2771 /* returning this tgid failed, save it as the first
2646 * pid for the next readir call */ 2772 * pid for the next readir call */
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 99ca00485fc3..abe6a3f04368 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -448,7 +448,7 @@ out_mod:
448 return NULL; 448 return NULL;
449} 449}
450 450
451int proc_fill_super(struct super_block *s, void *data, int silent) 451int proc_fill_super(struct super_block *s)
452{ 452{
453 struct inode * root_inode; 453 struct inode * root_inode;
454 454
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index d6dc72c78bc1..e0d064e9764e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -91,7 +91,8 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
91 LOAD_INT(a), LOAD_FRAC(a), 91 LOAD_INT(a), LOAD_FRAC(a),
92 LOAD_INT(b), LOAD_FRAC(b), 92 LOAD_INT(b), LOAD_FRAC(b),
93 LOAD_INT(c), LOAD_FRAC(c), 93 LOAD_INT(c), LOAD_FRAC(c),
94 nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid); 94 nr_running(), nr_threads,
95 task_active_pid_ns(current)->last_pid);
95 return proc_calc_metrics(page, start, off, count, eof, len); 96 return proc_calc_metrics(page, start, off, count, eof, len);
96} 97}
97 98
diff --git a/fs/proc/root.c b/fs/proc/root.c
index cf3046638b09..ec9cb3b6c93b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,32 +18,90 @@
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/mount.h> 20#include <linux/mount.h>
21#include <linux/pid_namespace.h>
21 22
22#include "internal.h" 23#include "internal.h"
23 24
24struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; 25struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
25 26
27static int proc_test_super(struct super_block *sb, void *data)
28{
29 return sb->s_fs_info == data;
30}
31
32static int proc_set_super(struct super_block *sb, void *data)
33{
34 struct pid_namespace *ns;
35
36 ns = (struct pid_namespace *)data;
37 sb->s_fs_info = get_pid_ns(ns);
38 return set_anon_super(sb, NULL);
39}
40
26static int proc_get_sb(struct file_system_type *fs_type, 41static int proc_get_sb(struct file_system_type *fs_type,
27 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 42 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
28{ 43{
44 int err;
45 struct super_block *sb;
46 struct pid_namespace *ns;
47 struct proc_inode *ei;
48
29 if (proc_mnt) { 49 if (proc_mnt) {
30 /* Seed the root directory with a pid so it doesn't need 50 /* Seed the root directory with a pid so it doesn't need
31 * to be special in base.c. I would do this earlier but 51 * to be special in base.c. I would do this earlier but
32 * the only task alive when /proc is mounted the first time 52 * the only task alive when /proc is mounted the first time
33 * is the init_task and it doesn't have any pids. 53 * is the init_task and it doesn't have any pids.
34 */ 54 */
35 struct proc_inode *ei;
36 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); 55 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
37 if (!ei->pid) 56 if (!ei->pid)
38 ei->pid = find_get_pid(1); 57 ei->pid = find_get_pid(1);
39 } 58 }
40 return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); 59
60 if (flags & MS_KERNMOUNT)
61 ns = (struct pid_namespace *)data;
62 else
63 ns = current->nsproxy->pid_ns;
64
65 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
66 if (IS_ERR(sb))
67 return PTR_ERR(sb);
68
69 if (!sb->s_root) {
70 sb->s_flags = flags;
71 err = proc_fill_super(sb);
72 if (err) {
73 up_write(&sb->s_umount);
74 deactivate_super(sb);
75 return err;
76 }
77
78 ei = PROC_I(sb->s_root->d_inode);
79 if (!ei->pid) {
80 rcu_read_lock();
81 ei->pid = get_pid(find_pid_ns(1, ns));
82 rcu_read_unlock();
83 }
84
85 sb->s_flags |= MS_ACTIVE;
86 ns->proc_mnt = mnt;
87 }
88
89 return simple_set_mnt(mnt, sb);
90}
91
92static void proc_kill_sb(struct super_block *sb)
93{
94 struct pid_namespace *ns;
95
96 ns = (struct pid_namespace *)sb->s_fs_info;
97 kill_anon_super(sb);
98 put_pid_ns(ns);
41} 99}
42 100
43static struct file_system_type proc_fs_type = { 101static struct file_system_type proc_fs_type = {
44 .name = "proc", 102 .name = "proc",
45 .get_sb = proc_get_sb, 103 .get_sb = proc_get_sb,
46 .kill_sb = kill_anon_super, 104 .kill_sb = proc_kill_sb,
47}; 105};
48 106
49void __init proc_root_init(void) 107void __init proc_root_init(void)
@@ -54,12 +112,13 @@ void __init proc_root_init(void)
54 err = register_filesystem(&proc_fs_type); 112 err = register_filesystem(&proc_fs_type);
55 if (err) 113 if (err)
56 return; 114 return;
57 proc_mnt = kern_mount(&proc_fs_type); 115 proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
58 err = PTR_ERR(proc_mnt); 116 err = PTR_ERR(proc_mnt);
59 if (IS_ERR(proc_mnt)) { 117 if (IS_ERR(proc_mnt)) {
60 unregister_filesystem(&proc_fs_type); 118 unregister_filesystem(&proc_fs_type);
61 return; 119 return;
62 } 120 }
121
63 proc_misc_init(); 122 proc_misc_init();
64 123
65 proc_net_init(); 124 proc_net_init();
@@ -153,6 +212,22 @@ struct proc_dir_entry proc_root = {
153 .parent = &proc_root, 212 .parent = &proc_root,
154}; 213};
155 214
215int pid_ns_prepare_proc(struct pid_namespace *ns)
216{
217 struct vfsmount *mnt;
218
219 mnt = kern_mount_data(&proc_fs_type, ns);
220 if (IS_ERR(mnt))
221 return PTR_ERR(mnt);
222
223 return 0;
224}
225
226void pid_ns_release_proc(struct pid_namespace *ns)
227{
228 mntput(ns->proc_mnt);
229}
230
156EXPORT_SYMBOL(proc_symlink); 231EXPORT_SYMBOL(proc_symlink);
157EXPORT_SYMBOL(proc_mkdir); 232EXPORT_SYMBOL(proc_mkdir);
158EXPORT_SYMBOL(create_proc_entry); 233EXPORT_SYMBOL(create_proc_entry);
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 2a5dd34649b3..16b331dd9913 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -47,7 +47,9 @@
47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) 47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
48 48
49static inline void get_bit_address(struct super_block *s, 49static inline void get_bit_address(struct super_block *s,
50 b_blocknr_t block, int *bmap_nr, int *offset) 50 b_blocknr_t block,
51 unsigned int *bmap_nr,
52 unsigned int *offset)
51{ 53{
52 /* It is in the bitmap block number equal to the block 54 /* It is in the bitmap block number equal to the block
53 * number divided by the number of bits in a block. */ 55 * number divided by the number of bits in a block. */
@@ -56,10 +58,10 @@ static inline void get_bit_address(struct super_block *s,
56 *offset = block & ((s->s_blocksize << 3) - 1); 58 *offset = block & ((s->s_blocksize << 3) - 1);
57} 59}
58 60
59#ifdef CONFIG_REISERFS_CHECK
60int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) 61int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
61{ 62{
62 int bmap, offset; 63 unsigned int bmap, offset;
64 unsigned int bmap_count = reiserfs_bmap_count(s);
63 65
64 if (block == 0 || block >= SB_BLOCK_COUNT(s)) { 66 if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
65 reiserfs_warning(s, 67 reiserfs_warning(s,
@@ -75,25 +77,26 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
75 if (unlikely(test_bit(REISERFS_OLD_FORMAT, 77 if (unlikely(test_bit(REISERFS_OLD_FORMAT,
76 &(REISERFS_SB(s)->s_properties)))) { 78 &(REISERFS_SB(s)->s_properties)))) {
77 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; 79 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
78 if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) { 80 if (block >= bmap1 &&
81 block <= bmap1 + bmap_count) {
79 reiserfs_warning(s, "vs: 4019: is_reusable: " 82 reiserfs_warning(s, "vs: 4019: is_reusable: "
80 "bitmap block %lu(%u) can't be freed or reused", 83 "bitmap block %lu(%u) can't be freed or reused",
81 block, SB_BMAP_NR(s)); 84 block, bmap_count);
82 return 0; 85 return 0;
83 } 86 }
84 } else { 87 } else {
85 if (offset == 0) { 88 if (offset == 0) {
86 reiserfs_warning(s, "vs: 4020: is_reusable: " 89 reiserfs_warning(s, "vs: 4020: is_reusable: "
87 "bitmap block %lu(%u) can't be freed or reused", 90 "bitmap block %lu(%u) can't be freed or reused",
88 block, SB_BMAP_NR(s)); 91 block, bmap_count);
89 return 0; 92 return 0;
90 } 93 }
91 } 94 }
92 95
93 if (bmap >= SB_BMAP_NR(s)) { 96 if (bmap >= bmap_count) {
94 reiserfs_warning(s, 97 reiserfs_warning(s,
95 "vs-4030: is_reusable: there is no so many bitmap blocks: " 98 "vs-4030: is_reusable: there is no so many bitmap blocks: "
96 "block=%lu, bitmap_nr=%d", block, bmap); 99 "block=%lu, bitmap_nr=%u", block, bmap);
97 return 0; 100 return 0;
98 } 101 }
99 102
@@ -106,12 +109,11 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
106 109
107 return 1; 110 return 1;
108} 111}
109#endif /* CONFIG_REISERFS_CHECK */
110 112
111/* searches in journal structures for a given block number (bmap, off). If block 113/* searches in journal structures for a given block number (bmap, off). If block
112 is found in reiserfs journal it suggests next free block candidate to test. */ 114 is found in reiserfs journal it suggests next free block candidate to test. */
113static inline int is_block_in_journal(struct super_block *s, int bmap, int 115static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
114 off, int *next) 116 int off, int *next)
115{ 117{
116 b_blocknr_t tmp; 118 b_blocknr_t tmp;
117 119
@@ -132,8 +134,8 @@ static inline int is_block_in_journal(struct super_block *s, int bmap, int
132/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap 134/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
133 * block; */ 135 * block; */
134static int scan_bitmap_block(struct reiserfs_transaction_handle *th, 136static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
135 int bmap_n, int *beg, int boundary, int min, 137 unsigned int bmap_n, int *beg, int boundary,
136 int max, int unfm) 138 int min, int max, int unfm)
137{ 139{
138 struct super_block *s = th->t_super; 140 struct super_block *s = th->t_super;
139 struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n]; 141 struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
@@ -143,8 +145,8 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
143 145
144 BUG_ON(!th->t_trans_id); 146 BUG_ON(!th->t_trans_id);
145 147
146 RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)", 148 RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
147 bmap_n, SB_BMAP_NR(s) - 1); 149 "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
148 PROC_INFO_INC(s, scan_bitmap.bmap); 150 PROC_INFO_INC(s, scan_bitmap.bmap);
149/* this is unclear and lacks comments, explain how journal bitmaps 151/* this is unclear and lacks comments, explain how journal bitmaps
150 work here for the reader. Convey a sense of the design here. What 152 work here for the reader. Convey a sense of the design here. What
@@ -249,12 +251,12 @@ static int bmap_hash_id(struct super_block *s, u32 id)
249 } else { 251 } else {
250 hash_in = (char *)(&id); 252 hash_in = (char *)(&id);
251 hash = keyed_hash(hash_in, 4); 253 hash = keyed_hash(hash_in, 4);
252 bm = hash % SB_BMAP_NR(s); 254 bm = hash % reiserfs_bmap_count(s);
253 if (!bm) 255 if (!bm)
254 bm = 1; 256 bm = 1;
255 } 257 }
256 /* this can only be true when SB_BMAP_NR = 1 */ 258 /* this can only be true when SB_BMAP_NR = 1 */
257 if (bm >= SB_BMAP_NR(s)) 259 if (bm >= reiserfs_bmap_count(s))
258 bm = 0; 260 bm = 0;
259 return bm; 261 return bm;
260} 262}
@@ -273,7 +275,7 @@ static inline int block_group_used(struct super_block *s, u32 id)
273 * to make a better decision. This favors long-term performace gain 275 * to make a better decision. This favors long-term performace gain
274 * with a better on-disk layout vs. a short term gain of skipping the 276 * with a better on-disk layout vs. a short term gain of skipping the
275 * read and potentially having a bad placement. */ 277 * read and potentially having a bad placement. */
276 if (info->first_zero_hint == 0) { 278 if (info->free_count == UINT_MAX) {
277 struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); 279 struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
278 brelse(bh); 280 brelse(bh);
279 } 281 }
@@ -309,16 +311,16 @@ __le32 reiserfs_choose_packing(struct inode * dir)
309 * bitmap and place new blocks there. Returns number of allocated blocks. */ 311 * bitmap and place new blocks there. Returns number of allocated blocks. */
310static int scan_bitmap(struct reiserfs_transaction_handle *th, 312static int scan_bitmap(struct reiserfs_transaction_handle *th,
311 b_blocknr_t * start, b_blocknr_t finish, 313 b_blocknr_t * start, b_blocknr_t finish,
312 int min, int max, int unfm, unsigned long file_block) 314 int min, int max, int unfm, sector_t file_block)
313{ 315{
314 int nr_allocated = 0; 316 int nr_allocated = 0;
315 struct super_block *s = th->t_super; 317 struct super_block *s = th->t_super;
316 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr 318 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
317 * - Hans, it is not a block number - Zam. */ 319 * - Hans, it is not a block number - Zam. */
318 320
319 int bm, off; 321 unsigned int bm, off;
320 int end_bm, end_off; 322 unsigned int end_bm, end_off;
321 int off_max = s->s_blocksize << 3; 323 unsigned int off_max = s->s_blocksize << 3;
322 324
323 BUG_ON(!th->t_trans_id); 325 BUG_ON(!th->t_trans_id);
324 326
@@ -328,10 +330,10 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
328 330
329 get_bit_address(s, *start, &bm, &off); 331 get_bit_address(s, *start, &bm, &off);
330 get_bit_address(s, finish, &end_bm, &end_off); 332 get_bit_address(s, finish, &end_bm, &end_off);
331 if (bm > SB_BMAP_NR(s)) 333 if (bm > reiserfs_bmap_count(s))
332 return 0; 334 return 0;
333 if (end_bm > SB_BMAP_NR(s)) 335 if (end_bm > reiserfs_bmap_count(s))
334 end_bm = SB_BMAP_NR(s); 336 end_bm = reiserfs_bmap_count(s);
335 337
336 /* When the bitmap is more than 10% free, anyone can allocate. 338 /* When the bitmap is more than 10% free, anyone can allocate.
337 * When it's less than 10% free, only files that already use the 339 * When it's less than 10% free, only files that already use the
@@ -385,7 +387,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
385 struct reiserfs_super_block *rs; 387 struct reiserfs_super_block *rs;
386 struct buffer_head *sbh, *bmbh; 388 struct buffer_head *sbh, *bmbh;
387 struct reiserfs_bitmap_info *apbi; 389 struct reiserfs_bitmap_info *apbi;
388 int nr, offset; 390 unsigned int nr, offset;
389 391
390 BUG_ON(!th->t_trans_id); 392 BUG_ON(!th->t_trans_id);
391 393
@@ -397,10 +399,12 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
397 399
398 get_bit_address(s, block, &nr, &offset); 400 get_bit_address(s, block, &nr, &offset);
399 401
400 if (nr >= sb_bmap_nr(rs)) { 402 if (nr >= reiserfs_bmap_count(s)) {
401 reiserfs_warning(s, "vs-4075: reiserfs_free_block: " 403 reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
402 "block %lu is out of range on %s", 404 "block %lu is out of range on %s "
403 block, reiserfs_bdevname(s)); 405 "(nr=%u,max=%u)", block,
406 reiserfs_bdevname(s), nr,
407 reiserfs_bmap_count(s));
404 return; 408 return;
405 } 409 }
406 410
@@ -434,12 +438,19 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
434 int for_unformatted) 438 int for_unformatted)
435{ 439{
436 struct super_block *s = th->t_super; 440 struct super_block *s = th->t_super;
437
438 BUG_ON(!th->t_trans_id); 441 BUG_ON(!th->t_trans_id);
439 442
440 RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); 443 RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
441 RFALSE(is_reusable(s, block, 1) == 0, 444 if (!is_reusable(s, block, 1))
442 "vs-4071: can not free such block"); 445 return;
446
447 if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
448 reiserfs_panic(th->t_super, "bitmap-4072",
449 "Trying to free block outside file system "
450 "boundaries (%lu > %lu)",
451 block, sb_block_count(REISERFS_SB(s)->s_rs));
452 return;
453 }
443 /* mark it before we clear it, just in case */ 454 /* mark it before we clear it, just in case */
444 journal_mark_freed(th, s, block); 455 journal_mark_freed(th, s, block);
445 _reiserfs_free_block(th, inode, block, for_unformatted); 456 _reiserfs_free_block(th, inode, block, for_unformatted);
@@ -449,11 +460,11 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
449static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th, 460static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
450 struct inode *inode, b_blocknr_t block) 461 struct inode *inode, b_blocknr_t block)
451{ 462{
463 BUG_ON(!th->t_trans_id);
452 RFALSE(!th->t_super, 464 RFALSE(!th->t_super,
453 "vs-4060: trying to free block on nonexistent device"); 465 "vs-4060: trying to free block on nonexistent device");
454 RFALSE(is_reusable(th->t_super, block, 1) == 0, 466 if (!is_reusable(th->t_super, block, 1))
455 "vs-4070: can not free such block"); 467 return;
456 BUG_ON(!th->t_trans_id);
457 _reiserfs_free_block(th, inode, block, 1); 468 _reiserfs_free_block(th, inode, block, 1);
458} 469}
459 470
@@ -1207,27 +1218,22 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1207{ 1218{
1208 unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); 1219 unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1209 1220
1210 info->first_zero_hint = 1 << (sb->s_blocksize_bits + 3); 1221 /* The first bit must ALWAYS be 1 */
1222 BUG_ON(!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data));
1223
1224 info->free_count = 0;
1211 1225
1212 while (--cur >= (unsigned long *)bh->b_data) { 1226 while (--cur >= (unsigned long *)bh->b_data) {
1213 int base = ((char *)cur - bh->b_data) << 3; 1227 int i;
1214 1228
1215 /* 0 and ~0 are special, we can optimize for them */ 1229 /* 0 and ~0 are special, we can optimize for them */
1216 if (*cur == 0) { 1230 if (*cur == 0)
1217 info->first_zero_hint = base;
1218 info->free_count += BITS_PER_LONG; 1231 info->free_count += BITS_PER_LONG;
1219 } else if (*cur != ~0L) { /* A mix, investigate */ 1232 else if (*cur != ~0L) /* A mix, investigate */
1220 int b; 1233 for (i = BITS_PER_LONG - 1; i >= 0; i--)
1221 for (b = BITS_PER_LONG - 1; b >= 0; b--) { 1234 if (!reiserfs_test_le_bit(i, cur))
1222 if (!reiserfs_test_le_bit(b, cur)) {
1223 info->first_zero_hint = base + b;
1224 info->free_count++; 1235 info->free_count++;
1225 }
1226 }
1227 }
1228 } 1236 }
1229 /* The first bit must ALWAYS be 1 */
1230 BUG_ON(info->first_zero_hint == 0);
1231} 1237}
1232 1238
1233struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, 1239struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
@@ -1257,7 +1263,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1257 BUG_ON(!buffer_uptodate(bh)); 1263 BUG_ON(!buffer_uptodate(bh));
1258 BUG_ON(atomic_read(&bh->b_count) == 0); 1264 BUG_ON(atomic_read(&bh->b_count) == 0);
1259 1265
1260 if (info->first_zero_hint == 0) 1266 if (info->free_count == UINT_MAX)
1261 reiserfs_cache_bitmap_metadata(sb, bh, info); 1267 reiserfs_cache_bitmap_metadata(sb, bh, info);
1262 } 1268 }
1263 1269
@@ -1267,12 +1273,13 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1267int reiserfs_init_bitmap_cache(struct super_block *sb) 1273int reiserfs_init_bitmap_cache(struct super_block *sb)
1268{ 1274{
1269 struct reiserfs_bitmap_info *bitmap; 1275 struct reiserfs_bitmap_info *bitmap;
1276 unsigned int bmap_nr = reiserfs_bmap_count(sb);
1270 1277
1271 bitmap = vmalloc(sizeof (*bitmap) * SB_BMAP_NR(sb)); 1278 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1272 if (bitmap == NULL) 1279 if (bitmap == NULL)
1273 return -ENOMEM; 1280 return -ENOMEM;
1274 1281
1275 memset(bitmap, 0, sizeof (*bitmap) * SB_BMAP_NR(sb)); 1282 memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1276 1283
1277 SB_AP_BITMAP(sb) = bitmap; 1284 SB_AP_BITMAP(sb) = bitmap;
1278 1285
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0804289d355d..a991af96f3f0 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -199,7 +199,7 @@ static inline void set_block_dev_mapped(struct buffer_head *bh,
199// files which were created in the earlier version can not be longer, 199// files which were created in the earlier version can not be longer,
200// than 2 gb 200// than 2 gb
201// 201//
202static int file_capable(struct inode *inode, long block) 202static int file_capable(struct inode *inode, sector_t block)
203{ 203{
204 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. 204 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file.
205 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb 205 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
@@ -242,7 +242,7 @@ static int restart_transaction(struct reiserfs_transaction_handle *th,
242// Please improve the english/clarity in the comment above, as it is 242// Please improve the english/clarity in the comment above, as it is
243// hard to understand. 243// hard to understand.
244 244
245static int _get_block_create_0(struct inode *inode, long block, 245static int _get_block_create_0(struct inode *inode, sector_t block,
246 struct buffer_head *bh_result, int args) 246 struct buffer_head *bh_result, int args)
247{ 247{
248 INITIALIZE_PATH(path); 248 INITIALIZE_PATH(path);
@@ -250,7 +250,7 @@ static int _get_block_create_0(struct inode *inode, long block,
250 struct buffer_head *bh; 250 struct buffer_head *bh;
251 struct item_head *ih, tmp_ih; 251 struct item_head *ih, tmp_ih;
252 int fs_gen; 252 int fs_gen;
253 int blocknr; 253 b_blocknr_t blocknr;
254 char *p = NULL; 254 char *p = NULL;
255 int chars; 255 int chars;
256 int ret; 256 int ret;
@@ -569,7 +569,7 @@ static int convert_tail_for_hole(struct inode *inode,
569} 569}
570 570
571static inline int _allocate_block(struct reiserfs_transaction_handle *th, 571static inline int _allocate_block(struct reiserfs_transaction_handle *th,
572 long block, 572 sector_t block,
573 struct inode *inode, 573 struct inode *inode,
574 b_blocknr_t * allocated_block_nr, 574 b_blocknr_t * allocated_block_nr,
575 struct treepath *path, int flags) 575 struct treepath *path, int flags)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 4cad9e75ef56..bb05a3e51b93 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -219,11 +219,12 @@ static void allocate_bitmap_nodes(struct super_block *p_s_sb)
219 } 219 }
220} 220}
221 221
222static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, 222static int set_bit_in_list_bitmap(struct super_block *p_s_sb,
223 b_blocknr_t block,
223 struct reiserfs_list_bitmap *jb) 224 struct reiserfs_list_bitmap *jb)
224{ 225{
225 int bmap_nr = block / (p_s_sb->s_blocksize << 3); 226 unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3);
226 int bit_nr = block % (p_s_sb->s_blocksize << 3); 227 unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3);
227 228
228 if (!jb->bitmaps[bmap_nr]) { 229 if (!jb->bitmaps[bmap_nr]) {
229 jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb); 230 jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
@@ -239,7 +240,7 @@ static void cleanup_bitmap_list(struct super_block *p_s_sb,
239 if (jb->bitmaps == NULL) 240 if (jb->bitmaps == NULL)
240 return; 241 return;
241 242
242 for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) { 243 for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) {
243 if (jb->bitmaps[i]) { 244 if (jb->bitmaps[i]) {
244 free_bitmap_node(p_s_sb, jb->bitmaps[i]); 245 free_bitmap_node(p_s_sb, jb->bitmaps[i]);
245 jb->bitmaps[i] = NULL; 246 jb->bitmaps[i] = NULL;
@@ -289,7 +290,7 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
289*/ 290*/
290int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, 291int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
291 struct reiserfs_list_bitmap *jb_array, 292 struct reiserfs_list_bitmap *jb_array,
292 int bmap_nr) 293 unsigned int bmap_nr)
293{ 294{
294 int i; 295 int i;
295 int failed = 0; 296 int failed = 0;
@@ -483,7 +484,7 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
483** 484**
484*/ 485*/
485int reiserfs_in_journal(struct super_block *p_s_sb, 486int reiserfs_in_journal(struct super_block *p_s_sb,
486 int bmap_nr, int bit_nr, int search_all, 487 unsigned int bmap_nr, int bit_nr, int search_all,
487 b_blocknr_t * next_zero_bit) 488 b_blocknr_t * next_zero_bit)
488{ 489{
489 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 490 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
@@ -1013,7 +1014,7 @@ static int flush_commit_list(struct super_block *s,
1013 struct reiserfs_journal_list *jl, int flushall) 1014 struct reiserfs_journal_list *jl, int flushall)
1014{ 1015{
1015 int i; 1016 int i;
1016 int bn; 1017 b_blocknr_t bn;
1017 struct buffer_head *tbh = NULL; 1018 struct buffer_head *tbh = NULL;
1018 unsigned long trans_id = jl->j_trans_id; 1019 unsigned long trans_id = jl->j_trans_id;
1019 struct reiserfs_journal *journal = SB_JOURNAL(s); 1020 struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2307,8 +2308,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2307 Right now it is only used from journal code. But later we might use it 2308 Right now it is only used from journal code. But later we might use it
2308 from other places. 2309 from other places.
2309 Note: Do not use journal_getblk/sb_getblk functions here! */ 2310 Note: Do not use journal_getblk/sb_getblk functions here! */
2310static struct buffer_head *reiserfs_breada(struct block_device *dev, int block, 2311static struct buffer_head *reiserfs_breada(struct block_device *dev,
2311 int bufsize, unsigned int max_block) 2312 b_blocknr_t block, int bufsize,
2313 b_blocknr_t max_block)
2312{ 2314{
2313 struct buffer_head *bhlist[BUFNR]; 2315 struct buffer_head *bhlist[BUFNR];
2314 unsigned int blocks = BUFNR; 2316 unsigned int blocks = BUFNR;
@@ -2732,7 +2734,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2732 journal->j_persistent_trans = 0; 2734 journal->j_persistent_trans = 0;
2733 if (reiserfs_allocate_list_bitmaps(p_s_sb, 2735 if (reiserfs_allocate_list_bitmaps(p_s_sb,
2734 journal->j_list_bitmap, 2736 journal->j_list_bitmap,
2735 SB_BMAP_NR(p_s_sb))) 2737 reiserfs_bmap_count(p_s_sb)))
2736 goto free_and_return; 2738 goto free_and_return;
2737 allocate_bitmap_nodes(p_s_sb); 2739 allocate_bitmap_nodes(p_s_sb);
2738 2740
@@ -2740,7 +2742,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2740 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 2742 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
2741 REISERFS_OLD_DISK_OFFSET_IN_BYTES 2743 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2742 / p_s_sb->s_blocksize + 2744 / p_s_sb->s_blocksize +
2743 SB_BMAP_NR(p_s_sb) + 2745 reiserfs_bmap_count(p_s_sb) +
2744 1 : 2746 1 :
2745 REISERFS_DISK_OFFSET_IN_BYTES / 2747 REISERFS_DISK_OFFSET_IN_BYTES /
2746 p_s_sb->s_blocksize + 2); 2748 p_s_sb->s_blocksize + 2);
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index bc808a91eeaa..5e7388b32d02 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -356,13 +356,11 @@ extern struct tree_balance *cur_tb;
356void reiserfs_panic(struct super_block *sb, const char *fmt, ...) 356void reiserfs_panic(struct super_block *sb, const char *fmt, ...)
357{ 357{
358 do_reiserfs_warning(fmt); 358 do_reiserfs_warning(fmt);
359 printk(KERN_EMERG "REISERFS: panic (device %s): %s\n",
360 reiserfs_bdevname(sb), error_buf);
361 BUG();
362 359
363 /* this is not actually called, but makes reiserfs_panic() "noreturn" */ 360 dump_stack();
364 panic("REISERFS: panic (device %s): %s\n", 361
365 reiserfs_bdevname(sb), error_buf); 362 panic(KERN_EMERG "REISERFS: panic (device %s): %s\n",
363 reiserfs_bdevname(sb), error_buf);
366} 364}
367 365
368void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) 366void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 976cc7887a0d..f71c3948edef 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -61,7 +61,8 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
61 } 61 }
62 62
63 /* count used bits in last bitmap block */ 63 /* count used bits in last bitmap block */
64 block_r = SB_BLOCK_COUNT(s) - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8; 64 block_r = SB_BLOCK_COUNT(s) -
65 (reiserfs_bmap_count(s) - 1) * s->s_blocksize * 8;
65 66
66 /* count bitmap blocks in new fs */ 67 /* count bitmap blocks in new fs */
67 bmap_nr_new = block_count_new / (s->s_blocksize * 8); 68 bmap_nr_new = block_count_new / (s->s_blocksize * 8);
@@ -73,7 +74,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
73 74
74 /* save old values */ 75 /* save old values */
75 block_count = SB_BLOCK_COUNT(s); 76 block_count = SB_BLOCK_COUNT(s);
76 bmap_nr = SB_BMAP_NR(s); 77 bmap_nr = reiserfs_bmap_count(s);
77 78
78 /* resizing of reiserfs bitmaps (journal and real), if needed */ 79 /* resizing of reiserfs bitmaps (journal and real), if needed */
79 if (bmap_nr_new > bmap_nr) { 80 if (bmap_nr_new > bmap_nr) {
@@ -119,7 +120,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
119 return -ENOMEM; 120 return -ENOMEM;
120 } 121 }
121 memset(bitmap, 0, 122 memset(bitmap, 0,
122 sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); 123 sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
123 for (i = 0; i < bmap_nr; i++) 124 for (i = 0; i < bmap_nr; i++)
124 bitmap[i] = old_bitmap[i]; 125 bitmap[i] = old_bitmap[i];
125 126
@@ -143,7 +144,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
143 mark_buffer_dirty(bh); 144 mark_buffer_dirty(bh);
144 sync_dirty_buffer(bh); 145 sync_dirty_buffer(bh);
145 // update bitmap_info stuff 146 // update bitmap_info stuff
146 bitmap[i].first_zero_hint = 1;
147 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; 147 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
148 brelse(bh); 148 brelse(bh);
149 } 149 }
@@ -173,8 +173,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
173 for (i = block_r; i < s->s_blocksize * 8; i++) 173 for (i = block_r; i < s->s_blocksize * 8; i++)
174 reiserfs_test_and_clear_le_bit(i, bh->b_data); 174 reiserfs_test_and_clear_le_bit(i, bh->b_data);
175 info->free_count += s->s_blocksize * 8 - block_r; 175 info->free_count += s->s_blocksize * 8 - block_r;
176 if (!info->first_zero_hint)
177 info->first_zero_hint = block_r;
178 176
179 journal_mark_dirty(&th, s, bh); 177 journal_mark_dirty(&th, s, bh);
180 brelse(bh); 178 brelse(bh);
@@ -196,9 +194,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
196 brelse(bh); 194 brelse(bh);
197 195
198 info->free_count -= s->s_blocksize * 8 - block_r_new; 196 info->free_count -= s->s_blocksize * 8 - block_r_new;
199 /* Extreme case where last bitmap is the only valid block in itself. */
200 if (!info->free_count)
201 info->first_zero_hint = 0;
202 /* update super */ 197 /* update super */
203 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); 198 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
204 free_blocks = SB_FREE_BLOCKS(s); 199 free_blocks = SB_FREE_BLOCKS(s);
@@ -206,7 +201,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
206 free_blocks + (block_count_new - block_count - 201 free_blocks + (block_count_new - block_count -
207 (bmap_nr_new - bmap_nr))); 202 (bmap_nr_new - bmap_nr)));
208 PUT_SB_BLOCK_COUNT(s, block_count_new); 203 PUT_SB_BLOCK_COUNT(s, block_count_new);
209 PUT_SB_BMAP_NR(s, bmap_nr_new); 204 PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new);
210 s->s_dirt = 1; 205 s->s_dirt = 1;
211 206
212 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 207 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 981027d1187b..ca41567d7890 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -559,7 +559,7 @@ static int is_tree_node(struct buffer_head *bh, int level)
559/* The function is NOT SCHEDULE-SAFE! */ 559/* The function is NOT SCHEDULE-SAFE! */
560static void search_by_key_reada(struct super_block *s, 560static void search_by_key_reada(struct super_block *s,
561 struct buffer_head **bh, 561 struct buffer_head **bh,
562 unsigned long *b, int num) 562 b_blocknr_t *b, int num)
563{ 563{
564 int i, j; 564 int i, j;
565 565
@@ -611,7 +611,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
611 DISK_LEAF_NODE_LEVEL */ 611 DISK_LEAF_NODE_LEVEL */
612 ) 612 )
613{ 613{
614 int n_block_number; 614 b_blocknr_t n_block_number;
615 int expected_level; 615 int expected_level;
616 struct buffer_head *p_s_bh; 616 struct buffer_head *p_s_bh;
617 struct path_element *p_s_last_element; 617 struct path_element *p_s_last_element;
@@ -619,7 +619,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
619 int right_neighbor_of_leaf_node; 619 int right_neighbor_of_leaf_node;
620 int fs_gen; 620 int fs_gen;
621 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; 621 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
622 unsigned long reada_blocks[SEARCH_BY_KEY_READA]; 622 b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
623 int reada_count = 0; 623 int reada_count = 0;
624 624
625#ifdef CONFIG_REISERFS_CHECK 625#ifdef CONFIG_REISERFS_CHECK
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b82897ae090b..57adfe90d5ae 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1725,6 +1725,21 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1725 set_sb_umount_state(rs, REISERFS_ERROR_FS); 1725 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1726 set_sb_fs_state(rs, 0); 1726 set_sb_fs_state(rs, 0);
1727 1727
1728 /* Clear out s_bmap_nr if it would wrap. We can handle this
1729 * case, but older revisions can't. This will cause the
1730 * file system to fail mount on those older implementations,
1731 * avoiding corruption. -jeffm */
1732 if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
1733 sb_bmap_nr(rs) != 0) {
1734 reiserfs_warning(s, "super-2030: This file system "
1735 "claims to use %u bitmap blocks in "
1736 "its super block, but requires %u. "
1737 "Clearing to zero.", sb_bmap_nr(rs),
1738 reiserfs_bmap_count(s));
1739
1740 set_sb_bmap_nr(rs, 0);
1741 }
1742
1728 if (old_format_only(s)) { 1743 if (old_format_only(s)) {
1729 /* filesystem of format 3.5 either with standard or non-standard 1744 /* filesystem of format 3.5 either with standard or non-standard
1730 journal */ 1745 journal */
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index fab4b9b2664f..1597f6b649e0 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -484,7 +484,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
484 /* Resize it so we're ok to write there */ 484 /* Resize it so we're ok to write there */
485 newattrs.ia_size = buffer_size; 485 newattrs.ia_size = buffer_size;
486 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 486 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
487 mutex_lock(&xinode->i_mutex); 487 mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR);
488 err = notify_change(fp->f_path.dentry, &newattrs); 488 err = notify_change(fp->f_path.dentry, &newattrs);
489 if (err) 489 if (err)
490 goto out_filp; 490 goto out_filp;
@@ -1223,7 +1223,8 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1223 if (!IS_ERR(dentry)) { 1223 if (!IS_ERR(dentry)) {
1224 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { 1224 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
1225 struct inode *inode = dentry->d_parent->d_inode; 1225 struct inode *inode = dentry->d_parent->d_inode;
1226 mutex_lock(&inode->i_mutex); 1226 mutex_lock_nested(&inode->i_mutex,
1227 I_MUTEX_XATTR);
1227 err = inode->i_op->mkdir(inode, dentry, 0700); 1228 err = inode->i_op->mkdir(inode, dentry, 0700);
1228 mutex_unlock(&inode->i_mutex); 1229 mutex_unlock(&inode->i_mutex);
1229 if (err) { 1230 if (err) {
diff --git a/fs/select.c b/fs/select.c
index 7dede89658f5..47f47925aea2 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -177,11 +177,6 @@ get_max:
177 return max; 177 return max;
178} 178}
179 179
180#define BIT(i) (1UL << ((i)&(__NFDBITS-1)))
181#define MEM(i,m) ((m)+(unsigned)(i)/__NFDBITS)
182#define ISSET(i,m) (((i)&*(m)) != 0)
183#define SET(i,m) (*(m) |= (i))
184
185#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR) 180#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
186#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 181#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
187#define POLLEX_SET (POLLPRI) 182#define POLLEX_SET (POLLPRI)
diff --git a/fs/super.c b/fs/super.c
index 1bfcca2104be..d28fde7e1cfb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -40,10 +40,6 @@
40#include <asm/uaccess.h> 40#include <asm/uaccess.h>
41 41
42 42
43void get_filesystem(struct file_system_type *fs);
44void put_filesystem(struct file_system_type *fs);
45struct file_system_type *get_fs_type(const char *name);
46
47LIST_HEAD(super_blocks); 43LIST_HEAD(super_blocks);
48DEFINE_SPINLOCK(sb_lock); 44DEFINE_SPINLOCK(sb_lock);
49 45
@@ -336,21 +332,21 @@ struct super_block *sget(struct file_system_type *type,
336 void *data) 332 void *data)
337{ 333{
338 struct super_block *s = NULL; 334 struct super_block *s = NULL;
339 struct list_head *p; 335 struct super_block *old;
340 int err; 336 int err;
341 337
342retry: 338retry:
343 spin_lock(&sb_lock); 339 spin_lock(&sb_lock);
344 if (test) list_for_each(p, &type->fs_supers) { 340 if (test) {
345 struct super_block *old; 341 list_for_each_entry(old, &type->fs_supers, s_instances) {
346 old = list_entry(p, struct super_block, s_instances); 342 if (!test(old, data))
347 if (!test(old, data)) 343 continue;
348 continue; 344 if (!grab_super(old))
349 if (!grab_super(old)) 345 goto retry;
350 goto retry; 346 if (s)
351 if (s) 347 destroy_super(s);
352 destroy_super(s); 348 return old;
353 return old; 349 }
354 } 350 }
355 if (!s) { 351 if (!s) {
356 spin_unlock(&sb_lock); 352 spin_unlock(&sb_lock);
@@ -948,9 +944,9 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
948 return mnt; 944 return mnt;
949} 945}
950 946
951struct vfsmount *kern_mount(struct file_system_type *type) 947struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
952{ 948{
953 return vfs_kern_mount(type, 0, type->name, NULL); 949 return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
954} 950}
955 951
956EXPORT_SYMBOL(kern_mount); 952EXPORT_SYMBOL_GPL(kern_mount_data);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 726449d4fd22..3586c7a28d2c 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -54,8 +54,8 @@ xfs_fs_decode_fh(
54 struct dentry *de), 54 struct dentry *de),
55 void *context) 55 void *context)
56{ 56{
57 xfs_fid2_t ifid; 57 xfs_fid_t ifid;
58 xfs_fid2_t pfid; 58 xfs_fid_t pfid;
59 void *parent = NULL; 59 void *parent = NULL;
60 int is64 = 0; 60 int is64 = 0;
61 __u32 *p = fh; 61 __u32 *p = fh;
@@ -144,7 +144,7 @@ xfs_fs_get_dentry(
144 struct dentry *result; 144 struct dentry *result;
145 int error; 145 int error;
146 146
147 error = xfs_vget(XFS_M(sb), &vp, (fid_t *)data); 147 error = xfs_vget(XFS_M(sb), &vp, data);
148 if (error || vp == NULL) 148 if (error || vp == NULL)
149 return ERR_PTR(-ESTALE) ; 149 return ERR_PTR(-ESTALE) ;
150 150
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
index e794ca4efc76..2f36071a86f7 100644
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ b/fs/xfs/linux-2.6/xfs_export.h
@@ -71,13 +71,13 @@ xfs_fileid_length(int hasparent, int is64)
71 71
72/* 72/*
73 * Decode encoded inode information (either for the inode itself 73 * Decode encoded inode information (either for the inode itself
74 * or the parent) into an xfs_fid2_t structure. Advances and 74 * or the parent) into an xfs_fid_t structure. Advances and
75 * returns the new data pointer 75 * returns the new data pointer
76 */ 76 */
77static inline __u32 * 77static inline __u32 *
78xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64) 78xfs_fileid_decode_fid2(__u32 *p, xfs_fid_t *fid, int is64)
79{ 79{
80 fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len); 80 fid->fid_len = sizeof(xfs_fid_t) - sizeof(fid->fid_len);
81 fid->fid_pad = 0; 81 fid->fid_pad = 0;
82 fid->fid_ino = *p++; 82 fid->fid_ino = *p++;
83#if XFS_BIG_INUMS 83#if XFS_BIG_INUMS
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ffec630e7db7..2b34bad48b07 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -152,11 +152,11 @@ xfs_find_handle(
152 lock_mode = xfs_ilock_map_shared(ip); 152 lock_mode = xfs_ilock_map_shared(ip);
153 153
154 /* fill in fid section of handle from inode */ 154 /* fill in fid section of handle from inode */
155 handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) - 155 handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
156 sizeof(handle.ha_fid.xfs_fid_len); 156 sizeof(handle.ha_fid.fid_len);
157 handle.ha_fid.xfs_fid_pad = 0; 157 handle.ha_fid.fid_pad = 0;
158 handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen; 158 handle.ha_fid.fid_gen = ip->i_d.di_gen;
159 handle.ha_fid.xfs_fid_ino = ip->i_ino; 159 handle.ha_fid.fid_ino = ip->i_ino;
160 160
161 xfs_iunlock_map_shared(ip, lock_mode); 161 xfs_iunlock_map_shared(ip, lock_mode);
162 162
@@ -222,10 +222,10 @@ xfs_vget_fsop_handlereq(
222 if (hlen < sizeof(*handlep)) 222 if (hlen < sizeof(*handlep))
223 memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); 223 memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
224 if (hlen > sizeof(handlep->ha_fsid)) { 224 if (hlen > sizeof(handlep->ha_fsid)) {
225 if (handlep->ha_fid.xfs_fid_len != 225 if (handlep->ha_fid.fid_len !=
226 (hlen - sizeof(handlep->ha_fsid) 226 (hlen - sizeof(handlep->ha_fsid) -
227 - sizeof(handlep->ha_fid.xfs_fid_len)) 227 sizeof(handlep->ha_fid.fid_len)) ||
228 || handlep->ha_fid.xfs_fid_pad) 228 handlep->ha_fid.fid_pad)
229 return XFS_ERROR(EINVAL); 229 return XFS_ERROR(EINVAL);
230 } 230 }
231 231
@@ -233,9 +233,9 @@ xfs_vget_fsop_handlereq(
233 * Crack the handle, obtain the inode # & generation # 233 * Crack the handle, obtain the inode # & generation #
234 */ 234 */
235 xfid = (struct xfs_fid *)&handlep->ha_fid; 235 xfid = (struct xfs_fid *)&handlep->ha_fid;
236 if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) { 236 if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) {
237 ino = xfid->xfs_fid_ino; 237 ino = xfid->fid_ino;
238 igen = xfid->xfs_fid_gen; 238 igen = xfid->fid_gen;
239 } else { 239 } else {
240 return XFS_ERROR(EINVAL); 240 return XFS_ERROR(EINVAL);
241 } 241 }
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index 6cd5704258a2..a1e55fb9d5dd 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -41,29 +41,16 @@ int
41xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 41xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
42{ 42{
43 if (args->flags & XFSMNT_DMAPI) { 43 if (args->flags & XFSMNT_DMAPI) {
44 struct xfs_dmops *ops; 44 cmn_err(CE_WARN,
45 45 "XFS: dmapi support not available in this kernel.");
46 ops = symbol_get(xfs_dmcore_xfs); 46 return EINVAL;
47 if (!ops) {
48 request_module("xfs_dmapi");
49 ops = symbol_get(xfs_dmcore_xfs);
50 }
51
52 if (!ops) {
53 cmn_err(CE_WARN, "XFS: no dmapi support available.");
54 return EINVAL;
55 }
56 mp->m_dm_ops = ops;
57 } else {
58 mp->m_dm_ops = &xfs_dmcore_stub;
59 } 47 }
60 48
49 mp->m_dm_ops = &xfs_dmcore_stub;
61 return 0; 50 return 0;
62} 51}
63 52
64void 53void
65xfs_dmops_put(struct xfs_mount *mp) 54xfs_dmops_put(struct xfs_mount *mp)
66{ 55{
67 if (mp->m_dm_ops != &xfs_dmcore_stub)
68 symbol_put(xfs_dmcore_xfs);
69} 56}
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ec3c9c27e0de..aab966276517 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -389,30 +389,13 @@ typedef struct xfs_fsop_attrmulti_handlereq {
389 */ 389 */
390typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */ 390typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */
391 391
392
393#ifndef HAVE_FID
394#define MAXFIDSZ 46
395
396typedef struct fid {
397 __u16 fid_len; /* length of data in bytes */
398 unsigned char fid_data[MAXFIDSZ]; /* data (fid_len worth) */
399} fid_t;
400#endif
401
402typedef struct xfs_fid { 392typedef struct xfs_fid {
403 __u16 xfs_fid_len; /* length of remainder */ 393 __u16 fid_len; /* length of remainder */
404 __u16 xfs_fid_pad; 394 __u16 fid_pad;
405 __u32 xfs_fid_gen; /* generation number */ 395 __u32 fid_gen; /* generation number */
406 __u64 xfs_fid_ino; /* 64 bits inode number */ 396 __u64 fid_ino; /* 64 bits inode number */
407} xfs_fid_t; 397} xfs_fid_t;
408 398
409typedef struct xfs_fid2 {
410 __u16 fid_len; /* length of remainder */
411 __u16 fid_pad; /* padding, must be zero */
412 __u32 fid_gen; /* generation number */
413 __u64 fid_ino; /* inode number */
414} xfs_fid2_t;
415
416typedef struct xfs_handle { 399typedef struct xfs_handle {
417 union { 400 union {
418 __s64 align; /* force alignment of ha_fid */ 401 __s64 align; /* force alignment of ha_fid */
@@ -422,9 +405,9 @@ typedef struct xfs_handle {
422} xfs_handle_t; 405} xfs_handle_t;
423#define ha_fsid ha_u._ha_fsid 406#define ha_fsid ha_u._ha_fsid
424 407
425#define XFS_HSIZE(handle) (((char *) &(handle).ha_fid.xfs_fid_pad \ 408#define XFS_HSIZE(handle) (((char *) &(handle).ha_fid.fid_pad \
426 - (char *) &(handle)) \ 409 - (char *) &(handle)) \
427 + (handle).ha_fid.xfs_fid_len) 410 + (handle).ha_fid.fid_len)
428 411
429/* 412/*
430 * Flags for going down operation 413 * Flags for going down operation
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index c266a0184b42..2ec1d8a27352 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -135,19 +135,13 @@ int
135xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 135xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
136{ 136{
137 if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) { 137 if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) {
138 struct xfs_qmops *ops; 138#ifdef CONFIG_XFS_QUOTA
139 139 mp->m_qm_ops = &xfs_qmcore_xfs;
140 ops = symbol_get(xfs_qmcore_xfs); 140#else
141 if (!ops) { 141 cmn_err(CE_WARN,
142 request_module("xfs_quota"); 142 "XFS: quota support not available in this kernel.");
143 ops = symbol_get(xfs_qmcore_xfs); 143 return EINVAL;
144 } 144#endif
145
146 if (!ops) {
147 cmn_err(CE_WARN, "XFS: no quota support available.");
148 return EINVAL;
149 }
150 mp->m_qm_ops = ops;
151 } else { 145 } else {
152 mp->m_qm_ops = &xfs_qmcore_stub; 146 mp->m_qm_ops = &xfs_qmcore_stub;
153 } 147 }
@@ -158,6 +152,4 @@ xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
158void 152void
159xfs_qmops_put(struct xfs_mount *mp) 153xfs_qmops_put(struct xfs_mount *mp)
160{ 154{
161 if (mp->m_qm_ops != &xfs_qmcore_stub)
162 symbol_put(xfs_qmcore_xfs);
163} 155}
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index a5a8454f2a63..a1544597bcd3 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1635,9 +1635,8 @@ int
1635xfs_vget( 1635xfs_vget(
1636 xfs_mount_t *mp, 1636 xfs_mount_t *mp,
1637 bhv_vnode_t **vpp, 1637 bhv_vnode_t **vpp,
1638 fid_t *fidp) 1638 xfs_fid_t *xfid)
1639{ 1639{
1640 xfs_fid_t *xfid = (struct xfs_fid *)fidp;
1641 xfs_inode_t *ip; 1640 xfs_inode_t *ip;
1642 int error; 1641 int error;
1643 xfs_ino_t ino; 1642 xfs_ino_t ino;
@@ -1647,11 +1646,11 @@ xfs_vget(
1647 * Invalid. Since handles can be created in user space and passed in 1646 * Invalid. Since handles can be created in user space and passed in
1648 * via gethandle(), this is not cause for a panic. 1647 * via gethandle(), this is not cause for a panic.
1649 */ 1648 */
1650 if (xfid->xfs_fid_len != sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) 1649 if (xfid->fid_len != sizeof(*xfid) - sizeof(xfid->fid_len))
1651 return XFS_ERROR(EINVAL); 1650 return XFS_ERROR(EINVAL);
1652 1651
1653 ino = xfid->xfs_fid_ino; 1652 ino = xfid->fid_ino;
1654 igen = xfid->xfs_fid_gen; 1653 igen = xfid->fid_gen;
1655 1654
1656 /* 1655 /*
1657 * NFS can sometimes send requests for ino 0. Fail them gracefully. 1656 * NFS can sometimes send requests for ino 0. Fail them gracefully.
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index bc99e3eb7dbb..a592fe02a339 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -2,7 +2,7 @@
2#define _XFS_VFSOPS_H 1 2#define _XFS_VFSOPS_H 1
3 3
4struct cred; 4struct cred;
5struct fid; 5struct xfs_fid;
6struct inode; 6struct inode;
7struct kstatfs; 7struct kstatfs;
8struct xfs_mount; 8struct xfs_mount;
@@ -17,7 +17,7 @@ int xfs_root(struct xfs_mount *mp, bhv_vnode_t **vpp);
17int xfs_statvfs(struct xfs_mount *mp, struct kstatfs *statp, 17int xfs_statvfs(struct xfs_mount *mp, struct kstatfs *statp,
18 bhv_vnode_t *vp); 18 bhv_vnode_t *vp);
19int xfs_sync(struct xfs_mount *mp, int flags); 19int xfs_sync(struct xfs_mount *mp, int flags);
20int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct fid *fidp); 20int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct xfs_fid *xfid);
21int xfs_parseargs(struct xfs_mount *mp, char *options, 21int xfs_parseargs(struct xfs_mount *mp, char *options,
22 struct xfs_mount_args *args, int update); 22 struct xfs_mount_args *args, int update);
23int xfs_showargs(struct xfs_mount *mp, struct seq_file *m); 23int xfs_showargs(struct xfs_mount *mp, struct seq_file *m);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 5e3c57ca9981..efd5aff9eaf6 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3466,23 +3466,14 @@ std_return:
3466} 3466}
3467 3467
3468 3468
3469/*
3470 * xfs_fid2
3471 *
3472 * A fid routine that takes a pointer to a previously allocated
3473 * fid structure (like xfs_fast_fid) but uses a 64 bit inode number.
3474 */
3475int 3469int
3476xfs_fid2( 3470xfs_fid2(
3477 xfs_inode_t *ip, 3471 xfs_inode_t *ip,
3478 fid_t *fidp) 3472 xfs_fid_t *xfid)
3479{ 3473{
3480 xfs_fid2_t *xfid = (xfs_fid2_t *)fidp;
3481
3482 vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); 3474 vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
3483 ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t));
3484 3475
3485 xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len); 3476 xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len);
3486 xfid->fid_pad = 0; 3477 xfid->fid_pad = 0;
3487 /* 3478 /*
3488 * use memcpy because the inode is a long long and there's no 3479 * use memcpy because the inode is a long long and there's no
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index f36e74f2f0c2..b7e461c40cfb 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -39,7 +39,7 @@ int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
39int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry, 39int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry,
40 char *target_path, mode_t mode, bhv_vnode_t **vpp, 40 char *target_path, mode_t mode, bhv_vnode_t **vpp,
41 struct cred *credp); 41 struct cred *credp);
42int xfs_fid2(struct xfs_inode *ip, fid_t *fidp); 42int xfs_fid2(struct xfs_inode *ip, struct xfs_fid *xfid);
43int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); 43int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
44void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); 44void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
45int xfs_inode_flush(struct xfs_inode *ip, int flags); 45int xfs_inode_flush(struct xfs_inode *ip, int flags);
diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 381b4f5b4d5d..9e19a704d484 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -1,6 +1,10 @@
1#ifndef _ALPHA_BITOPS_H 1#ifndef _ALPHA_BITOPS_H
2#define _ALPHA_BITOPS_H 2#define _ALPHA_BITOPS_H
3 3
4#ifndef _LINUX_BITOPS_H
5#error only <linux/bitops.h> can be included directly
6#endif
7
4#include <asm/compiler.h> 8#include <asm/compiler.h>
5#include <asm/barrier.h> 9#include <asm/barrier.h>
6 10
diff --git a/include/asm-alpha/tlbflush.h b/include/asm-alpha/tlbflush.h
index 1ca3ed3bd6d3..eefab3fb51ae 100644
--- a/include/asm-alpha/tlbflush.h
+++ b/include/asm-alpha/tlbflush.h
@@ -92,17 +92,6 @@ flush_tlb_other(struct mm_struct *mm)
92 if (*mmc) *mmc = 0; 92 if (*mmc) *mmc = 0;
93} 93}
94 94
95/* Flush a specified range of user mapping page tables from TLB.
96 Although Alpha uses VPTE caches, this can be a nop, as Alpha does
97 not have finegrained tlb flushing, so it will flush VPTE stuff
98 during next flush_tlb_range. */
99
100static inline void
101flush_tlb_pgtables(struct mm_struct *mm, unsigned long start,
102 unsigned long end)
103{
104}
105
106#ifndef CONFIG_SMP 95#ifndef CONFIG_SMP
107/* Flush everything (kernel mapping may also have changed 96/* Flush everything (kernel mapping may also have changed
108 due to vmalloc/vfree). */ 97 due to vmalloc/vfree). */
diff --git a/include/asm-arm/arch-ixp4xx/io.h b/include/asm-arm/arch-ixp4xx/io.h
index c72f9d79417c..eeeea90cd5a9 100644
--- a/include/asm-arm/arch-ixp4xx/io.h
+++ b/include/asm-arm/arch-ixp4xx/io.h
@@ -17,9 +17,6 @@
17 17
18#define IO_SPACE_LIMIT 0xffff0000 18#define IO_SPACE_LIMIT 0xffff0000
19 19
20#define BIT(x) ((1)<<(x))
21
22
23extern int (*ixp4xx_pci_read)(u32 addr, u32 cmd, u32* data); 20extern int (*ixp4xx_pci_read)(u32 addr, u32 cmd, u32* data);
24extern int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data); 21extern int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data);
25 22
diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h
index 52fe05895deb..47a6b086eee2 100644
--- a/include/asm-arm/bitops.h
+++ b/include/asm-arm/bitops.h
@@ -19,6 +19,10 @@
19 19
20#ifdef __KERNEL__ 20#ifdef __KERNEL__
21 21
22#ifndef _LINUX_BITOPS_H
23#error only <linux/bitops.h> can be included directly
24#endif
25
22#include <linux/compiler.h> 26#include <linux/compiler.h>
23#include <asm/system.h> 27#include <asm/system.h>
24 28
diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h
index 71be4fded7e2..8c6bc1bb9d1a 100644
--- a/include/asm-arm/tlbflush.h
+++ b/include/asm-arm/tlbflush.h
@@ -463,11 +463,6 @@ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
463 */ 463 */
464extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte); 464extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte);
465 465
466/*
467 * ARM processors do not cache TLB tables in RAM.
468 */
469#define flush_tlb_pgtables(mm,start,end) do { } while (0)
470
471#endif 466#endif
472 467
473#endif /* CONFIG_MMU */ 468#endif /* CONFIG_MMU */
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
index f3faddfd46a8..1a50b69b1a19 100644
--- a/include/asm-avr32/bitops.h
+++ b/include/asm-avr32/bitops.h
@@ -8,6 +8,10 @@
8#ifndef __ASM_AVR32_BITOPS_H 8#ifndef __ASM_AVR32_BITOPS_H
9#define __ASM_AVR32_BITOPS_H 9#define __ASM_AVR32_BITOPS_H
10 10
11#ifndef _LINUX_BITOPS_H
12#error only <linux/bitops.h> can be included directly
13#endif
14
11#include <asm/byteorder.h> 15#include <asm/byteorder.h>
12#include <asm/system.h> 16#include <asm/system.h>
13 17
diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h
index 730e268f81f3..5bc7c88a5770 100644
--- a/include/asm-avr32/tlbflush.h
+++ b/include/asm-avr32/tlbflush.h
@@ -19,7 +19,6 @@
19 * - flush_tlb_page(vma, vmaddr) flushes one page 19 * - flush_tlb_page(vma, vmaddr) flushes one page
20 * - flush_tlb_range(vma, start, end) flushes a range of pages 20 * - flush_tlb_range(vma, start, end) flushes a range of pages
21 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 21 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
22 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
23 */ 22 */
24extern void flush_tlb(void); 23extern void flush_tlb(void);
25extern void flush_tlb_all(void); 24extern void flush_tlb_all(void);
@@ -29,12 +28,6 @@ extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
29extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); 28extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
30extern void __flush_tlb_page(unsigned long asid, unsigned long page); 29extern void __flush_tlb_page(unsigned long asid, unsigned long page);
31 30
32static inline void flush_tlb_pgtables(struct mm_struct *mm,
33 unsigned long start, unsigned long end)
34{
35 /* Nothing to do */
36}
37
38extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); 31extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
39 32
40#endif /* __ASM_AVR32_TLBFLUSH_H */ 33#endif /* __ASM_AVR32_TLBFLUSH_H */
diff --git a/include/asm-blackfin/bitops.h b/include/asm-blackfin/bitops.h
index 03ecedc1f2a7..b39a175c79c1 100644
--- a/include/asm-blackfin/bitops.h
+++ b/include/asm-blackfin/bitops.h
@@ -11,6 +11,10 @@
11 11
12#ifdef __KERNEL__ 12#ifdef __KERNEL__
13 13
14#ifndef _LINUX_BITOPS_H
15#error only <linux/bitops.h> can be included directly
16#endif
17
14#include <asm-generic/bitops/ffs.h> 18#include <asm-generic/bitops/ffs.h>
15#include <asm-generic/bitops/__ffs.h> 19#include <asm-generic/bitops/__ffs.h>
16#include <asm-generic/bitops/sched.h> 20#include <asm-generic/bitops/sched.h>
diff --git a/include/asm-blackfin/tlbflush.h b/include/asm-blackfin/tlbflush.h
index 10a07ba1e011..277b400924b8 100644
--- a/include/asm-blackfin/tlbflush.h
+++ b/include/asm-blackfin/tlbflush.h
@@ -53,10 +53,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
53 BUG(); 53 BUG();
54} 54}
55 55
56static inline void flush_tlb_pgtables(struct mm_struct *mm,
57 unsigned long start, unsigned long end)
58{
59 BUG();
60}
61
62#endif 56#endif
diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h
index 617151b9b72b..e2f49c27ed29 100644
--- a/include/asm-cris/bitops.h
+++ b/include/asm-cris/bitops.h
@@ -14,6 +14,10 @@
14/* Currently this is unsuitable for consumption outside the kernel. */ 14/* Currently this is unsuitable for consumption outside the kernel. */
15#ifdef __KERNEL__ 15#ifdef __KERNEL__
16 16
17#ifndef _LINUX_BITOPS_H
18#error only <linux/bitops.h> can be included directly
19#endif
20
17#include <asm/arch/bitops.h> 21#include <asm/arch/bitops.h>
18#include <asm/system.h> 22#include <asm/system.h>
19#include <asm/atomic.h> 23#include <asm/atomic.h>
diff --git a/include/asm-cris/posix_types.h b/include/asm-cris/posix_types.h
index 7b9ed22ab5dd..92000d0c3f97 100644
--- a/include/asm-cris/posix_types.h
+++ b/include/asm-cris/posix_types.h
@@ -52,7 +52,7 @@ typedef struct {
52} __kernel_fsid_t; 52} __kernel_fsid_t;
53 53
54#ifdef __KERNEL__ 54#ifdef __KERNEL__
55#include <asm/bitops.h> 55#include <linux/bitops.h>
56 56
57#undef __FD_SET 57#undef __FD_SET
58#define __FD_SET(fd,fdsetp) set_bit(fd, (void *)(fdsetp)) 58#define __FD_SET(fd,fdsetp) set_bit(fd, (void *)(fdsetp))
diff --git a/include/asm-cris/tlbflush.h b/include/asm-cris/tlbflush.h
index 0569612477e3..20697e7ef4f2 100644
--- a/include/asm-cris/tlbflush.h
+++ b/include/asm-cris/tlbflush.h
@@ -38,13 +38,6 @@ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long st
38 flush_tlb_mm(vma->vm_mm); 38 flush_tlb_mm(vma->vm_mm);
39} 39}
40 40
41static inline void flush_tlb_pgtables(struct mm_struct *mm,
42 unsigned long start, unsigned long end)
43{
44 /* CRIS does not keep any page table caches in TLB */
45}
46
47
48static inline void flush_tlb(void) 41static inline void flush_tlb(void)
49{ 42{
50 flush_tlb_mm(current->mm); 43 flush_tlb_mm(current->mm);
diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index 8dba74b1a254..e29de7131b79 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -21,6 +21,10 @@
21 21
22#ifdef __KERNEL__ 22#ifdef __KERNEL__
23 23
24#ifndef _LINUX_BITOPS_H
25#error only <linux/bitops.h> can be included directly
26#endif
27
24#include <asm-generic/bitops/ffz.h> 28#include <asm-generic/bitops/ffz.h>
25 29
26/* 30/*
diff --git a/include/asm-frv/tlbflush.h b/include/asm-frv/tlbflush.h
index 8370f97e41ee..7ac5eafc5d98 100644
--- a/include/asm-frv/tlbflush.h
+++ b/include/asm-frv/tlbflush.h
@@ -57,7 +57,6 @@ do { \
57#define __flush_tlb_global() flush_tlb_all() 57#define __flush_tlb_global() flush_tlb_all()
58#define flush_tlb() flush_tlb_all() 58#define flush_tlb() flush_tlb_all()
59#define flush_tlb_kernel_range(start, end) flush_tlb_all() 59#define flush_tlb_kernel_range(start, end) flush_tlb_all()
60#define flush_tlb_pgtables(mm,start,end) do { } while(0)
61 60
62#else 61#else
63 62
@@ -66,7 +65,6 @@ do { \
66#define flush_tlb_mm(mm) BUG() 65#define flush_tlb_mm(mm) BUG()
67#define flush_tlb_page(vma,addr) BUG() 66#define flush_tlb_page(vma,addr) BUG()
68#define flush_tlb_range(mm,start,end) BUG() 67#define flush_tlb_range(mm,start,end) BUG()
69#define flush_tlb_pgtables(mm,start,end) BUG()
70#define flush_tlb_kernel_range(start, end) BUG() 68#define flush_tlb_kernel_range(start, end) BUG()
71 69
72#endif 70#endif
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index e022a0f59e6b..15e6f253dda4 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -19,6 +19,10 @@
19 19
20#ifdef __KERNEL__ 20#ifdef __KERNEL__
21 21
22#ifndef _LINUX_BITOPS_H
23#error only <linux/bitops.h> can be included directly
24#endif
25
22#include <asm-generic/bitops/sched.h> 26#include <asm-generic/bitops/sched.h>
23#include <asm-generic/bitops/ffs.h> 27#include <asm-generic/bitops/ffs.h>
24#include <asm-generic/bitops/hweight.h> 28#include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index cd8a9641bd66..4657f3e410fc 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -3,9 +3,6 @@
3 3
4#include <asm/types.h> 4#include <asm/types.h>
5 5
6#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
7#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
8
9#ifdef CONFIG_SMP 6#ifdef CONFIG_SMP
10#include <asm/spinlock.h> 7#include <asm/spinlock.h>
11#include <asm/cache.h> /* we use L1_CACHE_BYTES */ 8#include <asm/cache.h> /* we use L1_CACHE_BYTES */
@@ -66,8 +63,8 @@ extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
66 */ 63 */
67static inline void set_bit(int nr, volatile unsigned long *addr) 64static inline void set_bit(int nr, volatile unsigned long *addr)
68{ 65{
69 unsigned long mask = BITOP_MASK(nr); 66 unsigned long mask = BIT_MASK(nr);
70 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 67 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
71 unsigned long flags; 68 unsigned long flags;
72 69
73 _atomic_spin_lock_irqsave(p, flags); 70 _atomic_spin_lock_irqsave(p, flags);
@@ -87,8 +84,8 @@ static inline void set_bit(int nr, volatile unsigned long *addr)
87 */ 84 */
88static inline void clear_bit(int nr, volatile unsigned long *addr) 85static inline void clear_bit(int nr, volatile unsigned long *addr)
89{ 86{
90 unsigned long mask = BITOP_MASK(nr); 87 unsigned long mask = BIT_MASK(nr);
91 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 88 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
92 unsigned long flags; 89 unsigned long flags;
93 90
94 _atomic_spin_lock_irqsave(p, flags); 91 _atomic_spin_lock_irqsave(p, flags);
@@ -108,8 +105,8 @@ static inline void clear_bit(int nr, volatile unsigned long *addr)
108 */ 105 */
109static inline void change_bit(int nr, volatile unsigned long *addr) 106static inline void change_bit(int nr, volatile unsigned long *addr)
110{ 107{
111 unsigned long mask = BITOP_MASK(nr); 108 unsigned long mask = BIT_MASK(nr);
112 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 109 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
113 unsigned long flags; 110 unsigned long flags;
114 111
115 _atomic_spin_lock_irqsave(p, flags); 112 _atomic_spin_lock_irqsave(p, flags);
@@ -128,8 +125,8 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
128 */ 125 */
129static inline int test_and_set_bit(int nr, volatile unsigned long *addr) 126static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
130{ 127{
131 unsigned long mask = BITOP_MASK(nr); 128 unsigned long mask = BIT_MASK(nr);
132 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 129 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
133 unsigned long old; 130 unsigned long old;
134 unsigned long flags; 131 unsigned long flags;
135 132
@@ -152,8 +149,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
152 */ 149 */
153static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) 150static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
154{ 151{
155 unsigned long mask = BITOP_MASK(nr); 152 unsigned long mask = BIT_MASK(nr);
156 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 153 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
157 unsigned long old; 154 unsigned long old;
158 unsigned long flags; 155 unsigned long flags;
159 156
@@ -175,8 +172,8 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
175 */ 172 */
176static inline int test_and_change_bit(int nr, volatile unsigned long *addr) 173static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
177{ 174{
178 unsigned long mask = BITOP_MASK(nr); 175 unsigned long mask = BIT_MASK(nr);
179 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 176 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
180 unsigned long old; 177 unsigned long old;
181 unsigned long flags; 178 unsigned long flags;
182 179
diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index 46a825cf2ae1..697cc2b7e0f0 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -3,9 +3,6 @@
3 3
4#include <asm/types.h> 4#include <asm/types.h>
5 5
6#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
7#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
8
9/** 6/**
10 * __set_bit - Set a bit in memory 7 * __set_bit - Set a bit in memory
11 * @nr: the bit to set 8 * @nr: the bit to set
@@ -17,16 +14,16 @@
17 */ 14 */
18static inline void __set_bit(int nr, volatile unsigned long *addr) 15static inline void __set_bit(int nr, volatile unsigned long *addr)
19{ 16{
20 unsigned long mask = BITOP_MASK(nr); 17 unsigned long mask = BIT_MASK(nr);
21 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 18 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
22 19
23 *p |= mask; 20 *p |= mask;
24} 21}
25 22
26static inline void __clear_bit(int nr, volatile unsigned long *addr) 23static inline void __clear_bit(int nr, volatile unsigned long *addr)
27{ 24{
28 unsigned long mask = BITOP_MASK(nr); 25 unsigned long mask = BIT_MASK(nr);
29 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 26 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
30 27
31 *p &= ~mask; 28 *p &= ~mask;
32} 29}
@@ -42,8 +39,8 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
42 */ 39 */
43static inline void __change_bit(int nr, volatile unsigned long *addr) 40static inline void __change_bit(int nr, volatile unsigned long *addr)
44{ 41{
45 unsigned long mask = BITOP_MASK(nr); 42 unsigned long mask = BIT_MASK(nr);
46 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 43 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
47 44
48 *p ^= mask; 45 *p ^= mask;
49} 46}
@@ -59,8 +56,8 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
59 */ 56 */
60static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) 57static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
61{ 58{
62 unsigned long mask = BITOP_MASK(nr); 59 unsigned long mask = BIT_MASK(nr);
63 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 60 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
64 unsigned long old = *p; 61 unsigned long old = *p;
65 62
66 *p = old | mask; 63 *p = old | mask;
@@ -78,8 +75,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
78 */ 75 */
79static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) 76static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
80{ 77{
81 unsigned long mask = BITOP_MASK(nr); 78 unsigned long mask = BIT_MASK(nr);
82 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 79 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
83 unsigned long old = *p; 80 unsigned long old = *p;
84 81
85 *p = old & ~mask; 82 *p = old & ~mask;
@@ -90,8 +87,8 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
90static inline int __test_and_change_bit(int nr, 87static inline int __test_and_change_bit(int nr,
91 volatile unsigned long *addr) 88 volatile unsigned long *addr)
92{ 89{
93 unsigned long mask = BITOP_MASK(nr); 90 unsigned long mask = BIT_MASK(nr);
94 unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); 91 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
95 unsigned long old = *p; 92 unsigned long old = *p;
96 93
97 *p = old ^ mask; 94 *p = old ^ mask;
@@ -105,7 +102,7 @@ static inline int __test_and_change_bit(int nr,
105 */ 102 */
106static inline int test_bit(int nr, const volatile unsigned long *addr) 103static inline int test_bit(int nr, const volatile unsigned long *addr)
107{ 104{
108 return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); 105 return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
109} 106}
110 107
111#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ 108#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5615440027ec..9f584cc5c5fb 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -12,7 +12,11 @@
12/* .data section */ 12/* .data section */
13#define DATA_DATA \ 13#define DATA_DATA \
14 *(.data) \ 14 *(.data) \
15 *(.data.init.refok) 15 *(.data.init.refok) \
16 . = ALIGN(8); \
17 VMLINUX_SYMBOL(__start___markers) = .; \
18 *(__markers) \
19 VMLINUX_SYMBOL(__stop___markers) = .;
16 20
17#define RO_DATA(align) \ 21#define RO_DATA(align) \
18 . = ALIGN((align)); \ 22 . = ALIGN((align)); \
@@ -20,6 +24,7 @@
20 VMLINUX_SYMBOL(__start_rodata) = .; \ 24 VMLINUX_SYMBOL(__start_rodata) = .; \
21 *(.rodata) *(.rodata.*) \ 25 *(.rodata) *(.rodata.*) \
22 *(__vermagic) /* Kernel version magic */ \ 26 *(__vermagic) /* Kernel version magic */ \
27 *(__markers_strings) /* Markers: strings */ \
23 } \ 28 } \
24 \ 29 \
25 .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ 30 .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h
index e64ad315656d..cb18e3b0aa94 100644
--- a/include/asm-h8300/bitops.h
+++ b/include/asm-h8300/bitops.h
@@ -10,6 +10,11 @@
10#include <asm/system.h> 10#include <asm/system.h>
11 11
12#ifdef __KERNEL__ 12#ifdef __KERNEL__
13
14#ifndef _LINUX_BITOPS_H
15#error only <linux/bitops.h> can be included directly
16#endif
17
13/* 18/*
14 * Function prototypes to keep gcc -Wall happy 19 * Function prototypes to keep gcc -Wall happy
15 */ 20 */
diff --git a/include/asm-h8300/tlbflush.h b/include/asm-h8300/tlbflush.h
index 9a2c5c9fd700..41c148a9208e 100644
--- a/include/asm-h8300/tlbflush.h
+++ b/include/asm-h8300/tlbflush.h
@@ -52,10 +52,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
52 BUG(); 52 BUG();
53} 53}
54 54
55static inline void flush_tlb_pgtables(struct mm_struct *mm,
56 unsigned long start, unsigned long end)
57{
58 BUG();
59}
60
61#endif /* _H8300_TLBFLUSH_H */ 55#endif /* _H8300_TLBFLUSH_H */
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 2144f1a8ed6f..a977affaebec 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -9,6 +9,10 @@
9 * O(1) scheduler patch 9 * O(1) scheduler patch
10 */ 10 */
11 11
12#ifndef _LINUX_BITOPS_H
13#error only <linux/bitops.h> can be included directly
14#endif
15
12#include <linux/compiler.h> 16#include <linux/compiler.h>
13#include <linux/types.h> 17#include <linux/types.h>
14#include <asm/intrinsics.h> 18#include <asm/intrinsics.h>
diff --git a/include/asm-ia64/cacheflush.h b/include/asm-ia64/cacheflush.h
index 4906916d715b..afcfbda76e20 100644
--- a/include/asm-ia64/cacheflush.h
+++ b/include/asm-ia64/cacheflush.h
@@ -7,8 +7,8 @@
7 */ 7 */
8 8
9#include <linux/page-flags.h> 9#include <linux/page-flags.h>
10#include <linux/bitops.h>
10 11
11#include <asm/bitops.h>
12#include <asm/page.h> 12#include <asm/page.h>
13 13
14/* 14/*
diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index 3a62878e84f3..f93308f54b61 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -35,7 +35,7 @@ extern void find_memory (void);
35extern void reserve_memory (void); 35extern void reserve_memory (void);
36extern void find_initrd (void); 36extern void find_initrd (void);
37extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg); 37extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
38extern void efi_memmap_init(unsigned long *, unsigned long *); 38extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
39extern int find_max_min_low_pfn (unsigned long , unsigned long, void *); 39extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
40 40
41extern unsigned long vmcore_find_descriptor_size(unsigned long address); 41extern unsigned long vmcore_find_descriptor_size(unsigned long address);
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index 0971ec90807e..e6204f14f614 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -150,7 +150,7 @@
150# ifndef __ASSEMBLY__ 150# ifndef __ASSEMBLY__
151 151
152#include <linux/sched.h> /* for mm_struct */ 152#include <linux/sched.h> /* for mm_struct */
153#include <asm/bitops.h> 153#include <linux/bitops.h>
154#include <asm/cacheflush.h> 154#include <asm/cacheflush.h>
155#include <asm/mmu_context.h> 155#include <asm/mmu_context.h>
156#include <asm/processor.h> 156#include <asm/processor.h>
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 1703c9d885bd..471cc2ee9ac4 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -14,8 +14,8 @@
14#include <linux/threads.h> 14#include <linux/threads.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/cpumask.h> 16#include <linux/cpumask.h>
17#include <linux/bitops.h>
17 18
18#include <asm/bitops.h>
19#include <asm/io.h> 19#include <asm/io.h>
20#include <asm/param.h> 20#include <asm/param.h>
21#include <asm/processor.h> 21#include <asm/processor.h>
diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h
index ff857e31738a..0229fb95fb38 100644
--- a/include/asm-ia64/spinlock.h
+++ b/include/asm-ia64/spinlock.h
@@ -11,9 +11,9 @@
11 11
12#include <linux/compiler.h> 12#include <linux/compiler.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/bitops.h>
14 15
15#include <asm/atomic.h> 16#include <asm/atomic.h>
16#include <asm/bitops.h>
17#include <asm/intrinsics.h> 17#include <asm/intrinsics.h>
18#include <asm/system.h> 18#include <asm/system.h>
19 19
diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
index e37f9fbf33af..80bcb0a38e8a 100644
--- a/include/asm-ia64/tlbflush.h
+++ b/include/asm-ia64/tlbflush.h
@@ -84,19 +84,6 @@ flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
84} 84}
85 85
86/* 86/*
87 * Flush the TLB entries mapping the virtually mapped linear page
88 * table corresponding to address range [START-END).
89 */
90static inline void
91flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long end)
92{
93 /*
94 * Deprecated. The virtual page table is now flushed via the normal gather/flush
95 * interface (see tlb.h).
96 */
97}
98
99/*
100 * Flush the local TLB. Invoked from another cpu using an IPI. 87 * Flush the local TLB. Invoked from another cpu using an IPI.
101 */ 88 */
102#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h
index 313a02c4a889..6dc9b81bf9f3 100644
--- a/include/asm-m32r/bitops.h
+++ b/include/asm-m32r/bitops.h
@@ -11,6 +11,10 @@
11 * Copyright (C) 2004 Hirokazu Takata <takata at linux-m32r.org> 11 * Copyright (C) 2004 Hirokazu Takata <takata at linux-m32r.org>
12 */ 12 */
13 13
14#ifndef _LINUX_BITOPS_H
15#error only <linux/bitops.h> can be included directly
16#endif
17
14#include <linux/compiler.h> 18#include <linux/compiler.h>
15#include <asm/assembler.h> 19#include <asm/assembler.h>
16#include <asm/system.h> 20#include <asm/system.h>
diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h
index 92d7266783fd..86505387be08 100644
--- a/include/asm-m32r/pgtable.h
+++ b/include/asm-m32r/pgtable.h
@@ -21,9 +21,9 @@
21#ifndef __ASSEMBLY__ 21#ifndef __ASSEMBLY__
22 22
23#include <linux/threads.h> 23#include <linux/threads.h>
24#include <linux/bitops.h>
24#include <asm/processor.h> 25#include <asm/processor.h>
25#include <asm/addrspace.h> 26#include <asm/addrspace.h>
26#include <asm/bitops.h>
27#include <asm/page.h> 27#include <asm/page.h>
28 28
29struct mm_struct; 29struct mm_struct;
diff --git a/include/asm-m32r/tlbflush.h b/include/asm-m32r/tlbflush.h
index 3d37ac002bcc..0ef95307784e 100644
--- a/include/asm-m32r/tlbflush.h
+++ b/include/asm-m32r/tlbflush.h
@@ -12,7 +12,6 @@
12 * - flush_tlb_page(vma, vmaddr) flushes one page 12 * - flush_tlb_page(vma, vmaddr) flushes one page
13 * - flush_tlb_range(vma, start, end) flushes a range of pages 13 * - flush_tlb_range(vma, start, end) flushes a range of pages
14 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 14 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
15 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
16 */ 15 */
17 16
18extern void local_flush_tlb_all(void); 17extern void local_flush_tlb_all(void);
@@ -93,8 +92,6 @@ static __inline__ void __flush_tlb_all(void)
93 ); 92 );
94} 93}
95 94
96#define flush_tlb_pgtables(mm, start, end) do { } while (0)
97
98extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); 95extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
99 96
100#endif /* _ASM_M32R_TLBFLUSH_H */ 97#endif /* _ASM_M32R_TLBFLUSH_H */
diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h
index da151f70cdc6..2976b5d68e96 100644
--- a/include/asm-m68k/bitops.h
+++ b/include/asm-m68k/bitops.h
@@ -8,6 +8,10 @@
8 * for more details. 8 * for more details.
9 */ 9 */
10 10
11#ifndef _LINUX_BITOPS_H
12#error only <linux/bitops.h> can be included directly
13#endif
14
11#include <linux/compiler.h> 15#include <linux/compiler.h>
12 16
13/* 17/*
diff --git a/include/asm-m68k/tlbflush.h b/include/asm-m68k/tlbflush.h
index 31678831ee47..17707ec315e2 100644
--- a/include/asm-m68k/tlbflush.h
+++ b/include/asm-m68k/tlbflush.h
@@ -92,11 +92,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
92 flush_tlb_all(); 92 flush_tlb_all();
93} 93}
94 94
95static inline void flush_tlb_pgtables(struct mm_struct *mm,
96 unsigned long start, unsigned long end)
97{
98}
99
100#else 95#else
101 96
102 97
@@ -219,11 +214,6 @@ static inline void flush_tlb_kernel_page (unsigned long addr)
219 sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG); 214 sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG);
220} 215}
221 216
222static inline void flush_tlb_pgtables(struct mm_struct *mm,
223 unsigned long start, unsigned long end)
224{
225}
226
227#endif 217#endif
228 218
229#endif /* _M68K_TLBFLUSH_H */ 219#endif /* _M68K_TLBFLUSH_H */
diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h
index b8b2770d6870..f8dfb7ba2e25 100644
--- a/include/asm-m68knommu/bitops.h
+++ b/include/asm-m68knommu/bitops.h
@@ -10,6 +10,10 @@
10 10
11#ifdef __KERNEL__ 11#ifdef __KERNEL__
12 12
13#ifndef _LINUX_BITOPS_H
14#error only <linux/bitops.h> can be included directly
15#endif
16
13#include <asm-generic/bitops/ffs.h> 17#include <asm-generic/bitops/ffs.h>
14#include <asm-generic/bitops/__ffs.h> 18#include <asm-generic/bitops/__ffs.h>
15#include <asm-generic/bitops/sched.h> 19#include <asm-generic/bitops/sched.h>
diff --git a/include/asm-m68knommu/tlbflush.h b/include/asm-m68knommu/tlbflush.h
index de858db28b00..a470cfb803eb 100644
--- a/include/asm-m68knommu/tlbflush.h
+++ b/include/asm-m68knommu/tlbflush.h
@@ -52,10 +52,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
52 BUG(); 52 BUG();
53} 53}
54 54
55static inline void flush_tlb_pgtables(struct mm_struct *mm,
56 unsigned long start, unsigned long end)
57{
58 BUG();
59}
60
61#endif /* _M68KNOMMU_TLBFLUSH_H */ 55#endif /* _M68KNOMMU_TLBFLUSH_H */
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 77ed0c79830b..ec75ce4cdb8c 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -9,6 +9,10 @@
9#ifndef _ASM_BITOPS_H 9#ifndef _ASM_BITOPS_H
10#define _ASM_BITOPS_H 10#define _ASM_BITOPS_H
11 11
12#ifndef _LINUX_BITOPS_H
13#error only <linux/bitops.h> can be included directly
14#endif
15
12#include <linux/compiler.h> 16#include <linux/compiler.h>
13#include <linux/irqflags.h> 17#include <linux/irqflags.h>
14#include <linux/types.h> 18#include <linux/types.h>
diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h
index 483685b1592e..e59d4c039661 100644
--- a/include/asm-mips/fpu.h
+++ b/include/asm-mips/fpu.h
@@ -12,12 +12,12 @@
12 12
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/thread_info.h> 14#include <linux/thread_info.h>
15#include <linux/bitops.h>
15 16
16#include <asm/mipsregs.h> 17#include <asm/mipsregs.h>
17#include <asm/cpu.h> 18#include <asm/cpu.h>
18#include <asm/cpu-features.h> 19#include <asm/cpu-features.h>
19#include <asm/hazards.h> 20#include <asm/hazards.h>
20#include <asm/bitops.h>
21#include <asm/processor.h> 21#include <asm/processor.h>
22#include <asm/current.h> 22#include <asm/current.h>
23 23
diff --git a/include/asm-mips/ip32/crime.h b/include/asm-mips/ip32/crime.h
index a13702fafa85..7c36b0e5b1c6 100644
--- a/include/asm-mips/ip32/crime.h
+++ b/include/asm-mips/ip32/crime.h
@@ -17,9 +17,6 @@
17 */ 17 */
18#define CRIME_BASE 0x14000000 /* physical */ 18#define CRIME_BASE 0x14000000 /* physical */
19 19
20#undef BIT
21#define BIT(x) (1UL << (x))
22
23struct sgi_crime { 20struct sgi_crime {
24 volatile unsigned long id; 21 volatile unsigned long id;
25#define CRIME_ID_MASK 0xff 22#define CRIME_ID_MASK 0xff
diff --git a/include/asm-mips/ip32/mace.h b/include/asm-mips/ip32/mace.h
index 990082c81f39..d08d7c672139 100644
--- a/include/asm-mips/ip32/mace.h
+++ b/include/asm-mips/ip32/mace.h
@@ -17,9 +17,6 @@
17 */ 17 */
18#define MACE_BASE 0x1f000000 /* physical */ 18#define MACE_BASE 0x1f000000 /* physical */
19 19
20#undef BIT
21#define BIT(x) (1UL << (x))
22
23/* 20/*
24 * PCI interface 21 * PCI interface
25 */ 22 */
diff --git a/include/asm-mips/tlbflush.h b/include/asm-mips/tlbflush.h
index 730e841fb08a..86b21de12e91 100644
--- a/include/asm-mips/tlbflush.h
+++ b/include/asm-mips/tlbflush.h
@@ -11,7 +11,6 @@
11 * - flush_tlb_page(vma, vmaddr) flushes one page 11 * - flush_tlb_page(vma, vmaddr) flushes one page
12 * - flush_tlb_range(vma, start, end) flushes a range of pages 12 * - flush_tlb_range(vma, start, end) flushes a range of pages
13 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 13 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
14 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
15 */ 14 */
16extern void local_flush_tlb_all(void); 15extern void local_flush_tlb_all(void);
17extern void local_flush_tlb_mm(struct mm_struct *mm); 16extern void local_flush_tlb_mm(struct mm_struct *mm);
@@ -45,10 +44,4 @@ extern void flush_tlb_one(unsigned long vaddr);
45 44
46#endif /* CONFIG_SMP */ 45#endif /* CONFIG_SMP */
47 46
48static inline void flush_tlb_pgtables(struct mm_struct *mm,
49 unsigned long start, unsigned long end)
50{
51 /* Nothing to do on MIPS. */
52}
53
54#endif /* __ASM_TLBFLUSH_H */ 47#endif /* __ASM_TLBFLUSH_H */
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index 03ae287baf89..f8eebcbad01f 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -1,6 +1,10 @@
1#ifndef _PARISC_BITOPS_H 1#ifndef _PARISC_BITOPS_H
2#define _PARISC_BITOPS_H 2#define _PARISC_BITOPS_H
3 3
4#ifndef _LINUX_BITOPS_H
5#error only <linux/bitops.h> can be included directly
6#endif
7
4#include <linux/compiler.h> 8#include <linux/compiler.h>
5#include <asm/types.h> /* for BITS_PER_LONG/SHIFT_PER_LONG */ 9#include <asm/types.h> /* for BITS_PER_LONG/SHIFT_PER_LONG */
6#include <asm/byteorder.h> 10#include <asm/byteorder.h>
diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h
index e88cacd63724..9ab79c8e5a41 100644
--- a/include/asm-parisc/pgtable.h
+++ b/include/asm-parisc/pgtable.h
@@ -11,9 +11,9 @@
11 */ 11 */
12 12
13#include <linux/mm.h> /* for vm_area_struct */ 13#include <linux/mm.h> /* for vm_area_struct */
14#include <linux/bitops.h>
14#include <asm/processor.h> 15#include <asm/processor.h>
15#include <asm/cache.h> 16#include <asm/cache.h>
16#include <asm/bitops.h>
17 17
18/* 18/*
19 * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel 19 * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
diff --git a/include/asm-parisc/tlbflush.h b/include/asm-parisc/tlbflush.h
index 270cf309772b..b72ec66db699 100644
--- a/include/asm-parisc/tlbflush.h
+++ b/include/asm-parisc/tlbflush.h
@@ -57,10 +57,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
57#endif 57#endif
58} 58}
59 59
60extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
61{
62}
63
64static inline void flush_tlb_page(struct vm_area_struct *vma, 60static inline void flush_tlb_page(struct vm_area_struct *vma,
65 unsigned long addr) 61 unsigned long addr)
66{ 62{
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index e85c3e078ba2..733b4af7f4f1 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -38,6 +38,10 @@
38 38
39#ifdef __KERNEL__ 39#ifdef __KERNEL__
40 40
41#ifndef _LINUX_BITOPS_H
42#error only <linux/bitops.h> can be included directly
43#endif
44
41#include <linux/compiler.h> 45#include <linux/compiler.h>
42#include <asm/asm-compat.h> 46#include <asm/asm-compat.h>
43#include <asm/synch.h> 47#include <asm/synch.h>
diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h
index 870967e47204..4a82fdccee92 100644
--- a/include/asm-powerpc/iommu.h
+++ b/include/asm-powerpc/iommu.h
@@ -26,9 +26,9 @@
26#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27#include <linux/device.h> 27#include <linux/device.h>
28#include <linux/dma-mapping.h> 28#include <linux/dma-mapping.h>
29#include <linux/bitops.h>
29#include <asm/machdep.h> 30#include <asm/machdep.h>
30#include <asm/types.h> 31#include <asm/types.h>
31#include <asm/bitops.h>
32 32
33#define IOMMU_PAGE_SHIFT 12 33#define IOMMU_PAGE_SHIFT 12
34#define IOMMU_PAGE_SIZE (ASM_CONST(1) << IOMMU_PAGE_SHIFT) 34#define IOMMU_PAGE_SIZE (ASM_CONST(1) << IOMMU_PAGE_SHIFT)
diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h
index f863ac21409e..9102b8bf0ead 100644
--- a/include/asm-powerpc/mmu_context.h
+++ b/include/asm-powerpc/mmu_context.h
@@ -8,7 +8,7 @@
8 8
9#ifndef CONFIG_PPC64 9#ifndef CONFIG_PPC64
10#include <asm/atomic.h> 10#include <asm/atomic.h>
11#include <asm/bitops.h> 11#include <linux/bitops.h>
12 12
13/* 13/*
14 * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs 14 * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index a022f806bb21..b6b036ccee34 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -8,7 +8,6 @@
8 * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB 8 * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
9 * - flush_tlb_range(vma, start, end) flushes a range of pages 9 * - flush_tlb_range(vma, start, end) flushes a range of pages
10 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 10 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
11 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
12 * 11 *
13 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License 13 * modify it under the terms of the GNU General Public License
@@ -174,15 +173,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
174 */ 173 */
175extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); 174extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
176 175
177/*
178 * This is called in munmap when we have freed up some page-table
179 * pages. We don't need to do anything here, there's nothing special
180 * about our page-table pages. -- paulus
181 */
182static inline void flush_tlb_pgtables(struct mm_struct *mm,
183 unsigned long start, unsigned long end)
184{
185}
186
187#endif /*__KERNEL__ */ 176#endif /*__KERNEL__ */
188#endif /* _ASM_POWERPC_TLBFLUSH_H */ 177#endif /* _ASM_POWERPC_TLBFLUSH_H */
diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h
index a6441a063e5d..b2e25d8997bf 100644
--- a/include/asm-ppc/mmu_context.h
+++ b/include/asm-ppc/mmu_context.h
@@ -2,8 +2,9 @@
2#ifndef __PPC_MMU_CONTEXT_H 2#ifndef __PPC_MMU_CONTEXT_H
3#define __PPC_MMU_CONTEXT_H 3#define __PPC_MMU_CONTEXT_H
4 4
5#include <linux/bitops.h>
6
5#include <asm/atomic.h> 7#include <asm/atomic.h>
6#include <asm/bitops.h>
7#include <asm/mmu.h> 8#include <asm/mmu.h>
8#include <asm/cputable.h> 9#include <asm/cputable.h>
9#include <asm-generic/mm_hooks.h> 10#include <asm-generic/mm_hooks.h>
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index d756b34d25f3..34d9a6357c38 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -15,6 +15,10 @@
15 15
16#ifdef __KERNEL__ 16#ifdef __KERNEL__
17 17
18#ifndef _LINUX_BITOPS_H
19#error only <linux/bitops.h> can be included directly
20#endif
21
18#include <linux/compiler.h> 22#include <linux/compiler.h>
19 23
20/* 24/*
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 66793f55c8b2..6de2632a3e4f 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -14,7 +14,6 @@
14 * - flush_tlb_page(vma, vmaddr) flushes one page 14 * - flush_tlb_page(vma, vmaddr) flushes one page
15 * - flush_tlb_range(vma, start, end) flushes a range of pages 15 * - flush_tlb_range(vma, start, end) flushes a range of pages
16 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 16 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
17 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
18 */ 17 */
19 18
20/* 19/*
@@ -152,10 +151,4 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
152 151
153#endif 152#endif
154 153
155static inline void flush_tlb_pgtables(struct mm_struct *mm,
156 unsigned long start, unsigned long end)
157{
158 /* S/390 does not keep any page table caches in TLB */
159}
160
161#endif /* _S390_TLBFLUSH_H */ 154#endif /* _S390_TLBFLUSH_H */
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index 9d7021723a25..df805f20b267 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -2,6 +2,11 @@
2#define __ASM_SH_BITOPS_H 2#define __ASM_SH_BITOPS_H
3 3
4#ifdef __KERNEL__ 4#ifdef __KERNEL__
5
6#ifndef _LINUX_BITOPS_H
7#error only <linux/bitops.h> can be included directly
8#endif
9
5#include <asm/system.h> 10#include <asm/system.h>
6/* For __swab32 */ 11/* For __swab32 */
7#include <asm/byteorder.h> 12#include <asm/byteorder.h>
diff --git a/include/asm-sh/tlbflush.h b/include/asm-sh/tlbflush.h
index 455fb8da441e..e0ac97221ae6 100644
--- a/include/asm-sh/tlbflush.h
+++ b/include/asm-sh/tlbflush.h
@@ -9,7 +9,6 @@
9 * - flush_tlb_page(vma, vmaddr) flushes one page 9 * - flush_tlb_page(vma, vmaddr) flushes one page
10 * - flush_tlb_range(vma, start, end) flushes a range of pages 10 * - flush_tlb_range(vma, start, end) flushes a range of pages
11 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 11 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
12 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
13 */ 12 */
14extern void local_flush_tlb_all(void); 13extern void local_flush_tlb_all(void);
15extern void local_flush_tlb_mm(struct mm_struct *mm); 14extern void local_flush_tlb_mm(struct mm_struct *mm);
@@ -47,9 +46,4 @@ extern void flush_tlb_one(unsigned long asid, unsigned long page);
47 46
48#endif /* CONFIG_SMP */ 47#endif /* CONFIG_SMP */
49 48
50static inline void flush_tlb_pgtables(struct mm_struct *mm,
51 unsigned long start, unsigned long end)
52{
53 /* Nothing to do */
54}
55#endif /* __ASM_SH_TLBFLUSH_H */ 49#endif /* __ASM_SH_TLBFLUSH_H */
diff --git a/include/asm-sh64/bitops.h b/include/asm-sh64/bitops.h
index 444d5ea92ce9..600c59efb4c2 100644
--- a/include/asm-sh64/bitops.h
+++ b/include/asm-sh64/bitops.h
@@ -13,6 +13,11 @@
13 */ 13 */
14 14
15#ifdef __KERNEL__ 15#ifdef __KERNEL__
16
17#ifndef _LINUX_BITOPS_H
18#error only <linux/bitops.h> can be included directly
19#endif
20
16#include <linux/compiler.h> 21#include <linux/compiler.h>
17#include <asm/system.h> 22#include <asm/system.h>
18/* For __swab32 */ 23/* For __swab32 */
diff --git a/include/asm-sh64/tlbflush.h b/include/asm-sh64/tlbflush.h
index e45beadc29ee..16a164a23754 100644
--- a/include/asm-sh64/tlbflush.h
+++ b/include/asm-sh64/tlbflush.h
@@ -20,10 +20,6 @@ extern void flush_tlb_mm(struct mm_struct *mm);
20extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 20extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
21 unsigned long end); 21 unsigned long end);
22extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); 22extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
23static inline void flush_tlb_pgtables(struct mm_struct *mm,
24 unsigned long start, unsigned long end)
25{
26}
27 23
28extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); 24extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
29 25
diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h
index 00bd0a679d70..cb3cefab6e09 100644
--- a/include/asm-sparc/bitops.h
+++ b/include/asm-sparc/bitops.h
@@ -14,6 +14,10 @@
14 14
15#ifdef __KERNEL__ 15#ifdef __KERNEL__
16 16
17#ifndef _LINUX_BITOPS_H
18#error only <linux/bitops.h> can be included directly
19#endif
20
17extern unsigned long ___set_bit(unsigned long *addr, unsigned long mask); 21extern unsigned long ___set_bit(unsigned long *addr, unsigned long mask);
18extern unsigned long ___clear_bit(unsigned long *addr, unsigned long mask); 22extern unsigned long ___clear_bit(unsigned long *addr, unsigned long mask);
19extern unsigned long ___change_bit(unsigned long *addr, unsigned long mask); 23extern unsigned long ___change_bit(unsigned long *addr, unsigned long mask);
diff --git a/include/asm-sparc/tlbflush.h b/include/asm-sparc/tlbflush.h
index a619da5cfaa9..b957e29d2ae1 100644
--- a/include/asm-sparc/tlbflush.h
+++ b/include/asm-sparc/tlbflush.h
@@ -13,7 +13,6 @@
13 * - flush_tlb_page(vma, vmaddr) flushes one page 13 * - flush_tlb_page(vma, vmaddr) flushes one page
14 * - flush_tlb_range(vma, start, end) flushes a range of pages 14 * - flush_tlb_range(vma, start, end) flushes a range of pages
15 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 15 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
16 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
17 */ 16 */
18 17
19#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
@@ -42,11 +41,6 @@ BTFIXUPDEF_CALL(void, flush_tlb_mm, struct mm_struct *)
42BTFIXUPDEF_CALL(void, flush_tlb_range, struct vm_area_struct *, unsigned long, unsigned long) 41BTFIXUPDEF_CALL(void, flush_tlb_range, struct vm_area_struct *, unsigned long, unsigned long)
43BTFIXUPDEF_CALL(void, flush_tlb_page, struct vm_area_struct *, unsigned long) 42BTFIXUPDEF_CALL(void, flush_tlb_page, struct vm_area_struct *, unsigned long)
44 43
45// Thanks to Anton Blanchard, our pagetables became uncached in 2.4. Wee!
46// extern void flush_tlb_pgtables(struct mm_struct *mm,
47// unsigned long start, unsigned long end);
48#define flush_tlb_pgtables(mm, start, end) do{ }while(0)
49
50#define flush_tlb_all() BTFIXUP_CALL(flush_tlb_all)() 44#define flush_tlb_all() BTFIXUP_CALL(flush_tlb_all)()
51#define flush_tlb_mm(mm) BTFIXUP_CALL(flush_tlb_mm)(mm) 45#define flush_tlb_mm(mm) BTFIXUP_CALL(flush_tlb_mm)(mm)
52#define flush_tlb_range(vma,start,end) BTFIXUP_CALL(flush_tlb_range)(vma,start,end) 46#define flush_tlb_range(vma,start,end) BTFIXUP_CALL(flush_tlb_range)(vma,start,end)
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index dd4bfe993b61..982ce8992b91 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -7,6 +7,10 @@
7#ifndef _SPARC64_BITOPS_H 7#ifndef _SPARC64_BITOPS_H
8#define _SPARC64_BITOPS_H 8#define _SPARC64_BITOPS_H
9 9
10#ifndef _LINUX_BITOPS_H
11#error only <linux/bitops.h> can be included directly
12#endif
13
10#include <linux/compiler.h> 14#include <linux/compiler.h>
11#include <asm/byteorder.h> 15#include <asm/byteorder.h>
12 16
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index 42c09949526c..1c1c5ea5cea5 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -26,7 +26,7 @@
26 * Private routines/data 26 * Private routines/data
27 */ 27 */
28 28
29#include <asm/bitops.h> 29#include <linux/bitops.h>
30#include <asm/atomic.h> 30#include <asm/atomic.h>
31#include <asm/percpu.h> 31#include <asm/percpu.h>
32 32
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3487328570ed..fbb675dbe0c9 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -41,11 +41,4 @@ do { flush_tsb_kernel_range(start,end); \
41 41
42#endif /* ! CONFIG_SMP */ 42#endif /* ! CONFIG_SMP */
43 43
44static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
45{
46 /* We don't use virtual page tables for TLB miss processing
47 * any more. Nowadays we use the TSB.
48 */
49}
50
51#endif /* _SPARC64_TLBFLUSH_H */ 44#endif /* _SPARC64_TLBFLUSH_H */
diff --git a/include/asm-um/bitops.h b/include/asm-um/bitops.h
index 46d781953d3a..e4d38d437b97 100644
--- a/include/asm-um/bitops.h
+++ b/include/asm-um/bitops.h
@@ -1,6 +1,10 @@
1#ifndef __UM_BITOPS_H 1#ifndef __UM_BITOPS_H
2#define __UM_BITOPS_H 2#define __UM_BITOPS_H
3 3
4#ifndef _LINUX_BITOPS_H
5#error only <linux/bitops.h> can be included directly
6#endif
7
4#include "asm/arch/bitops.h" 8#include "asm/arch/bitops.h"
5 9
6#endif 10#endif
diff --git a/include/asm-um/tlbflush.h b/include/asm-um/tlbflush.h
index 9d647c55350b..614f2c091178 100644
--- a/include/asm-um/tlbflush.h
+++ b/include/asm-um/tlbflush.h
@@ -17,7 +17,6 @@
17 * - flush_tlb_page(vma, vmaddr) flushes one page 17 * - flush_tlb_page(vma, vmaddr) flushes one page
18 * - flush_tlb_kernel_vm() flushes the kernel vm area 18 * - flush_tlb_kernel_vm() flushes the kernel vm area
19 * - flush_tlb_range(vma, start, end) flushes a range of pages 19 * - flush_tlb_range(vma, start, end) flushes a range of pages
20 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
21 */ 20 */
22 21
23extern void flush_tlb_all(void); 22extern void flush_tlb_all(void);
@@ -29,9 +28,4 @@ extern void flush_tlb_kernel_vm(void);
29extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); 28extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
30extern void __flush_tlb_one(unsigned long addr); 29extern void __flush_tlb_one(unsigned long addr);
31 30
32static inline void flush_tlb_pgtables(struct mm_struct *mm,
33 unsigned long start, unsigned long end)
34{
35}
36
37#endif 31#endif
diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h
index 8eafdb1c08ba..f82f5b4a56e0 100644
--- a/include/asm-v850/bitops.h
+++ b/include/asm-v850/bitops.h
@@ -13,6 +13,9 @@
13#ifndef __V850_BITOPS_H__ 13#ifndef __V850_BITOPS_H__
14#define __V850_BITOPS_H__ 14#define __V850_BITOPS_H__
15 15
16#ifndef _LINUX_BITOPS_H
17#error only <linux/bitops.h> can be included directly
18#endif
16 19
17#include <linux/compiler.h> /* unlikely */ 20#include <linux/compiler.h> /* unlikely */
18#include <asm/byteorder.h> /* swab32 */ 21#include <asm/byteorder.h> /* swab32 */
diff --git a/include/asm-v850/tlbflush.h b/include/asm-v850/tlbflush.h
index 5f2f85f636ea..c44aa64449c8 100644
--- a/include/asm-v850/tlbflush.h
+++ b/include/asm-v850/tlbflush.h
@@ -61,10 +61,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
61 BUG (); 61 BUG ();
62} 62}
63 63
64static inline void flush_tlb_pgtables(struct mm_struct *mm,
65 unsigned long start, unsigned long end)
66{
67 BUG ();
68}
69
70#endif /* __V850_TLBFLUSH_H__ */ 64#endif /* __V850_TLBFLUSH_H__ */
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index c96641f75022..3268a341cf49 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -5,6 +5,10 @@
5 * Copyright 1992, Linus Torvalds. 5 * Copyright 1992, Linus Torvalds.
6 */ 6 */
7 7
8#ifndef _LINUX_BITOPS_H
9#error only <linux/bitops.h> can be included directly
10#endif
11
8#include <linux/compiler.h> 12#include <linux/compiler.h>
9#include <asm/alternative.h> 13#include <asm/alternative.h>
10 14
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 525edf2ce5c2..dacaa5f1febc 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -5,6 +5,10 @@
5 * Copyright 1992, Linus Torvalds. 5 * Copyright 1992, Linus Torvalds.
6 */ 6 */
7 7
8#ifndef _LINUX_BITOPS_H
9#error only <linux/bitops.h> can be included directly
10#endif
11
8#include <asm/alternative.h> 12#include <asm/alternative.h>
9 13
10#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) 14#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index acd4b339c49b..ed3e70d8d04b 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -17,10 +17,7 @@
17#include <linux/threads.h> 17#include <linux/threads.h>
18#include <asm/paravirt.h> 18#include <asm/paravirt.h>
19 19
20#ifndef _I386_BITOPS_H 20#include <linux/bitops.h>
21#include <asm/bitops.h>
22#endif
23
24#include <linux/slab.h> 21#include <linux/slab.h>
25#include <linux/list.h> 22#include <linux/list.h>
26#include <linux/spinlock.h> 23#include <linux/spinlock.h>
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index a79f5355e3b0..9b0ff477b39e 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -9,7 +9,7 @@
9 * the x86-64 page table tree. 9 * the x86-64 page table tree.
10 */ 10 */
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/bitops.h> 12#include <linux/bitops.h>
13#include <linux/threads.h> 13#include <linux/threads.h>
14#include <asm/pda.h> 14#include <asm/pda.h>
15 15
diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h
index ee46038d126c..1f576a93368f 100644
--- a/include/asm-x86/smp_32.h
+++ b/include/asm-x86/smp_32.h
@@ -11,7 +11,7 @@
11#endif 11#endif
12 12
13#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) 13#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
14#include <asm/bitops.h> 14#include <linux/bitops.h>
15#include <asm/mpspec.h> 15#include <asm/mpspec.h>
16#include <asm/apic.h> 16#include <asm/apic.h>
17#ifdef CONFIG_X86_IO_APIC 17#ifdef CONFIG_X86_IO_APIC
diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h
index a50fa6741486..2bd5b95e2048 100644
--- a/include/asm-x86/tlbflush_32.h
+++ b/include/asm-x86/tlbflush_32.h
@@ -78,7 +78,6 @@
78 * - flush_tlb_page(vma, vmaddr) flushes one page 78 * - flush_tlb_page(vma, vmaddr) flushes one page
79 * - flush_tlb_range(vma, start, end) flushes a range of pages 79 * - flush_tlb_range(vma, start, end) flushes a range of pages
80 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 80 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
81 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
82 * - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus 81 * - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus
83 * 82 *
84 * ..but the i386 has somewhat limited tlb flushing capabilities, 83 * ..but the i386 has somewhat limited tlb flushing capabilities,
@@ -166,10 +165,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
166 flush_tlb_all(); 165 flush_tlb_all();
167} 166}
168 167
169static inline void flush_tlb_pgtables(struct mm_struct *mm,
170 unsigned long start, unsigned long end)
171{
172 /* i386 does not keep any page table caches in TLB */
173}
174
175#endif /* _I386_TLBFLUSH_H */ 168#endif /* _I386_TLBFLUSH_H */
diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h
index 888eb4abdd07..7731fd23d572 100644
--- a/include/asm-x86/tlbflush_64.h
+++ b/include/asm-x86/tlbflush_64.h
@@ -31,7 +31,6 @@ static inline void __flush_tlb_all(void)
31 * - flush_tlb_page(vma, vmaddr) flushes one page 31 * - flush_tlb_page(vma, vmaddr) flushes one page
32 * - flush_tlb_range(vma, start, end) flushes a range of pages 32 * - flush_tlb_range(vma, start, end) flushes a range of pages
33 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 33 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
34 * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
35 * 34 *
36 * x86-64 can only flush individual pages or full VMs. For a range flush 35 * x86-64 can only flush individual pages or full VMs. For a range flush
37 * we always do the full VM. Might be worth trying if for a small 36 * we always do the full VM. Might be worth trying if for a small
@@ -98,12 +97,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
98 flush_tlb_all(); 97 flush_tlb_all();
99} 98}
100 99
101static inline void flush_tlb_pgtables(struct mm_struct *mm,
102 unsigned long start, unsigned long end)
103{
104 /* x86_64 does not keep any page table caches in a software TLB.
105 The CPUs do in their hardware TLBs, but they are handled
106 by the normal TLB flushing algorithms. */
107}
108
109#endif /* _X8664_TLBFLUSH_H */ 100#endif /* _X8664_TLBFLUSH_H */
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
index 848c17f92226..c0c93d744673 100644
--- a/include/asm-x86/topology_64.h
+++ b/include/asm-x86/topology_64.h
@@ -5,7 +5,7 @@
5#ifdef CONFIG_NUMA 5#ifdef CONFIG_NUMA
6 6
7#include <asm/mpspec.h> 7#include <asm/mpspec.h>
8#include <asm/bitops.h> 8#include <linux/bitops.h>
9 9
10extern cpumask_t cpu_online_map; 10extern cpumask_t cpu_online_map;
11 11
diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h
index 78db04cf6e48..23261e8f2e5a 100644
--- a/include/asm-xtensa/bitops.h
+++ b/include/asm-xtensa/bitops.h
@@ -15,6 +15,10 @@
15 15
16#ifdef __KERNEL__ 16#ifdef __KERNEL__
17 17
18#ifndef _LINUX_BITOPS_H
19#error only <linux/bitops.h> can be included directly
20#endif
21
18#include <asm/processor.h> 22#include <asm/processor.h>
19#include <asm/byteorder.h> 23#include <asm/byteorder.h>
20#include <asm/system.h> 24#include <asm/system.h>
diff --git a/include/asm-xtensa/tlbflush.h b/include/asm-xtensa/tlbflush.h
index 7c637b3c352c..46d240074f74 100644
--- a/include/asm-xtensa/tlbflush.h
+++ b/include/asm-xtensa/tlbflush.h
@@ -41,17 +41,6 @@ extern void flush_tlb_range(struct vm_area_struct*,unsigned long,unsigned long);
41 41
42#define flush_tlb_kernel_range(start,end) flush_tlb_all() 42#define flush_tlb_kernel_range(start,end) flush_tlb_all()
43 43
44
45/* This is calld in munmap when we have freed up some page-table pages.
46 * We don't need to do anything here, there's nothing special about our
47 * page-table pages.
48 */
49
50static inline void flush_tlb_pgtables(struct mm_struct *mm,
51 unsigned long start, unsigned long end)
52{
53}
54
55/* TLB operations. */ 44/* TLB operations. */
56 45
57static inline unsigned long itlb_probe(unsigned long addr) 46static inline unsigned long itlb_probe(unsigned long addr)
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 7ac8303c8471..e3ffd14a3f0b 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -47,6 +47,7 @@ header-y += coda_psdev.h
47header-y += coff.h 47header-y += coff.h
48header-y += comstats.h 48header-y += comstats.h
49header-y += const.h 49header-y += const.h
50header-y += cgroupstats.h
50header-y += cycx_cfm.h 51header-y += cycx_cfm.h
51header-y += dlm_device.h 52header-y += dlm_device.h
52header-y += dlm_netlink.h 53header-y += dlm_netlink.h
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 64b4641904fe..acad1105d942 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,6 +6,7 @@
6#include <linux/types.h> 6#include <linux/types.h>
7#include <linux/bitops.h> 7#include <linux/bitops.h>
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/kernel.h>
9 10
10/* 11/*
11 * bitmaps provide bit arrays that consume one or more unsigned 12 * bitmaps provide bit arrays that consume one or more unsigned
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index b9fb8ee3308b..69c1edb9fe54 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -2,6 +2,14 @@
2#define _LINUX_BITOPS_H 2#define _LINUX_BITOPS_H
3#include <asm/types.h> 3#include <asm/types.h>
4 4
5#ifdef __KERNEL__
6#define BIT(nr) (1UL << (nr))
7#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
8#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
9#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
10#define BITS_PER_BYTE 8
11#endif
12
5/* 13/*
6 * Include this here because some architectures need generic_ffs/fls in 14 * Include this here because some architectures need generic_ffs/fls in
7 * scope 15 * scope
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
new file mode 100644
index 000000000000..87479328d46d
--- /dev/null
+++ b/include/linux/cgroup.h
@@ -0,0 +1,327 @@
1#ifndef _LINUX_CGROUP_H
2#define _LINUX_CGROUP_H
3/*
4 * cgroup interface
5 *
6 * Copyright (C) 2003 BULL SA
7 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
8 *
9 */
10
11#include <linux/sched.h>
12#include <linux/kref.h>
13#include <linux/cpumask.h>
14#include <linux/nodemask.h>
15#include <linux/rcupdate.h>
16#include <linux/cgroupstats.h>
17
18#ifdef CONFIG_CGROUPS
19
20struct cgroupfs_root;
21struct cgroup_subsys;
22struct inode;
23
24extern int cgroup_init_early(void);
25extern int cgroup_init(void);
26extern void cgroup_init_smp(void);
27extern void cgroup_lock(void);
28extern void cgroup_unlock(void);
29extern void cgroup_fork(struct task_struct *p);
30extern void cgroup_fork_callbacks(struct task_struct *p);
31extern void cgroup_post_fork(struct task_struct *p);
32extern void cgroup_exit(struct task_struct *p, int run_callbacks);
33extern int cgroupstats_build(struct cgroupstats *stats,
34 struct dentry *dentry);
35
36extern struct file_operations proc_cgroup_operations;
37
38/* Define the enumeration of all cgroup subsystems */
39#define SUBSYS(_x) _x ## _subsys_id,
40enum cgroup_subsys_id {
41#include <linux/cgroup_subsys.h>
42 CGROUP_SUBSYS_COUNT
43};
44#undef SUBSYS
45
46/* Per-subsystem/per-cgroup state maintained by the system. */
47struct cgroup_subsys_state {
48 /* The cgroup that this subsystem is attached to. Useful
49 * for subsystems that want to know about the cgroup
50 * hierarchy structure */
51 struct cgroup *cgroup;
52
53 /* State maintained by the cgroup system to allow
54 * subsystems to be "busy". Should be accessed via css_get()
55 * and css_put() */
56
57 atomic_t refcnt;
58
59 unsigned long flags;
60};
61
62/* bits in struct cgroup_subsys_state flags field */
63enum {
64 CSS_ROOT, /* This CSS is the root of the subsystem */
65};
66
67/*
68 * Call css_get() to hold a reference on the cgroup;
69 *
70 */
71
72static inline void css_get(struct cgroup_subsys_state *css)
73{
74 /* We don't need to reference count the root state */
75 if (!test_bit(CSS_ROOT, &css->flags))
76 atomic_inc(&css->refcnt);
77}
78/*
79 * css_put() should be called to release a reference taken by
80 * css_get()
81 */
82
83extern void __css_put(struct cgroup_subsys_state *css);
84static inline void css_put(struct cgroup_subsys_state *css)
85{
86 if (!test_bit(CSS_ROOT, &css->flags))
87 __css_put(css);
88}
89
90struct cgroup {
91 unsigned long flags; /* "unsigned long" so bitops work */
92
93 /* count users of this cgroup. >0 means busy, but doesn't
94 * necessarily indicate the number of tasks in the
95 * cgroup */
96 atomic_t count;
97
98 /*
99 * We link our 'sibling' struct into our parent's 'children'.
100 * Our children link their 'sibling' into our 'children'.
101 */
102 struct list_head sibling; /* my parent's children */
103 struct list_head children; /* my children */
104
105 struct cgroup *parent; /* my parent */
106 struct dentry *dentry; /* cgroup fs entry */
107
108 /* Private pointers for each registered subsystem */
109 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
110
111 struct cgroupfs_root *root;
112 struct cgroup *top_cgroup;
113
114 /*
115 * List of cg_cgroup_links pointing at css_sets with
116 * tasks in this cgroup. Protected by css_set_lock
117 */
118 struct list_head css_sets;
119
120 /*
121 * Linked list running through all cgroups that can
122 * potentially be reaped by the release agent. Protected by
123 * release_list_lock
124 */
125 struct list_head release_list;
126};
127
128/* A css_set is a structure holding pointers to a set of
129 * cgroup_subsys_state objects. This saves space in the task struct
130 * object and speeds up fork()/exit(), since a single inc/dec and a
131 * list_add()/del() can bump the reference count on the entire
132 * cgroup set for a task.
133 */
134
135struct css_set {
136
137 /* Reference count */
138 struct kref ref;
139
140 /*
141 * List running through all cgroup groups. Protected by
142 * css_set_lock
143 */
144 struct list_head list;
145
146 /*
147 * List running through all tasks using this cgroup
148 * group. Protected by css_set_lock
149 */
150 struct list_head tasks;
151
152 /*
153 * List of cg_cgroup_link objects on link chains from
154 * cgroups referenced from this css_set. Protected by
155 * css_set_lock
156 */
157 struct list_head cg_links;
158
159 /*
160 * Set of subsystem states, one for each subsystem. This array
161 * is immutable after creation apart from the init_css_set
162 * during subsystem registration (at boot time).
163 */
164 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
165
166};
167
168/* struct cftype:
169 *
170 * The files in the cgroup filesystem mostly have a very simple read/write
171 * handling, some common function will take care of it. Nevertheless some cases
172 * (read tasks) are special and therefore I define this structure for every
173 * kind of file.
174 *
175 *
176 * When reading/writing to a file:
177 * - the cgroup to use in file->f_dentry->d_parent->d_fsdata
178 * - the 'cftype' of the file is file->f_dentry->d_fsdata
179 */
180
181#define MAX_CFTYPE_NAME 64
182struct cftype {
183 /* By convention, the name should begin with the name of the
184 * subsystem, followed by a period */
185 char name[MAX_CFTYPE_NAME];
186 int private;
187 int (*open) (struct inode *inode, struct file *file);
188 ssize_t (*read) (struct cgroup *cont, struct cftype *cft,
189 struct file *file,
190 char __user *buf, size_t nbytes, loff_t *ppos);
191 /*
192 * read_uint() is a shortcut for the common case of returning a
193 * single integer. Use it in place of read()
194 */
195 u64 (*read_uint) (struct cgroup *cont, struct cftype *cft);
196 ssize_t (*write) (struct cgroup *cont, struct cftype *cft,
197 struct file *file,
198 const char __user *buf, size_t nbytes, loff_t *ppos);
199
200 /*
201 * write_uint() is a shortcut for the common case of accepting
202 * a single integer (as parsed by simple_strtoull) from
203 * userspace. Use in place of write(); return 0 or error.
204 */
205 int (*write_uint) (struct cgroup *cont, struct cftype *cft, u64 val);
206
207 int (*release) (struct inode *inode, struct file *file);
208};
209
210/* Add a new file to the given cgroup directory. Should only be
211 * called by subsystems from within a populate() method */
212int cgroup_add_file(struct cgroup *cont, struct cgroup_subsys *subsys,
213 const struct cftype *cft);
214
215/* Add a set of new files to the given cgroup directory. Should
216 * only be called by subsystems from within a populate() method */
217int cgroup_add_files(struct cgroup *cont,
218 struct cgroup_subsys *subsys,
219 const struct cftype cft[],
220 int count);
221
222int cgroup_is_removed(const struct cgroup *cont);
223
224int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
225
226int cgroup_task_count(const struct cgroup *cont);
227
228/* Return true if the cgroup is a descendant of the current cgroup */
229int cgroup_is_descendant(const struct cgroup *cont);
230
231/* Control Group subsystem type. See Documentation/cgroups.txt for details */
232
233struct cgroup_subsys {
234 struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
235 struct cgroup *cont);
236 void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cont);
237 int (*can_attach)(struct cgroup_subsys *ss,
238 struct cgroup *cont, struct task_struct *tsk);
239 void (*attach)(struct cgroup_subsys *ss, struct cgroup *cont,
240 struct cgroup *old_cont, struct task_struct *tsk);
241 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
242 void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
243 int (*populate)(struct cgroup_subsys *ss,
244 struct cgroup *cont);
245 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont);
246 void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
247 int subsys_id;
248 int active;
249 int early_init;
250#define MAX_CGROUP_TYPE_NAMELEN 32
251 const char *name;
252
253 /* Protected by RCU */
254 struct cgroupfs_root *root;
255
256 struct list_head sibling;
257
258 void *private;
259};
260
261#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
262#include <linux/cgroup_subsys.h>
263#undef SUBSYS
264
265static inline struct cgroup_subsys_state *cgroup_subsys_state(
266 struct cgroup *cont, int subsys_id)
267{
268 return cont->subsys[subsys_id];
269}
270
271static inline struct cgroup_subsys_state *task_subsys_state(
272 struct task_struct *task, int subsys_id)
273{
274 return rcu_dereference(task->cgroups->subsys[subsys_id]);
275}
276
277static inline struct cgroup* task_cgroup(struct task_struct *task,
278 int subsys_id)
279{
280 return task_subsys_state(task, subsys_id)->cgroup;
281}
282
283int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
284
285int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
286
287/* A cgroup_iter should be treated as an opaque object */
288struct cgroup_iter {
289 struct list_head *cg_link;
290 struct list_head *task;
291};
292
293/* To iterate across the tasks in a cgroup:
294 *
295 * 1) call cgroup_iter_start to intialize an iterator
296 *
297 * 2) call cgroup_iter_next() to retrieve member tasks until it
298 * returns NULL or until you want to end the iteration
299 *
300 * 3) call cgroup_iter_end() to destroy the iterator.
301 */
302void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it);
303struct task_struct *cgroup_iter_next(struct cgroup *cont,
304 struct cgroup_iter *it);
305void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);
306
307#else /* !CONFIG_CGROUPS */
308
309static inline int cgroup_init_early(void) { return 0; }
310static inline int cgroup_init(void) { return 0; }
311static inline void cgroup_init_smp(void) {}
312static inline void cgroup_fork(struct task_struct *p) {}
313static inline void cgroup_fork_callbacks(struct task_struct *p) {}
314static inline void cgroup_post_fork(struct task_struct *p) {}
315static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
316
317static inline void cgroup_lock(void) {}
318static inline void cgroup_unlock(void) {}
319static inline int cgroupstats_build(struct cgroupstats *stats,
320 struct dentry *dentry)
321{
322 return -EINVAL;
323}
324
325#endif /* !CONFIG_CGROUPS */
326
327#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
new file mode 100644
index 000000000000..0b9bfbde8168
--- /dev/null
+++ b/include/linux/cgroup_subsys.h
@@ -0,0 +1,38 @@
1/* Add subsystem definitions of the form SUBSYS(<name>) in this
2 * file. Surround each one by a line of comment markers so that
3 * patches don't collide
4 */
5
6/* */
7
8/* */
9
10#ifdef CONFIG_CPUSETS
11SUBSYS(cpuset)
12#endif
13
14/* */
15
16#ifdef CONFIG_CGROUP_CPUACCT
17SUBSYS(cpuacct)
18#endif
19
20/* */
21
22#ifdef CONFIG_CGROUP_DEBUG
23SUBSYS(debug)
24#endif
25
26/* */
27
28#ifdef CONFIG_CGROUP_NS
29SUBSYS(ns)
30#endif
31
32/* */
33
34#ifdef CONFIG_FAIR_CGROUP_SCHED
35SUBSYS(cpu_cgroup)
36#endif
37
38/* */
diff --git a/include/linux/cgroupstats.h b/include/linux/cgroupstats.h
new file mode 100644
index 000000000000..4f53abf6855d
--- /dev/null
+++ b/include/linux/cgroupstats.h
@@ -0,0 +1,70 @@
1/* cgroupstats.h - exporting per-cgroup statistics
2 *
3 * Copyright IBM Corporation, 2007
4 * Author Balbir Singh <balbir@linux.vnet.ibm.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2.1 of the GNU Lesser General Public License
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 */
14
15#ifndef _LINUX_CGROUPSTATS_H
16#define _LINUX_CGROUPSTATS_H
17
18#include <linux/taskstats.h>
19
20/*
21 * Data shared between user space and kernel space on a per cgroup
22 * basis. This data is shared using taskstats.
23 *
24 * Most of these states are derived by looking at the task->state value
25 * For the nr_io_wait state, a flag in the delay accounting structure
26 * indicates that the task is waiting on IO
27 *
28 * Each member is aligned to a 8 byte boundary.
29 */
30struct cgroupstats {
31 __u64 nr_sleeping; /* Number of tasks sleeping */
32 __u64 nr_running; /* Number of tasks running */
33 __u64 nr_stopped; /* Number of tasks in stopped state */
34 __u64 nr_uninterruptible; /* Number of tasks in uninterruptible */
35 /* state */
36 __u64 nr_io_wait; /* Number of tasks waiting on IO */
37};
38
39/*
40 * Commands sent from userspace
41 * Not versioned. New commands should only be inserted at the enum's end
42 * prior to __CGROUPSTATS_CMD_MAX
43 */
44
45enum {
46 CGROUPSTATS_CMD_UNSPEC = __TASKSTATS_CMD_MAX, /* Reserved */
47 CGROUPSTATS_CMD_GET, /* user->kernel request/get-response */
48 CGROUPSTATS_CMD_NEW, /* kernel->user event */
49 __CGROUPSTATS_CMD_MAX,
50};
51
52#define CGROUPSTATS_CMD_MAX (__CGROUPSTATS_CMD_MAX - 1)
53
54enum {
55 CGROUPSTATS_TYPE_UNSPEC = 0, /* Reserved */
56 CGROUPSTATS_TYPE_CGROUP_STATS, /* contains name + stats */
57 __CGROUPSTATS_TYPE_MAX,
58};
59
60#define CGROUPSTATS_TYPE_MAX (__CGROUPSTATS_TYPE_MAX - 1)
61
62enum {
63 CGROUPSTATS_CMD_ATTR_UNSPEC = 0,
64 CGROUPSTATS_CMD_ATTR_FD,
65 __CGROUPSTATS_CMD_ATTR_MAX,
66};
67
68#define CGROUPSTATS_CMD_ATTR_MAX (__CGROUPSTATS_CMD_ATTR_MAX - 1)
69
70#endif /* _LINUX_CGROUPSTATS_H */
diff --git a/include/linux/cpu_acct.h b/include/linux/cpu_acct.h
new file mode 100644
index 000000000000..6b5fd8a66c8d
--- /dev/null
+++ b/include/linux/cpu_acct.h
@@ -0,0 +1,14 @@
1
2#ifndef _LINUX_CPU_ACCT_H
3#define _LINUX_CPU_ACCT_H
4
5#include <linux/cgroup.h>
6#include <asm/cputime.h>
7
8#ifdef CONFIG_CGROUP_CPUACCT
9extern void cpuacct_charge(struct task_struct *, cputime_t cputime);
10#else
11static void inline cpuacct_charge(struct task_struct *p, cputime_t cputime) {}
12#endif
13
14#endif
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index ea44d2e768a0..ecae585ec3da 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -11,6 +11,7 @@
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/cpumask.h> 12#include <linux/cpumask.h>
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <linux/cgroup.h>
14 15
15#ifdef CONFIG_CPUSETS 16#ifdef CONFIG_CPUSETS
16 17
@@ -19,9 +20,8 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
19extern int cpuset_init_early(void); 20extern int cpuset_init_early(void);
20extern int cpuset_init(void); 21extern int cpuset_init(void);
21extern void cpuset_init_smp(void); 22extern void cpuset_init_smp(void);
22extern void cpuset_fork(struct task_struct *p);
23extern void cpuset_exit(struct task_struct *p);
24extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); 23extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
24extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p);
25extern nodemask_t cpuset_mems_allowed(struct task_struct *p); 25extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
26#define cpuset_current_mems_allowed (current->mems_allowed) 26#define cpuset_current_mems_allowed (current->mems_allowed)
27void cpuset_init_current_mems_allowed(void); 27void cpuset_init_current_mems_allowed(void);
@@ -76,18 +76,22 @@ static inline int cpuset_do_slab_mem_spread(void)
76 76
77extern void cpuset_track_online_nodes(void); 77extern void cpuset_track_online_nodes(void);
78 78
79extern int current_cpuset_is_being_rebound(void);
80
79#else /* !CONFIG_CPUSETS */ 81#else /* !CONFIG_CPUSETS */
80 82
81static inline int cpuset_init_early(void) { return 0; } 83static inline int cpuset_init_early(void) { return 0; }
82static inline int cpuset_init(void) { return 0; } 84static inline int cpuset_init(void) { return 0; }
83static inline void cpuset_init_smp(void) {} 85static inline void cpuset_init_smp(void) {}
84static inline void cpuset_fork(struct task_struct *p) {}
85static inline void cpuset_exit(struct task_struct *p) {}
86 86
87static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) 87static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
88{ 88{
89 return cpu_possible_map; 89 return cpu_possible_map;
90} 90}
91static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p)
92{
93 return cpu_possible_map;
94}
91 95
92static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) 96static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
93{ 97{
@@ -148,6 +152,11 @@ static inline int cpuset_do_slab_mem_spread(void)
148 152
149static inline void cpuset_track_online_nodes(void) {} 153static inline void cpuset_track_online_nodes(void) {}
150 154
155static inline int current_cpuset_is_being_rebound(void)
156{
157 return 0;
158}
159
151#endif /* !CONFIG_CPUSETS */ 160#endif /* !CONFIG_CPUSETS */
152 161
153#endif /* _LINUX_CPUSET_H */ 162#endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 55d1ca5e60f5..ab94bc083558 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -26,6 +26,7 @@
26 * Used to set current->delays->flags 26 * Used to set current->delays->flags
27 */ 27 */
28#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ 28#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */
29#define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */
29 30
30#ifdef CONFIG_TASK_DELAY_ACCT 31#ifdef CONFIG_TASK_DELAY_ACCT
31 32
@@ -39,6 +40,14 @@ extern void __delayacct_blkio_end(void);
39extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); 40extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
40extern __u64 __delayacct_blkio_ticks(struct task_struct *); 41extern __u64 __delayacct_blkio_ticks(struct task_struct *);
41 42
43static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
44{
45 if (p->delays)
46 return (p->delays->flags & DELAYACCT_PF_BLKIO);
47 else
48 return 0;
49}
50
42static inline void delayacct_set_flag(int flag) 51static inline void delayacct_set_flag(int flag)
43{ 52{
44 if (current->delays) 53 if (current->delays)
@@ -71,6 +80,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
71 80
72static inline void delayacct_blkio_start(void) 81static inline void delayacct_blkio_start(void)
73{ 82{
83 delayacct_set_flag(DELAYACCT_PF_BLKIO);
74 if (current->delays) 84 if (current->delays)
75 __delayacct_blkio_start(); 85 __delayacct_blkio_start();
76} 86}
@@ -79,6 +89,7 @@ static inline void delayacct_blkio_end(void)
79{ 89{
80 if (current->delays) 90 if (current->delays)
81 __delayacct_blkio_end(); 91 __delayacct_blkio_end();
92 delayacct_clear_flag(DELAYACCT_PF_BLKIO);
82} 93}
83 94
84static inline int delayacct_add_tsk(struct taskstats *d, 95static inline int delayacct_add_tsk(struct taskstats *d,
@@ -116,6 +127,8 @@ static inline int delayacct_add_tsk(struct taskstats *d,
116{ return 0; } 127{ return 0; }
117static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) 128static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
118{ return 0; } 129{ return 0; }
130static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
131{ return 0; }
119#endif /* CONFIG_TASK_DELAY_ACCT */ 132#endif /* CONFIG_TASK_DELAY_ACCT */
120 133
121#endif 134#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6a4d170ad9a5..1657e995f72c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -123,6 +123,7 @@ extern int dir_notify_enable;
123#define MS_SLAVE (1<<19) /* change to slave */ 123#define MS_SLAVE (1<<19) /* change to slave */
124#define MS_SHARED (1<<20) /* change to shared */ 124#define MS_SHARED (1<<20) /* change to shared */
125#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ 125#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
126#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
126#define MS_ACTIVE (1<<30) 127#define MS_ACTIVE (1<<30)
127#define MS_NOUSER (1<<31) 128#define MS_NOUSER (1<<31)
128 129
@@ -1459,7 +1460,8 @@ void unnamed_dev_init(void);
1459 1460
1460extern int register_filesystem(struct file_system_type *); 1461extern int register_filesystem(struct file_system_type *);
1461extern int unregister_filesystem(struct file_system_type *); 1462extern int unregister_filesystem(struct file_system_type *);
1462extern struct vfsmount *kern_mount(struct file_system_type *); 1463extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
1464#define kern_mount(type) kern_mount_data(type, NULL)
1463extern int may_umount_tree(struct vfsmount *); 1465extern int may_umount_tree(struct vfsmount *);
1464extern int may_umount(struct vfsmount *); 1466extern int may_umount(struct vfsmount *);
1465extern void umount_tree(struct vfsmount *, int, struct list_head *); 1467extern void umount_tree(struct vfsmount *, int, struct list_head *);
@@ -1922,6 +1924,8 @@ extern int vfs_fstat(unsigned int, struct kstat *);
1922 1924
1923extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long); 1925extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
1924 1926
1927extern void get_filesystem(struct file_system_type *fs);
1928extern void put_filesystem(struct file_system_type *fs);
1925extern struct file_system_type *get_fs_type(const char *name); 1929extern struct file_system_type *get_fs_type(const char *name);
1926extern struct super_block *get_super(struct block_device *); 1930extern struct super_block *get_super(struct block_device *);
1927extern struct super_block *user_get_super(dev_t); 1931extern struct super_block *user_get_super(dev_t);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index edb8024d744b..6e35b92b1d2c 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -469,8 +469,8 @@ struct hid_device { /* device report descriptor */
469 /* handler for raw output data, used by hidraw */ 469 /* handler for raw output data, used by hidraw */
470 int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t); 470 int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t);
471#ifdef CONFIG_USB_HIDINPUT_POWERBOOK 471#ifdef CONFIG_USB_HIDINPUT_POWERBOOK
472 unsigned long pb_pressed_fn[NBITS(KEY_MAX)]; 472 unsigned long pb_pressed_fn[BITS_TO_LONGS(KEY_CNT)];
473 unsigned long pb_pressed_numlock[NBITS(KEY_MAX)]; 473 unsigned long pb_pressed_numlock[BITS_TO_LONGS(KEY_CNT)];
474#endif 474#endif
475}; 475};
476 476
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d4b2f1c76e12..cae35b6b9aec 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -67,9 +67,6 @@
67 .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ 67 .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \
68 .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ 68 .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \
69 .rlim = INIT_RLIMITS, \ 69 .rlim = INIT_RLIMITS, \
70 .pgrp = 0, \
71 .tty_old_pgrp = NULL, \
72 { .__session = 0}, \
73} 70}
74 71
75extern struct nsproxy init_nsproxy; 72extern struct nsproxy init_nsproxy;
@@ -94,15 +91,18 @@ extern struct group_info init_groups;
94 91
95#define INIT_STRUCT_PID { \ 92#define INIT_STRUCT_PID { \
96 .count = ATOMIC_INIT(1), \ 93 .count = ATOMIC_INIT(1), \
97 .nr = 0, \
98 /* Don't put this struct pid in pid_hash */ \
99 .pid_chain = { .next = NULL, .pprev = NULL }, \
100 .tasks = { \ 94 .tasks = { \
101 { .first = &init_task.pids[PIDTYPE_PID].node }, \ 95 { .first = &init_task.pids[PIDTYPE_PID].node }, \
102 { .first = &init_task.pids[PIDTYPE_PGID].node }, \ 96 { .first = &init_task.pids[PIDTYPE_PGID].node }, \
103 { .first = &init_task.pids[PIDTYPE_SID].node }, \ 97 { .first = &init_task.pids[PIDTYPE_SID].node }, \
104 }, \ 98 }, \
105 .rcu = RCU_HEAD_INIT, \ 99 .rcu = RCU_HEAD_INIT, \
100 .level = 0, \
101 .numbers = { { \
102 .nr = 0, \
103 .ns = &init_pid_ns, \
104 .pid_chain = { .next = NULL, .pprev = NULL }, \
105 }, } \
106} 106}
107 107
108#define INIT_PID_LINK(type) \ 108#define INIT_PID_LINK(type) \
diff --git a/include/linux/input.h b/include/linux/input.h
index f30da6fc08e3..62268929856c 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -98,6 +98,7 @@ struct input_absinfo {
98#define EV_PWR 0x16 98#define EV_PWR 0x16
99#define EV_FF_STATUS 0x17 99#define EV_FF_STATUS 0x17
100#define EV_MAX 0x1f 100#define EV_MAX 0x1f
101#define EV_CNT (EV_MAX+1)
101 102
102/* 103/*
103 * Synchronization events. 104 * Synchronization events.
@@ -567,6 +568,7 @@ struct input_absinfo {
567/* We avoid low common keys in module aliases so they don't get huge. */ 568/* We avoid low common keys in module aliases so they don't get huge. */
568#define KEY_MIN_INTERESTING KEY_MUTE 569#define KEY_MIN_INTERESTING KEY_MUTE
569#define KEY_MAX 0x1ff 570#define KEY_MAX 0x1ff
571#define KEY_CNT (KEY_MAX+1)
570 572
571/* 573/*
572 * Relative axes 574 * Relative axes
@@ -583,6 +585,7 @@ struct input_absinfo {
583#define REL_WHEEL 0x08 585#define REL_WHEEL 0x08
584#define REL_MISC 0x09 586#define REL_MISC 0x09
585#define REL_MAX 0x0f 587#define REL_MAX 0x0f
588#define REL_CNT (REL_MAX+1)
586 589
587/* 590/*
588 * Absolute axes 591 * Absolute axes
@@ -615,6 +618,7 @@ struct input_absinfo {
615#define ABS_VOLUME 0x20 618#define ABS_VOLUME 0x20
616#define ABS_MISC 0x28 619#define ABS_MISC 0x28
617#define ABS_MAX 0x3f 620#define ABS_MAX 0x3f
621#define ABS_CNT (ABS_MAX+1)
618 622
619/* 623/*
620 * Switch events 624 * Switch events
@@ -625,6 +629,7 @@ struct input_absinfo {
625#define SW_HEADPHONE_INSERT 0x02 /* set = inserted */ 629#define SW_HEADPHONE_INSERT 0x02 /* set = inserted */
626#define SW_RADIO 0x03 /* set = radio enabled */ 630#define SW_RADIO 0x03 /* set = radio enabled */
627#define SW_MAX 0x0f 631#define SW_MAX 0x0f
632#define SW_CNT (SW_MAX+1)
628 633
629/* 634/*
630 * Misc events 635 * Misc events
@@ -636,6 +641,7 @@ struct input_absinfo {
636#define MSC_RAW 0x03 641#define MSC_RAW 0x03
637#define MSC_SCAN 0x04 642#define MSC_SCAN 0x04
638#define MSC_MAX 0x07 643#define MSC_MAX 0x07
644#define MSC_CNT (MSC_MAX+1)
639 645
640/* 646/*
641 * LEDs 647 * LEDs
@@ -653,6 +659,7 @@ struct input_absinfo {
653#define LED_MAIL 0x09 659#define LED_MAIL 0x09
654#define LED_CHARGING 0x0a 660#define LED_CHARGING 0x0a
655#define LED_MAX 0x0f 661#define LED_MAX 0x0f
662#define LED_CNT (LED_MAX+1)
656 663
657/* 664/*
658 * Autorepeat values 665 * Autorepeat values
@@ -670,6 +677,7 @@ struct input_absinfo {
670#define SND_BELL 0x01 677#define SND_BELL 0x01
671#define SND_TONE 0x02 678#define SND_TONE 0x02
672#define SND_MAX 0x07 679#define SND_MAX 0x07
680#define SND_CNT (SND_MAX+1)
673 681
674/* 682/*
675 * IDs. 683 * IDs.
@@ -920,6 +928,7 @@ struct ff_effect {
920#define FF_AUTOCENTER 0x61 928#define FF_AUTOCENTER 0x61
921 929
922#define FF_MAX 0x7f 930#define FF_MAX 0x7f
931#define FF_CNT (FF_MAX+1)
923 932
924#ifdef __KERNEL__ 933#ifdef __KERNEL__
925 934
@@ -932,10 +941,6 @@ struct ff_effect {
932#include <linux/timer.h> 941#include <linux/timer.h>
933#include <linux/mod_devicetable.h> 942#include <linux/mod_devicetable.h>
934 943
935#define NBITS(x) (((x)/BITS_PER_LONG)+1)
936#define BIT(x) (1UL<<((x)%BITS_PER_LONG))
937#define LONG(x) ((x)/BITS_PER_LONG)
938
939/** 944/**
940 * struct input_dev - represents an input device 945 * struct input_dev - represents an input device
941 * @name: name of the device 946 * @name: name of the device
@@ -1005,28 +1010,30 @@ struct ff_effect {
1005 * @going_away: marks devices that are in a middle of unregistering and 1010 * @going_away: marks devices that are in a middle of unregistering and
1006 * causes input_open_device*() fail with -ENODEV. 1011 * causes input_open_device*() fail with -ENODEV.
1007 * @dev: driver model's view of this device 1012 * @dev: driver model's view of this device
1013 * @cdev: union for struct device pointer
1008 * @h_list: list of input handles associated with the device. When 1014 * @h_list: list of input handles associated with the device. When
1009 * accessing the list dev->mutex must be held 1015 * accessing the list dev->mutex must be held
1010 * @node: used to place the device onto input_dev_list 1016 * @node: used to place the device onto input_dev_list
1011 */ 1017 */
1012struct input_dev { 1018struct input_dev {
1013 1019 /* private: */
1014 void *private; /* do not use */ 1020 void *private; /* do not use */
1021 /* public: */
1015 1022
1016 const char *name; 1023 const char *name;
1017 const char *phys; 1024 const char *phys;
1018 const char *uniq; 1025 const char *uniq;
1019 struct input_id id; 1026 struct input_id id;
1020 1027
1021 unsigned long evbit[NBITS(EV_MAX)]; 1028 unsigned long evbit[BITS_TO_LONGS(EV_CNT)];
1022 unsigned long keybit[NBITS(KEY_MAX)]; 1029 unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
1023 unsigned long relbit[NBITS(REL_MAX)]; 1030 unsigned long relbit[BITS_TO_LONGS(REL_CNT)];
1024 unsigned long absbit[NBITS(ABS_MAX)]; 1031 unsigned long absbit[BITS_TO_LONGS(ABS_CNT)];
1025 unsigned long mscbit[NBITS(MSC_MAX)]; 1032 unsigned long mscbit[BITS_TO_LONGS(MSC_CNT)];
1026 unsigned long ledbit[NBITS(LED_MAX)]; 1033 unsigned long ledbit[BITS_TO_LONGS(LED_CNT)];
1027 unsigned long sndbit[NBITS(SND_MAX)]; 1034 unsigned long sndbit[BITS_TO_LONGS(SND_CNT)];
1028 unsigned long ffbit[NBITS(FF_MAX)]; 1035 unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
1029 unsigned long swbit[NBITS(SW_MAX)]; 1036 unsigned long swbit[BITS_TO_LONGS(SW_CNT)];
1030 1037
1031 unsigned int keycodemax; 1038 unsigned int keycodemax;
1032 unsigned int keycodesize; 1039 unsigned int keycodesize;
@@ -1044,10 +1051,10 @@ struct input_dev {
1044 int abs[ABS_MAX + 1]; 1051 int abs[ABS_MAX + 1];
1045 int rep[REP_MAX + 1]; 1052 int rep[REP_MAX + 1];
1046 1053
1047 unsigned long key[NBITS(KEY_MAX)]; 1054 unsigned long key[BITS_TO_LONGS(KEY_CNT)];
1048 unsigned long led[NBITS(LED_MAX)]; 1055 unsigned long led[BITS_TO_LONGS(LED_CNT)];
1049 unsigned long snd[NBITS(SND_MAX)]; 1056 unsigned long snd[BITS_TO_LONGS(SND_CNT)];
1050 unsigned long sw[NBITS(SW_MAX)]; 1057 unsigned long sw[BITS_TO_LONGS(SW_CNT)];
1051 1058
1052 int absmax[ABS_MAX + 1]; 1059 int absmax[ABS_MAX + 1];
1053 int absmin[ABS_MAX + 1]; 1060 int absmin[ABS_MAX + 1];
@@ -1291,7 +1298,7 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min
1291 dev->absfuzz[axis] = fuzz; 1298 dev->absfuzz[axis] = fuzz;
1292 dev->absflat[axis] = flat; 1299 dev->absflat[axis] = flat;
1293 1300
1294 dev->absbit[LONG(axis)] |= BIT(axis); 1301 dev->absbit[BIT_WORD(axis)] |= BIT_MASK(axis);
1295} 1302}
1296 1303
1297extern struct class input_class; 1304extern struct class input_class;
@@ -1332,7 +1339,7 @@ struct ff_device {
1332 1339
1333 void *private; 1340 void *private;
1334 1341
1335 unsigned long ffbit[NBITS(FF_MAX)]; 1342 unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
1336 1343
1337 struct mutex mutex; 1344 struct mutex mutex;
1338 1345
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index ee111834091c..408696ea5189 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -89,6 +89,7 @@ struct kern_ipc_perm
89{ 89{
90 spinlock_t lock; 90 spinlock_t lock;
91 int deleted; 91 int deleted;
92 int id;
92 key_t key; 93 key_t key;
93 uid_t uid; 94 uid_t uid;
94 gid_t gid; 95 gid_t gid;
@@ -110,6 +111,8 @@ struct ipc_namespace {
110 int msg_ctlmax; 111 int msg_ctlmax;
111 int msg_ctlmnb; 112 int msg_ctlmnb;
112 int msg_ctlmni; 113 int msg_ctlmni;
114 atomic_t msg_bytes;
115 atomic_t msg_hdrs;
113 116
114 size_t shm_ctlmax; 117 size_t shm_ctlmax;
115 size_t shm_ctlall; 118 size_t shm_ctlall;
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index a3abf51e488f..16e7ed855a18 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -58,7 +58,7 @@
58 * CONFIG_JBD_DEBUG is on. 58 * CONFIG_JBD_DEBUG is on.
59 */ 59 */
60#define JBD_EXPENSIVE_CHECKING 60#define JBD_EXPENSIVE_CHECKING
61extern int journal_enable_debug; 61extern u8 journal_enable_debug;
62 62
63#define jbd_debug(n, f, a...) \ 63#define jbd_debug(n, f, a...) \
64 do { \ 64 do { \
@@ -248,17 +248,7 @@ typedef struct journal_superblock_s
248#include <linux/fs.h> 248#include <linux/fs.h>
249#include <linux/sched.h> 249#include <linux/sched.h>
250 250
251#define JBD_ASSERTIONS 251#define J_ASSERT(assert) BUG_ON(!(assert))
252#ifdef JBD_ASSERTIONS
253#define J_ASSERT(assert) \
254do { \
255 if (!(assert)) { \
256 printk (KERN_EMERG \
257 "Assertion failure in %s() at %s:%d: \"%s\"\n", \
258 __FUNCTION__, __FILE__, __LINE__, # assert); \
259 BUG(); \
260 } \
261} while (0)
262 252
263#if defined(CONFIG_BUFFER_DEBUG) 253#if defined(CONFIG_BUFFER_DEBUG)
264void buffer_assertion_failure(struct buffer_head *bh); 254void buffer_assertion_failure(struct buffer_head *bh);
@@ -274,10 +264,6 @@ void buffer_assertion_failure(struct buffer_head *bh);
274#define J_ASSERT_JH(jh, expr) J_ASSERT(expr) 264#define J_ASSERT_JH(jh, expr) J_ASSERT(expr)
275#endif 265#endif
276 266
277#else
278#define J_ASSERT(assert) do { } while (0)
279#endif /* JBD_ASSERTIONS */
280
281#if defined(JBD_PARANOID_IOFAIL) 267#if defined(JBD_PARANOID_IOFAIL)
282#define J_EXPECT(expr, why...) J_ASSERT(expr) 268#define J_EXPECT(expr, why...) J_ASSERT(expr)
283#define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) 269#define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ad4b82ce84af..2d9c448d8c52 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -187,6 +187,8 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
187extern size_t vmcoreinfo_size; 187extern size_t vmcoreinfo_size;
188extern size_t vmcoreinfo_max_size; 188extern size_t vmcoreinfo_max_size;
189 189
190int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
191 unsigned long long *crash_size, unsigned long long *crash_base);
190 192
191#else /* !CONFIG_KEXEC */ 193#else /* !CONFIG_KEXEC */
192struct pt_regs; 194struct pt_regs;
diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h
index 33b5c2e325b9..65c2d70853e9 100644
--- a/include/linux/keyboard.h
+++ b/include/linux/keyboard.h
@@ -23,10 +23,21 @@
23#define MAX_NR_OF_USER_KEYMAPS 256 /* should be at least 7 */ 23#define MAX_NR_OF_USER_KEYMAPS 256 /* should be at least 7 */
24 24
25#ifdef __KERNEL__ 25#ifdef __KERNEL__
26struct notifier_block;
26extern const int NR_TYPES; 27extern const int NR_TYPES;
27extern const int max_vals[]; 28extern const int max_vals[];
28extern unsigned short *key_maps[MAX_NR_KEYMAPS]; 29extern unsigned short *key_maps[MAX_NR_KEYMAPS];
29extern unsigned short plain_map[NR_KEYS]; 30extern unsigned short plain_map[NR_KEYS];
31
32struct keyboard_notifier_param {
33 struct vc_data *vc; /* VC on which the keyboard press was done */
34 int down; /* Pressure of the key? */
35 int shift; /* Current shift mask */
36 unsigned int value; /* keycode, unicode value or keysym */
37};
38
39extern int register_keyboard_notifier(struct notifier_block *nb);
40extern int unregister_keyboard_notifier(struct notifier_block *nb);
30#endif 41#endif
31 42
32#define MAX_NR_FUNC 256 /* max nr of strings assigned to keys */ 43#define MAX_NR_FUNC 256 /* max nr of strings assigned to keys */
diff --git a/include/linux/list.h b/include/linux/list.h
index b0cf0135fe3e..75ce2cb4ff6e 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -478,8 +478,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
478 pos = n, n = pos->next) 478 pos = n, n = pos->next)
479 479
480/** 480/**
481 * list_for_each_prev_safe - iterate over a list backwards safe against removal 481 * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
482 of list entry
483 * @pos: the &struct list_head to use as a loop cursor. 482 * @pos: the &struct list_head to use as a loop cursor.
484 * @n: another &struct list_head to use as temporary storage 483 * @n: another &struct list_head to use as temporary storage
485 * @head: the head for your list. 484 * @head: the head for your list.
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index f6279f68a827..4c4d236ded18 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -276,6 +276,14 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
276 (lock)->dep_map.key, sub) 276 (lock)->dep_map.key, sub)
277 277
278/* 278/*
279 * To initialize a lockdep_map statically use this macro.
280 * Note that _name must not be NULL.
281 */
282#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
283 { .name = (_name), .key = (void *)(_key), }
284
285
286/*
279 * Acquire a lock. 287 * Acquire a lock.
280 * 288 *
281 * Values for "read": 289 * Values for "read":
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 722d4755060f..1fa0c2ce4dec 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -37,6 +37,7 @@
37 37
38#define SMB_SUPER_MAGIC 0x517B 38#define SMB_SUPER_MAGIC 0x517B
39#define USBDEVICE_SUPER_MAGIC 0x9fa2 39#define USBDEVICE_SUPER_MAGIC 0x9fa2
40#define CGROUP_SUPER_MAGIC 0x27e0eb
40 41
41#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA 42#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
42#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA 43#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA
diff --git a/include/linux/marker.h b/include/linux/marker.h
new file mode 100644
index 000000000000..5f36cf946bcb
--- /dev/null
+++ b/include/linux/marker.h
@@ -0,0 +1,129 @@
1#ifndef _LINUX_MARKER_H
2#define _LINUX_MARKER_H
3
4/*
5 * Code markup for dynamic and static tracing.
6 *
 7 * See Documentation/markers.txt.
8 *
9 * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
10 *
11 * This file is released under the GPLv2.
12 * See the file COPYING for more details.
13 */
14
15#include <linux/types.h>
16
17struct module;
18struct marker;
19
20/**
21 * marker_probe_func - Type of a marker probe function
22 * @mdata: pointer of type struct marker
23 * @private_data: caller site private data
24 * @fmt: format string
25 * @...: variable argument list
26 *
27 * Type of marker probe functions. They receive the mdata and need to parse the
28 * format string to recover the variable argument list.
29 */
30typedef void marker_probe_func(const struct marker *mdata,
31 void *private_data, const char *fmt, ...);
32
33struct marker {
34 const char *name; /* Marker name */
35 const char *format; /* Marker format string, describing the
36 * variable argument list.
37 */
38 char state; /* Marker state. */
39 marker_probe_func *call;/* Probe handler function pointer */
40 void *private; /* Private probe data */
41} __attribute__((aligned(8)));
42
43#ifdef CONFIG_MARKERS
44
45/*
46 * Note : the empty asm volatile with read constraint is used here instead of a
47 * "used" attribute to fix a gcc 4.1.x bug.
48 * Make sure the alignment of the structure in the __markers section will
49 * not add unwanted padding between the beginning of the section and the
50 * structure. Force alignment to the same alignment as the section start.
51 */
52#define __trace_mark(name, call_data, format, args...) \
53 do { \
54 static const char __mstrtab_name_##name[] \
55 __attribute__((section("__markers_strings"))) \
56 = #name; \
57 static const char __mstrtab_format_##name[] \
58 __attribute__((section("__markers_strings"))) \
59 = format; \
60 static struct marker __mark_##name \
61 __attribute__((section("__markers"), aligned(8))) = \
62 { __mstrtab_name_##name, __mstrtab_format_##name, \
63 0, __mark_empty_function, NULL }; \
64 __mark_check_format(format, ## args); \
65 if (unlikely(__mark_##name.state)) { \
66 preempt_disable(); \
67 (*__mark_##name.call) \
68 (&__mark_##name, call_data, \
69 format, ## args); \
70 preempt_enable(); \
71 } \
72 } while (0)
73
74extern void marker_update_probe_range(struct marker *begin,
75 struct marker *end, struct module *probe_module, int *refcount);
76#else /* !CONFIG_MARKERS */
77#define __trace_mark(name, call_data, format, args...) \
78 __mark_check_format(format, ## args)
79static inline void marker_update_probe_range(struct marker *begin,
80 struct marker *end, struct module *probe_module, int *refcount)
81{ }
82#endif /* CONFIG_MARKERS */
83
84/**
85 * trace_mark - Marker
86 * @name: marker name, not quoted.
87 * @format: format string
88 * @args...: variable argument list
89 *
90 * Places a marker.
91 */
92#define trace_mark(name, format, args...) \
93 __trace_mark(name, NULL, format, ## args)
94
95#define MARK_MAX_FORMAT_LEN 1024
96
97/**
98 * MARK_NOARGS - Format string for a marker with no argument.
99 */
100#define MARK_NOARGS " "
101
102/* To be used for string format validity checking with gcc */
103static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...)
104{
105}
106
107extern marker_probe_func __mark_empty_function;
108
109/*
110 * Connect a probe to a marker.
111 * private data pointer must be a valid allocated memory address, or NULL.
112 */
113extern int marker_probe_register(const char *name, const char *format,
114 marker_probe_func *probe, void *private);
115
116/*
117 * Returns the private data given to marker_probe_register.
118 */
119extern void *marker_probe_unregister(const char *name);
120/*
121 * Unregister a marker by providing the registered private data.
122 */
123extern void *marker_probe_unregister_private_data(void *private);
124
125extern int marker_arm(const char *name);
126extern int marker_disarm(const char *name);
127extern void *marker_get_private_data(const char *name);
128
129#endif
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 38c04d61ee06..59c4865bc85f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -148,14 +148,6 @@ extern void mpol_rebind_task(struct task_struct *tsk,
148 const nodemask_t *new); 148 const nodemask_t *new);
149extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); 149extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
150extern void mpol_fix_fork_child_flag(struct task_struct *p); 150extern void mpol_fix_fork_child_flag(struct task_struct *p);
151#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
152
153#ifdef CONFIG_CPUSETS
154#define current_cpuset_is_being_rebound() \
155 (cpuset_being_rebound == current->cpuset)
156#else
157#define current_cpuset_is_being_rebound() 0
158#endif
159 151
160extern struct mempolicy default_policy; 152extern struct mempolicy default_policy;
161extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, 153extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
@@ -173,8 +165,6 @@ static inline void check_highest_zone(enum zone_type k)
173int do_migrate_pages(struct mm_struct *mm, 165int do_migrate_pages(struct mm_struct *mm,
174 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); 166 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
175 167
176extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */
177
178#else 168#else
179 169
180struct mempolicy {}; 170struct mempolicy {};
@@ -248,8 +238,6 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
248{ 238{
249} 239}
250 240
251#define set_cpuset_being_rebound(x) do {} while (0)
252
253static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, 241static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
254 unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol) 242 unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
255{ 243{
diff --git a/include/linux/module.h b/include/linux/module.h
index 642f325e4917..2cbc0b87e329 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -15,6 +15,7 @@
15#include <linux/stringify.h> 15#include <linux/stringify.h>
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <linux/moduleparam.h> 17#include <linux/moduleparam.h>
18#include <linux/marker.h>
18#include <asm/local.h> 19#include <asm/local.h>
19 20
20#include <asm/module.h> 21#include <asm/module.h>
@@ -354,6 +355,10 @@ struct module
354 /* The command line arguments (may be mangled). People like 355 /* The command line arguments (may be mangled). People like
355 keeping pointers to this stuff */ 356 keeping pointers to this stuff */
356 char *args; 357 char *args;
358#ifdef CONFIG_MARKERS
359 struct marker *markers;
360 unsigned int num_markers;
361#endif
357}; 362};
358#ifndef MODULE_ARCH_INIT 363#ifndef MODULE_ARCH_INIT
359#define MODULE_ARCH_INIT {} 364#define MODULE_ARCH_INIT {}
@@ -457,6 +462,8 @@ int unregister_module_notifier(struct notifier_block * nb);
457 462
458extern void print_modules(void); 463extern void print_modules(void);
459 464
465extern void module_update_markers(struct module *probe_module, int *refcount);
466
460#else /* !CONFIG_MODULES... */ 467#else /* !CONFIG_MODULES... */
461#define EXPORT_SYMBOL(sym) 468#define EXPORT_SYMBOL(sym)
462#define EXPORT_SYMBOL_GPL(sym) 469#define EXPORT_SYMBOL_GPL(sym)
@@ -556,6 +563,11 @@ static inline void print_modules(void)
556{ 563{
557} 564}
558 565
566static inline void module_update_markers(struct module *probe_module,
567 int *refcount)
568{
569}
570
559#endif /* CONFIG_MODULES */ 571#endif /* CONFIG_MODULES */
560 572
561struct device_driver; 573struct device_driver;
diff --git a/include/linux/msg.h b/include/linux/msg.h
index f1b60740d641..10a3d5a1abff 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -77,7 +77,6 @@ struct msg_msg {
77/* one msq_queue structure for each present queue on the system */ 77/* one msq_queue structure for each present queue on the system */
78struct msg_queue { 78struct msg_queue {
79 struct kern_ipc_perm q_perm; 79 struct kern_ipc_perm q_perm;
80 int q_id;
81 time_t q_stime; /* last msgsnd time */ 80 time_t q_stime; /* last msgsnd time */
82 time_t q_rtime; /* last msgrcv time */ 81 time_t q_rtime; /* last msgrcv time */
83 time_t q_ctime; /* last change time */ 82 time_t q_ctime; /* last change time */
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index fad7ff17e468..0c40cc0b4a36 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -231,5 +231,22 @@ static inline int notifier_to_errno(int ret)
231#define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */ 231#define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */
232#define PM_POST_SUSPEND 0x0004 /* Suspend finished */ 232#define PM_POST_SUSPEND 0x0004 /* Suspend finished */
233 233
234/* Console keyboard events.
235 * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and
236 * KBD_KEYSYM. */
237#define KBD_KEYCODE 0x0001 /* Keyboard keycode, called before any other */
238#define KBD_UNBOUND_KEYCODE 0x0002 /* Keyboard keycode which is not bound to any other */
239#define KBD_UNICODE 0x0003 /* Keyboard unicode */
240#define KBD_KEYSYM 0x0004 /* Keyboard keysym */
241#define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */
242
243extern struct blocking_notifier_head reboot_notifier_list;
244
245/* Virtual Terminal events. */
246#define VT_ALLOCATE 0x0001 /* Console got allocated */
247#define VT_DEALLOCATE 0x0002 /* Console will be deallocated */
248#define VT_WRITE 0x0003 /* A char got output */
249#define VT_UPDATE 0x0004 /* A bigger update occurred */
250
234#endif /* __KERNEL__ */ 251#endif /* __KERNEL__ */
235#endif /* _LINUX_NOTIFIER_H */ 252#endif /* _LINUX_NOTIFIER_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 033a648709b6..0e66b57631fc 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -32,8 +32,39 @@ struct nsproxy {
32}; 32};
33extern struct nsproxy init_nsproxy; 33extern struct nsproxy init_nsproxy;
34 34
35/*
36 * the namespaces access rules are:
37 *
38 * 1. only current task is allowed to change tsk->nsproxy pointer or
39 * any pointer on the nsproxy itself
40 *
41 * 2. when accessing (i.e. reading) current task's namespaces - no
42 * precautions should be taken - just dereference the pointers
43 *
44 * 3. the access to other task namespaces is performed like this
45 * rcu_read_lock();
46 * nsproxy = task_nsproxy(tsk);
47 * if (nsproxy != NULL) {
48 * / *
49 * * work with the namespaces here
50 * * e.g. get the reference on one of them
51 * * /
52 * } / *
53 * * NULL task_nsproxy() means that this task is
54 * * almost dead (zombie)
55 * * /
56 * rcu_read_unlock();
57 *
58 */
59
60static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
61{
62 return rcu_dereference(tsk->nsproxy);
63}
64
35int copy_namespaces(unsigned long flags, struct task_struct *tsk); 65int copy_namespaces(unsigned long flags, struct task_struct *tsk);
36void get_task_namespaces(struct task_struct *tsk); 66void exit_task_namespaces(struct task_struct *tsk);
67void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
37void free_nsproxy(struct nsproxy *ns); 68void free_nsproxy(struct nsproxy *ns);
38int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, 69int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
39 struct fs_struct *); 70 struct fs_struct *);
@@ -45,14 +76,15 @@ static inline void put_nsproxy(struct nsproxy *ns)
45 } 76 }
46} 77}
47 78
48static inline void exit_task_namespaces(struct task_struct *p) 79static inline void get_nsproxy(struct nsproxy *ns)
49{ 80{
50 struct nsproxy *ns = p->nsproxy; 81 atomic_inc(&ns->count);
51 if (ns) {
52 task_lock(p);
53 p->nsproxy = NULL;
54 task_unlock(p);
55 put_nsproxy(ns);
56 }
57} 82}
83
84#ifdef CONFIG_CGROUP_NS
85int ns_cgroup_clone(struct task_struct *tsk);
86#else
87static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
88#endif
89
58#endif 90#endif
diff --git a/include/linux/of.h b/include/linux/of.h
index 6df80e985914..5c39b9270ff7 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -16,8 +16,8 @@
16 * 2 of the License, or (at your option) any later version. 16 * 2 of the License, or (at your option) any later version.
17 */ 17 */
18#include <linux/types.h> 18#include <linux/types.h>
19#include <linux/bitops.h>
19 20
20#include <asm/bitops.h>
21#include <asm/prom.h> 21#include <asm/prom.h>
22 22
23/* flag descriptions */ 23/* flag descriptions */
diff --git a/include/linux/phantom.h b/include/linux/phantom.h
index d3ebbfae6903..96f4048a6cc3 100644
--- a/include/linux/phantom.h
+++ b/include/linux/phantom.h
@@ -30,7 +30,11 @@ struct phm_regs {
30#define PHN_SET_REG _IOW (PH_IOC_MAGIC, 1, struct phm_reg *) 30#define PHN_SET_REG _IOW (PH_IOC_MAGIC, 1, struct phm_reg *)
31#define PHN_GET_REGS _IOWR(PH_IOC_MAGIC, 2, struct phm_regs *) 31#define PHN_GET_REGS _IOWR(PH_IOC_MAGIC, 2, struct phm_regs *)
32#define PHN_SET_REGS _IOW (PH_IOC_MAGIC, 3, struct phm_regs *) 32#define PHN_SET_REGS _IOW (PH_IOC_MAGIC, 3, struct phm_regs *)
33#define PH_IOC_MAXNR 3 33/* this ioctl tells the driver, that the caller is not OpenHaptics and might
34 * use improved registers update (no more phantom switchoffs when using
35 * libphantom) */
36#define PHN_NOT_OH _IO (PH_IOC_MAGIC, 4)
37#define PH_IOC_MAXNR 4
34 38
35#define PHN_CONTROL 0x6 /* control byte in iaddr space */ 39#define PHN_CONTROL 0x6 /* control byte in iaddr space */
36#define PHN_CTL_AMP 0x1 /* switch after torques change */ 40#define PHN_CTL_AMP 0x1 /* switch after torques change */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 1e0e4e3423a6..e29a900a8499 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -40,15 +40,28 @@ enum pid_type
40 * processes. 40 * processes.
41 */ 41 */
42 42
43struct pid 43
44{ 44/*
45 atomic_t count; 45 * struct upid is used to get the id of the struct pid, as it is
46 * seen in particular namespace. Later the struct pid is found with
47 * find_pid_ns() using the int nr and struct pid_namespace *ns.
48 */
49
50struct upid {
46 /* Try to keep pid_chain in the same cacheline as nr for find_pid */ 51 /* Try to keep pid_chain in the same cacheline as nr for find_pid */
47 int nr; 52 int nr;
53 struct pid_namespace *ns;
48 struct hlist_node pid_chain; 54 struct hlist_node pid_chain;
55};
56
57struct pid
58{
59 atomic_t count;
49 /* lists of tasks that use this pid */ 60 /* lists of tasks that use this pid */
50 struct hlist_head tasks[PIDTYPE_MAX]; 61 struct hlist_head tasks[PIDTYPE_MAX];
51 struct rcu_head rcu; 62 struct rcu_head rcu;
63 int level;
64 struct upid numbers[1];
52}; 65};
53 66
54extern struct pid init_struct_pid; 67extern struct pid init_struct_pid;
@@ -83,26 +96,60 @@ extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type));
83extern void FASTCALL(transfer_pid(struct task_struct *old, 96extern void FASTCALL(transfer_pid(struct task_struct *old,
84 struct task_struct *new, enum pid_type)); 97 struct task_struct *new, enum pid_type));
85 98
99struct pid_namespace;
100extern struct pid_namespace init_pid_ns;
101
86/* 102/*
87 * look up a PID in the hash table. Must be called with the tasklist_lock 103 * look up a PID in the hash table. Must be called with the tasklist_lock
88 * or rcu_read_lock() held. 104 * or rcu_read_lock() held.
105 *
106 * find_pid_ns() finds the pid in the namespace specified
107 * find_pid() find the pid by its global id, i.e. in the init namespace
108 * find_vpid() finr the pid by its virtual id, i.e. in the current namespace
109 *
110 * see also find_task_by_pid() set in include/linux/sched.h
89 */ 111 */
90extern struct pid *FASTCALL(find_pid(int nr)); 112extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns));
113extern struct pid *find_vpid(int nr);
114extern struct pid *find_pid(int nr);
91 115
92/* 116/*
93 * Lookup a PID in the hash table, and return with it's count elevated. 117 * Lookup a PID in the hash table, and return with it's count elevated.
94 */ 118 */
95extern struct pid *find_get_pid(int nr); 119extern struct pid *find_get_pid(int nr);
96extern struct pid *find_ge_pid(int nr); 120extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
97 121
98extern struct pid *alloc_pid(void); 122extern struct pid *alloc_pid(struct pid_namespace *ns);
99extern void FASTCALL(free_pid(struct pid *pid)); 123extern void FASTCALL(free_pid(struct pid *pid));
124extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
125
126/*
127 * the helpers to get the pid's id seen from different namespaces
128 *
129 * pid_nr() : global id, i.e. the id seen from the init namespace;
130 * pid_vnr() : virtual id, i.e. the id seen from the namespace this pid
131 * belongs to. this only makes sence when called in the
132 * context of the task that belongs to the same namespace;
133 * pid_nr_ns() : id seen from the ns specified.
134 *
135 * see also task_xid_nr() etc in include/linux/sched.h
136 */
100 137
101static inline pid_t pid_nr(struct pid *pid) 138static inline pid_t pid_nr(struct pid *pid)
102{ 139{
103 pid_t nr = 0; 140 pid_t nr = 0;
104 if (pid) 141 if (pid)
105 nr = pid->nr; 142 nr = pid->numbers[0].nr;
143 return nr;
144}
145
146pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
147
148static inline pid_t pid_vnr(struct pid *pid)
149{
150 pid_t nr = 0;
151 if (pid)
152 nr = pid->numbers[pid->level].nr;
106 return nr; 153 return nr;
107} 154}
108 155
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index b9a17e08ff0f..0135c76c76c6 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -4,7 +4,6 @@
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/mm.h> 5#include <linux/mm.h>
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/pid.h>
8#include <linux/nsproxy.h> 7#include <linux/nsproxy.h>
9#include <linux/kref.h> 8#include <linux/kref.h>
10 9
@@ -20,13 +19,21 @@ struct pid_namespace {
20 struct pidmap pidmap[PIDMAP_ENTRIES]; 19 struct pidmap pidmap[PIDMAP_ENTRIES];
21 int last_pid; 20 int last_pid;
22 struct task_struct *child_reaper; 21 struct task_struct *child_reaper;
22 struct kmem_cache *pid_cachep;
23 int level;
24 struct pid_namespace *parent;
25#ifdef CONFIG_PROC_FS
26 struct vfsmount *proc_mnt;
27#endif
23}; 28};
24 29
25extern struct pid_namespace init_pid_ns; 30extern struct pid_namespace init_pid_ns;
26 31
27static inline void get_pid_ns(struct pid_namespace *ns) 32static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
28{ 33{
29 kref_get(&ns->kref); 34 if (ns != &init_pid_ns)
35 kref_get(&ns->kref);
36 return ns;
30} 37}
31 38
32extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); 39extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
@@ -34,12 +41,19 @@ extern void free_pid_ns(struct kref *kref);
34 41
35static inline void put_pid_ns(struct pid_namespace *ns) 42static inline void put_pid_ns(struct pid_namespace *ns)
36{ 43{
37 kref_put(&ns->kref, free_pid_ns); 44 if (ns != &init_pid_ns)
45 kref_put(&ns->kref, free_pid_ns);
38} 46}
39 47
40static inline struct task_struct *child_reaper(struct task_struct *tsk) 48static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
41{ 49{
42 return init_pid_ns.child_reaper; 50 return tsk->nsproxy->pid_ns;
51}
52
53static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
54{
55 BUG_ON(tsk != current);
56 return tsk->nsproxy->pid_ns->child_reaper;
43} 57}
44 58
45#endif /* _LINUX_PID_NS_H */ 59#endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/prio_heap.h b/include/linux/prio_heap.h
new file mode 100644
index 000000000000..08094350f26a
--- /dev/null
+++ b/include/linux/prio_heap.h
@@ -0,0 +1,58 @@
1#ifndef _LINUX_PRIO_HEAP_H
2#define _LINUX_PRIO_HEAP_H
3
4/*
5 * Simple insertion-only static-sized priority heap containing
6 * pointers, based on CLR, chapter 7
7 */
8
9#include <linux/gfp.h>
10
11/**
12 * struct ptr_heap - simple static-sized priority heap
13 * @ptrs - pointer to data area
14 * @max - max number of elements that can be stored in @ptrs
15 * @size - current number of valid elements in @ptrs (in the range 0..@size-1
16 * @gt: comparison operator, which should implement "greater than"
17 */
18struct ptr_heap {
19 void **ptrs;
20 int max;
21 int size;
22 int (*gt)(void *, void *);
23};
24
25/**
26 * heap_init - initialize an empty heap with a given memory size
27 * @heap: the heap structure to be initialized
28 * @size: amount of memory to use in bytes
29 * @gfp_mask: mask to pass to kmalloc()
30 * @gt: comparison operator, which should implement "greater than"
31 */
32extern int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
33 int (*gt)(void *, void *));
34
35/**
36 * heap_free - release a heap's storage
37 * @heap: the heap structure whose data should be released
38 */
39void heap_free(struct ptr_heap *heap);
40
41/**
42 * heap_insert - insert a value into the heap and return any overflowed value
43 * @heap: the heap to be operated on
44 * @p: the pointer to be inserted
45 *
46 * Attempts to insert the given value into the priority heap. If the
47 * heap is full prior to the insertion, then the resulting heap will
48 * consist of the smallest @max elements of the original heap and the
49 * new element; the greatest element will be removed from the heap and
50 * returned. Note that the returned element will be the new element
51 * (i.e. no change to the heap) if the new element is greater than all
52 * elements currently in the heap.
53 */
54extern void *heap_insert(struct ptr_heap *heap, void *p);
55
56
57
58#endif /* _LINUX_PRIO_HEAP_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 20741f668f7b..1ff461672060 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -125,7 +125,8 @@ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
125extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); 125extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
126 126
127extern struct vfsmount *proc_mnt; 127extern struct vfsmount *proc_mnt;
128extern int proc_fill_super(struct super_block *,void *,int); 128struct pid_namespace;
129extern int proc_fill_super(struct super_block *);
129extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); 130extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
130 131
131/* 132/*
@@ -142,6 +143,9 @@ extern const struct file_operations proc_kcore_operations;
142extern const struct file_operations proc_kmsg_operations; 143extern const struct file_operations proc_kmsg_operations;
143extern const struct file_operations ppc_htab_operations; 144extern const struct file_operations ppc_htab_operations;
144 145
146extern int pid_ns_prepare_proc(struct pid_namespace *ns);
147extern void pid_ns_release_proc(struct pid_namespace *ns);
148
145/* 149/*
146 * proc_tty.c 150 * proc_tty.c
147 */ 151 */
@@ -207,7 +211,9 @@ extern void proc_net_remove(struct net *net, const char *name);
207#define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; }) 211#define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; })
208static inline void proc_net_remove(struct net *net, const char *name) {} 212static inline void proc_net_remove(struct net *net, const char *name) {}
209 213
210static inline void proc_flush_task(struct task_struct *task) { } 214static inline void proc_flush_task(struct task_struct *task)
215{
216}
211 217
212static inline struct proc_dir_entry *create_proc_entry(const char *name, 218static inline struct proc_dir_entry *create_proc_entry(const char *name,
213 mode_t mode, struct proc_dir_entry *parent) { return NULL; } 219 mode_t mode, struct proc_dir_entry *parent) { return NULL; }
@@ -232,6 +238,15 @@ static inline void proc_tty_unregister_driver(struct tty_driver *driver) {};
232 238
233extern struct proc_dir_entry proc_root; 239extern struct proc_dir_entry proc_root;
234 240
241static inline int pid_ns_prepare_proc(struct pid_namespace *ns)
242{
243 return 0;
244}
245
246static inline void pid_ns_release_proc(struct pid_namespace *ns)
247{
248}
249
235#endif /* CONFIG_PROC_FS */ 250#endif /* CONFIG_PROC_FS */
236 251
237#if !defined(CONFIG_PROC_KCORE) 252#if !defined(CONFIG_PROC_KCORE)
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 8dcf237d3386..72bfccd3da22 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -85,7 +85,7 @@ void reiserfs_warning(struct super_block *s, const char *fmt, ...);
85if( !( cond ) ) \ 85if( !( cond ) ) \
86 reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at " \ 86 reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at " \
87 __FILE__ ":%i:%s: " format "\n", \ 87 __FILE__ ":%i:%s: " format "\n", \
88 in_interrupt() ? -1 : current -> pid, __LINE__ , __FUNCTION__ , ##args ) 88 in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __FUNCTION__ , ##args )
89 89
90#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args) 90#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
91 91
@@ -283,6 +283,18 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
283 return sb->s_fs_info; 283 return sb->s_fs_info;
284} 284}
285 285
286/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
287 * which overflows on large file systems. */
288static inline u32 reiserfs_bmap_count(struct super_block *sb)
289{
290 return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
291}
292
293static inline int bmap_would_wrap(unsigned bmap_nr)
294{
295 return bmap_nr > ((1LL << 16) - 1);
296}
297
286/** this says about version of key of all items (but stat data) the 298/** this says about version of key of all items (but stat data) the
287 object consists of */ 299 object consists of */
288#define get_inode_item_key_version( inode ) \ 300#define get_inode_item_key_version( inode ) \
@@ -1734,8 +1746,8 @@ int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
1734int journal_mark_freed(struct reiserfs_transaction_handle *, 1746int journal_mark_freed(struct reiserfs_transaction_handle *,
1735 struct super_block *, b_blocknr_t blocknr); 1747 struct super_block *, b_blocknr_t blocknr);
1736int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); 1748int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
1737int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr, 1749int reiserfs_in_journal(struct super_block *p_s_sb, unsigned int bmap_nr,
1738 int searchall, b_blocknr_t * next); 1750 int bit_nr, int searchall, b_blocknr_t *next);
1739int journal_begin(struct reiserfs_transaction_handle *, 1751int journal_begin(struct reiserfs_transaction_handle *,
1740 struct super_block *p_s_sb, unsigned long); 1752 struct super_block *p_s_sb, unsigned long);
1741int journal_join_abort(struct reiserfs_transaction_handle *, 1753int journal_join_abort(struct reiserfs_transaction_handle *,
@@ -1743,7 +1755,7 @@ int journal_join_abort(struct reiserfs_transaction_handle *,
1743void reiserfs_journal_abort(struct super_block *sb, int errno); 1755void reiserfs_journal_abort(struct super_block *sb, int errno);
1744void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); 1756void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
1745int reiserfs_allocate_list_bitmaps(struct super_block *s, 1757int reiserfs_allocate_list_bitmaps(struct super_block *s,
1746 struct reiserfs_list_bitmap *, int); 1758 struct reiserfs_list_bitmap *, unsigned int);
1747 1759
1748void add_save_link(struct reiserfs_transaction_handle *th, 1760void add_save_link(struct reiserfs_transaction_handle *th,
1749 struct inode *inode, int truncate); 1761 struct inode *inode, int truncate);
@@ -2041,7 +2053,7 @@ struct buffer_head *get_FEB(struct tree_balance *);
2041 * arguments, such as node, search path, transaction_handle, etc. */ 2053 * arguments, such as node, search path, transaction_handle, etc. */
2042struct __reiserfs_blocknr_hint { 2054struct __reiserfs_blocknr_hint {
2043 struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */ 2055 struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */
2044 long block; /* file offset, in blocks */ 2056 sector_t block; /* file offset, in blocks */
2045 struct in_core_key key; 2057 struct in_core_key key;
2046 struct treepath *path; /* search path, used by allocator to deternine search_start by 2058 struct treepath *path; /* search path, used by allocator to deternine search_start by
2047 * various ways */ 2059 * various ways */
@@ -2099,7 +2111,8 @@ static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb,
2099static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle 2111static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
2100 *th, struct inode *inode, 2112 *th, struct inode *inode,
2101 b_blocknr_t * new_blocknrs, 2113 b_blocknr_t * new_blocknrs,
2102 struct treepath *path, long block) 2114 struct treepath *path,
2115 sector_t block)
2103{ 2116{
2104 reiserfs_blocknr_hint_t hint = { 2117 reiserfs_blocknr_hint_t hint = {
2105 .th = th, 2118 .th = th,
@@ -2116,7 +2129,8 @@ static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
2116static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle 2129static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
2117 *th, struct inode *inode, 2130 *th, struct inode *inode,
2118 b_blocknr_t * new_blocknrs, 2131 b_blocknr_t * new_blocknrs,
2119 struct treepath *path, long block) 2132 struct treepath *path,
2133 sector_t block)
2120{ 2134{
2121 reiserfs_blocknr_hint_t hint = { 2135 reiserfs_blocknr_hint_t hint = {
2122 .th = th, 2136 .th = th,
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index ff9e9234f8ba..10fa0c832018 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -265,9 +265,7 @@ enum journal_state_bits {
265typedef __u32(*hashf_t) (const signed char *, int); 265typedef __u32(*hashf_t) (const signed char *, int);
266 266
267struct reiserfs_bitmap_info { 267struct reiserfs_bitmap_info {
268 // FIXME: Won't work with block sizes > 8K 268 __u32 free_count;
269 __u16 first_zero_hint;
270 __u16 free_count;
271}; 269};
272 270
273struct proc_dir_entry; 271struct proc_dir_entry;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 10a83d8d5775..13df99fb2769 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,6 +25,7 @@
25#define CLONE_NEWUTS 0x04000000 /* New utsname group? */ 25#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
26#define CLONE_NEWIPC 0x08000000 /* New ipcs */ 26#define CLONE_NEWIPC 0x08000000 /* New ipcs */
27#define CLONE_NEWUSER 0x10000000 /* New user namespace */ 27#define CLONE_NEWUSER 0x10000000 /* New user namespace */
28#define CLONE_NEWPID 0x20000000 /* New pid namespace */
28#define CLONE_NEWNET 0x40000000 /* New network namespace */ 29#define CLONE_NEWNET 0x40000000 /* New network namespace */
29 30
30/* 31/*
@@ -428,7 +429,17 @@ struct signal_struct {
428 cputime_t it_prof_incr, it_virt_incr; 429 cputime_t it_prof_incr, it_virt_incr;
429 430
430 /* job control IDs */ 431 /* job control IDs */
431 pid_t pgrp; 432
433 /*
434 * pgrp and session fields are deprecated.
435 * use the task_session_Xnr and task_pgrp_Xnr routines below
436 */
437
438 union {
439 pid_t pgrp __deprecated;
440 pid_t __pgrp;
441 };
442
432 struct pid *tty_old_pgrp; 443 struct pid *tty_old_pgrp;
433 444
434 union { 445 union {
@@ -736,6 +747,8 @@ struct sched_domain {
736#endif 747#endif
737}; 748};
738 749
750extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
751
739#endif /* CONFIG_SMP */ 752#endif /* CONFIG_SMP */
740 753
741/* 754/*
@@ -756,8 +769,6 @@ static inline int above_background_load(void)
756} 769}
757 770
758struct io_context; /* See blkdev.h */ 771struct io_context; /* See blkdev.h */
759struct cpuset;
760
761#define NGROUPS_SMALL 32 772#define NGROUPS_SMALL 32
762#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t))) 773#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t)))
763struct group_info { 774struct group_info {
@@ -1125,11 +1136,16 @@ struct task_struct {
1125 short il_next; 1136 short il_next;
1126#endif 1137#endif
1127#ifdef CONFIG_CPUSETS 1138#ifdef CONFIG_CPUSETS
1128 struct cpuset *cpuset;
1129 nodemask_t mems_allowed; 1139 nodemask_t mems_allowed;
1130 int cpuset_mems_generation; 1140 int cpuset_mems_generation;
1131 int cpuset_mem_spread_rotor; 1141 int cpuset_mem_spread_rotor;
1132#endif 1142#endif
1143#ifdef CONFIG_CGROUPS
1144 /* Control Group info protected by css_set_lock */
1145 struct css_set *cgroups;
1146 /* cg_list protected by css_set_lock and tsk->alloc_lock */
1147 struct list_head cg_list;
1148#endif
1133#ifdef CONFIG_FUTEX 1149#ifdef CONFIG_FUTEX
1134 struct robust_list_head __user *robust_list; 1150 struct robust_list_head __user *robust_list;
1135#ifdef CONFIG_COMPAT 1151#ifdef CONFIG_COMPAT
@@ -1185,24 +1201,14 @@ static inline int rt_task(struct task_struct *p)
1185 return rt_prio(p->prio); 1201 return rt_prio(p->prio);
1186} 1202}
1187 1203
1188static inline pid_t process_group(struct task_struct *tsk) 1204static inline void set_task_session(struct task_struct *tsk, pid_t session)
1189{ 1205{
1190 return tsk->signal->pgrp; 1206 tsk->signal->__session = session;
1191} 1207}
1192 1208
1193static inline pid_t signal_session(struct signal_struct *sig) 1209static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
1194{ 1210{
1195 return sig->__session; 1211 tsk->signal->__pgrp = pgrp;
1196}
1197
1198static inline pid_t process_session(struct task_struct *tsk)
1199{
1200 return signal_session(tsk->signal);
1201}
1202
1203static inline void set_signal_session(struct signal_struct *sig, pid_t session)
1204{
1205 sig->__session = session;
1206} 1212}
1207 1213
1208static inline struct pid *task_pid(struct task_struct *task) 1214static inline struct pid *task_pid(struct task_struct *task)
@@ -1225,6 +1231,88 @@ static inline struct pid *task_session(struct task_struct *task)
1225 return task->group_leader->pids[PIDTYPE_SID].pid; 1231 return task->group_leader->pids[PIDTYPE_SID].pid;
1226} 1232}
1227 1233
1234struct pid_namespace;
1235
1236/*
1237 * the helpers to get the task's different pids as they are seen
1238 * from various namespaces
1239 *
1240 * task_xid_nr() : global id, i.e. the id seen from the init namespace;
1241 * task_xid_vnr() : virtual id, i.e. the id seen from the namespace the task
1242 * belongs to. this only makes sence when called in the
1243 * context of the task that belongs to the same namespace;
1244 * task_xid_nr_ns() : id seen from the ns specified;
1245 *
1246 * set_task_vxid() : assigns a virtual id to a task;
1247 *
1248 * task_ppid_nr_ns() : the parent's id as seen from the namespace specified.
1249 * the result depends on the namespace and whether the
1250 * task in question is the namespace's init. e.g. for the
1251 * namespace's init this will return 0 when called from
1252 * the namespace of this init, or appropriate id otherwise.
1253 *
1254 *
1255 * see also pid_nr() etc in include/linux/pid.h
1256 */
1257
1258static inline pid_t task_pid_nr(struct task_struct *tsk)
1259{
1260 return tsk->pid;
1261}
1262
1263pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
1264
1265static inline pid_t task_pid_vnr(struct task_struct *tsk)
1266{
1267 return pid_vnr(task_pid(tsk));
1268}
1269
1270
1271static inline pid_t task_tgid_nr(struct task_struct *tsk)
1272{
1273 return tsk->tgid;
1274}
1275
1276pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
1277
1278static inline pid_t task_tgid_vnr(struct task_struct *tsk)
1279{
1280 return pid_vnr(task_tgid(tsk));
1281}
1282
1283
1284static inline pid_t task_pgrp_nr(struct task_struct *tsk)
1285{
1286 return tsk->signal->__pgrp;
1287}
1288
1289pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
1290
1291static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
1292{
1293 return pid_vnr(task_pgrp(tsk));
1294}
1295
1296
1297static inline pid_t task_session_nr(struct task_struct *tsk)
1298{
1299 return tsk->signal->__session;
1300}
1301
1302pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
1303
1304static inline pid_t task_session_vnr(struct task_struct *tsk)
1305{
1306 return pid_vnr(task_session(tsk));
1307}
1308
1309
1310static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
1311 struct pid_namespace *ns)
1312{
1313 return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns);
1314}
1315
1228/** 1316/**
1229 * pid_alive - check that a task structure is not stale 1317 * pid_alive - check that a task structure is not stale
1230 * @p: Task structure to be checked. 1318 * @p: Task structure to be checked.
@@ -1239,16 +1327,22 @@ static inline int pid_alive(struct task_struct *p)
1239} 1327}
1240 1328
1241/** 1329/**
1242 * is_init - check if a task structure is init 1330 * is_global_init - check if a task structure is init
1243 * @tsk: Task structure to be checked. 1331 * @tsk: Task structure to be checked.
1244 * 1332 *
1245 * Check if a task structure is the first user space task the kernel created. 1333 * Check if a task structure is the first user space task the kernel created.
1246 */ 1334 */
1247static inline int is_init(struct task_struct *tsk) 1335static inline int is_global_init(struct task_struct *tsk)
1248{ 1336{
1249 return tsk->pid == 1; 1337 return tsk->pid == 1;
1250} 1338}
1251 1339
1340/*
1341 * is_container_init:
1342 * check whether in the task is init in its own pid namespace.
1343 */
1344extern int is_container_init(struct task_struct *tsk);
1345
1252extern struct pid *cad_pid; 1346extern struct pid *cad_pid;
1253 1347
1254extern void free_task(struct task_struct *tsk); 1348extern void free_task(struct task_struct *tsk);
@@ -1420,8 +1514,32 @@ extern struct task_struct init_task;
1420 1514
1421extern struct mm_struct init_mm; 1515extern struct mm_struct init_mm;
1422 1516
1423#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) 1517extern struct pid_namespace init_pid_ns;
1424extern struct task_struct *find_task_by_pid_type(int type, int pid); 1518
1519/*
1520 * find a task by one of its numerical ids
1521 *
1522 * find_task_by_pid_type_ns():
1523 * it is the most generic call - it finds a task by all id,
1524 * type and namespace specified
1525 * find_task_by_pid_ns():
1526 * finds a task by its pid in the specified namespace
1527 * find_task_by_vpid():
1528 * finds a task by its virtual pid
1529 * find_task_by_pid():
1530 * finds a task by its global pid
1531 *
1532 * see also find_pid() etc in include/linux/pid.h
1533 */
1534
1535extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
1536 struct pid_namespace *ns);
1537
1538extern struct task_struct *find_task_by_pid(pid_t nr);
1539extern struct task_struct *find_task_by_vpid(pid_t nr);
1540extern struct task_struct *find_task_by_pid_ns(pid_t nr,
1541 struct pid_namespace *ns);
1542
1425extern void __set_special_pids(pid_t session, pid_t pgrp); 1543extern void __set_special_pids(pid_t session, pid_t pgrp);
1426 1544
1427/* per-UID process charging. */ 1545/* per-UID process charging. */
@@ -1608,6 +1726,12 @@ static inline int has_group_leader_pid(struct task_struct *p)
1608 return p->pid == p->tgid; 1726 return p->pid == p->tgid;
1609} 1727}
1610 1728
1729static inline
1730int same_thread_group(struct task_struct *p1, struct task_struct *p2)
1731{
1732 return p1->tgid == p2->tgid;
1733}
1734
1611static inline struct task_struct *next_thread(const struct task_struct *p) 1735static inline struct task_struct *next_thread(const struct task_struct *p)
1612{ 1736{
1613 return list_entry(rcu_dereference(p->thread_group.next), 1737 return list_entry(rcu_dereference(p->thread_group.next),
@@ -1625,7 +1749,8 @@ static inline int thread_group_empty(struct task_struct *p)
1625/* 1749/*
1626 * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring 1750 * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
1627 * subscriptions and synchronises with wait4(). Also used in procfs. Also 1751 * subscriptions and synchronises with wait4(). Also used in procfs. Also
1628 * pins the final release of task.io_context. Also protects ->cpuset. 1752 * pins the final release of task.io_context. Also protects ->cpuset and
1753 * ->cgroup.subsys[].
1629 * 1754 *
1630 * Nests both inside and outside of read_lock(&tasklist_lock). 1755 * Nests both inside and outside of read_lock(&tasklist_lock).
1631 * It must not be nested with write_lock_irq(&tasklist_lock), 1756 * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9aaffb0b1d81..c8eaad9e4b72 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -90,7 +90,6 @@ struct sem {
90/* One sem_array data structure for each set of semaphores in the system. */ 90/* One sem_array data structure for each set of semaphores in the system. */
91struct sem_array { 91struct sem_array {
92 struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ 92 struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */
93 int sem_id;
94 time_t sem_otime; /* last semop time */ 93 time_t sem_otime; /* last semop time */
95 time_t sem_ctime; /* last change time */ 94 time_t sem_ctime; /* last change time */
96 struct sem *sem_base; /* ptr to first semaphore in array */ 95 struct sem *sem_base; /* ptr to first semaphore in array */
diff --git a/include/linux/shm.h b/include/linux/shm.h
index bea65d9c93ef..eeaed921a1dc 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -79,7 +79,6 @@ struct shmid_kernel /* private to the kernel */
79{ 79{
80 struct kern_ipc_perm shm_perm; 80 struct kern_ipc_perm shm_perm;
81 struct file * shm_file; 81 struct file * shm_file;
82 int id;
83 unsigned long shm_nattch; 82 unsigned long shm_nattch;
84 unsigned long shm_segsz; 83 unsigned long shm_segsz;
85 time_t shm_atim; 84 time_t shm_atim;
diff --git a/include/linux/types.h b/include/linux/types.h
index 0351bf2fac85..4f0dad21c917 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -3,12 +3,9 @@
3 3
4#ifdef __KERNEL__ 4#ifdef __KERNEL__
5 5
6#define BITS_TO_LONGS(bits) \
7 (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
8#define DECLARE_BITMAP(name,bits) \ 6#define DECLARE_BITMAP(name,bits) \
9 unsigned long name[BITS_TO_LONGS(bits)] 7 unsigned long name[BITS_TO_LONGS(bits)]
10 8
11#define BITS_PER_BYTE 8
12#endif 9#endif
13 10
14#include <linux/posix_types.h> 11#include <linux/posix_types.h>
diff --git a/include/linux/uinput.h b/include/linux/uinput.h
index a6c1e8eed226..15ddd4483b09 100644
--- a/include/linux/uinput.h
+++ b/include/linux/uinput.h
@@ -162,10 +162,6 @@ struct uinput_ff_erase {
162#define UI_FF_UPLOAD 1 162#define UI_FF_UPLOAD 1
163#define UI_FF_ERASE 2 163#define UI_FF_ERASE 2
164 164
165#ifndef NBITS
166#define NBITS(x) ((((x)-1)/(sizeof(long)*8))+1)
167#endif /* NBITS */
168
169#define UINPUT_MAX_NAME_SIZE 80 165#define UINPUT_MAX_NAME_SIZE 80
170struct uinput_user_dev { 166struct uinput_user_dev {
171 char name[UINPUT_MAX_NAME_SIZE]; 167 char name[UINPUT_MAX_NAME_SIZE];
diff --git a/include/linux/vt.h b/include/linux/vt.h
index ba806e8711be..02c1c0288770 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -1,6 +1,18 @@
1#ifndef _LINUX_VT_H 1#ifndef _LINUX_VT_H
2#define _LINUX_VT_H 2#define _LINUX_VT_H
3 3
4#ifdef __KERNEL__
5struct notifier_block;
6
7struct vt_notifier_param {
8 struct vc_data *vc; /* VC on which the update happened */
9 unsigned int c; /* Printed char */
10};
11
12extern int register_vt_notifier(struct notifier_block *nb);
13extern int unregister_vt_notifier(struct notifier_block *nb);
14#endif
15
4/* 16/*
5 * These constants are also useful for user-level apps (e.g., VC 17 * These constants are also useful for user-level apps (e.g., VC
6 * resizing). 18 * resizing).
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ce6badc98f6d..7daafdc2514b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -8,6 +8,7 @@
8#include <linux/timer.h> 8#include <linux/timer.h>
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <linux/bitops.h> 10#include <linux/bitops.h>
11#include <linux/lockdep.h>
11#include <asm/atomic.h> 12#include <asm/atomic.h>
12 13
13struct workqueue_struct; 14struct workqueue_struct;
@@ -28,6 +29,9 @@ struct work_struct {
28#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) 29#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
29 struct list_head entry; 30 struct list_head entry;
30 work_func_t func; 31 work_func_t func;
32#ifdef CONFIG_LOCKDEP
33 struct lockdep_map lockdep_map;
34#endif
31}; 35};
32 36
33#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) 37#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0)
@@ -41,10 +45,23 @@ struct execute_work {
41 struct work_struct work; 45 struct work_struct work;
42}; 46};
43 47
48#ifdef CONFIG_LOCKDEP
49/*
50 * NB: because we have to copy the lockdep_map, setting _key
51 * here is required, otherwise it could get initialised to the
52 * copy of the lockdep_map!
53 */
54#define __WORK_INIT_LOCKDEP_MAP(n, k) \
55 .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
56#else
57#define __WORK_INIT_LOCKDEP_MAP(n, k)
58#endif
59
44#define __WORK_INITIALIZER(n, f) { \ 60#define __WORK_INITIALIZER(n, f) { \
45 .data = WORK_DATA_INIT(), \ 61 .data = WORK_DATA_INIT(), \
46 .entry = { &(n).entry, &(n).entry }, \ 62 .entry = { &(n).entry, &(n).entry }, \
47 .func = (f), \ 63 .func = (f), \
64 __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
48 } 65 }
49 66
50#define __DELAYED_WORK_INITIALIZER(n, f) { \ 67#define __DELAYED_WORK_INITIALIZER(n, f) { \
@@ -76,12 +93,24 @@ struct execute_work {
76 * assignment of the work data initializer allows the compiler 93 * assignment of the work data initializer allows the compiler
77 * to generate better code. 94 * to generate better code.
78 */ 95 */
96#ifdef CONFIG_LOCKDEP
97#define INIT_WORK(_work, _func) \
98 do { \
99 static struct lock_class_key __key; \
100 \
101 (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
102 lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
103 INIT_LIST_HEAD(&(_work)->entry); \
104 PREPARE_WORK((_work), (_func)); \
105 } while (0)
106#else
79#define INIT_WORK(_work, _func) \ 107#define INIT_WORK(_work, _func) \
80 do { \ 108 do { \
81 (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ 109 (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
82 INIT_LIST_HEAD(&(_work)->entry); \ 110 INIT_LIST_HEAD(&(_work)->entry); \
83 PREPARE_WORK((_work), (_func)); \ 111 PREPARE_WORK((_work), (_func)); \
84 } while (0) 112 } while (0)
113#endif
85 114
86#define INIT_DELAYED_WORK(_work, _func) \ 115#define INIT_DELAYED_WORK(_work, _func) \
87 do { \ 116 do { \
@@ -118,9 +147,23 @@ struct execute_work {
118 clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) 147 clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
119 148
120 149
121extern struct workqueue_struct *__create_workqueue(const char *name, 150extern struct workqueue_struct *
122 int singlethread, 151__create_workqueue_key(const char *name, int singlethread,
123 int freezeable); 152 int freezeable, struct lock_class_key *key);
153
154#ifdef CONFIG_LOCKDEP
155#define __create_workqueue(name, singlethread, freezeable) \
156({ \
157 static struct lock_class_key __key; \
158 \
159 __create_workqueue_key((name), (singlethread), \
160 (freezeable), &__key); \
161})
162#else
163#define __create_workqueue(name, singlethread, freezeable) \
164 __create_workqueue_key((name), (singlethread), (freezeable), NULL)
165#endif
166
124#define create_workqueue(name) __create_workqueue((name), 0, 0) 167#define create_workqueue(name) __create_workqueue((name), 0, 0)
125#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1) 168#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
126#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) 169#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 686425a97b0f..625346c47ee2 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -44,7 +44,7 @@ extern unsigned int p9_debug_level;
44do { \ 44do { \
45 if ((p9_debug_level & level) == level) \ 45 if ((p9_debug_level & level) == level) \
46 printk(KERN_NOTICE "-- %s (%d): " \ 46 printk(KERN_NOTICE "-- %s (%d): " \
47 format , __FUNCTION__, current->pid , ## arg); \ 47 format , __FUNCTION__, task_pid_nr(current) , ## arg); \
48} while (0) 48} while (0)
49 49
50#define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR, \ 50#define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR, \
@@ -59,7 +59,7 @@ do { \
59#define P9_EPRINTK(level, format, arg...) \ 59#define P9_EPRINTK(level, format, arg...) \
60do { \ 60do { \
61 printk(level "9p: %s (%d): " \ 61 printk(level "9p: %s (%d): " \
62 format , __FUNCTION__, current->pid , ## arg); \ 62 format , __FUNCTION__, task_pid_nr(current), ## arg); \
63} while (0) 63} while (0)
64 64
65 65
diff --git a/include/net/scm.h b/include/net/scm.h
index 423cb1d5ac25..06df126103ca 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -4,6 +4,8 @@
4#include <linux/limits.h> 4#include <linux/limits.h>
5#include <linux/net.h> 5#include <linux/net.h>
6#include <linux/security.h> 6#include <linux/security.h>
7#include <linux/pid.h>
8#include <linux/nsproxy.h>
7 9
8/* Well, we should have at least one descriptor open 10/* Well, we should have at least one descriptor open
9 * to accept passed FDs 8) 11 * to accept passed FDs 8)
@@ -54,7 +56,7 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
54 struct task_struct *p = current; 56 struct task_struct *p = current;
55 scm->creds.uid = p->uid; 57 scm->creds.uid = p->uid;
56 scm->creds.gid = p->gid; 58 scm->creds.gid = p->gid;
57 scm->creds.pid = p->tgid; 59 scm->creds.pid = task_tgid_vnr(p);
58 scm->fp = NULL; 60 scm->fp = NULL;
59 scm->seq = 0; 61 scm->seq = 0;
60 unix_get_peersec_dgram(sock, scm); 62 unix_get_peersec_dgram(sock, scm);
diff --git a/include/video/sstfb.h b/include/video/sstfb.h
index baa163f770ab..b52f07381243 100644
--- a/include/video/sstfb.h
+++ b/include/video/sstfb.h
@@ -68,7 +68,6 @@
68# define print_var(X,Y...) 68# define print_var(X,Y...)
69#endif 69#endif
70 70
71#define BIT(x) (1ul<<(x))
72#define POW2(x) (1ul<<(x)) 71#define POW2(x) (1ul<<(x))
73 72
74/* 73/*
diff --git a/include/video/tdfx.h b/include/video/tdfx.h
index 05b63c2a5abc..7431d9681e57 100644
--- a/include/video/tdfx.h
+++ b/include/video/tdfx.h
@@ -79,8 +79,6 @@
79 79
80/* register bitfields (not all, only as needed) */ 80/* register bitfields (not all, only as needed) */
81 81
82#define BIT(x) (1UL << (x))
83
84/* COMMAND_2D reg. values */ 82/* COMMAND_2D reg. values */
85#define TDFX_ROP_COPY 0xcc /* src */ 83#define TDFX_ROP_COPY 0xcc /* src */
86#define TDFX_ROP_INVERT 0x55 /* NOT dst */ 84#define TDFX_ROP_INVERT 0x55 /* NOT dst */
diff --git a/init/Kconfig b/init/Kconfig
index a29a688c47d3..541382d539ad 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -270,9 +270,43 @@ config LOG_BUF_SHIFT
270 13 => 8 KB 270 13 => 8 KB
271 12 => 4 KB 271 12 => 4 KB
272 272
273config CGROUPS
274 bool "Control Group support"
275 help
276 This option will let you use process cgroup subsystems
277 such as Cpusets
278
279 Say N if unsure.
280
281config CGROUP_DEBUG
282 bool "Example debug cgroup subsystem"
283 depends on CGROUPS
284 help
285 This option enables a simple cgroup subsystem that
286 exports useful debugging information about the cgroups
287 framework
288
289 Say N if unsure
290
291config CGROUP_NS
292 bool "Namespace cgroup subsystem"
293 depends on CGROUPS
294 help
295 Provides a simple namespace cgroup subsystem to
296 provide hierarchical naming of sets of namespaces,
297 for instance virtual servers and checkpoint/restart
298 jobs.
299
300config CGROUP_CPUACCT
301 bool "Simple CPU accounting cgroup subsystem"
302 depends on CGROUPS
303 help
304 Provides a simple Resource Controller for monitoring the
305 total CPU consumed by the tasks in a cgroup
306
273config CPUSETS 307config CPUSETS
274 bool "Cpuset support" 308 bool "Cpuset support"
275 depends on SMP 309 depends on SMP && CGROUPS
276 help 310 help
277 This option will let you create and manage CPUSETs which 311 This option will let you create and manage CPUSETs which
278 allow dynamically partitioning a system into sets of CPUs and 312 allow dynamically partitioning a system into sets of CPUs and
@@ -300,6 +334,16 @@ config FAIR_USER_SCHED
300 This option will choose userid as the basis for grouping 334 This option will choose userid as the basis for grouping
301 tasks, thus providing equal CPU bandwidth to each user. 335 tasks, thus providing equal CPU bandwidth to each user.
302 336
337config FAIR_CGROUP_SCHED
338 bool "Control groups"
339 depends on CGROUPS
340 help
341 This option allows you to create arbitrary task groups
342 using the "cgroup" pseudo filesystem and control
343 the cpu bandwidth allocated to each such task group.
344 Refer to Documentation/cgroups.txt for more information
345 on "cgroup" pseudo filesystem.
346
303endchoice 347endchoice
304 348
305config SYSFS_DEPRECATED 349config SYSFS_DEPRECATED
@@ -322,6 +366,11 @@ config SYSFS_DEPRECATED
322 If you are using a distro that was released in 2006 or later, 366 If you are using a distro that was released in 2006 or later,
323 it should be safe to say N here. 367 it should be safe to say N here.
324 368
369config PROC_PID_CPUSET
370 bool "Include legacy /proc/<pid>/cpuset file"
371 depends on CPUSETS
372 default y
373
325config RELAY 374config RELAY
326 bool "Kernel->user space relay support (formerly relayfs)" 375 bool "Kernel->user space relay support (formerly relayfs)"
327 help 376 help
diff --git a/init/main.c b/init/main.c
index 9def935ab13a..0dd0e7a1f632 100644
--- a/init/main.c
+++ b/init/main.c
@@ -39,6 +39,7 @@
39#include <linux/writeback.h> 39#include <linux/writeback.h>
40#include <linux/cpu.h> 40#include <linux/cpu.h>
41#include <linux/cpuset.h> 41#include <linux/cpuset.h>
42#include <linux/cgroup.h>
42#include <linux/efi.h> 43#include <linux/efi.h>
43#include <linux/tick.h> 44#include <linux/tick.h>
44#include <linux/interrupt.h> 45#include <linux/interrupt.h>
@@ -523,6 +524,7 @@ asmlinkage void __init start_kernel(void)
523 */ 524 */
524 unwind_init(); 525 unwind_init();
525 lockdep_init(); 526 lockdep_init();
527 cgroup_init_early();
526 528
527 local_irq_disable(); 529 local_irq_disable();
528 early_boot_irqs_off(); 530 early_boot_irqs_off();
@@ -640,6 +642,7 @@ asmlinkage void __init start_kernel(void)
640#ifdef CONFIG_PROC_FS 642#ifdef CONFIG_PROC_FS
641 proc_root_init(); 643 proc_root_init();
642#endif 644#endif
645 cgroup_init();
643 cpuset_init(); 646 cpuset_init();
644 taskstats_init_early(); 647 taskstats_init_early();
645 delayacct_init(); 648 delayacct_init();
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 20f1fed8fa48..c0b26dc4617b 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -29,6 +29,8 @@
29#include <linux/audit.h> 29#include <linux/audit.h>
30#include <linux/signal.h> 30#include <linux/signal.h>
31#include <linux/mutex.h> 31#include <linux/mutex.h>
32#include <linux/nsproxy.h>
33#include <linux/pid.h>
32 34
33#include <net/sock.h> 35#include <net/sock.h>
34#include "util.h" 36#include "util.h"
@@ -330,7 +332,8 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
330 (info->notify_owner && 332 (info->notify_owner &&
331 info->notify.sigev_notify == SIGEV_SIGNAL) ? 333 info->notify.sigev_notify == SIGEV_SIGNAL) ?
332 info->notify.sigev_signo : 0, 334 info->notify.sigev_signo : 0,
333 pid_nr(info->notify_owner)); 335 pid_nr_ns(info->notify_owner,
336 current->nsproxy->pid_ns));
334 spin_unlock(&info->lock); 337 spin_unlock(&info->lock);
335 buffer[sizeof(buffer)-1] = '\0'; 338 buffer[sizeof(buffer)-1] = '\0';
336 slen = strlen(buffer)+1; 339 slen = strlen(buffer)+1;
@@ -507,7 +510,7 @@ static void __do_notify(struct mqueue_inode_info *info)
507 sig_i.si_errno = 0; 510 sig_i.si_errno = 0;
508 sig_i.si_code = SI_MESGQ; 511 sig_i.si_code = SI_MESGQ;
509 sig_i.si_value = info->notify.sigev_value; 512 sig_i.si_value = info->notify.sigev_value;
510 sig_i.si_pid = current->tgid; 513 sig_i.si_pid = task_pid_vnr(current);
511 sig_i.si_uid = current->uid; 514 sig_i.si_uid = current->uid;
512 515
513 kill_pid_info(info->notify.sigev_signo, 516 kill_pid_info(info->notify.sigev_signo,
diff --git a/ipc/msg.c b/ipc/msg.c
index a03fcb522fff..fdf3db5731ce 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -34,7 +34,7 @@
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/audit.h> 35#include <linux/audit.h>
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/mutex.h> 37#include <linux/rwsem.h>
38#include <linux/nsproxy.h> 38#include <linux/nsproxy.h>
39 39
40#include <asm/current.h> 40#include <asm/current.h>
@@ -66,23 +66,15 @@ struct msg_sender {
66#define SEARCH_NOTEQUAL 3 66#define SEARCH_NOTEQUAL 3
67#define SEARCH_LESSEQUAL 4 67#define SEARCH_LESSEQUAL 4
68 68
69static atomic_t msg_bytes = ATOMIC_INIT(0);
70static atomic_t msg_hdrs = ATOMIC_INIT(0);
71
72static struct ipc_ids init_msg_ids; 69static struct ipc_ids init_msg_ids;
73 70
74#define msg_ids(ns) (*((ns)->ids[IPC_MSG_IDS])) 71#define msg_ids(ns) (*((ns)->ids[IPC_MSG_IDS]))
75 72
76#define msg_lock(ns, id) ((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
77#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) 73#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
78#define msg_rmid(ns, id) ((struct msg_queue*)ipc_rmid(&msg_ids(ns), id)) 74#define msg_buildid(id, seq) ipc_buildid(id, seq)
79#define msg_checkid(ns, msq, msgid) \ 75
80 ipc_checkid(&msg_ids(ns), &msq->q_perm, msgid) 76static void freeque(struct ipc_namespace *, struct msg_queue *);
81#define msg_buildid(ns, id, seq) \ 77static int newque(struct ipc_namespace *, struct ipc_params *);
82 ipc_buildid(&msg_ids(ns), id, seq)
83
84static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
85static int newque (struct ipc_namespace *ns, key_t key, int msgflg);
86#ifdef CONFIG_PROC_FS 78#ifdef CONFIG_PROC_FS
87static int sysvipc_msg_proc_show(struct seq_file *s, void *it); 79static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
88#endif 80#endif
@@ -93,7 +85,9 @@ static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
93 ns->msg_ctlmax = MSGMAX; 85 ns->msg_ctlmax = MSGMAX;
94 ns->msg_ctlmnb = MSGMNB; 86 ns->msg_ctlmnb = MSGMNB;
95 ns->msg_ctlmni = MSGMNI; 87 ns->msg_ctlmni = MSGMNI;
96 ipc_init_ids(ids, ns->msg_ctlmni); 88 atomic_set(&ns->msg_bytes, 0);
89 atomic_set(&ns->msg_hdrs, 0);
90 ipc_init_ids(ids);
97} 91}
98 92
99int msg_init_ns(struct ipc_namespace *ns) 93int msg_init_ns(struct ipc_namespace *ns)
@@ -110,20 +104,25 @@ int msg_init_ns(struct ipc_namespace *ns)
110 104
111void msg_exit_ns(struct ipc_namespace *ns) 105void msg_exit_ns(struct ipc_namespace *ns)
112{ 106{
113 int i;
114 struct msg_queue *msq; 107 struct msg_queue *msq;
108 int next_id;
109 int total, in_use;
110
111 down_write(&msg_ids(ns).rw_mutex);
112
113 in_use = msg_ids(ns).in_use;
115 114
116 mutex_lock(&msg_ids(ns).mutex); 115 for (total = 0, next_id = 0; total < in_use; next_id++) {
117 for (i = 0; i <= msg_ids(ns).max_id; i++) { 116 msq = idr_find(&msg_ids(ns).ipcs_idr, next_id);
118 msq = msg_lock(ns, i);
119 if (msq == NULL) 117 if (msq == NULL)
120 continue; 118 continue;
121 119 ipc_lock_by_ptr(&msq->q_perm);
122 freeque(ns, msq, i); 120 freeque(ns, msq);
121 total++;
123 } 122 }
124 mutex_unlock(&msg_ids(ns).mutex);
125 123
126 ipc_fini_ids(ns->ids[IPC_MSG_IDS]); 124 up_write(&msg_ids(ns).rw_mutex);
125
127 kfree(ns->ids[IPC_MSG_IDS]); 126 kfree(ns->ids[IPC_MSG_IDS]);
128 ns->ids[IPC_MSG_IDS] = NULL; 127 ns->ids[IPC_MSG_IDS] = NULL;
129} 128}
@@ -136,10 +135,55 @@ void __init msg_init(void)
136 IPC_MSG_IDS, sysvipc_msg_proc_show); 135 IPC_MSG_IDS, sysvipc_msg_proc_show);
137} 136}
138 137
139static int newque (struct ipc_namespace *ns, key_t key, int msgflg) 138/*
139 * This routine is called in the paths where the rw_mutex is held to protect
140 * access to the idr tree.
141 */
142static inline struct msg_queue *msg_lock_check_down(struct ipc_namespace *ns,
143 int id)
144{
145 struct kern_ipc_perm *ipcp = ipc_lock_check_down(&msg_ids(ns), id);
146
147 return container_of(ipcp, struct msg_queue, q_perm);
148}
149
150/*
151 * msg_lock_(check_) routines are called in the paths where the rw_mutex
152 * is not held.
153 */
154static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
155{
156 struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);
157
158 return container_of(ipcp, struct msg_queue, q_perm);
159}
160
161static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
162 int id)
163{
164 struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);
165
166 return container_of(ipcp, struct msg_queue, q_perm);
167}
168
169static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
170{
171 ipc_rmid(&msg_ids(ns), &s->q_perm);
172}
173
174/**
175 * newque - Create a new msg queue
176 * @ns: namespace
177 * @params: ptr to the structure that contains the key and msgflg
178 *
179 * Called with msg_ids.rw_mutex held (writer)
180 */
181static int newque(struct ipc_namespace *ns, struct ipc_params *params)
140{ 182{
141 struct msg_queue *msq; 183 struct msg_queue *msq;
142 int id, retval; 184 int id, retval;
185 key_t key = params->key;
186 int msgflg = params->flg;
143 187
144 msq = ipc_rcu_alloc(sizeof(*msq)); 188 msq = ipc_rcu_alloc(sizeof(*msq));
145 if (!msq) 189 if (!msq)
@@ -155,14 +199,17 @@ static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
155 return retval; 199 return retval;
156 } 200 }
157 201
202 /*
203 * ipc_addid() locks msq
204 */
158 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 205 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
159 if (id == -1) { 206 if (id < 0) {
160 security_msg_queue_free(msq); 207 security_msg_queue_free(msq);
161 ipc_rcu_putref(msq); 208 ipc_rcu_putref(msq);
162 return -ENOSPC; 209 return id;
163 } 210 }
164 211
165 msq->q_id = msg_buildid(ns, id, msq->q_perm.seq); 212 msq->q_perm.id = msg_buildid(id, msq->q_perm.seq);
166 msq->q_stime = msq->q_rtime = 0; 213 msq->q_stime = msq->q_rtime = 0;
167 msq->q_ctime = get_seconds(); 214 msq->q_ctime = get_seconds();
168 msq->q_cbytes = msq->q_qnum = 0; 215 msq->q_cbytes = msq->q_qnum = 0;
@@ -171,9 +218,10 @@ static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
171 INIT_LIST_HEAD(&msq->q_messages); 218 INIT_LIST_HEAD(&msq->q_messages);
172 INIT_LIST_HEAD(&msq->q_receivers); 219 INIT_LIST_HEAD(&msq->q_receivers);
173 INIT_LIST_HEAD(&msq->q_senders); 220 INIT_LIST_HEAD(&msq->q_senders);
221
174 msg_unlock(msq); 222 msg_unlock(msq);
175 223
176 return msq->q_id; 224 return msq->q_perm.id;
177} 225}
178 226
179static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) 227static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
@@ -224,19 +272,19 @@ static void expunge_all(struct msg_queue *msq, int res)
224 272
225/* 273/*
226 * freeque() wakes up waiters on the sender and receiver waiting queue, 274 * freeque() wakes up waiters on the sender and receiver waiting queue,
227 * removes the message queue from message queue ID 275 * removes the message queue from message queue ID IDR, and cleans up all the
228 * array, and cleans up all the messages associated with this queue. 276 * messages associated with this queue.
229 * 277 *
230 * msg_ids.mutex and the spinlock for this message queue is hold 278 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
231 * before freeque() is called. msg_ids.mutex remains locked on exit. 279 * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
232 */ 280 */
233static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id) 281static void freeque(struct ipc_namespace *ns, struct msg_queue *msq)
234{ 282{
235 struct list_head *tmp; 283 struct list_head *tmp;
236 284
237 expunge_all(msq, -EIDRM); 285 expunge_all(msq, -EIDRM);
238 ss_wakeup(&msq->q_senders, 1); 286 ss_wakeup(&msq->q_senders, 1);
239 msq = msg_rmid(ns, id); 287 msg_rmid(ns, msq);
240 msg_unlock(msq); 288 msg_unlock(msq);
241 289
242 tmp = msq->q_messages.next; 290 tmp = msq->q_messages.next;
@@ -244,49 +292,40 @@ static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
244 struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list); 292 struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
245 293
246 tmp = tmp->next; 294 tmp = tmp->next;
247 atomic_dec(&msg_hdrs); 295 atomic_dec(&ns->msg_hdrs);
248 free_msg(msg); 296 free_msg(msg);
249 } 297 }
250 atomic_sub(msq->q_cbytes, &msg_bytes); 298 atomic_sub(msq->q_cbytes, &ns->msg_bytes);
251 security_msg_queue_free(msq); 299 security_msg_queue_free(msq);
252 ipc_rcu_putref(msq); 300 ipc_rcu_putref(msq);
253} 301}
254 302
303/*
304 * Called with msg_ids.rw_mutex and ipcp locked.
305 */
306static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
307{
308 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
309
310 return security_msg_queue_associate(msq, msgflg);
311}
312
255asmlinkage long sys_msgget(key_t key, int msgflg) 313asmlinkage long sys_msgget(key_t key, int msgflg)
256{ 314{
257 struct msg_queue *msq;
258 int id, ret = -EPERM;
259 struct ipc_namespace *ns; 315 struct ipc_namespace *ns;
316 struct ipc_ops msg_ops;
317 struct ipc_params msg_params;
260 318
261 ns = current->nsproxy->ipc_ns; 319 ns = current->nsproxy->ipc_ns;
262
263 mutex_lock(&msg_ids(ns).mutex);
264 if (key == IPC_PRIVATE)
265 ret = newque(ns, key, msgflg);
266 else if ((id = ipc_findkey(&msg_ids(ns), key)) == -1) { /* key not used */
267 if (!(msgflg & IPC_CREAT))
268 ret = -ENOENT;
269 else
270 ret = newque(ns, key, msgflg);
271 } else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
272 ret = -EEXIST;
273 } else {
274 msq = msg_lock(ns, id);
275 BUG_ON(msq == NULL);
276 if (ipcperms(&msq->q_perm, msgflg))
277 ret = -EACCES;
278 else {
279 int qid = msg_buildid(ns, id, msq->q_perm.seq);
280
281 ret = security_msg_queue_associate(msq, msgflg);
282 if (!ret)
283 ret = qid;
284 }
285 msg_unlock(msq);
286 }
287 mutex_unlock(&msg_ids(ns).mutex);
288 320
289 return ret; 321 msg_ops.getnew = newque;
322 msg_ops.associate = msg_security;
323 msg_ops.more_checks = NULL;
324
325 msg_params.key = key;
326 msg_params.flg = msgflg;
327
328 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
290} 329}
291 330
292static inline unsigned long 331static inline unsigned long
@@ -420,23 +459,23 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
420 msginfo.msgmnb = ns->msg_ctlmnb; 459 msginfo.msgmnb = ns->msg_ctlmnb;
421 msginfo.msgssz = MSGSSZ; 460 msginfo.msgssz = MSGSSZ;
422 msginfo.msgseg = MSGSEG; 461 msginfo.msgseg = MSGSEG;
423 mutex_lock(&msg_ids(ns).mutex); 462 down_read(&msg_ids(ns).rw_mutex);
424 if (cmd == MSG_INFO) { 463 if (cmd == MSG_INFO) {
425 msginfo.msgpool = msg_ids(ns).in_use; 464 msginfo.msgpool = msg_ids(ns).in_use;
426 msginfo.msgmap = atomic_read(&msg_hdrs); 465 msginfo.msgmap = atomic_read(&ns->msg_hdrs);
427 msginfo.msgtql = atomic_read(&msg_bytes); 466 msginfo.msgtql = atomic_read(&ns->msg_bytes);
428 } else { 467 } else {
429 msginfo.msgmap = MSGMAP; 468 msginfo.msgmap = MSGMAP;
430 msginfo.msgpool = MSGPOOL; 469 msginfo.msgpool = MSGPOOL;
431 msginfo.msgtql = MSGTQL; 470 msginfo.msgtql = MSGTQL;
432 } 471 }
433 max_id = msg_ids(ns).max_id; 472 max_id = ipc_get_maxid(&msg_ids(ns));
434 mutex_unlock(&msg_ids(ns).mutex); 473 up_read(&msg_ids(ns).rw_mutex);
435 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 474 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
436 return -EFAULT; 475 return -EFAULT;
437 return (max_id < 0) ? 0 : max_id; 476 return (max_id < 0) ? 0 : max_id;
438 } 477 }
439 case MSG_STAT: 478 case MSG_STAT: /* msqid is an index rather than a msg queue id */
440 case IPC_STAT: 479 case IPC_STAT:
441 { 480 {
442 struct msqid64_ds tbuf; 481 struct msqid64_ds tbuf;
@@ -444,21 +483,16 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
444 483
445 if (!buf) 484 if (!buf)
446 return -EFAULT; 485 return -EFAULT;
447 if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
448 return -EINVAL;
449
450 memset(&tbuf, 0, sizeof(tbuf));
451
452 msq = msg_lock(ns, msqid);
453 if (msq == NULL)
454 return -EINVAL;
455 486
456 if (cmd == MSG_STAT) { 487 if (cmd == MSG_STAT) {
457 success_return = msg_buildid(ns, msqid, msq->q_perm.seq); 488 msq = msg_lock(ns, msqid);
489 if (IS_ERR(msq))
490 return PTR_ERR(msq);
491 success_return = msq->q_perm.id;
458 } else { 492 } else {
459 err = -EIDRM; 493 msq = msg_lock_check(ns, msqid);
460 if (msg_checkid(ns, msq, msqid)) 494 if (IS_ERR(msq))
461 goto out_unlock; 495 return PTR_ERR(msq);
462 success_return = 0; 496 success_return = 0;
463 } 497 }
464 err = -EACCES; 498 err = -EACCES;
@@ -469,6 +503,8 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
469 if (err) 503 if (err)
470 goto out_unlock; 504 goto out_unlock;
471 505
506 memset(&tbuf, 0, sizeof(tbuf));
507
472 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); 508 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
473 tbuf.msg_stime = msq->q_stime; 509 tbuf.msg_stime = msq->q_stime;
474 tbuf.msg_rtime = msq->q_rtime; 510 tbuf.msg_rtime = msq->q_rtime;
@@ -495,15 +531,13 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
495 return -EINVAL; 531 return -EINVAL;
496 } 532 }
497 533
498 mutex_lock(&msg_ids(ns).mutex); 534 down_write(&msg_ids(ns).rw_mutex);
499 msq = msg_lock(ns, msqid); 535 msq = msg_lock_check_down(ns, msqid);
500 err = -EINVAL; 536 if (IS_ERR(msq)) {
501 if (msq == NULL) 537 err = PTR_ERR(msq);
502 goto out_up; 538 goto out_up;
539 }
503 540
504 err = -EIDRM;
505 if (msg_checkid(ns, msq, msqid))
506 goto out_unlock_up;
507 ipcp = &msq->q_perm; 541 ipcp = &msq->q_perm;
508 542
509 err = audit_ipc_obj(ipcp); 543 err = audit_ipc_obj(ipcp);
@@ -552,12 +586,12 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
552 break; 586 break;
553 } 587 }
554 case IPC_RMID: 588 case IPC_RMID:
555 freeque(ns, msq, msqid); 589 freeque(ns, msq);
556 break; 590 break;
557 } 591 }
558 err = 0; 592 err = 0;
559out_up: 593out_up:
560 mutex_unlock(&msg_ids(ns).mutex); 594 up_write(&msg_ids(ns).rw_mutex);
561 return err; 595 return err;
562out_unlock_up: 596out_unlock_up:
563 msg_unlock(msq); 597 msg_unlock(msq);
@@ -611,7 +645,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
611 msr->r_msg = ERR_PTR(-E2BIG); 645 msr->r_msg = ERR_PTR(-E2BIG);
612 } else { 646 } else {
613 msr->r_msg = NULL; 647 msr->r_msg = NULL;
614 msq->q_lrpid = msr->r_tsk->pid; 648 msq->q_lrpid = task_pid_vnr(msr->r_tsk);
615 msq->q_rtime = get_seconds(); 649 msq->q_rtime = get_seconds();
616 wake_up_process(msr->r_tsk); 650 wake_up_process(msr->r_tsk);
617 smp_mb(); 651 smp_mb();
@@ -646,14 +680,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
646 msg->m_type = mtype; 680 msg->m_type = mtype;
647 msg->m_ts = msgsz; 681 msg->m_ts = msgsz;
648 682
649 msq = msg_lock(ns, msqid); 683 msq = msg_lock_check(ns, msqid);
650 err = -EINVAL; 684 if (IS_ERR(msq)) {
651 if (msq == NULL) 685 err = PTR_ERR(msq);
652 goto out_free; 686 goto out_free;
653 687 }
654 err= -EIDRM;
655 if (msg_checkid(ns, msq, msqid))
656 goto out_unlock_free;
657 688
658 for (;;) { 689 for (;;) {
659 struct msg_sender s; 690 struct msg_sender s;
@@ -695,7 +726,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
695 } 726 }
696 } 727 }
697 728
698 msq->q_lspid = current->tgid; 729 msq->q_lspid = task_tgid_vnr(current);
699 msq->q_stime = get_seconds(); 730 msq->q_stime = get_seconds();
700 731
701 if (!pipelined_send(msq, msg)) { 732 if (!pipelined_send(msq, msg)) {
@@ -703,8 +734,8 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
703 list_add_tail(&msg->m_list, &msq->q_messages); 734 list_add_tail(&msg->m_list, &msq->q_messages);
704 msq->q_cbytes += msgsz; 735 msq->q_cbytes += msgsz;
705 msq->q_qnum++; 736 msq->q_qnum++;
706 atomic_add(msgsz, &msg_bytes); 737 atomic_add(msgsz, &ns->msg_bytes);
707 atomic_inc(&msg_hdrs); 738 atomic_inc(&ns->msg_hdrs);
708 } 739 }
709 740
710 err = 0; 741 err = 0;
@@ -760,13 +791,9 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
760 mode = convert_mode(&msgtyp, msgflg); 791 mode = convert_mode(&msgtyp, msgflg);
761 ns = current->nsproxy->ipc_ns; 792 ns = current->nsproxy->ipc_ns;
762 793
763 msq = msg_lock(ns, msqid); 794 msq = msg_lock_check(ns, msqid);
764 if (msq == NULL) 795 if (IS_ERR(msq))
765 return -EINVAL; 796 return PTR_ERR(msq);
766
767 msg = ERR_PTR(-EIDRM);
768 if (msg_checkid(ns, msq, msqid))
769 goto out_unlock;
770 797
771 for (;;) { 798 for (;;) {
772 struct msg_receiver msr_d; 799 struct msg_receiver msr_d;
@@ -810,10 +837,10 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
810 list_del(&msg->m_list); 837 list_del(&msg->m_list);
811 msq->q_qnum--; 838 msq->q_qnum--;
812 msq->q_rtime = get_seconds(); 839 msq->q_rtime = get_seconds();
813 msq->q_lrpid = current->tgid; 840 msq->q_lrpid = task_tgid_vnr(current);
814 msq->q_cbytes -= msg->m_ts; 841 msq->q_cbytes -= msg->m_ts;
815 atomic_sub(msg->m_ts, &msg_bytes); 842 atomic_sub(msg->m_ts, &ns->msg_bytes);
816 atomic_dec(&msg_hdrs); 843 atomic_dec(&ns->msg_hdrs);
817 ss_wakeup(&msq->q_senders, 0); 844 ss_wakeup(&msq->q_senders, 0);
818 msg_unlock(msq); 845 msg_unlock(msq);
819 break; 846 break;
@@ -926,7 +953,7 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
926 return seq_printf(s, 953 return seq_printf(s,
927 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", 954 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
928 msq->q_perm.key, 955 msq->q_perm.key,
929 msq->q_id, 956 msq->q_perm.id,
930 msq->q_perm.mode, 957 msq->q_perm.mode,
931 msq->q_cbytes, 958 msq->q_cbytes,
932 msq->q_qnum, 959 msq->q_qnum,
diff --git a/ipc/sem.c b/ipc/sem.c
index b676fef6d208..35952c0bae46 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -80,7 +80,7 @@
80#include <linux/audit.h> 80#include <linux/audit.h>
81#include <linux/capability.h> 81#include <linux/capability.h>
82#include <linux/seq_file.h> 82#include <linux/seq_file.h>
83#include <linux/mutex.h> 83#include <linux/rwsem.h>
84#include <linux/nsproxy.h> 84#include <linux/nsproxy.h>
85 85
86#include <asm/uaccess.h> 86#include <asm/uaccess.h>
@@ -88,18 +88,14 @@
88 88
89#define sem_ids(ns) (*((ns)->ids[IPC_SEM_IDS])) 89#define sem_ids(ns) (*((ns)->ids[IPC_SEM_IDS]))
90 90
91#define sem_lock(ns, id) ((struct sem_array*)ipc_lock(&sem_ids(ns), id))
92#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) 91#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
93#define sem_rmid(ns, id) ((struct sem_array*)ipc_rmid(&sem_ids(ns), id)) 92#define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid)
94#define sem_checkid(ns, sma, semid) \ 93#define sem_buildid(id, seq) ipc_buildid(id, seq)
95 ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid)
96#define sem_buildid(ns, id, seq) \
97 ipc_buildid(&sem_ids(ns), id, seq)
98 94
99static struct ipc_ids init_sem_ids; 95static struct ipc_ids init_sem_ids;
100 96
101static int newary(struct ipc_namespace *, key_t, int, int); 97static int newary(struct ipc_namespace *, struct ipc_params *);
102static void freeary(struct ipc_namespace *ns, struct sem_array *sma, int id); 98static void freeary(struct ipc_namespace *, struct sem_array *);
103#ifdef CONFIG_PROC_FS 99#ifdef CONFIG_PROC_FS
104static int sysvipc_sem_proc_show(struct seq_file *s, void *it); 100static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
105#endif 101#endif
@@ -129,7 +125,7 @@ static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
129 ns->sc_semopm = SEMOPM; 125 ns->sc_semopm = SEMOPM;
130 ns->sc_semmni = SEMMNI; 126 ns->sc_semmni = SEMMNI;
131 ns->used_sems = 0; 127 ns->used_sems = 0;
132 ipc_init_ids(ids, ns->sc_semmni); 128 ipc_init_ids(ids);
133} 129}
134 130
135int sem_init_ns(struct ipc_namespace *ns) 131int sem_init_ns(struct ipc_namespace *ns)
@@ -146,20 +142,24 @@ int sem_init_ns(struct ipc_namespace *ns)
146 142
147void sem_exit_ns(struct ipc_namespace *ns) 143void sem_exit_ns(struct ipc_namespace *ns)
148{ 144{
149 int i;
150 struct sem_array *sma; 145 struct sem_array *sma;
146 int next_id;
147 int total, in_use;
151 148
152 mutex_lock(&sem_ids(ns).mutex); 149 down_write(&sem_ids(ns).rw_mutex);
153 for (i = 0; i <= sem_ids(ns).max_id; i++) { 150
154 sma = sem_lock(ns, i); 151 in_use = sem_ids(ns).in_use;
152
153 for (total = 0, next_id = 0; total < in_use; next_id++) {
154 sma = idr_find(&sem_ids(ns).ipcs_idr, next_id);
155 if (sma == NULL) 155 if (sma == NULL)
156 continue; 156 continue;
157 157 ipc_lock_by_ptr(&sma->sem_perm);
158 freeary(ns, sma, i); 158 freeary(ns, sma);
159 total++;
159 } 160 }
160 mutex_unlock(&sem_ids(ns).mutex); 161 up_write(&sem_ids(ns).rw_mutex);
161 162
162 ipc_fini_ids(ns->ids[IPC_SEM_IDS]);
163 kfree(ns->ids[IPC_SEM_IDS]); 163 kfree(ns->ids[IPC_SEM_IDS]);
164 ns->ids[IPC_SEM_IDS] = NULL; 164 ns->ids[IPC_SEM_IDS] = NULL;
165} 165}
@@ -173,6 +173,42 @@ void __init sem_init (void)
173} 173}
174 174
175/* 175/*
176 * This routine is called in the paths where the rw_mutex is held to protect
177 * access to the idr tree.
178 */
179static inline struct sem_array *sem_lock_check_down(struct ipc_namespace *ns,
180 int id)
181{
182 struct kern_ipc_perm *ipcp = ipc_lock_check_down(&sem_ids(ns), id);
183
184 return container_of(ipcp, struct sem_array, sem_perm);
185}
186
187/*
188 * sem_lock_(check_) routines are called in the paths where the rw_mutex
189 * is not held.
190 */
191static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
192{
193 struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
194
195 return container_of(ipcp, struct sem_array, sem_perm);
196}
197
198static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
199 int id)
200{
201 struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
202
203 return container_of(ipcp, struct sem_array, sem_perm);
204}
205
206static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
207{
208 ipc_rmid(&sem_ids(ns), &s->sem_perm);
209}
210
211/*
176 * Lockless wakeup algorithm: 212 * Lockless wakeup algorithm:
177 * Without the check/retry algorithm a lockless wakeup is possible: 213 * Without the check/retry algorithm a lockless wakeup is possible:
178 * - queue.status is initialized to -EINTR before blocking. 214 * - queue.status is initialized to -EINTR before blocking.
@@ -206,12 +242,23 @@ void __init sem_init (void)
206 */ 242 */
207#define IN_WAKEUP 1 243#define IN_WAKEUP 1
208 244
209static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg) 245/**
246 * newary - Create a new semaphore set
247 * @ns: namespace
248 * @params: ptr to the structure that contains key, semflg and nsems
249 *
250 * Called with sem_ids.rw_mutex held (as a writer)
251 */
252
253static int newary(struct ipc_namespace *ns, struct ipc_params *params)
210{ 254{
211 int id; 255 int id;
212 int retval; 256 int retval;
213 struct sem_array *sma; 257 struct sem_array *sma;
214 int size; 258 int size;
259 key_t key = params->key;
260 int nsems = params->u.nsems;
261 int semflg = params->flg;
215 262
216 if (!nsems) 263 if (!nsems)
217 return -EINVAL; 264 return -EINVAL;
@@ -236,14 +283,14 @@ static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
236 } 283 }
237 284
238 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 285 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
239 if(id == -1) { 286 if (id < 0) {
240 security_sem_free(sma); 287 security_sem_free(sma);
241 ipc_rcu_putref(sma); 288 ipc_rcu_putref(sma);
242 return -ENOSPC; 289 return id;
243 } 290 }
244 ns->used_sems += nsems; 291 ns->used_sems += nsems;
245 292
246 sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq); 293 sma->sem_perm.id = sem_buildid(id, sma->sem_perm.seq);
247 sma->sem_base = (struct sem *) &sma[1]; 294 sma->sem_base = (struct sem *) &sma[1];
248 /* sma->sem_pending = NULL; */ 295 /* sma->sem_pending = NULL; */
249 sma->sem_pending_last = &sma->sem_pending; 296 sma->sem_pending_last = &sma->sem_pending;
@@ -252,48 +299,56 @@ static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
252 sma->sem_ctime = get_seconds(); 299 sma->sem_ctime = get_seconds();
253 sem_unlock(sma); 300 sem_unlock(sma);
254 301
255 return sma->sem_id; 302 return sma->sem_perm.id;
303}
304
305
306/*
307 * Called with sem_ids.rw_mutex and ipcp locked.
308 */
309static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
310{
311 struct sem_array *sma;
312
313 sma = container_of(ipcp, struct sem_array, sem_perm);
314 return security_sem_associate(sma, semflg);
256} 315}
257 316
258asmlinkage long sys_semget (key_t key, int nsems, int semflg) 317/*
318 * Called with sem_ids.rw_mutex and ipcp locked.
319 */
320static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
321 struct ipc_params *params)
259{ 322{
260 int id, err = -EINVAL;
261 struct sem_array *sma; 323 struct sem_array *sma;
324
325 sma = container_of(ipcp, struct sem_array, sem_perm);
326 if (params->u.nsems > sma->sem_nsems)
327 return -EINVAL;
328
329 return 0;
330}
331
332asmlinkage long sys_semget(key_t key, int nsems, int semflg)
333{
262 struct ipc_namespace *ns; 334 struct ipc_namespace *ns;
335 struct ipc_ops sem_ops;
336 struct ipc_params sem_params;
263 337
264 ns = current->nsproxy->ipc_ns; 338 ns = current->nsproxy->ipc_ns;
265 339
266 if (nsems < 0 || nsems > ns->sc_semmsl) 340 if (nsems < 0 || nsems > ns->sc_semmsl)
267 return -EINVAL; 341 return -EINVAL;
268 mutex_lock(&sem_ids(ns).mutex);
269
270 if (key == IPC_PRIVATE) {
271 err = newary(ns, key, nsems, semflg);
272 } else if ((id = ipc_findkey(&sem_ids(ns), key)) == -1) { /* key not used */
273 if (!(semflg & IPC_CREAT))
274 err = -ENOENT;
275 else
276 err = newary(ns, key, nsems, semflg);
277 } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
278 err = -EEXIST;
279 } else {
280 sma = sem_lock(ns, id);
281 BUG_ON(sma==NULL);
282 if (nsems > sma->sem_nsems)
283 err = -EINVAL;
284 else if (ipcperms(&sma->sem_perm, semflg))
285 err = -EACCES;
286 else {
287 int semid = sem_buildid(ns, id, sma->sem_perm.seq);
288 err = security_sem_associate(sma, semflg);
289 if (!err)
290 err = semid;
291 }
292 sem_unlock(sma);
293 }
294 342
295 mutex_unlock(&sem_ids(ns).mutex); 343 sem_ops.getnew = newary;
296 return err; 344 sem_ops.associate = sem_security;
345 sem_ops.more_checks = sem_more_checks;
346
347 sem_params.key = key;
348 sem_params.flg = semflg;
349 sem_params.u.nsems = nsems;
350
351 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
297} 352}
298 353
299/* Manage the doubly linked list sma->sem_pending as a FIFO: 354/* Manage the doubly linked list sma->sem_pending as a FIFO:
@@ -487,15 +542,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
487 return semzcnt; 542 return semzcnt;
488} 543}
489 544
490/* Free a semaphore set. freeary() is called with sem_ids.mutex locked and 545/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
491 * the spinlock for this semaphore set hold. sem_ids.mutex remains locked 546 * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
492 * on exit. 547 * remains locked on exit.
493 */ 548 */
494static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id) 549static void freeary(struct ipc_namespace *ns, struct sem_array *sma)
495{ 550{
496 struct sem_undo *un; 551 struct sem_undo *un;
497 struct sem_queue *q; 552 struct sem_queue *q;
498 int size;
499 553
500 /* Invalidate the existing undo structures for this semaphore set. 554 /* Invalidate the existing undo structures for this semaphore set.
501 * (They will be freed without any further action in exit_sem() 555 * (They will be freed without any further action in exit_sem()
@@ -518,12 +572,11 @@ static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
518 q = n; 572 q = n;
519 } 573 }
520 574
521 /* Remove the semaphore set from the ID array*/ 575 /* Remove the semaphore set from the IDR */
522 sma = sem_rmid(ns, id); 576 sem_rmid(ns, sma);
523 sem_unlock(sma); 577 sem_unlock(sma);
524 578
525 ns->used_sems -= sma->sem_nsems; 579 ns->used_sems -= sma->sem_nsems;
526 size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
527 security_sem_free(sma); 580 security_sem_free(sma);
528 ipc_rcu_putref(sma); 581 ipc_rcu_putref(sma);
529} 582}
@@ -576,7 +629,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
576 seminfo.semmnu = SEMMNU; 629 seminfo.semmnu = SEMMNU;
577 seminfo.semmap = SEMMAP; 630 seminfo.semmap = SEMMAP;
578 seminfo.semume = SEMUME; 631 seminfo.semume = SEMUME;
579 mutex_lock(&sem_ids(ns).mutex); 632 down_read(&sem_ids(ns).rw_mutex);
580 if (cmd == SEM_INFO) { 633 if (cmd == SEM_INFO) {
581 seminfo.semusz = sem_ids(ns).in_use; 634 seminfo.semusz = sem_ids(ns).in_use;
582 seminfo.semaem = ns->used_sems; 635 seminfo.semaem = ns->used_sems;
@@ -584,8 +637,8 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
584 seminfo.semusz = SEMUSZ; 637 seminfo.semusz = SEMUSZ;
585 seminfo.semaem = SEMAEM; 638 seminfo.semaem = SEMAEM;
586 } 639 }
587 max_id = sem_ids(ns).max_id; 640 max_id = ipc_get_maxid(&sem_ids(ns));
588 mutex_unlock(&sem_ids(ns).mutex); 641 up_read(&sem_ids(ns).rw_mutex);
589 if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) 642 if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
590 return -EFAULT; 643 return -EFAULT;
591 return (max_id < 0) ? 0: max_id; 644 return (max_id < 0) ? 0: max_id;
@@ -595,14 +648,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
595 struct semid64_ds tbuf; 648 struct semid64_ds tbuf;
596 int id; 649 int id;
597 650
598 if(semid >= sem_ids(ns).entries->size)
599 return -EINVAL;
600
601 memset(&tbuf,0,sizeof(tbuf));
602
603 sma = sem_lock(ns, semid); 651 sma = sem_lock(ns, semid);
604 if(sma == NULL) 652 if (IS_ERR(sma))
605 return -EINVAL; 653 return PTR_ERR(sma);
606 654
607 err = -EACCES; 655 err = -EACCES;
608 if (ipcperms (&sma->sem_perm, S_IRUGO)) 656 if (ipcperms (&sma->sem_perm, S_IRUGO))
@@ -612,7 +660,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
612 if (err) 660 if (err)
613 goto out_unlock; 661 goto out_unlock;
614 662
615 id = sem_buildid(ns, semid, sma->sem_perm.seq); 663 id = sma->sem_perm.id;
664
665 memset(&tbuf, 0, sizeof(tbuf));
616 666
617 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 667 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
618 tbuf.sem_otime = sma->sem_otime; 668 tbuf.sem_otime = sma->sem_otime;
@@ -642,16 +692,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
642 ushort* sem_io = fast_sem_io; 692 ushort* sem_io = fast_sem_io;
643 int nsems; 693 int nsems;
644 694
645 sma = sem_lock(ns, semid); 695 sma = sem_lock_check(ns, semid);
646 if(sma==NULL) 696 if (IS_ERR(sma))
647 return -EINVAL; 697 return PTR_ERR(sma);
648 698
649 nsems = sma->sem_nsems; 699 nsems = sma->sem_nsems;
650 700
651 err=-EIDRM;
652 if (sem_checkid(ns,sma,semid))
653 goto out_unlock;
654
655 err = -EACCES; 701 err = -EACCES;
656 if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO)) 702 if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
657 goto out_unlock; 703 goto out_unlock;
@@ -795,7 +841,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
795 for (un = sma->undo; un; un = un->id_next) 841 for (un = sma->undo; un; un = un->id_next)
796 un->semadj[semnum] = 0; 842 un->semadj[semnum] = 0;
797 curr->semval = val; 843 curr->semval = val;
798 curr->sempid = current->tgid; 844 curr->sempid = task_tgid_vnr(current);
799 sma->sem_ctime = get_seconds(); 845 sma->sem_ctime = get_seconds();
800 /* maybe some queued-up processes were waiting for this */ 846 /* maybe some queued-up processes were waiting for this */
801 update_queue(sma); 847 update_queue(sma);
@@ -863,14 +909,10 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
863 if(copy_semid_from_user (&setbuf, arg.buf, version)) 909 if(copy_semid_from_user (&setbuf, arg.buf, version))
864 return -EFAULT; 910 return -EFAULT;
865 } 911 }
866 sma = sem_lock(ns, semid); 912 sma = sem_lock_check_down(ns, semid);
867 if(sma==NULL) 913 if (IS_ERR(sma))
868 return -EINVAL; 914 return PTR_ERR(sma);
869 915
870 if (sem_checkid(ns,sma,semid)) {
871 err=-EIDRM;
872 goto out_unlock;
873 }
874 ipcp = &sma->sem_perm; 916 ipcp = &sma->sem_perm;
875 917
876 err = audit_ipc_obj(ipcp); 918 err = audit_ipc_obj(ipcp);
@@ -894,7 +936,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
894 936
895 switch(cmd){ 937 switch(cmd){
896 case IPC_RMID: 938 case IPC_RMID:
897 freeary(ns, sma, semid); 939 freeary(ns, sma);
898 err = 0; 940 err = 0;
899 break; 941 break;
900 case IPC_SET: 942 case IPC_SET:
@@ -948,45 +990,15 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
948 return err; 990 return err;
949 case IPC_RMID: 991 case IPC_RMID:
950 case IPC_SET: 992 case IPC_SET:
951 mutex_lock(&sem_ids(ns).mutex); 993 down_write(&sem_ids(ns).rw_mutex);
952 err = semctl_down(ns,semid,semnum,cmd,version,arg); 994 err = semctl_down(ns,semid,semnum,cmd,version,arg);
953 mutex_unlock(&sem_ids(ns).mutex); 995 up_write(&sem_ids(ns).rw_mutex);
954 return err; 996 return err;
955 default: 997 default:
956 return -EINVAL; 998 return -EINVAL;
957 } 999 }
958} 1000}
959 1001
960static inline void lock_semundo(void)
961{
962 struct sem_undo_list *undo_list;
963
964 undo_list = current->sysvsem.undo_list;
965 if (undo_list)
966 spin_lock(&undo_list->lock);
967}
968
969/* This code has an interaction with copy_semundo().
970 * Consider; two tasks are sharing the undo_list. task1
971 * acquires the undo_list lock in lock_semundo(). If task2 now
972 * exits before task1 releases the lock (by calling
973 * unlock_semundo()), then task1 will never call spin_unlock().
974 * This leave the sem_undo_list in a locked state. If task1 now creats task3
975 * and once again shares the sem_undo_list, the sem_undo_list will still be
976 * locked, and future SEM_UNDO operations will deadlock. This case is
977 * dealt with in copy_semundo() by having it reinitialize the spin lock when
978 * the refcnt goes from 1 to 2.
979 */
980static inline void unlock_semundo(void)
981{
982 struct sem_undo_list *undo_list;
983
984 undo_list = current->sysvsem.undo_list;
985 if (undo_list)
986 spin_unlock(&undo_list->lock);
987}
988
989
990/* If the task doesn't already have a undo_list, then allocate one 1002/* If the task doesn't already have a undo_list, then allocate one
991 * here. We guarantee there is only one thread using this undo list, 1003 * here. We guarantee there is only one thread using this undo list,
992 * and current is THE ONE 1004 * and current is THE ONE
@@ -1047,22 +1059,17 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
1047 if (error) 1059 if (error)
1048 return ERR_PTR(error); 1060 return ERR_PTR(error);
1049 1061
1050 lock_semundo(); 1062 spin_lock(&ulp->lock);
1051 un = lookup_undo(ulp, semid); 1063 un = lookup_undo(ulp, semid);
1052 unlock_semundo(); 1064 spin_unlock(&ulp->lock);
1053 if (likely(un!=NULL)) 1065 if (likely(un!=NULL))
1054 goto out; 1066 goto out;
1055 1067
1056 /* no undo structure around - allocate one. */ 1068 /* no undo structure around - allocate one. */
1057 sma = sem_lock(ns, semid); 1069 sma = sem_lock_check(ns, semid);
1058 un = ERR_PTR(-EINVAL); 1070 if (IS_ERR(sma))
1059 if(sma==NULL) 1071 return ERR_PTR(PTR_ERR(sma));
1060 goto out; 1072
1061 un = ERR_PTR(-EIDRM);
1062 if (sem_checkid(ns,sma,semid)) {
1063 sem_unlock(sma);
1064 goto out;
1065 }
1066 nsems = sma->sem_nsems; 1073 nsems = sma->sem_nsems;
1067 ipc_rcu_getref(sma); 1074 ipc_rcu_getref(sma);
1068 sem_unlock(sma); 1075 sem_unlock(sma);
@@ -1077,10 +1084,10 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
1077 new->semadj = (short *) &new[1]; 1084 new->semadj = (short *) &new[1];
1078 new->semid = semid; 1085 new->semid = semid;
1079 1086
1080 lock_semundo(); 1087 spin_lock(&ulp->lock);
1081 un = lookup_undo(ulp, semid); 1088 un = lookup_undo(ulp, semid);
1082 if (un) { 1089 if (un) {
1083 unlock_semundo(); 1090 spin_unlock(&ulp->lock);
1084 kfree(new); 1091 kfree(new);
1085 ipc_lock_by_ptr(&sma->sem_perm); 1092 ipc_lock_by_ptr(&sma->sem_perm);
1086 ipc_rcu_putref(sma); 1093 ipc_rcu_putref(sma);
@@ -1091,7 +1098,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
1091 ipc_rcu_putref(sma); 1098 ipc_rcu_putref(sma);
1092 if (sma->sem_perm.deleted) { 1099 if (sma->sem_perm.deleted) {
1093 sem_unlock(sma); 1100 sem_unlock(sma);
1094 unlock_semundo(); 1101 spin_unlock(&ulp->lock);
1095 kfree(new); 1102 kfree(new);
1096 un = ERR_PTR(-EIDRM); 1103 un = ERR_PTR(-EIDRM);
1097 goto out; 1104 goto out;
@@ -1102,7 +1109,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
1102 sma->undo = new; 1109 sma->undo = new;
1103 sem_unlock(sma); 1110 sem_unlock(sma);
1104 un = new; 1111 un = new;
1105 unlock_semundo(); 1112 spin_unlock(&ulp->lock);
1106out: 1113out:
1107 return un; 1114 return un;
1108} 1115}
@@ -1168,15 +1175,14 @@ retry_undos:
1168 } else 1175 } else
1169 un = NULL; 1176 un = NULL;
1170 1177
1171 sma = sem_lock(ns, semid); 1178 sma = sem_lock_check(ns, semid);
1172 error=-EINVAL; 1179 if (IS_ERR(sma)) {
1173 if(sma==NULL) 1180 error = PTR_ERR(sma);
1174 goto out_free; 1181 goto out_free;
1175 error = -EIDRM; 1182 }
1176 if (sem_checkid(ns,sma,semid)) 1183
1177 goto out_unlock_free;
1178 /* 1184 /*
1179 * semid identifies are not unique - find_undo may have 1185 * semid identifiers are not unique - find_undo may have
1180 * allocated an undo structure, it was invalidated by an RMID 1186 * allocated an undo structure, it was invalidated by an RMID
1181 * and now a new array with received the same id. Check and retry. 1187 * and now a new array with received the same id. Check and retry.
1182 */ 1188 */
@@ -1196,7 +1202,7 @@ retry_undos:
1196 if (error) 1202 if (error)
1197 goto out_unlock_free; 1203 goto out_unlock_free;
1198 1204
1199 error = try_atomic_semop (sma, sops, nsops, un, current->tgid); 1205 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
1200 if (error <= 0) { 1206 if (error <= 0) {
1201 if (alter && error == 0) 1207 if (alter && error == 0)
1202 update_queue (sma); 1208 update_queue (sma);
@@ -1211,7 +1217,7 @@ retry_undos:
1211 queue.sops = sops; 1217 queue.sops = sops;
1212 queue.nsops = nsops; 1218 queue.nsops = nsops;
1213 queue.undo = un; 1219 queue.undo = un;
1214 queue.pid = current->tgid; 1220 queue.pid = task_tgid_vnr(current);
1215 queue.id = semid; 1221 queue.id = semid;
1216 queue.alter = alter; 1222 queue.alter = alter;
1217 if (alter) 1223 if (alter)
@@ -1242,7 +1248,7 @@ retry_undos:
1242 } 1248 }
1243 1249
1244 sma = sem_lock(ns, semid); 1250 sma = sem_lock(ns, semid);
1245 if(sma==NULL) { 1251 if (IS_ERR(sma)) {
1246 BUG_ON(queue.prev != NULL); 1252 BUG_ON(queue.prev != NULL);
1247 error = -EIDRM; 1253 error = -EIDRM;
1248 goto out_free; 1254 goto out_free;
@@ -1279,10 +1285,6 @@ asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsop
1279 1285
1280/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 1286/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
1281 * parent and child tasks. 1287 * parent and child tasks.
1282 *
1283 * See the notes above unlock_semundo() regarding the spin_lock_init()
1284 * in this code. Initialize the undo_list->lock here instead of get_undo_list()
1285 * because of the reasoning in the comment above unlock_semundo.
1286 */ 1288 */
1287 1289
1288int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 1290int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
@@ -1342,13 +1344,13 @@ void exit_sem(struct task_struct *tsk)
1342 if(semid == -1) 1344 if(semid == -1)
1343 continue; 1345 continue;
1344 sma = sem_lock(ns, semid); 1346 sma = sem_lock(ns, semid);
1345 if (sma == NULL) 1347 if (IS_ERR(sma))
1346 continue; 1348 continue;
1347 1349
1348 if (u->semid == -1) 1350 if (u->semid == -1)
1349 goto next_entry; 1351 goto next_entry;
1350 1352
1351 BUG_ON(sem_checkid(ns,sma,u->semid)); 1353 BUG_ON(sem_checkid(sma, u->semid));
1352 1354
1353 /* remove u from the sma->undo list */ 1355 /* remove u from the sma->undo list */
1354 for (unp = &sma->undo; (un = *unp); unp = &un->id_next) { 1356 for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
@@ -1382,7 +1384,7 @@ found:
1382 semaphore->semval = 0; 1384 semaphore->semval = 0;
1383 if (semaphore->semval > SEMVMX) 1385 if (semaphore->semval > SEMVMX)
1384 semaphore->semval = SEMVMX; 1386 semaphore->semval = SEMVMX;
1385 semaphore->sempid = current->tgid; 1387 semaphore->sempid = task_tgid_vnr(current);
1386 } 1388 }
1387 } 1389 }
1388 sma->sem_otime = get_seconds(); 1390 sma->sem_otime = get_seconds();
@@ -1402,7 +1404,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
1402 return seq_printf(s, 1404 return seq_printf(s,
1403 "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", 1405 "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
1404 sma->sem_perm.key, 1406 sma->sem_perm.key,
1405 sma->sem_id, 1407 sma->sem_perm.id,
1406 sma->sem_perm.mode, 1408 sma->sem_perm.mode,
1407 sma->sem_nsems, 1409 sma->sem_nsems,
1408 sma->sem_perm.uid, 1410 sma->sem_perm.uid,
diff --git a/ipc/shm.c b/ipc/shm.c
index 5fc5cf50cf1b..3818fae625c5 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -35,7 +35,7 @@
35#include <linux/capability.h> 35#include <linux/capability.h>
36#include <linux/ptrace.h> 36#include <linux/ptrace.h>
37#include <linux/seq_file.h> 37#include <linux/seq_file.h>
38#include <linux/mutex.h> 38#include <linux/rwsem.h>
39#include <linux/nsproxy.h> 39#include <linux/nsproxy.h>
40#include <linux/mount.h> 40#include <linux/mount.h>
41 41
@@ -59,17 +59,11 @@ static struct ipc_ids init_shm_ids;
59 59
60#define shm_ids(ns) (*((ns)->ids[IPC_SHM_IDS])) 60#define shm_ids(ns) (*((ns)->ids[IPC_SHM_IDS]))
61 61
62#define shm_lock(ns, id) \
63 ((struct shmid_kernel*)ipc_lock(&shm_ids(ns),id))
64#define shm_unlock(shp) \ 62#define shm_unlock(shp) \
65 ipc_unlock(&(shp)->shm_perm) 63 ipc_unlock(&(shp)->shm_perm)
66#define shm_get(ns, id) \ 64#define shm_buildid(id, seq) ipc_buildid(id, seq)
67 ((struct shmid_kernel*)ipc_get(&shm_ids(ns),id))
68#define shm_buildid(ns, id, seq) \
69 ipc_buildid(&shm_ids(ns), id, seq)
70 65
71static int newseg (struct ipc_namespace *ns, key_t key, 66static int newseg(struct ipc_namespace *, struct ipc_params *);
72 int shmflg, size_t size);
73static void shm_open(struct vm_area_struct *vma); 67static void shm_open(struct vm_area_struct *vma);
74static void shm_close(struct vm_area_struct *vma); 68static void shm_close(struct vm_area_struct *vma);
75static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp); 69static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
@@ -84,9 +78,13 @@ static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
84 ns->shm_ctlall = SHMALL; 78 ns->shm_ctlall = SHMALL;
85 ns->shm_ctlmni = SHMMNI; 79 ns->shm_ctlmni = SHMMNI;
86 ns->shm_tot = 0; 80 ns->shm_tot = 0;
87 ipc_init_ids(ids, 1); 81 ipc_init_ids(ids);
88} 82}
89 83
84/*
85 * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
86 * Only shm_ids.rw_mutex remains locked on exit.
87 */
90static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp) 88static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
91{ 89{
92 if (shp->shm_nattch){ 90 if (shp->shm_nattch){
@@ -112,20 +110,24 @@ int shm_init_ns(struct ipc_namespace *ns)
112 110
113void shm_exit_ns(struct ipc_namespace *ns) 111void shm_exit_ns(struct ipc_namespace *ns)
114{ 112{
115 int i;
116 struct shmid_kernel *shp; 113 struct shmid_kernel *shp;
114 int next_id;
115 int total, in_use;
116
117 down_write(&shm_ids(ns).rw_mutex);
117 118
118 mutex_lock(&shm_ids(ns).mutex); 119 in_use = shm_ids(ns).in_use;
119 for (i = 0; i <= shm_ids(ns).max_id; i++) { 120
120 shp = shm_lock(ns, i); 121 for (total = 0, next_id = 0; total < in_use; next_id++) {
122 shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
121 if (shp == NULL) 123 if (shp == NULL)
122 continue; 124 continue;
123 125 ipc_lock_by_ptr(&shp->shm_perm);
124 do_shm_rmid(ns, shp); 126 do_shm_rmid(ns, shp);
127 total++;
125 } 128 }
126 mutex_unlock(&shm_ids(ns).mutex); 129 up_write(&shm_ids(ns).rw_mutex);
127 130
128 ipc_fini_ids(ns->ids[IPC_SHM_IDS]);
129 kfree(ns->ids[IPC_SHM_IDS]); 131 kfree(ns->ids[IPC_SHM_IDS]);
130 ns->ids[IPC_SHM_IDS] = NULL; 132 ns->ids[IPC_SHM_IDS] = NULL;
131} 133}
@@ -138,17 +140,49 @@ void __init shm_init (void)
138 IPC_SHM_IDS, sysvipc_shm_proc_show); 140 IPC_SHM_IDS, sysvipc_shm_proc_show);
139} 141}
140 142
141static inline int shm_checkid(struct ipc_namespace *ns, 143/*
142 struct shmid_kernel *s, int id) 144 * shm_lock_(check_)down routines are called in the paths where the rw_mutex
145 * is held to protect access to the idr tree.
146 */
147static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
148 int id)
143{ 149{
144 if (ipc_checkid(&shm_ids(ns), &s->shm_perm, id)) 150 struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
145 return -EIDRM; 151
146 return 0; 152 return container_of(ipcp, struct shmid_kernel, shm_perm);
153}
154
155static inline struct shmid_kernel *shm_lock_check_down(
156 struct ipc_namespace *ns,
157 int id)
158{
159 struct kern_ipc_perm *ipcp = ipc_lock_check_down(&shm_ids(ns), id);
160
161 return container_of(ipcp, struct shmid_kernel, shm_perm);
162}
163
164/*
165 * shm_lock_(check_) routines are called in the paths where the rw_mutex
166 * is not held.
167 */
168static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
169{
170 struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
171
172 return container_of(ipcp, struct shmid_kernel, shm_perm);
173}
174
175static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
176 int id)
177{
178 struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
179
180 return container_of(ipcp, struct shmid_kernel, shm_perm);
147} 181}
148 182
149static inline struct shmid_kernel *shm_rmid(struct ipc_namespace *ns, int id) 183static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
150{ 184{
151 return (struct shmid_kernel *)ipc_rmid(&shm_ids(ns), id); 185 ipc_rmid(&shm_ids(ns), &s->shm_perm);
152} 186}
153 187
154static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp) 188static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -166,9 +200,9 @@ static void shm_open(struct vm_area_struct *vma)
166 struct shmid_kernel *shp; 200 struct shmid_kernel *shp;
167 201
168 shp = shm_lock(sfd->ns, sfd->id); 202 shp = shm_lock(sfd->ns, sfd->id);
169 BUG_ON(!shp); 203 BUG_ON(IS_ERR(shp));
170 shp->shm_atim = get_seconds(); 204 shp->shm_atim = get_seconds();
171 shp->shm_lprid = current->tgid; 205 shp->shm_lprid = task_tgid_vnr(current);
172 shp->shm_nattch++; 206 shp->shm_nattch++;
173 shm_unlock(shp); 207 shm_unlock(shp);
174} 208}
@@ -176,15 +210,16 @@ static void shm_open(struct vm_area_struct *vma)
176/* 210/*
177 * shm_destroy - free the struct shmid_kernel 211 * shm_destroy - free the struct shmid_kernel
178 * 212 *
213 * @ns: namespace
179 * @shp: struct to free 214 * @shp: struct to free
180 * 215 *
181 * It has to be called with shp and shm_ids.mutex locked, 216 * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
182 * but returns with shp unlocked and freed. 217 * but returns with shp unlocked and freed.
183 */ 218 */
184static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 219static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
185{ 220{
186 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; 221 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
187 shm_rmid(ns, shp->id); 222 shm_rmid(ns, shp);
188 shm_unlock(shp); 223 shm_unlock(shp);
189 if (!is_file_hugepages(shp->shm_file)) 224 if (!is_file_hugepages(shp->shm_file))
190 shmem_lock(shp->shm_file, 0, shp->mlock_user); 225 shmem_lock(shp->shm_file, 0, shp->mlock_user);
@@ -209,11 +244,11 @@ static void shm_close(struct vm_area_struct *vma)
209 struct shmid_kernel *shp; 244 struct shmid_kernel *shp;
210 struct ipc_namespace *ns = sfd->ns; 245 struct ipc_namespace *ns = sfd->ns;
211 246
212 mutex_lock(&shm_ids(ns).mutex); 247 down_write(&shm_ids(ns).rw_mutex);
213 /* remove from the list of attaches of the shm segment */ 248 /* remove from the list of attaches of the shm segment */
214 shp = shm_lock(ns, sfd->id); 249 shp = shm_lock_down(ns, sfd->id);
215 BUG_ON(!shp); 250 BUG_ON(IS_ERR(shp));
216 shp->shm_lprid = current->tgid; 251 shp->shm_lprid = task_tgid_vnr(current);
217 shp->shm_dtim = get_seconds(); 252 shp->shm_dtim = get_seconds();
218 shp->shm_nattch--; 253 shp->shm_nattch--;
219 if(shp->shm_nattch == 0 && 254 if(shp->shm_nattch == 0 &&
@@ -221,7 +256,7 @@ static void shm_close(struct vm_area_struct *vma)
221 shm_destroy(ns, shp); 256 shm_destroy(ns, shp);
222 else 257 else
223 shm_unlock(shp); 258 shm_unlock(shp);
224 mutex_unlock(&shm_ids(ns).mutex); 259 up_write(&shm_ids(ns).rw_mutex);
225} 260}
226 261
227static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 262static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -337,8 +372,19 @@ static struct vm_operations_struct shm_vm_ops = {
337#endif 372#endif
338}; 373};
339 374
340static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size) 375/**
376 * newseg - Create a new shared memory segment
377 * @ns: namespace
378 * @params: ptr to the structure that contains key, size and shmflg
379 *
380 * Called with shm_ids.rw_mutex held as a writer.
381 */
382
383static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
341{ 384{
385 key_t key = params->key;
386 int shmflg = params->flg;
387 size_t size = params->u.size;
342 int error; 388 int error;
343 struct shmid_kernel *shp; 389 struct shmid_kernel *shp;
344 int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; 390 int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
@@ -387,28 +433,30 @@ static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
387 if (IS_ERR(file)) 433 if (IS_ERR(file))
388 goto no_file; 434 goto no_file;
389 435
390 error = -ENOSPC;
391 id = shm_addid(ns, shp); 436 id = shm_addid(ns, shp);
392 if(id == -1) 437 if (id < 0) {
438 error = id;
393 goto no_id; 439 goto no_id;
440 }
394 441
395 shp->shm_cprid = current->tgid; 442 shp->shm_cprid = task_tgid_vnr(current);
396 shp->shm_lprid = 0; 443 shp->shm_lprid = 0;
397 shp->shm_atim = shp->shm_dtim = 0; 444 shp->shm_atim = shp->shm_dtim = 0;
398 shp->shm_ctim = get_seconds(); 445 shp->shm_ctim = get_seconds();
399 shp->shm_segsz = size; 446 shp->shm_segsz = size;
400 shp->shm_nattch = 0; 447 shp->shm_nattch = 0;
401 shp->id = shm_buildid(ns, id, shp->shm_perm.seq); 448 shp->shm_perm.id = shm_buildid(id, shp->shm_perm.seq);
402 shp->shm_file = file; 449 shp->shm_file = file;
403 /* 450 /*
404 * shmid gets reported as "inode#" in /proc/pid/maps. 451 * shmid gets reported as "inode#" in /proc/pid/maps.
405 * proc-ps tools use this. Changing this will break them. 452 * proc-ps tools use this. Changing this will break them.
406 */ 453 */
407 file->f_dentry->d_inode->i_ino = shp->id; 454 file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
408 455
409 ns->shm_tot += numpages; 456 ns->shm_tot += numpages;
457 error = shp->shm_perm.id;
410 shm_unlock(shp); 458 shm_unlock(shp);
411 return shp->id; 459 return error;
412 460
413no_id: 461no_id:
414 fput(file); 462 fput(file);
@@ -418,42 +466,49 @@ no_file:
418 return error; 466 return error;
419} 467}
420 468
421asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) 469/*
470 * Called with shm_ids.rw_mutex and ipcp locked.
471 */
472static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
422{ 473{
423 struct shmid_kernel *shp; 474 struct shmid_kernel *shp;
424 int err, id = 0; 475
476 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
477 return security_shm_associate(shp, shmflg);
478}
479
480/*
481 * Called with shm_ids.rw_mutex and ipcp locked.
482 */
483static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
484 struct ipc_params *params)
485{
486 struct shmid_kernel *shp;
487
488 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
489 if (shp->shm_segsz < params->u.size)
490 return -EINVAL;
491
492 return 0;
493}
494
495asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
496{
425 struct ipc_namespace *ns; 497 struct ipc_namespace *ns;
498 struct ipc_ops shm_ops;
499 struct ipc_params shm_params;
426 500
427 ns = current->nsproxy->ipc_ns; 501 ns = current->nsproxy->ipc_ns;
428 502
429 mutex_lock(&shm_ids(ns).mutex); 503 shm_ops.getnew = newseg;
430 if (key == IPC_PRIVATE) { 504 shm_ops.associate = shm_security;
431 err = newseg(ns, key, shmflg, size); 505 shm_ops.more_checks = shm_more_checks;
432 } else if ((id = ipc_findkey(&shm_ids(ns), key)) == -1) {
433 if (!(shmflg & IPC_CREAT))
434 err = -ENOENT;
435 else
436 err = newseg(ns, key, shmflg, size);
437 } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
438 err = -EEXIST;
439 } else {
440 shp = shm_lock(ns, id);
441 BUG_ON(shp==NULL);
442 if (shp->shm_segsz < size)
443 err = -EINVAL;
444 else if (ipcperms(&shp->shm_perm, shmflg))
445 err = -EACCES;
446 else {
447 int shmid = shm_buildid(ns, id, shp->shm_perm.seq);
448 err = security_shm_associate(shp, shmflg);
449 if (!err)
450 err = shmid;
451 }
452 shm_unlock(shp);
453 }
454 mutex_unlock(&shm_ids(ns).mutex);
455 506
456 return err; 507 shm_params.key = key;
508 shm_params.flg = shmflg;
509 shm_params.u.size = size;
510
511 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
457} 512}
458 513
459static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) 514static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
@@ -547,20 +602,26 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
547 } 602 }
548} 603}
549 604
605/*
606 * Called with shm_ids.rw_mutex held as a reader
607 */
550static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, 608static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
551 unsigned long *swp) 609 unsigned long *swp)
552{ 610{
553 int i; 611 int next_id;
612 int total, in_use;
554 613
555 *rss = 0; 614 *rss = 0;
556 *swp = 0; 615 *swp = 0;
557 616
558 for (i = 0; i <= shm_ids(ns).max_id; i++) { 617 in_use = shm_ids(ns).in_use;
618
619 for (total = 0, next_id = 0; total < in_use; next_id++) {
559 struct shmid_kernel *shp; 620 struct shmid_kernel *shp;
560 struct inode *inode; 621 struct inode *inode;
561 622
562 shp = shm_get(ns, i); 623 shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
563 if(!shp) 624 if (shp == NULL)
564 continue; 625 continue;
565 626
566 inode = shp->shm_file->f_path.dentry->d_inode; 627 inode = shp->shm_file->f_path.dentry->d_inode;
@@ -575,6 +636,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
575 *swp += info->swapped; 636 *swp += info->swapped;
576 spin_unlock(&info->lock); 637 spin_unlock(&info->lock);
577 } 638 }
639
640 total++;
578 } 641 }
579} 642}
580 643
@@ -610,8 +673,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
610 shminfo.shmmin = SHMMIN; 673 shminfo.shmmin = SHMMIN;
611 if(copy_shminfo_to_user (buf, &shminfo, version)) 674 if(copy_shminfo_to_user (buf, &shminfo, version))
612 return -EFAULT; 675 return -EFAULT;
613 /* reading a integer is always atomic */ 676
614 err= shm_ids(ns).max_id; 677 down_read(&shm_ids(ns).rw_mutex);
678 err = ipc_get_maxid(&shm_ids(ns));
679 up_read(&shm_ids(ns).rw_mutex);
680
615 if(err<0) 681 if(err<0)
616 err = 0; 682 err = 0;
617 goto out; 683 goto out;
@@ -625,14 +691,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
625 return err; 691 return err;
626 692
627 memset(&shm_info,0,sizeof(shm_info)); 693 memset(&shm_info,0,sizeof(shm_info));
628 mutex_lock(&shm_ids(ns).mutex); 694 down_read(&shm_ids(ns).rw_mutex);
629 shm_info.used_ids = shm_ids(ns).in_use; 695 shm_info.used_ids = shm_ids(ns).in_use;
630 shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); 696 shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
631 shm_info.shm_tot = ns->shm_tot; 697 shm_info.shm_tot = ns->shm_tot;
632 shm_info.swap_attempts = 0; 698 shm_info.swap_attempts = 0;
633 shm_info.swap_successes = 0; 699 shm_info.swap_successes = 0;
634 err = shm_ids(ns).max_id; 700 err = ipc_get_maxid(&shm_ids(ns));
635 mutex_unlock(&shm_ids(ns).mutex); 701 up_read(&shm_ids(ns).rw_mutex);
636 if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { 702 if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
637 err = -EFAULT; 703 err = -EFAULT;
638 goto out; 704 goto out;
@@ -646,20 +712,25 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
646 { 712 {
647 struct shmid64_ds tbuf; 713 struct shmid64_ds tbuf;
648 int result; 714 int result;
649 memset(&tbuf, 0, sizeof(tbuf)); 715
650 shp = shm_lock(ns, shmid); 716 if (!buf) {
651 if(shp==NULL) { 717 err = -EFAULT;
652 err = -EINVAL;
653 goto out; 718 goto out;
654 } else if(cmd==SHM_STAT) { 719 }
655 err = -EINVAL; 720
656 if (shmid > shm_ids(ns).max_id) 721 if (cmd == SHM_STAT) {
657 goto out_unlock; 722 shp = shm_lock(ns, shmid);
658 result = shm_buildid(ns, shmid, shp->shm_perm.seq); 723 if (IS_ERR(shp)) {
724 err = PTR_ERR(shp);
725 goto out;
726 }
727 result = shp->shm_perm.id;
659 } else { 728 } else {
660 err = shm_checkid(ns, shp,shmid); 729 shp = shm_lock_check(ns, shmid);
661 if(err) 730 if (IS_ERR(shp)) {
662 goto out_unlock; 731 err = PTR_ERR(shp);
732 goto out;
733 }
663 result = 0; 734 result = 0;
664 } 735 }
665 err=-EACCES; 736 err=-EACCES;
@@ -668,6 +739,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
668 err = security_shm_shmctl(shp, cmd); 739 err = security_shm_shmctl(shp, cmd);
669 if (err) 740 if (err)
670 goto out_unlock; 741 goto out_unlock;
742 memset(&tbuf, 0, sizeof(tbuf));
671 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); 743 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
672 tbuf.shm_segsz = shp->shm_segsz; 744 tbuf.shm_segsz = shp->shm_segsz;
673 tbuf.shm_atime = shp->shm_atim; 745 tbuf.shm_atime = shp->shm_atim;
@@ -686,14 +758,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
686 case SHM_LOCK: 758 case SHM_LOCK:
687 case SHM_UNLOCK: 759 case SHM_UNLOCK:
688 { 760 {
689 shp = shm_lock(ns, shmid); 761 shp = shm_lock_check(ns, shmid);
690 if(shp==NULL) { 762 if (IS_ERR(shp)) {
691 err = -EINVAL; 763 err = PTR_ERR(shp);
692 goto out; 764 goto out;
693 } 765 }
694 err = shm_checkid(ns, shp,shmid);
695 if(err)
696 goto out_unlock;
697 766
698 err = audit_ipc_obj(&(shp->shm_perm)); 767 err = audit_ipc_obj(&(shp->shm_perm));
699 if (err) 768 if (err)
@@ -742,14 +811,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
742 * Instead we set a destroyed flag, and then blow 811 * Instead we set a destroyed flag, and then blow
743 * the name away when the usage hits zero. 812 * the name away when the usage hits zero.
744 */ 813 */
745 mutex_lock(&shm_ids(ns).mutex); 814 down_write(&shm_ids(ns).rw_mutex);
746 shp = shm_lock(ns, shmid); 815 shp = shm_lock_check_down(ns, shmid);
747 err = -EINVAL; 816 if (IS_ERR(shp)) {
748 if (shp == NULL) 817 err = PTR_ERR(shp);
749 goto out_up; 818 goto out_up;
750 err = shm_checkid(ns, shp, shmid); 819 }
751 if(err)
752 goto out_unlock_up;
753 820
754 err = audit_ipc_obj(&(shp->shm_perm)); 821 err = audit_ipc_obj(&(shp->shm_perm));
755 if (err) 822 if (err)
@@ -767,24 +834,27 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
767 goto out_unlock_up; 834 goto out_unlock_up;
768 835
769 do_shm_rmid(ns, shp); 836 do_shm_rmid(ns, shp);
770 mutex_unlock(&shm_ids(ns).mutex); 837 up_write(&shm_ids(ns).rw_mutex);
771 goto out; 838 goto out;
772 } 839 }
773 840
774 case IPC_SET: 841 case IPC_SET:
775 { 842 {
843 if (!buf) {
844 err = -EFAULT;
845 goto out;
846 }
847
776 if (copy_shmid_from_user (&setbuf, buf, version)) { 848 if (copy_shmid_from_user (&setbuf, buf, version)) {
777 err = -EFAULT; 849 err = -EFAULT;
778 goto out; 850 goto out;
779 } 851 }
780 mutex_lock(&shm_ids(ns).mutex); 852 down_write(&shm_ids(ns).rw_mutex);
781 shp = shm_lock(ns, shmid); 853 shp = shm_lock_check_down(ns, shmid);
782 err=-EINVAL; 854 if (IS_ERR(shp)) {
783 if(shp==NULL) 855 err = PTR_ERR(shp);
784 goto out_up; 856 goto out_up;
785 err = shm_checkid(ns, shp,shmid); 857 }
786 if(err)
787 goto out_unlock_up;
788 err = audit_ipc_obj(&(shp->shm_perm)); 858 err = audit_ipc_obj(&(shp->shm_perm));
789 if (err) 859 if (err)
790 goto out_unlock_up; 860 goto out_unlock_up;
@@ -819,7 +889,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
819out_unlock_up: 889out_unlock_up:
820 shm_unlock(shp); 890 shm_unlock(shp);
821out_up: 891out_up:
822 mutex_unlock(&shm_ids(ns).mutex); 892 up_write(&shm_ids(ns).rw_mutex);
823 goto out; 893 goto out;
824out_unlock: 894out_unlock:
825 shm_unlock(shp); 895 shm_unlock(shp);
@@ -890,13 +960,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
890 * additional creator id... 960 * additional creator id...
891 */ 961 */
892 ns = current->nsproxy->ipc_ns; 962 ns = current->nsproxy->ipc_ns;
893 shp = shm_lock(ns, shmid); 963 shp = shm_lock_check(ns, shmid);
894 if(shp == NULL) 964 if (IS_ERR(shp)) {
965 err = PTR_ERR(shp);
895 goto out; 966 goto out;
896 967 }
897 err = shm_checkid(ns, shp,shmid);
898 if (err)
899 goto out_unlock;
900 968
901 err = -EACCES; 969 err = -EACCES;
902 if (ipcperms(&shp->shm_perm, acc_mode)) 970 if (ipcperms(&shp->shm_perm, acc_mode))
@@ -925,7 +993,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
925 993
926 file->private_data = sfd; 994 file->private_data = sfd;
927 file->f_mapping = shp->shm_file->f_mapping; 995 file->f_mapping = shp->shm_file->f_mapping;
928 sfd->id = shp->id; 996 sfd->id = shp->shm_perm.id;
929 sfd->ns = get_ipc_ns(ns); 997 sfd->ns = get_ipc_ns(ns);
930 sfd->file = shp->shm_file; 998 sfd->file = shp->shm_file;
931 sfd->vm_ops = NULL; 999 sfd->vm_ops = NULL;
@@ -955,16 +1023,16 @@ invalid:
955 fput(file); 1023 fput(file);
956 1024
957out_nattch: 1025out_nattch:
958 mutex_lock(&shm_ids(ns).mutex); 1026 down_write(&shm_ids(ns).rw_mutex);
959 shp = shm_lock(ns, shmid); 1027 shp = shm_lock_down(ns, shmid);
960 BUG_ON(!shp); 1028 BUG_ON(IS_ERR(shp));
961 shp->shm_nattch--; 1029 shp->shm_nattch--;
962 if(shp->shm_nattch == 0 && 1030 if(shp->shm_nattch == 0 &&
963 shp->shm_perm.mode & SHM_DEST) 1031 shp->shm_perm.mode & SHM_DEST)
964 shm_destroy(ns, shp); 1032 shm_destroy(ns, shp);
965 else 1033 else
966 shm_unlock(shp); 1034 shm_unlock(shp);
967 mutex_unlock(&shm_ids(ns).mutex); 1035 up_write(&shm_ids(ns).rw_mutex);
968 1036
969out: 1037out:
970 return err; 1038 return err;
@@ -1094,7 +1162,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1094 format = BIG_STRING; 1162 format = BIG_STRING;
1095 return seq_printf(s, format, 1163 return seq_printf(s, format,
1096 shp->shm_perm.key, 1164 shp->shm_perm.key,
1097 shp->id, 1165 shp->shm_perm.id,
1098 shp->shm_perm.mode, 1166 shp->shm_perm.mode,
1099 shp->shm_segsz, 1167 shp->shm_segsz,
1100 shp->shm_cprid, 1168 shp->shm_cprid,
diff --git a/ipc/util.c b/ipc/util.c
index 44e5135aee47..1aa0ebf71bac 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -32,6 +32,7 @@
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/audit.h> 33#include <linux/audit.h>
34#include <linux/nsproxy.h> 34#include <linux/nsproxy.h>
35#include <linux/rwsem.h>
35 36
36#include <asm/unistd.h> 37#include <asm/unistd.h>
37 38
@@ -129,23 +130,16 @@ __initcall(ipc_init);
129/** 130/**
130 * ipc_init_ids - initialise IPC identifiers 131 * ipc_init_ids - initialise IPC identifiers
131 * @ids: Identifier set 132 * @ids: Identifier set
132 * @size: Number of identifiers
133 * 133 *
134 * Given a size for the ipc identifier range (limited below IPCMNI) 134 * Set up the sequence range to use for the ipc identifier range (limited
135 * set up the sequence range to use then allocate and initialise the 135 * below IPCMNI) then initialise the ids idr.
136 * array itself.
137 */ 136 */
138 137
139void ipc_init_ids(struct ipc_ids* ids, int size) 138void ipc_init_ids(struct ipc_ids *ids)
140{ 139{
141 int i; 140 init_rwsem(&ids->rw_mutex);
142 141
143 mutex_init(&ids->mutex);
144
145 if(size > IPCMNI)
146 size = IPCMNI;
147 ids->in_use = 0; 142 ids->in_use = 0;
148 ids->max_id = -1;
149 ids->seq = 0; 143 ids->seq = 0;
150 { 144 {
151 int seq_limit = INT_MAX/SEQ_MULTIPLIER; 145 int seq_limit = INT_MAX/SEQ_MULTIPLIER;
@@ -155,17 +149,7 @@ void ipc_init_ids(struct ipc_ids* ids, int size)
155 ids->seq_max = seq_limit; 149 ids->seq_max = seq_limit;
156 } 150 }
157 151
158 ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size + 152 idr_init(&ids->ipcs_idr);
159 sizeof(struct ipc_id_ary));
160
161 if(ids->entries == NULL) {
162 printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
163 size = 0;
164 ids->entries = &ids->nullentry;
165 }
166 ids->entries->size = size;
167 for(i=0;i<size;i++)
168 ids->entries->p[i] = NULL;
169} 153}
170 154
171#ifdef CONFIG_PROC_FS 155#ifdef CONFIG_PROC_FS
@@ -208,99 +192,96 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
208 * @ids: Identifier set 192 * @ids: Identifier set
209 * @key: The key to find 193 * @key: The key to find
210 * 194 *
211 * Requires ipc_ids.mutex locked. 195 * Requires ipc_ids.rw_mutex locked.
212 * Returns the identifier if found or -1 if not. 196 * Returns the LOCKED pointer to the ipc structure if found or NULL
197 * if not.
198 * If key is found ipc points to the owning ipc structure
213 */ 199 */
214 200
215int ipc_findkey(struct ipc_ids* ids, key_t key) 201static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
216{ 202{
217 int id; 203 struct kern_ipc_perm *ipc;
218 struct kern_ipc_perm* p; 204 int next_id;
219 int max_id = ids->max_id; 205 int total;
220 206
221 /* 207 for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
222 * rcu_dereference() is not needed here 208 ipc = idr_find(&ids->ipcs_idr, next_id);
223 * since ipc_ids.mutex is held 209
224 */ 210 if (ipc == NULL)
225 for (id = 0; id <= max_id; id++) { 211 continue;
226 p = ids->entries->p[id]; 212
227 if(p==NULL) 213 if (ipc->key != key) {
214 total++;
228 continue; 215 continue;
229 if (key == p->key) 216 }
230 return id; 217
218 ipc_lock_by_ptr(ipc);
219 return ipc;
231 } 220 }
232 return -1; 221
222 return NULL;
233} 223}
234 224
235/* 225/**
236 * Requires ipc_ids.mutex locked 226 * ipc_get_maxid - get the last assigned id
227 * @ids: IPC identifier set
228 *
229 * Called with ipc_ids.rw_mutex held.
237 */ 230 */
238static int grow_ary(struct ipc_ids* ids, int newsize)
239{
240 struct ipc_id_ary* new;
241 struct ipc_id_ary* old;
242 int i;
243 int size = ids->entries->size;
244
245 if(newsize > IPCMNI)
246 newsize = IPCMNI;
247 if(newsize <= size)
248 return newsize;
249
250 new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
251 sizeof(struct ipc_id_ary));
252 if(new == NULL)
253 return size;
254 new->size = newsize;
255 memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
256 for(i=size;i<newsize;i++) {
257 new->p[i] = NULL;
258 }
259 old = ids->entries;
260 231
261 /* 232int ipc_get_maxid(struct ipc_ids *ids)
262 * Use rcu_assign_pointer() to make sure the memcpyed contents 233{
263 * of the new array are visible before the new array becomes visible. 234 struct kern_ipc_perm *ipc;
264 */ 235 int max_id = -1;
265 rcu_assign_pointer(ids->entries, new); 236 int total, id;
237
238 if (ids->in_use == 0)
239 return -1;
266 240
267 __ipc_fini_ids(ids, old); 241 if (ids->in_use == IPCMNI)
268 return newsize; 242 return IPCMNI - 1;
243
244 /* Look for the last assigned id */
245 total = 0;
246 for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
247 ipc = idr_find(&ids->ipcs_idr, id);
248 if (ipc != NULL) {
249 max_id = id;
250 total++;
251 }
252 }
253 return max_id;
269} 254}
270 255
271/** 256/**
272 * ipc_addid - add an IPC identifier 257 * ipc_addid - add an IPC identifier
273 * @ids: IPC identifier set 258 * @ids: IPC identifier set
274 * @new: new IPC permission set 259 * @new: new IPC permission set
275 * @size: new size limit for the id array 260 * @size: limit for the number of used ids
276 * 261 *
277 * Add an entry 'new' to the IPC arrays. The permissions object is 262 * Add an entry 'new' to the IPC ids idr. The permissions object is
278 * initialised and the first free entry is set up and the id assigned 263 * initialised and the first free entry is set up and the id assigned
279 * is returned. The list is returned in a locked state on success. 264 * is returned. The 'new' entry is returned in a locked state on success.
280 * On failure the list is not locked and -1 is returned. 265 * On failure the entry is not locked and a negative err-code is returned.
281 * 266 *
282 * Called with ipc_ids.mutex held. 267 * Called with ipc_ids.rw_mutex held as a writer.
283 */ 268 */
284 269
285int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) 270int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
286{ 271{
287 int id; 272 int id, err;
288 273
289 size = grow_ary(ids,size); 274 if (size > IPCMNI)
275 size = IPCMNI;
276
277 if (ids->in_use >= size)
278 return -ENOSPC;
279
280 err = idr_get_new(&ids->ipcs_idr, new, &id);
281 if (err)
282 return err;
290 283
291 /*
292 * rcu_dereference()() is not needed here since
293 * ipc_ids.mutex is held
294 */
295 for (id = 0; id < size; id++) {
296 if(ids->entries->p[id] == NULL)
297 goto found;
298 }
299 return -1;
300found:
301 ids->in_use++; 284 ids->in_use++;
302 if (id > ids->max_id)
303 ids->max_id = id;
304 285
305 new->cuid = new->uid = current->euid; 286 new->cuid = new->uid = current->euid;
306 new->gid = new->cgid = current->egid; 287 new->gid = new->cgid = current->egid;
@@ -313,48 +294,153 @@ found:
313 new->deleted = 0; 294 new->deleted = 0;
314 rcu_read_lock(); 295 rcu_read_lock();
315 spin_lock(&new->lock); 296 spin_lock(&new->lock);
316 ids->entries->p[id] = new;
317 return id; 297 return id;
318} 298}
319 299
320/** 300/**
301 * ipcget_new - create a new ipc object
302 * @ns: namespace
303 * @ids: IPC identifer set
304 * @ops: the actual creation routine to call
305 * @params: its parameters
306 *
307 * This routine is called by sys_msgget, sys_semget() and sys_shmget()
308 * when the key is IPC_PRIVATE.
309 */
310int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
311 struct ipc_ops *ops, struct ipc_params *params)
312{
313 int err;
314retry:
315 err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
316
317 if (!err)
318 return -ENOMEM;
319
320 down_write(&ids->rw_mutex);
321 err = ops->getnew(ns, params);
322 up_write(&ids->rw_mutex);
323
324 if (err == -EAGAIN)
325 goto retry;
326
327 return err;
328}
329
330/**
331 * ipc_check_perms - check security and permissions for an IPC
332 * @ipcp: ipc permission set
333 * @ops: the actual security routine to call
334 * @params: its parameters
335 *
336 * This routine is called by sys_msgget(), sys_semget() and sys_shmget()
337 * when the key is not IPC_PRIVATE and that key already exists in the
338 * ids IDR.
339 *
340 * On success, the IPC id is returned.
341 *
342 * It is called with ipc_ids.rw_mutex and ipcp->lock held.
343 */
344static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops,
345 struct ipc_params *params)
346{
347 int err;
348
349 if (ipcperms(ipcp, params->flg))
350 err = -EACCES;
351 else {
352 err = ops->associate(ipcp, params->flg);
353 if (!err)
354 err = ipcp->id;
355 }
356
357 return err;
358}
359
360/**
361 * ipcget_public - get an ipc object or create a new one
362 * @ns: namespace
363 * @ids: IPC identifer set
364 * @ops: the actual creation routine to call
365 * @params: its parameters
366 *
367 * This routine is called by sys_msgget, sys_semget() and sys_shmget()
368 * when the key is not IPC_PRIVATE.
369 * It adds a new entry if the key is not found and does some permission
370 * / security checkings if the key is found.
371 *
372 * On success, the ipc id is returned.
373 */
374int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
375 struct ipc_ops *ops, struct ipc_params *params)
376{
377 struct kern_ipc_perm *ipcp;
378 int flg = params->flg;
379 int err;
380retry:
381 err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
382
383 /*
384 * Take the lock as a writer since we are potentially going to add
385 * a new entry + read locks are not "upgradable"
386 */
387 down_write(&ids->rw_mutex);
388 ipcp = ipc_findkey(ids, params->key);
389 if (ipcp == NULL) {
390 /* key not used */
391 if (!(flg & IPC_CREAT))
392 err = -ENOENT;
393 else if (!err)
394 err = -ENOMEM;
395 else
396 err = ops->getnew(ns, params);
397 } else {
398 /* ipc object has been locked by ipc_findkey() */
399
400 if (flg & IPC_CREAT && flg & IPC_EXCL)
401 err = -EEXIST;
402 else {
403 err = 0;
404 if (ops->more_checks)
405 err = ops->more_checks(ipcp, params);
406 if (!err)
407 /*
408 * ipc_check_perms returns the IPC id on
409 * success
410 */
411 err = ipc_check_perms(ipcp, ops, params);
412 }
413 ipc_unlock(ipcp);
414 }
415 up_write(&ids->rw_mutex);
416
417 if (err == -EAGAIN)
418 goto retry;
419
420 return err;
421}
422
423
424/**
321 * ipc_rmid - remove an IPC identifier 425 * ipc_rmid - remove an IPC identifier
322 * @ids: identifier set 426 * @ids: IPC identifier set
323 * @id: Identifier to remove 427 * @ipcp: ipc perm structure containing the identifier to remove
324 * 428 *
325 * The identifier must be valid, and in use. The kernel will panic if 429 * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
326 * fed an invalid identifier. The entry is removed and internal 430 * before this function is called, and remain locked on the exit.
327 * variables recomputed. The object associated with the identifier
328 * is returned.
329 * ipc_ids.mutex and the spinlock for this ID is hold before this function
330 * is called, and remain locked on the exit.
331 */ 431 */
332 432
333struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id) 433void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
334{ 434{
335 struct kern_ipc_perm* p; 435 int lid = ipcid_to_idx(ipcp->id);
336 int lid = id % SEQ_MULTIPLIER; 436
337 BUG_ON(lid >= ids->entries->size); 437 idr_remove(&ids->ipcs_idr, lid);
338 438
339 /*
340 * do not need a rcu_dereference()() here to force ordering
341 * on Alpha, since the ipc_ids.mutex is held.
342 */
343 p = ids->entries->p[lid];
344 ids->entries->p[lid] = NULL;
345 BUG_ON(p==NULL);
346 ids->in_use--; 439 ids->in_use--;
347 440
348 if (lid == ids->max_id) { 441 ipcp->deleted = 1;
349 do { 442
350 lid--; 443 return;
351 if(lid == -1)
352 break;
353 } while (ids->entries->p[lid] == NULL);
354 ids->max_id = lid;
355 }
356 p->deleted = 1;
357 return p;
358} 444}
359 445
360/** 446/**
@@ -491,10 +577,12 @@ static void ipc_do_vfree(struct work_struct *work)
491 */ 577 */
492static void ipc_schedule_free(struct rcu_head *head) 578static void ipc_schedule_free(struct rcu_head *head)
493{ 579{
494 struct ipc_rcu_grace *grace = 580 struct ipc_rcu_grace *grace;
495 container_of(head, struct ipc_rcu_grace, rcu); 581 struct ipc_rcu_sched *sched;
496 struct ipc_rcu_sched *sched = 582
497 container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]); 583 grace = container_of(head, struct ipc_rcu_grace, rcu);
584 sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
585 data[0]);
498 586
499 INIT_WORK(&sched->work, ipc_do_vfree); 587 INIT_WORK(&sched->work, ipc_do_vfree);
500 schedule_work(&sched->work); 588 schedule_work(&sched->work);
@@ -583,7 +671,7 @@ void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
583} 671}
584 672
585/** 673/**
586 * ipc64_perm_to_ipc_perm - convert old ipc permissions to new 674 * ipc64_perm_to_ipc_perm - convert new ipc permissions to old
587 * @in: new style IPC permissions 675 * @in: new style IPC permissions
588 * @out: old style IPC permissions 676 * @out: old style IPC permissions
589 * 677 *
@@ -602,44 +690,37 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
602 out->seq = in->seq; 690 out->seq = in->seq;
603} 691}
604 692
605/* 693/**
606 * So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get() 694 * ipc_lock - Lock an ipc structure without rw_mutex held
607 * is called with shm_ids.mutex locked. Since grow_ary() is also called with 695 * @ids: IPC identifier set
608 * shm_ids.mutex down(for Shared Memory), there is no need to add read 696 * @id: ipc id to look for
609 * barriers here to gurantee the writes in grow_ary() are seen in order 697 *
610 * here (for Alpha). 698 * Look for an id in the ipc ids idr and lock the associated ipc object.
611 * 699 *
612 * However ipc_get() itself does not necessary require ipc_ids.mutex down. So 700 * The ipc object is locked on exit.
613 * if in the future ipc_get() is used by other places without ipc_ids.mutex 701 *
614 * down, then ipc_get() needs read memery barriers as ipc_lock() does. 702 * This is the routine that should be called when the rw_mutex is not already
703 * held, i.e. idr tree not protected: it protects the idr tree in read mode
704 * during the idr_find().
615 */ 705 */
616struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id)
617{
618 struct kern_ipc_perm* out;
619 int lid = id % SEQ_MULTIPLIER;
620 if(lid >= ids->entries->size)
621 return NULL;
622 out = ids->entries->p[lid];
623 return out;
624}
625 706
626struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id) 707struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
627{ 708{
628 struct kern_ipc_perm* out; 709 struct kern_ipc_perm *out;
629 int lid = id % SEQ_MULTIPLIER; 710 int lid = ipcid_to_idx(id);
630 struct ipc_id_ary* entries; 711
712 down_read(&ids->rw_mutex);
631 713
632 rcu_read_lock(); 714 rcu_read_lock();
633 entries = rcu_dereference(ids->entries); 715 out = idr_find(&ids->ipcs_idr, lid);
634 if(lid >= entries->size) { 716 if (out == NULL) {
635 rcu_read_unlock();
636 return NULL;
637 }
638 out = entries->p[lid];
639 if(out == NULL) {
640 rcu_read_unlock(); 717 rcu_read_unlock();
641 return NULL; 718 up_read(&ids->rw_mutex);
719 return ERR_PTR(-EINVAL);
642 } 720 }
721
722 up_read(&ids->rw_mutex);
723
643 spin_lock(&out->lock); 724 spin_lock(&out->lock);
644 725
645 /* ipc_rmid() may have already freed the ID while ipc_lock 726 /* ipc_rmid() may have already freed the ID while ipc_lock
@@ -648,33 +729,44 @@ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
648 if (out->deleted) { 729 if (out->deleted) {
649 spin_unlock(&out->lock); 730 spin_unlock(&out->lock);
650 rcu_read_unlock(); 731 rcu_read_unlock();
651 return NULL; 732 return ERR_PTR(-EINVAL);
652 } 733 }
734
653 return out; 735 return out;
654} 736}
655 737
656void ipc_lock_by_ptr(struct kern_ipc_perm *perm) 738/**
657{ 739 * ipc_lock_down - Lock an ipc structure with rw_sem held
658 rcu_read_lock(); 740 * @ids: IPC identifier set
659 spin_lock(&perm->lock); 741 * @id: ipc id to look for
660} 742 *
743 * Look for an id in the ipc ids idr and lock the associated ipc object.
744 *
745 * The ipc object is locked on exit.
746 *
747 * This is the routine that should be called when the rw_mutex is already
748 * held, i.e. idr tree protected.
749 */
661 750
662void ipc_unlock(struct kern_ipc_perm* perm) 751struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
663{ 752{
664 spin_unlock(&perm->lock); 753 struct kern_ipc_perm *out;
665 rcu_read_unlock(); 754 int lid = ipcid_to_idx(id);
666}
667 755
668int ipc_buildid(struct ipc_ids* ids, int id, int seq) 756 rcu_read_lock();
669{ 757 out = idr_find(&ids->ipcs_idr, lid);
670 return SEQ_MULTIPLIER*seq + id; 758 if (out == NULL) {
671} 759 rcu_read_unlock();
760 return ERR_PTR(-EINVAL);
761 }
672 762
673int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid) 763 spin_lock(&out->lock);
674{ 764
675 if(uid/SEQ_MULTIPLIER != ipcp->seq) 765 /*
676 return 1; 766 * No need to verify that the structure is still valid since the
677 return 0; 767 * rw_mutex is held.
768 */
769 return out;
678} 770}
679 771
680#ifdef __ARCH_WANT_IPC_PARSE_VERSION 772#ifdef __ARCH_WANT_IPC_PARSE_VERSION
@@ -707,27 +799,30 @@ struct ipc_proc_iter {
707 struct ipc_proc_iface *iface; 799 struct ipc_proc_iface *iface;
708}; 800};
709 801
710static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos) 802/*
803 * This routine locks the ipc structure found at least at position pos.
804 */
805struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
806 loff_t *new_pos)
711{ 807{
712 struct ipc_proc_iter *iter = s->private; 808 struct kern_ipc_perm *ipc;
713 struct ipc_proc_iface *iface = iter->iface; 809 int total, id;
714 struct kern_ipc_perm *ipc = it;
715 loff_t p;
716 struct ipc_ids *ids;
717 810
718 ids = iter->ns->ids[iface->ids]; 811 total = 0;
812 for (id = 0; id < pos && total < ids->in_use; id++) {
813 ipc = idr_find(&ids->ipcs_idr, id);
814 if (ipc != NULL)
815 total++;
816 }
719 817
720 /* If we had an ipc id locked before, unlock it */ 818 if (total >= ids->in_use)
721 if (ipc && ipc != SEQ_START_TOKEN) 819 return NULL;
722 ipc_unlock(ipc);
723 820
724 /* 821 for ( ; pos < IPCMNI; pos++) {
725 * p = *pos - 1 (because id 0 starts at position 1) 822 ipc = idr_find(&ids->ipcs_idr, pos);
726 * + 1 (because we increment the position by one) 823 if (ipc != NULL) {
727 */ 824 *new_pos = pos + 1;
728 for (p = *pos; p <= ids->max_id; p++) { 825 ipc_lock_by_ptr(ipc);
729 if ((ipc = ipc_lock(ids, p)) != NULL) {
730 *pos = p + 1;
731 return ipc; 826 return ipc;
732 } 827 }
733 } 828 }
@@ -736,16 +831,27 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
736 return NULL; 831 return NULL;
737} 832}
738 833
834static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
835{
836 struct ipc_proc_iter *iter = s->private;
837 struct ipc_proc_iface *iface = iter->iface;
838 struct kern_ipc_perm *ipc = it;
839
840 /* If we had an ipc id locked before, unlock it */
841 if (ipc && ipc != SEQ_START_TOKEN)
842 ipc_unlock(ipc);
843
844 return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos);
845}
846
739/* 847/*
740 * File positions: pos 0 -> header, pos n -> ipc id + 1. 848 * File positions: pos 0 -> header, pos n -> ipc id = n - 1.
741 * SeqFile iterator: iterator value locked shp or SEQ_TOKEN_START. 849 * SeqFile iterator: iterator value locked ipc pointer or SEQ_TOKEN_START.
742 */ 850 */
743static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) 851static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
744{ 852{
745 struct ipc_proc_iter *iter = s->private; 853 struct ipc_proc_iter *iter = s->private;
746 struct ipc_proc_iface *iface = iter->iface; 854 struct ipc_proc_iface *iface = iter->iface;
747 struct kern_ipc_perm *ipc;
748 loff_t p;
749 struct ipc_ids *ids; 855 struct ipc_ids *ids;
750 856
751 ids = iter->ns->ids[iface->ids]; 857 ids = iter->ns->ids[iface->ids];
@@ -754,7 +860,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
754 * Take the lock - this will be released by the corresponding 860 * Take the lock - this will be released by the corresponding
755 * call to stop(). 861 * call to stop().
756 */ 862 */
757 mutex_lock(&ids->mutex); 863 down_read(&ids->rw_mutex);
758 864
759 /* pos < 0 is invalid */ 865 /* pos < 0 is invalid */
760 if (*pos < 0) 866 if (*pos < 0)
@@ -765,13 +871,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
765 return SEQ_START_TOKEN; 871 return SEQ_START_TOKEN;
766 872
767 /* Find the (pos-1)th ipc */ 873 /* Find the (pos-1)th ipc */
768 for (p = *pos - 1; p <= ids->max_id; p++) { 874 return sysvipc_find_ipc(ids, *pos - 1, pos);
769 if ((ipc = ipc_lock(ids, p)) != NULL) {
770 *pos = p + 1;
771 return ipc;
772 }
773 }
774 return NULL;
775} 875}
776 876
777static void sysvipc_proc_stop(struct seq_file *s, void *it) 877static void sysvipc_proc_stop(struct seq_file *s, void *it)
@@ -781,13 +881,13 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
781 struct ipc_proc_iface *iface = iter->iface; 881 struct ipc_proc_iface *iface = iter->iface;
782 struct ipc_ids *ids; 882 struct ipc_ids *ids;
783 883
784 /* If we had a locked segment, release it */ 884 /* If we had a locked structure, release it */
785 if (ipc && ipc != SEQ_START_TOKEN) 885 if (ipc && ipc != SEQ_START_TOKEN)
786 ipc_unlock(ipc); 886 ipc_unlock(ipc);
787 887
788 ids = iter->ns->ids[iface->ids]; 888 ids = iter->ns->ids[iface->ids];
789 /* Release the lock we took in start() */ 889 /* Release the lock we took in start() */
790 mutex_unlock(&ids->mutex); 890 up_read(&ids->rw_mutex);
791} 891}
792 892
793static int sysvipc_proc_show(struct seq_file *s, void *it) 893static int sysvipc_proc_show(struct seq_file *s, void *it)
diff --git a/ipc/util.h b/ipc/util.h
index 333e891bcaca..9ffea40457ce 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -10,6 +10,9 @@
10#ifndef _IPC_UTIL_H 10#ifndef _IPC_UTIL_H
11#define _IPC_UTIL_H 11#define _IPC_UTIL_H
12 12
13#include <linux/idr.h>
14#include <linux/err.h>
15
13#define USHRT_MAX 0xffff 16#define USHRT_MAX 0xffff
14#define SEQ_MULTIPLIER (IPCMNI) 17#define SEQ_MULTIPLIER (IPCMNI)
15 18
@@ -25,24 +28,46 @@ void sem_exit_ns(struct ipc_namespace *ns);
25void msg_exit_ns(struct ipc_namespace *ns); 28void msg_exit_ns(struct ipc_namespace *ns);
26void shm_exit_ns(struct ipc_namespace *ns); 29void shm_exit_ns(struct ipc_namespace *ns);
27 30
28struct ipc_id_ary {
29 int size;
30 struct kern_ipc_perm *p[0];
31};
32
33struct ipc_ids { 31struct ipc_ids {
34 int in_use; 32 int in_use;
35 int max_id;
36 unsigned short seq; 33 unsigned short seq;
37 unsigned short seq_max; 34 unsigned short seq_max;
38 struct mutex mutex; 35 struct rw_semaphore rw_mutex;
39 struct ipc_id_ary nullentry; 36 struct idr ipcs_idr;
40 struct ipc_id_ary* entries; 37};
38
39/*
40 * Structure that holds the parameters needed by the ipc operations
41 * (see after)
42 */
43struct ipc_params {
44 key_t key;
45 int flg;
46 union {
47 size_t size; /* for shared memories */
48 int nsems; /* for semaphores */
49 } u; /* holds the getnew() specific param */
50};
51
52/*
53 * Structure that holds some ipc operations. This structure is used to unify
54 * the calls to sys_msgget(), sys_semget(), sys_shmget()
55 * . routine to call to create a new ipc object. Can be one of newque,
56 * newary, newseg
57 * . routine to call to check permissions for a new ipc object.
58 * Can be one of security_msg_associate, security_sem_associate,
59 * security_shm_associate
60 * . routine to call for an extra check if needed
61 */
62struct ipc_ops {
63 int (*getnew) (struct ipc_namespace *, struct ipc_params *);
64 int (*associate) (struct kern_ipc_perm *, int);
65 int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
41}; 66};
42 67
43struct seq_file; 68struct seq_file;
44 69
45void ipc_init_ids(struct ipc_ids *ids, int size); 70void ipc_init_ids(struct ipc_ids *);
46#ifdef CONFIG_PROC_FS 71#ifdef CONFIG_PROC_FS
47void __init ipc_init_proc_interface(const char *path, const char *header, 72void __init ipc_init_proc_interface(const char *path, const char *header,
48 int ids, int (*show)(struct seq_file *, void *)); 73 int ids, int (*show)(struct seq_file *, void *));
@@ -54,14 +79,19 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
54#define IPC_MSG_IDS 1 79#define IPC_MSG_IDS 1
55#define IPC_SHM_IDS 2 80#define IPC_SHM_IDS 2
56 81
57/* must be called with ids->mutex acquired.*/ 82#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
58int ipc_findkey(struct ipc_ids* ids, key_t key); 83
59int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size); 84/* must be called with ids->rw_mutex acquired for writing */
85int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
86
87/* must be called with ids->rw_mutex acquired for reading */
88int ipc_get_maxid(struct ipc_ids *);
60 89
61/* must be called with both locks acquired. */ 90/* must be called with both locks acquired. */
62struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id); 91void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
63 92
64int ipcperms (struct kern_ipc_perm *ipcp, short flg); 93/* must be called with ipcp locked */
94int ipcperms(struct kern_ipc_perm *ipcp, short flg);
65 95
66/* for rare, potentially huge allocations. 96/* for rare, potentially huge allocations.
67 * both function can sleep 97 * both function can sleep
@@ -79,24 +109,12 @@ void* ipc_rcu_alloc(int size);
79void ipc_rcu_getref(void *ptr); 109void ipc_rcu_getref(void *ptr);
80void ipc_rcu_putref(void *ptr); 110void ipc_rcu_putref(void *ptr);
81 111
82static inline void __ipc_fini_ids(struct ipc_ids *ids, 112/*
83 struct ipc_id_ary *entries) 113 * ipc_lock_down: called with rw_mutex held
84{ 114 * ipc_lock: called without that lock held
85 if (entries != &ids->nullentry) 115 */
86 ipc_rcu_putref(entries); 116struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
87} 117struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
88
89static inline void ipc_fini_ids(struct ipc_ids *ids)
90{
91 __ipc_fini_ids(ids, ids->entries);
92}
93
94struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
95struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
96void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
97void ipc_unlock(struct kern_ipc_perm* perm);
98int ipc_buildid(struct ipc_ids* ids, int id, int seq);
99int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid);
100 118
101void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); 119void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
102void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); 120void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
@@ -111,5 +129,89 @@ int ipc_parse_version (int *cmd);
111extern void free_msg(struct msg_msg *msg); 129extern void free_msg(struct msg_msg *msg);
112extern struct msg_msg *load_msg(const void __user *src, int len); 130extern struct msg_msg *load_msg(const void __user *src, int len);
113extern int store_msg(void __user *dest, struct msg_msg *msg, int len); 131extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
132extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *,
133 struct ipc_ops *, struct ipc_params *);
134extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *,
135 struct ipc_ops *, struct ipc_params *);
136
137static inline int ipc_buildid(int id, int seq)
138{
139 return SEQ_MULTIPLIER * seq + id;
140}
141
142/*
143 * Must be called with ipcp locked
144 */
145static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
146{
147 if (uid / SEQ_MULTIPLIER != ipcp->seq)
148 return 1;
149 return 0;
150}
151
152static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
153{
154 rcu_read_lock();
155 spin_lock(&perm->lock);
156}
157
158static inline void ipc_unlock(struct kern_ipc_perm *perm)
159{
160 spin_unlock(&perm->lock);
161 rcu_read_unlock();
162}
163
164static inline struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids,
165 int id)
166{
167 struct kern_ipc_perm *out;
168
169 out = ipc_lock_down(ids, id);
170 if (IS_ERR(out))
171 return out;
172
173 if (ipc_checkid(out, id)) {
174 ipc_unlock(out);
175 return ERR_PTR(-EIDRM);
176 }
177
178 return out;
179}
180
181static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids,
182 int id)
183{
184 struct kern_ipc_perm *out;
185
186 out = ipc_lock(ids, id);
187 if (IS_ERR(out))
188 return out;
189
190 if (ipc_checkid(out, id)) {
191 ipc_unlock(out);
192 return ERR_PTR(-EIDRM);
193 }
194
195 return out;
196}
197
198/**
199 * ipcget - Common sys_*get() code
200 * @ns : namsepace
201 * @ids : IPC identifier set
202 * @ops : operations to be called on ipc object creation, permission checks
203 * and further checks
204 * @params : the parameters needed by the previous operations.
205 *
206 * Common routine called by sys_msgget(), sys_semget() and sys_shmget().
207 */
208static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
209 struct ipc_ops *ops, struct ipc_params *params)
210{
211 if (params->key == IPC_PRIVATE)
212 return ipcget_new(ns, ids, ops, params);
213 else
214 return ipcget_public(ns, ids, ops, params);
215}
114 216
115#endif 217#endif
diff --git a/kernel/Kconfig.instrumentation b/kernel/Kconfig.instrumentation
new file mode 100644
index 000000000000..f5f2c769d95e
--- /dev/null
+++ b/kernel/Kconfig.instrumentation
@@ -0,0 +1,49 @@
1menuconfig INSTRUMENTATION
2 bool "Instrumentation Support"
3 default y
4 ---help---
5 Say Y here to get to see options related to performance measurement,
6 system-wide debugging, and testing. This option alone does not add any
7 kernel code.
8
9 If you say N, all options in this submenu will be skipped and
10 disabled. If you're trying to debug the kernel itself, go see the
11 Kernel Hacking menu.
12
13if INSTRUMENTATION
14
15config PROFILING
16 bool "Profiling support (EXPERIMENTAL)"
17 help
18 Say Y here to enable the extended profiling support mechanisms used
19 by profilers such as OProfile.
20
21config OPROFILE
22 tristate "OProfile system profiling (EXPERIMENTAL)"
23 depends on PROFILING
24 depends on ALPHA || ARM || BLACKFIN || X86_32 || IA64 || M32R || MIPS || PARISC || PPC || S390 || SUPERH || SPARC || X86_64
25 help
26 OProfile is a profiling system capable of profiling the
27 whole system, include the kernel, kernel modules, libraries,
28 and applications.
29
30 If unsure, say N.
31
32config KPROBES
33 bool "Kprobes"
34 depends on KALLSYMS && MODULES
35 depends on X86_32 || IA64 || PPC || S390 || SPARC64 || X86_64 || AVR32
36 help
37 Kprobes allows you to trap at almost any kernel address and
38 execute a callback function. register_kprobe() establishes
39 a probepoint and specifies the callback. Kprobes is useful
40 for kernel debugging, non-intrusive instrumentation and testing.
41 If in doubt, say "N".
42
43config MARKERS
44 bool "Activate markers"
45 help
46 Place an empty function call at each marker site. Can be
47 dynamically changed for a probe function.
48
49endif # INSTRUMENTATION
diff --git a/kernel/Makefile b/kernel/Makefile
index d63fbb18798a..05c3e6df8597 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,8 +8,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \ 11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
12 utsname.o sysctl_check.o 12 utsname.o sysctl_check.o notifier.o
13 13
14obj-$(CONFIG_STACKTRACE) += stacktrace.o 14obj-$(CONFIG_STACKTRACE) += stacktrace.o
15obj-y += time/ 15obj-y += time/
@@ -36,7 +36,11 @@ obj-$(CONFIG_PM) += power/
36obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o 36obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
37obj-$(CONFIG_KEXEC) += kexec.o 37obj-$(CONFIG_KEXEC) += kexec.o
38obj-$(CONFIG_COMPAT) += compat.o 38obj-$(CONFIG_COMPAT) += compat.o
39obj-$(CONFIG_CGROUPS) += cgroup.o
40obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
39obj-$(CONFIG_CPUSETS) += cpuset.o 41obj-$(CONFIG_CPUSETS) += cpuset.o
42obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
43obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
40obj-$(CONFIG_IKCONFIG) += configs.o 44obj-$(CONFIG_IKCONFIG) += configs.o
41obj-$(CONFIG_STOP_MACHINE) += stop_machine.o 45obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
42obj-$(CONFIG_AUDIT) += audit.o auditfilter.o 46obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
@@ -51,6 +55,7 @@ obj-$(CONFIG_RELAY) += relay.o
51obj-$(CONFIG_SYSCTL) += utsname_sysctl.o 55obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
52obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 56obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
53obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 57obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
58obj-$(CONFIG_MARKERS) += marker.o
54 59
55ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 60ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
56# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 61# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/capability.c b/kernel/capability.c
index cbc5fd60c0f3..efbd9cdce132 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/security.h> 13#include <linux/security.h>
14#include <linux/syscalls.h> 14#include <linux/syscalls.h>
15#include <linux/pid_namespace.h>
15#include <asm/uaccess.h> 16#include <asm/uaccess.h>
16 17
17/* 18/*
@@ -61,8 +62,8 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
61 spin_lock(&task_capability_lock); 62 spin_lock(&task_capability_lock);
62 read_lock(&tasklist_lock); 63 read_lock(&tasklist_lock);
63 64
64 if (pid && pid != current->pid) { 65 if (pid && pid != task_pid_vnr(current)) {
65 target = find_task_by_pid(pid); 66 target = find_task_by_vpid(pid);
66 if (!target) { 67 if (!target) {
67 ret = -ESRCH; 68 ret = -ESRCH;
68 goto out; 69 goto out;
@@ -95,7 +96,7 @@ static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
95 int found = 0; 96 int found = 0;
96 struct pid *pgrp; 97 struct pid *pgrp;
97 98
98 pgrp = find_pid(pgrp_nr); 99 pgrp = find_vpid(pgrp_nr);
99 do_each_pid_task(pgrp, PIDTYPE_PGID, g) { 100 do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
100 target = g; 101 target = g;
101 while_each_thread(g, target) { 102 while_each_thread(g, target) {
@@ -129,7 +130,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
129 int found = 0; 130 int found = 0;
130 131
131 do_each_thread(g, target) { 132 do_each_thread(g, target) {
132 if (target == current || is_init(target)) 133 if (target == current || is_container_init(target->group_leader))
133 continue; 134 continue;
134 found = 1; 135 found = 1;
135 if (security_capset_check(target, effective, inheritable, 136 if (security_capset_check(target, effective, inheritable,
@@ -184,7 +185,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
184 if (get_user(pid, &header->pid)) 185 if (get_user(pid, &header->pid))
185 return -EFAULT; 186 return -EFAULT;
186 187
187 if (pid && pid != current->pid && !capable(CAP_SETPCAP)) 188 if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
188 return -EPERM; 189 return -EPERM;
189 190
190 if (copy_from_user(&effective, &data->effective, sizeof(effective)) || 191 if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
@@ -195,8 +196,8 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
195 spin_lock(&task_capability_lock); 196 spin_lock(&task_capability_lock);
196 read_lock(&tasklist_lock); 197 read_lock(&tasklist_lock);
197 198
198 if (pid > 0 && pid != current->pid) { 199 if (pid > 0 && pid != task_pid_vnr(current)) {
199 target = find_task_by_pid(pid); 200 target = find_task_by_vpid(pid);
200 if (!target) { 201 if (!target) {
201 ret = -ESRCH; 202 ret = -ESRCH;
202 goto out; 203 goto out;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
new file mode 100644
index 000000000000..5987dccdb2a0
--- /dev/null
+++ b/kernel/cgroup.c
@@ -0,0 +1,2805 @@
1/*
2 * kernel/cgroup.c
3 *
4 * Generic process-grouping system.
5 *
6 * Based originally on the cpuset system, extracted by Paul Menage
7 * Copyright (C) 2006 Google, Inc
8 *
9 * Copyright notices from the original cpuset code:
10 * --------------------------------------------------
11 * Copyright (C) 2003 BULL SA.
12 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
13 *
14 * Portions derived from Patrick Mochel's sysfs code.
15 * sysfs is Copyright (c) 2001-3 Patrick Mochel
16 *
17 * 2003-10-10 Written by Simon Derr.
18 * 2003-10-22 Updates by Stephen Hemminger.
19 * 2004 May-July Rework by Paul Jackson.
20 * ---------------------------------------------------
21 *
22 * This file is subject to the terms and conditions of the GNU General Public
23 * License. See the file COPYING in the main directory of the Linux
24 * distribution for more details.
25 */
26
27#include <linux/cgroup.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/kernel.h>
31#include <linux/list.h>
32#include <linux/mm.h>
33#include <linux/mutex.h>
34#include <linux/mount.h>
35#include <linux/pagemap.h>
36#include <linux/proc_fs.h>
37#include <linux/rcupdate.h>
38#include <linux/sched.h>
39#include <linux/backing-dev.h>
40#include <linux/seq_file.h>
41#include <linux/slab.h>
42#include <linux/magic.h>
43#include <linux/spinlock.h>
44#include <linux/string.h>
45#include <linux/sort.h>
46#include <linux/kmod.h>
47#include <linux/delayacct.h>
48#include <linux/cgroupstats.h>
49
50#include <asm/atomic.h>
51
52static DEFINE_MUTEX(cgroup_mutex);
53
54/* Generate an array of cgroup subsystem pointers */
55#define SUBSYS(_x) &_x ## _subsys,
56
57static struct cgroup_subsys *subsys[] = {
58#include <linux/cgroup_subsys.h>
59};
60
61/*
62 * A cgroupfs_root represents the root of a cgroup hierarchy,
63 * and may be associated with a superblock to form an active
64 * hierarchy
65 */
66struct cgroupfs_root {
67 struct super_block *sb;
68
69 /*
70 * The bitmask of subsystems intended to be attached to this
71 * hierarchy
72 */
73 unsigned long subsys_bits;
74
75 /* The bitmask of subsystems currently attached to this hierarchy */
76 unsigned long actual_subsys_bits;
77
78 /* A list running through the attached subsystems */
79 struct list_head subsys_list;
80
81 /* The root cgroup for this hierarchy */
82 struct cgroup top_cgroup;
83
84 /* Tracks how many cgroups are currently defined in hierarchy.*/
85 int number_of_cgroups;
86
87 /* A list running through the mounted hierarchies */
88 struct list_head root_list;
89
90 /* Hierarchy-specific flags */
91 unsigned long flags;
92
93 /* The path to use for release notifications. No locking
94 * between setting and use - so if userspace updates this
95 * while child cgroups exist, you could miss a
96 * notification. We ensure that it's always a valid
97 * NUL-terminated string */
98 char release_agent_path[PATH_MAX];
99};
100
101
102/*
103 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
104 * subsystems that are otherwise unattached - it never has more than a
105 * single cgroup, and all tasks are part of that cgroup.
106 */
107static struct cgroupfs_root rootnode;
108
109/* The list of hierarchy roots */
110
111static LIST_HEAD(roots);
112static int root_count;
113
114/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
115#define dummytop (&rootnode.top_cgroup)
116
117/* This flag indicates whether tasks in the fork and exit paths should
118 * take callback_mutex and check for fork/exit handlers to call. This
119 * avoids us having to do extra work in the fork/exit path if none of the
120 * subsystems need to be called.
121 */
122static int need_forkexit_callback;
123
124/* bits in struct cgroup flags field */
125enum {
126 /* Control Group is dead */
127 CGRP_REMOVED,
128 /* Control Group has previously had a child cgroup or a task,
129 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */
130 CGRP_RELEASABLE,
131 /* Control Group requires release notifications to userspace */
132 CGRP_NOTIFY_ON_RELEASE,
133};
134
135/* convenient tests for these bits */
136inline int cgroup_is_removed(const struct cgroup *cgrp)
137{
138 return test_bit(CGRP_REMOVED, &cgrp->flags);
139}
140
141/* bits in struct cgroupfs_root flags field */
142enum {
143 ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
144};
145
146inline int cgroup_is_releasable(const struct cgroup *cgrp)
147{
148 const int bits =
149 (1 << CGRP_RELEASABLE) |
150 (1 << CGRP_NOTIFY_ON_RELEASE);
151 return (cgrp->flags & bits) == bits;
152}
153
154inline int notify_on_release(const struct cgroup *cgrp)
155{
156 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
157}
158
159/*
160 * for_each_subsys() allows you to iterate on each subsystem attached to
161 * an active hierarchy
162 */
163#define for_each_subsys(_root, _ss) \
164list_for_each_entry(_ss, &_root->subsys_list, sibling)
165
166/* for_each_root() allows you to iterate across the active hierarchies */
167#define for_each_root(_root) \
168list_for_each_entry(_root, &roots, root_list)
169
170/* the list of cgroups eligible for automatic release. Protected by
171 * release_list_lock */
172static LIST_HEAD(release_list);
173static DEFINE_SPINLOCK(release_list_lock);
174static void cgroup_release_agent(struct work_struct *work);
175static DECLARE_WORK(release_agent_work, cgroup_release_agent);
176static void check_for_release(struct cgroup *cgrp);
177
178/* Link structure for associating css_set objects with cgroups */
179struct cg_cgroup_link {
180 /*
181 * List running through cg_cgroup_links associated with a
182 * cgroup, anchored on cgroup->css_sets
183 */
184 struct list_head cgrp_link_list;
185 /*
186 * List running through cg_cgroup_links pointing at a
187 * single css_set object, anchored on css_set->cg_links
188 */
189 struct list_head cg_link_list;
190 struct css_set *cg;
191};
192
193/* The default css_set - used by init and its children prior to any
194 * hierarchies being mounted. It contains a pointer to the root state
195 * for each subsystem. Also used to anchor the list of css_sets. Not
196 * reference-counted, to improve performance when child cgroups
197 * haven't been created.
198 */
199
200static struct css_set init_css_set;
201static struct cg_cgroup_link init_css_set_link;
202
203/* css_set_lock protects the list of css_set objects, and the
204 * chain of tasks off each css_set. Nests outside task->alloc_lock
205 * due to cgroup_iter_start() */
206static DEFINE_RWLOCK(css_set_lock);
207static int css_set_count;
208
209/* We don't maintain the lists running through each css_set to its
210 * task until after the first call to cgroup_iter_start(). This
211 * reduces the fork()/exit() overhead for people who have cgroups
212 * compiled into their kernel but not actually in use */
213static int use_task_css_set_links;
214
215/* When we create or destroy a css_set, the operation simply
216 * takes/releases a reference count on all the cgroups referenced
217 * by subsystems in this css_set. This can end up multiple-counting
218 * some cgroups, but that's OK - the ref-count is just a
219 * busy/not-busy indicator; ensuring that we only count each cgroup
220 * once would require taking a global lock to ensure that no
221 * subsystems moved between hierarchies while we were doing so.
222 *
223 * Possible TODO: decide at boot time based on the number of
224 * registered subsystems and the number of CPUs or NUMA nodes whether
225 * it's better for performance to ref-count every subsystem, or to
226 * take a global lock and only add one ref count to each hierarchy.
227 */
228
229/*
230 * unlink a css_set from the list and free it
231 */
232static void unlink_css_set(struct css_set *cg)
233{
234 write_lock(&css_set_lock);
235 list_del(&cg->list);
236 css_set_count--;
237 while (!list_empty(&cg->cg_links)) {
238 struct cg_cgroup_link *link;
239 link = list_entry(cg->cg_links.next,
240 struct cg_cgroup_link, cg_link_list);
241 list_del(&link->cg_link_list);
242 list_del(&link->cgrp_link_list);
243 kfree(link);
244 }
245 write_unlock(&css_set_lock);
246}
247
248static void __release_css_set(struct kref *k, int taskexit)
249{
250 int i;
251 struct css_set *cg = container_of(k, struct css_set, ref);
252
253 unlink_css_set(cg);
254
255 rcu_read_lock();
256 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
257 struct cgroup *cgrp = cg->subsys[i]->cgroup;
258 if (atomic_dec_and_test(&cgrp->count) &&
259 notify_on_release(cgrp)) {
260 if (taskexit)
261 set_bit(CGRP_RELEASABLE, &cgrp->flags);
262 check_for_release(cgrp);
263 }
264 }
265 rcu_read_unlock();
266 kfree(cg);
267}
268
269static void release_css_set(struct kref *k)
270{
271 __release_css_set(k, 0);
272}
273
274static void release_css_set_taskexit(struct kref *k)
275{
276 __release_css_set(k, 1);
277}
278
279/*
280 * refcounted get/put for css_set objects
281 */
282static inline void get_css_set(struct css_set *cg)
283{
284 kref_get(&cg->ref);
285}
286
287static inline void put_css_set(struct css_set *cg)
288{
289 kref_put(&cg->ref, release_css_set);
290}
291
292static inline void put_css_set_taskexit(struct css_set *cg)
293{
294 kref_put(&cg->ref, release_css_set_taskexit);
295}
296
297/*
298 * find_existing_css_set() is a helper for
299 * find_css_set(), and checks to see whether an existing
300 * css_set is suitable. This currently walks a linked-list for
301 * simplicity; a later patch will use a hash table for better
302 * performance
303 *
304 * oldcg: the cgroup group that we're using before the cgroup
305 * transition
306 *
307 * cgrp: the cgroup that we're moving into
308 *
309 * template: location in which to build the desired set of subsystem
310 * state objects for the new cgroup group
311 */
312
313static struct css_set *find_existing_css_set(
314 struct css_set *oldcg,
315 struct cgroup *cgrp,
316 struct cgroup_subsys_state *template[])
317{
318 int i;
319 struct cgroupfs_root *root = cgrp->root;
320 struct list_head *l = &init_css_set.list;
321
322 /* Built the set of subsystem state objects that we want to
323 * see in the new css_set */
324 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
325 if (root->subsys_bits & (1ull << i)) {
326 /* Subsystem is in this hierarchy. So we want
327 * the subsystem state from the new
328 * cgroup */
329 template[i] = cgrp->subsys[i];
330 } else {
331 /* Subsystem is not in this hierarchy, so we
332 * don't want to change the subsystem state */
333 template[i] = oldcg->subsys[i];
334 }
335 }
336
337 /* Look through existing cgroup groups to find one to reuse */
338 do {
339 struct css_set *cg =
340 list_entry(l, struct css_set, list);
341
342 if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
343 /* All subsystems matched */
344 return cg;
345 }
346 /* Try the next cgroup group */
347 l = l->next;
348 } while (l != &init_css_set.list);
349
350 /* No existing cgroup group matched */
351 return NULL;
352}
353
354/*
355 * allocate_cg_links() allocates "count" cg_cgroup_link structures
356 * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
357 * success or a negative error
358 */
359
360static int allocate_cg_links(int count, struct list_head *tmp)
361{
362 struct cg_cgroup_link *link;
363 int i;
364 INIT_LIST_HEAD(tmp);
365 for (i = 0; i < count; i++) {
366 link = kmalloc(sizeof(*link), GFP_KERNEL);
367 if (!link) {
368 while (!list_empty(tmp)) {
369 link = list_entry(tmp->next,
370 struct cg_cgroup_link,
371 cgrp_link_list);
372 list_del(&link->cgrp_link_list);
373 kfree(link);
374 }
375 return -ENOMEM;
376 }
377 list_add(&link->cgrp_link_list, tmp);
378 }
379 return 0;
380}
381
382static void free_cg_links(struct list_head *tmp)
383{
384 while (!list_empty(tmp)) {
385 struct cg_cgroup_link *link;
386 link = list_entry(tmp->next,
387 struct cg_cgroup_link,
388 cgrp_link_list);
389 list_del(&link->cgrp_link_list);
390 kfree(link);
391 }
392}
393
394/*
395 * find_css_set() takes an existing cgroup group and a
396 * cgroup object, and returns a css_set object that's
397 * equivalent to the old group, but with the given cgroup
398 * substituted into the appropriate hierarchy. Must be called with
399 * cgroup_mutex held
400 */
401
402static struct css_set *find_css_set(
403 struct css_set *oldcg, struct cgroup *cgrp)
404{
405 struct css_set *res;
406 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
407 int i;
408
409 struct list_head tmp_cg_links;
410 struct cg_cgroup_link *link;
411
412 /* First see if we already have a cgroup group that matches
413 * the desired set */
414 write_lock(&css_set_lock);
415 res = find_existing_css_set(oldcg, cgrp, template);
416 if (res)
417 get_css_set(res);
418 write_unlock(&css_set_lock);
419
420 if (res)
421 return res;
422
423 res = kmalloc(sizeof(*res), GFP_KERNEL);
424 if (!res)
425 return NULL;
426
427 /* Allocate all the cg_cgroup_link objects that we'll need */
428 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
429 kfree(res);
430 return NULL;
431 }
432
433 kref_init(&res->ref);
434 INIT_LIST_HEAD(&res->cg_links);
435 INIT_LIST_HEAD(&res->tasks);
436
437 /* Copy the set of subsystem state objects generated in
438 * find_existing_css_set() */
439 memcpy(res->subsys, template, sizeof(res->subsys));
440
441 write_lock(&css_set_lock);
442 /* Add reference counts and links from the new css_set. */
443 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
444 struct cgroup *cgrp = res->subsys[i]->cgroup;
445 struct cgroup_subsys *ss = subsys[i];
446 atomic_inc(&cgrp->count);
447 /*
448 * We want to add a link once per cgroup, so we
449 * only do it for the first subsystem in each
450 * hierarchy
451 */
452 if (ss->root->subsys_list.next == &ss->sibling) {
453 BUG_ON(list_empty(&tmp_cg_links));
454 link = list_entry(tmp_cg_links.next,
455 struct cg_cgroup_link,
456 cgrp_link_list);
457 list_del(&link->cgrp_link_list);
458 list_add(&link->cgrp_link_list, &cgrp->css_sets);
459 link->cg = res;
460 list_add(&link->cg_link_list, &res->cg_links);
461 }
462 }
463 if (list_empty(&rootnode.subsys_list)) {
464 link = list_entry(tmp_cg_links.next,
465 struct cg_cgroup_link,
466 cgrp_link_list);
467 list_del(&link->cgrp_link_list);
468 list_add(&link->cgrp_link_list, &dummytop->css_sets);
469 link->cg = res;
470 list_add(&link->cg_link_list, &res->cg_links);
471 }
472
473 BUG_ON(!list_empty(&tmp_cg_links));
474
475 /* Link this cgroup group into the list */
476 list_add(&res->list, &init_css_set.list);
477 css_set_count++;
478 INIT_LIST_HEAD(&res->tasks);
479 write_unlock(&css_set_lock);
480
481 return res;
482}
483
484/*
485 * There is one global cgroup mutex. We also require taking
486 * task_lock() when dereferencing a task's cgroup subsys pointers.
487 * See "The task_lock() exception", at the end of this comment.
488 *
489 * A task must hold cgroup_mutex to modify cgroups.
490 *
491 * Any task can increment and decrement the count field without lock.
492 * So in general, code holding cgroup_mutex can't rely on the count
493 * field not changing. However, if the count goes to zero, then only
494 * attach_task() can increment it again. Because a count of zero
495 * means that no tasks are currently attached, therefore there is no
496 * way a task attached to that cgroup can fork (the other way to
497 * increment the count). So code holding cgroup_mutex can safely
498 * assume that if the count is zero, it will stay zero. Similarly, if
499 * a task holds cgroup_mutex on a cgroup with zero count, it
500 * knows that the cgroup won't be removed, as cgroup_rmdir()
501 * needs that mutex.
502 *
503 * The cgroup_common_file_write handler for operations that modify
504 * the cgroup hierarchy holds cgroup_mutex across the entire operation,
505 * single threading all such cgroup modifications across the system.
506 *
507 * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
508 * (usually) take cgroup_mutex. These are the two most performance
509 * critical pieces of code here. The exception occurs on cgroup_exit(),
510 * when a task in a notify_on_release cgroup exits. Then cgroup_mutex
511 * is taken, and if the cgroup count is zero, a usermode call made
512 * to /sbin/cgroup_release_agent with the name of the cgroup (path
513 * relative to the root of cgroup file system) as the argument.
514 *
515 * A cgroup can only be deleted if both its 'count' of using tasks
516 * is zero, and its list of 'children' cgroups is empty. Since all
517 * tasks in the system use _some_ cgroup, and since there is always at
518 * least one task in the system (init, pid == 1), therefore, top_cgroup
519 * always has either children cgroups and/or using tasks. So we don't
520 * need a special hack to ensure that top_cgroup cannot be deleted.
521 *
522 * The task_lock() exception
523 *
 * The need for this exception arises from the action of
 * attach_task(), which overwrites one task's cgroup pointer with
 * another. It does so using cgroup_mutex, however there are
527 * several performance critical places that need to reference
528 * task->cgroup without the expense of grabbing a system global
 * mutex. Therefore except as noted below, when dereferencing or, as
 * in attach_task(), modifying a task's cgroup pointer we use
531 * task_lock(), which acts on a spinlock (task->alloc_lock) already in
532 * the task_struct routinely used for such matters.
533 *
 * P.S. One more locking exception. RCU is used to guard the
 * update of a task's cgroup pointer by attach_task()
536 */
537
538/**
539 * cgroup_lock - lock out any changes to cgroup structures
540 *
541 */
542
543void cgroup_lock(void)
544{
545 mutex_lock(&cgroup_mutex);
546}
547
548/**
549 * cgroup_unlock - release lock on cgroup changes
550 *
551 * Undo the lock taken in a previous cgroup_lock() call.
552 */
553
554void cgroup_unlock(void)
555{
556 mutex_unlock(&cgroup_mutex);
557}
558
559/*
560 * A couple of forward declarations required, due to cyclic reference loop:
561 * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
562 * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
563 * -> cgroup_mkdir.
564 */
565
566static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
567static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
568static int cgroup_populate_dir(struct cgroup *cgrp);
569static struct inode_operations cgroup_dir_inode_operations;
570static struct file_operations proc_cgroupstats_operations;
571
/* cgroupfs inodes hold no data pages, so opt out of dirty accounting
 * and writeback entirely. */
static struct backing_dev_info cgroup_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};
575
576static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
577{
578 struct inode *inode = new_inode(sb);
579
580 if (inode) {
581 inode->i_mode = mode;
582 inode->i_uid = current->fsuid;
583 inode->i_gid = current->fsgid;
584 inode->i_blocks = 0;
585 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
586 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
587 }
588 return inode;
589}
590
/* d_iput callback: final teardown of a cgroup when its directory
 * dentry is destroyed.  Frees the struct cgroup for directories. */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		/* Only an already-removed cgroup may reach final iput. */
		BUG_ON(!(cgroup_is_removed(cgrp)));
		/* It's possible for external users to be holding css
		 * reference counts on a cgroup; css_put() needs to
		 * be able to access the cgroup after decrementing
		 * the reference count in order to know if it needs to
		 * queue the cgroup to be handled by the release
		 * agent */
		synchronize_rcu();
		kfree(cgrp);
	}
	iput(inode);
}
608
609static void remove_dir(struct dentry *d)
610{
611 struct dentry *parent = dget(d->d_parent);
612
613 d_delete(d);
614 simple_rmdir(parent->d_inode, d);
615 dput(parent);
616}
617
/* Remove all (non-directory) control files from a cgroup directory.
 * Caller must hold the directory inode's i_mutex.  dcache_lock is
 * dropped and re-taken around each unlink, so the child list is
 * re-read from the head on every iteration. */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dcache_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
		list_del_init(node);
		if (d->d_inode) {
			/* This should never be called on a cgroup
			 * directory with child cgroups */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			/* Hold an extra ref so d survives the unlock. */
			d = dget_locked(d);
			spin_unlock(&dcache_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dcache_lock);
		}
		/* List may have changed while unlocked; restart from head. */
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dcache_lock);
}
643
644/*
645 * NOTE : the dentry must have been dget()'ed
646 */
647static void cgroup_d_remove_dir(struct dentry *dentry)
648{
649 cgroup_clear_directory(dentry);
650
651 spin_lock(&dcache_lock);
652 list_del_init(&dentry->d_u.d_child);
653 spin_unlock(&dcache_lock);
654 remove_dir(dentry);
655}
656
657static int rebind_subsystems(struct cgroupfs_root *root,
658 unsigned long final_bits)
659{
660 unsigned long added_bits, removed_bits;
661 struct cgroup *cgrp = &root->top_cgroup;
662 int i;
663
664 removed_bits = root->actual_subsys_bits & ~final_bits;
665 added_bits = final_bits & ~root->actual_subsys_bits;
666 /* Check that any added subsystems are currently free */
667 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
668 unsigned long long bit = 1ull << i;
669 struct cgroup_subsys *ss = subsys[i];
670 if (!(bit & added_bits))
671 continue;
672 if (ss->root != &rootnode) {
673 /* Subsystem isn't free */
674 return -EBUSY;
675 }
676 }
677
678 /* Currently we don't handle adding/removing subsystems when
679 * any child cgroups exist. This is theoretically supportable
680 * but involves complex error handling, so it's being left until
681 * later */
682 if (!list_empty(&cgrp->children))
683 return -EBUSY;
684
685 /* Process each subsystem */
686 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
687 struct cgroup_subsys *ss = subsys[i];
688 unsigned long bit = 1UL << i;
689 if (bit & added_bits) {
690 /* We're binding this subsystem to this hierarchy */
691 BUG_ON(cgrp->subsys[i]);
692 BUG_ON(!dummytop->subsys[i]);
693 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
694 cgrp->subsys[i] = dummytop->subsys[i];
695 cgrp->subsys[i]->cgroup = cgrp;
696 list_add(&ss->sibling, &root->subsys_list);
697 rcu_assign_pointer(ss->root, root);
698 if (ss->bind)
699 ss->bind(ss, cgrp);
700
701 } else if (bit & removed_bits) {
702 /* We're removing this subsystem */
703 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
704 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
705 if (ss->bind)
706 ss->bind(ss, dummytop);
707 dummytop->subsys[i]->cgroup = dummytop;
708 cgrp->subsys[i] = NULL;
709 rcu_assign_pointer(subsys[i]->root, &rootnode);
710 list_del(&ss->sibling);
711 } else if (bit & final_bits) {
712 /* Subsystem state should already exist */
713 BUG_ON(!cgrp->subsys[i]);
714 } else {
715 /* Subsystem state shouldn't exist */
716 BUG_ON(cgrp->subsys[i]);
717 }
718 }
719 root->subsys_bits = root->actual_subsys_bits = final_bits;
720 synchronize_rcu();
721
722 return 0;
723}
724
725static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
726{
727 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
728 struct cgroup_subsys *ss;
729
730 mutex_lock(&cgroup_mutex);
731 for_each_subsys(root, ss)
732 seq_printf(seq, ",%s", ss->name);
733 if (test_bit(ROOT_NOPREFIX, &root->flags))
734 seq_puts(seq, ",noprefix");
735 if (strlen(root->release_agent_path))
736 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
737 mutex_unlock(&cgroup_mutex);
738 return 0;
739}
740
/* Parsed mount options for a cgroup hierarchy. */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* bitmask of requested subsystems */
	unsigned long flags;		/* ROOT_* flags, e.g. ROOT_NOPREFIX */
	char *release_agent;		/* kmalloc'ed path, or NULL; caller frees */
};
746
/* Convert a hierarchy specifier into a bitmask of subsystems and
 * flags.  Recognized tokens: "all", "noprefix", "release_agent=<path>",
 * or a subsystem name.  NULL data is treated as "all".  On success
 * opts->release_agent (if set) is a kmalloc'ed buffer the caller must
 * free.  Returns 0, -EINVAL, -ENOMEM or -ENOENT. */
static int parse_cgroupfs_options(char *data,
				     struct cgroup_sb_opts *opts)
{
	/* GCC ?: extension: default to "all" when no data given */
	char *token, *o = data ?: "all";

	opts->subsys_bits = 0;
	opts->flags = 0;
	opts->release_agent = NULL;

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "all")) {
			opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1;
		} else if (!strcmp(token, "noprefix")) {
			set_bit(ROOT_NOPREFIX, &opts->flags);
		} else if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
			opts->release_agent[PATH_MAX - 1] = 0;
		} else {
			/* Otherwise the token must name a subsystem. */
			struct cgroup_subsys *ss;
			int i;
			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
				ss = subsys[i];
				if (!strcmp(token, ss->name)) {
					set_bit(i, &opts->subsys_bits);
					break;
				}
			}
			if (i == CGROUP_SUBSYS_COUNT)
				return -ENOENT;
		}
	}

	/* We can't have an empty hierarchy */
	if (!opts->subsys_bits)
		return -EINVAL;

	return 0;
}
795
796static int cgroup_remount(struct super_block *sb, int *flags, char *data)
797{
798 int ret = 0;
799 struct cgroupfs_root *root = sb->s_fs_info;
800 struct cgroup *cgrp = &root->top_cgroup;
801 struct cgroup_sb_opts opts;
802
803 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
804 mutex_lock(&cgroup_mutex);
805
806 /* See what subsystems are wanted */
807 ret = parse_cgroupfs_options(data, &opts);
808 if (ret)
809 goto out_unlock;
810
811 /* Don't allow flags to change at remount */
812 if (opts.flags != root->flags) {
813 ret = -EINVAL;
814 goto out_unlock;
815 }
816
817 ret = rebind_subsystems(root, opts.subsys_bits);
818
819 /* (re)populate subsystem files */
820 if (!ret)
821 cgroup_populate_dir(cgrp);
822
823 if (opts.release_agent)
824 strcpy(root->release_agent_path, opts.release_agent);
825 out_unlock:
826 if (opts.release_agent)
827 kfree(opts.release_agent);
828 mutex_unlock(&cgroup_mutex);
829 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
830 return ret;
831}
832
/* Superblock operations for cgroupfs; mostly libfs defaults. */
static struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
839
840static void init_cgroup_root(struct cgroupfs_root *root)
841{
842 struct cgroup *cgrp = &root->top_cgroup;
843 INIT_LIST_HEAD(&root->subsys_list);
844 INIT_LIST_HEAD(&root->root_list);
845 root->number_of_cgroups = 1;
846 cgrp->root = root;
847 cgrp->top_cgroup = cgrp;
848 INIT_LIST_HEAD(&cgrp->sibling);
849 INIT_LIST_HEAD(&cgrp->children);
850 INIT_LIST_HEAD(&cgrp->css_sets);
851 INIT_LIST_HEAD(&cgrp->release_list);
852}
853
854static int cgroup_test_super(struct super_block *sb, void *data)
855{
856 struct cgroupfs_root *new = data;
857 struct cgroupfs_root *root = sb->s_fs_info;
858
859 /* First check subsystems */
860 if (new->subsys_bits != root->subsys_bits)
861 return 0;
862
863 /* Next check flags */
864 if (new->flags != root->flags)
865 return 0;
866
867 return 1;
868}
869
870static int cgroup_set_super(struct super_block *sb, void *data)
871{
872 int ret;
873 struct cgroupfs_root *root = data;
874
875 ret = set_anon_super(sb, NULL);
876 if (ret)
877 return ret;
878
879 sb->s_fs_info = root;
880 root->sb = sb;
881
882 sb->s_blocksize = PAGE_CACHE_SIZE;
883 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
884 sb->s_magic = CGROUP_SUPER_MAGIC;
885 sb->s_op = &cgroup_ops;
886
887 return 0;
888}
889
890static int cgroup_get_rootdir(struct super_block *sb)
891{
892 struct inode *inode =
893 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
894 struct dentry *dentry;
895
896 if (!inode)
897 return -ENOMEM;
898
899 inode->i_op = &simple_dir_inode_operations;
900 inode->i_fop = &simple_dir_operations;
901 inode->i_op = &cgroup_dir_inode_operations;
902 /* directories start off with i_nlink == 2 (for "." entry) */
903 inc_nlink(inode);
904 dentry = d_alloc_root(inode);
905 if (!dentry) {
906 iput(inode);
907 return -ENOMEM;
908 }
909 sb->s_root = dentry;
910 return 0;
911}
912
913static int cgroup_get_sb(struct file_system_type *fs_type,
914 int flags, const char *unused_dev_name,
915 void *data, struct vfsmount *mnt)
916{
917 struct cgroup_sb_opts opts;
918 int ret = 0;
919 struct super_block *sb;
920 struct cgroupfs_root *root;
921 struct list_head tmp_cg_links, *l;
922 INIT_LIST_HEAD(&tmp_cg_links);
923
924 /* First find the desired set of subsystems */
925 ret = parse_cgroupfs_options(data, &opts);
926 if (ret) {
927 if (opts.release_agent)
928 kfree(opts.release_agent);
929 return ret;
930 }
931
932 root = kzalloc(sizeof(*root), GFP_KERNEL);
933 if (!root)
934 return -ENOMEM;
935
936 init_cgroup_root(root);
937 root->subsys_bits = opts.subsys_bits;
938 root->flags = opts.flags;
939 if (opts.release_agent) {
940 strcpy(root->release_agent_path, opts.release_agent);
941 kfree(opts.release_agent);
942 }
943
944 sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
945
946 if (IS_ERR(sb)) {
947 kfree(root);
948 return PTR_ERR(sb);
949 }
950
951 if (sb->s_fs_info != root) {
952 /* Reusing an existing superblock */
953 BUG_ON(sb->s_root == NULL);
954 kfree(root);
955 root = NULL;
956 } else {
957 /* New superblock */
958 struct cgroup *cgrp = &root->top_cgroup;
959 struct inode *inode;
960
961 BUG_ON(sb->s_root != NULL);
962
963 ret = cgroup_get_rootdir(sb);
964 if (ret)
965 goto drop_new_super;
966 inode = sb->s_root->d_inode;
967
968 mutex_lock(&inode->i_mutex);
969 mutex_lock(&cgroup_mutex);
970
971 /*
972 * We're accessing css_set_count without locking
973 * css_set_lock here, but that's OK - it can only be
974 * increased by someone holding cgroup_lock, and
975 * that's us. The worst that can happen is that we
976 * have some link structures left over
977 */
978 ret = allocate_cg_links(css_set_count, &tmp_cg_links);
979 if (ret) {
980 mutex_unlock(&cgroup_mutex);
981 mutex_unlock(&inode->i_mutex);
982 goto drop_new_super;
983 }
984
985 ret = rebind_subsystems(root, root->subsys_bits);
986 if (ret == -EBUSY) {
987 mutex_unlock(&cgroup_mutex);
988 mutex_unlock(&inode->i_mutex);
989 goto drop_new_super;
990 }
991
992 /* EBUSY should be the only error here */
993 BUG_ON(ret);
994
995 list_add(&root->root_list, &roots);
996 root_count++;
997
998 sb->s_root->d_fsdata = &root->top_cgroup;
999 root->top_cgroup.dentry = sb->s_root;
1000
1001 /* Link the top cgroup in this hierarchy into all
1002 * the css_set objects */
1003 write_lock(&css_set_lock);
1004 l = &init_css_set.list;
1005 do {
1006 struct css_set *cg;
1007 struct cg_cgroup_link *link;
1008 cg = list_entry(l, struct css_set, list);
1009 BUG_ON(list_empty(&tmp_cg_links));
1010 link = list_entry(tmp_cg_links.next,
1011 struct cg_cgroup_link,
1012 cgrp_link_list);
1013 list_del(&link->cgrp_link_list);
1014 link->cg = cg;
1015 list_add(&link->cgrp_link_list,
1016 &root->top_cgroup.css_sets);
1017 list_add(&link->cg_link_list, &cg->cg_links);
1018 l = l->next;
1019 } while (l != &init_css_set.list);
1020 write_unlock(&css_set_lock);
1021
1022 free_cg_links(&tmp_cg_links);
1023
1024 BUG_ON(!list_empty(&cgrp->sibling));
1025 BUG_ON(!list_empty(&cgrp->children));
1026 BUG_ON(root->number_of_cgroups != 1);
1027
1028 cgroup_populate_dir(cgrp);
1029 mutex_unlock(&inode->i_mutex);
1030 mutex_unlock(&cgroup_mutex);
1031 }
1032
1033 return simple_set_mnt(mnt, sb);
1034
1035 drop_new_super:
1036 up_write(&sb->s_umount);
1037 deactivate_super(sb);
1038 free_cg_links(&tmp_cg_links);
1039 return ret;
1040}
1041
/* kill_sb callback: tear down a hierarchy at unmount.  The hierarchy
 * must have no child cgroups (only the top cgroup remains). */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail ... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup
	 */
	write_lock(&css_set_lock);
	while (!list_empty(&cgrp->css_sets)) {
		struct cg_cgroup_link *link;
		link = list_entry(cgrp->css_sets.next,
				  struct cg_cgroup_link, cgrp_link_list);
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	/* Drop the hierarchy from the global roots list. */
	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}
	mutex_unlock(&cgroup_mutex);

	kfree(root);
	kill_litter_super(sb);
}
1084
/* The "cgroup" filesystem type; registered at init. */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.get_sb = cgroup_get_sb,
	.kill_sb = cgroup_kill_sb,
};
1090
1091static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1092{
1093 return dentry->d_fsdata;
1094}
1095
1096static inline struct cftype *__d_cft(struct dentry *dentry)
1097{
1098 return dentry->d_fsdata;
1099}
1100
1101/*
1102 * Called with cgroup_mutex held. Writes path of cgroup into buf.
1103 * Returns 0 on success, -errno on error.
1104 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	char *start;

	if (cgrp == dummytop) {
		/*
		 * Inactive subsystems have no dentry for their root
		 * cgroup
		 */
		strcpy(buf, "/");
		return 0;
	}

	/* Build the path backwards from the end of the buffer. */
	start = buf + buflen;

	*--start = '\0';
	for (;;) {
		int len = cgrp->dentry->d_name.len;
		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, cgrp->dentry->d_name.name, len);
		cgrp = cgrp->parent;
		if (!cgrp)
			break;
		/* Skip the separator before the top-level cgroup's name:
		 * its own component already begins the path. */
		if (!cgrp->parent)
			continue;
		if (--start < buf)
			return -ENAMETOOLONG;
		*start = '/';
	}
	/* Shift the completed path to the front of the buffer. */
	memmove(buf, start, buf + buflen - start);
	return 0;
}
1138
1139/*
1140 * Return the first subsystem attached to a cgroup's hierarchy, and
1141 * its subsystem id.
1142 */
1143
static void get_first_subsys(const struct cgroup *cgrp,
			struct cgroup_subsys_state **css, int *subsys_id)
{
	const struct cgroupfs_root *root = cgrp->root;
	const struct cgroup_subsys *test_ss;
	/* Every mounted hierarchy has at least one subsystem bound. */
	BUG_ON(list_empty(&root->subsys_list));
	test_ss = list_entry(root->subsys_list.next,
			     struct cgroup_subsys, sibling);
	/* Both out-parameters are optional; fill only what was asked for. */
	if (css) {
		*css = cgrp->subsys[test_ss->subsys_id];
		BUG_ON(!*css);
	}
	if (subsys_id)
		*subsys_id = test_ss->subsys_id;
}
1159
1160/*
1161 * Attach task 'tsk' to cgroup 'cgrp'
1162 *
1163 * Call holding cgroup_mutex. May take task_lock of
1164 * the task 'pid' during call.
1165 */
static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss;
	struct cgroup *oldcgrp;
	struct css_set *cg = tsk->cgroups;
	struct css_set *newcg;
	struct cgroupfs_root *root = cgrp->root;
	int subsys_id;

	get_first_subsys(cgrp, NULL, &subsys_id);

	/* Nothing to do if the task is already in that cgroup */
	oldcgrp = task_cgroup(tsk, subsys_id);
	if (cgrp == oldcgrp)
		return 0;

	/* Give each subsystem in this hierarchy a chance to veto the move. */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk);
			if (retval) {
				return retval;
			}
		}
	}

	/*
	 * Locate or allocate a new css_set for this task,
	 * based on its final set of cgroups
	 */
	newcg = find_css_set(cg, cgrp);
	if (!newcg) {
		return -ENOMEM;
	}

	task_lock(tsk);
	/* Don't retarget a task that is already exiting. */
	if (tsk->flags & PF_EXITING) {
		task_unlock(tsk);
		put_css_set(newcg);
		return -ESRCH;
	}
	/* RCU publish: readers may still see the old css_set for a while. */
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Update the css_set linked lists if we're using them */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list)) {
		list_del(&tsk->cg_list);
		list_add(&tsk->cg_list, &newcg->tasks);
	}
	write_unlock(&css_set_lock);

	/* Notify subsystems after the move is visible. */
	for_each_subsys(root, ss) {
		if (ss->attach) {
			ss->attach(ss, cgrp, oldcgrp, tsk);
		}
	}
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	/* Wait out RCU readers of the old css_set before dropping it. */
	synchronize_rcu();
	put_css_set(cg);
	return 0;
}
1228
1229/*
1230 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
1231 * cgroup_mutex, may take task_lock of task
1232 */
static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
{
	pid_t pid;
	struct task_struct *tsk;
	int ret;

	if (sscanf(pidbuf, "%d", &pid) != 1)
		return -EIO;

	/* pid == 0 means "attach the writing task itself". */
	if (pid) {
		rcu_read_lock();
		tsk = find_task_by_pid(pid);
		if (!tsk || tsk->flags & PF_EXITING) {
			rcu_read_unlock();
			return -ESRCH;
		}
		/* Pin the task before leaving the RCU read section. */
		get_task_struct(tsk);
		rcu_read_unlock();

		/* Non-root callers may only move their own tasks. */
		if ((current->euid) && (current->euid != tsk->uid)
		    && (current->euid != tsk->suid)) {
			put_task_struct(tsk);
			return -EACCES;
		}
	} else {
		tsk = current;
		get_task_struct(tsk);
	}

	ret = attach_task(cgrp, tsk);
	put_task_struct(tsk);
	return ret;
}
1266
1267/* The various types of files and directories in a cgroup file system */
1268
/* Identifies which built-in control file a cftype->private refers to. */
enum cgroup_filetype {
	FILE_ROOT,
	FILE_DIR,
	FILE_TASKLIST,		/* "tasks" */
	FILE_NOTIFY_ON_RELEASE,	/* "notify_on_release" */
	FILE_RELEASABLE,
	FILE_RELEASE_AGENT,	/* "release_agent" (root only) */
};
1277
1278static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft,
1279 struct file *file,
1280 const char __user *userbuf,
1281 size_t nbytes, loff_t *unused_ppos)
1282{
1283 char buffer[64];
1284 int retval = 0;
1285 u64 val;
1286 char *end;
1287
1288 if (!nbytes)
1289 return -EINVAL;
1290 if (nbytes >= sizeof(buffer))
1291 return -E2BIG;
1292 if (copy_from_user(buffer, userbuf, nbytes))
1293 return -EFAULT;
1294
1295 buffer[nbytes] = 0; /* nul-terminate */
1296
1297 /* strip newline if necessary */
1298 if (nbytes && (buffer[nbytes-1] == '\n'))
1299 buffer[nbytes-1] = 0;
1300 val = simple_strtoull(buffer, &end, 0);
1301 if (*end)
1302 return -EINVAL;
1303
1304 /* Pass to subsystem */
1305 retval = cft->write_uint(cgrp, cft, val);
1306 if (!retval)
1307 retval = nbytes;
1308 return retval;
1309}
1310
/* Shared write handler for the built-in control files ("tasks",
 * "notify_on_release", "release_agent").  Dispatches on
 * cft->private.  Returns bytes consumed or a negative errno. */
static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
					   struct cftype *cft,
					   struct file *file,
					   const char __user *userbuf,
					   size_t nbytes, loff_t *unused_ppos)
{
	enum cgroup_filetype type = cft->private;
	char *buffer;
	int retval = 0;

	if (nbytes >= PATH_MAX)
		return -E2BIG;

	/* +1 for nul-terminator */
	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
	if (buffer == NULL)
		return -ENOMEM;

	if (copy_from_user(buffer, userbuf, nbytes)) {
		retval = -EFAULT;
		goto out1;
	}
	buffer[nbytes] = 0;	/* nul-terminate */

	/* Single-thread all hierarchy-modifying writes. */
	mutex_lock(&cgroup_mutex);

	/* The cgroup may have been removed since the file was opened. */
	if (cgroup_is_removed(cgrp)) {
		retval = -ENODEV;
		goto out2;
	}

	switch (type) {
	case FILE_TASKLIST:
		retval = attach_task_by_pid(cgrp, buffer);
		break;
	case FILE_NOTIFY_ON_RELEASE:
		clear_bit(CGRP_RELEASABLE, &cgrp->flags);
		if (simple_strtoul(buffer, NULL, 10) != 0)
			set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
		else
			clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
		break;
	case FILE_RELEASE_AGENT:
	{
		struct cgroupfs_root *root = cgrp->root;
		/* Strip trailing newline */
		if (nbytes && (buffer[nbytes-1] == '\n')) {
			buffer[nbytes-1] = 0;
		}
		if (nbytes < sizeof(root->release_agent_path)) {
			/* We never write anything other than '\0'
			 * into the last char of release_agent_path,
			 * so it always remains a NUL-terminated
			 * string */
			strncpy(root->release_agent_path, buffer, nbytes);
			root->release_agent_path[nbytes] = 0;
		} else {
			retval = -ENOSPC;
		}
		break;
	}
	default:
		retval = -EINVAL;
		goto out2;
	}

	if (retval == 0)
		retval = nbytes;
out2:
	mutex_unlock(&cgroup_mutex);
out1:
	kfree(buffer);
	return retval;
}
1385
1386static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1387 size_t nbytes, loff_t *ppos)
1388{
1389 struct cftype *cft = __d_cft(file->f_dentry);
1390 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1391
1392 if (!cft)
1393 return -ENODEV;
1394 if (cft->write)
1395 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
1396 if (cft->write_uint)
1397 return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos);
1398 return -EINVAL;
1399}
1400
1401static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft,
1402 struct file *file,
1403 char __user *buf, size_t nbytes,
1404 loff_t *ppos)
1405{
1406 char tmp[64];
1407 u64 val = cft->read_uint(cgrp, cft);
1408 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
1409
1410 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1411}
1412
/* Shared read handler for built-in control files; currently only
 * FILE_RELEASE_AGENT is readable this way. */
static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
					  struct cftype *cft,
					  struct file *file,
					  char __user *buf,
					  size_t nbytes, loff_t *ppos)
{
	enum cgroup_filetype type = cft->private;
	char *page;
	ssize_t retval = 0;
	char *s;

	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	s = page;

	switch (type) {
	case FILE_RELEASE_AGENT:
	{
		struct cgroupfs_root *root;
		size_t n;
		/* cgroup_mutex keeps release_agent_path stable while
		 * we copy it. */
		mutex_lock(&cgroup_mutex);
		root = cgrp->root;
		n = strnlen(root->release_agent_path,
			    sizeof(root->release_agent_path));
		n = min(n, (size_t) PAGE_SIZE);
		strncpy(s, root->release_agent_path, n);
		mutex_unlock(&cgroup_mutex);
		s += n;
		break;
	}
	default:
		retval = -EINVAL;
		goto out;
	}
	*s++ = '\n';

	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
out:
	free_page((unsigned long)page);
	return retval;
}
1455
1456static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1457 size_t nbytes, loff_t *ppos)
1458{
1459 struct cftype *cft = __d_cft(file->f_dentry);
1460 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1461
1462 if (!cft)
1463 return -ENODEV;
1464
1465 if (cft->read)
1466 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
1467 if (cft->read_uint)
1468 return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos);
1469 return -EINVAL;
1470}
1471
1472static int cgroup_file_open(struct inode *inode, struct file *file)
1473{
1474 int err;
1475 struct cftype *cft;
1476
1477 err = generic_file_open(inode, file);
1478 if (err)
1479 return err;
1480
1481 cft = __d_cft(file->f_dentry);
1482 if (!cft)
1483 return -ENODEV;
1484 if (cft->open)
1485 err = cft->open(inode, file);
1486 else
1487 err = 0;
1488
1489 return err;
1490}
1491
1492static int cgroup_file_release(struct inode *inode, struct file *file)
1493{
1494 struct cftype *cft = __d_cft(file->f_dentry);
1495 if (cft->release)
1496 return cft->release(inode, file);
1497 return 0;
1498}
1499
1500/*
1501 * cgroup_rename - Only allow simple rename of directories in place.
1502 */
1503static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
1504 struct inode *new_dir, struct dentry *new_dentry)
1505{
1506 if (!S_ISDIR(old_dentry->d_inode->i_mode))
1507 return -ENOTDIR;
1508 if (new_dentry->d_inode)
1509 return -EEXIST;
1510 if (old_dir != new_dir)
1511 return -EIO;
1512 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
1513}
1514
/* file_operations for regular cgroupfs control files. */
static struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
1522
/* inode_operations for cgroup directories: mkdir/rmdir create and
 * destroy cgroups; rename is restricted to in-place renames. */
static struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
1529
/* Create and instantiate an inode for a cgroupfs dentry (directory or
 * regular file).  For directories the new inode's i_mutex is returned
 * held (I_MUTEX_CHILD) so the caller can populate it without racing.
 * Returns 0 or a negative errno. */
static int cgroup_create_file(struct dentry *dentry, int mode,
				struct super_block *sb)
{
	static struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
	};

	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);

		/* start with the directory inode held, so that we can
		 * populate it without racing with another mkdir */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	dentry->d_op = &cgroup_dops;
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
	return 0;
}
1567
1568/*
1569 * cgroup_create_dir - create a directory for an object.
1570 * cgrp: the cgroup we create the directory for.
1571 * It must have a valid ->parent field
1572 * And we are going to fill its ->dentry field.
1573 * dentry: dentry of the new cgroup
1574 * mode: mode to set on new directory.
1575 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
				int mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		/* Cross-link dentry and cgroup, and bump the parent's
		 * link count for the new subdirectory's ".." entry. */
		dentry->d_fsdata = cgrp;
		inc_nlink(parent->d_inode);
		cgrp->dentry = dentry;
		/* Extra ref held by the cgroup itself; released at rmdir. */
		dget(dentry);
	}
	dput(dentry);

	return error;
}
1594
/* Create one control file in a cgroup directory.  Unless the
 * hierarchy was mounted with "noprefix", the file name is prefixed
 * with "<subsys>.".  Caller must hold the directory's i_mutex.
 * Returns 0 or a negative errno. */
int cgroup_add_file(struct cgroup *cgrp,
		       struct cgroup_subsys *subsys,
		       const struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct dentry *dentry;
	int error;

	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);
	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
	dentry = lookup_one_len(name, dir, strlen(name));
	if (!IS_ERR(dentry)) {
		error = cgroup_create_file(dentry, 0644 | S_IFREG,
						cgrp->root->sb);
		if (!error)
			dentry->d_fsdata = (void *)cft;
		dput(dentry);
	} else
		error = PTR_ERR(dentry);
	return error;
}
1621
1622int cgroup_add_files(struct cgroup *cgrp,
1623 struct cgroup_subsys *subsys,
1624 const struct cftype cft[],
1625 int count)
1626{
1627 int i, err;
1628 for (i = 0; i < count; i++) {
1629 err = cgroup_add_file(cgrp, subsys, &cft[i]);
1630 if (err)
1631 return err;
1632 }
1633 return 0;
1634}
1635
1636/* Count the number of tasks in a cgroup. */
1637
1638int cgroup_task_count(const struct cgroup *cgrp)
1639{
1640 int count = 0;
1641 struct list_head *l;
1642
1643 read_lock(&css_set_lock);
1644 l = cgrp->css_sets.next;
1645 while (l != &cgrp->css_sets) {
1646 struct cg_cgroup_link *link =
1647 list_entry(l, struct cg_cgroup_link, cgrp_link_list);
1648 count += atomic_read(&link->cg->ref.refcount);
1649 l = l->next;
1650 }
1651 read_unlock(&css_set_lock);
1652 return count;
1653}
1654
/*
 * Advance a list_head iterator. The iterator should be positioned at
 * the start of a css_set
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
					  struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			/* wrapped back to the head: no more css_sets,
			 * mark the iterator as finished */
			it->cg_link = NULL;
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	/* found a css_set with tasks: point the cursor at its first task */
	it->cg_link = l;
	it->task = cg->tasks.next;
}
1679
/*
 * Begin iterating over the tasks in 'cgrp'. Takes css_set_lock for
 * reading; the lock stays held until cgroup_iter_end() is called.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/*
	 * The first time anyone tries to iterate across a cgroup,
	 * we need to enable the list linking each css_set to its
	 * tasks, and fix up all existing tasks.
	 */
	if (!use_task_css_set_links) {
		struct task_struct *p, *g;
		write_lock(&css_set_lock);
		use_task_css_set_links = 1;
		do_each_thread(g, p) {
			task_lock(p);
			/* only link tasks that aren't already on a list */
			if (list_empty(&p->cg_list))
				list_add(&p->cg_list, &p->cgroups->tasks);
			task_unlock(p);
		} while_each_thread(g, p);
		write_unlock(&css_set_lock);
	}
	/* released by cgroup_iter_end() */
	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
1703
/*
 * Return the next task in the iteration, or NULL when the iteration
 * is complete. Must be called between cgroup_iter_start() and
 * cgroup_iter_end().
 */
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
					struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;

	/* If the iterator cg is NULL, we have no tasks */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance iterator to find next entry */
	l = l->next;
	if (l == &res->cgroups->tasks) {
		/* We reached the end of this task list - move on to
		 * the next cg_cgroup_link */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}
1725
/* Finish an iteration, dropping the lock taken by cgroup_iter_start(). */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
1730
/*
 * Stuff for reading the 'tasks' file.
 *
 * Reading this file can return large amounts of data if a cgroup has
 * *lots* of attached tasks. So it may need several calls to read(),
 * but we cannot guarantee that the information we produce is correct
 * unless we produce it entirely atomically.
 *
 * Upon tasks file open(), a struct ctr_struct is allocated, that
 * will have a pointer to an array (also allocated here). The struct
 * ctr_struct * is stored in file->private_data. Its resources will
 * be freed by release() when the file is closed. The array is used
 * to sprintf the PIDs and then used by read().
 */
struct ctr_struct {
	char *buf;	/* formatted pid listing, kmalloc'd in open() */
	int bufsz;	/* number of valid bytes in buf */
};
1749
1750/*
1751 * Load into 'pidarray' up to 'npids' of the tasks using cgroup
1752 * 'cgrp'. Return actual number of pids loaded. No need to
1753 * task_lock(p) when reading out p->cgroup, since we're in an RCU
1754 * read section, so the css_set can't go away, and is
1755 * immutable after creation.
1756 */
1757static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
1758{
1759 int n = 0;
1760 struct cgroup_iter it;
1761 struct task_struct *tsk;
1762 cgroup_iter_start(cgrp, &it);
1763 while ((tsk = cgroup_iter_next(cgrp, &it))) {
1764 if (unlikely(n == npids))
1765 break;
1766 pidarray[n++] = task_pid_nr(tsk);
1767 }
1768 cgroup_iter_end(cgrp, &it);
1769 return n;
1770}
1771
/**
 * Build and fill cgroupstats so that taskstats can export it to user
 * space.
 *
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Returns 0 on success, -EINVAL if @dentry does not belong to a
 * cgroup filesystem.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;
	/*
	 * Validate dentry by checking the superblock operations
	 */
	if (dentry->d_sb->s_op != &cgroup_ops)
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;
	rcu_read_lock();

	/* bucket each member task by its scheduler state */
	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			/* any other state: only count tasks waiting on I/O */
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);

	rcu_read_unlock();
err:
	return ret;
}
1823
/*
 * qsort-style comparator for pid_t values used by sort() in
 * cgroup_tasks_open(). Returns <0, 0 or >0 as *a is less than,
 * equal to, or greater than *b.
 *
 * Uses the (x > y) - (x < y) idiom instead of subtraction so the
 * result cannot overflow regardless of the operand values.
 */
static int cmppid(const void *a, const void *b)
{
	pid_t x = *(const pid_t *)a;
	pid_t y = *(const pid_t *)b;

	return (x > y) - (x < y);
}
1828
1829/*
1830 * Convert array 'a' of 'npids' pid_t's to a string of newline separated
1831 * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
1832 * count 'cnt' of how many chars would be written if buf were large enough.
1833 */
1834static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
1835{
1836 int cnt = 0;
1837 int i;
1838
1839 for (i = 0; i < npids; i++)
1840 cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
1841 return cnt;
1842}
1843
1844/*
1845 * Handle an open on 'tasks' file. Prepare a buffer listing the
1846 * process id's of tasks currently attached to the cgroup being opened.
1847 *
1848 * Does not require any specific cgroup mutexes, and does not take any.
1849 */
1850static int cgroup_tasks_open(struct inode *unused, struct file *file)
1851{
1852 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1853 struct ctr_struct *ctr;
1854 pid_t *pidarray;
1855 int npids;
1856 char c;
1857
1858 if (!(file->f_mode & FMODE_READ))
1859 return 0;
1860
1861 ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
1862 if (!ctr)
1863 goto err0;
1864
1865 /*
1866 * If cgroup gets more users after we read count, we won't have
1867 * enough space - tough. This race is indistinguishable to the
1868 * caller from the case that the additional cgroup users didn't
1869 * show up until sometime later on.
1870 */
1871 npids = cgroup_task_count(cgrp);
1872 if (npids) {
1873 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
1874 if (!pidarray)
1875 goto err1;
1876
1877 npids = pid_array_load(pidarray, npids, cgrp);
1878 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
1879
1880 /* Call pid_array_to_buf() twice, first just to get bufsz */
1881 ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
1882 ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
1883 if (!ctr->buf)
1884 goto err2;
1885 ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
1886
1887 kfree(pidarray);
1888 } else {
1889 ctr->buf = 0;
1890 ctr->bufsz = 0;
1891 }
1892 file->private_data = ctr;
1893 return 0;
1894
1895err2:
1896 kfree(pidarray);
1897err1:
1898 kfree(ctr);
1899err0:
1900 return -ENOMEM;
1901}
1902
/*
 * Read handler for the 'tasks' file: copy out the pid listing that
 * cgroup_tasks_open() snapshotted into file->private_data.
 */
static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
				    struct cftype *cft,
				    struct file *file, char __user *buf,
				    size_t nbytes, loff_t *ppos)
{
	struct ctr_struct *ctr = file->private_data;

	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
}
1912
1913static int cgroup_tasks_release(struct inode *unused_inode,
1914 struct file *file)
1915{
1916 struct ctr_struct *ctr;
1917
1918 if (file->f_mode & FMODE_READ) {
1919 ctr = file->private_data;
1920 kfree(ctr->buf);
1921 kfree(ctr);
1922 }
1923 return 0;
1924}
1925
/* Read handler for "notify_on_release": reports the flag as 0/1. */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return notify_on_release(cgrp);
}
1931
/* Read handler for "releasable": reports the CGRP_RELEASABLE bit as 0/1. */
static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft)
{
	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
}
1936
/*
 * for the common functions, 'private' gives the type of file
 */
static struct cftype files[] = {
	{
		/* newline-separated pids of tasks attached to the cgroup */
		.name = "tasks",
		.open = cgroup_tasks_open,
		.read = cgroup_tasks_read,
		.write = cgroup_common_file_write,
		.release = cgroup_tasks_release,
		.private = FILE_TASKLIST,
	},

	{
		/* flag controlling release-agent invocation */
		.name = "notify_on_release",
		.read_uint = cgroup_read_notify_on_release,
		.write = cgroup_common_file_write,
		.private = FILE_NOTIFY_ON_RELEASE,
	},

	{
		/* read-only view of the CGRP_RELEASABLE flag */
		.name = "releasable",
		.read_uint = cgroup_read_releasable,
		.private = FILE_RELEASABLE,
	}
};
1963
/* "release_agent" file; added only in the top cgroup of a hierarchy */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read = cgroup_common_file_read,
	.write = cgroup_common_file_write,
	.private = FILE_RELEASE_AGENT,
};
1970
1971static int cgroup_populate_dir(struct cgroup *cgrp)
1972{
1973 int err;
1974 struct cgroup_subsys *ss;
1975
1976 /* First clear out any existing files */
1977 cgroup_clear_directory(cgrp->dentry);
1978
1979 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
1980 if (err < 0)
1981 return err;
1982
1983 if (cgrp == cgrp->top_cgroup) {
1984 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
1985 return err;
1986 }
1987
1988 for_each_subsys(cgrp->root, ss) {
1989 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
1990 return err;
1991 }
1992
1993 return 0;
1994}
1995
/*
 * Initialize a freshly-created subsystem state object and attach it
 * to its cgroup's subsys[] slot.
 */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			       struct cgroup_subsys *ss,
			       struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	atomic_set(&css->refcnt, 0);
	css->flags = 0;
	/* the dummy top cgroup's css is marked as the root state */
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	/* the slot must not already be occupied */
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;
}
2008
/*
 * cgroup_create - create a cgroup
 * parent: cgroup that will be parent of the new cgroup.
 * name: name of the new cgroup. Will be strcpy'ed.
 * mode: mode to set on new inode
 *
 * Must be called with the mutex on the parent inode held
 */

static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     int mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/* Grab a reference on the superblock so the hierarchy doesn't
	 * get deleted on unmount if there are child cgroups. This
	 * can be done outside cgroup_mutex, since the sb can't
	 * disappear while someone has an open control file on the
	 * fs */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	cgrp->flags = 0;
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* ask each mounted subsystem to create its per-cgroup state */
	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
	}

	list_add(&cgrp->sibling, &cgrp->parent->children);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* The cgroup directory was pre-locked for us */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	err = cgroup_populate_dir(cgrp);
	/* If err < 0, we have a half-filled directory - oh well ;) */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:

	/* undo the linkage performed just before cgroup_create_dir() */
	list_del(&cgrp->sibling);
	root->number_of_cgroups--;

 err_destroy:

	/* tear down any subsystem state that was created above */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count that we took on the superblock */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
2097
/* VFS mkdir entry point: create a child cgroup under 'dir'. */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct cgroup *c_parent = dentry->d_parent->d_fsdata;

	/* the vfs holds inode->i_mutex already */
	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
2105
/*
 * Return 1 if any subsystem state attached to 'cgrp' still has a
 * non-zero refcount, 0 otherwise.
 */
static inline int cgroup_has_css_refs(struct cgroup *cgrp)
{
	/* Check the reference count on each subsystem. Since we
	 * already established that there are no tasks in the
	 * cgroup, if the css refcount is also 0, then there should
	 * be no outstanding references, so the subsystem is safe to
	 * destroy. We scan across all subsystems rather than using
	 * the per-hierarchy linked list of mounted subsystems since
	 * we can be called via check_for_release() with no
	 * synchronization other than RCU, and the subsystem linked
	 * list isn't RCU-safe */
	int i;
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		struct cgroup_subsys_state *css;
		/* Skip subsystems not in this hierarchy */
		if (ss->root != cgrp->root)
			continue;
		css = cgrp->subsys[ss->subsys_id];
		/* When called from check_for_release() it's possible
		 * that by this point the cgroup has been removed
		 * and the css deleted. But a false-positive doesn't
		 * matter, since it can only happen if the cgroup
		 * has been deleted and hence no longer needs the
		 * release agent to be called anyway. */
		if (css && atomic_read(&css->refcnt)) {
			return 1;
		}
	}
	return 0;
}
2137
/*
 * VFS rmdir entry point: remove a cgroup. Fails with -EBUSY if the
 * cgroup still has member tasks, children, or css references.
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	struct cgroup_subsys *ss;
	struct super_block *sb;
	struct cgroupfs_root *root;

	/* the vfs holds both inode->i_mutex already */

	mutex_lock(&cgroup_mutex);
	/* refuse removal while tasks are still attached */
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	/* refuse removal while child cgroups exist */
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}

	parent = cgrp->parent;
	root = cgrp->root;
	sb = root->sb;

	/* refuse removal while subsystem state is still referenced */
	if (cgroup_has_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}

	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	/* mark removed and unhook from the release queue atomically */
	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del(&cgrp->release_list);
	spin_unlock(&release_list_lock);
	/* delete my sibling from parent->children */
	list_del(&cgrp->sibling);
	spin_lock(&cgrp->dentry->d_lock);
	d = dget(cgrp->dentry);
	cgrp->dentry = NULL;
	spin_unlock(&d->d_lock);

	cgroup_d_remove_dir(d);
	dput(d);
	root->number_of_cgroups--;

	/* the parent may now be empty and eligible for release */
	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	mutex_unlock(&cgroup_mutex);
	/* Drop the active superblock reference that we took when we
	 * created the cgroup */
	deactivate_super(sb);
	return 0;
}
2198
2199static void cgroup_init_subsys(struct cgroup_subsys *ss)
2200{
2201 struct cgroup_subsys_state *css;
2202 struct list_head *l;
2203 printk(KERN_ERR "Initializing cgroup subsys %s\n", ss->name);
2204
2205 /* Create the top cgroup state for this subsystem */
2206 ss->root = &rootnode;
2207 css = ss->create(ss, dummytop);
2208 /* We don't handle early failures gracefully */
2209 BUG_ON(IS_ERR(css));
2210 init_cgroup_css(css, ss, dummytop);
2211
2212 /* Update all cgroup groups to contain a subsys
2213 * pointer to this state - since the subsystem is
2214 * newly registered, all tasks and hence all cgroup
2215 * groups are in the subsystem's top cgroup. */
2216 write_lock(&css_set_lock);
2217 l = &init_css_set.list;
2218 do {
2219 struct css_set *cg =
2220 list_entry(l, struct css_set, list);
2221 cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
2222 l = l->next;
2223 } while (l != &init_css_set.list);
2224 write_unlock(&css_set_lock);
2225
2226 /* If this subsystem requested that it be notified with fork
2227 * events, we should send it one now for every process in the
2228 * system */
2229 if (ss->fork) {
2230 struct task_struct *g, *p;
2231
2232 read_lock(&tasklist_lock);
2233 do_each_thread(g, p) {
2234 ss->fork(ss, p);
2235 } while_each_thread(g, p);
2236 read_unlock(&tasklist_lock);
2237 }
2238
2239 need_forkexit_callback |= ss->fork || ss->exit;
2240
2241 ss->active = 1;
2242}
2243
/**
 * cgroup_init_early - initialize cgroups at system boot, and
 * initialize any subsystems that request early init.
 */
int __init cgroup_init_early(void)
{
	int i;
	/* the initial css_set starts with two references */
	kref_init(&init_css_set.ref);
	kref_get(&init_css_set.ref);
	INIT_LIST_HEAD(&init_css_set.list);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	list_add(&rootnode.root_list, &roots);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* link the initial css_set into the root cgroup's css_set list */
	init_css_set_link.cg = &init_css_set;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	/* sanity-check every compiled-in subsystem descriptor */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		/* the subsys_id must match its slot in the subsys[] array */
		if (ss->subsys_id != i) {
			printk(KERN_ERR "Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
2286
/**
 * cgroup_init - register cgroup filesystem and /proc file, and
 * initialize any subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct proc_dir_entry *entry;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	/* subsystems that did early init were handled in cgroup_init_early() */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (!ss->early_init)
			cgroup_init_subsys(ss);
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0)
		goto out;

	/* /proc/cgroups is best-effort; a NULL entry is not treated
	 * as a fatal error */
	entry = create_proc_entry("cgroups", 0, NULL);
	if (entry)
		entry->proc_fops = &proc_cgroupstats_operations;

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
2321
/*
 * proc_cgroup_show()
 *  - Print task's cgroup paths into seq_file, one line for each hierarchy
 *  - Used for /proc/<pid>/cgroup.
 *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
 *    doesn't really matter if tsk->cgroup changes after we read it,
 *    and we take cgroup_mutex, keeping attach_task() from changing it
 *    anyway.  No need to check that tsk->cgroup != NULL, thanks to
 *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
 *    cgroup to top_cgroup.
 */

/* TODO: Use a proper seq_file iterator */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	/* m->private carries the struct pid set up by cgroup_open() */
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int subsys_id;
		int count = 0;

		/* Skip this hierarchy if it has no active subsystems */
		if (!root->actual_subsys_bits)
			continue;
		/* emit "name1,name2,...:" for the hierarchy's subsystems */
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		seq_putc(m, ':');
		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
		cgrp = task_cgroup(tsk, subsys_id);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
2387
/* open() for /proc/<pid>/cgroup: hand the pid to proc_cgroup_show() */
static int cgroup_open(struct inode *inode, struct file *file)
{
	struct pid *pid = PROC_I(inode)->pid;
	return single_open(file, proc_cgroup_show, pid);
}
2393
/* file_operations for /proc/<pid>/cgroup */
struct file_operations proc_cgroup_operations = {
	.open		= cgroup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
2400
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	int i;
	struct cgroupfs_root *root;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n");
	/* cgroup_mutex keeps ss->root and the cgroup counts stable */
	mutex_lock(&cgroup_mutex);
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		seq_printf(m, "%s\t%lu\t%d\n",
			   ss->name, ss->root->subsys_bits,
			   ss->root->number_of_cgroups);
	}
	mutex_unlock(&cgroup_mutex);
	return 0;
}
2418
2419static int cgroupstats_open(struct inode *inode, struct file *file)
2420{
2421 return single_open(file, proc_cgroupstats_show, 0);
2422}
2423
/* file_operations for /proc/cgroups */
static struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
2430
/**
 * cgroup_fork - attach newly forked task to its parents cgroup.
 * @tsk: pointer to task_struct of forking parent process.
 *
 * Description: A task inherits its parent's cgroup at fork().
 *
 * A pointer to the shared css_set was automatically copied in
 * fork.c by dup_task_struct(). However, we ignore that copy, since
 * it was not made under the protection of RCU or cgroup_mutex, so
 * might no longer be a valid cgroup pointer. attach_task() might
 * have already changed current->cgroups, allowing the previously
 * referenced cgroup group to be removed and freed.
 *
 * At the point that cgroup_fork() is called, 'current' is the parent
 * task, and the passed argument 'child' points to the child task.
 */
void cgroup_fork(struct task_struct *child)
{
	/* task_lock keeps current->cgroups stable while we copy and
	 * take a reference on it */
	task_lock(current);
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	task_unlock(current);
	INIT_LIST_HEAD(&child->cg_list);
}
2455
2456/**
2457 * cgroup_fork_callbacks - called on a new task very soon before
2458 * adding it to the tasklist. No need to take any locks since no-one
2459 * can be operating on this task
2460 */
2461void cgroup_fork_callbacks(struct task_struct *child)
2462{
2463 if (need_forkexit_callback) {
2464 int i;
2465 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2466 struct cgroup_subsys *ss = subsys[i];
2467 if (ss->fork)
2468 ss->fork(ss, child);
2469 }
2470 }
2471}
2472
2473/**
2474 * cgroup_post_fork - called on a new task after adding it to the
2475 * task list. Adds the task to the list running through its css_set
2476 * if necessary. Has to be after the task is visible on the task list
2477 * in case we race with the first call to cgroup_iter_start() - to
2478 * guarantee that the new task ends up on its list. */
2479void cgroup_post_fork(struct task_struct *child)
2480{
2481 if (use_task_css_set_links) {
2482 write_lock(&css_set_lock);
2483 if (list_empty(&child->cg_list))
2484 list_add(&child->cg_list, &child->cgroups->tasks);
2485 write_unlock(&css_set_lock);
2486 }
2487}
/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 *
 * Description: Detach cgroup from @tsk and release it.
 *
 * Note that cgroups marked notify_on_release force every task in
 * them to take the global cgroup_mutex mutex when exiting.
 * This could impact scaling on very large systems.  Be reluctant to
 * use notify_on_release cgroups where very high task exit scaling
 * is required on large systems.
 *
 * the_top_cgroup_hack:
 *
 *    Set the exiting tasks cgroup to the root cgroup (top_cgroup).
 *
 *    We call cgroup_exit() while the task is still competent to
 *    handle notify_on_release(), then leave the task attached to the
 *    root cgroup in each hierarchy for the remainder of its exit.
 *
 *    To do this properly, we would increment the reference count on
 *    top_cgroup, and near the very end of the kernel/exit.c do_exit()
 *    code we would add a second cgroup function call, to drop that
 *    reference.  This would just create an unnecessary hot spot on
 *    the top_cgroup reference count, to no avail.
 *
 *    Normally, holding a reference to a cgroup without bumping its
 *    count is unsafe.   The cgroup could go away, or someone could
 *    attach us to a different cgroup, decrementing the count on
 *    the first cgroup that we never incremented.  But in this case,
 *    top_cgroup isn't going away, and either task has PF_EXITING set,
 *    which wards off any attach_task() attempts, or task is a failed
 *    fork, never visible to attach_task.
 *
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	int i;
	struct css_set *cg;

	if (run_callbacks && need_forkexit_callback) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit)
				ss->exit(ss, tsk);
		}
	}

	/*
	 * Unlink from the css_set task list if necessary.
	 * Optimistically check cg_list before taking
	 * css_set_lock
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		/* re-check under the lock: someone may have unlinked us */
		if (!list_empty(&tsk->cg_list))
			list_del(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to the init_css_set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;
	task_unlock(tsk);
	/* drop the reference taken at fork; may trigger release logic */
	if (cg)
		put_css_set_taskexit(cg);
}
2556
/**
 * cgroup_clone - duplicate the current cgroup in the hierarchy
 * that the given subsystem is attached to, and move this task into
 * the new child
 */
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
{
	struct dentry *dentry;
	int ret = 0;
	char nodename[MAX_CGROUP_TYPE_NAMELEN];
	struct cgroup *parent, *child;
	struct inode *inode;
	struct css_set *cg;
	struct cgroupfs_root *root;
	struct cgroup_subsys *ss;

	/* We shouldn't be called by an unregistered subsystem */
	BUG_ON(!subsys->active);

	/* First figure out what hierarchy and cgroup we're dealing
	 * with, and pin them so we can drop cgroup_mutex */
	mutex_lock(&cgroup_mutex);
 again:
	root = subsys->root;
	if (root == &rootnode) {
		printk(KERN_INFO
		       "Not cloning cgroup for unused subsystem %s\n",
		       subsys->name);
		mutex_unlock(&cgroup_mutex);
		return 0;
	}
	cg = tsk->cgroups;
	parent = task_cgroup(tsk, subsys->subsys_id);

	/* child cgroup is named after the cloning task's pid */
	snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid);

	/* Pin the hierarchy */
	atomic_inc(&parent->root->sb->s_active);

	/* Keep the cgroup alive */
	get_css_set(cg);
	mutex_unlock(&cgroup_mutex);

	/* Now do the VFS work to create a cgroup */
	inode = parent->dentry->d_inode;

	/* Hold the parent directory mutex across this operation to
	 * stop anyone else deleting the new cgroup */
	mutex_lock(&inode->i_mutex);
	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
	if (IS_ERR(dentry)) {
		printk(KERN_INFO
		       "Couldn't allocate dentry for %s: %ld\n", nodename,
		       PTR_ERR(dentry));
		ret = PTR_ERR(dentry);
		goto out_release;
	}

	/* Create the cgroup directory, which also creates the cgroup */
	ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
	child = __d_cgrp(dentry);
	dput(dentry);
	if (ret) {
		printk(KERN_INFO
		       "Failed to create cgroup %s: %d\n", nodename,
		       ret);
		goto out_release;
	}

	if (!child) {
		printk(KERN_INFO
		       "Couldn't find new cgroup %s\n", nodename);
		ret = -ENOMEM;
		goto out_release;
	}

	/* The cgroup now exists. Retake cgroup_mutex and check
	 * that we're still in the same state that we thought we
	 * were. */
	mutex_lock(&cgroup_mutex);
	if ((root != subsys->root) ||
	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
		/* Aargh, we raced ... */
		mutex_unlock(&inode->i_mutex);
		put_css_set(cg);

		deactivate_super(parent->root->sb);
		/* The cgroup is still accessible in the VFS, but
		 * we're not going to try to rmdir() it at this
		 * point. */
		printk(KERN_INFO
		       "Race in cgroup_clone() - leaking cgroup %s\n",
		       nodename);
		/* retry with cgroup_mutex held (taken above) */
		goto again;
	}

	/* do any required auto-setup */
	for_each_subsys(root, ss) {
		if (ss->post_clone)
			ss->post_clone(ss, child);
	}

	/* All seems fine. Finish by moving the task into the new cgroup */
	ret = attach_task(child, tsk);
	mutex_unlock(&cgroup_mutex);

 out_release:
	mutex_unlock(&inode->i_mutex);

	/* drop the css_set reference and hierarchy pin taken above */
	mutex_lock(&cgroup_mutex);
	put_css_set(cg);
	mutex_unlock(&cgroup_mutex);
	deactivate_super(parent->root->sb);
	return ret;
}
2672
2673/*
2674 * See if "cgrp" is a descendant of the current task's cgroup in
2675 * the appropriate hierarchy
2676 *
2677 * If we are sending in dummytop, then presumably we are creating
2678 * the top cgroup in the subsystem.
2679 *
2680 * Called only by the ns (nsproxy) cgroup.
2681 */
2682int cgroup_is_descendant(const struct cgroup *cgrp)
2683{
2684 int ret;
2685 struct cgroup *target;
2686 int subsys_id;
2687
2688 if (cgrp == dummytop)
2689 return 1;
2690
2691 get_first_subsys(cgrp, NULL, &subsys_id);
2692 target = task_cgroup(current, subsys_id);
2693 while (cgrp != target && cgrp!= cgrp->top_cgroup)
2694 cgrp = cgrp->parent;
2695 ret = (cgrp == target);
2696 return ret;
2697}
2698
/*
 * Queue 'cgrp' for a userspace release notification if it has become
 * empty and removable.
 */
static void check_for_release(struct cgroup *cgrp)
{
	/* All of these checks rely on RCU to keep the cgroup
	 * structure alive */
	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
		/* Control Group is currently removeable. If it's not
		 * already queued for a userspace notification, queue
		 * it now */
		int need_schedule_work = 0;
		spin_lock(&release_list_lock);
		/* a removed cgroup no longer needs notification */
		if (!cgroup_is_removed(cgrp) &&
		    list_empty(&cgrp->release_list)) {
			list_add(&cgrp->release_list, &release_list);
			need_schedule_work = 1;
		}
		spin_unlock(&release_list_lock);
		/* kick the workqueue outside the spinlock */
		if (need_schedule_work)
			schedule_work(&release_agent_work);
	}
}
2720
/*
 * Drop a reference on a subsystem state. If this was the last
 * reference and the cgroup wants release notifications, mark it
 * releasable and check whether the release agent should run.
 */
void __css_put(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	/* RCU keeps cgrp alive across the check (see check_for_release) */
	rcu_read_lock();
	if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
		set_bit(CGRP_RELEASABLE, &cgrp->flags);
		check_for_release(cgrp);
	}
	rcu_read_unlock();
}
2731
/*
 * Notify userspace when a cgroup is released, by running the
 * configured release agent with the name of the cgroup (path
 * relative to the root of cgroup file system) as the argument.
 *
 * Most likely, this user command will try to rmdir this cgroup.
 *
 * This races with the possibility that some other task will be
 * attached to this cgroup before it is removed, or that some other
 * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
 * The presumed 'rmdir' will fail quietly if this cgroup is no longer
 * unused, and this cgroup will be reprieved from its death sentence,
 * to continue to serve a useful existence.  Next time it's released,
 * we will get notified again, if it still has 'notify_on_release' set.
 *
 * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
 * means only wait until the task is successfully execve()'d.  The
 * separate release agent task is forked by call_usermodehelper(),
 * then control in this thread returns here, without waiting for the
 * release agent task.  We don't bother to wait because the caller of
 * this routine has no use for the exit status of the release agent
 * task, so no sense holding our caller up for that.
 *
 */

static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	spin_lock(&release_list_lock);
	/* release_list_lock is dropped and re-taken inside the loop;
	 * the loop condition is always evaluated with it held */
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		list_del_init(&cgrp->release_list);
		spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf) {
			/* allocation failed: skip this cgroup */
			spin_lock(&release_list_lock);
			continue;
		}

		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
			kfree(pathbuf);
			spin_lock(&release_list_lock);
			continue;
		}

		i = 0;
		argv[i++] = cgrp->root->release_agent_path;
		argv[i++] = (char *)pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/* Drop the lock while we invoke the usermode helper,
		 * since the exec could involve hitting disk and hence
		 * be a slow process */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		kfree(pathbuf);
		mutex_lock(&cgroup_mutex);
		spin_lock(&release_list_lock);
	}
	spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
new file mode 100644
index 000000000000..37301e877cb0
--- /dev/null
+++ b/kernel/cgroup_debug.c
@@ -0,0 +1,97 @@
1/*
2 * kernel/ccontainer_debug.c - Example cgroup subsystem that
3 * exposes debug info
4 *
5 * Copyright (C) Google Inc, 2007
6 *
7 * Developed by Paul Menage (menage@google.com)
8 *
9 */
10
11#include <linux/cgroup.h>
12#include <linux/fs.h>
13#include <linux/slab.h>
14#include <linux/rcupdate.h>
15
16#include <asm/atomic.h>
17
18static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
19 struct cgroup *cont)
20{
21 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
22
23 if (!css)
24 return ERR_PTR(-ENOMEM);
25
26 return css;
27}
28
29static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
30{
31 kfree(cont->subsys[debug_subsys_id]);
32}
33
34static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
35{
36 return atomic_read(&cont->count);
37}
38
39static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
40{
41 u64 count;
42
43 cgroup_lock();
44 count = cgroup_task_count(cont);
45 cgroup_unlock();
46 return count;
47}
48
49static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
50{
51 return (u64)(long)current->cgroups;
52}
53
54static u64 current_css_set_refcount_read(struct cgroup *cont,
55 struct cftype *cft)
56{
57 u64 count;
58
59 rcu_read_lock();
60 count = atomic_read(&current->cgroups->ref.refcount);
61 rcu_read_unlock();
62 return count;
63}
64
65static struct cftype files[] = {
66 {
67 .name = "cgroup_refcount",
68 .read_uint = cgroup_refcount_read,
69 },
70 {
71 .name = "taskcount",
72 .read_uint = taskcount_read,
73 },
74
75 {
76 .name = "current_css_set",
77 .read_uint = current_css_set_read,
78 },
79
80 {
81 .name = "current_css_set_refcount",
82 .read_uint = current_css_set_refcount_read,
83 },
84};
85
86static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
87{
88 return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
89}
90
91struct cgroup_subsys debug_subsys = {
92 .name = "debug",
93 .create = debug_create,
94 .destroy = debug_destroy,
95 .populate = debug_populate,
96 .subsys_id = debug_subsys_id,
97};
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a21f71af9d81..6b3a0c15144f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -98,7 +98,8 @@ static inline void check_for_tasks(int cpu)
98 !cputime_eq(p->stime, cputime_zero))) 98 !cputime_eq(p->stime, cputime_zero)))
99 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\ 99 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
100 (state = %ld, flags = %x) \n", 100 (state = %ld, flags = %x) \n",
101 p->comm, p->pid, cpu, p->state, p->flags); 101 p->comm, task_pid_nr(p), cpu,
102 p->state, p->flags);
102 } 103 }
103 write_unlock_irq(&tasklist_lock); 104 write_unlock_irq(&tasklist_lock);
104} 105}
@@ -264,6 +265,15 @@ out_notify:
264int __cpuinit cpu_up(unsigned int cpu) 265int __cpuinit cpu_up(unsigned int cpu)
265{ 266{
266 int err = 0; 267 int err = 0;
268 if (!cpu_isset(cpu, cpu_possible_map)) {
269 printk(KERN_ERR "can't online cpu %d because it is not "
270 "configured as may-hotadd at boot time\n", cpu);
271#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
272 printk(KERN_ERR "please check additional_cpus= boot "
273 "parameter\n");
274#endif
275 return -EINVAL;
276 }
267 277
268 mutex_lock(&cpu_add_remove_lock); 278 mutex_lock(&cpu_add_remove_lock);
269 if (cpu_hotplug_disabled) 279 if (cpu_hotplug_disabled)
diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c
new file mode 100644
index 000000000000..731e47e7f164
--- /dev/null
+++ b/kernel/cpu_acct.c
@@ -0,0 +1,186 @@
1/*
2 * kernel/cpu_acct.c - CPU accounting cgroup subsystem
3 *
4 * Copyright (C) Google Inc, 2006
5 *
6 * Developed by Paul Menage (menage@google.com) and Balbir Singh
7 * (balbir@in.ibm.com)
8 *
9 */
10
11/*
12 * Example cgroup subsystem for reporting total CPU usage of tasks in a
13 * cgroup, along with percentage load over a time interval
14 */
15
16#include <linux/module.h>
17#include <linux/cgroup.h>
18#include <linux/fs.h>
19#include <linux/rcupdate.h>
20
21#include <asm/div64.h>
22
23struct cpuacct {
24 struct cgroup_subsys_state css;
25 spinlock_t lock;
26 /* total time used by this class */
27 cputime64_t time;
28
29 /* time when next load calculation occurs */
30 u64 next_interval_check;
31
32 /* time used in current period */
33 cputime64_t current_interval_time;
34
35 /* time used in last period */
36 cputime64_t last_interval_time;
37};
38
39struct cgroup_subsys cpuacct_subsys;
40
41static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
42{
43 return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
44 struct cpuacct, css);
45}
46
47static inline struct cpuacct *task_ca(struct task_struct *task)
48{
49 return container_of(task_subsys_state(task, cpuacct_subsys_id),
50 struct cpuacct, css);
51}
52
53#define INTERVAL (HZ * 10)
54
55static inline u64 next_interval_boundary(u64 now)
56{
57 /* calculate the next interval boundary beyond the
58 * current time */
59 do_div(now, INTERVAL);
60 return (now + 1) * INTERVAL;
61}
62
63static struct cgroup_subsys_state *cpuacct_create(
64 struct cgroup_subsys *ss, struct cgroup *cont)
65{
66 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
67
68 if (!ca)
69 return ERR_PTR(-ENOMEM);
70 spin_lock_init(&ca->lock);
71 ca->next_interval_check = next_interval_boundary(get_jiffies_64());
72 return &ca->css;
73}
74
75static void cpuacct_destroy(struct cgroup_subsys *ss,
76 struct cgroup *cont)
77{
78 kfree(cgroup_ca(cont));
79}
80
81/* Lazily update the load calculation if necessary. Called with ca locked */
82static void cpuusage_update(struct cpuacct *ca)
83{
84 u64 now = get_jiffies_64();
85
86 /* If we're not due for an update, return */
87 if (ca->next_interval_check > now)
88 return;
89
90 if (ca->next_interval_check <= (now - INTERVAL)) {
91 /* If it's been more than an interval since the last
92 * check, then catch up - the last interval must have
93 * been zero load */
94 ca->last_interval_time = 0;
95 ca->next_interval_check = next_interval_boundary(now);
96 } else {
97 /* If a steal takes the last interval time negative,
98 * then we just ignore it */
99 if ((s64)ca->current_interval_time > 0)
100 ca->last_interval_time = ca->current_interval_time;
101 else
102 ca->last_interval_time = 0;
103 ca->next_interval_check += INTERVAL;
104 }
105 ca->current_interval_time = 0;
106}
107
108static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
109{
110 struct cpuacct *ca = cgroup_ca(cont);
111 u64 time;
112
113 spin_lock_irq(&ca->lock);
114 cpuusage_update(ca);
115 time = cputime64_to_jiffies64(ca->time);
116 spin_unlock_irq(&ca->lock);
117
118 /* Convert 64-bit jiffies to seconds */
119 time *= 1000;
120 do_div(time, HZ);
121 return time;
122}
123
124static u64 load_read(struct cgroup *cont, struct cftype *cft)
125{
126 struct cpuacct *ca = cgroup_ca(cont);
127 u64 time;
128
129 /* Find the time used in the previous interval */
130 spin_lock_irq(&ca->lock);
131 cpuusage_update(ca);
132 time = cputime64_to_jiffies64(ca->last_interval_time);
133 spin_unlock_irq(&ca->lock);
134
135 /* Convert time to a percentage, to give the load in the
136 * previous period */
137 time *= 100;
138 do_div(time, INTERVAL);
139
140 return time;
141}
142
143static struct cftype files[] = {
144 {
145 .name = "usage",
146 .read_uint = cpuusage_read,
147 },
148 {
149 .name = "load",
150 .read_uint = load_read,
151 }
152};
153
154static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
155{
156 return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
157}
158
159void cpuacct_charge(struct task_struct *task, cputime_t cputime)
160{
161
162 struct cpuacct *ca;
163 unsigned long flags;
164
165 if (!cpuacct_subsys.active)
166 return;
167 rcu_read_lock();
168 ca = task_ca(task);
169 if (ca) {
170 spin_lock_irqsave(&ca->lock, flags);
171 cpuusage_update(ca);
172 ca->time = cputime64_add(ca->time, cputime);
173 ca->current_interval_time =
174 cputime64_add(ca->current_interval_time, cputime);
175 spin_unlock_irqrestore(&ca->lock, flags);
176 }
177 rcu_read_unlock();
178}
179
180struct cgroup_subsys cpuacct_subsys = {
181 .name = "cpuacct",
182 .create = cpuacct_create,
183 .destroy = cpuacct_destroy,
184 .populate = cpuacct_populate,
185 .subsys_id = cpuacct_subsys_id,
186};
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 64950fa5d321..50f5dc463688 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -4,7 +4,8 @@
4 * Processor and Memory placement constraints for sets of tasks. 4 * Processor and Memory placement constraints for sets of tasks.
5 * 5 *
6 * Copyright (C) 2003 BULL SA. 6 * Copyright (C) 2003 BULL SA.
7 * Copyright (C) 2004-2006 Silicon Graphics, Inc. 7 * Copyright (C) 2004-2007 Silicon Graphics, Inc.
8 * Copyright (C) 2006 Google, Inc
8 * 9 *
9 * Portions derived from Patrick Mochel's sysfs code. 10 * Portions derived from Patrick Mochel's sysfs code.
10 * sysfs is Copyright (c) 2001-3 Patrick Mochel 11 * sysfs is Copyright (c) 2001-3 Patrick Mochel
@@ -12,6 +13,7 @@
12 * 2003-10-10 Written by Simon Derr. 13 * 2003-10-10 Written by Simon Derr.
13 * 2003-10-22 Updates by Stephen Hemminger. 14 * 2003-10-22 Updates by Stephen Hemminger.
14 * 2004 May-July Rework by Paul Jackson. 15 * 2004 May-July Rework by Paul Jackson.
16 * 2006 Rework by Paul Menage to use generic cgroups
15 * 17 *
16 * This file is subject to the terms and conditions of the GNU General Public 18 * This file is subject to the terms and conditions of the GNU General Public
17 * License. See the file COPYING in the main directory of the Linux 19 * License. See the file COPYING in the main directory of the Linux
@@ -36,6 +38,7 @@
36#include <linux/mount.h> 38#include <linux/mount.h>
37#include <linux/namei.h> 39#include <linux/namei.h>
38#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/prio_heap.h>
39#include <linux/proc_fs.h> 42#include <linux/proc_fs.h>
40#include <linux/rcupdate.h> 43#include <linux/rcupdate.h>
41#include <linux/sched.h> 44#include <linux/sched.h>
@@ -52,8 +55,7 @@
52#include <asm/uaccess.h> 55#include <asm/uaccess.h>
53#include <asm/atomic.h> 56#include <asm/atomic.h>
54#include <linux/mutex.h> 57#include <linux/mutex.h>
55 58#include <linux/kfifo.h>
56#define CPUSET_SUPER_MAGIC 0x27e0eb
57 59
58/* 60/*
59 * Tracks how many cpusets are currently defined in system. 61 * Tracks how many cpusets are currently defined in system.
@@ -62,6 +64,10 @@
62 */ 64 */
63int number_of_cpusets __read_mostly; 65int number_of_cpusets __read_mostly;
64 66
67/* Retrieve the cpuset from a cgroup */
68struct cgroup_subsys cpuset_subsys;
69struct cpuset;
70
65/* See "Frequency meter" comments, below. */ 71/* See "Frequency meter" comments, below. */
66 72
67struct fmeter { 73struct fmeter {
@@ -72,24 +78,13 @@ struct fmeter {
72}; 78};
73 79
74struct cpuset { 80struct cpuset {
81 struct cgroup_subsys_state css;
82
75 unsigned long flags; /* "unsigned long" so bitops work */ 83 unsigned long flags; /* "unsigned long" so bitops work */
76 cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ 84 cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
77 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ 85 nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
78 86
79 /*
80 * Count is atomic so can incr (fork) or decr (exit) without a lock.
81 */
82 atomic_t count; /* count tasks using this cpuset */
83
84 /*
85 * We link our 'sibling' struct into our parents 'children'.
86 * Our children link their 'sibling' into our 'children'.
87 */
88 struct list_head sibling; /* my parents children */
89 struct list_head children; /* my children */
90
91 struct cpuset *parent; /* my parent */ 87 struct cpuset *parent; /* my parent */
92 struct dentry *dentry; /* cpuset fs entry */
93 88
94 /* 89 /*
95 * Copy of global cpuset_mems_generation as of the most 90 * Copy of global cpuset_mems_generation as of the most
@@ -98,15 +93,32 @@ struct cpuset {
98 int mems_generation; 93 int mems_generation;
99 94
100 struct fmeter fmeter; /* memory_pressure filter */ 95 struct fmeter fmeter; /* memory_pressure filter */
96
97 /* partition number for rebuild_sched_domains() */
98 int pn;
101}; 99};
102 100
101/* Retrieve the cpuset for a cgroup */
102static inline struct cpuset *cgroup_cs(struct cgroup *cont)
103{
104 return container_of(cgroup_subsys_state(cont, cpuset_subsys_id),
105 struct cpuset, css);
106}
107
108/* Retrieve the cpuset for a task */
109static inline struct cpuset *task_cs(struct task_struct *task)
110{
111 return container_of(task_subsys_state(task, cpuset_subsys_id),
112 struct cpuset, css);
113}
114
115
103/* bits in struct cpuset flags field */ 116/* bits in struct cpuset flags field */
104typedef enum { 117typedef enum {
105 CS_CPU_EXCLUSIVE, 118 CS_CPU_EXCLUSIVE,
106 CS_MEM_EXCLUSIVE, 119 CS_MEM_EXCLUSIVE,
107 CS_MEMORY_MIGRATE, 120 CS_MEMORY_MIGRATE,
108 CS_REMOVED, 121 CS_SCHED_LOAD_BALANCE,
109 CS_NOTIFY_ON_RELEASE,
110 CS_SPREAD_PAGE, 122 CS_SPREAD_PAGE,
111 CS_SPREAD_SLAB, 123 CS_SPREAD_SLAB,
112} cpuset_flagbits_t; 124} cpuset_flagbits_t;
@@ -122,14 +134,9 @@ static inline int is_mem_exclusive(const struct cpuset *cs)
122 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); 134 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
123} 135}
124 136
125static inline int is_removed(const struct cpuset *cs) 137static inline int is_sched_load_balance(const struct cpuset *cs)
126{ 138{
127 return test_bit(CS_REMOVED, &cs->flags); 139 return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
128}
129
130static inline int notify_on_release(const struct cpuset *cs)
131{
132 return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
133} 140}
134 141
135static inline int is_memory_migrate(const struct cpuset *cs) 142static inline int is_memory_migrate(const struct cpuset *cs)
@@ -172,14 +179,8 @@ static struct cpuset top_cpuset = {
172 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), 179 .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
173 .cpus_allowed = CPU_MASK_ALL, 180 .cpus_allowed = CPU_MASK_ALL,
174 .mems_allowed = NODE_MASK_ALL, 181 .mems_allowed = NODE_MASK_ALL,
175 .count = ATOMIC_INIT(0),
176 .sibling = LIST_HEAD_INIT(top_cpuset.sibling),
177 .children = LIST_HEAD_INIT(top_cpuset.children),
178}; 182};
179 183
180static struct vfsmount *cpuset_mount;
181static struct super_block *cpuset_sb;
182
183/* 184/*
184 * We have two global cpuset mutexes below. They can nest. 185 * We have two global cpuset mutexes below. They can nest.
185 * It is ok to first take manage_mutex, then nest callback_mutex. We also 186 * It is ok to first take manage_mutex, then nest callback_mutex. We also
@@ -263,297 +264,33 @@ static struct super_block *cpuset_sb;
263 * the routine cpuset_update_task_memory_state(). 264 * the routine cpuset_update_task_memory_state().
264 */ 265 */
265 266
266static DEFINE_MUTEX(manage_mutex);
267static DEFINE_MUTEX(callback_mutex); 267static DEFINE_MUTEX(callback_mutex);
268 268
269/* 269/* This is ugly, but preserves the userspace API for existing cpuset
270 * A couple of forward declarations required, due to cyclic reference loop: 270 * users. If someone tries to mount the "cpuset" filesystem, we
271 * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file 271 * silently switch it to mount "cgroup" instead */
272 * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
273 */
274
275static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode);
276static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry);
277
278static struct backing_dev_info cpuset_backing_dev_info = {
279 .ra_pages = 0, /* No readahead */
280 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
281};
282
283static struct inode *cpuset_new_inode(mode_t mode)
284{
285 struct inode *inode = new_inode(cpuset_sb);
286
287 if (inode) {
288 inode->i_mode = mode;
289 inode->i_uid = current->fsuid;
290 inode->i_gid = current->fsgid;
291 inode->i_blocks = 0;
292 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
293 inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
294 }
295 return inode;
296}
297
298static void cpuset_diput(struct dentry *dentry, struct inode *inode)
299{
300 /* is dentry a directory ? if so, kfree() associated cpuset */
301 if (S_ISDIR(inode->i_mode)) {
302 struct cpuset *cs = dentry->d_fsdata;
303 BUG_ON(!(is_removed(cs)));
304 kfree(cs);
305 }
306 iput(inode);
307}
308
309static struct dentry_operations cpuset_dops = {
310 .d_iput = cpuset_diput,
311};
312
313static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
314{
315 struct dentry *d = lookup_one_len(name, parent, strlen(name));
316 if (!IS_ERR(d))
317 d->d_op = &cpuset_dops;
318 return d;
319}
320
321static void remove_dir(struct dentry *d)
322{
323 struct dentry *parent = dget(d->d_parent);
324
325 d_delete(d);
326 simple_rmdir(parent->d_inode, d);
327 dput(parent);
328}
329
330/*
331 * NOTE : the dentry must have been dget()'ed
332 */
333static void cpuset_d_remove_dir(struct dentry *dentry)
334{
335 struct list_head *node;
336
337 spin_lock(&dcache_lock);
338 node = dentry->d_subdirs.next;
339 while (node != &dentry->d_subdirs) {
340 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
341 list_del_init(node);
342 if (d->d_inode) {
343 d = dget_locked(d);
344 spin_unlock(&dcache_lock);
345 d_delete(d);
346 simple_unlink(dentry->d_inode, d);
347 dput(d);
348 spin_lock(&dcache_lock);
349 }
350 node = dentry->d_subdirs.next;
351 }
352 list_del_init(&dentry->d_u.d_child);
353 spin_unlock(&dcache_lock);
354 remove_dir(dentry);
355}
356
357static struct super_operations cpuset_ops = {
358 .statfs = simple_statfs,
359 .drop_inode = generic_delete_inode,
360};
361
362static int cpuset_fill_super(struct super_block *sb, void *unused_data,
363 int unused_silent)
364{
365 struct inode *inode;
366 struct dentry *root;
367
368 sb->s_blocksize = PAGE_CACHE_SIZE;
369 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
370 sb->s_magic = CPUSET_SUPER_MAGIC;
371 sb->s_op = &cpuset_ops;
372 cpuset_sb = sb;
373
374 inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR);
375 if (inode) {
376 inode->i_op = &simple_dir_inode_operations;
377 inode->i_fop = &simple_dir_operations;
378 /* directories start off with i_nlink == 2 (for "." entry) */
379 inc_nlink(inode);
380 } else {
381 return -ENOMEM;
382 }
383
384 root = d_alloc_root(inode);
385 if (!root) {
386 iput(inode);
387 return -ENOMEM;
388 }
389 sb->s_root = root;
390 return 0;
391}
392
393static int cpuset_get_sb(struct file_system_type *fs_type, 272static int cpuset_get_sb(struct file_system_type *fs_type,
394 int flags, const char *unused_dev_name, 273 int flags, const char *unused_dev_name,
395 void *data, struct vfsmount *mnt) 274 void *data, struct vfsmount *mnt)
396{ 275{
397 return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt); 276 struct file_system_type *cgroup_fs = get_fs_type("cgroup");
277 int ret = -ENODEV;
278 if (cgroup_fs) {
279 char mountopts[] =
280 "cpuset,noprefix,"
281 "release_agent=/sbin/cpuset_release_agent";
282 ret = cgroup_fs->get_sb(cgroup_fs, flags,
283 unused_dev_name, mountopts, mnt);
284 put_filesystem(cgroup_fs);
285 }
286 return ret;
398} 287}
399 288
400static struct file_system_type cpuset_fs_type = { 289static struct file_system_type cpuset_fs_type = {
401 .name = "cpuset", 290 .name = "cpuset",
402 .get_sb = cpuset_get_sb, 291 .get_sb = cpuset_get_sb,
403 .kill_sb = kill_litter_super,
404}; 292};
405 293
406/* struct cftype:
407 *
408 * The files in the cpuset filesystem mostly have a very simple read/write
409 * handling, some common function will take care of it. Nevertheless some cases
410 * (read tasks) are special and therefore I define this structure for every
411 * kind of file.
412 *
413 *
414 * When reading/writing to a file:
415 * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
416 * - the 'cftype' of the file is file->f_path.dentry->d_fsdata
417 */
418
419struct cftype {
420 char *name;
421 int private;
422 int (*open) (struct inode *inode, struct file *file);
423 ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes,
424 loff_t *ppos);
425 int (*write) (struct file *file, const char __user *buf, size_t nbytes,
426 loff_t *ppos);
427 int (*release) (struct inode *inode, struct file *file);
428};
429
430static inline struct cpuset *__d_cs(struct dentry *dentry)
431{
432 return dentry->d_fsdata;
433}
434
435static inline struct cftype *__d_cft(struct dentry *dentry)
436{
437 return dentry->d_fsdata;
438}
439
440/*
441 * Call with manage_mutex held. Writes path of cpuset into buf.
442 * Returns 0 on success, -errno on error.
443 */
444
445static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
446{
447 char *start;
448
449 start = buf + buflen;
450
451 *--start = '\0';
452 for (;;) {
453 int len = cs->dentry->d_name.len;
454 if ((start -= len) < buf)
455 return -ENAMETOOLONG;
456 memcpy(start, cs->dentry->d_name.name, len);
457 cs = cs->parent;
458 if (!cs)
459 break;
460 if (!cs->parent)
461 continue;
462 if (--start < buf)
463 return -ENAMETOOLONG;
464 *start = '/';
465 }
466 memmove(buf, start, buf + buflen - start);
467 return 0;
468}
469
470/*
471 * Notify userspace when a cpuset is released, by running
472 * /sbin/cpuset_release_agent with the name of the cpuset (path
473 * relative to the root of cpuset file system) as the argument.
474 *
475 * Most likely, this user command will try to rmdir this cpuset.
476 *
477 * This races with the possibility that some other task will be
478 * attached to this cpuset before it is removed, or that some other
479 * user task will 'mkdir' a child cpuset of this cpuset. That's ok.
480 * The presumed 'rmdir' will fail quietly if this cpuset is no longer
481 * unused, and this cpuset will be reprieved from its death sentence,
482 * to continue to serve a useful existence. Next time it's released,
483 * we will get notified again, if it still has 'notify_on_release' set.
484 *
485 * The final arg to call_usermodehelper() is 0, which means don't
486 * wait. The separate /sbin/cpuset_release_agent task is forked by
487 * call_usermodehelper(), then control in this thread returns here,
488 * without waiting for the release agent task. We don't bother to
489 * wait because the caller of this routine has no use for the exit
490 * status of the /sbin/cpuset_release_agent task, so no sense holding
491 * our caller up for that.
492 *
493 * When we had only one cpuset mutex, we had to call this
494 * without holding it, to avoid deadlock when call_usermodehelper()
495 * allocated memory. With two locks, we could now call this while
496 * holding manage_mutex, but we still don't, so as to minimize
497 * the time manage_mutex is held.
498 */
499
500static void cpuset_release_agent(const char *pathbuf)
501{
502 char *argv[3], *envp[3];
503 int i;
504
505 if (!pathbuf)
506 return;
507
508 i = 0;
509 argv[i++] = "/sbin/cpuset_release_agent";
510 argv[i++] = (char *)pathbuf;
511 argv[i] = NULL;
512
513 i = 0;
514 /* minimal command environment */
515 envp[i++] = "HOME=/";
516 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
517 envp[i] = NULL;
518
519 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
520 kfree(pathbuf);
521}
522
523/*
524 * Either cs->count of using tasks transitioned to zero, or the
525 * cs->children list of child cpusets just became empty. If this
526 * cs is notify_on_release() and now both the user count is zero and
527 * the list of children is empty, prepare cpuset path in a kmalloc'd
528 * buffer, to be returned via ppathbuf, so that the caller can invoke
529 * cpuset_release_agent() with it later on, once manage_mutex is dropped.
530 * Call here with manage_mutex held.
531 *
532 * This check_for_release() routine is responsible for kmalloc'ing
533 * pathbuf. The above cpuset_release_agent() is responsible for
534 * kfree'ing pathbuf. The caller of these routines is responsible
535 * for providing a pathbuf pointer, initialized to NULL, then
536 * calling check_for_release() with manage_mutex held and the address
537 * of the pathbuf pointer, then dropping manage_mutex, then calling
538 * cpuset_release_agent() with pathbuf, as set by check_for_release().
539 */
540
541static void check_for_release(struct cpuset *cs, char **ppathbuf)
542{
543 if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
544 list_empty(&cs->children)) {
545 char *buf;
546
547 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
548 if (!buf)
549 return;
550 if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
551 kfree(buf);
552 else
553 *ppathbuf = buf;
554 }
555}
556
557/* 294/*
558 * Return in *pmask the portion of a cpusets's cpus_allowed that 295 * Return in *pmask the portion of a cpusets's cpus_allowed that
559 * are online. If none are online, walk up the cpuset hierarchy 296 * are online. If none are online, walk up the cpuset hierarchy
@@ -653,20 +390,19 @@ void cpuset_update_task_memory_state(void)
653 struct task_struct *tsk = current; 390 struct task_struct *tsk = current;
654 struct cpuset *cs; 391 struct cpuset *cs;
655 392
656 if (tsk->cpuset == &top_cpuset) { 393 if (task_cs(tsk) == &top_cpuset) {
657 /* Don't need rcu for top_cpuset. It's never freed. */ 394 /* Don't need rcu for top_cpuset. It's never freed. */
658 my_cpusets_mem_gen = top_cpuset.mems_generation; 395 my_cpusets_mem_gen = top_cpuset.mems_generation;
659 } else { 396 } else {
660 rcu_read_lock(); 397 rcu_read_lock();
661 cs = rcu_dereference(tsk->cpuset); 398 my_cpusets_mem_gen = task_cs(current)->mems_generation;
662 my_cpusets_mem_gen = cs->mems_generation;
663 rcu_read_unlock(); 399 rcu_read_unlock();
664 } 400 }
665 401
666 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { 402 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
667 mutex_lock(&callback_mutex); 403 mutex_lock(&callback_mutex);
668 task_lock(tsk); 404 task_lock(tsk);
669 cs = tsk->cpuset; /* Maybe changed when task not locked */ 405 cs = task_cs(tsk); /* Maybe changed when task not locked */
670 guarantee_online_mems(cs, &tsk->mems_allowed); 406 guarantee_online_mems(cs, &tsk->mems_allowed);
671 tsk->cpuset_mems_generation = cs->mems_generation; 407 tsk->cpuset_mems_generation = cs->mems_generation;
672 if (is_spread_page(cs)) 408 if (is_spread_page(cs))
@@ -721,11 +457,12 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
721 457
722static int validate_change(const struct cpuset *cur, const struct cpuset *trial) 458static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
723{ 459{
460 struct cgroup *cont;
724 struct cpuset *c, *par; 461 struct cpuset *c, *par;
725 462
726 /* Each of our child cpusets must be a subset of us */ 463 /* Each of our child cpusets must be a subset of us */
727 list_for_each_entry(c, &cur->children, sibling) { 464 list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
728 if (!is_cpuset_subset(c, trial)) 465 if (!is_cpuset_subset(cgroup_cs(cont), trial))
729 return -EBUSY; 466 return -EBUSY;
730 } 467 }
731 468
@@ -740,7 +477,8 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
740 return -EACCES; 477 return -EACCES;
741 478
742 /* If either I or some sibling (!= me) is exclusive, we can't overlap */ 479 /* If either I or some sibling (!= me) is exclusive, we can't overlap */
743 list_for_each_entry(c, &par->children, sibling) { 480 list_for_each_entry(cont, &par->css.cgroup->children, sibling) {
481 c = cgroup_cs(cont);
744 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && 482 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
745 c != cur && 483 c != cur &&
746 cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) 484 cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
@@ -751,17 +489,265 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
751 return -EINVAL; 489 return -EINVAL;
752 } 490 }
753 491
492 /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
493 if (cgroup_task_count(cur->css.cgroup)) {
494 if (cpus_empty(trial->cpus_allowed) ||
495 nodes_empty(trial->mems_allowed)) {
496 return -ENOSPC;
497 }
498 }
499
754 return 0; 500 return 0;
755} 501}
756 502
757/* 503/*
504 * Helper routine for rebuild_sched_domains().
505 * Do cpusets a, b have overlapping cpus_allowed masks?
506 */
507
508static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
509{
510 return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
511}
512
513/*
514 * rebuild_sched_domains()
515 *
516 * If the flag 'sched_load_balance' of any cpuset with non-empty
517 * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
518 * which has that flag enabled, or if any cpuset with a non-empty
519 * 'cpus' is removed, then call this routine to rebuild the
520 * scheduler's dynamic sched domains.
521 *
522 * This routine builds a partial partition of the systems CPUs
523 * (the set of non-overlappping cpumask_t's in the array 'part'
524 * below), and passes that partial partition to the kernel/sched.c
525 * partition_sched_domains() routine, which will rebuild the
526 * schedulers load balancing domains (sched domains) as specified
527 * by that partial partition. A 'partial partition' is a set of
528 * non-overlapping subsets whose union is a subset of that set.
529 *
530 * See "What is sched_load_balance" in Documentation/cpusets.txt
531 * for a background explanation of this.
532 *
533 * Does not return errors, on the theory that the callers of this
534 * routine would rather not worry about failures to rebuild sched
535 * domains when operating in the severe memory shortage situations
536 * that could cause allocation failures below.
537 *
538 * Call with cgroup_mutex held. May take callback_mutex during
539 * call due to the kfifo_alloc() and kmalloc() calls. May nest
540 * a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
541 * Must not be called holding callback_mutex, because we must not
542 * call lock_cpu_hotplug() while holding callback_mutex. Elsewhere
543 * the kernel nests callback_mutex inside lock_cpu_hotplug() calls.
544 * So the reverse nesting would risk an ABBA deadlock.
545 *
546 * The three key local variables below are:
547 * q - a kfifo queue of cpuset pointers, used to implement a
548 * top-down scan of all cpusets. This scan loads a pointer
549 * to each cpuset marked is_sched_load_balance into the
550 * array 'csa'. For our purposes, rebuilding the schedulers
551 * sched domains, we can ignore !is_sched_load_balance cpusets.
552 * csa - (for CpuSet Array) Array of pointers to all the cpusets
553 * that need to be load balanced, for convenient iterative
554 * access by the subsequent code that finds the best partition,
555 * i.e the set of domains (subsets) of CPUs such that the
556 * cpus_allowed of every cpuset marked is_sched_load_balance
557 * is a subset of one of these domains, while there are as
558 * many such domains as possible, each as small as possible.
559 * doms - Conversion of 'csa' to an array of cpumasks, for passing to
560 * the kernel/sched.c routine partition_sched_domains() in a
561 * convenient format, that can be easily compared to the prior
562 * value to determine what partition elements (sched domains)
563 * were changed (added or removed.)
564 *
565 * Finding the best partition (set of domains):
566 * The triple nested loops below over i, j, k scan over the
567 * load balanced cpusets (using the array of cpuset pointers in
568 * csa[]) looking for pairs of cpusets that have overlapping
569 * cpus_allowed, but which don't have the same 'pn' partition
570 * number and gives them in the same partition number. It keeps
571 * looping on the 'restart' label until it can no longer find
572 * any such pairs.
573 *
574 * The union of the cpus_allowed masks from the set of
575 * all cpusets having the same 'pn' value then form the one
576 * element of the partition (one sched domain) to be passed to
577 * partition_sched_domains().
578 */
579
580static void rebuild_sched_domains(void)
581{
582 struct kfifo *q; /* queue of cpusets to be scanned */
583 struct cpuset *cp; /* scans q */
584 struct cpuset **csa; /* array of all cpuset ptrs */
585 int csn; /* how many cpuset ptrs in csa so far */
586 int i, j, k; /* indices for partition finding loops */
587 cpumask_t *doms; /* resulting partition; i.e. sched domains */
588 int ndoms; /* number of sched domains in result */
589 int nslot; /* next empty doms[] cpumask_t slot */
590
591 q = NULL;
592 csa = NULL;
593 doms = NULL;
594
595 /* Special case for the 99% of systems with one, full, sched domain */
596 if (is_sched_load_balance(&top_cpuset)) {
597 ndoms = 1;
598 doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
599 if (!doms)
600 goto rebuild;
601 *doms = top_cpuset.cpus_allowed;
602 goto rebuild;
603 }
604
605 q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
606 if (IS_ERR(q))
607 goto done;
608 csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
609 if (!csa)
610 goto done;
611 csn = 0;
612
613 cp = &top_cpuset;
614 __kfifo_put(q, (void *)&cp, sizeof(cp));
615 while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
616 struct cgroup *cont;
617 struct cpuset *child; /* scans child cpusets of cp */
618 if (is_sched_load_balance(cp))
619 csa[csn++] = cp;
620 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
621 child = cgroup_cs(cont);
622 __kfifo_put(q, (void *)&child, sizeof(cp));
623 }
624 }
625
626 for (i = 0; i < csn; i++)
627 csa[i]->pn = i;
628 ndoms = csn;
629
630restart:
631 /* Find the best partition (set of sched domains) */
632 for (i = 0; i < csn; i++) {
633 struct cpuset *a = csa[i];
634 int apn = a->pn;
635
636 for (j = 0; j < csn; j++) {
637 struct cpuset *b = csa[j];
638 int bpn = b->pn;
639
640 if (apn != bpn && cpusets_overlap(a, b)) {
641 for (k = 0; k < csn; k++) {
642 struct cpuset *c = csa[k];
643
644 if (c->pn == bpn)
645 c->pn = apn;
646 }
647 ndoms--; /* one less element */
648 goto restart;
649 }
650 }
651 }
652
653 /* Convert <csn, csa> to <ndoms, doms> */
654 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
655 if (!doms)
656 goto rebuild;
657
658 for (nslot = 0, i = 0; i < csn; i++) {
659 struct cpuset *a = csa[i];
660 int apn = a->pn;
661
662 if (apn >= 0) {
663 cpumask_t *dp = doms + nslot;
664
665 if (nslot == ndoms) {
666 static int warnings = 10;
667 if (warnings) {
668 printk(KERN_WARNING
669 "rebuild_sched_domains confused:"
670 " nslot %d, ndoms %d, csn %d, i %d,"
671 " apn %d\n",
672 nslot, ndoms, csn, i, apn);
673 warnings--;
674 }
675 continue;
676 }
677
678 cpus_clear(*dp);
679 for (j = i; j < csn; j++) {
680 struct cpuset *b = csa[j];
681
682 if (apn == b->pn) {
683 cpus_or(*dp, *dp, b->cpus_allowed);
684 b->pn = -1;
685 }
686 }
687 nslot++;
688 }
689 }
690 BUG_ON(nslot != ndoms);
691
692rebuild:
693 /* Have scheduler rebuild sched domains */
694 lock_cpu_hotplug();
695 partition_sched_domains(ndoms, doms);
696 unlock_cpu_hotplug();
697
698done:
699 if (q && !IS_ERR(q))
700 kfifo_free(q);
701 kfree(csa);
702 /* Don't kfree(doms) -- partition_sched_domains() does that. */
703}
704
705static inline int started_after_time(struct task_struct *t1,
706 struct timespec *time,
707 struct task_struct *t2)
708{
709 int start_diff = timespec_compare(&t1->start_time, time);
710 if (start_diff > 0) {
711 return 1;
712 } else if (start_diff < 0) {
713 return 0;
714 } else {
715 /*
716 * Arbitrarily, if two processes started at the same
717 * time, we'll say that the lower pointer value
718 * started first. Note that t2 may have exited by now
719 * so this may not be a valid pointer any longer, but
720 * that's fine - it still serves to distinguish
721 * between two tasks started (effectively)
722 * simultaneously.
723 */
724 return t1 > t2;
725 }
726}
727
728static inline int started_after(void *p1, void *p2)
729{
730 struct task_struct *t1 = p1;
731 struct task_struct *t2 = p2;
732 return started_after_time(t1, &t2->start_time, t2);
733}
734
735/*
758 * Call with manage_mutex held. May take callback_mutex during call. 736 * Call with manage_mutex held. May take callback_mutex during call.
759 */ 737 */
760 738
761static int update_cpumask(struct cpuset *cs, char *buf) 739static int update_cpumask(struct cpuset *cs, char *buf)
762{ 740{
763 struct cpuset trialcs; 741 struct cpuset trialcs;
764 int retval; 742 int retval, i;
743 int is_load_balanced;
744 struct cgroup_iter it;
745 struct cgroup *cgrp = cs->css.cgroup;
746 struct task_struct *p, *dropped;
747 /* Never dereference latest_task, since it's not refcounted */
748 struct task_struct *latest_task = NULL;
749 struct ptr_heap heap;
750 struct timespec latest_time = { 0, 0 };
765 751
766 /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ 752 /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
767 if (cs == &top_cpuset) 753 if (cs == &top_cpuset)
@@ -770,11 +756,13 @@ static int update_cpumask(struct cpuset *cs, char *buf)
770 trialcs = *cs; 756 trialcs = *cs;
771 757
772 /* 758 /*
773 * We allow a cpuset's cpus_allowed to be empty; if it has attached 759 * An empty cpus_allowed is ok iff there are no tasks in the cpuset.
774 * tasks, we'll catch it later when we validate the change and return 760 * Since cpulist_parse() fails on an empty mask, we special case
775 * -ENOSPC. 761 * that parsing. The validate_change() call ensures that cpusets
762 * with tasks have cpus.
776 */ 763 */
777 if (!buf[0] || (buf[0] == '\n' && !buf[1])) { 764 buf = strstrip(buf);
765 if (!*buf) {
778 cpus_clear(trialcs.cpus_allowed); 766 cpus_clear(trialcs.cpus_allowed);
779 } else { 767 } else {
780 retval = cpulist_parse(buf, trialcs.cpus_allowed); 768 retval = cpulist_parse(buf, trialcs.cpus_allowed);
@@ -782,15 +770,79 @@ static int update_cpumask(struct cpuset *cs, char *buf)
782 return retval; 770 return retval;
783 } 771 }
784 cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map); 772 cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
785 /* cpus_allowed cannot be empty for a cpuset with attached tasks. */
786 if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed))
787 return -ENOSPC;
788 retval = validate_change(cs, &trialcs); 773 retval = validate_change(cs, &trialcs);
789 if (retval < 0) 774 if (retval < 0)
790 return retval; 775 return retval;
776
777 /* Nothing to do if the cpus didn't change */
778 if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
779 return 0;
780 retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
781 if (retval)
782 return retval;
783
784 is_load_balanced = is_sched_load_balance(&trialcs);
785
791 mutex_lock(&callback_mutex); 786 mutex_lock(&callback_mutex);
792 cs->cpus_allowed = trialcs.cpus_allowed; 787 cs->cpus_allowed = trialcs.cpus_allowed;
793 mutex_unlock(&callback_mutex); 788 mutex_unlock(&callback_mutex);
789
790 again:
791 /*
792 * Scan tasks in the cpuset, and update the cpumasks of any
793 * that need an update. Since we can't call set_cpus_allowed()
794 * while holding tasklist_lock, gather tasks to be processed
795 * in a heap structure. If the statically-sized heap fills up,
796 * overflow tasks that started later, and in future iterations
797 * only consider tasks that started after the latest task in
798 * the previous pass. This guarantees forward progress and
799 * that we don't miss any tasks
800 */
801 heap.size = 0;
802 cgroup_iter_start(cgrp, &it);
803 while ((p = cgroup_iter_next(cgrp, &it))) {
804 /* Only affect tasks that don't have the right cpus_allowed */
805 if (cpus_equal(p->cpus_allowed, cs->cpus_allowed))
806 continue;
807 /*
808 * Only process tasks that started after the last task
809 * we processed
810 */
811 if (!started_after_time(p, &latest_time, latest_task))
812 continue;
813 dropped = heap_insert(&heap, p);
814 if (dropped == NULL) {
815 get_task_struct(p);
816 } else if (dropped != p) {
817 get_task_struct(p);
818 put_task_struct(dropped);
819 }
820 }
821 cgroup_iter_end(cgrp, &it);
822 if (heap.size) {
823 for (i = 0; i < heap.size; i++) {
824 struct task_struct *p = heap.ptrs[i];
825 if (i == 0) {
826 latest_time = p->start_time;
827 latest_task = p;
828 }
829 set_cpus_allowed(p, cs->cpus_allowed);
830 put_task_struct(p);
831 }
832 /*
833 * If we had to process any tasks at all, scan again
834 * in case some of them were in the middle of forking
835 * children that didn't notice the new cpumask
836 * restriction. Not the most efficient way to do it,
837 * but it avoids having to take callback_mutex in the
838 * fork path
839 */
840 goto again;
841 }
842 heap_free(&heap);
843 if (is_load_balanced)
844 rebuild_sched_domains();
845
794 return 0; 846 return 0;
795} 847}
796 848
@@ -839,7 +891,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
839 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); 891 do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
840 892
841 mutex_lock(&callback_mutex); 893 mutex_lock(&callback_mutex);
842 guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed); 894 guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed);
843 mutex_unlock(&callback_mutex); 895 mutex_unlock(&callback_mutex);
844} 896}
845 897
@@ -857,16 +909,19 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
857 * their mempolicies to the cpusets new mems_allowed. 909 * their mempolicies to the cpusets new mems_allowed.
858 */ 910 */
859 911
912static void *cpuset_being_rebound;
913
860static int update_nodemask(struct cpuset *cs, char *buf) 914static int update_nodemask(struct cpuset *cs, char *buf)
861{ 915{
862 struct cpuset trialcs; 916 struct cpuset trialcs;
863 nodemask_t oldmem; 917 nodemask_t oldmem;
864 struct task_struct *g, *p; 918 struct task_struct *p;
865 struct mm_struct **mmarray; 919 struct mm_struct **mmarray;
866 int i, n, ntasks; 920 int i, n, ntasks;
867 int migrate; 921 int migrate;
868 int fudge; 922 int fudge;
869 int retval; 923 int retval;
924 struct cgroup_iter it;
870 925
871 /* 926 /*
872 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; 927 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
@@ -878,29 +933,19 @@ static int update_nodemask(struct cpuset *cs, char *buf)
878 trialcs = *cs; 933 trialcs = *cs;
879 934
880 /* 935 /*
881 * We allow a cpuset's mems_allowed to be empty; if it has attached 936 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
882 * tasks, we'll catch it later when we validate the change and return 937 * Since nodelist_parse() fails on an empty mask, we special case
883 * -ENOSPC. 938 * that parsing. The validate_change() call ensures that cpusets
939 * with tasks have memory.
884 */ 940 */
885 if (!buf[0] || (buf[0] == '\n' && !buf[1])) { 941 buf = strstrip(buf);
942 if (!*buf) {
886 nodes_clear(trialcs.mems_allowed); 943 nodes_clear(trialcs.mems_allowed);
887 } else { 944 } else {
888 retval = nodelist_parse(buf, trialcs.mems_allowed); 945 retval = nodelist_parse(buf, trialcs.mems_allowed);
889 if (retval < 0) 946 if (retval < 0)
890 goto done; 947 goto done;
891 if (!nodes_intersects(trialcs.mems_allowed,
892 node_states[N_HIGH_MEMORY])) {
893 /*
894 * error if only memoryless nodes specified.
895 */
896 retval = -ENOSPC;
897 goto done;
898 }
899 } 948 }
900 /*
901 * Exclude memoryless nodes. We know that trialcs.mems_allowed
902 * contains at least one node with memory.
903 */
904 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, 949 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
905 node_states[N_HIGH_MEMORY]); 950 node_states[N_HIGH_MEMORY]);
906 oldmem = cs->mems_allowed; 951 oldmem = cs->mems_allowed;
@@ -908,11 +953,6 @@ static int update_nodemask(struct cpuset *cs, char *buf)
908 retval = 0; /* Too easy - nothing to do */ 953 retval = 0; /* Too easy - nothing to do */
909 goto done; 954 goto done;
910 } 955 }
911 /* mems_allowed cannot be empty for a cpuset with attached tasks. */
912 if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) {
913 retval = -ENOSPC;
914 goto done;
915 }
916 retval = validate_change(cs, &trialcs); 956 retval = validate_change(cs, &trialcs);
917 if (retval < 0) 957 if (retval < 0)
918 goto done; 958 goto done;
@@ -922,7 +962,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
922 cs->mems_generation = cpuset_mems_generation++; 962 cs->mems_generation = cpuset_mems_generation++;
923 mutex_unlock(&callback_mutex); 963 mutex_unlock(&callback_mutex);
924 964
925 set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ 965 cpuset_being_rebound = cs; /* causes mpol_copy() rebind */
926 966
927 fudge = 10; /* spare mmarray[] slots */ 967 fudge = 10; /* spare mmarray[] slots */
928 fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ 968 fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
@@ -936,13 +976,13 @@ static int update_nodemask(struct cpuset *cs, char *buf)
936 * enough mmarray[] w/o using GFP_ATOMIC. 976 * enough mmarray[] w/o using GFP_ATOMIC.
937 */ 977 */
938 while (1) { 978 while (1) {
939 ntasks = atomic_read(&cs->count); /* guess */ 979 ntasks = cgroup_task_count(cs->css.cgroup); /* guess */
940 ntasks += fudge; 980 ntasks += fudge;
941 mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); 981 mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
942 if (!mmarray) 982 if (!mmarray)
943 goto done; 983 goto done;
944 read_lock(&tasklist_lock); /* block fork */ 984 read_lock(&tasklist_lock); /* block fork */
945 if (atomic_read(&cs->count) <= ntasks) 985 if (cgroup_task_count(cs->css.cgroup) <= ntasks)
946 break; /* got enough */ 986 break; /* got enough */
947 read_unlock(&tasklist_lock); /* try again */ 987 read_unlock(&tasklist_lock); /* try again */
948 kfree(mmarray); 988 kfree(mmarray);
@@ -951,21 +991,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
951 n = 0; 991 n = 0;
952 992
953 /* Load up mmarray[] with mm reference for each task in cpuset. */ 993 /* Load up mmarray[] with mm reference for each task in cpuset. */
954 do_each_thread(g, p) { 994 cgroup_iter_start(cs->css.cgroup, &it);
995 while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
955 struct mm_struct *mm; 996 struct mm_struct *mm;
956 997
957 if (n >= ntasks) { 998 if (n >= ntasks) {
958 printk(KERN_WARNING 999 printk(KERN_WARNING
959 "Cpuset mempolicy rebind incomplete.\n"); 1000 "Cpuset mempolicy rebind incomplete.\n");
960 continue; 1001 break;
961 } 1002 }
962 if (p->cpuset != cs)
963 continue;
964 mm = get_task_mm(p); 1003 mm = get_task_mm(p);
965 if (!mm) 1004 if (!mm)
966 continue; 1005 continue;
967 mmarray[n++] = mm; 1006 mmarray[n++] = mm;
968 } while_each_thread(g, p); 1007 }
1008 cgroup_iter_end(cs->css.cgroup, &it);
969 read_unlock(&tasklist_lock); 1009 read_unlock(&tasklist_lock);
970 1010
971 /* 1011 /*
@@ -993,12 +1033,17 @@ static int update_nodemask(struct cpuset *cs, char *buf)
993 1033
994 /* We're done rebinding vma's to this cpusets new mems_allowed. */ 1034 /* We're done rebinding vma's to this cpusets new mems_allowed. */
995 kfree(mmarray); 1035 kfree(mmarray);
996 set_cpuset_being_rebound(NULL); 1036 cpuset_being_rebound = NULL;
997 retval = 0; 1037 retval = 0;
998done: 1038done:
999 return retval; 1039 return retval;
1000} 1040}
1001 1041
1042int current_cpuset_is_being_rebound(void)
1043{
1044 return task_cs(current) == cpuset_being_rebound;
1045}
1046
1002/* 1047/*
1003 * Call with manage_mutex held. 1048 * Call with manage_mutex held.
1004 */ 1049 */
@@ -1015,6 +1060,7 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
1015/* 1060/*
1016 * update_flag - read a 0 or a 1 in a file and update associated flag 1061 * update_flag - read a 0 or a 1 in a file and update associated flag
1017 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, 1062 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
1063 * CS_SCHED_LOAD_BALANCE,
1018 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE, 1064 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
1019 * CS_SPREAD_PAGE, CS_SPREAD_SLAB) 1065 * CS_SPREAD_PAGE, CS_SPREAD_SLAB)
1020 * cs: the cpuset to update 1066 * cs: the cpuset to update
@@ -1028,6 +1074,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
1028 int turning_on; 1074 int turning_on;
1029 struct cpuset trialcs; 1075 struct cpuset trialcs;
1030 int err; 1076 int err;
1077 int cpus_nonempty, balance_flag_changed;
1031 1078
1032 turning_on = (simple_strtoul(buf, NULL, 10) != 0); 1079 turning_on = (simple_strtoul(buf, NULL, 10) != 0);
1033 1080
@@ -1040,10 +1087,18 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
1040 err = validate_change(cs, &trialcs); 1087 err = validate_change(cs, &trialcs);
1041 if (err < 0) 1088 if (err < 0)
1042 return err; 1089 return err;
1090
1091 cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
1092 balance_flag_changed = (is_sched_load_balance(cs) !=
1093 is_sched_load_balance(&trialcs));
1094
1043 mutex_lock(&callback_mutex); 1095 mutex_lock(&callback_mutex);
1044 cs->flags = trialcs.flags; 1096 cs->flags = trialcs.flags;
1045 mutex_unlock(&callback_mutex); 1097 mutex_unlock(&callback_mutex);
1046 1098
1099 if (cpus_nonempty && balance_flag_changed)
1100 rebuild_sched_domains();
1101
1047 return 0; 1102 return 0;
1048} 1103}
1049 1104
@@ -1145,85 +1200,34 @@ static int fmeter_getrate(struct fmeter *fmp)
1145 return val; 1200 return val;
1146} 1201}
1147 1202
1148/* 1203static int cpuset_can_attach(struct cgroup_subsys *ss,
1149 * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly 1204 struct cgroup *cont, struct task_struct *tsk)
1150 * writing the path of the old cpuset in 'ppathbuf' if it needs to be
1151 * notified on release.
1152 *
1153 * Call holding manage_mutex. May take callback_mutex and task_lock of
1154 * the task 'pid' during call.
1155 */
1156
1157static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1158{ 1205{
1159 pid_t pid; 1206 struct cpuset *cs = cgroup_cs(cont);
1160 struct task_struct *tsk;
1161 struct cpuset *oldcs;
1162 cpumask_t cpus;
1163 nodemask_t from, to;
1164 struct mm_struct *mm;
1165 int retval;
1166 1207
1167 if (sscanf(pidbuf, "%d", &pid) != 1)
1168 return -EIO;
1169 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 1208 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1170 return -ENOSPC; 1209 return -ENOSPC;
1171 1210
1172 if (pid) { 1211 return security_task_setscheduler(tsk, 0, NULL);
1173 read_lock(&tasklist_lock); 1212}
1174
1175 tsk = find_task_by_pid(pid);
1176 if (!tsk || tsk->flags & PF_EXITING) {
1177 read_unlock(&tasklist_lock);
1178 return -ESRCH;
1179 }
1180
1181 get_task_struct(tsk);
1182 read_unlock(&tasklist_lock);
1183
1184 if ((current->euid) && (current->euid != tsk->uid)
1185 && (current->euid != tsk->suid)) {
1186 put_task_struct(tsk);
1187 return -EACCES;
1188 }
1189 } else {
1190 tsk = current;
1191 get_task_struct(tsk);
1192 }
1193 1213
1194 retval = security_task_setscheduler(tsk, 0, NULL); 1214static void cpuset_attach(struct cgroup_subsys *ss,
1195 if (retval) { 1215 struct cgroup *cont, struct cgroup *oldcont,
1196 put_task_struct(tsk); 1216 struct task_struct *tsk)
1197 return retval; 1217{
1198 } 1218 cpumask_t cpus;
1219 nodemask_t from, to;
1220 struct mm_struct *mm;
1221 struct cpuset *cs = cgroup_cs(cont);
1222 struct cpuset *oldcs = cgroup_cs(oldcont);
1199 1223
1200 mutex_lock(&callback_mutex); 1224 mutex_lock(&callback_mutex);
1201
1202 task_lock(tsk);
1203 oldcs = tsk->cpuset;
1204 /*
1205 * After getting 'oldcs' cpuset ptr, be sure still not exiting.
1206 * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
1207 * then fail this attach_task(), to avoid breaking top_cpuset.count.
1208 */
1209 if (tsk->flags & PF_EXITING) {
1210 task_unlock(tsk);
1211 mutex_unlock(&callback_mutex);
1212 put_task_struct(tsk);
1213 return -ESRCH;
1214 }
1215 atomic_inc(&cs->count);
1216 rcu_assign_pointer(tsk->cpuset, cs);
1217 task_unlock(tsk);
1218
1219 guarantee_online_cpus(cs, &cpus); 1225 guarantee_online_cpus(cs, &cpus);
1220 set_cpus_allowed(tsk, cpus); 1226 set_cpus_allowed(tsk, cpus);
1227 mutex_unlock(&callback_mutex);
1221 1228
1222 from = oldcs->mems_allowed; 1229 from = oldcs->mems_allowed;
1223 to = cs->mems_allowed; 1230 to = cs->mems_allowed;
1224
1225 mutex_unlock(&callback_mutex);
1226
1227 mm = get_task_mm(tsk); 1231 mm = get_task_mm(tsk);
1228 if (mm) { 1232 if (mm) {
1229 mpol_rebind_mm(mm, &to); 1233 mpol_rebind_mm(mm, &to);
@@ -1232,44 +1236,36 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1232 mmput(mm); 1236 mmput(mm);
1233 } 1237 }
1234 1238
1235 put_task_struct(tsk);
1236 synchronize_rcu();
1237 if (atomic_dec_and_test(&oldcs->count))
1238 check_for_release(oldcs, ppathbuf);
1239 return 0;
1240} 1239}
1241 1240
1242/* The various types of files and directories in a cpuset file system */ 1241/* The various types of files and directories in a cpuset file system */
1243 1242
1244typedef enum { 1243typedef enum {
1245 FILE_ROOT,
1246 FILE_DIR,
1247 FILE_MEMORY_MIGRATE, 1244 FILE_MEMORY_MIGRATE,
1248 FILE_CPULIST, 1245 FILE_CPULIST,
1249 FILE_MEMLIST, 1246 FILE_MEMLIST,
1250 FILE_CPU_EXCLUSIVE, 1247 FILE_CPU_EXCLUSIVE,
1251 FILE_MEM_EXCLUSIVE, 1248 FILE_MEM_EXCLUSIVE,
1252 FILE_NOTIFY_ON_RELEASE, 1249 FILE_SCHED_LOAD_BALANCE,
1253 FILE_MEMORY_PRESSURE_ENABLED, 1250 FILE_MEMORY_PRESSURE_ENABLED,
1254 FILE_MEMORY_PRESSURE, 1251 FILE_MEMORY_PRESSURE,
1255 FILE_SPREAD_PAGE, 1252 FILE_SPREAD_PAGE,
1256 FILE_SPREAD_SLAB, 1253 FILE_SPREAD_SLAB,
1257 FILE_TASKLIST,
1258} cpuset_filetype_t; 1254} cpuset_filetype_t;
1259 1255
1260static ssize_t cpuset_common_file_write(struct file *file, 1256static ssize_t cpuset_common_file_write(struct cgroup *cont,
1257 struct cftype *cft,
1258 struct file *file,
1261 const char __user *userbuf, 1259 const char __user *userbuf,
1262 size_t nbytes, loff_t *unused_ppos) 1260 size_t nbytes, loff_t *unused_ppos)
1263{ 1261{
1264 struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); 1262 struct cpuset *cs = cgroup_cs(cont);
1265 struct cftype *cft = __d_cft(file->f_path.dentry);
1266 cpuset_filetype_t type = cft->private; 1263 cpuset_filetype_t type = cft->private;
1267 char *buffer; 1264 char *buffer;
1268 char *pathbuf = NULL;
1269 int retval = 0; 1265 int retval = 0;
1270 1266
1271 /* Crude upper limit on largest legitimate cpulist user might write. */ 1267 /* Crude upper limit on largest legitimate cpulist user might write. */
1272 if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES)) 1268 if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
1273 return -E2BIG; 1269 return -E2BIG;
1274 1270
1275 /* +1 for nul-terminator */ 1271 /* +1 for nul-terminator */
@@ -1282,9 +1278,9 @@ static ssize_t cpuset_common_file_write(struct file *file,
1282 } 1278 }
1283 buffer[nbytes] = 0; /* nul-terminate */ 1279 buffer[nbytes] = 0; /* nul-terminate */
1284 1280
1285 mutex_lock(&manage_mutex); 1281 cgroup_lock();
1286 1282
1287 if (is_removed(cs)) { 1283 if (cgroup_is_removed(cont)) {
1288 retval = -ENODEV; 1284 retval = -ENODEV;
1289 goto out2; 1285 goto out2;
1290 } 1286 }
@@ -1302,8 +1298,8 @@ static ssize_t cpuset_common_file_write(struct file *file,
1302 case FILE_MEM_EXCLUSIVE: 1298 case FILE_MEM_EXCLUSIVE:
1303 retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer); 1299 retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
1304 break; 1300 break;
1305 case FILE_NOTIFY_ON_RELEASE: 1301 case FILE_SCHED_LOAD_BALANCE:
1306 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); 1302 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
1307 break; 1303 break;
1308 case FILE_MEMORY_MIGRATE: 1304 case FILE_MEMORY_MIGRATE:
1309 retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); 1305 retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
@@ -1322,9 +1318,6 @@ static ssize_t cpuset_common_file_write(struct file *file,
1322 retval = update_flag(CS_SPREAD_SLAB, cs, buffer); 1318 retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
1323 cs->mems_generation = cpuset_mems_generation++; 1319 cs->mems_generation = cpuset_mems_generation++;
1324 break; 1320 break;
1325 case FILE_TASKLIST:
1326 retval = attach_task(cs, buffer, &pathbuf);
1327 break;
1328 default: 1321 default:
1329 retval = -EINVAL; 1322 retval = -EINVAL;
1330 goto out2; 1323 goto out2;
@@ -1333,30 +1326,12 @@ static ssize_t cpuset_common_file_write(struct file *file,
1333 if (retval == 0) 1326 if (retval == 0)
1334 retval = nbytes; 1327 retval = nbytes;
1335out2: 1328out2:
1336 mutex_unlock(&manage_mutex); 1329 cgroup_unlock();
1337 cpuset_release_agent(pathbuf);
1338out1: 1330out1:
1339 kfree(buffer); 1331 kfree(buffer);
1340 return retval; 1332 return retval;
1341} 1333}
1342 1334
1343static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
1344 size_t nbytes, loff_t *ppos)
1345{
1346 ssize_t retval = 0;
1347 struct cftype *cft = __d_cft(file->f_path.dentry);
1348 if (!cft)
1349 return -ENODEV;
1350
1351 /* special function ? */
1352 if (cft->write)
1353 retval = cft->write(file, buf, nbytes, ppos);
1354 else
1355 retval = cpuset_common_file_write(file, buf, nbytes, ppos);
1356
1357 return retval;
1358}
1359
1360/* 1335/*
1361 * These ascii lists should be read in a single call, by using a user 1336 * These ascii lists should be read in a single call, by using a user
1362 * buffer large enough to hold the entire map. If read in smaller 1337 * buffer large enough to hold the entire map. If read in smaller
@@ -1391,11 +1366,13 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
1391 return nodelist_scnprintf(page, PAGE_SIZE, mask); 1366 return nodelist_scnprintf(page, PAGE_SIZE, mask);
1392} 1367}
1393 1368
1394static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, 1369static ssize_t cpuset_common_file_read(struct cgroup *cont,
1395 size_t nbytes, loff_t *ppos) 1370 struct cftype *cft,
1371 struct file *file,
1372 char __user *buf,
1373 size_t nbytes, loff_t *ppos)
1396{ 1374{
1397 struct cftype *cft = __d_cft(file->f_path.dentry); 1375 struct cpuset *cs = cgroup_cs(cont);
1398 struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
1399 cpuset_filetype_t type = cft->private; 1376 cpuset_filetype_t type = cft->private;
1400 char *page; 1377 char *page;
1401 ssize_t retval = 0; 1378 ssize_t retval = 0;
@@ -1419,8 +1396,8 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
1419 case FILE_MEM_EXCLUSIVE: 1396 case FILE_MEM_EXCLUSIVE:
1420 *s++ = is_mem_exclusive(cs) ? '1' : '0'; 1397 *s++ = is_mem_exclusive(cs) ? '1' : '0';
1421 break; 1398 break;
1422 case FILE_NOTIFY_ON_RELEASE: 1399 case FILE_SCHED_LOAD_BALANCE:
1423 *s++ = notify_on_release(cs) ? '1' : '0'; 1400 *s++ = is_sched_load_balance(cs) ? '1' : '0';
1424 break; 1401 break;
1425 case FILE_MEMORY_MIGRATE: 1402 case FILE_MEMORY_MIGRATE:
1426 *s++ = is_memory_migrate(cs) ? '1' : '0'; 1403 *s++ = is_memory_migrate(cs) ? '1' : '0';
@@ -1449,390 +1426,150 @@ out:
1449 return retval; 1426 return retval;
1450} 1427}
1451 1428
1452static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbytes,
1453 loff_t *ppos)
1454{
1455 ssize_t retval = 0;
1456 struct cftype *cft = __d_cft(file->f_path.dentry);
1457 if (!cft)
1458 return -ENODEV;
1459
1460 /* special function ? */
1461 if (cft->read)
1462 retval = cft->read(file, buf, nbytes, ppos);
1463 else
1464 retval = cpuset_common_file_read(file, buf, nbytes, ppos);
1465
1466 return retval;
1467}
1468
1469static int cpuset_file_open(struct inode *inode, struct file *file)
1470{
1471 int err;
1472 struct cftype *cft;
1473
1474 err = generic_file_open(inode, file);
1475 if (err)
1476 return err;
1477
1478 cft = __d_cft(file->f_path.dentry);
1479 if (!cft)
1480 return -ENODEV;
1481 if (cft->open)
1482 err = cft->open(inode, file);
1483 else
1484 err = 0;
1485
1486 return err;
1487}
1488
1489static int cpuset_file_release(struct inode *inode, struct file *file)
1490{
1491 struct cftype *cft = __d_cft(file->f_path.dentry);
1492 if (cft->release)
1493 return cft->release(inode, file);
1494 return 0;
1495}
1496
1497/*
1498 * cpuset_rename - Only allow simple rename of directories in place.
1499 */
1500static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
1501 struct inode *new_dir, struct dentry *new_dentry)
1502{
1503 if (!S_ISDIR(old_dentry->d_inode->i_mode))
1504 return -ENOTDIR;
1505 if (new_dentry->d_inode)
1506 return -EEXIST;
1507 if (old_dir != new_dir)
1508 return -EIO;
1509 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
1510}
1511
1512static const struct file_operations cpuset_file_operations = {
1513 .read = cpuset_file_read,
1514 .write = cpuset_file_write,
1515 .llseek = generic_file_llseek,
1516 .open = cpuset_file_open,
1517 .release = cpuset_file_release,
1518};
1519
1520static const struct inode_operations cpuset_dir_inode_operations = {
1521 .lookup = simple_lookup,
1522 .mkdir = cpuset_mkdir,
1523 .rmdir = cpuset_rmdir,
1524 .rename = cpuset_rename,
1525};
1526
1527static int cpuset_create_file(struct dentry *dentry, int mode)
1528{
1529 struct inode *inode;
1530
1531 if (!dentry)
1532 return -ENOENT;
1533 if (dentry->d_inode)
1534 return -EEXIST;
1535
1536 inode = cpuset_new_inode(mode);
1537 if (!inode)
1538 return -ENOMEM;
1539
1540 if (S_ISDIR(mode)) {
1541 inode->i_op = &cpuset_dir_inode_operations;
1542 inode->i_fop = &simple_dir_operations;
1543
1544 /* start off with i_nlink == 2 (for "." entry) */
1545 inc_nlink(inode);
1546 } else if (S_ISREG(mode)) {
1547 inode->i_size = 0;
1548 inode->i_fop = &cpuset_file_operations;
1549 }
1550
1551 d_instantiate(dentry, inode);
1552 dget(dentry); /* Extra count - pin the dentry in core */
1553 return 0;
1554}
1555
1556/*
1557 * cpuset_create_dir - create a directory for an object.
1558 * cs: the cpuset we create the directory for.
1559 * It must have a valid ->parent field
1560 * And we are going to fill its ->dentry field.
1561 * name: The name to give to the cpuset directory. Will be copied.
1562 * mode: mode to set on new directory.
1563 */
1564
1565static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
1566{
1567 struct dentry *dentry = NULL;
1568 struct dentry *parent;
1569 int error = 0;
1570
1571 parent = cs->parent->dentry;
1572 dentry = cpuset_get_dentry(parent, name);
1573 if (IS_ERR(dentry))
1574 return PTR_ERR(dentry);
1575 error = cpuset_create_file(dentry, S_IFDIR | mode);
1576 if (!error) {
1577 dentry->d_fsdata = cs;
1578 inc_nlink(parent->d_inode);
1579 cs->dentry = dentry;
1580 }
1581 dput(dentry);
1582
1583 return error;
1584}
1585
1586static int cpuset_add_file(struct dentry *dir, const struct cftype *cft)
1587{
1588 struct dentry *dentry;
1589 int error;
1590
1591 mutex_lock(&dir->d_inode->i_mutex);
1592 dentry = cpuset_get_dentry(dir, cft->name);
1593 if (!IS_ERR(dentry)) {
1594 error = cpuset_create_file(dentry, 0644 | S_IFREG);
1595 if (!error)
1596 dentry->d_fsdata = (void *)cft;
1597 dput(dentry);
1598 } else
1599 error = PTR_ERR(dentry);
1600 mutex_unlock(&dir->d_inode->i_mutex);
1601 return error;
1602}
1603
1604/*
1605 * Stuff for reading the 'tasks' file.
1606 *
1607 * Reading this file can return large amounts of data if a cpuset has
1608 * *lots* of attached tasks. So it may need several calls to read(),
1609 * but we cannot guarantee that the information we produce is correct
1610 * unless we produce it entirely atomically.
1611 *
1612 * Upon tasks file open(), a struct ctr_struct is allocated, that
1613 * will have a pointer to an array (also allocated here). The struct
1614 * ctr_struct * is stored in file->private_data. Its resources will
1615 * be freed by release() when the file is closed. The array is used
1616 * to sprintf the PIDs and then used by read().
1617 */
1618
1619/* cpusets_tasks_read array */
1620
1621struct ctr_struct {
1622 char *buf;
1623 int bufsz;
1624};
1625
1626/*
1627 * Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'.
1628 * Return actual number of pids loaded. No need to task_lock(p)
1629 * when reading out p->cpuset, as we don't really care if it changes
1630 * on the next cycle, and we are not going to try to dereference it.
1631 */
1632static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
1633{
1634 int n = 0;
1635 struct task_struct *g, *p;
1636
1637 read_lock(&tasklist_lock);
1638
1639 do_each_thread(g, p) {
1640 if (p->cpuset == cs) {
1641 if (unlikely(n == npids))
1642 goto array_full;
1643 pidarray[n++] = p->pid;
1644 }
1645 } while_each_thread(g, p);
1646
1647array_full:
1648 read_unlock(&tasklist_lock);
1649 return n;
1650}
1651
1652static int cmppid(const void *a, const void *b)
1653{
1654 return *(pid_t *)a - *(pid_t *)b;
1655}
1656
1657/*
1658 * Convert array 'a' of 'npids' pid_t's to a string of newline separated
1659 * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
1660 * count 'cnt' of how many chars would be written if buf were large enough.
1661 */
1662static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
1663{
1664 int cnt = 0;
1665 int i;
1666
1667 for (i = 0; i < npids; i++)
1668 cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
1669 return cnt;
1670}
1671
1672/*
1673 * Handle an open on 'tasks' file. Prepare a buffer listing the
1674 * process id's of tasks currently attached to the cpuset being opened.
1675 *
1676 * Does not require any specific cpuset mutexes, and does not take any.
1677 */
1678static int cpuset_tasks_open(struct inode *unused, struct file *file)
1679{
1680 struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
1681 struct ctr_struct *ctr;
1682 pid_t *pidarray;
1683 int npids;
1684 char c;
1685
1686 if (!(file->f_mode & FMODE_READ))
1687 return 0;
1688
1689 ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
1690 if (!ctr)
1691 goto err0;
1692
1693 /*
1694 * If cpuset gets more users after we read count, we won't have
1695 * enough space - tough. This race is indistinguishable to the
1696 * caller from the case that the additional cpuset users didn't
1697 * show up until sometime later on.
1698 */
1699 npids = atomic_read(&cs->count);
1700 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
1701 if (!pidarray)
1702 goto err1;
1703
1704 npids = pid_array_load(pidarray, npids, cs);
1705 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
1706
1707 /* Call pid_array_to_buf() twice, first just to get bufsz */
1708 ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
1709 ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
1710 if (!ctr->buf)
1711 goto err2;
1712 ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
1713
1714 kfree(pidarray);
1715 file->private_data = ctr;
1716 return 0;
1717
1718err2:
1719 kfree(pidarray);
1720err1:
1721 kfree(ctr);
1722err0:
1723 return -ENOMEM;
1724}
1725
1726static ssize_t cpuset_tasks_read(struct file *file, char __user *buf,
1727 size_t nbytes, loff_t *ppos)
1728{
1729 struct ctr_struct *ctr = file->private_data;
1730 1429
1731 return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
1732}
1733 1430
1734static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
1735{
1736 struct ctr_struct *ctr;
1737 1431
1738 if (file->f_mode & FMODE_READ) {
1739 ctr = file->private_data;
1740 kfree(ctr->buf);
1741 kfree(ctr);
1742 }
1743 return 0;
1744}
1745 1432
1746/* 1433/*
1747 * for the common functions, 'private' gives the type of file 1434 * for the common functions, 'private' gives the type of file
1748 */ 1435 */
1749 1436
1750static struct cftype cft_tasks = {
1751 .name = "tasks",
1752 .open = cpuset_tasks_open,
1753 .read = cpuset_tasks_read,
1754 .release = cpuset_tasks_release,
1755 .private = FILE_TASKLIST,
1756};
1757
1758static struct cftype cft_cpus = { 1437static struct cftype cft_cpus = {
1759 .name = "cpus", 1438 .name = "cpus",
1439 .read = cpuset_common_file_read,
1440 .write = cpuset_common_file_write,
1760 .private = FILE_CPULIST, 1441 .private = FILE_CPULIST,
1761}; 1442};
1762 1443
1763static struct cftype cft_mems = { 1444static struct cftype cft_mems = {
1764 .name = "mems", 1445 .name = "mems",
1446 .read = cpuset_common_file_read,
1447 .write = cpuset_common_file_write,
1765 .private = FILE_MEMLIST, 1448 .private = FILE_MEMLIST,
1766}; 1449};
1767 1450
1768static struct cftype cft_cpu_exclusive = { 1451static struct cftype cft_cpu_exclusive = {
1769 .name = "cpu_exclusive", 1452 .name = "cpu_exclusive",
1453 .read = cpuset_common_file_read,
1454 .write = cpuset_common_file_write,
1770 .private = FILE_CPU_EXCLUSIVE, 1455 .private = FILE_CPU_EXCLUSIVE,
1771}; 1456};
1772 1457
1773static struct cftype cft_mem_exclusive = { 1458static struct cftype cft_mem_exclusive = {
1774 .name = "mem_exclusive", 1459 .name = "mem_exclusive",
1460 .read = cpuset_common_file_read,
1461 .write = cpuset_common_file_write,
1775 .private = FILE_MEM_EXCLUSIVE, 1462 .private = FILE_MEM_EXCLUSIVE,
1776}; 1463};
1777 1464
1778static struct cftype cft_notify_on_release = { 1465static struct cftype cft_sched_load_balance = {
1779 .name = "notify_on_release", 1466 .name = "sched_load_balance",
1780 .private = FILE_NOTIFY_ON_RELEASE, 1467 .read = cpuset_common_file_read,
1468 .write = cpuset_common_file_write,
1469 .private = FILE_SCHED_LOAD_BALANCE,
1781}; 1470};
1782 1471
1783static struct cftype cft_memory_migrate = { 1472static struct cftype cft_memory_migrate = {
1784 .name = "memory_migrate", 1473 .name = "memory_migrate",
1474 .read = cpuset_common_file_read,
1475 .write = cpuset_common_file_write,
1785 .private = FILE_MEMORY_MIGRATE, 1476 .private = FILE_MEMORY_MIGRATE,
1786}; 1477};
1787 1478
1788static struct cftype cft_memory_pressure_enabled = { 1479static struct cftype cft_memory_pressure_enabled = {
1789 .name = "memory_pressure_enabled", 1480 .name = "memory_pressure_enabled",
1481 .read = cpuset_common_file_read,
1482 .write = cpuset_common_file_write,
1790 .private = FILE_MEMORY_PRESSURE_ENABLED, 1483 .private = FILE_MEMORY_PRESSURE_ENABLED,
1791}; 1484};
1792 1485
1793static struct cftype cft_memory_pressure = { 1486static struct cftype cft_memory_pressure = {
1794 .name = "memory_pressure", 1487 .name = "memory_pressure",
1488 .read = cpuset_common_file_read,
1489 .write = cpuset_common_file_write,
1795 .private = FILE_MEMORY_PRESSURE, 1490 .private = FILE_MEMORY_PRESSURE,
1796}; 1491};
1797 1492
1798static struct cftype cft_spread_page = { 1493static struct cftype cft_spread_page = {
1799 .name = "memory_spread_page", 1494 .name = "memory_spread_page",
1495 .read = cpuset_common_file_read,
1496 .write = cpuset_common_file_write,
1800 .private = FILE_SPREAD_PAGE, 1497 .private = FILE_SPREAD_PAGE,
1801}; 1498};
1802 1499
1803static struct cftype cft_spread_slab = { 1500static struct cftype cft_spread_slab = {
1804 .name = "memory_spread_slab", 1501 .name = "memory_spread_slab",
1502 .read = cpuset_common_file_read,
1503 .write = cpuset_common_file_write,
1805 .private = FILE_SPREAD_SLAB, 1504 .private = FILE_SPREAD_SLAB,
1806}; 1505};
1807 1506
1808static int cpuset_populate_dir(struct dentry *cs_dentry) 1507static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1809{ 1508{
1810 int err; 1509 int err;
1811 1510
1812 if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0) 1511 if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
1813 return err; 1512 return err;
1814 if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0) 1513 if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
1815 return err; 1514 return err;
1816 if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0) 1515 if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
1817 return err; 1516 return err;
1818 if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0) 1517 if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
1819 return err; 1518 return err;
1820 if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) 1519 if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
1821 return err; 1520 return err;
1822 if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0) 1521 if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
1823 return err; 1522 return err;
1824 if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) 1523 if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
1825 return err; 1524 return err;
1826 if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0) 1525 if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
1827 return err; 1526 return err;
1828 if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0) 1527 if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
1829 return err;
1830 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
1831 return err; 1528 return err;
1529 /* memory_pressure_enabled is in root cpuset only */
1530 if (err == 0 && !cont->parent)
1531 err = cgroup_add_file(cont, ss,
1532 &cft_memory_pressure_enabled);
1832 return 0; 1533 return 0;
1833} 1534}
1834 1535
1835/* 1536/*
1537 * post_clone() is called at the end of cgroup_clone().
1538 * 'cgroup' was just created automatically as a result of
1539 * a cgroup_clone(), and the current task is about to
1540 * be moved into 'cgroup'.
1541 *
1542 * Currently we refuse to set up the cgroup - thereby
1543 * refusing the task to be entered, and as a result refusing
1544 * the sys_unshare() or clone() which initiated it - if any
1545 * sibling cpusets have exclusive cpus or mem.
1546 *
1547 * If this becomes a problem for some users who wish to
1548 * allow that scenario, then cpuset_post_clone() could be
1549 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
1550 * (and likewise for mems) to the new cgroup.
1551 */
1552static void cpuset_post_clone(struct cgroup_subsys *ss,
1553 struct cgroup *cgroup)
1554{
1555 struct cgroup *parent, *child;
1556 struct cpuset *cs, *parent_cs;
1557
1558 parent = cgroup->parent;
1559 list_for_each_entry(child, &parent->children, sibling) {
1560 cs = cgroup_cs(child);
1561 if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
1562 return;
1563 }
1564 cs = cgroup_cs(cgroup);
1565 parent_cs = cgroup_cs(parent);
1566
1567 cs->mems_allowed = parent_cs->mems_allowed;
1568 cs->cpus_allowed = parent_cs->cpus_allowed;
1569 return;
1570}
1571
1572/*
1836 * cpuset_create - create a cpuset 1573 * cpuset_create - create a cpuset
1837 * parent: cpuset that will be parent of the new cpuset. 1574 * parent: cpuset that will be parent of the new cpuset.
1838 * name: name of the new cpuset. Will be strcpy'ed. 1575 * name: name of the new cpuset. Will be strcpy'ed.
@@ -1841,106 +1578,77 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
1841 * Must be called with the mutex on the parent inode held 1578 * Must be called with the mutex on the parent inode held
1842 */ 1579 */
1843 1580
1844static long cpuset_create(struct cpuset *parent, const char *name, int mode) 1581static struct cgroup_subsys_state *cpuset_create(
1582 struct cgroup_subsys *ss,
1583 struct cgroup *cont)
1845{ 1584{
1846 struct cpuset *cs; 1585 struct cpuset *cs;
1847 int err; 1586 struct cpuset *parent;
1848 1587
1588 if (!cont->parent) {
1589 /* This is early initialization for the top cgroup */
1590 top_cpuset.mems_generation = cpuset_mems_generation++;
1591 return &top_cpuset.css;
1592 }
1593 parent = cgroup_cs(cont->parent);
1849 cs = kmalloc(sizeof(*cs), GFP_KERNEL); 1594 cs = kmalloc(sizeof(*cs), GFP_KERNEL);
1850 if (!cs) 1595 if (!cs)
1851 return -ENOMEM; 1596 return ERR_PTR(-ENOMEM);
1852 1597
1853 mutex_lock(&manage_mutex);
1854 cpuset_update_task_memory_state(); 1598 cpuset_update_task_memory_state();
1855 cs->flags = 0; 1599 cs->flags = 0;
1856 if (notify_on_release(parent))
1857 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
1858 if (is_spread_page(parent)) 1600 if (is_spread_page(parent))
1859 set_bit(CS_SPREAD_PAGE, &cs->flags); 1601 set_bit(CS_SPREAD_PAGE, &cs->flags);
1860 if (is_spread_slab(parent)) 1602 if (is_spread_slab(parent))
1861 set_bit(CS_SPREAD_SLAB, &cs->flags); 1603 set_bit(CS_SPREAD_SLAB, &cs->flags);
1604 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
1862 cs->cpus_allowed = CPU_MASK_NONE; 1605 cs->cpus_allowed = CPU_MASK_NONE;
1863 cs->mems_allowed = NODE_MASK_NONE; 1606 cs->mems_allowed = NODE_MASK_NONE;
1864 atomic_set(&cs->count, 0);
1865 INIT_LIST_HEAD(&cs->sibling);
1866 INIT_LIST_HEAD(&cs->children);
1867 cs->mems_generation = cpuset_mems_generation++; 1607 cs->mems_generation = cpuset_mems_generation++;
1868 fmeter_init(&cs->fmeter); 1608 fmeter_init(&cs->fmeter);
1869 1609
1870 cs->parent = parent; 1610 cs->parent = parent;
1871
1872 mutex_lock(&callback_mutex);
1873 list_add(&cs->sibling, &cs->parent->children);
1874 number_of_cpusets++; 1611 number_of_cpusets++;
1875 mutex_unlock(&callback_mutex); 1612 return &cs->css ;
1876
1877 err = cpuset_create_dir(cs, name, mode);
1878 if (err < 0)
1879 goto err;
1880
1881 /*
1882 * Release manage_mutex before cpuset_populate_dir() because it
1883 * will down() this new directory's i_mutex and if we race with
1884 * another mkdir, we might deadlock.
1885 */
1886 mutex_unlock(&manage_mutex);
1887
1888 err = cpuset_populate_dir(cs->dentry);
1889 /* If err < 0, we have a half-filled directory - oh well ;) */
1890 return 0;
1891err:
1892 list_del(&cs->sibling);
1893 mutex_unlock(&manage_mutex);
1894 kfree(cs);
1895 return err;
1896} 1613}
1897 1614
1898static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1615/*
1899{ 1616 * Locking note on the strange update_flag() call below:
1900 struct cpuset *c_parent = dentry->d_parent->d_fsdata; 1617 *
1901 1618 * If the cpuset being removed has its flag 'sched_load_balance'
1902 /* the vfs holds inode->i_mutex already */ 1619 * enabled, then simulate turning sched_load_balance off, which
1903 return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); 1620 * will call rebuild_sched_domains(). The lock_cpu_hotplug()
1904} 1621 * call in rebuild_sched_domains() must not be made while holding
1622 * callback_mutex. Elsewhere the kernel nests callback_mutex inside
1623 * lock_cpu_hotplug() calls. So the reverse nesting would risk an
1624 * ABBA deadlock.
1625 */
1905 1626
1906static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) 1627static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
1907{ 1628{
1908 struct cpuset *cs = dentry->d_fsdata; 1629 struct cpuset *cs = cgroup_cs(cont);
1909 struct dentry *d;
1910 struct cpuset *parent;
1911 char *pathbuf = NULL;
1912 1630
1913 /* the vfs holds both inode->i_mutex already */
1914
1915 mutex_lock(&manage_mutex);
1916 cpuset_update_task_memory_state(); 1631 cpuset_update_task_memory_state();
1917 if (atomic_read(&cs->count) > 0) { 1632
1918 mutex_unlock(&manage_mutex); 1633 if (is_sched_load_balance(cs))
1919 return -EBUSY; 1634 update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");
1920 } 1635
1921 if (!list_empty(&cs->children)) {
1922 mutex_unlock(&manage_mutex);
1923 return -EBUSY;
1924 }
1925 parent = cs->parent;
1926 mutex_lock(&callback_mutex);
1927 set_bit(CS_REMOVED, &cs->flags);
1928 list_del(&cs->sibling); /* delete my sibling from parent->children */
1929 spin_lock(&cs->dentry->d_lock);
1930 d = dget(cs->dentry);
1931 cs->dentry = NULL;
1932 spin_unlock(&d->d_lock);
1933 cpuset_d_remove_dir(d);
1934 dput(d);
1935 number_of_cpusets--; 1636 number_of_cpusets--;
1936 mutex_unlock(&callback_mutex); 1637 kfree(cs);
1937 if (list_empty(&parent->children))
1938 check_for_release(parent, &pathbuf);
1939 mutex_unlock(&manage_mutex);
1940 cpuset_release_agent(pathbuf);
1941 return 0;
1942} 1638}
1943 1639
1640struct cgroup_subsys cpuset_subsys = {
1641 .name = "cpuset",
1642 .create = cpuset_create,
1643 .destroy = cpuset_destroy,
1644 .can_attach = cpuset_can_attach,
1645 .attach = cpuset_attach,
1646 .populate = cpuset_populate,
1647 .post_clone = cpuset_post_clone,
1648 .subsys_id = cpuset_subsys_id,
1649 .early_init = 1,
1650};
1651
1944/* 1652/*
1945 * cpuset_init_early - just enough so that the calls to 1653 * cpuset_init_early - just enough so that the calls to
1946 * cpuset_update_task_memory_state() in early init code 1654 * cpuset_update_task_memory_state() in early init code
@@ -1949,13 +1657,11 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1949 1657
1950int __init cpuset_init_early(void) 1658int __init cpuset_init_early(void)
1951{ 1659{
1952 struct task_struct *tsk = current; 1660 top_cpuset.mems_generation = cpuset_mems_generation++;
1953
1954 tsk->cpuset = &top_cpuset;
1955 tsk->cpuset->mems_generation = cpuset_mems_generation++;
1956 return 0; 1661 return 0;
1957} 1662}
1958 1663
1664
1959/** 1665/**
1960 * cpuset_init - initialize cpusets at system boot 1666 * cpuset_init - initialize cpusets at system boot
1961 * 1667 *
@@ -1964,39 +1670,21 @@ int __init cpuset_init_early(void)
1964 1670
1965int __init cpuset_init(void) 1671int __init cpuset_init(void)
1966{ 1672{
1967 struct dentry *root; 1673 int err = 0;
1968 int err;
1969 1674
1970 top_cpuset.cpus_allowed = CPU_MASK_ALL; 1675 top_cpuset.cpus_allowed = CPU_MASK_ALL;
1971 top_cpuset.mems_allowed = NODE_MASK_ALL; 1676 top_cpuset.mems_allowed = NODE_MASK_ALL;
1972 1677
1973 fmeter_init(&top_cpuset.fmeter); 1678 fmeter_init(&top_cpuset.fmeter);
1974 top_cpuset.mems_generation = cpuset_mems_generation++; 1679 top_cpuset.mems_generation = cpuset_mems_generation++;
1975 1680 set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
1976 init_task.cpuset = &top_cpuset;
1977 1681
1978 err = register_filesystem(&cpuset_fs_type); 1682 err = register_filesystem(&cpuset_fs_type);
1979 if (err < 0) 1683 if (err < 0)
1980 goto out; 1684 return err;
1981 cpuset_mount = kern_mount(&cpuset_fs_type); 1685
1982 if (IS_ERR(cpuset_mount)) {
1983 printk(KERN_ERR "cpuset: could not mount!\n");
1984 err = PTR_ERR(cpuset_mount);
1985 cpuset_mount = NULL;
1986 goto out;
1987 }
1988 root = cpuset_mount->mnt_sb->s_root;
1989 root->d_fsdata = &top_cpuset;
1990 inc_nlink(root->d_inode);
1991 top_cpuset.dentry = root;
1992 root->d_inode->i_op = &cpuset_dir_inode_operations;
1993 number_of_cpusets = 1; 1686 number_of_cpusets = 1;
1994 err = cpuset_populate_dir(root); 1687 return 0;
1995 /* memory_pressure_enabled is in root cpuset only */
1996 if (err == 0)
1997 err = cpuset_add_file(root, &cft_memory_pressure_enabled);
1998out:
1999 return err;
2000} 1688}
2001 1689
2002/* 1690/*
@@ -2022,10 +1710,12 @@ out:
2022 1710
2023static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) 1711static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
2024{ 1712{
1713 struct cgroup *cont;
2025 struct cpuset *c; 1714 struct cpuset *c;
2026 1715
2027 /* Each of our child cpusets mems must be online */ 1716 /* Each of our child cpusets mems must be online */
2028 list_for_each_entry(c, &cur->children, sibling) { 1717 list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
1718 c = cgroup_cs(cont);
2029 guarantee_online_cpus_mems_in_subtree(c); 1719 guarantee_online_cpus_mems_in_subtree(c);
2030 if (!cpus_empty(c->cpus_allowed)) 1720 if (!cpus_empty(c->cpus_allowed))
2031 guarantee_online_cpus(c, &c->cpus_allowed); 1721 guarantee_online_cpus(c, &c->cpus_allowed);
@@ -2053,7 +1743,7 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
2053 1743
2054static void common_cpu_mem_hotplug_unplug(void) 1744static void common_cpu_mem_hotplug_unplug(void)
2055{ 1745{
2056 mutex_lock(&manage_mutex); 1746 cgroup_lock();
2057 mutex_lock(&callback_mutex); 1747 mutex_lock(&callback_mutex);
2058 1748
2059 guarantee_online_cpus_mems_in_subtree(&top_cpuset); 1749 guarantee_online_cpus_mems_in_subtree(&top_cpuset);
@@ -2061,7 +1751,7 @@ static void common_cpu_mem_hotplug_unplug(void)
2061 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 1751 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2062 1752
2063 mutex_unlock(&callback_mutex); 1753 mutex_unlock(&callback_mutex);
2064 mutex_unlock(&manage_mutex); 1754 cgroup_unlock();
2065} 1755}
2066 1756
2067/* 1757/*
@@ -2074,8 +1764,8 @@ static void common_cpu_mem_hotplug_unplug(void)
2074 * cpu_online_map on each CPU hotplug (cpuhp) event. 1764 * cpu_online_map on each CPU hotplug (cpuhp) event.
2075 */ 1765 */
2076 1766
2077static int cpuset_handle_cpuhp(struct notifier_block *nb, 1767static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
2078 unsigned long phase, void *cpu) 1768 unsigned long phase, void *unused_cpu)
2079{ 1769{
2080 if (phase == CPU_DYING || phase == CPU_DYING_FROZEN) 1770 if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
2081 return NOTIFY_DONE; 1771 return NOTIFY_DONE;
@@ -2113,109 +1803,7 @@ void __init cpuset_init_smp(void)
2113} 1803}
2114 1804
2115/** 1805/**
2116 * cpuset_fork - attach newly forked task to its parents cpuset.
2117 * @tsk: pointer to task_struct of forking parent process.
2118 *
2119 * Description: A task inherits its parent's cpuset at fork().
2120 *
2121 * A pointer to the shared cpuset was automatically copied in fork.c
2122 * by dup_task_struct(). However, we ignore that copy, since it was
2123 * not made under the protection of task_lock(), so might no longer be
2124 * a valid cpuset pointer. attach_task() might have already changed
2125 * current->cpuset, allowing the previously referenced cpuset to
2126 * be removed and freed. Instead, we task_lock(current) and copy
2127 * its present value of current->cpuset for our freshly forked child.
2128 *
2129 * At the point that cpuset_fork() is called, 'current' is the parent
2130 * task, and the passed argument 'child' points to the child task.
2131 **/
2132
2133void cpuset_fork(struct task_struct *child)
2134{
2135 task_lock(current);
2136 child->cpuset = current->cpuset;
2137 atomic_inc(&child->cpuset->count);
2138 task_unlock(current);
2139}
2140
2141/**
2142 * cpuset_exit - detach cpuset from exiting task
2143 * @tsk: pointer to task_struct of exiting process
2144 *
2145 * Description: Detach cpuset from @tsk and release it.
2146 *
2147 * Note that cpusets marked notify_on_release force every task in
2148 * them to take the global manage_mutex mutex when exiting.
2149 * This could impact scaling on very large systems. Be reluctant to
2150 * use notify_on_release cpusets where very high task exit scaling
2151 * is required on large systems.
2152 *
2153 * Don't even think about derefencing 'cs' after the cpuset use count
2154 * goes to zero, except inside a critical section guarded by manage_mutex
2155 * or callback_mutex. Otherwise a zero cpuset use count is a license to
2156 * any other task to nuke the cpuset immediately, via cpuset_rmdir().
2157 *
2158 * This routine has to take manage_mutex, not callback_mutex, because
2159 * it is holding that mutex while calling check_for_release(),
2160 * which calls kmalloc(), so can't be called holding callback_mutex().
2161 *
2162 * the_top_cpuset_hack:
2163 *
2164 * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
2165 *
2166 * Don't leave a task unable to allocate memory, as that is an
2167 * accident waiting to happen should someone add a callout in
2168 * do_exit() after the cpuset_exit() call that might allocate.
2169 * If a task tries to allocate memory with an invalid cpuset,
2170 * it will oops in cpuset_update_task_memory_state().
2171 *
2172 * We call cpuset_exit() while the task is still competent to
2173 * handle notify_on_release(), then leave the task attached to
2174 * the root cpuset (top_cpuset) for the remainder of its exit.
2175 *
2176 * To do this properly, we would increment the reference count on
2177 * top_cpuset, and near the very end of the kernel/exit.c do_exit()
2178 * code we would add a second cpuset function call, to drop that
2179 * reference. This would just create an unnecessary hot spot on
2180 * the top_cpuset reference count, to no avail.
2181 *
2182 * Normally, holding a reference to a cpuset without bumping its
2183 * count is unsafe. The cpuset could go away, or someone could
2184 * attach us to a different cpuset, decrementing the count on
2185 * the first cpuset that we never incremented. But in this case,
2186 * top_cpuset isn't going away, and either task has PF_EXITING set,
2187 * which wards off any attach_task() attempts, or task is a failed
2188 * fork, never visible to attach_task.
2189 *
2190 * Another way to do this would be to set the cpuset pointer
2191 * to NULL here, and check in cpuset_update_task_memory_state()
2192 * for a NULL pointer. This hack avoids that NULL check, for no
2193 * cost (other than this way too long comment ;).
2194 **/
2195 1806
2196void cpuset_exit(struct task_struct *tsk)
2197{
2198 struct cpuset *cs;
2199
2200 task_lock(current);
2201 cs = tsk->cpuset;
2202 tsk->cpuset = &top_cpuset; /* the_top_cpuset_hack - see above */
2203 task_unlock(current);
2204
2205 if (notify_on_release(cs)) {
2206 char *pathbuf = NULL;
2207
2208 mutex_lock(&manage_mutex);
2209 if (atomic_dec_and_test(&cs->count))
2210 check_for_release(cs, &pathbuf);
2211 mutex_unlock(&manage_mutex);
2212 cpuset_release_agent(pathbuf);
2213 } else {
2214 atomic_dec(&cs->count);
2215 }
2216}
2217
2218/**
2219 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. 1807 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
2220 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. 1808 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
2221 * 1809 *
@@ -2230,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
2230 cpumask_t mask; 1818 cpumask_t mask;
2231 1819
2232 mutex_lock(&callback_mutex); 1820 mutex_lock(&callback_mutex);
1821 mask = cpuset_cpus_allowed_locked(tsk);
1822 mutex_unlock(&callback_mutex);
1823
1824 return mask;
1825}
1826
1827/**
1828 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
1829 * Must be called with callback_mutex held.
1830 **/
1831cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
1832{
1833 cpumask_t mask;
1834
2233 task_lock(tsk); 1835 task_lock(tsk);
2234 guarantee_online_cpus(tsk->cpuset, &mask); 1836 guarantee_online_cpus(task_cs(tsk), &mask);
2235 task_unlock(tsk); 1837 task_unlock(tsk);
2236 mutex_unlock(&callback_mutex);
2237 1838
2238 return mask; 1839 return mask;
2239} 1840}
@@ -2259,7 +1860,7 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
2259 1860
2260 mutex_lock(&callback_mutex); 1861 mutex_lock(&callback_mutex);
2261 task_lock(tsk); 1862 task_lock(tsk);
2262 guarantee_online_mems(tsk->cpuset, &mask); 1863 guarantee_online_mems(task_cs(tsk), &mask);
2263 task_unlock(tsk); 1864 task_unlock(tsk);
2264 mutex_unlock(&callback_mutex); 1865 mutex_unlock(&callback_mutex);
2265 1866
@@ -2390,7 +1991,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
2390 mutex_lock(&callback_mutex); 1991 mutex_lock(&callback_mutex);
2391 1992
2392 task_lock(current); 1993 task_lock(current);
2393 cs = nearest_exclusive_ancestor(current->cpuset); 1994 cs = nearest_exclusive_ancestor(task_cs(current));
2394 task_unlock(current); 1995 task_unlock(current);
2395 1996
2396 allowed = node_isset(node, cs->mems_allowed); 1997 allowed = node_isset(node, cs->mems_allowed);
@@ -2550,14 +2151,12 @@ int cpuset_memory_pressure_enabled __read_mostly;
2550 2151
2551void __cpuset_memory_pressure_bump(void) 2152void __cpuset_memory_pressure_bump(void)
2552{ 2153{
2553 struct cpuset *cs;
2554
2555 task_lock(current); 2154 task_lock(current);
2556 cs = current->cpuset; 2155 fmeter_markevent(&task_cs(current)->fmeter);
2557 fmeter_markevent(&cs->fmeter);
2558 task_unlock(current); 2156 task_unlock(current);
2559} 2157}
2560 2158
2159#ifdef CONFIG_PROC_PID_CPUSET
2561/* 2160/*
2562 * proc_cpuset_show() 2161 * proc_cpuset_show()
2563 * - Print tasks cpuset path into seq_file. 2162 * - Print tasks cpuset path into seq_file.
@@ -2569,11 +2168,12 @@ void __cpuset_memory_pressure_bump(void)
2569 * the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks 2168 * the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks
2570 * cpuset to top_cpuset. 2169 * cpuset to top_cpuset.
2571 */ 2170 */
2572static int proc_cpuset_show(struct seq_file *m, void *v) 2171static int proc_cpuset_show(struct seq_file *m, void *unused_v)
2573{ 2172{
2574 struct pid *pid; 2173 struct pid *pid;
2575 struct task_struct *tsk; 2174 struct task_struct *tsk;
2576 char *buf; 2175 char *buf;
2176 struct cgroup_subsys_state *css;
2577 int retval; 2177 int retval;
2578 2178
2579 retval = -ENOMEM; 2179 retval = -ENOMEM;
@@ -2588,15 +2188,15 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
2588 goto out_free; 2188 goto out_free;
2589 2189
2590 retval = -EINVAL; 2190 retval = -EINVAL;
2591 mutex_lock(&manage_mutex); 2191 cgroup_lock();
2592 2192 css = task_subsys_state(tsk, cpuset_subsys_id);
2593 retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); 2193 retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
2594 if (retval < 0) 2194 if (retval < 0)
2595 goto out_unlock; 2195 goto out_unlock;
2596 seq_puts(m, buf); 2196 seq_puts(m, buf);
2597 seq_putc(m, '\n'); 2197 seq_putc(m, '\n');
2598out_unlock: 2198out_unlock:
2599 mutex_unlock(&manage_mutex); 2199 cgroup_unlock();
2600 put_task_struct(tsk); 2200 put_task_struct(tsk);
2601out_free: 2201out_free:
2602 kfree(buf); 2202 kfree(buf);
@@ -2616,6 +2216,7 @@ const struct file_operations proc_cpuset_operations = {
2616 .llseek = seq_lseek, 2216 .llseek = seq_lseek,
2617 .release = single_release, 2217 .release = single_release,
2618}; 2218};
2219#endif /* CONFIG_PROC_PID_CPUSET */
2619 2220
2620/* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */ 2221/* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
2621char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) 2222char *cpuset_task_status_allowed(struct task_struct *task, char *buffer)
diff --git a/kernel/die_notifier.c b/kernel/die_notifier.c
deleted file mode 100644
index 0d98827887a7..000000000000
--- a/kernel/die_notifier.c
+++ /dev/null
@@ -1,38 +0,0 @@
1
2#include <linux/module.h>
3#include <linux/notifier.h>
4#include <linux/vmalloc.h>
5#include <linux/kdebug.h>
6
7
8static ATOMIC_NOTIFIER_HEAD(die_chain);
9
10int notify_die(enum die_val val, const char *str,
11 struct pt_regs *regs, long err, int trap, int sig)
12{
13 struct die_args args = {
14 .regs = regs,
15 .str = str,
16 .err = err,
17 .trapnr = trap,
18 .signr = sig,
19
20 };
21
22 return atomic_notifier_call_chain(&die_chain, val, &args);
23}
24
25int register_die_notifier(struct notifier_block *nb)
26{
27 vmalloc_sync_all();
28 return atomic_notifier_chain_register(&die_chain, nb);
29}
30EXPORT_SYMBOL_GPL(register_die_notifier);
31
32int unregister_die_notifier(struct notifier_block *nb)
33{
34 return atomic_notifier_chain_unregister(&die_chain, nb);
35}
36EXPORT_SYMBOL_GPL(unregister_die_notifier);
37
38
diff --git a/kernel/exit.c b/kernel/exit.c
index 2c704c86edb3..f1aec27f1df0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -31,7 +31,7 @@
31#include <linux/taskstats_kern.h> 31#include <linux/taskstats_kern.h>
32#include <linux/delayacct.h> 32#include <linux/delayacct.h>
33#include <linux/freezer.h> 33#include <linux/freezer.h>
34#include <linux/cpuset.h> 34#include <linux/cgroup.h>
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/signal.h> 36#include <linux/signal.h>
37#include <linux/posix-timers.h> 37#include <linux/posix-timers.h>
@@ -148,6 +148,7 @@ void release_task(struct task_struct * p)
148 int zap_leader; 148 int zap_leader;
149repeat: 149repeat:
150 atomic_dec(&p->user->processes); 150 atomic_dec(&p->user->processes);
151 proc_flush_task(p);
151 write_lock_irq(&tasklist_lock); 152 write_lock_irq(&tasklist_lock);
152 ptrace_unlink(p); 153 ptrace_unlink(p);
153 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); 154 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -175,7 +176,6 @@ repeat:
175 } 176 }
176 177
177 write_unlock_irq(&tasklist_lock); 178 write_unlock_irq(&tasklist_lock);
178 proc_flush_task(p);
179 release_thread(p); 179 release_thread(p);
180 call_rcu(&p->rcu, delayed_put_task_struct); 180 call_rcu(&p->rcu, delayed_put_task_struct);
181 181
@@ -221,7 +221,7 @@ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignor
221 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 221 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
222 if (p == ignored_task 222 if (p == ignored_task
223 || p->exit_state 223 || p->exit_state
224 || is_init(p->real_parent)) 224 || is_global_init(p->real_parent))
225 continue; 225 continue;
226 if (task_pgrp(p->real_parent) != pgrp && 226 if (task_pgrp(p->real_parent) != pgrp &&
227 task_session(p->real_parent) == task_session(p)) { 227 task_session(p->real_parent) == task_session(p)) {
@@ -299,14 +299,14 @@ void __set_special_pids(pid_t session, pid_t pgrp)
299{ 299{
300 struct task_struct *curr = current->group_leader; 300 struct task_struct *curr = current->group_leader;
301 301
302 if (process_session(curr) != session) { 302 if (task_session_nr(curr) != session) {
303 detach_pid(curr, PIDTYPE_SID); 303 detach_pid(curr, PIDTYPE_SID);
304 set_signal_session(curr->signal, session); 304 set_task_session(curr, session);
305 attach_pid(curr, PIDTYPE_SID, find_pid(session)); 305 attach_pid(curr, PIDTYPE_SID, find_pid(session));
306 } 306 }
307 if (process_group(curr) != pgrp) { 307 if (task_pgrp_nr(curr) != pgrp) {
308 detach_pid(curr, PIDTYPE_PGID); 308 detach_pid(curr, PIDTYPE_PGID);
309 curr->signal->pgrp = pgrp; 309 set_task_pgrp(curr, pgrp);
310 attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp)); 310 attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp));
311 } 311 }
312} 312}
@@ -400,11 +400,12 @@ void daemonize(const char *name, ...)
400 current->fs = fs; 400 current->fs = fs;
401 atomic_inc(&fs->count); 401 atomic_inc(&fs->count);
402 402
403 exit_task_namespaces(current); 403 if (current->nsproxy != init_task.nsproxy) {
404 current->nsproxy = init_task.nsproxy; 404 get_nsproxy(init_task.nsproxy);
405 get_task_namespaces(current); 405 switch_task_namespaces(current, init_task.nsproxy);
406 }
406 407
407 exit_files(current); 408 exit_files(current);
408 current->files = init_task.files; 409 current->files = init_task.files;
409 atomic_inc(&current->files->count); 410 atomic_inc(&current->files->count);
410 411
@@ -492,7 +493,7 @@ void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
492} 493}
493EXPORT_SYMBOL(reset_files_struct); 494EXPORT_SYMBOL(reset_files_struct);
494 495
495static inline void __exit_files(struct task_struct *tsk) 496static void __exit_files(struct task_struct *tsk)
496{ 497{
497 struct files_struct * files = tsk->files; 498 struct files_struct * files = tsk->files;
498 499
@@ -509,7 +510,7 @@ void exit_files(struct task_struct *tsk)
509 __exit_files(tsk); 510 __exit_files(tsk);
510} 511}
511 512
512static inline void __put_fs_struct(struct fs_struct *fs) 513static void __put_fs_struct(struct fs_struct *fs)
513{ 514{
514 /* No need to hold fs->lock if we are killing it */ 515 /* No need to hold fs->lock if we are killing it */
515 if (atomic_dec_and_test(&fs->count)) { 516 if (atomic_dec_and_test(&fs->count)) {
@@ -530,7 +531,7 @@ void put_fs_struct(struct fs_struct *fs)
530 __put_fs_struct(fs); 531 __put_fs_struct(fs);
531} 532}
532 533
533static inline void __exit_fs(struct task_struct *tsk) 534static void __exit_fs(struct task_struct *tsk)
534{ 535{
535 struct fs_struct * fs = tsk->fs; 536 struct fs_struct * fs = tsk->fs;
536 537
@@ -665,19 +666,22 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
665 * the child reaper process (ie "init") in our pid 666 * the child reaper process (ie "init") in our pid
666 * space. 667 * space.
667 */ 668 */
668static void 669static void forget_original_parent(struct task_struct *father)
669forget_original_parent(struct task_struct *father, struct list_head *to_release)
670{ 670{
671 struct task_struct *p, *reaper = father; 671 struct task_struct *p, *n, *reaper = father;
672 struct list_head *_p, *_n; 672 struct list_head ptrace_dead;
673
674 INIT_LIST_HEAD(&ptrace_dead);
675
676 write_lock_irq(&tasklist_lock);
673 677
674 do { 678 do {
675 reaper = next_thread(reaper); 679 reaper = next_thread(reaper);
676 if (reaper == father) { 680 if (reaper == father) {
677 reaper = child_reaper(father); 681 reaper = task_child_reaper(father);
678 break; 682 break;
679 } 683 }
680 } while (reaper->exit_state); 684 } while (reaper->flags & PF_EXITING);
681 685
682 /* 686 /*
683 * There are only two places where our children can be: 687 * There are only two places where our children can be:
@@ -687,9 +691,8 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
687 * 691 *
688 * Search them and reparent children. 692 * Search them and reparent children.
689 */ 693 */
690 list_for_each_safe(_p, _n, &father->children) { 694 list_for_each_entry_safe(p, n, &father->children, sibling) {
691 int ptrace; 695 int ptrace;
692 p = list_entry(_p, struct task_struct, sibling);
693 696
694 ptrace = p->ptrace; 697 ptrace = p->ptrace;
695 698
@@ -715,13 +718,23 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
715 * while it was being traced by us, to be able to see it in wait4. 718 * while it was being traced by us, to be able to see it in wait4.
716 */ 719 */
717 if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1)) 720 if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
718 list_add(&p->ptrace_list, to_release); 721 list_add(&p->ptrace_list, &ptrace_dead);
719 } 722 }
720 list_for_each_safe(_p, _n, &father->ptrace_children) { 723
721 p = list_entry(_p, struct task_struct, ptrace_list); 724 list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
722 p->real_parent = reaper; 725 p->real_parent = reaper;
723 reparent_thread(p, father, 1); 726 reparent_thread(p, father, 1);
724 } 727 }
728
729 write_unlock_irq(&tasklist_lock);
730 BUG_ON(!list_empty(&father->children));
731 BUG_ON(!list_empty(&father->ptrace_children));
732
733 list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
734 list_del_init(&p->ptrace_list);
735 release_task(p);
736 }
737
725} 738}
726 739
727/* 740/*
@@ -732,7 +745,6 @@ static void exit_notify(struct task_struct *tsk)
732{ 745{
733 int state; 746 int state;
734 struct task_struct *t; 747 struct task_struct *t;
735 struct list_head ptrace_dead, *_p, *_n;
736 struct pid *pgrp; 748 struct pid *pgrp;
737 749
738 if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT) 750 if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
@@ -753,8 +765,6 @@ static void exit_notify(struct task_struct *tsk)
753 spin_unlock_irq(&tsk->sighand->siglock); 765 spin_unlock_irq(&tsk->sighand->siglock);
754 } 766 }
755 767
756 write_lock_irq(&tasklist_lock);
757
758 /* 768 /*
759 * This does two things: 769 * This does two things:
760 * 770 *
@@ -763,12 +773,10 @@ static void exit_notify(struct task_struct *tsk)
763 * as a result of our exiting, and if they have any stopped 773 * as a result of our exiting, and if they have any stopped
764 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) 774 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
765 */ 775 */
776 forget_original_parent(tsk);
777 exit_task_namespaces(tsk);
766 778
767 INIT_LIST_HEAD(&ptrace_dead); 779 write_lock_irq(&tasklist_lock);
768 forget_original_parent(tsk, &ptrace_dead);
769 BUG_ON(!list_empty(&tsk->children));
770 BUG_ON(!list_empty(&tsk->ptrace_children));
771
772 /* 780 /*
773 * Check to see if any process groups have become orphaned 781 * Check to see if any process groups have become orphaned
774 * as a result of our exiting, and if they have any stopped 782 * as a result of our exiting, and if they have any stopped
@@ -792,7 +800,7 @@ static void exit_notify(struct task_struct *tsk)
792 /* Let father know we died 800 /* Let father know we died
793 * 801 *
794 * Thread signals are configurable, but you aren't going to use 802 * Thread signals are configurable, but you aren't going to use
795 * that to send signals to arbitary processes. 803 * that to send signals to arbitary processes.
796 * That stops right now. 804 * That stops right now.
797 * 805 *
798 * If the parent exec id doesn't match the exec id we saved 806 * If the parent exec id doesn't match the exec id we saved
@@ -833,12 +841,6 @@ static void exit_notify(struct task_struct *tsk)
833 841
834 write_unlock_irq(&tasklist_lock); 842 write_unlock_irq(&tasklist_lock);
835 843
836 list_for_each_safe(_p, _n, &ptrace_dead) {
837 list_del_init(_p);
838 t = list_entry(_p, struct task_struct, ptrace_list);
839 release_task(t);
840 }
841
842 /* If the process is dead, release it - nobody will wait for it */ 844 /* If the process is dead, release it - nobody will wait for it */
843 if (state == EXIT_DEAD) 845 if (state == EXIT_DEAD)
844 release_task(tsk); 846 release_task(tsk);
@@ -874,10 +876,35 @@ static inline void check_stack_usage(void) {}
874 876
875static inline void exit_child_reaper(struct task_struct *tsk) 877static inline void exit_child_reaper(struct task_struct *tsk)
876{ 878{
877 if (likely(tsk->group_leader != child_reaper(tsk))) 879 if (likely(tsk->group_leader != task_child_reaper(tsk)))
878 return; 880 return;
879 881
880 panic("Attempted to kill init!"); 882 if (tsk->nsproxy->pid_ns == &init_pid_ns)
883 panic("Attempted to kill init!");
884
885 /*
886 * @tsk is the last thread in the 'cgroup-init' and is exiting.
887 * Terminate all remaining processes in the namespace and reap them
888 * before exiting @tsk.
889 *
890 * Note that @tsk (last thread of cgroup-init) may not necessarily
891 * be the child-reaper (i.e main thread of cgroup-init) of the
892 * namespace i.e the child_reaper may have already exited.
893 *
894 * Even after a child_reaper exits, we let it inherit orphaned children,
895 * because, pid_ns->child_reaper remains valid as long as there is
896 * at least one living sub-thread in the cgroup init.
897
898 * This living sub-thread of the cgroup-init will be notified when
899 * a child inherited by the 'child-reaper' exits (do_notify_parent()
900 * uses __group_send_sig_info()). Further, when reaping child processes,
901 * do_wait() iterates over children of all living sub threads.
902
903 * i.e even though 'child_reaper' thread is listed as the parent of the
904 * orphaned children, any living sub-thread in the cgroup-init can
905 * perform the role of the child_reaper.
906 */
907 zap_pid_ns_processes(tsk->nsproxy->pid_ns);
881} 908}
882 909
883fastcall NORET_TYPE void do_exit(long code) 910fastcall NORET_TYPE void do_exit(long code)
@@ -932,7 +959,7 @@ fastcall NORET_TYPE void do_exit(long code)
932 959
933 if (unlikely(in_atomic())) 960 if (unlikely(in_atomic()))
934 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", 961 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
935 current->comm, current->pid, 962 current->comm, task_pid_nr(current),
936 preempt_count()); 963 preempt_count());
937 964
938 acct_update_integrals(tsk); 965 acct_update_integrals(tsk);
@@ -972,7 +999,7 @@ fastcall NORET_TYPE void do_exit(long code)
972 __exit_fs(tsk); 999 __exit_fs(tsk);
973 check_stack_usage(); 1000 check_stack_usage();
974 exit_thread(); 1001 exit_thread();
975 cpuset_exit(tsk); 1002 cgroup_exit(tsk, 1);
976 exit_keys(tsk); 1003 exit_keys(tsk);
977 1004
978 if (group_dead && tsk->signal->leader) 1005 if (group_dead && tsk->signal->leader)
@@ -983,7 +1010,6 @@ fastcall NORET_TYPE void do_exit(long code)
983 module_put(tsk->binfmt->module); 1010 module_put(tsk->binfmt->module);
984 1011
985 proc_exit_connector(tsk); 1012 proc_exit_connector(tsk);
986 exit_task_namespaces(tsk);
987 exit_notify(tsk); 1013 exit_notify(tsk);
988#ifdef CONFIG_NUMA 1014#ifdef CONFIG_NUMA
989 mpol_free(tsk->mempolicy); 1015 mpol_free(tsk->mempolicy);
@@ -1086,15 +1112,17 @@ asmlinkage void sys_exit_group(int error_code)
1086static int eligible_child(pid_t pid, int options, struct task_struct *p) 1112static int eligible_child(pid_t pid, int options, struct task_struct *p)
1087{ 1113{
1088 int err; 1114 int err;
1115 struct pid_namespace *ns;
1089 1116
1117 ns = current->nsproxy->pid_ns;
1090 if (pid > 0) { 1118 if (pid > 0) {
1091 if (p->pid != pid) 1119 if (task_pid_nr_ns(p, ns) != pid)
1092 return 0; 1120 return 0;
1093 } else if (!pid) { 1121 } else if (!pid) {
1094 if (process_group(p) != process_group(current)) 1122 if (task_pgrp_nr_ns(p, ns) != task_pgrp_vnr(current))
1095 return 0; 1123 return 0;
1096 } else if (pid != -1) { 1124 } else if (pid != -1) {
1097 if (process_group(p) != -pid) 1125 if (task_pgrp_nr_ns(p, ns) != -pid)
1098 return 0; 1126 return 0;
1099 } 1127 }
1100 1128
@@ -1164,9 +1192,12 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1164{ 1192{
1165 unsigned long state; 1193 unsigned long state;
1166 int retval, status, traced; 1194 int retval, status, traced;
1195 struct pid_namespace *ns;
1196
1197 ns = current->nsproxy->pid_ns;
1167 1198
1168 if (unlikely(noreap)) { 1199 if (unlikely(noreap)) {
1169 pid_t pid = p->pid; 1200 pid_t pid = task_pid_nr_ns(p, ns);
1170 uid_t uid = p->uid; 1201 uid_t uid = p->uid;
1171 int exit_code = p->exit_code; 1202 int exit_code = p->exit_code;
1172 int why, status; 1203 int why, status;
@@ -1285,11 +1316,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1285 retval = put_user(status, &infop->si_status); 1316 retval = put_user(status, &infop->si_status);
1286 } 1317 }
1287 if (!retval && infop) 1318 if (!retval && infop)
1288 retval = put_user(p->pid, &infop->si_pid); 1319 retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid);
1289 if (!retval && infop) 1320 if (!retval && infop)
1290 retval = put_user(p->uid, &infop->si_uid); 1321 retval = put_user(p->uid, &infop->si_uid);
1291 if (!retval) 1322 if (!retval)
1292 retval = p->pid; 1323 retval = task_pid_nr_ns(p, ns);
1293 1324
1294 if (traced) { 1325 if (traced) {
1295 write_lock_irq(&tasklist_lock); 1326 write_lock_irq(&tasklist_lock);
@@ -1326,6 +1357,7 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
1326 int __user *stat_addr, struct rusage __user *ru) 1357 int __user *stat_addr, struct rusage __user *ru)
1327{ 1358{
1328 int retval, exit_code; 1359 int retval, exit_code;
1360 struct pid_namespace *ns;
1329 1361
1330 if (!p->exit_code) 1362 if (!p->exit_code)
1331 return 0; 1363 return 0;
@@ -1344,11 +1376,12 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
1344 * keep holding onto the tasklist_lock while we call getrusage and 1376 * keep holding onto the tasklist_lock while we call getrusage and
1345 * possibly take page faults for user memory. 1377 * possibly take page faults for user memory.
1346 */ 1378 */
1379 ns = current->nsproxy->pid_ns;
1347 get_task_struct(p); 1380 get_task_struct(p);
1348 read_unlock(&tasklist_lock); 1381 read_unlock(&tasklist_lock);
1349 1382
1350 if (unlikely(noreap)) { 1383 if (unlikely(noreap)) {
1351 pid_t pid = p->pid; 1384 pid_t pid = task_pid_nr_ns(p, ns);
1352 uid_t uid = p->uid; 1385 uid_t uid = p->uid;
1353 int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; 1386 int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
1354 1387
@@ -1419,11 +1452,11 @@ bail_ref:
1419 if (!retval && infop) 1452 if (!retval && infop)
1420 retval = put_user(exit_code, &infop->si_status); 1453 retval = put_user(exit_code, &infop->si_status);
1421 if (!retval && infop) 1454 if (!retval && infop)
1422 retval = put_user(p->pid, &infop->si_pid); 1455 retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid);
1423 if (!retval && infop) 1456 if (!retval && infop)
1424 retval = put_user(p->uid, &infop->si_uid); 1457 retval = put_user(p->uid, &infop->si_uid);
1425 if (!retval) 1458 if (!retval)
1426 retval = p->pid; 1459 retval = task_pid_nr_ns(p, ns);
1427 put_task_struct(p); 1460 put_task_struct(p);
1428 1461
1429 BUG_ON(!retval); 1462 BUG_ON(!retval);
@@ -1443,6 +1476,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1443 int retval; 1476 int retval;
1444 pid_t pid; 1477 pid_t pid;
1445 uid_t uid; 1478 uid_t uid;
1479 struct pid_namespace *ns;
1446 1480
1447 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) 1481 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
1448 return 0; 1482 return 0;
@@ -1457,7 +1491,8 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1457 p->signal->flags &= ~SIGNAL_STOP_CONTINUED; 1491 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
1458 spin_unlock_irq(&p->sighand->siglock); 1492 spin_unlock_irq(&p->sighand->siglock);
1459 1493
1460 pid = p->pid; 1494 ns = current->nsproxy->pid_ns;
1495 pid = task_pid_nr_ns(p, ns);
1461 uid = p->uid; 1496 uid = p->uid;
1462 get_task_struct(p); 1497 get_task_struct(p);
1463 read_unlock(&tasklist_lock); 1498 read_unlock(&tasklist_lock);
@@ -1468,7 +1503,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1468 if (!retval && stat_addr) 1503 if (!retval && stat_addr)
1469 retval = put_user(0xffff, stat_addr); 1504 retval = put_user(0xffff, stat_addr);
1470 if (!retval) 1505 if (!retval)
1471 retval = p->pid; 1506 retval = task_pid_nr_ns(p, ns);
1472 } else { 1507 } else {
1473 retval = wait_noreap_copyout(p, pid, uid, 1508 retval = wait_noreap_copyout(p, pid, uid,
1474 CLD_CONTINUED, SIGCONT, 1509 CLD_CONTINUED, SIGCONT,
@@ -1517,12 +1552,9 @@ repeat:
1517 tsk = current; 1552 tsk = current;
1518 do { 1553 do {
1519 struct task_struct *p; 1554 struct task_struct *p;
1520 struct list_head *_p;
1521 int ret; 1555 int ret;
1522 1556
1523 list_for_each(_p,&tsk->children) { 1557 list_for_each_entry(p, &tsk->children, sibling) {
1524 p = list_entry(_p, struct task_struct, sibling);
1525
1526 ret = eligible_child(pid, options, p); 1558 ret = eligible_child(pid, options, p);
1527 if (!ret) 1559 if (!ret)
1528 continue; 1560 continue;
@@ -1604,9 +1636,8 @@ check_continued:
1604 } 1636 }
1605 } 1637 }
1606 if (!flag) { 1638 if (!flag) {
1607 list_for_each(_p, &tsk->ptrace_children) { 1639 list_for_each_entry(p, &tsk->ptrace_children,
1608 p = list_entry(_p, struct task_struct, 1640 ptrace_list) {
1609 ptrace_list);
1610 if (!eligible_child(pid, options, p)) 1641 if (!eligible_child(pid, options, p))
1611 continue; 1642 continue;
1612 flag = 1; 1643 flag = 1;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2ce28f165e31..ddafdfac9456 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -29,7 +29,7 @@
29#include <linux/nsproxy.h> 29#include <linux/nsproxy.h>
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/cpuset.h> 32#include <linux/cgroup.h>
33#include <linux/security.h> 33#include <linux/security.h>
34#include <linux/swap.h> 34#include <linux/swap.h>
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
@@ -50,6 +50,7 @@
50#include <linux/taskstats_kern.h> 50#include <linux/taskstats_kern.h>
51#include <linux/random.h> 51#include <linux/random.h>
52#include <linux/tty.h> 52#include <linux/tty.h>
53#include <linux/proc_fs.h>
53 54
54#include <asm/pgtable.h> 55#include <asm/pgtable.h>
55#include <asm/pgalloc.h> 56#include <asm/pgalloc.h>
@@ -116,7 +117,7 @@ EXPORT_SYMBOL(free_task);
116 117
117void __put_task_struct(struct task_struct *tsk) 118void __put_task_struct(struct task_struct *tsk)
118{ 119{
119 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 120 WARN_ON(!tsk->exit_state);
120 WARN_ON(atomic_read(&tsk->usage)); 121 WARN_ON(atomic_read(&tsk->usage));
121 WARN_ON(tsk == current); 122 WARN_ON(tsk == current);
122 123
@@ -205,7 +206,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
205} 206}
206 207
207#ifdef CONFIG_MMU 208#ifdef CONFIG_MMU
208static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) 209static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
209{ 210{
210 struct vm_area_struct *mpnt, *tmp, **pprev; 211 struct vm_area_struct *mpnt, *tmp, **pprev;
211 struct rb_node **rb_link, *rb_parent; 212 struct rb_node **rb_link, *rb_parent;
@@ -583,7 +584,7 @@ fail_nomem:
583 return retval; 584 return retval;
584} 585}
585 586
586static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 587static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
587{ 588{
588 struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); 589 struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
589 /* We don't need to lock fs - think why ;-) */ 590 /* We don't need to lock fs - think why ;-) */
@@ -615,7 +616,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
615 616
616EXPORT_SYMBOL_GPL(copy_fs_struct); 617EXPORT_SYMBOL_GPL(copy_fs_struct);
617 618
618static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) 619static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
619{ 620{
620 if (clone_flags & CLONE_FS) { 621 if (clone_flags & CLONE_FS) {
621 atomic_inc(&current->fs->count); 622 atomic_inc(&current->fs->count);
@@ -818,7 +819,7 @@ int unshare_files(void)
818 819
819EXPORT_SYMBOL(unshare_files); 820EXPORT_SYMBOL(unshare_files);
820 821
821static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) 822static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
822{ 823{
823 struct sighand_struct *sig; 824 struct sighand_struct *sig;
824 825
@@ -841,7 +842,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
841 kmem_cache_free(sighand_cachep, sighand); 842 kmem_cache_free(sighand_cachep, sighand);
842} 843}
843 844
844static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) 845static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
845{ 846{
846 struct signal_struct *sig; 847 struct signal_struct *sig;
847 int ret; 848 int ret;
@@ -923,7 +924,7 @@ void __cleanup_signal(struct signal_struct *sig)
923 kmem_cache_free(signal_cachep, sig); 924 kmem_cache_free(signal_cachep, sig);
924} 925}
925 926
926static inline void cleanup_signal(struct task_struct *tsk) 927static void cleanup_signal(struct task_struct *tsk)
927{ 928{
928 struct signal_struct *sig = tsk->signal; 929 struct signal_struct *sig = tsk->signal;
929 930
@@ -933,7 +934,7 @@ static inline void cleanup_signal(struct task_struct *tsk)
933 __cleanup_signal(sig); 934 __cleanup_signal(sig);
934} 935}
935 936
936static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) 937static void copy_flags(unsigned long clone_flags, struct task_struct *p)
937{ 938{
938 unsigned long new_flags = p->flags; 939 unsigned long new_flags = p->flags;
939 940
@@ -949,10 +950,10 @@ asmlinkage long sys_set_tid_address(int __user *tidptr)
949{ 950{
950 current->clear_child_tid = tidptr; 951 current->clear_child_tid = tidptr;
951 952
952 return current->pid; 953 return task_pid_vnr(current);
953} 954}
954 955
955static inline void rt_mutex_init_task(struct task_struct *p) 956static void rt_mutex_init_task(struct task_struct *p)
956{ 957{
957 spin_lock_init(&p->pi_lock); 958 spin_lock_init(&p->pi_lock);
958#ifdef CONFIG_RT_MUTEXES 959#ifdef CONFIG_RT_MUTEXES
@@ -973,12 +974,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
973 unsigned long stack_start, 974 unsigned long stack_start,
974 struct pt_regs *regs, 975 struct pt_regs *regs,
975 unsigned long stack_size, 976 unsigned long stack_size,
976 int __user *parent_tidptr,
977 int __user *child_tidptr, 977 int __user *child_tidptr,
978 struct pid *pid) 978 struct pid *pid)
979{ 979{
980 int retval; 980 int retval;
981 struct task_struct *p = NULL; 981 struct task_struct *p;
982 int cgroup_callbacks_done = 0;
982 983
983 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 984 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
984 return ERR_PTR(-EINVAL); 985 return ERR_PTR(-EINVAL);
@@ -1042,12 +1043,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1042 p->did_exec = 0; 1043 p->did_exec = 0;
1043 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1044 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1044 copy_flags(clone_flags, p); 1045 copy_flags(clone_flags, p);
1045 p->pid = pid_nr(pid);
1046 retval = -EFAULT;
1047 if (clone_flags & CLONE_PARENT_SETTID)
1048 if (put_user(p->pid, parent_tidptr))
1049 goto bad_fork_cleanup_delays_binfmt;
1050
1051 INIT_LIST_HEAD(&p->children); 1046 INIT_LIST_HEAD(&p->children);
1052 INIT_LIST_HEAD(&p->sibling); 1047 INIT_LIST_HEAD(&p->sibling);
1053 p->vfork_done = NULL; 1048 p->vfork_done = NULL;
@@ -1087,13 +1082,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1087#endif 1082#endif
1088 p->io_context = NULL; 1083 p->io_context = NULL;
1089 p->audit_context = NULL; 1084 p->audit_context = NULL;
1090 cpuset_fork(p); 1085 cgroup_fork(p);
1091#ifdef CONFIG_NUMA 1086#ifdef CONFIG_NUMA
1092 p->mempolicy = mpol_copy(p->mempolicy); 1087 p->mempolicy = mpol_copy(p->mempolicy);
1093 if (IS_ERR(p->mempolicy)) { 1088 if (IS_ERR(p->mempolicy)) {
1094 retval = PTR_ERR(p->mempolicy); 1089 retval = PTR_ERR(p->mempolicy);
1095 p->mempolicy = NULL; 1090 p->mempolicy = NULL;
1096 goto bad_fork_cleanup_cpuset; 1091 goto bad_fork_cleanup_cgroup;
1097 } 1092 }
1098 mpol_fix_fork_child_flag(p); 1093 mpol_fix_fork_child_flag(p);
1099#endif 1094#endif
@@ -1126,10 +1121,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1126 p->blocked_on = NULL; /* not blocked yet */ 1121 p->blocked_on = NULL; /* not blocked yet */
1127#endif 1122#endif
1128 1123
1129 p->tgid = p->pid;
1130 if (clone_flags & CLONE_THREAD)
1131 p->tgid = current->tgid;
1132
1133 if ((retval = security_task_alloc(p))) 1124 if ((retval = security_task_alloc(p)))
1134 goto bad_fork_cleanup_policy; 1125 goto bad_fork_cleanup_policy;
1135 if ((retval = audit_alloc(p))) 1126 if ((retval = audit_alloc(p)))
@@ -1155,6 +1146,24 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1155 if (retval) 1146 if (retval)
1156 goto bad_fork_cleanup_namespaces; 1147 goto bad_fork_cleanup_namespaces;
1157 1148
1149 if (pid != &init_struct_pid) {
1150 retval = -ENOMEM;
1151 pid = alloc_pid(task_active_pid_ns(p));
1152 if (!pid)
1153 goto bad_fork_cleanup_namespaces;
1154
1155 if (clone_flags & CLONE_NEWPID) {
1156 retval = pid_ns_prepare_proc(task_active_pid_ns(p));
1157 if (retval < 0)
1158 goto bad_fork_free_pid;
1159 }
1160 }
1161
1162 p->pid = pid_nr(pid);
1163 p->tgid = p->pid;
1164 if (clone_flags & CLONE_THREAD)
1165 p->tgid = current->tgid;
1166
1158 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1167 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1159 /* 1168 /*
1160 * Clear TID on mm_release()? 1169 * Clear TID on mm_release()?
@@ -1204,6 +1213,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1204 /* Perform scheduler related setup. Assign this task to a CPU. */ 1213 /* Perform scheduler related setup. Assign this task to a CPU. */
1205 sched_fork(p, clone_flags); 1214 sched_fork(p, clone_flags);
1206 1215
1216 /* Now that the task is set up, run cgroup callbacks if
1217 * necessary. We need to run them before the task is visible
1218 * on the tasklist. */
1219 cgroup_fork_callbacks(p);
1220 cgroup_callbacks_done = 1;
1221
1207 /* Need tasklist lock for parent etc handling! */ 1222 /* Need tasklist lock for parent etc handling! */
1208 write_lock_irq(&tasklist_lock); 1223 write_lock_irq(&tasklist_lock);
1209 1224
@@ -1246,7 +1261,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1246 spin_unlock(&current->sighand->siglock); 1261 spin_unlock(&current->sighand->siglock);
1247 write_unlock_irq(&tasklist_lock); 1262 write_unlock_irq(&tasklist_lock);
1248 retval = -ERESTARTNOINTR; 1263 retval = -ERESTARTNOINTR;
1249 goto bad_fork_cleanup_namespaces; 1264 goto bad_fork_free_pid;
1250 } 1265 }
1251 1266
1252 if (clone_flags & CLONE_THREAD) { 1267 if (clone_flags & CLONE_THREAD) {
@@ -1275,11 +1290,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1275 __ptrace_link(p, current->parent); 1290 __ptrace_link(p, current->parent);
1276 1291
1277 if (thread_group_leader(p)) { 1292 if (thread_group_leader(p)) {
1278 p->signal->tty = current->signal->tty; 1293 if (clone_flags & CLONE_NEWPID) {
1279 p->signal->pgrp = process_group(current); 1294 p->nsproxy->pid_ns->child_reaper = p;
1280 set_signal_session(p->signal, process_session(current)); 1295 p->signal->tty = NULL;
1281 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1296 set_task_pgrp(p, p->pid);
1282 attach_pid(p, PIDTYPE_SID, task_session(current)); 1297 set_task_session(p, p->pid);
1298 attach_pid(p, PIDTYPE_PGID, pid);
1299 attach_pid(p, PIDTYPE_SID, pid);
1300 } else {
1301 p->signal->tty = current->signal->tty;
1302 set_task_pgrp(p, task_pgrp_nr(current));
1303 set_task_session(p, task_session_nr(current));
1304 attach_pid(p, PIDTYPE_PGID,
1305 task_pgrp(current));
1306 attach_pid(p, PIDTYPE_SID,
1307 task_session(current));
1308 }
1283 1309
1284 list_add_tail_rcu(&p->tasks, &init_task.tasks); 1310 list_add_tail_rcu(&p->tasks, &init_task.tasks);
1285 __get_cpu_var(process_counts)++; 1311 __get_cpu_var(process_counts)++;
@@ -1292,8 +1318,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1292 spin_unlock(&current->sighand->siglock); 1318 spin_unlock(&current->sighand->siglock);
1293 write_unlock_irq(&tasklist_lock); 1319 write_unlock_irq(&tasklist_lock);
1294 proc_fork_connector(p); 1320 proc_fork_connector(p);
1321 cgroup_post_fork(p);
1295 return p; 1322 return p;
1296 1323
1324bad_fork_free_pid:
1325 if (pid != &init_struct_pid)
1326 free_pid(pid);
1297bad_fork_cleanup_namespaces: 1327bad_fork_cleanup_namespaces:
1298 exit_task_namespaces(p); 1328 exit_task_namespaces(p);
1299bad_fork_cleanup_keys: 1329bad_fork_cleanup_keys:
@@ -1318,10 +1348,9 @@ bad_fork_cleanup_security:
1318bad_fork_cleanup_policy: 1348bad_fork_cleanup_policy:
1319#ifdef CONFIG_NUMA 1349#ifdef CONFIG_NUMA
1320 mpol_free(p->mempolicy); 1350 mpol_free(p->mempolicy);
1321bad_fork_cleanup_cpuset: 1351bad_fork_cleanup_cgroup:
1322#endif 1352#endif
1323 cpuset_exit(p); 1353 cgroup_exit(p, cgroup_callbacks_done);
1324bad_fork_cleanup_delays_binfmt:
1325 delayacct_tsk_free(p); 1354 delayacct_tsk_free(p);
1326 if (p->binfmt) 1355 if (p->binfmt)
1327 module_put(p->binfmt->module); 1356 module_put(p->binfmt->module);
@@ -1348,7 +1377,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1348 struct task_struct *task; 1377 struct task_struct *task;
1349 struct pt_regs regs; 1378 struct pt_regs regs;
1350 1379
1351 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 1380 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1352 &init_struct_pid); 1381 &init_struct_pid);
1353 if (!IS_ERR(task)) 1382 if (!IS_ERR(task))
1354 init_idle(task, cpu); 1383 init_idle(task, cpu);
@@ -1356,7 +1385,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1356 return task; 1385 return task;
1357} 1386}
1358 1387
1359static inline int fork_traceflag (unsigned clone_flags) 1388static int fork_traceflag(unsigned clone_flags)
1360{ 1389{
1361 if (clone_flags & CLONE_UNTRACED) 1390 if (clone_flags & CLONE_UNTRACED)
1362 return 0; 1391 return 0;
@@ -1387,19 +1416,16 @@ long do_fork(unsigned long clone_flags,
1387{ 1416{
1388 struct task_struct *p; 1417 struct task_struct *p;
1389 int trace = 0; 1418 int trace = 0;
1390 struct pid *pid = alloc_pid();
1391 long nr; 1419 long nr;
1392 1420
1393 if (!pid)
1394 return -EAGAIN;
1395 nr = pid->nr;
1396 if (unlikely(current->ptrace)) { 1421 if (unlikely(current->ptrace)) {
1397 trace = fork_traceflag (clone_flags); 1422 trace = fork_traceflag (clone_flags);
1398 if (trace) 1423 if (trace)
1399 clone_flags |= CLONE_PTRACE; 1424 clone_flags |= CLONE_PTRACE;
1400 } 1425 }
1401 1426
1402 p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 1427 p = copy_process(clone_flags, stack_start, regs, stack_size,
1428 child_tidptr, NULL);
1403 /* 1429 /*
1404 * Do this prior waking up the new thread - the thread pointer 1430 * Do this prior waking up the new thread - the thread pointer
1405 * might get invalid after that point, if the thread exits quickly. 1431 * might get invalid after that point, if the thread exits quickly.
@@ -1407,6 +1433,17 @@ long do_fork(unsigned long clone_flags,
1407 if (!IS_ERR(p)) { 1433 if (!IS_ERR(p)) {
1408 struct completion vfork; 1434 struct completion vfork;
1409 1435
1436 /*
1437 * this is enough to call pid_nr_ns here, but this if
1438 * improves optimisation of regular fork()
1439 */
1440 nr = (clone_flags & CLONE_NEWPID) ?
1441 task_pid_nr_ns(p, current->nsproxy->pid_ns) :
1442 task_pid_vnr(p);
1443
1444 if (clone_flags & CLONE_PARENT_SETTID)
1445 put_user(nr, parent_tidptr);
1446
1410 if (clone_flags & CLONE_VFORK) { 1447 if (clone_flags & CLONE_VFORK) {
1411 p->vfork_done = &vfork; 1448 p->vfork_done = &vfork;
1412 init_completion(&vfork); 1449 init_completion(&vfork);
@@ -1440,7 +1477,6 @@ long do_fork(unsigned long clone_flags,
1440 } 1477 }
1441 } 1478 }
1442 } else { 1479 } else {
1443 free_pid(pid);
1444 nr = PTR_ERR(p); 1480 nr = PTR_ERR(p);
1445 } 1481 }
1446 return nr; 1482 return nr;
@@ -1485,7 +1521,7 @@ void __init proc_caches_init(void)
1485 * Check constraints on flags passed to the unshare system call and 1521 * Check constraints on flags passed to the unshare system call and
1486 * force unsharing of additional process context as appropriate. 1522 * force unsharing of additional process context as appropriate.
1487 */ 1523 */
1488static inline void check_unshare_flags(unsigned long *flags_ptr) 1524static void check_unshare_flags(unsigned long *flags_ptr)
1489{ 1525{
1490 /* 1526 /*
1491 * If unsharing a thread from a thread group, must also 1527 * If unsharing a thread from a thread group, must also
@@ -1617,7 +1653,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1617 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; 1653 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1618 struct files_struct *fd, *new_fd = NULL; 1654 struct files_struct *fd, *new_fd = NULL;
1619 struct sem_undo_list *new_ulist = NULL; 1655 struct sem_undo_list *new_ulist = NULL;
1620 struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL; 1656 struct nsproxy *new_nsproxy = NULL;
1621 1657
1622 check_unshare_flags(&unshare_flags); 1658 check_unshare_flags(&unshare_flags);
1623 1659
@@ -1647,14 +1683,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1647 1683
1648 if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { 1684 if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) {
1649 1685
1650 task_lock(current);
1651
1652 if (new_nsproxy) { 1686 if (new_nsproxy) {
1653 old_nsproxy = current->nsproxy; 1687 switch_task_namespaces(current, new_nsproxy);
1654 current->nsproxy = new_nsproxy; 1688 new_nsproxy = NULL;
1655 new_nsproxy = old_nsproxy;
1656 } 1689 }
1657 1690
1691 task_lock(current);
1692
1658 if (new_fs) { 1693 if (new_fs) {
1659 fs = current->fs; 1694 fs = current->fs;
1660 current->fs = new_fs; 1695 current->fs = new_fs;
diff --git a/kernel/futex.c b/kernel/futex.c
index e45a65e41686..32710451dc20 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -53,6 +53,9 @@
53#include <linux/signal.h> 53#include <linux/signal.h>
54#include <linux/module.h> 54#include <linux/module.h>
55#include <linux/magic.h> 55#include <linux/magic.h>
56#include <linux/pid.h>
57#include <linux/nsproxy.h>
58
56#include <asm/futex.h> 59#include <asm/futex.h>
57 60
58#include "rtmutex_common.h" 61#include "rtmutex_common.h"
@@ -443,8 +446,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
443 struct task_struct *p; 446 struct task_struct *p;
444 447
445 rcu_read_lock(); 448 rcu_read_lock();
446 p = find_task_by_pid(pid); 449 p = find_task_by_vpid(pid);
447
448 if (!p || ((current->euid != p->euid) && (current->euid != p->uid))) 450 if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
449 p = ERR_PTR(-ESRCH); 451 p = ERR_PTR(-ESRCH);
450 else 452 else
@@ -653,7 +655,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
653 if (!(uval & FUTEX_OWNER_DIED)) { 655 if (!(uval & FUTEX_OWNER_DIED)) {
654 int ret = 0; 656 int ret = 0;
655 657
656 newval = FUTEX_WAITERS | new_owner->pid; 658 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
657 659
658 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 660 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
659 661
@@ -1106,7 +1108,7 @@ static void unqueue_me_pi(struct futex_q *q)
1106static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1108static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1107 struct task_struct *curr) 1109 struct task_struct *curr)
1108{ 1110{
1109 u32 newtid = curr->pid | FUTEX_WAITERS; 1111 u32 newtid = task_pid_vnr(curr) | FUTEX_WAITERS;
1110 struct futex_pi_state *pi_state = q->pi_state; 1112 struct futex_pi_state *pi_state = q->pi_state;
1111 u32 uval, curval, newval; 1113 u32 uval, curval, newval;
1112 int ret; 1114 int ret;
@@ -1368,7 +1370,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1368 * (by doing a 0 -> TID atomic cmpxchg), while holding all 1370 * (by doing a 0 -> TID atomic cmpxchg), while holding all
1369 * the locks. It will most likely not succeed. 1371 * the locks. It will most likely not succeed.
1370 */ 1372 */
1371 newval = current->pid; 1373 newval = task_pid_vnr(current);
1372 1374
1373 curval = cmpxchg_futex_value_locked(uaddr, 0, newval); 1375 curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
1374 1376
@@ -1379,7 +1381,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1379 * Detect deadlocks. In case of REQUEUE_PI this is a valid 1381 * Detect deadlocks. In case of REQUEUE_PI this is a valid
1380 * situation and we return success to user space. 1382 * situation and we return success to user space.
1381 */ 1383 */
1382 if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) { 1384 if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
1383 ret = -EDEADLK; 1385 ret = -EDEADLK;
1384 goto out_unlock_release_sem; 1386 goto out_unlock_release_sem;
1385 } 1387 }
@@ -1408,7 +1410,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1408 */ 1410 */
1409 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { 1411 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
1410 /* Keep the OWNER_DIED bit */ 1412 /* Keep the OWNER_DIED bit */
1411 newval = (curval & ~FUTEX_TID_MASK) | current->pid; 1413 newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
1412 ownerdied = 0; 1414 ownerdied = 0;
1413 lock_taken = 1; 1415 lock_taken = 1;
1414 } 1416 }
@@ -1587,7 +1589,7 @@ retry:
1587 /* 1589 /*
1588 * We release only a lock we actually own: 1590 * We release only a lock we actually own:
1589 */ 1591 */
1590 if ((uval & FUTEX_TID_MASK) != current->pid) 1592 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
1591 return -EPERM; 1593 return -EPERM;
1592 /* 1594 /*
1593 * First take all the futex related locks: 1595 * First take all the futex related locks:
@@ -1608,7 +1610,7 @@ retry_unlocked:
1608 * anyone else up: 1610 * anyone else up:
1609 */ 1611 */
1610 if (!(uval & FUTEX_OWNER_DIED)) 1612 if (!(uval & FUTEX_OWNER_DIED))
1611 uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0); 1613 uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
1612 1614
1613 1615
1614 if (unlikely(uval == -EFAULT)) 1616 if (unlikely(uval == -EFAULT))
@@ -1617,7 +1619,7 @@ retry_unlocked:
1617 * Rare case: we managed to release the lock atomically, 1619 * Rare case: we managed to release the lock atomically,
1618 * no need to wake anyone else up: 1620 * no need to wake anyone else up:
1619 */ 1621 */
1620 if (unlikely(uval == current->pid)) 1622 if (unlikely(uval == task_pid_vnr(current)))
1621 goto out_unlock; 1623 goto out_unlock;
1622 1624
1623 /* 1625 /*
@@ -1854,7 +1856,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
1854 1856
1855 ret = -ESRCH; 1857 ret = -ESRCH;
1856 rcu_read_lock(); 1858 rcu_read_lock();
1857 p = find_task_by_pid(pid); 1859 p = find_task_by_vpid(pid);
1858 if (!p) 1860 if (!p)
1859 goto err_unlock; 1861 goto err_unlock;
1860 ret = -EPERM; 1862 ret = -EPERM;
@@ -1887,7 +1889,7 @@ retry:
1887 if (get_user(uval, uaddr)) 1889 if (get_user(uval, uaddr))
1888 return -1; 1890 return -1;
1889 1891
1890 if ((uval & FUTEX_TID_MASK) == curr->pid) { 1892 if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
1891 /* 1893 /*
1892 * Ok, this dying thread is truly holding a futex 1894 * Ok, this dying thread is truly holding a futex
1893 * of interest. Set the OWNER_DIED bit atomically 1895 * of interest. Set the OWNER_DIED bit atomically
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 2c2e2954b713..00b572666cc7 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <linux/compat.h> 10#include <linux/compat.h>
11#include <linux/nsproxy.h>
11#include <linux/futex.h> 12#include <linux/futex.h>
12 13
13#include <asm/uaccess.h> 14#include <asm/uaccess.h>
@@ -124,7 +125,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
124 125
125 ret = -ESRCH; 126 ret = -ESRCH;
126 read_lock(&tasklist_lock); 127 read_lock(&tasklist_lock);
127 p = find_task_by_pid(pid); 128 p = find_task_by_vpid(pid);
128 if (!p) 129 if (!p)
129 goto err_unlock; 130 goto err_unlock;
130 ret = -EPERM; 131 ret = -EPERM;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index e9f1b4ea504d..aa74a1ef2da8 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -51,7 +51,7 @@ struct resource crashk_res = {
51 51
52int kexec_should_crash(struct task_struct *p) 52int kexec_should_crash(struct task_struct *p)
53{ 53{
54 if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops) 54 if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
55 return 1; 55 return 1;
56 return 0; 56 return 0;
57} 57}
@@ -1146,6 +1146,172 @@ static int __init crash_notes_memory_init(void)
1146} 1146}
1147module_init(crash_notes_memory_init) 1147module_init(crash_notes_memory_init)
1148 1148
1149
1150/*
1151 * parsing the "crashkernel" commandline
1152 *
1153 * this code is intended to be called from architecture specific code
1154 */
1155
1156
1157/*
1158 * This function parses command lines in the format
1159 *
1160 * crashkernel=ramsize-range:size[,...][@offset]
1161 *
1162 * The function returns 0 on success and -EINVAL on failure.
1163 */
1164static int __init parse_crashkernel_mem(char *cmdline,
1165 unsigned long long system_ram,
1166 unsigned long long *crash_size,
1167 unsigned long long *crash_base)
1168{
1169 char *cur = cmdline, *tmp;
1170
1171 /* for each entry of the comma-separated list */
1172 do {
1173 unsigned long long start, end = ULLONG_MAX, size;
1174
1175 /* get the start of the range */
1176 start = memparse(cur, &tmp);
1177 if (cur == tmp) {
1178 pr_warning("crashkernel: Memory value expected\n");
1179 return -EINVAL;
1180 }
1181 cur = tmp;
1182 if (*cur != '-') {
1183 pr_warning("crashkernel: '-' expected\n");
1184 return -EINVAL;
1185 }
1186 cur++;
1187
1188 /* if no ':' is here, than we read the end */
1189 if (*cur != ':') {
1190 end = memparse(cur, &tmp);
1191 if (cur == tmp) {
1192 pr_warning("crashkernel: Memory "
1193 "value expected\n");
1194 return -EINVAL;
1195 }
1196 cur = tmp;
1197 if (end <= start) {
1198 pr_warning("crashkernel: end <= start\n");
1199 return -EINVAL;
1200 }
1201 }
1202
1203 if (*cur != ':') {
1204 pr_warning("crashkernel: ':' expected\n");
1205 return -EINVAL;
1206 }
1207 cur++;
1208
1209 size = memparse(cur, &tmp);
1210 if (cur == tmp) {
1211 pr_warning("Memory value expected\n");
1212 return -EINVAL;
1213 }
1214 cur = tmp;
1215 if (size >= system_ram) {
1216 pr_warning("crashkernel: invalid size\n");
1217 return -EINVAL;
1218 }
1219
1220 /* match ? */
1221 if (system_ram >= start && system_ram <= end) {
1222 *crash_size = size;
1223 break;
1224 }
1225 } while (*cur++ == ',');
1226
1227 if (*crash_size > 0) {
1228 while (*cur != ' ' && *cur != '@')
1229 cur++;
1230 if (*cur == '@') {
1231 cur++;
1232 *crash_base = memparse(cur, &tmp);
1233 if (cur == tmp) {
1234 pr_warning("Memory value expected "
1235 "after '@'\n");
1236 return -EINVAL;
1237 }
1238 }
1239 }
1240
1241 return 0;
1242}
1243
1244/*
1245 * That function parses "simple" (old) crashkernel command lines like
1246 *
1247 * crashkernel=size[@offset]
1248 *
1249 * It returns 0 on success and -EINVAL on failure.
1250 */
1251static int __init parse_crashkernel_simple(char *cmdline,
1252 unsigned long long *crash_size,
1253 unsigned long long *crash_base)
1254{
1255 char *cur = cmdline;
1256
1257 *crash_size = memparse(cmdline, &cur);
1258 if (cmdline == cur) {
1259 pr_warning("crashkernel: memory value expected\n");
1260 return -EINVAL;
1261 }
1262
1263 if (*cur == '@')
1264 *crash_base = memparse(cur+1, &cur);
1265
1266 return 0;
1267}
1268
1269/*
1270 * That function is the entry point for command line parsing and should be
1271 * called from the arch-specific code.
1272 */
1273int __init parse_crashkernel(char *cmdline,
1274 unsigned long long system_ram,
1275 unsigned long long *crash_size,
1276 unsigned long long *crash_base)
1277{
1278 char *p = cmdline, *ck_cmdline = NULL;
1279 char *first_colon, *first_space;
1280
1281 BUG_ON(!crash_size || !crash_base);
1282 *crash_size = 0;
1283 *crash_base = 0;
1284
1285 /* find crashkernel and use the last one if there are more */
1286 p = strstr(p, "crashkernel=");
1287 while (p) {
1288 ck_cmdline = p;
1289 p = strstr(p+1, "crashkernel=");
1290 }
1291
1292 if (!ck_cmdline)
1293 return -EINVAL;
1294
1295 ck_cmdline += 12; /* strlen("crashkernel=") */
1296
1297 /*
1298 * if the commandline contains a ':', then that's the extended
1299 * syntax -- if not, it must be the classic syntax
1300 */
1301 first_colon = strchr(ck_cmdline, ':');
1302 first_space = strchr(ck_cmdline, ' ');
1303 if (first_colon && (!first_space || first_colon < first_space))
1304 return parse_crashkernel_mem(ck_cmdline, system_ram,
1305 crash_size, crash_base);
1306 else
1307 return parse_crashkernel_simple(ck_cmdline, crash_size,
1308 crash_base);
1309
1310 return 0;
1311}
1312
1313
1314
1149void crash_save_vmcoreinfo(void) 1315void crash_save_vmcoreinfo(void)
1150{ 1316{
1151 u32 *buf; 1317 u32 *buf;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index a6f1ee9c92d9..55fe0c7cd95f 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -511,11 +511,11 @@ static void lockdep_print_held_locks(struct task_struct *curr)
511 int i, depth = curr->lockdep_depth; 511 int i, depth = curr->lockdep_depth;
512 512
513 if (!depth) { 513 if (!depth) {
514 printk("no locks held by %s/%d.\n", curr->comm, curr->pid); 514 printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
515 return; 515 return;
516 } 516 }
517 printk("%d lock%s held by %s/%d:\n", 517 printk("%d lock%s held by %s/%d:\n",
518 depth, depth > 1 ? "s" : "", curr->comm, curr->pid); 518 depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
519 519
520 for (i = 0; i < depth; i++) { 520 for (i = 0; i < depth; i++) {
521 printk(" #%d: ", i); 521 printk(" #%d: ", i);
@@ -904,7 +904,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth)
904 print_kernel_version(); 904 print_kernel_version();
905 printk( "-------------------------------------------------------\n"); 905 printk( "-------------------------------------------------------\n");
906 printk("%s/%d is trying to acquire lock:\n", 906 printk("%s/%d is trying to acquire lock:\n",
907 curr->comm, curr->pid); 907 curr->comm, task_pid_nr(curr));
908 print_lock(check_source); 908 print_lock(check_source);
909 printk("\nbut task is already holding lock:\n"); 909 printk("\nbut task is already holding lock:\n");
910 print_lock(check_target); 910 print_lock(check_target);
@@ -1085,7 +1085,7 @@ print_bad_irq_dependency(struct task_struct *curr,
1085 print_kernel_version(); 1085 print_kernel_version();
1086 printk( "------------------------------------------------------\n"); 1086 printk( "------------------------------------------------------\n");
1087 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", 1087 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
1088 curr->comm, curr->pid, 1088 curr->comm, task_pid_nr(curr),
1089 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, 1089 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
1090 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, 1090 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
1091 curr->hardirqs_enabled, 1091 curr->hardirqs_enabled,
@@ -1237,7 +1237,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
1237 print_kernel_version(); 1237 print_kernel_version();
1238 printk( "---------------------------------------------\n"); 1238 printk( "---------------------------------------------\n");
1239 printk("%s/%d is trying to acquire lock:\n", 1239 printk("%s/%d is trying to acquire lock:\n",
1240 curr->comm, curr->pid); 1240 curr->comm, task_pid_nr(curr));
1241 print_lock(next); 1241 print_lock(next);
1242 printk("\nbut task is already holding lock:\n"); 1242 printk("\nbut task is already holding lock:\n");
1243 print_lock(prev); 1243 print_lock(prev);
@@ -1521,7 +1521,7 @@ cache_hit:
1521} 1521}
1522 1522
1523static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, 1523static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
1524 struct held_lock *hlock, int chain_head, u64 chain_key) 1524 struct held_lock *hlock, int chain_head, u64 chain_key)
1525{ 1525{
1526 /* 1526 /*
1527 * Trylock needs to maintain the stack of held locks, but it 1527 * Trylock needs to maintain the stack of held locks, but it
@@ -1641,7 +1641,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
1641 usage_str[prev_bit], usage_str[new_bit]); 1641 usage_str[prev_bit], usage_str[new_bit]);
1642 1642
1643 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", 1643 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
1644 curr->comm, curr->pid, 1644 curr->comm, task_pid_nr(curr),
1645 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, 1645 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
1646 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, 1646 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
1647 trace_hardirqs_enabled(curr), 1647 trace_hardirqs_enabled(curr),
@@ -1694,7 +1694,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
1694 print_kernel_version(); 1694 print_kernel_version();
1695 printk( "---------------------------------------------------------\n"); 1695 printk( "---------------------------------------------------------\n");
1696 printk("%s/%d just changed the state of lock:\n", 1696 printk("%s/%d just changed the state of lock:\n",
1697 curr->comm, curr->pid); 1697 curr->comm, task_pid_nr(curr));
1698 print_lock(this); 1698 print_lock(this);
1699 if (forwards) 1699 if (forwards)
1700 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass); 1700 printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
@@ -2487,7 +2487,7 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
2487 printk( "[ BUG: bad unlock balance detected! ]\n"); 2487 printk( "[ BUG: bad unlock balance detected! ]\n");
2488 printk( "-------------------------------------\n"); 2488 printk( "-------------------------------------\n");
2489 printk("%s/%d is trying to release lock (", 2489 printk("%s/%d is trying to release lock (",
2490 curr->comm, curr->pid); 2490 curr->comm, task_pid_nr(curr));
2491 print_lockdep_cache(lock); 2491 print_lockdep_cache(lock);
2492 printk(") at:\n"); 2492 printk(") at:\n");
2493 print_ip_sym(ip); 2493 print_ip_sym(ip);
@@ -2737,7 +2737,7 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
2737 printk( "[ BUG: bad contention detected! ]\n"); 2737 printk( "[ BUG: bad contention detected! ]\n");
2738 printk( "---------------------------------\n"); 2738 printk( "---------------------------------\n");
2739 printk("%s/%d is trying to contend lock (", 2739 printk("%s/%d is trying to contend lock (",
2740 curr->comm, curr->pid); 2740 curr->comm, task_pid_nr(curr));
2741 print_lockdep_cache(lock); 2741 print_lockdep_cache(lock);
2742 printk(") at:\n"); 2742 printk(") at:\n");
2743 print_ip_sym(ip); 2743 print_ip_sym(ip);
@@ -3072,7 +3072,7 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
3072 printk( "[ BUG: held lock freed! ]\n"); 3072 printk( "[ BUG: held lock freed! ]\n");
3073 printk( "-------------------------\n"); 3073 printk( "-------------------------\n");
3074 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", 3074 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
3075 curr->comm, curr->pid, mem_from, mem_to-1); 3075 curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
3076 print_lock(hlock); 3076 print_lock(hlock);
3077 lockdep_print_held_locks(curr); 3077 lockdep_print_held_locks(curr);
3078 3078
@@ -3125,7 +3125,7 @@ static void print_held_locks_bug(struct task_struct *curr)
3125 printk( "[ BUG: lock held at task exit time! ]\n"); 3125 printk( "[ BUG: lock held at task exit time! ]\n");
3126 printk( "-------------------------------------\n"); 3126 printk( "-------------------------------------\n");
3127 printk("%s/%d is exiting with locks still held!\n", 3127 printk("%s/%d is exiting with locks still held!\n",
3128 curr->comm, curr->pid); 3128 curr->comm, task_pid_nr(curr));
3129 lockdep_print_held_locks(curr); 3129 lockdep_print_held_locks(curr);
3130 3130
3131 printk("\nstack backtrace:\n"); 3131 printk("\nstack backtrace:\n");
diff --git a/kernel/marker.c b/kernel/marker.c
new file mode 100644
index 000000000000..ccb48d9a3657
--- /dev/null
+++ b/kernel/marker.c
@@ -0,0 +1,525 @@
1/*
2 * Copyright (C) 2007 Mathieu Desnoyers
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18#include <linux/module.h>
19#include <linux/mutex.h>
20#include <linux/types.h>
21#include <linux/jhash.h>
22#include <linux/list.h>
23#include <linux/rcupdate.h>
24#include <linux/marker.h>
25#include <linux/err.h>
26
27extern struct marker __start___markers[];
28extern struct marker __stop___markers[];
29
30/*
31 * module_mutex nests inside markers_mutex. Markers mutex protects the builtin
32 * and module markers, the hash table and deferred_sync.
33 */
34static DEFINE_MUTEX(markers_mutex);
35
36/*
37 * Marker deferred synchronization.
38 * Upon marker probe_unregister, we delay call to synchronize_sched() to
39 * accelerate mass unregistration (only when there is no more reference to a
40 * given module do we call synchronize_sched()). However, we need to make sure
41 * every critical region has ended before we re-arm a marker that has been
42 * unregistered and then registered back with a different probe data.
43 */
44static int deferred_sync;
45
/*
 * Marker hash table, containing the active markers.
 * Protected by markers_mutex (the header comment for markers_mutex above says
 * it guards the builtin/module markers, this hash table and deferred_sync;
 * the original comment here said "module_mutex" — NOTE(review): confirm the
 * intended lock, all visible accessors take markers_mutex).
 */
#define MARKER_HASH_BITS 6
#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)

struct marker_entry {
	struct hlist_node hlist;	/* chains entries in one hash bucket */
	char *format;			/* points into name[], or NULL if unknown */
	marker_probe_func *probe;	/* callback invoked when marker fires */
	void *private;			/* opaque data handed to the probe */
	int refcount;	/* Number of times armed. 0 if disarmed. */
	char name[0];	/* Contains name'\0'format'\0' */
};

static struct hlist_head marker_table[MARKER_TABLE_SIZE];
63
/**
 * __mark_empty_function - Empty probe callback
 * @mdata: pointer of type const struct marker
 * @private: probe private data (unused)
 * @fmt: format string
 * @...: variable argument list
 *
 * Empty callback provided as a probe to the markers. By providing this to a
 * disabled marker, we make sure the execution flow is always valid even
 * though the function pointer change and the marker enabling are two distinct
 * operations that modify the execution flow of preemptible code.
 */
void __mark_empty_function(const struct marker *mdata, void *private,
	const char *fmt, ...)
{
}
EXPORT_SYMBOL_GPL(__mark_empty_function);
80
81/*
82 * Get marker if the marker is present in the marker hash table.
83 * Must be called with markers_mutex held.
84 * Returns NULL if not present.
85 */
86static struct marker_entry *get_marker(const char *name)
87{
88 struct hlist_head *head;
89 struct hlist_node *node;
90 struct marker_entry *e;
91 u32 hash = jhash(name, strlen(name), 0);
92
93 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
94 hlist_for_each_entry(e, node, head, hlist) {
95 if (!strcmp(name, e->name))
96 return e;
97 }
98 return NULL;
99}
100
/*
 * Add the marker to the marker hash table. Must be called with markers_mutex
 * held.
 * Returns 0 on success, -EBUSY if a marker of the same name already exists,
 * -ENOMEM on allocation failure.
 */
static int add_marker(const char *name, const char *format,
	marker_probe_func *probe, void *private)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct marker_entry *e;
	size_t name_len = strlen(name) + 1;
	size_t format_len = 0;
	u32 hash = jhash(name, name_len-1, 0);

	if (format)
		format_len = strlen(format) + 1;
	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
	/* Reject duplicates: each marker name maps to exactly one probe. */
	hlist_for_each_entry(e, node, head, hlist) {
		if (!strcmp(name, e->name)) {
			printk(KERN_NOTICE
				"Marker %s busy, probe %p already installed\n",
				name, e->probe);
			return -EBUSY;	/* Already there */
		}
	}
	/*
	 * Using kmalloc here to allocate a variable length element. Could
	 * cause some memory fragmentation if overused.
	 */
	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
			GFP_KERNEL);
	if (!e)
		return -ENOMEM;
	/* name and (optional) format live back-to-back in the name[] tail. */
	memcpy(&e->name[0], name, name_len);
	if (format) {
		e->format = &e->name[name_len];
		memcpy(e->format, format, format_len);
		/* Announce the name<->format binding through the core marker. */
		trace_mark(core_marker_format, "name %s format %s",
				e->name, e->format);
	} else
		e->format = NULL;
	e->probe = probe;
	e->private = private;
	e->refcount = 0;	/* disarmed until marker_arm() is called */
	hlist_add_head(&e->hlist, head);
	return 0;
}
148
/*
 * Remove the marker from the marker hash table. Must be called with
 * markers_mutex held.
 * Returns the probe private data of the removed entry, or NULL when the
 * name was not found (callers cannot distinguish a NULL private pointer
 * from "not found").
 */
static void *remove_marker(const char *name)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct marker_entry *e;
	int found = 0;
	size_t len = strlen(name) + 1;
	void *private = NULL;
	u32 hash = jhash(name, len-1, 0);

	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
	hlist_for_each_entry(e, node, head, hlist) {
		if (!strcmp(name, e->name)) {
			found = 1;
			break;
		}
	}
	if (found) {
		/* Save private before freeing; the caller owns that data. */
		private = e->private;
		hlist_del(&e->hlist);
		kfree(e);
	}
	return private;
}
177
/*
 * Set the mark_entry format to the format found in the element.
 * The entry is reallocated (name[] must also hold the format string), the
 * old entry is replaced in-place in its hash chain, and *entry is updated
 * to point at the new allocation. Must be called with markers_mutex held.
 * Returns 0 on success, -ENOMEM on allocation failure (old entry untouched).
 */
static int marker_set_format(struct marker_entry **entry, const char *format)
{
	struct marker_entry *e;
	size_t name_len = strlen((*entry)->name) + 1;
	size_t format_len = strlen(format) + 1;

	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
			GFP_KERNEL);
	if (!e)
		return -ENOMEM;
	memcpy(&e->name[0], (*entry)->name, name_len);
	e->format = &e->name[name_len];
	memcpy(e->format, format, format_len);
	e->probe = (*entry)->probe;
	e->private = (*entry)->private;
	e->refcount = (*entry)->refcount;
	/* Splice the new entry in at the old entry's chain position. */
	hlist_add_before(&e->hlist, &(*entry)->hlist);
	hlist_del(&(*entry)->hlist);
	kfree(*entry);
	*entry = e;
	trace_mark(core_marker_format, "name %s format %s",
			e->name, e->format);
	return 0;
}
205
/*
 * Sets the probe callback corresponding to one marker.
 * On first use the entry learns the marker's format string; afterwards a
 * mismatching format is rejected with -EPERM rather than silently calling
 * the probe with unexpected arguments.
 */
static int set_marker(struct marker_entry **entry, struct marker *elem)
{
	int ret;
	WARN_ON(strcmp((*entry)->name, elem->name) != 0);

	if ((*entry)->format) {
		if (strcmp((*entry)->format, elem->format) != 0) {
			printk(KERN_NOTICE
				"Format mismatch for probe %s "
				"(%s), marker (%s)\n",
				(*entry)->name,
				(*entry)->format,
				elem->format);
			return -EPERM;
		}
	} else {
		/* First marker seen for this entry: record its format. */
		ret = marker_set_format(entry, elem->format);
		if (ret)
			return ret;
	}
	/* Arm the marker site: install callback/data, then enable. */
	elem->call = (*entry)->probe;
	elem->private = (*entry)->private;
	elem->state = 1;
	return 0;
}
234
/*
 * Disable a marker and its probe callback.
 * Note: only a synchronize_sched() issued after setting elem->call to the
 * empty function ensures that the original callback is not used anymore.
 * This is guaranteed by preemption disabling around the call site.
 */
static void disable_marker(struct marker *elem)
{
	elem->state = 0;
	elem->call = __mark_empty_function;
	/*
	 * Leave the private data and id there, because removal is racy and
	 * should be done only after a synchronize_sched(). These are never used
	 * until the next initialization anyway.
	 */
}
251
/**
 * marker_update_probe_range - Update a probe range
 * @begin: beginning of the range
 * @end: end of the range
 * @probe_module: module address of the probe being updated
 * @refcount: number of references left to the given probe_module (out)
 *
 * Updates the probe callback corresponding to a range of markers.
 * Must be called with markers_mutex held.
 *
 * @refcount is only dereferenced when @probe_module is non-NULL, so callers
 * passing a NULL @refcount (e.g. module load) must also pass a NULL
 * @probe_module.
 */
void marker_update_probe_range(struct marker *begin,
	struct marker *end, struct module *probe_module,
	int *refcount)
{
	struct marker *iter;
	struct marker_entry *mark_entry;

	for (iter = begin; iter < end; iter++) {
		mark_entry = get_marker(iter->name);
		if (mark_entry && mark_entry->refcount) {
			set_marker(&mark_entry, iter);
			/*
			 * ignore error, continue
			 */
			/* Count surviving references into the probe module. */
			if (probe_module)
				if (probe_module ==
			__module_text_address((unsigned long)mark_entry->probe))
					(*refcount)++;
		} else {
			/* Not registered or disarmed: quiesce the call site. */
			disable_marker(iter);
		}
	}
}
285
/*
 * Update probes, removing the faulty probes.
 * Issues a synchronize_sched() when no reference to the module passed
 * as parameter is found in the probes so the probe module can be
 * safely unloaded from now on.
 * @probe_module may be NULL (core-kernel probes); then no sync is forced.
 */
static void marker_update_probes(struct module *probe_module)
{
	int refcount = 0;

	mutex_lock(&markers_mutex);
	/* Core kernel markers */
	marker_update_probe_range(__start___markers,
		__stop___markers, probe_module, &refcount);
	/* Markers in modules. */
	module_update_markers(probe_module, &refcount);
	if (probe_module && refcount == 0) {
		/* No marker references the module left: safe to unload it. */
		synchronize_sched();
		deferred_sync = 0;
	}
	mutex_unlock(&markers_mutex);
}
308
/**
 * marker_probe_register - Connect a probe to a marker
 * @name: marker name
 * @format: format string
 * @probe: probe handler
 * @private: probe private data
 *
 * private data must be a valid allocated memory address, or NULL.
 * Returns 0 if ok, error value on error (-EBUSY if the marker is already
 * armed with another probe, -ENOMEM on allocation failure).
 */
int marker_probe_register(const char *name, const char *format,
			marker_probe_func *probe, void *private)
{
	struct marker_entry *entry;
	int ret = 0, need_update = 0;

	mutex_lock(&markers_mutex);
	entry = get_marker(name);
	if (entry && entry->refcount) {
		ret = -EBUSY;
		goto end;
	}
	/*
	 * Flush any deferred unregistration grace period before re-arming,
	 * so a previously-unregistered probe cannot still be in flight.
	 */
	if (deferred_sync) {
		synchronize_sched();
		deferred_sync = 0;
	}
	ret = add_marker(name, format, probe, private);
	if (ret)
		goto end;
	need_update = 1;
end:
	mutex_unlock(&markers_mutex);
	/* Probe update takes markers_mutex itself, so call after unlock. */
	if (need_update)
		marker_update_probes(NULL);
	return ret;
}
EXPORT_SYMBOL_GPL(marker_probe_register);
346
347/**
348 * marker_probe_unregister - Disconnect a probe from a marker
349 * @name: marker name
350 *
351 * Returns the private data given to marker_probe_register, or an ERR_PTR().
352 */
353void *marker_probe_unregister(const char *name)
354{
355 struct module *probe_module;
356 struct marker_entry *entry;
357 void *private;
358 int need_update = 0;
359
360 mutex_lock(&markers_mutex);
361 entry = get_marker(name);
362 if (!entry) {
363 private = ERR_PTR(-ENOENT);
364 goto end;
365 }
366 entry->refcount = 0;
367 /* In what module is the probe handler ? */
368 probe_module = __module_text_address((unsigned long)entry->probe);
369 private = remove_marker(name);
370 deferred_sync = 1;
371 need_update = 1;
372end:
373 mutex_unlock(&markers_mutex);
374 if (need_update)
375 marker_update_probes(probe_module);
376 return private;
377}
378EXPORT_SYMBOL_GPL(marker_probe_unregister);
379
380/**
381 * marker_probe_unregister_private_data - Disconnect a probe from a marker
382 * @private: probe private data
383 *
384 * Unregister a marker by providing the registered private data.
385 * Returns the private data given to marker_probe_register, or an ERR_PTR().
386 */
387void *marker_probe_unregister_private_data(void *private)
388{
389 struct module *probe_module;
390 struct hlist_head *head;
391 struct hlist_node *node;
392 struct marker_entry *entry;
393 int found = 0;
394 unsigned int i;
395 int need_update = 0;
396
397 mutex_lock(&markers_mutex);
398 for (i = 0; i < MARKER_TABLE_SIZE; i++) {
399 head = &marker_table[i];
400 hlist_for_each_entry(entry, node, head, hlist) {
401 if (entry->private == private) {
402 found = 1;
403 goto iter_end;
404 }
405 }
406 }
407iter_end:
408 if (!found) {
409 private = ERR_PTR(-ENOENT);
410 goto end;
411 }
412 entry->refcount = 0;
413 /* In what module is the probe handler ? */
414 probe_module = __module_text_address((unsigned long)entry->probe);
415 private = remove_marker(entry->name);
416 deferred_sync = 1;
417 need_update = 1;
418end:
419 mutex_unlock(&markers_mutex);
420 if (need_update)
421 marker_update_probes(probe_module);
422 return private;
423}
424EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
425
/**
 * marker_arm - Arm a marker
 * @name: marker name
 *
 * Activate a marker. It keeps a reference count of the number of
 * arming/disarming done.
 * Returns 0 if ok, error value on error (-ENOENT when the marker has no
 * registered probe).
 */
int marker_arm(const char *name)
{
	struct marker_entry *entry;
	int ret = 0, need_update = 0;

	mutex_lock(&markers_mutex);
	entry = get_marker(name);
	if (!entry) {
		ret = -ENOENT;
		goto end;
	}
	/*
	 * Only need to update probes when refcount passes from 0 to 1.
	 */
	if (entry->refcount++)
		goto end;
	need_update = 1;
end:
	mutex_unlock(&markers_mutex);
	/* marker_update_probes() takes markers_mutex: must run after unlock. */
	if (need_update)
		marker_update_probes(NULL);
	return ret;
}
EXPORT_SYMBOL_GPL(marker_arm);
458
/**
 * marker_disarm - Disarm a marker
 * @name: marker name
 *
 * Disarm a marker. It keeps a reference count of the number of arming/disarming
 * done.
 * Returns 0 if ok, error value on error (-ENOENT if unknown, -EPERM when
 * disarming a marker that is not armed).
 */
int marker_disarm(const char *name)
{
	struct marker_entry *entry;
	int ret = 0, need_update = 0;

	mutex_lock(&markers_mutex);
	entry = get_marker(name);
	if (!entry) {
		ret = -ENOENT;
		goto end;
	}
	/*
	 * Only permit decrement refcount if higher than 0.
	 * Do probe update only on 1 -> 0 transition.
	 */
	if (entry->refcount) {
		if (--entry->refcount)
			goto end;
	} else {
		ret = -EPERM;
		goto end;
	}
	need_update = 1;
end:
	mutex_unlock(&markers_mutex);
	/* marker_update_probes() takes markers_mutex: must run after unlock. */
	if (need_update)
		marker_update_probes(NULL);
	return ret;
}
EXPORT_SYMBOL_GPL(marker_disarm);
497
498/**
499 * marker_get_private_data - Get a marker's probe private data
500 * @name: marker name
501 *
502 * Returns the private data pointer, or an ERR_PTR.
503 * The private data pointer should _only_ be dereferenced if the caller is the
504 * owner of the data, or its content could vanish. This is mostly used to
505 * confirm that a caller is the owner of a registered probe.
506 */
507void *marker_get_private_data(const char *name)
508{
509 struct hlist_head *head;
510 struct hlist_node *node;
511 struct marker_entry *e;
512 size_t name_len = strlen(name) + 1;
513 u32 hash = jhash(name, name_len-1, 0);
514 int found = 0;
515
516 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
517 hlist_for_each_entry(e, node, head, hlist) {
518 if (!strcmp(name, e->name)) {
519 found = 1;
520 return e->private;
521 }
522 }
523 return ERR_PTR(-ENOENT);
524}
525EXPORT_SYMBOL_GPL(marker_get_private_data);
diff --git a/kernel/module.c b/kernel/module.c
index 7734595bd329..3202c9950073 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1673,6 +1673,8 @@ static struct module *load_module(void __user *umod,
1673 unsigned int unusedcrcindex; 1673 unsigned int unusedcrcindex;
1674 unsigned int unusedgplindex; 1674 unsigned int unusedgplindex;
1675 unsigned int unusedgplcrcindex; 1675 unsigned int unusedgplcrcindex;
1676 unsigned int markersindex;
1677 unsigned int markersstringsindex;
1676 struct module *mod; 1678 struct module *mod;
1677 long err = 0; 1679 long err = 0;
1678 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1680 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1939,6 +1941,9 @@ static struct module *load_module(void __user *umod,
1939 add_taint_module(mod, TAINT_FORCED_MODULE); 1941 add_taint_module(mod, TAINT_FORCED_MODULE);
1940 } 1942 }
1941#endif 1943#endif
1944 markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
1945 markersstringsindex = find_sec(hdr, sechdrs, secstrings,
1946 "__markers_strings");
1942 1947
1943 /* Now do relocations. */ 1948 /* Now do relocations. */
1944 for (i = 1; i < hdr->e_shnum; i++) { 1949 for (i = 1; i < hdr->e_shnum; i++) {
@@ -1961,6 +1966,11 @@ static struct module *load_module(void __user *umod,
1961 if (err < 0) 1966 if (err < 0)
1962 goto cleanup; 1967 goto cleanup;
1963 } 1968 }
1969#ifdef CONFIG_MARKERS
1970 mod->markers = (void *)sechdrs[markersindex].sh_addr;
1971 mod->num_markers =
1972 sechdrs[markersindex].sh_size / sizeof(*mod->markers);
1973#endif
1964 1974
1965 /* Find duplicate symbols */ 1975 /* Find duplicate symbols */
1966 err = verify_export_symbols(mod); 1976 err = verify_export_symbols(mod);
@@ -1979,6 +1989,11 @@ static struct module *load_module(void __user *umod,
1979 1989
1980 add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); 1990 add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
1981 1991
1992#ifdef CONFIG_MARKERS
1993 if (!mod->taints)
1994 marker_update_probe_range(mod->markers,
1995 mod->markers + mod->num_markers, NULL, NULL);
1996#endif
1982 err = module_finalize(hdr, sechdrs, mod); 1997 err = module_finalize(hdr, sechdrs, mod);
1983 if (err < 0) 1998 if (err < 0)
1984 goto cleanup; 1999 goto cleanup;
@@ -2570,3 +2585,18 @@ EXPORT_SYMBOL(module_remove_driver);
2570void struct_module(struct module *mod) { return; } 2585void struct_module(struct module *mod) { return; }
2571EXPORT_SYMBOL(struct_module); 2586EXPORT_SYMBOL(struct_module);
2572#endif 2587#endif
2588
2589#ifdef CONFIG_MARKERS
2590void module_update_markers(struct module *probe_module, int *refcount)
2591{
2592 struct module *mod;
2593
2594 mutex_lock(&module_mutex);
2595 list_for_each_entry(mod, &modules, list)
2596 if (!mod->taints)
2597 marker_update_probe_range(mod->markers,
2598 mod->markers + mod->num_markers,
2599 probe_module, refcount);
2600 mutex_unlock(&module_mutex);
2601}
2602#endif
diff --git a/kernel/notifier.c b/kernel/notifier.c
new file mode 100644
index 000000000000..4253f472f060
--- /dev/null
+++ b/kernel/notifier.c
@@ -0,0 +1,539 @@
1#include <linux/kdebug.h>
2#include <linux/kprobes.h>
3#include <linux/module.h>
4#include <linux/notifier.h>
5#include <linux/rcupdate.h>
6#include <linux/vmalloc.h>
7
8/*
9 * Notifier list for kernel code which wants to be called
10 * at shutdown. This is used to stop any idling DMA operations
11 * and the like.
12 */
13BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
14
15/*
16 * Notifier chain core routines. The exported routines below
17 * are layered on top of these, with appropriate locking added.
18 */
19
20static int notifier_chain_register(struct notifier_block **nl,
21 struct notifier_block *n)
22{
23 while ((*nl) != NULL) {
24 if (n->priority > (*nl)->priority)
25 break;
26 nl = &((*nl)->next);
27 }
28 n->next = *nl;
29 rcu_assign_pointer(*nl, n);
30 return 0;
31}
32
33static int notifier_chain_unregister(struct notifier_block **nl,
34 struct notifier_block *n)
35{
36 while ((*nl) != NULL) {
37 if ((*nl) == n) {
38 rcu_assign_pointer(*nl, n->next);
39 return 0;
40 }
41 nl = &((*nl)->next);
42 }
43 return -ENOENT;
44}
45
/**
 * notifier_call_chain - Informs the registered notifiers about an event.
 * @nl: Pointer to head of the blocking notifier chain
 * @val: Value passed unmodified to notifier function
 * @v: Pointer passed unmodified to notifier function
 * @nr_to_call: Number of notifier functions to be called. Don't care
 * value of this parameter is -1.
 * @nr_calls: Records the number of notifications sent. Don't care
 * value of this field is NULL.
 * @returns: notifier_call_chain returns the value returned by the
 * last notifier function called.
 */
static int __kprobes notifier_call_chain(struct notifier_block **nl,
		unsigned long val, void *v,
		int nr_to_call, int *nr_calls)
{
	int ret = NOTIFY_DONE;
	struct notifier_block *nb, *next_nb;

	nb = rcu_dereference(*nl);

	while (nb && nr_to_call) {
		/*
		 * Fetch the next link *before* invoking the callback: the
		 * callback is allowed to unregister (and free) its own block.
		 */
		next_nb = rcu_dereference(nb->next);
		ret = nb->notifier_call(nb, val, v);

		if (nr_calls)
			(*nr_calls)++;

		/* A notifier may halt the traversal via NOTIFY_STOP_MASK. */
		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
			break;
		nb = next_nb;
		nr_to_call--;
	}
	return ret;
}
81
82/*
83 * Atomic notifier chain routines. Registration and unregistration
84 * use a spinlock, and call_chain is synchronized by RCU (no locks).
85 */
86
/**
 * atomic_notifier_chain_register - Add notifier to an atomic notifier chain
 * @nh: Pointer to head of the atomic notifier chain
 * @n: New entry in notifier chain
 *
 * Adds a notifier to an atomic notifier chain.
 *
 * Currently always returns zero.
 */
int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
		struct notifier_block *n)
{
	unsigned long flags;
	int ret;

	/* irqsave: registration may happen from any context. */
	spin_lock_irqsave(&nh->lock, flags);
	ret = notifier_chain_register(&nh->head, n);
	spin_unlock_irqrestore(&nh->lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
108
/**
 * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
 * @nh: Pointer to head of the atomic notifier chain
 * @n: Entry to remove from notifier chain
 *
 * Removes a notifier from an atomic notifier chain.
 *
 * Returns zero on success or %-ENOENT on failure.
 */
int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
		struct notifier_block *n)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&nh->lock, flags);
	ret = notifier_chain_unregister(&nh->head, n);
	spin_unlock_irqrestore(&nh->lock, flags);
	/* Wait for in-flight RCU walkers before the caller may free @n. */
	synchronize_rcu();
	return ret;
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
131
/**
 * __atomic_notifier_call_chain - Call functions in an atomic notifier chain
 * @nh: Pointer to head of the atomic notifier chain
 * @val: Value passed unmodified to notifier function
 * @v: Pointer passed unmodified to notifier function
 * @nr_to_call: See the comment for notifier_call_chain.
 * @nr_calls: See the comment for notifier_call_chain.
 *
 * Calls each function in a notifier chain in turn. The functions
 * run in an atomic context, so they must not block.
 * This routine uses RCU to synchronize with changes to the chain.
 *
 * If the return value of the notifier can be and'ed
 * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
 * will return immediately, with the return value of
 * the notifier function which halted execution.
 * Otherwise the return value is the return value
 * of the last notifier function called.
 */
int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
					unsigned long val, void *v,
					int nr_to_call, int *nr_calls)
{
	int ret;

	/* Readers take no lock: RCU protects the traversal. */
	rcu_read_lock();
	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);

/* Convenience wrapper: call every notifier, no call counting. */
int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
		unsigned long val, void *v)
{
	return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
}
EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
170
171/*
172 * Blocking notifier chain routines. All access to the chain is
173 * synchronized by an rwsem.
174 */
175
/**
 * blocking_notifier_chain_register - Add notifier to a blocking notifier chain
 * @nh: Pointer to head of the blocking notifier chain
 * @n: New entry in notifier chain
 *
 * Adds a notifier to a blocking notifier chain.
 * Must be called in process context.
 *
 * Currently always returns zero.
 */
int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
		struct notifier_block *n)
{
	int ret;

	/*
	 * This code gets used during boot-up, when task switching is
	 * not yet working and interrupts must remain disabled. At
	 * such times we must not call down_write().
	 */
	if (unlikely(system_state == SYSTEM_BOOTING))
		return notifier_chain_register(&nh->head, n);

	down_write(&nh->rwsem);
	ret = notifier_chain_register(&nh->head, n);
	up_write(&nh->rwsem);
	return ret;
}
EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
205
/**
 * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
 * @nh: Pointer to head of the blocking notifier chain
 * @n: Entry to remove from notifier chain
 *
 * Removes a notifier from a blocking notifier chain.
 * Must be called from process context.
 *
 * Returns zero on success or %-ENOENT on failure.
 */
int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
		struct notifier_block *n)
{
	int ret;

	/*
	 * This code gets used during boot-up, when task switching is
	 * not yet working and interrupts must remain disabled. At
	 * such times we must not call down_write().
	 */
	if (unlikely(system_state == SYSTEM_BOOTING))
		return notifier_chain_unregister(&nh->head, n);

	down_write(&nh->rwsem);
	ret = notifier_chain_unregister(&nh->head, n);
	up_write(&nh->rwsem);
	return ret;
}
EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
235
/**
 * __blocking_notifier_call_chain - Call functions in a blocking notifier chain
 * @nh: Pointer to head of the blocking notifier chain
 * @val: Value passed unmodified to notifier function
 * @v: Pointer passed unmodified to notifier function
 * @nr_to_call: See comment for notifier_call_chain.
 * @nr_calls: See comment for notifier_call_chain.
 *
 * Calls each function in a notifier chain in turn. The functions
 * run in a process context, so they are allowed to block.
 *
 * If the return value of the notifier can be and'ed
 * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
 * will return immediately, with the return value of
 * the notifier function which halted execution.
 * Otherwise the return value is the return value
 * of the last notifier function called.
 */
int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
		unsigned long val, void *v,
		int nr_to_call, int *nr_calls)
{
	int ret = NOTIFY_DONE;

	/*
	 * We check the head outside the lock, but if this access is
	 * racy then it does not matter what the result of the test
	 * is, we re-check the list after having taken the lock anyway:
	 */
	if (rcu_dereference(nh->head)) {
		down_read(&nh->rwsem);
		ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
					nr_calls);
		up_read(&nh->rwsem);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);

/* Convenience wrapper: call every notifier, no call counting. */
int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
		unsigned long val, void *v)
{
	return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
}
EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
281
282/*
283 * Raw notifier chain routines. There is no protection;
284 * the caller must provide it. Use at your own risk!
285 */
286
/**
 * raw_notifier_chain_register - Add notifier to a raw notifier chain
 * @nh: Pointer to head of the raw notifier chain
 * @n: New entry in notifier chain
 *
 * Adds a notifier to a raw notifier chain.
 * All locking must be provided by the caller.
 *
 * Currently always returns zero.
 */
int raw_notifier_chain_register(struct raw_notifier_head *nh,
		struct notifier_block *n)
{
	return notifier_chain_register(&nh->head, n);
}
EXPORT_SYMBOL_GPL(raw_notifier_chain_register);

/**
 * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
 * @nh: Pointer to head of the raw notifier chain
 * @n: Entry to remove from notifier chain
 *
 * Removes a notifier from a raw notifier chain.
 * All locking must be provided by the caller.
 *
 * Returns zero on success or %-ENOENT on failure.
 */
int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
		struct notifier_block *n)
{
	return notifier_chain_unregister(&nh->head, n);
}
EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
320
/**
 * __raw_notifier_call_chain - Call functions in a raw notifier chain
 * @nh: Pointer to head of the raw notifier chain
 * @val: Value passed unmodified to notifier function
 * @v: Pointer passed unmodified to notifier function
 * @nr_to_call: See comment for notifier_call_chain.
 * @nr_calls: See comment for notifier_call_chain
 *
 * Calls each function in a notifier chain in turn. The functions
 * run in an undefined context.
 * All locking must be provided by the caller.
 *
 * If the return value of the notifier can be and'ed
 * with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
 * will return immediately, with the return value of
 * the notifier function which halted execution.
 * Otherwise the return value is the return value
 * of the last notifier function called.
 */
int __raw_notifier_call_chain(struct raw_notifier_head *nh,
		unsigned long val, void *v,
		int nr_to_call, int *nr_calls)
{
	return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
}
EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);

/* Convenience wrapper: call every notifier, no call counting. */
int raw_notifier_call_chain(struct raw_notifier_head *nh,
		unsigned long val, void *v)
{
	return __raw_notifier_call_chain(nh, val, v, -1, NULL);
}
EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
354
355/*
356 * SRCU notifier chain routines. Registration and unregistration
357 * use a mutex, and call_chain is synchronized by SRCU (no locks).
358 */
359
/**
 * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
 * @nh: Pointer to head of the SRCU notifier chain
 * @n: New entry in notifier chain
 *
 * Adds a notifier to an SRCU notifier chain.
 * Must be called in process context.
 *
 * Currently always returns zero.
 */
int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
		struct notifier_block *n)
{
	int ret;

	/*
	 * This code gets used during boot-up, when task switching is
	 * not yet working and interrupts must remain disabled. At
	 * such times we must not call mutex_lock().
	 */
	if (unlikely(system_state == SYSTEM_BOOTING))
		return notifier_chain_register(&nh->head, n);

	mutex_lock(&nh->mutex);
	ret = notifier_chain_register(&nh->head, n);
	mutex_unlock(&nh->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
389
/**
 * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
 * @nh: Pointer to head of the SRCU notifier chain
 * @n: Entry to remove from notifier chain
 *
 * Removes a notifier from an SRCU notifier chain.
 * Must be called from process context.
 *
 * Returns zero on success or %-ENOENT on failure.
 */
int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
		struct notifier_block *n)
{
	int ret;

	/*
	 * This code gets used during boot-up, when task switching is
	 * not yet working and interrupts must remain disabled. At
	 * such times we must not call mutex_lock().
	 */
	if (unlikely(system_state == SYSTEM_BOOTING))
		return notifier_chain_unregister(&nh->head, n);

	mutex_lock(&nh->mutex);
	ret = notifier_chain_unregister(&nh->head, n);
	mutex_unlock(&nh->mutex);
	/* Wait for SRCU readers still walking the chain before return. */
	synchronize_srcu(&nh->srcu);
	return ret;
}
EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
420
/**
 * __srcu_notifier_call_chain - Call functions in an SRCU notifier chain
 * @nh: Pointer to head of the SRCU notifier chain
 * @val: Value passed unmodified to notifier function
 * @v: Pointer passed unmodified to notifier function
 * @nr_to_call: See comment for notifier_call_chain.
 * @nr_calls: See comment for notifier_call_chain
 *
 * Calls each function in a notifier chain in turn. The functions
 * run in a process context, so they are allowed to block.
 *
 * If the return value of the notifier can be and'ed
 * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
 * will return immediately, with the return value of
 * the notifier function which halted execution.
 * Otherwise the return value is the return value
 * of the last notifier function called.
 */
int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
		unsigned long val, void *v,
		int nr_to_call, int *nr_calls)
{
	int ret;
	int idx;

	/* SRCU read-side section: sleepable, paired with synchronize_srcu(). */
	idx = srcu_read_lock(&nh->srcu);
	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
	srcu_read_unlock(&nh->srcu, idx);
	return ret;
}
EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);

/* Convenience wrapper: call every notifier, no call counting. */
int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
		unsigned long val, void *v)
{
	return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
}
EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
459
/**
 * srcu_init_notifier_head - Initialize an SRCU notifier head
 * @nh: Pointer to head of the srcu notifier chain
 *
 * Unlike other sorts of notifier heads, SRCU notifier heads require
 * dynamic initialization. Be sure to call this routine before
 * calling any of the other SRCU notifier routines for this head.
 *
 * If an SRCU notifier head is deallocated, it must first be cleaned
 * up by calling srcu_cleanup_notifier_head(). Otherwise the head's
 * per-cpu data (used by the SRCU mechanism) will leak.
 */
void srcu_init_notifier_head(struct srcu_notifier_head *nh)
{
	mutex_init(&nh->mutex);
	/* SRCU setup allocates per-cpu counters; failure here is fatal. */
	if (init_srcu_struct(&nh->srcu) < 0)
		BUG();
	nh->head = NULL;
}
EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
480
/**
 * register_reboot_notifier - Register function to be called at reboot time
 * @nb: Info about notifier function to be called
 *
 * Registers a function with the list of functions
 * to be called at reboot time.
 *
 * Currently always returns zero, as blocking_notifier_chain_register()
 * always returns zero.
 */
int register_reboot_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(register_reboot_notifier);

/**
 * unregister_reboot_notifier - Unregister previously registered reboot notifier
 * @nb: Hook to be unregistered
 *
 * Unregisters a previously registered reboot
 * notifier function.
 *
 * Returns zero on success, or %-ENOENT on failure.
 */
int unregister_reboot_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(unregister_reboot_notifier);
511
512static ATOMIC_NOTIFIER_HEAD(die_chain);
513
514int notify_die(enum die_val val, const char *str,
515 struct pt_regs *regs, long err, int trap, int sig)
516{
517 struct die_args args = {
518 .regs = regs,
519 .str = str,
520 .err = err,
521 .trapnr = trap,
522 .signr = sig,
523
524 };
525 return atomic_notifier_call_chain(&die_chain, val, &args);
526}
527
528int register_die_notifier(struct notifier_block *nb)
529{
530 vmalloc_sync_all();
531 return atomic_notifier_chain_register(&die_chain, nb);
532}
533EXPORT_SYMBOL_GPL(register_die_notifier);
534
535int unregister_die_notifier(struct notifier_block *nb)
536{
537 return atomic_notifier_chain_unregister(&die_chain, nb);
538}
539EXPORT_SYMBOL_GPL(unregister_die_notifier);
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
new file mode 100644
index 000000000000..aead4d69f62b
--- /dev/null
+++ b/kernel/ns_cgroup.c
@@ -0,0 +1,100 @@
1/*
2 * ns_cgroup.c - namespace cgroup subsystem
3 *
4 * Copyright 2006, 2007 IBM Corp
5 */
6
7#include <linux/module.h>
8#include <linux/cgroup.h>
9#include <linux/fs.h>
10
/*
 * Per-cgroup state for the "ns" subsystem: just the generic subsystem
 * state plus a spinlock (initialised in ns_create(), not otherwise used
 * in this file).
 */
struct ns_cgroup {
	struct cgroup_subsys_state css;
	spinlock_t lock;
};

/* Defined at the bottom of this file. */
struct cgroup_subsys ns_subsys;

/* Map a cgroup to the ns_cgroup embedding its "ns" subsystem state. */
static inline struct ns_cgroup *cgroup_to_ns(
		struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, ns_subsys_id),
			struct ns_cgroup, css);
}
24
/*
 * Called when @task gets new namespaces (clone/unshare).
 * NOTE(review): cgroup_clone() presumably creates a child of the task's
 * current "ns" cgroup and attaches @task to it — confirm in
 * kernel/cgroup.c.  Returns 0 or a -errno from cgroup_clone().
 */
int ns_cgroup_clone(struct task_struct *task)
{
	return cgroup_clone(task, &ns_subsys);
}
29
/*
 * Rules:
 *   1. you can only enter a cgroup which is a child of your current
 *     cgroup
 *   2. you can only place another process into a cgroup if
 *     a. you have CAP_SYS_ADMIN
 *     b. your cgroup is an ancestor of task's destination cgroup
 *       (hence either you are in the same cgroup as task, or in an
 *        ancestor cgroup thereof)
 */
static int ns_can_attach(struct cgroup_subsys *ss,
		struct cgroup *new_cgroup, struct task_struct *task)
{
	struct cgroup *orig;

	if (current != task) {
		/* Moving a task other than yourself needs CAP_SYS_ADMIN... */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		/* ...and the destination must descend from our own cgroup */
		if (!cgroup_is_descendant(new_cgroup))
			return -EPERM;
	}

	/*
	 * Only an unused cgroup may be entered.
	 * NOTE(review): ->count presumably reflects attached tasks/refs —
	 * confirm against the cgroup core.
	 */
	if (atomic_read(&new_cgroup->count) != 0)
		return -EPERM;

	/* Task may only move into a direct child of its current ns cgroup */
	orig = task_cgroup(task, ns_subsys_id);
	if (orig && orig != new_cgroup->parent)
		return -EPERM;

	return 0;
}
62
/*
 * Rules: you can only create a cgroup if
 *     1. you are capable(CAP_SYS_ADMIN)
 *     2. the target cgroup is a descendant of your own cgroup
 *
 * Returns the new subsystem state, or ERR_PTR(-EPERM / -ENOMEM).
 * The cgroup core releases it later through ns_destroy().
 */
static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
						struct cgroup *cgroup)
{
	struct ns_cgroup *ns_cgroup;

	if (!capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);
	if (!cgroup_is_descendant(cgroup))
		return ERR_PTR(-EPERM);

	/* kzalloc: the embedded css starts out zeroed for the cgroup core */
	ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
	if (!ns_cgroup)
		return ERR_PTR(-ENOMEM);
	spin_lock_init(&ns_cgroup->lock);
	return &ns_cgroup->css;
}
84
/* Release the per-cgroup state allocated in ns_create(). */
static void ns_destroy(struct cgroup_subsys *ss,
		struct cgroup *cgroup)
{
	kfree(cgroup_to_ns(cgroup));
}
93
/*
 * The "ns" cgroup subsystem.  Only attach-permission, create and
 * destroy callbacks are provided; everything else uses cgroup-core
 * defaults.
 */
struct cgroup_subsys ns_subsys = {
	.name = "ns",			/* directory/mount-option name */
	.can_attach = ns_can_attach,	/* permission checks, see above */
	.create = ns_create,
	.destroy = ns_destroy,
	.subsys_id = ns_subsys_id,
};
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 049e7c0ac566..79f871bc0ef4 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,19 +26,6 @@ static struct kmem_cache *nsproxy_cachep;
26 26
27struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); 27struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
28 28
29static inline void get_nsproxy(struct nsproxy *ns)
30{
31 atomic_inc(&ns->count);
32}
33
34void get_task_namespaces(struct task_struct *tsk)
35{
36 struct nsproxy *ns = tsk->nsproxy;
37 if (ns) {
38 get_nsproxy(ns);
39 }
40}
41
42/* 29/*
43 * creates a copy of "orig" with refcount 1. 30 * creates a copy of "orig" with refcount 1.
44 */ 31 */
@@ -87,7 +74,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
87 goto out_ipc; 74 goto out_ipc;
88 } 75 }
89 76
90 new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns); 77 new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
91 if (IS_ERR(new_nsp->pid_ns)) { 78 if (IS_ERR(new_nsp->pid_ns)) {
92 err = PTR_ERR(new_nsp->pid_ns); 79 err = PTR_ERR(new_nsp->pid_ns);
93 goto out_pid; 80 goto out_pid;
@@ -142,7 +129,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
142 129
143 get_nsproxy(old_ns); 130 get_nsproxy(old_ns);
144 131
145 if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) 132 if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
133 CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
146 return 0; 134 return 0;
147 135
148 if (!capable(CAP_SYS_ADMIN)) { 136 if (!capable(CAP_SYS_ADMIN)) {
@@ -156,7 +144,14 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
156 goto out; 144 goto out;
157 } 145 }
158 146
147 err = ns_cgroup_clone(tsk);
148 if (err) {
149 put_nsproxy(new_ns);
150 goto out;
151 }
152
159 tsk->nsproxy = new_ns; 153 tsk->nsproxy = new_ns;
154
160out: 155out:
161 put_nsproxy(old_ns); 156 put_nsproxy(old_ns);
162 return err; 157 return err;
@@ -196,11 +191,46 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
196 191
197 *new_nsp = create_new_namespaces(unshare_flags, current, 192 *new_nsp = create_new_namespaces(unshare_flags, current,
198 new_fs ? new_fs : current->fs); 193 new_fs ? new_fs : current->fs);
199 if (IS_ERR(*new_nsp)) 194 if (IS_ERR(*new_nsp)) {
200 err = PTR_ERR(*new_nsp); 195 err = PTR_ERR(*new_nsp);
196 goto out;
197 }
198
199 err = ns_cgroup_clone(current);
200 if (err)
201 put_nsproxy(*new_nsp);
202
203out:
201 return err; 204 return err;
202} 205}
203 206
207void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
208{
209 struct nsproxy *ns;
210
211 might_sleep();
212
213 ns = p->nsproxy;
214
215 rcu_assign_pointer(p->nsproxy, new);
216
217 if (ns && atomic_dec_and_test(&ns->count)) {
218 /*
219 * wait for others to get what they want from this nsproxy.
220 *
221 * cannot release this nsproxy via the call_rcu() since
222 * put_mnt_ns() will want to sleep
223 */
224 synchronize_rcu();
225 free_nsproxy(ns);
226 }
227}
228
229void exit_task_namespaces(struct task_struct *p)
230{
231 switch_task_namespaces(p, NULL);
232}
233
204static int __init nsproxy_cache_init(void) 234static int __init nsproxy_cache_init(void)
205{ 235{
206 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 236 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
diff --git a/kernel/pid.c b/kernel/pid.c
index c6e3f9ffff87..d1db36b94674 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -18,6 +18,12 @@
18 * allocation scenario when all but one out of 1 million PIDs possible are 18 * allocation scenario when all but one out of 1 million PIDs possible are
19 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE 19 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
20 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1). 20 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
21 *
22 * Pid namespaces:
23 * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
24 * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
25 * Many thanks to Oleg Nesterov for comments and help
26 *
21 */ 27 */
22 28
23#include <linux/mm.h> 29#include <linux/mm.h>
@@ -28,12 +34,14 @@
28#include <linux/hash.h> 34#include <linux/hash.h>
29#include <linux/pid_namespace.h> 35#include <linux/pid_namespace.h>
30#include <linux/init_task.h> 36#include <linux/init_task.h>
37#include <linux/syscalls.h>
31 38
32#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) 39#define pid_hashfn(nr, ns) \
40 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
33static struct hlist_head *pid_hash; 41static struct hlist_head *pid_hash;
34static int pidhash_shift; 42static int pidhash_shift;
35static struct kmem_cache *pid_cachep;
36struct pid init_struct_pid = INIT_STRUCT_PID; 43struct pid init_struct_pid = INIT_STRUCT_PID;
44static struct kmem_cache *pid_ns_cachep;
37 45
38int pid_max = PID_MAX_DEFAULT; 46int pid_max = PID_MAX_DEFAULT;
39 47
@@ -68,8 +76,25 @@ struct pid_namespace init_pid_ns = {
68 [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } 76 [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
69 }, 77 },
70 .last_pid = 0, 78 .last_pid = 0,
71 .child_reaper = &init_task 79 .level = 0,
80 .child_reaper = &init_task,
72}; 81};
82EXPORT_SYMBOL_GPL(init_pid_ns);
83
/*
 * is_container_init - is @tsk the init task of its own pid namespace?
 *
 * True when the task's pid number at its deepest namespace level
 * (pid->numbers[pid->level].nr) is 1.
 * NOTE(review): rcu_read_lock() presumably keeps the struct pid
 * returned by task_pid() valid across the check — confirm against the
 * pid detach path.
 */
int is_container_init(struct task_struct *tsk)
{
	int ret = 0;
	struct pid *pid;

	rcu_read_lock();
	pid = task_pid(tsk);
	if (pid != NULL && pid->numbers[pid->level].nr == 1)
		ret = 1;
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(is_container_init);
73 98
74/* 99/*
75 * Note: disable interrupts while the pidmap_lock is held as an 100 * Note: disable interrupts while the pidmap_lock is held as an
@@ -176,11 +201,17 @@ static int next_pidmap(struct pid_namespace *pid_ns, int last)
176 201
177fastcall void put_pid(struct pid *pid) 202fastcall void put_pid(struct pid *pid)
178{ 203{
204 struct pid_namespace *ns;
205
179 if (!pid) 206 if (!pid)
180 return; 207 return;
208
209 ns = pid->numbers[pid->level].ns;
181 if ((atomic_read(&pid->count) == 1) || 210 if ((atomic_read(&pid->count) == 1) ||
182 atomic_dec_and_test(&pid->count)) 211 atomic_dec_and_test(&pid->count)) {
183 kmem_cache_free(pid_cachep, pid); 212 kmem_cache_free(ns->pid_cachep, pid);
213 put_pid_ns(ns);
214 }
184} 215}
185EXPORT_SYMBOL_GPL(put_pid); 216EXPORT_SYMBOL_GPL(put_pid);
186 217
@@ -193,60 +224,94 @@ static void delayed_put_pid(struct rcu_head *rhp)
193fastcall void free_pid(struct pid *pid) 224fastcall void free_pid(struct pid *pid)
194{ 225{
195 /* We can be called with write_lock_irq(&tasklist_lock) held */ 226 /* We can be called with write_lock_irq(&tasklist_lock) held */
227 int i;
196 unsigned long flags; 228 unsigned long flags;
197 229
198 spin_lock_irqsave(&pidmap_lock, flags); 230 spin_lock_irqsave(&pidmap_lock, flags);
199 hlist_del_rcu(&pid->pid_chain); 231 for (i = 0; i <= pid->level; i++)
232 hlist_del_rcu(&pid->numbers[i].pid_chain);
200 spin_unlock_irqrestore(&pidmap_lock, flags); 233 spin_unlock_irqrestore(&pidmap_lock, flags);
201 234
202 free_pidmap(&init_pid_ns, pid->nr); 235 for (i = 0; i <= pid->level; i++)
236 free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr);
237
203 call_rcu(&pid->rcu, delayed_put_pid); 238 call_rcu(&pid->rcu, delayed_put_pid);
204} 239}
205 240
206struct pid *alloc_pid(void) 241struct pid *alloc_pid(struct pid_namespace *ns)
207{ 242{
208 struct pid *pid; 243 struct pid *pid;
209 enum pid_type type; 244 enum pid_type type;
210 int nr = -1; 245 int i, nr;
246 struct pid_namespace *tmp;
247 struct upid *upid;
211 248
212 pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL); 249 pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
213 if (!pid) 250 if (!pid)
214 goto out; 251 goto out;
215 252
216 nr = alloc_pidmap(current->nsproxy->pid_ns); 253 tmp = ns;
217 if (nr < 0) 254 for (i = ns->level; i >= 0; i--) {
218 goto out_free; 255 nr = alloc_pidmap(tmp);
256 if (nr < 0)
257 goto out_free;
258
259 pid->numbers[i].nr = nr;
260 pid->numbers[i].ns = tmp;
261 tmp = tmp->parent;
262 }
219 263
264 get_pid_ns(ns);
265 pid->level = ns->level;
220 atomic_set(&pid->count, 1); 266 atomic_set(&pid->count, 1);
221 pid->nr = nr;
222 for (type = 0; type < PIDTYPE_MAX; ++type) 267 for (type = 0; type < PIDTYPE_MAX; ++type)
223 INIT_HLIST_HEAD(&pid->tasks[type]); 268 INIT_HLIST_HEAD(&pid->tasks[type]);
224 269
225 spin_lock_irq(&pidmap_lock); 270 spin_lock_irq(&pidmap_lock);
226 hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]); 271 for (i = ns->level; i >= 0; i--) {
272 upid = &pid->numbers[i];
273 hlist_add_head_rcu(&upid->pid_chain,
274 &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
275 }
227 spin_unlock_irq(&pidmap_lock); 276 spin_unlock_irq(&pidmap_lock);
228 277
229out: 278out:
230 return pid; 279 return pid;
231 280
232out_free: 281out_free:
233 kmem_cache_free(pid_cachep, pid); 282 for (i++; i <= ns->level; i++)
283 free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr);
284
285 kmem_cache_free(ns->pid_cachep, pid);
234 pid = NULL; 286 pid = NULL;
235 goto out; 287 goto out;
236} 288}
237 289
238struct pid * fastcall find_pid(int nr) 290struct pid * fastcall find_pid_ns(int nr, struct pid_namespace *ns)
239{ 291{
240 struct hlist_node *elem; 292 struct hlist_node *elem;
241 struct pid *pid; 293 struct upid *pnr;
294
295 hlist_for_each_entry_rcu(pnr, elem,
296 &pid_hash[pid_hashfn(nr, ns)], pid_chain)
297 if (pnr->nr == nr && pnr->ns == ns)
298 return container_of(pnr, struct pid,
299 numbers[ns->level]);
242 300
243 hlist_for_each_entry_rcu(pid, elem,
244 &pid_hash[pid_hashfn(nr)], pid_chain) {
245 if (pid->nr == nr)
246 return pid;
247 }
248 return NULL; 301 return NULL;
249} 302}
303EXPORT_SYMBOL_GPL(find_pid_ns);
304
305struct pid *find_vpid(int nr)
306{
307 return find_pid_ns(nr, current->nsproxy->pid_ns);
308}
309EXPORT_SYMBOL_GPL(find_vpid);
310
311struct pid *find_pid(int nr)
312{
313 return find_pid_ns(nr, &init_pid_ns);
314}
250EXPORT_SYMBOL_GPL(find_pid); 315EXPORT_SYMBOL_GPL(find_pid);
251 316
252/* 317/*
@@ -307,12 +372,32 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
307/* 372/*
308 * Must be called under rcu_read_lock() or with tasklist_lock read-held. 373 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
309 */ 374 */
310struct task_struct *find_task_by_pid_type(int type, int nr) 375struct task_struct *find_task_by_pid_type_ns(int type, int nr,
376 struct pid_namespace *ns)
311{ 377{
312 return pid_task(find_pid(nr), type); 378 return pid_task(find_pid_ns(nr, ns), type);
313} 379}
314 380
315EXPORT_SYMBOL(find_task_by_pid_type); 381EXPORT_SYMBOL(find_task_by_pid_type_ns);
382
383struct task_struct *find_task_by_pid(pid_t nr)
384{
385 return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
386}
387EXPORT_SYMBOL(find_task_by_pid);
388
389struct task_struct *find_task_by_vpid(pid_t vnr)
390{
391 return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
392 current->nsproxy->pid_ns);
393}
394EXPORT_SYMBOL(find_task_by_vpid);
395
396struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
397{
398 return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
399}
400EXPORT_SYMBOL(find_task_by_pid_ns);
316 401
317struct pid *get_task_pid(struct task_struct *task, enum pid_type type) 402struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
318{ 403{
@@ -339,45 +424,239 @@ struct pid *find_get_pid(pid_t nr)
339 struct pid *pid; 424 struct pid *pid;
340 425
341 rcu_read_lock(); 426 rcu_read_lock();
342 pid = get_pid(find_pid(nr)); 427 pid = get_pid(find_vpid(nr));
343 rcu_read_unlock(); 428 rcu_read_unlock();
344 429
345 return pid; 430 return pid;
346} 431}
347 432
433pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
434{
435 struct upid *upid;
436 pid_t nr = 0;
437
438 if (pid && ns->level <= pid->level) {
439 upid = &pid->numbers[ns->level];
440 if (upid->ns == ns)
441 nr = upid->nr;
442 }
443 return nr;
444}
445
/* Each helper below reports an id as seen from @ns; 0 if not visible
 * there (see pid_nr_ns()). */

/* pid of @tsk in @ns */
pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
	return pid_nr_ns(task_pid(tsk), ns);
}
EXPORT_SYMBOL(task_pid_nr_ns);

/* thread-group id of @tsk in @ns */
pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
	return pid_nr_ns(task_tgid(tsk), ns);
}
EXPORT_SYMBOL(task_tgid_nr_ns);

/* process-group id of @tsk in @ns */
pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
	return pid_nr_ns(task_pgrp(tsk), ns);
}
EXPORT_SYMBOL(task_pgrp_nr_ns);

/* session id of @tsk in @ns */
pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
	return pid_nr_ns(task_session(tsk), ns);
}
EXPORT_SYMBOL(task_session_nr_ns);
469
348/* 470/*
349 * Used by proc to find the first pid that is greater then or equal to nr. 471 * Used by proc to find the first pid that is greater then or equal to nr.
350 * 472 *
351 * If there is a pid at nr this function is exactly the same as find_pid. 473 * If there is a pid at nr this function is exactly the same as find_pid.
352 */ 474 */
353struct pid *find_ge_pid(int nr) 475struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
354{ 476{
355 struct pid *pid; 477 struct pid *pid;
356 478
357 do { 479 do {
358 pid = find_pid(nr); 480 pid = find_pid_ns(nr, ns);
359 if (pid) 481 if (pid)
360 break; 482 break;
361 nr = next_pidmap(current->nsproxy->pid_ns, nr); 483 nr = next_pidmap(ns, nr);
362 } while (nr > 0); 484 } while (nr > 0);
363 485
364 return pid; 486 return pid;
365} 487}
366EXPORT_SYMBOL_GPL(find_get_pid); 488EXPORT_SYMBOL_GPL(find_get_pid);
367 489
/*
 * A struct pid carries one struct upid per namespace level, so pids at
 * different depths need differently-sized slab caches.  Caches are
 * created lazily, shared between namespaces of equal depth, and kept on
 * pid_caches_lh under pid_caches_mutex.
 */
struct pid_cache {
	int nr_ids;		/* number of upid slots per pid */
	char name[16];		/* slab name: "pid_<nr_ids>" */
	struct kmem_cache *cachep;
	struct list_head list;
};

static LIST_HEAD(pid_caches_lh);
static DEFINE_MUTEX(pid_caches_mutex);

/*
 * creates the kmem cache to allocate pids from.
 * @nr_ids: the number of numerical ids this pid will have to carry
 *
 * Returns the existing cache when one for @nr_ids is already on the
 * list; NULL on allocation failure.
 */
static struct kmem_cache *create_pid_cachep(int nr_ids)
{
	struct pid_cache *pcache;
	struct kmem_cache *cachep;

	mutex_lock(&pid_caches_mutex);
	list_for_each_entry (pcache, &pid_caches_lh, list)
		if (pcache->nr_ids == nr_ids)
			goto out;

	pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
	if (pcache == NULL)
		goto err_alloc;

	snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
	/* struct pid already embeds one upid; add room for nr_ids - 1 more */
	cachep = kmem_cache_create(pcache->name,
			sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
			0, SLAB_HWCACHE_ALIGN, NULL);
	if (cachep == NULL)
		goto err_cachep;

	pcache->nr_ids = nr_ids;
	pcache->cachep = cachep;
	list_add(&pcache->list, &pid_caches_lh);
out:
	mutex_unlock(&pid_caches_mutex);
	return pcache->cachep;

err_cachep:
	kfree(pcache);
err_alloc:
	mutex_unlock(&pid_caches_mutex);
	return NULL;
}
539
540static struct pid_namespace *create_pid_namespace(int level)
541{
542 struct pid_namespace *ns;
543 int i;
544
545 ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
546 if (ns == NULL)
547 goto out;
548
549 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
550 if (!ns->pidmap[0].page)
551 goto out_free;
552
553 ns->pid_cachep = create_pid_cachep(level + 1);
554 if (ns->pid_cachep == NULL)
555 goto out_free_map;
556
557 kref_init(&ns->kref);
558 ns->last_pid = 0;
559 ns->child_reaper = NULL;
560 ns->level = level;
561
562 set_bit(0, ns->pidmap[0].page);
563 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
564
565 for (i = 1; i < PIDMAP_ENTRIES; i++) {
566 ns->pidmap[i].page = 0;
567 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
568 }
569
570 return ns;
571
572out_free_map:
573 kfree(ns->pidmap[0].page);
574out_free:
575 kmem_cache_free(pid_ns_cachep, ns);
576out:
577 return ERR_PTR(-ENOMEM);
578}
579
/*
 * Free a pid namespace: every pidmap page (kfree(NULL) is a no-op for
 * never-allocated entries) and then the namespace itself.
 */
static void destroy_pid_namespace(struct pid_namespace *ns)
{
	int i;

	for (i = 0; i < PIDMAP_ENTRIES; i++)
		kfree(ns->pidmap[i].page);
	kmem_cache_free(pid_ns_cachep, ns);
}
588
368struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) 589struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
369{ 590{
591 struct pid_namespace *new_ns;
592
370 BUG_ON(!old_ns); 593 BUG_ON(!old_ns);
371 get_pid_ns(old_ns); 594 new_ns = get_pid_ns(old_ns);
372 return old_ns; 595 if (!(flags & CLONE_NEWPID))
596 goto out;
597
598 new_ns = ERR_PTR(-EINVAL);
599 if (flags & CLONE_THREAD)
600 goto out_put;
601
602 new_ns = create_pid_namespace(old_ns->level + 1);
603 if (!IS_ERR(new_ns))
604 new_ns->parent = get_pid_ns(old_ns);
605
606out_put:
607 put_pid_ns(old_ns);
608out:
609 return new_ns;
373} 610}
374 611
375void free_pid_ns(struct kref *kref) 612void free_pid_ns(struct kref *kref)
376{ 613{
377 struct pid_namespace *ns; 614 struct pid_namespace *ns, *parent;
378 615
379 ns = container_of(kref, struct pid_namespace, kref); 616 ns = container_of(kref, struct pid_namespace, kref);
380 kfree(ns); 617
618 parent = ns->parent;
619 destroy_pid_namespace(ns);
620
621 if (parent != NULL)
622 put_pid_ns(parent);
623}
624
void zap_pid_ns_processes(struct pid_namespace *pid_ns)
{
	int nr;
	int rc;

	/*
	 * The last thread in the cgroup-init thread group is terminating.
	 * Find remaining pids in the namespace, signal and wait for them
	 * to exit.
	 *
	 * Note: this signals each thread in the namespace - even those that
	 *       belong to the same thread group.  To avoid this, we would
	 *       have to walk the entire tasklist looking for processes in
	 *       this namespace, but that could be unnecessarily expensive
	 *       if the pid namespace has just a few processes.  Or we need
	 *       to maintain a tasklist for each pid namespace.
	 */
	read_lock(&tasklist_lock);
	/* Start after pid 1 (ourselves, the namespace init) */
	nr = next_pidmap(pid_ns, 1);
	while (nr > 0) {
		kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
		nr = next_pidmap(pid_ns, nr);
	}
	read_unlock(&tasklist_lock);

	/*
	 * Reap everything until no children remain.
	 * NOTE(review): clearing TIF_SIGPENDING presumably keeps the
	 * SIGKILLs delivered above from interrupting sys_wait4() —
	 * confirm against signal delivery semantics.
	 */
	do {
		clear_thread_flag(TIF_SIGPENDING);
		rc = sys_wait4(-1, NULL, __WALL, NULL);
	} while (rc != -ECHILD);


	/* Child reaper for the pid namespace is going away */
	pid_ns->child_reaper = NULL;
	return;
}
382 661
383/* 662/*
@@ -412,5 +691,9 @@ void __init pidmap_init(void)
412 set_bit(0, init_pid_ns.pidmap[0].page); 691 set_bit(0, init_pid_ns.pidmap[0].page);
413 atomic_dec(&init_pid_ns.pidmap[0].nr_free); 692 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
414 693
415 pid_cachep = KMEM_CACHE(pid, SLAB_PANIC); 694 init_pid_ns.pid_cachep = create_pid_cachep(1);
695 if (init_pid_ns.pid_cachep == NULL)
696 panic("Can't create pid_1 cachep\n");
697
698 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
416} 699}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index b53c8fcd9d82..68c96376e84a 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -21,8 +21,8 @@ static int check_clock(const clockid_t which_clock)
21 21
22 read_lock(&tasklist_lock); 22 read_lock(&tasklist_lock);
23 p = find_task_by_pid(pid); 23 p = find_task_by_pid(pid);
24 if (!p || (CPUCLOCK_PERTHREAD(which_clock) ? 24 if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
25 p->tgid != current->tgid : p->tgid != pid)) { 25 same_thread_group(p, current) : thread_group_leader(p))) {
26 error = -EINVAL; 26 error = -EINVAL;
27 } 27 }
28 read_unlock(&tasklist_lock); 28 read_unlock(&tasklist_lock);
@@ -308,13 +308,13 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
308 p = find_task_by_pid(pid); 308 p = find_task_by_pid(pid);
309 if (p) { 309 if (p) {
310 if (CPUCLOCK_PERTHREAD(which_clock)) { 310 if (CPUCLOCK_PERTHREAD(which_clock)) {
311 if (p->tgid == current->tgid) { 311 if (same_thread_group(p, current)) {
312 error = cpu_clock_sample(which_clock, 312 error = cpu_clock_sample(which_clock,
313 p, &rtn); 313 p, &rtn);
314 } 314 }
315 } else { 315 } else {
316 read_lock(&tasklist_lock); 316 read_lock(&tasklist_lock);
317 if (p->tgid == pid && p->signal) { 317 if (thread_group_leader(p) && p->signal) {
318 error = 318 error =
319 cpu_clock_sample_group(which_clock, 319 cpu_clock_sample_group(which_clock,
320 p, &rtn); 320 p, &rtn);
@@ -355,7 +355,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
355 p = current; 355 p = current;
356 } else { 356 } else {
357 p = find_task_by_pid(pid); 357 p = find_task_by_pid(pid);
358 if (p && p->tgid != current->tgid) 358 if (p && !same_thread_group(p, current))
359 p = NULL; 359 p = NULL;
360 } 360 }
361 } else { 361 } else {
@@ -363,7 +363,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
363 p = current->group_leader; 363 p = current->group_leader;
364 } else { 364 } else {
365 p = find_task_by_pid(pid); 365 p = find_task_by_pid(pid);
366 if (p && p->tgid != pid) 366 if (p && !thread_group_leader(p))
367 p = NULL; 367 p = NULL;
368 } 368 }
369 } 369 }
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d11f579d189a..35b4bbfc78ff 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -404,7 +404,7 @@ static struct task_struct * good_sigevent(sigevent_t * event)
404 404
405 if ((event->sigev_notify & SIGEV_THREAD_ID ) && 405 if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
406 (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || 406 (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
407 rtn->tgid != current->tgid || 407 !same_thread_group(rtn, current) ||
408 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) 408 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
409 return NULL; 409 return NULL;
410 410
@@ -608,7 +608,7 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
608 spin_lock(&timr->it_lock); 608 spin_lock(&timr->it_lock);
609 609
610 if ((timr->it_id != timer_id) || !(timr->it_process) || 610 if ((timr->it_id != timer_id) || !(timr->it_process) ||
611 timr->it_process->tgid != current->tgid) { 611 !same_thread_group(timr->it_process, current)) {
612 spin_unlock(&timr->it_lock); 612 spin_unlock(&timr->it_lock);
613 spin_unlock_irqrestore(&idr_lock, *flags); 613 spin_unlock_irqrestore(&idr_lock, *flags);
614 timr = NULL; 614 timr = NULL;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a73ebd3b9d4c..7c76f2ffaeaa 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -19,6 +19,7 @@
19#include <linux/security.h> 19#include <linux/security.h>
20#include <linux/signal.h> 20#include <linux/signal.h>
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/pid_namespace.h>
22 23
23#include <asm/pgtable.h> 24#include <asm/pgtable.h>
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
@@ -168,7 +169,7 @@ int ptrace_attach(struct task_struct *task)
168 retval = -EPERM; 169 retval = -EPERM;
169 if (task->pid <= 1) 170 if (task->pid <= 1)
170 goto out; 171 goto out;
171 if (task->tgid == current->tgid) 172 if (same_thread_group(task, current))
172 goto out; 173 goto out;
173 174
174repeat: 175repeat:
@@ -443,7 +444,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
443 return ERR_PTR(-EPERM); 444 return ERR_PTR(-EPERM);
444 445
445 read_lock(&tasklist_lock); 446 read_lock(&tasklist_lock);
446 child = find_task_by_pid(pid); 447 child = find_task_by_vpid(pid);
447 if (child) 448 if (child)
448 get_task_struct(child); 449 get_task_struct(child);
449 450
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 6b0703db152d..56d73cb8826d 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -87,7 +87,7 @@ static int rt_trace_on = 1;
87static void printk_task(struct task_struct *p) 87static void printk_task(struct task_struct *p)
88{ 88{
89 if (p) 89 if (p)
90 printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); 90 printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
91 else 91 else
92 printk("<none>"); 92 printk("<none>");
93} 93}
@@ -152,22 +152,25 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
152 printk( "[ BUG: circular locking deadlock detected! ]\n"); 152 printk( "[ BUG: circular locking deadlock detected! ]\n");
153 printk( "--------------------------------------------\n"); 153 printk( "--------------------------------------------\n");
154 printk("%s/%d is deadlocking current task %s/%d\n\n", 154 printk("%s/%d is deadlocking current task %s/%d\n\n",
155 task->comm, task->pid, current->comm, current->pid); 155 task->comm, task_pid_nr(task),
156 current->comm, task_pid_nr(current));
156 157
157 printk("\n1) %s/%d is trying to acquire this lock:\n", 158 printk("\n1) %s/%d is trying to acquire this lock:\n",
158 current->comm, current->pid); 159 current->comm, task_pid_nr(current));
159 printk_lock(waiter->lock, 1); 160 printk_lock(waiter->lock, 1);
160 161
161 printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid); 162 printk("\n2) %s/%d is blocked on this lock:\n",
163 task->comm, task_pid_nr(task));
162 printk_lock(waiter->deadlock_lock, 1); 164 printk_lock(waiter->deadlock_lock, 1);
163 165
164 debug_show_held_locks(current); 166 debug_show_held_locks(current);
165 debug_show_held_locks(task); 167 debug_show_held_locks(task);
166 168
167 printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid); 169 printk("\n%s/%d's [blocked] stackdump:\n\n",
170 task->comm, task_pid_nr(task));
168 show_stack(task, NULL); 171 show_stack(task, NULL);
169 printk("\n%s/%d's [current] stackdump:\n\n", 172 printk("\n%s/%d's [current] stackdump:\n\n",
170 current->comm, current->pid); 173 current->comm, task_pid_nr(current));
171 dump_stack(); 174 dump_stack();
172 debug_show_all_locks(); 175 debug_show_all_locks();
173 176
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 8cd9bd2cdb34..0deef71ff8d2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -185,7 +185,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
185 prev_max = max_lock_depth; 185 prev_max = max_lock_depth;
186 printk(KERN_WARNING "Maximum lock depth %d reached " 186 printk(KERN_WARNING "Maximum lock depth %d reached "
187 "task: %s (%d)\n", max_lock_depth, 187 "task: %s (%d)\n", max_lock_depth,
188 top_task->comm, top_task->pid); 188 top_task->comm, task_pid_nr(top_task));
189 } 189 }
190 put_task_struct(task); 190 put_task_struct(task);
191 191
diff --git a/kernel/sched.c b/kernel/sched.c
index ed90be46fb31..afe76ec2e7fe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -44,6 +44,7 @@
44#include <linux/vmalloc.h> 44#include <linux/vmalloc.h>
45#include <linux/blkdev.h> 45#include <linux/blkdev.h>
46#include <linux/delay.h> 46#include <linux/delay.h>
47#include <linux/pid_namespace.h>
47#include <linux/smp.h> 48#include <linux/smp.h>
48#include <linux/threads.h> 49#include <linux/threads.h>
49#include <linux/timer.h> 50#include <linux/timer.h>
@@ -51,6 +52,7 @@
51#include <linux/cpu.h> 52#include <linux/cpu.h>
52#include <linux/cpuset.h> 53#include <linux/cpuset.h>
53#include <linux/percpu.h> 54#include <linux/percpu.h>
55#include <linux/cpu_acct.h>
54#include <linux/kthread.h> 56#include <linux/kthread.h>
55#include <linux/seq_file.h> 57#include <linux/seq_file.h>
56#include <linux/sysctl.h> 58#include <linux/sysctl.h>
@@ -153,10 +155,15 @@ struct rt_prio_array {
153 155
154#ifdef CONFIG_FAIR_GROUP_SCHED 156#ifdef CONFIG_FAIR_GROUP_SCHED
155 157
158#include <linux/cgroup.h>
159
156struct cfs_rq; 160struct cfs_rq;
157 161
158/* task group related information */ 162/* task group related information */
159struct task_group { 163struct task_group {
164#ifdef CONFIG_FAIR_CGROUP_SCHED
165 struct cgroup_subsys_state css;
166#endif
160 /* schedulable entities of this group on each cpu */ 167 /* schedulable entities of this group on each cpu */
161 struct sched_entity **se; 168 struct sched_entity **se;
162 /* runqueue "owned" by this group on each cpu */ 169 /* runqueue "owned" by this group on each cpu */
@@ -197,6 +204,9 @@ static inline struct task_group *task_group(struct task_struct *p)
197 204
198#ifdef CONFIG_FAIR_USER_SCHED 205#ifdef CONFIG_FAIR_USER_SCHED
199 tg = p->user->tg; 206 tg = p->user->tg;
207#elif defined(CONFIG_FAIR_CGROUP_SCHED)
208 tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
209 struct task_group, css);
200#else 210#else
201 tg = &init_task_group; 211 tg = &init_task_group;
202#endif 212#endif
@@ -1875,7 +1885,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
1875 preempt_enable(); 1885 preempt_enable();
1876#endif 1886#endif
1877 if (current->set_child_tid) 1887 if (current->set_child_tid)
1878 put_user(current->pid, current->set_child_tid); 1888 put_user(task_pid_vnr(current), current->set_child_tid);
1879} 1889}
1880 1890
1881/* 1891/*
@@ -3307,9 +3317,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
3307{ 3317{
3308 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 3318 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3309 cputime64_t tmp; 3319 cputime64_t tmp;
3320 struct rq *rq = this_rq();
3310 3321
3311 p->utime = cputime_add(p->utime, cputime); 3322 p->utime = cputime_add(p->utime, cputime);
3312 3323
3324 if (p != rq->idle)
3325 cpuacct_charge(p, cputime);
3326
3313 /* Add user time to cpustat. */ 3327 /* Add user time to cpustat. */
3314 tmp = cputime_to_cputime64(cputime); 3328 tmp = cputime_to_cputime64(cputime);
3315 if (TASK_NICE(p) > 0) 3329 if (TASK_NICE(p) > 0)
@@ -3374,9 +3388,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
3374 cpustat->irq = cputime64_add(cpustat->irq, tmp); 3388 cpustat->irq = cputime64_add(cpustat->irq, tmp);
3375 else if (softirq_count()) 3389 else if (softirq_count())
3376 cpustat->softirq = cputime64_add(cpustat->softirq, tmp); 3390 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
3377 else if (p != rq->idle) 3391 else if (p != rq->idle) {
3378 cpustat->system = cputime64_add(cpustat->system, tmp); 3392 cpustat->system = cputime64_add(cpustat->system, tmp);
3379 else if (atomic_read(&rq->nr_iowait) > 0) 3393 cpuacct_charge(p, cputime);
3394 } else if (atomic_read(&rq->nr_iowait) > 0)
3380 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 3395 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
3381 else 3396 else
3382 cpustat->idle = cputime64_add(cpustat->idle, tmp); 3397 cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -3412,8 +3427,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
3412 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 3427 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
3413 else 3428 else
3414 cpustat->idle = cputime64_add(cpustat->idle, tmp); 3429 cpustat->idle = cputime64_add(cpustat->idle, tmp);
3415 } else 3430 } else {
3416 cpustat->steal = cputime64_add(cpustat->steal, tmp); 3431 cpustat->steal = cputime64_add(cpustat->steal, tmp);
3432 cpuacct_charge(p, -tmp);
3433 }
3417} 3434}
3418 3435
3419/* 3436/*
@@ -3493,7 +3510,7 @@ EXPORT_SYMBOL(sub_preempt_count);
3493static noinline void __schedule_bug(struct task_struct *prev) 3510static noinline void __schedule_bug(struct task_struct *prev)
3494{ 3511{
3495 printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d\n", 3512 printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d\n",
3496 prev->comm, preempt_count(), prev->pid); 3513 prev->comm, preempt_count(), task_pid_nr(prev));
3497 debug_show_held_locks(prev); 3514 debug_show_held_locks(prev);
3498 if (irqs_disabled()) 3515 if (irqs_disabled())
3499 print_irqtrace_events(prev); 3516 print_irqtrace_events(prev);
@@ -4159,7 +4176,7 @@ struct task_struct *idle_task(int cpu)
4159 */ 4176 */
4160static struct task_struct *find_process_by_pid(pid_t pid) 4177static struct task_struct *find_process_by_pid(pid_t pid)
4161{ 4178{
4162 return pid ? find_task_by_pid(pid) : current; 4179 return pid ? find_task_by_vpid(pid) : current;
4163} 4180}
4164 4181
4165/* Actually do priority change: must hold rq lock. */ 4182/* Actually do priority change: must hold rq lock. */
@@ -4462,8 +4479,21 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
4462 4479
4463 cpus_allowed = cpuset_cpus_allowed(p); 4480 cpus_allowed = cpuset_cpus_allowed(p);
4464 cpus_and(new_mask, new_mask, cpus_allowed); 4481 cpus_and(new_mask, new_mask, cpus_allowed);
4482 again:
4465 retval = set_cpus_allowed(p, new_mask); 4483 retval = set_cpus_allowed(p, new_mask);
4466 4484
4485 if (!retval) {
4486 cpus_allowed = cpuset_cpus_allowed(p);
4487 if (!cpus_subset(new_mask, cpus_allowed)) {
4488 /*
4489 * We must have raced with a concurrent cpuset
4490 * update. Just reset the cpus_allowed to the
4491 * cpuset's cpus_allowed
4492 */
4493 new_mask = cpus_allowed;
4494 goto again;
4495 }
4496 }
4467out_unlock: 4497out_unlock:
4468 put_task_struct(p); 4498 put_task_struct(p);
4469 mutex_unlock(&sched_hotcpu_mutex); 4499 mutex_unlock(&sched_hotcpu_mutex);
@@ -4843,7 +4873,8 @@ static void show_task(struct task_struct *p)
4843 free = (unsigned long)n - (unsigned long)end_of_stack(p); 4873 free = (unsigned long)n - (unsigned long)end_of_stack(p);
4844 } 4874 }
4845#endif 4875#endif
4846 printk(KERN_CONT "%5lu %5d %6d\n", free, p->pid, p->parent->pid); 4876 printk(KERN_CONT "%5lu %5d %6d\n", free,
4877 task_pid_nr(p), task_pid_nr(p->parent));
4847 4878
4848 if (state != TASK_RUNNING) 4879 if (state != TASK_RUNNING)
4849 show_stack(p, NULL); 4880 show_stack(p, NULL);
@@ -5137,8 +5168,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5137 5168
5138 /* No more Mr. Nice Guy. */ 5169 /* No more Mr. Nice Guy. */
5139 if (dest_cpu == NR_CPUS) { 5170 if (dest_cpu == NR_CPUS) {
5171 cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p);
5172 /*
5173 * Try to stay on the same cpuset, where the
5174 * current cpuset may be a subset of all cpus.
5175 * The cpuset_cpus_allowed_locked() variant of
5176 * cpuset_cpus_allowed() will not block. It must be
5177 * called within calls to cpuset_lock/cpuset_unlock.
5178 */
5140 rq = task_rq_lock(p, &flags); 5179 rq = task_rq_lock(p, &flags);
5141 cpus_setall(p->cpus_allowed); 5180 p->cpus_allowed = cpus_allowed;
5142 dest_cpu = any_online_cpu(p->cpus_allowed); 5181 dest_cpu = any_online_cpu(p->cpus_allowed);
5143 task_rq_unlock(rq, &flags); 5182 task_rq_unlock(rq, &flags);
5144 5183
@@ -5150,7 +5189,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5150 if (p->mm && printk_ratelimit()) 5189 if (p->mm && printk_ratelimit())
5151 printk(KERN_INFO "process %d (%s) no " 5190 printk(KERN_INFO "process %d (%s) no "
5152 "longer affine to cpu%d\n", 5191 "longer affine to cpu%d\n",
5153 p->pid, p->comm, dead_cpu); 5192 task_pid_nr(p), p->comm, dead_cpu);
5154 } 5193 }
5155 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); 5194 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
5156} 5195}
@@ -5257,7 +5296,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5257 struct rq *rq = cpu_rq(dead_cpu); 5296 struct rq *rq = cpu_rq(dead_cpu);
5258 5297
5259 /* Must be exiting, otherwise would be on tasklist. */ 5298 /* Must be exiting, otherwise would be on tasklist. */
5260 BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD); 5299 BUG_ON(!p->exit_state);
5261 5300
5262 /* Cannot have done final schedule yet: would have vanished. */ 5301 /* Cannot have done final schedule yet: would have vanished. */
5263 BUG_ON(p->state == TASK_DEAD); 5302 BUG_ON(p->state == TASK_DEAD);
@@ -5504,6 +5543,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5504 5543
5505 case CPU_DEAD: 5544 case CPU_DEAD:
5506 case CPU_DEAD_FROZEN: 5545 case CPU_DEAD_FROZEN:
5546 cpuset_lock(); /* around calls to cpuset_cpus_allowed_locked() */
5507 migrate_live_tasks(cpu); 5547 migrate_live_tasks(cpu);
5508 rq = cpu_rq(cpu); 5548 rq = cpu_rq(cpu);
5509 kthread_stop(rq->migration_thread); 5549 kthread_stop(rq->migration_thread);
@@ -5517,6 +5557,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5517 rq->idle->sched_class = &idle_sched_class; 5557 rq->idle->sched_class = &idle_sched_class;
5518 migrate_dead_tasks(cpu); 5558 migrate_dead_tasks(cpu);
5519 spin_unlock_irq(&rq->lock); 5559 spin_unlock_irq(&rq->lock);
5560 cpuset_unlock();
5520 migrate_nr_uninterruptible(rq); 5561 migrate_nr_uninterruptible(rq);
5521 BUG_ON(rq->nr_running != 0); 5562 BUG_ON(rq->nr_running != 0);
5522 5563
@@ -6367,26 +6408,31 @@ error:
6367 return -ENOMEM; 6408 return -ENOMEM;
6368#endif 6409#endif
6369} 6410}
6411
6412static cpumask_t *doms_cur; /* current sched domains */
6413static int ndoms_cur; /* number of sched domains in 'doms_cur' */
6414
6415/*
6416 * Special case: If a kmalloc of a doms_cur partition (array of
6417 * cpumask_t) fails, then fallback to a single sched domain,
6418 * as determined by the single cpumask_t fallback_doms.
6419 */
6420static cpumask_t fallback_doms;
6421
6370/* 6422/*
6371 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 6423 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
6424 * For now this just excludes isolated cpus, but could be used to
6425 * exclude other special cases in the future.
6372 */ 6426 */
6373static int arch_init_sched_domains(const cpumask_t *cpu_map) 6427static int arch_init_sched_domains(const cpumask_t *cpu_map)
6374{ 6428{
6375 cpumask_t cpu_default_map; 6429 ndoms_cur = 1;
6376 int err; 6430 doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
6377 6431 if (!doms_cur)
6378 /* 6432 doms_cur = &fallback_doms;
6379 * Setup mask for cpus without special case scheduling requirements. 6433 cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
6380 * For now this just excludes isolated cpus, but could be used to
6381 * exclude other special cases in the future.
6382 */
6383 cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
6384
6385 err = build_sched_domains(&cpu_default_map);
6386
6387 register_sched_domain_sysctl(); 6434 register_sched_domain_sysctl();
6388 6435 return build_sched_domains(doms_cur);
6389 return err;
6390} 6436}
6391 6437
6392static void arch_destroy_sched_domains(const cpumask_t *cpu_map) 6438static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
@@ -6410,6 +6456,68 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
6410 arch_destroy_sched_domains(cpu_map); 6456 arch_destroy_sched_domains(cpu_map);
6411} 6457}
6412 6458
6459/*
6460 * Partition sched domains as specified by the 'ndoms_new'
6461 * cpumasks in the array doms_new[] of cpumasks. This compares
6462 * doms_new[] to the current sched domain partitioning, doms_cur[].
6463 * It destroys each deleted domain and builds each new domain.
6464 *
6465 * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
6466 * The masks don't intersect (don't overlap.) We should setup one
6467 * sched domain for each mask. CPUs not in any of the cpumasks will
6468 * not be load balanced. If the same cpumask appears both in the
6469 * current 'doms_cur' domains and in the new 'doms_new', we can leave
6470 * it as it is.
6471 *
6472 * The passed in 'doms_new' should be kmalloc'd. This routine takes
6473 * ownership of it and will kfree it when done with it. If the caller
6474 * failed the kmalloc call, then it can pass in doms_new == NULL,
6475 * and partition_sched_domains() will fallback to the single partition
6476 * 'fallback_doms'.
6477 *
6478 * Call with hotplug lock held
6479 */
6480void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
6481{
6482 int i, j;
6483
6484 if (doms_new == NULL) {
6485 ndoms_new = 1;
6486 doms_new = &fallback_doms;
6487 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
6488 }
6489
6490 /* Destroy deleted domains */
6491 for (i = 0; i < ndoms_cur; i++) {
6492 for (j = 0; j < ndoms_new; j++) {
6493 if (cpus_equal(doms_cur[i], doms_new[j]))
6494 goto match1;
6495 }
6496 /* no match - a current sched domain not in new doms_new[] */
6497 detach_destroy_domains(doms_cur + i);
6498match1:
6499 ;
6500 }
6501
6502 /* Build new domains */
6503 for (i = 0; i < ndoms_new; i++) {
6504 for (j = 0; j < ndoms_cur; j++) {
6505 if (cpus_equal(doms_new[i], doms_cur[j]))
6506 goto match2;
6507 }
6508 /* no match - add a new doms_new */
6509 build_sched_domains(doms_new + i);
6510match2:
6511 ;
6512 }
6513
6514 /* Remember the new sched domains */
6515 if (doms_cur != &fallback_doms)
6516 kfree(doms_cur);
6517 doms_cur = doms_new;
6518 ndoms_cur = ndoms_new;
6519}
6520
6413#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 6521#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
6414static int arch_reinit_sched_domains(void) 6522static int arch_reinit_sched_domains(void)
6415{ 6523{
@@ -6991,3 +7099,116 @@ unsigned long sched_group_shares(struct task_group *tg)
6991} 7099}
6992 7100
6993#endif /* CONFIG_FAIR_GROUP_SCHED */ 7101#endif /* CONFIG_FAIR_GROUP_SCHED */
7102
7103#ifdef CONFIG_FAIR_CGROUP_SCHED
7104
7105/* return corresponding task_group object of a cgroup */
7106static inline struct task_group *cgroup_tg(struct cgroup *cont)
7107{
7108 return container_of(cgroup_subsys_state(cont, cpu_cgroup_subsys_id),
7109 struct task_group, css);
7110}
7111
7112static struct cgroup_subsys_state *
7113cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
7114{
7115 struct task_group *tg;
7116
7117 if (!cont->parent) {
7118 /* This is early initialization for the top cgroup */
7119 init_task_group.css.cgroup = cont;
7120 return &init_task_group.css;
7121 }
7122
7123 /* we support only 1-level deep hierarchical scheduler atm */
7124 if (cont->parent->parent)
7125 return ERR_PTR(-EINVAL);
7126
7127 tg = sched_create_group();
7128 if (IS_ERR(tg))
7129 return ERR_PTR(-ENOMEM);
7130
7131 /* Bind the cgroup to task_group object we just created */
7132 tg->css.cgroup = cont;
7133
7134 return &tg->css;
7135}
7136
7137static void cpu_cgroup_destroy(struct cgroup_subsys *ss,
7138 struct cgroup *cont)
7139{
7140 struct task_group *tg = cgroup_tg(cont);
7141
7142 sched_destroy_group(tg);
7143}
7144
7145static int cpu_cgroup_can_attach(struct cgroup_subsys *ss,
7146 struct cgroup *cont, struct task_struct *tsk)
7147{
7148 /* We don't support RT-tasks being in separate groups */
7149 if (tsk->sched_class != &fair_sched_class)
7150 return -EINVAL;
7151
7152 return 0;
7153}
7154
7155static void
7156cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cont,
7157 struct cgroup *old_cont, struct task_struct *tsk)
7158{
7159 sched_move_task(tsk);
7160}
7161
7162static ssize_t cpu_shares_write(struct cgroup *cont, struct cftype *cftype,
7163 struct file *file, const char __user *userbuf,
7164 size_t nbytes, loff_t *ppos)
7165{
7166 unsigned long shareval;
7167 struct task_group *tg = cgroup_tg(cont);
7168 char buffer[2*sizeof(unsigned long) + 1];
7169 int rc;
7170
7171 if (nbytes > 2*sizeof(unsigned long)) /* safety check */
7172 return -E2BIG;
7173
7174 if (copy_from_user(buffer, userbuf, nbytes))
7175 return -EFAULT;
7176
7177 buffer[nbytes] = 0; /* nul-terminate */
7178 shareval = simple_strtoul(buffer, NULL, 10);
7179
7180 rc = sched_group_set_shares(tg, shareval);
7181
7182 return (rc < 0 ? rc : nbytes);
7183}
7184
7185static u64 cpu_shares_read_uint(struct cgroup *cont, struct cftype *cft)
7186{
7187 struct task_group *tg = cgroup_tg(cont);
7188
7189 return (u64) tg->shares;
7190}
7191
7192static struct cftype cpu_shares = {
7193 .name = "shares",
7194 .read_uint = cpu_shares_read_uint,
7195 .write = cpu_shares_write,
7196};
7197
7198static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
7199{
7200 return cgroup_add_file(cont, ss, &cpu_shares);
7201}
7202
7203struct cgroup_subsys cpu_cgroup_subsys = {
7204 .name = "cpu",
7205 .create = cpu_cgroup_create,
7206 .destroy = cpu_cgroup_destroy,
7207 .can_attach = cpu_cgroup_can_attach,
7208 .attach = cpu_cgroup_attach,
7209 .populate = cpu_cgroup_populate,
7210 .subsys_id = cpu_cgroup_subsys_id,
7211 .early_init = 1,
7212};
7213
7214#endif /* CONFIG_FAIR_CGROUP_SCHED */
diff --git a/kernel/signal.c b/kernel/signal.c
index e4f059cd9867..12006308c7eb 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -256,7 +256,7 @@ flush_signal_handlers(struct task_struct *t, int force_default)
256 256
257int unhandled_signal(struct task_struct *tsk, int sig) 257int unhandled_signal(struct task_struct *tsk, int sig)
258{ 258{
259 if (is_init(tsk)) 259 if (is_global_init(tsk))
260 return 1; 260 return 1;
261 if (tsk->ptrace & PT_PTRACED) 261 if (tsk->ptrace & PT_PTRACED)
262 return 0; 262 return 0;
@@ -536,7 +536,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
536 return error; 536 return error;
537 error = -EPERM; 537 error = -EPERM;
538 if (((sig != SIGCONT) || 538 if (((sig != SIGCONT) ||
539 (process_session(current) != process_session(t))) 539 (task_session_nr(current) != task_session_nr(t)))
540 && (current->euid ^ t->suid) && (current->euid ^ t->uid) 540 && (current->euid ^ t->suid) && (current->euid ^ t->uid)
541 && (current->uid ^ t->suid) && (current->uid ^ t->uid) 541 && (current->uid ^ t->suid) && (current->uid ^ t->uid)
542 && !capable(CAP_KILL)) 542 && !capable(CAP_KILL))
@@ -694,7 +694,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
694 q->info.si_signo = sig; 694 q->info.si_signo = sig;
695 q->info.si_errno = 0; 695 q->info.si_errno = 0;
696 q->info.si_code = SI_USER; 696 q->info.si_code = SI_USER;
697 q->info.si_pid = current->pid; 697 q->info.si_pid = task_pid_vnr(current);
698 q->info.si_uid = current->uid; 698 q->info.si_uid = current->uid;
699 break; 699 break;
700 case (unsigned long) SEND_SIG_PRIV: 700 case (unsigned long) SEND_SIG_PRIV:
@@ -730,7 +730,7 @@ int print_fatal_signals;
730static void print_fatal_signal(struct pt_regs *regs, int signr) 730static void print_fatal_signal(struct pt_regs *regs, int signr)
731{ 731{
732 printk("%s/%d: potentially unexpected fatal signal %d.\n", 732 printk("%s/%d: potentially unexpected fatal signal %d.\n",
733 current->comm, current->pid, signr); 733 current->comm, task_pid_nr(current), signr);
734 734
735#ifdef __i386__ 735#ifdef __i386__
736 printk("code at %08lx: ", regs->eip); 736 printk("code at %08lx: ", regs->eip);
@@ -1089,7 +1089,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1089{ 1089{
1090 int error; 1090 int error;
1091 rcu_read_lock(); 1091 rcu_read_lock();
1092 error = kill_pid_info(sig, info, find_pid(pid)); 1092 error = kill_pid_info(sig, info, find_vpid(pid));
1093 rcu_read_unlock(); 1093 rcu_read_unlock();
1094 return error; 1094 return error;
1095} 1095}
@@ -1150,7 +1150,7 @@ static int kill_something_info(int sig, struct siginfo *info, int pid)
1150 1150
1151 read_lock(&tasklist_lock); 1151 read_lock(&tasklist_lock);
1152 for_each_process(p) { 1152 for_each_process(p) {
1153 if (p->pid > 1 && p->tgid != current->tgid) { 1153 if (p->pid > 1 && !same_thread_group(p, current)) {
1154 int err = group_send_sig_info(sig, info, p); 1154 int err = group_send_sig_info(sig, info, p);
1155 ++count; 1155 ++count;
1156 if (err != -EPERM) 1156 if (err != -EPERM)
@@ -1160,9 +1160,9 @@ static int kill_something_info(int sig, struct siginfo *info, int pid)
1160 read_unlock(&tasklist_lock); 1160 read_unlock(&tasklist_lock);
1161 ret = count ? retval : -ESRCH; 1161 ret = count ? retval : -ESRCH;
1162 } else if (pid < 0) { 1162 } else if (pid < 0) {
1163 ret = kill_pgrp_info(sig, info, find_pid(-pid)); 1163 ret = kill_pgrp_info(sig, info, find_vpid(-pid));
1164 } else { 1164 } else {
1165 ret = kill_pid_info(sig, info, find_pid(pid)); 1165 ret = kill_pid_info(sig, info, find_vpid(pid));
1166 } 1166 }
1167 rcu_read_unlock(); 1167 rcu_read_unlock();
1168 return ret; 1168 return ret;
@@ -1266,7 +1266,12 @@ EXPORT_SYMBOL(kill_pid);
1266int 1266int
1267kill_proc(pid_t pid, int sig, int priv) 1267kill_proc(pid_t pid, int sig, int priv)
1268{ 1268{
1269 return kill_proc_info(sig, __si_special(priv), pid); 1269 int ret;
1270
1271 rcu_read_lock();
1272 ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
1273 rcu_read_unlock();
1274 return ret;
1270} 1275}
1271 1276
1272/* 1277/*
@@ -1443,7 +1448,22 @@ void do_notify_parent(struct task_struct *tsk, int sig)
1443 1448
1444 info.si_signo = sig; 1449 info.si_signo = sig;
1445 info.si_errno = 0; 1450 info.si_errno = 0;
1446 info.si_pid = tsk->pid; 1451 /*
1452 * we are under tasklist_lock here so our parent is tied to
1453 * us and cannot exit and release its namespace.
1454 *
1455 * the only thing it can do is switch its nsproxy with sys_unshare,
1456 * but unsharing pid namespaces is not allowed, so we'll always
1457 * see the relevant namespace
1458 *
1459 * write_lock() currently calls preempt_disable() which is the
1460 * same as rcu_read_lock(), but according to Oleg, this is not
1461 * correct to rely on this
1462 */
1463 rcu_read_lock();
1464 info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1465 rcu_read_unlock();
1466
1447 info.si_uid = tsk->uid; 1467 info.si_uid = tsk->uid;
1448 1468
1449 /* FIXME: find out whether or not this is supposed to be c*time. */ 1469 /* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1508,7 +1528,13 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
1508 1528
1509 info.si_signo = SIGCHLD; 1529 info.si_signo = SIGCHLD;
1510 info.si_errno = 0; 1530 info.si_errno = 0;
1511 info.si_pid = tsk->pid; 1531 /*
1532 * see comment in do_notify_parent() abot the following 3 lines
1533 */
1534 rcu_read_lock();
1535 info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
1536 rcu_read_unlock();
1537
1512 info.si_uid = tsk->uid; 1538 info.si_uid = tsk->uid;
1513 1539
1514 /* FIXME: find out whether or not this is supposed to be c*time. */ 1540 /* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1634,7 +1660,7 @@ void ptrace_notify(int exit_code)
1634 memset(&info, 0, sizeof info); 1660 memset(&info, 0, sizeof info);
1635 info.si_signo = SIGTRAP; 1661 info.si_signo = SIGTRAP;
1636 info.si_code = exit_code; 1662 info.si_code = exit_code;
1637 info.si_pid = current->pid; 1663 info.si_pid = task_pid_vnr(current);
1638 info.si_uid = current->uid; 1664 info.si_uid = current->uid;
1639 1665
1640 /* Let the debugger run. */ 1666 /* Let the debugger run. */
@@ -1804,7 +1830,7 @@ relock:
1804 info->si_signo = signr; 1830 info->si_signo = signr;
1805 info->si_errno = 0; 1831 info->si_errno = 0;
1806 info->si_code = SI_USER; 1832 info->si_code = SI_USER;
1807 info->si_pid = current->parent->pid; 1833 info->si_pid = task_pid_vnr(current->parent);
1808 info->si_uid = current->parent->uid; 1834 info->si_uid = current->parent->uid;
1809 } 1835 }
1810 1836
@@ -1835,11 +1861,9 @@ relock:
1835 continue; 1861 continue;
1836 1862
1837 /* 1863 /*
1838 * Init of a pid space gets no signals it doesn't want from 1864 * Global init gets no signals it doesn't want.
1839 * within that pid space. It can of course get signals from
1840 * its parent pid space.
1841 */ 1865 */
1842 if (current == child_reaper(current)) 1866 if (is_global_init(current))
1843 continue; 1867 continue;
1844 1868
1845 if (sig_kernel_stop(signr)) { 1869 if (sig_kernel_stop(signr)) {
@@ -2193,7 +2217,7 @@ sys_kill(int pid, int sig)
2193 info.si_signo = sig; 2217 info.si_signo = sig;
2194 info.si_errno = 0; 2218 info.si_errno = 0;
2195 info.si_code = SI_USER; 2219 info.si_code = SI_USER;
2196 info.si_pid = current->tgid; 2220 info.si_pid = task_tgid_vnr(current);
2197 info.si_uid = current->uid; 2221 info.si_uid = current->uid;
2198 2222
2199 return kill_something_info(sig, &info, pid); 2223 return kill_something_info(sig, &info, pid);
@@ -2209,12 +2233,12 @@ static int do_tkill(int tgid, int pid, int sig)
2209 info.si_signo = sig; 2233 info.si_signo = sig;
2210 info.si_errno = 0; 2234 info.si_errno = 0;
2211 info.si_code = SI_TKILL; 2235 info.si_code = SI_TKILL;
2212 info.si_pid = current->tgid; 2236 info.si_pid = task_tgid_vnr(current);
2213 info.si_uid = current->uid; 2237 info.si_uid = current->uid;
2214 2238
2215 read_lock(&tasklist_lock); 2239 read_lock(&tasklist_lock);
2216 p = find_task_by_pid(pid); 2240 p = find_task_by_vpid(pid);
2217 if (p && (tgid <= 0 || p->tgid == tgid)) { 2241 if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
2218 error = check_kill_permission(sig, &info, p); 2242 error = check_kill_permission(sig, &info, p);
2219 /* 2243 /*
2220 * The null signal is a permissions and process existence 2244 * The null signal is a permissions and process existence
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index edeeef3a6a32..11df812263c8 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -113,7 +113,7 @@ void softlockup_tick(void)
113 spin_lock(&print_lock); 113 spin_lock(&print_lock);
114 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", 114 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
115 this_cpu, now - touch_timestamp, 115 this_cpu, now - touch_timestamp,
116 current->comm, current->pid); 116 current->comm, task_pid_nr(current));
117 if (regs) 117 if (regs)
118 show_regs(regs); 118 show_regs(regs);
119 else 119 else
diff --git a/kernel/sys.c b/kernel/sys.c
index bc8879c822a5..304b5410d746 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -106,537 +106,6 @@ EXPORT_SYMBOL(cad_pid);
106 106
107void (*pm_power_off_prepare)(void); 107void (*pm_power_off_prepare)(void);
108 108
109/*
110 * Notifier list for kernel code which wants to be called
111 * at shutdown. This is used to stop any idling DMA operations
112 * and the like.
113 */
114
115static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
116
117/*
118 * Notifier chain core routines. The exported routines below
119 * are layered on top of these, with appropriate locking added.
120 */
121
122static int notifier_chain_register(struct notifier_block **nl,
123 struct notifier_block *n)
124{
125 while ((*nl) != NULL) {
126 if (n->priority > (*nl)->priority)
127 break;
128 nl = &((*nl)->next);
129 }
130 n->next = *nl;
131 rcu_assign_pointer(*nl, n);
132 return 0;
133}
134
135static int notifier_chain_unregister(struct notifier_block **nl,
136 struct notifier_block *n)
137{
138 while ((*nl) != NULL) {
139 if ((*nl) == n) {
140 rcu_assign_pointer(*nl, n->next);
141 return 0;
142 }
143 nl = &((*nl)->next);
144 }
145 return -ENOENT;
146}
147
148/**
149 * notifier_call_chain - Informs the registered notifiers about an event.
150 * @nl: Pointer to head of the blocking notifier chain
151 * @val: Value passed unmodified to notifier function
152 * @v: Pointer passed unmodified to notifier function
153 * @nr_to_call: Number of notifier functions to be called. Don't care
154 * value of this parameter is -1.
155 * @nr_calls: Records the number of notifications sent. Don't care
156 * value of this field is NULL.
157 * @returns: notifier_call_chain returns the value returned by the
158 * last notifier function called.
159 */
160
161static int __kprobes notifier_call_chain(struct notifier_block **nl,
162 unsigned long val, void *v,
163 int nr_to_call, int *nr_calls)
164{
165 int ret = NOTIFY_DONE;
166 struct notifier_block *nb, *next_nb;
167
168 nb = rcu_dereference(*nl);
169
170 while (nb && nr_to_call) {
171 next_nb = rcu_dereference(nb->next);
172 ret = nb->notifier_call(nb, val, v);
173
174 if (nr_calls)
175 (*nr_calls)++;
176
177 if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
178 break;
179 nb = next_nb;
180 nr_to_call--;
181 }
182 return ret;
183}
184
185/*
186 * Atomic notifier chain routines. Registration and unregistration
187 * use a spinlock, and call_chain is synchronized by RCU (no locks).
188 */
189
190/**
191 * atomic_notifier_chain_register - Add notifier to an atomic notifier chain
192 * @nh: Pointer to head of the atomic notifier chain
193 * @n: New entry in notifier chain
194 *
195 * Adds a notifier to an atomic notifier chain.
196 *
197 * Currently always returns zero.
198 */
199
200int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
201 struct notifier_block *n)
202{
203 unsigned long flags;
204 int ret;
205
206 spin_lock_irqsave(&nh->lock, flags);
207 ret = notifier_chain_register(&nh->head, n);
208 spin_unlock_irqrestore(&nh->lock, flags);
209 return ret;
210}
211
212EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
213
214/**
215 * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
216 * @nh: Pointer to head of the atomic notifier chain
217 * @n: Entry to remove from notifier chain
218 *
219 * Removes a notifier from an atomic notifier chain.
220 *
221 * Returns zero on success or %-ENOENT on failure.
222 */
223int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
224 struct notifier_block *n)
225{
226 unsigned long flags;
227 int ret;
228
229 spin_lock_irqsave(&nh->lock, flags);
230 ret = notifier_chain_unregister(&nh->head, n);
231 spin_unlock_irqrestore(&nh->lock, flags);
232 synchronize_rcu();
233 return ret;
234}
235
236EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
237
238/**
239 * __atomic_notifier_call_chain - Call functions in an atomic notifier chain
240 * @nh: Pointer to head of the atomic notifier chain
241 * @val: Value passed unmodified to notifier function
242 * @v: Pointer passed unmodified to notifier function
243 * @nr_to_call: See the comment for notifier_call_chain.
244 * @nr_calls: See the comment for notifier_call_chain.
245 *
246 * Calls each function in a notifier chain in turn. The functions
247 * run in an atomic context, so they must not block.
248 * This routine uses RCU to synchronize with changes to the chain.
249 *
250 * If the return value of the notifier can be and'ed
251 * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
252 * will return immediately, with the return value of
253 * the notifier function which halted execution.
254 * Otherwise the return value is the return value
255 * of the last notifier function called.
256 */
257
258int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
259 unsigned long val, void *v,
260 int nr_to_call, int *nr_calls)
261{
262 int ret;
263
264 rcu_read_lock();
265 ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
266 rcu_read_unlock();
267 return ret;
268}
269
270EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
271
272int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
273 unsigned long val, void *v)
274{
275 return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
276}
277
278EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
279/*
280 * Blocking notifier chain routines. All access to the chain is
281 * synchronized by an rwsem.
282 */
283
284/**
285 * blocking_notifier_chain_register - Add notifier to a blocking notifier chain
286 * @nh: Pointer to head of the blocking notifier chain
287 * @n: New entry in notifier chain
288 *
289 * Adds a notifier to a blocking notifier chain.
290 * Must be called in process context.
291 *
292 * Currently always returns zero.
293 */
294
295int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
296 struct notifier_block *n)
297{
298 int ret;
299
300 /*
301 * This code gets used during boot-up, when task switching is
302 * not yet working and interrupts must remain disabled. At
303 * such times we must not call down_write().
304 */
305 if (unlikely(system_state == SYSTEM_BOOTING))
306 return notifier_chain_register(&nh->head, n);
307
308 down_write(&nh->rwsem);
309 ret = notifier_chain_register(&nh->head, n);
310 up_write(&nh->rwsem);
311 return ret;
312}
313
314EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
315
316/**
317 * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
318 * @nh: Pointer to head of the blocking notifier chain
319 * @n: Entry to remove from notifier chain
320 *
321 * Removes a notifier from a blocking notifier chain.
322 * Must be called from process context.
323 *
324 * Returns zero on success or %-ENOENT on failure.
325 */
326int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
327 struct notifier_block *n)
328{
329 int ret;
330
331 /*
332 * This code gets used during boot-up, when task switching is
333 * not yet working and interrupts must remain disabled. At
334 * such times we must not call down_write().
335 */
336 if (unlikely(system_state == SYSTEM_BOOTING))
337 return notifier_chain_unregister(&nh->head, n);
338
339 down_write(&nh->rwsem);
340 ret = notifier_chain_unregister(&nh->head, n);
341 up_write(&nh->rwsem);
342 return ret;
343}
344
345EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
346
347/**
348 * __blocking_notifier_call_chain - Call functions in a blocking notifier chain
349 * @nh: Pointer to head of the blocking notifier chain
350 * @val: Value passed unmodified to notifier function
351 * @v: Pointer passed unmodified to notifier function
352 * @nr_to_call: See comment for notifier_call_chain.
353 * @nr_calls: See comment for notifier_call_chain.
354 *
355 * Calls each function in a notifier chain in turn. The functions
356 * run in a process context, so they are allowed to block.
357 *
358 * If the return value of the notifier can be and'ed
359 * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
360 * will return immediately, with the return value of
361 * the notifier function which halted execution.
362 * Otherwise the return value is the return value
363 * of the last notifier function called.
364 */
365
366int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
367 unsigned long val, void *v,
368 int nr_to_call, int *nr_calls)
369{
370 int ret = NOTIFY_DONE;
371
372 /*
373 * We check the head outside the lock, but if this access is
374 * racy then it does not matter what the result of the test
375 * is, we re-check the list after having taken the lock anyway:
376 */
377 if (rcu_dereference(nh->head)) {
378 down_read(&nh->rwsem);
379 ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
380 nr_calls);
381 up_read(&nh->rwsem);
382 }
383 return ret;
384}
385EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);
386
387int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
388 unsigned long val, void *v)
389{
390 return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
391}
392EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
393
394/*
395 * Raw notifier chain routines. There is no protection;
396 * the caller must provide it. Use at your own risk!
397 */
398
399/**
400 * raw_notifier_chain_register - Add notifier to a raw notifier chain
401 * @nh: Pointer to head of the raw notifier chain
402 * @n: New entry in notifier chain
403 *
404 * Adds a notifier to a raw notifier chain.
405 * All locking must be provided by the caller.
406 *
407 * Currently always returns zero.
408 */
409
410int raw_notifier_chain_register(struct raw_notifier_head *nh,
411 struct notifier_block *n)
412{
413 return notifier_chain_register(&nh->head, n);
414}
415
416EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
417
418/**
419 * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
420 * @nh: Pointer to head of the raw notifier chain
421 * @n: Entry to remove from notifier chain
422 *
423 * Removes a notifier from a raw notifier chain.
424 * All locking must be provided by the caller.
425 *
426 * Returns zero on success or %-ENOENT on failure.
427 */
428int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
429 struct notifier_block *n)
430{
431 return notifier_chain_unregister(&nh->head, n);
432}
433
434EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
435
436/**
437 * __raw_notifier_call_chain - Call functions in a raw notifier chain
438 * @nh: Pointer to head of the raw notifier chain
439 * @val: Value passed unmodified to notifier function
440 * @v: Pointer passed unmodified to notifier function
441 * @nr_to_call: See comment for notifier_call_chain.
442 * @nr_calls: See comment for notifier_call_chain
443 *
444 * Calls each function in a notifier chain in turn. The functions
445 * run in an undefined context.
446 * All locking must be provided by the caller.
447 *
448 * If the return value of the notifier can be and'ed
449 * with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
450 * will return immediately, with the return value of
451 * the notifier function which halted execution.
452 * Otherwise the return value is the return value
453 * of the last notifier function called.
454 */
455
456int __raw_notifier_call_chain(struct raw_notifier_head *nh,
457 unsigned long val, void *v,
458 int nr_to_call, int *nr_calls)
459{
460 return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
461}
462
463EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);
464
465int raw_notifier_call_chain(struct raw_notifier_head *nh,
466 unsigned long val, void *v)
467{
468 return __raw_notifier_call_chain(nh, val, v, -1, NULL);
469}
470
471EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
472
473/*
474 * SRCU notifier chain routines. Registration and unregistration
475 * use a mutex, and call_chain is synchronized by SRCU (no locks).
476 */
477
478/**
479 * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
480 * @nh: Pointer to head of the SRCU notifier chain
481 * @n: New entry in notifier chain
482 *
483 * Adds a notifier to an SRCU notifier chain.
484 * Must be called in process context.
485 *
486 * Currently always returns zero.
487 */
488
489int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
490 struct notifier_block *n)
491{
492 int ret;
493
494 /*
495 * This code gets used during boot-up, when task switching is
496 * not yet working and interrupts must remain disabled. At
497 * such times we must not call mutex_lock().
498 */
499 if (unlikely(system_state == SYSTEM_BOOTING))
500 return notifier_chain_register(&nh->head, n);
501
502 mutex_lock(&nh->mutex);
503 ret = notifier_chain_register(&nh->head, n);
504 mutex_unlock(&nh->mutex);
505 return ret;
506}
507
508EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
509
510/**
511 * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
512 * @nh: Pointer to head of the SRCU notifier chain
513 * @n: Entry to remove from notifier chain
514 *
515 * Removes a notifier from an SRCU notifier chain.
516 * Must be called from process context.
517 *
518 * Returns zero on success or %-ENOENT on failure.
519 */
520int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
521 struct notifier_block *n)
522{
523 int ret;
524
525 /*
526 * This code gets used during boot-up, when task switching is
527 * not yet working and interrupts must remain disabled. At
528 * such times we must not call mutex_lock().
529 */
530 if (unlikely(system_state == SYSTEM_BOOTING))
531 return notifier_chain_unregister(&nh->head, n);
532
533 mutex_lock(&nh->mutex);
534 ret = notifier_chain_unregister(&nh->head, n);
535 mutex_unlock(&nh->mutex);
536 synchronize_srcu(&nh->srcu);
537 return ret;
538}
539
540EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
541
542/**
543 * __srcu_notifier_call_chain - Call functions in an SRCU notifier chain
544 * @nh: Pointer to head of the SRCU notifier chain
545 * @val: Value passed unmodified to notifier function
546 * @v: Pointer passed unmodified to notifier function
547 * @nr_to_call: See comment for notifier_call_chain.
548 * @nr_calls: See comment for notifier_call_chain
549 *
550 * Calls each function in a notifier chain in turn. The functions
551 * run in a process context, so they are allowed to block.
552 *
553 * If the return value of the notifier can be and'ed
554 * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
555 * will return immediately, with the return value of
556 * the notifier function which halted execution.
557 * Otherwise the return value is the return value
558 * of the last notifier function called.
559 */
560
561int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
562 unsigned long val, void *v,
563 int nr_to_call, int *nr_calls)
564{
565 int ret;
566 int idx;
567
568 idx = srcu_read_lock(&nh->srcu);
569 ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
570 srcu_read_unlock(&nh->srcu, idx);
571 return ret;
572}
573EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);
574
575int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
576 unsigned long val, void *v)
577{
578 return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
579}
580EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
581
582/**
583 * srcu_init_notifier_head - Initialize an SRCU notifier head
584 * @nh: Pointer to head of the srcu notifier chain
585 *
586 * Unlike other sorts of notifier heads, SRCU notifier heads require
587 * dynamic initialization. Be sure to call this routine before
588 * calling any of the other SRCU notifier routines for this head.
589 *
590 * If an SRCU notifier head is deallocated, it must first be cleaned
591 * up by calling srcu_cleanup_notifier_head(). Otherwise the head's
592 * per-cpu data (used by the SRCU mechanism) will leak.
593 */
594
595void srcu_init_notifier_head(struct srcu_notifier_head *nh)
596{
597 mutex_init(&nh->mutex);
598 if (init_srcu_struct(&nh->srcu) < 0)
599 BUG();
600 nh->head = NULL;
601}
602
603EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
604
605/**
606 * register_reboot_notifier - Register function to be called at reboot time
607 * @nb: Info about notifier function to be called
608 *
609 * Registers a function with the list of functions
610 * to be called at reboot time.
611 *
612 * Currently always returns zero, as blocking_notifier_chain_register()
613 * always returns zero.
614 */
615
616int register_reboot_notifier(struct notifier_block * nb)
617{
618 return blocking_notifier_chain_register(&reboot_notifier_list, nb);
619}
620
621EXPORT_SYMBOL(register_reboot_notifier);
622
623/**
624 * unregister_reboot_notifier - Unregister previously registered reboot notifier
625 * @nb: Hook to be unregistered
626 *
627 * Unregisters a previously registered reboot
628 * notifier function.
629 *
630 * Returns zero on success, or %-ENOENT on failure.
631 */
632
633int unregister_reboot_notifier(struct notifier_block * nb)
634{
635 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
636}
637
638EXPORT_SYMBOL(unregister_reboot_notifier);
639
640static int set_one_prio(struct task_struct *p, int niceval, int error) 109static int set_one_prio(struct task_struct *p, int niceval, int error)
641{ 110{
642 int no_nice; 111 int no_nice;
@@ -683,7 +152,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
683 switch (which) { 152 switch (which) {
684 case PRIO_PROCESS: 153 case PRIO_PROCESS:
685 if (who) 154 if (who)
686 p = find_task_by_pid(who); 155 p = find_task_by_vpid(who);
687 else 156 else
688 p = current; 157 p = current;
689 if (p) 158 if (p)
@@ -691,7 +160,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
691 break; 160 break;
692 case PRIO_PGRP: 161 case PRIO_PGRP:
693 if (who) 162 if (who)
694 pgrp = find_pid(who); 163 pgrp = find_vpid(who);
695 else 164 else
696 pgrp = task_pgrp(current); 165 pgrp = task_pgrp(current);
697 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 166 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
@@ -740,7 +209,7 @@ asmlinkage long sys_getpriority(int which, int who)
740 switch (which) { 209 switch (which) {
741 case PRIO_PROCESS: 210 case PRIO_PROCESS:
742 if (who) 211 if (who)
743 p = find_task_by_pid(who); 212 p = find_task_by_vpid(who);
744 else 213 else
745 p = current; 214 p = current;
746 if (p) { 215 if (p) {
@@ -751,7 +220,7 @@ asmlinkage long sys_getpriority(int which, int who)
751 break; 220 break;
752 case PRIO_PGRP: 221 case PRIO_PGRP:
753 if (who) 222 if (who)
754 pgrp = find_pid(who); 223 pgrp = find_vpid(who);
755 else 224 else
756 pgrp = task_pgrp(current); 225 pgrp = task_pgrp(current);
757 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 226 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
@@ -1448,9 +917,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1448 struct task_struct *p; 917 struct task_struct *p;
1449 struct task_struct *group_leader = current->group_leader; 918 struct task_struct *group_leader = current->group_leader;
1450 int err = -EINVAL; 919 int err = -EINVAL;
920 struct pid_namespace *ns;
1451 921
1452 if (!pid) 922 if (!pid)
1453 pid = group_leader->pid; 923 pid = task_pid_vnr(group_leader);
1454 if (!pgid) 924 if (!pgid)
1455 pgid = pid; 925 pgid = pid;
1456 if (pgid < 0) 926 if (pgid < 0)
@@ -1459,10 +929,12 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1459 /* From this point forward we keep holding onto the tasklist lock 929 /* From this point forward we keep holding onto the tasklist lock
1460 * so that our parent does not change from under us. -DaveM 930 * so that our parent does not change from under us. -DaveM
1461 */ 931 */
932 ns = current->nsproxy->pid_ns;
933
1462 write_lock_irq(&tasklist_lock); 934 write_lock_irq(&tasklist_lock);
1463 935
1464 err = -ESRCH; 936 err = -ESRCH;
1465 p = find_task_by_pid(pid); 937 p = find_task_by_pid_ns(pid, ns);
1466 if (!p) 938 if (!p)
1467 goto out; 939 goto out;
1468 940
@@ -1488,9 +960,9 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1488 goto out; 960 goto out;
1489 961
1490 if (pgid != pid) { 962 if (pgid != pid) {
1491 struct task_struct *g = 963 struct task_struct *g;
1492 find_task_by_pid_type(PIDTYPE_PGID, pgid);
1493 964
965 g = find_task_by_pid_type_ns(PIDTYPE_PGID, pgid, ns);
1494 if (!g || task_session(g) != task_session(group_leader)) 966 if (!g || task_session(g) != task_session(group_leader))
1495 goto out; 967 goto out;
1496 } 968 }
@@ -1499,10 +971,13 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
1499 if (err) 971 if (err)
1500 goto out; 972 goto out;
1501 973
1502 if (process_group(p) != pgid) { 974 if (task_pgrp_nr_ns(p, ns) != pgid) {
975 struct pid *pid;
976
1503 detach_pid(p, PIDTYPE_PGID); 977 detach_pid(p, PIDTYPE_PGID);
1504 p->signal->pgrp = pgid; 978 pid = find_vpid(pgid);
1505 attach_pid(p, PIDTYPE_PGID, find_pid(pgid)); 979 attach_pid(p, PIDTYPE_PGID, pid);
980 set_task_pgrp(p, pid_nr(pid));
1506 } 981 }
1507 982
1508 err = 0; 983 err = 0;
@@ -1515,19 +990,21 @@ out:
1515asmlinkage long sys_getpgid(pid_t pid) 990asmlinkage long sys_getpgid(pid_t pid)
1516{ 991{
1517 if (!pid) 992 if (!pid)
1518 return process_group(current); 993 return task_pgrp_vnr(current);
1519 else { 994 else {
1520 int retval; 995 int retval;
1521 struct task_struct *p; 996 struct task_struct *p;
997 struct pid_namespace *ns;
1522 998
1523 read_lock(&tasklist_lock); 999 ns = current->nsproxy->pid_ns;
1524 p = find_task_by_pid(pid);
1525 1000
1001 read_lock(&tasklist_lock);
1002 p = find_task_by_pid_ns(pid, ns);
1526 retval = -ESRCH; 1003 retval = -ESRCH;
1527 if (p) { 1004 if (p) {
1528 retval = security_task_getpgid(p); 1005 retval = security_task_getpgid(p);
1529 if (!retval) 1006 if (!retval)
1530 retval = process_group(p); 1007 retval = task_pgrp_nr_ns(p, ns);
1531 } 1008 }
1532 read_unlock(&tasklist_lock); 1009 read_unlock(&tasklist_lock);
1533 return retval; 1010 return retval;
@@ -1539,7 +1016,7 @@ asmlinkage long sys_getpgid(pid_t pid)
1539asmlinkage long sys_getpgrp(void) 1016asmlinkage long sys_getpgrp(void)
1540{ 1017{
1541 /* SMP - assuming writes are word atomic this is fine */ 1018 /* SMP - assuming writes are word atomic this is fine */
1542 return process_group(current); 1019 return task_pgrp_vnr(current);
1543} 1020}
1544 1021
1545#endif 1022#endif
@@ -1547,19 +1024,21 @@ asmlinkage long sys_getpgrp(void)
1547asmlinkage long sys_getsid(pid_t pid) 1024asmlinkage long sys_getsid(pid_t pid)
1548{ 1025{
1549 if (!pid) 1026 if (!pid)
1550 return process_session(current); 1027 return task_session_vnr(current);
1551 else { 1028 else {
1552 int retval; 1029 int retval;
1553 struct task_struct *p; 1030 struct task_struct *p;
1031 struct pid_namespace *ns;
1554 1032
1555 read_lock(&tasklist_lock); 1033 ns = current->nsproxy->pid_ns;
1556 p = find_task_by_pid(pid);
1557 1034
1035 read_lock(&tasklist_lock);
1036 p = find_task_by_pid_ns(pid, ns);
1558 retval = -ESRCH; 1037 retval = -ESRCH;
1559 if (p) { 1038 if (p) {
1560 retval = security_task_getsid(p); 1039 retval = security_task_getsid(p);
1561 if (!retval) 1040 if (!retval)
1562 retval = process_session(p); 1041 retval = task_session_nr_ns(p, ns);
1563 } 1042 }
1564 read_unlock(&tasklist_lock); 1043 read_unlock(&tasklist_lock);
1565 return retval; 1044 return retval;
@@ -1586,7 +1065,8 @@ asmlinkage long sys_setsid(void)
1586 * session id and so the check will always fail and make it so 1065 * session id and so the check will always fail and make it so
1587 * init cannot successfully call setsid. 1066 * init cannot successfully call setsid.
1588 */ 1067 */
1589 if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session)) 1068 if (session > 1 && find_task_by_pid_type_ns(PIDTYPE_PGID,
1069 session, &init_pid_ns))
1590 goto out; 1070 goto out;
1591 1071
1592 group_leader->signal->leader = 1; 1072 group_leader->signal->leader = 1;
@@ -1596,7 +1076,7 @@ asmlinkage long sys_setsid(void)
1596 group_leader->signal->tty = NULL; 1076 group_leader->signal->tty = NULL;
1597 spin_unlock(&group_leader->sighand->siglock); 1077 spin_unlock(&group_leader->sighand->siglock);
1598 1078
1599 err = process_group(group_leader); 1079 err = task_pgrp_vnr(group_leader);
1600out: 1080out:
1601 write_unlock_irq(&tasklist_lock); 1081 write_unlock_irq(&tasklist_lock);
1602 return err; 1082 return err;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 067554bda8b7..3b4efbe26445 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1888,7 +1888,7 @@ int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
1888 return -EPERM; 1888 return -EPERM;
1889 } 1889 }
1890 1890
1891 op = is_init(current) ? OP_SET : OP_AND; 1891 op = is_global_init(current) ? OP_SET : OP_AND;
1892 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, 1892 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1893 do_proc_dointvec_bset_conv,&op); 1893 do_proc_dointvec_bset_conv,&op);
1894} 1894}
@@ -2278,7 +2278,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
2278 pid_t tmp; 2278 pid_t tmp;
2279 int r; 2279 int r;
2280 2280
2281 tmp = pid_nr(cad_pid); 2281 tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns);
2282 2282
2283 r = __do_proc_dointvec(&tmp, table, write, filp, buffer, 2283 r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2284 lenp, ppos, NULL, NULL); 2284 lenp, ppos, NULL, NULL);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 7d4d7f9c1bb2..9f360f68aad6 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -22,6 +22,10 @@
22#include <linux/delayacct.h> 22#include <linux/delayacct.h>
23#include <linux/cpumask.h> 23#include <linux/cpumask.h>
24#include <linux/percpu.h> 24#include <linux/percpu.h>
25#include <linux/cgroupstats.h>
26#include <linux/cgroup.h>
27#include <linux/fs.h>
28#include <linux/file.h>
25#include <net/genetlink.h> 29#include <net/genetlink.h>
26#include <asm/atomic.h> 30#include <asm/atomic.h>
27 31
@@ -49,6 +53,11 @@ __read_mostly = {
49 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, 53 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
50 [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; 54 [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
51 55
56static struct nla_policy
57cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = {
58 [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
59};
60
52struct listener { 61struct listener {
53 struct list_head list; 62 struct list_head list;
54 pid_t pid; 63 pid_t pid;
@@ -372,6 +381,51 @@ err:
372 return NULL; 381 return NULL;
373} 382}
374 383
384static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
385{
386 int rc = 0;
387 struct sk_buff *rep_skb;
388 struct cgroupstats *stats;
389 struct nlattr *na;
390 size_t size;
391 u32 fd;
392 struct file *file;
393 int fput_needed;
394
395 na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
396 if (!na)
397 return -EINVAL;
398
399 fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
400 file = fget_light(fd, &fput_needed);
401 if (file) {
402 size = nla_total_size(sizeof(struct cgroupstats));
403
404 rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
405 size);
406 if (rc < 0)
407 goto err;
408
409 na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
410 sizeof(struct cgroupstats));
411 stats = nla_data(na);
412 memset(stats, 0, sizeof(*stats));
413
414 rc = cgroupstats_build(stats, file->f_dentry);
415 if (rc < 0)
416 goto err;
417
418 fput_light(file, fput_needed);
419 return send_reply(rep_skb, info->snd_pid);
420 }
421
422err:
423 if (file)
424 fput_light(file, fput_needed);
425 nlmsg_free(rep_skb);
426 return rc;
427}
428
375static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) 429static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
376{ 430{
377 int rc = 0; 431 int rc = 0;
@@ -522,6 +576,12 @@ static struct genl_ops taskstats_ops = {
522 .policy = taskstats_cmd_get_policy, 576 .policy = taskstats_cmd_get_policy,
523}; 577};
524 578
579static struct genl_ops cgroupstats_ops = {
580 .cmd = CGROUPSTATS_CMD_GET,
581 .doit = cgroupstats_user_cmd,
582 .policy = cgroupstats_cmd_get_policy,
583};
584
525/* Needed early in initialization */ 585/* Needed early in initialization */
526void __init taskstats_init_early(void) 586void __init taskstats_init_early(void)
527{ 587{
@@ -546,8 +606,15 @@ static int __init taskstats_init(void)
546 if (rc < 0) 606 if (rc < 0)
547 goto err; 607 goto err;
548 608
609 rc = genl_register_ops(&family, &cgroupstats_ops);
610 if (rc < 0)
611 goto err_cgroup_ops;
612
549 family_registered = 1; 613 family_registered = 1;
614 printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
550 return 0; 615 return 0;
616err_cgroup_ops:
617 genl_unregister_ops(&family, &taskstats_ops);
551err: 618err:
552 genl_unregister_family(&family); 619 genl_unregister_family(&family);
553 return rc; 620 return rc;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 51b6a6a6158c..c8a9d13874df 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -207,15 +207,12 @@ static inline void clocksource_resume_watchdog(void) { }
207 */ 207 */
208void clocksource_resume(void) 208void clocksource_resume(void)
209{ 209{
210 struct list_head *tmp; 210 struct clocksource *cs;
211 unsigned long flags; 211 unsigned long flags;
212 212
213 spin_lock_irqsave(&clocksource_lock, flags); 213 spin_lock_irqsave(&clocksource_lock, flags);
214 214
215 list_for_each(tmp, &clocksource_list) { 215 list_for_each_entry(cs, &clocksource_list, list) {
216 struct clocksource *cs;
217
218 cs = list_entry(tmp, struct clocksource, list);
219 if (cs->resume) 216 if (cs->resume)
220 cs->resume(); 217 cs->resume();
221 } 218 }
@@ -369,7 +366,6 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
369 const char *buf, size_t count) 366 const char *buf, size_t count)
370{ 367{
371 struct clocksource *ovr = NULL; 368 struct clocksource *ovr = NULL;
372 struct list_head *tmp;
373 size_t ret = count; 369 size_t ret = count;
374 int len; 370 int len;
375 371
@@ -389,12 +385,11 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
389 385
390 len = strlen(override_name); 386 len = strlen(override_name);
391 if (len) { 387 if (len) {
388 struct clocksource *cs;
389
392 ovr = clocksource_override; 390 ovr = clocksource_override;
393 /* try to select it: */ 391 /* try to select it: */
394 list_for_each(tmp, &clocksource_list) { 392 list_for_each_entry(cs, &clocksource_list, list) {
395 struct clocksource *cs;
396
397 cs = list_entry(tmp, struct clocksource, list);
398 if (strlen(cs->name) == len && 393 if (strlen(cs->name) == len &&
399 !strcmp(cs->name, override_name)) 394 !strcmp(cs->name, override_name))
400 ovr = cs; 395 ovr = cs;
@@ -422,14 +417,11 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
422static ssize_t 417static ssize_t
423sysfs_show_available_clocksources(struct sys_device *dev, char *buf) 418sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
424{ 419{
425 struct list_head *tmp; 420 struct clocksource *src;
426 char *curr = buf; 421 char *curr = buf;
427 422
428 spin_lock_irq(&clocksource_lock); 423 spin_lock_irq(&clocksource_lock);
429 list_for_each(tmp, &clocksource_list) { 424 list_for_each_entry(src, &clocksource_list, list) {
430 struct clocksource *src;
431
432 src = list_entry(tmp, struct clocksource, list);
433 curr += sprintf(curr, "%s ", src->name); 425 curr += sprintf(curr, "%s ", src->name);
434 } 426 }
435 spin_unlock_irq(&clocksource_lock); 427 spin_unlock_irq(&clocksource_lock);
diff --git a/kernel/timer.c b/kernel/timer.c
index 8521d10fbb27..fb4e67d5dd60 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -26,6 +26,7 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/swap.h> 28#include <linux/swap.h>
29#include <linux/pid_namespace.h>
29#include <linux/notifier.h> 30#include <linux/notifier.h>
30#include <linux/thread_info.h> 31#include <linux/thread_info.h>
31#include <linux/time.h> 32#include <linux/time.h>
@@ -956,7 +957,7 @@ asmlinkage unsigned long sys_alarm(unsigned int seconds)
956 */ 957 */
957asmlinkage long sys_getpid(void) 958asmlinkage long sys_getpid(void)
958{ 959{
959 return current->tgid; 960 return task_tgid_vnr(current);
960} 961}
961 962
962/* 963/*
@@ -970,7 +971,7 @@ asmlinkage long sys_getppid(void)
970 int pid; 971 int pid;
971 972
972 rcu_read_lock(); 973 rcu_read_lock();
973 pid = rcu_dereference(current->real_parent)->tgid; 974 pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns);
974 rcu_read_unlock(); 975 rcu_read_unlock();
975 976
976 return pid; 977 return pid;
@@ -1102,7 +1103,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1102/* Thread ID - the internal kernel "pid" */ 1103/* Thread ID - the internal kernel "pid" */
1103asmlinkage long sys_gettid(void) 1104asmlinkage long sys_gettid(void)
1104{ 1105{
1105 return current->pid; 1106 return task_pid_vnr(current);
1106} 1107}
1107 1108
1108/** 1109/**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e080d1d744cc..52d5e7c9a8e6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -32,6 +32,7 @@
32#include <linux/freezer.h> 32#include <linux/freezer.h>
33#include <linux/kallsyms.h> 33#include <linux/kallsyms.h>
34#include <linux/debug_locks.h> 34#include <linux/debug_locks.h>
35#include <linux/lockdep.h>
35 36
36/* 37/*
37 * The per-CPU workqueue (if single thread, we always use the first 38 * The per-CPU workqueue (if single thread, we always use the first
@@ -61,6 +62,9 @@ struct workqueue_struct {
61 const char *name; 62 const char *name;
62 int singlethread; 63 int singlethread;
63 int freezeable; /* Freeze threads during suspend */ 64 int freezeable; /* Freeze threads during suspend */
65#ifdef CONFIG_LOCKDEP
66 struct lockdep_map lockdep_map;
67#endif
64}; 68};
65 69
66/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove 70/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
@@ -250,6 +254,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
250 struct work_struct *work = list_entry(cwq->worklist.next, 254 struct work_struct *work = list_entry(cwq->worklist.next,
251 struct work_struct, entry); 255 struct work_struct, entry);
252 work_func_t f = work->func; 256 work_func_t f = work->func;
257#ifdef CONFIG_LOCKDEP
258 /*
259 * It is permissible to free the struct work_struct
260 * from inside the function that is called from it,
261 * this we need to take into account for lockdep too.
262 * To avoid bogus "held lock freed" warnings as well
263 * as problems when looking into work->lockdep_map,
264 * make a copy and use that here.
265 */
266 struct lockdep_map lockdep_map = work->lockdep_map;
267#endif
253 268
254 cwq->current_work = work; 269 cwq->current_work = work;
255 list_del_init(cwq->worklist.next); 270 list_del_init(cwq->worklist.next);
@@ -257,13 +272,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
257 272
258 BUG_ON(get_wq_data(work) != cwq); 273 BUG_ON(get_wq_data(work) != cwq);
259 work_clear_pending(work); 274 work_clear_pending(work);
275 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
276 lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_);
260 f(work); 277 f(work);
278 lock_release(&lockdep_map, 1, _THIS_IP_);
279 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
261 280
262 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { 281 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
263 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " 282 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
264 "%s/0x%08x/%d\n", 283 "%s/0x%08x/%d\n",
265 current->comm, preempt_count(), 284 current->comm, preempt_count(),
266 current->pid); 285 task_pid_nr(current));
267 printk(KERN_ERR " last function: "); 286 printk(KERN_ERR " last function: ");
268 print_symbol("%s\n", (unsigned long)f); 287 print_symbol("%s\n", (unsigned long)f);
269 debug_show_held_locks(current); 288 debug_show_held_locks(current);
@@ -376,6 +395,8 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
376 int cpu; 395 int cpu;
377 396
378 might_sleep(); 397 might_sleep();
398 lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
399 lock_release(&wq->lockdep_map, 1, _THIS_IP_);
379 for_each_cpu_mask(cpu, *cpu_map) 400 for_each_cpu_mask(cpu, *cpu_map)
380 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); 401 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
381} 402}
@@ -446,6 +467,9 @@ static void wait_on_work(struct work_struct *work)
446 467
447 might_sleep(); 468 might_sleep();
448 469
470 lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
471 lock_release(&work->lockdep_map, 1, _THIS_IP_);
472
449 cwq = get_wq_data(work); 473 cwq = get_wq_data(work);
450 if (!cwq) 474 if (!cwq)
451 return; 475 return;
@@ -695,8 +719,10 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
695 } 719 }
696} 720}
697 721
698struct workqueue_struct *__create_workqueue(const char *name, 722struct workqueue_struct *__create_workqueue_key(const char *name,
699 int singlethread, int freezeable) 723 int singlethread,
724 int freezeable,
725 struct lock_class_key *key)
700{ 726{
701 struct workqueue_struct *wq; 727 struct workqueue_struct *wq;
702 struct cpu_workqueue_struct *cwq; 728 struct cpu_workqueue_struct *cwq;
@@ -713,6 +739,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
713 } 739 }
714 740
715 wq->name = name; 741 wq->name = name;
742 lockdep_init_map(&wq->lockdep_map, name, key, 0);
716 wq->singlethread = singlethread; 743 wq->singlethread = singlethread;
717 wq->freezeable = freezeable; 744 wq->freezeable = freezeable;
718 INIT_LIST_HEAD(&wq->list); 745 INIT_LIST_HEAD(&wq->list);
@@ -741,7 +768,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
741 } 768 }
742 return wq; 769 return wq;
743} 770}
744EXPORT_SYMBOL_GPL(__create_workqueue); 771EXPORT_SYMBOL_GPL(__create_workqueue_key);
745 772
746static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) 773static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
747{ 774{
@@ -752,6 +779,9 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
752 if (cwq->thread == NULL) 779 if (cwq->thread == NULL)
753 return; 780 return;
754 781
782 lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
783 lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
784
755 flush_cpu_workqueue(cwq); 785 flush_cpu_workqueue(cwq);
756 /* 786 /*
757 * If the caller is CPU_DEAD and cwq->worklist was not empty, 787 * If the caller is CPU_DEAD and cwq->worklist was not empty,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7d16e6433302..c567f219191d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -498,3 +498,5 @@ config FAULT_INJECTION_STACKTRACE_FILTER
498 select FRAME_POINTER 498 select FRAME_POINTER
499 help 499 help
500 Provide stacktrace filter for fault-injection capabilities 500 Provide stacktrace filter for fault-injection capabilities
501
502source "samples/Kconfig"
diff --git a/lib/Makefile b/lib/Makefile
index c5f215d509d3..3a0983b77412 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -6,7 +6,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
6 rbtree.o radix-tree.o dump_stack.o \ 6 rbtree.o radix-tree.o dump_stack.o \
7 idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \ 7 idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
8 sha1.o irq_regs.o reciprocal_div.o argv_split.o \ 8 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
9 proportions.o 9 proportions.o prio_heap.o
10 10
11lib-$(CONFIG_MMU) += ioremap.o 11lib-$(CONFIG_MMU) += ioremap.o
12lib-$(CONFIG_SMP) += cpumask.o 12lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/hweight.c b/lib/hweight.c
index 360556a7803d..389424ecb129 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -1,6 +1,6 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/bitops.h>
2#include <asm/types.h> 3#include <asm/types.h>
3#include <asm/bitops.h>
4 4
5/** 5/**
6 * hweightN - returns the hamming weight of a N-bit word 6 * hweightN - returns the hamming weight of a N-bit word
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 9659eabffc31..393a0e915c23 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -124,12 +124,13 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
124 mutex_lock(&percpu_counters_lock); 124 mutex_lock(&percpu_counters_lock);
125 list_for_each_entry(fbc, &percpu_counters, list) { 125 list_for_each_entry(fbc, &percpu_counters, list) {
126 s32 *pcount; 126 s32 *pcount;
127 unsigned long flags;
127 128
128 spin_lock(&fbc->lock); 129 spin_lock_irqsave(&fbc->lock, flags);
129 pcount = per_cpu_ptr(fbc->counters, cpu); 130 pcount = per_cpu_ptr(fbc->counters, cpu);
130 fbc->count += *pcount; 131 fbc->count += *pcount;
131 *pcount = 0; 132 *pcount = 0;
132 spin_unlock(&fbc->lock); 133 spin_unlock_irqrestore(&fbc->lock, flags);
133 } 134 }
134 mutex_unlock(&percpu_counters_lock); 135 mutex_unlock(&percpu_counters_lock);
135 return NOTIFY_OK; 136 return NOTIFY_OK;
diff --git a/lib/prio_heap.c b/lib/prio_heap.c
new file mode 100644
index 000000000000..471944a54e23
--- /dev/null
+++ b/lib/prio_heap.c
@@ -0,0 +1,70 @@
1/*
2 * Simple insertion-only static-sized priority heap containing
3 * pointers, based on CLR, chapter 7
4 */
5
6#include <linux/slab.h>
7#include <linux/prio_heap.h>
8
9int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
10 int (*gt)(void *, void *))
11{
12 heap->ptrs = kmalloc(size, gfp_mask);
13 if (!heap->ptrs)
14 return -ENOMEM;
15 heap->size = 0;
16 heap->max = size / sizeof(void *);
17 heap->gt = gt;
18 return 0;
19}
20
21void heap_free(struct ptr_heap *heap)
22{
23 kfree(heap->ptrs);
24}
25
26void *heap_insert(struct ptr_heap *heap, void *p)
27{
28 void *res;
29 void **ptrs = heap->ptrs;
30 int pos;
31
32 if (heap->size < heap->max) {
33 /* Heap insertion */
34 int pos = heap->size++;
35 while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) {
36 ptrs[pos] = ptrs[(pos-1)/2];
37 pos = (pos-1)/2;
38 }
39 ptrs[pos] = p;
40 return NULL;
41 }
42
43 /* The heap is full, so something will have to be dropped */
44
45 /* If the new pointer is greater than the current max, drop it */
46 if (heap->gt(p, ptrs[0]))
47 return p;
48
49 /* Replace the current max and heapify */
50 res = ptrs[0];
51 ptrs[0] = p;
52 pos = 0;
53
54 while (1) {
55 int left = 2 * pos + 1;
56 int right = 2 * pos + 2;
57 int largest = pos;
58 if (left < heap->size && heap->gt(ptrs[left], p))
59 largest = left;
60 if (right < heap->size && heap->gt(ptrs[right], ptrs[largest]))
61 largest = right;
62 if (largest == pos)
63 break;
64 /* Push p down the heap one level and bump one up */
65 ptrs[pos] = ptrs[largest];
66 ptrs[largest] = p;
67 pos = largest;
68 }
69 return res;
70}
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 479fd462eaa9..9c4b0256490b 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -60,12 +60,12 @@ static void spin_bug(spinlock_t *lock, const char *msg)
60 owner = lock->owner; 60 owner = lock->owner;
61 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", 61 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
62 msg, raw_smp_processor_id(), 62 msg, raw_smp_processor_id(),
63 current->comm, current->pid); 63 current->comm, task_pid_nr(current));
64 printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " 64 printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
65 ".owner_cpu: %d\n", 65 ".owner_cpu: %d\n",
66 lock, lock->magic, 66 lock, lock->magic,
67 owner ? owner->comm : "<none>", 67 owner ? owner->comm : "<none>",
68 owner ? owner->pid : -1, 68 owner ? task_pid_nr(owner) : -1,
69 lock->owner_cpu); 69 lock->owner_cpu);
70 dump_stack(); 70 dump_stack();
71} 71}
@@ -116,7 +116,7 @@ static void __spin_lock_debug(spinlock_t *lock)
116 printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, " 116 printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, "
117 "%s/%d, %p\n", 117 "%s/%d, %p\n",
118 raw_smp_processor_id(), current->comm, 118 raw_smp_processor_id(), current->comm,
119 current->pid, lock); 119 task_pid_nr(current), lock);
120 dump_stack(); 120 dump_stack();
121#ifdef CONFIG_SMP 121#ifdef CONFIG_SMP
122 trigger_all_cpu_backtrace(); 122 trigger_all_cpu_backtrace();
@@ -161,7 +161,7 @@ static void rwlock_bug(rwlock_t *lock, const char *msg)
161 161
162 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", 162 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
163 msg, raw_smp_processor_id(), current->comm, 163 msg, raw_smp_processor_id(), current->comm,
164 current->pid, lock); 164 task_pid_nr(current), lock);
165 dump_stack(); 165 dump_stack();
166} 166}
167 167
diff --git a/mm/filemap.c b/mm/filemap.c
index 920366399eed..5209e47b7fe3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -841,7 +841,7 @@ static void shrink_readahead_size_eio(struct file *filp,
841/** 841/**
842 * do_generic_mapping_read - generic file read routine 842 * do_generic_mapping_read - generic file read routine
843 * @mapping: address_space to be read 843 * @mapping: address_space to be read
844 * @_ra: file's readahead state 844 * @ra: file's readahead state
845 * @filp: the file to read 845 * @filp: the file to read
846 * @ppos: current file position 846 * @ppos: current file position
847 * @desc: read_descriptor 847 * @desc: read_descriptor
diff --git a/mm/memory.c b/mm/memory.c
index bd16dcaeefb8..142683df8755 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -259,9 +259,6 @@ void free_pgd_range(struct mmu_gather **tlb,
259 continue; 259 continue;
260 free_pud_range(*tlb, pgd, addr, next, floor, ceiling); 260 free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
261 } while (pgd++, addr = next, addr != end); 261 } while (pgd++, addr = next, addr != end);
262
263 if (!(*tlb)->fullmm)
264 flush_tlb_pgtables((*tlb)->mm, start, end);
265} 262}
266 263
267void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, 264void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 568152ae6caf..c1592a94582f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -78,6 +78,7 @@
78#include <linux/slab.h> 78#include <linux/slab.h>
79#include <linux/string.h> 79#include <linux/string.h>
80#include <linux/module.h> 80#include <linux/module.h>
81#include <linux/nsproxy.h>
81#include <linux/interrupt.h> 82#include <linux/interrupt.h>
82#include <linux/init.h> 83#include <linux/init.h>
83#include <linux/compat.h> 84#include <linux/compat.h>
@@ -940,7 +941,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
940 941
941 /* Find the mm_struct */ 942 /* Find the mm_struct */
942 read_lock(&tasklist_lock); 943 read_lock(&tasklist_lock);
943 task = pid ? find_task_by_pid(pid) : current; 944 task = pid ? find_task_by_vpid(pid) : current;
944 if (!task) { 945 if (!task) {
945 read_unlock(&tasklist_lock); 946 read_unlock(&tasklist_lock);
946 return -ESRCH; 947 return -ESRCH;
@@ -1388,7 +1389,6 @@ EXPORT_SYMBOL(alloc_pages_current);
1388 * keeps mempolicies cpuset relative after its cpuset moves. See 1389 * keeps mempolicies cpuset relative after its cpuset moves. See
1389 * further kernel/cpuset.c update_nodemask(). 1390 * further kernel/cpuset.c update_nodemask().
1390 */ 1391 */
1391void *cpuset_being_rebound;
1392 1392
1393/* Slow path of a mempolicy copy */ 1393/* Slow path of a mempolicy copy */
1394struct mempolicy *__mpol_copy(struct mempolicy *old) 1394struct mempolicy *__mpol_copy(struct mempolicy *old)
@@ -2019,4 +2019,3 @@ out:
2019 m->version = (vma != priv->tail_vma) ? vma->vm_start : 0; 2019 m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
2020 return 0; 2020 return 0;
2021} 2021}
2022
diff --git a/mm/migrate.c b/mm/migrate.c
index 06d0877a66ef..4d6ee03db946 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -19,6 +19,7 @@
19#include <linux/pagemap.h> 19#include <linux/pagemap.h>
20#include <linux/buffer_head.h> 20#include <linux/buffer_head.h>
21#include <linux/mm_inline.h> 21#include <linux/mm_inline.h>
22#include <linux/nsproxy.h>
22#include <linux/pagevec.h> 23#include <linux/pagevec.h>
23#include <linux/rmap.h> 24#include <linux/rmap.h>
24#include <linux/topology.h> 25#include <linux/topology.h>
@@ -924,7 +925,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
924 925
925 /* Find the mm_struct */ 926 /* Find the mm_struct */
926 read_lock(&tasklist_lock); 927 read_lock(&tasklist_lock);
927 task = pid ? find_task_by_pid(pid) : current; 928 task = pid ? find_task_by_vpid(pid) : current;
928 if (!task) { 929 if (!task) {
929 read_unlock(&tasklist_lock); 930 read_unlock(&tasklist_lock);
930 return -ESRCH; 931 return -ESRCH;
diff --git a/mm/mmap.c b/mm/mmap.c
index 4275e81e25ba..7a30c4988231 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1048,8 +1048,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
1048 1048
1049 /* The open routine did something to the protections already? */ 1049 /* The open routine did something to the protections already? */
1050 if (pgprot_val(vma->vm_page_prot) != 1050 if (pgprot_val(vma->vm_page_prot) !=
1051 pgprot_val(protection_map[vm_flags & 1051 pgprot_val(vm_get_page_prot(vm_flags)))
1052 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
1053 return 0; 1052 return 0;
1054 1053
1055 /* Specialty mapping? */ 1054 /* Specialty mapping? */
@@ -1130,8 +1129,7 @@ munmap_back:
1130 vma->vm_start = addr; 1129 vma->vm_start = addr;
1131 vma->vm_end = addr + len; 1130 vma->vm_end = addr + len;
1132 vma->vm_flags = vm_flags; 1131 vma->vm_flags = vm_flags;
1133 vma->vm_page_prot = protection_map[vm_flags & 1132 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1134 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
1135 vma->vm_pgoff = pgoff; 1133 vma->vm_pgoff = pgoff;
1136 1134
1137 if (file) { 1135 if (file) {
@@ -2002,8 +2000,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
2002 vma->vm_end = addr + len; 2000 vma->vm_end = addr + len;
2003 vma->vm_pgoff = pgoff; 2001 vma->vm_pgoff = pgoff;
2004 vma->vm_flags = flags; 2002 vma->vm_flags = flags;
2005 vma->vm_page_prot = protection_map[flags & 2003 vma->vm_page_prot = vm_get_page_prot(flags);
2006 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
2007 vma_link(mm, vma, prev, rb_link, rb_parent); 2004 vma_link(mm, vma, prev, rb_link, rb_parent);
2008out: 2005out:
2009 mm->total_vm += len >> PAGE_SHIFT; 2006 mm->total_vm += len >> PAGE_SHIFT;
@@ -2209,7 +2206,7 @@ int install_special_mapping(struct mm_struct *mm,
2209 vma->vm_end = addr + len; 2206 vma->vm_end = addr + len;
2210 2207
2211 vma->vm_flags = vm_flags | mm->def_flags; 2208 vma->vm_flags = vm_flags | mm->def_flags;
2212 vma->vm_page_prot = protection_map[vma->vm_flags & 7]; 2209 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2213 2210
2214 vma->vm_ops = &special_mapping_vmops; 2211 vma->vm_ops = &special_mapping_vmops;
2215 vma->vm_private_data = pages; 2212 vma->vm_private_data = pages;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1d4d69790e59..55227845abbe 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -192,11 +192,9 @@ success:
192 * held in write mode. 192 * held in write mode.
193 */ 193 */
194 vma->vm_flags = newflags; 194 vma->vm_flags = newflags;
195 vma->vm_page_prot = protection_map[newflags & 195 vma->vm_page_prot = vm_get_page_prot(newflags);
196 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
197 if (vma_wants_writenotify(vma)) { 196 if (vma_wants_writenotify(vma)) {
198 vma->vm_page_prot = protection_map[newflags & 197 vma->vm_page_prot = vm_get_page_prot(newflags);
199 (VM_READ|VM_WRITE|VM_EXEC)];
200 dirty_accountable = 1; 198 dirty_accountable = 1;
201 } 199 }
202 200
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index a64decb5b13f..824cade07827 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -212,7 +212,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
212 if (!p->mm) 212 if (!p->mm)
213 continue; 213 continue;
214 /* skip the init task */ 214 /* skip the init task */
215 if (is_init(p)) 215 if (is_global_init(p))
216 continue; 216 continue;
217 217
218 /* 218 /*
@@ -265,7 +265,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
265 */ 265 */
266static void __oom_kill_task(struct task_struct *p, int verbose) 266static void __oom_kill_task(struct task_struct *p, int verbose)
267{ 267{
268 if (is_init(p)) { 268 if (is_global_init(p)) {
269 WARN_ON(1); 269 WARN_ON(1);
270 printk(KERN_WARNING "tried to kill init!\n"); 270 printk(KERN_WARNING "tried to kill init!\n");
271 return; 271 return;
@@ -278,7 +278,8 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
278 } 278 }
279 279
280 if (verbose) 280 if (verbose)
281 printk(KERN_ERR "Killed process %d (%s)\n", p->pid, p->comm); 281 printk(KERN_ERR "Killed process %d (%s)\n",
282 task_pid_nr(p), p->comm);
282 283
283 /* 284 /*
284 * We give our sacrificial lamb high priority and access to 285 * We give our sacrificial lamb high priority and access to
@@ -326,7 +327,7 @@ static int oom_kill_task(struct task_struct *p)
326 * to memory reserves though, otherwise we might deplete all memory. 327 * to memory reserves though, otherwise we might deplete all memory.
327 */ 328 */
328 do_each_thread(g, q) { 329 do_each_thread(g, q) {
329 if (q->mm == mm && q->tgid != p->tgid) 330 if (q->mm == mm && !same_thread_group(q, p))
330 force_sig(SIGKILL, q); 331 force_sig(SIGKILL, q);
331 } while_each_thread(g, q); 332 } while_each_thread(g, q);
332 333
@@ -337,7 +338,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
337 unsigned long points, const char *message) 338 unsigned long points, const char *message)
338{ 339{
339 struct task_struct *c; 340 struct task_struct *c;
340 struct list_head *tsk;
341 341
342 if (printk_ratelimit()) { 342 if (printk_ratelimit()) {
343 printk(KERN_WARNING "%s invoked oom-killer: " 343 printk(KERN_WARNING "%s invoked oom-killer: "
@@ -357,11 +357,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
357 } 357 }
358 358
359 printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n", 359 printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n",
360 message, p->pid, p->comm, points); 360 message, task_pid_nr(p), p->comm, points);
361 361
362 /* Try to kill a child first */ 362 /* Try to kill a child first */
363 list_for_each(tsk, &p->children) { 363 list_for_each_entry(c, &p->children, sibling) {
364 c = list_entry(tsk, struct task_struct, sibling);
365 if (c->mm == p->mm) 364 if (c->mm == p->mm)
366 continue; 365 continue;
367 if (!oom_kill_task(c)) 366 if (!oom_kill_task(c))
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index ff5784b440d7..66c736953cfe 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -656,11 +656,13 @@ static inline int hidp_setup_input(struct hidp_session *session, struct hidp_con
656 } 656 }
657 657
658 if (req->subclass & 0x80) { 658 if (req->subclass & 0x80) {
659 input->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); 659 input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
660 input->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE); 660 input->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
661 input->relbit[0] = BIT(REL_X) | BIT(REL_Y); 661 BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
662 input->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA); 662 input->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
663 input->relbit[0] |= BIT(REL_WHEEL); 663 input->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) |
664 BIT_MASK(BTN_EXTRA);
665 input->relbit[0] |= BIT_MASK(REL_WHEEL);
664 } 666 }
665 667
666 input->dev.parent = hidp_get_device(session); 668 input->dev.parent = hidp_get_device(session);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1f0068eae501..e0a06942c025 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -447,7 +447,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
447 rcu_assign_pointer(sk->sk_filter, fp); 447 rcu_assign_pointer(sk->sk_filter, fp);
448 rcu_read_unlock_bh(); 448 rcu_read_unlock_bh();
449 449
450 sk_filter_delayed_uncharge(sk, old_fp); 450 if (old_fp)
451 sk_filter_delayed_uncharge(sk, old_fp);
451 return 0; 452 return 0;
452} 453}
453 454
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 590a767b029c..daadbcc4e8dd 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -15,7 +15,7 @@
15 15
16#include <asm/uaccess.h> 16#include <asm/uaccess.h>
17#include <asm/system.h> 17#include <asm/system.h>
18#include <asm/bitops.h> 18#include <linux/bitops.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/types.h> 20#include <linux/types.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8cae60c53383..7ac703171ff3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -161,7 +161,7 @@
161#endif 161#endif
162#include <asm/byteorder.h> 162#include <asm/byteorder.h>
163#include <linux/rcupdate.h> 163#include <linux/rcupdate.h>
164#include <asm/bitops.h> 164#include <linux/bitops.h>
165#include <asm/io.h> 165#include <asm/io.h>
166#include <asm/dma.h> 166#include <asm/dma.h>
167#include <asm/uaccess.h> 167#include <asm/uaccess.h>
@@ -3514,7 +3514,7 @@ static int pktgen_thread_worker(void *arg)
3514 3514
3515 init_waitqueue_head(&t->queue); 3515 init_waitqueue_head(&t->queue);
3516 3516
3517 pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid); 3517 pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
3518 3518
3519 set_current_state(TASK_INTERRUPTIBLE); 3519 set_current_state(TASK_INTERRUPTIBLE);
3520 3520
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1072d16696c3..4a2640d38261 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -744,10 +744,10 @@ static struct net *get_net_ns_by_pid(pid_t pid)
744 rcu_read_lock(); 744 rcu_read_lock();
745 tsk = find_task_by_pid(pid); 745 tsk = find_task_by_pid(pid);
746 if (tsk) { 746 if (tsk) {
747 task_lock(tsk); 747 struct nsproxy *nsproxy;
748 if (tsk->nsproxy) 748 nsproxy = task_nsproxy(tsk);
749 net = get_net(tsk->nsproxy->net_ns); 749 if (nsproxy)
750 task_unlock(tsk); 750 net = get_net(nsproxy->net_ns);
751 } 751 }
752 rcu_read_unlock(); 752 rcu_read_unlock();
753 return net; 753 return net;
diff --git a/net/core/scm.c b/net/core/scm.c
index 530bee8d9ed9..100ba6d9d478 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -24,6 +24,8 @@
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <linux/netdevice.h> 25#include <linux/netdevice.h>
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/pid.h>
28#include <linux/nsproxy.h>
27 29
28#include <asm/system.h> 30#include <asm/system.h>
29#include <asm/uaccess.h> 31#include <asm/uaccess.h>
@@ -42,7 +44,7 @@
42 44
43static __inline__ int scm_check_creds(struct ucred *creds) 45static __inline__ int scm_check_creds(struct ucred *creds)
44{ 46{
45 if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) && 47 if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
46 ((creds->uid == current->uid || creds->uid == current->euid || 48 ((creds->uid == current->uid || creds->uid == current->euid ||
47 creds->uid == current->suid) || capable(CAP_SETUID)) && 49 creds->uid == current->suid) || capable(CAP_SETUID)) &&
48 ((creds->gid == current->gid || creds->gid == current->egid || 50 ((creds->gid == current->gid || creds->gid == current->egid ||
diff --git a/net/core/sock.c b/net/core/sock.c
index d292b4113d6e..febbcbcf8022 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -232,7 +232,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
232 warned++; 232 warned++;
233 printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " 233 printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
234 "tries to set negative timeout\n", 234 "tries to set negative timeout\n",
235 current->comm, current->pid); 235 current->comm, task_pid_nr(current));
236 return 0; 236 return 0;
237 } 237 }
238 *timeo_p = MAX_SCHEDULE_TIMEOUT; 238 *timeo_p = MAX_SCHEDULE_TIMEOUT;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 81a8285d6d6a..8d8c2915e064 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -54,7 +54,7 @@
54 54
55#include <asm/uaccess.h> 55#include <asm/uaccess.h>
56#include <asm/system.h> 56#include <asm/system.h>
57#include <asm/bitops.h> 57#include <linux/bitops.h>
58#include <linux/types.h> 58#include <linux/types.h>
59#include <linux/kernel.h> 59#include <linux/kernel.h>
60#include <linux/mm.h> 60#include <linux/mm.h>
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3cef12835c4b..8fb6ca23700a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -93,7 +93,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
93 int remaining, rover, low, high; 93 int remaining, rover, low, high;
94 94
95 inet_get_local_port_range(&low, &high); 95 inet_get_local_port_range(&low, &high);
96 remaining = high - low; 96 remaining = (high - low) + 1;
97 rover = net_random() % remaining + low; 97 rover = net_random() % remaining + low;
98 98
99 do { 99 do {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fac6398e4367..16eecc7046a3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -286,7 +286,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
286 struct inet_timewait_sock *tw = NULL; 286 struct inet_timewait_sock *tw = NULL;
287 287
288 inet_get_local_port_range(&low, &high); 288 inet_get_local_port_range(&low, &high);
289 remaining = high - low; 289 remaining = (high - low) + 1;
290 290
291 local_bh_disable(); 291 local_bh_disable();
292 for (i = 1; i <= remaining; i++) { 292 for (i = 1; i <= remaining; i++) {
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 1960747f354c..c99f2a33fb9e 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -794,7 +794,7 @@ static int sync_thread(void *startup)
794 794
795 add_wait_queue(&sync_wait, &wait); 795 add_wait_queue(&sync_wait, &wait);
796 796
797 set_sync_pid(state, current->pid); 797 set_sync_pid(state, task_pid_nr(current));
798 complete(tinfo->startup); 798 complete(tinfo->startup);
799 799
800 /* 800 /*
@@ -877,7 +877,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
877 if (!tinfo) 877 if (!tinfo)
878 return -ENOMEM; 878 return -ENOMEM;
879 879
880 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); 880 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
881 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", 881 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
882 sizeof(struct ip_vs_sync_conn)); 882 sizeof(struct ip_vs_sync_conn));
883 883
@@ -917,7 +917,7 @@ int stop_sync_thread(int state)
917 (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) 917 (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
918 return -ESRCH; 918 return -ESRCH;
919 919
920 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); 920 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
921 IP_VS_INFO("stopping sync thread %d ...\n", 921 IP_VS_INFO("stopping sync thread %d ...\n",
922 (state == IP_VS_STATE_MASTER) ? 922 (state == IP_VS_STATE_MASTER) ?
923 sync_master_pid : sync_backup_pid); 923 sync_master_pid : sync_backup_pid);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c78acc1a7f11..ffddd2b45352 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -122,7 +122,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
122 ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); 122 ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
123 123
124 if (write && ret == 0) { 124 if (write && ret == 0) {
125 if (range[1] <= range[0]) 125 if (range[1] < range[0])
126 ret = -EINVAL; 126 ret = -EINVAL;
127 else 127 else
128 set_local_port_range(range); 128 set_local_port_range(range);
@@ -150,7 +150,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
150 150
151 ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); 151 ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
152 if (ret == 0 && newval && newlen) { 152 if (ret == 0 && newval && newlen) {
153 if (range[1] <= range[0]) 153 if (range[1] < range[0])
154 ret = -EINVAL; 154 ret = -EINVAL;
155 else 155 else
156 set_local_port_range(range); 156 set_local_port_range(range);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4f322003835d..2e6ad6dbba6c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1334,7 +1334,7 @@ do_prequeue:
1334 if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { 1334 if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
1335 if (net_ratelimit()) 1335 if (net_ratelimit())
1336 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", 1336 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
1337 current->comm, current->pid); 1337 current->comm, task_pid_nr(current));
1338 peek_seq = tp->copied_seq; 1338 peek_seq = tp->copied_seq;
1339 } 1339 }
1340 continue; 1340 continue;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cb9fc58efb2f..35d2b0e9e10b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -147,13 +147,14 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
147 write_lock_bh(&udp_hash_lock); 147 write_lock_bh(&udp_hash_lock);
148 148
149 if (!snum) { 149 if (!snum) {
150 int i, low, high; 150 int i, low, high, remaining;
151 unsigned rover, best, best_size_so_far; 151 unsigned rover, best, best_size_so_far;
152 152
153 inet_get_local_port_range(&low, &high); 153 inet_get_local_port_range(&low, &high);
154 remaining = (high - low) + 1;
154 155
155 best_size_so_far = UINT_MAX; 156 best_size_so_far = UINT_MAX;
156 best = rover = net_random() % (high - low) + low; 157 best = rover = net_random() % remaining + low;
157 158
158 /* 1st pass: look for empty (or shortest) hash chain */ 159 /* 1st pass: look for empty (or shortest) hash chain */
159 for (i = 0; i < UDP_HTABLE_SIZE; i++) { 160 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 1c2c27655435..d6f1026f1943 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -261,7 +261,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
261 struct inet_timewait_sock *tw = NULL; 261 struct inet_timewait_sock *tw = NULL;
262 262
263 inet_get_local_port_range(&low, &high); 263 inet_get_local_port_range(&low, &high);
264 remaining = high - low; 264 remaining = (high - low) + 1;
265 265
266 local_bh_disable(); 266 local_bh_disable();
267 for (i = 1; i <= remaining; i++) { 267 for (i = 1; i <= remaining; i++) {
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 49eacba824df..46cf962f7f88 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -762,7 +762,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
762 if (net_ratelimit()) 762 if (net_ratelimit())
763 printk(KERN_DEBUG "LLC(%s:%d): Application " 763 printk(KERN_DEBUG "LLC(%s:%d): Application "
764 "bug, race in MSG_PEEK.\n", 764 "bug, race in MSG_PEEK.\n",
765 current->comm, current->pid); 765 current->comm, task_pid_nr(current));
766 peek_seq = llc->copied_seq; 766 peek_seq = llc->copied_seq;
767 } 767 }
768 continue; 768 continue;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d34a9deca67a..4b4ed2a5803c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -37,8 +37,6 @@
37 37
38struct ieee80211_local; 38struct ieee80211_local;
39 39
40#define BIT(x) (1 << (x))
41
42#define IEEE80211_ALIGN32_PAD(a) ((4 - ((a) & 3)) & 3) 40#define IEEE80211_ALIGN32_PAD(a) ((4 - ((a) & 3)) & 3)
43 41
44/* Maximum number of broadcast/multicast frames to buffer when some of the 42/* Maximum number of broadcast/multicast frames to buffer when some of the
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index db81aef6177a..f7ffeec3913f 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -108,14 +108,11 @@ struct ieee802_11_elems {
108 u8 wmm_param_len; 108 u8 wmm_param_len;
109}; 109};
110 110
111enum ParseRes { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 }; 111static void ieee802_11_parse_elems(u8 *start, size_t len,
112 112 struct ieee802_11_elems *elems)
113static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
114 struct ieee802_11_elems *elems)
115{ 113{
116 size_t left = len; 114 size_t left = len;
117 u8 *pos = start; 115 u8 *pos = start;
118 int unknown = 0;
119 116
120 memset(elems, 0, sizeof(*elems)); 117 memset(elems, 0, sizeof(*elems));
121 118
@@ -126,15 +123,8 @@ static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
126 elen = *pos++; 123 elen = *pos++;
127 left -= 2; 124 left -= 2;
128 125
129 if (elen > left) { 126 if (elen > left)
130#if 0 127 return;
131 if (net_ratelimit())
132 printk(KERN_DEBUG "IEEE 802.11 element parse "
133 "failed (id=%d elen=%d left=%d)\n",
134 id, elen, left);
135#endif
136 return ParseFailed;
137 }
138 128
139 switch (id) { 129 switch (id) {
140 case WLAN_EID_SSID: 130 case WLAN_EID_SSID:
@@ -201,28 +191,15 @@ static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
201 elems->ext_supp_rates_len = elen; 191 elems->ext_supp_rates_len = elen;
202 break; 192 break;
203 default: 193 default:
204#if 0
205 printk(KERN_DEBUG "IEEE 802.11 element parse ignored "
206 "unknown element (id=%d elen=%d)\n",
207 id, elen);
208#endif
209 unknown++;
210 break; 194 break;
211 } 195 }
212 196
213 left -= elen; 197 left -= elen;
214 pos += elen; 198 pos += elen;
215 } 199 }
216
217 /* Do not trigger error if left == 1 as Apple Airport base stations
218 * send AssocResps that are one spurious byte too long. */
219
220 return unknown ? ParseUnknown : ParseOK;
221} 200}
222 201
223 202
224
225
226static int ecw2cw(int ecw) 203static int ecw2cw(int ecw)
227{ 204{
228 int cw = 1; 205 int cw = 1;
@@ -931,12 +908,7 @@ static void ieee80211_auth_challenge(struct net_device *dev,
931 908
932 printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name); 909 printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name);
933 pos = mgmt->u.auth.variable; 910 pos = mgmt->u.auth.variable;
934 if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems) 911 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
935 == ParseFailed) {
936 printk(KERN_DEBUG "%s: failed to parse Auth(challenge)\n",
937 dev->name);
938 return;
939 }
940 if (!elems.challenge) { 912 if (!elems.challenge) {
941 printk(KERN_DEBUG "%s: no challenge IE in shared key auth " 913 printk(KERN_DEBUG "%s: no challenge IE in shared key auth "
942 "frame\n", dev->name); 914 "frame\n", dev->name);
@@ -1230,12 +1202,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
1230 aid &= ~(BIT(15) | BIT(14)); 1202 aid &= ~(BIT(15) | BIT(14));
1231 1203
1232 pos = mgmt->u.assoc_resp.variable; 1204 pos = mgmt->u.assoc_resp.variable;
1233 if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems) 1205 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
1234 == ParseFailed) {
1235 printk(KERN_DEBUG "%s: failed to parse AssocResp\n",
1236 dev->name);
1237 return;
1238 }
1239 1206
1240 if (!elems.supp_rates) { 1207 if (!elems.supp_rates) {
1241 printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", 1208 printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
@@ -1459,7 +1426,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1459 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 1426 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1460 struct ieee802_11_elems elems; 1427 struct ieee802_11_elems elems;
1461 size_t baselen; 1428 size_t baselen;
1462 int channel, invalid = 0, clen; 1429 int channel, clen;
1463 struct ieee80211_sta_bss *bss; 1430 struct ieee80211_sta_bss *bss;
1464 struct sta_info *sta; 1431 struct sta_info *sta;
1465 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1432 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1505,9 +1472,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1505#endif /* CONFIG_MAC80211_IBSS_DEBUG */ 1472#endif /* CONFIG_MAC80211_IBSS_DEBUG */
1506 } 1473 }
1507 1474
1508 if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, 1475 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
1509 &elems) == ParseFailed)
1510 invalid = 1;
1511 1476
1512 if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates && 1477 if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
1513 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 && 1478 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
@@ -1724,9 +1689,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
1724 if (baselen > len) 1689 if (baselen > len)
1725 return; 1690 return;
1726 1691
1727 if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, 1692 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
1728 &elems) == ParseFailed)
1729 return;
1730 1693
1731 if (elems.erp_info && elems.erp_info_len >= 1) 1694 if (elems.erp_info && elems.erp_info_len >= 1)
1732 ieee80211_handle_erp_ie(dev, elems.erp_info[0]); 1695 ieee80211_handle_erp_ie(dev, elems.erp_info[0]);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index af79423bc8e8..9ec50139b9a1 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -2,13 +2,13 @@
2 * GPL (C) 2002 Martin Devera (devik@cdi.cz). 2 * GPL (C) 2002 Martin Devera (devik@cdi.cz).
3 */ 3 */
4#include <linux/module.h> 4#include <linux/module.h>
5#include <linux/bitops.h>
5#include <linux/skbuff.h> 6#include <linux/skbuff.h>
6#include <linux/netfilter/x_tables.h> 7#include <linux/netfilter/x_tables.h>
7#include <linux/netfilter/xt_connbytes.h> 8#include <linux/netfilter/xt_connbytes.h>
8#include <net/netfilter/nf_conntrack.h> 9#include <net/netfilter/nf_conntrack.h>
9 10
10#include <asm/div64.h> 11#include <asm/div64.h>
11#include <asm/bitops.h>
12 12
13MODULE_LICENSE("GPL"); 13MODULE_LICENSE("GPL");
14MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 14MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e11000a8e950..d0936506b731 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1623,11 +1623,6 @@ static struct vm_operations_struct packet_mmap_ops = {
1623 .close =packet_mm_close, 1623 .close =packet_mm_close,
1624}; 1624};
1625 1625
1626static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1627{
1628 return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1629}
1630
1631static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) 1626static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1632{ 1627{
1633 int i; 1628 int i;
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index eaabf087c59b..d1e9d68f8ba0 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -146,18 +146,18 @@ static void rfkill_disconnect(struct input_handle *handle)
146static const struct input_device_id rfkill_ids[] = { 146static const struct input_device_id rfkill_ids[] = {
147 { 147 {
148 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, 148 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
149 .evbit = { BIT(EV_KEY) }, 149 .evbit = { BIT_MASK(EV_KEY) },
150 .keybit = { [LONG(KEY_WLAN)] = BIT(KEY_WLAN) }, 150 .keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) },
151 }, 151 },
152 { 152 {
153 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, 153 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
154 .evbit = { BIT(EV_KEY) }, 154 .evbit = { BIT_MASK(EV_KEY) },
155 .keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) }, 155 .keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) },
156 }, 156 },
157 { 157 {
158 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, 158 .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
159 .evbit = { BIT(EV_KEY) }, 159 .evbit = { BIT_MASK(EV_KEY) },
160 .keybit = { [LONG(KEY_UWB)] = BIT(KEY_UWB) }, 160 .keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) },
161 }, 161 },
162 { } 162 { }
163}; 163};
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 92435a882fac..9c15c4888d12 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -2,9 +2,7 @@
2# Traffic control configuration. 2# Traffic control configuration.
3# 3#
4 4
5menu "QoS and/or fair queueing" 5menuconfig NET_SCHED
6
7config NET_SCHED
8 bool "QoS and/or fair queueing" 6 bool "QoS and/or fair queueing"
9 select NET_SCH_FIFO 7 select NET_SCH_FIFO
10 ---help--- 8 ---help---
@@ -41,9 +39,6 @@ config NET_SCHED
41 The available schedulers are listed in the following questions; you 39 The available schedulers are listed in the following questions; you
42 can say Y to as many as you like. If unsure, say N now. 40 can say Y to as many as you like. If unsure, say N now.
43 41
44config NET_SCH_FIFO
45 bool
46
47if NET_SCHED 42if NET_SCHED
48 43
49comment "Queueing/Scheduling" 44comment "Queueing/Scheduling"
@@ -500,4 +495,5 @@ config NET_CLS_IND
500 495
501endif # NET_SCHED 496endif # NET_SCHED
502 497
503endmenu 498config NET_SCH_FIFO
499 bool
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e01d57692c9a..fa1a6f45dc41 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -556,6 +556,7 @@ void dev_deactivate(struct net_device *dev)
556{ 556{
557 struct Qdisc *qdisc; 557 struct Qdisc *qdisc;
558 struct sk_buff *skb; 558 struct sk_buff *skb;
559 int running;
559 560
560 spin_lock_bh(&dev->queue_lock); 561 spin_lock_bh(&dev->queue_lock);
561 qdisc = dev->qdisc; 562 qdisc = dev->qdisc;
@@ -571,12 +572,31 @@ void dev_deactivate(struct net_device *dev)
571 572
572 dev_watchdog_down(dev); 573 dev_watchdog_down(dev);
573 574
574 /* Wait for outstanding dev_queue_xmit calls. */ 575 /* Wait for outstanding qdisc-less dev_queue_xmit calls. */
575 synchronize_rcu(); 576 synchronize_rcu();
576 577
577 /* Wait for outstanding qdisc_run calls. */ 578 /* Wait for outstanding qdisc_run calls. */
578 while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) 579 do {
579 yield(); 580 while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
581 yield();
582
583 /*
584 * Double-check inside queue lock to ensure that all effects
585 * of the queue run are visible when we return.
586 */
587 spin_lock_bh(&dev->queue_lock);
588 running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
589 spin_unlock_bh(&dev->queue_lock);
590
591 /*
592 * The running flag should never be set at this point because
593 * we've already set dev->qdisc to noop_qdisc *inside* the same
594 * pair of spin locks. That is, if any qdisc_run starts after
595 * our initial test it should see the noop_qdisc and then
596 * clear the RUNNING bit before dropping the queue lock. So
597 * if it is set here then we've found a bug.
598 */
599 } while (WARN_ON_ONCE(running));
580} 600}
581 601
582void dev_init_scheduler(struct net_device *dev) 602void dev_init_scheduler(struct net_device *dev)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3c773c53e12e..c98873f39aec 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -847,7 +847,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
847 task->tk_start = jiffies; 847 task->tk_start = jiffies;
848 848
849 dprintk("RPC: new task initialized, procpid %u\n", 849 dprintk("RPC: new task initialized, procpid %u\n",
850 current->pid); 850 task_pid_nr(current));
851} 851}
852 852
853static struct rpc_task * 853static struct rpc_task *
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6996cba5aa96..9163ec526c2a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -483,7 +483,7 @@ static int unix_listen(struct socket *sock, int backlog)
483 sk->sk_max_ack_backlog = backlog; 483 sk->sk_max_ack_backlog = backlog;
484 sk->sk_state = TCP_LISTEN; 484 sk->sk_state = TCP_LISTEN;
485 /* set credentials so connect can copy them */ 485 /* set credentials so connect can copy them */
486 sk->sk_peercred.pid = current->tgid; 486 sk->sk_peercred.pid = task_tgid_vnr(current);
487 sk->sk_peercred.uid = current->euid; 487 sk->sk_peercred.uid = current->euid;
488 sk->sk_peercred.gid = current->egid; 488 sk->sk_peercred.gid = current->egid;
489 err = 0; 489 err = 0;
@@ -1133,7 +1133,7 @@ restart:
1133 unix_peer(newsk) = sk; 1133 unix_peer(newsk) = sk;
1134 newsk->sk_state = TCP_ESTABLISHED; 1134 newsk->sk_state = TCP_ESTABLISHED;
1135 newsk->sk_type = sk->sk_type; 1135 newsk->sk_type = sk->sk_type;
1136 newsk->sk_peercred.pid = current->tgid; 1136 newsk->sk_peercred.pid = task_tgid_vnr(current);
1137 newsk->sk_peercred.uid = current->euid; 1137 newsk->sk_peercred.uid = current->euid;
1138 newsk->sk_peercred.gid = current->egid; 1138 newsk->sk_peercred.gid = current->egid;
1139 newu = unix_sk(newsk); 1139 newu = unix_sk(newsk);
@@ -1194,7 +1194,7 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
1194 sock_hold(skb); 1194 sock_hold(skb);
1195 unix_peer(ska)=skb; 1195 unix_peer(ska)=skb;
1196 unix_peer(skb)=ska; 1196 unix_peer(skb)=ska;
1197 ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid; 1197 ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1198 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid; 1198 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1199 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid; 1199 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1200 1200
diff --git a/samples/Kconfig b/samples/Kconfig
new file mode 100644
index 000000000000..57bb2236952c
--- /dev/null
+++ b/samples/Kconfig
@@ -0,0 +1,16 @@
1# samples/Kconfig
2
3menuconfig SAMPLES
4 bool "Sample kernel code"
5 help
6 You can build and test sample kernel code here.
7
8if SAMPLES
9
10config SAMPLE_MARKERS
11 tristate "Build markers examples -- loadable modules only"
12 depends on MARKERS && m
13 help
14 This build markers example modules.
15
16endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
new file mode 100644
index 000000000000..5a4f0b6bcbed
--- /dev/null
+++ b/samples/Makefile
@@ -0,0 +1,3 @@
1# Makefile for Linux samples code
2
3obj-$(CONFIG_SAMPLES) += markers/
diff --git a/samples/markers/Makefile b/samples/markers/Makefile
new file mode 100644
index 000000000000..6d7231265f0f
--- /dev/null
+++ b/samples/markers/Makefile
@@ -0,0 +1,4 @@
1# builds the kprobes example kernel modules;
2# then to use one (as root): insmod <module_name.ko>
3
4obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c
new file mode 100644
index 000000000000..e787c6d16dd7
--- /dev/null
+++ b/samples/markers/marker-example.c
@@ -0,0 +1,54 @@
1/* marker-example.c
2 *
3 * Executes a marker when /proc/marker-example is opened.
4 *
5 * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
6 *
7 * This file is released under the GPLv2.
8 * See the file COPYING for more details.
9 */
10
11#include <linux/module.h>
12#include <linux/marker.h>
13#include <linux/sched.h>
14#include <linux/proc_fs.h>
15
16struct proc_dir_entry *pentry_example;
17
18static int my_open(struct inode *inode, struct file *file)
19{
20 int i;
21
22 trace_mark(subsystem_event, "%d %s", 123, "example string");
23 for (i = 0; i < 10; i++)
24 trace_mark(subsystem_eventb, MARK_NOARGS);
25 return -EPERM;
26}
27
28static struct file_operations mark_ops = {
29 .open = my_open,
30};
31
32static int example_init(void)
33{
34 printk(KERN_ALERT "example init\n");
35 pentry_example = create_proc_entry("marker-example", 0444, NULL);
36 if (pentry_example)
37 pentry_example->proc_fops = &mark_ops;
38 else
39 return -EPERM;
40 return 0;
41}
42
43static void example_exit(void)
44{
45 printk(KERN_ALERT "example exit\n");
46 remove_proc_entry("marker-example", NULL);
47}
48
49module_init(example_init)
50module_exit(example_exit)
51
52MODULE_LICENSE("GPL");
53MODULE_AUTHOR("Mathieu Desnoyers");
54MODULE_DESCRIPTION("Marker example");
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
new file mode 100644
index 000000000000..238b2e384fc8
--- /dev/null
+++ b/samples/markers/probe-example.c
@@ -0,0 +1,98 @@
1/* probe-example.c
2 *
3 * Connects two functions to marker call sites.
4 *
5 * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
6 *
7 * This file is released under the GPLv2.
8 * See the file COPYING for more details.
9 */
10
11#include <linux/sched.h>
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/marker.h>
15#include <asm/atomic.h>
16
17struct probe_data {
18 const char *name;
19 const char *format;
20 marker_probe_func *probe_func;
21};
22
23void probe_subsystem_event(const struct marker *mdata, void *private,
24 const char *format, ...)
25{
26 va_list ap;
27 /* Declare args */
28 unsigned int value;
29 const char *mystr;
30
31 /* Assign args */
32 va_start(ap, format);
33 value = va_arg(ap, typeof(value));
34 mystr = va_arg(ap, typeof(mystr));
35
36 /* Call printk */
37 printk(KERN_DEBUG "Value %u, string %s\n", value, mystr);
38
39 /* or count, check rights, serialize data in a buffer */
40
41 va_end(ap);
42}
43
44atomic_t eventb_count = ATOMIC_INIT(0);
45
46void probe_subsystem_eventb(const struct marker *mdata, void *private,
47 const char *format, ...)
48{
49 /* Increment counter */
50 atomic_inc(&eventb_count);
51}
52
53static struct probe_data probe_array[] =
54{
55 { .name = "subsystem_event",
56 .format = "%d %s",
57 .probe_func = probe_subsystem_event },
58 { .name = "subsystem_eventb",
59 .format = MARK_NOARGS,
60 .probe_func = probe_subsystem_eventb },
61};
62
63static int __init probe_init(void)
64{
65 int result;
66 int i;
67
68 for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
69 result = marker_probe_register(probe_array[i].name,
70 probe_array[i].format,
71 probe_array[i].probe_func, &probe_array[i]);
72 if (result)
73 printk(KERN_INFO "Unable to register probe %s\n",
74 probe_array[i].name);
75 result = marker_arm(probe_array[i].name);
76 if (result)
77 printk(KERN_INFO "Unable to arm probe %s\n",
78 probe_array[i].name);
79 }
80 return 0;
81}
82
83static void __exit probe_fini(void)
84{
85 int i;
86
87 for (i = 0; i < ARRAY_SIZE(probe_array); i++)
88 marker_probe_unregister(probe_array[i].name);
89 printk(KERN_INFO "Number of event b : %u\n",
90 atomic_read(&eventb_count));
91}
92
93module_init(probe_init);
94module_exit(probe_fini);
95
96MODULE_LICENSE("GPL");
97MODULE_AUTHOR("Mathieu Desnoyers");
98MODULE_DESCRIPTION("SUBSYSTEM Probe");
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index b458e2acb4ac..28e480c8100f 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -15,7 +15,7 @@
15# AVR32 port by Haavard Skinnemoen <hskinnemoen@atmel.com> 15# AVR32 port by Haavard Skinnemoen <hskinnemoen@atmel.com>
16# 16#
17# Usage: 17# Usage:
18# objdump -d vmlinux | stackcheck.pl [arch] 18# objdump -d vmlinux | scripts/checkstack.pl [arch]
19# 19#
20# TODO : Port to all architectures (one regex per arch) 20# TODO : Port to all architectures (one regex per arch)
21 21
diff --git a/security/commoncap.c b/security/commoncap.c
index 48ca5b092768..43f902750a1b 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -23,6 +23,7 @@
23#include <linux/xattr.h> 23#include <linux/xattr.h>
24#include <linux/hugetlb.h> 24#include <linux/hugetlb.h>
25#include <linux/mount.h> 25#include <linux/mount.h>
26#include <linux/sched.h>
26 27
27#ifdef CONFIG_SECURITY_FILE_CAPABILITIES 28#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
28/* 29/*
@@ -334,7 +335,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
334 /* For init, we want to retain the capabilities set 335 /* For init, we want to retain the capabilities set
335 * in the init_task struct. Thus we skip the usual 336 * in the init_task struct. Thus we skip the usual
336 * capability rules */ 337 * capability rules */
337 if (!is_init(current)) { 338 if (!is_global_init(current)) {
338 current->cap_permitted = new_permitted; 339 current->cap_permitted = new_permitted;
339 current->cap_effective = bprm->cap_effective ? 340 current->cap_effective = bprm->cap_effective ?
340 new_permitted : 0; 341 new_permitted : 0;
diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c
index a1aa89f2faf3..566b5ab9d4e8 100644
--- a/sound/ppc/beep.c
+++ b/sound/ppc/beep.c
@@ -236,8 +236,8 @@ int __init snd_pmac_attach_beep(struct snd_pmac *chip)
236 input_dev->id.product = 0x0001; 236 input_dev->id.product = 0x0001;
237 input_dev->id.version = 0x0100; 237 input_dev->id.version = 0x0100;
238 238
239 input_dev->evbit[0] = BIT(EV_SND); 239 input_dev->evbit[0] = BIT_MASK(EV_SND);
240 input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE); 240 input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
241 input_dev->event = snd_pmac_beep_event; 241 input_dev->event = snd_pmac_beep_event;
242 input_dev->dev.parent = &chip->pdev->dev; 242 input_dev->dev.parent = &chip->pdev->dev;
243 input_set_drvdata(input_dev, chip); 243 input_set_drvdata(input_dev, chip);
diff --git a/sound/usb/caiaq/caiaq-input.c b/sound/usb/caiaq/caiaq-input.c
index a1de0c608957..cd536ca20e56 100644
--- a/sound/usb/caiaq/caiaq-input.c
+++ b/sound/usb/caiaq/caiaq-input.c
@@ -200,8 +200,9 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *dev)
200 200
201 switch (dev->chip.usb_id) { 201 switch (dev->chip.usb_id) {
202 case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL2): 202 case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL2):
203 input->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 203 input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
204 input->absbit[0] = BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_Z); 204 input->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
205 BIT_MASK(ABS_Z);
205 input->keycode = keycode_rk2; 206 input->keycode = keycode_rk2;
206 input->keycodesize = sizeof(char); 207 input->keycodesize = sizeof(char);
207 input->keycodemax = ARRAY_SIZE(keycode_rk2); 208 input->keycodemax = ARRAY_SIZE(keycode_rk2);
@@ -228,8 +229,8 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *dev)
228 snd_usb_caiaq_set_auto_msg(dev, 1, 10, 0); 229 snd_usb_caiaq_set_auto_msg(dev, 1, 10, 0);
229 break; 230 break;
230 case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AK1): 231 case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AK1):
231 input->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); 232 input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
232 input->absbit[0] = BIT(ABS_X); 233 input->absbit[0] = BIT_MASK(ABS_X);
233 input->keycode = keycode_ak1; 234 input->keycode = keycode_ak1;
234 input->keycodesize = sizeof(char); 235 input->keycodesize = sizeof(char);
235 input->keycodemax = ARRAY_SIZE(keycode_ak1); 236 input->keycodemax = ARRAY_SIZE(keycode_ak1);